From: Rainer Orth
Date: Wed, 2 Nov 2011 15:03:19 +0000 (+0000)
Subject: Move libgcc1 to toplevel libgcc
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=45b86625d7edd2278c0cdcf335e007a47671813f;p=gcc.git

Move libgcc1 to toplevel libgcc

gcc:
	* Makefile.in (LIB1ASMSRC): Don't export.
	(libgcc.mvars): Don't emit LIB1ASMFUNCS, LIB1ASMSRC.
	* config/arm/arm.c: Update lib1funcs.asm filename.
	* config/arm/linux-eabi.h: Likewise.
	* config/arm/bpabi-v6m.S, config/arm/bpabi.S,
	config/arm/ieee754-df.S, config/arm/ieee754-sf.S: Move to
	../libgcc/config/arm.
	* config/arm/lib1funcs.asm: Move to ../libgcc/config/arm/lib1funcs.S.
	* config/arm/t-arm (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/arm/t-arm-elf (LIB1ASMFUNCS): Remove.
	* config/arm/t-bpabi: Likewise.
	* config/arm/t-linux (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/arm/t-linux-eabi (LIB1ASMFUNCS): Remove.
	* config/arm/t-strongarm-elf: Likewise.
	* config/arm/t-symbian: Likewise.
	* config/arm/t-vxworks: Likewise.
	* config/arm/t-wince-pe: Likewise.
	* config/avr/libgcc.S: Move to ../libgcc/config/avr.
	* config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/bfin/lib1funcs.asm: Move to
	../libgcc/config/bfin/lib1funcs.S.
	* config/bfin/t-bfin: Remove.
	* config/bfin/t-bfin-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/bfin/t-bfin-linux: Likewise.
	* config/bfin/t-bfin-uclinux: Likewise.
	* config/c6x/lib1funcs.asm: Move to
	../libgcc/config/c6x/lib1funcs.S.
	* config/c6x/t-c6x-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/fr30/lib1funcs.asm: Move to
	../libgcc/config/fr30/lib1funcs.S.
	* config/fr30/t-fr30 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/frv/lib1funcs.asm: Move to
	../libgcc/config/frv/lib1funcs.S.
	* config/frv/t-frv (CROSS_LIBGCC1, LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/h8300/fixunssfsi.c: Update lib1funcs.asm filename.
	* config/h8300/lib1funcs.asm: Move to
	../libgcc/config/h8300/lib1funcs.S.
	* config/h8300/t-h8300 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/i386/cygwin.asm: Move to ../libgcc/config/i386/cygwin.S.
	* config/i386/t-cygming (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/i386/t-interix: Likewise.
	* config/ia64/lib1funcs.asm: Move to
	../libgcc/config/ia64/lib1funcs.S.
	* config/ia64/t-hpux (LIB1ASMFUNCS, LIBGCC1_TEST): Remove.
	* config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/iq2000/t-iq2000 (LIBGCC1, CROSS_LIBGCC1): Remove.
	* config/m32c/m32c.c: Update m32c-lib1.S filename.
	* config/m32c/m32c-lib1.S: Move to ../libgcc/config/m32c/lib1funcs.S.
	* config/m32c/t-m32c (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/m32r/t-linux (CROSS_LIBGCC1, LIBGCC1, LIBGCC1_TEST): Remove.
	* config/m68k/lb1sf68.asm: Move to ../libgcc/config/m68k/lb1sf68.S.
	* config/m68k/t-floatlib (LIB1ASMSRC, LIB1ASMFUNCS): New file.
	* config/mcore/lib1.asm: Move to ../libgcc/config/mcore/lib1funcs.S.
	* config/mcore/t-mcore (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/mep/mep-lib1.asm: Move to ../libgcc/config/mep/lib1funcs.S.
	* config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/mips/mips16.S: Move to ../libgcc/config/mips.
	* config/mips/t-libgcc-mips16: Remove.
	* config/mips/t-sr71k (LIBGCC1, CROSS_LIBGCC1): Remove.
	* config/pa/milli64.S: Move to ../libgcc/config/pa.
	* config/pa/t-linux (LIB1ASMFUNCS, LIB1ASMSRC): Remove.
	* config/pa/t-linux64: Likewise.
	* config/picochip/libgccExtras/fake_libgcc.asm: Move to
	../libgcc/config/picochip/lib1funcs.S.
	* config/picochip/t-picochip (LIB1ASMFUNCS, LIB1ASMSRC): Remove.
	* config/sh/lib1funcs.asm: Move to ../libgcc/config/sh/lib1funcs.S.
	* config/sh/lib1funcs.h: Move to ../libgcc/config/sh.
	* config/sh/sh.h: Update lib1funcs.asm filename.
	* config/sh/t-linux (LIB1ASMFUNCS_CACHE): Remove.
	* config/sh/t-netbsd: Likewise.
	* config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE):
	Remove.
	* config/sh/t-sh64 (LIB1ASMFUNCS): Remove.
	* config/sparc/lb1spc.asm: Move to ../libgcc/config/sparc/lb1spc.S.
	* config/sparc/lb1spl.asm: Remove.
	* config/sparc/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/sparc/t-leon: Likewise.
	* config/spu/t-spu-elf (LIBGCC1, CROSS_LIBGCC1): Remove.
	* config/v850/lib1funcs.asm: Move to ../libgcc/config/v850/lib1funcs.S.
	* config/v850/t-v850 (LIB1ASMSRC, LIB1ASMFUNCS): Remove
	* config/vax/lib1funcs.asm: Move to ../libgcc/config/vax/lib1funcs.S.
	* config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S: Move to
	../libgcc/config/xtensa.
	* config/vax/t-linux: Remove.
	* config/xtensa/lib1funcs.asm: Move to
	../libgcc/config/xtensa/lib1funcs.S.
	* config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config.gcc (bfin*-rtems*): Remove bfin/t-bfin from tmake_file.
	(bfin*-*): Likewise.
	(mips64*-*-linux*, mipsisa64*-*-linux*): Remove
	mips/t-libgcc-mips16 from tmake_file.
	(mips*-*-linux*): Likewise.
	(mips*-sde-elf*): Likewise.
	(mipsisa32-*-elf*, mipsisa32el-*-elf*, mipsisa32r2-*-elf*)
	(mipsisa32r2el-*-elf*, mipsisa64-*-elf*, mipsisa64el-*-elf*)
	(mipsisa64r2-*-elf*, mipsisa64r2el-*-elf*): Likewise.
	(mipsisa64sb1-*-elf*, mipsisa64sb1el-*-elf*): Likewise.
	(mips-*-elf*, mipsel-*-elf*): Likewise.
	(mips64-*-elf*, mips64el-*-elf*): Likewise.
	(mips64orion-*-elf*, mips64orionel-*-elf*): Likewise.
	(mips*-*-rtems*): Likewise.
	(mipstx39-*-elf*, mipstx39el-*-elf*): Likewise.
	(vax-*-linux*): Remove vax/t-linux from tmake_file.

libgcc:
	* Makefile.in ($(lib1asmfuncs-o), $(lib1asmfuncs-s-o)): Use
	$(srcdir) to refer to $(LIB1ASMSRC). Use $<.
	* config/arm/bpabi-v6m.S, config/arm/bpabi.S,
	config/arm/ieee754-df.S, config/arm/ieee754-sf.S,
	config/arm/lib1funcs.S: New files.
	* config/arm/libunwind.S [!__symbian__]: Use lib1funcs.S.
	* config/arm/t-arm: New file.
	* config/arm/t-bpabi (LIB1ASMFUNCS): Set.
	* config/arm/t-elf, config/arm/t-linux, config/arm/t-linux-eabi,
	config/arm/t-strongarm-elf: New files.
	* config/arm/t-symbian (LIB1ASMFUNCS): Set.
	* config/arm/t-vxworks, config/arm/t-wince-pe: New files.
	* config/avr/lib1funcs.S: New file.
	* config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/bfin/lib1funcs.S, config/bfin/t-bfin: New files.
	* config/c6x/lib1funcs.S: New file.
	* config/c6x/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/fr30/lib1funcs.S, config/fr30/t-fr30: New files.
	* config/frv/lib1funcs.S: New file.
	* config/frv/t-frv (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/h8300/lib1funcs.S, config/h8300/t-h8300: New files.
	* config/i386/cygwin.S, config/i386/t-chkstk: New files.
	* config/ia64/__divxf3.asm: Rename to ...
	* config/ia64/__divxf3.S: ... this. Adapt lib1funcs.asm filename.
	* config/ia64/_fixtfdi.asm: Rename to ...
	* config/ia64/_fixtfdi.S: ... this. Adapt lib1funcs.asm filename.
	* config/ia64/_fixunstfdi.asm: Rename to ...
	* config/ia64/_fixunstfdi.S: ... this. Adapt lib1funcs.asm filename.
	* config/ia64/_floatditf.asm: Rename to ...
	* config/ia64/_floatditf.S: ... this. Adapt lib1funcs.asm filename.
	* config/ia64/lib1funcs.S: New file.
	* config/ia64/t-hpux (LIB1ASMFUNCS): Set.
	* config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/ia64/t-softfp-compat (libgcc1-tf-compats): Adapt suffix.
	* config/m32c/lib1funcs.S, config/m32c/t-m32c: New files.
	* config/m68k/lb1sf68.S, config/m68k/t-floatlib: New files.
	* config/mcore/lib1funcs.S, config/mcore/t-mcore: New files.
	* config/mep/lib1funcs.S: New file.
	* config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/mips/mips16.S: New file.
	* config/mips/t-mips16 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/pa/milli64.S: New file.
	* config/pa/t-linux, config/pa/t-linux64: New files.
	* config/picochip/lib1funcs.S: New file.
	* config/picochip/t-picochip (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/sh/lib1funcs.S, config/sh/lib1funcs.h: New files.
	* config/sh/t-linux (LIB1ASMFUNCS_CACHE): Set.
	* config/sh/t-netbsd: New file.
	* config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE):
	Set. Use $(srcdir) to refer to lib1funcs.S, adapt filename.
	* config/sh/t-sh64: New file.
	* config/sparc/lb1spc.S: New file.
	* config/sparc/t-softmul (LIB1ASMSRC): Adapt sparc/lb1spc.asm
	filename.
	* config/v850/lib1funcs.S, config/v850/t-v850: New files.
	* config/vax/lib1funcs.S, config/vax/t-linux: New files.
	* config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S,
	config/xtensa/lib1funcs.S: New files.
	* config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config.host (arm-wrs-vxworks): Add arm/t-arm, arm/t-vxworks to
	tmake_file.
	(arm*-*-freebsd*): Add arm/t-arm, arm/t-strongarm-elf to tmake_file.
	(arm*-*-netbsdelf*): Add arm/t-arm to tmake_file.
	(arm*-*-linux*): Likewise. Add arm/t-elf, arm/t-bpabi,
	arm/t-linux-eabi to tmake_file for arm*-*-linux-*eabi,
	add arm/t-linux otherwise.
	(arm*-*-uclinux*): Add arm/t-arm, arm/t-elf to tmake_file.
	(arm*-*-ecos-elf): Likewise.
	(arm*-*-eabi*, arm*-*-symbianelf*): Likewise.
	(arm*-*-rtems*): Likewise.
	(arm*-*-elf): Likewise.
	(arm*-wince-pe*): Add arm/t-arm, arm/t-wince-pe to tmake_file.
	(avr-*-rtems*): Add to tmake_file, add avr/t-avr.
	(bfin*-elf*): Add bfin/t-bfin to tmake_file.
	(bfin*-uclinux*): Likewise.
	(bfin*-linux-uclibc*): Likewise.
	(bfin*-rtems*): Likewise.
	(bfin*-*): Likewise.
	(fido-*-elf): Merge into m68k-*-elf*.
	(fr30-*-elf)): Add fr30/t-fr30 to tmake_file.
	(frv-*-*linux*): Add frv/t-frv to tmake_file.
	(h8300-*-rtems*): Add h8300/t-h8300 to tmake_file.
	(h8300-*-elf*): Likewise.
	(hppa*64*-*-linux*): Add pa/t-linux, pa/t-linux64 to tmake_file.
	(hppa*-*-linux*): Add pa/t-linux to tmake_file.
	(i[34567]86-*-cygwin*): Add i386/t-chkstk to tmake_file.
	(i[34567]86-*-mingw*): Likewise.
	(x86_64-*-mingw*): Likewise.
	(i[34567]86-*-interix3*): Likewise.
	(ia64*-*-hpux*): Add ia64/t-ia64, ia64/t-hpux to tmake_file.
	(ia64-hp-*vms*): Add ia64/t-ia64 to tmake_file.
	(m68k-*-elf*): Also handle fido-*-elf. Add m68k/t-floatlib to
	tmake_file.
	(m68k-*-uclinux*): Add m68k/t-floatlib to tmake_file.
	(m68k-*-linux*): Likewise.
	(m68k-*-rtems*): Likewise.
	(mcore-*-elf): Add mcore/t-mcore to tmake_file.
	(sh-*-elf*, sh[12346l]*-*-elf*): Add sh/t-sh64 to tmake_file for
	sh64*-*-*.
	(sh-*-linux*, sh[2346lbe]*-*-linux*): Add sh/t-sh to tmake_file.
	Add sh/t-sh64 to tmake_file for sh64*-*-linux*.
	(sh-*-netbsdelf*, shl*-*-netbsdelf*, sh5-*-netbsd*)
	(sh5l*-*-netbsd*, sh64-*-netbsd*, sh64l*-*-netbsd*): Add sh/t-sh,
	sh/t-netbsd to tmake_file.
	Add sh/t-sh64 to tmake_file for sh5*-*-netbsd*, sh64*-netbsd*.
	(sh-*-rtems*): Add sh/t-sh to tmake_file.
	(sh-wrs-vxworks): Likewise.
	(sparc-*-linux*): Add sparc/t-softmul to tmake_file except for
	*-leon[3-9]*.
	(v850*-*-*): Add v850/t-v850 to tmake_file.
	(vax-*-linux*): Add vax/t-linux to tmake_file.
	(m32c-*-elf*, m32c-*-rtems*): Add m32c/t-m32c to tmake_file.
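For readers unfamiliar with the LIB1ASM machinery the entries above refer to: after this change each target names its combined assembly source (LIB1ASMSRC) and the per-function sections to build from it (LIB1ASMFUNCS) in a libgcc tmake_file fragment, and libgcc's Makefile.in compiles one object per listed function from that single source. The fragment and rule below are only a minimal sketch of that pattern under those assumptions, not the exact contents of any file in this patch; the ARM function list shown is illustrative.

# Sketch of a libgcc tmake_file fragment in the new layout
# (shape of e.g. libgcc/config/arm/t-arm; function names are illustrative).
LIB1ASMSRC = arm/lib1funcs.S
LIB1ASMFUNCS = _udivsi3 _divsi3 _dvmd_tls        # illustrative subset

# Sketch of the consuming rule in libgcc/Makefile.in: one object per
# function, each assembled from $(srcdir)/$(LIB1ASMSRC) with -DL<func>
# so the source emits only that routine (this is the "$(srcdir)"/"$<"
# usage the libgcc Makefile.in entry above describes).
lib1asmfuncs-o = $(patsubst %,%$(objext),$(LIB1ASMFUNCS))
$(lib1asmfuncs-o): %$(objext): $(srcdir)/$(LIB1ASMSRC)
	$(gcc_compile) -DL$* -xassembler-with-cpp -c $<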
From-SVN: r180773 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5c3a91da561..071cce6c29c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,109 @@ +2011-11-02 Rainer Orth + + * Makefile.in (LIB1ASMSRC): Don't export. + (libgcc.mvars): Don't emit LIB1ASMFUNCS, LIB1ASMSRC. + * config/arm/arm.c: Update lib1funcs.asm filename. + * config/arm/linux-eabi.h: Likewise. + * config/arm/bpabi-v6m.S, config/arm/bpabi.S, + config/arm/ieee754-df.S, config/arm/ieee754-sf.S: Move to + ../libgcc/config/arm. + * config/arm/lib1funcs.asm: Move to ../libgcc/config/arm/lib1funcs.S. + * config/arm/t-arm (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/arm/t-arm-elf (LIB1ASMFUNCS): Remove. + * config/arm/t-bpabi: Likewise. + * config/arm/t-linux (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/arm/t-linux-eabi (LIB1ASMFUNCS): Remove. + * config/arm/t-strongarm-elf: Likewise. + * config/arm/t-symbian: Likewise. + * config/arm/t-vxworks: Likewise. + * config/arm/t-wince-pe: Likewise. + * config/avr/libgcc.S: Move to ../libgcc/config/avr. + * config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/bfin/lib1funcs.asm: Move to + ../libgcc/config/bfin/lib1funcs.S. + * config/bfin/t-bfin: Remove. + * config/bfin/t-bfin-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/bfin/t-bfin-linux: Likewise. + * config/bfin/t-bfin-uclinux: Likewise. + * config/c6x/lib1funcs.asm: Move to + ../libgcc/config/c6x/lib1funcs.S. + * config/c6x/t-c6x-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/fr30/lib1funcs.asm: Move to + ../libgcc/config/fr30/lib1funcs.S. + * config/fr30/t-fr30 (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/frv/lib1funcs.asm: Move to + ../libgcc/config/frv/lib1funcs.S. + * config/frv/t-frv (CROSS_LIBGCC1, LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/h8300/fixunssfsi.c: Update lib1funcs.asm filename. + * config/h8300/lib1funcs.asm: Move to + ../libgcc/config/h8300/lib1funcs.S. + * config/h8300/t-h8300 (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/i386/cygwin.asm: Move to ../libgcc/config/i386/cygwin.S. + * config/i386/t-cygming (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/i386/t-interix: Likewise. + * config/ia64/lib1funcs.asm: Move to + ../libgcc/config/ia64/lib1funcs.S. + * config/ia64/t-hpux (LIB1ASMFUNCS, LIBGCC1_TEST): Remove. + * config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/iq2000/t-iq2000 (LIBGCC1, CROSS_LIBGCC1): Remove. + * config/m32c/m32c.c: Update m32c-lib1.S filename. + * config/m32c/m32c-lib1.S: Move to ../libgcc/config/m32c/lib1funcs.S. + * config/m32c/t-m32c (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/m32r/t-linux (CROSS_LIBGCC1, LIBGCC1, LIBGCC1_TEST): Remove. + * config/m68k/lb1sf68.asm: Move to ../libgcc/config/m68k/lb1sf68.S. + * config/m68k/t-floatlib (LIB1ASMSRC, LIB1ASMFUNCS): New file. + * config/mcore/lib1.asm: Move to ../libgcc/config/mcore/lib1funcs.S. + * config/mcore/t-mcore (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/mep/mep-lib1.asm: Move to ../libgcc/config/mep/lib1funcs.S. + * config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/mips/mips16.S: Move to ../libgcc/config/mips. + * config/mips/t-libgcc-mips16: Remove. + * config/mips/t-sr71k (LIBGCC1, CROSS_LIBGCC1): Remove. + * config/pa/milli64.S: Move to ../libgcc/config/pa. + * config/pa/t-linux (LIB1ASMFUNCS, LIB1ASMSRC): Remove. + * config/pa/t-linux64: Likewise. + * config/picochip/libgccExtras/fake_libgcc.asm: Move to + ../libgcc/config/picochip/lib1funcs.S. + * config/picochip/t-picochip (LIB1ASMFUNCS, LIB1ASMSRC): Remove. 
+ * config/sh/lib1funcs.asm: Move to ../libgcc/config/sh/lib1funcs.S. + * config/sh/lib1funcs.h: Move to ../libgcc/config/sh. + * config/sh/sh.h: Update lib1funcs.asm filename. + * config/sh/t-linux (LIB1ASMFUNCS_CACHE): Remove. + * config/sh/t-netbsd: Likewise. + * config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE): + Remove. + * config/sh/t-sh64 (LIB1ASMFUNCS): Remove. + * config/sparc/lb1spc.asm: Move to ../libgcc/config/sparc/lb1spc.S. + * config/sparc/lb1spl.asm: Remove. + * config/sparc/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/sparc/t-leon: Likewise. + * config/spu/t-spu-elf (LIBGCC1, CROSS_LIBGCC1): Remove. + * config/v850/lib1funcs.asm: Move to ../libgcc/config/v850/lib1funcs.S. + * config/v850/t-v850 (LIB1ASMSRC, LIB1ASMFUNCS): Remove + * config/vax/lib1funcs.asm: Move to ../libgcc/config/vax/lib1funcs.S. + * config/vax/t-linux: Remove. + * config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S: Move to + ../libgcc/config/xtensa. + * config/xtensa/lib1funcs.asm: Move to + ../libgcc/config/xtensa/lib1funcs.S. + * config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config.gcc (bfin*-rtems*): Remove bfin/t-bfin from tmake_file. + (bfin*-*): Likewise. + (mips64*-*-linux*, mipsisa64*-*-linux*): Remove + mips/t-libgcc-mips16 from tmake_file. + (mips*-*-linux*): Likewise. + (mips*-sde-elf*): Likewise. + (mipsisa32-*-elf*, mipsisa32el-*-elf*, mipsisa32r2-*-elf*) + (mipsisa32r2el-*-elf*, mipsisa64-*-elf*, mipsisa64el-*-elf*) + (mipsisa64r2-*-elf*, mipsisa64r2el-*-elf*): Likewise. + (mipsisa64sb1-*-elf*, mipsisa64sb1el-*-elf*): Likewise. + (mips-*-elf*, mipsel-*-elf*): Likewise. + (mips64-*-elf*, mips64el-*-elf*): Likewise. + (mips64orion-*-elf*, mips64orionel-*-elf*): Likewise. + (mips*-*-rtems*): Likewise. + (mipstx39-*-elf*, mipstx39el-*-elf*): Likewise. + (vax-*-linux*): Remove vax/t-linux from tmake_file. + 2011-11-02 Rainer Orth * config.gcc (extra_parts): Remove. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index b6951dc1486..38449d7c30e 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1110,7 +1110,6 @@ export DESTDIR export GCC_FOR_TARGET export INCLUDES export INSTALL_DATA -export LIB1ASMSRC export LIBGCC2_CFLAGS export LIPO_FOR_TARGET export MACHMODE_H @@ -1878,8 +1877,6 @@ libgcc-support: libgcc.mvars stmp-int-hdrs $(TCONFIG_H) \ libgcc.mvars: config.status Makefile $(LIB2ADD) $(LIB2ADD_ST) specs \ xgcc$(exeext) : > tmp-libgcc.mvars - echo LIB1ASMFUNCS = '$(LIB1ASMFUNCS)' >> tmp-libgcc.mvars - echo LIB1ASMSRC = '$(LIB1ASMSRC)' >> tmp-libgcc.mvars echo LIB2FUNCS_ST = '$(LIB2FUNCS_ST)' >> tmp-libgcc.mvars echo LIB2FUNCS_EXCLUDE = '$(LIB2FUNCS_EXCLUDE)' >> tmp-libgcc.mvars echo LIB2ADD = '$(call srcdirify,$(LIB2ADD))' >> tmp-libgcc.mvars diff --git a/gcc/config.gcc b/gcc/config.gcc index 6bbec7db39e..79230a6935a 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -950,11 +950,10 @@ bfin*-linux-uclibc*) ;; bfin*-rtems*) tm_file="${tm_file} dbxelf.h elfos.h bfin/elf.h bfin/rtems.h rtems.h newlib-stdint.h" - tmake_file="bfin/t-bfin t-rtems bfin/t-rtems" + tmake_file="t-rtems bfin/t-rtems" ;; bfin*-*) tm_file="${tm_file} dbxelf.h elfos.h newlib-stdint.h bfin/elf.h" - tmake_file=bfin/t-bfin use_collect2=no use_gcc_stdint=wrap ;; @@ -1737,7 +1736,7 @@ mips*-*-netbsd*) # NetBSD/mips, either endian. 
;; mips64*-*-linux* | mipsisa64*-*-linux*) tm_file="dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h ${tm_file} mips/gnu-user.h mips/gnu-user64.h mips/linux64.h" - tmake_file="${tmake_file} mips/t-linux64 mips/t-libgcc-mips16" + tmake_file="${tmake_file} mips/t-linux64" tm_defines="${tm_defines} MIPS_ABI_DEFAULT=ABI_N32" case ${target} in mips64el-st-linux-gnu) @@ -1758,7 +1757,6 @@ mips64*-*-linux* | mipsisa64*-*-linux*) ;; mips*-*-linux*) # Linux MIPS, either endian. tm_file="dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h ${tm_file} mips/gnu-user.h mips/linux.h" - tmake_file="${tmake_file} mips/t-libgcc-mips16" if test x$enable_targets = xall; then tm_file="${tm_file} mips/gnu-user64.h mips/linux64.h" tmake_file="${tmake_file} mips/t-linux64" @@ -1785,7 +1783,7 @@ mips*-*-openbsd*) ;; mips*-sde-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h mips/sde.h" - tmake_file="mips/t-sde mips/t-libgcc-mips16" + tmake_file="mips/t-sde" extra_options="${extra_options} mips/sde.opt" case "${with_newlib}" in yes) @@ -1822,7 +1820,7 @@ mipsisa32r2-*-elf* | mipsisa32r2el-*-elf* | \ mipsisa64-*-elf* | mipsisa64el-*-elf* | \ mipsisa64r2-*-elf* | mipsisa64r2el-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h" - tmake_file="mips/t-isa3264 mips/t-libgcc-mips16" + tmake_file="mips/t-isa3264" case ${target} in mipsisa32r2*) tm_defines="${tm_defines} MIPS_ISA_DEFAULT=33" @@ -1859,17 +1857,17 @@ mipsisa64sr71k-*-elf*) ;; mipsisa64sb1-*-elf* | mipsisa64sb1el-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16 mips/t-sb1" + tmake_file="mips/t-elf mips/t-sb1" target_cpu_default="MASK_64BIT|MASK_FLOAT64" tm_defines="${tm_defines} MIPS_ISA_DEFAULT=64 MIPS_CPU_STRING_DEFAULT=\\\"sb1\\\" MIPS_ABI_DEFAULT=ABI_O64" ;; mips-*-elf* | mipsel-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16" + tmake_file="mips/t-elf" ;; mips64-*-elf* | mips64el-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16" + tmake_file="mips/t-elf" target_cpu_default="MASK_64BIT|MASK_FLOAT64" tm_defines="${tm_defines} MIPS_ISA_DEFAULT=3 MIPS_ABI_DEFAULT=ABI_O64" ;; @@ -1880,13 +1878,13 @@ mips64vr-*-elf* | mips64vrel-*-elf*) ;; mips64orion-*-elf* | mips64orionel-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elforion.h mips/elf.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16" + tmake_file="mips/t-elf" target_cpu_default="MASK_64BIT|MASK_FLOAT64" tm_defines="${tm_defines} MIPS_ISA_DEFAULT=3 MIPS_ABI_DEFAULT=ABI_O64" ;; mips*-*-rtems*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h mips/rtems.h rtems.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16 t-rtems mips/t-rtems" + tmake_file="mips/t-elf t-rtems mips/t-rtems" ;; mips-wrs-vxworks) tm_file="elfos.h ${tm_file} mips/elf.h vx-common.h vxworks.h mips/vxworks.h" @@ -1894,7 +1892,7 @@ mips-wrs-vxworks) ;; mipstx39-*-elf* | mipstx39el-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/r3900.h mips/elf.h" - tmake_file="mips/t-r3900 mips/t-libgcc-mips16" + tmake_file="mips/t-r3900" ;; mmix-knuth-mmixware) tm_file="${tm_file} newlib-stdint.h" @@ -2511,7 +2509,6 @@ v850*-*-*) vax-*-linux*) tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h vax/elf.h vax/linux.h" extra_options="${extra_options} vax/elf.opt" - tmake_file="${tmake_file} vax/t-linux" ;; vax-*-netbsdelf*) tm_file="${tm_file} elfos.h netbsd.h netbsd-elf.h vax/elf.h vax/netbsd-elf.h" diff --git 
a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index e07c8c328c6..5f0d5629462 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -23495,7 +23495,7 @@ arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED) /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal ARM insns and therefore guarantee that the shift count is modulo 256. - DImode shifts (those implemented by lib1funcs.asm or by optabs.c) + DImode shifts (those implemented by lib1funcs.S or by optabs.c) guarantee no particular behavior for out-of-range counts. */ static unsigned HOST_WIDE_INT diff --git a/gcc/config/arm/bpabi-v6m.S b/gcc/config/arm/bpabi-v6m.S deleted file mode 100644 index 4ecea6da5a6..00000000000 --- a/gcc/config/arm/bpabi-v6m.S +++ /dev/null @@ -1,318 +0,0 @@ -/* Miscellaneous BPABI functions. ARMv6M implementation - - Copyright (C) 2006, 2008, 2009, 2010 Free Software Foundation, Inc. - Contributed by CodeSourcery. - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#ifdef __ARM_EABI__ -/* Some attributes that are common to all routines in this file. */ - /* Tag_ABI_align_needed: This code does not require 8-byte - alignment from the caller. */ - /* .eabi_attribute 24, 0 -- default setting. */ - /* Tag_ABI_align_preserved: This code preserves 8-byte - alignment in any callee. */ - .eabi_attribute 25, 1 -#endif /* __ARM_EABI__ */ - -#ifdef L_aeabi_lcmp - -FUNC_START aeabi_lcmp - cmp xxh, yyh - beq 1f - bgt 2f - mov r0, #1 - neg r0, r0 - RET -2: - mov r0, #1 - RET -1: - sub r0, xxl, yyl - beq 1f - bhi 2f - mov r0, #1 - neg r0, r0 - RET -2: - mov r0, #1 -1: - RET - FUNC_END aeabi_lcmp - -#endif /* L_aeabi_lcmp */ - -#ifdef L_aeabi_ulcmp - -FUNC_START aeabi_ulcmp - cmp xxh, yyh - bne 1f - sub r0, xxl, yyl - beq 2f -1: - bcs 1f - mov r0, #1 - neg r0, r0 - RET -1: - mov r0, #1 -2: - RET - FUNC_END aeabi_ulcmp - -#endif /* L_aeabi_ulcmp */ - -.macro test_div_by_zero signed - cmp yyh, #0 - bne 7f - cmp yyl, #0 - bne 7f - cmp xxh, #0 - bne 2f - cmp xxl, #0 -2: - .ifc \signed, unsigned - beq 3f - mov xxh, #0 - mvn xxh, xxh @ 0xffffffff - mov xxl, xxh -3: - .else - beq 5f - blt 6f - mov xxl, #0 - mvn xxl, xxl @ 0xffffffff - lsr xxh, xxl, #1 @ 0x7fffffff - b 5f -6: mov xxh, #0x80 - lsl xxh, xxh, #24 @ 0x80000000 - mov xxl, #0 -5: - .endif - @ tailcalls are tricky on v6-m. - push {r0, r1, r2} - ldr r0, 1f - adr r1, 1f - add r0, r1 - str r0, [sp, #8] - @ We know we are not on armv4t, so pop pc is safe. 
- pop {r0, r1, pc} - .align 2 -1: - .word __aeabi_ldiv0 - 1b -7: -.endm - -#ifdef L_aeabi_ldivmod - -FUNC_START aeabi_ldivmod - test_div_by_zero signed - - push {r0, r1} - mov r0, sp - push {r0, lr} - ldr r0, [sp, #8] - bl SYM(__gnu_ldivmod_helper) - ldr r3, [sp, #4] - mov lr, r3 - add sp, sp, #8 - pop {r2, r3} - RET - FUNC_END aeabi_ldivmod - -#endif /* L_aeabi_ldivmod */ - -#ifdef L_aeabi_uldivmod - -FUNC_START aeabi_uldivmod - test_div_by_zero unsigned - - push {r0, r1} - mov r0, sp - push {r0, lr} - ldr r0, [sp, #8] - bl SYM(__gnu_uldivmod_helper) - ldr r3, [sp, #4] - mov lr, r3 - add sp, sp, #8 - pop {r2, r3} - RET - FUNC_END aeabi_uldivmod - -#endif /* L_aeabi_uldivmod */ - -#ifdef L_arm_addsubsf3 - -FUNC_START aeabi_frsub - - push {r4, lr} - mov r4, #1 - lsl r4, #31 - eor r0, r0, r4 - bl __aeabi_fadd - pop {r4, pc} - - FUNC_END aeabi_frsub - -#endif /* L_arm_addsubsf3 */ - -#ifdef L_arm_cmpsf2 - -FUNC_START aeabi_cfrcmple - - mov ip, r0 - mov r0, r1 - mov r1, ip - b 6f - -FUNC_START aeabi_cfcmpeq -FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq - - @ The status-returning routines are required to preserve all - @ registers except ip, lr, and cpsr. -6: push {r0, r1, r2, r3, r4, lr} - bl __lesf2 - @ Set the Z flag correctly, and the C flag unconditionally. - cmp r0, #0 - @ Clear the C flag if the return value was -1, indicating - @ that the first operand was smaller than the second. - bmi 1f - mov r1, #0 - cmn r0, r1 -1: - pop {r0, r1, r2, r3, r4, pc} - - FUNC_END aeabi_cfcmple - FUNC_END aeabi_cfcmpeq - FUNC_END aeabi_cfrcmple - -FUNC_START aeabi_fcmpeq - - push {r4, lr} - bl __eqsf2 - neg r0, r0 - add r0, r0, #1 - pop {r4, pc} - - FUNC_END aeabi_fcmpeq - -.macro COMPARISON cond, helper, mode=sf2 -FUNC_START aeabi_fcmp\cond - - push {r4, lr} - bl __\helper\mode - cmp r0, #0 - b\cond 1f - mov r0, #0 - pop {r4, pc} -1: - mov r0, #1 - pop {r4, pc} - - FUNC_END aeabi_fcmp\cond -.endm - -COMPARISON lt, le -COMPARISON le, le -COMPARISON gt, ge -COMPARISON ge, ge - -#endif /* L_arm_cmpsf2 */ - -#ifdef L_arm_addsubdf3 - -FUNC_START aeabi_drsub - - push {r4, lr} - mov r4, #1 - lsl r4, #31 - eor xxh, xxh, r4 - bl __aeabi_dadd - pop {r4, pc} - - FUNC_END aeabi_drsub - -#endif /* L_arm_addsubdf3 */ - -#ifdef L_arm_cmpdf2 - -FUNC_START aeabi_cdrcmple - - mov ip, r0 - mov r0, r2 - mov r2, ip - mov ip, r1 - mov r1, r3 - mov r3, ip - b 6f - -FUNC_START aeabi_cdcmpeq -FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq - - @ The status-returning routines are required to preserve all - @ registers except ip, lr, and cpsr. -6: push {r0, r1, r2, r3, r4, lr} - bl __ledf2 - @ Set the Z flag correctly, and the C flag unconditionally. - cmp r0, #0 - @ Clear the C flag if the return value was -1, indicating - @ that the first operand was smaller than the second. 
- bmi 1f - mov r1, #0 - cmn r0, r1 -1: - pop {r0, r1, r2, r3, r4, pc} - - FUNC_END aeabi_cdcmple - FUNC_END aeabi_cdcmpeq - FUNC_END aeabi_cdrcmple - -FUNC_START aeabi_dcmpeq - - push {r4, lr} - bl __eqdf2 - neg r0, r0 - add r0, r0, #1 - pop {r4, pc} - - FUNC_END aeabi_dcmpeq - -.macro COMPARISON cond, helper, mode=df2 -FUNC_START aeabi_dcmp\cond - - push {r4, lr} - bl __\helper\mode - cmp r0, #0 - b\cond 1f - mov r0, #0 - pop {r4, pc} -1: - mov r0, #1 - pop {r4, pc} - - FUNC_END aeabi_dcmp\cond -.endm - -COMPARISON lt, le -COMPARISON le, le -COMPARISON gt, ge -COMPARISON ge, ge - -#endif /* L_arm_cmpdf2 */ diff --git a/gcc/config/arm/bpabi.S b/gcc/config/arm/bpabi.S deleted file mode 100644 index 2ff338927fa..00000000000 --- a/gcc/config/arm/bpabi.S +++ /dev/null @@ -1,163 +0,0 @@ -/* Miscellaneous BPABI functions. - - Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010 - Free Software Foundation, Inc. - Contributed by CodeSourcery, LLC. - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#ifdef __ARM_EABI__ -/* Some attributes that are common to all routines in this file. */ - /* Tag_ABI_align_needed: This code does not require 8-byte - alignment from the caller. */ - /* .eabi_attribute 24, 0 -- default setting. */ - /* Tag_ABI_align_preserved: This code preserves 8-byte - alignment in any callee. */ - .eabi_attribute 25, 1 -#endif /* __ARM_EABI__ */ - -#ifdef L_aeabi_lcmp - -ARM_FUNC_START aeabi_lcmp - cmp xxh, yyh - do_it lt - movlt r0, #-1 - do_it gt - movgt r0, #1 - do_it ne - RETc(ne) - subs r0, xxl, yyl - do_it lo - movlo r0, #-1 - do_it hi - movhi r0, #1 - RET - FUNC_END aeabi_lcmp - -#endif /* L_aeabi_lcmp */ - -#ifdef L_aeabi_ulcmp - -ARM_FUNC_START aeabi_ulcmp - cmp xxh, yyh - do_it lo - movlo r0, #-1 - do_it hi - movhi r0, #1 - do_it ne - RETc(ne) - cmp xxl, yyl - do_it lo - movlo r0, #-1 - do_it hi - movhi r0, #1 - do_it eq - moveq r0, #0 - RET - FUNC_END aeabi_ulcmp - -#endif /* L_aeabi_ulcmp */ - -.macro test_div_by_zero signed -/* Tail-call to divide-by-zero handlers which may be overridden by the user, - so unwinding works properly. */ -#if defined(__thumb2__) - cbnz yyh, 1f - cbnz yyl, 1f - cmp xxh, #0 - do_it eq - cmpeq xxl, #0 - .ifc \signed, unsigned - beq 2f - mov xxh, #0xffffffff - mov xxl, xxh -2: - .else - do_it lt, t - movlt xxl, #0 - movlt xxh, #0x80000000 - do_it gt, t - movgt xxh, #0x7fffffff - movgt xxl, #0xffffffff - .endif - b SYM (__aeabi_ldiv0) __PLT__ -1: -#else - /* Note: Thumb-1 code calls via an ARM shim on processors which - support ARM mode. 
*/ - cmp yyh, #0 - cmpeq yyl, #0 - bne 2f - cmp xxh, #0 - cmpeq xxl, #0 - .ifc \signed, unsigned - movne xxh, #0xffffffff - movne xxl, #0xffffffff - .else - movlt xxh, #0x80000000 - movlt xxl, #0 - movgt xxh, #0x7fffffff - movgt xxl, #0xffffffff - .endif - b SYM (__aeabi_ldiv0) __PLT__ -2: -#endif -.endm - -#ifdef L_aeabi_ldivmod - -ARM_FUNC_START aeabi_ldivmod - test_div_by_zero signed - - sub sp, sp, #8 -#if defined(__thumb2__) - mov ip, sp - push {ip, lr} -#else - do_push {sp, lr} -#endif - bl SYM(__gnu_ldivmod_helper) __PLT__ - ldr lr, [sp, #4] - add sp, sp, #8 - do_pop {r2, r3} - RET - -#endif /* L_aeabi_ldivmod */ - -#ifdef L_aeabi_uldivmod - -ARM_FUNC_START aeabi_uldivmod - test_div_by_zero unsigned - - sub sp, sp, #8 -#if defined(__thumb2__) - mov ip, sp - push {ip, lr} -#else - do_push {sp, lr} -#endif - bl SYM(__gnu_uldivmod_helper) __PLT__ - ldr lr, [sp, #4] - add sp, sp, #8 - do_pop {r2, r3} - RET - -#endif /* L_aeabi_divmod */ - diff --git a/gcc/config/arm/ieee754-df.S b/gcc/config/arm/ieee754-df.S deleted file mode 100644 index eb0c38632d0..00000000000 --- a/gcc/config/arm/ieee754-df.S +++ /dev/null @@ -1,1447 +0,0 @@ -/* ieee754-df.S double-precision floating point support for ARM - - Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. - Contributed by Nicolas Pitre (nico@cam.org) - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -/* - * Notes: - * - * The goal of this code is to be as fast as possible. This is - * not meant to be easy to understand for the casual reader. - * For slightly simpler code please see the single precision version - * of this file. - * - * Only the default rounding mode is intended for best performances. - * Exceptions aren't supported yet, but that can be added quite easily - * if necessary without impacting performances. - */ - - -@ For FPA, float words are always big-endian. -@ For VFP, floats words follow the memory system mode. 
-#if defined(__VFP_FP__) && !defined(__ARMEB__) -#define xl r0 -#define xh r1 -#define yl r2 -#define yh r3 -#else -#define xh r0 -#define xl r1 -#define yh r2 -#define yl r3 -#endif - - -#ifdef L_arm_negdf2 - -ARM_FUNC_START negdf2 -ARM_FUNC_ALIAS aeabi_dneg negdf2 - - @ flip sign bit - eor xh, xh, #0x80000000 - RET - - FUNC_END aeabi_dneg - FUNC_END negdf2 - -#endif - -#ifdef L_arm_addsubdf3 - -ARM_FUNC_START aeabi_drsub - - eor xh, xh, #0x80000000 @ flip sign bit of first arg - b 1f - -ARM_FUNC_START subdf3 -ARM_FUNC_ALIAS aeabi_dsub subdf3 - - eor yh, yh, #0x80000000 @ flip sign bit of second arg -#if defined(__INTERWORKING_STUBS__) - b 1f @ Skip Thumb-code prologue -#endif - -ARM_FUNC_START adddf3 -ARM_FUNC_ALIAS aeabi_dadd adddf3 - -1: do_push {r4, r5, lr} - - @ Look for zeroes, equal values, INF, or NAN. - shift1 lsl, r4, xh, #1 - shift1 lsl, r5, yh, #1 - teq r4, r5 - do_it eq - teqeq xl, yl - do_it ne, ttt - COND(orr,s,ne) ip, r4, xl - COND(orr,s,ne) ip, r5, yl - COND(mvn,s,ne) ip, r4, asr #21 - COND(mvn,s,ne) ip, r5, asr #21 - beq LSYM(Lad_s) - - @ Compute exponent difference. Make largest exponent in r4, - @ corresponding arg in xh-xl, and positive exponent difference in r5. - shift1 lsr, r4, r4, #21 - rsbs r5, r4, r5, lsr #21 - do_it lt - rsblt r5, r5, #0 - ble 1f - add r4, r4, r5 - eor yl, xl, yl - eor yh, xh, yh - eor xl, yl, xl - eor xh, yh, xh - eor yl, xl, yl - eor yh, xh, yh -1: - @ If exponent difference is too large, return largest argument - @ already in xh-xl. We need up to 54 bit to handle proper rounding - @ of 0x1p54 - 1.1. - cmp r5, #54 - do_it hi - RETLDM "r4, r5" hi - - @ Convert mantissa to signed integer. - tst xh, #0x80000000 - mov xh, xh, lsl #12 - mov ip, #0x00100000 - orr xh, ip, xh, lsr #12 - beq 1f -#if defined(__thumb2__) - negs xl, xl - sbc xh, xh, xh, lsl #1 -#else - rsbs xl, xl, #0 - rsc xh, xh, #0 -#endif -1: - tst yh, #0x80000000 - mov yh, yh, lsl #12 - orr yh, ip, yh, lsr #12 - beq 1f -#if defined(__thumb2__) - negs yl, yl - sbc yh, yh, yh, lsl #1 -#else - rsbs yl, yl, #0 - rsc yh, yh, #0 -#endif -1: - @ If exponent == difference, one or both args were denormalized. - @ Since this is not common case, rescale them off line. - teq r4, r5 - beq LSYM(Lad_d) -LSYM(Lad_x): - - @ Compensate for the exponent overlapping the mantissa MSB added later - sub r4, r4, #1 - - @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip. - rsbs lr, r5, #32 - blt 1f - shift1 lsl, ip, yl, lr - shiftop adds xl xl yl lsr r5 yl - adc xh, xh, #0 - shiftop adds xl xl yh lsl lr yl - shiftop adcs xh xh yh asr r5 yh - b 2f -1: sub r5, r5, #32 - add lr, lr, #32 - cmp yl, #1 - shift1 lsl,ip, yh, lr - do_it cs - orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later - shiftop adds xl xl yh asr r5 yh - adcs xh, xh, yh, asr #31 -2: - @ We now have a result in xh-xl-ip. - @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above) - and r5, xh, #0x80000000 - bpl LSYM(Lad_p) -#if defined(__thumb2__) - mov lr, #0 - negs ip, ip - sbcs xl, lr, xl - sbc xh, lr, xh -#else - rsbs ip, ip, #0 - rscs xl, xl, #0 - rsc xh, xh, #0 -#endif - - @ Determine how to normalize the result. -LSYM(Lad_p): - cmp xh, #0x00100000 - bcc LSYM(Lad_a) - cmp xh, #0x00200000 - bcc LSYM(Lad_e) - - @ Result needs to be shifted right. - movs xh, xh, lsr #1 - movs xl, xl, rrx - mov ip, ip, rrx - add r4, r4, #1 - - @ Make sure we did not bust our exponent. - mov r2, r4, lsl #21 - cmn r2, #(2 << 21) - bcs LSYM(Lad_o) - - @ Our result is now properly aligned into xh-xl, remaining bits in ip. 
- @ Round with MSB of ip. If halfway between two numbers, round towards - @ LSB of xl = 0. - @ Pack final result together. -LSYM(Lad_e): - cmp ip, #0x80000000 - do_it eq - COND(mov,s,eq) ip, xl, lsr #1 - adcs xl, xl, #0 - adc xh, xh, r4, lsl #20 - orr xh, xh, r5 - RETLDM "r4, r5" - - @ Result must be shifted left and exponent adjusted. -LSYM(Lad_a): - movs ip, ip, lsl #1 - adcs xl, xl, xl - adc xh, xh, xh - tst xh, #0x00100000 - sub r4, r4, #1 - bne LSYM(Lad_e) - - @ No rounding necessary since ip will always be 0 at this point. -LSYM(Lad_l): - -#if __ARM_ARCH__ < 5 - - teq xh, #0 - movne r3, #20 - moveq r3, #52 - moveq xh, xl - moveq xl, #0 - mov r2, xh - cmp r2, #(1 << 16) - movhs r2, r2, lsr #16 - subhs r3, r3, #16 - cmp r2, #(1 << 8) - movhs r2, r2, lsr #8 - subhs r3, r3, #8 - cmp r2, #(1 << 4) - movhs r2, r2, lsr #4 - subhs r3, r3, #4 - cmp r2, #(1 << 2) - subhs r3, r3, #2 - sublo r3, r3, r2, lsr #1 - sub r3, r3, r2, lsr #3 - -#else - - teq xh, #0 - do_it eq, t - moveq xh, xl - moveq xl, #0 - clz r3, xh - do_it eq - addeq r3, r3, #32 - sub r3, r3, #11 - -#endif - - @ determine how to shift the value. - subs r2, r3, #32 - bge 2f - adds r2, r2, #12 - ble 1f - - @ shift value left 21 to 31 bits, or actually right 11 to 1 bits - @ since a register switch happened above. - add ip, r2, #20 - rsb r2, r2, #12 - shift1 lsl, xl, xh, ip - shift1 lsr, xh, xh, r2 - b 3f - - @ actually shift value left 1 to 20 bits, which might also represent - @ 32 to 52 bits if counting the register switch that happened earlier. -1: add r2, r2, #20 -2: do_it le - rsble ip, r2, #32 - shift1 lsl, xh, xh, r2 -#if defined(__thumb2__) - lsr ip, xl, ip - itt le - orrle xh, xh, ip - lslle xl, xl, r2 -#else - orrle xh, xh, xl, lsr ip - movle xl, xl, lsl r2 -#endif - - @ adjust exponent accordingly. -3: subs r4, r4, r3 - do_it ge, tt - addge xh, xh, r4, lsl #20 - orrge xh, xh, r5 - RETLDM "r4, r5" ge - - @ Exponent too small, denormalize result. - @ Find out proper shift value. - mvn r4, r4 - subs r4, r4, #31 - bge 2f - adds r4, r4, #12 - bgt 1f - - @ shift result right of 1 to 20 bits, sign is in r5. - add r4, r4, #20 - rsb r2, r4, #32 - shift1 lsr, xl, xl, r4 - shiftop orr xl xl xh lsl r2 yh - shiftop orr xh r5 xh lsr r4 yh - RETLDM "r4, r5" - - @ shift result right of 21 to 31 bits, or left 11 to 1 bits after - @ a register switch from xh to xl. -1: rsb r4, r4, #12 - rsb r2, r4, #32 - shift1 lsr, xl, xl, r2 - shiftop orr xl xl xh lsl r4 yh - mov xh, r5 - RETLDM "r4, r5" - - @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch - @ from xh to xl. -2: shift1 lsr, xl, xh, r4 - mov xh, r5 - RETLDM "r4, r5" - - @ Adjust exponents for denormalized arguments. - @ Note that r4 must not remain equal to 0. -LSYM(Lad_d): - teq r4, #0 - eor yh, yh, #0x00100000 - do_it eq, te - eoreq xh, xh, #0x00100000 - addeq r4, r4, #1 - subne r5, r5, #1 - b LSYM(Lad_x) - - -LSYM(Lad_s): - mvns ip, r4, asr #21 - do_it ne - COND(mvn,s,ne) ip, r5, asr #21 - beq LSYM(Lad_i) - - teq r4, r5 - do_it eq - teqeq xl, yl - beq 1f - - @ Result is x + 0.0 = x or 0.0 + y = y. - orrs ip, r4, xl - do_it eq, t - moveq xh, yh - moveq xl, yl - RETLDM "r4, r5" - -1: teq xh, yh - - @ Result is x - x = 0. - do_it ne, tt - movne xh, #0 - movne xl, #0 - RETLDM "r4, r5" ne - - @ Result is x + x = 2x. 
- movs ip, r4, lsr #21 - bne 2f - movs xl, xl, lsl #1 - adcs xh, xh, xh - do_it cs - orrcs xh, xh, #0x80000000 - RETLDM "r4, r5" -2: adds r4, r4, #(2 << 21) - do_it cc, t - addcc xh, xh, #(1 << 20) - RETLDM "r4, r5" cc - and r5, xh, #0x80000000 - - @ Overflow: return INF. -LSYM(Lad_o): - orr xh, r5, #0x7f000000 - orr xh, xh, #0x00f00000 - mov xl, #0 - RETLDM "r4, r5" - - @ At least one of x or y is INF/NAN. - @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN) - @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN) - @ if either is NAN: return NAN - @ if opposite sign: return NAN - @ otherwise return xh-xl (which is INF or -INF) -LSYM(Lad_i): - mvns ip, r4, asr #21 - do_it ne, te - movne xh, yh - movne xl, yl - COND(mvn,s,eq) ip, r5, asr #21 - do_it ne, t - movne yh, xh - movne yl, xl - orrs r4, xl, xh, lsl #12 - do_it eq, te - COND(orr,s,eq) r5, yl, yh, lsl #12 - teqeq xh, yh - orrne xh, xh, #0x00080000 @ quiet NAN - RETLDM "r4, r5" - - FUNC_END aeabi_dsub - FUNC_END subdf3 - FUNC_END aeabi_dadd - FUNC_END adddf3 - -ARM_FUNC_START floatunsidf -ARM_FUNC_ALIAS aeabi_ui2d floatunsidf - - teq r0, #0 - do_it eq, t - moveq r1, #0 - RETc(eq) - do_push {r4, r5, lr} - mov r4, #0x400 @ initial exponent - add r4, r4, #(52-1 - 1) - mov r5, #0 @ sign bit is 0 - .ifnc xl, r0 - mov xl, r0 - .endif - mov xh, #0 - b LSYM(Lad_l) - - FUNC_END aeabi_ui2d - FUNC_END floatunsidf - -ARM_FUNC_START floatsidf -ARM_FUNC_ALIAS aeabi_i2d floatsidf - - teq r0, #0 - do_it eq, t - moveq r1, #0 - RETc(eq) - do_push {r4, r5, lr} - mov r4, #0x400 @ initial exponent - add r4, r4, #(52-1 - 1) - ands r5, r0, #0x80000000 @ sign bit in r5 - do_it mi - rsbmi r0, r0, #0 @ absolute value - .ifnc xl, r0 - mov xl, r0 - .endif - mov xh, #0 - b LSYM(Lad_l) - - FUNC_END aeabi_i2d - FUNC_END floatsidf - -ARM_FUNC_START extendsfdf2 -ARM_FUNC_ALIAS aeabi_f2d extendsfdf2 - - movs r2, r0, lsl #1 @ toss sign bit - mov xh, r2, asr #3 @ stretch exponent - mov xh, xh, rrx @ retrieve sign bit - mov xl, r2, lsl #28 @ retrieve remaining bits - do_it ne, ttt - COND(and,s,ne) r3, r2, #0xff000000 @ isolate exponent - teqne r3, #0xff000000 @ if not 0, check if INF or NAN - eorne xh, xh, #0x38000000 @ fixup exponent otherwise. - RETc(ne) @ and return it. - - teq r2, #0 @ if actually 0 - do_it ne, e - teqne r3, #0xff000000 @ or INF or NAN - RETc(eq) @ we are done already. - - @ value was denormalized. We can normalize it now. - do_push {r4, r5, lr} - mov r4, #0x380 @ setup corresponding exponent - and r5, xh, #0x80000000 @ move sign bit in r5 - bic xh, xh, #0x80000000 - b LSYM(Lad_l) - - FUNC_END aeabi_f2d - FUNC_END extendsfdf2 - -ARM_FUNC_START floatundidf -ARM_FUNC_ALIAS aeabi_ul2d floatundidf - - orrs r2, r0, r1 -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - do_it eq, t - mvfeqd f0, #0.0 -#else - do_it eq -#endif - RETc(eq) - -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - @ For hard FPA code we want to return via the tail below so that - @ we can return the result in f0 as well as in r0/r1 for backwards - @ compatibility. - adr ip, LSYM(f0_ret) - @ Push pc as well so that RETLDM works correctly. 
- do_push {r4, r5, ip, lr, pc} -#else - do_push {r4, r5, lr} -#endif - - mov r5, #0 - b 2f - -ARM_FUNC_START floatdidf -ARM_FUNC_ALIAS aeabi_l2d floatdidf - - orrs r2, r0, r1 -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - do_it eq, t - mvfeqd f0, #0.0 -#else - do_it eq -#endif - RETc(eq) - -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - @ For hard FPA code we want to return via the tail below so that - @ we can return the result in f0 as well as in r0/r1 for backwards - @ compatibility. - adr ip, LSYM(f0_ret) - @ Push pc as well so that RETLDM works correctly. - do_push {r4, r5, ip, lr, pc} -#else - do_push {r4, r5, lr} -#endif - - ands r5, ah, #0x80000000 @ sign bit in r5 - bpl 2f -#if defined(__thumb2__) - negs al, al - sbc ah, ah, ah, lsl #1 -#else - rsbs al, al, #0 - rsc ah, ah, #0 -#endif -2: - mov r4, #0x400 @ initial exponent - add r4, r4, #(52-1 - 1) - - @ FPA little-endian: must swap the word order. - .ifnc xh, ah - mov ip, al - mov xh, ah - mov xl, ip - .endif - - movs ip, xh, lsr #22 - beq LSYM(Lad_p) - - @ The value is too big. Scale it down a bit... - mov r2, #3 - movs ip, ip, lsr #3 - do_it ne - addne r2, r2, #3 - movs ip, ip, lsr #3 - do_it ne - addne r2, r2, #3 - add r2, r2, ip, lsr #3 - - rsb r3, r2, #32 - shift1 lsl, ip, xl, r3 - shift1 lsr, xl, xl, r2 - shiftop orr xl xl xh lsl r3 lr - shift1 lsr, xh, xh, r2 - add r4, r4, r2 - b LSYM(Lad_p) - -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - - @ Legacy code expects the result to be returned in f0. Copy it - @ there as well. -LSYM(f0_ret): - do_push {r0, r1} - ldfd f0, [sp], #8 - RETLDM - -#endif - - FUNC_END floatdidf - FUNC_END aeabi_l2d - FUNC_END floatundidf - FUNC_END aeabi_ul2d - -#endif /* L_addsubdf3 */ - -#ifdef L_arm_muldivdf3 - -ARM_FUNC_START muldf3 -ARM_FUNC_ALIAS aeabi_dmul muldf3 - do_push {r4, r5, r6, lr} - - @ Mask out exponents, trap any zero/denormal/INF/NAN. - mov ip, #0xff - orr ip, ip, #0x700 - ands r4, ip, xh, lsr #20 - do_it ne, tte - COND(and,s,ne) r5, ip, yh, lsr #20 - teqne r4, ip - teqne r5, ip - bleq LSYM(Lml_s) - - @ Add exponents together - add r4, r4, r5 - - @ Determine final sign. - eor r6, xh, yh - - @ Convert mantissa to unsigned integer. - @ If power of two, branch to a separate path. - bic xh, xh, ip, lsl #21 - bic yh, yh, ip, lsl #21 - orrs r5, xl, xh, lsl #12 - do_it ne - COND(orr,s,ne) r5, yl, yh, lsl #12 - orr xh, xh, #0x00100000 - orr yh, yh, #0x00100000 - beq LSYM(Lml_1) - -#if __ARM_ARCH__ < 4 - - @ Put sign bit in r6, which will be restored in yl later. - and r6, r6, #0x80000000 - - @ Well, no way to make it shorter without the umull instruction. 
- stmfd sp!, {r6, r7, r8, r9, sl, fp} - mov r7, xl, lsr #16 - mov r8, yl, lsr #16 - mov r9, xh, lsr #16 - mov sl, yh, lsr #16 - bic xl, xl, r7, lsl #16 - bic yl, yl, r8, lsl #16 - bic xh, xh, r9, lsl #16 - bic yh, yh, sl, lsl #16 - mul ip, xl, yl - mul fp, xl, r8 - mov lr, #0 - adds ip, ip, fp, lsl #16 - adc lr, lr, fp, lsr #16 - mul fp, r7, yl - adds ip, ip, fp, lsl #16 - adc lr, lr, fp, lsr #16 - mul fp, xl, sl - mov r5, #0 - adds lr, lr, fp, lsl #16 - adc r5, r5, fp, lsr #16 - mul fp, r7, yh - adds lr, lr, fp, lsl #16 - adc r5, r5, fp, lsr #16 - mul fp, xh, r8 - adds lr, lr, fp, lsl #16 - adc r5, r5, fp, lsr #16 - mul fp, r9, yl - adds lr, lr, fp, lsl #16 - adc r5, r5, fp, lsr #16 - mul fp, xh, sl - mul r6, r9, sl - adds r5, r5, fp, lsl #16 - adc r6, r6, fp, lsr #16 - mul fp, r9, yh - adds r5, r5, fp, lsl #16 - adc r6, r6, fp, lsr #16 - mul fp, xl, yh - adds lr, lr, fp - mul fp, r7, sl - adcs r5, r5, fp - mul fp, xh, yl - adc r6, r6, #0 - adds lr, lr, fp - mul fp, r9, r8 - adcs r5, r5, fp - mul fp, r7, r8 - adc r6, r6, #0 - adds lr, lr, fp - mul fp, xh, yh - adcs r5, r5, fp - adc r6, r6, #0 - ldmfd sp!, {yl, r7, r8, r9, sl, fp} - -#else - - @ Here is the actual multiplication. - umull ip, lr, xl, yl - mov r5, #0 - umlal lr, r5, xh, yl - and yl, r6, #0x80000000 - umlal lr, r5, xl, yh - mov r6, #0 - umlal r5, r6, xh, yh - -#endif - - @ The LSBs in ip are only significant for the final rounding. - @ Fold them into lr. - teq ip, #0 - do_it ne - orrne lr, lr, #1 - - @ Adjust result upon the MSB position. - sub r4, r4, #0xff - cmp r6, #(1 << (20-11)) - sbc r4, r4, #0x300 - bcs 1f - movs lr, lr, lsl #1 - adcs r5, r5, r5 - adc r6, r6, r6 -1: - @ Shift to final position, add sign to result. - orr xh, yl, r6, lsl #11 - orr xh, xh, r5, lsr #21 - mov xl, r5, lsl #11 - orr xl, xl, lr, lsr #21 - mov lr, lr, lsl #11 - - @ Check exponent range for under/overflow. - subs ip, r4, #(254 - 1) - do_it hi - cmphi ip, #0x700 - bhi LSYM(Lml_u) - - @ Round the result, merge final exponent. - cmp lr, #0x80000000 - do_it eq - COND(mov,s,eq) lr, xl, lsr #1 - adcs xl, xl, #0 - adc xh, xh, r4, lsl #20 - RETLDM "r4, r5, r6" - - @ Multiplication by 0x1p*: let''s shortcut a lot of code. -LSYM(Lml_1): - and r6, r6, #0x80000000 - orr xh, r6, xh - orr xl, xl, yl - eor xh, xh, yh - subs r4, r4, ip, lsr #1 - do_it gt, tt - COND(rsb,s,gt) r5, r4, ip - orrgt xh, xh, r4, lsl #20 - RETLDM "r4, r5, r6" gt - - @ Under/overflow: fix things up for the code below. - orr xh, xh, #0x00100000 - mov lr, #0 - subs r4, r4, #1 - -LSYM(Lml_u): - @ Overflow? - bgt LSYM(Lml_o) - - @ Check if denormalized result is possible, otherwise return signed 0. - cmn r4, #(53 + 1) - do_it le, tt - movle xl, #0 - bicle xh, xh, #0x7fffffff - RETLDM "r4, r5, r6" le - - @ Find out proper shift value. - rsb r4, r4, #0 - subs r4, r4, #32 - bge 2f - adds r4, r4, #12 - bgt 1f - - @ shift result right of 1 to 20 bits, preserve sign bit, round, etc. - add r4, r4, #20 - rsb r5, r4, #32 - shift1 lsl, r3, xl, r5 - shift1 lsr, xl, xl, r4 - shiftop orr xl xl xh lsl r5 r2 - and r2, xh, #0x80000000 - bic xh, xh, #0x80000000 - adds xl, xl, r3, lsr #31 - shiftop adc xh r2 xh lsr r4 r6 - orrs lr, lr, r3, lsl #1 - do_it eq - biceq xl, xl, r3, lsr #31 - RETLDM "r4, r5, r6" - - @ shift result right of 21 to 31 bits, or left 11 to 1 bits after - @ a register switch from xh to xl. Then round. 
-1: rsb r4, r4, #12 - rsb r5, r4, #32 - shift1 lsl, r3, xl, r4 - shift1 lsr, xl, xl, r5 - shiftop orr xl xl xh lsl r4 r2 - bic xh, xh, #0x7fffffff - adds xl, xl, r3, lsr #31 - adc xh, xh, #0 - orrs lr, lr, r3, lsl #1 - do_it eq - biceq xl, xl, r3, lsr #31 - RETLDM "r4, r5, r6" - - @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch - @ from xh to xl. Leftover bits are in r3-r6-lr for rounding. -2: rsb r5, r4, #32 - shiftop orr lr lr xl lsl r5 r2 - shift1 lsr, r3, xl, r4 - shiftop orr r3 r3 xh lsl r5 r2 - shift1 lsr, xl, xh, r4 - bic xh, xh, #0x7fffffff - shiftop bic xl xl xh lsr r4 r2 - add xl, xl, r3, lsr #31 - orrs lr, lr, r3, lsl #1 - do_it eq - biceq xl, xl, r3, lsr #31 - RETLDM "r4, r5, r6" - - @ One or both arguments are denormalized. - @ Scale them leftwards and preserve sign bit. -LSYM(Lml_d): - teq r4, #0 - bne 2f - and r6, xh, #0x80000000 -1: movs xl, xl, lsl #1 - adc xh, xh, xh - tst xh, #0x00100000 - do_it eq - subeq r4, r4, #1 - beq 1b - orr xh, xh, r6 - teq r5, #0 - do_it ne - RETc(ne) -2: and r6, yh, #0x80000000 -3: movs yl, yl, lsl #1 - adc yh, yh, yh - tst yh, #0x00100000 - do_it eq - subeq r5, r5, #1 - beq 3b - orr yh, yh, r6 - RET - -LSYM(Lml_s): - @ Isolate the INF and NAN cases away - teq r4, ip - and r5, ip, yh, lsr #20 - do_it ne - teqne r5, ip - beq 1f - - @ Here, one or more arguments are either denormalized or zero. - orrs r6, xl, xh, lsl #1 - do_it ne - COND(orr,s,ne) r6, yl, yh, lsl #1 - bne LSYM(Lml_d) - - @ Result is 0, but determine sign anyway. -LSYM(Lml_z): - eor xh, xh, yh - and xh, xh, #0x80000000 - mov xl, #0 - RETLDM "r4, r5, r6" - -1: @ One or both args are INF or NAN. - orrs r6, xl, xh, lsl #1 - do_it eq, te - moveq xl, yl - moveq xh, yh - COND(orr,s,ne) r6, yl, yh, lsl #1 - beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN - teq r4, ip - bne 1f - orrs r6, xl, xh, lsl #12 - bne LSYM(Lml_n) @ NAN * -> NAN -1: teq r5, ip - bne LSYM(Lml_i) - orrs r6, yl, yh, lsl #12 - do_it ne, t - movne xl, yl - movne xh, yh - bne LSYM(Lml_n) @ * NAN -> NAN - - @ Result is INF, but we need to determine its sign. -LSYM(Lml_i): - eor xh, xh, yh - - @ Overflow: return INF (sign already in xh). -LSYM(Lml_o): - and xh, xh, #0x80000000 - orr xh, xh, #0x7f000000 - orr xh, xh, #0x00f00000 - mov xl, #0 - RETLDM "r4, r5, r6" - - @ Return a quiet NAN. -LSYM(Lml_n): - orr xh, xh, #0x7f000000 - orr xh, xh, #0x00f80000 - RETLDM "r4, r5, r6" - - FUNC_END aeabi_dmul - FUNC_END muldf3 - -ARM_FUNC_START divdf3 -ARM_FUNC_ALIAS aeabi_ddiv divdf3 - - do_push {r4, r5, r6, lr} - - @ Mask out exponents, trap any zero/denormal/INF/NAN. - mov ip, #0xff - orr ip, ip, #0x700 - ands r4, ip, xh, lsr #20 - do_it ne, tte - COND(and,s,ne) r5, ip, yh, lsr #20 - teqne r4, ip - teqne r5, ip - bleq LSYM(Ldv_s) - - @ Substract divisor exponent from dividend''s. - sub r4, r4, r5 - - @ Preserve final sign into lr. - eor lr, xh, yh - - @ Convert mantissa to unsigned integer. - @ Dividend -> r5-r6, divisor -> yh-yl. - orrs r5, yl, yh, lsl #12 - mov xh, xh, lsl #12 - beq LSYM(Ldv_1) - mov yh, yh, lsl #12 - mov r5, #0x10000000 - orr yh, r5, yh, lsr #4 - orr yh, yh, yl, lsr #24 - mov yl, yl, lsl #8 - orr r5, r5, xh, lsr #4 - orr r5, r5, xl, lsr #24 - mov r6, xl, lsl #8 - - @ Initialize xh with final sign bit. - and xh, lr, #0x80000000 - - @ Ensure result will land to known bit position. - @ Apply exponent bias accordingly. 
- cmp r5, yh - do_it eq - cmpeq r6, yl - adc r4, r4, #(255 - 2) - add r4, r4, #0x300 - bcs 1f - movs yh, yh, lsr #1 - mov yl, yl, rrx -1: - @ Perform first substraction to align result to a nibble. - subs r6, r6, yl - sbc r5, r5, yh - movs yh, yh, lsr #1 - mov yl, yl, rrx - mov xl, #0x00100000 - mov ip, #0x00080000 - - @ The actual division loop. -1: subs lr, r6, yl - sbcs lr, r5, yh - do_it cs, tt - subcs r6, r6, yl - movcs r5, lr - orrcs xl, xl, ip - movs yh, yh, lsr #1 - mov yl, yl, rrx - subs lr, r6, yl - sbcs lr, r5, yh - do_it cs, tt - subcs r6, r6, yl - movcs r5, lr - orrcs xl, xl, ip, lsr #1 - movs yh, yh, lsr #1 - mov yl, yl, rrx - subs lr, r6, yl - sbcs lr, r5, yh - do_it cs, tt - subcs r6, r6, yl - movcs r5, lr - orrcs xl, xl, ip, lsr #2 - movs yh, yh, lsr #1 - mov yl, yl, rrx - subs lr, r6, yl - sbcs lr, r5, yh - do_it cs, tt - subcs r6, r6, yl - movcs r5, lr - orrcs xl, xl, ip, lsr #3 - - orrs lr, r5, r6 - beq 2f - mov r5, r5, lsl #4 - orr r5, r5, r6, lsr #28 - mov r6, r6, lsl #4 - mov yh, yh, lsl #3 - orr yh, yh, yl, lsr #29 - mov yl, yl, lsl #3 - movs ip, ip, lsr #4 - bne 1b - - @ We are done with a word of the result. - @ Loop again for the low word if this pass was for the high word. - tst xh, #0x00100000 - bne 3f - orr xh, xh, xl - mov xl, #0 - mov ip, #0x80000000 - b 1b -2: - @ Be sure result starts in the high word. - tst xh, #0x00100000 - do_it eq, t - orreq xh, xh, xl - moveq xl, #0 -3: - @ Check exponent range for under/overflow. - subs ip, r4, #(254 - 1) - do_it hi - cmphi ip, #0x700 - bhi LSYM(Lml_u) - - @ Round the result, merge final exponent. - subs ip, r5, yh - do_it eq, t - COND(sub,s,eq) ip, r6, yl - COND(mov,s,eq) ip, xl, lsr #1 - adcs xl, xl, #0 - adc xh, xh, r4, lsl #20 - RETLDM "r4, r5, r6" - - @ Division by 0x1p*: shortcut a lot of code. -LSYM(Ldv_1): - and lr, lr, #0x80000000 - orr xh, lr, xh, lsr #12 - adds r4, r4, ip, lsr #1 - do_it gt, tt - COND(rsb,s,gt) r5, r4, ip - orrgt xh, xh, r4, lsl #20 - RETLDM "r4, r5, r6" gt - - orr xh, xh, #0x00100000 - mov lr, #0 - subs r4, r4, #1 - b LSYM(Lml_u) - - @ Result mightt need to be denormalized: put remainder bits - @ in lr for rounding considerations. -LSYM(Ldv_u): - orr lr, r5, r6 - b LSYM(Lml_u) - - @ One or both arguments is either INF, NAN or zero. -LSYM(Ldv_s): - and r5, ip, yh, lsr #20 - teq r4, ip - do_it eq - teqeq r5, ip - beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN - teq r4, ip - bne 1f - orrs r4, xl, xh, lsl #12 - bne LSYM(Lml_n) @ NAN / -> NAN - teq r5, ip - bne LSYM(Lml_i) @ INF / -> INF - mov xl, yl - mov xh, yh - b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN -1: teq r5, ip - bne 2f - orrs r5, yl, yh, lsl #12 - beq LSYM(Lml_z) @ / INF -> 0 - mov xl, yl - mov xh, yh - b LSYM(Lml_n) @ / NAN -> NAN -2: @ If both are nonzero, we need to normalize and resume above. - orrs r6, xl, xh, lsl #1 - do_it ne - COND(orr,s,ne) r6, yl, yh, lsl #1 - bne LSYM(Lml_d) - @ One or both arguments are 0. - orrs r4, xl, xh, lsl #1 - bne LSYM(Lml_i) @ / 0 -> INF - orrs r5, yl, yh, lsl #1 - bne LSYM(Lml_z) @ 0 / -> 0 - b LSYM(Lml_n) @ 0 / 0 -> NAN - - FUNC_END aeabi_ddiv - FUNC_END divdf3 - -#endif /* L_muldivdf3 */ - -#ifdef L_arm_cmpdf2 - -@ Note: only r0 (return value) and ip are clobbered here. - -ARM_FUNC_START gtdf2 -ARM_FUNC_ALIAS gedf2 gtdf2 - mov ip, #-1 - b 1f - -ARM_FUNC_START ltdf2 -ARM_FUNC_ALIAS ledf2 ltdf2 - mov ip, #1 - b 1f - -ARM_FUNC_START cmpdf2 -ARM_FUNC_ALIAS nedf2 cmpdf2 -ARM_FUNC_ALIAS eqdf2 cmpdf2 - mov ip, #1 @ how should we specify unordered here? - -1: str ip, [sp, #-4]! 
- - @ Trap any INF/NAN first. - mov ip, xh, lsl #1 - mvns ip, ip, asr #21 - mov ip, yh, lsl #1 - do_it ne - COND(mvn,s,ne) ip, ip, asr #21 - beq 3f - - @ Test for equality. - @ Note that 0.0 is equal to -0.0. -2: add sp, sp, #4 - orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 - do_it eq, e - COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0 - teqne xh, yh @ or xh == yh - do_it eq, tt - teqeq xl, yl @ and xl == yl - moveq r0, #0 @ then equal. - RETc(eq) - - @ Clear C flag - cmn r0, #0 - - @ Compare sign, - teq xh, yh - - @ Compare values if same sign - do_it pl - cmppl xh, yh - do_it eq - cmpeq xl, yl - - @ Result: - do_it cs, e - movcs r0, yh, asr #31 - mvncc r0, yh, asr #31 - orr r0, r0, #1 - RET - - @ Look for a NAN. -3: mov ip, xh, lsl #1 - mvns ip, ip, asr #21 - bne 4f - orrs ip, xl, xh, lsl #12 - bne 5f @ x is NAN -4: mov ip, yh, lsl #1 - mvns ip, ip, asr #21 - bne 2b - orrs ip, yl, yh, lsl #12 - beq 2b @ y is not NAN -5: ldr r0, [sp], #4 @ unordered return code - RET - - FUNC_END gedf2 - FUNC_END gtdf2 - FUNC_END ledf2 - FUNC_END ltdf2 - FUNC_END nedf2 - FUNC_END eqdf2 - FUNC_END cmpdf2 - -ARM_FUNC_START aeabi_cdrcmple - - mov ip, r0 - mov r0, r2 - mov r2, ip - mov ip, r1 - mov r1, r3 - mov r3, ip - b 6f - -ARM_FUNC_START aeabi_cdcmpeq -ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq - - @ The status-returning routines are required to preserve all - @ registers except ip, lr, and cpsr. -6: do_push {r0, lr} - ARM_CALL cmpdf2 - @ Set the Z flag correctly, and the C flag unconditionally. - cmp r0, #0 - @ Clear the C flag if the return value was -1, indicating - @ that the first operand was smaller than the second. - do_it mi - cmnmi r0, #0 - RETLDM "r0" - - FUNC_END aeabi_cdcmple - FUNC_END aeabi_cdcmpeq - FUNC_END aeabi_cdrcmple - -ARM_FUNC_START aeabi_dcmpeq - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdcmple - do_it eq, e - moveq r0, #1 @ Equal to. - movne r0, #0 @ Less than, greater than, or unordered. - RETLDM - - FUNC_END aeabi_dcmpeq - -ARM_FUNC_START aeabi_dcmplt - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdcmple - do_it cc, e - movcc r0, #1 @ Less than. - movcs r0, #0 @ Equal to, greater than, or unordered. - RETLDM - - FUNC_END aeabi_dcmplt - -ARM_FUNC_START aeabi_dcmple - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdcmple - do_it ls, e - movls r0, #1 @ Less than or equal to. - movhi r0, #0 @ Greater than or unordered. - RETLDM - - FUNC_END aeabi_dcmple - -ARM_FUNC_START aeabi_dcmpge - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdrcmple - do_it ls, e - movls r0, #1 @ Operand 2 is less than or equal to operand 1. - movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. - RETLDM - - FUNC_END aeabi_dcmpge - -ARM_FUNC_START aeabi_dcmpgt - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdrcmple - do_it cc, e - movcc r0, #1 @ Operand 2 is less than operand 1. - movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, - @ or they are unordered. - RETLDM - - FUNC_END aeabi_dcmpgt - -#endif /* L_cmpdf2 */ - -#ifdef L_arm_unorddf2 - -ARM_FUNC_START unorddf2 -ARM_FUNC_ALIAS aeabi_dcmpun unorddf2 - - mov ip, xh, lsl #1 - mvns ip, ip, asr #21 - bne 1f - orrs ip, xl, xh, lsl #12 - bne 3f @ x is NAN -1: mov ip, yh, lsl #1 - mvns ip, ip, asr #21 - bne 2f - orrs ip, yl, yh, lsl #12 - bne 3f @ y is NAN -2: mov r0, #0 @ arguments are ordered. - RET - -3: mov r0, #1 @ arguments are unordered. - RET - - FUNC_END aeabi_dcmpun - FUNC_END unorddf2 - -#endif /* L_unorddf2 */ - -#ifdef L_arm_fixdfsi - -ARM_FUNC_START fixdfsi -ARM_FUNC_ALIAS aeabi_d2iz fixdfsi - - @ check exponent range. 
- mov r2, xh, lsl #1 - adds r2, r2, #(1 << 21) - bcs 2f @ value is INF or NAN - bpl 1f @ value is too small - mov r3, #(0xfffffc00 + 31) - subs r2, r3, r2, asr #21 - bls 3f @ value is too large - - @ scale value - mov r3, xh, lsl #11 - orr r3, r3, #0x80000000 - orr r3, r3, xl, lsr #21 - tst xh, #0x80000000 @ the sign bit - shift1 lsr, r0, r3, r2 - do_it ne - rsbne r0, r0, #0 - RET - -1: mov r0, #0 - RET - -2: orrs xl, xl, xh, lsl #12 - bne 4f @ x is NAN. -3: ands r0, xh, #0x80000000 @ the sign bit - do_it eq - moveq r0, #0x7fffffff @ maximum signed positive si - RET - -4: mov r0, #0 @ How should we convert NAN? - RET - - FUNC_END aeabi_d2iz - FUNC_END fixdfsi - -#endif /* L_fixdfsi */ - -#ifdef L_arm_fixunsdfsi - -ARM_FUNC_START fixunsdfsi -ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi - - @ check exponent range. - movs r2, xh, lsl #1 - bcs 1f @ value is negative - adds r2, r2, #(1 << 21) - bcs 2f @ value is INF or NAN - bpl 1f @ value is too small - mov r3, #(0xfffffc00 + 31) - subs r2, r3, r2, asr #21 - bmi 3f @ value is too large - - @ scale value - mov r3, xh, lsl #11 - orr r3, r3, #0x80000000 - orr r3, r3, xl, lsr #21 - shift1 lsr, r0, r3, r2 - RET - -1: mov r0, #0 - RET - -2: orrs xl, xl, xh, lsl #12 - bne 4f @ value is NAN. -3: mov r0, #0xffffffff @ maximum unsigned si - RET - -4: mov r0, #0 @ How should we convert NAN? - RET - - FUNC_END aeabi_d2uiz - FUNC_END fixunsdfsi - -#endif /* L_fixunsdfsi */ - -#ifdef L_arm_truncdfsf2 - -ARM_FUNC_START truncdfsf2 -ARM_FUNC_ALIAS aeabi_d2f truncdfsf2 - - @ check exponent range. - mov r2, xh, lsl #1 - subs r3, r2, #((1023 - 127) << 21) - do_it cs, t - COND(sub,s,cs) ip, r3, #(1 << 21) - COND(rsb,s,cs) ip, ip, #(254 << 21) - bls 2f @ value is out of range - -1: @ shift and round mantissa - and ip, xh, #0x80000000 - mov r2, xl, lsl #3 - orr xl, ip, xl, lsr #29 - cmp r2, #0x80000000 - adc r0, xl, r3, lsl #2 - do_it eq - biceq r0, r0, #1 - RET - -2: @ either overflow or underflow - tst xh, #0x40000000 - bne 3f @ overflow - - @ check if denormalized value is possible - adds r2, r3, #(23 << 21) - do_it lt, t - andlt r0, xh, #0x80000000 @ too small, return signed 0. - RETc(lt) - - @ denormalize value so we can resume with the code above afterwards. - orr xh, xh, #0x00100000 - mov r2, r2, lsr #21 - rsb r2, r2, #24 - rsb ip, r2, #32 -#if defined(__thumb2__) - lsls r3, xl, ip -#else - movs r3, xl, lsl ip -#endif - shift1 lsr, xl, xl, r2 - do_it ne - orrne xl, xl, #1 @ fold r3 for rounding considerations. - mov r3, xh, lsl #11 - mov r3, r3, lsr #11 - shiftop orr xl xl r3 lsl ip ip - shift1 lsr, r3, r3, r2 - mov r3, r3, lsl #1 - b 1b - -3: @ chech for NAN - mvns r3, r2, asr #21 - bne 5f @ simple overflow - orrs r3, xl, xh, lsl #12 - do_it ne, tt - movne r0, #0x7f000000 - orrne r0, r0, #0x00c00000 - RETc(ne) @ return NAN - -5: @ return INF with sign - and r0, xh, #0x80000000 - orr r0, r0, #0x7f000000 - orr r0, r0, #0x00800000 - RET - - FUNC_END aeabi_d2f - FUNC_END truncdfsf2 - -#endif /* L_truncdfsf2 */ diff --git a/gcc/config/arm/ieee754-sf.S b/gcc/config/arm/ieee754-sf.S deleted file mode 100644 index c93f66d8ff8..00000000000 --- a/gcc/config/arm/ieee754-sf.S +++ /dev/null @@ -1,1060 +0,0 @@ -/* ieee754-sf.S single-precision floating point support for ARM - - Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. 
- Contributed by Nicolas Pitre (nico@cam.org) - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -/* - * Notes: - * - * The goal of this code is to be as fast as possible. This is - * not meant to be easy to understand for the casual reader. - * - * Only the default rounding mode is intended for best performances. - * Exceptions aren't supported yet, but that can be added quite easily - * if necessary without impacting performances. - */ - -#ifdef L_arm_negsf2 - -ARM_FUNC_START negsf2 -ARM_FUNC_ALIAS aeabi_fneg negsf2 - - eor r0, r0, #0x80000000 @ flip sign bit - RET - - FUNC_END aeabi_fneg - FUNC_END negsf2 - -#endif - -#ifdef L_arm_addsubsf3 - -ARM_FUNC_START aeabi_frsub - - eor r0, r0, #0x80000000 @ flip sign bit of first arg - b 1f - -ARM_FUNC_START subsf3 -ARM_FUNC_ALIAS aeabi_fsub subsf3 - - eor r1, r1, #0x80000000 @ flip sign bit of second arg -#if defined(__INTERWORKING_STUBS__) - b 1f @ Skip Thumb-code prologue -#endif - -ARM_FUNC_START addsf3 -ARM_FUNC_ALIAS aeabi_fadd addsf3 - -1: @ Look for zeroes, equal values, INF, or NAN. - movs r2, r0, lsl #1 - do_it ne, ttt - COND(mov,s,ne) r3, r1, lsl #1 - teqne r2, r3 - COND(mvn,s,ne) ip, r2, asr #24 - COND(mvn,s,ne) ip, r3, asr #24 - beq LSYM(Lad_s) - - @ Compute exponent difference. Make largest exponent in r2, - @ corresponding arg in r0, and positive exponent difference in r3. - mov r2, r2, lsr #24 - rsbs r3, r2, r3, lsr #24 - do_it gt, ttt - addgt r2, r2, r3 - eorgt r1, r0, r1 - eorgt r0, r1, r0 - eorgt r1, r0, r1 - do_it lt - rsblt r3, r3, #0 - - @ If exponent difference is too large, return largest argument - @ already in r0. We need up to 25 bit to handle proper rounding - @ of 0x1p25 - 1.1. - cmp r3, #25 - do_it hi - RETc(hi) - - @ Convert mantissa to signed integer. - tst r0, #0x80000000 - orr r0, r0, #0x00800000 - bic r0, r0, #0xff000000 - do_it ne - rsbne r0, r0, #0 - tst r1, #0x80000000 - orr r1, r1, #0x00800000 - bic r1, r1, #0xff000000 - do_it ne - rsbne r1, r1, #0 - - @ If exponent == difference, one or both args were denormalized. - @ Since this is not common case, rescale them off line. - teq r2, r3 - beq LSYM(Lad_d) -LSYM(Lad_x): - - @ Compensate for the exponent overlapping the mantissa MSB added later - sub r2, r2, #1 - - @ Shift and add second arg to first arg in r0. - @ Keep leftover bits into r1. - shiftop adds r0 r0 r1 asr r3 ip - rsb r3, r3, #32 - shift1 lsl, r1, r1, r3 - - @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above) - and r3, r0, #0x80000000 - bpl LSYM(Lad_p) -#if defined(__thumb2__) - negs r1, r1 - sbc r0, r0, r0, lsl #1 -#else - rsbs r1, r1, #0 - rsc r0, r0, #0 -#endif - - @ Determine how to normalize the result. 
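
[Note, not part of the patch: the addition path above works on the raw IEEE-754 bit patterns. It splits off the exponent field, restores the hidden mantissa bit, and negates the mantissa of a negative operand so the sum can be formed as a plain signed integer. A C sketch of that unpacking step, illustrative only; denormals, zeros, INF and NaN take the separate paths shown in the code:]

#include <stdint.h>

struct sf_parts { int exp; int32_t mant; };

static struct sf_parts sf_unpack_sketch(uint32_t bits)
{
    struct sf_parts p;
    p.exp  = (bits >> 23) & 0xff;                  /* biased exponent */
    p.mant = (bits & 0x007fffff) | 0x00800000;     /* restore the hidden bit */
    if (bits & 0x80000000u)                        /* negative operand:      */
        p.mant = -p.mant;                          /* use a signed mantissa  */
    return p;
}
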
-LSYM(Lad_p): - cmp r0, #0x00800000 - bcc LSYM(Lad_a) - cmp r0, #0x01000000 - bcc LSYM(Lad_e) - - @ Result needs to be shifted right. - movs r0, r0, lsr #1 - mov r1, r1, rrx - add r2, r2, #1 - - @ Make sure we did not bust our exponent. - cmp r2, #254 - bhs LSYM(Lad_o) - - @ Our result is now properly aligned into r0, remaining bits in r1. - @ Pack final result together. - @ Round with MSB of r1. If halfway between two numbers, round towards - @ LSB of r0 = 0. -LSYM(Lad_e): - cmp r1, #0x80000000 - adc r0, r0, r2, lsl #23 - do_it eq - biceq r0, r0, #1 - orr r0, r0, r3 - RET - - @ Result must be shifted left and exponent adjusted. -LSYM(Lad_a): - movs r1, r1, lsl #1 - adc r0, r0, r0 - tst r0, #0x00800000 - sub r2, r2, #1 - bne LSYM(Lad_e) - - @ No rounding necessary since r1 will always be 0 at this point. -LSYM(Lad_l): - -#if __ARM_ARCH__ < 5 - - movs ip, r0, lsr #12 - moveq r0, r0, lsl #12 - subeq r2, r2, #12 - tst r0, #0x00ff0000 - moveq r0, r0, lsl #8 - subeq r2, r2, #8 - tst r0, #0x00f00000 - moveq r0, r0, lsl #4 - subeq r2, r2, #4 - tst r0, #0x00c00000 - moveq r0, r0, lsl #2 - subeq r2, r2, #2 - cmp r0, #0x00800000 - movcc r0, r0, lsl #1 - sbcs r2, r2, #0 - -#else - - clz ip, r0 - sub ip, ip, #8 - subs r2, r2, ip - shift1 lsl, r0, r0, ip - -#endif - - @ Final result with sign - @ If exponent negative, denormalize result. - do_it ge, et - addge r0, r0, r2, lsl #23 - rsblt r2, r2, #0 - orrge r0, r0, r3 -#if defined(__thumb2__) - do_it lt, t - lsrlt r0, r0, r2 - orrlt r0, r3, r0 -#else - orrlt r0, r3, r0, lsr r2 -#endif - RET - - @ Fixup and adjust bit position for denormalized arguments. - @ Note that r2 must not remain equal to 0. -LSYM(Lad_d): - teq r2, #0 - eor r1, r1, #0x00800000 - do_it eq, te - eoreq r0, r0, #0x00800000 - addeq r2, r2, #1 - subne r3, r3, #1 - b LSYM(Lad_x) - -LSYM(Lad_s): - mov r3, r1, lsl #1 - - mvns ip, r2, asr #24 - do_it ne - COND(mvn,s,ne) ip, r3, asr #24 - beq LSYM(Lad_i) - - teq r2, r3 - beq 1f - - @ Result is x + 0.0 = x or 0.0 + y = y. - teq r2, #0 - do_it eq - moveq r0, r1 - RET - -1: teq r0, r1 - - @ Result is x - x = 0. - do_it ne, t - movne r0, #0 - RETc(ne) - - @ Result is x + x = 2x. - tst r2, #0xff000000 - bne 2f - movs r0, r0, lsl #1 - do_it cs - orrcs r0, r0, #0x80000000 - RET -2: adds r2, r2, #(2 << 24) - do_it cc, t - addcc r0, r0, #(1 << 23) - RETc(cc) - and r3, r0, #0x80000000 - - @ Overflow: return INF. -LSYM(Lad_o): - orr r0, r3, #0x7f000000 - orr r0, r0, #0x00800000 - RET - - @ At least one of r0/r1 is INF/NAN. 
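
[Note, not part of the patch: normalization after the add (label Lad_l above) needs a leading-zero count. ARMv5 and later use the clz instruction; older cores get the same answer from a cascade of shift tests. A generic C equivalent of that fallback, as a sketch:]

#include <stdint.h>

static int clz32_sketch(uint32_t x)
{
    int n = 0;
    if (x == 0) return 32;
    if (!(x & 0xffff0000u)) { n += 16; x <<= 16; }
    if (!(x & 0xff000000u)) { n += 8;  x <<= 8;  }
    if (!(x & 0xf0000000u)) { n += 4;  x <<= 4;  }
    if (!(x & 0xc0000000u)) { n += 2;  x <<= 2;  }
    if (!(x & 0x80000000u)) { n += 1; }
    return n;
}
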
- @ if r0 != INF/NAN: return r1 (which is INF/NAN) - @ if r1 != INF/NAN: return r0 (which is INF/NAN) - @ if r0 or r1 is NAN: return NAN - @ if opposite sign: return NAN - @ otherwise return r0 (which is INF or -INF) -LSYM(Lad_i): - mvns r2, r2, asr #24 - do_it ne, et - movne r0, r1 - COND(mvn,s,eq) r3, r3, asr #24 - movne r1, r0 - movs r2, r0, lsl #9 - do_it eq, te - COND(mov,s,eq) r3, r1, lsl #9 - teqeq r0, r1 - orrne r0, r0, #0x00400000 @ quiet NAN - RET - - FUNC_END aeabi_frsub - FUNC_END aeabi_fadd - FUNC_END addsf3 - FUNC_END aeabi_fsub - FUNC_END subsf3 - -ARM_FUNC_START floatunsisf -ARM_FUNC_ALIAS aeabi_ui2f floatunsisf - - mov r3, #0 - b 1f - -ARM_FUNC_START floatsisf -ARM_FUNC_ALIAS aeabi_i2f floatsisf - - ands r3, r0, #0x80000000 - do_it mi - rsbmi r0, r0, #0 - -1: movs ip, r0 - do_it eq - RETc(eq) - - @ Add initial exponent to sign - orr r3, r3, #((127 + 23) << 23) - - .ifnc ah, r0 - mov ah, r0 - .endif - mov al, #0 - b 2f - - FUNC_END aeabi_i2f - FUNC_END floatsisf - FUNC_END aeabi_ui2f - FUNC_END floatunsisf - -ARM_FUNC_START floatundisf -ARM_FUNC_ALIAS aeabi_ul2f floatundisf - - orrs r2, r0, r1 -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - do_it eq, t - mvfeqs f0, #0.0 -#else - do_it eq -#endif - RETc(eq) - - mov r3, #0 - b 1f - -ARM_FUNC_START floatdisf -ARM_FUNC_ALIAS aeabi_l2f floatdisf - - orrs r2, r0, r1 -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - do_it eq, t - mvfeqs f0, #0.0 -#else - do_it eq -#endif - RETc(eq) - - ands r3, ah, #0x80000000 @ sign bit in r3 - bpl 1f -#if defined(__thumb2__) - negs al, al - sbc ah, ah, ah, lsl #1 -#else - rsbs al, al, #0 - rsc ah, ah, #0 -#endif -1: -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - @ For hard FPA code we want to return via the tail below so that - @ we can return the result in f0 as well as in r0 for backwards - @ compatibility. - str lr, [sp, #-8]! - adr lr, LSYM(f0_ret) -#endif - - movs ip, ah - do_it eq, tt - moveq ip, al - moveq ah, al - moveq al, #0 - - @ Add initial exponent to sign - orr r3, r3, #((127 + 23 + 32) << 23) - do_it eq - subeq r3, r3, #(32 << 23) -2: sub r3, r3, #(1 << 23) - -#if __ARM_ARCH__ < 5 - - mov r2, #23 - cmp ip, #(1 << 16) - do_it hs, t - movhs ip, ip, lsr #16 - subhs r2, r2, #16 - cmp ip, #(1 << 8) - do_it hs, t - movhs ip, ip, lsr #8 - subhs r2, r2, #8 - cmp ip, #(1 << 4) - do_it hs, t - movhs ip, ip, lsr #4 - subhs r2, r2, #4 - cmp ip, #(1 << 2) - do_it hs, e - subhs r2, r2, #2 - sublo r2, r2, ip, lsr #1 - subs r2, r2, ip, lsr #3 - -#else - - clz r2, ip - subs r2, r2, #8 - -#endif - - sub r3, r3, r2, lsl #23 - blt 3f - - shiftop add r3 r3 ah lsl r2 ip - shift1 lsl, ip, al, r2 - rsb r2, r2, #32 - cmp ip, #0x80000000 - shiftop adc r0 r3 al lsr r2 r2 - do_it eq - biceq r0, r0, #1 - RET - -3: add r2, r2, #32 - shift1 lsl, ip, ah, r2 - rsb r2, r2, #32 - orrs al, al, ip, lsl #1 - shiftop adc r0 r3 ah lsr r2 r2 - do_it eq - biceq r0, r0, ip, lsr #31 - RET - -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - -LSYM(f0_ret): - str r0, [sp, #-4]! - ldfs f0, [sp], #4 - RETLDM - -#endif - - FUNC_END floatdisf - FUNC_END aeabi_l2f - FUNC_END floatundisf - FUNC_END aeabi_ul2f - -#endif /* L_addsubsf3 */ - -#ifdef L_arm_muldivsf3 - -ARM_FUNC_START mulsf3 -ARM_FUNC_ALIAS aeabi_fmul mulsf3 - - @ Mask out exponents, trap any zero/denormal/INF/NAN. - mov ip, #0xff - ands r2, ip, r0, lsr #23 - do_it ne, tt - COND(and,s,ne) r3, ip, r1, lsr #23 - teqne r2, ip - teqne r3, ip - beq LSYM(Lml_s) -LSYM(Lml_x): - - @ Add exponents together - add r2, r2, r3 - - @ Determine final sign. 
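
[Note, not part of the patch: floatdisf/floatundisf above seed the exponent with 127 + 23 + 32, subtract one for every position the 64-bit value must be shifted to bring its leading bit into place, and round the dropped bits to nearest-even (the adc/biceq pair). The same idea in C, as a rough sketch with illustrative names:]

#include <stdint.h>

static float u64_to_float_sketch(uint64_t v)
{
    if (v == 0)
        return 0.0f;

    int lz = 0;
    while (!(v >> 63)) { v <<= 1; lz++; }     /* normalize: leading bit to bit 63 */

    uint32_t exp  = 127 + 63 - lz;            /* biased exponent */
    uint32_t mant = (uint32_t)(v >> 40);      /* top 24 bits, hidden bit included */
    uint64_t rest = v << 24;                  /* dropped bits, kept for rounding */

    uint32_t bits = (exp << 23) | (mant & 0x007fffffu);

    /* Round to nearest, ties to even; a carry here may bump the
       exponent field, which is exactly what is wanted. */
    if (rest > 0x8000000000000000ull ||
        (rest == 0x8000000000000000ull && (bits & 1)))
        bits++;

    union { uint32_t u; float f; } pun = { bits };
    return pun.f;
}
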
- eor ip, r0, r1 - - @ Convert mantissa to unsigned integer. - @ If power of two, branch to a separate path. - @ Make up for final alignment. - movs r0, r0, lsl #9 - do_it ne - COND(mov,s,ne) r1, r1, lsl #9 - beq LSYM(Lml_1) - mov r3, #0x08000000 - orr r0, r3, r0, lsr #5 - orr r1, r3, r1, lsr #5 - -#if __ARM_ARCH__ < 4 - - @ Put sign bit in r3, which will be restored into r0 later. - and r3, ip, #0x80000000 - - @ Well, no way to make it shorter without the umull instruction. - do_push {r3, r4, r5} - mov r4, r0, lsr #16 - mov r5, r1, lsr #16 - bic r0, r0, r4, lsl #16 - bic r1, r1, r5, lsl #16 - mul ip, r4, r5 - mul r3, r0, r1 - mul r0, r5, r0 - mla r0, r4, r1, r0 - adds r3, r3, r0, lsl #16 - adc r1, ip, r0, lsr #16 - do_pop {r0, r4, r5} - -#else - - @ The actual multiplication. - umull r3, r1, r0, r1 - - @ Put final sign in r0. - and r0, ip, #0x80000000 - -#endif - - @ Adjust result upon the MSB position. - cmp r1, #(1 << 23) - do_it cc, tt - movcc r1, r1, lsl #1 - orrcc r1, r1, r3, lsr #31 - movcc r3, r3, lsl #1 - - @ Add sign to result. - orr r0, r0, r1 - - @ Apply exponent bias, check for under/overflow. - sbc r2, r2, #127 - cmp r2, #(254 - 1) - bhi LSYM(Lml_u) - - @ Round the result, merge final exponent. - cmp r3, #0x80000000 - adc r0, r0, r2, lsl #23 - do_it eq - biceq r0, r0, #1 - RET - - @ Multiplication by 0x1p*: let''s shortcut a lot of code. -LSYM(Lml_1): - teq r0, #0 - and ip, ip, #0x80000000 - do_it eq - moveq r1, r1, lsl #9 - orr r0, ip, r0, lsr #9 - orr r0, r0, r1, lsr #9 - subs r2, r2, #127 - do_it gt, tt - COND(rsb,s,gt) r3, r2, #255 - orrgt r0, r0, r2, lsl #23 - RETc(gt) - - @ Under/overflow: fix things up for the code below. - orr r0, r0, #0x00800000 - mov r3, #0 - subs r2, r2, #1 - -LSYM(Lml_u): - @ Overflow? - bgt LSYM(Lml_o) - - @ Check if denormalized result is possible, otherwise return signed 0. - cmn r2, #(24 + 1) - do_it le, t - bicle r0, r0, #0x7fffffff - RETc(le) - - @ Shift value right, round, etc. - rsb r2, r2, #0 - movs r1, r0, lsl #1 - shift1 lsr, r1, r1, r2 - rsb r2, r2, #32 - shift1 lsl, ip, r0, r2 - movs r0, r1, rrx - adc r0, r0, #0 - orrs r3, r3, ip, lsl #1 - do_it eq - biceq r0, r0, ip, lsr #31 - RET - - @ One or both arguments are denormalized. - @ Scale them leftwards and preserve sign bit. -LSYM(Lml_d): - teq r2, #0 - and ip, r0, #0x80000000 -1: do_it eq, tt - moveq r0, r0, lsl #1 - tsteq r0, #0x00800000 - subeq r2, r2, #1 - beq 1b - orr r0, r0, ip - teq r3, #0 - and ip, r1, #0x80000000 -2: do_it eq, tt - moveq r1, r1, lsl #1 - tsteq r1, #0x00800000 - subeq r3, r3, #1 - beq 2b - orr r1, r1, ip - b LSYM(Lml_x) - -LSYM(Lml_s): - @ Isolate the INF and NAN cases away - and r3, ip, r1, lsr #23 - teq r2, ip - do_it ne - teqne r3, ip - beq 1f - - @ Here, one or more arguments are either denormalized or zero. - bics ip, r0, #0x80000000 - do_it ne - COND(bic,s,ne) ip, r1, #0x80000000 - bne LSYM(Lml_d) - - @ Result is 0, but determine sign anyway. -LSYM(Lml_z): - eor r0, r0, r1 - bic r0, r0, #0x7fffffff - RET - -1: @ One or both args are INF or NAN. - teq r0, #0x0 - do_it ne, ett - teqne r0, #0x80000000 - moveq r0, r1 - teqne r1, #0x0 - teqne r1, #0x80000000 - beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN - teq r2, ip - bne 1f - movs r2, r0, lsl #9 - bne LSYM(Lml_n) @ NAN * -> NAN -1: teq r3, ip - bne LSYM(Lml_i) - movs r3, r1, lsl #9 - do_it ne - movne r0, r1 - bne LSYM(Lml_n) @ * NAN -> NAN - - @ Result is INF, but we need to determine its sign. -LSYM(Lml_i): - eor r0, r0, r1 - - @ Overflow: return INF (sign already in r0). 
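
[Note, not part of the patch: on cores without umull (the __ARM_ARCH__ < 4 path above), mulsf3 builds the 64-bit product of the two mantissas from four 16x16 partial products. A general C rendering of that trick, names illustrative:]

#include <stdint.h>

static void umull32_sketch(uint32_t a, uint32_t b, uint32_t *hi, uint32_t *lo)
{
    uint32_t a_hi = a >> 16, a_lo = a & 0xffffu;
    uint32_t b_hi = b >> 16, b_lo = b & 0xffffu;

    uint32_t low  = a_lo * b_lo;              /* bits  0..31 */
    uint32_t m1   = a_hi * b_lo;              /* bits 16..47 */
    uint32_t m2   = a_lo * b_hi;              /* bits 16..47 */
    uint32_t high = a_hi * b_hi;              /* bits 32..63 */

    uint32_t mid = m1 + m2;
    /* The mid sum can carry into bit 48 in general; the mantissas the
       library multiplies are short enough that it never does there. */
    uint32_t mid_carry = (mid < m1) ? 0x10000u : 0;

    *lo = low + (mid << 16);
    *hi = high + (mid >> 16) + mid_carry + (*lo < low ? 1u : 0u);
}
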
-LSYM(Lml_o): - and r0, r0, #0x80000000 - orr r0, r0, #0x7f000000 - orr r0, r0, #0x00800000 - RET - - @ Return a quiet NAN. -LSYM(Lml_n): - orr r0, r0, #0x7f000000 - orr r0, r0, #0x00c00000 - RET - - FUNC_END aeabi_fmul - FUNC_END mulsf3 - -ARM_FUNC_START divsf3 -ARM_FUNC_ALIAS aeabi_fdiv divsf3 - - @ Mask out exponents, trap any zero/denormal/INF/NAN. - mov ip, #0xff - ands r2, ip, r0, lsr #23 - do_it ne, tt - COND(and,s,ne) r3, ip, r1, lsr #23 - teqne r2, ip - teqne r3, ip - beq LSYM(Ldv_s) -LSYM(Ldv_x): - - @ Substract divisor exponent from dividend''s - sub r2, r2, r3 - - @ Preserve final sign into ip. - eor ip, r0, r1 - - @ Convert mantissa to unsigned integer. - @ Dividend -> r3, divisor -> r1. - movs r1, r1, lsl #9 - mov r0, r0, lsl #9 - beq LSYM(Ldv_1) - mov r3, #0x10000000 - orr r1, r3, r1, lsr #4 - orr r3, r3, r0, lsr #4 - - @ Initialize r0 (result) with final sign bit. - and r0, ip, #0x80000000 - - @ Ensure result will land to known bit position. - @ Apply exponent bias accordingly. - cmp r3, r1 - do_it cc - movcc r3, r3, lsl #1 - adc r2, r2, #(127 - 2) - - @ The actual division loop. - mov ip, #0x00800000 -1: cmp r3, r1 - do_it cs, t - subcs r3, r3, r1 - orrcs r0, r0, ip - cmp r3, r1, lsr #1 - do_it cs, t - subcs r3, r3, r1, lsr #1 - orrcs r0, r0, ip, lsr #1 - cmp r3, r1, lsr #2 - do_it cs, t - subcs r3, r3, r1, lsr #2 - orrcs r0, r0, ip, lsr #2 - cmp r3, r1, lsr #3 - do_it cs, t - subcs r3, r3, r1, lsr #3 - orrcs r0, r0, ip, lsr #3 - movs r3, r3, lsl #4 - do_it ne - COND(mov,s,ne) ip, ip, lsr #4 - bne 1b - - @ Check exponent for under/overflow. - cmp r2, #(254 - 1) - bhi LSYM(Lml_u) - - @ Round the result, merge final exponent. - cmp r3, r1 - adc r0, r0, r2, lsl #23 - do_it eq - biceq r0, r0, #1 - RET - - @ Division by 0x1p*: let''s shortcut a lot of code. -LSYM(Ldv_1): - and ip, ip, #0x80000000 - orr r0, ip, r0, lsr #9 - adds r2, r2, #127 - do_it gt, tt - COND(rsb,s,gt) r3, r2, #255 - orrgt r0, r0, r2, lsl #23 - RETc(gt) - - orr r0, r0, #0x00800000 - mov r3, #0 - subs r2, r2, #1 - b LSYM(Lml_u) - - @ One or both arguments are denormalized. - @ Scale them leftwards and preserve sign bit. -LSYM(Ldv_d): - teq r2, #0 - and ip, r0, #0x80000000 -1: do_it eq, tt - moveq r0, r0, lsl #1 - tsteq r0, #0x00800000 - subeq r2, r2, #1 - beq 1b - orr r0, r0, ip - teq r3, #0 - and ip, r1, #0x80000000 -2: do_it eq, tt - moveq r1, r1, lsl #1 - tsteq r1, #0x00800000 - subeq r3, r3, #1 - beq 2b - orr r1, r1, ip - b LSYM(Ldv_x) - - @ One or both arguments are either INF, NAN, zero or denormalized. -LSYM(Ldv_s): - and r3, ip, r1, lsr #23 - teq r2, ip - bne 1f - movs r2, r0, lsl #9 - bne LSYM(Lml_n) @ NAN / -> NAN - teq r3, ip - bne LSYM(Lml_i) @ INF / -> INF - mov r0, r1 - b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN -1: teq r3, ip - bne 2f - movs r3, r1, lsl #9 - beq LSYM(Lml_z) @ / INF -> 0 - mov r0, r1 - b LSYM(Lml_n) @ / NAN -> NAN -2: @ If both are nonzero, we need to normalize and resume above. - bics ip, r0, #0x80000000 - do_it ne - COND(bic,s,ne) ip, r1, #0x80000000 - bne LSYM(Ldv_d) - @ One or both arguments are zero. - bics r2, r0, #0x80000000 - bne LSYM(Lml_i) @ / 0 -> INF - bics r3, r1, #0x80000000 - bne LSYM(Lml_z) @ 0 / -> 0 - b LSYM(Lml_n) @ 0 / 0 -> NAN - - FUNC_END aeabi_fdiv - FUNC_END divsf3 - -#endif /* L_muldivsf3 */ - -#ifdef L_arm_cmpsf2 - - @ The return value in r0 is - @ - @ 0 if the operands are equal - @ 1 if the first operand is greater than the second, or - @ the operands are unordered and the operation is - @ CMP, LT, LE, NE, or EQ. 
- @ -1 if the first operand is less than the second, or - @ the operands are unordered and the operation is GT - @ or GE. - @ - @ The Z flag will be set iff the operands are equal. - @ - @ The following registers are clobbered by this function: - @ ip, r0, r1, r2, r3 - -ARM_FUNC_START gtsf2 -ARM_FUNC_ALIAS gesf2 gtsf2 - mov ip, #-1 - b 1f - -ARM_FUNC_START ltsf2 -ARM_FUNC_ALIAS lesf2 ltsf2 - mov ip, #1 - b 1f - -ARM_FUNC_START cmpsf2 -ARM_FUNC_ALIAS nesf2 cmpsf2 -ARM_FUNC_ALIAS eqsf2 cmpsf2 - mov ip, #1 @ how should we specify unordered here? - -1: str ip, [sp, #-4]! - - @ Trap any INF/NAN first. - mov r2, r0, lsl #1 - mov r3, r1, lsl #1 - mvns ip, r2, asr #24 - do_it ne - COND(mvn,s,ne) ip, r3, asr #24 - beq 3f - - @ Compare values. - @ Note that 0.0 is equal to -0.0. -2: add sp, sp, #4 - orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag - do_it ne - teqne r0, r1 @ if not 0 compare sign - do_it pl - COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0 - - @ Result: - do_it hi - movhi r0, r1, asr #31 - do_it lo - mvnlo r0, r1, asr #31 - do_it ne - orrne r0, r0, #1 - RET - - @ Look for a NAN. -3: mvns ip, r2, asr #24 - bne 4f - movs ip, r0, lsl #9 - bne 5f @ r0 is NAN -4: mvns ip, r3, asr #24 - bne 2b - movs ip, r1, lsl #9 - beq 2b @ r1 is not NAN -5: ldr r0, [sp], #4 @ return unordered code. - RET - - FUNC_END gesf2 - FUNC_END gtsf2 - FUNC_END lesf2 - FUNC_END ltsf2 - FUNC_END nesf2 - FUNC_END eqsf2 - FUNC_END cmpsf2 - -ARM_FUNC_START aeabi_cfrcmple - - mov ip, r0 - mov r0, r1 - mov r1, ip - b 6f - -ARM_FUNC_START aeabi_cfcmpeq -ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq - - @ The status-returning routines are required to preserve all - @ registers except ip, lr, and cpsr. -6: do_push {r0, r1, r2, r3, lr} - ARM_CALL cmpsf2 - @ Set the Z flag correctly, and the C flag unconditionally. - cmp r0, #0 - @ Clear the C flag if the return value was -1, indicating - @ that the first operand was smaller than the second. - do_it mi - cmnmi r0, #0 - RETLDM "r0, r1, r2, r3" - - FUNC_END aeabi_cfcmple - FUNC_END aeabi_cfcmpeq - FUNC_END aeabi_cfrcmple - -ARM_FUNC_START aeabi_fcmpeq - - str lr, [sp, #-8]! - ARM_CALL aeabi_cfcmple - do_it eq, e - moveq r0, #1 @ Equal to. - movne r0, #0 @ Less than, greater than, or unordered. - RETLDM - - FUNC_END aeabi_fcmpeq - -ARM_FUNC_START aeabi_fcmplt - - str lr, [sp, #-8]! - ARM_CALL aeabi_cfcmple - do_it cc, e - movcc r0, #1 @ Less than. - movcs r0, #0 @ Equal to, greater than, or unordered. - RETLDM - - FUNC_END aeabi_fcmplt - -ARM_FUNC_START aeabi_fcmple - - str lr, [sp, #-8]! - ARM_CALL aeabi_cfcmple - do_it ls, e - movls r0, #1 @ Less than or equal to. - movhi r0, #0 @ Greater than or unordered. - RETLDM - - FUNC_END aeabi_fcmple - -ARM_FUNC_START aeabi_fcmpge - - str lr, [sp, #-8]! - ARM_CALL aeabi_cfrcmple - do_it ls, e - movls r0, #1 @ Operand 2 is less than or equal to operand 1. - movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. - RETLDM - - FUNC_END aeabi_fcmpge - -ARM_FUNC_START aeabi_fcmpgt - - str lr, [sp, #-8]! - ARM_CALL aeabi_cfrcmple - do_it cc, e - movcc r0, #1 @ Operand 2 is less than operand 1. - movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, - @ or they are unordered. 
- RETLDM - - FUNC_END aeabi_fcmpgt - -#endif /* L_cmpsf2 */ - -#ifdef L_arm_unordsf2 - -ARM_FUNC_START unordsf2 -ARM_FUNC_ALIAS aeabi_fcmpun unordsf2 - - mov r2, r0, lsl #1 - mov r3, r1, lsl #1 - mvns ip, r2, asr #24 - bne 1f - movs ip, r0, lsl #9 - bne 3f @ r0 is NAN -1: mvns ip, r3, asr #24 - bne 2f - movs ip, r1, lsl #9 - bne 3f @ r1 is NAN -2: mov r0, #0 @ arguments are ordered. - RET -3: mov r0, #1 @ arguments are unordered. - RET - - FUNC_END aeabi_fcmpun - FUNC_END unordsf2 - -#endif /* L_unordsf2 */ - -#ifdef L_arm_fixsfsi - -ARM_FUNC_START fixsfsi -ARM_FUNC_ALIAS aeabi_f2iz fixsfsi - - @ check exponent range. - mov r2, r0, lsl #1 - cmp r2, #(127 << 24) - bcc 1f @ value is too small - mov r3, #(127 + 31) - subs r2, r3, r2, lsr #24 - bls 2f @ value is too large - - @ scale value - mov r3, r0, lsl #8 - orr r3, r3, #0x80000000 - tst r0, #0x80000000 @ the sign bit - shift1 lsr, r0, r3, r2 - do_it ne - rsbne r0, r0, #0 - RET - -1: mov r0, #0 - RET - -2: cmp r2, #(127 + 31 - 0xff) - bne 3f - movs r2, r0, lsl #9 - bne 4f @ r0 is NAN. -3: ands r0, r0, #0x80000000 @ the sign bit - do_it eq - moveq r0, #0x7fffffff @ the maximum signed positive si - RET - -4: mov r0, #0 @ What should we convert NAN to? - RET - - FUNC_END aeabi_f2iz - FUNC_END fixsfsi - -#endif /* L_fixsfsi */ - -#ifdef L_arm_fixunssfsi - -ARM_FUNC_START fixunssfsi -ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi - - @ check exponent range. - movs r2, r0, lsl #1 - bcs 1f @ value is negative - cmp r2, #(127 << 24) - bcc 1f @ value is too small - mov r3, #(127 + 31) - subs r2, r3, r2, lsr #24 - bmi 2f @ value is too large - - @ scale the value - mov r3, r0, lsl #8 - orr r3, r3, #0x80000000 - shift1 lsr, r0, r3, r2 - RET - -1: mov r0, #0 - RET - -2: cmp r2, #(127 + 31 - 0xff) - bne 3f - movs r2, r0, lsl #9 - bne 4f @ r0 is NAN. -3: mov r0, #0xffffffff @ maximum unsigned si - RET - -4: mov r0, #0 @ What should we convert NAN to? - RET - - FUNC_END aeabi_f2uiz - FUNC_END fixunssfsi - -#endif /* L_fixunssfsi */ diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm deleted file mode 100644 index 2e76c01df4b..00000000000 --- a/gcc/config/arm/lib1funcs.asm +++ /dev/null @@ -1,1829 +0,0 @@ -@ libgcc routines for ARM cpu. -@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) - -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008, - 2009, 2010 Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -/* An executable stack is *not* required for these functions. 
*/ -#if defined(__ELF__) && defined(__linux__) -.section .note.GNU-stack,"",%progbits -.previous -#endif /* __ELF__ and __linux__ */ - -#ifdef __ARM_EABI__ -/* Some attributes that are common to all routines in this file. */ - /* Tag_ABI_align_needed: This code does not require 8-byte - alignment from the caller. */ - /* .eabi_attribute 24, 0 -- default setting. */ - /* Tag_ABI_align_preserved: This code preserves 8-byte - alignment in any callee. */ - .eabi_attribute 25, 1 -#endif /* __ARM_EABI__ */ -/* ------------------------------------------------------------------------ */ - -/* We need to know what prefix to add to function names. */ - -#ifndef __USER_LABEL_PREFIX__ -#error __USER_LABEL_PREFIX__ not defined -#endif - -/* ANSI concatenation macros. */ - -#define CONCAT1(a, b) CONCAT2(a, b) -#define CONCAT2(a, b) a ## b - -/* Use the right prefix for global labels. */ - -#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) - -#ifdef __ELF__ -#ifdef __thumb__ -#define __PLT__ /* Not supported in Thumb assembler (for now). */ -#elif defined __vxworks && !defined __PIC__ -#define __PLT__ /* Not supported by the kernel loader. */ -#else -#define __PLT__ (PLT) -#endif -#define TYPE(x) .type SYM(x),function -#define SIZE(x) .size SYM(x), . - SYM(x) -#define LSYM(x) .x -#else -#define __PLT__ -#define TYPE(x) -#define SIZE(x) -#define LSYM(x) x -#endif - -/* Function end macros. Variants for interworking. */ - -#if defined(__ARM_ARCH_2__) -# define __ARM_ARCH__ 2 -#endif - -#if defined(__ARM_ARCH_3__) -# define __ARM_ARCH__ 3 -#endif - -#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \ - || defined(__ARM_ARCH_4T__) -/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with - long multiply instructions. That includes v3M. */ -# define __ARM_ARCH__ 4 -#endif - -#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \ - || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ - || defined(__ARM_ARCH_5TEJ__) -# define __ARM_ARCH__ 5 -#endif - -#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ - || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ - || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \ - || defined(__ARM_ARCH_6M__) -# define __ARM_ARCH__ 6 -#endif - -#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ - || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ - || defined(__ARM_ARCH_7EM__) -# define __ARM_ARCH__ 7 -#endif - -#ifndef __ARM_ARCH__ -#error Unable to determine architecture. -#endif - -/* There are times when we might prefer Thumb1 code even if ARM code is - permitted, for example, the code might be smaller, or there might be - interworking problems with switching to ARM state if interworking is - disabled. */ -#if (defined(__thumb__) \ - && !defined(__thumb2__) \ - && (!defined(__THUMB_INTERWORK__) \ - || defined (__OPTIMIZE_SIZE__) \ - || defined(__ARM_ARCH_6M__))) -# define __prefer_thumb__ -#endif - -/* How to return from a function call depends on the architecture variant. */ - -#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__) - -# define RET bx lr -# define RETc(x) bx##x lr - -/* Special precautions for interworking on armv4t. */ -# if (__ARM_ARCH__ == 4) - -/* Always use bx, not ldr pc. */ -# if (defined(__thumb__) || defined(__THUMB_INTERWORK__)) -# define __INTERWORKING__ -# endif /* __THUMB__ || __THUMB_INTERWORK__ */ - -/* Include thumb stub before arm mode code. 
*/ -# if defined(__thumb__) && !defined(__THUMB_INTERWORK__) -# define __INTERWORKING_STUBS__ -# endif /* __thumb__ && !__THUMB_INTERWORK__ */ - -#endif /* __ARM_ARCH == 4 */ - -#else - -# define RET mov pc, lr -# define RETc(x) mov##x pc, lr - -#endif - -.macro cfi_pop advance, reg, cfa_offset -#ifdef __ELF__ - .pushsection .debug_frame - .byte 0x4 /* DW_CFA_advance_loc4 */ - .4byte \advance - .byte (0xc0 | \reg) /* DW_CFA_restore */ - .byte 0xe /* DW_CFA_def_cfa_offset */ - .uleb128 \cfa_offset - .popsection -#endif -.endm -.macro cfi_push advance, reg, offset, cfa_offset -#ifdef __ELF__ - .pushsection .debug_frame - .byte 0x4 /* DW_CFA_advance_loc4 */ - .4byte \advance - .byte (0x80 | \reg) /* DW_CFA_offset */ - .uleb128 (\offset / -4) - .byte 0xe /* DW_CFA_def_cfa_offset */ - .uleb128 \cfa_offset - .popsection -#endif -.endm -.macro cfi_start start_label, end_label -#ifdef __ELF__ - .pushsection .debug_frame -LSYM(Lstart_frame): - .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE -LSYM(Lstart_cie): - .4byte 0xffffffff @ CIE Identifier Tag - .byte 0x1 @ CIE Version - .ascii "\0" @ CIE Augmentation - .uleb128 0x1 @ CIE Code Alignment Factor - .sleb128 -4 @ CIE Data Alignment Factor - .byte 0xe @ CIE RA Column - .byte 0xc @ DW_CFA_def_cfa - .uleb128 0xd - .uleb128 0x0 - - .align 2 -LSYM(Lend_cie): - .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length -LSYM(Lstart_fde): - .4byte LSYM(Lstart_frame) @ FDE CIE offset - .4byte \start_label @ FDE initial location - .4byte \end_label-\start_label @ FDE address range - .popsection -#endif -.endm -.macro cfi_end end_label -#ifdef __ELF__ - .pushsection .debug_frame - .align 2 -LSYM(Lend_fde): - .popsection -\end_label: -#endif -.endm - -/* Don't pass dirn, it's there just to get token pasting right. */ - -.macro RETLDM regs=, cond=, unwind=, dirn=ia -#if defined (__INTERWORKING__) - .ifc "\regs","" - ldr\cond lr, [sp], #8 - .else -# if defined(__thumb2__) - pop\cond {\regs, lr} -# else - ldm\cond\dirn sp!, {\regs, lr} -# endif - .endif - .ifnc "\unwind", "" - /* Mark LR as restored. */ -97: cfi_pop 97b - \unwind, 0xe, 0x0 - .endif - bx\cond lr -#else - /* Caller is responsible for providing IT instruction. */ - .ifc "\regs","" - ldr\cond pc, [sp], #8 - .else -# if defined(__thumb2__) - pop\cond {\regs, pc} -# else - ldm\cond\dirn sp!, {\regs, pc} -# endif - .endif -#endif -.endm - -/* The Unified assembly syntax allows the same code to be assembled for both - ARM and Thumb-2. However this is only supported by recent gas, so define - a set of macros to allow ARM code on older assemblers. */ -#if defined(__thumb2__) -.macro do_it cond, suffix="" - it\suffix \cond -.endm -.macro shift1 op, arg0, arg1, arg2 - \op \arg0, \arg1, \arg2 -.endm -#define do_push push -#define do_pop pop -#define COND(op1, op2, cond) op1 ## op2 ## cond -/* Perform an arithmetic operation with a variable shift operand. This - requires two instructions and a scratch register on Thumb-2. 
*/ -.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp - \shiftop \tmp, \src2, \shiftreg - \name \dest, \src1, \tmp -.endm -#else -.macro do_it cond, suffix="" -.endm -.macro shift1 op, arg0, arg1, arg2 - mov \arg0, \arg1, \op \arg2 -.endm -#define do_push stmfd sp!, -#define do_pop ldmfd sp!, -#define COND(op1, op2, cond) op1 ## cond ## op2 -.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp - \name \dest, \src1, \src2, \shiftop \shiftreg -.endm -#endif - -#ifdef __ARM_EABI__ -.macro ARM_LDIV0 name signed - cmp r0, #0 - .ifc \signed, unsigned - movne r0, #0xffffffff - .else - movgt r0, #0x7fffffff - movlt r0, #0x80000000 - .endif - b SYM (__aeabi_idiv0) __PLT__ -.endm -#else -.macro ARM_LDIV0 name signed - str lr, [sp, #-8]! -98: cfi_push 98b - __\name, 0xe, -0x8, 0x8 - bl SYM (__div0) __PLT__ - mov r0, #0 @ About as wrong as it could be. - RETLDM unwind=98b -.endm -#endif - - -#ifdef __ARM_EABI__ -.macro THUMB_LDIV0 name signed -#if defined(__ARM_ARCH_6M__) - .ifc \signed, unsigned - cmp r0, #0 - beq 1f - mov r0, #0 - mvn r0, r0 @ 0xffffffff -1: - .else - cmp r0, #0 - beq 2f - blt 3f - mov r0, #0 - mvn r0, r0 - lsr r0, r0, #1 @ 0x7fffffff - b 2f -3: mov r0, #0x80 - lsl r0, r0, #24 @ 0x80000000 -2: - .endif - push {r0, r1, r2} - ldr r0, 4f - adr r1, 4f - add r0, r1 - str r0, [sp, #8] - @ We know we are not on armv4t, so pop pc is safe. - pop {r0, r1, pc} - .align 2 -4: - .word __aeabi_idiv0 - 4b -#elif defined(__thumb2__) - .syntax unified - .ifc \signed, unsigned - cbz r0, 1f - mov r0, #0xffffffff -1: - .else - cmp r0, #0 - do_it gt - movgt r0, #0x7fffffff - do_it lt - movlt r0, #0x80000000 - .endif - b.w SYM(__aeabi_idiv0) __PLT__ -#else - .align 2 - bx pc - nop - .arm - cmp r0, #0 - .ifc \signed, unsigned - movne r0, #0xffffffff - .else - movgt r0, #0x7fffffff - movlt r0, #0x80000000 - .endif - b SYM(__aeabi_idiv0) __PLT__ - .thumb -#endif -.endm -#else -.macro THUMB_LDIV0 name signed - push { r1, lr } -98: cfi_push 98b - __\name, 0xe, -0x4, 0x8 - bl SYM (__div0) - mov r0, #0 @ About as wrong as it could be. -#if defined (__INTERWORKING__) - pop { r1, r2 } - bx r2 -#else - pop { r1, pc } -#endif -.endm -#endif - -.macro FUNC_END name - SIZE (__\name) -.endm - -.macro DIV_FUNC_END name signed - cfi_start __\name, LSYM(Lend_div0) -LSYM(Ldiv0): -#ifdef __thumb__ - THUMB_LDIV0 \name \signed -#else - ARM_LDIV0 \name \signed -#endif - cfi_end LSYM(Lend_div0) - FUNC_END \name -.endm - -.macro THUMB_FUNC_START name - .globl SYM (\name) - TYPE (\name) - .thumb_func -SYM (\name): -.endm - -/* Function start macros. Variants for ARM and Thumb. */ - -#ifdef __thumb__ -#define THUMB_FUNC .thumb_func -#define THUMB_CODE .force_thumb -# if defined(__thumb2__) -#define THUMB_SYNTAX .syntax divided -# else -#define THUMB_SYNTAX -# endif -#else -#define THUMB_FUNC -#define THUMB_CODE -#define THUMB_SYNTAX -#endif - -.macro FUNC_START name - .text - .globl SYM (__\name) - TYPE (__\name) - .align 0 - THUMB_CODE - THUMB_FUNC - THUMB_SYNTAX -SYM (__\name): -.endm - -/* Special function that will always be coded in ARM assembly, even if - in Thumb-only compilation. */ - -#if defined(__thumb2__) - -/* For Thumb-2 we build everything in thumb mode. */ -.macro ARM_FUNC_START name - FUNC_START \name - .syntax unified -.endm -#define EQUIV .thumb_set -.macro ARM_CALL name - bl __\name -.endm - -#elif defined(__INTERWORKING_STUBS__) - -.macro ARM_FUNC_START name - FUNC_START \name - bx pc - nop - .arm -/* A hook to tell gdb that we've switched to ARM mode. 
Also used to call - directly from other local arm routines. */ -_L__\name: -.endm -#define EQUIV .thumb_set -/* Branch directly to a function declared with ARM_FUNC_START. - Must be called in arm mode. */ -.macro ARM_CALL name - bl _L__\name -.endm - -#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */ - -#ifdef __ARM_ARCH_6M__ -#define EQUIV .thumb_set -#else -.macro ARM_FUNC_START name - .text - .globl SYM (__\name) - TYPE (__\name) - .align 0 - .arm -SYM (__\name): -.endm -#define EQUIV .set -.macro ARM_CALL name - bl __\name -.endm -#endif - -#endif - -.macro FUNC_ALIAS new old - .globl SYM (__\new) -#if defined (__thumb__) - .thumb_set SYM (__\new), SYM (__\old) -#else - .set SYM (__\new), SYM (__\old) -#endif -.endm - -#ifndef __ARM_ARCH_6M__ -.macro ARM_FUNC_ALIAS new old - .globl SYM (__\new) - EQUIV SYM (__\new), SYM (__\old) -#if defined(__INTERWORKING_STUBS__) - .set SYM (_L__\new), SYM (_L__\old) -#endif -.endm -#endif - -#ifdef __ARMEB__ -#define xxh r0 -#define xxl r1 -#define yyh r2 -#define yyl r3 -#else -#define xxh r1 -#define xxl r0 -#define yyh r3 -#define yyl r2 -#endif - -#ifdef __ARM_EABI__ -.macro WEAK name - .weak SYM (__\name) -.endm -#endif - -#ifdef __thumb__ -/* Register aliases. */ - -work .req r4 @ XXXX is this safe ? -dividend .req r0 -divisor .req r1 -overdone .req r2 -result .req r2 -curbit .req r3 -#endif -#if 0 -ip .req r12 -sp .req r13 -lr .req r14 -pc .req r15 -#endif - -/* ------------------------------------------------------------------------ */ -/* Bodies of the division and modulo routines. */ -/* ------------------------------------------------------------------------ */ -.macro ARM_DIV_BODY dividend, divisor, result, curbit - -#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) - -#if defined (__thumb2__) - clz \curbit, \dividend - clz \result, \divisor - sub \curbit, \result, \curbit - rsb \curbit, \curbit, #31 - adr \result, 1f - add \curbit, \result, \curbit, lsl #4 - mov \result, #0 - mov pc, \curbit -.p2align 3 -1: - .set shift, 32 - .rept 32 - .set shift, shift - 1 - cmp.w \dividend, \divisor, lsl #shift - nop.n - adc.w \result, \result, \result - it cs - subcs.w \dividend, \dividend, \divisor, lsl #shift - .endr -#else - clz \curbit, \dividend - clz \result, \divisor - sub \curbit, \result, \curbit - rsbs \curbit, \curbit, #31 - addne \curbit, \curbit, \curbit, lsl #1 - mov \result, #0 - addne pc, pc, \curbit, lsl #2 - nop - .set shift, 32 - .rept 32 - .set shift, shift - 1 - cmp \dividend, \divisor, lsl #shift - adc \result, \result, \result - subcs \dividend, \dividend, \divisor, lsl #shift - .endr -#endif - -#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ -#if __ARM_ARCH__ >= 5 - - clz \curbit, \divisor - clz \result, \dividend - sub \result, \curbit, \result - mov \curbit, #1 - mov \divisor, \divisor, lsl \result - mov \curbit, \curbit, lsl \result - mov \result, #0 - -#else /* __ARM_ARCH__ < 5 */ - - @ Initially shift the divisor left 3 bits if possible, - @ set curbit accordingly. This allows for curbit to be located - @ at the left end of each 4-bit nibbles in the division loop - @ to save one loop in most cases. - tst \divisor, #0xe0000000 - moveq \divisor, \divisor, lsl #3 - moveq \curbit, #8 - movne \curbit, #1 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. 
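
[Note, not part of the patch: ARM_DIV_BODY above is classic shift-and-subtract division. It lines the divisor up under the dividend's leading bit, with clz where available or with the shift loops that follow, then walks back down one bit position at a time, subtracting wherever the shifted divisor still fits. In C, roughly; the zero-divisor case is handled separately through the __aeabi_idiv0 paths:]

#include <stdint.h>

static uint32_t udiv32_sketch(uint32_t dividend, uint32_t divisor)
{
    uint32_t result = 0, curbit = 1;

    if (divisor == 0)
        return 0;                   /* the real code traps this case */

    /* Shift the divisor, and the quotient bit it represents, up. */
    while (divisor < dividend && !(divisor & 0x80000000u)) {
        divisor <<= 1;
        curbit  <<= 1;
    }
    /* Walk back down, subtracting wherever it fits. */
    while (curbit) {
        if (dividend >= divisor) {
            dividend -= divisor;
            result   |= curbit;
        }
        divisor >>= 1;
        curbit  >>= 1;
    }
    return result;                  /* dividend is left holding the remainder */
}
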
-1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - movlo \curbit, \curbit, lsl #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - movlo \curbit, \curbit, lsl #1 - blo 1b - - mov \result, #0 - -#endif /* __ARM_ARCH__ < 5 */ - - @ Division loop -1: cmp \dividend, \divisor - do_it hs, t - subhs \dividend, \dividend, \divisor - orrhs \result, \result, \curbit - cmp \dividend, \divisor, lsr #1 - do_it hs, t - subhs \dividend, \dividend, \divisor, lsr #1 - orrhs \result, \result, \curbit, lsr #1 - cmp \dividend, \divisor, lsr #2 - do_it hs, t - subhs \dividend, \dividend, \divisor, lsr #2 - orrhs \result, \result, \curbit, lsr #2 - cmp \dividend, \divisor, lsr #3 - do_it hs, t - subhs \dividend, \dividend, \divisor, lsr #3 - orrhs \result, \result, \curbit, lsr #3 - cmp \dividend, #0 @ Early termination? - do_it ne, t - movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? - movne \divisor, \divisor, lsr #4 - bne 1b - -#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ - -.endm -/* ------------------------------------------------------------------------ */ -.macro ARM_DIV2_ORDER divisor, order - -#if __ARM_ARCH__ >= 5 - - clz \order, \divisor - rsb \order, \order, #31 - -#else - - cmp \divisor, #(1 << 16) - movhs \divisor, \divisor, lsr #16 - movhs \order, #16 - movlo \order, #0 - - cmp \divisor, #(1 << 8) - movhs \divisor, \divisor, lsr #8 - addhs \order, \order, #8 - - cmp \divisor, #(1 << 4) - movhs \divisor, \divisor, lsr #4 - addhs \order, \order, #4 - - cmp \divisor, #(1 << 2) - addhi \order, \order, #3 - addls \order, \order, \divisor, lsr #1 - -#endif - -.endm -/* ------------------------------------------------------------------------ */ -.macro ARM_MOD_BODY dividend, divisor, order, spare - -#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) - - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - rsbs \order, \order, #31 - addne pc, pc, \order, lsl #3 - nop - .set shift, 32 - .rept 32 - .set shift, shift - 1 - cmp \dividend, \divisor, lsl #shift - subcs \dividend, \dividend, \divisor, lsl #shift - .endr - -#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ -#if __ARM_ARCH__ >= 5 - - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - mov \divisor, \divisor, lsl \order - -#else /* __ARM_ARCH__ < 5 */ - - mov \order, #0 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - addlo \order, \order, #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - addlo \order, \order, #1 - blo 1b - -#endif /* __ARM_ARCH__ < 5 */ - - @ Perform all needed substractions to keep only the reminder. - @ Do comparisons in batch of 4 first. 
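
[Note, not part of the patch: ARM_MOD_BODY uses the same alignment scheme but keeps only the remainder; no quotient bits are recorded, and the four trial subtractions per pass are purely an unrolling optimization. Stripped of the unrolling, a C sketch of the algorithm:]

#include <stdint.h>

static uint32_t umod32_sketch(uint32_t dividend, uint32_t divisor)
{
    uint32_t shifted = divisor;

    if (divisor == 0)
        return 0;                   /* the real code branches to the div0 handler */

    /* Align the divisor with the dividend. */
    while (shifted < dividend && !(shifted & 0x80000000u))
        shifted <<= 1;

    /* Subtract the shifted divisor back down to the original scale. */
    while (shifted >= divisor) {
        if (dividend >= shifted)
            dividend -= shifted;
        shifted >>= 1;
    }
    return dividend;
}
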
- subs \order, \order, #3 @ yes, 3 is intended here - blt 2f - -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - cmp \dividend, #1 - mov \divisor, \divisor, lsr #4 - subges \order, \order, #4 - bge 1b - - tst \order, #3 - teqne \dividend, #0 - beq 5f - - @ Either 1, 2 or 3 comparison/substractions are left. -2: cmn \order, #2 - blt 4f - beq 3f - cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -3: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -4: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor -5: - -#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ - -.endm -/* ------------------------------------------------------------------------ */ -.macro THUMB_DIV_MOD_BODY modulo - @ Load the constant 0x10000000 into our work register. - mov work, #1 - lsl work, #28 -LSYM(Loop1): - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, work - bhs LSYM(Lbignum) - cmp divisor, dividend - bhs LSYM(Lbignum) - lsl divisor, #4 - lsl curbit, #4 - b LSYM(Loop1) -LSYM(Lbignum): - @ Set work to 0x80000000 - lsl work, #3 -LSYM(Loop2): - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, work - bhs LSYM(Loop3) - cmp divisor, dividend - bhs LSYM(Loop3) - lsl divisor, #1 - lsl curbit, #1 - b LSYM(Loop2) -LSYM(Loop3): - @ Test for possible subtractions ... - .if \modulo - @ ... On the final pass, this may subtract too much from the dividend, - @ so keep track of which subtractions are done, we can fix them up - @ afterwards. - mov overdone, #0 - cmp dividend, divisor - blo LSYM(Lover1) - sub dividend, dividend, divisor -LSYM(Lover1): - lsr work, divisor, #1 - cmp dividend, work - blo LSYM(Lover2) - sub dividend, dividend, work - mov ip, curbit - mov work, #1 - ror curbit, work - orr overdone, curbit - mov curbit, ip -LSYM(Lover2): - lsr work, divisor, #2 - cmp dividend, work - blo LSYM(Lover3) - sub dividend, dividend, work - mov ip, curbit - mov work, #2 - ror curbit, work - orr overdone, curbit - mov curbit, ip -LSYM(Lover3): - lsr work, divisor, #3 - cmp dividend, work - blo LSYM(Lover4) - sub dividend, dividend, work - mov ip, curbit - mov work, #3 - ror curbit, work - orr overdone, curbit - mov curbit, ip -LSYM(Lover4): - mov ip, curbit - .else - @ ... and note which bits are done in the result. On the final pass, - @ this may subtract too much from the dividend, but the result will be ok, - @ since the "bit" will have been shifted out at the bottom. 
- cmp dividend, divisor - blo LSYM(Lover1) - sub dividend, dividend, divisor - orr result, result, curbit -LSYM(Lover1): - lsr work, divisor, #1 - cmp dividend, work - blo LSYM(Lover2) - sub dividend, dividend, work - lsr work, curbit, #1 - orr result, work -LSYM(Lover2): - lsr work, divisor, #2 - cmp dividend, work - blo LSYM(Lover3) - sub dividend, dividend, work - lsr work, curbit, #2 - orr result, work -LSYM(Lover3): - lsr work, divisor, #3 - cmp dividend, work - blo LSYM(Lover4) - sub dividend, dividend, work - lsr work, curbit, #3 - orr result, work -LSYM(Lover4): - .endif - - cmp dividend, #0 @ Early termination? - beq LSYM(Lover5) - lsr curbit, #4 @ No, any more bits to do? - beq LSYM(Lover5) - lsr divisor, #4 - b LSYM(Loop3) -LSYM(Lover5): - .if \modulo - @ Any subtractions that we should not have done will be recorded in - @ the top three bits of "overdone". Exactly which were not needed - @ are governed by the position of the bit, stored in ip. - mov work, #0xe - lsl work, #28 - and overdone, work - beq LSYM(Lgot_result) - - @ If we terminated early, because dividend became zero, then the - @ bit in ip will not be in the bottom nibble, and we should not - @ perform the additions below. We must test for this though - @ (rather relying upon the TSTs to prevent the additions) since - @ the bit in ip could be in the top two bits which might then match - @ with one of the smaller RORs. - mov curbit, ip - mov work, #0x7 - tst curbit, work - beq LSYM(Lgot_result) - - mov curbit, ip - mov work, #3 - ror curbit, work - tst overdone, curbit - beq LSYM(Lover6) - lsr work, divisor, #3 - add dividend, work -LSYM(Lover6): - mov curbit, ip - mov work, #2 - ror curbit, work - tst overdone, curbit - beq LSYM(Lover7) - lsr work, divisor, #2 - add dividend, work -LSYM(Lover7): - mov curbit, ip - mov work, #1 - ror curbit, work - tst overdone, curbit - beq LSYM(Lgot_result) - lsr work, divisor, #1 - add dividend, work - .endif -LSYM(Lgot_result): -.endm -/* ------------------------------------------------------------------------ */ -/* Start of the Real Functions */ -/* ------------------------------------------------------------------------ */ -#ifdef L_udivsi3 - -#if defined(__prefer_thumb__) - - FUNC_START udivsi3 - FUNC_ALIAS aeabi_uidiv udivsi3 - - cmp divisor, #0 - beq LSYM(Ldiv0) -LSYM(udivsi3_skip_div0_test): - mov curbit, #1 - mov result, #0 - - push { work } - cmp dividend, divisor - blo LSYM(Lgot_result) - - THUMB_DIV_MOD_BODY 0 - - mov r0, result - pop { work } - RET - -#else /* ARM version/Thumb-2. */ - - ARM_FUNC_START udivsi3 - ARM_FUNC_ALIAS aeabi_uidiv udivsi3 - - /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily - check for division-by-zero a second time. 
*/ -LSYM(udivsi3_skip_div0_test): - subs r2, r1, #1 - do_it eq - RETc(eq) - bcc LSYM(Ldiv0) - cmp r0, r1 - bls 11f - tst r1, r2 - beq 12f - - ARM_DIV_BODY r0, r1, r2, r3 - - mov r0, r2 - RET - -11: do_it eq, e - moveq r0, #1 - movne r0, #0 - RET - -12: ARM_DIV2_ORDER r1, r2 - - mov r0, r0, lsr r2 - RET - -#endif /* ARM version */ - - DIV_FUNC_END udivsi3 unsigned - -#if defined(__prefer_thumb__) -FUNC_START aeabi_uidivmod - cmp r1, #0 - beq LSYM(Ldiv0) - push {r0, r1, lr} - bl LSYM(udivsi3_skip_div0_test) - POP {r1, r2, r3} - mul r2, r0 - sub r1, r1, r2 - bx r3 -#else -ARM_FUNC_START aeabi_uidivmod - cmp r1, #0 - beq LSYM(Ldiv0) - stmfd sp!, { r0, r1, lr } - bl LSYM(udivsi3_skip_div0_test) - ldmfd sp!, { r1, r2, lr } - mul r3, r2, r0 - sub r1, r1, r3 - RET -#endif - FUNC_END aeabi_uidivmod - -#endif /* L_udivsi3 */ -/* ------------------------------------------------------------------------ */ -#ifdef L_umodsi3 - - FUNC_START umodsi3 - -#ifdef __thumb__ - - cmp divisor, #0 - beq LSYM(Ldiv0) - mov curbit, #1 - cmp dividend, divisor - bhs LSYM(Lover10) - RET - -LSYM(Lover10): - push { work } - - THUMB_DIV_MOD_BODY 1 - - pop { work } - RET - -#else /* ARM version. */ - - subs r2, r1, #1 @ compare divisor with 1 - bcc LSYM(Ldiv0) - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - RETc(ls) - - ARM_MOD_BODY r0, r1, r2, r3 - - RET - -#endif /* ARM version. */ - - DIV_FUNC_END umodsi3 unsigned - -#endif /* L_umodsi3 */ -/* ------------------------------------------------------------------------ */ -#ifdef L_divsi3 - -#if defined(__prefer_thumb__) - - FUNC_START divsi3 - FUNC_ALIAS aeabi_idiv divsi3 - - cmp divisor, #0 - beq LSYM(Ldiv0) -LSYM(divsi3_skip_div0_test): - push { work } - mov work, dividend - eor work, divisor @ Save the sign of the result. - mov ip, work - mov curbit, #1 - mov result, #0 - cmp divisor, #0 - bpl LSYM(Lover10) - neg divisor, divisor @ Loops below use unsigned. -LSYM(Lover10): - cmp dividend, #0 - bpl LSYM(Lover11) - neg dividend, dividend -LSYM(Lover11): - cmp dividend, divisor - blo LSYM(Lgot_result) - - THUMB_DIV_MOD_BODY 0 - - mov r0, result - mov work, ip - cmp work, #0 - bpl LSYM(Lover12) - neg r0, r0 -LSYM(Lover12): - pop { work } - RET - -#else /* ARM/Thumb-2 version. */ - - ARM_FUNC_START divsi3 - ARM_FUNC_ALIAS aeabi_idiv divsi3 - - cmp r1, #0 - beq LSYM(Ldiv0) -LSYM(divsi3_skip_div0_test): - eor ip, r0, r1 @ save the sign of the result. - do_it mi - rsbmi r1, r1, #0 @ loops below use unsigned. - subs r2, r1, #1 @ division by 1 or -1 ? - beq 10f - movs r3, r0 - do_it mi - rsbmi r3, r0, #0 @ positive dividend value - cmp r3, r1 - bls 11f - tst r1, r2 @ divisor is power of 2 ? - beq 12f - - ARM_DIV_BODY r3, r1, r0, r2 - - cmp ip, #0 - do_it mi - rsbmi r0, r0, #0 - RET - -10: teq ip, r0 @ same sign ? 
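
[Note, not part of the patch: __aeabi_uidivmod above returns the quotient in r0 and the remainder in r1. Rather than running a second loop it reconstructs the remainder as n - q*d after the ordinary division, which is the mul/sub pair following the call. In C terms, roughly:]

#include <stdint.h>

typedef struct { uint32_t quot, rem; } uidivmod_sketch_t;

static uidivmod_sketch_t uidivmod_sketch(uint32_t n, uint32_t d)
{
    /* d != 0 assumed; the library routes zero to __aeabi_idiv0 first. */
    uidivmod_sketch_t r;
    r.quot = n / d;                 /* __udivsi3 does this part */
    r.rem  = n - r.quot * d;        /* the trailing mul/sub */
    return r;
}
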
- do_it mi - rsbmi r0, r0, #0 - RET - -11: do_it lo - movlo r0, #0 - do_it eq,t - moveq r0, ip, asr #31 - orreq r0, r0, #1 - RET - -12: ARM_DIV2_ORDER r1, r2 - - cmp ip, #0 - mov r0, r3, lsr r2 - do_it mi - rsbmi r0, r0, #0 - RET - -#endif /* ARM version */ - - DIV_FUNC_END divsi3 signed - -#if defined(__prefer_thumb__) -FUNC_START aeabi_idivmod - cmp r1, #0 - beq LSYM(Ldiv0) - push {r0, r1, lr} - bl LSYM(divsi3_skip_div0_test) - POP {r1, r2, r3} - mul r2, r0 - sub r1, r1, r2 - bx r3 -#else -ARM_FUNC_START aeabi_idivmod - cmp r1, #0 - beq LSYM(Ldiv0) - stmfd sp!, { r0, r1, lr } - bl LSYM(divsi3_skip_div0_test) - ldmfd sp!, { r1, r2, lr } - mul r3, r2, r0 - sub r1, r1, r3 - RET -#endif - FUNC_END aeabi_idivmod - -#endif /* L_divsi3 */ -/* ------------------------------------------------------------------------ */ -#ifdef L_modsi3 - - FUNC_START modsi3 - -#ifdef __thumb__ - - mov curbit, #1 - cmp divisor, #0 - beq LSYM(Ldiv0) - bpl LSYM(Lover10) - neg divisor, divisor @ Loops below use unsigned. -LSYM(Lover10): - push { work } - @ Need to save the sign of the dividend, unfortunately, we need - @ work later on. Must do this after saving the original value of - @ the work register, because we will pop this value off first. - push { dividend } - cmp dividend, #0 - bpl LSYM(Lover11) - neg dividend, dividend -LSYM(Lover11): - cmp dividend, divisor - blo LSYM(Lgot_result) - - THUMB_DIV_MOD_BODY 1 - - pop { work } - cmp work, #0 - bpl LSYM(Lover12) - neg dividend, dividend -LSYM(Lover12): - pop { work } - RET - -#else /* ARM version. */ - - cmp r1, #0 - beq LSYM(Ldiv0) - rsbmi r1, r1, #0 @ loops below use unsigned. - movs ip, r0 @ preserve sign of dividend - rsbmi r0, r0, #0 @ if negative make positive - subs r2, r1, #1 @ compare divisor with 1 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - bls 10f - - ARM_MOD_BODY r0, r1, r2, r3 - -10: cmp ip, #0 - rsbmi r0, r0, #0 - RET - -#endif /* ARM version */ - - DIV_FUNC_END modsi3 signed - -#endif /* L_modsi3 */ -/* ------------------------------------------------------------------------ */ -#ifdef L_dvmd_tls - -#ifdef __ARM_EABI__ - WEAK aeabi_idiv0 - WEAK aeabi_ldiv0 - FUNC_START aeabi_idiv0 - FUNC_START aeabi_ldiv0 - RET - FUNC_END aeabi_ldiv0 - FUNC_END aeabi_idiv0 -#else - FUNC_START div0 - RET - FUNC_END div0 -#endif - -#endif /* L_divmodsi_tools */ -/* ------------------------------------------------------------------------ */ -#ifdef L_dvmd_lnx -@ GNU/Linux division-by zero handler. Used in place of L_dvmd_tls - -/* Constant taken from . */ -#define SIGFPE 8 - -#ifdef __ARM_EABI__ - WEAK aeabi_idiv0 - WEAK aeabi_ldiv0 - ARM_FUNC_START aeabi_idiv0 - ARM_FUNC_START aeabi_ldiv0 -#else - ARM_FUNC_START div0 -#endif - - do_push {r1, lr} - mov r0, #SIGFPE - bl SYM(raise) __PLT__ - RETLDM r1 - -#ifdef __ARM_EABI__ - FUNC_END aeabi_ldiv0 - FUNC_END aeabi_idiv0 -#else - FUNC_END div0 -#endif - -#endif /* L_dvmd_lnx */ -#ifdef L_clear_cache -#if defined __ARM_EABI__ && defined __linux__ -@ EABI GNU/Linux call to cacheflush syscall. - ARM_FUNC_START clear_cache - do_push {r7} -#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__) - movw r7, #2 - movt r7, #0xf -#else - mov r7, #0xf0000 - add r7, r7, #2 -#endif - mov r2, #0 - swi 0 - do_pop {r7} - RET - FUNC_END clear_cache -#else -#error "This is only for ARM EABI GNU/Linux" -#endif -#endif /* L_clear_cache */ -/* ------------------------------------------------------------------------ */ -/* Dword shift operations. 
*/ -/* All the following Dword shift variants rely on the fact that - shft xxx, Reg - is in fact done as - shft xxx, (Reg & 255) - so for Reg value in (32...63) and (-1...-31) we will get zero (in the - case of logical shifts) or the sign (for asr). */ - -#ifdef __ARMEB__ -#define al r1 -#define ah r0 -#else -#define al r0 -#define ah r1 -#endif - -/* Prevent __aeabi double-word shifts from being produced on SymbianOS. */ -#ifndef __symbian__ - -#ifdef L_lshrdi3 - - FUNC_START lshrdi3 - FUNC_ALIAS aeabi_llsr lshrdi3 - -#ifdef __thumb__ - lsr al, r2 - mov r3, ah - lsr ah, r2 - mov ip, r3 - sub r2, #32 - lsr r3, r2 - orr al, r3 - neg r2, r2 - mov r3, ip - lsl r3, r2 - orr al, r3 - RET -#else - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, lsr r3 - orrmi al, al, ah, lsl ip - mov ah, ah, lsr r2 - RET -#endif - FUNC_END aeabi_llsr - FUNC_END lshrdi3 - -#endif - -#ifdef L_ashrdi3 - - FUNC_START ashrdi3 - FUNC_ALIAS aeabi_lasr ashrdi3 - -#ifdef __thumb__ - lsr al, r2 - mov r3, ah - asr ah, r2 - sub r2, #32 - @ If r2 is negative at this point the following step would OR - @ the sign bit into all of AL. That's not what we want... - bmi 1f - mov ip, r3 - asr r3, r2 - orr al, r3 - mov r3, ip -1: - neg r2, r2 - lsl r3, r2 - orr al, r3 - RET -#else - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, asr r3 - orrmi al, al, ah, lsl ip - mov ah, ah, asr r2 - RET -#endif - - FUNC_END aeabi_lasr - FUNC_END ashrdi3 - -#endif - -#ifdef L_ashldi3 - - FUNC_START ashldi3 - FUNC_ALIAS aeabi_llsl ashldi3 - -#ifdef __thumb__ - lsl ah, r2 - mov r3, al - lsl al, r2 - mov ip, r3 - sub r2, #32 - lsl r3, r2 - orr ah, r3 - neg r2, r2 - mov r3, ip - lsr r3, r2 - orr ah, r3 - RET -#else - subs r3, r2, #32 - rsb ip, r2, #32 - movmi ah, ah, lsl r2 - movpl ah, al, lsl r3 - orrmi ah, ah, al, lsr ip - mov al, al, lsl r2 - RET -#endif - FUNC_END aeabi_llsl - FUNC_END ashldi3 - -#endif - -#endif /* __symbian__ */ - -#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \ - || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ - || defined(__ARM_ARCH_5TEJ__) -#define HAVE_ARM_CLZ 1 -#endif - -#ifdef L_clzsi2 -#if defined(__ARM_ARCH_6M__) -FUNC_START clzsi2 - mov r1, #28 - mov r3, #1 - lsl r3, r3, #16 - cmp r0, r3 /* 0x10000 */ - bcc 2f - lsr r0, r0, #16 - sub r1, r1, #16 -2: lsr r3, r3, #8 - cmp r0, r3 /* #0x100 */ - bcc 2f - lsr r0, r0, #8 - sub r1, r1, #8 -2: lsr r3, r3, #4 - cmp r0, r3 /* #0x10 */ - bcc 2f - lsr r0, r0, #4 - sub r1, r1, #4 -2: adr r2, 1f - ldrb r0, [r2, r0] - add r0, r0, r1 - bx lr -.align 2 -1: -.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 - FUNC_END clzsi2 -#else -ARM_FUNC_START clzsi2 -# if defined(HAVE_ARM_CLZ) - clz r0, r0 - RET -# else - mov r1, #28 - cmp r0, #0x10000 - do_it cs, t - movcs r0, r0, lsr #16 - subcs r1, r1, #16 - cmp r0, #0x100 - do_it cs, t - movcs r0, r0, lsr #8 - subcs r1, r1, #8 - cmp r0, #0x10 - do_it cs, t - movcs r0, r0, lsr #4 - subcs r1, r1, #4 - adr r2, 1f - ldrb r0, [r2, r0] - add r0, r0, r1 - RET -.align 2 -1: -.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 -# endif /* !HAVE_ARM_CLZ */ - FUNC_END clzsi2 -#endif -#endif /* L_clzsi2 */ - -#ifdef L_clzdi2 -#if !defined(HAVE_ARM_CLZ) - -# if defined(__ARM_ARCH_6M__) -FUNC_START clzdi2 - push {r4, lr} -# else -ARM_FUNC_START clzdi2 - do_push {r4, lr} -# endif - cmp xxh, #0 - bne 1f -# ifdef __ARMEB__ - mov r0, xxl - bl __clzsi2 - add r0, r0, #32 - b 2f -1: - bl __clzsi2 -# else - bl __clzsi2 - add r0, r0, #32 - b 2f -1: - mov r0, xxh - bl __clzsi2 -# endif -2: -# 
if defined(__ARM_ARCH_6M__) - pop {r4, pc} -# else - RETLDM r4 -# endif - FUNC_END clzdi2 - -#else /* HAVE_ARM_CLZ */ - -ARM_FUNC_START clzdi2 - cmp xxh, #0 - do_it eq, et - clzeq r0, xxl - clzne r0, xxh - addeq r0, r0, #32 - RET - FUNC_END clzdi2 - -#endif -#endif /* L_clzdi2 */ - -/* ------------------------------------------------------------------------ */ -/* These next two sections are here despite the fact that they contain Thumb - assembler because their presence allows interworked code to be linked even - when the GCC library is this one. */ - -/* Do not build the interworking functions when the target architecture does - not support Thumb instructions. (This can be a multilib option). */ -#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\ - || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \ - || __ARM_ARCH__ >= 6 - -#if defined L_call_via_rX - -/* These labels & instructions are used by the Arm/Thumb interworking code. - The address of function to be called is loaded into a register and then - one of these labels is called via a BL instruction. This puts the - return address into the link register with the bottom bit set, and the - code here switches to the correct mode before executing the function. */ - - .text - .align 0 - .force_thumb - -.macro call_via register - THUMB_FUNC_START _call_via_\register - - bx \register - nop - - SIZE (_call_via_\register) -.endm - - call_via r0 - call_via r1 - call_via r2 - call_via r3 - call_via r4 - call_via r5 - call_via r6 - call_via r7 - call_via r8 - call_via r9 - call_via sl - call_via fp - call_via ip - call_via sp - call_via lr - -#endif /* L_call_via_rX */ - -/* Don't bother with the old interworking routines for Thumb-2. */ -/* ??? Maybe only omit these on "m" variants. */ -#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__) - -#if defined L_interwork_call_via_rX - -/* These labels & instructions are used by the Arm/Thumb interworking code, - when the target address is in an unknown instruction set. The address - of function to be called is loaded into a register and then one of these - labels is called via a BL instruction. This puts the return address - into the link register with the bottom bit set, and the code here - switches to the correct mode before executing the function. Unfortunately - the target code cannot be relied upon to return via a BX instruction, so - instead we have to store the resturn address on the stack and allow the - called function to return here instead. Upon return we recover the real - return address and use a BX to get back to Thumb mode. - - There are three variations of this code. The first, - _interwork_call_via_rN(), will push the return address onto the - stack and pop it in _arm_return(). It should only be used if all - arguments are passed in registers. - - The second, _interwork_r7_call_via_rN(), instead stores the return - address at [r7, #-4]. It is the caller's responsibility to ensure - that this address is valid and contains no useful data. - - The third, _interwork_r11_call_via_rN(), works in the same way but - uses r11 instead of r7. It is useful if the caller does not really - need a frame pointer. */ - - .text - .align 0 - - .code 32 - .globl _arm_return -LSYM(Lstart_arm_return): - cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return) - cfi_push 0, 0xe, -0x8, 0x8 - nop @ This nop is for the benefit of debuggers, so that - @ backtraces will use the correct unwind information. 
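@ Editorial note (not part of the patch): the interworking stubs below make
@ one simple decision that is easier to see in C.  A hedged sketch with
@ invented names; the BX state switch itself has no C equivalent.
/*
#include <stdint.h>

typedef struct {
  uintptr_t lr;         // return-address register
  uintptr_t saved_lr;   // stack slot, or [r7/r11, #-4] in the framed variants
} call_ctx;

static void
interwork_call_model (call_ctx *ctx, uintptr_t target, uintptr_t arm_return)
{
  if ((target & 1) == 0)
    {
      // Destination is ARM code that may not return with BX: park the real
      // return address and make the callee return into _arm_return, which
      // recovers it and BXes back to Thumb.
      ctx->saved_lr = ctx->lr;
      ctx->lr = arm_return;
    }
  // bx target  -- transfers control, switching state according to bit 0.
}
*/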
-_arm_return: - RETLDM unwind=LSYM(Lstart_arm_return) - cfi_end LSYM(Lend_arm_return) - - .globl _arm_return_r7 -_arm_return_r7: - ldr lr, [r7, #-4] - bx lr - - .globl _arm_return_r11 -_arm_return_r11: - ldr lr, [r11, #-4] - bx lr - -.macro interwork_with_frame frame, register, name, return - .code 16 - - THUMB_FUNC_START \name - - bx pc - nop - - .code 32 - tst \register, #1 - streq lr, [\frame, #-4] - adreq lr, _arm_return_\frame - bx \register - - SIZE (\name) -.endm - -.macro interwork register - .code 16 - - THUMB_FUNC_START _interwork_call_via_\register - - bx pc - nop - - .code 32 - .globl LSYM(Lchange_\register) -LSYM(Lchange_\register): - tst \register, #1 - streq lr, [sp, #-8]! - adreq lr, _arm_return - bx \register - - SIZE (_interwork_call_via_\register) - - interwork_with_frame r7,\register,_interwork_r7_call_via_\register - interwork_with_frame r11,\register,_interwork_r11_call_via_\register -.endm - - interwork r0 - interwork r1 - interwork r2 - interwork r3 - interwork r4 - interwork r5 - interwork r6 - interwork r7 - interwork r8 - interwork r9 - interwork sl - interwork fp - interwork ip - interwork sp - - /* The LR case has to be handled a little differently... */ - .code 16 - - THUMB_FUNC_START _interwork_call_via_lr - - bx pc - nop - - .code 32 - .globl .Lchange_lr -.Lchange_lr: - tst lr, #1 - stmeqdb r13!, {lr, pc} - mov ip, lr - adreq lr, _arm_return - bx ip - - SIZE (_interwork_call_via_lr) - -#endif /* L_interwork_call_via_rX */ -#endif /* !__thumb2__ */ - -/* Functions to support compact pic switch tables in thumb1 state. - All these routines take an index into the table in r0. The - table is at LR & ~1 (but this must be rounded up in the case - of 32-bit entires). They are only permitted to clobber r12 - and r14 and r0 must be preserved on exit. */ -#ifdef L_thumb1_case_sqi - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_sqi - push {r1} - mov r1, lr - lsrs r1, r1, #1 - lsls r1, r1, #1 - ldrsb r1, [r1, r0] - lsls r1, r1, #1 - add lr, lr, r1 - pop {r1} - bx lr - SIZE (__gnu_thumb1_case_sqi) -#endif - -#ifdef L_thumb1_case_uqi - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_uqi - push {r1} - mov r1, lr - lsrs r1, r1, #1 - lsls r1, r1, #1 - ldrb r1, [r1, r0] - lsls r1, r1, #1 - add lr, lr, r1 - pop {r1} - bx lr - SIZE (__gnu_thumb1_case_uqi) -#endif - -#ifdef L_thumb1_case_shi - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_shi - push {r0, r1} - mov r1, lr - lsrs r1, r1, #1 - lsls r0, r0, #1 - lsls r1, r1, #1 - ldrsh r1, [r1, r0] - lsls r1, r1, #1 - add lr, lr, r1 - pop {r0, r1} - bx lr - SIZE (__gnu_thumb1_case_shi) -#endif - -#ifdef L_thumb1_case_uhi - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_uhi - push {r0, r1} - mov r1, lr - lsrs r1, r1, #1 - lsls r0, r0, #1 - lsls r1, r1, #1 - ldrh r1, [r1, r0] - lsls r1, r1, #1 - add lr, lr, r1 - pop {r0, r1} - bx lr - SIZE (__gnu_thumb1_case_uhi) -#endif - -#ifdef L_thumb1_case_si - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_si - push {r0, r1} - mov r1, lr - adds.n r1, r1, #2 /* Align to word. */ - lsrs r1, r1, #2 - lsls r0, r0, #2 - lsls r1, r1, #2 - ldr r0, [r1, r0] - adds r0, r0, r1 - mov lr, r0 - pop {r0, r1} - mov pc, lr /* We know we were called from thumb code. */ - SIZE (__gnu_thumb1_case_si) -#endif - -#endif /* Arch supports thumb. 
*/ - -#ifndef __symbian__ -#ifndef __ARM_ARCH_6M__ -#include "ieee754-df.S" -#include "ieee754-sf.S" -#include "bpabi.S" -#else /* __ARM_ARCH_6M__ */ -#include "bpabi-v6m.S" -#endif /* __ARM_ARCH_6M__ */ -#endif /* !__symbian__ */ diff --git a/gcc/config/arm/linux-eabi.h b/gcc/config/arm/linux-eabi.h index a3830955948..80bd8259375 100644 --- a/gcc/config/arm/linux-eabi.h +++ b/gcc/config/arm/linux-eabi.h @@ -97,7 +97,7 @@ #undef LIBGCC_SPEC /* Clear the instruction cache from `beg' to `end'. This is - implemented in lib1funcs.asm, so ensure an error if this definition + implemented in lib1funcs.S, so ensure an error if this definition is used. */ #undef CLEAR_INSN_CACHE #define CLEAR_INSN_CACHE(BEG, END) not_used diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index b970ec26a35..a9a174d473d 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -40,9 +40,6 @@ MD_INCLUDES= $(srcdir)/config/arm/arm-tune.md \ $(srcdir)/config/arm/thumb2.md \ $(srcdir)/config/arm/arm-fixed.md -LIB1ASMSRC = arm/lib1funcs.asm -LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \ - _thumb1_case_uhi _thumb1_case_si s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \ s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES) diff --git a/gcc/config/arm/t-arm-elf b/gcc/config/arm/t-arm-elf index bfcf6ffd939..a605d26244f 100644 --- a/gcc/config/arm/t-arm-elf +++ b/gcc/config/arm/t-arm-elf @@ -17,20 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . -# For most CPUs we have an assembly soft-float implementations. -# However this is not true for ARMv6M. Here we want to use the soft-fp C -# implementation. The soft-fp code is only build for ARMv6M. This pulls -# in the asm implementation for other CPUs. -LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \ - _call_via_rX _interwork_call_via_rX \ - _lshrdi3 _ashrdi3 _ashldi3 \ - _arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \ - _arm_fixdfsi _arm_fixunsdfsi \ - _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \ - _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \ - _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \ - _clzsi2 _clzdi2 - MULTILIB_OPTIONS = marm/mthumb MULTILIB_DIRNAMES = arm thumb MULTILIB_EXCEPTIONS = diff --git a/gcc/config/arm/t-bpabi b/gcc/config/arm/t-bpabi index 047525682fc..c9d5ed4d674 100644 --- a/gcc/config/arm/t-bpabi +++ b/gcc/config/arm/t-bpabi @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -# Add the bpabi.S functions. -LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod - # Add the BPABI C functions. LIB2FUNCS_EXTRA = $(srcdir)/config/arm/bpabi.c \ $(srcdir)/config/arm/unaligned-funcs.c diff --git a/gcc/config/arm/t-linux b/gcc/config/arm/t-linux index a0c5110f0c0..a204834014e 100644 --- a/gcc/config/arm/t-linux +++ b/gcc/config/arm/t-linux @@ -21,10 +21,6 @@ # difference. 
TARGET_LIBGCC2_CFLAGS = -fomit-frame-pointer -fPIC -LIB1ASMSRC = arm/lib1funcs.asm -LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx _clzsi2 _clzdi2 \ - _arm_addsubdf3 _arm_addsubsf3 - # MULTILIB_OPTIONS = mfloat-abi=hard/mfloat-abi=soft # MULTILIB_DIRNAMES = hard-float soft-float diff --git a/gcc/config/arm/t-linux-eabi b/gcc/config/arm/t-linux-eabi index fed979e980b..3030229fafa 100644 --- a/gcc/config/arm/t-linux-eabi +++ b/gcc/config/arm/t-linux-eabi @@ -28,8 +28,5 @@ MULTILIB_DIRNAMES = #MULTILIB_DIRNAMES += fa606te fa626te fmp626 fa726te #MULTILIB_EXCEPTIONS += *mthumb/*mcpu=fa606te *mthumb/*mcpu=fa626te *mthumb/*mcpu=fmp626 *mthumb/*mcpu=fa726te* -# Use a version of div0 which raises SIGFPE, and a special __clear_cache. -LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache - LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic.c LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic-64bit.c diff --git a/gcc/config/arm/t-strongarm-elf b/gcc/config/arm/t-strongarm-elf index 95680031e54..4d51e660c8b 100644 --- a/gcc/config/arm/t-strongarm-elf +++ b/gcc/config/arm/t-strongarm-elf @@ -17,8 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2 - MULTILIB_OPTIONS = mlittle-endian/mbig-endian mfloat-abi=hard/mfloat-abi=soft MULTILIB_DIRNAMES = le be fpu soft MULTILIB_EXCEPTIONS = diff --git a/gcc/config/arm/t-symbian b/gcc/config/arm/t-symbian index cf716147849..736a01d10f4 100644 --- a/gcc/config/arm/t-symbian +++ b/gcc/config/arm/t-symbian @@ -16,20 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 - -# These functions have __aeabi equivalents and will never be called by GCC. -# By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being -# used -- and we make sure that definitions are not available in lib1funcs.asm, -# either, so they end up undefined. -LIB1ASMFUNCS += \ - _ashldi3 _ashrdi3 _divdi3 _floatdidf _udivmoddi4 _umoddi3 \ - _udivdi3 _lshrdi3 _moddi3 _muldi3 _negdi2 _cmpdi2 \ - _fixdfdi _fixsfdi _fixunsdfdi _fixunssfdi _floatdisf \ - _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \ - _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \ - _fixsfsi _fixunssfsi - EXTRA_HEADERS += $(srcdir)/ginclude/unwind-arm-common.h # Include half-float helpers. LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c diff --git a/gcc/config/arm/t-vxworks b/gcc/config/arm/t-vxworks index 8ac0d9bcec5..0900ffe15ed 100644 --- a/gcc/config/arm/t-vxworks +++ b/gcc/config/arm/t-vxworks @@ -16,8 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 - MULTILIB_OPTIONS = \ mrtp fPIC \ t4/t4be/t4t/t4tbe/t5/t5be/t5t/t5tbe/tstrongarm/txscale/txscalebe diff --git a/gcc/config/arm/t-wince-pe b/gcc/config/arm/t-wince-pe index 9ce1f313140..8a8c65fd396 100644 --- a/gcc/config/arm/t-wince-pe +++ b/gcc/config/arm/t-wince-pe @@ -17,8 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . 
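# Editorial note (not part of the patch): in these makefile fragments
# LIB1ASMSRC names the assembler source holding the "libgcc1" routines and
# LIB1ASMFUNCS lists the L_<name> groups it provides; the build compiles
# that one source once per listed member (with a -DL<member> define such as
# -DL_udivsi3), so each routine ends up in its own object file.  The
# removals below drop these settings from the gcc/ fragments because the
# toplevel libgcc now carries them in its own config/*/t-* files.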
-LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 - pe.o: $(srcdir)/config/arm/pe.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(RTL_H) output.h flags.h $(TREE_H) expr.h $(TM_P_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S deleted file mode 100644 index 8c369c96a77..00000000000 --- a/gcc/config/avr/libgcc.S +++ /dev/null @@ -1,1533 +0,0 @@ -/* -*- Mode: Asm -*- */ -/* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009 - Free Software Foundation, Inc. - Contributed by Denis Chertykov - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -#define __zero_reg__ r1 -#define __tmp_reg__ r0 -#define __SREG__ 0x3f -#define __SP_H__ 0x3e -#define __SP_L__ 0x3d -#define __RAMPZ__ 0x3B -#define __EIND__ 0x3C - -/* Most of the functions here are called directly from avr.md - patterns, instead of using the standard libcall mechanisms. - This can make better code because GCC knows exactly which - of the call-used registers (not all of them) are clobbered. */ - -/* FIXME: At present, there is no SORT directive in the linker - script so that we must not assume that different modules - in the same input section like .libgcc.text.mul will be - located close together. Therefore, we cannot use - RCALL/RJMP to call a function like __udivmodhi4 from - __divmodhi4 and have to use lengthy XCALL/XJMP even - though they are in the same input section and all same - input sections together are small enough to reach every - location with a RCALL/RJMP instruction. */ - - .macro mov_l r_dest, r_src -#if defined (__AVR_HAVE_MOVW__) - movw \r_dest, \r_src -#else - mov \r_dest, \r_src -#endif - .endm - - .macro mov_h r_dest, r_src -#if defined (__AVR_HAVE_MOVW__) - ; empty -#else - mov \r_dest, \r_src -#endif - .endm - -#if defined (__AVR_HAVE_JMP_CALL__) -#define XCALL call -#define XJMP jmp -#else -#define XCALL rcall -#define XJMP rjmp -#endif - -.macro DEFUN name -.global \name -.func \name -\name: -.endm - -.macro ENDF name -.size \name, .-\name -.endfunc -.endm - - -.section .text.libgcc.mul, "ax", @progbits - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. 
*/ -#if !defined (__AVR_HAVE_MUL__) -/******************************************************* - Multiplication 8 x 8 without MUL -*******************************************************/ -#if defined (L_mulqi3) - -#define r_arg2 r22 /* multiplicand */ -#define r_arg1 r24 /* multiplier */ -#define r_res __tmp_reg__ /* result */ - -DEFUN __mulqi3 - clr r_res ; clear result -__mulqi3_loop: - sbrc r_arg1,0 - add r_res,r_arg2 - add r_arg2,r_arg2 ; shift multiplicand - breq __mulqi3_exit ; while multiplicand != 0 - lsr r_arg1 ; - brne __mulqi3_loop ; exit if multiplier = 0 -__mulqi3_exit: - mov r_arg1,r_res ; result to return register - ret -ENDF __mulqi3 - -#undef r_arg2 -#undef r_arg1 -#undef r_res - -#endif /* defined (L_mulqi3) */ - -#if defined (L_mulqihi3) -DEFUN __mulqihi3 - clr r25 - sbrc r24, 7 - dec r25 - clr r23 - sbrc r22, 7 - dec r22 - XJMP __mulhi3 -ENDF __mulqihi3: -#endif /* defined (L_mulqihi3) */ - -#if defined (L_umulqihi3) -DEFUN __umulqihi3 - clr r25 - clr r23 - XJMP __mulhi3 -ENDF __umulqihi3 -#endif /* defined (L_umulqihi3) */ - -/******************************************************* - Multiplication 16 x 16 without MUL -*******************************************************/ -#if defined (L_mulhi3) -#define r_arg1L r24 /* multiplier Low */ -#define r_arg1H r25 /* multiplier High */ -#define r_arg2L r22 /* multiplicand Low */ -#define r_arg2H r23 /* multiplicand High */ -#define r_resL __tmp_reg__ /* result Low */ -#define r_resH r21 /* result High */ - -DEFUN __mulhi3 - clr r_resH ; clear result - clr r_resL ; clear result -__mulhi3_loop: - sbrs r_arg1L,0 - rjmp __mulhi3_skip1 - add r_resL,r_arg2L ; result + multiplicand - adc r_resH,r_arg2H -__mulhi3_skip1: - add r_arg2L,r_arg2L ; shift multiplicand - adc r_arg2H,r_arg2H - - cp r_arg2L,__zero_reg__ - cpc r_arg2H,__zero_reg__ - breq __mulhi3_exit ; while multiplicand != 0 - - lsr r_arg1H ; gets LSB of multiplier - ror r_arg1L - sbiw r_arg1L,0 - brne __mulhi3_loop ; exit if multiplier = 0 -__mulhi3_exit: - mov r_arg1H,r_resH ; result to return register - mov r_arg1L,r_resL - ret -ENDF __mulhi3 - -#undef r_arg1L -#undef r_arg1H -#undef r_arg2L -#undef r_arg2H -#undef r_resL -#undef r_resH - -#endif /* defined (L_mulhi3) */ - -/******************************************************* - Widening Multiplication 32 = 16 x 16 without MUL -*******************************************************/ - -#if defined (L_mulhisi3) -DEFUN __mulhisi3 -;;; FIXME: This is dead code (noone calls it) - mov_l r18, r24 - mov_h r19, r25 - clr r24 - sbrc r23, 7 - dec r24 - mov r25, r24 - clr r20 - sbrc r19, 7 - dec r20 - mov r21, r20 - XJMP __mulsi3 -ENDF __mulhisi3 -#endif /* defined (L_mulhisi3) */ - -#if defined (L_umulhisi3) -DEFUN __umulhisi3 -;;; FIXME: This is dead code (noone calls it) - mov_l r18, r24 - mov_h r19, r25 - clr r24 - clr r25 - mov_l r20, r24 - mov_h r21, r25 - XJMP __mulsi3 -ENDF __umulhisi3 -#endif /* defined (L_umulhisi3) */ - -#if defined (L_mulsi3) -/******************************************************* - Multiplication 32 x 32 without MUL -*******************************************************/ -#define r_arg1L r22 /* multiplier Low */ -#define r_arg1H r23 -#define r_arg1HL r24 -#define r_arg1HH r25 /* multiplier High */ - -#define r_arg2L r18 /* multiplicand Low */ -#define r_arg2H r19 -#define r_arg2HL r20 -#define r_arg2HH r21 /* multiplicand High */ - -#define r_resL r26 /* result Low */ -#define r_resH r27 -#define r_resHL r30 -#define r_resHH r31 /* result High */ - -DEFUN __mulsi3 - clr r_resHH ; clear result 
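;; Editorial note (not part of the patch): the no-MUL multiply routines in
;; this file (__mulqi3, __mulhi3, __mulsi3) all use the same shift-and-add
;; loop.  A hedged C model of the 16-bit case, with invented names:
/*
#include <stdint.h>

static uint16_t
mulhi3_model (uint16_t a, uint16_t b)
{
  uint16_t res = 0;
  while (a != 0 && b != 0)      // both exit tests appear in the assembly
    {
      if (a & 1)                // low multiplier bit set?
        res += b;               // ... then add the shifted multiplicand
      b <<= 1;                  // shift multiplicand up
      a >>= 1;                  // consume one multiplier bit
    }
  return res;
}
*/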
- clr r_resHL ; clear result - clr r_resH ; clear result - clr r_resL ; clear result -__mulsi3_loop: - sbrs r_arg1L,0 - rjmp __mulsi3_skip1 - add r_resL,r_arg2L ; result + multiplicand - adc r_resH,r_arg2H - adc r_resHL,r_arg2HL - adc r_resHH,r_arg2HH -__mulsi3_skip1: - add r_arg2L,r_arg2L ; shift multiplicand - adc r_arg2H,r_arg2H - adc r_arg2HL,r_arg2HL - adc r_arg2HH,r_arg2HH - - lsr r_arg1HH ; gets LSB of multiplier - ror r_arg1HL - ror r_arg1H - ror r_arg1L - brne __mulsi3_loop - sbiw r_arg1HL,0 - cpc r_arg1H,r_arg1L - brne __mulsi3_loop ; exit if multiplier = 0 -__mulsi3_exit: - mov_h r_arg1HH,r_resHH ; result to return register - mov_l r_arg1HL,r_resHL - mov_h r_arg1H,r_resH - mov_l r_arg1L,r_resL - ret -ENDF __mulsi3 - -#undef r_arg1L -#undef r_arg1H -#undef r_arg1HL -#undef r_arg1HH - -#undef r_arg2L -#undef r_arg2H -#undef r_arg2HL -#undef r_arg2HH - -#undef r_resL -#undef r_resH -#undef r_resHL -#undef r_resHH - -#endif /* defined (L_mulsi3) */ - -#endif /* !defined (__AVR_HAVE_MUL__) */ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -#if defined (__AVR_HAVE_MUL__) -#define A0 26 -#define B0 18 -#define C0 22 - -#define A1 A0+1 - -#define B1 B0+1 -#define B2 B0+2 -#define B3 B0+3 - -#define C1 C0+1 -#define C2 C0+2 -#define C3 C0+3 - -/******************************************************* - Widening Multiplication 32 = 16 x 16 -*******************************************************/ - -#if defined (L_mulhisi3) -;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18 -;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __mulhisi3 - XCALL __umulhisi3 - ;; Sign-extend B - tst B1 - brpl 1f - sub C2, A0 - sbc C3, A1 -1: ;; Sign-extend A - XJMP __usmulhisi3_tail -ENDF __mulhisi3 -#endif /* L_mulhisi3 */ - -#if defined (L_usmulhisi3) -;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18 -;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __usmulhisi3 - XCALL __umulhisi3 - ;; FALLTHRU -ENDF __usmulhisi3 - -DEFUN __usmulhisi3_tail - ;; Sign-extend A - sbrs A1, 7 - ret - sub C2, B0 - sbc C3, B1 - ret -ENDF __usmulhisi3_tail -#endif /* L_usmulhisi3 */ - -#if defined (L_umulhisi3) -;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18 -;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __umulhisi3 - mul A0, B0 - movw C0, r0 - mul A1, B1 - movw C2, r0 - mul A0, B1 - rcall 1f - mul A1, B0 -1: add C1, r0 - adc C2, r1 - clr __zero_reg__ - adc C3, __zero_reg__ - ret -ENDF __umulhisi3 -#endif /* L_umulhisi3 */ - -/******************************************************* - Widening Multiplication 32 = 16 x 32 -*******************************************************/ - -#if defined (L_mulshisi3) -;;; R25:R22 = (signed long) R27:R26 * R21:R18 -;;; (C3:C0) = (signed long) A1:A0 * B3:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __mulshisi3 -#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ - ;; Some cores have problem skipping 2-word instruction - tst A1 - brmi __mulohisi3 -#else - sbrs A1, 7 -#endif /* __AVR_HAVE_JMP_CALL__ */ - XJMP __muluhisi3 - ;; FALLTHRU -ENDF __mulshisi3 - -;;; R25:R22 = (one-extended long) R27:R26 * R21:R18 -;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __mulohisi3 - XCALL __muluhisi3 - ;; One-extend R27:R26 (A1:A0) - sub C2, B0 - sbc C3, B1 - ret -ENDF __mulohisi3 -#endif /* L_mulshisi3 */ - -#if defined (L_muluhisi3) -;;; R25:R22 = (unsigned long) R27:R26 * 
R21:R18 -;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __muluhisi3 - XCALL __umulhisi3 - mul A0, B3 - add C3, r0 - mul A1, B2 - add C3, r0 - mul A0, B2 - add C2, r0 - adc C3, r1 - clr __zero_reg__ - ret -ENDF __muluhisi3 -#endif /* L_muluhisi3 */ - -/******************************************************* - Multiplication 32 x 32 -*******************************************************/ - -#if defined (L_mulsi3) -;;; R25:R22 = R25:R22 * R21:R18 -;;; (C3:C0) = C3:C0 * B3:B0 -;;; Clobbers: R26, R27, __tmp_reg__ -DEFUN __mulsi3 - movw A0, C0 - push C2 - push C3 - XCALL __muluhisi3 - pop A1 - pop A0 - ;; A1:A0 now contains the high word of A - mul A0, B0 - add C2, r0 - adc C3, r1 - mul A0, B1 - add C3, r0 - mul A1, B0 - add C3, r0 - clr __zero_reg__ - ret -ENDF __mulsi3 -#endif /* L_mulsi3 */ - -#undef A0 -#undef A1 - -#undef B0 -#undef B1 -#undef B2 -#undef B3 - -#undef C0 -#undef C1 -#undef C2 -#undef C3 - -#endif /* __AVR_HAVE_MUL__ */ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - -.section .text.libgcc.div, "ax", @progbits - -/******************************************************* - Division 8 / 8 => (result + remainder) -*******************************************************/ -#define r_rem r25 /* remainder */ -#define r_arg1 r24 /* dividend, quotient */ -#define r_arg2 r22 /* divisor */ -#define r_cnt r23 /* loop count */ - -#if defined (L_udivmodqi4) -DEFUN __udivmodqi4 - sub r_rem,r_rem ; clear remainder and carry - ldi r_cnt,9 ; init loop counter - rjmp __udivmodqi4_ep ; jump to entry point -__udivmodqi4_loop: - rol r_rem ; shift dividend into remainder - cp r_rem,r_arg2 ; compare remainder & divisor - brcs __udivmodqi4_ep ; remainder <= divisor - sub r_rem,r_arg2 ; restore remainder -__udivmodqi4_ep: - rol r_arg1 ; shift dividend (with CARRY) - dec r_cnt ; decrement loop counter - brne __udivmodqi4_loop - com r_arg1 ; complement result - ; because C flag was complemented in loop - ret -ENDF __udivmodqi4 -#endif /* defined (L_udivmodqi4) */ - -#if defined (L_divmodqi4) -DEFUN __divmodqi4 - bst r_arg1,7 ; store sign of dividend - mov __tmp_reg__,r_arg1 - eor __tmp_reg__,r_arg2; r0.7 is sign of result - sbrc r_arg1,7 - neg r_arg1 ; dividend negative : negate - sbrc r_arg2,7 - neg r_arg2 ; divisor negative : negate - XCALL __udivmodqi4 ; do the unsigned div/mod - brtc __divmodqi4_1 - neg r_rem ; correct remainder sign -__divmodqi4_1: - sbrc __tmp_reg__,7 - neg r_arg1 ; correct result sign -__divmodqi4_exit: - ret -ENDF __divmodqi4 -#endif /* defined (L_divmodqi4) */ - -#undef r_rem -#undef r_arg1 -#undef r_arg2 -#undef r_cnt - - -/******************************************************* - Division 16 / 16 => (result + remainder) -*******************************************************/ -#define r_remL r26 /* remainder Low */ -#define r_remH r27 /* remainder High */ - -/* return: remainder */ -#define r_arg1L r24 /* dividend Low */ -#define r_arg1H r25 /* dividend High */ - -/* return: quotient */ -#define r_arg2L r22 /* divisor Low */ -#define r_arg2H r23 /* divisor High */ - -#define r_cnt r21 /* loop count */ - -#if defined (L_udivmodhi4) -DEFUN __udivmodhi4 - sub r_remL,r_remL - sub r_remH,r_remH ; clear remainder and carry - ldi r_cnt,17 ; init loop counter - rjmp __udivmodhi4_ep ; jump to entry point -__udivmodhi4_loop: - rol r_remL ; shift dividend into remainder - rol r_remH - cp r_remL,r_arg2L ; compare remainder & divisor - cpc r_remH,r_arg2H - brcs __udivmodhi4_ep ; remainder < divisor - sub r_remL,r_arg2L ; restore remainder - sbc 
r_remH,r_arg2H -__udivmodhi4_ep: - rol r_arg1L ; shift dividend (with CARRY) - rol r_arg1H - dec r_cnt ; decrement loop counter - brne __udivmodhi4_loop - com r_arg1L - com r_arg1H -; div/mod results to return registers, as for the div() function - mov_l r_arg2L, r_arg1L ; quotient - mov_h r_arg2H, r_arg1H - mov_l r_arg1L, r_remL ; remainder - mov_h r_arg1H, r_remH - ret -ENDF __udivmodhi4 -#endif /* defined (L_udivmodhi4) */ - -#if defined (L_divmodhi4) -DEFUN __divmodhi4 - .global _div -_div: - bst r_arg1H,7 ; store sign of dividend - mov __tmp_reg__,r_arg1H - eor __tmp_reg__,r_arg2H ; r0.7 is sign of result - rcall __divmodhi4_neg1 ; dividend negative : negate - sbrc r_arg2H,7 - rcall __divmodhi4_neg2 ; divisor negative : negate - XCALL __udivmodhi4 ; do the unsigned div/mod - rcall __divmodhi4_neg1 ; correct remainder sign - tst __tmp_reg__ - brpl __divmodhi4_exit -__divmodhi4_neg2: - com r_arg2H - neg r_arg2L ; correct divisor/result sign - sbci r_arg2H,0xff -__divmodhi4_exit: - ret -__divmodhi4_neg1: - brtc __divmodhi4_exit - com r_arg1H - neg r_arg1L ; correct dividend/remainder sign - sbci r_arg1H,0xff - ret -ENDF __divmodhi4 -#endif /* defined (L_divmodhi4) */ - -#undef r_remH -#undef r_remL - -#undef r_arg1H -#undef r_arg1L - -#undef r_arg2H -#undef r_arg2L - -#undef r_cnt - -/******************************************************* - Division 32 / 32 => (result + remainder) -*******************************************************/ -#define r_remHH r31 /* remainder High */ -#define r_remHL r30 -#define r_remH r27 -#define r_remL r26 /* remainder Low */ - -/* return: remainder */ -#define r_arg1HH r25 /* dividend High */ -#define r_arg1HL r24 -#define r_arg1H r23 -#define r_arg1L r22 /* dividend Low */ - -/* return: quotient */ -#define r_arg2HH r21 /* divisor High */ -#define r_arg2HL r20 -#define r_arg2H r19 -#define r_arg2L r18 /* divisor Low */ - -#define r_cnt __zero_reg__ /* loop count (0 after the loop!) 
*/ - -#if defined (L_udivmodsi4) -DEFUN __udivmodsi4 - ldi r_remL, 33 ; init loop counter - mov r_cnt, r_remL - sub r_remL,r_remL - sub r_remH,r_remH ; clear remainder and carry - mov_l r_remHL, r_remL - mov_h r_remHH, r_remH - rjmp __udivmodsi4_ep ; jump to entry point -__udivmodsi4_loop: - rol r_remL ; shift dividend into remainder - rol r_remH - rol r_remHL - rol r_remHH - cp r_remL,r_arg2L ; compare remainder & divisor - cpc r_remH,r_arg2H - cpc r_remHL,r_arg2HL - cpc r_remHH,r_arg2HH - brcs __udivmodsi4_ep ; remainder <= divisor - sub r_remL,r_arg2L ; restore remainder - sbc r_remH,r_arg2H - sbc r_remHL,r_arg2HL - sbc r_remHH,r_arg2HH -__udivmodsi4_ep: - rol r_arg1L ; shift dividend (with CARRY) - rol r_arg1H - rol r_arg1HL - rol r_arg1HH - dec r_cnt ; decrement loop counter - brne __udivmodsi4_loop - ; __zero_reg__ now restored (r_cnt == 0) - com r_arg1L - com r_arg1H - com r_arg1HL - com r_arg1HH -; div/mod results to return registers, as for the ldiv() function - mov_l r_arg2L, r_arg1L ; quotient - mov_h r_arg2H, r_arg1H - mov_l r_arg2HL, r_arg1HL - mov_h r_arg2HH, r_arg1HH - mov_l r_arg1L, r_remL ; remainder - mov_h r_arg1H, r_remH - mov_l r_arg1HL, r_remHL - mov_h r_arg1HH, r_remHH - ret -ENDF __udivmodsi4 -#endif /* defined (L_udivmodsi4) */ - -#if defined (L_divmodsi4) -DEFUN __divmodsi4 - bst r_arg1HH,7 ; store sign of dividend - mov __tmp_reg__,r_arg1HH - eor __tmp_reg__,r_arg2HH ; r0.7 is sign of result - rcall __divmodsi4_neg1 ; dividend negative : negate - sbrc r_arg2HH,7 - rcall __divmodsi4_neg2 ; divisor negative : negate - XCALL __udivmodsi4 ; do the unsigned div/mod - rcall __divmodsi4_neg1 ; correct remainder sign - rol __tmp_reg__ - brcc __divmodsi4_exit -__divmodsi4_neg2: - com r_arg2HH - com r_arg2HL - com r_arg2H - neg r_arg2L ; correct divisor/quotient sign - sbci r_arg2H,0xff - sbci r_arg2HL,0xff - sbci r_arg2HH,0xff -__divmodsi4_exit: - ret -__divmodsi4_neg1: - brtc __divmodsi4_exit - com r_arg1HH - com r_arg1HL - com r_arg1H - neg r_arg1L ; correct dividend/remainder sign - sbci r_arg1H, 0xff - sbci r_arg1HL,0xff - sbci r_arg1HH,0xff - ret -ENDF __divmodsi4 -#endif /* defined (L_divmodsi4) */ - - -.section .text.libgcc.prologue, "ax", @progbits - -/********************************** - * This is a prologue subroutine - **********************************/ -#if defined (L_prologue) - -DEFUN __prologue_saves__ - push r2 - push r3 - push r4 - push r5 - push r6 - push r7 - push r8 - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push r16 - push r17 - push r28 - push r29 - in r28,__SP_L__ - in r29,__SP_H__ - sub r28,r26 - sbc r29,r27 - in __tmp_reg__,__SREG__ - cli - out __SP_H__,r29 - out __SREG__,__tmp_reg__ - out __SP_L__,r28 -#if defined (__AVR_HAVE_EIJMP_EICALL__) - eijmp -#else - ijmp -#endif - -ENDF __prologue_saves__ -#endif /* defined (L_prologue) */ - -/* - * This is an epilogue subroutine - */ -#if defined (L_epilogue) - -DEFUN __epilogue_restores__ - ldd r2,Y+18 - ldd r3,Y+17 - ldd r4,Y+16 - ldd r5,Y+15 - ldd r6,Y+14 - ldd r7,Y+13 - ldd r8,Y+12 - ldd r9,Y+11 - ldd r10,Y+10 - ldd r11,Y+9 - ldd r12,Y+8 - ldd r13,Y+7 - ldd r14,Y+6 - ldd r15,Y+5 - ldd r16,Y+4 - ldd r17,Y+3 - ldd r26,Y+2 - ldd r27,Y+1 - add r28,r30 - adc r29,__zero_reg__ - in __tmp_reg__,__SREG__ - cli - out __SP_H__,r29 - out __SREG__,__tmp_reg__ - out __SP_L__,r28 - mov_l r28, r26 - mov_h r29, r27 - ret -ENDF __epilogue_restores__ -#endif /* defined (L_epilogue) */ - -#ifdef L_exit - .section .fini9,"ax",@progbits -DEFUN _exit - .weak exit -exit: -ENDF _exit - - 
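;; Editorial note (not part of the patch): __divmodqi4, __divmodhi4 and
;; __divmodsi4 above all divide unsigned and then patch up the signs -- the
;; remainder takes the dividend's sign, the quotient is negated when the
;; operand signs differ.  A hedged C model with invented names (division by
;; zero and the INT32_MIN corner are left aside here):
/*
#include <stdint.h>

static void
divmodsi4_model (int32_t a, int32_t b, int32_t *q, int32_t *r)
{
  uint32_t ua = (a < 0) ? 0u - (uint32_t) a : (uint32_t) a;
  uint32_t ub = (b < 0) ? 0u - (uint32_t) b : (uint32_t) b;
  uint32_t uq = ua / ub;        // the assembly calls __udivmodsi4 here
  uint32_t ur = ua % ub;
  *q = ((a < 0) != (b < 0)) ? -(int32_t) uq : (int32_t) uq;
  *r = (a < 0) ? -(int32_t) ur : (int32_t) ur;
}
*/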
/* Code from .fini8 ... .fini1 sections inserted by ld script. */ - - .section .fini0,"ax",@progbits - cli -__stop_program: - rjmp __stop_program -#endif /* defined (L_exit) */ - -#ifdef L_cleanup - .weak _cleanup - .func _cleanup -_cleanup: - ret -.endfunc -#endif /* defined (L_cleanup) */ - - -.section .text.libgcc, "ax", @progbits - -#ifdef L_tablejump -DEFUN __tablejump2__ - lsl r30 - rol r31 - ;; FALLTHRU -ENDF __tablejump2__ - -DEFUN __tablejump__ -#if defined (__AVR_HAVE_LPMX__) - lpm __tmp_reg__, Z+ - lpm r31, Z - mov r30, __tmp_reg__ -#if defined (__AVR_HAVE_EIJMP_EICALL__) - eijmp -#else - ijmp -#endif - -#else /* !HAVE_LPMX */ - lpm - adiw r30, 1 - push r0 - lpm - push r0 -#if defined (__AVR_HAVE_EIJMP_EICALL__) - in __tmp_reg__, __EIND__ - push __tmp_reg__ -#endif - ret -#endif /* !HAVE_LPMX */ -ENDF __tablejump__ -#endif /* defined (L_tablejump) */ - -#ifdef L_copy_data - .section .init4,"ax",@progbits -DEFUN __do_copy_data -#if defined(__AVR_HAVE_ELPMX__) - ldi r17, hi8(__data_end) - ldi r26, lo8(__data_start) - ldi r27, hi8(__data_start) - ldi r30, lo8(__data_load_start) - ldi r31, hi8(__data_load_start) - ldi r16, hh8(__data_load_start) - out __RAMPZ__, r16 - rjmp .L__do_copy_data_start -.L__do_copy_data_loop: - elpm r0, Z+ - st X+, r0 -.L__do_copy_data_start: - cpi r26, lo8(__data_end) - cpc r27, r17 - brne .L__do_copy_data_loop -#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__) - ldi r17, hi8(__data_end) - ldi r26, lo8(__data_start) - ldi r27, hi8(__data_start) - ldi r30, lo8(__data_load_start) - ldi r31, hi8(__data_load_start) - ldi r16, hh8(__data_load_start - 0x10000) -.L__do_copy_data_carry: - inc r16 - out __RAMPZ__, r16 - rjmp .L__do_copy_data_start -.L__do_copy_data_loop: - elpm - st X+, r0 - adiw r30, 1 - brcs .L__do_copy_data_carry -.L__do_copy_data_start: - cpi r26, lo8(__data_end) - cpc r27, r17 - brne .L__do_copy_data_loop -#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) - ldi r17, hi8(__data_end) - ldi r26, lo8(__data_start) - ldi r27, hi8(__data_start) - ldi r30, lo8(__data_load_start) - ldi r31, hi8(__data_load_start) - rjmp .L__do_copy_data_start -.L__do_copy_data_loop: -#if defined (__AVR_HAVE_LPMX__) - lpm r0, Z+ -#else - lpm - adiw r30, 1 -#endif - st X+, r0 -.L__do_copy_data_start: - cpi r26, lo8(__data_end) - cpc r27, r17 - brne .L__do_copy_data_loop -#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */ -ENDF __do_copy_data -#endif /* L_copy_data */ - -/* __do_clear_bss is only necessary if there is anything in .bss section. */ - -#ifdef L_clear_bss - .section .init4,"ax",@progbits -DEFUN __do_clear_bss - ldi r17, hi8(__bss_end) - ldi r26, lo8(__bss_start) - ldi r27, hi8(__bss_start) - rjmp .do_clear_bss_start -.do_clear_bss_loop: - st X+, __zero_reg__ -.do_clear_bss_start: - cpi r26, lo8(__bss_end) - cpc r27, r17 - brne .do_clear_bss_loop -ENDF __do_clear_bss -#endif /* L_clear_bss */ - -/* __do_global_ctors and __do_global_dtors are only necessary - if there are any constructors/destructors. 
*/ - -#ifdef L_ctors - .section .init6,"ax",@progbits -DEFUN __do_global_ctors -#if defined(__AVR_HAVE_RAMPZ__) - ldi r17, hi8(__ctors_start) - ldi r28, lo8(__ctors_end) - ldi r29, hi8(__ctors_end) - ldi r16, hh8(__ctors_end) - rjmp .L__do_global_ctors_start -.L__do_global_ctors_loop: - sbiw r28, 2 - sbc r16, __zero_reg__ - mov_h r31, r29 - mov_l r30, r28 - out __RAMPZ__, r16 - XCALL __tablejump_elpm__ -.L__do_global_ctors_start: - cpi r28, lo8(__ctors_start) - cpc r29, r17 - ldi r24, hh8(__ctors_start) - cpc r16, r24 - brne .L__do_global_ctors_loop -#else - ldi r17, hi8(__ctors_start) - ldi r28, lo8(__ctors_end) - ldi r29, hi8(__ctors_end) - rjmp .L__do_global_ctors_start -.L__do_global_ctors_loop: - sbiw r28, 2 - mov_h r31, r29 - mov_l r30, r28 - XCALL __tablejump__ -.L__do_global_ctors_start: - cpi r28, lo8(__ctors_start) - cpc r29, r17 - brne .L__do_global_ctors_loop -#endif /* defined(__AVR_HAVE_RAMPZ__) */ -ENDF __do_global_ctors -#endif /* L_ctors */ - -#ifdef L_dtors - .section .fini6,"ax",@progbits -DEFUN __do_global_dtors -#if defined(__AVR_HAVE_RAMPZ__) - ldi r17, hi8(__dtors_end) - ldi r28, lo8(__dtors_start) - ldi r29, hi8(__dtors_start) - ldi r16, hh8(__dtors_start) - rjmp .L__do_global_dtors_start -.L__do_global_dtors_loop: - sbiw r28, 2 - sbc r16, __zero_reg__ - mov_h r31, r29 - mov_l r30, r28 - out __RAMPZ__, r16 - XCALL __tablejump_elpm__ -.L__do_global_dtors_start: - cpi r28, lo8(__dtors_end) - cpc r29, r17 - ldi r24, hh8(__dtors_end) - cpc r16, r24 - brne .L__do_global_dtors_loop -#else - ldi r17, hi8(__dtors_end) - ldi r28, lo8(__dtors_start) - ldi r29, hi8(__dtors_start) - rjmp .L__do_global_dtors_start -.L__do_global_dtors_loop: - mov_h r31, r29 - mov_l r30, r28 - XCALL __tablejump__ - adiw r28, 2 -.L__do_global_dtors_start: - cpi r28, lo8(__dtors_end) - cpc r29, r17 - brne .L__do_global_dtors_loop -#endif /* defined(__AVR_HAVE_RAMPZ__) */ -ENDF __do_global_dtors -#endif /* L_dtors */ - -.section .text.libgcc, "ax", @progbits - -#ifdef L_tablejump_elpm -DEFUN __tablejump_elpm__ -#if defined (__AVR_HAVE_ELPM__) -#if defined (__AVR_HAVE_LPMX__) - elpm __tmp_reg__, Z+ - elpm r31, Z - mov r30, __tmp_reg__ -#if defined (__AVR_HAVE_EIJMP_EICALL__) - eijmp -#else - ijmp -#endif - -#else - elpm - adiw r30, 1 - push r0 - elpm - push r0 -#if defined (__AVR_HAVE_EIJMP_EICALL__) - in __tmp_reg__, __EIND__ - push __tmp_reg__ -#endif - ret -#endif -#endif /* defined (__AVR_HAVE_ELPM__) */ -ENDF __tablejump_elpm__ -#endif /* defined (L_tablejump_elpm) */ - - -.section .text.libgcc.builtins, "ax", @progbits - -/********************************** - * Find first set Bit (ffs) - **********************************/ - -#if defined (L_ffssi2) -;; find first set bit -;; r25:r24 = ffs32 (r25:r22) -;; clobbers: r22, r26 -DEFUN __ffssi2 - clr r26 - tst r22 - brne 1f - subi r26, -8 - or r22, r23 - brne 1f - subi r26, -8 - or r22, r24 - brne 1f - subi r26, -8 - or r22, r25 - brne 1f - ret -1: mov r24, r22 - XJMP __loop_ffsqi2 -ENDF __ffssi2 -#endif /* defined (L_ffssi2) */ - -#if defined (L_ffshi2) -;; find first set bit -;; r25:r24 = ffs16 (r25:r24) -;; clobbers: r26 -DEFUN __ffshi2 - clr r26 -#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ - ;; Some cores have problem skipping 2-word instruction - tst r24 - breq 2f -#else - cpse r24, __zero_reg__ -#endif /* __AVR_HAVE_JMP_CALL__ */ -1: XJMP __loop_ffsqi2 -2: ldi r26, 8 - or r24, r25 - brne 1b - ret -ENDF __ffshi2 -#endif /* defined (L_ffshi2) */ - -#if defined (L_loop_ffsqi2) -;; Helper for ffshi2, ffssi2 -;; r25:r24 = r26 + zero_extend16 (ffs8(r24)) -;; 
r24 must be != 0 -;; clobbers: r26 -DEFUN __loop_ffsqi2 - inc r26 - lsr r24 - brcc __loop_ffsqi2 - mov r24, r26 - clr r25 - ret -ENDF __loop_ffsqi2 -#endif /* defined (L_loop_ffsqi2) */ - - -/********************************** - * Count trailing Zeros (ctz) - **********************************/ - -#if defined (L_ctzsi2) -;; count trailing zeros -;; r25:r24 = ctz32 (r25:r22) -;; clobbers: r26, r22 -;; ctz(0) = 255 -;; Note that ctz(0) in undefined for GCC -DEFUN __ctzsi2 - XCALL __ffssi2 - dec r24 - ret -ENDF __ctzsi2 -#endif /* defined (L_ctzsi2) */ - -#if defined (L_ctzhi2) -;; count trailing zeros -;; r25:r24 = ctz16 (r25:r24) -;; clobbers: r26 -;; ctz(0) = 255 -;; Note that ctz(0) in undefined for GCC -DEFUN __ctzhi2 - XCALL __ffshi2 - dec r24 - ret -ENDF __ctzhi2 -#endif /* defined (L_ctzhi2) */ - - -/********************************** - * Count leading Zeros (clz) - **********************************/ - -#if defined (L_clzdi2) -;; count leading zeros -;; r25:r24 = clz64 (r25:r18) -;; clobbers: r22, r23, r26 -DEFUN __clzdi2 - XCALL __clzsi2 - sbrs r24, 5 - ret - mov_l r22, r18 - mov_h r23, r19 - mov_l r24, r20 - mov_h r25, r21 - XCALL __clzsi2 - subi r24, -32 - ret -ENDF __clzdi2 -#endif /* defined (L_clzdi2) */ - -#if defined (L_clzsi2) -;; count leading zeros -;; r25:r24 = clz32 (r25:r22) -;; clobbers: r26 -DEFUN __clzsi2 - XCALL __clzhi2 - sbrs r24, 4 - ret - mov_l r24, r22 - mov_h r25, r23 - XCALL __clzhi2 - subi r24, -16 - ret -ENDF __clzsi2 -#endif /* defined (L_clzsi2) */ - -#if defined (L_clzhi2) -;; count leading zeros -;; r25:r24 = clz16 (r25:r24) -;; clobbers: r26 -DEFUN __clzhi2 - clr r26 - tst r25 - brne 1f - subi r26, -8 - or r25, r24 - brne 1f - ldi r24, 16 - ret -1: cpi r25, 16 - brsh 3f - subi r26, -3 - swap r25 -2: inc r26 -3: lsl r25 - brcc 2b - mov r24, r26 - clr r25 - ret -ENDF __clzhi2 -#endif /* defined (L_clzhi2) */ - - -/********************************** - * Parity - **********************************/ - -#if defined (L_paritydi2) -;; r25:r24 = parity64 (r25:r18) -;; clobbers: __tmp_reg__ -DEFUN __paritydi2 - eor r24, r18 - eor r24, r19 - eor r24, r20 - eor r24, r21 - XJMP __paritysi2 -ENDF __paritydi2 -#endif /* defined (L_paritydi2) */ - -#if defined (L_paritysi2) -;; r25:r24 = parity32 (r25:r22) -;; clobbers: __tmp_reg__ -DEFUN __paritysi2 - eor r24, r22 - eor r24, r23 - XJMP __parityhi2 -ENDF __paritysi2 -#endif /* defined (L_paritysi2) */ - -#if defined (L_parityhi2) -;; r25:r24 = parity16 (r25:r24) -;; clobbers: __tmp_reg__ -DEFUN __parityhi2 - eor r24, r25 -;; FALLTHRU -ENDF __parityhi2 - -;; r25:r24 = parity8 (r24) -;; clobbers: __tmp_reg__ -DEFUN __parityqi2 - ;; parity is in r24[0..7] - mov __tmp_reg__, r24 - swap __tmp_reg__ - eor r24, __tmp_reg__ - ;; parity is in r24[0..3] - subi r24, -4 - andi r24, -5 - subi r24, -6 - ;; parity is in r24[0,3] - sbrc r24, 3 - inc r24 - ;; parity is in r24[0] - andi r24, 1 - clr r25 - ret -ENDF __parityqi2 -#endif /* defined (L_parityhi2) */ - - -/********************************** - * Population Count - **********************************/ - -#if defined (L_popcounthi2) -;; population count -;; r25:r24 = popcount16 (r25:r24) -;; clobbers: __tmp_reg__ -DEFUN __popcounthi2 - XCALL __popcountqi2 - push r24 - mov r24, r25 - XCALL __popcountqi2 - clr r25 - ;; FALLTHRU -ENDF __popcounthi2 - -DEFUN __popcounthi2_tail - pop __tmp_reg__ - add r24, __tmp_reg__ - ret -ENDF __popcounthi2_tail -#endif /* defined (L_popcounthi2) */ - -#if defined (L_popcountsi2) -;; population count -;; r25:r24 = popcount32 (r25:r22) -;; 
clobbers: __tmp_reg__ -DEFUN __popcountsi2 - XCALL __popcounthi2 - push r24 - mov_l r24, r22 - mov_h r25, r23 - XCALL __popcounthi2 - XJMP __popcounthi2_tail -ENDF __popcountsi2 -#endif /* defined (L_popcountsi2) */ - -#if defined (L_popcountdi2) -;; population count -;; r25:r24 = popcount64 (r25:r18) -;; clobbers: r22, r23, __tmp_reg__ -DEFUN __popcountdi2 - XCALL __popcountsi2 - push r24 - mov_l r22, r18 - mov_h r23, r19 - mov_l r24, r20 - mov_h r25, r21 - XCALL __popcountsi2 - XJMP __popcounthi2_tail -ENDF __popcountdi2 -#endif /* defined (L_popcountdi2) */ - -#if defined (L_popcountqi2) -;; population count -;; r24 = popcount8 (r24) -;; clobbers: __tmp_reg__ -DEFUN __popcountqi2 - mov __tmp_reg__, r24 - andi r24, 1 - lsr __tmp_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __tmp_reg__ - ret -ENDF __popcountqi2 -#endif /* defined (L_popcountqi2) */ - - -/********************************** - * Swap bytes - **********************************/ - -;; swap two registers with different register number -.macro bswap a, b - eor \a, \b - eor \b, \a - eor \a, \b -.endm - -#if defined (L_bswapsi2) -;; swap bytes -;; r25:r22 = bswap32 (r25:r22) -DEFUN __bswapsi2 - bswap r22, r25 - bswap r23, r24 - ret -ENDF __bswapsi2 -#endif /* defined (L_bswapsi2) */ - -#if defined (L_bswapdi2) -;; swap bytes -;; r25:r18 = bswap64 (r25:r18) -DEFUN __bswapdi2 - bswap r18, r25 - bswap r19, r24 - bswap r20, r23 - bswap r21, r22 - ret -ENDF __bswapdi2 -#endif /* defined (L_bswapdi2) */ - - -/********************************** - * 64-bit shifts - **********************************/ - -#if defined (L_ashrdi3) -;; Arithmetic shift right -;; r25:r18 = ashr64 (r25:r18, r17:r16) -DEFUN __ashrdi3 - push r16 - andi r16, 63 - breq 2f -1: asr r25 - ror r24 - ror r23 - ror r22 - ror r21 - ror r20 - ror r19 - ror r18 - dec r16 - brne 1b -2: pop r16 - ret -ENDF __ashrdi3 -#endif /* defined (L_ashrdi3) */ - -#if defined (L_lshrdi3) -;; Logic shift right -;; r25:r18 = lshr64 (r25:r18, r17:r16) -DEFUN __lshrdi3 - push r16 - andi r16, 63 - breq 2f -1: lsr r25 - ror r24 - ror r23 - ror r22 - ror r21 - ror r20 - ror r19 - ror r18 - dec r16 - brne 1b -2: pop r16 - ret -ENDF __lshrdi3 -#endif /* defined (L_lshrdi3) */ - -#if defined (L_ashldi3) -;; Shift left -;; r25:r18 = ashl64 (r25:r18, r17:r16) -DEFUN __ashldi3 - push r16 - andi r16, 63 - breq 2f -1: lsl r18 - rol r19 - rol r20 - rol r21 - rol r22 - rol r23 - rol r24 - rol r25 - dec r16 - brne 1b -2: pop r16 - ret -ENDF __ashldi3 -#endif /* defined (L_ashldi3) */ - - -.section .text.libgcc.fmul, "ax", @progbits - -/***********************************************************/ -;;; Softmul versions of FMUL, FMULS and FMULSU to implement -;;; __builtin_avr_fmul* if !AVR_HAVE_MUL -/***********************************************************/ - -#define A1 24 -#define B1 25 -#define C0 22 -#define C1 23 -#define A0 __tmp_reg__ - -#ifdef L_fmuls -;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction -;;; Clobbers: r24, r25, __tmp_reg__ -DEFUN __fmuls - ;; A0.7 = negate result? - mov A0, A1 - eor A0, B1 - ;; B1 = |B1| - sbrc B1, 7 - neg B1 - XJMP __fmulsu_exit -ENDF __fmuls -#endif /* L_fmuls */ - -#ifdef L_fmulsu -;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction -;;; Clobbers: r24, r25, __tmp_reg__ -DEFUN __fmulsu - ;; A0.7 = negate result? 
- mov A0, A1 -;; FALLTHRU -ENDF __fmulsu - -;; Helper for __fmuls and __fmulsu -DEFUN __fmulsu_exit - ;; A1 = |A1| - sbrc A1, 7 - neg A1 -#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ - ;; Some cores have problem skipping 2-word instruction - tst A0 - brmi 1f -#else - sbrs A0, 7 -#endif /* __AVR_HAVE_JMP_CALL__ */ - XJMP __fmul -1: XCALL __fmul - ;; C = -C iff A0.7 = 1 - com C1 - neg C0 - sbci C1, -1 - ret -ENDF __fmulsu_exit -#endif /* L_fmulsu */ - - -#ifdef L_fmul -;;; r22:r23 = fmul (r24, r25) like in FMUL instruction -;;; Clobbers: r24, r25, __tmp_reg__ -DEFUN __fmul - ; clear result - clr C0 - clr C1 - clr A0 -1: tst B1 - ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C. -2: brpl 3f - ;; C += A - add C0, A0 - adc C1, A1 -3: ;; A >>= 1 - lsr A1 - ror A0 - ;; B <<= 1 - lsl B1 - brne 2b - ret -ENDF __fmul -#endif /* L_fmul */ - -#undef A0 -#undef A1 -#undef B1 -#undef C0 -#undef C1 diff --git a/gcc/config/avr/t-avr b/gcc/config/avr/t-avr index 30e8d96447e..3f37e591f8e 100644 --- a/gcc/config/avr/t-avr +++ b/gcc/config/avr/t-avr @@ -39,54 +39,6 @@ $(srcdir)/config/avr/avr-tables.opt: $(srcdir)/config/avr/genopt.sh \ $(SHELL) $(srcdir)/config/avr/genopt.sh $(srcdir)/config/avr > \ $(srcdir)/config/avr/avr-tables.opt -LIB1ASMSRC = avr/libgcc.S -LIB1ASMFUNCS = \ - _mulqi3 \ - _mulhi3 \ - _mulhisi3 \ - _umulhisi3 \ - _usmulhisi3 \ - _muluhisi3 \ - _mulshisi3 \ - _mulsi3 \ - _udivmodqi4 \ - _divmodqi4 \ - _udivmodhi4 \ - _divmodhi4 \ - _udivmodsi4 \ - _divmodsi4 \ - _prologue \ - _epilogue \ - _exit \ - _cleanup \ - _tablejump \ - _tablejump_elpm \ - _copy_data \ - _clear_bss \ - _ctors \ - _dtors \ - _ffssi2 \ - _ffshi2 \ - _loop_ffsqi2 \ - _ctzsi2 \ - _ctzhi2 \ - _clzdi2 \ - _clzsi2 \ - _clzhi2 \ - _paritydi2 \ - _paritysi2 \ - _parityhi2 \ - _popcounthi2 \ - _popcountsi2 \ - _popcountdi2 \ - _popcountqi2 \ - _bswapsi2 \ - _bswapdi2 \ - _ashldi3 \ - _ashrdi3 \ - _lshrdi3 \ - _fmul _fmuls _fmulsu - LIB2FUNCS_EXCLUDE = \ _clz diff --git a/gcc/config/bfin/lib1funcs.asm b/gcc/config/bfin/lib1funcs.asm deleted file mode 100644 index c7bf4f3f05c..00000000000 --- a/gcc/config/bfin/lib1funcs.asm +++ /dev/null @@ -1,211 +0,0 @@ -/* libgcc functions for Blackfin. - Copyright (C) 2005, 2009 Free Software Foundation, Inc. - Contributed by Analog Devices. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. 
*/ - -#ifdef L_divsi3 -.text -.align 2 -.global ___divsi3; -.type ___divsi3, STT_FUNC; - -___divsi3: - [--SP]= RETS; - [--SP] = R7; - - R2 = -R0; - CC = R0 < 0; - IF CC R0 = R2; - R7 = CC; - - R2 = -R1; - CC = R1 < 0; - IF CC R1 = R2; - R2 = CC; - R7 = R7 ^ R2; - - CALL ___udivsi3; - - CC = R7; - R1 = -R0; - IF CC R0 = R1; - - R7 = [SP++]; - RETS = [SP++]; - RTS; -#endif - -#ifdef L_modsi3 -.align 2 -.global ___modsi3; -.type ___modsi3, STT_FUNC; - -___modsi3: - [--SP] = RETS; - [--SP] = R0; - [--SP] = R1; - CALL ___divsi3; - R2 = [SP++]; - R1 = [SP++]; - R2 *= R0; - R0 = R1 - R2; - RETS = [SP++]; - RTS; -#endif - -#ifdef L_udivsi3 -.align 2 -.global ___udivsi3; -.type ___udivsi3, STT_FUNC; - -___udivsi3: - P0 = 32; - LSETUP (0f, 1f) LC0 = P0; - /* upper half of dividend */ - R3 = 0; -0: - /* The first time round in the loop we shift in garbage, but since we - perform 33 shifts, it doesn't matter. */ - R0 = ROT R0 BY 1; - R3 = ROT R3 BY 1; - R2 = R3 - R1; - CC = R3 < R1 (IU); -1: - /* Last instruction of the loop. */ - IF ! CC R3 = R2; - - /* Shift in the last bit. */ - R0 = ROT R0 BY 1; - /* R0 is the result, R3 contains the remainder. */ - R0 = ~ R0; - RTS; -#endif - -#ifdef L_umodsi3 -.align 2 -.global ___umodsi3; -.type ___umodsi3, STT_FUNC; - -___umodsi3: - [--SP] = RETS; - CALL ___udivsi3; - R0 = R3; - RETS = [SP++]; - RTS; -#endif - -#ifdef L_umulsi3_highpart -.align 2 -.global ___umulsi3_highpart; -.type ___umulsi3_highpart, STT_FUNC; - -___umulsi3_highpart: - A1 = R1.L * R0.L (FU); - A1 = A1 >> 16; - A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU); - A1 += R0.L * R1.H (FU); - A1 = A1 >> 16; - A0 += A1; - R0 = A0 (FU); - RTS; -#endif - -#ifdef L_smulsi3_highpart -.align 2 -.global ___smulsi3_highpart; -.type ___smulsi3_highpart, STT_FUNC; - -___smulsi3_highpart: - A1 = R1.L * R0.L (FU); - A1 = A1 >> 16; - A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M); - A1 += R1.H * R0.L (IS,M); - A1 = A1 >>> 16; - R0 = (A0 += A1); - RTS; -#endif - -#ifdef L_muldi3 -.align 2 -.global ___muldi3; -.type ___muldi3, STT_FUNC; - -/* - R1:R0 * R3:R2 - = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l -[X] = (R1.h * R3.h) * 2^96 -[X] + (R1.h * R3.l + R1.l * R3.h) * 2^80 -[X] + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64 -[T1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48 -[T2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32 -[T3] + (R0.l * R2.h + R2.l * R0.h) * 2^16 -[T4] + (R0.l * R2.l) - - We can discard the first three lines marked "X" since we produce - only a 64 bit result. So, we need ten 16-bit multiplies. - - Individual mul-acc results: -[E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h -[E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h -[E3] = R0.l * R2.h + R2.l * R0.h -[E4] = R0.l * R2.l - - We also need to add high parts from lower-level results to higher ones: - E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4 - - One interesting property is that all parts of the result that depend - on the sign of the multiplication are discarded. Those would be the - multiplications involving R1.h and R3.h, but only the top 16 bit of - the 32 bit result depend on the sign, and since R1.h and R3.h only - occur in E1, the top half of these results is cut off. - So, we can just use FU mode for all of the 16-bit multiplies, and - ignore questions of when to use mixed mode. */ - -___muldi3: - /* [SP] technically is part of the caller's frame, but we can - use it as scratch space. 
*/ - A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12]; /* E1 */ - A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4; /* E1 */ - A0 += A1; /* E1 */ - R4 = A0.w; - A0 = R0.l * R3.l (FU); /* E2 */ - A0 += R2.l * R1.l (FU); /* E2 */ - - A1 = R2.L * R0.L (FU); /* E4 */ - R3 = A1.w; - A1 = A1 >> 16; /* E3c */ - A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU); /* E2, E3c */ - A1 += R0.L * R2.H (FU); /* E3c */ - R0 = A1.w; - A1 = A1 >> 16; /* E2c */ - A0 += A1; /* E2c */ - R1 = A0.w; - - /* low(result) = low(E3c):low(E4) */ - R0 = PACK (R0.l, R3.l); - /* high(result) = E2c + (E1 << 16) */ - R1.h = R1.h + R4.l (NS) || R4 = [SP]; - RTS; - -.size ___muldi3, .-___muldi3 -#endif diff --git a/gcc/config/bfin/t-bfin b/gcc/config/bfin/t-bfin deleted file mode 100644 index bb95ab4139e..00000000000 --- a/gcc/config/bfin/t-bfin +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2005, 2007, 2011 Free Software Foundation, Inc. -# -# This file is part of GCC. -# -# GCC is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GCC is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GCC; see the file COPYING3. If not see -# . - -## Target part of the Makefile - -LIB1ASMSRC = bfin/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart -LIB1ASMFUNCS += _smulsi3_highpart diff --git a/gcc/config/bfin/t-bfin-elf b/gcc/config/bfin/t-bfin-elf index fcf76c4ddfe..5cbcfeeb87f 100644 --- a/gcc/config/bfin/t-bfin-elf +++ b/gcc/config/bfin/t-bfin-elf @@ -18,10 +18,6 @@ ## Target part of the Makefile -LIB1ASMSRC = bfin/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart -LIB1ASMFUNCS += _smulsi3_highpart - TARGET_LIBGCC2_CFLAGS = -fpic MULTILIB_OPTIONS=mcpu=bf532-none diff --git a/gcc/config/bfin/t-bfin-linux b/gcc/config/bfin/t-bfin-linux index a83f9f2da83..9a1d6a09437 100644 --- a/gcc/config/bfin/t-bfin-linux +++ b/gcc/config/bfin/t-bfin-linux @@ -18,10 +18,6 @@ ## Target part of the Makefile -LIB1ASMSRC = bfin/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart -LIB1ASMFUNCS += _smulsi3_highpart - TARGET_LIBGCC2_CFLAGS = -fpic MULTILIB_OPTIONS=mcpu=bf532-none diff --git a/gcc/config/bfin/t-bfin-uclinux b/gcc/config/bfin/t-bfin-uclinux index 1be0796987b..b9fca803e0a 100644 --- a/gcc/config/bfin/t-bfin-uclinux +++ b/gcc/config/bfin/t-bfin-uclinux @@ -18,10 +18,6 @@ ## Target part of the Makefile -LIB1ASMSRC = bfin/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart -LIB1ASMFUNCS += _smulsi3_highpart - TARGET_LIBGCC2_CFLAGS = -fpic MULTILIB_OPTIONS=mcpu=bf532-none diff --git a/gcc/config/c6x/lib1funcs.asm b/gcc/config/c6x/lib1funcs.asm deleted file mode 100644 index 5bf34474bbd..00000000000 --- a/gcc/config/c6x/lib1funcs.asm +++ /dev/null @@ -1,438 +0,0 @@ -/* Copyright 2010, 2011 Free Software Foundation, Inc. - Contributed by Bernd Schmidt . 
- -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - - ;; ABI considerations for the divide functions - ;; The following registers are call-used: - ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 - ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 - ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 - ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 - ;; - ;; In our implementation, divu and remu are leaf functions, - ;; while both divi and remi call into divu. - ;; A0 is not clobbered by any of the functions. - ;; divu does not clobber B2 either, which is taken advantage of - ;; in remi. - ;; divi uses B5 to hold the original return address during - ;; the call to divu. - ;; remi uses B2 and A5 to hold the input values during the - ;; call to divu. It stores B3 in on the stack. - -#ifdef L_divsi3 -.text -.align 2 -.global __c6xabi_divi -.hidden __c6xabi_divi -.type __c6xabi_divi, STT_FUNC - -__c6xabi_divi: - call .s2 __c6xabi_divu -|| mv .d2 B3, B5 -|| cmpgt .l1 0, A4, A1 -|| cmpgt .l2 0, B4, B1 - - [A1] neg .l1 A4, A4 -|| [B1] neg .l2 B4, B4 -|| xor .s1x A1, B1, A1 - -#ifdef _TMS320C6400 - [A1] addkpc .s2 1f, B3, 4 -#else - [A1] mvkl .s2 1f, B3 - [A1] mvkh .s2 1f, B3 - nop 2 -#endif -1: - neg .l1 A4, A4 -|| mv .l2 B3,B5 -|| ret .s2 B5 - nop 5 -#endif - -#if defined L_modsi3 || defined L_divmodsi4 -.align 2 -#ifdef L_modsi3 -#define MOD_OUTPUT_REG A4 -.global __c6xabi_remi -.hidden __c6xabi_remi -.type __c6xabi_remi, STT_FUNC -#else -#define MOD_OUTPUT_REG A5 -.global __c6xabi_divremi -.hidden __c6xabi_divremi -.type __c6xabi_divremi, STT_FUNC -__c6xabi_divremi: -#endif - -__c6xabi_remi: - stw .d2t2 B3, *B15--[2] -|| cmpgt .l1 0, A4, A1 -|| cmpgt .l2 0, B4, B2 -|| mv .s1 A4, A5 -|| call .s2 __c6xabi_divu - - [A1] neg .l1 A4, A4 -|| [B2] neg .l2 B4, B4 -|| xor .s2x B2, A1, B0 -|| mv .d2 B4, B2 - -#ifdef _TMS320C6400 - [B0] addkpc .s2 1f, B3, 1 - [!B0] addkpc .s2 2f, B3, 1 - nop 2 -#else - [B0] mvkl .s2 1f,B3 - [!B0] mvkl .s2 2f,B3 - - [B0] mvkh .s2 1f,B3 - [!B0] mvkh .s2 2f,B3 -#endif -1: - neg .l1 A4, A4 -2: - ldw .d2t2 *++B15[2], B3 - -#ifdef _TMS320C6400_PLUS - mpy32 .m1x A4, B2, A6 - nop 3 - ret .s2 B3 - sub .l1 A5, A6, MOD_OUTPUT_REG - nop 4 -#else - mpyu .m1x A4, B2, A1 - nop 1 - mpylhu .m1x A4, B2, A6 -|| mpylhu .m2x B2, A4, B2 - nop 1 - add .l1x A6, B2, A6 -|| ret .s2 B3 - shl .s1 A6, 16, A6 - add .d1 A6, A1, A6 - sub .l1 A5, A6, MOD_OUTPUT_REG - nop 2 -#endif - -#endif - -#if defined L_udivsi3 || defined L_udivmodsi4 -.align 2 -#ifdef L_udivsi3 -.global __c6xabi_divu -.hidden __c6xabi_divu -.type __c6xabi_divu, STT_FUNC -__c6xabi_divu: -#else -.global __c6xabi_divremu -.hidden __c6xabi_divremu -.type __c6xabi_divremu, STT_FUNC -__c6xabi_divremu: -#endif - ;; We use a series of up to 31 subc 
instructions. First, we find - ;; out how many leading zero bits there are in the divisor. This - ;; gives us both a shift count for aligning (shifting) the divisor - ;; to the, and the number of times we have to execute subc. - - ;; At the end, we have both the remainder and most of the quotient - ;; in A4. The top bit of the quotient is computed first and is - ;; placed in A2. - - ;; Return immediately if the dividend is zero. Setting B4 to 1 - ;; is a trick to allow us to leave the following insns in the jump - ;; delay slot without affecting the result. - mv .s2x A4, B1 - -#ifndef _TMS320C6400 -[!b1] mvk .s2 1, B4 -#endif -[b1] lmbd .l2 1, B4, B1 -||[!b1] b .s2 B3 ; RETURN A -#ifdef _TMS320C6400 -||[!b1] mvk .d2 1, B4 -#endif -#ifdef L_udivmodsi4 -||[!b1] zero .s1 A5 -#endif - mv .l1x B1, A6 -|| shl .s2 B4, B1, B4 - - ;; The loop performs a maximum of 28 steps, so we do the - ;; first 3 here. - cmpltu .l1x A4, B4, A2 -[!A2] sub .l1x A4, B4, A4 -|| shru .s2 B4, 1, B4 -|| xor .s1 1, A2, A2 - - shl .s1 A2, 31, A2 -|| [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - - ;; RETURN A may happen here (note: must happen before the next branch) -0: - cmpgt .l2 B1, 7, B0 -|| [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -|| [b0] b .s1 0b -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - ;; loop backwards branch happens here - - ret .s2 B3 -|| mvk .s1 32, A1 - sub .l1 A1, A6, A6 -#ifdef L_udivmodsi4 -|| extu .s1 A4, A6, A5 -#endif - shl .s1 A4, A6, A4 - shru .s1 A4, 1, A4 -|| sub .l1 A6, 1, A6 - or .l1 A2, A4, A4 - shru .s1 A4, A6, A4 - nop - -#endif - -#ifdef L_umodsi3 -.align 2 -.global __c6xabi_remu -.hidden __c6xabi_remu -.type __c6xabi_remu, STT_FUNC -__c6xabi_remu: - ;; The ABI seems designed to prevent these functions calling each other, - ;; so we duplicate most of the divsi3 code here. 
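The comments above describe the classic normalize-then-conditionally-subtract scheme: count the divisor's leading zeros (LMBD), shift it up to the dividend, then peel off one quotient bit per SUBC. A stand-alone C model of that loop is sketched below; it is illustrative only: the helper name is made up, __builtin_clz stands in for LMBD, and the zero-divisor case, which is undefined behaviour in C, is simply skipped. __c6xabi_remu, whose body follows, runs the same loop but hands back the remainder instead of the quotient.

#include <stdint.h>

/* Illustrative sketch of the LMBD + SUBC division loop; not the
   patch's code.  Returns the quotient, stores the remainder.  */
static uint32_t
udivmodsi4_sketch (uint32_t n, uint32_t d, uint32_t *rem)
{
  uint32_t q = 0;

  if (d != 0 && n != 0)
    {
      /* Align the divisor with the dividend's top set bit;
         the assembly gets this count from LMBD.  */
      int steps = __builtin_clz (d) - __builtin_clz (n);
      if (steps < 0)
        steps = 0;
      d <<= steps;

      /* One conditional subtract per step, as SUBC does.  */
      for (int i = 0; i <= steps; i++)
        {
          q <<= 1;
          if (n >= d)
            {
              n -= d;
              q |= 1;
            }
          d >>= 1;
        }
    }

  if (rem)
    *rem = n;                 /* whatever is left of the dividend */
  return q;
}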
- mv .s2x A4, B1 -#ifndef _TMS320C6400 -[!b1] mvk .s2 1, B4 -#endif - lmbd .l2 1, B4, B1 -||[!b1] b .s2 B3 ; RETURN A -#ifdef _TMS320C6400 -||[!b1] mvk .d2 1, B4 -#endif - - mv .l1x B1, A7 -|| shl .s2 B4, B1, B4 - - cmpltu .l1x A4, B4, A1 -[!a1] sub .l1x A4, B4, A4 - shru .s2 B4, 1, B4 - -0: - cmpgt .l2 B1, 7, B0 -|| [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - ;; RETURN A may happen here (note: must happen before the next branch) -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -|| [b0] b .s1 0b -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - ;; loop backwards branch happens here - - ret .s2 B3 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 - - extu .s1 A4, A7, A4 - nop 2 -#endif - -#if defined L_strasgi_64plus && defined _TMS320C6400_PLUS - -.align 2 -.global __c6xabi_strasgi_64plus -.hidden __c6xabi_strasgi_64plus -.type __c6xabi_strasgi_64plus, STT_FUNC -__c6xabi_strasgi_64plus: - shru .s2x a6, 2, b31 -|| mv .s1 a4, a30 -|| mv .d2 b4, b30 - - add .s2 -4, b31, b31 - - sploopd 1 -|| mvc .s2 b31, ilc - ldw .d2t2 *b30++, b31 - nop 4 - mv .s1x b31,a31 - spkernel 6, 0 -|| stw .d1t1 a31, *a30++ - - ret .s2 b3 - nop 5 -#endif - -#ifdef L_strasgi -.global __c6xabi_strasgi -.type __c6xabi_strasgi, STT_FUNC -__c6xabi_strasgi: - ;; This is essentially memcpy, with alignment known to be at least - ;; 4, and the size a multiple of 4 greater than or equal to 28. - ldw .d2t1 *B4++, A0 -|| mvk .s2 16, B1 - ldw .d2t1 *B4++, A1 -|| mvk .s2 20, B2 -|| sub .d1 A6, 24, A6 - ldw .d2t1 *B4++, A5 - ldw .d2t1 *B4++, A7 -|| mv .l2x A6, B7 - ldw .d2t1 *B4++, A8 - ldw .d2t1 *B4++, A9 -|| mv .s2x A0, B5 -|| cmpltu .l2 B2, B7, B0 - -0: - stw .d1t2 B5, *A4++ -||[b0] ldw .d2t1 *B4++, A0 -|| mv .s2x A1, B5 -|| mv .l2 B7, B6 - -[b0] sub .d2 B6, 24, B7 -||[b0] b .s2 0b -|| cmpltu .l2 B1, B6, B0 - -[b0] ldw .d2t1 *B4++, A1 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A5, B5 -|| cmpltu .l2 12, B6, B0 - -[b0] ldw .d2t1 *B4++, A5 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A7, B5 -|| cmpltu .l2 8, B6, B0 - -[b0] ldw .d2t1 *B4++, A7 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A8, B5 -|| cmpltu .l2 4, B6, B0 - -[b0] ldw .d2t1 *B4++, A8 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A9, B5 -|| cmpltu .l2 0, B6, B0 - -[b0] ldw .d2t1 *B4++, A9 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A0, B5 -|| cmpltu .l2 B2, B7, B0 - - ;; loop back branch happens here - - cmpltu .l2 B1, B6, B0 -|| ret .s2 b3 - -[b0] stw .d1t1 A1, *A4++ -|| cmpltu .l2 12, B6, B0 -[b0] stw .d1t1 A5, *A4++ -|| cmpltu .l2 8, B6, B0 -[b0] stw .d1t1 A7, *A4++ -|| cmpltu .l2 4, B6, B0 -[b0] stw .d1t1 A8, *A4++ -|| cmpltu .l2 0, B6, B0 -[b0] stw .d1t1 A9, *A4++ - - ;; return happens here - -#endif - -#ifdef _TMS320C6400_PLUS -#ifdef L_push_rts -.align 2 -.global __c6xabi_push_rts -.hidden __c6xabi_push_rts -.type __c6xabi_push_rts, STT_FUNC -__c6xabi_push_rts: - stw .d2t2 B14, *B15--[2] - stdw .d2t1 A15:A14, *B15-- -|| b .s2x A3 - stdw .d2t2 B13:B12, *B15-- - stdw .d2t1 A13:A12, *B15-- - stdw .d2t2 B11:B10, *B15-- - stdw .d2t1 A11:A10, *B15-- - stdw .d2t2 B3:B2, *B15-- -#endif - -#ifdef L_pop_rts -.align 2 -.global __c6xabi_pop_rts -.hidden __c6xabi_pop_rts -.type __c6xabi_pop_rts, STT_FUNC -__c6xabi_pop_rts: - lddw .d2t2 *++B15, B3:B2 - lddw .d2t1 *++B15, A11:A10 - lddw .d2t2 *++B15, B11:B10 - lddw .d2t1 *++B15, A13:A12 - lddw .d2t2 *++B15, B13:B12 - 
lddw .d2t1 *++B15, A15:A14 -|| b .s2 B3 - ldw .d2t2 *++B15[2], B14 - nop 4 -#endif - -#ifdef L_call_stub -.align 2 -.global __c6xabi_call_stub -.type __c6xabi_call_stub, STT_FUNC -__c6xabi_call_stub: - stw .d2t1 A2, *B15--[2] - stdw .d2t1 A7:A6, *B15-- -|| call .s2 B31 - stdw .d2t1 A1:A0, *B15-- - stdw .d2t2 B7:B6, *B15-- - stdw .d2t2 B5:B4, *B15-- - stdw .d2t2 B1:B0, *B15-- - stdw .d2t2 B3:B2, *B15-- -|| addkpc .s2 1f, B3, 0 -1: - lddw .d2t2 *++B15, B3:B2 - lddw .d2t2 *++B15, B1:B0 - lddw .d2t2 *++B15, B5:B4 - lddw .d2t2 *++B15, B7:B6 - lddw .d2t1 *++B15, A1:A0 - lddw .d2t1 *++B15, A7:A6 -|| b .s2 B3 - ldw .d2t1 *++B15[2], A2 - nop 4 -#endif - -#endif - diff --git a/gcc/config/c6x/t-c6x-elf b/gcc/config/c6x/t-c6x-elf index b3b4b850fe8..6bc2832026d 100644 --- a/gcc/config/c6x/t-c6x-elf +++ b/gcc/config/c6x/t-c6x-elf @@ -18,11 +18,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = c6x/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _udivmodsi4 _divmodsi4 -LIB1ASMFUNCS += _strasgi _strasgi_64plus _clzsi2 _clzdi2 _clz -LIB1ASMFUNCS += _push_rts _pop_rts _call_stub - LIB2FUNCS_EXCLUDE = _cmpdi2 _ucmpdi2 _gcc_bcmp _eprintf _clzsi _clzdi EXTRA_HEADERS += $(srcdir)/ginclude/unwind-arm-common.h diff --git a/gcc/config/fr30/lib1funcs.asm b/gcc/config/fr30/lib1funcs.asm deleted file mode 100644 index 7c63453123a..00000000000 --- a/gcc/config/fr30/lib1funcs.asm +++ /dev/null @@ -1,115 +0,0 @@ -/* libgcc routines for the FR30. - Copyright (C) 1998, 1999, 2009 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - - .macro FUNC_START name - .text - .globl __\name - .type __\name, @function -__\name: - .endm - - .macro FUNC_END name - .size __\name, . - __\name - .endm - - .macro DIV_BODY reg number - .if \number - DIV_BODY \reg, "\number - 1" - div1 \reg - .endif - .endm - -#ifdef L_udivsi3 -FUNC_START udivsi3 - ;; Perform an unsiged division of r4 / r5 and place the result in r4. - ;; Does not handle overflow yet... - mov r4, mdl - div0u r5 - DIV_BODY r5 32 - mov mdl, r4 - ret -FUNC_END udivsi3 -#endif /* L_udivsi3 */ - -#ifdef L_divsi3 -FUNC_START divsi3 - ;; Perform a siged division of r4 / r5 and place the result in r4. - ;; Does not handle overflow yet... - mov r4, mdl - div0s r5 - DIV_BODY r5 32 - div2 r5 - div3 - div4s - mov mdl, r4 - ret -FUNC_END divsi3 -#endif /* L_divsi3 */ - -#ifdef L_umodsi3 -FUNC_START umodsi3 - ;; Perform an unsiged division of r4 / r5 and places the remainder in r4. - ;; Does not handle overflow yet... 
- mov r4, mdl - div0u r5 - DIV_BODY r5 32 - mov mdh, r4 - ret -FUNC_END umodsi3 -#endif /* L_umodsi3 */ - -#ifdef L_modsi3 -FUNC_START modsi3 - ;; Perform a siged division of r4 / r5 and place the remainder in r4. - ;; Does not handle overflow yet... - mov r4, mdl - div0s r5 - DIV_BODY r5 32 - div2 r5 - div3 - div4s - mov mdh, r4 - ret -FUNC_END modsi3 -#endif /* L_modsi3 */ - -#ifdef L_negsi2 -FUNC_START negsi2 - ldi:8 #0, r0 - sub r4, r0 - mov r0, r4 - ret -FUNC_END negsi2 -#endif /* L_negsi2 */ - -#ifdef L_one_cmplsi2 -FUNC_START one_cmplsi2 - ldi:8 #0xff, r0 - extsb r0 - eor r0, r4 - ret -FUNC_END one_cmplsi2 -#endif /* L_one_cmplsi2 */ - - diff --git a/gcc/config/fr30/t-fr30 b/gcc/config/fr30/t-fr30 index 75009d4eb70..e37921681d0 100644 --- a/gcc/config/fr30/t-fr30 +++ b/gcc/config/fr30/t-fr30 @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = fr30/lib1funcs.asm -LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 - # If any special flags are necessary when building libgcc2 put them here. # # TARGET_LIBGCC2_CFLAGS diff --git a/gcc/config/frv/lib1funcs.asm b/gcc/config/frv/lib1funcs.asm deleted file mode 100644 index d1ffcab6133..00000000000 --- a/gcc/config/frv/lib1funcs.asm +++ /dev/null @@ -1,269 +0,0 @@ -/* Library functions. - Copyright (C) 2000, 2003, 2008, 2009 Free Software Foundation, Inc. - Contributed by Red Hat, Inc. - - This file is part of GCC. - - GCC is free software ; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY ; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#include - - -#ifdef L_cmpll -/* icc0 = __cmpll (long long a, long long b) */ - - .globl EXT(__cmpll) - .type EXT(__cmpll),@function - .text - .p2align 4 -EXT(__cmpll): - cmp gr8, gr10, icc0 - ckeq icc0, cc4 - P(ccmp) gr9, gr11, cc4, 1 - ret -.Lend: - .size EXT(__cmpll),.Lend-EXT(__cmpll) -#endif /* L_cmpll */ - -#ifdef L_cmpf -/* icc0 = __cmpf (float a, float b) */ -/* Note, because this function returns the result in ICC0, it means it can't - handle NaNs. 
*/ - - .globl EXT(__cmpf) - .type EXT(__cmpf),@function - .text - .p2align 4 -EXT(__cmpf): -#ifdef __FRV_HARD_FLOAT__ /* floating point instructions available */ - movgf gr8, fr0 - P(movgf) gr9, fr1 - setlos #1, gr8 - fcmps fr0, fr1, fcc0 - P(fcklt) fcc0, cc0 - fckeq fcc0, cc1 - csub gr0, gr8, gr8, cc0, 1 - cmov gr0, gr8, cc1, 1 - cmpi gr8, 0, icc0 - ret -#else /* no floating point instructions available */ - movsg lr, gr4 - addi sp, #-16, sp - sti gr4, @(sp, 8) - st fp, @(sp, gr0) - mov sp, fp - call EXT(__cmpsf2) - cmpi gr8, #0, icc0 - ldi @(sp, 8), gr4 - movgs gr4, lr - ld @(sp,gr0), fp - addi sp, #16, sp - ret -#endif -.Lend: - .size EXT(__cmpf),.Lend-EXT(__cmpf) -#endif - -#ifdef L_cmpd -/* icc0 = __cmpd (double a, double b) */ -/* Note, because this function returns the result in ICC0, it means it can't - handle NaNs. */ - - .globl EXT(__cmpd) - .type EXT(__cmpd),@function - .text - .p2align 4 -EXT(__cmpd): - movsg lr, gr4 - addi sp, #-16, sp - sti gr4, @(sp, 8) - st fp, @(sp, gr0) - mov sp, fp - call EXT(__cmpdf2) - cmpi gr8, #0, icc0 - ldi @(sp, 8), gr4 - movgs gr4, lr - ld @(sp,gr0), fp - addi sp, #16, sp - ret -.Lend: - .size EXT(__cmpd),.Lend-EXT(__cmpd) -#endif - -#ifdef L_addll -/* gr8,gr9 = __addll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__addll) - .type EXT(__addll),@function - .text - .p2align -EXT(__addll): - addcc gr9, gr11, gr9, icc0 - addx gr8, gr10, gr8, icc0 - ret -.Lend: - .size EXT(__addll),.Lend-EXT(__addll) -#endif - -#ifdef L_subll -/* gr8,gr9 = __subll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__subll) - .type EXT(__subll),@function - .text - .p2align 4 -EXT(__subll): - subcc gr9, gr11, gr9, icc0 - subx gr8, gr10, gr8, icc0 - ret -.Lend: - .size EXT(__subll),.Lend-EXT(__subll) -#endif - -#ifdef L_andll -/* gr8,gr9 = __andll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__andll) - .type EXT(__andll),@function - .text - .p2align 4 -EXT(__andll): - P(and) gr9, gr11, gr9 - P2(and) gr8, gr10, gr8 - ret -.Lend: - .size EXT(__andll),.Lend-EXT(__andll) -#endif - -#ifdef L_orll -/* gr8,gr9 = __orll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__orll) - .type EXT(__orll),@function - .text - .p2align 4 -EXT(__orll): - P(or) gr9, gr11, gr9 - P2(or) gr8, gr10, gr8 - ret -.Lend: - .size EXT(__orll),.Lend-EXT(__orll) -#endif - -#ifdef L_xorll -/* gr8,gr9 = __xorll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__xorll) - .type EXT(__xorll),@function - .text - .p2align 4 -EXT(__xorll): - P(xor) gr9, gr11, gr9 - P2(xor) gr8, gr10, gr8 - ret -.Lend: - .size EXT(__xorll),.Lend-EXT(__xorll) -#endif - -#ifdef L_notll -/* gr8,gr9 = __notll (long long a) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. 
*/ - - .globl EXT(__notll) - .type EXT(__notll),@function - .text - .p2align 4 -EXT(__notll): - P(not) gr9, gr9 - P2(not) gr8, gr8 - ret -.Lend: - .size EXT(__notll),.Lend-EXT(__notll) -#endif - -#ifdef L_cmov -/* (void) __cmov (char *dest, const char *src, size_t len) */ -/* - * void __cmov (char *dest, const char *src, size_t len) - * { - * size_t i; - * - * if (dest < src || dest > src+len) - * { - * for (i = 0; i < len; i++) - * dest[i] = src[i]; - * } - * else - * { - * while (len-- > 0) - * dest[len] = src[len]; - * } - * } - */ - - .globl EXT(__cmov) - .type EXT(__cmov),@function - .text - .p2align 4 -EXT(__cmov): - P(cmp) gr8, gr9, icc0 - add gr9, gr10, gr4 - P(cmp) gr8, gr4, icc1 - bc icc0, 0, .Lfwd - bls icc1, 0, .Lback -.Lfwd: - /* move bytes in a forward direction */ - P(setlos) #0, gr5 - cmp gr0, gr10, icc0 - P(subi) gr9, #1, gr9 - P2(subi) gr8, #1, gr8 - bnc icc0, 0, .Lret -.Lfloop: - /* forward byte move loop */ - addi gr5, #1, gr5 - P(ldsb) @(gr9, gr5), gr4 - cmp gr5, gr10, icc0 - P(stb) gr4, @(gr8, gr5) - bc icc0, 0, .Lfloop - ret -.Lbloop: - /* backward byte move loop body */ - ldsb @(gr9,gr10),gr4 - stb gr4,@(gr8,gr10) -.Lback: - P(cmpi) gr10, #0, icc0 - addi gr10, #-1, gr10 - bne icc0, 0, .Lbloop -.Lret: - ret -.Lend: - .size EXT(__cmov),.Lend-EXT(__cmov) -#endif diff --git a/gcc/config/frv/t-frv b/gcc/config/frv/t-frv index 03f3cd8cde1..e31f823c30a 100644 --- a/gcc/config/frv/t-frv +++ b/gcc/config/frv/t-frv @@ -16,15 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -# Name of assembly file containing libgcc1 functions. -# This entry must be present, but it can be empty if the target does -# not need any assembler functions to support its code generation. -# -# Alternatively if assembler functions *are* needed then define the -# entries below: -CROSS_LIBGCC1 = libgcc1-asm.a -LIB1ASMSRC = frv/lib1funcs.asm -LIB1ASMFUNCS = _cmpll _cmpf _cmpd _addll _subll _andll _orll _xorll _notll _cmov LIB2FUNCS_EXTRA = cmovh.c cmovw.c cmovd.c modi.c umodi.c uitof.c uitod.c ulltof.c ulltod.c # If any special flags are necessary when building libgcc2 put them here. diff --git a/gcc/config/h8300/fixunssfsi.c b/gcc/config/h8300/fixunssfsi.c index 2fe62b7a1a8..940d0c6dc6a 100644 --- a/gcc/config/h8300/fixunssfsi.c +++ b/gcc/config/h8300/fixunssfsi.c @@ -1,6 +1,6 @@ /* More subroutines needed by GCC output code on some machines. */ /* Compile this one with gcc. */ -/* Copyright (C) 1989, 1992, 2001, 2002, 2003, 2004, 2009 +/* Copyright (C) 1989, 1992, 2001, 2002, 2003, 2004, 2009, 2011 Free Software Foundation, Inc. This file is part of GCC. @@ -26,7 +26,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* The libgcc2.c implementation gets confused by our type setup and creates a directly recursive call, so we do our own implementation. For - the H8/300, that's in lib1funcs.asm, for H8/300H and H8S, it's here. */ + the H8/300, that's in lib1funcs.S, for H8/300H and H8S, it's here. */ #ifndef __H8300__ long __fixunssfsi (float a); diff --git a/gcc/config/h8300/lib1funcs.asm b/gcc/config/h8300/lib1funcs.asm deleted file mode 100644 index 1b75b73269d..00000000000 --- a/gcc/config/h8300/lib1funcs.asm +++ /dev/null @@ -1,838 +0,0 @@ -;; libgcc routines for the Renesas H8/300 CPU. -;; Contributed by Steve Chamberlain -;; Optimizations by Toshiyasu Morita - -/* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004, 2009 - Free Software Foundation, Inc. 
- -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -/* Assembler register definitions. */ - -#define A0 r0 -#define A0L r0l -#define A0H r0h - -#define A1 r1 -#define A1L r1l -#define A1H r1h - -#define A2 r2 -#define A2L r2l -#define A2H r2h - -#define A3 r3 -#define A3L r3l -#define A3H r3h - -#define S0 r4 -#define S0L r4l -#define S0H r4h - -#define S1 r5 -#define S1L r5l -#define S1H r5h - -#define S2 r6 -#define S2L r6l -#define S2H r6h - -#ifdef __H8300__ -#define PUSHP push -#define POPP pop - -#define A0P r0 -#define A1P r1 -#define A2P r2 -#define A3P r3 -#define S0P r4 -#define S1P r5 -#define S2P r6 -#endif - -#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__) -#define PUSHP push.l -#define POPP pop.l - -#define A0P er0 -#define A1P er1 -#define A2P er2 -#define A3P er3 -#define S0P er4 -#define S1P er5 -#define S2P er6 - -#define A0E e0 -#define A1E e1 -#define A2E e2 -#define A3E e3 -#endif - -#ifdef __H8300H__ -#ifdef __NORMAL_MODE__ - .h8300hn -#else - .h8300h -#endif -#endif - -#ifdef __H8300S__ -#ifdef __NORMAL_MODE__ - .h8300sn -#else - .h8300s -#endif -#endif -#ifdef __H8300SX__ -#ifdef __NORMAL_MODE__ - .h8300sxn -#else - .h8300sx -#endif -#endif - -#ifdef L_cmpsi2 -#ifdef __H8300__ - .section .text - .align 2 - .global ___cmpsi2 -___cmpsi2: - cmp.w A0,A2 - bne .L2 - cmp.w A1,A3 - bne .L4 - mov.w #1,A0 - rts -.L2: - bgt .L5 -.L3: - mov.w #2,A0 - rts -.L4: - bls .L3 -.L5: - sub.w A0,A0 - rts - .end -#endif -#endif /* L_cmpsi2 */ - -#ifdef L_ucmpsi2 -#ifdef __H8300__ - .section .text - .align 2 - .global ___ucmpsi2 -___ucmpsi2: - cmp.w A0,A2 - bne .L2 - cmp.w A1,A3 - bne .L4 - mov.w #1,A0 - rts -.L2: - bhi .L5 -.L3: - mov.w #2,A0 - rts -.L4: - bls .L3 -.L5: - sub.w A0,A0 - rts - .end -#endif -#endif /* L_ucmpsi2 */ - -#ifdef L_divhi3 - -;; HImode divides for the H8/300. -;; We bunch all of this into one object file since there are several -;; "supporting routines". - -; general purpose normalize routine -; -; divisor in A0 -; dividend in A1 -; turns both into +ve numbers, and leaves what the answer sign -; should be in A2L - -#ifdef __H8300__ - .section .text - .align 2 -divnorm: - or A0H,A0H ; is divisor > 0 - stc ccr,A2L - bge _lab1 - not A0H ; no - then make it +ve - not A0L - adds #1,A0 -_lab1: or A1H,A1H ; look at dividend - bge _lab2 - not A1H ; it is -ve, make it positive - not A1L - adds #1,A1 - xor #0x8,A2L; and toggle sign of result -_lab2: rts -;; Basically the same, except that the sign of the divisor determines -;; the sign. 
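In C terms, divnorm/negans above, and the modnorm variant that follows, wrap the unsigned helpers with sign bookkeeping: strip the signs, remember them (the bit parked in A2L here, S2L in the SImode copy), run the unsigned operation, then negate the result as needed. The sketch below is illustrative only; the *_sketch helpers are stand-ins rather than the real ___udivhi3/___umodhi3, and the same pattern reappears for the SImode routines later in this file. For C truncating division the remainder takes the dividend's sign.

#include <stdint.h>

/* Stand-ins for the unsigned helpers; illustrative only.  */
static uint32_t udiv_sketch (uint32_t a, uint32_t b) { return a / b; }
static uint32_t umod_sketch (uint32_t a, uint32_t b) { return a % b; }

int32_t
div_sketch (int32_t a, int32_t b)
{
  /* divnorm: the result must be negated iff exactly one input is
     negative (the flag kept in A2L/S2L).  */
  int negate = (a < 0) != (b < 0);
  uint32_t ua = a < 0 ? 0u - (uint32_t) a : (uint32_t) a;
  uint32_t ub = b < 0 ? 0u - (uint32_t) b : (uint32_t) b;
  uint32_t q = udiv_sketch (ua, ub);
  /* negans: conditionally negate the unsigned result.  */
  return negate ? (int32_t) (0u - q) : (int32_t) q;
}

int32_t
mod_sketch (int32_t a, int32_t b)
{
  /* modnorm path: only the dividend's sign is propagated here,
     matching C truncating-division semantics.  */
  uint32_t ua = a < 0 ? 0u - (uint32_t) a : (uint32_t) a;
  uint32_t ub = b < 0 ? 0u - (uint32_t) b : (uint32_t) b;
  uint32_t r = umod_sketch (ua, ub);
  return a < 0 ? (int32_t) (0u - r) : (int32_t) r;
}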
-modnorm: - or A0H,A0H ; is divisor > 0 - stc ccr,A2L - bge _lab7 - not A0H ; no - then make it +ve - not A0L - adds #1,A0 -_lab7: or A1H,A1H ; look at dividend - bge _lab8 - not A1H ; it is -ve, make it positive - not A1L - adds #1,A1 -_lab8: rts - -; A0=A0/A1 signed - - .global ___divhi3 -___divhi3: - bsr divnorm - bsr ___udivhi3 -negans: btst #3,A2L ; should answer be negative ? - beq _lab4 - not A0H ; yes, so make it so - not A0L - adds #1,A0 -_lab4: rts - -; A0=A0%A1 signed - - .global ___modhi3 -___modhi3: - bsr modnorm - bsr ___udivhi3 - mov A3,A0 - bra negans - -; A0=A0%A1 unsigned - - .global ___umodhi3 -___umodhi3: - bsr ___udivhi3 - mov A3,A0 - rts - -; A0=A0/A1 unsigned -; A3=A0%A1 unsigned -; A2H trashed -; D high 8 bits of denom -; d low 8 bits of denom -; N high 8 bits of num -; n low 8 bits of num -; M high 8 bits of mod -; m low 8 bits of mod -; Q high 8 bits of quot -; q low 8 bits of quot -; P preserve - -; The H8/300 only has a 16/8 bit divide, so we look at the incoming and -; see how to partition up the expression. - - .global ___udivhi3 -___udivhi3: - ; A0 A1 A2 A3 - ; Nn Dd P - sub.w A3,A3 ; Nn Dd xP 00 - or A1H,A1H - bne divlongway - or A0H,A0H - beq _lab6 - -; we know that D == 0 and N is != 0 - mov.b A0H,A3L ; Nn Dd xP 0N - divxu A1L,A3 ; MQ - mov.b A3L,A0H ; Q -; dealt with N, do n -_lab6: mov.b A0L,A3L ; n - divxu A1L,A3 ; mq - mov.b A3L,A0L ; Qq - mov.b A3H,A3L ; m - mov.b #0x0,A3H ; Qq 0m - rts - -; D != 0 - which means the denominator is -; loop around to get the result. - -divlongway: - mov.b A0H,A3L ; Nn Dd xP 0N - mov.b #0x0,A0H ; high byte of answer has to be zero - mov.b #0x8,A2H ; 8 -div8: add.b A0L,A0L ; n*=2 - rotxl A3L ; Make remainder bigger - rotxl A3H - sub.w A1,A3 ; Q-=N - bhs setbit ; set a bit ? - add.w A1,A3 ; no : too far , Q+=N - - dec A2H - bne div8 ; next bit - rts - -setbit: inc A0L ; do insert bit - dec A2H - bne div8 ; next bit - rts - -#endif /* __H8300__ */ -#endif /* L_divhi3 */ - -#ifdef L_divsi3 - -;; 4 byte integer divides for the H8/300. -;; -;; We have one routine which does all the work and lots of -;; little ones which prepare the args and massage the sign. -;; We bunch all of this into one object file since there are several -;; "supporting routines". - - .section .text - .align 2 - -; Put abs SIs into r0/r1 and r2/r3, and leave a 1 in r6l with sign of rest. -; This function is here to keep branch displacements small. - -#ifdef __H8300__ - -divnorm: - mov.b A0H,A0H ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge postive - - ; negate arg - not A0H - not A1H - not A0L - not A1L - - add #1,A1L - addx #0,A1H - addx #0,A0L - addx #0,A0H -postive: - mov.b A2H,A2H ; is the denominator -ve - bge postive2 - not A2L - not A2H - not A3L - not A3H - add.b #1,A3L - addx #0,A3H - addx #0,A2L - addx #0,A2H - xor.b #0x08,S2L ; toggle the result sign -postive2: - rts - -;; Basically the same, except that the sign of the divisor determines -;; the sign. 
-modnorm: - mov.b A0H,A0H ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge mpostive - - ; negate arg - not A0H - not A1H - not A0L - not A1L - - add #1,A1L - addx #0,A1H - addx #0,A0L - addx #0,A0H -mpostive: - mov.b A2H,A2H ; is the denominator -ve - bge mpostive2 - not A2L - not A2H - not A3L - not A3H - add.b #1,A3L - addx #0,A3H - addx #0,A2L - addx #0,A2H -mpostive2: - rts - -#else /* __H8300H__ */ - -divnorm: - mov.l A0P,A0P ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge postive - - neg.l A0P ; negate arg - -postive: - mov.l A1P,A1P ; is the denominator -ve - bge postive2 - - neg.l A1P ; negate arg - xor.b #0x08,S2L ; toggle the result sign - -postive2: - rts - -;; Basically the same, except that the sign of the divisor determines -;; the sign. -modnorm: - mov.l A0P,A0P ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge mpostive - - neg.l A0P ; negate arg - -mpostive: - mov.l A1P,A1P ; is the denominator -ve - bge mpostive2 - - neg.l A1P ; negate arg - -mpostive2: - rts - -#endif - -; numerator in A0/A1 -; denominator in A2/A3 - .global ___modsi3 -___modsi3: -#ifdef __H8300__ - PUSHP S2P - PUSHP S0P - PUSHP S1P - bsr modnorm - bsr divmodsi4 - mov S0,A0 - mov S1,A1 - bra exitdiv -#else - PUSHP S2P - bsr modnorm - bsr ___udivsi3 - mov.l er3,er0 - bra exitdiv -#endif - - ;; H8/300H and H8S version of ___udivsi3 is defined later in - ;; the file. -#ifdef __H8300__ - .global ___udivsi3 -___udivsi3: - PUSHP S2P - PUSHP S0P - PUSHP S1P - bsr divmodsi4 - bra reti -#endif - - .global ___umodsi3 -___umodsi3: -#ifdef __H8300__ - PUSHP S2P - PUSHP S0P - PUSHP S1P - bsr divmodsi4 - mov S0,A0 - mov S1,A1 - bra reti -#else - bsr ___udivsi3 - mov.l er3,er0 - rts -#endif - - .global ___divsi3 -___divsi3: -#ifdef __H8300__ - PUSHP S2P - PUSHP S0P - PUSHP S1P - jsr divnorm - jsr divmodsi4 -#else - PUSHP S2P - jsr divnorm - bsr ___udivsi3 -#endif - - ; examine what the sign should be -exitdiv: - btst #3,S2L - beq reti - - ; should be -ve -#ifdef __H8300__ - not A0H - not A1H - not A0L - not A1L - - add #1,A1L - addx #0,A1H - addx #0,A0L - addx #0,A0H -#else /* __H8300H__ */ - neg.l A0P -#endif - -reti: -#ifdef __H8300__ - POPP S1P - POPP S0P -#endif - POPP S2P - rts - - ; takes A0/A1 numerator (A0P for H8/300H) - ; A2/A3 denominator (A1P for H8/300H) - ; returns A0/A1 quotient (A0P for H8/300H) - ; S0/S1 remainder (S0P for H8/300H) - ; trashes S2H - -#ifdef __H8300__ - -divmodsi4: - sub.w S0,S0 ; zero play area - mov.w S0,S1 - mov.b A2H,S2H - or A2L,S2H - or A3H,S2H - bne DenHighNonZero - mov.b A0H,A0H - bne NumByte0Zero - mov.b A0L,A0L - bne NumByte1Zero - mov.b A1H,A1H - bne NumByte2Zero - bra NumByte3Zero -NumByte0Zero: - mov.b A0H,S1L - divxu A3L,S1 - mov.b S1L,A0H -NumByte1Zero: - mov.b A0L,S1L - divxu A3L,S1 - mov.b S1L,A0L -NumByte2Zero: - mov.b A1H,S1L - divxu A3L,S1 - mov.b S1L,A1H -NumByte3Zero: - mov.b A1L,S1L - divxu A3L,S1 - mov.b S1L,A1L - - mov.b S1H,S1L - mov.b #0x0,S1H - rts - -; have to do the divide by shift and test -DenHighNonZero: - mov.b A0H,S1L - mov.b A0L,A0H - mov.b A1H,A0L - mov.b A1L,A1H - - mov.b #0,A1L - mov.b #24,S2H ; only do 24 iterations - -nextbit: - add.w A1,A1 ; double the answer guess - rotxl A0L - rotxl A0H - - rotxl S1L ; double remainder - rotxl S1H - rotxl S0L - rotxl S0H - sub.w A3,S1 ; does it all fit - subx A2L,S0L - subx A2H,S0H - bhs setone - - add.w A3,S1 ; no, restore mistake - addx A2L,S0L - addx A2H,S0H - - dec S2H - bne nextbit - rts - -setone: - inc A1L - dec S2H - bne 
nextbit - rts - -#else /* __H8300H__ */ - - ;; This function also computes the remainder and stores it in er3. - .global ___udivsi3 -___udivsi3: - mov.w A1E,A1E ; denominator top word 0? - bne DenHighNonZero - - ; do it the easy way, see page 107 in manual - mov.w A0E,A2 - extu.l A2P - divxu.w A1,A2P - mov.w A2E,A0E - divxu.w A1,A0P - mov.w A0E,A3 - mov.w A2,A0E - extu.l A3P - rts - - ; er0 = er0 / er1 - ; er3 = er0 % er1 - ; trashes er1 er2 - ; expects er1 >= 2^16 -DenHighNonZero: - mov.l er0,er3 - mov.l er1,er2 -#ifdef __H8300H__ -divmod_L21: - shlr.l er0 - shlr.l er2 ; make divisor < 2^16 - mov.w e2,e2 - bne divmod_L21 -#else - shlr.l #2,er2 ; make divisor < 2^16 - mov.w e2,e2 - beq divmod_L22A -divmod_L21: - shlr.l #2,er0 -divmod_L22: - shlr.l #2,er2 ; make divisor < 2^16 - mov.w e2,e2 - bne divmod_L21 -divmod_L22A: - rotxl.w r2 - bcs divmod_L23 - shlr.l er0 - bra divmod_L24 -divmod_L23: - rotxr.w r2 - shlr.l #2,er0 -divmod_L24: -#endif - ;; At this point, - ;; er0 contains shifted dividend - ;; er1 contains divisor - ;; er2 contains shifted divisor - ;; er3 contains dividend, later remainder - divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ) - extu.l er0 - beq divmod_L25 - subs #1,er0 ; er0 = AQ - 1 - mov.w e1,r2 - mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor - sub.w r2,e3 ; dividend - 65536 * er2 - mov.w r1,r2 - mulxu.w r0,er2 ; compute er3 = remainder (tentative) - sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor -divmod_L25: - cmp.l er1,er3 ; is divisor < remainder? - blo divmod_L26 - adds #1,er0 - sub.l er1,er3 ; correct the remainder -divmod_L26: - rts - -#endif -#endif /* L_divsi3 */ - -#ifdef L_mulhi3 - -;; HImode multiply. -; The H8/300 only has an 8*8->16 multiply. -; The answer is the same as: -; -; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256 -; (we can ignore A1.h * A0.h cause that will all off the top) -; A0 in -; A1 in -; A0 answer - -#ifdef __H8300__ - .section .text - .align 2 - .global ___mulhi3 -___mulhi3: - mov.b A1L,A2L ; A2l gets srcb.l - mulxu A0L,A2 ; A2 gets first sub product - - mov.b A0H,A3L ; prepare for - mulxu A1L,A3 ; second sub product - - add.b A3L,A2H ; sum first two terms - - mov.b A1H,A3L ; third sub product - mulxu A0L,A3 - - add.b A3L,A2H ; almost there - mov.w A2,A0 ; that is - rts - -#endif -#endif /* L_mulhi3 */ - -#ifdef L_mulsi3 - -;; SImode multiply. -;; -;; I think that shift and add may be sufficient for this. Using the -;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way -;; the inner loop uses maybe 20 cycles + overhead, but terminates -;; quickly on small args. 
-;; -;; A0/A1 src_a -;; A2/A3 src_b -;; -;; while (a) -;; { -;; if (a & 1) -;; r += b; -;; a >>= 1; -;; b <<= 1; -;; } - - .section .text - .align 2 - -#ifdef __H8300__ - - .global ___mulsi3 -___mulsi3: - PUSHP S0P - PUSHP S1P - - sub.w S0,S0 - sub.w S1,S1 - - ; while (a) -_top: mov.w A0,A0 - bne _more - mov.w A1,A1 - beq _done -_more: ; if (a & 1) - bld #0,A1L - bcc _nobit - ; r += b - add.w A3,S1 - addx A2L,S0L - addx A2H,S0H -_nobit: - ; a >>= 1 - shlr A0H - rotxr A0L - rotxr A1H - rotxr A1L - - ; b <<= 1 - add.w A3,A3 - addx A2L,A2L - addx A2H,A2H - bra _top - -_done: - mov.w S0,A0 - mov.w S1,A1 - POPP S1P - POPP S0P - rts - -#else /* __H8300H__ */ - -; -; mulsi3 for H8/300H - based on Renesas SH implementation -; -; by Toshiyasu Morita -; -; Old code: -; -; 16b * 16b = 372 states (worst case) -; 32b * 32b = 724 states (worst case) -; -; New code: -; -; 16b * 16b = 48 states -; 16b * 32b = 72 states -; 32b * 32b = 92 states -; - - .global ___mulsi3 -___mulsi3: - mov.w r1,r2 ; ( 2 states) b * d - mulxu r0,er2 ; (22 states) - - mov.w e0,r3 ; ( 2 states) a * d - beq L_skip1 ; ( 4 states) - mulxu r1,er3 ; (22 states) - add.w r3,e2 ; ( 2 states) - -L_skip1: - mov.w e1,r3 ; ( 2 states) c * b - beq L_skip2 ; ( 4 states) - mulxu r0,er3 ; (22 states) - add.w r3,e2 ; ( 2 states) - -L_skip2: - mov.l er2,er0 ; ( 2 states) - rts ; (10 states) - -#endif -#endif /* L_mulsi3 */ -#ifdef L_fixunssfsi_asm -/* For the h8300 we use asm to save some bytes, to - allow more programs to fit into the tiny address - space. For the H8/300H and H8S, the C version is good enough. */ -#ifdef __H8300__ -/* We still treat NANs different than libgcc2.c, but then, the - behavior is undefined anyways. */ - .global ___fixunssfsi -___fixunssfsi: - cmp.b #0x4f,r0h - bge Large_num - jmp @___fixsfsi -Large_num: - bhi L_huge_num - xor.b #0x80,A0L - bmi L_shift8 -L_huge_num: - mov.w #65535,A0 - mov.w A0,A1 - rts -L_shift8: - mov.b A0L,A0H - mov.b A1H,A0L - mov.b A1L,A1H - mov.b #0,A1L - rts -#endif -#endif /* L_fixunssfsi_asm */ diff --git a/gcc/config/h8300/t-h8300 b/gcc/config/h8300/t-h8300 index 616849007b4..7083c673acf 100644 --- a/gcc/config/h8300/t-h8300 +++ b/gcc/config/h8300/t-h8300 @@ -17,10 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = h8300/lib1funcs.asm -LIB1ASMFUNCS = _cmpsi2 _ucmpsi2 _divhi3 _divsi3 _mulhi3 _mulsi3 \ - _fixunssfsi_asm - LIB2FUNCS_EXTRA = \ $(srcdir)/config/h8300/clzhi2.c \ $(srcdir)/config/h8300/ctzhi2.c \ diff --git a/gcc/config/i386/cygwin.asm b/gcc/config/i386/cygwin.asm deleted file mode 100644 index 8f9c486850e..00000000000 --- a/gcc/config/i386/cygwin.asm +++ /dev/null @@ -1,188 +0,0 @@ -/* stuff needed for libgcc on win32. - * - * Copyright (C) 1996, 1998, 2001, 2003, 2008, 2009, 2010 - * Free Software Foundation, Inc. - * Written By Steve Chamberlain - * - * This file is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) any - * later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. 
- * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * . - */ - -#include "auto-host.h" - -#ifdef HAVE_GAS_CFI_SECTIONS_DIRECTIVE - .cfi_sections .debug_frame -# define cfi_startproc() .cfi_startproc -# define cfi_endproc() .cfi_endproc -# define cfi_adjust_cfa_offset(X) .cfi_adjust_cfa_offset X -# define cfi_def_cfa_register(X) .cfi_def_cfa_register X -# define cfi_register(D,S) .cfi_register D, S -# ifdef _WIN64 -# define cfi_push(X) .cfi_adjust_cfa_offset 8; .cfi_rel_offset X, 0 -# define cfi_pop(X) .cfi_adjust_cfa_offset -8; .cfi_restore X -# else -# define cfi_push(X) .cfi_adjust_cfa_offset 4; .cfi_rel_offset X, 0 -# define cfi_pop(X) .cfi_adjust_cfa_offset -4; .cfi_restore X -# endif -#else -# define cfi_startproc() -# define cfi_endproc() -# define cfi_adjust_cfa_offset(X) -# define cfi_def_cfa_register(X) -# define cfi_register(D,S) -# define cfi_push(X) -# define cfi_pop(X) -#endif /* HAVE_GAS_CFI_SECTIONS_DIRECTIVE */ - -#ifdef L_chkstk -/* Function prologue calls __chkstk to probe the stack when allocating more - than CHECK_STACK_LIMIT bytes in one go. Touching the stack at 4K - increments is necessary to ensure that the guard pages used - by the OS virtual memory manger are allocated in correct sequence. */ - - .global ___chkstk - .global __alloca -#ifdef _WIN64 -/* __alloca is a normal function call, which uses %rcx as the argument. */ - cfi_startproc() -__alloca: - movq %rcx, %rax - /* FALLTHRU */ - -/* ___chkstk is a *special* function call, which uses %rax as the argument. - We avoid clobbering the 4 integer argument registers, %rcx, %rdx, - %r8 and %r9, which leaves us with %rax, %r10, and %r11 to use. */ - .align 4 -___chkstk: - popq %r11 /* pop return address */ - cfi_adjust_cfa_offset(-8) /* indicate return address in r11 */ - cfi_register(%rip, %r11) - movq %rsp, %r10 - cmpq $0x1000, %rax /* > 4k ?*/ - jb 2f - -1: subq $0x1000, %r10 /* yes, move pointer down 4k*/ - orl $0x0, (%r10) /* probe there */ - subq $0x1000, %rax /* decrement count */ - cmpq $0x1000, %rax - ja 1b /* and do it again */ - -2: subq %rax, %r10 - movq %rsp, %rax /* hold CFA until return */ - cfi_def_cfa_register(%rax) - orl $0x0, (%r10) /* less than 4k, just peek here */ - movq %r10, %rsp /* decrement stack */ - - /* Push the return value back. Doing this instead of just - jumping to %r11 preserves the cached call-return stack - used by most modern processors. */ - pushq %r11 - ret - cfi_endproc() -#else - cfi_startproc() -___chkstk: -__alloca: - pushl %ecx /* save temp */ - cfi_push(%eax) - leal 8(%esp), %ecx /* point past return addr */ - cmpl $0x1000, %eax /* > 4k ?*/ - jb 2f - -1: subl $0x1000, %ecx /* yes, move pointer down 4k*/ - orl $0x0, (%ecx) /* probe there */ - subl $0x1000, %eax /* decrement count */ - cmpl $0x1000, %eax - ja 1b /* and do it again */ - -2: subl %eax, %ecx - orl $0x0, (%ecx) /* less than 4k, just peek here */ - movl %esp, %eax /* save current stack pointer */ - cfi_def_cfa_register(%eax) - movl %ecx, %esp /* decrement stack */ - movl (%eax), %ecx /* recover saved temp */ - - /* Copy the return register. Doing this instead of just jumping to - the address preserves the cached call-return stack used by most - modern processors. 
*/ - pushl 4(%eax) - ret - cfi_endproc() -#endif /* _WIN64 */ -#endif /* L_chkstk */ - -#ifdef L_chkstk_ms -/* ___chkstk_ms is a *special* function call, which uses %rax as the argument. - We avoid clobbering any registers. Unlike ___chkstk, it just probes the - stack and does no stack allocation. */ - .global ___chkstk_ms -#ifdef _WIN64 - cfi_startproc() -___chkstk_ms: - pushq %rcx /* save temps */ - cfi_push(%rcx) - pushq %rax - cfi_push(%rax) - cmpq $0x1000, %rax /* > 4k ?*/ - leaq 24(%rsp), %rcx /* point past return addr */ - jb 2f - -1: subq $0x1000, %rcx /* yes, move pointer down 4k */ - orq $0x0, (%rcx) /* probe there */ - subq $0x1000, %rax /* decrement count */ - cmpq $0x1000, %rax - ja 1b /* and do it again */ - -2: subq %rax, %rcx - orq $0x0, (%rcx) /* less than 4k, just peek here */ - - popq %rax - cfi_pop(%rax) - popq %rcx - cfi_pop(%rcx) - ret - cfi_endproc() -#else - cfi_startproc() -___chkstk_ms: - pushl %ecx /* save temp */ - cfi_push(%ecx) - pushl %eax - cfi_push(%eax) - cmpl $0x1000, %eax /* > 4k ?*/ - leal 12(%esp), %ecx /* point past return addr */ - jb 2f - -1: subl $0x1000, %ecx /* yes, move pointer down 4k*/ - orl $0x0, (%ecx) /* probe there */ - subl $0x1000, %eax /* decrement count */ - cmpl $0x1000, %eax - ja 1b /* and do it again */ - -2: subl %eax, %ecx - orl $0x0, (%ecx) /* less than 4k, just peek here */ - - popl %eax - cfi_pop(%eax) - popl %ecx - cfi_pop(%ecx) - ret - cfi_endproc() -#endif /* _WIN64 */ -#endif /* L_chkstk_ms */ diff --git a/gcc/config/i386/t-cygming b/gcc/config/i386/t-cygming index 242d7f27f65..3e7f7cdd036 100644 --- a/gcc/config/i386/t-cygming +++ b/gcc/config/i386/t-cygming @@ -17,9 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = i386/cygwin.asm -LIB1ASMFUNCS = _chkstk _chkstk_ms - # cygwin and mingw always have a limits.h, but, depending upon how we are # doing the build, it may not be installed yet. LIMITS_H_TEST = true diff --git a/gcc/config/i386/t-interix b/gcc/config/i386/t-interix index e7b016f1e7a..09c9127f6af 100644 --- a/gcc/config/i386/t-interix +++ b/gcc/config/i386/t-interix @@ -1,6 +1,3 @@ -LIB1ASMSRC = i386/cygwin.asm -LIB1ASMFUNCS = _chkstk _chkstk_ms - winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \ $(TM_P_H) $(HASHTAB_H) $(GGC_H) diff --git a/gcc/config/ia64/lib1funcs.asm b/gcc/config/ia64/lib1funcs.asm deleted file mode 100644 index b7eaa6eca3c..00000000000 --- a/gcc/config/ia64/lib1funcs.asm +++ /dev/null @@ -1,795 +0,0 @@ -/* Copyright (C) 2000, 2001, 2003, 2005, 2009 Free Software Foundation, Inc. - Contributed by James E. Wilson . - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. 
- - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#ifdef L__divxf3 -// Compute a 80-bit IEEE double-extended quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// farg0 holds the dividend. farg1 holds the divisor. -// -// __divtf3 is an alternate symbol name for backward compatibility. - - .text - .align 16 - .global __divxf3 - .proc __divxf3 -__divxf3: -#ifdef SHARED - .global __divtf3 -__divtf3: -#endif - cmp.eq p7, p0 = r0, r0 - frcpa.s0 f10, p6 = farg0, farg1 - ;; -(p6) cmp.ne p7, p0 = r0, r0 - .pred.rel.mutex p6, p7 -(p6) fnma.s1 f11 = farg1, f10, f1 -(p6) fma.s1 f12 = farg0, f10, f0 - ;; -(p6) fma.s1 f13 = f11, f11, f0 -(p6) fma.s1 f14 = f11, f11, f11 - ;; -(p6) fma.s1 f11 = f13, f13, f11 -(p6) fma.s1 f13 = f14, f10, f10 - ;; -(p6) fma.s1 f10 = f13, f11, f10 -(p6) fnma.s1 f11 = farg1, f12, farg0 - ;; -(p6) fma.s1 f11 = f11, f10, f12 -(p6) fnma.s1 f12 = farg1, f10, f1 - ;; -(p6) fma.s1 f10 = f12, f10, f10 -(p6) fnma.s1 f12 = farg1, f11, farg0 - ;; -(p6) fma.s0 fret0 = f12, f10, f11 -(p7) mov fret0 = f10 - br.ret.sptk rp - .endp __divxf3 -#endif - -#ifdef L__divdf3 -// Compute a 64-bit IEEE double quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// farg0 holds the dividend. farg1 holds the divisor. - - .text - .align 16 - .global __divdf3 - .proc __divdf3 -__divdf3: - cmp.eq p7, p0 = r0, r0 - frcpa.s0 f10, p6 = farg0, farg1 - ;; -(p6) cmp.ne p7, p0 = r0, r0 - .pred.rel.mutex p6, p7 -(p6) fmpy.s1 f11 = farg0, f10 -(p6) fnma.s1 f12 = farg1, f10, f1 - ;; -(p6) fma.s1 f11 = f12, f11, f11 -(p6) fmpy.s1 f13 = f12, f12 - ;; -(p6) fma.s1 f10 = f12, f10, f10 -(p6) fma.s1 f11 = f13, f11, f11 - ;; -(p6) fmpy.s1 f12 = f13, f13 -(p6) fma.s1 f10 = f13, f10, f10 - ;; -(p6) fma.d.s1 f11 = f12, f11, f11 -(p6) fma.s1 f10 = f12, f10, f10 - ;; -(p6) fnma.d.s1 f8 = farg1, f11, farg0 - ;; -(p6) fma.d fret0 = f8, f10, f11 -(p7) mov fret0 = f10 - br.ret.sptk rp - ;; - .endp __divdf3 -#endif - -#ifdef L__divsf3 -// Compute a 32-bit IEEE float quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// farg0 holds the dividend. farg1 holds the divisor. - - .text - .align 16 - .global __divsf3 - .proc __divsf3 -__divsf3: - cmp.eq p7, p0 = r0, r0 - frcpa.s0 f10, p6 = farg0, farg1 - ;; -(p6) cmp.ne p7, p0 = r0, r0 - .pred.rel.mutex p6, p7 -(p6) fmpy.s1 f8 = farg0, f10 -(p6) fnma.s1 f9 = farg1, f10, f1 - ;; -(p6) fma.s1 f8 = f9, f8, f8 -(p6) fmpy.s1 f9 = f9, f9 - ;; -(p6) fma.s1 f8 = f9, f8, f8 -(p6) fmpy.s1 f9 = f9, f9 - ;; -(p6) fma.d.s1 f10 = f9, f8, f8 - ;; -(p6) fnorm.s.s0 fret0 = f10 -(p7) mov fret0 = f10 - br.ret.sptk rp - ;; - .endp __divsf3 -#endif - -#ifdef L__divdi3 -// Compute a 64-bit integer quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __divdi3 - .proc __divdi3 -__divdi3: - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f8 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - // Convert the inputs to FP, so that they won't be treated as unsigned. - fcvt.xf f8 = f8 - fcvt.xf f9 = f9 -(p7) break 1 - ;; - // Compute the reciprocal approximation. 
- frcpa.s1 f10, p6 = f8, f9 - ;; - // 3 Newton-Raphson iterations. -(p6) fnma.s1 f11 = f9, f10, f1 -(p6) fmpy.s1 f12 = f8, f10 - ;; -(p6) fmpy.s1 f13 = f11, f11 -(p6) fma.s1 f12 = f11, f12, f12 - ;; -(p6) fma.s1 f10 = f11, f10, f10 -(p6) fma.s1 f11 = f13, f12, f12 - ;; -(p6) fma.s1 f10 = f13, f10, f10 -(p6) fnma.s1 f12 = f9, f11, f8 - ;; -(p6) fma.s1 f10 = f12, f10, f11 - ;; - // Round quotient to an integer. - fcvt.fx.trunc.s1 f10 = f10 - ;; - // Transfer result to GP registers. - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __divdi3 -#endif - -#ifdef L__moddi3 -// Compute a 64-bit integer modulus. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend (a). in1 holds the divisor (b). - - .text - .align 16 - .global __moddi3 - .proc __moddi3 -__moddi3: - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f14 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - // Convert the inputs to FP, so that they won't be treated as unsigned. - fcvt.xf f8 = f14 - fcvt.xf f9 = f9 -(p7) break 1 - ;; - // Compute the reciprocal approximation. - frcpa.s1 f10, p6 = f8, f9 - ;; - // 3 Newton-Raphson iterations. -(p6) fmpy.s1 f12 = f8, f10 -(p6) fnma.s1 f11 = f9, f10, f1 - ;; -(p6) fma.s1 f12 = f11, f12, f12 -(p6) fmpy.s1 f13 = f11, f11 - ;; -(p6) fma.s1 f10 = f11, f10, f10 -(p6) fma.s1 f11 = f13, f12, f12 - ;; - sub in1 = r0, in1 -(p6) fma.s1 f10 = f13, f10, f10 -(p6) fnma.s1 f12 = f9, f11, f8 - ;; - setf.sig f9 = in1 -(p6) fma.s1 f10 = f12, f10, f11 - ;; - fcvt.fx.trunc.s1 f10 = f10 - ;; - // r = q * (-b) + a - xma.l f10 = f10, f9, f14 - ;; - // Transfer result to GP registers. - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __moddi3 -#endif - -#ifdef L__udivdi3 -// Compute a 64-bit unsigned integer quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __udivdi3 - .proc __udivdi3 -__udivdi3: - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f8 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - // Convert the inputs to FP, to avoid FP software-assist faults. - fcvt.xuf.s1 f8 = f8 - fcvt.xuf.s1 f9 = f9 -(p7) break 1 - ;; - // Compute the reciprocal approximation. - frcpa.s1 f10, p6 = f8, f9 - ;; - // 3 Newton-Raphson iterations. -(p6) fnma.s1 f11 = f9, f10, f1 -(p6) fmpy.s1 f12 = f8, f10 - ;; -(p6) fmpy.s1 f13 = f11, f11 -(p6) fma.s1 f12 = f11, f12, f12 - ;; -(p6) fma.s1 f10 = f11, f10, f10 -(p6) fma.s1 f11 = f13, f12, f12 - ;; -(p6) fma.s1 f10 = f13, f10, f10 -(p6) fnma.s1 f12 = f9, f11, f8 - ;; -(p6) fma.s1 f10 = f12, f10, f11 - ;; - // Round quotient to an unsigned integer. - fcvt.fxu.trunc.s1 f10 = f10 - ;; - // Transfer result to GP registers. - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __udivdi3 -#endif - -#ifdef L__umoddi3 -// Compute a 64-bit unsigned integer modulus. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend (a). in1 holds the divisor (b). - - .text - .align 16 - .global __umoddi3 - .proc __umoddi3 -__umoddi3: - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f14 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - // Convert the inputs to FP, to avoid FP software assist faults. 
- fcvt.xuf.s1 f8 = f14 - fcvt.xuf.s1 f9 = f9 -(p7) break 1; - ;; - // Compute the reciprocal approximation. - frcpa.s1 f10, p6 = f8, f9 - ;; - // 3 Newton-Raphson iterations. -(p6) fmpy.s1 f12 = f8, f10 -(p6) fnma.s1 f11 = f9, f10, f1 - ;; -(p6) fma.s1 f12 = f11, f12, f12 -(p6) fmpy.s1 f13 = f11, f11 - ;; -(p6) fma.s1 f10 = f11, f10, f10 -(p6) fma.s1 f11 = f13, f12, f12 - ;; - sub in1 = r0, in1 -(p6) fma.s1 f10 = f13, f10, f10 -(p6) fnma.s1 f12 = f9, f11, f8 - ;; - setf.sig f9 = in1 -(p6) fma.s1 f10 = f12, f10, f11 - ;; - // Round quotient to an unsigned integer. - fcvt.fxu.trunc.s1 f10 = f10 - ;; - // r = q * (-b) + a - xma.l f10 = f10, f9, f14 - ;; - // Transfer result to GP registers. - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __umoddi3 -#endif - -#ifdef L__divsi3 -// Compute a 32-bit integer quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __divsi3 - .proc __divsi3 -__divsi3: - .regstk 2,0,0,0 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - sxt4 in0 = in0 - sxt4 in1 = in1 - ;; - setf.sig f8 = in0 - setf.sig f9 = in1 -(p7) break 1 - ;; - mov r2 = 0x0ffdd - fcvt.xf f8 = f8 - fcvt.xf f9 = f9 - ;; - setf.exp f11 = r2 - frcpa.s1 f10, p6 = f8, f9 - ;; -(p6) fmpy.s1 f8 = f8, f10 -(p6) fnma.s1 f9 = f9, f10, f1 - ;; -(p6) fma.s1 f8 = f9, f8, f8 -(p6) fma.s1 f9 = f9, f9, f11 - ;; -(p6) fma.s1 f10 = f9, f8, f8 - ;; - fcvt.fx.trunc.s1 f10 = f10 - ;; - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __divsi3 -#endif - -#ifdef L__modsi3 -// Compute a 32-bit integer modulus. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __modsi3 - .proc __modsi3 -__modsi3: - .regstk 2,0,0,0 - mov r2 = 0x0ffdd - sxt4 in0 = in0 - sxt4 in1 = in1 - ;; - setf.sig f13 = r32 - setf.sig f9 = r33 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - sub in1 = r0, in1 - fcvt.xf f8 = f13 - fcvt.xf f9 = f9 - ;; - setf.exp f11 = r2 - frcpa.s1 f10, p6 = f8, f9 -(p7) break 1 - ;; -(p6) fmpy.s1 f12 = f8, f10 -(p6) fnma.s1 f10 = f9, f10, f1 - ;; - setf.sig f9 = in1 -(p6) fma.s1 f12 = f10, f12, f12 -(p6) fma.s1 f10 = f10, f10, f11 - ;; -(p6) fma.s1 f10 = f10, f12, f12 - ;; - fcvt.fx.trunc.s1 f10 = f10 - ;; - xma.l f10 = f10, f9, f13 - ;; - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __modsi3 -#endif - -#ifdef L__udivsi3 -// Compute a 32-bit unsigned integer quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __udivsi3 - .proc __udivsi3 -__udivsi3: - .regstk 2,0,0,0 - mov r2 = 0x0ffdd - zxt4 in0 = in0 - zxt4 in1 = in1 - ;; - setf.sig f8 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - fcvt.xf f8 = f8 - fcvt.xf f9 = f9 -(p7) break 1 - ;; - setf.exp f11 = r2 - frcpa.s1 f10, p6 = f8, f9 - ;; -(p6) fmpy.s1 f8 = f8, f10 -(p6) fnma.s1 f9 = f9, f10, f1 - ;; -(p6) fma.s1 f8 = f9, f8, f8 -(p6) fma.s1 f9 = f9, f9, f11 - ;; -(p6) fma.s1 f10 = f9, f8, f8 - ;; - fcvt.fxu.trunc.s1 f10 = f10 - ;; - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __udivsi3 -#endif - -#ifdef L__umodsi3 -// Compute a 32-bit unsigned integer modulus. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. 
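All of the IA-64 divide and modulus entry points above share one recipe: move the operands to floating point, take the hardware's frcpa reciprocal seed, sharpen it with a few Newton-Raphson fma steps, multiply, and truncate back to an integer (the modulus variants then fold in r = a - q*b with a single xma against the negated divisor). The C sketch below only models the idea: frcpa_seed_sketch is a made-up stand-in for the roughly 8-bit hardware approximation, double arithmetic replaces the 82-bit register format, and a final integer fix-up takes the place of the carefully ordered fma tail, so it is not the routine's actual error analysis. The __umodsi3 body follows.

#include <stdint.h>

/* Made-up stand-in for frcpa's reciprocal seed; any rough
   approximation works, because each iteration below roughly doubles
   the number of correct bits.  */
static double
frcpa_seed_sketch (double b)
{
  return 1.0 / b;
}

static uint32_t
udivsi3_nr_sketch (uint32_t a, uint32_t b)
{
  if (b == 0)
    return 0;                   /* the real routines trap (break 1) */

  double fa = (double) a, fb = (double) b;
  double y = frcpa_seed_sketch (fb);          /* y ~= 1/b */

  /* Newton-Raphson refinement: e = 1 - b*y, then y += y*e.  The
     assembly expresses the same update as fnma/fma chains.  */
  for (int i = 0; i < 3; i++)
    {
      double e = 1.0 - fb * y;
      y = y + y * e;
    }

  /* Quotient estimate, truncated toward zero (fcvt.fxu.trunc),
     then at most one step of integer correction either way.  */
  uint64_t q = (uint64_t) (fa * y);
  if (q > 0 && (uint64_t) b * q > a)
    q--;
  else if ((uint64_t) b * (q + 1) <= a)
    q++;
  return (uint32_t) q;
}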
- - .text - .align 16 - .global __umodsi3 - .proc __umodsi3 -__umodsi3: - .regstk 2,0,0,0 - mov r2 = 0x0ffdd - zxt4 in0 = in0 - zxt4 in1 = in1 - ;; - setf.sig f13 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - sub in1 = r0, in1 - fcvt.xf f8 = f13 - fcvt.xf f9 = f9 - ;; - setf.exp f11 = r2 - frcpa.s1 f10, p6 = f8, f9 -(p7) break 1; - ;; -(p6) fmpy.s1 f12 = f8, f10 -(p6) fnma.s1 f10 = f9, f10, f1 - ;; - setf.sig f9 = in1 -(p6) fma.s1 f12 = f10, f12, f12 -(p6) fma.s1 f10 = f10, f10, f11 - ;; -(p6) fma.s1 f10 = f10, f12, f12 - ;; - fcvt.fxu.trunc.s1 f10 = f10 - ;; - xma.l f10 = f10, f9, f13 - ;; - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __umodsi3 -#endif - -#ifdef L__save_stack_nonlocal -// Notes on save/restore stack nonlocal: We read ar.bsp but write -// ar.bspstore. This is because ar.bsp can be read at all times -// (independent of the RSE mode) but since it's read-only we need to -// restore the value via ar.bspstore. This is OK because -// ar.bsp==ar.bspstore after executing "flushrs". - -// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer) - - .text - .align 16 - .global __ia64_save_stack_nonlocal - .proc __ia64_save_stack_nonlocal -__ia64_save_stack_nonlocal: - { .mmf - alloc r18 = ar.pfs, 2, 0, 0, 0 - mov r19 = ar.rsc - ;; - } - { .mmi - flushrs - st8 [in0] = in1, 24 - and r19 = 0x1c, r19 - ;; - } - { .mmi - st8 [in0] = r18, -16 - mov ar.rsc = r19 - or r19 = 0x3, r19 - ;; - } - { .mmi - mov r16 = ar.bsp - mov r17 = ar.rnat - adds r2 = 8, in0 - ;; - } - { .mmi - st8 [in0] = r16 - st8 [r2] = r17 - } - { .mib - mov ar.rsc = r19 - br.ret.sptk.few rp - ;; - } - .endp __ia64_save_stack_nonlocal -#endif - -#ifdef L__nonlocal_goto -// void __ia64_nonlocal_goto(void *target_label, void *save_area, -// void *static_chain); - - .text - .align 16 - .global __ia64_nonlocal_goto - .proc __ia64_nonlocal_goto -__ia64_nonlocal_goto: - { .mmi - alloc r20 = ar.pfs, 3, 0, 0, 0 - ld8 r12 = [in1], 8 - mov.ret.sptk rp = in0, .L0 - ;; - } - { .mmf - ld8 r16 = [in1], 8 - mov r19 = ar.rsc - ;; - } - { .mmi - flushrs - ld8 r17 = [in1], 8 - and r19 = 0x1c, r19 - ;; - } - { .mmi - ld8 r18 = [in1] - mov ar.rsc = r19 - or r19 = 0x3, r19 - ;; - } - { .mmi - mov ar.bspstore = r16 - ;; - mov ar.rnat = r17 - ;; - } - { .mmi - loadrs - invala - mov r15 = in2 - ;; - } -.L0: { .mib - mov ar.rsc = r19 - mov ar.pfs = r18 - br.ret.sptk.few rp - ;; - } - .endp __ia64_nonlocal_goto -#endif - -#ifdef L__restore_stack_nonlocal -// This is mostly the same as nonlocal_goto above. -// ??? This has not been tested yet. - -// void __ia64_restore_stack_nonlocal(void *save_area) - - .text - .align 16 - .global __ia64_restore_stack_nonlocal - .proc __ia64_restore_stack_nonlocal -__ia64_restore_stack_nonlocal: - { .mmf - alloc r20 = ar.pfs, 4, 0, 0, 0 - ld8 r12 = [in0], 8 - ;; - } - { .mmb - ld8 r16=[in0], 8 - mov r19 = ar.rsc - ;; - } - { .mmi - flushrs - ld8 r17 = [in0], 8 - and r19 = 0x1c, r19 - ;; - } - { .mmf - ld8 r18 = [in0] - mov ar.rsc = r19 - ;; - } - { .mmi - mov ar.bspstore = r16 - ;; - mov ar.rnat = r17 - or r19 = 0x3, r19 - ;; - } - { .mmf - loadrs - invala - ;; - } -.L0: { .mib - mov ar.rsc = r19 - mov ar.pfs = r18 - br.ret.sptk.few rp - ;; - } - .endp __ia64_restore_stack_nonlocal -#endif - -#ifdef L__trampoline -// Implement the nested function trampoline. This is out of line -// so that we don't have to bother with flushing the icache, as -// well as making the on-stack trampoline smaller. 
-// -// The trampoline has the following form: -// -// +-------------------+ > -// TRAMP: | __ia64_trampoline | | -// +-------------------+ > fake function descriptor -// | TRAMP+16 | | -// +-------------------+ > -// | target descriptor | -// +-------------------+ -// | static link | -// +-------------------+ - - .text - .align 16 - .global __ia64_trampoline - .proc __ia64_trampoline -__ia64_trampoline: - { .mmi - ld8 r2 = [r1], 8 - ;; - ld8 r15 = [r1] - } - { .mmi - ld8 r3 = [r2], 8 - ;; - ld8 r1 = [r2] - mov b6 = r3 - } - { .bbb - br.sptk.many b6 - ;; - } - .endp __ia64_trampoline -#endif - -#ifdef SHARED -// Thunks for backward compatibility. -#ifdef L_fixtfdi - .text - .align 16 - .global __fixtfti - .proc __fixtfti -__fixtfti: - { .bbb - br.sptk.many __fixxfti - ;; - } - .endp __fixtfti -#endif -#ifdef L_fixunstfdi - .align 16 - .global __fixunstfti - .proc __fixunstfti -__fixunstfti: - { .bbb - br.sptk.many __fixunsxfti - ;; - } - .endp __fixunstfti -#endif -#ifdef L_floatditf - .align 16 - .global __floattitf - .proc __floattitf -__floattitf: - { .bbb - br.sptk.many __floattixf - ;; - } - .endp __floattitf -#endif -#endif diff --git a/gcc/config/ia64/t-hpux b/gcc/config/ia64/t-hpux index e1554861d18..23691f3856c 100644 --- a/gcc/config/ia64/t-hpux +++ b/gcc/config/ia64/t-hpux @@ -26,12 +26,6 @@ MULTILIB_OPTIONS = milp32/mlp64 MULTILIB_DIRNAMES = hpux32 hpux64 MULTILIB_MATCHES = -# On HP-UX we do not want _fixtfdi, _fixunstfdi, or _floatditf from -# LIB1ASMSRC. These functions map the 128 bit conversion function names -# to 80 bit conversions and were done for Linux backwards compatibility. - -LIB1ASMFUNCS := $(filter-out _fixtfdi _fixunstfdi _floatditf,$(LIB1ASMFUNCS)) - # Support routines for HP-UX 128 bit floats. LIB2FUNCS_EXTRA=quadlib.c $(srcdir)/config/floatunsitf.c @@ -39,12 +33,6 @@ LIB2FUNCS_EXTRA=quadlib.c $(srcdir)/config/floatunsitf.c quadlib.c: $(srcdir)/config/ia64/quadlib.c cat $(srcdir)/config/ia64/quadlib.c > quadlib.c -# We get an undefined main when building a cross compiler because our -# linkspec has "-u main" and we want that for linking but it makes -# LIBGCC1_TEST fail because it uses -nostdlib -nostartup. - -LIBGCC1_TEST = - # We do not want to include the EH stuff that linux uses, we want to use # the HP-UX libunwind library. diff --git a/gcc/config/ia64/t-ia64 b/gcc/config/ia64/t-ia64 index a143d43d56c..8a54d46b458 100644 --- a/gcc/config/ia64/t-ia64 +++ b/gcc/config/ia64/t-ia64 @@ -18,19 +18,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = ia64/lib1funcs.asm - -# We use different names for the DImode div/mod files so that they won't -# conflict with libgcc2.c files. We used to use __ia64 as a prefix, now -# we use __ as the prefix. Note that L_divdi3 in libgcc2.c actually defines -# a TImode divide function, so there is no actual overlap here between -# libgcc2.c and lib1funcs.asm. -LIB1ASMFUNCS = __divxf3 __divdf3 __divsf3 \ - __divdi3 __moddi3 __udivdi3 __umoddi3 \ - __divsi3 __modsi3 __udivsi3 __umodsi3 __save_stack_nonlocal \ - __nonlocal_goto __restore_stack_nonlocal __trampoline \ - _fixtfdi _fixunstfdi _floatditf - # ??? Hack to get -P option used when compiling lib1funcs.asm, because Intel # assembler does not accept # line number as a comment. # ??? 
This breaks C++ pragma interface/implementation, which is used in the diff --git a/gcc/config/iq2000/t-iq2000 b/gcc/config/iq2000/t-iq2000 index 03d8c703f86..c634e58646e 100644 --- a/gcc/config/iq2000/t-iq2000 +++ b/gcc/config/iq2000/t-iq2000 @@ -16,11 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -# Suppress building libgcc1.a, since the MIPS compiler port is complete -# and does not need anything from libgcc1.a. -LIBGCC1 = -CROSS_LIBGCC1 = - LIB2FUNCS_EXTRA = $(srcdir)/config/udivmod.c $(srcdir)/config/divmod.c $(srcdir)/config/udivmodsi4.c $(srcdir)/config/iq2000/lib2extra-funcs.c # Enable the following if multilibs are needed. diff --git a/gcc/config/m32c/m32c-lib1.S b/gcc/config/m32c/m32c-lib1.S deleted file mode 100644 index 9b657787187..00000000000 --- a/gcc/config/m32c/m32c-lib1.S +++ /dev/null @@ -1,231 +0,0 @@ -/* libgcc routines for R8C/M16C/M32C - Copyright (C) 2005, 2009, 2010 - Free Software Foundation, Inc. - Contributed by Red Hat. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#if defined(__r8c_cpu__) || defined(__m16c_cpu__) -#define A16 -#define A(n,w) n -#define W w -#else -#define A24 -#define A(n,w) w -#define W l -#endif - - -#ifdef L__m32c_memregs - -/* Warning: these memory locations are used as a register bank. They - *must* end up consecutive in any final executable, so you may *not* - use the otherwise obvious ".comm" directive to allocate space for - them. */ - - .bss - .global mem0 -mem0: .space 1 - .global mem1 -mem1: .space 1 - .global mem2 -mem2: .space 1 - .global mem3 -mem3: .space 1 - .global mem4 -mem4: .space 1 - .global mem5 -mem5: .space 1 - .global mem6 -mem6: .space 1 - .global mem7 -mem7: .space 1 - .global mem8 -mem8: .space 1 - .global mem9 -mem9: .space 1 - .global mem10 -mem10: .space 1 - .global mem11 -mem11: .space 1 - .global mem12 -mem12: .space 1 - .global mem13 -mem13: .space 1 - .global mem14 -mem14: .space 1 - .global mem15 -mem15: .space 1 - -#endif - -#ifdef L__m32c_eh_return - .text - .global __m32c_eh_return -__m32c_eh_return: - - /* At this point, r0 has the stack adjustment, r1r3 has the - address to return to. The stack looks like this: - - old_ra - old_fp - <- unwound sp - ... - fb - through - r0 - <- sp - - What we need to do is restore all the registers, update the - stack, and return to the right place. - */ - - stc sp,a0 - - add.W A(#16,#24),a0 - /* a0 points to the current stack, just above the register - save areas */ - - mov.w a0,a1 - exts.w r0 - sub.W A(r0,r2r0),a1 - sub.W A(#3,#4),a1 - /* a1 points to the new stack. */ - - /* This is for the "rts" below. 
*/ - mov.w r1,[a1] -#ifdef A16 - mov.w r2,r1 - mov.b r1l,2[a1] -#else - mov.w r2,2[a1] -#endif - - /* This is for the "popc sp" below. */ - mov.W a1,[a0] - - popm r0,r1,r2,r3,a0,a1,sb,fb - popc sp - rts -#endif - -/* SImode arguments for SI foo(SI,SI) functions. */ -#ifdef A16 -#define SAL 5[fb] -#define SAH 7[fb] -#define SBL 9[fb] -#define SBH 11[fb] -#else -#define SAL 8[fb] -#define SAH 10[fb] -#define SBL 12[fb] -#define SBH 14[fb] -#endif - -#ifdef L__m32c_mulsi3 - .text - .global ___mulsi3 -___mulsi3: - enter #0 - push.w r2 - mov.w SAL,r0 - mulu.w SBL,r0 /* writes to r2r0 */ - mov.w r0,mem0 - mov.w r2,mem2 - mov.w SAL,r0 - mulu.w SBH,r0 /* writes to r2r0 */ - add.w r0,mem2 - mov.w SAH,r0 - mulu.w SBL,r0 /* writes to r2r0 */ - add.w r0,mem2 - pop.w r2 - exitd -#endif - -#ifdef L__m32c_cmpsi2 - .text - .global ___cmpsi2 -___cmpsi2: - enter #0 - cmp.w SBH,SAH - jgt cmpsi_gt - jlt cmpsi_lt - cmp.w SBL,SAL - jgt cmpsi_gt - jlt cmpsi_lt - mov.w #1,r0 - exitd -cmpsi_gt: - mov.w #2,r0 - exitd -cmpsi_lt: - mov.w #0,r0 - exitd -#endif - -#ifdef L__m32c_ucmpsi2 - .text - .global ___ucmpsi2 -___ucmpsi2: - enter #0 - cmp.w SBH,SAH - jgtu cmpsi_gt - jltu cmpsi_lt - cmp.w SBL,SAL - jgtu cmpsi_gt - jltu cmpsi_lt - mov.w #1,r0 - exitd -cmpsi_gt: - mov.w #2,r0 - exitd -cmpsi_lt: - mov.w #0,r0 - exitd -#endif - -#ifdef L__m32c_jsri16 - .text -#ifdef A16 - .global m32c_jsri16 -m32c_jsri16: - add.w #-1, sp - - /* Read the address (16 bits) and return address (24 bits) off - the stack. */ - mov.w 4[sp], r0 - mov.w 1[sp], r3 - mov.b 3[sp], a0 /* This zero-extends, so the high byte has - zero in it. */ - - /* Write the return address, then new address, to the stack. */ - mov.w a0, 1[sp] /* Just to get the zero in 2[sp]. */ - mov.w r0, 0[sp] - mov.w r3, 3[sp] - mov.b a0, 5[sp] - - /* This "returns" to the target address, leaving the pending - return address on the stack. */ - rts -#endif - -#endif diff --git a/gcc/config/m32c/m32c.c b/gcc/config/m32c/m32c.c index 7040df69fcf..04f69050609 100644 --- a/gcc/config/m32c/m32c.c +++ b/gcc/config/m32c/m32c.c @@ -391,7 +391,7 @@ class_can_hold_mode (reg_class_t rclass, enum machine_mode mode) we allow the user to limit the number of memregs available, in order to try to persuade gcc to try harder to use real registers. - Memregs are provided by m32c-lib1.S. + Memregs are provided by lib1funcs.S. */ int ok_to_change_target_memregs = TRUE; diff --git a/gcc/config/m32c/t-m32c b/gcc/config/m32c/t-m32c index b11f34d674f..aad972a2575 100644 --- a/gcc/config/m32c/t-m32c +++ b/gcc/config/m32c/t-m32c @@ -19,16 +19,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = m32c/m32c-lib1.S - -LIB1ASMFUNCS = \ - __m32c_memregs \ - __m32c_eh_return \ - __m32c_mulsi3 \ - __m32c_cmpsi2 \ - __m32c_ucmpsi2 \ - __m32c_jsri16 - LIB2FUNCS_EXTRA = $(srcdir)/config/m32c/m32c-lib2.c $(srcdir)/config/m32c/m32c-lib2-trapv.c # target-specific files diff --git a/gcc/config/m32r/t-linux b/gcc/config/m32r/t-linux index 487c0198786..f3b89d21d0b 100644 --- a/gcc/config/m32r/t-linux +++ b/gcc/config/m32r/t-linux @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -# lib1funcs.asm is currently empty. -CROSS_LIBGCC1 = - # Turn off the SDA while compiling libgcc2. There are no headers for it # and we want maximal upward compatibility here. @@ -26,9 +23,3 @@ TARGET_LIBGCC2_CFLAGS = -G 0 -fPIC # Don't install "assert.h" in gcc. We use the one in glibc. INSTALL_ASSERT_H = - -# Do not build libgcc1. Let gcc generate those functions. 
The GNU/Linux -# C library can handle them. -LIBGCC1 = -CROSS_LIBGCC1 = -LIBGCC1_TEST = diff --git a/gcc/config/m68k/lb1sf68.asm b/gcc/config/m68k/lb1sf68.asm deleted file mode 100644 index 0339a092c4f..00000000000 --- a/gcc/config/m68k/lb1sf68.asm +++ /dev/null @@ -1,4116 +0,0 @@ -/* libgcc routines for 68000 w/o floating-point hardware. - Copyright (C) 1994, 1996, 1997, 1998, 2008, 2009 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -/* Use this one for any 680x0; assumes no floating point hardware. - The trailing " '" appearing on some lines is for ANSI preprocessors. Yuk. - Some of this code comes from MINIX, via the folks at ericsson. - D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992 -*/ - -/* These are predefined by new versions of GNU cpp. */ - -#ifndef __USER_LABEL_PREFIX__ -#define __USER_LABEL_PREFIX__ _ -#endif - -#ifndef __REGISTER_PREFIX__ -#define __REGISTER_PREFIX__ -#endif - -#ifndef __IMMEDIATE_PREFIX__ -#define __IMMEDIATE_PREFIX__ # -#endif - -/* ANSI concatenation macros. */ - -#define CONCAT1(a, b) CONCAT2(a, b) -#define CONCAT2(a, b) a ## b - -/* Use the right prefix for global labels. */ - -#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) - -/* Note that X is a function. */ - -#ifdef __ELF__ -#define FUNC(x) .type SYM(x),function -#else -/* The .proc pseudo-op is accepted, but ignored, by GAS. We could just - define this to the empty string for non-ELF systems, but defining it - to .proc means that the information is available to the assembler if - the need arises. */ -#define FUNC(x) .proc -#endif - -/* Use the right prefix for registers. */ - -#define REG(x) CONCAT1 (__REGISTER_PREFIX__, x) - -/* Use the right prefix for immediate values. */ - -#define IMM(x) CONCAT1 (__IMMEDIATE_PREFIX__, x) - -#define d0 REG (d0) -#define d1 REG (d1) -#define d2 REG (d2) -#define d3 REG (d3) -#define d4 REG (d4) -#define d5 REG (d5) -#define d6 REG (d6) -#define d7 REG (d7) -#define a0 REG (a0) -#define a1 REG (a1) -#define a2 REG (a2) -#define a3 REG (a3) -#define a4 REG (a4) -#define a5 REG (a5) -#define a6 REG (a6) -#define fp REG (fp) -#define sp REG (sp) -#define pc REG (pc) - -/* Provide a few macros to allow for PIC code support. - * With PIC, data is stored A5 relative so we've got to take a bit of special - * care to ensure that all loads of global data is via A5. PIC also requires - * jumps and subroutine calls to be PC relative rather than absolute. We cheat - * a little on this and in the PIC case, we use short offset branches and - * hope that the final object code is within range (which it should be). 
- */ -#ifndef __PIC__ - - /* Non PIC (absolute/relocatable) versions */ - - .macro PICCALL addr - jbsr \addr - .endm - - .macro PICJUMP addr - jmp \addr - .endm - - .macro PICLEA sym, reg - lea \sym, \reg - .endm - - .macro PICPEA sym, areg - pea \sym - .endm - -#else /* __PIC__ */ - -# if defined (__uClinux__) - - /* Versions for uClinux */ - -# if defined(__ID_SHARED_LIBRARY__) - - /* -mid-shared-library versions */ - - .macro PICLEA sym, reg - movel a5@(_current_shared_library_a5_offset_), \reg - movel \sym@GOT(\reg), \reg - .endm - - .macro PICPEA sym, areg - movel a5@(_current_shared_library_a5_offset_), \areg - movel \sym@GOT(\areg), sp@- - .endm - - .macro PICCALL addr - PICLEA \addr,a0 - jsr a0@ - .endm - - .macro PICJUMP addr - PICLEA \addr,a0 - jmp a0@ - .endm - -# else /* !__ID_SHARED_LIBRARY__ */ - - /* Versions for -msep-data */ - - .macro PICLEA sym, reg - movel \sym@GOT(a5), \reg - .endm - - .macro PICPEA sym, areg - movel \sym@GOT(a5), sp@- - .endm - - .macro PICCALL addr -#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__) - lea \addr-.-8,a0 - jsr pc@(a0) -#else - jbsr \addr -#endif - .endm - - .macro PICJUMP addr - /* ISA C has no bra.l instruction, and since this assembly file - gets assembled into multiple object files, we avoid the - bra instruction entirely. */ -#if defined (__mcoldfire__) && !defined (__mcfisab__) - lea \addr-.-8,a0 - jmp pc@(a0) -#else - bra \addr -#endif - .endm - -# endif - -# else /* !__uClinux__ */ - - /* Versions for Linux */ - - .macro PICLEA sym, reg - movel #_GLOBAL_OFFSET_TABLE_@GOTPC, \reg - lea (-6, pc, \reg), \reg - movel \sym@GOT(\reg), \reg - .endm - - .macro PICPEA sym, areg - movel #_GLOBAL_OFFSET_TABLE_@GOTPC, \areg - lea (-6, pc, \areg), \areg - movel \sym@GOT(\areg), sp@- - .endm - - .macro PICCALL addr -#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__) - lea \addr-.-8,a0 - jsr pc@(a0) -#else - jbsr \addr -#endif - .endm - - .macro PICJUMP addr - /* ISA C has no bra.l instruction, and since this assembly file - gets assembled into multiple object files, we avoid the - bra instruction entirely. */ -#if defined (__mcoldfire__) && !defined (__mcfisab__) - lea \addr-.-8,a0 - jmp pc@(a0) -#else - bra \addr -#endif - .endm - -# endif -#endif /* __PIC__ */ - - -#ifdef L_floatex - -| This is an attempt at a decent floating point (single, double and -| extended double) code for the GNU C compiler. It should be easy to -| adapt to other compilers (but beware of the local labels!). - -| Starting date: 21 October, 1990 - -| It is convenient to introduce the notation (s,e,f) for a floating point -| number, where s=sign, e=exponent, f=fraction. We will call a floating -| point number fpn to abbreviate, independently of the precision. -| Let MAX_EXP be in each case the maximum exponent (255 for floats, 1023 -| for doubles and 16383 for long doubles). We then have the following -| different cases: -| 1. Normalized fpns have 0 < e < MAX_EXP. They correspond to -| (-1)^s x 1.f x 2^(e-bias-1). -| 2. Denormalized fpns have e=0. They correspond to numbers of the form -| (-1)^s x 0.f x 2^(-bias). -| 3. +/-INFINITY have e=MAX_EXP, f=0. -| 4. Quiet NaN (Not a Number) have all bits set. -| 5. Signaling NaN (Not a Number) have s=0, e=MAX_EXP, f=1. 
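For reference, the five cases above correspond directly to the bit pattern of an IEEE double. A minimal C sketch of the same classification (illustration only, not part of the sources being moved; the helper name is made up):

    #include <stdint.h>
    #include <string.h>

    /* Split a double into (s, e, f) and name the case, following the
       comment above.  MAX_EXP for doubles is 0x7ff.  */
    static const char *
    classify_double (double x)
    {
      uint64_t bits, s, e, f;

      memcpy (&bits, &x, sizeof bits);        /* reinterpret the bit pattern */
      s = bits >> 63;                         /* sign */
      e = (bits >> 52) & 0x7ff;               /* 11-bit exponent */
      f = bits & 0xfffffffffffffULL;          /* 52-bit fraction */
      (void) s;

      if (e == 0)
        return f ? "denormalized" : "zero";   /* case 2 (f == 0 is +/-0) */
      if (e == 0x7ff)
        return f ? "NaN" : "infinity";        /* cases 3, 4 and 5 */
      return "normalized";                    /* case 1: (-1)^s x 1.f x 2^(e-bias-1) */
    }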
- -|============================================================================= -| exceptions -|============================================================================= - -| This is the floating point condition code register (_fpCCR): -| -| struct { -| short _exception_bits; -| short _trap_enable_bits; -| short _sticky_bits; -| short _rounding_mode; -| short _format; -| short _last_operation; -| union { -| float sf; -| double df; -| } _operand1; -| union { -| float sf; -| double df; -| } _operand2; -| } _fpCCR; - - .data - .even - - .globl SYM (_fpCCR) - -SYM (_fpCCR): -__exception_bits: - .word 0 -__trap_enable_bits: - .word 0 -__sticky_bits: - .word 0 -__rounding_mode: - .word ROUND_TO_NEAREST -__format: - .word NIL -__last_operation: - .word NOOP -__operand1: - .long 0 - .long 0 -__operand2: - .long 0 - .long 0 - -| Offsets: -EBITS = __exception_bits - SYM (_fpCCR) -TRAPE = __trap_enable_bits - SYM (_fpCCR) -STICK = __sticky_bits - SYM (_fpCCR) -ROUND = __rounding_mode - SYM (_fpCCR) -FORMT = __format - SYM (_fpCCR) -LASTO = __last_operation - SYM (_fpCCR) -OPER1 = __operand1 - SYM (_fpCCR) -OPER2 = __operand2 - SYM (_fpCCR) - -| The following exception types are supported: -INEXACT_RESULT = 0x0001 -UNDERFLOW = 0x0002 -OVERFLOW = 0x0004 -DIVIDE_BY_ZERO = 0x0008 -INVALID_OPERATION = 0x0010 - -| The allowed rounding modes are: -UNKNOWN = -1 -ROUND_TO_NEAREST = 0 | round result to nearest representable value -ROUND_TO_ZERO = 1 | round result towards zero -ROUND_TO_PLUS = 2 | round result towards plus infinity -ROUND_TO_MINUS = 3 | round result towards minus infinity - -| The allowed values of format are: -NIL = 0 -SINGLE_FLOAT = 1 -DOUBLE_FLOAT = 2 -LONG_FLOAT = 3 - -| The allowed values for the last operation are: -NOOP = 0 -ADD = 1 -MULTIPLY = 2 -DIVIDE = 3 -NEGATE = 4 -COMPARE = 5 -EXTENDSFDF = 6 -TRUNCDFSF = 7 - -|============================================================================= -| __clear_sticky_bits -|============================================================================= - -| The sticky bits are normally not cleared (thus the name), whereas the -| exception type and exception value reflect the last computation. -| This routine is provided to clear them (you can also write to _fpCCR, -| since it is globally visible). - - .globl SYM (__clear_sticky_bit) - - .text - .even - -| void __clear_sticky_bits(void); -SYM (__clear_sticky_bit): - PICLEA SYM (_fpCCR),a0 -#ifndef __mcoldfire__ - movew IMM (0),a0@(STICK) -#else - clr.w a0@(STICK) -#endif - rts - -|============================================================================= -| $_exception_handler -|============================================================================= - - .globl $_exception_handler - - .text - .even - -| This is the common exit point if an exception occurs. -| NOTE: it is NOT callable from C! -| It expects the exception type in d7, the format (SINGLE_FLOAT, -| DOUBLE_FLOAT or LONG_FLOAT) in d6, and the last operation code in d5. -| It sets the corresponding exception and sticky bits, and the format. -| Depending on the format if fills the corresponding slots for the -| operands which produced the exception (all this information is provided -| so if you write your own exception handlers you have enough information -| to deal with the problem). -| Then checks to see if the corresponding exception is trap-enabled, -| in which case it pushes the address of _fpCCR and traps through -| trap FPTRAP (15 for the moment). 
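Restated in C, the bookkeeping performed by $_exception_handler looks roughly like the sketch below (the struct mirrors the _fpCCR layout given in the comment above; the function name and the use of raise() as a stand-in for "trap #15" are illustrative only):

    #include <signal.h>

    struct fpCCR
    {
      short exception_bits;
      short trap_enable_bits;
      short sticky_bits;
      short rounding_mode;
      short format;
      short last_operation;
      union { float sf; double df; } operand1;
      union { float sf; double df; } operand2;
    };

    struct fpCCR fpCCR;                  /* the assembly exports this as _fpCCR */

    /* Record an exception and trap if it is trap-enabled, as described above.  */
    static void
    record_exception (short exc, short fmt, short op, double a, double b)
    {
      fpCCR.exception_bits = exc;        /* set __exception_bits */
      fpCCR.sticky_bits   |= exc;        /* and __sticky_bits */
      fpCCR.format         = fmt;        /* SINGLE_FLOAT, DOUBLE_FLOAT or LONG_FLOAT */
      fpCCR.last_operation = op;
      fpCCR.operand1.df    = a;          /* operands that produced the exception */
      fpCCR.operand2.df    = b;
      if (fpCCR.trap_enable_bits & exc)  /* is the exception trap-enabled?  */
        raise (SIGFPE);                  /* the real code does "trap #15" */
    }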
- -FPTRAP = 15 - -$_exception_handler: - PICLEA SYM (_fpCCR),a0 - movew d7,a0@(EBITS) | set __exception_bits -#ifndef __mcoldfire__ - orw d7,a0@(STICK) | and __sticky_bits -#else - movew a0@(STICK),d4 - orl d7,d4 - movew d4,a0@(STICK) -#endif - movew d6,a0@(FORMT) | and __format - movew d5,a0@(LASTO) | and __last_operation - -| Now put the operands in place: -#ifndef __mcoldfire__ - cmpw IMM (SINGLE_FLOAT),d6 -#else - cmpl IMM (SINGLE_FLOAT),d6 -#endif - beq 1f - movel a6@(8),a0@(OPER1) - movel a6@(12),a0@(OPER1+4) - movel a6@(16),a0@(OPER2) - movel a6@(20),a0@(OPER2+4) - bra 2f -1: movel a6@(8),a0@(OPER1) - movel a6@(12),a0@(OPER2) -2: -| And check whether the exception is trap-enabled: -#ifndef __mcoldfire__ - andw a0@(TRAPE),d7 | is exception trap-enabled? -#else - clrl d6 - movew a0@(TRAPE),d6 - andl d6,d7 -#endif - beq 1f | no, exit - PICPEA SYM (_fpCCR),a1 | yes, push address of _fpCCR - trap IMM (FPTRAP) | and trap -#ifndef __mcoldfire__ -1: moveml sp@+,d2-d7 | restore data registers -#else -1: moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts -#endif /* L_floatex */ - -#ifdef L_mulsi3 - .text - FUNC(__mulsi3) - .globl SYM (__mulsi3) -SYM (__mulsi3): - movew sp@(4), d0 /* x0 -> d0 */ - muluw sp@(10), d0 /* x0*y1 */ - movew sp@(6), d1 /* x1 -> d1 */ - muluw sp@(8), d1 /* x1*y0 */ -#ifndef __mcoldfire__ - addw d1, d0 -#else - addl d1, d0 -#endif - swap d0 - clrw d0 - movew sp@(6), d1 /* x1 -> d1 */ - muluw sp@(10), d1 /* x1*y1 */ - addl d1, d0 - - rts -#endif /* L_mulsi3 */ - -#ifdef L_udivsi3 - .text - FUNC(__udivsi3) - .globl SYM (__udivsi3) -SYM (__udivsi3): -#ifndef __mcoldfire__ - movel d2, sp@- - movel sp@(12), d1 /* d1 = divisor */ - movel sp@(8), d0 /* d0 = dividend */ - - cmpl IMM (0x10000), d1 /* divisor >= 2 ^ 16 ? */ - jcc L3 /* then try next algorithm */ - movel d0, d2 - clrw d2 - swap d2 - divu d1, d2 /* high quotient in lower word */ - movew d2, d0 /* save high quotient */ - swap d0 - movew sp@(10), d2 /* get low dividend + high rest */ - divu d1, d2 /* low quotient */ - movew d2, d0 - jra L6 - -L3: movel d1, d2 /* use d2 as divisor backup */ -L4: lsrl IMM (1), d1 /* shift divisor */ - lsrl IMM (1), d0 /* shift dividend */ - cmpl IMM (0x10000), d1 /* still divisor >= 2 ^ 16 ? */ - jcc L4 - divu d1, d0 /* now we have 16-bit divisor */ - andl IMM (0xffff), d0 /* mask out divisor, ignore remainder */ - -/* Multiply the 16-bit tentative quotient with the 32-bit divisor. Because of - the operand ranges, this might give a 33-bit product. If this product is - greater than the dividend, the tentative quotient was too large. */ - movel d2, d1 - mulu d0, d1 /* low part, 32 bits */ - swap d2 - mulu d0, d2 /* high part, at most 17 bits */ - swap d2 /* align high part with low part */ - tstw d2 /* high part 17 bits? */ - jne L5 /* if 17 bits, quotient was too large */ - addl d2, d1 /* add parts */ - jcs L5 /* if sum is 33 bits, quotient was too large */ - cmpl sp@(8), d1 /* compare the sum with the dividend */ - jls L6 /* if sum > dividend, quotient was too large */ -L5: subql IMM (1), d0 /* adjust quotient */ - -L6: movel sp@+, d2 - rts - -#else /* __mcoldfire__ */ - -/* ColdFire implementation of non-restoring division algorithm from - Hennessy & Patterson, Appendix A. 
*/ - link a6,IMM (-12) - moveml d2-d4,sp@ - movel a6@(8),d0 - movel a6@(12),d1 - clrl d2 | clear p - moveq IMM (31),d4 -L1: addl d0,d0 | shift reg pair (p,a) one bit left - addxl d2,d2 - movl d2,d3 | subtract b from p, store in tmp. - subl d1,d3 - jcs L2 | if no carry, - bset IMM (0),d0 | set the low order bit of a to 1, - movl d3,d2 | and store tmp in p. -L2: subql IMM (1),d4 - jcc L1 - moveml sp@,d2-d4 | restore data registers - unlk a6 | and return - rts -#endif /* __mcoldfire__ */ - -#endif /* L_udivsi3 */ - -#ifdef L_divsi3 - .text - FUNC(__divsi3) - .globl SYM (__divsi3) -SYM (__divsi3): - movel d2, sp@- - - moveq IMM (1), d2 /* sign of result stored in d2 (=1 or =-1) */ - movel sp@(12), d1 /* d1 = divisor */ - jpl L1 - negl d1 -#ifndef __mcoldfire__ - negb d2 /* change sign because divisor <0 */ -#else - negl d2 /* change sign because divisor <0 */ -#endif -L1: movel sp@(8), d0 /* d0 = dividend */ - jpl L2 - negl d0 -#ifndef __mcoldfire__ - negb d2 -#else - negl d2 -#endif - -L2: movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__udivsi3) /* divide abs(dividend) by abs(divisor) */ - addql IMM (8), sp - - tstb d2 - jpl L3 - negl d0 - -L3: movel sp@+, d2 - rts -#endif /* L_divsi3 */ - -#ifdef L_umodsi3 - .text - FUNC(__umodsi3) - .globl SYM (__umodsi3) -SYM (__umodsi3): - movel sp@(8), d1 /* d1 = divisor */ - movel sp@(4), d0 /* d0 = dividend */ - movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__udivsi3) - addql IMM (8), sp - movel sp@(8), d1 /* d1 = divisor */ -#ifndef __mcoldfire__ - movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__mulsi3) /* d0 = (a/b)*b */ - addql IMM (8), sp -#else - mulsl d1,d0 -#endif - movel sp@(4), d1 /* d1 = dividend */ - subl d0, d1 /* d1 = a - (a/b)*b */ - movel d1, d0 - rts -#endif /* L_umodsi3 */ - -#ifdef L_modsi3 - .text - FUNC(__modsi3) - .globl SYM (__modsi3) -SYM (__modsi3): - movel sp@(8), d1 /* d1 = divisor */ - movel sp@(4), d0 /* d0 = dividend */ - movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__divsi3) - addql IMM (8), sp - movel sp@(8), d1 /* d1 = divisor */ -#ifndef __mcoldfire__ - movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__mulsi3) /* d0 = (a/b)*b */ - addql IMM (8), sp -#else - mulsl d1,d0 -#endif - movel sp@(4), d1 /* d1 = dividend */ - subl d0, d1 /* d1 = a - (a/b)*b */ - movel d1, d0 - rts -#endif /* L_modsi3 */ - - -#ifdef L_double - - .globl SYM (_fpCCR) - .globl $_exception_handler - -QUIET_NaN = 0xffffffff - -D_MAX_EXP = 0x07ff -D_BIAS = 1022 -DBL_MAX_EXP = D_MAX_EXP - D_BIAS -DBL_MIN_EXP = 1 - D_BIAS -DBL_MANT_DIG = 53 - -INEXACT_RESULT = 0x0001 -UNDERFLOW = 0x0002 -OVERFLOW = 0x0004 -DIVIDE_BY_ZERO = 0x0008 -INVALID_OPERATION = 0x0010 - -DOUBLE_FLOAT = 2 - -NOOP = 0 -ADD = 1 -MULTIPLY = 2 -DIVIDE = 3 -NEGATE = 4 -COMPARE = 5 -EXTENDSFDF = 6 -TRUNCDFSF = 7 - -UNKNOWN = -1 -ROUND_TO_NEAREST = 0 | round result to nearest representable value -ROUND_TO_ZERO = 1 | round result towards zero -ROUND_TO_PLUS = 2 | round result towards plus infinity -ROUND_TO_MINUS = 3 | round result towards minus infinity - -| Entry points: - - .globl SYM (__adddf3) - .globl SYM (__subdf3) - .globl SYM (__muldf3) - .globl SYM (__divdf3) - .globl SYM (__negdf2) - .globl SYM (__cmpdf2) - .globl SYM (__cmpdf2_internal) - .hidden SYM (__cmpdf2_internal) - - .text - .even - -| These are common routines to return and signal exceptions. 
- -Ld$den: -| Return and signal a denormalized number - orl d7,d0 - movew IMM (INEXACT_RESULT+UNDERFLOW),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -Ld$infty: -Ld$overflow: -| Return a properly signed INFINITY and set the exception flags - movel IMM (0x7ff00000),d0 - movel IMM (0),d1 - orl d7,d0 - movew IMM (INEXACT_RESULT+OVERFLOW),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -Ld$underflow: -| Return 0 and set the exception flags - movel IMM (0),d0 - movel d0,d1 - movew IMM (INEXACT_RESULT+UNDERFLOW),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -Ld$inop: -| Return a quiet NaN and set the exception flags - movel IMM (QUIET_NaN),d0 - movel d0,d1 - movew IMM (INEXACT_RESULT+INVALID_OPERATION),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -Ld$div$0: -| Return a properly signed INFINITY and set the exception flags - movel IMM (0x7ff00000),d0 - movel IMM (0),d1 - orl d7,d0 - movew IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -|============================================================================= -|============================================================================= -| double precision routines -|============================================================================= -|============================================================================= - -| A double precision floating point number (double) has the format: -| -| struct _double { -| unsigned int sign : 1; /* sign bit */ -| unsigned int exponent : 11; /* exponent, shifted by 126 */ -| unsigned int fraction : 52; /* fraction */ -| } double; -| -| Thus sizeof(double) = 8 (64 bits). -| -| All the routines are callable from C programs, and return the result -| in the register pair d0-d1. They also preserve all registers except -| d0-d1 and a0-a1. 
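In C terms, the layout and entry points described above amount to the following (a sketch only: the 52-bit fraction is split in two because a single bit-field may not exceed the width of unsigned int, and the field ordering assumed here is the big-endian m68k one):

    /* Bit layout of a double, per the comment above.  */
    struct _double
    {
      unsigned int sign : 1;             /* sign bit */
      unsigned int exponent : 11;        /* biased exponent */
      unsigned int fraction_hi : 20;     /* upper 20 bits of the 52-bit fraction */
      unsigned int fraction_lo : 32;     /* lower 32 bits */
    };

    /* The routines exported by this file, as called from C; on m68k the
       result is returned in the register pair d0-d1.  */
    double __adddf3 (double, double);
    double __subdf3 (double, double);
    double __muldf3 (double, double);
    double __divdf3 (double, double);
    double __negdf2 (double);
    int __cmpdf2 (double, double);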
- -|============================================================================= -| __subdf3 -|============================================================================= - -| double __subdf3(double, double); - FUNC(__subdf3) -SYM (__subdf3): - bchg IMM (31),sp@(12) | change sign of second operand - | and fall through, so we always add -|============================================================================= -| __adddf3 -|============================================================================= - -| double __adddf3(double, double); - FUNC(__adddf3) -SYM (__adddf3): -#ifndef __mcoldfire__ - link a6,IMM (0) | everything will be done in registers - moveml d2-d7,sp@- | save all data registers and a2 (but d0-d1) -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get first operand - movel a6@(12),d1 | - movel a6@(16),d2 | get second operand - movel a6@(20),d3 | - - movel d0,d7 | get d0's sign bit in d7 ' - addl d1,d1 | check and clear sign bit of a, and gain one - addxl d0,d0 | bit of extra precision - beq Ladddf$b | if zero return second operand - - movel d2,d6 | save sign in d6 - addl d3,d3 | get rid of sign bit and gain one bit of - addxl d2,d2 | extra precision - beq Ladddf$a | if zero return first operand - - andl IMM (0x80000000),d7 | isolate a's sign bit ' - swap d6 | and also b's sign bit ' -#ifndef __mcoldfire__ - andw IMM (0x8000),d6 | - orw d6,d7 | and combine them into d7, so that a's sign ' - | bit is in the high word and b's is in the ' - | low word, so d6 is free to be used -#else - andl IMM (0x8000),d6 - orl d6,d7 -#endif - movel d7,a0 | now save d7 into a0, so d7 is free to - | be used also - -| Get the exponents and check for denormalized and/or infinity. - - movel IMM (0x001fffff),d6 | mask for the fraction - movel IMM (0x00200000),d7 | mask to put hidden bit back - - movel d0,d4 | - andl d6,d0 | get fraction in d0 - notl d6 | make d6 into mask for the exponent - andl d6,d4 | get exponent in d4 - beq Ladddf$a$den | branch if a is denormalized - cmpl d6,d4 | check for INFINITY or NaN - beq Ladddf$nf | - orl d7,d0 | and put hidden bit back -Ladddf$1: - swap d4 | shift right exponent so that it starts -#ifndef __mcoldfire__ - lsrw IMM (5),d4 | in bit 0 and not bit 20 -#else - lsrl IMM (5),d4 | in bit 0 and not bit 20 -#endif -| Now we have a's exponent in d4 and fraction in d0-d1 ' - movel d2,d5 | save b to get exponent - andl d6,d5 | get exponent in d5 - beq Ladddf$b$den | branch if b is denormalized - cmpl d6,d5 | check for INFINITY or NaN - beq Ladddf$nf - notl d6 | make d6 into mask for the fraction again - andl d6,d2 | and get fraction in d2 - orl d7,d2 | and put hidden bit back -Ladddf$2: - swap d5 | shift right exponent so that it starts -#ifndef __mcoldfire__ - lsrw IMM (5),d5 | in bit 0 and not bit 20 -#else - lsrl IMM (5),d5 | in bit 0 and not bit 20 -#endif - -| Now we have b's exponent in d5 and fraction in d2-d3. ' - -| The situation now is as follows: the signs are combined in a0, the -| numbers are in d0-d1 (a) and d2-d3 (b), and the exponents in d4 (a) -| and d5 (b). To do the rounding correctly we need to keep all the -| bits until the end, so we need to use d0-d1-d2-d3 for the first number -| and d4-d5-d6-d7 for the second. To do this we store (temporarily) the -| exponents in a2-a3. 
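The effect of all this shuffling is a plain align-then-add: the operand with the smaller exponent is shifted right by the exponent difference into a working mantissa wide enough that nothing is lost before rounding. A compact sketch, using GCC's unsigned __int128 in place of the four data registers (names are illustrative; the real code also returns the larger operand unchanged when the difference is DBL_MANT_DIG+2 or more):

    typedef unsigned __int128 u128;

    /* Align two working mantissas (hidden bit set, fraction shifted toward
       the high end so right shifts only move bits into guard positions)
       to a common exponent before adding or subtracting.  */
    static void
    align_operands (u128 *ma, int *ea, u128 *mb, int *eb)
    {
      if (*ea < *eb)                /* make (ma, ea) the larger-exponent operand */
        {
          u128 *tm = ma; ma = mb; mb = tm;
          int *te = ea; ea = eb; eb = te;
        }
      int diff = *ea - *eb;
      *mb = diff < 128 ? *mb >> diff : 0;
      *eb = *ea;                    /* both operands now share one exponent */
    }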
- -#ifndef __mcoldfire__ - moveml a2-a3,sp@- | save the address registers -#else - movel a2,sp@- - movel a3,sp@- - movel a4,sp@- -#endif - - movel d4,a2 | save the exponents - movel d5,a3 | - - movel IMM (0),d7 | and move the numbers around - movel d7,d6 | - movel d3,d5 | - movel d2,d4 | - movel d7,d3 | - movel d7,d2 | - -| Here we shift the numbers until the exponents are the same, and put -| the largest exponent in a2. -#ifndef __mcoldfire__ - exg d4,a2 | get exponents back - exg d5,a3 | - cmpw d4,d5 | compare the exponents -#else - movel d4,a4 | get exponents back - movel a2,d4 - movel a4,a2 - movel d5,a4 - movel a3,d5 - movel a4,a3 - cmpl d4,d5 | compare the exponents -#endif - beq Ladddf$3 | if equal don't shift ' - bhi 9f | branch if second exponent is higher - -| Here we have a's exponent larger than b's, so we have to shift b. We do -| this by using as counter d2: -1: movew d4,d2 | move largest exponent to d2 -#ifndef __mcoldfire__ - subw d5,d2 | and subtract second exponent - exg d4,a2 | get back the longs we saved - exg d5,a3 | -#else - subl d5,d2 | and subtract second exponent - movel d4,a4 | get back the longs we saved - movel a2,d4 - movel a4,a2 - movel d5,a4 - movel a3,d5 - movel a4,a3 -#endif -| if difference is too large we don't shift (actually, we can just exit) ' -#ifndef __mcoldfire__ - cmpw IMM (DBL_MANT_DIG+2),d2 -#else - cmpl IMM (DBL_MANT_DIG+2),d2 -#endif - bge Ladddf$b$small -#ifndef __mcoldfire__ - cmpw IMM (32),d2 | if difference >= 32, shift by longs -#else - cmpl IMM (32),d2 | if difference >= 32, shift by longs -#endif - bge 5f -2: -#ifndef __mcoldfire__ - cmpw IMM (16),d2 | if difference >= 16, shift by words -#else - cmpl IMM (16),d2 | if difference >= 16, shift by words -#endif - bge 6f - bra 3f | enter dbra loop - -4: -#ifndef __mcoldfire__ - lsrl IMM (1),d4 - roxrl IMM (1),d5 - roxrl IMM (1),d6 - roxrl IMM (1),d7 -#else - lsrl IMM (1),d7 - btst IMM (0),d6 - beq 10f - bset IMM (31),d7 -10: lsrl IMM (1),d6 - btst IMM (0),d5 - beq 11f - bset IMM (31),d6 -11: lsrl IMM (1),d5 - btst IMM (0),d4 - beq 12f - bset IMM (31),d5 -12: lsrl IMM (1),d4 -#endif -3: -#ifndef __mcoldfire__ - dbra d2,4b -#else - subql IMM (1),d2 - bpl 4b -#endif - movel IMM (0),d2 - movel d2,d3 - bra Ladddf$4 -5: - movel d6,d7 - movel d5,d6 - movel d4,d5 - movel IMM (0),d4 -#ifndef __mcoldfire__ - subw IMM (32),d2 -#else - subl IMM (32),d2 -#endif - bra 2b -6: - movew d6,d7 - swap d7 - movew d5,d6 - swap d6 - movew d4,d5 - swap d5 - movew IMM (0),d4 - swap d4 -#ifndef __mcoldfire__ - subw IMM (16),d2 -#else - subl IMM (16),d2 -#endif - bra 3b - -9: -#ifndef __mcoldfire__ - exg d4,d5 - movew d4,d6 - subw d5,d6 | keep d5 (largest exponent) in d4 - exg d4,a2 - exg d5,a3 -#else - movel d5,d6 - movel d4,d5 - movel d6,d4 - subl d5,d6 - movel d4,a4 - movel a2,d4 - movel a4,a2 - movel d5,a4 - movel a3,d5 - movel a4,a3 -#endif -| if difference is too large we don't shift (actually, we can just exit) ' -#ifndef __mcoldfire__ - cmpw IMM (DBL_MANT_DIG+2),d6 -#else - cmpl IMM (DBL_MANT_DIG+2),d6 -#endif - bge Ladddf$a$small -#ifndef __mcoldfire__ - cmpw IMM (32),d6 | if difference >= 32, shift by longs -#else - cmpl IMM (32),d6 | if difference >= 32, shift by longs -#endif - bge 5f -2: -#ifndef __mcoldfire__ - cmpw IMM (16),d6 | if difference >= 16, shift by words -#else - cmpl IMM (16),d6 | if difference >= 16, shift by words -#endif - bge 6f - bra 3f | enter dbra loop - -4: -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 -#else - lsrl IMM (1),d3 - 
btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 -#endif -3: -#ifndef __mcoldfire__ - dbra d6,4b -#else - subql IMM (1),d6 - bpl 4b -#endif - movel IMM (0),d7 - movel d7,d6 - bra Ladddf$4 -5: - movel d2,d3 - movel d1,d2 - movel d0,d1 - movel IMM (0),d0 -#ifndef __mcoldfire__ - subw IMM (32),d6 -#else - subl IMM (32),d6 -#endif - bra 2b -6: - movew d2,d3 - swap d3 - movew d1,d2 - swap d2 - movew d0,d1 - swap d1 - movew IMM (0),d0 - swap d0 -#ifndef __mcoldfire__ - subw IMM (16),d6 -#else - subl IMM (16),d6 -#endif - bra 3b -Ladddf$3: -#ifndef __mcoldfire__ - exg d4,a2 - exg d5,a3 -#else - movel d4,a4 - movel a2,d4 - movel a4,a2 - movel d5,a4 - movel a3,d5 - movel a4,a3 -#endif -Ladddf$4: -| Now we have the numbers in d0--d3 and d4--d7, the exponent in a2, and -| the signs in a4. - -| Here we have to decide whether to add or subtract the numbers: -#ifndef __mcoldfire__ - exg d7,a0 | get the signs - exg d6,a3 | a3 is free to be used -#else - movel d7,a4 - movel a0,d7 - movel a4,a0 - movel d6,a4 - movel a3,d6 - movel a4,a3 -#endif - movel d7,d6 | - movew IMM (0),d7 | get a's sign in d7 ' - swap d6 | - movew IMM (0),d6 | and b's sign in d6 ' - eorl d7,d6 | compare the signs - bmi Lsubdf$0 | if the signs are different we have - | to subtract -#ifndef __mcoldfire__ - exg d7,a0 | else we add the numbers - exg d6,a3 | -#else - movel d7,a4 - movel a0,d7 - movel a4,a0 - movel d6,a4 - movel a3,d6 - movel a4,a3 -#endif - addl d7,d3 | - addxl d6,d2 | - addxl d5,d1 | - addxl d4,d0 | - - movel a2,d4 | return exponent to d4 - movel a0,d7 | - andl IMM (0x80000000),d7 | d7 now has the sign - -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - -| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider -| the case of denormalized numbers in the rounding routine itself). -| As in the addition (not in the subtraction!) we could have set -| one more bit we check this: - btst IMM (DBL_MANT_DIG+1),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - addw IMM (1),d4 -#else - lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif -1: - lea pc@(Ladddf$5),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Ladddf$5: -| Put back the exponent and check for overflow -#ifndef __mcoldfire__ - cmpw IMM (0x7ff),d4 | is the exponent big? -#else - cmpl IMM (0x7ff),d4 | is the exponent big? -#endif - bge 1f - bclr IMM (DBL_MANT_DIG-1),d0 -#ifndef __mcoldfire__ - lslw IMM (4),d4 | put exponent back into position -#else - lsll IMM (4),d4 | put exponent back into position -#endif - swap d0 | -#ifndef __mcoldfire__ - orw d4,d0 | -#else - orl d4,d0 | -#endif - swap d0 | - bra Ladddf$ret -1: - moveq IMM (ADD),d5 - bra Ld$overflow - -Lsubdf$0: -| Here we do the subtraction. 
-#ifndef __mcoldfire__ - exg d7,a0 | put sign back in a0 - exg d6,a3 | -#else - movel d7,a4 - movel a0,d7 - movel a4,a0 - movel d6,a4 - movel a3,d6 - movel a4,a3 -#endif - subl d7,d3 | - subxl d6,d2 | - subxl d5,d1 | - subxl d4,d0 | - beq Ladddf$ret$1 | if zero just exit - bpl 1f | if positive skip the following - movel a0,d7 | - bchg IMM (31),d7 | change sign bit in d7 - movel d7,a0 | - negl d3 | - negxl d2 | - negxl d1 | and negate result - negxl d0 | -1: - movel a2,d4 | return exponent to d4 - movel a0,d7 - andl IMM (0x80000000),d7 | isolate sign bit -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 | -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - -| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider -| the case of denormalized numbers in the rounding routine itself). -| As in the addition (not in the subtraction!) we could have set -| one more bit we check this: - btst IMM (DBL_MANT_DIG+1),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - addw IMM (1),d4 -#else - lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif -1: - lea pc@(Lsubdf$1),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Lsubdf$1: -| Put back the exponent and sign (we don't have overflow). ' - bclr IMM (DBL_MANT_DIG-1),d0 -#ifndef __mcoldfire__ - lslw IMM (4),d4 | put exponent back into position -#else - lsll IMM (4),d4 | put exponent back into position -#endif - swap d0 | -#ifndef __mcoldfire__ - orw d4,d0 | -#else - orl d4,d0 | -#endif - swap d0 | - bra Ladddf$ret - -| If one of the numbers was too small (difference of exponents >= -| DBL_MANT_DIG+1) we return the other (and now we don't have to ' -| check for finiteness or zero). -Ladddf$a$small: -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - movel a6@(16),d0 - movel a6@(20),d1 - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts - -Ladddf$b$small: -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - movel a6@(8),d0 - movel a6@(12),d1 - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts - -Ladddf$a$den: - movel d7,d4 | d7 contains 0x00200000 - bra Ladddf$1 - -Ladddf$b$den: - movel d7,d5 | d7 contains 0x00200000 - notl d6 - bra Ladddf$2 - -Ladddf$b: -| Return b (if a is zero) - movel d2,d0 - movel d3,d1 - bne 1f | Check if b is -0 - cmpl IMM (0x80000000),d0 - bne 1f - andl IMM (0x80000000),d7 | Use the sign of a - clrl d0 - bra Ladddf$ret -Ladddf$a: - movel a6@(8),d0 - movel a6@(12),d1 -1: - moveq IMM (ADD),d5 -| Check for NaN and +/-INFINITY. 
- movel d0,d7 | - andl IMM (0x80000000),d7 | - bclr IMM (31),d0 | - cmpl IMM (0x7ff00000),d0 | - bge 2f | - movel d0,d0 | check for zero, since we don't ' - bne Ladddf$ret | want to return -0 by mistake - bclr IMM (31),d7 | - bra Ladddf$ret | -2: - andl IMM (0x000fffff),d0 | check for NaN (nonzero fraction) - orl d1,d0 | - bne Ld$inop | - bra Ld$infty | - -Ladddf$ret$1: -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 | restore regs and exit -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - -Ladddf$ret: -| Normal exit. - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ - orl d7,d0 | put sign bit back -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts - -Ladddf$ret$den: -| Return a denormalized number. -#ifndef __mcoldfire__ - lsrl IMM (1),d0 | shift right once more - roxrl IMM (1),d1 | -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 -#endif - bra Ladddf$ret - -Ladddf$nf: - moveq IMM (ADD),d5 -| This could be faster but it is not worth the effort, since it is not -| executed very often. We sacrifice speed for clarity here. - movel a6@(8),d0 | get the numbers back (remember that we - movel a6@(12),d1 | did some processing already) - movel a6@(16),d2 | - movel a6@(20),d3 | - movel IMM (0x7ff00000),d4 | useful constant (INFINITY) - movel d0,d7 | save sign bits - movel d2,d6 | - bclr IMM (31),d0 | clear sign bits - bclr IMM (31),d2 | -| We know that one of them is either NaN of +/-INFINITY -| Check for NaN (if either one is NaN return NaN) - cmpl d4,d0 | check first a (d0) - bhi Ld$inop | if d0 > 0x7ff00000 or equal and - bne 2f - tstl d1 | d1 > 0, a is NaN - bne Ld$inop | -2: cmpl d4,d2 | check now b (d1) - bhi Ld$inop | - bne 3f - tstl d3 | - bne Ld$inop | -3: -| Now comes the check for +/-INFINITY. We know that both are (maybe not -| finite) numbers, but we have to check if both are infinite whether we -| are adding or subtracting them. - eorl d7,d6 | to check sign bits - bmi 1f - andl IMM (0x80000000),d7 | get (common) sign bit - bra Ld$infty -1: -| We know one (or both) are infinite, so we test for equality between the -| two numbers (if they are equal they have to be infinite both, so we -| return NaN). - cmpl d2,d0 | are both infinite? 
- bne 1f | if d0 <> d2 they are not equal - cmpl d3,d1 | if d0 == d2 test d3 and d1 - beq Ld$inop | if equal return NaN -1: - andl IMM (0x80000000),d7 | get a's sign bit ' - cmpl d4,d0 | test now for infinity - beq Ld$infty | if a is INFINITY return with this sign - bchg IMM (31),d7 | else we know b is INFINITY and has - bra Ld$infty | the opposite sign - -|============================================================================= -| __muldf3 -|============================================================================= - -| double __muldf3(double, double); - FUNC(__muldf3) -SYM (__muldf3): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get a into d0-d1 - movel a6@(12),d1 | - movel a6@(16),d2 | and b into d2-d3 - movel a6@(20),d3 | - movel d0,d7 | d7 will hold the sign of the product - eorl d2,d7 | - andl IMM (0x80000000),d7 | - movel d7,a0 | save sign bit into a0 - movel IMM (0x7ff00000),d7 | useful constant (+INFINITY) - movel d7,d6 | another (mask for fraction) - notl d6 | - bclr IMM (31),d0 | get rid of a's sign bit ' - movel d0,d4 | - orl d1,d4 | - beq Lmuldf$a$0 | branch if a is zero - movel d0,d4 | - bclr IMM (31),d2 | get rid of b's sign bit ' - movel d2,d5 | - orl d3,d5 | - beq Lmuldf$b$0 | branch if b is zero - movel d2,d5 | - cmpl d7,d0 | is a big? - bhi Lmuldf$inop | if a is NaN return NaN - beq Lmuldf$a$nf | we still have to check d1 and b ... - cmpl d7,d2 | now compare b with INFINITY - bhi Lmuldf$inop | is b NaN? - beq Lmuldf$b$nf | we still have to check d3 ... -| Here we have both numbers finite and nonzero (and with no sign bit). -| Now we get the exponents into d4 and d5. - andl d7,d4 | isolate exponent in d4 - beq Lmuldf$a$den | if exponent zero, have denormalized - andl d6,d0 | isolate fraction - orl IMM (0x00100000),d0 | and put hidden bit back - swap d4 | I like exponents in the first byte -#ifndef __mcoldfire__ - lsrw IMM (4),d4 | -#else - lsrl IMM (4),d4 | -#endif -Lmuldf$1: - andl d7,d5 | - beq Lmuldf$b$den | - andl d6,d2 | - orl IMM (0x00100000),d2 | and put hidden bit back - swap d5 | -#ifndef __mcoldfire__ - lsrw IMM (4),d5 | -#else - lsrl IMM (4),d5 | -#endif -Lmuldf$2: | -#ifndef __mcoldfire__ - addw d5,d4 | add exponents - subw IMM (D_BIAS+1),d4 | and subtract bias (plus one) -#else - addl d5,d4 | add exponents - subl IMM (D_BIAS+1),d4 | and subtract bias (plus one) -#endif - -| We are now ready to do the multiplication. The situation is as follows: -| both a and b have bit 52 ( bit 20 of d0 and d2) set (even if they were -| denormalized to start with!), which means that in the product bit 104 -| (which will correspond to bit 8 of the fourth long) is set. - -| Here we have to do the product. -| To do it we have to juggle the registers back and forth, as there are not -| enough to keep everything in them. So we use the address registers to keep -| some intermediate data. 
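The loop that follows is a most-significant-bit-first shift-and-add multiply of the two 53-bit fractions into a product of roughly 106 bits. The same algorithm in C (a sketch; uint64_t and __int128 stand in for the register pairs):

    #include <stdint.h>

    typedef unsigned __int128 u128;

    /* Multiply two fractions with the hidden bit (bit 52) set, as the loop
       below does with d0-d3 as the sum and d4-d5/d6-d7 as the operands:
       shift the partial product left once per multiplier bit and add the
       multiplicand whenever that bit is set.  */
    static u128
    mul_fractions (uint64_t a, uint64_t b)
    {
      u128 product = 0;
      int i;

      for (i = 52; i >= 0; i--)          /* DBL_MANT_DIG iterations */
        {
          product <<= 1;                 /* shift sum once left */
          if ((b >> i) & 1)              /* was the bit shifted out of b set?  */
            product += a;                /* then add a to the sum */
        }
      return product;                    /* lies in [2^104, 2^106) */
    }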
- -#ifndef __mcoldfire__ - moveml a2-a3,sp@- | save a2 and a3 for temporary use -#else - movel a2,sp@- - movel a3,sp@- - movel a4,sp@- -#endif - movel IMM (0),a2 | a2 is a null register - movel d4,a3 | and a3 will preserve the exponent - -| First, shift d2-d3 so bit 20 becomes bit 31: -#ifndef __mcoldfire__ - rorl IMM (5),d2 | rotate d2 5 places right - swap d2 | and swap it - rorl IMM (5),d3 | do the same thing with d3 - swap d3 | - movew d3,d6 | get the rightmost 11 bits of d3 - andw IMM (0x07ff),d6 | - orw d6,d2 | and put them into d2 - andw IMM (0xf800),d3 | clear those bits in d3 -#else - moveq IMM (11),d7 | left shift d2 11 bits - lsll d7,d2 - movel d3,d6 | get a copy of d3 - lsll d7,d3 | left shift d3 11 bits - andl IMM (0xffe00000),d6 | get the top 11 bits of d3 - moveq IMM (21),d7 | right shift them 21 bits - lsrl d7,d6 - orl d6,d2 | stick them at the end of d2 -#endif - - movel d2,d6 | move b into d6-d7 - movel d3,d7 | move a into d4-d5 - movel d0,d4 | and clear d0-d1-d2-d3 (to put result) - movel d1,d5 | - movel IMM (0),d3 | - movel d3,d2 | - movel d3,d1 | - movel d3,d0 | - -| We use a1 as counter: - movel IMM (DBL_MANT_DIG-1),a1 -#ifndef __mcoldfire__ - exg d7,a1 -#else - movel d7,a4 - movel a1,d7 - movel a4,a1 -#endif - -1: -#ifndef __mcoldfire__ - exg d7,a1 | put counter back in a1 -#else - movel d7,a4 - movel a1,d7 - movel a4,a1 -#endif - addl d3,d3 | shift sum once left - addxl d2,d2 | - addxl d1,d1 | - addxl d0,d0 | - addl d7,d7 | - addxl d6,d6 | - bcc 2f | if bit clear skip the following -#ifndef __mcoldfire__ - exg d7,a2 | -#else - movel d7,a4 - movel a2,d7 - movel a4,a2 -#endif - addl d5,d3 | else add a to the sum - addxl d4,d2 | - addxl d7,d1 | - addxl d7,d0 | -#ifndef __mcoldfire__ - exg d7,a2 | -#else - movel d7,a4 - movel a2,d7 - movel a4,a2 -#endif -2: -#ifndef __mcoldfire__ - exg d7,a1 | put counter in d7 - dbf d7,1b | decrement and branch -#else - movel d7,a4 - movel a1,d7 - movel a4,a1 - subql IMM (1),d7 - bpl 1b -#endif - - movel a3,d4 | restore exponent -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - -| Now we have the product in d0-d1-d2-d3, with bit 8 of d0 set. The -| first thing to do now is to normalize it so bit 8 becomes bit -| DBL_MANT_DIG-32 (to do the rounding); later we will shift right. - swap d0 - swap d1 - movew d1,d0 - swap d2 - movew d2,d1 - swap d3 - movew d3,d2 - movew IMM (0),d3 -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 -#else - moveq IMM (29),d6 - lsrl IMM (3),d3 - movel d2,d7 - lsll d6,d7 - orl d7,d3 - lsrl IMM (3),d2 - movel d1,d7 - lsll d6,d7 - orl d7,d2 - lsrl IMM (3),d1 - movel d0,d7 - lsll d6,d7 - orl d7,d1 - lsrl IMM (3),d0 -#endif - -| Now round, check for over- and underflow, and exit. 
- movel a0,d7 | get sign bit back into d7 - moveq IMM (MULTIPLY),d5 - - btst IMM (DBL_MANT_DIG+1-32),d0 - beq Lround$exit -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - addw IMM (1),d4 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif - bra Lround$exit - -Lmuldf$inop: - moveq IMM (MULTIPLY),d5 - bra Ld$inop - -Lmuldf$b$nf: - moveq IMM (MULTIPLY),d5 - movel a0,d7 | get sign bit back into d7 - tstl d3 | we know d2 == 0x7ff00000, so check d3 - bne Ld$inop | if d3 <> 0 b is NaN - bra Ld$overflow | else we have overflow (since a is finite) - -Lmuldf$a$nf: - moveq IMM (MULTIPLY),d5 - movel a0,d7 | get sign bit back into d7 - tstl d1 | we know d0 == 0x7ff00000, so check d1 - bne Ld$inop | if d1 <> 0 a is NaN - bra Ld$overflow | else signal overflow - -| If either number is zero return zero, unless the other is +/-INFINITY or -| NaN, in which case we return NaN. -Lmuldf$b$0: - moveq IMM (MULTIPLY),d5 -#ifndef __mcoldfire__ - exg d2,d0 | put b (==0) into d0-d1 - exg d3,d1 | and a (with sign bit cleared) into d2-d3 - movel a0,d0 | set result sign -#else - movel d0,d2 | put a into d2-d3 - movel d1,d3 - movel a0,d0 | put result zero into d0-d1 - movq IMM(0),d1 -#endif - bra 1f -Lmuldf$a$0: - movel a0,d0 | set result sign - movel a6@(16),d2 | put b into d2-d3 again - movel a6@(20),d3 | - bclr IMM (31),d2 | clear sign bit -1: cmpl IMM (0x7ff00000),d2 | check for non-finiteness - bge Ld$inop | in case NaN or +/-INFINITY return NaN - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts - -| If a number is denormalized we put an exponent of 1 but do not put the -| hidden bit back into the fraction; instead we shift left until bit 21 -| (the hidden bit) is set, adjusting the exponent accordingly. We do this -| to ensure that the product of the fractions is close to 1. 
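A C sketch of that pre-normalization step (illustrative only; here the mantissa is a single 64-bit word, so the hidden bit is bit 52):

    #include <stdint.h>

    /* Give a denormalized operand an exponent of 1, then shift the fraction
       left until the hidden bit is set, adjusting the exponent for each
       shift.  The fraction is known to be nonzero: zero operands were
       handled earlier.  */
    static uint64_t
    normalize_subnormal (uint64_t fraction, int *exp)
    {
      *exp = 1;
      while (!(fraction & ((uint64_t) 1 << 52)))
        {
          fraction <<= 1;
          (*exp)--;                 /* may go to zero or below; over/underflow
                                       is checked later */
        }
      return fraction;
    }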
-Lmuldf$a$den: - movel IMM (1),d4 - andl d6,d0 -1: addl d1,d1 | shift a left until bit 20 is set - addxl d0,d0 | -#ifndef __mcoldfire__ - subw IMM (1),d4 | and adjust exponent -#else - subl IMM (1),d4 | and adjust exponent -#endif - btst IMM (20),d0 | - bne Lmuldf$1 | - bra 1b - -Lmuldf$b$den: - movel IMM (1),d5 - andl d6,d2 -1: addl d3,d3 | shift b left until bit 20 is set - addxl d2,d2 | -#ifndef __mcoldfire__ - subw IMM (1),d5 | and adjust exponent -#else - subql IMM (1),d5 | and adjust exponent -#endif - btst IMM (20),d2 | - bne Lmuldf$2 | - bra 1b - - -|============================================================================= -| __divdf3 -|============================================================================= - -| double __divdf3(double, double); - FUNC(__divdf3) -SYM (__divdf3): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get a into d0-d1 - movel a6@(12),d1 | - movel a6@(16),d2 | and b into d2-d3 - movel a6@(20),d3 | - movel d0,d7 | d7 will hold the sign of the result - eorl d2,d7 | - andl IMM (0x80000000),d7 - movel d7,a0 | save sign into a0 - movel IMM (0x7ff00000),d7 | useful constant (+INFINITY) - movel d7,d6 | another (mask for fraction) - notl d6 | - bclr IMM (31),d0 | get rid of a's sign bit ' - movel d0,d4 | - orl d1,d4 | - beq Ldivdf$a$0 | branch if a is zero - movel d0,d4 | - bclr IMM (31),d2 | get rid of b's sign bit ' - movel d2,d5 | - orl d3,d5 | - beq Ldivdf$b$0 | branch if b is zero - movel d2,d5 - cmpl d7,d0 | is a big? - bhi Ldivdf$inop | if a is NaN return NaN - beq Ldivdf$a$nf | if d0 == 0x7ff00000 we check d1 - cmpl d7,d2 | now compare b with INFINITY - bhi Ldivdf$inop | if b is NaN return NaN - beq Ldivdf$b$nf | if d2 == 0x7ff00000 we check d3 -| Here we have both numbers finite and nonzero (and with no sign bit). -| Now we get the exponents into d4 and d5 and normalize the numbers to -| ensure that the ratio of the fractions is around 1. We do this by -| making sure that both numbers have bit #DBL_MANT_DIG-32-1 (hidden bit) -| set, even if they were denormalized to start with. -| Thus, the result will satisfy: 2 > result > 1/2. - andl d7,d4 | and isolate exponent in d4 - beq Ldivdf$a$den | if exponent is zero we have a denormalized - andl d6,d0 | and isolate fraction - orl IMM (0x00100000),d0 | and put hidden bit back - swap d4 | I like exponents in the first byte -#ifndef __mcoldfire__ - lsrw IMM (4),d4 | -#else - lsrl IMM (4),d4 | -#endif -Ldivdf$1: | - andl d7,d5 | - beq Ldivdf$b$den | - andl d6,d2 | - orl IMM (0x00100000),d2 - swap d5 | -#ifndef __mcoldfire__ - lsrw IMM (4),d5 | -#else - lsrl IMM (4),d5 | -#endif -Ldivdf$2: | -#ifndef __mcoldfire__ - subw d5,d4 | subtract exponents - addw IMM (D_BIAS),d4 | and add bias -#else - subl d5,d4 | subtract exponents - addl IMM (D_BIAS),d4 | and add bias -#endif - -| We are now ready to do the division. We have prepared things in such a way -| that the ratio of the fractions will be less than 2 but greater than 1/2. -| At this point the registers in use are: -| d0-d1 hold a (first operand, bit DBL_MANT_DIG-32=0, bit -| DBL_MANT_DIG-1-32=1) -| d2-d3 hold b (second operand, bit DBL_MANT_DIG-32=1) -| d4 holds the difference of the exponents, corrected by the bias -| a0 holds the sign of the ratio - -| To do the rounding correctly we need to keep information about the -| nonsignificant bits. 
One way to do this would be to do the division -| using four registers; another is to use two registers (as originally -| I did), but use a sticky bit to preserve information about the -| fractional part. Note that we can keep that info in a1, which is not -| used. - movel IMM (0),d6 | d6-d7 will hold the result - movel d6,d7 | - movel IMM (0),a1 | and a1 will hold the sticky bit - - movel IMM (DBL_MANT_DIG-32+1),d5 - -1: cmpl d0,d2 | is a < b? - bhi 3f | if b > a skip the following - beq 4f | if d0==d2 check d1 and d3 -2: subl d3,d1 | - subxl d2,d0 | a <-- a - b - bset d5,d6 | set the corresponding bit in d6 -3: addl d1,d1 | shift a by 1 - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d5,1b | and branch back -#else - subql IMM (1), d5 - bpl 1b -#endif - bra 5f -4: cmpl d1,d3 | here d0==d2, so check d1 and d3 - bhi 3b | if d1 > d2 skip the subtraction - bra 2b | else go do it -5: -| Here we have to start setting the bits in the second long. - movel IMM (31),d5 | again d5 is counter - -1: cmpl d0,d2 | is a < b? - bhi 3f | if b > a skip the following - beq 4f | if d0==d2 check d1 and d3 -2: subl d3,d1 | - subxl d2,d0 | a <-- a - b - bset d5,d7 | set the corresponding bit in d7 -3: addl d1,d1 | shift a by 1 - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d5,1b | and branch back -#else - subql IMM (1), d5 - bpl 1b -#endif - bra 5f -4: cmpl d1,d3 | here d0==d2, so check d1 and d3 - bhi 3b | if d1 > d2 skip the subtraction - bra 2b | else go do it -5: -| Now go ahead checking until we hit a one, which we store in d2. - movel IMM (DBL_MANT_DIG),d5 -1: cmpl d2,d0 | is a < b? - bhi 4f | if b < a, exit - beq 3f | if d0==d2 check d1 and d3 -2: addl d1,d1 | shift a by 1 - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d5,1b | and branch back -#else - subql IMM (1), d5 - bpl 1b -#endif - movel IMM (0),d2 | here no sticky bit was found - movel d2,d3 - bra 5f -3: cmpl d1,d3 | here d0==d2, so check d1 and d3 - bhi 2b | if d1 > d2 go back -4: -| Here put the sticky bit in d2-d3 (in the position which actually corresponds -| to it; if you don't do this the algorithm loses in some cases). ' - movel IMM (0),d2 - movel d2,d3 -#ifndef __mcoldfire__ - subw IMM (DBL_MANT_DIG),d5 - addw IMM (63),d5 - cmpw IMM (31),d5 -#else - subl IMM (DBL_MANT_DIG),d5 - addl IMM (63),d5 - cmpl IMM (31),d5 -#endif - bhi 2f -1: bset d5,d3 - bra 5f -#ifndef __mcoldfire__ - subw IMM (32),d5 -#else - subl IMM (32),d5 -#endif -2: bset d5,d2 -5: -| Finally we are finished! Move the longs in the address registers to -| their final destination: - movel d6,d0 - movel d7,d1 - movel IMM (0),d3 - -| Here we have finished the division, with the result in d0-d1-d2-d3, with -| 2^21 <= d6 < 2^23. Thus bit 23 is not set, but bit 22 could be set. -| If it is not, then definitely bit 21 is set. Normalize so bit 22 is -| not set: - btst IMM (DBL_MANT_DIG-32+1),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - addw IMM (1),d4 -#else - lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif -1: -| Now round, check for over- and underflow, and exit. - movel a0,d7 | restore sign bit to d7 - moveq IMM (DIVIDE),d5 - bra Lround$exit - -Ldivdf$inop: - moveq IMM (DIVIDE),d5 - bra Ld$inop - -Ldivdf$a$0: -| If a is zero check to see whether b is zero also. 
In that case return -| NaN; then check if b is NaN, and return NaN also in that case. Else -| return a properly signed zero. - moveq IMM (DIVIDE),d5 - bclr IMM (31),d2 | - movel d2,d4 | - orl d3,d4 | - beq Ld$inop | if b is also zero return NaN - cmpl IMM (0x7ff00000),d2 | check for NaN - bhi Ld$inop | - blt 1f | - tstl d3 | - bne Ld$inop | -1: movel a0,d0 | else return signed zero - moveq IMM(0),d1 | - PICLEA SYM (_fpCCR),a0 | clear exception flags - movew IMM (0),a0@ | -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | -#else - moveml sp@,d2-d7 | - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | - rts | - -Ldivdf$b$0: - moveq IMM (DIVIDE),d5 -| If we got here a is not zero. Check if a is NaN; in that case return NaN, -| else return +/-INFINITY. Remember that a is in d0 with the sign bit -| cleared already. - movel a0,d7 | put a's sign bit back in d7 ' - cmpl IMM (0x7ff00000),d0 | compare d0 with INFINITY - bhi Ld$inop | if larger it is NaN - tstl d1 | - bne Ld$inop | - bra Ld$div$0 | else signal DIVIDE_BY_ZERO - -Ldivdf$b$nf: - moveq IMM (DIVIDE),d5 -| If d2 == 0x7ff00000 we have to check d3. - tstl d3 | - bne Ld$inop | if d3 <> 0, b is NaN - bra Ld$underflow | else b is +/-INFINITY, so signal underflow - -Ldivdf$a$nf: - moveq IMM (DIVIDE),d5 -| If d0 == 0x7ff00000 we have to check d1. - tstl d1 | - bne Ld$inop | if d1 <> 0, a is NaN -| If a is INFINITY we have to check b - cmpl d7,d2 | compare b with INFINITY - bge Ld$inop | if b is NaN or INFINITY return NaN - tstl d3 | - bne Ld$inop | - bra Ld$overflow | else return overflow - -| If a number is denormalized we put an exponent of 1 but do not put the -| bit back into the fraction. -Ldivdf$a$den: - movel IMM (1),d4 - andl d6,d0 -1: addl d1,d1 | shift a left until bit 20 is set - addxl d0,d0 -#ifndef __mcoldfire__ - subw IMM (1),d4 | and adjust exponent -#else - subl IMM (1),d4 | and adjust exponent -#endif - btst IMM (DBL_MANT_DIG-32-1),d0 - bne Ldivdf$1 - bra 1b - -Ldivdf$b$den: - movel IMM (1),d5 - andl d6,d2 -1: addl d3,d3 | shift b left until bit 20 is set - addxl d2,d2 -#ifndef __mcoldfire__ - subw IMM (1),d5 | and adjust exponent -#else - subql IMM (1),d5 | and adjust exponent -#endif - btst IMM (DBL_MANT_DIG-32-1),d2 - bne Ldivdf$2 - bra 1b - -Lround$exit: -| This is a common exit point for __muldf3 and __divdf3. When they enter -| this point the sign of the result is in d7, the result in d0-d1, normalized -| so that 2^21 <= d0 < 2^22, and the exponent is in the lower byte of d4. - -| First check for underlow in the exponent: -#ifndef __mcoldfire__ - cmpw IMM (-DBL_MANT_DIG-1),d4 -#else - cmpl IMM (-DBL_MANT_DIG-1),d4 -#endif - blt Ld$underflow -| It could happen that the exponent is less than 1, in which case the -| number is denormalized. In this case we shift right and adjust the -| exponent until it becomes 1 or the fraction is zero (in the latter case -| we signal underflow and return zero). - movel d7,a0 | - movel IMM (0),d6 | use d6-d7 to collect bits flushed right - movel d6,d7 | use d6-d7 to collect bits flushed right -#ifndef __mcoldfire__ - cmpw IMM (1),d4 | if the exponent is less than 1 we -#else - cmpl IMM (1),d4 | if the exponent is less than 1 we -#endif - bge 2f | have to shift right (denormalize) -1: -#ifndef __mcoldfire__ - addw IMM (1),d4 | adjust the exponent - lsrl IMM (1),d0 | shift right once - roxrl IMM (1),d1 | - roxrl IMM (1),d2 | - roxrl IMM (1),d3 | - roxrl IMM (1),d6 | - roxrl IMM (1),d7 | - cmpw IMM (1),d4 | is the exponent 1 already? 
-#else - addl IMM (1),d4 | adjust the exponent - lsrl IMM (1),d7 - btst IMM (0),d6 - beq 13f - bset IMM (31),d7 -13: lsrl IMM (1),d6 - btst IMM (0),d3 - beq 14f - bset IMM (31),d6 -14: lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 - cmpl IMM (1),d4 | is the exponent 1 already? -#endif - beq 2f | if not loop back - bra 1b | - bra Ld$underflow | safety check, shouldn't execute ' -2: orl d6,d2 | this is a trick so we don't lose ' - orl d7,d3 | the bits which were flushed right - movel a0,d7 | get back sign bit into d7 -| Now call the rounding routine (which takes care of denormalized numbers): - lea pc@(Lround$0),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Lround$0: -| Here we have a correctly rounded result (either normalized or denormalized). - -| Here we should have either a normalized number or a denormalized one, and -| the exponent is necessarily larger or equal to 1 (so we don't have to ' -| check again for underflow!). We have to check for overflow or for a -| denormalized number (which also signals underflow). -| Check for overflow (i.e., exponent >= 0x7ff). -#ifndef __mcoldfire__ - cmpw IMM (0x07ff),d4 -#else - cmpl IMM (0x07ff),d4 -#endif - bge Ld$overflow -| Now check for a denormalized number (exponent==0): - movew d4,d4 - beq Ld$den -1: -| Put back the exponents and sign and return. -#ifndef __mcoldfire__ - lslw IMM (4),d4 | exponent back to fourth byte -#else - lsll IMM (4),d4 | exponent back to fourth byte -#endif - bclr IMM (DBL_MANT_DIG-32-1),d0 - swap d0 | and put back exponent -#ifndef __mcoldfire__ - orw d4,d0 | -#else - orl d4,d0 | -#endif - swap d0 | - orl d7,d0 | and sign also - - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts - -|============================================================================= -| __negdf2 -|============================================================================= - -| double __negdf2(double, double); - FUNC(__negdf2) -SYM (__negdf2): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - moveq IMM (NEGATE),d5 - movel a6@(8),d0 | get number to negate in d0-d1 - movel a6@(12),d1 | - bchg IMM (31),d0 | negate - movel d0,d2 | make a positive copy (for the tests) - bclr IMM (31),d2 | - movel d2,d4 | check for zero - orl d1,d4 | - beq 2f | if zero (either sign) return +zero - cmpl IMM (0x7ff00000),d2 | compare to +INFINITY - blt 1f | if finite, return - bhi Ld$inop | if larger (fraction not zero) is NaN - tstl d1 | if d2 == 0x7ff00000 check d1 - bne Ld$inop | - movel d0,d7 | else get sign and return INFINITY - andl IMM (0x80000000),d7 - bra Ld$infty -1: PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. 
-#endif - unlk a6 - rts -2: bclr IMM (31),d0 - bra 1b - -|============================================================================= -| __cmpdf2 -|============================================================================= - -GREATER = 1 -LESS = -1 -EQUAL = 0 - -| int __cmpdf2_internal(double, double, int); -SYM (__cmpdf2_internal): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- | save registers -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - moveq IMM (COMPARE),d5 - movel a6@(8),d0 | get first operand - movel a6@(12),d1 | - movel a6@(16),d2 | get second operand - movel a6@(20),d3 | -| First check if a and/or b are (+/-) zero and in that case clear -| the sign bit. - movel d0,d6 | copy signs into d6 (a) and d7(b) - bclr IMM (31),d0 | and clear signs in d0 and d2 - movel d2,d7 | - bclr IMM (31),d2 | - cmpl IMM (0x7ff00000),d0 | check for a == NaN - bhi Lcmpd$inop | if d0 > 0x7ff00000, a is NaN - beq Lcmpdf$a$nf | if equal can be INFINITY, so check d1 - movel d0,d4 | copy into d4 to test for zero - orl d1,d4 | - beq Lcmpdf$a$0 | -Lcmpdf$0: - cmpl IMM (0x7ff00000),d2 | check for b == NaN - bhi Lcmpd$inop | if d2 > 0x7ff00000, b is NaN - beq Lcmpdf$b$nf | if equal can be INFINITY, so check d3 - movel d2,d4 | - orl d3,d4 | - beq Lcmpdf$b$0 | -Lcmpdf$1: -| Check the signs - eorl d6,d7 - bpl 1f -| If the signs are not equal check if a >= 0 - tstl d6 - bpl Lcmpdf$a$gt$b | if (a >= 0 && b < 0) => a > b - bmi Lcmpdf$b$gt$a | if (a < 0 && b >= 0) => a < b -1: -| If the signs are equal check for < 0 - tstl d6 - bpl 1f -| If both are negative exchange them -#ifndef __mcoldfire__ - exg d0,d2 - exg d1,d3 -#else - movel d0,d7 - movel d2,d0 - movel d7,d2 - movel d1,d7 - movel d3,d1 - movel d7,d3 -#endif -1: -| Now that they are positive we just compare them as longs (does this also -| work for denormalized numbers?). - cmpl d0,d2 - bhi Lcmpdf$b$gt$a | |b| > |a| - bne Lcmpdf$a$gt$b | |b| < |a| -| If we got here d0 == d2, so we compare d1 and d3. - cmpl d1,d3 - bhi Lcmpdf$b$gt$a | |b| > |a| - bne Lcmpdf$a$gt$b | |b| < |a| -| If we got here a == b. - movel IMM (EQUAL),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts -Lcmpdf$a$gt$b: - movel IMM (GREATER),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts -Lcmpdf$b$gt$a: - movel IMM (LESS),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. 
-#endif - unlk a6 - rts - -Lcmpdf$a$0: - bclr IMM (31),d6 - bra Lcmpdf$0 -Lcmpdf$b$0: - bclr IMM (31),d7 - bra Lcmpdf$1 - -Lcmpdf$a$nf: - tstl d1 - bne Ld$inop - bra Lcmpdf$0 - -Lcmpdf$b$nf: - tstl d3 - bne Ld$inop - bra Lcmpdf$1 - -Lcmpd$inop: - movl a6@(24),d0 - moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -| int __cmpdf2(double, double); - FUNC(__cmpdf2) -SYM (__cmpdf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts - -|============================================================================= -| rounding routines -|============================================================================= - -| The rounding routines expect the number to be normalized in registers -| d0-d1-d2-d3, with the exponent in register d4. They assume that the -| exponent is larger or equal to 1. They return a properly normalized number -| if possible, and a denormalized number otherwise. The exponent is returned -| in d4. - -Lround$to$nearest: -| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"): -| Here we assume that the exponent is not too small (this should be checked -| before entering the rounding routine), but the number could be denormalized. - -| Check for denormalized numbers: -1: btst IMM (DBL_MANT_DIG-32),d0 - bne 2f | if set the number is normalized -| Normalize shifting left until bit #DBL_MANT_DIG-32 is set or the exponent -| is one (remember that a denormalized number corresponds to an -| exponent of -D_BIAS+1). -#ifndef __mcoldfire__ - cmpw IMM (1),d4 | remember that the exponent is at least one -#else - cmpl IMM (1),d4 | remember that the exponent is at least one -#endif - beq 2f | an exponent of one means denormalized - addl d3,d3 | else shift and adjust the exponent - addxl d2,d2 | - addxl d1,d1 | - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d4,1b | -#else - subql IMM (1), d4 - bpl 1b -#endif -2: -| Now round: we do it as follows: after the shifting we can write the -| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2. -| If delta < 1, do nothing. If delta > 1, add 1 to f. -| If delta == 1, we make sure the rounded number will be even (odd?) -| (after shifting). - btst IMM (0),d1 | is delta < 1? - beq 2f | if so, do not do anything - orl d2,d3 | is delta == 1? - bne 1f | if so round to even - movel d1,d3 | - andl IMM (2),d3 | bit 1 is the last significant bit - movel IMM (0),d2 | - addl d3,d1 | - addxl d2,d0 | - bra 2f | -1: movel IMM (1),d3 | else add 1 - movel IMM (0),d2 | - addl d3,d1 | - addxl d2,d0 -| Shift right once (because we used bit #DBL_MANT_DIG-32!). -2: -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 -#endif - -| Now check again bit #DBL_MANT_DIG-32 (rounding could have produced a -| 'fraction overflow' ...). - btst IMM (DBL_MANT_DIG-32),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - addw IMM (1),d4 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif -1: -| If bit #DBL_MANT_DIG-32-1 is clear we have a denormalized number, so we -| have to put the exponent to zero and return a denormalized number. 
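[Aside: the "f + delta" rounding rule described in the comments above amounts to ordinary round-to-nearest-even. A minimal C sketch of that rule (not of the exact d0-d3 register usage), with illustrative names, assuming one guard bit at bit 0 and a sticky flag for the lower bits:

    #include <stdint.h>

    /* Sketch only: 'mant' carries one extra guard bit at bit 0; 'sticky' is
       nonzero if any lower-order bits were lost earlier.  delta > 1 means
       guard and sticky are both set; delta == 1 means only the guard bit. */
    static uint64_t round_to_nearest_even(uint64_t mant, int sticky)
    {
        int guard = (int)(mant & 1);          /* is delta >= 1 ?                  */
        mant >>= 1;                           /* drop the guard bit               */
        if (guard && (sticky || (mant & 1)))  /* delta > 1, or a tie, result odd  */
            mant += 1;                        /* round up; ties end up even       */
        return mant;
    }
]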
- btst IMM (DBL_MANT_DIG-32-1),d0 - beq 1f - jmp a0@ -1: movel IMM (0),d4 - jmp a0@ - -Lround$to$zero: -Lround$to$plus: -Lround$to$minus: - jmp a0@ -#endif /* L_double */ - -#ifdef L_float - - .globl SYM (_fpCCR) - .globl $_exception_handler - -QUIET_NaN = 0xffffffff -SIGNL_NaN = 0x7f800001 -INFINITY = 0x7f800000 - -F_MAX_EXP = 0xff -F_BIAS = 126 -FLT_MAX_EXP = F_MAX_EXP - F_BIAS -FLT_MIN_EXP = 1 - F_BIAS -FLT_MANT_DIG = 24 - -INEXACT_RESULT = 0x0001 -UNDERFLOW = 0x0002 -OVERFLOW = 0x0004 -DIVIDE_BY_ZERO = 0x0008 -INVALID_OPERATION = 0x0010 - -SINGLE_FLOAT = 1 - -NOOP = 0 -ADD = 1 -MULTIPLY = 2 -DIVIDE = 3 -NEGATE = 4 -COMPARE = 5 -EXTENDSFDF = 6 -TRUNCDFSF = 7 - -UNKNOWN = -1 -ROUND_TO_NEAREST = 0 | round result to nearest representable value -ROUND_TO_ZERO = 1 | round result towards zero -ROUND_TO_PLUS = 2 | round result towards plus infinity -ROUND_TO_MINUS = 3 | round result towards minus infinity - -| Entry points: - - .globl SYM (__addsf3) - .globl SYM (__subsf3) - .globl SYM (__mulsf3) - .globl SYM (__divsf3) - .globl SYM (__negsf2) - .globl SYM (__cmpsf2) - .globl SYM (__cmpsf2_internal) - .hidden SYM (__cmpsf2_internal) - -| These are common routines to return and signal exceptions. - - .text - .even - -Lf$den: -| Return and signal a denormalized number - orl d7,d0 - moveq IMM (INEXACT_RESULT+UNDERFLOW),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -Lf$infty: -Lf$overflow: -| Return a properly signed INFINITY and set the exception flags - movel IMM (INFINITY),d0 - orl d7,d0 - moveq IMM (INEXACT_RESULT+OVERFLOW),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -Lf$underflow: -| Return 0 and set the exception flags - moveq IMM (0),d0 - moveq IMM (INEXACT_RESULT+UNDERFLOW),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -Lf$inop: -| Return a quiet NaN and set the exception flags - movel IMM (QUIET_NaN),d0 - moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -Lf$div$0: -| Return a properly signed INFINITY and set the exception flags - movel IMM (INFINITY),d0 - orl d7,d0 - moveq IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -|============================================================================= -|============================================================================= -| single precision routines -|============================================================================= -|============================================================================= - -| A single precision floating point number (float) has the format: -| -| struct _float { -| unsigned int sign : 1; /* sign bit */ -| unsigned int exponent : 8; /* exponent, shifted by 126 */ -| unsigned int fraction : 23; /* fraction */ -| } float; -| -| Thus sizeof(float) = 4 (32 bits). -| -| All the routines are callable from C programs, and return the result -| in the single register d0. They also preserve all registers except -| d0-d1 and a0-a1. 
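[Aside: the single-precision layout described in the comment above can be read off directly in C. A minimal sketch; the helper name and the 32-bit copy are illustrative, not part of this file:

    #include <stdint.h>
    #include <string.h>

    /* Split a float into the three fields described above. */
    static void split_float(float f, unsigned *sign, unsigned *exponent,
                            unsigned *fraction)
    {
        uint32_t bits;
        memcpy(&bits, &f, sizeof bits);   /* reinterpret the 32 bits of the float   */
        *sign     = bits >> 31;           /* 1 sign bit                             */
        *exponent = (bits >> 23) & 0xff;  /* 8 biased exponent bits                 */
        *fraction = bits & 0x007fffff;    /* 23 fraction bits (hidden bit unstored) */
    }
]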
- -|============================================================================= -| __subsf3 -|============================================================================= - -| float __subsf3(float, float); - FUNC(__subsf3) -SYM (__subsf3): - bchg IMM (31),sp@(8) | change sign of second operand - | and fall through -|============================================================================= -| __addsf3 -|============================================================================= - -| float __addsf3(float, float); - FUNC(__addsf3) -SYM (__addsf3): -#ifndef __mcoldfire__ - link a6,IMM (0) | everything will be done in registers - moveml d2-d7,sp@- | save all data registers but d0-d1 -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get first operand - movel a6@(12),d1 | get second operand - movel d0,a0 | get d0's sign bit ' - addl d0,d0 | check and clear sign bit of a - beq Laddsf$b | if zero return second operand - movel d1,a1 | save b's sign bit ' - addl d1,d1 | get rid of sign bit - beq Laddsf$a | if zero return first operand - -| Get the exponents and check for denormalized and/or infinity. - - movel IMM (0x00ffffff),d4 | mask to get fraction - movel IMM (0x01000000),d5 | mask to put hidden bit back - - movel d0,d6 | save a to get exponent - andl d4,d0 | get fraction in d0 - notl d4 | make d4 into a mask for the exponent - andl d4,d6 | get exponent in d6 - beq Laddsf$a$den | branch if a is denormalized - cmpl d4,d6 | check for INFINITY or NaN - beq Laddsf$nf - swap d6 | put exponent into first word - orl d5,d0 | and put hidden bit back -Laddsf$1: -| Now we have a's exponent in d6 (second byte) and the mantissa in d0. ' - movel d1,d7 | get exponent in d7 - andl d4,d7 | - beq Laddsf$b$den | branch if b is denormalized - cmpl d4,d7 | check for INFINITY or NaN - beq Laddsf$nf - swap d7 | put exponent into first word - notl d4 | make d4 into a mask for the fraction - andl d4,d1 | get fraction in d1 - orl d5,d1 | and put hidden bit back -Laddsf$2: -| Now we have b's exponent in d7 (second byte) and the mantissa in d1. ' - -| Note that the hidden bit corresponds to bit #FLT_MANT_DIG-1, and we -| shifted right once, so bit #FLT_MANT_DIG is set (so we have one extra -| bit). - - movel d1,d2 | move b to d2, since we want to use - | two registers to do the sum - movel IMM (0),d1 | and clear the new ones - movel d1,d3 | - -| Here we shift the numbers in registers d0 and d1 so the exponents are the -| same, and put the largest exponent in d6. Note that we are using two -| registers for each number (see the discussion by D. Knuth in "Seminumerical -| Algorithms"). 
-#ifndef __mcoldfire__ - cmpw d6,d7 | compare exponents -#else - cmpl d6,d7 | compare exponents -#endif - beq Laddsf$3 | if equal don't shift ' - bhi 5f | branch if second exponent largest -1: - subl d6,d7 | keep the largest exponent - negl d7 -#ifndef __mcoldfire__ - lsrw IMM (8),d7 | put difference in lower byte -#else - lsrl IMM (8),d7 | put difference in lower byte -#endif -| if difference is too large we don't shift (actually, we can just exit) ' -#ifndef __mcoldfire__ - cmpw IMM (FLT_MANT_DIG+2),d7 -#else - cmpl IMM (FLT_MANT_DIG+2),d7 -#endif - bge Laddsf$b$small -#ifndef __mcoldfire__ - cmpw IMM (16),d7 | if difference >= 16 swap -#else - cmpl IMM (16),d7 | if difference >= 16 swap -#endif - bge 4f -2: -#ifndef __mcoldfire__ - subw IMM (1),d7 -#else - subql IMM (1), d7 -#endif -3: -#ifndef __mcoldfire__ - lsrl IMM (1),d2 | shift right second operand - roxrl IMM (1),d3 - dbra d7,3b -#else - lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - subql IMM (1), d7 - bpl 3b -#endif - bra Laddsf$3 -4: - movew d2,d3 - swap d3 - movew d3,d2 - swap d2 -#ifndef __mcoldfire__ - subw IMM (16),d7 -#else - subl IMM (16),d7 -#endif - bne 2b | if still more bits, go back to normal case - bra Laddsf$3 -5: -#ifndef __mcoldfire__ - exg d6,d7 | exchange the exponents -#else - eorl d6,d7 - eorl d7,d6 - eorl d6,d7 -#endif - subl d6,d7 | keep the largest exponent - negl d7 | -#ifndef __mcoldfire__ - lsrw IMM (8),d7 | put difference in lower byte -#else - lsrl IMM (8),d7 | put difference in lower byte -#endif -| if difference is too large we don't shift (and exit!) ' -#ifndef __mcoldfire__ - cmpw IMM (FLT_MANT_DIG+2),d7 -#else - cmpl IMM (FLT_MANT_DIG+2),d7 -#endif - bge Laddsf$a$small -#ifndef __mcoldfire__ - cmpw IMM (16),d7 | if difference >= 16 swap -#else - cmpl IMM (16),d7 | if difference >= 16 swap -#endif - bge 8f -6: -#ifndef __mcoldfire__ - subw IMM (1),d7 -#else - subl IMM (1),d7 -#endif -7: -#ifndef __mcoldfire__ - lsrl IMM (1),d0 | shift right first operand - roxrl IMM (1),d1 - dbra d7,7b -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - subql IMM (1),d7 - bpl 7b -#endif - bra Laddsf$3 -8: - movew d0,d1 - swap d1 - movew d1,d0 - swap d0 -#ifndef __mcoldfire__ - subw IMM (16),d7 -#else - subl IMM (16),d7 -#endif - bne 6b | if still more bits, go back to normal case - | otherwise we fall through - -| Now we have a in d0-d1, b in d2-d3, and the largest exponent in d6 (the -| signs are stored in a0 and a1). - -Laddsf$3: -| Here we have to decide whether to add or subtract the numbers -#ifndef __mcoldfire__ - exg d6,a0 | get signs back - exg d7,a1 | and save the exponents -#else - movel d6,d4 - movel a0,d6 - movel d4,a0 - movel d7,d4 - movel a1,d7 - movel d4,a1 -#endif - eorl d6,d7 | combine sign bits - bmi Lsubsf$0 | if negative a and b have opposite - | sign so we actually subtract the - | numbers - -| Here we have both positive or both negative -#ifndef __mcoldfire__ - exg d6,a0 | now we have the exponent in d6 -#else - movel d6,d4 - movel a0,d6 - movel d4,a0 -#endif - movel a0,d7 | and sign in d7 - andl IMM (0x80000000),d7 -| Here we do the addition. - addl d3,d1 - addxl d2,d0 -| Note: now we have d2, d3, d4 and d5 to play with! 
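[Aside: the alignment-and-add step just performed can be summarized by the following C sketch, under illustrative names and types. Note that the real routine keeps the bits shifted out (in d1/d3) so the rounding code can still see them; this sketch simply drops them:

    /* Align two positive fractions on a common exponent, then add them. */
    static unsigned long add_aligned(unsigned long frac_a, int exp_a,
                                     unsigned long frac_b, int exp_b, int *exp_out)
    {
        if (exp_a < exp_b) {                   /* make 'a' the larger-exponent operand */
            unsigned long tf = frac_a; frac_a = frac_b; frac_b = tf;
            int te = exp_a; exp_a = exp_b; exp_b = te;
        }
        int diff = exp_a - exp_b;
        frac_b = diff >= (int)(8 * sizeof frac_b) ? 0 : frac_b >> diff;
        *exp_out = exp_a;                      /* the result keeps the larger exponent */
        return frac_a + frac_b;
    }
]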
- -| Put the exponent, in the first byte, in d2, to use the "standard" rounding -| routines: - movel d6,d2 -#ifndef __mcoldfire__ - lsrw IMM (8),d2 -#else - lsrl IMM (8),d2 -#endif - -| Before rounding normalize so bit #FLT_MANT_DIG is set (we will consider -| the case of denormalized numbers in the rounding routine itself). -| As in the addition (not in the subtraction!) we could have set -| one more bit we check this: - btst IMM (FLT_MANT_DIG+1),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 -#endif - addl IMM (1),d2 -1: - lea pc@(Laddsf$4),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Laddsf$4: -| Put back the exponent, but check for overflow. -#ifndef __mcoldfire__ - cmpw IMM (0xff),d2 -#else - cmpl IMM (0xff),d2 -#endif - bhi 1f - bclr IMM (FLT_MANT_DIG-1),d0 -#ifndef __mcoldfire__ - lslw IMM (7),d2 -#else - lsll IMM (7),d2 -#endif - swap d2 - orl d2,d0 - bra Laddsf$ret -1: - moveq IMM (ADD),d5 - bra Lf$overflow - -Lsubsf$0: -| We are here if a > 0 and b < 0 (sign bits cleared). -| Here we do the subtraction. - movel d6,d7 | put sign in d7 - andl IMM (0x80000000),d7 - - subl d3,d1 | result in d0-d1 - subxl d2,d0 | - beq Laddsf$ret | if zero just exit - bpl 1f | if positive skip the following - bchg IMM (31),d7 | change sign bit in d7 - negl d1 - negxl d0 -1: -#ifndef __mcoldfire__ - exg d2,a0 | now we have the exponent in d2 - lsrw IMM (8),d2 | put it in the first byte -#else - movel d2,d4 - movel a0,d2 - movel d4,a0 - lsrl IMM (8),d2 | put it in the first byte -#endif - -| Now d0-d1 is positive and the sign bit is in d7. - -| Note that we do not have to normalize, since in the subtraction bit -| #FLT_MANT_DIG+1 is never set, and denormalized numbers are handled by -| the rounding routines themselves. - lea pc@(Lsubsf$1),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Lsubsf$1: -| Put back the exponent (we can't have overflow!). ' - bclr IMM (FLT_MANT_DIG-1),d0 -#ifndef __mcoldfire__ - lslw IMM (7),d2 -#else - lsll IMM (7),d2 -#endif - swap d2 - orl d2,d0 - bra Laddsf$ret - -| If one of the numbers was too small (difference of exponents >= -| FLT_MANT_DIG+2) we return the other (and now we don't have to ' -| check for finiteness or zero). -Laddsf$a$small: - movel a6@(12),d0 - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts - -Laddsf$b$small: - movel a6@(8),d0 - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. 
-#endif - unlk a6 | and return - rts - -| If the numbers are denormalized remember to put exponent equal to 1. - -Laddsf$a$den: - movel d5,d6 | d5 contains 0x01000000 - swap d6 - bra Laddsf$1 - -Laddsf$b$den: - movel d5,d7 - swap d7 - notl d4 | make d4 into a mask for the fraction - | (this was not executed after the jump) - bra Laddsf$2 - -| The rest is mainly code for the different results which can be -| returned (checking always for +/-INFINITY and NaN). - -Laddsf$b: -| Return b (if a is zero). - movel a6@(12),d0 - cmpl IMM (0x80000000),d0 | Check if b is -0 - bne 1f - movel a0,d7 - andl IMM (0x80000000),d7 | Use the sign of a - clrl d0 - bra Laddsf$ret -Laddsf$a: -| Return a (if b is zero). - movel a6@(8),d0 -1: - moveq IMM (ADD),d5 -| We have to check for NaN and +/-infty. - movel d0,d7 - andl IMM (0x80000000),d7 | put sign in d7 - bclr IMM (31),d0 | clear sign - cmpl IMM (INFINITY),d0 | check for infty or NaN - bge 2f - movel d0,d0 | check for zero (we do this because we don't ' - bne Laddsf$ret | want to return -0 by mistake - bclr IMM (31),d7 | if zero be sure to clear sign - bra Laddsf$ret | if everything OK just return -2: -| The value to be returned is either +/-infty or NaN - andl IMM (0x007fffff),d0 | check for NaN - bne Lf$inop | if mantissa not zero is NaN - bra Lf$infty - -Laddsf$ret: -| Normal exit (a and b nonzero, result is not NaN nor +/-infty). -| We have to clear the exception flags (just the exception type). - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ - orl d7,d0 | put sign bit -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts - -Laddsf$ret$den: -| Return a denormalized number (for addition we don't signal underflow) ' - lsrl IMM (1),d0 | remember to shift right back once - bra Laddsf$ret | and return - -| Note: when adding two floats of the same sign if either one is -| NaN we return NaN without regard to whether the other is finite or -| not. When subtracting them (i.e., when adding two numbers of -| opposite signs) things are more complicated: if both are INFINITY -| we return NaN, if only one is INFINITY and the other is NaN we return -| NaN, but if it is finite we return INFINITY with the corresponding sign. - -Laddsf$nf: - moveq IMM (ADD),d5 -| This could be faster but it is not worth the effort, since it is not -| executed very often. We sacrifice speed for clarity here. - movel a6@(8),d0 | get the numbers back (remember that we - movel a6@(12),d1 | did some processing already) - movel IMM (INFINITY),d4 | useful constant (INFINITY) - movel d0,d2 | save sign bits - movel d1,d3 - bclr IMM (31),d0 | clear sign bits - bclr IMM (31),d1 -| We know that one of them is either NaN of +/-INFINITY -| Check for NaN (if either one is NaN return NaN) - cmpl d4,d0 | check first a (d0) - bhi Lf$inop - cmpl d4,d1 | check now b (d1) - bhi Lf$inop -| Now comes the check for +/-INFINITY. We know that both are (maybe not -| finite) numbers, but we have to check if both are infinite whether we -| are adding or subtracting them. - eorl d3,d2 | to check sign bits - bmi 1f - movel d0,d7 - andl IMM (0x80000000),d7 | get (common) sign bit - bra Lf$infty -1: -| We know one (or both) are infinite, so we test for equality between the -| two numbers (if they are equal they have to be infinite both, so we -| return NaN). - cmpl d1,d0 | are both infinite? 
- beq Lf$inop | if so return NaN - - movel d0,d7 - andl IMM (0x80000000),d7 | get a's sign bit ' - cmpl d4,d0 | test now for infinity - beq Lf$infty | if a is INFINITY return with this sign - bchg IMM (31),d7 | else we know b is INFINITY and has - bra Lf$infty | the opposite sign - -|============================================================================= -| __mulsf3 -|============================================================================= - -| float __mulsf3(float, float); - FUNC(__mulsf3) -SYM (__mulsf3): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get a into d0 - movel a6@(12),d1 | and b into d1 - movel d0,d7 | d7 will hold the sign of the product - eorl d1,d7 | - andl IMM (0x80000000),d7 - movel IMM (INFINITY),d6 | useful constant (+INFINITY) - movel d6,d5 | another (mask for fraction) - notl d5 | - movel IMM (0x00800000),d4 | this is to put hidden bit back - bclr IMM (31),d0 | get rid of a's sign bit ' - movel d0,d2 | - beq Lmulsf$a$0 | branch if a is zero - bclr IMM (31),d1 | get rid of b's sign bit ' - movel d1,d3 | - beq Lmulsf$b$0 | branch if b is zero - cmpl d6,d0 | is a big? - bhi Lmulsf$inop | if a is NaN return NaN - beq Lmulsf$inf | if a is INFINITY we have to check b - cmpl d6,d1 | now compare b with INFINITY - bhi Lmulsf$inop | is b NaN? - beq Lmulsf$overflow | is b INFINITY? -| Here we have both numbers finite and nonzero (and with no sign bit). -| Now we get the exponents into d2 and d3. - andl d6,d2 | and isolate exponent in d2 - beq Lmulsf$a$den | if exponent is zero we have a denormalized - andl d5,d0 | and isolate fraction - orl d4,d0 | and put hidden bit back - swap d2 | I like exponents in the first byte -#ifndef __mcoldfire__ - lsrw IMM (7),d2 | -#else - lsrl IMM (7),d2 | -#endif -Lmulsf$1: | number - andl d6,d3 | - beq Lmulsf$b$den | - andl d5,d1 | - orl d4,d1 | - swap d3 | -#ifndef __mcoldfire__ - lsrw IMM (7),d3 | -#else - lsrl IMM (7),d3 | -#endif -Lmulsf$2: | -#ifndef __mcoldfire__ - addw d3,d2 | add exponents - subw IMM (F_BIAS+1),d2 | and subtract bias (plus one) -#else - addl d3,d2 | add exponents - subl IMM (F_BIAS+1),d2 | and subtract bias (plus one) -#endif - -| We are now ready to do the multiplication. The situation is as follows: -| both a and b have bit FLT_MANT_DIG-1 set (even if they were -| denormalized to start with!), which means that in the product -| bit 2*(FLT_MANT_DIG-1) (that is, bit 2*FLT_MANT_DIG-2-32 of the -| high long) is set. - -| To do the multiplication let us move the number a little bit around ... - movel d1,d6 | second operand in d6 - movel d0,d5 | first operand in d4-d5 - movel IMM (0),d4 - movel d4,d1 | the sums will go in d0-d1 - movel d4,d0 - -| now bit FLT_MANT_DIG-1 becomes bit 31: - lsll IMM (31-FLT_MANT_DIG+1),d6 - -| Start the loop (we loop #FLT_MANT_DIG times): - moveq IMM (FLT_MANT_DIG-1),d3 -1: addl d1,d1 | shift sum - addxl d0,d0 - lsll IMM (1),d6 | get bit bn - bcc 2f | if not set skip sum - addl d5,d1 | add a - addxl d4,d0 -2: -#ifndef __mcoldfire__ - dbf d3,1b | loop back -#else - subql IMM (1),d3 - bpl 1b -#endif - -| Now we have the product in d0-d1, with bit (FLT_MANT_DIG - 1) + FLT_MANT_DIG -| (mod 32) of d0 set. The first thing to do now is to normalize it so bit -| FLT_MANT_DIG is set (to do the rounding). 
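[Aside: the shift-and-add loop above corresponds to the following C sketch, which multiplies two 24-bit fractions by scanning the multiplier from its most significant bit; names are illustrative only:

    /* Shift-and-add multiplication of two FLT_MANT_DIG-bit fractions. */
    static unsigned long long mul_frac24(unsigned long a, unsigned long b)
    {
        unsigned long long acc = 0;
        int i;
        for (i = 23; i >= 0; i--) {
            acc <<= 1;                /* shift the partial sum left        */
            if ((b >> i) & 1)         /* next multiplier bit set?          */
                acc += a;             /* ... then add the multiplicand     */
        }
        return acc;                   /* at most 48 significant bits       */
    }
]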
-#ifndef __mcoldfire__ - rorl IMM (6),d1 - swap d1 - movew d1,d3 - andw IMM (0x03ff),d3 - andw IMM (0xfd00),d1 -#else - movel d1,d3 - lsll IMM (8),d1 - addl d1,d1 - addl d1,d1 - moveq IMM (22),d5 - lsrl d5,d3 - orl d3,d1 - andl IMM (0xfffffd00),d1 -#endif - lsll IMM (8),d0 - addl d0,d0 - addl d0,d0 -#ifndef __mcoldfire__ - orw d3,d0 -#else - orl d3,d0 -#endif - - moveq IMM (MULTIPLY),d5 - - btst IMM (FLT_MANT_DIG+1),d0 - beq Lround$exit -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - addw IMM (1),d2 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - addql IMM (1),d2 -#endif - bra Lround$exit - -Lmulsf$inop: - moveq IMM (MULTIPLY),d5 - bra Lf$inop - -Lmulsf$overflow: - moveq IMM (MULTIPLY),d5 - bra Lf$overflow - -Lmulsf$inf: - moveq IMM (MULTIPLY),d5 -| If either is NaN return NaN; else both are (maybe infinite) numbers, so -| return INFINITY with the correct sign (which is in d7). - cmpl d6,d1 | is b NaN? - bhi Lf$inop | if so return NaN - bra Lf$overflow | else return +/-INFINITY - -| If either number is zero return zero, unless the other is +/-INFINITY, -| or NaN, in which case we return NaN. -Lmulsf$b$0: -| Here d1 (==b) is zero. - movel a6@(8),d1 | get a again to check for non-finiteness - bra 1f -Lmulsf$a$0: - movel a6@(12),d1 | get b again to check for non-finiteness -1: bclr IMM (31),d1 | clear sign bit - cmpl IMM (INFINITY),d1 | and check for a large exponent - bge Lf$inop | if b is +/-INFINITY or NaN return NaN - movel d7,d0 | else return signed zero - PICLEA SYM (_fpCCR),a0 | - movew IMM (0),a0@ | -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | - rts | - -| If a number is denormalized we put an exponent of 1 but do not put the -| hidden bit back into the fraction; instead we shift left until bit 23 -| (the hidden bit) is set, adjusting the exponent accordingly. We do this -| to ensure that the product of the fractions is close to 1. 
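[Aside: a hedged C sketch of the pre-normalization performed by Lmulsf$a$den / Lmulsf$b$den below: give the denormal an exponent of 1, then shift left until the hidden-bit position (bit 23) is set, adjusting the exponent on the way. The name is ours; 'frac' is assumed nonzero, since zero operands were handled earlier:

    static unsigned long normalize_denormal(unsigned long frac, int *exp)
    {
        *exp = 1;                              /* denormals start at the minimum exponent */
        while (!(frac & (1UL << 23))) {
            frac <<= 1;
            (*exp)--;                          /* the exponent may become <= 0 here       */
        }
        return frac;
    }
]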
-Lmulsf$a$den: - movel IMM (1),d2 - andl d5,d0 -1: addl d0,d0 | shift a left (until bit 23 is set) -#ifndef __mcoldfire__ - subw IMM (1),d2 | and adjust exponent -#else - subql IMM (1),d2 | and adjust exponent -#endif - btst IMM (FLT_MANT_DIG-1),d0 - bne Lmulsf$1 | - bra 1b | else loop back - -Lmulsf$b$den: - movel IMM (1),d3 - andl d5,d1 -1: addl d1,d1 | shift b left until bit 23 is set -#ifndef __mcoldfire__ - subw IMM (1),d3 | and adjust exponent -#else - subql IMM (1),d3 | and adjust exponent -#endif - btst IMM (FLT_MANT_DIG-1),d1 - bne Lmulsf$2 | - bra 1b | else loop back - -|============================================================================= -| __divsf3 -|============================================================================= - -| float __divsf3(float, float); - FUNC(__divsf3) -SYM (__divsf3): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get a into d0 - movel a6@(12),d1 | and b into d1 - movel d0,d7 | d7 will hold the sign of the result - eorl d1,d7 | - andl IMM (0x80000000),d7 | - movel IMM (INFINITY),d6 | useful constant (+INFINITY) - movel d6,d5 | another (mask for fraction) - notl d5 | - movel IMM (0x00800000),d4 | this is to put hidden bit back - bclr IMM (31),d0 | get rid of a's sign bit ' - movel d0,d2 | - beq Ldivsf$a$0 | branch if a is zero - bclr IMM (31),d1 | get rid of b's sign bit ' - movel d1,d3 | - beq Ldivsf$b$0 | branch if b is zero - cmpl d6,d0 | is a big? - bhi Ldivsf$inop | if a is NaN return NaN - beq Ldivsf$inf | if a is INFINITY we have to check b - cmpl d6,d1 | now compare b with INFINITY - bhi Ldivsf$inop | if b is NaN return NaN - beq Ldivsf$underflow -| Here we have both numbers finite and nonzero (and with no sign bit). -| Now we get the exponents into d2 and d3 and normalize the numbers to -| ensure that the ratio of the fractions is close to 1. We do this by -| making sure that bit #FLT_MANT_DIG-1 (hidden bit) is set. - andl d6,d2 | and isolate exponent in d2 - beq Ldivsf$a$den | if exponent is zero we have a denormalized - andl d5,d0 | and isolate fraction - orl d4,d0 | and put hidden bit back - swap d2 | I like exponents in the first byte -#ifndef __mcoldfire__ - lsrw IMM (7),d2 | -#else - lsrl IMM (7),d2 | -#endif -Ldivsf$1: | - andl d6,d3 | - beq Ldivsf$b$den | - andl d5,d1 | - orl d4,d1 | - swap d3 | -#ifndef __mcoldfire__ - lsrw IMM (7),d3 | -#else - lsrl IMM (7),d3 | -#endif -Ldivsf$2: | -#ifndef __mcoldfire__ - subw d3,d2 | subtract exponents - addw IMM (F_BIAS),d2 | and add bias -#else - subl d3,d2 | subtract exponents - addl IMM (F_BIAS),d2 | and add bias -#endif - -| We are now ready to do the division. We have prepared things in such a way -| that the ratio of the fractions will be less than 2 but greater than 1/2. -| At this point the registers in use are: -| d0 holds a (first operand, bit FLT_MANT_DIG=0, bit FLT_MANT_DIG-1=1) -| d1 holds b (second operand, bit FLT_MANT_DIG=1) -| d2 holds the difference of the exponents, corrected by the bias -| d7 holds the sign of the ratio -| d4, d5, d6 hold some constants - movel d7,a0 | d6-d7 will hold the ratio of the fractions - movel IMM (0),d6 | - movel d6,d7 - - moveq IMM (FLT_MANT_DIG+1),d3 -1: cmpl d0,d1 | is a < b? - bhi 2f | - bset d3,d6 | set a bit in d6 - subl d1,d0 | if a >= b a <-- a-b - beq 3f | if a is zero, exit -2: addl d0,d0 | multiply a by 2 -#ifndef __mcoldfire__ - dbra d3,1b -#else - subql IMM (1),d3 - bpl 1b -#endif - -| Now we keep going to set the sticky bit ... 
- moveq IMM (FLT_MANT_DIG),d3 -1: cmpl d0,d1 - ble 2f - addl d0,d0 -#ifndef __mcoldfire__ - dbra d3,1b -#else - subql IMM(1),d3 - bpl 1b -#endif - movel IMM (0),d1 - bra 3f -2: movel IMM (0),d1 -#ifndef __mcoldfire__ - subw IMM (FLT_MANT_DIG),d3 - addw IMM (31),d3 -#else - subl IMM (FLT_MANT_DIG),d3 - addl IMM (31),d3 -#endif - bset d3,d1 -3: - movel d6,d0 | put the ratio in d0-d1 - movel a0,d7 | get sign back - -| Because of the normalization we did before we are guaranteed that -| d0 is smaller than 2^26 but larger than 2^24. Thus bit 26 is not set, -| bit 25 could be set, and if it is not set then bit 24 is necessarily set. - btst IMM (FLT_MANT_DIG+1),d0 - beq 1f | if it is not set, then bit 24 is set - lsrl IMM (1),d0 | -#ifndef __mcoldfire__ - addw IMM (1),d2 | -#else - addl IMM (1),d2 | -#endif -1: -| Now round, check for over- and underflow, and exit. - moveq IMM (DIVIDE),d5 - bra Lround$exit - -Ldivsf$inop: - moveq IMM (DIVIDE),d5 - bra Lf$inop - -Ldivsf$overflow: - moveq IMM (DIVIDE),d5 - bra Lf$overflow - -Ldivsf$underflow: - moveq IMM (DIVIDE),d5 - bra Lf$underflow - -Ldivsf$a$0: - moveq IMM (DIVIDE),d5 -| If a is zero check to see whether b is zero also. In that case return -| NaN; then check if b is NaN, and return NaN also in that case. Else -| return a properly signed zero. - andl IMM (0x7fffffff),d1 | clear sign bit and test b - beq Lf$inop | if b is also zero return NaN - cmpl IMM (INFINITY),d1 | check for NaN - bhi Lf$inop | - movel d7,d0 | else return signed zero - PICLEA SYM (_fpCCR),a0 | - movew IMM (0),a0@ | -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | -#else - moveml sp@,d2-d7 | - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | - rts | - -Ldivsf$b$0: - moveq IMM (DIVIDE),d5 -| If we got here a is not zero. Check if a is NaN; in that case return NaN, -| else return +/-INFINITY. Remember that a is in d0 with the sign bit -| cleared already. - cmpl IMM (INFINITY),d0 | compare d0 with INFINITY - bhi Lf$inop | if larger it is NaN - bra Lf$div$0 | else signal DIVIDE_BY_ZERO - -Ldivsf$inf: - moveq IMM (DIVIDE),d5 -| If a is INFINITY we have to check b - cmpl IMM (INFINITY),d1 | compare b with INFINITY - bge Lf$inop | if b is NaN or INFINITY return NaN - bra Lf$overflow | else return overflow - -| If a number is denormalized we put an exponent of 1 but do not put the -| bit back into the fraction. -Ldivsf$a$den: - movel IMM (1),d2 - andl d5,d0 -1: addl d0,d0 | shift a left until bit FLT_MANT_DIG-1 is set -#ifndef __mcoldfire__ - subw IMM (1),d2 | and adjust exponent -#else - subl IMM (1),d2 | and adjust exponent -#endif - btst IMM (FLT_MANT_DIG-1),d0 - bne Ldivsf$1 - bra 1b - -Ldivsf$b$den: - movel IMM (1),d3 - andl d5,d1 -1: addl d1,d1 | shift b left until bit FLT_MANT_DIG is set -#ifndef __mcoldfire__ - subw IMM (1),d3 | and adjust exponent -#else - subl IMM (1),d3 | and adjust exponent -#endif - btst IMM (FLT_MANT_DIG-1),d1 - bne Ldivsf$2 - bra 1b - -Lround$exit: -| This is a common exit point for __mulsf3 and __divsf3. - -| First check for underlow in the exponent: -#ifndef __mcoldfire__ - cmpw IMM (-FLT_MANT_DIG-1),d2 -#else - cmpl IMM (-FLT_MANT_DIG-1),d2 -#endif - blt Lf$underflow -| It could happen that the exponent is less than 1, in which case the -| number is denormalized. In this case we shift right and adjust the -| exponent until it becomes 1 or the fraction is zero (in the latter case -| we signal underflow and return zero). 
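[Aside: a hedged C sketch of the denormalization step that follows: while the exponent is below 1, shift right and collect the flushed-out bits so rounding can still see them. 'sticky' stands in for register d6; the names are illustrative, not from this file:

    static unsigned long denormalize(unsigned long frac, int *exp,
                                     unsigned long *sticky)
    {
        while (*exp < 1) {
            *sticky = (*sticky >> 1) | ((frac & 1UL) << 31);  /* keep the bit being lost */
            frac >>= 1;
            (*exp)++;
        }
        return frac;
    }
]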
- movel IMM (0),d6 | d6 is used temporarily -#ifndef __mcoldfire__ - cmpw IMM (1),d2 | if the exponent is less than 1 we -#else - cmpl IMM (1),d2 | if the exponent is less than 1 we -#endif - bge 2f | have to shift right (denormalize) -1: -#ifndef __mcoldfire__ - addw IMM (1),d2 | adjust the exponent - lsrl IMM (1),d0 | shift right once - roxrl IMM (1),d1 | - roxrl IMM (1),d6 | d6 collect bits we would lose otherwise - cmpw IMM (1),d2 | is the exponent 1 already? -#else - addql IMM (1),d2 | adjust the exponent - lsrl IMM (1),d6 - btst IMM (0),d1 - beq 11f - bset IMM (31),d6 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - cmpl IMM (1),d2 | is the exponent 1 already? -#endif - beq 2f | if not loop back - bra 1b | - bra Lf$underflow | safety check, shouldn't execute ' -2: orl d6,d1 | this is a trick so we don't lose ' - | the extra bits which were flushed right -| Now call the rounding routine (which takes care of denormalized numbers): - lea pc@(Lround$0),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Lround$0: -| Here we have a correctly rounded result (either normalized or denormalized). - -| Here we should have either a normalized number or a denormalized one, and -| the exponent is necessarily larger or equal to 1 (so we don't have to ' -| check again for underflow!). We have to check for overflow or for a -| denormalized number (which also signals underflow). -| Check for overflow (i.e., exponent >= 255). -#ifndef __mcoldfire__ - cmpw IMM (0x00ff),d2 -#else - cmpl IMM (0x00ff),d2 -#endif - bge Lf$overflow -| Now check for a denormalized number (exponent==0). - movew d2,d2 - beq Lf$den -1: -| Put back the exponents and sign and return. -#ifndef __mcoldfire__ - lslw IMM (7),d2 | exponent back to fourth byte -#else - lsll IMM (7),d2 | exponent back to fourth byte -#endif - bclr IMM (FLT_MANT_DIG-1),d0 - swap d0 | and put back exponent -#ifndef __mcoldfire__ - orw d2,d0 | -#else - orl d2,d0 -#endif - swap d0 | - orl d7,d0 | and sign also - - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts - -|============================================================================= -| __negsf2 -|============================================================================= - -| This is trivial and could be shorter if we didn't bother checking for NaN ' -| and +/-INFINITY. 
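[Aside: as the comment says, the core operation is a single bit flip. A minimal C sketch, illustrative only; the routine below additionally screens zero, NaN and +/-INFINITY before returning:

    #include <stdint.h>
    #include <string.h>

    static float negate_float(float f)
    {
        uint32_t bits;
        memcpy(&bits, &f, sizeof bits);
        bits ^= UINT32_C(0x80000000);      /* flip the sign bit */
        memcpy(&f, &bits, sizeof bits);
        return f;
    }
]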
- -| float __negsf2(float); - FUNC(__negsf2) -SYM (__negsf2): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - moveq IMM (NEGATE),d5 - movel a6@(8),d0 | get number to negate in d0 - bchg IMM (31),d0 | negate - movel d0,d1 | make a positive copy - bclr IMM (31),d1 | - tstl d1 | check for zero - beq 2f | if zero (either sign) return +zero - cmpl IMM (INFINITY),d1 | compare to +INFINITY - blt 1f | - bhi Lf$inop | if larger (fraction not zero) is NaN - movel d0,d7 | else get sign and return INFINITY - andl IMM (0x80000000),d7 - bra Lf$infty -1: PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts -2: bclr IMM (31),d0 - bra 1b - -|============================================================================= -| __cmpsf2 -|============================================================================= - -GREATER = 1 -LESS = -1 -EQUAL = 0 - -| int __cmpsf2_internal(float, float, int); -SYM (__cmpsf2_internal): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- | save registers -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - moveq IMM (COMPARE),d5 - movel a6@(8),d0 | get first operand - movel a6@(12),d1 | get second operand -| Check if either is NaN, and in that case return garbage and signal -| INVALID_OPERATION. Check also if either is zero, and clear the signs -| if necessary. - movel d0,d6 - andl IMM (0x7fffffff),d0 - beq Lcmpsf$a$0 - cmpl IMM (0x7f800000),d0 - bhi Lcmpf$inop -Lcmpsf$1: - movel d1,d7 - andl IMM (0x7fffffff),d1 - beq Lcmpsf$b$0 - cmpl IMM (0x7f800000),d1 - bhi Lcmpf$inop -Lcmpsf$2: -| Check the signs - eorl d6,d7 - bpl 1f -| If the signs are not equal check if a >= 0 - tstl d6 - bpl Lcmpsf$a$gt$b | if (a >= 0 && b < 0) => a > b - bmi Lcmpsf$b$gt$a | if (a < 0 && b >= 0) => a < b -1: -| If the signs are equal check for < 0 - tstl d6 - bpl 1f -| If both are negative exchange them -#ifndef __mcoldfire__ - exg d0,d1 -#else - movel d0,d7 - movel d1,d0 - movel d7,d1 -#endif -1: -| Now that they are positive we just compare them as longs (does this also -| work for denormalized numbers?). - cmpl d0,d1 - bhi Lcmpsf$b$gt$a | |b| > |a| - bne Lcmpsf$a$gt$b | |b| < |a| -| If we got here a == b. - movel IMM (EQUAL),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 -#endif - unlk a6 - rts -Lcmpsf$a$gt$b: - movel IMM (GREATER),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts -Lcmpsf$b$gt$a: - movel IMM (LESS),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. 
-#endif - unlk a6 - rts - -Lcmpsf$a$0: - bclr IMM (31),d6 - bra Lcmpsf$1 -Lcmpsf$b$0: - bclr IMM (31),d7 - bra Lcmpsf$2 - -Lcmpf$inop: - movl a6@(16),d0 - moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -| int __cmpsf2(float, float); - FUNC(__cmpsf2) -SYM (__cmpsf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts - -|============================================================================= -| rounding routines -|============================================================================= - -| The rounding routines expect the number to be normalized in registers -| d0-d1, with the exponent in register d2. They assume that the -| exponent is larger or equal to 1. They return a properly normalized number -| if possible, and a denormalized number otherwise. The exponent is returned -| in d2. - -Lround$to$nearest: -| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"): -| Here we assume that the exponent is not too small (this should be checked -| before entering the rounding routine), but the number could be denormalized. - -| Check for denormalized numbers: -1: btst IMM (FLT_MANT_DIG),d0 - bne 2f | if set the number is normalized -| Normalize shifting left until bit #FLT_MANT_DIG is set or the exponent -| is one (remember that a denormalized number corresponds to an -| exponent of -F_BIAS+1). -#ifndef __mcoldfire__ - cmpw IMM (1),d2 | remember that the exponent is at least one -#else - cmpl IMM (1),d2 | remember that the exponent is at least one -#endif - beq 2f | an exponent of one means denormalized - addl d1,d1 | else shift and adjust the exponent - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d2,1b | -#else - subql IMM (1),d2 - bpl 1b -#endif -2: -| Now round: we do it as follows: after the shifting we can write the -| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2. -| If delta < 1, do nothing. If delta > 1, add 1 to f. -| If delta == 1, we make sure the rounded number will be even (odd?) -| (after shifting). - btst IMM (0),d0 | is delta < 1? - beq 2f | if so, do not do anything - tstl d1 | is delta == 1? - bne 1f | if so round to even - movel d0,d1 | - andl IMM (2),d1 | bit 1 is the last significant bit - addl d1,d0 | - bra 2f | -1: movel IMM (1),d1 | else add 1 - addl d1,d0 | -| Shift right once (because we used bit #FLT_MANT_DIG!). -2: lsrl IMM (1),d0 -| Now check again bit #FLT_MANT_DIG (rounding could have produced a -| 'fraction overflow' ...). - btst IMM (FLT_MANT_DIG),d0 - beq 1f - lsrl IMM (1),d0 -#ifndef __mcoldfire__ - addw IMM (1),d2 -#else - addql IMM (1),d2 -#endif -1: -| If bit #FLT_MANT_DIG-1 is clear we have a denormalized number, so we -| have to put the exponent to zero and return a denormalized number. - btst IMM (FLT_MANT_DIG-1),d0 - beq 1f - jmp a0@ -1: movel IMM (0),d2 - jmp a0@ - -Lround$to$zero: -Lround$to$plus: -Lround$to$minus: - jmp a0@ -#endif /* L_float */ - -| gcc expects the routines __eqdf2, __nedf2, __gtdf2, __gedf2, -| __ledf2, __ltdf2 to all return the same value as a direct call to -| __cmpdf2 would. In this implementation, each of these routines -| simply calls __cmpdf2. It would be more efficient to give the -| __cmpdf2 routine several names, but separating them out will make it -| easier to write efficient versions of these routines someday. -| If the operands recompare unordered unordered __gtdf2 and __gedf2 return -1. -| The other routines return 1. 
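[Aside: the stubs that follow all share one pattern, sketched here in C. The third argument of the internal comparison (whose prototype appears in the comments earlier in this file) is the value returned when either operand is a NaN, so __gtdf2/__gedf2 pass -1 and the remaining wrappers pass 1. The _sketch names are ours, not part of this file:

    extern int __cmpdf2_internal(double a, double b, int unordered_result);

    static int gtdf2_sketch(double a, double b) { return __cmpdf2_internal(a, b, -1); }
    static int ledf2_sketch(double a, double b) { return __cmpdf2_internal(a, b,  1); }
]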
- -#ifdef L_eqdf2 - .text - FUNC(__eqdf2) - .globl SYM (__eqdf2) -SYM (__eqdf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_eqdf2 */ - -#ifdef L_nedf2 - .text - FUNC(__nedf2) - .globl SYM (__nedf2) -SYM (__nedf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_nedf2 */ - -#ifdef L_gtdf2 - .text - FUNC(__gtdf2) - .globl SYM (__gtdf2) -SYM (__gtdf2): - link a6,IMM (0) - pea -1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_gtdf2 */ - -#ifdef L_gedf2 - .text - FUNC(__gedf2) - .globl SYM (__gedf2) -SYM (__gedf2): - link a6,IMM (0) - pea -1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_gedf2 */ - -#ifdef L_ltdf2 - .text - FUNC(__ltdf2) - .globl SYM (__ltdf2) -SYM (__ltdf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_ltdf2 */ - -#ifdef L_ledf2 - .text - FUNC(__ledf2) - .globl SYM (__ledf2) -SYM (__ledf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_ledf2 */ - -| The comments above about __eqdf2, et. al., also apply to __eqsf2, -| et. al., except that the latter call __cmpsf2 rather than __cmpdf2. - -#ifdef L_eqsf2 - .text - FUNC(__eqsf2) - .globl SYM (__eqsf2) -SYM (__eqsf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_eqsf2 */ - -#ifdef L_nesf2 - .text - FUNC(__nesf2) - .globl SYM (__nesf2) -SYM (__nesf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_nesf2 */ - -#ifdef L_gtsf2 - .text - FUNC(__gtsf2) - .globl SYM (__gtsf2) -SYM (__gtsf2): - link a6,IMM (0) - pea -1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_gtsf2 */ - -#ifdef L_gesf2 - .text - FUNC(__gesf2) - .globl SYM (__gesf2) -SYM (__gesf2): - link a6,IMM (0) - pea -1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_gesf2 */ - -#ifdef L_ltsf2 - .text - FUNC(__ltsf2) - .globl SYM (__ltsf2) -SYM (__ltsf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_ltsf2 */ - -#ifdef L_lesf2 - .text - FUNC(__lesf2) - .globl SYM (__lesf2) -SYM (__lesf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_lesf2 */ - -#if defined (__ELF__) && defined (__linux__) - /* Make stack non-executable for ELF linux targets. */ - .section .note.GNU-stack,"",@progbits -#endif diff --git a/gcc/config/m68k/t-floatlib b/gcc/config/m68k/t-floatlib index 2039d1d0dc4..23734be40bd 100644 --- a/gcc/config/m68k/t-floatlib +++ b/gcc/config/m68k/t-floatlib @@ -1,4 +1,4 @@ -# Copyright (C) 2007 Free Software Foundation, Inc. +# Copyright (C) 2007, 2011 Free Software Foundation, Inc. # # This file is part of GCC. # @@ -16,12 +16,6 @@ # along with GCC; see the file COPYING3. 
If not see # . -LIB1ASMSRC = m68k/lb1sf68.asm -LIB1ASMFUNCS = _mulsi3 _udivsi3 _divsi3 _umodsi3 _modsi3 \ - _double _float _floatex \ - _eqdf2 _nedf2 _gtdf2 _gedf2 _ltdf2 _ledf2 \ - _eqsf2 _nesf2 _gtsf2 _gesf2 _ltsf2 _lesf2 - LIB2FUNCS_EXTRA = fpgnulib.c xfgnulib.c fpgnulib.c: $(srcdir)/config/m68k/fpgnulib.c diff --git a/gcc/config/mcore/lib1.asm b/gcc/config/mcore/lib1.asm deleted file mode 100644 index 701762f2a3c..00000000000 --- a/gcc/config/mcore/lib1.asm +++ /dev/null @@ -1,303 +0,0 @@ -/* libgcc routines for the MCore. - Copyright (C) 1993, 1999, 2000, 2009 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -#define CONCAT1(a, b) CONCAT2(a, b) -#define CONCAT2(a, b) a ## b - -/* Use the right prefix for global labels. */ - -#define SYM(x) CONCAT1 (__, x) - -#ifdef __ELF__ -#define TYPE(x) .type SYM (x),@function -#define SIZE(x) .size SYM (x), . - SYM (x) -#else -#define TYPE(x) -#define SIZE(x) -#endif - -.macro FUNC_START name - .text - .globl SYM (\name) - TYPE (\name) -SYM (\name): -.endm - -.macro FUNC_END name - SIZE (\name) -.endm - -#ifdef L_udivsi3 -FUNC_START udiv32 -FUNC_START udivsi32 - - movi r1,0 // r1-r2 form 64 bit dividend - movi r4,1 // r4 is quotient (1 for a sentinel) - - cmpnei r3,0 // look for 0 divisor - bt 9f - trap 3 // divide by 0 -9: - // control iterations; skip across high order 0 bits in dividend - mov r7,r2 - cmpnei r7,0 - bt 8f - movi r2,0 // 0 dividend - jmp r15 // quick return -8: - ff1 r7 // figure distance to skip - lsl r4,r7 // move the sentinel along (with 0's behind) - lsl r2,r7 // and the low 32 bits of numerator - -// appears to be wrong... -// tested out incorrectly in our OS work... -// mov r7,r3 // looking at divisor -// ff1 r7 // I can move 32-r7 more bits to left. -// addi r7,1 // ok, one short of that... -// mov r1,r2 -// lsr r1,r7 // bits that came from low order... -// rsubi r7,31 // r7 == "32-n" == LEFT distance -// addi r7,1 // this is (32-n) -// lsl r4,r7 // fixes the high 32 (quotient) -// lsl r2,r7 -// cmpnei r4,0 -// bf 4f // the sentinel went away... - - // run the remaining bits - -1: lslc r2,1 // 1 bit left shift of r1-r2 - addc r1,r1 - cmphs r1,r3 // upper 32 of dividend >= divisor? 
- bf 2f - sub r1,r3 // if yes, subtract divisor -2: addc r4,r4 // shift by 1 and count subtracts - bf 1b // if sentinel falls out of quotient, stop - -4: mov r2,r4 // return quotient - mov r3,r1 // and piggyback the remainder - jmp r15 -FUNC_END udiv32 -FUNC_END udivsi32 -#endif - -#ifdef L_umodsi3 -FUNC_START urem32 -FUNC_START umodsi3 - movi r1,0 // r1-r2 form 64 bit dividend - movi r4,1 // r4 is quotient (1 for a sentinel) - cmpnei r3,0 // look for 0 divisor - bt 9f - trap 3 // divide by 0 -9: - // control iterations; skip across high order 0 bits in dividend - mov r7,r2 - cmpnei r7,0 - bt 8f - movi r2,0 // 0 dividend - jmp r15 // quick return -8: - ff1 r7 // figure distance to skip - lsl r4,r7 // move the sentinel along (with 0's behind) - lsl r2,r7 // and the low 32 bits of numerator - -1: lslc r2,1 // 1 bit left shift of r1-r2 - addc r1,r1 - cmphs r1,r3 // upper 32 of dividend >= divisor? - bf 2f - sub r1,r3 // if yes, subtract divisor -2: addc r4,r4 // shift by 1 and count subtracts - bf 1b // if sentinel falls out of quotient, stop - mov r2,r1 // return remainder - jmp r15 -FUNC_END urem32 -FUNC_END umodsi3 -#endif - -#ifdef L_divsi3 -FUNC_START div32 -FUNC_START divsi3 - mov r5,r2 // calc sign of quotient - xor r5,r3 - abs r2 // do unsigned divide - abs r3 - movi r1,0 // r1-r2 form 64 bit dividend - movi r4,1 // r4 is quotient (1 for a sentinel) - cmpnei r3,0 // look for 0 divisor - bt 9f - trap 3 // divide by 0 -9: - // control iterations; skip across high order 0 bits in dividend - mov r7,r2 - cmpnei r7,0 - bt 8f - movi r2,0 // 0 dividend - jmp r15 // quick return -8: - ff1 r7 // figure distance to skip - lsl r4,r7 // move the sentinel along (with 0's behind) - lsl r2,r7 // and the low 32 bits of numerator - -// tested out incorrectly in our OS work... -// mov r7,r3 // looking at divisor -// ff1 r7 // I can move 32-r7 more bits to left. -// addi r7,1 // ok, one short of that... -// mov r1,r2 -// lsr r1,r7 // bits that came from low order... -// rsubi r7,31 // r7 == "32-n" == LEFT distance -// addi r7,1 // this is (32-n) -// lsl r4,r7 // fixes the high 32 (quotient) -// lsl r2,r7 -// cmpnei r4,0 -// bf 4f // the sentinel went away... - - // run the remaining bits -1: lslc r2,1 // 1 bit left shift of r1-r2 - addc r1,r1 - cmphs r1,r3 // upper 32 of dividend >= divisor? - bf 2f - sub r1,r3 // if yes, subtract divisor -2: addc r4,r4 // shift by 1 and count subtracts - bf 1b // if sentinel falls out of quotient, stop - -4: mov r2,r4 // return quotient - mov r3,r1 // piggyback the remainder - btsti r5,31 // after adjusting for sign - bf 3f - rsubi r2,0 - rsubi r3,0 -3: jmp r15 -FUNC_END div32 -FUNC_END divsi3 -#endif - -#ifdef L_modsi3 -FUNC_START rem32 -FUNC_START modsi3 - mov r5,r2 // calc sign of remainder - abs r2 // do unsigned divide - abs r3 - movi r1,0 // r1-r2 form 64 bit dividend - movi r4,1 // r4 is quotient (1 for a sentinel) - cmpnei r3,0 // look for 0 divisor - bt 9f - trap 3 // divide by 0 -9: - // control iterations; skip across high order 0 bits in dividend - mov r7,r2 - cmpnei r7,0 - bt 8f - movi r2,0 // 0 dividend - jmp r15 // quick return -8: - ff1 r7 // figure distance to skip - lsl r4,r7 // move the sentinel along (with 0's behind) - lsl r2,r7 // and the low 32 bits of numerator - -1: lslc r2,1 // 1 bit left shift of r1-r2 - addc r1,r1 - cmphs r1,r3 // upper 32 of dividend >= divisor? 
- bf 2f - sub r1,r3 // if yes, subtract divisor -2: addc r4,r4 // shift by 1 and count subtracts - bf 1b // if sentinel falls out of quotient, stop - mov r2,r1 // return remainder - btsti r5,31 // after adjusting for sign - bf 3f - rsubi r2,0 -3: jmp r15 -FUNC_END rem32 -FUNC_END modsi3 -#endif - - -/* GCC expects that {__eq,__ne,__gt,__ge,__le,__lt}{df2,sf2} - will behave as __cmpdf2. So, we stub the implementations to - jump on to __cmpdf2 and __cmpsf2. - - All of these shortcircuit the return path so that __cmp{sd}f2 - will go directly back to the caller. */ - -.macro COMPARE_DF_JUMP name - .import SYM (cmpdf2) -FUNC_START \name - jmpi SYM (cmpdf2) -FUNC_END \name -.endm - -#ifdef L_eqdf2 -COMPARE_DF_JUMP eqdf2 -#endif /* L_eqdf2 */ - -#ifdef L_nedf2 -COMPARE_DF_JUMP nedf2 -#endif /* L_nedf2 */ - -#ifdef L_gtdf2 -COMPARE_DF_JUMP gtdf2 -#endif /* L_gtdf2 */ - -#ifdef L_gedf2 -COMPARE_DF_JUMP gedf2 -#endif /* L_gedf2 */ - -#ifdef L_ltdf2 -COMPARE_DF_JUMP ltdf2 -#endif /* L_ltdf2 */ - -#ifdef L_ledf2 -COMPARE_DF_JUMP ledf2 -#endif /* L_ledf2 */ - -/* SINGLE PRECISION FLOATING POINT STUBS */ - -.macro COMPARE_SF_JUMP name - .import SYM (cmpsf2) -FUNC_START \name - jmpi SYM (cmpsf2) -FUNC_END \name -.endm - -#ifdef L_eqsf2 -COMPARE_SF_JUMP eqsf2 -#endif /* L_eqsf2 */ - -#ifdef L_nesf2 -COMPARE_SF_JUMP nesf2 -#endif /* L_nesf2 */ - -#ifdef L_gtsf2 -COMPARE_SF_JUMP gtsf2 -#endif /* L_gtsf2 */ - -#ifdef L_gesf2 -COMPARE_SF_JUMP __gesf2 -#endif /* L_gesf2 */ - -#ifdef L_ltsf2 -COMPARE_SF_JUMP __ltsf2 -#endif /* L_ltsf2 */ - -#ifdef L_lesf2 -COMPARE_SF_JUMP lesf2 -#endif /* L_lesf2 */ diff --git a/gcc/config/mcore/t-mcore b/gcc/config/mcore/t-mcore index 9c84d850f20..265399cecfe 100644 --- a/gcc/config/mcore/t-mcore +++ b/gcc/config/mcore/t-mcore @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = mcore/lib1.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _modsi3 _umodsi3 - # could use -msifilter to be safe from interrupt/jmp interactions and others. TARGET_LIBGCC2_CFLAGS=-O3 -DNO_FLOATLIB_FIXUNSDFSI #-msifilter diff --git a/gcc/config/mep/mep-lib1.asm b/gcc/config/mep/mep-lib1.asm deleted file mode 100644 index 0a18913f927..00000000000 --- a/gcc/config/mep/mep-lib1.asm +++ /dev/null @@ -1,125 +0,0 @@ -/* libgcc routines for Toshiba Media Processor. - Copyright (C) 2001, 2002, 2005, 2009 Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3 of the License, or (at your -option) any later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. 
*/ - -#define SAVEALL \ - add3 $sp, $sp, -16*4 ; \ - sw $0, ($sp) ; \ - sw $1, 4($sp) ; \ - sw $2, 8($sp) ; \ - sw $3, 12($sp) ; \ - sw $4, 16($sp) ; \ - sw $5, 20($sp) ; \ - sw $6, 24($sp) ; \ - sw $7, 28($sp) ; \ - sw $8, 32($sp) ; \ - sw $9, 36($sp) ; \ - sw $10, 40($sp) ; \ - sw $11, 44($sp) ; \ - sw $12, 48($sp) ; \ - sw $13, 52($sp) ; \ - sw $14, 56($sp) ; \ - ldc $5, $lp ; \ - add $5, 3 ; \ - mov $6, -4 ; \ - and $5, $6 - -#define RESTOREALL \ - stc $5, $lp ; \ - lw $14, 56($sp) ; \ - lw $13, 52($sp) ; \ - lw $12, 48($sp) ; \ - lw $11, 44($sp) ; \ - lw $10, 40($sp) ; \ - lw $9, 36($sp) ; \ - lw $8, 32($sp) ; \ - lw $7, 28($sp) ; \ - lw $6, 24($sp) ; \ - lw $5, 20($sp) ; \ - lw $4, 16($sp) ; \ - lw $3, 12($sp) ; \ - lw $2, 8($sp) ; \ - lw $1, 4($sp) ; \ - lw $0, ($sp) ; \ - add3 $sp, $sp, 16*4 ; \ - ret - -#ifdef L_mep_profile - .text - .global __mep_mcount -__mep_mcount: - SAVEALL - ldc $1, $lp - mov $2, $0 - bsr __mep_mcount_2 - RESTOREALL -#endif - -#ifdef L_mep_bb_init_trace - .text - .global __mep_bb_init_trace_func -__mep_bb_init_trace_func: - SAVEALL - lw $1, ($5) - lw $2, 4($5) - add $5, 8 - bsr __bb_init_trace_func - RESTOREALL -#endif - -#ifdef L_mep_bb_init - .text - .global __mep_bb_init_func -__mep_bb_init_func: - SAVEALL - lw $1, ($5) - add $5, 4 - bsr __bb_init_func - RESTOREALL -#endif - -#ifdef L_mep_bb_trace - .text - .global __mep_bb_trace_func -__mep_bb_trace_func: - SAVEALL - movu $3, __bb - lw $1, ($5) - sw $1, ($3) - lw $2, 4($5) - sw $2, 4($3) - add $5, 8 - bsr __bb_trace_func - RESTOREALL -#endif - -#ifdef L_mep_bb_increment - .text - .global __mep_bb_increment_func -__mep_bb_increment_func: - SAVEALL - lw $1, ($5) - lw $0, ($1) - add $0, 1 - sw $0, ($1) - add $5, 4 - RESTOREALL -#endif diff --git a/gcc/config/mep/t-mep b/gcc/config/mep/t-mep index d560db0aa4b..ac4ad95bc87 100644 --- a/gcc/config/mep/t-mep +++ b/gcc/config/mep/t-mep @@ -32,16 +32,6 @@ mep-pragma.o: $(srcdir)/config/mep/mep-pragma.c $(CONFIG_H) $(SYSTEM_H) \ function.h insn-config.h reload.h $(TARGET_H) $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< -# profiling support - -LIB1ASMSRC = mep/mep-lib1.asm - -LIB1ASMFUNCS = _mep_profile \ - _mep_bb_init_trace \ - _mep_bb_init \ - _mep_bb_trace \ - _mep_bb_increment - # multiply and divide routines LIB2FUNCS_EXTRA = \ diff --git a/gcc/config/mips/mips16.S b/gcc/config/mips/mips16.S deleted file mode 100644 index ec331b5f65e..00000000000 --- a/gcc/config/mips/mips16.S +++ /dev/null @@ -1,712 +0,0 @@ -/* mips16 floating point support code - Copyright (C) 1996, 1997, 1998, 2008, 2009, 2010 - Free Software Foundation, Inc. - Contributed by Cygnus Support - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. 
*/ - -/* This file contains mips16 floating point support functions. These - functions are called by mips16 code to handle floating point when - -msoft-float is not used. They accept the arguments and return - values using the soft-float calling convention, but do the actual - operation using the hard floating point instructions. */ - -#if defined _MIPS_SIM && (_MIPS_SIM == _ABIO32 || _MIPS_SIM == _ABIO64) - -/* This file contains 32-bit assembly code. */ - .set nomips16 - -/* Start a function. */ - -#define STARTFN(NAME) .globl NAME; .ent NAME; NAME: - -/* Finish a function. */ - -#define ENDFN(NAME) .end NAME - -/* ARG1 - The FPR that holds the first floating-point argument. - - ARG2 - The FPR that holds the second floating-point argument. - - RET - The FPR that holds a floating-point return value. */ - -#define RET $f0 -#define ARG1 $f12 -#ifdef __mips64 -#define ARG2 $f13 -#else -#define ARG2 $f14 -#endif - -/* Set 64-bit register GPR so that its high 32 bits contain HIGH_FPR - and so that its low 32 bits contain LOW_FPR. */ -#define MERGE_GPRf(GPR, HIGH_FPR, LOW_FPR) \ - .set noat; \ - mfc1 $1, LOW_FPR; \ - mfc1 GPR, HIGH_FPR; \ - dsll $1, $1, 32; \ - dsll GPR, GPR, 32; \ - dsrl $1, $1, 32; \ - or GPR, GPR, $1; \ - .set at - -/* Move the high 32 bits of GPR to HIGH_FPR and the low 32 bits of - GPR to LOW_FPR. */ -#define MERGE_GPRt(GPR, HIGH_FPR, LOW_FPR) \ - .set noat; \ - dsrl $1, GPR, 32; \ - mtc1 GPR, LOW_FPR; \ - mtc1 $1, HIGH_FPR; \ - .set at - -/* Jump to T, and use "OPCODE, OP2" to implement a delayed move. */ -#define DELAYt(T, OPCODE, OP2) \ - .set noreorder; \ - jr T; \ - OPCODE, OP2; \ - .set reorder - -/* Use "OPCODE. OP2" and jump to T. */ -#define DELAYf(T, OPCODE, OP2) OPCODE, OP2; jr T - -/* MOVE_SF_BYTE0(D) - Move the first single-precision floating-point argument between - GPRs and FPRs. - - MOVE_SI_BYTE0(D) - Likewise the first single-precision integer argument. - - MOVE_SF_BYTE4(D) - Move the second single-precision floating-point argument between - GPRs and FPRs, given that the first argument occupies 4 bytes. - - MOVE_SF_BYTE8(D) - Move the second single-precision floating-point argument between - GPRs and FPRs, given that the first argument occupies 8 bytes. - - MOVE_DF_BYTE0(D) - Move the first double-precision floating-point argument between - GPRs and FPRs. - - MOVE_DF_BYTE8(D) - Likewise the second double-precision floating-point argument. - - MOVE_SF_RET(D, T) - Likewise a single-precision floating-point return value, - then jump to T. - - MOVE_SC_RET(D, T) - Likewise a complex single-precision floating-point return value. - - MOVE_DF_RET(D, T) - Likewise a double-precision floating-point return value. - - MOVE_DC_RET(D, T) - Likewise a complex double-precision floating-point return value. - - MOVE_SI_RET(D, T) - Likewise a single-precision integer return value. - - The D argument is "t" to move to FPRs and "f" to move from FPRs. - The return macros may assume that the target of the jump does not - use a floating-point register. */ - -#define MOVE_SF_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0) -#define MOVE_SI_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0) - -#if defined(__mips64) && defined(__MIPSEB__) -#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f0, $f1); jr T -#elif defined(__mips64) -/* The high 32 bits of $2 correspond to the second word in memory; - i.e. the imaginary part. 
*/ -#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f1, $f0); jr T -#elif __mips_fpr == 64 -#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1) -#else -#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f2) -#endif - -#if defined(__mips64) -#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12 -#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f13 -#define MOVE_SF_BYTE8(D) m##D##c1 $5,$f13 -#else -#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12 -#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f14 -#define MOVE_SF_BYTE8(D) m##D##c1 $6,$f14 -#endif -#define MOVE_SI_BYTE0(D) MOVE_SF_BYTE0(D) - -#if defined(__mips64) -#define MOVE_DF_BYTE0(D) dm##D##c1 $4,$f12 -#define MOVE_DF_BYTE8(D) dm##D##c1 $5,$f13 -#define MOVE_DF_RET(D, T) DELAY##D (T, dm##D##c1 $2,$f0) -#define MOVE_DC_RET(D, T) dm##D##c1 $3,$f1; MOVE_DF_RET (D, T) -#elif __mips_fpr == 64 && defined(__MIPSEB__) -#define MOVE_DF_BYTE0(D) m##D##c1 $5,$f12; m##D##hc1 $4,$f12 -#define MOVE_DF_BYTE8(D) m##D##c1 $7,$f14; m##D##hc1 $6,$f14 -#define MOVE_DF_RET(D, T) m##D##c1 $3,$f0; DELAY##D (T, m##D##hc1 $2,$f0) -#define MOVE_DC_RET(D, T) m##D##c1 $5,$f1; m##D##hc1 $4,$f1; MOVE_DF_RET (D, T) -#elif __mips_fpr == 64 -#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##hc1 $5,$f12 -#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##hc1 $7,$f14 -#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##hc1 $3,$f0) -#define MOVE_DC_RET(D, T) m##D##c1 $4,$f1; m##D##hc1 $5,$f1; MOVE_DF_RET (D, T) -#elif defined(__MIPSEB__) -/* FPRs are little-endian. */ -#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f13; m##D##c1 $5,$f12 -#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f15; m##D##c1 $7,$f14 -#define MOVE_DF_RET(D, T) m##D##c1 $2,$f1; DELAY##D (T, m##D##c1 $3,$f0) -#define MOVE_DC_RET(D, T) m##D##c1 $4,$f3; m##D##c1 $5,$f2; MOVE_DF_RET (D, T) -#else -#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##c1 $5,$f13 -#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##c1 $7,$f15 -#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1) -#define MOVE_DC_RET(D, T) m##D##c1 $4,$f2; m##D##c1 $5,$f3; MOVE_DF_RET (D, T) -#endif - -/* Single-precision math. */ - -/* Define a function NAME that loads two single-precision values, - performs FPU operation OPCODE on them, and returns the single- - precision result. */ - -#define OPSF3(NAME, OPCODE) \ -STARTFN (NAME); \ - MOVE_SF_BYTE0 (t); \ - MOVE_SF_BYTE4 (t); \ - OPCODE RET,ARG1,ARG2; \ - MOVE_SF_RET (f, $31); \ - ENDFN (NAME) - -#ifdef L_m16addsf3 -OPSF3 (__mips16_addsf3, add.s) -#endif -#ifdef L_m16subsf3 -OPSF3 (__mips16_subsf3, sub.s) -#endif -#ifdef L_m16mulsf3 -OPSF3 (__mips16_mulsf3, mul.s) -#endif -#ifdef L_m16divsf3 -OPSF3 (__mips16_divsf3, div.s) -#endif - -/* Define a function NAME that loads a single-precision value, - performs FPU operation OPCODE on it, and returns the single- - precision result. */ - -#define OPSF2(NAME, OPCODE) \ -STARTFN (NAME); \ - MOVE_SF_BYTE0 (t); \ - OPCODE RET,ARG1; \ - MOVE_SF_RET (f, $31); \ - ENDFN (NAME) - -#ifdef L_m16negsf2 -OPSF2 (__mips16_negsf2, neg.s) -#endif -#ifdef L_m16abssf2 -OPSF2 (__mips16_abssf2, abs.s) -#endif - -/* Single-precision comparisons. */ - -/* Define a function NAME that loads two single-precision values, - performs floating point comparison OPCODE, and returns TRUE or - FALSE depending on the result. 
*/ - -#define CMPSF(NAME, OPCODE, TRUE, FALSE) \ -STARTFN (NAME); \ - MOVE_SF_BYTE0 (t); \ - MOVE_SF_BYTE4 (t); \ - OPCODE ARG1,ARG2; \ - li $2,TRUE; \ - bc1t 1f; \ - li $2,FALSE; \ -1:; \ - j $31; \ - ENDFN (NAME) - -/* Like CMPSF, but reverse the comparison operands. */ - -#define REVCMPSF(NAME, OPCODE, TRUE, FALSE) \ -STARTFN (NAME); \ - MOVE_SF_BYTE0 (t); \ - MOVE_SF_BYTE4 (t); \ - OPCODE ARG2,ARG1; \ - li $2,TRUE; \ - bc1t 1f; \ - li $2,FALSE; \ -1:; \ - j $31; \ - ENDFN (NAME) - -#ifdef L_m16eqsf2 -CMPSF (__mips16_eqsf2, c.eq.s, 0, 1) -#endif -#ifdef L_m16nesf2 -CMPSF (__mips16_nesf2, c.eq.s, 0, 1) -#endif -#ifdef L_m16gtsf2 -REVCMPSF (__mips16_gtsf2, c.lt.s, 1, 0) -#endif -#ifdef L_m16gesf2 -REVCMPSF (__mips16_gesf2, c.le.s, 0, -1) -#endif -#ifdef L_m16lesf2 -CMPSF (__mips16_lesf2, c.le.s, 0, 1) -#endif -#ifdef L_m16ltsf2 -CMPSF (__mips16_ltsf2, c.lt.s, -1, 0) -#endif -#ifdef L_m16unordsf2 -CMPSF(__mips16_unordsf2, c.un.s, 1, 0) -#endif - - -/* Single-precision conversions. */ - -#ifdef L_m16fltsisf -STARTFN (__mips16_floatsisf) - MOVE_SF_BYTE0 (t) - cvt.s.w RET,ARG1 - MOVE_SF_RET (f, $31) - ENDFN (__mips16_floatsisf) -#endif - -#ifdef L_m16fltunsisf -STARTFN (__mips16_floatunsisf) - .set noreorder - bltz $4,1f - MOVE_SF_BYTE0 (t) - .set reorder - cvt.s.w RET,ARG1 - MOVE_SF_RET (f, $31) -1: - and $2,$4,1 - srl $3,$4,1 - or $2,$2,$3 - mtc1 $2,RET - cvt.s.w RET,RET - add.s RET,RET,RET - MOVE_SF_RET (f, $31) - ENDFN (__mips16_floatunsisf) -#endif - -#ifdef L_m16fix_truncsfsi -STARTFN (__mips16_fix_truncsfsi) - MOVE_SF_BYTE0 (t) - trunc.w.s RET,ARG1,$4 - MOVE_SI_RET (f, $31) - ENDFN (__mips16_fix_truncsfsi) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) - -/* Double-precision math. */ - -/* Define a function NAME that loads two double-precision values, - performs FPU operation OPCODE on them, and returns the double- - precision result. */ - -#define OPDF3(NAME, OPCODE) \ -STARTFN (NAME); \ - MOVE_DF_BYTE0 (t); \ - MOVE_DF_BYTE8 (t); \ - OPCODE RET,ARG1,ARG2; \ - MOVE_DF_RET (f, $31); \ - ENDFN (NAME) - -#ifdef L_m16adddf3 -OPDF3 (__mips16_adddf3, add.d) -#endif -#ifdef L_m16subdf3 -OPDF3 (__mips16_subdf3, sub.d) -#endif -#ifdef L_m16muldf3 -OPDF3 (__mips16_muldf3, mul.d) -#endif -#ifdef L_m16divdf3 -OPDF3 (__mips16_divdf3, div.d) -#endif - -/* Define a function NAME that loads a double-precision value, - performs FPU operation OPCODE on it, and returns the double- - precision result. */ - -#define OPDF2(NAME, OPCODE) \ -STARTFN (NAME); \ - MOVE_DF_BYTE0 (t); \ - OPCODE RET,ARG1; \ - MOVE_DF_RET (f, $31); \ - ENDFN (NAME) - -#ifdef L_m16negdf2 -OPDF2 (__mips16_negdf2, neg.d) -#endif -#ifdef L_m16absdf2 -OPDF2 (__mips16_absdf2, abs.d) -#endif - -/* Conversions between single and double precision. */ - -#ifdef L_m16extsfdf2 -STARTFN (__mips16_extendsfdf2) - MOVE_SF_BYTE0 (t) - cvt.d.s RET,ARG1 - MOVE_DF_RET (f, $31) - ENDFN (__mips16_extendsfdf2) -#endif - -#ifdef L_m16trdfsf2 -STARTFN (__mips16_truncdfsf2) - MOVE_DF_BYTE0 (t) - cvt.s.d RET,ARG1 - MOVE_SF_RET (f, $31) - ENDFN (__mips16_truncdfsf2) -#endif - -/* Double-precision comparisons. */ - -/* Define a function NAME that loads two double-precision values, - performs floating point comparison OPCODE, and returns TRUE or - FALSE depending on the result. 
*/ - -#define CMPDF(NAME, OPCODE, TRUE, FALSE) \ -STARTFN (NAME); \ - MOVE_DF_BYTE0 (t); \ - MOVE_DF_BYTE8 (t); \ - OPCODE ARG1,ARG2; \ - li $2,TRUE; \ - bc1t 1f; \ - li $2,FALSE; \ -1:; \ - j $31; \ - ENDFN (NAME) - -/* Like CMPDF, but reverse the comparison operands. */ - -#define REVCMPDF(NAME, OPCODE, TRUE, FALSE) \ -STARTFN (NAME); \ - MOVE_DF_BYTE0 (t); \ - MOVE_DF_BYTE8 (t); \ - OPCODE ARG2,ARG1; \ - li $2,TRUE; \ - bc1t 1f; \ - li $2,FALSE; \ -1:; \ - j $31; \ - ENDFN (NAME) - -#ifdef L_m16eqdf2 -CMPDF (__mips16_eqdf2, c.eq.d, 0, 1) -#endif -#ifdef L_m16nedf2 -CMPDF (__mips16_nedf2, c.eq.d, 0, 1) -#endif -#ifdef L_m16gtdf2 -REVCMPDF (__mips16_gtdf2, c.lt.d, 1, 0) -#endif -#ifdef L_m16gedf2 -REVCMPDF (__mips16_gedf2, c.le.d, 0, -1) -#endif -#ifdef L_m16ledf2 -CMPDF (__mips16_ledf2, c.le.d, 0, 1) -#endif -#ifdef L_m16ltdf2 -CMPDF (__mips16_ltdf2, c.lt.d, -1, 0) -#endif -#ifdef L_m16unorddf2 -CMPDF(__mips16_unorddf2, c.un.d, 1, 0) -#endif - -/* Double-precision conversions. */ - -#ifdef L_m16fltsidf -STARTFN (__mips16_floatsidf) - MOVE_SI_BYTE0 (t) - cvt.d.w RET,ARG1 - MOVE_DF_RET (f, $31) - ENDFN (__mips16_floatsidf) -#endif - -#ifdef L_m16fltunsidf -STARTFN (__mips16_floatunsidf) - MOVE_SI_BYTE0 (t) - cvt.d.w RET,ARG1 - bgez $4,1f - li.d ARG1, 4.294967296e+9 - add.d RET, RET, ARG1 -1: MOVE_DF_RET (f, $31) - ENDFN (__mips16_floatunsidf) -#endif - -#ifdef L_m16fix_truncdfsi -STARTFN (__mips16_fix_truncdfsi) - MOVE_DF_BYTE0 (t) - trunc.w.d RET,ARG1,$4 - MOVE_SI_RET (f, $31) - ENDFN (__mips16_fix_truncdfsi) -#endif -#endif /* !__mips_single_float */ - -/* Define a function NAME that moves a return value of mode MODE from - FPRs to GPRs. */ - -#define RET_FUNCTION(NAME, MODE) \ -STARTFN (NAME); \ - MOVE_##MODE##_RET (t, $31); \ - ENDFN (NAME) - -#ifdef L_m16retsf -RET_FUNCTION (__mips16_ret_sf, SF) -#endif - -#ifdef L_m16retsc -RET_FUNCTION (__mips16_ret_sc, SC) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16retdf -RET_FUNCTION (__mips16_ret_df, DF) -#endif - -#ifdef L_m16retdc -RET_FUNCTION (__mips16_ret_dc, DC) -#endif -#endif /* !__mips_single_float */ - -/* STUB_ARGS_X copies the arguments from GPRs to FPRs for argument - code X. X is calculated as ARG1 + ARG2 * 4, where ARG1 and ARG2 - classify the first and second arguments as follows: - - 1: a single-precision argument - 2: a double-precision argument - 0: no argument, or not one of the above. */ - -#define STUB_ARGS_0 /* () */ -#define STUB_ARGS_1 MOVE_SF_BYTE0 (t) /* (sf) */ -#define STUB_ARGS_5 MOVE_SF_BYTE0 (t); MOVE_SF_BYTE4 (t) /* (sf, sf) */ -#define STUB_ARGS_9 MOVE_SF_BYTE0 (t); MOVE_DF_BYTE8 (t) /* (sf, df) */ -#define STUB_ARGS_2 MOVE_DF_BYTE0 (t) /* (df) */ -#define STUB_ARGS_6 MOVE_DF_BYTE0 (t); MOVE_SF_BYTE8 (t) /* (df, sf) */ -#define STUB_ARGS_10 MOVE_DF_BYTE0 (t); MOVE_DF_BYTE8 (t) /* (df, df) */ - -/* These functions are used by 16-bit code when calling via a function - pointer. They must copy the floating point arguments from the GPRs - to FPRs and then call function $2. 
*/ - -#define CALL_STUB_NO_RET(NAME, CODE) \ -STARTFN (NAME); \ - STUB_ARGS_##CODE; \ - .set noreorder; \ - jr $2; \ - move $25,$2; \ - .set reorder; \ - ENDFN (NAME) - -#ifdef L_m16stub1 -CALL_STUB_NO_RET (__mips16_call_stub_1, 1) -#endif - -#ifdef L_m16stub5 -CALL_STUB_NO_RET (__mips16_call_stub_5, 5) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) - -#ifdef L_m16stub2 -CALL_STUB_NO_RET (__mips16_call_stub_2, 2) -#endif - -#ifdef L_m16stub6 -CALL_STUB_NO_RET (__mips16_call_stub_6, 6) -#endif - -#ifdef L_m16stub9 -CALL_STUB_NO_RET (__mips16_call_stub_9, 9) -#endif - -#ifdef L_m16stub10 -CALL_STUB_NO_RET (__mips16_call_stub_10, 10) -#endif -#endif /* !__mips_single_float */ - -/* Now we have the same set of functions, except that this time the - function being called returns an SFmode, SCmode, DFmode or DCmode - value; we need to instantiate a set for each case. The calling - function will arrange to preserve $18, so these functions are free - to use it to hold the return address. - - Note that we do not know whether the function we are calling is 16 - bit or 32 bit. However, it does not matter, because 16-bit - functions always return floating point values in both the gp and - the fp regs. It would be possible to check whether the function - being called is 16 bits, in which case the copy is unnecessary; - however, it's faster to always do the copy. */ - -#define CALL_STUB_RET(NAME, CODE, MODE) \ -STARTFN (NAME); \ - move $18,$31; \ - STUB_ARGS_##CODE; \ - .set noreorder; \ - jalr $2; \ - move $25,$2; \ - .set reorder; \ - MOVE_##MODE##_RET (f, $18); \ - ENDFN (NAME) - -/* First, instantiate the single-float set. */ - -#ifdef L_m16stubsf0 -CALL_STUB_RET (__mips16_call_stub_sf_0, 0, SF) -#endif - -#ifdef L_m16stubsf1 -CALL_STUB_RET (__mips16_call_stub_sf_1, 1, SF) -#endif - -#ifdef L_m16stubsf5 -CALL_STUB_RET (__mips16_call_stub_sf_5, 5, SF) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16stubsf2 -CALL_STUB_RET (__mips16_call_stub_sf_2, 2, SF) -#endif - -#ifdef L_m16stubsf6 -CALL_STUB_RET (__mips16_call_stub_sf_6, 6, SF) -#endif - -#ifdef L_m16stubsf9 -CALL_STUB_RET (__mips16_call_stub_sf_9, 9, SF) -#endif - -#ifdef L_m16stubsf10 -CALL_STUB_RET (__mips16_call_stub_sf_10, 10, SF) -#endif -#endif /* !__mips_single_float */ - - -/* Now we have the same set of functions again, except that this time - the function being called returns an DFmode value. */ - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16stubdf0 -CALL_STUB_RET (__mips16_call_stub_df_0, 0, DF) -#endif - -#ifdef L_m16stubdf1 -CALL_STUB_RET (__mips16_call_stub_df_1, 1, DF) -#endif - -#ifdef L_m16stubdf5 -CALL_STUB_RET (__mips16_call_stub_df_5, 5, DF) -#endif - -#ifdef L_m16stubdf2 -CALL_STUB_RET (__mips16_call_stub_df_2, 2, DF) -#endif - -#ifdef L_m16stubdf6 -CALL_STUB_RET (__mips16_call_stub_df_6, 6, DF) -#endif - -#ifdef L_m16stubdf9 -CALL_STUB_RET (__mips16_call_stub_df_9, 9, DF) -#endif - -#ifdef L_m16stubdf10 -CALL_STUB_RET (__mips16_call_stub_df_10, 10, DF) -#endif -#endif /* !__mips_single_float */ - - -/* Ho hum. Here we have the same set of functions again, this time - for when the function being called returns an SCmode value. 
*/ - -#ifdef L_m16stubsc0 -CALL_STUB_RET (__mips16_call_stub_sc_0, 0, SC) -#endif - -#ifdef L_m16stubsc1 -CALL_STUB_RET (__mips16_call_stub_sc_1, 1, SC) -#endif - -#ifdef L_m16stubsc5 -CALL_STUB_RET (__mips16_call_stub_sc_5, 5, SC) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16stubsc2 -CALL_STUB_RET (__mips16_call_stub_sc_2, 2, SC) -#endif - -#ifdef L_m16stubsc6 -CALL_STUB_RET (__mips16_call_stub_sc_6, 6, SC) -#endif - -#ifdef L_m16stubsc9 -CALL_STUB_RET (__mips16_call_stub_sc_9, 9, SC) -#endif - -#ifdef L_m16stubsc10 -CALL_STUB_RET (__mips16_call_stub_sc_10, 10, SC) -#endif -#endif /* !__mips_single_float */ - - -/* Finally, another set of functions for DCmode. */ - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16stubdc0 -CALL_STUB_RET (__mips16_call_stub_dc_0, 0, DC) -#endif - -#ifdef L_m16stubdc1 -CALL_STUB_RET (__mips16_call_stub_dc_1, 1, DC) -#endif - -#ifdef L_m16stubdc5 -CALL_STUB_RET (__mips16_call_stub_dc_5, 5, DC) -#endif - -#ifdef L_m16stubdc2 -CALL_STUB_RET (__mips16_call_stub_dc_2, 2, DC) -#endif - -#ifdef L_m16stubdc6 -CALL_STUB_RET (__mips16_call_stub_dc_6, 6, DC) -#endif - -#ifdef L_m16stubdc9 -CALL_STUB_RET (__mips16_call_stub_dc_9, 9, DC) -#endif - -#ifdef L_m16stubdc10 -CALL_STUB_RET (__mips16_call_stub_dc_10, 10, DC) -#endif -#endif /* !__mips_single_float */ -#endif diff --git a/gcc/config/mips/t-libgcc-mips16 b/gcc/config/mips/t-libgcc-mips16 deleted file mode 100644 index 31a042bb75e..00000000000 --- a/gcc/config/mips/t-libgcc-mips16 +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2007, 2008, 2011 Free Software Foundation, Inc. -# -# This file is part of GCC. -# -# GCC is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GCC is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GCC; see the file COPYING3. If not see -# . - -LIB1ASMSRC = mips/mips16.S -LIB1ASMFUNCS = _m16addsf3 _m16subsf3 _m16mulsf3 _m16divsf3 \ - _m16eqsf2 _m16nesf2 _m16gtsf2 _m16gesf2 _m16lesf2 _m16ltsf2 \ - _m16unordsf2 \ - _m16fltsisf _m16fix_truncsfsi _m16fltunsisf \ - _m16adddf3 _m16subdf3 _m16muldf3 _m16divdf3 \ - _m16extsfdf2 _m16trdfsf2 \ - _m16eqdf2 _m16nedf2 _m16gtdf2 _m16gedf2 _m16ledf2 _m16ltdf2 \ - _m16unorddf2 \ - _m16fltsidf _m16fix_truncdfsi _m16fltunsidf \ - _m16retsf _m16retdf \ - _m16retsc _m16retdc \ - _m16stub1 _m16stub2 _m16stub5 _m16stub6 _m16stub9 _m16stub10 \ - _m16stubsf0 _m16stubsf1 _m16stubsf2 _m16stubsf5 _m16stubsf6 \ - _m16stubsf9 _m16stubsf10 \ - _m16stubdf0 _m16stubdf1 _m16stubdf2 _m16stubdf5 _m16stubdf6 \ - _m16stubdf9 _m16stubdf10 \ - _m16stubsc0 _m16stubsc1 _m16stubsc2 _m16stubsc5 _m16stubsc6 \ - _m16stubsc9 _m16stubsc10 \ - _m16stubdc0 _m16stubdc1 _m16stubdc2 _m16stubdc5 _m16stubdc6 \ - _m16stubdc9 _m16stubdc10 diff --git a/gcc/config/mips/t-sr71k b/gcc/config/mips/t-sr71k index 7b8669fefd2..f204017faa8 100644 --- a/gcc/config/mips/t-sr71k +++ b/gcc/config/mips/t-sr71k @@ -16,11 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -# Suppress building libgcc1.a, since the MIPS compiler port is complete -# and does not need anything from libgcc1.a. 
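
As background on the LIB1ASMSRC / LIB1ASMFUNCS variables being removed from these makefile fragments: each entry in LIB1ASMFUNCS is built as its own object file by compiling LIB1ASMSRC once per entry with the matching L_* macro defined, so only that routine's #ifdef block is assembled into that object. A minimal sketch of the pattern in C, with hypothetical entries _foo and _bar (the file name, function names and compile lines below are illustrative, not taken from this patch):

    /* lib1.c -- stands in for a LIB1ASMSRC file.  Built roughly as:
         cc -DL_foo -c lib1.c -o _foo.o
         cc -DL_bar -c lib1.c -o _bar.o
       so each object ends up holding exactly one routine.  */
    #ifdef L_foo
    int foo (int x) { return x + 1; }   /* only present in _foo.o */
    #endif

    #ifdef L_bar
    int bar (int x) { return x - 1; }   /* only present in _bar.o */
    #endif

The same guards are visible in the deleted sources above: mips16.S wraps every routine in #ifdef L_m16..., and t-libgcc-mips16 simply lists which of those guards get built.
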
-LIBGCC1 = -CROSS_LIBGCC1 = - # We must build libgcc2.a with -G 0, in case the user wants to link # without the $gp register. TARGET_LIBGCC2_CFLAGS = -G 0 diff --git a/gcc/config/pa/milli64.S b/gcc/config/pa/milli64.S deleted file mode 100644 index 2e9c4f741b6..00000000000 --- a/gcc/config/pa/milli64.S +++ /dev/null @@ -1,2134 +0,0 @@ -/* 32 and 64-bit millicode, original author Hewlett-Packard - adapted for gcc by Paul Bame - and Alan Modra . - - Copyright 2001, 2002, 2003, 2007, 2009 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 3, or (at your option) any later -version. - -GCC is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -#ifdef pa64 - .level 2.0w -#endif - -/* Hardware General Registers. */ -r0: .reg %r0 -r1: .reg %r1 -r2: .reg %r2 -r3: .reg %r3 -r4: .reg %r4 -r5: .reg %r5 -r6: .reg %r6 -r7: .reg %r7 -r8: .reg %r8 -r9: .reg %r9 -r10: .reg %r10 -r11: .reg %r11 -r12: .reg %r12 -r13: .reg %r13 -r14: .reg %r14 -r15: .reg %r15 -r16: .reg %r16 -r17: .reg %r17 -r18: .reg %r18 -r19: .reg %r19 -r20: .reg %r20 -r21: .reg %r21 -r22: .reg %r22 -r23: .reg %r23 -r24: .reg %r24 -r25: .reg %r25 -r26: .reg %r26 -r27: .reg %r27 -r28: .reg %r28 -r29: .reg %r29 -r30: .reg %r30 -r31: .reg %r31 - -/* Hardware Space Registers. */ -sr0: .reg %sr0 -sr1: .reg %sr1 -sr2: .reg %sr2 -sr3: .reg %sr3 -sr4: .reg %sr4 -sr5: .reg %sr5 -sr6: .reg %sr6 -sr7: .reg %sr7 - -/* Hardware Floating Point Registers. */ -fr0: .reg %fr0 -fr1: .reg %fr1 -fr2: .reg %fr2 -fr3: .reg %fr3 -fr4: .reg %fr4 -fr5: .reg %fr5 -fr6: .reg %fr6 -fr7: .reg %fr7 -fr8: .reg %fr8 -fr9: .reg %fr9 -fr10: .reg %fr10 -fr11: .reg %fr11 -fr12: .reg %fr12 -fr13: .reg %fr13 -fr14: .reg %fr14 -fr15: .reg %fr15 - -/* Hardware Control Registers. */ -cr11: .reg %cr11 -sar: .reg %cr11 /* Shift Amount Register */ - -/* Software Architecture General Registers. */ -rp: .reg r2 /* return pointer */ -#ifdef pa64 -mrp: .reg r2 /* millicode return pointer */ -#else -mrp: .reg r31 /* millicode return pointer */ -#endif -ret0: .reg r28 /* return value */ -ret1: .reg r29 /* return value (high part of double) */ -sp: .reg r30 /* stack pointer */ -dp: .reg r27 /* data pointer */ -arg0: .reg r26 /* argument */ -arg1: .reg r25 /* argument or high part of double argument */ -arg2: .reg r24 /* argument */ -arg3: .reg r23 /* argument or high part of double argument */ - -/* Software Architecture Space Registers. */ -/* sr0 ; return link from BLE */ -sret: .reg sr1 /* return value */ -sarg: .reg sr1 /* argument */ -/* sr4 ; PC SPACE tracker */ -/* sr5 ; process private data */ - -/* Frame Offsets (millicode convention!) Used when calling other - millicode routines. Stack unwinding is dependent upon these - definitions. 
*/ -r31_slot: .equ -20 /* "current RP" slot */ -sr0_slot: .equ -16 /* "static link" slot */ -#if defined(pa64) -mrp_slot: .equ -16 /* "current RP" slot */ -psp_slot: .equ -8 /* "previous SP" slot */ -#else -mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */ -#endif - - -#define DEFINE(name,value)name: .EQU value -#define RDEFINE(name,value)name: .REG value -#ifdef milliext -#define MILLI_BE(lbl) BE lbl(sr7,r0) -#define MILLI_BEN(lbl) BE,n lbl(sr7,r0) -#define MILLI_BLE(lbl) BLE lbl(sr7,r0) -#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0) -#define MILLIRETN BE,n 0(sr0,mrp) -#define MILLIRET BE 0(sr0,mrp) -#define MILLI_RETN BE,n 0(sr0,mrp) -#define MILLI_RET BE 0(sr0,mrp) -#else -#define MILLI_BE(lbl) B lbl -#define MILLI_BEN(lbl) B,n lbl -#define MILLI_BLE(lbl) BL lbl,mrp -#define MILLI_BLEN(lbl) BL,n lbl,mrp -#define MILLIRETN BV,n 0(mrp) -#define MILLIRET BV 0(mrp) -#define MILLI_RETN BV,n 0(mrp) -#define MILLI_RET BV 0(mrp) -#endif - -#ifdef __STDC__ -#define CAT(a,b) a##b -#else -#define CAT(a,b) a/**/b -#endif - -#ifdef ELF -#define SUBSPA_MILLI .section .text -#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16 -#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16 -#define ATTR_MILLI -#define SUBSPA_DATA .section .data -#define ATTR_DATA -#define GLOBAL $global$ -#define GSYM(sym) !sym: -#define LSYM(sym) !CAT(.L,sym:) -#define LREF(sym) CAT(.L,sym) - -#else - -#ifdef coff -/* This used to be .milli but since link32 places different named - sections in different segments millicode ends up a long ways away - from .text (1meg?). This way they will be a lot closer. - - The SUBSPA_MILLI_* specify locality sets for certain millicode - modules in order to ensure that modules that call one another are - placed close together. Without locality sets this is unlikely to - happen because of the Dynamite linker library search algorithm. We - want these modules close together so that short calls always reach - (we don't want to require long calls or use long call stubs). 
*/ - -#define SUBSPA_MILLI .subspa .text -#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16 -#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16 -#define ATTR_MILLI .attr code,read,execute -#define SUBSPA_DATA .subspa .data -#define ATTR_DATA .attr init_data,read,write -#define GLOBAL _gp -#else -#define SUBSPA_MILLI .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8 -#define SUBSPA_MILLI_DIV SUBSPA_MILLI -#define SUBSPA_MILLI_MUL SUBSPA_MILLI -#define ATTR_MILLI -#define SUBSPA_DATA .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero -#define ATTR_DATA -#define GLOBAL $global$ -#endif -#define SPACE_DATA .space $PRIVATE$,spnum=1,sort=16 - -#define GSYM(sym) !sym -#define LSYM(sym) !CAT(L$,sym) -#define LREF(sym) CAT(L$,sym) -#endif - -#ifdef L_dyncall - SUBSPA_MILLI - ATTR_DATA -GSYM($$dyncall) - .export $$dyncall,millicode - .proc - .callinfo millicode - .entry - bb,>=,n %r22,30,LREF(1) ; branch if not plabel address - depi 0,31,2,%r22 ; clear the two least significant bits - ldw 4(%r22),%r19 ; load new LTP value - ldw 0(%r22),%r22 ; load address of target -LSYM(1) -#ifdef LINUX - bv %r0(%r22) ; branch to the real target -#else - ldsid (%sr0,%r22),%r1 ; get the "space ident" selected by r22 - mtsp %r1,%sr0 ; move that space identifier into sr0 - be 0(%sr0,%r22) ; branch to the real target -#endif - stw %r2,-24(%r30) ; save return address into frame marker - .exit - .procend -#endif - -#ifdef L_divI -/* ROUTINES: $$divI, $$divoI - - Single precision divide for signed binary integers. - - The quotient is truncated towards zero. - The sign of the quotient is the XOR of the signs of the dividend and - divisor. - Divide by zero is trapped. - Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI. - - INPUT REGISTERS: - . arg0 == dividend - . arg1 == divisor - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = undefined - . arg1 = undefined - . ret1 = quotient - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: - . divisor is zero (traps with ADDIT,= 0,25,0) - . dividend==-2**31 and divisor==-1 and routine is $$divoI - . (traps with ADDO 26,25,0) - . Changes memory at the following places: - . NONE - - PERMISSIBLE CONTEXT: - . Unwindable. - . Suitable for internal or external millicode. - . Assumes the special millicode register conventions. - - DISCUSSION: - . Branchs to other millicode routines using BE - . $$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15 - . - . For selected divisors, calls a divide by constant routine written by - . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13. - . - . The only overflow case is -2**31 divided by -1. - . Both routines return -2**31 but only $$divoI traps. */ - -RDEFINE(temp,r1) -RDEFINE(retreg,ret1) /* r29 */ -RDEFINE(temp1,arg0) - SUBSPA_MILLI_DIV - ATTR_MILLI - .import $$divI_2,millicode - .import $$divI_3,millicode - .import $$divI_4,millicode - .import $$divI_5,millicode - .import $$divI_6,millicode - .import $$divI_7,millicode - .import $$divI_8,millicode - .import $$divI_9,millicode - .import $$divI_10,millicode - .import $$divI_12,millicode - .import $$divI_14,millicode - .import $$divI_15,millicode - .export $$divI,millicode - .export $$divoI,millicode - .proc - .callinfo millicode - .entry -GSYM($$divoI) - comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1 */ -GSYM($$divI) - ldo -1(arg1),temp /* is there at most one bit set ? 
*/ - and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */ - addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */ - b,n LREF(neg_denom) -LSYM(pow2) - addi,>= 0,arg0,retreg /* if numerator is negative, add the */ - add arg0,temp,retreg /* (denominaotr -1) to correct for shifts */ - extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */ - extrs retreg,15,16,retreg /* retreg = retreg >> 16 */ - or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */ - ldi 0xcc,temp1 /* setup 0xcc in temp1 */ - extru,= arg1,23,8,temp /* test denominator with 0xff00 */ - extrs retreg,23,24,retreg /* retreg = retreg >> 8 */ - or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */ - ldi 0xaa,temp /* setup 0xaa in temp */ - extru,= arg1,27,4,r0 /* test denominator with 0xf0 */ - extrs retreg,27,28,retreg /* retreg = retreg >> 4 */ - and,= arg1,temp1,r0 /* test denominator with 0xcc */ - extrs retreg,29,30,retreg /* retreg = retreg >> 2 */ - and,= arg1,temp,r0 /* test denominator with 0xaa */ - extrs retreg,30,31,retreg /* retreg = retreg >> 1 */ - MILLIRETN -LSYM(neg_denom) - addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */ - b,n LREF(regular_seq) - sub r0,arg1,temp /* make denominator positive */ - comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */ - ldo -1(temp),retreg /* is there at most one bit set ? */ - and,= temp,retreg,r0 /* if so, the denominator is power of 2 */ - b,n LREF(regular_seq) - sub r0,arg0,retreg /* negate numerator */ - comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */ - copy retreg,arg0 /* set up arg0, arg1 and temp */ - copy temp,arg1 /* before branching to pow2 */ - b LREF(pow2) - ldo -1(arg1),temp -LSYM(regular_seq) - comib,>>=,n 15,arg1,LREF(small_divisor) - add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */ -LSYM(normal) - subi 0,retreg,retreg /* make it positive */ - sub 0,arg1,temp /* clear carry, */ - /* negate the divisor */ - ds 0,temp,0 /* set V-bit to the comple- */ - /* ment of the divisor sign */ - add retreg,retreg,retreg /* shift msb bit into carry */ - ds r0,arg1,temp /* 1st divide step, if no carry */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 2nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 3rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 4th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 5th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 6th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 7th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 8th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 9th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 10th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 11th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 12th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 13th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 14th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ 
- ds temp,arg1,temp /* 15th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 16th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 17th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 18th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 19th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 20th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 21st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 22nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 23rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 24th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 25th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 26th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 27th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 28th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 29th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 30th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 31st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 32nd divide step, */ - addc retreg,retreg,retreg /* shift last retreg bit into retreg */ - xor,>= arg0,arg1,0 /* get correct sign of quotient */ - sub 0,retreg,retreg /* based on operand signs */ - MILLIRETN - nop - -LSYM(small_divisor) - -#if defined(pa64) -/* Clear the upper 32 bits of the arg1 register. We are working with */ -/* small divisors (and 32-bit integers) We must not be mislead */ -/* by "1" bits left in the upper 32 bits. */ - depd %r0,31,32,%r25 -#endif - blr,n arg1,r0 - nop -/* table for divisor == 0,1, ... ,15 */ - addit,= 0,arg1,r0 /* trap if divisor == 0 */ - nop - MILLIRET /* divisor == 1 */ - copy arg0,retreg - MILLI_BEN($$divI_2) /* divisor == 2 */ - nop - MILLI_BEN($$divI_3) /* divisor == 3 */ - nop - MILLI_BEN($$divI_4) /* divisor == 4 */ - nop - MILLI_BEN($$divI_5) /* divisor == 5 */ - nop - MILLI_BEN($$divI_6) /* divisor == 6 */ - nop - MILLI_BEN($$divI_7) /* divisor == 7 */ - nop - MILLI_BEN($$divI_8) /* divisor == 8 */ - nop - MILLI_BEN($$divI_9) /* divisor == 9 */ - nop - MILLI_BEN($$divI_10) /* divisor == 10 */ - nop - b LREF(normal) /* divisor == 11 */ - add,>= 0,arg0,retreg - MILLI_BEN($$divI_12) /* divisor == 12 */ - nop - b LREF(normal) /* divisor == 13 */ - add,>= 0,arg0,retreg - MILLI_BEN($$divI_14) /* divisor == 14 */ - nop - MILLI_BEN($$divI_15) /* divisor == 15 */ - nop - -LSYM(negative1) - sub 0,arg0,retreg /* result is negation of dividend */ - MILLIRET - addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */ - .exit - .procend - .end -#endif - -#ifdef L_divU -/* ROUTINE: $$divU - . - . Single precision divide for unsigned integers. - . - . Quotient is truncated towards zero. - . Traps on divide by zero. - - INPUT REGISTERS: - . 
arg0 == dividend - . arg1 == divisor - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = undefined - . arg1 = undefined - . ret1 = quotient - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: - . divisor is zero - . Changes memory at the following places: - . NONE - - PERMISSIBLE CONTEXT: - . Unwindable. - . Does not create a stack frame. - . Suitable for internal or external millicode. - . Assumes the special millicode register conventions. - - DISCUSSION: - . Branchs to other millicode routines using BE: - . $$divU_# for 3,5,6,7,9,10,12,14,15 - . - . For selected small divisors calls the special divide by constant - . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */ - -RDEFINE(temp,r1) -RDEFINE(retreg,ret1) /* r29 */ -RDEFINE(temp1,arg0) - SUBSPA_MILLI_DIV - ATTR_MILLI - .export $$divU,millicode - .import $$divU_3,millicode - .import $$divU_5,millicode - .import $$divU_6,millicode - .import $$divU_7,millicode - .import $$divU_9,millicode - .import $$divU_10,millicode - .import $$divU_12,millicode - .import $$divU_14,millicode - .import $$divU_15,millicode - .proc - .callinfo millicode - .entry -GSYM($$divU) -/* The subtract is not nullified since it does no harm and can be used - by the two cases that branch back to "normal". */ - ldo -1(arg1),temp /* is there at most one bit set ? */ - and,= arg1,temp,r0 /* if so, denominator is power of 2 */ - b LREF(regular_seq) - addit,= 0,arg1,0 /* trap for zero dvr */ - copy arg0,retreg - extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */ - extru retreg,15,16,retreg /* retreg = retreg >> 16 */ - or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */ - ldi 0xcc,temp1 /* setup 0xcc in temp1 */ - extru,= arg1,23,8,temp /* test denominator with 0xff00 */ - extru retreg,23,24,retreg /* retreg = retreg >> 8 */ - or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */ - ldi 0xaa,temp /* setup 0xaa in temp */ - extru,= arg1,27,4,r0 /* test denominator with 0xf0 */ - extru retreg,27,28,retreg /* retreg = retreg >> 4 */ - and,= arg1,temp1,r0 /* test denominator with 0xcc */ - extru retreg,29,30,retreg /* retreg = retreg >> 2 */ - and,= arg1,temp,r0 /* test denominator with 0xaa */ - extru retreg,30,31,retreg /* retreg = retreg >> 1 */ - MILLIRETN - nop -LSYM(regular_seq) - comib,>= 15,arg1,LREF(special_divisor) - subi 0,arg1,temp /* clear carry, negate the divisor */ - ds r0,temp,r0 /* set V-bit to 1 */ -LSYM(normal) - add arg0,arg0,retreg /* shift msb bit into carry */ - ds r0,arg1,temp /* 1st divide step, if no carry */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 2nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 3rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 4th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 5th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 6th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 7th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 8th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 9th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds 
temp,arg1,temp /* 10th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 11th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 12th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 13th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 14th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 15th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 16th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 17th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 18th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 19th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 20th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 21st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 22nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 23rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 24th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 25th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 26th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 27th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 28th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 29th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 30th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 31st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 32nd divide step, */ - MILLIRET - addc retreg,retreg,retreg /* shift last retreg bit into retreg */ - -/* Handle the cases where divisor is a small constant or has high bit on. */ -LSYM(special_divisor) -/* blr arg1,r0 */ -/* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */ - -/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from - generating such a blr, comib sequence. A problem in nullification. So I - rewrote this code. */ - -#if defined(pa64) -/* Clear the upper 32 bits of the arg1 register. We are working with - small divisors (and 32-bit unsigned integers) We must not be mislead - by "1" bits left in the upper 32 bits. 
*/ - depd %r0,31,32,%r25 -#endif - comib,> 0,arg1,LREF(big_divisor) - nop - blr arg1,r0 - nop - -LSYM(zero_divisor) /* this label is here to provide external visibility */ - addit,= 0,arg1,0 /* trap for zero dvr */ - nop - MILLIRET /* divisor == 1 */ - copy arg0,retreg - MILLIRET /* divisor == 2 */ - extru arg0,30,31,retreg - MILLI_BEN($$divU_3) /* divisor == 3 */ - nop - MILLIRET /* divisor == 4 */ - extru arg0,29,30,retreg - MILLI_BEN($$divU_5) /* divisor == 5 */ - nop - MILLI_BEN($$divU_6) /* divisor == 6 */ - nop - MILLI_BEN($$divU_7) /* divisor == 7 */ - nop - MILLIRET /* divisor == 8 */ - extru arg0,28,29,retreg - MILLI_BEN($$divU_9) /* divisor == 9 */ - nop - MILLI_BEN($$divU_10) /* divisor == 10 */ - nop - b LREF(normal) /* divisor == 11 */ - ds r0,temp,r0 /* set V-bit to 1 */ - MILLI_BEN($$divU_12) /* divisor == 12 */ - nop - b LREF(normal) /* divisor == 13 */ - ds r0,temp,r0 /* set V-bit to 1 */ - MILLI_BEN($$divU_14) /* divisor == 14 */ - nop - MILLI_BEN($$divU_15) /* divisor == 15 */ - nop - -/* Handle the case where the high bit is on in the divisor. - Compute: if( dividend>=divisor) quotient=1; else quotient=0; - Note: dividend>==divisor iff dividend-divisor does not borrow - and not borrow iff carry. */ -LSYM(big_divisor) - sub arg0,arg1,r0 - MILLIRET - addc r0,r0,retreg - .exit - .procend - .end -#endif - -#ifdef L_remI -/* ROUTINE: $$remI - - DESCRIPTION: - . $$remI returns the remainder of the division of two signed 32-bit - . integers. The sign of the remainder is the same as the sign of - . the dividend. - - - INPUT REGISTERS: - . arg0 == dividend - . arg1 == divisor - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = destroyed - . arg1 = destroyed - . ret1 = remainder - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: DIVIDE BY ZERO - . Changes memory at the following places: NONE - - PERMISSIBLE CONTEXT: - . Unwindable - . Does not create a stack frame - . Is usable for internal or external microcode - - DISCUSSION: - . Calls other millicode routines via mrp: NONE - . Calls other millicode routines: NONE */ - -RDEFINE(tmp,r1) -RDEFINE(retreg,ret1) - - SUBSPA_MILLI - ATTR_MILLI - .proc - .callinfo millicode - .entry -GSYM($$remI) -GSYM($$remoI) - .export $$remI,MILLICODE - .export $$remoI,MILLICODE - ldo -1(arg1),tmp /* is there at most one bit set ? */ - and,<> arg1,tmp,r0 /* if not, don't use power of 2 */ - addi,> 0,arg1,r0 /* if denominator > 0, use power */ - /* of 2 */ - b,n LREF(neg_denom) -LSYM(pow2) - comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */ - and arg0,tmp,retreg /* get the result */ - MILLIRETN -LSYM(neg_num) - subi 0,arg0,arg0 /* negate numerator */ - and arg0,tmp,retreg /* get the result */ - subi 0,retreg,retreg /* negate result */ - MILLIRETN -LSYM(neg_denom) - addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */ - /* of 2 */ - b,n LREF(regular_seq) - sub r0,arg1,tmp /* make denominator positive */ - comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */ - ldo -1(tmp),retreg /* is there at most one bit set ? 
*/ - and,= tmp,retreg,r0 /* if not, go to regular_seq */ - b,n LREF(regular_seq) - comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */ - and arg0,retreg,retreg - MILLIRETN -LSYM(neg_num_2) - subi 0,arg0,tmp /* test against 0x80000000 */ - and tmp,retreg,retreg - subi 0,retreg,retreg - MILLIRETN -LSYM(regular_seq) - addit,= 0,arg1,0 /* trap if div by zero */ - add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */ - sub 0,retreg,retreg /* make it positive */ - sub 0,arg1, tmp /* clear carry, */ - /* negate the divisor */ - ds 0, tmp,0 /* set V-bit to the comple- */ - /* ment of the divisor sign */ - or 0,0, tmp /* clear tmp */ - add retreg,retreg,retreg /* shift msb bit into carry */ - ds tmp,arg1, tmp /* 1st divide step, if no carry */ - /* out, msb of quotient = 0 */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ -LSYM(t1) - ds tmp,arg1, tmp /* 2nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 3rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 4th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 5th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 6th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 7th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 8th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 9th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 10th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 11th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 12th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 13th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 14th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 15th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 16th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 17th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 18th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 19th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 20th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 21st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 22nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 23rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 24th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 25th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 26th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 27th divide step */ - addc retreg,retreg,retreg /* 
shift retreg with/into carry */ - ds tmp,arg1, tmp /* 28th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 29th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 30th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 31st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 32nd divide step, */ - addc retreg,retreg,retreg /* shift last bit into retreg */ - movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */ - add,< arg1,0,0 /* if arg1 > 0, add arg1 */ - add,tr tmp,arg1,retreg /* for correcting remainder tmp */ - sub tmp,arg1,retreg /* else add absolute value arg1 */ -LSYM(finish) - add,>= arg0,0,0 /* set sign of remainder */ - sub 0,retreg,retreg /* to sign of dividend */ - MILLIRET - nop - .exit - .procend -#ifdef milliext - .origin 0x00000200 -#endif - .end -#endif - -#ifdef L_remU -/* ROUTINE: $$remU - . Single precision divide for remainder with unsigned binary integers. - . - . The remainder must be dividend-(dividend/divisor)*divisor. - . Divide by zero is trapped. - - INPUT REGISTERS: - . arg0 == dividend - . arg1 == divisor - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = undefined - . arg1 = undefined - . ret1 = remainder - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: DIVIDE BY ZERO - . Changes memory at the following places: NONE - - PERMISSIBLE CONTEXT: - . Unwindable. - . Does not create a stack frame. - . Suitable for internal or external millicode. - . Assumes the special millicode register conventions. - - DISCUSSION: - . Calls other millicode routines using mrp: NONE - . Calls other millicode routines: NONE */ - - -RDEFINE(temp,r1) -RDEFINE(rmndr,ret1) /* r29 */ - SUBSPA_MILLI - ATTR_MILLI - .export $$remU,millicode - .proc - .callinfo millicode - .entry -GSYM($$remU) - ldo -1(arg1),temp /* is there at most one bit set ? 
*/ - and,= arg1,temp,r0 /* if not, don't use power of 2 */ - b LREF(regular_seq) - addit,= 0,arg1,r0 /* trap on div by zero */ - and arg0,temp,rmndr /* get the result for power of 2 */ - MILLIRETN -LSYM(regular_seq) - comib,>=,n 0,arg1,LREF(special_case) - subi 0,arg1,rmndr /* clear carry, negate the divisor */ - ds r0,rmndr,r0 /* set V-bit to 1 */ - add arg0,arg0,temp /* shift msb bit into carry */ - ds r0,arg1,rmndr /* 1st divide step, if no carry */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 2nd divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 3rd divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 4th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 5th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 6th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 7th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 8th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 9th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 10th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 11th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 12th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 13th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 14th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 15th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 16th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 17th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 18th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 19th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 20th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 21st divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 22nd divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 23rd divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 24th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 25th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 26th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 27th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 28th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 29th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 30th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 31st divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 32nd divide step, */ - 
comiclr,<= 0,rmndr,r0 - add rmndr,arg1,rmndr /* correction */ - MILLIRETN - nop - -/* Putting >= on the last DS and deleting COMICLR does not work! */ -LSYM(special_case) - sub,>>= arg0,arg1,rmndr - copy arg0,rmndr - MILLIRETN - nop - .exit - .procend - .end -#endif - -#ifdef L_div_const -/* ROUTINE: $$divI_2 - . $$divI_3 $$divU_3 - . $$divI_4 - . $$divI_5 $$divU_5 - . $$divI_6 $$divU_6 - . $$divI_7 $$divU_7 - . $$divI_8 - . $$divI_9 $$divU_9 - . $$divI_10 $$divU_10 - . - . $$divI_12 $$divU_12 - . - . $$divI_14 $$divU_14 - . $$divI_15 $$divU_15 - . $$divI_16 - . $$divI_17 $$divU_17 - . - . Divide by selected constants for single precision binary integers. - - INPUT REGISTERS: - . arg0 == dividend - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = undefined - . arg1 = undefined - . ret1 = quotient - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: NONE - . Changes memory at the following places: NONE - - PERMISSIBLE CONTEXT: - . Unwindable. - . Does not create a stack frame. - . Suitable for internal or external millicode. - . Assumes the special millicode register conventions. - - DISCUSSION: - . Calls other millicode routines using mrp: NONE - . Calls other millicode routines: NONE */ - - -/* TRUNCATED DIVISION BY SMALL INTEGERS - - We are interested in q(x) = floor(x/y), where x >= 0 and y > 0 - (with y fixed). - - Let a = floor(z/y), for some choice of z. Note that z will be - chosen so that division by z is cheap. - - Let r be the remainder(z/y). In other words, r = z - ay. - - Now, our method is to choose a value for b such that - - q'(x) = floor((ax+b)/z) - - is equal to q(x) over as large a range of x as possible. If the - two are equal over a sufficiently large range, and if it is easy to - form the product (ax), and it is easy to divide by z, then we can - perform the division much faster than the general division algorithm. - - So, we want the following to be true: - - . For x in the following range: - . - . ky <= x < (k+1)y - . - . implies that - . - . k <= (ax+b)/z < (k+1) - - We want to determine b such that this is true for all k in the - range {0..K} for some maximum K. - - Since (ax+b) is an increasing function of x, we can take each - bound separately to determine the "best" value for b. - - (ax+b)/z < (k+1) implies - - (a((k+1)y-1)+b < (k+1)z implies - - b < a + (k+1)(z-ay) implies - - b < a + (k+1)r - - This needs to be true for all k in the range {0..K}. In - particular, it is true for k = 0 and this leads to a maximum - acceptable value for b. - - b < a+r or b <= a+r-1 - - Taking the other bound, we have - - k <= (ax+b)/z implies - - k <= (aky+b)/z implies - - k(z-ay) <= b implies - - kr <= b - - Clearly, the largest range for k will be achieved by maximizing b, - when r is not zero. When r is zero, then the simplest choice for b - is 0. When r is not 0, set - - . b = a+r-1 - - Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y) - for all x in the range: - - . 0 <= x < (K+1)y - - We need to determine what K is. Of our two bounds, - - . b < a+(k+1)r is satisfied for all k >= 0, by construction. - - The other bound is - - . kr <= b - - This is always true if r = 0. If r is not 0 (the usual case), then - K = floor((a+r-1)/r), is the maximum value for k. 
- - Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct - answer for q(x) = floor(x/y) when x is in the range - - (0,(K+1)y-1) K = floor((a+r-1)/r) - - To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that - the formula for q'(x) yields the correct value of q(x) for all x - representable by a single word in HPPA. - - We are also constrained in that computing the product (ax), adding - b, and dividing by z must all be done quickly, otherwise we will be - better off going through the general algorithm using the DS - instruction, which uses approximately 70 cycles. - - For each y, there is a choice of z which satisfies the constraints - for (K+1)y >= 2**32. We may not, however, be able to satisfy the - timing constraints for arbitrary y. It seems that z being equal to - a power of 2 or a power of 2 minus 1 is as good as we can do, since - it minimizes the time to do division by z. We want the choice of z - to also result in a value for (a) that minimizes the computation of - the product (ax). This is best achieved if (a) has a regular bit - pattern (so the multiplication can be done with shifts and adds). - The value of (a) also needs to be less than 2**32 so the product is - always guaranteed to fit in 2 words. - - In actual practice, the following should be done: - - 1) For negative x, you should take the absolute value and remember - . the fact so that the result can be negated. This obviously does - . not apply in the unsigned case. - 2) For even y, you should factor out the power of 2 that divides y - . and divide x by it. You can then proceed by dividing by the - . odd factor of y. - - Here is a table of some odd values of y, and corresponding choices - for z which are "good". - - y z r a (hex) max x (hex) - - 3 2**32 1 55555555 100000001 - 5 2**32 1 33333333 100000003 - 7 2**24-1 0 249249 (infinite) - 9 2**24-1 0 1c71c7 (infinite) - 11 2**20-1 0 1745d (infinite) - 13 2**24-1 0 13b13b (infinite) - 15 2**32 1 11111111 10000000d - 17 2**32 1 f0f0f0f 10000000f - - If r is 1, then b = a+r-1 = a. This simplifies the computation - of (ax+b), since you can compute (x+1)(a) instead. If r is 0, - then b = 0 is ok to use which simplifies (ax+b). - - The bit patterns for 55555555, 33333333, and 11111111 are obviously - very regular. The bit patterns for the other values of a above are: - - y (hex) (binary) - - 7 249249 001001001001001001001001 << regular >> - 9 1c71c7 000111000111000111000111 << regular >> - 11 1745d 000000010111010001011101 << irregular >> - 13 13b13b 000100111011000100111011 << irregular >> - - The bit patterns for (a) corresponding to (y) of 11 and 13 may be - too irregular to warrant using this method. - - When z is a power of 2 minus 1, then the division by z is slightly - more complicated, involving an iterative solution. - - The code presented here solves division by 1 through 17, except for - 11 and 13. There are algorithms for both signed and unsigned - quantities given. - - TIMINGS (cycles) - - divisor positive negative unsigned - - . 1 2 2 2 - . 2 4 4 2 - . 3 19 21 19 - . 4 4 4 2 - . 5 18 22 19 - . 6 19 22 19 - . 8 4 4 2 - . 10 18 19 17 - . 12 18 20 18 - . 15 16 18 16 - . 16 4 4 2 - . 17 16 18 16 - - Now, the algorithm for 7, 9, and 14 is an iterative one. That is, - a loop body is executed until the tentative quotient is 0. The - number of times the loop body is executed varies depending on the - dividend, but is never more than two times. If the dividend is - less than the divisor, then the loop body is not executed at all. 
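As a concrete check of the derivation above, here is a small, self-contained C program. It is only a sketch, not part of the millicode, and it assumes nothing beyond the z = 2**32 rows of the table (y = 3, 5, 15 and 17, where r = 1 and hence b = a + r - 1 = a); the z = 2**24-1 rows (7, 9, 14) need the iterative correction discussed around here and are not covered by it.

/* Check floor((a*x + b) / 2**32) == x / y for the z = 2**32 constants
   tabulated above.  r = 1 for all of them, so b = a and (a*x + b) is
   just a*(x + 1), which is the simplification the text points out.  */

#include <stdint.h>
#include <stdio.h>

static const struct { uint32_t y; uint64_t a; } recip[] = {
  { 3,  0x55555555u },
  { 5,  0x33333333u },
  { 15, 0x11111111u },
  { 17, 0x0f0f0f0fu },
};

static uint32_t div_by_recip (uint32_t x, uint64_t a)
{
  return (uint32_t) ((a * (x + 1ull)) >> 32);  /* floor((a*x + a) / 2**32) */
}

int main (void)
{
  for (unsigned i = 0; i < sizeof recip / sizeof recip[0]; i++)
    for (uint64_t x = 0; x <= 0xffffffffull; x += 0x10001)  /* sparse sweep */
      if (div_by_recip ((uint32_t) x, recip[i].a) != (uint32_t) x / recip[i].y)
        {
          printf ("mismatch at x=%llu, y=%u\n",
                  (unsigned long long) x, recip[i].y);
          return 1;
        }
  puts ("reciprocal constants agree with x / y");
  return 0;
}

The timing and iteration-count details for the iterative 7/9/14 cases continue below.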
- Each iteration adds 4 cycles to the timings. - - divisor positive negative unsigned - - . 7 19+4n 20+4n 20+4n n = number of iterations - . 9 21+4n 22+4n 21+4n - . 14 21+4n 22+4n 20+4n - - To give an idea of how the number of iterations varies, here is a - table of dividend versus number of iterations when dividing by 7. - - smallest largest required - dividend dividend iterations - - . 0 6 0 - . 7 0x6ffffff 1 - 0x1000006 0xffffffff 2 - - There is some overlap in the range of numbers requiring 1 and 2 - iterations. */ - -RDEFINE(t2,r1) -RDEFINE(x2,arg0) /* r26 */ -RDEFINE(t1,arg1) /* r25 */ -RDEFINE(x1,ret1) /* r29 */ - - SUBSPA_MILLI_DIV - ATTR_MILLI - - .proc - .callinfo millicode - .entry -/* NONE of these routines require a stack frame - ALL of these routines are unwindable from millicode */ - -GSYM($$divide_by_constant) - .export $$divide_by_constant,millicode -/* Provides a "nice" label for the code covered by the unwind descriptor - for things like gprof. */ - -/* DIVISION BY 2 (shift by 1) */ -GSYM($$divI_2) - .export $$divI_2,millicode - comclr,>= arg0,0,0 - addi 1,arg0,arg0 - MILLIRET - extrs arg0,30,31,ret1 - - -/* DIVISION BY 4 (shift by 2) */ -GSYM($$divI_4) - .export $$divI_4,millicode - comclr,>= arg0,0,0 - addi 3,arg0,arg0 - MILLIRET - extrs arg0,29,30,ret1 - - -/* DIVISION BY 8 (shift by 3) */ -GSYM($$divI_8) - .export $$divI_8,millicode - comclr,>= arg0,0,0 - addi 7,arg0,arg0 - MILLIRET - extrs arg0,28,29,ret1 - -/* DIVISION BY 16 (shift by 4) */ -GSYM($$divI_16) - .export $$divI_16,millicode - comclr,>= arg0,0,0 - addi 15,arg0,arg0 - MILLIRET - extrs arg0,27,28,ret1 - -/**************************************************************************** -* -* DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these -* -* includes 3,5,15,17 and also 6,10,12 -* -****************************************************************************/ - -/* DIVISION BY 3 (use z = 2**32; a = 55555555) */ - -GSYM($$divI_3) - .export $$divI_3,millicode - comb,<,N x2,0,LREF(neg3) - - addi 1,x2,x2 /* this cannot overflow */ - extru x2,1,2,x1 /* multiply by 5 to get started */ - sh2add x2,x2,x2 - b LREF(pos) - addc x1,0,x1 - -LSYM(neg3) - subi 1,x2,x2 /* this cannot overflow */ - extru x2,1,2,x1 /* multiply by 5 to get started */ - sh2add x2,x2,x2 - b LREF(neg) - addc x1,0,x1 - -GSYM($$divU_3) - .export $$divU_3,millicode - addi 1,x2,x2 /* this CAN overflow */ - addc 0,0,x1 - shd x1,x2,30,t1 /* multiply by 5 to get started */ - sh2add x2,x2,x2 - b LREF(pos) - addc x1,t1,x1 - -/* DIVISION BY 5 (use z = 2**32; a = 33333333) */ - -GSYM($$divI_5) - .export $$divI_5,millicode - comb,<,N x2,0,LREF(neg5) - - addi 3,x2,t1 /* this cannot overflow */ - sh1add x2,t1,x2 /* multiply by 3 to get started */ - b LREF(pos) - addc 0,0,x1 - -LSYM(neg5) - sub 0,x2,x2 /* negate x2 */ - addi 1,x2,x2 /* this cannot overflow */ - shd 0,x2,31,x1 /* get top bit (can be 1) */ - sh1add x2,x2,x2 /* multiply by 3 to get started */ - b LREF(neg) - addc x1,0,x1 - -GSYM($$divU_5) - .export $$divU_5,millicode - addi 1,x2,x2 /* this CAN overflow */ - addc 0,0,x1 - shd x1,x2,31,t1 /* multiply by 3 to get started */ - sh1add x2,x2,x2 - b LREF(pos) - addc t1,x1,x1 - -/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */ -GSYM($$divI_6) - .export $$divI_6,millicode - comb,<,N x2,0,LREF(neg6) - extru x2,30,31,x2 /* divide by 2 */ - addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */ - sh2add x2,t1,x2 /* multiply by 5 to get started */ - b LREF(pos) - addc 0,0,x1 - -LSYM(neg6) - subi 2,x2,x2 /* negate, divide by 2, and add 1 */ - /* 
negation and adding 1 are done */ - /* at the same time by the SUBI */ - extru x2,30,31,x2 - shd 0,x2,30,x1 - sh2add x2,x2,x2 /* multiply by 5 to get started */ - b LREF(neg) - addc x1,0,x1 - -GSYM($$divU_6) - .export $$divU_6,millicode - extru x2,30,31,x2 /* divide by 2 */ - addi 1,x2,x2 /* cannot carry */ - shd 0,x2,30,x1 /* multiply by 5 to get started */ - sh2add x2,x2,x2 - b LREF(pos) - addc x1,0,x1 - -/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */ -GSYM($$divU_10) - .export $$divU_10,millicode - extru x2,30,31,x2 /* divide by 2 */ - addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */ - sh1add x2,t1,x2 /* multiply by 3 to get started */ - addc 0,0,x1 -LSYM(pos) - shd x1,x2,28,t1 /* multiply by 0x11 */ - shd x2,0,28,t2 - add x2,t2,x2 - addc x1,t1,x1 -LSYM(pos_for_17) - shd x1,x2,24,t1 /* multiply by 0x101 */ - shd x2,0,24,t2 - add x2,t2,x2 - addc x1,t1,x1 - - shd x1,x2,16,t1 /* multiply by 0x10001 */ - shd x2,0,16,t2 - add x2,t2,x2 - MILLIRET - addc x1,t1,x1 - -GSYM($$divI_10) - .export $$divI_10,millicode - comb,< x2,0,LREF(neg10) - copy 0,x1 - extru x2,30,31,x2 /* divide by 2 */ - addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */ - sh1add x2,x2,x2 /* multiply by 3 to get started */ - -LSYM(neg10) - subi 2,x2,x2 /* negate, divide by 2, and add 1 */ - /* negation and adding 1 are done */ - /* at the same time by the SUBI */ - extru x2,30,31,x2 - sh1add x2,x2,x2 /* multiply by 3 to get started */ -LSYM(neg) - shd x1,x2,28,t1 /* multiply by 0x11 */ - shd x2,0,28,t2 - add x2,t2,x2 - addc x1,t1,x1 -LSYM(neg_for_17) - shd x1,x2,24,t1 /* multiply by 0x101 */ - shd x2,0,24,t2 - add x2,t2,x2 - addc x1,t1,x1 - - shd x1,x2,16,t1 /* multiply by 0x10001 */ - shd x2,0,16,t2 - add x2,t2,x2 - addc x1,t1,x1 - MILLIRET - sub 0,x1,x1 - -/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */ -GSYM($$divI_12) - .export $$divI_12,millicode - comb,< x2,0,LREF(neg12) - copy 0,x1 - extru x2,29,30,x2 /* divide by 4 */ - addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */ - sh2add x2,x2,x2 /* multiply by 5 to get started */ - -LSYM(neg12) - subi 4,x2,x2 /* negate, divide by 4, and add 1 */ - /* negation and adding 1 are done */ - /* at the same time by the SUBI */ - extru x2,29,30,x2 - b LREF(neg) - sh2add x2,x2,x2 /* multiply by 5 to get started */ - -GSYM($$divU_12) - .export $$divU_12,millicode - extru x2,29,30,x2 /* divide by 4 */ - addi 5,x2,t1 /* cannot carry */ - sh2add x2,t1,x2 /* multiply by 5 to get started */ - b LREF(pos) - addc 0,0,x1 - -/* DIVISION BY 15 (use z = 2**32; a = 11111111) */ -GSYM($$divI_15) - .export $$divI_15,millicode - comb,< x2,0,LREF(neg15) - copy 0,x1 - addib,tr 1,x2,LREF(pos)+4 - shd x1,x2,28,t1 - -LSYM(neg15) - b LREF(neg) - subi 1,x2,x2 - -GSYM($$divU_15) - .export $$divU_15,millicode - addi 1,x2,x2 /* this CAN overflow */ - b LREF(pos) - addc 0,0,x1 - -/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */ -GSYM($$divI_17) - .export $$divI_17,millicode - comb,<,n x2,0,LREF(neg17) - addi 1,x2,x2 /* this cannot overflow */ - shd 0,x2,28,t1 /* multiply by 0xf to get started */ - shd x2,0,28,t2 - sub t2,x2,x2 - b LREF(pos_for_17) - subb t1,0,x1 - -LSYM(neg17) - subi 1,x2,x2 /* this cannot overflow */ - shd 0,x2,28,t1 /* multiply by 0xf to get started */ - shd x2,0,28,t2 - sub t2,x2,x2 - b LREF(neg_for_17) - subb t1,0,x1 - -GSYM($$divU_17) - .export $$divU_17,millicode - addi 1,x2,x2 /* this CAN overflow */ - addc 0,0,x1 - shd x1,x2,28,t1 /* multiply by 0xf to get started */ -LSYM(u17) - shd x2,0,28,t2 - sub t2,x2,x2 - b LREF(pos_for_17) - subb t1,x1,x1 - - -/* 
DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these - includes 7,9 and also 14 - - - z = 2**24-1 - r = z mod x = 0 - - so choose b = 0 - - Also, in order to divide by z = 2**24-1, we approximate by dividing - by (z+1) = 2**24 (which is easy), and then correcting. - - (ax) = (z+1)q' + r - . = zq' + (q'+r) - - So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1) - Then the true remainder of (ax)/z is (q'+r). Repeat the process - with this new remainder, adding the tentative quotients together, - until a tentative quotient is 0 (and then we are done). There is - one last correction to be done. It is possible that (q'+r) = z. - If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But, - in fact, we need to add 1 more to the quotient. Now, it turns - out that this happens if and only if the original value x is - an exact multiple of y. So, to avoid a three instruction test at - the end, instead use 1 instruction to add 1 to x at the beginning. */ - -/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */ -GSYM($$divI_7) - .export $$divI_7,millicode - comb,<,n x2,0,LREF(neg7) -LSYM(7) - addi 1,x2,x2 /* cannot overflow */ - shd 0,x2,29,x1 - sh3add x2,x2,x2 - addc x1,0,x1 -LSYM(pos7) - shd x1,x2,26,t1 - shd x2,0,26,t2 - add x2,t2,x2 - addc x1,t1,x1 - - shd x1,x2,20,t1 - shd x2,0,20,t2 - add x2,t2,x2 - addc x1,t1,t1 - - /* computed . Now divide it by (2**24 - 1) */ - - copy 0,x1 - shd,= t1,x2,24,t1 /* tentative quotient */ -LSYM(1) - addb,tr t1,x1,LREF(2) /* add to previous quotient */ - extru x2,31,24,x2 /* new remainder (unadjusted) */ - - MILLIRETN - -LSYM(2) - addb,tr t1,x2,LREF(1) /* adjust remainder */ - extru,= x2,7,8,t1 /* new quotient */ - -LSYM(neg7) - subi 1,x2,x2 /* negate x2 and add 1 */ -LSYM(8) - shd 0,x2,29,x1 - sh3add x2,x2,x2 - addc x1,0,x1 - -LSYM(neg7_shift) - shd x1,x2,26,t1 - shd x2,0,26,t2 - add x2,t2,x2 - addc x1,t1,x1 - - shd x1,x2,20,t1 - shd x2,0,20,t2 - add x2,t2,x2 - addc x1,t1,t1 - - /* computed . 
Now divide it by (2**24 - 1) */ - - copy 0,x1 - shd,= t1,x2,24,t1 /* tentative quotient */ -LSYM(3) - addb,tr t1,x1,LREF(4) /* add to previous quotient */ - extru x2,31,24,x2 /* new remainder (unadjusted) */ - - MILLIRET - sub 0,x1,x1 /* negate result */ - -LSYM(4) - addb,tr t1,x2,LREF(3) /* adjust remainder */ - extru,= x2,7,8,t1 /* new quotient */ - -GSYM($$divU_7) - .export $$divU_7,millicode - addi 1,x2,x2 /* can carry */ - addc 0,0,x1 - shd x1,x2,29,t1 - sh3add x2,x2,x2 - b LREF(pos7) - addc t1,x1,x1 - -/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */ -GSYM($$divI_9) - .export $$divI_9,millicode - comb,<,n x2,0,LREF(neg9) - addi 1,x2,x2 /* cannot overflow */ - shd 0,x2,29,t1 - shd x2,0,29,t2 - sub t2,x2,x2 - b LREF(pos7) - subb t1,0,x1 - -LSYM(neg9) - subi 1,x2,x2 /* negate and add 1 */ - shd 0,x2,29,t1 - shd x2,0,29,t2 - sub t2,x2,x2 - b LREF(neg7_shift) - subb t1,0,x1 - -GSYM($$divU_9) - .export $$divU_9,millicode - addi 1,x2,x2 /* can carry */ - addc 0,0,x1 - shd x1,x2,29,t1 - shd x2,0,29,t2 - sub t2,x2,x2 - b LREF(pos7) - subb t1,x1,x1 - -/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */ -GSYM($$divI_14) - .export $$divI_14,millicode - comb,<,n x2,0,LREF(neg14) -GSYM($$divU_14) - .export $$divU_14,millicode - b LREF(7) /* go to 7 case */ - extru x2,30,31,x2 /* divide by 2 */ - -LSYM(neg14) - subi 2,x2,x2 /* negate (and add 2) */ - b LREF(8) - extru x2,30,31,x2 /* divide by 2 */ - .exit - .procend - .end -#endif - -#ifdef L_mulI -/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */ -/****************************************************************************** -This routine is used on PA2.0 processors when gcc -mno-fpregs is used - -ROUTINE: $$mulI - - -DESCRIPTION: - - $$mulI multiplies two single word integers, giving a single - word result. 
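The strategy $$mulI uses, implemented by the 256-entry dispatch table further down, is to consume the multiplier eight bits at a time and to build each partial product with shift-and-add instructions only; for example, the x45 entry below forms 45*a0 as 5*(9*a0) with one sh3add and one sh2add. The C below is only a sketch of that loop structure: it omits the operand swapping and negation done by the millicode's first few instructions and replaces the hand-picked per-byte sequences with a generic bit loop, so the helper name and layout here are illustrative, not the routine itself.

#include <stdint.h>
#include <stdio.h>

/* Partial product byte*x using shifts and adds only.  The millicode
   instead branches (blr) into a table of hand-chosen sh1add/sh2add/
   sh3add sequences, a few instructions per byte value.  */
static uint32_t byte_times (uint32_t x, uint32_t byte)
{
  uint32_t r = 0;
  for (int bit = 0; bit < 8; bit++)
    if (byte & (1u << bit))
      r += x << bit;
  return r;
}

/* Skeleton of the multiply loop: peel one byte of the multiplier per
   iteration, accumulate its partial product, scale the multiplicand
   by 256 (a0__256a0) and shift the multiplier down by 8 bits.  */
static uint32_t muli_sketch (uint32_t a0, uint32_t a1)
{
  uint32_t r = 0;
  while (a1 != 0)
    {
      r += byte_times (a0, a1 & 0xff);
      a0 <<= 8;
      a1 >>= 8;
    }
  return r;
}

int main (void)
{
  printf ("%u\n", muli_sketch (12345, 6789));   /* prints 83810205 */
  return 0;
}

The register interface and the dispatch table itself follow.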
- - -INPUT REGISTERS: - - arg0 = Operand 1 - arg1 = Operand 2 - r31 == return pc - sr0 == return space when called externally - - -OUTPUT REGISTERS: - - arg0 = undefined - arg1 = undefined - ret1 = result - -OTHER REGISTERS AFFECTED: - - r1 = undefined - -SIDE EFFECTS: - - Causes a trap under the following conditions: NONE - Changes memory at the following places: NONE - -PERMISSIBLE CONTEXT: - - Unwindable - Does not create a stack frame - Is usable for internal or external microcode - -DISCUSSION: - - Calls other millicode routines via mrp: NONE - Calls other millicode routines: NONE - -***************************************************************************/ - - -#define a0 %arg0 -#define a1 %arg1 -#define t0 %r1 -#define r %ret1 - -#define a0__128a0 zdep a0,24,25,a0 -#define a0__256a0 zdep a0,23,24,a0 -#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0) -#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1) -#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2) -#define b_n_ret_t0 b,n LREF(ret_t0) -#define b_e_shift b LREF(e_shift) -#define b_e_t0ma0 b LREF(e_t0ma0) -#define b_e_t0 b LREF(e_t0) -#define b_e_t0a0 b LREF(e_t0a0) -#define b_e_t02a0 b LREF(e_t02a0) -#define b_e_t04a0 b LREF(e_t04a0) -#define b_e_2t0 b LREF(e_2t0) -#define b_e_2t0a0 b LREF(e_2t0a0) -#define b_e_2t04a0 b LREF(e2t04a0) -#define b_e_3t0 b LREF(e_3t0) -#define b_e_4t0 b LREF(e_4t0) -#define b_e_4t0a0 b LREF(e_4t0a0) -#define b_e_4t08a0 b LREF(e4t08a0) -#define b_e_5t0 b LREF(e_5t0) -#define b_e_8t0 b LREF(e_8t0) -#define b_e_8t0a0 b LREF(e_8t0a0) -#define r__r_a0 add r,a0,r -#define r__r_2a0 sh1add a0,r,r -#define r__r_4a0 sh2add a0,r,r -#define r__r_8a0 sh3add a0,r,r -#define r__r_t0 add r,t0,r -#define r__r_2t0 sh1add t0,r,r -#define r__r_4t0 sh2add t0,r,r -#define r__r_8t0 sh3add t0,r,r -#define t0__3a0 sh1add a0,a0,t0 -#define t0__4a0 sh2add a0,0,t0 -#define t0__5a0 sh2add a0,a0,t0 -#define t0__8a0 sh3add a0,0,t0 -#define t0__9a0 sh3add a0,a0,t0 -#define t0__16a0 zdep a0,27,28,t0 -#define t0__32a0 zdep a0,26,27,t0 -#define t0__64a0 zdep a0,25,26,t0 -#define t0__128a0 zdep a0,24,25,t0 -#define t0__t0ma0 sub t0,a0,t0 -#define t0__t0_a0 add t0,a0,t0 -#define t0__t0_2a0 sh1add a0,t0,t0 -#define t0__t0_4a0 sh2add a0,t0,t0 -#define t0__t0_8a0 sh3add a0,t0,t0 -#define t0__2t0_a0 sh1add t0,a0,t0 -#define t0__3t0 sh1add t0,t0,t0 -#define t0__4t0 sh2add t0,0,t0 -#define t0__4t0_a0 sh2add t0,a0,t0 -#define t0__5t0 sh2add t0,t0,t0 -#define t0__8t0 sh3add t0,0,t0 -#define t0__8t0_a0 sh3add t0,a0,t0 -#define t0__9t0 sh3add t0,t0,t0 -#define t0__16t0 zdep t0,27,28,t0 -#define t0__32t0 zdep t0,26,27,t0 -#define t0__256a0 zdep a0,23,24,t0 - - - SUBSPA_MILLI - ATTR_MILLI - .align 16 - .proc - .callinfo millicode - .export $$mulI,millicode -GSYM($$mulI) - combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */ - copy 0,r /* zero out the result */ - xor a0,a1,a0 /* swap a0 & a1 using the */ - xor a0,a1,a1 /* old xor trick */ - xor a0,a1,a0 -LSYM(l4) - combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */ - zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ - sub,> 0,a1,t0 /* otherwise negate both and */ - combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */ - sub 0,a0,a1 - movb,tr,n t0,a0,LREF(l2) /* 10th inst. */ - -LSYM(l0) r__r_t0 /* add in this partial product */ -LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */ -LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ -LSYM(l3) blr t0,0 /* case on these 8 bits ****** */ - extru a1,23,24,a1 /* a1 >>= 8 ****************** */ - -/*16 insts before this. 
*/ -/* a0 <<= 8 ************************** */ -LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop -LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop -LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop -LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0 -LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop -LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0 -LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN -LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0 -LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop -LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0 -LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN -LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 -LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN -LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 -LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0 -LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN -LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0 -LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN -LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0 -LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN -LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 -LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN -LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 -LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0 -LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 -LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 -LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN -LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 -LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0 -LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN -LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 -LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN -LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0 -LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 -LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0 -LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0 -LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0 -LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0 -LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0 -LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0 -LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 -LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0 -LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 -LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 -LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! 
t0__t0ma0 ! b_n_ret_t0 -LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN -LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0 -LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0 -LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN -LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0 -LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0 -LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0 -LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0 -LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0 -LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0 -LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0 -LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 -LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0 -LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0 -LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0 -LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0 -LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0 -LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 -LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0 -LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0 -LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0 -LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0 -LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0 -LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0 -LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0 -LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0 -LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 -LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0 -LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0 -LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 -LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 -LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN -LSYM(x129) t0__128a0 ! 
a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0 -LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0 -LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0 -LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0 -LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0 -LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0 -LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0 -LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0 -LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0 -LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0 -LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0 -LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0 -LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0 -LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0 -LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0 -LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0 -LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0 -LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0 -LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0 -LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0 -LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0 -LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0 -LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0 -LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0 -LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0 -LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0 -LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0 -LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0 -LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0 -LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0 -LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0 -LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 -LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x194) t0__8a0 ! t0__3t0 ! 
b_e_2t0 ! t0__4t0_a0 -LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0 -LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0 -LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0 -LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0 -LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0 -LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0 -LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0 -LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0 -LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0 -LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0 -LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0 -LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0 -LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0 -LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0 -LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0 -LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0 -LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 -LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0 -LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0 -LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0 -LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0 -LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0 -LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0 -LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0 -LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0 -LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0 -LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0 -LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0 -LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0 -LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0 -LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0 -LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0 -LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0 -LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0 -LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0 -LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0 -LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0 -LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0 -LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0 -LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0 -LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0 -LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0 -LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0 -LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0 -LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0 -LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0 -LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0 -LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0 -LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 -LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 -/*1040 insts before this. 
*/ -LSYM(ret_t0) MILLIRET -LSYM(e_t0) r__r_t0 -LSYM(e_shift) a1_ne_0_b_l2 - a0__256a0 /* a0 <<= 8 *********** */ - MILLIRETN -LSYM(e_t0ma0) a1_ne_0_b_l0 - t0__t0ma0 - MILLIRET - r__r_t0 -LSYM(e_t0a0) a1_ne_0_b_l0 - t0__t0_a0 - MILLIRET - r__r_t0 -LSYM(e_t02a0) a1_ne_0_b_l0 - t0__t0_2a0 - MILLIRET - r__r_t0 -LSYM(e_t04a0) a1_ne_0_b_l0 - t0__t0_4a0 - MILLIRET - r__r_t0 -LSYM(e_2t0) a1_ne_0_b_l1 - r__r_2t0 - MILLIRETN -LSYM(e_2t0a0) a1_ne_0_b_l0 - t0__2t0_a0 - MILLIRET - r__r_t0 -LSYM(e2t04a0) t0__t0_2a0 - a1_ne_0_b_l1 - r__r_2t0 - MILLIRETN -LSYM(e_3t0) a1_ne_0_b_l0 - t0__3t0 - MILLIRET - r__r_t0 -LSYM(e_4t0) a1_ne_0_b_l1 - r__r_4t0 - MILLIRETN -LSYM(e_4t0a0) a1_ne_0_b_l0 - t0__4t0_a0 - MILLIRET - r__r_t0 -LSYM(e4t08a0) t0__t0_2a0 - a1_ne_0_b_l1 - r__r_4t0 - MILLIRETN -LSYM(e_5t0) a1_ne_0_b_l0 - t0__5t0 - MILLIRET - r__r_t0 -LSYM(e_8t0) a1_ne_0_b_l1 - r__r_8t0 - MILLIRETN -LSYM(e_8t0a0) a1_ne_0_b_l0 - t0__8t0_a0 - MILLIRET - r__r_t0 - - .procend - .end -#endif diff --git a/gcc/config/pa/t-linux b/gcc/config/pa/t-linux index df351e11458..b94ebd250a8 100644 --- a/gcc/config/pa/t-linux +++ b/gcc/config/pa/t-linux @@ -16,13 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -#Plug millicode routines into libgcc.a We want these on both native and -#cross compiles. We use the "64-bit" routines because the "32-bit" code -#is broken for certain corner cases. - -LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall -LIB1ASMSRC = pa/milli64.S - # Compile libgcc2.a as PIC. TARGET_LIBGCC2_CFLAGS = -fPIC -DELF=1 -DLINUX=1 diff --git a/gcc/config/pa/t-linux64 b/gcc/config/pa/t-linux64 index d40546cabcc..af803a27ed3 100644 --- a/gcc/config/pa/t-linux64 +++ b/gcc/config/pa/t-linux64 @@ -16,12 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -#Plug millicode routines into libgcc.a We want these on both native and -#cross compiles. - -LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI -LIB1ASMSRC = pa/milli64.S - LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/pa/linux-atomic.c # Compile libgcc2.a as PIC. diff --git a/gcc/config/picochip/libgccExtras/fake_libgcc.asm b/gcc/config/picochip/libgccExtras/fake_libgcc.asm deleted file mode 100644 index e4b78f1e1f1..00000000000 --- a/gcc/config/picochip/libgccExtras/fake_libgcc.asm +++ /dev/null @@ -1,6 +0,0 @@ -// picoChip ASM file -// Fake libgcc asm file. This contains nothing, but is used to prevent gcc -// getting upset about the lack of a libgcc.S file when LIB1ASMFUNCS is defined -// to switch off the compilation of parts of libgcc. - - diff --git a/gcc/config/picochip/t-picochip b/gcc/config/picochip/t-picochip index 222d7a646b9..0f3fe8c3d81 100644 --- a/gcc/config/picochip/t-picochip +++ b/gcc/config/picochip/t-picochip @@ -35,14 +35,6 @@ LIB2FUNCS_EXTRA = \ $(srcdir)/config/picochip/libgccExtras/parityhi2.asm \ $(srcdir)/config/picochip/libgccExtras/popcounthi2.asm -# Prevent some of the more complicated libgcc functions from being -# compiled. This is because they are generally too big to fit into an -# AE anyway, so there is no point in having them. Also, some don't -# compile properly so we'll ignore them for the moment. - -LIB1ASMFUNCS = _mulsc3 _divsc3 -LIB1ASMSRC = picochip/libgccExtras/fake_libgcc.asm - # Turn off ranlib on target libraries. 
RANLIB_FOR_TARGET = cat diff --git a/gcc/config/sh/lib1funcs.asm b/gcc/config/sh/lib1funcs.asm deleted file mode 100644 index 2f0ca16cd91..00000000000 --- a/gcc/config/sh/lib1funcs.asm +++ /dev/null @@ -1,3933 +0,0 @@ -/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2009 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - - -!! libgcc routines for the Renesas / SuperH SH CPUs. -!! Contributed by Steve Chamberlain. -!! sac@cygnus.com - -!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines -!! recoded in assembly by Toshiyasu Morita -!! tm@netcom.com - -#if defined(__ELF__) && defined(__linux__) -.section .note.GNU-stack,"",%progbits -.previous -#endif - -/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and - ELF local label prefixes by J"orn Rennecke - amylaar@cygnus.com */ - -#include "lib1funcs.h" - -/* t-vxworks needs to build both PIC and non-PIC versions of libgcc, - so it is more convenient to define NO_FPSCR_VALUES here than to - define it on the command line. */ -#if defined __vxworks && defined __PIC__ -#define NO_FPSCR_VALUES -#endif - -#if ! 
__SH5__ -#ifdef L_ashiftrt - .global GLOBAL(ashiftrt_r4_0) - .global GLOBAL(ashiftrt_r4_1) - .global GLOBAL(ashiftrt_r4_2) - .global GLOBAL(ashiftrt_r4_3) - .global GLOBAL(ashiftrt_r4_4) - .global GLOBAL(ashiftrt_r4_5) - .global GLOBAL(ashiftrt_r4_6) - .global GLOBAL(ashiftrt_r4_7) - .global GLOBAL(ashiftrt_r4_8) - .global GLOBAL(ashiftrt_r4_9) - .global GLOBAL(ashiftrt_r4_10) - .global GLOBAL(ashiftrt_r4_11) - .global GLOBAL(ashiftrt_r4_12) - .global GLOBAL(ashiftrt_r4_13) - .global GLOBAL(ashiftrt_r4_14) - .global GLOBAL(ashiftrt_r4_15) - .global GLOBAL(ashiftrt_r4_16) - .global GLOBAL(ashiftrt_r4_17) - .global GLOBAL(ashiftrt_r4_18) - .global GLOBAL(ashiftrt_r4_19) - .global GLOBAL(ashiftrt_r4_20) - .global GLOBAL(ashiftrt_r4_21) - .global GLOBAL(ashiftrt_r4_22) - .global GLOBAL(ashiftrt_r4_23) - .global GLOBAL(ashiftrt_r4_24) - .global GLOBAL(ashiftrt_r4_25) - .global GLOBAL(ashiftrt_r4_26) - .global GLOBAL(ashiftrt_r4_27) - .global GLOBAL(ashiftrt_r4_28) - .global GLOBAL(ashiftrt_r4_29) - .global GLOBAL(ashiftrt_r4_30) - .global GLOBAL(ashiftrt_r4_31) - .global GLOBAL(ashiftrt_r4_32) - - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32)) - - .align 1 -GLOBAL(ashiftrt_r4_32): -GLOBAL(ashiftrt_r4_31): - rotcl r4 - rts - subc r4,r4 - -GLOBAL(ashiftrt_r4_30): - shar r4 -GLOBAL(ashiftrt_r4_29): - shar r4 -GLOBAL(ashiftrt_r4_28): - shar r4 -GLOBAL(ashiftrt_r4_27): - shar r4 -GLOBAL(ashiftrt_r4_26): - shar r4 -GLOBAL(ashiftrt_r4_25): - shar r4 -GLOBAL(ashiftrt_r4_24): - shlr16 r4 - shlr8 r4 - rts - exts.b r4,r4 - -GLOBAL(ashiftrt_r4_23): - shar r4 -GLOBAL(ashiftrt_r4_22): - shar r4 -GLOBAL(ashiftrt_r4_21): - shar r4 -GLOBAL(ashiftrt_r4_20): - shar r4 -GLOBAL(ashiftrt_r4_19): - shar r4 -GLOBAL(ashiftrt_r4_18): - shar r4 -GLOBAL(ashiftrt_r4_17): - shar r4 -GLOBAL(ashiftrt_r4_16): - shlr16 r4 - rts - exts.w r4,r4 - -GLOBAL(ashiftrt_r4_15): - shar r4 -GLOBAL(ashiftrt_r4_14): - shar r4 -GLOBAL(ashiftrt_r4_13): - shar r4 -GLOBAL(ashiftrt_r4_12): - shar r4 -GLOBAL(ashiftrt_r4_11): - shar r4 -GLOBAL(ashiftrt_r4_10): - shar r4 -GLOBAL(ashiftrt_r4_9): - shar r4 -GLOBAL(ashiftrt_r4_8): - shar r4 -GLOBAL(ashiftrt_r4_7): - shar r4 -GLOBAL(ashiftrt_r4_6): - shar r4 -GLOBAL(ashiftrt_r4_5): - shar r4 -GLOBAL(ashiftrt_r4_4): - shar r4 -GLOBAL(ashiftrt_r4_3): - shar r4 -GLOBAL(ashiftrt_r4_2): - shar r4 -GLOBAL(ashiftrt_r4_1): - rts - shar r4 
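The ashiftrt_r4_N entry points above give one label per shift count: entering at ashiftrt_r4_N falls through enough single-bit shar steps (with shortcuts at 16 and 24) to reach the common return having shifted by exactly N. GLOBAL(ashrsi3) below gets the same effect for a variable count with a byte offset table and a computed branch (mova/braf), and GLOBAL(movmem) later in this file reuses the trick to jump into an unrolled copy sequence. A rough C analogue, assuming GNU C's labels-as-values extension and unrolling only four steps to keep the sketch short:

#include <stdio.h>

/* Sketch of the "enter an unrolled chain at a count-dependent label"
   pattern: entry[n] points n single-bit shifts before the end, the way
   ashiftrt_r4_n enters the shar chain n steps before its rts.  Note
   that >> on a negative int is arithmetic on GCC, matching shar.  */
static int ashiftrt_sketch (int value, unsigned n)
{
  static void *const entry[] = { &&do0, &&do1, &&do2, &&do3, &&do4 };
  if (n > 4)
    n = 4;                      /* this sketch only unrolls four steps */
  goto *entry[n];
do4: value >>= 1;
do3: value >>= 1;
do2: value >>= 1;
do1: value >>= 1;
do0: return value;
}

int main (void)
{
  printf ("%d %d\n", ashiftrt_sketch (-64, 3), ashiftrt_sketch (64, 2));
  return 0;                     /* prints "-8 16" */
}

The zero-count entry and the ENDFUNC markers for these routines follow.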
- -GLOBAL(ashiftrt_r4_0): - rts - nop - - ENDFUNC(GLOBAL(ashiftrt_r4_0)) - ENDFUNC(GLOBAL(ashiftrt_r4_1)) - ENDFUNC(GLOBAL(ashiftrt_r4_2)) - ENDFUNC(GLOBAL(ashiftrt_r4_3)) - ENDFUNC(GLOBAL(ashiftrt_r4_4)) - ENDFUNC(GLOBAL(ashiftrt_r4_5)) - ENDFUNC(GLOBAL(ashiftrt_r4_6)) - ENDFUNC(GLOBAL(ashiftrt_r4_7)) - ENDFUNC(GLOBAL(ashiftrt_r4_8)) - ENDFUNC(GLOBAL(ashiftrt_r4_9)) - ENDFUNC(GLOBAL(ashiftrt_r4_10)) - ENDFUNC(GLOBAL(ashiftrt_r4_11)) - ENDFUNC(GLOBAL(ashiftrt_r4_12)) - ENDFUNC(GLOBAL(ashiftrt_r4_13)) - ENDFUNC(GLOBAL(ashiftrt_r4_14)) - ENDFUNC(GLOBAL(ashiftrt_r4_15)) - ENDFUNC(GLOBAL(ashiftrt_r4_16)) - ENDFUNC(GLOBAL(ashiftrt_r4_17)) - ENDFUNC(GLOBAL(ashiftrt_r4_18)) - ENDFUNC(GLOBAL(ashiftrt_r4_19)) - ENDFUNC(GLOBAL(ashiftrt_r4_20)) - ENDFUNC(GLOBAL(ashiftrt_r4_21)) - ENDFUNC(GLOBAL(ashiftrt_r4_22)) - ENDFUNC(GLOBAL(ashiftrt_r4_23)) - ENDFUNC(GLOBAL(ashiftrt_r4_24)) - ENDFUNC(GLOBAL(ashiftrt_r4_25)) - ENDFUNC(GLOBAL(ashiftrt_r4_26)) - ENDFUNC(GLOBAL(ashiftrt_r4_27)) - ENDFUNC(GLOBAL(ashiftrt_r4_28)) - ENDFUNC(GLOBAL(ashiftrt_r4_29)) - ENDFUNC(GLOBAL(ashiftrt_r4_30)) - ENDFUNC(GLOBAL(ashiftrt_r4_31)) - ENDFUNC(GLOBAL(ashiftrt_r4_32)) -#endif - -#ifdef L_ashiftrt_n - -! -! GLOBAL(ashrsi3) -! -! Entry: -! -! r4: Value to shift -! r5: Shifts -! -! Exit: -! -! r0: Result -! -! Destroys: -! -! (none) -! - - .global GLOBAL(ashrsi3) - HIDDEN_FUNC(GLOBAL(ashrsi3)) - .align 2 -GLOBAL(ashrsi3): - mov #31,r0 - and r0,r5 - mova LOCAL(ashrsi3_table),r0 - mov.b @(r0,r5),r5 -#ifdef __sh1__ - add r5,r0 - jmp @r0 -#else - braf r5 -#endif - mov r4,r0 - - .align 2 -LOCAL(ashrsi3_table): - .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table) - -LOCAL(ashrsi3_31): - rotcl r0 - rts - subc r0,r0 - -LOCAL(ashrsi3_30): - shar r0 -LOCAL(ashrsi3_29): - shar r0 -LOCAL(ashrsi3_28): - shar r0 -LOCAL(ashrsi3_27): - shar r0 -LOCAL(ashrsi3_26): - shar r0 -LOCAL(ashrsi3_25): - shar r0 -LOCAL(ashrsi3_24): - shlr16 r0 - shlr8 r0 - rts - exts.b r0,r0 - -LOCAL(ashrsi3_23): - shar r0 -LOCAL(ashrsi3_22): - shar r0 -LOCAL(ashrsi3_21): - shar r0 -LOCAL(ashrsi3_20): - shar r0 -LOCAL(ashrsi3_19): - shar r0 
-LOCAL(ashrsi3_18): - shar r0 -LOCAL(ashrsi3_17): - shar r0 -LOCAL(ashrsi3_16): - shlr16 r0 - rts - exts.w r0,r0 - -LOCAL(ashrsi3_15): - shar r0 -LOCAL(ashrsi3_14): - shar r0 -LOCAL(ashrsi3_13): - shar r0 -LOCAL(ashrsi3_12): - shar r0 -LOCAL(ashrsi3_11): - shar r0 -LOCAL(ashrsi3_10): - shar r0 -LOCAL(ashrsi3_9): - shar r0 -LOCAL(ashrsi3_8): - shar r0 -LOCAL(ashrsi3_7): - shar r0 -LOCAL(ashrsi3_6): - shar r0 -LOCAL(ashrsi3_5): - shar r0 -LOCAL(ashrsi3_4): - shar r0 -LOCAL(ashrsi3_3): - shar r0 -LOCAL(ashrsi3_2): - shar r0 -LOCAL(ashrsi3_1): - rts - shar r0 - -LOCAL(ashrsi3_0): - rts - nop - - ENDFUNC(GLOBAL(ashrsi3)) -#endif - -#ifdef L_ashiftlt - -! -! GLOBAL(ashlsi3) -! -! Entry: -! -! r4: Value to shift -! r5: Shifts -! -! Exit: -! -! r0: Result -! -! Destroys: -! -! (none) -! - .global GLOBAL(ashlsi3) - HIDDEN_FUNC(GLOBAL(ashlsi3)) - .align 2 -GLOBAL(ashlsi3): - mov #31,r0 - and r0,r5 - mova LOCAL(ashlsi3_table),r0 - mov.b @(r0,r5),r5 -#ifdef __sh1__ - add r5,r0 - jmp @r0 -#else - braf r5 -#endif - mov r4,r0 - - .align 2 -LOCAL(ashlsi3_table): - .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table) - -LOCAL(ashlsi3_6): - shll2 r0 -LOCAL(ashlsi3_4): - shll2 r0 -LOCAL(ashlsi3_2): - rts - shll2 r0 - -LOCAL(ashlsi3_7): - shll2 r0 -LOCAL(ashlsi3_5): - shll2 r0 -LOCAL(ashlsi3_3): - shll2 r0 -LOCAL(ashlsi3_1): - rts - shll r0 - -LOCAL(ashlsi3_14): - shll2 r0 -LOCAL(ashlsi3_12): - shll2 r0 -LOCAL(ashlsi3_10): - shll2 r0 -LOCAL(ashlsi3_8): - rts - shll8 r0 - -LOCAL(ashlsi3_15): - shll2 r0 -LOCAL(ashlsi3_13): - shll2 r0 -LOCAL(ashlsi3_11): - shll2 r0 -LOCAL(ashlsi3_9): - shll8 r0 - rts - shll r0 - -LOCAL(ashlsi3_22): - shll2 r0 -LOCAL(ashlsi3_20): - shll2 r0 -LOCAL(ashlsi3_18): - shll2 r0 -LOCAL(ashlsi3_16): - rts - shll16 r0 - -LOCAL(ashlsi3_23): - shll2 r0 -LOCAL(ashlsi3_21): - shll2 r0 -LOCAL(ashlsi3_19): - shll2 r0 -LOCAL(ashlsi3_17): - shll16 r0 - rts - shll r0 - -LOCAL(ashlsi3_30): - shll2 r0 -LOCAL(ashlsi3_28): - shll2 r0 -LOCAL(ashlsi3_26): - shll2 r0 -LOCAL(ashlsi3_24): - shll16 r0 - rts - shll8 r0 - -LOCAL(ashlsi3_31): - shll2 r0 -LOCAL(ashlsi3_29): - shll2 r0 
-LOCAL(ashlsi3_27): - shll2 r0 -LOCAL(ashlsi3_25): - shll16 r0 - shll8 r0 - rts - shll r0 - -LOCAL(ashlsi3_0): - rts - nop - - ENDFUNC(GLOBAL(ashlsi3)) -#endif - -#ifdef L_lshiftrt - -! -! GLOBAL(lshrsi3) -! -! Entry: -! -! r4: Value to shift -! r5: Shifts -! -! Exit: -! -! r0: Result -! -! Destroys: -! -! (none) -! - .global GLOBAL(lshrsi3) - HIDDEN_FUNC(GLOBAL(lshrsi3)) - .align 2 -GLOBAL(lshrsi3): - mov #31,r0 - and r0,r5 - mova LOCAL(lshrsi3_table),r0 - mov.b @(r0,r5),r5 -#ifdef __sh1__ - add r5,r0 - jmp @r0 -#else - braf r5 -#endif - mov r4,r0 - - .align 2 -LOCAL(lshrsi3_table): - .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table) - -LOCAL(lshrsi3_6): - shlr2 r0 -LOCAL(lshrsi3_4): - shlr2 r0 -LOCAL(lshrsi3_2): - rts - shlr2 r0 - -LOCAL(lshrsi3_7): - shlr2 r0 -LOCAL(lshrsi3_5): - shlr2 r0 -LOCAL(lshrsi3_3): - shlr2 r0 -LOCAL(lshrsi3_1): - rts - shlr r0 - -LOCAL(lshrsi3_14): - shlr2 r0 -LOCAL(lshrsi3_12): - shlr2 r0 -LOCAL(lshrsi3_10): - shlr2 r0 -LOCAL(lshrsi3_8): - rts - shlr8 r0 - -LOCAL(lshrsi3_15): - shlr2 r0 -LOCAL(lshrsi3_13): - shlr2 r0 -LOCAL(lshrsi3_11): - shlr2 r0 -LOCAL(lshrsi3_9): - shlr8 r0 - rts - shlr r0 - -LOCAL(lshrsi3_22): - shlr2 r0 -LOCAL(lshrsi3_20): - shlr2 r0 -LOCAL(lshrsi3_18): - shlr2 r0 -LOCAL(lshrsi3_16): - rts - shlr16 r0 - -LOCAL(lshrsi3_23): - shlr2 r0 -LOCAL(lshrsi3_21): - shlr2 r0 -LOCAL(lshrsi3_19): - shlr2 r0 -LOCAL(lshrsi3_17): - shlr16 r0 - rts - shlr r0 - -LOCAL(lshrsi3_30): - shlr2 r0 -LOCAL(lshrsi3_28): - shlr2 r0 -LOCAL(lshrsi3_26): - shlr2 r0 -LOCAL(lshrsi3_24): - shlr16 r0 - rts - shlr8 r0 - -LOCAL(lshrsi3_31): - shlr2 r0 -LOCAL(lshrsi3_29): - shlr2 r0 -LOCAL(lshrsi3_27): - shlr2 r0 -LOCAL(lshrsi3_25): - shlr16 r0 - shlr8 r0 - rts - shlr r0 - -LOCAL(lshrsi3_0): - rts - nop - - ENDFUNC(GLOBAL(lshrsi3)) -#endif - -#ifdef L_movmem - .text - .balign 4 - .global GLOBAL(movmem) - HIDDEN_FUNC(GLOBAL(movmem)) - HIDDEN_ALIAS(movstr,movmem) - /* This would be a lot simpler if r6 contained the byte count - minus 64, and we wouldn't be called here for a byte count of 64. 
*/ -GLOBAL(movmem): - sts.l pr,@-r15 - shll2 r6 - bsr GLOBAL(movmemSI52+2) - mov.l @(48,r5),r0 - .balign 4 -LOCAL(movmem_loop): /* Reached with rts */ - mov.l @(60,r5),r0 - add #-64,r6 - mov.l r0,@(60,r4) - tst r6,r6 - mov.l @(56,r5),r0 - bt LOCAL(movmem_done) - mov.l r0,@(56,r4) - cmp/pl r6 - mov.l @(52,r5),r0 - add #64,r5 - mov.l r0,@(52,r4) - add #64,r4 - bt GLOBAL(movmemSI52) -! done all the large groups, do the remainder -! jump to movmem+ - mova GLOBAL(movmemSI4)+4,r0 - add r6,r0 - jmp @r0 -LOCAL(movmem_done): ! share slot insn, works out aligned. - lds.l @r15+,pr - mov.l r0,@(56,r4) - mov.l @(52,r5),r0 - rts - mov.l r0,@(52,r4) - .balign 4 -! ??? We need aliases movstr* for movmem* for the older libraries. These -! aliases will be removed at the some point in the future. - .global GLOBAL(movmemSI64) - HIDDEN_FUNC(GLOBAL(movmemSI64)) - HIDDEN_ALIAS(movstrSI64,movmemSI64) -GLOBAL(movmemSI64): - mov.l @(60,r5),r0 - mov.l r0,@(60,r4) - .global GLOBAL(movmemSI60) - HIDDEN_FUNC(GLOBAL(movmemSI60)) - HIDDEN_ALIAS(movstrSI60,movmemSI60) -GLOBAL(movmemSI60): - mov.l @(56,r5),r0 - mov.l r0,@(56,r4) - .global GLOBAL(movmemSI56) - HIDDEN_FUNC(GLOBAL(movmemSI56)) - HIDDEN_ALIAS(movstrSI56,movmemSI56) -GLOBAL(movmemSI56): - mov.l @(52,r5),r0 - mov.l r0,@(52,r4) - .global GLOBAL(movmemSI52) - HIDDEN_FUNC(GLOBAL(movmemSI52)) - HIDDEN_ALIAS(movstrSI52,movmemSI52) -GLOBAL(movmemSI52): - mov.l @(48,r5),r0 - mov.l r0,@(48,r4) - .global GLOBAL(movmemSI48) - HIDDEN_FUNC(GLOBAL(movmemSI48)) - HIDDEN_ALIAS(movstrSI48,movmemSI48) -GLOBAL(movmemSI48): - mov.l @(44,r5),r0 - mov.l r0,@(44,r4) - .global GLOBAL(movmemSI44) - HIDDEN_FUNC(GLOBAL(movmemSI44)) - HIDDEN_ALIAS(movstrSI44,movmemSI44) -GLOBAL(movmemSI44): - mov.l @(40,r5),r0 - mov.l r0,@(40,r4) - .global GLOBAL(movmemSI40) - HIDDEN_FUNC(GLOBAL(movmemSI40)) - HIDDEN_ALIAS(movstrSI40,movmemSI40) -GLOBAL(movmemSI40): - mov.l @(36,r5),r0 - mov.l r0,@(36,r4) - .global GLOBAL(movmemSI36) - HIDDEN_FUNC(GLOBAL(movmemSI36)) - HIDDEN_ALIAS(movstrSI36,movmemSI36) -GLOBAL(movmemSI36): - mov.l @(32,r5),r0 - mov.l r0,@(32,r4) - .global GLOBAL(movmemSI32) - HIDDEN_FUNC(GLOBAL(movmemSI32)) - HIDDEN_ALIAS(movstrSI32,movmemSI32) -GLOBAL(movmemSI32): - mov.l @(28,r5),r0 - mov.l r0,@(28,r4) - .global GLOBAL(movmemSI28) - HIDDEN_FUNC(GLOBAL(movmemSI28)) - HIDDEN_ALIAS(movstrSI28,movmemSI28) -GLOBAL(movmemSI28): - mov.l @(24,r5),r0 - mov.l r0,@(24,r4) - .global GLOBAL(movmemSI24) - HIDDEN_FUNC(GLOBAL(movmemSI24)) - HIDDEN_ALIAS(movstrSI24,movmemSI24) -GLOBAL(movmemSI24): - mov.l @(20,r5),r0 - mov.l r0,@(20,r4) - .global GLOBAL(movmemSI20) - HIDDEN_FUNC(GLOBAL(movmemSI20)) - HIDDEN_ALIAS(movstrSI20,movmemSI20) -GLOBAL(movmemSI20): - mov.l @(16,r5),r0 - mov.l r0,@(16,r4) - .global GLOBAL(movmemSI16) - HIDDEN_FUNC(GLOBAL(movmemSI16)) - HIDDEN_ALIAS(movstrSI16,movmemSI16) -GLOBAL(movmemSI16): - mov.l @(12,r5),r0 - mov.l r0,@(12,r4) - .global GLOBAL(movmemSI12) - HIDDEN_FUNC(GLOBAL(movmemSI12)) - HIDDEN_ALIAS(movstrSI12,movmemSI12) -GLOBAL(movmemSI12): - mov.l @(8,r5),r0 - mov.l r0,@(8,r4) - .global GLOBAL(movmemSI8) - HIDDEN_FUNC(GLOBAL(movmemSI8)) - HIDDEN_ALIAS(movstrSI8,movmemSI8) -GLOBAL(movmemSI8): - mov.l @(4,r5),r0 - mov.l r0,@(4,r4) - .global GLOBAL(movmemSI4) - HIDDEN_FUNC(GLOBAL(movmemSI4)) - HIDDEN_ALIAS(movstrSI4,movmemSI4) -GLOBAL(movmemSI4): - mov.l @(0,r5),r0 - rts - mov.l r0,@(0,r4) - - ENDFUNC(GLOBAL(movmemSI64)) - ENDFUNC(GLOBAL(movmemSI60)) - ENDFUNC(GLOBAL(movmemSI56)) - ENDFUNC(GLOBAL(movmemSI52)) - ENDFUNC(GLOBAL(movmemSI48)) - ENDFUNC(GLOBAL(movmemSI44)) - 
ENDFUNC(GLOBAL(movmemSI40)) - ENDFUNC(GLOBAL(movmemSI36)) - ENDFUNC(GLOBAL(movmemSI32)) - ENDFUNC(GLOBAL(movmemSI28)) - ENDFUNC(GLOBAL(movmemSI24)) - ENDFUNC(GLOBAL(movmemSI20)) - ENDFUNC(GLOBAL(movmemSI16)) - ENDFUNC(GLOBAL(movmemSI12)) - ENDFUNC(GLOBAL(movmemSI8)) - ENDFUNC(GLOBAL(movmemSI4)) - ENDFUNC(GLOBAL(movmem)) -#endif - -#ifdef L_movmem_i4 - .text - .global GLOBAL(movmem_i4_even) - .global GLOBAL(movmem_i4_odd) - .global GLOBAL(movmemSI12_i4) - - HIDDEN_FUNC(GLOBAL(movmem_i4_even)) - HIDDEN_FUNC(GLOBAL(movmem_i4_odd)) - HIDDEN_FUNC(GLOBAL(movmemSI12_i4)) - - HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even) - HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd) - HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4) - - .p2align 5 -L_movmem_2mod4_end: - mov.l r0,@(16,r4) - rts - mov.l r1,@(20,r4) - - .p2align 2 - -GLOBAL(movmem_i4_even): - mov.l @r5+,r0 - bra L_movmem_start_even - mov.l @r5+,r1 - -GLOBAL(movmem_i4_odd): - mov.l @r5+,r1 - add #-4,r4 - mov.l @r5+,r2 - mov.l @r5+,r3 - mov.l r1,@(4,r4) - mov.l r2,@(8,r4) - -L_movmem_loop: - mov.l r3,@(12,r4) - dt r6 - mov.l @r5+,r0 - bt/s L_movmem_2mod4_end - mov.l @r5+,r1 - add #16,r4 -L_movmem_start_even: - mov.l @r5+,r2 - mov.l @r5+,r3 - mov.l r0,@r4 - dt r6 - mov.l r1,@(4,r4) - bf/s L_movmem_loop - mov.l r2,@(8,r4) - rts - mov.l r3,@(12,r4) - - ENDFUNC(GLOBAL(movmem_i4_even)) - ENDFUNC(GLOBAL(movmem_i4_odd)) - - .p2align 4 -GLOBAL(movmemSI12_i4): - mov.l @r5,r0 - mov.l @(4,r5),r1 - mov.l @(8,r5),r2 - mov.l r0,@r4 - mov.l r1,@(4,r4) - rts - mov.l r2,@(8,r4) - - ENDFUNC(GLOBAL(movmemSI12_i4)) -#endif - -#ifdef L_mulsi3 - - - .global GLOBAL(mulsi3) - HIDDEN_FUNC(GLOBAL(mulsi3)) - -! r4 = aabb -! r5 = ccdd -! r0 = aabb*ccdd via partial products -! -! if aa == 0 and cc = 0 -! r0 = bb*dd -! -! else -! aa = bb*dd + (aa*dd*65536) + (cc*bb*65536) -! - -GLOBAL(mulsi3): - mulu.w r4,r5 ! multiply the lsws macl=bb*dd - mov r5,r3 ! r3 = ccdd - swap.w r4,r2 ! r2 = bbaa - xtrct r2,r3 ! r3 = aacc - tst r3,r3 ! msws zero ? - bf hiset - rts ! yes - then we have the answer - sts macl,r0 - -hiset: sts macl,r0 ! r0 = bb*dd - mulu.w r2,r5 ! brewing macl = aa*dd - sts macl,r1 - mulu.w r3,r4 ! brewing macl = cc*bb - sts macl,r2 - add r1,r2 - shll16 r2 - rts - add r2,r0 - - ENDFUNC(GLOBAL(mulsi3)) -#endif -#endif /* ! __SH5__ */ -#ifdef L_sdivsi3_i4 - .title "SH DIVIDE" -!! 4 byte integer Divide code for the Renesas SH -#ifdef __SH4__ -!! args in r4 and r5, result in fpul, clobber dr0, dr2 - - .global GLOBAL(sdivsi3_i4) - HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) -GLOBAL(sdivsi3_i4): - lds r4,fpul - float fpul,dr0 - lds r5,fpul - float fpul,dr2 - fdiv dr2,dr0 - rts - ftrc dr0,fpul - - ENDFUNC(GLOBAL(sdivsi3_i4)) -#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__) -!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2 - -#if ! __SH5__ || __SH5__ == 32 -#if __SH5__ - .mode SHcompact -#endif - .global GLOBAL(sdivsi3_i4) - HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) -GLOBAL(sdivsi3_i4): - sts.l fpscr,@-r15 - mov #8,r2 - swap.w r2,r2 - lds r2,fpscr - lds r4,fpul - float fpul,dr0 - lds r5,fpul - float fpul,dr2 - fdiv dr2,dr0 - ftrc dr0,fpul - rts - lds.l @r15+,fpscr - - ENDFUNC(GLOBAL(sdivsi3_i4)) -#endif /* ! __SH5__ || __SH5__ == 32 */ -#endif /* ! __SH4__ */ -#endif - -#ifdef L_sdivsi3 -/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with - sh2e/sh3e code. */ -#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) -!! -!! Steve Chamberlain -!! sac@cygnus.com -!! -!! - -!! 
args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit - - .global GLOBAL(sdivsi3) -#if __SHMEDIA__ -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif - .align 2 -#if 0 -/* The assembly code that follows is a hand-optimized version of the C - code that follows. Note that the registers that are modified are - exactly those listed as clobbered in the patterns divsi3_i1 and - divsi3_i1_media. - -int __sdivsi3 (i, j) - int i, j; -{ - register unsigned long long r18 asm ("r18"); - register unsigned long long r19 asm ("r19"); - register unsigned long long r0 asm ("r0") = 0; - register unsigned long long r1 asm ("r1") = 1; - register int r2 asm ("r2") = i >> 31; - register int r3 asm ("r3") = j >> 31; - - r2 = r2 ? r2 : r1; - r3 = r3 ? r3 : r1; - r18 = i * r2; - r19 = j * r3; - r2 *= r3; - - r19 <<= 31; - r1 <<= 31; - do - if (r18 >= r19) - r0 |= r1, r18 -= r19; - while (r19 >>= 1, r1 >>= 1); - - return r2 * (int)r0; -} -*/ -GLOBAL(sdivsi3): - pt/l LOCAL(sdivsi3_dontadd), tr2 - pt/l LOCAL(sdivsi3_loop), tr1 - ptabs/l r18, tr0 - movi 0, r0 - movi 1, r1 - shari.l r4, 31, r2 - shari.l r5, 31, r3 - cmveq r2, r1, r2 - cmveq r3, r1, r3 - muls.l r4, r2, r18 - muls.l r5, r3, r19 - muls.l r2, r3, r2 - shlli r19, 31, r19 - shlli r1, 31, r1 -LOCAL(sdivsi3_loop): - bgtu r19, r18, tr2 - or r0, r1, r0 - sub r18, r19, r18 -LOCAL(sdivsi3_dontadd): - shlri r1, 1, r1 - shlri r19, 1, r19 - bnei r1, 0, tr1 - muls.l r0, r2, r0 - add.l r0, r63, r0 - blink tr0, r63 -#elif 0 /* ! 0 */ - // inputs: r4,r5 - // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0 - // result in r0 -GLOBAL(sdivsi3): - // can create absolute value without extra latency, - // but dependent on proper sign extension of inputs: - // shari.l r5,31,r2 - // xor r5,r2,r20 - // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended. - shari.l r5,31,r2 - ori r2,1,r2 - muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended. - movi 0xffffffffffffbb0c,r19 // shift count eqiv 76 - shari.l r4,31,r3 - nsb r20,r0 - shlld r20,r0,r25 - shlri r25,48,r25 - sub r19,r25,r1 - mmulfx.w r1,r1,r2 - mshflo.w r1,r63,r1 - // If r4 was to be used in-place instead of r21, could use this sequence - // to compute absolute: - // sub r63,r4,r19 // compute absolute value of r4 - // shlri r4,32,r3 // into lower 32 bit of r4, keeping - // mcmv r19,r3,r4 // the sign in the upper 32 bits intact. - ori r3,1,r3 - mmulfx.w r25,r2,r2 - sub r19,r0,r0 - muls.l r4,r3,r21 - msub.w r1,r2,r2 - addi r2,-2,r1 - mulu.l r21,r1,r19 - mmulfx.w r2,r2,r2 - shlli r1,15,r1 - shlrd r19,r0,r19 - mulu.l r19,r20,r3 - mmacnfx.wl r25,r2,r1 - ptabs r18,tr0 - sub r21,r3,r25 - - mulu.l r25,r1,r2 - addi r0,14,r0 - xor r4,r5,r18 - shlrd r2,r0,r2 - mulu.l r2,r20,r3 - add r19,r2,r19 - shari.l r18,31,r18 - sub r25,r3,r25 - - mulu.l r25,r1,r2 - sub r25,r20,r25 - add r19,r18,r19 - shlrd r2,r0,r2 - mulu.l r2,r20,r3 - addi r25,1,r25 - add r19,r2,r19 - - cmpgt r25,r3,r25 - add.l r19,r25,r0 - xor r0,r18,r0 - blink tr0,r63 -#else /* ! 0 && ! 0 */ - - // inputs: r4,r5 - // clobbered: r1,r18,r19,r20,r21,r25,tr0 - // result in r0 - HIDDEN_FUNC(GLOBAL(sdivsi3_2)) -#ifndef __pic__ - FUNC(GLOBAL(sdivsi3)) -GLOBAL(sdivsi3): /* this is the shcompact entry point */ - // The special SHmedia entry point sdivsi3_1 prevents accidental linking - // with the SHcompact implementation, which clobbers tr1 / tr2. 
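The table-driven SHmedia implementation that follows (sdivsi3_2) folds the sign handling into its fixed-point computation, but the commented-out variant above and the divdi3 wrapper later in this file rest on a simpler reduction: take the absolute values, divide them as unsigned numbers, and make the quotient negative exactly when the XOR of the operands' sign bits is set. A minimal C sketch of that reduction, assuming nothing about the unsigned core (udiv32 below is a stand-in used only for illustration, not a routine in this file):

#include <stdint.h>

/* Stand-in for the unsigned divide that the real code implements with a
   reciprocal estimate; any correct unsigned division works for the sketch.  */
static uint32_t
udiv32 (uint32_t n, uint32_t d)
{
  return n / d;
}

int32_t
sdiv32_sketch (int32_t i, int32_t j)
{
  uint32_t n = i < 0 ? 0u - (uint32_t) i : (uint32_t) i;  /* |i| */
  uint32_t d = j < 0 ? 0u - (uint32_t) j : (uint32_t) j;  /* |j| */
  uint32_t q = udiv32 (n, d);
  /* The result is negative iff exactly one operand was negative,
     i.e. iff the XOR of the two sign bits is set.  */
  return (i ^ j) < 0 ? -(int32_t) q : (int32_t) q;
}

For example, sdiv32_sketch (-7, 2) divides 7 by 2 and then negates, giving the truncated quotient -3 that these routines produce.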
- .global GLOBAL(sdivsi3_1) -GLOBAL(sdivsi3_1): - .global GLOBAL(div_table_internal) - movi (GLOBAL(div_table_internal) >> 16) & 65535, r20 - shori GLOBAL(div_table_internal) & 65535, r20 -#endif - .global GLOBAL(sdivsi3_2) - // div_table in r20 - // clobbered: r1,r18,r19,r21,r25,tr0 -GLOBAL(sdivsi3_2): - nsb r5, r1 - shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 - shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) - ldx.ub r20, r21, r19 // u0.8 - shari r25, 32, r25 // normalize to s2.30 - shlli r21, 1, r21 - muls.l r25, r19, r19 // s2.38 - ldx.w r20, r21, r21 // s2.14 - ptabs r18, tr0 - shari r19, 24, r19 // truncate to s2.14 - sub r21, r19, r19 // some 11 bit inverse in s1.14 - muls.l r19, r19, r21 // u0.28 - sub r63, r1, r1 - addi r1, 92, r1 - muls.l r25, r21, r18 // s2.58 - shlli r19, 45, r19 // multiply by two and convert to s2.58 - /* bubble */ - sub r19, r18, r18 - shari r18, 28, r18 // some 22 bit inverse in s1.30 - muls.l r18, r25, r0 // s2.60 - muls.l r18, r4, r25 // s32.30 - /* bubble */ - shari r0, 16, r19 // s-16.44 - muls.l r19, r18, r19 // s-16.74 - shari r25, 63, r0 - shari r4, 14, r18 // s19.-14 - shari r19, 30, r19 // s-16.44 - muls.l r19, r18, r19 // s15.30 - xor r21, r0, r21 // You could also use the constant 1 << 27. - add r21, r25, r21 - sub r21, r19, r21 - shard r21, r1, r21 - sub r21, r0, r0 - blink tr0, r63 -#ifndef __pic__ - ENDFUNC(GLOBAL(sdivsi3)) -#endif - ENDFUNC(GLOBAL(sdivsi3_2)) -#endif -#elif defined __SHMEDIA__ -/* m5compact-nofpu */ - // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2 - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - FUNC(GLOBAL(sdivsi3)) -GLOBAL(sdivsi3): - pt/l LOCAL(sdivsi3_dontsub), tr0 - pt/l LOCAL(sdivsi3_loop), tr1 - ptabs/l r18,tr2 - shari.l r4,31,r18 - shari.l r5,31,r19 - xor r4,r18,r20 - xor r5,r19,r21 - sub.l r20,r18,r20 - sub.l r21,r19,r21 - xor r18,r19,r19 - shlli r21,32,r25 - addi r25,-1,r21 - addz.l r20,r63,r20 -LOCAL(sdivsi3_loop): - shlli r20,1,r20 - bgeu/u r21,r20,tr0 - sub r20,r21,r20 -LOCAL(sdivsi3_dontsub): - addi.l r25,-1,r25 - bnei r25,-32,tr1 - xor r20,r19,r20 - sub.l r20,r19,r0 - blink tr2,r63 - ENDFUNC(GLOBAL(sdivsi3)) -#else /* ! __SHMEDIA__ */ - FUNC(GLOBAL(sdivsi3)) -GLOBAL(sdivsi3): - mov r4,r1 - mov r5,r0 - - tst r0,r0 - bt div0 - mov #0,r2 - div0s r2,r1 - subc r3,r3 - subc r2,r1 - div0s r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - addc r2,r1 - rts - mov r1,r0 - - -div0: rts - mov #0,r0 - - ENDFUNC(GLOBAL(sdivsi3)) -#endif /* ! __SHMEDIA__ */ -#endif /* ! __SH4__ */ -#endif -#ifdef L_udivsi3_i4 - - .title "SH DIVIDE" -!! 4 byte integer Divide code for the Renesas SH -#ifdef __SH4__ -!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4, -!! 
and t bit - - .global GLOBAL(udivsi3_i4) - HIDDEN_FUNC(GLOBAL(udivsi3_i4)) -GLOBAL(udivsi3_i4): - mov #1,r1 - cmp/hi r1,r5 - bf trivial - rotr r1 - xor r1,r4 - lds r4,fpul - mova L1,r0 -#ifdef FMOVD_WORKS - fmov.d @r0+,dr4 -#else - fmov.s @r0+,DR40 - fmov.s @r0,DR41 -#endif - float fpul,dr0 - xor r1,r5 - lds r5,fpul - float fpul,dr2 - fadd dr4,dr0 - fadd dr4,dr2 - fdiv dr2,dr0 - rts - ftrc dr0,fpul - -trivial: - rts - lds r4,fpul - - .align 2 -#ifdef FMOVD_WORKS - .align 3 ! make double below 8 byte aligned. -#endif -L1: - .double 2147483648 - - ENDFUNC(GLOBAL(udivsi3_i4)) -#elif defined (__SH5__) && ! defined (__SH4_NOFPU__) -#if ! __SH5__ || __SH5__ == 32 -!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33 - .mode SHmedia - .global GLOBAL(udivsi3_i4) - HIDDEN_FUNC(GLOBAL(udivsi3_i4)) -GLOBAL(udivsi3_i4): - addz.l r4,r63,r20 - addz.l r5,r63,r21 - fmov.qd r20,dr0 - fmov.qd r21,dr32 - ptabs r18,tr0 - float.qd dr0,dr0 - float.qd dr32,dr32 - fdiv.d dr0,dr32,dr0 - ftrc.dq dr0,dr32 - fmov.s fr33,fr32 - blink tr0,r63 - - ENDFUNC(GLOBAL(udivsi3_i4)) -#endif /* ! __SH5__ || __SH5__ == 32 */ -#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) -!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 - - .global GLOBAL(udivsi3_i4) - HIDDEN_FUNC(GLOBAL(udivsi3_i4)) -GLOBAL(udivsi3_i4): - mov #1,r1 - cmp/hi r1,r5 - bf trivial - sts.l fpscr,@-r15 - mova L1,r0 - lds.l @r0+,fpscr - rotr r1 - xor r1,r4 - lds r4,fpul -#ifdef FMOVD_WORKS - fmov.d @r0+,dr4 -#else - fmov.s @r0+,DR40 - fmov.s @r0,DR41 -#endif - float fpul,dr0 - xor r1,r5 - lds r5,fpul - float fpul,dr2 - fadd dr4,dr0 - fadd dr4,dr2 - fdiv dr2,dr0 - ftrc dr0,fpul - rts - lds.l @r15+,fpscr - -#ifdef FMOVD_WORKS - .align 3 ! make double below 8 byte aligned. -#endif -trivial: - rts - lds r4,fpul - - .align 2 -L1: -#ifndef FMOVD_WORKS - .long 0x80000 -#else - .long 0x180000 -#endif - .double 2147483648 - - ENDFUNC(GLOBAL(udivsi3_i4)) -#endif /* ! __SH4__ */ -#endif - -#ifdef L_udivsi3 -/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with - sh2e/sh3e code. */ -#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) - -!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit - .global GLOBAL(udivsi3) - HIDDEN_FUNC(GLOBAL(udivsi3)) - -#if __SHMEDIA__ -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif - .align 2 -#if 0 -/* The assembly code that follows is a hand-optimized version of the C - code that follows. Note that the registers that are modified are - exactly those listed as clobbered in the patterns udivsi3_i1 and - udivsi3_i1_media. - -unsigned -__udivsi3 (i, j) - unsigned i, j; -{ - register unsigned long long r0 asm ("r0") = 0; - register unsigned long long r18 asm ("r18") = 1; - register unsigned long long r4 asm ("r4") = i; - register unsigned long long r19 asm ("r19") = j; - - r19 <<= 31; - r18 <<= 31; - do - if (r4 >= r19) - r0 |= r18, r4 -= r19; - while (r19 >>= 1, r18 >>= 1); - - return r0; -} -*/ -GLOBAL(udivsi3): - pt/l LOCAL(udivsi3_dontadd), tr2 - pt/l LOCAL(udivsi3_loop), tr1 - ptabs/l r18, tr0 - movi 0, r0 - movi 1, r18 - addz.l r5, r63, r19 - addz.l r4, r63, r4 - shlli r19, 31, r19 - shlli r18, 31, r18 -LOCAL(udivsi3_loop): - bgtu r19, r4, tr2 - or r0, r18, r0 - sub r4, r19, r4 -LOCAL(udivsi3_dontadd): - shlri r18, 1, r18 - shlri r19, 1, r19 - bnei r18, 0, tr1 - blink tr0, r63 -#else -GLOBAL(udivsi3): - // inputs: r4,r5 - // clobbered: r18,r19,r20,r21,r22,r25,tr0 - // result in r0. 
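The FPU-based udivsi3_i4 variants above have only a signed integer-to-double conversion available, so they flip the sign bit of each operand before loading it into the FPU (the rotr turns the 1 in r1 into 0x80000000) and add 2^31 (the .double 2147483648 constant) back after the conversion; since every 32-bit value converts to double exactly, the sum is exactly the original unsigned operand, and the routine relies on the truncated double quotient matching the integer quotient. A C model of the same bias trick, for illustration only (udiv_via_double is not a routine in this file):

#include <stdint.h>

uint32_t
udiv_via_double (uint32_t n, uint32_t d)
{
  if (d <= 1)
    return n;  /* 'trivial' path, as in the assembly: d == 1 (and d == 0) just returns n */
  /* Interpreted as a signed value, x ^ 0x80000000 equals x - 2^31 (this
     relies on the usual two's-complement wrap, which is what the hardware
     does), so converting it and adding 2^31.0 gives (double) x exactly.  */
  double dn = (double) (int32_t) (n ^ 0x80000000u) + 2147483648.0;
  double dd = (double) (int32_t) (d ^ 0x80000000u) + 2147483648.0;
  return (uint32_t) (dn / dd);  /* ftrc truncates toward zero, like this cast */
}

For example, udiv_via_double (0xFFFFFFFFu, 10) returns 429496729, the same quotient integer division gives.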
- addz.l r5,r63,r22 - nsb r22,r0 - shlld r22,r0,r25 - shlri r25,48,r25 - movi 0xffffffffffffbb0c,r20 // shift count eqiv 76 - sub r20,r25,r21 - mmulfx.w r21,r21,r19 - mshflo.w r21,r63,r21 - ptabs r18,tr0 - mmulfx.w r25,r19,r19 - sub r20,r0,r0 - /* bubble */ - msub.w r21,r19,r19 - addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21 - before the msub.w, but we need a different value for - r19 to keep errors under control. */ - mulu.l r4,r21,r18 - mmulfx.w r19,r19,r19 - shlli r21,15,r21 - shlrd r18,r0,r18 - mulu.l r18,r22,r20 - mmacnfx.wl r25,r19,r21 - /* bubble */ - sub r4,r20,r25 - - mulu.l r25,r21,r19 - addi r0,14,r0 - /* bubble */ - shlrd r19,r0,r19 - mulu.l r19,r22,r20 - add r18,r19,r18 - /* bubble */ - sub.l r25,r20,r25 - - mulu.l r25,r21,r19 - addz.l r25,r63,r25 - sub r25,r22,r25 - shlrd r19,r0,r19 - mulu.l r19,r22,r20 - addi r25,1,r25 - add r18,r19,r18 - - cmpgt r25,r20,r25 - add.l r18,r25,r0 - blink tr0,r63 -#endif -#elif defined (__SHMEDIA__) -/* m5compact-nofpu - more emphasis on code size than on speed, but don't - ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4. - So use a short shmedia loop. */ - // clobbered: r20,r21,r25,tr0,tr1,tr2 - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 -GLOBAL(udivsi3): - pt/l LOCAL(udivsi3_dontsub), tr0 - pt/l LOCAL(udivsi3_loop), tr1 - ptabs/l r18,tr2 - shlli r5,32,r25 - addi r25,-1,r21 - addz.l r4,r63,r20 -LOCAL(udivsi3_loop): - shlli r20,1,r20 - bgeu/u r21,r20,tr0 - sub r20,r21,r20 -LOCAL(udivsi3_dontsub): - addi.l r25,-1,r25 - bnei r25,-32,tr1 - add.l r20,r63,r0 - blink tr2,r63 -#else /* ! defined (__SHMEDIA__) */ -LOCAL(div8): - div1 r5,r4 -LOCAL(div7): - div1 r5,r4; div1 r5,r4; div1 r5,r4 - div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 - -LOCAL(divx4): - div1 r5,r4; rotcl r0 - div1 r5,r4; rotcl r0 - div1 r5,r4; rotcl r0 - rts; div1 r5,r4 - -GLOBAL(udivsi3): - sts.l pr,@-r15 - extu.w r5,r0 - cmp/eq r5,r0 -#ifdef __sh1__ - bf LOCAL(large_divisor) -#else - bf/s LOCAL(large_divisor) -#endif - div0u - swap.w r4,r0 - shlr16 r4 - bsr LOCAL(div8) - shll16 r5 - bsr LOCAL(div7) - div1 r5,r4 - xtrct r4,r0 - xtrct r0,r4 - bsr LOCAL(div8) - swap.w r4,r4 - bsr LOCAL(div7) - div1 r5,r4 - lds.l @r15+,pr - xtrct r4,r0 - swap.w r0,r0 - rotcl r0 - rts - shlr16 r5 - -LOCAL(large_divisor): -#ifdef __sh1__ - div0u -#endif - mov #0,r0 - xtrct r4,r0 - xtrct r0,r4 - bsr LOCAL(divx4) - rotcl r0 - bsr LOCAL(divx4) - rotcl r0 - bsr LOCAL(divx4) - rotcl r0 - bsr LOCAL(divx4) - rotcl r0 - lds.l @r15+,pr - rts - rotcl r0 - - ENDFUNC(GLOBAL(udivsi3)) -#endif /* ! __SHMEDIA__ */ -#endif /* __SH4__ */ -#endif /* L_udivsi3 */ - -#ifdef L_udivdi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(udivdi3) - FUNC(GLOBAL(udivdi3)) -GLOBAL(udivdi3): - HIDDEN_ALIAS(udivdi3_internal,udivdi3) - shlri r3,1,r4 - nsb r4,r22 - shlld r3,r22,r6 - shlri r6,49,r5 - movi 0xffffffffffffbaf1,r21 /* .l shift count 17. 
*/ - sub r21,r5,r1 - mmulfx.w r1,r1,r4 - mshflo.w r1,r63,r1 - sub r63,r22,r20 // r63 == 64 % 64 - mmulfx.w r5,r4,r4 - pta LOCAL(large_divisor),tr0 - addi r20,32,r9 - msub.w r1,r4,r1 - madd.w r1,r1,r1 - mmulfx.w r1,r1,r4 - shlri r6,32,r7 - bgt/u r9,r63,tr0 // large_divisor - mmulfx.w r5,r4,r4 - shlri r2,32+14,r19 - addi r22,-31,r0 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r19,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - mulu.l r5,r3,r8 - mshalds.l r1,r21,r1 - shari r4,26,r4 - shlld r8,r0,r8 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r2,r8,r2 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ - - shlri r2,22,r21 - mulu.l r21,r1,r21 - shlld r5,r0,r8 - addi r20,30-22,r0 - shlrd r21,r0,r21 - mulu.l r21,r3,r5 - add r8,r21,r8 - mcmpgt.l r21,r63,r21 // See Note 1 - addi r20,30,r0 - mshfhi.l r63,r21,r21 - sub r2,r5,r2 - andc r2,r21,r2 - - /* small divisor: need a third divide step */ - mulu.l r2,r1,r7 - ptabs r18,tr0 - addi r2,1,r2 - shlrd r7,r0,r7 - mulu.l r7,r3,r5 - add r8,r7,r8 - sub r2,r3,r2 - cmpgt r2,r5,r5 - add r8,r5,r2 - /* could test r3 here to check for divide by zero. */ - blink tr0,r63 - -LOCAL(large_divisor): - mmulfx.w r5,r4,r4 - shlrd r2,r9,r25 - shlri r25,32,r8 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r8,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - shlri r5,14-1,r8 - mulu.l r8,r7,r5 - mshalds.l r1,r21,r1 - shari r4,26,r4 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r25,r5,r25 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ - - shlri r25,22,r21 - mulu.l r21,r1,r21 - pta LOCAL(no_lo_adj),tr0 - addi r22,32,r0 - shlri r21,40,r21 - mulu.l r21,r7,r5 - add r8,r21,r8 - shlld r2,r0,r2 - sub r25,r5,r25 - bgtu/u r7,r25,tr0 // no_lo_adj - addi r8,1,r8 - sub r25,r7,r25 -LOCAL(no_lo_adj): - mextr4 r2,r25,r2 - - /* large_divisor: only needs a few adjustments. */ - mulu.l r8,r6,r5 - ptabs r18,tr0 - /* bubble */ - cmpgtu r5,r2,r5 - sub r8,r5,r2 - blink tr0,r63 - ENDFUNC(GLOBAL(udivdi3)) -/* Note 1: To shift the result of the second divide stage so that the result - always fits into 32 bits, yet we still reduce the rest sufficiently - would require a lot of instructions to do the shifts just right. Using - the full 64 bit shift result to multiply with the divisor would require - four extra instructions for the upper 32 bits (shift / mulu / shift / sub). - Fortunately, if the upper 32 bits of the shift result are nonzero, we - know that the rest after taking this partial result into account will - fit into 32 bits. So we just clear the upper 32 bits of the rest if the - upper 32 bits of the partial result are nonzero. 
*/ -#endif /* __SHMEDIA__ */ -#endif /* L_udivdi3 */ - -#ifdef L_divdi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(divdi3) - FUNC(GLOBAL(divdi3)) -GLOBAL(divdi3): - pta GLOBAL(udivdi3_internal),tr0 - shari r2,63,r22 - shari r3,63,r23 - xor r2,r22,r2 - xor r3,r23,r3 - sub r2,r22,r2 - sub r3,r23,r3 - beq/u r22,r23,tr0 - ptabs r18,tr1 - blink tr0,r18 - sub r63,r2,r2 - blink tr1,r63 - ENDFUNC(GLOBAL(divdi3)) -#endif /* __SHMEDIA__ */ -#endif /* L_divdi3 */ - -#ifdef L_umoddi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(umoddi3) - FUNC(GLOBAL(umoddi3)) -GLOBAL(umoddi3): - HIDDEN_ALIAS(umoddi3_internal,umoddi3) - shlri r3,1,r4 - nsb r4,r22 - shlld r3,r22,r6 - shlri r6,49,r5 - movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ - sub r21,r5,r1 - mmulfx.w r1,r1,r4 - mshflo.w r1,r63,r1 - sub r63,r22,r20 // r63 == 64 % 64 - mmulfx.w r5,r4,r4 - pta LOCAL(large_divisor),tr0 - addi r20,32,r9 - msub.w r1,r4,r1 - madd.w r1,r1,r1 - mmulfx.w r1,r1,r4 - shlri r6,32,r7 - bgt/u r9,r63,tr0 // large_divisor - mmulfx.w r5,r4,r4 - shlri r2,32+14,r19 - addi r22,-31,r0 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r19,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - mulu.l r5,r3,r5 - mshalds.l r1,r21,r1 - shari r4,26,r4 - shlld r5,r0,r5 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r2,r5,r2 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ - - shlri r2,22,r21 - mulu.l r21,r1,r21 - addi r20,30-22,r0 - /* bubble */ /* could test r3 here to check for divide by zero. */ - shlrd r21,r0,r21 - mulu.l r21,r3,r5 - mcmpgt.l r21,r63,r21 // See Note 1 - addi r20,30,r0 - mshfhi.l r63,r21,r21 - sub r2,r5,r2 - andc r2,r21,r2 - - /* small divisor: need a third divide step */ - mulu.l r2,r1,r7 - ptabs r18,tr0 - sub r2,r3,r8 /* re-use r8 here for rest - r3 */ - shlrd r7,r0,r7 - mulu.l r7,r3,r5 - /* bubble */ - addi r8,1,r7 - cmpgt r7,r5,r7 - cmvne r7,r8,r2 - sub r2,r5,r2 - blink tr0,r63 - -LOCAL(large_divisor): - mmulfx.w r5,r4,r4 - shlrd r2,r9,r25 - shlri r25,32,r8 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r8,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - shlri r5,14-1,r8 - mulu.l r8,r7,r5 - mshalds.l r1,r21,r1 - shari r4,26,r4 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r25,r5,r25 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ - - shlri r25,22,r21 - mulu.l r21,r1,r21 - pta LOCAL(no_lo_adj),tr0 - addi r22,32,r0 - shlri r21,40,r21 - mulu.l r21,r7,r5 - add r8,r21,r8 - shlld r2,r0,r2 - sub r25,r5,r25 - bgtu/u r7,r25,tr0 // no_lo_adj - addi r8,1,r8 - sub r25,r7,r25 -LOCAL(no_lo_adj): - mextr4 r2,r25,r2 - - /* large_divisor: only needs a few adjustments. 
*/ - mulu.l r8,r6,r5 - ptabs r18,tr0 - add r2,r6,r7 - cmpgtu r5,r2,r8 - cmvne r8,r7,r2 - sub r2,r5,r2 - shlrd r2,r22,r2 - blink tr0,r63 - ENDFUNC(GLOBAL(umoddi3)) -/* Note 1: To shift the result of the second divide stage so that the result - always fits into 32 bits, yet we still reduce the rest sufficiently - would require a lot of instructions to do the shifts just right. Using - the full 64 bit shift result to multiply with the divisor would require - four extra instructions for the upper 32 bits (shift / mulu / shift / sub). - Fortunately, if the upper 32 bits of the shift result are nonzero, we - know that the rest after taking this partial result into account will - fit into 32 bits. So we just clear the upper 32 bits of the rest if the - upper 32 bits of the partial result are nonzero. */ -#endif /* __SHMEDIA__ */ -#endif /* L_umoddi3 */ - -#ifdef L_moddi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(moddi3) - FUNC(GLOBAL(moddi3)) -GLOBAL(moddi3): - pta GLOBAL(umoddi3_internal),tr0 - shari r2,63,r22 - shari r3,63,r23 - xor r2,r22,r2 - xor r3,r23,r3 - sub r2,r22,r2 - sub r3,r23,r3 - beq/u r22,r63,tr0 - ptabs r18,tr1 - blink tr0,r18 - sub r63,r2,r2 - blink tr1,r63 - ENDFUNC(GLOBAL(moddi3)) -#endif /* __SHMEDIA__ */ -#endif /* L_moddi3 */ - -#ifdef L_set_fpscr -#if !defined (__SH2A_NOFPU__) -#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32 -#ifdef __SH5__ - .mode SHcompact -#endif - .global GLOBAL(set_fpscr) - HIDDEN_FUNC(GLOBAL(set_fpscr)) -GLOBAL(set_fpscr): - lds r4,fpscr -#ifdef __PIC__ - mov.l r12,@-r15 -#ifdef __vxworks - mov.l LOCAL(set_fpscr_L0_base),r12 - mov.l LOCAL(set_fpscr_L0_index),r0 - mov.l @r12,r12 - mov.l @(r0,r12),r12 -#else - mova LOCAL(set_fpscr_L0),r0 - mov.l LOCAL(set_fpscr_L0),r12 - add r0,r12 -#endif - mov.l LOCAL(set_fpscr_L1),r0 - mov.l @(r0,r12),r1 - mov.l @r15+,r12 -#else - mov.l LOCAL(set_fpscr_L1),r1 -#endif - swap.w r4,r0 - or #24,r0 -#ifndef FMOVD_WORKS - xor #16,r0 -#endif -#if defined(__SH4__) || defined (__SH2A_DOUBLE__) - swap.w r0,r3 - mov.l r3,@(4,r1) -#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ - swap.w r0,r2 - mov.l r2,@r1 -#endif -#ifndef FMOVD_WORKS - xor #8,r0 -#else - xor #24,r0 -#endif -#if defined(__SH4__) || defined (__SH2A_DOUBLE__) - swap.w r0,r2 - rts - mov.l r2,@r1 -#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ - swap.w r0,r3 - rts - mov.l r3,@(4,r1) -#endif - .align 2 -#ifdef __PIC__ -#ifdef __vxworks -LOCAL(set_fpscr_L0_base): - .long ___GOTT_BASE__ -LOCAL(set_fpscr_L0_index): - .long ___GOTT_INDEX__ -#else -LOCAL(set_fpscr_L0): - .long _GLOBAL_OFFSET_TABLE_ -#endif -LOCAL(set_fpscr_L1): - .long GLOBAL(fpscr_values@GOT) -#else -LOCAL(set_fpscr_L1): - .long GLOBAL(fpscr_values) -#endif - - ENDFUNC(GLOBAL(set_fpscr)) -#ifndef NO_FPSCR_VALUES -#ifdef __ELF__ - .comm GLOBAL(fpscr_values),8,4 -#else - .comm GLOBAL(fpscr_values),8 -#endif /* ELF */ -#endif /* NO_FPSCR_VALUES */ -#endif /* SH2E / SH3E / SH4 */ -#endif /* __SH2A_NOFPU__ */ -#endif /* L_set_fpscr */ -#ifdef L_ic_invalidate -#if __SH5__ == 32 - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(init_trampoline) - HIDDEN_FUNC(GLOBAL(init_trampoline)) -GLOBAL(init_trampoline): - st.l r0,8,r2 -#ifdef __LITTLE_ENDIAN__ - movi 9,r20 - shori 0x402b,r20 - shori 0xd101,r20 - shori 0xd002,r20 -#else - movi 0xffffffffffffd002,r20 
- shori 0xd101,r20 - shori 0x402b,r20 - shori 9,r20 -#endif - st.q r0,0,r20 - st.l r0,12,r3 - ENDFUNC(GLOBAL(init_trampoline)) - .global GLOBAL(ic_invalidate) - HIDDEN_FUNC(GLOBAL(ic_invalidate)) -GLOBAL(ic_invalidate): - ocbwb r0,0 - synco - icbi r0, 0 - ptabs r18, tr0 - synci - blink tr0, r63 - ENDFUNC(GLOBAL(ic_invalidate)) -#elif defined(__SH4A__) - .global GLOBAL(ic_invalidate) - HIDDEN_FUNC(GLOBAL(ic_invalidate)) -GLOBAL(ic_invalidate): - ocbwb @r4 - synco - icbi @r4 - rts - nop - ENDFUNC(GLOBAL(ic_invalidate)) -#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) - /* For system code, we use ic_invalidate_line_i, but user code - needs a different mechanism. A kernel call is generally not - available, and it would also be slow. Different SH4 variants use - different sizes and associativities of the Icache. We use a small - bit of dispatch code that can be put hidden in every shared object, - which calls the actual processor-specific invalidation code in a - separate module. - Or if you have operating system support, the OS could mmap the - procesor-specific code from a single page, since it is highly - repetitive. */ - .global GLOBAL(ic_invalidate) - HIDDEN_FUNC(GLOBAL(ic_invalidate)) -GLOBAL(ic_invalidate): -#ifdef __pic__ -#ifdef __vxworks - mov.l 1f,r1 - mov.l 2f,r0 - mov.l @r1,r1 - mov.l 0f,r2 - mov.l @(r0,r1),r0 -#else - mov.l 1f,r1 - mova 1f,r0 - mov.l 0f,r2 - add r1,r0 -#endif - mov.l @(r0,r2),r1 -#else - mov.l 0f,r1 -#endif - ocbwb @r4 - mov.l @(8,r1),r0 - sub r1,r4 - and r4,r0 - add r1,r0 - jmp @r0 - mov.l @(4,r1),r0 - .align 2 -#ifndef __pic__ -0: .long GLOBAL(ic_invalidate_array) -#else /* __pic__ */ - .global GLOBAL(ic_invalidate_array) -0: .long GLOBAL(ic_invalidate_array)@GOT -#ifdef __vxworks -1: .long ___GOTT_BASE__ -2: .long ___GOTT_INDEX__ -#else -1: .long _GLOBAL_OFFSET_TABLE_ -#endif - ENDFUNC(GLOBAL(ic_invalidate)) -#endif /* __pic__ */ -#endif /* SH4 */ -#endif /* L_ic_invalidate */ - -#ifdef L_ic_invalidate_array -#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)))) - .global GLOBAL(ic_invalidate_array) - /* This is needed when an SH4 dso with trampolines is used on SH4A. */ - .global GLOBAL(ic_invalidate_array) - FUNC(GLOBAL(ic_invalidate_array)) -GLOBAL(ic_invalidate_array): - add r1,r4 - synco - icbi @r4 - rts - nop - .align 2 - .long 0 - ENDFUNC(GLOBAL(ic_invalidate_array)) -#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) - .global GLOBAL(ic_invalidate_array) - .p2align 5 - FUNC(GLOBAL(ic_invalidate_array)) -/* This must be aligned to the beginning of a cache line. */ -GLOBAL(ic_invalidate_array): -#ifndef WAYS -#define WAYS 4 -#define WAY_SIZE 0x4000 -#endif -#if WAYS == 1 - .rept WAY_SIZE * WAYS / 32 - rts - nop - .rept 7 - .long WAY_SIZE - 32 - .endr - .endr -#elif WAYS <= 6 - .rept WAY_SIZE * WAYS / 32 - braf r0 - add #-8,r0 - .long WAY_SIZE + 8 - .long WAY_SIZE - 32 - .rept WAYS-2 - braf r0 - nop - .endr - .rept 7 - WAYS - rts - nop - .endr - .endr -#else /* WAYS > 6 */ - /* This variant needs two different pages for mmap-ing. 
*/ - .rept WAYS-1 - .rept WAY_SIZE / 32 - braf r0 - nop - .long WAY_SIZE - .rept 6 - .long WAY_SIZE - 32 - .endr - .endr - .endr - .rept WAY_SIZE / 32 - rts - .rept 15 - nop - .endr - .endr -#endif /* WAYS */ - ENDFUNC(GLOBAL(ic_invalidate_array)) -#endif /* SH4 */ -#endif /* L_ic_invalidate_array */ - -#if defined (__SH5__) && __SH5__ == 32 -#ifdef L_shcompact_call_trampoline - .section .rodata - .align 1 -LOCAL(ct_main_table): -.word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label) - .mode SHmedia - .section .text..SHmedia32, "ax" - .align 2 - - /* This function loads 64-bit general-purpose registers from the - stack, from a memory address contained in them or from an FP - register, according to a cookie passed in r1. Its execution - time is linear on the number of registers that actually have - to be copied. See sh.h for details on the actual bit pattern. - - The function to be called is passed in r0. If a 32-bit return - value is expected, the actual function will be tail-called, - otherwise the return address will be stored in r10 (that the - caller should expect to be clobbered) and the return value - will be expanded into r2/r3 upon return. */ - - .global GLOBAL(GCC_shcompact_call_trampoline) - FUNC(GLOBAL(GCC_shcompact_call_trampoline)) -GLOBAL(GCC_shcompact_call_trampoline): - ptabs/l r0, tr0 /* Prepare to call the actual function. */ - movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0 - pt/l LOCAL(ct_loop), tr1 - addz.l r1, r63, r1 - shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 -LOCAL(ct_loop): - nsb r1, r28 - shlli r28, 1, r29 - ldx.w r0, r29, r30 -LOCAL(ct_main_label): - ptrel/l r30, tr2 - blink tr2, r63 -LOCAL(ct_r2_fp): /* Copy r2 from an FP register. 
*/ - /* It must be dr0, so just do it. */ - fmov.dq dr0, r2 - movi 7, r30 - shlli r30, 29, r31 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */ - /* It is either dr0 or dr2. */ - movi 7, r30 - shlri r1, 26, r32 - shlli r30, 26, r31 - andc r1, r31, r1 - fmov.dq dr0, r3 - beqi/l r32, 4, tr1 - fmov.dq dr2, r3 - blink tr1, r63 -LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */ - shlri r1, 23 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32 -LOCAL(ct_r4_fp_base): - ptrel/l r32, tr2 - movi 7, r30 - shlli r30, 23, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r4_fp_copy): - fmov.dq dr0, r4 - blink tr1, r63 - fmov.dq dr2, r4 - blink tr1, r63 - fmov.dq dr4, r4 - blink tr1, r63 -LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */ - shlri r1, 20 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32 -LOCAL(ct_r5_fp_base): - ptrel/l r32, tr2 - movi 7, r30 - shlli r30, 20, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r5_fp_copy): - fmov.dq dr0, r5 - blink tr1, r63 - fmov.dq dr2, r5 - blink tr1, r63 - fmov.dq dr4, r5 - blink tr1, r63 - fmov.dq dr6, r5 - blink tr1, r63 -LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */ - /* It must be dr8. */ - fmov.dq dr8, r6 - movi 15, r30 - shlli r30, 16, r31 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */ - shlri r1, 16 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32 -LOCAL(ct_r6_fp_base): - ptrel/l r32, tr2 - movi 7, r30 - shlli r30, 16, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r6_fp_copy): - fmov.dq dr0, r6 - blink tr1, r63 - fmov.dq dr2, r6 - blink tr1, r63 - fmov.dq dr4, r6 - blink tr1, r63 - fmov.dq dr6, r6 - blink tr1, r63 -LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */ - /* It is either dr8 or dr10. */ - movi 15 << 12, r31 - shlri r1, 12, r32 - andc r1, r31, r1 - fmov.dq dr8, r7 - beqi/l r32, 8, tr1 - fmov.dq dr10, r7 - blink tr1, r63 -LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */ - shlri r1, 12 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32 -LOCAL(ct_r7_fp_base): - ptrel/l r32, tr2 - movi 7 << 12, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r7_fp_copy): - fmov.dq dr0, r7 - blink tr1, r63 - fmov.dq dr2, r7 - blink tr1, r63 - fmov.dq dr4, r7 - blink tr1, r63 - fmov.dq dr6, r7 - blink tr1, r63 -LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */ - /* It is either dr8 or dr10. */ - movi 15 << 8, r31 - andi r1, 1 << 8, r32 - andc r1, r31, r1 - fmov.dq dr8, r8 - beq/l r32, r63, tr1 - fmov.dq dr10, r8 - blink tr1, r63 -LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */ - shlri r1, 8 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32 -LOCAL(ct_r8_fp_base): - ptrel/l r32, tr2 - movi 7 << 8, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r8_fp_copy): - fmov.dq dr0, r8 - blink tr1, r63 - fmov.dq dr2, r8 - blink tr1, r63 - fmov.dq dr4, r8 - blink tr1, r63 - fmov.dq dr6, r8 - blink tr1, r63 -LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */ - /* It is either dr8 or dr10. */ - movi 15 << 4, r31 - andi r1, 1 << 4, r32 - andc r1, r31, r1 - fmov.dq dr8, r9 - beq/l r32, r63, tr1 - fmov.dq dr10, r9 - blink tr1, r63 -LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. 
*/ - shlri r1, 4 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32 -LOCAL(ct_r9_fp_base): - ptrel/l r32, tr2 - movi 7 << 4, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r9_fp_copy): - fmov.dq dr0, r9 - blink tr1, r63 - fmov.dq dr2, r9 - blink tr1, r63 - fmov.dq dr4, r9 - blink tr1, r63 - fmov.dq dr6, r9 - blink tr1, r63 -LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */ - pt/l LOCAL(ct_r2_load), tr2 - movi 3, r30 - shlli r30, 29, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r2, 8, r3 - ldx.q r2, r63, r2 - /* Fall through. */ -LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */ - pt/l LOCAL(ct_r3_load), tr2 - movi 3, r30 - shlli r30, 26, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r3, 8, r4 - ldx.q r3, r63, r3 -LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */ - pt/l LOCAL(ct_r4_load), tr2 - movi 3, r30 - shlli r30, 23, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r4, 8, r5 - ldx.q r4, r63, r4 -LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */ - pt/l LOCAL(ct_r5_load), tr2 - movi 3, r30 - shlli r30, 20, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r5, 8, r6 - ldx.q r5, r63, r5 -LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */ - pt/l LOCAL(ct_r6_load), tr2 - movi 3 << 16, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r6, 8, r7 - ldx.q r6, r63, r6 -LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */ - pt/l LOCAL(ct_r7_load), tr2 - movi 3 << 12, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r7, 8, r8 - ldx.q r7, r63, r7 -LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */ - pt/l LOCAL(ct_r8_load), tr2 - movi 3 << 8, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r8, 8, r9 - ldx.q r8, r63, r8 -LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */ - pt/l LOCAL(ct_check_tramp), tr2 - ldx.q r9, r63, r9 - blink tr2, r63 -LOCAL(ct_r2_load): - ldx.q r2, r63, r2 - blink tr1, r63 -LOCAL(ct_r3_load): - ldx.q r3, r63, r3 - blink tr1, r63 -LOCAL(ct_r4_load): - ldx.q r4, r63, r4 - blink tr1, r63 -LOCAL(ct_r5_load): - ldx.q r5, r63, r5 - blink tr1, r63 -LOCAL(ct_r6_load): - ldx.q r6, r63, r6 - blink tr1, r63 -LOCAL(ct_r7_load): - ldx.q r7, r63, r7 - blink tr1, r63 -LOCAL(ct_r8_load): - ldx.q r8, r63, r8 - blink tr1, r63 -LOCAL(ct_r2_pop): /* Pop r2 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r2 - shlli r30, 29, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r3_pop): /* Pop r3 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r3 - shlli r30, 26, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r4_pop): /* Pop r4 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r4 - shlli r30, 23, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r5_pop): /* Pop r5 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r5 - shlli r30, 20, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r6_pop): /* Pop r6 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r6 - shlli r30, 16, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r7_pop): /* Pop r7 from the stack. */ - ldx.q r15, r63, r7 - movi 1 << 12, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r8_pop): /* Pop r8 from the stack. 
*/ - ldx.q r15, r63, r8 - movi 1 << 8, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */ - andi r1, 7 << 1, r30 - movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32 - shlli r30, 2, r31 - shori LOCAL(ct_end_of_pop_seq) & 65535, r32 - sub.l r32, r31, r33 - ptabs/l r33, tr2 - blink tr2, r63 -LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */ - ldx.q r15, r63, r3 - addi.l r15, 8, r15 - ldx.q r15, r63, r4 - addi.l r15, 8, r15 - ldx.q r15, r63, r5 - addi.l r15, 8, r15 - ldx.q r15, r63, r6 - addi.l r15, 8, r15 - ldx.q r15, r63, r7 - addi.l r15, 8, r15 - ldx.q r15, r63, r8 - addi.l r15, 8, r15 -LOCAL(ct_r9_pop): /* Pop r9 from the stack. */ - ldx.q r15, r63, r9 - addi.l r15, 8, r15 -LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */ -LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */ - pt/u LOCAL(ct_ret_wide), tr2 - andi r1, 1, r1 - bne/u r1, r63, tr2 -LOCAL(ct_call_func): /* Just branch to the function. */ - blink tr0, r63 -LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its - 64-bit return value. */ - add.l r18, r63, r10 - blink tr0, r18 - ptabs r10, tr0 -#if __LITTLE_ENDIAN__ - shari r2, 32, r3 - add.l r2, r63, r2 -#else - add.l r2, r63, r3 - shari r2, 32, r2 -#endif - blink tr0, r63 - - ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline)) -#endif /* L_shcompact_call_trampoline */ - -#ifdef L_shcompact_return_trampoline - /* This function does the converse of the code in `ret_wide' - above. It is tail-called by SHcompact functions returning - 64-bit non-floating-point values, to pack the 32-bit values in - r2 and r3 into r2. */ - - .mode SHmedia - .section .text..SHmedia32, "ax" - .align 2 - .global GLOBAL(GCC_shcompact_return_trampoline) - HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline)) -GLOBAL(GCC_shcompact_return_trampoline): - ptabs/l r18, tr0 -#if __LITTLE_ENDIAN__ - addz.l r2, r63, r2 - shlli r3, 32, r3 -#else - addz.l r3, r63, r3 - shlli r2, 32, r2 -#endif - or r3, r2, r2 - blink tr0, r63 - - ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline)) -#endif /* L_shcompact_return_trampoline */ - -#ifdef L_shcompact_incoming_args - .section .rodata - .align 1 -LOCAL(ia_main_table): -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r9_push) - datalabel 
LOCAL(ia_main_label) -.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) - .mode SHmedia - .section .text..SHmedia32, "ax" - .align 2 - - /* This function stores 64-bit general-purpose registers back in - the stack, and loads the address in which each register - was stored into itself. The lower 32 bits of r17 hold the address - to begin storing, and the upper 32 bits of r17 hold the cookie. - Its execution time is linear on the - number of registers that actually have to be copied, and it is - optimized for structures larger than 64 bits, as opposed to - individual `long long' arguments. See sh.h for details on the - actual bit pattern. */ - - .global GLOBAL(GCC_shcompact_incoming_args) - FUNC(GLOBAL(GCC_shcompact_incoming_args)) -GLOBAL(GCC_shcompact_incoming_args): - ptabs/l r18, tr0 /* Prepare to return. */ - shlri r17, 32, r0 /* Load the cookie. */ - movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 - pt/l LOCAL(ia_loop), tr1 - add.l r17, r63, r17 - shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 -LOCAL(ia_loop): - nsb r0, r36 - shlli r36, 1, r37 - ldx.w r43, r37, r38 -LOCAL(ia_main_label): - ptrel/l r38, tr2 - blink tr2, r63 -LOCAL(ia_r2_ld): /* Store r2 and load its address. */ - movi 3, r38 - shlli r38, 29, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r2 - add.l r17, r63, r2 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r3_ld): /* Store r3 and load its address. */ - movi 3, r38 - shlli r38, 26, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r3 - add.l r17, r63, r3 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r4_ld): /* Store r4 and load its address. */ - movi 3, r38 - shlli r38, 23, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r4 - add.l r17, r63, r4 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r5_ld): /* Store r5 and load its address. */ - movi 3, r38 - shlli r38, 20, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r5 - add.l r17, r63, r5 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r6_ld): /* Store r6 and load its address. */ - movi 3, r38 - shlli r38, 16, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r6 - add.l r17, r63, r6 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r7_ld): /* Store r7 and load its address. */ - movi 3 << 12, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r7 - add.l r17, r63, r7 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r8_ld): /* Store r8 and load its address. */ - movi 3 << 8, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r8 - add.l r17, r63, r8 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r9_ld): /* Store r9 and load its address. */ - stx.q r17, r63, r9 - add.l r17, r63, r9 - blink tr0, r63 -LOCAL(ia_r2_push): /* Push r2 onto the stack. */ - movi 1, r38 - shlli r38, 29, r39 - andc r0, r39, r0 - stx.q r17, r63, r2 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r3_push): /* Push r3 onto the stack. */ - movi 1, r38 - shlli r38, 26, r39 - andc r0, r39, r0 - stx.q r17, r63, r3 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r4_push): /* Push r4 onto the stack. */ - movi 1, r38 - shlli r38, 23, r39 - andc r0, r39, r0 - stx.q r17, r63, r4 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r5_push): /* Push r5 onto the stack. 
*/ - movi 1, r38 - shlli r38, 20, r39 - andc r0, r39, r0 - stx.q r17, r63, r5 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r6_push): /* Push r6 onto the stack. */ - movi 1, r38 - shlli r38, 16, r39 - andc r0, r39, r0 - stx.q r17, r63, r6 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r7_push): /* Push r7 onto the stack. */ - movi 1 << 12, r39 - andc r0, r39, r0 - stx.q r17, r63, r7 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r8_push): /* Push r8 onto the stack. */ - movi 1 << 8, r39 - andc r0, r39, r0 - stx.q r17, r63, r8 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */ - andi r0, 7 << 1, r38 - movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 - shlli r38, 2, r39 - shori LOCAL(ia_end_of_push_seq) & 65535, r40 - sub.l r40, r39, r41 - ptabs/l r41, tr2 - blink tr2, r63 -LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */ - stx.q r17, r63, r3 - addi.l r17, 8, r17 - stx.q r17, r63, r4 - addi.l r17, 8, r17 - stx.q r17, r63, r5 - addi.l r17, 8, r17 - stx.q r17, r63, r6 - addi.l r17, 8, r17 - stx.q r17, r63, r7 - addi.l r17, 8, r17 - stx.q r17, r63, r8 - addi.l r17, 8, r17 -LOCAL(ia_r9_push): /* Push r9 onto the stack. */ - stx.q r17, r63, r9 -LOCAL(ia_return): /* Return. */ - blink tr0, r63 -LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */ - ENDFUNC(GLOBAL(GCC_shcompact_incoming_args)) -#endif /* L_shcompact_incoming_args */ -#endif -#if __SH5__ -#ifdef L_nested_trampoline -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif - .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */ - .global GLOBAL(GCC_nested_trampoline) - HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline)) -GLOBAL(GCC_nested_trampoline): - .mode SHmedia - ptrel/u r63, tr0 - gettr tr0, r0 -#if __SH5__ == 64 - ld.q r0, 24, r1 -#else - ld.l r0, 24, r1 -#endif - ptabs/l r1, tr1 -#if __SH5__ == 64 - ld.q r0, 32, r1 -#else - ld.l r0, 28, r1 -#endif - blink tr1, r63 - - ENDFUNC(GLOBAL(GCC_nested_trampoline)) -#endif /* L_nested_trampoline */ -#endif /* __SH5__ */ -#if __SH5__ == 32 -#ifdef L_push_pop_shmedia_regs - .section .text..SHmedia32,"ax" - .mode SHmedia - .align 2 -#ifndef __SH4_NOFPU__ - .global GLOBAL(GCC_push_shmedia_regs) - FUNC(GLOBAL(GCC_push_shmedia_regs)) -GLOBAL(GCC_push_shmedia_regs): - addi.l r15, -14*8, r15 - fst.d r15, 13*8, dr62 - fst.d r15, 12*8, dr60 - fst.d r15, 11*8, dr58 - fst.d r15, 10*8, dr56 - fst.d r15, 9*8, dr54 - fst.d r15, 8*8, dr52 - fst.d r15, 7*8, dr50 - fst.d r15, 6*8, dr48 - fst.d r15, 5*8, dr46 - fst.d r15, 4*8, dr44 - fst.d r15, 3*8, dr42 - fst.d r15, 2*8, dr40 - fst.d r15, 1*8, dr38 - fst.d r15, 0*8, dr36 -#else /* ! __SH4_NOFPU__ */ - .global GLOBAL(GCC_push_shmedia_regs_nofpu) - FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) -GLOBAL(GCC_push_shmedia_regs_nofpu): -#endif /* ! 
__SH4_NOFPU__ */ - ptabs/l r18, tr0 - addi.l r15, -27*8, r15 - gettr tr7, r62 - gettr tr6, r61 - gettr tr5, r60 - st.q r15, 26*8, r62 - st.q r15, 25*8, r61 - st.q r15, 24*8, r60 - st.q r15, 23*8, r59 - st.q r15, 22*8, r58 - st.q r15, 21*8, r57 - st.q r15, 20*8, r56 - st.q r15, 19*8, r55 - st.q r15, 18*8, r54 - st.q r15, 17*8, r53 - st.q r15, 16*8, r52 - st.q r15, 15*8, r51 - st.q r15, 14*8, r50 - st.q r15, 13*8, r49 - st.q r15, 12*8, r48 - st.q r15, 11*8, r47 - st.q r15, 10*8, r46 - st.q r15, 9*8, r45 - st.q r15, 8*8, r44 - st.q r15, 7*8, r35 - st.q r15, 6*8, r34 - st.q r15, 5*8, r33 - st.q r15, 4*8, r32 - st.q r15, 3*8, r31 - st.q r15, 2*8, r30 - st.q r15, 1*8, r29 - st.q r15, 0*8, r28 - blink tr0, r63 -#ifndef __SH4_NOFPU__ - ENDFUNC(GLOBAL(GCC_push_shmedia_regs)) -#else - ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) -#endif -#ifndef __SH4_NOFPU__ - .global GLOBAL(GCC_pop_shmedia_regs) - FUNC(GLOBAL(GCC_pop_shmedia_regs)) -GLOBAL(GCC_pop_shmedia_regs): - pt .L0, tr1 - movi 41*8, r0 - fld.d r15, 40*8, dr62 - fld.d r15, 39*8, dr60 - fld.d r15, 38*8, dr58 - fld.d r15, 37*8, dr56 - fld.d r15, 36*8, dr54 - fld.d r15, 35*8, dr52 - fld.d r15, 34*8, dr50 - fld.d r15, 33*8, dr48 - fld.d r15, 32*8, dr46 - fld.d r15, 31*8, dr44 - fld.d r15, 30*8, dr42 - fld.d r15, 29*8, dr40 - fld.d r15, 28*8, dr38 - fld.d r15, 27*8, dr36 - blink tr1, r63 -#else /* ! __SH4_NOFPU__ */ - .global GLOBAL(GCC_pop_shmedia_regs_nofpu) - FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) -GLOBAL(GCC_pop_shmedia_regs_nofpu): -#endif /* ! __SH4_NOFPU__ */ - movi 27*8, r0 -.L0: - ptabs r18, tr0 - ld.q r15, 26*8, r62 - ld.q r15, 25*8, r61 - ld.q r15, 24*8, r60 - ptabs r62, tr7 - ptabs r61, tr6 - ptabs r60, tr5 - ld.q r15, 23*8, r59 - ld.q r15, 22*8, r58 - ld.q r15, 21*8, r57 - ld.q r15, 20*8, r56 - ld.q r15, 19*8, r55 - ld.q r15, 18*8, r54 - ld.q r15, 17*8, r53 - ld.q r15, 16*8, r52 - ld.q r15, 15*8, r51 - ld.q r15, 14*8, r50 - ld.q r15, 13*8, r49 - ld.q r15, 12*8, r48 - ld.q r15, 11*8, r47 - ld.q r15, 10*8, r46 - ld.q r15, 9*8, r45 - ld.q r15, 8*8, r44 - ld.q r15, 7*8, r35 - ld.q r15, 6*8, r34 - ld.q r15, 5*8, r33 - ld.q r15, 4*8, r32 - ld.q r15, 3*8, r31 - ld.q r15, 2*8, r30 - ld.q r15, 1*8, r29 - ld.q r15, 0*8, r28 - add.l r15, r0, r15 - blink tr0, r63 - -#ifndef __SH4_NOFPU__ - ENDFUNC(GLOBAL(GCC_pop_shmedia_regs)) -#else - ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) -#endif -#endif /* __SH5__ == 32 */ -#endif /* L_push_pop_shmedia_regs */ - -#ifdef L_div_table -#if __SH5__ -#if defined(__pic__) && defined(__SHMEDIA__) - .global GLOBAL(sdivsi3) - FUNC(GLOBAL(sdivsi3)) -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif -#if 0 -/* ??? FIXME: Presumably due to a linker bug, exporting data symbols - in a text section does not work (at least for shared libraries): - the linker sets the LSB of the address as if this was SHmedia code. 
*/ -#define TEXT_DATA_BUG -#endif - .align 2 - // inputs: r4,r5 - // clobbered: r1,r18,r19,r20,r21,r25,tr0 - // result in r0 - .global GLOBAL(sdivsi3) -GLOBAL(sdivsi3): -#ifdef TEXT_DATA_BUG - ptb datalabel Local_div_table,tr0 -#else - ptb GLOBAL(div_table_internal),tr0 -#endif - nsb r5, r1 - shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 - shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) - /* bubble */ - gettr tr0,r20 - ldx.ub r20, r21, r19 // u0.8 - shari r25, 32, r25 // normalize to s2.30 - shlli r21, 1, r21 - muls.l r25, r19, r19 // s2.38 - ldx.w r20, r21, r21 // s2.14 - ptabs r18, tr0 - shari r19, 24, r19 // truncate to s2.14 - sub r21, r19, r19 // some 11 bit inverse in s1.14 - muls.l r19, r19, r21 // u0.28 - sub r63, r1, r1 - addi r1, 92, r1 - muls.l r25, r21, r18 // s2.58 - shlli r19, 45, r19 // multiply by two and convert to s2.58 - /* bubble */ - sub r19, r18, r18 - shari r18, 28, r18 // some 22 bit inverse in s1.30 - muls.l r18, r25, r0 // s2.60 - muls.l r18, r4, r25 // s32.30 - /* bubble */ - shari r0, 16, r19 // s-16.44 - muls.l r19, r18, r19 // s-16.74 - shari r25, 63, r0 - shari r4, 14, r18 // s19.-14 - shari r19, 30, r19 // s-16.44 - muls.l r19, r18, r19 // s15.30 - xor r21, r0, r21 // You could also use the constant 1 << 27. - add r21, r25, r21 - sub r21, r19, r21 - shard r21, r1, r21 - sub r21, r0, r0 - blink tr0, r63 - ENDFUNC(GLOBAL(sdivsi3)) -/* This table has been generated by divtab.c . -Defects for bias -330: - Max defect: 6.081536e-07 at -1.000000e+00 - Min defect: 2.849516e-08 at 1.030651e+00 - Max 2nd step defect: 9.606539e-12 at -1.000000e+00 - Min 2nd step defect: 0.000000e+00 at 0.000000e+00 - Defect at 1: 1.238659e-07 - Defect at -2: 1.061708e-07 */ -#else /* ! __pic__ || ! __SHMEDIA__ */ - .section .rodata -#endif /* __pic__ */ -#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__) - .balign 2 - .type Local_div_table,@object - .size Local_div_table,128 -/* negative division constants */ - .word -16638 - .word -17135 - .word -17737 - .word -18433 - .word -19103 - .word -19751 - .word -20583 - .word -21383 - .word -22343 - .word -23353 - .word -24407 - .word -25582 - .word -26863 - .word -28382 - .word -29965 - .word -31800 -/* negative division factors */ - .byte 66 - .byte 70 - .byte 75 - .byte 81 - .byte 87 - .byte 93 - .byte 101 - .byte 109 - .byte 119 - .byte 130 - .byte 142 - .byte 156 - .byte 172 - .byte 192 - .byte 214 - .byte 241 - .skip 16 -Local_div_table: - .skip 16 -/* positive division factors */ - .byte 241 - .byte 214 - .byte 192 - .byte 172 - .byte 156 - .byte 142 - .byte 130 - .byte 119 - .byte 109 - .byte 101 - .byte 93 - .byte 87 - .byte 81 - .byte 75 - .byte 70 - .byte 66 -/* positive division constants */ - .word 31801 - .word 29966 - .word 28383 - .word 26864 - .word 25583 - .word 24408 - .word 23354 - .word 22344 - .word 21384 - .word 20584 - .word 19752 - .word 19104 - .word 18434 - .word 17738 - .word 17136 - .word 16639 - .section .rodata -#endif /* TEXT_DATA_BUG */ - .balign 2 - .type GLOBAL(div_table),@object - .size GLOBAL(div_table),128 -/* negative division constants */ - .word -16638 - .word -17135 - .word -17737 - .word -18433 - .word -19103 - .word -19751 - .word -20583 - .word -21383 - .word -22343 - .word -23353 - .word -24407 - .word -25582 - .word -26863 - .word -28382 - .word -29965 - .word -31800 -/* negative division factors */ - .byte 66 - .byte 70 - .byte 75 - .byte 81 - .byte 87 - .byte 93 - .byte 101 - .byte 109 - .byte 119 - .byte 130 - .byte 142 - .byte 156 - .byte 172 
- .byte 192 - .byte 214 - .byte 241 - .skip 16 - .global GLOBAL(div_table) -GLOBAL(div_table): - HIDDEN_ALIAS(div_table_internal,div_table) - .skip 16 -/* positive division factors */ - .byte 241 - .byte 214 - .byte 192 - .byte 172 - .byte 156 - .byte 142 - .byte 130 - .byte 119 - .byte 109 - .byte 101 - .byte 93 - .byte 87 - .byte 81 - .byte 75 - .byte 70 - .byte 66 -/* positive division constants */ - .word 31801 - .word 29966 - .word 28383 - .word 26864 - .word 25583 - .word 24408 - .word 23354 - .word 22344 - .word 21384 - .word 20584 - .word 19752 - .word 19104 - .word 18434 - .word 17738 - .word 17136 - .word 16639 - -#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) -/* This code used shld, thus is not suitable for SH1 / SH2. */ - -/* Signed / unsigned division without use of FPU, optimized for SH4. - Uses a lookup table for divisors in the range -128 .. +128, and - div1 with case distinction for larger divisors in three more ranges. - The code is lumped together with the table to allow the use of mova. */ -#ifdef __LITTLE_ENDIAN__ -#define L_LSB 0 -#define L_LSWMSB 1 -#define L_MSWLSB 2 -#else -#define L_LSB 3 -#define L_LSWMSB 2 -#define L_MSWLSB 1 -#endif - - .balign 4 - .global GLOBAL(udivsi3_i4i) - FUNC(GLOBAL(udivsi3_i4i)) -GLOBAL(udivsi3_i4i): - mov.w LOCAL(c128_w), r1 - div0u - mov r4,r0 - shlr8 r0 - cmp/hi r1,r5 - extu.w r5,r1 - bf LOCAL(udiv_le128) - cmp/eq r5,r1 - bf LOCAL(udiv_ge64k) - shlr r0 - mov r5,r1 - shll16 r5 - mov.l r4,@-r15 - div1 r5,r0 - mov.l r1,@-r15 - div1 r5,r0 - div1 r5,r0 - bra LOCAL(udiv_25) - div1 r5,r0 - -LOCAL(div_le128): - mova LOCAL(div_table_ix),r0 - bra LOCAL(div_le128_2) - mov.b @(r0,r5),r1 -LOCAL(udiv_le128): - mov.l r4,@-r15 - mova LOCAL(div_table_ix),r0 - mov.b @(r0,r5),r1 - mov.l r5,@-r15 -LOCAL(div_le128_2): - mova LOCAL(div_table_inv),r0 - mov.l @(r0,r1),r1 - mov r5,r0 - tst #0xfe,r0 - mova LOCAL(div_table_clz),r0 - dmulu.l r1,r4 - mov.b @(r0,r5),r1 - bt/s LOCAL(div_by_1) - mov r4,r0 - mov.l @r15+,r5 - sts mach,r0 - /* clrt */ - addc r4,r0 - mov.l @r15+,r4 - rotcr r0 - rts - shld r1,r0 - -LOCAL(div_by_1_neg): - neg r4,r0 -LOCAL(div_by_1): - mov.l @r15+,r5 - rts - mov.l @r15+,r4 - -LOCAL(div_ge64k): - bt/s LOCAL(div_r8) - div0u - shll8 r5 - bra LOCAL(div_ge64k_2) - div1 r5,r0 -LOCAL(udiv_ge64k): - cmp/hi r0,r5 - mov r5,r1 - bt LOCAL(udiv_r8) - shll8 r5 - mov.l r4,@-r15 - div1 r5,r0 - mov.l r1,@-r15 -LOCAL(div_ge64k_2): - div1 r5,r0 - mov.l LOCAL(zero_l),r1 - .rept 4 - div1 r5,r0 - .endr - mov.l r1,@-r15 - div1 r5,r0 - mov.w LOCAL(m256_w),r1 - div1 r5,r0 - mov.b r0,@(L_LSWMSB,r15) - xor r4,r0 - and r1,r0 - bra LOCAL(div_ge64k_end) - xor r4,r0 - -LOCAL(div_r8): - shll16 r4 - bra LOCAL(div_r8_2) - shll8 r4 -LOCAL(udiv_r8): - mov.l r4,@-r15 - shll16 r4 - clrt - shll8 r4 - mov.l r5,@-r15 -LOCAL(div_r8_2): - rotcl r4 - mov r0,r1 - div1 r5,r1 - mov r4,r0 - rotcl r0 - mov r5,r4 - div1 r5,r1 - .rept 5 - rotcl r0; div1 r5,r1 - .endr - rotcl r0 - mov.l @r15+,r5 - div1 r4,r1 - mov.l @r15+,r4 - rts - rotcl r0 - - ENDFUNC(GLOBAL(udivsi3_i4i)) - - .global GLOBAL(sdivsi3_i4i) - FUNC(GLOBAL(sdivsi3_i4i)) - /* This is link-compatible with a GLOBAL(sdivsi3) call, - but we effectively clobber only r1. 
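The udiv_le128/div_le128 paths above turn division by a small divisor into one dmulu.l by a precomputed normalized inverse followed by a shift and a small adjustment. A self-contained illustration of that general trick; the divisor 10, the constant 0xCCCCCCCD (ceil(2^35/10)) and the 35-bit shift are made up for this example and are not taken from the tables below:

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
        const uint64_t inv = 0xCCCCCCCDull;     /* ceil(2^35 / 10) */
        /* sampled check that (n * inv) >> 35 reproduces n / 10 */
        for (uint64_t n = 0; n <= 0xFFFFFFFFull; n += 12347)
            assert((uint32_t)((n * inv) >> 35) == (uint32_t)(n / 10));
        assert((uint32_t)((0xFFFFFFFFull * inv) >> 35) == 0xFFFFFFFFu / 10);
        return 0;
    }
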
*/ -GLOBAL(sdivsi3_i4i): - mov.l r4,@-r15 - cmp/pz r5 - mov.w LOCAL(c128_w), r1 - bt/s LOCAL(pos_divisor) - cmp/pz r4 - mov.l r5,@-r15 - neg r5,r5 - bt/s LOCAL(neg_result) - cmp/hi r1,r5 - neg r4,r4 -LOCAL(pos_result): - extu.w r5,r0 - bf LOCAL(div_le128) - cmp/eq r5,r0 - mov r4,r0 - shlr8 r0 - bf/s LOCAL(div_ge64k) - cmp/hi r0,r5 - div0u - shll16 r5 - div1 r5,r0 - div1 r5,r0 - div1 r5,r0 -LOCAL(udiv_25): - mov.l LOCAL(zero_l),r1 - div1 r5,r0 - div1 r5,r0 - mov.l r1,@-r15 - .rept 3 - div1 r5,r0 - .endr - mov.b r0,@(L_MSWLSB,r15) - xtrct r4,r0 - swap.w r0,r0 - .rept 8 - div1 r5,r0 - .endr - mov.b r0,@(L_LSWMSB,r15) -LOCAL(div_ge64k_end): - .rept 8 - div1 r5,r0 - .endr - mov.l @r15+,r4 ! zero-extension and swap using LS unit. - extu.b r0,r0 - mov.l @r15+,r5 - or r4,r0 - mov.l @r15+,r4 - rts - rotcl r0 - -LOCAL(div_le128_neg): - tst #0xfe,r0 - mova LOCAL(div_table_ix),r0 - mov.b @(r0,r5),r1 - mova LOCAL(div_table_inv),r0 - bt/s LOCAL(div_by_1_neg) - mov.l @(r0,r1),r1 - mova LOCAL(div_table_clz),r0 - dmulu.l r1,r4 - mov.b @(r0,r5),r1 - mov.l @r15+,r5 - sts mach,r0 - /* clrt */ - addc r4,r0 - mov.l @r15+,r4 - rotcr r0 - shld r1,r0 - rts - neg r0,r0 - -LOCAL(pos_divisor): - mov.l r5,@-r15 - bt/s LOCAL(pos_result) - cmp/hi r1,r5 - neg r4,r4 -LOCAL(neg_result): - extu.w r5,r0 - bf LOCAL(div_le128_neg) - cmp/eq r5,r0 - mov r4,r0 - shlr8 r0 - bf/s LOCAL(div_ge64k_neg) - cmp/hi r0,r5 - div0u - mov.l LOCAL(zero_l),r1 - shll16 r5 - div1 r5,r0 - mov.l r1,@-r15 - .rept 7 - div1 r5,r0 - .endr - mov.b r0,@(L_MSWLSB,r15) - xtrct r4,r0 - swap.w r0,r0 - .rept 8 - div1 r5,r0 - .endr - mov.b r0,@(L_LSWMSB,r15) -LOCAL(div_ge64k_neg_end): - .rept 8 - div1 r5,r0 - .endr - mov.l @r15+,r4 ! zero-extension and swap using LS unit. - extu.b r0,r1 - mov.l @r15+,r5 - or r4,r1 -LOCAL(div_r8_neg_end): - mov.l @r15+,r4 - rotcl r1 - rts - neg r1,r0 - -LOCAL(div_ge64k_neg): - bt/s LOCAL(div_r8_neg) - div0u - shll8 r5 - mov.l LOCAL(zero_l),r1 - .rept 6 - div1 r5,r0 - .endr - mov.l r1,@-r15 - div1 r5,r0 - mov.w LOCAL(m256_w),r1 - div1 r5,r0 - mov.b r0,@(L_LSWMSB,r15) - xor r4,r0 - and r1,r0 - bra LOCAL(div_ge64k_neg_end) - xor r4,r0 - -LOCAL(c128_w): - .word 128 - -LOCAL(div_r8_neg): - clrt - shll16 r4 - mov r4,r1 - shll8 r1 - mov r5,r4 - .rept 7 - rotcl r1; div1 r5,r0 - .endr - mov.l @r15+,r5 - rotcl r1 - bra LOCAL(div_r8_neg_end) - div1 r4,r0 - -LOCAL(m256_w): - .word 0xff00 -/* This table has been generated by divtab-sh4.c. 
*/ - .balign 4 -LOCAL(div_table_clz): - .byte 0 - .byte 1 - .byte 0 - .byte -1 - .byte -1 - .byte -2 - .byte -2 - .byte -2 - .byte -2 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 -/* Lookup table translating positive divisor to index into table of - normalized inverse. N.B. the '0' entry is also the last entry of the - previous table, and causes an unaligned access for division by zero. */ -LOCAL(div_table_ix): - .byte -6 - .byte -128 - .byte -128 - .byte 0 - .byte -128 - .byte -64 - .byte 0 - .byte 64 - .byte -128 - .byte -96 - .byte -64 - .byte -32 - .byte 0 - .byte 32 - .byte 64 - .byte 96 - .byte -128 - .byte -112 - .byte -96 - .byte -80 - .byte -64 - .byte -48 - .byte -32 - .byte -16 - .byte 0 - .byte 16 - .byte 32 - .byte 48 - .byte 64 - .byte 80 - .byte 96 - .byte 112 - .byte -128 - .byte -120 - .byte -112 - .byte -104 - .byte -96 - .byte -88 - .byte -80 - .byte -72 - .byte -64 - .byte -56 - .byte -48 - .byte -40 - .byte -32 - .byte -24 - .byte -16 - .byte -8 - .byte 0 - .byte 8 - .byte 16 - .byte 24 - .byte 32 - .byte 40 - .byte 48 - .byte 56 - .byte 64 - .byte 72 - .byte 80 - .byte 88 - .byte 96 - .byte 104 - .byte 112 - .byte 120 - .byte -128 - .byte -124 - .byte -120 - .byte -116 - .byte -112 - .byte -108 - .byte -104 - .byte -100 - .byte -96 - .byte -92 - .byte -88 - .byte -84 - .byte -80 - .byte -76 - .byte -72 - .byte -68 - .byte -64 - .byte -60 - .byte -56 - .byte -52 - .byte -48 - .byte -44 - .byte -40 - .byte -36 - .byte -32 - .byte -28 - .byte -24 - .byte -20 - .byte -16 - .byte -12 - .byte -8 - .byte -4 - .byte 0 - .byte 4 - .byte 8 - .byte 12 - .byte 16 - .byte 20 - .byte 24 - .byte 28 - .byte 32 - .byte 36 - .byte 40 - .byte 44 - .byte 48 - .byte 52 - .byte 56 - .byte 60 - .byte 64 - .byte 68 - .byte 72 - .byte 76 - .byte 80 - .byte 84 - .byte 88 - .byte 92 - .byte 96 - .byte 100 - .byte 104 - .byte 108 - .byte 112 - .byte 116 - .byte 120 - .byte 124 - .byte -128 -/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. 
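To make the comment above concrete: each 32-bit word of the 1/64 .. 1/127 table that follows stores only the fraction of the normalized reciprocal 128/d (the leading 1 being implicit), scaled by 2^32; spot checks suggest the generator rounds up, and d = 64 is exactly 1.0 so its entry is 0. inv_entry below is a made-up reconstruction for illustration, not divtab-sh4.c itself:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t inv_entry(uint32_t d)           /* 65 <= d <= 127 */
    {
        uint64_t num = (uint64_t)(128 - d) << 32;   /* ((128/d) - 1) * 2^32, times d */
        return (uint32_t)((num + d - 1) / d);       /* divide by d, rounding up */
    }

    int main(void)
    {
        printf("%#010x %#010x\n", inv_entry(65), inv_entry(96));
        /* prints 0xf81f81f9 0x55555556: the second word under zero_l and the
           first word under div_table_inv below. */
        return 0;
    }
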
*/ - .balign 4 -LOCAL(zero_l): - .long 0x0 - .long 0xF81F81F9 - .long 0xF07C1F08 - .long 0xE9131AC0 - .long 0xE1E1E1E2 - .long 0xDAE6076C - .long 0xD41D41D5 - .long 0xCD856891 - .long 0xC71C71C8 - .long 0xC0E07039 - .long 0xBACF914D - .long 0xB4E81B4F - .long 0xAF286BCB - .long 0xA98EF607 - .long 0xA41A41A5 - .long 0x9EC8E952 - .long 0x9999999A - .long 0x948B0FCE - .long 0x8F9C18FA - .long 0x8ACB90F7 - .long 0x86186187 - .long 0x81818182 - .long 0x7D05F418 - .long 0x78A4C818 - .long 0x745D1746 - .long 0x702E05C1 - .long 0x6C16C16D - .long 0x68168169 - .long 0x642C8591 - .long 0x60581606 - .long 0x5C9882BA - .long 0x58ED2309 -LOCAL(div_table_inv): - .long 0x55555556 - .long 0x51D07EAF - .long 0x4E5E0A73 - .long 0x4AFD6A06 - .long 0x47AE147B - .long 0x446F8657 - .long 0x41414142 - .long 0x3E22CBCF - .long 0x3B13B13C - .long 0x38138139 - .long 0x3521CFB3 - .long 0x323E34A3 - .long 0x2F684BDB - .long 0x2C9FB4D9 - .long 0x29E4129F - .long 0x27350B89 - .long 0x24924925 - .long 0x21FB7813 - .long 0x1F7047DD - .long 0x1CF06ADB - .long 0x1A7B9612 - .long 0x18118119 - .long 0x15B1E5F8 - .long 0x135C8114 - .long 0x11111112 - .long 0xECF56BF - .long 0xC9714FC - .long 0xA6810A7 - .long 0x8421085 - .long 0x624DD30 - .long 0x4104105 - .long 0x2040811 - /* maximum error: 0.987342 scaled: 0.921875*/ - - ENDFUNC(GLOBAL(sdivsi3_i4i)) -#endif /* SH3 / SH4 */ - -#endif /* L_div_table */ - -#ifdef L_udiv_qrnnd_16 -#if !__SHMEDIA__ - HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16)) - /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ - /* n1 < d, but n1 might be larger than d1. */ - .global GLOBAL(udiv_qrnnd_16) - .balign 8 -GLOBAL(udiv_qrnnd_16): - div0u - cmp/hi r6,r0 - bt .Lots - .rept 16 - div1 r6,r0 - .endr - extu.w r0,r1 - bt 0f - add r6,r0 -0: rotcl r1 - mulu.w r1,r5 - xtrct r4,r0 - swap.w r0,r0 - sts macl,r2 - cmp/hs r2,r0 - sub r2,r0 - bt 0f - addc r5,r0 - add #-1,r1 - bt 0f -1: add #-1,r1 - rts - add r5,r0 - .balign 8 -.Lots: - sub r5,r0 - swap.w r4,r1 - xtrct r0,r1 - clrt - mov r1,r0 - addc r5,r0 - mov #-1,r1 - SL1(bf, 1b, - shlr16 r1) -0: rts - nop - ENDFUNC(GLOBAL(udiv_qrnnd_16)) -#endif /* !__SHMEDIA__ */ -#endif /* L_udiv_qrnnd_16 */ diff --git a/gcc/config/sh/lib1funcs.h b/gcc/config/sh/lib1funcs.h deleted file mode 100644 index af4b41cc314..00000000000 --- a/gcc/config/sh/lib1funcs.h +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2009 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. 
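A side note on the naming macros defined in the rest of this header just below: GLOBAL() pastes __USER_LABEL_PREFIX__ onto a double-underscore name, which is why the same routine is __udivsi3 on plain ELF targets but gains a third underscore on targets whose user labels carry a leading '_' (the v850 file later in this patch spells its entry points ___udivsi3 and so on for the same reason). A preprocessor-only sketch; the macro bodies are copied from the header, the expansions are illustrative:

    #define CONCAT(A,B) A##B
    #define GLOBAL0(U,X) CONCAT(U,__##X)
    #define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)

    /* __USER_LABEL_PREFIX__ empty:        GLOBAL(udivsi3) expands to __udivsi3
       __USER_LABEL_PREFIX__ defined as _: GLOBAL(udivsi3) expands to ___udivsi3 */
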
*/ - -#ifdef __ELF__ -#define LOCAL(X) .L_##X -#define FUNC(X) .type X,@function -#define HIDDEN_FUNC(X) FUNC(X); .hidden X -#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X) -#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X -#define ENDFUNC(X) ENDFUNC0(X) -#else -#define LOCAL(X) L_##X -#define FUNC(X) -#define HIDDEN_FUNC(X) -#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y) -#define ENDFUNC(X) -#endif - -#define CONCAT(A,B) A##B -#define GLOBAL0(U,X) CONCAT(U,__##X) -#define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X) - -#define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y) - -#if defined __SH2A__ && defined __FMOVD_ENABLED__ -#undef FMOVD_WORKS -#define FMOVD_WORKS -#endif - -#ifdef __LITTLE_ENDIAN__ -#define DR00 fr1 -#define DR01 fr0 -#define DR20 fr3 -#define DR21 fr2 -#define DR40 fr5 -#define DR41 fr4 -#else /* !__LITTLE_ENDIAN__ */ -#define DR00 fr0 -#define DR01 fr1 -#define DR20 fr2 -#define DR21 fr3 -#define DR40 fr4 -#define DR41 fr5 -#endif /* !__LITTLE_ENDIAN__ */ - -#ifdef __sh1__ -#define SL(branch, dest, in_slot, in_slot_arg2) \ - in_slot, in_slot_arg2; branch dest -#define SL1(branch, dest, in_slot) \ - in_slot; branch dest -#else /* ! __sh1__ */ -#define SL(branch, dest, in_slot, in_slot_arg2) \ - branch##.s dest; in_slot, in_slot_arg2 -#define SL1(branch, dest, in_slot) \ - branch##/s dest; in_slot -#endif /* !__sh1__ */ diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index 1e654801334..cc26e05a764 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -1983,7 +1983,7 @@ struct sh_args { that the native compiler puts too large (> 32) immediate shift counts into a register and shifts by the register, letting the SH decide what to do instead of doing that itself. */ -/* ??? The library routines in lib1funcs.asm truncate the shift count. +/* ??? The library routines in lib1funcs.S truncate the shift count. However, the SH3 has hardware shifts that do not truncate exactly as gcc expects - the sign bit is significant - so it appears that we need to leave this zero for correct SH3 code. */ diff --git a/gcc/config/sh/t-linux b/gcc/config/sh/t-linux index a5c711618c6..2304fb176cb 100644 --- a/gcc/config/sh/t-linux +++ b/gcc/config/sh/t-linux @@ -1,5 +1,3 @@ -LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array - LIB2FUNCS_EXTRA= $(srcdir)/config/sh/linux-atomic.asm MULTILIB_DIRNAMES= diff --git a/gcc/config/sh/t-netbsd b/gcc/config/sh/t-netbsd index de172d3f73f..dea1c478cb5 100644 --- a/gcc/config/sh/t-netbsd +++ b/gcc/config/sh/t-netbsd @@ -17,6 +17,5 @@ # . TARGET_LIBGCC2_CFLAGS = -fpic -mieee -LIB1ASMFUNCS_CACHE = _ic_invalidate LIB2FUNCS_EXTRA= diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh index 6eaf784e8ae..56ea83e0697 100644 --- a/gcc/config/sh/t-sh +++ b/gcc/config/sh/t-sh @@ -22,13 +22,6 @@ sh-c.o: $(srcdir)/config/sh/sh-c.c \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/sh/sh-c.c -LIB1ASMSRC = sh/lib1funcs.asm -LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movmem \ - _movmem_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ - _div_table _udiv_qrnnd_16 \ - $(LIB1ASMFUNCS_CACHE) -LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array - TARGET_LIBGCC2_CFLAGS = -mieee DEFAULT_ENDIAN = $(word 1,$(TM_ENDIAN_CONFIG)) diff --git a/gcc/config/sh/t-sh64 b/gcc/config/sh/t-sh64 index d88f929fd7a..3bd9205079b 100644 --- a/gcc/config/sh/t-sh64 +++ b/gcc/config/sh/t-sh64 @@ -1,4 +1,4 @@ -# Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc. 
+# Copyright (C) 2002, 2004, 2005, 2011 Free Software Foundation, Inc. # # This file is part of GCC. # @@ -16,13 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMFUNCS = \ - _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ - _shcompact_call_trampoline _shcompact_return_trampoline \ - _shcompact_incoming_args _ic_invalidate _nested_trampoline \ - _push_pop_shmedia_regs \ - _udivdi3 _divdi3 _umoddi3 _moddi3 _div_table - MULTILIB_CPU_DIRS= $(ML_sh1) $(ML_sh2e) $(ML_sh2) $(ML_sh3e) $(ML_sh3) $(ML_sh4_nofpu) $(ML_sh4_single_only) $(ML_sh4_single) $(ML_sh4) $(ML_sh5_32media:m5-32media/=media32) $(ML_sh5_32media_nofpu:m5-32media-nofpu/=nofpu/media32) $(ML_sh5_compact:m5-compact/=compact) $(ML_sh5_compact_nofpu:m5-compact-nofpu/=nofpu/compact) $(ML_sh5_64media:m5-64media/=media64) $(ML_sh5_64media_nofpu:m5-64media-nofpu/=nofpu/media64) MULTILIB_RAW_DIRNAMES= $(MULTILIB_ENDIAN:/mb= mb) $(MULTILIB_CPU_DIRS:/=) diff --git a/gcc/config/sparc/lb1spc.asm b/gcc/config/sparc/lb1spc.asm deleted file mode 100644 index b60bd5740e7..00000000000 --- a/gcc/config/sparc/lb1spc.asm +++ /dev/null @@ -1,784 +0,0 @@ -/* This is an assembly language implementation of mulsi3, divsi3, and modsi3 - for the sparc processor. - - These routines are derived from the SPARC Architecture Manual, version 8, - slightly edited to match the desired calling convention, and also to - optimize them for our purposes. */ - -#ifdef L_mulsi3 -.text - .align 4 - .global .umul - .proc 4 -.umul: - or %o0, %o1, %o4 ! logical or of multiplier and multiplicand - mov %o0, %y ! multiplier to Y register - andncc %o4, 0xfff, %o5 ! mask out lower 12 bits - be mul_shortway ! can do it the short way - andcc %g0, %g0, %o4 ! zero the partial product and clear NV cc - ! - ! long multiply - ! - mulscc %o4, %o1, %o4 ! first iteration of 33 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 ! 32nd iteration - mulscc %o4, %g0, %o4 ! last iteration only shifts - ! the upper 32 bits of product are wrong, but we do not care - retl - rd %y, %o0 - ! - ! short multiply - ! -mul_shortway: - mulscc %o4, %o1, %o4 ! first iteration of 13 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 ! 12th iteration - mulscc %o4, %g0, %o4 ! last iteration only shifts - rd %y, %o5 - sll %o4, 12, %o4 ! left shift partial product by 12 bits - srl %o5, 20, %o5 ! right shift partial product by 20 bits - retl - or %o5, %o4, %o0 ! merge for true product -#endif - -#ifdef L_divsi3 -/* - * Division and remainder, from Appendix E of the SPARC Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. 
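The parameter description that the comment continues with below (N, WORDSIZE, ITER, V) is for a loop that develops the quotient four bits per iteration in non-restoring form, with a one-instruction fixup at the end. As a much simpler reference point, here is a radix-2 restoring version of the same quotient/remainder development; udiv_sketch is a made-up name and this is a sketch of the idea, not the SPARC sequence:

    #include <stdint.h>
    #include <assert.h>

    static uint32_t udiv_sketch(uint32_t n, uint32_t d, uint32_t *rem)
    {
        uint64_t r = 0;                        /* partial remainder (d != 0) */
        uint32_t q = 0;
        for (int i = 31; i >= 0; i--) {
            r = (r << 1) | ((n >> i) & 1);     /* bring down the next dividend bit */
            q <<= 1;
            if (r >= d) {                      /* one conditional subtract per bit */
                r -= d;
                q |= 1;
            }
        }
        *rem = (uint32_t)r;
        return q;                              /* .udiv wants q, .urem wants r */
    }

    int main(void)
    {
        uint32_t r;
        assert(udiv_sketch(1000000007u, 10007u, &r) == 1000000007u / 10007u);
        assert(r == 1000000007u % 10007u);
        return 0;
    }
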
- * - * m4 parameters: - * .div name of function to generate - * div div=div => %o0 / %o1; div=rem => %o0 % %o1 - * true true=true => signed; true=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - .global .udiv - .align 4 - .proc 4 - .text -.udiv: - b ready_to_divide - mov 0, %g3 ! result is always positive - - .global .div - .align 4 - .proc 4 - .text -.div: - ! compute sign of result; if neither is negative, no problem - orcc %o1, %o0, %g0 ! either negative? - bge ready_to_divide ! no, go do the divide - xor %o1, %o0, %g3 ! compute sign in any case - tst %o1 - bge 1f - tst %o0 - ! %o1 is definitely negative; %o0 might also be negative - bge ready_to_divide ! if %o0 not negative... - sub %g0, %o1, %o1 ! in any case, make %o1 nonneg -1: ! %o0 is negative, %o1 is nonnegative - sub %g0, %o0, %o0 ! make %o0 nonnegative - - -ready_to_divide: - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta 0x2 ! ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu got_result ! (and algorithm fails otherwise) - clr %o2 - sethi %hi(1 << (32 - 4 - 1)), %g1 - cmp %o3, %g1 - blu not_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g2 - sll %o5, 4, %o5 - b 1b - add %o4, 1, %o4 - - ! Now compute %g2. - 2: addcc %o5, %o5, %o5 - bcc not_too_big - add %g2, 1, %g2 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - b do_single_div - sub %g2, 1, %g2 - - not_too_big: - 3: cmp %o5, %o3 - blu 2b - nop - be do_single_div - nop - /* NB: these are commented out in the V8-SPARC manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g2 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! 
So we unroll slightly... - do_single_div: - subcc %g2, 1, %g2 - bl end_regular_divide - nop - sub %o3, %o5, %o3 - mov 1, %o2 - b end_single_divloop - nop - single_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - end_single_divloop: - subcc %g2, 1, %g2 - bge single_divloop - tst %o3 - b,a end_regular_divide - -not_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - be got_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -divloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 - -L4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - - -L3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - -L3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - -L3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L4.9: - ! 
remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -end_regular_divide: - subcc %o4, 1, %o4 - bge divloop - tst %o3 - bl,a got_result - ! non-restoring fixup here (one instruction only!) - sub %o2, 1, %o2 - - -got_result: - ! check to see if answer should be < 0 - tst %g3 - bl,a 1f - sub %g0, %o2, %o2 -1: - retl - mov %o2, %o0 -#endif - -#ifdef L_modsi3 -/* This implementation was taken from glibc: - * - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ -.text - .align 4 - .global .urem - .proc 4 -.urem: - b divide - mov 0, %g3 ! result always positive - - .align 4 - .global .rem - .proc 4 -.rem: - ! compute sign of result; if neither is negative, no problem - orcc %o1, %o0, %g0 ! either negative? - bge 2f ! no, go do the divide - mov %o0, %g3 ! sign of remainder matches %o0 - tst %o1 - bge 1f - tst %o0 - ! %o1 is definitely negative; %o0 might also be negative - bge 2f ! if %o0 not negative... - sub %g0, %o1, %o1 ! in any case, make %o1 nonneg -1: ! %o0 is negative, %o1 is nonnegative - sub %g0, %o0, %o0 ! make %o0 nonnegative -2: - - ! Ready to divide. Compute size of quotient; scale comparand. -divide: - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta 0x2 !ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu got_result ! (and algorithm fails otherwise) - clr %o2 - sethi %hi(1 << (32 - 4 - 1)), %g1 - cmp %o3, %g1 - blu not_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g2 - sll %o5, 4, %o5 - b 1b - add %o4, 1, %o4 - - ! Now compute %g2. - 2: addcc %o5, %o5, %o5 - bcc not_too_big - add %g2, 1, %g2 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - b do_single_div - sub %g2, 1, %g2 - - not_too_big: - 3: cmp %o5, %o3 - blu 2b - nop - be do_single_div - nop - /* NB: these are commented out in the V8-SPARC manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g2 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! 
first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - do_single_div: - subcc %g2, 1, %g2 - bl end_regular_divide - nop - sub %o3, %o5, %o3 - mov 1, %o2 - b end_single_divloop - nop - single_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - end_single_divloop: - subcc %g2, 1, %g2 - bge single_divloop - tst %o3 - b,a end_regular_divide - -not_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - be got_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -divloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 -L4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - -L3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - -L3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - -L3.13: - ! 
remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L4.9: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -end_regular_divide: - subcc %o4, 1, %o4 - bge divloop - tst %o3 - bl,a got_result - ! non-restoring fixup here (one instruction only!) - add %o3, %o1, %o3 - -got_result: - ! check to see if answer should be < 0 - tst %g3 - bl,a 1f - sub %g0, %o3, %o3 -1: - retl - mov %o3, %o0 - -#endif - diff --git a/gcc/config/sparc/lb1spl.asm b/gcc/config/sparc/lb1spl.asm deleted file mode 100644 index 973401f8018..00000000000 --- a/gcc/config/sparc/lb1spl.asm +++ /dev/null @@ -1,246 +0,0 @@ -/* This is an assembly language implementation of mulsi3, divsi3, and modsi3 - for the sparclite processor. - - These routines are all from the SPARClite User's Guide, slightly edited - to match the desired calling convention, and also to optimize them. */ - -#ifdef L_udivsi3 -.text - .align 4 - .global .udiv - .proc 04 -.udiv: - wr %g0,%g0,%y ! Not a delayed write for sparclite - tst %g0 - divscc %o0,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - retl - divscc %g1,%o1,%o0 -#endif - -#ifdef L_umodsi3 -.text - .align 4 - .global .urem - .proc 04 -.urem: - wr %g0,%g0,%y ! Not a delayed write for sparclite - tst %g0 - divscc %o0,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - bl 1f - rd %y,%o0 - retl - nop -1: retl - add %o0,%o1,%o0 -#endif - -#ifdef L_divsi3 -.text - .align 4 - .global .div - .proc 04 -! ??? This routine could be made faster if was optimized, and if it was -! rewritten to only calculate the quotient. -.div: - wr %g0,%g0,%y ! 
Not a delayed write for sparclite - mov %o1,%o4 - tst %o1 - bl,a 1f - sub %g0,%o4,%o4 -1: tst %o0 - bl,a 2f - mov -1,%y -2: divscc %o0,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - be 6f - mov %y,%o3 - bg 4f - addcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bl 5f - tst %g1 - ba 5f - add %o3,%o4,%o3 -4: subcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bge 5f - tst %g1 - sub %o3,%o4,%o3 -5: bl,a 6f - add %g1,1,%g1 -6: tst %o1 - bl,a 7f - sub %g0,%g1,%g1 -7: retl - mov %g1,%o0 ! Quotient is in %g1. -#endif - -#ifdef L_modsi3 -.text - .align 4 - .global .rem - .proc 04 -! ??? This routine could be made faster if was optimized, and if it was -! rewritten to only calculate the remainder. -.rem: - wr %g0,%g0,%y ! Not a delayed write for sparclite - mov %o1,%o4 - tst %o1 - bl,a 1f - sub %g0,%o4,%o4 -1: tst %o0 - bl,a 2f - mov -1,%y -2: divscc %o0,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - be 6f - mov %y,%o3 - bg 4f - addcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bl 5f - tst %g1 - ba 5f - add %o3,%o4,%o3 -4: subcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bge 5f - tst %g1 - sub %o3,%o4,%o3 -5: bl,a 6f - add %g1,1,%g1 -6: tst %o1 - bl,a 7f - sub %g0,%g1,%g1 -7: retl - mov %o3,%o0 ! Remainder is in %o3. -#endif diff --git a/gcc/config/sparc/t-elf b/gcc/config/sparc/t-elf index 7073bcb7721..be926585481 100644 --- a/gcc/config/sparc/t-elf +++ b/gcc/config/sparc/t-elf @@ -17,9 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = sparc/lb1spc.asm -LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 - MULTILIB_OPTIONS = msoft-float mcpu=v8 mflat MULTILIB_DIRNAMES = soft v8 flat MULTILIB_MATCHES = msoft-float=mno-fpu diff --git a/gcc/config/sparc/t-leon b/gcc/config/sparc/t-leon index 4f9d0a9e797..8e5e30f7ff7 100644 --- a/gcc/config/sparc/t-leon +++ b/gcc/config/sparc/t-leon @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = sparc/lb1spc.asm -LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 - # Multilibs for LEON # LEON is a SPARC-V8, but the AT697 implementation has a bug in the # V8-specific instructions. diff --git a/gcc/config/spu/t-spu-elf b/gcc/config/spu/t-spu-elf index b1660353ee6..45802499525 100644 --- a/gcc/config/spu/t-spu-elf +++ b/gcc/config/spu/t-spu-elf @@ -15,10 +15,6 @@ # along with GCC; see the file COPYING3. 
If not see # . -# Suppress building libgcc1.a -LIBGCC1 = -CROSS_LIBGCC1 = - TARGET_LIBGCC2_CFLAGS = -fPIC -mwarn-reloc -D__IN_LIBGCC2 # We exclude those because the libgcc2.c default versions do not support diff --git a/gcc/config/v850/lib1funcs.asm b/gcc/config/v850/lib1funcs.asm deleted file mode 100644 index 04e9b1e0ad4..00000000000 --- a/gcc/config/v850/lib1funcs.asm +++ /dev/null @@ -1,2330 +0,0 @@ -/* libgcc routines for NEC V850. - Copyright (C) 1996, 1997, 2002, 2005, 2009, 2010 - Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -#ifdef L_mulsi3 - .text - .globl ___mulsi3 - .type ___mulsi3,@function -___mulsi3: -#ifdef __v850__ -/* - #define SHIFT 12 - #define MASK ((1 << SHIFT) - 1) - - #define STEP(i, j) \ - ({ \ - short a_part = (a >> (i)) & MASK; \ - short b_part = (b >> (j)) & MASK; \ - int res = (((int) a_part) * ((int) b_part)); \ - res; \ - }) - - int - __mulsi3 (unsigned a, unsigned b) - { - return STEP (0, 0) + - ((STEP (SHIFT, 0) + STEP (0, SHIFT)) << SHIFT) + - ((STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) + STEP (2 * SHIFT, 0)) - << (2 * SHIFT)); - } -*/ - mov r6, r14 - movea lo(32767), r0, r10 - and r10, r14 - mov r7, r15 - and r10, r15 - shr 15, r6 - mov r6, r13 - and r10, r13 - shr 15, r7 - mov r7, r12 - and r10, r12 - shr 15, r6 - shr 15, r7 - mov r14, r10 - mulh r15, r10 - mov r14, r11 - mulh r12, r11 - mov r13, r16 - mulh r15, r16 - mulh r14, r7 - mulh r15, r6 - add r16, r11 - mulh r13, r12 - shl 15, r11 - add r11, r10 - add r12, r7 - add r6, r7 - shl 30, r7 - add r7, r10 - jmp [r31] -#endif /* __v850__ */ -#if defined(__v850e__) || defined(__v850ea__) || defined(__v850e2__) || defined(__v850e2v3__) - /* This routine is almost unneccesarry because gcc - generates the MUL instruction for the RTX mulsi3. - But if someone wants to link his application with - previsously compiled v850 objects then they will - need this function. */ - - /* It isn't good to put the inst sequence as below; - mul r7, r6, - mov r6, r10, r0 - In this case, there is a RAW hazard between them. - MUL inst takes 2 cycle in EX stage, then MOV inst - must wait 1cycle. 
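Aside on the __v850__ path of ___mulsi3 above: it splits each operand into small pieces, forms the partial products with mulh, and keeps only the low 32 bits of the result, so the highest-order partial products never have to be computed. The same identity with 16-bit halves (the assembly uses 15-bit chunks, presumably so every piece stays non-negative for the signed mulh; mulsi3_sketch is a made-up name):

    #include <stdint.h>
    #include <assert.h>

    static uint32_t mulsi3_sketch(uint32_t a, uint32_t b)
    {
        uint32_t a0 = a & 0xFFFF, a1 = a >> 16;
        uint32_t b0 = b & 0xFFFF, b1 = b >> 16;
        /* a1*b1 only contributes at bit 32 and above, so mod 2^32 it vanishes */
        return a0 * b0 + ((a0 * b1 + a1 * b0) << 16);
    }

    int main(void)
    {
        assert(mulsi3_sketch(123456789u, 987654321u) == 123456789u * 987654321u);
        return 0;
    }
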
*/ - mov r7, r10 - mul r6, r10, r0 - jmp [r31] -#endif /* __v850e__ */ - .size ___mulsi3,.-___mulsi3 -#endif /* L_mulsi3 */ - - -#ifdef L_udivsi3 - .text - .global ___udivsi3 - .type ___udivsi3,@function -___udivsi3: -#ifdef __v850__ - mov 1,r12 - mov 0,r10 - cmp r6,r7 - bnl .L12 - movhi hi(-2147483648),r0,r13 - cmp r0,r7 - blt .L12 -.L4: - shl 1,r7 - shl 1,r12 - cmp r6,r7 - bnl .L12 - cmp r0,r12 - be .L8 - mov r7,r19 - and r13,r19 - be .L4 - br .L12 -.L9: - cmp r7,r6 - bl .L10 - sub r7,r6 - or r12,r10 -.L10: - shr 1,r12 - shr 1,r7 -.L12: - cmp r0,r12 - bne .L9 -.L8: - jmp [r31] - -#else /* defined(__v850e__) */ - - /* See comments at end of __mulsi3. */ - mov r6, r10 - divu r7, r10, r0 - jmp [r31] - -#endif /* __v850e__ */ - - .size ___udivsi3,.-___udivsi3 -#endif - -#ifdef L_divsi3 - .text - .globl ___divsi3 - .type ___divsi3,@function -___divsi3: -#ifdef __v850__ - add -8,sp - st.w r31,4[sp] - st.w r22,0[sp] - mov 1,r22 - tst r7,r7 - bp .L3 - subr r0,r7 - subr r0,r22 -.L3: - tst r6,r6 - bp .L4 - subr r0,r6 - subr r0,r22 -.L4: - jarl ___udivsi3,r31 - cmp r0,r22 - bp .L7 - subr r0,r10 -.L7: - ld.w 0[sp],r22 - ld.w 4[sp],r31 - add 8,sp - jmp [r31] - -#else /* defined(__v850e__) */ - - /* See comments at end of __mulsi3. */ - mov r6, r10 - div r7, r10, r0 - jmp [r31] - -#endif /* __v850e__ */ - - .size ___divsi3,.-___divsi3 -#endif - -#ifdef L_umodsi3 - .text - .globl ___umodsi3 - .type ___umodsi3,@function -___umodsi3: -#ifdef __v850__ - add -12,sp - st.w r31,8[sp] - st.w r7,4[sp] - st.w r6,0[sp] - jarl ___udivsi3,r31 - ld.w 4[sp],r7 - mov r10,r6 - jarl ___mulsi3,r31 - ld.w 0[sp],r6 - subr r6,r10 - ld.w 8[sp],r31 - add 12,sp - jmp [r31] - -#else /* defined(__v850e__) */ - - /* See comments at end of __mulsi3. */ - divu r7, r6, r10 - jmp [r31] - -#endif /* __v850e__ */ - - .size ___umodsi3,.-___umodsi3 -#endif /* L_umodsi3 */ - -#ifdef L_modsi3 - .text - .globl ___modsi3 - .type ___modsi3,@function -___modsi3: -#ifdef __v850__ - add -12,sp - st.w r31,8[sp] - st.w r7,4[sp] - st.w r6,0[sp] - jarl ___divsi3,r31 - ld.w 4[sp],r7 - mov r10,r6 - jarl ___mulsi3,r31 - ld.w 0[sp],r6 - subr r6,r10 - ld.w 8[sp],r31 - add 12,sp - jmp [r31] - -#else /* defined(__v850e__) */ - - /* See comments at end of __mulsi3. */ - div r7, r6, r10 - jmp [r31] - -#endif /* __v850e__ */ - - .size ___modsi3,.-___modsi3 -#endif /* L_modsi3 */ - -#ifdef L_save_2 - .text - .align 2 - .globl __save_r2_r29 - .type __save_r2_r29,@function - /* Allocate space and save registers 2, 20 .. 29 on the stack. */ - /* Called via: jalr __save_r2_r29,r10. */ -__save_r2_r29: -#ifdef __EP__ - mov ep,r1 - addi -44,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r20,36[ep] - sst.w r2,40[ep] - mov r1,ep -#else - addi -44,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r20,36[sp] - st.w r2,40[sp] -#endif - jmp [r10] - .size __save_r2_r29,.-__save_r2_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r2_r29. 
*/ - .align 2 - .globl __return_r2_r29 - .type __return_r2_r29,@function -__return_r2_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r20 - sld.w 40[ep],r2 - addi 44,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - ld.w 36[sp],r20 - ld.w 40[sp],r2 - addi 44,sp,sp -#endif - jmp [r31] - .size __return_r2_r29,.-__return_r2_r29 -#endif /* L_save_2 */ - -#ifdef L_save_20 - .text - .align 2 - .globl __save_r20_r29 - .type __save_r20_r29,@function - /* Allocate space and save registers 20 .. 29 on the stack. */ - /* Called via: jalr __save_r20_r29,r10. */ -__save_r20_r29: -#ifdef __EP__ - mov ep,r1 - addi -40,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r20,36[ep] - mov r1,ep -#else - addi -40,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r20,36[sp] -#endif - jmp [r10] - .size __save_r20_r29,.-__save_r20_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r20_r29. */ - .align 2 - .globl __return_r20_r29 - .type __return_r20_r29,@function -__return_r20_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r20 - addi 40,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - ld.w 36[sp],r20 - addi 40,sp,sp -#endif - jmp [r31] - .size __return_r20_r29,.-__return_r20_r29 -#endif /* L_save_20 */ - -#ifdef L_save_21 - .text - .align 2 - .globl __save_r21_r29 - .type __save_r21_r29,@function - /* Allocate space and save registers 21 .. 29 on the stack. */ - /* Called via: jalr __save_r21_r29,r10. */ -__save_r21_r29: -#ifdef __EP__ - mov ep,r1 - addi -36,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - mov r1,ep -#else - addi -36,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] -#endif - jmp [r10] - .size __save_r21_r29,.-__save_r21_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r21_r29. 
*/ - .align 2 - .globl __return_r21_r29 - .type __return_r21_r29,@function -__return_r21_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - addi 36,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - addi 36,sp,sp -#endif - jmp [r31] - .size __return_r21_r29,.-__return_r21_r29 -#endif /* L_save_21 */ - -#ifdef L_save_22 - .text - .align 2 - .globl __save_r22_r29 - .type __save_r22_r29,@function - /* Allocate space and save registers 22 .. 29 on the stack. */ - /* Called via: jalr __save_r22_r29,r10. */ -__save_r22_r29: -#ifdef __EP__ - mov ep,r1 - addi -32,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - mov r1,ep -#else - addi -32,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] -#endif - jmp [r10] - .size __save_r22_r29,.-__save_r22_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r22_r29. */ - .align 2 - .globl __return_r22_r29 - .type __return_r22_r29,@function -__return_r22_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - addi 32,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - addi 32,sp,sp -#endif - jmp [r31] - .size __return_r22_r29,.-__return_r22_r29 -#endif /* L_save_22 */ - -#ifdef L_save_23 - .text - .align 2 - .globl __save_r23_r29 - .type __save_r23_r29,@function - /* Allocate space and save registers 23 .. 29 on the stack. */ - /* Called via: jalr __save_r23_r29,r10. */ -__save_r23_r29: -#ifdef __EP__ - mov ep,r1 - addi -28,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - mov r1,ep -#else - addi -28,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] -#endif - jmp [r10] - .size __save_r23_r29,.-__save_r23_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r23_r29. */ - .align 2 - .globl __return_r23_r29 - .type __return_r23_r29,@function -__return_r23_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - addi 28,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - addi 28,sp,sp -#endif - jmp [r31] - .size __return_r23_r29,.-__return_r23_r29 -#endif /* L_save_23 */ - -#ifdef L_save_24 - .text - .align 2 - .globl __save_r24_r29 - .type __save_r24_r29,@function - /* Allocate space and save registers 24 .. 29 on the stack. */ - /* Called via: jalr __save_r24_r29,r10. 
*/ -__save_r24_r29: -#ifdef __EP__ - mov ep,r1 - addi -24,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - mov r1,ep -#else - addi -24,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] -#endif - jmp [r10] - .size __save_r24_r29,.-__save_r24_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r24_r29. */ - .align 2 - .globl __return_r24_r29 - .type __return_r24_r29,@function -__return_r24_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - addi 24,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - addi 24,sp,sp -#endif - jmp [r31] - .size __return_r24_r29,.-__return_r24_r29 -#endif /* L_save_24 */ - -#ifdef L_save_25 - .text - .align 2 - .globl __save_r25_r29 - .type __save_r25_r29,@function - /* Allocate space and save registers 25 .. 29 on the stack. */ - /* Called via: jalr __save_r25_r29,r10. */ -__save_r25_r29: -#ifdef __EP__ - mov ep,r1 - addi -20,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - mov r1,ep -#else - addi -20,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] -#endif - jmp [r10] - .size __save_r25_r29,.-__save_r25_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r25_r29. */ - .align 2 - .globl __return_r25_r29 - .type __return_r25_r29,@function -__return_r25_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - addi 20,sp,sp - mov r1,ep -#else - ld.w 0[ep],r29 - ld.w 4[ep],r28 - ld.w 8[ep],r27 - ld.w 12[ep],r26 - ld.w 16[ep],r25 - addi 20,sp,sp -#endif - jmp [r31] - .size __return_r25_r29,.-__return_r25_r29 -#endif /* L_save_25 */ - -#ifdef L_save_26 - .text - .align 2 - .globl __save_r26_r29 - .type __save_r26_r29,@function - /* Allocate space and save registers 26 .. 29 on the stack. */ - /* Called via: jalr __save_r26_r29,r10. */ -__save_r26_r29: -#ifdef __EP__ - mov ep,r1 - add -16,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - mov r1,ep -#else - add -16,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] -#endif - jmp [r10] - .size __save_r26_r29,.-__save_r26_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r26_r29. */ - .align 2 - .globl __return_r26_r29 - .type __return_r26_r29,@function -__return_r26_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - addi 16,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - addi 16,sp,sp -#endif - jmp [r31] - .size __return_r26_r29,.-__return_r26_r29 -#endif /* L_save_26 */ - -#ifdef L_save_27 - .text - .align 2 - .globl __save_r27_r29 - .type __save_r27_r29,@function - /* Allocate space and save registers 27 .. 29 on the stack. */ - /* Called via: jalr __save_r27_r29,r10. 
*/ -__save_r27_r29: - add -12,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - jmp [r10] - .size __save_r27_r29,.-__save_r27_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r27_r29. */ - .align 2 - .globl __return_r27_r29 - .type __return_r27_r29,@function -__return_r27_r29: - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - add 12,sp - jmp [r31] - .size __return_r27_r29,.-__return_r27_r29 -#endif /* L_save_27 */ - -#ifdef L_save_28 - .text - .align 2 - .globl __save_r28_r29 - .type __save_r28_r29,@function - /* Allocate space and save registers 28,29 on the stack. */ - /* Called via: jalr __save_r28_r29,r10. */ -__save_r28_r29: - add -8,sp - st.w r29,0[sp] - st.w r28,4[sp] - jmp [r10] - .size __save_r28_r29,.-__save_r28_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r28_r29. */ - .align 2 - .globl __return_r28_r29 - .type __return_r28_r29,@function -__return_r28_r29: - ld.w 0[sp],r29 - ld.w 4[sp],r28 - add 8,sp - jmp [r31] - .size __return_r28_r29,.-__return_r28_r29 -#endif /* L_save_28 */ - -#ifdef L_save_29 - .text - .align 2 - .globl __save_r29 - .type __save_r29,@function - /* Allocate space and save register 29 on the stack. */ - /* Called via: jalr __save_r29,r10. */ -__save_r29: - add -4,sp - st.w r29,0[sp] - jmp [r10] - .size __save_r29,.-__save_r29 - - /* Restore saved register 29, deallocate stack and return to the user. */ - /* Called via: jr __return_r29. */ - .align 2 - .globl __return_r29 - .type __return_r29,@function -__return_r29: - ld.w 0[sp],r29 - add 4,sp - jmp [r31] - .size __return_r29,.-__return_r29 -#endif /* L_save_28 */ - -#ifdef L_save_2c - .text - .align 2 - .globl __save_r2_r31 - .type __save_r2_r31,@function - /* Allocate space and save registers 20 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r2_r31,r10. */ -__save_r2_r31: -#ifdef __EP__ - mov ep,r1 - addi -48,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r20,36[ep] - sst.w r2,40[ep] - sst.w r31,44[ep] - mov r1,ep -#else - addi -48,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r20,36[sp] - st.w r2,40[sp] - st.w r31,44[sp] -#endif - jmp [r10] - .size __save_r2_r31,.-__save_r2_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r20_r31. */ - .align 2 - .globl __return_r2_r31 - .type __return_r2_r31,@function -__return_r2_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r20 - sld.w 40[ep],r2 - sld.w 44[ep],r31 - addi 48,sp,sp - mov r1,ep -#else - ld.w 44[sp],r29 - ld.w 40[sp],r28 - ld.w 36[sp],r27 - ld.w 32[sp],r26 - ld.w 28[sp],r25 - ld.w 24[sp],r24 - ld.w 20[sp],r23 - ld.w 16[sp],r22 - ld.w 12[sp],r21 - ld.w 8[sp],r20 - ld.w 4[sp],r2 - ld.w 0[sp],r31 - addi 48,sp,sp -#endif - jmp [r31] - .size __return_r2_r31,.-__return_r2_r31 -#endif /* L_save_2c */ - -#ifdef L_save_20c - .text - .align 2 - .globl __save_r20_r31 - .type __save_r20_r31,@function - /* Allocate space and save registers 20 .. 29, 31 on the stack. 
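As an aside (not part of this patch), the 48-byte frame that __save_r2_r31 above lays out can be pictured as the following C struct; the struct and field names are purely illustrative, and unsigned int is assumed to be 32 bits wide.

    /* Illustrative layout of the frame created by `addi -48,sp,sp' in
       __save_r2_r31; the offsets follow the st.w/sst.w sequence above
       (r29 at 0[sp] ... r31 at 44[sp]), and __return_r2_r31 pops it again.  */
    struct save_r2_r31_frame
    {
      unsigned int r29;   /*  0[sp] */
      unsigned int r28;   /*  4[sp] */
      unsigned int r27;   /*  8[sp] */
      unsigned int r26;   /* 12[sp] */
      unsigned int r25;   /* 16[sp] */
      unsigned int r24;   /* 20[sp] */
      unsigned int r23;   /* 24[sp] */
      unsigned int r22;   /* 28[sp] */
      unsigned int r21;   /* 32[sp] */
      unsigned int r20;   /* 36[sp] */
      unsigned int r2;    /* 40[sp] */
      unsigned int r31;   /* 44[sp] */
    };  /* sizeof == 48, matching the addi -48,sp,sp adjustment */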
*/ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r20_r31,r10. */ -__save_r20_r31: -#ifdef __EP__ - mov ep,r1 - addi -44,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r20,36[ep] - sst.w r31,40[ep] - mov r1,ep -#else - addi -44,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r20,36[sp] - st.w r31,40[sp] -#endif - jmp [r10] - .size __save_r20_r31,.-__save_r20_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r20_r31. */ - .align 2 - .globl __return_r20_r31 - .type __return_r20_r31,@function -__return_r20_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r20 - sld.w 40[ep],r31 - addi 44,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - ld.w 36[sp],r20 - ld.w 40[sp],r31 - addi 44,sp,sp -#endif - jmp [r31] - .size __return_r20_r31,.-__return_r20_r31 -#endif /* L_save_20c */ - -#ifdef L_save_21c - .text - .align 2 - .globl __save_r21_r31 - .type __save_r21_r31,@function - /* Allocate space and save registers 21 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r21_r31,r10. */ -__save_r21_r31: -#ifdef __EP__ - mov ep,r1 - addi -40,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r31,36[ep] - mov r1,ep - jmp [r10] -#else - addi -40,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r31,36[sp] - jmp [r10] -#endif - .size __save_r21_r31,.-__save_r21_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r21_r31. */ - .align 2 - .globl __return_r21_r31 - .type __return_r21_r31,@function -__return_r21_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r31 - addi 40,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - ld.w 36[sp],r31 - addi 40,sp,sp -#endif - jmp [r31] - .size __return_r21_r31,.-__return_r21_r31 -#endif /* L_save_21c */ - -#ifdef L_save_22c - .text - .align 2 - .globl __save_r22_r31 - .type __save_r22_r31,@function - /* Allocate space and save registers 22 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r22_r31,r10. 
*/ -__save_r22_r31: -#ifdef __EP__ - mov ep,r1 - addi -36,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r31,32[ep] - mov r1,ep -#else - addi -36,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r31,32[sp] -#endif - jmp [r10] - .size __save_r22_r31,.-__save_r22_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r22_r31. */ - .align 2 - .globl __return_r22_r31 - .type __return_r22_r31,@function -__return_r22_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r31 - addi 36,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r31 - addi 36,sp,sp -#endif - jmp [r31] - .size __return_r22_r31,.-__return_r22_r31 -#endif /* L_save_22c */ - -#ifdef L_save_23c - .text - .align 2 - .globl __save_r23_r31 - .type __save_r23_r31,@function - /* Allocate space and save registers 23 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r23_r31,r10. */ -__save_r23_r31: -#ifdef __EP__ - mov ep,r1 - addi -32,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r31,28[ep] - mov r1,ep -#else - addi -32,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r31,28[sp] -#endif - jmp [r10] - .size __save_r23_r31,.-__save_r23_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r23_r31. */ - .align 2 - .globl __return_r23_r31 - .type __return_r23_r31,@function -__return_r23_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r31 - addi 32,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r31 - addi 32,sp,sp -#endif - jmp [r31] - .size __return_r23_r31,.-__return_r23_r31 -#endif /* L_save_23c */ - -#ifdef L_save_24c - .text - .align 2 - .globl __save_r24_r31 - .type __save_r24_r31,@function - /* Allocate space and save registers 24 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r24_r31,r10. */ -__save_r24_r31: -#ifdef __EP__ - mov ep,r1 - addi -28,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r31,24[ep] - mov r1,ep -#else - addi -28,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r31,24[sp] -#endif - jmp [r10] - .size __save_r24_r31,.-__save_r24_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r24_r31. 
*/ - .align 2 - .globl __return_r24_r31 - .type __return_r24_r31,@function -__return_r24_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r31 - addi 28,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r31 - addi 28,sp,sp -#endif - jmp [r31] - .size __return_r24_r31,.-__return_r24_r31 -#endif /* L_save_24c */ - -#ifdef L_save_25c - .text - .align 2 - .globl __save_r25_r31 - .type __save_r25_r31,@function - /* Allocate space and save registers 25 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r25_r31,r10. */ -__save_r25_r31: -#ifdef __EP__ - mov ep,r1 - addi -24,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r31,20[ep] - mov r1,ep -#else - addi -24,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r31,20[sp] -#endif - jmp [r10] - .size __save_r25_r31,.-__save_r25_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r25_r31. */ - .align 2 - .globl __return_r25_r31 - .type __return_r25_r31,@function -__return_r25_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r31 - addi 24,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r31 - addi 24,sp,sp -#endif - jmp [r31] - .size __return_r25_r31,.-__return_r25_r31 -#endif /* L_save_25c */ - -#ifdef L_save_26c - .text - .align 2 - .globl __save_r26_r31 - .type __save_r26_r31,@function - /* Allocate space and save registers 26 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r26_r31,r10. */ -__save_r26_r31: -#ifdef __EP__ - mov ep,r1 - addi -20,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r31,16[ep] - mov r1,ep -#else - addi -20,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r31,16[sp] -#endif - jmp [r10] - .size __save_r26_r31,.-__save_r26_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r26_r31. */ - .align 2 - .globl __return_r26_r31 - .type __return_r26_r31,@function -__return_r26_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r31 - addi 20,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r31 - addi 20,sp,sp -#endif - jmp [r31] - .size __return_r26_r31,.-__return_r26_r31 -#endif /* L_save_26c */ - -#ifdef L_save_27c - .text - .align 2 - .globl __save_r27_r31 - .type __save_r27_r31,@function - /* Allocate space and save registers 27 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r27_r31,r10. 
*/ -__save_r27_r31: -#ifdef __EP__ - mov ep,r1 - addi -16,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r31,12[ep] - mov r1,ep -#else - addi -16,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r31,12[sp] -#endif - jmp [r10] - .size __save_r27_r31,.-__save_r27_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r27_r31. */ - .align 2 - .globl __return_r27_r31 - .type __return_r27_r31,@function -__return_r27_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r31 - addi 16,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r31 - addi 16,sp,sp -#endif - jmp [r31] - .size __return_r27_r31,.-__return_r27_r31 -#endif /* L_save_27c */ - -#ifdef L_save_28c - .text - .align 2 - .globl __save_r28_r31 - .type __save_r28_r31,@function - /* Allocate space and save registers 28 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r28_r31,r10. */ -__save_r28_r31: - addi -12,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r31,8[sp] - jmp [r10] - .size __save_r28_r31,.-__save_r28_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r28_r31. */ - .align 2 - .globl __return_r28_r31 - .type __return_r28_r31,@function -__return_r28_r31: - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r31 - addi 12,sp,sp - jmp [r31] - .size __return_r28_r31,.-__return_r28_r31 -#endif /* L_save_28c */ - -#ifdef L_save_29c - .text - .align 2 - .globl __save_r29_r31 - .type __save_r29_r31,@function - /* Allocate space and save registers 29 & 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r29_r31,r10. */ -__save_r29_r31: - addi -8,sp,sp - st.w r29,0[sp] - st.w r31,4[sp] - jmp [r10] - .size __save_r29_r31,.-__save_r29_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r29_r31. */ - .align 2 - .globl __return_r29_r31 - .type __return_r29_r31,@function -__return_r29_r31: - ld.w 0[sp],r29 - ld.w 4[sp],r31 - addi 8,sp,sp - jmp [r31] - .size __return_r29_r31,.-__return_r29_r31 -#endif /* L_save_29c */ - -#ifdef L_save_31c - .text - .align 2 - .globl __save_r31 - .type __save_r31,@function - /* Allocate space and save register 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r31,r10. */ -__save_r31: - addi -4,sp,sp - st.w r31,0[sp] - jmp [r10] - .size __save_r31,.-__save_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r31. */ - .align 2 - .globl __return_r31 - .type __return_r31,@function -__return_r31: - ld.w 0[sp],r31 - addi 4,sp,sp - jmp [r31] - .size __return_r31,.-__return_r31 -#endif /* L_save_31c */ - -#ifdef L_save_interrupt - .text - .align 2 - .globl __save_interrupt - .type __save_interrupt,@function - /* Save registers r1, r4 on stack and load up with expected values. */ - /* Note, 20 bytes of stack have already been allocated. */ - /* Called via: jalr __save_interrupt,r10. 
*/ -__save_interrupt: - /* add -20,sp ; st.w r11,16[sp] ; st.w r10,12[sp] ; */ - st.w ep,0[sp] - st.w gp,4[sp] - st.w r1,8[sp] - movhi hi(__ep),r0,ep - movea lo(__ep),ep,ep - movhi hi(__gp),r0,gp - movea lo(__gp),gp,gp - jmp [r10] - .size __save_interrupt,.-__save_interrupt - - /* Restore saved registers, deallocate stack and return from the interrupt. */ - /* Called via: jr __return_interrupt. */ - .align 2 - .globl __return_interrupt - .type __return_interrupt,@function -__return_interrupt: - ld.w 0[sp],ep - ld.w 4[sp],gp - ld.w 8[sp],r1 - ld.w 12[sp],r10 - ld.w 16[sp],r11 - addi 20,sp,sp - reti - .size __return_interrupt,.-__return_interrupt -#endif /* L_save_interrupt */ - -#ifdef L_save_all_interrupt - .text - .align 2 - .globl __save_all_interrupt - .type __save_all_interrupt,@function - /* Save all registers except for those saved in __save_interrupt. */ - /* Allocate enough stack for all of the registers & 16 bytes of space. */ - /* Called via: jalr __save_all_interrupt,r10. */ -__save_all_interrupt: - addi -104,sp,sp -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sst.w r31,100[ep] - sst.w r2,96[ep] - sst.w gp,92[ep] - sst.w r6,88[ep] - sst.w r7,84[ep] - sst.w r8,80[ep] - sst.w r9,76[ep] - sst.w r11,72[ep] - sst.w r12,68[ep] - sst.w r13,64[ep] - sst.w r14,60[ep] - sst.w r15,56[ep] - sst.w r16,52[ep] - sst.w r17,48[ep] - sst.w r18,44[ep] - sst.w r19,40[ep] - sst.w r20,36[ep] - sst.w r21,32[ep] - sst.w r22,28[ep] - sst.w r23,24[ep] - sst.w r24,20[ep] - sst.w r25,16[ep] - sst.w r26,12[ep] - sst.w r27,8[ep] - sst.w r28,4[ep] - sst.w r29,0[ep] - mov r1,ep -#else - st.w r31,100[sp] - st.w r2,96[sp] - st.w gp,92[sp] - st.w r6,88[sp] - st.w r7,84[sp] - st.w r8,80[sp] - st.w r9,76[sp] - st.w r11,72[sp] - st.w r12,68[sp] - st.w r13,64[sp] - st.w r14,60[sp] - st.w r15,56[sp] - st.w r16,52[sp] - st.w r17,48[sp] - st.w r18,44[sp] - st.w r19,40[sp] - st.w r20,36[sp] - st.w r21,32[sp] - st.w r22,28[sp] - st.w r23,24[sp] - st.w r24,20[sp] - st.w r25,16[sp] - st.w r26,12[sp] - st.w r27,8[sp] - st.w r28,4[sp] - st.w r29,0[sp] -#endif - jmp [r10] - .size __save_all_interrupt,.-__save_all_interrupt - - .globl __restore_all_interrupt - .type __restore_all_interrupt,@function - /* Restore all registers saved in __save_all_interrupt and - deallocate the stack space. */ - /* Called via: jalr __restore_all_interrupt,r10. 
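As an editorial aside (not part of the patch), the 20-byte frame shared between the compiler-generated interrupt prologue and __save_interrupt/__return_interrupt above can be sketched as the C struct below; the names are illustrative and unsigned int is assumed to be 32 bits wide.

    /* Illustrative layout of the 20-byte interrupt frame above.  The
       compiler-generated code allocates the space and stores r10/r11 before
       the jalr; __save_interrupt then fills in ep, gp and r1 and reloads
       ep/gp from hi/lo(__ep) and hi/lo(__gp); __return_interrupt restores
       all five registers before the reti.  */
    struct save_interrupt_frame
    {
      unsigned int ep;    /*  0[sp], stored by __save_interrupt */
      unsigned int gp;    /*  4[sp], stored by __save_interrupt */
      unsigned int r1;    /*  8[sp], stored by __save_interrupt */
      unsigned int r10;   /* 12[sp], stored by compiler-generated code */
      unsigned int r11;   /* 16[sp], stored by compiler-generated code */
    };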
*/ -__restore_all_interrupt: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 100[ep],r31 - sld.w 96[ep],r2 - sld.w 92[ep],gp - sld.w 88[ep],r6 - sld.w 84[ep],r7 - sld.w 80[ep],r8 - sld.w 76[ep],r9 - sld.w 72[ep],r11 - sld.w 68[ep],r12 - sld.w 64[ep],r13 - sld.w 60[ep],r14 - sld.w 56[ep],r15 - sld.w 52[ep],r16 - sld.w 48[ep],r17 - sld.w 44[ep],r18 - sld.w 40[ep],r19 - sld.w 36[ep],r20 - sld.w 32[ep],r21 - sld.w 28[ep],r22 - sld.w 24[ep],r23 - sld.w 20[ep],r24 - sld.w 16[ep],r25 - sld.w 12[ep],r26 - sld.w 8[ep],r27 - sld.w 4[ep],r28 - sld.w 0[ep],r29 - mov r1,ep -#else - ld.w 100[sp],r31 - ld.w 96[sp],r2 - ld.w 92[sp],gp - ld.w 88[sp],r6 - ld.w 84[sp],r7 - ld.w 80[sp],r8 - ld.w 76[sp],r9 - ld.w 72[sp],r11 - ld.w 68[sp],r12 - ld.w 64[sp],r13 - ld.w 60[sp],r14 - ld.w 56[sp],r15 - ld.w 52[sp],r16 - ld.w 48[sp],r17 - ld.w 44[sp],r18 - ld.w 40[sp],r19 - ld.w 36[sp],r20 - ld.w 32[sp],r21 - ld.w 28[sp],r22 - ld.w 24[sp],r23 - ld.w 20[sp],r24 - ld.w 16[sp],r25 - ld.w 12[sp],r26 - ld.w 8[sp],r27 - ld.w 4[sp],r28 - ld.w 0[sp],r29 -#endif - addi 104,sp,sp - jmp [r10] - .size __restore_all_interrupt,.-__restore_all_interrupt -#endif /* L_save_all_interrupt */ - -#if defined(__v850e__) || defined(__v850e1__) || defined(__v850e2__) || defined(__v850e2v3__) -#ifdef L_callt_save_r2_r29 - /* Put these functions into the call table area. */ - .call_table_text - - /* Allocate space and save registers 2, 20 .. 29 on the stack. */ - /* Called via: callt ctoff(__callt_save_r2_r29). */ - .align 2 -.L_save_r2_r29: - add -4, sp - st.w r2, 0[sp] - prepare {r20 - r29}, 0 - ctret - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: callt ctoff(__callt_return_r2_r29). */ - .align 2 -.L_return_r2_r29: - dispose 0, {r20-r29} - ld.w 0[sp], r2 - add 4, sp - jmp [r31] - - /* Place the offsets of the start of these routines into the call table. */ - .call_table_data - - .global __callt_save_r2_r29 - .type __callt_save_r2_r29,@function -__callt_save_r2_r29: .short ctoff(.L_save_r2_r29) - - .global __callt_return_r2_r29 - .type __callt_return_r2_r29,@function -__callt_return_r2_r29: .short ctoff(.L_return_r2_r29) - -#endif /* L_callt_save_r2_r29. */ - -#ifdef L_callt_save_r2_r31 - /* Put these functions into the call table area. */ - .call_table_text - - /* Allocate space and save registers 2 and 20 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: callt ctoff(__callt_save_r2_r31). */ - .align 2 -.L_save_r2_r31: - add -4, sp - st.w r2, 0[sp] - prepare {r20 - r29, r31}, 0 - ctret - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: callt ctoff(__callt_return_r2_r31). */ - .align 2 -.L_return_r2_r31: - dispose 0, {r20 - r29, r31} - ld.w 0[sp], r2 - addi 4, sp, sp - jmp [r31] - - /* Place the offsets of the start of these routines into the call table. */ - .call_table_data - - .global __callt_save_r2_r31 - .type __callt_save_r2_r31,@function -__callt_save_r2_r31: .short ctoff(.L_save_r2_r31) - - .global __callt_return_r2_r31 - .type __callt_return_r2_r31,@function -__callt_return_r2_r31: .short ctoff(.L_return_r2_r31) - -#endif /* L_callt_save_r2_r31 */ - -#ifdef L_callt_save_interrupt - /* Put these functions into the call table area. */ - .call_table_text - - /* Save registers r1, ep, gp, r10 on stack and load up with expected values. */ - /* Called via: callt ctoff(__callt_save_interrupt). */ - .align 2 -.L_save_interrupt: - /* SP has already been moved before callt ctoff(_save_interrupt). 
*/ - /* R1,R10,R11,ctpc,ctpsw has alread been saved bofore callt ctoff(_save_interrupt). */ - /* addi -28, sp, sp */ - /* st.w r1, 24[sp] */ - /* st.w r10, 12[sp] */ - /* st.w r11, 16[sp] */ - /* stsr ctpc, r10 */ - /* st.w r10, 20[sp] */ - /* stsr ctpsw, r10 */ - /* st.w r10, 24[sp] */ - st.w ep, 0[sp] - st.w gp, 4[sp] - st.w r1, 8[sp] - mov hilo(__ep),ep - mov hilo(__gp),gp - ctret - - .call_table_text - /* Restore saved registers, deallocate stack and return from the interrupt. */ - /* Called via: callt ctoff(__callt_restore_interrupt). */ - .align 2 - .globl __return_interrupt - .type __return_interrupt,@function -.L_return_interrupt: - ld.w 24[sp], r1 - ldsr r1, ctpsw - ld.w 20[sp], r1 - ldsr r1, ctpc - ld.w 16[sp], r11 - ld.w 12[sp], r10 - ld.w 8[sp], r1 - ld.w 4[sp], gp - ld.w 0[sp], ep - addi 28, sp, sp - reti - - /* Place the offsets of the start of these routines into the call table. */ - .call_table_data - - .global __callt_save_interrupt - .type __callt_save_interrupt,@function -__callt_save_interrupt: .short ctoff(.L_save_interrupt) - - .global __callt_return_interrupt - .type __callt_return_interrupt,@function -__callt_return_interrupt: .short ctoff(.L_return_interrupt) - -#endif /* L_callt_save_interrupt */ - -#ifdef L_callt_save_all_interrupt - /* Put these functions into the call table area. */ - .call_table_text - - /* Save all registers except for those saved in __save_interrupt. */ - /* Allocate enough stack for all of the registers & 16 bytes of space. */ - /* Called via: callt ctoff(__callt_save_all_interrupt). */ - .align 2 -.L_save_all_interrupt: - addi -60, sp, sp -#ifdef __EP__ - mov ep, r1 - mov sp, ep - sst.w r2, 56[ep] - sst.w r5, 52[ep] - sst.w r6, 48[ep] - sst.w r7, 44[ep] - sst.w r8, 40[ep] - sst.w r9, 36[ep] - sst.w r11, 32[ep] - sst.w r12, 28[ep] - sst.w r13, 24[ep] - sst.w r14, 20[ep] - sst.w r15, 16[ep] - sst.w r16, 12[ep] - sst.w r17, 8[ep] - sst.w r18, 4[ep] - sst.w r19, 0[ep] - mov r1, ep -#else - st.w r2, 56[sp] - st.w r5, 52[sp] - st.w r6, 48[sp] - st.w r7, 44[sp] - st.w r8, 40[sp] - st.w r9, 36[sp] - st.w r11, 32[sp] - st.w r12, 28[sp] - st.w r13, 24[sp] - st.w r14, 20[sp] - st.w r15, 16[sp] - st.w r16, 12[sp] - st.w r17, 8[sp] - st.w r18, 4[sp] - st.w r19, 0[sp] -#endif - prepare {r20 - r29, r31}, 0 - ctret - - /* Restore all registers saved in __save_all_interrupt - deallocate the stack space. */ - /* Called via: callt ctoff(__callt_restore_all_interrupt). */ - .align 2 -.L_restore_all_interrupt: - dispose 0, {r20 - r29, r31} -#ifdef __EP__ - mov ep, r1 - mov sp, ep - sld.w 0 [ep], r19 - sld.w 4 [ep], r18 - sld.w 8 [ep], r17 - sld.w 12[ep], r16 - sld.w 16[ep], r15 - sld.w 20[ep], r14 - sld.w 24[ep], r13 - sld.w 28[ep], r12 - sld.w 32[ep], r11 - sld.w 36[ep], r9 - sld.w 40[ep], r8 - sld.w 44[ep], r7 - sld.w 48[ep], r6 - sld.w 52[ep], r5 - sld.w 56[ep], r2 - mov r1, ep -#else - ld.w 0 [sp], r19 - ld.w 4 [sp], r18 - ld.w 8 [sp], r17 - ld.w 12[sp], r16 - ld.w 16[sp], r15 - ld.w 20[sp], r14 - ld.w 24[sp], r13 - ld.w 28[sp], r12 - ld.w 32[sp], r11 - ld.w 36[sp], r9 - ld.w 40[sp], r8 - ld.w 44[sp], r7 - ld.w 48[sp], r6 - ld.w 52[sp], r5 - ld.w 56[sp], r2 -#endif - addi 60, sp, sp - ctret - - /* Place the offsets of the start of these routines into the call table. 
*/ - .call_table_data - - .global __callt_save_all_interrupt - .type __callt_save_all_interrupt,@function -__callt_save_all_interrupt: .short ctoff(.L_save_all_interrupt) - - .global __callt_restore_all_interrupt - .type __callt_restore_all_interrupt,@function -__callt_restore_all_interrupt: .short ctoff(.L_restore_all_interrupt) - -#endif /* L_callt_save_all_interrupt */ - - -#define MAKE_CALLT_FUNCS( START ) \ - .call_table_text ;\ - .align 2 ;\ - /* Allocate space and save registers START .. r29 on the stack. */ ;\ - /* Called via: callt ctoff(__callt_save_START_r29). */ ;\ -.L_save_##START##_r29: ;\ - prepare { START - r29 }, 0 ;\ - ctret ;\ - ;\ - /* Restore saved registers, deallocate stack and return. */ ;\ - /* Called via: callt ctoff(__return_START_r29). */ ;\ - .align 2 ;\ -.L_return_##START##_r29: ;\ - dispose 0, { START - r29 }, r31 ;\ - ;\ - /* Place the offsets of the start of these funcs into the call table. */;\ - .call_table_data ;\ - ;\ - .global __callt_save_##START##_r29 ;\ - .type __callt_save_##START##_r29,@function ;\ -__callt_save_##START##_r29: .short ctoff(.L_save_##START##_r29 ) ;\ - ;\ - .global __callt_return_##START##_r29 ;\ - .type __callt_return_##START##_r29,@function ;\ -__callt_return_##START##_r29: .short ctoff(.L_return_##START##_r29 ) - - -#define MAKE_CALLT_CFUNCS( START ) \ - .call_table_text ;\ - .align 2 ;\ - /* Allocate space and save registers START .. r31 on the stack. */ ;\ - /* Called via: callt ctoff(__callt_save_START_r31c). */ ;\ -.L_save_##START##_r31c: ;\ - prepare { START - r29, r31}, 0 ;\ - ctret ;\ - ;\ - /* Restore saved registers, deallocate stack and return. */ ;\ - /* Called via: callt ctoff(__return_START_r31c). */ ;\ - .align 2 ;\ -.L_return_##START##_r31c: ;\ - dispose 0, { START - r29, r31}, r31 ;\ - ;\ - /* Place the offsets of the start of these funcs into the call table. */;\ - .call_table_data ;\ - ;\ - .global __callt_save_##START##_r31c ;\ - .type __callt_save_##START##_r31c,@function ;\ -__callt_save_##START##_r31c: .short ctoff(.L_save_##START##_r31c ) ;\ - ;\ - .global __callt_return_##START##_r31c ;\ - .type __callt_return_##START##_r31c,@function ;\ -__callt_return_##START##_r31c: .short ctoff(.L_return_##START##_r31c ) - - -#ifdef L_callt_save_20 - MAKE_CALLT_FUNCS (r20) -#endif -#ifdef L_callt_save_21 - MAKE_CALLT_FUNCS (r21) -#endif -#ifdef L_callt_save_22 - MAKE_CALLT_FUNCS (r22) -#endif -#ifdef L_callt_save_23 - MAKE_CALLT_FUNCS (r23) -#endif -#ifdef L_callt_save_24 - MAKE_CALLT_FUNCS (r24) -#endif -#ifdef L_callt_save_25 - MAKE_CALLT_FUNCS (r25) -#endif -#ifdef L_callt_save_26 - MAKE_CALLT_FUNCS (r26) -#endif -#ifdef L_callt_save_27 - MAKE_CALLT_FUNCS (r27) -#endif -#ifdef L_callt_save_28 - MAKE_CALLT_FUNCS (r28) -#endif -#ifdef L_callt_save_29 - MAKE_CALLT_FUNCS (r29) -#endif - -#ifdef L_callt_save_20c - MAKE_CALLT_CFUNCS (r20) -#endif -#ifdef L_callt_save_21c - MAKE_CALLT_CFUNCS (r21) -#endif -#ifdef L_callt_save_22c - MAKE_CALLT_CFUNCS (r22) -#endif -#ifdef L_callt_save_23c - MAKE_CALLT_CFUNCS (r23) -#endif -#ifdef L_callt_save_24c - MAKE_CALLT_CFUNCS (r24) -#endif -#ifdef L_callt_save_25c - MAKE_CALLT_CFUNCS (r25) -#endif -#ifdef L_callt_save_26c - MAKE_CALLT_CFUNCS (r26) -#endif -#ifdef L_callt_save_27c - MAKE_CALLT_CFUNCS (r27) -#endif -#ifdef L_callt_save_28c - MAKE_CALLT_CFUNCS (r28) -#endif -#ifdef L_callt_save_29c - MAKE_CALLT_CFUNCS (r29) -#endif - - -#ifdef L_callt_save_31c - .call_table_text - .align 2 - /* Allocate space and save register r31 on the stack. 
*/ - /* Called via: callt ctoff(__callt_save_r31c). */ -.L_callt_save_r31c: - prepare {r31}, 0 - ctret - - /* Restore saved registers, deallocate stack and return. */ - /* Called via: callt ctoff(__return_r31c). */ - .align 2 -.L_callt_return_r31c: - dispose 0, {r31}, r31 - - /* Place the offsets of the start of these funcs into the call table. */ - .call_table_data - - .global __callt_save_r31c - .type __callt_save_r31c,@function -__callt_save_r31c: .short ctoff(.L_callt_save_r31c) - - .global __callt_return_r31c - .type __callt_return_r31c,@function -__callt_return_r31c: .short ctoff(.L_callt_return_r31c) -#endif - -#endif /* __v850e__ */ - -/* libgcc2 routines for NEC V850. */ -/* Double Integer Arithmetical Operation. */ - -#ifdef L_negdi2 - .text - .global ___negdi2 - .type ___negdi2, @function -___negdi2: - not r6, r10 - add 1, r10 - setf l, r6 - not r7, r11 - add r6, r11 - jmp [lp] - - .size ___negdi2,.-___negdi2 -#endif - -#ifdef L_cmpdi2 - .text - .global ___cmpdi2 - .type ___cmpdi2,@function -___cmpdi2: - # Signed comparison bitween each high word. - cmp r9, r7 - be .L_cmpdi_cmp_low - setf ge, r10 - setf gt, r6 - add r6, r10 - jmp [lp] -.L_cmpdi_cmp_low: - # Unsigned comparigon bitween each low word. - cmp r8, r6 - setf nl, r10 - setf h, r6 - add r6, r10 - jmp [lp] - .size ___cmpdi2, . - ___cmpdi2 -#endif - -#ifdef L_ucmpdi2 - .text - .global ___ucmpdi2 - .type ___ucmpdi2,@function -___ucmpdi2: - cmp r9, r7 # Check if each high word are same. - bne .L_ucmpdi_check_psw - cmp r8, r6 # Compare the word. -.L_ucmpdi_check_psw: - setf nl, r10 # - setf h, r6 # - add r6, r10 # Add the result of comparison NL and comparison H. - jmp [lp] - .size ___ucmpdi2, . - ___ucmpdi2 -#endif - -#ifdef L_muldi3 - .text - .global ___muldi3 - .type ___muldi3,@function -___muldi3: -#ifdef __v850__ - jarl __save_r26_r31, r10 - addi 16, sp, sp - mov r6, r28 - shr 15, r28 - movea lo(32767), r0, r14 - and r14, r28 - mov r8, r10 - shr 15, r10 - and r14, r10 - mov r6, r19 - shr 30, r19 - mov r7, r12 - shl 2, r12 - or r12, r19 - and r14, r19 - mov r8, r13 - shr 30, r13 - mov r9, r12 - shl 2, r12 - or r12, r13 - and r14, r13 - mov r7, r11 - shr 13, r11 - and r14, r11 - mov r9, r31 - shr 13, r31 - and r14, r31 - mov r7, r29 - shr 28, r29 - and r14, r29 - mov r9, r12 - shr 28, r12 - and r14, r12 - and r14, r6 - and r14, r8 - mov r6, r14 - mulh r8, r14 - mov r6, r16 - mulh r10, r16 - mov r6, r18 - mulh r13, r18 - mov r6, r15 - mulh r31, r15 - mulh r12, r6 - mov r28, r17 - mulh r10, r17 - add -16, sp - mov r28, r12 - mulh r8, r12 - add r17, r18 - mov r28, r17 - mulh r31, r17 - add r12, r16 - mov r28, r12 - mulh r13, r12 - add r17, r6 - mov r19, r17 - add r12, r15 - mov r19, r12 - mulh r8, r12 - mulh r10, r17 - add r12, r18 - mov r19, r12 - mulh r13, r12 - add r17, r15 - mov r11, r13 - mulh r8, r13 - add r12, r6 - mov r11, r12 - mulh r10, r12 - add r13, r15 - mulh r29, r8 - add r12, r6 - mov r16, r13 - shl 15, r13 - add r14, r13 - mov r18, r12 - shl 30, r12 - mov r13, r26 - add r12, r26 - shr 15, r14 - movhi hi(131071), r0, r12 - movea lo(131071), r12, r13 - and r13, r14 - mov r16, r12 - and r13, r12 - add r12, r14 - mov r18, r12 - shl 15, r12 - and r13, r12 - add r12, r14 - shr 17, r14 - shr 17, r16 - add r14, r16 - shl 13, r15 - shr 2, r18 - add r18, r15 - add r15, r16 - mov r16, r27 - add r8, r6 - shl 28, r6 - add r6, r27 - mov r26, r10 - mov r27, r11 - jr __return_r26_r31 -#else /* defined(__v850e__) */ - /* (Ahi << 32 + Alo) * (Bhi << 32 + Blo) */ - /* r7 r6 r9 r8 */ - mov r8, r10 - mulu r7, r8, r0 /* Ahi * Blo 
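For reference (and outside the patch proper), the DImode helpers above implement the usual libgcc2 semantics; the minimal C sketch below models what each routine computes, not how the assembly computes it, and the *_model names are made up.

    /* C models of the v850 DImode helpers: negation, signed and unsigned
       three-way comparison (0: a < b, 1: a == b, 2: a > b), and multiply.
       `long long' is assumed to be the 64-bit DImode type.  */
    long long negdi2_model (long long a)
    {
      return -a;
    }

    int cmpdi2_model (long long a, long long b)
    {
      return a < b ? 0 : a == b ? 1 : 2;
    }

    int ucmpdi2_model (unsigned long long a, unsigned long long b)
    {
      return a < b ? 0 : a == b ? 1 : 2;
    }

    long long muldi3_model (long long a, long long b)
    {
      return a * b;
    }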
*/ - mulu r6, r9, r0 /* Alo * Bhi */ - mulu r6, r10, r11 /* Alo * Blo */ - add r8, r11 - add r9, r11 - jmp [r31] -#endif /* defined(__v850e__) */ - .size ___muldi3, . - ___muldi3 -#endif - diff --git a/gcc/config/v850/t-v850 b/gcc/config/v850/t-v850 index fcd3b841e30..7885229e631 100644 --- a/gcc/config/v850/t-v850 +++ b/gcc/config/v850/t-v850 @@ -17,67 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = v850/lib1funcs.asm -LIB1ASMFUNCS = _mulsi3 \ - _divsi3 \ - _udivsi3 \ - _modsi3 \ - _umodsi3 \ - _save_2 \ - _save_20 \ - _save_21 \ - _save_22 \ - _save_23 \ - _save_24 \ - _save_25 \ - _save_26 \ - _save_27 \ - _save_28 \ - _save_29 \ - _save_2c \ - _save_20c \ - _save_21c \ - _save_22c \ - _save_23c \ - _save_24c \ - _save_25c \ - _save_26c \ - _save_27c \ - _save_28c \ - _save_29c \ - _save_31c \ - _save_interrupt \ - _save_all_interrupt \ - _callt_save_20 \ - _callt_save_21 \ - _callt_save_22 \ - _callt_save_23 \ - _callt_save_24 \ - _callt_save_25 \ - _callt_save_26 \ - _callt_save_27 \ - _callt_save_28 \ - _callt_save_29 \ - _callt_save_20c \ - _callt_save_21c \ - _callt_save_22c \ - _callt_save_23c \ - _callt_save_24c \ - _callt_save_25c \ - _callt_save_26c \ - _callt_save_27c \ - _callt_save_28c \ - _callt_save_29c \ - _callt_save_31c \ - _callt_save_interrupt \ - _callt_save_all_interrupt \ - _callt_save_r2_r29 \ - _callt_save_r2_r31 \ - _negdi2 \ - _cmpdi2 \ - _ucmpdi2 \ - _muldi3 - # Create target-specific versions of the libraries MULTILIB_OPTIONS = mv850/mv850e/mv850e2/mv850e2v3 MULTILIB_DIRNAMES = v850 v850e v850e2 v850e2v3 diff --git a/gcc/config/vax/lib1funcs.asm b/gcc/config/vax/lib1funcs.asm deleted file mode 100644 index 1d57b56dad9..00000000000 --- a/gcc/config/vax/lib1funcs.asm +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright (C) 2009 Free Software Foundation, Inc. - This file is part of GCC. - Contributed by Maciej W. Rozycki . - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#ifdef L_udivsi3 - .text - .globl __udivsi3 - .type __udivsi3, @function -__udivsi3: - .word 0 - movl 8(%ap), %r1 - blss 0f /* Check bit #31 of divisor. */ - movl 4(%ap), %r2 - blss 1f /* Check bit #31 of dividend. */ - - /* Both zero, do a standard division. */ - - divl3 %r1, %r2, %r0 - ret - - /* MSB of divisor set, only 1 or 0 may result. */ -0: - decl %r1 - clrl %r0 - cmpl %r1, 4(%ap) - adwc $0, %r0 - ret - - /* MSB of dividend set, do an extended division. */ -1: - clrl %r3 - ediv %r1, %r2, %r0, %r3 - ret - .size __udivsi3, . - __udivsi3 - .previous -#endif - -#ifdef L_umodsi3 - .text - .globl __umodsi3 - .type __umodsi3, @function -__umodsi3: - .word 0 - movl 8(%ap), %r1 - blss 0f /* Check bit #31 of divisor. 
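The case split in the VAX __udivsi3 above (DIVL3 is a signed 32-bit divide, EDIV divides a 64-bit dividend) corresponds roughly to the following C sketch, offered only as a reading aid; the function name is invented.

    /* Rough C model of the VAX __udivsi3 above.  DIVL3 only copes with
       operands that are non-negative as signed 32-bit values, so the two
       "bit #31 set" cases are handled separately: a divisor with the MSB set
       can only produce a quotient of 0 or 1, and a dividend with the MSB set
       is widened and divided with EDIV.  */
    unsigned int udivsi3_model (unsigned int num, unsigned int den)
    {
      if (den & 0x80000000u)
        return num >= den;                                       /* 0 or 1 */
      if (num & 0x80000000u)
        return (unsigned int) ((unsigned long long) num / den);  /* EDIV */
      return (unsigned int) ((int) num / (int) den);             /* DIVL3 */
    }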
*/ - movl 4(%ap), %r2 - blss 1f /* Check bit #31 of dividend. */ - - /* Both zero, do a standard division. */ - - divl3 %r1, %r2, %r0 - mull2 %r0, %r1 - subl3 %r1, %r2, %r0 - ret - - /* MSB of divisor set, subtract the divisor at most once. */ -0: - movl 4(%ap), %r2 - clrl %r0 - cmpl %r2, %r1 - sbwc $0, %r0 - bicl2 %r0, %r1 - subl3 %r1, %r2, %r0 - ret - - /* MSB of dividend set, do an extended division. */ -1: - clrl %r3 - ediv %r1, %r2, %r3, %r0 - ret - .size __umodsi3, . - __umodsi3 - .previous -#endif diff --git a/gcc/config/vax/t-linux b/gcc/config/vax/t-linux deleted file mode 100644 index 9af1edb0fab..00000000000 --- a/gcc/config/vax/t-linux +++ /dev/null @@ -1,2 +0,0 @@ -LIB1ASMSRC = vax/lib1funcs.asm -LIB1ASMFUNCS = _udivsi3 _umodsi3 diff --git a/gcc/config/xtensa/ieee754-df.S b/gcc/config/xtensa/ieee754-df.S deleted file mode 100644 index 9b46889bdc2..00000000000 --- a/gcc/config/xtensa/ieee754-df.S +++ /dev/null @@ -1,2388 +0,0 @@ -/* IEEE-754 double-precision functions for Xtensa - Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. - Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#ifdef __XTENSA_EB__ -#define xh a2 -#define xl a3 -#define yh a4 -#define yl a5 -#else -#define xh a3 -#define xl a2 -#define yh a5 -#define yl a4 -#endif - -/* Warning! The branch displacements for some Xtensa branch instructions - are quite small, and this code has been carefully laid out to keep - branch targets in range. If you change anything, be sure to check that - the assembler is not relaxing anything to branch over a jump. */ - -#ifdef L_negdf2 - - .align 4 - .global __negdf2 - .type __negdf2, @function -__negdf2: - leaf_entry sp, 16 - movi a4, 0x80000000 - xor xh, xh, a4 - leaf_return - -#endif /* L_negdf2 */ - -#ifdef L_addsubdf3 - - /* Addition */ -__adddf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Ladd_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall yh, a6, 1f - /* If x is a NaN, return it. Otherwise, return y. */ - slli a7, xh, 12 - or a7, a7, xl - beqz a7, .Ladd_ynan_or_inf -1: leaf_return - -.Ladd_ynan_or_inf: - /* Return y. */ - mov xh, yh - mov xl, yl - leaf_return - -.Ladd_opposite_signs: - /* Operand signs differ. Do a subtraction. */ - slli a7, a6, 11 - xor yh, yh, a7 - j .Lsub_same_sign - - .align 4 - .global __adddf3 - .type __adddf3, @function -__adddf3: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - - /* Check if the two operands have the same sign. 
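The xh/xl word pair used throughout the Xtensa ieee754-df.S code above follows the standard IEEE-754 binary64 layout, with the sign, 11-bit exponent and top 20 mantissa bits in the high word; a small illustrative C sketch, with invented names, is:

    #include <stdint.h>

    /* Model of the xh/xl split: the high word holds sign, exponent and the
       top 20 mantissa bits; the low word holds the remaining 32 bits.  */
    typedef struct { uint32_t xh, xl; } df_words;

    /* __negdf2 above simply flips the sign bit of the high word.  */
    static df_words negdf2_model (df_words x)
    {
      x.xh ^= 0x80000000u;
      return x;
    }

    /* The 0x7ff00000 tests above check for an all-ones exponent field,
       i.e. NaN or Infinity.  */
    static int nan_or_inf_p (df_words x)
    {
      return (x.xh & 0x7ff00000u) == 0x7ff00000u;
    }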
*/ - xor a7, xh, yh - bltz a7, .Ladd_opposite_signs - -.Ladd_same_sign: - /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ - ball xh, a6, .Ladd_xnan_or_inf - ball yh, a6, .Ladd_ynan_or_inf - - /* Compare the exponents. The smaller operand will be shifted - right by the exponent difference and added to the larger - one. */ - extui a7, xh, 20, 12 - extui a8, yh, 20, 12 - bltu a7, a8, .Ladd_shiftx - -.Ladd_shifty: - /* Check if the smaller (or equal) exponent is zero. */ - bnone yh, a6, .Ladd_yexpzero - - /* Replace yh sign/exponent with 0x001. */ - or yh, yh, a6 - slli yh, yh, 11 - srli yh, yh, 11 - -.Ladd_yexpdiff: - /* Compute the exponent difference. Optimize for difference < 32. */ - sub a10, a7, a8 - bgeui a10, 32, .Ladd_bigshifty - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out of yl are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, yl, a9 - src yl, yh, yl - srl yh, yh - -.Ladd_addy: - /* Do the 64-bit addition. */ - add xl, xl, yl - add xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, 1 -1: - /* Check if the add overflowed into the exponent. */ - extui a10, xh, 20, 12 - beq a10, a7, .Ladd_round - mov a8, a7 - j .Ladd_carry - -.Ladd_yexpzero: - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0", and increment the apparent exponent - because subnormals behave as if they had the minimum (nonzero) - exponent. Test for the case when both exponents are zero. */ - slli yh, yh, 12 - srli yh, yh, 12 - bnone xh, a6, .Ladd_bothexpzero - addi a8, a8, 1 - j .Ladd_yexpdiff - -.Ladd_bothexpzero: - /* Both exponents are zero. Handle this as a special case. There - is no need to shift or round, and the normal code for handling - a carry into the exponent field will not work because it - assumes there is an implicit "1.0" that needs to be added. */ - add xl, xl, yl - add xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, 1 -1: leaf_return - -.Ladd_bigshifty: - /* Exponent difference > 64 -- just return the bigger value. */ - bgeui a10, 64, 1b - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out are saved in a9 for rounding the result. */ - ssr a10 - sll a11, yl /* lost bits shifted out of yl */ - src a9, yh, yl - srl yl, yh - movi yh, 0 - beqz a11, .Ladd_addy - or a9, a9, a10 /* any positive, nonzero value will work */ - j .Ladd_addy - -.Ladd_xexpzero: - /* Same as "yexpzero" except skip handling the case when both - exponents are zero. */ - slli xh, xh, 12 - srli xh, xh, 12 - addi a7, a7, 1 - j .Ladd_xexpdiff - -.Ladd_shiftx: - /* Same thing as the "shifty" code, but with x and y swapped. Also, - because the exponent difference is always nonzero in this version, - the shift sequence can use SLL and skip loading a constant zero. */ - bnone xh, a6, .Ladd_xexpzero - - or xh, xh, a6 - slli xh, xh, 11 - srli xh, xh, 11 - -.Ladd_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Ladd_bigshiftx - - ssr a10 - sll a9, xl - src xl, xh, xl - srl xh, xh - -.Ladd_addx: - add xl, xl, yl - add xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, 1 -1: - /* Check if the add overflowed into the exponent. */ - extui a10, xh, 20, 12 - bne a10, a8, .Ladd_carry - -.Ladd_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi xl, xl, 1 - beqz xl, .Ladd_roundcarry - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_bigshiftx: - /* Mostly the same thing as "bigshifty".... 
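The code at .Ladd_round/.Ladd_exactlyhalf above keeps every bit shifted out of the mantissa in one register (a9), with the guard bit in the most significant position; the hedged C sketch below, with invented names, shows the round-to-nearest-even decision it implements.

    #include <stdint.h>

    /* Sketch of .Ladd_round: `mant' stands for the 64-bit mantissa in xh:xl,
       `frac' for the shifted-out bits in a9 (MSB = guard bit, the remaining
       bits act as a sticky indicator).  Round up when frac >= 1/2; when it
       is exactly 1/2, clear the low bit so the result is even.  */
    static uint64_t round_nearest_even (uint64_t mant, uint32_t frac)
    {
      if (frac & 0x80000000u)
        {
          mant += 1;
          if ((uint32_t) (frac << 1) == 0)
            mant &= ~(uint64_t) 1;
        }
      return mant;
    }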
*/ - bgeui a10, 64, .Ladd_returny - - ssr a10 - sll a11, xl - src a9, xh, xl - srl xl, xh - movi xh, 0 - beqz a11, .Ladd_addx - or a9, a9, a10 - j .Ladd_addx - -.Ladd_returny: - mov xh, yh - mov xl, yl - leaf_return - -.Ladd_carry: - /* The addition has overflowed into the exponent field, so the - value needs to be renormalized. The mantissa of the result - can be recovered by subtracting the original exponent and - adding 0x100000 (which is the explicit "1.0" for the - mantissa of the non-shifted operand -- the "1.0" for the - shifted operand was already added). The mantissa can then - be shifted right by one bit. The explicit "1.0" of the - shifted mantissa then needs to be replaced by the exponent, - incremented by one to account for the normalizing shift. - It is faster to combine these operations: do the shift first - and combine the additions and subtractions. If x is the - original exponent, the result is: - shifted mantissa - (x << 19) + (1 << 19) + (x << 20) - or: - shifted mantissa + ((x + 1) << 19) - Note that the exponent is incremented here by leaving the - explicit "1.0" of the mantissa in the exponent field. */ - - /* Shift xh/xl right by one bit. Save the lsb of xl. */ - mov a10, xl - ssai 1 - src xl, xh, xl - srl xh, xh - - /* See explanation above. The original exponent is in a8. */ - addi a8, a8, 1 - slli a8, a8, 19 - add xh, xh, a8 - - /* Return an Infinity if the exponent overflowed. */ - ball xh, a6, .Ladd_infinity - - /* Same thing as the "round" code except the msb of the leftover - fraction is bit 0 of a10, with the rest of the fraction in a9. */ - bbci.l a10, 0, 1f - addi xl, xl, 1 - beqz xl, .Ladd_roundcarry - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_infinity: - /* Clear the mantissa. */ - movi xl, 0 - srli xh, xh, 20 - slli xh, xh, 20 - - /* The sign bit may have been lost in a carry-out. Put it back. */ - slli a8, a8, 1 - or xh, xh, a8 - leaf_return - -.Ladd_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - leaf_return - -.Ladd_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow to the exponent is OK. */ - leaf_return - - - /* Subtraction */ -__subdf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Lsub_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall yh, a6, 1f - /* Both x and y are either NaN or Inf, so the result is NaN. */ - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 -1: leaf_return - -.Lsub_ynan_or_inf: - /* Negate y and return it. */ - slli a7, a6, 11 - xor xh, yh, a7 - mov xl, yl - leaf_return - -.Lsub_opposite_signs: - /* Operand signs differ. Do an addition. */ - slli a7, a6, 11 - xor yh, yh, a7 - j .Ladd_same_sign - - .align 4 - .global __subdf3 - .type __subdf3, @function -__subdf3: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - - /* Check if the two operands have the same sign. */ - xor a7, xh, yh - bltz a7, .Lsub_opposite_signs - -.Lsub_same_sign: - /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ - ball xh, a6, .Lsub_xnan_or_inf - ball yh, a6, .Lsub_ynan_or_inf - - /* Compare the operands. In contrast to addition, the entire - value matters here. 
*/ - extui a7, xh, 20, 11 - extui a8, yh, 20, 11 - bltu xh, yh, .Lsub_xsmaller - beq xh, yh, .Lsub_compare_low - -.Lsub_ysmaller: - /* Check if the smaller (or equal) exponent is zero. */ - bnone yh, a6, .Lsub_yexpzero - - /* Replace yh sign/exponent with 0x001. */ - or yh, yh, a6 - slli yh, yh, 11 - srli yh, yh, 11 - -.Lsub_yexpdiff: - /* Compute the exponent difference. Optimize for difference < 32. */ - sub a10, a7, a8 - bgeui a10, 32, .Lsub_bigshifty - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out of yl are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, yl, a9 - src yl, yh, yl - srl yh, yh - -.Lsub_suby: - /* Do the 64-bit subtraction. */ - sub xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, -1 -1: sub xl, xl, yl - - /* Subtract the leftover bits in a9 from zero and propagate any - borrow from xh/xl. */ - neg a9, a9 - beqz a9, 1f - addi a5, xh, -1 - moveqz xh, a5, xl - addi xl, xl, -1 -1: - /* Check if the subtract underflowed into the exponent. */ - extui a10, xh, 20, 11 - beq a10, a7, .Lsub_round - j .Lsub_borrow - -.Lsub_compare_low: - /* The high words are equal. Compare the low words. */ - bltu xl, yl, .Lsub_xsmaller - bltu yl, xl, .Lsub_ysmaller - /* The operands are equal. Return 0.0. */ - movi xh, 0 - movi xl, 0 -1: leaf_return - -.Lsub_yexpzero: - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0". Unless x is also a subnormal, increment - y's apparent exponent because subnormals behave as if they had - the minimum (nonzero) exponent. */ - slli yh, yh, 12 - srli yh, yh, 12 - bnone xh, a6, .Lsub_yexpdiff - addi a8, a8, 1 - j .Lsub_yexpdiff - -.Lsub_bigshifty: - /* Exponent difference > 64 -- just return the bigger value. */ - bgeui a10, 64, 1b - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out are saved in a9 for rounding the result. */ - ssr a10 - sll a11, yl /* lost bits shifted out of yl */ - src a9, yh, yl - srl yl, yh - movi yh, 0 - beqz a11, .Lsub_suby - or a9, a9, a10 /* any positive, nonzero value will work */ - j .Lsub_suby - -.Lsub_xsmaller: - /* Same thing as the "ysmaller" code, but with x and y swapped and - with y negated. */ - bnone xh, a6, .Lsub_xexpzero - - or xh, xh, a6 - slli xh, xh, 11 - srli xh, xh, 11 - -.Lsub_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Lsub_bigshiftx - - ssr a10 - movi a9, 0 - src a9, xl, a9 - src xl, xh, xl - srl xh, xh - - /* Negate y. */ - slli a11, a6, 11 - xor yh, yh, a11 - -.Lsub_subx: - sub xl, yl, xl - sub xh, yh, xh - bgeu yl, xl, 1f - addi xh, xh, -1 -1: - /* Subtract the leftover bits in a9 from zero and propagate any - borrow from xh/xl. */ - neg a9, a9 - beqz a9, 1f - addi a5, xh, -1 - moveqz xh, a5, xl - addi xl, xl, -1 -1: - /* Check if the subtract underflowed into the exponent. */ - extui a10, xh, 20, 11 - bne a10, a8, .Lsub_borrow - -.Lsub_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi xl, xl, 1 - beqz xl, .Lsub_roundcarry - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Lsub_exactlyhalf -1: leaf_return - -.Lsub_xexpzero: - /* Same as "yexpzero". */ - slli xh, xh, 12 - srli xh, xh, 12 - bnone yh, a6, .Lsub_xexpdiff - addi a7, a7, 1 - j .Lsub_xexpdiff - -.Lsub_bigshiftx: - /* Mostly the same thing as "bigshifty", but with the sign bit of the - shifted value set so that the subsequent subtraction flips the - sign of y. 
*/ - bgeui a10, 64, .Lsub_returny - - ssr a10 - sll a11, xl - src a9, xh, xl - srl xl, xh - slli xh, a6, 11 /* set sign bit of xh */ - beqz a11, .Lsub_subx - or a9, a9, a10 - j .Lsub_subx - -.Lsub_returny: - /* Negate and return y. */ - slli a7, a6, 11 - xor xh, yh, a7 - mov xl, yl - leaf_return - -.Lsub_borrow: - /* The subtraction has underflowed into the exponent field, so the - value needs to be renormalized. Shift the mantissa left as - needed to remove any leading zeros and adjust the exponent - accordingly. If the exponent is not large enough to remove - all the leading zeros, the result will be a subnormal value. */ - - slli a8, xh, 12 - beqz a8, .Lsub_xhzero - do_nsau a6, a8, a7, a11 - srli a8, a8, 12 - bge a6, a10, .Lsub_subnormal - addi a6, a6, 1 - -.Lsub_shift_lt32: - /* Shift the mantissa (a8/xl/a9) left by a6. */ - ssl a6 - src a8, a8, xl - src xl, xl, a9 - sll a9, a9 - - /* Combine the shifted mantissa with the sign and exponent, - decrementing the exponent by a6. (The exponent has already - been decremented by one due to the borrow from the subtraction, - but adding the mantissa will increment the exponent by one.) */ - srli xh, xh, 20 - sub xh, xh, a6 - slli xh, xh, 20 - add xh, xh, a8 - j .Lsub_round - -.Lsub_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - leaf_return - -.Lsub_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow to the exponent is OK. */ - leaf_return - -.Lsub_xhzero: - /* When normalizing the result, all the mantissa bits in the high - word are zero. Shift by "20 + (leading zero count of xl) + 1". */ - do_nsau a6, xl, a7, a11 - addi a6, a6, 21 - blt a10, a6, .Lsub_subnormal - -.Lsub_normalize_shift: - bltui a6, 32, .Lsub_shift_lt32 - - ssl a6 - src a8, xl, a9 - sll xl, a9 - movi a9, 0 - - srli xh, xh, 20 - sub xh, xh, a6 - slli xh, xh, 20 - add xh, xh, a8 - j .Lsub_round - -.Lsub_subnormal: - /* The exponent is too small to shift away all the leading zeros. - Set a6 to the current exponent (which has already been - decremented by the borrow) so that the exponent of the result - will be zero. Do not add 1 to a6 in this case, because: (1) - adding the mantissa will not increment the exponent, so there is - no need to subtract anything extra from the exponent to - compensate, and (2) the effective exponent of a subnormal is 1 - not 0 so the shift amount must be 1 smaller than normal. */ - mov a6, a10 - j .Lsub_normalize_shift - -#endif /* L_addsubdf3 */ - -#ifdef L_muldf3 - - /* Multiplication */ -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 -#define XCHAL_NO_MUL 1 -#endif - -__muldf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). - (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) */ - -.Lmul_xexpzero: - /* Clear the sign bit of x. */ - slli xh, xh, 1 - srli xh, xh, 1 - - /* If x is zero, return zero. */ - or a10, xh, xl - beqz a10, .Lmul_return_zero - - /* Normalize x. Adjust the exponent in a8. 
*/ - beqz xh, .Lmul_xh_zero - do_nsau a10, xh, a11, a12 - addi a10, a10, -11 - ssl a10 - src xh, xh, xl - sll xl, xl - movi a8, 1 - sub a8, a8, a10 - j .Lmul_xnormalized -.Lmul_xh_zero: - do_nsau a10, xl, a11, a12 - addi a10, a10, -11 - movi a8, -31 - sub a8, a8, a10 - ssl a10 - bltz a10, .Lmul_xl_srl - sll xh, xl - movi xl, 0 - j .Lmul_xnormalized -.Lmul_xl_srl: - srl xh, xl - sll xl, xl - j .Lmul_xnormalized - -.Lmul_yexpzero: - /* Clear the sign bit of y. */ - slli yh, yh, 1 - srli yh, yh, 1 - - /* If y is zero, return zero. */ - or a10, yh, yl - beqz a10, .Lmul_return_zero - - /* Normalize y. Adjust the exponent in a9. */ - beqz yh, .Lmul_yh_zero - do_nsau a10, yh, a11, a12 - addi a10, a10, -11 - ssl a10 - src yh, yh, yl - sll yl, yl - movi a9, 1 - sub a9, a9, a10 - j .Lmul_ynormalized -.Lmul_yh_zero: - do_nsau a10, yl, a11, a12 - addi a10, a10, -11 - movi a9, -31 - sub a9, a9, a10 - ssl a10 - bltz a10, .Lmul_yl_srl - sll yh, yl - movi yl, 0 - j .Lmul_ynormalized -.Lmul_yl_srl: - srl yh, yl - sll yl, yl - j .Lmul_ynormalized - -.Lmul_return_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - j .Lmul_done - -.Lmul_xnan_or_inf: - /* If y is zero, return NaN. */ - bnez yl, 1f - slli a8, yh, 1 - bnez a8, 1f - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 - j .Lmul_done -1: - /* If y is NaN, return y. */ - bnall yh, a6, .Lmul_returnx - slli a8, yh, 12 - or a8, a8, yl - beqz a8, .Lmul_returnx - -.Lmul_returny: - mov xh, yh - mov xl, yl - -.Lmul_returnx: - /* Set the sign bit and return. */ - extui a7, a7, 31, 1 - slli xh, xh, 1 - ssai 1 - src xh, a7, xh - j .Lmul_done - -.Lmul_ynan_or_inf: - /* If x is zero, return NaN. */ - bnez xl, .Lmul_returny - slli a8, xh, 1 - bnez a8, .Lmul_returny - movi a7, 0x80000 /* make it a quiet NaN */ - or xh, yh, a7 - j .Lmul_done - - .align 4 - .global __muldf3 - .type __muldf3, @function -__muldf3: -#if __XTENSA_CALL0_ABI__ - leaf_entry sp, 32 - addi sp, sp, -32 - s32i a12, sp, 16 - s32i a13, sp, 20 - s32i a14, sp, 24 - s32i a15, sp, 28 -#elif XCHAL_NO_MUL - /* This is not really a leaf function; allocate enough stack space - to allow CALL12s to a helper function. */ - leaf_entry sp, 64 -#else - leaf_entry sp, 32 -#endif - movi a6, 0x7ff00000 - - /* Get the sign of the result. */ - xor a7, xh, yh - - /* Check for NaN and infinity. */ - ball xh, a6, .Lmul_xnan_or_inf - ball yh, a6, .Lmul_ynan_or_inf - - /* Extract the exponents. */ - extui a8, xh, 20, 11 - extui a9, yh, 20, 11 - - beqz a8, .Lmul_xexpzero -.Lmul_xnormalized: - beqz a9, .Lmul_yexpzero -.Lmul_ynormalized: - - /* Add the exponents. */ - add a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0x1fffff - or xh, xh, a6 - and xh, xh, a10 - or yh, yh, a6 - and yh, yh, a10 - - /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6. - The least-significant word of the result is thrown away except - that if it is nonzero, the lsb of a6 is set to 1. */ -#if XCHAL_HAVE_MUL32_HIGH - - /* Compute a6 with any carry-outs in a10. */ - movi a10, 0 - mull a6, xl, yh - mull a11, xh, yl - add a6, a6, a11 - bgeu a6, a11, 1f - addi a10, a10, 1 -1: - muluh a11, xl, yl - add a6, a6, a11 - bgeu a6, a11, 1f - addi a10, a10, 1 -1: - /* If the low word of the result is nonzero, set the lsb of a6. */ - mull a11, xl, yl - beqz a11, 1f - movi a9, 1 - or a6, a6, a9 -1: - /* Compute xl with any carry-outs in a9. 
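The .Lmul_xexpzero/.Lmul_yexpzero paths above normalize a subnormal operand before multiplying: count its leading zeros, shift the mantissa left, and adjust the exponent by the shift amount. A loose C sketch of that step (invented names, assuming a nonzero mantissa since zero operands were already returned above) is:

    #include <stdint.h>

    /* Stand-in for the do_nsau leading-zero count; assumes v != 0.  */
    static int clz64 (uint64_t v)
    {
      int n = 0;
      while (!(v >> 63))
        {
          v <<= 1;
          n++;
        }
      return n;
    }

    /* Sketch of subnormal normalization: bring the leading 1 up to bit 52,
       where the implicit "1.0" of a normal value lives, and compensate in
       the exponent, which for a subnormal behaves as if it were 1.  This
       mirrors `movi a8, 1; sub a8, a8, a10' above.  */
    static void normalize_subnormal (uint64_t *mant, int *exp)
    {
      int shift = clz64 (*mant) - 11;
      *mant <<= shift;
      *exp = 1 - shift;
    }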
*/ - movi a9, 0 - mull a11, xh, yh - add a10, a10, a11 - bgeu a10, a11, 1f - addi a9, a9, 1 -1: - muluh a11, xh, yl - add a10, a10, a11 - bgeu a10, a11, 1f - addi a9, a9, 1 -1: - muluh xl, xl, yh - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - /* Compute xh. */ - muluh xh, xh, yh - add xh, xh, a9 - -#else /* ! XCHAL_HAVE_MUL32_HIGH */ - - /* Break the inputs into 16-bit chunks and compute 16 32-bit partial - products. These partial products are: - - 0 xll * yll - - 1 xll * ylh - 2 xlh * yll - - 3 xll * yhl - 4 xlh * ylh - 5 xhl * yll - - 6 xll * yhh - 7 xlh * yhl - 8 xhl * ylh - 9 xhh * yll - - 10 xlh * yhh - 11 xhl * yhl - 12 xhh * ylh - - 13 xhl * yhh - 14 xhh * yhl - - 15 xhh * yhh - - where the input chunks are (hh, hl, lh, ll). If using the Mul16 - or Mul32 multiplier options, these input chunks must be stored in - separate registers. For Mac16, the UMUL.AA.* opcodes can specify - that the inputs come from either half of the registers, so there - is no need to shift them out ahead of time. If there is no - multiply hardware, the 16-bit chunks can be extracted when setting - up the arguments to the separate multiply function. */ - - /* Save a7 since it is needed to hold a temporary value. */ - s32i a7, sp, 4 -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Calling a separate multiply function will clobber a0 and requires - use of a8 as a temporary, so save those values now. (The function - uses a custom ABI so nothing else needs to be saved.) */ - s32i a0, sp, 0 - s32i a8, sp, 8 -#endif - -#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 - -#define xlh a12 -#define ylh a13 -#define xhh a14 -#define yhh a15 - - /* Get the high halves of the inputs into registers. */ - srli xlh, xl, 16 - srli ylh, yl, 16 - srli xhh, xh, 16 - srli yhh, yh, 16 - -#define xll xl -#define yll yl -#define xhl xh -#define yhl yh - -#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 - /* Clear the high halves of the inputs. This does not matter - for MUL16 because the high bits are ignored. */ - extui xl, xl, 0, 16 - extui xh, xh, 0, 16 - extui yl, yl, 0, 16 - extui yh, yh, 0, 16 -#endif -#endif /* MUL16 || MUL32 */ - - -#if XCHAL_HAVE_MUL16 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mul16u dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MUL32 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mull dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MAC16 - -/* The preprocessor insists on inserting a space when concatenating after - a period in the definition of do_mul below. These macros are a workaround - using underscores instead of periods when doing the concatenation. */ -#define umul_aa_ll umul.aa.ll -#define umul_aa_lh umul.aa.lh -#define umul_aa_hl umul.aa.hl -#define umul_aa_hh umul.aa.hh - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - umul_aa_ ## xhalf ## yhalf xreg, yreg; \ - rsr dst, ACCLO - -#else /* no multiply hardware */ - -#define set_arg_l(dst, src) \ - extui dst, src, 0, 16 -#define set_arg_h(dst, src) \ - srli dst, src, 16 - -#if __XTENSA_CALL0_ABI__ -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a13, xreg); \ - set_arg_ ## yhalf (a14, yreg); \ - call0 .Lmul_mulsi3; \ - mov dst, a12 -#else -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a14, xreg); \ - set_arg_ ## yhalf (a15, yreg); \ - call12 .Lmul_mulsi3; \ - mov dst, a14 -#endif /* __XTENSA_CALL0_ABI__ */ - -#endif /* no multiply hardware */ - - /* Add pp1 and pp2 into a10 with carry-out in a9. 
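The partial-product table above is an ordinary schoolbook 64x64 to 128-bit multiply carried out in 16-bit chunks, sized to whatever multiplier option the configuration provides. A minimal C sketch of the same idea using 32-bit chunks, so four partial products stand in for the sixteen listed above; mul64x64 and u128 are illustrative names, not libgcc interfaces:

#include <stdint.h>

typedef struct { uint64_t hi, lo; } u128;

/* Schoolbook 64x64 -> 128 multiply from 32-bit halves; the assembly
   does the same thing with 16-bit halves and explicit bgeu carry
   checks. */
static u128 mul64x64(uint64_t x, uint64_t y)
{
    uint64_t xl = (uint32_t)x, xh = x >> 32;
    uint64_t yl = (uint32_t)y, yh = y >> 32;

    uint64_t ll = xl * yl;   /* pp0 */
    uint64_t lh = xl * yh;   /* pp1 */
    uint64_t hl = xh * yl;   /* pp2 */
    uint64_t hh = xh * yh;   /* pp3 */

    /* Sum the middle column, keeping its carry explicitly. */
    uint64_t mid = (ll >> 32) + (uint32_t)lh + (uint32_t)hl;

    u128 r;
    r.lo = (mid << 32) | (uint32_t)ll;
    r.hi = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
    return r;
}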
*/ - do_mul(a10, xl, l, yl, h) /* pp 1 */ - do_mul(a11, xl, h, yl, l) /* pp 2 */ - movi a9, 0 - add a10, a10, a11 - bgeu a10, a11, 1f - addi a9, a9, 1 -1: - /* Initialize a6 with a9/a10 shifted into position. Note that - this value can be safely incremented without any carry-outs. */ - ssai 16 - src a6, a9, a10 - - /* Compute the low word into a10. */ - do_mul(a11, xl, l, yl, l) /* pp 0 */ - sll a10, a10 - add a10, a10, a11 - bgeu a10, a11, 1f - addi a6, a6, 1 -1: - /* Compute the contributions of pp0-5 to a6, with carry-outs in a9. - This is good enough to determine the low half of a6, so that any - nonzero bits from the low word of the result can be collapsed - into a6, freeing up a register. */ - movi a9, 0 - do_mul(a11, xl, l, yh, l) /* pp 3 */ - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - do_mul(a11, xl, h, yl, h) /* pp 4 */ - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - do_mul(a11, xh, l, yl, l) /* pp 5 */ - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Collapse any nonzero bits from the low word into a6. */ - beqz a10, 1f - movi a11, 1 - or a6, a6, a11 -1: - /* Add pp6-9 into a11 with carry-outs in a10. */ - do_mul(a7, xl, l, yh, h) /* pp 6 */ - do_mul(a11, xh, h, yl, l) /* pp 9 */ - movi a10, 0 - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - do_mul(a7, xl, h, yh, l) /* pp 7 */ - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - do_mul(a7, xh, l, yl, h) /* pp 8 */ - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - /* Shift a10/a11 into position, and add low half of a11 to a6. */ - src a10, a10, a11 - add a10, a10, a9 - sll a11, a11 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a10, a10, 1 -1: - /* Add pp10-12 into xl with carry-outs in a9. */ - movi a9, 0 - do_mul(xl, xl, h, yh, h) /* pp 10 */ - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - do_mul(a10, xh, l, yh, l) /* pp 11 */ - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - do_mul(a10, xh, h, yl, h) /* pp 12 */ - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - /* Add pp13-14 into a11 with carry-outs in a10. */ - do_mul(a11, xh, l, yh, h) /* pp 13 */ - do_mul(a7, xh, h, yh, l) /* pp 14 */ - movi a10, 0 - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - /* Shift a10/a11 into position, and add low half of a11 to a6. */ - src a10, a10, a11 - add a10, a10, a9 - sll a11, a11 - add xl, xl, a11 - bgeu xl, a11, 1f - addi a10, a10, 1 -1: - /* Compute xh. */ - do_mul(xh, xh, h, yh, h) /* pp 15 */ - add xh, xh, a10 - - /* Restore values saved on the stack during the multiplication. */ - l32i a7, sp, 4 -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - l32i a0, sp, 0 - l32i a8, sp, 8 -#endif -#endif /* ! XCHAL_HAVE_MUL32_HIGH */ - - /* Shift left by 12 bits, unless there was a carry-out from the - multiply, in which case, shift by 11 bits and increment the - exponent. Note: It is convenient to use the constant 0x3ff - instead of 0x400 when removing the extra exponent bias (so that - it is easy to construct 0x7fe for the overflow check). Reverse - the logic here to decrement the exponent sum by one unless there - was a carry-out. */ - movi a4, 11 - srli a5, xh, 21 - 12 - bnez a5, 1f - addi a4, a4, 1 - addi a8, a8, -1 -1: ssl a4 - src xh, xh, xl - src xl, xl, a6 - sll a6, a6 - - /* Subtract the extra bias from the exponent sum (plus one to account - for the explicit "1.0" of the mantissa that will be added to the - exponent in the final result). */ - movi a4, 0x3ff - sub a8, a8, a4 - - /* Check for over/underflow. 
The value in a8 is one less than the - final exponent, so values in the range 0..7fd are OK here. */ - slli a4, a4, 1 /* 0x7fe */ - bgeu a8, a4, .Lmul_overflow - -.Lmul_round: - /* Round. */ - bgez a6, .Lmul_rounded - addi xl, xl, 1 - beqz xl, .Lmul_roundcarry - slli a6, a6, 1 - beqz a6, .Lmul_exactlyhalf - -.Lmul_rounded: - /* Add the exponent to the mantissa. */ - slli a8, a8, 20 - add xh, xh, a8 - -.Lmul_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or xh, xh, a7 - -.Lmul_done: -#if __XTENSA_CALL0_ABI__ - l32i a12, sp, 16 - l32i a13, sp, 20 - l32i a14, sp, 24 - l32i a15, sp, 28 - addi sp, sp, 32 -#endif - leaf_return - -.Lmul_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - j .Lmul_rounded - -.Lmul_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow is OK -- it will be added to the exponent. */ - j .Lmul_rounded - -.Lmul_overflow: - bltz a8, .Lmul_underflow - /* Return +/- Infinity. */ - addi a8, a4, 1 /* 0x7ff */ - slli xh, a8, 20 - movi xl, 0 - j .Lmul_addsign - -.Lmul_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - mov a9, a6 - ssr a8 - bgeui a8, 32, .Lmul_bigshift - - /* Shift xh/xl right. Any bits that are shifted out of xl are saved - in a6 (combined with the shifted-out bits currently in a6) for - rounding the result. */ - sll a6, xl - src xl, xh, xl - srl xh, xh - j 1f - -.Lmul_bigshift: - bgeui a8, 64, .Lmul_flush_to_zero - sll a10, xl /* lost bits shifted out of xl */ - src a6, xh, xl - srl xl, xh - movi xh, 0 - or a9, a9, a10 - - /* Set the exponent to zero. */ -1: movi a8, 0 - - /* Pack any nonzero bits shifted out into a6. */ - beqz a9, .Lmul_round - movi a9, 1 - or a6, a6, a9 - j .Lmul_round - -.Lmul_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - j .Lmul_done - -#if XCHAL_NO_MUL - - /* For Xtensa processors with no multiply hardware, this simplified - version of _mulsi3 is used for multiplying 16-bit chunks of - the floating-point mantissas. When using CALL0, this function - uses a custom ABI: the inputs are passed in a13 and a14, the - result is returned in a12, and a8 and a15 are clobbered. */ - .align 4 -.Lmul_mulsi3: - leaf_entry sp, 16 - .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 - movi \dst, 0 -1: add \tmp1, \src2, \dst - extui \tmp2, \src1, 0, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx2 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 1, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx4 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 2, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx8 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 3, 1 - movnez \dst, \tmp1, \tmp2 - - srli \src1, \src1, 4 - slli \src2, \src2, 4 - bnez \src1, 1b - .endm -#if __XTENSA_CALL0_ABI__ - mul_mulsi3_body a12, a13, a14, a15, a8 -#else - /* The result will be written into a2, so save that argument in a4. */ - mov a4, a2 - mul_mulsi3_body a2, a4, a3, a5, a6 -#endif - leaf_return -#endif /* XCHAL_NO_MUL */ -#endif /* L_muldf3 */ - -#ifdef L_divdf3 - - /* Division */ -__divdf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). - (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) 
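The .Lmul_round / .Lmul_exactlyhalf / .Lmul_roundcarry sequence above implements round-to-nearest with ties to even, with a6 acting as a combined guard and sticky word. A minimal C model, collapsing the xh/xl pair into one 64-bit mantissa so the carry case is handled by ordinary addition (round_nearest_even is an illustrative name):

#include <stdint.h>

/* mant: the result mantissa; guard: the shifted-out bits, with the
   rounding bit in the msb and the sticky bits below it. */
static uint64_t round_nearest_even(uint64_t mant, uint32_t guard)
{
    if (guard & 0x80000000u) {              /* fraction >= 1/2: round up */
        mant += 1;
        if ((uint32_t)(guard << 1) == 0)    /* exactly 1/2: force an even lsb */
            mant &= ~(uint64_t)1;
    }
    return mant;
}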
*/ - -.Ldiv_yexpzero: - /* Clear the sign bit of y. */ - slli yh, yh, 1 - srli yh, yh, 1 - - /* Check for division by zero. */ - or a10, yh, yl - beqz a10, .Ldiv_yzero - - /* Normalize y. Adjust the exponent in a9. */ - beqz yh, .Ldiv_yh_zero - do_nsau a10, yh, a11, a9 - addi a10, a10, -11 - ssl a10 - src yh, yh, yl - sll yl, yl - movi a9, 1 - sub a9, a9, a10 - j .Ldiv_ynormalized -.Ldiv_yh_zero: - do_nsau a10, yl, a11, a9 - addi a10, a10, -11 - movi a9, -31 - sub a9, a9, a10 - ssl a10 - bltz a10, .Ldiv_yl_srl - sll yh, yl - movi yl, 0 - j .Ldiv_ynormalized -.Ldiv_yl_srl: - srl yh, yl - sll yl, yl - j .Ldiv_ynormalized - -.Ldiv_yzero: - /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ - slli xh, xh, 1 - srli xh, xh, 1 - or xl, xl, xh - srli xh, a7, 31 - slli xh, xh, 31 - or xh, xh, a6 - bnez xl, 1f - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 -1: movi xl, 0 - leaf_return - -.Ldiv_xexpzero: - /* Clear the sign bit of x. */ - slli xh, xh, 1 - srli xh, xh, 1 - - /* If x is zero, return zero. */ - or a10, xh, xl - beqz a10, .Ldiv_return_zero - - /* Normalize x. Adjust the exponent in a8. */ - beqz xh, .Ldiv_xh_zero - do_nsau a10, xh, a11, a8 - addi a10, a10, -11 - ssl a10 - src xh, xh, xl - sll xl, xl - movi a8, 1 - sub a8, a8, a10 - j .Ldiv_xnormalized -.Ldiv_xh_zero: - do_nsau a10, xl, a11, a8 - addi a10, a10, -11 - movi a8, -31 - sub a8, a8, a10 - ssl a10 - bltz a10, .Ldiv_xl_srl - sll xh, xl - movi xl, 0 - j .Ldiv_xnormalized -.Ldiv_xl_srl: - srl xh, xl - sll xl, xl - j .Ldiv_xnormalized - -.Ldiv_return_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - leaf_return - -.Ldiv_xnan_or_inf: - /* Set the sign bit of the result. */ - srli a7, yh, 31 - slli a7, a7, 31 - xor xh, xh, a7 - /* If y is NaN or Inf, return NaN. */ - bnall yh, a6, 1f - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 -1: leaf_return - -.Ldiv_ynan_or_inf: - /* If y is Infinity, return zero. */ - slli a8, yh, 12 - or a8, a8, yl - beqz a8, .Ldiv_return_zero - /* y is NaN; return it. */ - mov xh, yh - mov xl, yl - leaf_return - -.Ldiv_highequal1: - bltu xl, yl, 2f - j 3f - - .align 4 - .global __divdf3 - .type __divdf3, @function -__divdf3: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - - /* Get the sign of the result. */ - xor a7, xh, yh - - /* Check for NaN and infinity. */ - ball xh, a6, .Ldiv_xnan_or_inf - ball yh, a6, .Ldiv_ynan_or_inf - - /* Extract the exponents. */ - extui a8, xh, 20, 11 - extui a9, yh, 20, 11 - - beqz a9, .Ldiv_yexpzero -.Ldiv_ynormalized: - beqz a8, .Ldiv_xexpzero -.Ldiv_xnormalized: - - /* Subtract the exponents. */ - sub a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0x1fffff - or xh, xh, a6 - and xh, xh, a10 - or yh, yh, a6 - and yh, yh, a10 - - /* Set SAR for left shift by one. */ - ssai (32 - 1) - - /* The first digit of the mantissa division must be a one. - Shift x (and adjust the exponent) as needed to make this true. */ - bltu yh, xh, 3f - beq yh, xh, .Ldiv_highequal1 -2: src xh, xh, xl - sll xl, xl - addi a8, a8, -1 -3: - /* Do the first subtraction and shift. */ - sub xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, -1 -1: sub xl, xl, yl - src xh, xh, xl - sll xl, xl - - /* Put the quotient into a10/a11. */ - movi a10, 0 - movi a11, 1 - - /* Divide one bit at a time for 52 bits. */ - movi a9, 52 -#if XCHAL_HAVE_LOOPS - loop a9, .Ldiv_loopend -#endif -.Ldiv_loop: - /* Shift the quotient << 1. */ - src a10, a10, a11 - sll a11, a11 - - /* Is this digit a 0 or 1? 
*/ - bltu xh, yh, 3f - beq xh, yh, .Ldiv_highequal2 - - /* Output a 1 and subtract. */ -2: addi a11, a11, 1 - sub xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, -1 -1: sub xl, xl, yl - - /* Shift the dividend << 1. */ -3: src xh, xh, xl - sll xl, xl - -#if !XCHAL_HAVE_LOOPS - addi a9, a9, -1 - bnez a9, .Ldiv_loop -#endif -.Ldiv_loopend: - - /* Add the exponent bias (less one to account for the explicit "1.0" - of the mantissa that will be added to the exponent in the final - result). */ - movi a9, 0x3fe - add a8, a8, a9 - - /* Check for over/underflow. The value in a8 is one less than the - final exponent, so values in the range 0..7fd are OK here. */ - addmi a9, a9, 0x400 /* 0x7fe */ - bgeu a8, a9, .Ldiv_overflow - -.Ldiv_round: - /* Round. The remainder (<< 1) is in xh/xl. */ - bltu xh, yh, .Ldiv_rounded - beq xh, yh, .Ldiv_highequal3 -.Ldiv_roundup: - addi a11, a11, 1 - beqz a11, .Ldiv_roundcarry - -.Ldiv_rounded: - mov xl, a11 - /* Add the exponent to the mantissa. */ - slli a8, a8, 20 - add xh, a10, a8 - -.Ldiv_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or xh, xh, a7 - leaf_return - -.Ldiv_highequal2: - bgeu xl, yl, 2b - j 3b - -.Ldiv_highequal3: - bltu xl, yl, .Ldiv_rounded - bne xl, yl, .Ldiv_roundup - - /* Remainder is exactly half the divisor. Round even. */ - addi a11, a11, 1 - beqz a11, .Ldiv_roundcarry - srli a11, a11, 1 - slli a11, a11, 1 - j .Ldiv_rounded - -.Ldiv_overflow: - bltz a8, .Ldiv_underflow - /* Return +/- Infinity. */ - addi a8, a9, 1 /* 0x7ff */ - slli xh, a8, 20 - movi xl, 0 - j .Ldiv_addsign - -.Ldiv_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - ssr a8 - bgeui a8, 32, .Ldiv_bigshift - - /* Shift a10/a11 right. Any bits that are shifted out of a11 are - saved in a6 for rounding the result. */ - sll a6, a11 - src a11, a10, a11 - srl a10, a10 - j 1f - -.Ldiv_bigshift: - bgeui a8, 64, .Ldiv_flush_to_zero - sll a9, a11 /* lost bits shifted out of a11 */ - src a6, a10, a11 - srl a11, a10 - movi a10, 0 - or xl, xl, a9 - - /* Set the exponent to zero. */ -1: movi a8, 0 - - /* Pack any nonzero remainder (in xh/xl) into a6. */ - or xh, xh, xl - beqz xh, 1f - movi a9, 1 - or a6, a6, a9 - - /* Round a10/a11 based on the bits shifted out into a6. */ -1: bgez a6, .Ldiv_rounded - addi a11, a11, 1 - beqz a11, .Ldiv_roundcarry - slli a6, a6, 1 - bnez a6, .Ldiv_rounded - srli a11, a11, 1 - slli a11, a11, 1 - j .Ldiv_rounded - -.Ldiv_roundcarry: - /* a11 is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi a10, a10, 1 - /* Overflow to the exponent field is OK. */ - j .Ldiv_rounded - -.Ldiv_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - leaf_return - -#endif /* L_divdf3 */ - -#ifdef L_cmpdf2 - - /* Equal and Not Equal */ - - .align 4 - .global __eqdf2 - .global __nedf2 - .set __nedf2, __eqdf2 - .type __eqdf2, @function -__eqdf2: - leaf_entry sp, 16 - bne xl, yl, 2f - bne xh, yh, 4f - - /* The values are equal but NaN != NaN. Check the exponent. */ - movi a6, 0x7ff00000 - ball xh, a6, 3f - - /* Equal. */ - movi a2, 0 - leaf_return - - /* Not equal. */ -2: movi a2, 1 - leaf_return - - /* Check if the mantissas are nonzero. */ -3: slli a7, xh, 12 - or a7, a7, xl - j 5f - - /* Check if x and y are zero with different signs. 
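__divdf3's quotient loop above is restoring division on the already-normalized mantissas, one result bit per iteration (52 iterations for the double format, after the first bit is produced ahead of the loop). A minimal C model of the loop, ignoring the register-pair bookkeeping; div_mantissa is an illustrative name:

#include <stdint.h>

/* x, y: normalized mantissas with x < 2*y, as the setup code above
   guarantees (the real mantissas are 53 bits, so the shifts cannot
   wrap).  Returns `bits` quotient bits; the remainder, shifted left
   once per iteration, is what the rounding step then inspects. */
static uint64_t div_mantissa(uint64_t x, uint64_t y, int bits)
{
    uint64_t q = 0;
    for (int i = 0; i < bits; i++) {
        q <<= 1;                  /* shift the quotient */
        if (x >= y) {             /* this digit is a 1 */
            q |= 1;
            x -= y;
        }
        x <<= 1;                  /* shift the dividend */
    }
    return q;
}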
*/ -4: or a7, xh, yh - slli a7, a7, 1 - or a7, a7, xl /* xl == yl here */ - - /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa - or x when exponent(x) = 0x7ff and x == y. */ -5: movi a2, 0 - movi a3, 1 - movnez a2, a3, a7 - leaf_return - - - /* Greater Than */ - - .align 4 - .global __gtdf2 - .type __gtdf2, @function -__gtdf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Lle_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 0 - leaf_return - - - /* Less Than or Equal */ - - .align 4 - .global __ledf2 - .type __ledf2, @function -__ledf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Lle_cmp - movi a2, 1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 1 - leaf_return - -.Lle_cmp: - /* Check if x and y have different signs. */ - xor a7, xh, yh - bltz a7, .Lle_diff_signs - - /* Check if x is negative. */ - bltz xh, .Lle_xneg - - /* Check if x <= y. */ - bltu xh, yh, 4f - bne xh, yh, 5f - bltu yl, xl, 5f -4: movi a2, 0 - leaf_return - -.Lle_xneg: - /* Check if y <= x. */ - bltu yh, xh, 4b - bne yh, xh, 5f - bgeu xl, yl, 4b -5: movi a2, 1 - leaf_return - -.Lle_diff_signs: - bltz xh, 4b - - /* Check if both x and y are zero. */ - or a7, xh, yh - slli a7, a7, 1 - or a7, a7, xl - or a7, a7, yl - movi a2, 1 - movi a3, 0 - moveqz a2, a3, a7 - leaf_return - - - /* Greater Than or Equal */ - - .align 4 - .global __gedf2 - .type __gedf2, @function -__gedf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Llt_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Llt_cmp - movi a2, -1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, -1 - leaf_return - - - /* Less Than */ - - .align 4 - .global __ltdf2 - .type __ltdf2, @function -__ltdf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Llt_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Llt_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 0 - leaf_return - -.Llt_cmp: - /* Check if x and y have different signs. */ - xor a7, xh, yh - bltz a7, .Llt_diff_signs - - /* Check if x is negative. */ - bltz xh, .Llt_xneg - - /* Check if x < y. */ - bltu xh, yh, 4f - bne xh, yh, 5f - bgeu xl, yl, 5f -4: movi a2, -1 - leaf_return - -.Llt_xneg: - /* Check if y < x. */ - bltu yh, xh, 4b - bne yh, xh, 5f - bltu yl, xl, 4b -5: movi a2, 0 - leaf_return - -.Llt_diff_signs: - bgez xh, 5b - - /* Check if both x and y are nonzero. 
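The comparison routines above follow the usual libgcc soft-float convention: each returns an int whose sign encodes the relation, and the value chosen for a NaN operand makes the natural test come out false (zero for __gtdf2 and __ltdf2, -1 for __gedf2, 1 for __ledf2 and __eqdf2). For reference, this is roughly how compiled code on a soft-float target consumes them; the two static wrappers are purely illustrative:

/* Comparison helpers provided by the code above. */
extern int __eqdf2(double, double);   /* == 0 iff x == y and neither is NaN */
extern int __nedf2(double, double);   /* same code as __eqdf2 */
extern int __gtdf2(double, double);   /* >  0 iff x >  y */
extern int __gedf2(double, double);   /* >= 0 iff x >= y */
extern int __ltdf2(double, double);   /* <  0 iff x <  y */
extern int __ledf2(double, double);   /* <= 0 iff x <= y */

/* With a NaN operand both of these are false, because the helpers
   return 0 in that case. */
static int soft_gt(double a, double b) { return __gtdf2(a, b) > 0; }
static int soft_lt(double a, double b) { return __ltdf2(a, b) < 0; }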
*/ - or a7, xh, yh - slli a7, a7, 1 - or a7, a7, xl - or a7, a7, yl - movi a2, 0 - movi a3, -1 - movnez a2, a3, a7 - leaf_return - - - /* Unordered */ - - .align 4 - .global __unorddf2 - .type __unorddf2, @function -__unorddf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 3f -1: ball yh, a6, 4f -2: movi a2, 0 - leaf_return - -3: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 1 - leaf_return - -4: slli a7, yh, 12 - or a7, a7, yl - beqz a7, 2b - movi a2, 1 - leaf_return - -#endif /* L_cmpdf2 */ - -#ifdef L_fixdfsi - - .align 4 - .global __fixdfsi - .type __fixdfsi, @function -__fixdfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixdfsi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */ - extui a4, xh, 20, 11 - extui a5, a6, 19, 10 /* 0x3fe */ - sub a4, a4, a5 - bgei a4, 32, .Lfixdfsi_maxint - blti a4, 1, .Lfixdfsi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src a5, a7, xl - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixdfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixdfsi_maxint - - /* Translate NaN to +maxint. */ - movi xh, 0 - -.Lfixdfsi_maxint: - slli a4, a6, 11 /* 0x80000000 */ - addi a5, a4, -1 /* 0x7fffffff */ - movgez a4, a5, xh - mov a2, a4 - leaf_return - -.Lfixdfsi_zero: - movi a2, 0 - leaf_return - -#endif /* L_fixdfsi */ - -#ifdef L_fixdfdi - - .align 4 - .global __fixdfdi - .type __fixdfdi, @function -__fixdfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixdfdi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */ - extui a4, xh, 20, 11 - extui a5, a6, 19, 10 /* 0x3fe */ - sub a4, a4, a5 - bgei a4, 64, .Lfixdfdi_maxint - blti a4, 1, .Lfixdfdi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src xh, a7, xl - sll xl, xl - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixdfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixdfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixdfdi_smallshift: - src xl, xh, xl - srl xh, xh - j .Lfixdfdi_shifted - -.Lfixdfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixdfdi_maxint - - /* Translate NaN to +maxint. */ - movi xh, 0 - -.Lfixdfdi_maxint: - slli a7, a6, 11 /* 0x80000000 */ - bgez xh, 1f - mov xh, a7 - movi xl, 0 - leaf_return - -1: addi xh, a7, -1 /* 0x7fffffff */ - movi xl, -1 - leaf_return - -.Lfixdfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -#endif /* L_fixdfdi */ - -#ifdef L_fixunsdfsi - - .align 4 - .global __fixunsdfsi - .type __fixunsdfsi, @function -__fixunsdfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixunsdfsi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */ - extui a4, xh, 20, 11 - extui a5, a6, 20, 10 /* 0x3ff */ - sub a4, a4, a5 - bgei a4, 32, .Lfixunsdfsi_maxint - bltz a4, .Lfixunsdfsi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src a5, a7, xl - - /* Shift back to the right, based on the exponent. 
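__fixdfsi above extracts the biased exponent, saturates when the value is too large (or infinite), maps NaN to the positive maximum, and otherwise shifts the mantissa with its implicit 1 into place before applying the sign. The same computation on the raw bit image with 64-bit arithmetic; fixdfsi_model is an illustrative name, not a libgcc entry point:

#include <stdint.h>

/* bits: the raw IEEE-754 double image (what lives in xh/xl above). */
static int32_t fixdfsi_model(uint64_t bits)
{
    int      sign = (int)(bits >> 63);
    int      exp  = (int)((bits >> 52) & 0x7ff);
    uint64_t mant = bits & 0x000fffffffffffffULL;

    if (exp == 0x7ff && mant != 0)            /* NaN -> +maxint */
        return INT32_MAX;

    int shift = exp - 0x3fe;
    if (shift >= 32)                          /* too big, or +/-Inf */
        return sign ? INT32_MIN : INT32_MAX;
    if (shift < 1)                            /* |value| < 1 */
        return 0;

    uint64_t val = (mant | (1ULL << 52)) >> (53 - shift);
    return sign ? -(int32_t)val : (int32_t)val;
}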
*/ - addi a4, a4, 1 - beqi a4, 32, .Lfixunsdfsi_bigexp - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixunsdfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixunsdfsi_maxint - - /* Translate NaN to 0xffffffff. */ - movi a2, -1 - leaf_return - -.Lfixunsdfsi_maxint: - slli a4, a6, 11 /* 0x80000000 */ - movi a5, -1 /* 0xffffffff */ - movgez a4, a5, xh - mov a2, a4 - leaf_return - -.Lfixunsdfsi_zero: - movi a2, 0 - leaf_return - -.Lfixunsdfsi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz xh, 1f - mov a2, a5 /* no shift needed */ - leaf_return - - /* Return 0x80000000 if negative. */ -1: slli a2, a6, 11 - leaf_return - -#endif /* L_fixunsdfsi */ - -#ifdef L_fixunsdfdi - - .align 4 - .global __fixunsdfdi - .type __fixunsdfdi, @function -__fixunsdfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixunsdfdi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */ - extui a4, xh, 20, 11 - extui a5, a6, 20, 10 /* 0x3ff */ - sub a4, a4, a5 - bgei a4, 64, .Lfixunsdfdi_maxint - bltz a4, .Lfixunsdfdi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src xh, a7, xl - sll xl, xl - - /* Shift back to the right, based on the exponent. */ - addi a4, a4, 1 - beqi a4, 64, .Lfixunsdfdi_bigexp - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixunsdfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixunsdfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixunsdfdi_smallshift: - src xl, xh, xl - srl xh, xh - j .Lfixunsdfdi_shifted - -.Lfixunsdfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixunsdfdi_maxint - - /* Translate NaN to 0xffffffff.... */ -1: movi xh, -1 - movi xl, -1 - leaf_return - -.Lfixunsdfdi_maxint: - bgez xh, 1b -2: slli xh, a6, 11 /* 0x80000000 */ - movi xl, 0 - leaf_return - -.Lfixunsdfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -.Lfixunsdfdi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz a7, 2b - leaf_return /* no shift needed */ - -#endif /* L_fixunsdfdi */ - -#ifdef L_floatsidf - - .align 4 - .global __floatunsidf - .type __floatunsidf, @function -__floatunsidf: - leaf_entry sp, 16 - beqz a2, .Lfloatsidf_return_zero - - /* Set the sign to zero and jump to the floatsidf code. */ - movi a7, 0 - j .Lfloatsidf_normalize - - .align 4 - .global __floatsidf - .type __floatsidf, @function -__floatsidf: - leaf_entry sp, 16 - - /* Check for zero. */ - beqz a2, .Lfloatsidf_return_zero - - /* Save the sign. */ - extui a7, a2, 31, 1 - - /* Get the absolute value. */ -#if XCHAL_HAVE_ABS - abs a2, a2 -#else - neg a4, a2 - movltz a2, a4, a2 -#endif - -.Lfloatsidf_normalize: - /* Normalize with the first 1 bit in the msb. */ - do_nsau a4, a2, a5, a6 - ssl a4 - sll a5, a2 - - /* Shift the mantissa into position. */ - srli xh, a5, 11 - slli xl, a5, (32 - 11) - - /* Set the exponent. */ - movi a5, 0x41d /* 0x3fe + 31 */ - sub a5, a5, a4 - slli a5, a5, 20 - add xh, xh, a5 - - /* Add the sign and return. */ - slli a7, a7, 31 - or xh, xh, a7 - leaf_return - -.Lfloatsidf_return_zero: - movi a3, 0 - leaf_return - -#endif /* L_floatsidf */ - -#ifdef L_floatdidf - - .align 4 - .global __floatundidf - .type __floatundidf, @function -__floatundidf: - leaf_entry sp, 16 - - /* Check for zero. 
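__floatsidf above goes the other way: normalize the absolute value with NSAU (count leading zeros), place the mantissa, and derive the exponent from the shift count. The exponent constant is 0x3fe + 31 rather than 0x3ff + 31 because the implicit 1 is still present in the mantissa when the two are added, which bumps the stored exponent by one. A C restatement using GCC's __builtin_clz in place of do_nsau; floatsidf_model is an illustrative name:

#include <stdint.h>

/* Returns the raw IEEE-754 double image for the int32 argument. */
static uint64_t floatsidf_model(int32_t i)
{
    if (i == 0)
        return 0;

    uint64_t sign = (i < 0) ? (1ULL << 63) : 0;
    uint32_t mag  = (i < 0) ? 0u - (uint32_t)i : (uint32_t)i;

    int clz = __builtin_clz(mag);                    /* do_nsau */
    uint64_t mant  = (uint64_t)mag << (clz + 21);    /* leading 1 at bit 52 */
    uint64_t expm1 = (uint64_t)(0x3fe + 31 - clz) << 52;

    /* Adding the mantissa with its leading 1 still in place increments
       the exponent field, giving the final 0x3ff + 31 - clz. */
    return sign | (expm1 + mant);
}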
*/ - or a4, xh, xl - beqz a4, 2f - - /* Set the sign to zero and jump to the floatdidf code. */ - movi a7, 0 - j .Lfloatdidf_normalize - - .align 4 - .global __floatdidf - .type __floatdidf, @function -__floatdidf: - leaf_entry sp, 16 - - /* Check for zero. */ - or a4, xh, xl - beqz a4, 2f - - /* Save the sign. */ - extui a7, xh, 31, 1 - - /* Get the absolute value. */ - bgez xh, .Lfloatdidf_normalize - neg xl, xl - neg xh, xh - beqz xl, .Lfloatdidf_normalize - addi xh, xh, -1 - -.Lfloatdidf_normalize: - /* Normalize with the first 1 bit in the msb of xh. */ - beqz xh, .Lfloatdidf_bigshift - do_nsau a4, xh, a5, a6 - ssl a4 - src xh, xh, xl - sll xl, xl - -.Lfloatdidf_shifted: - /* Shift the mantissa into position, with rounding bits in a6. */ - ssai 11 - sll a6, xl - src xl, xh, xl - srl xh, xh - - /* Set the exponent. */ - movi a5, 0x43d /* 0x3fe + 63 */ - sub a5, a5, a4 - slli a5, a5, 20 - add xh, xh, a5 - - /* Add the sign. */ - slli a7, a7, 31 - or xh, xh, a7 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a6, 2f - addi xl, xl, 1 - beqz xl, .Lfloatdidf_roundcarry - - /* Check if the leftover fraction is exactly 1/2. */ - slli a6, a6, 1 - beqz a6, .Lfloatdidf_exactlyhalf -2: leaf_return - -.Lfloatdidf_bigshift: - /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ - do_nsau a4, xl, a5, a6 - ssl a4 - sll xh, xl - movi xl, 0 - addi a4, a4, 32 - j .Lfloatdidf_shifted - -.Lfloatdidf_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - leaf_return - -.Lfloatdidf_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow to the exponent is OK. */ - leaf_return - -#endif /* L_floatdidf */ - -#ifdef L_truncdfsf2 - - .align 4 - .global __truncdfsf2 - .type __truncdfsf2, @function -__truncdfsf2: - leaf_entry sp, 16 - - /* Adjust the exponent bias. */ - movi a4, (0x3ff - 0x7f) << 20 - sub a5, xh, a4 - - /* Check for underflow. */ - xor a6, xh, a5 - bltz a6, .Ltrunc_underflow - extui a6, a5, 20, 11 - beqz a6, .Ltrunc_underflow - - /* Check for overflow. */ - movi a4, 255 - bge a6, a4, .Ltrunc_overflow - - /* Shift a5/xl << 3 into a5/a4. */ - ssai (32 - 3) - src a5, a5, xl - sll a4, xl - -.Ltrunc_addsign: - /* Add the sign bit. */ - extui a6, xh, 31, 1 - slli a6, a6, 31 - or a2, a6, a5 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a4, 1f - addi a2, a2, 1 - /* Overflow to the exponent is OK. The answer will be correct. */ - - /* Check if the leftover fraction is exactly 1/2. */ - slli a4, a4, 1 - beqz a4, .Ltrunc_exactlyhalf -1: leaf_return - -.Ltrunc_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -.Ltrunc_overflow: - /* Check if exponent == 0x7ff. */ - movi a4, 0x7ff00000 - bnall xh, a4, 1f - - /* Check if mantissa is nonzero. */ - slli a5, xh, 12 - or a5, a5, xl - beqz a5, 1f - - /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */ - srli a4, a4, 1 - -1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */ - /* Add the sign bit. */ - extui a6, xh, 31, 1 - ssai 1 - src a2, a6, a4 - leaf_return - -.Ltrunc_underflow: - /* Find shift count for a subnormal. Flush to zero if >= 32. */ - extui a6, xh, 20, 11 - movi a5, 0x3ff - 0x7f - sub a6, a5, a6 - addi a6, a6, 1 - bgeui a6, 32, 1f - - /* Replace the exponent with an explicit "1.0". 
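In the common case (the result neither overflows nor goes subnormal), __truncdfsf2 above reduces to subtracting the difference of the two exponent biases, dropping 29 mantissa bits, and rounding to nearest-even on the bits that fall off. A C sketch of just that fast path on the raw bit images; truncdfsf2_core is an illustrative name and deliberately omits the overflow, underflow and NaN handling shown above:

#include <stdint.h>

/* d: raw double image, assumed to be a normal value whose result is
   also a normal float (the special cases are handled separately). */
static uint32_t truncdfsf2_core(uint64_t d)
{
    uint32_t sign    = (uint32_t)(d >> 32) & 0x80000000u;
    uint64_t rebased = d - ((uint64_t)(0x3ff - 0x7f) << 52);

    uint32_t f    = sign | (uint32_t)(rebased >> 29); /* 8-bit exp + 23 bits */
    uint32_t rest = (uint32_t)rebased << 3;           /* the 29 dropped bits */

    if (rest & 0x80000000u) {        /* fraction >= 1/2: round up */
        f += 1;
        if ((rest << 1) == 0)        /* exactly 1/2: ties to even */
            f &= ~1u;
    }
    return f;
}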
*/ - slli a5, a5, 13 /* 0x700000 */ - or a5, a5, xh - slli a5, a5, 11 - srli a5, a5, 11 - - /* Shift the mantissa left by 3 bits (into a5/a4). */ - ssai (32 - 3) - src a5, a5, xl - sll a4, xl - - /* Shift right by a6. */ - ssr a6 - sll a7, a4 - src a4, a5, a4 - srl a5, a5 - beqz a7, .Ltrunc_addsign - or a4, a4, a6 /* any positive, nonzero value will work */ - j .Ltrunc_addsign - - /* Return +/- zero. */ -1: extui a2, xh, 31, 1 - slli a2, a2, 31 - leaf_return - -#endif /* L_truncdfsf2 */ - -#ifdef L_extendsfdf2 - - .align 4 - .global __extendsfdf2 - .type __extendsfdf2, @function -__extendsfdf2: - leaf_entry sp, 16 - - /* Save the sign bit and then shift it off. */ - extui a5, a2, 31, 1 - slli a5, a5, 31 - slli a4, a2, 1 - - /* Extract and check the exponent. */ - extui a6, a2, 23, 8 - beqz a6, .Lextend_expzero - addi a6, a6, 1 - beqi a6, 256, .Lextend_nan_or_inf - - /* Shift >> 3 into a4/xl. */ - srli a4, a4, 4 - slli xl, a2, (32 - 3) - - /* Adjust the exponent bias. */ - movi a6, (0x3ff - 0x7f) << 20 - add a4, a4, a6 - - /* Add the sign bit. */ - or xh, a4, a5 - leaf_return - -.Lextend_nan_or_inf: - movi a4, 0x7ff00000 - - /* Check for NaN. */ - slli a7, a2, 9 - beqz a7, 1f - - slli a6, a6, 11 /* 0x80000 */ - or a4, a4, a6 - - /* Add the sign and return. */ -1: or xh, a4, a5 - movi xl, 0 - leaf_return - -.Lextend_expzero: - beqz a4, 1b - - /* Normalize it to have 8 zero bits before the first 1 bit. */ - do_nsau a7, a4, a2, a3 - addi a7, a7, -8 - ssl a7 - sll a4, a4 - - /* Shift >> 3 into a4/xl. */ - slli xl, a4, (32 - 3) - srli a4, a4, 3 - - /* Set the exponent. */ - movi a6, 0x3fe - 0x7f - sub a6, a6, a7 - slli a6, a6, 20 - add a4, a4, a6 - - /* Add the sign and return. */ - or xh, a4, a5 - leaf_return - -#endif /* L_extendsfdf2 */ - - diff --git a/gcc/config/xtensa/ieee754-sf.S b/gcc/config/xtensa/ieee754-sf.S deleted file mode 100644 index d75be0e5ae5..00000000000 --- a/gcc/config/xtensa/ieee754-sf.S +++ /dev/null @@ -1,1757 +0,0 @@ -/* IEEE-754 single-precision functions for Xtensa - Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. - Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#ifdef __XTENSA_EB__ -#define xh a2 -#define xl a3 -#define yh a4 -#define yl a5 -#else -#define xh a3 -#define xl a2 -#define yh a5 -#define yl a4 -#endif - -/* Warning! The branch displacements for some Xtensa branch instructions - are quite small, and this code has been carefully laid out to keep - branch targets in range. If you change anything, be sure to check that - the assembler is not relaxing anything to branch over a jump. 
*/ - -#ifdef L_negsf2 - - .align 4 - .global __negsf2 - .type __negsf2, @function -__negsf2: - leaf_entry sp, 16 - movi a4, 0x80000000 - xor a2, a2, a4 - leaf_return - -#endif /* L_negsf2 */ - -#ifdef L_addsubsf3 - - /* Addition */ -__addsf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Ladd_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall a3, a6, 1f - /* If x is a NaN, return it. Otherwise, return y. */ - slli a7, a2, 9 - beqz a7, .Ladd_ynan_or_inf -1: leaf_return - -.Ladd_ynan_or_inf: - /* Return y. */ - mov a2, a3 - leaf_return - -.Ladd_opposite_signs: - /* Operand signs differ. Do a subtraction. */ - slli a7, a6, 8 - xor a3, a3, a7 - j .Lsub_same_sign - - .align 4 - .global __addsf3 - .type __addsf3, @function -__addsf3: - leaf_entry sp, 16 - movi a6, 0x7f800000 - - /* Check if the two operands have the same sign. */ - xor a7, a2, a3 - bltz a7, .Ladd_opposite_signs - -.Ladd_same_sign: - /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ - ball a2, a6, .Ladd_xnan_or_inf - ball a3, a6, .Ladd_ynan_or_inf - - /* Compare the exponents. The smaller operand will be shifted - right by the exponent difference and added to the larger - one. */ - extui a7, a2, 23, 9 - extui a8, a3, 23, 9 - bltu a7, a8, .Ladd_shiftx - -.Ladd_shifty: - /* Check if the smaller (or equal) exponent is zero. */ - bnone a3, a6, .Ladd_yexpzero - - /* Replace y sign/exponent with 0x008. */ - or a3, a3, a6 - slli a3, a3, 8 - srli a3, a3, 8 - -.Ladd_yexpdiff: - /* Compute the exponent difference. */ - sub a10, a7, a8 - - /* Exponent difference > 32 -- just return the bigger value. */ - bgeui a10, 32, 1f - - /* Shift y right by the exponent difference. Any bits that are - shifted out of y are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, a3, a9 - srl a3, a3 - - /* Do the addition. */ - add a2, a2, a3 - - /* Check if the add overflowed into the exponent. */ - extui a10, a2, 23, 9 - beq a10, a7, .Ladd_round - mov a8, a7 - j .Ladd_carry - -.Ladd_yexpzero: - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0", and increment the apparent exponent - because subnormals behave as if they had the minimum (nonzero) - exponent. Test for the case when both exponents are zero. */ - slli a3, a3, 9 - srli a3, a3, 9 - bnone a2, a6, .Ladd_bothexpzero - addi a8, a8, 1 - j .Ladd_yexpdiff - -.Ladd_bothexpzero: - /* Both exponents are zero. Handle this as a special case. There - is no need to shift or round, and the normal code for handling - a carry into the exponent field will not work because it - assumes there is an implicit "1.0" that needs to be added. */ - add a2, a2, a3 -1: leaf_return - -.Ladd_xexpzero: - /* Same as "yexpzero" except skip handling the case when both - exponents are zero. */ - slli a2, a2, 9 - srli a2, a2, 9 - addi a7, a7, 1 - j .Ladd_xexpdiff - -.Ladd_shiftx: - /* Same thing as the "shifty" code, but with x and y swapped. Also, - because the exponent difference is always nonzero in this version, - the shift sequence can use SLL and skip loading a constant zero. */ - bnone a2, a6, .Ladd_xexpzero - - or a2, a2, a6 - slli a2, a2, 8 - srli a2, a2, 8 - -.Ladd_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Ladd_returny - - ssr a10 - sll a9, a2 - srl a2, a2 - - add a2, a2, a3 - - /* Check if the add overflowed into the exponent. 
*/ - extui a10, a2, 23, 9 - bne a10, a8, .Ladd_carry - -.Ladd_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi a2, a2, 1 - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_returny: - mov a2, a3 - leaf_return - -.Ladd_carry: - /* The addition has overflowed into the exponent field, so the - value needs to be renormalized. The mantissa of the result - can be recovered by subtracting the original exponent and - adding 0x800000 (which is the explicit "1.0" for the - mantissa of the non-shifted operand -- the "1.0" for the - shifted operand was already added). The mantissa can then - be shifted right by one bit. The explicit "1.0" of the - shifted mantissa then needs to be replaced by the exponent, - incremented by one to account for the normalizing shift. - It is faster to combine these operations: do the shift first - and combine the additions and subtractions. If x is the - original exponent, the result is: - shifted mantissa - (x << 22) + (1 << 22) + (x << 23) - or: - shifted mantissa + ((x + 1) << 22) - Note that the exponent is incremented here by leaving the - explicit "1.0" of the mantissa in the exponent field. */ - - /* Shift x right by one bit. Save the lsb. */ - mov a10, a2 - srli a2, a2, 1 - - /* See explanation above. The original exponent is in a8. */ - addi a8, a8, 1 - slli a8, a8, 22 - add a2, a2, a8 - - /* Return an Infinity if the exponent overflowed. */ - ball a2, a6, .Ladd_infinity - - /* Same thing as the "round" code except the msb of the leftover - fraction is bit 0 of a10, with the rest of the fraction in a9. */ - bbci.l a10, 0, 1f - addi a2, a2, 1 - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_infinity: - /* Clear the mantissa. */ - srli a2, a2, 23 - slli a2, a2, 23 - - /* The sign bit may have been lost in a carry-out. Put it back. */ - slli a8, a8, 1 - or a2, a2, a8 - leaf_return - -.Ladd_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - - - /* Subtraction */ -__subsf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Lsub_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall a3, a6, 1f - /* Both x and y are either NaN or Inf, so the result is NaN. */ - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 -1: leaf_return - -.Lsub_ynan_or_inf: - /* Negate y and return it. */ - slli a7, a6, 8 - xor a2, a3, a7 - leaf_return - -.Lsub_opposite_signs: - /* Operand signs differ. Do an addition. */ - slli a7, a6, 8 - xor a3, a3, a7 - j .Ladd_same_sign - - .align 4 - .global __subsf3 - .type __subsf3, @function -__subsf3: - leaf_entry sp, 16 - movi a6, 0x7f800000 - - /* Check if the two operands have the same sign. */ - xor a7, a2, a3 - bltz a7, .Lsub_opposite_signs - -.Lsub_same_sign: - /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ - ball a2, a6, .Lsub_xnan_or_inf - ball a3, a6, .Lsub_ynan_or_inf - - /* Compare the operands. In contrast to addition, the entire - value matters here. */ - extui a7, a2, 23, 8 - extui a8, a3, 23, 8 - bltu a2, a3, .Lsub_xsmaller - -.Lsub_ysmaller: - /* Check if the smaller (or equal) exponent is zero. */ - bnone a3, a6, .Lsub_yexpzero - - /* Replace y sign/exponent with 0x008. */ - or a3, a3, a6 - slli a3, a3, 8 - srli a3, a3, 8 - -.Lsub_yexpdiff: - /* Compute the exponent difference. 
*/ - sub a10, a7, a8 - - /* Exponent difference > 32 -- just return the bigger value. */ - bgeui a10, 32, 1f - - /* Shift y right by the exponent difference. Any bits that are - shifted out of y are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, a3, a9 - srl a3, a3 - - sub a2, a2, a3 - - /* Subtract the leftover bits in a9 from zero and propagate any - borrow from a2. */ - neg a9, a9 - addi a10, a2, -1 - movnez a2, a10, a9 - - /* Check if the subtract underflowed into the exponent. */ - extui a10, a2, 23, 8 - beq a10, a7, .Lsub_round - j .Lsub_borrow - -.Lsub_yexpzero: - /* Return zero if the inputs are equal. (For the non-subnormal - case, subtracting the "1.0" will cause a borrow from the exponent - and this case can be detected when handling the borrow.) */ - beq a2, a3, .Lsub_return_zero - - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0". Unless x is also a subnormal, increment - y's apparent exponent because subnormals behave as if they had - the minimum (nonzero) exponent. */ - slli a3, a3, 9 - srli a3, a3, 9 - bnone a2, a6, .Lsub_yexpdiff - addi a8, a8, 1 - j .Lsub_yexpdiff - -.Lsub_returny: - /* Negate and return y. */ - slli a7, a6, 8 - xor a2, a3, a7 -1: leaf_return - -.Lsub_xsmaller: - /* Same thing as the "ysmaller" code, but with x and y swapped and - with y negated. */ - bnone a2, a6, .Lsub_xexpzero - - or a2, a2, a6 - slli a2, a2, 8 - srli a2, a2, 8 - -.Lsub_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Lsub_returny - - ssr a10 - movi a9, 0 - src a9, a2, a9 - srl a2, a2 - - /* Negate y. */ - slli a11, a6, 8 - xor a3, a3, a11 - - sub a2, a3, a2 - - neg a9, a9 - addi a10, a2, -1 - movnez a2, a10, a9 - - /* Check if the subtract underflowed into the exponent. */ - extui a10, a2, 23, 8 - bne a10, a8, .Lsub_borrow - -.Lsub_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi a2, a2, 1 - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Lsub_exactlyhalf -1: leaf_return - -.Lsub_xexpzero: - /* Same as "yexpzero". */ - beq a2, a3, .Lsub_return_zero - slli a2, a2, 9 - srli a2, a2, 9 - bnone a3, a6, .Lsub_xexpdiff - addi a7, a7, 1 - j .Lsub_xexpdiff - -.Lsub_return_zero: - movi a2, 0 - leaf_return - -.Lsub_borrow: - /* The subtraction has underflowed into the exponent field, so the - value needs to be renormalized. Shift the mantissa left as - needed to remove any leading zeros and adjust the exponent - accordingly. If the exponent is not large enough to remove - all the leading zeros, the result will be a subnormal value. */ - - slli a8, a2, 9 - beqz a8, .Lsub_xzero - do_nsau a6, a8, a7, a11 - srli a8, a8, 9 - bge a6, a10, .Lsub_subnormal - addi a6, a6, 1 - -.Lsub_normalize_shift: - /* Shift the mantissa (a8/a9) left by a6. */ - ssl a6 - src a8, a8, a9 - sll a9, a9 - - /* Combine the shifted mantissa with the sign and exponent, - decrementing the exponent by a6. (The exponent has already - been decremented by one due to the borrow from the subtraction, - but adding the mantissa will increment the exponent by one.) */ - srli a2, a2, 23 - sub a2, a2, a6 - slli a2, a2, 23 - add a2, a2, a8 - j .Lsub_round - -.Lsub_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -.Lsub_xzero: - /* If there was a borrow from the exponent, and the mantissa and - guard digits are all zero, then the inputs were equal and the - result should be zero. */ - beqz a9, .Lsub_return_zero - - /* Only the guard digit is nonzero. 
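One detail of the subtraction path above is worth a note: the bits shifted out of y are kept in a separate guard word, and the neg / addi / movnez sequence subtracts that fraction from zero while folding the resulting borrow into the main difference, again without a carry flag. A minimal C sketch (sub_with_guard is an illustrative name):

#include <stdint.h>

/* Compute (x.0) - (y.yguard): negate the guard fraction and, if it was
   nonzero, borrow one from the integer difference, exactly as the
   neg/addi/movnez sequence above does. */
static void sub_with_guard(uint32_t x, uint32_t y, uint32_t yguard,
                           uint32_t *diff, uint32_t *guard)
{
    *diff  = x - y;
    *guard = 0u - yguard;
    if (yguard != 0)
        *diff -= 1;
}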
Shift by min(24, a10). */ - addi a11, a10, -24 - movi a6, 24 - movltz a6, a10, a11 - j .Lsub_normalize_shift - -.Lsub_subnormal: - /* The exponent is too small to shift away all the leading zeros. - Set a6 to the current exponent (which has already been - decremented by the borrow) so that the exponent of the result - will be zero. Do not add 1 to a6 in this case, because: (1) - adding the mantissa will not increment the exponent, so there is - no need to subtract anything extra from the exponent to - compensate, and (2) the effective exponent of a subnormal is 1 - not 0 so the shift amount must be 1 smaller than normal. */ - mov a6, a10 - j .Lsub_normalize_shift - -#endif /* L_addsubsf3 */ - -#ifdef L_mulsf3 - - /* Multiplication */ -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 -#define XCHAL_NO_MUL 1 -#endif - -__mulsf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). - (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) */ - -.Lmul_xexpzero: - /* Clear the sign bit of x. */ - slli a2, a2, 1 - srli a2, a2, 1 - - /* If x is zero, return zero. */ - beqz a2, .Lmul_return_zero - - /* Normalize x. Adjust the exponent in a8. */ - do_nsau a10, a2, a11, a12 - addi a10, a10, -8 - ssl a10 - sll a2, a2 - movi a8, 1 - sub a8, a8, a10 - j .Lmul_xnormalized - -.Lmul_yexpzero: - /* Clear the sign bit of y. */ - slli a3, a3, 1 - srli a3, a3, 1 - - /* If y is zero, return zero. */ - beqz a3, .Lmul_return_zero - - /* Normalize y. Adjust the exponent in a9. */ - do_nsau a10, a3, a11, a12 - addi a10, a10, -8 - ssl a10 - sll a3, a3 - movi a9, 1 - sub a9, a9, a10 - j .Lmul_ynormalized - -.Lmul_return_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - j .Lmul_done - -.Lmul_xnan_or_inf: - /* If y is zero, return NaN. */ - slli a8, a3, 1 - bnez a8, 1f - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 - j .Lmul_done -1: - /* If y is NaN, return y. */ - bnall a3, a6, .Lmul_returnx - slli a8, a3, 9 - beqz a8, .Lmul_returnx - -.Lmul_returny: - mov a2, a3 - -.Lmul_returnx: - /* Set the sign bit and return. */ - extui a7, a7, 31, 1 - slli a2, a2, 1 - ssai 1 - src a2, a7, a2 - j .Lmul_done - -.Lmul_ynan_or_inf: - /* If x is zero, return NaN. */ - slli a8, a2, 1 - bnez a8, .Lmul_returny - movi a7, 0x400000 /* make it a quiet NaN */ - or a2, a3, a7 - j .Lmul_done - - .align 4 - .global __mulsf3 - .type __mulsf3, @function -__mulsf3: -#if __XTENSA_CALL0_ABI__ - leaf_entry sp, 32 - addi sp, sp, -32 - s32i a12, sp, 16 - s32i a13, sp, 20 - s32i a14, sp, 24 - s32i a15, sp, 28 -#elif XCHAL_NO_MUL - /* This is not really a leaf function; allocate enough stack space - to allow CALL12s to a helper function. */ - leaf_entry sp, 64 -#else - leaf_entry sp, 32 -#endif - movi a6, 0x7f800000 - - /* Get the sign of the result. */ - xor a7, a2, a3 - - /* Check for NaN and infinity. */ - ball a2, a6, .Lmul_xnan_or_inf - ball a3, a6, .Lmul_ynan_or_inf - - /* Extract the exponents. */ - extui a8, a2, 23, 8 - extui a9, a3, 23, 8 - - beqz a8, .Lmul_xexpzero -.Lmul_xnormalized: - beqz a9, .Lmul_yexpzero -.Lmul_ynormalized: - - /* Add the exponents. */ - add a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0xffffff - or a2, a2, a6 - and a2, a2, a10 - or a3, a3, a6 - and a3, a3, a10 - - /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. 
*/ - -#if XCHAL_HAVE_MUL32_HIGH - - mull a6, a2, a3 - muluh a2, a2, a3 - -#else - - /* Break the inputs into 16-bit chunks and compute 4 32-bit partial - products. These partial products are: - - 0 xl * yl - - 1 xl * yh - 2 xh * yl - - 3 xh * yh - - If using the Mul16 or Mul32 multiplier options, these input - chunks must be stored in separate registers. For Mac16, the - UMUL.AA.* opcodes can specify that the inputs come from either - half of the registers, so there is no need to shift them out - ahead of time. If there is no multiply hardware, the 16-bit - chunks can be extracted when setting up the arguments to the - separate multiply function. */ - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Calling a separate multiply function will clobber a0 and requires - use of a8 as a temporary, so save those values now. (The function - uses a custom ABI so nothing else needs to be saved.) */ - s32i a0, sp, 0 - s32i a8, sp, 4 -#endif - -#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 - -#define a2h a4 -#define a3h a5 - - /* Get the high halves of the inputs into registers. */ - srli a2h, a2, 16 - srli a3h, a3, 16 - -#define a2l a2 -#define a3l a3 - -#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 - /* Clear the high halves of the inputs. This does not matter - for MUL16 because the high bits are ignored. */ - extui a2, a2, 0, 16 - extui a3, a3, 0, 16 -#endif -#endif /* MUL16 || MUL32 */ - - -#if XCHAL_HAVE_MUL16 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mul16u dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MUL32 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mull dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MAC16 - -/* The preprocessor insists on inserting a space when concatenating after - a period in the definition of do_mul below. These macros are a workaround - using underscores instead of periods when doing the concatenation. */ -#define umul_aa_ll umul.aa.ll -#define umul_aa_lh umul.aa.lh -#define umul_aa_hl umul.aa.hl -#define umul_aa_hh umul.aa.hh - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - umul_aa_ ## xhalf ## yhalf xreg, yreg; \ - rsr dst, ACCLO - -#else /* no multiply hardware */ - -#define set_arg_l(dst, src) \ - extui dst, src, 0, 16 -#define set_arg_h(dst, src) \ - srli dst, src, 16 - -#if __XTENSA_CALL0_ABI__ -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a13, xreg); \ - set_arg_ ## yhalf (a14, yreg); \ - call0 .Lmul_mulsi3; \ - mov dst, a12 -#else -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a14, xreg); \ - set_arg_ ## yhalf (a15, yreg); \ - call12 .Lmul_mulsi3; \ - mov dst, a14 -#endif /* __XTENSA_CALL0_ABI__ */ - -#endif /* no multiply hardware */ - - /* Add pp1 and pp2 into a6 with carry-out in a9. */ - do_mul(a6, a2, l, a3, h) /* pp 1 */ - do_mul(a11, a2, h, a3, l) /* pp 2 */ - movi a9, 0 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Shift the high half of a9/a6 into position in a9. Note that - this value can be safely incremented without any carry-outs. */ - ssai 16 - src a9, a9, a6 - - /* Compute the low word into a6. */ - do_mul(a11, a2, l, a3, l) /* pp 0 */ - sll a6, a6 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Compute the high word into a2. */ - do_mul(a2, a2, h, a3, h) /* pp 3 */ - add a2, a2, a9 - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Restore values saved on the stack during the multiplication. */ - l32i a0, sp, 0 - l32i a8, sp, 4 -#endif -#endif /* ! 
XCHAL_HAVE_MUL32_HIGH */ - - /* Shift left by 9 bits, unless there was a carry-out from the - multiply, in which case, shift by 8 bits and increment the - exponent. */ - movi a4, 9 - srli a5, a2, 24 - 9 - beqz a5, 1f - addi a4, a4, -1 - addi a8, a8, 1 -1: ssl a4 - src a2, a2, a6 - sll a6, a6 - - /* Subtract the extra bias from the exponent sum (plus one to account - for the explicit "1.0" of the mantissa that will be added to the - exponent in the final result). */ - movi a4, 0x80 - sub a8, a8, a4 - - /* Check for over/underflow. The value in a8 is one less than the - final exponent, so values in the range 0..fd are OK here. */ - movi a4, 0xfe - bgeu a8, a4, .Lmul_overflow - -.Lmul_round: - /* Round. */ - bgez a6, .Lmul_rounded - addi a2, a2, 1 - slli a6, a6, 1 - beqz a6, .Lmul_exactlyhalf - -.Lmul_rounded: - /* Add the exponent to the mantissa. */ - slli a8, a8, 23 - add a2, a2, a8 - -.Lmul_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or a2, a2, a7 - -.Lmul_done: -#if __XTENSA_CALL0_ABI__ - l32i a12, sp, 16 - l32i a13, sp, 20 - l32i a14, sp, 24 - l32i a15, sp, 28 - addi sp, sp, 32 -#endif - leaf_return - -.Lmul_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - j .Lmul_rounded - -.Lmul_overflow: - bltz a8, .Lmul_underflow - /* Return +/- Infinity. */ - movi a8, 0xff - slli a2, a8, 23 - j .Lmul_addsign - -.Lmul_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - mov a9, a6 - ssr a8 - bgeui a8, 32, .Lmul_flush_to_zero - - /* Shift a2 right. Any bits that are shifted out of a2 are saved - in a6 (combined with the shifted-out bits currently in a6) for - rounding the result. */ - sll a6, a2 - srl a2, a2 - - /* Set the exponent to zero. */ - movi a8, 0 - - /* Pack any nonzero bits shifted out into a6. */ - beqz a9, .Lmul_round - movi a9, 1 - or a6, a6, a9 - j .Lmul_round - -.Lmul_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - j .Lmul_done - -#if XCHAL_NO_MUL - - /* For Xtensa processors with no multiply hardware, this simplified - version of _mulsi3 is used for multiplying 16-bit chunks of - the floating-point mantissas. When using CALL0, this function - uses a custom ABI: the inputs are passed in a13 and a14, the - result is returned in a12, and a8 and a15 are clobbered. */ - .align 4 -.Lmul_mulsi3: - leaf_entry sp, 16 - .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 - movi \dst, 0 -1: add \tmp1, \src2, \dst - extui \tmp2, \src1, 0, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx2 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 1, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx4 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 2, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx8 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 3, 1 - movnez \dst, \tmp1, \tmp2 - - srli \src1, \src1, 4 - slli \src2, \src2, 4 - bnez \src1, 1b - .endm -#if __XTENSA_CALL0_ABI__ - mul_mulsi3_body a12, a13, a14, a15, a8 -#else - /* The result will be written into a2, so save that argument in a4. */ - mov a4, a2 - mul_mulsi3_body a2, a4, a3, a5, a6 -#endif - leaf_return -#endif /* XCHAL_NO_MUL */ -#endif /* L_mulsf3 */ - -#ifdef L_divsf3 - - /* Division */ -__divsf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). 
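On the normalization step in the __mulsf3 code above: the product of two mantissas in [1.0, 2.0) lies in [1.0, 4.0), so its leading 1 sits in bit 46 or bit 47 of the 48-bit product. Shifting by 9 normally, or by 8 with an exponent increment when the product reached 2.0, lands that bit in the implicit-one position either way. Roughly, with the product in a single 64-bit word and the guard bits ignored; normalize_product is an illustrative name:

#include <stdint.h>

/* prod: the 47- or 48-bit product of two 24-bit mantissas.  Returns the
   24-bit result mantissa (leading 1 in bit 23) and bumps *exp when the
   product reached 2.0. */
static uint32_t normalize_product(uint64_t prod, int *exp)
{
    if (prod >> 47) {                  /* carry-out: product >= 2.0 */
        *exp += 1;
        return (uint32_t)(prod >> 24);
    }
    return (uint32_t)(prod >> 23);
}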
- (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) */ - -.Ldiv_yexpzero: - /* Clear the sign bit of y. */ - slli a3, a3, 1 - srli a3, a3, 1 - - /* Check for division by zero. */ - beqz a3, .Ldiv_yzero - - /* Normalize y. Adjust the exponent in a9. */ - do_nsau a10, a3, a4, a5 - addi a10, a10, -8 - ssl a10 - sll a3, a3 - movi a9, 1 - sub a9, a9, a10 - j .Ldiv_ynormalized - -.Ldiv_yzero: - /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ - slli a4, a2, 1 - srli a4, a4, 1 - srli a2, a7, 31 - slli a2, a2, 31 - or a2, a2, a6 - bnez a4, 1f - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 -1: leaf_return - -.Ldiv_xexpzero: - /* Clear the sign bit of x. */ - slli a2, a2, 1 - srli a2, a2, 1 - - /* If x is zero, return zero. */ - beqz a2, .Ldiv_return_zero - - /* Normalize x. Adjust the exponent in a8. */ - do_nsau a10, a2, a4, a5 - addi a10, a10, -8 - ssl a10 - sll a2, a2 - movi a8, 1 - sub a8, a8, a10 - j .Ldiv_xnormalized - -.Ldiv_return_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - leaf_return - -.Ldiv_xnan_or_inf: - /* Set the sign bit of the result. */ - srli a7, a3, 31 - slli a7, a7, 31 - xor a2, a2, a7 - /* If y is NaN or Inf, return NaN. */ - bnall a3, a6, 1f - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 -1: leaf_return - -.Ldiv_ynan_or_inf: - /* If y is Infinity, return zero. */ - slli a8, a3, 9 - beqz a8, .Ldiv_return_zero - /* y is NaN; return it. */ - mov a2, a3 - leaf_return - - .align 4 - .global __divsf3 - .type __divsf3, @function -__divsf3: - leaf_entry sp, 16 - movi a6, 0x7f800000 - - /* Get the sign of the result. */ - xor a7, a2, a3 - - /* Check for NaN and infinity. */ - ball a2, a6, .Ldiv_xnan_or_inf - ball a3, a6, .Ldiv_ynan_or_inf - - /* Extract the exponents. */ - extui a8, a2, 23, 8 - extui a9, a3, 23, 8 - - beqz a9, .Ldiv_yexpzero -.Ldiv_ynormalized: - beqz a8, .Ldiv_xexpzero -.Ldiv_xnormalized: - - /* Subtract the exponents. */ - sub a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0xffffff - or a2, a2, a6 - and a2, a2, a10 - or a3, a3, a6 - and a3, a3, a10 - - /* The first digit of the mantissa division must be a one. - Shift x (and adjust the exponent) as needed to make this true. */ - bltu a3, a2, 1f - slli a2, a2, 1 - addi a8, a8, -1 -1: - /* Do the first subtraction and shift. */ - sub a2, a2, a3 - slli a2, a2, 1 - - /* Put the quotient into a10. */ - movi a10, 1 - - /* Divide one bit at a time for 23 bits. */ - movi a9, 23 -#if XCHAL_HAVE_LOOPS - loop a9, .Ldiv_loopend -#endif -.Ldiv_loop: - /* Shift the quotient << 1. */ - slli a10, a10, 1 - - /* Is this digit a 0 or 1? */ - bltu a2, a3, 1f - - /* Output a 1 and subtract. */ - addi a10, a10, 1 - sub a2, a2, a3 - - /* Shift the dividend << 1. */ -1: slli a2, a2, 1 - -#if !XCHAL_HAVE_LOOPS - addi a9, a9, -1 - bnez a9, .Ldiv_loop -#endif -.Ldiv_loopend: - - /* Add the exponent bias (less one to account for the explicit "1.0" - of the mantissa that will be added to the exponent in the final - result). */ - addi a8, a8, 0x7e - - /* Check for over/underflow. The value in a8 is one less than the - final exponent, so values in the range 0..fd are OK here. */ - movi a4, 0xfe - bgeu a8, a4, .Ldiv_overflow - -.Ldiv_round: - /* Round. The remainder (<< 1) is in a2. */ - bltu a2, a3, .Ldiv_rounded - addi a10, a10, 1 - beq a2, a3, .Ldiv_exactlyhalf - -.Ldiv_rounded: - /* Add the exponent to the mantissa. 
*/ - slli a8, a8, 23 - add a2, a10, a8 - -.Ldiv_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or a2, a2, a7 - leaf_return - -.Ldiv_overflow: - bltz a8, .Ldiv_underflow - /* Return +/- Infinity. */ - addi a8, a4, 1 /* 0xff */ - slli a2, a8, 23 - j .Ldiv_addsign - -.Ldiv_exactlyhalf: - /* Remainder is exactly half the divisor. Round even. */ - srli a10, a10, 1 - slli a10, a10, 1 - j .Ldiv_rounded - -.Ldiv_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - ssr a8 - bgeui a8, 32, .Ldiv_flush_to_zero - - /* Shift a10 right. Any bits that are shifted out of a10 are - saved in a6 for rounding the result. */ - sll a6, a10 - srl a10, a10 - - /* Set the exponent to zero. */ - movi a8, 0 - - /* Pack any nonzero remainder (in a2) into a6. */ - beqz a2, 1f - movi a9, 1 - or a6, a6, a9 - - /* Round a10 based on the bits shifted out into a6. */ -1: bgez a6, .Ldiv_rounded - addi a10, a10, 1 - slli a6, a6, 1 - bnez a6, .Ldiv_rounded - srli a10, a10, 1 - slli a10, a10, 1 - j .Ldiv_rounded - -.Ldiv_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - leaf_return - -#endif /* L_divsf3 */ - -#ifdef L_cmpsf2 - - /* Equal and Not Equal */ - - .align 4 - .global __eqsf2 - .global __nesf2 - .set __nesf2, __eqsf2 - .type __eqsf2, @function -__eqsf2: - leaf_entry sp, 16 - bne a2, a3, 4f - - /* The values are equal but NaN != NaN. Check the exponent. */ - movi a6, 0x7f800000 - ball a2, a6, 3f - - /* Equal. */ - movi a2, 0 - leaf_return - - /* Not equal. */ -2: movi a2, 1 - leaf_return - - /* Check if the mantissas are nonzero. */ -3: slli a7, a2, 9 - j 5f - - /* Check if x and y are zero with different signs. */ -4: or a7, a2, a3 - slli a7, a7, 1 - - /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa - or x when exponent(x) = 0x7f8 and x == y. */ -5: movi a2, 0 - movi a3, 1 - movnez a2, a3, a7 - leaf_return - - - /* Greater Than */ - - .align 4 - .global __gtsf2 - .type __gtsf2, @function -__gtsf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, a3, 9 - beqz a7, .Lle_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, 0 - leaf_return - - - /* Less Than or Equal */ - - .align 4 - .global __lesf2 - .type __lesf2, @function -__lesf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, a3, 9 - beqz a7, .Lle_cmp - movi a2, 1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, 1 - leaf_return - -.Lle_cmp: - /* Check if x and y have different signs. */ - xor a7, a2, a3 - bltz a7, .Lle_diff_signs - - /* Check if x is negative. */ - bltz a2, .Lle_xneg - - /* Check if x <= y. */ - bltu a3, a2, 5f -4: movi a2, 0 - leaf_return - -.Lle_xneg: - /* Check if y <= x. */ - bgeu a2, a3, 4b -5: movi a2, 1 - leaf_return - -.Lle_diff_signs: - bltz a2, 4b - - /* Check if both x and y are zero. */ - or a7, a2, a3 - slli a7, a7, 1 - movi a2, 1 - movi a3, 0 - moveqz a2, a3, a7 - leaf_return - - - /* Greater Than or Equal */ - - .align 4 - .global __gesf2 - .type __gesf2, @function -__gesf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Llt_cmp - - /* Check if y is a NaN. 
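For the comparison helpers above, once NaNs have been filtered out, single-precision values compare like sign-magnitude integers. A rough C model of the ordered comparison (illustrative name; it returns negative/zero/positive in the spirit of the __lesf2 contract rather than the exact 0/1 values the assembly produces):

#include <stdint.h>

// Illustrative model of the ordered-compare logic: same sign means the raw
// bit patterns order the values (reversed when both are negative);
// different signs mean the non-negative one is larger, except that +0.0
// and -0.0 must compare equal.  NaNs are assumed screened out beforehand.
int lesf2_model(uint32_t x, uint32_t y)
{
    if ((x ^ y) & 0x80000000u) {          // different signs
        if (((x | y) << 1) == 0)          // +0.0 and -0.0 compare equal
            return 0;
        return (x & 0x80000000u) ? -1 : 1;
    }
    if (x & 0x80000000u)                  // both negative: reverse order
        return (x > y) ? -1 : (x < y) ? 1 : 0;
    return (x < y) ? -1 : (x > y) ? 1 : 0;
}

The only subtlety is the pair of zeros, which is why the different-sign path in the assembly checks (x | y) << 1 before deciding.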
*/ - slli a7, a3, 9 - beqz a7, .Llt_cmp - movi a2, -1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, -1 - leaf_return - - - /* Less Than */ - - .align 4 - .global __ltsf2 - .type __ltsf2, @function -__ltsf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Llt_cmp - - /* Check if y is a NaN. */ - slli a7, a3, 9 - beqz a7, .Llt_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, 0 - leaf_return - -.Llt_cmp: - /* Check if x and y have different signs. */ - xor a7, a2, a3 - bltz a7, .Llt_diff_signs - - /* Check if x is negative. */ - bltz a2, .Llt_xneg - - /* Check if x < y. */ - bgeu a2, a3, 5f -4: movi a2, -1 - leaf_return - -.Llt_xneg: - /* Check if y < x. */ - bltu a3, a2, 4b -5: movi a2, 0 - leaf_return - -.Llt_diff_signs: - bgez a2, 5b - - /* Check if both x and y are nonzero. */ - or a7, a2, a3 - slli a7, a7, 1 - movi a2, 0 - movi a3, -1 - movnez a2, a3, a7 - leaf_return - - - /* Unordered */ - - .align 4 - .global __unordsf2 - .type __unordsf2, @function -__unordsf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 3f -1: ball a3, a6, 4f -2: movi a2, 0 - leaf_return - -3: slli a7, a2, 9 - beqz a7, 1b - movi a2, 1 - leaf_return - -4: slli a7, a3, 9 - beqz a7, 2b - movi a2, 1 - leaf_return - -#endif /* L_cmpsf2 */ - -#ifdef L_fixsfsi - - .align 4 - .global __fixsfsi - .type __fixsfsi, @function -__fixsfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixsfsi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7e - bgei a4, 32, .Lfixsfsi_maxint - blti a4, 1, .Lfixsfsi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli a5, a7, 8 - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixsfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixsfsi_maxint - - /* Translate NaN to +maxint. */ - movi a2, 0 - -.Lfixsfsi_maxint: - slli a4, a6, 8 /* 0x80000000 */ - addi a5, a4, -1 /* 0x7fffffff */ - movgez a4, a5, a2 - mov a2, a4 - leaf_return - -.Lfixsfsi_zero: - movi a2, 0 - leaf_return - -#endif /* L_fixsfsi */ - -#ifdef L_fixsfdi - - .align 4 - .global __fixsfdi - .type __fixsfdi, @function -__fixsfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixsfdi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7e - bgei a4, 64, .Lfixsfdi_maxint - blti a4, 1, .Lfixsfdi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli xh, a7, 8 - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixsfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixsfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixsfdi_smallshift: - movi xl, 0 - sll xl, xh - srl xh, xh - j .Lfixsfdi_shifted - -.Lfixsfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixsfdi_maxint - - /* Translate NaN to +maxint. 
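__fixsfsi above follows the usual float-to-int pattern: range-check the exponent, shift the mantissa (implicit one restored) into place, then negate if the sign bit was set. A rough C equivalent with the same saturation behaviour (NaN treated like a large positive value; the _model name is illustrative):

#include <stdint.h>

// Illustrative model of __fixsfsi: "a" is the raw IEEE single bit pattern.
int32_t fixsfsi_model(uint32_t a)
{
    uint32_t exp  = (a >> 23) & 0xff;
    uint32_t mant = (a & 0x007fffffu) | 0x00800000u;
    int neg = (a >> 31) != 0;

    if (exp == 0xff && (a << 9) != 0)            // NaN -> INT32_MAX
        return INT32_MAX;
    int shift = (int)exp - 0x7e;                 // exp - 126
    if (shift >= 32)                             // too large (or +/-Inf)
        return neg ? INT32_MIN : INT32_MAX;
    if (shift < 1)                               // |a| < 1.0 -> 0
        return 0;
    uint32_t val = (mant << 8) >> (32 - shift);  // same effect as ssl/srl
    return neg ? -(int32_t)val : (int32_t)val;
}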
*/ - movi a2, 0 - -.Lfixsfdi_maxint: - slli a7, a6, 8 /* 0x80000000 */ - bgez a2, 1f - mov xh, a7 - movi xl, 0 - leaf_return - -1: addi xh, a7, -1 /* 0x7fffffff */ - movi xl, -1 - leaf_return - -.Lfixsfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -#endif /* L_fixsfdi */ - -#ifdef L_fixunssfsi - - .align 4 - .global __fixunssfsi - .type __fixunssfsi, @function -__fixunssfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixunssfsi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7f - bgei a4, 32, .Lfixunssfsi_maxint - bltz a4, .Lfixunssfsi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli a5, a7, 8 - - /* Shift back to the right, based on the exponent. */ - addi a4, a4, 1 - beqi a4, 32, .Lfixunssfsi_bigexp - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixunssfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixunssfsi_maxint - - /* Translate NaN to 0xffffffff. */ - movi a2, -1 - leaf_return - -.Lfixunssfsi_maxint: - slli a4, a6, 8 /* 0x80000000 */ - movi a5, -1 /* 0xffffffff */ - movgez a4, a5, a2 - mov a2, a4 - leaf_return - -.Lfixunssfsi_zero: - movi a2, 0 - leaf_return - -.Lfixunssfsi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz a2, 1f - mov a2, a5 /* no shift needed */ - leaf_return - - /* Return 0x80000000 if negative. */ -1: slli a2, a6, 8 - leaf_return - -#endif /* L_fixunssfsi */ - -#ifdef L_fixunssfdi - - .align 4 - .global __fixunssfdi - .type __fixunssfdi, @function -__fixunssfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixunssfdi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7f - bgei a4, 64, .Lfixunssfdi_maxint - bltz a4, .Lfixunssfdi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli xh, a7, 8 - - /* Shift back to the right, based on the exponent. */ - addi a4, a4, 1 - beqi a4, 64, .Lfixunssfdi_bigexp - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixunssfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixunssfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixunssfdi_smallshift: - movi xl, 0 - src xl, xh, xl - srl xh, xh - j .Lfixunssfdi_shifted - -.Lfixunssfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixunssfdi_maxint - - /* Translate NaN to 0xffffffff.... */ -1: movi xh, -1 - movi xl, -1 - leaf_return - -.Lfixunssfdi_maxint: - bgez a2, 1b -2: slli xh, a6, 8 /* 0x80000000 */ - movi xl, 0 - leaf_return - -.Lfixunssfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -.Lfixunssfdi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz a7, 2b - movi xl, 0 - leaf_return /* no shift needed */ - -#endif /* L_fixunssfdi */ - -#ifdef L_floatsisf - - .align 4 - .global __floatunsisf - .type __floatunsisf, @function -__floatunsisf: - leaf_entry sp, 16 - beqz a2, .Lfloatsisf_return - - /* Set the sign to zero and jump to the floatsisf code. */ - movi a7, 0 - j .Lfloatsisf_normalize - - .align 4 - .global __floatsisf - .type __floatsisf, @function -__floatsisf: - leaf_entry sp, 16 - - /* Check for zero. */ - beqz a2, .Lfloatsisf_return - - /* Save the sign. */ - extui a7, a2, 31, 1 - - /* Get the absolute value. 
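The __floatsisf/__floatunsisf conversion that begins above (and continues below) normalizes the magnitude by its leading-zero count, derives the exponent from that shift amount (0x9d is 0x7e + 31), and rounds to nearest even. A rough, self-contained C model of the whole routine; the helper and function names are illustrative:

#include <stdint.h>

int clz32_model(uint32_t x)            // portable count-leading-zeros
{
    int n = 0;
    if (x == 0) return 32;
    while (!(x & 0x80000000u)) { x <<= 1; n++; }
    return n;
}

uint32_t floatsisf_model(int32_t i)
{
    if (i == 0)
        return 0;
    uint32_t sign = (i < 0) ? 0x80000000u : 0;
    uint32_t mag  = (i < 0) ? 0u - (uint32_t)i : (uint32_t)i;

    int n = clz32_model(mag);
    uint32_t norm = mag << n;          // leading one in bit 31
    uint32_t mant = norm >> 8;         // 24-bit mantissa, implicit 1 at bit 23
    uint32_t rest = norm << 24;        // bits shifted out, used for rounding

    uint32_t exp = 0x9d - (uint32_t)n; // 0x7e + 31 - clz
    uint32_t r = mant + (exp << 23);   // implicit one bumps the exponent
    r |= sign;

    if (rest & 0x80000000u) {          // round to nearest ...
        r += 1;                        // (carry into the exponent is fine)
        if ((rest << 1) == 0)          // ... with ties to even
            r &= ~1u;
    }
    return r;
}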
*/ -#if XCHAL_HAVE_ABS - abs a2, a2 -#else - neg a4, a2 - movltz a2, a4, a2 -#endif - -.Lfloatsisf_normalize: - /* Normalize with the first 1 bit in the msb. */ - do_nsau a4, a2, a5, a6 - ssl a4 - sll a5, a2 - - /* Shift the mantissa into position, with rounding bits in a6. */ - srli a2, a5, 8 - slli a6, a5, (32 - 8) - - /* Set the exponent. */ - movi a5, 0x9d /* 0x7e + 31 */ - sub a5, a5, a4 - slli a5, a5, 23 - add a2, a2, a5 - - /* Add the sign. */ - slli a7, a7, 31 - or a2, a2, a7 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a6, .Lfloatsisf_return - addi a2, a2, 1 /* Overflow to the exponent is OK. */ - - /* Check if the leftover fraction is exactly 1/2. */ - slli a6, a6, 1 - beqz a6, .Lfloatsisf_exactlyhalf - -.Lfloatsisf_return: - leaf_return - -.Lfloatsisf_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -#endif /* L_floatsisf */ - -#ifdef L_floatdisf - - .align 4 - .global __floatundisf - .type __floatundisf, @function -__floatundisf: - leaf_entry sp, 16 - - /* Check for zero. */ - or a4, xh, xl - beqz a4, 2f - - /* Set the sign to zero and jump to the floatdisf code. */ - movi a7, 0 - j .Lfloatdisf_normalize - - .align 4 - .global __floatdisf - .type __floatdisf, @function -__floatdisf: - leaf_entry sp, 16 - - /* Check for zero. */ - or a4, xh, xl - beqz a4, 2f - - /* Save the sign. */ - extui a7, xh, 31, 1 - - /* Get the absolute value. */ - bgez xh, .Lfloatdisf_normalize - neg xl, xl - neg xh, xh - beqz xl, .Lfloatdisf_normalize - addi xh, xh, -1 - -.Lfloatdisf_normalize: - /* Normalize with the first 1 bit in the msb of xh. */ - beqz xh, .Lfloatdisf_bigshift - do_nsau a4, xh, a5, a6 - ssl a4 - src xh, xh, xl - sll xl, xl - -.Lfloatdisf_shifted: - /* Shift the mantissa into position, with rounding bits in a6. */ - ssai 8 - sll a5, xl - src a6, xh, xl - srl xh, xh - beqz a5, 1f - movi a5, 1 - or a6, a6, a5 -1: - /* Set the exponent. */ - movi a5, 0xbd /* 0x7e + 63 */ - sub a5, a5, a4 - slli a5, a5, 23 - add a2, xh, a5 - - /* Add the sign. */ - slli a7, a7, 31 - or a2, a2, a7 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a6, 2f - addi a2, a2, 1 /* Overflow to the exponent is OK. */ - - /* Check if the leftover fraction is exactly 1/2. */ - slli a6, a6, 1 - beqz a6, .Lfloatdisf_exactlyhalf -2: leaf_return - -.Lfloatdisf_bigshift: - /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ - do_nsau a4, xl, a5, a6 - ssl a4 - sll xh, xl - movi xl, 0 - addi a4, a4, 32 - j .Lfloatdisf_shifted - -.Lfloatdisf_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -#endif /* L_floatdisf */ diff --git a/gcc/config/xtensa/lib1funcs.asm b/gcc/config/xtensa/lib1funcs.asm deleted file mode 100644 index 071b9171177..00000000000 --- a/gcc/config/xtensa/lib1funcs.asm +++ /dev/null @@ -1,845 +0,0 @@ -/* Assembly functions for the Xtensa version of libgcc1. - Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009 - Free Software Foundation, Inc. - Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 3, or (at your option) any later -version. - -GCC is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -#include "xtensa-config.h" - -/* Define macros for the ABS and ADDX* instructions to handle cases - where they are not included in the Xtensa processor configuration. */ - - .macro do_abs dst, src, tmp -#if XCHAL_HAVE_ABS - abs \dst, \src -#else - neg \tmp, \src - movgez \tmp, \src, \src - mov \dst, \tmp -#endif - .endm - - .macro do_addx2 dst, as, at, tmp -#if XCHAL_HAVE_ADDX - addx2 \dst, \as, \at -#else - slli \tmp, \as, 1 - add \dst, \tmp, \at -#endif - .endm - - .macro do_addx4 dst, as, at, tmp -#if XCHAL_HAVE_ADDX - addx4 \dst, \as, \at -#else - slli \tmp, \as, 2 - add \dst, \tmp, \at -#endif - .endm - - .macro do_addx8 dst, as, at, tmp -#if XCHAL_HAVE_ADDX - addx8 \dst, \as, \at -#else - slli \tmp, \as, 3 - add \dst, \tmp, \at -#endif - .endm - -/* Define macros for leaf function entry and return, supporting either the - standard register windowed ABI or the non-windowed call0 ABI. These - macros do not allocate any extra stack space, so they only work for - leaf functions that do not need to spill anything to the stack. */ - - .macro leaf_entry reg, size -#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ - entry \reg, \size -#else - /* do nothing */ -#endif - .endm - - .macro leaf_return -#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ - retw -#else - ret -#endif - .endm - - -#ifdef L_mulsi3 - .align 4 - .global __mulsi3 - .type __mulsi3, @function -__mulsi3: - leaf_entry sp, 16 - -#if XCHAL_HAVE_MUL32 - mull a2, a2, a3 - -#elif XCHAL_HAVE_MUL16 - or a4, a2, a3 - srai a4, a4, 16 - bnez a4, .LMUL16 - mul16u a2, a2, a3 - leaf_return -.LMUL16: - srai a4, a2, 16 - srai a5, a3, 16 - mul16u a7, a4, a3 - mul16u a6, a5, a2 - mul16u a4, a2, a3 - add a7, a7, a6 - slli a7, a7, 16 - add a2, a7, a4 - -#elif XCHAL_HAVE_MAC16 - mul.aa.hl a2, a3 - mula.aa.lh a2, a3 - rsr a5, ACCLO - umul.aa.ll a2, a3 - rsr a4, ACCLO - slli a5, a5, 16 - add a2, a4, a5 - -#else /* !MUL32 && !MUL16 && !MAC16 */ - - /* Multiply one bit at a time, but unroll the loop 4x to better - exploit the addx instructions and avoid overhead. - Peel the first iteration to save a cycle on init. */ - - /* Avoid negative numbers. */ - xor a5, a2, a3 /* Top bit is 1 if one input is negative. */ - do_abs a3, a3, a6 - do_abs a2, a2, a6 - - /* Swap so the second argument is smaller. 
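The software __mulsi3 path above (continuing just below) reduces to a shift-and-add loop that consumes four multiplier bits per iteration, using the addx2/addx4/addx8 forms where the configuration provides them. A rough C model, with an illustrative name and conditional adds standing in for the movnez sequences:

#include <stdint.h>

// Illustrative model of the no-hardware-multiply __mulsi3 path: work on
// absolute values, keep the larger operand as the addend, and consume the
// smaller one four bits at a time.
int32_t mulsi3_model(int32_t a, int32_t b)
{
    uint32_t sign = (uint32_t)(a ^ b) & 0x80000000u;   // sign of the result
    uint32_t ua = (a < 0) ? 0u - (uint32_t)a : (uint32_t)a;
    uint32_t ub = (b < 0) ? 0u - (uint32_t)b : (uint32_t)b;
    if (ua < ub) { uint32_t t = ua; ua = ub; ub = t; } // ub is the smaller

    uint32_t acc = 0;
    while (ub != 0) {
        if (ub & 1) acc += ua;          // bit 0
        if (ub & 2) acc += ua << 1;     // bit 1 (addx2 in the assembly)
        if (ub & 4) acc += ua << 2;     // bit 2 (addx4)
        if (ub & 8) acc += ua << 3;     // bit 3 (addx8)
        ub >>= 4;
        ua <<= 4;
    }
    return (int32_t)(sign ? 0u - acc : acc);
}

Driving the loop with the smaller operand bounds the iteration count; the assembly additionally peels the first group of four bits to save a cycle on setup.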
*/ - sub a7, a2, a3 - mov a4, a3 - movgez a4, a2, a7 /* a4 = max (a2, a3) */ - movltz a3, a2, a7 /* a3 = min (a2, a3) */ - - movi a2, 0 - extui a6, a3, 0, 1 - movnez a2, a4, a6 - - do_addx2 a7, a4, a2, a7 - extui a6, a3, 1, 1 - movnez a2, a7, a6 - - do_addx4 a7, a4, a2, a7 - extui a6, a3, 2, 1 - movnez a2, a7, a6 - - do_addx8 a7, a4, a2, a7 - extui a6, a3, 3, 1 - movnez a2, a7, a6 - - bgeui a3, 16, .Lmult_main_loop - neg a3, a2 - movltz a2, a3, a5 - leaf_return - - .align 4 -.Lmult_main_loop: - srli a3, a3, 4 - slli a4, a4, 4 - - add a7, a4, a2 - extui a6, a3, 0, 1 - movnez a2, a7, a6 - - do_addx2 a7, a4, a2, a7 - extui a6, a3, 1, 1 - movnez a2, a7, a6 - - do_addx4 a7, a4, a2, a7 - extui a6, a3, 2, 1 - movnez a2, a7, a6 - - do_addx8 a7, a4, a2, a7 - extui a6, a3, 3, 1 - movnez a2, a7, a6 - - bgeui a3, 16, .Lmult_main_loop - - neg a3, a2 - movltz a2, a3, a5 - -#endif /* !MUL32 && !MUL16 && !MAC16 */ - - leaf_return - .size __mulsi3, . - __mulsi3 - -#endif /* L_mulsi3 */ - - -#ifdef L_umulsidi3 - -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 -#define XCHAL_NO_MUL 1 -#endif - - .align 4 - .global __umulsidi3 - .type __umulsidi3, @function -__umulsidi3: -#if __XTENSA_CALL0_ABI__ - leaf_entry sp, 32 - addi sp, sp, -32 - s32i a12, sp, 16 - s32i a13, sp, 20 - s32i a14, sp, 24 - s32i a15, sp, 28 -#elif XCHAL_NO_MUL - /* This is not really a leaf function; allocate enough stack space - to allow CALL12s to a helper function. */ - leaf_entry sp, 48 -#else - leaf_entry sp, 16 -#endif - -#ifdef __XTENSA_EB__ -#define wh a2 -#define wl a3 -#else -#define wh a3 -#define wl a2 -#endif /* __XTENSA_EB__ */ - - /* This code is taken from the mulsf3 routine in ieee754-sf.S. - See more comments there. */ - -#if XCHAL_HAVE_MUL32_HIGH - mull a6, a2, a3 - muluh wh, a2, a3 - mov wl, a6 - -#else /* ! MUL32_HIGH */ - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* a0 and a8 will be clobbered by calling the multiply function - but a8 is not used here and need not be saved. */ - s32i a0, sp, 0 -#endif - -#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 - -#define a2h a4 -#define a3h a5 - - /* Get the high halves of the inputs into registers. */ - srli a2h, a2, 16 - srli a3h, a3, 16 - -#define a2l a2 -#define a3l a3 - -#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 - /* Clear the high halves of the inputs. This does not matter - for MUL16 because the high bits are ignored. */ - extui a2, a2, 0, 16 - extui a3, a3, 0, 16 -#endif -#endif /* MUL16 || MUL32 */ - - -#if XCHAL_HAVE_MUL16 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mul16u dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MUL32 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mull dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MAC16 - -/* The preprocessor insists on inserting a space when concatenating after - a period in the definition of do_mul below. These macros are a workaround - using underscores instead of periods when doing the concatenation. 
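__umulsidi3, set up above and carried out just below, builds the 64-bit product from four 16x16 partial products, adding the two middle products with an explicit carry into the high word. A rough C model of that scheme (names are illustrative; the real code funnels the carry and the high half together with src):

#include <stdint.h>

// Illustrative model of the partial-product scheme used by __umulsidi3.
uint64_t umulsidi3_model(uint32_t x, uint32_t y)
{
    uint32_t xl = x & 0xffff, xh = x >> 16;
    uint32_t yl = y & 0xffff, yh = y >> 16;

    uint32_t pp0 = xl * yl;
    uint32_t pp1 = xl * yh;
    uint32_t pp2 = xh * yl;
    uint32_t pp3 = xh * yh;

    uint32_t mid   = pp1 + pp2;                // may carry out ...
    uint32_t carry = (mid < pp1) ? 1u : 0u;    // ... so record the carry

    uint32_t lo = pp0 + (mid << 16);
    uint32_t hi = pp3 + (mid >> 16) + (carry << 16)
                  + ((lo < pp0) ? 1u : 0u);    // carry from the low word

    return ((uint64_t)hi << 32) | lo;
}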
*/ -#define umul_aa_ll umul.aa.ll -#define umul_aa_lh umul.aa.lh -#define umul_aa_hl umul.aa.hl -#define umul_aa_hh umul.aa.hh - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - umul_aa_ ## xhalf ## yhalf xreg, yreg; \ - rsr dst, ACCLO - -#else /* no multiply hardware */ - -#define set_arg_l(dst, src) \ - extui dst, src, 0, 16 -#define set_arg_h(dst, src) \ - srli dst, src, 16 - -#if __XTENSA_CALL0_ABI__ -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a13, xreg); \ - set_arg_ ## yhalf (a14, yreg); \ - call0 .Lmul_mulsi3; \ - mov dst, a12 -#else -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a14, xreg); \ - set_arg_ ## yhalf (a15, yreg); \ - call12 .Lmul_mulsi3; \ - mov dst, a14 -#endif /* __XTENSA_CALL0_ABI__ */ - -#endif /* no multiply hardware */ - - /* Add pp1 and pp2 into a6 with carry-out in a9. */ - do_mul(a6, a2, l, a3, h) /* pp 1 */ - do_mul(a11, a2, h, a3, l) /* pp 2 */ - movi a9, 0 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Shift the high half of a9/a6 into position in a9. Note that - this value can be safely incremented without any carry-outs. */ - ssai 16 - src a9, a9, a6 - - /* Compute the low word into a6. */ - do_mul(a11, a2, l, a3, l) /* pp 0 */ - sll a6, a6 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Compute the high word into wh. */ - do_mul(wh, a2, h, a3, h) /* pp 3 */ - add wh, wh, a9 - mov wl, a6 - -#endif /* !MUL32_HIGH */ - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Restore the original return address. */ - l32i a0, sp, 0 -#endif -#if __XTENSA_CALL0_ABI__ - l32i a12, sp, 16 - l32i a13, sp, 20 - l32i a14, sp, 24 - l32i a15, sp, 28 - addi sp, sp, 32 -#endif - leaf_return - -#if XCHAL_NO_MUL - - /* For Xtensa processors with no multiply hardware, this simplified - version of _mulsi3 is used for multiplying 16-bit chunks of - the floating-point mantissas. When using CALL0, this function - uses a custom ABI: the inputs are passed in a13 and a14, the - result is returned in a12, and a8 and a15 are clobbered. */ - .align 4 -.Lmul_mulsi3: - leaf_entry sp, 16 - .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 - movi \dst, 0 -1: add \tmp1, \src2, \dst - extui \tmp2, \src1, 0, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx2 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 1, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx4 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 2, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx8 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 3, 1 - movnez \dst, \tmp1, \tmp2 - - srli \src1, \src1, 4 - slli \src2, \src2, 4 - bnez \src1, 1b - .endm -#if __XTENSA_CALL0_ABI__ - mul_mulsi3_body a12, a13, a14, a15, a8 -#else - /* The result will be written into a2, so save that argument in a4. */ - mov a4, a2 - mul_mulsi3_body a2, a4, a3, a5, a6 -#endif - leaf_return -#endif /* XCHAL_NO_MUL */ - - .size __umulsidi3, . - __umulsidi3 - -#endif /* L_umulsidi3 */ - - -/* Define a macro for the NSAU (unsigned normalize shift amount) - instruction, which computes the number of leading zero bits, - to handle cases where it is not included in the Xtensa processor - configuration. 
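The do_nsau fallback described above narrows the search for the leading one bit in two steps and finishes with the 256-entry __nsau_data table. A rough C model (illustrative name; a small loop stands in for the table lookup):

#include <stdint.h>

// Illustrative model of the software normalize-shift-amount computation:
// skip the empty top half, then the empty top byte, then count within the
// remaining top byte (the assembly uses the __nsau_data table for this).
int nsau_model(uint32_t val)
{
    int cnt = 0;
    if ((val >> 16) == 0) { cnt = 16; val <<= 16; }   // top half empty
    if ((val >> 24) == 0) { cnt += 8; val <<= 8;  }   // top byte empty
    uint32_t byte = val >> 24;
    int n = 8;
    while (byte) { byte >>= 1; n--; }                 // leading zeros in byte
    return cnt + n;
}

For val == 0 this yields 32, matching the first table entry of 8 after both skips.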
*/ - - .macro do_nsau cnt, val, tmp, a -#if XCHAL_HAVE_NSA - nsau \cnt, \val -#else - mov \a, \val - movi \cnt, 0 - extui \tmp, \a, 16, 16 - bnez \tmp, 0f - movi \cnt, 16 - slli \a, \a, 16 -0: - extui \tmp, \a, 24, 8 - bnez \tmp, 1f - addi \cnt, \cnt, 8 - slli \a, \a, 8 -1: - movi \tmp, __nsau_data - extui \a, \a, 24, 8 - add \tmp, \tmp, \a - l8ui \tmp, \tmp, 0 - add \cnt, \cnt, \tmp -#endif /* !XCHAL_HAVE_NSA */ - .endm - -#ifdef L_clz - .section .rodata - .align 4 - .global __nsau_data - .type __nsau_data, @object -__nsau_data: -#if !XCHAL_HAVE_NSA - .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 - .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 - .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 - .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 - .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 - .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 - .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 - .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -#endif /* !XCHAL_HAVE_NSA */ - .size __nsau_data, . - __nsau_data - .hidden __nsau_data -#endif /* L_clz */ - - -#ifdef L_clzsi2 - .align 4 - .global __clzsi2 - .type __clzsi2, @function -__clzsi2: - leaf_entry sp, 16 - do_nsau a2, a2, a3, a4 - leaf_return - .size __clzsi2, . - __clzsi2 - -#endif /* L_clzsi2 */ - - -#ifdef L_ctzsi2 - .align 4 - .global __ctzsi2 - .type __ctzsi2, @function -__ctzsi2: - leaf_entry sp, 16 - neg a3, a2 - and a3, a3, a2 - do_nsau a2, a3, a4, a5 - neg a2, a2 - addi a2, a2, 31 - leaf_return - .size __ctzsi2, . - __ctzsi2 - -#endif /* L_ctzsi2 */ - - -#ifdef L_ffssi2 - .align 4 - .global __ffssi2 - .type __ffssi2, @function -__ffssi2: - leaf_entry sp, 16 - neg a3, a2 - and a3, a3, a2 - do_nsau a2, a3, a4, a5 - neg a2, a2 - addi a2, a2, 32 - leaf_return - .size __ffssi2, . 
- __ffssi2 - -#endif /* L_ffssi2 */ - - -#ifdef L_udivsi3 - .align 4 - .global __udivsi3 - .type __udivsi3, @function -__udivsi3: - leaf_entry sp, 16 -#if XCHAL_HAVE_DIV32 - quou a2, a2, a3 -#else - bltui a3, 2, .Lle_one /* check if the divisor <= 1 */ - - mov a6, a2 /* keep dividend in a6 */ - do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */ - do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */ - bgeu a5, a4, .Lspecial - - sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ - ssl a4 - sll a3, a3 /* divisor <<= count */ - movi a2, 0 /* quotient = 0 */ - - /* test-subtract-and-shift loop; one quotient bit on each iteration */ -#if XCHAL_HAVE_LOOPS - loopnez a4, .Lloopend -#endif /* XCHAL_HAVE_LOOPS */ -.Lloop: - bltu a6, a3, .Lzerobit - sub a6, a6, a3 - addi a2, a2, 1 -.Lzerobit: - slli a2, a2, 1 - srli a3, a3, 1 -#if !XCHAL_HAVE_LOOPS - addi a4, a4, -1 - bnez a4, .Lloop -#endif /* !XCHAL_HAVE_LOOPS */ -.Lloopend: - - bltu a6, a3, .Lreturn - addi a2, a2, 1 /* increment quotient if dividend >= divisor */ -.Lreturn: - leaf_return - -.Lle_one: - beqz a3, .Lerror /* if divisor == 1, return the dividend */ - leaf_return - -.Lspecial: - /* return dividend >= divisor */ - bltu a6, a3, .Lreturn0 - movi a2, 1 - leaf_return - -.Lerror: - /* Divide by zero: Use an illegal instruction to force an exception. - The subsequent "DIV0" string can be recognized by the exception - handler to identify the real cause of the exception. */ - ill - .ascii "DIV0" - -.Lreturn0: - movi a2, 0 -#endif /* XCHAL_HAVE_DIV32 */ - leaf_return - .size __udivsi3, . - __udivsi3 - -#endif /* L_udivsi3 */ - - -#ifdef L_divsi3 - .align 4 - .global __divsi3 - .type __divsi3, @function -__divsi3: - leaf_entry sp, 16 -#if XCHAL_HAVE_DIV32 - quos a2, a2, a3 -#else - xor a7, a2, a3 /* sign = dividend ^ divisor */ - do_abs a6, a2, a4 /* udividend = abs (dividend) */ - do_abs a3, a3, a4 /* udivisor = abs (divisor) */ - bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ - do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */ - do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */ - bgeu a5, a4, .Lspecial - - sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ - ssl a4 - sll a3, a3 /* udivisor <<= count */ - movi a2, 0 /* quotient = 0 */ - - /* test-subtract-and-shift loop; one quotient bit on each iteration */ -#if XCHAL_HAVE_LOOPS - loopnez a4, .Lloopend -#endif /* XCHAL_HAVE_LOOPS */ -.Lloop: - bltu a6, a3, .Lzerobit - sub a6, a6, a3 - addi a2, a2, 1 -.Lzerobit: - slli a2, a2, 1 - srli a3, a3, 1 -#if !XCHAL_HAVE_LOOPS - addi a4, a4, -1 - bnez a4, .Lloop -#endif /* !XCHAL_HAVE_LOOPS */ -.Lloopend: - - bltu a6, a3, .Lreturn - addi a2, a2, 1 /* increment if udividend >= udivisor */ -.Lreturn: - neg a5, a2 - movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */ - leaf_return - -.Lle_one: - beqz a3, .Lerror - neg a2, a6 /* if udivisor == 1, then return... */ - movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */ - leaf_return - -.Lspecial: - bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */ - movi a2, 1 - movi a4, -1 - movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */ - leaf_return - -.Lerror: - /* Divide by zero: Use an illegal instruction to force an exception. - The subsequent "DIV0" string can be recognized by the exception - handler to identify the real cause of the exception. */ - ill - .ascii "DIV0" - -.Lreturn0: - movi a2, 0 -#endif /* XCHAL_HAVE_DIV32 */ - leaf_return - .size __divsi3, . 
- __divsi3 - -#endif /* L_divsi3 */ - - -#ifdef L_umodsi3 - .align 4 - .global __umodsi3 - .type __umodsi3, @function -__umodsi3: - leaf_entry sp, 16 -#if XCHAL_HAVE_DIV32 - remu a2, a2, a3 -#else - bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */ - - do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */ - do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */ - bgeu a5, a4, .Lspecial - - sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ - ssl a4 - sll a3, a3 /* divisor <<= count */ - - /* test-subtract-and-shift loop */ -#if XCHAL_HAVE_LOOPS - loopnez a4, .Lloopend -#endif /* XCHAL_HAVE_LOOPS */ -.Lloop: - bltu a2, a3, .Lzerobit - sub a2, a2, a3 -.Lzerobit: - srli a3, a3, 1 -#if !XCHAL_HAVE_LOOPS - addi a4, a4, -1 - bnez a4, .Lloop -#endif /* !XCHAL_HAVE_LOOPS */ -.Lloopend: - -.Lspecial: - bltu a2, a3, .Lreturn - sub a2, a2, a3 /* subtract once more if dividend >= divisor */ -.Lreturn: - leaf_return - -.Lle_one: - bnez a3, .Lreturn0 - - /* Divide by zero: Use an illegal instruction to force an exception. - The subsequent "DIV0" string can be recognized by the exception - handler to identify the real cause of the exception. */ - ill - .ascii "DIV0" - -.Lreturn0: - movi a2, 0 -#endif /* XCHAL_HAVE_DIV32 */ - leaf_return - .size __umodsi3, . - __umodsi3 - -#endif /* L_umodsi3 */ - - -#ifdef L_modsi3 - .align 4 - .global __modsi3 - .type __modsi3, @function -__modsi3: - leaf_entry sp, 16 -#if XCHAL_HAVE_DIV32 - rems a2, a2, a3 -#else - mov a7, a2 /* save original (signed) dividend */ - do_abs a2, a2, a4 /* udividend = abs (dividend) */ - do_abs a3, a3, a4 /* udivisor = abs (divisor) */ - bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ - do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */ - do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */ - bgeu a5, a4, .Lspecial - - sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ - ssl a4 - sll a3, a3 /* udivisor <<= count */ - - /* test-subtract-and-shift loop */ -#if XCHAL_HAVE_LOOPS - loopnez a4, .Lloopend -#endif /* XCHAL_HAVE_LOOPS */ -.Lloop: - bltu a2, a3, .Lzerobit - sub a2, a2, a3 -.Lzerobit: - srli a3, a3, 1 -#if !XCHAL_HAVE_LOOPS - addi a4, a4, -1 - bnez a4, .Lloop -#endif /* !XCHAL_HAVE_LOOPS */ -.Lloopend: - -.Lspecial: - bltu a2, a3, .Lreturn - sub a2, a2, a3 /* subtract again if udividend >= udivisor */ -.Lreturn: - bgez a7, .Lpositive - neg a2, a2 /* if (dividend < 0), return -udividend */ -.Lpositive: - leaf_return - -.Lle_one: - bnez a3, .Lreturn0 - - /* Divide by zero: Use an illegal instruction to force an exception. - The subsequent "DIV0" string can be recognized by the exception - handler to identify the real cause of the exception. */ - ill - .ascii "DIV0" - -.Lreturn0: - movi a2, 0 -#endif /* XCHAL_HAVE_DIV32 */ - leaf_return - .size __modsi3, . - __modsi3 - -#endif /* L_modsi3 */ - - -#ifdef __XTENSA_EB__ -#define uh a2 -#define ul a3 -#else -#define uh a3 -#define ul a2 -#endif /* __XTENSA_EB__ */ - - -#ifdef L_ashldi3 - .align 4 - .global __ashldi3 - .type __ashldi3, @function -__ashldi3: - leaf_entry sp, 16 - ssl a4 - bgei a4, 32, .Llow_only - src uh, uh, ul - sll ul, ul - leaf_return - -.Llow_only: - sll uh, ul - movi ul, 0 - leaf_return - .size __ashldi3, . 
- __ashldi3 - -#endif /* L_ashldi3 */ - - -#ifdef L_ashrdi3 - .align 4 - .global __ashrdi3 - .type __ashrdi3, @function -__ashrdi3: - leaf_entry sp, 16 - ssr a4 - bgei a4, 32, .Lhigh_only - src ul, uh, ul - sra uh, uh - leaf_return - -.Lhigh_only: - sra ul, uh - srai uh, uh, 31 - leaf_return - .size __ashrdi3, . - __ashrdi3 - -#endif /* L_ashrdi3 */ - - -#ifdef L_lshrdi3 - .align 4 - .global __lshrdi3 - .type __lshrdi3, @function -__lshrdi3: - leaf_entry sp, 16 - ssr a4 - bgei a4, 32, .Lhigh_only1 - src ul, uh, ul - srl uh, uh - leaf_return - -.Lhigh_only1: - srl ul, uh - movi uh, 0 - leaf_return - .size __lshrdi3, . - __lshrdi3 - -#endif /* L_lshrdi3 */ - - -#include "ieee754-df.S" -#include "ieee754-sf.S" diff --git a/gcc/config/xtensa/t-xtensa b/gcc/config/xtensa/t-xtensa index c0a7cb5202f..31ac2ad2452 100644 --- a/gcc/config/xtensa/t-xtensa +++ b/gcc/config/xtensa/t-xtensa @@ -17,18 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = xtensa/lib1funcs.asm -LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ - _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ - _ashldi3 _ashrdi3 _lshrdi3 \ - _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ - _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \ - _floatdisf _floatundisf \ - _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \ - _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \ - _floatdidf _floatundidf \ - _truncdfsf2 _extendsfdf2 - LIB2FUNCS_EXTRA = $(srcdir)/config/xtensa/lib2funcs.S $(out_object_file): gt-xtensa.h diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index b5d9c243a98..6b2514aba9a 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,123 @@ +2011-11-02 Rainer Orth + + * Makefile.in ($(lib1asmfuncs-o), $(lib1asmfuncs-s-o)): Use + $(srcdir) to refer to $(LIB1ASMSRC). + Use $<. + * config/arm/bpabi-v6m.S, config/arm/bpabi.S, + config/arm/ieee754-df.S, config/arm/ieee754-sf.S, + config/arm/lib1funcs.S: New files. + * config/arm/libunwind.S [!__symbian__]: Use lib1funcs.S. + * config/arm/t-arm: New file. + * config/arm/t-bpabi (LIB1ASMFUNCS): Set. + * config/arm/t-elf, config/arm/t-linux, config/arm/t-linux-eabi, + config/arm/t-strongarm-elf: New files. + * config/arm/t-symbian (LIB1ASMFUNCS): Set. + * config/arm/t-vxworks, config/arm/t-wince-pe: New files. + * config/avr/lib1funcs.S: New file. + * config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config/bfin/lib1funcs.S, config/bfin/t-bfin: New files. + * config/c6x/lib1funcs.S: New file. + * config/c6x/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config/fr30/lib1funcs.S, config/fr30/t-fr30: New files. + * config/frv/lib1funcs.S: New file. + * config/frv/t-frv (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config/h8300/lib1funcs.S, config/h8300/t-h8300: New files. + * config/i386/cygwin.S, config/i386/t-chkstk: New files. + * config/ia64/__divxf3.asm: Rename to ... + * config/ia64/__divxf3.S: ... this. + Adapt lib1funcs.asm filename. + * config/ia64/_fixtfdi.asm: Rename to ... + * config/ia64/_fixtfdi.S: ... this. + Adapt lib1funcs.asm filename. + * config/ia64/_fixunstfdi.asm: Rename to ... + * config/ia64/_fixunstfdi.S: ... this. + Adapt lib1funcs.asm filename. + * config/ia64/_floatditf.asm: Rename to ... + * config/ia64/_floatditf.S: ... this. + Adapt lib1funcs.asm filename. + * config/ia64/lib1funcs.S: New file. + * config/ia64/t-hpux (LIB1ASMFUNCS): Set. + * config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config/ia64/t-softfp-compat (libgcc1-tf-compats): Adapt suffix. 
+ * config/m32c/lib1funcs.S, config/m32c/t-m32c: New files. + * config/m68k/lb1sf68.S, config/m68k/t-floatlib: New files. + * config/mcore/lib1funcs.S, config/mcore/t-mcore: New files. + * config/mep/lib1funcs.S: New file. + * config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config/mips/mips16.S: New file. + * config/mips/t-mips16 (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config/pa/milli64.S: New file. + * config/pa/t-linux, config/pa/t-linux64: New files. + * config/picochip/lib1funcs.S: New file. + * config/picochip/t-picochip (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config/sh/lib1funcs.S, config/sh/lib1funcs.h: New files. + * config/sh/t-linux (LIB1ASMFUNCS_CACHE): Set. + * config/sh/t-netbsd: New file. + * config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE): Set. + Use $(srcdir) to refer to lib1funcs.S, adapt filename. + * config/sh/t-sh64: New file. + * config/sparc/lb1spc.S: New file. + * config/sparc/t-softmul (LIB1ASMSRC): Adapt sparc/lb1spc.asm + filename. + * config/v850/lib1funcs.S, config/v850/t-v850: New files. + * config/vax/lib1funcs.S, config/vax/t-linux: New files. + * config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S, + config/xtensa/lib1funcs.S: New files. + * config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config.host (arm-wrs-vxworks): Add arm/t-arm, arm/t-vxworks to + tmake_file. + (arm*-*-freebsd*): Add arm/t-arm, arm/t-strongarm-elf to tmake_file. + (arm*-*-netbsdelf*): Add arm/t-arm to tmake_file. + (arm*-*-linux*): Likewise. + Add arm/t-elf, arm/t-bpabi, arm/t-linux-eabi to tmake_file for + arm*-*-linux-*eabi, add arm/t-linux otherwise. + (arm*-*-uclinux*): Add arm/t-arm, arm/t-elf to tmake_file. + (arm*-*-ecos-elf): Likewise. + (arm*-*-eabi*, arm*-*-symbianelf*): Likewise. + (arm*-*-rtems*): Likewise. + (arm*-*-elf): Likewise. + (arm*-wince-pe*): Add arm/t-arm, arm/t-wince-pe to tmake_file. + (avr-*-rtems*): Add to tmake_file, add avr/t-avr. + (bfin*-elf*): Add bfin/t-bfin to tmake_file. + (bfin*-uclinux*): Likewise. + (bfin*-linux-uclibc*): Likewise. + (bfin*-rtems*): Likewise. + (bfin*-*): Likewise. + (fido-*-elf): Merge into m68k-*-elf*. + (fr30-*-elf)): Add fr30/t-fr30 to tmake_file. + (frv-*-*linux*): Add frv/t-frv to tmake_file. + (h8300-*-rtems*): Add h8300/t-h8300 to tmake_file. + (h8300-*-elf*): Likewise. + (hppa*64*-*-linux*): Add pa/t-linux, pa/t-linux64 to tmake_file. + (hppa*-*-linux*): Add pa/t-linux to tmake_file. + (i[34567]86-*-cygwin*): Add i386/t-chkstk to tmake_file. + (i[34567]86-*-mingw*): Likewise. + (x86_64-*-mingw*): Likewise. + (i[34567]86-*-interix3*): Likewise. + (ia64*-*-hpux*): Add ia64/t-ia64, ia64/t-hpux to tmake_file. + (ia64-hp-*vms*): Add ia64/t-ia64 to tmake_file. + (m68k-*-elf*): Also handle fido-*-elf. + Add m68k/t-floatlib to tmake_file. + (m68k-*-uclinux*): Add m68k/t-floatlib to tmake_file. + (m68k-*-linux*): Likewise. + (m68k-*-rtems*): Likewise. + (mcore-*-elf): Add mcore/t-mcore to tmake_file. + (sh-*-elf*, sh[12346l]*-*-elf*): Add sh/t-sh64 to tmake_file for + sh64*-*-*. + (sh-*-linux*, sh[2346lbe]*-*-linux*): Add sh/t-sh to tmake_file. + Add sh/t-sh64 to tmake_file for sh64*-*-linux*. + (sh-*-netbsdelf*, shl*-*-netbsdelf*, sh5-*-netbsd*) + (sh5l*-*-netbsd*, sh64-*-netbsd*, sh64l*-*-netbsd*): Add sh/t-sh, + sh/t-netbsd to tmake_file. + Add sh/t-sh64 to tmake_file for sh5*-*-netbsd*, sh64*-netbsd*. + (sh-*-rtems*): Add sh/t-sh to tmake_file. + (sh-wrs-vxworks): Likewise. + (sparc-*-linux*): Add sparc/t-softmul to tmake_file except for + *-leon[3-9]*. + (v850*-*-*): Add v850/t-v850 to tmake_file. 
+ (vax-*-linux*): Add vax/t-linux to tmake_file. + (m32c-*-elf*, m32c-*-rtems*): Add m32c/t-m32c to tmake_file. + 2011-11-02 Rainer Orth * crtstuff.c: New file. diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in index 467901b057a..6bbb369f8e8 100644 --- a/libgcc/Makefile.in +++ b/libgcc/Makefile.in @@ -394,25 +394,22 @@ LIB2_DIVMOD_FUNCS := $(filter-out $(LIB2FUNCS_EXCLUDE) $(LIB1ASMFUNCS), \ ifeq ($(enable_shared),yes) lib1asmfuncs-o = $(patsubst %,%$(objext),$(LIB1ASMFUNCS)) -$(lib1asmfuncs-o): %$(objext): $(gcc_srcdir)/config/$(LIB1ASMSRC) %.vis - $(gcc_compile) -DL$* -xassembler-with-cpp \ - -c $(gcc_srcdir)/config/$(LIB1ASMSRC) -include $*.vis +$(lib1asmfuncs-o): %$(objext): $(srcdir)/config/$(LIB1ASMSRC) %.vis + $(gcc_compile) -DL$* -xassembler-with-cpp -c $< -include $*.vis $(patsubst %,%.vis,$(LIB1ASMFUNCS)): %.vis: %_s$(objext) $(gen-hide-list) libgcc-objects += $(lib1asmfuncs-o) lib1asmfuncs-s-o = $(patsubst %,%_s$(objext),$(LIB1ASMFUNCS)) -$(lib1asmfuncs-s-o): %_s$(objext): $(gcc_srcdir)/config/$(LIB1ASMSRC) - $(gcc_s_compile) -DL$* -xassembler-with-cpp \ - -c $(gcc_srcdir)/config/$(LIB1ASMSRC) +$(lib1asmfuncs-s-o): %_s$(objext): $(srcdir)/config/$(LIB1ASMSRC) + $(gcc_s_compile) -DL$* -xassembler-with-cpp -c $< libgcc-s-objects += $(lib1asmfuncs-s-o) else lib1asmfuncs-o = $(patsubst %,%$(objext),$(LIB1ASMFUNCS)) -$(lib1asmfuncs-o): %$(objext): $(gcc_srcdir)/config/$(LIB1ASMSRC) - $(gcc_compile) -DL$* -xassembler-with-cpp \ - -c $(gcc_srcdir)/config/$(LIB1ASMSRC) +$(lib1asmfuncs-o): %$(objext): $(srcdir)/config/$(LIB1ASMSRC) + $(gcc_compile) -DL$* -xassembler-with-cpp -c $< libgcc-objects += $(lib1asmfuncs-o) endif diff --git a/libgcc/config.host b/libgcc/config.host index 01e2f21a797..0a05ea184b0 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -306,22 +306,25 @@ alpha*-dec-*vms*) md_unwind_header=alpha/vms-unwind.h ;; arm-wrs-vxworks) - tmake_file="$tmake_file t-fdpbit" + tmake_file="$tmake_file arm/t-arm arm/t-vxworks t-fdpbit" extra_parts="$extra_parts crti.o crtn.o" ;; arm*-*-freebsd*) - tmake_file="$tmake_file t-fdpbit" + tmake_file="$tmake_file arm/t-arm arm/t-strongarm-elf t-fdpbit" ;; arm*-*-netbsdelf*) - tmake_file="$tmake_file t-slibgcc-gld-nover" + tmake_file="$tmake_file arm/t-arm t-slibgcc-gld-nover" ;; arm*-*-linux*) # ARM GNU/Linux with ELF - tmake_file="${tmake_file} t-fixedpoint-gnu-prefix" + tmake_file="${tmake_file} arm/t-arm t-fixedpoint-gnu-prefix" case ${host} in arm*-*-linux-*eabi) - tmake_file="${tmake_file} arm/t-bpabi t-slibgcc-libgcc" + tmake_file="${tmake_file} arm/t-elf arm/t-bpabi arm/t-linux-eabi t-slibgcc-libgcc" unwind_header=config/arm/unwind-arm.h ;; + *) + tmake_file="$tmake_file arm/t-linux" + ;; esac tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" ;; @@ -333,15 +336,15 @@ arm*-*-uclinux*) # ARM ucLinux unwind_header=config/arm/unwind-arm.h ;; esac - tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" + tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" extra_parts="$extra_parts crti.o crtn.o" ;; arm*-*-ecos-elf) - tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" + tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" extra_parts="$extra_parts crti.o crtn.o" ;; arm*-*-eabi* | arm*-*-symbianelf* ) - tmake_file="${tmake_file} t-fixedpoint-gnu-prefix" + tmake_file="${tmake_file} arm/t-arm arm/t-elf t-fixedpoint-gnu-prefix" case ${host} in arm*-*-eabi*) 
tmake_file="${tmake_file} arm/t-bpabi" @@ -356,17 +359,18 @@ arm*-*-eabi* | arm*-*-symbianelf* ) unwind_header=config/arm/unwind-arm.h ;; arm*-*-rtems*) - tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" + tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" extra_parts="$extra_parts crti.o crtn.o" ;; arm*-*-elf) - tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" + tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" extra_parts="$extra_parts crti.o crtn.o" ;; arm*-wince-pe*) + tmake_file="$tmake_file arm/t-arm arm/t-wince-pe" ;; avr-*-rtems*) - tmake_file=t-fpbit + tmake_file="$tmake_file avr/t-avr t-fpbit" # Don't use default. extra_parts= ;; @@ -375,27 +379,27 @@ avr-*-*) tmake_file="${cpu_type}/t-avr t-fpbit" ;; bfin*-elf*) - tmake_file="bfin/t-crtlibid bfin/t-crtstuff t-fdpbit" + tmake_file="bfin/t-bfin bfin/t-crtlibid bfin/t-crtstuff t-fdpbit" extra_parts="$extra_parts crtbeginS.o crtendS.o crti.o crtn.o crtlibid.o" ;; bfin*-uclinux*) - tmake_file="bfin/t-crtlibid bfin/t-crtstuff t-fdpbit" + tmake_file="bfin/t-bfin bfin/t-crtlibid bfin/t-crtstuff t-fdpbit" extra_parts="$extra_parts crtbeginS.o crtendS.o crtlibid.o" md_unwind_header=bfin/linux-unwind.h ;; bfin*-linux-uclibc*) - tmake_file="$tmake_file bfin/t-crtstuff t-fdpbit bfin/t-linux" + tmake_file="$tmake_file bfin/t-bfin bfin/t-crtstuff t-fdpbit bfin/t-linux" # No need to build crtbeginT.o on uClibc systems. Should probably # be moved to the OS specific section above. extra_parts="crtbegin.o crtbeginS.o crtend.o crtendS.o" md_unwind_header=bfin/linux-unwind.h ;; bfin*-rtems*) - tmake_file="$tmake_file t-fdpbit" + tmake_file="$tmake_file bfin/t-bfin t-fdpbit" extra_parts="$extra_parts crti.o crtn.o" ;; bfin*-*) - tmake_file="$tmake_file t-fdpbit" + tmake_file="$tmake_file bfin/t-bfin t-fdpbit" extra_parts="crtbegin.o crtend.o crti.o crtn.o" ;; crisv32-*-elf) @@ -415,10 +419,8 @@ cris-*-none) cris-*-linux* | crisv32-*-linux*) tmake_file="$tmake_file t-fdpbit cris/t-linux" ;; -fido-*-elf) - ;; fr30-*-elf) - tmake_file="$tmake_file t-fdpbit" + tmake_file="$tmake_file fr30/t-fr30 t-fdpbit" extra_parts="$extra_parts crti.o crtn.o" ;; frv-*-elf) @@ -427,20 +429,21 @@ frv-*-elf) extra_parts="frvbegin.o frvend.o" ;; frv-*-*linux*) - tmake_file="$tmake_file t-fdpbit frv/t-linux" + tmake_file="$tmake_file frv/t-frv frv/t-linux t-fdpbit" ;; h8300-*-rtems*) - tmake_file="$tmake_file t-fpbit" + tmake_file="$tmake_file h8300/t-h8300 t-fpbit" extra_parts="$extra_parts crti.o crtn.o" ;; h8300-*-elf*) - tmake_file="$tmake_file t-fpbit" + tmake_file="$tmake_file h8300/t-h8300 t-fpbit" extra_parts="$extra_parts crti.o crtn.o" ;; hppa*64*-*-linux*) + tmake_file="$tmake_file pa/t-linux pa/t-linux64" ;; hppa*-*-linux*) - tmake_file="$tmake_file t-slibgcc-libgcc" + tmake_file="$tmake_file pa/t-linux t-slibgcc-libgcc" # Set the libgcc version number if test x$enable_sjlj_exceptions = xyes; then tmake_file="$tmake_file pa/t-slibgcc-sjlj-ver" @@ -565,7 +568,7 @@ i[34567]86-*-cygwin*) else tmake_dlldir_file="i386/t-dlldir-x" fi - tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-cygwin i386/t-crtfm t-dfprules" + tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-cygwin i386/t-crtfm i386/t-chkstk t-dfprules" case ${target_thread_file} in posix) tmake_file="i386/t-mingw-pthread $tmake_file" @@ -586,7 +589,7 @@ 
i[34567]86-*-mingw*) else tmake_dlldir_file="i386/t-dlldir-x" fi - tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-mingw32 i386/t-crtfm t-dfprules" + tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-mingw32 i386/t-crtfm i386/t-chkstk t-dfprules" md_unwind_header=i386/w32-unwind.h ;; x86_64-*-mingw*) @@ -602,10 +605,11 @@ x86_64-*-mingw*) else tmake_dlldir_file="i386/t-dlldir-x" fi - tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-mingw32 t-dfprules i386/t-crtfm" + tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-mingw32 t-dfprules i386/t-crtfm i386/t-chkstk" extra_parts="$extra_parts crtfastmath.o" ;; i[34567]86-*-interix3*) + tmake_file="$tmake_file i386/t-chkstk" ;; ia64*-*-elf*) extra_parts="$extra_parts crtbeginS.o crtendS.o crtfastmath.o" @@ -625,10 +629,10 @@ ia64*-*-linux*) md_unwind_header=ia64/linux-unwind.h ;; ia64*-*-hpux*) - tmake_file="ia64/t-hpux t-slibgcc ia64/t-slibgcc-hpux t-slibgcc-hpux" + tmake_file="ia64/t-ia64 ia64/t-hpux t-slibgcc ia64/t-slibgcc-hpux t-slibgcc-hpux" ;; ia64-hp-*vms*) - tmake_file="$tmake_file ia64/t-eh-ia64 ia64/t-vms t-slibgcc-vms" + tmake_file="$tmake_file ia64/t-ia64 ia64/t-eh-ia64 ia64/t-vms t-slibgcc-vms" extra_parts="$extra_parts crtinitS.o" md_unwind_header=ia64/vms-unwind.h ;; @@ -660,18 +664,21 @@ m32r-*-linux*) m32rle-*-linux*) tmake_file="$tmake_file m32r/t-linux t-fdpbit" ;; -m68k-*-elf*) +m68k-*-elf* | fido-*-elf) + tmake_file="$tmake_file m68k/t-floatlib" ;; m68k*-*-netbsdelf*) ;; m68k*-*-openbsd*) ;; m68k-*-uclinux*) # Motorola m68k/ColdFire running uClinux with uClibc + tmake_file="$tmake_file m68k/t-floatlib" md_unwind_header=m68k/linux-unwind.h ;; m68k-*-linux*) # Motorola m68k's running GNU/Linux # with ELF format using glibc 2 # aka the GNU/Linux C library 6. + tmake_file="$tmake_file m68k/t-floatlib" # If not configured with --enable-sjlj-exceptions, bump the # libgcc version number. if test x$enable_sjlj_exceptions != xyes; then @@ -680,10 +687,11 @@ m68k-*-linux*) # Motorola m68k's running GNU/Linux md_unwind_header=m68k/linux-unwind.h ;; m68k-*-rtems*) + tmake_file="$tmake_file m68k/t-floatlib" extra_parts="$extra_parts crti.o crtn.o" ;; mcore-*-elf) - tmake_file=t-fdpbit + tmake_file="mcore/t-mcore t-fdpbit" extra_parts="$extra_parts crti.o crtn.o" ;; microblaze*-linux*) @@ -905,6 +913,10 @@ sh-*-elf* | sh[12346l]*-*-elf*) libic_invalidate_array_4-200.a \ libic_invalidate_array_4a.a \ libgcc-Os-4-200.a libgcc-4-300.a" + case ${host} in sh64*-*-*) + tmake_file="$tmake_file sh/t-sh64" + ;; + esac case ${host} in sh*-superh-elf) tmake_file="$tmake_file sh/t-superh" @@ -913,23 +925,33 @@ sh-*-elf* | sh[12346l]*-*-elf*) esac ;; sh-*-linux* | sh[2346lbe]*-*-linux*) - tmake_file="${tmake_file} t-slibgcc-libgcc sh/t-linux t-fdpbit" + tmake_file="${tmake_file} sh/t-sh t-slibgcc-libgcc sh/t-linux t-fdpbit" + case ${host} in sh64*-*-linux*) + tmake_file="$tmake_file sh/t-sh64" + ;; + esac md_unwind_header=sh/linux-unwind.h ;; sh-*-netbsdelf* | shl*-*-netbsdelf* | sh5-*-netbsd* | sh5l*-*-netbsd* | \ sh64-*-netbsd* | sh64l*-*-netbsd*) + tmake_file="$tmake_file sh/t-sh sh/t-netbsd" + case ${host} in + sh5*-*-netbsd* | sh64*-netbsd*) + tmake_file="$tmake_file sh/t-sh64" + ;; + esac # NetBSD's C library includes a fast software FP library that # has support for setting/setting the rounding mode, exception # mask, etc. 
Therefore, we don't want to include software FP # in libgcc. ;; sh-*-rtems*) - tmake_file="$tmake_file t-crtstuff-pic t-fdpbit" + tmake_file="$tmake_file sh/t-sh t-crtstuff-pic t-fdpbit" extra_parts="$extra_parts crt1.o crti.o crtn.o crtbeginS.o crtendS.o \ $sh_ic_extra_parts $sh_opt_extra_parts" ;; sh-wrs-vxworks) - tmake_file="$tmake_file t-crtstuff-pic t-fdpbit" + tmake_file="$tmake_file sh/t-sh t-crtstuff-pic t-fdpbit" ;; sparc-*-netbsdelf*) ;; @@ -956,6 +978,13 @@ sparc-*-linux*) # SPARC's running GNU/Linux, libc6 tmake_file="${tmake_file} sparc/t-linux" ;; esac + case ${host} in + *-leon[3-9]*) + ;; + *) + tmake_file="$tmake_file sparc/t-softmul" + ;; + esac extra_parts="$extra_parts crtfastmath.o" md_unwind_header=sparc/linux-unwind.h ;; @@ -1007,9 +1036,10 @@ tic6x-*-elf) unwind_header=config/c6x/unwind-c6x.h ;; v850*-*-*) - tmake_file=t-fdpbit + tmake_file="v850/t-v850 t-fdpbit" ;; vax-*-linux*) + tmake_file="$tmake_file vax/t-linux" ;; vax-*-netbsdelf*) ;; @@ -1032,6 +1062,7 @@ am33_2.0-*-linux*) tmake_file="$tmake_file t-fdpbit" ;; m32c-*-elf*|m32c-*-rtems*) + tmake_file="$tmake_file m32c/t-m32c" ;; mep*-*-*) tmake_file="mep/t-mep t-fdpbit" diff --git a/libgcc/config/arm/bpabi-v6m.S b/libgcc/config/arm/bpabi-v6m.S new file mode 100644 index 00000000000..4ecea6da5a6 --- /dev/null +++ b/libgcc/config/arm/bpabi-v6m.S @@ -0,0 +1,318 @@ +/* Miscellaneous BPABI functions. ARMv6M implementation + + Copyright (C) 2006, 2008, 2009, 2010 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. 
*/ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ + +#ifdef L_aeabi_lcmp + +FUNC_START aeabi_lcmp + cmp xxh, yyh + beq 1f + bgt 2f + mov r0, #1 + neg r0, r0 + RET +2: + mov r0, #1 + RET +1: + sub r0, xxl, yyl + beq 1f + bhi 2f + mov r0, #1 + neg r0, r0 + RET +2: + mov r0, #1 +1: + RET + FUNC_END aeabi_lcmp + +#endif /* L_aeabi_lcmp */ + +#ifdef L_aeabi_ulcmp + +FUNC_START aeabi_ulcmp + cmp xxh, yyh + bne 1f + sub r0, xxl, yyl + beq 2f +1: + bcs 1f + mov r0, #1 + neg r0, r0 + RET +1: + mov r0, #1 +2: + RET + FUNC_END aeabi_ulcmp + +#endif /* L_aeabi_ulcmp */ + +.macro test_div_by_zero signed + cmp yyh, #0 + bne 7f + cmp yyl, #0 + bne 7f + cmp xxh, #0 + bne 2f + cmp xxl, #0 +2: + .ifc \signed, unsigned + beq 3f + mov xxh, #0 + mvn xxh, xxh @ 0xffffffff + mov xxl, xxh +3: + .else + beq 5f + blt 6f + mov xxl, #0 + mvn xxl, xxl @ 0xffffffff + lsr xxh, xxl, #1 @ 0x7fffffff + b 5f +6: mov xxh, #0x80 + lsl xxh, xxh, #24 @ 0x80000000 + mov xxl, #0 +5: + .endif + @ tailcalls are tricky on v6-m. + push {r0, r1, r2} + ldr r0, 1f + adr r1, 1f + add r0, r1 + str r0, [sp, #8] + @ We know we are not on armv4t, so pop pc is safe. + pop {r0, r1, pc} + .align 2 +1: + .word __aeabi_ldiv0 - 1b +7: +.endm + +#ifdef L_aeabi_ldivmod + +FUNC_START aeabi_ldivmod + test_div_by_zero signed + + push {r0, r1} + mov r0, sp + push {r0, lr} + ldr r0, [sp, #8] + bl SYM(__gnu_ldivmod_helper) + ldr r3, [sp, #4] + mov lr, r3 + add sp, sp, #8 + pop {r2, r3} + RET + FUNC_END aeabi_ldivmod + +#endif /* L_aeabi_ldivmod */ + +#ifdef L_aeabi_uldivmod + +FUNC_START aeabi_uldivmod + test_div_by_zero unsigned + + push {r0, r1} + mov r0, sp + push {r0, lr} + ldr r0, [sp, #8] + bl SYM(__gnu_uldivmod_helper) + ldr r3, [sp, #4] + mov lr, r3 + add sp, sp, #8 + pop {r2, r3} + RET + FUNC_END aeabi_uldivmod + +#endif /* L_aeabi_uldivmod */ + +#ifdef L_arm_addsubsf3 + +FUNC_START aeabi_frsub + + push {r4, lr} + mov r4, #1 + lsl r4, #31 + eor r0, r0, r4 + bl __aeabi_fadd + pop {r4, pc} + + FUNC_END aeabi_frsub + +#endif /* L_arm_addsubsf3 */ + +#ifdef L_arm_cmpsf2 + +FUNC_START aeabi_cfrcmple + + mov ip, r0 + mov r0, r1 + mov r1, ip + b 6f + +FUNC_START aeabi_cfcmpeq +FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: push {r0, r1, r2, r3, r4, lr} + bl __lesf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. 
+ bmi 1f + mov r1, #0 + cmn r0, r1 +1: + pop {r0, r1, r2, r3, r4, pc} + + FUNC_END aeabi_cfcmple + FUNC_END aeabi_cfcmpeq + FUNC_END aeabi_cfrcmple + +FUNC_START aeabi_fcmpeq + + push {r4, lr} + bl __eqsf2 + neg r0, r0 + add r0, r0, #1 + pop {r4, pc} + + FUNC_END aeabi_fcmpeq + +.macro COMPARISON cond, helper, mode=sf2 +FUNC_START aeabi_fcmp\cond + + push {r4, lr} + bl __\helper\mode + cmp r0, #0 + b\cond 1f + mov r0, #0 + pop {r4, pc} +1: + mov r0, #1 + pop {r4, pc} + + FUNC_END aeabi_fcmp\cond +.endm + +COMPARISON lt, le +COMPARISON le, le +COMPARISON gt, ge +COMPARISON ge, ge + +#endif /* L_arm_cmpsf2 */ + +#ifdef L_arm_addsubdf3 + +FUNC_START aeabi_drsub + + push {r4, lr} + mov r4, #1 + lsl r4, #31 + eor xxh, xxh, r4 + bl __aeabi_dadd + pop {r4, pc} + + FUNC_END aeabi_drsub + +#endif /* L_arm_addsubdf3 */ + +#ifdef L_arm_cmpdf2 + +FUNC_START aeabi_cdrcmple + + mov ip, r0 + mov r0, r2 + mov r2, ip + mov ip, r1 + mov r1, r3 + mov r3, ip + b 6f + +FUNC_START aeabi_cdcmpeq +FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: push {r0, r1, r2, r3, r4, lr} + bl __ledf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. + bmi 1f + mov r1, #0 + cmn r0, r1 +1: + pop {r0, r1, r2, r3, r4, pc} + + FUNC_END aeabi_cdcmple + FUNC_END aeabi_cdcmpeq + FUNC_END aeabi_cdrcmple + +FUNC_START aeabi_dcmpeq + + push {r4, lr} + bl __eqdf2 + neg r0, r0 + add r0, r0, #1 + pop {r4, pc} + + FUNC_END aeabi_dcmpeq + +.macro COMPARISON cond, helper, mode=df2 +FUNC_START aeabi_dcmp\cond + + push {r4, lr} + bl __\helper\mode + cmp r0, #0 + b\cond 1f + mov r0, #0 + pop {r4, pc} +1: + mov r0, #1 + pop {r4, pc} + + FUNC_END aeabi_dcmp\cond +.endm + +COMPARISON lt, le +COMPARISON le, le +COMPARISON gt, ge +COMPARISON ge, ge + +#endif /* L_arm_cmpdf2 */ diff --git a/libgcc/config/arm/bpabi.S b/libgcc/config/arm/bpabi.S new file mode 100644 index 00000000000..2ff338927fa --- /dev/null +++ b/libgcc/config/arm/bpabi.S @@ -0,0 +1,163 @@ +/* Miscellaneous BPABI functions. + + Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. 
*/ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ + +#ifdef L_aeabi_lcmp + +ARM_FUNC_START aeabi_lcmp + cmp xxh, yyh + do_it lt + movlt r0, #-1 + do_it gt + movgt r0, #1 + do_it ne + RETc(ne) + subs r0, xxl, yyl + do_it lo + movlo r0, #-1 + do_it hi + movhi r0, #1 + RET + FUNC_END aeabi_lcmp + +#endif /* L_aeabi_lcmp */ + +#ifdef L_aeabi_ulcmp + +ARM_FUNC_START aeabi_ulcmp + cmp xxh, yyh + do_it lo + movlo r0, #-1 + do_it hi + movhi r0, #1 + do_it ne + RETc(ne) + cmp xxl, yyl + do_it lo + movlo r0, #-1 + do_it hi + movhi r0, #1 + do_it eq + moveq r0, #0 + RET + FUNC_END aeabi_ulcmp + +#endif /* L_aeabi_ulcmp */ + +.macro test_div_by_zero signed +/* Tail-call to divide-by-zero handlers which may be overridden by the user, + so unwinding works properly. */ +#if defined(__thumb2__) + cbnz yyh, 1f + cbnz yyl, 1f + cmp xxh, #0 + do_it eq + cmpeq xxl, #0 + .ifc \signed, unsigned + beq 2f + mov xxh, #0xffffffff + mov xxl, xxh +2: + .else + do_it lt, t + movlt xxl, #0 + movlt xxh, #0x80000000 + do_it gt, t + movgt xxh, #0x7fffffff + movgt xxl, #0xffffffff + .endif + b SYM (__aeabi_ldiv0) __PLT__ +1: +#else + /* Note: Thumb-1 code calls via an ARM shim on processors which + support ARM mode. */ + cmp yyh, #0 + cmpeq yyl, #0 + bne 2f + cmp xxh, #0 + cmpeq xxl, #0 + .ifc \signed, unsigned + movne xxh, #0xffffffff + movne xxl, #0xffffffff + .else + movlt xxh, #0x80000000 + movlt xxl, #0 + movgt xxh, #0x7fffffff + movgt xxl, #0xffffffff + .endif + b SYM (__aeabi_ldiv0) __PLT__ +2: +#endif +.endm + +#ifdef L_aeabi_ldivmod + +ARM_FUNC_START aeabi_ldivmod + test_div_by_zero signed + + sub sp, sp, #8 +#if defined(__thumb2__) + mov ip, sp + push {ip, lr} +#else + do_push {sp, lr} +#endif + bl SYM(__gnu_ldivmod_helper) __PLT__ + ldr lr, [sp, #4] + add sp, sp, #8 + do_pop {r2, r3} + RET + +#endif /* L_aeabi_ldivmod */ + +#ifdef L_aeabi_uldivmod + +ARM_FUNC_START aeabi_uldivmod + test_div_by_zero unsigned + + sub sp, sp, #8 +#if defined(__thumb2__) + mov ip, sp + push {ip, lr} +#else + do_push {sp, lr} +#endif + bl SYM(__gnu_uldivmod_helper) __PLT__ + ldr lr, [sp, #4] + add sp, sp, #8 + do_pop {r2, r3} + RET + +#endif /* L_aeabi_divmod */ + diff --git a/libgcc/config/arm/ieee754-df.S b/libgcc/config/arm/ieee754-df.S new file mode 100644 index 00000000000..eb0c38632d0 --- /dev/null +++ b/libgcc/config/arm/ieee754-df.S @@ -0,0 +1,1447 @@ +/* ieee754-df.S double-precision floating point support for ARM + + Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. + Contributed by Nicolas Pitre (nico@cam.org) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* + * Notes: + * + * The goal of this code is to be as fast as possible. 
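In both the v6-M and the ARM/Thumb-2 variants of test_div_by_zero above, the macro pre-loads a saturated quotient before tail-calling __aeabi_ldiv0, so that the default handler (which simply returns) yields the conventional results. A sketch of the values being built with the mov/mvn/lsl sequences, assuming two's-complement 64-bit integers:

    #include <stdint.h>

    /* Quotient left in the registers before the tail call to __aeabi_ldiv0.  */
    static int64_t saturated_signed_quotient (int64_t numerator)
    {
      if (numerator > 0)
        return INT64_MAX;          /* 0x7fffffffffffffff */
      if (numerator < 0)
        return INT64_MIN;          /* 0x8000000000000000 */
      return 0;                    /* 0 / 0 */
    }

    static uint64_t saturated_unsigned_quotient (uint64_t numerator)
    {
      return numerator ? UINT64_MAX : 0;
    }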
This is + * not meant to be easy to understand for the casual reader. + * For slightly simpler code please see the single precision version + * of this file. + * + * Only the default rounding mode is intended for best performances. + * Exceptions aren't supported yet, but that can be added quite easily + * if necessary without impacting performances. + */ + + +@ For FPA, float words are always big-endian. +@ For VFP, floats words follow the memory system mode. +#if defined(__VFP_FP__) && !defined(__ARMEB__) +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#else +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#endif + + +#ifdef L_arm_negdf2 + +ARM_FUNC_START negdf2 +ARM_FUNC_ALIAS aeabi_dneg negdf2 + + @ flip sign bit + eor xh, xh, #0x80000000 + RET + + FUNC_END aeabi_dneg + FUNC_END negdf2 + +#endif + +#ifdef L_arm_addsubdf3 + +ARM_FUNC_START aeabi_drsub + + eor xh, xh, #0x80000000 @ flip sign bit of first arg + b 1f + +ARM_FUNC_START subdf3 +ARM_FUNC_ALIAS aeabi_dsub subdf3 + + eor yh, yh, #0x80000000 @ flip sign bit of second arg +#if defined(__INTERWORKING_STUBS__) + b 1f @ Skip Thumb-code prologue +#endif + +ARM_FUNC_START adddf3 +ARM_FUNC_ALIAS aeabi_dadd adddf3 + +1: do_push {r4, r5, lr} + + @ Look for zeroes, equal values, INF, or NAN. + shift1 lsl, r4, xh, #1 + shift1 lsl, r5, yh, #1 + teq r4, r5 + do_it eq + teqeq xl, yl + do_it ne, ttt + COND(orr,s,ne) ip, r4, xl + COND(orr,s,ne) ip, r5, yl + COND(mvn,s,ne) ip, r4, asr #21 + COND(mvn,s,ne) ip, r5, asr #21 + beq LSYM(Lad_s) + + @ Compute exponent difference. Make largest exponent in r4, + @ corresponding arg in xh-xl, and positive exponent difference in r5. + shift1 lsr, r4, r4, #21 + rsbs r5, r4, r5, lsr #21 + do_it lt + rsblt r5, r5, #0 + ble 1f + add r4, r4, r5 + eor yl, xl, yl + eor yh, xh, yh + eor xl, yl, xl + eor xh, yh, xh + eor yl, xl, yl + eor yh, xh, yh +1: + @ If exponent difference is too large, return largest argument + @ already in xh-xl. We need up to 54 bit to handle proper rounding + @ of 0x1p54 - 1.1. + cmp r5, #54 + do_it hi + RETLDM "r4, r5" hi + + @ Convert mantissa to signed integer. + tst xh, #0x80000000 + mov xh, xh, lsl #12 + mov ip, #0x00100000 + orr xh, ip, xh, lsr #12 + beq 1f +#if defined(__thumb2__) + negs xl, xl + sbc xh, xh, xh, lsl #1 +#else + rsbs xl, xl, #0 + rsc xh, xh, #0 +#endif +1: + tst yh, #0x80000000 + mov yh, yh, lsl #12 + orr yh, ip, yh, lsr #12 + beq 1f +#if defined(__thumb2__) + negs yl, yl + sbc yh, yh, yh, lsl #1 +#else + rsbs yl, yl, #0 + rsc yh, yh, #0 +#endif +1: + @ If exponent == difference, one or both args were denormalized. + @ Since this is not common case, rescale them off line. + teq r4, r5 + beq LSYM(Lad_d) +LSYM(Lad_x): + + @ Compensate for the exponent overlapping the mantissa MSB added later + sub r4, r4, #1 + + @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip. + rsbs lr, r5, #32 + blt 1f + shift1 lsl, ip, yl, lr + shiftop adds xl xl yl lsr r5 yl + adc xh, xh, #0 + shiftop adds xl xl yh lsl lr yl + shiftop adcs xh xh yh asr r5 yh + b 2f +1: sub r5, r5, #32 + add lr, lr, #32 + cmp yl, #1 + shift1 lsl,ip, yh, lr + do_it cs + orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later + shiftop adds xl xl yh asr r5 yh + adcs xh, xh, yh, asr #31 +2: + @ We now have a result in xh-xl-ip. 
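The xh/xl register aliases above reflect that only the word order of a double differs between the FPA and VFP/endianness configurations: the sign, the 11-bit exponent and the top 20 mantissa bits always live in the high word, the remaining 32 mantissa bits in the low word. That is why negdf2 and aeabi_drsub only need to touch bit 31 of the high word. A minimal C model of the same bit-level view (helper names are for illustration only):

    #include <stdint.h>
    #include <string.h>

    static double model_negdf2 (double x)
    {
      uint64_t bits;
      memcpy (&bits, &x, sizeof bits);
      bits ^= 0x8000000000000000ULL;   /* flip only the sign bit */
      memcpy (&x, &bits, sizeof bits);
      return x;
    }

    /* __aeabi_drsub (x, y) is "reverse subtract": it returns y - x,
       implemented above by flipping the sign of the first operand and
       falling through into the addition path.  */
    static double model_drsub (double x, double y)
    {
      return model_negdf2 (x) + y;
    }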
+ @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above) + and r5, xh, #0x80000000 + bpl LSYM(Lad_p) +#if defined(__thumb2__) + mov lr, #0 + negs ip, ip + sbcs xl, lr, xl + sbc xh, lr, xh +#else + rsbs ip, ip, #0 + rscs xl, xl, #0 + rsc xh, xh, #0 +#endif + + @ Determine how to normalize the result. +LSYM(Lad_p): + cmp xh, #0x00100000 + bcc LSYM(Lad_a) + cmp xh, #0x00200000 + bcc LSYM(Lad_e) + + @ Result needs to be shifted right. + movs xh, xh, lsr #1 + movs xl, xl, rrx + mov ip, ip, rrx + add r4, r4, #1 + + @ Make sure we did not bust our exponent. + mov r2, r4, lsl #21 + cmn r2, #(2 << 21) + bcs LSYM(Lad_o) + + @ Our result is now properly aligned into xh-xl, remaining bits in ip. + @ Round with MSB of ip. If halfway between two numbers, round towards + @ LSB of xl = 0. + @ Pack final result together. +LSYM(Lad_e): + cmp ip, #0x80000000 + do_it eq + COND(mov,s,eq) ip, xl, lsr #1 + adcs xl, xl, #0 + adc xh, xh, r4, lsl #20 + orr xh, xh, r5 + RETLDM "r4, r5" + + @ Result must be shifted left and exponent adjusted. +LSYM(Lad_a): + movs ip, ip, lsl #1 + adcs xl, xl, xl + adc xh, xh, xh + tst xh, #0x00100000 + sub r4, r4, #1 + bne LSYM(Lad_e) + + @ No rounding necessary since ip will always be 0 at this point. +LSYM(Lad_l): + +#if __ARM_ARCH__ < 5 + + teq xh, #0 + movne r3, #20 + moveq r3, #52 + moveq xh, xl + moveq xl, #0 + mov r2, xh + cmp r2, #(1 << 16) + movhs r2, r2, lsr #16 + subhs r3, r3, #16 + cmp r2, #(1 << 8) + movhs r2, r2, lsr #8 + subhs r3, r3, #8 + cmp r2, #(1 << 4) + movhs r2, r2, lsr #4 + subhs r3, r3, #4 + cmp r2, #(1 << 2) + subhs r3, r3, #2 + sublo r3, r3, r2, lsr #1 + sub r3, r3, r2, lsr #3 + +#else + + teq xh, #0 + do_it eq, t + moveq xh, xl + moveq xl, #0 + clz r3, xh + do_it eq + addeq r3, r3, #32 + sub r3, r3, #11 + +#endif + + @ determine how to shift the value. + subs r2, r3, #32 + bge 2f + adds r2, r2, #12 + ble 1f + + @ shift value left 21 to 31 bits, or actually right 11 to 1 bits + @ since a register switch happened above. + add ip, r2, #20 + rsb r2, r2, #12 + shift1 lsl, xl, xh, ip + shift1 lsr, xh, xh, r2 + b 3f + + @ actually shift value left 1 to 20 bits, which might also represent + @ 32 to 52 bits if counting the register switch that happened earlier. +1: add r2, r2, #20 +2: do_it le + rsble ip, r2, #32 + shift1 lsl, xh, xh, r2 +#if defined(__thumb2__) + lsr ip, xl, ip + itt le + orrle xh, xh, ip + lslle xl, xl, r2 +#else + orrle xh, xh, xl, lsr ip + movle xl, xl, lsl r2 +#endif + + @ adjust exponent accordingly. +3: subs r4, r4, r3 + do_it ge, tt + addge xh, xh, r4, lsl #20 + orrge xh, xh, r5 + RETLDM "r4, r5" ge + + @ Exponent too small, denormalize result. + @ Find out proper shift value. + mvn r4, r4 + subs r4, r4, #31 + bge 2f + adds r4, r4, #12 + bgt 1f + + @ shift result right of 1 to 20 bits, sign is in r5. + add r4, r4, #20 + rsb r2, r4, #32 + shift1 lsr, xl, xl, r4 + shiftop orr xl xl xh lsl r2 yh + shiftop orr xh r5 xh lsr r4 yh + RETLDM "r4, r5" + + @ shift result right of 21 to 31 bits, or left 11 to 1 bits after + @ a register switch from xh to xl. +1: rsb r4, r4, #12 + rsb r2, r4, #32 + shift1 lsr, xl, xl, r2 + shiftop orr xl xl xh lsl r4 yh + mov xh, r5 + RETLDM "r4, r5" + + @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch + @ from xh to xl. +2: shift1 lsr, xl, xh, r4 + mov xh, r5 + RETLDM "r4, r5" + + @ Adjust exponents for denormalized arguments. + @ Note that r4 must not remain equal to 0. 
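The pre-ARMv5 normalization sequence above has no clz instruction to lean on, so it locates the leading set bit with a branch-free binary search over halving bit ranges. The same idea in C, as an illustration (soft_clz32 is a hypothetical helper, not part of libgcc):

    #include <stdint.h>

    /* Count leading zeros by narrowing the search interval, in the style
       of the __ARM_ARCH__ < 5 sequence above.  Returns 32 for x == 0.  */
    static int soft_clz32 (uint32_t x)
    {
      int n = 32;
      if (x >> 16) { x >>= 16; n -= 16; }
      if (x >> 8)  { x >>= 8;  n -= 8;  }
      if (x >> 4)  { x >>= 4;  n -= 4;  }
      if (x >> 2)  { x >>= 2;  n -= 2;  }
      if (x >> 1)  { n -= 2; } else { n -= x; }
      return n;
    }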
+LSYM(Lad_d): + teq r4, #0 + eor yh, yh, #0x00100000 + do_it eq, te + eoreq xh, xh, #0x00100000 + addeq r4, r4, #1 + subne r5, r5, #1 + b LSYM(Lad_x) + + +LSYM(Lad_s): + mvns ip, r4, asr #21 + do_it ne + COND(mvn,s,ne) ip, r5, asr #21 + beq LSYM(Lad_i) + + teq r4, r5 + do_it eq + teqeq xl, yl + beq 1f + + @ Result is x + 0.0 = x or 0.0 + y = y. + orrs ip, r4, xl + do_it eq, t + moveq xh, yh + moveq xl, yl + RETLDM "r4, r5" + +1: teq xh, yh + + @ Result is x - x = 0. + do_it ne, tt + movne xh, #0 + movne xl, #0 + RETLDM "r4, r5" ne + + @ Result is x + x = 2x. + movs ip, r4, lsr #21 + bne 2f + movs xl, xl, lsl #1 + adcs xh, xh, xh + do_it cs + orrcs xh, xh, #0x80000000 + RETLDM "r4, r5" +2: adds r4, r4, #(2 << 21) + do_it cc, t + addcc xh, xh, #(1 << 20) + RETLDM "r4, r5" cc + and r5, xh, #0x80000000 + + @ Overflow: return INF. +LSYM(Lad_o): + orr xh, r5, #0x7f000000 + orr xh, xh, #0x00f00000 + mov xl, #0 + RETLDM "r4, r5" + + @ At least one of x or y is INF/NAN. + @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN) + @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN) + @ if either is NAN: return NAN + @ if opposite sign: return NAN + @ otherwise return xh-xl (which is INF or -INF) +LSYM(Lad_i): + mvns ip, r4, asr #21 + do_it ne, te + movne xh, yh + movne xl, yl + COND(mvn,s,eq) ip, r5, asr #21 + do_it ne, t + movne yh, xh + movne yl, xl + orrs r4, xl, xh, lsl #12 + do_it eq, te + COND(orr,s,eq) r5, yl, yh, lsl #12 + teqeq xh, yh + orrne xh, xh, #0x00080000 @ quiet NAN + RETLDM "r4, r5" + + FUNC_END aeabi_dsub + FUNC_END subdf3 + FUNC_END aeabi_dadd + FUNC_END adddf3 + +ARM_FUNC_START floatunsidf +ARM_FUNC_ALIAS aeabi_ui2d floatunsidf + + teq r0, #0 + do_it eq, t + moveq r1, #0 + RETc(eq) + do_push {r4, r5, lr} + mov r4, #0x400 @ initial exponent + add r4, r4, #(52-1 - 1) + mov r5, #0 @ sign bit is 0 + .ifnc xl, r0 + mov xl, r0 + .endif + mov xh, #0 + b LSYM(Lad_l) + + FUNC_END aeabi_ui2d + FUNC_END floatunsidf + +ARM_FUNC_START floatsidf +ARM_FUNC_ALIAS aeabi_i2d floatsidf + + teq r0, #0 + do_it eq, t + moveq r1, #0 + RETc(eq) + do_push {r4, r5, lr} + mov r4, #0x400 @ initial exponent + add r4, r4, #(52-1 - 1) + ands r5, r0, #0x80000000 @ sign bit in r5 + do_it mi + rsbmi r0, r0, #0 @ absolute value + .ifnc xl, r0 + mov xl, r0 + .endif + mov xh, #0 + b LSYM(Lad_l) + + FUNC_END aeabi_i2d + FUNC_END floatsidf + +ARM_FUNC_START extendsfdf2 +ARM_FUNC_ALIAS aeabi_f2d extendsfdf2 + + movs r2, r0, lsl #1 @ toss sign bit + mov xh, r2, asr #3 @ stretch exponent + mov xh, xh, rrx @ retrieve sign bit + mov xl, r2, lsl #28 @ retrieve remaining bits + do_it ne, ttt + COND(and,s,ne) r3, r2, #0xff000000 @ isolate exponent + teqne r3, #0xff000000 @ if not 0, check if INF or NAN + eorne xh, xh, #0x38000000 @ fixup exponent otherwise. + RETc(ne) @ and return it. + + teq r2, #0 @ if actually 0 + do_it ne, e + teqne r3, #0xff000000 @ or INF or NAN + RETc(eq) @ we are done already. + + @ value was denormalized. We can normalize it now. 
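For normal, finite inputs, aeabi_f2d/extendsfdf2 above amounts to rebasing the exponent from bias 127 to bias 1023 and moving the 23-bit mantissa to the top of the 52-bit field; zeros, denormals, infinities and NaNs take the separate paths shown. A field-level sketch of the common case, operating on raw encodings (illustration only, not the register-level trick the assembly uses):

    #include <stdint.h>

    static uint64_t model_f2d_normal (uint32_t f)
    {
      uint64_t sign = (uint64_t)(f >> 31) << 63;
      uint64_t exp  = ((f >> 23) & 0xff) - 127 + 1023;   /* rebias by 896 */
      uint64_t mant = f & 0x007fffff;
      return sign | (exp << 52) | (mant << 29);          /* 52 - 23 = 29 */
    }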
+ do_push {r4, r5, lr} + mov r4, #0x380 @ setup corresponding exponent + and r5, xh, #0x80000000 @ move sign bit in r5 + bic xh, xh, #0x80000000 + b LSYM(Lad_l) + + FUNC_END aeabi_f2d + FUNC_END extendsfdf2 + +ARM_FUNC_START floatundidf +ARM_FUNC_ALIAS aeabi_ul2d floatundidf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqd f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + @ For hard FPA code we want to return via the tail below so that + @ we can return the result in f0 as well as in r0/r1 for backwards + @ compatibility. + adr ip, LSYM(f0_ret) + @ Push pc as well so that RETLDM works correctly. + do_push {r4, r5, ip, lr, pc} +#else + do_push {r4, r5, lr} +#endif + + mov r5, #0 + b 2f + +ARM_FUNC_START floatdidf +ARM_FUNC_ALIAS aeabi_l2d floatdidf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqd f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + @ For hard FPA code we want to return via the tail below so that + @ we can return the result in f0 as well as in r0/r1 for backwards + @ compatibility. + adr ip, LSYM(f0_ret) + @ Push pc as well so that RETLDM works correctly. + do_push {r4, r5, ip, lr, pc} +#else + do_push {r4, r5, lr} +#endif + + ands r5, ah, #0x80000000 @ sign bit in r5 + bpl 2f +#if defined(__thumb2__) + negs al, al + sbc ah, ah, ah, lsl #1 +#else + rsbs al, al, #0 + rsc ah, ah, #0 +#endif +2: + mov r4, #0x400 @ initial exponent + add r4, r4, #(52-1 - 1) + + @ FPA little-endian: must swap the word order. + .ifnc xh, ah + mov ip, al + mov xh, ah + mov xl, ip + .endif + + movs ip, xh, lsr #22 + beq LSYM(Lad_p) + + @ The value is too big. Scale it down a bit... + mov r2, #3 + movs ip, ip, lsr #3 + do_it ne + addne r2, r2, #3 + movs ip, ip, lsr #3 + do_it ne + addne r2, r2, #3 + add r2, r2, ip, lsr #3 + + rsb r3, r2, #32 + shift1 lsl, ip, xl, r3 + shift1 lsr, xl, xl, r2 + shiftop orr xl xl xh lsl r3 lr + shift1 lsr, xh, xh, r2 + add r4, r4, r2 + b LSYM(Lad_p) + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + + @ Legacy code expects the result to be returned in f0. Copy it + @ there as well. +LSYM(f0_ret): + do_push {r0, r1} + ldfd f0, [sp], #8 + RETLDM + +#endif + + FUNC_END floatdidf + FUNC_END aeabi_l2d + FUNC_END floatundidf + FUNC_END aeabi_ul2d + +#endif /* L_addsubdf3 */ + +#ifdef L_arm_muldivdf3 + +ARM_FUNC_START muldf3 +ARM_FUNC_ALIAS aeabi_dmul muldf3 + do_push {r4, r5, r6, lr} + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + orr ip, ip, #0x700 + ands r4, ip, xh, lsr #20 + do_it ne, tte + COND(and,s,ne) r5, ip, yh, lsr #20 + teqne r4, ip + teqne r5, ip + bleq LSYM(Lml_s) + + @ Add exponents together + add r4, r4, r5 + + @ Determine final sign. + eor r6, xh, yh + + @ Convert mantissa to unsigned integer. + @ If power of two, branch to a separate path. + bic xh, xh, ip, lsl #21 + bic yh, yh, ip, lsl #21 + orrs r5, xl, xh, lsl #12 + do_it ne + COND(orr,s,ne) r5, yl, yh, lsl #12 + orr xh, xh, #0x00100000 + orr yh, yh, #0x00100000 + beq LSYM(Lml_1) + +#if __ARM_ARCH__ < 4 + + @ Put sign bit in r6, which will be restored in yl later. + and r6, r6, #0x80000000 + + @ Well, no way to make it shorter without the umull instruction. 
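The block that follows assembles the wide significand product from 16-bit halves because pre-ARMv4 cores have no umull. The same schoolbook scheme is easier to follow in C; the sketch below shows it for a 64 x 64 -> 128 bit product built from 32-bit halves (a hypothetical helper, shown only to illustrate the partial-product layout):

    #include <stdint.h>

    static void mul64x64_128 (uint64_t a, uint64_t b,
                              uint64_t *hi, uint64_t *lo)
    {
      uint64_t a_lo = (uint32_t)a, a_hi = a >> 32;
      uint64_t b_lo = (uint32_t)b, b_hi = b >> 32;

      uint64_t p0 = a_lo * b_lo;          /* contributes to bits 0..63   */
      uint64_t p1 = a_lo * b_hi;          /* contributes to bits 32..95  */
      uint64_t p2 = a_hi * b_lo;          /* contributes to bits 32..95  */
      uint64_t p3 = a_hi * b_hi;          /* contributes to bits 64..127 */

      uint64_t mid = (p0 >> 32) + (uint32_t)p1 + (uint32_t)p2;

      *lo = (mid << 32) | (uint32_t)p0;
      *hi = p3 + (p1 >> 32) + (p2 >> 32) + (mid >> 32);
    }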
+ stmfd sp!, {r6, r7, r8, r9, sl, fp} + mov r7, xl, lsr #16 + mov r8, yl, lsr #16 + mov r9, xh, lsr #16 + mov sl, yh, lsr #16 + bic xl, xl, r7, lsl #16 + bic yl, yl, r8, lsl #16 + bic xh, xh, r9, lsl #16 + bic yh, yh, sl, lsl #16 + mul ip, xl, yl + mul fp, xl, r8 + mov lr, #0 + adds ip, ip, fp, lsl #16 + adc lr, lr, fp, lsr #16 + mul fp, r7, yl + adds ip, ip, fp, lsl #16 + adc lr, lr, fp, lsr #16 + mul fp, xl, sl + mov r5, #0 + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, r7, yh + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, xh, r8 + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, r9, yl + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, xh, sl + mul r6, r9, sl + adds r5, r5, fp, lsl #16 + adc r6, r6, fp, lsr #16 + mul fp, r9, yh + adds r5, r5, fp, lsl #16 + adc r6, r6, fp, lsr #16 + mul fp, xl, yh + adds lr, lr, fp + mul fp, r7, sl + adcs r5, r5, fp + mul fp, xh, yl + adc r6, r6, #0 + adds lr, lr, fp + mul fp, r9, r8 + adcs r5, r5, fp + mul fp, r7, r8 + adc r6, r6, #0 + adds lr, lr, fp + mul fp, xh, yh + adcs r5, r5, fp + adc r6, r6, #0 + ldmfd sp!, {yl, r7, r8, r9, sl, fp} + +#else + + @ Here is the actual multiplication. + umull ip, lr, xl, yl + mov r5, #0 + umlal lr, r5, xh, yl + and yl, r6, #0x80000000 + umlal lr, r5, xl, yh + mov r6, #0 + umlal r5, r6, xh, yh + +#endif + + @ The LSBs in ip are only significant for the final rounding. + @ Fold them into lr. + teq ip, #0 + do_it ne + orrne lr, lr, #1 + + @ Adjust result upon the MSB position. + sub r4, r4, #0xff + cmp r6, #(1 << (20-11)) + sbc r4, r4, #0x300 + bcs 1f + movs lr, lr, lsl #1 + adcs r5, r5, r5 + adc r6, r6, r6 +1: + @ Shift to final position, add sign to result. + orr xh, yl, r6, lsl #11 + orr xh, xh, r5, lsr #21 + mov xl, r5, lsl #11 + orr xl, xl, lr, lsr #21 + mov lr, lr, lsl #11 + + @ Check exponent range for under/overflow. + subs ip, r4, #(254 - 1) + do_it hi + cmphi ip, #0x700 + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + cmp lr, #0x80000000 + do_it eq + COND(mov,s,eq) lr, xl, lsr #1 + adcs xl, xl, #0 + adc xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" + + @ Multiplication by 0x1p*: let''s shortcut a lot of code. +LSYM(Lml_1): + and r6, r6, #0x80000000 + orr xh, r6, xh + orr xl, xl, yl + eor xh, xh, yh + subs r4, r4, ip, lsr #1 + do_it gt, tt + COND(rsb,s,gt) r5, r4, ip + orrgt xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" gt + + @ Under/overflow: fix things up for the code below. + orr xh, xh, #0x00100000 + mov lr, #0 + subs r4, r4, #1 + +LSYM(Lml_u): + @ Overflow? + bgt LSYM(Lml_o) + + @ Check if denormalized result is possible, otherwise return signed 0. + cmn r4, #(53 + 1) + do_it le, tt + movle xl, #0 + bicle xh, xh, #0x7fffffff + RETLDM "r4, r5, r6" le + + @ Find out proper shift value. + rsb r4, r4, #0 + subs r4, r4, #32 + bge 2f + adds r4, r4, #12 + bgt 1f + + @ shift result right of 1 to 20 bits, preserve sign bit, round, etc. + add r4, r4, #20 + rsb r5, r4, #32 + shift1 lsl, r3, xl, r5 + shift1 lsr, xl, xl, r4 + shiftop orr xl xl xh lsl r5 r2 + and r2, xh, #0x80000000 + bic xh, xh, #0x80000000 + adds xl, xl, r3, lsr #31 + shiftop adc xh r2 xh lsr r4 r6 + orrs lr, lr, r3, lsl #1 + do_it eq + biceq xl, xl, r3, lsr #31 + RETLDM "r4, r5, r6" + + @ shift result right of 21 to 31 bits, or left 11 to 1 bits after + @ a register switch from xh to xl. Then round. 
+1: rsb r4, r4, #12 + rsb r5, r4, #32 + shift1 lsl, r3, xl, r4 + shift1 lsr, xl, xl, r5 + shiftop orr xl xl xh lsl r4 r2 + bic xh, xh, #0x7fffffff + adds xl, xl, r3, lsr #31 + adc xh, xh, #0 + orrs lr, lr, r3, lsl #1 + do_it eq + biceq xl, xl, r3, lsr #31 + RETLDM "r4, r5, r6" + + @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch + @ from xh to xl. Leftover bits are in r3-r6-lr for rounding. +2: rsb r5, r4, #32 + shiftop orr lr lr xl lsl r5 r2 + shift1 lsr, r3, xl, r4 + shiftop orr r3 r3 xh lsl r5 r2 + shift1 lsr, xl, xh, r4 + bic xh, xh, #0x7fffffff + shiftop bic xl xl xh lsr r4 r2 + add xl, xl, r3, lsr #31 + orrs lr, lr, r3, lsl #1 + do_it eq + biceq xl, xl, r3, lsr #31 + RETLDM "r4, r5, r6" + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Lml_d): + teq r4, #0 + bne 2f + and r6, xh, #0x80000000 +1: movs xl, xl, lsl #1 + adc xh, xh, xh + tst xh, #0x00100000 + do_it eq + subeq r4, r4, #1 + beq 1b + orr xh, xh, r6 + teq r5, #0 + do_it ne + RETc(ne) +2: and r6, yh, #0x80000000 +3: movs yl, yl, lsl #1 + adc yh, yh, yh + tst yh, #0x00100000 + do_it eq + subeq r5, r5, #1 + beq 3b + orr yh, yh, r6 + RET + +LSYM(Lml_s): + @ Isolate the INF and NAN cases away + teq r4, ip + and r5, ip, yh, lsr #20 + do_it ne + teqne r5, ip + beq 1f + + @ Here, one or more arguments are either denormalized or zero. + orrs r6, xl, xh, lsl #1 + do_it ne + COND(orr,s,ne) r6, yl, yh, lsl #1 + bne LSYM(Lml_d) + + @ Result is 0, but determine sign anyway. +LSYM(Lml_z): + eor xh, xh, yh + and xh, xh, #0x80000000 + mov xl, #0 + RETLDM "r4, r5, r6" + +1: @ One or both args are INF or NAN. + orrs r6, xl, xh, lsl #1 + do_it eq, te + moveq xl, yl + moveq xh, yh + COND(orr,s,ne) r6, yl, yh, lsl #1 + beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN + teq r4, ip + bne 1f + orrs r6, xl, xh, lsl #12 + bne LSYM(Lml_n) @ NAN * -> NAN +1: teq r5, ip + bne LSYM(Lml_i) + orrs r6, yl, yh, lsl #12 + do_it ne, t + movne xl, yl + movne xh, yh + bne LSYM(Lml_n) @ * NAN -> NAN + + @ Result is INF, but we need to determine its sign. +LSYM(Lml_i): + eor xh, xh, yh + + @ Overflow: return INF (sign already in xh). +LSYM(Lml_o): + and xh, xh, #0x80000000 + orr xh, xh, #0x7f000000 + orr xh, xh, #0x00f00000 + mov xl, #0 + RETLDM "r4, r5, r6" + + @ Return a quiet NAN. +LSYM(Lml_n): + orr xh, xh, #0x7f000000 + orr xh, xh, #0x00f80000 + RETLDM "r4, r5, r6" + + FUNC_END aeabi_dmul + FUNC_END muldf3 + +ARM_FUNC_START divdf3 +ARM_FUNC_ALIAS aeabi_ddiv divdf3 + + do_push {r4, r5, r6, lr} + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + orr ip, ip, #0x700 + ands r4, ip, xh, lsr #20 + do_it ne, tte + COND(and,s,ne) r5, ip, yh, lsr #20 + teqne r4, ip + teqne r5, ip + bleq LSYM(Ldv_s) + + @ Substract divisor exponent from dividend''s. + sub r4, r4, r5 + + @ Preserve final sign into lr. + eor lr, xh, yh + + @ Convert mantissa to unsigned integer. + @ Dividend -> r5-r6, divisor -> yh-yl. + orrs r5, yl, yh, lsl #12 + mov xh, xh, lsl #12 + beq LSYM(Ldv_1) + mov yh, yh, lsl #12 + mov r5, #0x10000000 + orr yh, r5, yh, lsr #4 + orr yh, yh, yl, lsr #24 + mov yl, yl, lsl #8 + orr r5, r5, xh, lsr #4 + orr r5, r5, xl, lsr #24 + mov r6, xl, lsl #8 + + @ Initialize xh with final sign bit. + and xh, lr, #0x80000000 + + @ Ensure result will land to known bit position. + @ Apply exponent bias accordingly. 
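The division loop that follows produces the quotient mantissa by restoring division, four quotient bits per pass with the divisor shifted right between steps. A one-bit-per-step C version of the same technique (a simplified illustration, not the unrolled form used above):

    #include <stdint.h>

    /* Restoring division: returns floor((n << bits) / d), assuming
       n < d and d < 2^63 so the doubling below cannot overflow.  */
    static uint64_t mantissa_divide (uint64_t n, uint64_t d, int bits)
    {
      uint64_t q = 0;
      for (int i = 0; i < bits; i++)
        {
          n <<= 1;                 /* bring down the next quotient bit */
          q <<= 1;
          if (n >= d)
            {
              n -= d;              /* the divisor fits: record a 1 bit */
              q |= 1;
            }
        }
      return q;
    }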
+ cmp r5, yh + do_it eq + cmpeq r6, yl + adc r4, r4, #(255 - 2) + add r4, r4, #0x300 + bcs 1f + movs yh, yh, lsr #1 + mov yl, yl, rrx +1: + @ Perform first substraction to align result to a nibble. + subs r6, r6, yl + sbc r5, r5, yh + movs yh, yh, lsr #1 + mov yl, yl, rrx + mov xl, #0x00100000 + mov ip, #0x00080000 + + @ The actual division loop. +1: subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #1 + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #2 + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #3 + + orrs lr, r5, r6 + beq 2f + mov r5, r5, lsl #4 + orr r5, r5, r6, lsr #28 + mov r6, r6, lsl #4 + mov yh, yh, lsl #3 + orr yh, yh, yl, lsr #29 + mov yl, yl, lsl #3 + movs ip, ip, lsr #4 + bne 1b + + @ We are done with a word of the result. + @ Loop again for the low word if this pass was for the high word. + tst xh, #0x00100000 + bne 3f + orr xh, xh, xl + mov xl, #0 + mov ip, #0x80000000 + b 1b +2: + @ Be sure result starts in the high word. + tst xh, #0x00100000 + do_it eq, t + orreq xh, xh, xl + moveq xl, #0 +3: + @ Check exponent range for under/overflow. + subs ip, r4, #(254 - 1) + do_it hi + cmphi ip, #0x700 + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + subs ip, r5, yh + do_it eq, t + COND(sub,s,eq) ip, r6, yl + COND(mov,s,eq) ip, xl, lsr #1 + adcs xl, xl, #0 + adc xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" + + @ Division by 0x1p*: shortcut a lot of code. +LSYM(Ldv_1): + and lr, lr, #0x80000000 + orr xh, lr, xh, lsr #12 + adds r4, r4, ip, lsr #1 + do_it gt, tt + COND(rsb,s,gt) r5, r4, ip + orrgt xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" gt + + orr xh, xh, #0x00100000 + mov lr, #0 + subs r4, r4, #1 + b LSYM(Lml_u) + + @ Result mightt need to be denormalized: put remainder bits + @ in lr for rounding considerations. +LSYM(Ldv_u): + orr lr, r5, r6 + b LSYM(Lml_u) + + @ One or both arguments is either INF, NAN or zero. +LSYM(Ldv_s): + and r5, ip, yh, lsr #20 + teq r4, ip + do_it eq + teqeq r5, ip + beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN + teq r4, ip + bne 1f + orrs r4, xl, xh, lsl #12 + bne LSYM(Lml_n) @ NAN / -> NAN + teq r5, ip + bne LSYM(Lml_i) @ INF / -> INF + mov xl, yl + mov xh, yh + b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN +1: teq r5, ip + bne 2f + orrs r5, yl, yh, lsl #12 + beq LSYM(Lml_z) @ / INF -> 0 + mov xl, yl + mov xh, yh + b LSYM(Lml_n) @ / NAN -> NAN +2: @ If both are nonzero, we need to normalize and resume above. + orrs r6, xl, xh, lsl #1 + do_it ne + COND(orr,s,ne) r6, yl, yh, lsl #1 + bne LSYM(Lml_d) + @ One or both arguments are 0. + orrs r4, xl, xh, lsl #1 + bne LSYM(Lml_i) @ / 0 -> INF + orrs r5, yl, yh, lsl #1 + bne LSYM(Lml_z) @ 0 / -> 0 + b LSYM(Lml_n) @ 0 / 0 -> NAN + + FUNC_END aeabi_ddiv + FUNC_END divdf3 + +#endif /* L_muldivdf3 */ + +#ifdef L_arm_cmpdf2 + +@ Note: only r0 (return value) and ip are clobbered here. + +ARM_FUNC_START gtdf2 +ARM_FUNC_ALIAS gedf2 gtdf2 + mov ip, #-1 + b 1f + +ARM_FUNC_START ltdf2 +ARM_FUNC_ALIAS ledf2 ltdf2 + mov ip, #1 + b 1f + +ARM_FUNC_START cmpdf2 +ARM_FUNC_ALIAS nedf2 cmpdf2 +ARM_FUNC_ALIAS eqdf2 cmpdf2 + mov ip, #1 @ how should we specify unordered here? + +1: str ip, [sp, #-4]! 
+ + @ Trap any INF/NAN first. + mov ip, xh, lsl #1 + mvns ip, ip, asr #21 + mov ip, yh, lsl #1 + do_it ne + COND(mvn,s,ne) ip, ip, asr #21 + beq 3f + + @ Test for equality. + @ Note that 0.0 is equal to -0.0. +2: add sp, sp, #4 + orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 + do_it eq, e + COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0 + teqne xh, yh @ or xh == yh + do_it eq, tt + teqeq xl, yl @ and xl == yl + moveq r0, #0 @ then equal. + RETc(eq) + + @ Clear C flag + cmn r0, #0 + + @ Compare sign, + teq xh, yh + + @ Compare values if same sign + do_it pl + cmppl xh, yh + do_it eq + cmpeq xl, yl + + @ Result: + do_it cs, e + movcs r0, yh, asr #31 + mvncc r0, yh, asr #31 + orr r0, r0, #1 + RET + + @ Look for a NAN. +3: mov ip, xh, lsl #1 + mvns ip, ip, asr #21 + bne 4f + orrs ip, xl, xh, lsl #12 + bne 5f @ x is NAN +4: mov ip, yh, lsl #1 + mvns ip, ip, asr #21 + bne 2b + orrs ip, yl, yh, lsl #12 + beq 2b @ y is not NAN +5: ldr r0, [sp], #4 @ unordered return code + RET + + FUNC_END gedf2 + FUNC_END gtdf2 + FUNC_END ledf2 + FUNC_END ltdf2 + FUNC_END nedf2 + FUNC_END eqdf2 + FUNC_END cmpdf2 + +ARM_FUNC_START aeabi_cdrcmple + + mov ip, r0 + mov r0, r2 + mov r2, ip + mov ip, r1 + mov r1, r3 + mov r3, ip + b 6f + +ARM_FUNC_START aeabi_cdcmpeq +ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: do_push {r0, lr} + ARM_CALL cmpdf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. + do_it mi + cmnmi r0, #0 + RETLDM "r0" + + FUNC_END aeabi_cdcmple + FUNC_END aeabi_cdcmpeq + FUNC_END aeabi_cdrcmple + +ARM_FUNC_START aeabi_dcmpeq + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdcmple + do_it eq, e + moveq r0, #1 @ Equal to. + movne r0, #0 @ Less than, greater than, or unordered. + RETLDM + + FUNC_END aeabi_dcmpeq + +ARM_FUNC_START aeabi_dcmplt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdcmple + do_it cc, e + movcc r0, #1 @ Less than. + movcs r0, #0 @ Equal to, greater than, or unordered. + RETLDM + + FUNC_END aeabi_dcmplt + +ARM_FUNC_START aeabi_dcmple + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdcmple + do_it ls, e + movls r0, #1 @ Less than or equal to. + movhi r0, #0 @ Greater than or unordered. + RETLDM + + FUNC_END aeabi_dcmple + +ARM_FUNC_START aeabi_dcmpge + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdrcmple + do_it ls, e + movls r0, #1 @ Operand 2 is less than or equal to operand 1. + movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. + RETLDM + + FUNC_END aeabi_dcmpge + +ARM_FUNC_START aeabi_dcmpgt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdrcmple + do_it cc, e + movcc r0, #1 @ Operand 2 is less than operand 1. + movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, + @ or they are unordered. + RETLDM + + FUNC_END aeabi_dcmpgt + +#endif /* L_cmpdf2 */ + +#ifdef L_arm_unorddf2 + +ARM_FUNC_START unorddf2 +ARM_FUNC_ALIAS aeabi_dcmpun unorddf2 + + mov ip, xh, lsl #1 + mvns ip, ip, asr #21 + bne 1f + orrs ip, xl, xh, lsl #12 + bne 3f @ x is NAN +1: mov ip, yh, lsl #1 + mvns ip, ip, asr #21 + bne 2f + orrs ip, yl, yh, lsl #12 + bne 3f @ y is NAN +2: mov r0, #0 @ arguments are ordered. + RET + +3: mov r0, #1 @ arguments are unordered. + RET + + FUNC_END aeabi_dcmpun + FUNC_END unorddf2 + +#endif /* L_unorddf2 */ + +#ifdef L_arm_fixdfsi + +ARM_FUNC_START fixdfsi +ARM_FUNC_ALIAS aeabi_d2iz fixdfsi + + @ check exponent range. 
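The aeabi_dcmp* entry points above are thin wrappers around the flag-setting cdcmple/cdrcmple routines: each returns nonzero when its relation holds and 0 otherwise, and an unordered comparison (either operand a NaN) yields 0 for all of them. That matches ordinary C comparison semantics, so a reference model is simply:

    /* Reference model of the boolean comparison wrappers above.
       NaN operands make every relation false, hence a 0 return.  */
    static int model_dcmpeq (double a, double b) { return a == b; }
    static int model_dcmplt (double a, double b) { return a <  b; }
    static int model_dcmple (double a, double b) { return a <= b; }
    static int model_dcmpge (double a, double b) { return a >= b; }
    static int model_dcmpgt (double a, double b) { return a >  b; }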
+ mov r2, xh, lsl #1 + adds r2, r2, #(1 << 21) + bcs 2f @ value is INF or NAN + bpl 1f @ value is too small + mov r3, #(0xfffffc00 + 31) + subs r2, r3, r2, asr #21 + bls 3f @ value is too large + + @ scale value + mov r3, xh, lsl #11 + orr r3, r3, #0x80000000 + orr r3, r3, xl, lsr #21 + tst xh, #0x80000000 @ the sign bit + shift1 lsr, r0, r3, r2 + do_it ne + rsbne r0, r0, #0 + RET + +1: mov r0, #0 + RET + +2: orrs xl, xl, xh, lsl #12 + bne 4f @ x is NAN. +3: ands r0, xh, #0x80000000 @ the sign bit + do_it eq + moveq r0, #0x7fffffff @ maximum signed positive si + RET + +4: mov r0, #0 @ How should we convert NAN? + RET + + FUNC_END aeabi_d2iz + FUNC_END fixdfsi + +#endif /* L_fixdfsi */ + +#ifdef L_arm_fixunsdfsi + +ARM_FUNC_START fixunsdfsi +ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi + + @ check exponent range. + movs r2, xh, lsl #1 + bcs 1f @ value is negative + adds r2, r2, #(1 << 21) + bcs 2f @ value is INF or NAN + bpl 1f @ value is too small + mov r3, #(0xfffffc00 + 31) + subs r2, r3, r2, asr #21 + bmi 3f @ value is too large + + @ scale value + mov r3, xh, lsl #11 + orr r3, r3, #0x80000000 + orr r3, r3, xl, lsr #21 + shift1 lsr, r0, r3, r2 + RET + +1: mov r0, #0 + RET + +2: orrs xl, xl, xh, lsl #12 + bne 4f @ value is NAN. +3: mov r0, #0xffffffff @ maximum unsigned si + RET + +4: mov r0, #0 @ How should we convert NAN? + RET + + FUNC_END aeabi_d2uiz + FUNC_END fixunsdfsi + +#endif /* L_fixunsdfsi */ + +#ifdef L_arm_truncdfsf2 + +ARM_FUNC_START truncdfsf2 +ARM_FUNC_ALIAS aeabi_d2f truncdfsf2 + + @ check exponent range. + mov r2, xh, lsl #1 + subs r3, r2, #((1023 - 127) << 21) + do_it cs, t + COND(sub,s,cs) ip, r3, #(1 << 21) + COND(rsb,s,cs) ip, ip, #(254 << 21) + bls 2f @ value is out of range + +1: @ shift and round mantissa + and ip, xh, #0x80000000 + mov r2, xl, lsl #3 + orr xl, ip, xl, lsr #29 + cmp r2, #0x80000000 + adc r0, xl, r3, lsl #2 + do_it eq + biceq r0, r0, #1 + RET + +2: @ either overflow or underflow + tst xh, #0x40000000 + bne 3f @ overflow + + @ check if denormalized value is possible + adds r2, r3, #(23 << 21) + do_it lt, t + andlt r0, xh, #0x80000000 @ too small, return signed 0. + RETc(lt) + + @ denormalize value so we can resume with the code above afterwards. + orr xh, xh, #0x00100000 + mov r2, r2, lsr #21 + rsb r2, r2, #24 + rsb ip, r2, #32 +#if defined(__thumb2__) + lsls r3, xl, ip +#else + movs r3, xl, lsl ip +#endif + shift1 lsr, xl, xl, r2 + do_it ne + orrne xl, xl, #1 @ fold r3 for rounding considerations. + mov r3, xh, lsl #11 + mov r3, r3, lsr #11 + shiftop orr xl xl r3 lsl ip ip + shift1 lsr, r3, r3, r2 + mov r3, r3, lsl #1 + b 1b + +3: @ chech for NAN + mvns r3, r2, asr #21 + bne 5f @ simple overflow + orrs r3, xl, xh, lsl #12 + do_it ne, tt + movne r0, #0x7f000000 + orrne r0, r0, #0x00c00000 + RETc(ne) @ return NAN + +5: @ return INF with sign + and r0, xh, #0x80000000 + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + FUNC_END aeabi_d2f + FUNC_END truncdfsf2 + +#endif /* L_truncdfsf2 */ diff --git a/libgcc/config/arm/ieee754-sf.S b/libgcc/config/arm/ieee754-sf.S new file mode 100644 index 00000000000..c93f66d8ff8 --- /dev/null +++ b/libgcc/config/arm/ieee754-sf.S @@ -0,0 +1,1060 @@ +/* ieee754-sf.S single-precision floating point support for ARM + + Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. 
+ Contributed by Nicolas Pitre (nico@cam.org) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* + * Notes: + * + * The goal of this code is to be as fast as possible. This is + * not meant to be easy to understand for the casual reader. + * + * Only the default rounding mode is intended for best performances. + * Exceptions aren't supported yet, but that can be added quite easily + * if necessary without impacting performances. + */ + +#ifdef L_arm_negsf2 + +ARM_FUNC_START negsf2 +ARM_FUNC_ALIAS aeabi_fneg negsf2 + + eor r0, r0, #0x80000000 @ flip sign bit + RET + + FUNC_END aeabi_fneg + FUNC_END negsf2 + +#endif + +#ifdef L_arm_addsubsf3 + +ARM_FUNC_START aeabi_frsub + + eor r0, r0, #0x80000000 @ flip sign bit of first arg + b 1f + +ARM_FUNC_START subsf3 +ARM_FUNC_ALIAS aeabi_fsub subsf3 + + eor r1, r1, #0x80000000 @ flip sign bit of second arg +#if defined(__INTERWORKING_STUBS__) + b 1f @ Skip Thumb-code prologue +#endif + +ARM_FUNC_START addsf3 +ARM_FUNC_ALIAS aeabi_fadd addsf3 + +1: @ Look for zeroes, equal values, INF, or NAN. + movs r2, r0, lsl #1 + do_it ne, ttt + COND(mov,s,ne) r3, r1, lsl #1 + teqne r2, r3 + COND(mvn,s,ne) ip, r2, asr #24 + COND(mvn,s,ne) ip, r3, asr #24 + beq LSYM(Lad_s) + + @ Compute exponent difference. Make largest exponent in r2, + @ corresponding arg in r0, and positive exponent difference in r3. + mov r2, r2, lsr #24 + rsbs r3, r2, r3, lsr #24 + do_it gt, ttt + addgt r2, r2, r3 + eorgt r1, r0, r1 + eorgt r0, r1, r0 + eorgt r1, r0, r1 + do_it lt + rsblt r3, r3, #0 + + @ If exponent difference is too large, return largest argument + @ already in r0. We need up to 25 bit to handle proper rounding + @ of 0x1p25 - 1.1. + cmp r3, #25 + do_it hi + RETc(hi) + + @ Convert mantissa to signed integer. + tst r0, #0x80000000 + orr r0, r0, #0x00800000 + bic r0, r0, #0xff000000 + do_it ne + rsbne r0, r0, #0 + tst r1, #0x80000000 + orr r1, r1, #0x00800000 + bic r1, r1, #0xff000000 + do_it ne + rsbne r1, r1, #0 + + @ If exponent == difference, one or both args were denormalized. + @ Since this is not common case, rescale them off line. + teq r2, r3 + beq LSYM(Lad_d) +LSYM(Lad_x): + + @ Compensate for the exponent overlapping the mantissa MSB added later + sub r2, r2, #1 + + @ Shift and add second arg to first arg in r0. + @ Keep leftover bits into r1. + shiftop adds r0 r0 r1 asr r3 ip + rsb r3, r3, #32 + shift1 lsl, r1, r1, r3 + + @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above) + and r3, r0, #0x80000000 + bpl LSYM(Lad_p) +#if defined(__thumb2__) + negs r1, r1 + sbc r0, r0, r0, lsl #1 +#else + rsbs r1, r1, #0 + rsc r0, r0, #0 +#endif + + @ Determine how to normalize the result. 
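The single-precision addition above keeps each operand as a sign, an 8-bit exponent and a 24-bit significand spread across r0-r3/ip; once the exponent difference exceeds 25 bits the smaller operand cannot even influence rounding, so the larger one is returned as-is. For reference, the same unpacking written plainly in C (a hypothetical helper, for illustration):

    #include <stdint.h>

    struct sf_fields { uint32_t sign; int32_t exp; uint32_t mant; };

    static struct sf_fields unpack_sf (uint32_t bits)
    {
      struct sf_fields f;
      f.sign = bits >> 31;
      f.exp  = (bits >> 23) & 0xff;
      f.mant = bits & 0x007fffff;
      if (f.exp != 0)
        f.mant |= 0x00800000;      /* implicit leading 1 of normal numbers */
      return f;
    }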
+LSYM(Lad_p): + cmp r0, #0x00800000 + bcc LSYM(Lad_a) + cmp r0, #0x01000000 + bcc LSYM(Lad_e) + + @ Result needs to be shifted right. + movs r0, r0, lsr #1 + mov r1, r1, rrx + add r2, r2, #1 + + @ Make sure we did not bust our exponent. + cmp r2, #254 + bhs LSYM(Lad_o) + + @ Our result is now properly aligned into r0, remaining bits in r1. + @ Pack final result together. + @ Round with MSB of r1. If halfway between two numbers, round towards + @ LSB of r0 = 0. +LSYM(Lad_e): + cmp r1, #0x80000000 + adc r0, r0, r2, lsl #23 + do_it eq + biceq r0, r0, #1 + orr r0, r0, r3 + RET + + @ Result must be shifted left and exponent adjusted. +LSYM(Lad_a): + movs r1, r1, lsl #1 + adc r0, r0, r0 + tst r0, #0x00800000 + sub r2, r2, #1 + bne LSYM(Lad_e) + + @ No rounding necessary since r1 will always be 0 at this point. +LSYM(Lad_l): + +#if __ARM_ARCH__ < 5 + + movs ip, r0, lsr #12 + moveq r0, r0, lsl #12 + subeq r2, r2, #12 + tst r0, #0x00ff0000 + moveq r0, r0, lsl #8 + subeq r2, r2, #8 + tst r0, #0x00f00000 + moveq r0, r0, lsl #4 + subeq r2, r2, #4 + tst r0, #0x00c00000 + moveq r0, r0, lsl #2 + subeq r2, r2, #2 + cmp r0, #0x00800000 + movcc r0, r0, lsl #1 + sbcs r2, r2, #0 + +#else + + clz ip, r0 + sub ip, ip, #8 + subs r2, r2, ip + shift1 lsl, r0, r0, ip + +#endif + + @ Final result with sign + @ If exponent negative, denormalize result. + do_it ge, et + addge r0, r0, r2, lsl #23 + rsblt r2, r2, #0 + orrge r0, r0, r3 +#if defined(__thumb2__) + do_it lt, t + lsrlt r0, r0, r2 + orrlt r0, r3, r0 +#else + orrlt r0, r3, r0, lsr r2 +#endif + RET + + @ Fixup and adjust bit position for denormalized arguments. + @ Note that r2 must not remain equal to 0. +LSYM(Lad_d): + teq r2, #0 + eor r1, r1, #0x00800000 + do_it eq, te + eoreq r0, r0, #0x00800000 + addeq r2, r2, #1 + subne r3, r3, #1 + b LSYM(Lad_x) + +LSYM(Lad_s): + mov r3, r1, lsl #1 + + mvns ip, r2, asr #24 + do_it ne + COND(mvn,s,ne) ip, r3, asr #24 + beq LSYM(Lad_i) + + teq r2, r3 + beq 1f + + @ Result is x + 0.0 = x or 0.0 + y = y. + teq r2, #0 + do_it eq + moveq r0, r1 + RET + +1: teq r0, r1 + + @ Result is x - x = 0. + do_it ne, t + movne r0, #0 + RETc(ne) + + @ Result is x + x = 2x. + tst r2, #0xff000000 + bne 2f + movs r0, r0, lsl #1 + do_it cs + orrcs r0, r0, #0x80000000 + RET +2: adds r2, r2, #(2 << 24) + do_it cc, t + addcc r0, r0, #(1 << 23) + RETc(cc) + and r3, r0, #0x80000000 + + @ Overflow: return INF. +LSYM(Lad_o): + orr r0, r3, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + @ At least one of r0/r1 is INF/NAN. 
+ @ if r0 != INF/NAN: return r1 (which is INF/NAN) + @ if r1 != INF/NAN: return r0 (which is INF/NAN) + @ if r0 or r1 is NAN: return NAN + @ if opposite sign: return NAN + @ otherwise return r0 (which is INF or -INF) +LSYM(Lad_i): + mvns r2, r2, asr #24 + do_it ne, et + movne r0, r1 + COND(mvn,s,eq) r3, r3, asr #24 + movne r1, r0 + movs r2, r0, lsl #9 + do_it eq, te + COND(mov,s,eq) r3, r1, lsl #9 + teqeq r0, r1 + orrne r0, r0, #0x00400000 @ quiet NAN + RET + + FUNC_END aeabi_frsub + FUNC_END aeabi_fadd + FUNC_END addsf3 + FUNC_END aeabi_fsub + FUNC_END subsf3 + +ARM_FUNC_START floatunsisf +ARM_FUNC_ALIAS aeabi_ui2f floatunsisf + + mov r3, #0 + b 1f + +ARM_FUNC_START floatsisf +ARM_FUNC_ALIAS aeabi_i2f floatsisf + + ands r3, r0, #0x80000000 + do_it mi + rsbmi r0, r0, #0 + +1: movs ip, r0 + do_it eq + RETc(eq) + + @ Add initial exponent to sign + orr r3, r3, #((127 + 23) << 23) + + .ifnc ah, r0 + mov ah, r0 + .endif + mov al, #0 + b 2f + + FUNC_END aeabi_i2f + FUNC_END floatsisf + FUNC_END aeabi_ui2f + FUNC_END floatunsisf + +ARM_FUNC_START floatundisf +ARM_FUNC_ALIAS aeabi_ul2f floatundisf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqs f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + + mov r3, #0 + b 1f + +ARM_FUNC_START floatdisf +ARM_FUNC_ALIAS aeabi_l2f floatdisf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqs f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + + ands r3, ah, #0x80000000 @ sign bit in r3 + bpl 1f +#if defined(__thumb2__) + negs al, al + sbc ah, ah, ah, lsl #1 +#else + rsbs al, al, #0 + rsc ah, ah, #0 +#endif +1: +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + @ For hard FPA code we want to return via the tail below so that + @ we can return the result in f0 as well as in r0 for backwards + @ compatibility. + str lr, [sp, #-8]! + adr lr, LSYM(f0_ret) +#endif + + movs ip, ah + do_it eq, tt + moveq ip, al + moveq ah, al + moveq al, #0 + + @ Add initial exponent to sign + orr r3, r3, #((127 + 23 + 32) << 23) + do_it eq + subeq r3, r3, #(32 << 23) +2: sub r3, r3, #(1 << 23) + +#if __ARM_ARCH__ < 5 + + mov r2, #23 + cmp ip, #(1 << 16) + do_it hs, t + movhs ip, ip, lsr #16 + subhs r2, r2, #16 + cmp ip, #(1 << 8) + do_it hs, t + movhs ip, ip, lsr #8 + subhs r2, r2, #8 + cmp ip, #(1 << 4) + do_it hs, t + movhs ip, ip, lsr #4 + subhs r2, r2, #4 + cmp ip, #(1 << 2) + do_it hs, e + subhs r2, r2, #2 + sublo r2, r2, ip, lsr #1 + subs r2, r2, ip, lsr #3 + +#else + + clz r2, ip + subs r2, r2, #8 + +#endif + + sub r3, r3, r2, lsl #23 + blt 3f + + shiftop add r3 r3 ah lsl r2 ip + shift1 lsl, ip, al, r2 + rsb r2, r2, #32 + cmp ip, #0x80000000 + shiftop adc r0 r3 al lsr r2 r2 + do_it eq + biceq r0, r0, #1 + RET + +3: add r2, r2, #32 + shift1 lsl, ip, ah, r2 + rsb r2, r2, #32 + orrs al, al, ip, lsl #1 + shiftop adc r0 r3 ah lsr r2 r2 + do_it eq + biceq r0, r0, ip, lsr #31 + RET + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + +LSYM(f0_ret): + str r0, [sp, #-4]! + ldfs f0, [sp], #4 + RETLDM + +#endif + + FUNC_END floatdisf + FUNC_END aeabi_l2f + FUNC_END floatundisf + FUNC_END aeabi_ul2f + +#endif /* L_addsubsf3 */ + +#ifdef L_arm_muldivsf3 + +ARM_FUNC_START mulsf3 +ARM_FUNC_ALIAS aeabi_fmul mulsf3 + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + ands r2, ip, r0, lsr #23 + do_it ne, tt + COND(and,s,ne) r3, ip, r1, lsr #23 + teqne r2, ip + teqne r3, ip + beq LSYM(Lml_s) +LSYM(Lml_x): + + @ Add exponents together + add r2, r2, r3 + + @ Determine final sign. 
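The integer-to-float conversions above normalize the operand with a leading-zero count, derive the exponent from the shift amount, and round to nearest with ties to even using the bits shifted out (the carry and "sticky" tests in the assembly). A C sketch of what aeabi_ui2f computes, returning the raw encoding (illustration only; model_ui2f is not a real libgcc symbol):

    #include <stdint.h>

    static uint32_t model_ui2f (uint32_t x)
    {
      if (x == 0)
        return 0;

      int shift = 0;
      while (!(x & 0x80000000u)) { x <<= 1; shift++; }   /* normalize */

      uint32_t exp  = 127 + 31 - shift;      /* unbiased exponent is 31 - shift */
      uint32_t rest = x << 24;               /* the 8 bits that get shifted out */
      uint32_t r    = (exp << 23) + ((x >> 8) & 0x007fffff);

      /* Round to nearest, ties to even; a carry out of the mantissa
         field correctly bumps the exponent.  */
      if (rest > 0x80000000u || (rest == 0x80000000u && (r & 1)))
        r += 1;
      return r;
    }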
+ eor ip, r0, r1 + + @ Convert mantissa to unsigned integer. + @ If power of two, branch to a separate path. + @ Make up for final alignment. + movs r0, r0, lsl #9 + do_it ne + COND(mov,s,ne) r1, r1, lsl #9 + beq LSYM(Lml_1) + mov r3, #0x08000000 + orr r0, r3, r0, lsr #5 + orr r1, r3, r1, lsr #5 + +#if __ARM_ARCH__ < 4 + + @ Put sign bit in r3, which will be restored into r0 later. + and r3, ip, #0x80000000 + + @ Well, no way to make it shorter without the umull instruction. + do_push {r3, r4, r5} + mov r4, r0, lsr #16 + mov r5, r1, lsr #16 + bic r0, r0, r4, lsl #16 + bic r1, r1, r5, lsl #16 + mul ip, r4, r5 + mul r3, r0, r1 + mul r0, r5, r0 + mla r0, r4, r1, r0 + adds r3, r3, r0, lsl #16 + adc r1, ip, r0, lsr #16 + do_pop {r0, r4, r5} + +#else + + @ The actual multiplication. + umull r3, r1, r0, r1 + + @ Put final sign in r0. + and r0, ip, #0x80000000 + +#endif + + @ Adjust result upon the MSB position. + cmp r1, #(1 << 23) + do_it cc, tt + movcc r1, r1, lsl #1 + orrcc r1, r1, r3, lsr #31 + movcc r3, r3, lsl #1 + + @ Add sign to result. + orr r0, r0, r1 + + @ Apply exponent bias, check for under/overflow. + sbc r2, r2, #127 + cmp r2, #(254 - 1) + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + cmp r3, #0x80000000 + adc r0, r0, r2, lsl #23 + do_it eq + biceq r0, r0, #1 + RET + + @ Multiplication by 0x1p*: let''s shortcut a lot of code. +LSYM(Lml_1): + teq r0, #0 + and ip, ip, #0x80000000 + do_it eq + moveq r1, r1, lsl #9 + orr r0, ip, r0, lsr #9 + orr r0, r0, r1, lsr #9 + subs r2, r2, #127 + do_it gt, tt + COND(rsb,s,gt) r3, r2, #255 + orrgt r0, r0, r2, lsl #23 + RETc(gt) + + @ Under/overflow: fix things up for the code below. + orr r0, r0, #0x00800000 + mov r3, #0 + subs r2, r2, #1 + +LSYM(Lml_u): + @ Overflow? + bgt LSYM(Lml_o) + + @ Check if denormalized result is possible, otherwise return signed 0. + cmn r2, #(24 + 1) + do_it le, t + bicle r0, r0, #0x7fffffff + RETc(le) + + @ Shift value right, round, etc. + rsb r2, r2, #0 + movs r1, r0, lsl #1 + shift1 lsr, r1, r1, r2 + rsb r2, r2, #32 + shift1 lsl, ip, r0, r2 + movs r0, r1, rrx + adc r0, r0, #0 + orrs r3, r3, ip, lsl #1 + do_it eq + biceq r0, r0, ip, lsr #31 + RET + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Lml_d): + teq r2, #0 + and ip, r0, #0x80000000 +1: do_it eq, tt + moveq r0, r0, lsl #1 + tsteq r0, #0x00800000 + subeq r2, r2, #1 + beq 1b + orr r0, r0, ip + teq r3, #0 + and ip, r1, #0x80000000 +2: do_it eq, tt + moveq r1, r1, lsl #1 + tsteq r1, #0x00800000 + subeq r3, r3, #1 + beq 2b + orr r1, r1, ip + b LSYM(Lml_x) + +LSYM(Lml_s): + @ Isolate the INF and NAN cases away + and r3, ip, r1, lsr #23 + teq r2, ip + do_it ne + teqne r3, ip + beq 1f + + @ Here, one or more arguments are either denormalized or zero. + bics ip, r0, #0x80000000 + do_it ne + COND(bic,s,ne) ip, r1, #0x80000000 + bne LSYM(Lml_d) + + @ Result is 0, but determine sign anyway. +LSYM(Lml_z): + eor r0, r0, r1 + bic r0, r0, #0x7fffffff + RET + +1: @ One or both args are INF or NAN. + teq r0, #0x0 + do_it ne, ett + teqne r0, #0x80000000 + moveq r0, r1 + teqne r1, #0x0 + teqne r1, #0x80000000 + beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN + teq r2, ip + bne 1f + movs r2, r0, lsl #9 + bne LSYM(Lml_n) @ NAN * -> NAN +1: teq r3, ip + bne LSYM(Lml_i) + movs r3, r1, lsl #9 + do_it ne + movne r0, r1 + bne LSYM(Lml_n) @ * NAN -> NAN + + @ Result is INF, but we need to determine its sign. +LSYM(Lml_i): + eor r0, r0, r1 + + @ Overflow: return INF (sign already in r0). 
+LSYM(Lml_o): + and r0, r0, #0x80000000 + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + @ Return a quiet NAN. +LSYM(Lml_n): + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00c00000 + RET + + FUNC_END aeabi_fmul + FUNC_END mulsf3 + +ARM_FUNC_START divsf3 +ARM_FUNC_ALIAS aeabi_fdiv divsf3 + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + ands r2, ip, r0, lsr #23 + do_it ne, tt + COND(and,s,ne) r3, ip, r1, lsr #23 + teqne r2, ip + teqne r3, ip + beq LSYM(Ldv_s) +LSYM(Ldv_x): + + @ Substract divisor exponent from dividend''s + sub r2, r2, r3 + + @ Preserve final sign into ip. + eor ip, r0, r1 + + @ Convert mantissa to unsigned integer. + @ Dividend -> r3, divisor -> r1. + movs r1, r1, lsl #9 + mov r0, r0, lsl #9 + beq LSYM(Ldv_1) + mov r3, #0x10000000 + orr r1, r3, r1, lsr #4 + orr r3, r3, r0, lsr #4 + + @ Initialize r0 (result) with final sign bit. + and r0, ip, #0x80000000 + + @ Ensure result will land to known bit position. + @ Apply exponent bias accordingly. + cmp r3, r1 + do_it cc + movcc r3, r3, lsl #1 + adc r2, r2, #(127 - 2) + + @ The actual division loop. + mov ip, #0x00800000 +1: cmp r3, r1 + do_it cs, t + subcs r3, r3, r1 + orrcs r0, r0, ip + cmp r3, r1, lsr #1 + do_it cs, t + subcs r3, r3, r1, lsr #1 + orrcs r0, r0, ip, lsr #1 + cmp r3, r1, lsr #2 + do_it cs, t + subcs r3, r3, r1, lsr #2 + orrcs r0, r0, ip, lsr #2 + cmp r3, r1, lsr #3 + do_it cs, t + subcs r3, r3, r1, lsr #3 + orrcs r0, r0, ip, lsr #3 + movs r3, r3, lsl #4 + do_it ne + COND(mov,s,ne) ip, ip, lsr #4 + bne 1b + + @ Check exponent for under/overflow. + cmp r2, #(254 - 1) + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + cmp r3, r1 + adc r0, r0, r2, lsl #23 + do_it eq + biceq r0, r0, #1 + RET + + @ Division by 0x1p*: let''s shortcut a lot of code. +LSYM(Ldv_1): + and ip, ip, #0x80000000 + orr r0, ip, r0, lsr #9 + adds r2, r2, #127 + do_it gt, tt + COND(rsb,s,gt) r3, r2, #255 + orrgt r0, r0, r2, lsl #23 + RETc(gt) + + orr r0, r0, #0x00800000 + mov r3, #0 + subs r2, r2, #1 + b LSYM(Lml_u) + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Ldv_d): + teq r2, #0 + and ip, r0, #0x80000000 +1: do_it eq, tt + moveq r0, r0, lsl #1 + tsteq r0, #0x00800000 + subeq r2, r2, #1 + beq 1b + orr r0, r0, ip + teq r3, #0 + and ip, r1, #0x80000000 +2: do_it eq, tt + moveq r1, r1, lsl #1 + tsteq r1, #0x00800000 + subeq r3, r3, #1 + beq 2b + orr r1, r1, ip + b LSYM(Ldv_x) + + @ One or both arguments are either INF, NAN, zero or denormalized. +LSYM(Ldv_s): + and r3, ip, r1, lsr #23 + teq r2, ip + bne 1f + movs r2, r0, lsl #9 + bne LSYM(Lml_n) @ NAN / -> NAN + teq r3, ip + bne LSYM(Lml_i) @ INF / -> INF + mov r0, r1 + b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN +1: teq r3, ip + bne 2f + movs r3, r1, lsl #9 + beq LSYM(Lml_z) @ / INF -> 0 + mov r0, r1 + b LSYM(Lml_n) @ / NAN -> NAN +2: @ If both are nonzero, we need to normalize and resume above. + bics ip, r0, #0x80000000 + do_it ne + COND(bic,s,ne) ip, r1, #0x80000000 + bne LSYM(Ldv_d) + @ One or both arguments are zero. + bics r2, r0, #0x80000000 + bne LSYM(Lml_i) @ / 0 -> INF + bics r3, r1, #0x80000000 + bne LSYM(Lml_z) @ 0 / -> 0 + b LSYM(Lml_n) @ 0 / 0 -> NAN + + FUNC_END aeabi_fdiv + FUNC_END divsf3 + +#endif /* L_muldivsf3 */ + +#ifdef L_arm_cmpsf2 + + @ The return value in r0 is + @ + @ 0 if the operands are equal + @ 1 if the first operand is greater than the second, or + @ the operands are unordered and the operation is + @ CMP, LT, LE, NE, or EQ. 
+ @ -1 if the first operand is less than the second, or + @ the operands are unordered and the operation is GT + @ or GE. + @ + @ The Z flag will be set iff the operands are equal. + @ + @ The following registers are clobbered by this function: + @ ip, r0, r1, r2, r3 + +ARM_FUNC_START gtsf2 +ARM_FUNC_ALIAS gesf2 gtsf2 + mov ip, #-1 + b 1f + +ARM_FUNC_START ltsf2 +ARM_FUNC_ALIAS lesf2 ltsf2 + mov ip, #1 + b 1f + +ARM_FUNC_START cmpsf2 +ARM_FUNC_ALIAS nesf2 cmpsf2 +ARM_FUNC_ALIAS eqsf2 cmpsf2 + mov ip, #1 @ how should we specify unordered here? + +1: str ip, [sp, #-4]! + + @ Trap any INF/NAN first. + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + mvns ip, r2, asr #24 + do_it ne + COND(mvn,s,ne) ip, r3, asr #24 + beq 3f + + @ Compare values. + @ Note that 0.0 is equal to -0.0. +2: add sp, sp, #4 + orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag + do_it ne + teqne r0, r1 @ if not 0 compare sign + do_it pl + COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0 + + @ Result: + do_it hi + movhi r0, r1, asr #31 + do_it lo + mvnlo r0, r1, asr #31 + do_it ne + orrne r0, r0, #1 + RET + + @ Look for a NAN. +3: mvns ip, r2, asr #24 + bne 4f + movs ip, r0, lsl #9 + bne 5f @ r0 is NAN +4: mvns ip, r3, asr #24 + bne 2b + movs ip, r1, lsl #9 + beq 2b @ r1 is not NAN +5: ldr r0, [sp], #4 @ return unordered code. + RET + + FUNC_END gesf2 + FUNC_END gtsf2 + FUNC_END lesf2 + FUNC_END ltsf2 + FUNC_END nesf2 + FUNC_END eqsf2 + FUNC_END cmpsf2 + +ARM_FUNC_START aeabi_cfrcmple + + mov ip, r0 + mov r0, r1 + mov r1, ip + b 6f + +ARM_FUNC_START aeabi_cfcmpeq +ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: do_push {r0, r1, r2, r3, lr} + ARM_CALL cmpsf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. + do_it mi + cmnmi r0, #0 + RETLDM "r0, r1, r2, r3" + + FUNC_END aeabi_cfcmple + FUNC_END aeabi_cfcmpeq + FUNC_END aeabi_cfrcmple + +ARM_FUNC_START aeabi_fcmpeq + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfcmple + do_it eq, e + moveq r0, #1 @ Equal to. + movne r0, #0 @ Less than, greater than, or unordered. + RETLDM + + FUNC_END aeabi_fcmpeq + +ARM_FUNC_START aeabi_fcmplt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfcmple + do_it cc, e + movcc r0, #1 @ Less than. + movcs r0, #0 @ Equal to, greater than, or unordered. + RETLDM + + FUNC_END aeabi_fcmplt + +ARM_FUNC_START aeabi_fcmple + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfcmple + do_it ls, e + movls r0, #1 @ Less than or equal to. + movhi r0, #0 @ Greater than or unordered. + RETLDM + + FUNC_END aeabi_fcmple + +ARM_FUNC_START aeabi_fcmpge + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfrcmple + do_it ls, e + movls r0, #1 @ Operand 2 is less than or equal to operand 1. + movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. + RETLDM + + FUNC_END aeabi_fcmpge + +ARM_FUNC_START aeabi_fcmpgt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfrcmple + do_it cc, e + movcc r0, #1 @ Operand 2 is less than operand 1. + movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, + @ or they are unordered. 
+ RETLDM + + FUNC_END aeabi_fcmpgt + +#endif /* L_cmpsf2 */ + +#ifdef L_arm_unordsf2 + +ARM_FUNC_START unordsf2 +ARM_FUNC_ALIAS aeabi_fcmpun unordsf2 + + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + mvns ip, r2, asr #24 + bne 1f + movs ip, r0, lsl #9 + bne 3f @ r0 is NAN +1: mvns ip, r3, asr #24 + bne 2f + movs ip, r1, lsl #9 + bne 3f @ r1 is NAN +2: mov r0, #0 @ arguments are ordered. + RET +3: mov r0, #1 @ arguments are unordered. + RET + + FUNC_END aeabi_fcmpun + FUNC_END unordsf2 + +#endif /* L_unordsf2 */ + +#ifdef L_arm_fixsfsi + +ARM_FUNC_START fixsfsi +ARM_FUNC_ALIAS aeabi_f2iz fixsfsi + + @ check exponent range. + mov r2, r0, lsl #1 + cmp r2, #(127 << 24) + bcc 1f @ value is too small + mov r3, #(127 + 31) + subs r2, r3, r2, lsr #24 + bls 2f @ value is too large + + @ scale value + mov r3, r0, lsl #8 + orr r3, r3, #0x80000000 + tst r0, #0x80000000 @ the sign bit + shift1 lsr, r0, r3, r2 + do_it ne + rsbne r0, r0, #0 + RET + +1: mov r0, #0 + RET + +2: cmp r2, #(127 + 31 - 0xff) + bne 3f + movs r2, r0, lsl #9 + bne 4f @ r0 is NAN. +3: ands r0, r0, #0x80000000 @ the sign bit + do_it eq + moveq r0, #0x7fffffff @ the maximum signed positive si + RET + +4: mov r0, #0 @ What should we convert NAN to? + RET + + FUNC_END aeabi_f2iz + FUNC_END fixsfsi + +#endif /* L_fixsfsi */ + +#ifdef L_arm_fixunssfsi + +ARM_FUNC_START fixunssfsi +ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi + + @ check exponent range. + movs r2, r0, lsl #1 + bcs 1f @ value is negative + cmp r2, #(127 << 24) + bcc 1f @ value is too small + mov r3, #(127 + 31) + subs r2, r3, r2, lsr #24 + bmi 2f @ value is too large + + @ scale the value + mov r3, r0, lsl #8 + orr r3, r3, #0x80000000 + shift1 lsr, r0, r3, r2 + RET + +1: mov r0, #0 + RET + +2: cmp r2, #(127 + 31 - 0xff) + bne 3f + movs r2, r0, lsl #9 + bne 4f @ r0 is NAN. +3: mov r0, #0xffffffff @ maximum unsigned si + RET + +4: mov r0, #0 @ What should we convert NAN to? + RET + + FUNC_END aeabi_f2uiz + FUNC_END fixunssfsi + +#endif /* L_fixunssfsi */ diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S new file mode 100644 index 00000000000..2e76c01df4b --- /dev/null +++ b/libgcc/config/arm/lib1funcs.S @@ -0,0 +1,1829 @@ +@ libgcc routines for ARM cpu. +@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) + +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008, + 2009, 2010 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* An executable stack is *not* required for these functions. 
*/ +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif /* __ELF__ and __linux__ */ + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. */ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ +/* ------------------------------------------------------------------------ */ + +/* We need to know what prefix to add to function names. */ + +#ifndef __USER_LABEL_PREFIX__ +#error __USER_LABEL_PREFIX__ not defined +#endif + +/* ANSI concatenation macros. */ + +#define CONCAT1(a, b) CONCAT2(a, b) +#define CONCAT2(a, b) a ## b + +/* Use the right prefix for global labels. */ + +#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) + +#ifdef __ELF__ +#ifdef __thumb__ +#define __PLT__ /* Not supported in Thumb assembler (for now). */ +#elif defined __vxworks && !defined __PIC__ +#define __PLT__ /* Not supported by the kernel loader. */ +#else +#define __PLT__ (PLT) +#endif +#define TYPE(x) .type SYM(x),function +#define SIZE(x) .size SYM(x), . - SYM(x) +#define LSYM(x) .x +#else +#define __PLT__ +#define TYPE(x) +#define SIZE(x) +#define LSYM(x) x +#endif + +/* Function end macros. Variants for interworking. */ + +#if defined(__ARM_ARCH_2__) +# define __ARM_ARCH__ 2 +#endif + +#if defined(__ARM_ARCH_3__) +# define __ARM_ARCH__ 3 +#endif + +#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \ + || defined(__ARM_ARCH_4T__) +/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with + long multiply instructions. That includes v3M. */ +# define __ARM_ARCH__ 4 +#endif + +#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \ + || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +# define __ARM_ARCH__ 5 +#endif + +#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ + || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \ + || defined(__ARM_ARCH_6M__) +# define __ARM_ARCH__ 6 +#endif + +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__) +# define __ARM_ARCH__ 7 +#endif + +#ifndef __ARM_ARCH__ +#error Unable to determine architecture. +#endif + +/* There are times when we might prefer Thumb1 code even if ARM code is + permitted, for example, the code might be smaller, or there might be + interworking problems with switching to ARM state if interworking is + disabled. */ +#if (defined(__thumb__) \ + && !defined(__thumb2__) \ + && (!defined(__THUMB_INTERWORK__) \ + || defined (__OPTIMIZE_SIZE__) \ + || defined(__ARM_ARCH_6M__))) +# define __prefer_thumb__ +#endif + +/* How to return from a function call depends on the architecture variant. */ + +#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__) + +# define RET bx lr +# define RETc(x) bx##x lr + +/* Special precautions for interworking on armv4t. */ +# if (__ARM_ARCH__ == 4) + +/* Always use bx, not ldr pc. */ +# if (defined(__thumb__) || defined(__THUMB_INTERWORK__)) +# define __INTERWORKING__ +# endif /* __THUMB__ || __THUMB_INTERWORK__ */ + +/* Include thumb stub before arm mode code. 
*/ +# if defined(__thumb__) && !defined(__THUMB_INTERWORK__) +# define __INTERWORKING_STUBS__ +# endif /* __thumb__ && !__THUMB_INTERWORK__ */ + +#endif /* __ARM_ARCH == 4 */ + +#else + +# define RET mov pc, lr +# define RETc(x) mov##x pc, lr + +#endif + +.macro cfi_pop advance, reg, cfa_offset +#ifdef __ELF__ + .pushsection .debug_frame + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte \advance + .byte (0xc0 | \reg) /* DW_CFA_restore */ + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 \cfa_offset + .popsection +#endif +.endm +.macro cfi_push advance, reg, offset, cfa_offset +#ifdef __ELF__ + .pushsection .debug_frame + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte \advance + .byte (0x80 | \reg) /* DW_CFA_offset */ + .uleb128 (\offset / -4) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 \cfa_offset + .popsection +#endif +.endm +.macro cfi_start start_label, end_label +#ifdef __ELF__ + .pushsection .debug_frame +LSYM(Lstart_frame): + .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE +LSYM(Lstart_cie): + .4byte 0xffffffff @ CIE Identifier Tag + .byte 0x1 @ CIE Version + .ascii "\0" @ CIE Augmentation + .uleb128 0x1 @ CIE Code Alignment Factor + .sleb128 -4 @ CIE Data Alignment Factor + .byte 0xe @ CIE RA Column + .byte 0xc @ DW_CFA_def_cfa + .uleb128 0xd + .uleb128 0x0 + + .align 2 +LSYM(Lend_cie): + .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length +LSYM(Lstart_fde): + .4byte LSYM(Lstart_frame) @ FDE CIE offset + .4byte \start_label @ FDE initial location + .4byte \end_label-\start_label @ FDE address range + .popsection +#endif +.endm +.macro cfi_end end_label +#ifdef __ELF__ + .pushsection .debug_frame + .align 2 +LSYM(Lend_fde): + .popsection +\end_label: +#endif +.endm + +/* Don't pass dirn, it's there just to get token pasting right. */ + +.macro RETLDM regs=, cond=, unwind=, dirn=ia +#if defined (__INTERWORKING__) + .ifc "\regs","" + ldr\cond lr, [sp], #8 + .else +# if defined(__thumb2__) + pop\cond {\regs, lr} +# else + ldm\cond\dirn sp!, {\regs, lr} +# endif + .endif + .ifnc "\unwind", "" + /* Mark LR as restored. */ +97: cfi_pop 97b - \unwind, 0xe, 0x0 + .endif + bx\cond lr +#else + /* Caller is responsible for providing IT instruction. */ + .ifc "\regs","" + ldr\cond pc, [sp], #8 + .else +# if defined(__thumb2__) + pop\cond {\regs, pc} +# else + ldm\cond\dirn sp!, {\regs, pc} +# endif + .endif +#endif +.endm + +/* The Unified assembly syntax allows the same code to be assembled for both + ARM and Thumb-2. However this is only supported by recent gas, so define + a set of macros to allow ARM code on older assemblers. */ +#if defined(__thumb2__) +.macro do_it cond, suffix="" + it\suffix \cond +.endm +.macro shift1 op, arg0, arg1, arg2 + \op \arg0, \arg1, \arg2 +.endm +#define do_push push +#define do_pop pop +#define COND(op1, op2, cond) op1 ## op2 ## cond +/* Perform an arithmetic operation with a variable shift operand. This + requires two instructions and a scratch register on Thumb-2. 
*/ +.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp + \shiftop \tmp, \src2, \shiftreg + \name \dest, \src1, \tmp +.endm +#else +.macro do_it cond, suffix="" +.endm +.macro shift1 op, arg0, arg1, arg2 + mov \arg0, \arg1, \op \arg2 +.endm +#define do_push stmfd sp!, +#define do_pop ldmfd sp!, +#define COND(op1, op2, cond) op1 ## cond ## op2 +.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp + \name \dest, \src1, \src2, \shiftop \shiftreg +.endm +#endif + +#ifdef __ARM_EABI__ +.macro ARM_LDIV0 name signed + cmp r0, #0 + .ifc \signed, unsigned + movne r0, #0xffffffff + .else + movgt r0, #0x7fffffff + movlt r0, #0x80000000 + .endif + b SYM (__aeabi_idiv0) __PLT__ +.endm +#else +.macro ARM_LDIV0 name signed + str lr, [sp, #-8]! +98: cfi_push 98b - __\name, 0xe, -0x8, 0x8 + bl SYM (__div0) __PLT__ + mov r0, #0 @ About as wrong as it could be. + RETLDM unwind=98b +.endm +#endif + + +#ifdef __ARM_EABI__ +.macro THUMB_LDIV0 name signed +#if defined(__ARM_ARCH_6M__) + .ifc \signed, unsigned + cmp r0, #0 + beq 1f + mov r0, #0 + mvn r0, r0 @ 0xffffffff +1: + .else + cmp r0, #0 + beq 2f + blt 3f + mov r0, #0 + mvn r0, r0 + lsr r0, r0, #1 @ 0x7fffffff + b 2f +3: mov r0, #0x80 + lsl r0, r0, #24 @ 0x80000000 +2: + .endif + push {r0, r1, r2} + ldr r0, 4f + adr r1, 4f + add r0, r1 + str r0, [sp, #8] + @ We know we are not on armv4t, so pop pc is safe. + pop {r0, r1, pc} + .align 2 +4: + .word __aeabi_idiv0 - 4b +#elif defined(__thumb2__) + .syntax unified + .ifc \signed, unsigned + cbz r0, 1f + mov r0, #0xffffffff +1: + .else + cmp r0, #0 + do_it gt + movgt r0, #0x7fffffff + do_it lt + movlt r0, #0x80000000 + .endif + b.w SYM(__aeabi_idiv0) __PLT__ +#else + .align 2 + bx pc + nop + .arm + cmp r0, #0 + .ifc \signed, unsigned + movne r0, #0xffffffff + .else + movgt r0, #0x7fffffff + movlt r0, #0x80000000 + .endif + b SYM(__aeabi_idiv0) __PLT__ + .thumb +#endif +.endm +#else +.macro THUMB_LDIV0 name signed + push { r1, lr } +98: cfi_push 98b - __\name, 0xe, -0x4, 0x8 + bl SYM (__div0) + mov r0, #0 @ About as wrong as it could be. +#if defined (__INTERWORKING__) + pop { r1, r2 } + bx r2 +#else + pop { r1, pc } +#endif +.endm +#endif + +.macro FUNC_END name + SIZE (__\name) +.endm + +.macro DIV_FUNC_END name signed + cfi_start __\name, LSYM(Lend_div0) +LSYM(Ldiv0): +#ifdef __thumb__ + THUMB_LDIV0 \name \signed +#else + ARM_LDIV0 \name \signed +#endif + cfi_end LSYM(Lend_div0) + FUNC_END \name +.endm + +.macro THUMB_FUNC_START name + .globl SYM (\name) + TYPE (\name) + .thumb_func +SYM (\name): +.endm + +/* Function start macros. Variants for ARM and Thumb. */ + +#ifdef __thumb__ +#define THUMB_FUNC .thumb_func +#define THUMB_CODE .force_thumb +# if defined(__thumb2__) +#define THUMB_SYNTAX .syntax divided +# else +#define THUMB_SYNTAX +# endif +#else +#define THUMB_FUNC +#define THUMB_CODE +#define THUMB_SYNTAX +#endif + +.macro FUNC_START name + .text + .globl SYM (__\name) + TYPE (__\name) + .align 0 + THUMB_CODE + THUMB_FUNC + THUMB_SYNTAX +SYM (__\name): +.endm + +/* Special function that will always be coded in ARM assembly, even if + in Thumb-only compilation. */ + +#if defined(__thumb2__) + +/* For Thumb-2 we build everything in thumb mode. */ +.macro ARM_FUNC_START name + FUNC_START \name + .syntax unified +.endm +#define EQUIV .thumb_set +.macro ARM_CALL name + bl __\name +.endm + +#elif defined(__INTERWORKING_STUBS__) + +.macro ARM_FUNC_START name + FUNC_START \name + bx pc + nop + .arm +/* A hook to tell gdb that we've switched to ARM mode. 
Also used to call + directly from other local arm routines. */ +_L__\name: +.endm +#define EQUIV .thumb_set +/* Branch directly to a function declared with ARM_FUNC_START. + Must be called in arm mode. */ +.macro ARM_CALL name + bl _L__\name +.endm + +#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */ + +#ifdef __ARM_ARCH_6M__ +#define EQUIV .thumb_set +#else +.macro ARM_FUNC_START name + .text + .globl SYM (__\name) + TYPE (__\name) + .align 0 + .arm +SYM (__\name): +.endm +#define EQUIV .set +.macro ARM_CALL name + bl __\name +.endm +#endif + +#endif + +.macro FUNC_ALIAS new old + .globl SYM (__\new) +#if defined (__thumb__) + .thumb_set SYM (__\new), SYM (__\old) +#else + .set SYM (__\new), SYM (__\old) +#endif +.endm + +#ifndef __ARM_ARCH_6M__ +.macro ARM_FUNC_ALIAS new old + .globl SYM (__\new) + EQUIV SYM (__\new), SYM (__\old) +#if defined(__INTERWORKING_STUBS__) + .set SYM (_L__\new), SYM (_L__\old) +#endif +.endm +#endif + +#ifdef __ARMEB__ +#define xxh r0 +#define xxl r1 +#define yyh r2 +#define yyl r3 +#else +#define xxh r1 +#define xxl r0 +#define yyh r3 +#define yyl r2 +#endif + +#ifdef __ARM_EABI__ +.macro WEAK name + .weak SYM (__\name) +.endm +#endif + +#ifdef __thumb__ +/* Register aliases. */ + +work .req r4 @ XXXX is this safe ? +dividend .req r0 +divisor .req r1 +overdone .req r2 +result .req r2 +curbit .req r3 +#endif +#if 0 +ip .req r12 +sp .req r13 +lr .req r14 +pc .req r15 +#endif + +/* ------------------------------------------------------------------------ */ +/* Bodies of the division and modulo routines. */ +/* ------------------------------------------------------------------------ */ +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) + +#if defined (__thumb2__) + clz \curbit, \dividend + clz \result, \divisor + sub \curbit, \result, \curbit + rsb \curbit, \curbit, #31 + adr \result, 1f + add \curbit, \result, \curbit, lsl #4 + mov \result, #0 + mov pc, \curbit +.p2align 3 +1: + .set shift, 32 + .rept 32 + .set shift, shift - 1 + cmp.w \dividend, \divisor, lsl #shift + nop.n + adc.w \result, \result, \result + it cs + subcs.w \dividend, \dividend, \divisor, lsl #shift + .endr +#else + clz \curbit, \dividend + clz \result, \divisor + sub \curbit, \result, \curbit + rsbs \curbit, \curbit, #31 + addne \curbit, \curbit, \curbit, lsl #1 + mov \result, #0 + addne pc, pc, \curbit, lsl #2 + nop + .set shift, 32 + .rept 32 + .set shift, shift - 1 + cmp \dividend, \divisor, lsl #shift + adc \result, \result, \result + subcs \dividend, \dividend, \divisor, lsl #shift + .endr +#endif + +#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ +#if __ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else /* __ARM_ARCH__ < 5 */ + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4-bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. 
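+	@ Illustrative sketch only (comment, not assembled): in C terms the two
+	@ alignment loops below behave roughly like
+	@   while (divisor < 0x10000000 && divisor < dividend) { divisor <<= 4; curbit <<= 4; }
+	@   while (divisor < 0x80000000 && divisor < dividend) { divisor <<= 1; curbit <<= 1; }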
+1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b + + mov \result, #0 + +#endif /* __ARM_ARCH__ < 5 */ + + @ Division loop +1: cmp \dividend, \divisor + do_it hs, t + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + do_it ne, t + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ + +.endm +/* ------------------------------------------------------------------------ */ +.macro ARM_DIV2_ORDER divisor, order + +#if __ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm +/* ------------------------------------------------------------------------ */ +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + rsbs \order, \order, #31 + addne pc, pc, \order, lsl #3 + nop + .set shift, 32 + .rept 32 + .set shift, shift - 1 + cmp \dividend, \divisor, lsl #shift + subcs \dividend, \dividend, \divisor, lsl #shift + .endr + +#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ +#if __ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else /* __ARM_ARCH__ < 5 */ + + mov \order, #0 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b + +#endif /* __ARM_ARCH__ < 5 */ + + @ Perform all needed substractions to keep only the reminder. + @ Do comparisons in batch of 4 first. 
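+	@ Illustrative sketch only (comment, not assembled): each pass of the
+	@ batched loop below is roughly
+	@   for (k = 0; k < 4; k++)
+	@     if (dividend >= (divisor >> k)) dividend -= divisor >> k;
+	@   divisor >>= 4;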
+ subs \order, \order, #3 @ yes, 3 is intended here + blt 2f + +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subges \order, \order, #4 + bge 1b + + tst \order, #3 + teqne \dividend, #0 + beq 5f + + @ Either 1, 2 or 3 comparison/substractions are left. +2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: + +#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ + +.endm +/* ------------------------------------------------------------------------ */ +.macro THUMB_DIV_MOD_BODY modulo + @ Load the constant 0x10000000 into our work register. + mov work, #1 + lsl work, #28 +LSYM(Loop1): + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. + cmp divisor, work + bhs LSYM(Lbignum) + cmp divisor, dividend + bhs LSYM(Lbignum) + lsl divisor, #4 + lsl curbit, #4 + b LSYM(Loop1) +LSYM(Lbignum): + @ Set work to 0x80000000 + lsl work, #3 +LSYM(Loop2): + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. + cmp divisor, work + bhs LSYM(Loop3) + cmp divisor, dividend + bhs LSYM(Loop3) + lsl divisor, #1 + lsl curbit, #1 + b LSYM(Loop2) +LSYM(Loop3): + @ Test for possible subtractions ... + .if \modulo + @ ... On the final pass, this may subtract too much from the dividend, + @ so keep track of which subtractions are done, we can fix them up + @ afterwards. + mov overdone, #0 + cmp dividend, divisor + blo LSYM(Lover1) + sub dividend, dividend, divisor +LSYM(Lover1): + lsr work, divisor, #1 + cmp dividend, work + blo LSYM(Lover2) + sub dividend, dividend, work + mov ip, curbit + mov work, #1 + ror curbit, work + orr overdone, curbit + mov curbit, ip +LSYM(Lover2): + lsr work, divisor, #2 + cmp dividend, work + blo LSYM(Lover3) + sub dividend, dividend, work + mov ip, curbit + mov work, #2 + ror curbit, work + orr overdone, curbit + mov curbit, ip +LSYM(Lover3): + lsr work, divisor, #3 + cmp dividend, work + blo LSYM(Lover4) + sub dividend, dividend, work + mov ip, curbit + mov work, #3 + ror curbit, work + orr overdone, curbit + mov curbit, ip +LSYM(Lover4): + mov ip, curbit + .else + @ ... and note which bits are done in the result. On the final pass, + @ this may subtract too much from the dividend, but the result will be ok, + @ since the "bit" will have been shifted out at the bottom. 
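+	@ Illustrative sketch only (comment, not assembled): each unrolled step
+	@ below is roughly, for k = 0..3,
+	@   if (dividend >= (divisor >> k)) { dividend -= divisor >> k; result |= curbit >> k; }
+	@ after which divisor and curbit are both shifted right by four bits.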
+ cmp dividend, divisor + blo LSYM(Lover1) + sub dividend, dividend, divisor + orr result, result, curbit +LSYM(Lover1): + lsr work, divisor, #1 + cmp dividend, work + blo LSYM(Lover2) + sub dividend, dividend, work + lsr work, curbit, #1 + orr result, work +LSYM(Lover2): + lsr work, divisor, #2 + cmp dividend, work + blo LSYM(Lover3) + sub dividend, dividend, work + lsr work, curbit, #2 + orr result, work +LSYM(Lover3): + lsr work, divisor, #3 + cmp dividend, work + blo LSYM(Lover4) + sub dividend, dividend, work + lsr work, curbit, #3 + orr result, work +LSYM(Lover4): + .endif + + cmp dividend, #0 @ Early termination? + beq LSYM(Lover5) + lsr curbit, #4 @ No, any more bits to do? + beq LSYM(Lover5) + lsr divisor, #4 + b LSYM(Loop3) +LSYM(Lover5): + .if \modulo + @ Any subtractions that we should not have done will be recorded in + @ the top three bits of "overdone". Exactly which were not needed + @ are governed by the position of the bit, stored in ip. + mov work, #0xe + lsl work, #28 + and overdone, work + beq LSYM(Lgot_result) + + @ If we terminated early, because dividend became zero, then the + @ bit in ip will not be in the bottom nibble, and we should not + @ perform the additions below. We must test for this though + @ (rather relying upon the TSTs to prevent the additions) since + @ the bit in ip could be in the top two bits which might then match + @ with one of the smaller RORs. + mov curbit, ip + mov work, #0x7 + tst curbit, work + beq LSYM(Lgot_result) + + mov curbit, ip + mov work, #3 + ror curbit, work + tst overdone, curbit + beq LSYM(Lover6) + lsr work, divisor, #3 + add dividend, work +LSYM(Lover6): + mov curbit, ip + mov work, #2 + ror curbit, work + tst overdone, curbit + beq LSYM(Lover7) + lsr work, divisor, #2 + add dividend, work +LSYM(Lover7): + mov curbit, ip + mov work, #1 + ror curbit, work + tst overdone, curbit + beq LSYM(Lgot_result) + lsr work, divisor, #1 + add dividend, work + .endif +LSYM(Lgot_result): +.endm +/* ------------------------------------------------------------------------ */ +/* Start of the Real Functions */ +/* ------------------------------------------------------------------------ */ +#ifdef L_udivsi3 + +#if defined(__prefer_thumb__) + + FUNC_START udivsi3 + FUNC_ALIAS aeabi_uidiv udivsi3 + + cmp divisor, #0 + beq LSYM(Ldiv0) +LSYM(udivsi3_skip_div0_test): + mov curbit, #1 + mov result, #0 + + push { work } + cmp dividend, divisor + blo LSYM(Lgot_result) + + THUMB_DIV_MOD_BODY 0 + + mov r0, result + pop { work } + RET + +#else /* ARM version/Thumb-2. */ + + ARM_FUNC_START udivsi3 + ARM_FUNC_ALIAS aeabi_uidiv udivsi3 + + /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily + check for division-by-zero a second time. 
*/ +LSYM(udivsi3_skip_div0_test): + subs r2, r1, #1 + do_it eq + RETc(eq) + bcc LSYM(Ldiv0) + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + RET + +11: do_it eq, e + moveq r0, #1 + movne r0, #0 + RET + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + RET + +#endif /* ARM version */ + + DIV_FUNC_END udivsi3 unsigned + +#if defined(__prefer_thumb__) +FUNC_START aeabi_uidivmod + cmp r1, #0 + beq LSYM(Ldiv0) + push {r0, r1, lr} + bl LSYM(udivsi3_skip_div0_test) + POP {r1, r2, r3} + mul r2, r0 + sub r1, r1, r2 + bx r3 +#else +ARM_FUNC_START aeabi_uidivmod + cmp r1, #0 + beq LSYM(Ldiv0) + stmfd sp!, { r0, r1, lr } + bl LSYM(udivsi3_skip_div0_test) + ldmfd sp!, { r1, r2, lr } + mul r3, r2, r0 + sub r1, r1, r3 + RET +#endif + FUNC_END aeabi_uidivmod + +#endif /* L_udivsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_umodsi3 + + FUNC_START umodsi3 + +#ifdef __thumb__ + + cmp divisor, #0 + beq LSYM(Ldiv0) + mov curbit, #1 + cmp dividend, divisor + bhs LSYM(Lover10) + RET + +LSYM(Lover10): + push { work } + + THUMB_DIV_MOD_BODY 1 + + pop { work } + RET + +#else /* ARM version. */ + + subs r2, r1, #1 @ compare divisor with 1 + bcc LSYM(Ldiv0) + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + RETc(ls) + + ARM_MOD_BODY r0, r1, r2, r3 + + RET + +#endif /* ARM version. */ + + DIV_FUNC_END umodsi3 unsigned + +#endif /* L_umodsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_divsi3 + +#if defined(__prefer_thumb__) + + FUNC_START divsi3 + FUNC_ALIAS aeabi_idiv divsi3 + + cmp divisor, #0 + beq LSYM(Ldiv0) +LSYM(divsi3_skip_div0_test): + push { work } + mov work, dividend + eor work, divisor @ Save the sign of the result. + mov ip, work + mov curbit, #1 + mov result, #0 + cmp divisor, #0 + bpl LSYM(Lover10) + neg divisor, divisor @ Loops below use unsigned. +LSYM(Lover10): + cmp dividend, #0 + bpl LSYM(Lover11) + neg dividend, dividend +LSYM(Lover11): + cmp dividend, divisor + blo LSYM(Lgot_result) + + THUMB_DIV_MOD_BODY 0 + + mov r0, result + mov work, ip + cmp work, #0 + bpl LSYM(Lover12) + neg r0, r0 +LSYM(Lover12): + pop { work } + RET + +#else /* ARM/Thumb-2 version. */ + + ARM_FUNC_START divsi3 + ARM_FUNC_ALIAS aeabi_idiv divsi3 + + cmp r1, #0 + beq LSYM(Ldiv0) +LSYM(divsi3_skip_div0_test): + eor ip, r0, r1 @ save the sign of the result. + do_it mi + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + do_it mi + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + do_it mi + rsbmi r0, r0, #0 + RET + +10: teq ip, r0 @ same sign ? 
+ do_it mi + rsbmi r0, r0, #0 + RET + +11: do_it lo + movlo r0, #0 + do_it eq,t + moveq r0, ip, asr #31 + orreq r0, r0, #1 + RET + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + do_it mi + rsbmi r0, r0, #0 + RET + +#endif /* ARM version */ + + DIV_FUNC_END divsi3 signed + +#if defined(__prefer_thumb__) +FUNC_START aeabi_idivmod + cmp r1, #0 + beq LSYM(Ldiv0) + push {r0, r1, lr} + bl LSYM(divsi3_skip_div0_test) + POP {r1, r2, r3} + mul r2, r0 + sub r1, r1, r2 + bx r3 +#else +ARM_FUNC_START aeabi_idivmod + cmp r1, #0 + beq LSYM(Ldiv0) + stmfd sp!, { r0, r1, lr } + bl LSYM(divsi3_skip_div0_test) + ldmfd sp!, { r1, r2, lr } + mul r3, r2, r0 + sub r1, r1, r3 + RET +#endif + FUNC_END aeabi_idivmod + +#endif /* L_divsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_modsi3 + + FUNC_START modsi3 + +#ifdef __thumb__ + + mov curbit, #1 + cmp divisor, #0 + beq LSYM(Ldiv0) + bpl LSYM(Lover10) + neg divisor, divisor @ Loops below use unsigned. +LSYM(Lover10): + push { work } + @ Need to save the sign of the dividend, unfortunately, we need + @ work later on. Must do this after saving the original value of + @ the work register, because we will pop this value off first. + push { dividend } + cmp dividend, #0 + bpl LSYM(Lover11) + neg dividend, dividend +LSYM(Lover11): + cmp dividend, divisor + blo LSYM(Lgot_result) + + THUMB_DIV_MOD_BODY 1 + + pop { work } + cmp work, #0 + bpl LSYM(Lover12) + neg dividend, dividend +LSYM(Lover12): + pop { work } + RET + +#else /* ARM version. */ + + cmp r1, #0 + beq LSYM(Ldiv0) + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + RET + +#endif /* ARM version */ + + DIV_FUNC_END modsi3 signed + +#endif /* L_modsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_dvmd_tls + +#ifdef __ARM_EABI__ + WEAK aeabi_idiv0 + WEAK aeabi_ldiv0 + FUNC_START aeabi_idiv0 + FUNC_START aeabi_ldiv0 + RET + FUNC_END aeabi_ldiv0 + FUNC_END aeabi_idiv0 +#else + FUNC_START div0 + RET + FUNC_END div0 +#endif + +#endif /* L_divmodsi_tools */ +/* ------------------------------------------------------------------------ */ +#ifdef L_dvmd_lnx +@ GNU/Linux division-by zero handler. Used in place of L_dvmd_tls + +/* Constant taken from . */ +#define SIGFPE 8 + +#ifdef __ARM_EABI__ + WEAK aeabi_idiv0 + WEAK aeabi_ldiv0 + ARM_FUNC_START aeabi_idiv0 + ARM_FUNC_START aeabi_ldiv0 +#else + ARM_FUNC_START div0 +#endif + + do_push {r1, lr} + mov r0, #SIGFPE + bl SYM(raise) __PLT__ + RETLDM r1 + +#ifdef __ARM_EABI__ + FUNC_END aeabi_ldiv0 + FUNC_END aeabi_idiv0 +#else + FUNC_END div0 +#endif + +#endif /* L_dvmd_lnx */ +#ifdef L_clear_cache +#if defined __ARM_EABI__ && defined __linux__ +@ EABI GNU/Linux call to cacheflush syscall. + ARM_FUNC_START clear_cache + do_push {r7} +#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__) + movw r7, #2 + movt r7, #0xf +#else + mov r7, #0xf0000 + add r7, r7, #2 +#endif + mov r2, #0 + swi 0 + do_pop {r7} + RET + FUNC_END clear_cache +#else +#error "This is only for ARM EABI GNU/Linux" +#endif +#endif /* L_clear_cache */ +/* ------------------------------------------------------------------------ */ +/* Dword shift operations. 
*/ +/* All the following Dword shift variants rely on the fact that + shft xxx, Reg + is in fact done as + shft xxx, (Reg & 255) + so for Reg value in (32...63) and (-1...-31) we will get zero (in the + case of logical shifts) or the sign (for asr). */ + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +/* Prevent __aeabi double-word shifts from being produced on SymbianOS. */ +#ifndef __symbian__ + +#ifdef L_lshrdi3 + + FUNC_START lshrdi3 + FUNC_ALIAS aeabi_llsr lshrdi3 + +#ifdef __thumb__ + lsr al, r2 + mov r3, ah + lsr ah, r2 + mov ip, r3 + sub r2, #32 + lsr r3, r2 + orr al, r3 + neg r2, r2 + mov r3, ip + lsl r3, r2 + orr al, r3 + RET +#else + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, lsr r3 + orrmi al, al, ah, lsl ip + mov ah, ah, lsr r2 + RET +#endif + FUNC_END aeabi_llsr + FUNC_END lshrdi3 + +#endif + +#ifdef L_ashrdi3 + + FUNC_START ashrdi3 + FUNC_ALIAS aeabi_lasr ashrdi3 + +#ifdef __thumb__ + lsr al, r2 + mov r3, ah + asr ah, r2 + sub r2, #32 + @ If r2 is negative at this point the following step would OR + @ the sign bit into all of AL. That's not what we want... + bmi 1f + mov ip, r3 + asr r3, r2 + orr al, r3 + mov r3, ip +1: + neg r2, r2 + lsl r3, r2 + orr al, r3 + RET +#else + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, asr r3 + orrmi al, al, ah, lsl ip + mov ah, ah, asr r2 + RET +#endif + + FUNC_END aeabi_lasr + FUNC_END ashrdi3 + +#endif + +#ifdef L_ashldi3 + + FUNC_START ashldi3 + FUNC_ALIAS aeabi_llsl ashldi3 + +#ifdef __thumb__ + lsl ah, r2 + mov r3, al + lsl al, r2 + mov ip, r3 + sub r2, #32 + lsl r3, r2 + orr ah, r3 + neg r2, r2 + mov r3, ip + lsr r3, r2 + orr ah, r3 + RET +#else + subs r3, r2, #32 + rsb ip, r2, #32 + movmi ah, ah, lsl r2 + movpl ah, al, lsl r3 + orrmi ah, ah, al, lsr ip + mov al, al, lsl r2 + RET +#endif + FUNC_END aeabi_llsl + FUNC_END ashldi3 + +#endif + +#endif /* __symbian__ */ + +#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \ + || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +#define HAVE_ARM_CLZ 1 +#endif + +#ifdef L_clzsi2 +#if defined(__ARM_ARCH_6M__) +FUNC_START clzsi2 + mov r1, #28 + mov r3, #1 + lsl r3, r3, #16 + cmp r0, r3 /* 0x10000 */ + bcc 2f + lsr r0, r0, #16 + sub r1, r1, #16 +2: lsr r3, r3, #8 + cmp r0, r3 /* #0x100 */ + bcc 2f + lsr r0, r0, #8 + sub r1, r1, #8 +2: lsr r3, r3, #4 + cmp r0, r3 /* #0x10 */ + bcc 2f + lsr r0, r0, #4 + sub r1, r1, #4 +2: adr r2, 1f + ldrb r0, [r2, r0] + add r0, r0, r1 + bx lr +.align 2 +1: +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 + FUNC_END clzsi2 +#else +ARM_FUNC_START clzsi2 +# if defined(HAVE_ARM_CLZ) + clz r0, r0 + RET +# else + mov r1, #28 + cmp r0, #0x10000 + do_it cs, t + movcs r0, r0, lsr #16 + subcs r1, r1, #16 + cmp r0, #0x100 + do_it cs, t + movcs r0, r0, lsr #8 + subcs r1, r1, #8 + cmp r0, #0x10 + do_it cs, t + movcs r0, r0, lsr #4 + subcs r1, r1, #4 + adr r2, 1f + ldrb r0, [r2, r0] + add r0, r0, r1 + RET +.align 2 +1: +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 +# endif /* !HAVE_ARM_CLZ */ + FUNC_END clzsi2 +#endif +#endif /* L_clzsi2 */ + +#ifdef L_clzdi2 +#if !defined(HAVE_ARM_CLZ) + +# if defined(__ARM_ARCH_6M__) +FUNC_START clzdi2 + push {r4, lr} +# else +ARM_FUNC_START clzdi2 + do_push {r4, lr} +# endif + cmp xxh, #0 + bne 1f +# ifdef __ARMEB__ + mov r0, xxl + bl __clzsi2 + add r0, r0, #32 + b 2f +1: + bl __clzsi2 +# else + bl __clzsi2 + add r0, r0, #32 + b 2f +1: + mov r0, xxh + bl __clzsi2 +# endif +2: +# 
if defined(__ARM_ARCH_6M__) + pop {r4, pc} +# else + RETLDM r4 +# endif + FUNC_END clzdi2 + +#else /* HAVE_ARM_CLZ */ + +ARM_FUNC_START clzdi2 + cmp xxh, #0 + do_it eq, et + clzeq r0, xxl + clzne r0, xxh + addeq r0, r0, #32 + RET + FUNC_END clzdi2 + +#endif +#endif /* L_clzdi2 */ + +/* ------------------------------------------------------------------------ */ +/* These next two sections are here despite the fact that they contain Thumb + assembler because their presence allows interworked code to be linked even + when the GCC library is this one. */ + +/* Do not build the interworking functions when the target architecture does + not support Thumb instructions. (This can be a multilib option). */ +#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\ + || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \ + || __ARM_ARCH__ >= 6 + +#if defined L_call_via_rX + +/* These labels & instructions are used by the Arm/Thumb interworking code. + The address of function to be called is loaded into a register and then + one of these labels is called via a BL instruction. This puts the + return address into the link register with the bottom bit set, and the + code here switches to the correct mode before executing the function. */ + + .text + .align 0 + .force_thumb + +.macro call_via register + THUMB_FUNC_START _call_via_\register + + bx \register + nop + + SIZE (_call_via_\register) +.endm + + call_via r0 + call_via r1 + call_via r2 + call_via r3 + call_via r4 + call_via r5 + call_via r6 + call_via r7 + call_via r8 + call_via r9 + call_via sl + call_via fp + call_via ip + call_via sp + call_via lr + +#endif /* L_call_via_rX */ + +/* Don't bother with the old interworking routines for Thumb-2. */ +/* ??? Maybe only omit these on "m" variants. */ +#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__) + +#if defined L_interwork_call_via_rX + +/* These labels & instructions are used by the Arm/Thumb interworking code, + when the target address is in an unknown instruction set. The address + of function to be called is loaded into a register and then one of these + labels is called via a BL instruction. This puts the return address + into the link register with the bottom bit set, and the code here + switches to the correct mode before executing the function. Unfortunately + the target code cannot be relied upon to return via a BX instruction, so + instead we have to store the resturn address on the stack and allow the + called function to return here instead. Upon return we recover the real + return address and use a BX to get back to Thumb mode. + + There are three variations of this code. The first, + _interwork_call_via_rN(), will push the return address onto the + stack and pop it in _arm_return(). It should only be used if all + arguments are passed in registers. + + The second, _interwork_r7_call_via_rN(), instead stores the return + address at [r7, #-4]. It is the caller's responsibility to ensure + that this address is valid and contains no useful data. + + The third, _interwork_r11_call_via_rN(), works in the same way but + uses r11 instead of r7. It is useful if the caller does not really + need a frame pointer. */ + + .text + .align 0 + + .code 32 + .globl _arm_return +LSYM(Lstart_arm_return): + cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return) + cfi_push 0, 0xe, -0x8, 0x8 + nop @ This nop is for the benefit of debuggers, so that + @ backtraces will use the correct unwind information. 
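+@ _arm_return reloads the return address that the interworking stub saved on
+@ the stack and branches back through it, restoring the caller's instruction
+@ set.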
+_arm_return: + RETLDM unwind=LSYM(Lstart_arm_return) + cfi_end LSYM(Lend_arm_return) + + .globl _arm_return_r7 +_arm_return_r7: + ldr lr, [r7, #-4] + bx lr + + .globl _arm_return_r11 +_arm_return_r11: + ldr lr, [r11, #-4] + bx lr + +.macro interwork_with_frame frame, register, name, return + .code 16 + + THUMB_FUNC_START \name + + bx pc + nop + + .code 32 + tst \register, #1 + streq lr, [\frame, #-4] + adreq lr, _arm_return_\frame + bx \register + + SIZE (\name) +.endm + +.macro interwork register + .code 16 + + THUMB_FUNC_START _interwork_call_via_\register + + bx pc + nop + + .code 32 + .globl LSYM(Lchange_\register) +LSYM(Lchange_\register): + tst \register, #1 + streq lr, [sp, #-8]! + adreq lr, _arm_return + bx \register + + SIZE (_interwork_call_via_\register) + + interwork_with_frame r7,\register,_interwork_r7_call_via_\register + interwork_with_frame r11,\register,_interwork_r11_call_via_\register +.endm + + interwork r0 + interwork r1 + interwork r2 + interwork r3 + interwork r4 + interwork r5 + interwork r6 + interwork r7 + interwork r8 + interwork r9 + interwork sl + interwork fp + interwork ip + interwork sp + + /* The LR case has to be handled a little differently... */ + .code 16 + + THUMB_FUNC_START _interwork_call_via_lr + + bx pc + nop + + .code 32 + .globl .Lchange_lr +.Lchange_lr: + tst lr, #1 + stmeqdb r13!, {lr, pc} + mov ip, lr + adreq lr, _arm_return + bx ip + + SIZE (_interwork_call_via_lr) + +#endif /* L_interwork_call_via_rX */ +#endif /* !__thumb2__ */ + +/* Functions to support compact pic switch tables in thumb1 state. + All these routines take an index into the table in r0. The + table is at LR & ~1 (but this must be rounded up in the case + of 32-bit entires). They are only permitted to clobber r12 + and r14 and r0 must be preserved on exit. */ +#ifdef L_thumb1_case_sqi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_sqi + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrsb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr + SIZE (__gnu_thumb1_case_sqi) +#endif + +#ifdef L_thumb1_case_uqi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_uqi + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr + SIZE (__gnu_thumb1_case_uqi) +#endif + +#ifdef L_thumb1_case_shi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_shi + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrsh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr + SIZE (__gnu_thumb1_case_shi) +#endif + +#ifdef L_thumb1_case_uhi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_uhi + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr + SIZE (__gnu_thumb1_case_uhi) +#endif + +#ifdef L_thumb1_case_si + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_si + push {r0, r1} + mov r1, lr + adds.n r1, r1, #2 /* Align to word. */ + lsrs r1, r1, #2 + lsls r0, r0, #2 + lsls r1, r1, #2 + ldr r0, [r1, r0] + adds r0, r0, r1 + mov lr, r0 + pop {r0, r1} + mov pc, lr /* We know we were called from thumb code. */ + SIZE (__gnu_thumb1_case_si) +#endif + +#endif /* Arch supports thumb. 
*/ + +#ifndef __symbian__ +#ifndef __ARM_ARCH_6M__ +#include "ieee754-df.S" +#include "ieee754-sf.S" +#include "bpabi.S" +#else /* __ARM_ARCH_6M__ */ +#include "bpabi-v6m.S" +#endif /* __ARM_ARCH_6M__ */ +#endif /* !__symbian__ */ diff --git a/libgcc/config/arm/libunwind.S b/libgcc/config/arm/libunwind.S index a3a19daab4b..8166cd86e47 100644 --- a/libgcc/config/arm/libunwind.S +++ b/libgcc/config/arm/libunwind.S @@ -40,7 +40,7 @@ #ifndef __symbian__ -#include "config/arm/lib1funcs.asm" +#include "lib1funcs.S" .macro UNPREFIX name .global SYM (\name) diff --git a/libgcc/config/arm/t-arm b/libgcc/config/arm/t-arm new file mode 100644 index 00000000000..4e17e99b4a5 --- /dev/null +++ b/libgcc/config/arm/t-arm @@ -0,0 +1,3 @@ +LIB1ASMSRC = arm/lib1funcs.S +LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \ + _thumb1_case_uhi _thumb1_case_si diff --git a/libgcc/config/arm/t-bpabi b/libgcc/config/arm/t-bpabi index ebb2f9fd85d..8787285ab1f 100644 --- a/libgcc/config/arm/t-bpabi +++ b/libgcc/config/arm/t-bpabi @@ -1,3 +1,6 @@ +# Add the bpabi.S functions. +LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod + LIB2ADDEH = $(srcdir)/config/arm/unwind-arm.c \ $(srcdir)/config/arm/libunwind.S \ $(srcdir)/config/arm/pr-support.c $(srcdir)/unwind-c.c diff --git a/libgcc/config/arm/t-elf b/libgcc/config/arm/t-elf new file mode 100644 index 00000000000..fab32e445be --- /dev/null +++ b/libgcc/config/arm/t-elf @@ -0,0 +1,13 @@ +# For most CPUs we have an assembly soft-float implementations. +# However this is not true for ARMv6M. Here we want to use the soft-fp C +# implementation. The soft-fp code is only build for ARMv6M. This pulls +# in the asm implementation for other CPUs. +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \ + _call_via_rX _interwork_call_via_rX \ + _lshrdi3 _ashrdi3 _ashldi3 \ + _arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \ + _arm_fixdfsi _arm_fixunsdfsi \ + _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \ + _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \ + _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \ + _clzsi2 _clzdi2 diff --git a/libgcc/config/arm/t-linux b/libgcc/config/arm/t-linux new file mode 100644 index 00000000000..a154f775a0f --- /dev/null +++ b/libgcc/config/arm/t-linux @@ -0,0 +1,3 @@ +LIB1ASMSRC = arm/lib1funcs.S +LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx _clzsi2 _clzdi2 \ + _arm_addsubdf3 _arm_addsubsf3 diff --git a/libgcc/config/arm/t-linux-eabi b/libgcc/config/arm/t-linux-eabi new file mode 100644 index 00000000000..dfc9197ea45 --- /dev/null +++ b/libgcc/config/arm/t-linux-eabi @@ -0,0 +1,2 @@ +# Use a version of div0 which raises SIGFPE, and a special __clear_cache. +LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache diff --git a/libgcc/config/arm/t-strongarm-elf b/libgcc/config/arm/t-strongarm-elf new file mode 100644 index 00000000000..cd9f9667ddf --- /dev/null +++ b/libgcc/config/arm/t-strongarm-elf @@ -0,0 +1 @@ +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2 diff --git a/libgcc/config/arm/t-symbian b/libgcc/config/arm/t-symbian index 6788d5f40b3..1989696c8a3 100644 --- a/libgcc/config/arm/t-symbian +++ b/libgcc/config/arm/t-symbian @@ -1,2 +1,16 @@ +LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 + +# These functions have __aeabi equivalents and will never be called by GCC. 
+# By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being +# used -- and we make sure that definitions are not available in lib1funcs.S, +# either, so they end up undefined. +LIB1ASMFUNCS += \ + _ashldi3 _ashrdi3 _divdi3 _floatdidf _udivmoddi4 _umoddi3 \ + _udivdi3 _lshrdi3 _moddi3 _muldi3 _negdi2 _cmpdi2 \ + _fixdfdi _fixsfdi _fixunsdfdi _fixunssfdi _floatdisf \ + _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \ + _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \ + _fixsfsi _fixunssfsi + # Include the gcc personality routine LIB2ADDEH = $(srcdir)/unwind-c.c $(srcdir)/config/arm/pr-support.c diff --git a/libgcc/config/arm/t-vxworks b/libgcc/config/arm/t-vxworks new file mode 100644 index 00000000000..70ccdc1556a --- /dev/null +++ b/libgcc/config/arm/t-vxworks @@ -0,0 +1 @@ +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 diff --git a/libgcc/config/arm/t-wince-pe b/libgcc/config/arm/t-wince-pe new file mode 100644 index 00000000000..33ea969ccf4 --- /dev/null +++ b/libgcc/config/arm/t-wince-pe @@ -0,0 +1 @@ +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S new file mode 100644 index 00000000000..8c369c96a77 --- /dev/null +++ b/libgcc/config/avr/lib1funcs.S @@ -0,0 +1,1533 @@ +/* -*- Mode: Asm -*- */ +/* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009 + Free Software Foundation, Inc. + Contributed by Denis Chertykov + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#define __zero_reg__ r1 +#define __tmp_reg__ r0 +#define __SREG__ 0x3f +#define __SP_H__ 0x3e +#define __SP_L__ 0x3d +#define __RAMPZ__ 0x3B +#define __EIND__ 0x3C + +/* Most of the functions here are called directly from avr.md + patterns, instead of using the standard libcall mechanisms. + This can make better code because GCC knows exactly which + of the call-used registers (not all of them) are clobbered. */ + +/* FIXME: At present, there is no SORT directive in the linker + script so that we must not assume that different modules + in the same input section like .libgcc.text.mul will be + located close together. Therefore, we cannot use + RCALL/RJMP to call a function like __udivmodhi4 from + __divmodhi4 and have to use lengthy XCALL/XJMP even + though they are in the same input section and all same + input sections together are small enough to reach every + location with a RCALL/RJMP instruction. 
*/ + + .macro mov_l r_dest, r_src +#if defined (__AVR_HAVE_MOVW__) + movw \r_dest, \r_src +#else + mov \r_dest, \r_src +#endif + .endm + + .macro mov_h r_dest, r_src +#if defined (__AVR_HAVE_MOVW__) + ; empty +#else + mov \r_dest, \r_src +#endif + .endm + +#if defined (__AVR_HAVE_JMP_CALL__) +#define XCALL call +#define XJMP jmp +#else +#define XCALL rcall +#define XJMP rjmp +#endif + +.macro DEFUN name +.global \name +.func \name +\name: +.endm + +.macro ENDF name +.size \name, .-\name +.endfunc +.endm + + +.section .text.libgcc.mul, "ax", @progbits + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */ +#if !defined (__AVR_HAVE_MUL__) +/******************************************************* + Multiplication 8 x 8 without MUL +*******************************************************/ +#if defined (L_mulqi3) + +#define r_arg2 r22 /* multiplicand */ +#define r_arg1 r24 /* multiplier */ +#define r_res __tmp_reg__ /* result */ + +DEFUN __mulqi3 + clr r_res ; clear result +__mulqi3_loop: + sbrc r_arg1,0 + add r_res,r_arg2 + add r_arg2,r_arg2 ; shift multiplicand + breq __mulqi3_exit ; while multiplicand != 0 + lsr r_arg1 ; + brne __mulqi3_loop ; exit if multiplier = 0 +__mulqi3_exit: + mov r_arg1,r_res ; result to return register + ret +ENDF __mulqi3 + +#undef r_arg2 +#undef r_arg1 +#undef r_res + +#endif /* defined (L_mulqi3) */ + +#if defined (L_mulqihi3) +DEFUN __mulqihi3 + clr r25 + sbrc r24, 7 + dec r25 + clr r23 + sbrc r22, 7 + dec r22 + XJMP __mulhi3 +ENDF __mulqihi3: +#endif /* defined (L_mulqihi3) */ + +#if defined (L_umulqihi3) +DEFUN __umulqihi3 + clr r25 + clr r23 + XJMP __mulhi3 +ENDF __umulqihi3 +#endif /* defined (L_umulqihi3) */ + +/******************************************************* + Multiplication 16 x 16 without MUL +*******************************************************/ +#if defined (L_mulhi3) +#define r_arg1L r24 /* multiplier Low */ +#define r_arg1H r25 /* multiplier High */ +#define r_arg2L r22 /* multiplicand Low */ +#define r_arg2H r23 /* multiplicand High */ +#define r_resL __tmp_reg__ /* result Low */ +#define r_resH r21 /* result High */ + +DEFUN __mulhi3 + clr r_resH ; clear result + clr r_resL ; clear result +__mulhi3_loop: + sbrs r_arg1L,0 + rjmp __mulhi3_skip1 + add r_resL,r_arg2L ; result + multiplicand + adc r_resH,r_arg2H +__mulhi3_skip1: + add r_arg2L,r_arg2L ; shift multiplicand + adc r_arg2H,r_arg2H + + cp r_arg2L,__zero_reg__ + cpc r_arg2H,__zero_reg__ + breq __mulhi3_exit ; while multiplicand != 0 + + lsr r_arg1H ; gets LSB of multiplier + ror r_arg1L + sbiw r_arg1L,0 + brne __mulhi3_loop ; exit if multiplier = 0 +__mulhi3_exit: + mov r_arg1H,r_resH ; result to return register + mov r_arg1L,r_resL + ret +ENDF __mulhi3 + +#undef r_arg1L +#undef r_arg1H +#undef r_arg2L +#undef r_arg2H +#undef r_resL +#undef r_resH + +#endif /* defined (L_mulhi3) */ + +/******************************************************* + Widening Multiplication 32 = 16 x 16 without MUL +*******************************************************/ + +#if defined (L_mulhisi3) +DEFUN __mulhisi3 +;;; FIXME: This is dead code (noone calls it) + mov_l r18, r24 + mov_h r19, r25 + clr r24 + sbrc r23, 7 + dec r24 + mov r25, r24 + clr r20 + sbrc r19, 7 + dec r20 + mov r21, r20 + XJMP __mulsi3 +ENDF __mulhisi3 +#endif /* defined (L_mulhisi3) */ + +#if defined (L_umulhisi3) +DEFUN __umulhisi3 +;;; FIXME: This is dead code (noone calls it) + mov_l r18, r24 + mov_h r19, r25 + clr r24 + clr r25 + mov_l r20, r24 + mov_h r21, r25 + 
XJMP __mulsi3 +ENDF __umulhisi3 +#endif /* defined (L_umulhisi3) */ + +#if defined (L_mulsi3) +/******************************************************* + Multiplication 32 x 32 without MUL +*******************************************************/ +#define r_arg1L r22 /* multiplier Low */ +#define r_arg1H r23 +#define r_arg1HL r24 +#define r_arg1HH r25 /* multiplier High */ + +#define r_arg2L r18 /* multiplicand Low */ +#define r_arg2H r19 +#define r_arg2HL r20 +#define r_arg2HH r21 /* multiplicand High */ + +#define r_resL r26 /* result Low */ +#define r_resH r27 +#define r_resHL r30 +#define r_resHH r31 /* result High */ + +DEFUN __mulsi3 + clr r_resHH ; clear result + clr r_resHL ; clear result + clr r_resH ; clear result + clr r_resL ; clear result +__mulsi3_loop: + sbrs r_arg1L,0 + rjmp __mulsi3_skip1 + add r_resL,r_arg2L ; result + multiplicand + adc r_resH,r_arg2H + adc r_resHL,r_arg2HL + adc r_resHH,r_arg2HH +__mulsi3_skip1: + add r_arg2L,r_arg2L ; shift multiplicand + adc r_arg2H,r_arg2H + adc r_arg2HL,r_arg2HL + adc r_arg2HH,r_arg2HH + + lsr r_arg1HH ; gets LSB of multiplier + ror r_arg1HL + ror r_arg1H + ror r_arg1L + brne __mulsi3_loop + sbiw r_arg1HL,0 + cpc r_arg1H,r_arg1L + brne __mulsi3_loop ; exit if multiplier = 0 +__mulsi3_exit: + mov_h r_arg1HH,r_resHH ; result to return register + mov_l r_arg1HL,r_resHL + mov_h r_arg1H,r_resH + mov_l r_arg1L,r_resL + ret +ENDF __mulsi3 + +#undef r_arg1L +#undef r_arg1H +#undef r_arg1HL +#undef r_arg1HH + +#undef r_arg2L +#undef r_arg2H +#undef r_arg2HL +#undef r_arg2HH + +#undef r_resL +#undef r_resH +#undef r_resHL +#undef r_resHH + +#endif /* defined (L_mulsi3) */ + +#endif /* !defined (__AVR_HAVE_MUL__) */ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +#if defined (__AVR_HAVE_MUL__) +#define A0 26 +#define B0 18 +#define C0 22 + +#define A1 A0+1 + +#define B1 B0+1 +#define B2 B0+2 +#define B3 B0+3 + +#define C1 C0+1 +#define C2 C0+2 +#define C3 C0+3 + +/******************************************************* + Widening Multiplication 32 = 16 x 16 +*******************************************************/ + +#if defined (L_mulhisi3) +;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18 +;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __mulhisi3 + XCALL __umulhisi3 + ;; Sign-extend B + tst B1 + brpl 1f + sub C2, A0 + sbc C3, A1 +1: ;; Sign-extend A + XJMP __usmulhisi3_tail +ENDF __mulhisi3 +#endif /* L_mulhisi3 */ + +#if defined (L_usmulhisi3) +;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18 +;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __usmulhisi3 + XCALL __umulhisi3 + ;; FALLTHRU +ENDF __usmulhisi3 + +DEFUN __usmulhisi3_tail + ;; Sign-extend A + sbrs A1, 7 + ret + sub C2, B0 + sbc C3, B1 + ret +ENDF __usmulhisi3_tail +#endif /* L_usmulhisi3 */ + +#if defined (L_umulhisi3) +;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18 +;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __umulhisi3 + mul A0, B0 + movw C0, r0 + mul A1, B1 + movw C2, r0 + mul A0, B1 + rcall 1f + mul A1, B0 +1: add C1, r0 + adc C2, r1 + clr __zero_reg__ + adc C3, __zero_reg__ + ret +ENDF __umulhisi3 +#endif /* L_umulhisi3 */ + +/******************************************************* + Widening Multiplication 32 = 16 x 32 +*******************************************************/ + +#if defined (L_mulshisi3) +;;; R25:R22 = (signed long) 
R27:R26 * R21:R18 +;;; (C3:C0) = (signed long) A1:A0 * B3:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __mulshisi3 +#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ + ;; Some cores have problem skipping 2-word instruction + tst A1 + brmi __mulohisi3 +#else + sbrs A1, 7 +#endif /* __AVR_HAVE_JMP_CALL__ */ + XJMP __muluhisi3 + ;; FALLTHRU +ENDF __mulshisi3 + +;;; R25:R22 = (one-extended long) R27:R26 * R21:R18 +;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __mulohisi3 + XCALL __muluhisi3 + ;; One-extend R27:R26 (A1:A0) + sub C2, B0 + sbc C3, B1 + ret +ENDF __mulohisi3 +#endif /* L_mulshisi3 */ + +#if defined (L_muluhisi3) +;;; R25:R22 = (unsigned long) R27:R26 * R21:R18 +;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __muluhisi3 + XCALL __umulhisi3 + mul A0, B3 + add C3, r0 + mul A1, B2 + add C3, r0 + mul A0, B2 + add C2, r0 + adc C3, r1 + clr __zero_reg__ + ret +ENDF __muluhisi3 +#endif /* L_muluhisi3 */ + +/******************************************************* + Multiplication 32 x 32 +*******************************************************/ + +#if defined (L_mulsi3) +;;; R25:R22 = R25:R22 * R21:R18 +;;; (C3:C0) = C3:C0 * B3:B0 +;;; Clobbers: R26, R27, __tmp_reg__ +DEFUN __mulsi3 + movw A0, C0 + push C2 + push C3 + XCALL __muluhisi3 + pop A1 + pop A0 + ;; A1:A0 now contains the high word of A + mul A0, B0 + add C2, r0 + adc C3, r1 + mul A0, B1 + add C3, r0 + mul A1, B0 + add C3, r0 + clr __zero_reg__ + ret +ENDF __mulsi3 +#endif /* L_mulsi3 */ + +#undef A0 +#undef A1 + +#undef B0 +#undef B1 +#undef B2 +#undef B3 + +#undef C0 +#undef C1 +#undef C2 +#undef C3 + +#endif /* __AVR_HAVE_MUL__ */ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +.section .text.libgcc.div, "ax", @progbits + +/******************************************************* + Division 8 / 8 => (result + remainder) +*******************************************************/ +#define r_rem r25 /* remainder */ +#define r_arg1 r24 /* dividend, quotient */ +#define r_arg2 r22 /* divisor */ +#define r_cnt r23 /* loop count */ + +#if defined (L_udivmodqi4) +DEFUN __udivmodqi4 + sub r_rem,r_rem ; clear remainder and carry + ldi r_cnt,9 ; init loop counter + rjmp __udivmodqi4_ep ; jump to entry point +__udivmodqi4_loop: + rol r_rem ; shift dividend into remainder + cp r_rem,r_arg2 ; compare remainder & divisor + brcs __udivmodqi4_ep ; remainder <= divisor + sub r_rem,r_arg2 ; restore remainder +__udivmodqi4_ep: + rol r_arg1 ; shift dividend (with CARRY) + dec r_cnt ; decrement loop counter + brne __udivmodqi4_loop + com r_arg1 ; complement result + ; because C flag was complemented in loop + ret +ENDF __udivmodqi4 +#endif /* defined (L_udivmodqi4) */ + +#if defined (L_divmodqi4) +DEFUN __divmodqi4 + bst r_arg1,7 ; store sign of dividend + mov __tmp_reg__,r_arg1 + eor __tmp_reg__,r_arg2; r0.7 is sign of result + sbrc r_arg1,7 + neg r_arg1 ; dividend negative : negate + sbrc r_arg2,7 + neg r_arg2 ; divisor negative : negate + XCALL __udivmodqi4 ; do the unsigned div/mod + brtc __divmodqi4_1 + neg r_rem ; correct remainder sign +__divmodqi4_1: + sbrc __tmp_reg__,7 + neg r_arg1 ; correct result sign +__divmodqi4_exit: + ret +ENDF __divmodqi4 +#endif /* defined (L_divmodqi4) */ + +#undef r_rem +#undef r_arg1 +#undef r_arg2 +#undef r_cnt + + +/******************************************************* + Division 16 / 16 => (result + remainder) +*******************************************************/ +#define r_remL r26 /* remainder Low */ +#define r_remH r27 /* remainder High */ + +/* 
return: remainder */ +#define r_arg1L r24 /* dividend Low */ +#define r_arg1H r25 /* dividend High */ + +/* return: quotient */ +#define r_arg2L r22 /* divisor Low */ +#define r_arg2H r23 /* divisor High */ + +#define r_cnt r21 /* loop count */ + +#if defined (L_udivmodhi4) +DEFUN __udivmodhi4 + sub r_remL,r_remL + sub r_remH,r_remH ; clear remainder and carry + ldi r_cnt,17 ; init loop counter + rjmp __udivmodhi4_ep ; jump to entry point +__udivmodhi4_loop: + rol r_remL ; shift dividend into remainder + rol r_remH + cp r_remL,r_arg2L ; compare remainder & divisor + cpc r_remH,r_arg2H + brcs __udivmodhi4_ep ; remainder < divisor + sub r_remL,r_arg2L ; restore remainder + sbc r_remH,r_arg2H +__udivmodhi4_ep: + rol r_arg1L ; shift dividend (with CARRY) + rol r_arg1H + dec r_cnt ; decrement loop counter + brne __udivmodhi4_loop + com r_arg1L + com r_arg1H +; div/mod results to return registers, as for the div() function + mov_l r_arg2L, r_arg1L ; quotient + mov_h r_arg2H, r_arg1H + mov_l r_arg1L, r_remL ; remainder + mov_h r_arg1H, r_remH + ret +ENDF __udivmodhi4 +#endif /* defined (L_udivmodhi4) */ + +#if defined (L_divmodhi4) +DEFUN __divmodhi4 + .global _div +_div: + bst r_arg1H,7 ; store sign of dividend + mov __tmp_reg__,r_arg1H + eor __tmp_reg__,r_arg2H ; r0.7 is sign of result + rcall __divmodhi4_neg1 ; dividend negative : negate + sbrc r_arg2H,7 + rcall __divmodhi4_neg2 ; divisor negative : negate + XCALL __udivmodhi4 ; do the unsigned div/mod + rcall __divmodhi4_neg1 ; correct remainder sign + tst __tmp_reg__ + brpl __divmodhi4_exit +__divmodhi4_neg2: + com r_arg2H + neg r_arg2L ; correct divisor/result sign + sbci r_arg2H,0xff +__divmodhi4_exit: + ret +__divmodhi4_neg1: + brtc __divmodhi4_exit + com r_arg1H + neg r_arg1L ; correct dividend/remainder sign + sbci r_arg1H,0xff + ret +ENDF __divmodhi4 +#endif /* defined (L_divmodhi4) */ + +#undef r_remH +#undef r_remL + +#undef r_arg1H +#undef r_arg1L + +#undef r_arg2H +#undef r_arg2L + +#undef r_cnt + +/******************************************************* + Division 32 / 32 => (result + remainder) +*******************************************************/ +#define r_remHH r31 /* remainder High */ +#define r_remHL r30 +#define r_remH r27 +#define r_remL r26 /* remainder Low */ + +/* return: remainder */ +#define r_arg1HH r25 /* dividend High */ +#define r_arg1HL r24 +#define r_arg1H r23 +#define r_arg1L r22 /* dividend Low */ + +/* return: quotient */ +#define r_arg2HH r21 /* divisor High */ +#define r_arg2HL r20 +#define r_arg2H r19 +#define r_arg2L r18 /* divisor Low */ + +#define r_cnt __zero_reg__ /* loop count (0 after the loop!) 
*/ + +#if defined (L_udivmodsi4) +DEFUN __udivmodsi4 + ldi r_remL, 33 ; init loop counter + mov r_cnt, r_remL + sub r_remL,r_remL + sub r_remH,r_remH ; clear remainder and carry + mov_l r_remHL, r_remL + mov_h r_remHH, r_remH + rjmp __udivmodsi4_ep ; jump to entry point +__udivmodsi4_loop: + rol r_remL ; shift dividend into remainder + rol r_remH + rol r_remHL + rol r_remHH + cp r_remL,r_arg2L ; compare remainder & divisor + cpc r_remH,r_arg2H + cpc r_remHL,r_arg2HL + cpc r_remHH,r_arg2HH + brcs __udivmodsi4_ep ; remainder <= divisor + sub r_remL,r_arg2L ; restore remainder + sbc r_remH,r_arg2H + sbc r_remHL,r_arg2HL + sbc r_remHH,r_arg2HH +__udivmodsi4_ep: + rol r_arg1L ; shift dividend (with CARRY) + rol r_arg1H + rol r_arg1HL + rol r_arg1HH + dec r_cnt ; decrement loop counter + brne __udivmodsi4_loop + ; __zero_reg__ now restored (r_cnt == 0) + com r_arg1L + com r_arg1H + com r_arg1HL + com r_arg1HH +; div/mod results to return registers, as for the ldiv() function + mov_l r_arg2L, r_arg1L ; quotient + mov_h r_arg2H, r_arg1H + mov_l r_arg2HL, r_arg1HL + mov_h r_arg2HH, r_arg1HH + mov_l r_arg1L, r_remL ; remainder + mov_h r_arg1H, r_remH + mov_l r_arg1HL, r_remHL + mov_h r_arg1HH, r_remHH + ret +ENDF __udivmodsi4 +#endif /* defined (L_udivmodsi4) */ + +#if defined (L_divmodsi4) +DEFUN __divmodsi4 + bst r_arg1HH,7 ; store sign of dividend + mov __tmp_reg__,r_arg1HH + eor __tmp_reg__,r_arg2HH ; r0.7 is sign of result + rcall __divmodsi4_neg1 ; dividend negative : negate + sbrc r_arg2HH,7 + rcall __divmodsi4_neg2 ; divisor negative : negate + XCALL __udivmodsi4 ; do the unsigned div/mod + rcall __divmodsi4_neg1 ; correct remainder sign + rol __tmp_reg__ + brcc __divmodsi4_exit +__divmodsi4_neg2: + com r_arg2HH + com r_arg2HL + com r_arg2H + neg r_arg2L ; correct divisor/quotient sign + sbci r_arg2H,0xff + sbci r_arg2HL,0xff + sbci r_arg2HH,0xff +__divmodsi4_exit: + ret +__divmodsi4_neg1: + brtc __divmodsi4_exit + com r_arg1HH + com r_arg1HL + com r_arg1H + neg r_arg1L ; correct dividend/remainder sign + sbci r_arg1H, 0xff + sbci r_arg1HL,0xff + sbci r_arg1HH,0xff + ret +ENDF __divmodsi4 +#endif /* defined (L_divmodsi4) */ + + +.section .text.libgcc.prologue, "ax", @progbits + +/********************************** + * This is a prologue subroutine + **********************************/ +#if defined (L_prologue) + +DEFUN __prologue_saves__ + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 + in r28,__SP_L__ + in r29,__SP_H__ + sub r28,r26 + sbc r29,r27 + in __tmp_reg__,__SREG__ + cli + out __SP_H__,r29 + out __SREG__,__tmp_reg__ + out __SP_L__,r28 +#if defined (__AVR_HAVE_EIJMP_EICALL__) + eijmp +#else + ijmp +#endif + +ENDF __prologue_saves__ +#endif /* defined (L_prologue) */ + +/* + * This is an epilogue subroutine + */ +#if defined (L_epilogue) + +DEFUN __epilogue_restores__ + ldd r2,Y+18 + ldd r3,Y+17 + ldd r4,Y+16 + ldd r5,Y+15 + ldd r6,Y+14 + ldd r7,Y+13 + ldd r8,Y+12 + ldd r9,Y+11 + ldd r10,Y+10 + ldd r11,Y+9 + ldd r12,Y+8 + ldd r13,Y+7 + ldd r14,Y+6 + ldd r15,Y+5 + ldd r16,Y+4 + ldd r17,Y+3 + ldd r26,Y+2 + ldd r27,Y+1 + add r28,r30 + adc r29,__zero_reg__ + in __tmp_reg__,__SREG__ + cli + out __SP_H__,r29 + out __SREG__,__tmp_reg__ + out __SP_L__,r28 + mov_l r28, r26 + mov_h r29, r27 + ret +ENDF __epilogue_restores__ +#endif /* defined (L_epilogue) */ + +#ifdef L_exit + .section .fini9,"ax",@progbits +DEFUN _exit + .weak exit +exit: +ENDF _exit + + 
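+	/* _exit and its weak alias exit are empty stubs; after the .fini
+	   sections below run, control ends up in the __stop_program loop.  */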
/* Code from .fini8 ... .fini1 sections inserted by ld script. */ + + .section .fini0,"ax",@progbits + cli +__stop_program: + rjmp __stop_program +#endif /* defined (L_exit) */ + +#ifdef L_cleanup + .weak _cleanup + .func _cleanup +_cleanup: + ret +.endfunc +#endif /* defined (L_cleanup) */ + + +.section .text.libgcc, "ax", @progbits + +#ifdef L_tablejump +DEFUN __tablejump2__ + lsl r30 + rol r31 + ;; FALLTHRU +ENDF __tablejump2__ + +DEFUN __tablejump__ +#if defined (__AVR_HAVE_LPMX__) + lpm __tmp_reg__, Z+ + lpm r31, Z + mov r30, __tmp_reg__ +#if defined (__AVR_HAVE_EIJMP_EICALL__) + eijmp +#else + ijmp +#endif + +#else /* !HAVE_LPMX */ + lpm + adiw r30, 1 + push r0 + lpm + push r0 +#if defined (__AVR_HAVE_EIJMP_EICALL__) + in __tmp_reg__, __EIND__ + push __tmp_reg__ +#endif + ret +#endif /* !HAVE_LPMX */ +ENDF __tablejump__ +#endif /* defined (L_tablejump) */ + +#ifdef L_copy_data + .section .init4,"ax",@progbits +DEFUN __do_copy_data +#if defined(__AVR_HAVE_ELPMX__) + ldi r17, hi8(__data_end) + ldi r26, lo8(__data_start) + ldi r27, hi8(__data_start) + ldi r30, lo8(__data_load_start) + ldi r31, hi8(__data_load_start) + ldi r16, hh8(__data_load_start) + out __RAMPZ__, r16 + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: + elpm r0, Z+ + st X+, r0 +.L__do_copy_data_start: + cpi r26, lo8(__data_end) + cpc r27, r17 + brne .L__do_copy_data_loop +#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__) + ldi r17, hi8(__data_end) + ldi r26, lo8(__data_start) + ldi r27, hi8(__data_start) + ldi r30, lo8(__data_load_start) + ldi r31, hi8(__data_load_start) + ldi r16, hh8(__data_load_start - 0x10000) +.L__do_copy_data_carry: + inc r16 + out __RAMPZ__, r16 + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: + elpm + st X+, r0 + adiw r30, 1 + brcs .L__do_copy_data_carry +.L__do_copy_data_start: + cpi r26, lo8(__data_end) + cpc r27, r17 + brne .L__do_copy_data_loop +#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) + ldi r17, hi8(__data_end) + ldi r26, lo8(__data_start) + ldi r27, hi8(__data_start) + ldi r30, lo8(__data_load_start) + ldi r31, hi8(__data_load_start) + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: +#if defined (__AVR_HAVE_LPMX__) + lpm r0, Z+ +#else + lpm + adiw r30, 1 +#endif + st X+, r0 +.L__do_copy_data_start: + cpi r26, lo8(__data_end) + cpc r27, r17 + brne .L__do_copy_data_loop +#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */ +ENDF __do_copy_data +#endif /* L_copy_data */ + +/* __do_clear_bss is only necessary if there is anything in .bss section. */ + +#ifdef L_clear_bss + .section .init4,"ax",@progbits +DEFUN __do_clear_bss + ldi r17, hi8(__bss_end) + ldi r26, lo8(__bss_start) + ldi r27, hi8(__bss_start) + rjmp .do_clear_bss_start +.do_clear_bss_loop: + st X+, __zero_reg__ +.do_clear_bss_start: + cpi r26, lo8(__bss_end) + cpc r27, r17 + brne .do_clear_bss_loop +ENDF __do_clear_bss +#endif /* L_clear_bss */ + +/* __do_global_ctors and __do_global_dtors are only necessary + if there are any constructors/destructors. 
*/ + +#ifdef L_ctors + .section .init6,"ax",@progbits +DEFUN __do_global_ctors +#if defined(__AVR_HAVE_RAMPZ__) + ldi r17, hi8(__ctors_start) + ldi r28, lo8(__ctors_end) + ldi r29, hi8(__ctors_end) + ldi r16, hh8(__ctors_end) + rjmp .L__do_global_ctors_start +.L__do_global_ctors_loop: + sbiw r28, 2 + sbc r16, __zero_reg__ + mov_h r31, r29 + mov_l r30, r28 + out __RAMPZ__, r16 + XCALL __tablejump_elpm__ +.L__do_global_ctors_start: + cpi r28, lo8(__ctors_start) + cpc r29, r17 + ldi r24, hh8(__ctors_start) + cpc r16, r24 + brne .L__do_global_ctors_loop +#else + ldi r17, hi8(__ctors_start) + ldi r28, lo8(__ctors_end) + ldi r29, hi8(__ctors_end) + rjmp .L__do_global_ctors_start +.L__do_global_ctors_loop: + sbiw r28, 2 + mov_h r31, r29 + mov_l r30, r28 + XCALL __tablejump__ +.L__do_global_ctors_start: + cpi r28, lo8(__ctors_start) + cpc r29, r17 + brne .L__do_global_ctors_loop +#endif /* defined(__AVR_HAVE_RAMPZ__) */ +ENDF __do_global_ctors +#endif /* L_ctors */ + +#ifdef L_dtors + .section .fini6,"ax",@progbits +DEFUN __do_global_dtors +#if defined(__AVR_HAVE_RAMPZ__) + ldi r17, hi8(__dtors_end) + ldi r28, lo8(__dtors_start) + ldi r29, hi8(__dtors_start) + ldi r16, hh8(__dtors_start) + rjmp .L__do_global_dtors_start +.L__do_global_dtors_loop: + sbiw r28, 2 + sbc r16, __zero_reg__ + mov_h r31, r29 + mov_l r30, r28 + out __RAMPZ__, r16 + XCALL __tablejump_elpm__ +.L__do_global_dtors_start: + cpi r28, lo8(__dtors_end) + cpc r29, r17 + ldi r24, hh8(__dtors_end) + cpc r16, r24 + brne .L__do_global_dtors_loop +#else + ldi r17, hi8(__dtors_end) + ldi r28, lo8(__dtors_start) + ldi r29, hi8(__dtors_start) + rjmp .L__do_global_dtors_start +.L__do_global_dtors_loop: + mov_h r31, r29 + mov_l r30, r28 + XCALL __tablejump__ + adiw r28, 2 +.L__do_global_dtors_start: + cpi r28, lo8(__dtors_end) + cpc r29, r17 + brne .L__do_global_dtors_loop +#endif /* defined(__AVR_HAVE_RAMPZ__) */ +ENDF __do_global_dtors +#endif /* L_dtors */ + +.section .text.libgcc, "ax", @progbits + +#ifdef L_tablejump_elpm +DEFUN __tablejump_elpm__ +#if defined (__AVR_HAVE_ELPM__) +#if defined (__AVR_HAVE_LPMX__) + elpm __tmp_reg__, Z+ + elpm r31, Z + mov r30, __tmp_reg__ +#if defined (__AVR_HAVE_EIJMP_EICALL__) + eijmp +#else + ijmp +#endif + +#else + elpm + adiw r30, 1 + push r0 + elpm + push r0 +#if defined (__AVR_HAVE_EIJMP_EICALL__) + in __tmp_reg__, __EIND__ + push __tmp_reg__ +#endif + ret +#endif +#endif /* defined (__AVR_HAVE_ELPM__) */ +ENDF __tablejump_elpm__ +#endif /* defined (L_tablejump_elpm) */ + + +.section .text.libgcc.builtins, "ax", @progbits + +/********************************** + * Find first set Bit (ffs) + **********************************/ + +#if defined (L_ffssi2) +;; find first set bit +;; r25:r24 = ffs32 (r25:r22) +;; clobbers: r22, r26 +DEFUN __ffssi2 + clr r26 + tst r22 + brne 1f + subi r26, -8 + or r22, r23 + brne 1f + subi r26, -8 + or r22, r24 + brne 1f + subi r26, -8 + or r22, r25 + brne 1f + ret +1: mov r24, r22 + XJMP __loop_ffsqi2 +ENDF __ffssi2 +#endif /* defined (L_ffssi2) */ + +#if defined (L_ffshi2) +;; find first set bit +;; r25:r24 = ffs16 (r25:r24) +;; clobbers: r26 +DEFUN __ffshi2 + clr r26 +#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ + ;; Some cores have problem skipping 2-word instruction + tst r24 + breq 2f +#else + cpse r24, __zero_reg__ +#endif /* __AVR_HAVE_JMP_CALL__ */ +1: XJMP __loop_ffsqi2 +2: ldi r26, 8 + or r24, r25 + brne 1b + ret +ENDF __ffshi2 +#endif /* defined (L_ffshi2) */ + +#if defined (L_loop_ffsqi2) +;; Helper for ffshi2, ffssi2 +;; r25:r24 = r26 + zero_extend16 (ffs8(r24)) +;; 
r24 must be != 0 +;; clobbers: r26 +DEFUN __loop_ffsqi2 + inc r26 + lsr r24 + brcc __loop_ffsqi2 + mov r24, r26 + clr r25 + ret +ENDF __loop_ffsqi2 +#endif /* defined (L_loop_ffsqi2) */ + + +/********************************** + * Count trailing Zeros (ctz) + **********************************/ + +#if defined (L_ctzsi2) +;; count trailing zeros +;; r25:r24 = ctz32 (r25:r22) +;; clobbers: r26, r22 +;; ctz(0) = 255 +;; Note that ctz(0) in undefined for GCC +DEFUN __ctzsi2 + XCALL __ffssi2 + dec r24 + ret +ENDF __ctzsi2 +#endif /* defined (L_ctzsi2) */ + +#if defined (L_ctzhi2) +;; count trailing zeros +;; r25:r24 = ctz16 (r25:r24) +;; clobbers: r26 +;; ctz(0) = 255 +;; Note that ctz(0) in undefined for GCC +DEFUN __ctzhi2 + XCALL __ffshi2 + dec r24 + ret +ENDF __ctzhi2 +#endif /* defined (L_ctzhi2) */ + + +/********************************** + * Count leading Zeros (clz) + **********************************/ + +#if defined (L_clzdi2) +;; count leading zeros +;; r25:r24 = clz64 (r25:r18) +;; clobbers: r22, r23, r26 +DEFUN __clzdi2 + XCALL __clzsi2 + sbrs r24, 5 + ret + mov_l r22, r18 + mov_h r23, r19 + mov_l r24, r20 + mov_h r25, r21 + XCALL __clzsi2 + subi r24, -32 + ret +ENDF __clzdi2 +#endif /* defined (L_clzdi2) */ + +#if defined (L_clzsi2) +;; count leading zeros +;; r25:r24 = clz32 (r25:r22) +;; clobbers: r26 +DEFUN __clzsi2 + XCALL __clzhi2 + sbrs r24, 4 + ret + mov_l r24, r22 + mov_h r25, r23 + XCALL __clzhi2 + subi r24, -16 + ret +ENDF __clzsi2 +#endif /* defined (L_clzsi2) */ + +#if defined (L_clzhi2) +;; count leading zeros +;; r25:r24 = clz16 (r25:r24) +;; clobbers: r26 +DEFUN __clzhi2 + clr r26 + tst r25 + brne 1f + subi r26, -8 + or r25, r24 + brne 1f + ldi r24, 16 + ret +1: cpi r25, 16 + brsh 3f + subi r26, -3 + swap r25 +2: inc r26 +3: lsl r25 + brcc 2b + mov r24, r26 + clr r25 + ret +ENDF __clzhi2 +#endif /* defined (L_clzhi2) */ + + +/********************************** + * Parity + **********************************/ + +#if defined (L_paritydi2) +;; r25:r24 = parity64 (r25:r18) +;; clobbers: __tmp_reg__ +DEFUN __paritydi2 + eor r24, r18 + eor r24, r19 + eor r24, r20 + eor r24, r21 + XJMP __paritysi2 +ENDF __paritydi2 +#endif /* defined (L_paritydi2) */ + +#if defined (L_paritysi2) +;; r25:r24 = parity32 (r25:r22) +;; clobbers: __tmp_reg__ +DEFUN __paritysi2 + eor r24, r22 + eor r24, r23 + XJMP __parityhi2 +ENDF __paritysi2 +#endif /* defined (L_paritysi2) */ + +#if defined (L_parityhi2) +;; r25:r24 = parity16 (r25:r24) +;; clobbers: __tmp_reg__ +DEFUN __parityhi2 + eor r24, r25 +;; FALLTHRU +ENDF __parityhi2 + +;; r25:r24 = parity8 (r24) +;; clobbers: __tmp_reg__ +DEFUN __parityqi2 + ;; parity is in r24[0..7] + mov __tmp_reg__, r24 + swap __tmp_reg__ + eor r24, __tmp_reg__ + ;; parity is in r24[0..3] + subi r24, -4 + andi r24, -5 + subi r24, -6 + ;; parity is in r24[0,3] + sbrc r24, 3 + inc r24 + ;; parity is in r24[0] + andi r24, 1 + clr r25 + ret +ENDF __parityqi2 +#endif /* defined (L_parityhi2) */ + + +/********************************** + * Population Count + **********************************/ + +#if defined (L_popcounthi2) +;; population count +;; r25:r24 = popcount16 (r25:r24) +;; clobbers: __tmp_reg__ +DEFUN __popcounthi2 + XCALL __popcountqi2 + push r24 + mov r24, r25 + XCALL __popcountqi2 + clr r25 + ;; FALLTHRU +ENDF __popcounthi2 + +DEFUN __popcounthi2_tail + pop __tmp_reg__ + add r24, __tmp_reg__ + ret +ENDF __popcounthi2_tail +#endif /* defined (L_popcounthi2) */ + +#if defined (L_popcountsi2) +;; population count +;; r25:r24 = popcount32 (r25:r22) +;; 
clobbers: __tmp_reg__ +DEFUN __popcountsi2 + XCALL __popcounthi2 + push r24 + mov_l r24, r22 + mov_h r25, r23 + XCALL __popcounthi2 + XJMP __popcounthi2_tail +ENDF __popcountsi2 +#endif /* defined (L_popcountsi2) */ + +#if defined (L_popcountdi2) +;; population count +;; r25:r24 = popcount64 (r25:r18) +;; clobbers: r22, r23, __tmp_reg__ +DEFUN __popcountdi2 + XCALL __popcountsi2 + push r24 + mov_l r22, r18 + mov_h r23, r19 + mov_l r24, r20 + mov_h r25, r21 + XCALL __popcountsi2 + XJMP __popcounthi2_tail +ENDF __popcountdi2 +#endif /* defined (L_popcountdi2) */ + +#if defined (L_popcountqi2) +;; population count +;; r24 = popcount8 (r24) +;; clobbers: __tmp_reg__ +DEFUN __popcountqi2 + mov __tmp_reg__, r24 + andi r24, 1 + lsr __tmp_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __tmp_reg__ + ret +ENDF __popcountqi2 +#endif /* defined (L_popcountqi2) */ + + +/********************************** + * Swap bytes + **********************************/ + +;; swap two registers with different register number +.macro bswap a, b + eor \a, \b + eor \b, \a + eor \a, \b +.endm + +#if defined (L_bswapsi2) +;; swap bytes +;; r25:r22 = bswap32 (r25:r22) +DEFUN __bswapsi2 + bswap r22, r25 + bswap r23, r24 + ret +ENDF __bswapsi2 +#endif /* defined (L_bswapsi2) */ + +#if defined (L_bswapdi2) +;; swap bytes +;; r25:r18 = bswap64 (r25:r18) +DEFUN __bswapdi2 + bswap r18, r25 + bswap r19, r24 + bswap r20, r23 + bswap r21, r22 + ret +ENDF __bswapdi2 +#endif /* defined (L_bswapdi2) */ + + +/********************************** + * 64-bit shifts + **********************************/ + +#if defined (L_ashrdi3) +;; Arithmetic shift right +;; r25:r18 = ashr64 (r25:r18, r17:r16) +DEFUN __ashrdi3 + push r16 + andi r16, 63 + breq 2f +1: asr r25 + ror r24 + ror r23 + ror r22 + ror r21 + ror r20 + ror r19 + ror r18 + dec r16 + brne 1b +2: pop r16 + ret +ENDF __ashrdi3 +#endif /* defined (L_ashrdi3) */ + +#if defined (L_lshrdi3) +;; Logic shift right +;; r25:r18 = lshr64 (r25:r18, r17:r16) +DEFUN __lshrdi3 + push r16 + andi r16, 63 + breq 2f +1: lsr r25 + ror r24 + ror r23 + ror r22 + ror r21 + ror r20 + ror r19 + ror r18 + dec r16 + brne 1b +2: pop r16 + ret +ENDF __lshrdi3 +#endif /* defined (L_lshrdi3) */ + +#if defined (L_ashldi3) +;; Shift left +;; r25:r18 = ashl64 (r25:r18, r17:r16) +DEFUN __ashldi3 + push r16 + andi r16, 63 + breq 2f +1: lsl r18 + rol r19 + rol r20 + rol r21 + rol r22 + rol r23 + rol r24 + rol r25 + dec r16 + brne 1b +2: pop r16 + ret +ENDF __ashldi3 +#endif /* defined (L_ashldi3) */ + + +.section .text.libgcc.fmul, "ax", @progbits + +/***********************************************************/ +;;; Softmul versions of FMUL, FMULS and FMULSU to implement +;;; __builtin_avr_fmul* if !AVR_HAVE_MUL +/***********************************************************/ + +#define A1 24 +#define B1 25 +#define C0 22 +#define C1 23 +#define A0 __tmp_reg__ + +#ifdef L_fmuls +;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction +;;; Clobbers: r24, r25, __tmp_reg__ +DEFUN __fmuls + ;; A0.7 = negate result? + mov A0, A1 + eor A0, B1 + ;; B1 = |B1| + sbrc B1, 7 + neg B1 + XJMP __fmulsu_exit +ENDF __fmuls +#endif /* L_fmuls */ + +#ifdef L_fmulsu +;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction +;;; Clobbers: r24, r25, __tmp_reg__ +DEFUN __fmulsu + ;; A0.7 = negate result? 
+ mov A0, A1 +;; FALLTHRU +ENDF __fmulsu + +;; Helper for __fmuls and __fmulsu +DEFUN __fmulsu_exit + ;; A1 = |A1| + sbrc A1, 7 + neg A1 +#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ + ;; Some cores have problem skipping 2-word instruction + tst A0 + brmi 1f +#else + sbrs A0, 7 +#endif /* __AVR_HAVE_JMP_CALL__ */ + XJMP __fmul +1: XCALL __fmul + ;; C = -C iff A0.7 = 1 + com C1 + neg C0 + sbci C1, -1 + ret +ENDF __fmulsu_exit +#endif /* L_fmulsu */ + + +#ifdef L_fmul +;;; r22:r23 = fmul (r24, r25) like in FMUL instruction +;;; Clobbers: r24, r25, __tmp_reg__ +DEFUN __fmul + ; clear result + clr C0 + clr C1 + clr A0 +1: tst B1 + ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C. +2: brpl 3f + ;; C += A + add C0, A0 + adc C1, A1 +3: ;; A >>= 1 + lsr A1 + ror A0 + ;; B <<= 1 + lsl B1 + brne 2b + ret +ENDF __fmul +#endif /* L_fmul */ + +#undef A0 +#undef A1 +#undef B1 +#undef C0 +#undef C1 diff --git a/libgcc/config/avr/t-avr b/libgcc/config/avr/t-avr index 78829c76af4..f1c114a6dd6 100644 --- a/libgcc/config/avr/t-avr +++ b/libgcc/config/avr/t-avr @@ -1,3 +1,51 @@ +LIB1ASMSRC = avr/lib1funcs.S +LIB1ASMFUNCS = \ + _mulqi3 \ + _mulhi3 \ + _mulhisi3 \ + _umulhisi3 \ + _usmulhisi3 \ + _muluhisi3 \ + _mulshisi3 \ + _mulsi3 \ + _udivmodqi4 \ + _divmodqi4 \ + _udivmodhi4 \ + _divmodhi4 \ + _udivmodsi4 \ + _divmodsi4 \ + _prologue \ + _epilogue \ + _exit \ + _cleanup \ + _tablejump \ + _tablejump_elpm \ + _copy_data \ + _clear_bss \ + _ctors \ + _dtors \ + _ffssi2 \ + _ffshi2 \ + _loop_ffsqi2 \ + _ctzsi2 \ + _ctzhi2 \ + _clzdi2 \ + _clzsi2 \ + _clzhi2 \ + _paritydi2 \ + _paritysi2 \ + _parityhi2 \ + _popcounthi2 \ + _popcountsi2 \ + _popcountdi2 \ + _popcountqi2 \ + _bswapsi2 \ + _bswapdi2 \ + _ashldi3 \ + _ashrdi3 \ + _lshrdi3 \ + _fmul _fmuls _fmulsu + # Extra 16-bit integer functions. intfuncs16 = _absvXX2 _addvXX3 _subvXX3 _mulvXX3 _negvXX2 _clrsbXX2 diff --git a/libgcc/config/bfin/lib1funcs.S b/libgcc/config/bfin/lib1funcs.S new file mode 100644 index 00000000000..c7bf4f3f05c --- /dev/null +++ b/libgcc/config/bfin/lib1funcs.S @@ -0,0 +1,211 @@ +/* libgcc functions for Blackfin. + Copyright (C) 2005, 2009 Free Software Foundation, Inc. + Contributed by Analog Devices. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +#ifdef L_divsi3 +.text +.align 2 +.global ___divsi3; +.type ___divsi3, STT_FUNC; + +___divsi3: + [--SP]= RETS; + [--SP] = R7; + + R2 = -R0; + CC = R0 < 0; + IF CC R0 = R2; + R7 = CC; + + R2 = -R1; + CC = R1 < 0; + IF CC R1 = R2; + R2 = CC; + R7 = R7 ^ R2; + + CALL ___udivsi3; + + CC = R7; + R1 = -R0; + IF CC R0 = R1; + + R7 = [SP++]; + RETS = [SP++]; + RTS; +#endif + +#ifdef L_modsi3 +.align 2 +.global ___modsi3; +.type ___modsi3, STT_FUNC; + +___modsi3: + [--SP] = RETS; + [--SP] = R0; + [--SP] = R1; + CALL ___divsi3; + R2 = [SP++]; + R1 = [SP++]; + R2 *= R0; + R0 = R1 - R2; + RETS = [SP++]; + RTS; +#endif + +#ifdef L_udivsi3 +.align 2 +.global ___udivsi3; +.type ___udivsi3, STT_FUNC; + +___udivsi3: + P0 = 32; + LSETUP (0f, 1f) LC0 = P0; + /* upper half of dividend */ + R3 = 0; +0: + /* The first time round in the loop we shift in garbage, but since we + perform 33 shifts, it doesn't matter. */ + R0 = ROT R0 BY 1; + R3 = ROT R3 BY 1; + R2 = R3 - R1; + CC = R3 < R1 (IU); +1: + /* Last instruction of the loop. */ + IF ! CC R3 = R2; + + /* Shift in the last bit. */ + R0 = ROT R0 BY 1; + /* R0 is the result, R3 contains the remainder. */ + R0 = ~ R0; + RTS; +#endif + +#ifdef L_umodsi3 +.align 2 +.global ___umodsi3; +.type ___umodsi3, STT_FUNC; + +___umodsi3: + [--SP] = RETS; + CALL ___udivsi3; + R0 = R3; + RETS = [SP++]; + RTS; +#endif + +#ifdef L_umulsi3_highpart +.align 2 +.global ___umulsi3_highpart; +.type ___umulsi3_highpart, STT_FUNC; + +___umulsi3_highpart: + A1 = R1.L * R0.L (FU); + A1 = A1 >> 16; + A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU); + A1 += R0.L * R1.H (FU); + A1 = A1 >> 16; + A0 += A1; + R0 = A0 (FU); + RTS; +#endif + +#ifdef L_smulsi3_highpart +.align 2 +.global ___smulsi3_highpart; +.type ___smulsi3_highpart, STT_FUNC; + +___smulsi3_highpart: + A1 = R1.L * R0.L (FU); + A1 = A1 >> 16; + A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M); + A1 += R1.H * R0.L (IS,M); + A1 = A1 >>> 16; + R0 = (A0 += A1); + RTS; +#endif + +#ifdef L_muldi3 +.align 2 +.global ___muldi3; +.type ___muldi3, STT_FUNC; + +/* + R1:R0 * R3:R2 + = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l +[X] = (R1.h * R3.h) * 2^96 +[X] + (R1.h * R3.l + R1.l * R3.h) * 2^80 +[X] + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64 +[T1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48 +[T2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32 +[T3] + (R0.l * R2.h + R2.l * R0.h) * 2^16 +[T4] + (R0.l * R2.l) + + We can discard the first three lines marked "X" since we produce + only a 64 bit result. So, we need ten 16-bit multiplies. + + Individual mul-acc results: +[E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h +[E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h +[E3] = R0.l * R2.h + R2.l * R0.h +[E4] = R0.l * R2.l + + We also need to add high parts from lower-level results to higher ones: + E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4 + + One interesting property is that all parts of the result that depend + on the sign of the multiplication are discarded. Those would be the + multiplications involving R1.h and R3.h, but only the top 16 bit of + the 32 bit result depend on the sign, and since R1.h and R3.h only + occur in E1, the top half of these results is cut off. + So, we can just use FU mode for all of the 16-bit multiplies, and + ignore questions of when to use mixed mode. */ + +___muldi3: + /* [SP] technically is part of the caller's frame, but we can + use it as scratch space. 
*/ + A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12]; /* E1 */ + A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4; /* E1 */ + A0 += A1; /* E1 */ + R4 = A0.w; + A0 = R0.l * R3.l (FU); /* E2 */ + A0 += R2.l * R1.l (FU); /* E2 */ + + A1 = R2.L * R0.L (FU); /* E4 */ + R3 = A1.w; + A1 = A1 >> 16; /* E3c */ + A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU); /* E2, E3c */ + A1 += R0.L * R2.H (FU); /* E3c */ + R0 = A1.w; + A1 = A1 >> 16; /* E2c */ + A0 += A1; /* E2c */ + R1 = A0.w; + + /* low(result) = low(E3c):low(E4) */ + R0 = PACK (R0.l, R3.l); + /* high(result) = E2c + (E1 << 16) */ + R1.h = R1.h + R4.l (NS) || R4 = [SP]; + RTS; + +.size ___muldi3, .-___muldi3 +#endif diff --git a/libgcc/config/bfin/t-bfin b/libgcc/config/bfin/t-bfin new file mode 100644 index 00000000000..bc2b088ffc1 --- /dev/null +++ b/libgcc/config/bfin/t-bfin @@ -0,0 +1,3 @@ +LIB1ASMSRC = bfin/lib1funcs.S +LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart +LIB1ASMFUNCS += _smulsi3_highpart diff --git a/libgcc/config/c6x/lib1funcs.S b/libgcc/config/c6x/lib1funcs.S new file mode 100644 index 00000000000..5bf34474bbd --- /dev/null +++ b/libgcc/config/c6x/lib1funcs.S @@ -0,0 +1,438 @@ +/* Copyright 2010, 2011 Free Software Foundation, Inc. + Contributed by Bernd Schmidt . + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + + ;; ABI considerations for the divide functions + ;; The following registers are call-used: + ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 + ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 + ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 + ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 + ;; + ;; In our implementation, divu and remu are leaf functions, + ;; while both divi and remi call into divu. + ;; A0 is not clobbered by any of the functions. + ;; divu does not clobber B2 either, which is taken advantage of + ;; in remi. + ;; divi uses B5 to hold the original return address during + ;; the call to divu. + ;; remi uses B2 and A5 to hold the input values during the + ;; call to divu. It stores B3 in on the stack. 
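The sketch below spells out, in C, how the signed entry points that follow are layered on the unsigned divide, as the ABI notes above describe: __c6xabi_divi negates its operands, calls __c6xabi_divu, and flips the sign of the quotient, while __c6xabi_remi recovers the remainder with a multiply and subtract (the mpy32/sub, or mpyu/mpylhu, sequence at its tail). This is an illustrative model only, not code from the patch; the function names are invented stand-ins for the real symbols.

    #include <stdint.h>

    /* Stand-in for __c6xabi_divu (unsigned 32-bit divide).  */
    extern uint32_t c6x_divu_ref(uint32_t n, uint32_t d);

    /* Stand-in for __c6xabi_divi.  */
    int32_t c6x_divi_ref(int32_t n, int32_t d)
    {
        uint32_t un = n < 0 ? 0u - (uint32_t)n : (uint32_t)n;
        uint32_t ud = d < 0 ? 0u - (uint32_t)d : (uint32_t)d;
        uint32_t q = c6x_divu_ref(un, ud);
        /* Quotient sign is the XOR of the operand signs.  */
        return (int32_t)(((n < 0) != (d < 0)) ? 0u - q : q);
    }

    /* Stand-in for __c6xabi_remi: remainder = dividend - quotient * divisor,
       so it carries the dividend's sign.  */
    int32_t c6x_remi_ref(int32_t n, int32_t d)
    {
        return (int32_t)((uint32_t)n - (uint32_t)c6x_divi_ref(n, d) * (uint32_t)d);
    }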
+ +#ifdef L_divsi3 +.text +.align 2 +.global __c6xabi_divi +.hidden __c6xabi_divi +.type __c6xabi_divi, STT_FUNC + +__c6xabi_divi: + call .s2 __c6xabi_divu +|| mv .d2 B3, B5 +|| cmpgt .l1 0, A4, A1 +|| cmpgt .l2 0, B4, B1 + + [A1] neg .l1 A4, A4 +|| [B1] neg .l2 B4, B4 +|| xor .s1x A1, B1, A1 + +#ifdef _TMS320C6400 + [A1] addkpc .s2 1f, B3, 4 +#else + [A1] mvkl .s2 1f, B3 + [A1] mvkh .s2 1f, B3 + nop 2 +#endif +1: + neg .l1 A4, A4 +|| mv .l2 B3,B5 +|| ret .s2 B5 + nop 5 +#endif + +#if defined L_modsi3 || defined L_divmodsi4 +.align 2 +#ifdef L_modsi3 +#define MOD_OUTPUT_REG A4 +.global __c6xabi_remi +.hidden __c6xabi_remi +.type __c6xabi_remi, STT_FUNC +#else +#define MOD_OUTPUT_REG A5 +.global __c6xabi_divremi +.hidden __c6xabi_divremi +.type __c6xabi_divremi, STT_FUNC +__c6xabi_divremi: +#endif + +__c6xabi_remi: + stw .d2t2 B3, *B15--[2] +|| cmpgt .l1 0, A4, A1 +|| cmpgt .l2 0, B4, B2 +|| mv .s1 A4, A5 +|| call .s2 __c6xabi_divu + + [A1] neg .l1 A4, A4 +|| [B2] neg .l2 B4, B4 +|| xor .s2x B2, A1, B0 +|| mv .d2 B4, B2 + +#ifdef _TMS320C6400 + [B0] addkpc .s2 1f, B3, 1 + [!B0] addkpc .s2 2f, B3, 1 + nop 2 +#else + [B0] mvkl .s2 1f,B3 + [!B0] mvkl .s2 2f,B3 + + [B0] mvkh .s2 1f,B3 + [!B0] mvkh .s2 2f,B3 +#endif +1: + neg .l1 A4, A4 +2: + ldw .d2t2 *++B15[2], B3 + +#ifdef _TMS320C6400_PLUS + mpy32 .m1x A4, B2, A6 + nop 3 + ret .s2 B3 + sub .l1 A5, A6, MOD_OUTPUT_REG + nop 4 +#else + mpyu .m1x A4, B2, A1 + nop 1 + mpylhu .m1x A4, B2, A6 +|| mpylhu .m2x B2, A4, B2 + nop 1 + add .l1x A6, B2, A6 +|| ret .s2 B3 + shl .s1 A6, 16, A6 + add .d1 A6, A1, A6 + sub .l1 A5, A6, MOD_OUTPUT_REG + nop 2 +#endif + +#endif + +#if defined L_udivsi3 || defined L_udivmodsi4 +.align 2 +#ifdef L_udivsi3 +.global __c6xabi_divu +.hidden __c6xabi_divu +.type __c6xabi_divu, STT_FUNC +__c6xabi_divu: +#else +.global __c6xabi_divremu +.hidden __c6xabi_divremu +.type __c6xabi_divremu, STT_FUNC +__c6xabi_divremu: +#endif + ;; We use a series of up to 31 subc instructions. First, we find + ;; out how many leading zero bits there are in the divisor. This + ;; gives us both a shift count for aligning (shifting) the divisor + ;; to the, and the number of times we have to execute subc. + + ;; At the end, we have both the remainder and most of the quotient + ;; in A4. The top bit of the quotient is computed first and is + ;; placed in A2. + + ;; Return immediately if the dividend is zero. Setting B4 to 1 + ;; is a trick to allow us to leave the following insns in the jump + ;; delay slot without affecting the result. + mv .s2x A4, B1 + +#ifndef _TMS320C6400 +[!b1] mvk .s2 1, B4 +#endif +[b1] lmbd .l2 1, B4, B1 +||[!b1] b .s2 B3 ; RETURN A +#ifdef _TMS320C6400 +||[!b1] mvk .d2 1, B4 +#endif +#ifdef L_udivmodsi4 +||[!b1] zero .s1 A5 +#endif + mv .l1x B1, A6 +|| shl .s2 B4, B1, B4 + + ;; The loop performs a maximum of 28 steps, so we do the + ;; first 3 here. 
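The conditional-subtract loop that follows can be hard to read in its unrolled form, so here is the idea in C. This is a sketch of the underlying algorithm only, not a transcription of the routine (the real code packs quotient bits and remainder into A4 together, runs the steps in unrolled groups of eight, and unpacks at the end); the function name is an invented stand-in for __c6xabi_divu.

    #include <stdint.h>

    /* Count the divisor's leading zeros (lmbd in the assembly), align it
       with the top of the word, then do one compare/subtract step per
       remaining bit.  Assumes d != 0.  */
    uint32_t c6x_divu_ref(uint32_t n, uint32_t d)
    {
        if (n == 0)
            return 0;                        /* the routine returns early here too */
        uint32_t q = 0;
        int steps = __builtin_clz(d);
        d <<= steps;
        for (int i = steps; i >= 0; i--) {
            q <<= 1;
            if (n >= d) {                    /* one "subc" step */
                n -= d;
                q |= 1;
            }
            d >>= 1;
        }
        return q;                            /* n now holds the remainder */
    }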
+ cmpltu .l1x A4, B4, A2 +[!A2] sub .l1x A4, B4, A4 +|| shru .s2 B4, 1, B4 +|| xor .s1 1, A2, A2 + + shl .s1 A2, 31, A2 +|| [b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 + + ;; RETURN A may happen here (note: must happen before the next branch) +0: + cmpgt .l2 B1, 7, B0 +|| [b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +|| [b0] b .s1 0b +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 + ;; loop backwards branch happens here + + ret .s2 B3 +|| mvk .s1 32, A1 + sub .l1 A1, A6, A6 +#ifdef L_udivmodsi4 +|| extu .s1 A4, A6, A5 +#endif + shl .s1 A4, A6, A4 + shru .s1 A4, 1, A4 +|| sub .l1 A6, 1, A6 + or .l1 A2, A4, A4 + shru .s1 A4, A6, A4 + nop + +#endif + +#ifdef L_umodsi3 +.align 2 +.global __c6xabi_remu +.hidden __c6xabi_remu +.type __c6xabi_remu, STT_FUNC +__c6xabi_remu: + ;; The ABI seems designed to prevent these functions calling each other, + ;; so we duplicate most of the divsi3 code here. + mv .s2x A4, B1 +#ifndef _TMS320C6400 +[!b1] mvk .s2 1, B4 +#endif + lmbd .l2 1, B4, B1 +||[!b1] b .s2 B3 ; RETURN A +#ifdef _TMS320C6400 +||[!b1] mvk .d2 1, B4 +#endif + + mv .l1x B1, A7 +|| shl .s2 B4, B1, B4 + + cmpltu .l1x A4, B4, A1 +[!a1] sub .l1x A4, B4, A4 + shru .s2 B4, 1, B4 + +0: + cmpgt .l2 B1, 7, B0 +|| [b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 + ;; RETURN A may happen here (note: must happen before the next branch) +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +|| [b0] b .s1 0b +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 + ;; loop backwards branch happens here + + ret .s2 B3 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 + + extu .s1 A4, A7, A4 + nop 2 +#endif + +#if defined L_strasgi_64plus && defined _TMS320C6400_PLUS + +.align 2 +.global __c6xabi_strasgi_64plus +.hidden __c6xabi_strasgi_64plus +.type __c6xabi_strasgi_64plus, STT_FUNC +__c6xabi_strasgi_64plus: + shru .s2x a6, 2, b31 +|| mv .s1 a4, a30 +|| mv .d2 b4, b30 + + add .s2 -4, b31, b31 + + sploopd 1 +|| mvc .s2 b31, ilc + ldw .d2t2 *b30++, b31 + nop 4 + mv .s1x b31,a31 + spkernel 6, 0 +|| stw .d1t1 a31, *a30++ + + ret .s2 b3 + nop 5 +#endif + +#ifdef L_strasgi +.global __c6xabi_strasgi +.type __c6xabi_strasgi, STT_FUNC +__c6xabi_strasgi: + ;; This is essentially memcpy, with alignment known to be at least + ;; 4, and the size a multiple of 4 greater than or equal to 28. 
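In other words, __c6xabi_strasgi provides an aligned block copy; the unrolled loop below keeps several words in flight to hide load latency. A minimal C statement of the contract (an illustrative sketch, not code from the patch; the name is invented):

    #include <stddef.h>
    #include <stdint.h>

    /* Both pointers are 4-byte aligned; nbytes is a multiple of 4 and
       at least 28, as the comment above requires.  */
    void c6x_strasgi_ref(uint32_t *dst, const uint32_t *src, size_t nbytes)
    {
        for (size_t i = 0; i < nbytes / 4; i++)
            dst[i] = src[i];
    }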
+ ldw .d2t1 *B4++, A0 +|| mvk .s2 16, B1 + ldw .d2t1 *B4++, A1 +|| mvk .s2 20, B2 +|| sub .d1 A6, 24, A6 + ldw .d2t1 *B4++, A5 + ldw .d2t1 *B4++, A7 +|| mv .l2x A6, B7 + ldw .d2t1 *B4++, A8 + ldw .d2t1 *B4++, A9 +|| mv .s2x A0, B5 +|| cmpltu .l2 B2, B7, B0 + +0: + stw .d1t2 B5, *A4++ +||[b0] ldw .d2t1 *B4++, A0 +|| mv .s2x A1, B5 +|| mv .l2 B7, B6 + +[b0] sub .d2 B6, 24, B7 +||[b0] b .s2 0b +|| cmpltu .l2 B1, B6, B0 + +[b0] ldw .d2t1 *B4++, A1 +|| stw .d1t2 B5, *A4++ +|| mv .s2x A5, B5 +|| cmpltu .l2 12, B6, B0 + +[b0] ldw .d2t1 *B4++, A5 +|| stw .d1t2 B5, *A4++ +|| mv .s2x A7, B5 +|| cmpltu .l2 8, B6, B0 + +[b0] ldw .d2t1 *B4++, A7 +|| stw .d1t2 B5, *A4++ +|| mv .s2x A8, B5 +|| cmpltu .l2 4, B6, B0 + +[b0] ldw .d2t1 *B4++, A8 +|| stw .d1t2 B5, *A4++ +|| mv .s2x A9, B5 +|| cmpltu .l2 0, B6, B0 + +[b0] ldw .d2t1 *B4++, A9 +|| stw .d1t2 B5, *A4++ +|| mv .s2x A0, B5 +|| cmpltu .l2 B2, B7, B0 + + ;; loop back branch happens here + + cmpltu .l2 B1, B6, B0 +|| ret .s2 b3 + +[b0] stw .d1t1 A1, *A4++ +|| cmpltu .l2 12, B6, B0 +[b0] stw .d1t1 A5, *A4++ +|| cmpltu .l2 8, B6, B0 +[b0] stw .d1t1 A7, *A4++ +|| cmpltu .l2 4, B6, B0 +[b0] stw .d1t1 A8, *A4++ +|| cmpltu .l2 0, B6, B0 +[b0] stw .d1t1 A9, *A4++ + + ;; return happens here + +#endif + +#ifdef _TMS320C6400_PLUS +#ifdef L_push_rts +.align 2 +.global __c6xabi_push_rts +.hidden __c6xabi_push_rts +.type __c6xabi_push_rts, STT_FUNC +__c6xabi_push_rts: + stw .d2t2 B14, *B15--[2] + stdw .d2t1 A15:A14, *B15-- +|| b .s2x A3 + stdw .d2t2 B13:B12, *B15-- + stdw .d2t1 A13:A12, *B15-- + stdw .d2t2 B11:B10, *B15-- + stdw .d2t1 A11:A10, *B15-- + stdw .d2t2 B3:B2, *B15-- +#endif + +#ifdef L_pop_rts +.align 2 +.global __c6xabi_pop_rts +.hidden __c6xabi_pop_rts +.type __c6xabi_pop_rts, STT_FUNC +__c6xabi_pop_rts: + lddw .d2t2 *++B15, B3:B2 + lddw .d2t1 *++B15, A11:A10 + lddw .d2t2 *++B15, B11:B10 + lddw .d2t1 *++B15, A13:A12 + lddw .d2t2 *++B15, B13:B12 + lddw .d2t1 *++B15, A15:A14 +|| b .s2 B3 + ldw .d2t2 *++B15[2], B14 + nop 4 +#endif + +#ifdef L_call_stub +.align 2 +.global __c6xabi_call_stub +.type __c6xabi_call_stub, STT_FUNC +__c6xabi_call_stub: + stw .d2t1 A2, *B15--[2] + stdw .d2t1 A7:A6, *B15-- +|| call .s2 B31 + stdw .d2t1 A1:A0, *B15-- + stdw .d2t2 B7:B6, *B15-- + stdw .d2t2 B5:B4, *B15-- + stdw .d2t2 B1:B0, *B15-- + stdw .d2t2 B3:B2, *B15-- +|| addkpc .s2 1f, B3, 0 +1: + lddw .d2t2 *++B15, B3:B2 + lddw .d2t2 *++B15, B1:B0 + lddw .d2t2 *++B15, B5:B4 + lddw .d2t2 *++B15, B7:B6 + lddw .d2t1 *++B15, A1:A0 + lddw .d2t1 *++B15, A7:A6 +|| b .s2 B3 + ldw .d2t1 *++B15[2], A2 + nop 4 +#endif + +#endif + diff --git a/libgcc/config/c6x/t-elf b/libgcc/config/c6x/t-elf index 99d0cd2d5ca..e01c4109e52 100644 --- a/libgcc/config/c6x/t-elf +++ b/libgcc/config/c6x/t-elf @@ -1,6 +1,11 @@ # Cannot use default rules due to $(CRTSTUFF_T_CFLAGS). CUSTOM_CRTIN = yes +LIB1ASMSRC = c6x/lib1funcs.S +LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _udivmodsi4 _divmodsi4 +LIB1ASMFUNCS += _strasgi _strasgi_64plus _clzsi2 _clzdi2 _clz +LIB1ASMFUNCS += _push_rts _pop_rts _call_stub + # Assemble startup files. crti.o: $(srcdir)/config/c6x/crti.S $(crt_compile) -c $(CRTSTUFF_T_CFLAGS) $< diff --git a/libgcc/config/fr30/lib1funcs.S b/libgcc/config/fr30/lib1funcs.S new file mode 100644 index 00000000000..7c63453123a --- /dev/null +++ b/libgcc/config/fr30/lib1funcs.S @@ -0,0 +1,115 @@ +/* libgcc routines for the FR30. + Copyright (C) 1998, 1999, 2009 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + + .macro FUNC_START name + .text + .globl __\name + .type __\name, @function +__\name: + .endm + + .macro FUNC_END name + .size __\name, . - __\name + .endm + + .macro DIV_BODY reg number + .if \number + DIV_BODY \reg, "\number - 1" + div1 \reg + .endif + .endm + +#ifdef L_udivsi3 +FUNC_START udivsi3 + ;; Perform an unsiged division of r4 / r5 and place the result in r4. + ;; Does not handle overflow yet... + mov r4, mdl + div0u r5 + DIV_BODY r5 32 + mov mdl, r4 + ret +FUNC_END udivsi3 +#endif /* L_udivsi3 */ + +#ifdef L_divsi3 +FUNC_START divsi3 + ;; Perform a siged division of r4 / r5 and place the result in r4. + ;; Does not handle overflow yet... + mov r4, mdl + div0s r5 + DIV_BODY r5 32 + div2 r5 + div3 + div4s + mov mdl, r4 + ret +FUNC_END divsi3 +#endif /* L_divsi3 */ + +#ifdef L_umodsi3 +FUNC_START umodsi3 + ;; Perform an unsiged division of r4 / r5 and places the remainder in r4. + ;; Does not handle overflow yet... + mov r4, mdl + div0u r5 + DIV_BODY r5 32 + mov mdh, r4 + ret +FUNC_END umodsi3 +#endif /* L_umodsi3 */ + +#ifdef L_modsi3 +FUNC_START modsi3 + ;; Perform a siged division of r4 / r5 and place the remainder in r4. + ;; Does not handle overflow yet... + mov r4, mdl + div0s r5 + DIV_BODY r5 32 + div2 r5 + div3 + div4s + mov mdh, r4 + ret +FUNC_END modsi3 +#endif /* L_modsi3 */ + +#ifdef L_negsi2 +FUNC_START negsi2 + ldi:8 #0, r0 + sub r4, r0 + mov r0, r4 + ret +FUNC_END negsi2 +#endif /* L_negsi2 */ + +#ifdef L_one_cmplsi2 +FUNC_START one_cmplsi2 + ldi:8 #0xff, r0 + extsb r0 + eor r0, r4 + ret +FUNC_END one_cmplsi2 +#endif /* L_one_cmplsi2 */ + + diff --git a/libgcc/config/fr30/t-fr30 b/libgcc/config/fr30/t-fr30 new file mode 100644 index 00000000000..ee5ed9a127e --- /dev/null +++ b/libgcc/config/fr30/t-fr30 @@ -0,0 +1,2 @@ +LIB1ASMSRC = fr30/lib1funcs.S +LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 diff --git a/libgcc/config/frv/lib1funcs.S b/libgcc/config/frv/lib1funcs.S new file mode 100644 index 00000000000..d1ffcab6133 --- /dev/null +++ b/libgcc/config/frv/lib1funcs.S @@ -0,0 +1,269 @@ +/* Library functions. + Copyright (C) 2000, 2003, 2008, 2009 Free Software Foundation, Inc. + Contributed by Red Hat, Inc. + + This file is part of GCC. + + GCC is free software ; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY ; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include + + +#ifdef L_cmpll +/* icc0 = __cmpll (long long a, long long b) */ + + .globl EXT(__cmpll) + .type EXT(__cmpll),@function + .text + .p2align 4 +EXT(__cmpll): + cmp gr8, gr10, icc0 + ckeq icc0, cc4 + P(ccmp) gr9, gr11, cc4, 1 + ret +.Lend: + .size EXT(__cmpll),.Lend-EXT(__cmpll) +#endif /* L_cmpll */ + +#ifdef L_cmpf +/* icc0 = __cmpf (float a, float b) */ +/* Note, because this function returns the result in ICC0, it means it can't + handle NaNs. */ + + .globl EXT(__cmpf) + .type EXT(__cmpf),@function + .text + .p2align 4 +EXT(__cmpf): +#ifdef __FRV_HARD_FLOAT__ /* floating point instructions available */ + movgf gr8, fr0 + P(movgf) gr9, fr1 + setlos #1, gr8 + fcmps fr0, fr1, fcc0 + P(fcklt) fcc0, cc0 + fckeq fcc0, cc1 + csub gr0, gr8, gr8, cc0, 1 + cmov gr0, gr8, cc1, 1 + cmpi gr8, 0, icc0 + ret +#else /* no floating point instructions available */ + movsg lr, gr4 + addi sp, #-16, sp + sti gr4, @(sp, 8) + st fp, @(sp, gr0) + mov sp, fp + call EXT(__cmpsf2) + cmpi gr8, #0, icc0 + ldi @(sp, 8), gr4 + movgs gr4, lr + ld @(sp,gr0), fp + addi sp, #16, sp + ret +#endif +.Lend: + .size EXT(__cmpf),.Lend-EXT(__cmpf) +#endif + +#ifdef L_cmpd +/* icc0 = __cmpd (double a, double b) */ +/* Note, because this function returns the result in ICC0, it means it can't + handle NaNs. */ + + .globl EXT(__cmpd) + .type EXT(__cmpd),@function + .text + .p2align 4 +EXT(__cmpd): + movsg lr, gr4 + addi sp, #-16, sp + sti gr4, @(sp, 8) + st fp, @(sp, gr0) + mov sp, fp + call EXT(__cmpdf2) + cmpi gr8, #0, icc0 + ldi @(sp, 8), gr4 + movgs gr4, lr + ld @(sp,gr0), fp + addi sp, #16, sp + ret +.Lend: + .size EXT(__cmpd),.Lend-EXT(__cmpd) +#endif + +#ifdef L_addll +/* gr8,gr9 = __addll (long long a, long long b) */ +/* Note, gcc will never call this function, but it is present in case an + ABI program calls it. */ + + .globl EXT(__addll) + .type EXT(__addll),@function + .text + .p2align +EXT(__addll): + addcc gr9, gr11, gr9, icc0 + addx gr8, gr10, gr8, icc0 + ret +.Lend: + .size EXT(__addll),.Lend-EXT(__addll) +#endif + +#ifdef L_subll +/* gr8,gr9 = __subll (long long a, long long b) */ +/* Note, gcc will never call this function, but it is present in case an + ABI program calls it. */ + + .globl EXT(__subll) + .type EXT(__subll),@function + .text + .p2align 4 +EXT(__subll): + subcc gr9, gr11, gr9, icc0 + subx gr8, gr10, gr8, icc0 + ret +.Lend: + .size EXT(__subll),.Lend-EXT(__subll) +#endif + +#ifdef L_andll +/* gr8,gr9 = __andll (long long a, long long b) */ +/* Note, gcc will never call this function, but it is present in case an + ABI program calls it. */ + + .globl EXT(__andll) + .type EXT(__andll),@function + .text + .p2align 4 +EXT(__andll): + P(and) gr9, gr11, gr9 + P2(and) gr8, gr10, gr8 + ret +.Lend: + .size EXT(__andll),.Lend-EXT(__andll) +#endif + +#ifdef L_orll +/* gr8,gr9 = __orll (long long a, long long b) */ +/* Note, gcc will never call this function, but it is present in case an + ABI program calls it. 
*/ + + .globl EXT(__orll) + .type EXT(__orll),@function + .text + .p2align 4 +EXT(__orll): + P(or) gr9, gr11, gr9 + P2(or) gr8, gr10, gr8 + ret +.Lend: + .size EXT(__orll),.Lend-EXT(__orll) +#endif + +#ifdef L_xorll +/* gr8,gr9 = __xorll (long long a, long long b) */ +/* Note, gcc will never call this function, but it is present in case an + ABI program calls it. */ + + .globl EXT(__xorll) + .type EXT(__xorll),@function + .text + .p2align 4 +EXT(__xorll): + P(xor) gr9, gr11, gr9 + P2(xor) gr8, gr10, gr8 + ret +.Lend: + .size EXT(__xorll),.Lend-EXT(__xorll) +#endif + +#ifdef L_notll +/* gr8,gr9 = __notll (long long a) */ +/* Note, gcc will never call this function, but it is present in case an + ABI program calls it. */ + + .globl EXT(__notll) + .type EXT(__notll),@function + .text + .p2align 4 +EXT(__notll): + P(not) gr9, gr9 + P2(not) gr8, gr8 + ret +.Lend: + .size EXT(__notll),.Lend-EXT(__notll) +#endif + +#ifdef L_cmov +/* (void) __cmov (char *dest, const char *src, size_t len) */ +/* + * void __cmov (char *dest, const char *src, size_t len) + * { + * size_t i; + * + * if (dest < src || dest > src+len) + * { + * for (i = 0; i < len; i++) + * dest[i] = src[i]; + * } + * else + * { + * while (len-- > 0) + * dest[len] = src[len]; + * } + * } + */ + + .globl EXT(__cmov) + .type EXT(__cmov),@function + .text + .p2align 4 +EXT(__cmov): + P(cmp) gr8, gr9, icc0 + add gr9, gr10, gr4 + P(cmp) gr8, gr4, icc1 + bc icc0, 0, .Lfwd + bls icc1, 0, .Lback +.Lfwd: + /* move bytes in a forward direction */ + P(setlos) #0, gr5 + cmp gr0, gr10, icc0 + P(subi) gr9, #1, gr9 + P2(subi) gr8, #1, gr8 + bnc icc0, 0, .Lret +.Lfloop: + /* forward byte move loop */ + addi gr5, #1, gr5 + P(ldsb) @(gr9, gr5), gr4 + cmp gr5, gr10, icc0 + P(stb) gr4, @(gr8, gr5) + bc icc0, 0, .Lfloop + ret +.Lbloop: + /* backward byte move loop body */ + ldsb @(gr9,gr10),gr4 + stb gr4,@(gr8,gr10) +.Lback: + P(cmpi) gr10, #0, icc0 + addi gr10, #-1, gr10 + bne icc0, 0, .Lbloop +.Lret: + ret +.Lend: + .size EXT(__cmov),.Lend-EXT(__cmov) +#endif diff --git a/libgcc/config/frv/t-frv b/libgcc/config/frv/t-frv index b364a5a25b9..9773722d8e7 100644 --- a/libgcc/config/frv/t-frv +++ b/libgcc/config/frv/t-frv @@ -1,3 +1,6 @@ +LIB1ASMSRC = frv/lib1funcs.S +LIB1ASMFUNCS = _cmpll _cmpf _cmpd _addll _subll _andll _orll _xorll _notll _cmov + # Compile two additional files that are linked with every program # linked using GCC on systems using COFF or ELF, for the sake of C++ # constructors. diff --git a/libgcc/config/h8300/lib1funcs.S b/libgcc/config/h8300/lib1funcs.S new file mode 100644 index 00000000000..1b75b73269d --- /dev/null +++ b/libgcc/config/h8300/lib1funcs.S @@ -0,0 +1,838 @@ +;; libgcc routines for the Renesas H8/300 CPU. +;; Contributed by Steve Chamberlain +;; Optimizations by Toshiyasu Morita + +/* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004, 2009 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Assembler register definitions. */ + +#define A0 r0 +#define A0L r0l +#define A0H r0h + +#define A1 r1 +#define A1L r1l +#define A1H r1h + +#define A2 r2 +#define A2L r2l +#define A2H r2h + +#define A3 r3 +#define A3L r3l +#define A3H r3h + +#define S0 r4 +#define S0L r4l +#define S0H r4h + +#define S1 r5 +#define S1L r5l +#define S1H r5h + +#define S2 r6 +#define S2L r6l +#define S2H r6h + +#ifdef __H8300__ +#define PUSHP push +#define POPP pop + +#define A0P r0 +#define A1P r1 +#define A2P r2 +#define A3P r3 +#define S0P r4 +#define S1P r5 +#define S2P r6 +#endif + +#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__) +#define PUSHP push.l +#define POPP pop.l + +#define A0P er0 +#define A1P er1 +#define A2P er2 +#define A3P er3 +#define S0P er4 +#define S1P er5 +#define S2P er6 + +#define A0E e0 +#define A1E e1 +#define A2E e2 +#define A3E e3 +#endif + +#ifdef __H8300H__ +#ifdef __NORMAL_MODE__ + .h8300hn +#else + .h8300h +#endif +#endif + +#ifdef __H8300S__ +#ifdef __NORMAL_MODE__ + .h8300sn +#else + .h8300s +#endif +#endif +#ifdef __H8300SX__ +#ifdef __NORMAL_MODE__ + .h8300sxn +#else + .h8300sx +#endif +#endif + +#ifdef L_cmpsi2 +#ifdef __H8300__ + .section .text + .align 2 + .global ___cmpsi2 +___cmpsi2: + cmp.w A0,A2 + bne .L2 + cmp.w A1,A3 + bne .L4 + mov.w #1,A0 + rts +.L2: + bgt .L5 +.L3: + mov.w #2,A0 + rts +.L4: + bls .L3 +.L5: + sub.w A0,A0 + rts + .end +#endif +#endif /* L_cmpsi2 */ + +#ifdef L_ucmpsi2 +#ifdef __H8300__ + .section .text + .align 2 + .global ___ucmpsi2 +___ucmpsi2: + cmp.w A0,A2 + bne .L2 + cmp.w A1,A3 + bne .L4 + mov.w #1,A0 + rts +.L2: + bhi .L5 +.L3: + mov.w #2,A0 + rts +.L4: + bls .L3 +.L5: + sub.w A0,A0 + rts + .end +#endif +#endif /* L_ucmpsi2 */ + +#ifdef L_divhi3 + +;; HImode divides for the H8/300. +;; We bunch all of this into one object file since there are several +;; "supporting routines". + +; general purpose normalize routine +; +; divisor in A0 +; dividend in A1 +; turns both into +ve numbers, and leaves what the answer sign +; should be in A2L + +#ifdef __H8300__ + .section .text + .align 2 +divnorm: + or A0H,A0H ; is divisor > 0 + stc ccr,A2L + bge _lab1 + not A0H ; no - then make it +ve + not A0L + adds #1,A0 +_lab1: or A1H,A1H ; look at dividend + bge _lab2 + not A1H ; it is -ve, make it positive + not A1L + adds #1,A1 + xor #0x8,A2L; and toggle sign of result +_lab2: rts +;; Basically the same, except that the sign of the divisor determines +;; the sign. +modnorm: + or A0H,A0H ; is divisor > 0 + stc ccr,A2L + bge _lab7 + not A0H ; no - then make it +ve + not A0L + adds #1,A0 +_lab7: or A1H,A1H ; look at dividend + bge _lab8 + not A1H ; it is -ve, make it positive + not A1L + adds #1,A1 +_lab8: rts + +; A0=A0/A1 signed + + .global ___divhi3 +___divhi3: + bsr divnorm + bsr ___udivhi3 +negans: btst #3,A2L ; should answer be negative ? 
+ beq _lab4 + not A0H ; yes, so make it so + not A0L + adds #1,A0 +_lab4: rts + +; A0=A0%A1 signed + + .global ___modhi3 +___modhi3: + bsr modnorm + bsr ___udivhi3 + mov A3,A0 + bra negans + +; A0=A0%A1 unsigned + + .global ___umodhi3 +___umodhi3: + bsr ___udivhi3 + mov A3,A0 + rts + +; A0=A0/A1 unsigned +; A3=A0%A1 unsigned +; A2H trashed +; D high 8 bits of denom +; d low 8 bits of denom +; N high 8 bits of num +; n low 8 bits of num +; M high 8 bits of mod +; m low 8 bits of mod +; Q high 8 bits of quot +; q low 8 bits of quot +; P preserve + +; The H8/300 only has a 16/8 bit divide, so we look at the incoming and +; see how to partition up the expression. + + .global ___udivhi3 +___udivhi3: + ; A0 A1 A2 A3 + ; Nn Dd P + sub.w A3,A3 ; Nn Dd xP 00 + or A1H,A1H + bne divlongway + or A0H,A0H + beq _lab6 + +; we know that D == 0 and N is != 0 + mov.b A0H,A3L ; Nn Dd xP 0N + divxu A1L,A3 ; MQ + mov.b A3L,A0H ; Q +; dealt with N, do n +_lab6: mov.b A0L,A3L ; n + divxu A1L,A3 ; mq + mov.b A3L,A0L ; Qq + mov.b A3H,A3L ; m + mov.b #0x0,A3H ; Qq 0m + rts + +; D != 0 - which means the denominator is +; loop around to get the result. + +divlongway: + mov.b A0H,A3L ; Nn Dd xP 0N + mov.b #0x0,A0H ; high byte of answer has to be zero + mov.b #0x8,A2H ; 8 +div8: add.b A0L,A0L ; n*=2 + rotxl A3L ; Make remainder bigger + rotxl A3H + sub.w A1,A3 ; Q-=N + bhs setbit ; set a bit ? + add.w A1,A3 ; no : too far , Q+=N + + dec A2H + bne div8 ; next bit + rts + +setbit: inc A0L ; do insert bit + dec A2H + bne div8 ; next bit + rts + +#endif /* __H8300__ */ +#endif /* L_divhi3 */ + +#ifdef L_divsi3 + +;; 4 byte integer divides for the H8/300. +;; +;; We have one routine which does all the work and lots of +;; little ones which prepare the args and massage the sign. +;; We bunch all of this into one object file since there are several +;; "supporting routines". + + .section .text + .align 2 + +; Put abs SIs into r0/r1 and r2/r3, and leave a 1 in r6l with sign of rest. +; This function is here to keep branch displacements small. + +#ifdef __H8300__ + +divnorm: + mov.b A0H,A0H ; is the numerator -ve + stc ccr,S2L ; keep the sign in bit 3 of S2L + bge postive + + ; negate arg + not A0H + not A1H + not A0L + not A1L + + add #1,A1L + addx #0,A1H + addx #0,A0L + addx #0,A0H +postive: + mov.b A2H,A2H ; is the denominator -ve + bge postive2 + not A2L + not A2H + not A3L + not A3H + add.b #1,A3L + addx #0,A3H + addx #0,A2L + addx #0,A2H + xor.b #0x08,S2L ; toggle the result sign +postive2: + rts + +;; Basically the same, except that the sign of the divisor determines +;; the sign. +modnorm: + mov.b A0H,A0H ; is the numerator -ve + stc ccr,S2L ; keep the sign in bit 3 of S2L + bge mpostive + + ; negate arg + not A0H + not A1H + not A0L + not A1L + + add #1,A1L + addx #0,A1H + addx #0,A0L + addx #0,A0H +mpostive: + mov.b A2H,A2H ; is the denominator -ve + bge mpostive2 + not A2L + not A2H + not A3L + not A3H + add.b #1,A3L + addx #0,A3H + addx #0,A2L + addx #0,A2H +mpostive2: + rts + +#else /* __H8300H__ */ + +divnorm: + mov.l A0P,A0P ; is the numerator -ve + stc ccr,S2L ; keep the sign in bit 3 of S2L + bge postive + + neg.l A0P ; negate arg + +postive: + mov.l A1P,A1P ; is the denominator -ve + bge postive2 + + neg.l A1P ; negate arg + xor.b #0x08,S2L ; toggle the result sign + +postive2: + rts + +;; Basically the same, except that the sign of the divisor determines +;; the sign. 
+modnorm: + mov.l A0P,A0P ; is the numerator -ve + stc ccr,S2L ; keep the sign in bit 3 of S2L + bge mpostive + + neg.l A0P ; negate arg + +mpostive: + mov.l A1P,A1P ; is the denominator -ve + bge mpostive2 + + neg.l A1P ; negate arg + +mpostive2: + rts + +#endif + +; numerator in A0/A1 +; denominator in A2/A3 + .global ___modsi3 +___modsi3: +#ifdef __H8300__ + PUSHP S2P + PUSHP S0P + PUSHP S1P + bsr modnorm + bsr divmodsi4 + mov S0,A0 + mov S1,A1 + bra exitdiv +#else + PUSHP S2P + bsr modnorm + bsr ___udivsi3 + mov.l er3,er0 + bra exitdiv +#endif + + ;; H8/300H and H8S version of ___udivsi3 is defined later in + ;; the file. +#ifdef __H8300__ + .global ___udivsi3 +___udivsi3: + PUSHP S2P + PUSHP S0P + PUSHP S1P + bsr divmodsi4 + bra reti +#endif + + .global ___umodsi3 +___umodsi3: +#ifdef __H8300__ + PUSHP S2P + PUSHP S0P + PUSHP S1P + bsr divmodsi4 + mov S0,A0 + mov S1,A1 + bra reti +#else + bsr ___udivsi3 + mov.l er3,er0 + rts +#endif + + .global ___divsi3 +___divsi3: +#ifdef __H8300__ + PUSHP S2P + PUSHP S0P + PUSHP S1P + jsr divnorm + jsr divmodsi4 +#else + PUSHP S2P + jsr divnorm + bsr ___udivsi3 +#endif + + ; examine what the sign should be +exitdiv: + btst #3,S2L + beq reti + + ; should be -ve +#ifdef __H8300__ + not A0H + not A1H + not A0L + not A1L + + add #1,A1L + addx #0,A1H + addx #0,A0L + addx #0,A0H +#else /* __H8300H__ */ + neg.l A0P +#endif + +reti: +#ifdef __H8300__ + POPP S1P + POPP S0P +#endif + POPP S2P + rts + + ; takes A0/A1 numerator (A0P for H8/300H) + ; A2/A3 denominator (A1P for H8/300H) + ; returns A0/A1 quotient (A0P for H8/300H) + ; S0/S1 remainder (S0P for H8/300H) + ; trashes S2H + +#ifdef __H8300__ + +divmodsi4: + sub.w S0,S0 ; zero play area + mov.w S0,S1 + mov.b A2H,S2H + or A2L,S2H + or A3H,S2H + bne DenHighNonZero + mov.b A0H,A0H + bne NumByte0Zero + mov.b A0L,A0L + bne NumByte1Zero + mov.b A1H,A1H + bne NumByte2Zero + bra NumByte3Zero +NumByte0Zero: + mov.b A0H,S1L + divxu A3L,S1 + mov.b S1L,A0H +NumByte1Zero: + mov.b A0L,S1L + divxu A3L,S1 + mov.b S1L,A0L +NumByte2Zero: + mov.b A1H,S1L + divxu A3L,S1 + mov.b S1L,A1H +NumByte3Zero: + mov.b A1L,S1L + divxu A3L,S1 + mov.b S1L,A1L + + mov.b S1H,S1L + mov.b #0x0,S1H + rts + +; have to do the divide by shift and test +DenHighNonZero: + mov.b A0H,S1L + mov.b A0L,A0H + mov.b A1H,A0L + mov.b A1L,A1H + + mov.b #0,A1L + mov.b #24,S2H ; only do 24 iterations + +nextbit: + add.w A1,A1 ; double the answer guess + rotxl A0L + rotxl A0H + + rotxl S1L ; double remainder + rotxl S1H + rotxl S0L + rotxl S0H + sub.w A3,S1 ; does it all fit + subx A2L,S0L + subx A2H,S0H + bhs setone + + add.w A3,S1 ; no, restore mistake + addx A2L,S0L + addx A2H,S0H + + dec S2H + bne nextbit + rts + +setone: + inc A1L + dec S2H + bne nextbit + rts + +#else /* __H8300H__ */ + + ;; This function also computes the remainder and stores it in er3. + .global ___udivsi3 +___udivsi3: + mov.w A1E,A1E ; denominator top word 0? 
+ bne DenHighNonZero + + ; do it the easy way, see page 107 in manual + mov.w A0E,A2 + extu.l A2P + divxu.w A1,A2P + mov.w A2E,A0E + divxu.w A1,A0P + mov.w A0E,A3 + mov.w A2,A0E + extu.l A3P + rts + + ; er0 = er0 / er1 + ; er3 = er0 % er1 + ; trashes er1 er2 + ; expects er1 >= 2^16 +DenHighNonZero: + mov.l er0,er3 + mov.l er1,er2 +#ifdef __H8300H__ +divmod_L21: + shlr.l er0 + shlr.l er2 ; make divisor < 2^16 + mov.w e2,e2 + bne divmod_L21 +#else + shlr.l #2,er2 ; make divisor < 2^16 + mov.w e2,e2 + beq divmod_L22A +divmod_L21: + shlr.l #2,er0 +divmod_L22: + shlr.l #2,er2 ; make divisor < 2^16 + mov.w e2,e2 + bne divmod_L21 +divmod_L22A: + rotxl.w r2 + bcs divmod_L23 + shlr.l er0 + bra divmod_L24 +divmod_L23: + rotxr.w r2 + shlr.l #2,er0 +divmod_L24: +#endif + ;; At this point, + ;; er0 contains shifted dividend + ;; er1 contains divisor + ;; er2 contains shifted divisor + ;; er3 contains dividend, later remainder + divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ) + extu.l er0 + beq divmod_L25 + subs #1,er0 ; er0 = AQ - 1 + mov.w e1,r2 + mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor + sub.w r2,e3 ; dividend - 65536 * er2 + mov.w r1,r2 + mulxu.w r0,er2 ; compute er3 = remainder (tentative) + sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor +divmod_L25: + cmp.l er1,er3 ; is divisor < remainder? + blo divmod_L26 + adds #1,er0 + sub.l er1,er3 ; correct the remainder +divmod_L26: + rts + +#endif +#endif /* L_divsi3 */ + +#ifdef L_mulhi3 + +;; HImode multiply. +; The H8/300 only has an 8*8->16 multiply. +; The answer is the same as: +; +; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256 +; (we can ignore A1.h * A0.h cause that will all off the top) +; A0 in +; A1 in +; A0 answer + +#ifdef __H8300__ + .section .text + .align 2 + .global ___mulhi3 +___mulhi3: + mov.b A1L,A2L ; A2l gets srcb.l + mulxu A0L,A2 ; A2 gets first sub product + + mov.b A0H,A3L ; prepare for + mulxu A1L,A3 ; second sub product + + add.b A3L,A2H ; sum first two terms + + mov.b A1H,A3L ; third sub product + mulxu A0L,A3 + + add.b A3L,A2H ; almost there + mov.w A2,A0 ; that is + rts + +#endif +#endif /* L_mulhi3 */ + +#ifdef L_mulsi3 + +;; SImode multiply. +;; +;; I think that shift and add may be sufficient for this. Using the +;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way +;; the inner loop uses maybe 20 cycles + overhead, but terminates +;; quickly on small args. 
+;; +;; A0/A1 src_a +;; A2/A3 src_b +;; +;; while (a) +;; { +;; if (a & 1) +;; r += b; +;; a >>= 1; +;; b <<= 1; +;; } + + .section .text + .align 2 + +#ifdef __H8300__ + + .global ___mulsi3 +___mulsi3: + PUSHP S0P + PUSHP S1P + + sub.w S0,S0 + sub.w S1,S1 + + ; while (a) +_top: mov.w A0,A0 + bne _more + mov.w A1,A1 + beq _done +_more: ; if (a & 1) + bld #0,A1L + bcc _nobit + ; r += b + add.w A3,S1 + addx A2L,S0L + addx A2H,S0H +_nobit: + ; a >>= 1 + shlr A0H + rotxr A0L + rotxr A1H + rotxr A1L + + ; b <<= 1 + add.w A3,A3 + addx A2L,A2L + addx A2H,A2H + bra _top + +_done: + mov.w S0,A0 + mov.w S1,A1 + POPP S1P + POPP S0P + rts + +#else /* __H8300H__ */ + +; +; mulsi3 for H8/300H - based on Renesas SH implementation +; +; by Toshiyasu Morita +; +; Old code: +; +; 16b * 16b = 372 states (worst case) +; 32b * 32b = 724 states (worst case) +; +; New code: +; +; 16b * 16b = 48 states +; 16b * 32b = 72 states +; 32b * 32b = 92 states +; + + .global ___mulsi3 +___mulsi3: + mov.w r1,r2 ; ( 2 states) b * d + mulxu r0,er2 ; (22 states) + + mov.w e0,r3 ; ( 2 states) a * d + beq L_skip1 ; ( 4 states) + mulxu r1,er3 ; (22 states) + add.w r3,e2 ; ( 2 states) + +L_skip1: + mov.w e1,r3 ; ( 2 states) c * b + beq L_skip2 ; ( 4 states) + mulxu r0,er3 ; (22 states) + add.w r3,e2 ; ( 2 states) + +L_skip2: + mov.l er2,er0 ; ( 2 states) + rts ; (10 states) + +#endif +#endif /* L_mulsi3 */ +#ifdef L_fixunssfsi_asm +/* For the h8300 we use asm to save some bytes, to + allow more programs to fit into the tiny address + space. For the H8/300H and H8S, the C version is good enough. */ +#ifdef __H8300__ +/* We still treat NANs different than libgcc2.c, but then, the + behavior is undefined anyways. */ + .global ___fixunssfsi +___fixunssfsi: + cmp.b #0x4f,r0h + bge Large_num + jmp @___fixsfsi +Large_num: + bhi L_huge_num + xor.b #0x80,A0L + bmi L_shift8 +L_huge_num: + mov.w #65535,A0 + mov.w A0,A1 + rts +L_shift8: + mov.b A0L,A0H + mov.b A1H,A0L + mov.b A1L,A1H + mov.b #0,A1L + rts +#endif +#endif /* L_fixunssfsi_asm */ diff --git a/libgcc/config/h8300/t-h8300 b/libgcc/config/h8300/t-h8300 new file mode 100644 index 00000000000..4602ff8b9ef --- /dev/null +++ b/libgcc/config/h8300/t-h8300 @@ -0,0 +1,3 @@ +LIB1ASMSRC = h8300/lib1funcs.S +LIB1ASMFUNCS = _cmpsi2 _ucmpsi2 _divhi3 _divsi3 _mulhi3 _mulsi3 \ + _fixunssfsi_asm diff --git a/libgcc/config/i386/cygwin.S b/libgcc/config/i386/cygwin.S new file mode 100644 index 00000000000..8f9c486850e --- /dev/null +++ b/libgcc/config/i386/cygwin.S @@ -0,0 +1,188 @@ +/* stuff needed for libgcc on win32. + * + * Copyright (C) 1996, 1998, 2001, 2003, 2008, 2009, 2010 + * Free Software Foundation, Inc. + * Written By Steve Chamberlain + * + * This file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) any + * later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * . + */ + +#include "auto-host.h" + +#ifdef HAVE_GAS_CFI_SECTIONS_DIRECTIVE + .cfi_sections .debug_frame +# define cfi_startproc() .cfi_startproc +# define cfi_endproc() .cfi_endproc +# define cfi_adjust_cfa_offset(X) .cfi_adjust_cfa_offset X +# define cfi_def_cfa_register(X) .cfi_def_cfa_register X +# define cfi_register(D,S) .cfi_register D, S +# ifdef _WIN64 +# define cfi_push(X) .cfi_adjust_cfa_offset 8; .cfi_rel_offset X, 0 +# define cfi_pop(X) .cfi_adjust_cfa_offset -8; .cfi_restore X +# else +# define cfi_push(X) .cfi_adjust_cfa_offset 4; .cfi_rel_offset X, 0 +# define cfi_pop(X) .cfi_adjust_cfa_offset -4; .cfi_restore X +# endif +#else +# define cfi_startproc() +# define cfi_endproc() +# define cfi_adjust_cfa_offset(X) +# define cfi_def_cfa_register(X) +# define cfi_register(D,S) +# define cfi_push(X) +# define cfi_pop(X) +#endif /* HAVE_GAS_CFI_SECTIONS_DIRECTIVE */ + +#ifdef L_chkstk +/* Function prologue calls __chkstk to probe the stack when allocating more + than CHECK_STACK_LIMIT bytes in one go. Touching the stack at 4K + increments is necessary to ensure that the guard pages used + by the OS virtual memory manger are allocated in correct sequence. */ + + .global ___chkstk + .global __alloca +#ifdef _WIN64 +/* __alloca is a normal function call, which uses %rcx as the argument. */ + cfi_startproc() +__alloca: + movq %rcx, %rax + /* FALLTHRU */ + +/* ___chkstk is a *special* function call, which uses %rax as the argument. + We avoid clobbering the 4 integer argument registers, %rcx, %rdx, + %r8 and %r9, which leaves us with %rax, %r10, and %r11 to use. */ + .align 4 +___chkstk: + popq %r11 /* pop return address */ + cfi_adjust_cfa_offset(-8) /* indicate return address in r11 */ + cfi_register(%rip, %r11) + movq %rsp, %r10 + cmpq $0x1000, %rax /* > 4k ?*/ + jb 2f + +1: subq $0x1000, %r10 /* yes, move pointer down 4k*/ + orl $0x0, (%r10) /* probe there */ + subq $0x1000, %rax /* decrement count */ + cmpq $0x1000, %rax + ja 1b /* and do it again */ + +2: subq %rax, %r10 + movq %rsp, %rax /* hold CFA until return */ + cfi_def_cfa_register(%rax) + orl $0x0, (%r10) /* less than 4k, just peek here */ + movq %r10, %rsp /* decrement stack */ + + /* Push the return value back. Doing this instead of just + jumping to %r11 preserves the cached call-return stack + used by most modern processors. */ + pushq %r11 + ret + cfi_endproc() +#else + cfi_startproc() +___chkstk: +__alloca: + pushl %ecx /* save temp */ + cfi_push(%eax) + leal 8(%esp), %ecx /* point past return addr */ + cmpl $0x1000, %eax /* > 4k ?*/ + jb 2f + +1: subl $0x1000, %ecx /* yes, move pointer down 4k*/ + orl $0x0, (%ecx) /* probe there */ + subl $0x1000, %eax /* decrement count */ + cmpl $0x1000, %eax + ja 1b /* and do it again */ + +2: subl %eax, %ecx + orl $0x0, (%ecx) /* less than 4k, just peek here */ + movl %esp, %eax /* save current stack pointer */ + cfi_def_cfa_register(%eax) + movl %ecx, %esp /* decrement stack */ + movl (%eax), %ecx /* recover saved temp */ + + /* Copy the return register. Doing this instead of just jumping to + the address preserves the cached call-return stack used by most + modern processors. 
*/ + pushl 4(%eax) + ret + cfi_endproc() +#endif /* _WIN64 */ +#endif /* L_chkstk */ + +#ifdef L_chkstk_ms +/* ___chkstk_ms is a *special* function call, which uses %rax as the argument. + We avoid clobbering any registers. Unlike ___chkstk, it just probes the + stack and does no stack allocation. */ + .global ___chkstk_ms +#ifdef _WIN64 + cfi_startproc() +___chkstk_ms: + pushq %rcx /* save temps */ + cfi_push(%rcx) + pushq %rax + cfi_push(%rax) + cmpq $0x1000, %rax /* > 4k ?*/ + leaq 24(%rsp), %rcx /* point past return addr */ + jb 2f + +1: subq $0x1000, %rcx /* yes, move pointer down 4k */ + orq $0x0, (%rcx) /* probe there */ + subq $0x1000, %rax /* decrement count */ + cmpq $0x1000, %rax + ja 1b /* and do it again */ + +2: subq %rax, %rcx + orq $0x0, (%rcx) /* less than 4k, just peek here */ + + popq %rax + cfi_pop(%rax) + popq %rcx + cfi_pop(%rcx) + ret + cfi_endproc() +#else + cfi_startproc() +___chkstk_ms: + pushl %ecx /* save temp */ + cfi_push(%ecx) + pushl %eax + cfi_push(%eax) + cmpl $0x1000, %eax /* > 4k ?*/ + leal 12(%esp), %ecx /* point past return addr */ + jb 2f + +1: subl $0x1000, %ecx /* yes, move pointer down 4k*/ + orl $0x0, (%ecx) /* probe there */ + subl $0x1000, %eax /* decrement count */ + cmpl $0x1000, %eax + ja 1b /* and do it again */ + +2: subl %eax, %ecx + orl $0x0, (%ecx) /* less than 4k, just peek here */ + + popl %eax + cfi_pop(%eax) + popl %ecx + cfi_pop(%ecx) + ret + cfi_endproc() +#endif /* _WIN64 */ +#endif /* L_chkstk_ms */ diff --git a/libgcc/config/i386/t-chkstk b/libgcc/config/i386/t-chkstk new file mode 100644 index 00000000000..822981faab8 --- /dev/null +++ b/libgcc/config/i386/t-chkstk @@ -0,0 +1,2 @@ +LIB1ASMSRC = i386/cygwin.S +LIB1ASMFUNCS = _chkstk _chkstk_ms diff --git a/libgcc/config/ia64/__divxf3.S b/libgcc/config/ia64/__divxf3.S new file mode 100644 index 00000000000..9cba8f59423 --- /dev/null +++ b/libgcc/config/ia64/__divxf3.S @@ -0,0 +1,11 @@ +#ifdef SHARED +#define __divtf3 __divtf3_compat +#endif + +#define L__divxf3 +#include "config/ia64/lib1funcs.S" + +#ifdef SHARED +#undef __divtf3 +.symver __divtf3_compat, __divtf3@GCC_3.0 +#endif diff --git a/libgcc/config/ia64/__divxf3.asm b/libgcc/config/ia64/__divxf3.asm deleted file mode 100644 index f741bdaf9bc..00000000000 --- a/libgcc/config/ia64/__divxf3.asm +++ /dev/null @@ -1,11 +0,0 @@ -#ifdef SHARED -#define __divtf3 __divtf3_compat -#endif - -#define L__divxf3 -#include "config/ia64/lib1funcs.asm" - -#ifdef SHARED -#undef __divtf3 -.symver __divtf3_compat, __divtf3@GCC_3.0 -#endif diff --git a/libgcc/config/ia64/_fixtfdi.S b/libgcc/config/ia64/_fixtfdi.S new file mode 100644 index 00000000000..863b70f7edc --- /dev/null +++ b/libgcc/config/ia64/_fixtfdi.S @@ -0,0 +1,11 @@ +#ifdef SHARED +#define __fixtfti __fixtfti_compat +#endif + +#define L_fixtfdi +#include "config/ia64/lib1funcs.S" + +#ifdef SHARED +#undef __fixtfti +.symver __fixtfti_compat, __fixtfti@GCC_3.0 +#endif diff --git a/libgcc/config/ia64/_fixtfdi.asm b/libgcc/config/ia64/_fixtfdi.asm deleted file mode 100644 index 4d13c808c51..00000000000 --- a/libgcc/config/ia64/_fixtfdi.asm +++ /dev/null @@ -1,11 +0,0 @@ -#ifdef SHARED -#define __fixtfti __fixtfti_compat -#endif - -#define L_fixtfdi -#include "config/ia64/lib1funcs.asm" - -#ifdef SHARED -#undef __fixtfti -.symver __fixtfti_compat, __fixtfti@GCC_3.0 -#endif diff --git a/libgcc/config/ia64/_fixunstfdi.S b/libgcc/config/ia64/_fixunstfdi.S new file mode 100644 index 00000000000..aac6a284eaa --- /dev/null +++ b/libgcc/config/ia64/_fixunstfdi.S @@ -0,0 +1,11 @@ +#ifdef 
SHARED +#define __fixunstfti __fixunstfti_compat +#endif + +#define L_fixunstfdi +#include "config/ia64/lib1funcs.S" + +#ifdef SHARED +#undef __fixunstfti +.symver __fixunstfti_compat, __fixunstfti@GCC_3.0 +#endif diff --git a/libgcc/config/ia64/_fixunstfdi.asm b/libgcc/config/ia64/_fixunstfdi.asm deleted file mode 100644 index b722d9e90dc..00000000000 --- a/libgcc/config/ia64/_fixunstfdi.asm +++ /dev/null @@ -1,11 +0,0 @@ -#ifdef SHARED -#define __fixunstfti __fixunstfti_compat -#endif - -#define L_fixunstfdi -#include "config/ia64/lib1funcs.asm" - -#ifdef SHARED -#undef __fixunstfti -.symver __fixunstfti_compat, __fixunstfti@GCC_3.0 -#endif diff --git a/libgcc/config/ia64/_floatditf.S b/libgcc/config/ia64/_floatditf.S new file mode 100644 index 00000000000..e37404d26d5 --- /dev/null +++ b/libgcc/config/ia64/_floatditf.S @@ -0,0 +1,11 @@ +#ifdef SHARED +#define __floattitf __floattitf_compat +#endif + +#define L_floatditf +#include "config/ia64/lib1funcs.S" + +#ifdef SHARED +#undef __floattitf +.symver __floattitf_compat, __floattitf@GCC_3.0 +#endif diff --git a/libgcc/config/ia64/_floatditf.asm b/libgcc/config/ia64/_floatditf.asm deleted file mode 100644 index 21d77028176..00000000000 --- a/libgcc/config/ia64/_floatditf.asm +++ /dev/null @@ -1,11 +0,0 @@ -#ifdef SHARED -#define __floattitf __floattitf_compat -#endif - -#define L_floatditf -#include "config/ia64/lib1funcs.asm" - -#ifdef SHARED -#undef __floattitf -.symver __floattitf_compat, __floattitf@GCC_3.0 -#endif diff --git a/libgcc/config/ia64/lib1funcs.S b/libgcc/config/ia64/lib1funcs.S new file mode 100644 index 00000000000..b7eaa6eca3c --- /dev/null +++ b/libgcc/config/ia64/lib1funcs.S @@ -0,0 +1,795 @@ +/* Copyright (C) 2000, 2001, 2003, 2005, 2009 Free Software Foundation, Inc. + Contributed by James E. Wilson . + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef L__divxf3 +// Compute a 80-bit IEEE double-extended quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. +// +// __divtf3 is an alternate symbol name for backward compatibility. 
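The frcpa-based sequences in this file all follow the same pattern: the hardware supplies a low-precision reciprocal approximation, and a short chain of fma/fnma instructions refines both the reciprocal and the quotient estimate. A rough C model of one refinement step (illustrative only; the function name is invented, and the real code runs several such steps in extended precision, rounding only at the end):

    /* One Newton-Raphson refinement of a reciprocal and quotient:
       y' = y + y*(1 - b*y),  q' = q + y'*(a - b*q).
       The assembly below expresses these with fnma/fma.  */
    double
    refine_quotient (double a, double b, double y /* ~ 1/b */, double q /* ~ a/b */)
    {
      double e  = 1.0 - b * y;   /* fnma: reciprocal residual */
      double y1 = y + e * y;     /* fma:  improved reciprocal */
      double r  = a - b * q;     /* fnma: quotient residual   */
      return q + r * y1;         /* fma:  improved quotient   */
    }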
+ + .text + .align 16 + .global __divxf3 + .proc __divxf3 +__divxf3: +#ifdef SHARED + .global __divtf3 +__divtf3: +#endif + cmp.eq p7, p0 = r0, r0 + frcpa.s0 f10, p6 = farg0, farg1 + ;; +(p6) cmp.ne p7, p0 = r0, r0 + .pred.rel.mutex p6, p7 +(p6) fnma.s1 f11 = farg1, f10, f1 +(p6) fma.s1 f12 = farg0, f10, f0 + ;; +(p6) fma.s1 f13 = f11, f11, f0 +(p6) fma.s1 f14 = f11, f11, f11 + ;; +(p6) fma.s1 f11 = f13, f13, f11 +(p6) fma.s1 f13 = f14, f10, f10 + ;; +(p6) fma.s1 f10 = f13, f11, f10 +(p6) fnma.s1 f11 = farg1, f12, farg0 + ;; +(p6) fma.s1 f11 = f11, f10, f12 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f10 = f12, f10, f10 +(p6) fnma.s1 f12 = farg1, f11, farg0 + ;; +(p6) fma.s0 fret0 = f12, f10, f11 +(p7) mov fret0 = f10 + br.ret.sptk rp + .endp __divxf3 +#endif + +#ifdef L__divdf3 +// Compute a 64-bit IEEE double quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. + + .text + .align 16 + .global __divdf3 + .proc __divdf3 +__divdf3: + cmp.eq p7, p0 = r0, r0 + frcpa.s0 f10, p6 = farg0, farg1 + ;; +(p6) cmp.ne p7, p0 = r0, r0 + .pred.rel.mutex p6, p7 +(p6) fmpy.s1 f11 = farg0, f10 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fmpy.s1 f13 = f12, f12 + ;; +(p6) fma.s1 f10 = f12, f10, f10 +(p6) fma.s1 f11 = f13, f11, f11 + ;; +(p6) fmpy.s1 f12 = f13, f13 +(p6) fma.s1 f10 = f13, f10, f10 + ;; +(p6) fma.d.s1 f11 = f12, f11, f11 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fnma.d.s1 f8 = farg1, f11, farg0 + ;; +(p6) fma.d fret0 = f8, f10, f11 +(p7) mov fret0 = f10 + br.ret.sptk rp + ;; + .endp __divdf3 +#endif + +#ifdef L__divsf3 +// Compute a 32-bit IEEE float quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. + + .text + .align 16 + .global __divsf3 + .proc __divsf3 +__divsf3: + cmp.eq p7, p0 = r0, r0 + frcpa.s0 f10, p6 = farg0, farg1 + ;; +(p6) cmp.ne p7, p0 = r0, r0 + .pred.rel.mutex p6, p7 +(p6) fmpy.s1 f8 = farg0, f10 +(p6) fnma.s1 f9 = farg1, f10, f1 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fmpy.s1 f9 = f9, f9 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fmpy.s1 f9 = f9, f9 + ;; +(p6) fma.d.s1 f10 = f9, f8, f8 + ;; +(p6) fnorm.s.s0 fret0 = f10 +(p7) mov fret0 = f10 + br.ret.sptk rp + ;; + .endp __divsf3 +#endif + +#ifdef L__divdi3 +// Compute a 64-bit integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __divdi3 + .proc __divdi3 +__divdi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + // Convert the inputs to FP, so that they won't be treated as unsigned. + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 +(p7) break 1 + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fnma.s1 f11 = f9, f10, f1 +(p6) fmpy.s1 f12 = f8, f10 + ;; +(p6) fmpy.s1 f13 = f11, f11 +(p6) fma.s1 f12 = f11, f12, f12 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; +(p6) fma.s1 f10 = f12, f10, f11 + ;; + // Round quotient to an integer. + fcvt.fx.trunc.s1 f10 = f10 + ;; + // Transfer result to GP registers. 
+ getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __divdi3 +#endif + +#ifdef L__moddi3 +// Compute a 64-bit integer modulus. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend (a). in1 holds the divisor (b). + + .text + .align 16 + .global __moddi3 + .proc __moddi3 +__moddi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f14 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + // Convert the inputs to FP, so that they won't be treated as unsigned. + fcvt.xf f8 = f14 + fcvt.xf f9 = f9 +(p7) break 1 + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f11 = f9, f10, f1 + ;; +(p6) fma.s1 f12 = f11, f12, f12 +(p6) fmpy.s1 f13 = f11, f11 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; + sub in1 = r0, in1 +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f10 = f12, f10, f11 + ;; + fcvt.fx.trunc.s1 f10 = f10 + ;; + // r = q * (-b) + a + xma.l f10 = f10, f9, f14 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __moddi3 +#endif + +#ifdef L__udivdi3 +// Compute a 64-bit unsigned integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __udivdi3 + .proc __udivdi3 +__udivdi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + // Convert the inputs to FP, to avoid FP software-assist faults. + fcvt.xuf.s1 f8 = f8 + fcvt.xuf.s1 f9 = f9 +(p7) break 1 + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fnma.s1 f11 = f9, f10, f1 +(p6) fmpy.s1 f12 = f8, f10 + ;; +(p6) fmpy.s1 f13 = f11, f11 +(p6) fma.s1 f12 = f11, f12, f12 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; +(p6) fma.s1 f10 = f12, f10, f11 + ;; + // Round quotient to an unsigned integer. + fcvt.fxu.trunc.s1 f10 = f10 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __udivdi3 +#endif + +#ifdef L__umoddi3 +// Compute a 64-bit unsigned integer modulus. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend (a). in1 holds the divisor (b). + + .text + .align 16 + .global __umoddi3 + .proc __umoddi3 +__umoddi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f14 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + // Convert the inputs to FP, to avoid FP software assist faults. + fcvt.xuf.s1 f8 = f14 + fcvt.xuf.s1 f9 = f9 +(p7) break 1; + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. 
+(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f11 = f9, f10, f1 + ;; +(p6) fma.s1 f12 = f11, f12, f12 +(p6) fmpy.s1 f13 = f11, f11 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; + sub in1 = r0, in1 +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f10 = f12, f10, f11 + ;; + // Round quotient to an unsigned integer. + fcvt.fxu.trunc.s1 f10 = f10 + ;; + // r = q * (-b) + a + xma.l f10 = f10, f9, f14 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __umoddi3 +#endif + +#ifdef L__divsi3 +// Compute a 32-bit integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __divsi3 + .proc __divsi3 +__divsi3: + .regstk 2,0,0,0 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + sxt4 in0 = in0 + sxt4 in1 = in1 + ;; + setf.sig f8 = in0 + setf.sig f9 = in1 +(p7) break 1 + ;; + mov r2 = 0x0ffdd + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 + ;; +(p6) fmpy.s1 f8 = f8, f10 +(p6) fnma.s1 f9 = f9, f10, f1 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fma.s1 f9 = f9, f9, f11 + ;; +(p6) fma.s1 f10 = f9, f8, f8 + ;; + fcvt.fx.trunc.s1 f10 = f10 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __divsi3 +#endif + +#ifdef L__modsi3 +// Compute a 32-bit integer modulus. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __modsi3 + .proc __modsi3 +__modsi3: + .regstk 2,0,0,0 + mov r2 = 0x0ffdd + sxt4 in0 = in0 + sxt4 in1 = in1 + ;; + setf.sig f13 = r32 + setf.sig f9 = r33 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + sub in1 = r0, in1 + fcvt.xf f8 = f13 + fcvt.xf f9 = f9 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 +(p7) break 1 + ;; +(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f10 = f9, f10, f1 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f12 = f10, f12, f12 +(p6) fma.s1 f10 = f10, f10, f11 + ;; +(p6) fma.s1 f10 = f10, f12, f12 + ;; + fcvt.fx.trunc.s1 f10 = f10 + ;; + xma.l f10 = f10, f9, f13 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __modsi3 +#endif + +#ifdef L__udivsi3 +// Compute a 32-bit unsigned integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __udivsi3 + .proc __udivsi3 +__udivsi3: + .regstk 2,0,0,0 + mov r2 = 0x0ffdd + zxt4 in0 = in0 + zxt4 in1 = in1 + ;; + setf.sig f8 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 +(p7) break 1 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 + ;; +(p6) fmpy.s1 f8 = f8, f10 +(p6) fnma.s1 f9 = f9, f10, f1 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fma.s1 f9 = f9, f9, f11 + ;; +(p6) fma.s1 f10 = f9, f8, f8 + ;; + fcvt.fxu.trunc.s1 f10 = f10 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __udivsi3 +#endif + +#ifdef L__umodsi3 +// Compute a 32-bit unsigned integer modulus. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. 
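As in __moddi3 and __umoddi3 above, the body that follows computes the quotient with the reciprocal sequence and then recovers the remainder with a single multiply-add against the negated divisor. In C terms (an illustrative identity, not part of the moved file; the function name is invented for the sketch), the final xma.l step is:

    /* Remainder from a truncated quotient with one multiply-add,
       matching the "r = q * (-b) + a" step used by these routines.
       All arithmetic is modulo 2^32, so the wrap-around is harmless.  */
    unsigned int
    umod_from_quotient (unsigned int a, unsigned int b)
    {
      unsigned int q = a / b;      /* stands in for the frcpa/fma sequence */
      return q * (0u - b) + a;     /* == a - q * b, i.e. a % b */
    }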
+ + .text + .align 16 + .global __umodsi3 + .proc __umodsi3 +__umodsi3: + .regstk 2,0,0,0 + mov r2 = 0x0ffdd + zxt4 in0 = in0 + zxt4 in1 = in1 + ;; + setf.sig f13 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + sub in1 = r0, in1 + fcvt.xf f8 = f13 + fcvt.xf f9 = f9 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 +(p7) break 1; + ;; +(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f10 = f9, f10, f1 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f12 = f10, f12, f12 +(p6) fma.s1 f10 = f10, f10, f11 + ;; +(p6) fma.s1 f10 = f10, f12, f12 + ;; + fcvt.fxu.trunc.s1 f10 = f10 + ;; + xma.l f10 = f10, f9, f13 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __umodsi3 +#endif + +#ifdef L__save_stack_nonlocal +// Notes on save/restore stack nonlocal: We read ar.bsp but write +// ar.bspstore. This is because ar.bsp can be read at all times +// (independent of the RSE mode) but since it's read-only we need to +// restore the value via ar.bspstore. This is OK because +// ar.bsp==ar.bspstore after executing "flushrs". + +// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer) + + .text + .align 16 + .global __ia64_save_stack_nonlocal + .proc __ia64_save_stack_nonlocal +__ia64_save_stack_nonlocal: + { .mmf + alloc r18 = ar.pfs, 2, 0, 0, 0 + mov r19 = ar.rsc + ;; + } + { .mmi + flushrs + st8 [in0] = in1, 24 + and r19 = 0x1c, r19 + ;; + } + { .mmi + st8 [in0] = r18, -16 + mov ar.rsc = r19 + or r19 = 0x3, r19 + ;; + } + { .mmi + mov r16 = ar.bsp + mov r17 = ar.rnat + adds r2 = 8, in0 + ;; + } + { .mmi + st8 [in0] = r16 + st8 [r2] = r17 + } + { .mib + mov ar.rsc = r19 + br.ret.sptk.few rp + ;; + } + .endp __ia64_save_stack_nonlocal +#endif + +#ifdef L__nonlocal_goto +// void __ia64_nonlocal_goto(void *target_label, void *save_area, +// void *static_chain); + + .text + .align 16 + .global __ia64_nonlocal_goto + .proc __ia64_nonlocal_goto +__ia64_nonlocal_goto: + { .mmi + alloc r20 = ar.pfs, 3, 0, 0, 0 + ld8 r12 = [in1], 8 + mov.ret.sptk rp = in0, .L0 + ;; + } + { .mmf + ld8 r16 = [in1], 8 + mov r19 = ar.rsc + ;; + } + { .mmi + flushrs + ld8 r17 = [in1], 8 + and r19 = 0x1c, r19 + ;; + } + { .mmi + ld8 r18 = [in1] + mov ar.rsc = r19 + or r19 = 0x3, r19 + ;; + } + { .mmi + mov ar.bspstore = r16 + ;; + mov ar.rnat = r17 + ;; + } + { .mmi + loadrs + invala + mov r15 = in2 + ;; + } +.L0: { .mib + mov ar.rsc = r19 + mov ar.pfs = r18 + br.ret.sptk.few rp + ;; + } + .endp __ia64_nonlocal_goto +#endif + +#ifdef L__restore_stack_nonlocal +// This is mostly the same as nonlocal_goto above. +// ??? This has not been tested yet. + +// void __ia64_restore_stack_nonlocal(void *save_area) + + .text + .align 16 + .global __ia64_restore_stack_nonlocal + .proc __ia64_restore_stack_nonlocal +__ia64_restore_stack_nonlocal: + { .mmf + alloc r20 = ar.pfs, 4, 0, 0, 0 + ld8 r12 = [in0], 8 + ;; + } + { .mmb + ld8 r16=[in0], 8 + mov r19 = ar.rsc + ;; + } + { .mmi + flushrs + ld8 r17 = [in0], 8 + and r19 = 0x1c, r19 + ;; + } + { .mmf + ld8 r18 = [in0] + mov ar.rsc = r19 + ;; + } + { .mmi + mov ar.bspstore = r16 + ;; + mov ar.rnat = r17 + or r19 = 0x3, r19 + ;; + } + { .mmf + loadrs + invala + ;; + } +.L0: { .mib + mov ar.rsc = r19 + mov ar.pfs = r18 + br.ret.sptk.few rp + ;; + } + .endp __ia64_restore_stack_nonlocal +#endif + +#ifdef L__trampoline +// Implement the nested function trampoline. This is out of line +// so that we don't have to bother with flushing the icache, as +// well as making the on-stack trampoline smaller. 
+// +// The trampoline has the following form: +// +// +-------------------+ > +// TRAMP: | __ia64_trampoline | | +// +-------------------+ > fake function descriptor +// | TRAMP+16 | | +// +-------------------+ > +// | target descriptor | +// +-------------------+ +// | static link | +// +-------------------+ + + .text + .align 16 + .global __ia64_trampoline + .proc __ia64_trampoline +__ia64_trampoline: + { .mmi + ld8 r2 = [r1], 8 + ;; + ld8 r15 = [r1] + } + { .mmi + ld8 r3 = [r2], 8 + ;; + ld8 r1 = [r2] + mov b6 = r3 + } + { .bbb + br.sptk.many b6 + ;; + } + .endp __ia64_trampoline +#endif + +#ifdef SHARED +// Thunks for backward compatibility. +#ifdef L_fixtfdi + .text + .align 16 + .global __fixtfti + .proc __fixtfti +__fixtfti: + { .bbb + br.sptk.many __fixxfti + ;; + } + .endp __fixtfti +#endif +#ifdef L_fixunstfdi + .align 16 + .global __fixunstfti + .proc __fixunstfti +__fixunstfti: + { .bbb + br.sptk.many __fixunsxfti + ;; + } + .endp __fixunstfti +#endif +#ifdef L_floatditf + .align 16 + .global __floattitf + .proc __floattitf +__floattitf: + { .bbb + br.sptk.many __floattixf + ;; + } + .endp __floattitf +#endif +#endif diff --git a/libgcc/config/ia64/t-hpux b/libgcc/config/ia64/t-hpux index ef3387e7a61..1fee41385c0 100644 --- a/libgcc/config/ia64/t-hpux +++ b/libgcc/config/ia64/t-hpux @@ -1 +1,6 @@ +# On HP-UX we do not want _fixtfdi, _fixunstfdi, or _floatditf from +# LIB1ASMSRC. These functions map the 128 bit conversion function names +# to 80 bit conversions and were done for Linux backwards compatibility. +LIB1ASMFUNCS := $(filter-out _fixtfdi _fixunstfdi _floatditf,$(LIB1ASMFUNCS)) + LIB2ADDEH = $(srcdir)/unwind-c.c diff --git a/libgcc/config/ia64/t-ia64 b/libgcc/config/ia64/t-ia64 index 59cf3aa75f4..80445d8a2a8 100644 --- a/libgcc/config/ia64/t-ia64 +++ b/libgcc/config/ia64/t-ia64 @@ -1,3 +1,16 @@ +LIB1ASMSRC = ia64/lib1funcs.S + +# We use different names for the DImode div/mod files so that they won't +# conflict with libgcc2.c files. We used to use __ia64 as a prefix, now +# we use __ as the prefix. Note that L_divdi3 in libgcc2.c actually defines +# a TImode divide function, so there is no actual overlap here between +# libgcc2.c and lib1funcs.S. +LIB1ASMFUNCS = __divxf3 __divdf3 __divsf3 \ + __divdi3 __moddi3 __udivdi3 __umoddi3 \ + __divsi3 __modsi3 __udivsi3 __umodsi3 __save_stack_nonlocal \ + __nonlocal_goto __restore_stack_nonlocal __trampoline \ + _fixtfdi _fixunstfdi _floatditf + CUSTOM_CRTSTUFF = yes # Assemble startup files. diff --git a/libgcc/config/ia64/t-softfp-compat b/libgcc/config/ia64/t-softfp-compat index d3dad68c48f..00f45d51cd0 100644 --- a/libgcc/config/ia64/t-softfp-compat +++ b/libgcc/config/ia64/t-softfp-compat @@ -3,5 +3,5 @@ # Replace __dvxf3 _fixtfdi _fixunstfdi _floatditf libgcc1-tf-functions = __divxf3 _fixtfdi _fixunstfdi _floatditf LIB1ASMFUNCS := $(filter-out $(libgcc1-tf-functions), $(LIB1ASMFUNCS)) -libgcc1-tf-compats = $(addsuffix .asm, $(libgcc1-tf-functions)) +libgcc1-tf-compats = $(addsuffix .S, $(libgcc1-tf-functions)) LIB2ADD += $(addprefix $(srcdir)/config/ia64/, $(libgcc1-tf-compats)) diff --git a/libgcc/config/m32c/lib1funcs.S b/libgcc/config/m32c/lib1funcs.S new file mode 100644 index 00000000000..9b657787187 --- /dev/null +++ b/libgcc/config/m32c/lib1funcs.S @@ -0,0 +1,231 @@ +/* libgcc routines for R8C/M16C/M32C + Copyright (C) 2005, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Red Hat. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#if defined(__r8c_cpu__) || defined(__m16c_cpu__) +#define A16 +#define A(n,w) n +#define W w +#else +#define A24 +#define A(n,w) w +#define W l +#endif + + +#ifdef L__m32c_memregs + +/* Warning: these memory locations are used as a register bank. They + *must* end up consecutive in any final executable, so you may *not* + use the otherwise obvious ".comm" directive to allocate space for + them. */ + + .bss + .global mem0 +mem0: .space 1 + .global mem1 +mem1: .space 1 + .global mem2 +mem2: .space 1 + .global mem3 +mem3: .space 1 + .global mem4 +mem4: .space 1 + .global mem5 +mem5: .space 1 + .global mem6 +mem6: .space 1 + .global mem7 +mem7: .space 1 + .global mem8 +mem8: .space 1 + .global mem9 +mem9: .space 1 + .global mem10 +mem10: .space 1 + .global mem11 +mem11: .space 1 + .global mem12 +mem12: .space 1 + .global mem13 +mem13: .space 1 + .global mem14 +mem14: .space 1 + .global mem15 +mem15: .space 1 + +#endif + +#ifdef L__m32c_eh_return + .text + .global __m32c_eh_return +__m32c_eh_return: + + /* At this point, r0 has the stack adjustment, r1r3 has the + address to return to. The stack looks like this: + + old_ra + old_fp + <- unwound sp + ... + fb + through + r0 + <- sp + + What we need to do is restore all the registers, update the + stack, and return to the right place. + */ + + stc sp,a0 + + add.W A(#16,#24),a0 + /* a0 points to the current stack, just above the register + save areas */ + + mov.w a0,a1 + exts.w r0 + sub.W A(r0,r2r0),a1 + sub.W A(#3,#4),a1 + /* a1 points to the new stack. */ + + /* This is for the "rts" below. */ + mov.w r1,[a1] +#ifdef A16 + mov.w r2,r1 + mov.b r1l,2[a1] +#else + mov.w r2,2[a1] +#endif + + /* This is for the "popc sp" below. */ + mov.W a1,[a0] + + popm r0,r1,r2,r3,a0,a1,sb,fb + popc sp + rts +#endif + +/* SImode arguments for SI foo(SI,SI) functions. 
*/ +#ifdef A16 +#define SAL 5[fb] +#define SAH 7[fb] +#define SBL 9[fb] +#define SBH 11[fb] +#else +#define SAL 8[fb] +#define SAH 10[fb] +#define SBL 12[fb] +#define SBH 14[fb] +#endif + +#ifdef L__m32c_mulsi3 + .text + .global ___mulsi3 +___mulsi3: + enter #0 + push.w r2 + mov.w SAL,r0 + mulu.w SBL,r0 /* writes to r2r0 */ + mov.w r0,mem0 + mov.w r2,mem2 + mov.w SAL,r0 + mulu.w SBH,r0 /* writes to r2r0 */ + add.w r0,mem2 + mov.w SAH,r0 + mulu.w SBL,r0 /* writes to r2r0 */ + add.w r0,mem2 + pop.w r2 + exitd +#endif + +#ifdef L__m32c_cmpsi2 + .text + .global ___cmpsi2 +___cmpsi2: + enter #0 + cmp.w SBH,SAH + jgt cmpsi_gt + jlt cmpsi_lt + cmp.w SBL,SAL + jgt cmpsi_gt + jlt cmpsi_lt + mov.w #1,r0 + exitd +cmpsi_gt: + mov.w #2,r0 + exitd +cmpsi_lt: + mov.w #0,r0 + exitd +#endif + +#ifdef L__m32c_ucmpsi2 + .text + .global ___ucmpsi2 +___ucmpsi2: + enter #0 + cmp.w SBH,SAH + jgtu cmpsi_gt + jltu cmpsi_lt + cmp.w SBL,SAL + jgtu cmpsi_gt + jltu cmpsi_lt + mov.w #1,r0 + exitd +cmpsi_gt: + mov.w #2,r0 + exitd +cmpsi_lt: + mov.w #0,r0 + exitd +#endif + +#ifdef L__m32c_jsri16 + .text +#ifdef A16 + .global m32c_jsri16 +m32c_jsri16: + add.w #-1, sp + + /* Read the address (16 bits) and return address (24 bits) off + the stack. */ + mov.w 4[sp], r0 + mov.w 1[sp], r3 + mov.b 3[sp], a0 /* This zero-extends, so the high byte has + zero in it. */ + + /* Write the return address, then new address, to the stack. */ + mov.w a0, 1[sp] /* Just to get the zero in 2[sp]. */ + mov.w r0, 0[sp] + mov.w r3, 3[sp] + mov.b a0, 5[sp] + + /* This "returns" to the target address, leaving the pending + return address on the stack. */ + rts +#endif + +#endif diff --git a/libgcc/config/m32c/t-m32c b/libgcc/config/m32c/t-m32c new file mode 100644 index 00000000000..d21483750fd --- /dev/null +++ b/libgcc/config/m32c/t-m32c @@ -0,0 +1,9 @@ +LIB1ASMSRC = m32c/lib1funcs.S + +LIB1ASMFUNCS = \ + __m32c_memregs \ + __m32c_eh_return \ + __m32c_mulsi3 \ + __m32c_cmpsi2 \ + __m32c_ucmpsi2 \ + __m32c_jsri16 diff --git a/libgcc/config/m32r/initfini.c b/libgcc/config/m32r/initfini.c index 6e7d58614c7..56332459223 100644 --- a/libgcc/config/m32r/initfini.c +++ b/libgcc/config/m32r/initfini.c @@ -1,5 +1,5 @@ /* .init/.fini section handling + C++ global constructor/destructor handling. - This file is based on crtstuff.c, sol2-crti.asm, sol2-crtn.asm. + This file is based on crtstuff.c, sol2-crti.S, sol2-crtn.S. Copyright (C) 1996, 1997, 1998, 2006, 2009 Free Software Foundation, Inc. diff --git a/libgcc/config/m68k/lb1sf68.S b/libgcc/config/m68k/lb1sf68.S new file mode 100644 index 00000000000..0339a092c4f --- /dev/null +++ b/libgcc/config/m68k/lb1sf68.S @@ -0,0 +1,4116 @@ +/* libgcc routines for 68000 w/o floating-point hardware. + Copyright (C) 1994, 1996, 1997, 1998, 2008, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Use this one for any 680x0; assumes no floating point hardware. + The trailing " '" appearing on some lines is for ANSI preprocessors. Yuk. + Some of this code comes from MINIX, via the folks at ericsson. + D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992 +*/ + +/* These are predefined by new versions of GNU cpp. */ + +#ifndef __USER_LABEL_PREFIX__ +#define __USER_LABEL_PREFIX__ _ +#endif + +#ifndef __REGISTER_PREFIX__ +#define __REGISTER_PREFIX__ +#endif + +#ifndef __IMMEDIATE_PREFIX__ +#define __IMMEDIATE_PREFIX__ # +#endif + +/* ANSI concatenation macros. */ + +#define CONCAT1(a, b) CONCAT2(a, b) +#define CONCAT2(a, b) a ## b + +/* Use the right prefix for global labels. */ + +#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) + +/* Note that X is a function. */ + +#ifdef __ELF__ +#define FUNC(x) .type SYM(x),function +#else +/* The .proc pseudo-op is accepted, but ignored, by GAS. We could just + define this to the empty string for non-ELF systems, but defining it + to .proc means that the information is available to the assembler if + the need arises. */ +#define FUNC(x) .proc +#endif + +/* Use the right prefix for registers. */ + +#define REG(x) CONCAT1 (__REGISTER_PREFIX__, x) + +/* Use the right prefix for immediate values. */ + +#define IMM(x) CONCAT1 (__IMMEDIATE_PREFIX__, x) + +#define d0 REG (d0) +#define d1 REG (d1) +#define d2 REG (d2) +#define d3 REG (d3) +#define d4 REG (d4) +#define d5 REG (d5) +#define d6 REG (d6) +#define d7 REG (d7) +#define a0 REG (a0) +#define a1 REG (a1) +#define a2 REG (a2) +#define a3 REG (a3) +#define a4 REG (a4) +#define a5 REG (a5) +#define a6 REG (a6) +#define fp REG (fp) +#define sp REG (sp) +#define pc REG (pc) + +/* Provide a few macros to allow for PIC code support. + * With PIC, data is stored A5 relative so we've got to take a bit of special + * care to ensure that all loads of global data is via A5. PIC also requires + * jumps and subroutine calls to be PC relative rather than absolute. We cheat + * a little on this and in the PIC case, we use short offset branches and + * hope that the final object code is within range (which it should be). 
+ */ +#ifndef __PIC__ + + /* Non PIC (absolute/relocatable) versions */ + + .macro PICCALL addr + jbsr \addr + .endm + + .macro PICJUMP addr + jmp \addr + .endm + + .macro PICLEA sym, reg + lea \sym, \reg + .endm + + .macro PICPEA sym, areg + pea \sym + .endm + +#else /* __PIC__ */ + +# if defined (__uClinux__) + + /* Versions for uClinux */ + +# if defined(__ID_SHARED_LIBRARY__) + + /* -mid-shared-library versions */ + + .macro PICLEA sym, reg + movel a5@(_current_shared_library_a5_offset_), \reg + movel \sym@GOT(\reg), \reg + .endm + + .macro PICPEA sym, areg + movel a5@(_current_shared_library_a5_offset_), \areg + movel \sym@GOT(\areg), sp@- + .endm + + .macro PICCALL addr + PICLEA \addr,a0 + jsr a0@ + .endm + + .macro PICJUMP addr + PICLEA \addr,a0 + jmp a0@ + .endm + +# else /* !__ID_SHARED_LIBRARY__ */ + + /* Versions for -msep-data */ + + .macro PICLEA sym, reg + movel \sym@GOT(a5), \reg + .endm + + .macro PICPEA sym, areg + movel \sym@GOT(a5), sp@- + .endm + + .macro PICCALL addr +#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__) + lea \addr-.-8,a0 + jsr pc@(a0) +#else + jbsr \addr +#endif + .endm + + .macro PICJUMP addr + /* ISA C has no bra.l instruction, and since this assembly file + gets assembled into multiple object files, we avoid the + bra instruction entirely. */ +#if defined (__mcoldfire__) && !defined (__mcfisab__) + lea \addr-.-8,a0 + jmp pc@(a0) +#else + bra \addr +#endif + .endm + +# endif + +# else /* !__uClinux__ */ + + /* Versions for Linux */ + + .macro PICLEA sym, reg + movel #_GLOBAL_OFFSET_TABLE_@GOTPC, \reg + lea (-6, pc, \reg), \reg + movel \sym@GOT(\reg), \reg + .endm + + .macro PICPEA sym, areg + movel #_GLOBAL_OFFSET_TABLE_@GOTPC, \areg + lea (-6, pc, \areg), \areg + movel \sym@GOT(\areg), sp@- + .endm + + .macro PICCALL addr +#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__) + lea \addr-.-8,a0 + jsr pc@(a0) +#else + jbsr \addr +#endif + .endm + + .macro PICJUMP addr + /* ISA C has no bra.l instruction, and since this assembly file + gets assembled into multiple object files, we avoid the + bra instruction entirely. */ +#if defined (__mcoldfire__) && !defined (__mcfisab__) + lea \addr-.-8,a0 + jmp pc@(a0) +#else + bra \addr +#endif + .endm + +# endif +#endif /* __PIC__ */ + + +#ifdef L_floatex + +| This is an attempt at a decent floating point (single, double and +| extended double) code for the GNU C compiler. It should be easy to +| adapt to other compilers (but beware of the local labels!). + +| Starting date: 21 October, 1990 + +| It is convenient to introduce the notation (s,e,f) for a floating point +| number, where s=sign, e=exponent, f=fraction. We will call a floating +| point number fpn to abbreviate, independently of the precision. +| Let MAX_EXP be in each case the maximum exponent (255 for floats, 1023 +| for doubles and 16383 for long doubles). We then have the following +| different cases: +| 1. Normalized fpns have 0 < e < MAX_EXP. They correspond to +| (-1)^s x 1.f x 2^(e-bias-1). +| 2. Denormalized fpns have e=0. They correspond to numbers of the form +| (-1)^s x 0.f x 2^(-bias). +| 3. +/-INFINITY have e=MAX_EXP, f=0. +| 4. Quiet NaN (Not a Number) have all bits set. +| 5. Signaling NaN (Not a Number) have s=0, e=MAX_EXP, f=1. 
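For orientation, the classification above corresponds to the usual test on the raw exponent and fraction fields. A small C sketch for the single-precision case (MAX_EXP = 255; illustrative only, with its own enum and function names to avoid clashing with <math.h>):

    #include <stdint.h>

    enum sf_class { SF_ZERO, SF_DENORMAL, SF_NORMAL, SF_INFINITE, SF_NAN };

    static enum sf_class
    classify_sf (uint32_t bits)
    {
      uint32_t e = (bits >> 23) & 0xff;   /* exponent field */
      uint32_t f = bits & 0x7fffff;       /* fraction field */
      if (e == 0)
        return f ? SF_DENORMAL : SF_ZERO;
      if (e == 0xff)                      /* MAX_EXP */
        return f ? SF_NAN : SF_INFINITE;
      return SF_NORMAL;
    }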
+ +|============================================================================= +| exceptions +|============================================================================= + +| This is the floating point condition code register (_fpCCR): +| +| struct { +| short _exception_bits; +| short _trap_enable_bits; +| short _sticky_bits; +| short _rounding_mode; +| short _format; +| short _last_operation; +| union { +| float sf; +| double df; +| } _operand1; +| union { +| float sf; +| double df; +| } _operand2; +| } _fpCCR; + + .data + .even + + .globl SYM (_fpCCR) + +SYM (_fpCCR): +__exception_bits: + .word 0 +__trap_enable_bits: + .word 0 +__sticky_bits: + .word 0 +__rounding_mode: + .word ROUND_TO_NEAREST +__format: + .word NIL +__last_operation: + .word NOOP +__operand1: + .long 0 + .long 0 +__operand2: + .long 0 + .long 0 + +| Offsets: +EBITS = __exception_bits - SYM (_fpCCR) +TRAPE = __trap_enable_bits - SYM (_fpCCR) +STICK = __sticky_bits - SYM (_fpCCR) +ROUND = __rounding_mode - SYM (_fpCCR) +FORMT = __format - SYM (_fpCCR) +LASTO = __last_operation - SYM (_fpCCR) +OPER1 = __operand1 - SYM (_fpCCR) +OPER2 = __operand2 - SYM (_fpCCR) + +| The following exception types are supported: +INEXACT_RESULT = 0x0001 +UNDERFLOW = 0x0002 +OVERFLOW = 0x0004 +DIVIDE_BY_ZERO = 0x0008 +INVALID_OPERATION = 0x0010 + +| The allowed rounding modes are: +UNKNOWN = -1 +ROUND_TO_NEAREST = 0 | round result to nearest representable value +ROUND_TO_ZERO = 1 | round result towards zero +ROUND_TO_PLUS = 2 | round result towards plus infinity +ROUND_TO_MINUS = 3 | round result towards minus infinity + +| The allowed values of format are: +NIL = 0 +SINGLE_FLOAT = 1 +DOUBLE_FLOAT = 2 +LONG_FLOAT = 3 + +| The allowed values for the last operation are: +NOOP = 0 +ADD = 1 +MULTIPLY = 2 +DIVIDE = 3 +NEGATE = 4 +COMPARE = 5 +EXTENDSFDF = 6 +TRUNCDFSF = 7 + +|============================================================================= +| __clear_sticky_bits +|============================================================================= + +| The sticky bits are normally not cleared (thus the name), whereas the +| exception type and exception value reflect the last computation. +| This routine is provided to clear them (you can also write to _fpCCR, +| since it is globally visible). + + .globl SYM (__clear_sticky_bit) + + .text + .even + +| void __clear_sticky_bits(void); +SYM (__clear_sticky_bit): + PICLEA SYM (_fpCCR),a0 +#ifndef __mcoldfire__ + movew IMM (0),a0@(STICK) +#else + clr.w a0@(STICK) +#endif + rts + +|============================================================================= +| $_exception_handler +|============================================================================= + + .globl $_exception_handler + + .text + .even + +| This is the common exit point if an exception occurs. +| NOTE: it is NOT callable from C! +| It expects the exception type in d7, the format (SINGLE_FLOAT, +| DOUBLE_FLOAT or LONG_FLOAT) in d6, and the last operation code in d5. +| It sets the corresponding exception and sticky bits, and the format. +| Depending on the format if fills the corresponding slots for the +| operands which produced the exception (all this information is provided +| so if you write your own exception handlers you have enough information +| to deal with the problem). +| Then checks to see if the corresponding exception is trap-enabled, +| in which case it pushes the address of _fpCCR and traps through +| trap FPTRAP (15 for the moment). 
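Since _fpCCR is ordinary data, C code on the same target can examine it after calling these routines. A hedged sketch follows: the struct layout is copied from the comment above, the tag and function names are the sketch's own, and it assumes the C compiler applies the same user-label prefix as the SYM() convention in this file (which holds by construction):

    /* Mirror of the _fpCCR layout documented above.  Illustrative only.  */
    struct fpccr
    {
      short exception_bits;
      short trap_enable_bits;
      short sticky_bits;
      short rounding_mode;
      short format;
      short last_operation;
      union { float sf; double df; } operand1;
      union { float sf; double df; } operand2;
    };

    extern struct fpccr _fpCCR;            /* defined in this file */

    /* Example: has any operation since the last clear divided by zero?  */
    int
    saw_divide_by_zero (void)
    {
      return (_fpCCR.sticky_bits & 0x0008) != 0;   /* DIVIDE_BY_ZERO */
    }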
+ +FPTRAP = 15 + +$_exception_handler: + PICLEA SYM (_fpCCR),a0 + movew d7,a0@(EBITS) | set __exception_bits +#ifndef __mcoldfire__ + orw d7,a0@(STICK) | and __sticky_bits +#else + movew a0@(STICK),d4 + orl d7,d4 + movew d4,a0@(STICK) +#endif + movew d6,a0@(FORMT) | and __format + movew d5,a0@(LASTO) | and __last_operation + +| Now put the operands in place: +#ifndef __mcoldfire__ + cmpw IMM (SINGLE_FLOAT),d6 +#else + cmpl IMM (SINGLE_FLOAT),d6 +#endif + beq 1f + movel a6@(8),a0@(OPER1) + movel a6@(12),a0@(OPER1+4) + movel a6@(16),a0@(OPER2) + movel a6@(20),a0@(OPER2+4) + bra 2f +1: movel a6@(8),a0@(OPER1) + movel a6@(12),a0@(OPER2) +2: +| And check whether the exception is trap-enabled: +#ifndef __mcoldfire__ + andw a0@(TRAPE),d7 | is exception trap-enabled? +#else + clrl d6 + movew a0@(TRAPE),d6 + andl d6,d7 +#endif + beq 1f | no, exit + PICPEA SYM (_fpCCR),a1 | yes, push address of _fpCCR + trap IMM (FPTRAP) | and trap +#ifndef __mcoldfire__ +1: moveml sp@+,d2-d7 | restore data registers +#else +1: moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | and return + rts +#endif /* L_floatex */ + +#ifdef L_mulsi3 + .text + FUNC(__mulsi3) + .globl SYM (__mulsi3) +SYM (__mulsi3): + movew sp@(4), d0 /* x0 -> d0 */ + muluw sp@(10), d0 /* x0*y1 */ + movew sp@(6), d1 /* x1 -> d1 */ + muluw sp@(8), d1 /* x1*y0 */ +#ifndef __mcoldfire__ + addw d1, d0 +#else + addl d1, d0 +#endif + swap d0 + clrw d0 + movew sp@(6), d1 /* x1 -> d1 */ + muluw sp@(10), d1 /* x1*y1 */ + addl d1, d0 + + rts +#endif /* L_mulsi3 */ + +#ifdef L_udivsi3 + .text + FUNC(__udivsi3) + .globl SYM (__udivsi3) +SYM (__udivsi3): +#ifndef __mcoldfire__ + movel d2, sp@- + movel sp@(12), d1 /* d1 = divisor */ + movel sp@(8), d0 /* d0 = dividend */ + + cmpl IMM (0x10000), d1 /* divisor >= 2 ^ 16 ? */ + jcc L3 /* then try next algorithm */ + movel d0, d2 + clrw d2 + swap d2 + divu d1, d2 /* high quotient in lower word */ + movew d2, d0 /* save high quotient */ + swap d0 + movew sp@(10), d2 /* get low dividend + high rest */ + divu d1, d2 /* low quotient */ + movew d2, d0 + jra L6 + +L3: movel d1, d2 /* use d2 as divisor backup */ +L4: lsrl IMM (1), d1 /* shift divisor */ + lsrl IMM (1), d0 /* shift dividend */ + cmpl IMM (0x10000), d1 /* still divisor >= 2 ^ 16 ? */ + jcc L4 + divu d1, d0 /* now we have 16-bit divisor */ + andl IMM (0xffff), d0 /* mask out divisor, ignore remainder */ + +/* Multiply the 16-bit tentative quotient with the 32-bit divisor. Because of + the operand ranges, this might give a 33-bit product. If this product is + greater than the dividend, the tentative quotient was too large. */ + movel d2, d1 + mulu d0, d1 /* low part, 32 bits */ + swap d2 + mulu d0, d2 /* high part, at most 17 bits */ + swap d2 /* align high part with low part */ + tstw d2 /* high part 17 bits? */ + jne L5 /* if 17 bits, quotient was too large */ + addl d2, d1 /* add parts */ + jcs L5 /* if sum is 33 bits, quotient was too large */ + cmpl sp@(8), d1 /* compare the sum with the dividend */ + jls L6 /* if sum > dividend, quotient was too large */ +L5: subql IMM (1), d0 /* adjust quotient */ + +L6: movel sp@+, d2 + rts + +#else /* __mcoldfire__ */ + +/* ColdFire implementation of non-restoring division algorithm from + Hennessy & Patterson, Appendix A. 
*/ + link a6,IMM (-12) + moveml d2-d4,sp@ + movel a6@(8),d0 + movel a6@(12),d1 + clrl d2 | clear p + moveq IMM (31),d4 +L1: addl d0,d0 | shift reg pair (p,a) one bit left + addxl d2,d2 + movl d2,d3 | subtract b from p, store in tmp. + subl d1,d3 + jcs L2 | if no carry, + bset IMM (0),d0 | set the low order bit of a to 1, + movl d3,d2 | and store tmp in p. +L2: subql IMM (1),d4 + jcc L1 + moveml sp@,d2-d4 | restore data registers + unlk a6 | and return + rts +#endif /* __mcoldfire__ */ + +#endif /* L_udivsi3 */ + +#ifdef L_divsi3 + .text + FUNC(__divsi3) + .globl SYM (__divsi3) +SYM (__divsi3): + movel d2, sp@- + + moveq IMM (1), d2 /* sign of result stored in d2 (=1 or =-1) */ + movel sp@(12), d1 /* d1 = divisor */ + jpl L1 + negl d1 +#ifndef __mcoldfire__ + negb d2 /* change sign because divisor <0 */ +#else + negl d2 /* change sign because divisor <0 */ +#endif +L1: movel sp@(8), d0 /* d0 = dividend */ + jpl L2 + negl d0 +#ifndef __mcoldfire__ + negb d2 +#else + negl d2 +#endif + +L2: movel d1, sp@- + movel d0, sp@- + PICCALL SYM (__udivsi3) /* divide abs(dividend) by abs(divisor) */ + addql IMM (8), sp + + tstb d2 + jpl L3 + negl d0 + +L3: movel sp@+, d2 + rts +#endif /* L_divsi3 */ + +#ifdef L_umodsi3 + .text + FUNC(__umodsi3) + .globl SYM (__umodsi3) +SYM (__umodsi3): + movel sp@(8), d1 /* d1 = divisor */ + movel sp@(4), d0 /* d0 = dividend */ + movel d1, sp@- + movel d0, sp@- + PICCALL SYM (__udivsi3) + addql IMM (8), sp + movel sp@(8), d1 /* d1 = divisor */ +#ifndef __mcoldfire__ + movel d1, sp@- + movel d0, sp@- + PICCALL SYM (__mulsi3) /* d0 = (a/b)*b */ + addql IMM (8), sp +#else + mulsl d1,d0 +#endif + movel sp@(4), d1 /* d1 = dividend */ + subl d0, d1 /* d1 = a - (a/b)*b */ + movel d1, d0 + rts +#endif /* L_umodsi3 */ + +#ifdef L_modsi3 + .text + FUNC(__modsi3) + .globl SYM (__modsi3) +SYM (__modsi3): + movel sp@(8), d1 /* d1 = divisor */ + movel sp@(4), d0 /* d0 = dividend */ + movel d1, sp@- + movel d0, sp@- + PICCALL SYM (__divsi3) + addql IMM (8), sp + movel sp@(8), d1 /* d1 = divisor */ +#ifndef __mcoldfire__ + movel d1, sp@- + movel d0, sp@- + PICCALL SYM (__mulsi3) /* d0 = (a/b)*b */ + addql IMM (8), sp +#else + mulsl d1,d0 +#endif + movel sp@(4), d1 /* d1 = dividend */ + subl d0, d1 /* d1 = a - (a/b)*b */ + movel d1, d0 + rts +#endif /* L_modsi3 */ + + +#ifdef L_double + + .globl SYM (_fpCCR) + .globl $_exception_handler + +QUIET_NaN = 0xffffffff + +D_MAX_EXP = 0x07ff +D_BIAS = 1022 +DBL_MAX_EXP = D_MAX_EXP - D_BIAS +DBL_MIN_EXP = 1 - D_BIAS +DBL_MANT_DIG = 53 + +INEXACT_RESULT = 0x0001 +UNDERFLOW = 0x0002 +OVERFLOW = 0x0004 +DIVIDE_BY_ZERO = 0x0008 +INVALID_OPERATION = 0x0010 + +DOUBLE_FLOAT = 2 + +NOOP = 0 +ADD = 1 +MULTIPLY = 2 +DIVIDE = 3 +NEGATE = 4 +COMPARE = 5 +EXTENDSFDF = 6 +TRUNCDFSF = 7 + +UNKNOWN = -1 +ROUND_TO_NEAREST = 0 | round result to nearest representable value +ROUND_TO_ZERO = 1 | round result towards zero +ROUND_TO_PLUS = 2 | round result towards plus infinity +ROUND_TO_MINUS = 3 | round result towards minus infinity + +| Entry points: + + .globl SYM (__adddf3) + .globl SYM (__subdf3) + .globl SYM (__muldf3) + .globl SYM (__divdf3) + .globl SYM (__negdf2) + .globl SYM (__cmpdf2) + .globl SYM (__cmpdf2_internal) + .hidden SYM (__cmpdf2_internal) + + .text + .even + +| These are common routines to return and signal exceptions. 
+ +Ld$den: +| Return and signal a denormalized number + orl d7,d0 + movew IMM (INEXACT_RESULT+UNDERFLOW),d7 + moveq IMM (DOUBLE_FLOAT),d6 + PICJUMP $_exception_handler + +Ld$infty: +Ld$overflow: +| Return a properly signed INFINITY and set the exception flags + movel IMM (0x7ff00000),d0 + movel IMM (0),d1 + orl d7,d0 + movew IMM (INEXACT_RESULT+OVERFLOW),d7 + moveq IMM (DOUBLE_FLOAT),d6 + PICJUMP $_exception_handler + +Ld$underflow: +| Return 0 and set the exception flags + movel IMM (0),d0 + movel d0,d1 + movew IMM (INEXACT_RESULT+UNDERFLOW),d7 + moveq IMM (DOUBLE_FLOAT),d6 + PICJUMP $_exception_handler + +Ld$inop: +| Return a quiet NaN and set the exception flags + movel IMM (QUIET_NaN),d0 + movel d0,d1 + movew IMM (INEXACT_RESULT+INVALID_OPERATION),d7 + moveq IMM (DOUBLE_FLOAT),d6 + PICJUMP $_exception_handler + +Ld$div$0: +| Return a properly signed INFINITY and set the exception flags + movel IMM (0x7ff00000),d0 + movel IMM (0),d1 + orl d7,d0 + movew IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7 + moveq IMM (DOUBLE_FLOAT),d6 + PICJUMP $_exception_handler + +|============================================================================= +|============================================================================= +| double precision routines +|============================================================================= +|============================================================================= + +| A double precision floating point number (double) has the format: +| +| struct _double { +| unsigned int sign : 1; /* sign bit */ +| unsigned int exponent : 11; /* exponent, shifted by 126 */ +| unsigned int fraction : 52; /* fraction */ +| } double; +| +| Thus sizeof(double) = 8 (64 bits). +| +| All the routines are callable from C programs, and return the result +| in the register pair d0-d1. They also preserve all registers except +| d0-d1 and a0-a1. 
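The double-precision routines below all begin by splitting their operands into the sign/exponent/fraction fields described above. An illustrative C helper doing the same split on the high/low word pair (the d0-d1 pairing mentioned above); the struct and function names are invented for the sketch:

    #include <stdint.h>

    struct dbl_fields { int sign; int exponent; uint64_t fraction; };

    /* Split an IEEE double given as its high and low 32-bit words.
       For a normalized number the value is
       (-1)^sign * 1.fraction * 2^(exponent - 1023).  Illustrative only.  */
    static struct dbl_fields
    split_double (uint32_t hi, uint32_t lo)
    {
      struct dbl_fields r;
      r.sign     = hi >> 31;
      r.exponent = (hi >> 20) & 0x7ff;
      r.fraction = (((uint64_t) hi & 0xfffff) << 32) | lo;
      return r;
    }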
+ +|============================================================================= +| __subdf3 +|============================================================================= + +| double __subdf3(double, double); + FUNC(__subdf3) +SYM (__subdf3): + bchg IMM (31),sp@(12) | change sign of second operand + | and fall through, so we always add +|============================================================================= +| __adddf3 +|============================================================================= + +| double __adddf3(double, double); + FUNC(__adddf3) +SYM (__adddf3): +#ifndef __mcoldfire__ + link a6,IMM (0) | everything will be done in registers + moveml d2-d7,sp@- | save all data registers and a2 (but d0-d1) +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + movel a6@(8),d0 | get first operand + movel a6@(12),d1 | + movel a6@(16),d2 | get second operand + movel a6@(20),d3 | + + movel d0,d7 | get d0's sign bit in d7 ' + addl d1,d1 | check and clear sign bit of a, and gain one + addxl d0,d0 | bit of extra precision + beq Ladddf$b | if zero return second operand + + movel d2,d6 | save sign in d6 + addl d3,d3 | get rid of sign bit and gain one bit of + addxl d2,d2 | extra precision + beq Ladddf$a | if zero return first operand + + andl IMM (0x80000000),d7 | isolate a's sign bit ' + swap d6 | and also b's sign bit ' +#ifndef __mcoldfire__ + andw IMM (0x8000),d6 | + orw d6,d7 | and combine them into d7, so that a's sign ' + | bit is in the high word and b's is in the ' + | low word, so d6 is free to be used +#else + andl IMM (0x8000),d6 + orl d6,d7 +#endif + movel d7,a0 | now save d7 into a0, so d7 is free to + | be used also + +| Get the exponents and check for denormalized and/or infinity. + + movel IMM (0x001fffff),d6 | mask for the fraction + movel IMM (0x00200000),d7 | mask to put hidden bit back + + movel d0,d4 | + andl d6,d0 | get fraction in d0 + notl d6 | make d6 into mask for the exponent + andl d6,d4 | get exponent in d4 + beq Ladddf$a$den | branch if a is denormalized + cmpl d6,d4 | check for INFINITY or NaN + beq Ladddf$nf | + orl d7,d0 | and put hidden bit back +Ladddf$1: + swap d4 | shift right exponent so that it starts +#ifndef __mcoldfire__ + lsrw IMM (5),d4 | in bit 0 and not bit 20 +#else + lsrl IMM (5),d4 | in bit 0 and not bit 20 +#endif +| Now we have a's exponent in d4 and fraction in d0-d1 ' + movel d2,d5 | save b to get exponent + andl d6,d5 | get exponent in d5 + beq Ladddf$b$den | branch if b is denormalized + cmpl d6,d5 | check for INFINITY or NaN + beq Ladddf$nf + notl d6 | make d6 into mask for the fraction again + andl d6,d2 | and get fraction in d2 + orl d7,d2 | and put hidden bit back +Ladddf$2: + swap d5 | shift right exponent so that it starts +#ifndef __mcoldfire__ + lsrw IMM (5),d5 | in bit 0 and not bit 20 +#else + lsrl IMM (5),d5 | in bit 0 and not bit 20 +#endif + +| Now we have b's exponent in d5 and fraction in d2-d3. ' + +| The situation now is as follows: the signs are combined in a0, the +| numbers are in d0-d1 (a) and d2-d3 (b), and the exponents in d4 (a) +| and d5 (b). To do the rounding correctly we need to keep all the +| bits until the end, so we need to use d0-d1-d2-d3 for the first number +| and d4-d5-d6-d7 for the second. To do this we store (temporarily) the +| exponents in a2-a3. 
+ +#ifndef __mcoldfire__ + moveml a2-a3,sp@- | save the address registers +#else + movel a2,sp@- + movel a3,sp@- + movel a4,sp@- +#endif + + movel d4,a2 | save the exponents + movel d5,a3 | + + movel IMM (0),d7 | and move the numbers around + movel d7,d6 | + movel d3,d5 | + movel d2,d4 | + movel d7,d3 | + movel d7,d2 | + +| Here we shift the numbers until the exponents are the same, and put +| the largest exponent in a2. +#ifndef __mcoldfire__ + exg d4,a2 | get exponents back + exg d5,a3 | + cmpw d4,d5 | compare the exponents +#else + movel d4,a4 | get exponents back + movel a2,d4 + movel a4,a2 + movel d5,a4 + movel a3,d5 + movel a4,a3 + cmpl d4,d5 | compare the exponents +#endif + beq Ladddf$3 | if equal don't shift ' + bhi 9f | branch if second exponent is higher + +| Here we have a's exponent larger than b's, so we have to shift b. We do +| this by using as counter d2: +1: movew d4,d2 | move largest exponent to d2 +#ifndef __mcoldfire__ + subw d5,d2 | and subtract second exponent + exg d4,a2 | get back the longs we saved + exg d5,a3 | +#else + subl d5,d2 | and subtract second exponent + movel d4,a4 | get back the longs we saved + movel a2,d4 + movel a4,a2 + movel d5,a4 + movel a3,d5 + movel a4,a3 +#endif +| if difference is too large we don't shift (actually, we can just exit) ' +#ifndef __mcoldfire__ + cmpw IMM (DBL_MANT_DIG+2),d2 +#else + cmpl IMM (DBL_MANT_DIG+2),d2 +#endif + bge Ladddf$b$small +#ifndef __mcoldfire__ + cmpw IMM (32),d2 | if difference >= 32, shift by longs +#else + cmpl IMM (32),d2 | if difference >= 32, shift by longs +#endif + bge 5f +2: +#ifndef __mcoldfire__ + cmpw IMM (16),d2 | if difference >= 16, shift by words +#else + cmpl IMM (16),d2 | if difference >= 16, shift by words +#endif + bge 6f + bra 3f | enter dbra loop + +4: +#ifndef __mcoldfire__ + lsrl IMM (1),d4 + roxrl IMM (1),d5 + roxrl IMM (1),d6 + roxrl IMM (1),d7 +#else + lsrl IMM (1),d7 + btst IMM (0),d6 + beq 10f + bset IMM (31),d7 +10: lsrl IMM (1),d6 + btst IMM (0),d5 + beq 11f + bset IMM (31),d6 +11: lsrl IMM (1),d5 + btst IMM (0),d4 + beq 12f + bset IMM (31),d5 +12: lsrl IMM (1),d4 +#endif +3: +#ifndef __mcoldfire__ + dbra d2,4b +#else + subql IMM (1),d2 + bpl 4b +#endif + movel IMM (0),d2 + movel d2,d3 + bra Ladddf$4 +5: + movel d6,d7 + movel d5,d6 + movel d4,d5 + movel IMM (0),d4 +#ifndef __mcoldfire__ + subw IMM (32),d2 +#else + subl IMM (32),d2 +#endif + bra 2b +6: + movew d6,d7 + swap d7 + movew d5,d6 + swap d6 + movew d4,d5 + swap d5 + movew IMM (0),d4 + swap d4 +#ifndef __mcoldfire__ + subw IMM (16),d2 +#else + subl IMM (16),d2 +#endif + bra 3b + +9: +#ifndef __mcoldfire__ + exg d4,d5 + movew d4,d6 + subw d5,d6 | keep d5 (largest exponent) in d4 + exg d4,a2 + exg d5,a3 +#else + movel d5,d6 + movel d4,d5 + movel d6,d4 + subl d5,d6 + movel d4,a4 + movel a2,d4 + movel a4,a2 + movel d5,a4 + movel a3,d5 + movel a4,a3 +#endif +| if difference is too large we don't shift (actually, we can just exit) ' +#ifndef __mcoldfire__ + cmpw IMM (DBL_MANT_DIG+2),d6 +#else + cmpl IMM (DBL_MANT_DIG+2),d6 +#endif + bge Ladddf$a$small +#ifndef __mcoldfire__ + cmpw IMM (32),d6 | if difference >= 32, shift by longs +#else + cmpl IMM (32),d6 | if difference >= 32, shift by longs +#endif + bge 5f +2: +#ifndef __mcoldfire__ + cmpw IMM (16),d6 | if difference >= 16, shift by words +#else + cmpl IMM (16),d6 | if difference >= 16, shift by words +#endif + bge 6f + bra 3f | enter dbra loop + +4: +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + roxrl IMM (1),d2 + roxrl IMM (1),d3 +#else + lsrl IMM (1),d3 + 
btst IMM (0),d2 + beq 10f + bset IMM (31),d3 +10: lsrl IMM (1),d2 + btst IMM (0),d1 + beq 11f + bset IMM (31),d2 +11: lsrl IMM (1),d1 + btst IMM (0),d0 + beq 12f + bset IMM (31),d1 +12: lsrl IMM (1),d0 +#endif +3: +#ifndef __mcoldfire__ + dbra d6,4b +#else + subql IMM (1),d6 + bpl 4b +#endif + movel IMM (0),d7 + movel d7,d6 + bra Ladddf$4 +5: + movel d2,d3 + movel d1,d2 + movel d0,d1 + movel IMM (0),d0 +#ifndef __mcoldfire__ + subw IMM (32),d6 +#else + subl IMM (32),d6 +#endif + bra 2b +6: + movew d2,d3 + swap d3 + movew d1,d2 + swap d2 + movew d0,d1 + swap d1 + movew IMM (0),d0 + swap d0 +#ifndef __mcoldfire__ + subw IMM (16),d6 +#else + subl IMM (16),d6 +#endif + bra 3b +Ladddf$3: +#ifndef __mcoldfire__ + exg d4,a2 + exg d5,a3 +#else + movel d4,a4 + movel a2,d4 + movel a4,a2 + movel d5,a4 + movel a3,d5 + movel a4,a3 +#endif +Ladddf$4: +| Now we have the numbers in d0--d3 and d4--d7, the exponent in a2, and +| the signs in a4. + +| Here we have to decide whether to add or subtract the numbers: +#ifndef __mcoldfire__ + exg d7,a0 | get the signs + exg d6,a3 | a3 is free to be used +#else + movel d7,a4 + movel a0,d7 + movel a4,a0 + movel d6,a4 + movel a3,d6 + movel a4,a3 +#endif + movel d7,d6 | + movew IMM (0),d7 | get a's sign in d7 ' + swap d6 | + movew IMM (0),d6 | and b's sign in d6 ' + eorl d7,d6 | compare the signs + bmi Lsubdf$0 | if the signs are different we have + | to subtract +#ifndef __mcoldfire__ + exg d7,a0 | else we add the numbers + exg d6,a3 | +#else + movel d7,a4 + movel a0,d7 + movel a4,a0 + movel d6,a4 + movel a3,d6 + movel a4,a3 +#endif + addl d7,d3 | + addxl d6,d2 | + addxl d5,d1 | + addxl d4,d0 | + + movel a2,d4 | return exponent to d4 + movel a0,d7 | + andl IMM (0x80000000),d7 | d7 now has the sign + +#ifndef __mcoldfire__ + moveml sp@+,a2-a3 +#else + movel sp@+,a4 + movel sp@+,a3 + movel sp@+,a2 +#endif + +| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider +| the case of denormalized numbers in the rounding routine itself). +| As in the addition (not in the subtraction!) we could have set +| one more bit we check this: + btst IMM (DBL_MANT_DIG+1),d0 + beq 1f +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + roxrl IMM (1),d2 + roxrl IMM (1),d3 + addw IMM (1),d4 +#else + lsrl IMM (1),d3 + btst IMM (0),d2 + beq 10f + bset IMM (31),d3 +10: lsrl IMM (1),d2 + btst IMM (0),d1 + beq 11f + bset IMM (31),d2 +11: lsrl IMM (1),d1 + btst IMM (0),d0 + beq 12f + bset IMM (31),d1 +12: lsrl IMM (1),d0 + addl IMM (1),d4 +#endif +1: + lea pc@(Ladddf$5),a0 | to return from rounding routine + PICLEA SYM (_fpCCR),a1 | check the rounding mode +#ifdef __mcoldfire__ + clrl d6 +#endif + movew a1@(6),d6 | rounding mode in d6 + beq Lround$to$nearest +#ifndef __mcoldfire__ + cmpw IMM (ROUND_TO_PLUS),d6 +#else + cmpl IMM (ROUND_TO_PLUS),d6 +#endif + bhi Lround$to$minus + blt Lround$to$zero + bra Lround$to$plus +Ladddf$5: +| Put back the exponent and check for overflow +#ifndef __mcoldfire__ + cmpw IMM (0x7ff),d4 | is the exponent big? +#else + cmpl IMM (0x7ff),d4 | is the exponent big? +#endif + bge 1f + bclr IMM (DBL_MANT_DIG-1),d0 +#ifndef __mcoldfire__ + lslw IMM (4),d4 | put exponent back into position +#else + lsll IMM (4),d4 | put exponent back into position +#endif + swap d0 | +#ifndef __mcoldfire__ + orw d4,d0 | +#else + orl d4,d0 | +#endif + swap d0 | + bra Ladddf$ret +1: + moveq IMM (ADD),d5 + bra Ld$overflow + +Lsubdf$0: +| Here we do the subtraction. 
+#ifndef __mcoldfire__ + exg d7,a0 | put sign back in a0 + exg d6,a3 | +#else + movel d7,a4 + movel a0,d7 + movel a4,a0 + movel d6,a4 + movel a3,d6 + movel a4,a3 +#endif + subl d7,d3 | + subxl d6,d2 | + subxl d5,d1 | + subxl d4,d0 | + beq Ladddf$ret$1 | if zero just exit + bpl 1f | if positive skip the following + movel a0,d7 | + bchg IMM (31),d7 | change sign bit in d7 + movel d7,a0 | + negl d3 | + negxl d2 | + negxl d1 | and negate result + negxl d0 | +1: + movel a2,d4 | return exponent to d4 + movel a0,d7 + andl IMM (0x80000000),d7 | isolate sign bit +#ifndef __mcoldfire__ + moveml sp@+,a2-a3 | +#else + movel sp@+,a4 + movel sp@+,a3 + movel sp@+,a2 +#endif + +| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider +| the case of denormalized numbers in the rounding routine itself). +| As in the addition (not in the subtraction!) we could have set +| one more bit we check this: + btst IMM (DBL_MANT_DIG+1),d0 + beq 1f +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + roxrl IMM (1),d2 + roxrl IMM (1),d3 + addw IMM (1),d4 +#else + lsrl IMM (1),d3 + btst IMM (0),d2 + beq 10f + bset IMM (31),d3 +10: lsrl IMM (1),d2 + btst IMM (0),d1 + beq 11f + bset IMM (31),d2 +11: lsrl IMM (1),d1 + btst IMM (0),d0 + beq 12f + bset IMM (31),d1 +12: lsrl IMM (1),d0 + addl IMM (1),d4 +#endif +1: + lea pc@(Lsubdf$1),a0 | to return from rounding routine + PICLEA SYM (_fpCCR),a1 | check the rounding mode +#ifdef __mcoldfire__ + clrl d6 +#endif + movew a1@(6),d6 | rounding mode in d6 + beq Lround$to$nearest +#ifndef __mcoldfire__ + cmpw IMM (ROUND_TO_PLUS),d6 +#else + cmpl IMM (ROUND_TO_PLUS),d6 +#endif + bhi Lround$to$minus + blt Lround$to$zero + bra Lround$to$plus +Lsubdf$1: +| Put back the exponent and sign (we don't have overflow). ' + bclr IMM (DBL_MANT_DIG-1),d0 +#ifndef __mcoldfire__ + lslw IMM (4),d4 | put exponent back into position +#else + lsll IMM (4),d4 | put exponent back into position +#endif + swap d0 | +#ifndef __mcoldfire__ + orw d4,d0 | +#else + orl d4,d0 | +#endif + swap d0 | + bra Ladddf$ret + +| If one of the numbers was too small (difference of exponents >= +| DBL_MANT_DIG+1) we return the other (and now we don't have to ' +| check for finiteness or zero). +Ladddf$a$small: +#ifndef __mcoldfire__ + moveml sp@+,a2-a3 +#else + movel sp@+,a4 + movel sp@+,a3 + movel sp@+,a2 +#endif + movel a6@(16),d0 + movel a6@(20),d1 + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | restore data registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | and return + rts + +Ladddf$b$small: +#ifndef __mcoldfire__ + moveml sp@+,a2-a3 +#else + movel sp@+,a4 + movel sp@+,a3 + movel sp@+,a2 +#endif + movel a6@(8),d0 + movel a6@(12),d1 + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | restore data registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | and return + rts + +Ladddf$a$den: + movel d7,d4 | d7 contains 0x00200000 + bra Ladddf$1 + +Ladddf$b$den: + movel d7,d5 | d7 contains 0x00200000 + notl d6 + bra Ladddf$2 + +Ladddf$b: +| Return b (if a is zero) + movel d2,d0 + movel d3,d1 + bne 1f | Check if b is -0 + cmpl IMM (0x80000000),d0 + bne 1f + andl IMM (0x80000000),d7 | Use the sign of a + clrl d0 + bra Ladddf$ret +Ladddf$a: + movel a6@(8),d0 + movel a6@(12),d1 +1: + moveq IMM (ADD),d5 +| Check for NaN and +/-INFINITY. 
+ movel d0,d7 | + andl IMM (0x80000000),d7 | + bclr IMM (31),d0 | + cmpl IMM (0x7ff00000),d0 | + bge 2f | + movel d0,d0 | check for zero, since we don't ' + bne Ladddf$ret | want to return -0 by mistake + bclr IMM (31),d7 | + bra Ladddf$ret | +2: + andl IMM (0x000fffff),d0 | check for NaN (nonzero fraction) + orl d1,d0 | + bne Ld$inop | + bra Ld$infty | + +Ladddf$ret$1: +#ifndef __mcoldfire__ + moveml sp@+,a2-a3 | restore regs and exit +#else + movel sp@+,a4 + movel sp@+,a3 + movel sp@+,a2 +#endif + +Ladddf$ret: +| Normal exit. + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ + orl d7,d0 | put sign bit back +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts + +Ladddf$ret$den: +| Return a denormalized number. +#ifndef __mcoldfire__ + lsrl IMM (1),d0 | shift right once more + roxrl IMM (1),d1 | +#else + lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 +#endif + bra Ladddf$ret + +Ladddf$nf: + moveq IMM (ADD),d5 +| This could be faster but it is not worth the effort, since it is not +| executed very often. We sacrifice speed for clarity here. + movel a6@(8),d0 | get the numbers back (remember that we + movel a6@(12),d1 | did some processing already) + movel a6@(16),d2 | + movel a6@(20),d3 | + movel IMM (0x7ff00000),d4 | useful constant (INFINITY) + movel d0,d7 | save sign bits + movel d2,d6 | + bclr IMM (31),d0 | clear sign bits + bclr IMM (31),d2 | +| We know that one of them is either NaN of +/-INFINITY +| Check for NaN (if either one is NaN return NaN) + cmpl d4,d0 | check first a (d0) + bhi Ld$inop | if d0 > 0x7ff00000 or equal and + bne 2f + tstl d1 | d1 > 0, a is NaN + bne Ld$inop | +2: cmpl d4,d2 | check now b (d1) + bhi Ld$inop | + bne 3f + tstl d3 | + bne Ld$inop | +3: +| Now comes the check for +/-INFINITY. We know that both are (maybe not +| finite) numbers, but we have to check if both are infinite whether we +| are adding or subtracting them. + eorl d7,d6 | to check sign bits + bmi 1f + andl IMM (0x80000000),d7 | get (common) sign bit + bra Ld$infty +1: +| We know one (or both) are infinite, so we test for equality between the +| two numbers (if they are equal they have to be infinite both, so we +| return NaN). + cmpl d2,d0 | are both infinite? 
+ bne 1f | if d0 <> d2 they are not equal + cmpl d3,d1 | if d0 == d2 test d3 and d1 + beq Ld$inop | if equal return NaN +1: + andl IMM (0x80000000),d7 | get a's sign bit ' + cmpl d4,d0 | test now for infinity + beq Ld$infty | if a is INFINITY return with this sign + bchg IMM (31),d7 | else we know b is INFINITY and has + bra Ld$infty | the opposite sign + +|============================================================================= +| __muldf3 +|============================================================================= + +| double __muldf3(double, double); + FUNC(__muldf3) +SYM (__muldf3): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + movel a6@(8),d0 | get a into d0-d1 + movel a6@(12),d1 | + movel a6@(16),d2 | and b into d2-d3 + movel a6@(20),d3 | + movel d0,d7 | d7 will hold the sign of the product + eorl d2,d7 | + andl IMM (0x80000000),d7 | + movel d7,a0 | save sign bit into a0 + movel IMM (0x7ff00000),d7 | useful constant (+INFINITY) + movel d7,d6 | another (mask for fraction) + notl d6 | + bclr IMM (31),d0 | get rid of a's sign bit ' + movel d0,d4 | + orl d1,d4 | + beq Lmuldf$a$0 | branch if a is zero + movel d0,d4 | + bclr IMM (31),d2 | get rid of b's sign bit ' + movel d2,d5 | + orl d3,d5 | + beq Lmuldf$b$0 | branch if b is zero + movel d2,d5 | + cmpl d7,d0 | is a big? + bhi Lmuldf$inop | if a is NaN return NaN + beq Lmuldf$a$nf | we still have to check d1 and b ... + cmpl d7,d2 | now compare b with INFINITY + bhi Lmuldf$inop | is b NaN? + beq Lmuldf$b$nf | we still have to check d3 ... +| Here we have both numbers finite and nonzero (and with no sign bit). +| Now we get the exponents into d4 and d5. + andl d7,d4 | isolate exponent in d4 + beq Lmuldf$a$den | if exponent zero, have denormalized + andl d6,d0 | isolate fraction + orl IMM (0x00100000),d0 | and put hidden bit back + swap d4 | I like exponents in the first byte +#ifndef __mcoldfire__ + lsrw IMM (4),d4 | +#else + lsrl IMM (4),d4 | +#endif +Lmuldf$1: + andl d7,d5 | + beq Lmuldf$b$den | + andl d6,d2 | + orl IMM (0x00100000),d2 | and put hidden bit back + swap d5 | +#ifndef __mcoldfire__ + lsrw IMM (4),d5 | +#else + lsrl IMM (4),d5 | +#endif +Lmuldf$2: | +#ifndef __mcoldfire__ + addw d5,d4 | add exponents + subw IMM (D_BIAS+1),d4 | and subtract bias (plus one) +#else + addl d5,d4 | add exponents + subl IMM (D_BIAS+1),d4 | and subtract bias (plus one) +#endif + +| We are now ready to do the multiplication. The situation is as follows: +| both a and b have bit 52 ( bit 20 of d0 and d2) set (even if they were +| denormalized to start with!), which means that in the product bit 104 +| (which will correspond to bit 8 of the fourth long) is set. + +| Here we have to do the product. +| To do it we have to juggle the registers back and forth, as there are not +| enough to keep everything in them. So we use the address registers to keep +| some intermediate data. 
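The comments above describe the multiplication itself: both mantissas have their top (hidden) bit set, so the wide product lies between 2^104 and 2^106, and it is built up bit by bit, most significant bit of b first, adding a into the running sum whenever the current bit is set. A compact C sketch of that shift-and-add loop, using a pair of 64-bit words for the wide result, follows; mul_mant is a made-up name and the sketch leaves out the register juggling the assembly needs.

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative sketch only: most-significant-bit-first shift-and-add,
       multiplying two 53-bit mantissas into a 128-bit (hi,lo) product. */
    static void mul_mant(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
    {
        uint64_t h = 0, l = 0;
        for (int i = 52; i >= 0; i--) {      /* DBL_MANT_DIG = 53 bits of b */
            h = (h << 1) | (l >> 63);        /* shift the partial product */
            l <<= 1;
            if ((b >> i) & 1) {              /* this bit of b is set: add a */
                uint64_t t = l + a;
                h += (t < l);                /* propagate the carry */
                l = t;
            }
        }
        *hi = h; *lo = l;
    }

    int main(void)
    {
        uint64_t hi, lo, m = 1ULL << 52;     /* mantissa of 1.0, hidden bit set */
        mul_mant(m, m, &hi, &lo);            /* 2^52 * 2^52 = 2^104 */
        printf("hi=%#llx lo=%#llx\n", (unsigned long long)hi,
               (unsigned long long)lo);
        return 0;
    }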
+ +#ifndef __mcoldfire__ + moveml a2-a3,sp@- | save a2 and a3 for temporary use +#else + movel a2,sp@- + movel a3,sp@- + movel a4,sp@- +#endif + movel IMM (0),a2 | a2 is a null register + movel d4,a3 | and a3 will preserve the exponent + +| First, shift d2-d3 so bit 20 becomes bit 31: +#ifndef __mcoldfire__ + rorl IMM (5),d2 | rotate d2 5 places right + swap d2 | and swap it + rorl IMM (5),d3 | do the same thing with d3 + swap d3 | + movew d3,d6 | get the rightmost 11 bits of d3 + andw IMM (0x07ff),d6 | + orw d6,d2 | and put them into d2 + andw IMM (0xf800),d3 | clear those bits in d3 +#else + moveq IMM (11),d7 | left shift d2 11 bits + lsll d7,d2 + movel d3,d6 | get a copy of d3 + lsll d7,d3 | left shift d3 11 bits + andl IMM (0xffe00000),d6 | get the top 11 bits of d3 + moveq IMM (21),d7 | right shift them 21 bits + lsrl d7,d6 + orl d6,d2 | stick them at the end of d2 +#endif + + movel d2,d6 | move b into d6-d7 + movel d3,d7 | move a into d4-d5 + movel d0,d4 | and clear d0-d1-d2-d3 (to put result) + movel d1,d5 | + movel IMM (0),d3 | + movel d3,d2 | + movel d3,d1 | + movel d3,d0 | + +| We use a1 as counter: + movel IMM (DBL_MANT_DIG-1),a1 +#ifndef __mcoldfire__ + exg d7,a1 +#else + movel d7,a4 + movel a1,d7 + movel a4,a1 +#endif + +1: +#ifndef __mcoldfire__ + exg d7,a1 | put counter back in a1 +#else + movel d7,a4 + movel a1,d7 + movel a4,a1 +#endif + addl d3,d3 | shift sum once left + addxl d2,d2 | + addxl d1,d1 | + addxl d0,d0 | + addl d7,d7 | + addxl d6,d6 | + bcc 2f | if bit clear skip the following +#ifndef __mcoldfire__ + exg d7,a2 | +#else + movel d7,a4 + movel a2,d7 + movel a4,a2 +#endif + addl d5,d3 | else add a to the sum + addxl d4,d2 | + addxl d7,d1 | + addxl d7,d0 | +#ifndef __mcoldfire__ + exg d7,a2 | +#else + movel d7,a4 + movel a2,d7 + movel a4,a2 +#endif +2: +#ifndef __mcoldfire__ + exg d7,a1 | put counter in d7 + dbf d7,1b | decrement and branch +#else + movel d7,a4 + movel a1,d7 + movel a4,a1 + subql IMM (1),d7 + bpl 1b +#endif + + movel a3,d4 | restore exponent +#ifndef __mcoldfire__ + moveml sp@+,a2-a3 +#else + movel sp@+,a4 + movel sp@+,a3 + movel sp@+,a2 +#endif + +| Now we have the product in d0-d1-d2-d3, with bit 8 of d0 set. The +| first thing to do now is to normalize it so bit 8 becomes bit +| DBL_MANT_DIG-32 (to do the rounding); later we will shift right. + swap d0 + swap d1 + movew d1,d0 + swap d2 + movew d2,d1 + swap d3 + movew d3,d2 + movew IMM (0),d3 +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + roxrl IMM (1),d2 + roxrl IMM (1),d3 + lsrl IMM (1),d0 + roxrl IMM (1),d1 + roxrl IMM (1),d2 + roxrl IMM (1),d3 + lsrl IMM (1),d0 + roxrl IMM (1),d1 + roxrl IMM (1),d2 + roxrl IMM (1),d3 +#else + moveq IMM (29),d6 + lsrl IMM (3),d3 + movel d2,d7 + lsll d6,d7 + orl d7,d3 + lsrl IMM (3),d2 + movel d1,d7 + lsll d6,d7 + orl d7,d2 + lsrl IMM (3),d1 + movel d0,d7 + lsll d6,d7 + orl d7,d1 + lsrl IMM (3),d0 +#endif + +| Now round, check for over- and underflow, and exit. 
+ movel a0,d7 | get sign bit back into d7 + moveq IMM (MULTIPLY),d5 + + btst IMM (DBL_MANT_DIG+1-32),d0 + beq Lround$exit +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + addw IMM (1),d4 +#else + lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 + addl IMM (1),d4 +#endif + bra Lround$exit + +Lmuldf$inop: + moveq IMM (MULTIPLY),d5 + bra Ld$inop + +Lmuldf$b$nf: + moveq IMM (MULTIPLY),d5 + movel a0,d7 | get sign bit back into d7 + tstl d3 | we know d2 == 0x7ff00000, so check d3 + bne Ld$inop | if d3 <> 0 b is NaN + bra Ld$overflow | else we have overflow (since a is finite) + +Lmuldf$a$nf: + moveq IMM (MULTIPLY),d5 + movel a0,d7 | get sign bit back into d7 + tstl d1 | we know d0 == 0x7ff00000, so check d1 + bne Ld$inop | if d1 <> 0 a is NaN + bra Ld$overflow | else signal overflow + +| If either number is zero return zero, unless the other is +/-INFINITY or +| NaN, in which case we return NaN. +Lmuldf$b$0: + moveq IMM (MULTIPLY),d5 +#ifndef __mcoldfire__ + exg d2,d0 | put b (==0) into d0-d1 + exg d3,d1 | and a (with sign bit cleared) into d2-d3 + movel a0,d0 | set result sign +#else + movel d0,d2 | put a into d2-d3 + movel d1,d3 + movel a0,d0 | put result zero into d0-d1 + movq IMM(0),d1 +#endif + bra 1f +Lmuldf$a$0: + movel a0,d0 | set result sign + movel a6@(16),d2 | put b into d2-d3 again + movel a6@(20),d3 | + bclr IMM (31),d2 | clear sign bit +1: cmpl IMM (0x7ff00000),d2 | check for non-finiteness + bge Ld$inop | in case NaN or +/-INFINITY return NaN + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts + +| If a number is denormalized we put an exponent of 1 but do not put the +| hidden bit back into the fraction; instead we shift left until bit 21 +| (the hidden bit) is set, adjusting the exponent accordingly. We do this +| to ensure that the product of the fractions is close to 1. 
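As the comment above says, a denormalized operand gets a working exponent of 1 and is then shifted left until its would-be hidden bit appears, decrementing the exponent on the way, so that the multiply loop always sees a mantissa with the top bit set. A hedged C sketch of that step (normalize_subnormal is a made-up name; a zero operand is assumed to have been filtered out earlier, as the assembly does):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative sketch only: pre-normalize a subnormal mantissa.
       frac must be nonzero; zero operands are handled before this point. */
    static uint64_t normalize_subnormal(uint64_t frac, int *exp)
    {
        *exp = 1;                            /* denormals use exponent 1 */
        while (!(frac & (1ULL << 52))) {     /* until the hidden bit is set */
            frac <<= 1;
            (*exp)--;                        /* working exponent may drop below 1 */
        }
        return frac;
    }

    int main(void)
    {
        int e;
        uint64_t f = normalize_subnormal(1, &e);  /* smallest subnormal */
        printf("frac=%#llx exp=%d\n", (unsigned long long)f, e);
        return 0;
    }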
+Lmuldf$a$den: + movel IMM (1),d4 + andl d6,d0 +1: addl d1,d1 | shift a left until bit 20 is set + addxl d0,d0 | +#ifndef __mcoldfire__ + subw IMM (1),d4 | and adjust exponent +#else + subl IMM (1),d4 | and adjust exponent +#endif + btst IMM (20),d0 | + bne Lmuldf$1 | + bra 1b + +Lmuldf$b$den: + movel IMM (1),d5 + andl d6,d2 +1: addl d3,d3 | shift b left until bit 20 is set + addxl d2,d2 | +#ifndef __mcoldfire__ + subw IMM (1),d5 | and adjust exponent +#else + subql IMM (1),d5 | and adjust exponent +#endif + btst IMM (20),d2 | + bne Lmuldf$2 | + bra 1b + + +|============================================================================= +| __divdf3 +|============================================================================= + +| double __divdf3(double, double); + FUNC(__divdf3) +SYM (__divdf3): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + movel a6@(8),d0 | get a into d0-d1 + movel a6@(12),d1 | + movel a6@(16),d2 | and b into d2-d3 + movel a6@(20),d3 | + movel d0,d7 | d7 will hold the sign of the result + eorl d2,d7 | + andl IMM (0x80000000),d7 + movel d7,a0 | save sign into a0 + movel IMM (0x7ff00000),d7 | useful constant (+INFINITY) + movel d7,d6 | another (mask for fraction) + notl d6 | + bclr IMM (31),d0 | get rid of a's sign bit ' + movel d0,d4 | + orl d1,d4 | + beq Ldivdf$a$0 | branch if a is zero + movel d0,d4 | + bclr IMM (31),d2 | get rid of b's sign bit ' + movel d2,d5 | + orl d3,d5 | + beq Ldivdf$b$0 | branch if b is zero + movel d2,d5 + cmpl d7,d0 | is a big? + bhi Ldivdf$inop | if a is NaN return NaN + beq Ldivdf$a$nf | if d0 == 0x7ff00000 we check d1 + cmpl d7,d2 | now compare b with INFINITY + bhi Ldivdf$inop | if b is NaN return NaN + beq Ldivdf$b$nf | if d2 == 0x7ff00000 we check d3 +| Here we have both numbers finite and nonzero (and with no sign bit). +| Now we get the exponents into d4 and d5 and normalize the numbers to +| ensure that the ratio of the fractions is around 1. We do this by +| making sure that both numbers have bit #DBL_MANT_DIG-32-1 (hidden bit) +| set, even if they were denormalized to start with. +| Thus, the result will satisfy: 2 > result > 1/2. + andl d7,d4 | and isolate exponent in d4 + beq Ldivdf$a$den | if exponent is zero we have a denormalized + andl d6,d0 | and isolate fraction + orl IMM (0x00100000),d0 | and put hidden bit back + swap d4 | I like exponents in the first byte +#ifndef __mcoldfire__ + lsrw IMM (4),d4 | +#else + lsrl IMM (4),d4 | +#endif +Ldivdf$1: | + andl d7,d5 | + beq Ldivdf$b$den | + andl d6,d2 | + orl IMM (0x00100000),d2 + swap d5 | +#ifndef __mcoldfire__ + lsrw IMM (4),d5 | +#else + lsrl IMM (4),d5 | +#endif +Ldivdf$2: | +#ifndef __mcoldfire__ + subw d5,d4 | subtract exponents + addw IMM (D_BIAS),d4 | and add bias +#else + subl d5,d4 | subtract exponents + addl IMM (D_BIAS),d4 | and add bias +#endif + +| We are now ready to do the division. We have prepared things in such a way +| that the ratio of the fractions will be less than 2 but greater than 1/2. +| At this point the registers in use are: +| d0-d1 hold a (first operand, bit DBL_MANT_DIG-32=0, bit +| DBL_MANT_DIG-1-32=1) +| d2-d3 hold b (second operand, bit DBL_MANT_DIG-32=1) +| d4 holds the difference of the exponents, corrected by the bias +| a0 holds the sign of the ratio + +| To do the rounding correctly we need to keep information about the +| nonsignificant bits. 
One way to do this would be to do the division +| using four registers; another is to use two registers (as originally +| I did), but use a sticky bit to preserve information about the +| fractional part. Note that we can keep that info in a1, which is not +| used. + movel IMM (0),d6 | d6-d7 will hold the result + movel d6,d7 | + movel IMM (0),a1 | and a1 will hold the sticky bit + + movel IMM (DBL_MANT_DIG-32+1),d5 + +1: cmpl d0,d2 | is a < b? + bhi 3f | if b > a skip the following + beq 4f | if d0==d2 check d1 and d3 +2: subl d3,d1 | + subxl d2,d0 | a <-- a - b + bset d5,d6 | set the corresponding bit in d6 +3: addl d1,d1 | shift a by 1 + addxl d0,d0 | +#ifndef __mcoldfire__ + dbra d5,1b | and branch back +#else + subql IMM (1), d5 + bpl 1b +#endif + bra 5f +4: cmpl d1,d3 | here d0==d2, so check d1 and d3 + bhi 3b | if d1 > d2 skip the subtraction + bra 2b | else go do it +5: +| Here we have to start setting the bits in the second long. + movel IMM (31),d5 | again d5 is counter + +1: cmpl d0,d2 | is a < b? + bhi 3f | if b > a skip the following + beq 4f | if d0==d2 check d1 and d3 +2: subl d3,d1 | + subxl d2,d0 | a <-- a - b + bset d5,d7 | set the corresponding bit in d7 +3: addl d1,d1 | shift a by 1 + addxl d0,d0 | +#ifndef __mcoldfire__ + dbra d5,1b | and branch back +#else + subql IMM (1), d5 + bpl 1b +#endif + bra 5f +4: cmpl d1,d3 | here d0==d2, so check d1 and d3 + bhi 3b | if d1 > d2 skip the subtraction + bra 2b | else go do it +5: +| Now go ahead checking until we hit a one, which we store in d2. + movel IMM (DBL_MANT_DIG),d5 +1: cmpl d2,d0 | is a < b? + bhi 4f | if b < a, exit + beq 3f | if d0==d2 check d1 and d3 +2: addl d1,d1 | shift a by 1 + addxl d0,d0 | +#ifndef __mcoldfire__ + dbra d5,1b | and branch back +#else + subql IMM (1), d5 + bpl 1b +#endif + movel IMM (0),d2 | here no sticky bit was found + movel d2,d3 + bra 5f +3: cmpl d1,d3 | here d0==d2, so check d1 and d3 + bhi 2b | if d1 > d2 go back +4: +| Here put the sticky bit in d2-d3 (in the position which actually corresponds +| to it; if you don't do this the algorithm loses in some cases). ' + movel IMM (0),d2 + movel d2,d3 +#ifndef __mcoldfire__ + subw IMM (DBL_MANT_DIG),d5 + addw IMM (63),d5 + cmpw IMM (31),d5 +#else + subl IMM (DBL_MANT_DIG),d5 + addl IMM (63),d5 + cmpl IMM (31),d5 +#endif + bhi 2f +1: bset d5,d3 + bra 5f +#ifndef __mcoldfire__ + subw IMM (32),d5 +#else + subl IMM (32),d5 +#endif +2: bset d5,d2 +5: +| Finally we are finished! Move the longs in the address registers to +| their final destination: + movel d6,d0 + movel d7,d1 + movel IMM (0),d3 + +| Here we have finished the division, with the result in d0-d1-d2-d3, with +| 2^21 <= d6 < 2^23. Thus bit 23 is not set, but bit 22 could be set. +| If it is not, then definitely bit 21 is set. Normalize so bit 22 is +| not set: + btst IMM (DBL_MANT_DIG-32+1),d0 + beq 1f +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + roxrl IMM (1),d2 + roxrl IMM (1),d3 + addw IMM (1),d4 +#else + lsrl IMM (1),d3 + btst IMM (0),d2 + beq 10f + bset IMM (31),d3 +10: lsrl IMM (1),d2 + btst IMM (0),d1 + beq 11f + bset IMM (31),d2 +11: lsrl IMM (1),d1 + btst IMM (0),d0 + beq 12f + bset IMM (31),d1 +12: lsrl IMM (1),d0 + addl IMM (1),d4 +#endif +1: +| Now round, check for over- and underflow, and exit. + movel a0,d7 | restore sign bit to d7 + moveq IMM (DIVIDE),d5 + bra Lround$exit + +Ldivdf$inop: + moveq IMM (DIVIDE),d5 + bra Ld$inop + +Ldivdf$a$0: +| If a is zero check to see whether b is zero also. 
In that case return +| NaN; then check if b is NaN, and return NaN also in that case. Else +| return a properly signed zero. + moveq IMM (DIVIDE),d5 + bclr IMM (31),d2 | + movel d2,d4 | + orl d3,d4 | + beq Ld$inop | if b is also zero return NaN + cmpl IMM (0x7ff00000),d2 | check for NaN + bhi Ld$inop | + blt 1f | + tstl d3 | + bne Ld$inop | +1: movel a0,d0 | else return signed zero + moveq IMM(0),d1 | + PICLEA SYM (_fpCCR),a0 | clear exception flags + movew IMM (0),a0@ | +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | +#else + moveml sp@,d2-d7 | + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | + rts | + +Ldivdf$b$0: + moveq IMM (DIVIDE),d5 +| If we got here a is not zero. Check if a is NaN; in that case return NaN, +| else return +/-INFINITY. Remember that a is in d0 with the sign bit +| cleared already. + movel a0,d7 | put a's sign bit back in d7 ' + cmpl IMM (0x7ff00000),d0 | compare d0 with INFINITY + bhi Ld$inop | if larger it is NaN + tstl d1 | + bne Ld$inop | + bra Ld$div$0 | else signal DIVIDE_BY_ZERO + +Ldivdf$b$nf: + moveq IMM (DIVIDE),d5 +| If d2 == 0x7ff00000 we have to check d3. + tstl d3 | + bne Ld$inop | if d3 <> 0, b is NaN + bra Ld$underflow | else b is +/-INFINITY, so signal underflow + +Ldivdf$a$nf: + moveq IMM (DIVIDE),d5 +| If d0 == 0x7ff00000 we have to check d1. + tstl d1 | + bne Ld$inop | if d1 <> 0, a is NaN +| If a is INFINITY we have to check b + cmpl d7,d2 | compare b with INFINITY + bge Ld$inop | if b is NaN or INFINITY return NaN + tstl d3 | + bne Ld$inop | + bra Ld$overflow | else return overflow + +| If a number is denormalized we put an exponent of 1 but do not put the +| bit back into the fraction. +Ldivdf$a$den: + movel IMM (1),d4 + andl d6,d0 +1: addl d1,d1 | shift a left until bit 20 is set + addxl d0,d0 +#ifndef __mcoldfire__ + subw IMM (1),d4 | and adjust exponent +#else + subl IMM (1),d4 | and adjust exponent +#endif + btst IMM (DBL_MANT_DIG-32-1),d0 + bne Ldivdf$1 + bra 1b + +Ldivdf$b$den: + movel IMM (1),d5 + andl d6,d2 +1: addl d3,d3 | shift b left until bit 20 is set + addxl d2,d2 +#ifndef __mcoldfire__ + subw IMM (1),d5 | and adjust exponent +#else + subql IMM (1),d5 | and adjust exponent +#endif + btst IMM (DBL_MANT_DIG-32-1),d2 + bne Ldivdf$2 + bra 1b + +Lround$exit: +| This is a common exit point for __muldf3 and __divdf3. When they enter +| this point the sign of the result is in d7, the result in d0-d1, normalized +| so that 2^21 <= d0 < 2^22, and the exponent is in the lower byte of d4. + +| First check for underlow in the exponent: +#ifndef __mcoldfire__ + cmpw IMM (-DBL_MANT_DIG-1),d4 +#else + cmpl IMM (-DBL_MANT_DIG-1),d4 +#endif + blt Ld$underflow +| It could happen that the exponent is less than 1, in which case the +| number is denormalized. In this case we shift right and adjust the +| exponent until it becomes 1 or the fraction is zero (in the latter case +| we signal underflow and return zero). + movel d7,a0 | + movel IMM (0),d6 | use d6-d7 to collect bits flushed right + movel d6,d7 | use d6-d7 to collect bits flushed right +#ifndef __mcoldfire__ + cmpw IMM (1),d4 | if the exponent is less than 1 we +#else + cmpl IMM (1),d4 | if the exponent is less than 1 we +#endif + bge 2f | have to shift right (denormalize) +1: +#ifndef __mcoldfire__ + addw IMM (1),d4 | adjust the exponent + lsrl IMM (1),d0 | shift right once + roxrl IMM (1),d1 | + roxrl IMM (1),d2 | + roxrl IMM (1),d3 | + roxrl IMM (1),d6 | + roxrl IMM (1),d7 | + cmpw IMM (1),d4 | is the exponent 1 already? 
+#else + addl IMM (1),d4 | adjust the exponent + lsrl IMM (1),d7 + btst IMM (0),d6 + beq 13f + bset IMM (31),d7 +13: lsrl IMM (1),d6 + btst IMM (0),d3 + beq 14f + bset IMM (31),d6 +14: lsrl IMM (1),d3 + btst IMM (0),d2 + beq 10f + bset IMM (31),d3 +10: lsrl IMM (1),d2 + btst IMM (0),d1 + beq 11f + bset IMM (31),d2 +11: lsrl IMM (1),d1 + btst IMM (0),d0 + beq 12f + bset IMM (31),d1 +12: lsrl IMM (1),d0 + cmpl IMM (1),d4 | is the exponent 1 already? +#endif + beq 2f | if not loop back + bra 1b | + bra Ld$underflow | safety check, shouldn't execute ' +2: orl d6,d2 | this is a trick so we don't lose ' + orl d7,d3 | the bits which were flushed right + movel a0,d7 | get back sign bit into d7 +| Now call the rounding routine (which takes care of denormalized numbers): + lea pc@(Lround$0),a0 | to return from rounding routine + PICLEA SYM (_fpCCR),a1 | check the rounding mode +#ifdef __mcoldfire__ + clrl d6 +#endif + movew a1@(6),d6 | rounding mode in d6 + beq Lround$to$nearest +#ifndef __mcoldfire__ + cmpw IMM (ROUND_TO_PLUS),d6 +#else + cmpl IMM (ROUND_TO_PLUS),d6 +#endif + bhi Lround$to$minus + blt Lround$to$zero + bra Lround$to$plus +Lround$0: +| Here we have a correctly rounded result (either normalized or denormalized). + +| Here we should have either a normalized number or a denormalized one, and +| the exponent is necessarily larger or equal to 1 (so we don't have to ' +| check again for underflow!). We have to check for overflow or for a +| denormalized number (which also signals underflow). +| Check for overflow (i.e., exponent >= 0x7ff). +#ifndef __mcoldfire__ + cmpw IMM (0x07ff),d4 +#else + cmpl IMM (0x07ff),d4 +#endif + bge Ld$overflow +| Now check for a denormalized number (exponent==0): + movew d4,d4 + beq Ld$den +1: +| Put back the exponents and sign and return. +#ifndef __mcoldfire__ + lslw IMM (4),d4 | exponent back to fourth byte +#else + lsll IMM (4),d4 | exponent back to fourth byte +#endif + bclr IMM (DBL_MANT_DIG-32-1),d0 + swap d0 | and put back exponent +#ifndef __mcoldfire__ + orw d4,d0 | +#else + orl d4,d0 | +#endif + swap d0 | + orl d7,d0 | and sign also + + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts + +|============================================================================= +| __negdf2 +|============================================================================= + +| double __negdf2(double, double); + FUNC(__negdf2) +SYM (__negdf2): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + moveq IMM (NEGATE),d5 + movel a6@(8),d0 | get number to negate in d0-d1 + movel a6@(12),d1 | + bchg IMM (31),d0 | negate + movel d0,d2 | make a positive copy (for the tests) + bclr IMM (31),d2 | + movel d2,d4 | check for zero + orl d1,d4 | + beq 2f | if zero (either sign) return +zero + cmpl IMM (0x7ff00000),d2 | compare to +INFINITY + blt 1f | if finite, return + bhi Ld$inop | if larger (fraction not zero) is NaN + tstl d1 | if d2 == 0x7ff00000 check d1 + bne Ld$inop | + movel d0,d7 | else get sign and return INFINITY + andl IMM (0x80000000),d7 + bra Ld$infty +1: PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. 
+#endif + unlk a6 + rts +2: bclr IMM (31),d0 + bra 1b + +|============================================================================= +| __cmpdf2 +|============================================================================= + +GREATER = 1 +LESS = -1 +EQUAL = 0 + +| int __cmpdf2_internal(double, double, int); +SYM (__cmpdf2_internal): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- | save registers +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + moveq IMM (COMPARE),d5 + movel a6@(8),d0 | get first operand + movel a6@(12),d1 | + movel a6@(16),d2 | get second operand + movel a6@(20),d3 | +| First check if a and/or b are (+/-) zero and in that case clear +| the sign bit. + movel d0,d6 | copy signs into d6 (a) and d7(b) + bclr IMM (31),d0 | and clear signs in d0 and d2 + movel d2,d7 | + bclr IMM (31),d2 | + cmpl IMM (0x7ff00000),d0 | check for a == NaN + bhi Lcmpd$inop | if d0 > 0x7ff00000, a is NaN + beq Lcmpdf$a$nf | if equal can be INFINITY, so check d1 + movel d0,d4 | copy into d4 to test for zero + orl d1,d4 | + beq Lcmpdf$a$0 | +Lcmpdf$0: + cmpl IMM (0x7ff00000),d2 | check for b == NaN + bhi Lcmpd$inop | if d2 > 0x7ff00000, b is NaN + beq Lcmpdf$b$nf | if equal can be INFINITY, so check d3 + movel d2,d4 | + orl d3,d4 | + beq Lcmpdf$b$0 | +Lcmpdf$1: +| Check the signs + eorl d6,d7 + bpl 1f +| If the signs are not equal check if a >= 0 + tstl d6 + bpl Lcmpdf$a$gt$b | if (a >= 0 && b < 0) => a > b + bmi Lcmpdf$b$gt$a | if (a < 0 && b >= 0) => a < b +1: +| If the signs are equal check for < 0 + tstl d6 + bpl 1f +| If both are negative exchange them +#ifndef __mcoldfire__ + exg d0,d2 + exg d1,d3 +#else + movel d0,d7 + movel d2,d0 + movel d7,d2 + movel d1,d7 + movel d3,d1 + movel d7,d3 +#endif +1: +| Now that they are positive we just compare them as longs (does this also +| work for denormalized numbers?). + cmpl d0,d2 + bhi Lcmpdf$b$gt$a | |b| > |a| + bne Lcmpdf$a$gt$b | |b| < |a| +| If we got here d0 == d2, so we compare d1 and d3. + cmpl d1,d3 + bhi Lcmpdf$b$gt$a | |b| > |a| + bne Lcmpdf$a$gt$b | |b| < |a| +| If we got here a == b. + movel IMM (EQUAL),d0 +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | put back the registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts +Lcmpdf$a$gt$b: + movel IMM (GREATER),d0 +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | put back the registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts +Lcmpdf$b$gt$a: + movel IMM (LESS),d0 +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | put back the registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. 
+#endif + unlk a6 + rts + +Lcmpdf$a$0: + bclr IMM (31),d6 + bra Lcmpdf$0 +Lcmpdf$b$0: + bclr IMM (31),d7 + bra Lcmpdf$1 + +Lcmpdf$a$nf: + tstl d1 + bne Ld$inop + bra Lcmpdf$0 + +Lcmpdf$b$nf: + tstl d3 + bne Ld$inop + bra Lcmpdf$1 + +Lcmpd$inop: + movl a6@(24),d0 + moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 + moveq IMM (DOUBLE_FLOAT),d6 + PICJUMP $_exception_handler + +| int __cmpdf2(double, double); + FUNC(__cmpdf2) +SYM (__cmpdf2): + link a6,IMM (0) + pea 1 + movl a6@(20),sp@- + movl a6@(16),sp@- + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpdf2_internal) + unlk a6 + rts + +|============================================================================= +| rounding routines +|============================================================================= + +| The rounding routines expect the number to be normalized in registers +| d0-d1-d2-d3, with the exponent in register d4. They assume that the +| exponent is larger or equal to 1. They return a properly normalized number +| if possible, and a denormalized number otherwise. The exponent is returned +| in d4. + +Lround$to$nearest: +| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"): +| Here we assume that the exponent is not too small (this should be checked +| before entering the rounding routine), but the number could be denormalized. + +| Check for denormalized numbers: +1: btst IMM (DBL_MANT_DIG-32),d0 + bne 2f | if set the number is normalized +| Normalize shifting left until bit #DBL_MANT_DIG-32 is set or the exponent +| is one (remember that a denormalized number corresponds to an +| exponent of -D_BIAS+1). +#ifndef __mcoldfire__ + cmpw IMM (1),d4 | remember that the exponent is at least one +#else + cmpl IMM (1),d4 | remember that the exponent is at least one +#endif + beq 2f | an exponent of one means denormalized + addl d3,d3 | else shift and adjust the exponent + addxl d2,d2 | + addxl d1,d1 | + addxl d0,d0 | +#ifndef __mcoldfire__ + dbra d4,1b | +#else + subql IMM (1), d4 + bpl 1b +#endif +2: +| Now round: we do it as follows: after the shifting we can write the +| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2. +| If delta < 1, do nothing. If delta > 1, add 1 to f. +| If delta == 1, we make sure the rounded number will be even (odd?) +| (after shifting). + btst IMM (0),d1 | is delta < 1? + beq 2f | if so, do not do anything + orl d2,d3 | is delta == 1? + bne 1f | if so round to even + movel d1,d3 | + andl IMM (2),d3 | bit 1 is the last significant bit + movel IMM (0),d2 | + addl d3,d1 | + addxl d2,d0 | + bra 2f | +1: movel IMM (1),d3 | else add 1 + movel IMM (0),d2 | + addl d3,d1 | + addxl d2,d0 +| Shift right once (because we used bit #DBL_MANT_DIG-32!). +2: +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 +#else + lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 +#endif + +| Now check again bit #DBL_MANT_DIG-32 (rounding could have produced a +| 'fraction overflow' ...). + btst IMM (DBL_MANT_DIG-32),d0 + beq 1f +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + addw IMM (1),d4 +#else + lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 + addl IMM (1),d4 +#endif +1: +| If bit #DBL_MANT_DIG-32-1 is clear we have a denormalized number, so we +| have to put the exponent to zero and return a denormalized number. 
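The Lround$to$nearest comments above state the policy in terms of a fraction f plus a remainder delta: round down when delta < 1, up when delta > 1, and to an even result on the delta == 1 tie. A small C sketch of the same decision, assuming the extra precision has already been reduced to one guard bit plus a sticky flag (round_nearest_even is a made-up name, not a routine from this file):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative sketch only: round to nearest, ties to even.  'mant'
       carries one extra guard bit below the final LSB; 'sticky' records
       whether any lower bits were nonzero. */
    static uint64_t round_nearest_even(uint64_t mant, int sticky)
    {
        uint64_t guard = mant & 1;           /* bit just below the result LSB */
        mant >>= 1;                          /* drop the guard bit */
        if (guard && (sticky || (mant & 1))) /* above half, or a tie and odd */
            mant += 1;                       /* round up (to even on ties) */
        return mant;
    }

    int main(void)
    {
        /* binary 101.1, a tie: rounds to the even value 110 (6) */
        printf("%llu\n", (unsigned long long)round_nearest_even(0xB, 0));
        /* binary 101.0: stays 101 (5) */
        printf("%llu\n", (unsigned long long)round_nearest_even(0xA, 0));
        return 0;
    }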
+ btst IMM (DBL_MANT_DIG-32-1),d0 + beq 1f + jmp a0@ +1: movel IMM (0),d4 + jmp a0@ + +Lround$to$zero: +Lround$to$plus: +Lround$to$minus: + jmp a0@ +#endif /* L_double */ + +#ifdef L_float + + .globl SYM (_fpCCR) + .globl $_exception_handler + +QUIET_NaN = 0xffffffff +SIGNL_NaN = 0x7f800001 +INFINITY = 0x7f800000 + +F_MAX_EXP = 0xff +F_BIAS = 126 +FLT_MAX_EXP = F_MAX_EXP - F_BIAS +FLT_MIN_EXP = 1 - F_BIAS +FLT_MANT_DIG = 24 + +INEXACT_RESULT = 0x0001 +UNDERFLOW = 0x0002 +OVERFLOW = 0x0004 +DIVIDE_BY_ZERO = 0x0008 +INVALID_OPERATION = 0x0010 + +SINGLE_FLOAT = 1 + +NOOP = 0 +ADD = 1 +MULTIPLY = 2 +DIVIDE = 3 +NEGATE = 4 +COMPARE = 5 +EXTENDSFDF = 6 +TRUNCDFSF = 7 + +UNKNOWN = -1 +ROUND_TO_NEAREST = 0 | round result to nearest representable value +ROUND_TO_ZERO = 1 | round result towards zero +ROUND_TO_PLUS = 2 | round result towards plus infinity +ROUND_TO_MINUS = 3 | round result towards minus infinity + +| Entry points: + + .globl SYM (__addsf3) + .globl SYM (__subsf3) + .globl SYM (__mulsf3) + .globl SYM (__divsf3) + .globl SYM (__negsf2) + .globl SYM (__cmpsf2) + .globl SYM (__cmpsf2_internal) + .hidden SYM (__cmpsf2_internal) + +| These are common routines to return and signal exceptions. + + .text + .even + +Lf$den: +| Return and signal a denormalized number + orl d7,d0 + moveq IMM (INEXACT_RESULT+UNDERFLOW),d7 + moveq IMM (SINGLE_FLOAT),d6 + PICJUMP $_exception_handler + +Lf$infty: +Lf$overflow: +| Return a properly signed INFINITY and set the exception flags + movel IMM (INFINITY),d0 + orl d7,d0 + moveq IMM (INEXACT_RESULT+OVERFLOW),d7 + moveq IMM (SINGLE_FLOAT),d6 + PICJUMP $_exception_handler + +Lf$underflow: +| Return 0 and set the exception flags + moveq IMM (0),d0 + moveq IMM (INEXACT_RESULT+UNDERFLOW),d7 + moveq IMM (SINGLE_FLOAT),d6 + PICJUMP $_exception_handler + +Lf$inop: +| Return a quiet NaN and set the exception flags + movel IMM (QUIET_NaN),d0 + moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 + moveq IMM (SINGLE_FLOAT),d6 + PICJUMP $_exception_handler + +Lf$div$0: +| Return a properly signed INFINITY and set the exception flags + movel IMM (INFINITY),d0 + orl d7,d0 + moveq IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7 + moveq IMM (SINGLE_FLOAT),d6 + PICJUMP $_exception_handler + +|============================================================================= +|============================================================================= +| single precision routines +|============================================================================= +|============================================================================= + +| A single precision floating point number (float) has the format: +| +| struct _float { +| unsigned int sign : 1; /* sign bit */ +| unsigned int exponent : 8; /* exponent, shifted by 126 */ +| unsigned int fraction : 23; /* fraction */ +| } float; +| +| Thus sizeof(float) = 4 (32 bits). +| +| All the routines are callable from C programs, and return the result +| in the single register d0. They also preserve all registers except +| d0-d1 and a0-a1. 
+ +|============================================================================= +| __subsf3 +|============================================================================= + +| float __subsf3(float, float); + FUNC(__subsf3) +SYM (__subsf3): + bchg IMM (31),sp@(8) | change sign of second operand + | and fall through +|============================================================================= +| __addsf3 +|============================================================================= + +| float __addsf3(float, float); + FUNC(__addsf3) +SYM (__addsf3): +#ifndef __mcoldfire__ + link a6,IMM (0) | everything will be done in registers + moveml d2-d7,sp@- | save all data registers but d0-d1 +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + movel a6@(8),d0 | get first operand + movel a6@(12),d1 | get second operand + movel d0,a0 | get d0's sign bit ' + addl d0,d0 | check and clear sign bit of a + beq Laddsf$b | if zero return second operand + movel d1,a1 | save b's sign bit ' + addl d1,d1 | get rid of sign bit + beq Laddsf$a | if zero return first operand + +| Get the exponents and check for denormalized and/or infinity. + + movel IMM (0x00ffffff),d4 | mask to get fraction + movel IMM (0x01000000),d5 | mask to put hidden bit back + + movel d0,d6 | save a to get exponent + andl d4,d0 | get fraction in d0 + notl d4 | make d4 into a mask for the exponent + andl d4,d6 | get exponent in d6 + beq Laddsf$a$den | branch if a is denormalized + cmpl d4,d6 | check for INFINITY or NaN + beq Laddsf$nf + swap d6 | put exponent into first word + orl d5,d0 | and put hidden bit back +Laddsf$1: +| Now we have a's exponent in d6 (second byte) and the mantissa in d0. ' + movel d1,d7 | get exponent in d7 + andl d4,d7 | + beq Laddsf$b$den | branch if b is denormalized + cmpl d4,d7 | check for INFINITY or NaN + beq Laddsf$nf + swap d7 | put exponent into first word + notl d4 | make d4 into a mask for the fraction + andl d4,d1 | get fraction in d1 + orl d5,d1 | and put hidden bit back +Laddsf$2: +| Now we have b's exponent in d7 (second byte) and the mantissa in d1. ' + +| Note that the hidden bit corresponds to bit #FLT_MANT_DIG-1, and we +| shifted right once, so bit #FLT_MANT_DIG is set (so we have one extra +| bit). + + movel d1,d2 | move b to d2, since we want to use + | two registers to do the sum + movel IMM (0),d1 | and clear the new ones + movel d1,d3 | + +| Here we shift the numbers in registers d0 and d1 so the exponents are the +| same, and put the largest exponent in d6. Note that we are using two +| registers for each number (see the discussion by D. Knuth in "Seminumerical +| Algorithms"). 
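The alignment described above shifts the operand with the smaller exponent right by the exponent difference so that both share the larger exponent; the assembly keeps the shifted-out bits in a second register, and when the difference reaches FLT_MANT_DIG+2 it gives up and returns the larger operand directly (Laddsf$a$small / Laddsf$b$small). A simplified C sketch that collapses the guard register into a single sticky bit (align_operands is a made-up name):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative sketch only: bring two (mantissa, exponent) pairs to a
       common exponent, ORing shifted-out bits into a sticky bit. */
    static void align_operands(uint64_t *ma, int *ea, uint64_t *mb, int *eb)
    {
        if (*ea < *eb) {                     /* make a the larger-exponent one */
            uint64_t tm = *ma; *ma = *mb; *mb = tm;
            int te = *ea; *ea = *eb; *eb = te;
        }
        int diff = *ea - *eb;
        if (diff >= 64) {
            *mb = (*mb != 0);                /* only a sticky bit survives */
        } else if (diff > 0) {
            uint64_t lost = *mb & ((1ULL << diff) - 1);
            *mb = (*mb >> diff) | (lost != 0);
        }
        *eb = *ea;
    }

    int main(void)
    {
        uint64_t ma = 0x1800000, mb = 0x1000000; /* 24-bit mantissas, hidden bit set */
        int ea = 5, eb = 2;
        align_operands(&ma, &ea, &mb, &eb);
        printf("ma=%#llx ea=%d  mb=%#llx eb=%d\n",
               (unsigned long long)ma, ea, (unsigned long long)mb, eb);
        return 0;
    }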
+#ifndef __mcoldfire__ + cmpw d6,d7 | compare exponents +#else + cmpl d6,d7 | compare exponents +#endif + beq Laddsf$3 | if equal don't shift ' + bhi 5f | branch if second exponent largest +1: + subl d6,d7 | keep the largest exponent + negl d7 +#ifndef __mcoldfire__ + lsrw IMM (8),d7 | put difference in lower byte +#else + lsrl IMM (8),d7 | put difference in lower byte +#endif +| if difference is too large we don't shift (actually, we can just exit) ' +#ifndef __mcoldfire__ + cmpw IMM (FLT_MANT_DIG+2),d7 +#else + cmpl IMM (FLT_MANT_DIG+2),d7 +#endif + bge Laddsf$b$small +#ifndef __mcoldfire__ + cmpw IMM (16),d7 | if difference >= 16 swap +#else + cmpl IMM (16),d7 | if difference >= 16 swap +#endif + bge 4f +2: +#ifndef __mcoldfire__ + subw IMM (1),d7 +#else + subql IMM (1), d7 +#endif +3: +#ifndef __mcoldfire__ + lsrl IMM (1),d2 | shift right second operand + roxrl IMM (1),d3 + dbra d7,3b +#else + lsrl IMM (1),d3 + btst IMM (0),d2 + beq 10f + bset IMM (31),d3 +10: lsrl IMM (1),d2 + subql IMM (1), d7 + bpl 3b +#endif + bra Laddsf$3 +4: + movew d2,d3 + swap d3 + movew d3,d2 + swap d2 +#ifndef __mcoldfire__ + subw IMM (16),d7 +#else + subl IMM (16),d7 +#endif + bne 2b | if still more bits, go back to normal case + bra Laddsf$3 +5: +#ifndef __mcoldfire__ + exg d6,d7 | exchange the exponents +#else + eorl d6,d7 + eorl d7,d6 + eorl d6,d7 +#endif + subl d6,d7 | keep the largest exponent + negl d7 | +#ifndef __mcoldfire__ + lsrw IMM (8),d7 | put difference in lower byte +#else + lsrl IMM (8),d7 | put difference in lower byte +#endif +| if difference is too large we don't shift (and exit!) ' +#ifndef __mcoldfire__ + cmpw IMM (FLT_MANT_DIG+2),d7 +#else + cmpl IMM (FLT_MANT_DIG+2),d7 +#endif + bge Laddsf$a$small +#ifndef __mcoldfire__ + cmpw IMM (16),d7 | if difference >= 16 swap +#else + cmpl IMM (16),d7 | if difference >= 16 swap +#endif + bge 8f +6: +#ifndef __mcoldfire__ + subw IMM (1),d7 +#else + subl IMM (1),d7 +#endif +7: +#ifndef __mcoldfire__ + lsrl IMM (1),d0 | shift right first operand + roxrl IMM (1),d1 + dbra d7,7b +#else + lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 + subql IMM (1),d7 + bpl 7b +#endif + bra Laddsf$3 +8: + movew d0,d1 + swap d1 + movew d1,d0 + swap d0 +#ifndef __mcoldfire__ + subw IMM (16),d7 +#else + subl IMM (16),d7 +#endif + bne 6b | if still more bits, go back to normal case + | otherwise we fall through + +| Now we have a in d0-d1, b in d2-d3, and the largest exponent in d6 (the +| signs are stored in a0 and a1). + +Laddsf$3: +| Here we have to decide whether to add or subtract the numbers +#ifndef __mcoldfire__ + exg d6,a0 | get signs back + exg d7,a1 | and save the exponents +#else + movel d6,d4 + movel a0,d6 + movel d4,a0 + movel d7,d4 + movel a1,d7 + movel d4,a1 +#endif + eorl d6,d7 | combine sign bits + bmi Lsubsf$0 | if negative a and b have opposite + | sign so we actually subtract the + | numbers + +| Here we have both positive or both negative +#ifndef __mcoldfire__ + exg d6,a0 | now we have the exponent in d6 +#else + movel d6,d4 + movel a0,d6 + movel d4,a0 +#endif + movel a0,d7 | and sign in d7 + andl IMM (0x80000000),d7 +| Here we do the addition. + addl d3,d1 + addxl d2,d0 +| Note: now we have d2, d3, d4 and d5 to play with! 
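The addl/addxl pair just above is the 68k idiom for a multiword add: the first instruction adds the low words and leaves the carry in the X flag, and the second adds the high words together with X. Roughly, in C (add64 is a made-up name):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative sketch only: a 64-bit add built from two 32-bit adds
       with the carry propagated by hand, as addl/addxl do. */
    static void add64(uint32_t ahi, uint32_t alo, uint32_t bhi, uint32_t blo,
                      uint32_t *rhi, uint32_t *rlo)
    {
        uint32_t lo = alo + blo;             /* addl: low words */
        uint32_t carry = lo < alo;           /* did the low add wrap? */
        *rhi = ahi + bhi + carry;            /* addxl: high words plus X */
        *rlo = lo;
    }

    int main(void)
    {
        uint32_t hi, lo;
        add64(0x0000ffff, 0xffffffff, 0, 1, &hi, &lo);
        printf("%#x %#x\n", (unsigned)hi, (unsigned)lo);   /* 0x10000 0 */
        return 0;
    }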
+ +| Put the exponent, in the first byte, in d2, to use the "standard" rounding +| routines: + movel d6,d2 +#ifndef __mcoldfire__ + lsrw IMM (8),d2 +#else + lsrl IMM (8),d2 +#endif + +| Before rounding normalize so bit #FLT_MANT_DIG is set (we will consider +| the case of denormalized numbers in the rounding routine itself). +| As in the addition (not in the subtraction!) we could have set +| one more bit we check this: + btst IMM (FLT_MANT_DIG+1),d0 + beq 1f +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 +#else + lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 +#endif + addl IMM (1),d2 +1: + lea pc@(Laddsf$4),a0 | to return from rounding routine + PICLEA SYM (_fpCCR),a1 | check the rounding mode +#ifdef __mcoldfire__ + clrl d6 +#endif + movew a1@(6),d6 | rounding mode in d6 + beq Lround$to$nearest +#ifndef __mcoldfire__ + cmpw IMM (ROUND_TO_PLUS),d6 +#else + cmpl IMM (ROUND_TO_PLUS),d6 +#endif + bhi Lround$to$minus + blt Lround$to$zero + bra Lround$to$plus +Laddsf$4: +| Put back the exponent, but check for overflow. +#ifndef __mcoldfire__ + cmpw IMM (0xff),d2 +#else + cmpl IMM (0xff),d2 +#endif + bhi 1f + bclr IMM (FLT_MANT_DIG-1),d0 +#ifndef __mcoldfire__ + lslw IMM (7),d2 +#else + lsll IMM (7),d2 +#endif + swap d2 + orl d2,d0 + bra Laddsf$ret +1: + moveq IMM (ADD),d5 + bra Lf$overflow + +Lsubsf$0: +| We are here if a > 0 and b < 0 (sign bits cleared). +| Here we do the subtraction. + movel d6,d7 | put sign in d7 + andl IMM (0x80000000),d7 + + subl d3,d1 | result in d0-d1 + subxl d2,d0 | + beq Laddsf$ret | if zero just exit + bpl 1f | if positive skip the following + bchg IMM (31),d7 | change sign bit in d7 + negl d1 + negxl d0 +1: +#ifndef __mcoldfire__ + exg d2,a0 | now we have the exponent in d2 + lsrw IMM (8),d2 | put it in the first byte +#else + movel d2,d4 + movel a0,d2 + movel d4,a0 + lsrl IMM (8),d2 | put it in the first byte +#endif + +| Now d0-d1 is positive and the sign bit is in d7. + +| Note that we do not have to normalize, since in the subtraction bit +| #FLT_MANT_DIG+1 is never set, and denormalized numbers are handled by +| the rounding routines themselves. + lea pc@(Lsubsf$1),a0 | to return from rounding routine + PICLEA SYM (_fpCCR),a1 | check the rounding mode +#ifdef __mcoldfire__ + clrl d6 +#endif + movew a1@(6),d6 | rounding mode in d6 + beq Lround$to$nearest +#ifndef __mcoldfire__ + cmpw IMM (ROUND_TO_PLUS),d6 +#else + cmpl IMM (ROUND_TO_PLUS),d6 +#endif + bhi Lround$to$minus + blt Lround$to$zero + bra Lround$to$plus +Lsubsf$1: +| Put back the exponent (we can't have overflow!). ' + bclr IMM (FLT_MANT_DIG-1),d0 +#ifndef __mcoldfire__ + lslw IMM (7),d2 +#else + lsll IMM (7),d2 +#endif + swap d2 + orl d2,d0 + bra Laddsf$ret + +| If one of the numbers was too small (difference of exponents >= +| FLT_MANT_DIG+2) we return the other (and now we don't have to ' +| check for finiteness or zero). +Laddsf$a$small: + movel a6@(12),d0 + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | restore data registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | and return + rts + +Laddsf$b$small: + movel a6@(8),d0 + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | restore data registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. 
+#endif + unlk a6 | and return + rts + +| If the numbers are denormalized remember to put exponent equal to 1. + +Laddsf$a$den: + movel d5,d6 | d5 contains 0x01000000 + swap d6 + bra Laddsf$1 + +Laddsf$b$den: + movel d5,d7 + swap d7 + notl d4 | make d4 into a mask for the fraction + | (this was not executed after the jump) + bra Laddsf$2 + +| The rest is mainly code for the different results which can be +| returned (checking always for +/-INFINITY and NaN). + +Laddsf$b: +| Return b (if a is zero). + movel a6@(12),d0 + cmpl IMM (0x80000000),d0 | Check if b is -0 + bne 1f + movel a0,d7 + andl IMM (0x80000000),d7 | Use the sign of a + clrl d0 + bra Laddsf$ret +Laddsf$a: +| Return a (if b is zero). + movel a6@(8),d0 +1: + moveq IMM (ADD),d5 +| We have to check for NaN and +/-infty. + movel d0,d7 + andl IMM (0x80000000),d7 | put sign in d7 + bclr IMM (31),d0 | clear sign + cmpl IMM (INFINITY),d0 | check for infty or NaN + bge 2f + movel d0,d0 | check for zero (we do this because we don't ' + bne Laddsf$ret | want to return -0 by mistake + bclr IMM (31),d7 | if zero be sure to clear sign + bra Laddsf$ret | if everything OK just return +2: +| The value to be returned is either +/-infty or NaN + andl IMM (0x007fffff),d0 | check for NaN + bne Lf$inop | if mantissa not zero is NaN + bra Lf$infty + +Laddsf$ret: +| Normal exit (a and b nonzero, result is not NaN nor +/-infty). +| We have to clear the exception flags (just the exception type). + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ + orl d7,d0 | put sign bit +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | restore data registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | and return + rts + +Laddsf$ret$den: +| Return a denormalized number (for addition we don't signal underflow) ' + lsrl IMM (1),d0 | remember to shift right back once + bra Laddsf$ret | and return + +| Note: when adding two floats of the same sign if either one is +| NaN we return NaN without regard to whether the other is finite or +| not. When subtracting them (i.e., when adding two numbers of +| opposite signs) things are more complicated: if both are INFINITY +| we return NaN, if only one is INFINITY and the other is NaN we return +| NaN, but if it is finite we return INFINITY with the corresponding sign. + +Laddsf$nf: + moveq IMM (ADD),d5 +| This could be faster but it is not worth the effort, since it is not +| executed very often. We sacrifice speed for clarity here. + movel a6@(8),d0 | get the numbers back (remember that we + movel a6@(12),d1 | did some processing already) + movel IMM (INFINITY),d4 | useful constant (INFINITY) + movel d0,d2 | save sign bits + movel d1,d3 + bclr IMM (31),d0 | clear sign bits + bclr IMM (31),d1 +| We know that one of them is either NaN of +/-INFINITY +| Check for NaN (if either one is NaN return NaN) + cmpl d4,d0 | check first a (d0) + bhi Lf$inop + cmpl d4,d1 | check now b (d1) + bhi Lf$inop +| Now comes the check for +/-INFINITY. We know that both are (maybe not +| finite) numbers, but we have to check if both are infinite whether we +| are adding or subtracting them. + eorl d3,d2 | to check sign bits + bmi 1f + movel d0,d7 + andl IMM (0x80000000),d7 | get (common) sign bit + bra Lf$infty +1: +| We know one (or both) are infinite, so we test for equality between the +| two numbers (if they are equal they have to be infinite both, so we +| return NaN). + cmpl d1,d0 | are both infinite? 
+ beq Lf$inop | if so return NaN + + movel d0,d7 + andl IMM (0x80000000),d7 | get a's sign bit ' + cmpl d4,d0 | test now for infinity + beq Lf$infty | if a is INFINITY return with this sign + bchg IMM (31),d7 | else we know b is INFINITY and has + bra Lf$infty | the opposite sign + +|============================================================================= +| __mulsf3 +|============================================================================= + +| float __mulsf3(float, float); + FUNC(__mulsf3) +SYM (__mulsf3): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + movel a6@(8),d0 | get a into d0 + movel a6@(12),d1 | and b into d1 + movel d0,d7 | d7 will hold the sign of the product + eorl d1,d7 | + andl IMM (0x80000000),d7 + movel IMM (INFINITY),d6 | useful constant (+INFINITY) + movel d6,d5 | another (mask for fraction) + notl d5 | + movel IMM (0x00800000),d4 | this is to put hidden bit back + bclr IMM (31),d0 | get rid of a's sign bit ' + movel d0,d2 | + beq Lmulsf$a$0 | branch if a is zero + bclr IMM (31),d1 | get rid of b's sign bit ' + movel d1,d3 | + beq Lmulsf$b$0 | branch if b is zero + cmpl d6,d0 | is a big? + bhi Lmulsf$inop | if a is NaN return NaN + beq Lmulsf$inf | if a is INFINITY we have to check b + cmpl d6,d1 | now compare b with INFINITY + bhi Lmulsf$inop | is b NaN? + beq Lmulsf$overflow | is b INFINITY? +| Here we have both numbers finite and nonzero (and with no sign bit). +| Now we get the exponents into d2 and d3. + andl d6,d2 | and isolate exponent in d2 + beq Lmulsf$a$den | if exponent is zero we have a denormalized + andl d5,d0 | and isolate fraction + orl d4,d0 | and put hidden bit back + swap d2 | I like exponents in the first byte +#ifndef __mcoldfire__ + lsrw IMM (7),d2 | +#else + lsrl IMM (7),d2 | +#endif +Lmulsf$1: | number + andl d6,d3 | + beq Lmulsf$b$den | + andl d5,d1 | + orl d4,d1 | + swap d3 | +#ifndef __mcoldfire__ + lsrw IMM (7),d3 | +#else + lsrl IMM (7),d3 | +#endif +Lmulsf$2: | +#ifndef __mcoldfire__ + addw d3,d2 | add exponents + subw IMM (F_BIAS+1),d2 | and subtract bias (plus one) +#else + addl d3,d2 | add exponents + subl IMM (F_BIAS+1),d2 | and subtract bias (plus one) +#endif + +| We are now ready to do the multiplication. The situation is as follows: +| both a and b have bit FLT_MANT_DIG-1 set (even if they were +| denormalized to start with!), which means that in the product +| bit 2*(FLT_MANT_DIG-1) (that is, bit 2*FLT_MANT_DIG-2-32 of the +| high long) is set. + +| To do the multiplication let us move the number a little bit around ... + movel d1,d6 | second operand in d6 + movel d0,d5 | first operand in d4-d5 + movel IMM (0),d4 + movel d4,d1 | the sums will go in d0-d1 + movel d4,d0 + +| now bit FLT_MANT_DIG-1 becomes bit 31: + lsll IMM (31-FLT_MANT_DIG+1),d6 + +| Start the loop (we loop #FLT_MANT_DIG times): + moveq IMM (FLT_MANT_DIG-1),d3 +1: addl d1,d1 | shift sum + addxl d0,d0 + lsll IMM (1),d6 | get bit bn + bcc 2f | if not set skip sum + addl d5,d1 | add a + addxl d4,d0 +2: +#ifndef __mcoldfire__ + dbf d3,1b | loop back +#else + subql IMM (1),d3 + bpl 1b +#endif + +| Now we have the product in d0-d1, with bit (FLT_MANT_DIG - 1) + FLT_MANT_DIG +| (mod 32) of d0 set. The first thing to do now is to normalize it so bit +| FLT_MANT_DIG is set (to do the rounding). 
+#ifndef __mcoldfire__ + rorl IMM (6),d1 + swap d1 + movew d1,d3 + andw IMM (0x03ff),d3 + andw IMM (0xfd00),d1 +#else + movel d1,d3 + lsll IMM (8),d1 + addl d1,d1 + addl d1,d1 + moveq IMM (22),d5 + lsrl d5,d3 + orl d3,d1 + andl IMM (0xfffffd00),d1 +#endif + lsll IMM (8),d0 + addl d0,d0 + addl d0,d0 +#ifndef __mcoldfire__ + orw d3,d0 +#else + orl d3,d0 +#endif + + moveq IMM (MULTIPLY),d5 + + btst IMM (FLT_MANT_DIG+1),d0 + beq Lround$exit +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + addw IMM (1),d2 +#else + lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 + addql IMM (1),d2 +#endif + bra Lround$exit + +Lmulsf$inop: + moveq IMM (MULTIPLY),d5 + bra Lf$inop + +Lmulsf$overflow: + moveq IMM (MULTIPLY),d5 + bra Lf$overflow + +Lmulsf$inf: + moveq IMM (MULTIPLY),d5 +| If either is NaN return NaN; else both are (maybe infinite) numbers, so +| return INFINITY with the correct sign (which is in d7). + cmpl d6,d1 | is b NaN? + bhi Lf$inop | if so return NaN + bra Lf$overflow | else return +/-INFINITY + +| If either number is zero return zero, unless the other is +/-INFINITY, +| or NaN, in which case we return NaN. +Lmulsf$b$0: +| Here d1 (==b) is zero. + movel a6@(8),d1 | get a again to check for non-finiteness + bra 1f +Lmulsf$a$0: + movel a6@(12),d1 | get b again to check for non-finiteness +1: bclr IMM (31),d1 | clear sign bit + cmpl IMM (INFINITY),d1 | and check for a large exponent + bge Lf$inop | if b is +/-INFINITY or NaN return NaN + movel d7,d0 | else return signed zero + PICLEA SYM (_fpCCR),a0 | + movew IMM (0),a0@ | +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | + rts | + +| If a number is denormalized we put an exponent of 1 but do not put the +| hidden bit back into the fraction; instead we shift left until bit 23 +| (the hidden bit) is set, adjusting the exponent accordingly. We do this +| to ensure that the product of the fractions is close to 1. 
+Lmulsf$a$den: + movel IMM (1),d2 + andl d5,d0 +1: addl d0,d0 | shift a left (until bit 23 is set) +#ifndef __mcoldfire__ + subw IMM (1),d2 | and adjust exponent +#else + subql IMM (1),d2 | and adjust exponent +#endif + btst IMM (FLT_MANT_DIG-1),d0 + bne Lmulsf$1 | + bra 1b | else loop back + +Lmulsf$b$den: + movel IMM (1),d3 + andl d5,d1 +1: addl d1,d1 | shift b left until bit 23 is set +#ifndef __mcoldfire__ + subw IMM (1),d3 | and adjust exponent +#else + subql IMM (1),d3 | and adjust exponent +#endif + btst IMM (FLT_MANT_DIG-1),d1 + bne Lmulsf$2 | + bra 1b | else loop back + +|============================================================================= +| __divsf3 +|============================================================================= + +| float __divsf3(float, float); + FUNC(__divsf3) +SYM (__divsf3): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + movel a6@(8),d0 | get a into d0 + movel a6@(12),d1 | and b into d1 + movel d0,d7 | d7 will hold the sign of the result + eorl d1,d7 | + andl IMM (0x80000000),d7 | + movel IMM (INFINITY),d6 | useful constant (+INFINITY) + movel d6,d5 | another (mask for fraction) + notl d5 | + movel IMM (0x00800000),d4 | this is to put hidden bit back + bclr IMM (31),d0 | get rid of a's sign bit ' + movel d0,d2 | + beq Ldivsf$a$0 | branch if a is zero + bclr IMM (31),d1 | get rid of b's sign bit ' + movel d1,d3 | + beq Ldivsf$b$0 | branch if b is zero + cmpl d6,d0 | is a big? + bhi Ldivsf$inop | if a is NaN return NaN + beq Ldivsf$inf | if a is INFINITY we have to check b + cmpl d6,d1 | now compare b with INFINITY + bhi Ldivsf$inop | if b is NaN return NaN + beq Ldivsf$underflow +| Here we have both numbers finite and nonzero (and with no sign bit). +| Now we get the exponents into d2 and d3 and normalize the numbers to +| ensure that the ratio of the fractions is close to 1. We do this by +| making sure that bit #FLT_MANT_DIG-1 (hidden bit) is set. + andl d6,d2 | and isolate exponent in d2 + beq Ldivsf$a$den | if exponent is zero we have a denormalized + andl d5,d0 | and isolate fraction + orl d4,d0 | and put hidden bit back + swap d2 | I like exponents in the first byte +#ifndef __mcoldfire__ + lsrw IMM (7),d2 | +#else + lsrl IMM (7),d2 | +#endif +Ldivsf$1: | + andl d6,d3 | + beq Ldivsf$b$den | + andl d5,d1 | + orl d4,d1 | + swap d3 | +#ifndef __mcoldfire__ + lsrw IMM (7),d3 | +#else + lsrl IMM (7),d3 | +#endif +Ldivsf$2: | +#ifndef __mcoldfire__ + subw d3,d2 | subtract exponents + addw IMM (F_BIAS),d2 | and add bias +#else + subl d3,d2 | subtract exponents + addl IMM (F_BIAS),d2 | and add bias +#endif + +| We are now ready to do the division. We have prepared things in such a way +| that the ratio of the fractions will be less than 2 but greater than 1/2. +| At this point the registers in use are: +| d0 holds a (first operand, bit FLT_MANT_DIG=0, bit FLT_MANT_DIG-1=1) +| d1 holds b (second operand, bit FLT_MANT_DIG=1) +| d2 holds the difference of the exponents, corrected by the bias +| d7 holds the sign of the ratio +| d4, d5, d6 hold some constants + movel d7,a0 | d6-d7 will hold the ratio of the fractions + movel IMM (0),d6 | + movel d6,d7 + + moveq IMM (FLT_MANT_DIG+1),d3 +1: cmpl d0,d1 | is a < b? + bhi 2f | + bset d3,d6 | set a bit in d6 + subl d1,d0 | if a >= b a <-- a-b + beq 3f | if a is zero, exit +2: addl d0,d0 | multiply a by 2 +#ifndef __mcoldfire__ + dbra d3,1b +#else + subql IMM (1),d3 + bpl 1b +#endif + +| Now we keep going to set the sticky bit ... 
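+| (That second pass below only records whether any remainder is left, so
+| the rounding code still sees the bits we can no longer keep.)
+|
+| A rough C model of the quotient loop above (illustrative only; a and b
+| are the prepared fractions, with the ratio a/b between 1/2 and 2):
+|
+|   uint32_t q = 0;
+|   for (int i = 25; i >= 0; i--)      /* FLT_MANT_DIG+2 quotient bits    */
+|     {
+|       if (a >= b)
+|         {
+|           q |= 1u << i;              /* record a quotient bit           */
+|           a -= b;                    /* keep the remainder              */
+|           if (a == 0)
+|             break;                   /* division came out exact         */
+|         }
+|       a <<= 1;                       /* bring down the next bit         */
+|     }
+|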
+ moveq IMM (FLT_MANT_DIG),d3 +1: cmpl d0,d1 + ble 2f + addl d0,d0 +#ifndef __mcoldfire__ + dbra d3,1b +#else + subql IMM(1),d3 + bpl 1b +#endif + movel IMM (0),d1 + bra 3f +2: movel IMM (0),d1 +#ifndef __mcoldfire__ + subw IMM (FLT_MANT_DIG),d3 + addw IMM (31),d3 +#else + subl IMM (FLT_MANT_DIG),d3 + addl IMM (31),d3 +#endif + bset d3,d1 +3: + movel d6,d0 | put the ratio in d0-d1 + movel a0,d7 | get sign back + +| Because of the normalization we did before we are guaranteed that +| d0 is smaller than 2^26 but larger than 2^24. Thus bit 26 is not set, +| bit 25 could be set, and if it is not set then bit 24 is necessarily set. + btst IMM (FLT_MANT_DIG+1),d0 + beq 1f | if it is not set, then bit 24 is set + lsrl IMM (1),d0 | +#ifndef __mcoldfire__ + addw IMM (1),d2 | +#else + addl IMM (1),d2 | +#endif +1: +| Now round, check for over- and underflow, and exit. + moveq IMM (DIVIDE),d5 + bra Lround$exit + +Ldivsf$inop: + moveq IMM (DIVIDE),d5 + bra Lf$inop + +Ldivsf$overflow: + moveq IMM (DIVIDE),d5 + bra Lf$overflow + +Ldivsf$underflow: + moveq IMM (DIVIDE),d5 + bra Lf$underflow + +Ldivsf$a$0: + moveq IMM (DIVIDE),d5 +| If a is zero check to see whether b is zero also. In that case return +| NaN; then check if b is NaN, and return NaN also in that case. Else +| return a properly signed zero. + andl IMM (0x7fffffff),d1 | clear sign bit and test b + beq Lf$inop | if b is also zero return NaN + cmpl IMM (INFINITY),d1 | check for NaN + bhi Lf$inop | + movel d7,d0 | else return signed zero + PICLEA SYM (_fpCCR),a0 | + movew IMM (0),a0@ | +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | +#else + moveml sp@,d2-d7 | + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | + rts | + +Ldivsf$b$0: + moveq IMM (DIVIDE),d5 +| If we got here a is not zero. Check if a is NaN; in that case return NaN, +| else return +/-INFINITY. Remember that a is in d0 with the sign bit +| cleared already. + cmpl IMM (INFINITY),d0 | compare d0 with INFINITY + bhi Lf$inop | if larger it is NaN + bra Lf$div$0 | else signal DIVIDE_BY_ZERO + +Ldivsf$inf: + moveq IMM (DIVIDE),d5 +| If a is INFINITY we have to check b + cmpl IMM (INFINITY),d1 | compare b with INFINITY + bge Lf$inop | if b is NaN or INFINITY return NaN + bra Lf$overflow | else return overflow + +| If a number is denormalized we put an exponent of 1 but do not put the +| bit back into the fraction. +Ldivsf$a$den: + movel IMM (1),d2 + andl d5,d0 +1: addl d0,d0 | shift a left until bit FLT_MANT_DIG-1 is set +#ifndef __mcoldfire__ + subw IMM (1),d2 | and adjust exponent +#else + subl IMM (1),d2 | and adjust exponent +#endif + btst IMM (FLT_MANT_DIG-1),d0 + bne Ldivsf$1 + bra 1b + +Ldivsf$b$den: + movel IMM (1),d3 + andl d5,d1 +1: addl d1,d1 | shift b left until bit FLT_MANT_DIG is set +#ifndef __mcoldfire__ + subw IMM (1),d3 | and adjust exponent +#else + subl IMM (1),d3 | and adjust exponent +#endif + btst IMM (FLT_MANT_DIG-1),d1 + bne Ldivsf$2 + bra 1b + +Lround$exit: +| This is a common exit point for __mulsf3 and __divsf3. + +| First check for underlow in the exponent: +#ifndef __mcoldfire__ + cmpw IMM (-FLT_MANT_DIG-1),d2 +#else + cmpl IMM (-FLT_MANT_DIG-1),d2 +#endif + blt Lf$underflow +| It could happen that the exponent is less than 1, in which case the +| number is denormalized. In this case we shift right and adjust the +| exponent until it becomes 1 or the fraction is zero (in the latter case +| we signal underflow and return zero). 
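+|
+| Roughly, in C (illustrative only; hi/lo are the two halves of the
+| fraction held in d0/d1 and exp is the exponent held in d2):
+|
+|   uint32_t lost = 0;
+|   while (exp < 1)
+|     {
+|       lost = (lost >> 1) | ((lo & 1) << 31);   /* catch the bit we drop */
+|       lo   = (lo   >> 1) | ((hi & 1) << 31);
+|       hi >>= 1;
+|       exp++;
+|     }
+|   lo |= lost;    /* fold the dropped bits back in so rounding sees them */
+|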
+ movel IMM (0),d6 | d6 is used temporarily +#ifndef __mcoldfire__ + cmpw IMM (1),d2 | if the exponent is less than 1 we +#else + cmpl IMM (1),d2 | if the exponent is less than 1 we +#endif + bge 2f | have to shift right (denormalize) +1: +#ifndef __mcoldfire__ + addw IMM (1),d2 | adjust the exponent + lsrl IMM (1),d0 | shift right once + roxrl IMM (1),d1 | + roxrl IMM (1),d6 | d6 collect bits we would lose otherwise + cmpw IMM (1),d2 | is the exponent 1 already? +#else + addql IMM (1),d2 | adjust the exponent + lsrl IMM (1),d6 + btst IMM (0),d1 + beq 11f + bset IMM (31),d6 +11: lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 + cmpl IMM (1),d2 | is the exponent 1 already? +#endif + beq 2f | if not loop back + bra 1b | + bra Lf$underflow | safety check, shouldn't execute ' +2: orl d6,d1 | this is a trick so we don't lose ' + | the extra bits which were flushed right +| Now call the rounding routine (which takes care of denormalized numbers): + lea pc@(Lround$0),a0 | to return from rounding routine + PICLEA SYM (_fpCCR),a1 | check the rounding mode +#ifdef __mcoldfire__ + clrl d6 +#endif + movew a1@(6),d6 | rounding mode in d6 + beq Lround$to$nearest +#ifndef __mcoldfire__ + cmpw IMM (ROUND_TO_PLUS),d6 +#else + cmpl IMM (ROUND_TO_PLUS),d6 +#endif + bhi Lround$to$minus + blt Lround$to$zero + bra Lround$to$plus +Lround$0: +| Here we have a correctly rounded result (either normalized or denormalized). + +| Here we should have either a normalized number or a denormalized one, and +| the exponent is necessarily larger or equal to 1 (so we don't have to ' +| check again for underflow!). We have to check for overflow or for a +| denormalized number (which also signals underflow). +| Check for overflow (i.e., exponent >= 255). +#ifndef __mcoldfire__ + cmpw IMM (0x00ff),d2 +#else + cmpl IMM (0x00ff),d2 +#endif + bge Lf$overflow +| Now check for a denormalized number (exponent==0). + movew d2,d2 + beq Lf$den +1: +| Put back the exponents and sign and return. +#ifndef __mcoldfire__ + lslw IMM (7),d2 | exponent back to fourth byte +#else + lsll IMM (7),d2 | exponent back to fourth byte +#endif + bclr IMM (FLT_MANT_DIG-1),d0 + swap d0 | and put back exponent +#ifndef __mcoldfire__ + orw d2,d0 | +#else + orl d2,d0 +#endif + swap d0 | + orl d7,d0 | and sign also + + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts + +|============================================================================= +| __negsf2 +|============================================================================= + +| This is trivial and could be shorter if we didn't bother checking for NaN ' +| and +/-INFINITY. 
+ +| float __negsf2(float); + FUNC(__negsf2) +SYM (__negsf2): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + moveq IMM (NEGATE),d5 + movel a6@(8),d0 | get number to negate in d0 + bchg IMM (31),d0 | negate + movel d0,d1 | make a positive copy + bclr IMM (31),d1 | + tstl d1 | check for zero + beq 2f | if zero (either sign) return +zero + cmpl IMM (INFINITY),d1 | compare to +INFINITY + blt 1f | + bhi Lf$inop | if larger (fraction not zero) is NaN + movel d0,d7 | else get sign and return INFINITY + andl IMM (0x80000000),d7 + bra Lf$infty +1: PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts +2: bclr IMM (31),d0 + bra 1b + +|============================================================================= +| __cmpsf2 +|============================================================================= + +GREATER = 1 +LESS = -1 +EQUAL = 0 + +| int __cmpsf2_internal(float, float, int); +SYM (__cmpsf2_internal): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- | save registers +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + moveq IMM (COMPARE),d5 + movel a6@(8),d0 | get first operand + movel a6@(12),d1 | get second operand +| Check if either is NaN, and in that case return garbage and signal +| INVALID_OPERATION. Check also if either is zero, and clear the signs +| if necessary. + movel d0,d6 + andl IMM (0x7fffffff),d0 + beq Lcmpsf$a$0 + cmpl IMM (0x7f800000),d0 + bhi Lcmpf$inop +Lcmpsf$1: + movel d1,d7 + andl IMM (0x7fffffff),d1 + beq Lcmpsf$b$0 + cmpl IMM (0x7f800000),d1 + bhi Lcmpf$inop +Lcmpsf$2: +| Check the signs + eorl d6,d7 + bpl 1f +| If the signs are not equal check if a >= 0 + tstl d6 + bpl Lcmpsf$a$gt$b | if (a >= 0 && b < 0) => a > b + bmi Lcmpsf$b$gt$a | if (a < 0 && b >= 0) => a < b +1: +| If the signs are equal check for < 0 + tstl d6 + bpl 1f +| If both are negative exchange them +#ifndef __mcoldfire__ + exg d0,d1 +#else + movel d0,d7 + movel d1,d0 + movel d7,d1 +#endif +1: +| Now that they are positive we just compare them as longs (does this also +| work for denormalized numbers?). + cmpl d0,d1 + bhi Lcmpsf$b$gt$a | |b| > |a| + bne Lcmpsf$a$gt$b | |b| < |a| +| If we got here a == b. + movel IMM (EQUAL),d0 +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | put back the registers +#else + moveml sp@,d2-d7 +#endif + unlk a6 + rts +Lcmpsf$a$gt$b: + movel IMM (GREATER),d0 +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | put back the registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts +Lcmpsf$b$gt$a: + movel IMM (LESS),d0 +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | put back the registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. 
+#endif + unlk a6 + rts + +Lcmpsf$a$0: + bclr IMM (31),d6 + bra Lcmpsf$1 +Lcmpsf$b$0: + bclr IMM (31),d7 + bra Lcmpsf$2 + +Lcmpf$inop: + movl a6@(16),d0 + moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 + moveq IMM (SINGLE_FLOAT),d6 + PICJUMP $_exception_handler + +| int __cmpsf2(float, float); + FUNC(__cmpsf2) +SYM (__cmpsf2): + link a6,IMM (0) + pea 1 + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpsf2_internal) + unlk a6 + rts + +|============================================================================= +| rounding routines +|============================================================================= + +| The rounding routines expect the number to be normalized in registers +| d0-d1, with the exponent in register d2. They assume that the +| exponent is larger or equal to 1. They return a properly normalized number +| if possible, and a denormalized number otherwise. The exponent is returned +| in d2. + +Lround$to$nearest: +| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"): +| Here we assume that the exponent is not too small (this should be checked +| before entering the rounding routine), but the number could be denormalized. + +| Check for denormalized numbers: +1: btst IMM (FLT_MANT_DIG),d0 + bne 2f | if set the number is normalized +| Normalize shifting left until bit #FLT_MANT_DIG is set or the exponent +| is one (remember that a denormalized number corresponds to an +| exponent of -F_BIAS+1). +#ifndef __mcoldfire__ + cmpw IMM (1),d2 | remember that the exponent is at least one +#else + cmpl IMM (1),d2 | remember that the exponent is at least one +#endif + beq 2f | an exponent of one means denormalized + addl d1,d1 | else shift and adjust the exponent + addxl d0,d0 | +#ifndef __mcoldfire__ + dbra d2,1b | +#else + subql IMM (1),d2 + bpl 1b +#endif +2: +| Now round: we do it as follows: after the shifting we can write the +| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2. +| If delta < 1, do nothing. If delta > 1, add 1 to f. +| If delta == 1, we make sure the rounded number will be even (odd?) +| (after shifting). + btst IMM (0),d0 | is delta < 1? + beq 2f | if so, do not do anything + tstl d1 | is delta == 1? + bne 1f | if so round to even + movel d0,d1 | + andl IMM (2),d1 | bit 1 is the last significant bit + addl d1,d0 | + bra 2f | +1: movel IMM (1),d1 | else add 1 + addl d1,d0 | +| Shift right once (because we used bit #FLT_MANT_DIG!). +2: lsrl IMM (1),d0 +| Now check again bit #FLT_MANT_DIG (rounding could have produced a +| 'fraction overflow' ...). + btst IMM (FLT_MANT_DIG),d0 + beq 1f + lsrl IMM (1),d0 +#ifndef __mcoldfire__ + addw IMM (1),d2 +#else + addql IMM (1),d2 +#endif +1: +| If bit #FLT_MANT_DIG-1 is clear we have a denormalized number, so we +| have to put the exponent to zero and return a denormalized number. + btst IMM (FLT_MANT_DIG-1),d0 + beq 1f + jmp a0@ +1: movel IMM (0),d2 + jmp a0@ + +Lround$to$zero: +Lround$to$plus: +Lround$to$minus: + jmp a0@ +#endif /* L_float */ + +| gcc expects the routines __eqdf2, __nedf2, __gtdf2, __gedf2, +| __ledf2, __ltdf2 to all return the same value as a direct call to +| __cmpdf2 would. In this implementation, each of these routines +| simply calls __cmpdf2. It would be more efficient to give the +| __cmpdf2 routine several names, but separating them out will make it +| easier to write efficient versions of these routines someday. +| If the operands recompare unordered unordered __gtdf2 and __gedf2 return -1. +| The other routines return 1. 
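+|
+| Shape of the wrappers that follow, in C (illustrative only; the third
+| argument of __cmpdf2_internal is the value to return for an unordered,
+| i.e. NaN, comparison):
+|
+|   int __gtdf2 (double a, double b)
+|   {
+|     return __cmpdf2_internal (a, b, -1);   /* NaN  =>  "not greater"   */
+|   }
+|
+|   int __ledf2 (double a, double b)
+|   {
+|     return __cmpdf2_internal (a, b, 1);    /* NaN  =>  "not less/equal" */
+|   }
+|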
+ +#ifdef L_eqdf2 + .text + FUNC(__eqdf2) + .globl SYM (__eqdf2) +SYM (__eqdf2): + link a6,IMM (0) + pea 1 + movl a6@(20),sp@- + movl a6@(16),sp@- + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpdf2_internal) + unlk a6 + rts +#endif /* L_eqdf2 */ + +#ifdef L_nedf2 + .text + FUNC(__nedf2) + .globl SYM (__nedf2) +SYM (__nedf2): + link a6,IMM (0) + pea 1 + movl a6@(20),sp@- + movl a6@(16),sp@- + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpdf2_internal) + unlk a6 + rts +#endif /* L_nedf2 */ + +#ifdef L_gtdf2 + .text + FUNC(__gtdf2) + .globl SYM (__gtdf2) +SYM (__gtdf2): + link a6,IMM (0) + pea -1 + movl a6@(20),sp@- + movl a6@(16),sp@- + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpdf2_internal) + unlk a6 + rts +#endif /* L_gtdf2 */ + +#ifdef L_gedf2 + .text + FUNC(__gedf2) + .globl SYM (__gedf2) +SYM (__gedf2): + link a6,IMM (0) + pea -1 + movl a6@(20),sp@- + movl a6@(16),sp@- + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpdf2_internal) + unlk a6 + rts +#endif /* L_gedf2 */ + +#ifdef L_ltdf2 + .text + FUNC(__ltdf2) + .globl SYM (__ltdf2) +SYM (__ltdf2): + link a6,IMM (0) + pea 1 + movl a6@(20),sp@- + movl a6@(16),sp@- + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpdf2_internal) + unlk a6 + rts +#endif /* L_ltdf2 */ + +#ifdef L_ledf2 + .text + FUNC(__ledf2) + .globl SYM (__ledf2) +SYM (__ledf2): + link a6,IMM (0) + pea 1 + movl a6@(20),sp@- + movl a6@(16),sp@- + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpdf2_internal) + unlk a6 + rts +#endif /* L_ledf2 */ + +| The comments above about __eqdf2, et. al., also apply to __eqsf2, +| et. al., except that the latter call __cmpsf2 rather than __cmpdf2. + +#ifdef L_eqsf2 + .text + FUNC(__eqsf2) + .globl SYM (__eqsf2) +SYM (__eqsf2): + link a6,IMM (0) + pea 1 + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpsf2_internal) + unlk a6 + rts +#endif /* L_eqsf2 */ + +#ifdef L_nesf2 + .text + FUNC(__nesf2) + .globl SYM (__nesf2) +SYM (__nesf2): + link a6,IMM (0) + pea 1 + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpsf2_internal) + unlk a6 + rts +#endif /* L_nesf2 */ + +#ifdef L_gtsf2 + .text + FUNC(__gtsf2) + .globl SYM (__gtsf2) +SYM (__gtsf2): + link a6,IMM (0) + pea -1 + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpsf2_internal) + unlk a6 + rts +#endif /* L_gtsf2 */ + +#ifdef L_gesf2 + .text + FUNC(__gesf2) + .globl SYM (__gesf2) +SYM (__gesf2): + link a6,IMM (0) + pea -1 + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpsf2_internal) + unlk a6 + rts +#endif /* L_gesf2 */ + +#ifdef L_ltsf2 + .text + FUNC(__ltsf2) + .globl SYM (__ltsf2) +SYM (__ltsf2): + link a6,IMM (0) + pea 1 + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpsf2_internal) + unlk a6 + rts +#endif /* L_ltsf2 */ + +#ifdef L_lesf2 + .text + FUNC(__lesf2) + .globl SYM (__lesf2) +SYM (__lesf2): + link a6,IMM (0) + pea 1 + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpsf2_internal) + unlk a6 + rts +#endif /* L_lesf2 */ + +#if defined (__ELF__) && defined (__linux__) + /* Make stack non-executable for ELF linux targets. 
*/ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libgcc/config/m68k/t-floatlib b/libgcc/config/m68k/t-floatlib new file mode 100644 index 00000000000..4160eb9f537 --- /dev/null +++ b/libgcc/config/m68k/t-floatlib @@ -0,0 +1,5 @@ +LIB1ASMSRC = m68k/lb1sf68.S +LIB1ASMFUNCS = _mulsi3 _udivsi3 _divsi3 _umodsi3 _modsi3 \ + _double _float _floatex \ + _eqdf2 _nedf2 _gtdf2 _gedf2 _ltdf2 _ledf2 \ + _eqsf2 _nesf2 _gtsf2 _gesf2 _ltsf2 _lesf2 diff --git a/libgcc/config/mcore/lib1funcs.S b/libgcc/config/mcore/lib1funcs.S new file mode 100644 index 00000000000..701762f2a3c --- /dev/null +++ b/libgcc/config/mcore/lib1funcs.S @@ -0,0 +1,303 @@ +/* libgcc routines for the MCore. + Copyright (C) 1993, 1999, 2000, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#define CONCAT1(a, b) CONCAT2(a, b) +#define CONCAT2(a, b) a ## b + +/* Use the right prefix for global labels. */ + +#define SYM(x) CONCAT1 (__, x) + +#ifdef __ELF__ +#define TYPE(x) .type SYM (x),@function +#define SIZE(x) .size SYM (x), . - SYM (x) +#else +#define TYPE(x) +#define SIZE(x) +#endif + +.macro FUNC_START name + .text + .globl SYM (\name) + TYPE (\name) +SYM (\name): +.endm + +.macro FUNC_END name + SIZE (\name) +.endm + +#ifdef L_udivsi3 +FUNC_START udiv32 +FUNC_START udivsi32 + + movi r1,0 // r1-r2 form 64 bit dividend + movi r4,1 // r4 is quotient (1 for a sentinel) + + cmpnei r3,0 // look for 0 divisor + bt 9f + trap 3 // divide by 0 +9: + // control iterations; skip across high order 0 bits in dividend + mov r7,r2 + cmpnei r7,0 + bt 8f + movi r2,0 // 0 dividend + jmp r15 // quick return +8: + ff1 r7 // figure distance to skip + lsl r4,r7 // move the sentinel along (with 0's behind) + lsl r2,r7 // and the low 32 bits of numerator + +// appears to be wrong... +// tested out incorrectly in our OS work... +// mov r7,r3 // looking at divisor +// ff1 r7 // I can move 32-r7 more bits to left. +// addi r7,1 // ok, one short of that... +// mov r1,r2 +// lsr r1,r7 // bits that came from low order... +// rsubi r7,31 // r7 == "32-n" == LEFT distance +// addi r7,1 // this is (32-n) +// lsl r4,r7 // fixes the high 32 (quotient) +// lsl r2,r7 +// cmpnei r4,0 +// bf 4f // the sentinel went away... + + // run the remaining bits + +1: lslc r2,1 // 1 bit left shift of r1-r2 + addc r1,r1 + cmphs r1,r3 // upper 32 of dividend >= divisor? 
+ bf 2f + sub r1,r3 // if yes, subtract divisor +2: addc r4,r4 // shift by 1 and count subtracts + bf 1b // if sentinel falls out of quotient, stop + +4: mov r2,r4 // return quotient + mov r3,r1 // and piggyback the remainder + jmp r15 +FUNC_END udiv32 +FUNC_END udivsi32 +#endif + +#ifdef L_umodsi3 +FUNC_START urem32 +FUNC_START umodsi3 + movi r1,0 // r1-r2 form 64 bit dividend + movi r4,1 // r4 is quotient (1 for a sentinel) + cmpnei r3,0 // look for 0 divisor + bt 9f + trap 3 // divide by 0 +9: + // control iterations; skip across high order 0 bits in dividend + mov r7,r2 + cmpnei r7,0 + bt 8f + movi r2,0 // 0 dividend + jmp r15 // quick return +8: + ff1 r7 // figure distance to skip + lsl r4,r7 // move the sentinel along (with 0's behind) + lsl r2,r7 // and the low 32 bits of numerator + +1: lslc r2,1 // 1 bit left shift of r1-r2 + addc r1,r1 + cmphs r1,r3 // upper 32 of dividend >= divisor? + bf 2f + sub r1,r3 // if yes, subtract divisor +2: addc r4,r4 // shift by 1 and count subtracts + bf 1b // if sentinel falls out of quotient, stop + mov r2,r1 // return remainder + jmp r15 +FUNC_END urem32 +FUNC_END umodsi3 +#endif + +#ifdef L_divsi3 +FUNC_START div32 +FUNC_START divsi3 + mov r5,r2 // calc sign of quotient + xor r5,r3 + abs r2 // do unsigned divide + abs r3 + movi r1,0 // r1-r2 form 64 bit dividend + movi r4,1 // r4 is quotient (1 for a sentinel) + cmpnei r3,0 // look for 0 divisor + bt 9f + trap 3 // divide by 0 +9: + // control iterations; skip across high order 0 bits in dividend + mov r7,r2 + cmpnei r7,0 + bt 8f + movi r2,0 // 0 dividend + jmp r15 // quick return +8: + ff1 r7 // figure distance to skip + lsl r4,r7 // move the sentinel along (with 0's behind) + lsl r2,r7 // and the low 32 bits of numerator + +// tested out incorrectly in our OS work... +// mov r7,r3 // looking at divisor +// ff1 r7 // I can move 32-r7 more bits to left. +// addi r7,1 // ok, one short of that... +// mov r1,r2 +// lsr r1,r7 // bits that came from low order... +// rsubi r7,31 // r7 == "32-n" == LEFT distance +// addi r7,1 // this is (32-n) +// lsl r4,r7 // fixes the high 32 (quotient) +// lsl r2,r7 +// cmpnei r4,0 +// bf 4f // the sentinel went away... + + // run the remaining bits +1: lslc r2,1 // 1 bit left shift of r1-r2 + addc r1,r1 + cmphs r1,r3 // upper 32 of dividend >= divisor? + bf 2f + sub r1,r3 // if yes, subtract divisor +2: addc r4,r4 // shift by 1 and count subtracts + bf 1b // if sentinel falls out of quotient, stop + +4: mov r2,r4 // return quotient + mov r3,r1 // piggyback the remainder + btsti r5,31 // after adjusting for sign + bf 3f + rsubi r2,0 + rsubi r3,0 +3: jmp r15 +FUNC_END div32 +FUNC_END divsi3 +#endif + +#ifdef L_modsi3 +FUNC_START rem32 +FUNC_START modsi3 + mov r5,r2 // calc sign of remainder + abs r2 // do unsigned divide + abs r3 + movi r1,0 // r1-r2 form 64 bit dividend + movi r4,1 // r4 is quotient (1 for a sentinel) + cmpnei r3,0 // look for 0 divisor + bt 9f + trap 3 // divide by 0 +9: + // control iterations; skip across high order 0 bits in dividend + mov r7,r2 + cmpnei r7,0 + bt 8f + movi r2,0 // 0 dividend + jmp r15 // quick return +8: + ff1 r7 // figure distance to skip + lsl r4,r7 // move the sentinel along (with 0's behind) + lsl r2,r7 // and the low 32 bits of numerator + +1: lslc r2,1 // 1 bit left shift of r1-r2 + addc r1,r1 + cmphs r1,r3 // upper 32 of dividend >= divisor? 
+ bf 2f + sub r1,r3 // if yes, subtract divisor +2: addc r4,r4 // shift by 1 and count subtracts + bf 1b // if sentinel falls out of quotient, stop + mov r2,r1 // return remainder + btsti r5,31 // after adjusting for sign + bf 3f + rsubi r2,0 +3: jmp r15 +FUNC_END rem32 +FUNC_END modsi3 +#endif + + +/* GCC expects that {__eq,__ne,__gt,__ge,__le,__lt}{df2,sf2} + will behave as __cmpdf2. So, we stub the implementations to + jump on to __cmpdf2 and __cmpsf2. + + All of these shortcircuit the return path so that __cmp{sd}f2 + will go directly back to the caller. */ + +.macro COMPARE_DF_JUMP name + .import SYM (cmpdf2) +FUNC_START \name + jmpi SYM (cmpdf2) +FUNC_END \name +.endm + +#ifdef L_eqdf2 +COMPARE_DF_JUMP eqdf2 +#endif /* L_eqdf2 */ + +#ifdef L_nedf2 +COMPARE_DF_JUMP nedf2 +#endif /* L_nedf2 */ + +#ifdef L_gtdf2 +COMPARE_DF_JUMP gtdf2 +#endif /* L_gtdf2 */ + +#ifdef L_gedf2 +COMPARE_DF_JUMP gedf2 +#endif /* L_gedf2 */ + +#ifdef L_ltdf2 +COMPARE_DF_JUMP ltdf2 +#endif /* L_ltdf2 */ + +#ifdef L_ledf2 +COMPARE_DF_JUMP ledf2 +#endif /* L_ledf2 */ + +/* SINGLE PRECISION FLOATING POINT STUBS */ + +.macro COMPARE_SF_JUMP name + .import SYM (cmpsf2) +FUNC_START \name + jmpi SYM (cmpsf2) +FUNC_END \name +.endm + +#ifdef L_eqsf2 +COMPARE_SF_JUMP eqsf2 +#endif /* L_eqsf2 */ + +#ifdef L_nesf2 +COMPARE_SF_JUMP nesf2 +#endif /* L_nesf2 */ + +#ifdef L_gtsf2 +COMPARE_SF_JUMP gtsf2 +#endif /* L_gtsf2 */ + +#ifdef L_gesf2 +COMPARE_SF_JUMP __gesf2 +#endif /* L_gesf2 */ + +#ifdef L_ltsf2 +COMPARE_SF_JUMP __ltsf2 +#endif /* L_ltsf2 */ + +#ifdef L_lesf2 +COMPARE_SF_JUMP lesf2 +#endif /* L_lesf2 */ diff --git a/libgcc/config/mcore/t-mcore b/libgcc/config/mcore/t-mcore new file mode 100644 index 00000000000..19c4c15cd0b --- /dev/null +++ b/libgcc/config/mcore/t-mcore @@ -0,0 +1,2 @@ +LIB1ASMSRC = mcore/lib1funcs.S +LIB1ASMFUNCS = _divsi3 _udivsi3 _modsi3 _umodsi3 diff --git a/libgcc/config/mep/lib1funcs.S b/libgcc/config/mep/lib1funcs.S new file mode 100644 index 00000000000..0a18913f927 --- /dev/null +++ b/libgcc/config/mep/lib1funcs.S @@ -0,0 +1,125 @@ +/* libgcc routines for Toshiba Media Processor. + Copyright (C) 2001, 2002, 2005, 2009 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +#define SAVEALL \ + add3 $sp, $sp, -16*4 ; \ + sw $0, ($sp) ; \ + sw $1, 4($sp) ; \ + sw $2, 8($sp) ; \ + sw $3, 12($sp) ; \ + sw $4, 16($sp) ; \ + sw $5, 20($sp) ; \ + sw $6, 24($sp) ; \ + sw $7, 28($sp) ; \ + sw $8, 32($sp) ; \ + sw $9, 36($sp) ; \ + sw $10, 40($sp) ; \ + sw $11, 44($sp) ; \ + sw $12, 48($sp) ; \ + sw $13, 52($sp) ; \ + sw $14, 56($sp) ; \ + ldc $5, $lp ; \ + add $5, 3 ; \ + mov $6, -4 ; \ + and $5, $6 + +#define RESTOREALL \ + stc $5, $lp ; \ + lw $14, 56($sp) ; \ + lw $13, 52($sp) ; \ + lw $12, 48($sp) ; \ + lw $11, 44($sp) ; \ + lw $10, 40($sp) ; \ + lw $9, 36($sp) ; \ + lw $8, 32($sp) ; \ + lw $7, 28($sp) ; \ + lw $6, 24($sp) ; \ + lw $5, 20($sp) ; \ + lw $4, 16($sp) ; \ + lw $3, 12($sp) ; \ + lw $2, 8($sp) ; \ + lw $1, 4($sp) ; \ + lw $0, ($sp) ; \ + add3 $sp, $sp, 16*4 ; \ + ret + +#ifdef L_mep_profile + .text + .global __mep_mcount +__mep_mcount: + SAVEALL + ldc $1, $lp + mov $2, $0 + bsr __mep_mcount_2 + RESTOREALL +#endif + +#ifdef L_mep_bb_init_trace + .text + .global __mep_bb_init_trace_func +__mep_bb_init_trace_func: + SAVEALL + lw $1, ($5) + lw $2, 4($5) + add $5, 8 + bsr __bb_init_trace_func + RESTOREALL +#endif + +#ifdef L_mep_bb_init + .text + .global __mep_bb_init_func +__mep_bb_init_func: + SAVEALL + lw $1, ($5) + add $5, 4 + bsr __bb_init_func + RESTOREALL +#endif + +#ifdef L_mep_bb_trace + .text + .global __mep_bb_trace_func +__mep_bb_trace_func: + SAVEALL + movu $3, __bb + lw $1, ($5) + sw $1, ($3) + lw $2, 4($5) + sw $2, 4($3) + add $5, 8 + bsr __bb_trace_func + RESTOREALL +#endif + +#ifdef L_mep_bb_increment + .text + .global __mep_bb_increment_func +__mep_bb_increment_func: + SAVEALL + lw $1, ($5) + lw $0, ($1) + add $0, 1 + sw $0, ($1) + add $5, 4 + RESTOREALL +#endif diff --git a/libgcc/config/mep/t-mep b/libgcc/config/mep/t-mep index 36e6f5dc771..d1fb094a41e 100644 --- a/libgcc/config/mep/t-mep +++ b/libgcc/config/mep/t-mep @@ -1,2 +1,11 @@ +# profiling support +LIB1ASMSRC = mep/lib1funcs.S + +LIB1ASMFUNCS = _mep_profile \ + _mep_bb_init_trace \ + _mep_bb_init \ + _mep_bb_trace \ + _mep_bb_increment + # Use -O0 instead of -O2 so we don't get complex relocations CRTSTUFF_CFLAGS += -O0 diff --git a/libgcc/config/mips/mips16.S b/libgcc/config/mips/mips16.S new file mode 100644 index 00000000000..ec331b5f65e --- /dev/null +++ b/libgcc/config/mips/mips16.S @@ -0,0 +1,712 @@ +/* mips16 floating point support code + Copyright (C) 1996, 1997, 1998, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Cygnus Support + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* This file contains mips16 floating point support functions. These + functions are called by mips16 code to handle floating point when + -msoft-float is not used. 
They accept the arguments and return + values using the soft-float calling convention, but do the actual + operation using the hard floating point instructions. */ + +#if defined _MIPS_SIM && (_MIPS_SIM == _ABIO32 || _MIPS_SIM == _ABIO64) + +/* This file contains 32-bit assembly code. */ + .set nomips16 + +/* Start a function. */ + +#define STARTFN(NAME) .globl NAME; .ent NAME; NAME: + +/* Finish a function. */ + +#define ENDFN(NAME) .end NAME + +/* ARG1 + The FPR that holds the first floating-point argument. + + ARG2 + The FPR that holds the second floating-point argument. + + RET + The FPR that holds a floating-point return value. */ + +#define RET $f0 +#define ARG1 $f12 +#ifdef __mips64 +#define ARG2 $f13 +#else +#define ARG2 $f14 +#endif + +/* Set 64-bit register GPR so that its high 32 bits contain HIGH_FPR + and so that its low 32 bits contain LOW_FPR. */ +#define MERGE_GPRf(GPR, HIGH_FPR, LOW_FPR) \ + .set noat; \ + mfc1 $1, LOW_FPR; \ + mfc1 GPR, HIGH_FPR; \ + dsll $1, $1, 32; \ + dsll GPR, GPR, 32; \ + dsrl $1, $1, 32; \ + or GPR, GPR, $1; \ + .set at + +/* Move the high 32 bits of GPR to HIGH_FPR and the low 32 bits of + GPR to LOW_FPR. */ +#define MERGE_GPRt(GPR, HIGH_FPR, LOW_FPR) \ + .set noat; \ + dsrl $1, GPR, 32; \ + mtc1 GPR, LOW_FPR; \ + mtc1 $1, HIGH_FPR; \ + .set at + +/* Jump to T, and use "OPCODE, OP2" to implement a delayed move. */ +#define DELAYt(T, OPCODE, OP2) \ + .set noreorder; \ + jr T; \ + OPCODE, OP2; \ + .set reorder + +/* Use "OPCODE. OP2" and jump to T. */ +#define DELAYf(T, OPCODE, OP2) OPCODE, OP2; jr T + +/* MOVE_SF_BYTE0(D) + Move the first single-precision floating-point argument between + GPRs and FPRs. + + MOVE_SI_BYTE0(D) + Likewise the first single-precision integer argument. + + MOVE_SF_BYTE4(D) + Move the second single-precision floating-point argument between + GPRs and FPRs, given that the first argument occupies 4 bytes. + + MOVE_SF_BYTE8(D) + Move the second single-precision floating-point argument between + GPRs and FPRs, given that the first argument occupies 8 bytes. + + MOVE_DF_BYTE0(D) + Move the first double-precision floating-point argument between + GPRs and FPRs. + + MOVE_DF_BYTE8(D) + Likewise the second double-precision floating-point argument. + + MOVE_SF_RET(D, T) + Likewise a single-precision floating-point return value, + then jump to T. + + MOVE_SC_RET(D, T) + Likewise a complex single-precision floating-point return value. + + MOVE_DF_RET(D, T) + Likewise a double-precision floating-point return value. + + MOVE_DC_RET(D, T) + Likewise a complex double-precision floating-point return value. + + MOVE_SI_RET(D, T) + Likewise a single-precision integer return value. + + The D argument is "t" to move to FPRs and "f" to move from FPRs. + The return macros may assume that the target of the jump does not + use a floating-point register. */ + +#define MOVE_SF_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0) +#define MOVE_SI_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0) + +#if defined(__mips64) && defined(__MIPSEB__) +#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f0, $f1); jr T +#elif defined(__mips64) +/* The high 32 bits of $2 correspond to the second word in memory; + i.e. the imaginary part. 
*/ +#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f1, $f0); jr T +#elif __mips_fpr == 64 +#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1) +#else +#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f2) +#endif + +#if defined(__mips64) +#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12 +#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f13 +#define MOVE_SF_BYTE8(D) m##D##c1 $5,$f13 +#else +#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12 +#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f14 +#define MOVE_SF_BYTE8(D) m##D##c1 $6,$f14 +#endif +#define MOVE_SI_BYTE0(D) MOVE_SF_BYTE0(D) + +#if defined(__mips64) +#define MOVE_DF_BYTE0(D) dm##D##c1 $4,$f12 +#define MOVE_DF_BYTE8(D) dm##D##c1 $5,$f13 +#define MOVE_DF_RET(D, T) DELAY##D (T, dm##D##c1 $2,$f0) +#define MOVE_DC_RET(D, T) dm##D##c1 $3,$f1; MOVE_DF_RET (D, T) +#elif __mips_fpr == 64 && defined(__MIPSEB__) +#define MOVE_DF_BYTE0(D) m##D##c1 $5,$f12; m##D##hc1 $4,$f12 +#define MOVE_DF_BYTE8(D) m##D##c1 $7,$f14; m##D##hc1 $6,$f14 +#define MOVE_DF_RET(D, T) m##D##c1 $3,$f0; DELAY##D (T, m##D##hc1 $2,$f0) +#define MOVE_DC_RET(D, T) m##D##c1 $5,$f1; m##D##hc1 $4,$f1; MOVE_DF_RET (D, T) +#elif __mips_fpr == 64 +#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##hc1 $5,$f12 +#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##hc1 $7,$f14 +#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##hc1 $3,$f0) +#define MOVE_DC_RET(D, T) m##D##c1 $4,$f1; m##D##hc1 $5,$f1; MOVE_DF_RET (D, T) +#elif defined(__MIPSEB__) +/* FPRs are little-endian. */ +#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f13; m##D##c1 $5,$f12 +#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f15; m##D##c1 $7,$f14 +#define MOVE_DF_RET(D, T) m##D##c1 $2,$f1; DELAY##D (T, m##D##c1 $3,$f0) +#define MOVE_DC_RET(D, T) m##D##c1 $4,$f3; m##D##c1 $5,$f2; MOVE_DF_RET (D, T) +#else +#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##c1 $5,$f13 +#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##c1 $7,$f15 +#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1) +#define MOVE_DC_RET(D, T) m##D##c1 $4,$f2; m##D##c1 $5,$f3; MOVE_DF_RET (D, T) +#endif + +/* Single-precision math. */ + +/* Define a function NAME that loads two single-precision values, + performs FPU operation OPCODE on them, and returns the single- + precision result. */ + +#define OPSF3(NAME, OPCODE) \ +STARTFN (NAME); \ + MOVE_SF_BYTE0 (t); \ + MOVE_SF_BYTE4 (t); \ + OPCODE RET,ARG1,ARG2; \ + MOVE_SF_RET (f, $31); \ + ENDFN (NAME) + +#ifdef L_m16addsf3 +OPSF3 (__mips16_addsf3, add.s) +#endif +#ifdef L_m16subsf3 +OPSF3 (__mips16_subsf3, sub.s) +#endif +#ifdef L_m16mulsf3 +OPSF3 (__mips16_mulsf3, mul.s) +#endif +#ifdef L_m16divsf3 +OPSF3 (__mips16_divsf3, div.s) +#endif + +/* Define a function NAME that loads a single-precision value, + performs FPU operation OPCODE on it, and returns the single- + precision result. */ + +#define OPSF2(NAME, OPCODE) \ +STARTFN (NAME); \ + MOVE_SF_BYTE0 (t); \ + OPCODE RET,ARG1; \ + MOVE_SF_RET (f, $31); \ + ENDFN (NAME) + +#ifdef L_m16negsf2 +OPSF2 (__mips16_negsf2, neg.s) +#endif +#ifdef L_m16abssf2 +OPSF2 (__mips16_abssf2, abs.s) +#endif + +/* Single-precision comparisons. */ + +/* Define a function NAME that loads two single-precision values, + performs floating point comparison OPCODE, and returns TRUE or + FALSE depending on the result. 
*/ + +#define CMPSF(NAME, OPCODE, TRUE, FALSE) \ +STARTFN (NAME); \ + MOVE_SF_BYTE0 (t); \ + MOVE_SF_BYTE4 (t); \ + OPCODE ARG1,ARG2; \ + li $2,TRUE; \ + bc1t 1f; \ + li $2,FALSE; \ +1:; \ + j $31; \ + ENDFN (NAME) + +/* Like CMPSF, but reverse the comparison operands. */ + +#define REVCMPSF(NAME, OPCODE, TRUE, FALSE) \ +STARTFN (NAME); \ + MOVE_SF_BYTE0 (t); \ + MOVE_SF_BYTE4 (t); \ + OPCODE ARG2,ARG1; \ + li $2,TRUE; \ + bc1t 1f; \ + li $2,FALSE; \ +1:; \ + j $31; \ + ENDFN (NAME) + +#ifdef L_m16eqsf2 +CMPSF (__mips16_eqsf2, c.eq.s, 0, 1) +#endif +#ifdef L_m16nesf2 +CMPSF (__mips16_nesf2, c.eq.s, 0, 1) +#endif +#ifdef L_m16gtsf2 +REVCMPSF (__mips16_gtsf2, c.lt.s, 1, 0) +#endif +#ifdef L_m16gesf2 +REVCMPSF (__mips16_gesf2, c.le.s, 0, -1) +#endif +#ifdef L_m16lesf2 +CMPSF (__mips16_lesf2, c.le.s, 0, 1) +#endif +#ifdef L_m16ltsf2 +CMPSF (__mips16_ltsf2, c.lt.s, -1, 0) +#endif +#ifdef L_m16unordsf2 +CMPSF(__mips16_unordsf2, c.un.s, 1, 0) +#endif + + +/* Single-precision conversions. */ + +#ifdef L_m16fltsisf +STARTFN (__mips16_floatsisf) + MOVE_SF_BYTE0 (t) + cvt.s.w RET,ARG1 + MOVE_SF_RET (f, $31) + ENDFN (__mips16_floatsisf) +#endif + +#ifdef L_m16fltunsisf +STARTFN (__mips16_floatunsisf) + .set noreorder + bltz $4,1f + MOVE_SF_BYTE0 (t) + .set reorder + cvt.s.w RET,ARG1 + MOVE_SF_RET (f, $31) +1: + and $2,$4,1 + srl $3,$4,1 + or $2,$2,$3 + mtc1 $2,RET + cvt.s.w RET,RET + add.s RET,RET,RET + MOVE_SF_RET (f, $31) + ENDFN (__mips16_floatunsisf) +#endif + +#ifdef L_m16fix_truncsfsi +STARTFN (__mips16_fix_truncsfsi) + MOVE_SF_BYTE0 (t) + trunc.w.s RET,ARG1,$4 + MOVE_SI_RET (f, $31) + ENDFN (__mips16_fix_truncsfsi) +#endif + +#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) + +/* Double-precision math. */ + +/* Define a function NAME that loads two double-precision values, + performs FPU operation OPCODE on them, and returns the double- + precision result. */ + +#define OPDF3(NAME, OPCODE) \ +STARTFN (NAME); \ + MOVE_DF_BYTE0 (t); \ + MOVE_DF_BYTE8 (t); \ + OPCODE RET,ARG1,ARG2; \ + MOVE_DF_RET (f, $31); \ + ENDFN (NAME) + +#ifdef L_m16adddf3 +OPDF3 (__mips16_adddf3, add.d) +#endif +#ifdef L_m16subdf3 +OPDF3 (__mips16_subdf3, sub.d) +#endif +#ifdef L_m16muldf3 +OPDF3 (__mips16_muldf3, mul.d) +#endif +#ifdef L_m16divdf3 +OPDF3 (__mips16_divdf3, div.d) +#endif + +/* Define a function NAME that loads a double-precision value, + performs FPU operation OPCODE on it, and returns the double- + precision result. */ + +#define OPDF2(NAME, OPCODE) \ +STARTFN (NAME); \ + MOVE_DF_BYTE0 (t); \ + OPCODE RET,ARG1; \ + MOVE_DF_RET (f, $31); \ + ENDFN (NAME) + +#ifdef L_m16negdf2 +OPDF2 (__mips16_negdf2, neg.d) +#endif +#ifdef L_m16absdf2 +OPDF2 (__mips16_absdf2, abs.d) +#endif + +/* Conversions between single and double precision. */ + +#ifdef L_m16extsfdf2 +STARTFN (__mips16_extendsfdf2) + MOVE_SF_BYTE0 (t) + cvt.d.s RET,ARG1 + MOVE_DF_RET (f, $31) + ENDFN (__mips16_extendsfdf2) +#endif + +#ifdef L_m16trdfsf2 +STARTFN (__mips16_truncdfsf2) + MOVE_DF_BYTE0 (t) + cvt.s.d RET,ARG1 + MOVE_SF_RET (f, $31) + ENDFN (__mips16_truncdfsf2) +#endif + +/* Double-precision comparisons. */ + +/* Define a function NAME that loads two double-precision values, + performs floating point comparison OPCODE, and returns TRUE or + FALSE depending on the result. 
*/ + +#define CMPDF(NAME, OPCODE, TRUE, FALSE) \ +STARTFN (NAME); \ + MOVE_DF_BYTE0 (t); \ + MOVE_DF_BYTE8 (t); \ + OPCODE ARG1,ARG2; \ + li $2,TRUE; \ + bc1t 1f; \ + li $2,FALSE; \ +1:; \ + j $31; \ + ENDFN (NAME) + +/* Like CMPDF, but reverse the comparison operands. */ + +#define REVCMPDF(NAME, OPCODE, TRUE, FALSE) \ +STARTFN (NAME); \ + MOVE_DF_BYTE0 (t); \ + MOVE_DF_BYTE8 (t); \ + OPCODE ARG2,ARG1; \ + li $2,TRUE; \ + bc1t 1f; \ + li $2,FALSE; \ +1:; \ + j $31; \ + ENDFN (NAME) + +#ifdef L_m16eqdf2 +CMPDF (__mips16_eqdf2, c.eq.d, 0, 1) +#endif +#ifdef L_m16nedf2 +CMPDF (__mips16_nedf2, c.eq.d, 0, 1) +#endif +#ifdef L_m16gtdf2 +REVCMPDF (__mips16_gtdf2, c.lt.d, 1, 0) +#endif +#ifdef L_m16gedf2 +REVCMPDF (__mips16_gedf2, c.le.d, 0, -1) +#endif +#ifdef L_m16ledf2 +CMPDF (__mips16_ledf2, c.le.d, 0, 1) +#endif +#ifdef L_m16ltdf2 +CMPDF (__mips16_ltdf2, c.lt.d, -1, 0) +#endif +#ifdef L_m16unorddf2 +CMPDF(__mips16_unorddf2, c.un.d, 1, 0) +#endif + +/* Double-precision conversions. */ + +#ifdef L_m16fltsidf +STARTFN (__mips16_floatsidf) + MOVE_SI_BYTE0 (t) + cvt.d.w RET,ARG1 + MOVE_DF_RET (f, $31) + ENDFN (__mips16_floatsidf) +#endif + +#ifdef L_m16fltunsidf +STARTFN (__mips16_floatunsidf) + MOVE_SI_BYTE0 (t) + cvt.d.w RET,ARG1 + bgez $4,1f + li.d ARG1, 4.294967296e+9 + add.d RET, RET, ARG1 +1: MOVE_DF_RET (f, $31) + ENDFN (__mips16_floatunsidf) +#endif + +#ifdef L_m16fix_truncdfsi +STARTFN (__mips16_fix_truncdfsi) + MOVE_DF_BYTE0 (t) + trunc.w.d RET,ARG1,$4 + MOVE_SI_RET (f, $31) + ENDFN (__mips16_fix_truncdfsi) +#endif +#endif /* !__mips_single_float */ + +/* Define a function NAME that moves a return value of mode MODE from + FPRs to GPRs. */ + +#define RET_FUNCTION(NAME, MODE) \ +STARTFN (NAME); \ + MOVE_##MODE##_RET (t, $31); \ + ENDFN (NAME) + +#ifdef L_m16retsf +RET_FUNCTION (__mips16_ret_sf, SF) +#endif + +#ifdef L_m16retsc +RET_FUNCTION (__mips16_ret_sc, SC) +#endif + +#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) +#ifdef L_m16retdf +RET_FUNCTION (__mips16_ret_df, DF) +#endif + +#ifdef L_m16retdc +RET_FUNCTION (__mips16_ret_dc, DC) +#endif +#endif /* !__mips_single_float */ + +/* STUB_ARGS_X copies the arguments from GPRs to FPRs for argument + code X. X is calculated as ARG1 + ARG2 * 4, where ARG1 and ARG2 + classify the first and second arguments as follows: + + 1: a single-precision argument + 2: a double-precision argument + 0: no argument, or not one of the above. */ + +#define STUB_ARGS_0 /* () */ +#define STUB_ARGS_1 MOVE_SF_BYTE0 (t) /* (sf) */ +#define STUB_ARGS_5 MOVE_SF_BYTE0 (t); MOVE_SF_BYTE4 (t) /* (sf, sf) */ +#define STUB_ARGS_9 MOVE_SF_BYTE0 (t); MOVE_DF_BYTE8 (t) /* (sf, df) */ +#define STUB_ARGS_2 MOVE_DF_BYTE0 (t) /* (df) */ +#define STUB_ARGS_6 MOVE_DF_BYTE0 (t); MOVE_SF_BYTE8 (t) /* (df, sf) */ +#define STUB_ARGS_10 MOVE_DF_BYTE0 (t); MOVE_DF_BYTE8 (t) /* (df, df) */ + +/* These functions are used by 16-bit code when calling via a function + pointer. They must copy the floating point arguments from the GPRs + to FPRs and then call function $2. 
*/ + +#define CALL_STUB_NO_RET(NAME, CODE) \ +STARTFN (NAME); \ + STUB_ARGS_##CODE; \ + .set noreorder; \ + jr $2; \ + move $25,$2; \ + .set reorder; \ + ENDFN (NAME) + +#ifdef L_m16stub1 +CALL_STUB_NO_RET (__mips16_call_stub_1, 1) +#endif + +#ifdef L_m16stub5 +CALL_STUB_NO_RET (__mips16_call_stub_5, 5) +#endif + +#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) + +#ifdef L_m16stub2 +CALL_STUB_NO_RET (__mips16_call_stub_2, 2) +#endif + +#ifdef L_m16stub6 +CALL_STUB_NO_RET (__mips16_call_stub_6, 6) +#endif + +#ifdef L_m16stub9 +CALL_STUB_NO_RET (__mips16_call_stub_9, 9) +#endif + +#ifdef L_m16stub10 +CALL_STUB_NO_RET (__mips16_call_stub_10, 10) +#endif +#endif /* !__mips_single_float */ + +/* Now we have the same set of functions, except that this time the + function being called returns an SFmode, SCmode, DFmode or DCmode + value; we need to instantiate a set for each case. The calling + function will arrange to preserve $18, so these functions are free + to use it to hold the return address. + + Note that we do not know whether the function we are calling is 16 + bit or 32 bit. However, it does not matter, because 16-bit + functions always return floating point values in both the gp and + the fp regs. It would be possible to check whether the function + being called is 16 bits, in which case the copy is unnecessary; + however, it's faster to always do the copy. */ + +#define CALL_STUB_RET(NAME, CODE, MODE) \ +STARTFN (NAME); \ + move $18,$31; \ + STUB_ARGS_##CODE; \ + .set noreorder; \ + jalr $2; \ + move $25,$2; \ + .set reorder; \ + MOVE_##MODE##_RET (f, $18); \ + ENDFN (NAME) + +/* First, instantiate the single-float set. */ + +#ifdef L_m16stubsf0 +CALL_STUB_RET (__mips16_call_stub_sf_0, 0, SF) +#endif + +#ifdef L_m16stubsf1 +CALL_STUB_RET (__mips16_call_stub_sf_1, 1, SF) +#endif + +#ifdef L_m16stubsf5 +CALL_STUB_RET (__mips16_call_stub_sf_5, 5, SF) +#endif + +#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) +#ifdef L_m16stubsf2 +CALL_STUB_RET (__mips16_call_stub_sf_2, 2, SF) +#endif + +#ifdef L_m16stubsf6 +CALL_STUB_RET (__mips16_call_stub_sf_6, 6, SF) +#endif + +#ifdef L_m16stubsf9 +CALL_STUB_RET (__mips16_call_stub_sf_9, 9, SF) +#endif + +#ifdef L_m16stubsf10 +CALL_STUB_RET (__mips16_call_stub_sf_10, 10, SF) +#endif +#endif /* !__mips_single_float */ + + +/* Now we have the same set of functions again, except that this time + the function being called returns an DFmode value. */ + +#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) +#ifdef L_m16stubdf0 +CALL_STUB_RET (__mips16_call_stub_df_0, 0, DF) +#endif + +#ifdef L_m16stubdf1 +CALL_STUB_RET (__mips16_call_stub_df_1, 1, DF) +#endif + +#ifdef L_m16stubdf5 +CALL_STUB_RET (__mips16_call_stub_df_5, 5, DF) +#endif + +#ifdef L_m16stubdf2 +CALL_STUB_RET (__mips16_call_stub_df_2, 2, DF) +#endif + +#ifdef L_m16stubdf6 +CALL_STUB_RET (__mips16_call_stub_df_6, 6, DF) +#endif + +#ifdef L_m16stubdf9 +CALL_STUB_RET (__mips16_call_stub_df_9, 9, DF) +#endif + +#ifdef L_m16stubdf10 +CALL_STUB_RET (__mips16_call_stub_df_10, 10, DF) +#endif +#endif /* !__mips_single_float */ + + +/* Ho hum. Here we have the same set of functions again, this time + for when the function being called returns an SCmode value. 
*/ + +#ifdef L_m16stubsc0 +CALL_STUB_RET (__mips16_call_stub_sc_0, 0, SC) +#endif + +#ifdef L_m16stubsc1 +CALL_STUB_RET (__mips16_call_stub_sc_1, 1, SC) +#endif + +#ifdef L_m16stubsc5 +CALL_STUB_RET (__mips16_call_stub_sc_5, 5, SC) +#endif + +#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) +#ifdef L_m16stubsc2 +CALL_STUB_RET (__mips16_call_stub_sc_2, 2, SC) +#endif + +#ifdef L_m16stubsc6 +CALL_STUB_RET (__mips16_call_stub_sc_6, 6, SC) +#endif + +#ifdef L_m16stubsc9 +CALL_STUB_RET (__mips16_call_stub_sc_9, 9, SC) +#endif + +#ifdef L_m16stubsc10 +CALL_STUB_RET (__mips16_call_stub_sc_10, 10, SC) +#endif +#endif /* !__mips_single_float */ + + +/* Finally, another set of functions for DCmode. */ + +#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) +#ifdef L_m16stubdc0 +CALL_STUB_RET (__mips16_call_stub_dc_0, 0, DC) +#endif + +#ifdef L_m16stubdc1 +CALL_STUB_RET (__mips16_call_stub_dc_1, 1, DC) +#endif + +#ifdef L_m16stubdc5 +CALL_STUB_RET (__mips16_call_stub_dc_5, 5, DC) +#endif + +#ifdef L_m16stubdc2 +CALL_STUB_RET (__mips16_call_stub_dc_2, 2, DC) +#endif + +#ifdef L_m16stubdc6 +CALL_STUB_RET (__mips16_call_stub_dc_6, 6, DC) +#endif + +#ifdef L_m16stubdc9 +CALL_STUB_RET (__mips16_call_stub_dc_9, 9, DC) +#endif + +#ifdef L_m16stubdc10 +CALL_STUB_RET (__mips16_call_stub_dc_10, 10, DC) +#endif +#endif /* !__mips_single_float */ +#endif diff --git a/libgcc/config/mips/t-mips16 b/libgcc/config/mips/t-mips16 index 46c7472f5f6..5553ed76e2d 100644 --- a/libgcc/config/mips/t-mips16 +++ b/libgcc/config/mips/t-mips16 @@ -1,3 +1,43 @@ +# Copyright (C) 2007, 2008, 2011 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB1ASMSRC = mips/mips16.S +LIB1ASMFUNCS = _m16addsf3 _m16subsf3 _m16mulsf3 _m16divsf3 \ + _m16eqsf2 _m16nesf2 _m16gtsf2 _m16gesf2 _m16lesf2 _m16ltsf2 \ + _m16unordsf2 \ + _m16fltsisf _m16fix_truncsfsi _m16fltunsisf \ + _m16adddf3 _m16subdf3 _m16muldf3 _m16divdf3 \ + _m16extsfdf2 _m16trdfsf2 \ + _m16eqdf2 _m16nedf2 _m16gtdf2 _m16gedf2 _m16ledf2 _m16ltdf2 \ + _m16unorddf2 \ + _m16fltsidf _m16fix_truncdfsi _m16fltunsidf \ + _m16retsf _m16retdf \ + _m16retsc _m16retdc \ + _m16stub1 _m16stub2 _m16stub5 _m16stub6 _m16stub9 _m16stub10 \ + _m16stubsf0 _m16stubsf1 _m16stubsf2 _m16stubsf5 _m16stubsf6 \ + _m16stubsf9 _m16stubsf10 \ + _m16stubdf0 _m16stubdf1 _m16stubdf2 _m16stubdf5 _m16stubdf6 \ + _m16stubdf9 _m16stubdf10 \ + _m16stubsc0 _m16stubsc1 _m16stubsc2 _m16stubsc5 _m16stubsc6 \ + _m16stubsc9 _m16stubsc10 \ + _m16stubdc0 _m16stubdc1 _m16stubdc2 _m16stubdc5 _m16stubdc6 \ + _m16stubdc9 _m16stubdc10 + SYNC = yes SYNC_CFLAGS = -mno-mips16 diff --git a/libgcc/config/pa/milli64.S b/libgcc/config/pa/milli64.S new file mode 100644 index 00000000000..2e9c4f741b6 --- /dev/null +++ b/libgcc/config/pa/milli64.S @@ -0,0 +1,2134 @@ +/* 32 and 64-bit millicode, original author Hewlett-Packard + adapted for gcc by Paul Bame + and Alan Modra . 
+ + Copyright 2001, 2002, 2003, 2007, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifdef pa64 + .level 2.0w +#endif + +/* Hardware General Registers. */ +r0: .reg %r0 +r1: .reg %r1 +r2: .reg %r2 +r3: .reg %r3 +r4: .reg %r4 +r5: .reg %r5 +r6: .reg %r6 +r7: .reg %r7 +r8: .reg %r8 +r9: .reg %r9 +r10: .reg %r10 +r11: .reg %r11 +r12: .reg %r12 +r13: .reg %r13 +r14: .reg %r14 +r15: .reg %r15 +r16: .reg %r16 +r17: .reg %r17 +r18: .reg %r18 +r19: .reg %r19 +r20: .reg %r20 +r21: .reg %r21 +r22: .reg %r22 +r23: .reg %r23 +r24: .reg %r24 +r25: .reg %r25 +r26: .reg %r26 +r27: .reg %r27 +r28: .reg %r28 +r29: .reg %r29 +r30: .reg %r30 +r31: .reg %r31 + +/* Hardware Space Registers. */ +sr0: .reg %sr0 +sr1: .reg %sr1 +sr2: .reg %sr2 +sr3: .reg %sr3 +sr4: .reg %sr4 +sr5: .reg %sr5 +sr6: .reg %sr6 +sr7: .reg %sr7 + +/* Hardware Floating Point Registers. */ +fr0: .reg %fr0 +fr1: .reg %fr1 +fr2: .reg %fr2 +fr3: .reg %fr3 +fr4: .reg %fr4 +fr5: .reg %fr5 +fr6: .reg %fr6 +fr7: .reg %fr7 +fr8: .reg %fr8 +fr9: .reg %fr9 +fr10: .reg %fr10 +fr11: .reg %fr11 +fr12: .reg %fr12 +fr13: .reg %fr13 +fr14: .reg %fr14 +fr15: .reg %fr15 + +/* Hardware Control Registers. */ +cr11: .reg %cr11 +sar: .reg %cr11 /* Shift Amount Register */ + +/* Software Architecture General Registers. */ +rp: .reg r2 /* return pointer */ +#ifdef pa64 +mrp: .reg r2 /* millicode return pointer */ +#else +mrp: .reg r31 /* millicode return pointer */ +#endif +ret0: .reg r28 /* return value */ +ret1: .reg r29 /* return value (high part of double) */ +sp: .reg r30 /* stack pointer */ +dp: .reg r27 /* data pointer */ +arg0: .reg r26 /* argument */ +arg1: .reg r25 /* argument or high part of double argument */ +arg2: .reg r24 /* argument */ +arg3: .reg r23 /* argument or high part of double argument */ + +/* Software Architecture Space Registers. */ +/* sr0 ; return link from BLE */ +sret: .reg sr1 /* return value */ +sarg: .reg sr1 /* argument */ +/* sr4 ; PC SPACE tracker */ +/* sr5 ; process private data */ + +/* Frame Offsets (millicode convention!) Used when calling other + millicode routines. Stack unwinding is dependent upon these + definitions. 
*/ +r31_slot: .equ -20 /* "current RP" slot */ +sr0_slot: .equ -16 /* "static link" slot */ +#if defined(pa64) +mrp_slot: .equ -16 /* "current RP" slot */ +psp_slot: .equ -8 /* "previous SP" slot */ +#else +mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */ +#endif + + +#define DEFINE(name,value)name: .EQU value +#define RDEFINE(name,value)name: .REG value +#ifdef milliext +#define MILLI_BE(lbl) BE lbl(sr7,r0) +#define MILLI_BEN(lbl) BE,n lbl(sr7,r0) +#define MILLI_BLE(lbl) BLE lbl(sr7,r0) +#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0) +#define MILLIRETN BE,n 0(sr0,mrp) +#define MILLIRET BE 0(sr0,mrp) +#define MILLI_RETN BE,n 0(sr0,mrp) +#define MILLI_RET BE 0(sr0,mrp) +#else +#define MILLI_BE(lbl) B lbl +#define MILLI_BEN(lbl) B,n lbl +#define MILLI_BLE(lbl) BL lbl,mrp +#define MILLI_BLEN(lbl) BL,n lbl,mrp +#define MILLIRETN BV,n 0(mrp) +#define MILLIRET BV 0(mrp) +#define MILLI_RETN BV,n 0(mrp) +#define MILLI_RET BV 0(mrp) +#endif + +#ifdef __STDC__ +#define CAT(a,b) a##b +#else +#define CAT(a,b) a/**/b +#endif + +#ifdef ELF +#define SUBSPA_MILLI .section .text +#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16 +#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16 +#define ATTR_MILLI +#define SUBSPA_DATA .section .data +#define ATTR_DATA +#define GLOBAL $global$ +#define GSYM(sym) !sym: +#define LSYM(sym) !CAT(.L,sym:) +#define LREF(sym) CAT(.L,sym) + +#else + +#ifdef coff +/* This used to be .milli but since link32 places different named + sections in different segments millicode ends up a long ways away + from .text (1meg?). This way they will be a lot closer. + + The SUBSPA_MILLI_* specify locality sets for certain millicode + modules in order to ensure that modules that call one another are + placed close together. Without locality sets this is unlikely to + happen because of the Dynamite linker library search algorithm. We + want these modules close together so that short calls always reach + (we don't want to require long calls or use long call stubs). 
*/ + +#define SUBSPA_MILLI .subspa .text +#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16 +#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16 +#define ATTR_MILLI .attr code,read,execute +#define SUBSPA_DATA .subspa .data +#define ATTR_DATA .attr init_data,read,write +#define GLOBAL _gp +#else +#define SUBSPA_MILLI .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8 +#define SUBSPA_MILLI_DIV SUBSPA_MILLI +#define SUBSPA_MILLI_MUL SUBSPA_MILLI +#define ATTR_MILLI +#define SUBSPA_DATA .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero +#define ATTR_DATA +#define GLOBAL $global$ +#endif +#define SPACE_DATA .space $PRIVATE$,spnum=1,sort=16 + +#define GSYM(sym) !sym +#define LSYM(sym) !CAT(L$,sym) +#define LREF(sym) CAT(L$,sym) +#endif + +#ifdef L_dyncall + SUBSPA_MILLI + ATTR_DATA +GSYM($$dyncall) + .export $$dyncall,millicode + .proc + .callinfo millicode + .entry + bb,>=,n %r22,30,LREF(1) ; branch if not plabel address + depi 0,31,2,%r22 ; clear the two least significant bits + ldw 4(%r22),%r19 ; load new LTP value + ldw 0(%r22),%r22 ; load address of target +LSYM(1) +#ifdef LINUX + bv %r0(%r22) ; branch to the real target +#else + ldsid (%sr0,%r22),%r1 ; get the "space ident" selected by r22 + mtsp %r1,%sr0 ; move that space identifier into sr0 + be 0(%sr0,%r22) ; branch to the real target +#endif + stw %r2,-24(%r30) ; save return address into frame marker + .exit + .procend +#endif + +#ifdef L_divI +/* ROUTINES: $$divI, $$divoI + + Single precision divide for signed binary integers. + + The quotient is truncated towards zero. + The sign of the quotient is the XOR of the signs of the dividend and + divisor. + Divide by zero is trapped. + Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI. + + INPUT REGISTERS: + . arg0 == dividend + . arg1 == divisor + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = undefined + . arg1 = undefined + . ret1 = quotient + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: + . divisor is zero (traps with ADDIT,= 0,25,0) + . dividend==-2**31 and divisor==-1 and routine is $$divoI + . (traps with ADDO 26,25,0) + . Changes memory at the following places: + . NONE + + PERMISSIBLE CONTEXT: + . Unwindable. + . Suitable for internal or external millicode. + . Assumes the special millicode register conventions. + + DISCUSSION: + . Branchs to other millicode routines using BE + . $$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15 + . + . For selected divisors, calls a divide by constant routine written by + . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13. + . + . The only overflow case is -2**31 divided by -1. + . Both routines return -2**31 but only $$divoI traps. */ + +RDEFINE(temp,r1) +RDEFINE(retreg,ret1) /* r29 */ +RDEFINE(temp1,arg0) + SUBSPA_MILLI_DIV + ATTR_MILLI + .import $$divI_2,millicode + .import $$divI_3,millicode + .import $$divI_4,millicode + .import $$divI_5,millicode + .import $$divI_6,millicode + .import $$divI_7,millicode + .import $$divI_8,millicode + .import $$divI_9,millicode + .import $$divI_10,millicode + .import $$divI_12,millicode + .import $$divI_14,millicode + .import $$divI_15,millicode + .export $$divI,millicode + .export $$divoI,millicode + .proc + .callinfo millicode + .entry +GSYM($$divoI) + comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1 */ +GSYM($$divI) + ldo -1(arg1),temp /* is there at most one bit set ? 
*/ + and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */ + addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */ + b,n LREF(neg_denom) +LSYM(pow2) + addi,>= 0,arg0,retreg /* if numerator is negative, add the */ + add arg0,temp,retreg /* (denominaotr -1) to correct for shifts */ + extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */ + extrs retreg,15,16,retreg /* retreg = retreg >> 16 */ + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */ + ldi 0xcc,temp1 /* setup 0xcc in temp1 */ + extru,= arg1,23,8,temp /* test denominator with 0xff00 */ + extrs retreg,23,24,retreg /* retreg = retreg >> 8 */ + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */ + ldi 0xaa,temp /* setup 0xaa in temp */ + extru,= arg1,27,4,r0 /* test denominator with 0xf0 */ + extrs retreg,27,28,retreg /* retreg = retreg >> 4 */ + and,= arg1,temp1,r0 /* test denominator with 0xcc */ + extrs retreg,29,30,retreg /* retreg = retreg >> 2 */ + and,= arg1,temp,r0 /* test denominator with 0xaa */ + extrs retreg,30,31,retreg /* retreg = retreg >> 1 */ + MILLIRETN +LSYM(neg_denom) + addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */ + b,n LREF(regular_seq) + sub r0,arg1,temp /* make denominator positive */ + comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */ + ldo -1(temp),retreg /* is there at most one bit set ? */ + and,= temp,retreg,r0 /* if so, the denominator is power of 2 */ + b,n LREF(regular_seq) + sub r0,arg0,retreg /* negate numerator */ + comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */ + copy retreg,arg0 /* set up arg0, arg1 and temp */ + copy temp,arg1 /* before branching to pow2 */ + b LREF(pow2) + ldo -1(arg1),temp +LSYM(regular_seq) + comib,>>=,n 15,arg1,LREF(small_divisor) + add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */ +LSYM(normal) + subi 0,retreg,retreg /* make it positive */ + sub 0,arg1,temp /* clear carry, */ + /* negate the divisor */ + ds 0,temp,0 /* set V-bit to the comple- */ + /* ment of the divisor sign */ + add retreg,retreg,retreg /* shift msb bit into carry */ + ds r0,arg1,temp /* 1st divide step, if no carry */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 2nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 3rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 4th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 5th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 6th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 7th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 8th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 9th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 10th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 11th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 12th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 13th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 14th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ 
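[Illustrative aside, not part of the original milli64 source: the 32-step ds/addc ladder running through this routine (and the matching sequences in $$divU, $$remI and $$remU below) produces one quotient bit per divide step.  The C sketch that follows is a minimal model of what that ladder computes for the unsigned core case; the function name is invented here, the model is a plain restoring divide, and the real code is non-restoring (its state lives in the carry and PSW V bits, and $$divI/$$divoI first make the operands positive and fix the result sign afterwards, as the header above describes), but the final quotient and remainder agree.]

#include <stdint.h>

/* Model of the unrolled divide-step sequence: shift the next dividend
   bit into a running remainder, subtract the divisor when it fits, and
   shift the corresponding quotient bit in from the right.  */
static void
divide_step_model (uint32_t n, uint32_t d, uint32_t *quot, uint32_t *rem)
{
  uint32_t q = 0, r = 0;
  /* The millicode traps on d == 0 (ADDIT,=); the model assumes d != 0.  */
  for (int i = 31; i >= 0; i--)
    {
      r = (r << 1) | ((n >> i) & 1);   /* bring in the next dividend bit */
      q <<= 1;
      if (r >= d)                      /* one conditional divide step */
        {
          r -= d;
          q |= 1;
        }
    }
  *quot = q;   /* what $$divU leaves in ret1 */
  *rem = r;    /* what $$remU leaves in ret1 */
}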
+ ds temp,arg1,temp /* 15th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 16th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 17th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 18th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 19th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 20th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 21st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 22nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 23rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 24th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 25th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 26th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 27th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 28th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 29th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 30th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 31st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 32nd divide step, */ + addc retreg,retreg,retreg /* shift last retreg bit into retreg */ + xor,>= arg0,arg1,0 /* get correct sign of quotient */ + sub 0,retreg,retreg /* based on operand signs */ + MILLIRETN + nop + +LSYM(small_divisor) + +#if defined(pa64) +/* Clear the upper 32 bits of the arg1 register. We are working with */ +/* small divisors (and 32-bit integers) We must not be mislead */ +/* by "1" bits left in the upper 32 bits. */ + depd %r0,31,32,%r25 +#endif + blr,n arg1,r0 + nop +/* table for divisor == 0,1, ... ,15 */ + addit,= 0,arg1,r0 /* trap if divisor == 0 */ + nop + MILLIRET /* divisor == 1 */ + copy arg0,retreg + MILLI_BEN($$divI_2) /* divisor == 2 */ + nop + MILLI_BEN($$divI_3) /* divisor == 3 */ + nop + MILLI_BEN($$divI_4) /* divisor == 4 */ + nop + MILLI_BEN($$divI_5) /* divisor == 5 */ + nop + MILLI_BEN($$divI_6) /* divisor == 6 */ + nop + MILLI_BEN($$divI_7) /* divisor == 7 */ + nop + MILLI_BEN($$divI_8) /* divisor == 8 */ + nop + MILLI_BEN($$divI_9) /* divisor == 9 */ + nop + MILLI_BEN($$divI_10) /* divisor == 10 */ + nop + b LREF(normal) /* divisor == 11 */ + add,>= 0,arg0,retreg + MILLI_BEN($$divI_12) /* divisor == 12 */ + nop + b LREF(normal) /* divisor == 13 */ + add,>= 0,arg0,retreg + MILLI_BEN($$divI_14) /* divisor == 14 */ + nop + MILLI_BEN($$divI_15) /* divisor == 15 */ + nop + +LSYM(negative1) + sub 0,arg0,retreg /* result is negation of dividend */ + MILLIRET + addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */ + .exit + .procend + .end +#endif + +#ifdef L_divU +/* ROUTINE: $$divU + . + . Single precision divide for unsigned integers. + . + . Quotient is truncated towards zero. + . Traps on divide by zero. + + INPUT REGISTERS: + . 
arg0 == dividend + . arg1 == divisor + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = undefined + . arg1 = undefined + . ret1 = quotient + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: + . divisor is zero + . Changes memory at the following places: + . NONE + + PERMISSIBLE CONTEXT: + . Unwindable. + . Does not create a stack frame. + . Suitable for internal or external millicode. + . Assumes the special millicode register conventions. + + DISCUSSION: + . Branchs to other millicode routines using BE: + . $$divU_# for 3,5,6,7,9,10,12,14,15 + . + . For selected small divisors calls the special divide by constant + . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */ + +RDEFINE(temp,r1) +RDEFINE(retreg,ret1) /* r29 */ +RDEFINE(temp1,arg0) + SUBSPA_MILLI_DIV + ATTR_MILLI + .export $$divU,millicode + .import $$divU_3,millicode + .import $$divU_5,millicode + .import $$divU_6,millicode + .import $$divU_7,millicode + .import $$divU_9,millicode + .import $$divU_10,millicode + .import $$divU_12,millicode + .import $$divU_14,millicode + .import $$divU_15,millicode + .proc + .callinfo millicode + .entry +GSYM($$divU) +/* The subtract is not nullified since it does no harm and can be used + by the two cases that branch back to "normal". */ + ldo -1(arg1),temp /* is there at most one bit set ? */ + and,= arg1,temp,r0 /* if so, denominator is power of 2 */ + b LREF(regular_seq) + addit,= 0,arg1,0 /* trap for zero dvr */ + copy arg0,retreg + extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */ + extru retreg,15,16,retreg /* retreg = retreg >> 16 */ + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */ + ldi 0xcc,temp1 /* setup 0xcc in temp1 */ + extru,= arg1,23,8,temp /* test denominator with 0xff00 */ + extru retreg,23,24,retreg /* retreg = retreg >> 8 */ + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */ + ldi 0xaa,temp /* setup 0xaa in temp */ + extru,= arg1,27,4,r0 /* test denominator with 0xf0 */ + extru retreg,27,28,retreg /* retreg = retreg >> 4 */ + and,= arg1,temp1,r0 /* test denominator with 0xcc */ + extru retreg,29,30,retreg /* retreg = retreg >> 2 */ + and,= arg1,temp,r0 /* test denominator with 0xaa */ + extru retreg,30,31,retreg /* retreg = retreg >> 1 */ + MILLIRETN + nop +LSYM(regular_seq) + comib,>= 15,arg1,LREF(special_divisor) + subi 0,arg1,temp /* clear carry, negate the divisor */ + ds r0,temp,r0 /* set V-bit to 1 */ +LSYM(normal) + add arg0,arg0,retreg /* shift msb bit into carry */ + ds r0,arg1,temp /* 1st divide step, if no carry */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 2nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 3rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 4th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 5th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 6th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 7th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 8th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 9th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds 
temp,arg1,temp /* 10th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 11th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 12th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 13th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 14th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 15th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 16th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 17th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 18th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 19th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 20th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 21st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 22nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 23rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 24th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 25th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 26th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 27th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 28th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 29th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 30th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 31st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 32nd divide step, */ + MILLIRET + addc retreg,retreg,retreg /* shift last retreg bit into retreg */ + +/* Handle the cases where divisor is a small constant or has high bit on. */ +LSYM(special_divisor) +/* blr arg1,r0 */ +/* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */ + +/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from + generating such a blr, comib sequence. A problem in nullification. So I + rewrote this code. */ + +#if defined(pa64) +/* Clear the upper 32 bits of the arg1 register. We are working with + small divisors (and 32-bit unsigned integers) We must not be mislead + by "1" bits left in the upper 32 bits. 
*/ + depd %r0,31,32,%r25 +#endif + comib,> 0,arg1,LREF(big_divisor) + nop + blr arg1,r0 + nop + +LSYM(zero_divisor) /* this label is here to provide external visibility */ + addit,= 0,arg1,0 /* trap for zero dvr */ + nop + MILLIRET /* divisor == 1 */ + copy arg0,retreg + MILLIRET /* divisor == 2 */ + extru arg0,30,31,retreg + MILLI_BEN($$divU_3) /* divisor == 3 */ + nop + MILLIRET /* divisor == 4 */ + extru arg0,29,30,retreg + MILLI_BEN($$divU_5) /* divisor == 5 */ + nop + MILLI_BEN($$divU_6) /* divisor == 6 */ + nop + MILLI_BEN($$divU_7) /* divisor == 7 */ + nop + MILLIRET /* divisor == 8 */ + extru arg0,28,29,retreg + MILLI_BEN($$divU_9) /* divisor == 9 */ + nop + MILLI_BEN($$divU_10) /* divisor == 10 */ + nop + b LREF(normal) /* divisor == 11 */ + ds r0,temp,r0 /* set V-bit to 1 */ + MILLI_BEN($$divU_12) /* divisor == 12 */ + nop + b LREF(normal) /* divisor == 13 */ + ds r0,temp,r0 /* set V-bit to 1 */ + MILLI_BEN($$divU_14) /* divisor == 14 */ + nop + MILLI_BEN($$divU_15) /* divisor == 15 */ + nop + +/* Handle the case where the high bit is on in the divisor. + Compute: if( dividend>=divisor) quotient=1; else quotient=0; + Note: dividend>==divisor iff dividend-divisor does not borrow + and not borrow iff carry. */ +LSYM(big_divisor) + sub arg0,arg1,r0 + MILLIRET + addc r0,r0,retreg + .exit + .procend + .end +#endif + +#ifdef L_remI +/* ROUTINE: $$remI + + DESCRIPTION: + . $$remI returns the remainder of the division of two signed 32-bit + . integers. The sign of the remainder is the same as the sign of + . the dividend. + + + INPUT REGISTERS: + . arg0 == dividend + . arg1 == divisor + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = destroyed + . arg1 = destroyed + . ret1 = remainder + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: DIVIDE BY ZERO + . Changes memory at the following places: NONE + + PERMISSIBLE CONTEXT: + . Unwindable + . Does not create a stack frame + . Is usable for internal or external microcode + + DISCUSSION: + . Calls other millicode routines via mrp: NONE + . Calls other millicode routines: NONE */ + +RDEFINE(tmp,r1) +RDEFINE(retreg,ret1) + + SUBSPA_MILLI + ATTR_MILLI + .proc + .callinfo millicode + .entry +GSYM($$remI) +GSYM($$remoI) + .export $$remI,MILLICODE + .export $$remoI,MILLICODE + ldo -1(arg1),tmp /* is there at most one bit set ? */ + and,<> arg1,tmp,r0 /* if not, don't use power of 2 */ + addi,> 0,arg1,r0 /* if denominator > 0, use power */ + /* of 2 */ + b,n LREF(neg_denom) +LSYM(pow2) + comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */ + and arg0,tmp,retreg /* get the result */ + MILLIRETN +LSYM(neg_num) + subi 0,arg0,arg0 /* negate numerator */ + and arg0,tmp,retreg /* get the result */ + subi 0,retreg,retreg /* negate result */ + MILLIRETN +LSYM(neg_denom) + addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */ + /* of 2 */ + b,n LREF(regular_seq) + sub r0,arg1,tmp /* make denominator positive */ + comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */ + ldo -1(tmp),retreg /* is there at most one bit set ? 
*/ + and,= tmp,retreg,r0 /* if not, go to regular_seq */ + b,n LREF(regular_seq) + comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */ + and arg0,retreg,retreg + MILLIRETN +LSYM(neg_num_2) + subi 0,arg0,tmp /* test against 0x80000000 */ + and tmp,retreg,retreg + subi 0,retreg,retreg + MILLIRETN +LSYM(regular_seq) + addit,= 0,arg1,0 /* trap if div by zero */ + add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */ + sub 0,retreg,retreg /* make it positive */ + sub 0,arg1, tmp /* clear carry, */ + /* negate the divisor */ + ds 0, tmp,0 /* set V-bit to the comple- */ + /* ment of the divisor sign */ + or 0,0, tmp /* clear tmp */ + add retreg,retreg,retreg /* shift msb bit into carry */ + ds tmp,arg1, tmp /* 1st divide step, if no carry */ + /* out, msb of quotient = 0 */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ +LSYM(t1) + ds tmp,arg1, tmp /* 2nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 3rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 4th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 5th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 6th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 7th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 8th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 9th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 10th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 11th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 12th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 13th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 14th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 15th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 16th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 17th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 18th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 19th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 20th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 21st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 22nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 23rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 24th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 25th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 26th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 27th divide step */ + addc retreg,retreg,retreg /* 
shift retreg with/into carry */ + ds tmp,arg1, tmp /* 28th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 29th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 30th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 31st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 32nd divide step, */ + addc retreg,retreg,retreg /* shift last bit into retreg */ + movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */ + add,< arg1,0,0 /* if arg1 > 0, add arg1 */ + add,tr tmp,arg1,retreg /* for correcting remainder tmp */ + sub tmp,arg1,retreg /* else add absolute value arg1 */ +LSYM(finish) + add,>= arg0,0,0 /* set sign of remainder */ + sub 0,retreg,retreg /* to sign of dividend */ + MILLIRET + nop + .exit + .procend +#ifdef milliext + .origin 0x00000200 +#endif + .end +#endif + +#ifdef L_remU +/* ROUTINE: $$remU + . Single precision divide for remainder with unsigned binary integers. + . + . The remainder must be dividend-(dividend/divisor)*divisor. + . Divide by zero is trapped. + + INPUT REGISTERS: + . arg0 == dividend + . arg1 == divisor + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = undefined + . arg1 = undefined + . ret1 = remainder + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: DIVIDE BY ZERO + . Changes memory at the following places: NONE + + PERMISSIBLE CONTEXT: + . Unwindable. + . Does not create a stack frame. + . Suitable for internal or external millicode. + . Assumes the special millicode register conventions. + + DISCUSSION: + . Calls other millicode routines using mrp: NONE + . Calls other millicode routines: NONE */ + + +RDEFINE(temp,r1) +RDEFINE(rmndr,ret1) /* r29 */ + SUBSPA_MILLI + ATTR_MILLI + .export $$remU,millicode + .proc + .callinfo millicode + .entry +GSYM($$remU) + ldo -1(arg1),temp /* is there at most one bit set ? 
*/ + and,= arg1,temp,r0 /* if not, don't use power of 2 */ + b LREF(regular_seq) + addit,= 0,arg1,r0 /* trap on div by zero */ + and arg0,temp,rmndr /* get the result for power of 2 */ + MILLIRETN +LSYM(regular_seq) + comib,>=,n 0,arg1,LREF(special_case) + subi 0,arg1,rmndr /* clear carry, negate the divisor */ + ds r0,rmndr,r0 /* set V-bit to 1 */ + add arg0,arg0,temp /* shift msb bit into carry */ + ds r0,arg1,rmndr /* 1st divide step, if no carry */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 2nd divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 3rd divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 4th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 5th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 6th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 7th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 8th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 9th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 10th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 11th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 12th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 13th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 14th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 15th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 16th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 17th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 18th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 19th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 20th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 21st divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 22nd divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 23rd divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 24th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 25th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 26th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 27th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 28th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 29th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 30th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 31st divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 32nd divide step, */ + 
comiclr,<= 0,rmndr,r0 + add rmndr,arg1,rmndr /* correction */ + MILLIRETN + nop + +/* Putting >= on the last DS and deleting COMICLR does not work! */ +LSYM(special_case) + sub,>>= arg0,arg1,rmndr + copy arg0,rmndr + MILLIRETN + nop + .exit + .procend + .end +#endif + +#ifdef L_div_const +/* ROUTINE: $$divI_2 + . $$divI_3 $$divU_3 + . $$divI_4 + . $$divI_5 $$divU_5 + . $$divI_6 $$divU_6 + . $$divI_7 $$divU_7 + . $$divI_8 + . $$divI_9 $$divU_9 + . $$divI_10 $$divU_10 + . + . $$divI_12 $$divU_12 + . + . $$divI_14 $$divU_14 + . $$divI_15 $$divU_15 + . $$divI_16 + . $$divI_17 $$divU_17 + . + . Divide by selected constants for single precision binary integers. + + INPUT REGISTERS: + . arg0 == dividend + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = undefined + . arg1 = undefined + . ret1 = quotient + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: NONE + . Changes memory at the following places: NONE + + PERMISSIBLE CONTEXT: + . Unwindable. + . Does not create a stack frame. + . Suitable for internal or external millicode. + . Assumes the special millicode register conventions. + + DISCUSSION: + . Calls other millicode routines using mrp: NONE + . Calls other millicode routines: NONE */ + + +/* TRUNCATED DIVISION BY SMALL INTEGERS + + We are interested in q(x) = floor(x/y), where x >= 0 and y > 0 + (with y fixed). + + Let a = floor(z/y), for some choice of z. Note that z will be + chosen so that division by z is cheap. + + Let r be the remainder(z/y). In other words, r = z - ay. + + Now, our method is to choose a value for b such that + + q'(x) = floor((ax+b)/z) + + is equal to q(x) over as large a range of x as possible. If the + two are equal over a sufficiently large range, and if it is easy to + form the product (ax), and it is easy to divide by z, then we can + perform the division much faster than the general division algorithm. + + So, we want the following to be true: + + . For x in the following range: + . + . ky <= x < (k+1)y + . + . implies that + . + . k <= (ax+b)/z < (k+1) + + We want to determine b such that this is true for all k in the + range {0..K} for some maximum K. + + Since (ax+b) is an increasing function of x, we can take each + bound separately to determine the "best" value for b. + + (ax+b)/z < (k+1) implies + + (a((k+1)y-1)+b < (k+1)z implies + + b < a + (k+1)(z-ay) implies + + b < a + (k+1)r + + This needs to be true for all k in the range {0..K}. In + particular, it is true for k = 0 and this leads to a maximum + acceptable value for b. + + b < a+r or b <= a+r-1 + + Taking the other bound, we have + + k <= (ax+b)/z implies + + k <= (aky+b)/z implies + + k(z-ay) <= b implies + + kr <= b + + Clearly, the largest range for k will be achieved by maximizing b, + when r is not zero. When r is zero, then the simplest choice for b + is 0. When r is not 0, set + + . b = a+r-1 + + Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y) + for all x in the range: + + . 0 <= x < (K+1)y + + We need to determine what K is. Of our two bounds, + + . b < a+(k+1)r is satisfied for all k >= 0, by construction. + + The other bound is + + . kr <= b + + This is always true if r = 0. If r is not 0 (the usual case), then + K = floor((a+r-1)/r), is the maximum value for k. 
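[Illustrative aside, not part of the original milli64 source: as a concrete instance of the recipe just derived, take y = 3 with z = 2**32.  Then a = floor(z/y) = 0x55555555, r = z - a*y = 1, b = a + r - 1 = a, and K = floor((a+r-1)/r) = a, so (K+1)*y - 1 = 0x100000001 and every 32-bit x is covered.  The C check below, with a name invented for this note, samples q'(x) = floor((a*x + b)/z) against x/3, using a 64-bit multiply in place of the shift-and-add sequence that $$divI_3/$$divU_3 perform.]

#include <assert.h>
#include <stdint.h>

/* floor ((a*x + b) / 2**32) with a = floor (2**32 / 3) and b = a + r - 1.  */
static uint32_t
div3_by_reciprocal (uint32_t x)
{
  const uint64_t a = 0x55555555u;   /* floor (2**32 / 3) */
  const uint64_t b = a;             /* r == 1, so b = a + r - 1 = a */
  return (uint32_t) ((a * x + b) >> 32);
}

int
main (void)
{
  /* Spot-check q'(x) == floor (x/3) over sampled 32-bit x.  */
  for (uint64_t x = 0; x <= 0xffffffffu; x += 12345)
    assert (div3_by_reciprocal ((uint32_t) x) == (uint32_t) (x / 3));
  return 0;
}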
+ + Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct + answer for q(x) = floor(x/y) when x is in the range + + (0,(K+1)y-1) K = floor((a+r-1)/r) + + To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that + the formula for q'(x) yields the correct value of q(x) for all x + representable by a single word in HPPA. + + We are also constrained in that computing the product (ax), adding + b, and dividing by z must all be done quickly, otherwise we will be + better off going through the general algorithm using the DS + instruction, which uses approximately 70 cycles. + + For each y, there is a choice of z which satisfies the constraints + for (K+1)y >= 2**32. We may not, however, be able to satisfy the + timing constraints for arbitrary y. It seems that z being equal to + a power of 2 or a power of 2 minus 1 is as good as we can do, since + it minimizes the time to do division by z. We want the choice of z + to also result in a value for (a) that minimizes the computation of + the product (ax). This is best achieved if (a) has a regular bit + pattern (so the multiplication can be done with shifts and adds). + The value of (a) also needs to be less than 2**32 so the product is + always guaranteed to fit in 2 words. + + In actual practice, the following should be done: + + 1) For negative x, you should take the absolute value and remember + . the fact so that the result can be negated. This obviously does + . not apply in the unsigned case. + 2) For even y, you should factor out the power of 2 that divides y + . and divide x by it. You can then proceed by dividing by the + . odd factor of y. + + Here is a table of some odd values of y, and corresponding choices + for z which are "good". + + y z r a (hex) max x (hex) + + 3 2**32 1 55555555 100000001 + 5 2**32 1 33333333 100000003 + 7 2**24-1 0 249249 (infinite) + 9 2**24-1 0 1c71c7 (infinite) + 11 2**20-1 0 1745d (infinite) + 13 2**24-1 0 13b13b (infinite) + 15 2**32 1 11111111 10000000d + 17 2**32 1 f0f0f0f 10000000f + + If r is 1, then b = a+r-1 = a. This simplifies the computation + of (ax+b), since you can compute (x+1)(a) instead. If r is 0, + then b = 0 is ok to use which simplifies (ax+b). + + The bit patterns for 55555555, 33333333, and 11111111 are obviously + very regular. The bit patterns for the other values of a above are: + + y (hex) (binary) + + 7 249249 001001001001001001001001 << regular >> + 9 1c71c7 000111000111000111000111 << regular >> + 11 1745d 000000010111010001011101 << irregular >> + 13 13b13b 000100111011000100111011 << irregular >> + + The bit patterns for (a) corresponding to (y) of 11 and 13 may be + too irregular to warrant using this method. + + When z is a power of 2 minus 1, then the division by z is slightly + more complicated, involving an iterative solution. + + The code presented here solves division by 1 through 17, except for + 11 and 13. There are algorithms for both signed and unsigned + quantities given. + + TIMINGS (cycles) + + divisor positive negative unsigned + + . 1 2 2 2 + . 2 4 4 2 + . 3 19 21 19 + . 4 4 4 2 + . 5 18 22 19 + . 6 19 22 19 + . 8 4 4 2 + . 10 18 19 17 + . 12 18 20 18 + . 15 16 18 16 + . 16 4 4 2 + . 17 16 18 16 + + Now, the algorithm for 7, 9, and 14 is an iterative one. That is, + a loop body is executed until the tentative quotient is 0. The + number of times the loop body is executed varies depending on the + dividend, but is never more than two times. If the dividend is + less than the divisor, then the loop body is not executed at all. 
+ Each iteration adds 4 cycles to the timings. + + divisor positive negative unsigned + + . 7 19+4n 20+4n 20+4n n = number of iterations + . 9 21+4n 22+4n 21+4n + . 14 21+4n 22+4n 20+4n + + To give an idea of how the number of iterations varies, here is a + table of dividend versus number of iterations when dividing by 7. + + smallest largest required + dividend dividend iterations + + . 0 6 0 + . 7 0x6ffffff 1 + 0x1000006 0xffffffff 2 + + There is some overlap in the range of numbers requiring 1 and 2 + iterations. */ + +RDEFINE(t2,r1) +RDEFINE(x2,arg0) /* r26 */ +RDEFINE(t1,arg1) /* r25 */ +RDEFINE(x1,ret1) /* r29 */ + + SUBSPA_MILLI_DIV + ATTR_MILLI + + .proc + .callinfo millicode + .entry +/* NONE of these routines require a stack frame + ALL of these routines are unwindable from millicode */ + +GSYM($$divide_by_constant) + .export $$divide_by_constant,millicode +/* Provides a "nice" label for the code covered by the unwind descriptor + for things like gprof. */ + +/* DIVISION BY 2 (shift by 1) */ +GSYM($$divI_2) + .export $$divI_2,millicode + comclr,>= arg0,0,0 + addi 1,arg0,arg0 + MILLIRET + extrs arg0,30,31,ret1 + + +/* DIVISION BY 4 (shift by 2) */ +GSYM($$divI_4) + .export $$divI_4,millicode + comclr,>= arg0,0,0 + addi 3,arg0,arg0 + MILLIRET + extrs arg0,29,30,ret1 + + +/* DIVISION BY 8 (shift by 3) */ +GSYM($$divI_8) + .export $$divI_8,millicode + comclr,>= arg0,0,0 + addi 7,arg0,arg0 + MILLIRET + extrs arg0,28,29,ret1 + +/* DIVISION BY 16 (shift by 4) */ +GSYM($$divI_16) + .export $$divI_16,millicode + comclr,>= arg0,0,0 + addi 15,arg0,arg0 + MILLIRET + extrs arg0,27,28,ret1 + +/**************************************************************************** +* +* DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these +* +* includes 3,5,15,17 and also 6,10,12 +* +****************************************************************************/ + +/* DIVISION BY 3 (use z = 2**32; a = 55555555) */ + +GSYM($$divI_3) + .export $$divI_3,millicode + comb,<,N x2,0,LREF(neg3) + + addi 1,x2,x2 /* this cannot overflow */ + extru x2,1,2,x1 /* multiply by 5 to get started */ + sh2add x2,x2,x2 + b LREF(pos) + addc x1,0,x1 + +LSYM(neg3) + subi 1,x2,x2 /* this cannot overflow */ + extru x2,1,2,x1 /* multiply by 5 to get started */ + sh2add x2,x2,x2 + b LREF(neg) + addc x1,0,x1 + +GSYM($$divU_3) + .export $$divU_3,millicode + addi 1,x2,x2 /* this CAN overflow */ + addc 0,0,x1 + shd x1,x2,30,t1 /* multiply by 5 to get started */ + sh2add x2,x2,x2 + b LREF(pos) + addc x1,t1,x1 + +/* DIVISION BY 5 (use z = 2**32; a = 33333333) */ + +GSYM($$divI_5) + .export $$divI_5,millicode + comb,<,N x2,0,LREF(neg5) + + addi 3,x2,t1 /* this cannot overflow */ + sh1add x2,t1,x2 /* multiply by 3 to get started */ + b LREF(pos) + addc 0,0,x1 + +LSYM(neg5) + sub 0,x2,x2 /* negate x2 */ + addi 1,x2,x2 /* this cannot overflow */ + shd 0,x2,31,x1 /* get top bit (can be 1) */ + sh1add x2,x2,x2 /* multiply by 3 to get started */ + b LREF(neg) + addc x1,0,x1 + +GSYM($$divU_5) + .export $$divU_5,millicode + addi 1,x2,x2 /* this CAN overflow */ + addc 0,0,x1 + shd x1,x2,31,t1 /* multiply by 3 to get started */ + sh1add x2,x2,x2 + b LREF(pos) + addc t1,x1,x1 + +/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */ +GSYM($$divI_6) + .export $$divI_6,millicode + comb,<,N x2,0,LREF(neg6) + extru x2,30,31,x2 /* divide by 2 */ + addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */ + sh2add x2,t1,x2 /* multiply by 5 to get started */ + b LREF(pos) + addc 0,0,x1 + +LSYM(neg6) + subi 2,x2,x2 /* negate, divide by 2, and add 1 */ + /* 
negation and adding 1 are done */ + /* at the same time by the SUBI */ + extru x2,30,31,x2 + shd 0,x2,30,x1 + sh2add x2,x2,x2 /* multiply by 5 to get started */ + b LREF(neg) + addc x1,0,x1 + +GSYM($$divU_6) + .export $$divU_6,millicode + extru x2,30,31,x2 /* divide by 2 */ + addi 1,x2,x2 /* cannot carry */ + shd 0,x2,30,x1 /* multiply by 5 to get started */ + sh2add x2,x2,x2 + b LREF(pos) + addc x1,0,x1 + +/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */ +GSYM($$divU_10) + .export $$divU_10,millicode + extru x2,30,31,x2 /* divide by 2 */ + addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */ + sh1add x2,t1,x2 /* multiply by 3 to get started */ + addc 0,0,x1 +LSYM(pos) + shd x1,x2,28,t1 /* multiply by 0x11 */ + shd x2,0,28,t2 + add x2,t2,x2 + addc x1,t1,x1 +LSYM(pos_for_17) + shd x1,x2,24,t1 /* multiply by 0x101 */ + shd x2,0,24,t2 + add x2,t2,x2 + addc x1,t1,x1 + + shd x1,x2,16,t1 /* multiply by 0x10001 */ + shd x2,0,16,t2 + add x2,t2,x2 + MILLIRET + addc x1,t1,x1 + +GSYM($$divI_10) + .export $$divI_10,millicode + comb,< x2,0,LREF(neg10) + copy 0,x1 + extru x2,30,31,x2 /* divide by 2 */ + addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */ + sh1add x2,x2,x2 /* multiply by 3 to get started */ + +LSYM(neg10) + subi 2,x2,x2 /* negate, divide by 2, and add 1 */ + /* negation and adding 1 are done */ + /* at the same time by the SUBI */ + extru x2,30,31,x2 + sh1add x2,x2,x2 /* multiply by 3 to get started */ +LSYM(neg) + shd x1,x2,28,t1 /* multiply by 0x11 */ + shd x2,0,28,t2 + add x2,t2,x2 + addc x1,t1,x1 +LSYM(neg_for_17) + shd x1,x2,24,t1 /* multiply by 0x101 */ + shd x2,0,24,t2 + add x2,t2,x2 + addc x1,t1,x1 + + shd x1,x2,16,t1 /* multiply by 0x10001 */ + shd x2,0,16,t2 + add x2,t2,x2 + addc x1,t1,x1 + MILLIRET + sub 0,x1,x1 + +/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */ +GSYM($$divI_12) + .export $$divI_12,millicode + comb,< x2,0,LREF(neg12) + copy 0,x1 + extru x2,29,30,x2 /* divide by 4 */ + addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */ + sh2add x2,x2,x2 /* multiply by 5 to get started */ + +LSYM(neg12) + subi 4,x2,x2 /* negate, divide by 4, and add 1 */ + /* negation and adding 1 are done */ + /* at the same time by the SUBI */ + extru x2,29,30,x2 + b LREF(neg) + sh2add x2,x2,x2 /* multiply by 5 to get started */ + +GSYM($$divU_12) + .export $$divU_12,millicode + extru x2,29,30,x2 /* divide by 4 */ + addi 5,x2,t1 /* cannot carry */ + sh2add x2,t1,x2 /* multiply by 5 to get started */ + b LREF(pos) + addc 0,0,x1 + +/* DIVISION BY 15 (use z = 2**32; a = 11111111) */ +GSYM($$divI_15) + .export $$divI_15,millicode + comb,< x2,0,LREF(neg15) + copy 0,x1 + addib,tr 1,x2,LREF(pos)+4 + shd x1,x2,28,t1 + +LSYM(neg15) + b LREF(neg) + subi 1,x2,x2 + +GSYM($$divU_15) + .export $$divU_15,millicode + addi 1,x2,x2 /* this CAN overflow */ + b LREF(pos) + addc 0,0,x1 + +/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */ +GSYM($$divI_17) + .export $$divI_17,millicode + comb,<,n x2,0,LREF(neg17) + addi 1,x2,x2 /* this cannot overflow */ + shd 0,x2,28,t1 /* multiply by 0xf to get started */ + shd x2,0,28,t2 + sub t2,x2,x2 + b LREF(pos_for_17) + subb t1,0,x1 + +LSYM(neg17) + subi 1,x2,x2 /* this cannot overflow */ + shd 0,x2,28,t1 /* multiply by 0xf to get started */ + shd x2,0,28,t2 + sub t2,x2,x2 + b LREF(neg_for_17) + subb t1,0,x1 + +GSYM($$divU_17) + .export $$divU_17,millicode + addi 1,x2,x2 /* this CAN overflow */ + addc 0,0,x1 + shd x1,x2,28,t1 /* multiply by 0xf to get started */ +LSYM(u17) + shd x2,0,28,t2 + sub t2,x2,x2 + b LREF(pos_for_17) + subb t1,x1,x1 + + +/* 
DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these + includes 7,9 and also 14 + + + z = 2**24-1 + r = z mod x = 0 + + so choose b = 0 + + Also, in order to divide by z = 2**24-1, we approximate by dividing + by (z+1) = 2**24 (which is easy), and then correcting. + + (ax) = (z+1)q' + r + . = zq' + (q'+r) + + So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1) + Then the true remainder of (ax)/z is (q'+r). Repeat the process + with this new remainder, adding the tentative quotients together, + until a tentative quotient is 0 (and then we are done). There is + one last correction to be done. It is possible that (q'+r) = z. + If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But, + in fact, we need to add 1 more to the quotient. Now, it turns + out that this happens if and only if the original value x is + an exact multiple of y. So, to avoid a three instruction test at + the end, instead use 1 instruction to add 1 to x at the beginning. */ + +/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */ +GSYM($$divI_7) + .export $$divI_7,millicode + comb,<,n x2,0,LREF(neg7) +LSYM(7) + addi 1,x2,x2 /* cannot overflow */ + shd 0,x2,29,x1 + sh3add x2,x2,x2 + addc x1,0,x1 +LSYM(pos7) + shd x1,x2,26,t1 + shd x2,0,26,t2 + add x2,t2,x2 + addc x1,t1,x1 + + shd x1,x2,20,t1 + shd x2,0,20,t2 + add x2,t2,x2 + addc x1,t1,t1 + + /* computed . Now divide it by (2**24 - 1) */ + + copy 0,x1 + shd,= t1,x2,24,t1 /* tentative quotient */ +LSYM(1) + addb,tr t1,x1,LREF(2) /* add to previous quotient */ + extru x2,31,24,x2 /* new remainder (unadjusted) */ + + MILLIRETN + +LSYM(2) + addb,tr t1,x2,LREF(1) /* adjust remainder */ + extru,= x2,7,8,t1 /* new quotient */ + +LSYM(neg7) + subi 1,x2,x2 /* negate x2 and add 1 */ +LSYM(8) + shd 0,x2,29,x1 + sh3add x2,x2,x2 + addc x1,0,x1 + +LSYM(neg7_shift) + shd x1,x2,26,t1 + shd x2,0,26,t2 + add x2,t2,x2 + addc x1,t1,x1 + + shd x1,x2,20,t1 + shd x2,0,20,t2 + add x2,t2,x2 + addc x1,t1,t1 + + /* computed . 
Now divide it by (2**24 - 1) */ + + copy 0,x1 + shd,= t1,x2,24,t1 /* tentative quotient */ +LSYM(3) + addb,tr t1,x1,LREF(4) /* add to previous quotient */ + extru x2,31,24,x2 /* new remainder (unadjusted) */ + + MILLIRET + sub 0,x1,x1 /* negate result */ + +LSYM(4) + addb,tr t1,x2,LREF(3) /* adjust remainder */ + extru,= x2,7,8,t1 /* new quotient */ + +GSYM($$divU_7) + .export $$divU_7,millicode + addi 1,x2,x2 /* can carry */ + addc 0,0,x1 + shd x1,x2,29,t1 + sh3add x2,x2,x2 + b LREF(pos7) + addc t1,x1,x1 + +/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */ +GSYM($$divI_9) + .export $$divI_9,millicode + comb,<,n x2,0,LREF(neg9) + addi 1,x2,x2 /* cannot overflow */ + shd 0,x2,29,t1 + shd x2,0,29,t2 + sub t2,x2,x2 + b LREF(pos7) + subb t1,0,x1 + +LSYM(neg9) + subi 1,x2,x2 /* negate and add 1 */ + shd 0,x2,29,t1 + shd x2,0,29,t2 + sub t2,x2,x2 + b LREF(neg7_shift) + subb t1,0,x1 + +GSYM($$divU_9) + .export $$divU_9,millicode + addi 1,x2,x2 /* can carry */ + addc 0,0,x1 + shd x1,x2,29,t1 + shd x2,0,29,t2 + sub t2,x2,x2 + b LREF(pos7) + subb t1,x1,x1 + +/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */ +GSYM($$divI_14) + .export $$divI_14,millicode + comb,<,n x2,0,LREF(neg14) +GSYM($$divU_14) + .export $$divU_14,millicode + b LREF(7) /* go to 7 case */ + extru x2,30,31,x2 /* divide by 2 */ + +LSYM(neg14) + subi 2,x2,x2 /* negate (and add 2) */ + b LREF(8) + extru x2,30,31,x2 /* divide by 2 */ + .exit + .procend + .end +#endif + +#ifdef L_mulI +/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */ +/****************************************************************************** +This routine is used on PA2.0 processors when gcc -mno-fpregs is used + +ROUTINE: $$mulI + + +DESCRIPTION: + + $$mulI multiplies two single word integers, giving a single + word result. 
+ + +INPUT REGISTERS: + + arg0 = Operand 1 + arg1 = Operand 2 + r31 == return pc + sr0 == return space when called externally + + +OUTPUT REGISTERS: + + arg0 = undefined + arg1 = undefined + ret1 = result + +OTHER REGISTERS AFFECTED: + + r1 = undefined + +SIDE EFFECTS: + + Causes a trap under the following conditions: NONE + Changes memory at the following places: NONE + +PERMISSIBLE CONTEXT: + + Unwindable + Does not create a stack frame + Is usable for internal or external microcode + +DISCUSSION: + + Calls other millicode routines via mrp: NONE + Calls other millicode routines: NONE + +***************************************************************************/ + + +#define a0 %arg0 +#define a1 %arg1 +#define t0 %r1 +#define r %ret1 + +#define a0__128a0 zdep a0,24,25,a0 +#define a0__256a0 zdep a0,23,24,a0 +#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0) +#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1) +#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2) +#define b_n_ret_t0 b,n LREF(ret_t0) +#define b_e_shift b LREF(e_shift) +#define b_e_t0ma0 b LREF(e_t0ma0) +#define b_e_t0 b LREF(e_t0) +#define b_e_t0a0 b LREF(e_t0a0) +#define b_e_t02a0 b LREF(e_t02a0) +#define b_e_t04a0 b LREF(e_t04a0) +#define b_e_2t0 b LREF(e_2t0) +#define b_e_2t0a0 b LREF(e_2t0a0) +#define b_e_2t04a0 b LREF(e2t04a0) +#define b_e_3t0 b LREF(e_3t0) +#define b_e_4t0 b LREF(e_4t0) +#define b_e_4t0a0 b LREF(e_4t0a0) +#define b_e_4t08a0 b LREF(e4t08a0) +#define b_e_5t0 b LREF(e_5t0) +#define b_e_8t0 b LREF(e_8t0) +#define b_e_8t0a0 b LREF(e_8t0a0) +#define r__r_a0 add r,a0,r +#define r__r_2a0 sh1add a0,r,r +#define r__r_4a0 sh2add a0,r,r +#define r__r_8a0 sh3add a0,r,r +#define r__r_t0 add r,t0,r +#define r__r_2t0 sh1add t0,r,r +#define r__r_4t0 sh2add t0,r,r +#define r__r_8t0 sh3add t0,r,r +#define t0__3a0 sh1add a0,a0,t0 +#define t0__4a0 sh2add a0,0,t0 +#define t0__5a0 sh2add a0,a0,t0 +#define t0__8a0 sh3add a0,0,t0 +#define t0__9a0 sh3add a0,a0,t0 +#define t0__16a0 zdep a0,27,28,t0 +#define t0__32a0 zdep a0,26,27,t0 +#define t0__64a0 zdep a0,25,26,t0 +#define t0__128a0 zdep a0,24,25,t0 +#define t0__t0ma0 sub t0,a0,t0 +#define t0__t0_a0 add t0,a0,t0 +#define t0__t0_2a0 sh1add a0,t0,t0 +#define t0__t0_4a0 sh2add a0,t0,t0 +#define t0__t0_8a0 sh3add a0,t0,t0 +#define t0__2t0_a0 sh1add t0,a0,t0 +#define t0__3t0 sh1add t0,t0,t0 +#define t0__4t0 sh2add t0,0,t0 +#define t0__4t0_a0 sh2add t0,a0,t0 +#define t0__5t0 sh2add t0,t0,t0 +#define t0__8t0 sh3add t0,0,t0 +#define t0__8t0_a0 sh3add t0,a0,t0 +#define t0__9t0 sh3add t0,t0,t0 +#define t0__16t0 zdep t0,27,28,t0 +#define t0__32t0 zdep t0,26,27,t0 +#define t0__256a0 zdep a0,23,24,t0 + + + SUBSPA_MILLI + ATTR_MILLI + .align 16 + .proc + .callinfo millicode + .export $$mulI,millicode +GSYM($$mulI) + combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */ + copy 0,r /* zero out the result */ + xor a0,a1,a0 /* swap a0 & a1 using the */ + xor a0,a1,a1 /* old xor trick */ + xor a0,a1,a0 +LSYM(l4) + combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */ + zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ + sub,> 0,a1,t0 /* otherwise negate both and */ + combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */ + sub 0,a0,a1 + movb,tr,n t0,a0,LREF(l2) /* 10th inst. */ + +LSYM(l0) r__r_t0 /* add in this partial product */ +LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */ +LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ +LSYM(l3) blr t0,0 /* case on these 8 bits ****** */ + extru a1,23,24,a1 /* a1 >>= 8 ****************** */ + +/*16 insts before this. 
*/ +/* a0 <<= 8 ************************** */ +LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop +LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop +LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop +LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0 +LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop +LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0 +LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN +LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0 +LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop +LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0 +LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN +LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 +LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN +LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 +LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0 +LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN +LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0 +LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN +LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0 +LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN +LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 +LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN +LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 +LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0 +LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 +LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 +LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN +LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 +LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0 +LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN +LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 +LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN +LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0 +LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 +LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0 +LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0 +LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0 +LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0 +LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0 +LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0 +LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 +LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0 +LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 +LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 +LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! 
t0__t0ma0 ! b_n_ret_t0 +LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN +LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0 +LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0 +LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN +LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0 +LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0 +LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0 +LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0 +LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0 +LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0 +LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0 +LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 +LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0 +LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0 +LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0 +LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0 +LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0 +LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 +LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0 +LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0 +LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0 +LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0 +LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0 +LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0 +LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0 +LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0 +LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 +LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0 +LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0 +LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 +LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 +LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN +LSYM(x129) t0__128a0 ! 
a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0 +LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0 +LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0 +LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0 +LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0 +LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0 +LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0 +LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0 +LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0 +LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0 +LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0 +LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0 +LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0 +LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0 +LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0 +LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0 +LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0 +LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0 +LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0 +LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0 +LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0 +LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0 +LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0 +LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0 +LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0 +LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0 +LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0 +LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0 +LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0 +LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0 +LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0 +LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 +LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x194) t0__8a0 ! t0__3t0 ! 
b_e_2t0 ! t0__4t0_a0 +LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0 +LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0 +LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0 +LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0 +LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0 +LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0 +LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0 +LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0 +LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0 +LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0 +LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0 +LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0 +LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0 +LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0 +LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0 +LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0 +LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 +LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0 +LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0 +LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0 +LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0 +LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0 +LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0 +LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0 +LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0 +LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0 +LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0 +LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0 +LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0 +LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0 +LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0 +LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0 +LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0 +LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0 +LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0 +LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0 +LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0 +LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0 +LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0 +LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0 +LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0 +LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0 +LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0 +LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0 +LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0 +LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0 +LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0 +LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0 +LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 +LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 +/*1040 insts before this. 
*/
+LSYM(ret_t0)	MILLIRET
+LSYM(e_t0)	r__r_t0
+LSYM(e_shift)	a1_ne_0_b_l2
+	a0__256a0	/* a0 <<= 8 *********** */
+	MILLIRETN
+LSYM(e_t0ma0)	a1_ne_0_b_l0
+	t0__t0ma0
+	MILLIRET
+	r__r_t0
+LSYM(e_t0a0)	a1_ne_0_b_l0
+	t0__t0_a0
+	MILLIRET
+	r__r_t0
+LSYM(e_t02a0)	a1_ne_0_b_l0
+	t0__t0_2a0
+	MILLIRET
+	r__r_t0
+LSYM(e_t04a0)	a1_ne_0_b_l0
+	t0__t0_4a0
+	MILLIRET
+	r__r_t0
+LSYM(e_2t0)	a1_ne_0_b_l1
+	r__r_2t0
+	MILLIRETN
+LSYM(e_2t0a0)	a1_ne_0_b_l0
+	t0__2t0_a0
+	MILLIRET
+	r__r_t0
+LSYM(e2t04a0)	t0__t0_2a0
+	a1_ne_0_b_l1
+	r__r_2t0
+	MILLIRETN
+LSYM(e_3t0)	a1_ne_0_b_l0
+	t0__3t0
+	MILLIRET
+	r__r_t0
+LSYM(e_4t0)	a1_ne_0_b_l1
+	r__r_4t0
+	MILLIRETN
+LSYM(e_4t0a0)	a1_ne_0_b_l0
+	t0__4t0_a0
+	MILLIRET
+	r__r_t0
+LSYM(e4t08a0)	t0__t0_2a0
+	a1_ne_0_b_l1
+	r__r_4t0
+	MILLIRETN
+LSYM(e_5t0)	a1_ne_0_b_l0
+	t0__5t0
+	MILLIRET
+	r__r_t0
+LSYM(e_8t0)	a1_ne_0_b_l1
+	r__r_8t0
+	MILLIRETN
+LSYM(e_8t0a0)	a1_ne_0_b_l0
+	t0__8t0_a0
+	MILLIRET
+	r__r_t0
+
+	.procend
+	.end
+#endif
diff --git a/libgcc/config/pa/t-linux b/libgcc/config/pa/t-linux
new file mode 100644
index 00000000000..d396bf7705a
--- /dev/null
+++ b/libgcc/config/pa/t-linux
@@ -0,0 +1,6 @@
+#Plug millicode routines into libgcc.a We want these on both native and
+#cross compiles. We use the "64-bit" routines because the "32-bit" code
+#is broken for certain corner cases.
+
+LIB1ASMSRC = pa/milli64.S
+LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall
diff --git a/libgcc/config/pa/t-linux64 b/libgcc/config/pa/t-linux64
new file mode 100644
index 00000000000..6cb9806ff2e
--- /dev/null
+++ b/libgcc/config/pa/t-linux64
@@ -0,0 +1,4 @@
+# Plug millicode routines into libgcc.a We want these on both native and
+# cross compiles.
+# FIXME: Explain.
+LIB1ASMFUNCS := $(filter-out _dyncall, $(LIB1ASMFUNCS))
diff --git a/libgcc/config/picochip/lib1funcs.S b/libgcc/config/picochip/lib1funcs.S
new file mode 100644
index 00000000000..d344170d248
--- /dev/null
+++ b/libgcc/config/picochip/lib1funcs.S
@@ -0,0 +1,4 @@
+// picoChip ASM file
+// Fake libgcc asm file. This contains nothing, but is used to prevent gcc
+// getting upset about the lack of a lib1funcs.S file when LIB1ASMFUNCS is
+// defined to switch off the compilation of parts of libgcc.
diff --git a/libgcc/config/picochip/t-picochip b/libgcc/config/picochip/t-picochip
index 5135d500cbb..a596ec98947 100644
--- a/libgcc/config/picochip/t-picochip
+++ b/libgcc/config/picochip/t-picochip
@@ -1,2 +1,9 @@
+# Prevent some of the more complicated libgcc functions from being
+# compiled. This is because they are generally too big to fit into an
+# AE anyway, so there is no point in having them. Also, some don't
+# compile properly so we'll ignore them for the moment.
+LIB1ASMSRC = picochip/lib1funcs.S
+LIB1ASMFUNCS = _mulsc3 _divsc3
+
 # Turn off the building of exception handling libraries.
 LIB2ADDEH =
diff --git a/libgcc/config/sh/lib1funcs.S b/libgcc/config/sh/lib1funcs.S
new file mode 100644
index 00000000000..2f0ca16cd91
--- /dev/null
+++ b/libgcc/config/sh/lib1funcs.S
@@ -0,0 +1,3933 @@
+/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+   2004, 2005, 2006, 2009
+   Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+ +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +#include "lib1funcs.h" + +/* t-vxworks needs to build both PIC and non-PIC versions of libgcc, + so it is more convenient to define NO_FPSCR_VALUES here than to + define it on the command line. */ +#if defined __vxworks && defined __PIC__ +#define NO_FPSCR_VALUES +#endif + +#if ! __SH5__ +#ifdef L_ashiftrt + .global GLOBAL(ashiftrt_r4_0) + .global GLOBAL(ashiftrt_r4_1) + .global GLOBAL(ashiftrt_r4_2) + .global GLOBAL(ashiftrt_r4_3) + .global GLOBAL(ashiftrt_r4_4) + .global GLOBAL(ashiftrt_r4_5) + .global GLOBAL(ashiftrt_r4_6) + .global GLOBAL(ashiftrt_r4_7) + .global GLOBAL(ashiftrt_r4_8) + .global GLOBAL(ashiftrt_r4_9) + .global GLOBAL(ashiftrt_r4_10) + .global GLOBAL(ashiftrt_r4_11) + .global GLOBAL(ashiftrt_r4_12) + .global GLOBAL(ashiftrt_r4_13) + .global GLOBAL(ashiftrt_r4_14) + .global GLOBAL(ashiftrt_r4_15) + .global GLOBAL(ashiftrt_r4_16) + .global GLOBAL(ashiftrt_r4_17) + .global GLOBAL(ashiftrt_r4_18) + .global GLOBAL(ashiftrt_r4_19) + .global GLOBAL(ashiftrt_r4_20) + .global GLOBAL(ashiftrt_r4_21) + .global GLOBAL(ashiftrt_r4_22) + .global GLOBAL(ashiftrt_r4_23) + .global GLOBAL(ashiftrt_r4_24) + .global GLOBAL(ashiftrt_r4_25) + .global GLOBAL(ashiftrt_r4_26) + .global GLOBAL(ashiftrt_r4_27) + .global GLOBAL(ashiftrt_r4_28) + .global GLOBAL(ashiftrt_r4_29) + .global GLOBAL(ashiftrt_r4_30) + .global GLOBAL(ashiftrt_r4_31) + .global GLOBAL(ashiftrt_r4_32) + + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26)) + 
HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32)) + + .align 1 +GLOBAL(ashiftrt_r4_32): +GLOBAL(ashiftrt_r4_31): + rotcl r4 + rts + subc r4,r4 + +GLOBAL(ashiftrt_r4_30): + shar r4 +GLOBAL(ashiftrt_r4_29): + shar r4 +GLOBAL(ashiftrt_r4_28): + shar r4 +GLOBAL(ashiftrt_r4_27): + shar r4 +GLOBAL(ashiftrt_r4_26): + shar r4 +GLOBAL(ashiftrt_r4_25): + shar r4 +GLOBAL(ashiftrt_r4_24): + shlr16 r4 + shlr8 r4 + rts + exts.b r4,r4 + +GLOBAL(ashiftrt_r4_23): + shar r4 +GLOBAL(ashiftrt_r4_22): + shar r4 +GLOBAL(ashiftrt_r4_21): + shar r4 +GLOBAL(ashiftrt_r4_20): + shar r4 +GLOBAL(ashiftrt_r4_19): + shar r4 +GLOBAL(ashiftrt_r4_18): + shar r4 +GLOBAL(ashiftrt_r4_17): + shar r4 +GLOBAL(ashiftrt_r4_16): + shlr16 r4 + rts + exts.w r4,r4 + +GLOBAL(ashiftrt_r4_15): + shar r4 +GLOBAL(ashiftrt_r4_14): + shar r4 +GLOBAL(ashiftrt_r4_13): + shar r4 +GLOBAL(ashiftrt_r4_12): + shar r4 +GLOBAL(ashiftrt_r4_11): + shar r4 +GLOBAL(ashiftrt_r4_10): + shar r4 +GLOBAL(ashiftrt_r4_9): + shar r4 +GLOBAL(ashiftrt_r4_8): + shar r4 +GLOBAL(ashiftrt_r4_7): + shar r4 +GLOBAL(ashiftrt_r4_6): + shar r4 +GLOBAL(ashiftrt_r4_5): + shar r4 +GLOBAL(ashiftrt_r4_4): + shar r4 +GLOBAL(ashiftrt_r4_3): + shar r4 +GLOBAL(ashiftrt_r4_2): + shar r4 +GLOBAL(ashiftrt_r4_1): + rts + shar r4 + +GLOBAL(ashiftrt_r4_0): + rts + nop + + ENDFUNC(GLOBAL(ashiftrt_r4_0)) + ENDFUNC(GLOBAL(ashiftrt_r4_1)) + ENDFUNC(GLOBAL(ashiftrt_r4_2)) + ENDFUNC(GLOBAL(ashiftrt_r4_3)) + ENDFUNC(GLOBAL(ashiftrt_r4_4)) + ENDFUNC(GLOBAL(ashiftrt_r4_5)) + ENDFUNC(GLOBAL(ashiftrt_r4_6)) + ENDFUNC(GLOBAL(ashiftrt_r4_7)) + ENDFUNC(GLOBAL(ashiftrt_r4_8)) + ENDFUNC(GLOBAL(ashiftrt_r4_9)) + ENDFUNC(GLOBAL(ashiftrt_r4_10)) + ENDFUNC(GLOBAL(ashiftrt_r4_11)) + ENDFUNC(GLOBAL(ashiftrt_r4_12)) + ENDFUNC(GLOBAL(ashiftrt_r4_13)) + ENDFUNC(GLOBAL(ashiftrt_r4_14)) + ENDFUNC(GLOBAL(ashiftrt_r4_15)) + ENDFUNC(GLOBAL(ashiftrt_r4_16)) + ENDFUNC(GLOBAL(ashiftrt_r4_17)) + ENDFUNC(GLOBAL(ashiftrt_r4_18)) + ENDFUNC(GLOBAL(ashiftrt_r4_19)) + ENDFUNC(GLOBAL(ashiftrt_r4_20)) + ENDFUNC(GLOBAL(ashiftrt_r4_21)) + ENDFUNC(GLOBAL(ashiftrt_r4_22)) + ENDFUNC(GLOBAL(ashiftrt_r4_23)) + ENDFUNC(GLOBAL(ashiftrt_r4_24)) + ENDFUNC(GLOBAL(ashiftrt_r4_25)) + ENDFUNC(GLOBAL(ashiftrt_r4_26)) + ENDFUNC(GLOBAL(ashiftrt_r4_27)) + ENDFUNC(GLOBAL(ashiftrt_r4_28)) + ENDFUNC(GLOBAL(ashiftrt_r4_29)) + ENDFUNC(GLOBAL(ashiftrt_r4_30)) + ENDFUNC(GLOBAL(ashiftrt_r4_31)) + ENDFUNC(GLOBAL(ashiftrt_r4_32)) +#endif + +#ifdef L_ashiftrt_n + +! +! GLOBAL(ashrsi3) +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
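+! Informally, the routine below computes
+!
+!	long __ashrsi3 (long value, long count)
+!	{
+!	  return value >> (count & 31);
+!	}
+!
+! (assuming the usual arithmetic behaviour of ">>" on negative values;
+! parameter names are only illustrative).  There is no variable-shift
+! instruction here, so the byte table below stores (label - table) offsets:
+! "mov.b @(r0,r5),r5" fetches the offset for the masked count and "braf r5"
+! (or add/jmp on SH1) drops into a chain of single-bit "shar" instructions,
+! with shortcuts at counts 16 and 24 and a rotcl/subc sequence for 31 that
+! yields 0 or -1.
+!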
+ + .global GLOBAL(ashrsi3) + HIDDEN_FUNC(GLOBAL(ashrsi3)) + .align 2 +GLOBAL(ashrsi3): + mov #31,r0 + and r0,r5 + mova LOCAL(ashrsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(ashrsi3_table): + .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table) + +LOCAL(ashrsi3_31): + rotcl r0 + rts + subc r0,r0 + +LOCAL(ashrsi3_30): + shar r0 +LOCAL(ashrsi3_29): + shar r0 +LOCAL(ashrsi3_28): + shar r0 +LOCAL(ashrsi3_27): + shar r0 +LOCAL(ashrsi3_26): + shar r0 +LOCAL(ashrsi3_25): + shar r0 +LOCAL(ashrsi3_24): + shlr16 r0 + shlr8 r0 + rts + exts.b r0,r0 + +LOCAL(ashrsi3_23): + shar r0 +LOCAL(ashrsi3_22): + shar r0 +LOCAL(ashrsi3_21): + shar r0 +LOCAL(ashrsi3_20): + shar r0 +LOCAL(ashrsi3_19): + shar r0 +LOCAL(ashrsi3_18): + shar r0 +LOCAL(ashrsi3_17): + shar r0 +LOCAL(ashrsi3_16): + shlr16 r0 + rts + exts.w r0,r0 + +LOCAL(ashrsi3_15): + shar r0 +LOCAL(ashrsi3_14): + shar r0 +LOCAL(ashrsi3_13): + shar r0 +LOCAL(ashrsi3_12): + shar r0 +LOCAL(ashrsi3_11): + shar r0 +LOCAL(ashrsi3_10): + shar r0 +LOCAL(ashrsi3_9): + shar r0 +LOCAL(ashrsi3_8): + shar r0 +LOCAL(ashrsi3_7): + shar r0 +LOCAL(ashrsi3_6): + shar r0 +LOCAL(ashrsi3_5): + shar r0 +LOCAL(ashrsi3_4): + shar r0 +LOCAL(ashrsi3_3): + shar r0 +LOCAL(ashrsi3_2): + shar r0 +LOCAL(ashrsi3_1): + rts + shar r0 + +LOCAL(ashrsi3_0): + rts + nop + + ENDFUNC(GLOBAL(ashrsi3)) +#endif + +#ifdef L_ashiftlt + +! +! GLOBAL(ashlsi3) +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
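+! Informally: return value << (count & 31).  Same byte-offset dispatch as
+! __ashrsi3 above, but each table target is a short chain composed from
+! shll16/shll8/shll2/shll, so any count takes only a handful of shift
+! instructions rather than one per bit.
+!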
+ .global GLOBAL(ashlsi3) + HIDDEN_FUNC(GLOBAL(ashlsi3)) + .align 2 +GLOBAL(ashlsi3): + mov #31,r0 + and r0,r5 + mova LOCAL(ashlsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(ashlsi3_table): + .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table) + +LOCAL(ashlsi3_6): + shll2 r0 +LOCAL(ashlsi3_4): + shll2 r0 +LOCAL(ashlsi3_2): + rts + shll2 r0 + +LOCAL(ashlsi3_7): + shll2 r0 +LOCAL(ashlsi3_5): + shll2 r0 +LOCAL(ashlsi3_3): + shll2 r0 +LOCAL(ashlsi3_1): + rts + shll r0 + +LOCAL(ashlsi3_14): + shll2 r0 +LOCAL(ashlsi3_12): + shll2 r0 +LOCAL(ashlsi3_10): + shll2 r0 +LOCAL(ashlsi3_8): + rts + shll8 r0 + +LOCAL(ashlsi3_15): + shll2 r0 +LOCAL(ashlsi3_13): + shll2 r0 +LOCAL(ashlsi3_11): + shll2 r0 +LOCAL(ashlsi3_9): + shll8 r0 + rts + shll r0 + +LOCAL(ashlsi3_22): + shll2 r0 +LOCAL(ashlsi3_20): + shll2 r0 +LOCAL(ashlsi3_18): + shll2 r0 +LOCAL(ashlsi3_16): + rts + shll16 r0 + +LOCAL(ashlsi3_23): + shll2 r0 +LOCAL(ashlsi3_21): + shll2 r0 +LOCAL(ashlsi3_19): + shll2 r0 +LOCAL(ashlsi3_17): + shll16 r0 + rts + shll r0 + +LOCAL(ashlsi3_30): + shll2 r0 +LOCAL(ashlsi3_28): + shll2 r0 +LOCAL(ashlsi3_26): + shll2 r0 +LOCAL(ashlsi3_24): + shll16 r0 + rts + shll8 r0 + +LOCAL(ashlsi3_31): + shll2 r0 +LOCAL(ashlsi3_29): + shll2 r0 +LOCAL(ashlsi3_27): + shll2 r0 +LOCAL(ashlsi3_25): + shll16 r0 + shll8 r0 + rts + shll r0 + +LOCAL(ashlsi3_0): + rts + nop + + ENDFUNC(GLOBAL(ashlsi3)) +#endif + +#ifdef L_lshiftrt + +! +! GLOBAL(lshrsi3) +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
+ .global GLOBAL(lshrsi3) + HIDDEN_FUNC(GLOBAL(lshrsi3)) + .align 2 +GLOBAL(lshrsi3): + mov #31,r0 + and r0,r5 + mova LOCAL(lshrsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(lshrsi3_table): + .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table) + +LOCAL(lshrsi3_6): + shlr2 r0 +LOCAL(lshrsi3_4): + shlr2 r0 +LOCAL(lshrsi3_2): + rts + shlr2 r0 + +LOCAL(lshrsi3_7): + shlr2 r0 +LOCAL(lshrsi3_5): + shlr2 r0 +LOCAL(lshrsi3_3): + shlr2 r0 +LOCAL(lshrsi3_1): + rts + shlr r0 + +LOCAL(lshrsi3_14): + shlr2 r0 +LOCAL(lshrsi3_12): + shlr2 r0 +LOCAL(lshrsi3_10): + shlr2 r0 +LOCAL(lshrsi3_8): + rts + shlr8 r0 + +LOCAL(lshrsi3_15): + shlr2 r0 +LOCAL(lshrsi3_13): + shlr2 r0 +LOCAL(lshrsi3_11): + shlr2 r0 +LOCAL(lshrsi3_9): + shlr8 r0 + rts + shlr r0 + +LOCAL(lshrsi3_22): + shlr2 r0 +LOCAL(lshrsi3_20): + shlr2 r0 +LOCAL(lshrsi3_18): + shlr2 r0 +LOCAL(lshrsi3_16): + rts + shlr16 r0 + +LOCAL(lshrsi3_23): + shlr2 r0 +LOCAL(lshrsi3_21): + shlr2 r0 +LOCAL(lshrsi3_19): + shlr2 r0 +LOCAL(lshrsi3_17): + shlr16 r0 + rts + shlr r0 + +LOCAL(lshrsi3_30): + shlr2 r0 +LOCAL(lshrsi3_28): + shlr2 r0 +LOCAL(lshrsi3_26): + shlr2 r0 +LOCAL(lshrsi3_24): + shlr16 r0 + rts + shlr8 r0 + +LOCAL(lshrsi3_31): + shlr2 r0 +LOCAL(lshrsi3_29): + shlr2 r0 +LOCAL(lshrsi3_27): + shlr2 r0 +LOCAL(lshrsi3_25): + shlr16 r0 + shlr8 r0 + rts + shlr r0 + +LOCAL(lshrsi3_0): + rts + nop + + ENDFUNC(GLOBAL(lshrsi3)) +#endif + +#ifdef L_movmem + .text + .balign 4 + .global GLOBAL(movmem) + HIDDEN_FUNC(GLOBAL(movmem)) + HIDDEN_ALIAS(movstr,movmem) + /* This would be a lot simpler if r6 contained the byte count + minus 64, and we wouldn't be called here for a byte count of 64. */ +GLOBAL(movmem): + sts.l pr,@-r15 + shll2 r6 + bsr GLOBAL(movmemSI52+2) + mov.l @(48,r5),r0 + .balign 4 +LOCAL(movmem_loop): /* Reached with rts */ + mov.l @(60,r5),r0 + add #-64,r6 + mov.l r0,@(60,r4) + tst r6,r6 + mov.l @(56,r5),r0 + bt LOCAL(movmem_done) + mov.l r0,@(56,r4) + cmp/pl r6 + mov.l @(52,r5),r0 + add #64,r5 + mov.l r0,@(52,r4) + add #64,r4 + bt GLOBAL(movmemSI52) +! 
done all the large groups, do the remainder +! jump to movmem+ + mova GLOBAL(movmemSI4)+4,r0 + add r6,r0 + jmp @r0 +LOCAL(movmem_done): ! share slot insn, works out aligned. + lds.l @r15+,pr + mov.l r0,@(56,r4) + mov.l @(52,r5),r0 + rts + mov.l r0,@(52,r4) + .balign 4 +! ??? We need aliases movstr* for movmem* for the older libraries. These +! aliases will be removed at the some point in the future. + .global GLOBAL(movmemSI64) + HIDDEN_FUNC(GLOBAL(movmemSI64)) + HIDDEN_ALIAS(movstrSI64,movmemSI64) +GLOBAL(movmemSI64): + mov.l @(60,r5),r0 + mov.l r0,@(60,r4) + .global GLOBAL(movmemSI60) + HIDDEN_FUNC(GLOBAL(movmemSI60)) + HIDDEN_ALIAS(movstrSI60,movmemSI60) +GLOBAL(movmemSI60): + mov.l @(56,r5),r0 + mov.l r0,@(56,r4) + .global GLOBAL(movmemSI56) + HIDDEN_FUNC(GLOBAL(movmemSI56)) + HIDDEN_ALIAS(movstrSI56,movmemSI56) +GLOBAL(movmemSI56): + mov.l @(52,r5),r0 + mov.l r0,@(52,r4) + .global GLOBAL(movmemSI52) + HIDDEN_FUNC(GLOBAL(movmemSI52)) + HIDDEN_ALIAS(movstrSI52,movmemSI52) +GLOBAL(movmemSI52): + mov.l @(48,r5),r0 + mov.l r0,@(48,r4) + .global GLOBAL(movmemSI48) + HIDDEN_FUNC(GLOBAL(movmemSI48)) + HIDDEN_ALIAS(movstrSI48,movmemSI48) +GLOBAL(movmemSI48): + mov.l @(44,r5),r0 + mov.l r0,@(44,r4) + .global GLOBAL(movmemSI44) + HIDDEN_FUNC(GLOBAL(movmemSI44)) + HIDDEN_ALIAS(movstrSI44,movmemSI44) +GLOBAL(movmemSI44): + mov.l @(40,r5),r0 + mov.l r0,@(40,r4) + .global GLOBAL(movmemSI40) + HIDDEN_FUNC(GLOBAL(movmemSI40)) + HIDDEN_ALIAS(movstrSI40,movmemSI40) +GLOBAL(movmemSI40): + mov.l @(36,r5),r0 + mov.l r0,@(36,r4) + .global GLOBAL(movmemSI36) + HIDDEN_FUNC(GLOBAL(movmemSI36)) + HIDDEN_ALIAS(movstrSI36,movmemSI36) +GLOBAL(movmemSI36): + mov.l @(32,r5),r0 + mov.l r0,@(32,r4) + .global GLOBAL(movmemSI32) + HIDDEN_FUNC(GLOBAL(movmemSI32)) + HIDDEN_ALIAS(movstrSI32,movmemSI32) +GLOBAL(movmemSI32): + mov.l @(28,r5),r0 + mov.l r0,@(28,r4) + .global GLOBAL(movmemSI28) + HIDDEN_FUNC(GLOBAL(movmemSI28)) + HIDDEN_ALIAS(movstrSI28,movmemSI28) +GLOBAL(movmemSI28): + mov.l @(24,r5),r0 + mov.l r0,@(24,r4) + .global GLOBAL(movmemSI24) + HIDDEN_FUNC(GLOBAL(movmemSI24)) + HIDDEN_ALIAS(movstrSI24,movmemSI24) +GLOBAL(movmemSI24): + mov.l @(20,r5),r0 + mov.l r0,@(20,r4) + .global GLOBAL(movmemSI20) + HIDDEN_FUNC(GLOBAL(movmemSI20)) + HIDDEN_ALIAS(movstrSI20,movmemSI20) +GLOBAL(movmemSI20): + mov.l @(16,r5),r0 + mov.l r0,@(16,r4) + .global GLOBAL(movmemSI16) + HIDDEN_FUNC(GLOBAL(movmemSI16)) + HIDDEN_ALIAS(movstrSI16,movmemSI16) +GLOBAL(movmemSI16): + mov.l @(12,r5),r0 + mov.l r0,@(12,r4) + .global GLOBAL(movmemSI12) + HIDDEN_FUNC(GLOBAL(movmemSI12)) + HIDDEN_ALIAS(movstrSI12,movmemSI12) +GLOBAL(movmemSI12): + mov.l @(8,r5),r0 + mov.l r0,@(8,r4) + .global GLOBAL(movmemSI8) + HIDDEN_FUNC(GLOBAL(movmemSI8)) + HIDDEN_ALIAS(movstrSI8,movmemSI8) +GLOBAL(movmemSI8): + mov.l @(4,r5),r0 + mov.l r0,@(4,r4) + .global GLOBAL(movmemSI4) + HIDDEN_FUNC(GLOBAL(movmemSI4)) + HIDDEN_ALIAS(movstrSI4,movmemSI4) +GLOBAL(movmemSI4): + mov.l @(0,r5),r0 + rts + mov.l r0,@(0,r4) + + ENDFUNC(GLOBAL(movmemSI64)) + ENDFUNC(GLOBAL(movmemSI60)) + ENDFUNC(GLOBAL(movmemSI56)) + ENDFUNC(GLOBAL(movmemSI52)) + ENDFUNC(GLOBAL(movmemSI48)) + ENDFUNC(GLOBAL(movmemSI44)) + ENDFUNC(GLOBAL(movmemSI40)) + ENDFUNC(GLOBAL(movmemSI36)) + ENDFUNC(GLOBAL(movmemSI32)) + ENDFUNC(GLOBAL(movmemSI28)) + ENDFUNC(GLOBAL(movmemSI24)) + ENDFUNC(GLOBAL(movmemSI20)) + ENDFUNC(GLOBAL(movmemSI16)) + ENDFUNC(GLOBAL(movmemSI12)) + ENDFUNC(GLOBAL(movmemSI8)) + ENDFUNC(GLOBAL(movmemSI4)) + ENDFUNC(GLOBAL(movmem)) +#endif + +#ifdef L_movmem_i4 + .text + .global 
GLOBAL(movmem_i4_even) + .global GLOBAL(movmem_i4_odd) + .global GLOBAL(movmemSI12_i4) + + HIDDEN_FUNC(GLOBAL(movmem_i4_even)) + HIDDEN_FUNC(GLOBAL(movmem_i4_odd)) + HIDDEN_FUNC(GLOBAL(movmemSI12_i4)) + + HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even) + HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd) + HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4) + + .p2align 5 +L_movmem_2mod4_end: + mov.l r0,@(16,r4) + rts + mov.l r1,@(20,r4) + + .p2align 2 + +GLOBAL(movmem_i4_even): + mov.l @r5+,r0 + bra L_movmem_start_even + mov.l @r5+,r1 + +GLOBAL(movmem_i4_odd): + mov.l @r5+,r1 + add #-4,r4 + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r1,@(4,r4) + mov.l r2,@(8,r4) + +L_movmem_loop: + mov.l r3,@(12,r4) + dt r6 + mov.l @r5+,r0 + bt/s L_movmem_2mod4_end + mov.l @r5+,r1 + add #16,r4 +L_movmem_start_even: + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r0,@r4 + dt r6 + mov.l r1,@(4,r4) + bf/s L_movmem_loop + mov.l r2,@(8,r4) + rts + mov.l r3,@(12,r4) + + ENDFUNC(GLOBAL(movmem_i4_even)) + ENDFUNC(GLOBAL(movmem_i4_odd)) + + .p2align 4 +GLOBAL(movmemSI12_i4): + mov.l @r5,r0 + mov.l @(4,r5),r1 + mov.l @(8,r5),r2 + mov.l r0,@r4 + mov.l r1,@(4,r4) + rts + mov.l r2,@(8,r4) + + ENDFUNC(GLOBAL(movmemSI12_i4)) +#endif + +#ifdef L_mulsi3 + + + .global GLOBAL(mulsi3) + HIDDEN_FUNC(GLOBAL(mulsi3)) + +! r4 = aabb +! r5 = ccdd +! r0 = aabb*ccdd via partial products +! +! if aa == 0 and cc = 0 +! r0 = bb*dd +! +! else +! aa = bb*dd + (aa*dd*65536) + (cc*bb*65536) +! + +GLOBAL(mulsi3): + mulu.w r4,r5 ! multiply the lsws macl=bb*dd + mov r5,r3 ! r3 = ccdd + swap.w r4,r2 ! r2 = bbaa + xtrct r2,r3 ! r3 = aacc + tst r3,r3 ! msws zero ? + bf hiset + rts ! yes - then we have the answer + sts macl,r0 + +hiset: sts macl,r0 ! r0 = bb*dd + mulu.w r2,r5 ! brewing macl = aa*dd + sts macl,r1 + mulu.w r3,r4 ! brewing macl = cc*bb + sts macl,r2 + add r1,r2 + shll16 r2 + rts + add r2,r0 + + ENDFUNC(GLOBAL(mulsi3)) +#endif +#endif /* ! __SH5__ */ +#ifdef L_sdivsi3_i4 + .title "SH DIVIDE" +!! 4 byte integer Divide code for the Renesas SH +#ifdef __SH4__ +!! args in r4 and r5, result in fpul, clobber dr0, dr2 + + .global GLOBAL(sdivsi3_i4) + HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) +GLOBAL(sdivsi3_i4): + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + rts + ftrc dr0,fpul + + ENDFUNC(GLOBAL(sdivsi3_i4)) +#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__) +!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2 + +#if ! __SH5__ || __SH5__ == 32 +#if __SH5__ + .mode SHcompact +#endif + .global GLOBAL(sdivsi3_i4) + HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) +GLOBAL(sdivsi3_i4): + sts.l fpscr,@-r15 + mov #8,r2 + swap.w r2,r2 + lds r2,fpscr + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + lds.l @r15+,fpscr + + ENDFUNC(GLOBAL(sdivsi3_i4)) +#endif /* ! __SH5__ || __SH5__ == 32 */ +#endif /* ! __SH4__ */ +#endif + +#ifdef L_sdivsi3 +/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with + sh2e/sh3e code. */ +#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) +!! +!! Steve Chamberlain +!! sac@cygnus.com +!! +!! + +!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit + + .global GLOBAL(sdivsi3) +#if __SHMEDIA__ +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif + .align 2 +#if 0 +/* The assembly code that follows is a hand-optimized version of the C + code that follows. 
Note that the registers that are modified are + exactly those listed as clobbered in the patterns divsi3_i1 and + divsi3_i1_media. + +int __sdivsi3 (i, j) + int i, j; +{ + register unsigned long long r18 asm ("r18"); + register unsigned long long r19 asm ("r19"); + register unsigned long long r0 asm ("r0") = 0; + register unsigned long long r1 asm ("r1") = 1; + register int r2 asm ("r2") = i >> 31; + register int r3 asm ("r3") = j >> 31; + + r2 = r2 ? r2 : r1; + r3 = r3 ? r3 : r1; + r18 = i * r2; + r19 = j * r3; + r2 *= r3; + + r19 <<= 31; + r1 <<= 31; + do + if (r18 >= r19) + r0 |= r1, r18 -= r19; + while (r19 >>= 1, r1 >>= 1); + + return r2 * (int)r0; +} +*/ +GLOBAL(sdivsi3): + pt/l LOCAL(sdivsi3_dontadd), tr2 + pt/l LOCAL(sdivsi3_loop), tr1 + ptabs/l r18, tr0 + movi 0, r0 + movi 1, r1 + shari.l r4, 31, r2 + shari.l r5, 31, r3 + cmveq r2, r1, r2 + cmveq r3, r1, r3 + muls.l r4, r2, r18 + muls.l r5, r3, r19 + muls.l r2, r3, r2 + shlli r19, 31, r19 + shlli r1, 31, r1 +LOCAL(sdivsi3_loop): + bgtu r19, r18, tr2 + or r0, r1, r0 + sub r18, r19, r18 +LOCAL(sdivsi3_dontadd): + shlri r1, 1, r1 + shlri r19, 1, r19 + bnei r1, 0, tr1 + muls.l r0, r2, r0 + add.l r0, r63, r0 + blink tr0, r63 +#elif 0 /* ! 0 */ + // inputs: r4,r5 + // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0 + // result in r0 +GLOBAL(sdivsi3): + // can create absolute value without extra latency, + // but dependent on proper sign extension of inputs: + // shari.l r5,31,r2 + // xor r5,r2,r20 + // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended. + shari.l r5,31,r2 + ori r2,1,r2 + muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended. + movi 0xffffffffffffbb0c,r19 // shift count eqiv 76 + shari.l r4,31,r3 + nsb r20,r0 + shlld r20,r0,r25 + shlri r25,48,r25 + sub r19,r25,r1 + mmulfx.w r1,r1,r2 + mshflo.w r1,r63,r1 + // If r4 was to be used in-place instead of r21, could use this sequence + // to compute absolute: + // sub r63,r4,r19 // compute absolute value of r4 + // shlri r4,32,r3 // into lower 32 bit of r4, keeping + // mcmv r19,r3,r4 // the sign in the upper 32 bits intact. + ori r3,1,r3 + mmulfx.w r25,r2,r2 + sub r19,r0,r0 + muls.l r4,r3,r21 + msub.w r1,r2,r2 + addi r2,-2,r1 + mulu.l r21,r1,r19 + mmulfx.w r2,r2,r2 + shlli r1,15,r1 + shlrd r19,r0,r19 + mulu.l r19,r20,r3 + mmacnfx.wl r25,r2,r1 + ptabs r18,tr0 + sub r21,r3,r25 + + mulu.l r25,r1,r2 + addi r0,14,r0 + xor r4,r5,r18 + shlrd r2,r0,r2 + mulu.l r2,r20,r3 + add r19,r2,r19 + shari.l r18,31,r18 + sub r25,r3,r25 + + mulu.l r25,r1,r2 + sub r25,r20,r25 + add r19,r18,r19 + shlrd r2,r0,r2 + mulu.l r2,r20,r3 + addi r25,1,r25 + add r19,r2,r19 + + cmpgt r25,r3,r25 + add.l r19,r25,r0 + xor r0,r18,r0 + blink tr0,r63 +#else /* ! 0 && ! 0 */ + + // inputs: r4,r5 + // clobbered: r1,r18,r19,r20,r21,r25,tr0 + // result in r0 + HIDDEN_FUNC(GLOBAL(sdivsi3_2)) +#ifndef __pic__ + FUNC(GLOBAL(sdivsi3)) +GLOBAL(sdivsi3): /* this is the shcompact entry point */ + // The special SHmedia entry point sdivsi3_1 prevents accidental linking + // with the SHcompact implementation, which clobbers tr1 / tr2. 
+ .global GLOBAL(sdivsi3_1) +GLOBAL(sdivsi3_1): + .global GLOBAL(div_table_internal) + movi (GLOBAL(div_table_internal) >> 16) & 65535, r20 + shori GLOBAL(div_table_internal) & 65535, r20 +#endif + .global GLOBAL(sdivsi3_2) + // div_table in r20 + // clobbered: r1,r18,r19,r21,r25,tr0 +GLOBAL(sdivsi3_2): + nsb r5, r1 + shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 + shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) + ldx.ub r20, r21, r19 // u0.8 + shari r25, 32, r25 // normalize to s2.30 + shlli r21, 1, r21 + muls.l r25, r19, r19 // s2.38 + ldx.w r20, r21, r21 // s2.14 + ptabs r18, tr0 + shari r19, 24, r19 // truncate to s2.14 + sub r21, r19, r19 // some 11 bit inverse in s1.14 + muls.l r19, r19, r21 // u0.28 + sub r63, r1, r1 + addi r1, 92, r1 + muls.l r25, r21, r18 // s2.58 + shlli r19, 45, r19 // multiply by two and convert to s2.58 + /* bubble */ + sub r19, r18, r18 + shari r18, 28, r18 // some 22 bit inverse in s1.30 + muls.l r18, r25, r0 // s2.60 + muls.l r18, r4, r25 // s32.30 + /* bubble */ + shari r0, 16, r19 // s-16.44 + muls.l r19, r18, r19 // s-16.74 + shari r25, 63, r0 + shari r4, 14, r18 // s19.-14 + shari r19, 30, r19 // s-16.44 + muls.l r19, r18, r19 // s15.30 + xor r21, r0, r21 // You could also use the constant 1 << 27. + add r21, r25, r21 + sub r21, r19, r21 + shard r21, r1, r21 + sub r21, r0, r0 + blink tr0, r63 +#ifndef __pic__ + ENDFUNC(GLOBAL(sdivsi3)) +#endif + ENDFUNC(GLOBAL(sdivsi3_2)) +#endif +#elif defined __SHMEDIA__ +/* m5compact-nofpu */ + // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2 + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + FUNC(GLOBAL(sdivsi3)) +GLOBAL(sdivsi3): + pt/l LOCAL(sdivsi3_dontsub), tr0 + pt/l LOCAL(sdivsi3_loop), tr1 + ptabs/l r18,tr2 + shari.l r4,31,r18 + shari.l r5,31,r19 + xor r4,r18,r20 + xor r5,r19,r21 + sub.l r20,r18,r20 + sub.l r21,r19,r21 + xor r18,r19,r19 + shlli r21,32,r25 + addi r25,-1,r21 + addz.l r20,r63,r20 +LOCAL(sdivsi3_loop): + shlli r20,1,r20 + bgeu/u r21,r20,tr0 + sub r20,r21,r20 +LOCAL(sdivsi3_dontsub): + addi.l r25,-1,r25 + bnei r25,-32,tr1 + xor r20,r19,r20 + sub.l r20,r19,r0 + blink tr2,r63 + ENDFUNC(GLOBAL(sdivsi3)) +#else /* ! __SHMEDIA__ */ + FUNC(GLOBAL(sdivsi3)) +GLOBAL(sdivsi3): + mov r4,r1 + mov r5,r0 + + tst r0,r0 + bt div0 + mov #0,r2 + div0s r2,r1 + subc r3,r3 + subc r2,r1 + div0s r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + addc r2,r1 + rts + mov r1,r0 + + +div0: rts + mov #0,r0 + + ENDFUNC(GLOBAL(sdivsi3)) +#endif /* ! __SHMEDIA__ */ +#endif /* ! __SH4__ */ +#endif +#ifdef L_udivsi3_i4 + + .title "SH DIVIDE" +!! 4 byte integer Divide code for the Renesas SH +#ifdef __SH4__ +!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4, +!! 
and t bit + + .global GLOBAL(udivsi3_i4) + HIDDEN_FUNC(GLOBAL(udivsi3_i4)) +GLOBAL(udivsi3_i4): + mov #1,r1 + cmp/hi r1,r5 + bf trivial + rotr r1 + xor r1,r4 + lds r4,fpul + mova L1,r0 +#ifdef FMOVD_WORKS + fmov.d @r0+,dr4 +#else + fmov.s @r0+,DR40 + fmov.s @r0,DR41 +#endif + float fpul,dr0 + xor r1,r5 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + rts + ftrc dr0,fpul + +trivial: + rts + lds r4,fpul + + .align 2 +#ifdef FMOVD_WORKS + .align 3 ! make double below 8 byte aligned. +#endif +L1: + .double 2147483648 + + ENDFUNC(GLOBAL(udivsi3_i4)) +#elif defined (__SH5__) && ! defined (__SH4_NOFPU__) +#if ! __SH5__ || __SH5__ == 32 +!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33 + .mode SHmedia + .global GLOBAL(udivsi3_i4) + HIDDEN_FUNC(GLOBAL(udivsi3_i4)) +GLOBAL(udivsi3_i4): + addz.l r4,r63,r20 + addz.l r5,r63,r21 + fmov.qd r20,dr0 + fmov.qd r21,dr32 + ptabs r18,tr0 + float.qd dr0,dr0 + float.qd dr32,dr32 + fdiv.d dr0,dr32,dr0 + ftrc.dq dr0,dr32 + fmov.s fr33,fr32 + blink tr0,r63 + + ENDFUNC(GLOBAL(udivsi3_i4)) +#endif /* ! __SH5__ || __SH5__ == 32 */ +#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) +!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 + + .global GLOBAL(udivsi3_i4) + HIDDEN_FUNC(GLOBAL(udivsi3_i4)) +GLOBAL(udivsi3_i4): + mov #1,r1 + cmp/hi r1,r5 + bf trivial + sts.l fpscr,@-r15 + mova L1,r0 + lds.l @r0+,fpscr + rotr r1 + xor r1,r4 + lds r4,fpul +#ifdef FMOVD_WORKS + fmov.d @r0+,dr4 +#else + fmov.s @r0+,DR40 + fmov.s @r0,DR41 +#endif + float fpul,dr0 + xor r1,r5 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + lds.l @r15+,fpscr + +#ifdef FMOVD_WORKS + .align 3 ! make double below 8 byte aligned. +#endif +trivial: + rts + lds r4,fpul + + .align 2 +L1: +#ifndef FMOVD_WORKS + .long 0x80000 +#else + .long 0x180000 +#endif + .double 2147483648 + + ENDFUNC(GLOBAL(udivsi3_i4)) +#endif /* ! __SH4__ */ +#endif + +#ifdef L_udivsi3 +/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with + sh2e/sh3e code. */ +#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) + +!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit + .global GLOBAL(udivsi3) + HIDDEN_FUNC(GLOBAL(udivsi3)) + +#if __SHMEDIA__ +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif + .align 2 +#if 0 +/* The assembly code that follows is a hand-optimized version of the C + code that follows. Note that the registers that are modified are + exactly those listed as clobbered in the patterns udivsi3_i1 and + udivsi3_i1_media. + +unsigned +__udivsi3 (i, j) + unsigned i, j; +{ + register unsigned long long r0 asm ("r0") = 0; + register unsigned long long r18 asm ("r18") = 1; + register unsigned long long r4 asm ("r4") = i; + register unsigned long long r19 asm ("r19") = j; + + r19 <<= 31; + r18 <<= 31; + do + if (r4 >= r19) + r0 |= r18, r4 -= r19; + while (r19 >>= 1, r18 >>= 1); + + return r0; +} +*/ +GLOBAL(udivsi3): + pt/l LOCAL(udivsi3_dontadd), tr2 + pt/l LOCAL(udivsi3_loop), tr1 + ptabs/l r18, tr0 + movi 0, r0 + movi 1, r18 + addz.l r5, r63, r19 + addz.l r4, r63, r4 + shlli r19, 31, r19 + shlli r18, 31, r18 +LOCAL(udivsi3_loop): + bgtu r19, r4, tr2 + or r0, r18, r0 + sub r4, r19, r4 +LOCAL(udivsi3_dontadd): + shlri r18, 1, r18 + shlri r19, 1, r19 + bnei r18, 0, tr1 + blink tr0, r63 +#else +GLOBAL(udivsi3): + // inputs: r4,r5 + // clobbered: r18,r19,r20,r21,r22,r25,tr0 + // result in r0. 
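+	// Rough outline of the computation below: normalize the divisor
+	// (nsb/shlld), build a fixed-point reciprocal estimate with the
+	// mmulfx.w/msub.w steps, multiply to get a first quotient estimate,
+	// then refine the remainder twice so that the final cmpgt/add.l
+	// corrects the quotient by at most one.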
+ addz.l r5,r63,r22 + nsb r22,r0 + shlld r22,r0,r25 + shlri r25,48,r25 + movi 0xffffffffffffbb0c,r20 // shift count eqiv 76 + sub r20,r25,r21 + mmulfx.w r21,r21,r19 + mshflo.w r21,r63,r21 + ptabs r18,tr0 + mmulfx.w r25,r19,r19 + sub r20,r0,r0 + /* bubble */ + msub.w r21,r19,r19 + addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21 + before the msub.w, but we need a different value for + r19 to keep errors under control. */ + mulu.l r4,r21,r18 + mmulfx.w r19,r19,r19 + shlli r21,15,r21 + shlrd r18,r0,r18 + mulu.l r18,r22,r20 + mmacnfx.wl r25,r19,r21 + /* bubble */ + sub r4,r20,r25 + + mulu.l r25,r21,r19 + addi r0,14,r0 + /* bubble */ + shlrd r19,r0,r19 + mulu.l r19,r22,r20 + add r18,r19,r18 + /* bubble */ + sub.l r25,r20,r25 + + mulu.l r25,r21,r19 + addz.l r25,r63,r25 + sub r25,r22,r25 + shlrd r19,r0,r19 + mulu.l r19,r22,r20 + addi r25,1,r25 + add r18,r19,r18 + + cmpgt r25,r20,r25 + add.l r18,r25,r0 + blink tr0,r63 +#endif +#elif defined (__SHMEDIA__) +/* m5compact-nofpu - more emphasis on code size than on speed, but don't + ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4. + So use a short shmedia loop. */ + // clobbered: r20,r21,r25,tr0,tr1,tr2 + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 +GLOBAL(udivsi3): + pt/l LOCAL(udivsi3_dontsub), tr0 + pt/l LOCAL(udivsi3_loop), tr1 + ptabs/l r18,tr2 + shlli r5,32,r25 + addi r25,-1,r21 + addz.l r4,r63,r20 +LOCAL(udivsi3_loop): + shlli r20,1,r20 + bgeu/u r21,r20,tr0 + sub r20,r21,r20 +LOCAL(udivsi3_dontsub): + addi.l r25,-1,r25 + bnei r25,-32,tr1 + add.l r20,r63,r0 + blink tr2,r63 +#else /* ! defined (__SHMEDIA__) */ +LOCAL(div8): + div1 r5,r4 +LOCAL(div7): + div1 r5,r4; div1 r5,r4; div1 r5,r4 + div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 + +LOCAL(divx4): + div1 r5,r4; rotcl r0 + div1 r5,r4; rotcl r0 + div1 r5,r4; rotcl r0 + rts; div1 r5,r4 + +GLOBAL(udivsi3): + sts.l pr,@-r15 + extu.w r5,r0 + cmp/eq r5,r0 +#ifdef __sh1__ + bf LOCAL(large_divisor) +#else + bf/s LOCAL(large_divisor) +#endif + div0u + swap.w r4,r0 + shlr16 r4 + bsr LOCAL(div8) + shll16 r5 + bsr LOCAL(div7) + div1 r5,r4 + xtrct r4,r0 + xtrct r0,r4 + bsr LOCAL(div8) + swap.w r4,r4 + bsr LOCAL(div7) + div1 r5,r4 + lds.l @r15+,pr + xtrct r4,r0 + swap.w r0,r0 + rotcl r0 + rts + shlr16 r5 + +LOCAL(large_divisor): +#ifdef __sh1__ + div0u +#endif + mov #0,r0 + xtrct r4,r0 + xtrct r0,r4 + bsr LOCAL(divx4) + rotcl r0 + bsr LOCAL(divx4) + rotcl r0 + bsr LOCAL(divx4) + rotcl r0 + bsr LOCAL(divx4) + rotcl r0 + lds.l @r15+,pr + rts + rotcl r0 + + ENDFUNC(GLOBAL(udivsi3)) +#endif /* ! __SHMEDIA__ */ +#endif /* __SH4__ */ +#endif /* L_udivsi3 */ + +#ifdef L_udivdi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(udivdi3) + FUNC(GLOBAL(udivdi3)) +GLOBAL(udivdi3): + HIDDEN_ALIAS(udivdi3_internal,udivdi3) + shlri r3,1,r4 + nsb r4,r22 + shlld r3,r22,r6 + shlri r6,49,r5 + movi 0xffffffffffffbaf1,r21 /* .l shift count 17. 
*/ + sub r21,r5,r1 + mmulfx.w r1,r1,r4 + mshflo.w r1,r63,r1 + sub r63,r22,r20 // r63 == 64 % 64 + mmulfx.w r5,r4,r4 + pta LOCAL(large_divisor),tr0 + addi r20,32,r9 + msub.w r1,r4,r1 + madd.w r1,r1,r1 + mmulfx.w r1,r1,r4 + shlri r6,32,r7 + bgt/u r9,r63,tr0 // large_divisor + mmulfx.w r5,r4,r4 + shlri r2,32+14,r19 + addi r22,-31,r0 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r19,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + mulu.l r5,r3,r8 + mshalds.l r1,r21,r1 + shari r4,26,r4 + shlld r8,r0,r8 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r2,r8,r2 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ + + shlri r2,22,r21 + mulu.l r21,r1,r21 + shlld r5,r0,r8 + addi r20,30-22,r0 + shlrd r21,r0,r21 + mulu.l r21,r3,r5 + add r8,r21,r8 + mcmpgt.l r21,r63,r21 // See Note 1 + addi r20,30,r0 + mshfhi.l r63,r21,r21 + sub r2,r5,r2 + andc r2,r21,r2 + + /* small divisor: need a third divide step */ + mulu.l r2,r1,r7 + ptabs r18,tr0 + addi r2,1,r2 + shlrd r7,r0,r7 + mulu.l r7,r3,r5 + add r8,r7,r8 + sub r2,r3,r2 + cmpgt r2,r5,r5 + add r8,r5,r2 + /* could test r3 here to check for divide by zero. */ + blink tr0,r63 + +LOCAL(large_divisor): + mmulfx.w r5,r4,r4 + shlrd r2,r9,r25 + shlri r25,32,r8 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r8,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + shlri r5,14-1,r8 + mulu.l r8,r7,r5 + mshalds.l r1,r21,r1 + shari r4,26,r4 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r25,r5,r25 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ + + shlri r25,22,r21 + mulu.l r21,r1,r21 + pta LOCAL(no_lo_adj),tr0 + addi r22,32,r0 + shlri r21,40,r21 + mulu.l r21,r7,r5 + add r8,r21,r8 + shlld r2,r0,r2 + sub r25,r5,r25 + bgtu/u r7,r25,tr0 // no_lo_adj + addi r8,1,r8 + sub r25,r7,r25 +LOCAL(no_lo_adj): + mextr4 r2,r25,r2 + + /* large_divisor: only needs a few adjustments. */ + mulu.l r8,r6,r5 + ptabs r18,tr0 + /* bubble */ + cmpgtu r5,r2,r5 + sub r8,r5,r2 + blink tr0,r63 + ENDFUNC(GLOBAL(udivdi3)) +/* Note 1: To shift the result of the second divide stage so that the result + always fits into 32 bits, yet we still reduce the rest sufficiently + would require a lot of instructions to do the shifts just right. Using + the full 64 bit shift result to multiply with the divisor would require + four extra instructions for the upper 32 bits (shift / mulu / shift / sub). + Fortunately, if the upper 32 bits of the shift result are nonzero, we + know that the rest after taking this partial result into account will + fit into 32 bits. So we just clear the upper 32 bits of the rest if the + upper 32 bits of the partial result are nonzero. 
*/ +#endif /* __SHMEDIA__ */ +#endif /* L_udivdi3 */ + +#ifdef L_divdi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(divdi3) + FUNC(GLOBAL(divdi3)) +GLOBAL(divdi3): + pta GLOBAL(udivdi3_internal),tr0 + shari r2,63,r22 + shari r3,63,r23 + xor r2,r22,r2 + xor r3,r23,r3 + sub r2,r22,r2 + sub r3,r23,r3 + beq/u r22,r23,tr0 + ptabs r18,tr1 + blink tr0,r18 + sub r63,r2,r2 + blink tr1,r63 + ENDFUNC(GLOBAL(divdi3)) +#endif /* __SHMEDIA__ */ +#endif /* L_divdi3 */ + +#ifdef L_umoddi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(umoddi3) + FUNC(GLOBAL(umoddi3)) +GLOBAL(umoddi3): + HIDDEN_ALIAS(umoddi3_internal,umoddi3) + shlri r3,1,r4 + nsb r4,r22 + shlld r3,r22,r6 + shlri r6,49,r5 + movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ + sub r21,r5,r1 + mmulfx.w r1,r1,r4 + mshflo.w r1,r63,r1 + sub r63,r22,r20 // r63 == 64 % 64 + mmulfx.w r5,r4,r4 + pta LOCAL(large_divisor),tr0 + addi r20,32,r9 + msub.w r1,r4,r1 + madd.w r1,r1,r1 + mmulfx.w r1,r1,r4 + shlri r6,32,r7 + bgt/u r9,r63,tr0 // large_divisor + mmulfx.w r5,r4,r4 + shlri r2,32+14,r19 + addi r22,-31,r0 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r19,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + mulu.l r5,r3,r5 + mshalds.l r1,r21,r1 + shari r4,26,r4 + shlld r5,r0,r5 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r2,r5,r2 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ + + shlri r2,22,r21 + mulu.l r21,r1,r21 + addi r20,30-22,r0 + /* bubble */ /* could test r3 here to check for divide by zero. */ + shlrd r21,r0,r21 + mulu.l r21,r3,r5 + mcmpgt.l r21,r63,r21 // See Note 1 + addi r20,30,r0 + mshfhi.l r63,r21,r21 + sub r2,r5,r2 + andc r2,r21,r2 + + /* small divisor: need a third divide step */ + mulu.l r2,r1,r7 + ptabs r18,tr0 + sub r2,r3,r8 /* re-use r8 here for rest - r3 */ + shlrd r7,r0,r7 + mulu.l r7,r3,r5 + /* bubble */ + addi r8,1,r7 + cmpgt r7,r5,r7 + cmvne r7,r8,r2 + sub r2,r5,r2 + blink tr0,r63 + +LOCAL(large_divisor): + mmulfx.w r5,r4,r4 + shlrd r2,r9,r25 + shlri r25,32,r8 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r8,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + shlri r5,14-1,r8 + mulu.l r8,r7,r5 + mshalds.l r1,r21,r1 + shari r4,26,r4 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r25,r5,r25 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ + + shlri r25,22,r21 + mulu.l r21,r1,r21 + pta LOCAL(no_lo_adj),tr0 + addi r22,32,r0 + shlri r21,40,r21 + mulu.l r21,r7,r5 + add r8,r21,r8 + shlld r2,r0,r2 + sub r25,r5,r25 + bgtu/u r7,r25,tr0 // no_lo_adj + addi r8,1,r8 + sub r25,r7,r25 +LOCAL(no_lo_adj): + mextr4 r2,r25,r2 + + /* large_divisor: only needs a few adjustments. 
*/ + mulu.l r8,r6,r5 + ptabs r18,tr0 + add r2,r6,r7 + cmpgtu r5,r2,r8 + cmvne r8,r7,r2 + sub r2,r5,r2 + shlrd r2,r22,r2 + blink tr0,r63 + ENDFUNC(GLOBAL(umoddi3)) +/* Note 1: To shift the result of the second divide stage so that the result + always fits into 32 bits, yet we still reduce the rest sufficiently + would require a lot of instructions to do the shifts just right. Using + the full 64 bit shift result to multiply with the divisor would require + four extra instructions for the upper 32 bits (shift / mulu / shift / sub). + Fortunately, if the upper 32 bits of the shift result are nonzero, we + know that the rest after taking this partial result into account will + fit into 32 bits. So we just clear the upper 32 bits of the rest if the + upper 32 bits of the partial result are nonzero. */ +#endif /* __SHMEDIA__ */ +#endif /* L_umoddi3 */ + +#ifdef L_moddi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(moddi3) + FUNC(GLOBAL(moddi3)) +GLOBAL(moddi3): + pta GLOBAL(umoddi3_internal),tr0 + shari r2,63,r22 + shari r3,63,r23 + xor r2,r22,r2 + xor r3,r23,r3 + sub r2,r22,r2 + sub r3,r23,r3 + beq/u r22,r63,tr0 + ptabs r18,tr1 + blink tr0,r18 + sub r63,r2,r2 + blink tr1,r63 + ENDFUNC(GLOBAL(moddi3)) +#endif /* __SHMEDIA__ */ +#endif /* L_moddi3 */ + +#ifdef L_set_fpscr +#if !defined (__SH2A_NOFPU__) +#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32 +#ifdef __SH5__ + .mode SHcompact +#endif + .global GLOBAL(set_fpscr) + HIDDEN_FUNC(GLOBAL(set_fpscr)) +GLOBAL(set_fpscr): + lds r4,fpscr +#ifdef __PIC__ + mov.l r12,@-r15 +#ifdef __vxworks + mov.l LOCAL(set_fpscr_L0_base),r12 + mov.l LOCAL(set_fpscr_L0_index),r0 + mov.l @r12,r12 + mov.l @(r0,r12),r12 +#else + mova LOCAL(set_fpscr_L0),r0 + mov.l LOCAL(set_fpscr_L0),r12 + add r0,r12 +#endif + mov.l LOCAL(set_fpscr_L1),r0 + mov.l @(r0,r12),r1 + mov.l @r15+,r12 +#else + mov.l LOCAL(set_fpscr_L1),r1 +#endif + swap.w r4,r0 + or #24,r0 +#ifndef FMOVD_WORKS + xor #16,r0 +#endif +#if defined(__SH4__) || defined (__SH2A_DOUBLE__) + swap.w r0,r3 + mov.l r3,@(4,r1) +#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ + swap.w r0,r2 + mov.l r2,@r1 +#endif +#ifndef FMOVD_WORKS + xor #8,r0 +#else + xor #24,r0 +#endif +#if defined(__SH4__) || defined (__SH2A_DOUBLE__) + swap.w r0,r2 + rts + mov.l r2,@r1 +#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ + swap.w r0,r3 + rts + mov.l r3,@(4,r1) +#endif + .align 2 +#ifdef __PIC__ +#ifdef __vxworks +LOCAL(set_fpscr_L0_base): + .long ___GOTT_BASE__ +LOCAL(set_fpscr_L0_index): + .long ___GOTT_INDEX__ +#else +LOCAL(set_fpscr_L0): + .long _GLOBAL_OFFSET_TABLE_ +#endif +LOCAL(set_fpscr_L1): + .long GLOBAL(fpscr_values@GOT) +#else +LOCAL(set_fpscr_L1): + .long GLOBAL(fpscr_values) +#endif + + ENDFUNC(GLOBAL(set_fpscr)) +#ifndef NO_FPSCR_VALUES +#ifdef __ELF__ + .comm GLOBAL(fpscr_values),8,4 +#else + .comm GLOBAL(fpscr_values),8 +#endif /* ELF */ +#endif /* NO_FPSCR_VALUES */ +#endif /* SH2E / SH3E / SH4 */ +#endif /* __SH2A_NOFPU__ */ +#endif /* L_set_fpscr */ +#ifdef L_ic_invalidate +#if __SH5__ == 32 + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(init_trampoline) + HIDDEN_FUNC(GLOBAL(init_trampoline)) +GLOBAL(init_trampoline): + st.l r0,8,r2 +#ifdef __LITTLE_ENDIAN__ + movi 9,r20 + shori 0x402b,r20 + shori 0xd101,r20 + shori 0xd002,r20 +#else + movi 0xffffffffffffd002,r20 
+ shori 0xd101,r20 + shori 0x402b,r20 + shori 9,r20 +#endif + st.q r0,0,r20 + st.l r0,12,r3 + ENDFUNC(GLOBAL(init_trampoline)) + .global GLOBAL(ic_invalidate) + HIDDEN_FUNC(GLOBAL(ic_invalidate)) +GLOBAL(ic_invalidate): + ocbwb r0,0 + synco + icbi r0, 0 + ptabs r18, tr0 + synci + blink tr0, r63 + ENDFUNC(GLOBAL(ic_invalidate)) +#elif defined(__SH4A__) + .global GLOBAL(ic_invalidate) + HIDDEN_FUNC(GLOBAL(ic_invalidate)) +GLOBAL(ic_invalidate): + ocbwb @r4 + synco + icbi @r4 + rts + nop + ENDFUNC(GLOBAL(ic_invalidate)) +#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) + /* For system code, we use ic_invalidate_line_i, but user code + needs a different mechanism. A kernel call is generally not + available, and it would also be slow. Different SH4 variants use + different sizes and associativities of the Icache. We use a small + bit of dispatch code that can be put hidden in every shared object, + which calls the actual processor-specific invalidation code in a + separate module. + Or if you have operating system support, the OS could mmap the + procesor-specific code from a single page, since it is highly + repetitive. */ + .global GLOBAL(ic_invalidate) + HIDDEN_FUNC(GLOBAL(ic_invalidate)) +GLOBAL(ic_invalidate): +#ifdef __pic__ +#ifdef __vxworks + mov.l 1f,r1 + mov.l 2f,r0 + mov.l @r1,r1 + mov.l 0f,r2 + mov.l @(r0,r1),r0 +#else + mov.l 1f,r1 + mova 1f,r0 + mov.l 0f,r2 + add r1,r0 +#endif + mov.l @(r0,r2),r1 +#else + mov.l 0f,r1 +#endif + ocbwb @r4 + mov.l @(8,r1),r0 + sub r1,r4 + and r4,r0 + add r1,r0 + jmp @r0 + mov.l @(4,r1),r0 + .align 2 +#ifndef __pic__ +0: .long GLOBAL(ic_invalidate_array) +#else /* __pic__ */ + .global GLOBAL(ic_invalidate_array) +0: .long GLOBAL(ic_invalidate_array)@GOT +#ifdef __vxworks +1: .long ___GOTT_BASE__ +2: .long ___GOTT_INDEX__ +#else +1: .long _GLOBAL_OFFSET_TABLE_ +#endif + ENDFUNC(GLOBAL(ic_invalidate)) +#endif /* __pic__ */ +#endif /* SH4 */ +#endif /* L_ic_invalidate */ + +#ifdef L_ic_invalidate_array +#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)))) + .global GLOBAL(ic_invalidate_array) + /* This is needed when an SH4 dso with trampolines is used on SH4A. */ + .global GLOBAL(ic_invalidate_array) + FUNC(GLOBAL(ic_invalidate_array)) +GLOBAL(ic_invalidate_array): + add r1,r4 + synco + icbi @r4 + rts + nop + .align 2 + .long 0 + ENDFUNC(GLOBAL(ic_invalidate_array)) +#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) + .global GLOBAL(ic_invalidate_array) + .p2align 5 + FUNC(GLOBAL(ic_invalidate_array)) +/* This must be aligned to the beginning of a cache line. */ +GLOBAL(ic_invalidate_array): +#ifndef WAYS +#define WAYS 4 +#define WAY_SIZE 0x4000 +#endif +#if WAYS == 1 + .rept WAY_SIZE * WAYS / 32 + rts + nop + .rept 7 + .long WAY_SIZE - 32 + .endr + .endr +#elif WAYS <= 6 + .rept WAY_SIZE * WAYS / 32 + braf r0 + add #-8,r0 + .long WAY_SIZE + 8 + .long WAY_SIZE - 32 + .rept WAYS-2 + braf r0 + nop + .endr + .rept 7 - WAYS + rts + nop + .endr + .endr +#else /* WAYS > 6 */ + /* This variant needs two different pages for mmap-ing. 
*/ + .rept WAYS-1 + .rept WAY_SIZE / 32 + braf r0 + nop + .long WAY_SIZE + .rept 6 + .long WAY_SIZE - 32 + .endr + .endr + .endr + .rept WAY_SIZE / 32 + rts + .rept 15 + nop + .endr + .endr +#endif /* WAYS */ + ENDFUNC(GLOBAL(ic_invalidate_array)) +#endif /* SH4 */ +#endif /* L_ic_invalidate_array */ + +#if defined (__SH5__) && __SH5__ == 32 +#ifdef L_shcompact_call_trampoline + .section .rodata + .align 1 +LOCAL(ct_main_table): +.word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label) + .mode SHmedia + .section .text..SHmedia32, "ax" + .align 2 + + /* This function loads 64-bit general-purpose registers from the + stack, from a memory address contained in them or from an FP + register, according to a cookie passed in r1. Its execution + time is linear on the number of registers that actually have + to be copied. See sh.h for details on the actual bit pattern. + + The function to be called is passed in r0. If a 32-bit return + value is expected, the actual function will be tail-called, + otherwise the return address will be stored in r10 (that the + caller should expect to be clobbered) and the return value + will be expanded into r2/r3 upon return. */ + + .global GLOBAL(GCC_shcompact_call_trampoline) + FUNC(GLOBAL(GCC_shcompact_call_trampoline)) +GLOBAL(GCC_shcompact_call_trampoline): + ptabs/l r0, tr0 /* Prepare to call the actual function. */ + movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0 + pt/l LOCAL(ct_loop), tr1 + addz.l r1, r63, r1 + shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 +LOCAL(ct_loop): + nsb r1, r28 + shlli r28, 1, r29 + ldx.w r0, r29, r30 +LOCAL(ct_main_label): + ptrel/l r30, tr2 + blink tr2, r63 +LOCAL(ct_r2_fp): /* Copy r2 from an FP register. 
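The dispatch loop of GCC_shcompact_call_trampoline above (the nsb / ldx.w / ptrel sequence between LOCAL(ct_loop) and LOCAL(ct_main_label)) can be read as the C skeleton below. The actual field encoding of the cookie lives in gcc/config/sh/sh.h; handle_field() here is a placeholder name, not the real decoder.

    /* Skeleton of the cookie-driven dispatch: each ct_* handler clears the
       bits of the field it consumed, so the loop runs once per register
       that actually needs fixing up.  */
    extern unsigned int handle_field (unsigned int cookie, int msb);

    static void call_trampoline_model (unsigned int cookie)
    {
      while (cookie != 0)
        {
          /* nsb + ldx.w: pick a jump-table entry from the position of the
             most significant set bit of the cookie.  */
          int msb = 31 - __builtin_clz (cookie);
          cookie = handle_field (cookie, msb);
        }
      /* The target passed in r0 is then tail-called, or called with the
         return address in r10 when the result must be expanded into r2/r3.  */
    }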
*/ + /* It must be dr0, so just do it. */ + fmov.dq dr0, r2 + movi 7, r30 + shlli r30, 29, r31 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */ + /* It is either dr0 or dr2. */ + movi 7, r30 + shlri r1, 26, r32 + shlli r30, 26, r31 + andc r1, r31, r1 + fmov.dq dr0, r3 + beqi/l r32, 4, tr1 + fmov.dq dr2, r3 + blink tr1, r63 +LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */ + shlri r1, 23 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32 +LOCAL(ct_r4_fp_base): + ptrel/l r32, tr2 + movi 7, r30 + shlli r30, 23, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r4_fp_copy): + fmov.dq dr0, r4 + blink tr1, r63 + fmov.dq dr2, r4 + blink tr1, r63 + fmov.dq dr4, r4 + blink tr1, r63 +LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */ + shlri r1, 20 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32 +LOCAL(ct_r5_fp_base): + ptrel/l r32, tr2 + movi 7, r30 + shlli r30, 20, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r5_fp_copy): + fmov.dq dr0, r5 + blink tr1, r63 + fmov.dq dr2, r5 + blink tr1, r63 + fmov.dq dr4, r5 + blink tr1, r63 + fmov.dq dr6, r5 + blink tr1, r63 +LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */ + /* It must be dr8. */ + fmov.dq dr8, r6 + movi 15, r30 + shlli r30, 16, r31 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */ + shlri r1, 16 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32 +LOCAL(ct_r6_fp_base): + ptrel/l r32, tr2 + movi 7, r30 + shlli r30, 16, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r6_fp_copy): + fmov.dq dr0, r6 + blink tr1, r63 + fmov.dq dr2, r6 + blink tr1, r63 + fmov.dq dr4, r6 + blink tr1, r63 + fmov.dq dr6, r6 + blink tr1, r63 +LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */ + /* It is either dr8 or dr10. */ + movi 15 << 12, r31 + shlri r1, 12, r32 + andc r1, r31, r1 + fmov.dq dr8, r7 + beqi/l r32, 8, tr1 + fmov.dq dr10, r7 + blink tr1, r63 +LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */ + shlri r1, 12 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32 +LOCAL(ct_r7_fp_base): + ptrel/l r32, tr2 + movi 7 << 12, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r7_fp_copy): + fmov.dq dr0, r7 + blink tr1, r63 + fmov.dq dr2, r7 + blink tr1, r63 + fmov.dq dr4, r7 + blink tr1, r63 + fmov.dq dr6, r7 + blink tr1, r63 +LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */ + /* It is either dr8 or dr10. */ + movi 15 << 8, r31 + andi r1, 1 << 8, r32 + andc r1, r31, r1 + fmov.dq dr8, r8 + beq/l r32, r63, tr1 + fmov.dq dr10, r8 + blink tr1, r63 +LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */ + shlri r1, 8 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32 +LOCAL(ct_r8_fp_base): + ptrel/l r32, tr2 + movi 7 << 8, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r8_fp_copy): + fmov.dq dr0, r8 + blink tr1, r63 + fmov.dq dr2, r8 + blink tr1, r63 + fmov.dq dr4, r8 + blink tr1, r63 + fmov.dq dr6, r8 + blink tr1, r63 +LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */ + /* It is either dr8 or dr10. */ + movi 15 << 4, r31 + andi r1, 1 << 4, r32 + andc r1, r31, r1 + fmov.dq dr8, r9 + beq/l r32, r63, tr1 + fmov.dq dr10, r9 + blink tr1, r63 +LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. 
*/ + shlri r1, 4 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32 +LOCAL(ct_r9_fp_base): + ptrel/l r32, tr2 + movi 7 << 4, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r9_fp_copy): + fmov.dq dr0, r9 + blink tr1, r63 + fmov.dq dr2, r9 + blink tr1, r63 + fmov.dq dr4, r9 + blink tr1, r63 + fmov.dq dr6, r9 + blink tr1, r63 +LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */ + pt/l LOCAL(ct_r2_load), tr2 + movi 3, r30 + shlli r30, 29, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r2, 8, r3 + ldx.q r2, r63, r2 + /* Fall through. */ +LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */ + pt/l LOCAL(ct_r3_load), tr2 + movi 3, r30 + shlli r30, 26, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r3, 8, r4 + ldx.q r3, r63, r3 +LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */ + pt/l LOCAL(ct_r4_load), tr2 + movi 3, r30 + shlli r30, 23, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r4, 8, r5 + ldx.q r4, r63, r4 +LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */ + pt/l LOCAL(ct_r5_load), tr2 + movi 3, r30 + shlli r30, 20, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r5, 8, r6 + ldx.q r5, r63, r5 +LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */ + pt/l LOCAL(ct_r6_load), tr2 + movi 3 << 16, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r6, 8, r7 + ldx.q r6, r63, r6 +LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */ + pt/l LOCAL(ct_r7_load), tr2 + movi 3 << 12, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r7, 8, r8 + ldx.q r7, r63, r7 +LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */ + pt/l LOCAL(ct_r8_load), tr2 + movi 3 << 8, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r8, 8, r9 + ldx.q r8, r63, r8 +LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */ + pt/l LOCAL(ct_check_tramp), tr2 + ldx.q r9, r63, r9 + blink tr2, r63 +LOCAL(ct_r2_load): + ldx.q r2, r63, r2 + blink tr1, r63 +LOCAL(ct_r3_load): + ldx.q r3, r63, r3 + blink tr1, r63 +LOCAL(ct_r4_load): + ldx.q r4, r63, r4 + blink tr1, r63 +LOCAL(ct_r5_load): + ldx.q r5, r63, r5 + blink tr1, r63 +LOCAL(ct_r6_load): + ldx.q r6, r63, r6 + blink tr1, r63 +LOCAL(ct_r7_load): + ldx.q r7, r63, r7 + blink tr1, r63 +LOCAL(ct_r8_load): + ldx.q r8, r63, r8 + blink tr1, r63 +LOCAL(ct_r2_pop): /* Pop r2 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r2 + shlli r30, 29, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r3_pop): /* Pop r3 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r3 + shlli r30, 26, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r4_pop): /* Pop r4 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r4 + shlli r30, 23, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r5_pop): /* Pop r5 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r5 + shlli r30, 20, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r6_pop): /* Pop r6 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r6 + shlli r30, 16, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r7_pop): /* Pop r7 from the stack. */ + ldx.q r15, r63, r7 + movi 1 << 12, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r8_pop): /* Pop r8 from the stack. 
*/ + ldx.q r15, r63, r8 + movi 1 << 8, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */ + andi r1, 7 << 1, r30 + movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32 + shlli r30, 2, r31 + shori LOCAL(ct_end_of_pop_seq) & 65535, r32 + sub.l r32, r31, r33 + ptabs/l r33, tr2 + blink tr2, r63 +LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */ + ldx.q r15, r63, r3 + addi.l r15, 8, r15 + ldx.q r15, r63, r4 + addi.l r15, 8, r15 + ldx.q r15, r63, r5 + addi.l r15, 8, r15 + ldx.q r15, r63, r6 + addi.l r15, 8, r15 + ldx.q r15, r63, r7 + addi.l r15, 8, r15 + ldx.q r15, r63, r8 + addi.l r15, 8, r15 +LOCAL(ct_r9_pop): /* Pop r9 from the stack. */ + ldx.q r15, r63, r9 + addi.l r15, 8, r15 +LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */ +LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */ + pt/u LOCAL(ct_ret_wide), tr2 + andi r1, 1, r1 + bne/u r1, r63, tr2 +LOCAL(ct_call_func): /* Just branch to the function. */ + blink tr0, r63 +LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its + 64-bit return value. */ + add.l r18, r63, r10 + blink tr0, r18 + ptabs r10, tr0 +#if __LITTLE_ENDIAN__ + shari r2, 32, r3 + add.l r2, r63, r2 +#else + add.l r2, r63, r3 + shari r2, 32, r2 +#endif + blink tr0, r63 + + ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline)) +#endif /* L_shcompact_call_trampoline */ + +#ifdef L_shcompact_return_trampoline + /* This function does the converse of the code in `ret_wide' + above. It is tail-called by SHcompact functions returning + 64-bit non-floating-point values, to pack the 32-bit values in + r2 and r3 into r2. */ + + .mode SHmedia + .section .text..SHmedia32, "ax" + .align 2 + .global GLOBAL(GCC_shcompact_return_trampoline) + HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline)) +GLOBAL(GCC_shcompact_return_trampoline): + ptabs/l r18, tr0 +#if __LITTLE_ENDIAN__ + addz.l r2, r63, r2 + shlli r3, 32, r3 +#else + addz.l r3, r63, r3 + shlli r2, 32, r2 +#endif + or r3, r2, r2 + blink tr0, r63 + + ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline)) +#endif /* L_shcompact_return_trampoline */ + +#ifdef L_shcompact_incoming_args + .section .rodata + .align 1 +LOCAL(ia_main_table): +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r9_push) - datalabel 
LOCAL(ia_main_label) +.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) + .mode SHmedia + .section .text..SHmedia32, "ax" + .align 2 + + /* This function stores 64-bit general-purpose registers back in + the stack, and loads the address in which each register + was stored into itself. The lower 32 bits of r17 hold the address + to begin storing, and the upper 32 bits of r17 hold the cookie. + Its execution time is linear on the + number of registers that actually have to be copied, and it is + optimized for structures larger than 64 bits, as opposed to + individual `long long' arguments. See sh.h for details on the + actual bit pattern. */ + + .global GLOBAL(GCC_shcompact_incoming_args) + FUNC(GLOBAL(GCC_shcompact_incoming_args)) +GLOBAL(GCC_shcompact_incoming_args): + ptabs/l r18, tr0 /* Prepare to return. */ + shlri r17, 32, r0 /* Load the cookie. */ + movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 + pt/l LOCAL(ia_loop), tr1 + add.l r17, r63, r17 + shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 +LOCAL(ia_loop): + nsb r0, r36 + shlli r36, 1, r37 + ldx.w r43, r37, r38 +LOCAL(ia_main_label): + ptrel/l r38, tr2 + blink tr2, r63 +LOCAL(ia_r2_ld): /* Store r2 and load its address. */ + movi 3, r38 + shlli r38, 29, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r2 + add.l r17, r63, r2 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r3_ld): /* Store r3 and load its address. */ + movi 3, r38 + shlli r38, 26, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r3 + add.l r17, r63, r3 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r4_ld): /* Store r4 and load its address. */ + movi 3, r38 + shlli r38, 23, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r4 + add.l r17, r63, r4 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r5_ld): /* Store r5 and load its address. */ + movi 3, r38 + shlli r38, 20, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r5 + add.l r17, r63, r5 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r6_ld): /* Store r6 and load its address. */ + movi 3, r38 + shlli r38, 16, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r6 + add.l r17, r63, r6 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r7_ld): /* Store r7 and load its address. */ + movi 3 << 12, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r7 + add.l r17, r63, r7 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r8_ld): /* Store r8 and load its address. */ + movi 3 << 8, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r8 + add.l r17, r63, r8 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r9_ld): /* Store r9 and load its address. */ + stx.q r17, r63, r9 + add.l r17, r63, r9 + blink tr0, r63 +LOCAL(ia_r2_push): /* Push r2 onto the stack. */ + movi 1, r38 + shlli r38, 29, r39 + andc r0, r39, r0 + stx.q r17, r63, r2 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r3_push): /* Push r3 onto the stack. */ + movi 1, r38 + shlli r38, 26, r39 + andc r0, r39, r0 + stx.q r17, r63, r3 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r4_push): /* Push r4 onto the stack. */ + movi 1, r38 + shlli r38, 23, r39 + andc r0, r39, r0 + stx.q r17, r63, r4 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r5_push): /* Push r5 onto the stack. 
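The ia_*_ld handlers above implement the "store the register, then make it point at its save slot" step described in the function comment. In rough C terms each one does the following; the helper name is invented for the sketch, and only the store-and-replace step is shown (on entry the low 32 bits of r17 hold the first slot, the high 32 bits the cookie).

    typedef unsigned long long reg64;

    static reg64 *spill_and_relocate (reg64 *slot, reg64 *reg)
    {
      *slot = *reg;                          /* stx.q  r17, r63, rN            */
      *reg = (reg64) (unsigned long) slot;   /* add.l  r17, r63, rN: the
                                                register now holds the address
                                                it was saved at                 */
      return slot + 1;                       /* addi.l r17, 8, r17             */
    }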
*/ + movi 1, r38 + shlli r38, 20, r39 + andc r0, r39, r0 + stx.q r17, r63, r5 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r6_push): /* Push r6 onto the stack. */ + movi 1, r38 + shlli r38, 16, r39 + andc r0, r39, r0 + stx.q r17, r63, r6 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r7_push): /* Push r7 onto the stack. */ + movi 1 << 12, r39 + andc r0, r39, r0 + stx.q r17, r63, r7 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r8_push): /* Push r8 onto the stack. */ + movi 1 << 8, r39 + andc r0, r39, r0 + stx.q r17, r63, r8 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */ + andi r0, 7 << 1, r38 + movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 + shlli r38, 2, r39 + shori LOCAL(ia_end_of_push_seq) & 65535, r40 + sub.l r40, r39, r41 + ptabs/l r41, tr2 + blink tr2, r63 +LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */ + stx.q r17, r63, r3 + addi.l r17, 8, r17 + stx.q r17, r63, r4 + addi.l r17, 8, r17 + stx.q r17, r63, r5 + addi.l r17, 8, r17 + stx.q r17, r63, r6 + addi.l r17, 8, r17 + stx.q r17, r63, r7 + addi.l r17, 8, r17 + stx.q r17, r63, r8 + addi.l r17, 8, r17 +LOCAL(ia_r9_push): /* Push r9 onto the stack. */ + stx.q r17, r63, r9 +LOCAL(ia_return): /* Return. */ + blink tr0, r63 +LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */ + ENDFUNC(GLOBAL(GCC_shcompact_incoming_args)) +#endif /* L_shcompact_incoming_args */ +#endif +#if __SH5__ +#ifdef L_nested_trampoline +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif + .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */ + .global GLOBAL(GCC_nested_trampoline) + HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline)) +GLOBAL(GCC_nested_trampoline): + .mode SHmedia + ptrel/u r63, tr0 + gettr tr0, r0 +#if __SH5__ == 64 + ld.q r0, 24, r1 +#else + ld.l r0, 24, r1 +#endif + ptabs/l r1, tr1 +#if __SH5__ == 64 + ld.q r0, 32, r1 +#else + ld.l r0, 28, r1 +#endif + blink tr1, r63 + + ENDFUNC(GLOBAL(GCC_nested_trampoline)) +#endif /* L_nested_trampoline */ +#endif /* __SH5__ */ +#if __SH5__ == 32 +#ifdef L_push_pop_shmedia_regs + .section .text..SHmedia32,"ax" + .mode SHmedia + .align 2 +#ifndef __SH4_NOFPU__ + .global GLOBAL(GCC_push_shmedia_regs) + FUNC(GLOBAL(GCC_push_shmedia_regs)) +GLOBAL(GCC_push_shmedia_regs): + addi.l r15, -14*8, r15 + fst.d r15, 13*8, dr62 + fst.d r15, 12*8, dr60 + fst.d r15, 11*8, dr58 + fst.d r15, 10*8, dr56 + fst.d r15, 9*8, dr54 + fst.d r15, 8*8, dr52 + fst.d r15, 7*8, dr50 + fst.d r15, 6*8, dr48 + fst.d r15, 5*8, dr46 + fst.d r15, 4*8, dr44 + fst.d r15, 3*8, dr42 + fst.d r15, 2*8, dr40 + fst.d r15, 1*8, dr38 + fst.d r15, 0*8, dr36 +#else /* ! __SH4_NOFPU__ */ + .global GLOBAL(GCC_push_shmedia_regs_nofpu) + FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) +GLOBAL(GCC_push_shmedia_regs_nofpu): +#endif /* ! 
__SH4_NOFPU__ */ + ptabs/l r18, tr0 + addi.l r15, -27*8, r15 + gettr tr7, r62 + gettr tr6, r61 + gettr tr5, r60 + st.q r15, 26*8, r62 + st.q r15, 25*8, r61 + st.q r15, 24*8, r60 + st.q r15, 23*8, r59 + st.q r15, 22*8, r58 + st.q r15, 21*8, r57 + st.q r15, 20*8, r56 + st.q r15, 19*8, r55 + st.q r15, 18*8, r54 + st.q r15, 17*8, r53 + st.q r15, 16*8, r52 + st.q r15, 15*8, r51 + st.q r15, 14*8, r50 + st.q r15, 13*8, r49 + st.q r15, 12*8, r48 + st.q r15, 11*8, r47 + st.q r15, 10*8, r46 + st.q r15, 9*8, r45 + st.q r15, 8*8, r44 + st.q r15, 7*8, r35 + st.q r15, 6*8, r34 + st.q r15, 5*8, r33 + st.q r15, 4*8, r32 + st.q r15, 3*8, r31 + st.q r15, 2*8, r30 + st.q r15, 1*8, r29 + st.q r15, 0*8, r28 + blink tr0, r63 +#ifndef __SH4_NOFPU__ + ENDFUNC(GLOBAL(GCC_push_shmedia_regs)) +#else + ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) +#endif +#ifndef __SH4_NOFPU__ + .global GLOBAL(GCC_pop_shmedia_regs) + FUNC(GLOBAL(GCC_pop_shmedia_regs)) +GLOBAL(GCC_pop_shmedia_regs): + pt .L0, tr1 + movi 41*8, r0 + fld.d r15, 40*8, dr62 + fld.d r15, 39*8, dr60 + fld.d r15, 38*8, dr58 + fld.d r15, 37*8, dr56 + fld.d r15, 36*8, dr54 + fld.d r15, 35*8, dr52 + fld.d r15, 34*8, dr50 + fld.d r15, 33*8, dr48 + fld.d r15, 32*8, dr46 + fld.d r15, 31*8, dr44 + fld.d r15, 30*8, dr42 + fld.d r15, 29*8, dr40 + fld.d r15, 28*8, dr38 + fld.d r15, 27*8, dr36 + blink tr1, r63 +#else /* ! __SH4_NOFPU__ */ + .global GLOBAL(GCC_pop_shmedia_regs_nofpu) + FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) +GLOBAL(GCC_pop_shmedia_regs_nofpu): +#endif /* ! __SH4_NOFPU__ */ + movi 27*8, r0 +.L0: + ptabs r18, tr0 + ld.q r15, 26*8, r62 + ld.q r15, 25*8, r61 + ld.q r15, 24*8, r60 + ptabs r62, tr7 + ptabs r61, tr6 + ptabs r60, tr5 + ld.q r15, 23*8, r59 + ld.q r15, 22*8, r58 + ld.q r15, 21*8, r57 + ld.q r15, 20*8, r56 + ld.q r15, 19*8, r55 + ld.q r15, 18*8, r54 + ld.q r15, 17*8, r53 + ld.q r15, 16*8, r52 + ld.q r15, 15*8, r51 + ld.q r15, 14*8, r50 + ld.q r15, 13*8, r49 + ld.q r15, 12*8, r48 + ld.q r15, 11*8, r47 + ld.q r15, 10*8, r46 + ld.q r15, 9*8, r45 + ld.q r15, 8*8, r44 + ld.q r15, 7*8, r35 + ld.q r15, 6*8, r34 + ld.q r15, 5*8, r33 + ld.q r15, 4*8, r32 + ld.q r15, 3*8, r31 + ld.q r15, 2*8, r30 + ld.q r15, 1*8, r29 + ld.q r15, 0*8, r28 + add.l r15, r0, r15 + blink tr0, r63 + +#ifndef __SH4_NOFPU__ + ENDFUNC(GLOBAL(GCC_pop_shmedia_regs)) +#else + ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) +#endif +#endif /* __SH5__ == 32 */ +#endif /* L_push_pop_shmedia_regs */ + +#ifdef L_div_table +#if __SH5__ +#if defined(__pic__) && defined(__SHMEDIA__) + .global GLOBAL(sdivsi3) + FUNC(GLOBAL(sdivsi3)) +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif +#if 0 +/* ??? FIXME: Presumably due to a linker bug, exporting data symbols + in a text section does not work (at least for shared libraries): + the linker sets the LSB of the address as if this was SHmedia code. 
*/ +#define TEXT_DATA_BUG +#endif + .align 2 + // inputs: r4,r5 + // clobbered: r1,r18,r19,r20,r21,r25,tr0 + // result in r0 + .global GLOBAL(sdivsi3) +GLOBAL(sdivsi3): +#ifdef TEXT_DATA_BUG + ptb datalabel Local_div_table,tr0 +#else + ptb GLOBAL(div_table_internal),tr0 +#endif + nsb r5, r1 + shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 + shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) + /* bubble */ + gettr tr0,r20 + ldx.ub r20, r21, r19 // u0.8 + shari r25, 32, r25 // normalize to s2.30 + shlli r21, 1, r21 + muls.l r25, r19, r19 // s2.38 + ldx.w r20, r21, r21 // s2.14 + ptabs r18, tr0 + shari r19, 24, r19 // truncate to s2.14 + sub r21, r19, r19 // some 11 bit inverse in s1.14 + muls.l r19, r19, r21 // u0.28 + sub r63, r1, r1 + addi r1, 92, r1 + muls.l r25, r21, r18 // s2.58 + shlli r19, 45, r19 // multiply by two and convert to s2.58 + /* bubble */ + sub r19, r18, r18 + shari r18, 28, r18 // some 22 bit inverse in s1.30 + muls.l r18, r25, r0 // s2.60 + muls.l r18, r4, r25 // s32.30 + /* bubble */ + shari r0, 16, r19 // s-16.44 + muls.l r19, r18, r19 // s-16.74 + shari r25, 63, r0 + shari r4, 14, r18 // s19.-14 + shari r19, 30, r19 // s-16.44 + muls.l r19, r18, r19 // s15.30 + xor r21, r0, r21 // You could also use the constant 1 << 27. + add r21, r25, r21 + sub r21, r19, r21 + shard r21, r1, r21 + sub r21, r0, r0 + blink tr0, r63 + ENDFUNC(GLOBAL(sdivsi3)) +/* This table has been generated by divtab.c . +Defects for bias -330: + Max defect: 6.081536e-07 at -1.000000e+00 + Min defect: 2.849516e-08 at 1.030651e+00 + Max 2nd step defect: 9.606539e-12 at -1.000000e+00 + Min 2nd step defect: 0.000000e+00 at 0.000000e+00 + Defect at 1: 1.238659e-07 + Defect at -2: 1.061708e-07 */ +#else /* ! __pic__ || ! __SHMEDIA__ */ + .section .rodata +#endif /* __pic__ */ +#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__) + .balign 2 + .type Local_div_table,@object + .size Local_div_table,128 +/* negative division constants */ + .word -16638 + .word -17135 + .word -17737 + .word -18433 + .word -19103 + .word -19751 + .word -20583 + .word -21383 + .word -22343 + .word -23353 + .word -24407 + .word -25582 + .word -26863 + .word -28382 + .word -29965 + .word -31800 +/* negative division factors */ + .byte 66 + .byte 70 + .byte 75 + .byte 81 + .byte 87 + .byte 93 + .byte 101 + .byte 109 + .byte 119 + .byte 130 + .byte 142 + .byte 156 + .byte 172 + .byte 192 + .byte 214 + .byte 241 + .skip 16 +Local_div_table: + .skip 16 +/* positive division factors */ + .byte 241 + .byte 214 + .byte 192 + .byte 172 + .byte 156 + .byte 142 + .byte 130 + .byte 119 + .byte 109 + .byte 101 + .byte 93 + .byte 87 + .byte 81 + .byte 75 + .byte 70 + .byte 66 +/* positive division constants */ + .word 31801 + .word 29966 + .word 28383 + .word 26864 + .word 25583 + .word 24408 + .word 23354 + .word 22344 + .word 21384 + .word 20584 + .word 19752 + .word 19104 + .word 18434 + .word 17738 + .word 17136 + .word 16639 + .section .rodata +#endif /* TEXT_DATA_BUG */ + .balign 2 + .type GLOBAL(div_table),@object + .size GLOBAL(div_table),128 +/* negative division constants */ + .word -16638 + .word -17135 + .word -17737 + .word -18433 + .word -19103 + .word -19751 + .word -20583 + .word -21383 + .word -22343 + .word -23353 + .word -24407 + .word -25582 + .word -26863 + .word -28382 + .word -29965 + .word -31800 +/* negative division factors */ + .byte 66 + .byte 70 + .byte 75 + .byte 81 + .byte 87 + .byte 93 + .byte 101 + .byte 109 + .byte 119 + .byte 130 + .byte 142 + .byte 156 + .byte 172 
+ .byte 192 + .byte 214 + .byte 241 + .skip 16 + .global GLOBAL(div_table) +GLOBAL(div_table): + HIDDEN_ALIAS(div_table_internal,div_table) + .skip 16 +/* positive division factors */ + .byte 241 + .byte 214 + .byte 192 + .byte 172 + .byte 156 + .byte 142 + .byte 130 + .byte 119 + .byte 109 + .byte 101 + .byte 93 + .byte 87 + .byte 81 + .byte 75 + .byte 70 + .byte 66 +/* positive division constants */ + .word 31801 + .word 29966 + .word 28383 + .word 26864 + .word 25583 + .word 24408 + .word 23354 + .word 22344 + .word 21384 + .word 20584 + .word 19752 + .word 19104 + .word 18434 + .word 17738 + .word 17136 + .word 16639 + +#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) +/* This code used shld, thus is not suitable for SH1 / SH2. */ + +/* Signed / unsigned division without use of FPU, optimized for SH4. + Uses a lookup table for divisors in the range -128 .. +128, and + div1 with case distinction for larger divisors in three more ranges. + The code is lumped together with the table to allow the use of mova. */ +#ifdef __LITTLE_ENDIAN__ +#define L_LSB 0 +#define L_LSWMSB 1 +#define L_MSWLSB 2 +#else +#define L_LSB 3 +#define L_LSWMSB 2 +#define L_MSWLSB 1 +#endif + + .balign 4 + .global GLOBAL(udivsi3_i4i) + FUNC(GLOBAL(udivsi3_i4i)) +GLOBAL(udivsi3_i4i): + mov.w LOCAL(c128_w), r1 + div0u + mov r4,r0 + shlr8 r0 + cmp/hi r1,r5 + extu.w r5,r1 + bf LOCAL(udiv_le128) + cmp/eq r5,r1 + bf LOCAL(udiv_ge64k) + shlr r0 + mov r5,r1 + shll16 r5 + mov.l r4,@-r15 + div1 r5,r0 + mov.l r1,@-r15 + div1 r5,r0 + div1 r5,r0 + bra LOCAL(udiv_25) + div1 r5,r0 + +LOCAL(div_le128): + mova LOCAL(div_table_ix),r0 + bra LOCAL(div_le128_2) + mov.b @(r0,r5),r1 +LOCAL(udiv_le128): + mov.l r4,@-r15 + mova LOCAL(div_table_ix),r0 + mov.b @(r0,r5),r1 + mov.l r5,@-r15 +LOCAL(div_le128_2): + mova LOCAL(div_table_inv),r0 + mov.l @(r0,r1),r1 + mov r5,r0 + tst #0xfe,r0 + mova LOCAL(div_table_clz),r0 + dmulu.l r1,r4 + mov.b @(r0,r5),r1 + bt/s LOCAL(div_by_1) + mov r4,r0 + mov.l @r15+,r5 + sts mach,r0 + /* clrt */ + addc r4,r0 + mov.l @r15+,r4 + rotcr r0 + rts + shld r1,r0 + +LOCAL(div_by_1_neg): + neg r4,r0 +LOCAL(div_by_1): + mov.l @r15+,r5 + rts + mov.l @r15+,r4 + +LOCAL(div_ge64k): + bt/s LOCAL(div_r8) + div0u + shll8 r5 + bra LOCAL(div_ge64k_2) + div1 r5,r0 +LOCAL(udiv_ge64k): + cmp/hi r0,r5 + mov r5,r1 + bt LOCAL(udiv_r8) + shll8 r5 + mov.l r4,@-r15 + div1 r5,r0 + mov.l r1,@-r15 +LOCAL(div_ge64k_2): + div1 r5,r0 + mov.l LOCAL(zero_l),r1 + .rept 4 + div1 r5,r0 + .endr + mov.l r1,@-r15 + div1 r5,r0 + mov.w LOCAL(m256_w),r1 + div1 r5,r0 + mov.b r0,@(L_LSWMSB,r15) + xor r4,r0 + and r1,r0 + bra LOCAL(div_ge64k_end) + xor r4,r0 + +LOCAL(div_r8): + shll16 r4 + bra LOCAL(div_r8_2) + shll8 r4 +LOCAL(udiv_r8): + mov.l r4,@-r15 + shll16 r4 + clrt + shll8 r4 + mov.l r5,@-r15 +LOCAL(div_r8_2): + rotcl r4 + mov r0,r1 + div1 r5,r1 + mov r4,r0 + rotcl r0 + mov r5,r4 + div1 r5,r1 + .rept 5 + rotcl r0; div1 r5,r1 + .endr + rotcl r0 + mov.l @r15+,r5 + div1 r4,r1 + mov.l @r15+,r4 + rts + rotcl r0 + + ENDFUNC(GLOBAL(udivsi3_i4i)) + + .global GLOBAL(sdivsi3_i4i) + FUNC(GLOBAL(sdivsi3_i4i)) + /* This is link-compatible with a GLOBAL(sdivsi3) call, + but we effectively clobber only r1. 
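Both GLOBAL(udivsi3_i4i) above and GLOBAL(sdivsi3_i4i) below follow the range split described at the top of this block (reciprocal table for small divisors, div1 loops of different lengths otherwise). An approximate C outline of the unsigned routine is sketched here; the helper names are invented summaries of the paths, and the div1 step counts are rough, not exact.

    extern unsigned div_by_reciprocal (unsigned n, unsigned d);  /* table below */
    extern unsigned div_by_div1 (unsigned n, unsigned d, int steps);

    static unsigned udivsi3_i4i_model (unsigned n, unsigned d)
    {
      if (d <= 128)
        return div_by_reciprocal (n, d);   /* LOCAL(udiv_le128)                  */
      if (d < (1u << 16))
        return div_by_div1 (n, d, 25);     /* LOCAL(udiv_25)                     */
      if (d > (n >> 8))
        return div_by_div1 (n, d, 8);      /* LOCAL(udiv_r8): quotient < 256     */
      return div_by_div1 (n, d, 16);       /* LOCAL(udiv_ge64k)                  */
    }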
*/ +GLOBAL(sdivsi3_i4i): + mov.l r4,@-r15 + cmp/pz r5 + mov.w LOCAL(c128_w), r1 + bt/s LOCAL(pos_divisor) + cmp/pz r4 + mov.l r5,@-r15 + neg r5,r5 + bt/s LOCAL(neg_result) + cmp/hi r1,r5 + neg r4,r4 +LOCAL(pos_result): + extu.w r5,r0 + bf LOCAL(div_le128) + cmp/eq r5,r0 + mov r4,r0 + shlr8 r0 + bf/s LOCAL(div_ge64k) + cmp/hi r0,r5 + div0u + shll16 r5 + div1 r5,r0 + div1 r5,r0 + div1 r5,r0 +LOCAL(udiv_25): + mov.l LOCAL(zero_l),r1 + div1 r5,r0 + div1 r5,r0 + mov.l r1,@-r15 + .rept 3 + div1 r5,r0 + .endr + mov.b r0,@(L_MSWLSB,r15) + xtrct r4,r0 + swap.w r0,r0 + .rept 8 + div1 r5,r0 + .endr + mov.b r0,@(L_LSWMSB,r15) +LOCAL(div_ge64k_end): + .rept 8 + div1 r5,r0 + .endr + mov.l @r15+,r4 ! zero-extension and swap using LS unit. + extu.b r0,r0 + mov.l @r15+,r5 + or r4,r0 + mov.l @r15+,r4 + rts + rotcl r0 + +LOCAL(div_le128_neg): + tst #0xfe,r0 + mova LOCAL(div_table_ix),r0 + mov.b @(r0,r5),r1 + mova LOCAL(div_table_inv),r0 + bt/s LOCAL(div_by_1_neg) + mov.l @(r0,r1),r1 + mova LOCAL(div_table_clz),r0 + dmulu.l r1,r4 + mov.b @(r0,r5),r1 + mov.l @r15+,r5 + sts mach,r0 + /* clrt */ + addc r4,r0 + mov.l @r15+,r4 + rotcr r0 + shld r1,r0 + rts + neg r0,r0 + +LOCAL(pos_divisor): + mov.l r5,@-r15 + bt/s LOCAL(pos_result) + cmp/hi r1,r5 + neg r4,r4 +LOCAL(neg_result): + extu.w r5,r0 + bf LOCAL(div_le128_neg) + cmp/eq r5,r0 + mov r4,r0 + shlr8 r0 + bf/s LOCAL(div_ge64k_neg) + cmp/hi r0,r5 + div0u + mov.l LOCAL(zero_l),r1 + shll16 r5 + div1 r5,r0 + mov.l r1,@-r15 + .rept 7 + div1 r5,r0 + .endr + mov.b r0,@(L_MSWLSB,r15) + xtrct r4,r0 + swap.w r0,r0 + .rept 8 + div1 r5,r0 + .endr + mov.b r0,@(L_LSWMSB,r15) +LOCAL(div_ge64k_neg_end): + .rept 8 + div1 r5,r0 + .endr + mov.l @r15+,r4 ! zero-extension and swap using LS unit. + extu.b r0,r1 + mov.l @r15+,r5 + or r4,r1 +LOCAL(div_r8_neg_end): + mov.l @r15+,r4 + rotcl r1 + rts + neg r1,r0 + +LOCAL(div_ge64k_neg): + bt/s LOCAL(div_r8_neg) + div0u + shll8 r5 + mov.l LOCAL(zero_l),r1 + .rept 6 + div1 r5,r0 + .endr + mov.l r1,@-r15 + div1 r5,r0 + mov.w LOCAL(m256_w),r1 + div1 r5,r0 + mov.b r0,@(L_LSWMSB,r15) + xor r4,r0 + and r1,r0 + bra LOCAL(div_ge64k_neg_end) + xor r4,r0 + +LOCAL(c128_w): + .word 128 + +LOCAL(div_r8_neg): + clrt + shll16 r4 + mov r4,r1 + shll8 r1 + mov r5,r4 + .rept 7 + rotcl r1; div1 r5,r0 + .endr + mov.l @r15+,r5 + rotcl r1 + bra LOCAL(div_r8_neg_end) + div1 r4,r0 + +LOCAL(m256_w): + .word 0xff00 +/* This table has been generated by divtab-sh4.c. 
*/ + .balign 4 +LOCAL(div_table_clz): + .byte 0 + .byte 1 + .byte 0 + .byte -1 + .byte -1 + .byte -2 + .byte -2 + .byte -2 + .byte -2 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 +/* Lookup table translating positive divisor to index into table of + normalized inverse. N.B. the '0' entry is also the last entry of the + previous table, and causes an unaligned access for division by zero. */ +LOCAL(div_table_ix): + .byte -6 + .byte -128 + .byte -128 + .byte 0 + .byte -128 + .byte -64 + .byte 0 + .byte 64 + .byte -128 + .byte -96 + .byte -64 + .byte -32 + .byte 0 + .byte 32 + .byte 64 + .byte 96 + .byte -128 + .byte -112 + .byte -96 + .byte -80 + .byte -64 + .byte -48 + .byte -32 + .byte -16 + .byte 0 + .byte 16 + .byte 32 + .byte 48 + .byte 64 + .byte 80 + .byte 96 + .byte 112 + .byte -128 + .byte -120 + .byte -112 + .byte -104 + .byte -96 + .byte -88 + .byte -80 + .byte -72 + .byte -64 + .byte -56 + .byte -48 + .byte -40 + .byte -32 + .byte -24 + .byte -16 + .byte -8 + .byte 0 + .byte 8 + .byte 16 + .byte 24 + .byte 32 + .byte 40 + .byte 48 + .byte 56 + .byte 64 + .byte 72 + .byte 80 + .byte 88 + .byte 96 + .byte 104 + .byte 112 + .byte 120 + .byte -128 + .byte -124 + .byte -120 + .byte -116 + .byte -112 + .byte -108 + .byte -104 + .byte -100 + .byte -96 + .byte -92 + .byte -88 + .byte -84 + .byte -80 + .byte -76 + .byte -72 + .byte -68 + .byte -64 + .byte -60 + .byte -56 + .byte -52 + .byte -48 + .byte -44 + .byte -40 + .byte -36 + .byte -32 + .byte -28 + .byte -24 + .byte -20 + .byte -16 + .byte -12 + .byte -8 + .byte -4 + .byte 0 + .byte 4 + .byte 8 + .byte 12 + .byte 16 + .byte 20 + .byte 24 + .byte 28 + .byte 32 + .byte 36 + .byte 40 + .byte 44 + .byte 48 + .byte 52 + .byte 56 + .byte 60 + .byte 64 + .byte 68 + .byte 72 + .byte 76 + .byte 80 + .byte 84 + .byte 88 + .byte 92 + .byte 96 + .byte 100 + .byte 104 + .byte 108 + .byte 112 + .byte 116 + .byte 120 + .byte 124 + .byte -128 +/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. 
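A small worked example shows how the "implicit leading 1 in bit 32" convention is used. For a divisor d, scale it to d' in [64,128); the table stores the 32 fractional bits of 128/d'. The sketch below only models the multiply/shift arithmetic: the rounding done with addc/rotcr and the shift selection via div_table_clz in the real code are glossed over, and the post-shift is supplied by hand for the example.

    #include <stdint.h>

    /* q ~= n/d, with frac_inv taken from the table below.  */
    static uint32_t udiv_by_inverse (uint32_t n, uint32_t frac_inv, int post_shift)
    {
      /* (2^32 + frac_inv) ~= (128/d') * 2^32, so q0 ~= n * 128 / d'.  */
      uint64_t q0 = (((uint64_t) n * frac_inv) >> 32) + n;
      return (uint32_t) (q0 >> post_shift);
    }

    /* d = 3: d' = 96 = 3 << 5, table entry 0x55555556 = frac(128/96) * 2^32,
       and 128/d' is 4 times 1/d, so post_shift = 2:
       udiv_by_inverse (100, 0x55555556, 2) == 33.  */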
*/ + .balign 4 +LOCAL(zero_l): + .long 0x0 + .long 0xF81F81F9 + .long 0xF07C1F08 + .long 0xE9131AC0 + .long 0xE1E1E1E2 + .long 0xDAE6076C + .long 0xD41D41D5 + .long 0xCD856891 + .long 0xC71C71C8 + .long 0xC0E07039 + .long 0xBACF914D + .long 0xB4E81B4F + .long 0xAF286BCB + .long 0xA98EF607 + .long 0xA41A41A5 + .long 0x9EC8E952 + .long 0x9999999A + .long 0x948B0FCE + .long 0x8F9C18FA + .long 0x8ACB90F7 + .long 0x86186187 + .long 0x81818182 + .long 0x7D05F418 + .long 0x78A4C818 + .long 0x745D1746 + .long 0x702E05C1 + .long 0x6C16C16D + .long 0x68168169 + .long 0x642C8591 + .long 0x60581606 + .long 0x5C9882BA + .long 0x58ED2309 +LOCAL(div_table_inv): + .long 0x55555556 + .long 0x51D07EAF + .long 0x4E5E0A73 + .long 0x4AFD6A06 + .long 0x47AE147B + .long 0x446F8657 + .long 0x41414142 + .long 0x3E22CBCF + .long 0x3B13B13C + .long 0x38138139 + .long 0x3521CFB3 + .long 0x323E34A3 + .long 0x2F684BDB + .long 0x2C9FB4D9 + .long 0x29E4129F + .long 0x27350B89 + .long 0x24924925 + .long 0x21FB7813 + .long 0x1F7047DD + .long 0x1CF06ADB + .long 0x1A7B9612 + .long 0x18118119 + .long 0x15B1E5F8 + .long 0x135C8114 + .long 0x11111112 + .long 0xECF56BF + .long 0xC9714FC + .long 0xA6810A7 + .long 0x8421085 + .long 0x624DD30 + .long 0x4104105 + .long 0x2040811 + /* maximum error: 0.987342 scaled: 0.921875*/ + + ENDFUNC(GLOBAL(sdivsi3_i4i)) +#endif /* SH3 / SH4 */ + +#endif /* L_div_table */ + +#ifdef L_udiv_qrnnd_16 +#if !__SHMEDIA__ + HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16)) + /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ + /* n1 < d, but n1 might be larger than d1. */ + .global GLOBAL(udiv_qrnnd_16) + .balign 8 +GLOBAL(udiv_qrnnd_16): + div0u + cmp/hi r6,r0 + bt .Lots + .rept 16 + div1 r6,r0 + .endr + extu.w r0,r1 + bt 0f + add r6,r0 +0: rotcl r1 + mulu.w r1,r5 + xtrct r4,r0 + swap.w r0,r0 + sts macl,r2 + cmp/hs r2,r0 + sub r2,r0 + bt 0f + addc r5,r0 + add #-1,r1 + bt 0f +1: add #-1,r1 + rts + add r5,r0 + .balign 8 +.Lots: + sub r5,r0 + swap.w r4,r1 + xtrct r0,r1 + clrt + mov r1,r0 + addc r5,r0 + mov #-1,r1 + SL1(bf, 1b, + shlr16 r1) +0: rts + nop + ENDFUNC(GLOBAL(udiv_qrnnd_16)) +#endif /* !__SHMEDIA__ */ +#endif /* L_udiv_qrnnd_16 */ diff --git a/libgcc/config/sh/lib1funcs.h b/libgcc/config/sh/lib1funcs.h new file mode 100644 index 00000000000..af4b41cc314 --- /dev/null +++ b/libgcc/config/sh/lib1funcs.h @@ -0,0 +1,76 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006, 2009 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +#ifdef __ELF__ +#define LOCAL(X) .L_##X +#define FUNC(X) .type X,@function +#define HIDDEN_FUNC(X) FUNC(X); .hidden X +#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X) +#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X +#define ENDFUNC(X) ENDFUNC0(X) +#else +#define LOCAL(X) L_##X +#define FUNC(X) +#define HIDDEN_FUNC(X) +#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y) +#define ENDFUNC(X) +#endif + +#define CONCAT(A,B) A##B +#define GLOBAL0(U,X) CONCAT(U,__##X) +#define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X) + +#define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y) + +#if defined __SH2A__ && defined __FMOVD_ENABLED__ +#undef FMOVD_WORKS +#define FMOVD_WORKS +#endif + +#ifdef __LITTLE_ENDIAN__ +#define DR00 fr1 +#define DR01 fr0 +#define DR20 fr3 +#define DR21 fr2 +#define DR40 fr5 +#define DR41 fr4 +#else /* !__LITTLE_ENDIAN__ */ +#define DR00 fr0 +#define DR01 fr1 +#define DR20 fr2 +#define DR21 fr3 +#define DR40 fr4 +#define DR41 fr5 +#endif /* !__LITTLE_ENDIAN__ */ + +#ifdef __sh1__ +#define SL(branch, dest, in_slot, in_slot_arg2) \ + in_slot, in_slot_arg2; branch dest +#define SL1(branch, dest, in_slot) \ + in_slot; branch dest +#else /* ! __sh1__ */ +#define SL(branch, dest, in_slot, in_slot_arg2) \ + branch##.s dest; in_slot, in_slot_arg2 +#define SL1(branch, dest, in_slot) \ + branch##/s dest; in_slot +#endif /* !__sh1__ */ diff --git a/libgcc/config/sh/t-linux b/libgcc/config/sh/t-linux index af618e260c6..9b1feacd1f3 100644 --- a/libgcc/config/sh/t-linux +++ b/libgcc/config/sh/t-linux @@ -1,3 +1,5 @@ +LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array + HOST_LIBGCC2_CFLAGS = -fpic -mieee -DNO_FPSCR_VALUES # Override t-slibgcc-elf-ver to export some libgcc symbols with diff --git a/libgcc/config/sh/t-netbsd b/libgcc/config/sh/t-netbsd new file mode 100644 index 00000000000..663edbf4187 --- /dev/null +++ b/libgcc/config/sh/t-netbsd @@ -0,0 +1 @@ +LIB1ASMFUNCS_CACHE = _ic_invalidate diff --git a/libgcc/config/sh/t-sh b/libgcc/config/sh/t-sh index ab4d98089b1..2319adbef1d 100644 --- a/libgcc/config/sh/t-sh +++ b/libgcc/config/sh/t-sh @@ -17,26 +17,33 @@ # along with GCC; see the file COPYING3. If not see # . 
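The lib1funcs.h macros above and the LIB1ASMFUNCS list added just below are two halves of the same convention: each name in LIB1ASMFUNCS becomes its own object, compiled from LIB1ASMSRC with -DL_<name> on the command line (hence the #ifdef L_udivsi3 and similar guards throughout lib1funcs.S), while GLOBAL() pastes __USER_LABEL_PREFIX__ onto the double-underscore libgcc name. A small preprocessor illustration, using udivsi3 as an example symbol:

    /* Definitions copied from lib1funcs.h above.  */
    #define CONCAT(A,B)  A##B
    #define GLOBAL0(U,X) CONCAT(U,__##X)
    #define GLOBAL(X)    GLOBAL0(__USER_LABEL_PREFIX__,X)

    /* On ELF targets __USER_LABEL_PREFIX__ expands to nothing, so this
       declares __udivsi3; on targets that prefix user labels with `_' it
       declares ___udivsi3 (compare the V850 code further down).  */
    extern unsigned int GLOBAL(udivsi3) (unsigned int, unsigned int);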
+LIB1ASMSRC = sh/lib1funcs.S +LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movmem \ + _movmem_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ + _div_table _udiv_qrnnd_16 \ + $(LIB1ASMFUNCS_CACHE) +LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array + crt1.o: $(srcdir)/config/sh/crt1.S $(gcc_compile) -c $< -ic_invalidate_array_4-100.o: $(gcc_srcdir)/config/sh/lib1funcs.asm +ic_invalidate_array_4-100.o: $(srcdir)/config/sh/lib1funcs.S $(gcc_compile) -c -DL_ic_invalidate_array -DWAYS=1 -DWAY_SIZE=0x2000 $< libic_invalidate_array_4-100.a: ic_invalidate_array_4-100.o $(AR_CREATE_FOR_TARGET) $@ $< -ic_invalidate_array_4-200.o: $(gcc_srcdir)/config/sh/lib1funcs.asm +ic_invalidate_array_4-200.o: $(srcdir)/config/sh/lib1funcs.S $(gcc_compile) -c -DL_ic_invalidate_array -DWAYS=2 -DWAY_SIZE=0x2000 $< libic_invalidate_array_4-200.a: ic_invalidate_array_4-200.o $(AR_CREATE_FOR_TARGET) $@ $< -ic_invalidate_array_4a.o: $(gcc_srcdir)/config/sh/lib1funcs.asm +ic_invalidate_array_4a.o: $(srcdir)/config/sh/lib1funcs.S $(gcc_compile) -c -DL_ic_invalidate_array -D__FORCE_SH4A__ $< libic_invalidate_array_4a.a: ic_invalidate_array_4a.o $(AR_CREATE_FOR_TARGET) $@ $< sdivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.S - $(gcc_compile) -c -DL_sdivsi3_i4i $< + $(compile) -c -DL_sdivsi3_i4i $< udivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.S $(gcc_compile) -c -DL_udivsi3_i4i $< unwind-dw2-Os-4-200.o: $(gcc_srcdir)/unwind-dw2.c diff --git a/libgcc/config/sh/t-sh64 b/libgcc/config/sh/t-sh64 new file mode 100644 index 00000000000..fa9950e03b2 --- /dev/null +++ b/libgcc/config/sh/t-sh64 @@ -0,0 +1,6 @@ +LIB1ASMFUNCS = \ + _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ + _shcompact_call_trampoline _shcompact_return_trampoline \ + _shcompact_incoming_args _ic_invalidate _nested_trampoline \ + _push_pop_shmedia_regs \ + _udivdi3 _divdi3 _umoddi3 _moddi3 _div_table diff --git a/libgcc/config/sparc/lb1spc.S b/libgcc/config/sparc/lb1spc.S new file mode 100644 index 00000000000..b60bd5740e7 --- /dev/null +++ b/libgcc/config/sparc/lb1spc.S @@ -0,0 +1,784 @@ +/* This is an assembly language implementation of mulsi3, divsi3, and modsi3 + for the sparc processor. + + These routines are derived from the SPARC Architecture Manual, version 8, + slightly edited to match the desired calling convention, and also to + optimize them for our purposes. */ + +#ifdef L_mulsi3 +.text + .align 4 + .global .umul + .proc 4 +.umul: + or %o0, %o1, %o4 ! logical or of multiplier and multiplicand + mov %o0, %y ! multiplier to Y register + andncc %o4, 0xfff, %o5 ! mask out lower 12 bits + be mul_shortway ! can do it the short way + andcc %g0, %g0, %o4 ! zero the partial product and clear NV cc + ! + ! long multiply + ! + mulscc %o4, %o1, %o4 ! first iteration of 33 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 ! 
32nd iteration + mulscc %o4, %g0, %o4 ! last iteration only shifts + ! the upper 32 bits of product are wrong, but we do not care + retl + rd %y, %o0 + ! + ! short multiply + ! +mul_shortway: + mulscc %o4, %o1, %o4 ! first iteration of 13 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 ! 12th iteration + mulscc %o4, %g0, %o4 ! last iteration only shifts + rd %y, %o5 + sll %o4, 12, %o4 ! left shift partial product by 12 bits + srl %o5, 20, %o5 ! right shift partial product by 20 bits + retl + or %o5, %o4, %o0 ! merge for true product +#endif + +#ifdef L_divsi3 +/* + * Division and remainder, from Appendix E of the SPARC Version 8 + * Architecture Manual, with fixes from Gordon Irlam. + */ + +/* + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * m4 parameters: + * .div name of function to generate + * div div=div => %o0 / %o1; div=rem => %o0 % %o1 + * true true=true => signed; true=false => unsigned + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top decade of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ + .global .udiv + .align 4 + .proc 4 + .text +.udiv: + b ready_to_divide + mov 0, %g3 ! result is always positive + + .global .div + .align 4 + .proc 4 + .text +.div: + ! compute sign of result; if neither is negative, no problem + orcc %o1, %o0, %g0 ! either negative? + bge ready_to_divide ! no, go do the divide + xor %o1, %o0, %g3 ! compute sign in any case + tst %o1 + bge 1f + tst %o0 + ! %o1 is definitely negative; %o0 might also be negative + bge ready_to_divide ! if %o0 not negative... + sub %g0, %o1, %o1 ! in any case, make %o1 nonneg +1: ! %o0 is negative, %o1 is nonnegative + sub %g0, %o0, %o0 ! make %o0 nonnegative + + +ready_to_divide: + + ! Ready to divide. Compute size of quotient; scale comparand. + orcc %o1, %g0, %o5 + bne 1f + mov %o0, %o3 + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta 0x2 ! ST_DIV0 + retl + clr %o0 + +1: + cmp %o3, %o5 ! if %o1 exceeds %o0, done + blu got_result ! (and algorithm fails otherwise) + clr %o2 + sethi %hi(1 << (32 - 4 - 1)), %g1 + cmp %o3, %g1 + blu not_really_big + clr %o4 + + ! Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R. + 1: + cmp %o5, %g1 + bgeu 3f + mov 1, %g2 + sll %o5, 4, %o5 + b 1b + add %o4, 1, %o4 + + ! Now compute %g2. 
+ 2: addcc %o5, %o5, %o5 + bcc not_too_big + add %g2, 1, %g2 + + ! We get here if the %o1 overflowed while shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1, 4, %g1 ! high order bit + srl %o5, 1, %o5 ! rest of %o5 + add %o5, %g1, %o5 + b do_single_div + sub %g2, 1, %g2 + + not_too_big: + 3: cmp %o5, %o3 + blu 2b + nop + be do_single_div + nop + /* NB: these are commented out in the V8-SPARC manual as well */ + /* (I do not understand this) */ + ! %o5 > %o3: went too far: back up 1 step + ! srl %o5, 1, %o5 + ! dec %g2 + ! do single-bit divide steps + ! + ! We have to be careful here. We know that %o3 >= %o5, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + do_single_div: + subcc %g2, 1, %g2 + bl end_regular_divide + nop + sub %o3, %o5, %o3 + mov 1, %o2 + b end_single_divloop + nop + single_divloop: + sll %o2, 1, %o2 + bl 1f + srl %o5, 1, %o5 + ! %o3 >= 0 + sub %o3, %o5, %o3 + b 2f + add %o2, 1, %o2 + 1: ! %o3 < 0 + add %o3, %o5, %o3 + sub %o2, 1, %o2 + 2: + end_single_divloop: + subcc %g2, 1, %g2 + bge single_divloop + tst %o3 + b,a end_regular_divide + +not_really_big: +1: + sll %o5, 4, %o5 + cmp %o5, %o3 + bleu 1b + addcc %o4, 1, %o4 + be got_result + sub %o4, 1, %o4 + + tst %o3 ! set up for initial iteration +divloop: + sll %o2, 4, %o2 + ! depth 1, accumulated bits 0 + bl L1.16 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 2, accumulated bits 1 + bl L2.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits 3 + bl L3.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 7 + bl L4.23 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 + +L4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 + + +L3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 5 + bl L4.21 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 + +L4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 + +L2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits 1 + bl L3.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 3 + bl L4.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 + +L4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 + +L3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 1 + bl L4.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 + +L4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 + +L1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 2, accumulated bits -1 + bl L2.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits -1 + bl L3.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -1 + bl L4.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 + +L4.15: + ! 
remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 + +L3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -3 + bl L4.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 + +L4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 + +L2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits -3 + bl L3.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -5 + bl L4.11 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 + +L4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 + +L3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -7 + bl L4.9 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 + +L4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + + 9: +end_regular_divide: + subcc %o4, 1, %o4 + bge divloop + tst %o3 + bl,a got_result + ! non-restoring fixup here (one instruction only!) + sub %o2, 1, %o2 + + +got_result: + ! check to see if answer should be < 0 + tst %g3 + bl,a 1f + sub %g0, %o2, %o2 +1: + retl + mov %o2, %o0 +#endif + +#ifdef L_modsi3 +/* This implementation was taken from glibc: + * + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top decade of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ +.text + .align 4 + .global .urem + .proc 4 +.urem: + b divide + mov 0, %g3 ! result always positive + + .align 4 + .global .rem + .proc 4 +.rem: + ! compute sign of result; if neither is negative, no problem + orcc %o1, %o0, %g0 ! either negative? + bge 2f ! no, go do the divide + mov %o0, %g3 ! sign of remainder matches %o0 + tst %o1 + bge 1f + tst %o0 + ! %o1 is definitely negative; %o0 might also be negative + bge 2f ! if %o0 not negative... + sub %g0, %o1, %o1 ! in any case, make %o1 nonneg +1: ! %o0 is negative, %o1 is nonnegative + sub %g0, %o0, %o0 ! make %o0 nonnegative +2: + + ! Ready to divide. Compute size of quotient; scale comparand. +divide: + orcc %o1, %g0, %o5 + bne 1f + mov %o0, %o3 + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta 0x2 !ST_DIV0 + retl + clr %o0 + +1: + cmp %o3, %o5 ! if %o1 exceeds %o0, done + blu got_result ! (and algorithm fails otherwise) + clr %o2 + sethi %hi(1 << (32 - 4 - 1)), %g1 + cmp %o3, %g1 + blu not_really_big + clr %o4 + + ! Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! 
Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R. + 1: + cmp %o5, %g1 + bgeu 3f + mov 1, %g2 + sll %o5, 4, %o5 + b 1b + add %o4, 1, %o4 + + ! Now compute %g2. + 2: addcc %o5, %o5, %o5 + bcc not_too_big + add %g2, 1, %g2 + + ! We get here if the %o1 overflowed while shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1, 4, %g1 ! high order bit + srl %o5, 1, %o5 ! rest of %o5 + add %o5, %g1, %o5 + b do_single_div + sub %g2, 1, %g2 + + not_too_big: + 3: cmp %o5, %o3 + blu 2b + nop + be do_single_div + nop + /* NB: these are commented out in the V8-SPARC manual as well */ + /* (I do not understand this) */ + ! %o5 > %o3: went too far: back up 1 step + ! srl %o5, 1, %o5 + ! dec %g2 + ! do single-bit divide steps + ! + ! We have to be careful here. We know that %o3 >= %o5, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + do_single_div: + subcc %g2, 1, %g2 + bl end_regular_divide + nop + sub %o3, %o5, %o3 + mov 1, %o2 + b end_single_divloop + nop + single_divloop: + sll %o2, 1, %o2 + bl 1f + srl %o5, 1, %o5 + ! %o3 >= 0 + sub %o3, %o5, %o3 + b 2f + add %o2, 1, %o2 + 1: ! %o3 < 0 + add %o3, %o5, %o3 + sub %o2, 1, %o2 + 2: + end_single_divloop: + subcc %g2, 1, %g2 + bge single_divloop + tst %o3 + b,a end_regular_divide + +not_really_big: +1: + sll %o5, 4, %o5 + cmp %o5, %o3 + bleu 1b + addcc %o4, 1, %o4 + be got_result + sub %o4, 1, %o4 + + tst %o3 ! set up for initial iteration +divloop: + sll %o2, 4, %o2 + ! depth 1, accumulated bits 0 + bl L1.16 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 2, accumulated bits 1 + bl L2.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits 3 + bl L3.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 7 + bl L4.23 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 +L4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 + +L3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 5 + bl L4.21 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 + +L4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 + +L2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits 1 + bl L3.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 3 + bl L4.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 + +L4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 + +L3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 1 + bl L4.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 + +L4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 + +L1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 2, accumulated bits -1 + bl L2.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits -1 + bl L3.15 + srl %o5,1,%o5 + ! 
remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -1 + bl L4.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 + +L4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 + +L3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -3 + bl L4.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 + +L4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 + +L2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits -3 + bl L3.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -5 + bl L4.11 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 + +L4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 + +L3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -7 + bl L4.9 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 + +L4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + + 9: +end_regular_divide: + subcc %o4, 1, %o4 + bge divloop + tst %o3 + bl,a got_result + ! non-restoring fixup here (one instruction only!) + add %o3, %o1, %o3 + +got_result: + ! check to see if answer should be < 0 + tst %g3 + bl,a 1f + sub %g0, %o3, %o3 +1: + retl + mov %o3, %o0 + +#endif + diff --git a/libgcc/config/sparc/t-softmul b/libgcc/config/sparc/t-softmul index 49faae47c53..7142200600f 100644 --- a/libgcc/config/sparc/t-softmul +++ b/libgcc/config/sparc/t-softmul @@ -1,2 +1,2 @@ -LIB1ASMSRC = sparc/lb1spc.asm +LIB1ASMSRC = sparc/lb1spc.S LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 diff --git a/libgcc/config/v850/lib1funcs.S b/libgcc/config/v850/lib1funcs.S new file mode 100644 index 00000000000..04e9b1e0ad4 --- /dev/null +++ b/libgcc/config/v850/lib1funcs.S @@ -0,0 +1,2330 @@ +/* libgcc routines for NEC V850. + Copyright (C) 1996, 1997, 2002, 2005, 2009, 2010 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
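A note on the SPARC .urem/.rem code a little further up: it is the non-restoring scheme described in its own header comment, developing N = 4 quotient bits per main-loop iteration, letting the running remainder go negative, and applying the single-instruction fixup at end_regular_divide. The C sketch below models the same idea one bit at a time and only for the "not really big" dividend path; every name in it is invented for illustration and none of it is part of the moved sources.

/* Rough model of the non-restoring remainder loop (sketch only).
   Assumes divisor != 0 (the assembly traps on zero) and a dividend
   below 1 << 27, i.e. the not_really_big path.  */
unsigned int
urem_model (unsigned int dividend, unsigned int divisor)
{
  long long rem = dividend;      /* R: allowed to go negative */
  unsigned int v = divisor;      /* V: the comparand */
  int iter = 0;

  while (v <= dividend)          /* scale V up above the dividend */
    {
      v <<= 1;
      iter++;
    }
  while (iter-- > 0)             /* non-restoring steps */
    {
      v >>= 1;
      if (rem >= 0)
        rem -= v;
      else
        rem += v;
    }
  if (rem < 0)                   /* the one-instruction fixup */
    rem += divisor;
  return (unsigned int) rem;
}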
*/ + +#ifdef L_mulsi3 + .text + .globl ___mulsi3 + .type ___mulsi3,@function +___mulsi3: +#ifdef __v850__ +/* + #define SHIFT 12 + #define MASK ((1 << SHIFT) - 1) + + #define STEP(i, j) \ + ({ \ + short a_part = (a >> (i)) & MASK; \ + short b_part = (b >> (j)) & MASK; \ + int res = (((int) a_part) * ((int) b_part)); \ + res; \ + }) + + int + __mulsi3 (unsigned a, unsigned b) + { + return STEP (0, 0) + + ((STEP (SHIFT, 0) + STEP (0, SHIFT)) << SHIFT) + + ((STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) + STEP (2 * SHIFT, 0)) + << (2 * SHIFT)); + } +*/ + mov r6, r14 + movea lo(32767), r0, r10 + and r10, r14 + mov r7, r15 + and r10, r15 + shr 15, r6 + mov r6, r13 + and r10, r13 + shr 15, r7 + mov r7, r12 + and r10, r12 + shr 15, r6 + shr 15, r7 + mov r14, r10 + mulh r15, r10 + mov r14, r11 + mulh r12, r11 + mov r13, r16 + mulh r15, r16 + mulh r14, r7 + mulh r15, r6 + add r16, r11 + mulh r13, r12 + shl 15, r11 + add r11, r10 + add r12, r7 + add r6, r7 + shl 30, r7 + add r7, r10 + jmp [r31] +#endif /* __v850__ */ +#if defined(__v850e__) || defined(__v850ea__) || defined(__v850e2__) || defined(__v850e2v3__) + /* This routine is almost unneccesarry because gcc + generates the MUL instruction for the RTX mulsi3. + But if someone wants to link his application with + previsously compiled v850 objects then they will + need this function. */ + + /* It isn't good to put the inst sequence as below; + mul r7, r6, + mov r6, r10, r0 + In this case, there is a RAW hazard between them. + MUL inst takes 2 cycle in EX stage, then MOV inst + must wait 1cycle. */ + mov r7, r10 + mul r6, r10, r0 + jmp [r31] +#endif /* __v850e__ */ + .size ___mulsi3,.-___mulsi3 +#endif /* L_mulsi3 */ + + +#ifdef L_udivsi3 + .text + .global ___udivsi3 + .type ___udivsi3,@function +___udivsi3: +#ifdef __v850__ + mov 1,r12 + mov 0,r10 + cmp r6,r7 + bnl .L12 + movhi hi(-2147483648),r0,r13 + cmp r0,r7 + blt .L12 +.L4: + shl 1,r7 + shl 1,r12 + cmp r6,r7 + bnl .L12 + cmp r0,r12 + be .L8 + mov r7,r19 + and r13,r19 + be .L4 + br .L12 +.L9: + cmp r7,r6 + bl .L10 + sub r7,r6 + or r12,r10 +.L10: + shr 1,r12 + shr 1,r7 +.L12: + cmp r0,r12 + bne .L9 +.L8: + jmp [r31] + +#else /* defined(__v850e__) */ + + /* See comments at end of __mulsi3. */ + mov r6, r10 + divu r7, r10, r0 + jmp [r31] + +#endif /* __v850e__ */ + + .size ___udivsi3,.-___udivsi3 +#endif + +#ifdef L_divsi3 + .text + .globl ___divsi3 + .type ___divsi3,@function +___divsi3: +#ifdef __v850__ + add -8,sp + st.w r31,4[sp] + st.w r22,0[sp] + mov 1,r22 + tst r7,r7 + bp .L3 + subr r0,r7 + subr r0,r22 +.L3: + tst r6,r6 + bp .L4 + subr r0,r6 + subr r0,r22 +.L4: + jarl ___udivsi3,r31 + cmp r0,r22 + bp .L7 + subr r0,r10 +.L7: + ld.w 0[sp],r22 + ld.w 4[sp],r31 + add 8,sp + jmp [r31] + +#else /* defined(__v850e__) */ + + /* See comments at end of __mulsi3. */ + mov r6, r10 + div r7, r10, r0 + jmp [r31] + +#endif /* __v850e__ */ + + .size ___divsi3,.-___divsi3 +#endif + +#ifdef L_umodsi3 + .text + .globl ___umodsi3 + .type ___umodsi3,@function +___umodsi3: +#ifdef __v850__ + add -12,sp + st.w r31,8[sp] + st.w r7,4[sp] + st.w r6,0[sp] + jarl ___udivsi3,r31 + ld.w 4[sp],r7 + mov r10,r6 + jarl ___mulsi3,r31 + ld.w 0[sp],r6 + subr r6,r10 + ld.w 8[sp],r31 + add 12,sp + jmp [r31] + +#else /* defined(__v850e__) */ + + /* See comments at end of __mulsi3. 
*/ + divu r7, r6, r10 + jmp [r31] + +#endif /* __v850e__ */ + + .size ___umodsi3,.-___umodsi3 +#endif /* L_umodsi3 */ + +#ifdef L_modsi3 + .text + .globl ___modsi3 + .type ___modsi3,@function +___modsi3: +#ifdef __v850__ + add -12,sp + st.w r31,8[sp] + st.w r7,4[sp] + st.w r6,0[sp] + jarl ___divsi3,r31 + ld.w 4[sp],r7 + mov r10,r6 + jarl ___mulsi3,r31 + ld.w 0[sp],r6 + subr r6,r10 + ld.w 8[sp],r31 + add 12,sp + jmp [r31] + +#else /* defined(__v850e__) */ + + /* See comments at end of __mulsi3. */ + div r7, r6, r10 + jmp [r31] + +#endif /* __v850e__ */ + + .size ___modsi3,.-___modsi3 +#endif /* L_modsi3 */ + +#ifdef L_save_2 + .text + .align 2 + .globl __save_r2_r29 + .type __save_r2_r29,@function + /* Allocate space and save registers 2, 20 .. 29 on the stack. */ + /* Called via: jalr __save_r2_r29,r10. */ +__save_r2_r29: +#ifdef __EP__ + mov ep,r1 + addi -44,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + sst.w r21,32[ep] + sst.w r20,36[ep] + sst.w r2,40[ep] + mov r1,ep +#else + addi -44,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] + st.w r21,32[sp] + st.w r20,36[sp] + st.w r2,40[sp] +#endif + jmp [r10] + .size __save_r2_r29,.-__save_r2_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r2_r29. */ + .align 2 + .globl __return_r2_r29 + .type __return_r2_r29,@function +__return_r2_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + sld.w 32[ep],r21 + sld.w 36[ep],r20 + sld.w 40[ep],r2 + addi 44,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r22 + ld.w 32[sp],r21 + ld.w 36[sp],r20 + ld.w 40[sp],r2 + addi 44,sp,sp +#endif + jmp [r31] + .size __return_r2_r29,.-__return_r2_r29 +#endif /* L_save_2 */ + +#ifdef L_save_20 + .text + .align 2 + .globl __save_r20_r29 + .type __save_r20_r29,@function + /* Allocate space and save registers 20 .. 29 on the stack. */ + /* Called via: jalr __save_r20_r29,r10. */ +__save_r20_r29: +#ifdef __EP__ + mov ep,r1 + addi -40,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + sst.w r21,32[ep] + sst.w r20,36[ep] + mov r1,ep +#else + addi -40,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] + st.w r21,32[sp] + st.w r20,36[sp] +#endif + jmp [r10] + .size __save_r20_r29,.-__save_r20_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r20_r29. 
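The V850 integer helpers just above follow two simple patterns: __udivsi3 is the classic shift-and-subtract divider (scale the divisor up to the dividend, then walk it back down, setting a quotient bit whenever a subtraction fits), while __umodsi3 and __modsi3 do not divide at all; they call the divide helper, multiply the quotient back with ___mulsi3 and subtract the product from the saved dividend. On V850E parts each of these collapses to a single div or divu, as the #else branches show. Below is a C sketch of both patterns with made-up names, not taken from the patch; the signed __modsi3 has the same shape around ___divsi3.

/* Shift-and-subtract unsigned divide, as in the __v850__ branch of
   __udivsi3 (sketch only, b != 0 assumed).  */
unsigned int
udivsi3_model (unsigned int a, unsigned int b)
{
  unsigned int q = 0, bit = 1;

  while (b < a && (b & 0x80000000u) == 0)   /* scale the divisor up */
    {
      b <<= 1;
      bit <<= 1;
    }
  while (bit != 0)                          /* walk it back down */
    {
      if (a >= b)
        {
          a -= b;
          q |= bit;
        }
      b >>= 1;
      bit >>= 1;
    }
  return q;
}

/* Remainder via divide and multiply, as in __umodsi3.  */
unsigned int
umodsi3_model (unsigned int a, unsigned int b)
{
  unsigned int q = udivsi3_model (a, b);    /* jarl ___udivsi3 */
  return a - q * b;                         /* jarl ___mulsi3, subr */
}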
*/ + .align 2 + .globl __return_r20_r29 + .type __return_r20_r29,@function +__return_r20_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + sld.w 32[ep],r21 + sld.w 36[ep],r20 + addi 40,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r22 + ld.w 32[sp],r21 + ld.w 36[sp],r20 + addi 40,sp,sp +#endif + jmp [r31] + .size __return_r20_r29,.-__return_r20_r29 +#endif /* L_save_20 */ + +#ifdef L_save_21 + .text + .align 2 + .globl __save_r21_r29 + .type __save_r21_r29,@function + /* Allocate space and save registers 21 .. 29 on the stack. */ + /* Called via: jalr __save_r21_r29,r10. */ +__save_r21_r29: +#ifdef __EP__ + mov ep,r1 + addi -36,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + sst.w r21,32[ep] + mov r1,ep +#else + addi -36,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] + st.w r21,32[sp] +#endif + jmp [r10] + .size __save_r21_r29,.-__save_r21_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r21_r29. */ + .align 2 + .globl __return_r21_r29 + .type __return_r21_r29,@function +__return_r21_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + sld.w 32[ep],r21 + addi 36,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r22 + ld.w 32[sp],r21 + addi 36,sp,sp +#endif + jmp [r31] + .size __return_r21_r29,.-__return_r21_r29 +#endif /* L_save_21 */ + +#ifdef L_save_22 + .text + .align 2 + .globl __save_r22_r29 + .type __save_r22_r29,@function + /* Allocate space and save registers 22 .. 29 on the stack. */ + /* Called via: jalr __save_r22_r29,r10. */ +__save_r22_r29: +#ifdef __EP__ + mov ep,r1 + addi -32,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + mov r1,ep +#else + addi -32,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] +#endif + jmp [r10] + .size __save_r22_r29,.-__save_r22_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r22_r29. */ + .align 2 + .globl __return_r22_r29 + .type __return_r22_r29,@function +__return_r22_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + addi 32,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r22 + addi 32,sp,sp +#endif + jmp [r31] + .size __return_r22_r29,.-__return_r22_r29 +#endif /* L_save_22 */ + +#ifdef L_save_23 + .text + .align 2 + .globl __save_r23_r29 + .type __save_r23_r29,@function + /* Allocate space and save registers 23 .. 29 on the stack. 
*/ + /* Called via: jalr __save_r23_r29,r10. */ +__save_r23_r29: +#ifdef __EP__ + mov ep,r1 + addi -28,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + mov r1,ep +#else + addi -28,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] +#endif + jmp [r10] + .size __save_r23_r29,.-__save_r23_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r23_r29. */ + .align 2 + .globl __return_r23_r29 + .type __return_r23_r29,@function +__return_r23_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + addi 28,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + addi 28,sp,sp +#endif + jmp [r31] + .size __return_r23_r29,.-__return_r23_r29 +#endif /* L_save_23 */ + +#ifdef L_save_24 + .text + .align 2 + .globl __save_r24_r29 + .type __save_r24_r29,@function + /* Allocate space and save registers 24 .. 29 on the stack. */ + /* Called via: jalr __save_r24_r29,r10. */ +__save_r24_r29: +#ifdef __EP__ + mov ep,r1 + addi -24,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + mov r1,ep +#else + addi -24,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] +#endif + jmp [r10] + .size __save_r24_r29,.-__save_r24_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r24_r29. */ + .align 2 + .globl __return_r24_r29 + .type __return_r24_r29,@function +__return_r24_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + addi 24,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + addi 24,sp,sp +#endif + jmp [r31] + .size __return_r24_r29,.-__return_r24_r29 +#endif /* L_save_24 */ + +#ifdef L_save_25 + .text + .align 2 + .globl __save_r25_r29 + .type __save_r25_r29,@function + /* Allocate space and save registers 25 .. 29 on the stack. */ + /* Called via: jalr __save_r25_r29,r10. */ +__save_r25_r29: +#ifdef __EP__ + mov ep,r1 + addi -20,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + mov r1,ep +#else + addi -20,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] +#endif + jmp [r10] + .size __save_r25_r29,.-__save_r25_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r25_r29. 
*/ + .align 2 + .globl __return_r25_r29 + .type __return_r25_r29,@function +__return_r25_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + addi 20,sp,sp + mov r1,ep +#else + ld.w 0[ep],r29 + ld.w 4[ep],r28 + ld.w 8[ep],r27 + ld.w 12[ep],r26 + ld.w 16[ep],r25 + addi 20,sp,sp +#endif + jmp [r31] + .size __return_r25_r29,.-__return_r25_r29 +#endif /* L_save_25 */ + +#ifdef L_save_26 + .text + .align 2 + .globl __save_r26_r29 + .type __save_r26_r29,@function + /* Allocate space and save registers 26 .. 29 on the stack. */ + /* Called via: jalr __save_r26_r29,r10. */ +__save_r26_r29: +#ifdef __EP__ + mov ep,r1 + add -16,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + mov r1,ep +#else + add -16,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] +#endif + jmp [r10] + .size __save_r26_r29,.-__save_r26_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r26_r29. */ + .align 2 + .globl __return_r26_r29 + .type __return_r26_r29,@function +__return_r26_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + addi 16,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + addi 16,sp,sp +#endif + jmp [r31] + .size __return_r26_r29,.-__return_r26_r29 +#endif /* L_save_26 */ + +#ifdef L_save_27 + .text + .align 2 + .globl __save_r27_r29 + .type __save_r27_r29,@function + /* Allocate space and save registers 27 .. 29 on the stack. */ + /* Called via: jalr __save_r27_r29,r10. */ +__save_r27_r29: + add -12,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + jmp [r10] + .size __save_r27_r29,.-__save_r27_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r27_r29. */ + .align 2 + .globl __return_r27_r29 + .type __return_r27_r29,@function +__return_r27_r29: + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + add 12,sp + jmp [r31] + .size __return_r27_r29,.-__return_r27_r29 +#endif /* L_save_27 */ + +#ifdef L_save_28 + .text + .align 2 + .globl __save_r28_r29 + .type __save_r28_r29,@function + /* Allocate space and save registers 28,29 on the stack. */ + /* Called via: jalr __save_r28_r29,r10. */ +__save_r28_r29: + add -8,sp + st.w r29,0[sp] + st.w r28,4[sp] + jmp [r10] + .size __save_r28_r29,.-__save_r28_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r28_r29. */ + .align 2 + .globl __return_r28_r29 + .type __return_r28_r29,@function +__return_r28_r29: + ld.w 0[sp],r29 + ld.w 4[sp],r28 + add 8,sp + jmp [r31] + .size __return_r28_r29,.-__return_r28_r29 +#endif /* L_save_28 */ + +#ifdef L_save_29 + .text + .align 2 + .globl __save_r29 + .type __save_r29,@function + /* Allocate space and save register 29 on the stack. */ + /* Called via: jalr __save_r29,r10. */ +__save_r29: + add -4,sp + st.w r29,0[sp] + jmp [r10] + .size __save_r29,.-__save_r29 + + /* Restore saved register 29, deallocate stack and return to the user. */ + /* Called via: jr __return_r29. */ + .align 2 + .globl __return_r29 + .type __return_r29,@function +__return_r29: + ld.w 0[sp],r29 + add 4,sp + jmp [r31] + .size __return_r29,.-__return_r29 +#endif /* L_save_28 */ + +#ifdef L_save_2c + .text + .align 2 + .globl __save_r2_r31 + .type __save_r2_r31,@function + /* Allocate space and save registers 20 .. 
29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r2_r31,r10. */ +__save_r2_r31: +#ifdef __EP__ + mov ep,r1 + addi -48,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + sst.w r21,32[ep] + sst.w r20,36[ep] + sst.w r2,40[ep] + sst.w r31,44[ep] + mov r1,ep +#else + addi -48,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] + st.w r21,32[sp] + st.w r20,36[sp] + st.w r2,40[sp] + st.w r31,44[sp] +#endif + jmp [r10] + .size __save_r2_r31,.-__save_r2_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r20_r31. */ + .align 2 + .globl __return_r2_r31 + .type __return_r2_r31,@function +__return_r2_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + sld.w 32[ep],r21 + sld.w 36[ep],r20 + sld.w 40[ep],r2 + sld.w 44[ep],r31 + addi 48,sp,sp + mov r1,ep +#else + ld.w 44[sp],r29 + ld.w 40[sp],r28 + ld.w 36[sp],r27 + ld.w 32[sp],r26 + ld.w 28[sp],r25 + ld.w 24[sp],r24 + ld.w 20[sp],r23 + ld.w 16[sp],r22 + ld.w 12[sp],r21 + ld.w 8[sp],r20 + ld.w 4[sp],r2 + ld.w 0[sp],r31 + addi 48,sp,sp +#endif + jmp [r31] + .size __return_r2_r31,.-__return_r2_r31 +#endif /* L_save_2c */ + +#ifdef L_save_20c + .text + .align 2 + .globl __save_r20_r31 + .type __save_r20_r31,@function + /* Allocate space and save registers 20 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r20_r31,r10. */ +__save_r20_r31: +#ifdef __EP__ + mov ep,r1 + addi -44,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + sst.w r21,32[ep] + sst.w r20,36[ep] + sst.w r31,40[ep] + mov r1,ep +#else + addi -44,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] + st.w r21,32[sp] + st.w r20,36[sp] + st.w r31,40[sp] +#endif + jmp [r10] + .size __save_r20_r31,.-__save_r20_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r20_r31. */ + .align 2 + .globl __return_r20_r31 + .type __return_r20_r31,@function +__return_r20_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + sld.w 32[ep],r21 + sld.w 36[ep],r20 + sld.w 40[ep],r31 + addi 44,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r22 + ld.w 32[sp],r21 + ld.w 36[sp],r20 + ld.w 40[sp],r31 + addi 44,sp,sp +#endif + jmp [r31] + .size __return_r20_r31,.-__return_r20_r31 +#endif /* L_save_20c */ + +#ifdef L_save_21c + .text + .align 2 + .globl __save_r21_r31 + .type __save_r21_r31,@function + /* Allocate space and save registers 21 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r21_r31,r10. 
*/ +__save_r21_r31: +#ifdef __EP__ + mov ep,r1 + addi -40,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + sst.w r21,32[ep] + sst.w r31,36[ep] + mov r1,ep + jmp [r10] +#else + addi -40,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] + st.w r21,32[sp] + st.w r31,36[sp] + jmp [r10] +#endif + .size __save_r21_r31,.-__save_r21_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r21_r31. */ + .align 2 + .globl __return_r21_r31 + .type __return_r21_r31,@function +__return_r21_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + sld.w 32[ep],r21 + sld.w 36[ep],r31 + addi 40,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r22 + ld.w 32[sp],r21 + ld.w 36[sp],r31 + addi 40,sp,sp +#endif + jmp [r31] + .size __return_r21_r31,.-__return_r21_r31 +#endif /* L_save_21c */ + +#ifdef L_save_22c + .text + .align 2 + .globl __save_r22_r31 + .type __save_r22_r31,@function + /* Allocate space and save registers 22 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r22_r31,r10. */ +__save_r22_r31: +#ifdef __EP__ + mov ep,r1 + addi -36,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + sst.w r31,32[ep] + mov r1,ep +#else + addi -36,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] + st.w r31,32[sp] +#endif + jmp [r10] + .size __save_r22_r31,.-__save_r22_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r22_r31. */ + .align 2 + .globl __return_r22_r31 + .type __return_r22_r31,@function +__return_r22_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + sld.w 32[ep],r31 + addi 36,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r22 + ld.w 32[sp],r31 + addi 36,sp,sp +#endif + jmp [r31] + .size __return_r22_r31,.-__return_r22_r31 +#endif /* L_save_22c */ + +#ifdef L_save_23c + .text + .align 2 + .globl __save_r23_r31 + .type __save_r23_r31,@function + /* Allocate space and save registers 23 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r23_r31,r10. 
*/ +__save_r23_r31: +#ifdef __EP__ + mov ep,r1 + addi -32,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r31,28[ep] + mov r1,ep +#else + addi -32,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r31,28[sp] +#endif + jmp [r10] + .size __save_r23_r31,.-__save_r23_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r23_r31. */ + .align 2 + .globl __return_r23_r31 + .type __return_r23_r31,@function +__return_r23_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r31 + addi 32,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r31 + addi 32,sp,sp +#endif + jmp [r31] + .size __return_r23_r31,.-__return_r23_r31 +#endif /* L_save_23c */ + +#ifdef L_save_24c + .text + .align 2 + .globl __save_r24_r31 + .type __save_r24_r31,@function + /* Allocate space and save registers 24 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r24_r31,r10. */ +__save_r24_r31: +#ifdef __EP__ + mov ep,r1 + addi -28,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r31,24[ep] + mov r1,ep +#else + addi -28,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r31,24[sp] +#endif + jmp [r10] + .size __save_r24_r31,.-__save_r24_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r24_r31. */ + .align 2 + .globl __return_r24_r31 + .type __return_r24_r31,@function +__return_r24_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r31 + addi 28,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r31 + addi 28,sp,sp +#endif + jmp [r31] + .size __return_r24_r31,.-__return_r24_r31 +#endif /* L_save_24c */ + +#ifdef L_save_25c + .text + .align 2 + .globl __save_r25_r31 + .type __save_r25_r31,@function + /* Allocate space and save registers 25 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r25_r31,r10. */ +__save_r25_r31: +#ifdef __EP__ + mov ep,r1 + addi -24,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r31,20[ep] + mov r1,ep +#else + addi -24,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r31,20[sp] +#endif + jmp [r10] + .size __save_r25_r31,.-__save_r25_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r25_r31. 
*/ + .align 2 + .globl __return_r25_r31 + .type __return_r25_r31,@function +__return_r25_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r31 + addi 24,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r31 + addi 24,sp,sp +#endif + jmp [r31] + .size __return_r25_r31,.-__return_r25_r31 +#endif /* L_save_25c */ + +#ifdef L_save_26c + .text + .align 2 + .globl __save_r26_r31 + .type __save_r26_r31,@function + /* Allocate space and save registers 26 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r26_r31,r10. */ +__save_r26_r31: +#ifdef __EP__ + mov ep,r1 + addi -20,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r31,16[ep] + mov r1,ep +#else + addi -20,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r31,16[sp] +#endif + jmp [r10] + .size __save_r26_r31,.-__save_r26_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r26_r31. */ + .align 2 + .globl __return_r26_r31 + .type __return_r26_r31,@function +__return_r26_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r31 + addi 20,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r31 + addi 20,sp,sp +#endif + jmp [r31] + .size __return_r26_r31,.-__return_r26_r31 +#endif /* L_save_26c */ + +#ifdef L_save_27c + .text + .align 2 + .globl __save_r27_r31 + .type __save_r27_r31,@function + /* Allocate space and save registers 27 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r27_r31,r10. */ +__save_r27_r31: +#ifdef __EP__ + mov ep,r1 + addi -16,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r31,12[ep] + mov r1,ep +#else + addi -16,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r31,12[sp] +#endif + jmp [r10] + .size __save_r27_r31,.-__save_r27_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r27_r31. */ + .align 2 + .globl __return_r27_r31 + .type __return_r27_r31,@function +__return_r27_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r31 + addi 16,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r31 + addi 16,sp,sp +#endif + jmp [r31] + .size __return_r27_r31,.-__return_r27_r31 +#endif /* L_save_27c */ + +#ifdef L_save_28c + .text + .align 2 + .globl __save_r28_r31 + .type __save_r28_r31,@function + /* Allocate space and save registers 28 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r28_r31,r10. */ +__save_r28_r31: + addi -12,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r31,8[sp] + jmp [r10] + .size __save_r28_r31,.-__save_r28_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r28_r31. 
*/ + .align 2 + .globl __return_r28_r31 + .type __return_r28_r31,@function +__return_r28_r31: + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r31 + addi 12,sp,sp + jmp [r31] + .size __return_r28_r31,.-__return_r28_r31 +#endif /* L_save_28c */ + +#ifdef L_save_29c + .text + .align 2 + .globl __save_r29_r31 + .type __save_r29_r31,@function + /* Allocate space and save registers 29 & 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r29_r31,r10. */ +__save_r29_r31: + addi -8,sp,sp + st.w r29,0[sp] + st.w r31,4[sp] + jmp [r10] + .size __save_r29_r31,.-__save_r29_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r29_r31. */ + .align 2 + .globl __return_r29_r31 + .type __return_r29_r31,@function +__return_r29_r31: + ld.w 0[sp],r29 + ld.w 4[sp],r31 + addi 8,sp,sp + jmp [r31] + .size __return_r29_r31,.-__return_r29_r31 +#endif /* L_save_29c */ + +#ifdef L_save_31c + .text + .align 2 + .globl __save_r31 + .type __save_r31,@function + /* Allocate space and save register 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r31,r10. */ +__save_r31: + addi -4,sp,sp + st.w r31,0[sp] + jmp [r10] + .size __save_r31,.-__save_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r31. */ + .align 2 + .globl __return_r31 + .type __return_r31,@function +__return_r31: + ld.w 0[sp],r31 + addi 4,sp,sp + jmp [r31] + .size __return_r31,.-__return_r31 +#endif /* L_save_31c */ + +#ifdef L_save_interrupt + .text + .align 2 + .globl __save_interrupt + .type __save_interrupt,@function + /* Save registers r1, r4 on stack and load up with expected values. */ + /* Note, 20 bytes of stack have already been allocated. */ + /* Called via: jalr __save_interrupt,r10. */ +__save_interrupt: + /* add -20,sp ; st.w r11,16[sp] ; st.w r10,12[sp] ; */ + st.w ep,0[sp] + st.w gp,4[sp] + st.w r1,8[sp] + movhi hi(__ep),r0,ep + movea lo(__ep),ep,ep + movhi hi(__gp),r0,gp + movea lo(__gp),gp,gp + jmp [r10] + .size __save_interrupt,.-__save_interrupt + + /* Restore saved registers, deallocate stack and return from the interrupt. */ + /* Called via: jr __return_interrupt. */ + .align 2 + .globl __return_interrupt + .type __return_interrupt,@function +__return_interrupt: + ld.w 0[sp],ep + ld.w 4[sp],gp + ld.w 8[sp],r1 + ld.w 12[sp],r10 + ld.w 16[sp],r11 + addi 20,sp,sp + reti + .size __return_interrupt,.-__return_interrupt +#endif /* L_save_interrupt */ + +#ifdef L_save_all_interrupt + .text + .align 2 + .globl __save_all_interrupt + .type __save_all_interrupt,@function + /* Save all registers except for those saved in __save_interrupt. */ + /* Allocate enough stack for all of the registers & 16 bytes of space. */ + /* Called via: jalr __save_all_interrupt,r10. 
*/ +__save_all_interrupt: + addi -104,sp,sp +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sst.w r31,100[ep] + sst.w r2,96[ep] + sst.w gp,92[ep] + sst.w r6,88[ep] + sst.w r7,84[ep] + sst.w r8,80[ep] + sst.w r9,76[ep] + sst.w r11,72[ep] + sst.w r12,68[ep] + sst.w r13,64[ep] + sst.w r14,60[ep] + sst.w r15,56[ep] + sst.w r16,52[ep] + sst.w r17,48[ep] + sst.w r18,44[ep] + sst.w r19,40[ep] + sst.w r20,36[ep] + sst.w r21,32[ep] + sst.w r22,28[ep] + sst.w r23,24[ep] + sst.w r24,20[ep] + sst.w r25,16[ep] + sst.w r26,12[ep] + sst.w r27,8[ep] + sst.w r28,4[ep] + sst.w r29,0[ep] + mov r1,ep +#else + st.w r31,100[sp] + st.w r2,96[sp] + st.w gp,92[sp] + st.w r6,88[sp] + st.w r7,84[sp] + st.w r8,80[sp] + st.w r9,76[sp] + st.w r11,72[sp] + st.w r12,68[sp] + st.w r13,64[sp] + st.w r14,60[sp] + st.w r15,56[sp] + st.w r16,52[sp] + st.w r17,48[sp] + st.w r18,44[sp] + st.w r19,40[sp] + st.w r20,36[sp] + st.w r21,32[sp] + st.w r22,28[sp] + st.w r23,24[sp] + st.w r24,20[sp] + st.w r25,16[sp] + st.w r26,12[sp] + st.w r27,8[sp] + st.w r28,4[sp] + st.w r29,0[sp] +#endif + jmp [r10] + .size __save_all_interrupt,.-__save_all_interrupt + + .globl __restore_all_interrupt + .type __restore_all_interrupt,@function + /* Restore all registers saved in __save_all_interrupt and + deallocate the stack space. */ + /* Called via: jalr __restore_all_interrupt,r10. */ +__restore_all_interrupt: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 100[ep],r31 + sld.w 96[ep],r2 + sld.w 92[ep],gp + sld.w 88[ep],r6 + sld.w 84[ep],r7 + sld.w 80[ep],r8 + sld.w 76[ep],r9 + sld.w 72[ep],r11 + sld.w 68[ep],r12 + sld.w 64[ep],r13 + sld.w 60[ep],r14 + sld.w 56[ep],r15 + sld.w 52[ep],r16 + sld.w 48[ep],r17 + sld.w 44[ep],r18 + sld.w 40[ep],r19 + sld.w 36[ep],r20 + sld.w 32[ep],r21 + sld.w 28[ep],r22 + sld.w 24[ep],r23 + sld.w 20[ep],r24 + sld.w 16[ep],r25 + sld.w 12[ep],r26 + sld.w 8[ep],r27 + sld.w 4[ep],r28 + sld.w 0[ep],r29 + mov r1,ep +#else + ld.w 100[sp],r31 + ld.w 96[sp],r2 + ld.w 92[sp],gp + ld.w 88[sp],r6 + ld.w 84[sp],r7 + ld.w 80[sp],r8 + ld.w 76[sp],r9 + ld.w 72[sp],r11 + ld.w 68[sp],r12 + ld.w 64[sp],r13 + ld.w 60[sp],r14 + ld.w 56[sp],r15 + ld.w 52[sp],r16 + ld.w 48[sp],r17 + ld.w 44[sp],r18 + ld.w 40[sp],r19 + ld.w 36[sp],r20 + ld.w 32[sp],r21 + ld.w 28[sp],r22 + ld.w 24[sp],r23 + ld.w 20[sp],r24 + ld.w 16[sp],r25 + ld.w 12[sp],r26 + ld.w 8[sp],r27 + ld.w 4[sp],r28 + ld.w 0[sp],r29 +#endif + addi 104,sp,sp + jmp [r10] + .size __restore_all_interrupt,.-__restore_all_interrupt +#endif /* L_save_all_interrupt */ + +#if defined(__v850e__) || defined(__v850e1__) || defined(__v850e2__) || defined(__v850e2v3__) +#ifdef L_callt_save_r2_r29 + /* Put these functions into the call table area. */ + .call_table_text + + /* Allocate space and save registers 2, 20 .. 29 on the stack. */ + /* Called via: callt ctoff(__callt_save_r2_r29). */ + .align 2 +.L_save_r2_r29: + add -4, sp + st.w r2, 0[sp] + prepare {r20 - r29}, 0 + ctret + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: callt ctoff(__callt_return_r2_r29). */ + .align 2 +.L_return_r2_r29: + dispose 0, {r20-r29} + ld.w 0[sp], r2 + add 4, sp + jmp [r31] + + /* Place the offsets of the start of these routines into the call table. */ + .call_table_data + + .global __callt_save_r2_r29 + .type __callt_save_r2_r29,@function +__callt_save_r2_r29: .short ctoff(.L_save_r2_r29) + + .global __callt_return_r2_r29 + .type __callt_return_r2_r29,@function +__callt_return_r2_r29: .short ctoff(.L_return_r2_r29) + +#endif /* L_callt_save_r2_r29. 
*/ + +#ifdef L_callt_save_r2_r31 + /* Put these functions into the call table area. */ + .call_table_text + + /* Allocate space and save registers 2 and 20 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: callt ctoff(__callt_save_r2_r31). */ + .align 2 +.L_save_r2_r31: + add -4, sp + st.w r2, 0[sp] + prepare {r20 - r29, r31}, 0 + ctret + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: callt ctoff(__callt_return_r2_r31). */ + .align 2 +.L_return_r2_r31: + dispose 0, {r20 - r29, r31} + ld.w 0[sp], r2 + addi 4, sp, sp + jmp [r31] + + /* Place the offsets of the start of these routines into the call table. */ + .call_table_data + + .global __callt_save_r2_r31 + .type __callt_save_r2_r31,@function +__callt_save_r2_r31: .short ctoff(.L_save_r2_r31) + + .global __callt_return_r2_r31 + .type __callt_return_r2_r31,@function +__callt_return_r2_r31: .short ctoff(.L_return_r2_r31) + +#endif /* L_callt_save_r2_r31 */ + +#ifdef L_callt_save_interrupt + /* Put these functions into the call table area. */ + .call_table_text + + /* Save registers r1, ep, gp, r10 on stack and load up with expected values. */ + /* Called via: callt ctoff(__callt_save_interrupt). */ + .align 2 +.L_save_interrupt: + /* SP has already been moved before callt ctoff(_save_interrupt). */ + /* R1,R10,R11,ctpc,ctpsw has alread been saved bofore callt ctoff(_save_interrupt). */ + /* addi -28, sp, sp */ + /* st.w r1, 24[sp] */ + /* st.w r10, 12[sp] */ + /* st.w r11, 16[sp] */ + /* stsr ctpc, r10 */ + /* st.w r10, 20[sp] */ + /* stsr ctpsw, r10 */ + /* st.w r10, 24[sp] */ + st.w ep, 0[sp] + st.w gp, 4[sp] + st.w r1, 8[sp] + mov hilo(__ep),ep + mov hilo(__gp),gp + ctret + + .call_table_text + /* Restore saved registers, deallocate stack and return from the interrupt. */ + /* Called via: callt ctoff(__callt_restore_interrupt). */ + .align 2 + .globl __return_interrupt + .type __return_interrupt,@function +.L_return_interrupt: + ld.w 24[sp], r1 + ldsr r1, ctpsw + ld.w 20[sp], r1 + ldsr r1, ctpc + ld.w 16[sp], r11 + ld.w 12[sp], r10 + ld.w 8[sp], r1 + ld.w 4[sp], gp + ld.w 0[sp], ep + addi 28, sp, sp + reti + + /* Place the offsets of the start of these routines into the call table. */ + .call_table_data + + .global __callt_save_interrupt + .type __callt_save_interrupt,@function +__callt_save_interrupt: .short ctoff(.L_save_interrupt) + + .global __callt_return_interrupt + .type __callt_return_interrupt,@function +__callt_return_interrupt: .short ctoff(.L_return_interrupt) + +#endif /* L_callt_save_interrupt */ + +#ifdef L_callt_save_all_interrupt + /* Put these functions into the call table area. */ + .call_table_text + + /* Save all registers except for those saved in __save_interrupt. */ + /* Allocate enough stack for all of the registers & 16 bytes of space. */ + /* Called via: callt ctoff(__callt_save_all_interrupt). 
*/ + .align 2 +.L_save_all_interrupt: + addi -60, sp, sp +#ifdef __EP__ + mov ep, r1 + mov sp, ep + sst.w r2, 56[ep] + sst.w r5, 52[ep] + sst.w r6, 48[ep] + sst.w r7, 44[ep] + sst.w r8, 40[ep] + sst.w r9, 36[ep] + sst.w r11, 32[ep] + sst.w r12, 28[ep] + sst.w r13, 24[ep] + sst.w r14, 20[ep] + sst.w r15, 16[ep] + sst.w r16, 12[ep] + sst.w r17, 8[ep] + sst.w r18, 4[ep] + sst.w r19, 0[ep] + mov r1, ep +#else + st.w r2, 56[sp] + st.w r5, 52[sp] + st.w r6, 48[sp] + st.w r7, 44[sp] + st.w r8, 40[sp] + st.w r9, 36[sp] + st.w r11, 32[sp] + st.w r12, 28[sp] + st.w r13, 24[sp] + st.w r14, 20[sp] + st.w r15, 16[sp] + st.w r16, 12[sp] + st.w r17, 8[sp] + st.w r18, 4[sp] + st.w r19, 0[sp] +#endif + prepare {r20 - r29, r31}, 0 + ctret + + /* Restore all registers saved in __save_all_interrupt + deallocate the stack space. */ + /* Called via: callt ctoff(__callt_restore_all_interrupt). */ + .align 2 +.L_restore_all_interrupt: + dispose 0, {r20 - r29, r31} +#ifdef __EP__ + mov ep, r1 + mov sp, ep + sld.w 0 [ep], r19 + sld.w 4 [ep], r18 + sld.w 8 [ep], r17 + sld.w 12[ep], r16 + sld.w 16[ep], r15 + sld.w 20[ep], r14 + sld.w 24[ep], r13 + sld.w 28[ep], r12 + sld.w 32[ep], r11 + sld.w 36[ep], r9 + sld.w 40[ep], r8 + sld.w 44[ep], r7 + sld.w 48[ep], r6 + sld.w 52[ep], r5 + sld.w 56[ep], r2 + mov r1, ep +#else + ld.w 0 [sp], r19 + ld.w 4 [sp], r18 + ld.w 8 [sp], r17 + ld.w 12[sp], r16 + ld.w 16[sp], r15 + ld.w 20[sp], r14 + ld.w 24[sp], r13 + ld.w 28[sp], r12 + ld.w 32[sp], r11 + ld.w 36[sp], r9 + ld.w 40[sp], r8 + ld.w 44[sp], r7 + ld.w 48[sp], r6 + ld.w 52[sp], r5 + ld.w 56[sp], r2 +#endif + addi 60, sp, sp + ctret + + /* Place the offsets of the start of these routines into the call table. */ + .call_table_data + + .global __callt_save_all_interrupt + .type __callt_save_all_interrupt,@function +__callt_save_all_interrupt: .short ctoff(.L_save_all_interrupt) + + .global __callt_restore_all_interrupt + .type __callt_restore_all_interrupt,@function +__callt_restore_all_interrupt: .short ctoff(.L_restore_all_interrupt) + +#endif /* L_callt_save_all_interrupt */ + + +#define MAKE_CALLT_FUNCS( START ) \ + .call_table_text ;\ + .align 2 ;\ + /* Allocate space and save registers START .. r29 on the stack. */ ;\ + /* Called via: callt ctoff(__callt_save_START_r29). */ ;\ +.L_save_##START##_r29: ;\ + prepare { START - r29 }, 0 ;\ + ctret ;\ + ;\ + /* Restore saved registers, deallocate stack and return. */ ;\ + /* Called via: callt ctoff(__return_START_r29). */ ;\ + .align 2 ;\ +.L_return_##START##_r29: ;\ + dispose 0, { START - r29 }, r31 ;\ + ;\ + /* Place the offsets of the start of these funcs into the call table. */;\ + .call_table_data ;\ + ;\ + .global __callt_save_##START##_r29 ;\ + .type __callt_save_##START##_r29,@function ;\ +__callt_save_##START##_r29: .short ctoff(.L_save_##START##_r29 ) ;\ + ;\ + .global __callt_return_##START##_r29 ;\ + .type __callt_return_##START##_r29,@function ;\ +__callt_return_##START##_r29: .short ctoff(.L_return_##START##_r29 ) + + +#define MAKE_CALLT_CFUNCS( START ) \ + .call_table_text ;\ + .align 2 ;\ + /* Allocate space and save registers START .. r31 on the stack. */ ;\ + /* Called via: callt ctoff(__callt_save_START_r31c). */ ;\ +.L_save_##START##_r31c: ;\ + prepare { START - r29, r31}, 0 ;\ + ctret ;\ + ;\ + /* Restore saved registers, deallocate stack and return. */ ;\ + /* Called via: callt ctoff(__return_START_r31c). 
*/ ;\ + .align 2 ;\ +.L_return_##START##_r31c: ;\ + dispose 0, { START - r29, r31}, r31 ;\ + ;\ + /* Place the offsets of the start of these funcs into the call table. */;\ + .call_table_data ;\ + ;\ + .global __callt_save_##START##_r31c ;\ + .type __callt_save_##START##_r31c,@function ;\ +__callt_save_##START##_r31c: .short ctoff(.L_save_##START##_r31c ) ;\ + ;\ + .global __callt_return_##START##_r31c ;\ + .type __callt_return_##START##_r31c,@function ;\ +__callt_return_##START##_r31c: .short ctoff(.L_return_##START##_r31c ) + + +#ifdef L_callt_save_20 + MAKE_CALLT_FUNCS (r20) +#endif +#ifdef L_callt_save_21 + MAKE_CALLT_FUNCS (r21) +#endif +#ifdef L_callt_save_22 + MAKE_CALLT_FUNCS (r22) +#endif +#ifdef L_callt_save_23 + MAKE_CALLT_FUNCS (r23) +#endif +#ifdef L_callt_save_24 + MAKE_CALLT_FUNCS (r24) +#endif +#ifdef L_callt_save_25 + MAKE_CALLT_FUNCS (r25) +#endif +#ifdef L_callt_save_26 + MAKE_CALLT_FUNCS (r26) +#endif +#ifdef L_callt_save_27 + MAKE_CALLT_FUNCS (r27) +#endif +#ifdef L_callt_save_28 + MAKE_CALLT_FUNCS (r28) +#endif +#ifdef L_callt_save_29 + MAKE_CALLT_FUNCS (r29) +#endif + +#ifdef L_callt_save_20c + MAKE_CALLT_CFUNCS (r20) +#endif +#ifdef L_callt_save_21c + MAKE_CALLT_CFUNCS (r21) +#endif +#ifdef L_callt_save_22c + MAKE_CALLT_CFUNCS (r22) +#endif +#ifdef L_callt_save_23c + MAKE_CALLT_CFUNCS (r23) +#endif +#ifdef L_callt_save_24c + MAKE_CALLT_CFUNCS (r24) +#endif +#ifdef L_callt_save_25c + MAKE_CALLT_CFUNCS (r25) +#endif +#ifdef L_callt_save_26c + MAKE_CALLT_CFUNCS (r26) +#endif +#ifdef L_callt_save_27c + MAKE_CALLT_CFUNCS (r27) +#endif +#ifdef L_callt_save_28c + MAKE_CALLT_CFUNCS (r28) +#endif +#ifdef L_callt_save_29c + MAKE_CALLT_CFUNCS (r29) +#endif + + +#ifdef L_callt_save_31c + .call_table_text + .align 2 + /* Allocate space and save register r31 on the stack. */ + /* Called via: callt ctoff(__callt_save_r31c). */ +.L_callt_save_r31c: + prepare {r31}, 0 + ctret + + /* Restore saved registers, deallocate stack and return. */ + /* Called via: callt ctoff(__return_r31c). */ + .align 2 +.L_callt_return_r31c: + dispose 0, {r31}, r31 + + /* Place the offsets of the start of these funcs into the call table. */ + .call_table_data + + .global __callt_save_r31c + .type __callt_save_r31c,@function +__callt_save_r31c: .short ctoff(.L_callt_save_r31c) + + .global __callt_return_r31c + .type __callt_return_r31c,@function +__callt_return_r31c: .short ctoff(.L_callt_return_r31c) +#endif + +#endif /* __v850e__ */ + +/* libgcc2 routines for NEC V850. */ +/* Double Integer Arithmetical Operation. */ + +#ifdef L_negdi2 + .text + .global ___negdi2 + .type ___negdi2, @function +___negdi2: + not r6, r10 + add 1, r10 + setf l, r6 + not r7, r11 + add r6, r11 + jmp [lp] + + .size ___negdi2,.-___negdi2 +#endif + +#ifdef L_cmpdi2 + .text + .global ___cmpdi2 + .type ___cmpdi2,@function +___cmpdi2: + # Signed comparison bitween each high word. + cmp r9, r7 + be .L_cmpdi_cmp_low + setf ge, r10 + setf gt, r6 + add r6, r10 + jmp [lp] +.L_cmpdi_cmp_low: + # Unsigned comparigon bitween each low word. + cmp r8, r6 + setf nl, r10 + setf h, r6 + add r6, r10 + jmp [lp] + .size ___cmpdi2, . - ___cmpdi2 +#endif + +#ifdef L_ucmpdi2 + .text + .global ___ucmpdi2 + .type ___ucmpdi2,@function +___ucmpdi2: + cmp r9, r7 # Check if each high word are same. + bne .L_ucmpdi_check_psw + cmp r8, r6 # Compare the word. +.L_ucmpdi_check_psw: + setf nl, r10 # + setf h, r6 # + add r6, r10 # Add the result of comparison NL and comparison H. + jmp [lp] + .size ___ucmpdi2, . 
- ___ucmpdi2 +#endif + +#ifdef L_muldi3 + .text + .global ___muldi3 + .type ___muldi3,@function +___muldi3: +#ifdef __v850__ + jarl __save_r26_r31, r10 + addi 16, sp, sp + mov r6, r28 + shr 15, r28 + movea lo(32767), r0, r14 + and r14, r28 + mov r8, r10 + shr 15, r10 + and r14, r10 + mov r6, r19 + shr 30, r19 + mov r7, r12 + shl 2, r12 + or r12, r19 + and r14, r19 + mov r8, r13 + shr 30, r13 + mov r9, r12 + shl 2, r12 + or r12, r13 + and r14, r13 + mov r7, r11 + shr 13, r11 + and r14, r11 + mov r9, r31 + shr 13, r31 + and r14, r31 + mov r7, r29 + shr 28, r29 + and r14, r29 + mov r9, r12 + shr 28, r12 + and r14, r12 + and r14, r6 + and r14, r8 + mov r6, r14 + mulh r8, r14 + mov r6, r16 + mulh r10, r16 + mov r6, r18 + mulh r13, r18 + mov r6, r15 + mulh r31, r15 + mulh r12, r6 + mov r28, r17 + mulh r10, r17 + add -16, sp + mov r28, r12 + mulh r8, r12 + add r17, r18 + mov r28, r17 + mulh r31, r17 + add r12, r16 + mov r28, r12 + mulh r13, r12 + add r17, r6 + mov r19, r17 + add r12, r15 + mov r19, r12 + mulh r8, r12 + mulh r10, r17 + add r12, r18 + mov r19, r12 + mulh r13, r12 + add r17, r15 + mov r11, r13 + mulh r8, r13 + add r12, r6 + mov r11, r12 + mulh r10, r12 + add r13, r15 + mulh r29, r8 + add r12, r6 + mov r16, r13 + shl 15, r13 + add r14, r13 + mov r18, r12 + shl 30, r12 + mov r13, r26 + add r12, r26 + shr 15, r14 + movhi hi(131071), r0, r12 + movea lo(131071), r12, r13 + and r13, r14 + mov r16, r12 + and r13, r12 + add r12, r14 + mov r18, r12 + shl 15, r12 + and r13, r12 + add r12, r14 + shr 17, r14 + shr 17, r16 + add r14, r16 + shl 13, r15 + shr 2, r18 + add r18, r15 + add r15, r16 + mov r16, r27 + add r8, r6 + shl 28, r6 + add r6, r27 + mov r26, r10 + mov r27, r11 + jr __return_r26_r31 +#else /* defined(__v850e__) */ + /* (Ahi << 32 + Alo) * (Bhi << 32 + Blo) */ + /* r7 r6 r9 r8 */ + mov r8, r10 + mulu r7, r8, r0 /* Ahi * Blo */ + mulu r6, r9, r0 /* Alo * Bhi */ + mulu r6, r10, r11 /* Alo * Blo */ + add r8, r11 + add r9, r11 + jmp [r31] +#endif /* defined(__v850e__) */ + .size ___muldi3, . - ___muldi3 +#endif + diff --git a/libgcc/config/v850/t-v850 b/libgcc/config/v850/t-v850 new file mode 100644 index 00000000000..b61703ace09 --- /dev/null +++ b/libgcc/config/v850/t-v850 @@ -0,0 +1,60 @@ +LIB1ASMSRC = v850/lib1funcs.S +LIB1ASMFUNCS = _mulsi3 \ + _divsi3 \ + _udivsi3 \ + _modsi3 \ + _umodsi3 \ + _save_2 \ + _save_20 \ + _save_21 \ + _save_22 \ + _save_23 \ + _save_24 \ + _save_25 \ + _save_26 \ + _save_27 \ + _save_28 \ + _save_29 \ + _save_2c \ + _save_20c \ + _save_21c \ + _save_22c \ + _save_23c \ + _save_24c \ + _save_25c \ + _save_26c \ + _save_27c \ + _save_28c \ + _save_29c \ + _save_31c \ + _save_interrupt \ + _save_all_interrupt \ + _callt_save_20 \ + _callt_save_21 \ + _callt_save_22 \ + _callt_save_23 \ + _callt_save_24 \ + _callt_save_25 \ + _callt_save_26 \ + _callt_save_27 \ + _callt_save_28 \ + _callt_save_29 \ + _callt_save_20c \ + _callt_save_21c \ + _callt_save_22c \ + _callt_save_23c \ + _callt_save_24c \ + _callt_save_25c \ + _callt_save_26c \ + _callt_save_27c \ + _callt_save_28c \ + _callt_save_29c \ + _callt_save_31c \ + _callt_save_interrupt \ + _callt_save_all_interrupt \ + _callt_save_r2_r29 \ + _callt_save_r2_r31 \ + _negdi2 \ + _cmpdi2 \ + _ucmpdi2 \ + _muldi3 diff --git a/libgcc/config/vax/lib1funcs.S b/libgcc/config/vax/lib1funcs.S new file mode 100644 index 00000000000..1d57b56dad9 --- /dev/null +++ b/libgcc/config/vax/lib1funcs.S @@ -0,0 +1,92 @@ +/* Copyright (C) 2009 Free Software Foundation, Inc. + This file is part of GCC. 
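Two notes on the DImode helpers shown above. __cmpdi2 and __ucmpdi2 follow the usual libgcc convention of returning 0, 1 or 2 for less-than, equal and greater-than; the setf/setf/add pairs build exactly that sum. The V850E branch of __muldi3 forms the low 64 bits of the product from three 32x32 multiplies, since the Ahi*Bhi term only affects bits 64 and up. A C sketch of both, with invented names and not taken from the patch:

/* 0 if a < b, 1 if a == b, 2 if a > b.  */
int
cmpdi2_model (long long a, long long b)
{
  return (a >= b) + (a > b);              /* setf ge + setf gt */
}

int
ucmpdi2_model (unsigned long long a, unsigned long long b)
{
  return (a >= b) + (a > b);              /* setf nl + setf h */
}

/* Low 64 bits of a * b from three 32x32 multiplies; only the low
   halves of the two cross products are needed.  */
unsigned long long
muldi3_model (unsigned long long a, unsigned long long b)
{
  unsigned int alo = (unsigned int) a, ahi = (unsigned int) (a >> 32);
  unsigned int blo = (unsigned int) b, bhi = (unsigned int) (b >> 32);
  unsigned long long lo = (unsigned long long) alo * blo;
  unsigned int hi = (unsigned int) (lo >> 32) + ahi * blo + alo * bhi;

  return ((unsigned long long) hi << 32) | (unsigned int) lo;
}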
+ Contributed by Maciej W. Rozycki . + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef L_udivsi3 + .text + .globl __udivsi3 + .type __udivsi3, @function +__udivsi3: + .word 0 + movl 8(%ap), %r1 + blss 0f /* Check bit #31 of divisor. */ + movl 4(%ap), %r2 + blss 1f /* Check bit #31 of dividend. */ + + /* Both zero, do a standard division. */ + + divl3 %r1, %r2, %r0 + ret + + /* MSB of divisor set, only 1 or 0 may result. */ +0: + decl %r1 + clrl %r0 + cmpl %r1, 4(%ap) + adwc $0, %r0 + ret + + /* MSB of dividend set, do an extended division. */ +1: + clrl %r3 + ediv %r1, %r2, %r0, %r3 + ret + .size __udivsi3, . - __udivsi3 + .previous +#endif + +#ifdef L_umodsi3 + .text + .globl __umodsi3 + .type __umodsi3, @function +__umodsi3: + .word 0 + movl 8(%ap), %r1 + blss 0f /* Check bit #31 of divisor. */ + movl 4(%ap), %r2 + blss 1f /* Check bit #31 of dividend. */ + + /* Both zero, do a standard division. */ + + divl3 %r1, %r2, %r0 + mull2 %r0, %r1 + subl3 %r1, %r2, %r0 + ret + + /* MSB of divisor set, subtract the divisor at most once. */ +0: + movl 4(%ap), %r2 + clrl %r0 + cmpl %r2, %r1 + sbwc $0, %r0 + bicl2 %r0, %r1 + subl3 %r1, %r2, %r0 + ret + + /* MSB of dividend set, do an extended division. */ +1: + clrl %r3 + ediv %r1, %r2, %r3, %r0 + ret + .size __umodsi3, . - __umodsi3 + .previous +#endif diff --git a/libgcc/config/vax/t-linux b/libgcc/config/vax/t-linux new file mode 100644 index 00000000000..17929c8717c --- /dev/null +++ b/libgcc/config/vax/t-linux @@ -0,0 +1,2 @@ +LIB1ASMSRC = vax/lib1funcs.S +LIB1ASMFUNCS = _udivsi3 _umodsi3 diff --git a/libgcc/config/xtensa/ieee754-df.S b/libgcc/config/xtensa/ieee754-df.S new file mode 100644 index 00000000000..9b46889bdc2 --- /dev/null +++ b/libgcc/config/xtensa/ieee754-df.S @@ -0,0 +1,2388 @@ +/* IEEE-754 double-precision functions for Xtensa + Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
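The VAX routines above need the case analysis because divl3 and ediv are signed operations. If the divisor has bit 31 set, the quotient can only be 0 or 1; if the dividend has bit 31 set, the value is widened to 64 bits and handed to ediv; otherwise an ordinary divl3 suffices. Roughly, in C (a sketch with invented names, not part of the patch):

unsigned int
vax_udiv_model (unsigned int x, unsigned int y)
{
  if (y & 0x80000000u)           /* 0: huge divisor, quotient 0 or 1 */
    return x >= y;
  if (x & 0x80000000u)           /* 1: huge dividend, extended divide */
    return (unsigned int) ((unsigned long long) x / y);
  return x / y;                  /* plain divl3 */
}

unsigned int
vax_urem_model (unsigned int x, unsigned int y)
{
  return x - vax_udiv_model (x, y) * y;   /* what __umodsi3 computes */
}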
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef __XTENSA_EB__ +#define xh a2 +#define xl a3 +#define yh a4 +#define yl a5 +#else +#define xh a3 +#define xl a2 +#define yh a5 +#define yl a4 +#endif + +/* Warning! The branch displacements for some Xtensa branch instructions + are quite small, and this code has been carefully laid out to keep + branch targets in range. If you change anything, be sure to check that + the assembler is not relaxing anything to branch over a jump. */ + +#ifdef L_negdf2 + + .align 4 + .global __negdf2 + .type __negdf2, @function +__negdf2: + leaf_entry sp, 16 + movi a4, 0x80000000 + xor xh, xh, a4 + leaf_return + +#endif /* L_negdf2 */ + +#ifdef L_addsubdf3 + + /* Addition */ +__adddf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Ladd_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall yh, a6, 1f + /* If x is a NaN, return it. Otherwise, return y. */ + slli a7, xh, 12 + or a7, a7, xl + beqz a7, .Ladd_ynan_or_inf +1: leaf_return + +.Ladd_ynan_or_inf: + /* Return y. */ + mov xh, yh + mov xl, yl + leaf_return + +.Ladd_opposite_signs: + /* Operand signs differ. Do a subtraction. */ + slli a7, a6, 11 + xor yh, yh, a7 + j .Lsub_same_sign + + .align 4 + .global __adddf3 + .type __adddf3, @function +__adddf3: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + + /* Check if the two operands have the same sign. */ + xor a7, xh, yh + bltz a7, .Ladd_opposite_signs + +.Ladd_same_sign: + /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ + ball xh, a6, .Ladd_xnan_or_inf + ball yh, a6, .Ladd_ynan_or_inf + + /* Compare the exponents. The smaller operand will be shifted + right by the exponent difference and added to the larger + one. */ + extui a7, xh, 20, 12 + extui a8, yh, 20, 12 + bltu a7, a8, .Ladd_shiftx + +.Ladd_shifty: + /* Check if the smaller (or equal) exponent is zero. */ + bnone yh, a6, .Ladd_yexpzero + + /* Replace yh sign/exponent with 0x001. */ + or yh, yh, a6 + slli yh, yh, 11 + srli yh, yh, 11 + +.Ladd_yexpdiff: + /* Compute the exponent difference. Optimize for difference < 32. */ + sub a10, a7, a8 + bgeui a10, 32, .Ladd_bigshifty + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out of yl are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, yl, a9 + src yl, yh, yl + srl yh, yh + +.Ladd_addy: + /* Do the 64-bit addition. */ + add xl, xl, yl + add xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, 1 +1: + /* Check if the add overflowed into the exponent. */ + extui a10, xh, 20, 12 + beq a10, a7, .Ladd_round + mov a8, a7 + j .Ladd_carry + +.Ladd_yexpzero: + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0", and increment the apparent exponent + because subnormals behave as if they had the minimum (nonzero) + exponent. Test for the case when both exponents are zero. */ + slli yh, yh, 12 + srli yh, yh, 12 + bnone xh, a6, .Ladd_bothexpzero + addi a8, a8, 1 + j .Ladd_yexpdiff + +.Ladd_bothexpzero: + /* Both exponents are zero. Handle this as a special case. There + is no need to shift or round, and the normal code for handling + a carry into the exponent field will not work because it + assumes there is an implicit "1.0" that needs to be added. 
*/ + add xl, xl, yl + add xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, 1 +1: leaf_return + +.Ladd_bigshifty: + /* Exponent difference > 64 -- just return the bigger value. */ + bgeui a10, 64, 1b + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out are saved in a9 for rounding the result. */ + ssr a10 + sll a11, yl /* lost bits shifted out of yl */ + src a9, yh, yl + srl yl, yh + movi yh, 0 + beqz a11, .Ladd_addy + or a9, a9, a10 /* any positive, nonzero value will work */ + j .Ladd_addy + +.Ladd_xexpzero: + /* Same as "yexpzero" except skip handling the case when both + exponents are zero. */ + slli xh, xh, 12 + srli xh, xh, 12 + addi a7, a7, 1 + j .Ladd_xexpdiff + +.Ladd_shiftx: + /* Same thing as the "shifty" code, but with x and y swapped. Also, + because the exponent difference is always nonzero in this version, + the shift sequence can use SLL and skip loading a constant zero. */ + bnone xh, a6, .Ladd_xexpzero + + or xh, xh, a6 + slli xh, xh, 11 + srli xh, xh, 11 + +.Ladd_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Ladd_bigshiftx + + ssr a10 + sll a9, xl + src xl, xh, xl + srl xh, xh + +.Ladd_addx: + add xl, xl, yl + add xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, 1 +1: + /* Check if the add overflowed into the exponent. */ + extui a10, xh, 20, 12 + bne a10, a8, .Ladd_carry + +.Ladd_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi xl, xl, 1 + beqz xl, .Ladd_roundcarry + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_bigshiftx: + /* Mostly the same thing as "bigshifty".... */ + bgeui a10, 64, .Ladd_returny + + ssr a10 + sll a11, xl + src a9, xh, xl + srl xl, xh + movi xh, 0 + beqz a11, .Ladd_addx + or a9, a9, a10 + j .Ladd_addx + +.Ladd_returny: + mov xh, yh + mov xl, yl + leaf_return + +.Ladd_carry: + /* The addition has overflowed into the exponent field, so the + value needs to be renormalized. The mantissa of the result + can be recovered by subtracting the original exponent and + adding 0x100000 (which is the explicit "1.0" for the + mantissa of the non-shifted operand -- the "1.0" for the + shifted operand was already added). The mantissa can then + be shifted right by one bit. The explicit "1.0" of the + shifted mantissa then needs to be replaced by the exponent, + incremented by one to account for the normalizing shift. + It is faster to combine these operations: do the shift first + and combine the additions and subtractions. If x is the + original exponent, the result is: + shifted mantissa - (x << 19) + (1 << 19) + (x << 20) + or: + shifted mantissa + ((x + 1) << 19) + Note that the exponent is incremented here by leaving the + explicit "1.0" of the mantissa in the exponent field. */ + + /* Shift xh/xl right by one bit. Save the lsb of xl. */ + mov a10, xl + ssai 1 + src xl, xh, xl + srl xh, xh + + /* See explanation above. The original exponent is in a8. */ + addi a8, a8, 1 + slli a8, a8, 19 + add xh, xh, a8 + + /* Return an Infinity if the exponent overflowed. */ + ball xh, a6, .Ladd_infinity + + /* Same thing as the "round" code except the msb of the leftover + fraction is bit 0 of a10, with the rest of the fraction in a9. */ + bbci.l a10, 0, 1f + addi xl, xl, 1 + beqz xl, .Ladd_roundcarry + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_infinity: + /* Clear the mantissa. */ + movi xl, 0 + srli xh, xh, 20 + slli xh, xh, 20 + + /* The sign bit may have been lost in a carry-out. Put it back. 
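The rounding performed at .Ladd_round above is ordinary round-to-nearest-even, with the guard word acting as the discarded fraction (its msb stands for 1/2 ulp). A minimal C sketch with an invented helper name, not part of the patch:

#include <stdint.h>

static uint64_t
round_nearest_even (uint64_t mant, uint64_t guard)
{
  if (guard & (1ULL << 63))        /* leftover fraction >= 1/2 */
    {
      mant++;                      /* round up */
      if ((guard << 1) == 0)       /* exactly 1/2: round down to even */
        mant &= ~(uint64_t) 1;
    }
  return mant;
}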
*/ + slli a8, a8, 1 + or xh, xh, a8 + leaf_return + +.Ladd_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + leaf_return + +.Ladd_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow to the exponent is OK. */ + leaf_return + + + /* Subtraction */ +__subdf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Lsub_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall yh, a6, 1f + /* Both x and y are either NaN or Inf, so the result is NaN. */ + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 +1: leaf_return + +.Lsub_ynan_or_inf: + /* Negate y and return it. */ + slli a7, a6, 11 + xor xh, yh, a7 + mov xl, yl + leaf_return + +.Lsub_opposite_signs: + /* Operand signs differ. Do an addition. */ + slli a7, a6, 11 + xor yh, yh, a7 + j .Ladd_same_sign + + .align 4 + .global __subdf3 + .type __subdf3, @function +__subdf3: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + + /* Check if the two operands have the same sign. */ + xor a7, xh, yh + bltz a7, .Lsub_opposite_signs + +.Lsub_same_sign: + /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ + ball xh, a6, .Lsub_xnan_or_inf + ball yh, a6, .Lsub_ynan_or_inf + + /* Compare the operands. In contrast to addition, the entire + value matters here. */ + extui a7, xh, 20, 11 + extui a8, yh, 20, 11 + bltu xh, yh, .Lsub_xsmaller + beq xh, yh, .Lsub_compare_low + +.Lsub_ysmaller: + /* Check if the smaller (or equal) exponent is zero. */ + bnone yh, a6, .Lsub_yexpzero + + /* Replace yh sign/exponent with 0x001. */ + or yh, yh, a6 + slli yh, yh, 11 + srli yh, yh, 11 + +.Lsub_yexpdiff: + /* Compute the exponent difference. Optimize for difference < 32. */ + sub a10, a7, a8 + bgeui a10, 32, .Lsub_bigshifty + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out of yl are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, yl, a9 + src yl, yh, yl + srl yh, yh + +.Lsub_suby: + /* Do the 64-bit subtraction. */ + sub xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, -1 +1: sub xl, xl, yl + + /* Subtract the leftover bits in a9 from zero and propagate any + borrow from xh/xl. */ + neg a9, a9 + beqz a9, 1f + addi a5, xh, -1 + moveqz xh, a5, xl + addi xl, xl, -1 +1: + /* Check if the subtract underflowed into the exponent. */ + extui a10, xh, 20, 11 + beq a10, a7, .Lsub_round + j .Lsub_borrow + +.Lsub_compare_low: + /* The high words are equal. Compare the low words. */ + bltu xl, yl, .Lsub_xsmaller + bltu yl, xl, .Lsub_ysmaller + /* The operands are equal. Return 0.0. */ + movi xh, 0 + movi xl, 0 +1: leaf_return + +.Lsub_yexpzero: + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0". Unless x is also a subnormal, increment + y's apparent exponent because subnormals behave as if they had + the minimum (nonzero) exponent. */ + slli yh, yh, 12 + srli yh, yh, 12 + bnone xh, a6, .Lsub_yexpdiff + addi a8, a8, 1 + j .Lsub_yexpdiff + +.Lsub_bigshifty: + /* Exponent difference > 64 -- just return the bigger value. */ + bgeui a10, 64, 1b + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out are saved in a9 for rounding the result. 
*/ + ssr a10 + sll a11, yl /* lost bits shifted out of yl */ + src a9, yh, yl + srl yl, yh + movi yh, 0 + beqz a11, .Lsub_suby + or a9, a9, a10 /* any positive, nonzero value will work */ + j .Lsub_suby + +.Lsub_xsmaller: + /* Same thing as the "ysmaller" code, but with x and y swapped and + with y negated. */ + bnone xh, a6, .Lsub_xexpzero + + or xh, xh, a6 + slli xh, xh, 11 + srli xh, xh, 11 + +.Lsub_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Lsub_bigshiftx + + ssr a10 + movi a9, 0 + src a9, xl, a9 + src xl, xh, xl + srl xh, xh + + /* Negate y. */ + slli a11, a6, 11 + xor yh, yh, a11 + +.Lsub_subx: + sub xl, yl, xl + sub xh, yh, xh + bgeu yl, xl, 1f + addi xh, xh, -1 +1: + /* Subtract the leftover bits in a9 from zero and propagate any + borrow from xh/xl. */ + neg a9, a9 + beqz a9, 1f + addi a5, xh, -1 + moveqz xh, a5, xl + addi xl, xl, -1 +1: + /* Check if the subtract underflowed into the exponent. */ + extui a10, xh, 20, 11 + bne a10, a8, .Lsub_borrow + +.Lsub_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi xl, xl, 1 + beqz xl, .Lsub_roundcarry + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Lsub_exactlyhalf +1: leaf_return + +.Lsub_xexpzero: + /* Same as "yexpzero". */ + slli xh, xh, 12 + srli xh, xh, 12 + bnone yh, a6, .Lsub_xexpdiff + addi a7, a7, 1 + j .Lsub_xexpdiff + +.Lsub_bigshiftx: + /* Mostly the same thing as "bigshifty", but with the sign bit of the + shifted value set so that the subsequent subtraction flips the + sign of y. */ + bgeui a10, 64, .Lsub_returny + + ssr a10 + sll a11, xl + src a9, xh, xl + srl xl, xh + slli xh, a6, 11 /* set sign bit of xh */ + beqz a11, .Lsub_subx + or a9, a9, a10 + j .Lsub_subx + +.Lsub_returny: + /* Negate and return y. */ + slli a7, a6, 11 + xor xh, yh, a7 + mov xl, yl + leaf_return + +.Lsub_borrow: + /* The subtraction has underflowed into the exponent field, so the + value needs to be renormalized. Shift the mantissa left as + needed to remove any leading zeros and adjust the exponent + accordingly. If the exponent is not large enough to remove + all the leading zeros, the result will be a subnormal value. */ + + slli a8, xh, 12 + beqz a8, .Lsub_xhzero + do_nsau a6, a8, a7, a11 + srli a8, a8, 12 + bge a6, a10, .Lsub_subnormal + addi a6, a6, 1 + +.Lsub_shift_lt32: + /* Shift the mantissa (a8/xl/a9) left by a6. */ + ssl a6 + src a8, a8, xl + src xl, xl, a9 + sll a9, a9 + + /* Combine the shifted mantissa with the sign and exponent, + decrementing the exponent by a6. (The exponent has already + been decremented by one due to the borrow from the subtraction, + but adding the mantissa will increment the exponent by one.) */ + srli xh, xh, 20 + sub xh, xh, a6 + slli xh, xh, 20 + add xh, xh, a8 + j .Lsub_round + +.Lsub_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + leaf_return + +.Lsub_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow to the exponent is OK. */ + leaf_return + +.Lsub_xhzero: + /* When normalizing the result, all the mantissa bits in the high + word are zero. Shift by "20 + (leading zero count of xl) + 1". 
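The renormalization described above (shift by 20 + clz(xl) + 1 when the high word is empty) is what a C version would do with a count-leading-zeros primitive, which is the job NSAU performs via do_nsau. A rough sketch assuming the fraction fits in 53 bits and ignoring the subnormal clamp; the helper is hypothetical and not part of the patch:

#include <stdint.h>

static uint64_t
renormalize (uint64_t frac, int *exp)
{
  if (frac == 0)
    return 0;                                /* exact zero */
  int shift = __builtin_clzll (frac) - 11;   /* move the leading 1 to bit 52 */
  *exp -= shift;                             /* frac < 2^53, so shift >= 0 */
  return frac << shift;
}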
*/ + do_nsau a6, xl, a7, a11 + addi a6, a6, 21 + blt a10, a6, .Lsub_subnormal + +.Lsub_normalize_shift: + bltui a6, 32, .Lsub_shift_lt32 + + ssl a6 + src a8, xl, a9 + sll xl, a9 + movi a9, 0 + + srli xh, xh, 20 + sub xh, xh, a6 + slli xh, xh, 20 + add xh, xh, a8 + j .Lsub_round + +.Lsub_subnormal: + /* The exponent is too small to shift away all the leading zeros. + Set a6 to the current exponent (which has already been + decremented by the borrow) so that the exponent of the result + will be zero. Do not add 1 to a6 in this case, because: (1) + adding the mantissa will not increment the exponent, so there is + no need to subtract anything extra from the exponent to + compensate, and (2) the effective exponent of a subnormal is 1 + not 0 so the shift amount must be 1 smaller than normal. */ + mov a6, a10 + j .Lsub_normalize_shift + +#endif /* L_addsubdf3 */ + +#ifdef L_muldf3 + + /* Multiplication */ +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + +__muldf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Lmul_xexpzero: + /* Clear the sign bit of x. */ + slli xh, xh, 1 + srli xh, xh, 1 + + /* If x is zero, return zero. */ + or a10, xh, xl + beqz a10, .Lmul_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + beqz xh, .Lmul_xh_zero + do_nsau a10, xh, a11, a12 + addi a10, a10, -11 + ssl a10 + src xh, xh, xl + sll xl, xl + movi a8, 1 + sub a8, a8, a10 + j .Lmul_xnormalized +.Lmul_xh_zero: + do_nsau a10, xl, a11, a12 + addi a10, a10, -11 + movi a8, -31 + sub a8, a8, a10 + ssl a10 + bltz a10, .Lmul_xl_srl + sll xh, xl + movi xl, 0 + j .Lmul_xnormalized +.Lmul_xl_srl: + srl xh, xl + sll xl, xl + j .Lmul_xnormalized + +.Lmul_yexpzero: + /* Clear the sign bit of y. */ + slli yh, yh, 1 + srli yh, yh, 1 + + /* If y is zero, return zero. */ + or a10, yh, yl + beqz a10, .Lmul_return_zero + + /* Normalize y. Adjust the exponent in a9. */ + beqz yh, .Lmul_yh_zero + do_nsau a10, yh, a11, a12 + addi a10, a10, -11 + ssl a10 + src yh, yh, yl + sll yl, yl + movi a9, 1 + sub a9, a9, a10 + j .Lmul_ynormalized +.Lmul_yh_zero: + do_nsau a10, yl, a11, a12 + addi a10, a10, -11 + movi a9, -31 + sub a9, a9, a10 + ssl a10 + bltz a10, .Lmul_yl_srl + sll yh, yl + movi yl, 0 + j .Lmul_ynormalized +.Lmul_yl_srl: + srl yh, yl + sll yl, yl + j .Lmul_ynormalized + +.Lmul_return_zero: + /* Return zero with the appropriate sign bit. */ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + j .Lmul_done + +.Lmul_xnan_or_inf: + /* If y is zero, return NaN. */ + bnez yl, 1f + slli a8, yh, 1 + bnez a8, 1f + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 + j .Lmul_done +1: + /* If y is NaN, return y. */ + bnall yh, a6, .Lmul_returnx + slli a8, yh, 12 + or a8, a8, yl + beqz a8, .Lmul_returnx + +.Lmul_returny: + mov xh, yh + mov xl, yl + +.Lmul_returnx: + /* Set the sign bit and return. */ + extui a7, a7, 31, 1 + slli xh, xh, 1 + ssai 1 + src xh, a7, xh + j .Lmul_done + +.Lmul_ynan_or_inf: + /* If x is zero, return NaN. 
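The "make it a quiet NaN" steps above just force the top mantissa bit (the 0x80000 OR-ed into the high word) on a value whose exponent is already all ones. Expressed on a full 64-bit pattern, as an illustrative helper that is not part of the patch:

#include <stdint.h>

static uint64_t
quiet_nan_from (uint64_t bits)
{
  /* Keep the sign; force exponent all ones and the quiet bit (bit 51).  */
  return bits | 0x7ff8000000000000ULL;
}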
*/ + bnez xl, .Lmul_returny + slli a8, xh, 1 + bnez a8, .Lmul_returny + movi a7, 0x80000 /* make it a quiet NaN */ + or xh, yh, a7 + j .Lmul_done + + .align 4 + .global __muldf3 + .type __muldf3, @function +__muldf3: +#if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 + addi sp, sp, -32 + s32i a12, sp, 16 + s32i a13, sp, 20 + s32i a14, sp, 24 + s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 64 +#else + leaf_entry sp, 32 +#endif + movi a6, 0x7ff00000 + + /* Get the sign of the result. */ + xor a7, xh, yh + + /* Check for NaN and infinity. */ + ball xh, a6, .Lmul_xnan_or_inf + ball yh, a6, .Lmul_ynan_or_inf + + /* Extract the exponents. */ + extui a8, xh, 20, 11 + extui a9, yh, 20, 11 + + beqz a8, .Lmul_xexpzero +.Lmul_xnormalized: + beqz a9, .Lmul_yexpzero +.Lmul_ynormalized: + + /* Add the exponents. */ + add a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0x1fffff + or xh, xh, a6 + and xh, xh, a10 + or yh, yh, a6 + and yh, yh, a10 + + /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6. + The least-significant word of the result is thrown away except + that if it is nonzero, the lsb of a6 is set to 1. */ +#if XCHAL_HAVE_MUL32_HIGH + + /* Compute a6 with any carry-outs in a10. */ + movi a10, 0 + mull a6, xl, yh + mull a11, xh, yl + add a6, a6, a11 + bgeu a6, a11, 1f + addi a10, a10, 1 +1: + muluh a11, xl, yl + add a6, a6, a11 + bgeu a6, a11, 1f + addi a10, a10, 1 +1: + /* If the low word of the result is nonzero, set the lsb of a6. */ + mull a11, xl, yl + beqz a11, 1f + movi a9, 1 + or a6, a6, a9 +1: + /* Compute xl with any carry-outs in a9. */ + movi a9, 0 + mull a11, xh, yh + add a10, a10, a11 + bgeu a10, a11, 1f + addi a9, a9, 1 +1: + muluh a11, xh, yl + add a10, a10, a11 + bgeu a10, a11, 1f + addi a9, a9, 1 +1: + muluh xl, xl, yh + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + /* Compute xh. */ + muluh xh, xh, yh + add xh, xh, a9 + +#else /* ! XCHAL_HAVE_MUL32_HIGH */ + + /* Break the inputs into 16-bit chunks and compute 16 32-bit partial + products. These partial products are: + + 0 xll * yll + + 1 xll * ylh + 2 xlh * yll + + 3 xll * yhl + 4 xlh * ylh + 5 xhl * yll + + 6 xll * yhh + 7 xlh * yhl + 8 xhl * ylh + 9 xhh * yll + + 10 xlh * yhh + 11 xhl * yhl + 12 xhh * ylh + + 13 xhl * yhh + 14 xhh * yhl + + 15 xhh * yhh + + where the input chunks are (hh, hl, lh, ll). If using the Mul16 + or Mul32 multiplier options, these input chunks must be stored in + separate registers. For Mac16, the UMUL.AA.* opcodes can specify + that the inputs come from either half of the registers, so there + is no need to shift them out ahead of time. If there is no + multiply hardware, the 16-bit chunks can be extracted when setting + up the arguments to the separate multiply function. */ + + /* Save a7 since it is needed to hold a temporary value. */ + s32i a7, sp, 4 +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Calling a separate multiply function will clobber a0 and requires + use of a8 as a temporary, so save those values now. (The function + uses a custom ABI so nothing else needs to be saved.) */ + s32i a0, sp, 0 + s32i a8, sp, 8 +#endif + +#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 + +#define xlh a12 +#define ylh a13 +#define xhh a14 +#define yhh a15 + + /* Get the high halves of the inputs into registers. 
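The partial-product table above is the classic schoolbook widening multiply. The same accumulation idea written in C with 32-bit chunks (four partial products instead of sixteen), purely for illustration; the helper is hypothetical and not part of the patch:

#include <stdint.h>

static void
mul64x64_128 (uint64_t x, uint64_t y, uint64_t *hi, uint64_t *lo)
{
  uint64_t x_lo = (uint32_t) x, x_hi = x >> 32;
  uint64_t y_lo = (uint32_t) y, y_hi = y >> 32;

  uint64_t ll = x_lo * y_lo;
  uint64_t lh = x_lo * y_hi;
  uint64_t hl = x_hi * y_lo;
  uint64_t hh = x_hi * y_hi;

  uint64_t mid = (ll >> 32) + (uint32_t) lh + (uint32_t) hl;
  *lo = (mid << 32) | (uint32_t) ll;
  *hi = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
}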
*/ + srli xlh, xl, 16 + srli ylh, yl, 16 + srli xhh, xh, 16 + srli yhh, yh, 16 + +#define xll xl +#define yll yl +#define xhl xh +#define yhl yh + +#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 + /* Clear the high halves of the inputs. This does not matter + for MUL16 because the high bits are ignored. */ + extui xl, xl, 0, 16 + extui xh, xh, 0, 16 + extui yl, yl, 0, 16 + extui yh, yh, 0, 16 +#endif +#endif /* MUL16 || MUL32 */ + + +#if XCHAL_HAVE_MUL16 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mul16u dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MUL32 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mull dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MAC16 + +/* The preprocessor insists on inserting a space when concatenating after + a period in the definition of do_mul below. These macros are a workaround + using underscores instead of periods when doing the concatenation. */ +#define umul_aa_ll umul.aa.ll +#define umul_aa_lh umul.aa.lh +#define umul_aa_hl umul.aa.hl +#define umul_aa_hh umul.aa.hh + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + umul_aa_ ## xhalf ## yhalf xreg, yreg; \ + rsr dst, ACCLO + +#else /* no multiply hardware */ + +#define set_arg_l(dst, src) \ + extui dst, src, 0, 16 +#define set_arg_h(dst, src) \ + srli dst, src, 16 + +#if __XTENSA_CALL0_ABI__ +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a13, xreg); \ + set_arg_ ## yhalf (a14, yreg); \ + call0 .Lmul_mulsi3; \ + mov dst, a12 +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ + + /* Add pp1 and pp2 into a10 with carry-out in a9. */ + do_mul(a10, xl, l, yl, h) /* pp 1 */ + do_mul(a11, xl, h, yl, l) /* pp 2 */ + movi a9, 0 + add a10, a10, a11 + bgeu a10, a11, 1f + addi a9, a9, 1 +1: + /* Initialize a6 with a9/a10 shifted into position. Note that + this value can be safely incremented without any carry-outs. */ + ssai 16 + src a6, a9, a10 + + /* Compute the low word into a10. */ + do_mul(a11, xl, l, yl, l) /* pp 0 */ + sll a10, a10 + add a10, a10, a11 + bgeu a10, a11, 1f + addi a6, a6, 1 +1: + /* Compute the contributions of pp0-5 to a6, with carry-outs in a9. + This is good enough to determine the low half of a6, so that any + nonzero bits from the low word of the result can be collapsed + into a6, freeing up a register. */ + movi a9, 0 + do_mul(a11, xl, l, yh, l) /* pp 3 */ + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + do_mul(a11, xl, h, yl, h) /* pp 4 */ + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + do_mul(a11, xh, l, yl, l) /* pp 5 */ + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Collapse any nonzero bits from the low word into a6. */ + beqz a10, 1f + movi a11, 1 + or a6, a6, a11 +1: + /* Add pp6-9 into a11 with carry-outs in a10. */ + do_mul(a7, xl, l, yh, h) /* pp 6 */ + do_mul(a11, xh, h, yl, l) /* pp 9 */ + movi a10, 0 + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + do_mul(a7, xl, h, yh, l) /* pp 7 */ + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + do_mul(a7, xh, l, yl, h) /* pp 8 */ + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + /* Shift a10/a11 into position, and add low half of a11 to a6. */ + src a10, a10, a11 + add a10, a10, a9 + sll a11, a11 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a10, a10, 1 +1: + /* Add pp10-12 into xl with carry-outs in a9. 
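Each of the add/bgeu/addi triples above is the standard carry-out test for unsigned addition: after the add, a carry occurred exactly when the wrapped sum is smaller than one of the addends. In C, as an illustrative helper that is not part of the patch:

#include <stdint.h>

static uint32_t
add_with_carry_out (uint32_t a, uint32_t b, uint32_t *carry)
{
  uint32_t sum = a + b;
  *carry += (sum < b);   /* same condition the bgeu above falls through on */
  return sum;
}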
*/ + movi a9, 0 + do_mul(xl, xl, h, yh, h) /* pp 10 */ + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + do_mul(a10, xh, l, yh, l) /* pp 11 */ + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + do_mul(a10, xh, h, yl, h) /* pp 12 */ + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + /* Add pp13-14 into a11 with carry-outs in a10. */ + do_mul(a11, xh, l, yh, h) /* pp 13 */ + do_mul(a7, xh, h, yh, l) /* pp 14 */ + movi a10, 0 + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + /* Shift a10/a11 into position, and add low half of a11 to a6. */ + src a10, a10, a11 + add a10, a10, a9 + sll a11, a11 + add xl, xl, a11 + bgeu xl, a11, 1f + addi a10, a10, 1 +1: + /* Compute xh. */ + do_mul(xh, xh, h, yh, h) /* pp 15 */ + add xh, xh, a10 + + /* Restore values saved on the stack during the multiplication. */ + l32i a7, sp, 4 +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + l32i a0, sp, 0 + l32i a8, sp, 8 +#endif +#endif /* ! XCHAL_HAVE_MUL32_HIGH */ + + /* Shift left by 12 bits, unless there was a carry-out from the + multiply, in which case, shift by 11 bits and increment the + exponent. Note: It is convenient to use the constant 0x3ff + instead of 0x400 when removing the extra exponent bias (so that + it is easy to construct 0x7fe for the overflow check). Reverse + the logic here to decrement the exponent sum by one unless there + was a carry-out. */ + movi a4, 11 + srli a5, xh, 21 - 12 + bnez a5, 1f + addi a4, a4, 1 + addi a8, a8, -1 +1: ssl a4 + src xh, xh, xl + src xl, xl, a6 + sll a6, a6 + + /* Subtract the extra bias from the exponent sum (plus one to account + for the explicit "1.0" of the mantissa that will be added to the + exponent in the final result). */ + movi a4, 0x3ff + sub a8, a8, a4 + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..7fd are OK here. */ + slli a4, a4, 1 /* 0x7fe */ + bgeu a8, a4, .Lmul_overflow + +.Lmul_round: + /* Round. */ + bgez a6, .Lmul_rounded + addi xl, xl, 1 + beqz xl, .Lmul_roundcarry + slli a6, a6, 1 + beqz a6, .Lmul_exactlyhalf + +.Lmul_rounded: + /* Add the exponent to the mantissa. */ + slli a8, a8, 20 + add xh, xh, a8 + +.Lmul_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or xh, xh, a7 + +.Lmul_done: +#if __XTENSA_CALL0_ABI__ + l32i a12, sp, 16 + l32i a13, sp, 20 + l32i a14, sp, 24 + l32i a15, sp, 28 + addi sp, sp, 32 +#endif + leaf_return + +.Lmul_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + j .Lmul_rounded + +.Lmul_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow is OK -- it will be added to the exponent. */ + j .Lmul_rounded + +.Lmul_overflow: + bltz a8, .Lmul_underflow + /* Return +/- Infinity. */ + addi a8, a4, 1 /* 0x7ff */ + slli xh, a8, 20 + movi xl, 0 + j .Lmul_addsign + +.Lmul_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + mov a9, a6 + ssr a8 + bgeui a8, 32, .Lmul_bigshift + + /* Shift xh/xl right. Any bits that are shifted out of xl are saved + in a6 (combined with the shifted-out bits currently in a6) for + rounding the result. 
*/ + sll a6, xl + src xl, xh, xl + srl xh, xh + j 1f + +.Lmul_bigshift: + bgeui a8, 64, .Lmul_flush_to_zero + sll a10, xl /* lost bits shifted out of xl */ + src a6, xh, xl + srl xl, xh + movi xh, 0 + or a9, a9, a10 + + /* Set the exponent to zero. */ +1: movi a8, 0 + + /* Pack any nonzero bits shifted out into a6. */ + beqz a9, .Lmul_round + movi a9, 1 + or a6, a6, a9 + j .Lmul_round + +.Lmul_flush_to_zero: + /* Return zero with the appropriate sign bit. */ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + j .Lmul_done + +#if XCHAL_NO_MUL + + /* For Xtensa processors with no multiply hardware, this simplified + version of _mulsi3 is used for multiplying 16-bit chunks of + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ + .align 4 +.Lmul_mulsi3: + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ +#endif /* L_muldf3 */ + +#ifdef L_divdf3 + + /* Division */ +__divdf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Ldiv_yexpzero: + /* Clear the sign bit of y. */ + slli yh, yh, 1 + srli yh, yh, 1 + + /* Check for division by zero. */ + or a10, yh, yl + beqz a10, .Ldiv_yzero + + /* Normalize y. Adjust the exponent in a9. */ + beqz yh, .Ldiv_yh_zero + do_nsau a10, yh, a11, a9 + addi a10, a10, -11 + ssl a10 + src yh, yh, yl + sll yl, yl + movi a9, 1 + sub a9, a9, a10 + j .Ldiv_ynormalized +.Ldiv_yh_zero: + do_nsau a10, yl, a11, a9 + addi a10, a10, -11 + movi a9, -31 + sub a9, a9, a10 + ssl a10 + bltz a10, .Ldiv_yl_srl + sll yh, yl + movi yl, 0 + j .Ldiv_ynormalized +.Ldiv_yl_srl: + srl yh, yl + sll yl, yl + j .Ldiv_ynormalized + +.Ldiv_yzero: + /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ + slli xh, xh, 1 + srli xh, xh, 1 + or xl, xl, xh + srli xh, a7, 31 + slli xh, xh, 31 + or xh, xh, a6 + bnez xl, 1f + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 +1: movi xl, 0 + leaf_return + +.Ldiv_xexpzero: + /* Clear the sign bit of x. */ + slli xh, xh, 1 + srli xh, xh, 1 + + /* If x is zero, return zero. */ + or a10, xh, xl + beqz a10, .Ldiv_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + beqz xh, .Ldiv_xh_zero + do_nsau a10, xh, a11, a8 + addi a10, a10, -11 + ssl a10 + src xh, xh, xl + sll xl, xl + movi a8, 1 + sub a8, a8, a10 + j .Ldiv_xnormalized +.Ldiv_xh_zero: + do_nsau a10, xl, a11, a8 + addi a10, a10, -11 + movi a8, -31 + sub a8, a8, a10 + ssl a10 + bltz a10, .Ldiv_xl_srl + sll xh, xl + movi xl, 0 + j .Ldiv_xnormalized +.Ldiv_xl_srl: + srl xh, xl + sll xl, xl + j .Ldiv_xnormalized + +.Ldiv_return_zero: + /* Return zero with the appropriate sign bit. 
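The .Lmul_mulsi3 helper above is a shift-and-add multiply; the Xtensa macro retires four multiplier bits per loop iteration with ADDX2/ADDX4/ADDX8. A one-bit-per-iteration C equivalent, for illustration only and not part of the patch:

#include <stdint.h>

static uint32_t
shift_add_mul (uint32_t a, uint32_t b)
{
  uint32_t result = 0;
  while (a != 0)
    {
      if (a & 1)
        result += b;      /* add the shifted multiplicand for each set bit */
      a >>= 1;
      b <<= 1;
    }
  return result;
}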
*/ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + leaf_return + +.Ldiv_xnan_or_inf: + /* Set the sign bit of the result. */ + srli a7, yh, 31 + slli a7, a7, 31 + xor xh, xh, a7 + /* If y is NaN or Inf, return NaN. */ + bnall yh, a6, 1f + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 +1: leaf_return + +.Ldiv_ynan_or_inf: + /* If y is Infinity, return zero. */ + slli a8, yh, 12 + or a8, a8, yl + beqz a8, .Ldiv_return_zero + /* y is NaN; return it. */ + mov xh, yh + mov xl, yl + leaf_return + +.Ldiv_highequal1: + bltu xl, yl, 2f + j 3f + + .align 4 + .global __divdf3 + .type __divdf3, @function +__divdf3: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + + /* Get the sign of the result. */ + xor a7, xh, yh + + /* Check for NaN and infinity. */ + ball xh, a6, .Ldiv_xnan_or_inf + ball yh, a6, .Ldiv_ynan_or_inf + + /* Extract the exponents. */ + extui a8, xh, 20, 11 + extui a9, yh, 20, 11 + + beqz a9, .Ldiv_yexpzero +.Ldiv_ynormalized: + beqz a8, .Ldiv_xexpzero +.Ldiv_xnormalized: + + /* Subtract the exponents. */ + sub a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0x1fffff + or xh, xh, a6 + and xh, xh, a10 + or yh, yh, a6 + and yh, yh, a10 + + /* Set SAR for left shift by one. */ + ssai (32 - 1) + + /* The first digit of the mantissa division must be a one. + Shift x (and adjust the exponent) as needed to make this true. */ + bltu yh, xh, 3f + beq yh, xh, .Ldiv_highequal1 +2: src xh, xh, xl + sll xl, xl + addi a8, a8, -1 +3: + /* Do the first subtraction and shift. */ + sub xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, -1 +1: sub xl, xl, yl + src xh, xh, xl + sll xl, xl + + /* Put the quotient into a10/a11. */ + movi a10, 0 + movi a11, 1 + + /* Divide one bit at a time for 52 bits. */ + movi a9, 52 +#if XCHAL_HAVE_LOOPS + loop a9, .Ldiv_loopend +#endif +.Ldiv_loop: + /* Shift the quotient << 1. */ + src a10, a10, a11 + sll a11, a11 + + /* Is this digit a 0 or 1? */ + bltu xh, yh, 3f + beq xh, yh, .Ldiv_highequal2 + + /* Output a 1 and subtract. */ +2: addi a11, a11, 1 + sub xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, -1 +1: sub xl, xl, yl + + /* Shift the dividend << 1. */ +3: src xh, xh, xl + sll xl, xl + +#if !XCHAL_HAVE_LOOPS + addi a9, a9, -1 + bnez a9, .Ldiv_loop +#endif +.Ldiv_loopend: + + /* Add the exponent bias (less one to account for the explicit "1.0" + of the mantissa that will be added to the exponent in the final + result). */ + movi a9, 0x3fe + add a8, a8, a9 + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..7fd are OK here. */ + addmi a9, a9, 0x400 /* 0x7fe */ + bgeu a8, a9, .Ldiv_overflow + +.Ldiv_round: + /* Round. The remainder (<< 1) is in xh/xl. */ + bltu xh, yh, .Ldiv_rounded + beq xh, yh, .Ldiv_highequal3 +.Ldiv_roundup: + addi a11, a11, 1 + beqz a11, .Ldiv_roundcarry + +.Ldiv_rounded: + mov xl, a11 + /* Add the exponent to the mantissa. */ + slli a8, a8, 20 + add xh, a10, a8 + +.Ldiv_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or xh, xh, a7 + leaf_return + +.Ldiv_highequal2: + bgeu xl, yl, 2b + j 3b + +.Ldiv_highequal3: + bltu xl, yl, .Ldiv_rounded + bne xl, yl, .Ldiv_roundup + + /* Remainder is exactly half the divisor. Round even. */ + addi a11, a11, 1 + beqz a11, .Ldiv_roundcarry + srli a11, a11, 1 + slli a11, a11, 1 + j .Ldiv_rounded + +.Ldiv_overflow: + bltz a8, .Ldiv_underflow + /* Return +/- Infinity. 
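The quotient loop above is restoring division, producing one mantissa bit per iteration: compare the remainder with the divisor, emit a 1 and subtract when it fits, then shift the remainder left. A small C analogue (8 quotient bits for brevity, assuming the operands are normalized as in the code above so the shifts cannot overflow); the helper is hypothetical and not part of the patch:

#include <stdint.h>

static uint8_t
divide_bits (uint32_t rem, uint32_t div)
{
  uint8_t quotient = 0;
  for (int i = 0; i < 8; i++)
    {
      quotient <<= 1;
      if (rem >= div)
        {
          quotient |= 1;
          rem -= div;
        }
      rem <<= 1;
    }
  return quotient;
}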
*/ + addi a8, a9, 1 /* 0x7ff */ + slli xh, a8, 20 + movi xl, 0 + j .Ldiv_addsign + +.Ldiv_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + ssr a8 + bgeui a8, 32, .Ldiv_bigshift + + /* Shift a10/a11 right. Any bits that are shifted out of a11 are + saved in a6 for rounding the result. */ + sll a6, a11 + src a11, a10, a11 + srl a10, a10 + j 1f + +.Ldiv_bigshift: + bgeui a8, 64, .Ldiv_flush_to_zero + sll a9, a11 /* lost bits shifted out of a11 */ + src a6, a10, a11 + srl a11, a10 + movi a10, 0 + or xl, xl, a9 + + /* Set the exponent to zero. */ +1: movi a8, 0 + + /* Pack any nonzero remainder (in xh/xl) into a6. */ + or xh, xh, xl + beqz xh, 1f + movi a9, 1 + or a6, a6, a9 + + /* Round a10/a11 based on the bits shifted out into a6. */ +1: bgez a6, .Ldiv_rounded + addi a11, a11, 1 + beqz a11, .Ldiv_roundcarry + slli a6, a6, 1 + bnez a6, .Ldiv_rounded + srli a11, a11, 1 + slli a11, a11, 1 + j .Ldiv_rounded + +.Ldiv_roundcarry: + /* a11 is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi a10, a10, 1 + /* Overflow to the exponent field is OK. */ + j .Ldiv_rounded + +.Ldiv_flush_to_zero: + /* Return zero with the appropriate sign bit. */ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + leaf_return + +#endif /* L_divdf3 */ + +#ifdef L_cmpdf2 + + /* Equal and Not Equal */ + + .align 4 + .global __eqdf2 + .global __nedf2 + .set __nedf2, __eqdf2 + .type __eqdf2, @function +__eqdf2: + leaf_entry sp, 16 + bne xl, yl, 2f + bne xh, yh, 4f + + /* The values are equal but NaN != NaN. Check the exponent. */ + movi a6, 0x7ff00000 + ball xh, a6, 3f + + /* Equal. */ + movi a2, 0 + leaf_return + + /* Not equal. */ +2: movi a2, 1 + leaf_return + + /* Check if the mantissas are nonzero. */ +3: slli a7, xh, 12 + or a7, a7, xl + j 5f + + /* Check if x and y are zero with different signs. */ +4: or a7, xh, yh + slli a7, a7, 1 + or a7, a7, xl /* xl == yl here */ + + /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa + or x when exponent(x) = 0x7ff and x == y. */ +5: movi a2, 0 + movi a3, 1 + movnez a2, a3, a7 + leaf_return + + + /* Greater Than */ + + .align 4 + .global __gtdf2 + .type __gtdf2, @function +__gtdf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Lle_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 0 + leaf_return + + + /* Less Than or Equal */ + + .align 4 + .global __ledf2 + .type __ledf2, @function +__ledf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Lle_cmp + movi a2, 1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 1 + leaf_return + +.Lle_cmp: + /* Check if x and y have different signs. */ + xor a7, xh, yh + bltz a7, .Lle_diff_signs + + /* Check if x is negative. */ + bltz xh, .Lle_xneg + + /* Check if x <= y. */ + bltu xh, yh, 4f + bne xh, yh, 5f + bltu yl, xl, 5f +4: movi a2, 0 + leaf_return + +.Lle_xneg: + /* Check if y <= x. 
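The comparisons above keep re-doing the same NaN test: the exponent field is all ones and the 52 mantissa bits (high word shifted left by 12, OR-ed with the low word) are nonzero. In C, as an illustrative helper that is not part of the patch:

#include <stdint.h>

static int
is_nan_bits (uint32_t hi, uint32_t lo)
{
  return (hi & 0x7ff00000) == 0x7ff00000    /* exponent == 0x7ff */
         && (((hi << 12) | lo) != 0);       /* mantissa != 0 */
}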
*/ + bltu yh, xh, 4b + bne yh, xh, 5f + bgeu xl, yl, 4b +5: movi a2, 1 + leaf_return + +.Lle_diff_signs: + bltz xh, 4b + + /* Check if both x and y are zero. */ + or a7, xh, yh + slli a7, a7, 1 + or a7, a7, xl + or a7, a7, yl + movi a2, 1 + movi a3, 0 + moveqz a2, a3, a7 + leaf_return + + + /* Greater Than or Equal */ + + .align 4 + .global __gedf2 + .type __gedf2, @function +__gedf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Llt_cmp + movi a2, -1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, -1 + leaf_return + + + /* Less Than */ + + .align 4 + .global __ltdf2 + .type __ltdf2, @function +__ltdf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Llt_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 0 + leaf_return + +.Llt_cmp: + /* Check if x and y have different signs. */ + xor a7, xh, yh + bltz a7, .Llt_diff_signs + + /* Check if x is negative. */ + bltz xh, .Llt_xneg + + /* Check if x < y. */ + bltu xh, yh, 4f + bne xh, yh, 5f + bgeu xl, yl, 5f +4: movi a2, -1 + leaf_return + +.Llt_xneg: + /* Check if y < x. */ + bltu yh, xh, 4b + bne yh, xh, 5f + bltu yl, xl, 4b +5: movi a2, 0 + leaf_return + +.Llt_diff_signs: + bgez xh, 5b + + /* Check if both x and y are nonzero. */ + or a7, xh, yh + slli a7, a7, 1 + or a7, a7, xl + or a7, a7, yl + movi a2, 0 + movi a3, -1 + movnez a2, a3, a7 + leaf_return + + + /* Unordered */ + + .align 4 + .global __unorddf2 + .type __unorddf2, @function +__unorddf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 3f +1: ball yh, a6, 4f +2: movi a2, 0 + leaf_return + +3: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 1 + leaf_return + +4: slli a7, yh, 12 + or a7, a7, yl + beqz a7, 2b + movi a2, 1 + leaf_return + +#endif /* L_cmpdf2 */ + +#ifdef L_fixdfsi + + .align 4 + .global __fixdfsi + .type __fixdfsi, @function +__fixdfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixdfsi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */ + extui a4, xh, 20, 11 + extui a5, a6, 19, 10 /* 0x3fe */ + sub a4, a4, a5 + bgei a4, 32, .Lfixdfsi_maxint + blti a4, 1, .Lfixdfsi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src a5, a7, xl + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixdfsi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixdfsi_maxint + + /* Translate NaN to +maxint. */ + movi xh, 0 + +.Lfixdfsi_maxint: + slli a4, a6, 11 /* 0x80000000 */ + addi a5, a4, -1 /* 0x7fffffff */ + movgez a4, a5, xh + mov a2, a4 + leaf_return + +.Lfixdfsi_zero: + movi a2, 0 + leaf_return + +#endif /* L_fixdfsi */ + +#ifdef L_fixdfdi + + .align 4 + .global __fixdfdi + .type __fixdfdi, @function +__fixdfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixdfdi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. 
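The double-to-integer conversions here all follow the same outline: extract the biased exponent, reject out-of-range inputs (saturating, with NaN mapped to the maximum), prepend the implicit "1.0" and shift the mantissa into place. A rough 32-bit C analogue, hypothetical and not part of the patch:

#include <stdint.h>

static int32_t
double_bits_to_int32 (uint64_t bits)
{
  int neg = bits >> 63;
  int exp = (bits >> 52) & 0x7ff;
  uint64_t frac = (bits & 0xfffffffffffffULL) | (1ULL << 52);
  int unbiased = exp - 0x3ff;

  if (exp == 0x7ff && (bits << 12) != 0)    /* NaN */
    return INT32_MAX;
  if (unbiased >= 31)                       /* Inf or too big: saturate */
    return neg ? INT32_MIN : INT32_MAX;
  if (unbiased < 0)                         /* |value| < 1 */
    return 0;
  int32_t mag = (int32_t) (frac >> (52 - unbiased));
  return neg ? -mag : mag;
}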
*/ + extui a4, xh, 20, 11 + extui a5, a6, 19, 10 /* 0x3fe */ + sub a4, a4, a5 + bgei a4, 64, .Lfixdfdi_maxint + blti a4, 1, .Lfixdfdi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src xh, a7, xl + sll xl, xl + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixdfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixdfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixdfdi_smallshift: + src xl, xh, xl + srl xh, xh + j .Lfixdfdi_shifted + +.Lfixdfdi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixdfdi_maxint + + /* Translate NaN to +maxint. */ + movi xh, 0 + +.Lfixdfdi_maxint: + slli a7, a6, 11 /* 0x80000000 */ + bgez xh, 1f + mov xh, a7 + movi xl, 0 + leaf_return + +1: addi xh, a7, -1 /* 0x7fffffff */ + movi xl, -1 + leaf_return + +.Lfixdfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +#endif /* L_fixdfdi */ + +#ifdef L_fixunsdfsi + + .align 4 + .global __fixunsdfsi + .type __fixunsdfsi, @function +__fixunsdfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixunsdfsi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */ + extui a4, xh, 20, 11 + extui a5, a6, 20, 10 /* 0x3ff */ + sub a4, a4, a5 + bgei a4, 32, .Lfixunsdfsi_maxint + bltz a4, .Lfixunsdfsi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src a5, a7, xl + + /* Shift back to the right, based on the exponent. */ + addi a4, a4, 1 + beqi a4, 32, .Lfixunsdfsi_bigexp + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixunsdfsi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixunsdfsi_maxint + + /* Translate NaN to 0xffffffff. */ + movi a2, -1 + leaf_return + +.Lfixunsdfsi_maxint: + slli a4, a6, 11 /* 0x80000000 */ + movi a5, -1 /* 0xffffffff */ + movgez a4, a5, xh + mov a2, a4 + leaf_return + +.Lfixunsdfsi_zero: + movi a2, 0 + leaf_return + +.Lfixunsdfsi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz xh, 1f + mov a2, a5 /* no shift needed */ + leaf_return + + /* Return 0x80000000 if negative. */ +1: slli a2, a6, 11 + leaf_return + +#endif /* L_fixunsdfsi */ + +#ifdef L_fixunsdfdi + + .align 4 + .global __fixunsdfdi + .type __fixunsdfdi, @function +__fixunsdfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixunsdfdi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */ + extui a4, xh, 20, 11 + extui a5, a6, 20, 10 /* 0x3ff */ + sub a4, a4, a5 + bgei a4, 64, .Lfixunsdfdi_maxint + bltz a4, .Lfixunsdfdi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src xh, a7, xl + sll xl, xl + + /* Shift back to the right, based on the exponent. */ + addi a4, a4, 1 + beqi a4, 64, .Lfixunsdfdi_bigexp + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixunsdfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixunsdfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixunsdfdi_smallshift: + src xl, xh, xl + srl xh, xh + j .Lfixunsdfdi_shifted + +.Lfixunsdfdi_nan_or_inf: + /* Handle Infinity and NaN. 
*/ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixunsdfdi_maxint + + /* Translate NaN to 0xffffffff.... */ +1: movi xh, -1 + movi xl, -1 + leaf_return + +.Lfixunsdfdi_maxint: + bgez xh, 1b +2: slli xh, a6, 11 /* 0x80000000 */ + movi xl, 0 + leaf_return + +.Lfixunsdfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +.Lfixunsdfdi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz a7, 2b + leaf_return /* no shift needed */ + +#endif /* L_fixunsdfdi */ + +#ifdef L_floatsidf + + .align 4 + .global __floatunsidf + .type __floatunsidf, @function +__floatunsidf: + leaf_entry sp, 16 + beqz a2, .Lfloatsidf_return_zero + + /* Set the sign to zero and jump to the floatsidf code. */ + movi a7, 0 + j .Lfloatsidf_normalize + + .align 4 + .global __floatsidf + .type __floatsidf, @function +__floatsidf: + leaf_entry sp, 16 + + /* Check for zero. */ + beqz a2, .Lfloatsidf_return_zero + + /* Save the sign. */ + extui a7, a2, 31, 1 + + /* Get the absolute value. */ +#if XCHAL_HAVE_ABS + abs a2, a2 +#else + neg a4, a2 + movltz a2, a4, a2 +#endif + +.Lfloatsidf_normalize: + /* Normalize with the first 1 bit in the msb. */ + do_nsau a4, a2, a5, a6 + ssl a4 + sll a5, a2 + + /* Shift the mantissa into position. */ + srli xh, a5, 11 + slli xl, a5, (32 - 11) + + /* Set the exponent. */ + movi a5, 0x41d /* 0x3fe + 31 */ + sub a5, a5, a4 + slli a5, a5, 20 + add xh, xh, a5 + + /* Add the sign and return. */ + slli a7, a7, 31 + or xh, xh, a7 + leaf_return + +.Lfloatsidf_return_zero: + movi a3, 0 + leaf_return + +#endif /* L_floatsidf */ + +#ifdef L_floatdidf + + .align 4 + .global __floatundidf + .type __floatundidf, @function +__floatundidf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Set the sign to zero and jump to the floatdidf code. */ + movi a7, 0 + j .Lfloatdidf_normalize + + .align 4 + .global __floatdidf + .type __floatdidf, @function +__floatdidf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Save the sign. */ + extui a7, xh, 31, 1 + + /* Get the absolute value. */ + bgez xh, .Lfloatdidf_normalize + neg xl, xl + neg xh, xh + beqz xl, .Lfloatdidf_normalize + addi xh, xh, -1 + +.Lfloatdidf_normalize: + /* Normalize with the first 1 bit in the msb of xh. */ + beqz xh, .Lfloatdidf_bigshift + do_nsau a4, xh, a5, a6 + ssl a4 + src xh, xh, xl + sll xl, xl + +.Lfloatdidf_shifted: + /* Shift the mantissa into position, with rounding bits in a6. */ + ssai 11 + sll a6, xl + src xl, xh, xl + srl xh, xh + + /* Set the exponent. */ + movi a5, 0x43d /* 0x3fe + 63 */ + sub a5, a5, a4 + slli a5, a5, 20 + add xh, xh, a5 + + /* Add the sign. */ + slli a7, a7, 31 + or xh, xh, a7 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a6, 2f + addi xl, xl, 1 + beqz xl, .Lfloatdidf_roundcarry + + /* Check if the leftover fraction is exactly 1/2. */ + slli a6, a6, 1 + beqz a6, .Lfloatdidf_exactlyhalf +2: leaf_return + +.Lfloatdidf_bigshift: + /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ + do_nsau a4, xl, a5, a6 + ssl a4 + sll xh, xl + movi xl, 0 + addi a4, a4, 32 + j .Lfloatdidf_shifted + +.Lfloatdidf_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + leaf_return + +.Lfloatdidf_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow to the exponent is OK. 
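__floatsidf above takes the absolute value, counts leading zeros with NSAU, shifts the leading 1 up to the implicit-bit position and adds (0x3fe + 31 - lz) in the exponent field; the still-explicit leading 1 then bumps the exponent by one, which is why 0x3fe rather than 0x3ff appears in the code. A C sketch of the same trick, with an invented helper name and not part of the patch:

#include <stdint.h>

static uint64_t
int32_to_double_bits (int32_t value)
{
  if (value == 0)
    return 0;
  uint64_t sign = value < 0 ? 1ULL << 63 : 0;
  uint32_t mag = value < 0 ? -(uint32_t) value : (uint32_t) value;

  int lz = __builtin_clz (mag);                  /* 0..31 */
  uint64_t frac = (uint64_t) mag << (lz + 21);   /* leading 1 at bit 52 */
  uint64_t exp = 0x3fe + 31 - lz;
  return sign | ((exp << 52) + frac);            /* explicit 1 bumps the exponent */
}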
*/ + leaf_return + +#endif /* L_floatdidf */ + +#ifdef L_truncdfsf2 + + .align 4 + .global __truncdfsf2 + .type __truncdfsf2, @function +__truncdfsf2: + leaf_entry sp, 16 + + /* Adjust the exponent bias. */ + movi a4, (0x3ff - 0x7f) << 20 + sub a5, xh, a4 + + /* Check for underflow. */ + xor a6, xh, a5 + bltz a6, .Ltrunc_underflow + extui a6, a5, 20, 11 + beqz a6, .Ltrunc_underflow + + /* Check for overflow. */ + movi a4, 255 + bge a6, a4, .Ltrunc_overflow + + /* Shift a5/xl << 3 into a5/a4. */ + ssai (32 - 3) + src a5, a5, xl + sll a4, xl + +.Ltrunc_addsign: + /* Add the sign bit. */ + extui a6, xh, 31, 1 + slli a6, a6, 31 + or a2, a6, a5 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a4, 1f + addi a2, a2, 1 + /* Overflow to the exponent is OK. The answer will be correct. */ + + /* Check if the leftover fraction is exactly 1/2. */ + slli a4, a4, 1 + beqz a4, .Ltrunc_exactlyhalf +1: leaf_return + +.Ltrunc_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +.Ltrunc_overflow: + /* Check if exponent == 0x7ff. */ + movi a4, 0x7ff00000 + bnall xh, a4, 1f + + /* Check if mantissa is nonzero. */ + slli a5, xh, 12 + or a5, a5, xl + beqz a5, 1f + + /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */ + srli a4, a4, 1 + +1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */ + /* Add the sign bit. */ + extui a6, xh, 31, 1 + ssai 1 + src a2, a6, a4 + leaf_return + +.Ltrunc_underflow: + /* Find shift count for a subnormal. Flush to zero if >= 32. */ + extui a6, xh, 20, 11 + movi a5, 0x3ff - 0x7f + sub a6, a5, a6 + addi a6, a6, 1 + bgeui a6, 32, 1f + + /* Replace the exponent with an explicit "1.0". */ + slli a5, a5, 13 /* 0x700000 */ + or a5, a5, xh + slli a5, a5, 11 + srli a5, a5, 11 + + /* Shift the mantissa left by 3 bits (into a5/a4). */ + ssai (32 - 3) + src a5, a5, xl + sll a4, xl + + /* Shift right by a6. */ + ssr a6 + sll a7, a4 + src a4, a5, a4 + srl a5, a5 + beqz a7, .Ltrunc_addsign + or a4, a4, a6 /* any positive, nonzero value will work */ + j .Ltrunc_addsign + + /* Return +/- zero. */ +1: extui a2, xh, 31, 1 + slli a2, a2, 31 + leaf_return + +#endif /* L_truncdfsf2 */ + +#ifdef L_extendsfdf2 + + .align 4 + .global __extendsfdf2 + .type __extendsfdf2, @function +__extendsfdf2: + leaf_entry sp, 16 + + /* Save the sign bit and then shift it off. */ + extui a5, a2, 31, 1 + slli a5, a5, 31 + slli a4, a2, 1 + + /* Extract and check the exponent. */ + extui a6, a2, 23, 8 + beqz a6, .Lextend_expzero + addi a6, a6, 1 + beqi a6, 256, .Lextend_nan_or_inf + + /* Shift >> 3 into a4/xl. */ + srli a4, a4, 4 + slli xl, a2, (32 - 3) + + /* Adjust the exponent bias. */ + movi a6, (0x3ff - 0x7f) << 20 + add a4, a4, a6 + + /* Add the sign bit. */ + or xh, a4, a5 + leaf_return + +.Lextend_nan_or_inf: + movi a4, 0x7ff00000 + + /* Check for NaN. */ + slli a7, a2, 9 + beqz a7, 1f + + slli a6, a6, 11 /* 0x80000 */ + or a4, a4, a6 + + /* Add the sign and return. */ +1: or xh, a4, a5 + movi xl, 0 + leaf_return + +.Lextend_expzero: + beqz a4, 1b + + /* Normalize it to have 8 zero bits before the first 1 bit. */ + do_nsau a7, a4, a2, a3 + addi a7, a7, -8 + ssl a7 + sll a4, a4 + + /* Shift >> 3 into a4/xl. */ + slli xl, a4, (32 - 3) + srli a4, a4, 3 + + /* Set the exponent. */ + movi a6, 0x3fe - 0x7f + sub a6, a6, a7 + slli a6, a6, 20 + add a4, a4, a6 + + /* Add the sign and return. 
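The conversions between single and double precision above mostly amount to re-biasing the exponent by (0x3ff - 0x7f) and moving the mantissa between 23 and 52 bits; __truncdfsf2 additionally rounds and handles overflow and subnormals. A C sketch of the extend direction for normal inputs only (hypothetical helper, not part of the patch):

#include <stdint.h>

static uint64_t
extend_sf_bits (uint32_t f)
{
  uint64_t sign = (uint64_t) (f >> 31) << 63;
  uint64_t exp  = (f >> 23) & 0xff;              /* assumes 0 < exp < 255 */
  uint64_t frac = f & 0x7fffff;
  return sign | ((exp + (0x3ff - 0x7f)) << 52) | (frac << 29);
}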
*/ + or xh, a4, a5 + leaf_return + +#endif /* L_extendsfdf2 */ + + diff --git a/libgcc/config/xtensa/ieee754-sf.S b/libgcc/config/xtensa/ieee754-sf.S new file mode 100644 index 00000000000..d75be0e5ae5 --- /dev/null +++ b/libgcc/config/xtensa/ieee754-sf.S @@ -0,0 +1,1757 @@ +/* IEEE-754 single-precision functions for Xtensa + Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef __XTENSA_EB__ +#define xh a2 +#define xl a3 +#define yh a4 +#define yl a5 +#else +#define xh a3 +#define xl a2 +#define yh a5 +#define yl a4 +#endif + +/* Warning! The branch displacements for some Xtensa branch instructions + are quite small, and this code has been carefully laid out to keep + branch targets in range. If you change anything, be sure to check that + the assembler is not relaxing anything to branch over a jump. */ + +#ifdef L_negsf2 + + .align 4 + .global __negsf2 + .type __negsf2, @function +__negsf2: + leaf_entry sp, 16 + movi a4, 0x80000000 + xor a2, a2, a4 + leaf_return + +#endif /* L_negsf2 */ + +#ifdef L_addsubsf3 + + /* Addition */ +__addsf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Ladd_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall a3, a6, 1f + /* If x is a NaN, return it. Otherwise, return y. */ + slli a7, a2, 9 + beqz a7, .Ladd_ynan_or_inf +1: leaf_return + +.Ladd_ynan_or_inf: + /* Return y. */ + mov a2, a3 + leaf_return + +.Ladd_opposite_signs: + /* Operand signs differ. Do a subtraction. */ + slli a7, a6, 8 + xor a3, a3, a7 + j .Lsub_same_sign + + .align 4 + .global __addsf3 + .type __addsf3, @function +__addsf3: + leaf_entry sp, 16 + movi a6, 0x7f800000 + + /* Check if the two operands have the same sign. */ + xor a7, a2, a3 + bltz a7, .Ladd_opposite_signs + +.Ladd_same_sign: + /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ + ball a2, a6, .Ladd_xnan_or_inf + ball a3, a6, .Ladd_ynan_or_inf + + /* Compare the exponents. The smaller operand will be shifted + right by the exponent difference and added to the larger + one. */ + extui a7, a2, 23, 9 + extui a8, a3, 23, 9 + bltu a7, a8, .Ladd_shiftx + +.Ladd_shifty: + /* Check if the smaller (or equal) exponent is zero. */ + bnone a3, a6, .Ladd_yexpzero + + /* Replace y sign/exponent with 0x008. */ + or a3, a3, a6 + slli a3, a3, 8 + srli a3, a3, 8 + +.Ladd_yexpdiff: + /* Compute the exponent difference. */ + sub a10, a7, a8 + + /* Exponent difference > 32 -- just return the bigger value. 
*/ + bgeui a10, 32, 1f + + /* Shift y right by the exponent difference. Any bits that are + shifted out of y are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, a3, a9 + srl a3, a3 + + /* Do the addition. */ + add a2, a2, a3 + + /* Check if the add overflowed into the exponent. */ + extui a10, a2, 23, 9 + beq a10, a7, .Ladd_round + mov a8, a7 + j .Ladd_carry + +.Ladd_yexpzero: + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0", and increment the apparent exponent + because subnormals behave as if they had the minimum (nonzero) + exponent. Test for the case when both exponents are zero. */ + slli a3, a3, 9 + srli a3, a3, 9 + bnone a2, a6, .Ladd_bothexpzero + addi a8, a8, 1 + j .Ladd_yexpdiff + +.Ladd_bothexpzero: + /* Both exponents are zero. Handle this as a special case. There + is no need to shift or round, and the normal code for handling + a carry into the exponent field will not work because it + assumes there is an implicit "1.0" that needs to be added. */ + add a2, a2, a3 +1: leaf_return + +.Ladd_xexpzero: + /* Same as "yexpzero" except skip handling the case when both + exponents are zero. */ + slli a2, a2, 9 + srli a2, a2, 9 + addi a7, a7, 1 + j .Ladd_xexpdiff + +.Ladd_shiftx: + /* Same thing as the "shifty" code, but with x and y swapped. Also, + because the exponent difference is always nonzero in this version, + the shift sequence can use SLL and skip loading a constant zero. */ + bnone a2, a6, .Ladd_xexpzero + + or a2, a2, a6 + slli a2, a2, 8 + srli a2, a2, 8 + +.Ladd_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Ladd_returny + + ssr a10 + sll a9, a2 + srl a2, a2 + + add a2, a2, a3 + + /* Check if the add overflowed into the exponent. */ + extui a10, a2, 23, 9 + bne a10, a8, .Ladd_carry + +.Ladd_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi a2, a2, 1 + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_returny: + mov a2, a3 + leaf_return + +.Ladd_carry: + /* The addition has overflowed into the exponent field, so the + value needs to be renormalized. The mantissa of the result + can be recovered by subtracting the original exponent and + adding 0x800000 (which is the explicit "1.0" for the + mantissa of the non-shifted operand -- the "1.0" for the + shifted operand was already added). The mantissa can then + be shifted right by one bit. The explicit "1.0" of the + shifted mantissa then needs to be replaced by the exponent, + incremented by one to account for the normalizing shift. + It is faster to combine these operations: do the shift first + and combine the additions and subtractions. If x is the + original exponent, the result is: + shifted mantissa - (x << 22) + (1 << 22) + (x << 23) + or: + shifted mantissa + ((x + 1) << 22) + Note that the exponent is incremented here by leaving the + explicit "1.0" of the mantissa in the exponent field. */ + + /* Shift x right by one bit. Save the lsb. */ + mov a10, a2 + srli a2, a2, 1 + + /* See explanation above. The original exponent is in a8. */ + addi a8, a8, 1 + slli a8, a8, 22 + add a2, a2, a8 + + /* Return an Infinity if the exponent overflowed. */ + ball a2, a6, .Ladd_infinity + + /* Same thing as the "round" code except the msb of the leftover + fraction is bit 0 of a10, with the rest of the fraction in a9. */ + bbci.l a10, 0, 1f + addi a2, a2, 1 + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_infinity: + /* Clear the mantissa. 
*/ + srli a2, a2, 23 + slli a2, a2, 23 + + /* The sign bit may have been lost in a carry-out. Put it back. */ + slli a8, a8, 1 + or a2, a2, a8 + leaf_return + +.Ladd_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + + + /* Subtraction */ +__subsf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Lsub_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall a3, a6, 1f + /* Both x and y are either NaN or Inf, so the result is NaN. */ + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 +1: leaf_return + +.Lsub_ynan_or_inf: + /* Negate y and return it. */ + slli a7, a6, 8 + xor a2, a3, a7 + leaf_return + +.Lsub_opposite_signs: + /* Operand signs differ. Do an addition. */ + slli a7, a6, 8 + xor a3, a3, a7 + j .Ladd_same_sign + + .align 4 + .global __subsf3 + .type __subsf3, @function +__subsf3: + leaf_entry sp, 16 + movi a6, 0x7f800000 + + /* Check if the two operands have the same sign. */ + xor a7, a2, a3 + bltz a7, .Lsub_opposite_signs + +.Lsub_same_sign: + /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ + ball a2, a6, .Lsub_xnan_or_inf + ball a3, a6, .Lsub_ynan_or_inf + + /* Compare the operands. In contrast to addition, the entire + value matters here. */ + extui a7, a2, 23, 8 + extui a8, a3, 23, 8 + bltu a2, a3, .Lsub_xsmaller + +.Lsub_ysmaller: + /* Check if the smaller (or equal) exponent is zero. */ + bnone a3, a6, .Lsub_yexpzero + + /* Replace y sign/exponent with 0x008. */ + or a3, a3, a6 + slli a3, a3, 8 + srli a3, a3, 8 + +.Lsub_yexpdiff: + /* Compute the exponent difference. */ + sub a10, a7, a8 + + /* Exponent difference > 32 -- just return the bigger value. */ + bgeui a10, 32, 1f + + /* Shift y right by the exponent difference. Any bits that are + shifted out of y are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, a3, a9 + srl a3, a3 + + sub a2, a2, a3 + + /* Subtract the leftover bits in a9 from zero and propagate any + borrow from a2. */ + neg a9, a9 + addi a10, a2, -1 + movnez a2, a10, a9 + + /* Check if the subtract underflowed into the exponent. */ + extui a10, a2, 23, 8 + beq a10, a7, .Lsub_round + j .Lsub_borrow + +.Lsub_yexpzero: + /* Return zero if the inputs are equal. (For the non-subnormal + case, subtracting the "1.0" will cause a borrow from the exponent + and this case can be detected when handling the borrow.) */ + beq a2, a3, .Lsub_return_zero + + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0". Unless x is also a subnormal, increment + y's apparent exponent because subnormals behave as if they had + the minimum (nonzero) exponent. */ + slli a3, a3, 9 + srli a3, a3, 9 + bnone a2, a6, .Lsub_yexpdiff + addi a8, a8, 1 + j .Lsub_yexpdiff + +.Lsub_returny: + /* Negate and return y. */ + slli a7, a6, 8 + xor a2, a3, a7 +1: leaf_return + +.Lsub_xsmaller: + /* Same thing as the "ysmaller" code, but with x and y swapped and + with y negated. */ + bnone a2, a6, .Lsub_xexpzero + + or a2, a2, a6 + slli a2, a2, 8 + srli a2, a2, 8 + +.Lsub_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Lsub_returny + + ssr a10 + movi a9, 0 + src a9, a2, a9 + srl a2, a2 + + /* Negate y. */ + slli a11, a6, 8 + xor a3, a3, a11 + + sub a2, a3, a2 + + neg a9, a9 + addi a10, a2, -1 + movnez a2, a10, a9 + + /* Check if the subtract underflowed into the exponent. 
*/ + extui a10, a2, 23, 8 + bne a10, a8, .Lsub_borrow + +.Lsub_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi a2, a2, 1 + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Lsub_exactlyhalf +1: leaf_return + +.Lsub_xexpzero: + /* Same as "yexpzero". */ + beq a2, a3, .Lsub_return_zero + slli a2, a2, 9 + srli a2, a2, 9 + bnone a3, a6, .Lsub_xexpdiff + addi a7, a7, 1 + j .Lsub_xexpdiff + +.Lsub_return_zero: + movi a2, 0 + leaf_return + +.Lsub_borrow: + /* The subtraction has underflowed into the exponent field, so the + value needs to be renormalized. Shift the mantissa left as + needed to remove any leading zeros and adjust the exponent + accordingly. If the exponent is not large enough to remove + all the leading zeros, the result will be a subnormal value. */ + + slli a8, a2, 9 + beqz a8, .Lsub_xzero + do_nsau a6, a8, a7, a11 + srli a8, a8, 9 + bge a6, a10, .Lsub_subnormal + addi a6, a6, 1 + +.Lsub_normalize_shift: + /* Shift the mantissa (a8/a9) left by a6. */ + ssl a6 + src a8, a8, a9 + sll a9, a9 + + /* Combine the shifted mantissa with the sign and exponent, + decrementing the exponent by a6. (The exponent has already + been decremented by one due to the borrow from the subtraction, + but adding the mantissa will increment the exponent by one.) */ + srli a2, a2, 23 + sub a2, a2, a6 + slli a2, a2, 23 + add a2, a2, a8 + j .Lsub_round + +.Lsub_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +.Lsub_xzero: + /* If there was a borrow from the exponent, and the mantissa and + guard digits are all zero, then the inputs were equal and the + result should be zero. */ + beqz a9, .Lsub_return_zero + + /* Only the guard digit is nonzero. Shift by min(24, a10). */ + addi a11, a10, -24 + movi a6, 24 + movltz a6, a10, a11 + j .Lsub_normalize_shift + +.Lsub_subnormal: + /* The exponent is too small to shift away all the leading zeros. + Set a6 to the current exponent (which has already been + decremented by the borrow) so that the exponent of the result + will be zero. Do not add 1 to a6 in this case, because: (1) + adding the mantissa will not increment the exponent, so there is + no need to subtract anything extra from the exponent to + compensate, and (2) the effective exponent of a subnormal is 1 + not 0 so the shift amount must be 1 smaller than normal. */ + mov a6, a10 + j .Lsub_normalize_shift + +#endif /* L_addsubsf3 */ + +#ifdef L_mulsf3 + + /* Multiplication */ +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + +__mulsf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Lmul_xexpzero: + /* Clear the sign bit of x. */ + slli a2, a2, 1 + srli a2, a2, 1 + + /* If x is zero, return zero. */ + beqz a2, .Lmul_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + do_nsau a10, a2, a11, a12 + addi a10, a10, -8 + ssl a10 + sll a2, a2 + movi a8, 1 + sub a8, a8, a10 + j .Lmul_xnormalized + +.Lmul_yexpzero: + /* Clear the sign bit of y. */ + slli a3, a3, 1 + srli a3, a3, 1 + + /* If y is zero, return zero. */ + beqz a3, .Lmul_return_zero + + /* Normalize y. Adjust the exponent in a9. 
*/ + do_nsau a10, a3, a11, a12 + addi a10, a10, -8 + ssl a10 + sll a3, a3 + movi a9, 1 + sub a9, a9, a10 + j .Lmul_ynormalized + +.Lmul_return_zero: + /* Return zero with the appropriate sign bit. */ + srli a2, a7, 31 + slli a2, a2, 31 + j .Lmul_done + +.Lmul_xnan_or_inf: + /* If y is zero, return NaN. */ + slli a8, a3, 1 + bnez a8, 1f + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 + j .Lmul_done +1: + /* If y is NaN, return y. */ + bnall a3, a6, .Lmul_returnx + slli a8, a3, 9 + beqz a8, .Lmul_returnx + +.Lmul_returny: + mov a2, a3 + +.Lmul_returnx: + /* Set the sign bit and return. */ + extui a7, a7, 31, 1 + slli a2, a2, 1 + ssai 1 + src a2, a7, a2 + j .Lmul_done + +.Lmul_ynan_or_inf: + /* If x is zero, return NaN. */ + slli a8, a2, 1 + bnez a8, .Lmul_returny + movi a7, 0x400000 /* make it a quiet NaN */ + or a2, a3, a7 + j .Lmul_done + + .align 4 + .global __mulsf3 + .type __mulsf3, @function +__mulsf3: +#if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 + addi sp, sp, -32 + s32i a12, sp, 16 + s32i a13, sp, 20 + s32i a14, sp, 24 + s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 64 +#else + leaf_entry sp, 32 +#endif + movi a6, 0x7f800000 + + /* Get the sign of the result. */ + xor a7, a2, a3 + + /* Check for NaN and infinity. */ + ball a2, a6, .Lmul_xnan_or_inf + ball a3, a6, .Lmul_ynan_or_inf + + /* Extract the exponents. */ + extui a8, a2, 23, 8 + extui a9, a3, 23, 8 + + beqz a8, .Lmul_xexpzero +.Lmul_xnormalized: + beqz a9, .Lmul_yexpzero +.Lmul_ynormalized: + + /* Add the exponents. */ + add a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0xffffff + or a2, a2, a6 + and a2, a2, a10 + or a3, a3, a6 + and a3, a3, a10 + + /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */ + +#if XCHAL_HAVE_MUL32_HIGH + + mull a6, a2, a3 + muluh a2, a2, a3 + +#else + + /* Break the inputs into 16-bit chunks and compute 4 32-bit partial + products. These partial products are: + + 0 xl * yl + + 1 xl * yh + 2 xh * yl + + 3 xh * yh + + If using the Mul16 or Mul32 multiplier options, these input + chunks must be stored in separate registers. For Mac16, the + UMUL.AA.* opcodes can specify that the inputs come from either + half of the registers, so there is no need to shift them out + ahead of time. If there is no multiply hardware, the 16-bit + chunks can be extracted when setting up the arguments to the + separate multiply function. */ + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Calling a separate multiply function will clobber a0 and requires + use of a8 as a temporary, so save those values now. (The function + uses a custom ABI so nothing else needs to be saved.) */ + s32i a0, sp, 0 + s32i a8, sp, 4 +#endif + +#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 + +#define a2h a4 +#define a3h a5 + + /* Get the high halves of the inputs into registers. */ + srli a2h, a2, 16 + srli a3h, a3, 16 + +#define a2l a2 +#define a3l a3 + +#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 + /* Clear the high halves of the inputs. This does not matter + for MUL16 because the high bits are ignored. 
*/ + extui a2, a2, 0, 16 + extui a3, a3, 0, 16 +#endif +#endif /* MUL16 || MUL32 */ + + +#if XCHAL_HAVE_MUL16 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mul16u dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MUL32 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mull dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MAC16 + +/* The preprocessor insists on inserting a space when concatenating after + a period in the definition of do_mul below. These macros are a workaround + using underscores instead of periods when doing the concatenation. */ +#define umul_aa_ll umul.aa.ll +#define umul_aa_lh umul.aa.lh +#define umul_aa_hl umul.aa.hl +#define umul_aa_hh umul.aa.hh + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + umul_aa_ ## xhalf ## yhalf xreg, yreg; \ + rsr dst, ACCLO + +#else /* no multiply hardware */ + +#define set_arg_l(dst, src) \ + extui dst, src, 0, 16 +#define set_arg_h(dst, src) \ + srli dst, src, 16 + +#if __XTENSA_CALL0_ABI__ +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a13, xreg); \ + set_arg_ ## yhalf (a14, yreg); \ + call0 .Lmul_mulsi3; \ + mov dst, a12 +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ + + /* Add pp1 and pp2 into a6 with carry-out in a9. */ + do_mul(a6, a2, l, a3, h) /* pp 1 */ + do_mul(a11, a2, h, a3, l) /* pp 2 */ + movi a9, 0 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Shift the high half of a9/a6 into position in a9. Note that + this value can be safely incremented without any carry-outs. */ + ssai 16 + src a9, a9, a6 + + /* Compute the low word into a6. */ + do_mul(a11, a2, l, a3, l) /* pp 0 */ + sll a6, a6 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Compute the high word into a2. */ + do_mul(a2, a2, h, a3, h) /* pp 3 */ + add a2, a2, a9 + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Restore values saved on the stack during the multiplication. */ + l32i a0, sp, 0 + l32i a8, sp, 4 +#endif +#endif /* ! XCHAL_HAVE_MUL32_HIGH */ + + /* Shift left by 9 bits, unless there was a carry-out from the + multiply, in which case, shift by 8 bits and increment the + exponent. */ + movi a4, 9 + srli a5, a2, 24 - 9 + beqz a5, 1f + addi a4, a4, -1 + addi a8, a8, 1 +1: ssl a4 + src a2, a2, a6 + sll a6, a6 + + /* Subtract the extra bias from the exponent sum (plus one to account + for the explicit "1.0" of the mantissa that will be added to the + exponent in the final result). */ + movi a4, 0x80 + sub a8, a8, a4 + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..fd are OK here. */ + movi a4, 0xfe + bgeu a8, a4, .Lmul_overflow + +.Lmul_round: + /* Round. */ + bgez a6, .Lmul_rounded + addi a2, a2, 1 + slli a6, a6, 1 + beqz a6, .Lmul_exactlyhalf + +.Lmul_rounded: + /* Add the exponent to the mantissa. */ + slli a8, a8, 23 + add a2, a2, a8 + +.Lmul_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or a2, a2, a7 + +.Lmul_done: +#if __XTENSA_CALL0_ABI__ + l32i a12, sp, 16 + l32i a13, sp, 20 + l32i a14, sp, 24 + l32i a15, sp, 28 + addi sp, sp, 32 +#endif + leaf_return + +.Lmul_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + j .Lmul_rounded + +.Lmul_overflow: + bltz a8, .Lmul_underflow + /* Return +/- Infinity. 
*/ + movi a8, 0xff + slli a2, a8, 23 + j .Lmul_addsign + +.Lmul_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + mov a9, a6 + ssr a8 + bgeui a8, 32, .Lmul_flush_to_zero + + /* Shift a2 right. Any bits that are shifted out of a2 are saved + in a6 (combined with the shifted-out bits currently in a6) for + rounding the result. */ + sll a6, a2 + srl a2, a2 + + /* Set the exponent to zero. */ + movi a8, 0 + + /* Pack any nonzero bits shifted out into a6. */ + beqz a9, .Lmul_round + movi a9, 1 + or a6, a6, a9 + j .Lmul_round + +.Lmul_flush_to_zero: + /* Return zero with the appropriate sign bit. */ + srli a2, a7, 31 + slli a2, a2, 31 + j .Lmul_done + +#if XCHAL_NO_MUL + + /* For Xtensa processors with no multiply hardware, this simplified + version of _mulsi3 is used for multiplying 16-bit chunks of + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ + .align 4 +.Lmul_mulsi3: + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ +#endif /* L_mulsf3 */ + +#ifdef L_divsf3 + + /* Division */ +__divsf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Ldiv_yexpzero: + /* Clear the sign bit of y. */ + slli a3, a3, 1 + srli a3, a3, 1 + + /* Check for division by zero. */ + beqz a3, .Ldiv_yzero + + /* Normalize y. Adjust the exponent in a9. */ + do_nsau a10, a3, a4, a5 + addi a10, a10, -8 + ssl a10 + sll a3, a3 + movi a9, 1 + sub a9, a9, a10 + j .Ldiv_ynormalized + +.Ldiv_yzero: + /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ + slli a4, a2, 1 + srli a4, a4, 1 + srli a2, a7, 31 + slli a2, a2, 31 + or a2, a2, a6 + bnez a4, 1f + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 +1: leaf_return + +.Ldiv_xexpzero: + /* Clear the sign bit of x. */ + slli a2, a2, 1 + srli a2, a2, 1 + + /* If x is zero, return zero. */ + beqz a2, .Ldiv_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + do_nsau a10, a2, a4, a5 + addi a10, a10, -8 + ssl a10 + sll a2, a2 + movi a8, 1 + sub a8, a8, a10 + j .Ldiv_xnormalized + +.Ldiv_return_zero: + /* Return zero with the appropriate sign bit. */ + srli a2, a7, 31 + slli a2, a2, 31 + leaf_return + +.Ldiv_xnan_or_inf: + /* Set the sign bit of the result. */ + srli a7, a3, 31 + slli a7, a7, 31 + xor a2, a2, a7 + /* If y is NaN or Inf, return NaN. 
*/ + bnall a3, a6, 1f + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 +1: leaf_return + +.Ldiv_ynan_or_inf: + /* If y is Infinity, return zero. */ + slli a8, a3, 9 + beqz a8, .Ldiv_return_zero + /* y is NaN; return it. */ + mov a2, a3 + leaf_return + + .align 4 + .global __divsf3 + .type __divsf3, @function +__divsf3: + leaf_entry sp, 16 + movi a6, 0x7f800000 + + /* Get the sign of the result. */ + xor a7, a2, a3 + + /* Check for NaN and infinity. */ + ball a2, a6, .Ldiv_xnan_or_inf + ball a3, a6, .Ldiv_ynan_or_inf + + /* Extract the exponents. */ + extui a8, a2, 23, 8 + extui a9, a3, 23, 8 + + beqz a9, .Ldiv_yexpzero +.Ldiv_ynormalized: + beqz a8, .Ldiv_xexpzero +.Ldiv_xnormalized: + + /* Subtract the exponents. */ + sub a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0xffffff + or a2, a2, a6 + and a2, a2, a10 + or a3, a3, a6 + and a3, a3, a10 + + /* The first digit of the mantissa division must be a one. + Shift x (and adjust the exponent) as needed to make this true. */ + bltu a3, a2, 1f + slli a2, a2, 1 + addi a8, a8, -1 +1: + /* Do the first subtraction and shift. */ + sub a2, a2, a3 + slli a2, a2, 1 + + /* Put the quotient into a10. */ + movi a10, 1 + + /* Divide one bit at a time for 23 bits. */ + movi a9, 23 +#if XCHAL_HAVE_LOOPS + loop a9, .Ldiv_loopend +#endif +.Ldiv_loop: + /* Shift the quotient << 1. */ + slli a10, a10, 1 + + /* Is this digit a 0 or 1? */ + bltu a2, a3, 1f + + /* Output a 1 and subtract. */ + addi a10, a10, 1 + sub a2, a2, a3 + + /* Shift the dividend << 1. */ +1: slli a2, a2, 1 + +#if !XCHAL_HAVE_LOOPS + addi a9, a9, -1 + bnez a9, .Ldiv_loop +#endif +.Ldiv_loopend: + + /* Add the exponent bias (less one to account for the explicit "1.0" + of the mantissa that will be added to the exponent in the final + result). */ + addi a8, a8, 0x7e + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..fd are OK here. */ + movi a4, 0xfe + bgeu a8, a4, .Ldiv_overflow + +.Ldiv_round: + /* Round. The remainder (<< 1) is in a2. */ + bltu a2, a3, .Ldiv_rounded + addi a10, a10, 1 + beq a2, a3, .Ldiv_exactlyhalf + +.Ldiv_rounded: + /* Add the exponent to the mantissa. */ + slli a8, a8, 23 + add a2, a10, a8 + +.Ldiv_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or a2, a2, a7 + leaf_return + +.Ldiv_overflow: + bltz a8, .Ldiv_underflow + /* Return +/- Infinity. */ + addi a8, a4, 1 /* 0xff */ + slli a2, a8, 23 + j .Ldiv_addsign + +.Ldiv_exactlyhalf: + /* Remainder is exactly half the divisor. Round even. */ + srli a10, a10, 1 + slli a10, a10, 1 + j .Ldiv_rounded + +.Ldiv_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + ssr a8 + bgeui a8, 32, .Ldiv_flush_to_zero + + /* Shift a10 right. Any bits that are shifted out of a10 are + saved in a6 for rounding the result. */ + sll a6, a10 + srl a10, a10 + + /* Set the exponent to zero. */ + movi a8, 0 + + /* Pack any nonzero remainder (in a2) into a6. */ + beqz a2, 1f + movi a9, 1 + or a6, a6, a9 + + /* Round a10 based on the bits shifted out into a6. */ +1: bgez a6, .Ldiv_rounded + addi a10, a10, 1 + slli a6, a6, 1 + bnez a6, .Ldiv_rounded + srli a10, a10, 1 + slli a10, a10, 1 + j .Ldiv_rounded + +.Ldiv_flush_to_zero: + /* Return zero with the appropriate sign bit. 
*/ + srli a2, a7, 31 + slli a2, a2, 31 + leaf_return + +#endif /* L_divsf3 */ + +#ifdef L_cmpsf2 + + /* Equal and Not Equal */ + + .align 4 + .global __eqsf2 + .global __nesf2 + .set __nesf2, __eqsf2 + .type __eqsf2, @function +__eqsf2: + leaf_entry sp, 16 + bne a2, a3, 4f + + /* The values are equal but NaN != NaN. Check the exponent. */ + movi a6, 0x7f800000 + ball a2, a6, 3f + + /* Equal. */ + movi a2, 0 + leaf_return + + /* Not equal. */ +2: movi a2, 1 + leaf_return + + /* Check if the mantissas are nonzero. */ +3: slli a7, a2, 9 + j 5f + + /* Check if x and y are zero with different signs. */ +4: or a7, a2, a3 + slli a7, a7, 1 + + /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa + or x when exponent(x) = 0x7f8 and x == y. */ +5: movi a2, 0 + movi a3, 1 + movnez a2, a3, a7 + leaf_return + + + /* Greater Than */ + + .align 4 + .global __gtsf2 + .type __gtsf2, @function +__gtsf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Lle_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, 0 + leaf_return + + + /* Less Than or Equal */ + + .align 4 + .global __lesf2 + .type __lesf2, @function +__lesf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Lle_cmp + movi a2, 1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, 1 + leaf_return + +.Lle_cmp: + /* Check if x and y have different signs. */ + xor a7, a2, a3 + bltz a7, .Lle_diff_signs + + /* Check if x is negative. */ + bltz a2, .Lle_xneg + + /* Check if x <= y. */ + bltu a3, a2, 5f +4: movi a2, 0 + leaf_return + +.Lle_xneg: + /* Check if y <= x. */ + bgeu a2, a3, 4b +5: movi a2, 1 + leaf_return + +.Lle_diff_signs: + bltz a2, 4b + + /* Check if both x and y are zero. */ + or a7, a2, a3 + slli a7, a7, 1 + movi a2, 1 + movi a3, 0 + moveqz a2, a3, a7 + leaf_return + + + /* Greater Than or Equal */ + + .align 4 + .global __gesf2 + .type __gesf2, @function +__gesf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Llt_cmp + movi a2, -1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, -1 + leaf_return + + + /* Less Than */ + + .align 4 + .global __ltsf2 + .type __ltsf2, @function +__ltsf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Llt_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, 0 + leaf_return + +.Llt_cmp: + /* Check if x and y have different signs. */ + xor a7, a2, a3 + bltz a7, .Llt_diff_signs + + /* Check if x is negative. */ + bltz a2, .Llt_xneg + + /* Check if x < y. */ + bgeu a2, a3, 5f +4: movi a2, -1 + leaf_return + +.Llt_xneg: + /* Check if y < x. */ + bltu a3, a2, 4b +5: movi a2, 0 + leaf_return + +.Llt_diff_signs: + bgez a2, 5b + + /* Check if both x and y are nonzero. 
*/ + or a7, a2, a3 + slli a7, a7, 1 + movi a2, 0 + movi a3, -1 + movnez a2, a3, a7 + leaf_return + + + /* Unordered */ + + .align 4 + .global __unordsf2 + .type __unordsf2, @function +__unordsf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 3f +1: ball a3, a6, 4f +2: movi a2, 0 + leaf_return + +3: slli a7, a2, 9 + beqz a7, 1b + movi a2, 1 + leaf_return + +4: slli a7, a3, 9 + beqz a7, 2b + movi a2, 1 + leaf_return + +#endif /* L_cmpsf2 */ + +#ifdef L_fixsfsi + + .align 4 + .global __fixsfsi + .type __fixsfsi, @function +__fixsfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixsfsi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7e + bgei a4, 32, .Lfixsfsi_maxint + blti a4, 1, .Lfixsfsi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli a5, a7, 8 + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixsfsi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, a2, 9 + beqz a4, .Lfixsfsi_maxint + + /* Translate NaN to +maxint. */ + movi a2, 0 + +.Lfixsfsi_maxint: + slli a4, a6, 8 /* 0x80000000 */ + addi a5, a4, -1 /* 0x7fffffff */ + movgez a4, a5, a2 + mov a2, a4 + leaf_return + +.Lfixsfsi_zero: + movi a2, 0 + leaf_return + +#endif /* L_fixsfsi */ + +#ifdef L_fixsfdi + + .align 4 + .global __fixsfdi + .type __fixsfdi, @function +__fixsfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixsfdi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7e + bgei a4, 64, .Lfixsfdi_maxint + blti a4, 1, .Lfixsfdi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli xh, a7, 8 + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixsfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixsfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixsfdi_smallshift: + movi xl, 0 + sll xl, xh + srl xh, xh + j .Lfixsfdi_shifted + +.Lfixsfdi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, a2, 9 + beqz a4, .Lfixsfdi_maxint + + /* Translate NaN to +maxint. */ + movi a2, 0 + +.Lfixsfdi_maxint: + slli a7, a6, 8 /* 0x80000000 */ + bgez a2, 1f + mov xh, a7 + movi xl, 0 + leaf_return + +1: addi xh, a7, -1 /* 0x7fffffff */ + movi xl, -1 + leaf_return + +.Lfixsfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +#endif /* L_fixsfdi */ + +#ifdef L_fixunssfsi + + .align 4 + .global __fixunssfsi + .type __fixunssfsi, @function +__fixunssfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixunssfsi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7f + bgei a4, 32, .Lfixunssfsi_maxint + bltz a4, .Lfixunssfsi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli a5, a7, 8 + + /* Shift back to the right, based on the exponent. */ + addi a4, a4, 1 + beqi a4, 32, .Lfixunssfsi_bigexp + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixunssfsi_nan_or_inf: + /* Handle Infinity and NaN. 
*/ + slli a4, a2, 9 + beqz a4, .Lfixunssfsi_maxint + + /* Translate NaN to 0xffffffff. */ + movi a2, -1 + leaf_return + +.Lfixunssfsi_maxint: + slli a4, a6, 8 /* 0x80000000 */ + movi a5, -1 /* 0xffffffff */ + movgez a4, a5, a2 + mov a2, a4 + leaf_return + +.Lfixunssfsi_zero: + movi a2, 0 + leaf_return + +.Lfixunssfsi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz a2, 1f + mov a2, a5 /* no shift needed */ + leaf_return + + /* Return 0x80000000 if negative. */ +1: slli a2, a6, 8 + leaf_return + +#endif /* L_fixunssfsi */ + +#ifdef L_fixunssfdi + + .align 4 + .global __fixunssfdi + .type __fixunssfdi, @function +__fixunssfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixunssfdi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7f + bgei a4, 64, .Lfixunssfdi_maxint + bltz a4, .Lfixunssfdi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli xh, a7, 8 + + /* Shift back to the right, based on the exponent. */ + addi a4, a4, 1 + beqi a4, 64, .Lfixunssfdi_bigexp + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixunssfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixunssfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixunssfdi_smallshift: + movi xl, 0 + src xl, xh, xl + srl xh, xh + j .Lfixunssfdi_shifted + +.Lfixunssfdi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, a2, 9 + beqz a4, .Lfixunssfdi_maxint + + /* Translate NaN to 0xffffffff.... */ +1: movi xh, -1 + movi xl, -1 + leaf_return + +.Lfixunssfdi_maxint: + bgez a2, 1b +2: slli xh, a6, 8 /* 0x80000000 */ + movi xl, 0 + leaf_return + +.Lfixunssfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +.Lfixunssfdi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz a7, 2b + movi xl, 0 + leaf_return /* no shift needed */ + +#endif /* L_fixunssfdi */ + +#ifdef L_floatsisf + + .align 4 + .global __floatunsisf + .type __floatunsisf, @function +__floatunsisf: + leaf_entry sp, 16 + beqz a2, .Lfloatsisf_return + + /* Set the sign to zero and jump to the floatsisf code. */ + movi a7, 0 + j .Lfloatsisf_normalize + + .align 4 + .global __floatsisf + .type __floatsisf, @function +__floatsisf: + leaf_entry sp, 16 + + /* Check for zero. */ + beqz a2, .Lfloatsisf_return + + /* Save the sign. */ + extui a7, a2, 31, 1 + + /* Get the absolute value. */ +#if XCHAL_HAVE_ABS + abs a2, a2 +#else + neg a4, a2 + movltz a2, a4, a2 +#endif + +.Lfloatsisf_normalize: + /* Normalize with the first 1 bit in the msb. */ + do_nsau a4, a2, a5, a6 + ssl a4 + sll a5, a2 + + /* Shift the mantissa into position, with rounding bits in a6. */ + srli a2, a5, 8 + slli a6, a5, (32 - 8) + + /* Set the exponent. */ + movi a5, 0x9d /* 0x7e + 31 */ + sub a5, a5, a4 + slli a5, a5, 23 + add a2, a2, a5 + + /* Add the sign. */ + slli a7, a7, 31 + or a2, a2, a7 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a6, .Lfloatsisf_return + addi a2, a2, 1 /* Overflow to the exponent is OK. */ + + /* Check if the leftover fraction is exactly 1/2. */ + slli a6, a6, 1 + beqz a6, .Lfloatsisf_exactlyhalf + +.Lfloatsisf_return: + leaf_return + +.Lfloatsisf_exactlyhalf: + /* Round down to the nearest even value. 
*/ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +#endif /* L_floatsisf */ + +#ifdef L_floatdisf + + .align 4 + .global __floatundisf + .type __floatundisf, @function +__floatundisf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Set the sign to zero and jump to the floatdisf code. */ + movi a7, 0 + j .Lfloatdisf_normalize + + .align 4 + .global __floatdisf + .type __floatdisf, @function +__floatdisf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Save the sign. */ + extui a7, xh, 31, 1 + + /* Get the absolute value. */ + bgez xh, .Lfloatdisf_normalize + neg xl, xl + neg xh, xh + beqz xl, .Lfloatdisf_normalize + addi xh, xh, -1 + +.Lfloatdisf_normalize: + /* Normalize with the first 1 bit in the msb of xh. */ + beqz xh, .Lfloatdisf_bigshift + do_nsau a4, xh, a5, a6 + ssl a4 + src xh, xh, xl + sll xl, xl + +.Lfloatdisf_shifted: + /* Shift the mantissa into position, with rounding bits in a6. */ + ssai 8 + sll a5, xl + src a6, xh, xl + srl xh, xh + beqz a5, 1f + movi a5, 1 + or a6, a6, a5 +1: + /* Set the exponent. */ + movi a5, 0xbd /* 0x7e + 63 */ + sub a5, a5, a4 + slli a5, a5, 23 + add a2, xh, a5 + + /* Add the sign. */ + slli a7, a7, 31 + or a2, a2, a7 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a6, 2f + addi a2, a2, 1 /* Overflow to the exponent is OK. */ + + /* Check if the leftover fraction is exactly 1/2. */ + slli a6, a6, 1 + beqz a6, .Lfloatdisf_exactlyhalf +2: leaf_return + +.Lfloatdisf_bigshift: + /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ + do_nsau a4, xl, a5, a6 + ssl a4 + sll xh, xl + movi xl, 0 + addi a4, a4, 32 + j .Lfloatdisf_shifted + +.Lfloatdisf_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +#endif /* L_floatdisf */ diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S new file mode 100644 index 00000000000..071b9171177 --- /dev/null +++ b/libgcc/config/xtensa/lib1funcs.S @@ -0,0 +1,845 @@ +/* Assembly functions for the Xtensa version of libgcc1. + Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009 + Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#include "xtensa-config.h" + +/* Define macros for the ABS and ADDX* instructions to handle cases + where they are not included in the Xtensa processor configuration. 
*/ + + .macro do_abs dst, src, tmp +#if XCHAL_HAVE_ABS + abs \dst, \src +#else + neg \tmp, \src + movgez \tmp, \src, \src + mov \dst, \tmp +#endif + .endm + + .macro do_addx2 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx2 \dst, \as, \at +#else + slli \tmp, \as, 1 + add \dst, \tmp, \at +#endif + .endm + + .macro do_addx4 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx4 \dst, \as, \at +#else + slli \tmp, \as, 2 + add \dst, \tmp, \at +#endif + .endm + + .macro do_addx8 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx8 \dst, \as, \at +#else + slli \tmp, \as, 3 + add \dst, \tmp, \at +#endif + .endm + +/* Define macros for leaf function entry and return, supporting either the + standard register windowed ABI or the non-windowed call0 ABI. These + macros do not allocate any extra stack space, so they only work for + leaf functions that do not need to spill anything to the stack. */ + + .macro leaf_entry reg, size +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + entry \reg, \size +#else + /* do nothing */ +#endif + .endm + + .macro leaf_return +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + retw +#else + ret +#endif + .endm + + +#ifdef L_mulsi3 + .align 4 + .global __mulsi3 + .type __mulsi3, @function +__mulsi3: + leaf_entry sp, 16 + +#if XCHAL_HAVE_MUL32 + mull a2, a2, a3 + +#elif XCHAL_HAVE_MUL16 + or a4, a2, a3 + srai a4, a4, 16 + bnez a4, .LMUL16 + mul16u a2, a2, a3 + leaf_return +.LMUL16: + srai a4, a2, 16 + srai a5, a3, 16 + mul16u a7, a4, a3 + mul16u a6, a5, a2 + mul16u a4, a2, a3 + add a7, a7, a6 + slli a7, a7, 16 + add a2, a7, a4 + +#elif XCHAL_HAVE_MAC16 + mul.aa.hl a2, a3 + mula.aa.lh a2, a3 + rsr a5, ACCLO + umul.aa.ll a2, a3 + rsr a4, ACCLO + slli a5, a5, 16 + add a2, a4, a5 + +#else /* !MUL32 && !MUL16 && !MAC16 */ + + /* Multiply one bit at a time, but unroll the loop 4x to better + exploit the addx instructions and avoid overhead. + Peel the first iteration to save a cycle on init. */ + + /* Avoid negative numbers. */ + xor a5, a2, a3 /* Top bit is 1 if one input is negative. */ + do_abs a3, a3, a6 + do_abs a2, a2, a6 + + /* Swap so the second argument is smaller. */ + sub a7, a2, a3 + mov a4, a3 + movgez a4, a2, a7 /* a4 = max (a2, a3) */ + movltz a3, a2, a7 /* a3 = min (a2, a3) */ + + movi a2, 0 + extui a6, a3, 0, 1 + movnez a2, a4, a6 + + do_addx2 a7, a4, a2, a7 + extui a6, a3, 1, 1 + movnez a2, a7, a6 + + do_addx4 a7, a4, a2, a7 + extui a6, a3, 2, 1 + movnez a2, a7, a6 + + do_addx8 a7, a4, a2, a7 + extui a6, a3, 3, 1 + movnez a2, a7, a6 + + bgeui a3, 16, .Lmult_main_loop + neg a3, a2 + movltz a2, a3, a5 + leaf_return + + .align 4 +.Lmult_main_loop: + srli a3, a3, 4 + slli a4, a4, 4 + + add a7, a4, a2 + extui a6, a3, 0, 1 + movnez a2, a7, a6 + + do_addx2 a7, a4, a2, a7 + extui a6, a3, 1, 1 + movnez a2, a7, a6 + + do_addx4 a7, a4, a2, a7 + extui a6, a3, 2, 1 + movnez a2, a7, a6 + + do_addx8 a7, a4, a2, a7 + extui a6, a3, 3, 1 + movnez a2, a7, a6 + + bgeui a3, 16, .Lmult_main_loop + + neg a3, a2 + movltz a2, a3, a5 + +#endif /* !MUL32 && !MUL16 && !MAC16 */ + + leaf_return + .size __mulsi3, . 
- __mulsi3 + +#endif /* L_mulsi3 */ + + +#ifdef L_umulsidi3 + +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + + .align 4 + .global __umulsidi3 + .type __umulsidi3, @function +__umulsidi3: +#if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 + addi sp, sp, -32 + s32i a12, sp, 16 + s32i a13, sp, 20 + s32i a14, sp, 24 + s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 48 +#else + leaf_entry sp, 16 +#endif + +#ifdef __XTENSA_EB__ +#define wh a2 +#define wl a3 +#else +#define wh a3 +#define wl a2 +#endif /* __XTENSA_EB__ */ + + /* This code is taken from the mulsf3 routine in ieee754-sf.S. + See more comments there. */ + +#if XCHAL_HAVE_MUL32_HIGH + mull a6, a2, a3 + muluh wh, a2, a3 + mov wl, a6 + +#else /* ! MUL32_HIGH */ + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* a0 and a8 will be clobbered by calling the multiply function + but a8 is not used here and need not be saved. */ + s32i a0, sp, 0 +#endif + +#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 + +#define a2h a4 +#define a3h a5 + + /* Get the high halves of the inputs into registers. */ + srli a2h, a2, 16 + srli a3h, a3, 16 + +#define a2l a2 +#define a3l a3 + +#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 + /* Clear the high halves of the inputs. This does not matter + for MUL16 because the high bits are ignored. */ + extui a2, a2, 0, 16 + extui a3, a3, 0, 16 +#endif +#endif /* MUL16 || MUL32 */ + + +#if XCHAL_HAVE_MUL16 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mul16u dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MUL32 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mull dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MAC16 + +/* The preprocessor insists on inserting a space when concatenating after + a period in the definition of do_mul below. These macros are a workaround + using underscores instead of periods when doing the concatenation. */ +#define umul_aa_ll umul.aa.ll +#define umul_aa_lh umul.aa.lh +#define umul_aa_hl umul.aa.hl +#define umul_aa_hh umul.aa.hh + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + umul_aa_ ## xhalf ## yhalf xreg, yreg; \ + rsr dst, ACCLO + +#else /* no multiply hardware */ + +#define set_arg_l(dst, src) \ + extui dst, src, 0, 16 +#define set_arg_h(dst, src) \ + srli dst, src, 16 + +#if __XTENSA_CALL0_ABI__ +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a13, xreg); \ + set_arg_ ## yhalf (a14, yreg); \ + call0 .Lmul_mulsi3; \ + mov dst, a12 +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ + + /* Add pp1 and pp2 into a6 with carry-out in a9. */ + do_mul(a6, a2, l, a3, h) /* pp 1 */ + do_mul(a11, a2, h, a3, l) /* pp 2 */ + movi a9, 0 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Shift the high half of a9/a6 into position in a9. Note that + this value can be safely incremented without any carry-outs. */ + ssai 16 + src a9, a9, a6 + + /* Compute the low word into a6. */ + do_mul(a11, a2, l, a3, l) /* pp 0 */ + sll a6, a6 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Compute the high word into wh. */ + do_mul(wh, a2, h, a3, h) /* pp 3 */ + add wh, wh, a9 + mov wl, a6 + +#endif /* !MUL32_HIGH */ + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Restore the original return address. 
*/ + l32i a0, sp, 0 +#endif +#if __XTENSA_CALL0_ABI__ + l32i a12, sp, 16 + l32i a13, sp, 20 + l32i a14, sp, 24 + l32i a15, sp, 28 + addi sp, sp, 32 +#endif + leaf_return + +#if XCHAL_NO_MUL + + /* For Xtensa processors with no multiply hardware, this simplified + version of _mulsi3 is used for multiplying 16-bit chunks of + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ + .align 4 +.Lmul_mulsi3: + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ + + .size __umulsidi3, . - __umulsidi3 + +#endif /* L_umulsidi3 */ + + +/* Define a macro for the NSAU (unsigned normalize shift amount) + instruction, which computes the number of leading zero bits, + to handle cases where it is not included in the Xtensa processor + configuration. */ + + .macro do_nsau cnt, val, tmp, a +#if XCHAL_HAVE_NSA + nsau \cnt, \val +#else + mov \a, \val + movi \cnt, 0 + extui \tmp, \a, 16, 16 + bnez \tmp, 0f + movi \cnt, 16 + slli \a, \a, 16 +0: + extui \tmp, \a, 24, 8 + bnez \tmp, 1f + addi \cnt, \cnt, 8 + slli \a, \a, 8 +1: + movi \tmp, __nsau_data + extui \a, \a, 24, 8 + add \tmp, \tmp, \a + l8ui \tmp, \tmp, 0 + add \cnt, \cnt, \tmp +#endif /* !XCHAL_HAVE_NSA */ + .endm + +#ifdef L_clz + .section .rodata + .align 4 + .global __nsau_data + .type __nsau_data, @object +__nsau_data: +#if !XCHAL_HAVE_NSA + .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 + .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 + .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +#endif /* !XCHAL_HAVE_NSA */ + .size __nsau_data, . - __nsau_data + .hidden __nsau_data +#endif /* L_clz */ + + +#ifdef L_clzsi2 + .align 4 + .global __clzsi2 + .type __clzsi2, @function +__clzsi2: + leaf_entry sp, 16 + do_nsau a2, a2, a3, a4 + leaf_return + .size __clzsi2, . 
- __clzsi2 + +#endif /* L_clzsi2 */ + + +#ifdef L_ctzsi2 + .align 4 + .global __ctzsi2 + .type __ctzsi2, @function +__ctzsi2: + leaf_entry sp, 16 + neg a3, a2 + and a3, a3, a2 + do_nsau a2, a3, a4, a5 + neg a2, a2 + addi a2, a2, 31 + leaf_return + .size __ctzsi2, . - __ctzsi2 + +#endif /* L_ctzsi2 */ + + +#ifdef L_ffssi2 + .align 4 + .global __ffssi2 + .type __ffssi2, @function +__ffssi2: + leaf_entry sp, 16 + neg a3, a2 + and a3, a3, a2 + do_nsau a2, a3, a4, a5 + neg a2, a2 + addi a2, a2, 32 + leaf_return + .size __ffssi2, . - __ffssi2 + +#endif /* L_ffssi2 */ + + +#ifdef L_udivsi3 + .align 4 + .global __udivsi3 + .type __udivsi3, @function +__udivsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + quou a2, a2, a3 +#else + bltui a3, 2, .Lle_one /* check if the divisor <= 1 */ + + mov a6, a2 /* keep dividend in a6 */ + do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */ + do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ + ssl a4 + sll a3, a3 /* divisor <<= count */ + movi a2, 0 /* quotient = 0 */ + + /* test-subtract-and-shift loop; one quotient bit on each iteration */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a6, a3, .Lzerobit + sub a6, a6, a3 + addi a2, a2, 1 +.Lzerobit: + slli a2, a2, 1 + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + + bltu a6, a3, .Lreturn + addi a2, a2, 1 /* increment quotient if dividend >= divisor */ +.Lreturn: + leaf_return + +.Lle_one: + beqz a3, .Lerror /* if divisor == 1, return the dividend */ + leaf_return + +.Lspecial: + /* return dividend >= divisor */ + bltu a6, a3, .Lreturn0 + movi a2, 1 + leaf_return + +.Lerror: + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __udivsi3, . - __udivsi3 + +#endif /* L_udivsi3 */ + + +#ifdef L_divsi3 + .align 4 + .global __divsi3 + .type __divsi3, @function +__divsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + quos a2, a2, a3 +#else + xor a7, a2, a3 /* sign = dividend ^ divisor */ + do_abs a6, a2, a4 /* udividend = abs (dividend) */ + do_abs a3, a3, a4 /* udivisor = abs (divisor) */ + bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ + do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */ + do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ + ssl a4 + sll a3, a3 /* udivisor <<= count */ + movi a2, 0 /* quotient = 0 */ + + /* test-subtract-and-shift loop; one quotient bit on each iteration */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a6, a3, .Lzerobit + sub a6, a6, a3 + addi a2, a2, 1 +.Lzerobit: + slli a2, a2, 1 + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + + bltu a6, a3, .Lreturn + addi a2, a2, 1 /* increment if udividend >= udivisor */ +.Lreturn: + neg a5, a2 + movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */ + leaf_return + +.Lle_one: + beqz a3, .Lerror + neg a2, a6 /* if udivisor == 1, then return... */ + movgez a2, a6, a7 /* (sign < 0) ? 
-udividend : udividend */ + leaf_return + +.Lspecial: + bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */ + movi a2, 1 + movi a4, -1 + movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */ + leaf_return + +.Lerror: + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __divsi3, . - __divsi3 + +#endif /* L_divsi3 */ + + +#ifdef L_umodsi3 + .align 4 + .global __umodsi3 + .type __umodsi3, @function +__umodsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + remu a2, a2, a3 +#else + bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */ + + do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */ + do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ + ssl a4 + sll a3, a3 /* divisor <<= count */ + + /* test-subtract-and-shift loop */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a2, a3, .Lzerobit + sub a2, a2, a3 +.Lzerobit: + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + +.Lspecial: + bltu a2, a3, .Lreturn + sub a2, a2, a3 /* subtract once more if dividend >= divisor */ +.Lreturn: + leaf_return + +.Lle_one: + bnez a3, .Lreturn0 + + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __umodsi3, . - __umodsi3 + +#endif /* L_umodsi3 */ + + +#ifdef L_modsi3 + .align 4 + .global __modsi3 + .type __modsi3, @function +__modsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + rems a2, a2, a3 +#else + mov a7, a2 /* save original (signed) dividend */ + do_abs a2, a2, a4 /* udividend = abs (dividend) */ + do_abs a3, a3, a4 /* udivisor = abs (divisor) */ + bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ + do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */ + do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ + ssl a4 + sll a3, a3 /* udivisor <<= count */ + + /* test-subtract-and-shift loop */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a2, a3, .Lzerobit + sub a2, a2, a3 +.Lzerobit: + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + +.Lspecial: + bltu a2, a3, .Lreturn + sub a2, a2, a3 /* subtract again if udividend >= udivisor */ +.Lreturn: + bgez a7, .Lpositive + neg a2, a2 /* if (dividend < 0), return -udividend */ +.Lpositive: + leaf_return + +.Lle_one: + bnez a3, .Lreturn0 + + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __modsi3, . 
- __modsi3 + +#endif /* L_modsi3 */ + + +#ifdef __XTENSA_EB__ +#define uh a2 +#define ul a3 +#else +#define uh a3 +#define ul a2 +#endif /* __XTENSA_EB__ */ + + +#ifdef L_ashldi3 + .align 4 + .global __ashldi3 + .type __ashldi3, @function +__ashldi3: + leaf_entry sp, 16 + ssl a4 + bgei a4, 32, .Llow_only + src uh, uh, ul + sll ul, ul + leaf_return + +.Llow_only: + sll uh, ul + movi ul, 0 + leaf_return + .size __ashldi3, . - __ashldi3 + +#endif /* L_ashldi3 */ + + +#ifdef L_ashrdi3 + .align 4 + .global __ashrdi3 + .type __ashrdi3, @function +__ashrdi3: + leaf_entry sp, 16 + ssr a4 + bgei a4, 32, .Lhigh_only + src ul, uh, ul + sra uh, uh + leaf_return + +.Lhigh_only: + sra ul, uh + srai uh, uh, 31 + leaf_return + .size __ashrdi3, . - __ashrdi3 + +#endif /* L_ashrdi3 */ + + +#ifdef L_lshrdi3 + .align 4 + .global __lshrdi3 + .type __lshrdi3, @function +__lshrdi3: + leaf_entry sp, 16 + ssr a4 + bgei a4, 32, .Lhigh_only1 + src ul, uh, ul + srl uh, uh + leaf_return + +.Lhigh_only1: + srl ul, uh + movi uh, 0 + leaf_return + .size __lshrdi3, . - __lshrdi3 + +#endif /* L_lshrdi3 */ + + +#include "ieee754-df.S" +#include "ieee754-sf.S" diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa index 7d9e9db0487..5bcc0946243 100644 --- a/libgcc/config/xtensa/t-xtensa +++ b/libgcc/config/xtensa/t-xtensa @@ -1,2 +1,14 @@ +LIB1ASMSRC = xtensa/lib1funcs.S +LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ + _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ + _ashldi3 _ashrdi3 _lshrdi3 \ + _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ + _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \ + _floatdisf _floatundisf \ + _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \ + _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \ + _floatdidf _floatundidf \ + _truncdfsf2 _extendsfdf2 + LIB2ADDEH = $(srcdir)/config/xtensa/unwind-dw2-xtensa.c \ $(srcdir)/unwind-dw2-fde.c $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
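
Editor's note (not part of the patch): the single-precision routines above all share the same
round-to-nearest-even step at their .L*_round / .L*_exactlyhalf labels: round up when the bits
shifted out are >= 1/2 ulp, and if they are exactly 1/2 ulp, clear the low bit of the result so it
ends up even. The following is a minimal, hypothetical C sketch of that step under those assumptions;
the names round_nearest_even, frac and guard are illustrative only and do not appear in the patch.

#include <stdint.h>

/* Hypothetical sketch of round-to-nearest-even as performed by the
   .L*_round / .L*_exactlyhalf paths in ieee754-sf.S above.  'frac' is the
   result being assembled (implicit 1.0 included) and 'guard' holds the bits
   shifted out of it, with the half-ulp bit in the msb -- the role played by
   a9 (add/sub) or a6 (mul) in the assembly.  */
static uint32_t
round_nearest_even (uint32_t frac, uint32_t guard)
{
  if (guard & 0x80000000u)	/* leftover fraction >= 1/2: round up.  */
    {
      frac += 1;		/* carry into the exponent field is allowed.  */
      if ((guard << 1) == 0)	/* exactly 1/2: round down to even.  */
	frac &= ~1u;
    }
  return frac;
}

In the assembly the same test is a bgez on the guard register (msb check), then an slli/beqz to
detect the exact-half case, and a final srli/slli pair that clears the low bit to force an even
result.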