Move libgcc1 to toplevel libgcc

author Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>

Wed, 2 Nov 2011 15:03:19 +0000 (15:03 +0000)

committer Rainer Orth <ro@gcc.gnu.org>

Wed, 2 Nov 2011 15:03:19 +0000 (15:03 +0000)
author Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
Wed, 2 Nov 2011 15:03:19 +0000 (15:03 +0000)
committer Rainer Orth <ro@gcc.gnu.org>
Wed, 2 Nov 2011 15:03:19 +0000 (15:03 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 5c3a91da561c4c3b0180b8585866361b7572b786..071cce6c29c67dc2b9357cc099ae3165fded0b2e 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,109 @@
+2011-11-02  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
+
+       * Makefile.in (LIB1ASMSRC): Don't export.
+       (libgcc.mvars): Don't emit LIB1ASMFUNCS, LIB1ASMSRC.
+       * config/arm/arm.c: Update lib1funcs.asm filename.
+       * config/arm/linux-eabi.h: Likewise.
+       * config/arm/bpabi-v6m.S, config/arm/bpabi.S,
+       config/arm/ieee754-df.S, config/arm/ieee754-sf.S: Move to
+       ../libgcc/config/arm.
+       * config/arm/lib1funcs.asm: Move to ../libgcc/config/arm/lib1funcs.S.
+       * config/arm/t-arm (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/arm/t-arm-elf (LIB1ASMFUNCS): Remove.
+       * config/arm/t-bpabi: Likewise.
+       * config/arm/t-linux (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/arm/t-linux-eabi (LIB1ASMFUNCS): Remove.
+       * config/arm/t-strongarm-elf: Likewise.
+       * config/arm/t-symbian: Likewise.
+       * config/arm/t-vxworks: Likewise.
+       * config/arm/t-wince-pe: Likewise.
+       * config/avr/libgcc.S: Move to ../libgcc/config/avr.
+       * config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/bfin/lib1funcs.asm: Move to
+       ../libgcc/config/bfin/lib1funcs.S.
+       * config/bfin/t-bfin: Remove.
+       * config/bfin/t-bfin-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/bfin/t-bfin-linux: Likewise.
+       * config/bfin/t-bfin-uclinux: Likewise.
+       * config/c6x/lib1funcs.asm: Move to
+       ../libgcc/config/c6x/lib1funcs.S.
+       * config/c6x/t-c6x-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/fr30/lib1funcs.asm: Move to
+       ../libgcc/config/fr30/lib1funcs.S.
+       * config/fr30/t-fr30 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/frv/lib1funcs.asm: Move to
+       ../libgcc/config/frv/lib1funcs.S.
+       * config/frv/t-frv (CROSS_LIBGCC1, LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/h8300/fixunssfsi.c: Update lib1funcs.asm filename.
+       * config/h8300/lib1funcs.asm: Move to
+       ../libgcc/config/h8300/lib1funcs.S.
+       * config/h8300/t-h8300 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/i386/cygwin.asm: Move to ../libgcc/config/i386/cygwin.S.
+       * config/i386/t-cygming (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/i386/t-interix: Likewise.
+       * config/ia64/lib1funcs.asm: Move to
+       ../libgcc/config/ia64/lib1funcs.S.
+       * config/ia64/t-hpux (LIB1ASMFUNCS, LIBGCC1_TEST): Remove.
+       * config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/iq2000/t-iq2000 (LIBGCC1, CROSS_LIBGCC1): Remove.
+       * config/m32c/m32c.c: Update m32c-lib1.S filename.
+       * config/m32c/m32c-lib1.S: Move to ../libgcc/config/m32c/lib1funcs.S.
+       * config/m32c/t-m32c (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/m32r/t-linux (CROSS_LIBGCC1, LIBGCC1, LIBGCC1_TEST): Remove.
+       * config/m68k/lb1sf68.asm: Move to ../libgcc/config/m68k/lb1sf68.S.
+       * config/m68k/t-floatlib (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/mcore/lib1.asm: Move to ../libgcc/config/mcore/lib1funcs.S.
+       * config/mcore/t-mcore (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/mep/mep-lib1.asm: Move to ../libgcc/config/mep/lib1funcs.S.
+       * config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/mips/mips16.S: Move to ../libgcc/config/mips.
+       * config/mips/t-libgcc-mips16: Remove.
+       * config/mips/t-sr71k (LIBGCC1, CROSS_LIBGCC1): Remove.
+       * config/pa/milli64.S: Move to ../libgcc/config/pa.
+       * config/pa/t-linux (LIB1ASMFUNCS, LIB1ASMSRC): Remove.
+       * config/pa/t-linux64: Likewise.
+       * config/picochip/libgccExtras/fake_libgcc.asm: Move to
+       ../libgcc/config/picochip/lib1funcs.S.
+       * config/picochip/t-picochip (LIB1ASMFUNCS, LIB1ASMSRC): Remove.
+       * config/sh/lib1funcs.asm: Move to ../libgcc/config/sh/lib1funcs.S.
+       * config/sh/lib1funcs.h: Move to ../libgcc/config/sh.
+       * config/sh/sh.h: Update lib1funcs.asm filename.
+       * config/sh/t-linux (LIB1ASMFUNCS_CACHE): Remove.
+       * config/sh/t-netbsd: Likewise.
+       * config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE):
+       Remove.
+       * config/sh/t-sh64 (LIB1ASMFUNCS): Remove.
+       * config/sparc/lb1spc.asm: Move to ../libgcc/config/sparc/lb1spc.S.
+       * config/sparc/lb1spl.asm: Remove.
+       * config/sparc/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/sparc/t-leon: Likewise.
+       * config/spu/t-spu-elf (LIBGCC1, CROSS_LIBGCC1): Remove.
+       * config/v850/lib1funcs.asm: Move to ../libgcc/config/v850/lib1funcs.S.
+       * config/v850/t-v850 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config/vax/lib1funcs.asm: Move to ../libgcc/config/vax/lib1funcs.S.
+       * config/vax/t-linux: Remove.
+       * config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S: Move to
+       ../libgcc/config/xtensa.
+       * config/xtensa/lib1funcs.asm: Move to
+       ../libgcc/config/xtensa/lib1funcs.S.
+       * config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
+       * config.gcc (bfin*-rtems*): Remove bfin/t-bfin from tmake_file.
+       (bfin*-*): Likewise.
+       (mips64*-*-linux*, mipsisa64*-*-linux*): Remove
+       mips/t-libgcc-mips16 from tmake_file.
+       (mips*-*-linux*): Likewise.
+       (mips*-sde-elf*): Likewise.
+       (mipsisa32-*-elf*, mipsisa32el-*-elf*, mipsisa32r2-*-elf*)
+       (mipsisa32r2el-*-elf*, mipsisa64-*-elf*, mipsisa64el-*-elf*)
+       (mipsisa64r2-*-elf*, mipsisa64r2el-*-elf*): Likewise.
+       (mipsisa64sb1-*-elf*, mipsisa64sb1el-*-elf*): Likewise.
+       (mips-*-elf*, mipsel-*-elf*): Likewise.
+       (mips64-*-elf*, mips64el-*-elf*): Likewise.
+       (mips64orion-*-elf*, mips64orionel-*-elf*): Likewise.
+       (mips*-*-rtems*): Likewise.
+       (mipstx39-*-elf*, mipstx39el-*-elf*): Likewise.
+       (vax-*-linux*): Remove vax/t-linux from tmake_file.
+
  2011-11-02  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
  
         * config.gcc (extra_parts): Remove.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in

index b6951dc1486c99a7cf56be4e320e47d5a437da62..38449d7c30e8b3ca46b6cbe663168c83605949e9 100644 (file)
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1110,7 +1110,6 @@ export DESTDIR
  export GCC_FOR_TARGET
  export INCLUDES
  export INSTALL_DATA
-export LIB1ASMSRC
  export LIBGCC2_CFLAGS
  export LIPO_FOR_TARGET
  export MACHMODE_H
@@ -1878,8 +1877,6 @@ libgcc-support: libgcc.mvars stmp-int-hdrs $(TCONFIG_H) \
  libgcc.mvars: config.status Makefile $(LIB2ADD) $(LIB2ADD_ST) specs \
                 xgcc$(exeext)
         : > tmp-libgcc.mvars
-       echo LIB1ASMFUNCS = '$(LIB1ASMFUNCS)' >> tmp-libgcc.mvars
-       echo LIB1ASMSRC = '$(LIB1ASMSRC)' >> tmp-libgcc.mvars
         echo LIB2FUNCS_ST = '$(LIB2FUNCS_ST)' >> tmp-libgcc.mvars
         echo LIB2FUNCS_EXCLUDE = '$(LIB2FUNCS_EXCLUDE)' >> tmp-libgcc.mvars
         echo LIB2ADD = '$(call srcdirify,$(LIB2ADD))' >> tmp-libgcc.mvars
diff --git a/gcc/config.gcc b/gcc/config.gcc

index 6bbec7db39eb7d04453f19faabe3d04fb11e89be..79230a6935ac0f7f7e2a0de9b24c03361537b216 100644 (file)
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -950,11 +950,10 @@ bfin*-linux-uclibc*)
         ;;
  bfin*-rtems*)
         tm_file="${tm_file} dbxelf.h elfos.h bfin/elf.h bfin/rtems.h rtems.h newlib-stdint.h"
-       tmake_file="bfin/t-bfin t-rtems bfin/t-rtems"
+       tmake_file="t-rtems bfin/t-rtems"
         ;;
  bfin*-*)
         tm_file="${tm_file} dbxelf.h elfos.h newlib-stdint.h bfin/elf.h"
-       tmake_file=bfin/t-bfin
         use_collect2=no
         use_gcc_stdint=wrap
         ;;
@@ -1737,7 +1736,7 @@ mips*-*-netbsd*)                  # NetBSD/mips, either endian.
         ;;
  mips64*-*-linux* | mipsisa64*-*-linux*)
         tm_file="dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h ${tm_file} mips/gnu-user.h mips/gnu-user64.h mips/linux64.h"
-       tmake_file="${tmake_file} mips/t-linux64 mips/t-libgcc-mips16"
+       tmake_file="${tmake_file} mips/t-linux64"
         tm_defines="${tm_defines} MIPS_ABI_DEFAULT=ABI_N32"
         case ${target} in
                 mips64el-st-linux-gnu)
@@ -1758,7 +1757,6 @@ mips64*-*-linux* | mipsisa64*-*-linux*)
         ;;
  mips*-*-linux*)                                # Linux MIPS, either endian.
          tm_file="dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h ${tm_file} mips/gnu-user.h mips/linux.h"
-       tmake_file="${tmake_file} mips/t-libgcc-mips16"
         if test x$enable_targets = xall; then
                 tm_file="${tm_file} mips/gnu-user64.h mips/linux64.h"
                 tmake_file="${tmake_file} mips/t-linux64"
@@ -1785,7 +1783,7 @@ mips*-*-openbsd*)
         ;;
  mips*-sde-elf*)
         tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h mips/sde.h"
-       tmake_file="mips/t-sde mips/t-libgcc-mips16"
+       tmake_file="mips/t-sde"
         extra_options="${extra_options} mips/sde.opt"
         case "${with_newlib}" in
           yes)
@@ -1822,7 +1820,7 @@ mipsisa32r2-*-elf* | mipsisa32r2el-*-elf* | \
  mipsisa64-*-elf* | mipsisa64el-*-elf* | \
  mipsisa64r2-*-elf* | mipsisa64r2el-*-elf*)
         tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h"
-       tmake_file="mips/t-isa3264 mips/t-libgcc-mips16"
+       tmake_file="mips/t-isa3264"
         case ${target} in
           mipsisa32r2*)
             tm_defines="${tm_defines} MIPS_ISA_DEFAULT=33"
@@ -1859,17 +1857,17 @@ mipsisa64sr71k-*-elf*)
          ;;
  mipsisa64sb1-*-elf* | mipsisa64sb1el-*-elf*)
         tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h"
-       tmake_file="mips/t-elf mips/t-libgcc-mips16 mips/t-sb1"
+       tmake_file="mips/t-elf mips/t-sb1"
         target_cpu_default="MASK_64BIT|MASK_FLOAT64"
         tm_defines="${tm_defines} MIPS_ISA_DEFAULT=64 MIPS_CPU_STRING_DEFAULT=\\\"sb1\\\" MIPS_ABI_DEFAULT=ABI_O64"
         ;;
  mips-*-elf* | mipsel-*-elf*)
         tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h"
-       tmake_file="mips/t-elf mips/t-libgcc-mips16"
+       tmake_file="mips/t-elf"
         ;;
  mips64-*-elf* | mips64el-*-elf*)
         tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h"
-       tmake_file="mips/t-elf mips/t-libgcc-mips16"
+       tmake_file="mips/t-elf"
         target_cpu_default="MASK_64BIT|MASK_FLOAT64"
         tm_defines="${tm_defines} MIPS_ISA_DEFAULT=3 MIPS_ABI_DEFAULT=ABI_O64"
         ;;
@@ -1880,13 +1878,13 @@ mips64vr-*-elf* | mips64vrel-*-elf*)
          ;;
  mips64orion-*-elf* | mips64orionel-*-elf*)
         tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elforion.h mips/elf.h"
-       tmake_file="mips/t-elf mips/t-libgcc-mips16"
+       tmake_file="mips/t-elf"
         target_cpu_default="MASK_64BIT|MASK_FLOAT64"
         tm_defines="${tm_defines} MIPS_ISA_DEFAULT=3 MIPS_ABI_DEFAULT=ABI_O64"
         ;;
  mips*-*-rtems*)
         tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h mips/rtems.h rtems.h"
-       tmake_file="mips/t-elf mips/t-libgcc-mips16 t-rtems mips/t-rtems"
+       tmake_file="mips/t-elf t-rtems mips/t-rtems"
         ;;
  mips-wrs-vxworks)
         tm_file="elfos.h ${tm_file} mips/elf.h vx-common.h vxworks.h mips/vxworks.h"
@@ -1894,7 +1892,7 @@ mips-wrs-vxworks)
         ;;
  mipstx39-*-elf* | mipstx39el-*-elf*)
         tm_file="elfos.h newlib-stdint.h ${tm_file} mips/r3900.h mips/elf.h"
-       tmake_file="mips/t-r3900 mips/t-libgcc-mips16"
+       tmake_file="mips/t-r3900"
         ;;
  mmix-knuth-mmixware)
         tm_file="${tm_file} newlib-stdint.h"
@@ -2511,7 +2509,6 @@ v850*-*-*)
  vax-*-linux*)
         tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h vax/elf.h vax/linux.h"
         extra_options="${extra_options} vax/elf.opt"
-       tmake_file="${tmake_file} vax/t-linux"
         ;;
  vax-*-netbsdelf*)
         tm_file="${tm_file} elfos.h netbsd.h netbsd-elf.h vax/elf.h vax/netbsd-elf.h"
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

index e07c8c328c612c603249496d022a965a84a0dd48..5f0d5629462f6aebfd48e5bf85a2729c4b134f44 100644 (file)
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -23495,7 +23495,7 @@ arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
  
  /* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
     ARM insns and therefore guarantee that the shift count is modulo 256.
-   DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
+   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
     guarantee no particular behavior for out-of-range counts.  */
  
  static unsigned HOST_WIDE_INT
diff --git a/gcc/config/arm/bpabi-v6m.S b/gcc/config/arm/bpabi-v6m.S

deleted file mode 100644 (file)

index 4ecea6d..0000000
--- a/gcc/config/arm/bpabi-v6m.S
+++ /dev/null
@@ -1,318 +0,0 @@
-/* Miscellaneous BPABI functions.  ARMv6M implementation
-
-   Copyright (C) 2006, 2008, 2009, 2010  Free Software Foundation, Inc.
-   Contributed by CodeSourcery.
-
-   This file is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by the
-   Free Software Foundation; either version 3, or (at your option) any
-   later version.
-
-   This file is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifdef __ARM_EABI__
-/* Some attributes that are common to all routines in this file.  */
-       /* Tag_ABI_align_needed: This code does not require 8-byte
-          alignment from the caller.  */
-       /* .eabi_attribute 24, 0  -- default setting.  */
-       /* Tag_ABI_align_preserved: This code preserves 8-byte
-          alignment in any callee.  */
-       .eabi_attribute 25, 1
-#endif /* __ARM_EABI__ */
-
-#ifdef L_aeabi_lcmp
-
-FUNC_START aeabi_lcmp
-       cmp     xxh, yyh
-       beq     1f
-       bgt     2f
-       mov     r0, #1
-       neg     r0, r0
-       RET
-2:
-       mov     r0, #1
-       RET
-1:
-       sub     r0, xxl, yyl
-       beq     1f
-       bhi     2f
-       mov     r0, #1
-       neg     r0, r0
-       RET
-2:
-       mov     r0, #1
-1:
-       RET
-       FUNC_END aeabi_lcmp
-
-#endif /* L_aeabi_lcmp */
-       
-#ifdef L_aeabi_ulcmp
-
-FUNC_START aeabi_ulcmp
-       cmp     xxh, yyh
-       bne     1f
-       sub     r0, xxl, yyl
-       beq     2f
-1:
-       bcs     1f
-       mov     r0, #1
-       neg     r0, r0
-       RET
-1:
-       mov     r0, #1
-2:
-       RET
-       FUNC_END aeabi_ulcmp
-
-#endif /* L_aeabi_ulcmp */
-
-.macro test_div_by_zero signed
-       cmp     yyh, #0
-       bne     7f
-       cmp     yyl, #0
-       bne     7f
-       cmp     xxh, #0
-       bne     2f
-       cmp     xxl, #0
-2:
-       .ifc    \signed, unsigned
-       beq     3f
-       mov     xxh, #0
-       mvn     xxh, xxh                @ 0xffffffff
-       mov     xxl, xxh
-3:
-       .else
-       beq     5f
-       blt     6f
-       mov     xxl, #0
-       mvn     xxl, xxl                @ 0xffffffff
-       lsr     xxh, xxl, #1            @ 0x7fffffff
-       b       5f
-6:     mov     xxh, #0x80
-       lsl     xxh, xxh, #24           @ 0x80000000
-       mov     xxl, #0
-5:
-       .endif
-       @ tailcalls are tricky on v6-m.
-       push    {r0, r1, r2}
-       ldr     r0, 1f
-       adr     r1, 1f
-       add     r0, r1
-       str     r0, [sp, #8]
-       @ We know we are not on armv4t, so pop pc is safe.
-       pop     {r0, r1, pc}
-       .align  2
-1:
-       .word   __aeabi_ldiv0 - 1b
-7:
-.endm
-
-#ifdef L_aeabi_ldivmod
-
-FUNC_START aeabi_ldivmod
-       test_div_by_zero signed
-
-       push {r0, r1}
-       mov r0, sp
-       push {r0, lr}
-       ldr r0, [sp, #8]
-       bl SYM(__gnu_ldivmod_helper)
-       ldr r3, [sp, #4]
-       mov lr, r3
-       add sp, sp, #8
-       pop {r2, r3}
-       RET
-       FUNC_END aeabi_ldivmod
-
-#endif /* L_aeabi_ldivmod */
-
-#ifdef L_aeabi_uldivmod
-
-FUNC_START aeabi_uldivmod
-       test_div_by_zero unsigned
-
-       push {r0, r1}
-       mov r0, sp
-       push {r0, lr}
-       ldr r0, [sp, #8]
-       bl SYM(__gnu_uldivmod_helper)
-       ldr r3, [sp, #4]
-       mov lr, r3
-       add sp, sp, #8
-       pop {r2, r3}
-       RET
-       FUNC_END aeabi_uldivmod
-       
-#endif /* L_aeabi_uldivmod */
-
-#ifdef L_arm_addsubsf3
-
-FUNC_START aeabi_frsub
-
-      push     {r4, lr}
-      mov      r4, #1
-      lsl      r4, #31
-      eor      r0, r0, r4
-      bl       __aeabi_fadd
-      pop      {r4, pc}
-
-      FUNC_END aeabi_frsub
-
-#endif /* L_arm_addsubsf3 */
-
-#ifdef L_arm_cmpsf2
-
-FUNC_START aeabi_cfrcmple
-
-       mov     ip, r0
-       mov     r0, r1
-       mov     r1, ip
-       b       6f
-
-FUNC_START aeabi_cfcmpeq
-FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
-
-       @ The status-returning routines are required to preserve all
-       @ registers except ip, lr, and cpsr.
-6:     push    {r0, r1, r2, r3, r4, lr}
-       bl      __lesf2
-       @ Set the Z flag correctly, and the C flag unconditionally.
-       cmp     r0, #0
-       @ Clear the C flag if the return value was -1, indicating
-       @ that the first operand was smaller than the second.
-       bmi 1f
-       mov     r1, #0
-       cmn     r0, r1
-1:
-       pop     {r0, r1, r2, r3, r4, pc}
-
-       FUNC_END aeabi_cfcmple
-       FUNC_END aeabi_cfcmpeq
-       FUNC_END aeabi_cfrcmple
-
-FUNC_START     aeabi_fcmpeq
-
-       push    {r4, lr}
-       bl      __eqsf2
-       neg     r0, r0
-       add     r0, r0, #1
-       pop     {r4, pc}
-
-       FUNC_END aeabi_fcmpeq
-
-.macro COMPARISON cond, helper, mode=sf2
-FUNC_START     aeabi_fcmp\cond
-
-       push    {r4, lr}
-       bl      __\helper\mode
-       cmp     r0, #0
-       b\cond  1f
-       mov     r0, #0
-       pop     {r4, pc}
-1:
-       mov     r0, #1
-       pop     {r4, pc}
-
-       FUNC_END aeabi_fcmp\cond
-.endm
-
-COMPARISON lt, le
-COMPARISON le, le
-COMPARISON gt, ge
-COMPARISON ge, ge
-
-#endif /* L_arm_cmpsf2 */
-
-#ifdef L_arm_addsubdf3
-
-FUNC_START aeabi_drsub
-
-      push     {r4, lr}
-      mov      r4, #1
-      lsl      r4, #31
-      eor      xxh, xxh, r4
-      bl       __aeabi_dadd
-      pop      {r4, pc}
-
-      FUNC_END aeabi_drsub
-
-#endif /* L_arm_addsubdf3 */
-
-#ifdef L_arm_cmpdf2
-
-FUNC_START aeabi_cdrcmple
-
-       mov     ip, r0
-       mov     r0, r2
-       mov     r2, ip
-       mov     ip, r1
-       mov     r1, r3
-       mov     r3, ip
-       b       6f
-
-FUNC_START aeabi_cdcmpeq
-FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
-
-       @ The status-returning routines are required to preserve all
-       @ registers except ip, lr, and cpsr.
-6:     push    {r0, r1, r2, r3, r4, lr}
-       bl      __ledf2
-       @ Set the Z flag correctly, and the C flag unconditionally.
-       cmp     r0, #0
-       @ Clear the C flag if the return value was -1, indicating
-       @ that the first operand was smaller than the second.
-       bmi 1f
-       mov     r1, #0
-       cmn     r0, r1
-1:
-       pop     {r0, r1, r2, r3, r4, pc}
-
-       FUNC_END aeabi_cdcmple
-       FUNC_END aeabi_cdcmpeq
-       FUNC_END aeabi_cdrcmple
-
-FUNC_START     aeabi_dcmpeq
-
-       push    {r4, lr}
-       bl      __eqdf2
-       neg     r0, r0
-       add     r0, r0, #1
-       pop     {r4, pc}
-
-       FUNC_END aeabi_dcmpeq
-
-.macro COMPARISON cond, helper, mode=df2
-FUNC_START     aeabi_dcmp\cond
-
-       push    {r4, lr}
-       bl      __\helper\mode
-       cmp     r0, #0
-       b\cond  1f
-       mov     r0, #0
-       pop     {r4, pc}
-1:
-       mov     r0, #1
-       pop     {r4, pc}
-
-       FUNC_END aeabi_dcmp\cond
-.endm
-
-COMPARISON lt, le
-COMPARISON le, le
-COMPARISON gt, ge
-COMPARISON ge, ge
-
-#endif /* L_arm_cmpdf2 */
diff --git a/gcc/config/arm/bpabi.S b/gcc/config/arm/bpabi.S

deleted file mode 100644 (file)

index 2ff3389..0000000
--- a/gcc/config/arm/bpabi.S
+++ /dev/null
@@ -1,163 +0,0 @@
-/* Miscellaneous BPABI functions.
-
-   Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010
-   Free Software Foundation, Inc.
-   Contributed by CodeSourcery, LLC.
-
-   This file is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by the
-   Free Software Foundation; either version 3, or (at your option) any
-   later version.
-
-   This file is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifdef __ARM_EABI__
-/* Some attributes that are common to all routines in this file.  */
-       /* Tag_ABI_align_needed: This code does not require 8-byte
-          alignment from the caller.  */
-       /* .eabi_attribute 24, 0  -- default setting.  */
-       /* Tag_ABI_align_preserved: This code preserves 8-byte
-          alignment in any callee.  */
-       .eabi_attribute 25, 1
-#endif /* __ARM_EABI__ */
-
-#ifdef L_aeabi_lcmp
-
-ARM_FUNC_START aeabi_lcmp
-       cmp     xxh, yyh
-       do_it   lt
-       movlt   r0, #-1
-       do_it   gt
-       movgt   r0, #1
-       do_it   ne
-       RETc(ne)
-       subs    r0, xxl, yyl
-       do_it   lo
-       movlo   r0, #-1
-       do_it   hi
-       movhi   r0, #1
-       RET
-       FUNC_END aeabi_lcmp
-
-#endif /* L_aeabi_lcmp */
-       
-#ifdef L_aeabi_ulcmp
-
-ARM_FUNC_START aeabi_ulcmp
-       cmp     xxh, yyh
-       do_it   lo
-       movlo   r0, #-1
-       do_it   hi
-       movhi   r0, #1
-       do_it   ne
-       RETc(ne)
-       cmp     xxl, yyl
-       do_it   lo
-       movlo   r0, #-1
-       do_it   hi
-       movhi   r0, #1
-       do_it   eq
-       moveq   r0, #0
-       RET
-       FUNC_END aeabi_ulcmp
-
-#endif /* L_aeabi_ulcmp */
-
-.macro test_div_by_zero signed
-/* Tail-call to divide-by-zero handlers which may be overridden by the user,
-   so unwinding works properly.  */
-#if defined(__thumb2__)
-       cbnz    yyh, 1f
-       cbnz    yyl, 1f
-       cmp     xxh, #0
-       do_it   eq
-       cmpeq   xxl, #0
-       .ifc \signed, unsigned
-       beq     2f
-       mov     xxh, #0xffffffff
-       mov     xxl, xxh
-2:
-       .else
-       do_it   lt, t
-       movlt   xxl, #0
-       movlt   xxh, #0x80000000
-       do_it   gt, t
-       movgt   xxh, #0x7fffffff
-       movgt   xxl, #0xffffffff
-       .endif
-       b       SYM (__aeabi_ldiv0) __PLT__
-1:
-#else
-       /* Note: Thumb-1 code calls via an ARM shim on processors which
-          support ARM mode.  */
-       cmp     yyh, #0
-       cmpeq   yyl, #0
-       bne     2f
-       cmp     xxh, #0
-       cmpeq   xxl, #0
-       .ifc \signed, unsigned
-       movne   xxh, #0xffffffff
-       movne   xxl, #0xffffffff
-       .else
-       movlt   xxh, #0x80000000
-       movlt   xxl, #0
-       movgt   xxh, #0x7fffffff
-       movgt   xxl, #0xffffffff
-       .endif
-       b       SYM (__aeabi_ldiv0) __PLT__
-2:
-#endif
-.endm
-
-#ifdef L_aeabi_ldivmod
-
-ARM_FUNC_START aeabi_ldivmod
-       test_div_by_zero signed
-
-       sub sp, sp, #8
-#if defined(__thumb2__)
-       mov ip, sp
-       push {ip, lr}
-#else
-       do_push {sp, lr}
-#endif
-       bl SYM(__gnu_ldivmod_helper) __PLT__
-       ldr lr, [sp, #4]
-       add sp, sp, #8
-       do_pop {r2, r3}
-       RET
-       
-#endif /* L_aeabi_ldivmod */
-
-#ifdef L_aeabi_uldivmod
-
-ARM_FUNC_START aeabi_uldivmod
-       test_div_by_zero unsigned
-
-       sub sp, sp, #8
-#if defined(__thumb2__)
-       mov ip, sp
-       push {ip, lr}
-#else
-       do_push {sp, lr}
-#endif
-       bl SYM(__gnu_uldivmod_helper) __PLT__
-       ldr lr, [sp, #4]
-       add sp, sp, #8
-       do_pop {r2, r3}
-       RET
-       
-#endif /* L_aeabi_divmod */
-       
diff --git a/gcc/config/arm/ieee754-df.S b/gcc/config/arm/ieee754-df.S

deleted file mode 100644 (file)

index eb0c386..0000000
--- a/gcc/config/arm/ieee754-df.S
+++ /dev/null
@@ -1,1447 +0,0 @@
-/* ieee754-df.S double-precision floating point support for ARM
-
-   Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009  Free Software Foundation, Inc.
-   Contributed by Nicolas Pitre (nico@cam.org)
-
-   This file is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by the
-   Free Software Foundation; either version 3, or (at your option) any
-   later version.
-
-   This file is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/*
- * Notes: 
- * 
- * The goal of this code is to be as fast as possible.  This is
- * not meant to be easy to understand for the casual reader.
- * For slightly simpler code please see the single precision version
- * of this file.
- * 
- * Only the default rounding mode is intended for best performances.
- * Exceptions aren't supported yet, but that can be added quite easily
- * if necessary without impacting performances.
- */
-
-
-@ For FPA, float words are always big-endian.
-@ For VFP, floats words follow the memory system mode.
-#if defined(__VFP_FP__) && !defined(__ARMEB__)
-#define xl r0
-#define xh r1
-#define yl r2
-#define yh r3
-#else
-#define xh r0
-#define xl r1
-#define yh r2
-#define yl r3
-#endif
-
-
-#ifdef L_arm_negdf2
-
-ARM_FUNC_START negdf2
-ARM_FUNC_ALIAS aeabi_dneg negdf2
-
-       @ flip sign bit
-       eor     xh, xh, #0x80000000
-       RET
-
-       FUNC_END aeabi_dneg
-       FUNC_END negdf2
-
-#endif
-
-#ifdef L_arm_addsubdf3
-
-ARM_FUNC_START aeabi_drsub
-
-       eor     xh, xh, #0x80000000     @ flip sign bit of first arg
-       b       1f      
-
-ARM_FUNC_START subdf3
-ARM_FUNC_ALIAS aeabi_dsub subdf3
-
-       eor     yh, yh, #0x80000000     @ flip sign bit of second arg
-#if defined(__INTERWORKING_STUBS__)
-       b       1f                      @ Skip Thumb-code prologue
-#endif
-
-ARM_FUNC_START adddf3
-ARM_FUNC_ALIAS aeabi_dadd adddf3
-
-1:     do_push {r4, r5, lr}
-
-       @ Look for zeroes, equal values, INF, or NAN.
-       shift1  lsl, r4, xh, #1
-       shift1  lsl, r5, yh, #1
-       teq     r4, r5
-       do_it   eq
-       teqeq   xl, yl
-       do_it   ne, ttt
-       COND(orr,s,ne)  ip, r4, xl
-       COND(orr,s,ne)  ip, r5, yl
-       COND(mvn,s,ne)  ip, r4, asr #21
-       COND(mvn,s,ne)  ip, r5, asr #21
-       beq     LSYM(Lad_s)
-
-       @ Compute exponent difference.  Make largest exponent in r4,
-       @ corresponding arg in xh-xl, and positive exponent difference in r5.
-       shift1  lsr, r4, r4, #21
-       rsbs    r5, r4, r5, lsr #21
-       do_it   lt
-       rsblt   r5, r5, #0
-       ble     1f
-       add     r4, r4, r5
-       eor     yl, xl, yl
-       eor     yh, xh, yh
-       eor     xl, yl, xl
-       eor     xh, yh, xh
-       eor     yl, xl, yl
-       eor     yh, xh, yh
-1:
-       @ If exponent difference is too large, return largest argument
-       @ already in xh-xl.  We need up to 54 bit to handle proper rounding
-       @ of 0x1p54 - 1.1.
-       cmp     r5, #54
-       do_it   hi
-       RETLDM  "r4, r5" hi
-
-       @ Convert mantissa to signed integer.
-       tst     xh, #0x80000000
-       mov     xh, xh, lsl #12
-       mov     ip, #0x00100000
-       orr     xh, ip, xh, lsr #12
-       beq     1f
-#if defined(__thumb2__)
-       negs    xl, xl
-       sbc     xh, xh, xh, lsl #1
-#else
-       rsbs    xl, xl, #0
-       rsc     xh, xh, #0
-#endif
-1:
-       tst     yh, #0x80000000
-       mov     yh, yh, lsl #12
-       orr     yh, ip, yh, lsr #12
-       beq     1f
-#if defined(__thumb2__)
-       negs    yl, yl
-       sbc     yh, yh, yh, lsl #1
-#else
-       rsbs    yl, yl, #0
-       rsc     yh, yh, #0
-#endif
-1:
-       @ If exponent == difference, one or both args were denormalized.
-       @ Since this is not common case, rescale them off line.
-       teq     r4, r5
-       beq     LSYM(Lad_d)
-LSYM(Lad_x):
-
-       @ Compensate for the exponent overlapping the mantissa MSB added later
-       sub     r4, r4, #1
-
-       @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
-       rsbs    lr, r5, #32
-       blt     1f
-       shift1  lsl, ip, yl, lr
-       shiftop adds xl xl yl lsr r5 yl
-       adc     xh, xh, #0
-       shiftop adds xl xl yh lsl lr yl
-       shiftop adcs xh xh yh asr r5 yh
-       b       2f
-1:     sub     r5, r5, #32
-       add     lr, lr, #32
-       cmp     yl, #1
-       shift1  lsl,ip, yh, lr
-       do_it   cs
-       orrcs   ip, ip, #2              @ 2 not 1, to allow lsr #1 later
-       shiftop adds xl xl yh asr r5 yh
-       adcs    xh, xh, yh, asr #31
-2:
-       @ We now have a result in xh-xl-ip.
-       @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
-       and     r5, xh, #0x80000000
-       bpl     LSYM(Lad_p)
-#if defined(__thumb2__)
-       mov     lr, #0
-       negs    ip, ip
-       sbcs    xl, lr, xl
-       sbc     xh, lr, xh
-#else
-       rsbs    ip, ip, #0
-       rscs    xl, xl, #0
-       rsc     xh, xh, #0
-#endif
-
-       @ Determine how to normalize the result.
-LSYM(Lad_p):
-       cmp     xh, #0x00100000
-       bcc     LSYM(Lad_a)
-       cmp     xh, #0x00200000
-       bcc     LSYM(Lad_e)
-
-       @ Result needs to be shifted right.
-       movs    xh, xh, lsr #1
-       movs    xl, xl, rrx
-       mov     ip, ip, rrx
-       add     r4, r4, #1
-
-       @ Make sure we did not bust our exponent.
-       mov     r2, r4, lsl #21
-       cmn     r2, #(2 << 21)
-       bcs     LSYM(Lad_o)
-
-       @ Our result is now properly aligned into xh-xl, remaining bits in ip.
-       @ Round with MSB of ip. If halfway between two numbers, round towards
-       @ LSB of xl = 0.
-       @ Pack final result together.
-LSYM(Lad_e):
-       cmp     ip, #0x80000000
-       do_it   eq
-       COND(mov,s,eq)  ip, xl, lsr #1
-       adcs    xl, xl, #0
-       adc     xh, xh, r4, lsl #20
-       orr     xh, xh, r5
-       RETLDM  "r4, r5"
-
-       @ Result must be shifted left and exponent adjusted.
-LSYM(Lad_a):
-       movs    ip, ip, lsl #1
-       adcs    xl, xl, xl
-       adc     xh, xh, xh
-       tst     xh, #0x00100000
-       sub     r4, r4, #1
-       bne     LSYM(Lad_e)
-
-       @ No rounding necessary since ip will always be 0 at this point.
-LSYM(Lad_l):
-
-#if __ARM_ARCH__ < 5
-
-       teq     xh, #0
-       movne   r3, #20
-       moveq   r3, #52
-       moveq   xh, xl
-       moveq   xl, #0
-       mov     r2, xh
-       cmp     r2, #(1 << 16)
-       movhs   r2, r2, lsr #16
-       subhs   r3, r3, #16
-       cmp     r2, #(1 << 8)
-       movhs   r2, r2, lsr #8
-       subhs   r3, r3, #8
-       cmp     r2, #(1 << 4)
-       movhs   r2, r2, lsr #4
-       subhs   r3, r3, #4
-       cmp     r2, #(1 << 2)
-       subhs   r3, r3, #2
-       sublo   r3, r3, r2, lsr #1
-       sub     r3, r3, r2, lsr #3
-
-#else
-
-       teq     xh, #0
-       do_it   eq, t
-       moveq   xh, xl
-       moveq   xl, #0
-       clz     r3, xh
-       do_it   eq
-       addeq   r3, r3, #32
-       sub     r3, r3, #11
-
-#endif
-
-       @ determine how to shift the value.
-       subs    r2, r3, #32
-       bge     2f
-       adds    r2, r2, #12
-       ble     1f
-
-       @ shift value left 21 to 31 bits, or actually right 11 to 1 bits
-       @ since a register switch happened above.
-       add     ip, r2, #20
-       rsb     r2, r2, #12
-       shift1  lsl, xl, xh, ip
-       shift1  lsr, xh, xh, r2
-       b       3f
-
-       @ actually shift value left 1 to 20 bits, which might also represent
-       @ 32 to 52 bits if counting the register switch that happened earlier.
-1:     add     r2, r2, #20
-2:     do_it   le
-       rsble   ip, r2, #32
-       shift1  lsl, xh, xh, r2
-#if defined(__thumb2__)
-       lsr     ip, xl, ip
-       itt     le
-       orrle   xh, xh, ip
-       lslle   xl, xl, r2
-#else
-       orrle   xh, xh, xl, lsr ip
-       movle   xl, xl, lsl r2
-#endif
-
-       @ adjust exponent accordingly.
-3:     subs    r4, r4, r3
-       do_it   ge, tt
-       addge   xh, xh, r4, lsl #20
-       orrge   xh, xh, r5
-       RETLDM  "r4, r5" ge
-
-       @ Exponent too small, denormalize result.
-       @ Find out proper shift value.
-       mvn     r4, r4
-       subs    r4, r4, #31
-       bge     2f
-       adds    r4, r4, #12
-       bgt     1f
-
-       @ shift result right of 1 to 20 bits, sign is in r5.
-       add     r4, r4, #20
-       rsb     r2, r4, #32
-       shift1  lsr, xl, xl, r4
-       shiftop orr xl xl xh lsl r2 yh
-       shiftop orr xh r5 xh lsr r4 yh
-       RETLDM  "r4, r5"
-
-       @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
-       @ a register switch from xh to xl.
-1:     rsb     r4, r4, #12
-       rsb     r2, r4, #32
-       shift1  lsr, xl, xl, r2
-       shiftop orr xl xl xh lsl r4 yh
-       mov     xh, r5
-       RETLDM  "r4, r5"
-
-       @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
-       @ from xh to xl.
-2:     shift1  lsr, xl, xh, r4
-       mov     xh, r5
-       RETLDM  "r4, r5"
-
-       @ Adjust exponents for denormalized arguments.
-       @ Note that r4 must not remain equal to 0.
-LSYM(Lad_d):
-       teq     r4, #0
-       eor     yh, yh, #0x00100000
-       do_it   eq, te
-       eoreq   xh, xh, #0x00100000
-       addeq   r4, r4, #1
-       subne   r5, r5, #1
-       b       LSYM(Lad_x)
-
-
-LSYM(Lad_s):
-       mvns    ip, r4, asr #21
-       do_it   ne
-       COND(mvn,s,ne)  ip, r5, asr #21
-       beq     LSYM(Lad_i)
-
-       teq     r4, r5
-       do_it   eq
-       teqeq   xl, yl
-       beq     1f
-
-       @ Result is x + 0.0 = x or 0.0 + y = y.
-       orrs    ip, r4, xl
-       do_it   eq, t
-       moveq   xh, yh
-       moveq   xl, yl
-       RETLDM  "r4, r5"
-
-1:     teq     xh, yh
-
-       @ Result is x - x = 0.
-       do_it   ne, tt
-       movne   xh, #0
-       movne   xl, #0
-       RETLDM  "r4, r5" ne
-
-       @ Result is x + x = 2x.
-       movs    ip, r4, lsr #21
-       bne     2f
-       movs    xl, xl, lsl #1
-       adcs    xh, xh, xh
-       do_it   cs
-       orrcs   xh, xh, #0x80000000
-       RETLDM  "r4, r5"
-2:     adds    r4, r4, #(2 << 21)
-       do_it   cc, t
-       addcc   xh, xh, #(1 << 20)
-       RETLDM  "r4, r5" cc
-       and     r5, xh, #0x80000000
-
-       @ Overflow: return INF.
-LSYM(Lad_o):
-       orr     xh, r5, #0x7f000000
-       orr     xh, xh, #0x00f00000
-       mov     xl, #0
-       RETLDM  "r4, r5"
-
-       @ At least one of x or y is INF/NAN.
-       @   if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
-       @   if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
-       @   if either is NAN: return NAN
-       @   if opposite sign: return NAN
-       @   otherwise return xh-xl (which is INF or -INF)
-LSYM(Lad_i):
-       mvns    ip, r4, asr #21
-       do_it   ne, te
-       movne   xh, yh
-       movne   xl, yl
-       COND(mvn,s,eq)  ip, r5, asr #21
-       do_it   ne, t
-       movne   yh, xh
-       movne   yl, xl
-       orrs    r4, xl, xh, lsl #12
-       do_it   eq, te
-       COND(orr,s,eq)  r5, yl, yh, lsl #12
-       teqeq   xh, yh
-       orrne   xh, xh, #0x00080000     @ quiet NAN
-       RETLDM  "r4, r5"
-
-       FUNC_END aeabi_dsub
-       FUNC_END subdf3
-       FUNC_END aeabi_dadd
-       FUNC_END adddf3
-
-@ floatunsidf / aeabi_ui2d: convert the unsigned 32-bit integer in r0 to
-@ double precision.  A zero input is returned at once; any other value is
-@ handed to the shared normalization code at Lad_l with the exponent in r4,
-@ the sign in r5 and the magnitude in xh-xl.
-ARM_FUNC_START floatunsidf
-ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
-
-       teq     r0, #0
-       do_it   eq, t
-       moveq   r1, #0                  @ zero input: clear the other word too
-       RETc(eq)                        @ and return it unchanged
-       do_push {r4, r5, lr}            @ popped again by the Lad_l return paths
-       mov     r4, #0x400              @ initial exponent
-       add     r4, r4, #(52-1 - 1)
-       mov     r5, #0                  @ sign bit is 0
-       .ifnc   xl, r0                  @ copy only when xl is not r0 itself
-       mov     xl, r0
-       .endif
-       mov     xh, #0                  @ whole magnitude sits in the low word
-       b       LSYM(Lad_l)             @ normalize, pack and return
-
-       FUNC_END aeabi_ui2d
-       FUNC_END floatunsidf
-
-@ floatsidf / aeabi_i2d: convert the signed 32-bit integer in r0 to double
-@ precision.  A zero input is returned at once; otherwise the sign is kept
-@ in r5, the absolute value is placed in xh-xl and the shared normalization
-@ code at Lad_l builds the result.
-ARM_FUNC_START floatsidf
-ARM_FUNC_ALIAS aeabi_i2d floatsidf
-
-       teq     r0, #0
-       do_it   eq, t
-       moveq   r1, #0                  @ zero input: clear the other word too
-       RETc(eq)                        @ and return it unchanged
-       do_push {r4, r5, lr}            @ popped again by the Lad_l return paths
-       mov     r4, #0x400              @ initial exponent
-       add     r4, r4, #(52-1 - 1)
-       ands    r5, r0, #0x80000000     @ sign bit in r5
-       do_it   mi
-       rsbmi   r0, r0, #0              @ absolute value
-       .ifnc   xl, r0                  @ copy only when xl is not r0 itself
-       mov     xl, r0
-       .endif
-       mov     xh, #0                  @ whole magnitude sits in the low word
-       b       LSYM(Lad_l)             @ normalize, pack and return
-
-       FUNC_END aeabi_i2d
-       FUNC_END floatsidf
-
-@ extendsfdf2 / aeabi_f2d: widen the single-precision value in r0 to double
-@ precision in xh-xl.  Values with an ordinary exponent are finished inline
-@ by an exponent fixup.  Zero, INF and NAN are returned right after the bit
-@ stretching.  Denormals are normalized through Lad_l.
-ARM_FUNC_START extendsfdf2
-ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
-
-       movs    r2, r0, lsl #1          @ toss sign bit
-       mov     xh, r2, asr #3          @ stretch exponent
-       mov     xh, xh, rrx             @ retrieve sign bit
-       mov     xl, r2, lsl #28         @ retrieve remaining bits
-       do_it   ne, ttt
-       COND(and,s,ne)  r3, r2, #0xff000000     @ isolate exponent
-       teqne   r3, #0xff000000         @ if not 0, check if INF or NAN
-       eorne   xh, xh, #0x38000000     @ fixup exponent otherwise.
-       RETc(ne)                        @ and return it.
-
-       teq     r2, #0                  @ if actually 0
-       do_it   ne, e
-       teqne   r3, #0xff000000         @ or INF or NAN
-       RETc(eq)                        @ we are done already.
-
-       @ value was denormalized.  We can normalize it now.
-       do_push {r4, r5, lr}            @ popped again by the Lad_l return paths
-       mov     r4, #0x380              @ setup corresponding exponent
-       and     r5, xh, #0x80000000     @ move sign bit in r5
-       bic     xh, xh, #0x80000000     @ keep only the magnitude in xh-xl
-       b       LSYM(Lad_l)
-
-       FUNC_END aeabi_f2d
-       FUNC_END extendsfdf2
-
-@ floatundidf / aeabi_ul2d and floatdidf / aeabi_l2d: convert the 64-bit
-@ integer held in r0-r1 (named ah-al below) to double precision.  Both
-@ entry points return a zero input at once and otherwise join at label 2
-@ with the sign in r5.  The unsigned entry always uses r5 = 0.
-@ The shared tail sets up the exponent in r4, pre-shifts values that are
-@ too wide, and lets Lad_p do the final normalization.
-ARM_FUNC_START floatundidf
-ARM_FUNC_ALIAS aeabi_ul2d floatundidf
-
-       orrs    r2, r0, r1              @ Z set when the whole input is zero
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
-       do_it   eq, t
-       mvfeqd  f0, #0.0
-#else
-       do_it   eq
-#endif
-       RETc(eq)
-
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
-       @ For hard FPA code we want to return via the tail below so that
-       @ we can return the result in f0 as well as in r0/r1 for backwards
-       @ compatibility.
-       adr     ip, LSYM(f0_ret)
-       @ Push pc as well so that RETLDM works correctly.
-       do_push {r4, r5, ip, lr, pc}
-#else
-       do_push {r4, r5, lr}
-#endif
-
-       mov     r5, #0                  @ unsigned input: sign is 0
-       b       2f
-
-ARM_FUNC_START floatdidf
-ARM_FUNC_ALIAS aeabi_l2d floatdidf
-
-       orrs    r2, r0, r1              @ Z set when the whole input is zero
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
-       do_it   eq, t
-       mvfeqd  f0, #0.0
-#else
-       do_it   eq
-#endif
-       RETc(eq)
-
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
-       @ For hard FPA code we want to return via the tail below so that
-       @ we can return the result in f0 as well as in r0/r1 for backwards
-       @ compatibility.
-       adr     ip, LSYM(f0_ret)
-       @ Push pc as well so that RETLDM works correctly.
-       do_push {r4, r5, ip, lr, pc}
-#else
-       do_push {r4, r5, lr}
-#endif
-
-       ands    r5, ah, #0x80000000     @ sign bit in r5
-       bpl     2f
-       @ Negative input: negate the 64-bit value to get its magnitude.
-#if defined(__thumb2__)
-       negs    al, al
-       sbc     ah, ah, ah, lsl #1
-#else
-       rsbs    al, al, #0
-       rsc     ah, ah, #0
-#endif
-2:
-       mov     r4, #0x400              @ initial exponent
-       add     r4, r4, #(52-1 - 1)
-
-       @ FPA little-endian: must swap the word order.
-       .ifnc   xh, ah
-       mov     ip, al
-       mov     xh, ah
-       mov     xl, ip
-       .endif
-
-       @ If nothing is set above bit 21 of xh, Lad_p can take it as is.
-       movs    ip, xh, lsr #22
-       beq     LSYM(Lad_p)
-
-       @ The value is too big.  Scale it down a bit...
-       @ r2 receives the right shift amount, built in steps of 3 bits.
-       mov     r2, #3
-       movs    ip, ip, lsr #3
-       do_it   ne
-       addne   r2, r2, #3
-       movs    ip, ip, lsr #3
-       do_it   ne
-       addne   r2, r2, #3
-       add     r2, r2, ip, lsr #3
-
-       @ Shift xh-xl right by r2, keep the bits shifted out in ip, and
-       @ compensate in the exponent.
-       rsb     r3, r2, #32
-       shift1  lsl, ip, xl, r3
-       shift1  lsr, xl, xl, r2
-       shiftop orr xl xl xh lsl r3 lr
-       shift1  lsr, xh, xh, r2
-       add     r4, r4, r2
-       b       LSYM(Lad_p)
-
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
-
-       @ Legacy code expects the result to be returned in f0.  Copy it
-       @ there as well.
-LSYM(f0_ret):
-       do_push {r0, r1}
-       ldfd    f0, [sp], #8
-       RETLDM
-
-#endif
-
-       FUNC_END floatdidf
-       FUNC_END aeabi_l2d
-       FUNC_END floatundidf
-       FUNC_END aeabi_ul2d
-
-#endif /* L_addsubdf3 */
-
-#ifdef L_arm_muldivdf3
-
-@ muldf3 / aeabi_dmul: double-precision multiply of xh-xl by yh-yl, with
-@ the result returned in xh-xl.
-@ Layout of this routine:
-@   main path - both exponents ordinary: multiply mantissas, round, pack
-@   Lml_1     - shortcut taken when a mantissa has no fraction bits
-@   Lml_u     - exponent out of range: overflow, signed zero or denormal
-@   Lml_d     - scale denormalized arguments, then resume the main path
-@   Lml_s     - dispatch for zero, denormal, INF and NAN arguments
-@   Lml_z, Lml_i, Lml_o, Lml_n - return signed 0, INF, INF on overflow, NAN
-@ Several of these labels are also branched to from divdf3.
-ARM_FUNC_START muldf3
-ARM_FUNC_ALIAS aeabi_dmul muldf3
-       do_push {r4, r5, r6, lr}
-
-       @ Mask out exponents, trap any zero/denormal/INF/NAN.
-       @ ip = 0x7ff is the exponent mask.  Lml_s is entered with a call
-       @ (bleq) so that the denormal path can return here and continue.
-       mov     ip, #0xff
-       orr     ip, ip, #0x700
-       ands    r4, ip, xh, lsr #20
-       do_it   ne, tte
-       COND(and,s,ne)  r5, ip, yh, lsr #20
-       teqne   r4, ip
-       teqne   r5, ip
-       bleq    LSYM(Lml_s)
-
-       @ Add exponents together
-       add     r4, r4, r5
-
-       @ Determine final sign.
-       eor     r6, xh, yh
-
-       @ Convert mantissa to unsigned integer.
-       @ If power of two, branch to a separate path.
-       bic     xh, xh, ip, lsl #21
-       bic     yh, yh, ip, lsl #21
-       orrs    r5, xl, xh, lsl #12
-       do_it   ne
-       COND(orr,s,ne)  r5, yl, yh, lsl #12
-       orr     xh, xh, #0x00100000
-       orr     yh, yh, #0x00100000
-       beq     LSYM(Lml_1)
-
-#if __ARM_ARCH__ < 4
-
-       @ Put sign bit in r6, which will be restored in yl later.
-       and   r6, r6, #0x80000000
-
-       @ Well, no way to make it shorter without the umull instruction.
-       @ The operands are split into 16-bit halves and the partial
-       @ products are accumulated into r6-r5-lr-ip.
-       stmfd   sp!, {r6, r7, r8, r9, sl, fp}
-       mov     r7, xl, lsr #16
-       mov     r8, yl, lsr #16
-       mov     r9, xh, lsr #16
-       mov     sl, yh, lsr #16
-       bic     xl, xl, r7, lsl #16
-       bic     yl, yl, r8, lsl #16
-       bic     xh, xh, r9, lsl #16
-       bic     yh, yh, sl, lsl #16
-       mul     ip, xl, yl
-       mul     fp, xl, r8
-       mov     lr, #0
-       adds    ip, ip, fp, lsl #16
-       adc     lr, lr, fp, lsr #16
-       mul     fp, r7, yl
-       adds    ip, ip, fp, lsl #16
-       adc     lr, lr, fp, lsr #16
-       mul     fp, xl, sl
-       mov     r5, #0
-       adds    lr, lr, fp, lsl #16
-       adc     r5, r5, fp, lsr #16
-       mul     fp, r7, yh
-       adds    lr, lr, fp, lsl #16
-       adc     r5, r5, fp, lsr #16
-       mul     fp, xh, r8
-       adds    lr, lr, fp, lsl #16
-       adc     r5, r5, fp, lsr #16
-       mul     fp, r9, yl
-       adds    lr, lr, fp, lsl #16
-       adc     r5, r5, fp, lsr #16
-       mul     fp, xh, sl
-       mul     r6, r9, sl
-       adds    r5, r5, fp, lsl #16
-       adc     r6, r6, fp, lsr #16
-       mul     fp, r9, yh
-       adds    r5, r5, fp, lsl #16
-       adc     r6, r6, fp, lsr #16
-       mul     fp, xl, yh
-       adds    lr, lr, fp
-       mul     fp, r7, sl
-       adcs    r5, r5, fp
-       mul     fp, xh, yl
-       adc     r6, r6, #0
-       adds    lr, lr, fp
-       mul     fp, r9, r8
-       adcs    r5, r5, fp
-       mul     fp, r7, r8
-       adc     r6, r6, #0
-       adds    lr, lr, fp
-       mul     fp, xh, yh
-       adcs    r5, r5, fp
-       adc     r6, r6, #0
-       @ The sign word saved from r6 above is popped into yl here.
-       ldmfd   sp!, {yl, r7, r8, r9, sl, fp}
-
-#else
-
-       @ Here is the actual multiplication.
-       @ Product ends up in r6-r5-lr-ip, sign bit is moved to yl.
-       umull   ip, lr, xl, yl
-       mov     r5, #0
-       umlal   lr, r5, xh, yl
-       and     yl, r6, #0x80000000
-       umlal   lr, r5, xl, yh
-       mov     r6, #0
-       umlal   r5, r6, xh, yh
-
-#endif
-
-       @ The LSBs in ip are only significant for the final rounding.
-       @ Fold them into lr.
-       teq     ip, #0
-       do_it   ne
-       orrne   lr, lr, #1
-
-       @ Adjust result upon the MSB position.
-       sub     r4, r4, #0xff
-       cmp     r6, #(1 << (20-11))
-       sbc     r4, r4, #0x300
-       bcs     1f
-       movs    lr, lr, lsl #1
-       adcs    r5, r5, r5
-       adc     r6, r6, r6
-1:
-       @ Shift to final position, add sign to result.
-       orr     xh, yl, r6, lsl #11
-       orr     xh, xh, r5, lsr #21
-       mov     xl, r5, lsl #11
-       orr     xl, xl, lr, lsr #21
-       mov     lr, lr, lsl #11
-
-       @ Check exponent range for under/overflow.
-       subs    ip, r4, #(254 - 1)
-       do_it   hi
-       cmphi   ip, #0x700
-       bhi     LSYM(Lml_u)
-
-       @ Round the result, merge final exponent.
-       cmp     lr, #0x80000000
-       do_it   eq
-       COND(mov,s,eq)  lr, xl, lsr #1
-       adcs    xl, xl, #0
-       adc     xh, xh, r4, lsl #20
-       RETLDM  "r4, r5, r6"
-
-       @ Multiplication by 0x1p*: let''s shortcut a lot of code.
-LSYM(Lml_1):
-       and     r6, r6, #0x80000000
-       orr     xh, r6, xh
-       orr     xl, xl, yl
-       eor     xh, xh, yh
-       subs    r4, r4, ip, lsr #1
-       do_it   gt, tt
-       COND(rsb,s,gt)  r5, r4, ip
-       orrgt   xh, xh, r4, lsl #20
-       RETLDM  "r4, r5, r6" gt
-
-       @ Under/overflow: fix things up for the code below.
-       orr     xh, xh, #0x00100000
-       mov     lr, #0
-       subs    r4, r4, #1
-
-       @ Lml_u is entered with the flags of the preceding range check
-       @ (or of the subs just above) still valid.
-LSYM(Lml_u):
-       @ Overflow?
-       bgt     LSYM(Lml_o)
-
-       @ Check if denormalized result is possible, otherwise return signed 0.
-       cmn     r4, #(53 + 1)
-       do_it   le, tt
-       movle   xl, #0
-       bicle   xh, xh, #0x7fffffff
-       RETLDM  "r4, r5, r6" le
-
-       @ Find out proper shift value.
-       rsb     r4, r4, #0
-       subs    r4, r4, #32
-       bge     2f
-       adds    r4, r4, #12
-       bgt     1f
-
-       @ shift result right of 1 to 20 bits, preserve sign bit, round, etc.
-       add     r4, r4, #20
-       rsb     r5, r4, #32
-       shift1  lsl, r3, xl, r5
-       shift1  lsr, xl, xl, r4
-       shiftop orr xl xl xh lsl r5 r2
-       and     r2, xh, #0x80000000
-       bic     xh, xh, #0x80000000
-       adds    xl, xl, r3, lsr #31
-       shiftop adc xh r2 xh lsr r4 r6
-       orrs    lr, lr, r3, lsl #1
-       do_it   eq
-       biceq   xl, xl, r3, lsr #31
-       RETLDM  "r4, r5, r6"
-
-       @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
-       @ a register switch from xh to xl. Then round.
-1:     rsb     r4, r4, #12
-       rsb     r5, r4, #32
-       shift1  lsl, r3, xl, r4
-       shift1  lsr, xl, xl, r5
-       shiftop orr xl xl xh lsl r4 r2
-       bic     xh, xh, #0x7fffffff
-       adds    xl, xl, r3, lsr #31
-       adc     xh, xh, #0
-       orrs    lr, lr, r3, lsl #1
-       do_it   eq
-       biceq   xl, xl, r3, lsr #31
-       RETLDM  "r4, r5, r6"
-
-       @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
-       @ from xh to xl.  Leftover bits are in r3-r6-lr for rounding.
-2:     rsb     r5, r4, #32
-       shiftop orr lr lr xl lsl r5 r2
-       shift1  lsr, r3, xl, r4
-       shiftop orr r3 r3 xh lsl r5 r2
-       shift1  lsr, xl, xh, r4
-       bic     xh, xh, #0x7fffffff
-       shiftop bic xl xl xh lsr r4 r2
-       add     xl, xl, r3, lsr #31
-       orrs    lr, lr, r3, lsl #1
-       do_it   eq
-       biceq   xl, xl, r3, lsr #31
-       RETLDM  "r4, r5, r6"
-
-       @ One or both arguments are denormalized.
-       @ Scale them leftwards and preserve sign bit.
-       @ Returns with RET to the instruction after the bleq that called
-       @ Lml_s (or Ldv_s), with r4 and r5 holding the adjusted exponents.
-LSYM(Lml_d):
-       teq     r4, #0
-       bne     2f
-       and     r6, xh, #0x80000000
-1:     movs    xl, xl, lsl #1
-       adc     xh, xh, xh
-       tst     xh, #0x00100000
-       do_it   eq
-       subeq   r4, r4, #1
-       beq     1b
-       orr     xh, xh, r6
-       teq     r5, #0
-       do_it   ne
-       RETc(ne)
-2:     and     r6, yh, #0x80000000
-3:     movs    yl, yl, lsl #1
-       adc     yh, yh, yh
-       tst     yh, #0x00100000
-       do_it   eq
-       subeq   r5, r5, #1
-       beq     3b
-       orr     yh, yh, r6
-       RET
-
-LSYM(Lml_s):
-       @ Isolate the INF and NAN cases away
-       teq     r4, ip
-       and     r5, ip, yh, lsr #20
-       do_it   ne
-       teqne   r5, ip
-       beq     1f
-
-       @ Here, one or more arguments are either denormalized or zero.
-       orrs    r6, xl, xh, lsl #1
-       do_it   ne
-       COND(orr,s,ne)  r6, yl, yh, lsl #1
-       bne     LSYM(Lml_d)
-
-       @ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
-       eor     xh, xh, yh
-       and     xh, xh, #0x80000000
-       mov     xl, #0
-       RETLDM  "r4, r5, r6"
-
-1:     @ One or both args are INF or NAN.
-       orrs    r6, xl, xh, lsl #1
-       do_it   eq, te
-       moveq   xl, yl
-       moveq   xh, yh
-       COND(orr,s,ne)  r6, yl, yh, lsl #1
-       beq     LSYM(Lml_n)             @ 0 * INF or INF * 0 -> NAN
-       teq     r4, ip
-       bne     1f
-       orrs    r6, xl, xh, lsl #12
-       bne     LSYM(Lml_n)             @ NAN * <anything> -> NAN
-1:     teq     r5, ip
-       bne     LSYM(Lml_i)
-       orrs    r6, yl, yh, lsl #12
-       do_it   ne, t
-       movne   xl, yl
-       movne   xh, yh
-       bne     LSYM(Lml_n)             @ <anything> * NAN -> NAN
-
-       @ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
-       eor     xh, xh, yh
-
-       @ Overflow: return INF (sign already in xh).
-LSYM(Lml_o):
-       and     xh, xh, #0x80000000
-       orr     xh, xh, #0x7f000000
-       orr     xh, xh, #0x00f00000
-       mov     xl, #0
-       RETLDM  "r4, r5, r6"
-
-       @ Return a quiet NAN.
-       @ Only the exponent and quiet bits are forced here, so whatever is
-       @ in xh-xl on entry supplies the sign and the remaining payload.
-LSYM(Lml_n):
-       orr     xh, xh, #0x7f000000
-       orr     xh, xh, #0x00f80000
-       RETLDM  "r4, r5, r6"
-
-       FUNC_END aeabi_dmul
-       FUNC_END muldf3
-
-@ divdf3 / aeabi_ddiv: double-precision divide of xh-xl by yh-yl, with the
-@ result returned in xh-xl.
-@ Layout of this routine:
-@   main path - both exponents ordinary: restoring division, four
-@               quotient bits per loop pass, then round and pack
-@   Ldv_1     - shortcut taken when the divisor mantissa has no fraction
-@   Ldv_s     - dispatch for zero, denormal, INF and NAN arguments
-@ Under/overflow handling and the special results reuse the muldf3 labels
-@ Lml_u, Lml_d, Lml_z, Lml_i and Lml_n.
-ARM_FUNC_START divdf3
-ARM_FUNC_ALIAS aeabi_ddiv divdf3
-       
-       do_push {r4, r5, r6, lr}
-
-       @ Mask out exponents, trap any zero/denormal/INF/NAN.
-       @ ip = 0x7ff is the exponent mask.  Ldv_s is entered with a call
-       @ (bleq) so that the denormal path can return here and continue.
-       mov     ip, #0xff
-       orr     ip, ip, #0x700
-       ands    r4, ip, xh, lsr #20
-       do_it   ne, tte
-       COND(and,s,ne)  r5, ip, yh, lsr #20
-       teqne   r4, ip
-       teqne   r5, ip
-       bleq    LSYM(Ldv_s)
-
-       @ Subtract divisor exponent from dividend''s.
-       sub     r4, r4, r5
-
-       @ Preserve final sign into lr.
-       eor     lr, xh, yh
-
-       @ Convert mantissa to unsigned integer.
-       @ Dividend -> r5-r6, divisor -> yh-yl.
-       orrs    r5, yl, yh, lsl #12
-       mov     xh, xh, lsl #12
-       beq     LSYM(Ldv_1)
-       mov     yh, yh, lsl #12
-       mov     r5, #0x10000000
-       orr     yh, r5, yh, lsr #4
-       orr     yh, yh, yl, lsr #24
-       mov     yl, yl, lsl #8
-       orr     r5, r5, xh, lsr #4
-       orr     r5, r5, xl, lsr #24
-       mov     r6, xl, lsl #8
-
-       @ Initialize xh with final sign bit.
-       and     xh, lr, #0x80000000
-
-       @ Ensure result will land to known bit position.
-       @ Apply exponent bias accordingly.
-       cmp     r5, yh
-       do_it   eq
-       cmpeq   r6, yl
-       adc     r4, r4, #(255 - 2)
-       add     r4, r4, #0x300
-       bcs     1f
-       movs    yh, yh, lsr #1
-       mov     yl, yl, rrx
-1:
-       @ Perform first subtraction to align result to a nibble.
-       subs    r6, r6, yl
-       sbc     r5, r5, yh
-       movs    yh, yh, lsr #1
-       mov     yl, yl, rrx
-       mov     xl, #0x00100000
-       mov     ip, #0x00080000
-
-       @ The actual division loop.
-       @ Each pass makes four trial subtractions.  ip marks the quotient
-       @ bit of the first one and xl collects the quotient bits.
-1:     subs    lr, r6, yl
-       sbcs    lr, r5, yh
-       do_it   cs, tt
-       subcs   r6, r6, yl
-       movcs   r5, lr
-       orrcs   xl, xl, ip
-       movs    yh, yh, lsr #1
-       mov     yl, yl, rrx
-       subs    lr, r6, yl
-       sbcs    lr, r5, yh
-       do_it   cs, tt
-       subcs   r6, r6, yl
-       movcs   r5, lr
-       orrcs   xl, xl, ip, lsr #1
-       movs    yh, yh, lsr #1
-       mov     yl, yl, rrx
-       subs    lr, r6, yl
-       sbcs    lr, r5, yh
-       do_it   cs, tt
-       subcs   r6, r6, yl
-       movcs   r5, lr
-       orrcs   xl, xl, ip, lsr #2
-       movs    yh, yh, lsr #1
-       mov     yl, yl, rrx
-       subs    lr, r6, yl
-       sbcs    lr, r5, yh
-       do_it   cs, tt
-       subcs   r6, r6, yl
-       movcs   r5, lr
-       orrcs   xl, xl, ip, lsr #3
-
-       @ Stop early when the remainder is zero.  Otherwise rescale the
-       @ remainder and divisor and move on to the next four quotient bits.
-       orrs    lr, r5, r6
-       beq     2f
-       mov     r5, r5, lsl #4
-       orr     r5, r5, r6, lsr #28
-       mov     r6, r6, lsl #4
-       mov     yh, yh, lsl #3
-       orr     yh, yh, yl, lsr #29
-       mov     yl, yl, lsl #3
-       movs    ip, ip, lsr #4
-       bne     1b
-
-       @ We are done with a word of the result.
-       @ Loop again for the low word if this pass was for the high word.
-       tst     xh, #0x00100000
-       bne     3f
-       orr     xh, xh, xl
-       mov     xl, #0
-       mov     ip, #0x80000000
-       b       1b
-2:
-       @ Be sure result starts in the high word.
-       tst     xh, #0x00100000
-       do_it   eq, t
-       orreq   xh, xh, xl
-       moveq   xl, #0
-3:
-       @ Check exponent range for under/overflow.
-       subs    ip, r4, #(254 - 1)
-       do_it   hi
-       cmphi   ip, #0x700
-       bhi     LSYM(Lml_u)
-
-       @ Round the result, merge final exponent.
-       subs    ip, r5, yh
-       do_it   eq, t
-       COND(sub,s,eq)  ip, r6, yl
-       COND(mov,s,eq)  ip, xl, lsr #1
-       adcs    xl, xl, #0
-       adc     xh, xh, r4, lsl #20
-       RETLDM  "r4, r5, r6"
-
-       @ Division by 0x1p*: shortcut a lot of code.
-LSYM(Ldv_1):
-       and     lr, lr, #0x80000000
-       orr     xh, lr, xh, lsr #12
-       adds    r4, r4, ip, lsr #1
-       do_it   gt, tt
-       COND(rsb,s,gt)  r5, r4, ip
-       orrgt   xh, xh, r4, lsl #20
-       RETLDM  "r4, r5, r6" gt
-
-       @ Exponent out of range: set things up the way Lml_u expects.
-       orr     xh, xh, #0x00100000
-       mov     lr, #0
-       subs    r4, r4, #1
-       b       LSYM(Lml_u)
-
-       @ Result might need to be denormalized: put remainder bits
-       @ in lr for rounding considerations.
-       @ NOTE(review): no branch to Ldv_u is visible in this routine; the
-       @ exponent range check above jumps to Lml_u directly.  Confirm
-       @ whether that check was meant to come through here.
-LSYM(Ldv_u):
-       orr     lr, r5, r6
-       b       LSYM(Lml_u)
-
-       @ One or both arguments is either INF, NAN or zero.
-LSYM(Ldv_s):
-       and     r5, ip, yh, lsr #20
-       teq     r4, ip
-       do_it   eq
-       teqeq   r5, ip
-       beq     LSYM(Lml_n)             @ INF/NAN / INF/NAN -> NAN
-       teq     r4, ip
-       bne     1f
-       orrs    r4, xl, xh, lsl #12
-       bne     LSYM(Lml_n)             @ NAN / <anything> -> NAN
-       teq     r5, ip
-       bne     LSYM(Lml_i)             @ INF / <anything> -> INF
-       mov     xl, yl
-       mov     xh, yh
-       b       LSYM(Lml_n)             @ INF / (INF or NAN) -> NAN
-1:     teq     r5, ip
-       bne     2f
-       orrs    r5, yl, yh, lsl #12
-       beq     LSYM(Lml_z)             @ <anything> / INF -> 0
-       mov     xl, yl
-       mov     xh, yh
-       b       LSYM(Lml_n)             @ <anything> / NAN -> NAN
-2:     @ If both are nonzero, we need to normalize and resume above.
-       orrs    r6, xl, xh, lsl #1
-       do_it   ne
-       COND(orr,s,ne)  r6, yl, yh, lsl #1
-       bne     LSYM(Lml_d)
-       @ One or both arguments are 0.
-       orrs    r4, xl, xh, lsl #1
-       bne     LSYM(Lml_i)             @ <non_zero> / 0 -> INF
-       orrs    r5, yl, yh, lsl #1
-       bne     LSYM(Lml_z)             @ 0 / <non_zero> -> 0
-       b       LSYM(Lml_n)             @ 0 / 0 -> NAN
-
-       FUNC_END aeabi_ddiv
-       FUNC_END divdf3
-
-#endif /* L_arm_muldivdf3 */
-
-#ifdef L_arm_cmpdf2
-
-@ Note: only r0 (return value) and ip are clobbered here.
-
-@ gtdf2/gedf2, ltdf2/ledf2 and cmpdf2/nedf2/eqdf2: three-way comparison of
-@ the doubles in xh-xl and yh-yl, with the result returned in r0.
-@ r0 is 0 when the operands compare equal, otherwise -1 or +1.
-@ The entry points differ only in the value returned when an operand is a
-@ NAN: -1 for gtdf2/gedf2, +1 for the others.  That value is parked on the
-@ stack while the operands are examined.
-ARM_FUNC_START gtdf2
-ARM_FUNC_ALIAS gedf2 gtdf2
-       mov     ip, #-1                 @ unordered result for gt/ge
-       b       1f
-
-ARM_FUNC_START ltdf2
-ARM_FUNC_ALIAS ledf2 ltdf2
-       mov     ip, #1                  @ unordered result for lt/le
-       b       1f
-
-ARM_FUNC_START cmpdf2
-ARM_FUNC_ALIAS nedf2 cmpdf2
-ARM_FUNC_ALIAS eqdf2 cmpdf2
-       mov     ip, #1                  @ how should we specify unordered here?
-
-1:     str     ip, [sp, #-4]!          @ park the unordered result
-
-       @ Trap any INF/NAN first.
-       mov     ip, xh, lsl #1
-       mvns    ip, ip, asr #21
-       mov     ip, yh, lsl #1
-       do_it   ne
-       COND(mvn,s,ne)  ip, ip, asr #21
-       beq     3f
-
-       @ Test for equality.
-       @ Note that 0.0 is equal to -0.0.
-2:     add     sp, sp, #4              @ drop the parked unordered result
-       orrs    ip, xl, xh, lsl #1      @ if x == 0.0 or -0.0
-       do_it   eq, e
-       COND(orr,s,eq)  ip, yl, yh, lsl #1      @ and y == 0.0 or -0.0
-       teqne   xh, yh                  @ or xh == yh
-       do_it   eq, tt
-       teqeq   xl, yl                  @ and xl == yl
-       moveq   r0, #0                  @ then equal.
-       RETc(eq)
-
-       @ Clear C flag
-       cmn     r0, #0
-
-       @ Compare sign.
-       teq     xh, yh
-
-       @ Compare values if same sign
-       do_it   pl
-       cmppl   xh, yh
-       do_it   eq
-       cmpeq   xl, yl
-
-       @ Result:
-       @ the sign of y, inverted when carry is clear, with bit 0 forced
-       @ so that the value is -1 or +1.
-       do_it   cs, e
-       movcs   r0, yh, asr #31
-       mvncc   r0, yh, asr #31
-       orr     r0, r0, #1
-       RET
-
-       @ Look for a NAN.
-       @ INF operands go back to the ordinary comparison at 2.
-3:     mov     ip, xh, lsl #1
-       mvns    ip, ip, asr #21
-       bne     4f
-       orrs    ip, xl, xh, lsl #12
-       bne     5f                      @ x is NAN
-4:     mov     ip, yh, lsl #1
-       mvns    ip, ip, asr #21
-       bne     2b
-       orrs    ip, yl, yh, lsl #12
-       beq     2b                      @ y is not NAN
-5:     ldr     r0, [sp], #4            @ unordered return code
-       RET
-
-       FUNC_END gedf2
-       FUNC_END gtdf2
-       FUNC_END ledf2
-       FUNC_END ltdf2
-       FUNC_END nedf2
-       FUNC_END eqdf2
-       FUNC_END cmpdf2
-
-@ aeabi_cdrcmple, aeabi_cdcmpeq and aeabi_cdcmple: compare two doubles and
-@ report the outcome in the Z and C flags instead of a register.
-@ aeabi_cdrcmple first exchanges the operands (r0 with r2, r1 with r3) and
-@ then shares the code of the other two entry points.
-ARM_FUNC_START aeabi_cdrcmple
-
-       @ Swap the two operands using ip as scratch.
-       mov     ip, r0
-       mov     r0, r2
-       mov     r2, ip
-       mov     ip, r1
-       mov     r1, r3
-       mov     r3, ip
-       b       6f
-       
-ARM_FUNC_START aeabi_cdcmpeq
-ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
-
-       @ The status-returning routines are required to preserve all
-       @ registers except ip, lr, and cpsr.
-6:     do_push {r0, lr}
-       ARM_CALL cmpdf2
-       @ Set the Z flag correctly, and the C flag unconditionally.
-       cmp     r0, #0
-       @ Clear the C flag if the return value was -1, indicating
-       @ that the first operand was smaller than the second.
-       do_it   mi
-       cmnmi   r0, #0
-       RETLDM  "r0"
-
-       FUNC_END aeabi_cdcmple
-       FUNC_END aeabi_cdcmpeq
-       FUNC_END aeabi_cdrcmple
-       
-@ aeabi_dcmpeq: return 1 in r0 when the two double operands compare equal,
-@ 0 otherwise.  The decision is taken from the Z flag left by
-@ aeabi_cdcmple.
-ARM_FUNC_START aeabi_dcmpeq
-
-       str     lr, [sp, #-8]!          @ save lr, moving sp down by 8 bytes
-       ARM_CALL aeabi_cdcmple
-       do_it   eq, e
-       moveq   r0, #1  @ Equal to.
-       movne   r0, #0  @ Less than, greater than, or unordered.
-       RETLDM
-
-       FUNC_END aeabi_dcmpeq
-
-@ aeabi_dcmplt: return 1 in r0 when the first double operand is less than
-@ the second, 0 otherwise.  The decision is taken from the C flag left by
-@ aeabi_cdcmple.
-ARM_FUNC_START aeabi_dcmplt
-
-       str     lr, [sp, #-8]!          @ save lr, moving sp down by 8 bytes
-       ARM_CALL aeabi_cdcmple
-       do_it   cc, e
-       movcc   r0, #1  @ Less than.
-       movcs   r0, #0  @ Equal to, greater than, or unordered.
-       RETLDM
-
-       FUNC_END aeabi_dcmplt
-
-@ aeabi_dcmple: return 1 in r0 when the first double operand is less than
-@ or equal to the second, 0 otherwise.  The decision is taken from the C
-@ and Z flags left by aeabi_cdcmple.
-ARM_FUNC_START aeabi_dcmple
-
-       str     lr, [sp, #-8]!          @ save lr, moving sp down by 8 bytes
-       ARM_CALL aeabi_cdcmple
-       do_it   ls, e
-       movls   r0, #1  @ Less than or equal to.
-       movhi   r0, #0  @ Greater than or unordered.
-       RETLDM
-
-       FUNC_END aeabi_dcmple
-
-@ aeabi_dcmpge: return 1 in r0 when the first double operand is greater
-@ than or equal to the second, 0 otherwise.  Implemented as a less-or-equal
-@ test on the swapped operands through aeabi_cdrcmple.
-ARM_FUNC_START aeabi_dcmpge
-
-       str     lr, [sp, #-8]!          @ save lr, moving sp down by 8 bytes
-       ARM_CALL aeabi_cdrcmple
-       do_it   ls, e
-       movls   r0, #1  @ Operand 2 is less than or equal to operand 1.
-       movhi   r0, #0  @ Operand 2 greater than operand 1, or unordered.
-       RETLDM
-
-       FUNC_END aeabi_dcmpge
-
-@ aeabi_dcmpgt: return 1 in r0 when the first double operand is greater
-@ than the second, 0 otherwise.  Implemented as a less-than test on the
-@ swapped operands through aeabi_cdrcmple.
-ARM_FUNC_START aeabi_dcmpgt
-
-       str     lr, [sp, #-8]!          @ save lr, moving sp down by 8 bytes
-       ARM_CALL aeabi_cdrcmple
-       do_it   cc, e
-       movcc   r0, #1  @ Operand 2 is less than operand 1.
-       movcs   r0, #0  @ Operand 2 is greater than or equal to operand 1,
-                       @ or they are unordered.
-       RETLDM
-
-       FUNC_END aeabi_dcmpgt
-
-#endif /* L_arm_cmpdf2 */
-
-#ifdef L_arm_unorddf2
-
-@ unorddf2 / aeabi_dcmpun: return 1 in r0 when at least one of the doubles
-@ in xh-xl and yh-yl is a NAN, 0 otherwise.
-@ An operand is a NAN when its exponent field is all ones and its mantissa
-@ is not zero.  Only ip and r0 are written.
-ARM_FUNC_START unorddf2
-ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
-
-       mov     ip, xh, lsl #1
-       mvns    ip, ip, asr #21         @ Z set when exponent of x is all ones
-       bne     1f
-       orrs    ip, xl, xh, lsl #12     @ any mantissa bit set?
-       bne     3f                      @ x is NAN
-1:     mov     ip, yh, lsl #1
-       mvns    ip, ip, asr #21         @ Z set when exponent of y is all ones
-       bne     2f
-       orrs    ip, yl, yh, lsl #12     @ any mantissa bit set?
-       bne     3f                      @ y is NAN
-2:     mov     r0, #0                  @ arguments are ordered.
-       RET
-
-3:     mov     r0, #1                  @ arguments are unordered.
-       RET
-
-       FUNC_END aeabi_dcmpun
-       FUNC_END unorddf2
-
-#endif /* L_arm_unorddf2 */
-
-#ifdef L_arm_fixdfsi
-
-@ fixdfsi / aeabi_d2iz: convert the double in xh-xl to a signed 32-bit
-@ integer in r0.
-@ Values too small in magnitude give 0.  Values too large and INF saturate
-@ to 0x7fffffff or 0x80000000 depending on the sign.  NAN gives 0.
-ARM_FUNC_START fixdfsi
-ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
-
-       @ check exponent range.
-       mov     r2, xh, lsl #1
-       adds    r2, r2, #(1 << 21)
-       bcs     2f                      @ value is INF or NAN
-       bpl     1f                      @ value is too small
-       mov     r3, #(0xfffffc00 + 31)
-       subs    r2, r3, r2, asr #21     @ r2 = right shift count used below
-       bls     3f                      @ value is too large
-
-       @ scale value
-       @ r3 = mantissa with its implicit leading 1 at bit 31.
-       mov     r3, xh, lsl #11
-       orr     r3, r3, #0x80000000
-       orr     r3, r3, xl, lsr #21
-       tst     xh, #0x80000000         @ the sign bit
-       shift1  lsr, r0, r3, r2
-       do_it   ne
-       rsbne   r0, r0, #0              @ negate for negative input
-       RET
-
-1:     mov     r0, #0
-       RET
-
-2:     orrs    xl, xl, xh, lsl #12
-       bne     4f                      @ x is NAN.
-3:     ands    r0, xh, #0x80000000     @ the sign bit
-       do_it   eq
-       moveq   r0, #0x7fffffff         @ maximum signed positive si
-       RET
-
-4:     mov     r0, #0                  @ How should we convert NAN?
-       RET
-
-       FUNC_END aeabi_d2iz
-       FUNC_END fixdfsi
-
-#endif /* L_arm_fixdfsi */
-
-#ifdef L_arm_fixunsdfsi
-
-@ fixunsdfsi / aeabi_d2uiz: convert the double in xh-xl to an unsigned
-@ 32-bit integer in r0.
-@ Any input with the sign bit set gives 0, as do values too small in
-@ magnitude.  Values too large and +INF saturate to 0xffffffff.  A NAN
-@ with a clear sign bit gives 0.
-ARM_FUNC_START fixunsdfsi
-ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
-
-       @ check exponent range.
-       movs    r2, xh, lsl #1
-       bcs     1f                      @ value is negative
-       adds    r2, r2, #(1 << 21)
-       bcs     2f                      @ value is INF or NAN
-       bpl     1f                      @ value is too small
-       mov     r3, #(0xfffffc00 + 31)
-       subs    r2, r3, r2, asr #21     @ r2 = right shift count used below
-       bmi     3f                      @ value is too large
-
-       @ scale value
-       @ r3 = mantissa with its implicit leading 1 at bit 31.
-       mov     r3, xh, lsl #11
-       orr     r3, r3, #0x80000000
-       orr     r3, r3, xl, lsr #21
-       shift1  lsr, r0, r3, r2
-       RET
-
-1:     mov     r0, #0
-       RET
-
-2:     orrs    xl, xl, xh, lsl #12
-       bne     4f                      @ value is NAN.
-3:     mov     r0, #0xffffffff         @ maximum unsigned si
-       RET
-
-4:     mov     r0, #0                  @ How should we convert NAN?
-       RET
-
-       FUNC_END aeabi_d2uiz
-       FUNC_END fixunsdfsi
-
-#endif /* L_arm_fixunsdfsi */
-
-#ifdef L_arm_truncdfsf2
-
-@ truncdfsf2 / aeabi_d2f: narrow the double in xh-xl to a single-precision
-@ value in r0.
-@ Layout of this routine:
-@   1 - exponent fits: shift the mantissa into place and round
-@   2 - exponent out of range: return signed 0, or build a denormal and
-@       resume at 1
-@   3 - overflow side: return a NAN for NAN input, otherwise fall into 5
-@   5 - return INF with the sign of the input
-ARM_FUNC_START truncdfsf2
-ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
-
-       @ check exponent range.
-       @ r3 = input without its sign, rebiased for single precision.
-       mov     r2, xh, lsl #1
-       subs    r3, r2, #((1023 - 127) << 21)
-       do_it   cs, t
-       COND(sub,s,cs)  ip, r3, #(1 << 21)
-       COND(rsb,s,cs)  ip, ip, #(254 << 21)
-       bls     2f                      @ value is out of range
-
-1:     @ shift and round mantissa
-       and     ip, xh, #0x80000000     @ sign bit
-       mov     r2, xl, lsl #3          @ bits dropped from the result
-       orr     xl, ip, xl, lsr #29
-       cmp     r2, #0x80000000         @ C = round up, Z = exactly halfway
-       adc     r0, xl, r3, lsl #2
-       do_it   eq
-       biceq   r0, r0, #1              @ halfway case: clear the LSB
-       RET
-
-2:     @ either overflow or underflow
-       tst     xh, #0x40000000
-       bne     3f                      @ overflow
-
-       @ check if denormalized value is possible
-       adds    r2, r3, #(23 << 21)
-       do_it   lt, t
-       andlt   r0, xh, #0x80000000     @ too small, return signed 0.
-       RETc(lt)
-
-       @ denormalize value so we can resume with the code above afterwards.
-       orr     xh, xh, #0x00100000
-       mov     r2, r2, lsr #21
-       rsb     r2, r2, #24
-       rsb     ip, r2, #32
-#if defined(__thumb2__)
-       lsls    r3, xl, ip
-#else
-       movs    r3, xl, lsl ip
-#endif
-       shift1  lsr, xl, xl, r2
-       do_it   ne
-       orrne   xl, xl, #1              @ fold r3 for rounding considerations.
-       mov     r3, xh, lsl #11
-       mov     r3, r3, lsr #11
-       shiftop orr xl xl r3 lsl ip ip
-       shift1  lsr, r3, r3, r2
-       mov     r3, r3, lsl #1
-       b       1b
-
-3:     @ check for NAN
-       mvns    r3, r2, asr #21
-       bne     5f                      @ simple overflow
-       orrs    r3, xl, xh, lsl #12
-       do_it   ne, tt
-       movne   r0, #0x7f000000
-       orrne   r0, r0, #0x00c00000
-       RETc(ne)                        @ return NAN
-
-5:     @ return INF with sign
-       and     r0, xh, #0x80000000
-       orr     r0, r0, #0x7f000000
-       orr     r0, r0, #0x00800000
-       RET
-
-       FUNC_END aeabi_d2f
-       FUNC_END truncdfsf2
-
-#endif /* L_arm_truncdfsf2 */
diff --git a/gcc/config/arm/ieee754-sf.S b/gcc/config/arm/ieee754-sf.S

deleted file mode 100644 (file)

index c93f66d..0000000
--- a/gcc/config/arm/ieee754-sf.S
+++ /dev/null
@@ -1,1060 +0,0 @@
-/* ieee754-sf.S single-precision floating point support for ARM
-
-   Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009  Free Software Foundation, Inc.
-   Contributed by Nicolas Pitre (nico@cam.org)
-
-   This file is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by the
-   Free Software Foundation; either version 3, or (at your option) any
-   later version.
-
-   This file is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/*
- * Notes:
- *
- * The goal of this code is to be as fast as possible.  This is
- * not meant to be easy to understand for the casual reader.
- *
- * Only the default rounding mode is intended for best performances.
- * Exceptions aren't supported yet, but that can be added quite easily
- * if necessary without impacting performances.
- */
-
-#ifdef L_arm_negsf2
-       
-ARM_FUNC_START negsf2
-ARM_FUNC_ALIAS aeabi_fneg negsf2
-
-       eor     r0, r0, #0x80000000     @ flip sign bit
-       RET
-
-       FUNC_END aeabi_fneg
-       FUNC_END negsf2
-
-#endif
-
-#ifdef L_arm_addsubsf3
-
-ARM_FUNC_START aeabi_frsub
-
-       eor     r0, r0, #0x80000000     @ flip sign bit of first arg
-       b       1f
-
-ARM_FUNC_START subsf3
-ARM_FUNC_ALIAS aeabi_fsub subsf3
-
-       eor     r1, r1, #0x80000000     @ flip sign bit of second arg
-#if defined(__INTERWORKING_STUBS__)
-       b       1f                      @ Skip Thumb-code prologue
-#endif
-
-ARM_FUNC_START addsf3
-ARM_FUNC_ALIAS aeabi_fadd addsf3
-
-1:     @ Look for zeroes, equal values, INF, or NAN.
-       movs    r2, r0, lsl #1
-       do_it   ne, ttt
-       COND(mov,s,ne)  r3, r1, lsl #1
-       teqne   r2, r3
-       COND(mvn,s,ne)  ip, r2, asr #24
-       COND(mvn,s,ne)  ip, r3, asr #24
-       beq     LSYM(Lad_s)
-
-       @ Compute exponent difference.  Make largest exponent in r2,
-       @ corresponding arg in r0, and positive exponent difference in r3.
-       mov     r2, r2, lsr #24
-       rsbs    r3, r2, r3, lsr #24
-       do_it   gt, ttt
-       addgt   r2, r2, r3
-       eorgt   r1, r0, r1
-       eorgt   r0, r1, r0
-       eorgt   r1, r0, r1
-       do_it   lt
-       rsblt   r3, r3, #0
-
-       @ If exponent difference is too large, return largest argument
-       @ already in r0.  We need up to 25 bit to handle proper rounding
-       @ of 0x1p25 - 1.1.
-       cmp     r3, #25
-       do_it   hi
-       RETc(hi)
-
-       @ Convert mantissa to signed integer.
-       tst     r0, #0x80000000
-       orr     r0, r0, #0x00800000
-       bic     r0, r0, #0xff000000
-       do_it   ne
-       rsbne   r0, r0, #0
-       tst     r1, #0x80000000
-       orr     r1, r1, #0x00800000
-       bic     r1, r1, #0xff000000
-       do_it   ne
-       rsbne   r1, r1, #0
-
-       @ If exponent == difference, one or both args were denormalized.
-       @ Since this is not common case, rescale them off line.
-       teq     r2, r3
-       beq     LSYM(Lad_d)
-LSYM(Lad_x):
-
-       @ Compensate for the exponent overlapping the mantissa MSB added later
-       sub     r2, r2, #1
-
-       @ Shift and add second arg to first arg in r0.
-       @ Keep leftover bits into r1.
-       shiftop adds r0 r0 r1 asr r3 ip
-       rsb     r3, r3, #32
-       shift1  lsl, r1, r1, r3
-
-       @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
-       and     r3, r0, #0x80000000
-       bpl     LSYM(Lad_p)
-#if defined(__thumb2__)
-       negs    r1, r1
-       sbc     r0, r0, r0, lsl #1
-#else
-       rsbs    r1, r1, #0
-       rsc     r0, r0, #0
-#endif
-
-       @ Determine how to normalize the result.
-LSYM(Lad_p):
-       cmp     r0, #0x00800000
-       bcc     LSYM(Lad_a)
-       cmp     r0, #0x01000000
-       bcc     LSYM(Lad_e)
-
-       @ Result needs to be shifted right.
-       movs    r0, r0, lsr #1
-       mov     r1, r1, rrx
-       add     r2, r2, #1
-
-       @ Make sure we did not bust our exponent.
-       cmp     r2, #254
-       bhs     LSYM(Lad_o)
-
-       @ Our result is now properly aligned into r0, remaining bits in r1.
-       @ Pack final result together.
-       @ Round with MSB of r1. If halfway between two numbers, round towards
-       @ LSB of r0 = 0. 
-LSYM(Lad_e):
-       cmp     r1, #0x80000000
-       adc     r0, r0, r2, lsl #23
-       do_it   eq
-       biceq   r0, r0, #1
-       orr     r0, r0, r3
-       RET
-
-       @ Result must be shifted left and exponent adjusted.
-LSYM(Lad_a):
-       movs    r1, r1, lsl #1
-       adc     r0, r0, r0
-       tst     r0, #0x00800000
-       sub     r2, r2, #1
-       bne     LSYM(Lad_e)
-       
-       @ No rounding necessary since r1 will always be 0 at this point.
-LSYM(Lad_l):
-
-#if __ARM_ARCH__ < 5
-
-       movs    ip, r0, lsr #12
-       moveq   r0, r0, lsl #12
-       subeq   r2, r2, #12
-       tst     r0, #0x00ff0000
-       moveq   r0, r0, lsl #8
-       subeq   r2, r2, #8
-       tst     r0, #0x00f00000
-       moveq   r0, r0, lsl #4
-       subeq   r2, r2, #4
-       tst     r0, #0x00c00000
-       moveq   r0, r0, lsl #2
-       subeq   r2, r2, #2
-       cmp     r0, #0x00800000
-       movcc   r0, r0, lsl #1
-       sbcs    r2, r2, #0
-
-#else
-
-       clz     ip, r0
-       sub     ip, ip, #8
-       subs    r2, r2, ip
-       shift1  lsl, r0, r0, ip
-
-#endif
-
-       @ Final result with sign
-       @ If exponent negative, denormalize result.
-       do_it   ge, et
-       addge   r0, r0, r2, lsl #23
-       rsblt   r2, r2, #0
-       orrge   r0, r0, r3
-#if defined(__thumb2__)
-       do_it   lt, t
-       lsrlt   r0, r0, r2
-       orrlt   r0, r3, r0
-#else
-       orrlt   r0, r3, r0, lsr r2
-#endif
-       RET
-
-       @ Fixup and adjust bit position for denormalized arguments.
-       @ Note that r2 must not remain equal to 0.
-LSYM(Lad_d):
-       teq     r2, #0
-       eor     r1, r1, #0x00800000
-       do_it   eq, te
-       eoreq   r0, r0, #0x00800000
-       addeq   r2, r2, #1
-       subne   r3, r3, #1
-       b       LSYM(Lad_x)
-
-LSYM(Lad_s):
-       mov     r3, r1, lsl #1
-
-       mvns    ip, r2, asr #24
-       do_it   ne
-       COND(mvn,s,ne)  ip, r3, asr #24
-       beq     LSYM(Lad_i)
-
-       teq     r2, r3
-       beq     1f
-
-       @ Result is x + 0.0 = x or 0.0 + y = y.
-       teq     r2, #0
-       do_it   eq
-       moveq   r0, r1
-       RET
-
-1:     teq     r0, r1
-
-       @ Result is x - x = 0.
-       do_it   ne, t
-       movne   r0, #0
-       RETc(ne)
-
-       @ Result is x + x = 2x.
-       tst     r2, #0xff000000
-       bne     2f
-       movs    r0, r0, lsl #1
-       do_it   cs
-       orrcs   r0, r0, #0x80000000
-       RET
-2:     adds    r2, r2, #(2 << 24)
-       do_it   cc, t
-       addcc   r0, r0, #(1 << 23)
-       RETc(cc)
-       and     r3, r0, #0x80000000
-
-       @ Overflow: return INF.
-LSYM(Lad_o):
-       orr     r0, r3, #0x7f000000
-       orr     r0, r0, #0x00800000
-       RET
-
-       @ At least one of r0/r1 is INF/NAN.
-       @   if r0 != INF/NAN: return r1 (which is INF/NAN)
-       @   if r1 != INF/NAN: return r0 (which is INF/NAN)
-       @   if r0 or r1 is NAN: return NAN
-       @   if opposite sign: return NAN
-       @   otherwise return r0 (which is INF or -INF)
-LSYM(Lad_i):
-       mvns    r2, r2, asr #24
-       do_it   ne, et
-       movne   r0, r1
-       COND(mvn,s,eq)  r3, r3, asr #24
-       movne   r1, r0
-       movs    r2, r0, lsl #9
-       do_it   eq, te
-       COND(mov,s,eq)  r3, r1, lsl #9
-       teqeq   r0, r1
-       orrne   r0, r0, #0x00400000     @ quiet NAN
-       RET
-
-       FUNC_END aeabi_frsub
-       FUNC_END aeabi_fadd
-       FUNC_END addsf3
-       FUNC_END aeabi_fsub
-       FUNC_END subsf3
-
-ARM_FUNC_START floatunsisf
-ARM_FUNC_ALIAS aeabi_ui2f floatunsisf
-               
-       mov     r3, #0
-       b       1f
-
-ARM_FUNC_START floatsisf
-ARM_FUNC_ALIAS aeabi_i2f floatsisf
-       
-       ands    r3, r0, #0x80000000
-       do_it   mi
-       rsbmi   r0, r0, #0
-
-1:     movs    ip, r0
-       do_it   eq
-       RETc(eq)
-
-       @ Add initial exponent to sign
-       orr     r3, r3, #((127 + 23) << 23)
-
-       .ifnc   ah, r0
-       mov     ah, r0
-       .endif
-       mov     al, #0
-       b       2f
-
-       FUNC_END aeabi_i2f
-       FUNC_END floatsisf
-       FUNC_END aeabi_ui2f
-       FUNC_END floatunsisf
-
-ARM_FUNC_START floatundisf
-ARM_FUNC_ALIAS aeabi_ul2f floatundisf
-
-       orrs    r2, r0, r1
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
-       do_it   eq, t
-       mvfeqs  f0, #0.0
-#else
-       do_it   eq
-#endif
-       RETc(eq)
-
-       mov     r3, #0
-       b       1f
-
-ARM_FUNC_START floatdisf
-ARM_FUNC_ALIAS aeabi_l2f floatdisf
-
-       orrs    r2, r0, r1
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
-       do_it   eq, t
-       mvfeqs  f0, #0.0
-#else
-       do_it   eq
-#endif
-       RETc(eq)
-
-       ands    r3, ah, #0x80000000     @ sign bit in r3
-       bpl     1f
-#if defined(__thumb2__)
-       negs    al, al
-       sbc     ah, ah, ah, lsl #1
-#else
-       rsbs    al, al, #0
-       rsc     ah, ah, #0
-#endif
-1:
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
-       @ For hard FPA code we want to return via the tail below so that
-       @ we can return the result in f0 as well as in r0 for backwards
-       @ compatibility.
-       str     lr, [sp, #-8]!
-       adr     lr, LSYM(f0_ret)
-#endif
-
-       movs    ip, ah
-       do_it   eq, tt
-       moveq   ip, al
-       moveq   ah, al
-       moveq   al, #0
-
-       @ Add initial exponent to sign
-       orr     r3, r3, #((127 + 23 + 32) << 23)
-       do_it   eq
-       subeq   r3, r3, #(32 << 23)
-2:     sub     r3, r3, #(1 << 23)
-
-#if __ARM_ARCH__ < 5
-
-       mov     r2, #23
-       cmp     ip, #(1 << 16)
-       do_it   hs, t
-       movhs   ip, ip, lsr #16
-       subhs   r2, r2, #16
-       cmp     ip, #(1 << 8)
-       do_it   hs, t
-       movhs   ip, ip, lsr #8
-       subhs   r2, r2, #8
-       cmp     ip, #(1 << 4)
-       do_it   hs, t
-       movhs   ip, ip, lsr #4
-       subhs   r2, r2, #4
-       cmp     ip, #(1 << 2)
-       do_it   hs, e
-       subhs   r2, r2, #2
-       sublo   r2, r2, ip, lsr #1
-       subs    r2, r2, ip, lsr #3
-
-#else
-
-       clz     r2, ip
-       subs    r2, r2, #8
-
-#endif
-
-       sub     r3, r3, r2, lsl #23
-       blt     3f
-
-       shiftop add r3 r3 ah lsl r2 ip
-       shift1  lsl, ip, al, r2
-       rsb     r2, r2, #32
-       cmp     ip, #0x80000000
-       shiftop adc r0 r3 al lsr r2 r2
-       do_it   eq
-       biceq   r0, r0, #1
-       RET
-
-3:     add     r2, r2, #32
-       shift1  lsl, ip, ah, r2
-       rsb     r2, r2, #32
-       orrs    al, al, ip, lsl #1
-       shiftop adc r0 r3 ah lsr r2 r2
-       do_it   eq
-       biceq   r0, r0, ip, lsr #31
-       RET
-
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
-
-LSYM(f0_ret):
-       str     r0, [sp, #-4]!
-       ldfs    f0, [sp], #4
-       RETLDM
-
-#endif
-
-       FUNC_END floatdisf
-       FUNC_END aeabi_l2f
-       FUNC_END floatundisf
-       FUNC_END aeabi_ul2f
-
-#endif /* L_addsubsf3 */
-
-#ifdef L_arm_muldivsf3
-
-ARM_FUNC_START mulsf3
-ARM_FUNC_ALIAS aeabi_fmul mulsf3
-
-       @ Mask out exponents, trap any zero/denormal/INF/NAN.
-       mov     ip, #0xff
-       ands    r2, ip, r0, lsr #23
-       do_it   ne, tt
-       COND(and,s,ne)  r3, ip, r1, lsr #23
-       teqne   r2, ip
-       teqne   r3, ip
-       beq     LSYM(Lml_s)
-LSYM(Lml_x):
-
-       @ Add exponents together
-       add     r2, r2, r3
-
-       @ Determine final sign.
-       eor     ip, r0, r1
-
-       @ Convert mantissa to unsigned integer.
-       @ If power of two, branch to a separate path.
-       @ Make up for final alignment.
-       movs    r0, r0, lsl #9
-       do_it   ne
-       COND(mov,s,ne)  r1, r1, lsl #9
-       beq     LSYM(Lml_1)
-       mov     r3, #0x08000000
-       orr     r0, r3, r0, lsr #5
-       orr     r1, r3, r1, lsr #5
-
-#if __ARM_ARCH__ < 4
-
-       @ Put sign bit in r3, which will be restored into r0 later.
-       and     r3, ip, #0x80000000
-
-       @ Well, no way to make it shorter without the umull instruction.
-       do_push {r3, r4, r5}
-       mov     r4, r0, lsr #16
-       mov     r5, r1, lsr #16
-       bic     r0, r0, r4, lsl #16
-       bic     r1, r1, r5, lsl #16
-       mul     ip, r4, r5
-       mul     r3, r0, r1
-       mul     r0, r5, r0
-       mla     r0, r4, r1, r0
-       adds    r3, r3, r0, lsl #16
-       adc     r1, ip, r0, lsr #16
-       do_pop  {r0, r4, r5}
-
-#else
-
-       @ The actual multiplication.
-       umull   r3, r1, r0, r1
-
-       @ Put final sign in r0.
-       and     r0, ip, #0x80000000
-
-#endif
-
-       @ Adjust result upon the MSB position.
-       cmp     r1, #(1 << 23)
-       do_it   cc, tt
-       movcc   r1, r1, lsl #1
-       orrcc   r1, r1, r3, lsr #31
-       movcc   r3, r3, lsl #1
-
-       @ Add sign to result.
-       orr     r0, r0, r1
-
-       @ Apply exponent bias, check for under/overflow.
-       sbc     r2, r2, #127
-       cmp     r2, #(254 - 1)
-       bhi     LSYM(Lml_u)
-
-       @ Round the result, merge final exponent.
-       cmp     r3, #0x80000000
-       adc     r0, r0, r2, lsl #23
-       do_it   eq
-       biceq   r0, r0, #1
-       RET
-
-       @ Multiplication by 0x1p*: let''s shortcut a lot of code.
-LSYM(Lml_1):
-       teq     r0, #0
-       and     ip, ip, #0x80000000
-       do_it   eq
-       moveq   r1, r1, lsl #9
-       orr     r0, ip, r0, lsr #9
-       orr     r0, r0, r1, lsr #9
-       subs    r2, r2, #127
-       do_it   gt, tt
-       COND(rsb,s,gt)  r3, r2, #255
-       orrgt   r0, r0, r2, lsl #23
-       RETc(gt)
-
-       @ Under/overflow: fix things up for the code below.
-       orr     r0, r0, #0x00800000
-       mov     r3, #0
-       subs    r2, r2, #1
-
-LSYM(Lml_u):
-       @ Overflow?
-       bgt     LSYM(Lml_o)
-
-       @ Check if denormalized result is possible, otherwise return signed 0.
-       cmn     r2, #(24 + 1)
-       do_it   le, t
-       bicle   r0, r0, #0x7fffffff
-       RETc(le)
-
-       @ Shift value right, round, etc.
-       rsb     r2, r2, #0
-       movs    r1, r0, lsl #1
-       shift1  lsr, r1, r1, r2
-       rsb     r2, r2, #32
-       shift1  lsl, ip, r0, r2
-       movs    r0, r1, rrx
-       adc     r0, r0, #0
-       orrs    r3, r3, ip, lsl #1
-       do_it   eq
-       biceq   r0, r0, ip, lsr #31
-       RET
-
-       @ One or both arguments are denormalized.
-       @ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
-       teq     r2, #0
-       and     ip, r0, #0x80000000
-1:     do_it   eq, tt
-       moveq   r0, r0, lsl #1
-       tsteq   r0, #0x00800000
-       subeq   r2, r2, #1
-       beq     1b
-       orr     r0, r0, ip
-       teq     r3, #0
-       and     ip, r1, #0x80000000
-2:     do_it   eq, tt
-       moveq   r1, r1, lsl #1
-       tsteq   r1, #0x00800000
-       subeq   r3, r3, #1
-       beq     2b
-       orr     r1, r1, ip
-       b       LSYM(Lml_x)
-
-LSYM(Lml_s):
-       @ Isolate the INF and NAN cases away
-       and     r3, ip, r1, lsr #23
-       teq     r2, ip
-       do_it   ne
-       teqne   r3, ip
-       beq     1f
-
-       @ Here, one or more arguments are either denormalized or zero.
-       bics    ip, r0, #0x80000000
-       do_it   ne
-       COND(bic,s,ne)  ip, r1, #0x80000000
-       bne     LSYM(Lml_d)
-
-       @ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
-       eor     r0, r0, r1
-       bic     r0, r0, #0x7fffffff
-       RET
-
-1:     @ One or both args are INF or NAN.
-       teq     r0, #0x0
-       do_it   ne, ett
-       teqne   r0, #0x80000000
-       moveq   r0, r1
-       teqne   r1, #0x0
-       teqne   r1, #0x80000000
-       beq     LSYM(Lml_n)             @ 0 * INF or INF * 0 -> NAN
-       teq     r2, ip
-       bne     1f
-       movs    r2, r0, lsl #9
-       bne     LSYM(Lml_n)             @ NAN * <anything> -> NAN
-1:     teq     r3, ip
-       bne     LSYM(Lml_i)
-       movs    r3, r1, lsl #9
-       do_it   ne
-       movne   r0, r1
-       bne     LSYM(Lml_n)             @ <anything> * NAN -> NAN
-
-       @ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
-       eor     r0, r0, r1
-
-       @ Overflow: return INF (sign already in r0).
-LSYM(Lml_o):
-       and     r0, r0, #0x80000000
-       orr     r0, r0, #0x7f000000
-       orr     r0, r0, #0x00800000
-       RET
-
-       @ Return a quiet NAN.
-LSYM(Lml_n):
-       orr     r0, r0, #0x7f000000
-       orr     r0, r0, #0x00c00000
-       RET
-
-       FUNC_END aeabi_fmul
-       FUNC_END mulsf3
-
-ARM_FUNC_START divsf3
-ARM_FUNC_ALIAS aeabi_fdiv divsf3
-
-       @ Mask out exponents, trap any zero/denormal/INF/NAN.
-       mov     ip, #0xff
-       ands    r2, ip, r0, lsr #23
-       do_it   ne, tt
-       COND(and,s,ne)  r3, ip, r1, lsr #23
-       teqne   r2, ip
-       teqne   r3, ip
-       beq     LSYM(Ldv_s)
-LSYM(Ldv_x):
-
-       @ Substract divisor exponent from dividend''s
-       sub     r2, r2, r3
-
-       @ Preserve final sign into ip.
-       eor     ip, r0, r1
-
-       @ Convert mantissa to unsigned integer.
-       @ Dividend -> r3, divisor -> r1.
-       movs    r1, r1, lsl #9
-       mov     r0, r0, lsl #9
-       beq     LSYM(Ldv_1)
-       mov     r3, #0x10000000
-       orr     r1, r3, r1, lsr #4
-       orr     r3, r3, r0, lsr #4
-
-       @ Initialize r0 (result) with final sign bit.
-       and     r0, ip, #0x80000000
-
-       @ Ensure result will land to known bit position.
-       @ Apply exponent bias accordingly.
-       cmp     r3, r1
-       do_it   cc
-       movcc   r3, r3, lsl #1
-       adc     r2, r2, #(127 - 2)
-
-       @ The actual division loop.
-       mov     ip, #0x00800000
-1:     cmp     r3, r1
-       do_it   cs, t
-       subcs   r3, r3, r1
-       orrcs   r0, r0, ip
-       cmp     r3, r1, lsr #1
-       do_it   cs, t
-       subcs   r3, r3, r1, lsr #1
-       orrcs   r0, r0, ip, lsr #1
-       cmp     r3, r1, lsr #2
-       do_it   cs, t
-       subcs   r3, r3, r1, lsr #2
-       orrcs   r0, r0, ip, lsr #2
-       cmp     r3, r1, lsr #3
-       do_it   cs, t
-       subcs   r3, r3, r1, lsr #3
-       orrcs   r0, r0, ip, lsr #3
-       movs    r3, r3, lsl #4
-       do_it   ne
-       COND(mov,s,ne)  ip, ip, lsr #4
-       bne     1b
-
-       @ Check exponent for under/overflow.
-       cmp     r2, #(254 - 1)
-       bhi     LSYM(Lml_u)
-
-       @ Round the result, merge final exponent.
-       cmp     r3, r1
-       adc     r0, r0, r2, lsl #23
-       do_it   eq
-       biceq   r0, r0, #1
-       RET
-
-       @ Division by 0x1p*: let''s shortcut a lot of code.
-LSYM(Ldv_1):
-       and     ip, ip, #0x80000000
-       orr     r0, ip, r0, lsr #9
-       adds    r2, r2, #127
-       do_it   gt, tt
-       COND(rsb,s,gt)  r3, r2, #255
-       orrgt   r0, r0, r2, lsl #23
-       RETc(gt)
-
-       orr     r0, r0, #0x00800000
-       mov     r3, #0
-       subs    r2, r2, #1
-       b       LSYM(Lml_u)
-
-       @ One or both arguments are denormalized.
-       @ Scale them leftwards and preserve sign bit.
-LSYM(Ldv_d):
-       teq     r2, #0
-       and     ip, r0, #0x80000000
-1:     do_it   eq, tt
-       moveq   r0, r0, lsl #1
-       tsteq   r0, #0x00800000
-       subeq   r2, r2, #1
-       beq     1b
-       orr     r0, r0, ip
-       teq     r3, #0
-       and     ip, r1, #0x80000000
-2:     do_it   eq, tt
-       moveq   r1, r1, lsl #1
-       tsteq   r1, #0x00800000
-       subeq   r3, r3, #1
-       beq     2b
-       orr     r1, r1, ip
-       b       LSYM(Ldv_x)
-
-       @ One or both arguments are either INF, NAN, zero or denormalized.
-LSYM(Ldv_s):
-       and     r3, ip, r1, lsr #23
-       teq     r2, ip
-       bne     1f
-       movs    r2, r0, lsl #9
-       bne     LSYM(Lml_n)             @ NAN / <anything> -> NAN
-       teq     r3, ip
-       bne     LSYM(Lml_i)             @ INF / <anything> -> INF
-       mov     r0, r1
-       b       LSYM(Lml_n)             @ INF / (INF or NAN) -> NAN
-1:     teq     r3, ip
-       bne     2f
-       movs    r3, r1, lsl #9
-       beq     LSYM(Lml_z)             @ <anything> / INF -> 0
-       mov     r0, r1
-       b       LSYM(Lml_n)             @ <anything> / NAN -> NAN
-2:     @ If both are nonzero, we need to normalize and resume above.
-       bics    ip, r0, #0x80000000
-       do_it   ne
-       COND(bic,s,ne)  ip, r1, #0x80000000
-       bne     LSYM(Ldv_d)
-       @ One or both arguments are zero.
-       bics    r2, r0, #0x80000000
-       bne     LSYM(Lml_i)             @ <non_zero> / 0 -> INF
-       bics    r3, r1, #0x80000000
-       bne     LSYM(Lml_z)             @ 0 / <non_zero> -> 0
-       b       LSYM(Lml_n)             @ 0 / 0 -> NAN
-
-       FUNC_END aeabi_fdiv
-       FUNC_END divsf3
-
-#endif /* L_muldivsf3 */
-
-#ifdef L_arm_cmpsf2
-
-       @ The return value in r0 is
-       @
-       @   0  if the operands are equal
-       @   1  if the first operand is greater than the second, or
-       @      the operands are unordered and the operation is
-       @      CMP, LT, LE, NE, or EQ.
-       @   -1 if the first operand is less than the second, or
-       @      the operands are unordered and the operation is GT
-       @      or GE.
-       @
-       @ The Z flag will be set iff the operands are equal.
-       @
-       @ The following registers are clobbered by this function:
-       @   ip, r0, r1, r2, r3
-
-ARM_FUNC_START gtsf2
-ARM_FUNC_ALIAS gesf2 gtsf2
-       mov     ip, #-1
-       b       1f
-
-ARM_FUNC_START ltsf2
-ARM_FUNC_ALIAS lesf2 ltsf2
-       mov     ip, #1
-       b       1f
-
-ARM_FUNC_START cmpsf2
-ARM_FUNC_ALIAS nesf2 cmpsf2
-ARM_FUNC_ALIAS eqsf2 cmpsf2
-       mov     ip, #1                  @ how should we specify unordered here?
-
-1:     str     ip, [sp, #-4]!
-
-       @ Trap any INF/NAN first.
-       mov     r2, r0, lsl #1
-       mov     r3, r1, lsl #1
-       mvns    ip, r2, asr #24
-       do_it   ne
-       COND(mvn,s,ne)  ip, r3, asr #24
-       beq     3f
-
-       @ Compare values.
-       @ Note that 0.0 is equal to -0.0.
-2:     add     sp, sp, #4
-       orrs    ip, r2, r3, lsr #1      @ test if both are 0, clear C flag
-       do_it   ne
-       teqne   r0, r1                  @ if not 0 compare sign
-       do_it   pl
-       COND(sub,s,pl)  r0, r2, r3              @ if same sign compare values, set r0
-
-       @ Result:
-       do_it   hi
-       movhi   r0, r1, asr #31
-       do_it   lo
-       mvnlo   r0, r1, asr #31
-       do_it   ne
-       orrne   r0, r0, #1
-       RET
-
-       @ Look for a NAN. 
-3:     mvns    ip, r2, asr #24
-       bne     4f
-       movs    ip, r0, lsl #9
-       bne     5f                      @ r0 is NAN
-4:     mvns    ip, r3, asr #24
-       bne     2b
-       movs    ip, r1, lsl #9
-       beq     2b                      @ r1 is not NAN
-5:     ldr     r0, [sp], #4            @ return unordered code.
-       RET
-
-       FUNC_END gesf2
-       FUNC_END gtsf2
-       FUNC_END lesf2
-       FUNC_END ltsf2
-       FUNC_END nesf2
-       FUNC_END eqsf2
-       FUNC_END cmpsf2
-
-ARM_FUNC_START aeabi_cfrcmple
-
-       mov     ip, r0
-       mov     r0, r1
-       mov     r1, ip
-       b       6f
-
-ARM_FUNC_START aeabi_cfcmpeq
-ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
-
-       @ The status-returning routines are required to preserve all
-       @ registers except ip, lr, and cpsr.
-6:     do_push {r0, r1, r2, r3, lr}
-       ARM_CALL cmpsf2
-       @ Set the Z flag correctly, and the C flag unconditionally.
-       cmp     r0, #0
-       @ Clear the C flag if the return value was -1, indicating
-       @ that the first operand was smaller than the second.
-       do_it   mi
-       cmnmi   r0, #0
-       RETLDM  "r0, r1, r2, r3"
-
-       FUNC_END aeabi_cfcmple
-       FUNC_END aeabi_cfcmpeq
-       FUNC_END aeabi_cfrcmple
-
-ARM_FUNC_START aeabi_fcmpeq
-
-       str     lr, [sp, #-8]!
-       ARM_CALL aeabi_cfcmple
-       do_it   eq, e
-       moveq   r0, #1  @ Equal to.
-       movne   r0, #0  @ Less than, greater than, or unordered.
-       RETLDM
-
-       FUNC_END aeabi_fcmpeq
-
-ARM_FUNC_START aeabi_fcmplt
-
-       str     lr, [sp, #-8]!
-       ARM_CALL aeabi_cfcmple
-       do_it   cc, e
-       movcc   r0, #1  @ Less than.
-       movcs   r0, #0  @ Equal to, greater than, or unordered.
-       RETLDM
-
-       FUNC_END aeabi_fcmplt
-
-ARM_FUNC_START aeabi_fcmple
-
-       str     lr, [sp, #-8]!
-       ARM_CALL aeabi_cfcmple
-       do_it   ls, e
-       movls   r0, #1  @ Less than or equal to.
-       movhi   r0, #0  @ Greater than or unordered.
-       RETLDM
-
-       FUNC_END aeabi_fcmple
-
-ARM_FUNC_START aeabi_fcmpge
-
-       str     lr, [sp, #-8]!
-       ARM_CALL aeabi_cfrcmple
-       do_it   ls, e
-       movls   r0, #1  @ Operand 2 is less than or equal to operand 1.
-       movhi   r0, #0  @ Operand 2 greater than operand 1, or unordered.
-       RETLDM
-
-       FUNC_END aeabi_fcmpge
-
-ARM_FUNC_START aeabi_fcmpgt
-
-       str     lr, [sp, #-8]!
-       ARM_CALL aeabi_cfrcmple
-       do_it   cc, e
-       movcc   r0, #1  @ Operand 2 is less than operand 1.
-       movcs   r0, #0  @ Operand 2 is greater than or equal to operand 1,
-                       @ or they are unordered.
-       RETLDM
-
-       FUNC_END aeabi_fcmpgt
-
-#endif /* L_cmpsf2 */
-
-#ifdef L_arm_unordsf2
-
-ARM_FUNC_START unordsf2
-ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
-
-       mov     r2, r0, lsl #1
-       mov     r3, r1, lsl #1
-       mvns    ip, r2, asr #24
-       bne     1f
-       movs    ip, r0, lsl #9
-       bne     3f                      @ r0 is NAN
-1:     mvns    ip, r3, asr #24
-       bne     2f
-       movs    ip, r1, lsl #9
-       bne     3f                      @ r1 is NAN
-2:     mov     r0, #0                  @ arguments are ordered.
-       RET
-3:     mov     r0, #1                  @ arguments are unordered.
-       RET
-
-       FUNC_END aeabi_fcmpun
-       FUNC_END unordsf2
-
-#endif /* L_unordsf2 */
-
-#ifdef L_arm_fixsfsi
-
-ARM_FUNC_START fixsfsi
-ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
-
-       @ check exponent range.
-       mov     r2, r0, lsl #1
-       cmp     r2, #(127 << 24)
-       bcc     1f                      @ value is too small
-       mov     r3, #(127 + 31)
-       subs    r2, r3, r2, lsr #24
-       bls     2f                      @ value is too large
-
-       @ scale value
-       mov     r3, r0, lsl #8
-       orr     r3, r3, #0x80000000
-       tst     r0, #0x80000000         @ the sign bit
-       shift1  lsr, r0, r3, r2
-       do_it   ne
-       rsbne   r0, r0, #0
-       RET
-
-1:     mov     r0, #0
-       RET
-
-2:     cmp     r2, #(127 + 31 - 0xff)
-       bne     3f
-       movs    r2, r0, lsl #9
-       bne     4f                      @ r0 is NAN.
-3:     ands    r0, r0, #0x80000000     @ the sign bit
-       do_it   eq
-       moveq   r0, #0x7fffffff         @ the maximum signed positive si
-       RET
-
-4:     mov     r0, #0                  @ What should we convert NAN to?
-       RET
-
-       FUNC_END aeabi_f2iz
-       FUNC_END fixsfsi
-
-#endif /* L_fixsfsi */
-
-#ifdef L_arm_fixunssfsi
-
-ARM_FUNC_START fixunssfsi
-ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
-
-       @ check exponent range.
-       movs    r2, r0, lsl #1
-       bcs     1f                      @ value is negative
-       cmp     r2, #(127 << 24)
-       bcc     1f                      @ value is too small
-       mov     r3, #(127 + 31)
-       subs    r2, r3, r2, lsr #24
-       bmi     2f                      @ value is too large
-
-       @ scale the value
-       mov     r3, r0, lsl #8
-       orr     r3, r3, #0x80000000
-       shift1  lsr, r0, r3, r2
-       RET
-
-1:     mov     r0, #0
-       RET
-
-2:     cmp     r2, #(127 + 31 - 0xff)
-       bne     3f
-       movs    r2, r0, lsl #9
-       bne     4f                      @ r0 is NAN.
-3:     mov     r0, #0xffffffff         @ maximum unsigned si
-       RET
-
-4:     mov     r0, #0                  @ What should we convert NAN to?
-       RET
-
-       FUNC_END aeabi_f2uiz
-       FUNC_END fixunssfsi
-
-#endif /* L_fixunssfsi */
diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm

deleted file mode 100644 (file)

index 2e76c01..0000000
--- a/gcc/config/arm/lib1funcs.asm
+++ /dev/null
@@ -1,1829 +0,0 @@
-@ libgcc routines for ARM cpu.
-@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
-
-/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008,
-   2009, 2010 Free Software Foundation, Inc.
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/* An executable stack is *not* required for these functions.  */
-#if defined(__ELF__) && defined(__linux__)
-.section .note.GNU-stack,"",%progbits
-.previous
-#endif  /* __ELF__ and __linux__ */
-
-#ifdef __ARM_EABI__
-/* Some attributes that are common to all routines in this file.  */
-       /* Tag_ABI_align_needed: This code does not require 8-byte
-          alignment from the caller.  */
-       /* .eabi_attribute 24, 0  -- default setting.  */
-       /* Tag_ABI_align_preserved: This code preserves 8-byte
-          alignment in any callee.  */
-       .eabi_attribute 25, 1
-#endif /* __ARM_EABI__ */
-/* ------------------------------------------------------------------------ */
-
-/* We need to know what prefix to add to function names.  */
-
-#ifndef __USER_LABEL_PREFIX__
-#error  __USER_LABEL_PREFIX__ not defined
-#endif
-
-/* ANSI concatenation macros.  */
-
-#define CONCAT1(a, b) CONCAT2(a, b)
-#define CONCAT2(a, b) a ## b
-
-/* Use the right prefix for global labels.  */
-
-#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
-
-#ifdef __ELF__
-#ifdef __thumb__
-#define __PLT__  /* Not supported in Thumb assembler (for now).  */
-#elif defined __vxworks && !defined __PIC__
-#define __PLT__ /* Not supported by the kernel loader.  */
-#else
-#define __PLT__ (PLT)
-#endif
-#define TYPE(x) .type SYM(x),function
-#define SIZE(x) .size SYM(x), . - SYM(x)
-#define LSYM(x) .x
-#else
-#define __PLT__
-#define TYPE(x)
-#define SIZE(x)
-#define LSYM(x) x
-#endif
-
-/* Function end macros.  Variants for interworking.  */
-
-#if defined(__ARM_ARCH_2__)
-# define __ARM_ARCH__ 2
-#endif
-
-#if defined(__ARM_ARCH_3__)
-# define __ARM_ARCH__ 3
-#endif
-
-#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
-       || defined(__ARM_ARCH_4T__)
-/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
-   long multiply instructions.  That includes v3M.  */
-# define __ARM_ARCH__ 4
-#endif
-       
-#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
-       || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
-       || defined(__ARM_ARCH_5TEJ__)
-# define __ARM_ARCH__ 5
-#endif
-
-#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
-       || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
-       || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
-       || defined(__ARM_ARCH_6M__)
-# define __ARM_ARCH__ 6
-#endif
-
-#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
-       || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
-       || defined(__ARM_ARCH_7EM__)
-# define __ARM_ARCH__ 7
-#endif
-
-#ifndef __ARM_ARCH__
-#error Unable to determine architecture.
-#endif
-
-/* There are times when we might prefer Thumb1 code even if ARM code is
-   permitted, for example, the code might be smaller, or there might be
-   interworking problems with switching to ARM state if interworking is
-   disabled.  */
-#if (defined(__thumb__)                        \
-     && !defined(__thumb2__)           \
-     && (!defined(__THUMB_INTERWORK__) \
-        || defined (__OPTIMIZE_SIZE__) \
-        || defined(__ARM_ARCH_6M__)))
-# define __prefer_thumb__
-#endif
-
-/* How to return from a function call depends on the architecture variant.  */
-
-#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
-
-# define RET           bx      lr
-# define RETc(x)       bx##x   lr
-
-/* Special precautions for interworking on armv4t.  */
-# if (__ARM_ARCH__ == 4)
-
-/* Always use bx, not ldr pc.  */
-#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
-#    define __INTERWORKING__
-#   endif /* __thumb__ || __THUMB_INTERWORK__ */
-
-/* Include thumb stub before arm mode code.  */
-#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
-#   define __INTERWORKING_STUBS__
-#  endif /* __thumb__ && !__THUMB_INTERWORK__ */
-
-#endif /* __ARM_ARCH__ == 4 */
-
-#else
-
-# define RET           mov     pc, lr
-# define RETc(x)       mov##x  pc, lr
-
-#endif
-
-.macro cfi_pop         advance, reg, cfa_offset
-#ifdef __ELF__
-       .pushsection    .debug_frame
-       .byte   0x4             /* DW_CFA_advance_loc4 */
-       .4byte  \advance
-       .byte   (0xc0 | \reg)   /* DW_CFA_restore */
-       .byte   0xe             /* DW_CFA_def_cfa_offset */
-       .uleb128 \cfa_offset
-       .popsection
-#endif
-.endm
-.macro cfi_push        advance, reg, offset, cfa_offset
-#ifdef __ELF__
-       .pushsection    .debug_frame
-       .byte   0x4             /* DW_CFA_advance_loc4 */
-       .4byte  \advance
-       .byte   (0x80 | \reg)   /* DW_CFA_offset */
-       .uleb128 (\offset / -4)
-       .byte   0xe             /* DW_CFA_def_cfa_offset */
-       .uleb128 \cfa_offset
-       .popsection
-#endif
-.endm
-.macro cfi_start       start_label, end_label
-#ifdef __ELF__
-       .pushsection    .debug_frame
-LSYM(Lstart_frame):
-       .4byte  LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
-LSYM(Lstart_cie):
-        .4byte 0xffffffff      @ CIE Identifier Tag
-        .byte  0x1     @ CIE Version
-        .ascii "\0"    @ CIE Augmentation
-        .uleb128 0x1   @ CIE Code Alignment Factor
-        .sleb128 -4    @ CIE Data Alignment Factor
-        .byte  0xe     @ CIE RA Column
-        .byte  0xc     @ DW_CFA_def_cfa
-        .uleb128 0xd
-        .uleb128 0x0
-
-       .align 2
-LSYM(Lend_cie):
-       .4byte  LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length
-LSYM(Lstart_fde):
-       .4byte  LSYM(Lstart_frame)      @ FDE CIE offset
-       .4byte  \start_label    @ FDE initial location
-       .4byte  \end_label-\start_label @ FDE address range
-       .popsection
-#endif
-.endm
-.macro cfi_end end_label
-#ifdef __ELF__
-       .pushsection    .debug_frame
-       .align  2
-LSYM(Lend_fde):
-       .popsection
-\end_label:
-#endif
-.endm
-
-/* Don't pass dirn, it's there just to get token pasting right.  */
-
-.macro RETLDM  regs=, cond=, unwind=, dirn=ia
-#if defined (__INTERWORKING__)
-       .ifc "\regs",""
-       ldr\cond        lr, [sp], #8
-       .else
-# if defined(__thumb2__)
-       pop\cond        {\regs, lr}
-# else
-       ldm\cond\dirn   sp!, {\regs, lr}
-# endif
-       .endif
-       .ifnc "\unwind", ""
-       /* Mark LR as restored.  */
-97:    cfi_pop 97b - \unwind, 0xe, 0x0
-       .endif
-       bx\cond lr
-#else
-       /* Caller is responsible for providing IT instruction.  */
-       .ifc "\regs",""
-       ldr\cond        pc, [sp], #8
-       .else
-# if defined(__thumb2__)
-       pop\cond        {\regs, pc}
-# else
-       ldm\cond\dirn   sp!, {\regs, pc}
-# endif
-       .endif
-#endif
-.endm
-
-/* The Unified assembly syntax allows the same code to be assembled for both
-   ARM and Thumb-2.  However this is only supported by recent gas, so define
-   a set of macros to allow ARM code on older assemblers.  */
-#if defined(__thumb2__)
-.macro do_it cond, suffix=""
-       it\suffix       \cond
-.endm
-.macro shift1 op, arg0, arg1, arg2
-       \op     \arg0, \arg1, \arg2
-.endm
-#define do_push        push
-#define do_pop pop
-#define COND(op1, op2, cond) op1 ## op2 ## cond
-/* Perform an arithmetic operation with a variable shift operand.  This
-   requires two instructions and a scratch register on Thumb-2.  */
-.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
-       \shiftop \tmp, \src2, \shiftreg
-       \name \dest, \src1, \tmp
-.endm
-#else
-.macro do_it cond, suffix=""
-.endm
-.macro shift1 op, arg0, arg1, arg2
-       mov     \arg0, \arg1, \op \arg2
-.endm
-#define do_push        stmfd sp!,
-#define do_pop ldmfd sp!,
-#define COND(op1, op2, cond) op1 ## cond ## op2
-.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
-       \name \dest, \src1, \src2, \shiftop \shiftreg
-.endm
-#endif
-
-#ifdef __ARM_EABI__
-.macro ARM_LDIV0 name signed
-       cmp     r0, #0
-       .ifc    \signed, unsigned
-       movne   r0, #0xffffffff
-       .else
-       movgt   r0, #0x7fffffff
-       movlt   r0, #0x80000000
-       .endif
-       b       SYM (__aeabi_idiv0) __PLT__
-.endm
-#else
-.macro ARM_LDIV0 name signed
-       str     lr, [sp, #-8]!
-98:    cfi_push 98b - __\name, 0xe, -0x8, 0x8
-       bl      SYM (__div0) __PLT__
-       mov     r0, #0                  @ About as wrong as it could be.
-       RETLDM  unwind=98b
-.endm
-#endif
-
-
-#ifdef __ARM_EABI__
-.macro THUMB_LDIV0 name signed
-#if defined(__ARM_ARCH_6M__)
-       .ifc \signed, unsigned
-       cmp     r0, #0
-       beq     1f
-       mov     r0, #0
-       mvn     r0, r0          @ 0xffffffff
-1:
-       .else
-       cmp     r0, #0
-       beq     2f
-       blt     3f
-       mov     r0, #0
-       mvn     r0, r0
-       lsr     r0, r0, #1      @ 0x7fffffff
-       b       2f
-3:     mov     r0, #0x80
-       lsl     r0, r0, #24     @ 0x80000000
-2:
-       .endif
-       push    {r0, r1, r2}
-       ldr     r0, 4f
-       adr     r1, 4f
-       add     r0, r1
-       str     r0, [sp, #8]
-       @ We know we are not on armv4t, so pop pc is safe.
-       pop     {r0, r1, pc}
-       .align  2
-4:
-       .word   __aeabi_idiv0 - 4b
-#elif defined(__thumb2__)
-       .syntax unified
-       .ifc \signed, unsigned
-       cbz     r0, 1f
-       mov     r0, #0xffffffff
-1:
-       .else
-       cmp     r0, #0
-       do_it   gt
-       movgt   r0, #0x7fffffff
-       do_it   lt
-       movlt   r0, #0x80000000
-       .endif
-       b.w     SYM(__aeabi_idiv0) __PLT__
-#else
-       .align  2
-       bx      pc
-       nop
-       .arm
-       cmp     r0, #0
-       .ifc    \signed, unsigned
-       movne   r0, #0xffffffff
-       .else
-       movgt   r0, #0x7fffffff
-       movlt   r0, #0x80000000
-       .endif
-       b       SYM(__aeabi_idiv0) __PLT__
-       .thumb
-#endif
-.endm
-#else
-.macro THUMB_LDIV0 name signed
-       push    { r1, lr }
-98:    cfi_push 98b - __\name, 0xe, -0x4, 0x8
-       bl      SYM (__div0)
-       mov     r0, #0                  @ About as wrong as it could be.
-#if defined (__INTERWORKING__)
-       pop     { r1, r2 }
-       bx      r2
-#else
-       pop     { r1, pc }
-#endif
-.endm
-#endif
-
-.macro FUNC_END name
-       SIZE (__\name)
-.endm
-
-.macro DIV_FUNC_END name signed
-       cfi_start       __\name, LSYM(Lend_div0)
-LSYM(Ldiv0):
-#ifdef __thumb__
-       THUMB_LDIV0 \name \signed
-#else
-       ARM_LDIV0 \name \signed
-#endif
-       cfi_end LSYM(Lend_div0)
-       FUNC_END \name
-.endm
-
-.macro THUMB_FUNC_START name
-       .globl  SYM (\name)
-       TYPE    (\name)
-       .thumb_func
-SYM (\name):
-.endm
-
-/* Function start macros.  Variants for ARM and Thumb.  */
-
-#ifdef __thumb__
-#define THUMB_FUNC .thumb_func
-#define THUMB_CODE .force_thumb
-# if defined(__thumb2__)
-#define THUMB_SYNTAX .syntax divided
-# else
-#define THUMB_SYNTAX
-# endif
-#else
-#define THUMB_FUNC
-#define THUMB_CODE
-#define THUMB_SYNTAX
-#endif
-
-.macro FUNC_START name
-       .text
-       .globl SYM (__\name)
-       TYPE (__\name)
-       .align 0
-       THUMB_CODE
-       THUMB_FUNC
-       THUMB_SYNTAX
-SYM (__\name):
-.endm
-
-/* Special function that will always be coded in ARM assembly, even if
-   in Thumb-only compilation.  */
-
-#if defined(__thumb2__)
-
-/* For Thumb-2 we build everything in thumb mode.  */
-.macro ARM_FUNC_START name
-       FUNC_START \name
-       .syntax unified
-.endm
-#define EQUIV .thumb_set
-.macro  ARM_CALL name
-       bl      __\name
-.endm
-
-#elif defined(__INTERWORKING_STUBS__)
-
-.macro ARM_FUNC_START name
-       FUNC_START \name
-       bx      pc
-       nop
-       .arm
-/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
-   directly from other local arm routines.  */
-_L__\name:             
-.endm
-#define EQUIV .thumb_set
-/* Branch directly to a function declared with ARM_FUNC_START.
-   Must be called in arm mode.  */
-.macro  ARM_CALL name
-       bl      _L__\name
-.endm
-
-#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */
-
-#ifdef __ARM_ARCH_6M__
-#define EQUIV .thumb_set
-#else
-.macro ARM_FUNC_START name
-       .text
-       .globl SYM (__\name)
-       TYPE (__\name)
-       .align 0
-       .arm
-SYM (__\name):
-.endm
-#define EQUIV .set
-.macro  ARM_CALL name
-       bl      __\name
-.endm
-#endif
-
-#endif
-
-.macro FUNC_ALIAS new old
-       .globl  SYM (__\new)
-#if defined (__thumb__)
-       .thumb_set      SYM (__\new), SYM (__\old)
-#else
-       .set    SYM (__\new), SYM (__\old)
-#endif
-.endm
-
-#ifndef __ARM_ARCH_6M__
-.macro ARM_FUNC_ALIAS new old
-       .globl  SYM (__\new)
-       EQUIV   SYM (__\new), SYM (__\old)
-#if defined(__INTERWORKING_STUBS__)
-       .set    SYM (_L__\new), SYM (_L__\old)
-#endif
-.endm
-#endif
-
-#ifdef __ARMEB__
-#define xxh r0
-#define xxl r1
-#define yyh r2
-#define yyl r3
-#else
-#define xxh r1
-#define xxl r0
-#define yyh r3
-#define yyl r2
-#endif 
-
-#ifdef __ARM_EABI__
-.macro WEAK name
-       .weak SYM (__\name)
-.endm
-#endif
-
-#ifdef __thumb__
-/* Register aliases.  */
-
-work           .req    r4      @ XXXX is this safe ?
-dividend       .req    r0
-divisor                .req    r1
-overdone       .req    r2
-result         .req    r2
-curbit         .req    r3
-#endif
-#if 0
-ip             .req    r12
-sp             .req    r13
-lr             .req    r14
-pc             .req    r15
-#endif
-
-/* ------------------------------------------------------------------------ */
-/*             Bodies of the division and modulo routines.                 */
-/* ------------------------------------------------------------------------ */ 
-.macro ARM_DIV_BODY dividend, divisor, result, curbit
-
-#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
-
-#if defined (__thumb2__)
-       clz     \curbit, \dividend
-       clz     \result, \divisor
-       sub     \curbit, \result, \curbit
-       rsb     \curbit, \curbit, #31
-       adr     \result, 1f
-       add     \curbit, \result, \curbit, lsl #4
-       mov     \result, #0
-       mov     pc, \curbit
-.p2align 3
-1:
-       .set    shift, 32
-       .rept   32
-       .set    shift, shift - 1
-       cmp.w   \dividend, \divisor, lsl #shift
-       nop.n
-       adc.w   \result, \result, \result
-       it      cs
-       subcs.w \dividend, \dividend, \divisor, lsl #shift
-       .endr
-#else
-       clz     \curbit, \dividend
-       clz     \result, \divisor
-       sub     \curbit, \result, \curbit
-       rsbs    \curbit, \curbit, #31
-       addne   \curbit, \curbit, \curbit, lsl #1
-       mov     \result, #0
-       addne   pc, pc, \curbit, lsl #2
-       nop
-       .set    shift, 32
-       .rept   32
-       .set    shift, shift - 1
-       cmp     \dividend, \divisor, lsl #shift
-       adc     \result, \result, \result
-       subcs   \dividend, \dividend, \divisor, lsl #shift
-       .endr
-#endif
-
-#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
-#if __ARM_ARCH__ >= 5
-
-       clz     \curbit, \divisor
-       clz     \result, \dividend
-       sub     \result, \curbit, \result
-       mov     \curbit, #1
-       mov     \divisor, \divisor, lsl \result
-       mov     \curbit, \curbit, lsl \result
-       mov     \result, #0
-       
-#else /* __ARM_ARCH__ < 5 */
-
-       @ Initially shift the divisor left 3 bits if possible,
-       @ set curbit accordingly.  This allows for curbit to be located
-       @ at the left end of each 4-bit nibble in the division loop
-       @ to save one loop in most cases.
-       tst     \divisor, #0xe0000000
-       moveq   \divisor, \divisor, lsl #3
-       moveq   \curbit, #8
-       movne   \curbit, #1
-
-       @ Unless the divisor is very big, shift it up in multiples of
-       @ four bits, since this is the amount of unwinding in the main
-       @ division loop.  Continue shifting until the divisor is 
-       @ larger than the dividend.
-1:     cmp     \divisor, #0x10000000
-       cmplo   \divisor, \dividend
-       movlo   \divisor, \divisor, lsl #4
-       movlo   \curbit, \curbit, lsl #4
-       blo     1b
-
-       @ For very big divisors, we must shift it a bit at a time, or
-       @ we will be in danger of overflowing.
-1:     cmp     \divisor, #0x80000000
-       cmplo   \divisor, \dividend
-       movlo   \divisor, \divisor, lsl #1
-       movlo   \curbit, \curbit, lsl #1
-       blo     1b
-
-       mov     \result, #0
-
-#endif /* __ARM_ARCH__ < 5 */
-
-       @ Division loop
-1:     cmp     \dividend, \divisor
-       do_it   hs, t
-       subhs   \dividend, \dividend, \divisor
-       orrhs   \result,   \result,   \curbit
-       cmp     \dividend, \divisor,  lsr #1
-       do_it   hs, t
-       subhs   \dividend, \dividend, \divisor, lsr #1
-       orrhs   \result,   \result,   \curbit,  lsr #1
-       cmp     \dividend, \divisor,  lsr #2
-       do_it   hs, t
-       subhs   \dividend, \dividend, \divisor, lsr #2
-       orrhs   \result,   \result,   \curbit,  lsr #2
-       cmp     \dividend, \divisor,  lsr #3
-       do_it   hs, t
-       subhs   \dividend, \dividend, \divisor, lsr #3
-       orrhs   \result,   \result,   \curbit,  lsr #3
-       cmp     \dividend, #0                   @ Early termination?
-       do_it   ne, t
-       movnes  \curbit,   \curbit,  lsr #4     @ No, any more bits to do?
-       movne   \divisor,  \divisor, lsr #4
-       bne     1b
-
-#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
-
-.endm
-/* ------------------------------------------------------------------------ */ 
-.macro ARM_DIV2_ORDER divisor, order
-
-#if __ARM_ARCH__ >= 5
-
-       clz     \order, \divisor
-       rsb     \order, \order, #31
-
-#else
-
-       cmp     \divisor, #(1 << 16)
-       movhs   \divisor, \divisor, lsr #16
-       movhs   \order, #16
-       movlo   \order, #0
-
-       cmp     \divisor, #(1 << 8)
-       movhs   \divisor, \divisor, lsr #8
-       addhs   \order, \order, #8
-
-       cmp     \divisor, #(1 << 4)
-       movhs   \divisor, \divisor, lsr #4
-       addhs   \order, \order, #4
-
-       cmp     \divisor, #(1 << 2)
-       addhi   \order, \order, #3
-       addls   \order, \order, \divisor, lsr #1
-
-#endif
-
-.endm
-/* ------------------------------------------------------------------------ */
-.macro ARM_MOD_BODY dividend, divisor, order, spare
-
-#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
-
-       clz     \order, \divisor
-       clz     \spare, \dividend
-       sub     \order, \order, \spare
-       rsbs    \order, \order, #31
-       addne   pc, pc, \order, lsl #3
-       nop
-       .set    shift, 32
-       .rept   32
-       .set    shift, shift - 1
-       cmp     \dividend, \divisor, lsl #shift
-       subcs   \dividend, \dividend, \divisor, lsl #shift
-       .endr
-
-#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
-#if __ARM_ARCH__ >= 5
-
-       clz     \order, \divisor
-       clz     \spare, \dividend
-       sub     \order, \order, \spare
-       mov     \divisor, \divisor, lsl \order
-       
-#else /* __ARM_ARCH__ < 5 */
-
-       mov     \order, #0
-
-       @ Unless the divisor is very big, shift it up in multiples of
-       @ four bits, since this is the amount of unwinding in the main
-       @ division loop.  Continue shifting until the divisor is 
-       @ larger than the dividend.
-1:     cmp     \divisor, #0x10000000
-       cmplo   \divisor, \dividend
-       movlo   \divisor, \divisor, lsl #4
-       addlo   \order, \order, #4
-       blo     1b
-
-       @ For very big divisors, we must shift it a bit at a time, or
-       @ we will be in danger of overflowing.
-1:     cmp     \divisor, #0x80000000
-       cmplo   \divisor, \dividend
-       movlo   \divisor, \divisor, lsl #1
-       addlo   \order, \order, #1
-       blo     1b
-
-#endif /* __ARM_ARCH__ < 5 */
-
-       @ Perform all needed subtractions to keep only the remainder.
-       @ Do comparisons in batch of 4 first.
-       subs    \order, \order, #3              @ yes, 3 is intended here
-       blt     2f
-
-1:     cmp     \dividend, \divisor
-       subhs   \dividend, \dividend, \divisor
-       cmp     \dividend, \divisor,  lsr #1
-       subhs   \dividend, \dividend, \divisor, lsr #1
-       cmp     \dividend, \divisor,  lsr #2
-       subhs   \dividend, \dividend, \divisor, lsr #2
-       cmp     \dividend, \divisor,  lsr #3
-       subhs   \dividend, \dividend, \divisor, lsr #3
-       cmp     \dividend, #1
-       mov     \divisor, \divisor, lsr #4
-       subges  \order, \order, #4
-       bge     1b
-
-       tst     \order, #3
-       teqne   \dividend, #0
-       beq     5f
-
-       @ Either 1, 2 or 3 comparisons/subtractions are left.
-2:     cmn     \order, #2
-       blt     4f
-       beq     3f
-       cmp     \dividend, \divisor
-       subhs   \dividend, \dividend, \divisor
-       mov     \divisor,  \divisor,  lsr #1
-3:     cmp     \dividend, \divisor
-       subhs   \dividend, \dividend, \divisor
-       mov     \divisor,  \divisor,  lsr #1
-4:     cmp     \dividend, \divisor
-       subhs   \dividend, \dividend, \divisor
-5:
-
-#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
-
-.endm
-/* ------------------------------------------------------------------------ */
-.macro THUMB_DIV_MOD_BODY modulo
-       @ Load the constant 0x10000000 into our work register.
-       mov     work, #1
-       lsl     work, #28
-LSYM(Loop1):
-       @ Unless the divisor is very big, shift it up in multiples of
-       @ four bits, since this is the amount of unwinding in the main
-       @ division loop.  Continue shifting until the divisor is 
-       @ larger than the dividend.
-       cmp     divisor, work
-       bhs     LSYM(Lbignum)
-       cmp     divisor, dividend
-       bhs     LSYM(Lbignum)
-       lsl     divisor, #4
-       lsl     curbit,  #4
-       b       LSYM(Loop1)
-LSYM(Lbignum):
-       @ Set work to 0x80000000
-       lsl     work, #3
-LSYM(Loop2):
-       @ For very big divisors, we must shift it a bit at a time, or
-       @ we will be in danger of overflowing.
-       cmp     divisor, work
-       bhs     LSYM(Loop3)
-       cmp     divisor, dividend
-       bhs     LSYM(Loop3)
-       lsl     divisor, #1
-       lsl     curbit,  #1
-       b       LSYM(Loop2)
-LSYM(Loop3):
-       @ Test for possible subtractions ...
-  .if \modulo
-       @ ... On the final pass, this may subtract too much from the dividend, 
-       @ so keep track of which subtractions are done, we can fix them up 
-       @ afterwards.
-       mov     overdone, #0
-       cmp     dividend, divisor
-       blo     LSYM(Lover1)
-       sub     dividend, dividend, divisor
-LSYM(Lover1):
-       lsr     work, divisor, #1
-       cmp     dividend, work
-       blo     LSYM(Lover2)
-       sub     dividend, dividend, work
-       mov     ip, curbit
-       mov     work, #1
-       ror     curbit, work
-       orr     overdone, curbit
-       mov     curbit, ip
-LSYM(Lover2):
-       lsr     work, divisor, #2
-       cmp     dividend, work
-       blo     LSYM(Lover3)
-       sub     dividend, dividend, work
-       mov     ip, curbit
-       mov     work, #2
-       ror     curbit, work
-       orr     overdone, curbit
-       mov     curbit, ip
-LSYM(Lover3):
-       lsr     work, divisor, #3
-       cmp     dividend, work
-       blo     LSYM(Lover4)
-       sub     dividend, dividend, work
-       mov     ip, curbit
-       mov     work, #3
-       ror     curbit, work
-       orr     overdone, curbit
-       mov     curbit, ip
-LSYM(Lover4):
-       mov     ip, curbit
-  .else
-       @ ... and note which bits are done in the result.  On the final pass,
-       @ this may subtract too much from the dividend, but the result will be ok,
-       @ since the "bit" will have been shifted out at the bottom.
-       cmp     dividend, divisor
-       blo     LSYM(Lover1)
-       sub     dividend, dividend, divisor
-       orr     result, result, curbit
-LSYM(Lover1):
-       lsr     work, divisor, #1
-       cmp     dividend, work
-       blo     LSYM(Lover2)
-       sub     dividend, dividend, work
-       lsr     work, curbit, #1
-       orr     result, work
-LSYM(Lover2):
-       lsr     work, divisor, #2
-       cmp     dividend, work
-       blo     LSYM(Lover3)
-       sub     dividend, dividend, work
-       lsr     work, curbit, #2
-       orr     result, work
-LSYM(Lover3):
-       lsr     work, divisor, #3
-       cmp     dividend, work
-       blo     LSYM(Lover4)
-       sub     dividend, dividend, work
-       lsr     work, curbit, #3
-       orr     result, work
-LSYM(Lover4):
-  .endif
-       
-       cmp     dividend, #0                    @ Early termination?
-       beq     LSYM(Lover5)
-       lsr     curbit,  #4                     @ No, any more bits to do?
-       beq     LSYM(Lover5)
-       lsr     divisor, #4
-       b       LSYM(Loop3)
-LSYM(Lover5):
-  .if \modulo
-       @ Any subtractions that we should not have done will be recorded in
-       @ the top three bits of "overdone".  Exactly which were not needed
-       @ are governed by the position of the bit, stored in ip.
-       mov     work, #0xe
-       lsl     work, #28
-       and     overdone, work
-       beq     LSYM(Lgot_result)
-       
-       @ If we terminated early, because dividend became zero, then the 
-       @ bit in ip will not be in the bottom nibble, and we should not
-       @ perform the additions below.  We must test for this though
-       @ (rather than relying upon the TSTs to prevent the additions) since
-       @ the bit in ip could be in the top two bits which might then match
-       @ with one of the smaller RORs.
-       mov     curbit, ip
-       mov     work, #0x7
-       tst     curbit, work
-       beq     LSYM(Lgot_result)
-       
-       mov     curbit, ip
-       mov     work, #3
-       ror     curbit, work
-       tst     overdone, curbit
-       beq     LSYM(Lover6)
-       lsr     work, divisor, #3
-       add     dividend, work
-LSYM(Lover6):
-       mov     curbit, ip
-       mov     work, #2
-       ror     curbit, work
-       tst     overdone, curbit
-       beq     LSYM(Lover7)
-       lsr     work, divisor, #2
-       add     dividend, work
-LSYM(Lover7):
-       mov     curbit, ip
-       mov     work, #1
-       ror     curbit, work
-       tst     overdone, curbit
-       beq     LSYM(Lgot_result)
-       lsr     work, divisor, #1
-       add     dividend, work
-  .endif
-LSYM(Lgot_result):
-.endm  
-/* ------------------------------------------------------------------------ */
-/*             Start of the Real Functions                                 */
-/* ------------------------------------------------------------------------ */
-#ifdef L_udivsi3
-
-#if defined(__prefer_thumb__)
-
-       FUNC_START udivsi3
-       FUNC_ALIAS aeabi_uidiv udivsi3
-
-       cmp     divisor, #0
-       beq     LSYM(Ldiv0)
-LSYM(udivsi3_skip_div0_test):
-       mov     curbit, #1
-       mov     result, #0
-       
-       push    { work }
-       cmp     dividend, divisor
-       blo     LSYM(Lgot_result)
-
-       THUMB_DIV_MOD_BODY 0
-       
-       mov     r0, result
-       pop     { work }
-       RET
-
-#else /* ARM version/Thumb-2.  */
-
-       ARM_FUNC_START udivsi3
-       ARM_FUNC_ALIAS aeabi_uidiv udivsi3
-
-       /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
-          check for division-by-zero a second time.  */
-LSYM(udivsi3_skip_div0_test):
-       subs    r2, r1, #1
-       do_it   eq
-       RETc(eq)
-       bcc     LSYM(Ldiv0)
-       cmp     r0, r1
-       bls     11f
-       tst     r1, r2
-       beq     12f
-       
-       ARM_DIV_BODY r0, r1, r2, r3
-       
-       mov     r0, r2
-       RET     
-
-11:    do_it   eq, e
-       moveq   r0, #1
-       movne   r0, #0
-       RET
-
-12:    ARM_DIV2_ORDER r1, r2
-
-       mov     r0, r0, lsr r2
-       RET
-
-#endif /* ARM version */
-
-       DIV_FUNC_END udivsi3 unsigned
-
-#if defined(__prefer_thumb__)
-FUNC_START aeabi_uidivmod
-       cmp     r1, #0
-       beq     LSYM(Ldiv0)
-       push    {r0, r1, lr}
-       bl      LSYM(udivsi3_skip_div0_test)
-       POP     {r1, r2, r3}
-       mul     r2, r0
-       sub     r1, r1, r2
-       bx      r3
-#else
-ARM_FUNC_START aeabi_uidivmod
-       cmp     r1, #0
-       beq     LSYM(Ldiv0)
-       stmfd   sp!, { r0, r1, lr }
-       bl      LSYM(udivsi3_skip_div0_test)
-       ldmfd   sp!, { r1, r2, lr }
-       mul     r3, r2, r0
-       sub     r1, r1, r3
-       RET
-#endif
-       FUNC_END aeabi_uidivmod
-       
-#endif /* L_udivsi3 */
-/* ------------------------------------------------------------------------ */
-#ifdef L_umodsi3
-
-       FUNC_START umodsi3
-
-#ifdef __thumb__
-
-       cmp     divisor, #0
-       beq     LSYM(Ldiv0)
-       mov     curbit, #1
-       cmp     dividend, divisor
-       bhs     LSYM(Lover10)
-       RET     
-
-LSYM(Lover10):
-       push    { work }
-
-       THUMB_DIV_MOD_BODY 1
-       
-       pop     { work }
-       RET
-       
-#else  /* ARM version.  */
-       
-       subs    r2, r1, #1                      @ compare divisor with 1
-       bcc     LSYM(Ldiv0)
-       cmpne   r0, r1                          @ compare dividend with divisor
-       moveq   r0, #0
-       tsthi   r1, r2                          @ see if divisor is power of 2
-       andeq   r0, r0, r2
-       RETc(ls)
-
-       ARM_MOD_BODY r0, r1, r2, r3
-       
-       RET     
-
-#endif /* ARM version.  */
-       
-       DIV_FUNC_END umodsi3 unsigned
-
-#endif /* L_umodsi3 */
-/* ------------------------------------------------------------------------ */
-#ifdef L_divsi3
-
-#if defined(__prefer_thumb__)
-
-       FUNC_START divsi3       
-       FUNC_ALIAS aeabi_idiv divsi3
-
-       cmp     divisor, #0
-       beq     LSYM(Ldiv0)
-LSYM(divsi3_skip_div0_test):
-       push    { work }
-       mov     work, dividend
-       eor     work, divisor           @ Save the sign of the result.
-       mov     ip, work
-       mov     curbit, #1
-       mov     result, #0
-       cmp     divisor, #0
-       bpl     LSYM(Lover10)
-       neg     divisor, divisor        @ Loops below use unsigned.
-LSYM(Lover10):
-       cmp     dividend, #0
-       bpl     LSYM(Lover11)
-       neg     dividend, dividend
-LSYM(Lover11):
-       cmp     dividend, divisor
-       blo     LSYM(Lgot_result)
-
-       THUMB_DIV_MOD_BODY 0
-       
-       mov     r0, result
-       mov     work, ip
-       cmp     work, #0
-       bpl     LSYM(Lover12)
-       neg     r0, r0
-LSYM(Lover12):
-       pop     { work }
-       RET
-
-#else /* ARM/Thumb-2 version.  */
-       
-       ARM_FUNC_START divsi3   
-       ARM_FUNC_ALIAS aeabi_idiv divsi3
-
-       cmp     r1, #0
-       beq     LSYM(Ldiv0)
-LSYM(divsi3_skip_div0_test):
-       eor     ip, r0, r1                      @ save the sign of the result.
-       do_it   mi
-       rsbmi   r1, r1, #0                      @ loops below use unsigned.
-       subs    r2, r1, #1                      @ division by 1 or -1 ?
-       beq     10f
-       movs    r3, r0
-       do_it   mi
-       rsbmi   r3, r0, #0                      @ positive dividend value
-       cmp     r3, r1
-       bls     11f
-       tst     r1, r2                          @ divisor is power of 2 ?
-       beq     12f
-
-       ARM_DIV_BODY r3, r1, r0, r2
-       
-       cmp     ip, #0
-       do_it   mi
-       rsbmi   r0, r0, #0
-       RET     
-
-10:    teq     ip, r0                          @ same sign ?
-       do_it   mi
-       rsbmi   r0, r0, #0
-       RET     
-
-11:    do_it   lo
-       movlo   r0, #0
-       do_it   eq,t
-       moveq   r0, ip, asr #31
-       orreq   r0, r0, #1
-       RET
-
-12:    ARM_DIV2_ORDER r1, r2
-
-       cmp     ip, #0
-       mov     r0, r3, lsr r2
-       do_it   mi
-       rsbmi   r0, r0, #0
-       RET
-
-#endif /* ARM version */
-       
-       DIV_FUNC_END divsi3 signed
-
-#if defined(__prefer_thumb__)
-FUNC_START aeabi_idivmod
-       cmp     r1, #0
-       beq     LSYM(Ldiv0)
-       push    {r0, r1, lr}
-       bl      LSYM(divsi3_skip_div0_test)
-       POP     {r1, r2, r3}
-       mul     r2, r0
-       sub     r1, r1, r2
-       bx      r3
-#else
-ARM_FUNC_START aeabi_idivmod
-       cmp     r1, #0
-       beq     LSYM(Ldiv0)
-       stmfd   sp!, { r0, r1, lr }
-       bl      LSYM(divsi3_skip_div0_test)
-       ldmfd   sp!, { r1, r2, lr }
-       mul     r3, r2, r0
-       sub     r1, r1, r3
-       RET
-#endif
-       FUNC_END aeabi_idivmod
-       
-#endif /* L_divsi3 */
-/* ------------------------------------------------------------------------ */
-#ifdef L_modsi3
-
-       FUNC_START modsi3
-
-#ifdef __thumb__
-
-       mov     curbit, #1
-       cmp     divisor, #0
-       beq     LSYM(Ldiv0)
-       bpl     LSYM(Lover10)
-       neg     divisor, divisor                @ Loops below use unsigned.
-LSYM(Lover10):
-       push    { work }
-       @ Need to save the sign of the dividend, unfortunately, we need
-       @ work later on.  Must do this after saving the original value of
-       @ the work register, because we will pop this value off first.
-       push    { dividend }
-       cmp     dividend, #0
-       bpl     LSYM(Lover11)
-       neg     dividend, dividend
-LSYM(Lover11):
-       cmp     dividend, divisor
-       blo     LSYM(Lgot_result)
-
-       THUMB_DIV_MOD_BODY 1
-               
-       pop     { work }
-       cmp     work, #0
-       bpl     LSYM(Lover12)
-       neg     dividend, dividend
-LSYM(Lover12):
-       pop     { work }
-       RET     
-
-#else /* ARM version.  */
-       
-       cmp     r1, #0
-       beq     LSYM(Ldiv0)
-       rsbmi   r1, r1, #0                      @ loops below use unsigned.
-       movs    ip, r0                          @ preserve sign of dividend
-       rsbmi   r0, r0, #0                      @ if negative make positive
-       subs    r2, r1, #1                      @ compare divisor with 1
-       cmpne   r0, r1                          @ compare dividend with divisor
-       moveq   r0, #0
-       tsthi   r1, r2                          @ see if divisor is power of 2
-       andeq   r0, r0, r2
-       bls     10f
-
-       ARM_MOD_BODY r0, r1, r2, r3
-
-10:    cmp     ip, #0
-       rsbmi   r0, r0, #0
-       RET     
-
-#endif /* ARM version */
-       
-       DIV_FUNC_END modsi3 signed
-
-#endif /* L_modsi3 */
-/* ------------------------------------------------------------------------ */
-#ifdef L_dvmd_tls
-
-#ifdef __ARM_EABI__
-       WEAK aeabi_idiv0
-       WEAK aeabi_ldiv0
-       FUNC_START aeabi_idiv0
-       FUNC_START aeabi_ldiv0
-       RET
-       FUNC_END aeabi_ldiv0
-       FUNC_END aeabi_idiv0
-#else
-       FUNC_START div0
-       RET
-       FUNC_END div0
-#endif
-       
-#endif /* L_divmodsi_tools */
-/* ------------------------------------------------------------------------ */
-#ifdef L_dvmd_lnx
-@ GNU/Linux division-by zero handler.  Used in place of L_dvmd_tls
-
-/* Constant taken from <asm/signal.h>.  */
-#define SIGFPE 8
-
-#ifdef __ARM_EABI__
-       WEAK aeabi_idiv0
-       WEAK aeabi_ldiv0
-       ARM_FUNC_START aeabi_idiv0
-       ARM_FUNC_START aeabi_ldiv0
-#else
-       ARM_FUNC_START div0
-#endif
-
-       do_push {r1, lr}
-       mov     r0, #SIGFPE
-       bl      SYM(raise) __PLT__
-       RETLDM  r1
-
-#ifdef __ARM_EABI__
-       FUNC_END aeabi_ldiv0
-       FUNC_END aeabi_idiv0
-#else
-       FUNC_END div0
-#endif
-       
-#endif /* L_dvmd_lnx */
-#ifdef L_clear_cache
-#if defined __ARM_EABI__ && defined __linux__
-@ EABI GNU/Linux call to cacheflush syscall.
-       ARM_FUNC_START clear_cache
-       do_push {r7}
-#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
-       movw    r7, #2
-       movt    r7, #0xf
-#else
-       mov     r7, #0xf0000
-       add     r7, r7, #2
-#endif
-       mov     r2, #0
-       swi     0
-       do_pop  {r7}
-       RET
-       FUNC_END clear_cache
-#else
-#error "This is only for ARM EABI GNU/Linux"
-#endif
-#endif /* L_clear_cache */
-/* ------------------------------------------------------------------------ */
-/* Dword shift operations.  */
-/* All the following Dword shift variants rely on the fact that
-       shft xxx, Reg
-   is in fact done as
-       shft xxx, (Reg & 255)
-   so for Reg value in (32...63) and (-1...-31) we will get zero (in the
-   case of logical shifts) or the sign (for asr).  */
-
-#ifdef __ARMEB__
-#define al     r1
-#define ah     r0
-#else
-#define al     r0
-#define ah     r1
-#endif
-
-/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
-#ifndef __symbian__
-
-#ifdef L_lshrdi3
-
-       FUNC_START lshrdi3
-       FUNC_ALIAS aeabi_llsr lshrdi3
-       
-#ifdef __thumb__
-       lsr     al, r2
-       mov     r3, ah
-       lsr     ah, r2
-       mov     ip, r3
-       sub     r2, #32
-       lsr     r3, r2
-       orr     al, r3
-       neg     r2, r2
-       mov     r3, ip
-       lsl     r3, r2
-       orr     al, r3
-       RET
-#else
-       subs    r3, r2, #32
-       rsb     ip, r2, #32
-       movmi   al, al, lsr r2
-       movpl   al, ah, lsr r3
-       orrmi   al, al, ah, lsl ip
-       mov     ah, ah, lsr r2
-       RET
-#endif
-       FUNC_END aeabi_llsr
-       FUNC_END lshrdi3
-
-#endif
-       
-#ifdef L_ashrdi3
-       
-       FUNC_START ashrdi3
-       FUNC_ALIAS aeabi_lasr ashrdi3
-       
-#ifdef __thumb__
-       lsr     al, r2
-       mov     r3, ah
-       asr     ah, r2
-       sub     r2, #32
-       @ If r2 is negative at this point the following step would OR
-       @ the sign bit into all of AL.  That's not what we want...
-       bmi     1f
-       mov     ip, r3
-       asr     r3, r2
-       orr     al, r3
-       mov     r3, ip
-1:
-       neg     r2, r2
-       lsl     r3, r2
-       orr     al, r3
-       RET
-#else
-       subs    r3, r2, #32
-       rsb     ip, r2, #32
-       movmi   al, al, lsr r2
-       movpl   al, ah, asr r3
-       orrmi   al, al, ah, lsl ip
-       mov     ah, ah, asr r2
-       RET
-#endif
-
-       FUNC_END aeabi_lasr
-       FUNC_END ashrdi3
-
-#endif
-
-#ifdef L_ashldi3
-
-       FUNC_START ashldi3
-       FUNC_ALIAS aeabi_llsl ashldi3
-       
-#ifdef __thumb__
-       lsl     ah, r2
-       mov     r3, al
-       lsl     al, r2
-       mov     ip, r3
-       sub     r2, #32
-       lsl     r3, r2
-       orr     ah, r3
-       neg     r2, r2
-       mov     r3, ip
-       lsr     r3, r2
-       orr     ah, r3
-       RET
-#else
-       subs    r3, r2, #32
-       rsb     ip, r2, #32
-       movmi   ah, ah, lsl r2
-       movpl   ah, al, lsl r3
-       orrmi   ah, ah, al, lsr ip
-       mov     al, al, lsl r2
-       RET
-#endif
-       FUNC_END aeabi_llsl
-       FUNC_END ashldi3
-
-#endif
-
-#endif /* __symbian__ */
-
-#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \
-    || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
-    || defined(__ARM_ARCH_5TEJ__)
-#define HAVE_ARM_CLZ 1
-#endif
-
-#ifdef L_clzsi2
-#if defined(__ARM_ARCH_6M__)
-FUNC_START clzsi2
-       mov     r1, #28
-       mov     r3, #1
-       lsl     r3, r3, #16
-       cmp     r0, r3 /* 0x10000 */
-       bcc     2f
-       lsr     r0, r0, #16
-       sub     r1, r1, #16
-2:     lsr     r3, r3, #8
-       cmp     r0, r3 /* #0x100 */
-       bcc     2f
-       lsr     r0, r0, #8
-       sub     r1, r1, #8
-2:     lsr     r3, r3, #4
-       cmp     r0, r3 /* #0x10 */
-       bcc     2f
-       lsr     r0, r0, #4
-       sub     r1, r1, #4
-2:     adr     r2, 1f
-       ldrb    r0, [r2, r0]
-       add     r0, r0, r1
-       bx lr
-.align 2
-1:
-.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
-       FUNC_END clzsi2
-#else
-ARM_FUNC_START clzsi2
-# if defined(HAVE_ARM_CLZ)
-       clz     r0, r0
-       RET
-# else
-       mov     r1, #28
-       cmp     r0, #0x10000
-       do_it   cs, t
-       movcs   r0, r0, lsr #16
-       subcs   r1, r1, #16
-       cmp     r0, #0x100
-       do_it   cs, t
-       movcs   r0, r0, lsr #8
-       subcs   r1, r1, #8
-       cmp     r0, #0x10
-       do_it   cs, t
-       movcs   r0, r0, lsr #4
-       subcs   r1, r1, #4
-       adr     r2, 1f
-       ldrb    r0, [r2, r0]
-       add     r0, r0, r1
-       RET
-.align 2
-1:
-.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
-# endif /* !HAVE_ARM_CLZ */
-       FUNC_END clzsi2
-#endif
-#endif /* L_clzsi2 */
-
-#ifdef L_clzdi2
-#if !defined(HAVE_ARM_CLZ)
-
-# if defined(__ARM_ARCH_6M__)
-FUNC_START clzdi2
-       push    {r4, lr}
-# else
-ARM_FUNC_START clzdi2
-       do_push {r4, lr}
-# endif
-       cmp     xxh, #0
-       bne     1f
-# ifdef __ARMEB__
-       mov     r0, xxl
-       bl      __clzsi2
-       add     r0, r0, #32
-       b 2f
-1:
-       bl      __clzsi2
-# else
-       bl      __clzsi2
-       add     r0, r0, #32
-       b 2f
-1:
-       mov     r0, xxh
-       bl      __clzsi2
-# endif
-2:
-# if defined(__ARM_ARCH_6M__)
-       pop     {r4, pc}
-# else
-       RETLDM  r4
-# endif
-       FUNC_END clzdi2
-
-#else /* HAVE_ARM_CLZ */
-
-ARM_FUNC_START clzdi2
-       cmp     xxh, #0
-       do_it   eq, et
-       clzeq   r0, xxl
-       clzne   r0, xxh
-       addeq   r0, r0, #32
-       RET
-       FUNC_END clzdi2
-
-#endif
-#endif /* L_clzdi2 */
-
-/* ------------------------------------------------------------------------ */
-/* These next two sections are here despite the fact that they contain Thumb 
-   assembler because their presence allows interworked code to be linked even
-   when the GCC library is this one.  */
-               
-/* Do not build the interworking functions when the target architecture does 
-   not support Thumb instructions.  (This can be a multilib option).  */
-#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
-      || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
-      || __ARM_ARCH__ >= 6
-
-#if defined L_call_via_rX
-
-/* These labels & instructions are used by the Arm/Thumb interworking code. 
-   The address of function to be called is loaded into a register and then 
-   one of these labels is called via a BL instruction.  This puts the 
-   return address into the link register with the bottom bit set, and the 
-   code here switches to the correct mode before executing the function.  */
-       
-       .text
-       .align 0
-        .force_thumb
-
-.macro call_via register
-       THUMB_FUNC_START _call_via_\register
-
-       bx      \register
-       nop
-
-       SIZE    (_call_via_\register)
-.endm
-
-       call_via r0
-       call_via r1
-       call_via r2
-       call_via r3
-       call_via r4
-       call_via r5
-       call_via r6
-       call_via r7
-       call_via r8
-       call_via r9
-       call_via sl
-       call_via fp
-       call_via ip
-       call_via sp
-       call_via lr
-
-#endif /* L_call_via_rX */
-
-/* Don't bother with the old interworking routines for Thumb-2.  */
-/* ??? Maybe only omit these on "m" variants.  */
-#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__)
-
-#if defined L_interwork_call_via_rX
-
-/* These labels & instructions are used by the Arm/Thumb interworking code,
-   when the target address is in an unknown instruction set.  The address 
-   of function to be called is loaded into a register and then one of these
-   labels is called via a BL instruction.  This puts the return address 
-   into the link register with the bottom bit set, and the code here 
-   switches to the correct mode before executing the function.  Unfortunately
-   the target code cannot be relied upon to return via a BX instruction, so
-   instead we have to store the resturn address on the stack and allow the
-   called function to return here instead.  Upon return we recover the real
-   return address and use a BX to get back to Thumb mode.
-
-   There are three variations of this code.  The first,
-   _interwork_call_via_rN(), will push the return address onto the
-   stack and pop it in _arm_return().  It should only be used if all
-   arguments are passed in registers.
-
-   The second, _interwork_r7_call_via_rN(), instead stores the return
-   address at [r7, #-4].  It is the caller's responsibility to ensure
-   that this address is valid and contains no useful data.
-
-   The third, _interwork_r11_call_via_rN(), works in the same way but
-   uses r11 instead of r7.  It is useful if the caller does not really
-   need a frame pointer.  */
-       
-       .text
-       .align 0
-
-       .code   32
-       .globl _arm_return
-LSYM(Lstart_arm_return):
-       cfi_start       LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
-       cfi_push        0, 0xe, -0x8, 0x8
-       nop     @ This nop is for the benefit of debuggers, so that
-               @ backtraces will use the correct unwind information.
-_arm_return:
-       RETLDM  unwind=LSYM(Lstart_arm_return)
-       cfi_end LSYM(Lend_arm_return)
-
-       .globl _arm_return_r7
-_arm_return_r7:
-       ldr     lr, [r7, #-4]
-       bx      lr
-
-       .globl _arm_return_r11
-_arm_return_r11:
-       ldr     lr, [r11, #-4]
-       bx      lr
-
-.macro interwork_with_frame frame, register, name, return
-       .code   16
-
-       THUMB_FUNC_START \name
-
-       bx      pc
-       nop
-
-       .code   32
-       tst     \register, #1
-       streq   lr, [\frame, #-4]
-       adreq   lr, _arm_return_\frame
-       bx      \register
-
-       SIZE    (\name)
-.endm
-
-.macro interwork register
-       .code   16
-
-       THUMB_FUNC_START _interwork_call_via_\register
-
-       bx      pc
-       nop
-
-       .code   32
-       .globl LSYM(Lchange_\register)
-LSYM(Lchange_\register):
-       tst     \register, #1
-       streq   lr, [sp, #-8]!
-       adreq   lr, _arm_return
-       bx      \register
-
-       SIZE    (_interwork_call_via_\register)
-
-       interwork_with_frame r7,\register,_interwork_r7_call_via_\register
-       interwork_with_frame r11,\register,_interwork_r11_call_via_\register
-.endm
-       
-       interwork r0
-       interwork r1
-       interwork r2
-       interwork r3
-       interwork r4
-       interwork r5
-       interwork r6
-       interwork r7
-       interwork r8
-       interwork r9
-       interwork sl
-       interwork fp
-       interwork ip
-       interwork sp
-       
-       /* The LR case has to be handled a little differently...  */
-       .code 16
-
-       THUMB_FUNC_START _interwork_call_via_lr
-
-       bx      pc
-       nop
-       
-       .code 32
-       .globl .Lchange_lr
-.Lchange_lr:
-       tst     lr, #1
-       stmeqdb r13!, {lr, pc}
-       mov     ip, lr
-       adreq   lr, _arm_return
-       bx      ip
-       
-       SIZE    (_interwork_call_via_lr)
-       
-#endif /* L_interwork_call_via_rX */
-#endif /* !__thumb2__ */
-
-/* Functions to support compact pic switch tables in thumb1 state.
-   All these routines take an index into the table in r0.  The
-   table is at LR & ~1 (but this must be rounded up in the case
-   of 32-bit entires).  They are only permitted to clobber r12
-   and r14 and r0 must be preserved on exit.  */
-#ifdef L_thumb1_case_sqi
-       
-       .text
-       .align 0
-        .force_thumb
-       .syntax unified
-       THUMB_FUNC_START __gnu_thumb1_case_sqi
-       push    {r1}
-       mov     r1, lr
-       lsrs    r1, r1, #1
-       lsls    r1, r1, #1
-       ldrsb   r1, [r1, r0]
-       lsls    r1, r1, #1
-       add     lr, lr, r1
-       pop     {r1}
-       bx      lr
-       SIZE (__gnu_thumb1_case_sqi)
-#endif
-
-#ifdef L_thumb1_case_uqi
-       
-       .text
-       .align 0
-        .force_thumb
-       .syntax unified
-       THUMB_FUNC_START __gnu_thumb1_case_uqi
-       push    {r1}
-       mov     r1, lr
-       lsrs    r1, r1, #1
-       lsls    r1, r1, #1
-       ldrb    r1, [r1, r0]
-       lsls    r1, r1, #1
-       add     lr, lr, r1
-       pop     {r1}
-       bx      lr
-       SIZE (__gnu_thumb1_case_uqi)
-#endif
-
-#ifdef L_thumb1_case_shi
-       
-       .text
-       .align 0
-        .force_thumb
-       .syntax unified
-       THUMB_FUNC_START __gnu_thumb1_case_shi
-       push    {r0, r1}
-       mov     r1, lr
-       lsrs    r1, r1, #1
-       lsls    r0, r0, #1
-       lsls    r1, r1, #1
-       ldrsh   r1, [r1, r0]
-       lsls    r1, r1, #1
-       add     lr, lr, r1
-       pop     {r0, r1}
-       bx      lr
-       SIZE (__gnu_thumb1_case_shi)
-#endif
-
-#ifdef L_thumb1_case_uhi
-       
-       .text
-       .align 0
-        .force_thumb
-       .syntax unified
-       THUMB_FUNC_START __gnu_thumb1_case_uhi
-       push    {r0, r1}
-       mov     r1, lr
-       lsrs    r1, r1, #1
-       lsls    r0, r0, #1
-       lsls    r1, r1, #1
-       ldrh    r1, [r1, r0]
-       lsls    r1, r1, #1
-       add     lr, lr, r1
-       pop     {r0, r1}
-       bx      lr
-       SIZE (__gnu_thumb1_case_uhi)
-#endif
-
-#ifdef L_thumb1_case_si
-       
-       .text
-       .align 0
-        .force_thumb
-       .syntax unified
-       THUMB_FUNC_START __gnu_thumb1_case_si
-       push    {r0, r1}
-       mov     r1, lr
-       adds.n  r1, r1, #2      /* Align to word.  */
-       lsrs    r1, r1, #2
-       lsls    r0, r0, #2
-       lsls    r1, r1, #2
-       ldr     r0, [r1, r0]
-       adds    r0, r0, r1
-       mov     lr, r0
-       pop     {r0, r1}
-       mov     pc, lr          /* We know we were called from thumb code.  */
-       SIZE (__gnu_thumb1_case_si)
-#endif
-
-#endif /* Arch supports thumb.  */
-
-#ifndef __symbian__
-#ifndef __ARM_ARCH_6M__
-#include "ieee754-df.S"
-#include "ieee754-sf.S"
-#include "bpabi.S"
-#else /* __ARM_ARCH_6M__ */
-#include "bpabi-v6m.S"
-#endif /* __ARM_ARCH_6M__ */
-#endif /* !__symbian__ */
diff --git a/gcc/config/arm/linux-eabi.h b/gcc/config/arm/linux-eabi.h

index a38309559481382806c66f20e64a7de0bb53aaab..80bd82593757d215aa59fa349c190e566e73bf79 100644 (file)
--- a/gcc/config/arm/linux-eabi.h
+++ b/gcc/config/arm/linux-eabi.h
@@ -97,7 +97,7 @@
  #undef LIBGCC_SPEC
  
  /* Clear the instruction cache from `beg' to `end'.  This is
-   implemented in lib1funcs.asm, so ensure an error if this definition
+   implemented in lib1funcs.S, so ensure an error if this definition
     is used.  */
  #undef  CLEAR_INSN_CACHE
  #define CLEAR_INSN_CACHE(BEG, END) not_used
diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm

index b970ec26a356a979457cd6ec0afaf84b42013fd6..a9a174d473db0d8a92770e787adcaa50705c04ed 100644 (file)
--- a/gcc/config/arm/t-arm
+++ b/gcc/config/arm/t-arm
@@ -40,9 +40,6 @@ MD_INCLUDES=  $(srcdir)/config/arm/arm-tune.md \
                 $(srcdir)/config/arm/thumb2.md \
                 $(srcdir)/config/arm/arm-fixed.md
  
-LIB1ASMSRC = arm/lib1funcs.asm
-LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \
-       _thumb1_case_uhi _thumb1_case_si
  s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \
         s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES)
  
diff --git a/gcc/config/arm/t-arm-elf b/gcc/config/arm/t-arm-elf

index bfcf6ffd9394e9bf097793ad6ac95b0ad5c4e875..a605d26244f99b4236c66c502c3ae9c1f08a9ec3 100644 (file)
--- a/gcc/config/arm/t-arm-elf
+++ b/gcc/config/arm/t-arm-elf
@@ -17,20 +17,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-# For most CPUs we have an assembly soft-float implementations.
-# However this is not true for ARMv6M.  Here we want to use the soft-fp C
-# implementation.  The soft-fp code is only build for ARMv6M.  This pulls
-# in the asm implementation for other CPUs.
-LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \
-       _call_via_rX _interwork_call_via_rX \
-       _lshrdi3 _ashrdi3 _ashldi3 \
-       _arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \
-       _arm_fixdfsi _arm_fixunsdfsi \
-       _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \
-       _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \
-       _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \
-       _clzsi2 _clzdi2 
-
  MULTILIB_OPTIONS     = marm/mthumb
  MULTILIB_DIRNAMES    = arm thumb
  MULTILIB_EXCEPTIONS  = 
diff --git a/gcc/config/arm/t-bpabi b/gcc/config/arm/t-bpabi

index 047525682fcb0dcc022b603e90850fc6b457c923..c9d5ed4d674fbc45df88e5f8a3504a4ffe2cf69e 100644 (file)
--- a/gcc/config/arm/t-bpabi
+++ b/gcc/config/arm/t-bpabi
@@ -16,9 +16,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-# Add the bpabi.S functions.
-LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod
-
  # Add the BPABI C functions.
  LIB2FUNCS_EXTRA = $(srcdir)/config/arm/bpabi.c \
                   $(srcdir)/config/arm/unaligned-funcs.c
diff --git a/gcc/config/arm/t-linux b/gcc/config/arm/t-linux

index a0c5110f0c0afab0480d653ee9e8d03ec540df49..a204834014ebf0de0bdd9c0ffbe72f46f360a2b8 100644 (file)
--- a/gcc/config/arm/t-linux
+++ b/gcc/config/arm/t-linux
@@ -21,10 +21,6 @@
  # difference.
  TARGET_LIBGCC2_CFLAGS = -fomit-frame-pointer -fPIC
  
-LIB1ASMSRC = arm/lib1funcs.asm
-LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx _clzsi2 _clzdi2 \
-       _arm_addsubdf3 _arm_addsubsf3
-
  # MULTILIB_OPTIONS = mfloat-abi=hard/mfloat-abi=soft
  # MULTILIB_DIRNAMES = hard-float soft-float
  
diff --git a/gcc/config/arm/t-linux-eabi b/gcc/config/arm/t-linux-eabi

index fed979e980b942cd24aef208b94259666e64250f..3030229fafa06f7bb8fa89fef0b12da64e326196 100644 (file)
--- a/gcc/config/arm/t-linux-eabi
+++ b/gcc/config/arm/t-linux-eabi
@@ -28,8 +28,5 @@ MULTILIB_DIRNAMES     =
  #MULTILIB_DIRNAMES    += fa606te fa626te fmp626 fa726te
  #MULTILIB_EXCEPTIONS  += *mthumb/*mcpu=fa606te *mthumb/*mcpu=fa626te *mthumb/*mcpu=fmp626 *mthumb/*mcpu=fa726te*
  
-# Use a version of div0 which raises SIGFPE, and a special __clear_cache.
-LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache
-
  LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic.c
  LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic-64bit.c
diff --git a/gcc/config/arm/t-strongarm-elf b/gcc/config/arm/t-strongarm-elf

index 95680031e54a742842fa7508ec0c2f972dad124e..4d51e660c8bf790370cf41a0653fa5fcb23e0d29 100644 (file)
--- a/gcc/config/arm/t-strongarm-elf
+++ b/gcc/config/arm/t-strongarm-elf
@@ -17,8 +17,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2
-
  MULTILIB_OPTIONS     = mlittle-endian/mbig-endian mfloat-abi=hard/mfloat-abi=soft
  MULTILIB_DIRNAMES    = le be fpu soft
  MULTILIB_EXCEPTIONS  =
diff --git a/gcc/config/arm/t-symbian b/gcc/config/arm/t-symbian

index cf716147849b391db176b06d8b1ccec4ad5cacf8..736a01d10f48fc4bed036c203740d09e1980756c 100644 (file)
--- a/gcc/config/arm/t-symbian
+++ b/gcc/config/arm/t-symbian
@@ -16,20 +16,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
-
-# These functions have __aeabi equivalents and will never be called by GCC.  
-# By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being
-# used -- and we make sure that definitions are not available in lib1funcs.asm,
-# either, so they end up undefined.
-LIB1ASMFUNCS += \
-       _ashldi3 _ashrdi3 _divdi3 _floatdidf _udivmoddi4 _umoddi3 \
-       _udivdi3 _lshrdi3 _moddi3 _muldi3 _negdi2 _cmpdi2 \
-       _fixdfdi _fixsfdi _fixunsdfdi _fixunssfdi _floatdisf \
-       _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \
-       _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \
-       _fixsfsi _fixunssfsi
-
  EXTRA_HEADERS += $(srcdir)/ginclude/unwind-arm-common.h
  # Include half-float helpers.
  LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c
diff --git a/gcc/config/arm/t-vxworks b/gcc/config/arm/t-vxworks

index 8ac0d9bcec585c3bcfb302cf81c0608f4d15ddad..0900ffe15ed563508a17d2bd239697b1e139efa8 100644 (file)
--- a/gcc/config/arm/t-vxworks
+++ b/gcc/config/arm/t-vxworks
@@ -16,8 +16,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
-
  MULTILIB_OPTIONS = \
    mrtp fPIC \
    t4/t4be/t4t/t4tbe/t5/t5be/t5t/t5tbe/tstrongarm/txscale/txscalebe
diff --git a/gcc/config/arm/t-wince-pe b/gcc/config/arm/t-wince-pe

index 9ce1f313140a586d409a35765dacc07e53d44118..8a8c65fd396c2941d5a654188149bfa76c26d29c 100644 (file)
--- a/gcc/config/arm/t-wince-pe
+++ b/gcc/config/arm/t-wince-pe
@@ -17,8 +17,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
-
  pe.o: $(srcdir)/config/arm/pe.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(RTL_H) output.h flags.h $(TREE_H) expr.h $(TM_P_H)
         $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S

deleted file mode 100644 (file)

index 8c369c9..0000000
--- a/gcc/config/avr/libgcc.S
+++ /dev/null
@@ -1,1533 +0,0 @@
-/*  -*- Mode: Asm -*-  */
-/* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009
-   Free Software Foundation, Inc.
-   Contributed by Denis Chertykov <chertykov@gmail.com>
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-#define __zero_reg__ r1
-#define __tmp_reg__ r0
-#define __SREG__ 0x3f
-#define __SP_H__ 0x3e
-#define __SP_L__ 0x3d
-#define __RAMPZ__ 0x3B
-#define __EIND__  0x3C
-
-/* Most of the functions here are called directly from avr.md
-   patterns, instead of using the standard libcall mechanisms.
-   This can make better code because GCC knows exactly which
-   of the call-used registers (not all of them) are clobbered.  */
-
-/* FIXME:  At present, there is no SORT directive in the linker
-           script so that we must not assume that different modules
-           in the same input section like .libgcc.text.mul will be
-           located close together.  Therefore, we cannot use
-           RCALL/RJMP to call a function like __udivmodhi4 from
-           __divmodhi4 and have to use lengthy XCALL/XJMP even
-           though they are in the same input section and all same
-           input sections together are small enough to reach every
-           location with a RCALL/RJMP instruction.  */
-
-       .macro  mov_l  r_dest, r_src
-#if defined (__AVR_HAVE_MOVW__)
-       movw    \r_dest, \r_src
-#else
-       mov     \r_dest, \r_src
-#endif
-       .endm
-
-       .macro  mov_h  r_dest, r_src
-#if defined (__AVR_HAVE_MOVW__)
-       ; empty
-#else
-       mov     \r_dest, \r_src
-#endif
-       .endm
-
-#if defined (__AVR_HAVE_JMP_CALL__)
-#define XCALL call
-#define XJMP  jmp
-#else
-#define XCALL rcall
-#define XJMP  rjmp
-#endif
-
-.macro DEFUN name
-.global \name
-.func \name
-\name:
-.endm
-
-.macro ENDF name
-.size \name, .-\name
-.endfunc
-.endm
-
-\f
-.section .text.libgcc.mul, "ax", @progbits
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-/* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
-#if !defined (__AVR_HAVE_MUL__)
-/*******************************************************
-    Multiplication  8 x 8  without MUL
-*******************************************************/
-#if defined (L_mulqi3)
-
-#define        r_arg2  r22             /* multiplicand */
-#define        r_arg1  r24             /* multiplier */
-#define r_res  __tmp_reg__     /* result */
-
-DEFUN __mulqi3
-       clr     r_res           ; clear result
-__mulqi3_loop:
-       sbrc    r_arg1,0
-       add     r_res,r_arg2
-       add     r_arg2,r_arg2   ; shift multiplicand
-       breq    __mulqi3_exit   ; while multiplicand != 0
-       lsr     r_arg1          ; 
-       brne    __mulqi3_loop   ; exit if multiplier = 0
-__mulqi3_exit: 
-       mov     r_arg1,r_res    ; result to return register
-       ret
-ENDF __mulqi3
-
-#undef r_arg2  
-#undef r_arg1  
-#undef r_res   
-       
-#endif         /* defined (L_mulqi3) */
-
-#if defined (L_mulqihi3)
-DEFUN __mulqihi3
-       clr     r25
-       sbrc    r24, 7
-       dec     r25
-       clr     r23
-       sbrc    r22, 7
-       dec     r22
-       XJMP    __mulhi3
-ENDF __mulqihi3:
-#endif /* defined (L_mulqihi3) */
-
-#if defined (L_umulqihi3)
-DEFUN __umulqihi3
-       clr     r25
-       clr     r23
-       XJMP    __mulhi3
-ENDF __umulqihi3
-#endif /* defined (L_umulqihi3) */
-
-/*******************************************************
-    Multiplication  16 x 16  without MUL
-*******************************************************/
-#if defined (L_mulhi3)
-#define        r_arg1L r24             /* multiplier Low */
-#define        r_arg1H r25             /* multiplier High */
-#define        r_arg2L r22             /* multiplicand Low */
-#define        r_arg2H r23             /* multiplicand High */
-#define r_resL __tmp_reg__     /* result Low */
-#define r_resH  r21            /* result High */
-
-DEFUN __mulhi3
-       clr     r_resH          ; clear result
-       clr     r_resL          ; clear result
-__mulhi3_loop:
-       sbrs    r_arg1L,0
-       rjmp    __mulhi3_skip1
-       add     r_resL,r_arg2L  ; result + multiplicand
-       adc     r_resH,r_arg2H
-__mulhi3_skip1:        
-       add     r_arg2L,r_arg2L ; shift multiplicand
-       adc     r_arg2H,r_arg2H
-
-       cp      r_arg2L,__zero_reg__
-       cpc     r_arg2H,__zero_reg__
-       breq    __mulhi3_exit   ; while multiplicand != 0
-
-       lsr     r_arg1H         ; gets LSB of multiplier
-       ror     r_arg1L
-       sbiw    r_arg1L,0
-       brne    __mulhi3_loop   ; exit if multiplier = 0
-__mulhi3_exit:
-       mov     r_arg1H,r_resH  ; result to return register
-       mov     r_arg1L,r_resL
-       ret
-ENDF __mulhi3
-
-#undef r_arg1L
-#undef r_arg1H
-#undef r_arg2L
-#undef r_arg2H
-#undef r_resL  
-#undef r_resH 
-
-#endif /* defined (L_mulhi3) */
-
-/*******************************************************
-    Widening Multiplication  32 = 16 x 16  without MUL
-*******************************************************/
-
-#if defined (L_mulhisi3)
-DEFUN __mulhisi3
-;;; FIXME: This is dead code (noone calls it)
-    mov_l   r18, r24
-    mov_h   r19, r25
-    clr     r24
-    sbrc    r23, 7
-    dec     r24
-    mov     r25, r24
-    clr     r20
-    sbrc    r19, 7
-    dec     r20
-    mov     r21, r20
-    XJMP    __mulsi3
-ENDF __mulhisi3
-#endif /* defined (L_mulhisi3) */
-
-#if defined (L_umulhisi3)
-DEFUN __umulhisi3
-;;; FIXME: This is dead code (noone calls it)
-    mov_l   r18, r24
-    mov_h   r19, r25
-    clr     r24
-    clr     r25
-    mov_l   r20, r24
-    mov_h   r21, r25
-    XJMP    __mulsi3
-ENDF __umulhisi3
-#endif /* defined (L_umulhisi3) */
-
-#if defined (L_mulsi3)
-/*******************************************************
-    Multiplication  32 x 32  without MUL
-*******************************************************/
-#define r_arg1L  r22           /* multiplier Low */
-#define r_arg1H  r23
-#define        r_arg1HL r24
-#define        r_arg1HH r25            /* multiplier High */
-
-#define        r_arg2L  r18            /* multiplicand Low */
-#define        r_arg2H  r19    
-#define        r_arg2HL r20
-#define        r_arg2HH r21            /* multiplicand High */
-       
-#define r_resL  r26            /* result Low */
-#define r_resH   r27
-#define r_resHL         r30
-#define r_resHH  r31           /* result High */
-
-DEFUN __mulsi3
-       clr     r_resHH         ; clear result
-       clr     r_resHL         ; clear result
-       clr     r_resH          ; clear result
-       clr     r_resL          ; clear result
-__mulsi3_loop:
-       sbrs    r_arg1L,0
-       rjmp    __mulsi3_skip1
-       add     r_resL,r_arg2L          ; result + multiplicand
-       adc     r_resH,r_arg2H
-       adc     r_resHL,r_arg2HL
-       adc     r_resHH,r_arg2HH
-__mulsi3_skip1:
-       add     r_arg2L,r_arg2L         ; shift multiplicand
-       adc     r_arg2H,r_arg2H
-       adc     r_arg2HL,r_arg2HL
-       adc     r_arg2HH,r_arg2HH
-       
-       lsr     r_arg1HH        ; gets LSB of multiplier
-       ror     r_arg1HL
-       ror     r_arg1H
-       ror     r_arg1L
-       brne    __mulsi3_loop
-       sbiw    r_arg1HL,0
-       cpc     r_arg1H,r_arg1L
-       brne    __mulsi3_loop           ; exit if multiplier = 0
-__mulsi3_exit:
-       mov_h   r_arg1HH,r_resHH        ; result to return register
-       mov_l   r_arg1HL,r_resHL
-       mov_h   r_arg1H,r_resH
-       mov_l   r_arg1L,r_resL
-       ret
-ENDF __mulsi3
-
-#undef r_arg1L 
-#undef r_arg1H 
-#undef r_arg1HL
-#undef r_arg1HH
-             
-#undef r_arg2L 
-#undef r_arg2H 
-#undef r_arg2HL
-#undef r_arg2HH
-             
-#undef r_resL  
-#undef r_resH  
-#undef r_resHL 
-#undef r_resHH 
-
-#endif /* defined (L_mulsi3) */
-
-#endif /* !defined (__AVR_HAVE_MUL__) */
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-\f
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-#if defined (__AVR_HAVE_MUL__)    
-#define A0 26
-#define B0 18
-#define C0 22
-
-#define A1 A0+1
-
-#define B1 B0+1
-#define B2 B0+2
-#define B3 B0+3
-
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
-
-/*******************************************************
-    Widening Multiplication  32 = 16 x 16
-*******************************************************/
-                              
-#if defined (L_mulhisi3)
-;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
-;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
-;;; Clobbers: __tmp_reg__
-DEFUN __mulhisi3
-    XCALL   __umulhisi3
-    ;; Sign-extend B
-    tst     B1
-    brpl    1f
-    sub     C2, A0
-    sbc     C3, A1
-1:  ;; Sign-extend A
-    XJMP __usmulhisi3_tail
-ENDF __mulhisi3
-#endif /* L_mulhisi3 */
-
-#if defined (L_usmulhisi3)
-;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
-;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
-;;; Clobbers: __tmp_reg__
-DEFUN __usmulhisi3
-    XCALL   __umulhisi3
-    ;; FALLTHRU
-ENDF __usmulhisi3
-
-DEFUN __usmulhisi3_tail
-    ;; Sign-extend A
-    sbrs    A1, 7
-    ret
-    sub     C2, B0
-    sbc     C3, B1
-    ret
-ENDF __usmulhisi3_tail
-#endif /* L_usmulhisi3 */
-
-#if defined (L_umulhisi3)
-;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
-;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
-;;; Clobbers: __tmp_reg__
-DEFUN __umulhisi3
-    mul     A0, B0
-    movw    C0, r0
-    mul     A1, B1
-    movw    C2, r0
-    mul     A0, B1
-    rcall   1f
-    mul     A1, B0
-1:  add     C1, r0
-    adc     C2, r1
-    clr     __zero_reg__
-    adc     C3, __zero_reg__
-    ret
-ENDF __umulhisi3
-#endif /* L_umulhisi3 */
-
-/*******************************************************
-    Widening Multiplication  32 = 16 x 32
-*******************************************************/
-
-#if defined (L_mulshisi3)
-;;; R25:R22 = (signed long) R27:R26 * R21:R18
-;;; (C3:C0) = (signed long) A1:A0   * B3:B0
-;;; Clobbers: __tmp_reg__
-DEFUN __mulshisi3
-#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
-    ;; Some cores have problem skipping 2-word instruction
-    tst     A1
-    brmi    __mulohisi3
-#else
-    sbrs    A1, 7
-#endif /* __AVR_HAVE_JMP_CALL__ */
-    XJMP    __muluhisi3
-    ;; FALLTHRU
-ENDF __mulshisi3
-    
-;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
-;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
-;;; Clobbers: __tmp_reg__
-DEFUN __mulohisi3
-    XCALL   __muluhisi3
-    ;; One-extend R27:R26 (A1:A0)
-    sub     C2, B0
-    sbc     C3, B1
-    ret
-ENDF __mulohisi3
-#endif /* L_mulshisi3 */
-
-#if defined (L_muluhisi3)
-;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
-;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
-;;; Clobbers: __tmp_reg__
-DEFUN __muluhisi3
-    XCALL   __umulhisi3
-    mul     A0, B3
-    add     C3, r0
-    mul     A1, B2
-    add     C3, r0
-    mul     A0, B2
-    add     C2, r0
-    adc     C3, r1
-    clr     __zero_reg__
-    ret
-ENDF __muluhisi3
-#endif /* L_muluhisi3 */
-
-/*******************************************************
-    Multiplication  32 x 32
-*******************************************************/
-
-#if defined (L_mulsi3)
-;;; R25:R22 = R25:R22 * R21:R18
-;;; (C3:C0) = C3:C0   * B3:B0
-;;; Clobbers: R26, R27, __tmp_reg__
-DEFUN __mulsi3
-    movw    A0, C0
-    push    C2
-    push    C3
-    XCALL   __muluhisi3
-    pop     A1
-    pop     A0
-    ;; A1:A0 now contains the high word of A
-    mul     A0, B0
-    add     C2, r0
-    adc     C3, r1
-    mul     A0, B1
-    add     C3, r0
-    mul     A1, B0
-    add     C3, r0
-    clr     __zero_reg__
-    ret
-ENDF __mulsi3
-#endif /* L_mulsi3 */
-
-#undef A0
-#undef A1
-
-#undef B0
-#undef B1
-#undef B2
-#undef B3
-
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#endif /* __AVR_HAVE_MUL__ */
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-       
-\f
-.section .text.libgcc.div, "ax", @progbits
-
-/*******************************************************
-       Division 8 / 8 => (result + remainder)
-*******************************************************/
-#define        r_rem   r25     /* remainder */
-#define        r_arg1  r24     /* dividend, quotient */
-#define        r_arg2  r22     /* divisor */
-#define        r_cnt   r23     /* loop count */
-
-#if defined (L_udivmodqi4)
-DEFUN __udivmodqi4
-       sub     r_rem,r_rem     ; clear remainder and carry
-       ldi     r_cnt,9         ; init loop counter
-       rjmp    __udivmodqi4_ep ; jump to entry point
-__udivmodqi4_loop:
-       rol     r_rem           ; shift dividend into remainder
-       cp      r_rem,r_arg2    ; compare remainder & divisor
-       brcs    __udivmodqi4_ep ; remainder <= divisor
-       sub     r_rem,r_arg2    ; restore remainder
-__udivmodqi4_ep:
-       rol     r_arg1          ; shift dividend (with CARRY)
-       dec     r_cnt           ; decrement loop counter
-       brne    __udivmodqi4_loop
-       com     r_arg1          ; complement result 
-                               ; because C flag was complemented in loop
-       ret
-ENDF __udivmodqi4
-#endif /* defined (L_udivmodqi4) */
-
-#if defined (L_divmodqi4)
-DEFUN __divmodqi4
-        bst     r_arg1,7       ; store sign of dividend
-        mov     __tmp_reg__,r_arg1
-        eor     __tmp_reg__,r_arg2; r0.7 is sign of result
-        sbrc   r_arg1,7
-       neg     r_arg1          ; dividend negative : negate
-        sbrc   r_arg2,7
-       neg     r_arg2          ; divisor negative : negate
-       XCALL   __udivmodqi4    ; do the unsigned div/mod
-       brtc    __divmodqi4_1
-       neg     r_rem           ; correct remainder sign
-__divmodqi4_1:
-       sbrc    __tmp_reg__,7
-       neg     r_arg1          ; correct result sign
-__divmodqi4_exit:
-       ret
-ENDF __divmodqi4
-#endif /* defined (L_divmodqi4) */
-
-#undef r_rem
-#undef r_arg1
-#undef r_arg2
-#undef r_cnt
-       
-               
-/*******************************************************
-       Division 16 / 16 => (result + remainder)
-*******************************************************/
-#define        r_remL  r26     /* remainder Low */
-#define        r_remH  r27     /* remainder High */
-
-/* return: remainder */
-#define        r_arg1L r24     /* dividend Low */
-#define        r_arg1H r25     /* dividend High */
-
-/* return: quotient */
-#define        r_arg2L r22     /* divisor Low */
-#define        r_arg2H r23     /* divisor High */
-       
-#define        r_cnt   r21     /* loop count */
-
-#if defined (L_udivmodhi4)
-DEFUN __udivmodhi4
-       sub     r_remL,r_remL
-       sub     r_remH,r_remH   ; clear remainder and carry
-       ldi     r_cnt,17        ; init loop counter
-       rjmp    __udivmodhi4_ep ; jump to entry point
-__udivmodhi4_loop:
-        rol    r_remL          ; shift dividend into remainder
-       rol     r_remH
-        cp     r_remL,r_arg2L  ; compare remainder & divisor
-       cpc     r_remH,r_arg2H
-        brcs   __udivmodhi4_ep ; remainder < divisor
-        sub    r_remL,r_arg2L  ; restore remainder
-        sbc    r_remH,r_arg2H
-__udivmodhi4_ep:
-        rol    r_arg1L         ; shift dividend (with CARRY)
-        rol    r_arg1H
-        dec    r_cnt           ; decrement loop counter
-        brne   __udivmodhi4_loop
-       com     r_arg1L
-       com     r_arg1H
-; div/mod results to return registers, as for the div() function
-       mov_l   r_arg2L, r_arg1L        ; quotient
-       mov_h   r_arg2H, r_arg1H
-       mov_l   r_arg1L, r_remL         ; remainder
-       mov_h   r_arg1H, r_remH
-       ret
-ENDF __udivmodhi4
-#endif /* defined (L_udivmodhi4) */
-
-#if defined (L_divmodhi4)
-DEFUN __divmodhi4
-       .global _div
-_div:
-        bst     r_arg1H,7      ; store sign of dividend
-        mov     __tmp_reg__,r_arg1H
-        eor     __tmp_reg__,r_arg2H   ; r0.7 is sign of result
-       rcall   __divmodhi4_neg1 ; dividend negative : negate
-       sbrc    r_arg2H,7
-       rcall   __divmodhi4_neg2 ; divisor negative : negate
-       XCALL   __udivmodhi4    ; do the unsigned div/mod
-       rcall   __divmodhi4_neg1 ; correct remainder sign
-       tst     __tmp_reg__
-       brpl    __divmodhi4_exit
-__divmodhi4_neg2:
-       com     r_arg2H
-       neg     r_arg2L         ; correct divisor/result sign
-       sbci    r_arg2H,0xff
-__divmodhi4_exit:
-       ret
-__divmodhi4_neg1:
-       brtc    __divmodhi4_exit
-       com     r_arg1H
-       neg     r_arg1L         ; correct dividend/remainder sign
-       sbci    r_arg1H,0xff
-       ret
-ENDF __divmodhi4
-#endif /* defined (L_divmodhi4) */
-
-#undef r_remH  
-#undef r_remL  
-             
-#undef r_arg1H 
-#undef r_arg1L 
-             
-#undef r_arg2H 
-#undef r_arg2L 
-               
-#undef r_cnt           
-       
-/*******************************************************
-       Division 32 / 32 => (result + remainder)
-*******************************************************/
-#define        r_remHH r31     /* remainder High */
-#define        r_remHL r30
-#define        r_remH  r27
-#define        r_remL  r26     /* remainder Low */
-
-/* return: remainder */
-#define        r_arg1HH r25    /* dividend High */
-#define        r_arg1HL r24
-#define        r_arg1H  r23
-#define        r_arg1L  r22    /* dividend Low */
-
-/* return: quotient */
-#define        r_arg2HH r21    /* divisor High */
-#define        r_arg2HL r20
-#define        r_arg2H  r19
-#define        r_arg2L  r18    /* divisor Low */
-       
-#define        r_cnt __zero_reg__  /* loop count (0 after the loop!) */
-
-#if defined (L_udivmodsi4)
-DEFUN __udivmodsi4
-       ldi     r_remL, 33      ; init loop counter
-       mov     r_cnt, r_remL
-       sub     r_remL,r_remL
-       sub     r_remH,r_remH   ; clear remainder and carry
-       mov_l   r_remHL, r_remL
-       mov_h   r_remHH, r_remH
-       rjmp    __udivmodsi4_ep ; jump to entry point
-__udivmodsi4_loop:
-        rol    r_remL          ; shift dividend into remainder
-       rol     r_remH
-       rol     r_remHL
-       rol     r_remHH
-        cp     r_remL,r_arg2L  ; compare remainder & divisor
-       cpc     r_remH,r_arg2H
-       cpc     r_remHL,r_arg2HL
-       cpc     r_remHH,r_arg2HH
-       brcs    __udivmodsi4_ep ; remainder <= divisor
-        sub    r_remL,r_arg2L  ; restore remainder
-        sbc    r_remH,r_arg2H
-        sbc    r_remHL,r_arg2HL
-        sbc    r_remHH,r_arg2HH
-__udivmodsi4_ep:
-        rol    r_arg1L         ; shift dividend (with CARRY)
-        rol    r_arg1H
-        rol    r_arg1HL
-        rol    r_arg1HH
-        dec    r_cnt           ; decrement loop counter
-        brne   __udivmodsi4_loop
-                               ; __zero_reg__ now restored (r_cnt == 0)
-       com     r_arg1L
-       com     r_arg1H
-       com     r_arg1HL
-       com     r_arg1HH
-; div/mod results to return registers, as for the ldiv() function
-       mov_l   r_arg2L,  r_arg1L       ; quotient
-       mov_h   r_arg2H,  r_arg1H
-       mov_l   r_arg2HL, r_arg1HL
-       mov_h   r_arg2HH, r_arg1HH
-       mov_l   r_arg1L,  r_remL        ; remainder
-       mov_h   r_arg1H,  r_remH
-       mov_l   r_arg1HL, r_remHL
-       mov_h   r_arg1HH, r_remHH
-       ret
-ENDF __udivmodsi4
-#endif /* defined (L_udivmodsi4) */
-
-#if defined (L_divmodsi4)
-DEFUN __divmodsi4
-        bst     r_arg1HH,7     ; store sign of dividend
-        mov     __tmp_reg__,r_arg1HH
-        eor     __tmp_reg__,r_arg2HH   ; r0.7 is sign of result
-       rcall   __divmodsi4_neg1 ; dividend negative : negate
-       sbrc    r_arg2HH,7
-       rcall   __divmodsi4_neg2 ; divisor negative : negate
-       XCALL   __udivmodsi4    ; do the unsigned div/mod
-       rcall   __divmodsi4_neg1 ; correct remainder sign
-       rol     __tmp_reg__
-       brcc    __divmodsi4_exit
-__divmodsi4_neg2:
-       com     r_arg2HH
-       com     r_arg2HL
-       com     r_arg2H
-       neg     r_arg2L         ; correct divisor/quotient sign
-       sbci    r_arg2H,0xff
-       sbci    r_arg2HL,0xff
-       sbci    r_arg2HH,0xff
-__divmodsi4_exit:
-       ret
-__divmodsi4_neg1:
-       brtc    __divmodsi4_exit
-       com     r_arg1HH
-       com     r_arg1HL
-       com     r_arg1H
-       neg     r_arg1L         ; correct dividend/remainder sign
-       sbci    r_arg1H, 0xff
-       sbci    r_arg1HL,0xff
-       sbci    r_arg1HH,0xff
-       ret
-ENDF __divmodsi4
-#endif /* defined (L_divmodsi4) */
-
-\f
-.section .text.libgcc.prologue, "ax", @progbits
-    
-/**********************************
- * This is a prologue subroutine
- **********************************/
-#if defined (L_prologue)
-
-DEFUN __prologue_saves__
-       push r2
-       push r3
-       push r4
-       push r5
-       push r6
-       push r7
-       push r8
-       push r9
-       push r10
-       push r11
-       push r12
-       push r13
-       push r14
-       push r15
-       push r16
-       push r17
-       push r28
-       push r29
-       in      r28,__SP_L__
-       in      r29,__SP_H__
-       sub     r28,r26
-       sbc     r29,r27
-       in      __tmp_reg__,__SREG__
-       cli
-       out     __SP_H__,r29
-       out     __SREG__,__tmp_reg__
-       out     __SP_L__,r28
-#if defined (__AVR_HAVE_EIJMP_EICALL__)
-       eijmp
-#else
-       ijmp
-#endif
-
-ENDF __prologue_saves__
-#endif /* defined (L_prologue) */
-
-/*
- * This is an epilogue subroutine
- */
-#if defined (L_epilogue)
-
-DEFUN __epilogue_restores__
-       ldd     r2,Y+18
-       ldd     r3,Y+17
-       ldd     r4,Y+16
-       ldd     r5,Y+15
-       ldd     r6,Y+14
-       ldd     r7,Y+13
-       ldd     r8,Y+12
-       ldd     r9,Y+11
-       ldd     r10,Y+10
-       ldd     r11,Y+9
-       ldd     r12,Y+8
-       ldd     r13,Y+7
-       ldd     r14,Y+6
-       ldd     r15,Y+5
-       ldd     r16,Y+4
-       ldd     r17,Y+3
-       ldd     r26,Y+2
-       ldd     r27,Y+1
-       add     r28,r30
-       adc     r29,__zero_reg__
-       in      __tmp_reg__,__SREG__
-       cli
-       out     __SP_H__,r29
-       out     __SREG__,__tmp_reg__
-       out     __SP_L__,r28
-       mov_l   r28, r26
-       mov_h   r29, r27
-       ret
-ENDF __epilogue_restores__
-#endif /* defined (L_epilogue) */
-
-#ifdef L_exit
-       .section .fini9,"ax",@progbits
-DEFUN _exit
-       .weak   exit
-exit:
-ENDF _exit
-
-       /* Code from .fini8 ... .fini1 sections inserted by ld script.  */
-
-       .section .fini0,"ax",@progbits
-       cli
-__stop_program:
-       rjmp    __stop_program
-#endif /* defined (L_exit) */
-
-#ifdef L_cleanup
-       .weak   _cleanup
-       .func   _cleanup
-_cleanup:
-       ret
-.endfunc
-#endif /* defined (L_cleanup) */
-
-\f
-.section .text.libgcc, "ax", @progbits
-    
-#ifdef L_tablejump
-DEFUN __tablejump2__
-       lsl     r30
-       rol     r31
-    ;; FALLTHRU
-ENDF __tablejump2__
-
-DEFUN __tablejump__
-#if defined (__AVR_HAVE_LPMX__)
-       lpm __tmp_reg__, Z+
-       lpm r31, Z
-       mov r30, __tmp_reg__
-#if defined (__AVR_HAVE_EIJMP_EICALL__)
-       eijmp
-#else
-       ijmp
-#endif
-
-#else /* !HAVE_LPMX */
-       lpm
-       adiw r30, 1
-       push r0
-       lpm
-       push r0
-#if defined (__AVR_HAVE_EIJMP_EICALL__)
-       in   __tmp_reg__, __EIND__
-       push __tmp_reg__
-#endif
-       ret
-#endif /* !HAVE_LPMX */
-ENDF __tablejump__
-#endif /* defined (L_tablejump) */
-
-#ifdef L_copy_data
-       .section .init4,"ax",@progbits
-DEFUN __do_copy_data
-#if defined(__AVR_HAVE_ELPMX__)
-       ldi     r17, hi8(__data_end)
-       ldi     r26, lo8(__data_start)
-       ldi     r27, hi8(__data_start)
-       ldi     r30, lo8(__data_load_start)
-       ldi     r31, hi8(__data_load_start)
-       ldi     r16, hh8(__data_load_start)
-       out     __RAMPZ__, r16
-       rjmp    .L__do_copy_data_start
-.L__do_copy_data_loop:
-       elpm    r0, Z+
-       st      X+, r0
-.L__do_copy_data_start:
-       cpi     r26, lo8(__data_end)
-       cpc     r27, r17
-       brne    .L__do_copy_data_loop
-#elif  !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
-       ldi     r17, hi8(__data_end)
-       ldi     r26, lo8(__data_start)
-       ldi     r27, hi8(__data_start)
-       ldi     r30, lo8(__data_load_start)
-       ldi     r31, hi8(__data_load_start)
-       ldi     r16, hh8(__data_load_start - 0x10000)
-.L__do_copy_data_carry:
-       inc     r16
-       out     __RAMPZ__, r16
-       rjmp    .L__do_copy_data_start
-.L__do_copy_data_loop:
-       elpm
-       st      X+, r0
-       adiw    r30, 1
-       brcs    .L__do_copy_data_carry
-.L__do_copy_data_start:
-       cpi     r26, lo8(__data_end)
-       cpc     r27, r17
-       brne    .L__do_copy_data_loop
-#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
-       ldi     r17, hi8(__data_end)
-       ldi     r26, lo8(__data_start)
-       ldi     r27, hi8(__data_start)
-       ldi     r30, lo8(__data_load_start)
-       ldi     r31, hi8(__data_load_start)
-       rjmp    .L__do_copy_data_start
-.L__do_copy_data_loop:
-#if defined (__AVR_HAVE_LPMX__)
-       lpm     r0, Z+
-#else
-       lpm
-       adiw    r30, 1
-#endif
-       st      X+, r0
-.L__do_copy_data_start:
-       cpi     r26, lo8(__data_end)
-       cpc     r27, r17
-       brne    .L__do_copy_data_loop
-#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
-ENDF __do_copy_data
-#endif /* L_copy_data */
-
-/* __do_clear_bss is only necessary if there is anything in .bss section.  */
-
-#ifdef L_clear_bss
-       .section .init4,"ax",@progbits
-DEFUN __do_clear_bss
-       ldi     r17, hi8(__bss_end)
-       ldi     r26, lo8(__bss_start)
-       ldi     r27, hi8(__bss_start)
-       rjmp    .do_clear_bss_start
-.do_clear_bss_loop:
-       st      X+, __zero_reg__
-.do_clear_bss_start:
-       cpi     r26, lo8(__bss_end)
-       cpc     r27, r17
-       brne    .do_clear_bss_loop
-ENDF __do_clear_bss
-#endif /* L_clear_bss */
-
-/* __do_global_ctors and __do_global_dtors are only necessary
-   if there are any constructors/destructors.  */
-
-#ifdef L_ctors
-       .section .init6,"ax",@progbits
-DEFUN __do_global_ctors
-#if defined(__AVR_HAVE_RAMPZ__)
-       ldi     r17, hi8(__ctors_start)
-       ldi     r28, lo8(__ctors_end)
-       ldi     r29, hi8(__ctors_end)
-       ldi     r16, hh8(__ctors_end)
-       rjmp    .L__do_global_ctors_start
-.L__do_global_ctors_loop:
-       sbiw    r28, 2
-       sbc     r16, __zero_reg__
-       mov_h   r31, r29
-       mov_l   r30, r28
-       out     __RAMPZ__, r16
-       XCALL   __tablejump_elpm__
-.L__do_global_ctors_start:
-       cpi     r28, lo8(__ctors_start)
-       cpc     r29, r17
-       ldi     r24, hh8(__ctors_start)
-       cpc     r16, r24
-       brne    .L__do_global_ctors_loop
-#else
-       ldi     r17, hi8(__ctors_start)
-       ldi     r28, lo8(__ctors_end)
-       ldi     r29, hi8(__ctors_end)
-       rjmp    .L__do_global_ctors_start
-.L__do_global_ctors_loop:
-       sbiw    r28, 2
-       mov_h   r31, r29
-       mov_l   r30, r28
-       XCALL   __tablejump__
-.L__do_global_ctors_start:
-       cpi     r28, lo8(__ctors_start)
-       cpc     r29, r17
-       brne    .L__do_global_ctors_loop
-#endif /* defined(__AVR_HAVE_RAMPZ__) */
-ENDF __do_global_ctors
-#endif /* L_ctors */
-
-#ifdef L_dtors
-       .section .fini6,"ax",@progbits
-DEFUN __do_global_dtors
-#if defined(__AVR_HAVE_RAMPZ__)
-       ldi     r17, hi8(__dtors_end)
-       ldi     r28, lo8(__dtors_start)
-       ldi     r29, hi8(__dtors_start)
-       ldi     r16, hh8(__dtors_start)
-       rjmp    .L__do_global_dtors_start
-.L__do_global_dtors_loop:
-       sbiw    r28, 2
-       sbc     r16, __zero_reg__
-       mov_h   r31, r29
-       mov_l   r30, r28
-       out     __RAMPZ__, r16
-       XCALL   __tablejump_elpm__
-.L__do_global_dtors_start:
-       cpi     r28, lo8(__dtors_end)
-       cpc     r29, r17
-       ldi     r24, hh8(__dtors_end)
-       cpc     r16, r24
-       brne    .L__do_global_dtors_loop
-#else
-       ldi     r17, hi8(__dtors_end)
-       ldi     r28, lo8(__dtors_start)
-       ldi     r29, hi8(__dtors_start)
-       rjmp    .L__do_global_dtors_start
-.L__do_global_dtors_loop:
-       mov_h   r31, r29
-       mov_l   r30, r28
-       XCALL   __tablejump__
-       adiw    r28, 2
-.L__do_global_dtors_start:
-       cpi     r28, lo8(__dtors_end)
-       cpc     r29, r17
-       brne    .L__do_global_dtors_loop
-#endif /* defined(__AVR_HAVE_RAMPZ__) */
-ENDF __do_global_dtors
-#endif /* L_dtors */
-
-.section .text.libgcc, "ax", @progbits
-    
-#ifdef L_tablejump_elpm
-DEFUN __tablejump_elpm__
-#if defined (__AVR_HAVE_ELPM__)
-#if defined (__AVR_HAVE_LPMX__)
-       elpm    __tmp_reg__, Z+
-       elpm    r31, Z
-       mov     r30, __tmp_reg__
-#if defined (__AVR_HAVE_EIJMP_EICALL__)
-       eijmp
-#else
-       ijmp
-#endif
-
-#else
-       elpm
-       adiw    r30, 1
-       push    r0
-       elpm
-       push    r0
-#if defined (__AVR_HAVE_EIJMP_EICALL__)
-       in      __tmp_reg__, __EIND__
-       push    __tmp_reg__
-#endif
-       ret
-#endif
-#endif /* defined (__AVR_HAVE_ELPM__) */
-ENDF __tablejump_elpm__
-#endif /* defined (L_tablejump_elpm) */
-
-\f
-.section .text.libgcc.builtins, "ax", @progbits
-
-/**********************************
- * Find first set Bit (ffs)
- **********************************/
-
-#if defined (L_ffssi2)
-;; find first set bit
-;; r25:r24 = ffs32 (r25:r22)
-;; clobbers: r22, r26
-DEFUN __ffssi2
-    clr  r26
-    tst  r22
-    brne 1f
-    subi r26, -8
-    or   r22, r23
-    brne 1f
-    subi r26, -8
-    or   r22, r24
-    brne 1f
-    subi r26, -8
-    or   r22, r25
-    brne 1f
-    ret
-1:  mov  r24, r22
-    XJMP __loop_ffsqi2
-ENDF __ffssi2
-#endif /* defined (L_ffssi2) */
-
-#if defined (L_ffshi2)
-;; find first set bit
-;; r25:r24 = ffs16 (r25:r24)
-;; clobbers: r26
-DEFUN __ffshi2
-    clr  r26
-#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
-    ;; Some cores have problem skipping 2-word instruction
-    tst  r24
-    breq 2f
-#else
-    cpse r24, __zero_reg__
-#endif /* __AVR_HAVE_JMP_CALL__ */
-1:  XJMP __loop_ffsqi2
-2:  ldi  r26, 8
-    or   r24, r25
-    brne 1b
-    ret
-ENDF __ffshi2
-#endif /* defined (L_ffshi2) */
-
-#if defined (L_loop_ffsqi2)
-;; Helper for ffshi2, ffssi2
-;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
-;; r24 must be != 0
-;; clobbers: r26
-DEFUN __loop_ffsqi2
-    inc  r26
-    lsr  r24
-    brcc __loop_ffsqi2
-    mov  r24, r26
-    clr  r25
-    ret    
-ENDF __loop_ffsqi2
-#endif /* defined (L_loop_ffsqi2) */
-
-\f
-/**********************************
- * Count trailing Zeros (ctz)
- **********************************/
-
-#if defined (L_ctzsi2)
-;; count trailing zeros
-;; r25:r24 = ctz32 (r25:r22)
-;; clobbers: r26, r22
-;; ctz(0) = 255
-;; Note that ctz(0) in undefined for GCC
-DEFUN __ctzsi2
-    XCALL __ffssi2
-    dec  r24
-    ret
-ENDF __ctzsi2
-#endif /* defined (L_ctzsi2) */
-
-#if defined (L_ctzhi2)
-;; count trailing zeros
-;; r25:r24 = ctz16 (r25:r24)
-;; clobbers: r26
-;; ctz(0) = 255
-;; Note that ctz(0) in undefined for GCC
-DEFUN __ctzhi2
-    XCALL __ffshi2
-    dec  r24
-    ret
-ENDF __ctzhi2
-#endif /* defined (L_ctzhi2) */
-
-\f
-/**********************************
- * Count leading Zeros (clz)
- **********************************/
-
-#if defined (L_clzdi2)
-;; count leading zeros
-;; r25:r24 = clz64 (r25:r18)
-;; clobbers: r22, r23, r26
-DEFUN __clzdi2
-    XCALL __clzsi2
-    sbrs r24, 5
-    ret
-    mov_l r22, r18
-    mov_h r23, r19
-    mov_l r24, r20
-    mov_h r25, r21
-    XCALL __clzsi2
-    subi r24, -32
-    ret
-ENDF __clzdi2
-#endif /* defined (L_clzdi2) */
-
-#if defined (L_clzsi2)
-;; count leading zeros
-;; r25:r24 = clz32 (r25:r22)
-;; clobbers: r26
-DEFUN __clzsi2
-    XCALL __clzhi2
-    sbrs r24, 4
-    ret
-    mov_l r24, r22
-    mov_h r25, r23
-    XCALL __clzhi2
-    subi r24, -16
-    ret
-ENDF __clzsi2
-#endif /* defined (L_clzsi2) */
-
-#if defined (L_clzhi2)
-;; count leading zeros
-;; r25:r24 = clz16 (r25:r24)
-;; clobbers: r26
-DEFUN __clzhi2
-    clr  r26
-    tst  r25
-    brne 1f
-    subi r26, -8
-    or   r25, r24
-    brne 1f
-    ldi  r24, 16
-    ret
-1:  cpi  r25, 16
-    brsh 3f
-    subi r26, -3
-    swap r25
-2:  inc  r26
-3:  lsl  r25
-    brcc 2b
-    mov  r24, r26
-    clr  r25
-    ret
-ENDF __clzhi2
-#endif /* defined (L_clzhi2) */
-
-\f
-/**********************************
- * Parity 
- **********************************/
-
-#if defined (L_paritydi2)
-;; r25:r24 = parity64 (r25:r18)
-;; clobbers: __tmp_reg__
-DEFUN __paritydi2
-    eor  r24, r18
-    eor  r24, r19
-    eor  r24, r20
-    eor  r24, r21
-    XJMP __paritysi2
-ENDF __paritydi2
-#endif /* defined (L_paritydi2) */
-
-#if defined (L_paritysi2)
-;; r25:r24 = parity32 (r25:r22)
-;; clobbers: __tmp_reg__
-DEFUN __paritysi2
-    eor  r24, r22
-    eor  r24, r23
-    XJMP __parityhi2
-ENDF __paritysi2
-#endif /* defined (L_paritysi2) */
-
-#if defined (L_parityhi2)
-;; r25:r24 = parity16 (r25:r24)
-;; clobbers: __tmp_reg__
-DEFUN __parityhi2
-    eor  r24, r25
-;; FALLTHRU
-ENDF __parityhi2
-
-;; r25:r24 = parity8 (r24)
-;; clobbers: __tmp_reg__
-DEFUN __parityqi2
-    ;; parity is in r24[0..7]
-    mov  __tmp_reg__, r24
-    swap __tmp_reg__
-    eor  r24, __tmp_reg__
-    ;; parity is in r24[0..3]
-    subi r24, -4
-    andi r24, -5
-    subi r24, -6
-    ;; parity is in r24[0,3]
-    sbrc r24, 3
-    inc  r24
-    ;; parity is in r24[0]
-    andi r24, 1
-    clr  r25
-    ret
-ENDF __parityqi2
-#endif /* defined (L_parityhi2) */
-
-\f
-/**********************************
- * Population Count
- **********************************/
-
-#if defined (L_popcounthi2)
-;; population count
-;; r25:r24 = popcount16 (r25:r24)
-;; clobbers: __tmp_reg__
-DEFUN __popcounthi2
-    XCALL __popcountqi2
-    push r24
-    mov  r24, r25
-    XCALL __popcountqi2
-    clr  r25
-    ;; FALLTHRU
-ENDF __popcounthi2
-
-DEFUN __popcounthi2_tail
-    pop   __tmp_reg__
-    add   r24, __tmp_reg__
-    ret
-ENDF __popcounthi2_tail
-#endif /* defined (L_popcounthi2) */
-
-#if defined (L_popcountsi2)
-;; population count
-;; r25:r24 = popcount32 (r25:r22)
-;; clobbers: __tmp_reg__
-DEFUN __popcountsi2
-    XCALL __popcounthi2
-    push  r24
-    mov_l r24, r22
-    mov_h r25, r23
-    XCALL __popcounthi2
-    XJMP  __popcounthi2_tail
-ENDF __popcountsi2
-#endif /* defined (L_popcountsi2) */
-
-#if defined (L_popcountdi2)
-;; population count
-;; r25:r24 = popcount64 (r25:r18)
-;; clobbers: r22, r23, __tmp_reg__
-DEFUN __popcountdi2
-    XCALL __popcountsi2
-    push  r24
-    mov_l r22, r18
-    mov_h r23, r19
-    mov_l r24, r20
-    mov_h r25, r21
-    XCALL __popcountsi2
-    XJMP  __popcounthi2_tail
-ENDF __popcountdi2
-#endif /* defined (L_popcountdi2) */
-
-#if defined (L_popcountqi2)
-;; population count
-;; r24 = popcount8 (r24)
-;; clobbers: __tmp_reg__
-DEFUN __popcountqi2
-    mov  __tmp_reg__, r24
-    andi r24, 1
-    lsr  __tmp_reg__    
-    lsr  __tmp_reg__    
-    adc  r24, __zero_reg__
-    lsr  __tmp_reg__    
-    adc  r24, __zero_reg__
-    lsr  __tmp_reg__    
-    adc  r24, __zero_reg__
-    lsr  __tmp_reg__    
-    adc  r24, __zero_reg__
-    lsr  __tmp_reg__    
-    adc  r24, __zero_reg__
-    lsr  __tmp_reg__    
-    adc  r24, __tmp_reg__    
-    ret    
-ENDF __popcountqi2
-#endif /* defined (L_popcountqi2) */
-
-\f
-/**********************************
- * Swap bytes
- **********************************/
-
-;; swap two registers with different register number
-.macro bswap a, b
-    eor \a, \b
-    eor \b, \a
-    eor \a, \b
-.endm
-
-#if defined (L_bswapsi2)
-;; swap bytes
-;; r25:r22 = bswap32 (r25:r22)
-DEFUN __bswapsi2
-    bswap r22, r25
-    bswap r23, r24
-    ret
-ENDF __bswapsi2
-#endif /* defined (L_bswapsi2) */
-
-#if defined (L_bswapdi2)
-;; swap bytes
-;; r25:r18 = bswap64 (r25:r18)
-DEFUN __bswapdi2
-    bswap r18, r25
-    bswap r19, r24
-    bswap r20, r23
-    bswap r21, r22
-    ret
-ENDF __bswapdi2
-#endif /* defined (L_bswapdi2) */
-
-\f
-/**********************************
- * 64-bit shifts
- **********************************/
-
-#if defined (L_ashrdi3)
-;; Arithmetic shift right
-;; r25:r18 = ashr64 (r25:r18, r17:r16)
-DEFUN __ashrdi3
-    push r16
-    andi r16, 63
-    breq 2f
-1:  asr  r25
-    ror  r24
-    ror  r23
-    ror  r22
-    ror  r21
-    ror  r20
-    ror  r19
-    ror  r18
-    dec  r16
-    brne 1b
-2:  pop  r16
-    ret
-ENDF __ashrdi3
-#endif /* defined (L_ashrdi3) */
-
-#if defined (L_lshrdi3)
-;; Logic shift right
-;; r25:r18 = lshr64 (r25:r18, r17:r16)
-DEFUN __lshrdi3
-    push r16
-    andi r16, 63
-    breq 2f
-1:  lsr  r25
-    ror  r24
-    ror  r23
-    ror  r22
-    ror  r21
-    ror  r20
-    ror  r19
-    ror  r18
-    dec  r16
-    brne 1b
-2:  pop  r16
-    ret
-ENDF __lshrdi3
-#endif /* defined (L_lshrdi3) */
-
-#if defined (L_ashldi3)
-;; Shift left
-;; r25:r18 = ashl64 (r25:r18, r17:r16)
-DEFUN __ashldi3
-    push r16
-    andi r16, 63
-    breq 2f
-1:  lsl  r18
-    rol  r19
-    rol  r20
-    rol  r21
-    rol  r22
-    rol  r23
-    rol  r24
-    rol  r25
-    dec  r16
-    brne 1b
-2:  pop  r16
-    ret
-ENDF __ashldi3
-#endif /* defined (L_ashldi3) */
-
-\f
-.section .text.libgcc.fmul, "ax", @progbits
-
-/***********************************************************/    
-;;; Softmul versions of FMUL, FMULS and FMULSU to implement
-;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
-/***********************************************************/    
-
-#define A1 24
-#define B1 25
-#define C0 22
-#define C1 23
-#define A0 __tmp_reg__
-
-#ifdef L_fmuls
-;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
-;;; Clobbers: r24, r25, __tmp_reg__
-DEFUN __fmuls
-    ;; A0.7 = negate result?
-    mov  A0, A1
-    eor  A0, B1
-    ;; B1 = |B1|
-    sbrc B1, 7
-    neg  B1
-    XJMP __fmulsu_exit
-ENDF __fmuls
-#endif /* L_fmuls */
-
-#ifdef L_fmulsu
-;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
-;;; Clobbers: r24, r25, __tmp_reg__
-DEFUN __fmulsu
-    ;; A0.7 = negate result?
-    mov  A0, A1
-;; FALLTHRU
-ENDF __fmulsu
-
-;; Helper for __fmuls and __fmulsu
-DEFUN __fmulsu_exit
-    ;; A1 = |A1|
-    sbrc A1, 7
-    neg  A1
-#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
-    ;; Some cores have problem skipping 2-word instruction
-    tst  A0
-    brmi 1f
-#else
-    sbrs A0, 7
-#endif /* __AVR_HAVE_JMP_CALL__ */
-    XJMP  __fmul
-1:  XCALL __fmul
-    ;; C = -C iff A0.7 = 1
-    com  C1
-    neg  C0
-    sbci C1, -1
-    ret
-ENDF __fmulsu_exit
-#endif /* L_fmulsu */
-
-
-#ifdef L_fmul
-;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
-;;; Clobbers: r24, r25, __tmp_reg__
-DEFUN __fmul
-    ; clear result
-    clr   C0
-    clr   C1
-    clr   A0
-1:  tst   B1
-    ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C.
-2:  brpl  3f
-    ;; C += A
-    add   C0, A0
-    adc   C1, A1
-3:  ;; A >>= 1
-    lsr   A1
-    ror   A0
-    ;; B <<= 1
-    lsl   B1
-    brne  2b
-    ret
-ENDF __fmul
-#endif /* L_fmul */
-
-#undef A0
-#undef A1
-#undef B1
-#undef C0
-#undef C1
diff --git a/gcc/config/avr/t-avr b/gcc/config/avr/t-avr

index 30e8d96447e56175aff762bbe9239feaf45abfdc..3f37e591f8ecd205480ea47808d596fa15380214 100644 (file)
--- a/gcc/config/avr/t-avr
+++ b/gcc/config/avr/t-avr
@@ -39,54 +39,6 @@ $(srcdir)/config/avr/avr-tables.opt: $(srcdir)/config/avr/genopt.sh \
         $(SHELL) $(srcdir)/config/avr/genopt.sh $(srcdir)/config/avr > \
                 $(srcdir)/config/avr/avr-tables.opt
  
-LIB1ASMSRC = avr/libgcc.S
-LIB1ASMFUNCS = \
-       _mulqi3 \
-       _mulhi3 \
-       _mulhisi3 \
-       _umulhisi3 \
-       _usmulhisi3 \
-       _muluhisi3 \
-       _mulshisi3 \
-       _mulsi3 \
-       _udivmodqi4 \
-       _divmodqi4 \
-       _udivmodhi4 \
-       _divmodhi4 \
-       _udivmodsi4 \
-       _divmodsi4 \
-       _prologue \
-       _epilogue \
-       _exit \
-       _cleanup \
-       _tablejump \
-       _tablejump_elpm \
-       _copy_data \
-       _clear_bss \
-       _ctors \
-       _dtors \
-       _ffssi2 \
-       _ffshi2 \
-       _loop_ffsqi2 \
-       _ctzsi2 \
-       _ctzhi2 \
-       _clzdi2 \
-       _clzsi2 \
-       _clzhi2 \
-       _paritydi2 \
-       _paritysi2 \
-       _parityhi2 \
-       _popcounthi2 \
-       _popcountsi2 \
-       _popcountdi2 \
-       _popcountqi2 \
-       _bswapsi2 \
-       _bswapdi2 \
-       _ashldi3 \
-       _ashrdi3 \
-       _lshrdi3 \
-       _fmul _fmuls _fmulsu
-
  LIB2FUNCS_EXCLUDE = \
         _clz
  
diff --git a/gcc/config/bfin/lib1funcs.asm b/gcc/config/bfin/lib1funcs.asm

deleted file mode 100644 (file)

index c7bf4f3..0000000
--- a/gcc/config/bfin/lib1funcs.asm
+++ /dev/null
@@ -1,211 +0,0 @@
-/* libgcc functions for Blackfin.
-   Copyright (C) 2005, 2009 Free Software Foundation, Inc.
-   Contributed by Analog Devices.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-#ifdef L_divsi3
-.text
-.align 2
-.global ___divsi3;
-.type ___divsi3, STT_FUNC;
-
-___divsi3:
-        [--SP]= RETS;
-       [--SP] = R7;
-
-       R2 = -R0;
-        CC = R0 < 0;
-       IF CC R0 = R2;
-       R7 = CC;
-
-       R2 = -R1;
-        CC = R1 < 0;
-       IF CC R1 = R2;
-       R2 = CC;
-       R7 = R7 ^ R2;
-
-        CALL ___udivsi3;
-
-       CC = R7;
-       R1 = -R0;
-       IF CC R0 = R1;
-
-       R7 = [SP++];
-        RETS = [SP++];
-        RTS;
-#endif
-
-#ifdef L_modsi3        
-.align 2
-.global ___modsi3;
-.type ___modsi3, STT_FUNC;
-
-___modsi3:
-       [--SP] = RETS;
-       [--SP] = R0;
-       [--SP] = R1;
-       CALL ___divsi3;
-       R2 = [SP++];
-       R1 = [SP++];
-       R2 *= R0;
-       R0 = R1 - R2;
-       RETS = [SP++];
-       RTS; 
-#endif
-
-#ifdef L_udivsi3
-.align 2
-.global ___udivsi3;
-.type ___udivsi3, STT_FUNC;
-
-___udivsi3:
-        P0 = 32;
-        LSETUP (0f, 1f) LC0 = P0;
-       /* upper half of dividend */
-        R3 = 0;
-0:
-       /* The first time round in the loop we shift in garbage, but since we
-          perform 33 shifts, it doesn't matter.  */
-       R0 = ROT R0 BY 1;
-       R3 = ROT R3 BY 1;
-       R2 = R3 - R1;
-        CC = R3 < R1 (IU);
-1:
-       /* Last instruction of the loop.  */
-       IF ! CC R3 = R2;
-
-       /* Shift in the last bit.  */
-       R0 = ROT R0 BY 1;
-       /* R0 is the result, R3 contains the remainder.  */
-       R0 = ~ R0;
-        RTS;
-#endif
-
-#ifdef L_umodsi3
-.align 2
-.global ___umodsi3;
-.type ___umodsi3, STT_FUNC;
-
-___umodsi3:
-       [--SP] = RETS;
-       CALL ___udivsi3;
-       R0 = R3;
-       RETS = [SP++]; 
-       RTS;
-#endif
-
-#ifdef L_umulsi3_highpart
-.align 2
-.global ___umulsi3_highpart;
-.type ___umulsi3_highpart, STT_FUNC;
-
-___umulsi3_highpart:
-       A1 = R1.L * R0.L (FU);
-       A1 = A1 >> 16;
-       A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU);
-       A1 += R0.L * R1.H (FU);
-       A1 = A1 >> 16;
-       A0 += A1;
-       R0 = A0 (FU);
-       RTS;
-#endif
-
-#ifdef L_smulsi3_highpart
-.align 2
-.global ___smulsi3_highpart;
-.type ___smulsi3_highpart, STT_FUNC;
-
-___smulsi3_highpart:
-       A1 = R1.L * R0.L (FU);
-       A1 = A1 >> 16;
-       A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M);
-       A1 += R1.H * R0.L (IS,M);
-       A1 = A1 >>> 16;
-       R0 = (A0 += A1);
-       RTS;
-#endif
-
-#ifdef L_muldi3
-.align 2
-.global ___muldi3;
-.type ___muldi3, STT_FUNC;
-
-/*
-          R1:R0 * R3:R2
-        = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l
-[X]     = (R1.h * R3.h) * 2^96
-[X]       + (R1.h * R3.l + R1.l * R3.h) * 2^80
-[X]       + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64
-[T1]      + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48
-[T2]      + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32
-[T3]      + (R0.l * R2.h + R2.l * R0.h) * 2^16
-[T4]      + (R0.l * R2.l)
-
-       We can discard the first three lines marked "X" since we produce
-       only a 64 bit result.  So, we need ten 16-bit multiplies.
-
-       Individual mul-acc results:
-[E1]    =  R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h
-[E2]    =  R1.l * R2.l + R3.l * R0.l + R0.h * R2.h
-[E3]    =  R0.l * R2.h + R2.l * R0.h
-[E4]    =  R0.l * R2.l
-
-       We also need to add high parts from lower-level results to higher ones:
-       E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4
-
-       One interesting property is that all parts of the result that depend
-       on the sign of the multiplication are discarded.  Those would be the
-       multiplications involving R1.h and R3.h, but only the top 16 bit of
-       the 32 bit result depend on the sign, and since R1.h and R3.h only
-       occur in E1, the top half of these results is cut off.
-       So, we can just use FU mode for all of the 16-bit multiplies, and
-       ignore questions of when to use mixed mode.  */
-
-___muldi3:
-       /* [SP] technically is part of the caller's frame, but we can
-          use it as scratch space.  */
-       A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12];      /* E1 */
-       A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4;         /* E1 */
-       A0 += A1;                                                       /* E1 */
-       R4 = A0.w;
-       A0 = R0.l * R3.l (FU);                                          /* E2 */
-       A0 += R2.l * R1.l (FU);                                         /* E2 */
-
-       A1 = R2.L * R0.L (FU);                                          /* E4 */
-       R3 = A1.w;
-       A1 = A1 >> 16;                                                  /* E3c */
-       A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU);                      /* E2, E3c */
-       A1 += R0.L * R2.H (FU);                                         /* E3c */
-       R0 = A1.w;
-       A1 = A1 >> 16;                                                  /* E2c */
-       A0 += A1;                                                       /* E2c */
-       R1 = A0.w;
-
-       /* low(result) = low(E3c):low(E4) */
-       R0 = PACK (R0.l, R3.l);
-       /* high(result) = E2c + (E1 << 16) */
-       R1.h = R1.h + R4.l (NS) || R4 = [SP];
-       RTS;
-
-.size ___muldi3, .-___muldi3
-#endif
diff --git a/gcc/config/bfin/t-bfin b/gcc/config/bfin/t-bfin

deleted file mode 100644 (file)

index bb95ab4..0000000
--- a/gcc/config/bfin/t-bfin
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (C) 2005, 2007, 2011 Free Software Foundation, Inc.
-#
-# This file is part of GCC.
-#
-# GCC is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GCC is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GCC; see the file COPYING3.  If not see
-# <http://www.gnu.org/licenses/>.
-
-## Target part of the Makefile
-
-LIB1ASMSRC = bfin/lib1funcs.asm
-LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart
-LIB1ASMFUNCS += _smulsi3_highpart
diff --git a/gcc/config/bfin/t-bfin-elf b/gcc/config/bfin/t-bfin-elf

index fcf76c4ddfef307feb9ec8e4907756f77243eae1..5cbcfeeb87f7f01a9d27445d3a7a037a38b74ef8 100644 (file)
--- a/gcc/config/bfin/t-bfin-elf
+++ b/gcc/config/bfin/t-bfin-elf
@@ -18,10 +18,6 @@
  
  ## Target part of the Makefile
  
-LIB1ASMSRC = bfin/lib1funcs.asm
-LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart
-LIB1ASMFUNCS += _smulsi3_highpart
-
  TARGET_LIBGCC2_CFLAGS = -fpic
  
  MULTILIB_OPTIONS=mcpu=bf532-none
diff --git a/gcc/config/bfin/t-bfin-linux b/gcc/config/bfin/t-bfin-linux

index a83f9f2da83a0209d45c3d6efab7e9fbad542af3..9a1d6a0943700fba3faf043c7b978163faa593fd 100644 (file)
--- a/gcc/config/bfin/t-bfin-linux
+++ b/gcc/config/bfin/t-bfin-linux
@@ -18,10 +18,6 @@
  
  ## Target part of the Makefile
  
-LIB1ASMSRC = bfin/lib1funcs.asm
-LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart
-LIB1ASMFUNCS += _smulsi3_highpart
-
  TARGET_LIBGCC2_CFLAGS = -fpic
  
  MULTILIB_OPTIONS=mcpu=bf532-none
diff --git a/gcc/config/bfin/t-bfin-uclinux b/gcc/config/bfin/t-bfin-uclinux

index 1be0796987b614b407598c0d7910e20fa88d7e52..b9fca803e0af4616655772eb3455f2641157a600 100644 (file)
--- a/gcc/config/bfin/t-bfin-uclinux
+++ b/gcc/config/bfin/t-bfin-uclinux
@@ -18,10 +18,6 @@
  
  ## Target part of the Makefile
  
-LIB1ASMSRC = bfin/lib1funcs.asm
-LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart
-LIB1ASMFUNCS += _smulsi3_highpart
-
  TARGET_LIBGCC2_CFLAGS = -fpic
  
  MULTILIB_OPTIONS=mcpu=bf532-none
diff --git a/gcc/config/c6x/lib1funcs.asm b/gcc/config/c6x/lib1funcs.asm

deleted file mode 100644 (file)

index 5bf3447..0000000
--- a/gcc/config/c6x/lib1funcs.asm
+++ /dev/null
@@ -1,438 +0,0 @@
-/* Copyright 2010, 2011  Free Software Foundation, Inc.
-   Contributed by Bernd Schmidt <bernds@codesourcery.com>.
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-       ;; ABI considerations for the divide functions
-       ;; The following registers are call-used:
-       ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
-       ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
-       ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
-       ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
-       ;;
-       ;; In our implementation, divu and remu are leaf functions,
-       ;; while both divi and remi call into divu.
-       ;; A0 is not clobbered by any of the functions.
-       ;; divu does not clobber B2 either, which is taken advantage of
-       ;; in remi.
-       ;; divi uses B5 to hold the original return address during
-       ;; the call to divu.
-       ;; remi uses B2 and A5 to hold the input values during the
-       ;; call to divu.  It stores B3 in on the stack.
-
-#ifdef L_divsi3
-.text
-.align 2
-.global __c6xabi_divi
-.hidden __c6xabi_divi
-.type __c6xabi_divi, STT_FUNC
-
-__c6xabi_divi:
-       call .s2        __c6xabi_divu
-||     mv .d2          B3, B5
-||     cmpgt .l1       0, A4, A1
-||     cmpgt .l2       0, B4, B1
-
-       [A1] neg .l1    A4, A4
-||     [B1] neg .l2    B4, B4
-||     xor .s1x        A1, B1, A1
-
-#ifdef _TMS320C6400
-       [A1] addkpc .s2 1f, B3, 4
-#else
-       [A1] mvkl .s2   1f, B3
-       [A1] mvkh .s2   1f, B3
-       nop             2
-#endif
-1:
-       neg .l1         A4, A4
-||     mv .l2          B3,B5
-||     ret .s2         B5
-       nop             5
-#endif
-
-#if defined L_modsi3 || defined L_divmodsi4
-.align 2
-#ifdef L_modsi3
-#define MOD_OUTPUT_REG A4
-.global __c6xabi_remi
-.hidden __c6xabi_remi
-.type __c6xabi_remi, STT_FUNC
-#else
-#define MOD_OUTPUT_REG A5
-.global __c6xabi_divremi
-.hidden __c6xabi_divremi
-.type __c6xabi_divremi, STT_FUNC
-__c6xabi_divremi:
-#endif
-
-__c6xabi_remi:
-       stw .d2t2       B3, *B15--[2]
-||     cmpgt .l1       0, A4, A1
-||     cmpgt .l2       0, B4, B2
-||     mv .s1          A4, A5
-||     call .s2        __c6xabi_divu
-
-       [A1] neg .l1    A4, A4
-||     [B2] neg .l2    B4, B4
-||     xor .s2x        B2, A1, B0
-||     mv .d2          B4, B2
-
-#ifdef _TMS320C6400
-       [B0] addkpc .s2 1f, B3, 1
-       [!B0] addkpc .s2 2f, B3, 1
-       nop             2
-#else
-       [B0] mvkl .s2   1f,B3
-       [!B0] mvkl .s2  2f,B3
-
-       [B0] mvkh .s2   1f,B3
-       [!B0] mvkh .s2  2f,B3
-#endif
-1:
-       neg .l1         A4, A4
-2:
-       ldw .d2t2       *++B15[2], B3
-
-#ifdef _TMS320C6400_PLUS
-       mpy32 .m1x      A4, B2, A6
-       nop             3
-       ret .s2         B3
-       sub .l1         A5, A6, MOD_OUTPUT_REG
-       nop             4
-#else
-       mpyu .m1x       A4, B2, A1
-       nop             1
-       mpylhu .m1x     A4, B2, A6
-||     mpylhu .m2x     B2, A4, B2
-       nop             1
-       add .l1x        A6, B2, A6
-||     ret .s2         B3
-       shl .s1         A6, 16, A6
-       add .d1         A6, A1, A6
-       sub .l1         A5, A6, MOD_OUTPUT_REG
-       nop             2
-#endif
-
-#endif
-
-#if defined L_udivsi3 || defined L_udivmodsi4
-.align 2
-#ifdef L_udivsi3
-.global __c6xabi_divu
-.hidden __c6xabi_divu
-.type __c6xabi_divu, STT_FUNC
-__c6xabi_divu:
-#else
-.global __c6xabi_divremu
-.hidden __c6xabi_divremu
-.type __c6xabi_divremu, STT_FUNC
-__c6xabi_divremu:
-#endif
-       ;; We use a series of up to 31 subc instructions.  First, we find
-       ;; out how many leading zero bits there are in the divisor.  This
-       ;; gives us both a shift count for aligning (shifting) the divisor
-       ;; to the, and the number of times we have to execute subc.
-
-       ;; At the end, we have both the remainder and most of the quotient
-       ;; in A4.  The top bit of the quotient is computed first and is
-       ;; placed in A2.
-
-       ;; Return immediately if the dividend is zero.  Setting B4 to 1
-       ;; is a trick to allow us to leave the following insns in the jump
-       ;; delay slot without affecting the result.
-       mv      .s2x    A4, B1
-
-#ifndef _TMS320C6400
-[!b1]  mvk     .s2     1, B4
-#endif
-[b1]   lmbd    .l2     1, B4, B1
-||[!b1] b      .s2     B3      ; RETURN A
-#ifdef _TMS320C6400
-||[!b1] mvk    .d2     1, B4
-#endif
-#ifdef L_udivmodsi4
-||[!b1] zero   .s1     A5
-#endif
-       mv      .l1x    B1, A6
-||     shl     .s2     B4, B1, B4
-
-       ;; The loop performs a maximum of 28 steps, so we do the
-       ;; first 3 here.
-       cmpltu  .l1x    A4, B4, A2
-[!A2]  sub     .l1x    A4, B4, A4
-||     shru    .s2     B4, 1, B4
-||     xor     .s1     1, A2, A2
-
-       shl     .s1     A2, 31, A2
-|| [b1]        subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-[b1]   subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-
-       ;; RETURN A may happen here (note: must happen before the next branch)
-0:
-       cmpgt   .l2     B1, 7, B0
-|| [b1]        subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-[b1]   subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-|| [b0] b      .s1     0b
-[b1]   subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-[b1]   subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-[b1]   subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-[b1]   subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-[b1]   subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-       ;; loop backwards branch happens here
-
-       ret     .s2     B3
-||     mvk     .s1     32, A1
-       sub     .l1     A1, A6, A6
-#ifdef L_udivmodsi4
-||     extu    .s1     A4, A6, A5
-#endif
-       shl     .s1     A4, A6, A4
-       shru    .s1     A4, 1, A4
-||     sub     .l1     A6, 1, A6
-       or      .l1     A2, A4, A4
-       shru    .s1     A4, A6, A4
-       nop
-
-#endif
-
-#ifdef L_umodsi3
-.align 2
-.global __c6xabi_remu
-.hidden __c6xabi_remu
-.type __c6xabi_remu, STT_FUNC
-__c6xabi_remu:
-       ;; The ABI seems designed to prevent these functions calling each other,
-       ;; so we duplicate most of the divsi3 code here.
-       mv      .s2x    A4, B1
-#ifndef _TMS320C6400
-[!b1]  mvk     .s2     1, B4
-#endif
-       lmbd    .l2     1, B4, B1
-||[!b1] b      .s2     B3      ; RETURN A
-#ifdef _TMS320C6400
-||[!b1] mvk    .d2     1, B4
-#endif
-
-       mv      .l1x    B1, A7
-||     shl     .s2     B4, B1, B4
-
-       cmpltu  .l1x    A4, B4, A1
-[!a1]  sub     .l1x    A4, B4, A4
-       shru    .s2     B4, 1, B4
-
-0:
-       cmpgt   .l2     B1, 7, B0
-|| [b1]        subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-       ;; RETURN A may happen here (note: must happen before the next branch)
-[b1]   subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-|| [b0] b      .s1     0b
-[b1]   subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-[b1]   subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-[b1]   subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-[b1]   subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-[b1]   subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-       ;; loop backwards branch happens here
-
-       ret     .s2     B3
-[b1]   subc    .l1x    A4,B4,A4
-|| [b1]        add     .s2     -1, B1, B1
-[b1]   subc    .l1x    A4,B4,A4
-
-       extu    .s1     A4, A7, A4
-       nop     2
-#endif
-
-#if defined L_strasgi_64plus && defined _TMS320C6400_PLUS
-
-.align 2
-.global __c6xabi_strasgi_64plus
-.hidden __c6xabi_strasgi_64plus
-.type __c6xabi_strasgi_64plus, STT_FUNC
-__c6xabi_strasgi_64plus:
-       shru    .s2x    a6, 2, b31
-||     mv      .s1     a4, a30
-||     mv      .d2     b4, b30
-
-       add     .s2     -4, b31, b31
-
-       sploopd         1
-||     mvc     .s2     b31, ilc
-       ldw     .d2t2   *b30++, b31
-       nop     4
-       mv      .s1x    b31,a31
-       spkernel        6, 0
-||     stw     .d1t1   a31, *a30++
-
-       ret     .s2     b3
-       nop 5
-#endif
-
-#ifdef L_strasgi
-.global __c6xabi_strasgi
-.type __c6xabi_strasgi, STT_FUNC
-__c6xabi_strasgi:
-       ;; This is essentially memcpy, with alignment known to be at least
-       ;; 4, and the size a multiple of 4 greater than or equal to 28.
-       ldw     .d2t1   *B4++, A0
-||     mvk     .s2     16, B1
-       ldw     .d2t1   *B4++, A1
-||     mvk     .s2     20, B2
-||     sub     .d1     A6, 24, A6
-       ldw     .d2t1   *B4++, A5
-       ldw     .d2t1   *B4++, A7
-||     mv      .l2x    A6, B7
-       ldw     .d2t1   *B4++, A8
-       ldw     .d2t1   *B4++, A9
-||     mv      .s2x    A0, B5
-||     cmpltu  .l2     B2, B7, B0
-
-0:
-       stw     .d1t2   B5, *A4++
-||[b0] ldw     .d2t1   *B4++, A0
-||     mv      .s2x    A1, B5
-||     mv      .l2     B7, B6
-
-[b0]   sub     .d2     B6, 24, B7
-||[b0] b       .s2     0b
-||     cmpltu  .l2     B1, B6, B0
-
-[b0]   ldw     .d2t1   *B4++, A1
-||     stw     .d1t2   B5, *A4++
-||     mv      .s2x    A5, B5
-||     cmpltu  .l2     12, B6, B0
-
-[b0]   ldw     .d2t1   *B4++, A5
-||     stw     .d1t2   B5, *A4++
-||     mv      .s2x    A7, B5
-||     cmpltu  .l2     8, B6, B0
-
-[b0]   ldw     .d2t1   *B4++, A7
-||     stw     .d1t2   B5, *A4++
-||     mv      .s2x    A8, B5
-||     cmpltu  .l2     4, B6, B0
-
-[b0]   ldw     .d2t1   *B4++, A8
-||     stw     .d1t2   B5, *A4++
-||     mv      .s2x    A9, B5
-||     cmpltu  .l2     0, B6, B0
-
-[b0]   ldw     .d2t1   *B4++, A9
-||     stw     .d1t2   B5, *A4++
-||     mv      .s2x    A0, B5
-||     cmpltu  .l2     B2, B7, B0
-
-       ;; loop back branch happens here
-
-       cmpltu  .l2     B1, B6, B0
-||     ret     .s2     b3
-
-[b0]   stw     .d1t1   A1, *A4++
-||     cmpltu  .l2     12, B6, B0
-[b0]   stw     .d1t1   A5, *A4++
-||     cmpltu  .l2     8, B6, B0
-[b0]   stw     .d1t1   A7, *A4++
-||     cmpltu  .l2     4, B6, B0
-[b0]   stw     .d1t1   A8, *A4++
-||     cmpltu  .l2     0, B6, B0
-[b0]   stw     .d1t1   A9, *A4++
-
-       ;; return happens here
-
-#endif
-
-#ifdef _TMS320C6400_PLUS
-#ifdef L_push_rts
-.align 2
-.global __c6xabi_push_rts
-.hidden __c6xabi_push_rts
-.type __c6xabi_push_rts, STT_FUNC
-__c6xabi_push_rts:
-       stw .d2t2       B14, *B15--[2]
-       stdw .d2t1      A15:A14, *B15--
-||     b .s2x          A3
-       stdw .d2t2      B13:B12, *B15--
-       stdw .d2t1      A13:A12, *B15--
-       stdw .d2t2      B11:B10, *B15--
-       stdw .d2t1      A11:A10, *B15--
-       stdw .d2t2      B3:B2, *B15--
-#endif
-
-#ifdef L_pop_rts
-.align 2
-.global __c6xabi_pop_rts
-.hidden __c6xabi_pop_rts
-.type __c6xabi_pop_rts, STT_FUNC
-__c6xabi_pop_rts:
-       lddw .d2t2      *++B15, B3:B2
-       lddw .d2t1      *++B15, A11:A10
-       lddw .d2t2      *++B15, B11:B10
-       lddw .d2t1      *++B15, A13:A12
-       lddw .d2t2      *++B15, B13:B12
-       lddw .d2t1      *++B15, A15:A14
-||     b .s2           B3
-       ldw .d2t2       *++B15[2], B14
-       nop             4
-#endif
-
-#ifdef L_call_stub
-.align 2
-.global __c6xabi_call_stub
-.type __c6xabi_call_stub, STT_FUNC
-__c6xabi_call_stub:
-       stw .d2t1       A2, *B15--[2]
-       stdw .d2t1      A7:A6, *B15--
-||     call .s2        B31
-       stdw .d2t1      A1:A0, *B15--
-       stdw .d2t2      B7:B6, *B15--
-       stdw .d2t2      B5:B4, *B15--
-       stdw .d2t2      B1:B0, *B15--
-       stdw .d2t2      B3:B2, *B15--
-||     addkpc .s2      1f, B3, 0
-1:
-       lddw .d2t2      *++B15, B3:B2
-       lddw .d2t2      *++B15, B1:B0
-       lddw .d2t2      *++B15, B5:B4
-       lddw .d2t2      *++B15, B7:B6
-       lddw .d2t1      *++B15, A1:A0
-       lddw .d2t1      *++B15, A7:A6
-||     b .s2           B3
-       ldw .d2t1       *++B15[2], A2
-       nop             4
-#endif
-
-#endif
-
diff --git a/gcc/config/c6x/t-c6x-elf b/gcc/config/c6x/t-c6x-elf

index b3b4b850fe8e58049351829bdb2ca8839cc46de5..6bc2832026d034724e9027fe60def0507ff72431 100644 (file)
--- a/gcc/config/c6x/t-c6x-elf
+++ b/gcc/config/c6x/t-c6x-elf
@@ -18,11 +18,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMSRC = c6x/lib1funcs.asm
-LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _udivmodsi4 _divmodsi4
-LIB1ASMFUNCS += _strasgi _strasgi_64plus _clzsi2 _clzdi2 _clz
-LIB1ASMFUNCS += _push_rts _pop_rts _call_stub
-
  LIB2FUNCS_EXCLUDE = _cmpdi2 _ucmpdi2 _gcc_bcmp _eprintf _clzsi _clzdi
  EXTRA_HEADERS += $(srcdir)/ginclude/unwind-arm-common.h
  
diff --git a/gcc/config/fr30/lib1funcs.asm b/gcc/config/fr30/lib1funcs.asm

deleted file mode 100644 (file)

index 7c63453..0000000
--- a/gcc/config/fr30/lib1funcs.asm
+++ /dev/null
@@ -1,115 +0,0 @@
-/* libgcc routines for the FR30.
-   Copyright (C) 1998, 1999, 2009 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-       .macro FUNC_START name
-       .text
-       .globl __\name
-       .type  __\name, @function
-__\name:
-       .endm
-
-       .macro FUNC_END name
-       .size  __\name, . - __\name
-       .endm
-
-       .macro DIV_BODY reg number
-       .if \number
-       DIV_BODY  \reg, "\number - 1"
-       div1    \reg
-       .endif
-       .endm
-       
-#ifdef L_udivsi3
-FUNC_START udivsi3
-       ;; Perform an unsiged division of r4 / r5 and place the result in r4.
-       ;; Does not handle overflow yet...
-       mov     r4, mdl
-       div0u   r5
-       DIV_BODY r5 32
-       mov     mdl, r4
-       ret
-FUNC_END udivsi3
-#endif /* L_udivsi3 */
-
-#ifdef L_divsi3
-FUNC_START divsi3
-       ;; Perform a siged division of r4 / r5 and place the result in r4.
-       ;; Does not handle overflow yet...
-       mov     r4, mdl
-       div0s   r5
-       DIV_BODY r5 32
-       div2    r5
-       div3
-       div4s
-       mov     mdl, r4
-       ret
-FUNC_END divsi3
-#endif /* L_divsi3 */
-
-#ifdef L_umodsi3
-FUNC_START umodsi3
-       ;; Perform an unsiged division of r4 / r5 and places the remainder in r4.
-       ;; Does not handle overflow yet...
-       mov     r4, mdl
-       div0u   r5
-       DIV_BODY r5 32
-       mov     mdh, r4
-       ret
-FUNC_END umodsi3
-#endif /* L_umodsi3 */
-
-#ifdef L_modsi3
-FUNC_START modsi3
-       ;; Perform a siged division of r4 / r5 and place the remainder in r4.
-       ;; Does not handle overflow yet...
-       mov     r4, mdl
-       div0s   r5
-       DIV_BODY r5 32
-       div2    r5
-       div3
-       div4s
-       mov     mdh, r4
-       ret
-FUNC_END modsi3
-#endif /* L_modsi3 */
-
-#ifdef L_negsi2
-FUNC_START negsi2
-       ldi:8   #0, r0
-       sub     r4, r0
-       mov     r0, r4
-       ret
-FUNC_END negsi2
-#endif /* L_negsi2 */
-
-#ifdef L_one_cmplsi2
-FUNC_START one_cmplsi2
-       ldi:8   #0xff, r0
-       extsb   r0
-       eor     r0, r4
-       ret
-FUNC_END one_cmplsi2
-#endif /* L_one_cmplsi2 */
-
-
diff --git a/gcc/config/fr30/t-fr30 b/gcc/config/fr30/t-fr30

index 75009d4eb7054edbd478373550ae7e546dae1033..e37921681d0c6edc1562f925641c258f94b5726c 100644 (file)
--- a/gcc/config/fr30/t-fr30
+++ b/gcc/config/fr30/t-fr30
@@ -16,9 +16,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMSRC    = fr30/lib1funcs.asm
-LIB1ASMFUNCS  = _udivsi3 _divsi3 _umodsi3 _modsi3
-
  # If any special flags are necessary when building libgcc2 put them here.
  #
  # TARGET_LIBGCC2_CFLAGS
diff --git a/gcc/config/frv/lib1funcs.asm b/gcc/config/frv/lib1funcs.asm

deleted file mode 100644 (file)

index d1ffcab..0000000
--- a/gcc/config/frv/lib1funcs.asm
+++ /dev/null
@@ -1,269 +0,0 @@
-/* Library functions.
-   Copyright (C) 2000, 2003, 2008, 2009 Free Software Foundation, Inc.
-   Contributed by Red Hat, Inc.
-  
-   This file is part of GCC.
-  
-   GCC is free software ; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-  
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY ; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-  
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <frv-asm.h>
-
-\f
-#ifdef L_cmpll
-/* icc0 = __cmpll (long long a, long long b)  */
-
-       .globl  EXT(__cmpll)
-       .type   EXT(__cmpll),@function
-       .text
-       .p2align 4
-EXT(__cmpll):
-       cmp     gr8, gr10, icc0
-       ckeq    icc0, cc4
-       P(ccmp) gr9, gr11, cc4, 1
-       ret
-.Lend:
-       .size   EXT(__cmpll),.Lend-EXT(__cmpll)
-#endif /* L_cmpll */
-\f
-#ifdef L_cmpf
-/* icc0 = __cmpf (float a, float b) */
-/* Note, because this function returns the result in ICC0, it means it can't
-   handle NaNs.  */
-
-       .globl  EXT(__cmpf)
-       .type   EXT(__cmpf),@function
-       .text
-       .p2align 4
-EXT(__cmpf):
-#ifdef __FRV_HARD_FLOAT__      /* floating point instructions available */
-       movgf   gr8, fr0
-       P(movgf) gr9, fr1
-       setlos  #1, gr8
-       fcmps   fr0, fr1, fcc0
-       P(fcklt) fcc0, cc0
-       fckeq   fcc0, cc1
-       csub    gr0, gr8, gr8, cc0, 1
-       cmov    gr0, gr8, cc1, 1
-       cmpi    gr8, 0, icc0
-       ret
-#else                          /* no floating point instructions available */
-       movsg   lr, gr4
-       addi    sp, #-16, sp
-       sti     gr4, @(sp, 8)
-       st      fp, @(sp, gr0)
-       mov     sp, fp
-       call    EXT(__cmpsf2)
-       cmpi    gr8, #0, icc0
-       ldi     @(sp, 8), gr4
-       movgs   gr4, lr
-       ld      @(sp,gr0), fp
-       addi    sp, #16, sp
-       ret
-#endif
-.Lend:
-       .size   EXT(__cmpf),.Lend-EXT(__cmpf)
-#endif
-\f
-#ifdef L_cmpd
-/* icc0 = __cmpd (double a, double b) */
-/* Note, because this function returns the result in ICC0, it means it can't
-   handle NaNs.  */
-
-       .globl  EXT(__cmpd)
-       .type   EXT(__cmpd),@function
-       .text
-       .p2align 4
-EXT(__cmpd):
-       movsg   lr, gr4
-       addi    sp, #-16, sp
-       sti     gr4, @(sp, 8)
-       st      fp, @(sp, gr0)
-       mov     sp, fp
-       call    EXT(__cmpdf2)
-       cmpi    gr8, #0, icc0
-       ldi     @(sp, 8), gr4
-       movgs   gr4, lr
-       ld      @(sp,gr0), fp
-       addi    sp, #16, sp
-       ret
-.Lend:
-       .size   EXT(__cmpd),.Lend-EXT(__cmpd)
-#endif
-\f
-#ifdef L_addll
-/* gr8,gr9 = __addll (long long a, long long b) */
-/* Note, gcc will never call this function, but it is present in case an
-   ABI program calls it.  */
-
-       .globl  EXT(__addll)
-       .type   EXT(__addll),@function
-       .text
-       .p2align
-EXT(__addll):
-       addcc   gr9, gr11, gr9, icc0
-       addx    gr8, gr10, gr8, icc0
-       ret
-.Lend:
-       .size   EXT(__addll),.Lend-EXT(__addll)
-#endif
-\f
-#ifdef L_subll
-/* gr8,gr9 = __subll (long long a, long long b) */
-/* Note, gcc will never call this function, but it is present in case an
-   ABI program calls it.  */
-
-       .globl  EXT(__subll)
-       .type   EXT(__subll),@function
-       .text
-       .p2align 4
-EXT(__subll):
-       subcc   gr9, gr11, gr9, icc0
-       subx    gr8, gr10, gr8, icc0
-       ret
-.Lend:
-       .size   EXT(__subll),.Lend-EXT(__subll)
-#endif
-\f
-#ifdef L_andll
-/* gr8,gr9 = __andll (long long a, long long b) */
-/* Note, gcc will never call this function, but it is present in case an
-   ABI program calls it.  */
-
-       .globl  EXT(__andll)
-       .type   EXT(__andll),@function
-       .text
-       .p2align 4
-EXT(__andll):
-       P(and)  gr9, gr11, gr9
-       P2(and) gr8, gr10, gr8
-       ret
-.Lend:
-       .size   EXT(__andll),.Lend-EXT(__andll)
-#endif
-\f
-#ifdef L_orll
-/* gr8,gr9 = __orll (long long a, long long b) */
-/* Note, gcc will never call this function, but it is present in case an
-   ABI program calls it.  */
-
-       .globl  EXT(__orll)
-       .type   EXT(__orll),@function
-       .text
-       .p2align 4
-EXT(__orll):
-       P(or)   gr9, gr11, gr9
-       P2(or)  gr8, gr10, gr8
-       ret
-.Lend:
-       .size   EXT(__orll),.Lend-EXT(__orll)
-#endif
-\f
-#ifdef L_xorll
-/* gr8,gr9 = __xorll (long long a, long long b) */
-/* Note, gcc will never call this function, but it is present in case an
-   ABI program calls it.  */
-
-       .globl  EXT(__xorll)
-       .type   EXT(__xorll),@function
-       .text
-       .p2align 4
-EXT(__xorll):
-       P(xor)  gr9, gr11, gr9
-       P2(xor) gr8, gr10, gr8
-       ret
-.Lend:
-       .size   EXT(__xorll),.Lend-EXT(__xorll)
-#endif
-\f
-#ifdef L_notll
-/* gr8,gr9 = __notll (long long a) */
-/* Note, gcc will never call this function, but it is present in case an
-   ABI program calls it.  */
-
-       .globl  EXT(__notll)
-       .type   EXT(__notll),@function
-       .text
-       .p2align 4
-EXT(__notll):
-       P(not)  gr9, gr9
-       P2(not) gr8, gr8
-       ret
-.Lend:
-       .size   EXT(__notll),.Lend-EXT(__notll)
-#endif
-\f
-#ifdef L_cmov
-/* (void) __cmov (char *dest, const char *src, size_t len) */
-/*
- * void __cmov (char *dest, const char *src, size_t len)
- * {
- *   size_t i;
- * 
- *   if (dest < src || dest > src+len)
- *     {
- *      for (i = 0; i < len; i++)
- *      dest[i] = src[i];
- *     }
- *   else
- *     {
- *      while (len-- > 0)
- *      dest[len] = src[len];
- *     }
- * }
- */
-
-       .globl  EXT(__cmov)
-       .type   EXT(__cmov),@function
-       .text
-       .p2align 4
-EXT(__cmov):
-       P(cmp)  gr8, gr9, icc0
-       add     gr9, gr10, gr4
-       P(cmp)  gr8, gr4, icc1
-       bc      icc0, 0, .Lfwd
-       bls     icc1, 0, .Lback
-.Lfwd:
-       /* move bytes in a forward direction */
-       P(setlos) #0, gr5
-       cmp     gr0, gr10, icc0
-       P(subi) gr9, #1, gr9
-       P2(subi) gr8, #1, gr8
-       bnc     icc0, 0, .Lret
-.Lfloop:
-       /* forward byte move loop */
-       addi    gr5, #1, gr5
-       P(ldsb) @(gr9, gr5), gr4
-       cmp     gr5, gr10, icc0
-       P(stb)  gr4, @(gr8, gr5)
-       bc      icc0, 0, .Lfloop
-       ret
-.Lbloop:
-       /* backward byte move loop body */
-       ldsb    @(gr9,gr10),gr4
-       stb     gr4,@(gr8,gr10)
-.Lback:
-       P(cmpi) gr10, #0, icc0
-       addi    gr10, #-1, gr10
-       bne     icc0, 0, .Lbloop
-.Lret:
-       ret
-.Lend:
-       .size    EXT(__cmov),.Lend-EXT(__cmov)
-#endif
diff --git a/gcc/config/frv/t-frv b/gcc/config/frv/t-frv

index 03f3cd8cde1c9edaf352db4afd7eeadad5dc1c8d..e31f823c30ace42713fb0d43ec84f88c21dd2974 100644 (file)
--- a/gcc/config/frv/t-frv
+++ b/gcc/config/frv/t-frv
@@ -16,15 +16,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-# Name of assembly file containing libgcc1 functions.
-# This entry must be present, but it can be empty if the target does
-# not need any assembler functions to support its code generation.
-#
-# Alternatively if assembler functions *are* needed then define the
-# entries below:
-CROSS_LIBGCC1  = libgcc1-asm.a
-LIB1ASMSRC     = frv/lib1funcs.asm
-LIB1ASMFUNCS   = _cmpll _cmpf _cmpd _addll _subll _andll _orll _xorll _notll _cmov
  LIB2FUNCS_EXTRA        = cmovh.c cmovw.c cmovd.c modi.c umodi.c uitof.c uitod.c ulltof.c ulltod.c
  
  # If any special flags are necessary when building libgcc2 put them here.
diff --git a/gcc/config/h8300/fixunssfsi.c b/gcc/config/h8300/fixunssfsi.c

index 2fe62b7a1a84957502d03d958bae289fcee70d0e..940d0c6dc6aff23552868c899595aee55d658d80 100644 (file)
--- a/gcc/config/h8300/fixunssfsi.c
+++ b/gcc/config/h8300/fixunssfsi.c
@@ -1,6 +1,6 @@
  /* More subroutines needed by GCC output code on some machines.  */
  /* Compile this one with gcc.  */
-/* Copyright (C) 1989, 1992, 2001, 2002, 2003, 2004, 2009
+/* Copyright (C) 1989, 1992, 2001, 2002, 2003, 2004, 2009, 2011
     Free Software Foundation, Inc.
  
  This file is part of GCC.
@@ -26,7 +26,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  
  /* The libgcc2.c implementation gets confused by our type setup and creates
     a directly recursive call, so we do our own implementation.  For
-   the H8/300, that's in lib1funcs.asm, for H8/300H and H8S, it's here.  */
+   the H8/300, that's in lib1funcs.S, for H8/300H and H8S, it's here.  */
  
  #ifndef __H8300__
  long __fixunssfsi (float a);
diff --git a/gcc/config/h8300/lib1funcs.asm b/gcc/config/h8300/lib1funcs.asm

deleted file mode 100644 (file)

index 1b75b73..0000000
--- a/gcc/config/h8300/lib1funcs.asm
+++ /dev/null
@@ -1,838 +0,0 @@
-;; libgcc routines for the Renesas H8/300 CPU.
-;; Contributed by Steve Chamberlain <sac@cygnus.com>
-;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>
-
-/* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004, 2009
-   Free Software Foundation, Inc.
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/* Assembler register definitions.  */
-
-#define A0 r0
-#define A0L r0l
-#define A0H r0h
-
-#define A1 r1
-#define A1L r1l
-#define A1H r1h
-
-#define A2 r2
-#define A2L r2l
-#define A2H r2h
-
-#define A3 r3
-#define A3L r3l
-#define A3H r3h
-
-#define S0 r4
-#define S0L r4l
-#define S0H r4h
-
-#define S1 r5
-#define S1L r5l
-#define S1H r5h
-
-#define S2 r6
-#define S2L r6l
-#define S2H r6h
-
-#ifdef __H8300__
-#define PUSHP  push
-#define POPP   pop
-
-#define A0P    r0
-#define A1P    r1
-#define A2P    r2
-#define A3P    r3
-#define S0P    r4
-#define S1P    r5
-#define S2P    r6
-#endif
-
-#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
-#define PUSHP  push.l
-#define POPP   pop.l
-
-#define A0P    er0
-#define A1P    er1
-#define A2P    er2
-#define A3P    er3
-#define S0P    er4
-#define S1P    er5
-#define S2P    er6
-
-#define A0E    e0
-#define A1E    e1
-#define A2E    e2
-#define A3E    e3
-#endif
-
-#ifdef __H8300H__
-#ifdef __NORMAL_MODE__
-       .h8300hn
-#else
-       .h8300h
-#endif
-#endif
-
-#ifdef __H8300S__
-#ifdef __NORMAL_MODE__
-       .h8300sn
-#else
-       .h8300s
-#endif
-#endif
-#ifdef __H8300SX__
-#ifdef __NORMAL_MODE__
-       .h8300sxn
-#else
-       .h8300sx
-#endif
-#endif
-
-#ifdef L_cmpsi2
-#ifdef __H8300__
-       .section .text
-       .align 2
-       .global ___cmpsi2
-___cmpsi2:
-       cmp.w   A0,A2
-       bne     .L2
-       cmp.w   A1,A3
-       bne     .L4
-       mov.w   #1,A0
-       rts
-.L2:
-       bgt     .L5
-.L3:
-       mov.w   #2,A0
-       rts
-.L4:
-       bls     .L3
-.L5:
-       sub.w   A0,A0
-       rts
-       .end
-#endif
-#endif /* L_cmpsi2 */
-
-#ifdef L_ucmpsi2
-#ifdef __H8300__
-       .section .text
-       .align 2
-       .global ___ucmpsi2
-___ucmpsi2:
-       cmp.w   A0,A2
-       bne     .L2
-       cmp.w   A1,A3
-       bne     .L4
-       mov.w   #1,A0
-       rts
-.L2:
-       bhi     .L5
-.L3:
-       mov.w   #2,A0
-       rts
-.L4:
-       bls     .L3
-.L5:
-       sub.w   A0,A0
-       rts
-       .end
-#endif
-#endif /* L_ucmpsi2 */
-
-#ifdef L_divhi3
-
-;; HImode divides for the H8/300.
-;; We bunch all of this into one object file since there are several
-;; "supporting routines".
-
-; general purpose normalize routine
-;
-; divisor in A0
-; dividend in A1
-; turns both into +ve numbers, and leaves what the answer sign
-; should be in A2L
-
-#ifdef __H8300__
-       .section .text
-       .align 2
-divnorm:
-       or      A0H,A0H         ; is divisor > 0
-       stc     ccr,A2L
-       bge     _lab1
-       not     A0H             ; no - then make it +ve
-       not     A0L
-       adds    #1,A0
-_lab1: or      A1H,A1H ; look at dividend
-       bge     _lab2
-       not     A1H             ; it is -ve, make it positive
-       not     A1L
-       adds    #1,A1
-       xor     #0x8,A2L; and toggle sign of result
-_lab2: rts
-;; Basically the same, except that the sign of the divisor determines
-;; the sign.
-modnorm:
-       or      A0H,A0H         ; is divisor > 0
-       stc     ccr,A2L
-       bge     _lab7
-       not     A0H             ; no - then make it +ve
-       not     A0L
-       adds    #1,A0
-_lab7: or      A1H,A1H ; look at dividend
-       bge     _lab8
-       not     A1H             ; it is -ve, make it positive
-       not     A1L
-       adds    #1,A1
-_lab8: rts
-
-; A0=A0/A1 signed
-
-       .global ___divhi3
-___divhi3:
-       bsr     divnorm
-       bsr     ___udivhi3
-negans:        btst    #3,A2L  ; should answer be negative ?
-       beq     _lab4
-       not     A0H     ; yes, so make it so
-       not     A0L
-       adds    #1,A0
-_lab4: rts
-
-; A0=A0%A1 signed
-
-       .global ___modhi3
-___modhi3:
-       bsr     modnorm
-       bsr     ___udivhi3
-       mov     A3,A0
-       bra     negans
-
-; A0=A0%A1 unsigned
-
-       .global ___umodhi3
-___umodhi3:
-       bsr     ___udivhi3
-       mov     A3,A0
-       rts
-
-; A0=A0/A1 unsigned
-; A3=A0%A1 unsigned
-; A2H trashed
-; D high 8 bits of denom
-; d low 8 bits of denom
-; N high 8 bits of num
-; n low 8 bits of num
-; M high 8 bits of mod
-; m low 8 bits of mod
-; Q high 8 bits of quot
-; q low 8 bits of quot
-; P preserve
-
-; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
-; see how to partition up the expression.
-
-       .global ___udivhi3
-___udivhi3:
-                               ; A0 A1 A2 A3
-                               ; Nn Dd       P
-       sub.w   A3,A3           ; Nn Dd xP 00
-       or      A1H,A1H
-       bne     divlongway
-       or      A0H,A0H
-       beq     _lab6
-
-; we know that D == 0 and N is != 0
-       mov.b   A0H,A3L         ; Nn Dd xP 0N
-       divxu   A1L,A3          ;          MQ
-       mov.b   A3L,A0H         ; Q
-; dealt with N, do n
-_lab6: mov.b   A0L,A3L         ;           n
-       divxu   A1L,A3          ;          mq
-       mov.b   A3L,A0L         ; Qq
-       mov.b   A3H,A3L         ;           m
-       mov.b   #0x0,A3H        ; Qq       0m
-       rts
-
-; D != 0 - which means the denominator is
-;          loop around to get the result.
-
-divlongway:
-       mov.b   A0H,A3L         ; Nn Dd xP 0N
-       mov.b   #0x0,A0H        ; high byte of answer has to be zero
-       mov.b   #0x8,A2H        ;       8
-div8:  add.b   A0L,A0L         ; n*=2
-       rotxl   A3L             ; Make remainder bigger
-       rotxl   A3H
-       sub.w   A1,A3           ; Q-=N
-       bhs     setbit          ; set a bit ?
-       add.w   A1,A3           ;  no : too far , Q+=N
-
-       dec     A2H
-       bne     div8            ; next bit
-       rts
-
-setbit:        inc     A0L             ; do insert bit
-       dec     A2H
-       bne     div8            ; next bit
-       rts
-
-#endif /* __H8300__ */
-#endif /* L_divhi3 */
-
-#ifdef L_divsi3
-
-;; 4 byte integer divides for the H8/300.
-;;
-;; We have one routine which does all the work and lots of
-;; little ones which prepare the args and massage the sign.
-;; We bunch all of this into one object file since there are several
-;; "supporting routines".
-
-       .section .text
-       .align 2
-
-; Put abs SIs into r0/r1 and r2/r3, and leave a 1 in r6l with sign of rest.
-; This function is here to keep branch displacements small.
-
-#ifdef __H8300__
-
-divnorm:
-       mov.b   A0H,A0H         ; is the numerator -ve
-       stc     ccr,S2L         ; keep the sign in bit 3 of S2L
-       bge     postive
-
-       ; negate arg
-       not     A0H
-       not     A1H
-       not     A0L
-       not     A1L
-
-       add     #1,A1L
-       addx    #0,A1H
-       addx    #0,A0L
-       addx    #0,A0H
-postive:
-       mov.b   A2H,A2H         ; is the denominator -ve
-       bge     postive2
-       not     A2L
-       not     A2H
-       not     A3L
-       not     A3H
-       add.b   #1,A3L
-       addx    #0,A3H
-       addx    #0,A2L
-       addx    #0,A2H
-       xor.b   #0x08,S2L       ; toggle the result sign
-postive2:
-       rts
-
-;; Basically the same, except that the sign of the divisor determines
-;; the sign.
-modnorm:
-       mov.b   A0H,A0H         ; is the numerator -ve
-       stc     ccr,S2L         ; keep the sign in bit 3 of S2L
-       bge     mpostive
-
-       ; negate arg
-       not     A0H
-       not     A1H
-       not     A0L
-       not     A1L
-
-       add     #1,A1L
-       addx    #0,A1H
-       addx    #0,A0L
-       addx    #0,A0H
-mpostive:
-       mov.b   A2H,A2H         ; is the denominator -ve
-       bge     mpostive2
-       not     A2L
-       not     A2H
-       not     A3L
-       not     A3H
-       add.b   #1,A3L
-       addx    #0,A3H
-       addx    #0,A2L
-       addx    #0,A2H
-mpostive2:
-       rts
-
-#else /* __H8300H__ */
-
-divnorm:
-       mov.l   A0P,A0P         ; is the numerator -ve
-       stc     ccr,S2L         ; keep the sign in bit 3 of S2L
-       bge     postive
-
-       neg.l   A0P             ; negate arg
-
-postive:
-       mov.l   A1P,A1P         ; is the denominator -ve
-       bge     postive2
-
-       neg.l   A1P             ; negate arg
-       xor.b   #0x08,S2L       ; toggle the result sign
-
-postive2:
-       rts
-
-;; Basically the same, except that the sign of the divisor determines
-;; the sign.
-modnorm:
-       mov.l   A0P,A0P         ; is the numerator -ve
-       stc     ccr,S2L         ; keep the sign in bit 3 of S2L
-       bge     mpostive
-
-       neg.l   A0P             ; negate arg
-
-mpostive:
-       mov.l   A1P,A1P         ; is the denominator -ve
-       bge     mpostive2
-
-       neg.l   A1P             ; negate arg
-
-mpostive2:
-       rts
-
-#endif
-
-; numerator in A0/A1
-; denominator in A2/A3
-       .global ___modsi3
-___modsi3:
-#ifdef __H8300__
-       PUSHP   S2P
-       PUSHP   S0P
-       PUSHP   S1P
-       bsr     modnorm
-       bsr     divmodsi4
-       mov     S0,A0
-       mov     S1,A1
-       bra     exitdiv
-#else
-       PUSHP   S2P
-       bsr     modnorm
-       bsr     ___udivsi3
-       mov.l   er3,er0
-       bra     exitdiv
-#endif
-
-       ;; H8/300H and H8S version of ___udivsi3 is defined later in
-       ;; the file.
-#ifdef __H8300__
-       .global ___udivsi3
-___udivsi3:
-       PUSHP   S2P
-       PUSHP   S0P
-       PUSHP   S1P
-       bsr     divmodsi4
-       bra     reti
-#endif
-
-       .global ___umodsi3
-___umodsi3:
-#ifdef __H8300__
-       PUSHP   S2P
-       PUSHP   S0P
-       PUSHP   S1P
-       bsr     divmodsi4
-       mov     S0,A0
-       mov     S1,A1
-       bra     reti
-#else
-       bsr     ___udivsi3
-       mov.l   er3,er0
-       rts
-#endif
-
-       .global ___divsi3
-___divsi3:
-#ifdef __H8300__
-       PUSHP   S2P
-       PUSHP   S0P
-       PUSHP   S1P
-       jsr     divnorm
-       jsr     divmodsi4
-#else
-       PUSHP   S2P
-       jsr     divnorm
-       bsr     ___udivsi3
-#endif
-
-       ; examine what the sign should be
-exitdiv:
-       btst    #3,S2L
-       beq     reti
-
-       ; should be -ve
-#ifdef __H8300__
-       not     A0H
-       not     A1H
-       not     A0L
-       not     A1L
-
-       add     #1,A1L
-       addx    #0,A1H
-       addx    #0,A0L
-       addx    #0,A0H
-#else /* __H8300H__ */
-       neg.l   A0P
-#endif
-
-reti:
-#ifdef __H8300__
-       POPP    S1P
-       POPP    S0P
-#endif
-       POPP    S2P
-       rts
-
-       ; takes A0/A1 numerator (A0P for H8/300H)
-       ; A2/A3 denominator (A1P for H8/300H)
-       ; returns A0/A1 quotient (A0P for H8/300H)
-       ; S0/S1 remainder (S0P for H8/300H)
-       ; trashes S2H
-
-#ifdef __H8300__
-
-divmodsi4:
-        sub.w  S0,S0           ; zero play area
-        mov.w  S0,S1
-        mov.b  A2H,S2H
-        or     A2L,S2H
-        or     A3H,S2H
-        bne    DenHighNonZero
-        mov.b  A0H,A0H
-        bne    NumByte0Zero
-        mov.b  A0L,A0L
-        bne    NumByte1Zero
-        mov.b  A1H,A1H
-        bne    NumByte2Zero
-        bra    NumByte3Zero
-NumByte0Zero:
-       mov.b   A0H,S1L
-        divxu  A3L,S1
-        mov.b  S1L,A0H
-NumByte1Zero:
-       mov.b   A0L,S1L
-        divxu  A3L,S1
-        mov.b  S1L,A0L
-NumByte2Zero:
-       mov.b   A1H,S1L
-        divxu  A3L,S1
-        mov.b  S1L,A1H
-NumByte3Zero:
-       mov.b   A1L,S1L
-        divxu  A3L,S1
-        mov.b  S1L,A1L
-
-        mov.b  S1H,S1L
-        mov.b  #0x0,S1H
-        rts
-
-; have to do the divide by shift and test
-DenHighNonZero:
-       mov.b   A0H,S1L
-        mov.b  A0L,A0H
-        mov.b  A1H,A0L
-        mov.b  A1L,A1H
-
-        mov.b  #0,A1L
-        mov.b  #24,S2H ; only do 24 iterations
-
-nextbit:
-       add.w   A1,A1   ; double the answer guess
-        rotxl  A0L
-        rotxl  A0H
-
-        rotxl  S1L     ; double remainder
-        rotxl  S1H
-        rotxl  S0L
-        rotxl  S0H
-        sub.w  A3,S1   ; does it all fit
-        subx   A2L,S0L
-        subx   A2H,S0H
-        bhs    setone
-
-        add.w  A3,S1   ; no, restore mistake
-        addx   A2L,S0L
-        addx   A2H,S0H
-
-        dec    S2H
-        bne    nextbit
-        rts
-
-setone:
-       inc     A1L
-        dec    S2H
-        bne    nextbit
-        rts
-
-#else /* __H8300H__ */
-
-       ;; This function also computes the remainder and stores it in er3.
-       .global ___udivsi3
-___udivsi3:
-       mov.w   A1E,A1E         ; denominator top word 0?
-       bne     DenHighNonZero
-
-       ; do it the easy way, see page 107 in manual
-       mov.w   A0E,A2
-       extu.l  A2P
-       divxu.w A1,A2P
-       mov.w   A2E,A0E
-       divxu.w A1,A0P
-       mov.w   A0E,A3
-       mov.w   A2,A0E
-       extu.l  A3P
-       rts
-
-       ; er0 = er0 / er1
-       ; er3 = er0 % er1
-       ; trashes er1 er2
-       ; expects er1 >= 2^16
-DenHighNonZero:
-       mov.l   er0,er3
-       mov.l   er1,er2
-#ifdef __H8300H__
-divmod_L21:
-       shlr.l  er0
-       shlr.l  er2             ; make divisor < 2^16
-       mov.w   e2,e2
-       bne     divmod_L21
-#else
-       shlr.l  #2,er2          ; make divisor < 2^16
-       mov.w   e2,e2
-       beq     divmod_L22A
-divmod_L21:
-       shlr.l  #2,er0
-divmod_L22:
-       shlr.l  #2,er2          ; make divisor < 2^16
-       mov.w   e2,e2
-       bne     divmod_L21
-divmod_L22A:
-       rotxl.w r2
-       bcs     divmod_L23
-       shlr.l  er0
-       bra     divmod_L24
-divmod_L23:
-       rotxr.w r2
-       shlr.l  #2,er0
-divmod_L24:
-#endif
-       ;; At this point,
-       ;;  er0 contains shifted dividend
-       ;;  er1 contains divisor
-       ;;  er2 contains shifted divisor
-       ;;  er3 contains dividend, later remainder
-       divxu.w r2,er0          ; r0 now contains the approximate quotient (AQ)
-       extu.l  er0
-       beq     divmod_L25
-       subs    #1,er0          ; er0 = AQ - 1
-       mov.w   e1,r2
-       mulxu.w r0,er2          ; er2 = upper (AQ - 1) * divisor
-       sub.w   r2,e3           ; dividend - 65536 * er2
-       mov.w   r1,r2
-       mulxu.w r0,er2          ; compute er3 = remainder (tentative)
-       sub.l   er2,er3         ; er3 = dividend - (AQ - 1) * divisor
-divmod_L25:
-       cmp.l   er1,er3         ; is divisor < remainder?
-       blo     divmod_L26
-       adds    #1,er0
-       sub.l   er1,er3         ; correct the remainder
-divmod_L26:
-       rts
-
-#endif
-#endif /* L_divsi3 */
-
-#ifdef L_mulhi3
-
-;; HImode multiply.
-; The H8/300 only has an 8*8->16 multiply.
-; The answer is the same as:
-;
-; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
-; (we can ignore A1.h * A0.h cause that will all off the top)
-; A0 in
-; A1 in
-; A0 answer
-
-#ifdef __H8300__
-       .section .text
-       .align 2
-       .global ___mulhi3
-___mulhi3:
-       mov.b   A1L,A2L         ; A2l gets srcb.l
-       mulxu   A0L,A2          ; A2 gets first sub product
-
-       mov.b   A0H,A3L         ; prepare for
-       mulxu   A1L,A3          ; second sub product
-
-       add.b   A3L,A2H         ; sum first two terms
-
-       mov.b   A1H,A3L         ; third sub product
-       mulxu   A0L,A3
-
-       add.b   A3L,A2H         ; almost there
-       mov.w   A2,A0           ; that is
-       rts
-
-#endif
-#endif /* L_mulhi3 */
-
-#ifdef L_mulsi3
-
-;; SImode multiply.
-;;
-;; I think that shift and add may be sufficient for this.  Using the
-;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead.  This way
-;; the inner loop uses maybe 20 cycles + overhead, but terminates
-;; quickly on small args.
-;;
-;; A0/A1 src_a
-;; A2/A3 src_b
-;;
-;;  while (a)
-;;    {
-;;      if (a & 1)
-;;        r += b;
-;;      a >>= 1;
-;;      b <<= 1;
-;;    }
-
-       .section .text
-       .align 2
-
-#ifdef __H8300__
-
-       .global ___mulsi3
-___mulsi3:
-       PUSHP   S0P
-       PUSHP   S1P
-
-       sub.w   S0,S0
-       sub.w   S1,S1
-
-       ; while (a)
-_top:  mov.w   A0,A0
-       bne     _more
-       mov.w   A1,A1
-       beq     _done
-_more: ; if (a & 1)
-       bld     #0,A1L
-       bcc     _nobit
-       ; r += b
-       add.w   A3,S1
-       addx    A2L,S0L
-       addx    A2H,S0H
-_nobit:
-       ; a >>= 1
-       shlr    A0H
-       rotxr   A0L
-       rotxr   A1H
-       rotxr   A1L
-
-       ; b <<= 1
-       add.w   A3,A3
-       addx    A2L,A2L
-       addx    A2H,A2H
-       bra     _top
-
-_done:
-       mov.w   S0,A0
-       mov.w   S1,A1
-       POPP    S1P
-       POPP    S0P
-       rts
-
-#else /* __H8300H__ */
-
-;
-; mulsi3 for H8/300H - based on Renesas SH implementation
-;
-; by Toshiyasu Morita
-;
-; Old code:
-;
-; 16b * 16b = 372 states (worst case)
-; 32b * 32b = 724 states (worst case)
-;
-; New code:
-;
-; 16b * 16b =  48 states
-; 16b * 32b =  72 states
-; 32b * 32b =  92 states
-;
-
-       .global ___mulsi3
-___mulsi3:
-       mov.w   r1,r2   ; ( 2 states) b * d
-       mulxu   r0,er2  ; (22 states)
-
-       mov.w   e0,r3   ; ( 2 states) a * d
-       beq     L_skip1 ; ( 4 states)
-       mulxu   r1,er3  ; (22 states)
-       add.w   r3,e2   ; ( 2 states)
-
-L_skip1:
-       mov.w   e1,r3   ; ( 2 states) c * b
-       beq     L_skip2 ; ( 4 states)
-       mulxu   r0,er3  ; (22 states)
-       add.w   r3,e2   ; ( 2 states)
-
-L_skip2:
-       mov.l   er2,er0 ; ( 2 states)
-       rts             ; (10 states)
-
-#endif
-#endif /* L_mulsi3 */
-#ifdef L_fixunssfsi_asm
-/* For the h8300 we use asm to save some bytes, to
-   allow more programs to fit into the tiny address
-   space.  For the H8/300H and H8S, the C version is good enough.  */
-#ifdef __H8300__
-/* We still treat NANs different than libgcc2.c, but then, the
-   behavior is undefined anyways.  */
-       .global ___fixunssfsi
-___fixunssfsi:
-       cmp.b #0x4f,r0h
-       bge Large_num
-       jmp     @___fixsfsi
-Large_num:
-       bhi L_huge_num
-       xor.b #0x80,A0L
-       bmi L_shift8
-L_huge_num:
-       mov.w #65535,A0
-       mov.w A0,A1
-       rts
-L_shift8:
-       mov.b A0L,A0H
-       mov.b A1H,A0L
-       mov.b A1L,A1H
-       mov.b #0,A1L
-       rts
-#endif
-#endif /* L_fixunssfsi_asm */
diff --git a/gcc/config/h8300/t-h8300 b/gcc/config/h8300/t-h8300

index 616849007b461afe8e4d64e13d4ca0bbd040f8a7..7083c673acf84fb31e0be893e1d132bf9c367a1b 100644 (file)
--- a/gcc/config/h8300/t-h8300
+++ b/gcc/config/h8300/t-h8300
@@ -17,10 +17,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMSRC = h8300/lib1funcs.asm
-LIB1ASMFUNCS = _cmpsi2 _ucmpsi2 _divhi3 _divsi3 _mulhi3 _mulsi3 \
-  _fixunssfsi_asm
-
  LIB2FUNCS_EXTRA = \
         $(srcdir)/config/h8300/clzhi2.c \
         $(srcdir)/config/h8300/ctzhi2.c \
diff --git a/gcc/config/i386/cygwin.asm b/gcc/config/i386/cygwin.asm

deleted file mode 100644 (file)

index 8f9c486..0000000
--- a/gcc/config/i386/cygwin.asm
+++ /dev/null
@@ -1,188 +0,0 @@
-/* stuff needed for libgcc on win32.
- *
- *   Copyright (C) 1996, 1998, 2001, 2003, 2008, 2009, 2010
- *   Free Software Foundation, Inc.
- *   Written By Steve Chamberlain
- * 
- * This file is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option) any
- * later version.
- * 
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- * 
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
- * <http://www.gnu.org/licenses/>.
- */
-
-#include "auto-host.h"
-
-#ifdef HAVE_GAS_CFI_SECTIONS_DIRECTIVE
-       .cfi_sections   .debug_frame
-# define cfi_startproc()               .cfi_startproc
-# define cfi_endproc()                 .cfi_endproc
-# define cfi_adjust_cfa_offset(X)      .cfi_adjust_cfa_offset X
-# define cfi_def_cfa_register(X)       .cfi_def_cfa_register X
-# define cfi_register(D,S)             .cfi_register D, S
-# ifdef _WIN64
-#  define cfi_push(X)          .cfi_adjust_cfa_offset 8; .cfi_rel_offset X, 0
-#  define cfi_pop(X)           .cfi_adjust_cfa_offset -8; .cfi_restore X
-# else
-#  define cfi_push(X)          .cfi_adjust_cfa_offset 4; .cfi_rel_offset X, 0
-#  define cfi_pop(X)           .cfi_adjust_cfa_offset -4; .cfi_restore X
-# endif
-#else
-# define cfi_startproc()
-# define cfi_endproc()
-# define cfi_adjust_cfa_offset(X)
-# define cfi_def_cfa_register(X)
-# define cfi_register(D,S)
-# define cfi_push(X)
-# define cfi_pop(X)
-#endif /* HAVE_GAS_CFI_SECTIONS_DIRECTIVE */
-
-#ifdef L_chkstk
-/* Function prologue calls __chkstk to probe the stack when allocating more
-   than CHECK_STACK_LIMIT bytes in one go.  Touching the stack at 4K
-   increments is necessary to ensure that the guard pages used
-   by the OS virtual memory manger are allocated in correct sequence.  */
-
-       .global ___chkstk
-       .global __alloca
-#ifdef _WIN64
-/* __alloca is a normal function call, which uses %rcx as the argument.  */
-       cfi_startproc()
-__alloca:
-       movq    %rcx, %rax
-       /* FALLTHRU */
-
-/* ___chkstk is a *special* function call, which uses %rax as the argument.
-   We avoid clobbering the 4 integer argument registers, %rcx, %rdx, 
-   %r8 and %r9, which leaves us with %rax, %r10, and %r11 to use.  */
-       .align  4
-___chkstk:
-       popq    %r11                    /* pop return address */
-       cfi_adjust_cfa_offset(-8)       /* indicate return address in r11 */
-       cfi_register(%rip, %r11)
-       movq    %rsp, %r10
-       cmpq    $0x1000, %rax           /* > 4k ?*/
-       jb      2f
-
-1:     subq    $0x1000, %r10           /* yes, move pointer down 4k*/
-       orl     $0x0, (%r10)            /* probe there */
-       subq    $0x1000, %rax           /* decrement count */
-       cmpq    $0x1000, %rax
-       ja      1b                      /* and do it again */
-
-2:     subq    %rax, %r10
-       movq    %rsp, %rax              /* hold CFA until return */
-       cfi_def_cfa_register(%rax)
-       orl     $0x0, (%r10)            /* less than 4k, just peek here */
-       movq    %r10, %rsp              /* decrement stack */
-
-       /* Push the return value back.  Doing this instead of just
-          jumping to %r11 preserves the cached call-return stack
-          used by most modern processors.  */
-       pushq   %r11
-       ret
-       cfi_endproc()
-#else
-       cfi_startproc()
-___chkstk:
-__alloca:
-       pushl   %ecx                    /* save temp */
-       cfi_push(%eax)
-       leal    8(%esp), %ecx           /* point past return addr */
-       cmpl    $0x1000, %eax           /* > 4k ?*/
-       jb      2f
-
-1:     subl    $0x1000, %ecx           /* yes, move pointer down 4k*/
-       orl     $0x0, (%ecx)            /* probe there */
-       subl    $0x1000, %eax           /* decrement count */
-       cmpl    $0x1000, %eax
-       ja      1b                      /* and do it again */
-
-2:     subl    %eax, %ecx         
-       orl     $0x0, (%ecx)            /* less than 4k, just peek here */
-       movl    %esp, %eax              /* save current stack pointer */
-       cfi_def_cfa_register(%eax)
-       movl    %ecx, %esp              /* decrement stack */
-       movl    (%eax), %ecx            /* recover saved temp */
-
-       /* Copy the return register.  Doing this instead of just jumping to
-          the address preserves the cached call-return stack used by most
-          modern processors.  */
-       pushl   4(%eax)
-       ret
-       cfi_endproc()
-#endif /* _WIN64 */
-#endif /* L_chkstk */
-
-#ifdef L_chkstk_ms
-/* ___chkstk_ms is a *special* function call, which uses %rax as the argument.
-   We avoid clobbering any registers.  Unlike ___chkstk, it just probes the
-   stack and does no stack allocation.  */
-       .global ___chkstk_ms
-#ifdef _WIN64
-       cfi_startproc()
-___chkstk_ms:
-       pushq   %rcx                    /* save temps */
-       cfi_push(%rcx)
-       pushq   %rax
-       cfi_push(%rax)
-       cmpq    $0x1000, %rax           /* > 4k ?*/
-       leaq    24(%rsp), %rcx          /* point past return addr */
-       jb      2f
-
-1:     subq    $0x1000, %rcx           /* yes, move pointer down 4k */
-       orq     $0x0, (%rcx)            /* probe there */
-       subq    $0x1000, %rax           /* decrement count */
-       cmpq    $0x1000, %rax
-       ja      1b                      /* and do it again */
-
-2:     subq    %rax, %rcx
-       orq     $0x0, (%rcx)            /* less than 4k, just peek here */
-
-       popq    %rax
-       cfi_pop(%rax)
-       popq    %rcx
-       cfi_pop(%rcx)
-       ret
-       cfi_endproc()
-#else
-       cfi_startproc()
-___chkstk_ms:
-       pushl   %ecx                    /* save temp */
-       cfi_push(%ecx)
-       pushl   %eax
-       cfi_push(%eax)
-       cmpl    $0x1000, %eax           /* > 4k ?*/
-       leal    12(%esp), %ecx          /* point past return addr */
-       jb      2f
-
-1:     subl    $0x1000, %ecx           /* yes, move pointer down 4k*/
-       orl     $0x0, (%ecx)            /* probe there */
-       subl    $0x1000, %eax           /* decrement count */
-       cmpl    $0x1000, %eax
-       ja      1b                      /* and do it again */
-
-2:     subl    %eax, %ecx
-       orl     $0x0, (%ecx)            /* less than 4k, just peek here */
-
-       popl    %eax
-       cfi_pop(%eax)
-       popl    %ecx
-       cfi_pop(%ecx)
-       ret
-       cfi_endproc()
-#endif /* _WIN64 */
-#endif /* L_chkstk_ms */
diff --git a/gcc/config/i386/t-cygming b/gcc/config/i386/t-cygming

index 242d7f27f65f7542bdbe48136ba7992ecddc2b50..3e7f7cdd0363b082a5ae4daefa35bb6f83d6a30d 100644 (file)
--- a/gcc/config/i386/t-cygming
+++ b/gcc/config/i386/t-cygming
@@ -17,9 +17,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMSRC = i386/cygwin.asm
-LIB1ASMFUNCS = _chkstk _chkstk_ms
-
  # cygwin and mingw always have a limits.h, but, depending upon how we are
  # doing the build, it may not be installed yet.
  LIMITS_H_TEST = true
diff --git a/gcc/config/i386/t-interix b/gcc/config/i386/t-interix

index e7b016f1e7a002a7b50a8e2a0f55015f137ffd84..09c9127f6af4748a37518ebcb4972335616fd598 100644 (file)
--- a/gcc/config/i386/t-interix
+++ b/gcc/config/i386/t-interix
@@ -1,6 +1,3 @@
-LIB1ASMSRC = i386/cygwin.asm
-LIB1ASMFUNCS = _chkstk _chkstk_ms
-
  winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \
    $(TM_P_H) $(HASHTAB_H) $(GGC_H)
diff --git a/gcc/config/ia64/lib1funcs.asm b/gcc/config/ia64/lib1funcs.asm

deleted file mode 100644 (file)

index b7eaa6e..0000000
--- a/gcc/config/ia64/lib1funcs.asm
+++ /dev/null
@@ -1,795 +0,0 @@
-/* Copyright (C) 2000, 2001, 2003, 2005, 2009 Free Software Foundation, Inc.
-   Contributed by James E. Wilson <wilson@cygnus.com>.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifdef L__divxf3
-// Compute a 80-bit IEEE double-extended quotient.
-//
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// farg0 holds the dividend.  farg1 holds the divisor.
-//
-// __divtf3 is an alternate symbol name for backward compatibility.
-
-       .text
-       .align 16
-       .global __divxf3
-       .proc __divxf3
-__divxf3:
-#ifdef SHARED
-       .global __divtf3
-__divtf3:
-#endif
-       cmp.eq p7, p0 = r0, r0
-       frcpa.s0 f10, p6 = farg0, farg1
-       ;;
-(p6)   cmp.ne p7, p0 = r0, r0
-       .pred.rel.mutex p6, p7
-(p6)   fnma.s1 f11 = farg1, f10, f1
-(p6)   fma.s1 f12 = farg0, f10, f0
-       ;;
-(p6)   fma.s1 f13 = f11, f11, f0
-(p6)   fma.s1 f14 = f11, f11, f11
-       ;;
-(p6)   fma.s1 f11 = f13, f13, f11
-(p6)   fma.s1 f13 = f14, f10, f10
-       ;;
-(p6)   fma.s1 f10 = f13, f11, f10
-(p6)   fnma.s1 f11 = farg1, f12, farg0
-       ;;
-(p6)   fma.s1 f11 = f11, f10, f12
-(p6)   fnma.s1 f12 = farg1, f10, f1
-       ;;
-(p6)   fma.s1 f10 = f12, f10, f10
-(p6)   fnma.s1 f12 = farg1, f11, farg0
-       ;;
-(p6)   fma.s0 fret0 = f12, f10, f11
-(p7)   mov fret0 = f10
-       br.ret.sptk rp
-       .endp __divxf3
-#endif
-
-#ifdef L__divdf3
-// Compute a 64-bit IEEE double quotient.
-//
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// farg0 holds the dividend.  farg1 holds the divisor.
-
-       .text
-       .align 16
-       .global __divdf3
-       .proc __divdf3
-__divdf3:
-       cmp.eq p7, p0 = r0, r0
-       frcpa.s0 f10, p6 = farg0, farg1
-       ;;
-(p6)   cmp.ne p7, p0 = r0, r0
-       .pred.rel.mutex p6, p7
-(p6)   fmpy.s1 f11 = farg0, f10
-(p6)   fnma.s1 f12 = farg1, f10, f1
-       ;;
-(p6)   fma.s1 f11 = f12, f11, f11
-(p6)   fmpy.s1 f13 = f12, f12
-       ;;
-(p6)   fma.s1 f10 = f12, f10, f10
-(p6)   fma.s1 f11 = f13, f11, f11
-       ;;
-(p6)   fmpy.s1 f12 = f13, f13
-(p6)   fma.s1 f10 = f13, f10, f10
-       ;;
-(p6)   fma.d.s1 f11 = f12, f11, f11
-(p6)   fma.s1 f10 = f12, f10, f10
-       ;;
-(p6)   fnma.d.s1 f8 = farg1, f11, farg0
-       ;;
-(p6)   fma.d fret0 = f8, f10, f11
-(p7)   mov fret0 = f10
-       br.ret.sptk rp
-       ;;
-       .endp __divdf3
-#endif
-
-#ifdef L__divsf3
-// Compute a 32-bit IEEE float quotient.
-//
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// farg0 holds the dividend.  farg1 holds the divisor.
-
-       .text
-       .align 16
-       .global __divsf3
-       .proc __divsf3
-__divsf3:
-       cmp.eq p7, p0 = r0, r0
-       frcpa.s0 f10, p6 = farg0, farg1
-       ;;
-(p6)   cmp.ne p7, p0 = r0, r0
-       .pred.rel.mutex p6, p7
-(p6)   fmpy.s1 f8 = farg0, f10
-(p6)   fnma.s1 f9 = farg1, f10, f1
-       ;;
-(p6)   fma.s1 f8 = f9, f8, f8
-(p6)   fmpy.s1 f9 = f9, f9
-       ;;
-(p6)   fma.s1 f8 = f9, f8, f8
-(p6)   fmpy.s1 f9 = f9, f9
-       ;;
-(p6)   fma.d.s1 f10 = f9, f8, f8
-       ;;
-(p6)   fnorm.s.s0 fret0 = f10
-(p7)   mov fret0 = f10
-       br.ret.sptk rp
-       ;;
-       .endp __divsf3
-#endif
-
-#ifdef L__divdi3
-// Compute a 64-bit integer quotient.
-//
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// in0 holds the dividend.  in1 holds the divisor.
-
-       .text
-       .align 16
-       .global __divdi3
-       .proc __divdi3
-__divdi3:
-       .regstk 2,0,0,0
-       // Transfer inputs to FP registers.
-       setf.sig f8 = in0
-       setf.sig f9 = in1
-       // Check divide by zero.
-       cmp.ne.unc p0,p7=0,in1
-       ;;
-       // Convert the inputs to FP, so that they won't be treated as unsigned.
-       fcvt.xf f8 = f8
-       fcvt.xf f9 = f9
-(p7)   break 1
-       ;;
-       // Compute the reciprocal approximation.
-       frcpa.s1 f10, p6 = f8, f9
-       ;;
-       // 3 Newton-Raphson iterations.
-(p6)   fnma.s1 f11 = f9, f10, f1
-(p6)   fmpy.s1 f12 = f8, f10
-       ;;
-(p6)   fmpy.s1 f13 = f11, f11
-(p6)   fma.s1 f12 = f11, f12, f12
-       ;;
-(p6)   fma.s1 f10 = f11, f10, f10
-(p6)   fma.s1 f11 = f13, f12, f12
-       ;;
-(p6)   fma.s1 f10 = f13, f10, f10
-(p6)   fnma.s1 f12 = f9, f11, f8
-       ;;
-(p6)   fma.s1 f10 = f12, f10, f11
-       ;;
-       // Round quotient to an integer.
-       fcvt.fx.trunc.s1 f10 = f10
-       ;;
-       // Transfer result to GP registers.
-       getf.sig ret0 = f10
-       br.ret.sptk rp
-       ;;
-       .endp __divdi3
-#endif
-
-#ifdef L__moddi3
-// Compute a 64-bit integer modulus.
-//
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// in0 holds the dividend (a).  in1 holds the divisor (b).
-
-       .text
-       .align 16
-       .global __moddi3
-       .proc __moddi3
-__moddi3:
-       .regstk 2,0,0,0
-       // Transfer inputs to FP registers.
-       setf.sig f14 = in0
-       setf.sig f9 = in1
-       // Check divide by zero.
-       cmp.ne.unc p0,p7=0,in1
-       ;;
-       // Convert the inputs to FP, so that they won't be treated as unsigned.
-       fcvt.xf f8 = f14
-       fcvt.xf f9 = f9
-(p7)   break 1
-       ;;
-       // Compute the reciprocal approximation.
-       frcpa.s1 f10, p6 = f8, f9
-       ;;
-       // 3 Newton-Raphson iterations.
-(p6)   fmpy.s1 f12 = f8, f10
-(p6)   fnma.s1 f11 = f9, f10, f1
-       ;;
-(p6)   fma.s1 f12 = f11, f12, f12
-(p6)   fmpy.s1 f13 = f11, f11
-       ;;
-(p6)   fma.s1 f10 = f11, f10, f10
-(p6)   fma.s1 f11 = f13, f12, f12
-       ;;
-       sub in1 = r0, in1
-(p6)   fma.s1 f10 = f13, f10, f10
-(p6)   fnma.s1 f12 = f9, f11, f8
-       ;;
-       setf.sig f9 = in1
-(p6)   fma.s1 f10 = f12, f10, f11
-       ;;
-       fcvt.fx.trunc.s1 f10 = f10
-       ;;
-       // r = q * (-b) + a
-       xma.l f10 = f10, f9, f14
-       ;;
-       // Transfer result to GP registers.
-       getf.sig ret0 = f10
-       br.ret.sptk rp
-       ;;
-       .endp __moddi3
-#endif
-
-#ifdef L__udivdi3
-// Compute a 64-bit unsigned integer quotient.
-//
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// in0 holds the dividend.  in1 holds the divisor.
-
-       .text
-       .align 16
-       .global __udivdi3
-       .proc __udivdi3
-__udivdi3:
-       .regstk 2,0,0,0
-       // Transfer inputs to FP registers.
-       setf.sig f8 = in0
-       setf.sig f9 = in1
-       // Check divide by zero.
-       cmp.ne.unc p0,p7=0,in1
-       ;;
-       // Convert the inputs to FP, to avoid FP software-assist faults.
-       fcvt.xuf.s1 f8 = f8
-       fcvt.xuf.s1 f9 = f9
-(p7)   break 1
-       ;;
-       // Compute the reciprocal approximation.
-       frcpa.s1 f10, p6 = f8, f9
-       ;;
-       // 3 Newton-Raphson iterations.
-(p6)   fnma.s1 f11 = f9, f10, f1
-(p6)   fmpy.s1 f12 = f8, f10
-       ;;
-(p6)   fmpy.s1 f13 = f11, f11
-(p6)   fma.s1 f12 = f11, f12, f12
-       ;;
-(p6)   fma.s1 f10 = f11, f10, f10
-(p6)   fma.s1 f11 = f13, f12, f12
-       ;;
-(p6)   fma.s1 f10 = f13, f10, f10
-(p6)   fnma.s1 f12 = f9, f11, f8
-       ;;
-(p6)   fma.s1 f10 = f12, f10, f11
-       ;;
-       // Round quotient to an unsigned integer.
-       fcvt.fxu.trunc.s1 f10 = f10
-       ;;
-       // Transfer result to GP registers.
-       getf.sig ret0 = f10
-       br.ret.sptk rp
-       ;;
-       .endp __udivdi3
-#endif
-
-#ifdef L__umoddi3
-// Compute a 64-bit unsigned integer modulus.
-//
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// in0 holds the dividend (a).  in1 holds the divisor (b).
-
-       .text
-       .align 16
-       .global __umoddi3
-       .proc __umoddi3
-__umoddi3:
-       .regstk 2,0,0,0
-       // Transfer inputs to FP registers.
-       setf.sig f14 = in0
-       setf.sig f9 = in1
-       // Check divide by zero.
-       cmp.ne.unc p0,p7=0,in1
-       ;;
-       // Convert the inputs to FP, to avoid FP software assist faults.
-       fcvt.xuf.s1 f8 = f14
-       fcvt.xuf.s1 f9 = f9
-(p7)   break 1;
-       ;;
-       // Compute the reciprocal approximation.
-       frcpa.s1 f10, p6 = f8, f9
-       ;;
-       // 3 Newton-Raphson iterations.
-(p6)   fmpy.s1 f12 = f8, f10
-(p6)   fnma.s1 f11 = f9, f10, f1
-       ;;
-(p6)   fma.s1 f12 = f11, f12, f12
-(p6)   fmpy.s1 f13 = f11, f11
-       ;;
-(p6)   fma.s1 f10 = f11, f10, f10
-(p6)   fma.s1 f11 = f13, f12, f12
-       ;;
-       sub in1 = r0, in1
-(p6)   fma.s1 f10 = f13, f10, f10
-(p6)   fnma.s1 f12 = f9, f11, f8
-       ;;
-       setf.sig f9 = in1
-(p6)   fma.s1 f10 = f12, f10, f11
-       ;;
-       // Round quotient to an unsigned integer.
-       fcvt.fxu.trunc.s1 f10 = f10
-       ;;
-       // r = q * (-b) + a
-       xma.l f10 = f10, f9, f14
-       ;;
-       // Transfer result to GP registers.
-       getf.sig ret0 = f10
-       br.ret.sptk rp
-       ;;
-       .endp __umoddi3
-#endif
-
-#ifdef L__divsi3
-// Compute a 32-bit integer quotient.
-//
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// in0 holds the dividend.  in1 holds the divisor.
-
-       .text
-       .align 16
-       .global __divsi3
-       .proc __divsi3
-__divsi3:
-       .regstk 2,0,0,0
-       // Check divide by zero.
-       cmp.ne.unc p0,p7=0,in1
-       sxt4 in0 = in0
-       sxt4 in1 = in1
-       ;;
-       setf.sig f8 = in0
-       setf.sig f9 = in1
-(p7)   break 1
-       ;;
-       mov r2 = 0x0ffdd
-       fcvt.xf f8 = f8
-       fcvt.xf f9 = f9
-       ;;
-       setf.exp f11 = r2
-       frcpa.s1 f10, p6 = f8, f9
-       ;;
-(p6)   fmpy.s1 f8 = f8, f10
-(p6)   fnma.s1 f9 = f9, f10, f1
-       ;;
-(p6)   fma.s1 f8 = f9, f8, f8
-(p6)   fma.s1 f9 = f9, f9, f11
-       ;;
-(p6)   fma.s1 f10 = f9, f8, f8
-       ;;
-       fcvt.fx.trunc.s1 f10 = f10
-       ;;
-       getf.sig ret0 = f10
-       br.ret.sptk rp
-       ;;
-       .endp __divsi3
-#endif
-
-#ifdef L__modsi3
-// Compute a 32-bit integer modulus.
-//
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// in0 holds the dividend.  in1 holds the divisor.
-
-       .text
-       .align 16
-       .global __modsi3
-       .proc __modsi3
-__modsi3:
-       .regstk 2,0,0,0
-       mov r2 = 0x0ffdd
-       sxt4 in0 = in0
-       sxt4 in1 = in1
-       ;;
-       setf.sig f13 = r32
-       setf.sig f9 = r33
-       // Check divide by zero.
-       cmp.ne.unc p0,p7=0,in1
-       ;;
-       sub in1 = r0, in1
-       fcvt.xf f8 = f13
-       fcvt.xf f9 = f9
-       ;;
-       setf.exp f11 = r2
-       frcpa.s1 f10, p6 = f8, f9
-(p7)   break 1
-       ;;
-(p6)   fmpy.s1 f12 = f8, f10
-(p6)   fnma.s1 f10 = f9, f10, f1
-       ;;
-       setf.sig f9 = in1
-(p6)   fma.s1 f12 = f10, f12, f12
-(p6)   fma.s1 f10 = f10, f10, f11      
-       ;;
-(p6)   fma.s1 f10 = f10, f12, f12
-       ;;
-       fcvt.fx.trunc.s1 f10 = f10
-       ;;
-       xma.l f10 = f10, f9, f13
-       ;;
-       getf.sig ret0 = f10
-       br.ret.sptk rp
-       ;;
-       .endp __modsi3
-#endif
-
-#ifdef L__udivsi3
-// Compute a 32-bit unsigned integer quotient.
-//
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// in0 holds the dividend.  in1 holds the divisor.
-
-       .text
-       .align 16
-       .global __udivsi3
-       .proc __udivsi3
-__udivsi3:
-       .regstk 2,0,0,0
-       mov r2 = 0x0ffdd
-       zxt4 in0 = in0
-       zxt4 in1 = in1
-       ;;
-       setf.sig f8 = in0
-       setf.sig f9 = in1
-       // Check divide by zero.
-       cmp.ne.unc p0,p7=0,in1
-       ;;
-       fcvt.xf f8 = f8
-       fcvt.xf f9 = f9
-(p7)   break 1
-       ;;
-       setf.exp f11 = r2
-       frcpa.s1 f10, p6 = f8, f9
-       ;;
-(p6)   fmpy.s1 f8 = f8, f10
-(p6)   fnma.s1 f9 = f9, f10, f1
-       ;;
-(p6)   fma.s1 f8 = f9, f8, f8
-(p6)   fma.s1 f9 = f9, f9, f11
-       ;;
-(p6)   fma.s1 f10 = f9, f8, f8
-       ;;
-       fcvt.fxu.trunc.s1 f10 = f10
-       ;;
-       getf.sig ret0 = f10
-       br.ret.sptk rp
-       ;;
-       .endp __udivsi3
-#endif
-
-#ifdef L__umodsi3
-// Compute a 32-bit unsigned integer modulus.
-//
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// in0 holds the dividend.  in1 holds the divisor.
-
-       .text
-       .align 16
-       .global __umodsi3
-       .proc __umodsi3
-__umodsi3:
-       .regstk 2,0,0,0
-       mov r2 = 0x0ffdd
-       zxt4 in0 = in0
-       zxt4 in1 = in1
-       ;;
-       setf.sig f13 = in0
-       setf.sig f9 = in1
-       // Check divide by zero.
-       cmp.ne.unc p0,p7=0,in1
-       ;;
-       sub in1 = r0, in1
-       fcvt.xf f8 = f13
-       fcvt.xf f9 = f9
-       ;;
-       setf.exp f11 = r2
-       frcpa.s1 f10, p6 = f8, f9
-(p7)   break 1;
-       ;;
-(p6)   fmpy.s1 f12 = f8, f10
-(p6)   fnma.s1 f10 = f9, f10, f1
-       ;;
-       setf.sig f9 = in1
-(p6)   fma.s1 f12 = f10, f12, f12
-(p6)   fma.s1 f10 = f10, f10, f11
-       ;;
-(p6)   fma.s1 f10 = f10, f12, f12
-       ;;
-       fcvt.fxu.trunc.s1 f10 = f10
-       ;;
-       xma.l f10 = f10, f9, f13
-       ;;
-       getf.sig ret0 = f10
-       br.ret.sptk rp
-       ;;
-       .endp __umodsi3
-#endif
-
-#ifdef L__save_stack_nonlocal
-// Notes on save/restore stack nonlocal: We read ar.bsp but write
-// ar.bspstore.  This is because ar.bsp can be read at all times
-// (independent of the RSE mode) but since it's read-only we need to
-// restore the value via ar.bspstore.  This is OK because
-// ar.bsp==ar.bspstore after executing "flushrs".
-
-// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer)
-
-       .text
-       .align 16
-       .global __ia64_save_stack_nonlocal
-       .proc __ia64_save_stack_nonlocal
-__ia64_save_stack_nonlocal:
-       { .mmf
-         alloc r18 = ar.pfs, 2, 0, 0, 0
-         mov r19 = ar.rsc
-         ;;
-       }
-       { .mmi
-         flushrs
-         st8 [in0] = in1, 24
-         and r19 = 0x1c, r19
-         ;;
-       }
-       { .mmi
-         st8 [in0] = r18, -16
-         mov ar.rsc = r19
-         or r19 = 0x3, r19
-         ;;
-       }
-       { .mmi
-         mov r16 = ar.bsp
-         mov r17 = ar.rnat
-         adds r2 = 8, in0
-         ;;
-       }
-       { .mmi
-         st8 [in0] = r16
-         st8 [r2] = r17
-       }
-       { .mib
-         mov ar.rsc = r19
-         br.ret.sptk.few rp
-         ;;
-       }
-       .endp __ia64_save_stack_nonlocal
-#endif
-
-#ifdef L__nonlocal_goto
-// void __ia64_nonlocal_goto(void *target_label, void *save_area,
-//                          void *static_chain);
-
-       .text
-       .align 16
-       .global __ia64_nonlocal_goto
-       .proc __ia64_nonlocal_goto
-__ia64_nonlocal_goto:
-       { .mmi
-         alloc r20 = ar.pfs, 3, 0, 0, 0
-         ld8 r12 = [in1], 8
-         mov.ret.sptk rp = in0, .L0
-         ;;
-       }
-       { .mmf
-         ld8 r16 = [in1], 8
-         mov r19 = ar.rsc
-         ;;
-       }
-       { .mmi
-         flushrs
-         ld8 r17 = [in1], 8
-         and r19 = 0x1c, r19
-         ;;
-       }
-       { .mmi
-         ld8 r18 = [in1]
-         mov ar.rsc = r19
-         or r19 = 0x3, r19
-         ;;
-       }
-       { .mmi
-         mov ar.bspstore = r16
-         ;;
-         mov ar.rnat = r17
-         ;;
-       }
-       { .mmi
-         loadrs
-         invala
-         mov r15 = in2
-         ;;
-       }
-.L0:   { .mib
-         mov ar.rsc = r19
-         mov ar.pfs = r18
-         br.ret.sptk.few rp
-         ;;
-       }
-       .endp __ia64_nonlocal_goto
-#endif
-
-#ifdef L__restore_stack_nonlocal
-// This is mostly the same as nonlocal_goto above.
-// ??? This has not been tested yet.
-
-// void __ia64_restore_stack_nonlocal(void *save_area)
-
-       .text
-       .align 16
-       .global __ia64_restore_stack_nonlocal
-       .proc __ia64_restore_stack_nonlocal
-__ia64_restore_stack_nonlocal:
-       { .mmf
-         alloc r20 = ar.pfs, 4, 0, 0, 0
-         ld8 r12 = [in0], 8
-         ;;
-       }
-       { .mmb
-         ld8 r16=[in0], 8
-         mov r19 = ar.rsc
-         ;;
-       }
-       { .mmi
-         flushrs
-         ld8 r17 = [in0], 8
-         and r19 = 0x1c, r19
-         ;;
-       }
-       { .mmf
-         ld8 r18 = [in0]
-         mov ar.rsc = r19
-         ;;
-       }
-       { .mmi
-         mov ar.bspstore = r16
-         ;;
-         mov ar.rnat = r17
-         or r19 = 0x3, r19
-         ;;
-       }
-       { .mmf
-         loadrs
-         invala
-         ;;
-       }
-.L0:   { .mib
-         mov ar.rsc = r19
-         mov ar.pfs = r18
-         br.ret.sptk.few rp
-         ;;
-       }
-       .endp __ia64_restore_stack_nonlocal
-#endif
-
-#ifdef L__trampoline
-// Implement the nested function trampoline.  This is out of line
-// so that we don't have to bother with flushing the icache, as
-// well as making the on-stack trampoline smaller.
-//
-// The trampoline has the following form:
-//
-//             +-------------------+ >
-//     TRAMP:  | __ia64_trampoline | |
-//             +-------------------+  > fake function descriptor
-//             | TRAMP+16          | |
-//             +-------------------+ >
-//             | target descriptor |
-//             +-------------------+
-//             | static link       |
-//             +-------------------+
-
-       .text
-       .align 16
-       .global __ia64_trampoline
-       .proc __ia64_trampoline
-__ia64_trampoline:
-       { .mmi
-         ld8 r2 = [r1], 8
-         ;;
-         ld8 r15 = [r1]
-       }
-       { .mmi
-         ld8 r3 = [r2], 8
-         ;;
-         ld8 r1 = [r2]
-         mov b6 = r3
-       }
-       { .bbb
-         br.sptk.many b6
-         ;;
-       }
-       .endp __ia64_trampoline
-#endif
-
-#ifdef SHARED
-// Thunks for backward compatibility.
-#ifdef L_fixtfdi
-       .text
-       .align 16
-       .global __fixtfti
-       .proc __fixtfti
-__fixtfti:
-       { .bbb
-         br.sptk.many __fixxfti
-         ;;
-       }
-       .endp __fixtfti
-#endif
-#ifdef L_fixunstfdi
-       .align 16
-       .global __fixunstfti
-       .proc __fixunstfti
-__fixunstfti:
-       { .bbb
-         br.sptk.many __fixunsxfti
-         ;;
-       }
-       .endp __fixunstfti
-#endif
-#ifdef L_floatditf
-       .align 16
-       .global __floattitf
-       .proc __floattitf
-__floattitf:
-       { .bbb
-         br.sptk.many __floattixf
-         ;;
-       }
-       .endp __floattitf
-#endif
-#endif
diff --git a/gcc/config/ia64/t-hpux b/gcc/config/ia64/t-hpux

index e1554861d18bc1e3852f1bdb2ceca85dca2117e6..23691f3856c1766cccd473dcf43334dcf59ab9e5 100644 (file)
--- a/gcc/config/ia64/t-hpux
+++ b/gcc/config/ia64/t-hpux
@@ -26,12 +26,6 @@ MULTILIB_OPTIONS = milp32/mlp64
  MULTILIB_DIRNAMES = hpux32 hpux64
  MULTILIB_MATCHES =
  
-# On HP-UX we do not want _fixtfdi, _fixunstfdi, or _floatditf from
-# LIB1ASMSRC.  These functions map the 128 bit conversion function names
-# to 80 bit conversions and were done for Linux backwards compatibility.
-
-LIB1ASMFUNCS := $(filter-out _fixtfdi _fixunstfdi _floatditf,$(LIB1ASMFUNCS))
-
  # Support routines for HP-UX 128 bit floats.
  
  LIB2FUNCS_EXTRA=quadlib.c $(srcdir)/config/floatunsitf.c
@@ -39,12 +33,6 @@ LIB2FUNCS_EXTRA=quadlib.c $(srcdir)/config/floatunsitf.c
  quadlib.c: $(srcdir)/config/ia64/quadlib.c
         cat $(srcdir)/config/ia64/quadlib.c > quadlib.c
  
-# We get an undefined main when building a cross compiler because our
-# linkspec has "-u main" and we want that for linking but it makes
-# LIBGCC1_TEST fail because it uses -nostdlib -nostartup.
-
-LIBGCC1_TEST =
-
  # We do not want to include the EH stuff that linux uses, we want to use
  # the HP-UX libunwind library.
  
diff --git a/gcc/config/ia64/t-ia64 b/gcc/config/ia64/t-ia64

index a143d43d56c3feded544978a0c2a5b3c708ad907..8a54d46b458775b203d999a3246c2d4188017e69 100644 (file)
--- a/gcc/config/ia64/t-ia64
+++ b/gcc/config/ia64/t-ia64
@@ -18,19 +18,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMSRC    = ia64/lib1funcs.asm
-
-# We use different names for the DImode div/mod files so that they won't
-# conflict with libgcc2.c files.  We used to use __ia64 as a prefix, now
-# we use __ as the prefix.  Note that L_divdi3 in libgcc2.c actually defines
-# a TImode divide function, so there is no actual overlap here between
-# libgcc2.c and lib1funcs.asm.
-LIB1ASMFUNCS  = __divxf3 __divdf3 __divsf3 \
-       __divdi3 __moddi3 __udivdi3 __umoddi3 \
-       __divsi3 __modsi3 __udivsi3 __umodsi3 __save_stack_nonlocal \
-       __nonlocal_goto __restore_stack_nonlocal __trampoline \
-       _fixtfdi _fixunstfdi _floatditf
-
  # ??? Hack to get -P option used when compiling lib1funcs.asm, because Intel
  # assembler does not accept # line number as a comment.
  # ??? This breaks C++ pragma interface/implementation, which is used in the
diff --git a/gcc/config/iq2000/t-iq2000 b/gcc/config/iq2000/t-iq2000

index 03d8c703f86a051bee10b915547bf6fb26c12d35..c634e58646ee34930a2ea0af1b7195fef2c49b88 100644 (file)
--- a/gcc/config/iq2000/t-iq2000
+++ b/gcc/config/iq2000/t-iq2000
@@ -16,11 +16,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-# Suppress building libgcc1.a, since the MIPS compiler port is complete
-# and does not need anything from libgcc1.a.
-LIBGCC1 =
-CROSS_LIBGCC1 =
-
  LIB2FUNCS_EXTRA = $(srcdir)/config/udivmod.c $(srcdir)/config/divmod.c $(srcdir)/config/udivmodsi4.c $(srcdir)/config/iq2000/lib2extra-funcs.c
  
  # Enable the following if multilibs are needed.
diff --git a/gcc/config/m32c/m32c-lib1.S b/gcc/config/m32c/m32c-lib1.S

deleted file mode 100644 (file)

index 9b65778..0000000
--- a/gcc/config/m32c/m32c-lib1.S
+++ /dev/null
@@ -1,231 +0,0 @@
-/* libgcc routines for R8C/M16C/M32C
-   Copyright (C) 2005, 2009, 2010
-   Free Software Foundation, Inc.
-   Contributed by Red Hat.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published
-   by the Free Software Foundation; either version 3, or (at your
-   option) any later version.
-
-   GCC is distributed in the hope that it will be useful, but WITHOUT
-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
-   License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if defined(__r8c_cpu__) || defined(__m16c_cpu__)
-#define A16
-#define A(n,w) n
-#define W w
-#else
-#define A24
-#define A(n,w) w
-#define W l
-#endif
-
-
-#ifdef L__m32c_memregs
-
-/* Warning: these memory locations are used as a register bank.  They
-   *must* end up consecutive in any final executable, so you may *not*
-   use the otherwise obvious ".comm" directive to allocate space for
-   them. */
-
-       .bss
-       .global mem0
-mem0:  .space  1
-       .global mem1
-mem1:  .space  1
-       .global mem2
-mem2:  .space  1
-       .global mem3
-mem3:  .space  1
-       .global mem4
-mem4:  .space  1
-       .global mem5
-mem5:  .space  1
-       .global mem6
-mem6:  .space  1
-       .global mem7
-mem7:  .space  1
-       .global mem8
-mem8:  .space  1
-       .global mem9
-mem9:  .space  1
-       .global mem10
-mem10: .space  1
-       .global mem11
-mem11: .space  1
-       .global mem12
-mem12: .space  1
-       .global mem13
-mem13: .space  1
-       .global mem14
-mem14: .space  1
-       .global mem15
-mem15: .space  1
-
-#endif
-
-#ifdef L__m32c_eh_return
-       .text
-       .global __m32c_eh_return
-__m32c_eh_return:      
-
-       /* At this point, r0 has the stack adjustment, r1r3 has the
-          address to return to.  The stack looks like this:
-
-          old_ra
-          old_fp
-          <- unwound sp
-          ...
-          fb
-          through
-          r0
-          <- sp
-
-          What we need to do is restore all the registers, update the
-          stack, and return to the right place.
-       */
-
-       stc     sp,a0
-       
-       add.W   A(#16,#24),a0
-       /* a0 points to the current stack, just above the register
-          save areas */
-
-       mov.w   a0,a1
-       exts.w  r0
-       sub.W   A(r0,r2r0),a1
-       sub.W   A(#3,#4),a1
-       /* a1 points to the new stack.  */
-
-       /* This is for the "rts" below.  */
-       mov.w   r1,[a1]
-#ifdef A16
-       mov.w   r2,r1
-       mov.b   r1l,2[a1]
-#else
-       mov.w   r2,2[a1]
-#endif
-
-       /* This is for the "popc sp" below.  */
-       mov.W   a1,[a0] 
-
-       popm    r0,r1,r2,r3,a0,a1,sb,fb
-       popc    sp
-       rts
-#endif
-
-/* SImode arguments for SI foo(SI,SI) functions.  */
-#ifdef A16
-#define SAL  5[fb]
-#define SAH  7[fb]
-#define SBL  9[fb]
-#define SBH 11[fb]
-#else
-#define SAL  8[fb]
-#define SAH 10[fb]
-#define SBL 12[fb]
-#define SBH 14[fb]
-#endif
-
-#ifdef L__m32c_mulsi3
-       .text
-       .global ___mulsi3
-___mulsi3:
-       enter   #0
-       push.w  r2
-       mov.w   SAL,r0
-       mulu.w  SBL,r0          /* writes to r2r0 */
-       mov.w   r0,mem0
-       mov.w   r2,mem2
-       mov.w   SAL,r0
-       mulu.w  SBH,r0          /* writes to r2r0 */
-       add.w   r0,mem2
-       mov.w   SAH,r0
-       mulu.w  SBL,r0          /* writes to r2r0 */
-       add.w   r0,mem2
-       pop.w   r2
-       exitd
-#endif
-
-#ifdef L__m32c_cmpsi2
-       .text
-       .global ___cmpsi2
-___cmpsi2:
-       enter   #0
-       cmp.w   SBH,SAH
-       jgt     cmpsi_gt
-       jlt     cmpsi_lt
-       cmp.w   SBL,SAL
-       jgt     cmpsi_gt
-       jlt     cmpsi_lt
-       mov.w   #1,r0
-       exitd
-cmpsi_gt:
-       mov.w   #2,r0
-       exitd
-cmpsi_lt:
-       mov.w   #0,r0
-       exitd
-#endif
-
-#ifdef L__m32c_ucmpsi2
-       .text
-       .global ___ucmpsi2
-___ucmpsi2:
-       enter   #0
-       cmp.w   SBH,SAH
-       jgtu    cmpsi_gt
-       jltu    cmpsi_lt
-       cmp.w   SBL,SAL
-       jgtu    cmpsi_gt
-       jltu    cmpsi_lt
-       mov.w   #1,r0
-       exitd
-cmpsi_gt:
-       mov.w   #2,r0
-       exitd
-cmpsi_lt:
-       mov.w   #0,r0
-       exitd
-#endif
-
-#ifdef L__m32c_jsri16
-       .text
-#ifdef A16
-       .global m32c_jsri16
-m32c_jsri16:
-       add.w   #-1, sp
-
-       /* Read the address (16 bits) and return address (24 bits) off
-       the stack.  */
-       mov.w   4[sp], r0
-       mov.w   1[sp], r3
-       mov.b   3[sp], a0 /* This zero-extends, so the high byte has
-                            zero in it.  */
-
-       /* Write the return address, then new address, to the stack.  */
-       mov.w   a0, 1[sp] /* Just to get the zero in 2[sp].  */
-       mov.w   r0, 0[sp]
-       mov.w   r3, 3[sp]
-       mov.b   a0, 5[sp]
-
-       /* This "returns" to the target address, leaving the pending
-       return address on the stack.  */
-       rts
-#endif
-
-#endif
diff --git a/gcc/config/m32c/m32c.c b/gcc/config/m32c/m32c.c

index 7040df69fcfc9b5b226e4f9a18c89f540654a5d6..04f690506098fffc1669a62fb4b5343e4b08d63c 100644 (file)
--- a/gcc/config/m32c/m32c.c
+++ b/gcc/config/m32c/m32c.c
@@ -391,7 +391,7 @@ class_can_hold_mode (reg_class_t rclass, enum machine_mode mode)
     we allow the user to limit the number of memregs available, in
     order to try to persuade gcc to try harder to use real registers.
  
-   Memregs are provided by m32c-lib1.S.
+   Memregs are provided by lib1funcs.S.
  */
  
  int ok_to_change_target_memregs = TRUE;
diff --git a/gcc/config/m32c/t-m32c b/gcc/config/m32c/t-m32c

index b11f34d674ff60d3dd1adcb3aceeff9f6ae0300f..aad972a2575d632530d8c96f034d3459a60b6526 100644 (file)
--- a/gcc/config/m32c/t-m32c
+++ b/gcc/config/m32c/t-m32c
@@ -19,16 +19,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMSRC = m32c/m32c-lib1.S
-
-LIB1ASMFUNCS = \
-       __m32c_memregs \
-       __m32c_eh_return \
-       __m32c_mulsi3 \
-       __m32c_cmpsi2 \
-       __m32c_ucmpsi2 \
-       __m32c_jsri16
-
  LIB2FUNCS_EXTRA = $(srcdir)/config/m32c/m32c-lib2.c $(srcdir)/config/m32c/m32c-lib2-trapv.c
  
  # target-specific files
diff --git a/gcc/config/m32r/t-linux b/gcc/config/m32r/t-linux

index 487c0198786789c5de59ee87146f9c6333b42966..f3b89d21d0b8f80d6d038987186921aec45f454a 100644 (file)
--- a/gcc/config/m32r/t-linux
+++ b/gcc/config/m32r/t-linux
@@ -16,9 +16,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-# lib1funcs.asm is currently empty.
-CROSS_LIBGCC1 =
-
  # Turn off the SDA while compiling libgcc2.  There are no headers for it
  # and we want maximal upward compatibility here.
  
@@ -26,9 +23,3 @@ TARGET_LIBGCC2_CFLAGS = -G 0 -fPIC
  
  # Don't install "assert.h" in gcc. We use the one in glibc.
  INSTALL_ASSERT_H =
- 
-# Do not build libgcc1. Let gcc generate those functions. The GNU/Linux
-# C library can handle them.
-LIBGCC1 = 
-CROSS_LIBGCC1 =
-LIBGCC1_TEST =
diff --git a/gcc/config/m68k/lb1sf68.asm b/gcc/config/m68k/lb1sf68.asm

deleted file mode 100644 (file)

index 0339a09..0000000
--- a/gcc/config/m68k/lb1sf68.asm
+++ /dev/null
@@ -1,4116 +0,0 @@
-/* libgcc routines for 68000 w/o floating-point hardware.
-   Copyright (C) 1994, 1996, 1997, 1998, 2008, 2009 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/* Use this one for any 680x0; assumes no floating point hardware.
-   The trailing " '" appearing on some lines is for ANSI preprocessors.  Yuk.
-   Some of this code comes from MINIX, via the folks at ericsson.
-   D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
-*/
-
-/* These are predefined by new versions of GNU cpp.  */
-
-#ifndef __USER_LABEL_PREFIX__
-#define __USER_LABEL_PREFIX__ _
-#endif
-
-#ifndef __REGISTER_PREFIX__
-#define __REGISTER_PREFIX__
-#endif
-
-#ifndef __IMMEDIATE_PREFIX__
-#define __IMMEDIATE_PREFIX__ #
-#endif
-
-/* ANSI concatenation macros.  */
-
-#define CONCAT1(a, b) CONCAT2(a, b)
-#define CONCAT2(a, b) a ## b
-
-/* Use the right prefix for global labels.  */
-
-#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
-
-/* Note that X is a function.  */
-       
-#ifdef __ELF__
-#define FUNC(x) .type SYM(x),function
-#else
-/* The .proc pseudo-op is accepted, but ignored, by GAS.  We could just        
-   define this to the empty string for non-ELF systems, but defining it
-   to .proc means that the information is available to the assembler if
-   the need arises.  */
-#define FUNC(x) .proc
-#endif
-               
-/* Use the right prefix for registers.  */
-
-#define REG(x) CONCAT1 (__REGISTER_PREFIX__, x)
-
-/* Use the right prefix for immediate values.  */
-
-#define IMM(x) CONCAT1 (__IMMEDIATE_PREFIX__, x)
-
-#define d0 REG (d0)
-#define d1 REG (d1)
-#define d2 REG (d2)
-#define d3 REG (d3)
-#define d4 REG (d4)
-#define d5 REG (d5)
-#define d6 REG (d6)
-#define d7 REG (d7)
-#define a0 REG (a0)
-#define a1 REG (a1)
-#define a2 REG (a2)
-#define a3 REG (a3)
-#define a4 REG (a4)
-#define a5 REG (a5)
-#define a6 REG (a6)
-#define fp REG (fp)
-#define sp REG (sp)
-#define pc REG (pc)
-
-/* Provide a few macros to allow for PIC code support.
- * With PIC, data is stored A5 relative so we've got to take a bit of special
- * care to ensure that all loads of global data is via A5.  PIC also requires
- * jumps and subroutine calls to be PC relative rather than absolute.  We cheat
- * a little on this and in the PIC case, we use short offset branches and
- * hope that the final object code is within range (which it should be).
- */
-#ifndef __PIC__
-
-       /* Non PIC (absolute/relocatable) versions */
-
-       .macro PICCALL addr
-       jbsr    \addr
-       .endm
-
-       .macro PICJUMP addr
-       jmp     \addr
-       .endm
-
-       .macro PICLEA sym, reg
-       lea     \sym, \reg
-       .endm
-
-       .macro PICPEA sym, areg
-       pea     \sym
-       .endm
-
-#else /* __PIC__ */
-
-# if defined (__uClinux__)
-
-       /* Versions for uClinux */
-
-#  if defined(__ID_SHARED_LIBRARY__)
-
-       /* -mid-shared-library versions  */
-
-       .macro PICLEA sym, reg
-       movel   a5@(_current_shared_library_a5_offset_), \reg
-       movel   \sym@GOT(\reg), \reg
-       .endm
-
-       .macro PICPEA sym, areg
-       movel   a5@(_current_shared_library_a5_offset_), \areg
-       movel   \sym@GOT(\areg), sp@-
-       .endm
-
-       .macro PICCALL addr
-       PICLEA  \addr,a0
-       jsr     a0@
-       .endm
-
-       .macro PICJUMP addr
-       PICLEA  \addr,a0
-       jmp     a0@
-       .endm
-
-#  else /* !__ID_SHARED_LIBRARY__ */
-
-       /* Versions for -msep-data */
-
-       .macro PICLEA sym, reg
-       movel   \sym@GOT(a5), \reg
-       .endm
-
-       .macro PICPEA sym, areg
-       movel   \sym@GOT(a5), sp@-
-       .endm
-
-       .macro PICCALL addr
-#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__)
-       lea     \addr-.-8,a0
-       jsr     pc@(a0)
-#else
-       jbsr    \addr
-#endif
-       .endm
-
-       .macro PICJUMP addr
-       /* ISA C has no bra.l instruction, and since this assembly file
-          gets assembled into multiple object files, we avoid the
-          bra instruction entirely.  */
-#if defined (__mcoldfire__) && !defined (__mcfisab__)
-       lea     \addr-.-8,a0
-       jmp     pc@(a0)
-#else
-       bra     \addr
-#endif
-       .endm
-
-#  endif
-
-# else /* !__uClinux__ */
-
-       /* Versions for Linux */
-
-       .macro PICLEA sym, reg
-       movel   #_GLOBAL_OFFSET_TABLE_@GOTPC, \reg
-       lea     (-6, pc, \reg), \reg
-       movel   \sym@GOT(\reg), \reg
-       .endm
-
-       .macro PICPEA sym, areg
-       movel   #_GLOBAL_OFFSET_TABLE_@GOTPC, \areg
-       lea     (-6, pc, \areg), \areg
-       movel   \sym@GOT(\areg), sp@-
-       .endm
-
-       .macro PICCALL addr
-#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__)
-       lea     \addr-.-8,a0
-       jsr     pc@(a0)
-#else
-       jbsr    \addr
-#endif
-       .endm
-
-       .macro PICJUMP addr
-       /* ISA C has no bra.l instruction, and since this assembly file
-          gets assembled into multiple object files, we avoid the
-          bra instruction entirely.  */
-#if defined (__mcoldfire__) && !defined (__mcfisab__)
-       lea     \addr-.-8,a0
-       jmp     pc@(a0)
-#else
-       bra     \addr
-#endif
-       .endm
-
-# endif
-#endif /* __PIC__ */
-
-
-#ifdef L_floatex
-
-| This is an attempt at a decent floating point (single, double and 
-| extended double) code for the GNU C compiler. It should be easy to
-| adapt to other compilers (but beware of the local labels!).
-
-| Starting date: 21 October, 1990
-
-| It is convenient to introduce the notation (s,e,f) for a floating point
-| number, where s=sign, e=exponent, f=fraction. We will call a floating
-| point number fpn to abbreviate, independently of the precision.
-| Let MAX_EXP be in each case the maximum (biased) exponent (255 for floats,
-| 2047 for doubles and 32767 for long doubles). We then have the following
-| different cases:
-|  1. Normalized fpns have 0 < e < MAX_EXP. They correspond to 
-|     (-1)^s x 1.f x 2^(e-bias-1).
-|  2. Denormalized fpns have e=0. They correspond to numbers of the form
-|     (-1)^s x 0.f x 2^(-bias).
-|  3. +/-INFINITY have e=MAX_EXP, f=0.
-|  4. Quiet NaN (Not a Number) have all bits set.
-|  5. Signaling NaN (Not a Number) have s=0, e=MAX_EXP, f=1.
-
-|=============================================================================
-|                                  exceptions
-|=============================================================================
-
-| This is the floating point condition code register (_fpCCR):
-|
-| struct {
-|   short _exception_bits;     
-|   short _trap_enable_bits;   
-|   short _sticky_bits;
-|   short _rounding_mode;
-|   short _format;
-|   short _last_operation;
-|   union {
-|     float sf;
-|     double df;
-|   } _operand1;
-|   union {
-|     float sf;
-|     double df;
-|   } _operand2;
-| } _fpCCR;
-
-       .data
-       .even
-
-       .globl  SYM (_fpCCR)
-       
-SYM (_fpCCR):
-__exception_bits:
-       .word   0
-__trap_enable_bits:
-       .word   0
-__sticky_bits:
-       .word   0
-__rounding_mode:
-       .word   ROUND_TO_NEAREST
-__format:
-       .word   NIL
-__last_operation:
-       .word   NOOP
-__operand1:
-       .long   0
-       .long   0
-__operand2:
-       .long   0
-       .long   0
-
-| Offsets:
-EBITS  = __exception_bits - SYM (_fpCCR)
-TRAPE  = __trap_enable_bits - SYM (_fpCCR)
-STICK  = __sticky_bits - SYM (_fpCCR)
-ROUND  = __rounding_mode - SYM (_fpCCR)
-FORMT  = __format - SYM (_fpCCR)
-LASTO  = __last_operation - SYM (_fpCCR)
-OPER1  = __operand1 - SYM (_fpCCR)
-OPER2  = __operand2 - SYM (_fpCCR)
-
-| The following exception types are supported:
-INEXACT_RESULT                 = 0x0001
-UNDERFLOW              = 0x0002
-OVERFLOW               = 0x0004
-DIVIDE_BY_ZERO                 = 0x0008
-INVALID_OPERATION      = 0x0010
-
-| The allowed rounding modes are:
-UNKNOWN           = -1
-ROUND_TO_NEAREST  = 0 | round result to nearest representable value
-ROUND_TO_ZERO     = 1 | round result towards zero
-ROUND_TO_PLUS     = 2 | round result towards plus infinity
-ROUND_TO_MINUS    = 3 | round result towards minus infinity
-
-| The allowed values of format are:
-NIL          = 0
-SINGLE_FLOAT = 1
-DOUBLE_FLOAT = 2
-LONG_FLOAT   = 3
-
-| The allowed values for the last operation are:
-NOOP         = 0
-ADD          = 1
-MULTIPLY     = 2
-DIVIDE       = 3
-NEGATE       = 4
-COMPARE      = 5
-EXTENDSFDF   = 6
-TRUNCDFSF    = 7
-
-|=============================================================================
-|                           __clear_sticky_bit
-|=============================================================================
-
-| The sticky bits are normally not cleared (thus the name), whereas the 
-| exception type and exception value reflect the last computation. 
-| This routine is provided to clear them (you can also write to _fpCCR,
-| since it is globally visible).
-
-       .globl  SYM (__clear_sticky_bit)
-
-       .text
-       .even
-
-| void __clear_sticky_bit(void);
-SYM (__clear_sticky_bit):              
-       PICLEA  SYM (_fpCCR),a0
-#ifndef __mcoldfire__
-       movew   IMM (0),a0@(STICK)
-#else
-       clr.w   a0@(STICK)
-#endif
-       rts
-
-|=============================================================================
-|                           $_exception_handler
-|=============================================================================
-
-       .globl  $_exception_handler
-
-       .text
-       .even
-
-| This is the common exit point if an exception occurs.
-| NOTE: it is NOT callable from C!
-| It expects the exception type in d7, the format (SINGLE_FLOAT,
-| DOUBLE_FLOAT or LONG_FLOAT) in d6, and the last operation code in d5.
-| It sets the corresponding exception and sticky bits, and the format. 
-| Depending on the format it fills the corresponding slots for the 
-| operands which produced the exception (all this information is provided
-| so if you write your own exception handlers you have enough information
-| to deal with the problem).
-| Then checks to see if the corresponding exception is trap-enabled, 
-| in which case it pushes the address of _fpCCR and traps through 
-| trap FPTRAP (15 for the moment).
-
-FPTRAP = 15
-
-$_exception_handler:
-       PICLEA  SYM (_fpCCR),a0
-       movew   d7,a0@(EBITS)   | set __exception_bits
-#ifndef __mcoldfire__
-       orw     d7,a0@(STICK)   | and __sticky_bits
-#else
-       movew   a0@(STICK),d4
-       orl     d7,d4
-       movew   d4,a0@(STICK)
-#endif
-       movew   d6,a0@(FORMT)   | and __format
-       movew   d5,a0@(LASTO)   | and __last_operation
-
-| Now put the operands in place:
-#ifndef __mcoldfire__
-       cmpw    IMM (SINGLE_FLOAT),d6
-#else
-       cmpl    IMM (SINGLE_FLOAT),d6
-#endif
-       beq     1f
-       movel   a6@(8),a0@(OPER1)
-       movel   a6@(12),a0@(OPER1+4)
-       movel   a6@(16),a0@(OPER2)
-       movel   a6@(20),a0@(OPER2+4)
-       bra     2f
-1:     movel   a6@(8),a0@(OPER1)
-       movel   a6@(12),a0@(OPER2)
-2:
-| And check whether the exception is trap-enabled:
-#ifndef __mcoldfire__
-       andw    a0@(TRAPE),d7   | is exception trap-enabled?
-#else
-       clrl    d6
-       movew   a0@(TRAPE),d6
-       andl    d6,d7
-#endif
-       beq     1f              | no, exit
-       PICPEA  SYM (_fpCCR),a1 | yes, push address of _fpCCR
-       trap    IMM (FPTRAP)    | and trap
-#ifndef __mcoldfire__
-1:     moveml  sp@+,d2-d7      | restore data registers
-#else
-1:     moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6              | and return
-       rts
-#endif /* L_floatex */
-
-#ifdef  L_mulsi3
-       .text
-       FUNC(__mulsi3)
-       .globl  SYM (__mulsi3)
-SYM (__mulsi3):
-       movew   sp@(4), d0      /* x0 -> d0 */
-       muluw   sp@(10), d0     /* x0*y1 */
-       movew   sp@(6), d1      /* x1 -> d1 */
-       muluw   sp@(8), d1      /* x1*y0 */
-#ifndef __mcoldfire__
-       addw    d1, d0
-#else
-       addl    d1, d0
-#endif
-       swap    d0
-       clrw    d0
-       movew   sp@(6), d1      /* x1 -> d1 */
-       muluw   sp@(10), d1     /* x1*y1 */
-       addl    d1, d0
-
-       rts
-#endif /* L_mulsi3 */
-
-#ifdef  L_udivsi3
-       .text
-       FUNC(__udivsi3)
-       .globl  SYM (__udivsi3)
-SYM (__udivsi3):
-#ifndef __mcoldfire__
-       movel   d2, sp@-
-       movel   sp@(12), d1     /* d1 = divisor */
-       movel   sp@(8), d0      /* d0 = dividend */
-
-       cmpl    IMM (0x10000), d1 /* divisor >= 2 ^ 16 ?   */
-       jcc     L3              /* then try next algorithm */
-       movel   d0, d2
-       clrw    d2
-       swap    d2
-       divu    d1, d2          /* high quotient in lower word */
-       movew   d2, d0          /* save high quotient */
-       swap    d0
-       movew   sp@(10), d2     /* get low dividend + high rest */
-       divu    d1, d2          /* low quotient */
-       movew   d2, d0
-       jra     L6
-
-L3:    movel   d1, d2          /* use d2 as divisor backup */
-L4:    lsrl    IMM (1), d1     /* shift divisor */
-       lsrl    IMM (1), d0     /* shift dividend */
-       cmpl    IMM (0x10000), d1 /* still divisor >= 2 ^ 16 ?  */
-       jcc     L4
-       divu    d1, d0          /* now we have 16-bit divisor */
-       andl    IMM (0xffff), d0 /* keep 16-bit quotient, ignore remainder */
-
-/* Multiply the 16-bit tentative quotient with the 32-bit divisor.  Because of
-   the operand ranges, this might give a 33-bit product.  If this product is
-   greater than the dividend, the tentative quotient was too large. */
-       movel   d2, d1
-       mulu    d0, d1          /* low part, 32 bits */
-       swap    d2
-       mulu    d0, d2          /* high part, at most 17 bits */
-       swap    d2              /* align high part with low part */
-       tstw    d2              /* high part 17 bits? */
-       jne     L5              /* if 17 bits, quotient was too large */
-       addl    d2, d1          /* add parts */
-       jcs     L5              /* if sum is 33 bits, quotient was too large */
-       cmpl    sp@(8), d1      /* compare the sum with the dividend */
-       jls     L6              /* if sum > dividend, quotient was too large */
-L5:    subql   IMM (1), d0     /* adjust quotient */
-
-L6:    movel   sp@+, d2
-       rts
-
-#else /* __mcoldfire__ */
-
-/* ColdFire implementation of non-restoring division algorithm from
-   Hennessy & Patterson, Appendix A. */
-       link    a6,IMM (-12)
-       moveml  d2-d4,sp@
-       movel   a6@(8),d0
-       movel   a6@(12),d1
-       clrl    d2              | clear p
-       moveq   IMM (31),d4
-L1:    addl    d0,d0           | shift reg pair (p,a) one bit left
-       addxl   d2,d2
-       movl    d2,d3           | subtract b from p, store in tmp.
-       subl    d1,d3
-       jcs     L2              | if no carry,
-       bset    IMM (0),d0      | set the low order bit of a to 1,
-       movl    d3,d2           | and store tmp in p.
-L2:    subql   IMM (1),d4
-       jcc     L1
-       moveml  sp@,d2-d4       | restore data registers
-       unlk    a6              | and return
-       rts
-#endif /* __mcoldfire__ */
-
-#endif /* L_udivsi3 */
-
-#ifdef  L_divsi3
-       .text
-       FUNC(__divsi3)
-       .globl  SYM (__divsi3)
-SYM (__divsi3):
-       movel   d2, sp@-
-
-       moveq   IMM (1), d2     /* sign of result stored in d2 (=1 or =-1) */
-       movel   sp@(12), d1     /* d1 = divisor */
-       jpl     L1
-       negl    d1
-#ifndef __mcoldfire__
-       negb    d2              /* change sign because divisor <0  */
-#else
-       negl    d2              /* change sign because divisor <0  */
-#endif
-L1:    movel   sp@(8), d0      /* d0 = dividend */
-       jpl     L2
-       negl    d0
-#ifndef __mcoldfire__
-       negb    d2
-#else
-       negl    d2
-#endif
-
-L2:    movel   d1, sp@-
-       movel   d0, sp@-
-       PICCALL SYM (__udivsi3) /* divide abs(dividend) by abs(divisor) */
-       addql   IMM (8), sp
-
-       tstb    d2
-       jpl     L3
-       negl    d0
-
-L3:    movel   sp@+, d2
-       rts
-#endif /* L_divsi3 */
-
-#ifdef  L_umodsi3
-       .text
-       FUNC(__umodsi3)
-       .globl  SYM (__umodsi3)
-SYM (__umodsi3):
-       movel   sp@(8), d1      /* d1 = divisor */
-       movel   sp@(4), d0      /* d0 = dividend */
-       movel   d1, sp@-
-       movel   d0, sp@-
-       PICCALL SYM (__udivsi3)
-       addql   IMM (8), sp
-       movel   sp@(8), d1      /* d1 = divisor */
-#ifndef __mcoldfire__
-       movel   d1, sp@-
-       movel   d0, sp@-
-       PICCALL SYM (__mulsi3)  /* d0 = (a/b)*b */
-       addql   IMM (8), sp
-#else
-       mulsl   d1,d0
-#endif
-       movel   sp@(4), d1      /* d1 = dividend */
-       subl    d0, d1          /* d1 = a - (a/b)*b */
-       movel   d1, d0
-       rts
-#endif /* L_umodsi3 */
-
-#ifdef  L_modsi3
-       .text
-       FUNC(__modsi3)
-       .globl  SYM (__modsi3)
-SYM (__modsi3):
-       movel   sp@(8), d1      /* d1 = divisor */
-       movel   sp@(4), d0      /* d0 = dividend */
-       movel   d1, sp@-
-       movel   d0, sp@-
-       PICCALL SYM (__divsi3)
-       addql   IMM (8), sp
-       movel   sp@(8), d1      /* d1 = divisor */
-#ifndef __mcoldfire__
-       movel   d1, sp@-
-       movel   d0, sp@-
-       PICCALL SYM (__mulsi3)  /* d0 = (a/b)*b */
-       addql   IMM (8), sp
-#else
-       mulsl   d1,d0
-#endif
-       movel   sp@(4), d1      /* d1 = dividend */
-       subl    d0, d1          /* d1 = a - (a/b)*b */
-       movel   d1, d0
-       rts
-#endif /* L_modsi3 */
-
-
-#ifdef  L_double
-
-       .globl  SYM (_fpCCR)
-       .globl  $_exception_handler
-
-QUIET_NaN      = 0xffffffff
-
-D_MAX_EXP      = 0x07ff
-D_BIAS         = 1022
-DBL_MAX_EXP    = D_MAX_EXP - D_BIAS
-DBL_MIN_EXP    = 1 - D_BIAS
-DBL_MANT_DIG   = 53
-
-INEXACT_RESULT                 = 0x0001
-UNDERFLOW              = 0x0002
-OVERFLOW               = 0x0004
-DIVIDE_BY_ZERO                 = 0x0008
-INVALID_OPERATION      = 0x0010
-
-DOUBLE_FLOAT = 2
-
-NOOP         = 0
-ADD          = 1
-MULTIPLY     = 2
-DIVIDE       = 3
-NEGATE       = 4
-COMPARE      = 5
-EXTENDSFDF   = 6
-TRUNCDFSF    = 7
-
-UNKNOWN           = -1
-ROUND_TO_NEAREST  = 0 | round result to nearest representable value
-ROUND_TO_ZERO     = 1 | round result towards zero
-ROUND_TO_PLUS     = 2 | round result towards plus infinity
-ROUND_TO_MINUS    = 3 | round result towards minus infinity
-
-| Entry points:
-
-       .globl SYM (__adddf3)
-       .globl SYM (__subdf3)
-       .globl SYM (__muldf3)
-       .globl SYM (__divdf3)
-       .globl SYM (__negdf2)
-       .globl SYM (__cmpdf2)
-       .globl SYM (__cmpdf2_internal)
-       .hidden SYM (__cmpdf2_internal)
-
-       .text
-       .even
-
-| These are common routines to return and signal exceptions.   
-
-Ld$den:
-| Return and signal a denormalized number
-       orl     d7,d0
-       movew   IMM (INEXACT_RESULT+UNDERFLOW),d7
-       moveq   IMM (DOUBLE_FLOAT),d6
-       PICJUMP $_exception_handler
-
-Ld$infty:
-Ld$overflow:
-| Return a properly signed INFINITY and set the exception flags 
-       movel   IMM (0x7ff00000),d0
-       movel   IMM (0),d1
-       orl     d7,d0
-       movew   IMM (INEXACT_RESULT+OVERFLOW),d7
-       moveq   IMM (DOUBLE_FLOAT),d6
-       PICJUMP $_exception_handler
-
-Ld$underflow:
-| Return 0 and set the exception flags 
-       movel   IMM (0),d0
-       movel   d0,d1
-       movew   IMM (INEXACT_RESULT+UNDERFLOW),d7
-       moveq   IMM (DOUBLE_FLOAT),d6
-       PICJUMP $_exception_handler
-
-Ld$inop:
-| Return a quiet NaN and set the exception flags
-       movel   IMM (QUIET_NaN),d0
-       movel   d0,d1
-       movew   IMM (INEXACT_RESULT+INVALID_OPERATION),d7
-       moveq   IMM (DOUBLE_FLOAT),d6
-       PICJUMP $_exception_handler
-
-Ld$div$0:
-| Return a properly signed INFINITY and set the exception flags
-       movel   IMM (0x7ff00000),d0
-       movel   IMM (0),d1
-       orl     d7,d0
-       movew   IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7
-       moveq   IMM (DOUBLE_FLOAT),d6
-       PICJUMP $_exception_handler
-
-|=============================================================================
-|=============================================================================
-|                         double precision routines
-|=============================================================================
-|=============================================================================
-
-| A double precision floating point number (double) has the format:
-|
-| struct _double {
-|  unsigned int sign      : 1;  /* sign bit */ 
-|  unsigned int exponent  : 11; /* exponent, shifted by 1022 */
-|  unsigned int fraction  : 52; /* fraction */
-| } double;
-| 
-| Thus sizeof(double) = 8 (64 bits). 
-|
-| All the routines are callable from C programs, and return the result 
-| in the register pair d0-d1. They also preserve all registers except 
-| d0-d1 and a0-a1.
-
-|=============================================================================
-|                              __subdf3
-|=============================================================================
-
-| double __subdf3(double, double);
-       FUNC(__subdf3)
-SYM (__subdf3):
-       bchg    IMM (31),sp@(12) | change sign of second operand
-                               | and fall through, so we always add
-|=============================================================================
-|                              __adddf3
-|=============================================================================
-
-| double __adddf3(double, double);
-       FUNC(__adddf3)
-SYM (__adddf3):
-#ifndef __mcoldfire__
-       link    a6,IMM (0)      | everything will be done in registers
-       moveml  d2-d7,sp@-      | save data registers d2-d7 (but not d0-d1)
-#else
-       link    a6,IMM (-24)
-       moveml  d2-d7,sp@
-#endif
-       movel   a6@(8),d0       | get first operand
-       movel   a6@(12),d1      | 
-       movel   a6@(16),d2      | get second operand
-       movel   a6@(20),d3      | 
-
-       movel   d0,d7           | get d0's sign bit in d7 '
-       addl    d1,d1           | check and clear sign bit of a, and gain one
-       addxl   d0,d0           | bit of extra precision
-       beq     Ladddf$b        | if zero return second operand
-
-       movel   d2,d6           | save sign in d6 
-       addl    d3,d3           | get rid of sign bit and gain one bit of
-       addxl   d2,d2           | extra precision
-       beq     Ladddf$a        | if zero return first operand
-
-       andl    IMM (0x80000000),d7 | isolate a's sign bit '
-        swap   d6              | and also b's sign bit '
-#ifndef __mcoldfire__
-       andw    IMM (0x8000),d6 |
-       orw     d6,d7           | and combine them into d7, so that a's sign '
-                               | bit is in the high word and b's is in the '
-                               | low word, so d6 is free to be used
-#else
-       andl    IMM (0x8000),d6
-       orl     d6,d7
-#endif
-       movel   d7,a0           | now save d7 into a0, so d7 is free to
-                               | be used also
-
-| Get the exponents and check for denormalized and/or infinity.
-| Both numbers were shifted left once above, so the exponent field sits in
-| bits 21-31 and the fraction in bits 0-20 of the high longword.
-
-       movel   IMM (0x001fffff),d6 | mask for the fraction
-       movel   IMM (0x00200000),d7 | mask to put hidden bit back
-
-       movel   d0,d4           | copy the high longword of a
-       andl    d6,d0           | get fraction in d0
-       notl    d6              | make d6 into mask for the exponent
-       andl    d6,d4           | get exponent in d4
-       beq     Ladddf$a$den    | branch if a is denormalized
-       cmpl    d6,d4           | check for INFINITY or NaN
-       beq     Ladddf$nf       | exponent field all ones: handle separately
-       orl     d7,d0           | and put hidden bit back
-Ladddf$1:
-       swap    d4              | shift right exponent so that it starts
-#ifndef __mcoldfire__
-       lsrw    IMM (5),d4      | in bit 0 and not bit 21
-#else
-       lsrl    IMM (5),d4      | in bit 0 and not bit 21
-#endif
-| Now we have a's exponent in d4 and fraction in d0-d1 '
-       movel   d2,d5           | save b to get exponent
-       andl    d6,d5           | get exponent in d5
-       beq     Ladddf$b$den    | branch if b is denormalized
-       cmpl    d6,d5           | check for INFINITY or NaN
-       beq     Ladddf$nf       | exponent field all ones: handle separately
-       notl    d6              | make d6 into mask for the fraction again
-       andl    d6,d2           | and get fraction in d2
-       orl     d7,d2           | and put hidden bit back
-Ladddf$2:
-       swap    d5              | shift right exponent so that it starts
-#ifndef __mcoldfire__
-       lsrw    IMM (5),d5      | in bit 0 and not bit 21
-#else
-       lsrl    IMM (5),d5      | in bit 0 and not bit 21
-#endif
-
-| Now we have b's exponent in d5 and fraction in d2-d3. '
-
-| The situation now is as follows: the signs are combined in a0, the 
-| numbers are in d0-d1 (a) and d2-d3 (b), and the exponents in d4 (a)
-| and d5 (b). To do the rounding correctly we need to keep all the
-| bits until the end, so we need to use d0-d1-d2-d3 for the first number
-| and d4-d5-d6-d7 for the second. To do this we store (temporarily) the
-| exponents in a2-a3.
-
-#ifndef __mcoldfire__
-       moveml  a2-a3,sp@-      | save the address registers
-#else
-       movel   a2,sp@-         | save the address registers; a4 is the
-       movel   a3,sp@-         | scratch register for the exchanges that
-       movel   a4,sp@-         | this branch performs without exg
-#endif
-
-       movel   d4,a2           | save the exponents
-       movel   d5,a3           | 
-
-       movel   IMM (0),d7      | and move the numbers around
-       movel   d7,d6           | d6-d7 = 0 (low half of b)
-       movel   d3,d5           | b goes to d4-d5
-       movel   d2,d4           |
-       movel   d7,d3           | d2-d3 = 0 (low half of a)
-       movel   d7,d2           |
-
-| Here we shift the numbers until the exponents are the same, and put 
-| the largest exponent in a2.
-#ifndef __mcoldfire__
-       exg     d4,a2           | get exponents back
-       exg     d5,a3           |
-       cmpw    d4,d5           | compare the exponents
-#else
-       movel   d4,a4           | get exponents back
-       movel   a2,d4           | (d4 <-> a2 through a4)
-       movel   a4,a2
-       movel   d5,a4           | (d5 <-> a3 through a4)
-       movel   a3,d5
-       movel   a4,a3
-       cmpl    d4,d5           | compare the exponents
-#endif
-       beq     Ladddf$3        | if equal don't shift '
-       bhi     9f              | branch if second exponent is higher
-
-| Here we have a's exponent larger than b's, so we have to shift b. We do 
-| this by using as counter d2:
-1:     movew   d4,d2           | move largest exponent to d2
-#ifndef __mcoldfire__
-       subw    d5,d2           | and subtract second exponent
-       exg     d4,a2           | get back the longs we saved
-       exg     d5,a3           |
-#else
-       subl    d5,d2           | and subtract second exponent
-       movel   d4,a4           | get back the longs we saved
-       movel   a2,d4
-       movel   a4,a2
-       movel   d5,a4
-       movel   a3,d5
-       movel   a4,a3
-#endif
-| if difference is too large we don't shift (actually, we can just exit) '
-#ifndef __mcoldfire__
-       cmpw    IMM (DBL_MANT_DIG+2),d2
-#else
-       cmpl    IMM (DBL_MANT_DIG+2),d2
-#endif
-       bge     Ladddf$b$small
-#ifndef __mcoldfire__
-       cmpw    IMM (32),d2     | if difference >= 32, shift by longs
-#else
-       cmpl    IMM (32),d2     | if difference >= 32, shift by longs
-#endif
-       bge     5f
-2:
-#ifndef __mcoldfire__
-       cmpw    IMM (16),d2     | if difference >= 16, shift by words   
-#else
-       cmpl    IMM (16),d2     | if difference >= 16, shift by words   
-#endif
-       bge     6f
-       bra     3f              | enter dbra loop
-
-4:                             | shift d4-d5-d6-d7 right by one bit
-#ifndef __mcoldfire__
-       lsrl    IMM (1),d4
-       roxrl   IMM (1),d5
-       roxrl   IMM (1),d6
-       roxrl   IMM (1),d7
-#else
-       lsrl    IMM (1),d7      | no roxrl here: start from the low longword
-       btst    IMM (0),d6      | and carry bit 0 of each higher longword
-       beq     10f             | into bit 31 of the one below it
-       bset    IMM (31),d7
-10:    lsrl    IMM (1),d6
-       btst    IMM (0),d5
-       beq     11f
-       bset    IMM (31),d6
-11:    lsrl    IMM (1),d5
-       btst    IMM (0),d4
-       beq     12f
-       bset    IMM (31),d5
-12:    lsrl    IMM (1),d4
-#endif
-3:
-#ifndef __mcoldfire__
-       dbra    d2,4b
-#else
-       subql   IMM (1),d2
-       bpl     4b      
-#endif
-       movel   IMM (0),d2      | d2 was the counter: make d2-d3 zero again
-       movel   d2,d3   
-       bra     Ladddf$4
-5:                             | shift d4-d5-d6-d7 right by one longword
-       movel   d6,d7
-       movel   d5,d6
-       movel   d4,d5
-       movel   IMM (0),d4
-#ifndef __mcoldfire__
-       subw    IMM (32),d2
-#else
-       subl    IMM (32),d2
-#endif
-       bra     2b
-6:                             | shift d4-d5-d6-d7 right by one word
-       movew   d6,d7
-       swap    d7
-       movew   d5,d6
-       swap    d6
-       movew   d4,d5
-       swap    d5
-       movew   IMM (0),d4
-       swap    d4
-#ifndef __mcoldfire__
-       subw    IMM (16),d2
-#else
-       subl    IMM (16),d2
-#endif
-       bra     3b
-       
-9:                             | b has the larger exponent: shift a, count in d6
-#ifndef __mcoldfire__
-       exg     d4,d5
-       movew   d4,d6
-       subw    d5,d6           | keep d5 (largest exponent) in d4
-       exg     d4,a2
-       exg     d5,a3
-#else
-       movel   d5,d6           | exchange d4 and d5 through d6
-       movel   d4,d5
-       movel   d6,d4
-       subl    d5,d6           | d6 = difference of the exponents
-       movel   d4,a4           | d4 <-> a2 through a4
-       movel   a2,d4
-       movel   a4,a2
-       movel   d5,a4           | d5 <-> a3 through a4
-       movel   a3,d5
-       movel   a4,a3
-#endif
-| if difference is too large we don't shift (actually, we can just exit) '
-#ifndef __mcoldfire__
-       cmpw    IMM (DBL_MANT_DIG+2),d6
-#else
-       cmpl    IMM (DBL_MANT_DIG+2),d6
-#endif
-       bge     Ladddf$a$small
-#ifndef __mcoldfire__
-       cmpw    IMM (32),d6     | if difference >= 32, shift by longs
-#else
-       cmpl    IMM (32),d6     | if difference >= 32, shift by longs
-#endif
-       bge     5f
-2:
-#ifndef __mcoldfire__
-       cmpw    IMM (16),d6     | if difference >= 16, shift by words   
-#else
-       cmpl    IMM (16),d6     | if difference >= 16, shift by words   
-#endif
-       bge     6f
-       bra     3f              | enter dbra loop
-
-4:                             | shift d0-d1-d2-d3 right by one bit
-#ifndef __mcoldfire__
-       lsrl    IMM (1),d0
-       roxrl   IMM (1),d1
-       roxrl   IMM (1),d2
-       roxrl   IMM (1),d3
-#else
-       lsrl    IMM (1),d3      | no roxrl here: start from the low longword
-       btst    IMM (0),d2      | and carry bit 0 of each higher longword
-       beq     10f             | into bit 31 of the one below it
-       bset    IMM (31),d3
-10:    lsrl    IMM (1),d2
-       btst    IMM (0),d1
-       beq     11f
-       bset    IMM (31),d2
-11:    lsrl    IMM (1),d1
-       btst    IMM (0),d0
-       beq     12f
-       bset    IMM (31),d1
-12:    lsrl    IMM (1),d0
-#endif
-3:
-#ifndef __mcoldfire__
-       dbra    d6,4b
-#else
-       subql   IMM (1),d6
-       bpl     4b
-#endif
-       movel   IMM (0),d7      | d6 was the counter: make d6-d7 zero again
-       movel   d7,d6
-       bra     Ladddf$4
-5:                             | shift d0-d1-d2-d3 right by one longword
-       movel   d2,d3
-       movel   d1,d2
-       movel   d0,d1
-       movel   IMM (0),d0
-#ifndef __mcoldfire__
-       subw    IMM (32),d6
-#else
-       subl    IMM (32),d6
-#endif
-       bra     2b
-6:                             | shift d0-d1-d2-d3 right by one word
-       movew   d2,d3
-       swap    d3
-       movew   d1,d2
-       swap    d2
-       movew   d0,d1
-       swap    d1
-       movew   IMM (0),d0
-       swap    d0
-#ifndef __mcoldfire__
-       subw    IMM (16),d6
-#else
-       subl    IMM (16),d6
-#endif
-       bra     3b
-Ladddf$3:                      | exponents are equal: nothing to shift
-#ifndef __mcoldfire__
-       exg     d4,a2           | get back the longs we saved
-       exg     d5,a3
-#else
-       movel   d4,a4           | get back the longs we saved
-       movel   a2,d4           | (d4 <-> a2 through a4)
-       movel   a4,a2
-       movel   d5,a4           | (d5 <-> a3 through a4)
-       movel   a3,d5
-       movel   a4,a3
-#endif
-Ladddf$4:      
-| Now we have the numbers in d0--d3 and d4--d7, the exponent in a2, and
-| the signs in a0.
-
-| Here we have to decide whether to add or subtract the numbers:
-#ifndef __mcoldfire__
-       exg     d7,a0           | get the signs 
-       exg     d6,a3           | a3 is free to be used
-#else
-       movel   d7,a4           | get the signs (d7 <-> a0 through a4)
-       movel   a0,d7
-       movel   a4,a0
-       movel   d6,a4           | park d6 in a3 (d6 <-> a3 through a4)
-       movel   a3,d6
-       movel   a4,a3
-#endif
-       movel   d7,d6           |
-       movew   IMM (0),d7      | get a's sign in d7 '
-       swap    d6              |
-       movew   IMM (0),d6      | and b's sign in d6 '
-       eorl    d7,d6           | compare the signs
-       bmi     Lsubdf$0        | if the signs are different we have 
-                               | to subtract
-#ifndef __mcoldfire__
-       exg     d7,a0           | else we add the numbers
-       exg     d6,a3           |
-#else
-       movel   d7,a4           | else we add the numbers
-       movel   a0,d7
-       movel   a4,a0
-       movel   d6,a4
-       movel   a3,d6
-       movel   a4,a3
-#endif
-       addl    d7,d3           | add the four longwords of b to those
-       addxl   d6,d2           | of a, lowest first, carrying upwards
-       addxl   d5,d1           | 
-       addxl   d4,d0           |
-
-       movel   a2,d4           | return exponent to d4
-       movel   a0,d7           | 
-       andl    IMM (0x80000000),d7 | d7 now has the sign
-
-#ifndef __mcoldfire__
-       moveml  sp@+,a2-a3      
-#else
-       movel   sp@+,a4 
-       movel   sp@+,a3 
-       movel   sp@+,a2 
-#endif
-
-| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider
-| the case of denormalized numbers in the rounding routine itself).
-| As in the addition (not in the subtraction!) we could have set 
-| one more bit we check this:
-       btst    IMM (DBL_MANT_DIG+1),d0 
-       beq     1f
-#ifndef __mcoldfire__
-       lsrl    IMM (1),d0
-       roxrl   IMM (1),d1
-       roxrl   IMM (1),d2
-       roxrl   IMM (1),d3
-       addw    IMM (1),d4
-#else
-       lsrl    IMM (1),d3
-       btst    IMM (0),d2
-       beq     10f
-       bset    IMM (31),d3
-10:    lsrl    IMM (1),d2
-       btst    IMM (0),d1
-       beq     11f
-       bset    IMM (31),d2
-11:    lsrl    IMM (1),d1
-       btst    IMM (0),d0
-       beq     12f
-       bset    IMM (31),d1
-12:    lsrl    IMM (1),d0
-       addl    IMM (1),d4
-#endif
-1:
-       lea     pc@(Ladddf$5),a0 | to return from rounding routine
-       PICLEA  SYM (_fpCCR),a1 | check the rounding mode
-#ifdef __mcoldfire__
-       clrl    d6
-#endif
-       movew   a1@(6),d6       | rounding mode in d6
-       beq     Lround$to$nearest
-#ifndef __mcoldfire__
-       cmpw    IMM (ROUND_TO_PLUS),d6
-#else
-       cmpl    IMM (ROUND_TO_PLUS),d6
-#endif
-       bhi     Lround$to$minus
-       blt     Lround$to$zero
-       bra     Lround$to$plus
-Ladddf$5:
-| Put back the exponent and check for overflow
-#ifndef __mcoldfire__
-       cmpw    IMM (0x7ff),d4  | is the exponent big?
-#else
-       cmpl    IMM (0x7ff),d4  | is the exponent big?
-#endif
-       bge     1f
-       bclr    IMM (DBL_MANT_DIG-1),d0
-#ifndef __mcoldfire__
-       lslw    IMM (4),d4      | put exponent back into position
-#else
-       lsll    IMM (4),d4      | put exponent back into position
-#endif
-       swap    d0              | 
-#ifndef __mcoldfire__
-       orw     d4,d0           |
-#else
-       orl     d4,d0           |
-#endif
-       swap    d0              |
-       bra     Ladddf$ret
-1:
-       moveq   IMM (ADD),d5
-       bra     Ld$overflow
-
-Lsubdf$0:
-| Here we do the subtraction.
-#ifndef __mcoldfire__
-       exg     d7,a0           | put sign back in a0
-       exg     d6,a3           |
-#else
-       movel   d7,a4           | put sign back in a0
-       movel   a0,d7
-       movel   a4,a0
-       movel   d6,a4
-       movel   a3,d6
-       movel   a4,a3
-#endif
-       subl    d7,d3           | subtract the four longwords of b from
-       subxl   d6,d2           | those of a, lowest first, borrowing upwards
-       subxl   d5,d1           |
-       subxl   d4,d0           |
-       beq     Ladddf$ret$1    | if zero just exit
-       bpl     1f              | if positive skip the following
-       movel   a0,d7           |
-       bchg    IMM (31),d7     | change sign bit in d7
-       movel   d7,a0           |
-       negl    d3              |
-       negxl   d2              |
-       negxl   d1              | and negate result
-       negxl   d0              |
-1:     
-       movel   a2,d4           | return exponent to d4
-       movel   a0,d7
-       andl    IMM (0x80000000),d7 | isolate sign bit
-#ifndef __mcoldfire__
-       moveml  sp@+,a2-a3      |
-#else
-       movel   sp@+,a4
-       movel   sp@+,a3
-       movel   sp@+,a2
-#endif
-
-| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider
-| the case of denormalized numbers in the rounding routine itself).
-| As in the addition (not in the subtraction!) we could have set 
-| one more bit we check this:
-       btst    IMM (DBL_MANT_DIG+1),d0 
-       beq     1f
-#ifndef __mcoldfire__
-       lsrl    IMM (1),d0
-       roxrl   IMM (1),d1
-       roxrl   IMM (1),d2
-       roxrl   IMM (1),d3
-       addw    IMM (1),d4
-#else
-       lsrl    IMM (1),d3
-       btst    IMM (0),d2
-       beq     10f
-       bset    IMM (31),d3
-10:    lsrl    IMM (1),d2
-       btst    IMM (0),d1
-       beq     11f
-       bset    IMM (31),d2
-11:    lsrl    IMM (1),d1
-       btst    IMM (0),d0
-       beq     12f
-       bset    IMM (31),d1
-12:    lsrl    IMM (1),d0
-       addl    IMM (1),d4
-#endif
-1:
-       lea     pc@(Lsubdf$1),a0 | to return from rounding routine
-       PICLEA  SYM (_fpCCR),a1 | check the rounding mode
-#ifdef __mcoldfire__
-       clrl    d6
-#endif
-       movew   a1@(6),d6       | rounding mode in d6
-       beq     Lround$to$nearest
-#ifndef __mcoldfire__
-       cmpw    IMM (ROUND_TO_PLUS),d6
-#else
-       cmpl    IMM (ROUND_TO_PLUS),d6
-#endif
-       bhi     Lround$to$minus
-       blt     Lround$to$zero
-       bra     Lround$to$plus
-Lsubdf$1:
-| Put back the exponent and sign (we don't have overflow). '
-       bclr    IMM (DBL_MANT_DIG-1),d0 
-#ifndef __mcoldfire__
-       lslw    IMM (4),d4      | put exponent back into position
-#else
-       lsll    IMM (4),d4      | put exponent back into position
-#endif
-       swap    d0              | 
-#ifndef __mcoldfire__
-       orw     d4,d0           |
-#else
-       orl     d4,d0           |
-#endif
-       swap    d0              |
-       bra     Ladddf$ret
-
-| If one of the numbers was too small (difference of exponents >= 
-| DBL_MANT_DIG+2) we return the other (and now we don't have to '
-| check for finiteness or zero).
-Ladddf$a$small:
-#ifndef __mcoldfire__
-       moveml  sp@+,a2-a3      
-#else
-       movel   sp@+,a4
-       movel   sp@+,a3
-       movel   sp@+,a2
-#endif
-       movel   a6@(16),d0      | return b, reloaded from the stack
-       movel   a6@(20),d1
-       PICLEA  SYM (_fpCCR),a0
-       movew   IMM (0),a0@     | store 0 in the first word of _fpCCR
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7      | restore data registers
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6              | and return
-       rts
-
-Ladddf$b$small:
-#ifndef __mcoldfire__
-       moveml  sp@+,a2-a3      
-#else
-       movel   sp@+,a4 
-       movel   sp@+,a3 
-       movel   sp@+,a2 
-#endif
-       movel   a6@(8),d0       | return a, reloaded from the stack
-       movel   a6@(12),d1
-       PICLEA  SYM (_fpCCR),a0
-       movew   IMM (0),a0@     | store 0 in the first word of _fpCCR
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7      | restore data registers
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6              | and return
-       rts
-
-Ladddf$a$den:
-       movel   d7,d4           | d7 contains 0x00200000
-       bra     Ladddf$1
-
-Ladddf$b$den:
-       movel   d7,d5           | d7 contains 0x00200000
-       notl    d6              | make d6 into mask for the fraction again
-       bra     Ladddf$2
-
-Ladddf$b:
-| Return b (if a is zero)
-       movel   d2,d0
-       movel   d3,d1
-       bne     1f                      | Check if b is -0
-       cmpl    IMM (0x80000000),d0
-       bne     1f
-       andl    IMM (0x80000000),d7     | Use the sign of a
-       clrl    d0
-       bra     Ladddf$ret
-Ladddf$a:
-       movel   a6@(8),d0
-       movel   a6@(12),d1
-1:
-       moveq   IMM (ADD),d5
-| Check for NaN and +/-INFINITY.
-       movel   d0,d7                   | keep the sign bit of the value
-       andl    IMM (0x80000000),d7     | to be returned in d7
-       bclr    IMM (31),d0             | and clear it in d0
-       cmpl    IMM (0x7ff00000),d0     | exponent field all ones?
-       bge     2f                      | then it is INFINITY or NaN
-       movel   d0,d0                   | check for zero, since we don't  '
-       bne     Ladddf$ret              | want to return -0 by mistake
-       bclr    IMM (31),d7             | NOTE(review): only d0 was tested, d1
-       bra     Ladddf$ret              | may still be nonzero here - confirm
-2:
-       andl    IMM (0x000fffff),d0     | check for NaN (nonzero fraction)
-       orl     d1,d0                   |
-       bne     Ld$inop                 |
-       bra     Ld$infty                |
-       
-Ladddf$ret$1:
-#ifndef __mcoldfire__
-       moveml  sp@+,a2-a3      | restore regs and exit
-#else
-       movel   sp@+,a4         | restore regs and exit
-       movel   sp@+,a3
-       movel   sp@+,a2
-#endif
-
-Ladddf$ret:
-| Normal exit.
-       PICLEA  SYM (_fpCCR),a0
-       movew   IMM (0),a0@     | store 0 in the first word of _fpCCR
-       orl     d7,d0           | put sign bit back
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6
-       rts
-
-Ladddf$ret$den:
-| Return a denormalized number.
-#ifndef __mcoldfire__
-       lsrl    IMM (1),d0      | shift right once more
-       roxrl   IMM (1),d1      |
-#else
-       lsrl    IMM (1),d1      | shift right once more, moving bit 0 of
-       btst    IMM (0),d0      | d0 into bit 31 of d1 by hand
-       beq     10f
-       bset    IMM (31),d1
-10:    lsrl    IMM (1),d0
-#endif
-       bra     Ladddf$ret
-
-Ladddf$nf:
-       moveq   IMM (ADD),d5
-| This could be faster but it is not worth the effort, since it is not
-| executed very often. We sacrifice speed for clarity here.
-       movel   a6@(8),d0       | get the numbers back (remember that we
-       movel   a6@(12),d1      | did some processing already)
-       movel   a6@(16),d2      | 
-       movel   a6@(20),d3      | 
-       movel   IMM (0x7ff00000),d4 | useful constant (INFINITY)
-       movel   d0,d7           | save sign bits
-       movel   d2,d6           | 
-       bclr    IMM (31),d0     | clear sign bits
-       bclr    IMM (31),d2     | 
-| We know that one of them is either NaN or +/-INFINITY
-| Check for NaN (if either one is NaN return NaN)
-       cmpl    d4,d0           | check first a (d0)
-       bhi     Ld$inop         | if d0 > 0x7ff00000 or equal and
-       bne     2f
-       tstl    d1              | d1 > 0, a is NaN
-       bne     Ld$inop         | 
-2:     cmpl    d4,d2           | check now b (d2-d3)
-       bhi     Ld$inop         | 
-       bne     3f
-       tstl    d3              | 
-       bne     Ld$inop         | 
-3:
-| Now comes the check for +/-INFINITY. We know that both are (maybe not
-| finite) numbers, but we have to check if both are infinite whether we
-| are adding or subtracting them.
-       eorl    d7,d6           | to check sign bits
-       bmi     1f
-       andl    IMM (0x80000000),d7 | get (common) sign bit
-       bra     Ld$infty
-1:
-| We know one (or both) are infinite, so we test for equality between the
-| two numbers (if they are equal they have to be infinite both, so we
-| return NaN).
-       cmpl    d2,d0           | are both infinite?
-       bne     1f              | if d0 <> d2 they are not equal
-       cmpl    d3,d1           | if d0 == d2 test d3 and d1
-       beq     Ld$inop         | if equal return NaN
-1:     
-       andl    IMM (0x80000000),d7 | get a's sign bit '
-       cmpl    d4,d0           | test now for infinity
-       beq     Ld$infty        | if a is INFINITY return with this sign
-       bchg    IMM (31),d7     | else we know b is INFINITY and has
-       bra     Ld$infty        | the opposite sign
-
-|=============================================================================
-|                              __muldf3
-|=============================================================================
-
-| double __muldf3(double, double);
-| The operands are read from a6@(8) and a6@(16); the result is built in d0-d1.
-       FUNC(__muldf3)
-SYM (__muldf3):
-#ifndef __mcoldfire__
-       link    a6,IMM (0)
-       moveml  d2-d7,sp@-              | save data registers d2-d7
-#else
-       link    a6,IMM (-24)            | reserve 24 bytes of frame for d2-d7
-       moveml  d2-d7,sp@               | and store them there
-#endif
-       movel   a6@(8),d0               | get a into d0-d1
-       movel   a6@(12),d1              | 
-       movel   a6@(16),d2              | and b into d2-d3
-       movel   a6@(20),d3              |
-       movel   d0,d7                   | d7 will hold the sign of the product
-       eorl    d2,d7                   |
-       andl    IMM (0x80000000),d7     |
-       movel   d7,a0                   | save sign bit into a0 
-       movel   IMM (0x7ff00000),d7     | useful constant (+INFINITY)
-       movel   d7,d6                   | another (mask for fraction)
-       notl    d6                      |
-       bclr    IMM (31),d0             | get rid of a's sign bit '
-       movel   d0,d4                   | 
-       orl     d1,d4                   | 
-       beq     Lmuldf$a$0              | branch if a is zero
-       movel   d0,d4                   |
-       bclr    IMM (31),d2             | get rid of b's sign bit '
-       movel   d2,d5                   |
-       orl     d3,d5                   | 
-       beq     Lmuldf$b$0              | branch if b is zero
-       movel   d2,d5                   | 
-       cmpl    d7,d0                   | is a big?
-       bhi     Lmuldf$inop             | if a is NaN return NaN
-       beq     Lmuldf$a$nf             | we still have to check d1 and b ...
-       cmpl    d7,d2                   | now compare b with INFINITY
-       bhi     Lmuldf$inop             | is b NaN?
-       beq     Lmuldf$b$nf             | we still have to check d3 ...
-| Here we have both numbers finite and nonzero (and with no sign bit).
-| Now we get the exponents into d4 and d5.
-       andl    d7,d4                   | isolate exponent in d4
-       beq     Lmuldf$a$den            | if exponent zero, have denormalized
-       andl    d6,d0                   | isolate fraction
-       orl     IMM (0x00100000),d0     | and put hidden bit back
-       swap    d4                      | I like exponents in the first byte
-#ifndef __mcoldfire__
-       lsrw    IMM (4),d4              | 
-#else
-       lsrl    IMM (4),d4              | 
-#endif
-Lmuldf$1:                      
-       andl    d7,d5                   | isolate exponent in d5
-       beq     Lmuldf$b$den            | if exponent zero, have denormalized
-       andl    d6,d2                   | isolate fraction
-       orl     IMM (0x00100000),d2     | and put hidden bit back
-       swap    d5                      | move the exponent down to bit 0
-#ifndef __mcoldfire__
-       lsrw    IMM (4),d5              |
-#else
-       lsrl    IMM (4),d5              |
-#endif
-Lmuldf$2:                              |
-#ifndef __mcoldfire__
-       addw    d5,d4                   | add exponents
-       subw    IMM (D_BIAS+1),d4       | and subtract bias (plus one)
-#else
-       addl    d5,d4                   | add exponents
-       subl    IMM (D_BIAS+1),d4       | and subtract bias (plus one)
-#endif
-
-| We are now ready to do the multiplication. The situation is as follows:
-| both a and b have bit 52 ( bit 20 of d0 and d2) set (even if they were 
-| denormalized to start with!), which means that in the product bit 104 
-| (which will correspond to bit 8 of the fourth long) is set.
-
-| Here we have to do the product.
-| To do it we have to juggle the registers back and forth, as there are not
-| enough to keep everything in them. So we use the address registers to keep
-| some intermediate data.
-
-#ifndef __mcoldfire__
-       moveml  a2-a3,sp@-      | save a2 and a3 for temporary use
-#else
-       movel   a2,sp@-         | save a2 and a3 for temporary use, and a4
-       movel   a3,sp@-         | which is the scratch register for the
-       movel   a4,sp@-         | exchanges done without exg
-#endif
-       movel   IMM (0),a2      | a2 is a null register
-       movel   d4,a3           | and a3 will preserve the exponent
-
-| First, shift d2-d3 so bit 20 becomes bit 31:
-#ifndef __mcoldfire__
-       rorl    IMM (5),d2      | rotate d2 5 places right
-       swap    d2              | and swap it
-       rorl    IMM (5),d3      | do the same thing with d3
-       swap    d3              |
-       movew   d3,d6           | get the rightmost 11 bits of d3
-       andw    IMM (0x07ff),d6 |
-       orw     d6,d2           | and put them into d2
-       andw    IMM (0xf800),d3 | clear those bits in d3
-#else
-       moveq   IMM (11),d7     | left shift d2 11 bits
-       lsll    d7,d2
-       movel   d3,d6           | get a copy of d3
-       lsll    d7,d3           | left shift d3 11 bits
-       andl    IMM (0xffe00000),d6 | get the top 11 bits of d3
-       moveq   IMM (21),d7     | right shift them 21 bits
-       lsrl    d7,d6
-       orl     d6,d2           | stick them at the end of d2
-#endif
-
-       movel   d2,d6           | move b into d6-d7
-       movel   d3,d7           |
-       movel   d0,d4           | move a into d4-d5
-       movel   d1,d5           |
-       movel   IMM (0),d3      | and clear d0-d1-d2-d3 (to put result)
-       movel   d3,d2           |
-       movel   d3,d1           |
-       movel   d3,d0           |
-
-| We use a1 as counter:        
-       movel   IMM (DBL_MANT_DIG-1),a1         
-#ifndef __mcoldfire__
-       exg     d7,a1           | the loop below swaps them back on entry
-#else
-       movel   d7,a4           | the loop below swaps them back on entry
-       movel   a1,d7
-       movel   a4,a1
-#endif
-
-1:                             | one pass per bit of b, highest bit first
-#ifndef __mcoldfire__
-       exg     d7,a1           | put counter back in a1
-#else
-       movel   d7,a4           | put counter back in a1
-       movel   a1,d7
-       movel   a4,a1
-#endif
-       addl    d3,d3           | shift sum once left
-       addxl   d2,d2           |
-       addxl   d1,d1           |
-       addxl   d0,d0           |
-       addl    d7,d7           | shift b once left, top bit into carry
-       addxl   d6,d6           |
-       bcc     2f              | if bit clear skip the following
-#ifndef __mcoldfire__
-       exg     d7,a2           | d7 = 0 from a2, low half of b kept in a2
-#else
-       movel   d7,a4           | d7 = 0 from a2, low half of b kept in a2
-       movel   a2,d7
-       movel   a4,a2
-#endif
-       addl    d5,d3           | else add a to the sum
-       addxl   d4,d2           |
-       addxl   d7,d1           | d7 is zero: only the carry is added
-       addxl   d7,d0           |
-#ifndef __mcoldfire__
-       exg     d7,a2           | 
-#else
-       movel   d7,a4           | swap d7 and a2 back
-       movel   a2,d7
-       movel   a4,a2
-#endif
-2:
-#ifndef __mcoldfire__
-       exg     d7,a1           | put counter in d7
-       dbf     d7,1b           | decrement and branch
-#else
-       movel   d7,a4           | put counter in d7
-       movel   a1,d7
-       movel   a4,a1
-       subql   IMM (1),d7      | decrement and branch
-       bpl     1b
-#endif
-
-       movel   a3,d4           | restore exponent
-#ifndef __mcoldfire__
-       moveml  sp@+,a2-a3
-#else
-       movel   sp@+,a4
-       movel   sp@+,a3
-       movel   sp@+,a2
-#endif
-
-| Now we have the product in d0-d1-d2-d3, with bit 8 of d0 set. The 
-| first thing to do now is to normalize it so bit 8 becomes bit 
-| DBL_MANT_DIG-32 (to do the rounding); later we will shift right.
-       swap    d0              | shift d0-d1-d2-d3 left by one word
-       swap    d1
-       movew   d1,d0
-       swap    d2
-       movew   d2,d1
-       swap    d3
-       movew   d3,d2
-       movew   IMM (0),d3
-#ifndef __mcoldfire__
-       lsrl    IMM (1),d0      | and then right by three bits
-       roxrl   IMM (1),d1
-       roxrl   IMM (1),d2
-       roxrl   IMM (1),d3
-       lsrl    IMM (1),d0
-       roxrl   IMM (1),d1
-       roxrl   IMM (1),d2
-       roxrl   IMM (1),d3
-       lsrl    IMM (1),d0
-       roxrl   IMM (1),d1
-       roxrl   IMM (1),d2
-       roxrl   IMM (1),d3
-#else
-       moveq   IMM (29),d6     | and then right by three bits: the low
-       lsrl    IMM (3),d3      | three bits of each longword are moved to
-       movel   d2,d7           | the top of the next one by a left shift
-       lsll    d6,d7           | of 29
-       orl     d7,d3
-       lsrl    IMM (3),d2
-       movel   d1,d7
-       lsll    d6,d7
-       orl     d7,d2
-       lsrl    IMM (3),d1
-       movel   d0,d7
-       lsll    d6,d7
-       orl     d7,d1
-       lsrl    IMM (3),d0
-#endif
-       
-| Now round, check for over- and underflow, and exit.
-       movel   a0,d7           | get sign bit back into d7
-       moveq   IMM (MULTIPLY),d5
-
-       btst    IMM (DBL_MANT_DIG+1-32),d0
-       beq     Lround$exit
-#ifndef __mcoldfire__
-       lsrl    IMM (1),d0      | one bit too many: shift right once
-       roxrl   IMM (1),d1
-       addw    IMM (1),d4      | and adjust exponent
-#else
-       lsrl    IMM (1),d1      | one bit too many: shift right once
-       btst    IMM (0),d0
-       beq     10f
-       bset    IMM (31),d1
-10:    lsrl    IMM (1),d0
-       addl    IMM (1),d4      | and adjust exponent
-#endif
-       bra     Lround$exit
-
-Lmuldf$inop:
-       moveq   IMM (MULTIPLY),d5
-       bra     Ld$inop
-
-Lmuldf$b$nf:
-       moveq   IMM (MULTIPLY),d5
-       movel   a0,d7           | get sign bit back into d7
-       tstl    d3              | we know d2 == 0x7ff00000, so check d3
-       bne     Ld$inop         | if d3 <> 0 b is NaN
-       bra     Ld$overflow     | else we have overflow (since a is finite)
-
-| NOTE(review): b is not examined below, so when a is INFINITY and b is NaN
-| the overflow exit is taken rather than Ld$inop - confirm this is intended.
-Lmuldf$a$nf:
-       moveq   IMM (MULTIPLY),d5
-       movel   a0,d7           | get sign bit back into d7
-       tstl    d1              | we know d0 == 0x7ff00000, so check d1
-       bne     Ld$inop         | if d1 <> 0 a is NaN
-       bra     Ld$overflow     | else signal overflow
-
-| If either number is zero return zero, unless the other is +/-INFINITY or
-| NaN, in which case we return NaN.
-Lmuldf$b$0:
-       moveq   IMM (MULTIPLY),d5
-#ifndef __mcoldfire__
-       exg     d2,d0           | put b (==0) into d0-d1
-       exg     d3,d1           | and a (with sign bit cleared) into d2-d3
-       movel   a0,d0           | set result sign
-#else
-       movel   d0,d2           | put a into d2-d3
-       movel   d1,d3
-       movel   a0,d0           | put result zero into d0-d1
-       movq    IMM(0),d1
-#endif
-       bra     1f
-Lmuldf$a$0:
-       movel   a0,d0           | set result sign
-       movel   a6@(16),d2      | put b into d2-d3 again
-       movel   a6@(20),d3      |
-       bclr    IMM (31),d2     | clear sign bit
-1:     cmpl    IMM (0x7ff00000),d2 | check for non-finiteness
-       bge     Ld$inop         | in case NaN or +/-INFINITY return NaN
-       PICLEA  SYM (_fpCCR),a0
-       movew   IMM (0),a0@     | store 0 in the first word of _fpCCR
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6
-       rts
-
-| If a number is denormalized we put an exponent of 1 but do not put the 
-| hidden bit back into the fraction; instead we shift left until bit 20
-| (the hidden bit) is set, adjusting the exponent accordingly. We do this
-| to ensure that the product of the fractions is close to 1.
-Lmuldf$a$den:
-       movel   IMM (1),d4
-       andl    d6,d0
-1:     addl    d1,d1           | shift a left until bit 20 is set
-       addxl   d0,d0           |
-#ifndef __mcoldfire__
-       subw    IMM (1),d4      | and adjust exponent
-#else
-       subl    IMM (1),d4      | and adjust exponent
-#endif
-       btst    IMM (20),d0     |
-       bne     Lmuldf$1        |
-       bra     1b
-
-Lmuldf$b$den:
-       movel   IMM (1),d5
-       andl    d6,d2
-1:     addl    d3,d3           | shift b left until bit 20 is set
-       addxl   d2,d2           |
-#ifndef __mcoldfire__
-       subw    IMM (1),d5      | and adjust exponent
-#else
-       subql   IMM (1),d5      | and adjust exponent
-#endif
-       btst    IMM (20),d2     |
-       bne     Lmuldf$2        |
-       bra     1b
-
-
-|=============================================================================
-|                              __divdf3
-|=============================================================================
-
-| double __divdf3(double, double);
-| Computes a / b, where a is the first argument (a6@(8), a6@(12)) and b
-| the second (a6@(16), a6@(20)). The raw quotient is built in d0-d1 and
-| handed to Lround$exit together with the exponent in d4 and the sign in
-| d7. Zero, INFINITY, NaN and denormalized operands are dispatched to the
-| Ldivdf$... labels that follow this body.
-       FUNC(__divdf3)
-SYM (__divdf3):
-#ifndef __mcoldfire__
-       link    a6,IMM (0)
-       moveml  d2-d7,sp@-
-#else
-       link    a6,IMM (-24)
-       moveml  d2-d7,sp@
-#endif
-       movel   a6@(8),d0       | get a into d0-d1
-       movel   a6@(12),d1      | 
-       movel   a6@(16),d2      | and b into d2-d3
-       movel   a6@(20),d3      |
-       movel   d0,d7           | d7 will hold the sign of the result
-       eorl    d2,d7           |
-       andl    IMM (0x80000000),d7
-       movel   d7,a0           | save sign into a0
-       movel   IMM (0x7ff00000),d7 | useful constant (+INFINITY)
-       movel   d7,d6           | another (mask for fraction)
-       notl    d6              |
-       bclr    IMM (31),d0     | get rid of a's sign bit '
-       movel   d0,d4           |
-       orl     d1,d4           |
-       beq     Ldivdf$a$0      | branch if a is zero
-       movel   d0,d4           |
-       bclr    IMM (31),d2     | get rid of b's sign bit '
-       movel   d2,d5           |
-       orl     d3,d5           |
-       beq     Ldivdf$b$0      | branch if b is zero
-       movel   d2,d5
-       cmpl    d7,d0           | is a big?
-       bhi     Ldivdf$inop     | if a is NaN return NaN
-       beq     Ldivdf$a$nf     | if d0 == 0x7ff00000 we check d1
-       cmpl    d7,d2           | now compare b with INFINITY 
-       bhi     Ldivdf$inop     | if b is NaN return NaN
-       beq     Ldivdf$b$nf     | if d2 == 0x7ff00000 we check d3
-| Here we have both numbers finite and nonzero (and with no sign bit).
-| Now we get the exponents into d4 and d5 and normalize the numbers to
-| ensure that the ratio of the fractions is around 1. We do this by
-| making sure that both numbers have bit #DBL_MANT_DIG-32-1 (hidden bit)
-| set, even if they were denormalized to start with.
-| Thus, the result will satisfy: 2 > result > 1/2.
-       andl    d7,d4           | and isolate exponent in d4
-       beq     Ldivdf$a$den    | if exponent is zero we have a denormalized
-       andl    d6,d0           | and isolate fraction
-       orl     IMM (0x00100000),d0 | and put hidden bit back
-       swap    d4              | I like exponents in the first byte
-#ifndef __mcoldfire__
-       lsrw    IMM (4),d4      | 
-#else
-       lsrl    IMM (4),d4      | 
-#endif
-Ldivdf$1:                      | 
-       andl    d7,d5           |
-       beq     Ldivdf$b$den    |
-       andl    d6,d2           |
-       orl     IMM (0x00100000),d2
-       swap    d5              |
-#ifndef __mcoldfire__
-       lsrw    IMM (4),d5      |
-#else
-       lsrl    IMM (4),d5      |
-#endif
-Ldivdf$2:                      |
-#ifndef __mcoldfire__
-       subw    d5,d4           | subtract exponents
-       addw    IMM (D_BIAS),d4 | and add bias
-#else
-       subl    d5,d4           | subtract exponents
-       addl    IMM (D_BIAS),d4 | and add bias
-#endif
-
-| We are now ready to do the division. We have prepared things in such a way
-| that the ratio of the fractions will be less than 2 but greater than 1/2.
-| At this point the registers in use are:
-| d0-d1        hold a (first operand, bit DBL_MANT_DIG-32=0, bit 
-| DBL_MANT_DIG-1-32=1)
-| d2-d3        hold b (second operand, bit DBL_MANT_DIG-32=1)
-| d4   holds the difference of the exponents, corrected by the bias
-| a0   holds the sign of the ratio
-
-| To do the rounding correctly we need to keep information about the
-| nonsignificant bits. One way to do this would be to do the division
-| using four registers; another is to use two registers (as originally
-| I did), but use a sticky bit to preserve information about the 
-| fractional part. Note that we can keep that info in a1, which is not
-| used.
-       movel   IMM (0),d6      | d6-d7 will hold the result
-       movel   d6,d7           | 
-       movel   IMM (0),a1      | and a1 will hold the sticky bit
-
-       movel   IMM (DBL_MANT_DIG-32+1),d5      
-       
-| First loop: one quotient bit per pass, stored in d6 at the bit number
-| held in the counter d5.
-1:     cmpl    d0,d2           | is a < b?
-       bhi     3f              | if b > a skip the following
-       beq     4f              | if d0==d2 check d1 and d3
-2:     subl    d3,d1           | 
-       subxl   d2,d0           | a <-- a - b
-       bset    d5,d6           | set the corresponding bit in d6
-3:     addl    d1,d1           | shift a by 1
-       addxl   d0,d0           |
-#ifndef __mcoldfire__
-       dbra    d5,1b           | and branch back
-#else
-       subql   IMM (1), d5
-       bpl     1b
-#endif
-       bra     5f                      
-4:     cmpl    d1,d3           | here d0==d2, so check d1 and d3
-       bhi     3b              | if d3 > d1 (b > a) skip the subtraction
-       bra     2b              | else go do it
-5:
-| Here we have to start setting the bits in the second long.
-       movel   IMM (31),d5     | again d5 is counter
-
-1:     cmpl    d0,d2           | is a < b?
-       bhi     3f              | if b > a skip the following
-       beq     4f              | if d0==d2 check d1 and d3
-2:     subl    d3,d1           | 
-       subxl   d2,d0           | a <-- a - b
-       bset    d5,d7           | set the corresponding bit in d7
-3:     addl    d1,d1           | shift a by 1
-       addxl   d0,d0           |
-#ifndef __mcoldfire__
-       dbra    d5,1b           | and branch back
-#else
-       subql   IMM (1), d5
-       bpl     1b
-#endif
-       bra     5f                      
-4:     cmpl    d1,d3           | here d0==d2, so check d1 and d3
-       bhi     3b              | if d3 > d1 (b > a) skip the subtraction
-       bra     2b              | else go do it
-5:
-| Now go ahead checking until we hit a one, which we store in d2.
-       movel   IMM (DBL_MANT_DIG),d5
-1:     cmpl    d2,d0           | is a < b?
-       bhi     4f              | if b < a, exit
-       beq     3f              | if d0==d2 check d1 and d3
-2:     addl    d1,d1           | shift a by 1
-       addxl   d0,d0           |
-#ifndef __mcoldfire__
-       dbra    d5,1b           | and branch back
-#else
-       subql   IMM (1), d5
-       bpl     1b
-#endif
-       movel   IMM (0),d2      | here no sticky bit was found
-       movel   d2,d3
-       bra     5f                      
-3:     cmpl    d1,d3           | here d0==d2, so check d1 and d3
-       bhi     2b              | if d3 > d1 (b > a) go back
-4:
-| Here put the sticky bit in d2-d3 (in the position which actually corresponds
-| to it; if you don't do this the algorithm loses in some cases). '
-       movel   IMM (0),d2
-       movel   d2,d3
-#ifndef __mcoldfire__
-       subw    IMM (DBL_MANT_DIG),d5
-       addw    IMM (63),d5
-       cmpw    IMM (31),d5
-#else
-       subl    IMM (DBL_MANT_DIG),d5
-       addl    IMM (63),d5
-       cmpl    IMM (31),d5
-#endif
-       bhi     2f              | d5 > 31: the bit belongs in d2
-1:     bset    d5,d3
-       bra     5f
-| NOTE(review): the subtraction below is never executed, because the bhi
-| above jumps straight to label 2 and the path through label 1 leaves via
-| the bra. The bset at label 2 still selects the intended bit provided the
-| bit number is taken modulo 32 for a data register destination -- confirm
-| against the processor manual before relying on it.
-#ifndef __mcoldfire__
-       subw    IMM (32),d5
-#else
-       subl    IMM (32),d5
-#endif
-2:     bset    d5,d2
-5:
-| Finally we are finished! Move the quotient from d6-d7 to its final
-| destination in d0-d1:
-       movel   d6,d0
-       movel   d7,d1
-       movel   IMM (0),d3      | NOTE(review): this also discards a sticky
-                               | bit that was set in d3 above
-
-| Here we have finished the division, with the result in d0-d1-d2-d3, with
-| 2^21 <= d6 < 2^23. Thus bit 23 is not set, but bit 22 could be set.
-| If it is not, then definitely bit 21 is set. Normalize so bit 22 is
-| not set:
-       btst    IMM (DBL_MANT_DIG-32+1),d0
-       beq     1f
-#ifndef __mcoldfire__
-       lsrl    IMM (1),d0
-       roxrl   IMM (1),d1
-       roxrl   IMM (1),d2
-       roxrl   IMM (1),d3
-       addw    IMM (1),d4
-#else
-| ColdFire path: the same one-place right shift of d0-d1-d2-d3, done long
-| by long from the bottom with the carried bit moved across by hand.
-       lsrl    IMM (1),d3
-       btst    IMM (0),d2
-       beq     10f
-       bset    IMM (31),d3
-10:    lsrl    IMM (1),d2
-       btst    IMM (0),d1
-       beq     11f
-       bset    IMM (31),d2
-11:    lsrl    IMM (1),d1
-       btst    IMM (0),d0
-       beq     12f
-       bset    IMM (31),d1
-12:    lsrl    IMM (1),d0
-       addl    IMM (1),d4
-#endif
-1:
-| Now round, check for over- and underflow, and exit.
-       movel   a0,d7           | restore sign bit to d7
-       moveq   IMM (DIVIDE),d5
-       bra     Lround$exit
-
-Ldivdf$inop:
-| Invalid operand: load the DIVIDE operation code into d5 and leave
-| through the common Ld$inop exit.
-       moveq   IMM (DIVIDE),d5
-       bra     Ld$inop
-
-Ldivdf$a$0:
-| If a is zero check to see whether b is zero also. In that case return
-| NaN; then check if b is NaN, and return NaN also in that case. Else
-| return a properly signed zero.
-| This label is reached before the sign bit of b has been cleared in the
-| main body, so it is cleared here.
-       moveq   IMM (DIVIDE),d5
-       bclr    IMM (31),d2     | drop the sign of b for the tests below
-       movel   d2,d4           | 
-       orl     d3,d4           | 
-       beq     Ld$inop         | if b is also zero return NaN
-       cmpl    IMM (0x7ff00000),d2 | check for NaN
-       bhi     Ld$inop         | high long above INFINITY: b is NaN
-       blt     1f              | high long below INFINITY: b is finite
-       tstl    d3              | high long equals INFINITY: d3 decides
-       bne     Ld$inop         | nonzero fraction: b is NaN
-1:     movel   a0,d0           | else return signed zero
-       moveq   IMM(0),d1       | 
-       PICLEA  SYM (_fpCCR),a0 | clear exception flags
-       movew   IMM (0),a0@     |
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7      | 
-#else
-       moveml  sp@,d2-d7       | 
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6              | 
-       rts                     |       
-
-Ldivdf$b$0:
-       moveq   IMM (DIVIDE),d5
-| If we got here a is not zero and b is zero. Check if a is NaN; in that
-| case return NaN, else return +/-INFINITY. Remember that a is in d0-d1
-| with the sign bit cleared already, and that a0 holds the sign of the
-| result.
-| a is NaN only when d0 > 0x7ff00000, or when d0 == 0x7ff00000 and d1 is
-| nonzero. The low long must not be looked at when d0 < 0x7ff00000:
-| doing so would turn every finite a with a nonzero d1 into NaN.
-       movel   a0,d7           | put the sign of the result back in d7
-       cmpl    IMM (0x7ff00000),d0 | compare d0 with INFINITY
-       bhi     Ld$inop         | if larger it is NaN
-       bne     Ld$div$0        | if smaller a is finite: DIVIDE_BY_ZERO
-       tstl    d1              | d0 == 0x7ff00000, so d1 decides
-       bne     Ld$inop         | nonzero fraction: a is NaN
-       bra     Ld$div$0        | a is INFINITY: signal DIVIDE_BY_ZERO
-
-Ldivdf$b$nf:
-| Reached with a finite and nonzero and the high long of b equal to
-| 0x7ff00000, so b is either INFINITY or NaN.
-       moveq   IMM (DIVIDE),d5
-| If d2 == 0x7ff00000 we have to check d3.
-       tstl    d3              |
-       bne     Ld$inop         | if d3 <> 0, b is NaN
-       bra     Ld$underflow    | else b is +/-INFINITY, so signal underflow
-
-Ldivdf$a$nf:
-| Reached with the high long of a equal to 0x7ff00000 (still held in d7),
-| so a is either INFINITY or NaN. b is nonzero with its sign bit cleared.
-       moveq   IMM (DIVIDE),d5
-| If d0 == 0x7ff00000 we have to check d1.
-       tstl    d1              |
-       bne     Ld$inop         | if d1 <> 0, a is NaN
-| If a is INFINITY we have to check b
-       cmpl    d7,d2           | compare b with INFINITY 
-       bge     Ld$inop         | if b is NaN or INFINITY return NaN
-| Here d2 < 0x7ff00000, so b is finite whatever d3 holds. The low long
-| is deliberately not tested: doing so would turn INFINITY divided by a
-| finite b with a nonzero d3 into NaN.
-       bra     Ld$overflow     | else return overflow
-
-| Denormalized operand a: give it an exponent of 1 and do not put a hidden
-| bit back. Instead shift the fraction in d0-d1 left one place at a time
-| until bit #DBL_MANT_DIG-32-1 of d0 is set, lowering the exponent in d4
-| once per shift.
-Ldivdf$a$den:
-       movel   IMM (1),d4      | exponent starts out as 1
-       andl    d6,d0           | isolate the fraction
-1:     addl    d1,d1           | double the low long ...
-       addxl   d0,d0           | ... and carry into the high long
-#ifndef __mcoldfire__
-       subw    IMM (1),d4      | each shift costs one exponent step
-#else
-       subl    IMM (1),d4      | each shift costs one exponent step
-#endif
-       btst    IMM (DBL_MANT_DIG-32-1),d0
-       beq     1b              | hidden bit position still clear: go on
-       bra     Ldivdf$1        | normalized: rejoin the main path
-
-Ldivdf$b$den:
-| Denormalized operand b: exponent 1 in d5, then shift the fraction in
-| d2-d3 left until bit #DBL_MANT_DIG-32-1 of d2 is set, lowering d5 once
-| per shift.
-       movel   IMM (1),d5      | exponent starts out as 1
-       andl    d6,d2           | isolate the fraction
-1:     addl    d3,d3           | double the low long ...
-       addxl   d2,d2           | ... and carry into the high long
-#ifndef __mcoldfire__
-       subw    IMM (1),d5      | each shift costs one exponent step
-#else
-       subql   IMM (1),d5      | each shift costs one exponent step
-#endif
-       btst    IMM (DBL_MANT_DIG-32-1),d2
-       beq     1b              | hidden bit position still clear: go on
-       bra     Ldivdf$2        | normalized: rejoin the main path
-
-Lround$exit:
-| This is a common exit point for __muldf3 and __divdf3. When they enter
-| this point the sign of the result is in d7, the result in d0-d1, normalized
-| so that 2^21 <= d0 < 2^22, and the exponent is in the low word of d4
-| (it is compared as a word here, and as a long on ColdFire).
-
-| First check for underflow in the exponent:
-#ifndef __mcoldfire__
-       cmpw    IMM (-DBL_MANT_DIG-1),d4                
-#else
-       cmpl    IMM (-DBL_MANT_DIG-1),d4                
-#endif
-       blt     Ld$underflow    
-| It could happen that the exponent is less than 1, in which case the 
-| number is denormalized. In this case we shift right and adjust the 
-| exponent until it becomes 1 or the fraction is zero (in the latter case 
-| we signal underflow and return zero).
-       movel   d7,a0           | park the sign in a0 while d7 is reused
-       movel   IMM (0),d6      | use d6-d7 to collect bits flushed right
-       movel   d6,d7           | use d6-d7 to collect bits flushed right
-#ifndef __mcoldfire__
-       cmpw    IMM (1),d4      | if the exponent is less than 1 we 
-#else
-       cmpl    IMM (1),d4      | if the exponent is less than 1 we 
-#endif
-       bge     2f              | have to shift right (denormalize)
-1:
-#ifndef __mcoldfire__
-       addw    IMM (1),d4      | adjust the exponent
-       lsrl    IMM (1),d0      | shift right once 
-       roxrl   IMM (1),d1      |
-       roxrl   IMM (1),d2      |
-       roxrl   IMM (1),d3      |
-       roxrl   IMM (1),d6      | 
-       roxrl   IMM (1),d7      |
-       cmpw    IMM (1),d4      | is the exponent 1 already?
-#else
-| ColdFire path: the same one-place right shift of d0-d1-d2-d3-d6-d7, done
-| long by long from the bottom with the carried bit moved across by hand.
-       addl    IMM (1),d4      | adjust the exponent
-       lsrl    IMM (1),d7
-       btst    IMM (0),d6
-       beq     13f
-       bset    IMM (31),d7
-13:    lsrl    IMM (1),d6
-       btst    IMM (0),d3
-       beq     14f
-       bset    IMM (31),d6
-14:    lsrl    IMM (1),d3
-       btst    IMM (0),d2
-       beq     10f
-       bset    IMM (31),d3
-10:    lsrl    IMM (1),d2
-       btst    IMM (0),d1
-       beq     11f
-       bset    IMM (31),d2
-11:    lsrl    IMM (1),d1
-       btst    IMM (0),d0
-       beq     12f
-       bset    IMM (31),d1
-12:    lsrl    IMM (1),d0
-       cmpl    IMM (1),d4      | is the exponent 1 already?
-#endif
-       beq     2f              | if so we are done shifting
-       bra     1b              | if not loop back
-| The next branch cannot be reached: the bra above is unconditional.
-       bra     Ld$underflow    | safety check, shouldn't execute '
-2:     orl     d6,d2           | this is a trick so we don't lose  '
-       orl     d7,d3           | the bits which were flushed right
-       movel   a0,d7           | get back sign bit into d7
-| Now call the rounding routine (which takes care of denormalized numbers):
-       lea     pc@(Lround$0),a0 | to return from rounding routine
-       PICLEA  SYM (_fpCCR),a1 | check the rounding mode
-#ifdef __mcoldfire__
-       clrl    d6              | the mode is compared as a long below
-#endif
-       movew   a1@(6),d6       | rounding mode in d6
-       beq     Lround$to$nearest | a zero mode selects round to nearest
-#ifndef __mcoldfire__
-       cmpw    IMM (ROUND_TO_PLUS),d6
-#else
-       cmpl    IMM (ROUND_TO_PLUS),d6
-#endif
-       bhi     Lround$to$minus
-       blt     Lround$to$zero
-       bra     Lround$to$plus
-Lround$0:
-| Here we have a correctly rounded result (either normalized or denormalized).
-
-| Here we should have either a normalized number or a denormalized one, and
-| the exponent is necessarily larger or equal to 1 (so we don't have to  '
-| check again for underflow!). We have to check for overflow or for a 
-| denormalized number (which also signals underflow).
-| Check for overflow (i.e., exponent >= 0x7ff).
-#ifndef __mcoldfire__
-       cmpw    IMM (0x07ff),d4
-#else
-       cmpl    IMM (0x07ff),d4
-#endif
-       bge     Ld$overflow
-| Now check for a denormalized number (exponent==0):
-       movew   d4,d4           | move onto itself only to set the flags
-       beq     Ld$den
-1:
-| Put back the exponents and sign and return.
-#ifndef __mcoldfire__
-       lslw    IMM (4),d4      | exponent back to fourth byte
-#else
-       lsll    IMM (4),d4      | exponent back to fourth byte
-#endif
-       bclr    IMM (DBL_MANT_DIG-32-1),d0 | drop the hidden bit
-       swap    d0              | and put back exponent
-#ifndef __mcoldfire__
-       orw     d4,d0           | 
-#else
-       orl     d4,d0           | 
-#endif
-       swap    d0              |
-       orl     d7,d0           | and sign also
-
-       PICLEA  SYM (_fpCCR),a0
-       movew   IMM (0),a0@     | clear the first word of _fpCCR
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6
-       rts
-
-|=============================================================================
-|                              __negdf2
-|=============================================================================
-
-| double __negdf2(double);
-| Returns the argument with its sign flipped. Only one operand is read
-| (a6@(8), a6@(12)). A zero of either sign comes back as +zero, a NaN
-| leaves through Ld$inop and an INFINITY through Ld$infty.
-       FUNC(__negdf2)
-SYM (__negdf2):
-#ifndef __mcoldfire__
-       link    a6,IMM (0)
-       moveml  d2-d7,sp@-
-#else
-       link    a6,IMM (-24)
-       moveml  d2-d7,sp@
-#endif
-       moveq   IMM (NEGATE),d5
-       movel   a6@(8),d0       | get number to negate in d0-d1
-       movel   a6@(12),d1      |
-       bchg    IMM (31),d0     | negate
-       movel   d0,d2           | make a positive copy (for the tests)
-       bclr    IMM (31),d2     |
-       movel   d2,d4           | check for zero
-       orl     d1,d4           |
-       beq     2f              | if zero (either sign) return +zero
-       cmpl    IMM (0x7ff00000),d2 | compare to +INFINITY
-       blt     1f              | if finite, return
-       bhi     Ld$inop         | if larger (fraction not zero) is NaN
-       tstl    d1              | if d2 == 0x7ff00000 check d1
-       bne     Ld$inop         |
-       movel   d0,d7           | else get sign and return INFINITY
-       andl    IMM (0x80000000),d7
-       bra     Ld$infty                
-1:     PICLEA  SYM (_fpCCR),a0
-       movew   IMM (0),a0@     | clear the first word of _fpCCR
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6
-       rts
-2:     bclr    IMM (31),d0     | zero: force the sign bit off
-       bra     1b
-
-|=============================================================================
-|                              __cmpdf2
-|=============================================================================
-
-| Values returned in d0 by the comparison.
-GREATER =  1
-LESS    = -1
-EQUAL   =  0
-
-| int __cmpdf2_internal(double, double, int);
-| Compares a (a6@(8), a6@(12)) with b (a6@(16), a6@(20)) and returns
-| GREATER, LESS or EQUAL in d0. The third argument (a6@(24)) is the value
-| handed back by Lcmpd$inop when an operand is found to be NaN.
-SYM (__cmpdf2_internal):
-#ifndef __mcoldfire__
-       link    a6,IMM (0)
-       moveml  d2-d7,sp@-      | save registers
-#else
-       link    a6,IMM (-24)
-       moveml  d2-d7,sp@
-#endif
-       moveq   IMM (COMPARE),d5
-       movel   a6@(8),d0       | get first operand
-       movel   a6@(12),d1      |
-       movel   a6@(16),d2      | get second operand
-       movel   a6@(20),d3      |
-| First check if a and/or b are (+/-) zero and in that case clear
-| the sign bit.
-       movel   d0,d6           | copy signs into d6 (a) and d7(b)
-       bclr    IMM (31),d0     | and clear signs in d0 and d2
-       movel   d2,d7           |
-       bclr    IMM (31),d2     |
-       cmpl    IMM (0x7ff00000),d0 | check for a == NaN
-       bhi     Lcmpd$inop              | if d0 > 0x7ff00000, a is NaN
-       beq     Lcmpdf$a$nf     | if equal can be INFINITY, so check d1
-       movel   d0,d4           | copy into d4 to test for zero
-       orl     d1,d4           |
-       beq     Lcmpdf$a$0      |
-Lcmpdf$0:
-       cmpl    IMM (0x7ff00000),d2 | check for b == NaN
-       bhi     Lcmpd$inop              | if d2 > 0x7ff00000, b is NaN
-       beq     Lcmpdf$b$nf     | if equal can be INFINITY, so check d3
-       movel   d2,d4           |
-       orl     d3,d4           |
-       beq     Lcmpdf$b$0      |
-Lcmpdf$1:
-| Check the signs
-       eorl    d6,d7           | bit 31 of d7 is now set if the signs differ
-       bpl     1f
-| If the signs are not equal check if a >= 0
-       tstl    d6
-       bpl     Lcmpdf$a$gt$b   | if (a >= 0 && b < 0) => a > b
-       bmi     Lcmpdf$b$gt$a   | if (a < 0 && b >= 0) => a < b
-1:
-| If the signs are equal check for < 0
-       tstl    d6
-       bpl     1f
-| If both are negative exchange them
-#ifndef __mcoldfire__
-       exg     d0,d2
-       exg     d1,d3
-#else
-| The ColdFire path swaps through d7 instead of using exg.
-       movel   d0,d7
-       movel   d2,d0
-       movel   d7,d2
-       movel   d1,d7
-       movel   d3,d1
-       movel   d7,d3
-#endif
-1:
-| Now that they are positive we just compare them as longs (does this also
-| work for denormalized numbers?).
-       cmpl    d0,d2
-       bhi     Lcmpdf$b$gt$a   | |b| > |a|
-       bne     Lcmpdf$a$gt$b   | |b| < |a|
-| If we got here d0 == d2, so we compare d1 and d3.
-       cmpl    d1,d3
-       bhi     Lcmpdf$b$gt$a   | |b| > |a|
-       bne     Lcmpdf$a$gt$b   | |b| < |a|
-| If we got here a == b.
-       movel   IMM (EQUAL),d0
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7      | put back the registers
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6
-       rts
-Lcmpdf$a$gt$b:
-| Exit of __cmpdf2_internal for a > b: return GREATER in d0.
-       movel   IMM (GREATER),d0
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7      | put back the registers
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6
-       rts
-Lcmpdf$b$gt$a:
-| Exit of __cmpdf2_internal for a < b: return LESS in d0.
-       movel   IMM (LESS),d0
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7      | put back the registers
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6
-       rts
-
-Lcmpdf$a$0:    
-| a is zero: clear its saved sign in d6 so that -0 compares like +0.
-       bclr    IMM (31),d6
-       bra     Lcmpdf$0
-Lcmpdf$b$0:
-| b is zero: clear its saved sign in d7 so that -0 compares like +0.
-       bclr    IMM (31),d7
-       bra     Lcmpdf$1
-
-Lcmpdf$a$nf:
-| The high long of a equals 0x7ff00000, so the low long d1 decides between
-| INFINITY (zero) and NaN (nonzero). A NaN found here leaves through
-| Lcmpd$inop, exactly like a NaN found through the high long, so that the
-| caller-supplied integer result is returned in both cases.
-       tstl    d1
-       bne     Lcmpd$inop      | nonzero fraction: a is NaN
-       bra     Lcmpdf$0        | a is INFINITY: go on and examine b
-
-Lcmpdf$b$nf:
-| The high long of b equals 0x7ff00000, so the low long d3 decides between
-| INFINITY (zero) and NaN (nonzero). A NaN found here leaves through
-| Lcmpd$inop, exactly like a NaN found through the high long, so that the
-| caller-supplied integer result is returned in both cases.
-       tstl    d3
-       bne     Lcmpd$inop      | nonzero fraction: b is NaN
-       bra     Lcmpdf$1        | b is INFINITY: go on and compare
-
-Lcmpd$inop:
-| An operand of the comparison is NaN: return the third argument of
-| __cmpdf2_internal in d0 and go to the exception handler with the
-| INEXACT_RESULT and INVALID_OPERATION flags in d7 and the operand format
-| in d6.
-       movl    a6@(24),d0
-       moveq   IMM (INEXACT_RESULT+INVALID_OPERATION),d7
-       moveq   IMM (DOUBLE_FLOAT),d6
-       PICJUMP $_exception_handler
-
-| int __cmpdf2(double, double);
-| Wrapper around __cmpdf2_internal: pushes the constant 1 as the third
-| argument followed by a copy of both operands, so 1 is the value the
-| internal routine returns when it sends a NaN operand to Lcmpd$inop.
-       FUNC(__cmpdf2)
-SYM (__cmpdf2):
-       link    a6,IMM (0)
-       pea     1               | third argument
-       movl    a6@(20),sp@-    | second operand, low long first
-       movl    a6@(16),sp@-
-       movl    a6@(12),sp@-    | first operand, low long first
-       movl    a6@(8),sp@-
-       PICCALL SYM (__cmpdf2_internal)
-       unlk    a6              | no explicit pop of the pushed arguments
-       rts
-
-|=============================================================================
-|                           rounding routines
-|=============================================================================
-
-| The rounding routines expect the number to be normalized in registers
-| d0-d1-d2-d3, with the exponent in register d4. They assume that the 
-| exponent is larger or equal to 1. They return a properly normalized number
-| if possible, and a denormalized number otherwise. The exponent is returned
-| in d4. They return to the address held in a0.
-
-Lround$to$nearest:
-| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"):
-| Here we assume that the exponent is not too small (this should be checked
-| before entering the rounding routine), but the number could be denormalized.
-
-| Check for denormalized numbers:
-1:     btst    IMM (DBL_MANT_DIG-32),d0
-       bne     2f              | if set the number is normalized
-| Normalize shifting left until bit #DBL_MANT_DIG-32 is set or the exponent 
-| is one (remember that a denormalized number corresponds to an 
-| exponent of -D_BIAS+1).
-#ifndef __mcoldfire__
-       cmpw    IMM (1),d4      | remember that the exponent is at least one
-#else
-       cmpl    IMM (1),d4      | remember that the exponent is at least one
-#endif
-       beq     2f              | an exponent of one means denormalized
-       addl    d3,d3           | else shift and adjust the exponent
-       addxl   d2,d2           |
-       addxl   d1,d1           |
-       addxl   d0,d0           |
-#ifndef __mcoldfire__
-       dbra    d4,1b           | decrement the exponent and test again
-#else
-       subql   IMM (1), d4
-       bpl     1b
-#endif
-2:
-| Now round: we do it as follows: after the shifting we can write the
-| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2.
-| If delta < 1, do nothing. If delta > 1, add 1 to f. 
-| If delta == 1, we make sure the rounded number will be even (odd?) 
-| (after shifting).
-       btst    IMM (0),d1      | is delta < 1?
-       beq     2f              | if so, do not do anything
-       orl     d2,d3           | any bit set below bit 0 of d1?
-       bne     1f              | if so delta > 1: go add 1
-| Here delta == 1: add bit 1 of d1 onto itself, which changes the value
-| only when that bit is set.
-       movel   d1,d3           | 
-       andl    IMM (2),d3      | bit 1 is the last significant bit
-       movel   IMM (0),d2      |
-       addl    d3,d1           |
-       addxl   d2,d0           |
-       bra     2f              | 
-1:     movel   IMM (1),d3      | else add 1 
-       movel   IMM (0),d2      |
-       addl    d3,d1           |
-       addxl   d2,d0
-| Shift right once (because we used bit #DBL_MANT_DIG-32!).
-2:
-#ifndef __mcoldfire__
-       lsrl    IMM (1),d0
-       roxrl   IMM (1),d1              
-#else
-       lsrl    IMM (1),d1
-       btst    IMM (0),d0
-       beq     10f
-       bset    IMM (31),d1
-10:    lsrl    IMM (1),d0
-#endif
-
-| Now check again bit #DBL_MANT_DIG-32 (rounding could have produced a
-| 'fraction overflow' ...).
-       btst    IMM (DBL_MANT_DIG-32),d0        
-       beq     1f
-#ifndef __mcoldfire__
-       lsrl    IMM (1),d0
-       roxrl   IMM (1),d1
-       addw    IMM (1),d4
-#else
-       lsrl    IMM (1),d1
-       btst    IMM (0),d0
-       beq     10f
-       bset    IMM (31),d1
-10:    lsrl    IMM (1),d0
-       addl    IMM (1),d4
-#endif
-1:
-| If bit #DBL_MANT_DIG-32-1 is clear we have a denormalized number, so we 
-| have to put the exponent to zero and return a denormalized number.
-       btst    IMM (DBL_MANT_DIG-32-1),d0
-       beq     1f
-       jmp     a0@             | normalized: return with d4 unchanged
-1:     movel   IMM (0),d4
-       jmp     a0@
-
-| The three directed rounding modes share one body that returns to the
-| caller through a0 at once, leaving d0-d3 and d4 as they were on entry.
-Lround$to$zero:
-Lround$to$plus:
-Lround$to$minus:
-       jmp     a0@
-#endif /* L_double */
-
-#ifdef  L_float
-
-       .globl  SYM (_fpCCR)
-       .globl  $_exception_handler
-
-| Bit patterns of the special single precision values.
-QUIET_NaN    = 0xffffffff
-SIGNL_NaN    = 0x7f800001
-INFINITY     = 0x7f800000
-
-| Exponent range and mantissa width of the single precision format.
-F_MAX_EXP      = 0xff
-F_BIAS         = 126
-FLT_MAX_EXP    = F_MAX_EXP - F_BIAS
-FLT_MIN_EXP    = 1 - F_BIAS
-FLT_MANT_DIG   = 24
-
-| Exception flag bits; the exit routines below load a sum of these into
-| d7 before jumping to $_exception_handler.
-INEXACT_RESULT                 = 0x0001
-UNDERFLOW              = 0x0002
-OVERFLOW               = 0x0004
-DIVIDE_BY_ZERO                 = 0x0008
-INVALID_OPERATION      = 0x0010
-
-| Operand format code; the exit routines below load it into d6.
-SINGLE_FLOAT = 1
-
-| Operation codes.
-NOOP         = 0
-ADD          = 1
-MULTIPLY     = 2
-DIVIDE       = 3
-NEGATE       = 4
-COMPARE      = 5
-EXTENDSFDF   = 6
-TRUNCDFSF    = 7
-
-| Rounding modes.
-UNKNOWN           = -1
-ROUND_TO_NEAREST  = 0 | round result to nearest representable value
-ROUND_TO_ZERO     = 1 | round result towards zero
-ROUND_TO_PLUS     = 2 | round result towards plus infinity
-ROUND_TO_MINUS    = 3 | round result towards minus infinity
-
-| Entry points:
-
-       .globl SYM (__addsf3)
-       .globl SYM (__subsf3)
-       .globl SYM (__mulsf3)
-       .globl SYM (__divsf3)
-       .globl SYM (__negsf2)
-       .globl SYM (__cmpsf2)
-       .globl SYM (__cmpsf2_internal)
-       .hidden SYM (__cmpsf2_internal)
-
-| These are common routines to return and signal exceptions.   
-
-       .text
-       .even
-
-Lf$den:
-| Return and signal a denormalized number
-| The sign held in d7 is merged into the result in d0 before d7 is
-| reused for the exception flags.
-       orl     d7,d0
-       moveq   IMM (INEXACT_RESULT+UNDERFLOW),d7
-       moveq   IMM (SINGLE_FLOAT),d6
-       PICJUMP $_exception_handler
-
-Lf$infty:
-Lf$overflow:
-| Return a properly signed INFINITY and set the exception flags 
-| Both labels share this body, so Lf$infty also reports INEXACT_RESULT
-| and OVERFLOW. The sign is taken from d7.
-       movel   IMM (INFINITY),d0
-       orl     d7,d0
-       moveq   IMM (INEXACT_RESULT+OVERFLOW),d7
-       moveq   IMM (SINGLE_FLOAT),d6
-       PICJUMP $_exception_handler
-
-Lf$underflow:
-| Return 0 and set the exception flags 
-| The zero is always positive: the sign in d7 is overwritten without
-| being merged into d0.
-       moveq   IMM (0),d0
-       moveq   IMM (INEXACT_RESULT+UNDERFLOW),d7
-       moveq   IMM (SINGLE_FLOAT),d6
-       PICJUMP $_exception_handler
-
-Lf$inop:
-| Return a quiet NaN and set the exception flags
-| (INEXACT_RESULT and INVALID_OPERATION in d7, operand format in d6).
-       movel   IMM (QUIET_NaN),d0
-       moveq   IMM (INEXACT_RESULT+INVALID_OPERATION),d7
-       moveq   IMM (SINGLE_FLOAT),d6
-       PICJUMP $_exception_handler
-
-Lf$div$0:
-| Return a properly signed INFINITY and set the exception flags
-| (INEXACT_RESULT and DIVIDE_BY_ZERO). The sign is taken from d7.
-       movel   IMM (INFINITY),d0
-       orl     d7,d0
-       moveq   IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7
-       moveq   IMM (SINGLE_FLOAT),d6
-       PICJUMP $_exception_handler
-
-|=============================================================================
-|=============================================================================
-|                         single precision routines
-|=============================================================================
-|=============================================================================
-
-| A single precision floating point number (float) has the format:
-|
-| struct _float {
-|  unsigned int sign      : 1;  /* sign bit */ 
-|  unsigned int exponent  : 8;  /* exponent, shifted by 126 */
-|  unsigned int fraction  : 23; /* fraction */
-| } float;
-| 
-| Thus sizeof(float) = 4 (32 bits). 
-|
-| All the routines are callable from C programs, and return the result 
-| in the single register d0. They also preserve all registers except 
-| d0-d1 and a0-a1.
-
-|=============================================================================
-|                              __subsf3
-|=============================================================================
-
-| float __subsf3(float, float);
-| Subtraction is done as a + (-b): the sign of the second operand is
-| flipped in place on the stack and control falls into __addsf3, which
-| follows immediately. No frame has been set up yet, so the operand is
-| addressed through sp.
-       FUNC(__subsf3)
-SYM (__subsf3):
-       bchg    IMM (31),sp@(8) | change sign of second operand
-                               | and fall through
-|=============================================================================
-|                              __addsf3
-|=============================================================================
-
-| float __addsf3(float, float);
-       FUNC(__addsf3)
-SYM (__addsf3):
-#ifndef __mcoldfire__
-       link    a6,IMM (0)      | everything will be done in registers
-       moveml  d2-d7,sp@-      | save all data registers but d0-d1
-#else
-       link    a6,IMM (-24)
-       moveml  d2-d7,sp@
-#endif
-       movel   a6@(8),d0       | get first operand
-       movel   a6@(12),d1      | get second operand
-       movel   d0,a0           | get d0's sign bit '
-       addl    d0,d0           | check and clear sign bit of a
-       beq     Laddsf$b        | if zero return second operand
-       movel   d1,a1           | save b's sign bit '
-       addl    d1,d1           | get rid of sign bit
-       beq     Laddsf$a        | if zero return first operand
-
-| Get the exponents and check for denormalized and/or infinity.
-
-       movel   IMM (0x00ffffff),d4     | mask to get fraction
-       movel   IMM (0x01000000),d5     | mask to put hidden bit back
-
-       movel   d0,d6           | save a to get exponent
-       andl    d4,d0           | get fraction in d0
-       notl    d4              | make d4 into a mask for the exponent
-       andl    d4,d6           | get exponent in d6
-       beq     Laddsf$a$den    | branch if a is denormalized
-       cmpl    d4,d6           | check for INFINITY or NaN
-       beq     Laddsf$nf
-       swap    d6              | put exponent into first word
-       orl     d5,d0           | and put hidden bit back
-Laddsf$1:
-| Now we have a's exponent in d6 (second byte) and the mantissa in d0. '
-       movel   d1,d7           | get exponent in d7
-       andl    d4,d7           | 
-       beq     Laddsf$b$den    | branch if b is denormalized
-       cmpl    d4,d7           | check for INFINITY or NaN
-       beq     Laddsf$nf
-       swap    d7              | put exponent into first word
-       notl    d4              | make d4 into a mask for the fraction
-       andl    d4,d1           | get fraction in d1
-       orl     d5,d1           | and put hidden bit back
-Laddsf$2:
-| Now we have b's exponent in d7 (second byte) and the mantissa in d1. '
-
-| Note that the hidden bit corresponds to bit #FLT_MANT_DIG-1, and we 
-| shifted right once, so bit #FLT_MANT_DIG is set (so we have one extra
-| bit).
-
-       movel   d1,d2           | move b to d2, since we want to use
-                               | two registers to do the sum
-       movel   IMM (0),d1      | and clear the new ones
-       movel   d1,d3           |
-
-| Here we shift the numbers in registers d0 and d1 so the exponents are the
-| same, and put the largest exponent in d6. Note that we are using two
-| registers for each number (see the discussion by D. Knuth in "Seminumerical 
-| Algorithms").
-#ifndef __mcoldfire__
-       cmpw    d6,d7           | compare exponents
-#else
-       cmpl    d6,d7           | compare exponents
-#endif
-       beq     Laddsf$3        | if equal don't shift '
-       bhi     5f              | branch if second exponent largest
-1:
-       subl    d6,d7           | keep the largest exponent
-       negl    d7
-#ifndef __mcoldfire__
-       lsrw    IMM (8),d7      | put difference in lower byte
-#else
-       lsrl    IMM (8),d7      | put difference in lower byte
-#endif
-| if difference is too large we don't shift (actually, we can just exit) '
-#ifndef __mcoldfire__
-       cmpw    IMM (FLT_MANT_DIG+2),d7         
-#else
-       cmpl    IMM (FLT_MANT_DIG+2),d7         
-#endif
-       bge     Laddsf$b$small
-#ifndef __mcoldfire__
-       cmpw    IMM (16),d7     | if difference >= 16 swap
-#else
-       cmpl    IMM (16),d7     | if difference >= 16 swap
-#endif
-       bge     4f
-2:
-#ifndef __mcoldfire__
-       subw    IMM (1),d7
-#else
-       subql   IMM (1), d7
-#endif
-3:
-#ifndef __mcoldfire__
-       lsrl    IMM (1),d2      | shift right second operand
-       roxrl   IMM (1),d3
-       dbra    d7,3b
-#else
-       lsrl    IMM (1),d3
-       btst    IMM (0),d2
-       beq     10f
-       bset    IMM (31),d3
-10:    lsrl    IMM (1),d2
-       subql   IMM (1), d7
-       bpl     3b
-#endif
-       bra     Laddsf$3
-4:
-       movew   d2,d3
-       swap    d3
-       movew   d3,d2
-       swap    d2
-#ifndef __mcoldfire__
-       subw    IMM (16),d7
-#else
-       subl    IMM (16),d7
-#endif
-       bne     2b              | if still more bits, go back to normal case
-       bra     Laddsf$3
-5:
-#ifndef __mcoldfire__
-       exg     d6,d7           | exchange the exponents
-#else
-       eorl    d6,d7
-       eorl    d7,d6
-       eorl    d6,d7
-#endif
-       subl    d6,d7           | keep the largest exponent
-       negl    d7              |
-#ifndef __mcoldfire__
-       lsrw    IMM (8),d7      | put difference in lower byte
-#else
-       lsrl    IMM (8),d7      | put difference in lower byte
-#endif
-| if difference is too large we don't shift (and exit!) '
-#ifndef __mcoldfire__
-       cmpw    IMM (FLT_MANT_DIG+2),d7         
-#else
-       cmpl    IMM (FLT_MANT_DIG+2),d7         
-#endif
-       bge     Laddsf$a$small
-#ifndef __mcoldfire__
-       cmpw    IMM (16),d7     | if difference >= 16 swap
-#else
-       cmpl    IMM (16),d7     | if difference >= 16 swap
-#endif
-       bge     8f
-6:
-#ifndef __mcoldfire__
-       subw    IMM (1),d7
-#else
-       subl    IMM (1),d7
-#endif
-7:
-#ifndef __mcoldfire__
-       lsrl    IMM (1),d0      | shift right first operand
-       roxrl   IMM (1),d1
-       dbra    d7,7b
-#else
-       lsrl    IMM (1),d1
-       btst    IMM (0),d0
-       beq     10f
-       bset    IMM (31),d1
-10:    lsrl    IMM (1),d0
-       subql   IMM (1),d7
-       bpl     7b
-#endif
-       bra     Laddsf$3
-8:
-       movew   d0,d1
-       swap    d1
-       movew   d1,d0
-       swap    d0
-#ifndef __mcoldfire__
-       subw    IMM (16),d7
-#else
-       subl    IMM (16),d7
-#endif
-       bne     6b              | if still more bits, go back to normal case
-                               | otherwise we fall through
-
-| Now we have a in d0-d1, b in d2-d3, and the largest exponent in d6 (the
-| signs are stored in a0 and a1).
-
-Laddsf$3:
-| Here we have to decide whether to add or subtract the numbers
-#ifndef __mcoldfire__
-       exg     d6,a0           | get signs back
-       exg     d7,a1           | and save the exponents
-#else
-       movel   d6,d4
-       movel   a0,d6
-       movel   d4,a0
-       movel   d7,d4
-       movel   a1,d7
-       movel   d4,a1
-#endif
-       eorl    d6,d7           | combine sign bits
-       bmi     Lsubsf$0        | if negative a and b have opposite 
-                               | sign so we actually subtract the
-                               | numbers
-
-| Here we have both positive or both negative
-#ifndef __mcoldfire__
-       exg     d6,a0           | now we have the exponent in d6
-#else
-       movel   d6,d4
-       movel   a0,d6
-       movel   d4,a0
-#endif
-       movel   a0,d7           | and sign in d7
-       andl    IMM (0x80000000),d7
-| Here we do the addition.
-       addl    d3,d1
-       addxl   d2,d0
-| Note: now we have d2, d3, d4 and d5 to play with! 
-
-| Put the exponent, in the first byte, in d2, to use the "standard" rounding
-| routines:
-       movel   d6,d2
-#ifndef __mcoldfire__
-       lsrw    IMM (8),d2
-#else
-       lsrl    IMM (8),d2
-#endif
-
-| Before rounding normalize so bit #FLT_MANT_DIG is set (we will consider
-| the case of denormalized numbers in the rounding routine itself).
-| As in the addition (not in the subtraction!) we could have set 
-| one more bit we check this:
-       btst    IMM (FLT_MANT_DIG+1),d0 
-       beq     1f
-#ifndef __mcoldfire__
-       lsrl    IMM (1),d0
-       roxrl   IMM (1),d1
-#else
-       lsrl    IMM (1),d1
-       btst    IMM (0),d0
-       beq     10f
-       bset    IMM (31),d1
-10:    lsrl    IMM (1),d0
-#endif
-       addl    IMM (1),d2
-1:
-       lea     pc@(Laddsf$4),a0 | to return from rounding routine
-       PICLEA  SYM (_fpCCR),a1 | check the rounding mode
-#ifdef __mcoldfire__
-       clrl    d6
-#endif
-       movew   a1@(6),d6       | rounding mode in d6
-       beq     Lround$to$nearest
-#ifndef __mcoldfire__
-       cmpw    IMM (ROUND_TO_PLUS),d6
-#else
-       cmpl    IMM (ROUND_TO_PLUS),d6
-#endif
-       bhi     Lround$to$minus
-       blt     Lround$to$zero
-       bra     Lround$to$plus
-Laddsf$4:
-| Put back the exponent, but check for overflow.
-#ifndef __mcoldfire__
-       cmpw    IMM (0xff),d2
-#else
-       cmpl    IMM (0xff),d2
-#endif
-       bhi     1f
-       bclr    IMM (FLT_MANT_DIG-1),d0
-#ifndef __mcoldfire__
-       lslw    IMM (7),d2
-#else
-       lsll    IMM (7),d2
-#endif
-       swap    d2
-       orl     d2,d0
-       bra     Laddsf$ret
-1:
-       moveq   IMM (ADD),d5
-       bra     Lf$overflow
-
-Lsubsf$0:
-| We are here if a > 0 and b < 0 (sign bits cleared).
-| Here we do the subtraction.
-       movel   d6,d7           | put sign in d7
-       andl    IMM (0x80000000),d7
-
-       subl    d3,d1           | result in d0-d1
-       subxl   d2,d0           |
-       beq     Laddsf$ret      | if zero just exit
-       bpl     1f              | if positive skip the following
-       bchg    IMM (31),d7     | change sign bit in d7
-       negl    d1
-       negxl   d0
-1:
-#ifndef __mcoldfire__
-       exg     d2,a0           | now we have the exponent in d2
-       lsrw    IMM (8),d2      | put it in the first byte
-#else
-       movel   d2,d4
-       movel   a0,d2
-       movel   d4,a0
-       lsrl    IMM (8),d2      | put it in the first byte
-#endif
-
-| Now d0-d1 is positive and the sign bit is in d7.
-
-| Note that we do not have to normalize, since in the subtraction bit
-| #FLT_MANT_DIG+1 is never set, and denormalized numbers are handled by
-| the rounding routines themselves.
-       lea     pc@(Lsubsf$1),a0 | to return from rounding routine
-       PICLEA  SYM (_fpCCR),a1 | check the rounding mode
-#ifdef __mcoldfire__
-       clrl    d6
-#endif
-       movew   a1@(6),d6       | rounding mode in d6
-       beq     Lround$to$nearest
-#ifndef __mcoldfire__
-       cmpw    IMM (ROUND_TO_PLUS),d6
-#else
-       cmpl    IMM (ROUND_TO_PLUS),d6
-#endif
-       bhi     Lround$to$minus
-       blt     Lround$to$zero
-       bra     Lround$to$plus
-Lsubsf$1:
-| Put back the exponent (we can't have overflow!). '
-       bclr    IMM (FLT_MANT_DIG-1),d0
-#ifndef __mcoldfire__
-       lslw    IMM (7),d2
-#else
-       lsll    IMM (7),d2
-#endif
-       swap    d2
-       orl     d2,d0
-       bra     Laddsf$ret
-
-| If one of the numbers was too small (difference of exponents >= 
-| FLT_MANT_DIG+2) we return the other (and now we don't have to '
-| check for finiteness or zero).
-Laddsf$a$small:
-       movel   a6@(12),d0
-       PICLEA  SYM (_fpCCR),a0
-       movew   IMM (0),a0@
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7      | restore data registers
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6              | and return
-       rts
-
-Laddsf$b$small:
-       movel   a6@(8),d0
-       PICLEA  SYM (_fpCCR),a0
-       movew   IMM (0),a0@
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7      | restore data registers
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6              | and return
-       rts
-
-| If the numbers are denormalized remember to put exponent equal to 1.
-
-Laddsf$a$den:
-       movel   d5,d6           | d5 contains 0x01000000
-       swap    d6
-       bra     Laddsf$1
-
-Laddsf$b$den:
-       movel   d5,d7
-       swap    d7
-       notl    d4              | make d4 into a mask for the fraction
-                               | (this was not executed after the jump)
-       bra     Laddsf$2
-
-| The rest is mainly code for the different results which can be 
-| returned (checking always for +/-INFINITY and NaN).
-
-Laddsf$b:
-| Return b (if a is zero).
-       movel   a6@(12),d0
-       cmpl    IMM (0x80000000),d0     | Check if b is -0
-       bne     1f
-       movel   a0,d7
-       andl    IMM (0x80000000),d7     | Use the sign of a
-       clrl    d0
-       bra     Laddsf$ret
-Laddsf$a:
-| Return a (if b is zero).
-       movel   a6@(8),d0
-1:
-       moveq   IMM (ADD),d5
-| We have to check for NaN and +/-infty.
-       movel   d0,d7
-       andl    IMM (0x80000000),d7     | put sign in d7
-       bclr    IMM (31),d0             | clear sign
-       cmpl    IMM (INFINITY),d0       | check for infty or NaN
-       bge     2f
-       movel   d0,d0           | check for zero (we do this because we don't '
-       bne     Laddsf$ret      | want to return -0 by mistake
-       bclr    IMM (31),d7     | if zero be sure to clear sign
-       bra     Laddsf$ret      | if everything OK just return
-2:
-| The value to be returned is either +/-infty or NaN
-       andl    IMM (0x007fffff),d0     | check for NaN
-       bne     Lf$inop                 | if mantissa not zero is NaN
-       bra     Lf$infty
-
-Laddsf$ret:
-| Normal exit (a and b nonzero, result is not NaN nor +/-infty).
-| We have to clear the exception flags (just the exception type).
-       PICLEA  SYM (_fpCCR),a0
-       movew   IMM (0),a0@
-       orl     d7,d0           | put sign bit
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7      | restore data registers
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6              | and return
-       rts
-
-Laddsf$ret$den:
-| Return a denormalized number (for addition we don't signal underflow) '
-       lsrl    IMM (1),d0      | remember to shift right back once
-       bra     Laddsf$ret      | and return
-
-| Note: when adding two floats of the same sign if either one is 
-| NaN we return NaN without regard to whether the other is finite or 
-| not. When subtracting them (i.e., when adding two numbers of 
-| opposite signs) things are more complicated: if both are INFINITY 
-| we return NaN, if only one is INFINITY and the other is NaN we return
-| NaN, but if it is finite we return INFINITY with the corresponding sign.
-
-Laddsf$nf:
-       moveq   IMM (ADD),d5
-| This could be faster but it is not worth the effort, since it is not
-| executed very often. We sacrifice speed for clarity here.
-       movel   a6@(8),d0       | get the numbers back (remember that we
-       movel   a6@(12),d1      | did some processing already)
-       movel   IMM (INFINITY),d4 | useful constant (INFINITY)
-       movel   d0,d2           | save sign bits
-       movel   d1,d3
-       bclr    IMM (31),d0     | clear sign bits
-       bclr    IMM (31),d1
-| We know that one of them is either NaN or +/-INFINITY
-| Check for NaN (if either one is NaN return NaN)
-       cmpl    d4,d0           | check first a (d0)
-       bhi     Lf$inop         
-       cmpl    d4,d1           | check now b (d1)
-       bhi     Lf$inop         
-| Now comes the check for +/-INFINITY. We know that both are (maybe not
-| finite) numbers, but we have to check if both are infinite whether we
-| are adding or subtracting them.
-       eorl    d3,d2           | to check sign bits
-       bmi     1f
-       movel   d0,d7
-       andl    IMM (0x80000000),d7     | get (common) sign bit
-       bra     Lf$infty
-1:
-| We know one (or both) are infinite, so we test for equality between the
-| two numbers (if they are equal they have to be infinite both, so we
-| return NaN).
-       cmpl    d1,d0           | are both infinite?
-       beq     Lf$inop         | if so return NaN
-
-       movel   d0,d7
-       andl    IMM (0x80000000),d7 | get a's sign bit '
-       cmpl    d4,d0           | test now for infinity
-       beq     Lf$infty        | if a is INFINITY return with this sign
-       bchg    IMM (31),d7     | else we know b is INFINITY and has
-       bra     Lf$infty        | the opposite sign
-
-|=============================================================================
-|                             __mulsf3
-|=============================================================================
-
-| float __mulsf3(float, float);
-| Multiplies a (at a6@(8)) by b (at a6@(12)). Zero, INFINITY and NaN operands
-| branch to the Lmulsf$... special cases. Otherwise the sign of the product
-| is kept in d7, the exponent in d2 and the fraction product in d0-d1, and
-| the routine finishes through Lround$exit.
-       FUNC(__mulsf3)
-SYM (__mulsf3):
-#ifndef __mcoldfire__
-       link    a6,IMM (0)
-       moveml  d2-d7,sp@-      | save the data registers we use
-#else
-       link    a6,IMM (-24)    | reserve 24 bytes in the frame
-       moveml  d2-d7,sp@       | and store d2-d7 there
-#endif
-       movel   a6@(8),d0       | get a into d0
-       movel   a6@(12),d1      | and b into d1
-       movel   d0,d7           | d7 will hold the sign of the product
-       eorl    d1,d7           | (sign of a) xor (sign of b)
-       andl    IMM (0x80000000),d7     | keep only the sign bit
-       movel   IMM (INFINITY),d6       | useful constant (+INFINITY)
-       movel   d6,d5                   | another (mask for fraction)
-       notl    d5                      | d5 = complement of INFINITY
-       movel   IMM (0x00800000),d4     | this is to put hidden bit back
-       bclr    IMM (31),d0             | get rid of a's sign bit '
-       movel   d0,d2                   | copy of a, used for its exponent
-       beq     Lmulsf$a$0              | branch if a is zero
-       bclr    IMM (31),d1             | get rid of b's sign bit '
-       movel   d1,d3           | copy of b, used for its exponent
-       beq     Lmulsf$b$0      | branch if b is zero
-       cmpl    d6,d0           | is a big?
-       bhi     Lmulsf$inop     | if a is NaN return NaN
-       beq     Lmulsf$inf      | if a is INFINITY we have to check b
-       cmpl    d6,d1           | now compare b with INFINITY
-       bhi     Lmulsf$inop     | is b NaN?
-       beq     Lmulsf$overflow | is b INFINITY?
-| Here we have both numbers finite and nonzero (and with no sign bit).
-| Now we get the exponents into d2 and d3.
-       andl    d6,d2           | and isolate exponent in d2
-       beq     Lmulsf$a$den    | if exponent is zero a is denormalized
-       andl    d5,d0           | and isolate fraction
-       orl     d4,d0           | and put hidden bit back
-       swap    d2              | I like exponents in the first byte
-#ifndef __mcoldfire__
-       lsrw    IMM (7),d2      | exponent of a now in the low byte of d2
-#else
-       lsrl    IMM (7),d2      | exponent of a now in the low byte of d2
-#endif
-Lmulsf$1:                      | a is ready, now do the same for b
-       andl    d6,d3           | isolate exponent of b in d3
-       beq     Lmulsf$b$den    | if exponent is zero b is denormalized
-       andl    d5,d1           | isolate fraction of b
-       orl     d4,d1           | and put hidden bit back
-       swap    d3              | exponent into the low word
-#ifndef __mcoldfire__
-       lsrw    IMM (7),d3      | exponent of b now in the low byte of d3
-#else
-       lsrl    IMM (7),d3      | exponent of b now in the low byte of d3
-#endif
-Lmulsf$2:                      | both fractions are normalized here
-#ifndef __mcoldfire__
-       addw    d3,d2           | add exponents
-       subw    IMM (F_BIAS+1),d2 | and subtract bias (plus one)
-#else
-       addl    d3,d2           | add exponents
-       subl    IMM (F_BIAS+1),d2 | and subtract bias (plus one)
-#endif
-
-| We are now ready to do the multiplication. The situation is as follows:
-| both a and b have bit FLT_MANT_DIG-1 set (even if they were 
-| denormalized to start with!), which means that in the product 
-| bit 2*(FLT_MANT_DIG-1) (that is, bit 2*FLT_MANT_DIG-2-32 of the 
-| high long) is set. 
-
-| To do the multiplication let us move the number a little bit around ...
-       movel   d1,d6           | second operand in d6
-       movel   d0,d5           | first operand in d4-d5
-       movel   IMM (0),d4
-       movel   d4,d1           | the sums will go in d0-d1
-       movel   d4,d0
-
-| now bit FLT_MANT_DIG-1 becomes bit 31:
-       lsll    IMM (31-FLT_MANT_DIG+1),d6      | left-align b in d6
-
-| Start the loop (we loop #FLT_MANT_DIG times):
-       moveq   IMM (FLT_MANT_DIG-1),d3 | loop counter
-1:     addl    d1,d1           | shift sum 
-       addxl   d0,d0           | (carry goes into the high long)
-       lsll    IMM (1),d6      | get bit bn
-       bcc     2f              | if not set skip sum
-       addl    d5,d1           | add a
-       addxl   d4,d0           | d4 is zero, this only adds the carry
-2:
-#ifndef __mcoldfire__
-       dbf     d3,1b           | loop back
-#else
-       subql   IMM (1),d3
-       bpl     1b
-#endif
-
-| Now we have the product in d0-d1, with bit (FLT_MANT_DIG - 1) + FLT_MANT_DIG
-| (mod 32) of d0 set. The first thing to do now is to normalize it so bit 
-| FLT_MANT_DIG is set (to do the rounding).
-| This is a 10-bit left shift of the pair d0-d1: the top 10 bits of d1 are
-| collected in d3 and then or-ed into the bottom of the shifted d0.
-#ifndef __mcoldfire__
-       rorl    IMM (6),d1      | rotate right by 6, then swap halves:
-       swap    d1              | together a rotate left by 10
-       movew   d1,d3           | low word holds the bits that wrapped
-       andw    IMM (0x03ff),d3 | keep just those 10 bits in d3
-       andw    IMM (0xfd00),d1 | NOTE(review): this mask keeps bit 8, which
-                               | was also copied to d3; confirm whether
-                               | 0xfc00 was intended
-#else
-       movel   d1,d3
-       lsll    IMM (8),d1      | shift d1 left by 8 ...
-       addl    d1,d1           | ... by 9 ...
-       addl    d1,d1           | ... by 10
-       moveq   IMM (22),d5
-       lsrl    d5,d3           | d3 = top 10 bits of the old d1
-       orl     d3,d1
-       andl    IMM (0xfffffd00),d1     | same mask as the andw above
-#endif
-       lsll    IMM (8),d0      | shift the high long left by 10 as well
-       addl    d0,d0
-       addl    d0,d0
-#ifndef __mcoldfire__
-       orw     d3,d0           | and bring in the bits saved in d3
-#else
-       orl     d3,d0           | and bring in the bits saved in d3
-#endif
-
-       moveq   IMM (MULTIPLY),d5       | operation code for the common exit
-       
-       btst    IMM (FLT_MANT_DIG+1),d0 | did the product grow one extra bit?
-       beq     Lround$exit     | if not go straight to rounding
-#ifndef __mcoldfire__
-       lsrl    IMM (1),d0      | else shift d0-d1 right once
-       roxrl   IMM (1),d1
-       addw    IMM (1),d2      | and bump the exponent
-#else
-       lsrl    IMM (1),d1      | else shift d0-d1 right once
-       btst    IMM (0),d0
-       beq     10f
-       bset    IMM (31),d1     | bit 0 of d0 moves into bit 31 of d1
-10:    lsrl    IMM (1),d0
-       addql   IMM (1),d2      | and bump the exponent
-#endif
-       bra     Lround$exit
-
-| Special-case exits of __mulsf3. Each one loads the MULTIPLY operation code
-| into d5 and branches to a shared Lf$... handler defined elsewhere
-| (presumably d5 tells that handler which operation raised the condition).
-Lmulsf$inop:
-       moveq   IMM (MULTIPLY),d5
-       bra     Lf$inop
-
-Lmulsf$overflow:
-       moveq   IMM (MULTIPLY),d5
-       bra     Lf$overflow
-
-Lmulsf$inf:
-       moveq   IMM (MULTIPLY),d5
-| If either is NaN return NaN; else both are (maybe infinite) numbers, so
-| return INFINITY with the correct sign (which is in d7).
-| On entry d6 holds INFINITY and d1 holds b with its sign bit cleared.
-       cmpl    d6,d1           | is b NaN?
-       bhi     Lf$inop         | if so return NaN
-       bra     Lf$overflow     | else return +/-INFINITY
-
-| If either number is zero return zero, unless the other is +/-INFINITY, 
-| or NaN, in which case we return NaN.
-| Both entry points reload the other operand into d1 and share the code at 1.
-Lmulsf$b$0:
-| Here d1 (==b) is zero.
-       movel   a6@(8),d1       | get a again to check for non-finiteness
-       bra     1f
-Lmulsf$a$0:
-       movel   a6@(12),d1      | get b again to check for non-finiteness
-1:     bclr    IMM (31),d1     | clear sign bit 
-       cmpl    IMM (INFINITY),d1 | and check for a large exponent
-       bge     Lf$inop         | if the other operand is +/-INFINITY or NaN
-                               | return NaN
-       movel   d7,d0           | else return signed zero
-       PICLEA  SYM (_fpCCR),a0 | address of _fpCCR
-       movew   IMM (0),a0@     | clear its first word
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7      | restore data registers
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6              | drop the frame
-       rts                     | and return
-
-| If a number is denormalized we put an exponent of 1 but do not put the 
-| hidden bit back into the fraction; instead we shift left until bit 23
-| (the hidden bit) is set, adjusting the exponent accordingly. We do this
-| to ensure that the product of the fractions is close to 1.
-| Each loop shifts at least once, so the exponent ends up at zero or below.
-Lmulsf$a$den:
-       movel   IMM (1),d2      | start from exponent 1
-       andl    d5,d0           | d5 is the fraction mask
-1:     addl    d0,d0           | shift a left (until bit 23 is set)
-#ifndef __mcoldfire__
-       subw    IMM (1),d2      | and adjust exponent
-#else
-       subql   IMM (1),d2      | and adjust exponent
-#endif
-       btst    IMM (FLT_MANT_DIG-1),d0
-       bne     Lmulsf$1        | hidden bit reached: go on with b
-       bra     1b              | else loop back
-
-Lmulsf$b$den:
-       movel   IMM (1),d3      | start from exponent 1
-       andl    d5,d1           | d5 is the fraction mask
-1:     addl    d1,d1           | shift b left until bit 23 is set
-#ifndef __mcoldfire__
-       subw    IMM (1),d3      | and adjust exponent
-#else
-       subql   IMM (1),d3      | and adjust exponent
-#endif
-       btst    IMM (FLT_MANT_DIG-1),d1
-       bne     Lmulsf$2        | hidden bit reached: go add the exponents
-       bra     1b              | else loop back
-
-|=============================================================================
-|                             __divsf3
-|=============================================================================
-
-| float __divsf3(float, float);
-| Divides a (at a6@(8)) by b (at a6@(12)). Zero, INFINITY and NaN operands
-| branch to the Ldivsf$... special cases. Otherwise the quotient of the
-| fractions is built bit by bit in d6, a sticky bit is recorded in d1, and
-| the routine finishes through Lround$exit with the exponent in d2 and the
-| sign in d7.
-       FUNC(__divsf3)
-SYM (__divsf3):
-#ifndef __mcoldfire__
-       link    a6,IMM (0)
-       moveml  d2-d7,sp@-      | save the data registers we use
-#else
-       link    a6,IMM (-24)    | reserve 24 bytes in the frame
-       moveml  d2-d7,sp@       | and store d2-d7 there
-#endif
-       movel   a6@(8),d0               | get a into d0
-       movel   a6@(12),d1              | and b into d1
-       movel   d0,d7                   | d7 will hold the sign of the result
-       eorl    d1,d7                   | (sign of a) xor (sign of b)
-       andl    IMM (0x80000000),d7     | keep only the sign bit
-       movel   IMM (INFINITY),d6       | useful constant (+INFINITY)
-       movel   d6,d5                   | another (mask for fraction)
-       notl    d5                      | d5 = complement of INFINITY
-       movel   IMM (0x00800000),d4     | this is to put hidden bit back
-       bclr    IMM (31),d0             | get rid of a's sign bit '
-       movel   d0,d2                   | copy of a, used for its exponent
-       beq     Ldivsf$a$0              | branch if a is zero
-       bclr    IMM (31),d1             | get rid of b's sign bit '
-       movel   d1,d3                   | copy of b, used for its exponent
-       beq     Ldivsf$b$0              | branch if b is zero
-       cmpl    d6,d0                   | is a big?
-       bhi     Ldivsf$inop             | if a is NaN return NaN
-       beq     Ldivsf$inf              | if a is INFINITY we have to check b
-       cmpl    d6,d1                   | now compare b with INFINITY 
-       bhi     Ldivsf$inop             | if b is NaN return NaN
-       beq     Ldivsf$underflow        | finite a over INFINITY
-| Here we have both numbers finite and nonzero (and with no sign bit).
-| Now we get the exponents into d2 and d3 and normalize the numbers to
-| ensure that the ratio of the fractions is close to 1. We do this by
-| making sure that bit #FLT_MANT_DIG-1 (hidden bit) is set.
-       andl    d6,d2           | and isolate exponent in d2
-       beq     Ldivsf$a$den    | if exponent is zero a is denormalized
-       andl    d5,d0           | and isolate fraction
-       orl     d4,d0           | and put hidden bit back
-       swap    d2              | I like exponents in the first byte
-#ifndef __mcoldfire__
-       lsrw    IMM (7),d2      | exponent of a now in the low byte of d2
-#else
-       lsrl    IMM (7),d2      | exponent of a now in the low byte of d2
-#endif
-Ldivsf$1:                      | a is ready, now do the same for b
-       andl    d6,d3           | isolate exponent of b in d3
-       beq     Ldivsf$b$den    | if exponent is zero b is denormalized
-       andl    d5,d1           | isolate fraction of b
-       orl     d4,d1           | and put hidden bit back
-       swap    d3              | exponent into the low word
-#ifndef __mcoldfire__
-       lsrw    IMM (7),d3      | exponent of b now in the low byte of d3
-#else
-       lsrl    IMM (7),d3      | exponent of b now in the low byte of d3
-#endif
-Ldivsf$2:                      | both fractions are normalized here
-#ifndef __mcoldfire__
-       subw    d3,d2           | subtract exponents
-       addw    IMM (F_BIAS),d2 | and add bias
-#else
-       subl    d3,d2           | subtract exponents
-       addl    IMM (F_BIAS),d2 | and add bias
-#endif
- 
-| We are now ready to do the division. We have prepared things in such a way
-| that the ratio of the fractions will be less than 2 but greater than 1/2.
-| At this point the registers in use are:
-| d0   holds a (first operand, bit FLT_MANT_DIG=0, bit FLT_MANT_DIG-1=1)
-| d1   holds b (second operand, bit FLT_MANT_DIG-1=1)
-| d2   holds the difference of the exponents, corrected by the bias
-| d7   holds the sign of the ratio
-| d4, d5, d6 hold some constants
-       movel   d7,a0           | save sign in a0; d6-d7 will hold the ratio
-       movel   IMM (0),d6      | clear the quotient
-       movel   d6,d7
-
-       moveq   IMM (FLT_MANT_DIG+1),d3 | index of the first quotient bit
-1:     cmpl    d0,d1           | is a < b?
-       bhi     2f              | if so this quotient bit stays clear
-       bset    d3,d6           | set a bit in d6
-       subl    d1,d0           | if a >= b  a <-- a-b
-       beq     3f              | if a is zero, exit
-2:     addl    d0,d0           | multiply a by 2
-#ifndef __mcoldfire__
-       dbra    d3,1b
-#else
-       subql   IMM (1),d3
-       bpl     1b
-#endif
-
-| Now we keep going to set the sticky bit ...
-       moveq   IMM (FLT_MANT_DIG),d3
-1:     cmpl    d0,d1
-       ble     2f              | b <= a (signed compare): record a sticky bit
-       addl    d0,d0           | else multiply a by 2 and try again
-#ifndef __mcoldfire__
-       dbra    d3,1b
-#else
-       subql   IMM(1),d3
-       bpl     1b
-#endif
-       movel   IMM (0),d1      | counter ran out: no sticky bit
-       bra     3f
-2:     movel   IMM (0),d1
-#ifndef __mcoldfire__
-       subw    IMM (FLT_MANT_DIG),d3   | bit index = d3 - FLT_MANT_DIG + 31
-       addw    IMM (31),d3
-#else
-       subl    IMM (FLT_MANT_DIG),d3   | bit index = d3 - FLT_MANT_DIG + 31
-       addl    IMM (31),d3
-#endif
-       bset    d3,d1           | set that bit in the low long
-3:
-       movel   d6,d0           | put the ratio in d0-d1
-       movel   a0,d7           | get sign back
-
-| Because of the normalization we did before we are guaranteed that 
-| d0 is smaller than 2^26 but larger than 2^24. Thus bit 26 is not set,
-| bit 25 could be set, and if it is not set then bit 24 is necessarily set.
-       btst    IMM (FLT_MANT_DIG+1),d0 | is the extra high bit set?
-       beq     1f              | if it is not set, then bit 24 is set
-       lsrl    IMM (1),d0      | else shift right once
-#ifndef __mcoldfire__
-       addw    IMM (1),d2      | and bump the exponent
-#else
-       addl    IMM (1),d2      | and bump the exponent
-#endif
-1:
-| Now round, check for over- and underflow, and exit.
-       moveq   IMM (DIVIDE),d5
-       bra     Lround$exit
-
-| Special-case exits of __divsf3. Each one loads the DIVIDE operation code
-| into d5 before branching to a shared Lf$... handler defined elsewhere
-| (presumably d5 tells that handler which operation raised the condition).
-Ldivsf$inop:
-       moveq   IMM (DIVIDE),d5
-       bra     Lf$inop
-
-Ldivsf$overflow:
-       moveq   IMM (DIVIDE),d5
-       bra     Lf$overflow
-
-Ldivsf$underflow:
-       moveq   IMM (DIVIDE),d5
-       bra     Lf$underflow
-
-Ldivsf$a$0:
-       moveq   IMM (DIVIDE),d5
-| If a is zero check to see whether b is zero also. In that case return
-| NaN; then check if b is NaN, and return NaN also in that case. Else
-| return a properly signed zero.
-       andl    IMM (0x7fffffff),d1     | clear sign bit and test b
-       beq     Lf$inop                 | if b is also zero return NaN
-       cmpl    IMM (INFINITY),d1       | check for NaN
-       bhi     Lf$inop                 | b is NaN
-       movel   d7,d0                   | else return signed zero
-       PICLEA  SYM (_fpCCR),a0         | address of _fpCCR
-       movew   IMM (0),a0@             | clear its first word
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7              | restore data registers
-#else
-       moveml  sp@,d2-d7               | restore data registers
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6                      | drop the frame
-       rts                             | and return
-       
-Ldivsf$b$0:
-       moveq   IMM (DIVIDE),d5
-| If we got here a is not zero. Check if a is NaN; in that case return NaN,
-| else return +/-INFINITY. Remember that a is in d0 with the sign bit 
-| cleared already.
-       cmpl    IMM (INFINITY),d0       | compare d0 with INFINITY
-       bhi     Lf$inop                 | if larger it is NaN
-       bra     Lf$div$0                | else signal DIVIDE_BY_ZERO
-
-Ldivsf$inf:
-       moveq   IMM (DIVIDE),d5
-| If a is INFINITY we have to check b
-       cmpl    IMM (INFINITY),d1       | compare b with INFINITY 
-       bge     Lf$inop                 | if b is NaN or INFINITY return NaN
-       bra     Lf$overflow             | else return overflow
-
-| If a number is denormalized we put an exponent of 1 but do not put the 
-| bit back into the fraction.
-| Instead the fraction is shifted left until bit FLT_MANT_DIG-1 is set, and
-| the exponent is decremented once per shift.
-Ldivsf$a$den:
-       movel   IMM (1),d2      | start from exponent 1
-       andl    d5,d0           | d5 is the fraction mask
-1:     addl    d0,d0           | shift a left until bit FLT_MANT_DIG-1 is set
-#ifndef __mcoldfire__
-       subw    IMM (1),d2      | and adjust exponent
-#else
-       subl    IMM (1),d2      | and adjust exponent
-#endif
-       btst    IMM (FLT_MANT_DIG-1),d0
-       bne     Ldivsf$1        | hidden bit reached: go on with b
-       bra     1b              | else loop back
-
-Ldivsf$b$den:
-       movel   IMM (1),d3      | start from exponent 1
-       andl    d5,d1           | d5 is the fraction mask
-1:     addl    d1,d1           | shift b left until bit FLT_MANT_DIG-1 is set
-#ifndef __mcoldfire__
-       subw    IMM (1),d3      | and adjust exponent
-#else
-       subl    IMM (1),d3      | and adjust exponent
-#endif
-       btst    IMM (FLT_MANT_DIG-1),d1
-       bne     Ldivsf$2        | hidden bit reached: go subtract the exponents
-       bra     1b              | else loop back
-
-Lround$exit:
-| This is a common exit point for __mulsf3 and __divsf3. 
-| On entry the fraction is in d0-d1, the exponent in d2, the sign in d7 and
-| the operation code in d5. The result is denormalized if needed, rounded
-| according to the mode read from _fpCCR, packed into d0 and returned.
-
-| First check for underflow in the exponent:
-#ifndef __mcoldfire__
-       cmpw    IMM (-FLT_MANT_DIG-1),d2        | too small even to denormalize?
-#else
-       cmpl    IMM (-FLT_MANT_DIG-1),d2        | too small even to denormalize?
-#endif
-       blt     Lf$underflow    | if so give up right away
-| It could happen that the exponent is less than 1, in which case the 
-| number is denormalized. In this case we shift right and adjust the 
-| exponent until it becomes 1 or the fraction is zero (in the latter case 
-| we signal underflow and return zero).
-       movel   IMM (0),d6      | d6 is used temporarily
-#ifndef __mcoldfire__
-       cmpw    IMM (1),d2      | if the exponent is less than 1 we 
-#else
-       cmpl    IMM (1),d2      | if the exponent is less than 1 we 
-#endif
-       bge     2f              | have to shift right (denormalize)
-1:
-#ifndef __mcoldfire__
-       addw    IMM (1),d2      | adjust the exponent
-       lsrl    IMM (1),d0      | shift right once 
-       roxrl   IMM (1),d1      | (bit shifted out of d0 enters d1)
-       roxrl   IMM (1),d6      | d6 collect bits we would lose otherwise
-       cmpw    IMM (1),d2      | is the exponent 1 already?
-#else
-       addql   IMM (1),d2      | adjust the exponent
-       lsrl    IMM (1),d6      | shift the triple d0-d1-d6 right once,
-       btst    IMM (0),d1      | moving bit 0 of each register into
-       beq     11f             | bit 31 of the next one by hand
-       bset    IMM (31),d6
-11:    lsrl    IMM (1),d1
-       btst    IMM (0),d0
-       beq     10f
-       bset    IMM (31),d1
-10:    lsrl    IMM (1),d0
-       cmpl    IMM (1),d2      | is the exponent 1 already?
-#endif
-       beq     2f              | if so stop shifting
-       bra     1b              | if not loop back
-       bra     Lf$underflow    | safety check, shouldn't execute '
-2:     orl     d6,d1           | this is a trick so we don't lose  '
-                               | the extra bits which were flushed right
-| Now call the rounding routine (which takes care of denormalized numbers):
-       lea     pc@(Lround$0),a0 | to return from rounding routine
-       PICLEA  SYM (_fpCCR),a1 | check the rounding mode
-#ifdef __mcoldfire__
-       clrl    d6              | the movew below only sets the low word
-#endif
-       movew   a1@(6),d6       | rounding mode in d6
-       beq     Lround$to$nearest       | mode zero is round to nearest
-#ifndef __mcoldfire__
-       cmpw    IMM (ROUND_TO_PLUS),d6
-#else
-       cmpl    IMM (ROUND_TO_PLUS),d6
-#endif
-       bhi     Lround$to$minus | mode above ROUND_TO_PLUS
-       blt     Lround$to$zero  | mode below ROUND_TO_PLUS
-       bra     Lround$to$plus  | mode equal to ROUND_TO_PLUS
-Lround$0:
-| Here we have a correctly rounded result (either normalized or denormalized).
-
-| Here we should have either a normalized number or a denormalized one, and
-| the exponent is necessarily larger or equal to 1 (so we don't have to  '
-| check again for underflow!). We have to check for overflow or for a 
-| denormalized number (which also signals underflow).
-| Check for overflow (i.e., exponent >= 255).
-#ifndef __mcoldfire__
-       cmpw    IMM (0x00ff),d2
-#else
-       cmpl    IMM (0x00ff),d2
-#endif
-       bge     Lf$overflow
-| Now check for a denormalized number (exponent==0).
-       movew   d2,d2           | set the flags from the exponent word
-       beq     Lf$den
-1:
-| Put back the exponents and sign and return.
-#ifndef __mcoldfire__
-       lslw    IMM (7),d2      | exponent back to fourth byte
-#else
-       lsll    IMM (7),d2      | exponent back to fourth byte
-#endif
-       bclr    IMM (FLT_MANT_DIG-1),d0 | drop the hidden bit
-       swap    d0              | and put back exponent
-#ifndef __mcoldfire__
-       orw     d2,d0           | exponent goes into the high word of d0
-#else
-       orl     d2,d0
-#endif
-       swap    d0              | halves back in their usual order
-       orl     d7,d0           | and sign also
-
-       PICLEA  SYM (_fpCCR),a0 | address of _fpCCR
-       movew   IMM (0),a0@     | clear its first word
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7      | restore data registers
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6              | drop the frame and return
-       rts
-
-|=============================================================================
-|                             __negsf2
-|=============================================================================
-
-| This is trivial and could be shorter if we didn't bother checking for NaN '
-| and +/-INFINITY.
-
-| float __negsf2(float);
-       FUNC(__negsf2)
-SYM (__negsf2):
-#ifndef __mcoldfire__
-       link    a6,IMM (0)
-       moveml  d2-d7,sp@-
-#else
-       link    a6,IMM (-24)
-       moveml  d2-d7,sp@
-#endif
-       moveq   IMM (NEGATE),d5
-       movel   a6@(8),d0       | get number to negate in d0
-       bchg    IMM (31),d0     | negate
-       movel   d0,d1           | make a positive copy
-       bclr    IMM (31),d1     |
-       tstl    d1              | check for zero
-       beq     2f              | if zero (either sign) return +zero
-       cmpl    IMM (INFINITY),d1 | compare to +INFINITY
-       blt     1f              |
-       bhi     Lf$inop         | if larger (fraction not zero) is NaN
-       movel   d0,d7           | else get sign and return INFINITY
-       andl    IMM (0x80000000),d7
-       bra     Lf$infty                
-1:     PICLEA  SYM (_fpCCR),a0
-       movew   IMM (0),a0@
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6
-       rts
-2:     bclr    IMM (31),d0
-       bra     1b
-
-|=============================================================================
-|                             __cmpsf2
-|=============================================================================
-
-GREATER =  1
-LESS    = -1
-EQUAL   =  0
-
-| int __cmpsf2_internal(float, float, int);
-SYM (__cmpsf2_internal):
-#ifndef __mcoldfire__
-       link    a6,IMM (0)
-       moveml  d2-d7,sp@-      | save registers
-#else
-       link    a6,IMM (-24)
-       moveml  d2-d7,sp@
-#endif
-       moveq   IMM (COMPARE),d5
-       movel   a6@(8),d0       | get first operand
-       movel   a6@(12),d1      | get second operand
-| Check if either is NaN, and in that case return garbage and signal
-| INVALID_OPERATION. Check also if either is zero, and clear the signs
-| if necessary.
-       movel   d0,d6
-       andl    IMM (0x7fffffff),d0
-       beq     Lcmpsf$a$0
-       cmpl    IMM (0x7f800000),d0
-       bhi     Lcmpf$inop
-Lcmpsf$1:
-       movel   d1,d7
-       andl    IMM (0x7fffffff),d1
-       beq     Lcmpsf$b$0
-       cmpl    IMM (0x7f800000),d1
-       bhi     Lcmpf$inop
-Lcmpsf$2:
-| Check the signs
-       eorl    d6,d7
-       bpl     1f
-| If the signs are not equal check if a >= 0
-       tstl    d6
-       bpl     Lcmpsf$a$gt$b   | if (a >= 0 && b < 0) => a > b
-       bmi     Lcmpsf$b$gt$a   | if (a < 0 && b >= 0) => a < b
-1:
-| If the signs are equal check for < 0
-       tstl    d6
-       bpl     1f
-| If both are negative exchange them
-#ifndef __mcoldfire__
-       exg     d0,d1
-#else
-       movel   d0,d7
-       movel   d1,d0
-       movel   d7,d1
-#endif
-1:
-| Now that they are positive we just compare them as longs (does this also
-| work for denormalized numbers?).
-       cmpl    d0,d1
-       bhi     Lcmpsf$b$gt$a   | |b| > |a|
-       bne     Lcmpsf$a$gt$b   | |b| < |a|
-| If we got here a == b.
-       movel   IMM (EQUAL),d0
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7      | put back the registers
-#else
-       moveml  sp@,d2-d7
-#endif
-       unlk    a6
-       rts
-Lcmpsf$a$gt$b:
-       movel   IMM (GREATER),d0
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7      | put back the registers
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6
-       rts
-Lcmpsf$b$gt$a:
-       movel   IMM (LESS),d0
-#ifndef __mcoldfire__
-       moveml  sp@+,d2-d7      | put back the registers
-#else
-       moveml  sp@,d2-d7
-       | XXX if frame pointer is ever removed, stack pointer must
-       | be adjusted here.
-#endif
-       unlk    a6
-       rts
-
-Lcmpsf$a$0:    
-       bclr    IMM (31),d6
-       bra     Lcmpsf$1
-Lcmpsf$b$0:
-       bclr    IMM (31),d7
-       bra     Lcmpsf$2
-
-Lcmpf$inop:
-       movl    a6@(16),d0
-       moveq   IMM (INEXACT_RESULT+INVALID_OPERATION),d7
-       moveq   IMM (SINGLE_FLOAT),d6
-       PICJUMP $_exception_handler
-
-| int __cmpsf2(float, float);
-       FUNC(__cmpsf2)
-SYM (__cmpsf2):
-       link    a6,IMM (0)
-       pea     1
-       movl    a6@(12),sp@-
-       movl    a6@(8),sp@-
-       PICCALL SYM (__cmpsf2_internal)
-       unlk    a6
-       rts
-
-|=============================================================================
-|                           rounding routines
-|=============================================================================
-
-| The rounding routines expect the number to be normalized in registers
-| d0-d1, with the exponent in register d2. They assume that the 
-| exponent is larger or equal to 1. They return a properly normalized number
-| if possible, and a denormalized number otherwise. The exponent is returned
-| in d2.
-
-Lround$to$nearest:
-| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"):
-| Here we assume that the exponent is not too small (this should be checked
-| before entering the rounding routine), but the number could be denormalized.
-
-| Check for denormalized numbers:
-1:     btst    IMM (FLT_MANT_DIG),d0
-       bne     2f              | if set the number is normalized
-| Normalize shifting left until bit #FLT_MANT_DIG is set or the exponent 
-| is one (remember that a denormalized number corresponds to an 
-| exponent of -F_BIAS+1).
-#ifndef __mcoldfire__
-       cmpw    IMM (1),d2      | remember that the exponent is at least one
-#else
-       cmpl    IMM (1),d2      | remember that the exponent is at least one
-#endif
-       beq     2f              | an exponent of one means denormalized
-       addl    d1,d1           | else shift and adjust the exponent
-       addxl   d0,d0           |
-#ifndef __mcoldfire__
-       dbra    d2,1b           |
-#else
-       subql   IMM (1),d2
-       bpl     1b
-#endif
-2:
-| Now round: we do it as follows: after the shifting we can write the
-| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2.
-| If delta < 1, do nothing. If delta > 1, add 1 to f. 
-| If delta == 1, we make sure the rounded number will be even (odd?) 
-| (after shifting).
-       btst    IMM (0),d0      | is delta < 1?
-       beq     2f              | if so, do not do anything
-       tstl    d1              | is delta == 1?
-       bne     1f              | if so round to even
-       movel   d0,d1           | 
-       andl    IMM (2),d1      | bit 1 is the last significant bit
-       addl    d1,d0           | 
-       bra     2f              | 
-1:     movel   IMM (1),d1      | else add 1 
-       addl    d1,d0           |
-| Shift right once (because we used bit #FLT_MANT_DIG!).
-2:     lsrl    IMM (1),d0              
-| Now check again bit #FLT_MANT_DIG (rounding could have produced a
-| 'fraction overflow' ...).
-       btst    IMM (FLT_MANT_DIG),d0   
-       beq     1f
-       lsrl    IMM (1),d0
-#ifndef __mcoldfire__
-       addw    IMM (1),d2
-#else
-       addql   IMM (1),d2
-#endif
-1:
-| If bit #FLT_MANT_DIG-1 is clear we have a denormalized number, so we 
-| have to put the exponent to zero and return a denormalized number.
-       btst    IMM (FLT_MANT_DIG-1),d0
-       beq     1f
-       jmp     a0@
-1:     movel   IMM (0),d2
-       jmp     a0@
-
-Lround$to$zero:
-Lround$to$plus:
-Lround$to$minus:
-       jmp     a0@
-#endif /* L_float */
-
-| gcc expects the routines __eqdf2, __nedf2, __gtdf2, __gedf2,
-| __ledf2, __ltdf2 to all return the same value as a direct call to
-| __cmpdf2 would.  In this implementation, each of these routines
-| simply calls __cmpdf2.  It would be more efficient to give the
-| __cmpdf2 routine several names, but separating them out will make it
-| easier to write efficient versions of these routines someday.
-| If the operands recompare unordered unordered __gtdf2 and __gedf2 return -1.
-| The other routines return 1.
-
-#ifdef  L_eqdf2
-       .text
-       FUNC(__eqdf2)
-       .globl  SYM (__eqdf2)
-SYM (__eqdf2):
-       link    a6,IMM (0)
-       pea     1
-       movl    a6@(20),sp@-
-       movl    a6@(16),sp@-
-       movl    a6@(12),sp@-
-       movl    a6@(8),sp@-
-       PICCALL SYM (__cmpdf2_internal)
-       unlk    a6
-       rts
-#endif /* L_eqdf2 */
-
-#ifdef  L_nedf2
-       .text
-       FUNC(__nedf2)
-       .globl  SYM (__nedf2)
-SYM (__nedf2):
-       link    a6,IMM (0)
-       pea     1
-       movl    a6@(20),sp@-
-       movl    a6@(16),sp@-
-       movl    a6@(12),sp@-
-       movl    a6@(8),sp@-
-       PICCALL SYM (__cmpdf2_internal)
-       unlk    a6
-       rts
-#endif /* L_nedf2 */
-
-#ifdef  L_gtdf2
-       .text
-       FUNC(__gtdf2)
-       .globl  SYM (__gtdf2)
-SYM (__gtdf2):
-       link    a6,IMM (0)
-       pea     -1
-       movl    a6@(20),sp@-
-       movl    a6@(16),sp@-
-       movl    a6@(12),sp@-
-       movl    a6@(8),sp@-
-       PICCALL SYM (__cmpdf2_internal)
-       unlk    a6
-       rts
-#endif /* L_gtdf2 */
-
-#ifdef  L_gedf2
-       .text
-       FUNC(__gedf2)
-       .globl  SYM (__gedf2)
-SYM (__gedf2):
-       link    a6,IMM (0)
-       pea     -1
-       movl    a6@(20),sp@-
-       movl    a6@(16),sp@-
-       movl    a6@(12),sp@-
-       movl    a6@(8),sp@-
-       PICCALL SYM (__cmpdf2_internal)
-       unlk    a6
-       rts
-#endif /* L_gedf2 */
-
-#ifdef  L_ltdf2
-       .text
-       FUNC(__ltdf2)
-       .globl  SYM (__ltdf2)
-SYM (__ltdf2):
-       link    a6,IMM (0)
-       pea     1
-       movl    a6@(20),sp@-
-       movl    a6@(16),sp@-
-       movl    a6@(12),sp@-
-       movl    a6@(8),sp@-
-       PICCALL SYM (__cmpdf2_internal)
-       unlk    a6
-       rts
-#endif /* L_ltdf2 */
-
-#ifdef  L_ledf2
-       .text
-       FUNC(__ledf2)
-       .globl  SYM (__ledf2)
-SYM (__ledf2):
-       link    a6,IMM (0)
-       pea     1
-       movl    a6@(20),sp@-
-       movl    a6@(16),sp@-
-       movl    a6@(12),sp@-
-       movl    a6@(8),sp@-
-       PICCALL SYM (__cmpdf2_internal)
-       unlk    a6
-       rts
-#endif /* L_ledf2 */
-
-| The comments above about __eqdf2, et. al., also apply to __eqsf2,
-| et. al., except that the latter call __cmpsf2 rather than __cmpdf2.
-
-#ifdef  L_eqsf2
-       .text
-       FUNC(__eqsf2)
-       .globl  SYM (__eqsf2)
-SYM (__eqsf2):
-       link    a6,IMM (0)
-       pea     1
-       movl    a6@(12),sp@-
-       movl    a6@(8),sp@-
-       PICCALL SYM (__cmpsf2_internal)
-       unlk    a6
-       rts
-#endif /* L_eqsf2 */
-
-#ifdef  L_nesf2
-       .text
-       FUNC(__nesf2)
-       .globl  SYM (__nesf2)
-SYM (__nesf2):
-       link    a6,IMM (0)
-       pea     1
-       movl    a6@(12),sp@-
-       movl    a6@(8),sp@-
-       PICCALL SYM (__cmpsf2_internal)
-       unlk    a6
-       rts
-#endif /* L_nesf2 */
-
-#ifdef  L_gtsf2
-       .text
-       FUNC(__gtsf2)
-       .globl  SYM (__gtsf2)
-SYM (__gtsf2):
-       link    a6,IMM (0)
-       pea     -1
-       movl    a6@(12),sp@-
-       movl    a6@(8),sp@-
-       PICCALL SYM (__cmpsf2_internal)
-       unlk    a6
-       rts
-#endif /* L_gtsf2 */
-
-#ifdef  L_gesf2
-       .text
-       FUNC(__gesf2)
-       .globl  SYM (__gesf2)
-SYM (__gesf2):
-       link    a6,IMM (0)
-       pea     -1
-       movl    a6@(12),sp@-
-       movl    a6@(8),sp@-
-       PICCALL SYM (__cmpsf2_internal)
-       unlk    a6
-       rts
-#endif /* L_gesf2 */
-
-#ifdef  L_ltsf2
-       .text
-       FUNC(__ltsf2)
-       .globl  SYM (__ltsf2)
-SYM (__ltsf2):
-       link    a6,IMM (0)
-       pea     1
-       movl    a6@(12),sp@-
-       movl    a6@(8),sp@-
-       PICCALL SYM (__cmpsf2_internal)
-       unlk    a6
-       rts
-#endif /* L_ltsf2 */
-
-#ifdef  L_lesf2
-       .text
-       FUNC(__lesf2)
-       .globl  SYM (__lesf2)
-SYM (__lesf2):
-       link    a6,IMM (0)
-       pea     1
-       movl    a6@(12),sp@-
-       movl    a6@(8),sp@-
-       PICCALL SYM (__cmpsf2_internal)
-       unlk    a6
-       rts
-#endif /* L_lesf2 */
-
-#if defined (__ELF__) && defined (__linux__)
-       /* Make stack non-executable for ELF linux targets.  */
-       .section        .note.GNU-stack,"",@progbits
-#endif
diff --git a/gcc/config/m68k/t-floatlib b/gcc/config/m68k/t-floatlib

index 2039d1d0dc41512711f61a2b78537741f781f0fc..23734be40bdb2045c8c07fddc88f1c0ae4d0fa00 100644 (file)
--- a/gcc/config/m68k/t-floatlib
+++ b/gcc/config/m68k/t-floatlib
@@ -1,4 +1,4 @@
-# Copyright (C) 2007 Free Software Foundation, Inc.
+# Copyright (C) 2007, 2011 Free Software Foundation, Inc.
  #
  # This file is part of GCC.
  #
@@ -16,12 +16,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMSRC = m68k/lb1sf68.asm
-LIB1ASMFUNCS = _mulsi3 _udivsi3 _divsi3 _umodsi3 _modsi3 \
-   _double _float _floatex \
-   _eqdf2 _nedf2 _gtdf2 _gedf2 _ltdf2 _ledf2 \
-   _eqsf2 _nesf2 _gtsf2 _gesf2 _ltsf2 _lesf2
-
  LIB2FUNCS_EXTRA = fpgnulib.c xfgnulib.c
  
  fpgnulib.c: $(srcdir)/config/m68k/fpgnulib.c
diff --git a/gcc/config/mcore/lib1.asm b/gcc/config/mcore/lib1.asm

deleted file mode 100644 (file)

index 701762f..0000000
--- a/gcc/config/mcore/lib1.asm
+++ /dev/null
@@ -1,303 +0,0 @@
-/* libgcc routines for the MCore.
-   Copyright (C) 1993, 1999, 2000, 2009 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-#define CONCAT1(a, b) CONCAT2(a, b)
-#define CONCAT2(a, b) a ## b
-
-/* Use the right prefix for global labels.  */
-
-#define SYM(x) CONCAT1 (__, x)
-
-#ifdef __ELF__
-#define TYPE(x) .type SYM (x),@function
-#define SIZE(x) .size SYM (x), . - SYM (x)
-#else
-#define TYPE(x)
-#define SIZE(x)
-#endif
-
-.macro FUNC_START name
-       .text
-       .globl SYM (\name)
-       TYPE (\name)
-SYM (\name):
-.endm
-
-.macro FUNC_END name
-       SIZE (\name)
-.endm
-
-#ifdef L_udivsi3
-FUNC_START udiv32
-FUNC_START udivsi32
-
-       movi    r1,0            // r1-r2 form 64 bit dividend
-       movi    r4,1            // r4 is quotient (1 for a sentinel)
-
-       cmpnei  r3,0            // look for 0 divisor
-       bt      9f
-       trap    3               // divide by 0
-9:
-       // control iterations; skip across high order 0 bits in dividend
-       mov     r7,r2
-       cmpnei  r7,0
-       bt      8f
-       movi    r2,0            // 0 dividend
-       jmp     r15             // quick return
-8:
-       ff1     r7              // figure distance to skip
-       lsl     r4,r7           // move the sentinel along (with 0's behind)
-       lsl     r2,r7           // and the low 32 bits of numerator
-
-// appears to be wrong...
-// tested out incorrectly in our OS work...
-//     mov     r7,r3           // looking at divisor
-//     ff1     r7              // I can move 32-r7 more bits to left.
-//     addi    r7,1            // ok, one short of that...
-//     mov     r1,r2
-//     lsr     r1,r7           // bits that came from low order...
-//     rsubi   r7,31           // r7 == "32-n" == LEFT distance
-//     addi    r7,1            // this is (32-n)
-//     lsl     r4,r7           // fixes the high 32 (quotient)
-//     lsl     r2,r7
-//     cmpnei  r4,0
-//     bf      4f              // the sentinel went away...
-
-       // run the remaining bits
-
-1:     lslc    r2,1            // 1 bit left shift of r1-r2
-       addc    r1,r1
-       cmphs   r1,r3           // upper 32 of dividend >= divisor?
-       bf      2f
-       sub     r1,r3           // if yes, subtract divisor
-2:     addc    r4,r4           // shift by 1 and count subtracts
-       bf      1b              // if sentinel falls out of quotient, stop
-
-4:     mov     r2,r4           // return quotient
-       mov     r3,r1           // and piggyback the remainder
-       jmp     r15
-FUNC_END udiv32
-FUNC_END udivsi32
-#endif
-
-#ifdef L_umodsi3
-FUNC_START urem32
-FUNC_START umodsi3
-       movi    r1,0            // r1-r2 form 64 bit dividend
-       movi    r4,1            // r4 is quotient (1 for a sentinel)
-       cmpnei  r3,0            // look for 0 divisor
-       bt      9f
-       trap    3               // divide by 0
-9:
-       // control iterations; skip across high order 0 bits in dividend
-       mov     r7,r2
-       cmpnei  r7,0
-       bt      8f
-       movi    r2,0            // 0 dividend
-       jmp     r15             // quick return
-8:
-       ff1     r7              // figure distance to skip
-       lsl     r4,r7           // move the sentinel along (with 0's behind)
-       lsl     r2,r7           // and the low 32 bits of numerator
-
-1:     lslc    r2,1            // 1 bit left shift of r1-r2
-       addc    r1,r1
-       cmphs   r1,r3           // upper 32 of dividend >= divisor?
-       bf      2f
-       sub     r1,r3           // if yes, subtract divisor
-2:     addc    r4,r4           // shift by 1 and count subtracts
-       bf      1b              // if sentinel falls out of quotient, stop
-       mov     r2,r1           // return remainder
-       jmp     r15
-FUNC_END urem32
-FUNC_END umodsi3
-#endif
-
-#ifdef L_divsi3
-FUNC_START div32
-FUNC_START divsi3
-       mov     r5,r2           // calc sign of quotient
-       xor     r5,r3
-       abs     r2              // do unsigned divide
-       abs     r3
-       movi    r1,0            // r1-r2 form 64 bit dividend
-       movi    r4,1            // r4 is quotient (1 for a sentinel)
-       cmpnei  r3,0            // look for 0 divisor
-       bt      9f
-       trap    3               // divide by 0
-9:
-       // control iterations; skip across high order 0 bits in dividend
-       mov     r7,r2
-       cmpnei  r7,0
-       bt      8f
-       movi    r2,0            // 0 dividend
-       jmp     r15             // quick return
-8:
-       ff1     r7              // figure distance to skip
-       lsl     r4,r7           // move the sentinel along (with 0's behind)
-       lsl     r2,r7           // and the low 32 bits of numerator
-
-// tested out incorrectly in our OS work...
-//     mov     r7,r3           // looking at divisor
-//     ff1     r7              // I can move 32-r7 more bits to left.
-//     addi    r7,1            // ok, one short of that...
-//     mov     r1,r2
-//     lsr     r1,r7           // bits that came from low order...
-//     rsubi   r7,31           // r7 == "32-n" == LEFT distance
-//     addi    r7,1            // this is (32-n)
-//     lsl     r4,r7           // fixes the high 32 (quotient)
-//     lsl     r2,r7
-//     cmpnei  r4,0
-//     bf      4f              // the sentinel went away...
-
-       // run the remaining bits
-1:     lslc    r2,1            // 1 bit left shift of r1-r2
-       addc    r1,r1
-       cmphs   r1,r3           // upper 32 of dividend >= divisor?
-       bf      2f
-       sub     r1,r3           // if yes, subtract divisor
-2:     addc    r4,r4           // shift by 1 and count subtracts
-       bf      1b              // if sentinel falls out of quotient, stop
-
-4:     mov     r2,r4           // return quotient
-       mov     r3,r1           // piggyback the remainder
-       btsti   r5,31           // after adjusting for sign
-       bf      3f
-       rsubi   r2,0
-       rsubi   r3,0
-3:     jmp     r15
-FUNC_END div32
-FUNC_END divsi3
-#endif
-
-#ifdef L_modsi3
-FUNC_START rem32
-FUNC_START modsi3
-       mov     r5,r2           // calc sign of remainder
-       abs     r2              // do unsigned divide
-       abs     r3
-       movi    r1,0            // r1-r2 form 64 bit dividend
-       movi    r4,1            // r4 is quotient (1 for a sentinel)
-       cmpnei  r3,0            // look for 0 divisor
-       bt      9f
-       trap    3               // divide by 0
-9: 
-       // control iterations; skip across high order 0 bits in dividend
-       mov     r7,r2
-       cmpnei  r7,0
-       bt      8f
-       movi    r2,0            // 0 dividend
-       jmp     r15             // quick return
-8:
-       ff1     r7              // figure distance to skip
-       lsl     r4,r7           // move the sentinel along (with 0's behind)
-       lsl     r2,r7           // and the low 32 bits of numerator
-
-1:     lslc    r2,1            // 1 bit left shift of r1-r2
-       addc    r1,r1
-       cmphs   r1,r3           // upper 32 of dividend >= divisor?
-       bf      2f
-       sub     r1,r3           // if yes, subtract divisor
-2:     addc    r4,r4           // shift by 1 and count subtracts
-       bf      1b              // if sentinel falls out of quotient, stop
-       mov     r2,r1           // return remainder
-       btsti   r5,31           // after adjusting for sign
-       bf      3f
-       rsubi   r2,0
-3:     jmp     r15
-FUNC_END rem32
-FUNC_END modsi3
-#endif
-
-
-/* GCC expects that {__eq,__ne,__gt,__ge,__le,__lt}{df2,sf2}
-   will behave as __cmpdf2. So, we stub the implementations to
-   jump on to __cmpdf2 and __cmpsf2.
- 
-   All of these shortcircuit the return path so that __cmp{sd}f2
-   will go directly back to the caller.  */
-
-.macro  COMPARE_DF_JUMP name
-       .import SYM (cmpdf2)
-FUNC_START \name
-       jmpi SYM (cmpdf2)
-FUNC_END \name
-.endm
-               
-#ifdef  L_eqdf2
-COMPARE_DF_JUMP eqdf2
-#endif /* L_eqdf2 */
-
-#ifdef  L_nedf2
-COMPARE_DF_JUMP nedf2
-#endif /* L_nedf2 */
-
-#ifdef  L_gtdf2
-COMPARE_DF_JUMP gtdf2
-#endif /* L_gtdf2 */
-
-#ifdef  L_gedf2
-COMPARE_DF_JUMP gedf2
-#endif /* L_gedf2 */
-
-#ifdef  L_ltdf2
-COMPARE_DF_JUMP ltdf2
-#endif /* L_ltdf2 */
-       
-#ifdef  L_ledf2
-COMPARE_DF_JUMP ledf2
-#endif /* L_ledf2 */
-
-/* SINGLE PRECISION FLOATING POINT STUBS */
-
-.macro  COMPARE_SF_JUMP name
-       .import SYM (cmpsf2)
-FUNC_START \name
-       jmpi SYM (cmpsf2)
-FUNC_END \name
-.endm
-               
-#ifdef  L_eqsf2
-COMPARE_SF_JUMP eqsf2
-#endif /* L_eqsf2 */
-       
-#ifdef  L_nesf2
-COMPARE_SF_JUMP nesf2
-#endif /* L_nesf2 */
-       
-#ifdef  L_gtsf2
-COMPARE_SF_JUMP gtsf2
-#endif /* L_gtsf2 */
-       
-#ifdef  L_gesf2
-COMPARE_SF_JUMP __gesf2
-#endif /* L_gesf2 */
-       
-#ifdef  L_ltsf2
-COMPARE_SF_JUMP __ltsf2
-#endif /* L_ltsf2 */
-       
-#ifdef  L_lesf2
-COMPARE_SF_JUMP lesf2
-#endif /* L_lesf2 */
diff --git a/gcc/config/mcore/t-mcore b/gcc/config/mcore/t-mcore

index 9c84d850f20270428b236ab4df0bddc750892aab..265399cecfeef8200ea17db3f70e2e6a4026e7ac 100644 (file)
--- a/gcc/config/mcore/t-mcore
+++ b/gcc/config/mcore/t-mcore
@@ -16,9 +16,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMSRC    = mcore/lib1.asm
-LIB1ASMFUNCS  = _divsi3 _udivsi3 _modsi3 _umodsi3
-
  # could use -msifilter to be safe from interrupt/jmp interactions and others.
  TARGET_LIBGCC2_CFLAGS=-O3 -DNO_FLOATLIB_FIXUNSDFSI #-msifilter
  
diff --git a/gcc/config/mep/mep-lib1.asm b/gcc/config/mep/mep-lib1.asm

deleted file mode 100644 (file)

index 0a18913..0000000
--- a/gcc/config/mep/mep-lib1.asm
+++ /dev/null
@@ -1,125 +0,0 @@
-/* libgcc routines for Toshiba Media Processor.
-   Copyright (C) 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-  
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-  
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-#define SAVEALL \
-       add3    $sp, $sp, -16*4 ; \
-       sw      $0, ($sp) ; \
-       sw      $1, 4($sp) ; \
-       sw      $2, 8($sp) ; \
-       sw      $3, 12($sp) ; \
-       sw      $4, 16($sp) ; \
-       sw      $5, 20($sp) ; \
-       sw      $6, 24($sp) ; \
-       sw      $7, 28($sp) ; \
-       sw      $8, 32($sp) ; \
-       sw      $9, 36($sp) ; \
-       sw      $10, 40($sp) ; \
-       sw      $11, 44($sp) ; \
-       sw      $12, 48($sp) ; \
-       sw      $13, 52($sp) ; \
-       sw      $14, 56($sp) ; \
-       ldc     $5, $lp ; \
-       add     $5, 3 ; \
-       mov     $6, -4 ; \
-       and     $5, $6
-
-#define RESTOREALL \
-       stc     $5, $lp ; \
-       lw      $14, 56($sp) ; \
-       lw      $13, 52($sp) ; \
-       lw      $12, 48($sp) ; \
-       lw      $11, 44($sp) ; \
-       lw      $10, 40($sp) ; \
-       lw      $9, 36($sp) ; \
-       lw      $8, 32($sp) ; \
-       lw      $7, 28($sp) ; \
-       lw      $6, 24($sp) ; \
-       lw      $5, 20($sp) ; \
-       lw      $4, 16($sp) ; \
-       lw      $3, 12($sp) ; \
-       lw      $2, 8($sp) ; \
-       lw      $1, 4($sp) ; \
-       lw      $0, ($sp) ; \
-       add3    $sp, $sp, 16*4 ; \
-       ret
-
-#ifdef L_mep_profile
-       .text
-       .global __mep_mcount
-__mep_mcount:
-       SAVEALL
-       ldc     $1, $lp
-       mov     $2, $0
-       bsr     __mep_mcount_2
-       RESTOREALL
-#endif
-
-#ifdef L_mep_bb_init_trace
-       .text
-       .global __mep_bb_init_trace_func
-__mep_bb_init_trace_func:
-       SAVEALL
-       lw      $1, ($5)
-       lw      $2, 4($5)
-       add     $5, 8
-       bsr     __bb_init_trace_func
-       RESTOREALL
-#endif
-
-#ifdef L_mep_bb_init
-       .text
-       .global __mep_bb_init_func
-__mep_bb_init_func:
-       SAVEALL
-       lw      $1, ($5)
-       add     $5, 4
-       bsr     __bb_init_func
-       RESTOREALL
-#endif
-
-#ifdef L_mep_bb_trace
-       .text
-       .global __mep_bb_trace_func
-__mep_bb_trace_func:
-       SAVEALL
-       movu    $3, __bb
-       lw      $1, ($5)
-       sw      $1, ($3)
-       lw      $2, 4($5)
-       sw      $2, 4($3)
-       add     $5, 8
-       bsr     __bb_trace_func
-       RESTOREALL
-#endif
-
-#ifdef L_mep_bb_increment
-       .text
-       .global __mep_bb_increment_func
-__mep_bb_increment_func:
-       SAVEALL
-       lw      $1, ($5)
-       lw      $0, ($1)
-       add     $0, 1
-       sw      $0, ($1)
-       add     $5, 4
-       RESTOREALL
-#endif
diff --git a/gcc/config/mep/t-mep b/gcc/config/mep/t-mep

index d560db0aa4bd688a498228775a2cee4b087a8551..ac4ad95bc872842a91fec5e26adc575284828399 100644 (file)
--- a/gcc/config/mep/t-mep
+++ b/gcc/config/mep/t-mep
@@ -32,16 +32,6 @@ mep-pragma.o: $(srcdir)/config/mep/mep-pragma.c $(CONFIG_H) $(SYSTEM_H) \
         function.h insn-config.h reload.h $(TARGET_H)
         $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
  
-# profiling support
-
-LIB1ASMSRC = mep/mep-lib1.asm
-
-LIB1ASMFUNCS = _mep_profile \
-              _mep_bb_init_trace \
-              _mep_bb_init \
-              _mep_bb_trace \
-              _mep_bb_increment
-
  # multiply and divide routines
  
  LIB2FUNCS_EXTRA = \
diff --git a/gcc/config/mips/mips16.S b/gcc/config/mips/mips16.S

deleted file mode 100644 (file)

index ec331b5..0000000
--- a/gcc/config/mips/mips16.S
+++ /dev/null
@@ -1,712 +0,0 @@
-/* mips16 floating point support code
-   Copyright (C) 1996, 1997, 1998, 2008, 2009, 2010
-   Free Software Foundation, Inc.
-   Contributed by Cygnus Support
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/* This file contains mips16 floating point support functions.  These
-   functions are called by mips16 code to handle floating point when
-   -msoft-float is not used.  They accept the arguments and return
-   values using the soft-float calling convention, but do the actual
-   operation using the hard floating point instructions.  */
-
-#if defined _MIPS_SIM && (_MIPS_SIM == _ABIO32 || _MIPS_SIM == _ABIO64)
-
-/* This file contains 32-bit assembly code.  */
-       .set nomips16
-
-/* Start a function.  */
-
-#define STARTFN(NAME) .globl NAME; .ent NAME; NAME:
-
-/* Finish a function.  */
-
-#define ENDFN(NAME) .end NAME
-
-/* ARG1
-       The FPR that holds the first floating-point argument.
-
-   ARG2
-       The FPR that holds the second floating-point argument.
-
-   RET
-       The FPR that holds a floating-point return value.  */
-
-#define RET $f0
-#define ARG1 $f12
-#ifdef __mips64
-#define ARG2 $f13
-#else
-#define ARG2 $f14
-#endif
-
-/* Set 64-bit register GPR so that its high 32 bits contain HIGH_FPR
-   and so that its low 32 bits contain LOW_FPR.  */
-#define MERGE_GPRf(GPR, HIGH_FPR, LOW_FPR)     \
-       .set    noat;                           \
-       mfc1    $1, LOW_FPR;                    \
-       mfc1    GPR, HIGH_FPR;                  \
-       dsll    $1, $1, 32;                     \
-       dsll    GPR, GPR, 32;                   \
-       dsrl    $1, $1, 32;                     \
-       or      GPR, GPR, $1;                   \
-       .set    at
-
-/* Move the high 32 bits of GPR to HIGH_FPR and the low 32 bits of
-   GPR to LOW_FPR.  */
-#define MERGE_GPRt(GPR, HIGH_FPR, LOW_FPR)     \
-       .set    noat;                           \
-       dsrl    $1, GPR, 32;                    \
-       mtc1    GPR, LOW_FPR;                   \
-       mtc1    $1, HIGH_FPR;                   \
-       .set    at
-
-/* Jump to T, and use "OPCODE, OP2" to implement a delayed move.  */
-#define DELAYt(T, OPCODE, OP2)                 \
-       .set    noreorder;                      \
-       jr      T;                              \
-       OPCODE, OP2;                            \
-       .set    reorder
-
-/* Use "OPCODE, OP2" and jump to T.  */
-#define DELAYf(T, OPCODE, OP2) OPCODE, OP2; jr T
-
-/* MOVE_SF_BYTE0(D)
-       Move the first single-precision floating-point argument between
-       GPRs and FPRs.
-
-   MOVE_SI_BYTE0(D)
-       Likewise the first single-precision integer argument.
-
-   MOVE_SF_BYTE4(D)
-       Move the second single-precision floating-point argument between
-       GPRs and FPRs, given that the first argument occupies 4 bytes.
-
-   MOVE_SF_BYTE8(D)
-       Move the second single-precision floating-point argument between
-       GPRs and FPRs, given that the first argument occupies 8 bytes.
-
-   MOVE_DF_BYTE0(D)
-       Move the first double-precision floating-point argument between
-       GPRs and FPRs.
-
-   MOVE_DF_BYTE8(D)
-       Likewise the second double-precision floating-point argument.
-
-   MOVE_SF_RET(D, T)
-       Likewise a single-precision floating-point return value,
-       then jump to T.
-
-   MOVE_SC_RET(D, T)
-       Likewise a complex single-precision floating-point return value.
-
-   MOVE_DF_RET(D, T)
-       Likewise a double-precision floating-point return value.
-
-   MOVE_DC_RET(D, T)
-       Likewise a complex double-precision floating-point return value.
-
-   MOVE_SI_RET(D, T)
-       Likewise a single-precision integer return value.
-
-   The D argument is "t" to move to FPRs and "f" to move from FPRs.
-   The return macros may assume that the target of the jump does not
-   use a floating-point register.  */
-
-#define MOVE_SF_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0)
-#define MOVE_SI_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0)
-
-#if defined(__mips64) && defined(__MIPSEB__)
-#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f0, $f1); jr T
-#elif defined(__mips64)
-/* The high 32 bits of $2 correspond to the second word in memory;
-   i.e. the imaginary part.  */
-#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f1, $f0); jr T
-#elif __mips_fpr == 64
-#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1)
-#else
-#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f2)
-#endif
-
-#if defined(__mips64)
-#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12
-#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f13
-#define MOVE_SF_BYTE8(D) m##D##c1 $5,$f13
-#else
-#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12
-#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f14
-#define MOVE_SF_BYTE8(D) m##D##c1 $6,$f14
-#endif
-#define MOVE_SI_BYTE0(D) MOVE_SF_BYTE0(D)
-
-#if defined(__mips64)
-#define MOVE_DF_BYTE0(D) dm##D##c1 $4,$f12
-#define MOVE_DF_BYTE8(D) dm##D##c1 $5,$f13
-#define MOVE_DF_RET(D, T) DELAY##D (T, dm##D##c1 $2,$f0)
-#define MOVE_DC_RET(D, T) dm##D##c1 $3,$f1; MOVE_DF_RET (D, T)
-#elif __mips_fpr == 64 && defined(__MIPSEB__)
-#define MOVE_DF_BYTE0(D) m##D##c1 $5,$f12; m##D##hc1 $4,$f12
-#define MOVE_DF_BYTE8(D) m##D##c1 $7,$f14; m##D##hc1 $6,$f14
-#define MOVE_DF_RET(D, T) m##D##c1 $3,$f0; DELAY##D (T, m##D##hc1 $2,$f0)
-#define MOVE_DC_RET(D, T) m##D##c1 $5,$f1; m##D##hc1 $4,$f1; MOVE_DF_RET (D, T)
-#elif __mips_fpr == 64
-#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##hc1 $5,$f12
-#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##hc1 $7,$f14
-#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##hc1 $3,$f0)
-#define MOVE_DC_RET(D, T) m##D##c1 $4,$f1; m##D##hc1 $5,$f1; MOVE_DF_RET (D, T)
-#elif defined(__MIPSEB__)
-/* FPRs are little-endian.  */
-#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f13; m##D##c1 $5,$f12
-#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f15; m##D##c1 $7,$f14
-#define MOVE_DF_RET(D, T) m##D##c1 $2,$f1; DELAY##D (T, m##D##c1 $3,$f0)
-#define MOVE_DC_RET(D, T) m##D##c1 $4,$f3; m##D##c1 $5,$f2; MOVE_DF_RET (D, T)
-#else
-#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##c1 $5,$f13
-#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##c1 $7,$f15
-#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1)
-#define MOVE_DC_RET(D, T) m##D##c1 $4,$f2; m##D##c1 $5,$f3; MOVE_DF_RET (D, T)
-#endif
-
-/* Single-precision math.  */
-
-/* Define a function NAME that loads two single-precision values,
-   performs FPU operation OPCODE on them, and returns the single-
-   precision result.  */
-
-#define OPSF3(NAME, OPCODE)    \
-STARTFN (NAME);                        \
-       MOVE_SF_BYTE0 (t);      \
-       MOVE_SF_BYTE4 (t);      \
-       OPCODE  RET,ARG1,ARG2;  \
-       MOVE_SF_RET (f, $31);   \
-       ENDFN (NAME)
-
-#ifdef L_m16addsf3
-OPSF3 (__mips16_addsf3, add.s)
-#endif
-#ifdef L_m16subsf3
-OPSF3 (__mips16_subsf3, sub.s)
-#endif
-#ifdef L_m16mulsf3
-OPSF3 (__mips16_mulsf3, mul.s)
-#endif
-#ifdef L_m16divsf3
-OPSF3 (__mips16_divsf3, div.s)
-#endif
-
-/* Define a function NAME that loads a single-precision value,
-   performs FPU operation OPCODE on it, and returns the single-
-   precision result.  */
-
-#define OPSF2(NAME, OPCODE)    \
-STARTFN (NAME);                        \
-       MOVE_SF_BYTE0 (t);      \
-       OPCODE  RET,ARG1;       \
-       MOVE_SF_RET (f, $31);   \
-       ENDFN (NAME)
-
-#ifdef L_m16negsf2
-OPSF2 (__mips16_negsf2, neg.s)
-#endif
-#ifdef L_m16abssf2
-OPSF2 (__mips16_abssf2, abs.s)
-#endif
-
-/* Single-precision comparisons.  */
-
-/* Define a function NAME that loads two single-precision values,
-   performs floating point comparison OPCODE, and returns TRUE or
-   FALSE depending on the result.  */
-
-#define CMPSF(NAME, OPCODE, TRUE, FALSE)       \
-STARTFN (NAME);                                        \
-       MOVE_SF_BYTE0 (t);                      \
-       MOVE_SF_BYTE4 (t);                      \
-       OPCODE  ARG1,ARG2;                      \
-       li      $2,TRUE;                        \
-       bc1t    1f;                             \
-       li      $2,FALSE;                       \
-1:;                                            \
-       j       $31;                            \
-       ENDFN (NAME)
-
-/* Like CMPSF, but reverse the comparison operands.  */
-
-#define REVCMPSF(NAME, OPCODE, TRUE, FALSE)    \
-STARTFN (NAME);                                        \
-       MOVE_SF_BYTE0 (t);                      \
-       MOVE_SF_BYTE4 (t);                      \
-       OPCODE  ARG2,ARG1;                      \
-       li      $2,TRUE;                        \
-       bc1t    1f;                             \
-       li      $2,FALSE;                       \
-1:;                                            \
-       j       $31;                            \
-       ENDFN (NAME)
-
-#ifdef L_m16eqsf2
-CMPSF (__mips16_eqsf2, c.eq.s, 0, 1)
-#endif
-#ifdef L_m16nesf2
-CMPSF (__mips16_nesf2, c.eq.s, 0, 1)
-#endif
-#ifdef L_m16gtsf2
-REVCMPSF (__mips16_gtsf2, c.lt.s, 1, 0)
-#endif
-#ifdef L_m16gesf2
-REVCMPSF (__mips16_gesf2, c.le.s, 0, -1)
-#endif
-#ifdef L_m16lesf2
-CMPSF (__mips16_lesf2, c.le.s, 0, 1)
-#endif
-#ifdef L_m16ltsf2
-CMPSF (__mips16_ltsf2, c.lt.s, -1, 0)
-#endif
-#ifdef L_m16unordsf2
-CMPSF(__mips16_unordsf2, c.un.s, 1, 0)
-#endif
-
-
-/* Single-precision conversions.  */
-
-#ifdef L_m16fltsisf
-STARTFN (__mips16_floatsisf)
-       MOVE_SF_BYTE0 (t)
-       cvt.s.w RET,ARG1
-       MOVE_SF_RET (f, $31)
-       ENDFN (__mips16_floatsisf)
-#endif
-
-#ifdef L_m16fltunsisf
-STARTFN (__mips16_floatunsisf)
-       .set    noreorder
-       bltz    $4,1f
-       MOVE_SF_BYTE0 (t)
-       .set    reorder
-       cvt.s.w RET,ARG1
-       MOVE_SF_RET (f, $31)
-1:             
-       and     $2,$4,1
-       srl     $3,$4,1
-       or      $2,$2,$3
-       mtc1    $2,RET
-       cvt.s.w RET,RET
-       add.s   RET,RET,RET
-       MOVE_SF_RET (f, $31)
-       ENDFN (__mips16_floatunsisf)
-#endif
-       
-#ifdef L_m16fix_truncsfsi
-STARTFN (__mips16_fix_truncsfsi)
-       MOVE_SF_BYTE0 (t)
-       trunc.w.s RET,ARG1,$4
-       MOVE_SI_RET (f, $31)
-       ENDFN (__mips16_fix_truncsfsi)
-#endif
-
-#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
-
-/* Double-precision math.  */
-
-/* Define a function NAME that loads two double-precision values,
-   performs FPU operation OPCODE on them, and returns the double-
-   precision result.  */
-
-#define OPDF3(NAME, OPCODE)    \
-STARTFN (NAME);                        \
-       MOVE_DF_BYTE0 (t);      \
-       MOVE_DF_BYTE8 (t);      \
-       OPCODE RET,ARG1,ARG2;   \
-       MOVE_DF_RET (f, $31);   \
-       ENDFN (NAME)
-
-#ifdef L_m16adddf3
-OPDF3 (__mips16_adddf3, add.d)
-#endif
-#ifdef L_m16subdf3
-OPDF3 (__mips16_subdf3, sub.d)
-#endif
-#ifdef L_m16muldf3
-OPDF3 (__mips16_muldf3, mul.d)
-#endif
-#ifdef L_m16divdf3
-OPDF3 (__mips16_divdf3, div.d)
-#endif
-
-/* Define a function NAME that loads a double-precision value,
-   performs FPU operation OPCODE on it, and returns the double-
-   precision result.  */
-
-#define OPDF2(NAME, OPCODE)    \
-STARTFN (NAME);                        \
-       MOVE_DF_BYTE0 (t);      \
-       OPCODE RET,ARG1;        \
-       MOVE_DF_RET (f, $31);   \
-       ENDFN (NAME)
-
-#ifdef L_m16negdf2
-OPDF2 (__mips16_negdf2, neg.d)
-#endif
-#ifdef L_m16absdf2
-OPDF2 (__mips16_absdf2, abs.d)
-#endif
-
-/* Conversions between single and double precision.  */
-
-#ifdef L_m16extsfdf2
-STARTFN (__mips16_extendsfdf2)
-       MOVE_SF_BYTE0 (t)
-       cvt.d.s RET,ARG1
-       MOVE_DF_RET (f, $31)
-       ENDFN (__mips16_extendsfdf2)
-#endif
-
-#ifdef L_m16trdfsf2
-STARTFN (__mips16_truncdfsf2)
-       MOVE_DF_BYTE0 (t)
-       cvt.s.d RET,ARG1
-       MOVE_SF_RET (f, $31)
-       ENDFN (__mips16_truncdfsf2)
-#endif
-
-/* Double-precision comparisons.  */
-
-/* Define a function NAME that loads two double-precision values,
-   performs floating point comparison OPCODE, and returns TRUE or
-   FALSE depending on the result.  */
-
-#define CMPDF(NAME, OPCODE, TRUE, FALSE)       \
-STARTFN (NAME);                                        \
-       MOVE_DF_BYTE0 (t);                      \
-       MOVE_DF_BYTE8 (t);                      \
-       OPCODE  ARG1,ARG2;                      \
-       li      $2,TRUE;                        \
-       bc1t    1f;                             \
-       li      $2,FALSE;                       \
-1:;                                            \
-       j       $31;                            \
-       ENDFN (NAME)
-
-/* Like CMPDF, but reverse the comparison operands.  */
-
-#define REVCMPDF(NAME, OPCODE, TRUE, FALSE)    \
-STARTFN (NAME);                                        \
-       MOVE_DF_BYTE0 (t);                      \
-       MOVE_DF_BYTE8 (t);                      \
-       OPCODE  ARG2,ARG1;                      \
-       li      $2,TRUE;                        \
-       bc1t    1f;                             \
-       li      $2,FALSE;                       \
-1:;                                            \
-       j       $31;                            \
-       ENDFN (NAME)
-
-#ifdef L_m16eqdf2
-CMPDF (__mips16_eqdf2, c.eq.d, 0, 1)
-#endif
-#ifdef L_m16nedf2
-CMPDF (__mips16_nedf2, c.eq.d, 0, 1)
-#endif
-#ifdef L_m16gtdf2
-REVCMPDF (__mips16_gtdf2, c.lt.d, 1, 0)
-#endif
-#ifdef L_m16gedf2
-REVCMPDF (__mips16_gedf2, c.le.d, 0, -1)
-#endif
-#ifdef L_m16ledf2
-CMPDF (__mips16_ledf2, c.le.d, 0, 1)
-#endif
-#ifdef L_m16ltdf2
-CMPDF (__mips16_ltdf2, c.lt.d, -1, 0)
-#endif
-#ifdef L_m16unorddf2
-CMPDF(__mips16_unorddf2, c.un.d, 1, 0)
-#endif
-
-/* Double-precision conversions.  */
-
-#ifdef L_m16fltsidf
-STARTFN (__mips16_floatsidf)
-       MOVE_SI_BYTE0 (t)
-       cvt.d.w RET,ARG1
-       MOVE_DF_RET (f, $31)
-       ENDFN (__mips16_floatsidf)
-#endif
-       
-#ifdef L_m16fltunsidf
-STARTFN (__mips16_floatunsidf)
-       MOVE_SI_BYTE0 (t)
-       cvt.d.w RET,ARG1
-       bgez    $4,1f
-       li.d    ARG1, 4.294967296e+9
-       add.d   RET, RET, ARG1
-1:     MOVE_DF_RET (f, $31)
-       ENDFN (__mips16_floatunsidf)
-#endif
-       
-#ifdef L_m16fix_truncdfsi
-STARTFN (__mips16_fix_truncdfsi)
-       MOVE_DF_BYTE0 (t)
-       trunc.w.d RET,ARG1,$4
-       MOVE_SI_RET (f, $31)
-       ENDFN (__mips16_fix_truncdfsi)
-#endif
-#endif /* !__mips_single_float */
-
-/* Define a function NAME that moves a return value of mode MODE from
-   FPRs to GPRs.  */
-
-#define RET_FUNCTION(NAME, MODE)       \
-STARTFN (NAME);                                \
-       MOVE_##MODE##_RET (t, $31);     \
-       ENDFN (NAME)
-
-#ifdef L_m16retsf
-RET_FUNCTION (__mips16_ret_sf, SF)
-#endif
-
-#ifdef L_m16retsc
-RET_FUNCTION (__mips16_ret_sc, SC)
-#endif
-
-#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
-#ifdef L_m16retdf
-RET_FUNCTION (__mips16_ret_df, DF)
-#endif
-
-#ifdef L_m16retdc
-RET_FUNCTION (__mips16_ret_dc, DC)
-#endif
-#endif /* !__mips_single_float */
-
-/* STUB_ARGS_X copies the arguments from GPRs to FPRs for argument
-   code X.  X is calculated as ARG1 + ARG2 * 4, where ARG1 and ARG2
-   classify the first and second arguments as follows:
-
-       1: a single-precision argument
-       2: a double-precision argument
-       0: no argument, or not one of the above.  */
-
-#define STUB_ARGS_0                                            /* () */
-#define STUB_ARGS_1 MOVE_SF_BYTE0 (t)                          /* (sf) */
-#define STUB_ARGS_5 MOVE_SF_BYTE0 (t); MOVE_SF_BYTE4 (t)       /* (sf, sf) */
-#define STUB_ARGS_9 MOVE_SF_BYTE0 (t); MOVE_DF_BYTE8 (t)       /* (sf, df) */
-#define STUB_ARGS_2 MOVE_DF_BYTE0 (t)                          /* (df) */
-#define STUB_ARGS_6 MOVE_DF_BYTE0 (t); MOVE_SF_BYTE8 (t)       /* (df, sf) */
-#define STUB_ARGS_10 MOVE_DF_BYTE0 (t); MOVE_DF_BYTE8 (t)      /* (df, df) */
-
-/* These functions are used by 16-bit code when calling via a function
-   pointer.  They must copy the floating point arguments from the GPRs
-   to FPRs and then call function $2.  */
-
-#define CALL_STUB_NO_RET(NAME, CODE)   \
-STARTFN (NAME);                                \
-       STUB_ARGS_##CODE;               \
-       .set    noreorder;              \
-       jr      $2;                     \
-       move    $25,$2;                 \
-       .set    reorder;                \
-       ENDFN (NAME)
-
-#ifdef L_m16stub1
-CALL_STUB_NO_RET (__mips16_call_stub_1, 1)
-#endif
-
-#ifdef L_m16stub5
-CALL_STUB_NO_RET (__mips16_call_stub_5, 5)
-#endif
-
-#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
-
-#ifdef L_m16stub2
-CALL_STUB_NO_RET (__mips16_call_stub_2, 2)
-#endif
-
-#ifdef L_m16stub6
-CALL_STUB_NO_RET (__mips16_call_stub_6, 6)
-#endif
-
-#ifdef L_m16stub9
-CALL_STUB_NO_RET (__mips16_call_stub_9, 9)
-#endif
-
-#ifdef L_m16stub10
-CALL_STUB_NO_RET (__mips16_call_stub_10, 10)
-#endif
-#endif /* !__mips_single_float */
-
-/* Now we have the same set of functions, except that this time the
-   function being called returns an SFmode, SCmode, DFmode or DCmode
-   value; we need to instantiate a set for each case.  The calling
-   function will arrange to preserve $18, so these functions are free
-   to use it to hold the return address.
-
-   Note that we do not know whether the function we are calling is 16
-   bit or 32 bit.  However, it does not matter, because 16-bit
-   functions always return floating point values in both the gp and
-   the fp regs.  It would be possible to check whether the function
-   being called is 16 bits, in which case the copy is unnecessary;
-   however, it's faster to always do the copy.  */
-
-#define CALL_STUB_RET(NAME, CODE, MODE)        \
-STARTFN (NAME);                                \
-       move    $18,$31;                \
-       STUB_ARGS_##CODE;               \
-       .set    noreorder;              \
-       jalr    $2;                     \
-       move    $25,$2;                 \
-       .set    reorder;                \
-       MOVE_##MODE##_RET (f, $18);     \
-       ENDFN (NAME)
-
-/* First, instantiate the single-float set.  */
-
-#ifdef L_m16stubsf0
-CALL_STUB_RET (__mips16_call_stub_sf_0, 0, SF)
-#endif
-
-#ifdef L_m16stubsf1
-CALL_STUB_RET (__mips16_call_stub_sf_1, 1, SF)
-#endif
-
-#ifdef L_m16stubsf5
-CALL_STUB_RET (__mips16_call_stub_sf_5, 5, SF)
-#endif
-
-#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
-#ifdef L_m16stubsf2
-CALL_STUB_RET (__mips16_call_stub_sf_2, 2, SF)
-#endif
-
-#ifdef L_m16stubsf6
-CALL_STUB_RET (__mips16_call_stub_sf_6, 6, SF)
-#endif
-
-#ifdef L_m16stubsf9
-CALL_STUB_RET (__mips16_call_stub_sf_9, 9, SF)
-#endif
-
-#ifdef L_m16stubsf10
-CALL_STUB_RET (__mips16_call_stub_sf_10, 10, SF)
-#endif
-#endif /* !__mips_single_float */
-
-
-/* Now we have the same set of functions again, except that this time
-   the function being called returns a DFmode value.  */
-
-#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
-#ifdef L_m16stubdf0
-CALL_STUB_RET (__mips16_call_stub_df_0, 0, DF)
-#endif
-
-#ifdef L_m16stubdf1
-CALL_STUB_RET (__mips16_call_stub_df_1, 1, DF)
-#endif
-
-#ifdef L_m16stubdf5
-CALL_STUB_RET (__mips16_call_stub_df_5, 5, DF)
-#endif
-
-#ifdef L_m16stubdf2
-CALL_STUB_RET (__mips16_call_stub_df_2, 2, DF)
-#endif
-
-#ifdef L_m16stubdf6
-CALL_STUB_RET (__mips16_call_stub_df_6, 6, DF)
-#endif
-
-#ifdef L_m16stubdf9
-CALL_STUB_RET (__mips16_call_stub_df_9, 9, DF)
-#endif
-
-#ifdef L_m16stubdf10
-CALL_STUB_RET (__mips16_call_stub_df_10, 10, DF)
-#endif
-#endif /* !__mips_single_float */
-
-
-/* Ho hum.  Here we have the same set of functions again, this time
-   for when the function being called returns an SCmode value.  */
-
-#ifdef L_m16stubsc0
-CALL_STUB_RET (__mips16_call_stub_sc_0, 0, SC)
-#endif
-
-#ifdef L_m16stubsc1
-CALL_STUB_RET (__mips16_call_stub_sc_1, 1, SC)
-#endif
-
-#ifdef L_m16stubsc5
-CALL_STUB_RET (__mips16_call_stub_sc_5, 5, SC)
-#endif
-
-#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
-#ifdef L_m16stubsc2
-CALL_STUB_RET (__mips16_call_stub_sc_2, 2, SC)
-#endif
-
-#ifdef L_m16stubsc6
-CALL_STUB_RET (__mips16_call_stub_sc_6, 6, SC)
-#endif
-
-#ifdef L_m16stubsc9
-CALL_STUB_RET (__mips16_call_stub_sc_9, 9, SC)
-#endif
-
-#ifdef L_m16stubsc10
-CALL_STUB_RET (__mips16_call_stub_sc_10, 10, SC)
-#endif
-#endif /* !__mips_single_float */
-
-
-/* Finally, another set of functions for DCmode.  */
-
-#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
-#ifdef L_m16stubdc0
-CALL_STUB_RET (__mips16_call_stub_dc_0, 0, DC)
-#endif
-
-#ifdef L_m16stubdc1
-CALL_STUB_RET (__mips16_call_stub_dc_1, 1, DC)
-#endif
-
-#ifdef L_m16stubdc5
-CALL_STUB_RET (__mips16_call_stub_dc_5, 5, DC)
-#endif
-
-#ifdef L_m16stubdc2
-CALL_STUB_RET (__mips16_call_stub_dc_2, 2, DC)
-#endif
-
-#ifdef L_m16stubdc6
-CALL_STUB_RET (__mips16_call_stub_dc_6, 6, DC)
-#endif
-
-#ifdef L_m16stubdc9
-CALL_STUB_RET (__mips16_call_stub_dc_9, 9, DC)
-#endif
-
-#ifdef L_m16stubdc10
-CALL_STUB_RET (__mips16_call_stub_dc_10, 10, DC)
-#endif
-#endif /* !__mips_single_float */
-#endif
diff --git a/gcc/config/mips/t-libgcc-mips16 b/gcc/config/mips/t-libgcc-mips16

deleted file mode 100644 (file)

index 31a042b..0000000
--- a/gcc/config/mips/t-libgcc-mips16
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright (C) 2007, 2008, 2011 Free Software Foundation, Inc.
-#
-# This file is part of GCC.
-#
-# GCC is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GCC is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GCC; see the file COPYING3.  If not see
-# <http://www.gnu.org/licenses/>.
-
-LIB1ASMSRC = mips/mips16.S
-LIB1ASMFUNCS = _m16addsf3 _m16subsf3 _m16mulsf3 _m16divsf3 \
-       _m16eqsf2 _m16nesf2 _m16gtsf2 _m16gesf2 _m16lesf2 _m16ltsf2 \
-       _m16unordsf2 \
-       _m16fltsisf _m16fix_truncsfsi _m16fltunsisf \
-       _m16adddf3 _m16subdf3 _m16muldf3 _m16divdf3 \
-       _m16extsfdf2 _m16trdfsf2 \
-       _m16eqdf2 _m16nedf2 _m16gtdf2 _m16gedf2 _m16ledf2 _m16ltdf2 \
-       _m16unorddf2 \
-       _m16fltsidf _m16fix_truncdfsi _m16fltunsidf \
-       _m16retsf _m16retdf \
-       _m16retsc _m16retdc \
-       _m16stub1 _m16stub2 _m16stub5 _m16stub6 _m16stub9 _m16stub10 \
-       _m16stubsf0 _m16stubsf1 _m16stubsf2 _m16stubsf5 _m16stubsf6 \
-       _m16stubsf9 _m16stubsf10 \
-       _m16stubdf0 _m16stubdf1 _m16stubdf2 _m16stubdf5 _m16stubdf6 \
-       _m16stubdf9 _m16stubdf10 \
-       _m16stubsc0 _m16stubsc1 _m16stubsc2 _m16stubsc5 _m16stubsc6 \
-       _m16stubsc9 _m16stubsc10 \
-       _m16stubdc0 _m16stubdc1 _m16stubdc2 _m16stubdc5 _m16stubdc6 \
-       _m16stubdc9 _m16stubdc10
diff --git a/gcc/config/mips/t-sr71k b/gcc/config/mips/t-sr71k

index 7b8669fefd273d9f5f853d22d696340a697c4e3a..f204017faa809ec510cd7525ff965fd5ec16b2ef 100644 (file)
--- a/gcc/config/mips/t-sr71k
+++ b/gcc/config/mips/t-sr71k
@@ -16,11 +16,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-# Suppress building libgcc1.a, since the MIPS compiler port is complete
-# and does not need anything from libgcc1.a.
-LIBGCC1 =
-CROSS_LIBGCC1 =
-
  # We must build libgcc2.a with -G 0, in case the user wants to link
  # without the $gp register.
  TARGET_LIBGCC2_CFLAGS = -G 0
diff --git a/gcc/config/pa/milli64.S b/gcc/config/pa/milli64.S

deleted file mode 100644 (file)

index 2e9c4f7..0000000
--- a/gcc/config/pa/milli64.S
+++ /dev/null
@@ -1,2134 +0,0 @@
-/* 32 and 64-bit millicode, original author Hewlett-Packard
-   adapted for gcc by Paul Bame <bame@debian.org>
-   and Alan Modra <alan@linuxcare.com.au>.
-
-   Copyright 2001, 2002, 2003, 2007, 2009 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-#ifdef pa64
-        .level  2.0w
-#endif
-
-/* Hardware General Registers.  */
-r0:    .reg    %r0
-r1:    .reg    %r1
-r2:    .reg    %r2
-r3:    .reg    %r3
-r4:    .reg    %r4
-r5:    .reg    %r5
-r6:    .reg    %r6
-r7:    .reg    %r7
-r8:    .reg    %r8
-r9:    .reg    %r9
-r10:   .reg    %r10
-r11:   .reg    %r11
-r12:   .reg    %r12
-r13:   .reg    %r13
-r14:   .reg    %r14
-r15:   .reg    %r15
-r16:   .reg    %r16
-r17:   .reg    %r17
-r18:   .reg    %r18
-r19:   .reg    %r19
-r20:   .reg    %r20
-r21:   .reg    %r21
-r22:   .reg    %r22
-r23:   .reg    %r23
-r24:   .reg    %r24
-r25:   .reg    %r25
-r26:   .reg    %r26
-r27:   .reg    %r27
-r28:   .reg    %r28
-r29:   .reg    %r29
-r30:   .reg    %r30
-r31:   .reg    %r31
-
-/* Hardware Space Registers.  */
-sr0:   .reg    %sr0
-sr1:   .reg    %sr1
-sr2:   .reg    %sr2
-sr3:   .reg    %sr3
-sr4:   .reg    %sr4
-sr5:   .reg    %sr5
-sr6:   .reg    %sr6
-sr7:   .reg    %sr7
-
-/* Hardware Floating Point Registers.  */
-fr0:   .reg    %fr0
-fr1:   .reg    %fr1
-fr2:   .reg    %fr2
-fr3:   .reg    %fr3
-fr4:   .reg    %fr4
-fr5:   .reg    %fr5
-fr6:   .reg    %fr6
-fr7:   .reg    %fr7
-fr8:   .reg    %fr8
-fr9:   .reg    %fr9
-fr10:  .reg    %fr10
-fr11:  .reg    %fr11
-fr12:  .reg    %fr12
-fr13:  .reg    %fr13
-fr14:  .reg    %fr14
-fr15:  .reg    %fr15
-
-/* Hardware Control Registers.  */
-cr11:  .reg    %cr11
-sar:   .reg    %cr11   /* Shift Amount Register */
-
-/* Software Architecture General Registers.  */
-rp:    .reg    r2      /* return pointer */
-#ifdef pa64
-mrp:   .reg    r2      /* millicode return pointer */
-#else
-mrp:   .reg    r31     /* millicode return pointer */
-#endif
-ret0:  .reg    r28     /* return value */
-ret1:  .reg    r29     /* return value (high part of double) */
-sp:    .reg    r30     /* stack pointer */
-dp:    .reg    r27     /* data pointer */
-arg0:  .reg    r26     /* argument */
-arg1:  .reg    r25     /* argument or high part of double argument */
-arg2:  .reg    r24     /* argument */
-arg3:  .reg    r23     /* argument or high part of double argument */
-
-/* Software Architecture Space Registers.  */
-/*             sr0     ; return link from BLE */
-sret:  .reg    sr1     /* return value */
-sarg:  .reg    sr1     /* argument */
-/*             sr4     ; PC SPACE tracker */
-/*             sr5     ; process private data */
-
-/* Frame Offsets (millicode convention!)  Used when calling other
-   millicode routines.  Stack unwinding is dependent upon these
-   definitions.  */
-r31_slot:      .equ    -20     /* "current RP" slot */
-sr0_slot:      .equ    -16     /* "static link" slot */
-#if defined(pa64)
-mrp_slot:       .equ    -16    /* "current RP" slot */
-psp_slot:       .equ    -8     /* "previous SP" slot */
-#else
-mrp_slot:      .equ    -20     /* "current RP" slot (replacing "r31_slot") */
-#endif
-
-
-#define DEFINE(name,value)name:        .EQU    value
-#define RDEFINE(name,value)name:       .REG    value
-#ifdef milliext
-#define MILLI_BE(lbl)   BE    lbl(sr7,r0)
-#define MILLI_BEN(lbl)  BE,n  lbl(sr7,r0)
-#define MILLI_BLE(lbl) BLE   lbl(sr7,r0)
-#define MILLI_BLEN(lbl)        BLE,n lbl(sr7,r0)
-#define MILLIRETN      BE,n  0(sr0,mrp)
-#define MILLIRET       BE    0(sr0,mrp)
-#define MILLI_RETN     BE,n  0(sr0,mrp)
-#define MILLI_RET      BE    0(sr0,mrp)
-#else
-#define MILLI_BE(lbl)  B     lbl
-#define MILLI_BEN(lbl)  B,n   lbl
-#define MILLI_BLE(lbl) BL    lbl,mrp
-#define MILLI_BLEN(lbl)        BL,n  lbl,mrp
-#define MILLIRETN      BV,n  0(mrp)
-#define MILLIRET       BV    0(mrp)
-#define MILLI_RETN     BV,n  0(mrp)
-#define MILLI_RET      BV    0(mrp)
-#endif
-
-#ifdef __STDC__
-#define CAT(a,b)       a##b
-#else
-#define CAT(a,b)       a/**/b
-#endif
-
-#ifdef ELF
-#define SUBSPA_MILLI    .section .text
-#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16
-#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16
-#define ATTR_MILLI
-#define SUBSPA_DATA     .section .data
-#define ATTR_DATA
-#define GLOBAL          $global$
-#define GSYM(sym)       !sym:
-#define LSYM(sym)       !CAT(.L,sym:)
-#define LREF(sym)       CAT(.L,sym)
-
-#else
-
-#ifdef coff
-/* This used to be .milli but since link32 places different named
-   sections in different segments millicode ends up a long ways away
-   from .text (1meg?).  This way they will be a lot closer.
-
-   The SUBSPA_MILLI_* specify locality sets for certain millicode
-   modules in order to ensure that modules that call one another are
-   placed close together. Without locality sets this is unlikely to
-   happen because of the Dynamite linker library search algorithm. We
-   want these modules close together so that short calls always reach
-   (we don't want to require long calls or use long call stubs).  */
-
-#define SUBSPA_MILLI    .subspa .text
-#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16
-#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16
-#define ATTR_MILLI      .attr code,read,execute
-#define SUBSPA_DATA     .subspa .data
-#define ATTR_DATA       .attr init_data,read,write
-#define GLOBAL          _gp
-#else
-#define SUBSPA_MILLI    .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8
-#define SUBSPA_MILLI_DIV SUBSPA_MILLI
-#define SUBSPA_MILLI_MUL SUBSPA_MILLI
-#define ATTR_MILLI
-#define SUBSPA_DATA     .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero
-#define ATTR_DATA
-#define GLOBAL          $global$
-#endif
-#define SPACE_DATA      .space $PRIVATE$,spnum=1,sort=16
-
-#define GSYM(sym)       !sym
-#define LSYM(sym)       !CAT(L$,sym)
-#define LREF(sym)       CAT(L$,sym)
-#endif
-
-#ifdef L_dyncall /* ROUTINE:    $$dyncall
-
-   Indirect call through %r22.  The bb on bit 30 of %r22 skips the
-   plabel handling when %r22 is not a plabel address.  Otherwise the
-   two least significant bits of %r22 are cleared, the new LTP value is
-   loaded into %r19 from offset 4, and the address of the real target
-   is loaded into %r22 from offset 0.  Control then transfers to the
-   address in %r22: with bv when LINUX is defined, otherwise with be
-   after the space identifier selected by %r22 has been moved to %sr0.
-   The stw that follows the branch saves %r2 at -24(%r30).
-
-   NOTE(review): ATTR_DATA follows SUBSPA_MILLI here, whereas the
-   divide and remainder routines in this file use ATTR_MILLI after
-   their SUBSPA_MILLI* line -- confirm that this is intended.  */
-       SUBSPA_MILLI
-       ATTR_DATA
-GSYM($$dyncall)
-       .export $$dyncall,millicode
-       .proc
-       .callinfo       millicode
-       .entry
-       bb,>=,n %r22,30,LREF(1)         ; branch if not plabel address
-       depi    0,31,2,%r22             ; clear the two least significant bits
-       ldw     4(%r22),%r19            ; load new LTP value
-       ldw     0(%r22),%r22            ; load address of target
-LSYM(1)
-#ifdef LINUX
-       bv      %r0(%r22)               ; branch to the real target
-#else
-       ldsid   (%sr0,%r22),%r1         ; get the "space ident" selected by r22
-       mtsp    %r1,%sr0                ; move that space identifier into sr0
-       be      0(%sr0,%r22)            ; branch to the real target
-#endif
-       stw     %r2,-24(%r30)           ; save return address into frame marker
-       .exit
-       .procend
-#endif
-
-#ifdef L_divI
-/* ROUTINES:   $$divI, $$divoI
-
-   Single precision divide for signed binary integers.
-
-   The quotient is truncated towards zero.
-   The sign of the quotient is the XOR of the signs of the dividend and
-   divisor.
-   Divide by zero is trapped.
-   Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.
-
-   INPUT REGISTERS:
-   .   arg0 == dividend
-   .   arg1 == divisor
-   .   mrp  == return pc
-   .   sr0  == return space when called externally
-
-   OUTPUT REGISTERS:
-   .   arg0 =  undefined
-   .   arg1 =  undefined
-   .   ret1 =  quotient
-
-   OTHER REGISTERS AFFECTED:
-   .   r1   =  undefined
-
-   SIDE EFFECTS:
-   .   Causes a trap under the following conditions:
-   .           divisor is zero  (traps with ADDIT,=  0,25,0)
-   .           dividend==-2**31  and divisor==-1 and routine is $$divoI
-   .                            (traps with ADDO  26,25,0)
-   .   Changes memory at the following places:
-   .           NONE
-
-   PERMISSIBLE CONTEXT:
-   .   Unwindable.
-   .   Suitable for internal or external millicode.
-   .   Assumes the special millicode register conventions.
-
-   DISCUSSION:
-   .   Branches to other millicode routines using BE
-   .           $$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
-   .
-   .   For selected divisors, calls a divide by constant routine written by
-   .   Karl Pettis.  Eligible divisors are 1..15 excluding 11 and 13.
-   .
-   .   The only overflow case is -2**31 divided by -1.
-   .   Both routines return -2**31 but only $$divoI traps.
-   .
-   .   Control flow, using the labels defined below:
-   .           $$divoI only adds a check for divisor == -1 (handled at
-   .           negative1) and otherwise falls through into $$divI.
-   .           pow2 handles a divisor with a single bit set by shifting.
-   .           neg_denom negates both operands when the divisor is the
-   .           negative of a power of 2 and then reuses pow2.
-   .           small_divisor dispatches on divisors 0..15 through a blr
-   .           table of two-instruction entries.
-   .           normal is the general 32-step ds/addc sequence.  */
-
-RDEFINE(temp,r1)
-RDEFINE(retreg,ret1)   /*  r29 */
-RDEFINE(temp1,arg0)
-       SUBSPA_MILLI_DIV
-       ATTR_MILLI
-       .import $$divI_2,millicode
-       .import $$divI_3,millicode
-       .import $$divI_4,millicode
-       .import $$divI_5,millicode
-       .import $$divI_6,millicode
-       .import $$divI_7,millicode
-       .import $$divI_8,millicode
-       .import $$divI_9,millicode
-       .import $$divI_10,millicode
-       .import $$divI_12,millicode
-       .import $$divI_14,millicode
-       .import $$divI_15,millicode
-       .export $$divI,millicode
-       .export $$divoI,millicode
-       .proc
-       .callinfo       millicode
-       .entry
-GSYM($$divoI)
-       comib,=,n  -1,arg1,LREF(negative1)      /*  when divisor == -1 */
-GSYM($$divI)
-       ldo     -1(arg1),temp           /*  is there at most one bit set ? */
-       and,<>  arg1,temp,r0            /*  if not, don't use power of 2 divide */
-       addi,>  0,arg1,r0               /*  if divisor > 0, use power of 2 divide */
-       b,n     LREF(neg_denom)
-LSYM(pow2)
-       addi,>= 0,arg0,retreg           /*  if numerator is negative, add the */
-       add     arg0,temp,retreg        /*  (denominator - 1) to correct for shifts */
-       extru,= arg1,15,16,temp         /*  test denominator with 0xffff0000 */
-       extrs   retreg,15,16,retreg     /*  retreg = retreg >> 16 */
-       or      arg1,temp,arg1          /*  arg1 = arg1 | (arg1 >> 16) */
-       ldi     0xcc,temp1              /*  setup 0xcc in temp1 */
-       extru,= arg1,23,8,temp          /*  test denominator with 0xff00 */
-       extrs   retreg,23,24,retreg     /*  retreg = retreg >> 8 */
-       or      arg1,temp,arg1          /*  arg1 = arg1 | (arg1 >> 8) */
-       ldi     0xaa,temp               /*  setup 0xaa in temp */
-       extru,= arg1,27,4,r0            /*  test denominator with 0xf0 */
-       extrs   retreg,27,28,retreg     /*  retreg = retreg >> 4 */
-       and,=   arg1,temp1,r0           /*  test denominator with 0xcc */
-       extrs   retreg,29,30,retreg     /*  retreg = retreg >> 2 */
-       and,=   arg1,temp,r0            /*  test denominator with 0xaa */
-       extrs   retreg,30,31,retreg     /*  retreg = retreg >> 1 */
-       MILLIRETN
-LSYM(neg_denom)
-       addi,<  0,arg1,r0               /*  if arg1 >= 0, it's not power of 2 */
-       b,n     LREF(regular_seq)
-       sub     r0,arg1,temp            /*  make denominator positive */
-       comb,=,n  arg1,temp,LREF(regular_seq)   /*  test against 0x80000000 and 0 */
-       ldo     -1(temp),retreg         /*  is there at most one bit set ? */
-       and,=   temp,retreg,r0          /*  if so, the denominator is power of 2 */
-       b,n     LREF(regular_seq)
-       sub     r0,arg0,retreg          /*  negate numerator */
-       comb,=,n arg0,retreg,LREF(regular_seq) /*  test against 0x80000000 */
-       copy    retreg,arg0             /*  set up arg0, arg1 and temp  */
-       copy    temp,arg1               /*  before branching to pow2 */
-       b       LREF(pow2)
-       ldo     -1(arg1),temp
-LSYM(regular_seq)
-       comib,>>=,n 15,arg1,LREF(small_divisor)
-       add,>=  0,arg0,retreg           /*  move dividend, if retreg < 0, */
-LSYM(normal)
-       subi    0,retreg,retreg         /*    make it positive */
-       sub     0,arg1,temp             /*  clear carry,  */
-                                       /*    negate the divisor */
-       ds      0,temp,0                /*  set V-bit to the comple- */
-                                       /*    ment of the divisor sign */
-       add     retreg,retreg,retreg    /*  shift msb bit into carry */
-       ds      r0,arg1,temp            /*  1st divide step, if no carry */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  2nd divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  3rd divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  4th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  5th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  6th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  7th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  8th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  9th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  10th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  11th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  12th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  13th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  14th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  15th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  16th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  17th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  18th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  19th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  20th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  21st divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  22nd divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  23rd divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  24th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  25th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  26th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  27th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  28th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  29th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  30th divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  31st divide step */
-       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds      temp,arg1,temp          /*  32nd divide step, */
-       addc    retreg,retreg,retreg    /*  shift last retreg bit into retreg */
-       xor,>=  arg0,arg1,0             /*  get correct sign of quotient */
-         sub   0,retreg,retreg         /*    based on operand signs */
-       MILLIRETN
-       nop
-
-LSYM(small_divisor)
-
-#if defined(pa64)
-/*  Clear the upper 32 bits of the arg1 register.  We are working with */
-/*  small divisors (and 32-bit integers)   We must not be misled  */
-/*  by "1" bits left in the upper 32 bits.  */
-       depd %r0,31,32,%r25
-#endif
-       blr,n   arg1,r0
-       nop
-/*  table for divisor == 0,1, ... ,15 */
-       addit,= 0,arg1,r0       /*  trap if divisor == 0 */
-       nop
-       MILLIRET                /*  divisor == 1 */
-       copy    arg0,retreg
-       MILLI_BEN($$divI_2)     /*  divisor == 2 */
-       nop
-       MILLI_BEN($$divI_3)     /*  divisor == 3 */
-       nop
-       MILLI_BEN($$divI_4)     /*  divisor == 4 */
-       nop
-       MILLI_BEN($$divI_5)     /*  divisor == 5 */
-       nop
-       MILLI_BEN($$divI_6)     /*  divisor == 6 */
-       nop
-       MILLI_BEN($$divI_7)     /*  divisor == 7 */
-       nop
-       MILLI_BEN($$divI_8)     /*  divisor == 8 */
-       nop
-       MILLI_BEN($$divI_9)     /*  divisor == 9 */
-       nop
-       MILLI_BEN($$divI_10)    /*  divisor == 10 */
-       nop
-       b       LREF(normal)            /*  divisor == 11 */
-       add,>=  0,arg0,retreg
-       MILLI_BEN($$divI_12)    /*  divisor == 12 */
-       nop
-       b       LREF(normal)            /*  divisor == 13 */
-       add,>=  0,arg0,retreg
-       MILLI_BEN($$divI_14)    /*  divisor == 14 */
-       nop
-       MILLI_BEN($$divI_15)    /*  divisor == 15 */
-       nop
-
-LSYM(negative1)
-       sub     0,arg0,retreg   /*  result is negation of dividend */
-       MILLIRET
-       addo    arg0,arg1,r0    /*  trap iff dividend==0x80000000 && divisor==-1 */
-       .exit
-       .procend
-       .end
-#endif
-
-#ifdef L_divU
-/* ROUTINE:    $$divU
-   .
-   .   Single precision divide for unsigned integers.
-   .
-   .   Quotient is truncated towards zero.
-   .   Traps on divide by zero.
-
-   INPUT REGISTERS:
-   .   arg0 == dividend
-   .   arg1 == divisor
-   .   mrp  == return pc
-   .   sr0  == return space when called externally
-
-   OUTPUT REGISTERS:
-   .   arg0 =  undefined
-   .   arg1 =  undefined
-   .   ret1 =  quotient
-
-   OTHER REGISTERS AFFECTED:
-   .   r1   =  undefined
-
-   SIDE EFFECTS:
-   .   Causes a trap under the following conditions:
-   .           divisor is zero
-   .   Changes memory at the following places:
-   .           NONE
-
-   PERMISSIBLE CONTEXT:
-   .   Unwindable.
-   .   Does not create a stack frame.
-   .   Suitable for internal or external millicode.
-   .   Assumes the special millicode register conventions.
-
-   DISCUSSION:
-   .   Branches to other millicode routines using BE:
-   .           $$divU_# for 3,5,6,7,9,10,12,14,15
-   .
-   .   For selected small divisors calls the special divide by constant
-   .   routines written by Karl Pettis.  These are: 3,5,6,7,9,10,12,14,15.
-   .
-   .   Control flow, using the labels defined below:
-   .           A divisor with a single bit set is handled inline after
-   .           the entry point by shifting the dividend right.
-   .           special_divisor is taken when the signed compare
-   .           15 >= arg1 holds; it sends divisors with the high bit
-   .           set to big_divisor and dispatches 0..15 through a blr
-   .           table of two-instruction entries.
-   .           normal is the general 32-step ds/addc sequence; table
-   .           entries 11 and 13 branch back to it.  */
-
-RDEFINE(temp,r1)
-RDEFINE(retreg,ret1)   /* r29 */
-RDEFINE(temp1,arg0)
-       SUBSPA_MILLI_DIV
-       ATTR_MILLI
-       .export $$divU,millicode
-       .import $$divU_3,millicode
-       .import $$divU_5,millicode
-       .import $$divU_6,millicode
-       .import $$divU_7,millicode
-       .import $$divU_9,millicode
-       .import $$divU_10,millicode
-       .import $$divU_12,millicode
-       .import $$divU_14,millicode
-       .import $$divU_15,millicode
-       .proc
-       .callinfo       millicode
-       .entry
-GSYM($$divU)
-/* The subtract is not nullified since it does no harm and can be used
-   by the two cases that branch back to "normal".  */
-       ldo     -1(arg1),temp           /* is there at most one bit set ? */
-       and,=   arg1,temp,r0            /* if so, denominator is power of 2 */
-       b       LREF(regular_seq)
-       addit,= 0,arg1,0                /* trap for zero dvr */
-       copy    arg0,retreg
-       extru,= arg1,15,16,temp         /* test denominator with 0xffff0000 */
-       extru   retreg,15,16,retreg     /* retreg = retreg >> 16 */
-       or      arg1,temp,arg1          /* arg1 = arg1 | (arg1 >> 16) */
-       ldi     0xcc,temp1              /* setup 0xcc in temp1 */
-       extru,= arg1,23,8,temp          /* test denominator with 0xff00 */
-       extru   retreg,23,24,retreg     /* retreg = retreg >> 8 */
-       or      arg1,temp,arg1          /* arg1 = arg1 | (arg1 >> 8) */
-       ldi     0xaa,temp               /* setup 0xaa in temp */
-       extru,= arg1,27,4,r0            /* test denominator with 0xf0 */
-       extru   retreg,27,28,retreg     /* retreg = retreg >> 4 */
-       and,=   arg1,temp1,r0           /* test denominator with 0xcc */
-       extru   retreg,29,30,retreg     /* retreg = retreg >> 2 */
-       and,=   arg1,temp,r0            /* test denominator with 0xaa */
-       extru   retreg,30,31,retreg     /* retreg = retreg >> 1 */
-       MILLIRETN
-       nop     
-LSYM(regular_seq)
-       comib,>=  15,arg1,LREF(special_divisor)
-       subi    0,arg1,temp             /* clear carry, negate the divisor */
-       ds      r0,temp,r0              /* set V-bit to 1 */
-LSYM(normal)
-       add     arg0,arg0,retreg        /* shift msb bit into carry */
-       ds      r0,arg1,temp            /* 1st divide step, if no carry */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 2nd divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 3rd divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 4th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 5th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 6th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 7th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 8th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 9th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 10th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 11th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 12th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 13th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 14th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 15th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 16th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 17th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 18th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 19th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 20th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 21st divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 22nd divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 23rd divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 24th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 25th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 26th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 27th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 28th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 29th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 30th divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 31st divide step */
-       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
-       ds      temp,arg1,temp          /* 32nd divide step, */
-       MILLIRET
-       addc    retreg,retreg,retreg    /* shift last retreg bit into retreg */
-
-/* Handle the cases where divisor is a small constant or has high bit on.  */
-LSYM(special_divisor)
-/*     blr     arg1,r0 */
-/*     comib,>,n  0,arg1,LREF(big_divisor) ; nullify previous instruction */
-
-/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
-   generating such a blr, comib sequence. A problem in nullification. So I
-   rewrote this code.  */
-
-#if defined(pa64)
-/* Clear the upper 32 bits of the arg1 register.  We are working with
-   small divisors (and 32-bit unsigned integers)   We must not be misled
-   by "1" bits left in the upper 32 bits.  */
-       depd %r0,31,32,%r25
-#endif
-       comib,> 0,arg1,LREF(big_divisor)
-       nop
-       blr     arg1,r0
-       nop
-
-LSYM(zero_divisor)     /* this label is here to provide external visibility */
-       addit,= 0,arg1,0                /* trap for zero dvr */
-       nop
-       MILLIRET                        /* divisor == 1 */
-       copy    arg0,retreg
-       MILLIRET                        /* divisor == 2 */
-       extru   arg0,30,31,retreg
-       MILLI_BEN($$divU_3)             /* divisor == 3 */
-       nop
-       MILLIRET                        /* divisor == 4 */
-       extru   arg0,29,30,retreg
-       MILLI_BEN($$divU_5)             /* divisor == 5 */
-       nop
-       MILLI_BEN($$divU_6)             /* divisor == 6 */
-       nop
-       MILLI_BEN($$divU_7)             /* divisor == 7 */
-       nop
-       MILLIRET                        /* divisor == 8 */
-       extru   arg0,28,29,retreg
-       MILLI_BEN($$divU_9)             /* divisor == 9 */
-       nop
-       MILLI_BEN($$divU_10)            /* divisor == 10 */
-       nop
-       b       LREF(normal)            /* divisor == 11 */
-       ds      r0,temp,r0              /* set V-bit to 1 */
-       MILLI_BEN($$divU_12)            /* divisor == 12 */
-       nop
-       b       LREF(normal)            /* divisor == 13 */
-       ds      r0,temp,r0              /* set V-bit to 1 */
-       MILLI_BEN($$divU_14)            /* divisor == 14 */
-       nop
-       MILLI_BEN($$divU_15)            /* divisor == 15 */
-       nop
-
-/* Handle the case where the high bit is on in the divisor.
-   Compute:    if( dividend>=divisor) quotient=1; else quotient=0;
-   Note:       dividend>=divisor iff dividend-divisor does not borrow
-   and         not borrow iff carry.  */
-LSYM(big_divisor)
-       sub     arg0,arg1,r0
-       MILLIRET
-       addc    r0,r0,retreg
-       .exit
-       .procend
-       .end
-#endif
-
-#ifdef L_remI
-/* ROUTINE:    $$remI
-
-   DESCRIPTION:
-   .   $$remI returns the remainder of the division of two signed 32-bit
-   .   integers.  The sign of the remainder is the same as the sign of
-   .   the dividend.
-   .
-   .   $$remoI is exported at the same address as $$remI; both names
-   .   label the same first instruction.
-
-
-   INPUT REGISTERS:
-   .   arg0 == dividend
-   .   arg1 == divisor
-   .   mrp  == return pc
-   .   sr0  == return space when called externally
-
-   OUTPUT REGISTERS:
-   .   arg0 = destroyed
-   .   arg1 = destroyed
-   .   ret1 = remainder
-
-   OTHER REGISTERS AFFECTED:
-   .   r1   = undefined
-
-   SIDE EFFECTS:
-   .   Causes a trap under the following conditions:  DIVIDE BY ZERO
-   .   Changes memory at the following places:  NONE
-
-   PERMISSIBLE CONTEXT:
-   .   Unwindable
-   .   Does not create a stack frame
-   .   Is usable for internal or external millicode
-
-   DISCUSSION:
-   .   Calls other millicode routines via mrp:  NONE
-   .   Calls other millicode routines:  NONE
-   .
-   .   Control flow, using the labels defined below:
-   .           pow2 / neg_num mask the (possibly negated) numerator
-   .           with divisor-1 when the divisor is a positive power of 2.
-   .           neg_denom / neg_num_2 do the same when the divisor is
-   .           the negative of a power of 2.
-   .           regular_seq traps on a zero divisor and otherwise runs
-   .           the general 32-step ds/addc sequence, then corrects the
-   .           remainder and gives it the sign of the dividend.  */
-
-RDEFINE(tmp,r1)
-RDEFINE(retreg,ret1)
-
-       SUBSPA_MILLI
-       ATTR_MILLI
-       .proc
-       .callinfo millicode
-       .entry
-GSYM($$remI)
-GSYM($$remoI)
-       .export $$remI,MILLICODE
-       .export $$remoI,MILLICODE
-       ldo             -1(arg1),tmp            /*  is there at most one bit set ? */
-       and,<>          arg1,tmp,r0             /*  if not, don't use power of 2 */
-       addi,>          0,arg1,r0               /*  if denominator > 0, use power */
-                                               /*  of 2 */
-       b,n             LREF(neg_denom)
-LSYM(pow2)
-       comb,>,n        0,arg0,LREF(neg_num)    /*  is numerator < 0 ? */
-       and             arg0,tmp,retreg         /*  get the result */
-       MILLIRETN
-LSYM(neg_num)
-       subi            0,arg0,arg0             /*  negate numerator */
-       and             arg0,tmp,retreg         /*  get the result */
-       subi            0,retreg,retreg         /*  negate result */
-       MILLIRETN
-LSYM(neg_denom)
-       addi,<          0,arg1,r0               /*  if arg1 >= 0, it's not power */
-                                               /*  of 2 */
-       b,n             LREF(regular_seq)
-       sub             r0,arg1,tmp             /*  make denominator positive */
-       comb,=,n        arg1,tmp,LREF(regular_seq) /*  test against 0x80000000 and 0 */
-       ldo             -1(tmp),retreg          /*  is there at most one bit set ? */
-       and,=           tmp,retreg,r0           /*  if not, go to regular_seq */
-       b,n             LREF(regular_seq)
-       comb,>,n        0,arg0,LREF(neg_num_2)  /*  if arg0 < 0, negate it  */
-       and             arg0,retreg,retreg
-       MILLIRETN
-LSYM(neg_num_2)
-       subi            0,arg0,tmp              /*  negate numerator into tmp */
-       and             tmp,retreg,retreg
-       subi            0,retreg,retreg
-       MILLIRETN
-LSYM(regular_seq)
-       addit,=         0,arg1,0                /*  trap if div by zero */
-       add,>=          0,arg0,retreg           /*  move dividend, if retreg < 0, */
-       sub             0,retreg,retreg         /*    make it positive */
-       sub             0,arg1, tmp             /*  clear carry,  */
-                                               /*    negate the divisor */
-       ds              0, tmp,0                /*  set V-bit to the comple- */
-                                               /*    ment of the divisor sign */
-       or              0,0, tmp                /*  clear  tmp */
-       add             retreg,retreg,retreg    /*  shift msb bit into carry */
-       ds               tmp,arg1, tmp          /*  1st divide step, if no carry */
-                                               /*    out, msb of quotient = 0 */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-LSYM(t1)
-       ds               tmp,arg1, tmp          /*  2nd divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  3rd divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  4th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  5th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  6th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  7th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  8th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  9th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  10th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  11th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  12th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  13th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  14th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  15th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  16th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  17th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  18th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  19th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  20th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  21st divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  22nd divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  23rd divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  24th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  25th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  26th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  27th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  28th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  29th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  30th divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  31st divide step */
-       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
-       ds               tmp,arg1, tmp          /*  32nd divide step, */
-       addc            retreg,retreg,retreg    /*  shift last bit into retreg */
-       movb,>=,n        tmp,retreg,LREF(finish) /*  branch if pos.  tmp */
-       add,<           arg1,0,0                /*  if arg1 > 0, add arg1 */
-       add,tr           tmp,arg1,retreg        /*    for correcting remainder tmp */
-       sub              tmp,arg1,retreg        /*  else add absolute value arg1 */
-LSYM(finish)
-       add,>=          arg0,0,0                /*  set sign of remainder */
-       sub             0,retreg,retreg         /*    to sign of dividend */
-       MILLIRET
-       nop
-       .exit
-       .procend
-#ifdef milliext
-       .origin 0x00000200
-#endif
-       .end
-#endif
-
-#ifdef L_remU
-/* ROUTINE:    $$remU
-   .   Single precision divide for remainder with unsigned binary integers.
-   .
-   .   The remainder must be dividend-(dividend/divisor)*divisor.
-   .   Divide by zero is trapped.
-
-   INPUT REGISTERS:
-   .   arg0 == dividend
-   .   arg1 == divisor
-   .   mrp  == return pc
-   .   sr0  == return space when called externally
-
-   OUTPUT REGISTERS:
-   .   arg0 =  undefined
-   .   arg1 =  undefined
-   .   ret1 =  remainder
-
-   OTHER REGISTERS AFFECTED:
-   .   r1   =  undefined
-
-   SIDE EFFECTS:
-   .   Causes a trap under the following conditions:  DIVIDE BY ZERO
-   .   Changes memory at the following places:  NONE
-
-   PERMISSIBLE CONTEXT:
-   .   Unwindable.
-   .   Does not create a stack frame.
-   .   Suitable for internal or external millicode.
-   .   Assumes the special millicode register conventions.
-
-   DISCUSSION:
-   .   Calls other millicode routines using mrp: NONE
-   .   Calls other millicode routines: NONE  */
-
-
-RDEFINE(temp,r1)
-RDEFINE(rmndr,ret1)    /*  r29 */
-       SUBSPA_MILLI
-       ATTR_MILLI
-       .export $$remU,millicode
-       .proc
-       .callinfo       millicode
-       .entry
-GSYM($$remU)
-       ldo     -1(arg1),temp           /*  is there at most one bit set ? */
-       and,=   arg1,temp,r0            /*  if not, don't use power of 2 */
-       b       LREF(regular_seq)
-       addit,= 0,arg1,r0               /*  trap on div by zero */
-       and     arg0,temp,rmndr         /*  get the result for power of 2 */
-       MILLIRETN
-LSYM(regular_seq)
-       comib,>=,n  0,arg1,LREF(special_case)
-       subi    0,arg1,rmndr            /*  clear carry, negate the divisor */
-       ds      r0,rmndr,r0             /*  set V-bit to 1 */
-       add     arg0,arg0,temp          /*  shift msb bit into carry */
-       ds      r0,arg1,rmndr           /*  1st divide step, if no carry */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  2nd divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  3rd divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  4th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  5th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  6th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  7th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  8th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  9th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  10th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  11th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  12th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  13th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  14th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  15th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  16th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  17th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  18th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  19th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  20th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  21st divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  22nd divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  23rd divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  24th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  25th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  26th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  27th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  28th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  29th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  30th divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  31st divide step */
-       addc    temp,temp,temp          /*  shift temp with/into carry */
-       ds      rmndr,arg1,rmndr                /*  32nd divide step, */
-       comiclr,<= 0,rmndr,r0
-         add   rmndr,arg1,rmndr        /*  correction */
-       MILLIRETN
-       nop
-
-/* Putting >= on the last DS and deleting COMICLR does not work!  */
-LSYM(special_case)
-       sub,>>= arg0,arg1,rmndr
-         copy  arg0,rmndr
-       MILLIRETN
-       nop
-       .exit
-       .procend
-       .end
-#endif
-
-#ifdef L_div_const
-/* ROUTINE:    $$divI_2
-   .           $$divI_3        $$divU_3
-   .           $$divI_4
-   .           $$divI_5        $$divU_5
-   .           $$divI_6        $$divU_6
-   .           $$divI_7        $$divU_7
-   .           $$divI_8
-   .           $$divI_9        $$divU_9
-   .           $$divI_10       $$divU_10
-   .
-   .           $$divI_12       $$divU_12
-   .
-   .           $$divI_14       $$divU_14
-   .           $$divI_15       $$divU_15
-   .           $$divI_16
-   .           $$divI_17       $$divU_17
-   .
-   .   Divide by selected constants for single precision binary integers.
-
-   INPUT REGISTERS:
-   .   arg0 == dividend
-   .   mrp  == return pc
-   .   sr0  == return space when called externally
-
-   OUTPUT REGISTERS:
-   .   arg0 =  undefined
-   .   arg1 =  undefined
-   .   ret1 =  quotient
-
-   OTHER REGISTERS AFFECTED:
-   .   r1   =  undefined
-
-   SIDE EFFECTS:
-   .   Causes a trap under the following conditions: NONE
-   .   Changes memory at the following places:  NONE
-
-   PERMISSIBLE CONTEXT:
-   .   Unwindable.
-   .   Does not create a stack frame.
-   .   Suitable for internal or external millicode.
-   .   Assumes the special millicode register conventions.
-
-   DISCUSSION:
-   .   Calls other millicode routines using mrp:  NONE
-   .   Calls other millicode routines:  NONE  */
-
-
-/* TRUNCATED DIVISION BY SMALL INTEGERS
-
-   We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
-   (with y fixed).
-
-   Let a = floor(z/y), for some choice of z.  Note that z will be
-   chosen so that division by z is cheap.
-
-   Let r be the remainder(z/y).  In other words, r = z - ay.
-
-   Now, our method is to choose a value for b such that
-
-   q'(x) = floor((ax+b)/z)
-
-   is equal to q(x) over as large a range of x as possible.  If the
-   two are equal over a sufficiently large range, and if it is easy to
-   form the product (ax), and it is easy to divide by z, then we can
-   perform the division much faster than the general division algorithm.
-
-   So, we want the following to be true:
-
-   .   For x in the following range:
-   .
-   .       ky <= x < (k+1)y
-   .
-   .   implies that
-   .
-   .       k <= (ax+b)/z < (k+1)
-
-   We want to determine b such that this is true for all k in the
-   range {0..K} for some maximum K.
-
-   Since (ax+b) is an increasing function of x, we can take each
-   bound separately to determine the "best" value for b.
-
-   (ax+b)/z < (k+1)           implies
-
-   a((k+1)y-1)+b < (k+1)z      implies
-
-   b < a + (k+1)(z-ay)        implies
-
-   b < a + (k+1)r
-
-   This needs to be true for all k in the range {0..K}.  In
-   particular, it is true for k = 0 and this leads to a maximum
-   acceptable value for b.
-
-   b < a+r   or   b <= a+r-1
-
-   Taking the other bound, we have
-
-   k <= (ax+b)/z              implies
-
-   k <= (aky+b)/z             implies
-
-   k(z-ay) <= b                       implies
-
-   kr <= b
-
-   Clearly, the largest range for k will be achieved by maximizing b,
-   when r is not zero. When r is zero, then the simplest choice for b
-   is 0.  When r is not 0, set
-
-   .   b = a+r-1
-
-   Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
-   for all x in the range:
-
-   .   0 <= x < (K+1)y
-
-   We need to determine what K is.  Of our two bounds,
-
-   .   b < a+(k+1)r    is satisfied for all k >= 0, by construction.
-
-   The other bound is
-
-   .   kr <= b
-
-   This is always true if r = 0.  If r is not 0 (the usual case), then
-   K = floor((a+r-1)/r), is the maximum value for k.
-
-   Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
-   answer for q(x) = floor(x/y) when x is in the range
-
-   (0,(K+1)y-1)               K = floor((a+r-1)/r)
-
-   To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
-   the formula for q'(x) yields the correct value of q(x) for all x
-   representable by a single word in HPPA.
-
-   We are also constrained in that computing the product (ax), adding
-   b, and dividing by z must all be done quickly, otherwise we will be
-   better off going through the general algorithm using the DS
-   instruction, which uses approximately 70 cycles.
-
-   For each y, there is a choice of z which satisfies the constraints
-   for (K+1)y >= 2**32.  We may not, however, be able to satisfy the
-   timing constraints for arbitrary y. It seems that z being equal to
-   a power of 2 or a power of 2 minus 1 is as good as we can do, since
-   it minimizes the time to do division by z.  We want the choice of z
-   to also result in a value for (a) that minimizes the computation of
-   the product (ax).  This is best achieved if (a) has a regular bit
-   pattern (so the multiplication can be done with shifts and adds).
-   The value of (a) also needs to be less than 2**32 so the product is
-   always guaranteed to fit in 2 words.
-
-   In actual practice, the following should be done:
-
-   1) For negative x, you should take the absolute value and remember
-   .  the fact so that the result can be negated.  This obviously does
-   .  not apply in the unsigned case.
-   2) For even y, you should factor out the power of 2 that divides y
-   .  and divide x by it.  You can then proceed by dividing by the
-   .  odd factor of y.
-
-   Here is a table of some odd values of y, and corresponding choices
-   for z which are "good".
-
-    y    z       r      a (hex)     max x (hex)
-
-    3  2**32     1     55555555      100000001
-    5  2**32     1     33333333      100000003
-    7  2**24-1   0       249249     (infinite)
-    9  2**24-1   0       1c71c7     (infinite)
-   11  2**20-1   0        1745d     (infinite)
-   13  2**24-1   0       13b13b     (infinite)
-   15  2**32     1     11111111      10000000d
-   17  2**32     1      f0f0f0f      10000000f
-
-   If r is 1, then b = a+r-1 = a.  This simplifies the computation
-   of (ax+b), since you can compute (x+1)(a) instead.  If r is 0,
-   then b = 0 is ok to use which simplifies (ax+b).
-
-   The bit patterns for 55555555, 33333333, and 11111111 are obviously
-   very regular.  The bit patterns for the other values of a above are:
-
-    y     (hex)          (binary)
-
-    7    249249  001001001001001001001001  << regular >>
-    9    1c71c7  000111000111000111000111  << regular >>
-   11     1745d  000000010111010001011101  << irregular >>
-   13    13b13b  000100111011000100111011  << irregular >>
-
-   The bit patterns for (a) corresponding to (y) of 11 and 13 may be
-   too irregular to warrant using this method.
-
-   When z is a power of 2 minus 1, then the division by z is slightly
-   more complicated, involving an iterative solution.
-
-   The code presented here solves division by 1 through 17, except for
-   11 and 13. There are algorithms for both signed and unsigned
-   quantities given.
-
-   TIMINGS (cycles)
-
-   divisor  positive  negative unsigned
-
-   .   1       2          2         2
-   .   2       4          4         2
-   .   3       19        21        19
-   .   4       4          4         2
-   .   5       18        22        19
-   .   6       19        22        19
-   .   8       4          4         2
-   .  10       18        19        17
-   .  12       18        20        18
-   .  15       16        18        16
-   .  16       4          4         2
-   .  17       16        18        16
-
-   Now, the algorithm for 7, 9, and 14 is an iterative one.  That is,
-   a loop body is executed until the tentative quotient is 0.  The
-   number of times the loop body is executed varies depending on the
-   dividend, but is never more than two times. If the dividend is
-   less than the divisor, then the loop body is not executed at all.
-   Each iteration adds 4 cycles to the timings.
-
-   divisor  positive  negative unsigned
-
-   .   7       19+4n    20+4n     20+4n    n = number of iterations
-   .   9       21+4n    22+4n     21+4n
-   .  14       21+4n    22+4n     20+4n
-
-   To give an idea of how the number of iterations varies, here is a
-   table of dividend versus number of iterations when dividing by 7.
-
-   smallest     largest       required
-   dividend    dividend      iterations
-
-   .   0            6              0
-   .   7        0x6ffffff          1
-   0x1000006   0xffffffff          2
-
-   There is some overlap in the range of numbers requiring 1 and 2
-   iterations. */
-
-RDEFINE(t2,r1)
-RDEFINE(x2,arg0)       /*  r26 */
-RDEFINE(t1,arg1)       /*  r25 */
-RDEFINE(x1,ret1)       /*  r29 */
-
-       SUBSPA_MILLI_DIV
-       ATTR_MILLI
-
-       .proc
-       .callinfo       millicode
-       .entry
-/* NONE of these routines require a stack frame
-   ALL of these routines are unwindable from millicode */
-
-GSYM($$divide_by_constant)
-       .export $$divide_by_constant,millicode
-/*  Provides a "nice" label for the code covered by the unwind descriptor
-    for things like gprof.  */
-
-/* DIVISION BY 2 (shift by 1) */
-GSYM($$divI_2)
-       .export         $$divI_2,millicode
-       comclr,>=       arg0,0,0
-       addi            1,arg0,arg0
-       MILLIRET
-       extrs           arg0,30,31,ret1
-
-
-/* DIVISION BY 4 (shift by 2) */
-GSYM($$divI_4)
-       .export         $$divI_4,millicode
-       comclr,>=       arg0,0,0
-       addi            3,arg0,arg0
-       MILLIRET
-       extrs           arg0,29,30,ret1
-
-
-/* DIVISION BY 8 (shift by 3) */
-GSYM($$divI_8)
-       .export         $$divI_8,millicode
-       comclr,>=       arg0,0,0
-       addi            7,arg0,arg0
-       MILLIRET
-       extrs           arg0,28,29,ret1
-
-/* DIVISION BY 16 (shift by 4) */
-GSYM($$divI_16)
-       .export         $$divI_16,millicode
-       comclr,>=       arg0,0,0
-       addi            15,arg0,arg0
-       MILLIRET
-       extrs           arg0,27,28,ret1
-
-/****************************************************************************
-*
-*      DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
-*
-*      includes 3,5,15,17 and also 6,10,12
-*
-****************************************************************************/
-
-/* DIVISION BY 3 (use z = 2**32; a = 55555555) */
-
-GSYM($$divI_3)
-       .export         $$divI_3,millicode
-       comb,<,N        x2,0,LREF(neg3)
-
-       addi            1,x2,x2         /* this cannot overflow */
-       extru           x2,1,2,x1       /* multiply by 5 to get started */
-       sh2add          x2,x2,x2
-       b               LREF(pos)
-       addc            x1,0,x1
-
-LSYM(neg3)
-       subi            1,x2,x2         /* this cannot overflow */
-       extru           x2,1,2,x1       /* multiply by 5 to get started */
-       sh2add          x2,x2,x2
-       b               LREF(neg)
-       addc            x1,0,x1
-
-GSYM($$divU_3)
-       .export         $$divU_3,millicode
-       addi            1,x2,x2         /* this CAN overflow */
-       addc            0,0,x1
-       shd             x1,x2,30,t1     /* multiply by 5 to get started */
-       sh2add          x2,x2,x2
-       b               LREF(pos)
-       addc            x1,t1,x1
-
-/* DIVISION BY 5 (use z = 2**32; a = 33333333) */
-
-GSYM($$divI_5)
-       .export         $$divI_5,millicode
-       comb,<,N        x2,0,LREF(neg5)
-
-       addi            3,x2,t1         /* this cannot overflow */
-       sh1add          x2,t1,x2        /* multiply by 3 to get started */
-       b               LREF(pos)
-       addc            0,0,x1
-
-LSYM(neg5)
-       sub             0,x2,x2         /* negate x2                    */
-       addi            1,x2,x2         /* this cannot overflow */
-       shd             0,x2,31,x1      /* get top bit (can be 1)       */
-       sh1add          x2,x2,x2        /* multiply by 3 to get started */
-       b               LREF(neg)
-       addc            x1,0,x1
-
-GSYM($$divU_5)
-       .export         $$divU_5,millicode
-       addi            1,x2,x2         /* this CAN overflow */
-       addc            0,0,x1
-       shd             x1,x2,31,t1     /* multiply by 3 to get started */
-       sh1add          x2,x2,x2
-       b               LREF(pos)
-       addc            t1,x1,x1
-
-/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
-GSYM($$divI_6)
-       .export         $$divI_6,millicode
-       comb,<,N        x2,0,LREF(neg6)
-       extru           x2,30,31,x2     /* divide by 2                  */
-       addi            5,x2,t1         /* compute 5*(x2+1) = 5*x2+5    */
-       sh2add          x2,t1,x2        /* multiply by 5 to get started */
-       b               LREF(pos)
-       addc            0,0,x1
-
-LSYM(neg6)
-       subi            2,x2,x2         /* negate, divide by 2, and add 1 */
-                                       /* negation and adding 1 are done */
-                                       /* at the same time by the SUBI   */
-       extru           x2,30,31,x2
-       shd             0,x2,30,x1
-       sh2add          x2,x2,x2        /* multiply by 5 to get started */
-       b               LREF(neg)
-       addc            x1,0,x1
-
-GSYM($$divU_6)
-       .export         $$divU_6,millicode
-       extru           x2,30,31,x2     /* divide by 2 */
-       addi            1,x2,x2         /* cannot carry */
-       shd             0,x2,30,x1      /* multiply by 5 to get started */
-       sh2add          x2,x2,x2
-       b               LREF(pos)
-       addc            x1,0,x1
-
-/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
-GSYM($$divU_10)
-       .export         $$divU_10,millicode
-       extru           x2,30,31,x2     /* divide by 2 */
-       addi            3,x2,t1         /* compute 3*(x2+1) = (3*x2)+3  */
-       sh1add          x2,t1,x2        /* multiply by 3 to get started */
-       addc            0,0,x1
-LSYM(pos)
-       shd             x1,x2,28,t1     /* multiply by 0x11 */
-       shd             x2,0,28,t2
-       add             x2,t2,x2
-       addc            x1,t1,x1
-LSYM(pos_for_17)
-       shd             x1,x2,24,t1     /* multiply by 0x101 */
-       shd             x2,0,24,t2
-       add             x2,t2,x2
-       addc            x1,t1,x1
-
-       shd             x1,x2,16,t1     /* multiply by 0x10001 */
-       shd             x2,0,16,t2
-       add             x2,t2,x2
-       MILLIRET
-       addc            x1,t1,x1
-
-GSYM($$divI_10)
-       .export         $$divI_10,millicode
-       comb,<          x2,0,LREF(neg10)
-       copy            0,x1
-       extru           x2,30,31,x2     /* divide by 2 */
-       addib,TR        1,x2,LREF(pos)  /* add 1 (cannot overflow)     */
-       sh1add          x2,x2,x2        /* multiply by 3 to get started */
-
-LSYM(neg10)
-       subi            2,x2,x2         /* negate, divide by 2, and add 1 */
-                                       /* negation and adding 1 are done */
-                                       /* at the same time by the SUBI   */
-       extru           x2,30,31,x2
-       sh1add          x2,x2,x2        /* multiply by 3 to get started */
-LSYM(neg)
-       shd             x1,x2,28,t1     /* multiply by 0x11 */
-       shd             x2,0,28,t2
-       add             x2,t2,x2
-       addc            x1,t1,x1
-LSYM(neg_for_17)
-       shd             x1,x2,24,t1     /* multiply by 0x101 */
-       shd             x2,0,24,t2
-       add             x2,t2,x2
-       addc            x1,t1,x1
-
-       shd             x1,x2,16,t1     /* multiply by 0x10001 */
-       shd             x2,0,16,t2
-       add             x2,t2,x2
-       addc            x1,t1,x1
-       MILLIRET
-       sub             0,x1,x1
-
-/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
-GSYM($$divI_12)
-       .export         $$divI_12,millicode
-       comb,<          x2,0,LREF(neg12)
-       copy            0,x1
-       extru           x2,29,30,x2     /* divide by 4                  */
-       addib,tr        1,x2,LREF(pos)  /* compute 5*(x2+1) = 5*x2+5    */
-       sh2add          x2,x2,x2        /* multiply by 5 to get started */
-
-LSYM(neg12)
-       subi            4,x2,x2         /* negate, divide by 4, and add 1 */
-                                       /* negation and adding 1 are done */
-                                       /* at the same time by the SUBI   */
-       extru           x2,29,30,x2
-       b               LREF(neg)
-       sh2add          x2,x2,x2        /* multiply by 5 to get started */
-
-GSYM($$divU_12)
-       .export         $$divU_12,millicode
-       extru           x2,29,30,x2     /* divide by 4   */
-       addi            5,x2,t1         /* cannot carry */
-       sh2add          x2,t1,x2        /* multiply by 5 to get started */
-       b               LREF(pos)
-       addc            0,0,x1
-
-/* DIVISION BY 15 (use z = 2**32; a = 11111111) */
-GSYM($$divI_15)
-       .export         $$divI_15,millicode
-       comb,<          x2,0,LREF(neg15)
-       copy            0,x1
-       addib,tr        1,x2,LREF(pos)+4
-       shd             x1,x2,28,t1
-
-LSYM(neg15)
-       b               LREF(neg)
-       subi            1,x2,x2
-
-GSYM($$divU_15)
-       .export         $$divU_15,millicode
-       addi            1,x2,x2         /* this CAN overflow */
-       b               LREF(pos)
-       addc            0,0,x1
-
-/* DIVISION BY 17 (use z = 2**32; a =  f0f0f0f) */
-GSYM($$divI_17)
-       .export         $$divI_17,millicode
-       comb,<,n        x2,0,LREF(neg17)
-       addi            1,x2,x2         /* this cannot overflow */
-       shd             0,x2,28,t1      /* multiply by 0xf to get started */
-       shd             x2,0,28,t2
-       sub             t2,x2,x2
-       b               LREF(pos_for_17)
-       subb            t1,0,x1
-
-LSYM(neg17)
-       subi            1,x2,x2         /* this cannot overflow */
-       shd             0,x2,28,t1      /* multiply by 0xf to get started */
-       shd             x2,0,28,t2
-       sub             t2,x2,x2
-       b               LREF(neg_for_17)
-       subb            t1,0,x1
-
-GSYM($$divU_17)
-       .export         $$divU_17,millicode
-       addi            1,x2,x2         /* this CAN overflow */
-       addc            0,0,x1
-       shd             x1,x2,28,t1     /* multiply by 0xf to get started */
-LSYM(u17)
-       shd             x2,0,28,t2
-       sub             t2,x2,x2
-       b               LREF(pos_for_17)
-       subb            t1,x1,x1
-
-
-/* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
-   includes 7,9 and also 14
-
-
-   z = 2**24-1
-   r = z mod y = 0
-
-   so choose b = 0
-
-   Also, in order to divide by z = 2**24-1, we approximate by dividing
-   by (z+1) = 2**24 (which is easy), and then correcting.
-
-   (ax) = (z+1)q' + r
-   .   = zq' + (q'+r)
-
-   So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
-   Then the true remainder of (ax)/z is (q'+r).  Repeat the process
-   with this new remainder, adding the tentative quotients together,
-   until a tentative quotient is 0 (and then we are done).  There is
-   one last correction to be done.  It is possible that (q'+r) = z.
-   If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But,
-   in fact, we need to add 1 more to the quotient.  Now, it turns
-   out that this happens if and only if the original value x is
-   an exact multiple of y.  So, to avoid a three instruction test at
-   the end, instead use 1 instruction to add 1 to x at the beginning.  */
-
-/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
-GSYM($$divI_7)
-       .export         $$divI_7,millicode
-       comb,<,n        x2,0,LREF(neg7)
-LSYM(7)
-       addi            1,x2,x2         /* cannot overflow */
-       shd             0,x2,29,x1
-       sh3add          x2,x2,x2
-       addc            x1,0,x1
-LSYM(pos7)
-       shd             x1,x2,26,t1
-       shd             x2,0,26,t2
-       add             x2,t2,x2
-       addc            x1,t1,x1
-
-       shd             x1,x2,20,t1
-       shd             x2,0,20,t2
-       add             x2,t2,x2
-       addc            x1,t1,t1
-
-       /* computed <t1,x2>.  Now divide it by (2**24 - 1)      */
-
-       copy            0,x1
-       shd,=           t1,x2,24,t1     /* tentative quotient  */
-LSYM(1)
-       addb,tr         t1,x1,LREF(2)   /* add to previous quotient   */
-       extru           x2,31,24,x2     /* new remainder (unadjusted) */
-
-       MILLIRETN
-
-LSYM(2)
-       addb,tr         t1,x2,LREF(1)   /* adjust remainder */
-       extru,=         x2,7,8,t1       /* new quotient     */
-
-LSYM(neg7)
-       subi            1,x2,x2         /* negate x2 and add 1 */
-LSYM(8)
-       shd             0,x2,29,x1
-       sh3add          x2,x2,x2
-       addc            x1,0,x1
-
-LSYM(neg7_shift)
-       shd             x1,x2,26,t1
-       shd             x2,0,26,t2
-       add             x2,t2,x2
-       addc            x1,t1,x1
-
-       shd             x1,x2,20,t1
-       shd             x2,0,20,t2
-       add             x2,t2,x2
-       addc            x1,t1,t1
-
-       /* computed <t1,x2>.  Now divide it by (2**24 - 1)      */
-
-       copy            0,x1
-       shd,=           t1,x2,24,t1     /* tentative quotient  */
-LSYM(3)
-       addb,tr         t1,x1,LREF(4)   /* add to previous quotient   */
-       extru           x2,31,24,x2     /* new remainder (unadjusted) */
-
-       MILLIRET
-       sub             0,x1,x1         /* negate result    */
-
-LSYM(4)
-       addb,tr         t1,x2,LREF(3)   /* adjust remainder */
-       extru,=         x2,7,8,t1       /* new quotient     */
-
-GSYM($$divU_7)
-       .export         $$divU_7,millicode
-       addi            1,x2,x2         /* can carry */
-       addc            0,0,x1
-       shd             x1,x2,29,t1
-       sh3add          x2,x2,x2
-       b               LREF(pos7)
-       addc            t1,x1,x1
-
-/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
-GSYM($$divI_9)
-       .export         $$divI_9,millicode
-       comb,<,n        x2,0,LREF(neg9)
-       addi            1,x2,x2         /* cannot overflow */
-       shd             0,x2,29,t1
-       shd             x2,0,29,t2
-       sub             t2,x2,x2
-       b               LREF(pos7)
-       subb            t1,0,x1
-
-LSYM(neg9)
-       subi            1,x2,x2         /* negate and add 1 */
-       shd             0,x2,29,t1
-       shd             x2,0,29,t2
-       sub             t2,x2,x2
-       b               LREF(neg7_shift)
-       subb            t1,0,x1
-
-GSYM($$divU_9)
-       .export         $$divU_9,millicode
-       addi            1,x2,x2         /* can carry */
-       addc            0,0,x1
-       shd             x1,x2,29,t1
-       shd             x2,0,29,t2
-       sub             t2,x2,x2
-       b               LREF(pos7)
-       subb            t1,x1,x1
-
-/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
-GSYM($$divI_14)
-       .export         $$divI_14,millicode
-       comb,<,n        x2,0,LREF(neg14)
-GSYM($$divU_14)
-       .export         $$divU_14,millicode
-       b               LREF(7)         /* go to 7 case */
-       extru           x2,30,31,x2     /* divide by 2  */
-
-LSYM(neg14)
-       subi            2,x2,x2         /* negate (and add 2) */
-       b               LREF(8)
-       extru           x2,30,31,x2     /* divide by 2        */
-       .exit
-       .procend
-       .end
-#endif
-
-#ifdef L_mulI
-/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
-/******************************************************************************
-This routine is used on PA2.0 processors when gcc -mno-fpregs is used
-
-ROUTINE:       $$mulI
-
-
-DESCRIPTION:   
-
-       $$mulI multiplies two single word integers, giving a single 
-       word result.  
-
-
-INPUT REGISTERS:
-
-       arg0 = Operand 1
-       arg1 = Operand 2
-       r31  == return pc
-       sr0  == return space when called externally 
-
-
-OUTPUT REGISTERS:
-
-       arg0 = undefined
-       arg1 = undefined
-       ret1 = result 
-
-OTHER REGISTERS AFFECTED:
-
-       r1   = undefined
-
-SIDE EFFECTS:
-
-       Causes a trap under the following conditions:  NONE
-       Changes memory at the following places:  NONE
-
-PERMISSIBLE CONTEXT:
-
-       Unwindable
-       Does not create a stack frame
-       Is usable for internal or external millicode
-
-DISCUSSION:
-
-       Calls other millicode routines via mrp:  NONE
-       Calls other millicode routines:  NONE
-
-***************************************************************************/
-
-
-#define        a0      %arg0
-#define        a1      %arg1
-#define        t0      %r1
-#define        r       %ret1
-
-#define        a0__128a0       zdep    a0,24,25,a0
-#define        a0__256a0       zdep    a0,23,24,a0
-#define        a1_ne_0_b_l0    comb,<> a1,0,LREF(l0)
-#define        a1_ne_0_b_l1    comb,<> a1,0,LREF(l1)
-#define        a1_ne_0_b_l2    comb,<> a1,0,LREF(l2)
-#define        b_n_ret_t0      b,n     LREF(ret_t0)
-#define        b_e_shift       b       LREF(e_shift)
-#define        b_e_t0ma0       b       LREF(e_t0ma0)
-#define        b_e_t0          b       LREF(e_t0)
-#define        b_e_t0a0        b       LREF(e_t0a0)
-#define        b_e_t02a0       b       LREF(e_t02a0)
-#define        b_e_t04a0       b       LREF(e_t04a0)
-#define        b_e_2t0         b       LREF(e_2t0)
-#define        b_e_2t0a0       b       LREF(e_2t0a0)
-#define        b_e_2t04a0      b       LREF(e2t04a0)
-#define        b_e_3t0         b       LREF(e_3t0)
-#define        b_e_4t0         b       LREF(e_4t0)
-#define        b_e_4t0a0       b       LREF(e_4t0a0)
-#define        b_e_4t08a0      b       LREF(e4t08a0)
-#define        b_e_5t0         b       LREF(e_5t0)
-#define        b_e_8t0         b       LREF(e_8t0)
-#define        b_e_8t0a0       b       LREF(e_8t0a0)
-#define        r__r_a0         add     r,a0,r
-#define        r__r_2a0        sh1add  a0,r,r
-#define        r__r_4a0        sh2add  a0,r,r
-#define        r__r_8a0        sh3add  a0,r,r
-#define        r__r_t0         add     r,t0,r
-#define        r__r_2t0        sh1add  t0,r,r
-#define        r__r_4t0        sh2add  t0,r,r
-#define        r__r_8t0        sh3add  t0,r,r
-#define        t0__3a0         sh1add  a0,a0,t0
-#define        t0__4a0         sh2add  a0,0,t0
-#define        t0__5a0         sh2add  a0,a0,t0
-#define        t0__8a0         sh3add  a0,0,t0
-#define        t0__9a0         sh3add  a0,a0,t0
-#define        t0__16a0        zdep    a0,27,28,t0
-#define        t0__32a0        zdep    a0,26,27,t0
-#define        t0__64a0        zdep    a0,25,26,t0
-#define        t0__128a0       zdep    a0,24,25,t0
-#define        t0__t0ma0       sub     t0,a0,t0
-#define        t0__t0_a0       add     t0,a0,t0
-#define        t0__t0_2a0      sh1add  a0,t0,t0
-#define        t0__t0_4a0      sh2add  a0,t0,t0
-#define        t0__t0_8a0      sh3add  a0,t0,t0
-#define        t0__2t0_a0      sh1add  t0,a0,t0
-#define        t0__3t0         sh1add  t0,t0,t0
-#define        t0__4t0         sh2add  t0,0,t0
-#define        t0__4t0_a0      sh2add  t0,a0,t0
-#define        t0__5t0         sh2add  t0,t0,t0
-#define        t0__8t0         sh3add  t0,0,t0
-#define        t0__8t0_a0      sh3add  t0,a0,t0
-#define        t0__9t0         sh3add  t0,t0,t0
-#define        t0__16t0        zdep    t0,27,28,t0
-#define        t0__32t0        zdep    t0,26,27,t0
-#define        t0__256a0       zdep    a0,23,24,t0
-
-
-       SUBSPA_MILLI
-       ATTR_MILLI
-       .align 16
-       .proc
-       .callinfo millicode
-       .export $$mulI,millicode
-GSYM($$mulI)   
-       combt,<<=       a1,a0,LREF(l4)  /* swap args if unsigned a1>a0 */
-       copy            0,r             /* zero out the result */
-       xor             a0,a1,a0        /* swap a0 & a1 using the */
-       xor             a0,a1,a1        /*  old xor trick */
-       xor             a0,a1,a0
-LSYM(l4)
-       combt,<=        0,a0,LREF(l3)           /* if a0>=0 then proceed like unsigned */
-       zdep            a1,30,8,t0      /* t0 = (a1&0xff)<<1 ********* */
-       sub,>           0,a1,t0         /* otherwise negate both and */
-       combt,<=,n      a0,t0,LREF(l2)  /*  swap back if |a0|<|a1| */
-       sub             0,a0,a1
-       movb,tr,n       t0,a0,LREF(l2)  /* 10th inst.  */
-
-LSYM(l0)       r__r_t0                         /* add in this partial product */
-LSYM(l1)       a0__256a0                       /* a0 <<= 8 ****************** */
-LSYM(l2)       zdep            a1,30,8,t0      /* t0 = (a1&0xff)<<1 ********* */
-LSYM(l3)       blr             t0,0            /* case on these 8 bits ****** */
-               extru           a1,23,24,a1     /* a1 >>= 8 ****************** */
-
-/*16 insts before this.  */
-/*                       a0 <<= 8 ************************** */
-LSYM(x0)       a1_ne_0_b_l2    ! a0__256a0     ! MILLIRETN     ! nop
-LSYM(x1)       a1_ne_0_b_l1    ! r__r_a0       ! MILLIRETN     ! nop
-LSYM(x2)       a1_ne_0_b_l1    ! r__r_2a0      ! MILLIRETN     ! nop
-LSYM(x3)       a1_ne_0_b_l0    ! t0__3a0       ! MILLIRET      ! r__r_t0
-LSYM(x4)       a1_ne_0_b_l1    ! r__r_4a0      ! MILLIRETN     ! nop
-LSYM(x5)       a1_ne_0_b_l0    ! t0__5a0       ! MILLIRET      ! r__r_t0
-LSYM(x6)       t0__3a0         ! a1_ne_0_b_l1  ! r__r_2t0      ! MILLIRETN
-LSYM(x7)       t0__3a0         ! a1_ne_0_b_l0  ! r__r_4a0      ! b_n_ret_t0
-LSYM(x8)       a1_ne_0_b_l1    ! r__r_8a0      ! MILLIRETN     ! nop
-LSYM(x9)       a1_ne_0_b_l0    ! t0__9a0       ! MILLIRET      ! r__r_t0
-LSYM(x10)      t0__5a0         ! a1_ne_0_b_l1  ! r__r_2t0      ! MILLIRETN
-LSYM(x11)      t0__3a0         ! a1_ne_0_b_l0  ! r__r_8a0      ! b_n_ret_t0
-LSYM(x12)      t0__3a0         ! a1_ne_0_b_l1  ! r__r_4t0      ! MILLIRETN
-LSYM(x13)      t0__5a0         ! a1_ne_0_b_l0  ! r__r_8a0      ! b_n_ret_t0
-LSYM(x14)      t0__3a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_2t0
-LSYM(x15)      t0__5a0         ! a1_ne_0_b_l0  ! t0__3t0       ! b_n_ret_t0
-LSYM(x16)      t0__16a0        ! a1_ne_0_b_l1  ! r__r_t0       ! MILLIRETN
-LSYM(x17)      t0__9a0         ! a1_ne_0_b_l0  ! t0__t0_8a0    ! b_n_ret_t0
-LSYM(x18)      t0__9a0         ! a1_ne_0_b_l1  ! r__r_2t0      ! MILLIRETN
-LSYM(x19)      t0__9a0         ! a1_ne_0_b_l0  ! t0__2t0_a0    ! b_n_ret_t0
-LSYM(x20)      t0__5a0         ! a1_ne_0_b_l1  ! r__r_4t0      ! MILLIRETN
-LSYM(x21)      t0__5a0         ! a1_ne_0_b_l0  ! t0__4t0_a0    ! b_n_ret_t0
-LSYM(x22)      t0__5a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_2t0
-LSYM(x23)      t0__5a0         ! t0__2t0_a0    ! b_e_t0        ! t0__2t0_a0
-LSYM(x24)      t0__3a0         ! a1_ne_0_b_l1  ! r__r_8t0      ! MILLIRETN
-LSYM(x25)      t0__5a0         ! a1_ne_0_b_l0  ! t0__5t0       ! b_n_ret_t0
-LSYM(x26)      t0__3a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_2t0
-LSYM(x27)      t0__3a0         ! a1_ne_0_b_l0  ! t0__9t0       ! b_n_ret_t0
-LSYM(x28)      t0__3a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_4t0
-LSYM(x29)      t0__3a0         ! t0__2t0_a0    ! b_e_t0        ! t0__4t0_a0
-LSYM(x30)      t0__5a0         ! t0__3t0       ! b_e_shift     ! r__r_2t0
-LSYM(x31)      t0__32a0        ! a1_ne_0_b_l0  ! t0__t0ma0     ! b_n_ret_t0
-LSYM(x32)      t0__32a0        ! a1_ne_0_b_l1  ! r__r_t0       ! MILLIRETN
-LSYM(x33)      t0__8a0         ! a1_ne_0_b_l0  ! t0__4t0_a0    ! b_n_ret_t0
-LSYM(x34)      t0__16a0        ! t0__t0_a0     ! b_e_shift     ! r__r_2t0
-LSYM(x35)      t0__9a0         ! t0__3t0       ! b_e_t0        ! t0__t0_8a0
-LSYM(x36)      t0__9a0         ! a1_ne_0_b_l1  ! r__r_4t0      ! MILLIRETN
-LSYM(x37)      t0__9a0         ! a1_ne_0_b_l0  ! t0__4t0_a0    ! b_n_ret_t0
-LSYM(x38)      t0__9a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_2t0
-LSYM(x39)      t0__9a0         ! t0__2t0_a0    ! b_e_t0        ! t0__2t0_a0
-LSYM(x40)      t0__5a0         ! a1_ne_0_b_l1  ! r__r_8t0      ! MILLIRETN
-LSYM(x41)      t0__5a0         ! a1_ne_0_b_l0  ! t0__8t0_a0    ! b_n_ret_t0
-LSYM(x42)      t0__5a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_2t0
-LSYM(x43)      t0__5a0         ! t0__4t0_a0    ! b_e_t0        ! t0__2t0_a0
-LSYM(x44)      t0__5a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_4t0
-LSYM(x45)      t0__9a0         ! a1_ne_0_b_l0  ! t0__5t0       ! b_n_ret_t0
-LSYM(x46)      t0__9a0         ! t0__5t0       ! b_e_t0        ! t0__t0_a0
-LSYM(x47)      t0__9a0         ! t0__5t0       ! b_e_t0        ! t0__t0_2a0
-LSYM(x48)      t0__3a0         ! a1_ne_0_b_l0  ! t0__16t0      ! b_n_ret_t0
-LSYM(x49)      t0__9a0         ! t0__5t0       ! b_e_t0        ! t0__t0_4a0
-LSYM(x50)      t0__5a0         ! t0__5t0       ! b_e_shift     ! r__r_2t0
-LSYM(x51)      t0__9a0         ! t0__t0_8a0    ! b_e_t0        ! t0__3t0
-LSYM(x52)      t0__3a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_4t0
-LSYM(x53)      t0__3a0         ! t0__4t0_a0    ! b_e_t0        ! t0__4t0_a0
-LSYM(x54)      t0__9a0         ! t0__3t0       ! b_e_shift     ! r__r_2t0
-LSYM(x55)      t0__9a0         ! t0__3t0       ! b_e_t0        ! t0__2t0_a0
-LSYM(x56)      t0__3a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_8t0
-LSYM(x57)      t0__9a0         ! t0__2t0_a0    ! b_e_t0        ! t0__3t0
-LSYM(x58)      t0__3a0         ! t0__2t0_a0    ! b_e_2t0       ! t0__4t0_a0
-LSYM(x59)      t0__9a0         ! t0__2t0_a0    ! b_e_t02a0     ! t0__3t0
-LSYM(x60)      t0__5a0         ! t0__3t0       ! b_e_shift     ! r__r_4t0
-LSYM(x61)      t0__5a0         ! t0__3t0       ! b_e_t0        ! t0__4t0_a0
-LSYM(x62)      t0__32a0        ! t0__t0ma0     ! b_e_shift     ! r__r_2t0
-LSYM(x63)      t0__64a0        ! a1_ne_0_b_l0  ! t0__t0ma0     ! b_n_ret_t0
-LSYM(x64)      t0__64a0        ! a1_ne_0_b_l1  ! r__r_t0       ! MILLIRETN
-LSYM(x65)      t0__8a0         ! a1_ne_0_b_l0  ! t0__8t0_a0    ! b_n_ret_t0
-LSYM(x66)      t0__32a0        ! t0__t0_a0     ! b_e_shift     ! r__r_2t0
-LSYM(x67)      t0__8a0         ! t0__4t0_a0    ! b_e_t0        ! t0__2t0_a0
-LSYM(x68)      t0__8a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_4t0
-LSYM(x69)      t0__8a0         ! t0__2t0_a0    ! b_e_t0        ! t0__4t0_a0
-LSYM(x70)      t0__64a0        ! t0__t0_4a0    ! b_e_t0        ! t0__t0_2a0
-LSYM(x71)      t0__9a0         ! t0__8t0       ! b_e_t0        ! t0__t0ma0
-LSYM(x72)      t0__9a0         ! a1_ne_0_b_l1  ! r__r_8t0      ! MILLIRETN
-LSYM(x73)      t0__9a0         ! t0__8t0_a0    ! b_e_shift     ! r__r_t0
-LSYM(x74)      t0__9a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_2t0
-LSYM(x75)      t0__9a0         ! t0__4t0_a0    ! b_e_t0        ! t0__2t0_a0
-LSYM(x76)      t0__9a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_4t0
-LSYM(x77)      t0__9a0         ! t0__2t0_a0    ! b_e_t0        ! t0__4t0_a0
-LSYM(x78)      t0__9a0         ! t0__2t0_a0    ! b_e_2t0       ! t0__2t0_a0
-LSYM(x79)      t0__16a0        ! t0__5t0       ! b_e_t0        ! t0__t0ma0
-LSYM(x80)      t0__16a0        ! t0__5t0       ! b_e_shift     ! r__r_t0
-LSYM(x81)      t0__9a0         ! t0__9t0       ! b_e_shift     ! r__r_t0
-LSYM(x82)      t0__5a0         ! t0__8t0_a0    ! b_e_shift     ! r__r_2t0
-LSYM(x83)      t0__5a0         ! t0__8t0_a0    ! b_e_t0        ! t0__2t0_a0
-LSYM(x84)      t0__5a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_4t0
-LSYM(x85)      t0__8a0         ! t0__2t0_a0    ! b_e_t0        ! t0__5t0
-LSYM(x86)      t0__5a0         ! t0__4t0_a0    ! b_e_2t0       ! t0__2t0_a0
-LSYM(x87)      t0__9a0         ! t0__9t0       ! b_e_t02a0     ! t0__t0_4a0
-LSYM(x88)      t0__5a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_8t0
-LSYM(x89)      t0__5a0         ! t0__2t0_a0    ! b_e_t0        ! t0__8t0_a0
-LSYM(x90)      t0__9a0         ! t0__5t0       ! b_e_shift     ! r__r_2t0
-LSYM(x91)      t0__9a0         ! t0__5t0       ! b_e_t0        ! t0__2t0_a0
-LSYM(x92)      t0__5a0         ! t0__2t0_a0    ! b_e_4t0       ! t0__2t0_a0
-LSYM(x93)      t0__32a0        ! t0__t0ma0     ! b_e_t0        ! t0__3t0
-LSYM(x94)      t0__9a0         ! t0__5t0       ! b_e_2t0       ! t0__t0_2a0
-LSYM(x95)      t0__9a0         ! t0__2t0_a0    ! b_e_t0        ! t0__5t0
-LSYM(x96)      t0__8a0         ! t0__3t0       ! b_e_shift     ! r__r_4t0
-LSYM(x97)      t0__8a0         ! t0__3t0       ! b_e_t0        ! t0__4t0_a0
-LSYM(x98)      t0__32a0        ! t0__3t0       ! b_e_t0        ! t0__t0_2a0
-LSYM(x99)      t0__8a0         ! t0__4t0_a0    ! b_e_t0        ! t0__3t0
-LSYM(x100)     t0__5a0         ! t0__5t0       ! b_e_shift     ! r__r_4t0
-LSYM(x101)     t0__5a0         ! t0__5t0       ! b_e_t0        ! t0__4t0_a0
-LSYM(x102)     t0__32a0        ! t0__t0_2a0    ! b_e_t0        ! t0__3t0
-LSYM(x103)     t0__5a0         ! t0__5t0       ! b_e_t02a0     ! t0__4t0_a0
-LSYM(x104)     t0__3a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_8t0
-LSYM(x105)     t0__5a0         ! t0__4t0_a0    ! b_e_t0        ! t0__5t0
-LSYM(x106)     t0__3a0         ! t0__4t0_a0    ! b_e_2t0       ! t0__4t0_a0
-LSYM(x107)     t0__9a0         ! t0__t0_4a0    ! b_e_t02a0     ! t0__8t0_a0
-LSYM(x108)     t0__9a0         ! t0__3t0       ! b_e_shift     ! r__r_4t0
-LSYM(x109)     t0__9a0         ! t0__3t0       ! b_e_t0        ! t0__4t0_a0
-LSYM(x110)     t0__9a0         ! t0__3t0       ! b_e_2t0       ! t0__2t0_a0
-LSYM(x111)     t0__9a0         ! t0__4t0_a0    ! b_e_t0        ! t0__3t0
-LSYM(x112)     t0__3a0         ! t0__2t0_a0    ! b_e_t0        ! t0__16t0
-LSYM(x113)     t0__9a0         ! t0__4t0_a0    ! b_e_t02a0     ! t0__3t0
-LSYM(x114)     t0__9a0         ! t0__2t0_a0    ! b_e_2t0       ! t0__3t0
-LSYM(x115)     t0__9a0         ! t0__2t0_a0    ! b_e_2t0a0     ! t0__3t0
-LSYM(x116)     t0__3a0         ! t0__2t0_a0    ! b_e_4t0       ! t0__4t0_a0
-LSYM(x117)     t0__3a0         ! t0__4t0_a0    ! b_e_t0        ! t0__9t0
-LSYM(x118)     t0__3a0         ! t0__4t0_a0    ! b_e_t0a0      ! t0__9t0
-LSYM(x119)     t0__3a0         ! t0__4t0_a0    ! b_e_t02a0     ! t0__9t0
-LSYM(x120)     t0__5a0         ! t0__3t0       ! b_e_shift     ! r__r_8t0
-LSYM(x121)     t0__5a0         ! t0__3t0       ! b_e_t0        ! t0__8t0_a0
-LSYM(x122)     t0__5a0         ! t0__3t0       ! b_e_2t0       ! t0__4t0_a0
-LSYM(x123)     t0__5a0         ! t0__8t0_a0    ! b_e_t0        ! t0__3t0
-LSYM(x124)     t0__32a0        ! t0__t0ma0     ! b_e_shift     ! r__r_4t0
-LSYM(x125)     t0__5a0         ! t0__5t0       ! b_e_t0        ! t0__5t0
-LSYM(x126)     t0__64a0        ! t0__t0ma0     ! b_e_shift     ! r__r_2t0
-LSYM(x127)     t0__128a0       ! a1_ne_0_b_l0  ! t0__t0ma0     ! b_n_ret_t0
-LSYM(x128)     t0__128a0       ! a1_ne_0_b_l1  ! r__r_t0       ! MILLIRETN
-LSYM(x129)     t0__128a0       ! a1_ne_0_b_l0  ! t0__t0_a0     ! b_n_ret_t0
-LSYM(x130)     t0__64a0        ! t0__t0_a0     ! b_e_shift     ! r__r_2t0
-LSYM(x131)     t0__8a0         ! t0__8t0_a0    ! b_e_t0        ! t0__2t0_a0
-LSYM(x132)     t0__8a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_4t0
-LSYM(x133)     t0__8a0         ! t0__4t0_a0    ! b_e_t0        ! t0__4t0_a0
-LSYM(x134)     t0__8a0         ! t0__4t0_a0    ! b_e_2t0       ! t0__2t0_a0
-LSYM(x135)     t0__9a0         ! t0__5t0       ! b_e_t0        ! t0__3t0
-LSYM(x136)     t0__8a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_8t0
-LSYM(x137)     t0__8a0         ! t0__2t0_a0    ! b_e_t0        ! t0__8t0_a0
-LSYM(x138)     t0__8a0         ! t0__2t0_a0    ! b_e_2t0       ! t0__4t0_a0
-LSYM(x139)     t0__8a0         ! t0__2t0_a0    ! b_e_2t0a0     ! t0__4t0_a0
-LSYM(x140)     t0__3a0         ! t0__2t0_a0    ! b_e_4t0       ! t0__5t0
-LSYM(x141)     t0__8a0         ! t0__2t0_a0    ! b_e_4t0a0     ! t0__2t0_a0
-LSYM(x142)     t0__9a0         ! t0__8t0       ! b_e_2t0       ! t0__t0ma0
-LSYM(x143)     t0__16a0        ! t0__9t0       ! b_e_t0        ! t0__t0ma0
-LSYM(x144)     t0__9a0         ! t0__8t0       ! b_e_shift     ! r__r_2t0
-LSYM(x145)     t0__9a0         ! t0__8t0       ! b_e_t0        ! t0__2t0_a0
-LSYM(x146)     t0__9a0         ! t0__8t0_a0    ! b_e_shift     ! r__r_2t0
-LSYM(x147)     t0__9a0         ! t0__8t0_a0    ! b_e_t0        ! t0__2t0_a0
-LSYM(x148)     t0__9a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_4t0
-LSYM(x149)     t0__9a0         ! t0__4t0_a0    ! b_e_t0        ! t0__4t0_a0
-LSYM(x150)     t0__9a0         ! t0__4t0_a0    ! b_e_2t0       ! t0__2t0_a0
-LSYM(x151)     t0__9a0         ! t0__4t0_a0    ! b_e_2t0a0     ! t0__2t0_a0
-LSYM(x152)     t0__9a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_8t0
-LSYM(x153)     t0__9a0         ! t0__2t0_a0    ! b_e_t0        ! t0__8t0_a0
-LSYM(x154)     t0__9a0         ! t0__2t0_a0    ! b_e_2t0       ! t0__4t0_a0
-LSYM(x155)     t0__32a0        ! t0__t0ma0     ! b_e_t0        ! t0__5t0
-LSYM(x156)     t0__9a0         ! t0__2t0_a0    ! b_e_4t0       ! t0__2t0_a0
-LSYM(x157)     t0__32a0        ! t0__t0ma0     ! b_e_t02a0     ! t0__5t0
-LSYM(x158)     t0__16a0        ! t0__5t0       ! b_e_2t0       ! t0__t0ma0
-LSYM(x159)     t0__32a0        ! t0__5t0       ! b_e_t0        ! t0__t0ma0
-LSYM(x160)     t0__5a0         ! t0__4t0       ! b_e_shift     ! r__r_8t0
-LSYM(x161)     t0__8a0         ! t0__5t0       ! b_e_t0        ! t0__4t0_a0
-LSYM(x162)     t0__9a0         ! t0__9t0       ! b_e_shift     ! r__r_2t0
-LSYM(x163)     t0__9a0         ! t0__9t0       ! b_e_t0        ! t0__2t0_a0
-LSYM(x164)     t0__5a0         ! t0__8t0_a0    ! b_e_shift     ! r__r_4t0
-LSYM(x165)     t0__8a0         ! t0__4t0_a0    ! b_e_t0        ! t0__5t0
-LSYM(x166)     t0__5a0         ! t0__8t0_a0    ! b_e_2t0       ! t0__2t0_a0
-LSYM(x167)     t0__5a0         ! t0__8t0_a0    ! b_e_2t0a0     ! t0__2t0_a0
-LSYM(x168)     t0__5a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_8t0
-LSYM(x169)     t0__5a0         ! t0__4t0_a0    ! b_e_t0        ! t0__8t0_a0
-LSYM(x170)     t0__32a0        ! t0__t0_2a0    ! b_e_t0        ! t0__5t0
-LSYM(x171)     t0__9a0         ! t0__2t0_a0    ! b_e_t0        ! t0__9t0
-LSYM(x172)     t0__5a0         ! t0__4t0_a0    ! b_e_4t0       ! t0__2t0_a0
-LSYM(x173)     t0__9a0         ! t0__2t0_a0    ! b_e_t02a0     ! t0__9t0
-LSYM(x174)     t0__32a0        ! t0__t0_2a0    ! b_e_t04a0     ! t0__5t0
-LSYM(x175)     t0__8a0         ! t0__2t0_a0    ! b_e_5t0       ! t0__2t0_a0
-LSYM(x176)     t0__5a0         ! t0__4t0_a0    ! b_e_8t0       ! t0__t0_a0
-LSYM(x177)     t0__5a0         ! t0__4t0_a0    ! b_e_8t0a0     ! t0__t0_a0
-LSYM(x178)     t0__5a0         ! t0__2t0_a0    ! b_e_2t0       ! t0__8t0_a0
-LSYM(x179)     t0__5a0         ! t0__2t0_a0    ! b_e_2t0a0     ! t0__8t0_a0
-LSYM(x180)     t0__9a0         ! t0__5t0       ! b_e_shift     ! r__r_4t0
-LSYM(x181)     t0__9a0         ! t0__5t0       ! b_e_t0        ! t0__4t0_a0
-LSYM(x182)     t0__9a0         ! t0__5t0       ! b_e_2t0       ! t0__2t0_a0
-LSYM(x183)     t0__9a0         ! t0__5t0       ! b_e_2t0a0     ! t0__2t0_a0
-LSYM(x184)     t0__5a0         ! t0__9t0       ! b_e_4t0       ! t0__t0_a0
-LSYM(x185)     t0__9a0         ! t0__4t0_a0    ! b_e_t0        ! t0__5t0
-LSYM(x186)     t0__32a0        ! t0__t0ma0     ! b_e_2t0       ! t0__3t0
-LSYM(x187)     t0__9a0         ! t0__4t0_a0    ! b_e_t02a0     ! t0__5t0
-LSYM(x188)     t0__9a0         ! t0__5t0       ! b_e_4t0       ! t0__t0_2a0
-LSYM(x189)     t0__5a0         ! t0__4t0_a0    ! b_e_t0        ! t0__9t0
-LSYM(x190)     t0__9a0         ! t0__2t0_a0    ! b_e_2t0       ! t0__5t0
-LSYM(x191)     t0__64a0        ! t0__3t0       ! b_e_t0        ! t0__t0ma0
-LSYM(x192)     t0__8a0         ! t0__3t0       ! b_e_shift     ! r__r_8t0
-LSYM(x193)     t0__8a0         ! t0__3t0       ! b_e_t0        ! t0__8t0_a0
-LSYM(x194)     t0__8a0         ! t0__3t0       ! b_e_2t0       ! t0__4t0_a0
-LSYM(x195)     t0__8a0         ! t0__8t0_a0    ! b_e_t0        ! t0__3t0
-LSYM(x196)     t0__8a0         ! t0__3t0       ! b_e_4t0       ! t0__2t0_a0
-LSYM(x197)     t0__8a0         ! t0__3t0       ! b_e_4t0a0     ! t0__2t0_a0
-LSYM(x198)     t0__64a0        ! t0__t0_2a0    ! b_e_t0        ! t0__3t0
-LSYM(x199)     t0__8a0         ! t0__4t0_a0    ! b_e_2t0a0     ! t0__3t0
-LSYM(x200)     t0__5a0         ! t0__5t0       ! b_e_shift     ! r__r_8t0
-LSYM(x201)     t0__5a0         ! t0__5t0       ! b_e_t0        ! t0__8t0_a0
-LSYM(x202)     t0__5a0         ! t0__5t0       ! b_e_2t0       ! t0__4t0_a0
-LSYM(x203)     t0__5a0         ! t0__5t0       ! b_e_2t0a0     ! t0__4t0_a0
-LSYM(x204)     t0__8a0         ! t0__2t0_a0    ! b_e_4t0       ! t0__3t0
-LSYM(x205)     t0__5a0         ! t0__8t0_a0    ! b_e_t0        ! t0__5t0
-LSYM(x206)     t0__64a0        ! t0__t0_4a0    ! b_e_t02a0     ! t0__3t0
-LSYM(x207)     t0__8a0         ! t0__2t0_a0    ! b_e_3t0       ! t0__4t0_a0
-LSYM(x208)     t0__5a0         ! t0__5t0       ! b_e_8t0       ! t0__t0_a0
-LSYM(x209)     t0__5a0         ! t0__5t0       ! b_e_8t0a0     ! t0__t0_a0
-LSYM(x210)     t0__5a0         ! t0__4t0_a0    ! b_e_2t0       ! t0__5t0
-LSYM(x211)     t0__5a0         ! t0__4t0_a0    ! b_e_2t0a0     ! t0__5t0
-LSYM(x212)     t0__3a0         ! t0__4t0_a0    ! b_e_4t0       ! t0__4t0_a0
-LSYM(x213)     t0__3a0         ! t0__4t0_a0    ! b_e_4t0a0     ! t0__4t0_a0
-LSYM(x214)     t0__9a0         ! t0__t0_4a0    ! b_e_2t04a0    ! t0__8t0_a0
-LSYM(x215)     t0__5a0         ! t0__4t0_a0    ! b_e_5t0       ! t0__2t0_a0
-LSYM(x216)     t0__9a0         ! t0__3t0       ! b_e_shift     ! r__r_8t0
-LSYM(x217)     t0__9a0         ! t0__3t0       ! b_e_t0        ! t0__8t0_a0
-LSYM(x218)     t0__9a0         ! t0__3t0       ! b_e_2t0       ! t0__4t0_a0
-LSYM(x219)     t0__9a0         ! t0__8t0_a0    ! b_e_t0        ! t0__3t0
-LSYM(x220)     t0__3a0         ! t0__9t0       ! b_e_4t0       ! t0__2t0_a0
-LSYM(x221)     t0__3a0         ! t0__9t0       ! b_e_4t0a0     ! t0__2t0_a0
-LSYM(x222)     t0__9a0         ! t0__4t0_a0    ! b_e_2t0       ! t0__3t0
-LSYM(x223)     t0__9a0         ! t0__4t0_a0    ! b_e_2t0a0     ! t0__3t0
-LSYM(x224)     t0__9a0         ! t0__3t0       ! b_e_8t0       ! t0__t0_a0
-LSYM(x225)     t0__9a0         ! t0__5t0       ! b_e_t0        ! t0__5t0
-LSYM(x226)     t0__3a0         ! t0__2t0_a0    ! b_e_t02a0     ! t0__32t0
-LSYM(x227)     t0__9a0         ! t0__5t0       ! b_e_t02a0     ! t0__5t0
-LSYM(x228)     t0__9a0         ! t0__2t0_a0    ! b_e_4t0       ! t0__3t0
-LSYM(x229)     t0__9a0         ! t0__2t0_a0    ! b_e_4t0a0     ! t0__3t0
-LSYM(x230)     t0__9a0         ! t0__5t0       ! b_e_5t0       ! t0__t0_a0
-LSYM(x231)     t0__9a0         ! t0__2t0_a0    ! b_e_3t0       ! t0__4t0_a0
-LSYM(x232)     t0__3a0         ! t0__2t0_a0    ! b_e_8t0       ! t0__4t0_a0
-LSYM(x233)     t0__3a0         ! t0__2t0_a0    ! b_e_8t0a0     ! t0__4t0_a0
-LSYM(x234)     t0__3a0         ! t0__4t0_a0    ! b_e_2t0       ! t0__9t0
-LSYM(x235)     t0__3a0         ! t0__4t0_a0    ! b_e_2t0a0     ! t0__9t0
-LSYM(x236)     t0__9a0         ! t0__2t0_a0    ! b_e_4t08a0    ! t0__3t0
-LSYM(x237)     t0__16a0        ! t0__5t0       ! b_e_3t0       ! t0__t0ma0
-LSYM(x238)     t0__3a0         ! t0__4t0_a0    ! b_e_2t04a0    ! t0__9t0
-LSYM(x239)     t0__16a0        ! t0__5t0       ! b_e_t0ma0     ! t0__3t0
-LSYM(x240)     t0__9a0         ! t0__t0_a0     ! b_e_8t0       ! t0__3t0
-LSYM(x241)     t0__9a0         ! t0__t0_a0     ! b_e_8t0a0     ! t0__3t0
-LSYM(x242)     t0__5a0         ! t0__3t0       ! b_e_2t0       ! t0__8t0_a0
-LSYM(x243)     t0__9a0         ! t0__9t0       ! b_e_t0        ! t0__3t0
-LSYM(x244)     t0__5a0         ! t0__3t0       ! b_e_4t0       ! t0__4t0_a0
-LSYM(x245)     t0__8a0         ! t0__3t0       ! b_e_5t0       ! t0__2t0_a0
-LSYM(x246)     t0__5a0         ! t0__8t0_a0    ! b_e_2t0       ! t0__3t0
-LSYM(x247)     t0__5a0         ! t0__8t0_a0    ! b_e_2t0a0     ! t0__3t0
-LSYM(x248)     t0__32a0        ! t0__t0ma0     ! b_e_shift     ! r__r_8t0
-LSYM(x249)     t0__32a0        ! t0__t0ma0     ! b_e_t0        ! t0__8t0_a0
-LSYM(x250)     t0__5a0         ! t0__5t0       ! b_e_2t0       ! t0__5t0
-LSYM(x251)     t0__5a0         ! t0__5t0       ! b_e_2t0a0     ! t0__5t0
-LSYM(x252)     t0__64a0        ! t0__t0ma0     ! b_e_shift     ! r__r_4t0
-LSYM(x253)     t0__64a0        ! t0__t0ma0     ! b_e_t0        ! t0__4t0_a0
-LSYM(x254)     t0__128a0       ! t0__t0ma0     ! b_e_shift     ! r__r_2t0
-LSYM(x255)     t0__256a0       ! a1_ne_0_b_l0  ! t0__t0ma0     ! b_n_ret_t0
-/*1040 insts before this.  */
-LSYM(ret_t0)   MILLIRET
-LSYM(e_t0)     r__r_t0
-LSYM(e_shift)  a1_ne_0_b_l2
-       a0__256a0       /* a0 <<= 8 *********** */
-       MILLIRETN
-LSYM(e_t0ma0)  a1_ne_0_b_l0
-       t0__t0ma0
-       MILLIRET
-       r__r_t0
-LSYM(e_t0a0)   a1_ne_0_b_l0
-       t0__t0_a0
-       MILLIRET
-       r__r_t0
-LSYM(e_t02a0)  a1_ne_0_b_l0
-       t0__t0_2a0
-       MILLIRET
-       r__r_t0
-LSYM(e_t04a0)  a1_ne_0_b_l0
-       t0__t0_4a0
-       MILLIRET
-       r__r_t0
-LSYM(e_2t0)    a1_ne_0_b_l1
-       r__r_2t0
-       MILLIRETN
-LSYM(e_2t0a0)  a1_ne_0_b_l0
-       t0__2t0_a0
-       MILLIRET
-       r__r_t0
-LSYM(e2t04a0)  t0__t0_2a0
-       a1_ne_0_b_l1
-       r__r_2t0
-       MILLIRETN
-LSYM(e_3t0)    a1_ne_0_b_l0
-       t0__3t0
-       MILLIRET
-       r__r_t0
-LSYM(e_4t0)    a1_ne_0_b_l1
-       r__r_4t0
-       MILLIRETN
-LSYM(e_4t0a0)  a1_ne_0_b_l0
-       t0__4t0_a0
-       MILLIRET
-       r__r_t0
-LSYM(e4t08a0)  t0__t0_2a0
-       a1_ne_0_b_l1
-       r__r_4t0
-       MILLIRETN
-LSYM(e_5t0)    a1_ne_0_b_l0
-       t0__5t0
-       MILLIRET
-       r__r_t0
-LSYM(e_8t0)    a1_ne_0_b_l1
-       r__r_8t0
-       MILLIRETN
-LSYM(e_8t0a0)  a1_ne_0_b_l0
-       t0__8t0_a0
-       MILLIRET
-       r__r_t0
-
-       .procend
-       .end
-#endif
diff --git a/gcc/config/pa/t-linux b/gcc/config/pa/t-linux

index df351e11458b68a88a3cf91b29e6f9d5faef98a9..b94ebd250a8a2589851404c00ebd2430eabf9fec 100644 (file)
--- a/gcc/config/pa/t-linux
+++ b/gcc/config/pa/t-linux
@@ -16,13 +16,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-#Plug millicode routines into libgcc.a  We want these on both native and
-#cross compiles.  We use the "64-bit" routines because the "32-bit" code
-#is broken for certain corner cases.
-
-LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall
-LIB1ASMSRC = pa/milli64.S
-
  # Compile libgcc2.a as PIC.
  TARGET_LIBGCC2_CFLAGS = -fPIC -DELF=1 -DLINUX=1
  
diff --git a/gcc/config/pa/t-linux64 b/gcc/config/pa/t-linux64

index d40546cabccc4194eda0da2f1276c9f9dd8cbd6f..af803a27ed3ffe244d71eb672ca091794b23ddb8 100644 (file)
--- a/gcc/config/pa/t-linux64
+++ b/gcc/config/pa/t-linux64
@@ -16,12 +16,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-#Plug millicode routines into libgcc.a  We want these on both native and
-#cross compiles.
-
-LIB1ASMFUNCS =  _divI _divU _remI _remU _div_const _mulI
-LIB1ASMSRC = pa/milli64.S
-
  LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/pa/linux-atomic.c
  
  # Compile libgcc2.a as PIC.
diff --git a/gcc/config/picochip/libgccExtras/fake_libgcc.asm b/gcc/config/picochip/libgccExtras/fake_libgcc.asm

deleted file mode 100644 (file)

index e4b78f1..0000000
--- a/gcc/config/picochip/libgccExtras/fake_libgcc.asm
+++ /dev/null
@@ -1,6 +0,0 @@
-// picoChip ASM file
-// Fake libgcc asm file. This contains nothing, but is used to prevent gcc
-// getting upset about the lack of a libgcc.S file when LIB1ASMFUNCS is defined
-// to switch off the compilation of parts of libgcc.
-
-
diff --git a/gcc/config/picochip/t-picochip b/gcc/config/picochip/t-picochip

index 222d7a646b93863fbdc234b0f17471062c09e74a..0f3fe8c3d816fdc2bf140f864271709818d3d180 100644 (file)
--- a/gcc/config/picochip/t-picochip
+++ b/gcc/config/picochip/t-picochip
@@ -35,14 +35,6 @@ LIB2FUNCS_EXTRA = \
         $(srcdir)/config/picochip/libgccExtras/parityhi2.asm            \
         $(srcdir)/config/picochip/libgccExtras/popcounthi2.asm
  
-# Prevent some of the more complicated libgcc functions from being
-# compiled. This is because they are generally too big to fit into an
-# AE anyway, so there is no point in having them. Also, some don't
-# compile properly so we'll ignore them for the moment.
-
-LIB1ASMFUNCS = _mulsc3 _divsc3
-LIB1ASMSRC = picochip/libgccExtras/fake_libgcc.asm
-
  # Turn off ranlib on target libraries.
  RANLIB_FOR_TARGET = cat
  
diff --git a/gcc/config/sh/lib1funcs.asm b/gcc/config/sh/lib1funcs.asm

deleted file mode 100644 (file)

index 2f0ca16..0000000
--- a/gcc/config/sh/lib1funcs.asm
+++ /dev/null
@@ -1,3933 +0,0 @@
-/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
-   2004, 2005, 2006, 2009
-   Free Software Foundation, Inc.
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-
-!! libgcc routines for the Renesas / SuperH SH CPUs.
-!! Contributed by Steve Chamberlain.
-!! sac@cygnus.com
-
-!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
-!! recoded in assembly by Toshiyasu Morita
-!! tm@netcom.com
-
-#if defined(__ELF__) && defined(__linux__)
-.section .note.GNU-stack,"",%progbits
-.previous
-#endif
-
-/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
-   ELF local label prefixes by J"orn Rennecke
-   amylaar@cygnus.com  */
-
-#include "lib1funcs.h"
-
-/* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
-   so it is more convenient to define NO_FPSCR_VALUES here than to
-   define it on the command line.  */
-#if defined __vxworks && defined __PIC__
-#define NO_FPSCR_VALUES
-#endif
-       
-#if ! __SH5__
-#ifdef L_ashiftrt
-       .global GLOBAL(ashiftrt_r4_0)
-       .global GLOBAL(ashiftrt_r4_1)
-       .global GLOBAL(ashiftrt_r4_2)
-       .global GLOBAL(ashiftrt_r4_3)
-       .global GLOBAL(ashiftrt_r4_4)
-       .global GLOBAL(ashiftrt_r4_5)
-       .global GLOBAL(ashiftrt_r4_6)
-       .global GLOBAL(ashiftrt_r4_7)
-       .global GLOBAL(ashiftrt_r4_8)
-       .global GLOBAL(ashiftrt_r4_9)
-       .global GLOBAL(ashiftrt_r4_10)
-       .global GLOBAL(ashiftrt_r4_11)
-       .global GLOBAL(ashiftrt_r4_12)
-       .global GLOBAL(ashiftrt_r4_13)
-       .global GLOBAL(ashiftrt_r4_14)
-       .global GLOBAL(ashiftrt_r4_15)
-       .global GLOBAL(ashiftrt_r4_16)
-       .global GLOBAL(ashiftrt_r4_17)
-       .global GLOBAL(ashiftrt_r4_18)
-       .global GLOBAL(ashiftrt_r4_19)
-       .global GLOBAL(ashiftrt_r4_20)
-       .global GLOBAL(ashiftrt_r4_21)
-       .global GLOBAL(ashiftrt_r4_22)
-       .global GLOBAL(ashiftrt_r4_23)
-       .global GLOBAL(ashiftrt_r4_24)
-       .global GLOBAL(ashiftrt_r4_25)
-       .global GLOBAL(ashiftrt_r4_26)
-       .global GLOBAL(ashiftrt_r4_27)
-       .global GLOBAL(ashiftrt_r4_28)
-       .global GLOBAL(ashiftrt_r4_29)
-       .global GLOBAL(ashiftrt_r4_30)
-       .global GLOBAL(ashiftrt_r4_31)
-       .global GLOBAL(ashiftrt_r4_32)
-
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
-       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
-
-       .align  1
-GLOBAL(ashiftrt_r4_32):
-GLOBAL(ashiftrt_r4_31):
-       rotcl   r4
-       rts
-       subc    r4,r4
-
-GLOBAL(ashiftrt_r4_30):
-       shar    r4
-GLOBAL(ashiftrt_r4_29):
-       shar    r4
-GLOBAL(ashiftrt_r4_28):
-       shar    r4
-GLOBAL(ashiftrt_r4_27):
-       shar    r4
-GLOBAL(ashiftrt_r4_26):
-       shar    r4
-GLOBAL(ashiftrt_r4_25):
-       shar    r4
-GLOBAL(ashiftrt_r4_24):
-       shlr16  r4
-       shlr8   r4
-       rts
-       exts.b  r4,r4
-
-GLOBAL(ashiftrt_r4_23):
-       shar    r4
-GLOBAL(ashiftrt_r4_22):
-       shar    r4
-GLOBAL(ashiftrt_r4_21):
-       shar    r4
-GLOBAL(ashiftrt_r4_20):
-       shar    r4
-GLOBAL(ashiftrt_r4_19):
-       shar    r4
-GLOBAL(ashiftrt_r4_18):
-       shar    r4
-GLOBAL(ashiftrt_r4_17):
-       shar    r4
-GLOBAL(ashiftrt_r4_16):
-       shlr16  r4
-       rts
-       exts.w  r4,r4
-
-GLOBAL(ashiftrt_r4_15):
-       shar    r4
-GLOBAL(ashiftrt_r4_14):
-       shar    r4
-GLOBAL(ashiftrt_r4_13):
-       shar    r4
-GLOBAL(ashiftrt_r4_12):
-       shar    r4
-GLOBAL(ashiftrt_r4_11):
-       shar    r4
-GLOBAL(ashiftrt_r4_10):
-       shar    r4
-GLOBAL(ashiftrt_r4_9):
-       shar    r4
-GLOBAL(ashiftrt_r4_8):
-       shar    r4
-GLOBAL(ashiftrt_r4_7):
-       shar    r4
-GLOBAL(ashiftrt_r4_6):
-       shar    r4
-GLOBAL(ashiftrt_r4_5):
-       shar    r4
-GLOBAL(ashiftrt_r4_4):
-       shar    r4
-GLOBAL(ashiftrt_r4_3):
-       shar    r4
-GLOBAL(ashiftrt_r4_2):
-       shar    r4
-GLOBAL(ashiftrt_r4_1):
-       rts
-       shar    r4
-
-GLOBAL(ashiftrt_r4_0):
-       rts
-       nop
-
-       ENDFUNC(GLOBAL(ashiftrt_r4_0))
-       ENDFUNC(GLOBAL(ashiftrt_r4_1))
-       ENDFUNC(GLOBAL(ashiftrt_r4_2))
-       ENDFUNC(GLOBAL(ashiftrt_r4_3))
-       ENDFUNC(GLOBAL(ashiftrt_r4_4))
-       ENDFUNC(GLOBAL(ashiftrt_r4_5))
-       ENDFUNC(GLOBAL(ashiftrt_r4_6))
-       ENDFUNC(GLOBAL(ashiftrt_r4_7))
-       ENDFUNC(GLOBAL(ashiftrt_r4_8))
-       ENDFUNC(GLOBAL(ashiftrt_r4_9))
-       ENDFUNC(GLOBAL(ashiftrt_r4_10))
-       ENDFUNC(GLOBAL(ashiftrt_r4_11))
-       ENDFUNC(GLOBAL(ashiftrt_r4_12))
-       ENDFUNC(GLOBAL(ashiftrt_r4_13))
-       ENDFUNC(GLOBAL(ashiftrt_r4_14))
-       ENDFUNC(GLOBAL(ashiftrt_r4_15))
-       ENDFUNC(GLOBAL(ashiftrt_r4_16))
-       ENDFUNC(GLOBAL(ashiftrt_r4_17))
-       ENDFUNC(GLOBAL(ashiftrt_r4_18))
-       ENDFUNC(GLOBAL(ashiftrt_r4_19))
-       ENDFUNC(GLOBAL(ashiftrt_r4_20))
-       ENDFUNC(GLOBAL(ashiftrt_r4_21))
-       ENDFUNC(GLOBAL(ashiftrt_r4_22))
-       ENDFUNC(GLOBAL(ashiftrt_r4_23))
-       ENDFUNC(GLOBAL(ashiftrt_r4_24))
-       ENDFUNC(GLOBAL(ashiftrt_r4_25))
-       ENDFUNC(GLOBAL(ashiftrt_r4_26))
-       ENDFUNC(GLOBAL(ashiftrt_r4_27))
-       ENDFUNC(GLOBAL(ashiftrt_r4_28))
-       ENDFUNC(GLOBAL(ashiftrt_r4_29))
-       ENDFUNC(GLOBAL(ashiftrt_r4_30))
-       ENDFUNC(GLOBAL(ashiftrt_r4_31))
-       ENDFUNC(GLOBAL(ashiftrt_r4_32))
-#endif
-
-#ifdef L_ashiftrt_n
-
-!
-! GLOBAL(ashrsi3)
-!
-! Entry:
-!
-! r4: Value to shift
-! r5: Shifts
-!
-! Exit:
-!
-! r0: Result
-!
-! Destroys:
-!
-! (none)
-!
-
-       .global GLOBAL(ashrsi3)
-       HIDDEN_FUNC(GLOBAL(ashrsi3))
-       .align  2
-GLOBAL(ashrsi3):
-       mov     #31,r0
-       and     r0,r5
-       mova    LOCAL(ashrsi3_table),r0
-       mov.b   @(r0,r5),r5
-#ifdef __sh1__
-       add     r5,r0
-       jmp     @r0
-#else
-       braf    r5
-#endif
-       mov     r4,r0
-
-       .align  2
-LOCAL(ashrsi3_table):
-       .byte           LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
-       .byte           LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
-
-LOCAL(ashrsi3_31):
-       rotcl   r0
-       rts
-       subc    r0,r0
-
-LOCAL(ashrsi3_30):
-       shar    r0
-LOCAL(ashrsi3_29):
-       shar    r0
-LOCAL(ashrsi3_28):
-       shar    r0
-LOCAL(ashrsi3_27):
-       shar    r0
-LOCAL(ashrsi3_26):
-       shar    r0
-LOCAL(ashrsi3_25):
-       shar    r0
-LOCAL(ashrsi3_24):
-       shlr16  r0
-       shlr8   r0
-       rts
-       exts.b  r0,r0
-
-LOCAL(ashrsi3_23):
-       shar    r0
-LOCAL(ashrsi3_22):
-       shar    r0
-LOCAL(ashrsi3_21):
-       shar    r0
-LOCAL(ashrsi3_20):
-       shar    r0
-LOCAL(ashrsi3_19):
-       shar    r0
-LOCAL(ashrsi3_18):
-       shar    r0
-LOCAL(ashrsi3_17):
-       shar    r0
-LOCAL(ashrsi3_16):
-       shlr16  r0
-       rts
-       exts.w  r0,r0
-
-LOCAL(ashrsi3_15):
-       shar    r0
-LOCAL(ashrsi3_14):
-       shar    r0
-LOCAL(ashrsi3_13):
-       shar    r0
-LOCAL(ashrsi3_12):
-       shar    r0
-LOCAL(ashrsi3_11):
-       shar    r0
-LOCAL(ashrsi3_10):
-       shar    r0
-LOCAL(ashrsi3_9):
-       shar    r0
-LOCAL(ashrsi3_8):
-       shar    r0
-LOCAL(ashrsi3_7):
-       shar    r0
-LOCAL(ashrsi3_6):
-       shar    r0
-LOCAL(ashrsi3_5):
-       shar    r0
-LOCAL(ashrsi3_4):
-       shar    r0
-LOCAL(ashrsi3_3):
-       shar    r0
-LOCAL(ashrsi3_2):
-       shar    r0
-LOCAL(ashrsi3_1):
-       rts
-       shar    r0
-
-LOCAL(ashrsi3_0):
-       rts
-       nop
-
-       ENDFUNC(GLOBAL(ashrsi3))
-#endif
-
-#ifdef L_ashiftlt
-
-!
-! GLOBAL(ashlsi3)
-!
-! Entry:
-!
-! r4: Value to shift
-! r5: Shifts
-!
-! Exit:
-!
-! r0: Result
-!
-! Destroys:
-!
-! (none)
-!
-       .global GLOBAL(ashlsi3)
-       HIDDEN_FUNC(GLOBAL(ashlsi3))
-       .align  2
-GLOBAL(ashlsi3):
-       mov     #31,r0
-       and     r0,r5
-       mova    LOCAL(ashlsi3_table),r0
-       mov.b   @(r0,r5),r5
-#ifdef __sh1__
-       add     r5,r0
-       jmp     @r0
-#else
-       braf    r5
-#endif
-       mov     r4,r0
-
-       .align  2
-LOCAL(ashlsi3_table):
-       .byte           LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
-       .byte           LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
-
-LOCAL(ashlsi3_6):
-       shll2   r0
-LOCAL(ashlsi3_4):
-       shll2   r0
-LOCAL(ashlsi3_2):
-       rts
-       shll2   r0
-
-LOCAL(ashlsi3_7):
-       shll2   r0
-LOCAL(ashlsi3_5):
-       shll2   r0
-LOCAL(ashlsi3_3):
-       shll2   r0
-LOCAL(ashlsi3_1):
-       rts
-       shll    r0
-
-LOCAL(ashlsi3_14):
-       shll2   r0
-LOCAL(ashlsi3_12):
-       shll2   r0
-LOCAL(ashlsi3_10):
-       shll2   r0
-LOCAL(ashlsi3_8):
-       rts
-       shll8   r0
-
-LOCAL(ashlsi3_15):
-       shll2   r0
-LOCAL(ashlsi3_13):
-       shll2   r0
-LOCAL(ashlsi3_11):
-       shll2   r0
-LOCAL(ashlsi3_9):
-       shll8   r0
-       rts
-       shll    r0
-
-LOCAL(ashlsi3_22):
-       shll2   r0
-LOCAL(ashlsi3_20):
-       shll2   r0
-LOCAL(ashlsi3_18):
-       shll2   r0
-LOCAL(ashlsi3_16):
-       rts
-       shll16  r0
-
-LOCAL(ashlsi3_23):
-       shll2   r0
-LOCAL(ashlsi3_21):
-       shll2   r0
-LOCAL(ashlsi3_19):
-       shll2   r0
-LOCAL(ashlsi3_17):
-       shll16  r0
-       rts
-       shll    r0
-
-LOCAL(ashlsi3_30):
-       shll2   r0
-LOCAL(ashlsi3_28):
-       shll2   r0
-LOCAL(ashlsi3_26):
-       shll2   r0
-LOCAL(ashlsi3_24):
-       shll16  r0
-       rts
-       shll8   r0
-
-LOCAL(ashlsi3_31):
-       shll2   r0
-LOCAL(ashlsi3_29):
-       shll2   r0
-LOCAL(ashlsi3_27):
-       shll2   r0
-LOCAL(ashlsi3_25):
-       shll16  r0
-       shll8   r0
-       rts
-       shll    r0
-
-LOCAL(ashlsi3_0):
-       rts
-       nop
-
-       ENDFUNC(GLOBAL(ashlsi3))
-#endif
-
-#ifdef L_lshiftrt
-
-!
-! GLOBAL(lshrsi3)
-!
-! Entry:
-!
-! r4: Value to shift
-! r5: Shifts
-!
-! Exit:
-!
-! r0: Result
-!
-! Destroys:
-!
-! (none)
-!
-       .global GLOBAL(lshrsi3)
-       HIDDEN_FUNC(GLOBAL(lshrsi3))
-       .align  2
-GLOBAL(lshrsi3):
-       mov     #31,r0
-       and     r0,r5
-       mova    LOCAL(lshrsi3_table),r0
-       mov.b   @(r0,r5),r5
-#ifdef __sh1__
-       add     r5,r0
-       jmp     @r0
-#else
-       braf    r5
-#endif
-       mov     r4,r0
-
-       .align  2
-LOCAL(lshrsi3_table):
-       .byte           LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
-       .byte           LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
-
-LOCAL(lshrsi3_6):
-       shlr2   r0
-LOCAL(lshrsi3_4):
-       shlr2   r0
-LOCAL(lshrsi3_2):
-       rts
-       shlr2   r0
-
-LOCAL(lshrsi3_7):
-       shlr2   r0
-LOCAL(lshrsi3_5):
-       shlr2   r0
-LOCAL(lshrsi3_3):
-       shlr2   r0
-LOCAL(lshrsi3_1):
-       rts
-       shlr    r0
-
-LOCAL(lshrsi3_14):
-       shlr2   r0
-LOCAL(lshrsi3_12):
-       shlr2   r0
-LOCAL(lshrsi3_10):
-       shlr2   r0
-LOCAL(lshrsi3_8):
-       rts
-       shlr8   r0
-
-LOCAL(lshrsi3_15):
-       shlr2   r0
-LOCAL(lshrsi3_13):
-       shlr2   r0
-LOCAL(lshrsi3_11):
-       shlr2   r0
-LOCAL(lshrsi3_9):
-       shlr8   r0
-       rts
-       shlr    r0
-
-LOCAL(lshrsi3_22):
-       shlr2   r0
-LOCAL(lshrsi3_20):
-       shlr2   r0
-LOCAL(lshrsi3_18):
-       shlr2   r0
-LOCAL(lshrsi3_16):
-       rts
-       shlr16  r0
-
-LOCAL(lshrsi3_23):
-       shlr2   r0
-LOCAL(lshrsi3_21):
-       shlr2   r0
-LOCAL(lshrsi3_19):
-       shlr2   r0
-LOCAL(lshrsi3_17):
-       shlr16  r0
-       rts
-       shlr    r0
-
-LOCAL(lshrsi3_30):
-       shlr2   r0
-LOCAL(lshrsi3_28):
-       shlr2   r0
-LOCAL(lshrsi3_26):
-       shlr2   r0
-LOCAL(lshrsi3_24):
-       shlr16  r0
-       rts
-       shlr8   r0
-
-LOCAL(lshrsi3_31):
-       shlr2   r0
-LOCAL(lshrsi3_29):
-       shlr2   r0
-LOCAL(lshrsi3_27):
-       shlr2   r0
-LOCAL(lshrsi3_25):
-       shlr16  r0
-       shlr8   r0
-       rts
-       shlr    r0
-
-LOCAL(lshrsi3_0):
-       rts
-       nop
-
-       ENDFUNC(GLOBAL(lshrsi3))
-#endif
-
-#ifdef L_movmem
-       .text
-       .balign 4
-       .global GLOBAL(movmem)
-       HIDDEN_FUNC(GLOBAL(movmem))
-       HIDDEN_ALIAS(movstr,movmem)
-       /* This would be a lot simpler if r6 contained the byte count
-          minus 64, and we wouldn't be called here for a byte count of 64.  */
-GLOBAL(movmem):
-       sts.l   pr,@-r15
-       shll2   r6
-       bsr     GLOBAL(movmemSI52+2)
-       mov.l   @(48,r5),r0
-       .balign 4
-LOCAL(movmem_loop): /* Reached with rts */
-       mov.l   @(60,r5),r0
-       add     #-64,r6
-       mov.l   r0,@(60,r4)
-       tst     r6,r6
-       mov.l   @(56,r5),r0
-       bt      LOCAL(movmem_done)
-       mov.l   r0,@(56,r4)
-       cmp/pl  r6
-       mov.l   @(52,r5),r0
-       add     #64,r5
-       mov.l   r0,@(52,r4)
-       add     #64,r4
-       bt      GLOBAL(movmemSI52)
-! done all the large groups, do the remainder
-! jump to movmem+
-       mova    GLOBAL(movmemSI4)+4,r0
-       add     r6,r0
-       jmp     @r0
-LOCAL(movmem_done): ! share slot insn, works out aligned.
-       lds.l   @r15+,pr
-       mov.l   r0,@(56,r4)
-       mov.l   @(52,r5),r0
-       rts
-       mov.l   r0,@(52,r4)
-       .balign 4
-! ??? We need aliases movstr* for movmem* for the older libraries.  These
-! aliases will be removed at the some point in the future.
-       .global GLOBAL(movmemSI64)
-       HIDDEN_FUNC(GLOBAL(movmemSI64))
-       HIDDEN_ALIAS(movstrSI64,movmemSI64)
-GLOBAL(movmemSI64):
-       mov.l   @(60,r5),r0
-       mov.l   r0,@(60,r4)
-       .global GLOBAL(movmemSI60)
-       HIDDEN_FUNC(GLOBAL(movmemSI60))
-       HIDDEN_ALIAS(movstrSI60,movmemSI60)
-GLOBAL(movmemSI60):
-       mov.l   @(56,r5),r0
-       mov.l   r0,@(56,r4)
-       .global GLOBAL(movmemSI56)
-       HIDDEN_FUNC(GLOBAL(movmemSI56))
-       HIDDEN_ALIAS(movstrSI56,movmemSI56)
-GLOBAL(movmemSI56):
-       mov.l   @(52,r5),r0
-       mov.l   r0,@(52,r4)
-       .global GLOBAL(movmemSI52)
-       HIDDEN_FUNC(GLOBAL(movmemSI52))
-       HIDDEN_ALIAS(movstrSI52,movmemSI52)
-GLOBAL(movmemSI52):
-       mov.l   @(48,r5),r0
-       mov.l   r0,@(48,r4)
-       .global GLOBAL(movmemSI48)
-       HIDDEN_FUNC(GLOBAL(movmemSI48))
-       HIDDEN_ALIAS(movstrSI48,movmemSI48)
-GLOBAL(movmemSI48):
-       mov.l   @(44,r5),r0
-       mov.l   r0,@(44,r4)
-       .global GLOBAL(movmemSI44)
-       HIDDEN_FUNC(GLOBAL(movmemSI44))
-       HIDDEN_ALIAS(movstrSI44,movmemSI44)
-GLOBAL(movmemSI44):
-       mov.l   @(40,r5),r0
-       mov.l   r0,@(40,r4)
-       .global GLOBAL(movmemSI40)
-       HIDDEN_FUNC(GLOBAL(movmemSI40))
-       HIDDEN_ALIAS(movstrSI40,movmemSI40)
-GLOBAL(movmemSI40):
-       mov.l   @(36,r5),r0
-       mov.l   r0,@(36,r4)
-       .global GLOBAL(movmemSI36)
-       HIDDEN_FUNC(GLOBAL(movmemSI36))
-       HIDDEN_ALIAS(movstrSI36,movmemSI36)
-GLOBAL(movmemSI36):
-       mov.l   @(32,r5),r0
-       mov.l   r0,@(32,r4)
-       .global GLOBAL(movmemSI32)
-       HIDDEN_FUNC(GLOBAL(movmemSI32))
-       HIDDEN_ALIAS(movstrSI32,movmemSI32)
-GLOBAL(movmemSI32):
-       mov.l   @(28,r5),r0
-       mov.l   r0,@(28,r4)
-       .global GLOBAL(movmemSI28)
-       HIDDEN_FUNC(GLOBAL(movmemSI28))
-       HIDDEN_ALIAS(movstrSI28,movmemSI28)
-GLOBAL(movmemSI28):
-       mov.l   @(24,r5),r0
-       mov.l   r0,@(24,r4)
-       .global GLOBAL(movmemSI24)
-       HIDDEN_FUNC(GLOBAL(movmemSI24))
-       HIDDEN_ALIAS(movstrSI24,movmemSI24)
-GLOBAL(movmemSI24):
-       mov.l   @(20,r5),r0
-       mov.l   r0,@(20,r4)
-       .global GLOBAL(movmemSI20)
-       HIDDEN_FUNC(GLOBAL(movmemSI20))
-       HIDDEN_ALIAS(movstrSI20,movmemSI20)
-GLOBAL(movmemSI20):
-       mov.l   @(16,r5),r0
-       mov.l   r0,@(16,r4)
-       .global GLOBAL(movmemSI16)
-       HIDDEN_FUNC(GLOBAL(movmemSI16))
-       HIDDEN_ALIAS(movstrSI16,movmemSI16)
-GLOBAL(movmemSI16):
-       mov.l   @(12,r5),r0
-       mov.l   r0,@(12,r4)
-       .global GLOBAL(movmemSI12)
-       HIDDEN_FUNC(GLOBAL(movmemSI12))
-       HIDDEN_ALIAS(movstrSI12,movmemSI12)
-GLOBAL(movmemSI12):
-       mov.l   @(8,r5),r0
-       mov.l   r0,@(8,r4)
-       .global GLOBAL(movmemSI8)
-       HIDDEN_FUNC(GLOBAL(movmemSI8))
-       HIDDEN_ALIAS(movstrSI8,movmemSI8)
-GLOBAL(movmemSI8):
-       mov.l   @(4,r5),r0
-       mov.l   r0,@(4,r4)
-       .global GLOBAL(movmemSI4)
-       HIDDEN_FUNC(GLOBAL(movmemSI4))
-       HIDDEN_ALIAS(movstrSI4,movmemSI4)
-GLOBAL(movmemSI4):
-       mov.l   @(0,r5),r0
-       rts
-       mov.l   r0,@(0,r4)
-
-       ENDFUNC(GLOBAL(movmemSI64))
-       ENDFUNC(GLOBAL(movmemSI60))
-       ENDFUNC(GLOBAL(movmemSI56))
-       ENDFUNC(GLOBAL(movmemSI52))
-       ENDFUNC(GLOBAL(movmemSI48))
-       ENDFUNC(GLOBAL(movmemSI44))
-       ENDFUNC(GLOBAL(movmemSI40))
-       ENDFUNC(GLOBAL(movmemSI36))
-       ENDFUNC(GLOBAL(movmemSI32))
-       ENDFUNC(GLOBAL(movmemSI28))
-       ENDFUNC(GLOBAL(movmemSI24))
-       ENDFUNC(GLOBAL(movmemSI20))
-       ENDFUNC(GLOBAL(movmemSI16))
-       ENDFUNC(GLOBAL(movmemSI12))
-       ENDFUNC(GLOBAL(movmemSI8))
-       ENDFUNC(GLOBAL(movmemSI4))
-       ENDFUNC(GLOBAL(movmem))
-#endif
-
-#ifdef L_movmem_i4
-       .text
-       .global GLOBAL(movmem_i4_even)
-       .global GLOBAL(movmem_i4_odd)
-       .global GLOBAL(movmemSI12_i4)
-
-       HIDDEN_FUNC(GLOBAL(movmem_i4_even))
-       HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
-       HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
-
-       HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
-       HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
-       HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
-
-       .p2align        5
-L_movmem_2mod4_end:
-       mov.l   r0,@(16,r4)
-       rts
-       mov.l   r1,@(20,r4)
-
-       .p2align        2
-
-GLOBAL(movmem_i4_even):
-       mov.l   @r5+,r0
-       bra     L_movmem_start_even
-       mov.l   @r5+,r1
-
-GLOBAL(movmem_i4_odd):
-       mov.l   @r5+,r1
-       add     #-4,r4
-       mov.l   @r5+,r2
-       mov.l   @r5+,r3
-       mov.l   r1,@(4,r4)
-       mov.l   r2,@(8,r4)
-
-L_movmem_loop:
-       mov.l   r3,@(12,r4)
-       dt      r6
-       mov.l   @r5+,r0
-       bt/s    L_movmem_2mod4_end
-       mov.l   @r5+,r1
-       add     #16,r4
-L_movmem_start_even:
-       mov.l   @r5+,r2
-       mov.l   @r5+,r3
-       mov.l   r0,@r4
-       dt      r6
-       mov.l   r1,@(4,r4)
-       bf/s    L_movmem_loop
-       mov.l   r2,@(8,r4)
-       rts
-       mov.l   r3,@(12,r4)
-
-       ENDFUNC(GLOBAL(movmem_i4_even))
-       ENDFUNC(GLOBAL(movmem_i4_odd))
-
-       .p2align        4
-GLOBAL(movmemSI12_i4):
-       mov.l   @r5,r0
-       mov.l   @(4,r5),r1
-       mov.l   @(8,r5),r2
-       mov.l   r0,@r4
-       mov.l   r1,@(4,r4)
-       rts
-       mov.l   r2,@(8,r4)
-
-       ENDFUNC(GLOBAL(movmemSI12_i4))
-#endif
-
-#ifdef L_mulsi3
-
-
-       .global GLOBAL(mulsi3)
-       HIDDEN_FUNC(GLOBAL(mulsi3))
-
-! r4 =       aabb
-! r5 =       ccdd
-! r0 = aabb*ccdd  via partial products
-!
-! if aa == 0 and cc = 0
-! r0 = bb*dd
-!
-! else
-! aa = bb*dd + (aa*dd*65536) + (cc*bb*65536)
-!
-
-GLOBAL(mulsi3):
-       mulu.w  r4,r5           ! multiply the lsws  macl=bb*dd
-       mov     r5,r3           ! r3 = ccdd
-       swap.w  r4,r2           ! r2 = bbaa
-       xtrct   r2,r3           ! r3 = aacc
-       tst     r3,r3           ! msws zero ?
-       bf      hiset
-       rts                     ! yes - then we have the answer
-       sts     macl,r0
-
-hiset: sts     macl,r0         ! r0 = bb*dd
-       mulu.w  r2,r5           ! brewing macl = aa*dd
-       sts     macl,r1
-       mulu.w  r3,r4           ! brewing macl = cc*bb
-       sts     macl,r2
-       add     r1,r2
-       shll16  r2
-       rts
-       add     r2,r0
-
-       ENDFUNC(GLOBAL(mulsi3))
-#endif
-#endif /* ! __SH5__ */
-#ifdef L_sdivsi3_i4
-       .title "SH DIVIDE"
-!! 4 byte integer Divide code for the Renesas SH
-#ifdef __SH4__
-!! args in r4 and r5, result in fpul, clobber dr0, dr2
-
-       .global GLOBAL(sdivsi3_i4)
-       HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
-GLOBAL(sdivsi3_i4):
-       lds r4,fpul
-       float fpul,dr0
-       lds r5,fpul
-       float fpul,dr2
-       fdiv dr2,dr0
-       rts
-       ftrc dr0,fpul
-
-       ENDFUNC(GLOBAL(sdivsi3_i4))
-#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
-!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
-
-#if ! __SH5__ || __SH5__ == 32
-#if __SH5__
-       .mode   SHcompact
-#endif
-       .global GLOBAL(sdivsi3_i4)
-       HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
-GLOBAL(sdivsi3_i4):
-       sts.l fpscr,@-r15
-       mov #8,r2
-       swap.w r2,r2
-       lds r2,fpscr
-       lds r4,fpul
-       float fpul,dr0
-       lds r5,fpul
-       float fpul,dr2
-       fdiv dr2,dr0
-       ftrc dr0,fpul
-       rts
-       lds.l @r15+,fpscr
-
-       ENDFUNC(GLOBAL(sdivsi3_i4))
-#endif /* ! __SH5__ || __SH5__ == 32 */
-#endif /* ! __SH4__ */
-#endif
-
-#ifdef L_sdivsi3
-/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
-   sh2e/sh3e code.  */
-#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
-!!
-!! Steve Chamberlain
-!! sac@cygnus.com
-!!
-!!
-
-!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
-
-       .global GLOBAL(sdivsi3)
-#if __SHMEDIA__
-#if __SH5__ == 32
-       .section        .text..SHmedia32,"ax"
-#else
-       .text
-#endif
-       .align  2
-#if 0
-/* The assembly code that follows is a hand-optimized version of the C
-   code that follows.  Note that the registers that are modified are
-   exactly those listed as clobbered in the patterns divsi3_i1 and
-   divsi3_i1_media.
-       
-int __sdivsi3 (i, j)
-     int i, j;
-{
-  register unsigned long long r18 asm ("r18");
-  register unsigned long long r19 asm ("r19");
-  register unsigned long long r0 asm ("r0") = 0;
-  register unsigned long long r1 asm ("r1") = 1;
-  register int r2 asm ("r2") = i >> 31;
-  register int r3 asm ("r3") = j >> 31;
-
-  r2 = r2 ? r2 : r1;
-  r3 = r3 ? r3 : r1;
-  r18 = i * r2;
-  r19 = j * r3;
-  r2 *= r3;
-  
-  r19 <<= 31;
-  r1 <<= 31;
-  do
-    if (r18 >= r19)
-      r0 |= r1, r18 -= r19;
-  while (r19 >>= 1, r1 >>= 1);
-
-  return r2 * (int)r0;
-}
-*/
-GLOBAL(sdivsi3):
-       pt/l    LOCAL(sdivsi3_dontadd), tr2
-       pt/l    LOCAL(sdivsi3_loop), tr1
-       ptabs/l r18, tr0
-       movi    0, r0
-       movi    1, r1
-       shari.l r4, 31, r2
-       shari.l r5, 31, r3
-       cmveq   r2, r1, r2
-       cmveq   r3, r1, r3
-       muls.l  r4, r2, r18
-       muls.l  r5, r3, r19
-       muls.l  r2, r3, r2
-       shlli   r19, 31, r19
-       shlli   r1, 31, r1
-LOCAL(sdivsi3_loop):
-       bgtu    r19, r18, tr2
-       or      r0, r1, r0
-       sub     r18, r19, r18
-LOCAL(sdivsi3_dontadd):
-       shlri   r1, 1, r1
-       shlri   r19, 1, r19
-       bnei    r1, 0, tr1
-       muls.l  r0, r2, r0
-       add.l   r0, r63, r0
-       blink   tr0, r63
-#elif 0 /* ! 0 */
- // inputs: r4,r5
- // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
- // result in r0
-GLOBAL(sdivsi3):
- // can create absolute value without extra latency,
- // but dependent on proper sign extension of inputs:
- // shari.l r5,31,r2
- // xor r5,r2,r20
- // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
- shari.l r5,31,r2
- ori r2,1,r2
- muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
- movi 0xffffffffffffbb0c,r19 // shift count eqiv 76
- shari.l r4,31,r3
- nsb r20,r0
- shlld r20,r0,r25
- shlri r25,48,r25
- sub r19,r25,r1
- mmulfx.w r1,r1,r2
- mshflo.w r1,r63,r1
- // If r4 was to be used in-place instead of r21, could use this sequence
- // to compute absolute:
- // sub r63,r4,r19 // compute absolute value of r4
- // shlri r4,32,r3 // into lower 32 bit of r4, keeping
- // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
- ori r3,1,r3
- mmulfx.w r25,r2,r2
- sub r19,r0,r0
- muls.l r4,r3,r21
- msub.w r1,r2,r2
- addi r2,-2,r1
- mulu.l r21,r1,r19
- mmulfx.w r2,r2,r2
- shlli r1,15,r1
- shlrd r19,r0,r19
- mulu.l r19,r20,r3
- mmacnfx.wl r25,r2,r1
- ptabs r18,tr0
- sub r21,r3,r25
-
- mulu.l r25,r1,r2
- addi r0,14,r0
- xor r4,r5,r18
- shlrd r2,r0,r2
- mulu.l r2,r20,r3
- add r19,r2,r19
- shari.l r18,31,r18
- sub r25,r3,r25
-
- mulu.l r25,r1,r2
- sub r25,r20,r25
- add r19,r18,r19
- shlrd r2,r0,r2
- mulu.l r2,r20,r3
- addi r25,1,r25
- add r19,r2,r19
-
- cmpgt r25,r3,r25
- add.l r19,r25,r0
- xor r0,r18,r0
- blink tr0,r63
-#else /* ! 0 && ! 0 */
-
- // inputs: r4,r5
- // clobbered: r1,r18,r19,r20,r21,r25,tr0
- // result in r0
-       HIDDEN_FUNC(GLOBAL(sdivsi3_2))
-#ifndef __pic__
-       FUNC(GLOBAL(sdivsi3))
-GLOBAL(sdivsi3): /* this is the shcompact entry point */
- // The special SHmedia entry point sdivsi3_1 prevents accidental linking
- // with the SHcompact implementation, which clobbers tr1 / tr2.
- .global GLOBAL(sdivsi3_1)
-GLOBAL(sdivsi3_1):
- .global GLOBAL(div_table_internal)
- movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
- shori GLOBAL(div_table_internal) & 65535, r20
-#endif
- .global GLOBAL(sdivsi3_2)
- // div_table in r20
- // clobbered: r1,r18,r19,r21,r25,tr0
-GLOBAL(sdivsi3_2):
- nsb r5, r1
- shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
- shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
- ldx.ub r20, r21, r19 // u0.8
- shari r25, 32, r25   // normalize to s2.30
- shlli r21, 1, r21
- muls.l r25, r19, r19 // s2.38
- ldx.w r20, r21, r21  // s2.14
-  ptabs r18, tr0
- shari r19, 24, r19   // truncate to s2.14
- sub r21, r19, r19    // some 11 bit inverse in s1.14
- muls.l r19, r19, r21 // u0.28
-  sub r63, r1, r1
-  addi r1, 92, r1
- muls.l r25, r21, r18 // s2.58
- shlli r19, 45, r19   // multiply by two and convert to s2.58
-  /* bubble */
- sub r19, r18, r18
- shari r18, 28, r18   // some 22 bit inverse in s1.30
- muls.l r18, r25, r0  // s2.60
-  muls.l r18, r4, r25 // s32.30
-  /* bubble */
- shari r0, 16, r19   // s-16.44
- muls.l r19, r18, r19 // s-16.74
-  shari r25, 63, r0
-  shari r4, 14, r18   // s19.-14
- shari r19, 30, r19   // s-16.44
- muls.l r19, r18, r19 // s15.30
-  xor r21, r0, r21    // You could also use the constant 1 << 27.
-  add r21, r25, r21
- sub r21, r19, r21
- shard r21, r1, r21
- sub r21, r0, r0
- blink tr0, r63
-#ifndef __pic__
-       ENDFUNC(GLOBAL(sdivsi3))
-#endif
-       ENDFUNC(GLOBAL(sdivsi3_2))
-#endif
-#elif defined __SHMEDIA__
-/* m5compact-nofpu */
- // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
-       .mode   SHmedia
-       .section        .text..SHmedia32,"ax"
-       .align  2
-       FUNC(GLOBAL(sdivsi3))
-GLOBAL(sdivsi3):
-       pt/l LOCAL(sdivsi3_dontsub), tr0
-       pt/l LOCAL(sdivsi3_loop), tr1
-       ptabs/l r18,tr2
-       shari.l r4,31,r18
-       shari.l r5,31,r19
-       xor r4,r18,r20
-       xor r5,r19,r21
-       sub.l r20,r18,r20
-       sub.l r21,r19,r21
-       xor r18,r19,r19
-       shlli r21,32,r25
-       addi r25,-1,r21
-       addz.l r20,r63,r20
-LOCAL(sdivsi3_loop):
-       shlli r20,1,r20
-       bgeu/u r21,r20,tr0
-       sub r20,r21,r20
-LOCAL(sdivsi3_dontsub):
-       addi.l r25,-1,r25
-       bnei r25,-32,tr1
-       xor r20,r19,r20
-       sub.l r20,r19,r0
-       blink tr2,r63
-       ENDFUNC(GLOBAL(sdivsi3))
-#else /* ! __SHMEDIA__ */
-       FUNC(GLOBAL(sdivsi3))
-GLOBAL(sdivsi3):
-       mov     r4,r1
-       mov     r5,r0
-
-       tst     r0,r0
-       bt      div0
-       mov     #0,r2
-       div0s   r2,r1
-       subc    r3,r3
-       subc    r2,r1
-       div0s   r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       div1    r0,r3
-       rotcl   r1
-       addc    r2,r1
-       rts
-       mov     r1,r0
-
-
-div0:  rts
-       mov     #0,r0
-
-       ENDFUNC(GLOBAL(sdivsi3))
-#endif /* ! __SHMEDIA__ */
-#endif /* ! __SH4__ */
-#endif
-/* udivsi3_i4: unsigned 32-bit division carried out in floating point.
-   The dividend arrives in r4 and the divisor in r5; see the register notes
-   at the head of each variant for where the quotient is left and what is
-   clobbered.  */
-#ifdef L_udivsi3_i4
-
-       .title "SH DIVIDE"
-!! 4 byte integer Divide code for the Renesas SH
-#ifdef __SH4__
-!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
-!! and t bit
-
-       .global GLOBAL(udivsi3_i4)
-       HIDDEN_FUNC(GLOBAL(udivsi3_i4))
-GLOBAL(udivsi3_i4):
-       /* Divisor <= 1 as an unsigned value (this includes 0): take the
-          trivial path, which returns the dividend unchanged.  */
-       mov #1,r1
-       cmp/hi r1,r5
-       bf trivial
-       /* Rotating the 1 in r1 right puts it in the top bit; xor-ing that into
-          an operand flips its top bit, so the signed int->double conversion
-          sees (value - 2^31).  The bias is undone by adding the double at L1
-          (2147483648.0), which is loaded into dr4.  */
-       rotr r1
-       xor r1,r4
-       lds r4,fpul
-       mova L1,r0
-#ifdef FMOVD_WORKS
-       fmov.d @r0+,dr4
-#else
-       fmov.s @r0+,DR40
-       fmov.s @r0,DR41
-#endif
-       float fpul,dr0
-       xor r1,r5
-       lds r5,fpul
-       float fpul,dr2
-       fadd dr4,dr0
-       fadd dr4,dr2
-       fdiv dr2,dr0
-       /* The truncating conversion sits in the delay slot of rts.  */
-       rts
-       ftrc dr0,fpul
-
-trivial:
-       rts
-       lds r4,fpul
-
-       .align 2
-#ifdef FMOVD_WORKS
-       .align 3        ! make double below 8 byte aligned.
-#endif
-L1:
-       .double 2147483648
-
-       ENDFUNC(GLOBAL(udivsi3_i4))
-#elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
-#if ! __SH5__ || __SH5__ == 32
-!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
-       .mode   SHmedia
-       .global GLOBAL(udivsi3_i4)
-       HIDDEN_FUNC(GLOBAL(udivsi3_i4))
-GLOBAL(udivsi3_i4):
-       /* Copy both operands through addz.l into r20/r21, convert them to
-          double, divide and truncate.  The last fmov.s copies fr33 into fr32
-          before returning through tr0 (loaded from r18).  */
-       addz.l  r4,r63,r20
-       addz.l  r5,r63,r21
-       fmov.qd r20,dr0
-       fmov.qd r21,dr32
-       ptabs   r18,tr0
-       float.qd dr0,dr0
-       float.qd dr32,dr32
-       fdiv.d  dr0,dr32,dr0
-       ftrc.dq dr0,dr32
-       fmov.s fr33,fr32
-       blink tr0,r63
-
-       ENDFUNC(GLOBAL(udivsi3_i4))
-#endif /* ! __SH5__ || __SH5__ == 32 */
-#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
-!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
-
-       .global GLOBAL(udivsi3_i4)
-       HIDDEN_FUNC(GLOBAL(udivsi3_i4))
-GLOBAL(udivsi3_i4):
-       /* Same algorithm as the __SH4__ variant above, but the caller's FPSCR
-          is pushed on the stack and replaced by the word stored at L1 for the
-          duration of the computation; it is popped again in the delay slot
-          of the final rts.  The trivial path branches away before FPSCR is
-          touched.  */
-       mov #1,r1
-       cmp/hi r1,r5
-       bf trivial
-       sts.l fpscr,@-r15
-       mova L1,r0
-       /* Post-increment leaves r0 pointing at the double that follows the
-          FPSCR word.  */
-       lds.l @r0+,fpscr
-       rotr r1
-       xor r1,r4
-       lds r4,fpul
-#ifdef FMOVD_WORKS
-       fmov.d @r0+,dr4
-#else
-       fmov.s @r0+,DR40
-       fmov.s @r0,DR41
-#endif
-       float fpul,dr0
-       xor r1,r5
-       lds r5,fpul
-       float fpul,dr2
-       fadd dr4,dr0
-       fadd dr4,dr2
-       fdiv dr2,dr0
-       ftrc dr0,fpul
-       rts
-       lds.l @r15+,fpscr
-
-#ifdef FMOVD_WORKS
-       .align 3        ! make double below 8 byte aligned.
-#endif
-trivial:
-       rts
-       lds r4,fpul
-
-       .align 2
-L1:
-       /* Temporary FPSCR value loaded above.  NOTE(review): 0x80000 is
-          bit 19 and 0x180000 is bits 19 and 20 - presumably the PR and SZ
-          mode bits; confirm against the SH-4 manual.  */
-#ifndef FMOVD_WORKS
-       .long 0x80000
-#else
-       .long 0x180000
-#endif
-       .double 2147483648
-
-       ENDFUNC(GLOBAL(udivsi3_i4))
-#endif /* ! __SH4__ */
-#endif
-
-/* udivsi3: unsigned 32-bit division, dividend in r4, divisor in r5,
-   quotient in r0.  Three implementations are selected by the preprocessor:
-   an SHmedia reciprocal-based version (with an older shift-and-subtract
-   loop kept under "#if 0"), a compact SHmedia loop, and an SHcompact
-   version built from div1 steps.  */
-#ifdef L_udivsi3
-/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
-   sh2e/sh3e code.  */
-#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
-
-!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
-       .global GLOBAL(udivsi3)
-       HIDDEN_FUNC(GLOBAL(udivsi3))
-
-#if __SHMEDIA__
-#if __SH5__ == 32
-       .section        .text..SHmedia32,"ax"
-#else
-       .text
-#endif
-       .align  2
-#if 0
-/* The assembly code that follows is a hand-optimized version of the C
-   code that follows.  Note that the registers that are modified are
-   exactly those listed as clobbered in the patterns udivsi3_i1 and
-   udivsi3_i1_media.
-       
-unsigned 
-__udivsi3 (i, j)
-    unsigned i, j; 
-{
-  register unsigned long long r0 asm ("r0") = 0;
-  register unsigned long long r18 asm ("r18") = 1;
-  register unsigned long long r4 asm ("r4") = i;
-  register unsigned long long r19 asm ("r19") = j;
-
-  r19 <<= 31;
-  r18 <<= 31;
-  do
-    if (r4 >= r19)
-      r0 |= r18, r4 -= r19;
-  while (r19 >>= 1, r18 >>= 1);
-
-  return r0;
-}
-*/
-GLOBAL(udivsi3):
-       pt/l    LOCAL(udivsi3_dontadd), tr2
-       pt/l    LOCAL(udivsi3_loop), tr1
-       ptabs/l r18, tr0
-       movi    0, r0
-       movi    1, r18
-       addz.l  r5, r63, r19
-       addz.l  r4, r63, r4
-       shlli   r19, 31, r19
-       shlli   r18, 31, r18
-LOCAL(udivsi3_loop):
-       bgtu    r19, r4, tr2
-       or      r0, r18, r0
-       sub     r4, r19, r4
-LOCAL(udivsi3_dontadd):
-       shlri   r18, 1, r18
-       shlri   r19, 1, r19
-       bnei    r18, 0, tr1
-       blink   tr0, r63
-#else
-GLOBAL(udivsi3):
- // inputs: r4,r5
- // clobbered: r18,r19,r20,r21,r22,r25,tr0
- // result in r0.
- addz.l r5,r63,r22
- nsb r22,r0
- shlld r22,r0,r25
- shlri r25,48,r25
- movi 0xffffffffffffbb0c,r20 // shift count equiv 76
- sub r20,r25,r21
- mmulfx.w r21,r21,r19
- mshflo.w r21,r63,r21
- ptabs r18,tr0
- mmulfx.w r25,r19,r19
- sub r20,r0,r0
- /* bubble */
- msub.w r21,r19,r19
- addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
-                   before the msub.w, but we need a different value for
-                   r19 to keep errors under control.  */
- mulu.l r4,r21,r18
- mmulfx.w r19,r19,r19
- shlli r21,15,r21
- shlrd r18,r0,r18
- mulu.l r18,r22,r20
- mmacnfx.wl r25,r19,r21
- /* bubble */
- sub r4,r20,r25
-
- mulu.l r25,r21,r19
- addi r0,14,r0
- /* bubble */
- shlrd r19,r0,r19
- mulu.l r19,r22,r20
- add r18,r19,r18
- /* bubble */
- sub.l r25,r20,r25
-
- mulu.l r25,r21,r19
- addz.l r25,r63,r25
- sub r25,r22,r25
- shlrd r19,r0,r19
- mulu.l r19,r22,r20
- addi r25,1,r25
- add r18,r19,r18
-
- cmpgt r25,r20,r25
- add.l r18,r25,r0
- blink tr0,r63
-#endif
-/* NOTE(review): the branch below is only reached when the "#if __SHMEDIA__"
-   test above is false while __SHMEDIA__ is nevertheless defined, i.e. when
-   the macro is defined to 0 - confirm that some configuration does that.  */
-#elif defined (__SHMEDIA__)
-/* m5compact-nofpu - more emphasis on code size than on speed, but don't
-   ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
-   So use a short shmedia loop.  */
- // clobbered: r20,r21,r25,tr0,tr1,tr2
-       .mode   SHmedia
-       .section        .text..SHmedia32,"ax"
-       .align  2
-GLOBAL(udivsi3):
- pt/l LOCAL(udivsi3_dontsub), tr0
- pt/l LOCAL(udivsi3_loop), tr1
- ptabs/l r18,tr2
- /* r25 holds the divisor in its upper 32 bits and doubles as the loop
-    counter in its lower 32 bits: addi.l decrements it each pass and the
-    loop exits when the 32-bit result reaches -32.  r21 is that value
-    minus one, the threshold compared against the shifted dividend in r20.  */
- shlli r5,32,r25
- addi r25,-1,r21
- addz.l r4,r63,r20
-LOCAL(udivsi3_loop):
- shlli r20,1,r20
- bgeu/u r21,r20,tr0
- sub r20,r21,r20
-LOCAL(udivsi3_dontsub):
- addi.l r25,-1,r25
- bnei r25,-32,tr1
- add.l r20,r63,r0
- blink tr2,r63
-#else /* ! defined (__SHMEDIA__) */
-/* Helper sequences reached with bsr.  div8 falls through into div7, so
-   div8 performs eight div1 steps and div7 seven; in both the final div1
-   sits in the delay slot of the rts.  */
-LOCAL(div8):
- div1 r5,r4
-LOCAL(div7):
- div1 r5,r4; div1 r5,r4; div1 r5,r4
- div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
-
-/* Four div1 steps with a rotcl r0 after each of the first three; the
-   callers below place another rotcl r0 in the delay slot of each bsr and
-   after the last call.  */
-LOCAL(divx4):
- div1 r5,r4; rotcl r0
- div1 r5,r4; rotcl r0
- div1 r5,r4; rotcl r0
- rts; div1 r5,r4
-
-GLOBAL(udivsi3):
- /* pr is saved because the helpers above are called with bsr.  */
- sts.l pr,@-r15
- /* T is set when r5 equals its own zero-extended low 16 bits, i.e. the
-    divisor fits in 16 bits; otherwise go to large_divisor.  Except on
-    sh1, the div0u below executes in the delay slot of bf/s and therefore
-    on both paths; for sh1 large_divisor issues its own div0u.  */
- extu.w r5,r0
- cmp/eq r5,r0
-#ifdef __sh1__
- bf LOCAL(large_divisor)
-#else
- bf/s LOCAL(large_divisor)
-#endif
- div0u
- swap.w r4,r0
- shlr16 r4
- bsr LOCAL(div8)
- shll16 r5
- bsr LOCAL(div7)
- div1 r5,r4
- xtrct r4,r0
- xtrct r0,r4
- bsr LOCAL(div8)
- swap.w r4,r4
- bsr LOCAL(div7)
- div1 r5,r4
- lds.l @r15+,pr
- xtrct r4,r0
- swap.w r0,r0
- rotcl r0
- rts
- /* Delay slot: undo the shll16 applied to r5 above.  */
- shlr16 r5
-
-LOCAL(large_divisor):
-#ifdef __sh1__
- div0u
-#endif
- mov #0,r0
- xtrct r4,r0
- xtrct r0,r4
- bsr LOCAL(divx4)
- rotcl r0
- bsr LOCAL(divx4)
- rotcl r0
- bsr LOCAL(divx4)
- rotcl r0
- bsr LOCAL(divx4)
- rotcl r0
- lds.l @r15+,pr
- rts
- rotcl r0
-
-       /* NOTE(review): ENDFUNC appears only in this SHcompact branch; the
-          two SHmedia branches above end without one.  */
-       ENDFUNC(GLOBAL(udivsi3))
-#endif /* ! __SHMEDIA__ */
-#endif /* __SH4__ */
-#endif /* L_udivsi3 */
-
-/* udivdi3: unsigned 64-bit division for SHmedia.
-   Inputs: dividend in r2, divisor in r3, return address in r18.
-   Result: quotient in r2.
-   Registers written: r0, r1, r2, r4-r9, r19-r22, r25 and tr0 (tr0 is the
-   only target register used).
-   The same entry point is also exported through HIDDEN_ALIAS as
-   udivdi3_internal.  */
-#ifdef L_udivdi3
-#ifdef __SHMEDIA__
-       .mode   SHmedia
-       .section        .text..SHmedia32,"ax"
-       .align  2
-       .global GLOBAL(udivdi3)
-       FUNC(GLOBAL(udivdi3))
-GLOBAL(udivdi3):
-       HIDDEN_ALIAS(udivdi3_internal,udivdi3)
-       shlri r3,1,r4
-       nsb r4,r22
-       shlld r3,r22,r6
-       shlri r6,49,r5
-       movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
-       sub r21,r5,r1
-       mmulfx.w r1,r1,r4
-       mshflo.w r1,r63,r1
-       sub r63,r22,r20 // r63 == 64 % 64
-       mmulfx.w r5,r4,r4
-       pta LOCAL(large_divisor),tr0
-       addi r20,32,r9
-       msub.w r1,r4,r1
-       madd.w r1,r1,r1
-       mmulfx.w r1,r1,r4
-       shlri r6,32,r7
-       bgt/u r9,r63,tr0 // large_divisor
-       mmulfx.w r5,r4,r4
-       shlri r2,32+14,r19
-       addi r22,-31,r0
-       msub.w r1,r4,r1
-
-       mulu.l r1,r7,r4
-       addi r1,-3,r5
-       mulu.l r5,r19,r5
-       sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
-       shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
-                        the case may be, %0000000000000000 000.11111111111, still */
-       muls.l r1,r4,r4 /* leaving at least one sign bit.  */
-       mulu.l r5,r3,r8
-       mshalds.l r1,r21,r1
-       shari r4,26,r4
-       shlld r8,r0,r8
-       add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
-       sub r2,r8,r2
-       /* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
-
-       shlri r2,22,r21
-       mulu.l r21,r1,r21
-       shlld r5,r0,r8
-       addi r20,30-22,r0
-       shlrd r21,r0,r21
-       mulu.l r21,r3,r5
-       add r8,r21,r8
-       mcmpgt.l r21,r63,r21 // See Note 1
-       addi r20,30,r0
-       mshfhi.l r63,r21,r21
-       sub r2,r5,r2
-       andc r2,r21,r2
-
-       /* small divisor: need a third divide step */
-       mulu.l r2,r1,r7
-       ptabs r18,tr0
-       addi r2,1,r2
-       shlrd r7,r0,r7
-       mulu.l r7,r3,r5
-       add r8,r7,r8
-       sub r2,r3,r2
-       cmpgt r2,r5,r5
-       add r8,r5,r2
-       /* could test r3 here to check for divide by zero.  */
-       blink tr0,r63
-
-LOCAL(large_divisor):
-       mmulfx.w r5,r4,r4
-       shlrd r2,r9,r25
-       shlri r25,32,r8
-       msub.w r1,r4,r1
-
-       mulu.l r1,r7,r4
-       addi r1,-3,r5
-       mulu.l r5,r8,r5
-       sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
-       shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
-                        the case may be, %0000000000000000 000.11111111111, still */
-       muls.l r1,r4,r4 /* leaving at least one sign bit.  */
-       shlri r5,14-1,r8
-       mulu.l r8,r7,r5
-       mshalds.l r1,r21,r1
-       shari r4,26,r4
-       add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
-       sub r25,r5,r25
-       /* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
-
-       shlri r25,22,r21
-       mulu.l r21,r1,r21
-       pta LOCAL(no_lo_adj),tr0
-       addi r22,32,r0
-       shlri r21,40,r21
-       mulu.l r21,r7,r5
-       add r8,r21,r8
-       shlld r2,r0,r2
-       sub r25,r5,r25
-       bgtu/u r7,r25,tr0 // no_lo_adj
-       addi r8,1,r8
-       sub r25,r7,r25
-LOCAL(no_lo_adj):
-       mextr4 r2,r25,r2
-
-       /* large_divisor: only needs a few adjustments.  */
-       mulu.l r8,r6,r5
-       ptabs r18,tr0
-       /* bubble */
-       cmpgtu r5,r2,r5
-       sub r8,r5,r2
-       blink tr0,r63
-       ENDFUNC(GLOBAL(udivdi3))
-/* Note 1: To shift the result of the second divide stage so that the result
-   always fits into 32 bits, yet we still reduce the rest sufficiently
-   would require a lot of instructions to do the shifts just right.  Using
-   the full 64 bit shift result to multiply with the divisor would require
-   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
-   Fortunately, if the upper 32 bits of the shift result are nonzero, we
-   know that the rest after taking this partial result into account will
-   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
-   upper 32 bits of the partial result are nonzero.  */
-#endif /* __SHMEDIA__ */
-#endif /* L_udivdi3 */
-
-/* divdi3: signed 64-bit division for SHmedia, built on top of
-   udivdi3_internal.  Dividend in r2, divisor in r3, return address in r18;
-   the quotient is returned in r2.  Writes r22, r23, tr0 and tr1 in addition
-   to whatever udivdi3_internal writes.  */
-#ifdef L_divdi3
-#ifdef __SHMEDIA__
-       .mode   SHmedia
-       .section        .text..SHmedia32,"ax"
-       .align  2
-       .global GLOBAL(divdi3)
-       FUNC(GLOBAL(divdi3))
-GLOBAL(divdi3):
-       pta GLOBAL(udivdi3_internal),tr0
-       /* r22/r23 become the sign masks (0 or -1) of r2/r3; xor followed by
-          sub with the mask replaces each operand by its absolute value.  */
-       shari r2,63,r22
-       shari r3,63,r23
-       xor r2,r22,r2
-       xor r3,r23,r3
-       sub r2,r22,r2
-       sub r3,r23,r3
-       /* Equal signs: the unsigned quotient is already the answer, so jump
-          straight to the unsigned routine, which returns to our caller
-          through the untouched r18.  */
-       beq/u r22,r23,tr0
-       /* Signs differ: keep the real return address in tr1, call the
-          unsigned routine with r18 as the link register, then negate the
-          quotient.  */
-       ptabs r18,tr1
-       blink tr0,r18
-       sub r63,r2,r2
-       blink tr1,r63
-       ENDFUNC(GLOBAL(divdi3))
-#endif /* __SHMEDIA__ */
-#endif /* L_divdi3 */
-
-/* umoddi3: unsigned 64-bit remainder for SHmedia.
-   Inputs: dividend in r2, divisor in r3, return address in r18.
-   Result: remainder in r2.
-   Registers written: r0, r1, r2, r4-r9, r19-r22, r25 and tr0 (tr0 is the
-   only target register used).
-   The same entry point is also exported through HIDDEN_ALIAS as
-   umoddi3_internal.  The reciprocal set-up mirrors the one in udivdi3;
-   only the final steps differ, since the rest rather than the quotient is
-   kept.  */
-#ifdef L_umoddi3
-#ifdef __SHMEDIA__
-       .mode   SHmedia
-       .section        .text..SHmedia32,"ax"
-       .align  2
-       .global GLOBAL(umoddi3)
-       FUNC(GLOBAL(umoddi3))
-GLOBAL(umoddi3):
-       HIDDEN_ALIAS(umoddi3_internal,umoddi3)
-       shlri r3,1,r4
-       nsb r4,r22
-       shlld r3,r22,r6
-       shlri r6,49,r5
-       movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
-       sub r21,r5,r1
-       mmulfx.w r1,r1,r4
-       mshflo.w r1,r63,r1
-       sub r63,r22,r20 // r63 == 64 % 64
-       mmulfx.w r5,r4,r4
-       pta LOCAL(large_divisor),tr0
-       addi r20,32,r9
-       msub.w r1,r4,r1
-       madd.w r1,r1,r1
-       mmulfx.w r1,r1,r4
-       shlri r6,32,r7
-       bgt/u r9,r63,tr0 // large_divisor
-       mmulfx.w r5,r4,r4
-       shlri r2,32+14,r19
-       addi r22,-31,r0
-       msub.w r1,r4,r1
-
-       mulu.l r1,r7,r4
-       addi r1,-3,r5
-       mulu.l r5,r19,r5
-       sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
-       shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
-                        the case may be, %0000000000000000 000.11111111111, still */
-       muls.l r1,r4,r4 /* leaving at least one sign bit.  */
-       mulu.l r5,r3,r5
-       mshalds.l r1,r21,r1
-       shari r4,26,r4
-       shlld r5,r0,r5
-       add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
-       sub r2,r5,r2
-       /* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
-
-       shlri r2,22,r21
-       mulu.l r21,r1,r21
-       addi r20,30-22,r0
-       /* bubble */ /* could test r3 here to check for divide by zero.  */
-       shlrd r21,r0,r21
-       mulu.l r21,r3,r5
-       mcmpgt.l r21,r63,r21 // See Note 1
-       addi r20,30,r0
-       mshfhi.l r63,r21,r21
-       sub r2,r5,r2
-       andc r2,r21,r2
-
-       /* small divisor: need a third divide step */
-       mulu.l r2,r1,r7
-       ptabs r18,tr0
-       sub r2,r3,r8 /* re-use r8 here for rest - r3 */
-       shlrd r7,r0,r7
-       mulu.l r7,r3,r5
-       /* bubble */
-       addi r8,1,r7
-       cmpgt r7,r5,r7
-       cmvne r7,r8,r2
-       sub r2,r5,r2
-       blink tr0,r63
-
-LOCAL(large_divisor):
-       mmulfx.w r5,r4,r4
-       shlrd r2,r9,r25
-       shlri r25,32,r8
-       msub.w r1,r4,r1
-
-       mulu.l r1,r7,r4
-       addi r1,-3,r5
-       mulu.l r5,r8,r5
-       sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
-       shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
-                        the case may be, %0000000000000000 000.11111111111, still */
-       muls.l r1,r4,r4 /* leaving at least one sign bit.  */
-       shlri r5,14-1,r8
-       mulu.l r8,r7,r5
-       mshalds.l r1,r21,r1
-       shari r4,26,r4
-       add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
-       sub r25,r5,r25
-       /* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
-
-       shlri r25,22,r21
-       mulu.l r21,r1,r21
-       pta LOCAL(no_lo_adj),tr0
-       addi r22,32,r0
-       shlri r21,40,r21
-       mulu.l r21,r7,r5
-       add r8,r21,r8
-       shlld r2,r0,r2
-       sub r25,r5,r25
-       bgtu/u r7,r25,tr0 // no_lo_adj
-       addi r8,1,r8
-       sub r25,r7,r25
-LOCAL(no_lo_adj):
-       mextr4 r2,r25,r2
-
-       /* large_divisor: only needs a few adjustments.  */
-       mulu.l r8,r6,r5
-       ptabs r18,tr0
-       add r2,r6,r7
-       cmpgtu r5,r2,r8
-       cmvne r8,r7,r2
-       sub r2,r5,r2
-       /* r22 is the shift count that was applied to the divisor at entry
-          (shlld r3,r22,r6); shift the rest back down by the same amount.  */
-       shlrd r2,r22,r2
-       blink tr0,r63
-       ENDFUNC(GLOBAL(umoddi3))
-/* Note 1: To shift the result of the second divide stage so that the result
-   always fits into 32 bits, yet we still reduce the rest sufficiently
-   would require a lot of instructions to do the shifts just right.  Using
-   the full 64 bit shift result to multiply with the divisor would require
-   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
-   Fortunately, if the upper 32 bits of the shift result are nonzero, we
-   know that the rest after taking this partial result into account will
-   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
-   upper 32 bits of the partial result are nonzero.  */
-#endif /* __SHMEDIA__ */
-#endif /* L_umoddi3 */
-
-/* moddi3: signed 64-bit remainder for SHmedia, built on top of
-   umoddi3_internal.  Dividend in r2, divisor in r3, return address in r18;
-   the remainder is returned in r2 and takes the sign of the dividend.
-   Writes r22, r23, tr0 and tr1 in addition to whatever umoddi3_internal
-   writes.  */
-#ifdef L_moddi3
-#ifdef __SHMEDIA__
-       .mode   SHmedia
-       .section        .text..SHmedia32,"ax"
-       .align  2
-       .global GLOBAL(moddi3)
-       FUNC(GLOBAL(moddi3))
-GLOBAL(moddi3):
-       pta GLOBAL(umoddi3_internal),tr0
-       /* r22/r23 become the sign masks (0 or -1) of r2/r3; xor followed by
-          sub with the mask replaces each operand by its absolute value.  */
-       shari r2,63,r22
-       shari r3,63,r23
-       xor r2,r22,r2
-       xor r3,r23,r3
-       sub r2,r22,r2
-       sub r3,r23,r3
-       /* Only the dividend's sign matters here: when its mask is zero
-          (r63 reads as zero) the unsigned remainder is the answer, so jump
-          straight to the unsigned routine.  The divisor's sign is not
-          consulted.  */
-       beq/u r22,r63,tr0
-       /* Negative dividend: keep the real return address in tr1, call the
-          unsigned routine with r18 as the link register, then negate the
-          remainder.  */
-       ptabs r18,tr1
-       blink tr0,r18
-       sub r63,r2,r2
-       blink tr1,r63
-       ENDFUNC(GLOBAL(moddi3))
-#endif /* __SHMEDIA__ */
-#endif /* L_moddi3 */
-
-/* set_fpscr: load FPSCR from r4 and refresh the two-word fpscr_values
-   table from the same value.  One table word receives r4 with bits 19 and
-   20 cleared; the other receives r4 with bit 19 set and bit 20 set only
-   when FMOVD_WORKS is defined.  Which word gets which variant depends on
-   the target (see the #if blocks below).  Writes r0, r1 and either r2 or
-   r3; r12 is used as a temporary under __PIC__ but is saved and restored.
-   NOTE(review): bits 19/20 are presumably the FPSCR PR and SZ mode bits -
-   confirm against the SH manuals.  */
-#ifdef L_set_fpscr
-#if !defined (__SH2A_NOFPU__)
-#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
-#ifdef __SH5__
-       .mode   SHcompact
-#endif
-       .global GLOBAL(set_fpscr)
-       HIDDEN_FUNC(GLOBAL(set_fpscr))
-GLOBAL(set_fpscr):
-       lds r4,fpscr
-       /* Get the address of fpscr_values into r1: through the GOT when
-          compiling PIC, from the literal pool otherwise.  */
-#ifdef __PIC__
-       mov.l   r12,@-r15
-#ifdef __vxworks
-       mov.l   LOCAL(set_fpscr_L0_base),r12
-       mov.l   LOCAL(set_fpscr_L0_index),r0
-       mov.l   @r12,r12
-       mov.l   @(r0,r12),r12
-#else
-       mova    LOCAL(set_fpscr_L0),r0
-       mov.l   LOCAL(set_fpscr_L0),r12
-       add     r0,r12
-#endif
-       mov.l   LOCAL(set_fpscr_L1),r0
-       mov.l   @(r0,r12),r1
-       mov.l   @r15+,r12
-#else
-       mov.l LOCAL(set_fpscr_L1),r1
-#endif
-       /* swap.w exchanges the two halfwords, so the 8-bit immediates below
-          act on bits 16..23 of the original value: 24 covers bits 19 and 20,
-          16 is bit 20 and 8 is bit 19.  */
-       swap.w r4,r0
-       or #24,r0
-#ifndef FMOVD_WORKS
-       xor #16,r0
-#endif
-       /* First variant (bit 19 set): word 1 on SH4 / SH2A double targets,
-          word 0 on the others.  */
-#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
-       swap.w r0,r3
-       mov.l r3,@(4,r1)
-#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
-       swap.w r0,r2
-       mov.l r2,@r1
-#endif
-       /* Clear whichever of bits 19/20 are still set to form the second
-          variant.  */
-#ifndef FMOVD_WORKS
-       xor #8,r0
-#else
-       xor #24,r0
-#endif
-       /* Second variant goes to the remaining word; the store sits in the
-          delay slot of rts.  */
-#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
-       swap.w r0,r2
-       rts
-       mov.l r2,@r1
-#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
-       swap.w r0,r3
-       rts
-       mov.l r3,@(4,r1)
-#endif
-       .align 2
-       /* Literal pool for the address computation above.  */
-#ifdef __PIC__
-#ifdef __vxworks
-LOCAL(set_fpscr_L0_base):
-       .long ___GOTT_BASE__
-LOCAL(set_fpscr_L0_index):
-       .long ___GOTT_INDEX__
-#else
-LOCAL(set_fpscr_L0):
-       .long _GLOBAL_OFFSET_TABLE_
-#endif
-LOCAL(set_fpscr_L1):
-       .long GLOBAL(fpscr_values@GOT)
-#else
-LOCAL(set_fpscr_L1):
-       .long GLOBAL(fpscr_values)
-#endif
-
-       ENDFUNC(GLOBAL(set_fpscr))
-       /* Storage for the table itself: 8 bytes of common data, unless the
-          build defines NO_FPSCR_VALUES.  */
-#ifndef NO_FPSCR_VALUES
-#ifdef __ELF__
-        .comm   GLOBAL(fpscr_values),8,4
-#else
-        .comm   GLOBAL(fpscr_values),8
-#endif /* ELF */
-#endif /* NO_FPSCR_VALUES */
-#endif /* SH2E / SH3E / SH4 */
-#endif /* __SH2A_NOFPU__ */
-#endif /* L_set_fpscr */
-/* ic_invalidate: write back the data cache line and invalidate the
-   instruction cache line for one address.  The address is taken from r0 in
-   the SHmedia variant and from r4 in the SHcompact variants.  The SHmedia
-   variant is preceded by init_trampoline, which stores a trampoline at r0
-   and then runs into ic_invalidate.  */
-#ifdef L_ic_invalidate
-#if __SH5__ == 32
-       .mode   SHmedia
-       .section        .text..SHmedia32,"ax"
-       .align  2
-       .global GLOBAL(init_trampoline)
-       HIDDEN_FUNC(GLOBAL(init_trampoline))
-GLOBAL(init_trampoline):
-       /* Writes r2 at offset 8 and r3 at offset 12 from r0, and a fixed
-          64-bit pattern (built in r20, halfword order chosen by endianness)
-          at offset 0.  There is no return instruction: execution continues
-          into ic_invalidate below.  */
-       st.l    r0,8,r2
-#ifdef __LITTLE_ENDIAN__
-       movi    9,r20
-       shori   0x402b,r20
-       shori   0xd101,r20
-       shori   0xd002,r20
-#else
-       movi    0xffffffffffffd002,r20
-       shori   0xd101,r20
-       shori   0x402b,r20
-       shori   9,r20
-#endif
-       st.q    r0,0,r20
-       st.l    r0,12,r3
-       ENDFUNC(GLOBAL(init_trampoline))
-       .global GLOBAL(ic_invalidate)
-       HIDDEN_FUNC(GLOBAL(ic_invalidate))
-GLOBAL(ic_invalidate):
-       ocbwb   r0,0
-       synco
-       icbi    r0, 0
-       ptabs   r18, tr0
-       synci
-       blink   tr0, r63
-       ENDFUNC(GLOBAL(ic_invalidate))
-#elif defined(__SH4A__)
-       .global GLOBAL(ic_invalidate)
-       HIDDEN_FUNC(GLOBAL(ic_invalidate))
-GLOBAL(ic_invalidate):
-       ocbwb   @r4
-       synco
-       icbi    @r4
-       rts
-         nop
-       ENDFUNC(GLOBAL(ic_invalidate))
-#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
-       /* For system code, we use ic_invalidate_line_i, but user code
-          needs a different mechanism.  A kernel call is generally not
-          available, and it would also be slow.  Different SH4 variants use
-          different sizes and associativities of the Icache.  We use a small
-          bit of dispatch code that can be put hidden in every shared object,
-          which calls the actual processor-specific invalidation code in a
-          separate module.
-          Or if you have operating system support, the OS could mmap the
-          processor-specific code from a single page, since it is highly
-          repetitive.  */
-       .global GLOBAL(ic_invalidate)
-       HIDDEN_FUNC(GLOBAL(ic_invalidate))
-GLOBAL(ic_invalidate):
-       /* Load the address of ic_invalidate_array into r1 (through the GOT
-          when compiling PIC).  */
-#ifdef __pic__
-#ifdef __vxworks
-       mov.l   1f,r1
-       mov.l   2f,r0
-       mov.l   @r1,r1
-       mov.l   0f,r2
-       mov.l   @(r0,r1),r0
-#else
-       mov.l   1f,r1
-       mova    1f,r0
-       mov.l   0f,r2
-       add     r1,r0
-#endif
-       mov.l   @(r0,r2),r1
-#else
-       mov.l   0f,r1
-#endif
-       /* Write back the data cache line, then jump into the array at
-          offset ((r4 - array) & mask), where the mask is the word stored at
-          array + 8.  r4 is left holding (address - array) and r0 is
-          reloaded from array + 4 in the delay slot of the jump; the array
-          code is entered with those values.  */
-       ocbwb   @r4
-       mov.l   @(8,r1),r0
-       sub     r1,r4
-       and     r4,r0
-       add     r1,r0
-       jmp     @r0
-       mov.l   @(4,r1),r0
-       .align  2
-#ifndef __pic__
-0:     .long   GLOBAL(ic_invalidate_array)
-#else /* __pic__ */
-       .global GLOBAL(ic_invalidate_array)
-0:     .long   GLOBAL(ic_invalidate_array)@GOT
-#ifdef __vxworks
-1:     .long   ___GOTT_BASE__
-2:     .long   ___GOTT_INDEX__
-#else
-1:     .long   _GLOBAL_OFFSET_TABLE_
-#endif
-       /* NOTE(review): this ENDFUNC sits inside the __pic__ branch, so a
-          non-PIC build emits HIDDEN_FUNC for ic_invalidate without a
-          matching ENDFUNC - confirm whether that is intended.  */
-       ENDFUNC(GLOBAL(ic_invalidate))
-#endif /* __pic__ */
-#endif /* SH4 */
-#endif /* L_ic_invalidate */
-
-/* ic_invalidate_array: target of the dispatch jump made by the SH4
-   ic_invalidate.  On SH4A (or when __FORCE_SH4A__ is defined) it is a short
-   routine using icbi; otherwise it is a large table of 32-byte entries laid
-   out according to WAYS and WAY_SIZE, which default to 4 and 0x4000.  */
-#ifdef L_ic_invalidate_array
-#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
-       /* NOTE(review): the .global directive below appears twice.  */
-       .global GLOBAL(ic_invalidate_array)
-       /* This is needed when an SH4 dso with trampolines is used on SH4A.  */
-       .global GLOBAL(ic_invalidate_array)
-       FUNC(GLOBAL(ic_invalidate_array))
-GLOBAL(ic_invalidate_array):
-       /* Adds r1 to r4 before the icbi - presumably r1 is the array base
-          that the SH4 ic_invalidate subtracted from the address; confirm
-          against that caller.  */
-       add     r1,r4
-       synco
-       icbi    @r4
-       rts
-         nop
-       .align 2
-       .long   0
-       ENDFUNC(GLOBAL(ic_invalidate_array))
-#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
-       .global GLOBAL(ic_invalidate_array)
-       .p2align 5
-       FUNC(GLOBAL(ic_invalidate_array))
-/* This must be aligned to the beginning of a cache line.  */
-GLOBAL(ic_invalidate_array):
-#ifndef WAYS
-#define WAYS 4
-#define WAY_SIZE 0x4000
-#endif
-#if WAYS == 1
-       /* One 32-byte entry per repetition: rts, nop, then seven words of
-          WAY_SIZE - 32.  */
-       .rept   WAY_SIZE * WAYS / 32
-       rts
-       nop
-       .rept   7
-       .long   WAY_SIZE - 32
-       .endr
-       .endr
-#elif WAYS <= 6
-       /* One 32-byte entry per repetition: a braf/add pair, two data words,
-          WAYS-2 braf/nop pairs and 7-WAYS rts/nop pairs.  */
-       .rept   WAY_SIZE * WAYS / 32
-       braf    r0
-       add     #-8,r0
-       .long   WAY_SIZE + 8
-       .long   WAY_SIZE - 32
-       .rept   WAYS-2
-       braf    r0
-       nop
-       .endr
-       .rept   7 - WAYS
-       rts
-       nop
-       .endr
-       .endr
-#else /* WAYS > 6 */
-       /* This variant needs two different pages for mmap-ing.  */
-       /* WAYS-1 blocks of braf entries, followed by one block whose entries
-          are an rts padded with fifteen nops.  */
-       .rept   WAYS-1
-       .rept   WAY_SIZE / 32
-       braf    r0
-       nop
-       .long   WAY_SIZE
-       .rept 6
-       .long   WAY_SIZE - 32
-       .endr
-       .endr
-       .endr
-       .rept   WAY_SIZE / 32
-       rts
-       .rept   15
-       nop
-       .endr
-       .endr
-#endif /* WAYS */
-       ENDFUNC(GLOBAL(ic_invalidate_array))
-#endif /* SH4 */
-#endif /* L_ic_invalidate_array */
-
-#if defined (__SH5__) && __SH5__ == 32
-#ifdef L_shcompact_call_trampoline
-       .section        .rodata
-       .align  1
-LOCAL(ct_main_table):
-.word  LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
-.word  LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
-       .mode   SHmedia
-       .section        .text..SHmedia32, "ax"
-       .align  2
-       
-     /* This function loads 64-bit general-purpose registers from the
-       stack, from a memory address contained in them or from an FP
-       register, according to a cookie passed in r1.  Its execution
-       time is linear on the number of registers that actually have
-       to be copied.  See sh.h for details on the actual bit pattern.
-
-       The function to be called is passed in r0.  If a 32-bit return
-       value is expected, the actual function will be tail-called,
-       otherwise the return address will be stored in r10 (that the
-       caller should expect to be clobbered) and the return value
-       will be expanded into r2/r3 upon return.  */
-       
-       .global GLOBAL(GCC_shcompact_call_trampoline)
-       FUNC(GLOBAL(GCC_shcompact_call_trampoline))
-GLOBAL(GCC_shcompact_call_trampoline):
-       ptabs/l r0, tr0 /* Prepare to call the actual function.  */
-       movi    ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
-       pt/l    LOCAL(ct_loop), tr1
-       addz.l  r1, r63, r1
-       shori   ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
-LOCAL(ct_loop):
-       nsb     r1, r28
-       shlli   r28, 1, r29
-       ldx.w   r0, r29, r30
-LOCAL(ct_main_label):
-       ptrel/l r30, tr2
-       blink   tr2, r63
-       /* FP-copy handlers.  Each one moves a value from a double FP
-          register into the integer register named by its label, clears
-          that register's field in the cookie held in r1 (andc with a
-          mask built in r31) and branches back through tr1, which the
-          function entry points at ct_loop.  */
-LOCAL(ct_r2_fp):       /* Copy r2 from an FP register.  */
-       /* It must be dr0, so just do it.  */
-       fmov.dq dr0, r2
-       movi    7, r30
-       shlli   r30, 29, r31
-       andc    r1, r31, r1
-       blink   tr1, r63
-LOCAL(ct_r3_fp):       /* Copy r3 from an FP register.  */
-       /* It is either dr0 or dr2.  */
-       movi    7, r30
-       /* r32 = cookie >> 26, sampled before the field is cleared: the
-          value 4 keeps the dr0 copy, anything else takes dr2.  */
-       shlri   r1, 26, r32
-       shlli   r30, 26, r31
-       andc    r1, r31, r1
-       fmov.dq dr0, r3
-       beqi/l  r32, 4, tr1
-       fmov.dq dr2, r3
-       blink   tr1, r63
-LOCAL(ct_r4_fp):       /* Copy r4 from an FP register.  */
-       /* Computed branch: r33 = ((cookie >> 23) & 3) * 8 selects one of
-          the fmov.dq/blink pairs at ct_r4_fp_copy.  The offset is formed
-          relative to ct_r4_fp_base, where ptrel consumes it.
-          NOTE(review): the * 8 scaling assumes each pair occupies 8
-          bytes (two 4-byte instructions) -- confirm.  The low-FP
-          handlers for r5..r9 below use the same scheme with their own
-          shift counts.  */
-       shlri   r1, 23 - 3, r34
-       andi    r34, 3 << 3, r33
-       addi    r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
-LOCAL(ct_r4_fp_base):
-       ptrel/l r32, tr2
-       movi    7, r30
-       shlli   r30, 23, r31
-       andc    r1, r31, r1
-       blink   tr2, r63
-LOCAL(ct_r4_fp_copy):
-       fmov.dq dr0, r4
-       blink   tr1, r63
-       fmov.dq dr2, r4
-       blink   tr1, r63
-       fmov.dq dr4, r4
-       blink   tr1, r63
-LOCAL(ct_r5_fp):       /* Copy r5 from an FP register.  */
-       shlri   r1, 20 - 3, r34
-       andi    r34, 3 << 3, r33
-       addi    r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
-LOCAL(ct_r5_fp_base):
-       ptrel/l r32, tr2
-       movi    7, r30
-       shlli   r30, 20, r31
-       andc    r1, r31, r1
-       blink   tr2, r63
-LOCAL(ct_r5_fp_copy):
-       fmov.dq dr0, r5
-       blink   tr1, r63
-       fmov.dq dr2, r5
-       blink   tr1, r63
-       fmov.dq dr4, r5
-       blink   tr1, r63
-       fmov.dq dr6, r5
-       blink   tr1, r63
-       /* From r6 on there are separate "high" (dr8/dr10) and "low"
-          (dr0..dr6) handlers; the high ones clear a 4-bit mask
-          (15 << n), the low ones a 3-bit mask (7 << n).  */
-LOCAL(ct_r6_fph):      /* Copy r6 from a high FP register.  */
-       /* It must be dr8.  */
-       fmov.dq dr8, r6
-       movi    15, r30
-       shlli   r30, 16, r31
-       andc    r1, r31, r1
-       blink   tr1, r63
-LOCAL(ct_r6_fpl):      /* Copy r6 from a low FP register.  */
-       shlri   r1, 16 - 3, r34
-       andi    r34, 3 << 3, r33
-       addi    r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
-LOCAL(ct_r6_fp_base):
-       ptrel/l r32, tr2
-       movi    7, r30
-       shlli   r30, 16, r31
-       andc    r1, r31, r1
-       blink   tr2, r63
-LOCAL(ct_r6_fp_copy):
-       fmov.dq dr0, r6
-       blink   tr1, r63
-       fmov.dq dr2, r6
-       blink   tr1, r63
-       fmov.dq dr4, r6
-       blink   tr1, r63
-       fmov.dq dr6, r6
-       blink   tr1, r63
-LOCAL(ct_r7_fph):      /* Copy r7 from a high FP register.  */
-       /* It is either dr8 or dr10.  */
-       movi    15 << 12, r31
-       /* r32 = cookie >> 12 before clearing: 8 keeps dr8, else dr10.  */
-       shlri   r1, 12, r32
-       andc    r1, r31, r1
-       fmov.dq dr8, r7
-       beqi/l  r32, 8, tr1
-       fmov.dq dr10, r7
-       blink   tr1, r63
-LOCAL(ct_r7_fpl):      /* Copy r7 from a low FP register.  */
-       shlri   r1, 12 - 3, r34
-       andi    r34, 3 << 3, r33
-       addi    r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
-LOCAL(ct_r7_fp_base):
-       ptrel/l r32, tr2
-       movi    7 << 12, r31
-       andc    r1, r31, r1
-       blink   tr2, r63
-LOCAL(ct_r7_fp_copy):
-       fmov.dq dr0, r7
-       blink   tr1, r63
-       fmov.dq dr2, r7
-       blink   tr1, r63
-       fmov.dq dr4, r7
-       blink   tr1, r63
-       fmov.dq dr6, r7
-       blink   tr1, r63
-LOCAL(ct_r8_fph):      /* Copy r8 from a high FP register.  */
-       /* It is either dr8 or dr10.  */
-       movi    15 << 8, r31
-       /* Bit 8 clear keeps the dr8 copy; otherwise dr10 replaces it.  */
-       andi    r1, 1 << 8, r32
-       andc    r1, r31, r1
-       fmov.dq dr8, r8
-       beq/l   r32, r63, tr1
-       fmov.dq dr10, r8
-       blink   tr1, r63
-LOCAL(ct_r8_fpl):      /* Copy r8 from a low FP register.  */
-       shlri   r1, 8 - 3, r34
-       andi    r34, 3 << 3, r33
-       addi    r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
-LOCAL(ct_r8_fp_base):
-       ptrel/l r32, tr2
-       movi    7 << 8, r31
-       andc    r1, r31, r1
-       blink   tr2, r63
-LOCAL(ct_r8_fp_copy):
-       fmov.dq dr0, r8
-       blink   tr1, r63
-       fmov.dq dr2, r8
-       blink   tr1, r63
-       fmov.dq dr4, r8
-       blink   tr1, r63
-       fmov.dq dr6, r8
-       blink   tr1, r63
-LOCAL(ct_r9_fph):      /* Copy r9 from a high FP register.  */
-       /* It is either dr8 or dr10.  */
-       movi    15 << 4, r31
-       /* Bit 4 clear keeps the dr8 copy; otherwise dr10 replaces it.  */
-       andi    r1, 1 << 4, r32
-       andc    r1, r31, r1
-       fmov.dq dr8, r9
-       beq/l   r32, r63, tr1
-       fmov.dq dr10, r9
-       blink   tr1, r63
-LOCAL(ct_r9_fpl):      /* Copy r9 from a low FP register.  */
-       shlri   r1, 4 - 3, r34
-       andi    r34, 3 << 3, r33
-       addi    r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
-LOCAL(ct_r9_fp_base):
-       ptrel/l r32, tr2
-       movi    7 << 4, r31
-       andc    r1, r31, r1
-       blink   tr2, r63
-LOCAL(ct_r9_fp_copy):
-       fmov.dq dr0, r9
-       blink   tr1, r63
-       fmov.dq dr2, r9
-       blink   tr1, r63
-       fmov.dq dr4, r9
-       blink   tr1, r63
-       fmov.dq dr6, r9
-       blink   tr1, r63
-       /* Memory-load handlers.  ct_rN_ld masks two bits of the cookie
-          (3 << n) for register N and clears them.  When both bits were
-          set it branches to ct_rN_load, which loads rN from the address
-          rN holds and returns to the dispatch loop through tr1.
-          Otherwise it puts rN + 8 into the next register, loads rN from
-          the address it held, and falls through to the next register's
-          handler, so a run of registers is filled from consecutive
-          8-byte slots.  */
-LOCAL(ct_r2_ld):       /* Copy r2 from a memory address.  */
-       pt/l    LOCAL(ct_r2_load), tr2
-       movi    3, r30
-       shlli   r30, 29, r31
-       and     r1, r31, r32
-       andc    r1, r31, r1
-       beq/l   r31, r32, tr2
-       addi.l  r2, 8, r3
-       ldx.q   r2, r63, r2
-       /* Fall through.  */
-LOCAL(ct_r3_ld):       /* Copy r3 from a memory address.  */
-       pt/l    LOCAL(ct_r3_load), tr2
-       movi    3, r30
-       shlli   r30, 26, r31
-       and     r1, r31, r32
-       andc    r1, r31, r1
-       beq/l   r31, r32, tr2
-       addi.l  r3, 8, r4
-       ldx.q   r3, r63, r3
-LOCAL(ct_r4_ld):       /* Copy r4 from a memory address.  */
-       pt/l    LOCAL(ct_r4_load), tr2
-       movi    3, r30
-       shlli   r30, 23, r31
-       and     r1, r31, r32
-       andc    r1, r31, r1
-       beq/l   r31, r32, tr2
-       addi.l  r4, 8, r5
-       ldx.q   r4, r63, r4
-LOCAL(ct_r5_ld):       /* Copy r5 from a memory address.  */
-       pt/l    LOCAL(ct_r5_load), tr2
-       movi    3, r30
-       shlli   r30, 20, r31
-       and     r1, r31, r32
-       andc    r1, r31, r1
-       beq/l   r31, r32, tr2
-       addi.l  r5, 8, r6
-       ldx.q   r5, r63, r5
-LOCAL(ct_r6_ld):       /* Copy r6 from a memory address.  */
-       pt/l    LOCAL(ct_r6_load), tr2
-       movi    3 << 16, r31
-       and     r1, r31, r32
-       andc    r1, r31, r1
-       beq/l   r31, r32, tr2
-       addi.l  r6, 8, r7
-       ldx.q   r6, r63, r6
-LOCAL(ct_r7_ld):       /* Copy r7 from a memory address.  */
-       pt/l    LOCAL(ct_r7_load), tr2
-       movi    3 << 12, r31
-       and     r1, r31, r32
-       andc    r1, r31, r1
-       beq/l   r31, r32, tr2
-       addi.l  r7, 8, r8
-       ldx.q   r7, r63, r7
-LOCAL(ct_r8_ld):       /* Copy r8 from a memory address.  */
-       pt/l    LOCAL(ct_r8_load), tr2
-       movi    3 << 8, r31
-       and     r1, r31, r32
-       andc    r1, r31, r1
-       beq/l   r31, r32, tr2
-       addi.l  r8, 8, r9
-       ldx.q   r8, r63, r8
-LOCAL(ct_r9_ld):       /* Copy r9 from a memory address.  */
-       /* Last register of the run: load it and branch to ct_check_tramp
-          rather than back to the dispatch loop.  */
-       pt/l    LOCAL(ct_check_tramp), tr2
-       ldx.q   r9, r63, r9
-       blink   tr2, r63
-       /* Single-register loads, reached from ct_rN_ld when both tested
-          cookie bits were set.  */
-LOCAL(ct_r2_load):
-       ldx.q   r2, r63, r2
-       blink   tr1, r63
-LOCAL(ct_r3_load):
-       ldx.q   r3, r63, r3
-       blink   tr1, r63
-LOCAL(ct_r4_load):
-       ldx.q   r4, r63, r4
-       blink   tr1, r63
-LOCAL(ct_r5_load):
-       ldx.q   r5, r63, r5
-       blink   tr1, r63
-LOCAL(ct_r6_load):
-       ldx.q   r6, r63, r6
-       blink   tr1, r63
-LOCAL(ct_r7_load):
-       ldx.q   r7, r63, r7
-       blink   tr1, r63
-LOCAL(ct_r8_load):
-       ldx.q   r8, r63, r8
-       blink   tr1, r63
-       /* Stack-pop handlers: load one 64-bit value from the address in
-          r15, advance r15 by 8, clear the single cookie bit that
-          selected the handler, and return to the dispatch loop through
-          tr1.  */
-LOCAL(ct_r2_pop):      /* Pop r2 from the stack.  */
-       movi    1, r30
-       ldx.q   r15, r63, r2
-       shlli   r30, 29, r31
-       addi.l  r15, 8, r15
-       andc    r1, r31, r1
-       blink   tr1, r63
-LOCAL(ct_r3_pop):      /* Pop r3 from the stack.  */
-       movi    1, r30
-       ldx.q   r15, r63, r3
-       shlli   r30, 26, r31
-       addi.l  r15, 8, r15
-       andc    r1, r31, r1
-       blink   tr1, r63
-LOCAL(ct_r4_pop):      /* Pop r4 from the stack.  */
-       movi    1, r30
-       ldx.q   r15, r63, r4
-       shlli   r30, 23, r31
-       addi.l  r15, 8, r15
-       andc    r1, r31, r1
-       blink   tr1, r63
-LOCAL(ct_r5_pop):      /* Pop r5 from the stack.  */
-       movi    1, r30
-       ldx.q   r15, r63, r5
-       shlli   r30, 20, r31
-       addi.l  r15, 8, r15
-       andc    r1, r31, r1
-       blink   tr1, r63
-LOCAL(ct_r6_pop):      /* Pop r6 from the stack.  */
-       movi    1, r30
-       ldx.q   r15, r63, r6
-       shlli   r30, 16, r31
-       addi.l  r15, 8, r15
-       andc    r1, r31, r1
-       blink   tr1, r63
-LOCAL(ct_r7_pop):      /* Pop r7 from the stack.  */
-       ldx.q   r15, r63, r7
-       movi    1 << 12, r31
-       addi.l  r15, 8, r15
-       andc    r1, r31, r1
-       blink   tr1, r63
-LOCAL(ct_r8_pop):      /* Pop r8 from the stack.  */
-       ldx.q   r15, r63, r8
-       movi    1 << 8, r31
-       addi.l  r15, 8, r15
-       andc    r1, r31, r1
-       blink   tr1, r63
-LOCAL(ct_pop_seq):     /* Pop a sequence of registers off the stack.  */
-       /* r30 = cookie & (7 << 1), i.e. twice a 3-bit count; shifted
-          left by 2 it becomes count * 8.  The branch target is
-          ct_end_of_pop_seq - count * 8, so the last COUNT ldx.q/addi.l
-          pairs below run, always ending with r9, and execution then
-          continues with the code that follows ct_end_of_pop_seq.
-          NOTE(review): this assumes each pair occupies 8 bytes (two
-          4-byte instructions) -- confirm.  */
-       andi    r1, 7 << 1, r30
-       movi    (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
-       shlli   r30, 2, r31
-       shori   LOCAL(ct_end_of_pop_seq) & 65535, r32
-       sub.l   r32, r31, r33
-       ptabs/l r33, tr2
-       blink   tr2, r63
-LOCAL(ct_start_of_pop_seq):    /* Beginning of pop sequence.  */
-       ldx.q   r15, r63, r3
-       addi.l  r15, 8, r15
-       ldx.q   r15, r63, r4
-       addi.l  r15, 8, r15
-       ldx.q   r15, r63, r5
-       addi.l  r15, 8, r15
-       ldx.q   r15, r63, r6
-       addi.l  r15, 8, r15
-       ldx.q   r15, r63, r7
-       addi.l  r15, 8, r15
-       ldx.q   r15, r63, r8
-       addi.l  r15, 8, r15
-LOCAL(ct_r9_pop):      /* Pop r9 from the stack.  */
-       ldx.q   r15, r63, r9
-       addi.l  r15, 8, r15
-LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction.  */
-LOCAL(ct_check_tramp): /* Check whether we need a trampoline.  */
-       /* Bit 0 of the cookie selects ct_ret_wide; when it is clear,
-          control falls into ct_call_func.  */
-       pt/u    LOCAL(ct_ret_wide), tr2
-       andi    r1, 1, r1
-       bne/u   r1, r63, tr2
-LOCAL(ct_call_func):   /* Just branch to the function.  */
-       blink   tr0, r63
-LOCAL(ct_ret_wide):    /* Call the function, so that we can unpack its 
-                          64-bit return value.  */
-       /* Keep the incoming r18 in r10, call through tr0 with r18 as the
-          link register, then point tr0 at the address saved in r10 for
-          the final return.  */
-       add.l   r18, r63, r10
-       blink   tr0, r18
-       ptabs   r10, tr0
-       /* Split the 64-bit value in r2 into two 32-bit halves in r2 and
-          r3; which half lands in which register depends on
-          endianness.  */
-#if __LITTLE_ENDIAN__
-       shari   r2, 32, r3
-       add.l   r2, r63, r2
-#else
-       add.l   r2, r63, r3
-       shari   r2, 32, r2
-#endif
-       blink   tr0, r63
-
-       ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
-#endif /* L_shcompact_call_trampoline */
-
-#ifdef L_shcompact_return_trampoline
-     /* This function does the converse of the code in `ret_wide'
-       above.  It is tail-called by SHcompact functions returning
-       64-bit non-floating-point values, to pack the 32-bit values in
-       r2 and r3 into r2.  */
-
-       .mode   SHmedia
-       .section        .text..SHmedia32, "ax"
-       .align  2
-       .global GLOBAL(GCC_shcompact_return_trampoline)
-       HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
-GLOBAL(GCC_shcompact_return_trampoline):
-       /* The return goes through the address in r18.  */
-       ptabs/l r18, tr0
-       /* One register is reduced with addz.l and the other is shifted
-          into the upper 32 bits; endianness decides which is which.
-          The two are then ORed together into r2.  */
-#if __LITTLE_ENDIAN__
-       addz.l  r2, r63, r2
-       shlli   r3, 32, r3
-#else
-       addz.l  r3, r63, r3
-       shlli   r2, 32, r2
-#endif
-       or      r3, r2, r2
-       blink   tr0, r63
-
-       ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
-#endif /* L_shcompact_return_trampoline */
-
-#ifdef L_shcompact_incoming_args
-       .section        .rodata
-       .align  1
-       /* Dispatch table for GCC_shcompact_incoming_args: 33 16-bit
-          entries, each the offset of a handler from ia_main_label.  The
-          function indexes it with twice the nsb of the cookie, through
-          a table address biased by -31 * 2.
-          NOTE(review): that would make the first entry the case where
-          bit 31 is the highest set cookie bit and the last entry the
-          all-zero cookie -- confirm against the nsb definition.  */
-LOCAL(ia_main_table):
-.word  1 /* Invalid, just loop */
-.word  LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
-.word  LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
-.word  1 /* Invalid, just loop */
-.word  LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
-.word  LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
-.word  1 /* Invalid, just loop */
-.word  LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
-.word  LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
-.word  1 /* Invalid, just loop */
-.word  LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
-.word  LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
-.word  1 /* Invalid, just loop */
-.word  1 /* Invalid, just loop */
-.word  LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
-.word  LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
-.word  1 /* Invalid, just loop */
-.word  1 /* Invalid, just loop */
-.word  LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
-.word  LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
-.word  1 /* Invalid, just loop */
-.word  1 /* Invalid, just loop */
-.word  LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
-.word  LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
-.word  1 /* Invalid, just loop */
-.word  1 /* Invalid, just loop */
-.word  LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
-.word  LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
-.word  LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
-.word  LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
-.word  LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
-.word  LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
-.word  LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
-       .mode   SHmedia
-       .section        .text..SHmedia32, "ax"
-       .align  2
-       
-     /* This function stores 64-bit general-purpose registers back in
-       the stack, and loads the address in which each register
-       was stored into itself.  The lower 32 bits of r17 hold the address
-       to begin storing, and the upper 32 bits of r17 hold the cookie.
-       Its execution time is linear on the
-       number of registers that actually have to be copied, and it is
-       optimized for structures larger than 64 bits, as opposed to
-       individual `long long' arguments.  See sh.h for details on the
-       actual bit pattern.  */
-       
-       .global GLOBAL(GCC_shcompact_incoming_args)
-       FUNC(GLOBAL(GCC_shcompact_incoming_args))
-GLOBAL(GCC_shcompact_incoming_args):
-       ptabs/l r18, tr0        /* Prepare to return.  */
-       shlri   r17, 32, r0     /* Load the cookie.  */
-       /* The movi/shori pair builds in r43 the address of ia_main_table
-          biased by -31 * 2: upper 16 bits first, then the lower 16.  */
-       movi    ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
-       /* tr1 = ia_loop; the handlers branch back to the loop through it.  */
-       pt/l    LOCAL(ia_loop), tr1
-       /* Reduce r17 to its 32-bit address part now that the cookie has
-          been extracted into r0.  */
-       add.l   r17, r63, r17
-       shori   ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
-LOCAL(ia_loop):
-       /* Dispatch on the cookie: r36 = nsb (r0), doubled in r37 to index
-          the table of 16-bit entries.  Each entry is a handler offset
-          relative to ia_main_label, which is where ptrel converts it
-          into a branch target.  */
-       nsb     r0, r36
-       shlli   r36, 1, r37
-       ldx.w   r43, r37, r38
-LOCAL(ia_main_label):
-       ptrel/l r38, tr2
-       blink   tr2, r63
-       /* Store-and-load-address handlers.  ia_rN_ld masks two bits of
-          the cookie (3 << n) for register N and clears them, stores rN
-          at the address in r17, replaces rN with that address and
-          advances r17 by 8.  When both masked bits were set it returns
-          to the dispatch loop through tr1; otherwise it falls through
-          to the next register's handler.  */
-LOCAL(ia_r2_ld):       /* Store r2 and load its address.  */
-       movi    3, r38
-       shlli   r38, 29, r39
-       and     r0, r39, r40
-       andc    r0, r39, r0
-       stx.q   r17, r63, r2
-       add.l   r17, r63, r2
-       addi.l  r17, 8, r17
-       beq/u   r39, r40, tr1
-LOCAL(ia_r3_ld):       /* Store r3 and load its address.  */
-       movi    3, r38
-       shlli   r38, 26, r39
-       and     r0, r39, r40
-       andc    r0, r39, r0
-       stx.q   r17, r63, r3
-       add.l   r17, r63, r3
-       addi.l  r17, 8, r17
-       beq/u   r39, r40, tr1
-LOCAL(ia_r4_ld):       /* Store r4 and load its address.  */
-       movi    3, r38
-       shlli   r38, 23, r39
-       and     r0, r39, r40
-       andc    r0, r39, r0
-       stx.q   r17, r63, r4
-       add.l   r17, r63, r4
-       addi.l  r17, 8, r17
-       beq/u   r39, r40, tr1
-LOCAL(ia_r5_ld):       /* Store r5 and load its address.  */
-       movi    3, r38
-       shlli   r38, 20, r39
-       and     r0, r39, r40
-       andc    r0, r39, r0
-       stx.q   r17, r63, r5
-       add.l   r17, r63, r5
-       addi.l  r17, 8, r17
-       beq/u   r39, r40, tr1
-LOCAL(ia_r6_ld):       /* Store r6 and load its address.  */
-       movi    3, r38
-       shlli   r38, 16, r39
-       and     r0, r39, r40
-       andc    r0, r39, r0
-       stx.q   r17, r63, r6
-       add.l   r17, r63, r6
-       addi.l  r17, 8, r17
-       beq/u   r39, r40, tr1
-LOCAL(ia_r7_ld):       /* Store r7 and load its address.  */
-       movi    3 << 12, r39
-       and     r0, r39, r40
-       andc    r0, r39, r0
-       stx.q   r17, r63, r7
-       add.l   r17, r63, r7
-       addi.l  r17, 8, r17
-       beq/u   r39, r40, tr1
-LOCAL(ia_r8_ld):       /* Store r8 and load its address.  */
-       movi    3 << 8, r39
-       and     r0, r39, r40
-       andc    r0, r39, r0
-       stx.q   r17, r63, r8
-       add.l   r17, r63, r8
-       addi.l  r17, 8, r17
-       beq/u   r39, r40, tr1
-LOCAL(ia_r9_ld):       /* Store r9 and load its address.  */
-       /* Last register of the run: no cookie test, no r17 update;
-          return to the caller through tr0.  */
-       stx.q   r17, r63, r9
-       add.l   r17, r63, r9
-       blink   tr0, r63
-       /* Push handlers: clear the single cookie bit that selected the
-          handler, store the register at the address in r17, advance
-          r17 by 8 and return to the dispatch loop through tr1.  Unlike
-          the ia_rN_ld handlers, the register itself is left
-          unchanged.  */
-LOCAL(ia_r2_push):     /* Push r2 onto the stack.  */
-       movi    1, r38
-       shlli   r38, 29, r39
-       andc    r0, r39, r0
-       stx.q   r17, r63, r2
-       addi.l  r17, 8, r17
-       blink   tr1, r63
-LOCAL(ia_r3_push):     /* Push r3 onto the stack.  */
-       movi    1, r38
-       shlli   r38, 26, r39
-       andc    r0, r39, r0
-       stx.q   r17, r63, r3
-       addi.l  r17, 8, r17
-       blink   tr1, r63
-LOCAL(ia_r4_push):     /* Push r4 onto the stack.  */
-       movi    1, r38
-       shlli   r38, 23, r39
-       andc    r0, r39, r0
-       stx.q   r17, r63, r4
-       addi.l  r17, 8, r17
-       blink   tr1, r63
-LOCAL(ia_r5_push):     /* Push r5 onto the stack.  */
-       movi    1, r38
-       shlli   r38, 20, r39
-       andc    r0, r39, r0
-       stx.q   r17, r63, r5
-       addi.l  r17, 8, r17
-       blink   tr1, r63
-LOCAL(ia_r6_push):     /* Push r6 onto the stack.  */
-       movi    1, r38
-       shlli   r38, 16, r39
-       andc    r0, r39, r0
-       stx.q   r17, r63, r6
-       addi.l  r17, 8, r17
-       blink   tr1, r63
-LOCAL(ia_r7_push):     /* Push r7 onto the stack.  */
-       movi    1 << 12, r39
-       andc    r0, r39, r0
-       stx.q   r17, r63, r7
-       addi.l  r17, 8, r17
-       blink   tr1, r63
-LOCAL(ia_r8_push):     /* Push r8 onto the stack.  */
-       movi    1 << 8, r39
-       andc    r0, r39, r0
-       stx.q   r17, r63, r8
-       addi.l  r17, 8, r17
-       blink   tr1, r63
-LOCAL(ia_push_seq):    /* Push a sequence of registers onto the stack.  */
-       /* r38 = cookie & (7 << 1), i.e. twice a 3-bit count; shifted
-          left by 2 it becomes count * 8.  The branch target is
-          ia_end_of_push_seq - count * 8, so the last COUNT stores below
-          run, ending with r9 followed by the return.
-          NOTE(review): this assumes 4-byte instructions, so that every
-          8-byte step back covers one stx.q/addi.l pair (or the final
-          stx.q/blink pair) -- confirm.
-          NOTE(review): a count of 0 would branch to ia_end_of_push_seq
-          itself, which lies after the final blink; presumably the
-          cookie never encodes that here -- confirm against sh.h.  */
-       andi    r0, 7 << 1, r38
-       movi    (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
-       shlli   r38, 2, r39
-       shori   LOCAL(ia_end_of_push_seq) & 65535, r40
-       sub.l   r40, r39, r41
-       ptabs/l r41, tr2
-       blink   tr2, r63
-       /* NOTE(review): "stack" in the next label looks like a typo for
-          "start" (compare ct_start_of_pop_seq); the label is not
-          referenced in the code visible here, so it is left as is.  */
-LOCAL(ia_stack_of_push_seq):    /* Beginning of push sequence.  */
-       stx.q   r17, r63, r3
-       addi.l  r17, 8, r17
-       stx.q   r17, r63, r4
-       addi.l  r17, 8, r17
-       stx.q   r17, r63, r5
-       addi.l  r17, 8, r17
-       stx.q   r17, r63, r6
-       addi.l  r17, 8, r17
-       stx.q   r17, r63, r7
-       addi.l  r17, 8, r17
-       stx.q   r17, r63, r8
-       addi.l  r17, 8, r17
-LOCAL(ia_r9_push):     /* Push r9 onto the stack.  */
-       stx.q   r17, r63, r9
-LOCAL(ia_return):      /* Return.  */
-       blink   tr0, r63
-LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction.  */
-       ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
-#endif /* L_shcompact_incoming_args */
-#endif
-#if __SH5__
-#ifdef L_nested_trampoline
-#if __SH5__ == 32
-       .section        .text..SHmedia32,"ax"
-#else
-       .text
-#endif
-       .align  3 /* It is copied in units of 8 bytes in SHmedia mode.  */
-       .global GLOBAL(GCC_nested_trampoline)
-       HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
-       /* Code for a nested-function trampoline.  It obtains an address
-          for itself (ptrel with r63 as the offset, then gettr), loads a
-          branch target from offset 24 relative to that address, loads a
-          second value into r1 from offset 32 (__SH5__ == 64) or 28
-          (otherwise), and branches to the target.
-          NOTE(review): the two loaded words are presumably the target
-          function address and the static chain value stored next to a
-          copy of this code -- confirm against the trampoline setup in
-          the compiler.  */
-GLOBAL(GCC_nested_trampoline):
-       .mode   SHmedia
-       ptrel/u r63, tr0
-       gettr   tr0, r0
-#if __SH5__ == 64
-       ld.q    r0, 24, r1
-#else
-       ld.l    r0, 24, r1
-#endif
-       ptabs/l r1, tr1
-#if __SH5__ == 64
-       ld.q    r0, 32, r1
-#else
-       ld.l    r0, 28, r1
-#endif
-       blink   tr1, r63
-
-       ENDFUNC(GLOBAL(GCC_nested_trampoline))
-#endif /* L_nested_trampoline */
-#endif /* __SH5__ */
-#if __SH5__ == 32
-#ifdef L_push_pop_shmedia_regs
-       .section        .text..SHmedia32,"ax"
-       .mode   SHmedia
-       .align  2
-       /* Register-save helper.  When __SH4_NOFPU__ is not defined the
-          entry point is GCC_push_shmedia_regs: it first lowers r15 by
-          14 * 8 and stores dr36..dr62 there, then runs on into the
-          common part.  When __SH4_NOFPU__ is defined the entry point is
-          GCC_push_shmedia_regs_nofpu and only the common part exists.
-          The common part lowers r15 by 27 * 8, stores the contents of
-          tr5..tr7 (read through r60..r62, which are overwritten in the
-          process) in slots 24..26 and r28..r35, r44..r59 in the slots
-          below them, and returns through r18.  */
-#ifndef __SH4_NOFPU__  
-       .global GLOBAL(GCC_push_shmedia_regs)
-       FUNC(GLOBAL(GCC_push_shmedia_regs))
-GLOBAL(GCC_push_shmedia_regs):
-       addi.l  r15, -14*8, r15
-       fst.d   r15, 13*8, dr62
-       fst.d   r15, 12*8, dr60
-       fst.d   r15, 11*8, dr58
-       fst.d   r15, 10*8, dr56
-       fst.d   r15,  9*8, dr54
-       fst.d   r15,  8*8, dr52
-       fst.d   r15,  7*8, dr50
-       fst.d   r15,  6*8, dr48
-       fst.d   r15,  5*8, dr46
-       fst.d   r15,  4*8, dr44
-       fst.d   r15,  3*8, dr42
-       fst.d   r15,  2*8, dr40
-       fst.d   r15,  1*8, dr38
-       fst.d   r15,  0*8, dr36
-#else /* ! __SH4_NOFPU__ */
-       .global GLOBAL(GCC_push_shmedia_regs_nofpu)
-       FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
-GLOBAL(GCC_push_shmedia_regs_nofpu):
-#endif /* ! __SH4_NOFPU__ */
-       ptabs/l r18, tr0
-       addi.l  r15, -27*8, r15
-       gettr   tr7, r62
-       gettr   tr6, r61
-       gettr   tr5, r60
-       st.q    r15, 26*8, r62
-       st.q    r15, 25*8, r61
-       st.q    r15, 24*8, r60
-       st.q    r15, 23*8, r59
-       st.q    r15, 22*8, r58
-       st.q    r15, 21*8, r57
-       st.q    r15, 20*8, r56
-       st.q    r15, 19*8, r55
-       st.q    r15, 18*8, r54
-       st.q    r15, 17*8, r53
-       st.q    r15, 16*8, r52
-       st.q    r15, 15*8, r51
-       st.q    r15, 14*8, r50
-       st.q    r15, 13*8, r49
-       st.q    r15, 12*8, r48
-       st.q    r15, 11*8, r47
-       st.q    r15, 10*8, r46
-       st.q    r15,  9*8, r45
-       st.q    r15,  8*8, r44
-       st.q    r15,  7*8, r35
-       st.q    r15,  6*8, r34
-       st.q    r15,  5*8, r33
-       st.q    r15,  4*8, r32
-       st.q    r15,  3*8, r31
-       st.q    r15,  2*8, r30
-       st.q    r15,  1*8, r29
-       st.q    r15,  0*8, r28
-       blink   tr0, r63
-#ifndef __SH4_NOFPU__  
-       ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
-#else
-       ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
-#endif
-       /* Register-restore helper, the counterpart of the push helper
-          above.  When __SH4_NOFPU__ is not defined the entry point is
-          GCC_pop_shmedia_regs: it sets r0 to 41 * 8, reloads dr36..dr62
-          from slots 27..40 and branches to .L0, skipping the movi that
-          belongs to the other configuration.  When __SH4_NOFPU__ is
-          defined the entry point is GCC_pop_shmedia_regs_nofpu, which
-          sets r0 to 27 * 8.  From .L0 on, slots 24..26 are reloaded
-          into r60..r62 and copied to tr5..tr7, r28..r35 and r44..r59
-          are reloaded, r15 is advanced by r0, and the function returns
-          through r18.  */
-#ifndef __SH4_NOFPU__  
-       .global GLOBAL(GCC_pop_shmedia_regs)
-       FUNC(GLOBAL(GCC_pop_shmedia_regs))
-GLOBAL(GCC_pop_shmedia_regs):
-       pt      .L0, tr1
-       movi    41*8, r0
-       fld.d   r15, 40*8, dr62
-       fld.d   r15, 39*8, dr60
-       fld.d   r15, 38*8, dr58
-       fld.d   r15, 37*8, dr56
-       fld.d   r15, 36*8, dr54
-       fld.d   r15, 35*8, dr52
-       fld.d   r15, 34*8, dr50
-       fld.d   r15, 33*8, dr48
-       fld.d   r15, 32*8, dr46
-       fld.d   r15, 31*8, dr44
-       fld.d   r15, 30*8, dr42
-       fld.d   r15, 29*8, dr40
-       fld.d   r15, 28*8, dr38
-       fld.d   r15, 27*8, dr36
-       blink   tr1, r63
-#else /* ! __SH4_NOFPU__       */
-       .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
-       FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
-GLOBAL(GCC_pop_shmedia_regs_nofpu):
-#endif /* ! __SH4_NOFPU__      */
-       movi    27*8, r0
-.L0:
-       ptabs   r18, tr0
-       ld.q    r15, 26*8, r62
-       ld.q    r15, 25*8, r61
-       ld.q    r15, 24*8, r60
-       ptabs   r62, tr7
-       ptabs   r61, tr6
-       ptabs   r60, tr5
-       ld.q    r15, 23*8, r59
-       ld.q    r15, 22*8, r58
-       ld.q    r15, 21*8, r57
-       ld.q    r15, 20*8, r56
-       ld.q    r15, 19*8, r55
-       ld.q    r15, 18*8, r54
-       ld.q    r15, 17*8, r53
-       ld.q    r15, 16*8, r52
-       ld.q    r15, 15*8, r51
-       ld.q    r15, 14*8, r50
-       ld.q    r15, 13*8, r49
-       ld.q    r15, 12*8, r48
-       ld.q    r15, 11*8, r47
-       ld.q    r15, 10*8, r46
-       ld.q    r15,  9*8, r45
-       ld.q    r15,  8*8, r44
-       ld.q    r15,  7*8, r35
-       ld.q    r15,  6*8, r34
-       ld.q    r15,  5*8, r33
-       ld.q    r15,  4*8, r32
-       ld.q    r15,  3*8, r31
-       ld.q    r15,  2*8, r30
-       ld.q    r15,  1*8, r29
-       ld.q    r15,  0*8, r28
-       add.l   r15, r0, r15
-       blink   tr0, r63
-
-#ifndef __SH4_NOFPU__
-       ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
-#else
-       ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
-#endif
-#endif /* L_push_pop_shmedia_regs */
-#endif /* __SH5__ == 32 */
-
-#ifdef L_div_table
-#if __SH5__
-#if defined(__pic__) && defined(__SHMEDIA__)
-       .global GLOBAL(sdivsi3)
-       FUNC(GLOBAL(sdivsi3))
-#if __SH5__ == 32
-       .section        .text..SHmedia32,"ax"
-#else
-       .text
-#endif
-#if 0
-/* ??? FIXME: Presumably due to a linker bug, exporting data symbols
-   in a text section does not work (at least for shared libraries):
-   the linker sets the LSB of the address as if this was SHmedia code.  */
-#define TEXT_DATA_BUG
-#endif
-       .align  2
-/* Division helper assembled only for PIC SHmedia code.  Per the notes
-   below it takes its operands in r4 and r5 and leaves the result in r0.
-   It normalizes r5, fetches an initial estimate for it from the
-   division table (a byte factor and a 16-bit constant selected by the
-   top bits of the normalized value) and refines that estimate with
-   multiplies; the trailing comment on each instruction tracks the
-   fixed-point format of the value it produces.  The return address is
-   moved from r18 into tr0 part-way through because r18 is reused as a
-   scratch register afterwards.
-   NOTE(review): presumably r4 is the dividend and r5 the divisor --
-   confirm against the callers.  */
- // inputs: r4,r5
- // clobbered: r1,r18,r19,r20,r21,r25,tr0
- // result in r0
- .global GLOBAL(sdivsi3)
-GLOBAL(sdivsi3):
-/* TEXT_DATA_BUG is only defined inside the "#if 0" block above, so as
-   written the second alternative is the one that gets assembled.  */
-#ifdef TEXT_DATA_BUG
- ptb datalabel Local_div_table,tr0
-#else
- ptb GLOBAL(div_table_internal),tr0
-#endif
- nsb r5, r1
- shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
- shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
- /* bubble */
- gettr tr0,r20
- ldx.ub r20, r21, r19 // u0.8
- shari r25, 32, r25   // normalize to s2.30
- shlli r21, 1, r21
- muls.l r25, r19, r19 // s2.38
- ldx.w r20, r21, r21  // s2.14
-  ptabs r18, tr0
- shari r19, 24, r19   // truncate to s2.14
- sub r21, r19, r19    // some 11 bit inverse in s1.14
- muls.l r19, r19, r21 // u0.28
-  sub r63, r1, r1
-  addi r1, 92, r1
- muls.l r25, r21, r18 // s2.58
- shlli r19, 45, r19   // multiply by two and convert to s2.58
-  /* bubble */
- sub r19, r18, r18
- shari r18, 28, r18   // some 22 bit inverse in s1.30
- muls.l r18, r25, r0  // s2.60
-  muls.l r18, r4, r25 // s32.30
-  /* bubble */
- shari r0, 16, r19   // s-16.44
- muls.l r19, r18, r19 // s-16.74
-  shari r25, 63, r0
-  shari r4, 14, r18   // s19.-14
- shari r19, 30, r19   // s-16.44
- muls.l r19, r18, r19 // s15.30
-  xor r21, r0, r21    // You could also use the constant 1 << 27.
-  add r21, r25, r21
- sub r21, r19, r21
- shard r21, r1, r21
- sub r21, r0, r0
- blink tr0, r63
-       ENDFUNC(GLOBAL(sdivsi3))
-/* This table has been generated by divtab.c .
-Defects for bias -330:
-   Max defect: 6.081536e-07 at -1.000000e+00
-   Min defect: 2.849516e-08 at 1.030651e+00
-   Max 2nd step defect: 9.606539e-12 at -1.000000e+00
-   Min 2nd step defect: 0.000000e+00 at 0.000000e+00
-   Defect at 1: 1.238659e-07
-   Defect at -2: 1.061708e-07 */
-#else /* ! __pic__ || ! __SHMEDIA__ */
-       .section        .rodata
-#endif /* __pic__ */
-#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
-/* File-local copy of the division table, assembled only when the
-   TEXT_DATA_BUG workaround is defined for PIC SHmedia code.  The
-   Local_div_table label sits in the middle of the 128-byte object:
-   16 negative 16-bit constants, 16 negative byte factors and 16 bytes
-   of padding precede it; 16 bytes of padding, 16 positive byte factors
-   and 16 positive 16-bit constants follow it.  Code that indexes from
-   the label can therefore use negative as well as positive offsets.
-   The trailing .section directive switches to .rodata for the data
-   that follows.  */
-       .balign 2
-       .type   Local_div_table,@object
-       .size   Local_div_table,128
-/* negative division constants */
-       .word   -16638
-       .word   -17135
-       .word   -17737
-       .word   -18433
-       .word   -19103
-       .word   -19751
-       .word   -20583
-       .word   -21383
-       .word   -22343
-       .word   -23353
-       .word   -24407
-       .word   -25582
-       .word   -26863
-       .word   -28382
-       .word   -29965
-       .word   -31800
-/* negative division factors */
-       .byte   66
-       .byte   70
-       .byte   75
-       .byte   81
-       .byte   87
-       .byte   93
-       .byte   101
-       .byte   109
-       .byte   119
-       .byte   130
-       .byte   142
-       .byte   156
-       .byte   172
-       .byte   192
-       .byte   214
-       .byte   241
-       .skip 16
-Local_div_table:
-       .skip 16
-/* positive division factors */
-       .byte   241
-       .byte   214
-       .byte   192
-       .byte   172
-       .byte   156
-       .byte   142
-       .byte   130
-       .byte   119
-       .byte   109
-       .byte   101
-       .byte   93
-       .byte   87
-       .byte   81
-       .byte   75
-       .byte   70
-       .byte   66
-/* positive division constants */
-       .word   31801
-       .word   29966
-       .word   28383
-       .word   26864
-       .word   25583
-       .word   24408
-       .word   23354
-       .word   22344
-       .word   21384
-       .word   20584
-       .word   19752
-       .word   19104
-       .word   18434
-       .word   17738
-       .word   17136
-       .word   16639
-       .section        .rodata
-#endif /* TEXT_DATA_BUG */
-/* Global division table, also reachable through the hidden alias
-   div_table_internal.  The div_table label sits in the middle of the
-   128-byte object: 16 negative 16-bit constants, 16 negative byte
-   factors and 16 bytes of padding precede it; 16 bytes of padding, 16
-   positive byte factors and 16 positive 16-bit constants follow it.
-   Code that indexes from the label can therefore use negative as well
-   as positive offsets.  */
-       .balign 2
-       .type   GLOBAL(div_table),@object
-       .size   GLOBAL(div_table),128
-/* negative division constants */
-       .word   -16638
-       .word   -17135
-       .word   -17737
-       .word   -18433
-       .word   -19103
-       .word   -19751
-       .word   -20583
-       .word   -21383
-       .word   -22343
-       .word   -23353
-       .word   -24407
-       .word   -25582
-       .word   -26863
-       .word   -28382
-       .word   -29965
-       .word   -31800
-/* negative division factors */
-       .byte   66
-       .byte   70
-       .byte   75
-       .byte   81
-       .byte   87
-       .byte   93
-       .byte   101
-       .byte   109
-       .byte   119
-       .byte   130
-       .byte   142
-       .byte   156
-       .byte   172
-       .byte   192
-       .byte   214
-       .byte   241
-       .skip 16
-       .global GLOBAL(div_table)
-GLOBAL(div_table):
-       HIDDEN_ALIAS(div_table_internal,div_table)
-       .skip 16
-/* positive division factors */
-       .byte   241
-       .byte   214
-       .byte   192
-       .byte   172
-       .byte   156
-       .byte   142
-       .byte   130
-       .byte   119
-       .byte   109
-       .byte   101
-       .byte   93
-       .byte   87
-       .byte   81
-       .byte   75
-       .byte   70
-       .byte   66
-/* positive division constants */
-       .word   31801
-       .word   29966
-       .word   28383
-       .word   26864
-       .word   25583
-       .word   24408
-       .word   23354
-       .word   22344
-       .word   21384
-       .word   20584
-       .word   19752
-       .word   19104
-       .word   18434
-       .word   17738
-       .word   17136
-       .word   16639
-
-#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
-/* This code uses shld, thus is not suitable for SH1 / SH2.  */
-
-/* Signed / unsigned division without use of FPU, optimized for SH4.
-   Uses a lookup table for divisors in the range -128 .. +128, and
-   div1 with case distinction for larger divisors in three more ranges.
-   The code is lumped together with the table to allow the use of mova.  */
-/* Byte offsets used with mov.b to address single bytes of a 32-bit
-   word kept on the stack; the values depend on endianness.
-   NOTE(review): the names presumably stand for the least significant
-   byte (L_LSB), the most significant byte of the low 16-bit half
-   (L_LSWMSB) and the least significant byte of the high 16-bit half
-   (L_MSWLSB), which matches offsets 0/1/2 on little-endian and 3/2/1
-   on big-endian -- confirm.  */
-#ifdef __LITTLE_ENDIAN__
-#define L_LSB 0
-#define L_LSWMSB 1
-#define L_MSWLSB 2
-#else
-#define L_LSB 3
-#define L_LSWMSB 2
-#define L_MSWLSB 1
-#endif
-
-       .balign 4
-       /* Unsigned 32-bit division.  The dividend arrives in r4 and the
-          divisor in r5; both are saved on the stack and restored on the
-          paths that return from within this function, and the value
-          built up in r0 is what is returned.  r1 is scratch.
-          The labels without the "u" prefix (div_le128, div_ge64k,
-          div_r8, ...) are secondary entry points that skip the
-          register saves; div_le128 and div_ge64k are branched to from
-          sdivsi3_i4i.
-          NOTE(review): c128_w, zero_l, m256_w, the div_table_* tables
-          and div_ge64k_end are defined outside this function; going by
-          its name c128_w presumably holds 128 -- confirm.  The path
-          through udiv_25 continues in the code after ENDFUNC.  */
-       .global GLOBAL(udivsi3_i4i)
-       FUNC(GLOBAL(udivsi3_i4i))
-GLOBAL(udivsi3_i4i):
-       mov.w LOCAL(c128_w), r1
-       div0u
-       mov r4,r0
-       shlr8 r0
-       /* T = (divisor > r1), unsigned; when it is clear the table-based
-          path at udiv_le128 is taken.  */
-       cmp/hi r1,r5
-       extu.w r5,r1
-       bf LOCAL(udiv_le128)
-       /* r1 holds the zero-extended low 16 bits of the divisor, so the
-          comparison fails when the divisor does not fit in 16 bits.  */
-       cmp/eq r5,r1
-       bf LOCAL(udiv_ge64k)
-       /* Divisor fits in 16 bits: save r4 and the original divisor,
-          start the div1 steps and continue at udiv_25.  */
-       shlr r0
-       mov r5,r1
-       shll16 r5
-       mov.l r4,@-r15
-       div1 r5,r0
-       mov.l r1,@-r15
-       div1 r5,r0
-       div1 r5,r0
-       bra LOCAL(udiv_25)
-       div1 r5,r0
-
-       /* Table path.  div_table_ix maps the divisor to an index into
-          div_table_inv; the 32-bit entry found there is multiplied by
-          the dividend (dmulu.l), and div_table_clz supplies the shift
-          count applied by the final shld.  */
-LOCAL(div_le128):
-       mova LOCAL(div_table_ix),r0
-       bra LOCAL(div_le128_2)
-       mov.b @(r0,r5),r1
-LOCAL(udiv_le128):
-       mov.l r4,@-r15
-       mova LOCAL(div_table_ix),r0
-       mov.b @(r0,r5),r1
-       mov.l r5,@-r15
-LOCAL(div_le128_2):
-       mova LOCAL(div_table_inv),r0
-       mov.l @(r0,r1),r1
-       mov r5,r0
-       /* T is set when the divisor has no bit set other than bit 0; the
-          bt/s below then goes to div_by_1 with r0 = dividend.  */
-       tst #0xfe,r0
-       mova LOCAL(div_table_clz),r0
-       dmulu.l r1,r4
-       mov.b @(r0,r5),r1
-       bt/s LOCAL(div_by_1)
-       mov r4,r0
-       mov.l @r15+,r5
-       sts mach,r0
-       /* clrt */
-       addc r4,r0
-       mov.l @r15+,r4
-       rotcr r0
-       rts
-       shld r1,r0
-
-LOCAL(div_by_1_neg):
-       neg r4,r0
-LOCAL(div_by_1):
-       mov.l @r15+,r5
-       rts
-       mov.l @r15+,r4
-
-LOCAL(div_ge64k):
-       bt/s LOCAL(div_r8)
-       div0u
-       shll8 r5
-       bra LOCAL(div_ge64k_2)
-       div1 r5,r0
-LOCAL(udiv_ge64k):
-       /* r0 still holds dividend >> 8.  T = (divisor > r0) selects
-          udiv_r8.  NOTE(review): in that case the quotient is below
-          256, which is presumably what the "r8" in the name refers
-          to -- confirm.  */
-       cmp/hi r0,r5
-       mov r5,r1
-       bt LOCAL(udiv_r8)
-       shll8 r5
-       mov.l r4,@-r15
-       div1 r5,r0
-       mov.l r1,@-r15
-LOCAL(div_ge64k_2):
-       div1 r5,r0
-       mov.l LOCAL(zero_l),r1
-       .rept 4
-       div1 r5,r0
-       .endr
-       mov.l r1,@-r15
-       div1 r5,r0
-       mov.w LOCAL(m256_w),r1
-       div1 r5,r0
-       mov.b r0,@(L_LSWMSB,r15)
-       xor r4,r0
-       and r1,r0
-       bra LOCAL(div_ge64k_end)
-       xor r4,r0
-       
-LOCAL(div_r8):
-       shll16 r4
-       bra LOCAL(div_r8_2)
-       shll8 r4
-LOCAL(udiv_r8):
-       mov.l r4,@-r15
-       shll16 r4
-       clrt
-       shll8 r4
-       mov.l r5,@-r15
-LOCAL(div_r8_2):
-       /* Eight div1 steps on r1 with the divisor, with rotcl moving
-          bits through r0 in between; r4 and r5 are restored from the
-          stack before the return.  */
-       rotcl r4
-       mov r0,r1
-       div1 r5,r1
-       mov r4,r0
-       rotcl r0
-       mov r5,r4
-       div1 r5,r1
-       .rept 5
-       rotcl r0; div1 r5,r1
-       .endr
-       rotcl r0
-       mov.l @r15+,r5
-       div1 r4,r1
-       mov.l @r15+,r4
-       rts
-       rotcl r0
-
-       ENDFUNC(GLOBAL(udivsi3_i4i))
-
-       .global GLOBAL(sdivsi3_i4i)
-       FUNC(GLOBAL(sdivsi3_i4i))
-       /* This is link-compatible with a GLOBAL(sdivsi3) call,
-          but we effectively clobber only r1.  */
-GLOBAL(sdivsi3_i4i):
-       mov.l r4,@-r15
-       cmp/pz r5
-       mov.w LOCAL(c128_w), r1
-       bt/s LOCAL(pos_divisor)
-       cmp/pz r4
-       mov.l r5,@-r15
-       neg r5,r5
-       bt/s LOCAL(neg_result)
-       cmp/hi r1,r5
-       neg r4,r4
-LOCAL(pos_result):
-       extu.w r5,r0
-       bf LOCAL(div_le128)
-       cmp/eq r5,r0
-       mov r4,r0
-       shlr8 r0
-       bf/s LOCAL(div_ge64k)
-       cmp/hi r0,r5
-       div0u
-       shll16 r5
-       div1 r5,r0
-       div1 r5,r0
-       div1 r5,r0
-LOCAL(udiv_25):
-       mov.l LOCAL(zero_l),r1
-       div1 r5,r0
-       div1 r5,r0
-       mov.l r1,@-r15
-       .rept 3
-       div1 r5,r0
-       .endr
-       mov.b r0,@(L_MSWLSB,r15)
-       xtrct r4,r0
-       swap.w r0,r0
-       .rept 8
-       div1 r5,r0
-       .endr
-       mov.b r0,@(L_LSWMSB,r15)
-LOCAL(div_ge64k_end):
-       .rept 8
-       div1 r5,r0
-       .endr
-       mov.l @r15+,r4 ! zero-extension and swap using LS unit.
-       extu.b r0,r0
-       mov.l @r15+,r5
-       or r4,r0
-       mov.l @r15+,r4
-       rts
-       rotcl r0
-
-LOCAL(div_le128_neg):
-       tst #0xfe,r0
-       mova LOCAL(div_table_ix),r0
-       mov.b @(r0,r5),r1
-       mova LOCAL(div_table_inv),r0
-       bt/s LOCAL(div_by_1_neg)
-       mov.l @(r0,r1),r1
-       mova LOCAL(div_table_clz),r0
-       dmulu.l r1,r4
-       mov.b @(r0,r5),r1
-       mov.l @r15+,r5
-       sts mach,r0
-       /* clrt */
-       addc r4,r0
-       mov.l @r15+,r4
-       rotcr r0
-       shld r1,r0
-       rts
-       neg r0,r0
-
-LOCAL(pos_divisor):
-       mov.l r5,@-r15
-       bt/s LOCAL(pos_result)
-       cmp/hi r1,r5
-       neg r4,r4
-LOCAL(neg_result):
-       extu.w r5,r0
-       bf LOCAL(div_le128_neg)
-       cmp/eq r5,r0
-       mov r4,r0
-       shlr8 r0
-       bf/s LOCAL(div_ge64k_neg)
-       cmp/hi r0,r5
-       div0u
-       mov.l LOCAL(zero_l),r1
-       shll16 r5
-       div1 r5,r0
-       mov.l r1,@-r15
-       .rept 7
-       div1 r5,r0
-       .endr
-       mov.b r0,@(L_MSWLSB,r15)
-       xtrct r4,r0
-       swap.w r0,r0
-       .rept 8
-       div1 r5,r0
-       .endr
-       mov.b r0,@(L_LSWMSB,r15)
-LOCAL(div_ge64k_neg_end):
-       .rept 8
-       div1 r5,r0
-       .endr
-       mov.l @r15+,r4 ! zero-extension and swap using LS unit.
-       extu.b r0,r1
-       mov.l @r15+,r5
-       or r4,r1
-LOCAL(div_r8_neg_end):
-       mov.l @r15+,r4
-       rotcl r1
-       rts
-       neg r1,r0
-
-LOCAL(div_ge64k_neg):
-       bt/s LOCAL(div_r8_neg)
-       div0u
-       shll8 r5
-       mov.l LOCAL(zero_l),r1
-       .rept 6
-       div1 r5,r0
-       .endr
-       mov.l r1,@-r15
-       div1 r5,r0
-       mov.w LOCAL(m256_w),r1
-       div1 r5,r0
-       mov.b r0,@(L_LSWMSB,r15)
-       xor r4,r0
-       and r1,r0
-       bra LOCAL(div_ge64k_neg_end)
-       xor r4,r0
-
-LOCAL(c128_w):
-       .word 128
-
-LOCAL(div_r8_neg):
-       clrt
-       shll16 r4
-       mov r4,r1
-       shll8 r1
-       mov r5,r4
-       .rept 7
-       rotcl r1; div1 r5,r0
-       .endr
-       mov.l @r15+,r5
-       rotcl r1
-       bra LOCAL(div_r8_neg_end)
-       div1 r4,r0
-
-LOCAL(m256_w):
-       .word 0xff00
-/* This table has been generated by divtab-sh4.c.  */
-       .balign 4
-LOCAL(div_table_clz):
-       .byte   0
-       .byte   1
-       .byte   0
-       .byte   -1
-       .byte   -1
-       .byte   -2
-       .byte   -2
-       .byte   -2
-       .byte   -2
-       .byte   -3
-       .byte   -3
-       .byte   -3
-       .byte   -3
-       .byte   -3
-       .byte   -3
-       .byte   -3
-       .byte   -3
-       .byte   -4
-       .byte   -4
-       .byte   -4
-       .byte   -4
-       .byte   -4
-       .byte   -4
-       .byte   -4
-       .byte   -4
-       .byte   -4
-       .byte   -4
-       .byte   -4
-       .byte   -4
-       .byte   -4
-       .byte   -4
-       .byte   -4
-       .byte   -4
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -5
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-       .byte   -6
-/* Lookup table translating positive divisor to index into table of
-   normalized inverse.  N.B. the '0' entry is also the last entry of the
- previous table, and causes an unaligned access for division by zero.  */
-LOCAL(div_table_ix):
-       .byte   -6
-       .byte   -128
-       .byte   -128
-       .byte   0
-       .byte   -128
-       .byte   -64
-       .byte   0
-       .byte   64
-       .byte   -128
-       .byte   -96
-       .byte   -64
-       .byte   -32
-       .byte   0
-       .byte   32
-       .byte   64
-       .byte   96
-       .byte   -128
-       .byte   -112
-       .byte   -96
-       .byte   -80
-       .byte   -64
-       .byte   -48
-       .byte   -32
-       .byte   -16
-       .byte   0
-       .byte   16
-       .byte   32
-       .byte   48
-       .byte   64
-       .byte   80
-       .byte   96
-       .byte   112
-       .byte   -128
-       .byte   -120
-       .byte   -112
-       .byte   -104
-       .byte   -96
-       .byte   -88
-       .byte   -80
-       .byte   -72
-       .byte   -64
-       .byte   -56
-       .byte   -48
-       .byte   -40
-       .byte   -32
-       .byte   -24
-       .byte   -16
-       .byte   -8
-       .byte   0
-       .byte   8
-       .byte   16
-       .byte   24
-       .byte   32
-       .byte   40
-       .byte   48
-       .byte   56
-       .byte   64
-       .byte   72
-       .byte   80
-       .byte   88
-       .byte   96
-       .byte   104
-       .byte   112
-       .byte   120
-       .byte   -128
-       .byte   -124
-       .byte   -120
-       .byte   -116
-       .byte   -112
-       .byte   -108
-       .byte   -104
-       .byte   -100
-       .byte   -96
-       .byte   -92
-       .byte   -88
-       .byte   -84
-       .byte   -80
-       .byte   -76
-       .byte   -72
-       .byte   -68
-       .byte   -64
-       .byte   -60
-       .byte   -56
-       .byte   -52
-       .byte   -48
-       .byte   -44
-       .byte   -40
-       .byte   -36
-       .byte   -32
-       .byte   -28
-       .byte   -24
-       .byte   -20
-       .byte   -16
-       .byte   -12
-       .byte   -8
-       .byte   -4
-       .byte   0
-       .byte   4
-       .byte   8
-       .byte   12
-       .byte   16
-       .byte   20
-       .byte   24
-       .byte   28
-       .byte   32
-       .byte   36
-       .byte   40
-       .byte   44
-       .byte   48
-       .byte   52
-       .byte   56
-       .byte   60
-       .byte   64
-       .byte   68
-       .byte   72
-       .byte   76
-       .byte   80
-       .byte   84
-       .byte   88
-       .byte   92
-       .byte   96
-       .byte   100
-       .byte   104
-       .byte   108
-       .byte   112
-       .byte   116
-       .byte   120
-       .byte   124
-       .byte   -128
-/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
-       .balign 4
-LOCAL(zero_l):
-       .long   0x0
-       .long   0xF81F81F9
-       .long   0xF07C1F08
-       .long   0xE9131AC0
-       .long   0xE1E1E1E2
-       .long   0xDAE6076C
-       .long   0xD41D41D5
-       .long   0xCD856891
-       .long   0xC71C71C8
-       .long   0xC0E07039
-       .long   0xBACF914D
-       .long   0xB4E81B4F
-       .long   0xAF286BCB
-       .long   0xA98EF607
-       .long   0xA41A41A5
-       .long   0x9EC8E952
-       .long   0x9999999A
-       .long   0x948B0FCE
-       .long   0x8F9C18FA
-       .long   0x8ACB90F7
-       .long   0x86186187
-       .long   0x81818182
-       .long   0x7D05F418
-       .long   0x78A4C818
-       .long   0x745D1746
-       .long   0x702E05C1
-       .long   0x6C16C16D
-       .long   0x68168169
-       .long   0x642C8591
-       .long   0x60581606
-       .long   0x5C9882BA
-       .long   0x58ED2309
-LOCAL(div_table_inv):
-       .long   0x55555556
-       .long   0x51D07EAF
-       .long   0x4E5E0A73
-       .long   0x4AFD6A06
-       .long   0x47AE147B
-       .long   0x446F8657
-       .long   0x41414142
-       .long   0x3E22CBCF
-       .long   0x3B13B13C
-       .long   0x38138139
-       .long   0x3521CFB3
-       .long   0x323E34A3
-       .long   0x2F684BDB
-       .long   0x2C9FB4D9
-       .long   0x29E4129F
-       .long   0x27350B89
-       .long   0x24924925
-       .long   0x21FB7813
-       .long   0x1F7047DD
-       .long   0x1CF06ADB
-       .long   0x1A7B9612
-       .long   0x18118119
-       .long   0x15B1E5F8
-       .long   0x135C8114
-       .long   0x11111112
-       .long   0xECF56BF
-       .long   0xC9714FC
-       .long   0xA6810A7
-       .long   0x8421085
-       .long   0x624DD30
-       .long   0x4104105
-       .long   0x2040811
-       /* maximum error: 0.987342 scaled: 0.921875*/
-
-       ENDFUNC(GLOBAL(sdivsi3_i4i))
-#endif /* SH3 / SH4 */
-
-#endif /* L_div_table */
-
-#ifdef L_udiv_qrnnd_16
-#if !__SHMEDIA__
-       HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
-       /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
-       /* n1 < d, but n1 might be larger than d1.  */
-       .global GLOBAL(udiv_qrnnd_16)
-       .balign 8
-GLOBAL(udiv_qrnnd_16):
-       div0u
-       cmp/hi r6,r0
-       bt .Lots
-       .rept 16
-       div1 r6,r0 
-       .endr
-       extu.w r0,r1
-       bt 0f
-       add r6,r0
-0:     rotcl r1
-       mulu.w r1,r5
-       xtrct r4,r0
-       swap.w r0,r0
-       sts macl,r2
-       cmp/hs r2,r0
-       sub r2,r0
-       bt 0f
-       addc r5,r0
-       add #-1,r1
-       bt 0f
-1:     add #-1,r1
-       rts
-       add r5,r0
-       .balign 8
-.Lots:
-       sub r5,r0
-       swap.w r4,r1
-       xtrct r0,r1
-       clrt
-       mov r1,r0
-       addc r5,r0
-       mov #-1,r1
-       SL1(bf, 1b,
-       shlr16 r1)
-0:     rts
-       nop
-       ENDFUNC(GLOBAL(udiv_qrnnd_16))
-#endif /* !__SHMEDIA__ */
-#endif /* L_udiv_qrnnd_16 */
diff --git a/gcc/config/sh/lib1funcs.h b/gcc/config/sh/lib1funcs.h

deleted file mode 100644 (file)

index af4b41c..0000000
--- a/gcc/config/sh/lib1funcs.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
-   2004, 2005, 2006, 2009
-   Free Software Foundation, Inc.
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-#ifdef __ELF__
-#define LOCAL(X)       .L_##X
-#define FUNC(X)                .type X,@function
-#define HIDDEN_FUNC(X) FUNC(X); .hidden X
-#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X)
-#define ENDFUNC0(X)    .Lfe_##X: .size X,.Lfe_##X-X
-#define ENDFUNC(X)     ENDFUNC0(X)
-#else
-#define LOCAL(X)       L_##X
-#define FUNC(X)
-#define HIDDEN_FUNC(X)
-#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y)
-#define ENDFUNC(X)
-#endif
-
-#define        CONCAT(A,B)     A##B
-#define        GLOBAL0(U,X)    CONCAT(U,__##X)
-#define        GLOBAL(X)       GLOBAL0(__USER_LABEL_PREFIX__,X)
-
-#define ALIAS(X,Y)     .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y)
-
-#if defined __SH2A__ && defined __FMOVD_ENABLED__
-#undef  FMOVD_WORKS
-#define FMOVD_WORKS
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define DR00 fr1
-#define DR01 fr0
-#define DR20 fr3
-#define DR21 fr2
-#define DR40 fr5
-#define DR41 fr4
-#else /* !__LITTLE_ENDIAN__ */
-#define DR00 fr0
-#define DR01 fr1
-#define DR20 fr2
-#define DR21 fr3
-#define DR40 fr4
-#define DR41 fr5
-#endif /* !__LITTLE_ENDIAN__ */
-
-#ifdef __sh1__
-#define SL(branch, dest, in_slot, in_slot_arg2) \
-       in_slot, in_slot_arg2; branch dest
-#define SL1(branch, dest, in_slot) \
-       in_slot; branch dest
-#else /* ! __sh1__ */
-#define SL(branch, dest, in_slot, in_slot_arg2) \
-       branch##.s dest; in_slot, in_slot_arg2
-#define SL1(branch, dest, in_slot) \
-       branch##/s dest; in_slot
-#endif /* !__sh1__ */
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h

index 1e654801334057d5f21938a139661859e9291f9e..cc26e05a7642d993e03b617211a85be394183486 100644 (file)
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -1983,7 +1983,7 @@ struct sh_args {
     that the native compiler puts too large (> 32) immediate shift counts
     into a register and shifts by the register, letting the SH decide what
     to do instead of doing that itself.  */
-/* ??? The library routines in lib1funcs.asm truncate the shift count.
+/* ??? The library routines in lib1funcs.S truncate the shift count.
     However, the SH3 has hardware shifts that do not truncate exactly as gcc
     expects - the sign bit is significant - so it appears that we need to
     leave this zero for correct SH3 code.  */
diff --git a/gcc/config/sh/t-linux b/gcc/config/sh/t-linux

index a5c711618c639abbe0fb7fa55f5deb52d86fc9d3..2304fb176cbedd0eaa3226977ace6e7af212c19a 100644 (file)
--- a/gcc/config/sh/t-linux
+++ b/gcc/config/sh/t-linux
@@ -1,5 +1,3 @@
-LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array
-
  LIB2FUNCS_EXTRA= $(srcdir)/config/sh/linux-atomic.asm
  
  MULTILIB_DIRNAMES= 
diff --git a/gcc/config/sh/t-netbsd b/gcc/config/sh/t-netbsd

index de172d3f73fe53517a32db494f3d1e1ad00fb2ac..dea1c478cb522236b232f89c1b6eda21054b0d1f 100644 (file)
--- a/gcc/config/sh/t-netbsd
+++ b/gcc/config/sh/t-netbsd
@@ -17,6 +17,5 @@
  # <http://www.gnu.org/licenses/>.
  
  TARGET_LIBGCC2_CFLAGS = -fpic -mieee
-LIB1ASMFUNCS_CACHE = _ic_invalidate
  
  LIB2FUNCS_EXTRA=
diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh

index 6eaf784e8ae0d5905c5236d7d62b8a89f3dcebe1..56ea83e0697eeed57c5e0c05278ccb1d6882d664 100644 (file)
--- a/gcc/config/sh/t-sh
+++ b/gcc/config/sh/t-sh
@@ -22,13 +22,6 @@ sh-c.o: $(srcdir)/config/sh/sh-c.c \
         $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
                 $(srcdir)/config/sh/sh-c.c
  
-LIB1ASMSRC = sh/lib1funcs.asm
-LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movmem \
-  _movmem_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
-  _div_table _udiv_qrnnd_16 \
-  $(LIB1ASMFUNCS_CACHE)
-LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array
-
  TARGET_LIBGCC2_CFLAGS = -mieee
  
  DEFAULT_ENDIAN = $(word 1,$(TM_ENDIAN_CONFIG))
diff --git a/gcc/config/sh/t-sh64 b/gcc/config/sh/t-sh64

index d88f929fd7a821dbb5d7f0699c4d1a4bfe4842db..3bd9205079b6b3af464ef69fa3f05e000c0fecfb 100644 (file)
--- a/gcc/config/sh/t-sh64
+++ b/gcc/config/sh/t-sh64
@@ -1,4 +1,4 @@
-# Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 2002, 2004, 2005, 2011 Free Software Foundation, Inc.
  #
  # This file is part of GCC.
  #
@@ -16,13 +16,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMFUNCS = \
-  _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
-  _shcompact_call_trampoline _shcompact_return_trampoline \
-  _shcompact_incoming_args _ic_invalidate _nested_trampoline \
-  _push_pop_shmedia_regs \
-  _udivdi3 _divdi3 _umoddi3 _moddi3 _div_table
-
  MULTILIB_CPU_DIRS= $(ML_sh1) $(ML_sh2e) $(ML_sh2) $(ML_sh3e) $(ML_sh3) $(ML_sh4_nofpu) $(ML_sh4_single_only) $(ML_sh4_single) $(ML_sh4) $(ML_sh5_32media:m5-32media/=media32) $(ML_sh5_32media_nofpu:m5-32media-nofpu/=nofpu/media32) $(ML_sh5_compact:m5-compact/=compact) $(ML_sh5_compact_nofpu:m5-compact-nofpu/=nofpu/compact) $(ML_sh5_64media:m5-64media/=media64) $(ML_sh5_64media_nofpu:m5-64media-nofpu/=nofpu/media64)
  
  MULTILIB_RAW_DIRNAMES= $(MULTILIB_ENDIAN:/mb= mb) $(MULTILIB_CPU_DIRS:/=)
diff --git a/gcc/config/sparc/lb1spc.asm b/gcc/config/sparc/lb1spc.asm

deleted file mode 100644 (file)

index b60bd57..0000000
--- a/gcc/config/sparc/lb1spc.asm
+++ /dev/null
@@ -1,784 +0,0 @@
-/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
-   for the sparc processor.
-
-   These routines are derived from the SPARC Architecture Manual, version 8,
-   slightly edited to match the desired calling convention, and also to
-   optimize them for our purposes.  */
-
-#ifdef L_mulsi3
-.text
-       .align 4
-       .global .umul
-       .proc 4
-.umul:
-       or      %o0, %o1, %o4   ! logical or of multiplier and multiplicand
-       mov     %o0, %y         ! multiplier to Y register
-       andncc  %o4, 0xfff, %o5 ! mask out lower 12 bits
-       be      mul_shortway    ! can do it the short way
-       andcc   %g0, %g0, %o4   ! zero the partial product and clear NV cc
-       !
-       ! long multiply
-       !
-       mulscc  %o4, %o1, %o4   ! first iteration of 33
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4   ! 32nd iteration
-       mulscc  %o4, %g0, %o4   ! last iteration only shifts
-       ! the upper 32 bits of product are wrong, but we do not care
-       retl
-       rd      %y, %o0
-       !
-       ! short multiply
-       !
-mul_shortway:
-       mulscc  %o4, %o1, %o4   ! first iteration of 13
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4
-       mulscc  %o4, %o1, %o4   ! 12th iteration
-       mulscc  %o4, %g0, %o4   ! last iteration only shifts
-       rd      %y, %o5
-       sll     %o4, 12, %o4    ! left shift partial product by 12 bits
-       srl     %o5, 20, %o5    ! right shift partial product by 20 bits
-       retl
-       or      %o5, %o4, %o0   ! merge for true product
-#endif
-
-#ifdef L_divsi3
-/*
- * Division and remainder, from Appendix E of the SPARC Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
- */
-
-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- *  .div       name of function to generate
- *  div                div=div => %o0 / %o1; div=rem => %o0 % %o1
- *  true               true=true => signed; true=false => unsigned
- *
- * Algorithm parameters:
- *  N          how many bits per iteration we try to get (4)
- *  WORDSIZE   total number of bits (32)
- *
- * Derived constants:
- *  TOPBITS    number of bits in the top decade of a number
- *
- * Important variables:
- *  Q          the partial quotient under development (initially 0)
- *  R          the remainder so far, initially the dividend
- *  ITER       number of main division loop iterations required;
- *             equal to ceil(log2(quotient) / N).  Note that this
- *             is the log base (2^N) of the quotient.
- *  V          the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- *  Current estimate for non-large dividend is
- *     ceil(log2(quotient) / N) * (10 + 7N/2) + C
- *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
- *  different path, as the upper bits of the quotient must be developed
- *  one bit at a time.
- */
-        .global .udiv
-        .align 4
-        .proc 4
-        .text
-.udiv:
-         b ready_to_divide
-         mov 0, %g3             ! result is always positive
-
-        .global .div
-        .align 4
-        .proc 4
-        .text
-.div:
-       ! compute sign of result; if neither is negative, no problem
-       orcc    %o1, %o0, %g0   ! either negative?
-       bge     ready_to_divide ! no, go do the divide
-       xor     %o1, %o0, %g3   ! compute sign in any case
-       tst     %o1
-       bge     1f
-       tst     %o0
-       ! %o1 is definitely negative; %o0 might also be negative
-       bge     ready_to_divide ! if %o0 not negative...
-       sub     %g0, %o1, %o1   ! in any case, make %o1 nonneg
-1:     ! %o0 is negative, %o1 is nonnegative
-       sub     %g0, %o0, %o0   ! make %o0 nonnegative
-
-
-ready_to_divide:
-
-       ! Ready to divide.  Compute size of quotient; scale comparand.
-       orcc    %o1, %g0, %o5
-       bne     1f
-       mov     %o0, %o3
-
-       ! Divide by zero trap.  If it returns, return 0 (about as
-       ! wrong as possible, but that is what SunOS does...).
-       ta      0x2             ! ST_DIV0
-       retl
-       clr     %o0
-
-1:
-       cmp     %o3, %o5                ! if %o1 exceeds %o0, done
-       blu     got_result              ! (and algorithm fails otherwise)
-       clr     %o2
-       sethi   %hi(1 << (32 - 4 - 1)), %g1
-       cmp     %o3, %g1
-       blu     not_really_big
-       clr     %o4
-
-       ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
-       ! as our usual N-at-a-shot divide step will cause overflow and havoc.
-       ! The number of bits in the result here is N*ITER+SC, where SC <= N.
-       ! Compute ITER in an unorthodox manner: know we need to shift V into
-       ! the top decade: so do not even bother to compare to R.
-       1:
-               cmp     %o5, %g1
-               bgeu    3f
-               mov     1, %g2
-               sll     %o5, 4, %o5
-               b       1b
-               add     %o4, 1, %o4
-
-       ! Now compute %g2.
-       2:      addcc   %o5, %o5, %o5
-               bcc     not_too_big
-               add     %g2, 1, %g2
-
-               ! We get here if the %o1 overflowed while shifting.
-               ! This means that %o3 has the high-order bit set.
-               ! Restore %o5 and subtract from %o3.
-               sll     %g1, 4, %g1     ! high order bit
-               srl     %o5, 1, %o5     ! rest of %o5
-               add     %o5, %g1, %o5
-               b       do_single_div
-               sub     %g2, 1, %g2
-
-       not_too_big:
-       3:      cmp     %o5, %o3
-               blu     2b
-               nop
-               be      do_single_div
-               nop
-       /* NB: these are commented out in the V8-SPARC manual as well */
-       /* (I do not understand this) */
-       ! %o5 > %o3: went too far: back up 1 step
-       !       srl     %o5, 1, %o5
-       !       dec     %g2
-       ! do single-bit divide steps
-       !
-       ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
-       ! first divide step without thinking.  BUT, the others are conditional,
-       ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
-       ! order bit set in the first step, just falling into the regular
-       ! division loop will mess up the first time around.
-       ! So we unroll slightly...
-       do_single_div:
-               subcc   %g2, 1, %g2
-               bl      end_regular_divide
-               nop
-               sub     %o3, %o5, %o3
-               mov     1, %o2
-               b       end_single_divloop
-               nop
-       single_divloop:
-               sll     %o2, 1, %o2
-               bl      1f
-               srl     %o5, 1, %o5
-               ! %o3 >= 0
-               sub     %o3, %o5, %o3
-               b       2f
-               add     %o2, 1, %o2
-       1:      ! %o3 < 0
-               add     %o3, %o5, %o3
-               sub     %o2, 1, %o2
-       2:
-       end_single_divloop:
-               subcc   %g2, 1, %g2
-               bge     single_divloop
-               tst     %o3
-               b,a     end_regular_divide
-
-not_really_big:
-1:
-       sll     %o5, 4, %o5
-       cmp     %o5, %o3
-       bleu    1b
-       addcc   %o4, 1, %o4
-       be      got_result
-       sub     %o4, 1, %o4
-
-       tst     %o3     ! set up for initial iteration
-divloop:
-       sll     %o2, 4, %o2
-       ! depth 1, accumulated bits 0
-       bl      L1.16
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       ! depth 2, accumulated bits 1
-       bl      L2.17
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       ! depth 3, accumulated bits 3
-       bl      L3.19
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits 7
-       bl      L4.23
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (7*2+1), %o2
-       
-L4.23:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (7*2-1), %o2
-       
-       
-L3.19:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits 5
-       bl      L4.21
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (5*2+1), %o2
-       
-L4.21:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (5*2-1), %o2
-       
-L2.17:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       ! depth 3, accumulated bits 1
-       bl      L3.17
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits 3
-       bl      L4.19
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (3*2+1), %o2
-       
-L4.19:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (3*2-1), %o2
-
-L3.17:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits 1
-       bl      L4.17
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (1*2+1), %o2
-
-L4.17:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (1*2-1), %o2
-       
-L1.16:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       ! depth 2, accumulated bits -1
-       bl      L2.15
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       ! depth 3, accumulated bits -1
-       bl      L3.15
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits -1
-       bl      L4.15
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-1*2+1), %o2
-       
-L4.15:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-1*2-1), %o2
-       
-L3.15:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits -3
-       bl      L4.13
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-3*2+1), %o2
-       
-L4.13:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-3*2-1), %o2
-       
-L2.15:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       ! depth 3, accumulated bits -3
-       bl      L3.13
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits -5
-       bl      L4.11
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-5*2+1), %o2
-       
-L4.11:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-5*2-1), %o2
-       
-L3.13:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits -7
-       bl      L4.9
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-7*2+1), %o2
-
-L4.9:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-7*2-1), %o2
-       
-       9:
-end_regular_divide:
-       subcc   %o4, 1, %o4
-       bge     divloop
-       tst     %o3
-       bl,a    got_result
-       ! non-restoring fixup here (one instruction only!)
-       sub     %o2, 1, %o2
-
-
-got_result:
-       ! check to see if answer should be < 0
-       tst     %g3
-       bl,a    1f
-       sub %g0, %o2, %o2
-1:
-       retl
-       mov %o2, %o0
-#endif
-
-#ifdef L_modsi3
-/* This implementation was taken from glibc:
- *
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * Algorithm parameters:
- *  N          how many bits per iteration we try to get (4)
- *  WORDSIZE   total number of bits (32)
- *
- * Derived constants:
- *  TOPBITS    number of bits in the top decade of a number
- *
- * Important variables:
- *  Q          the partial quotient under development (initially 0)
- *  R          the remainder so far, initially the dividend
- *  ITER       number of main division loop iterations required;
- *             equal to ceil(log2(quotient) / N).  Note that this
- *             is the log base (2^N) of the quotient.
- *  V          the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- *  Current estimate for non-large dividend is
- *     ceil(log2(quotient) / N) * (10 + 7N/2) + C
- *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
- *  different path, as the upper bits of the quotient must be developed
- *  one bit at a time.
- */
-.text
-       .align 4
-       .global .urem
-       .proc 4
-.urem:
-       b       divide
-       mov     0, %g3          ! result always positive
-
-        .align 4
-       .global .rem
-       .proc 4
-.rem:
-       ! compute sign of result; if neither is negative, no problem
-       orcc    %o1, %o0, %g0   ! either negative?
-       bge     2f                      ! no, go do the divide
-       mov     %o0, %g3                ! sign of remainder matches %o0
-       tst     %o1
-       bge     1f
-       tst     %o0
-       ! %o1 is definitely negative; %o0 might also be negative
-       bge     2f                      ! if %o0 not negative...
-       sub     %g0, %o1, %o1   ! in any case, make %o1 nonneg
-1:     ! %o0 is negative, %o1 is nonnegative
-       sub     %g0, %o0, %o0   ! make %o0 nonnegative
-2:
-
-       ! Ready to divide.  Compute size of quotient; scale comparand.
-divide:
-       orcc    %o1, %g0, %o5
-       bne     1f
-       mov     %o0, %o3
-
-               ! Divide by zero trap.  If it returns, return 0 (about as
-               ! wrong as possible, but that is what SunOS does...).
-               ta      0x2   !ST_DIV0
-               retl
-               clr     %o0
-
-1:
-       cmp     %o3, %o5                ! if %o1 exceeds %o0, done
-       blu     got_result              ! (and algorithm fails otherwise)
-       clr     %o2
-       sethi   %hi(1 << (32 - 4 - 1)), %g1
-       cmp     %o3, %g1
-       blu     not_really_big
-       clr     %o4
-
-       ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
-       ! as our usual N-at-a-shot divide step will cause overflow and havoc.
-       ! The number of bits in the result here is N*ITER+SC, where SC <= N.
-       ! Compute ITER in an unorthodox manner: know we need to shift V into
-       ! the top decade: so do not even bother to compare to R.
-       1:
-               cmp     %o5, %g1
-               bgeu    3f
-               mov     1, %g2
-               sll     %o5, 4, %o5
-               b       1b
-               add     %o4, 1, %o4
-
-       ! Now compute %g2.
-       2:      addcc   %o5, %o5, %o5
-               bcc     not_too_big
-               add     %g2, 1, %g2
-
-               ! We get here if the %o1 overflowed while shifting.
-               ! This means that %o3 has the high-order bit set.
-               ! Restore %o5 and subtract from %o3.
-               sll     %g1, 4, %g1     ! high order bit
-               srl     %o5, 1, %o5             ! rest of %o5
-               add     %o5, %g1, %o5
-               b       do_single_div
-               sub     %g2, 1, %g2
-
-       not_too_big:
-       3:      cmp     %o5, %o3
-               blu     2b
-               nop
-               be      do_single_div
-               nop
-       /* NB: these are commented out in the V8-SPARC manual as well */
-       /* (I do not understand this) */
-       ! %o5 > %o3: went too far: back up 1 step
-       !       srl     %o5, 1, %o5
-       !       dec     %g2
-       ! do single-bit divide steps
-       !
-       ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
-       ! first divide step without thinking.  BUT, the others are conditional,
-       ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
-       ! order bit set in the first step, just falling into the regular
-       ! division loop will mess up the first time around.
-       ! So we unroll slightly...
-       do_single_div:
-               subcc   %g2, 1, %g2
-               bl      end_regular_divide
-               nop
-               sub     %o3, %o5, %o3
-               mov     1, %o2
-               b       end_single_divloop
-               nop
-       single_divloop:
-               sll     %o2, 1, %o2
-               bl      1f
-               srl     %o5, 1, %o5
-               ! %o3 >= 0
-               sub     %o3, %o5, %o3
-               b       2f
-               add     %o2, 1, %o2
-       1:      ! %o3 < 0
-               add     %o3, %o5, %o3
-               sub     %o2, 1, %o2
-       2:
-       end_single_divloop:
-               subcc   %g2, 1, %g2
-               bge     single_divloop
-               tst     %o3
-               b,a     end_regular_divide
-
-not_really_big:
-1:
-       sll     %o5, 4, %o5
-       cmp     %o5, %o3
-       bleu    1b
-       addcc   %o4, 1, %o4
-       be      got_result
-       sub     %o4, 1, %o4
-
-       tst     %o3     ! set up for initial iteration
-divloop:
-       sll     %o2, 4, %o2
-               ! depth 1, accumulated bits 0
-       bl      L1.16
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       ! depth 2, accumulated bits 1
-       bl      L2.17
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       ! depth 3, accumulated bits 3
-       bl      L3.19
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits 7
-       bl      L4.23
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (7*2+1), %o2
-L4.23:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (7*2-1), %o2
-       
-L3.19:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits 5
-       bl      L4.21
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (5*2+1), %o2
-       
-L4.21:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (5*2-1), %o2
-       
-L2.17:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       ! depth 3, accumulated bits 1
-       bl      L3.17
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits 3
-       bl      L4.19
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (3*2+1), %o2
-       
-L4.19:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (3*2-1), %o2
-       
-L3.17:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits 1
-       bl      L4.17
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (1*2+1), %o2
-       
-L4.17:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (1*2-1), %o2
-       
-L1.16:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       ! depth 2, accumulated bits -1
-       bl      L2.15
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       ! depth 3, accumulated bits -1
-       bl      L3.15
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits -1
-       bl      L4.15
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-1*2+1), %o2
-       
-L4.15:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-1*2-1), %o2
-       
-L3.15:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits -3
-       bl      L4.13
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-3*2+1), %o2
-       
-L4.13:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-3*2-1), %o2
-       
-L2.15:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       ! depth 3, accumulated bits -3
-       bl      L3.13
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits -5
-       bl      L4.11
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-5*2+1), %o2
-       
-L4.11:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-5*2-1), %o2
-       
-L3.13:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       ! depth 4, accumulated bits -7
-       bl      L4.9
-       srl     %o5,1,%o5
-       ! remainder is positive
-       subcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-7*2+1), %o2
-       
-L4.9:
-       ! remainder is negative
-       addcc   %o3,%o5,%o3
-       b       9f
-       add     %o2, (-7*2-1), %o2
-       
-       9:
-end_regular_divide:
-       subcc   %o4, 1, %o4
-       bge     divloop
-       tst     %o3
-       bl,a    got_result
-       ! non-restoring fixup here (one instruction only!)
-       add     %o3, %o1, %o3
-
-got_result:
-       ! check to see if answer should be < 0
-       tst     %g3
-       bl,a    1f
-       sub %g0, %o3, %o3
-1:
-       retl
-       mov %o3, %o0
-
-#endif
-
diff --git a/gcc/config/sparc/lb1spl.asm b/gcc/config/sparc/lb1spl.asm

deleted file mode 100644 (file)

index 973401f..0000000
--- a/gcc/config/sparc/lb1spl.asm
+++ /dev/null
@@ -1,246 +0,0 @@
-/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
-   for the sparclite processor.
-
-   These routines are all from the SPARClite User's Guide, slightly edited
-   to match the desired calling convention, and also to optimize them.  */
-
-#ifdef L_udivsi3
-.text
-       .align 4
-       .global .udiv
-       .proc   04
-.udiv:
-       wr      %g0,%g0,%y      ! Not a delayed write for sparclite
-       tst     %g0
-       divscc  %o0,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       retl
-       divscc  %g1,%o1,%o0
-#endif
-
-#ifdef L_umodsi3
-.text
-       .align 4
-       .global .urem
-       .proc   04
-.urem:
-       wr      %g0,%g0,%y      ! Not a delayed write for sparclite
-       tst     %g0
-       divscc  %o0,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       divscc  %g1,%o1,%g1
-       bl 1f
-       rd      %y,%o0
-       retl
-       nop
-1:     retl
-       add     %o0,%o1,%o0
-#endif
-
-#ifdef L_divsi3
-.text
-       .align 4
-       .global .div
-       .proc   04
-! ??? This routine could be made faster if it was optimized, and if it was
-! rewritten to only calculate the quotient.
-.div:
-       wr      %g0,%g0,%y      ! Not a delayed write for sparclite
-       mov     %o1,%o4
-       tst     %o1
-       bl,a    1f
-       sub     %g0,%o4,%o4
-1:     tst     %o0
-       bl,a    2f
-       mov     -1,%y
-2:     divscc  %o0,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       be      6f
-       mov     %y,%o3
-       bg      4f
-       addcc   %o3,%o4,%g0
-       be,a    6f
-       mov     %g0,%o3
-       tst     %o0
-       bl      5f
-       tst     %g1
-       ba      5f
-       add     %o3,%o4,%o3
-4:     subcc   %o3,%o4,%g0
-       be,a    6f
-       mov     %g0,%o3
-       tst     %o0
-       bge     5f
-       tst     %g1
-       sub     %o3,%o4,%o3
-5:     bl,a    6f
-       add     %g1,1,%g1
-6:     tst     %o1
-       bl,a    7f
-       sub     %g0,%g1,%g1
-7:     retl
-       mov     %g1,%o0         ! Quotient is in %g1.
-#endif
-
-#ifdef L_modsi3
-.text
-       .align 4
-       .global .rem
-       .proc   04
-! ??? This routine could be made faster if it was optimized, and if it was
-! rewritten to only calculate the remainder.
-.rem:
-       wr      %g0,%g0,%y      ! Not a delayed write for sparclite
-       mov     %o1,%o4
-       tst     %o1
-       bl,a    1f
-       sub     %g0,%o4,%o4
-1:     tst     %o0
-       bl,a    2f
-       mov     -1,%y
-2:     divscc  %o0,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       divscc  %g1,%o4,%g1
-       be      6f
-       mov     %y,%o3
-       bg      4f
-       addcc   %o3,%o4,%g0
-       be,a    6f
-       mov     %g0,%o3
-       tst     %o0
-       bl      5f
-       tst     %g1
-       ba      5f
-       add     %o3,%o4,%o3
-4:     subcc   %o3,%o4,%g0
-       be,a    6f
-       mov     %g0,%o3
-       tst     %o0
-       bge     5f
-       tst     %g1
-       sub     %o3,%o4,%o3
-5:     bl,a    6f
-       add     %g1,1,%g1
-6:     tst     %o1
-       bl,a    7f
-       sub     %g0,%g1,%g1
-7:     retl
-       mov     %o3,%o0         ! Remainder is in %o3.
-#endif
diff --git a/gcc/config/sparc/t-elf b/gcc/config/sparc/t-elf

index 7073bcb7721cb3562eba689636dfeba1d90f6110..be926585481a71c9be7b6d5dfb361d171c0eef59 100644 (file)
--- a/gcc/config/sparc/t-elf
+++ b/gcc/config/sparc/t-elf
@@ -17,9 +17,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMSRC = sparc/lb1spc.asm
-LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3
-
  MULTILIB_OPTIONS = msoft-float mcpu=v8 mflat
  MULTILIB_DIRNAMES = soft v8 flat
  MULTILIB_MATCHES = msoft-float=mno-fpu
diff --git a/gcc/config/sparc/t-leon b/gcc/config/sparc/t-leon

index 4f9d0a9e7974418ea4685469173313a86aa3044a..8e5e30f7ff7ec924515f7e6d7eaa08528050b481 100644 (file)
--- a/gcc/config/sparc/t-leon
+++ b/gcc/config/sparc/t-leon
@@ -16,9 +16,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMSRC = sparc/lb1spc.asm
-LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3
-
  # Multilibs for LEON
  # LEON is a SPARC-V8, but the AT697 implementation has a bug in the
  # V8-specific instructions.
diff --git a/gcc/config/spu/t-spu-elf b/gcc/config/spu/t-spu-elf

index b1660353ee65b1a76a78ffffbfc1a93701d2bc21..45802499525c50d7df006fee15c6b35ac9164498 100644 (file)
--- a/gcc/config/spu/t-spu-elf
+++ b/gcc/config/spu/t-spu-elf
@@ -15,10 +15,6 @@
  #  along with GCC; see the file COPYING3.  If not see
  #  <http://www.gnu.org/licenses/>.
  
-# Suppress building libgcc1.a
-LIBGCC1 =
-CROSS_LIBGCC1 =
-
  TARGET_LIBGCC2_CFLAGS = -fPIC -mwarn-reloc -D__IN_LIBGCC2
  
  # We exclude those because the libgcc2.c default versions do not support
diff --git a/gcc/config/v850/lib1funcs.asm b/gcc/config/v850/lib1funcs.asm

deleted file mode 100644 (file)

index 04e9b1e..0000000
--- a/gcc/config/v850/lib1funcs.asm
+++ /dev/null
@@ -1,2330 +0,0 @@
-/* libgcc routines for NEC V850.
-   Copyright (C) 1996, 1997, 2002, 2005, 2009, 2010
-   Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-#ifdef L_mulsi3
-       .text
-       .globl ___mulsi3
-       .type  ___mulsi3,@function
-___mulsi3:
-#ifdef __v850__        
-/*
-   #define SHIFT 12
-   #define MASK ((1 << SHIFT) - 1)
-    
-   #define STEP(i, j)                               \
-   ({                                               \
-       short a_part = (a >> (i)) & MASK;            \
-       short b_part = (b >> (j)) & MASK;            \
-       int res = (((int) a_part) * ((int) b_part)); \
-       res;                                         \
-   })
-  
-   int
-   __mulsi3 (unsigned a, unsigned b)
-   {
-      return STEP (0, 0) +
-          ((STEP (SHIFT, 0) + STEP (0, SHIFT)) << SHIFT) +
-          ((STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) + STEP (2 * SHIFT, 0))
-           << (2 * SHIFT));
-   }
-*/
-        mov   r6, r14
-        movea lo(32767), r0, r10
-        and   r10, r14
-        mov   r7,  r15
-        and   r10, r15
-        shr   15,  r6
-        mov   r6,  r13
-        and   r10, r13
-        shr   15,  r7
-        mov   r7,  r12
-        and   r10, r12
-        shr   15,  r6
-        shr   15,  r7
-        mov   r14, r10
-        mulh  r15, r10
-        mov   r14, r11
-        mulh  r12, r11
-        mov   r13, r16
-        mulh  r15, r16
-        mulh  r14, r7
-        mulh  r15, r6
-        add   r16, r11
-        mulh  r13, r12
-        shl   15,  r11
-        add   r11, r10
-        add   r12, r7
-        add   r6,  r7
-        shl   30,  r7
-        add   r7,  r10
-        jmp   [r31]
-#endif /* __v850__ */
-#if defined(__v850e__) || defined(__v850ea__) || defined(__v850e2__) || defined(__v850e2v3__)
-        /* This routine is almost unnecessary because gcc
-           generates the MUL instruction for the RTX mulsi3.
-           But if someone wants to link his application with
-           previously compiled v850 objects then they will 
-          need this function.  */
- 
-        /* It isn't good to put the inst sequence as below;
-              mul r7, r6,
-              mov r6, r10, r0
-           In this case, there is a RAW hazard between them.
-           MUL inst takes 2 cycle in EX stage, then MOV inst
-           must wait 1cycle.  */
-        mov   r7, r10
-        mul   r6, r10, r0
-        jmp   [r31]
-#endif /* __v850e__ */
-       .size ___mulsi3,.-___mulsi3
-#endif /* L_mulsi3 */
-
-
-#ifdef L_udivsi3
-       .text
-       .global ___udivsi3
-       .type   ___udivsi3,@function
-___udivsi3:
-#ifdef __v850__
-       mov 1,r12
-       mov 0,r10
-       cmp r6,r7
-       bnl .L12
-       movhi hi(-2147483648),r0,r13
-       cmp r0,r7
-       blt .L12
-.L4:
-       shl 1,r7
-       shl 1,r12
-       cmp r6,r7
-       bnl .L12
-       cmp r0,r12
-       be .L8
-       mov r7,r19
-       and r13,r19
-       be .L4
-       br .L12
-.L9:
-       cmp r7,r6
-       bl .L10
-       sub r7,r6
-       or r12,r10
-.L10:
-       shr 1,r12
-       shr 1,r7
-.L12:
-       cmp r0,r12
-       bne .L9
-.L8:
-       jmp [r31]
-
-#else /* defined(__v850e__) */
-
-       /* See comments at end of __mulsi3.  */
-       mov   r6, r10   
-       divu  r7, r10, r0
-       jmp   [r31]             
-
-#endif /* __v850e__ */
-
-       .size ___udivsi3,.-___udivsi3
-#endif
-
-#ifdef L_divsi3
-       .text
-       .globl ___divsi3
-       .type  ___divsi3,@function
-___divsi3:
-#ifdef __v850__
-       add -8,sp
-       st.w r31,4[sp]
-       st.w r22,0[sp]
-       mov 1,r22
-       tst r7,r7
-       bp .L3
-       subr r0,r7
-       subr r0,r22
-.L3:
-       tst r6,r6
-       bp .L4
-       subr r0,r6
-       subr r0,r22
-.L4:
-       jarl ___udivsi3,r31
-       cmp r0,r22
-       bp .L7
-       subr r0,r10
-.L7:
-       ld.w 0[sp],r22
-       ld.w 4[sp],r31
-       add 8,sp
-       jmp [r31]
-
-#else /* defined(__v850e__) */
-
-       /* See comments at end of __mulsi3.  */
-       mov   r6, r10
-       div   r7, r10, r0
-       jmp   [r31]
-
-#endif /* __v850e__ */
-
-       .size ___divsi3,.-___divsi3
-#endif
-
-#ifdef  L_umodsi3
-       .text
-       .globl ___umodsi3
-       .type  ___umodsi3,@function
-___umodsi3:
-#ifdef __v850__
-       add -12,sp
-       st.w r31,8[sp]
-       st.w r7,4[sp]
-       st.w r6,0[sp]
-       jarl ___udivsi3,r31
-       ld.w 4[sp],r7
-       mov r10,r6
-       jarl ___mulsi3,r31
-       ld.w 0[sp],r6
-       subr r6,r10
-       ld.w 8[sp],r31
-       add 12,sp
-       jmp [r31]
-
-#else /* defined(__v850e__) */
-
-       /* See comments at end of __mulsi3.  */
-       divu  r7, r6, r10
-       jmp   [r31]
-
-#endif /* __v850e__ */
-
-       .size ___umodsi3,.-___umodsi3
-#endif /* L_umodsi3 */
-
-#ifdef  L_modsi3
-       .text
-       .globl ___modsi3
-       .type  ___modsi3,@function
-___modsi3:
-#ifdef __v850__        
-       add -12,sp
-       st.w r31,8[sp]
-       st.w r7,4[sp]
-       st.w r6,0[sp]
-       jarl ___divsi3,r31
-       ld.w 4[sp],r7
-       mov r10,r6
-       jarl ___mulsi3,r31
-       ld.w 0[sp],r6
-       subr r6,r10
-       ld.w 8[sp],r31
-       add 12,sp
-       jmp [r31]
-
-#else /* defined(__v850e__) */
-
-       /* See comments at end of __mulsi3.  */
-       div  r7, r6, r10
-       jmp [r31]
-
-#endif /* __v850e__ */
-
-       .size ___modsi3,.-___modsi3
-#endif /* L_modsi3 */
-
-#ifdef L_save_2
-       .text
-       .align  2
-       .globl  __save_r2_r29
-       .type   __save_r2_r29,@function
-       /* Allocate space and save registers 2, 20 .. 29 on the stack.  */
-       /* Called via:  jalr __save_r2_r29,r10.  */
-__save_r2_r29:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -44,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r25,16[ep]
-       sst.w   r24,20[ep]
-       sst.w   r23,24[ep]
-       sst.w   r22,28[ep]
-       sst.w   r21,32[ep]
-       sst.w   r20,36[ep]
-       sst.w   r2,40[ep]
-       mov     r1,ep
-#else
-       addi    -44,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r25,16[sp]
-       st.w    r24,20[sp]
-       st.w    r23,24[sp]
-       st.w    r22,28[sp]
-       st.w    r21,32[sp]
-       st.w    r20,36[sp]
-       st.w    r2,40[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r2_r29,.-__save_r2_r29
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r2_r29.  */
-       .align  2
-       .globl  __return_r2_r29
-       .type   __return_r2_r29,@function
-__return_r2_r29:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r25
-       sld.w   20[ep],r24
-       sld.w   24[ep],r23
-       sld.w   28[ep],r22
-       sld.w   32[ep],r21
-       sld.w   36[ep],r20
-       sld.w   40[ep],r2
-       addi    44,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r26
-       ld.w    16[sp],r25
-       ld.w    20[sp],r24
-       ld.w    24[sp],r23
-       ld.w    28[sp],r22
-       ld.w    32[sp],r21
-       ld.w    36[sp],r20
-       ld.w    40[sp],r2
-       addi    44,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r2_r29,.-__return_r2_r29
-#endif /* L_save_2 */
-
-#ifdef L_save_20
-       .text
-       .align  2
-       .globl  __save_r20_r29
-       .type   __save_r20_r29,@function
-       /* Allocate space and save registers 20 .. 29 on the stack.  */
-       /* Called via:  jalr __save_r20_r29,r10.  */
-__save_r20_r29:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -40,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r25,16[ep]
-       sst.w   r24,20[ep]
-       sst.w   r23,24[ep]
-       sst.w   r22,28[ep]
-       sst.w   r21,32[ep]
-       sst.w   r20,36[ep]
-       mov     r1,ep
-#else
-       addi    -40,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r25,16[sp]
-       st.w    r24,20[sp]
-       st.w    r23,24[sp]
-       st.w    r22,28[sp]
-       st.w    r21,32[sp]
-       st.w    r20,36[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r20_r29,.-__save_r20_r29
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r20_r29.  */
-       .align  2
-       .globl  __return_r20_r29
-       .type   __return_r20_r29,@function
-__return_r20_r29:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r25
-       sld.w   20[ep],r24
-       sld.w   24[ep],r23
-       sld.w   28[ep],r22
-       sld.w   32[ep],r21
-       sld.w   36[ep],r20
-       addi    40,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r26
-       ld.w    16[sp],r25
-       ld.w    20[sp],r24
-       ld.w    24[sp],r23
-       ld.w    28[sp],r22
-       ld.w    32[sp],r21
-       ld.w    36[sp],r20
-       addi    40,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r20_r29,.-__return_r20_r29
-#endif /* L_save_20 */
-
-#ifdef L_save_21
-       .text
-       .align  2
-       .globl  __save_r21_r29
-       .type   __save_r21_r29,@function
-       /* Allocate space and save registers 21 .. 29 on the stack.  */
-       /* Called via:  jalr __save_r21_r29,r10.  */
-__save_r21_r29:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -36,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r25,16[ep]
-       sst.w   r24,20[ep]
-       sst.w   r23,24[ep]
-       sst.w   r22,28[ep]
-       sst.w   r21,32[ep]
-       mov     r1,ep
-#else
-       addi    -36,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r25,16[sp]
-       st.w    r24,20[sp]
-       st.w    r23,24[sp]
-       st.w    r22,28[sp]
-       st.w    r21,32[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r21_r29,.-__save_r21_r29
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r21_r29.  */
-       .align  2
-       .globl  __return_r21_r29
-       .type   __return_r21_r29,@function
-__return_r21_r29:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r25
-       sld.w   20[ep],r24
-       sld.w   24[ep],r23
-       sld.w   28[ep],r22
-       sld.w   32[ep],r21
-       addi    36,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r26
-       ld.w    16[sp],r25
-       ld.w    20[sp],r24
-       ld.w    24[sp],r23
-       ld.w    28[sp],r22
-       ld.w    32[sp],r21
-       addi    36,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r21_r29,.-__return_r21_r29
-#endif /* L_save_21 */
-
-#ifdef L_save_22
-       .text
-       .align  2
-       .globl  __save_r22_r29
-       .type   __save_r22_r29,@function
-       /* Allocate space and save registers 22 .. 29 on the stack.  */
-       /* Called via:  jalr __save_r22_r29,r10.  */
-__save_r22_r29:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -32,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r25,16[ep]
-       sst.w   r24,20[ep]
-       sst.w   r23,24[ep]
-       sst.w   r22,28[ep]
-       mov     r1,ep
-#else
-       addi    -32,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r25,16[sp]
-       st.w    r24,20[sp]
-       st.w    r23,24[sp]
-       st.w    r22,28[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r22_r29,.-__save_r22_r29
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r22_r29.  */
-       .align  2
-       .globl  __return_r22_r29
-       .type   __return_r22_r29,@function
-__return_r22_r29:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r25
-       sld.w   20[ep],r24
-       sld.w   24[ep],r23
-       sld.w   28[ep],r22
-       addi    32,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r26
-       ld.w    16[sp],r25
-       ld.w    20[sp],r24
-       ld.w    24[sp],r23
-       ld.w    28[sp],r22
-       addi    32,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r22_r29,.-__return_r22_r29
-#endif /* L_save_22 */
-
-#ifdef L_save_23
-       .text
-       .align  2
-       .globl  __save_r23_r29
-       .type   __save_r23_r29,@function
-       /* Allocate space and save registers 23 .. 29 on the stack.  */
-       /* Called via:  jalr __save_r23_r29,r10.  */
-__save_r23_r29:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -28,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r25,16[ep]
-       sst.w   r24,20[ep]
-       sst.w   r23,24[ep]
-       mov     r1,ep
-#else
-       addi    -28,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r25,16[sp]
-       st.w    r24,20[sp]
-       st.w    r23,24[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r23_r29,.-__save_r23_r29
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r23_r29.  */
-       .align  2
-       .globl  __return_r23_r29
-       .type   __return_r23_r29,@function
-__return_r23_r29:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r25
-       sld.w   20[ep],r24
-       sld.w   24[ep],r23
-       addi    28,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r26
-       ld.w    16[sp],r25
-       ld.w    20[sp],r24
-       ld.w    24[sp],r23
-       addi    28,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r23_r29,.-__return_r23_r29
-#endif /* L_save_23 */
-
-#ifdef L_save_24
-       .text
-       .align  2
-       .globl  __save_r24_r29
-       .type   __save_r24_r29,@function
-       /* Allocate space and save registers 24 .. 29 on the stack.  */
-       /* Called via:  jalr __save_r24_r29,r10.  */
-__save_r24_r29:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -24,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r25,16[ep]
-       sst.w   r24,20[ep]
-       mov     r1,ep
-#else
-       addi    -24,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r25,16[sp]
-       st.w    r24,20[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r24_r29,.-__save_r24_r29
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r24_r29.  */
-       .align  2
-       .globl  __return_r24_r29
-       .type   __return_r24_r29,@function
-__return_r24_r29:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r25
-       sld.w   20[ep],r24
-       addi    24,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r26
-       ld.w    16[sp],r25
-       ld.w    20[sp],r24
-       addi    24,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r24_r29,.-__return_r24_r29
-#endif /* L_save_24 */
-
-#ifdef L_save_25
-       .text
-       .align  2
-       .globl  __save_r25_r29
-       .type   __save_r25_r29,@function
-       /* Allocate space and save registers 25 .. 29 on the stack.  */
-       /* Called via:  jalr __save_r25_r29,r10.  */
-__save_r25_r29:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -20,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r25,16[ep]
-       mov     r1,ep
-#else
-       addi    -20,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r25,16[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r25_r29,.-__save_r25_r29
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r25_r29.  */
-       .align  2
-       .globl  __return_r25_r29
-       .type   __return_r25_r29,@function
-__return_r25_r29:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r25
-       addi    20,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[ep],r29
-       ld.w    4[ep],r28
-       ld.w    8[ep],r27
-       ld.w    12[ep],r26
-       ld.w    16[ep],r25
-       addi    20,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r25_r29,.-__return_r25_r29
-#endif /* L_save_25 */
-
-#ifdef L_save_26
-       .text
-       .align  2
-       .globl  __save_r26_r29
-       .type   __save_r26_r29,@function
-       /* Allocate space and save registers 26 .. 29 on the stack.  */
-       /* Called via:  jalr __save_r26_r29,r10.  */
-__save_r26_r29:
-#ifdef __EP__
-       mov     ep,r1
-       add     -16,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       mov     r1,ep
-#else
-       add     -16,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r26_r29,.-__save_r26_r29
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r26_r29.  */
-       .align  2
-       .globl  __return_r26_r29
-       .type   __return_r26_r29,@function
-__return_r26_r29:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       addi    16,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r26
-       addi    16,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r26_r29,.-__return_r26_r29
-#endif /* L_save_26 */
-
-#ifdef L_save_27
-       .text
-       .align  2
-       .globl  __save_r27_r29
-       .type   __save_r27_r29,@function
-       /* Allocate space and save registers 27 .. 29 on the stack.  */
-       /* Called via:  jalr __save_r27_r29,r10.  */
-__save_r27_r29:
-       add     -12,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       jmp     [r10]
-       .size   __save_r27_r29,.-__save_r27_r29
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r27_r29.  */
-       .align  2
-       .globl  __return_r27_r29
-       .type   __return_r27_r29,@function
-__return_r27_r29:
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       add     12,sp
-       jmp     [r31]
-       .size   __return_r27_r29,.-__return_r27_r29
-#endif /* L_save_27 */
-
-#ifdef L_save_28
-       .text
-       .align  2
-       .globl  __save_r28_r29
-       .type   __save_r28_r29,@function
-       /* Allocate space and save registers 28,29 on the stack.  */
-       /* Called via:  jalr __save_r28_r29,r10.  */
-__save_r28_r29:
-       add     -8,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       jmp     [r10]
-       .size   __save_r28_r29,.-__save_r28_r29
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r28_r29.  */
-       .align  2
-       .globl  __return_r28_r29
-       .type   __return_r28_r29,@function
-__return_r28_r29:
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       add     8,sp
-       jmp     [r31]
-       .size   __return_r28_r29,.-__return_r28_r29
-#endif /* L_save_28 */
-
-#ifdef L_save_29
-       .text
-       .align  2
-       .globl  __save_r29
-       .type   __save_r29,@function
-       /* Allocate space and save register 29 on the stack.  */
-       /* Called via:  jalr __save_r29,r10.  */
-__save_r29:
-       add     -4,sp
-       st.w    r29,0[sp]
-       jmp     [r10]
-       .size   __save_r29,.-__save_r29
-
-       /* Restore saved register 29, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r29.  */
-       .align  2
-       .globl  __return_r29
-       .type   __return_r29,@function
-__return_r29:
-       ld.w    0[sp],r29
-       add     4,sp
-       jmp     [r31]
-       .size   __return_r29,.-__return_r29
-#endif /* L_save_28 */
-
-#ifdef L_save_2c
-       .text
-       .align  2
-       .globl  __save_r2_r31
-       .type   __save_r2_r31,@function
-       /* Allocate space and save registers 20 .. 29, 31 on the stack.  */
-       /* Also allocate space for the argument save area.  */
-       /* Called via:  jalr __save_r2_r31,r10.  */
-__save_r2_r31:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -48,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r25,16[ep]
-       sst.w   r24,20[ep]
-       sst.w   r23,24[ep]
-       sst.w   r22,28[ep]
-       sst.w   r21,32[ep]
-       sst.w   r20,36[ep]
-       sst.w   r2,40[ep]
-       sst.w   r31,44[ep]
-       mov     r1,ep
-#else
-       addi    -48,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r25,16[sp]
-       st.w    r24,20[sp]
-       st.w    r23,24[sp]
-       st.w    r22,28[sp]
-       st.w    r21,32[sp]
-       st.w    r20,36[sp]
-       st.w    r2,40[sp]
-       st.w    r31,44[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r2_r31,.-__save_r2_r31
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r20_r31.  */
-       .align  2
-       .globl  __return_r2_r31
-       .type   __return_r2_r31,@function
-__return_r2_r31:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r25
-       sld.w   20[ep],r24
-       sld.w   24[ep],r23
-       sld.w   28[ep],r22
-       sld.w   32[ep],r21
-       sld.w   36[ep],r20
-       sld.w   40[ep],r2
-       sld.w   44[ep],r31
-       addi    48,sp,sp
-       mov     r1,ep
-#else
-       ld.w    44[sp],r29
-       ld.w    40[sp],r28
-       ld.w    36[sp],r27
-       ld.w    32[sp],r26
-       ld.w    28[sp],r25
-       ld.w    24[sp],r24
-       ld.w    20[sp],r23
-       ld.w    16[sp],r22
-       ld.w    12[sp],r21
-       ld.w    8[sp],r20
-       ld.w    4[sp],r2
-       ld.w    0[sp],r31
-       addi    48,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r2_r31,.-__return_r2_r31
-#endif /* L_save_2c */
-
-#ifdef L_save_20c
-       .text
-       .align  2
-       .globl  __save_r20_r31
-       .type   __save_r20_r31,@function
-       /* Allocate space and save registers 20 .. 29, 31 on the stack.  */
-       /* Also allocate space for the argument save area.  */
-       /* Called via:  jalr __save_r20_r31,r10.  */
-__save_r20_r31:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -44,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r25,16[ep]
-       sst.w   r24,20[ep]
-       sst.w   r23,24[ep]
-       sst.w   r22,28[ep]
-       sst.w   r21,32[ep]
-       sst.w   r20,36[ep]
-       sst.w   r31,40[ep]
-       mov     r1,ep
-#else
-       addi    -44,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r25,16[sp]
-       st.w    r24,20[sp]
-       st.w    r23,24[sp]
-       st.w    r22,28[sp]
-       st.w    r21,32[sp]
-       st.w    r20,36[sp]
-       st.w    r31,40[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r20_r31,.-__save_r20_r31
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r20_r31.  */
-       .align  2
-       .globl  __return_r20_r31
-       .type   __return_r20_r31,@function
-__return_r20_r31:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r25
-       sld.w   20[ep],r24
-       sld.w   24[ep],r23
-       sld.w   28[ep],r22
-       sld.w   32[ep],r21
-       sld.w   36[ep],r20
-       sld.w   40[ep],r31
-       addi    44,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r26
-       ld.w    16[sp],r25
-       ld.w    20[sp],r24
-       ld.w    24[sp],r23
-       ld.w    28[sp],r22
-       ld.w    32[sp],r21
-       ld.w    36[sp],r20
-       ld.w    40[sp],r31
-       addi    44,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r20_r31,.-__return_r20_r31
-#endif /* L_save_20c */
-
-#ifdef L_save_21c
-       .text
-       .align  2
-       .globl  __save_r21_r31
-       .type   __save_r21_r31,@function
-       /* Allocate space and save registers 21 .. 29, 31 on the stack.  */
-       /* Also allocate space for the argument save area.  */
-       /* Called via:  jalr __save_r21_r31,r10.  */
-__save_r21_r31:
-#ifdef __EP__  
-       mov     ep,r1
-       addi    -40,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r25,16[ep]
-       sst.w   r24,20[ep]
-       sst.w   r23,24[ep]
-       sst.w   r22,28[ep]
-       sst.w   r21,32[ep]
-       sst.w   r31,36[ep]
-       mov     r1,ep
-       jmp     [r10]
-#else  
-       addi    -40,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r25,16[sp]
-       st.w    r24,20[sp]
-       st.w    r23,24[sp]
-       st.w    r22,28[sp]
-       st.w    r21,32[sp]
-       st.w    r31,36[sp]
-       jmp     [r10]
-#endif 
-       .size   __save_r21_r31,.-__save_r21_r31
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r21_r31.  */
-       .align  2
-       .globl  __return_r21_r31
-       .type   __return_r21_r31,@function
-__return_r21_r31:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r25
-       sld.w   20[ep],r24
-       sld.w   24[ep],r23
-       sld.w   28[ep],r22
-       sld.w   32[ep],r21
-       sld.w   36[ep],r31
-       addi    40,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r26
-       ld.w    16[sp],r25
-       ld.w    20[sp],r24
-       ld.w    24[sp],r23
-       ld.w    28[sp],r22
-       ld.w    32[sp],r21
-       ld.w    36[sp],r31
-       addi    40,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r21_r31,.-__return_r21_r31
-#endif /* L_save_21c */
-
-#ifdef L_save_22c
-       .text
-       .align  2
-       .globl  __save_r22_r31
-       .type   __save_r22_r31,@function
-       /* Allocate space and save registers 22 .. 29, 31 on the stack.  */
-       /* Also allocate space for the argument save area.  */
-       /* Called via:  jalr __save_r22_r31,r10.  */
-__save_r22_r31:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -36,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r25,16[ep]
-       sst.w   r24,20[ep]
-       sst.w   r23,24[ep]
-       sst.w   r22,28[ep]
-       sst.w   r31,32[ep]
-       mov     r1,ep
-#else
-       addi    -36,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r25,16[sp]
-       st.w    r24,20[sp]
-       st.w    r23,24[sp]
-       st.w    r22,28[sp]
-       st.w    r31,32[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r22_r31,.-__save_r22_r31
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r22_r31.  */
-       .align  2
-       .globl  __return_r22_r31
-       .type   __return_r22_r31,@function
-__return_r22_r31:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r25
-       sld.w   20[ep],r24
-       sld.w   24[ep],r23
-       sld.w   28[ep],r22
-       sld.w   32[ep],r31
-       addi    36,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r26
-       ld.w    16[sp],r25
-       ld.w    20[sp],r24
-       ld.w    24[sp],r23
-       ld.w    28[sp],r22
-       ld.w    32[sp],r31
-       addi    36,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r22_r31,.-__return_r22_r31
-#endif /* L_save_22c */
-
-#ifdef L_save_23c
-       .text
-       .align  2
-       .globl  __save_r23_r31
-       .type   __save_r23_r31,@function
-       /* Allocate space and save registers 23 .. 29, 31 on the stack.  */
-       /* Also allocate space for the argument save area.  */
-       /* Called via:  jalr __save_r23_r31,r10.  */
-__save_r23_r31:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -32,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r25,16[ep]
-       sst.w   r24,20[ep]
-       sst.w   r23,24[ep]
-       sst.w   r31,28[ep]
-       mov     r1,ep
-#else
-       addi    -32,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r25,16[sp]
-       st.w    r24,20[sp]
-       st.w    r23,24[sp]
-       st.w    r31,28[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r23_r31,.-__save_r23_r31
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r23_r31.  */
-       .align  2
-       .globl  __return_r23_r31
-       .type   __return_r23_r31,@function
-__return_r23_r31:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r25
-       sld.w   20[ep],r24
-       sld.w   24[ep],r23
-       sld.w   28[ep],r31
-       addi    32,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r26
-       ld.w    16[sp],r25
-       ld.w    20[sp],r24
-       ld.w    24[sp],r23
-       ld.w    28[sp],r31
-       addi    32,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r23_r31,.-__return_r23_r31
-#endif /* L_save_23c */
-
-#ifdef L_save_24c
-       .text
-       .align  2
-       .globl  __save_r24_r31
-       .type   __save_r24_r31,@function
-       /* Allocate space and save registers 24 .. 29, 31 on the stack.  */
-       /* Also allocate space for the argument save area.  */
-       /* Called via:  jalr __save_r24_r31,r10.  */
-__save_r24_r31:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -28,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r25,16[ep]
-       sst.w   r24,20[ep]
-       sst.w   r31,24[ep]
-       mov     r1,ep
-#else
-       addi    -28,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r25,16[sp]
-       st.w    r24,20[sp]
-       st.w    r31,24[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r24_r31,.-__save_r24_r31
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r24_r31.  */
-       .align  2
-       .globl  __return_r24_r31
-       .type   __return_r24_r31,@function
-__return_r24_r31:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r25
-       sld.w   20[ep],r24
-       sld.w   24[ep],r31
-       addi    28,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r26
-       ld.w    16[sp],r25
-       ld.w    20[sp],r24
-       ld.w    24[sp],r31
-       addi    28,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r24_r31,.-__return_r24_r31
-#endif /* L_save_24c */
-
-#ifdef L_save_25c
-       .text
-       .align  2
-       .globl  __save_r25_r31
-       .type   __save_r25_r31,@function
-       /* Allocate space and save registers 25 .. 29, 31 on the stack.  */
-       /* Also allocate space for the argument save area.  */
-       /* Called via:  jalr __save_r25_r31,r10.  */
-__save_r25_r31:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -24,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r25,16[ep]
-       sst.w   r31,20[ep]
-       mov     r1,ep
-#else
-       addi    -24,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r25,16[sp]
-       st.w    r31,20[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r25_r31,.-__save_r25_r31
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r25_r31.  */
-       .align  2
-       .globl  __return_r25_r31
-       .type   __return_r25_r31,@function
-__return_r25_r31:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r25
-       sld.w   20[ep],r31
-       addi    24,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r26
-       ld.w    16[sp],r25
-       ld.w    20[sp],r31
-       addi    24,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r25_r31,.-__return_r25_r31
-#endif /* L_save_25c */
-
-#ifdef L_save_26c
-       .text
-       .align  2
-       .globl  __save_r26_r31
-       .type   __save_r26_r31,@function
-       /* Allocate space and save registers 26 .. 29, 31 on the stack.  */
-       /* Also allocate space for the argument save area.  */
-       /* Called via:  jalr __save_r26_r31,r10.  */
-__save_r26_r31:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -20,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r26,12[ep]
-       sst.w   r31,16[ep]
-       mov     r1,ep
-#else
-       addi    -20,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r26,12[sp]
-       st.w    r31,16[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r26_r31,.-__save_r26_r31
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r26_r31.  */
-       .align  2
-       .globl  __return_r26_r31
-       .type   __return_r26_r31,@function
-__return_r26_r31:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r26
-       sld.w   16[ep],r31
-       addi    20,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r26
-       ld.w    16[sp],r31
-       addi    20,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r26_r31,.-__return_r26_r31
-#endif /* L_save_26c */
-
-#ifdef L_save_27c
-       .text
-       .align  2
-       .globl  __save_r27_r31
-       .type   __save_r27_r31,@function
-       /* Allocate space and save registers 27 .. 29, 31 on the stack.  */
-       /* Also allocate space for the argument save area.  */
-       /* Called via:  jalr __save_r27_r31,r10.  */
-__save_r27_r31:
-#ifdef __EP__
-       mov     ep,r1
-       addi    -16,sp,sp
-       mov     sp,ep
-       sst.w   r29,0[ep]
-       sst.w   r28,4[ep]
-       sst.w   r27,8[ep]
-       sst.w   r31,12[ep]
-       mov     r1,ep
-#else
-       addi    -16,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r27,8[sp]
-       st.w    r31,12[sp]
-#endif
-       jmp     [r10]
-       .size   __save_r27_r31,.-__save_r27_r31
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r27_r31.  */
-       .align  2
-       .globl  __return_r27_r31
-       .type   __return_r27_r31,@function
-__return_r27_r31:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   0[ep],r29
-       sld.w   4[ep],r28
-       sld.w   8[ep],r27
-       sld.w   12[ep],r31
-       addi    16,sp,sp
-       mov     r1,ep
-#else
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r27
-       ld.w    12[sp],r31
-       addi    16,sp,sp
-#endif
-       jmp     [r31]
-       .size   __return_r27_r31,.-__return_r27_r31
-#endif /* L_save_27c */
-
-#ifdef L_save_28c
-       .text
-       .align  2
-       .globl  __save_r28_r31
-       .type   __save_r28_r31,@function
-       /* Allocate space and save registers 28 .. 29, 31 on the stack.  */
-       /* Also allocate space for the argument save area.  */
-       /* Called via:  jalr __save_r28_r31,r10.  */
-__save_r28_r31:
-       addi    -12,sp,sp
-       st.w    r29,0[sp]
-       st.w    r28,4[sp]
-       st.w    r31,8[sp]
-       jmp     [r10]
-       .size   __save_r28_r31,.-__save_r28_r31
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r28_r31.  */
-       .align  2
-       .globl  __return_r28_r31
-       .type   __return_r28_r31,@function
-__return_r28_r31:
-       ld.w    0[sp],r29
-       ld.w    4[sp],r28
-       ld.w    8[sp],r31
-       addi    12,sp,sp
-       jmp     [r31]
-       .size   __return_r28_r31,.-__return_r28_r31
-#endif /* L_save_28c */
-
-#ifdef L_save_29c
-       .text
-       .align  2
-       .globl  __save_r29_r31
-       .type   __save_r29_r31,@function
-       /* Allocate space and save registers 29 & 31 on the stack.  */
-       /* Also allocate space for the argument save area.  */
-       /* Called via:  jalr __save_r29_r31,r10.  */
-__save_r29_r31:
-       addi    -8,sp,sp
-       st.w    r29,0[sp]
-       st.w    r31,4[sp]
-       jmp     [r10]
-       .size   __save_r29_r31,.-__save_r29_r31
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r29_r31.  */
-       .align  2
-       .globl  __return_r29_r31
-       .type   __return_r29_r31,@function
-__return_r29_r31:
-       ld.w    0[sp],r29
-       ld.w    4[sp],r31
-       addi    8,sp,sp
-       jmp     [r31]
-       .size   __return_r29_r31,.-__return_r29_r31
-#endif /* L_save_29c */
-
-#ifdef L_save_31c
-       .text
-       .align  2
-       .globl  __save_r31
-       .type   __save_r31,@function
-       /* Allocate space and save register 31 on the stack.  */
-       /* Also allocate space for the argument save area.  */
-       /* Called via:  jalr __save_r31,r10.  */
-__save_r31:
-       addi    -4,sp,sp
-       st.w    r31,0[sp]
-       jmp     [r10]
-       .size   __save_r31,.-__save_r31
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  jr __return_r31.  */
-       .align  2
-       .globl  __return_r31
-       .type   __return_r31,@function
-__return_r31:
-       ld.w    0[sp],r31
-       addi    4,sp,sp
-       jmp     [r31]
-        .size   __return_r31,.-__return_r31
-#endif /* L_save_31c */
-
-#ifdef L_save_interrupt
-       .text
-       .align  2
-       .globl  __save_interrupt
-       .type   __save_interrupt,@function
-       /* Save registers r1, r4 on stack and load up with expected values.  */
-       /* Note, 20 bytes of stack have already been allocated.  */
-       /* Called via:  jalr __save_interrupt,r10.  */
-__save_interrupt:
-       /* add -20,sp ; st.w r11,16[sp] ; st.w r10,12[sp] ; */
-       st.w    ep,0[sp]
-       st.w    gp,4[sp]
-       st.w    r1,8[sp]
-       movhi   hi(__ep),r0,ep
-       movea   lo(__ep),ep,ep
-       movhi   hi(__gp),r0,gp
-       movea   lo(__gp),gp,gp
-       jmp     [r10]
-       .size   __save_interrupt,.-__save_interrupt
-
-       /* Restore saved registers, deallocate stack and return from the interrupt.  */
-       /* Called via:  jr __return_interrupt.  */
-       .align  2
-       .globl  __return_interrupt
-       .type   __return_interrupt,@function
-__return_interrupt:
-       ld.w    0[sp],ep
-       ld.w    4[sp],gp
-       ld.w    8[sp],r1
-       ld.w    12[sp],r10
-       ld.w    16[sp],r11
-       addi    20,sp,sp
-       reti
-       .size   __return_interrupt,.-__return_interrupt
-#endif /* L_save_interrupt */
-
-#ifdef L_save_all_interrupt
-       .text
-       .align  2
-       .globl  __save_all_interrupt
-       .type   __save_all_interrupt,@function
-       /* Save all registers except for those saved in __save_interrupt.  */
-       /* Allocate enough stack for all of the registers & 16 bytes of space.  */
-       /* Called via:  jalr __save_all_interrupt,r10.  */
-__save_all_interrupt:
-       addi    -104,sp,sp
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sst.w   r31,100[ep]
-       sst.w   r2,96[ep]
-       sst.w   gp,92[ep]
-       sst.w   r6,88[ep]
-       sst.w   r7,84[ep]
-       sst.w   r8,80[ep]
-       sst.w   r9,76[ep]
-       sst.w   r11,72[ep]
-       sst.w   r12,68[ep]
-       sst.w   r13,64[ep]
-       sst.w   r14,60[ep]
-       sst.w   r15,56[ep]
-       sst.w   r16,52[ep]
-       sst.w   r17,48[ep]
-       sst.w   r18,44[ep]
-       sst.w   r19,40[ep]
-       sst.w   r20,36[ep]
-       sst.w   r21,32[ep]
-       sst.w   r22,28[ep]
-       sst.w   r23,24[ep]
-       sst.w   r24,20[ep]
-       sst.w   r25,16[ep]
-       sst.w   r26,12[ep]
-       sst.w   r27,8[ep]
-       sst.w   r28,4[ep]
-       sst.w   r29,0[ep]
-       mov     r1,ep
-#else
-       st.w    r31,100[sp]
-       st.w    r2,96[sp]
-       st.w    gp,92[sp]
-       st.w    r6,88[sp]
-       st.w    r7,84[sp]
-       st.w    r8,80[sp]
-       st.w    r9,76[sp]
-       st.w    r11,72[sp]
-       st.w    r12,68[sp]
-       st.w    r13,64[sp]
-       st.w    r14,60[sp]
-       st.w    r15,56[sp]
-       st.w    r16,52[sp]
-       st.w    r17,48[sp]
-       st.w    r18,44[sp]
-       st.w    r19,40[sp]
-       st.w    r20,36[sp]
-       st.w    r21,32[sp]
-       st.w    r22,28[sp]
-       st.w    r23,24[sp]
-       st.w    r24,20[sp]
-       st.w    r25,16[sp]
-       st.w    r26,12[sp]
-       st.w    r27,8[sp]
-       st.w    r28,4[sp]
-       st.w    r29,0[sp]
-#endif
-       jmp     [r10]
-       .size   __save_all_interrupt,.-__save_all_interrupt
-
-       .globl  __restore_all_interrupt
-       .type   __restore_all_interrupt,@function
-       /* Restore all registers saved in __save_all_interrupt and
-          deallocate the stack space.  */
-       /* Called via:  jalr __restore_all_interrupt,r10.  */
-__restore_all_interrupt:
-#ifdef __EP__
-       mov     ep,r1
-       mov     sp,ep
-       sld.w   100[ep],r31
-       sld.w   96[ep],r2
-       sld.w   92[ep],gp
-       sld.w   88[ep],r6
-       sld.w   84[ep],r7
-       sld.w   80[ep],r8
-       sld.w   76[ep],r9
-       sld.w   72[ep],r11
-       sld.w   68[ep],r12
-       sld.w   64[ep],r13
-       sld.w   60[ep],r14
-       sld.w   56[ep],r15
-       sld.w   52[ep],r16
-       sld.w   48[ep],r17
-       sld.w   44[ep],r18
-       sld.w   40[ep],r19
-       sld.w   36[ep],r20
-       sld.w   32[ep],r21
-       sld.w   28[ep],r22
-       sld.w   24[ep],r23
-       sld.w   20[ep],r24
-       sld.w   16[ep],r25
-       sld.w   12[ep],r26
-       sld.w   8[ep],r27
-       sld.w   4[ep],r28
-       sld.w   0[ep],r29
-       mov     r1,ep
-#else
-       ld.w    100[sp],r31
-       ld.w    96[sp],r2
-       ld.w    92[sp],gp
-       ld.w    88[sp],r6
-       ld.w    84[sp],r7
-       ld.w    80[sp],r8
-       ld.w    76[sp],r9
-       ld.w    72[sp],r11
-       ld.w    68[sp],r12
-       ld.w    64[sp],r13
-       ld.w    60[sp],r14
-       ld.w    56[sp],r15
-       ld.w    52[sp],r16
-       ld.w    48[sp],r17
-       ld.w    44[sp],r18
-       ld.w    40[sp],r19
-       ld.w    36[sp],r20
-       ld.w    32[sp],r21
-       ld.w    28[sp],r22
-       ld.w    24[sp],r23
-       ld.w    20[sp],r24
-       ld.w    16[sp],r25
-       ld.w    12[sp],r26
-       ld.w    8[sp],r27
-       ld.w    4[sp],r28
-       ld.w    0[sp],r29
-#endif
-       addi    104,sp,sp       
-       jmp     [r10]
-       .size   __restore_all_interrupt,.-__restore_all_interrupt
-#endif /* L_save_all_interrupt */
-       
-#if defined(__v850e__) || defined(__v850e1__) || defined(__v850e2__) || defined(__v850e2v3__)
-#ifdef L_callt_save_r2_r29
-       /* Put these functions into the call table area.  */
-       .call_table_text
-       
-       /* Allocate space and save registers 2, 20 .. 29 on the stack.  */
-       /* Called via:  callt ctoff(__callt_save_r2_r29).  */
-       .align  2
-.L_save_r2_r29:
-       add     -4, sp
-       st.w    r2, 0[sp]
-       prepare {r20 - r29}, 0
-       ctret
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  callt ctoff(__callt_return_r2_r29).  */
-       .align  2
-.L_return_r2_r29:
-       dispose 0, {r20-r29}
-       ld.w    0[sp], r2
-       add     4, sp
-       jmp     [r31]
-
-       /* Place the offsets of the start of these routines into the call table.  */
-       .call_table_data
-
-       .global __callt_save_r2_r29
-       .type   __callt_save_r2_r29,@function
-__callt_save_r2_r29:   .short ctoff(.L_save_r2_r29)
-       
-       .global __callt_return_r2_r29
-       .type   __callt_return_r2_r29,@function
-__callt_return_r2_r29: .short ctoff(.L_return_r2_r29)
-       
-#endif /* L_callt_save_r2_r29.  */
-
-#ifdef L_callt_save_r2_r31
-       /* Put these functions into the call table area.  */
-       .call_table_text
-       
-       /* Allocate space and save registers 2 and 20 .. 29, 31 on the stack.  */
-       /* Also allocate space for the argument save area.  */
-       /* Called via:  callt ctoff(__callt_save_r2_r31).  */
-       .align  2
-.L_save_r2_r31:
-       add     -4, sp
-       st.w    r2, 0[sp]
-       prepare {r20 - r29, r31}, 0
-       ctret
-
-       /* Restore saved registers, deallocate stack and return to the user.  */
-       /* Called via:  callt ctoff(__callt_return_r2_r31).  */
-       .align  2
-.L_return_r2_r31:
-       dispose 0, {r20 - r29, r31}
-       ld.w    0[sp], r2
-       addi    4, sp, sp
-       jmp     [r31]
-
-       /* Place the offsets of the start of these routines into the call table.  */
-       .call_table_data
-
-       .global __callt_save_r2_r31
-       .type   __callt_save_r2_r31,@function
-__callt_save_r2_r31:   .short ctoff(.L_save_r2_r31)
-       
-       .global __callt_return_r2_r31
-       .type   __callt_return_r2_r31,@function
-__callt_return_r2_r31: .short ctoff(.L_return_r2_r31)
-       
-#endif /* L_callt_save_r2_r31 */
-
-#ifdef L_callt_save_interrupt
-       /* Put these functions into the call table area.  */
-       .call_table_text
-       
-       /* Save registers r1, ep, gp, r10 on stack and load up with expected values.  */
-       /* Called via:  callt ctoff(__callt_save_interrupt).  */
-       .align  2
-.L_save_interrupt:
-        /* SP has already been moved before callt ctoff(_save_interrupt).  */
-        /* R1,R10,R11,ctpc,ctpsw has alread been saved bofore callt ctoff(_save_interrupt).  */
-        /* addi -28, sp, sp  */
-        /* st.w r1,    24[sp] */
-        /* st.w r10,   12[sp] */
-        /* st.w r11,   16[sp] */
-        /* stsr ctpc,  r10    */
-        /* st.w r10,   20[sp] */
-        /* stsr ctpsw, r10    */
-        /* st.w r10,   24[sp] */
-        st.w    ep,  0[sp]
-        st.w    gp,  4[sp]
-        st.w    r1,  8[sp]
-       mov     hilo(__ep),ep
-       mov     hilo(__gp),gp
-       ctret
-
-        .call_table_text
-       /* Restore saved registers, deallocate stack and return from the interrupt.  */
-        /* Called via:  callt ctoff(__callt_restore_interrupt).  */
-       .align  2
-       .globl  __return_interrupt
-       .type   __return_interrupt,@function
-.L_return_interrupt:
-        ld.w    24[sp], r1
-        ldsr    r1,     ctpsw
-        ld.w    20[sp], r1
-        ldsr    r1,     ctpc
-        ld.w    16[sp], r11
-        ld.w    12[sp], r10
-        ld.w     8[sp], r1
-        ld.w     4[sp], gp
-        ld.w     0[sp], ep
-        addi    28, sp, sp
-        reti
-
-       /* Place the offsets of the start of these routines into the call table.  */
-       .call_table_data
-
-        .global __callt_save_interrupt
-        .type   __callt_save_interrupt,@function
-__callt_save_interrupt:         .short ctoff(.L_save_interrupt)
-
-        .global __callt_return_interrupt
-        .type   __callt_return_interrupt,@function
-__callt_return_interrupt:       .short ctoff(.L_return_interrupt)
-       
-#endif /* L_callt_save_interrupt */
-
-#ifdef L_callt_save_all_interrupt
-       /* Put these functions into the call table area.  */
-       .call_table_text
-       
-       /* Save all registers except for those saved in __save_interrupt.  */
-       /* Allocate enough stack for all of the registers & 16 bytes of space.  */
-       /* Called via:  callt ctoff(__callt_save_all_interrupt).  */
-       .align  2
-.L_save_all_interrupt:
-       addi    -60, sp, sp
-#ifdef __EP__
-       mov     ep,  r1
-       mov     sp,  ep
-       sst.w   r2,  56[ep]
-       sst.w   r5,  52[ep]
-       sst.w   r6,  48[ep]
-       sst.w   r7,  44[ep]
-       sst.w   r8,  40[ep]
-       sst.w   r9,  36[ep]
-       sst.w   r11, 32[ep]
-       sst.w   r12, 28[ep]
-       sst.w   r13, 24[ep]
-       sst.w   r14, 20[ep]
-       sst.w   r15, 16[ep]
-       sst.w   r16, 12[ep]
-       sst.w   r17, 8[ep]
-       sst.w   r18, 4[ep]
-       sst.w   r19, 0[ep]
-       mov     r1,  ep
-#else
-       st.w    r2,  56[sp]
-       st.w    r5,  52[sp]
-       st.w    r6,  48[sp]
-       st.w    r7,  44[sp]
-       st.w    r8,  40[sp]
-       st.w    r9,  36[sp]
-       st.w    r11, 32[sp]
-       st.w    r12, 28[sp]
-       st.w    r13, 24[sp]
-       st.w    r14, 20[sp]
-       st.w    r15, 16[sp]
-       st.w    r16, 12[sp]
-       st.w    r17, 8[sp]
-       st.w    r18, 4[sp]
-       st.w    r19, 0[sp]
-#endif
-       prepare {r20 - r29, r31}, 0
-       ctret   
-
-       /* Restore all registers saved in __save_all_interrupt
-          deallocate the stack space.  */
-       /* Called via:  callt ctoff(__callt_restore_all_interrupt).  */
-       .align 2
-.L_restore_all_interrupt:
-       dispose 0, {r20 - r29, r31}
-#ifdef __EP__
-       mov     ep, r1
-       mov     sp, ep
-       sld.w   0 [ep], r19
-       sld.w   4 [ep], r18
-       sld.w   8 [ep], r17
-       sld.w   12[ep], r16
-       sld.w   16[ep], r15
-       sld.w   20[ep], r14
-       sld.w   24[ep], r13
-       sld.w   28[ep], r12
-       sld.w   32[ep], r11
-       sld.w   36[ep], r9
-       sld.w   40[ep], r8
-       sld.w   44[ep], r7
-       sld.w   48[ep], r6
-       sld.w   52[ep], r5
-       sld.w   56[ep], r2
-       mov     r1, ep
-#else
-       ld.w    0 [sp], r19
-       ld.w    4 [sp], r18
-       ld.w    8 [sp], r17
-       ld.w    12[sp], r16
-       ld.w    16[sp], r15
-       ld.w    20[sp], r14
-       ld.w    24[sp], r13
-       ld.w    28[sp], r12
-       ld.w    32[sp], r11
-       ld.w    36[sp], r9
-       ld.w    40[sp], r8
-       ld.w    44[sp], r7
-       ld.w    48[sp], r6
-       ld.w    52[sp], r5
-       ld.w    56[sp], r2
-#endif
-       addi    60, sp, sp
-       ctret
-
-       /* Place the offsets of the start of these routines into the call table.  */
-       .call_table_data
-
-       .global __callt_save_all_interrupt
-       .type   __callt_save_all_interrupt,@function
-__callt_save_all_interrupt:    .short ctoff(.L_save_all_interrupt)
-       
-       .global __callt_restore_all_interrupt
-       .type   __callt_restore_all_interrupt,@function
-__callt_restore_all_interrupt: .short ctoff(.L_restore_all_interrupt)
-       
-#endif /* L_callt_save_all_interrupt */
-
-
-#define MAKE_CALLT_FUNCS( START )                                              \
-       .call_table_text                                                        ;\
-       .align  2                                                               ;\
-       /* Allocate space and save registers START .. r29 on the stack.  */     ;\
-       /* Called via:  callt ctoff(__callt_save_START_r29).  */                ;\
-.L_save_##START##_r29:                                                         ;\
-       prepare { START - r29 }, 0                                              ;\
-       ctret                                                                   ;\
-                                                                               ;\
-       /* Restore saved registers, deallocate stack and return.  */            ;\
-       /* Called via:  callt ctoff(__return_START_r29).  */                    ;\
-       .align  2                                                               ;\
-.L_return_##START##_r29:                                                       ;\
-       dispose 0, { START - r29 }, r31                                         ;\
-                                                                               ;\
-       /* Place the offsets of the start of these funcs into the call table.  */;\
-       .call_table_data                                                        ;\
-                                                                               ;\
-       .global __callt_save_##START##_r29                                      ;\
-       .type   __callt_save_##START##_r29,@function                            ;\
-__callt_save_##START##_r29:    .short ctoff(.L_save_##START##_r29 )            ;\
-                                                                               ;\
-       .global __callt_return_##START##_r29                                    ;\
-       .type   __callt_return_##START##_r29,@function                          ;\
-__callt_return_##START##_r29:  .short ctoff(.L_return_##START##_r29 )  
-
-
-#define MAKE_CALLT_CFUNCS( START )                                             \
-       .call_table_text                                                        ;\
-       .align  2                                                               ;\
-       /* Allocate space and save registers START .. r31 on the stack.  */     ;\
-       /* Called via:  callt ctoff(__callt_save_START_r31c).  */               ;\
-.L_save_##START##_r31c:                                                                ;\
-       prepare { START - r29, r31}, 0                                          ;\
-       ctret                                                                   ;\
-                                                                               ;\
-       /* Restore saved registers, deallocate stack and return.  */            ;\
-       /* Called via:  callt ctoff(__return_START_r31c).  */                   ;\
-       .align  2                                                               ;\
-.L_return_##START##_r31c:                                                      ;\
-       dispose 0, { START - r29, r31}, r31                                     ;\
-                                                                               ;\
-       /* Place the offsets of the start of these funcs into the call table.  */;\
-       .call_table_data                                                        ;\
-                                                                               ;\
-       .global __callt_save_##START##_r31c                                     ;\
-       .type   __callt_save_##START##_r31c,@function                           ;\
-__callt_save_##START##_r31c:    .short ctoff(.L_save_##START##_r31c )          ;\
-                                                                               ;\
-       .global __callt_return_##START##_r31c                                   ;\
-       .type   __callt_return_##START##_r31c,@function                         ;\
-__callt_return_##START##_r31c:  .short ctoff(.L_return_##START##_r31c )        
-
-       
-#ifdef L_callt_save_20
-       MAKE_CALLT_FUNCS (r20)
-#endif
-#ifdef L_callt_save_21
-       MAKE_CALLT_FUNCS (r21)
-#endif
-#ifdef L_callt_save_22
-       MAKE_CALLT_FUNCS (r22)
-#endif
-#ifdef L_callt_save_23
-       MAKE_CALLT_FUNCS (r23)
-#endif
-#ifdef L_callt_save_24
-       MAKE_CALLT_FUNCS (r24)
-#endif
-#ifdef L_callt_save_25
-       MAKE_CALLT_FUNCS (r25)
-#endif
-#ifdef L_callt_save_26
-       MAKE_CALLT_FUNCS (r26)
-#endif
-#ifdef L_callt_save_27
-       MAKE_CALLT_FUNCS (r27)
-#endif
-#ifdef L_callt_save_28
-       MAKE_CALLT_FUNCS (r28)
-#endif
-#ifdef L_callt_save_29
-       MAKE_CALLT_FUNCS (r29)
-#endif
-
-#ifdef L_callt_save_20c
-       MAKE_CALLT_CFUNCS (r20)
-#endif
-#ifdef L_callt_save_21c
-       MAKE_CALLT_CFUNCS (r21)
-#endif
-#ifdef L_callt_save_22c
-       MAKE_CALLT_CFUNCS (r22)
-#endif
-#ifdef L_callt_save_23c
-       MAKE_CALLT_CFUNCS (r23)
-#endif
-#ifdef L_callt_save_24c
-       MAKE_CALLT_CFUNCS (r24)
-#endif
-#ifdef L_callt_save_25c
-       MAKE_CALLT_CFUNCS (r25)
-#endif
-#ifdef L_callt_save_26c
-       MAKE_CALLT_CFUNCS (r26)
-#endif
-#ifdef L_callt_save_27c
-       MAKE_CALLT_CFUNCS (r27)
-#endif
-#ifdef L_callt_save_28c
-       MAKE_CALLT_CFUNCS (r28)
-#endif
-#ifdef L_callt_save_29c
-       MAKE_CALLT_CFUNCS (r29)
-#endif
-
-       
-#ifdef L_callt_save_31c
-       .call_table_text
-       .align  2
-       /* Allocate space and save register r31 on the stack.  */
-       /* Called via:  callt ctoff(__callt_save_r31c).  */
-.L_callt_save_r31c:
-       prepare {r31}, 0
-       ctret
-
-       /* Restore saved registers, deallocate stack and return.  */
-       /* Called via:  callt ctoff(__return_r31c).  */
-       .align  2
-.L_callt_return_r31c:
-       dispose 0, {r31}, r31
-       
-       /* Place the offsets of the start of these funcs into the call table.  */
-       .call_table_data
-
-       .global __callt_save_r31c
-       .type   __callt_save_r31c,@function
-__callt_save_r31c:     .short ctoff(.L_callt_save_r31c)
-
-       .global __callt_return_r31c
-       .type   __callt_return_r31c,@function
-__callt_return_r31c:   .short ctoff(.L_callt_return_r31c)              
-#endif
-
-#endif /* __v850e__ */
-
-/*  libgcc2 routines for NEC V850.  */
-/*  Double Integer Arithmetical Operation.  */
-
-#ifdef L_negdi2
-       .text
-       .global ___negdi2
-       .type   ___negdi2, @function
-___negdi2:
-       not     r6, r10
-       add     1,  r10
-       setf    l,  r6
-       not     r7, r11
-       add     r6, r11
-       jmp     [lp]
-
-       .size ___negdi2,.-___negdi2
-#endif
-
-#ifdef L_cmpdi2
-       .text
-       .global ___cmpdi2
-       .type   ___cmpdi2,@function
-___cmpdi2:
-       # Signed comparison bitween each high word.
-       cmp     r9, r7
-       be      .L_cmpdi_cmp_low
-       setf    ge, r10
-       setf    gt, r6
-       add     r6, r10
-       jmp     [lp]
-.L_cmpdi_cmp_low:
-       # Unsigned comparigon bitween each low word.
-       cmp     r8, r6
-       setf    nl, r10
-       setf    h,  r6
-       add     r6, r10
-       jmp     [lp]    
-       .size ___cmpdi2, . - ___cmpdi2  
-#endif
-
-#ifdef L_ucmpdi2
-       .text
-       .global ___ucmpdi2
-       .type   ___ucmpdi2,@function
-___ucmpdi2:
-       cmp     r9, r7  # Check if each high word are same.
-       bne     .L_ucmpdi_check_psw
-       cmp     r8, r6  # Compare the word.
-.L_ucmpdi_check_psw:
-       setf    nl, r10 # 
-       setf    h,  r6  # 
-       add     r6, r10 # Add the result of comparison NL and comparison H.
-       jmp     [lp]    
-       .size ___ucmpdi2, . - ___ucmpdi2
-#endif
-
-#ifdef L_muldi3
-       .text
-       .global ___muldi3
-       .type   ___muldi3,@function
-___muldi3:
-#ifdef __v850__
-        jarl  __save_r26_r31, r10
-        addi  16,  sp, sp
-        mov   r6,  r28
-        shr   15,  r28
-        movea lo(32767), r0, r14
-        and   r14, r28
-        mov   r8,  r10
-        shr   15,  r10
-        and   r14, r10
-        mov   r6,  r19
-        shr   30,  r19
-        mov   r7,  r12
-        shl   2,   r12
-        or    r12, r19
-        and   r14, r19
-        mov   r8,  r13
-        shr   30,  r13
-        mov   r9,  r12
-        shl   2,   r12
-        or    r12, r13
-        and   r14, r13
-        mov   r7,  r11
-        shr   13,  r11
-        and   r14, r11
-        mov   r9,  r31
-        shr   13,  r31
-        and   r14, r31
-        mov   r7,  r29
-        shr   28,  r29
-        and   r14, r29
-        mov   r9,  r12
-        shr   28,  r12
-        and   r14, r12
-        and   r14, r6
-        and   r14, r8
-        mov   r6,  r14
-        mulh  r8,  r14
-        mov   r6,  r16
-        mulh  r10, r16
-        mov   r6,  r18
-        mulh  r13, r18
-        mov   r6,  r15
-        mulh  r31, r15
-        mulh  r12, r6
-        mov   r28,  r17
-        mulh  r10, r17
-        add   -16, sp
-        mov   r28,  r12
-        mulh  r8,  r12
-        add   r17, r18
-        mov   r28,  r17
-        mulh  r31, r17
-        add   r12, r16
-        mov   r28,  r12
-        mulh  r13, r12
-        add   r17, r6
-        mov   r19, r17
-        add   r12, r15
-        mov   r19, r12
-        mulh  r8,  r12
-        mulh  r10, r17
-        add   r12, r18
-        mov   r19, r12
-        mulh  r13, r12
-        add   r17, r15
-        mov   r11, r13
-        mulh  r8,  r13
-        add   r12, r6
-        mov   r11, r12
-        mulh  r10, r12
-        add   r13, r15
-        mulh  r29, r8
-        add   r12, r6
-        mov   r16, r13
-        shl   15,  r13
-        add   r14, r13
-        mov   r18, r12
-        shl   30,  r12
-        mov   r13, r26
-        add   r12, r26
-        shr   15,  r14
-        movhi hi(131071), r0,  r12
-        movea lo(131071), r12, r13
-        and   r13, r14
-        mov   r16, r12
-        and   r13, r12
-        add   r12, r14
-        mov   r18, r12
-        shl   15,  r12
-        and   r13, r12
-        add   r12, r14
-        shr   17,  r14
-        shr   17,  r16
-        add   r14, r16
-        shl   13,  r15
-        shr   2,   r18
-        add   r18, r15
-        add   r15, r16
-        mov   r16, r27
-        add   r8,  r6
-        shl   28,  r6
-        add   r6,  r27
-        mov   r26, r10
-        mov   r27, r11
-        jr    __return_r26_r31
-#else /* defined(__v850e__) */
-       /*  (Ahi << 32 + Alo) * (Bhi << 32 + Blo) */
-       /*   r7           r6      r9         r8   */
-       mov  r8, r10
-       mulu r7, r8,  r0                /* Ahi * Blo */
-       mulu r6, r9,  r0                /* Alo * Bhi */
-       mulu r6, r10, r11               /* Alo * Blo */
-       add  r8, r11
-       add  r9, r11
-       jmp  [r31]
-#endif /* defined(__v850e__) */
-       .size ___muldi3, . - ___muldi3
-#endif
-       
diff --git a/gcc/config/v850/t-v850 b/gcc/config/v850/t-v850

index fcd3b841e30df649dfe34fd289718dd19853b4f5..7885229e631d0f044e4121120d5084c8a4e859d6 100644 (file)
--- a/gcc/config/v850/t-v850
+++ b/gcc/config/v850/t-v850
@@ -17,67 +17,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMSRC = v850/lib1funcs.asm
-LIB1ASMFUNCS   = _mulsi3 \
-                 _divsi3 \
-                 _udivsi3 \
-                 _modsi3 \
-                 _umodsi3 \
-                 _save_2 \
-                 _save_20 \
-                 _save_21 \
-                 _save_22 \
-                 _save_23 \
-                 _save_24 \
-                 _save_25 \
-                 _save_26 \
-                 _save_27 \
-                 _save_28 \
-                 _save_29 \
-                 _save_2c \
-                 _save_20c \
-                 _save_21c \
-                 _save_22c \
-                 _save_23c \
-                 _save_24c \
-                 _save_25c \
-                 _save_26c \
-                 _save_27c \
-                 _save_28c \
-                 _save_29c \
-                 _save_31c \
-                 _save_interrupt \
-                 _save_all_interrupt \
-                  _callt_save_20 \
-                 _callt_save_21 \
-                 _callt_save_22 \
-                 _callt_save_23 \
-                 _callt_save_24 \
-                 _callt_save_25 \
-                 _callt_save_26 \
-                 _callt_save_27 \
-                 _callt_save_28 \
-                 _callt_save_29 \
-                 _callt_save_20c \
-                 _callt_save_21c \
-                 _callt_save_22c \
-                 _callt_save_23c \
-                 _callt_save_24c \
-                 _callt_save_25c \
-                 _callt_save_26c \
-                 _callt_save_27c \
-                 _callt_save_28c \
-                 _callt_save_29c \
-                 _callt_save_31c \
-                 _callt_save_interrupt \
-                 _callt_save_all_interrupt \
-                 _callt_save_r2_r29 \
-                 _callt_save_r2_r31 \
-                 _negdi2 \
-                 _cmpdi2 \
-                 _ucmpdi2 \
-                 _muldi3
-
  # Create target-specific versions of the libraries
  MULTILIB_OPTIONS  = mv850/mv850e/mv850e2/mv850e2v3
  MULTILIB_DIRNAMES = v850 v850e v850e2 v850e2v3
diff --git a/gcc/config/vax/lib1funcs.asm b/gcc/config/vax/lib1funcs.asm

deleted file mode 100644 (file)

index 1d57b56..0000000
--- a/gcc/config/vax/lib1funcs.asm
+++ /dev/null
@@ -1,92 +0,0 @@
-/* Copyright (C) 2009 Free Software Foundation, Inc.
-   This file is part of GCC.
-   Contributed by Maciej W. Rozycki <macro@linux-mips.org>.
-
-   This file is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by the
-   Free Software Foundation; either version 3, or (at your option) any
-   later version.
-
-   This file is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifdef L_udivsi3
-       .text
-       .globl  __udivsi3
-       .type   __udivsi3, @function
-__udivsi3:
-       .word   0
-       movl    8(%ap), %r1
-       blss    0f                      /* Check bit #31 of divisor.  */
-       movl    4(%ap), %r2
-       blss    1f                      /* Check bit #31 of dividend.  */
-
-       /* Both zero, do a standard division.  */
-
-       divl3   %r1, %r2, %r0
-       ret
-
-       /* MSB of divisor set, only 1 or 0 may result.  */
-0:
-       decl    %r1
-       clrl    %r0
-       cmpl    %r1, 4(%ap)
-       adwc    $0, %r0
-       ret
-
-       /* MSB of dividend set, do an extended division.  */
-1:
-       clrl    %r3
-       ediv    %r1, %r2, %r0, %r3
-       ret
-       .size   __udivsi3, . - __udivsi3
-       .previous
-#endif
-
-#ifdef L_umodsi3
-       .text
-       .globl  __umodsi3
-       .type   __umodsi3, @function
-__umodsi3:
-       .word   0
-       movl    8(%ap), %r1
-       blss    0f                      /* Check bit #31 of divisor.  */
-       movl    4(%ap), %r2
-       blss    1f                      /* Check bit #31 of dividend.  */
-
-       /* Both zero, do a standard division.  */
-
-       divl3   %r1, %r2, %r0
-       mull2   %r0, %r1
-       subl3   %r1, %r2, %r0
-       ret
-
-       /* MSB of divisor set, subtract the divisor at most once.  */
-0:
-       movl    4(%ap), %r2
-       clrl    %r0
-       cmpl    %r2, %r1
-       sbwc    $0, %r0
-       bicl2   %r0, %r1
-       subl3   %r1, %r2, %r0
-       ret
-
-       /* MSB of dividend set, do an extended division.  */
-1:
-       clrl    %r3
-       ediv    %r1, %r2, %r3, %r0
-       ret
-       .size   __umodsi3, . - __umodsi3
-       .previous
-#endif
diff --git a/gcc/config/vax/t-linux b/gcc/config/vax/t-linux

deleted file mode 100644 (file)

index 9af1edb..0000000
--- a/gcc/config/vax/t-linux
+++ /dev/null
@@ -1,2 +0,0 @@
-LIB1ASMSRC = vax/lib1funcs.asm
-LIB1ASMFUNCS = _udivsi3 _umodsi3
diff --git a/gcc/config/xtensa/ieee754-df.S b/gcc/config/xtensa/ieee754-df.S

deleted file mode 100644 (file)

index 9b46889..0000000
--- a/gcc/config/xtensa/ieee754-df.S
+++ /dev/null
@@ -1,2388 +0,0 @@
-/* IEEE-754 double-precision functions for Xtensa
-   Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
-   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful, but WITHOUT
-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
-   License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifdef __XTENSA_EB__
-#define xh a2
-#define xl a3
-#define yh a4
-#define yl a5
-#else
-#define xh a3
-#define xl a2
-#define yh a5
-#define yl a4
-#endif
-
-/*  Warning!  The branch displacements for some Xtensa branch instructions
-    are quite small, and this code has been carefully laid out to keep
-    branch targets in range.  If you change anything, be sure to check that
-    the assembler is not relaxing anything to branch over a jump.  */
-
-#ifdef L_negdf2
-
-       .align  4
-       .global __negdf2
-       .type   __negdf2, @function
-__negdf2:
-       leaf_entry sp, 16
-       movi    a4, 0x80000000
-       xor     xh, xh, a4
-       leaf_return
-
-#endif /* L_negdf2 */
-
-#ifdef L_addsubdf3
-
-       /* Addition */
-__adddf3_aux:
-       
-       /* Handle NaNs and Infinities.  (This code is placed before the
-          start of the function just to keep it in range of the limited
-          branch displacements.)  */
-
-.Ladd_xnan_or_inf:
-       /* If y is neither Infinity nor NaN, return x.  */
-       bnall   yh, a6, 1f
-       /* If x is a NaN, return it.  Otherwise, return y.  */
-       slli    a7, xh, 12
-       or      a7, a7, xl
-       beqz    a7, .Ladd_ynan_or_inf
-1:     leaf_return
-
-.Ladd_ynan_or_inf:
-       /* Return y.  */
-       mov     xh, yh
-       mov     xl, yl
-       leaf_return
-
-.Ladd_opposite_signs:
-       /* Operand signs differ.  Do a subtraction.  */
-       slli    a7, a6, 11
-       xor     yh, yh, a7
-       j       .Lsub_same_sign
-
-       .align  4
-       .global __adddf3
-       .type   __adddf3, @function
-__adddf3:
-       leaf_entry sp, 16
-       movi    a6, 0x7ff00000
-
-       /* Check if the two operands have the same sign.  */
-       xor     a7, xh, yh
-       bltz    a7, .Ladd_opposite_signs
-
-.Ladd_same_sign:       
-       /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
-       ball    xh, a6, .Ladd_xnan_or_inf
-       ball    yh, a6, .Ladd_ynan_or_inf
-
-       /* Compare the exponents.  The smaller operand will be shifted
-          right by the exponent difference and added to the larger
-          one.  */
-       extui   a7, xh, 20, 12
-       extui   a8, yh, 20, 12
-       bltu    a7, a8, .Ladd_shiftx
-
-.Ladd_shifty:
-       /* Check if the smaller (or equal) exponent is zero.  */
-       bnone   yh, a6, .Ladd_yexpzero
-
-       /* Replace yh sign/exponent with 0x001.  */
-       or      yh, yh, a6
-       slli    yh, yh, 11
-       srli    yh, yh, 11
-
-.Ladd_yexpdiff:
-       /* Compute the exponent difference.  Optimize for difference < 32.  */
-       sub     a10, a7, a8
-       bgeui   a10, 32, .Ladd_bigshifty
-       
-       /* Shift yh/yl right by the exponent difference.  Any bits that are
-          shifted out of yl are saved in a9 for rounding the result.  */
-       ssr     a10
-       movi    a9, 0
-       src     a9, yl, a9
-       src     yl, yh, yl
-       srl     yh, yh
-
-.Ladd_addy:
-       /* Do the 64-bit addition.  */
-       add     xl, xl, yl
-       add     xh, xh, yh
-       bgeu    xl, yl, 1f
-       addi    xh, xh, 1
-1:
-       /* Check if the add overflowed into the exponent.  */
-       extui   a10, xh, 20, 12
-       beq     a10, a7, .Ladd_round
-       mov     a8, a7
-       j       .Ladd_carry
-
-.Ladd_yexpzero:
-       /* y is a subnormal value.  Replace its sign/exponent with zero,
-          i.e., no implicit "1.0", and increment the apparent exponent
-          because subnormals behave as if they had the minimum (nonzero)
-          exponent.  Test for the case when both exponents are zero.  */
-       slli    yh, yh, 12
-       srli    yh, yh, 12
-       bnone   xh, a6, .Ladd_bothexpzero
-       addi    a8, a8, 1
-       j       .Ladd_yexpdiff
-
-.Ladd_bothexpzero:
-       /* Both exponents are zero.  Handle this as a special case.  There
-          is no need to shift or round, and the normal code for handling
-          a carry into the exponent field will not work because it
-          assumes there is an implicit "1.0" that needs to be added.  */
-       add     xl, xl, yl
-       add     xh, xh, yh
-       bgeu    xl, yl, 1f
-       addi    xh, xh, 1
-1:     leaf_return
-
-.Ladd_bigshifty:
-       /* Exponent difference >= 64 -- just return the bigger value.  */
-       bgeui   a10, 64, 1b
-
-       /* Shift yh/yl right by the exponent difference.  Any bits that are
-          shifted out are saved in a9 for rounding the result.  */
-       ssr     a10
-       sll     a11, yl         /* lost bits shifted out of yl */
-       src     a9, yh, yl
-       srl     yl, yh
-       movi    yh, 0
-       beqz    a11, .Ladd_addy
-       or      a9, a9, a10     /* any positive, nonzero value will work */
-       j       .Ladd_addy
-
-.Ladd_xexpzero:
-       /* Same as "yexpzero" except skip handling the case when both
-          exponents are zero.  */
-       slli    xh, xh, 12
-       srli    xh, xh, 12
-       addi    a7, a7, 1
-       j       .Ladd_xexpdiff
-
-.Ladd_shiftx:
-       /* Same thing as the "shifty" code, but with x and y swapped.  Also,
-          because the exponent difference is always nonzero in this version,
-          the shift sequence can use SLL and skip loading a constant zero.  */
-       bnone   xh, a6, .Ladd_xexpzero
-
-       or      xh, xh, a6
-       slli    xh, xh, 11
-       srli    xh, xh, 11
-
-.Ladd_xexpdiff:
-       sub     a10, a8, a7
-       bgeui   a10, 32, .Ladd_bigshiftx
-       
-       ssr     a10
-       sll     a9, xl
-       src     xl, xh, xl
-       srl     xh, xh
-
-.Ladd_addx:
-       add     xl, xl, yl
-       add     xh, xh, yh
-       bgeu    xl, yl, 1f
-       addi    xh, xh, 1
-1:
-       /* Check if the add overflowed into the exponent.  */
-       extui   a10, xh, 20, 12
-       bne     a10, a8, .Ladd_carry
-
-.Ladd_round:
-       /* Round up if the leftover fraction is >= 1/2.  */
-       bgez    a9, 1f
-       addi    xl, xl, 1
-       beqz    xl, .Ladd_roundcarry
-
-       /* Check if the leftover fraction is exactly 1/2.  */
-       slli    a9, a9, 1
-       beqz    a9, .Ladd_exactlyhalf
-1:     leaf_return
-
-.Ladd_bigshiftx:
-       /* Mostly the same thing as "bigshifty"....  */
-       bgeui   a10, 64, .Ladd_returny
-
-       ssr     a10
-       sll     a11, xl
-       src     a9, xh, xl
-       srl     xl, xh
-       movi    xh, 0
-       beqz    a11, .Ladd_addx
-       or      a9, a9, a10
-       j       .Ladd_addx
-
-.Ladd_returny:
-       mov     xh, yh
-       mov     xl, yl
-       leaf_return
-
-.Ladd_carry:   
-       /* The addition has overflowed into the exponent field, so the
-          value needs to be renormalized.  The mantissa of the result
-          can be recovered by subtracting the original exponent and
-          adding 0x100000 (which is the explicit "1.0" for the
-          mantissa of the non-shifted operand -- the "1.0" for the
-          shifted operand was already added).  The mantissa can then
-          be shifted right by one bit.  The explicit "1.0" of the
-          shifted mantissa then needs to be replaced by the exponent,
-          incremented by one to account for the normalizing shift.
-          It is faster to combine these operations: do the shift first
-          and combine the additions and subtractions.  If x is the
-          original exponent, the result is:
-              shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
-          or:
-              shifted mantissa + ((x + 1) << 19)
-          Note that the exponent is incremented here by leaving the
-          explicit "1.0" of the mantissa in the exponent field.  */
-
-       /* Shift xh/xl right by one bit.  Save the lsb of xl.  */
-       mov     a10, xl
-       ssai    1
-       src     xl, xh, xl
-       srl     xh, xh
-
-       /* See explanation above.  The original exponent is in a8.  */
-       addi    a8, a8, 1
-       slli    a8, a8, 19
-       add     xh, xh, a8
-
-       /* Return an Infinity if the exponent overflowed.  */
-       ball    xh, a6, .Ladd_infinity
-       
-       /* Same thing as the "round" code except the msb of the leftover
-          fraction is bit 0 of a10, with the rest of the fraction in a9.  */
-       bbci.l  a10, 0, 1f
-       addi    xl, xl, 1
-       beqz    xl, .Ladd_roundcarry
-       beqz    a9, .Ladd_exactlyhalf
-1:     leaf_return
-
-.Ladd_infinity:
-       /* Clear the mantissa.  */
-       movi    xl, 0
-       srli    xh, xh, 20
-       slli    xh, xh, 20
-
-       /* The sign bit may have been lost in a carry-out.  Put it back.  */
-       slli    a8, a8, 1
-       or      xh, xh, a8
-       leaf_return
-
-.Ladd_exactlyhalf:
-       /* Round down to the nearest even value.  */
-       srli    xl, xl, 1
-       slli    xl, xl, 1
-       leaf_return
-
-.Ladd_roundcarry:
-       /* xl is always zero when the rounding increment overflows, so
-          there's no need to round it to an even value.  */
-       addi    xh, xh, 1
-       /* Overflow to the exponent is OK.  */
-       leaf_return
-
-
-       /* Subtraction */
-__subdf3_aux:
-       
-       /* Handle NaNs and Infinities.  (This code is placed before the
-          start of the function just to keep it in range of the limited
-          branch displacements.)  */
-
-.Lsub_xnan_or_inf:
-       /* If y is neither Infinity nor NaN, return x.  */
-       bnall   yh, a6, 1f
-       /* Both x and y are either NaN or Inf, so the result is NaN.  */
-       movi    a4, 0x80000     /* make it a quiet NaN */
-       or      xh, xh, a4
-1:     leaf_return
-
-.Lsub_ynan_or_inf:
-       /* Negate y and return it.  */
-       slli    a7, a6, 11
-       xor     xh, yh, a7
-       mov     xl, yl
-       leaf_return
-
-.Lsub_opposite_signs:
-       /* Operand signs differ.  Do an addition.  */
-       slli    a7, a6, 11
-       xor     yh, yh, a7
-       j       .Ladd_same_sign
-
-       .align  4
-       .global __subdf3
-       .type   __subdf3, @function
-__subdf3:
-       leaf_entry sp, 16
-       movi    a6, 0x7ff00000
-
-       /* Check if the two operands have the same sign.  */
-       xor     a7, xh, yh
-       bltz    a7, .Lsub_opposite_signs
-
-.Lsub_same_sign:       
-       /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
-       ball    xh, a6, .Lsub_xnan_or_inf
-       ball    yh, a6, .Lsub_ynan_or_inf
-
-       /* Compare the operands.  In contrast to addition, the entire
-          value matters here.  */
-       extui   a7, xh, 20, 11
-       extui   a8, yh, 20, 11
-       bltu    xh, yh, .Lsub_xsmaller
-       beq     xh, yh, .Lsub_compare_low
-
-.Lsub_ysmaller:
-       /* Check if the smaller (or equal) exponent is zero.  */
-       bnone   yh, a6, .Lsub_yexpzero
-
-       /* Replace yh sign/exponent with 0x001.  */
-       or      yh, yh, a6
-       slli    yh, yh, 11
-       srli    yh, yh, 11
-
-.Lsub_yexpdiff:
-       /* Compute the exponent difference.  Optimize for difference < 32.  */
-       sub     a10, a7, a8
-       bgeui   a10, 32, .Lsub_bigshifty
-       
-       /* Shift yh/yl right by the exponent difference.  Any bits that are
-          shifted out of yl are saved in a9 for rounding the result.  */
-       ssr     a10
-       movi    a9, 0
-       src     a9, yl, a9
-       src     yl, yh, yl
-       srl     yh, yh
-
-.Lsub_suby:
-       /* Do the 64-bit subtraction.  */
-       sub     xh, xh, yh
-       bgeu    xl, yl, 1f
-       addi    xh, xh, -1
-1:     sub     xl, xl, yl
-
-       /* Subtract the leftover bits in a9 from zero and propagate any
-          borrow from xh/xl.  */
-       neg     a9, a9
-       beqz    a9, 1f
-       addi    a5, xh, -1
-       moveqz  xh, a5, xl
-       addi    xl, xl, -1
-1:
-       /* Check if the subtract underflowed into the exponent.  */
-       extui   a10, xh, 20, 11
-       beq     a10, a7, .Lsub_round
-       j       .Lsub_borrow
-
-.Lsub_compare_low:
-       /* The high words are equal.  Compare the low words.  */
-       bltu    xl, yl, .Lsub_xsmaller
-       bltu    yl, xl, .Lsub_ysmaller
-       /* The operands are equal.  Return 0.0.  */
-       movi    xh, 0
-       movi    xl, 0
-1:     leaf_return
-
-.Lsub_yexpzero:
-       /* y is a subnormal value.  Replace its sign/exponent with zero,
-          i.e., no implicit "1.0".  Unless x is also a subnormal, increment
-          y's apparent exponent because subnormals behave as if they had
-          the minimum (nonzero) exponent.  */
-       slli    yh, yh, 12
-       srli    yh, yh, 12
-       bnone   xh, a6, .Lsub_yexpdiff
-       addi    a8, a8, 1
-       j       .Lsub_yexpdiff
-
-.Lsub_bigshifty:
-       /* Exponent difference >= 64 -- just return the bigger value.  */
-       bgeui   a10, 64, 1b
-
-       /* Shift yh/yl right by the exponent difference.  Any bits that are
-          shifted out are saved in a9 for rounding the result.  */
-       ssr     a10
-       sll     a11, yl         /* lost bits shifted out of yl */
-       src     a9, yh, yl
-       srl     yl, yh
-       movi    yh, 0
-       beqz    a11, .Lsub_suby
-       or      a9, a9, a10     /* any positive, nonzero value will work */
-       j       .Lsub_suby
-
-.Lsub_xsmaller:
-       /* Same thing as the "ysmaller" code, but with x and y swapped and
-          with y negated.  */
-       bnone   xh, a6, .Lsub_xexpzero
-
-       or      xh, xh, a6
-       slli    xh, xh, 11
-       srli    xh, xh, 11
-
-.Lsub_xexpdiff:
-       sub     a10, a8, a7
-       bgeui   a10, 32, .Lsub_bigshiftx
-       
-       ssr     a10
-       movi    a9, 0
-       src     a9, xl, a9
-       src     xl, xh, xl
-       srl     xh, xh
-
-       /* Negate y.  */
-       slli    a11, a6, 11
-       xor     yh, yh, a11
-
-.Lsub_subx:
-       sub     xl, yl, xl
-       sub     xh, yh, xh
-       bgeu    yl, xl, 1f
-       addi    xh, xh, -1
-1:
-       /* Subtract the leftover bits in a9 from zero and propagate any
-          borrow from xh/xl.  */
-       neg     a9, a9
-       beqz    a9, 1f
-       addi    a5, xh, -1
-       moveqz  xh, a5, xl
-       addi    xl, xl, -1
-1:
-       /* Check if the subtract underflowed into the exponent.  */
-       extui   a10, xh, 20, 11
-       bne     a10, a8, .Lsub_borrow
-
-.Lsub_round:
-       /* Round up if the leftover fraction is >= 1/2.  */
-       bgez    a9, 1f
-       addi    xl, xl, 1
-       beqz    xl, .Lsub_roundcarry
-
-       /* Check if the leftover fraction is exactly 1/2.  */
-       slli    a9, a9, 1
-       beqz    a9, .Lsub_exactlyhalf
-1:     leaf_return
-
-.Lsub_xexpzero:
-       /* Same as "yexpzero".  */
-       slli    xh, xh, 12
-       srli    xh, xh, 12
-       bnone   yh, a6, .Lsub_xexpdiff
-       addi    a7, a7, 1
-       j       .Lsub_xexpdiff
-
-.Lsub_bigshiftx:
-       /* Mostly the same thing as "bigshifty", but with the sign bit of the
-          shifted value set so that the subsequent subtraction flips the
-          sign of y.  */
-       bgeui   a10, 64, .Lsub_returny
-
-       ssr     a10
-       sll     a11, xl
-       src     a9, xh, xl
-       srl     xl, xh
-       slli    xh, a6, 11      /* set sign bit of xh */
-       beqz    a11, .Lsub_subx
-       or      a9, a9, a10
-       j       .Lsub_subx
-
-.Lsub_returny:
-       /* Negate and return y.  */
-       slli    a7, a6, 11
-       xor     xh, yh, a7
-       mov     xl, yl
-       leaf_return
-
-.Lsub_borrow:  
-       /* The subtraction has underflowed into the exponent field, so the
-          value needs to be renormalized.  Shift the mantissa left as
-          needed to remove any leading zeros and adjust the exponent
-          accordingly.  If the exponent is not large enough to remove
-          all the leading zeros, the result will be a subnormal value.  */
-
-       slli    a8, xh, 12
-       beqz    a8, .Lsub_xhzero
-       do_nsau a6, a8, a7, a11
-       srli    a8, a8, 12
-       bge     a6, a10, .Lsub_subnormal
-       addi    a6, a6, 1
-
-.Lsub_shift_lt32:
-       /* Shift the mantissa (a8/xl/a9) left by a6.  */
-       ssl     a6
-       src     a8, a8, xl
-       src     xl, xl, a9
-       sll     a9, a9
-
-       /* Combine the shifted mantissa with the sign and exponent,
-          decrementing the exponent by a6.  (The exponent has already
-          been decremented by one due to the borrow from the subtraction,
-          but adding the mantissa will increment the exponent by one.)  */
-       srli    xh, xh, 20
-       sub     xh, xh, a6
-       slli    xh, xh, 20
-       add     xh, xh, a8
-       j       .Lsub_round
-
-.Lsub_exactlyhalf:
-       /* Round down to the nearest even value.  */
-       srli    xl, xl, 1
-       slli    xl, xl, 1
-       leaf_return
-
-.Lsub_roundcarry:
-       /* xl is always zero when the rounding increment overflows, so
-          there's no need to round it to an even value.  */
-       addi    xh, xh, 1
-       /* Overflow to the exponent is OK.  */
-       leaf_return
-
-.Lsub_xhzero:
-       /* When normalizing the result, all the mantissa bits in the high
-          word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
-       do_nsau a6, xl, a7, a11
-       addi    a6, a6, 21
-       blt     a10, a6, .Lsub_subnormal
-
-.Lsub_normalize_shift:
-       bltui   a6, 32, .Lsub_shift_lt32
-
-       ssl     a6
-       src     a8, xl, a9
-       sll     xl, a9
-       movi    a9, 0
-
-       srli    xh, xh, 20
-       sub     xh, xh, a6
-       slli    xh, xh, 20
-       add     xh, xh, a8
-       j       .Lsub_round
-
-.Lsub_subnormal:
-       /* The exponent is too small to shift away all the leading zeros.
-          Set a6 to the current exponent (which has already been
-          decremented by the borrow) so that the exponent of the result
-          will be zero.  Do not add 1 to a6 in this case, because: (1)
-          adding the mantissa will not increment the exponent, so there is
-          no need to subtract anything extra from the exponent to
-          compensate, and (2) the effective exponent of a subnormal is 1
-          not 0 so the shift amount must be 1 smaller than normal. */
-       mov     a6, a10
-       j       .Lsub_normalize_shift
-
-#endif /* L_addsubdf3 */
-
-#ifdef L_muldf3
-
-       /* Multiplication */
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
-#define XCHAL_NO_MUL 1
-#endif
-
-__muldf3_aux:
-
-       /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
-          (This code is placed before the start of the function just to
-          keep it in range of the limited branch displacements.)  */
-
-.Lmul_xexpzero:
-       /* Clear the sign bit of x.  */
-       slli    xh, xh, 1
-       srli    xh, xh, 1
-
-       /* If x is zero, return zero.  */
-       or      a10, xh, xl
-       beqz    a10, .Lmul_return_zero
-
-       /* Normalize x.  Adjust the exponent in a8.  */
-       beqz    xh, .Lmul_xh_zero
-       do_nsau a10, xh, a11, a12
-       addi    a10, a10, -11
-       ssl     a10
-       src     xh, xh, xl
-       sll     xl, xl
-       movi    a8, 1
-       sub     a8, a8, a10
-       j       .Lmul_xnormalized       
-.Lmul_xh_zero:
-       do_nsau a10, xl, a11, a12
-       addi    a10, a10, -11
-       movi    a8, -31
-       sub     a8, a8, a10
-       ssl     a10
-       bltz    a10, .Lmul_xl_srl
-       sll     xh, xl
-       movi    xl, 0
-       j       .Lmul_xnormalized
-.Lmul_xl_srl:
-       srl     xh, xl
-       sll     xl, xl
-       j       .Lmul_xnormalized
-       
-.Lmul_yexpzero:
-       /* Clear the sign bit of y.  */
-       slli    yh, yh, 1
-       srli    yh, yh, 1
-
-       /* If y is zero, return zero.  */
-       or      a10, yh, yl
-       beqz    a10, .Lmul_return_zero
-
-       /* Normalize y.  Adjust the exponent in a9.  */
-       beqz    yh, .Lmul_yh_zero
-       do_nsau a10, yh, a11, a12
-       addi    a10, a10, -11
-       ssl     a10
-       src     yh, yh, yl
-       sll     yl, yl
-       movi    a9, 1
-       sub     a9, a9, a10
-       j       .Lmul_ynormalized       
-.Lmul_yh_zero:
-       do_nsau a10, yl, a11, a12
-       addi    a10, a10, -11
-       movi    a9, -31
-       sub     a9, a9, a10
-       ssl     a10
-       bltz    a10, .Lmul_yl_srl
-       sll     yh, yl
-       movi    yl, 0
-       j       .Lmul_ynormalized
-.Lmul_yl_srl:
-       srl     yh, yl
-       sll     yl, yl
-       j       .Lmul_ynormalized       
-
-.Lmul_return_zero:
-       /* Return zero with the appropriate sign bit.  */
-       srli    xh, a7, 31
-       slli    xh, xh, 31
-       movi    xl, 0
-       j       .Lmul_done
-
-.Lmul_xnan_or_inf:
-       /* If y is zero, return NaN.  */
-       bnez    yl, 1f
-       slli    a8, yh, 1
-       bnez    a8, 1f
-       movi    a4, 0x80000     /* make it a quiet NaN */
-       or      xh, xh, a4
-       j       .Lmul_done
-1:
-       /* If y is NaN, return y.  */
-       bnall   yh, a6, .Lmul_returnx
-       slli    a8, yh, 12
-       or      a8, a8, yl
-       beqz    a8, .Lmul_returnx
-
-.Lmul_returny:
-       mov     xh, yh
-       mov     xl, yl
-
-.Lmul_returnx:
-       /* Set the sign bit and return.  */
-       extui   a7, a7, 31, 1
-       slli    xh, xh, 1
-       ssai    1
-       src     xh, a7, xh
-       j       .Lmul_done
-
-.Lmul_ynan_or_inf:
-       /* If x is zero, return NaN.  */
-       bnez    xl, .Lmul_returny
-       slli    a8, xh, 1
-       bnez    a8, .Lmul_returny
-       movi    a7, 0x80000     /* make it a quiet NaN */
-       or      xh, yh, a7
-       j       .Lmul_done
-
-       .align  4
-       .global __muldf3
-       .type   __muldf3, @function
-__muldf3:
-#if __XTENSA_CALL0_ABI__
-       leaf_entry sp, 32
-       addi    sp, sp, -32
-       s32i    a12, sp, 16
-       s32i    a13, sp, 20
-       s32i    a14, sp, 24
-       s32i    a15, sp, 28
-#elif XCHAL_NO_MUL
-       /* This is not really a leaf function; allocate enough stack space
-          to allow CALL12s to a helper function.  */
-       leaf_entry sp, 64
-#else
-       leaf_entry sp, 32
-#endif
-       movi    a6, 0x7ff00000
-
-       /* Get the sign of the result.  */
-       xor     a7, xh, yh
-
-       /* Check for NaN and infinity.  */
-       ball    xh, a6, .Lmul_xnan_or_inf
-       ball    yh, a6, .Lmul_ynan_or_inf
-
-       /* Extract the exponents.  */
-       extui   a8, xh, 20, 11
-       extui   a9, yh, 20, 11
-
-       beqz    a8, .Lmul_xexpzero
-.Lmul_xnormalized:     
-       beqz    a9, .Lmul_yexpzero
-.Lmul_ynormalized:     
-
-       /* Add the exponents.  */
-       add     a8, a8, a9
-
-       /* Replace sign/exponent fields with explicit "1.0".  */
-       movi    a10, 0x1fffff
-       or      xh, xh, a6
-       and     xh, xh, a10
-       or      yh, yh, a6
-       and     yh, yh, a10
-
-       /* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
-          The least-significant word of the result is thrown away except
-          that if it is nonzero, the lsb of a6 is set to 1.  */
-#if XCHAL_HAVE_MUL32_HIGH
-
-       /* Compute a6 with any carry-outs in a10.  */
-       movi    a10, 0
-       mull    a6, xl, yh
-       mull    a11, xh, yl
-       add     a6, a6, a11
-       bgeu    a6, a11, 1f
-       addi    a10, a10, 1
-1:
-       muluh   a11, xl, yl
-       add     a6, a6, a11
-       bgeu    a6, a11, 1f
-       addi    a10, a10, 1
-1:     
-       /* If the low word of the result is nonzero, set the lsb of a6.  */
-       mull    a11, xl, yl
-       beqz    a11, 1f
-       movi    a9, 1
-       or      a6, a6, a9
-1:
-       /* Compute xl with any carry-outs in a9.  */
-       movi    a9, 0
-       mull    a11, xh, yh
-       add     a10, a10, a11
-       bgeu    a10, a11, 1f
-       addi    a9, a9, 1
-1:     
-       muluh   a11, xh, yl
-       add     a10, a10, a11
-       bgeu    a10, a11, 1f
-       addi    a9, a9, 1
-1:     
-       muluh   xl, xl, yh
-       add     xl, xl, a10
-       bgeu    xl, a10, 1f
-       addi    a9, a9, 1
-1:
-       /* Compute xh.  */
-       muluh   xh, xh, yh
-       add     xh, xh, a9
-
-#else /* ! XCHAL_HAVE_MUL32_HIGH */
-
-       /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
-          products.  These partial products are:
-
-               0 xll * yll
-
-               1 xll * ylh
-               2 xlh * yll
-
-               3 xll * yhl
-               4 xlh * ylh
-               5 xhl * yll
-
-               6 xll * yhh
-               7 xlh * yhl
-               8 xhl * ylh
-               9 xhh * yll
-
-               10 xlh * yhh
-               11 xhl * yhl
-               12 xhh * ylh
-
-               13 xhl * yhh
-               14 xhh * yhl
-
-               15 xhh * yhh
-
-          where the input chunks are (hh, hl, lh, ll).  If using the Mul16
-          or Mul32 multiplier options, these input chunks must be stored in
-          separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
-          that the inputs come from either half of the registers, so there
-          is no need to shift them out ahead of time.  If there is no
-          multiply hardware, the 16-bit chunks can be extracted when setting
-          up the arguments to the separate multiply function.  */
-
-       /* Save a7 since it is needed to hold a temporary value.  */
-       s32i    a7, sp, 4
-#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
-       /* Calling a separate multiply function will clobber a0 and requires
-          use of a8 as a temporary, so save those values now.  (The function
-          uses a custom ABI so nothing else needs to be saved.)  */
-       s32i    a0, sp, 0
-       s32i    a8, sp, 8
-#endif
-
-#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
-
-#define xlh a12
-#define ylh a13
-#define xhh a14
-#define yhh a15
-
-       /* Get the high halves of the inputs into registers.  */
-       srli    xlh, xl, 16
-       srli    ylh, yl, 16
-       srli    xhh, xh, 16
-       srli    yhh, yh, 16
-
-#define xll xl
-#define yll yl
-#define xhl xh
-#define yhl yh
-
-#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
-       /* Clear the high halves of the inputs.  This does not matter
-          for MUL16 because the high bits are ignored.  */
-       extui   xl, xl, 0, 16
-       extui   xh, xh, 0, 16
-       extui   yl, yl, 0, 16
-       extui   yh, yh, 0, 16
-#endif
-#endif /* MUL16 || MUL32 */
-
-
-#if XCHAL_HAVE_MUL16
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       mul16u  dst, xreg ## xhalf, yreg ## yhalf
-
-#elif XCHAL_HAVE_MUL32
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       mull    dst, xreg ## xhalf, yreg ## yhalf
-
-#elif XCHAL_HAVE_MAC16
-
-/* The preprocessor insists on inserting a space when concatenating after
-   a period in the definition of do_mul below.  These macros are a workaround
-   using underscores instead of periods when doing the concatenation.  */
-#define umul_aa_ll umul.aa.ll
-#define umul_aa_lh umul.aa.lh
-#define umul_aa_hl umul.aa.hl
-#define umul_aa_hh umul.aa.hh
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
-       rsr     dst, ACCLO
-
-#else /* no multiply hardware */
-       
-#define set_arg_l(dst, src) \
-       extui   dst, src, 0, 16
-#define set_arg_h(dst, src) \
-       srli    dst, src, 16
-
-#if __XTENSA_CALL0_ABI__
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       set_arg_ ## xhalf (a13, xreg); \
-       set_arg_ ## yhalf (a14, yreg); \
-       call0   .Lmul_mulsi3; \
-       mov     dst, a12
-#else
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       set_arg_ ## xhalf (a14, xreg); \
-       set_arg_ ## yhalf (a15, yreg); \
-       call12  .Lmul_mulsi3; \
-       mov     dst, a14
-#endif /* __XTENSA_CALL0_ABI__ */
-
-#endif /* no multiply hardware */
-
-       /* Add pp1 and pp2 into a10 with carry-out in a9.  */
-       do_mul(a10, xl, l, yl, h)       /* pp 1 */
-       do_mul(a11, xl, h, yl, l)       /* pp 2 */
-       movi    a9, 0
-       add     a10, a10, a11
-       bgeu    a10, a11, 1f
-       addi    a9, a9, 1
-1:
-       /* Initialize a6 with a9/a10 shifted into position.  Note that
-          this value can be safely incremented without any carry-outs.  */
-       ssai    16
-       src     a6, a9, a10
-
-       /* Compute the low word into a10.  */
-       do_mul(a11, xl, l, yl, l)       /* pp 0 */
-       sll     a10, a10
-       add     a10, a10, a11
-       bgeu    a10, a11, 1f
-       addi    a6, a6, 1
-1:
-       /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
-          This is good enough to determine the low half of a6, so that any
-          nonzero bits from the low word of the result can be collapsed
-          into a6, freeing up a register.  */
-       movi    a9, 0
-       do_mul(a11, xl, l, yh, l)       /* pp 3 */
-       add     a6, a6, a11
-       bgeu    a6, a11, 1f
-       addi    a9, a9, 1
-1:
-       do_mul(a11, xl, h, yl, h)       /* pp 4 */
-       add     a6, a6, a11
-       bgeu    a6, a11, 1f
-       addi    a9, a9, 1
-1:
-       do_mul(a11, xh, l, yl, l)       /* pp 5 */
-       add     a6, a6, a11
-       bgeu    a6, a11, 1f
-       addi    a9, a9, 1
-1:
-       /* Collapse any nonzero bits from the low word into a6.  */
-       beqz    a10, 1f
-       movi    a11, 1
-       or      a6, a6, a11
-1:
-       /* Add pp6-9 into a11 with carry-outs in a10.  */
-       do_mul(a7, xl, l, yh, h)        /* pp 6 */
-       do_mul(a11, xh, h, yl, l)       /* pp 9 */
-       movi    a10, 0
-       add     a11, a11, a7
-       bgeu    a11, a7, 1f
-       addi    a10, a10, 1
-1:     
-       do_mul(a7, xl, h, yh, l)        /* pp 7 */
-       add     a11, a11, a7
-       bgeu    a11, a7, 1f
-       addi    a10, a10, 1
-1:     
-       do_mul(a7, xh, l, yl, h)        /* pp 8 */
-       add     a11, a11, a7
-       bgeu    a11, a7, 1f
-       addi    a10, a10, 1
-1:     
-       /* Shift a10/a11 into position, and add low half of a11 to a6.  */
-       src     a10, a10, a11
-       add     a10, a10, a9
-       sll     a11, a11
-       add     a6, a6, a11
-       bgeu    a6, a11, 1f
-       addi    a10, a10, 1
-1:
-       /* Add pp10-12 into xl with carry-outs in a9.  */
-       movi    a9, 0
-       do_mul(xl, xl, h, yh, h)        /* pp 10 */
-       add     xl, xl, a10
-       bgeu    xl, a10, 1f
-       addi    a9, a9, 1
-1:
-       do_mul(a10, xh, l, yh, l)       /* pp 11 */
-       add     xl, xl, a10
-       bgeu    xl, a10, 1f
-       addi    a9, a9, 1
-1:
-       do_mul(a10, xh, h, yl, h)       /* pp 12 */
-       add     xl, xl, a10
-       bgeu    xl, a10, 1f
-       addi    a9, a9, 1
-1:
-       /* Add pp13-14 into a11 with carry-outs in a10.  */
-       do_mul(a11, xh, l, yh, h)       /* pp 13 */
-       do_mul(a7, xh, h, yh, l)        /* pp 14 */
-       movi    a10, 0
-       add     a11, a11, a7
-       bgeu    a11, a7, 1f
-       addi    a10, a10, 1
-1:
-       /* Shift a10/a11 into position, and add low half of a11 to xl.  */
-       src     a10, a10, a11
-       add     a10, a10, a9
-       sll     a11, a11
-       add     xl, xl, a11
-       bgeu    xl, a11, 1f
-       addi    a10, a10, 1
-1:
-       /* Compute xh.  */
-       do_mul(xh, xh, h, yh, h)        /* pp 15 */
-       add     xh, xh, a10
-
-       /* Restore values saved on the stack during the multiplication.  */
-       l32i    a7, sp, 4
-#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
-       l32i    a0, sp, 0
-       l32i    a8, sp, 8
-#endif
-#endif /* ! XCHAL_HAVE_MUL32_HIGH */
-
-       /* Shift left by 12 bits, unless there was a carry-out from the
-          multiply, in which case, shift by 11 bits and increment the
-          exponent.  Note: It is convenient to use the constant 0x3ff
-          instead of 0x400 when removing the extra exponent bias (so that
-          it is easy to construct 0x7fe for the overflow check).  Reverse
-          the logic here to decrement the exponent sum by one unless there
-          was a carry-out.  */
-       movi    a4, 11
-       srli    a5, xh, 21 - 12
-       bnez    a5, 1f
-       addi    a4, a4, 1
-       addi    a8, a8, -1
-1:     ssl     a4
-       src     xh, xh, xl
-       src     xl, xl, a6
-       sll     a6, a6
-
-       /* Subtract the extra bias from the exponent sum (plus one to account
-          for the explicit "1.0" of the mantissa that will be added to the
-          exponent in the final result).  */
-       movi    a4, 0x3ff
-       sub     a8, a8, a4
-       
-       /* Check for over/underflow.  The value in a8 is one less than the
-          final exponent, so values in the range 0..7fd are OK here.  */
-       slli    a4, a4, 1       /* 0x7fe */
-       bgeu    a8, a4, .Lmul_overflow
-       
-.Lmul_round:
-       /* Round.  */
-       bgez    a6, .Lmul_rounded
-       addi    xl, xl, 1
-       beqz    xl, .Lmul_roundcarry
-       slli    a6, a6, 1
-       beqz    a6, .Lmul_exactlyhalf
-
-.Lmul_rounded:
-       /* Add the exponent to the mantissa.  */
-       slli    a8, a8, 20
-       add     xh, xh, a8
-
-.Lmul_addsign:
-       /* Add the sign bit.  */
-       srli    a7, a7, 31
-       slli    a7, a7, 31
-       or      xh, xh, a7
-
-.Lmul_done:
-#if __XTENSA_CALL0_ABI__
-       l32i    a12, sp, 16
-       l32i    a13, sp, 20
-       l32i    a14, sp, 24
-       l32i    a15, sp, 28
-       addi    sp, sp, 32
-#endif
-       leaf_return
-
-.Lmul_exactlyhalf:
-       /* Round down to the nearest even value.  */
-       srli    xl, xl, 1
-       slli    xl, xl, 1
-       j       .Lmul_rounded
-
-.Lmul_roundcarry:
-       /* xl is always zero when the rounding increment overflows, so
-          there's no need to round it to an even value.  */
-       addi    xh, xh, 1
-       /* Overflow is OK -- it will be added to the exponent.  */
-       j       .Lmul_rounded
-
-.Lmul_overflow:
-       bltz    a8, .Lmul_underflow
-       /* Return +/- Infinity.  */
-       addi    a8, a4, 1       /* 0x7ff */
-       slli    xh, a8, 20
-       movi    xl, 0
-       j       .Lmul_addsign
-
-.Lmul_underflow:
-       /* Create a subnormal value, where the exponent field contains zero,
-          but the effective exponent is 1.  The value of a8 is one less than
-          the actual exponent, so just negate it to get the shift amount.  */
-       neg     a8, a8
-       mov     a9, a6
-       ssr     a8
-       bgeui   a8, 32, .Lmul_bigshift
-       
-       /* Shift xh/xl right.  Any bits that are shifted out of xl are saved
-          in a6 (combined with the shifted-out bits currently in a6) for
-          rounding the result.  */
-       sll     a6, xl
-       src     xl, xh, xl
-       srl     xh, xh
-       j       1f
-
-.Lmul_bigshift:
-       bgeui   a8, 64, .Lmul_flush_to_zero
-       sll     a10, xl         /* lost bits shifted out of xl */
-       src     a6, xh, xl
-       srl     xl, xh
-       movi    xh, 0
-       or      a9, a9, a10
-
-       /* Set the exponent to zero.  */
-1:     movi    a8, 0
-
-       /* Pack any nonzero bits shifted out into a6.  */
-       beqz    a9, .Lmul_round
-       movi    a9, 1
-       or      a6, a6, a9
-       j       .Lmul_round
-       
-.Lmul_flush_to_zero:
-       /* Return zero with the appropriate sign bit.  */
-       srli    xh, a7, 31
-       slli    xh, xh, 31
-       movi    xl, 0
-       j       .Lmul_done
-
-#if XCHAL_NO_MUL
-       
-       /* For Xtensa processors with no multiply hardware, this simplified
-          version of _mulsi3 is used for multiplying 16-bit chunks of
-          the floating-point mantissas.  When using CALL0, this function
-          uses a custom ABI: the inputs are passed in a13 and a14, the
-          result is returned in a12, and a8 and a15 are clobbered.  */
-       .align  4
-.Lmul_mulsi3:
-       leaf_entry sp, 16
-       .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
-       movi    \dst, 0
-1:     add     \tmp1, \src2, \dst
-       extui   \tmp2, \src1, 0, 1
-       movnez  \dst, \tmp1, \tmp2
-
-       do_addx2 \tmp1, \src2, \dst, \tmp1
-       extui   \tmp2, \src1, 1, 1
-       movnez  \dst, \tmp1, \tmp2
-
-       do_addx4 \tmp1, \src2, \dst, \tmp1
-       extui   \tmp2, \src1, 2, 1
-       movnez  \dst, \tmp1, \tmp2
-
-       do_addx8 \tmp1, \src2, \dst, \tmp1
-       extui   \tmp2, \src1, 3, 1
-       movnez  \dst, \tmp1, \tmp2
-
-       srli    \src1, \src1, 4
-       slli    \src2, \src2, 4
-       bnez    \src1, 1b
-       .endm
-#if __XTENSA_CALL0_ABI__
-       mul_mulsi3_body a12, a13, a14, a15, a8
-#else
-       /* The result will be written into a2, so save that argument in a4.  */
-       mov     a4, a2
-       mul_mulsi3_body a2, a4, a3, a5, a6
-#endif
-       leaf_return
-#endif /* XCHAL_NO_MUL */
-#endif /* L_muldf3 */
-
-#ifdef L_divdf3
-
-       /* Division */
-__divdf3_aux:
-
-       /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
-          (This code is placed before the start of the function just to
-          keep it in range of the limited branch displacements.)  */
-
-.Ldiv_yexpzero:
-       /* Clear the sign bit of y.  */
-       slli    yh, yh, 1
-       srli    yh, yh, 1
-
-       /* Check for division by zero.  */
-       or      a10, yh, yl
-       beqz    a10, .Ldiv_yzero
-
-       /* Normalize y.  Adjust the exponent in a9.  */
-       beqz    yh, .Ldiv_yh_zero
-       do_nsau a10, yh, a11, a9
-       addi    a10, a10, -11
-       ssl     a10
-       src     yh, yh, yl
-       sll     yl, yl
-       movi    a9, 1
-       sub     a9, a9, a10
-       j       .Ldiv_ynormalized       
-.Ldiv_yh_zero:
-       do_nsau a10, yl, a11, a9
-       addi    a10, a10, -11
-       movi    a9, -31
-       sub     a9, a9, a10
-       ssl     a10
-       bltz    a10, .Ldiv_yl_srl
-       sll     yh, yl
-       movi    yl, 0
-       j       .Ldiv_ynormalized
-.Ldiv_yl_srl:
-       srl     yh, yl
-       sll     yl, yl
-       j       .Ldiv_ynormalized       
-
-.Ldiv_yzero:
-       /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
-       slli    xh, xh, 1
-       srli    xh, xh, 1
-       or      xl, xl, xh
-       srli    xh, a7, 31
-       slli    xh, xh, 31
-       or      xh, xh, a6
-       bnez    xl, 1f
-       movi    a4, 0x80000     /* make it a quiet NaN */
-       or      xh, xh, a4
-1:     movi    xl, 0
-       leaf_return
-
-.Ldiv_xexpzero:
-       /* Clear the sign bit of x.  */
-       slli    xh, xh, 1
-       srli    xh, xh, 1
-
-       /* If x is zero, return zero.  */
-       or      a10, xh, xl
-       beqz    a10, .Ldiv_return_zero
-
-       /* Normalize x.  Adjust the exponent in a8.  */
-       beqz    xh, .Ldiv_xh_zero
-       do_nsau a10, xh, a11, a8
-       addi    a10, a10, -11
-       ssl     a10
-       src     xh, xh, xl
-       sll     xl, xl
-       movi    a8, 1
-       sub     a8, a8, a10
-       j       .Ldiv_xnormalized       
-.Ldiv_xh_zero:
-       do_nsau a10, xl, a11, a8
-       addi    a10, a10, -11
-       movi    a8, -31
-       sub     a8, a8, a10
-       ssl     a10
-       bltz    a10, .Ldiv_xl_srl
-       sll     xh, xl
-       movi    xl, 0
-       j       .Ldiv_xnormalized
-.Ldiv_xl_srl:
-       srl     xh, xl
-       sll     xl, xl
-       j       .Ldiv_xnormalized
-       
-.Ldiv_return_zero:
-       /* Return zero with the appropriate sign bit.  */
-       srli    xh, a7, 31
-       slli    xh, xh, 31
-       movi    xl, 0
-       leaf_return
-
-.Ldiv_xnan_or_inf:
-       /* Set the sign bit of the result.  */
-       srli    a7, yh, 31
-       slli    a7, a7, 31
-       xor     xh, xh, a7
-       /* If y is NaN or Inf, return NaN.  */
-       bnall   yh, a6, 1f
-       movi    a4, 0x80000     /* make it a quiet NaN */
-       or      xh, xh, a4
-1:     leaf_return
-
-.Ldiv_ynan_or_inf:
-       /* If y is Infinity, return zero.  */
-       slli    a8, yh, 12
-       or      a8, a8, yl
-       beqz    a8, .Ldiv_return_zero
-       /* y is NaN; return it.  */
-       mov     xh, yh
-       mov     xl, yl
-       leaf_return
-
-.Ldiv_highequal1:
-       bltu    xl, yl, 2f
-       j       3f
-
-       .align  4
-       .global __divdf3
-       .type   __divdf3, @function
-__divdf3:
-       leaf_entry sp, 16
-       movi    a6, 0x7ff00000
-
-       /* Get the sign of the result.  */
-       xor     a7, xh, yh
-
-       /* Check for NaN and infinity.  */
-       ball    xh, a6, .Ldiv_xnan_or_inf
-       ball    yh, a6, .Ldiv_ynan_or_inf
-
-       /* Extract the exponents.  */
-       extui   a8, xh, 20, 11
-       extui   a9, yh, 20, 11
-
-       beqz    a9, .Ldiv_yexpzero
-.Ldiv_ynormalized:     
-       beqz    a8, .Ldiv_xexpzero
-.Ldiv_xnormalized:     
-
-       /* Subtract the exponents.  */
-       sub     a8, a8, a9
-
-       /* Replace sign/exponent fields with explicit "1.0".  */
-       movi    a10, 0x1fffff
-       or      xh, xh, a6
-       and     xh, xh, a10
-       or      yh, yh, a6
-       and     yh, yh, a10
-
-       /* Set SAR for left shift by one.  */
-       ssai    (32 - 1)
-
-       /* The first digit of the mantissa division must be a one.
-          Shift x (and adjust the exponent) as needed to make this true.  */
-       bltu    yh, xh, 3f
-       beq     yh, xh, .Ldiv_highequal1
-2:     src     xh, xh, xl
-       sll     xl, xl
-       addi    a8, a8, -1
-3:
-       /* Do the first subtraction and shift.  */
-       sub     xh, xh, yh
-       bgeu    xl, yl, 1f
-       addi    xh, xh, -1
-1:     sub     xl, xl, yl
-       src     xh, xh, xl
-       sll     xl, xl
-
-       /* Put the quotient into a10/a11.  */
-       movi    a10, 0
-       movi    a11, 1
-
-       /* Divide one bit at a time for 52 bits.  */
-       movi    a9, 52
-#if XCHAL_HAVE_LOOPS
-       loop    a9, .Ldiv_loopend
-#endif
-.Ldiv_loop:
-       /* Shift the quotient << 1.  */
-       src     a10, a10, a11
-       sll     a11, a11
-
-       /* Is this digit a 0 or 1?  */
-       bltu    xh, yh, 3f
-       beq     xh, yh, .Ldiv_highequal2
-
-       /* Output a 1 and subtract.  */
-2:     addi    a11, a11, 1
-       sub     xh, xh, yh
-       bgeu    xl, yl, 1f
-       addi    xh, xh, -1
-1:     sub     xl, xl, yl
-
-       /* Shift the dividend << 1.  */
-3:     src     xh, xh, xl
-       sll     xl, xl
-
-#if !XCHAL_HAVE_LOOPS
-       addi    a9, a9, -1
-       bnez    a9, .Ldiv_loop
-#endif
-.Ldiv_loopend:
-
-       /* Add the exponent bias (less one to account for the explicit "1.0"
-          of the mantissa that will be added to the exponent in the final
-          result).  */
-       movi    a9, 0x3fe
-       add     a8, a8, a9
-       
-       /* Check for over/underflow.  The value in a8 is one less than the
-          final exponent, so values in the range 0..7fd are OK here.  */
-       addmi   a9, a9, 0x400   /* 0x7fe */
-       bgeu    a8, a9, .Ldiv_overflow
-
-.Ldiv_round:
-       /* Round.  The remainder (<< 1) is in xh/xl.  */
-       bltu    xh, yh, .Ldiv_rounded
-       beq     xh, yh, .Ldiv_highequal3
-.Ldiv_roundup:
-       addi    a11, a11, 1
-       beqz    a11, .Ldiv_roundcarry
-
-.Ldiv_rounded:
-       mov     xl, a11
-       /* Add the exponent to the mantissa.  */
-       slli    a8, a8, 20
-       add     xh, a10, a8
-
-.Ldiv_addsign:
-       /* Add the sign bit.  */
-       srli    a7, a7, 31
-       slli    a7, a7, 31
-       or      xh, xh, a7
-       leaf_return
-
-.Ldiv_highequal2:
-       bgeu    xl, yl, 2b
-       j       3b
-
-.Ldiv_highequal3:
-       bltu    xl, yl, .Ldiv_rounded
-       bne     xl, yl, .Ldiv_roundup
-
-       /* Remainder is exactly half the divisor.  Round even.  */
-       addi    a11, a11, 1
-       beqz    a11, .Ldiv_roundcarry
-       srli    a11, a11, 1
-       slli    a11, a11, 1
-       j       .Ldiv_rounded
-
-.Ldiv_overflow:
-       bltz    a8, .Ldiv_underflow
-       /* Return +/- Infinity.  */
-       addi    a8, a9, 1       /* 0x7ff */
-       slli    xh, a8, 20
-       movi    xl, 0
-       j       .Ldiv_addsign
-
-.Ldiv_underflow:
-       /* Create a subnormal value, where the exponent field contains zero,
-          but the effective exponent is 1.  The value of a8 is one less than
-          the actual exponent, so just negate it to get the shift amount.  */
-       neg     a8, a8
-       ssr     a8
-       bgeui   a8, 32, .Ldiv_bigshift
-       
-       /* Shift a10/a11 right.  Any bits that are shifted out of a11 are
-          saved in a6 for rounding the result.  */
-       sll     a6, a11
-       src     a11, a10, a11
-       srl     a10, a10
-       j       1f
-
-.Ldiv_bigshift:
-       bgeui   a8, 64, .Ldiv_flush_to_zero
-       sll     a9, a11         /* lost bits shifted out of a11 */
-       src     a6, a10, a11
-       srl     a11, a10
-       movi    a10, 0
-       or      xl, xl, a9
-
-       /* Set the exponent to zero.  */
-1:     movi    a8, 0
-
-       /* Pack any nonzero remainder (in xh/xl) into a6.  */
-       or      xh, xh, xl
-       beqz    xh, 1f
-       movi    a9, 1
-       or      a6, a6, a9
-       
-       /* Round a10/a11 based on the bits shifted out into a6.  */
-1:     bgez    a6, .Ldiv_rounded
-       addi    a11, a11, 1
-       beqz    a11, .Ldiv_roundcarry
-       slli    a6, a6, 1
-       bnez    a6, .Ldiv_rounded
-       srli    a11, a11, 1
-       slli    a11, a11, 1
-       j       .Ldiv_rounded
-
-.Ldiv_roundcarry:
-       /* a11 is always zero when the rounding increment overflows, so
-          there's no need to round it to an even value.  */
-       addi    a10, a10, 1
-       /* Overflow to the exponent field is OK.  */
-       j       .Ldiv_rounded
-
-.Ldiv_flush_to_zero:
-       /* Return zero with the appropriate sign bit.  */
-       srli    xh, a7, 31
-       slli    xh, xh, 31
-       movi    xl, 0
-       leaf_return
-
-#endif /* L_divdf3 */
-
-#ifdef L_cmpdf2
-
-       /* Equal and Not Equal */
-
-       .align  4
-       .global __eqdf2
-       .global __nedf2
-       .set    __nedf2, __eqdf2
-       .type   __eqdf2, @function
-__eqdf2:
-       leaf_entry sp, 16
-       bne     xl, yl, 2f
-       bne     xh, yh, 4f
-
-       /* The values are equal but NaN != NaN.  Check the exponent.  */
-       movi    a6, 0x7ff00000
-       ball    xh, a6, 3f
-
-       /* Equal.  */
-       movi    a2, 0
-       leaf_return
-
-       /* Not equal.  */
-2:     movi    a2, 1
-       leaf_return
-
-       /* Check if the mantissas are nonzero.  */
-3:     slli    a7, xh, 12
-       or      a7, a7, xl
-       j       5f
-
-       /* Check if x and y are zero with different signs.  */
-4:     or      a7, xh, yh
-       slli    a7, a7, 1
-       or      a7, a7, xl      /* xl == yl here */
-
-       /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
-          or x when exponent(x) = 0x7ff and x == y.  */
-5:     movi    a2, 0
-       movi    a3, 1
-       movnez  a2, a3, a7      
-       leaf_return
-
-
-       /* Greater Than */
-
-       .align  4
-       .global __gtdf2
-       .type   __gtdf2, @function
-__gtdf2:
-       leaf_entry sp, 16
-       movi    a6, 0x7ff00000
-       ball    xh, a6, 2f
-1:     bnall   yh, a6, .Lle_cmp
-
-       /* Check if y is a NaN.  */
-       slli    a7, yh, 12
-       or      a7, a7, yl
-       beqz    a7, .Lle_cmp
-       movi    a2, 0
-       leaf_return
-
-       /* Check if x is a NaN.  */
-2:     slli    a7, xh, 12
-       or      a7, a7, xl
-       beqz    a7, 1b
-       movi    a2, 0
-       leaf_return
-
-
-       /* Less Than or Equal */
-
-       .align  4
-       .global __ledf2
-       .type   __ledf2, @function
-__ledf2:
-       leaf_entry sp, 16
-       movi    a6, 0x7ff00000
-       ball    xh, a6, 2f
-1:     bnall   yh, a6, .Lle_cmp
-
-       /* Check if y is a NaN.  */
-       slli    a7, yh, 12
-       or      a7, a7, yl
-       beqz    a7, .Lle_cmp
-       movi    a2, 1
-       leaf_return
-
-       /* Check if x is a NaN.  */
-2:     slli    a7, xh, 12
-       or      a7, a7, xl
-       beqz    a7, 1b
-       movi    a2, 1
-       leaf_return
-
-.Lle_cmp:
-       /* Check if x and y have different signs.  */
-       xor     a7, xh, yh
-       bltz    a7, .Lle_diff_signs
-
-       /* Check if x is negative.  */
-       bltz    xh, .Lle_xneg
-
-       /* Check if x <= y.  */
-       bltu    xh, yh, 4f
-       bne     xh, yh, 5f
-       bltu    yl, xl, 5f
-4:     movi    a2, 0
-       leaf_return
-
-.Lle_xneg:
-       /* Check if y <= x.  */
-       bltu    yh, xh, 4b
-       bne     yh, xh, 5f
-       bgeu    xl, yl, 4b
-5:     movi    a2, 1
-       leaf_return
-
-.Lle_diff_signs:
-       bltz    xh, 4b
-
-       /* Check if both x and y are zero.  */
-       or      a7, xh, yh
-       slli    a7, a7, 1
-       or      a7, a7, xl
-       or      a7, a7, yl
-       movi    a2, 1
-       movi    a3, 0
-       moveqz  a2, a3, a7
-       leaf_return
-
-
-       /* Greater Than or Equal */
-
-       .align  4
-       .global __gedf2
-       .type   __gedf2, @function
-__gedf2:
-       leaf_entry sp, 16
-       movi    a6, 0x7ff00000
-       ball    xh, a6, 2f
-1:     bnall   yh, a6, .Llt_cmp
-
-       /* Check if y is a NaN.  */
-       slli    a7, yh, 12
-       or      a7, a7, yl
-       beqz    a7, .Llt_cmp
-       movi    a2, -1
-       leaf_return
-
-       /* Check if x is a NaN.  */
-2:     slli    a7, xh, 12
-       or      a7, a7, xl
-       beqz    a7, 1b
-       movi    a2, -1
-       leaf_return
-
-
-       /* Less Than */
-
-       .align  4
-       .global __ltdf2
-       .type   __ltdf2, @function
-__ltdf2:
-       leaf_entry sp, 16
-       movi    a6, 0x7ff00000
-       ball    xh, a6, 2f
-1:     bnall   yh, a6, .Llt_cmp
-
-       /* Check if y is a NaN.  */
-       slli    a7, yh, 12
-       or      a7, a7, yl
-       beqz    a7, .Llt_cmp
-       movi    a2, 0
-       leaf_return
-
-       /* Check if x is a NaN.  */
-2:     slli    a7, xh, 12
-       or      a7, a7, xl
-       beqz    a7, 1b
-       movi    a2, 0
-       leaf_return
-
-.Llt_cmp:
-       /* Check if x and y have different signs.  */
-       xor     a7, xh, yh
-       bltz    a7, .Llt_diff_signs
-
-       /* Check if x is negative.  */
-       bltz    xh, .Llt_xneg
-
-       /* Check if x < y.  */
-       bltu    xh, yh, 4f
-       bne     xh, yh, 5f
-       bgeu    xl, yl, 5f
-4:     movi    a2, -1
-       leaf_return
-
-.Llt_xneg:
-       /* Check if y < x.  */
-       bltu    yh, xh, 4b
-       bne     yh, xh, 5f
-       bltu    yl, xl, 4b
-5:     movi    a2, 0
-       leaf_return
-
-.Llt_diff_signs:
-       bgez    xh, 5b
-
-       /* Check if both x and y are nonzero.  */
-       or      a7, xh, yh
-       slli    a7, a7, 1
-       or      a7, a7, xl
-       or      a7, a7, yl
-       movi    a2, 0
-       movi    a3, -1
-       movnez  a2, a3, a7
-       leaf_return
-
-
-       /* Unordered */
-
-       .align  4
-       .global __unorddf2
-       .type   __unorddf2, @function
-__unorddf2:
-       leaf_entry sp, 16
-       movi    a6, 0x7ff00000
-       ball    xh, a6, 3f
-1:     ball    yh, a6, 4f
-2:     movi    a2, 0
-       leaf_return
-
-3:     slli    a7, xh, 12
-       or      a7, a7, xl
-       beqz    a7, 1b
-       movi    a2, 1
-       leaf_return
-
-4:     slli    a7, yh, 12
-       or      a7, a7, yl
-       beqz    a7, 2b
-       movi    a2, 1
-       leaf_return
-
-#endif /* L_cmpdf2 */
-
-#ifdef L_fixdfsi
-
-       .align  4
-       .global __fixdfsi
-       .type   __fixdfsi, @function
-__fixdfsi:
-       leaf_entry sp, 16
-
-       /* Check for NaN and Infinity.  */
-       movi    a6, 0x7ff00000
-       ball    xh, a6, .Lfixdfsi_nan_or_inf
-
-       /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
-       extui   a4, xh, 20, 11
-       extui   a5, a6, 19, 10  /* 0x3fe */
-       sub     a4, a4, a5
-       bgei    a4, 32, .Lfixdfsi_maxint
-       blti    a4, 1, .Lfixdfsi_zero
-
-       /* Add explicit "1.0" and shift << 11.  */
-       or      a7, xh, a6
-       ssai    (32 - 11)
-       src     a5, a7, xl
-
-       /* Shift back to the right, based on the exponent.  */
-       ssl     a4              /* shift by 32 - a4 */
-       srl     a5, a5
-
-       /* Negate the result if sign != 0.  */
-       neg     a2, a5
-       movgez  a2, a5, a7
-       leaf_return
-
-.Lfixdfsi_nan_or_inf:
-       /* Handle Infinity and NaN.  */
-       slli    a4, xh, 12
-       or      a4, a4, xl
-       beqz    a4, .Lfixdfsi_maxint
-
-       /* Translate NaN to +maxint.  */
-       movi    xh, 0
-
-.Lfixdfsi_maxint:
-       slli    a4, a6, 11      /* 0x80000000 */
-       addi    a5, a4, -1      /* 0x7fffffff */
-       movgez  a4, a5, xh
-       mov     a2, a4
-       leaf_return
-
-.Lfixdfsi_zero:
-       movi    a2, 0
-       leaf_return
-
-#endif /* L_fixdfsi */
-
-#ifdef L_fixdfdi
-
-       .align  4
-       .global __fixdfdi
-       .type   __fixdfdi, @function
-__fixdfdi:
-       leaf_entry sp, 16
-
-       /* Check for NaN and Infinity.  */
-       movi    a6, 0x7ff00000
-       ball    xh, a6, .Lfixdfdi_nan_or_inf
-
-       /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
-       extui   a4, xh, 20, 11
-       extui   a5, a6, 19, 10  /* 0x3fe */
-       sub     a4, a4, a5
-       bgei    a4, 64, .Lfixdfdi_maxint
-       blti    a4, 1, .Lfixdfdi_zero
-
-       /* Add explicit "1.0" and shift << 11.  */
-       or      a7, xh, a6
-       ssai    (32 - 11)
-       src     xh, a7, xl
-       sll     xl, xl
-
-       /* Shift back to the right, based on the exponent.  */
-       ssl     a4              /* shift by 64 - a4 */
-       bgei    a4, 32, .Lfixdfdi_smallshift
-       srl     xl, xh
-       movi    xh, 0
-
-.Lfixdfdi_shifted:     
-       /* Negate the result if sign != 0.  */
-       bgez    a7, 1f
-       neg     xl, xl
-       neg     xh, xh
-       beqz    xl, 1f
-       addi    xh, xh, -1
-1:     leaf_return
-
-.Lfixdfdi_smallshift:
-       src     xl, xh, xl
-       srl     xh, xh
-       j       .Lfixdfdi_shifted
-
-.Lfixdfdi_nan_or_inf:
-       /* Handle Infinity and NaN.  */
-       slli    a4, xh, 12
-       or      a4, a4, xl
-       beqz    a4, .Lfixdfdi_maxint
-
-       /* Translate NaN to +maxint.  */
-       movi    xh, 0
-
-.Lfixdfdi_maxint:
-       slli    a7, a6, 11      /* 0x80000000 */
-       bgez    xh, 1f
-       mov     xh, a7
-       movi    xl, 0
-       leaf_return
-
-1:     addi    xh, a7, -1      /* 0x7fffffff */
-       movi    xl, -1
-       leaf_return
-
-.Lfixdfdi_zero:
-       movi    xh, 0
-       movi    xl, 0
-       leaf_return
-
-#endif /* L_fixdfdi */
-
-#ifdef L_fixunsdfsi
-
-       .align  4
-       .global __fixunsdfsi
-       .type   __fixunsdfsi, @function
-__fixunsdfsi:
-       leaf_entry sp, 16
-
-       /* Check for NaN and Infinity.  */
-       movi    a6, 0x7ff00000
-       ball    xh, a6, .Lfixunsdfsi_nan_or_inf
-
-       /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
-       extui   a4, xh, 20, 11
-       extui   a5, a6, 20, 10  /* 0x3ff */
-       sub     a4, a4, a5
-       bgei    a4, 32, .Lfixunsdfsi_maxint
-       bltz    a4, .Lfixunsdfsi_zero
-
-       /* Add explicit "1.0" and shift << 11.  */
-       or      a7, xh, a6
-       ssai    (32 - 11)
-       src     a5, a7, xl
-
-       /* Shift back to the right, based on the exponent.  */
-       addi    a4, a4, 1
-       beqi    a4, 32, .Lfixunsdfsi_bigexp
-       ssl     a4              /* shift by 32 - a4 */
-       srl     a5, a5
-
-       /* Negate the result if sign != 0.  */
-       neg     a2, a5
-       movgez  a2, a5, a7
-       leaf_return
-
-.Lfixunsdfsi_nan_or_inf:
-       /* Handle Infinity and NaN.  */
-       slli    a4, xh, 12
-       or      a4, a4, xl
-       beqz    a4, .Lfixunsdfsi_maxint
-
-       /* Translate NaN to 0xffffffff.  */
-       movi    a2, -1
-       leaf_return
-
-.Lfixunsdfsi_maxint:
-       slli    a4, a6, 11      /* 0x80000000 */
-       movi    a5, -1          /* 0xffffffff */
-       movgez  a4, a5, xh
-       mov     a2, a4
-       leaf_return
-
-.Lfixunsdfsi_zero:
-       movi    a2, 0
-       leaf_return
-
-.Lfixunsdfsi_bigexp:
-       /* Handle unsigned maximum exponent case.  */
-       bltz    xh, 1f
-       mov     a2, a5          /* no shift needed */
-       leaf_return
-
-       /* Return 0x80000000 if negative.  */
-1:     slli    a2, a6, 11
-       leaf_return
-
-#endif /* L_fixunsdfsi */
-
-#ifdef L_fixunsdfdi
-
-       .align  4
-       .global __fixunsdfdi
-       .type   __fixunsdfdi, @function
-__fixunsdfdi:
-       leaf_entry sp, 16
-
-       /* Check for NaN and Infinity.  */
-       movi    a6, 0x7ff00000
-       ball    xh, a6, .Lfixunsdfdi_nan_or_inf
-
-       /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
-       extui   a4, xh, 20, 11
-       extui   a5, a6, 20, 10  /* 0x3ff */
-       sub     a4, a4, a5
-       bgei    a4, 64, .Lfixunsdfdi_maxint
-       bltz    a4, .Lfixunsdfdi_zero
-
-       /* Add explicit "1.0" and shift << 11.  */
-       or      a7, xh, a6
-       ssai    (32 - 11)
-       src     xh, a7, xl
-       sll     xl, xl
-
-       /* Shift back to the right, based on the exponent.  */
-       addi    a4, a4, 1
-       beqi    a4, 64, .Lfixunsdfdi_bigexp
-       ssl     a4              /* shift by 64 - a4 */
-       bgei    a4, 32, .Lfixunsdfdi_smallshift
-       srl     xl, xh
-       movi    xh, 0
-
-.Lfixunsdfdi_shifted:
-       /* Negate the result if sign != 0.  */
-       bgez    a7, 1f
-       neg     xl, xl
-       neg     xh, xh
-       beqz    xl, 1f
-       addi    xh, xh, -1
-1:     leaf_return
-
-.Lfixunsdfdi_smallshift:
-       src     xl, xh, xl
-       srl     xh, xh
-       j       .Lfixunsdfdi_shifted
-
-.Lfixunsdfdi_nan_or_inf:
-       /* Handle Infinity and NaN.  */
-       slli    a4, xh, 12
-       or      a4, a4, xl
-       beqz    a4, .Lfixunsdfdi_maxint
-
-       /* Translate NaN to 0xffffffff.... */
-1:     movi    xh, -1
-       movi    xl, -1
-       leaf_return
-
-.Lfixunsdfdi_maxint:
-       bgez    xh, 1b
-2:     slli    xh, a6, 11      /* 0x80000000 */
-       movi    xl, 0
-       leaf_return
-
-.Lfixunsdfdi_zero:
-       movi    xh, 0
-       movi    xl, 0
-       leaf_return
-
-.Lfixunsdfdi_bigexp:
-       /* Handle unsigned maximum exponent case.  */
-       bltz    a7, 2b
-       leaf_return             /* no shift needed */
-
-#endif /* L_fixunsdfdi */
-
-#ifdef L_floatsidf
-
-       .align  4
-       .global __floatunsidf
-       .type   __floatunsidf, @function
-__floatunsidf:
-       leaf_entry sp, 16
-       beqz    a2, .Lfloatsidf_return_zero
-
-       /* Set the sign to zero and jump to the floatsidf code.  */
-       movi    a7, 0
-       j       .Lfloatsidf_normalize
-
-       .align  4
-       .global __floatsidf
-       .type   __floatsidf, @function
-__floatsidf:
-       leaf_entry sp, 16
-
-       /* Check for zero.  */
-       beqz    a2, .Lfloatsidf_return_zero
-
-       /* Save the sign.  */
-       extui   a7, a2, 31, 1
-
-       /* Get the absolute value.  */
-#if XCHAL_HAVE_ABS
-       abs     a2, a2
-#else
-       neg     a4, a2
-       movltz  a2, a4, a2
-#endif
-
-.Lfloatsidf_normalize:
-       /* Normalize with the first 1 bit in the msb.  */
-       do_nsau a4, a2, a5, a6
-       ssl     a4
-       sll     a5, a2
-
-       /* Shift the mantissa into position.  */
-       srli    xh, a5, 11
-       slli    xl, a5, (32 - 11)
-
-       /* Set the exponent.  */
-       movi    a5, 0x41d       /* 0x3fe + 31 */
-       sub     a5, a5, a4
-       slli    a5, a5, 20
-       add     xh, xh, a5
-
-       /* Add the sign and return. */
-       slli    a7, a7, 31
-       or      xh, xh, a7
-       leaf_return
-
-.Lfloatsidf_return_zero:
-       movi    a3, 0
-       leaf_return
-
-#endif /* L_floatsidf */
-
-#ifdef L_floatdidf
-
-       .align  4
-       .global __floatundidf
-       .type   __floatundidf, @function
-__floatundidf:
-       leaf_entry sp, 16
-
-       /* Check for zero.  */
-       or      a4, xh, xl
-       beqz    a4, 2f
-
-       /* Set the sign to zero and jump to the floatdidf code.  */
-       movi    a7, 0
-       j       .Lfloatdidf_normalize
-
-       .align  4
-       .global __floatdidf
-       .type   __floatdidf, @function
-__floatdidf:
-       leaf_entry sp, 16
-
-       /* Check for zero.  */
-       or      a4, xh, xl
-       beqz    a4, 2f
-
-       /* Save the sign.  */
-       extui   a7, xh, 31, 1
-
-       /* Get the absolute value.  */
-       bgez    xh, .Lfloatdidf_normalize
-       neg     xl, xl
-       neg     xh, xh
-       beqz    xl, .Lfloatdidf_normalize
-       addi    xh, xh, -1
-
-.Lfloatdidf_normalize:
-       /* Normalize with the first 1 bit in the msb of xh.  */
-       beqz    xh, .Lfloatdidf_bigshift
-       do_nsau a4, xh, a5, a6
-       ssl     a4
-       src     xh, xh, xl
-       sll     xl, xl
-
-.Lfloatdidf_shifted:
-       /* Shift the mantissa into position, with rounding bits in a6.  */
-       ssai    11
-       sll     a6, xl
-       src     xl, xh, xl
-       srl     xh, xh
-
-       /* Set the exponent.  */
-       movi    a5, 0x43d       /* 0x3fe + 63 */
-       sub     a5, a5, a4
-       slli    a5, a5, 20
-       add     xh, xh, a5
-
-       /* Add the sign.  */
-       slli    a7, a7, 31
-       or      xh, xh, a7
-
-       /* Round up if the leftover fraction is >= 1/2.  */
-       bgez    a6, 2f
-       addi    xl, xl, 1
-       beqz    xl, .Lfloatdidf_roundcarry
-
-       /* Check if the leftover fraction is exactly 1/2.  */
-       slli    a6, a6, 1
-       beqz    a6, .Lfloatdidf_exactlyhalf
-2:     leaf_return
-
-.Lfloatdidf_bigshift:
-       /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
-       do_nsau a4, xl, a5, a6
-       ssl     a4
-       sll     xh, xl
-       movi    xl, 0
-       addi    a4, a4, 32
-       j       .Lfloatdidf_shifted
-
-.Lfloatdidf_exactlyhalf:
-       /* Round down to the nearest even value.  */
-       srli    xl, xl, 1
-       slli    xl, xl, 1
-       leaf_return
-
-.Lfloatdidf_roundcarry:
-       /* xl is always zero when the rounding increment overflows, so
-          there's no need to round it to an even value.  */
-       addi    xh, xh, 1
-       /* Overflow to the exponent is OK.  */
-       leaf_return
-
-#endif /* L_floatdidf */
-
-#ifdef L_truncdfsf2
-
-       .align  4
-       .global __truncdfsf2
-       .type   __truncdfsf2, @function
-__truncdfsf2:
-       leaf_entry sp, 16
-
-       /* Adjust the exponent bias.  */
-       movi    a4, (0x3ff - 0x7f) << 20
-       sub     a5, xh, a4
-
-       /* Check for underflow.  */
-       xor     a6, xh, a5
-       bltz    a6, .Ltrunc_underflow
-       extui   a6, a5, 20, 11
-       beqz    a6, .Ltrunc_underflow
-
-       /* Check for overflow.  */
-       movi    a4, 255
-       bge     a6, a4, .Ltrunc_overflow
-
-       /* Shift a5/xl << 3 into a5/a4.  */
-       ssai    (32 - 3)
-       src     a5, a5, xl
-       sll     a4, xl
-
-.Ltrunc_addsign:
-       /* Add the sign bit.  */
-       extui   a6, xh, 31, 1
-       slli    a6, a6, 31
-       or      a2, a6, a5
-
-       /* Round up if the leftover fraction is >= 1/2.  */
-       bgez    a4, 1f
-       addi    a2, a2, 1
-       /* Overflow to the exponent is OK.  The answer will be correct.  */
-
-       /* Check if the leftover fraction is exactly 1/2.  */
-       slli    a4, a4, 1
-       beqz    a4, .Ltrunc_exactlyhalf
-1:     leaf_return
-
-.Ltrunc_exactlyhalf:
-       /* Round down to the nearest even value.  */
-       srli    a2, a2, 1
-       slli    a2, a2, 1
-       leaf_return
-
-.Ltrunc_overflow:
-       /* Check if exponent == 0x7ff.  */
-       movi    a4, 0x7ff00000
-       bnall   xh, a4, 1f
-
-       /* Check if mantissa is nonzero.  */
-       slli    a5, xh, 12
-       or      a5, a5, xl
-       beqz    a5, 1f
-
-       /* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
-       srli    a4, a4, 1
-
-1:     slli    a4, a4, 4       /* 0xff000000 or 0xff800000 */
-       /* Add the sign bit.  */
-       extui   a6, xh, 31, 1
-       ssai    1
-       src     a2, a6, a4
-       leaf_return
-
-.Ltrunc_underflow:
-       /* Find shift count for a subnormal.  Flush to zero if >= 32.  */
-       extui   a6, xh, 20, 11
-       movi    a5, 0x3ff - 0x7f
-       sub     a6, a5, a6
-       addi    a6, a6, 1
-       bgeui   a6, 32, 1f
-
-       /* Replace the exponent with an explicit "1.0".  */
-       slli    a5, a5, 13      /* 0x700000 */
-       or      a5, a5, xh
-       slli    a5, a5, 11
-       srli    a5, a5, 11
-
-       /* Shift the mantissa left by 3 bits (into a5/a4).  */
-       ssai    (32 - 3)
-       src     a5, a5, xl
-       sll     a4, xl
-
-       /* Shift right by a6.  */
-       ssr     a6
-       sll     a7, a4
-       src     a4, a5, a4
-       srl     a5, a5
-       beqz    a7, .Ltrunc_addsign
-       or      a4, a4, a6      /* any positive, nonzero value will work */
-       j       .Ltrunc_addsign
-
-       /* Return +/- zero.  */
-1:     extui   a2, xh, 31, 1
-       slli    a2, a2, 31
-       leaf_return
-
-#endif /* L_truncdfsf2 */
-
-#ifdef L_extendsfdf2
-
-       .align  4
-       .global __extendsfdf2
-       .type   __extendsfdf2, @function
-__extendsfdf2:
-       leaf_entry sp, 16
-
-       /* Save the sign bit and then shift it off.  */
-       extui   a5, a2, 31, 1
-       slli    a5, a5, 31
-       slli    a4, a2, 1
-
-       /* Extract and check the exponent.  */
-       extui   a6, a2, 23, 8
-       beqz    a6, .Lextend_expzero
-       addi    a6, a6, 1
-       beqi    a6, 256, .Lextend_nan_or_inf
-
-       /* Shift >> 3 into a4/xl.  */
-       srli    a4, a4, 4
-       slli    xl, a2, (32 - 3)
-
-       /* Adjust the exponent bias.  */
-       movi    a6, (0x3ff - 0x7f) << 20
-       add     a4, a4, a6
-
-       /* Add the sign bit.  */
-       or      xh, a4, a5
-       leaf_return
-
-.Lextend_nan_or_inf:
-       movi    a4, 0x7ff00000
-
-       /* Check for NaN.  */
-       slli    a7, a2, 9
-       beqz    a7, 1f
-
-       slli    a6, a6, 11      /* 0x80000 */
-       or      a4, a4, a6
-
-       /* Add the sign and return.  */
-1:     or      xh, a4, a5
-       movi    xl, 0
-       leaf_return
-
-.Lextend_expzero:
-       beqz    a4, 1b
-
-       /* Normalize it to have 8 zero bits before the first 1 bit.  */
-       do_nsau a7, a4, a2, a3
-       addi    a7, a7, -8
-       ssl     a7
-       sll     a4, a4
-       
-       /* Shift >> 3 into a4/xl.  */
-       slli    xl, a4, (32 - 3)
-       srli    a4, a4, 3
-
-       /* Set the exponent.  */
-       movi    a6, 0x3fe - 0x7f
-       sub     a6, a6, a7
-       slli    a6, a6, 20
-       add     a4, a4, a6
-
-       /* Add the sign and return.  */
-       or      xh, a4, a5
-       leaf_return
-
-#endif /* L_extendsfdf2 */
-
-
diff --git a/gcc/config/xtensa/ieee754-sf.S b/gcc/config/xtensa/ieee754-sf.S

deleted file mode 100644 (file)

index d75be0e..0000000
--- a/gcc/config/xtensa/ieee754-sf.S
+++ /dev/null
@@ -1,1757 +0,0 @@
-/* IEEE-754 single-precision functions for Xtensa
-   Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
-   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful, but WITHOUT
-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
-   License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifdef __XTENSA_EB__
-#define xh a2
-#define xl a3
-#define yh a4
-#define yl a5
-#else
-#define xh a3
-#define xl a2
-#define yh a5
-#define yl a4
-#endif
-
-/*  Warning!  The branch displacements for some Xtensa branch instructions
-    are quite small, and this code has been carefully laid out to keep
-    branch targets in range.  If you change anything, be sure to check that
-    the assembler is not relaxing anything to branch over a jump.  */
-
-#ifdef L_negsf2
-
-       .align  4
-       .global __negsf2
-       .type   __negsf2, @function
-__negsf2:
-       leaf_entry sp, 16
-       movi    a4, 0x80000000
-       xor     a2, a2, a4
-       leaf_return
-
-#endif /* L_negsf2 */
-
-#ifdef L_addsubsf3
-
-       /* Addition */
-__addsf3_aux:
-
-       /* Handle NaNs and Infinities.  (This code is placed before the
-          start of the function just to keep it in range of the limited
-          branch displacements.)  */
-
-.Ladd_xnan_or_inf:
-       /* If y is neither Infinity nor NaN, return x.  */
-       bnall   a3, a6, 1f
-       /* If x is a NaN, return it.  Otherwise, return y.  */
-       slli    a7, a2, 9
-       beqz    a7, .Ladd_ynan_or_inf
-1:     leaf_return
-
-.Ladd_ynan_or_inf:
-       /* Return y.  */
-       mov     a2, a3
-       leaf_return
-
-.Ladd_opposite_signs:
-       /* Operand signs differ.  Do a subtraction.  */
-       slli    a7, a6, 8
-       xor     a3, a3, a7
-       j       .Lsub_same_sign
-
-       .align  4
-       .global __addsf3
-       .type   __addsf3, @function
-__addsf3:
-       leaf_entry sp, 16
-       movi    a6, 0x7f800000
-
-       /* Check if the two operands have the same sign.  */
-       xor     a7, a2, a3
-       bltz    a7, .Ladd_opposite_signs
-
-.Ladd_same_sign:       
-       /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
-       ball    a2, a6, .Ladd_xnan_or_inf
-       ball    a3, a6, .Ladd_ynan_or_inf
-
-       /* Compare the exponents.  The smaller operand will be shifted
-          right by the exponent difference and added to the larger
-          one.  */
-       extui   a7, a2, 23, 9
-       extui   a8, a3, 23, 9
-       bltu    a7, a8, .Ladd_shiftx
-
-.Ladd_shifty:
-       /* Check if the smaller (or equal) exponent is zero.  */
-       bnone   a3, a6, .Ladd_yexpzero
-
-       /* Replace y sign/exponent with 0x008.  */
-       or      a3, a3, a6
-       slli    a3, a3, 8
-       srli    a3, a3, 8
-
-.Ladd_yexpdiff:
-       /* Compute the exponent difference.  */
-       sub     a10, a7, a8
-
-       /* Exponent difference > 32 -- just return the bigger value.  */
-       bgeui   a10, 32, 1f
-       
-       /* Shift y right by the exponent difference.  Any bits that are
-          shifted out of y are saved in a9 for rounding the result.  */
-       ssr     a10
-       movi    a9, 0
-       src     a9, a3, a9
-       srl     a3, a3
-
-       /* Do the addition.  */
-       add     a2, a2, a3
-
-       /* Check if the add overflowed into the exponent.  */
-       extui   a10, a2, 23, 9
-       beq     a10, a7, .Ladd_round
-       mov     a8, a7
-       j       .Ladd_carry
-
-.Ladd_yexpzero:
-       /* y is a subnormal value.  Replace its sign/exponent with zero,
-          i.e., no implicit "1.0", and increment the apparent exponent
-          because subnormals behave as if they had the minimum (nonzero)
-          exponent.  Test for the case when both exponents are zero.  */
-       slli    a3, a3, 9
-       srli    a3, a3, 9
-       bnone   a2, a6, .Ladd_bothexpzero
-       addi    a8, a8, 1
-       j       .Ladd_yexpdiff
-
-.Ladd_bothexpzero:
-       /* Both exponents are zero.  Handle this as a special case.  There
-          is no need to shift or round, and the normal code for handling
-          a carry into the exponent field will not work because it
-          assumes there is an implicit "1.0" that needs to be added.  */
-       add     a2, a2, a3
-1:     leaf_return
-
-.Ladd_xexpzero:
-       /* Same as "yexpzero" except skip handling the case when both
-          exponents are zero.  */
-       slli    a2, a2, 9
-       srli    a2, a2, 9
-       addi    a7, a7, 1
-       j       .Ladd_xexpdiff
-
-.Ladd_shiftx:
-       /* Same thing as the "shifty" code, but with x and y swapped.  Also,
-          because the exponent difference is always nonzero in this version,
-          the shift sequence can use SLL and skip loading a constant zero.  */
-       bnone   a2, a6, .Ladd_xexpzero
-
-       or      a2, a2, a6
-       slli    a2, a2, 8
-       srli    a2, a2, 8
-
-.Ladd_xexpdiff:
-       sub     a10, a8, a7
-       bgeui   a10, 32, .Ladd_returny
-       
-       ssr     a10
-       sll     a9, a2
-       srl     a2, a2
-
-       add     a2, a2, a3
-
-       /* Check if the add overflowed into the exponent.  */
-       extui   a10, a2, 23, 9
-       bne     a10, a8, .Ladd_carry
-
-.Ladd_round:
-       /* Round up if the leftover fraction is >= 1/2.  */
-       bgez    a9, 1f
-       addi    a2, a2, 1
-
-       /* Check if the leftover fraction is exactly 1/2.  */
-       slli    a9, a9, 1
-       beqz    a9, .Ladd_exactlyhalf
-1:     leaf_return
-
-.Ladd_returny:
-       mov     a2, a3
-       leaf_return
-
-.Ladd_carry:   
-       /* The addition has overflowed into the exponent field, so the
-          value needs to be renormalized.  The mantissa of the result
-          can be recovered by subtracting the original exponent and
-          adding 0x800000 (which is the explicit "1.0" for the
-          mantissa of the non-shifted operand -- the "1.0" for the
-          shifted operand was already added).  The mantissa can then
-          be shifted right by one bit.  The explicit "1.0" of the
-          shifted mantissa then needs to be replaced by the exponent,
-          incremented by one to account for the normalizing shift.
-          It is faster to combine these operations: do the shift first
-          and combine the additions and subtractions.  If x is the
-          original exponent, the result is:
-              shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
-          or:
-              shifted mantissa + ((x + 1) << 22)
-          Note that the exponent is incremented here by leaving the
-          explicit "1.0" of the mantissa in the exponent field.  */
-
-       /* Shift x right by one bit.  Save the lsb.  */
-       mov     a10, a2
-       srli    a2, a2, 1
-
-       /* See explanation above.  The original exponent is in a8.  */
-       addi    a8, a8, 1
-       slli    a8, a8, 22
-       add     a2, a2, a8
-
-       /* Return an Infinity if the exponent overflowed.  */
-       ball    a2, a6, .Ladd_infinity
-       
-       /* Same thing as the "round" code except the msb of the leftover
-          fraction is bit 0 of a10, with the rest of the fraction in a9.  */
-       bbci.l  a10, 0, 1f
-       addi    a2, a2, 1
-       beqz    a9, .Ladd_exactlyhalf
-1:     leaf_return
-
-.Ladd_infinity:
-       /* Clear the mantissa.  */
-       srli    a2, a2, 23
-       slli    a2, a2, 23
-
-       /* The sign bit may have been lost in a carry-out.  Put it back.  */
-       slli    a8, a8, 1
-       or      a2, a2, a8
-       leaf_return
-
-.Ladd_exactlyhalf:
-       /* Round down to the nearest even value.  */
-       srli    a2, a2, 1
-       slli    a2, a2, 1
-       leaf_return
-
-
-       /* Subtraction */
-__subsf3_aux:
-       
-       /* Handle NaNs and Infinities.  (This code is placed before the
-          start of the function just to keep it in range of the limited
-          branch displacements.)  */
-
-.Lsub_xnan_or_inf:
-       /* If y is neither Infinity nor NaN, return x.  */
-       bnall   a3, a6, 1f
-       /* Both x and y are either NaN or Inf, so the result is NaN.  */
-       movi    a4, 0x400000    /* make it a quiet NaN */
-       or      a2, a2, a4
-1:     leaf_return
-
-.Lsub_ynan_or_inf:
-       /* Negate y and return it.  */
-       slli    a7, a6, 8
-       xor     a2, a3, a7
-       leaf_return
-
-.Lsub_opposite_signs:
-       /* Operand signs differ.  Do an addition.  */
-       slli    a7, a6, 8
-       xor     a3, a3, a7
-       j       .Ladd_same_sign
-
-       .align  4
-       .global __subsf3
-       .type   __subsf3, @function
-__subsf3:
-       leaf_entry sp, 16
-       movi    a6, 0x7f800000
-
-       /* Check if the two operands have the same sign.  */
-       xor     a7, a2, a3
-       bltz    a7, .Lsub_opposite_signs
-
-.Lsub_same_sign:       
-       /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
-       ball    a2, a6, .Lsub_xnan_or_inf
-       ball    a3, a6, .Lsub_ynan_or_inf
-
-       /* Compare the operands.  In contrast to addition, the entire
-          value matters here.  */
-       extui   a7, a2, 23, 8
-       extui   a8, a3, 23, 8
-       bltu    a2, a3, .Lsub_xsmaller
-
-.Lsub_ysmaller:
-       /* Check if the smaller (or equal) exponent is zero.  */
-       bnone   a3, a6, .Lsub_yexpzero
-
-       /* Replace y sign/exponent with 0x008.  */
-       or      a3, a3, a6
-       slli    a3, a3, 8
-       srli    a3, a3, 8
-
-.Lsub_yexpdiff:
-       /* Compute the exponent difference.  */
-       sub     a10, a7, a8
-
-       /* Exponent difference > 32 -- just return the bigger value.  */
-       bgeui   a10, 32, 1f
-       
-       /* Shift y right by the exponent difference.  Any bits that are
-          shifted out of y are saved in a9 for rounding the result.  */
-       ssr     a10
-       movi    a9, 0
-       src     a9, a3, a9
-       srl     a3, a3
-
-       sub     a2, a2, a3
-
-       /* Subtract the leftover bits in a9 from zero and propagate any
-          borrow from a2.  */
-       neg     a9, a9
-       addi    a10, a2, -1
-       movnez  a2, a10, a9
-
-       /* Check if the subtract underflowed into the exponent.  */
-       extui   a10, a2, 23, 8
-       beq     a10, a7, .Lsub_round
-       j       .Lsub_borrow
-
-.Lsub_yexpzero:
-       /* Return zero if the inputs are equal.  (For the non-subnormal
-          case, subtracting the "1.0" will cause a borrow from the exponent
-          and this case can be detected when handling the borrow.)  */
-       beq     a2, a3, .Lsub_return_zero
-
-       /* y is a subnormal value.  Replace its sign/exponent with zero,
-          i.e., no implicit "1.0".  Unless x is also a subnormal, increment
-          y's apparent exponent because subnormals behave as if they had
-          the minimum (nonzero) exponent.  */
-       slli    a3, a3, 9
-       srli    a3, a3, 9
-       bnone   a2, a6, .Lsub_yexpdiff
-       addi    a8, a8, 1
-       j       .Lsub_yexpdiff
-
-.Lsub_returny:
-       /* Negate and return y.  */
-       slli    a7, a6, 8
-       xor     a2, a3, a7
-1:     leaf_return
-
-.Lsub_xsmaller:
-       /* Same thing as the "ysmaller" code, but with x and y swapped and
-          with y negated.  */
-       bnone   a2, a6, .Lsub_xexpzero
-
-       or      a2, a2, a6
-       slli    a2, a2, 8
-       srli    a2, a2, 8
-
-.Lsub_xexpdiff:
-       sub     a10, a8, a7
-       bgeui   a10, 32, .Lsub_returny
-       
-       ssr     a10
-       movi    a9, 0
-       src     a9, a2, a9
-       srl     a2, a2
-
-       /* Negate y.  */
-       slli    a11, a6, 8
-       xor     a3, a3, a11
-
-       sub     a2, a3, a2
-
-       neg     a9, a9
-       addi    a10, a2, -1
-       movnez  a2, a10, a9
-
-       /* Check if the subtract underflowed into the exponent.  */
-       extui   a10, a2, 23, 8
-       bne     a10, a8, .Lsub_borrow
-
-.Lsub_round:
-       /* Round up if the leftover fraction is >= 1/2.  */
-       bgez    a9, 1f
-       addi    a2, a2, 1
-
-       /* Check if the leftover fraction is exactly 1/2.  */
-       slli    a9, a9, 1
-       beqz    a9, .Lsub_exactlyhalf
-1:     leaf_return
-
-.Lsub_xexpzero:
-       /* Same as "yexpzero".  */
-       beq     a2, a3, .Lsub_return_zero
-       slli    a2, a2, 9
-       srli    a2, a2, 9
-       bnone   a3, a6, .Lsub_xexpdiff
-       addi    a7, a7, 1
-       j       .Lsub_xexpdiff
-
-.Lsub_return_zero:
-       movi    a2, 0
-       leaf_return
-
-.Lsub_borrow:  
-       /* The subtraction has underflowed into the exponent field, so the
-          value needs to be renormalized.  Shift the mantissa left as
-          needed to remove any leading zeros and adjust the exponent
-          accordingly.  If the exponent is not large enough to remove
-          all the leading zeros, the result will be a subnormal value.  */
-
-       slli    a8, a2, 9
-       beqz    a8, .Lsub_xzero
-       do_nsau a6, a8, a7, a11
-       srli    a8, a8, 9
-       bge     a6, a10, .Lsub_subnormal
-       addi    a6, a6, 1
-
-.Lsub_normalize_shift:
-       /* Shift the mantissa (a8/a9) left by a6.  */
-       ssl     a6
-       src     a8, a8, a9
-       sll     a9, a9
-
-       /* Combine the shifted mantissa with the sign and exponent,
-          decrementing the exponent by a6.  (The exponent has already
-          been decremented by one due to the borrow from the subtraction,
-          but adding the mantissa will increment the exponent by one.)  */
-       srli    a2, a2, 23
-       sub     a2, a2, a6
-       slli    a2, a2, 23
-       add     a2, a2, a8
-       j       .Lsub_round
-
-.Lsub_exactlyhalf:
-       /* Round down to the nearest even value.  */
-       srli    a2, a2, 1
-       slli    a2, a2, 1
-       leaf_return
-
-.Lsub_xzero:
-       /* If there was a borrow from the exponent, and the mantissa and
-          guard digits are all zero, then the inputs were equal and the
-          result should be zero.  */
-       beqz    a9, .Lsub_return_zero
-
-       /* Only the guard digit is nonzero.  Shift by min(24, a10).  */
-       addi    a11, a10, -24
-       movi    a6, 24
-       movltz  a6, a10, a11
-       j       .Lsub_normalize_shift
-
-.Lsub_subnormal:
-       /* The exponent is too small to shift away all the leading zeros.
-          Set a6 to the current exponent (which has already been
-          decremented by the borrow) so that the exponent of the result
-          will be zero.  Do not add 1 to a6 in this case, because: (1)
-          adding the mantissa will not increment the exponent, so there is
-          no need to subtract anything extra from the exponent to
-          compensate, and (2) the effective exponent of a subnormal is 1
-          not 0 so the shift amount must be 1 smaller than normal. */
-       mov     a6, a10
-       j       .Lsub_normalize_shift
-
-#endif /* L_addsubsf3 */
-
-#ifdef L_mulsf3
-
-       /* Multiplication */
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
-#define XCHAL_NO_MUL 1
-#endif
-
-__mulsf3_aux:
-
-       /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
-          (This code is placed before the start of the function just to
-          keep it in range of the limited branch displacements.)  */
-
-.Lmul_xexpzero:
-       /* Clear the sign bit of x.  */
-       slli    a2, a2, 1
-       srli    a2, a2, 1
-
-       /* If x is zero, return zero.  */
-       beqz    a2, .Lmul_return_zero
-
-       /* Normalize x.  Adjust the exponent in a8.  */
-       do_nsau a10, a2, a11, a12
-       addi    a10, a10, -8
-       ssl     a10
-       sll     a2, a2 
-       movi    a8, 1
-       sub     a8, a8, a10
-       j       .Lmul_xnormalized       
-       
-.Lmul_yexpzero:
-       /* Clear the sign bit of y.  */
-       slli    a3, a3, 1
-       srli    a3, a3, 1
-
-       /* If y is zero, return zero.  */
-       beqz    a3, .Lmul_return_zero
-
-       /* Normalize y.  Adjust the exponent in a9.  */
-       do_nsau a10, a3, a11, a12
-       addi    a10, a10, -8
-       ssl     a10
-       sll     a3, a3
-       movi    a9, 1
-       sub     a9, a9, a10
-       j       .Lmul_ynormalized       
-
-.Lmul_return_zero:
-       /* Return zero with the appropriate sign bit.  */
-       srli    a2, a7, 31
-       slli    a2, a2, 31
-       j       .Lmul_done
-
-.Lmul_xnan_or_inf:
-       /* If y is zero, return NaN.  */
-       slli    a8, a3, 1
-       bnez    a8, 1f
-       movi    a4, 0x400000    /* make it a quiet NaN */
-       or      a2, a2, a4
-       j       .Lmul_done
-1:
-       /* If y is NaN, return y.  */
-       bnall   a3, a6, .Lmul_returnx
-       slli    a8, a3, 9
-       beqz    a8, .Lmul_returnx
-
-.Lmul_returny:
-       mov     a2, a3
-
-.Lmul_returnx:
-       /* Set the sign bit and return.  */
-       extui   a7, a7, 31, 1
-       slli    a2, a2, 1
-       ssai    1
-       src     a2, a7, a2
-       j       .Lmul_done
-
-.Lmul_ynan_or_inf:
-       /* If x is zero, return NaN.  */
-       slli    a8, a2, 1
-       bnez    a8, .Lmul_returny
-       movi    a7, 0x400000    /* make it a quiet NaN */
-       or      a2, a3, a7
-       j       .Lmul_done
-
-       .align  4
-       .global __mulsf3
-       .type   __mulsf3, @function
-__mulsf3:
-#if __XTENSA_CALL0_ABI__
-       leaf_entry sp, 32
-       addi    sp, sp, -32
-       s32i    a12, sp, 16
-       s32i    a13, sp, 20
-       s32i    a14, sp, 24
-       s32i    a15, sp, 28
-#elif XCHAL_NO_MUL
-       /* This is not really a leaf function; allocate enough stack space
-          to allow CALL12s to a helper function.  */
-       leaf_entry sp, 64
-#else
-       leaf_entry sp, 32
-#endif
-       movi    a6, 0x7f800000
-
-       /* Get the sign of the result.  */
-       xor     a7, a2, a3
-
-       /* Check for NaN and infinity.  */
-       ball    a2, a6, .Lmul_xnan_or_inf
-       ball    a3, a6, .Lmul_ynan_or_inf
-
-       /* Extract the exponents.  */
-       extui   a8, a2, 23, 8
-       extui   a9, a3, 23, 8
-
-       beqz    a8, .Lmul_xexpzero
-.Lmul_xnormalized:     
-       beqz    a9, .Lmul_yexpzero
-.Lmul_ynormalized:     
-
-       /* Add the exponents.  */
-       add     a8, a8, a9
-
-       /* Replace sign/exponent fields with explicit "1.0".  */
-       movi    a10, 0xffffff
-       or      a2, a2, a6
-       and     a2, a2, a10
-       or      a3, a3, a6
-       and     a3, a3, a10
-
-       /* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */
-
-#if XCHAL_HAVE_MUL32_HIGH
-
-       mull    a6, a2, a3
-       muluh   a2, a2, a3
-
-#else
-
-       /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
-          products.  These partial products are:
-
-               0 xl * yl
-
-               1 xl * yh
-               2 xh * yl
-
-               3 xh * yh
-
-          If using the Mul16 or Mul32 multiplier options, these input
-          chunks must be stored in separate registers.  For Mac16, the
-          UMUL.AA.* opcodes can specify that the inputs come from either
-          half of the registers, so there is no need to shift them out
-          ahead of time.  If there is no multiply hardware, the 16-bit
-          chunks can be extracted when setting up the arguments to the
-          separate multiply function.  */
-
-#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
-       /* Calling a separate multiply function will clobber a0 and requires
-          use of a8 as a temporary, so save those values now.  (The function
-          uses a custom ABI so nothing else needs to be saved.)  */
-       s32i    a0, sp, 0
-       s32i    a8, sp, 4
-#endif
-
-#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
-
-#define a2h a4
-#define a3h a5
-
-       /* Get the high halves of the inputs into registers.  */
-       srli    a2h, a2, 16
-       srli    a3h, a3, 16
-
-#define a2l a2
-#define a3l a3
-
-#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
-       /* Clear the high halves of the inputs.  This does not matter
-          for MUL16 because the high bits are ignored.  */
-       extui   a2, a2, 0, 16
-       extui   a3, a3, 0, 16
-#endif
-#endif /* MUL16 || MUL32 */
-
-
-#if XCHAL_HAVE_MUL16
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       mul16u  dst, xreg ## xhalf, yreg ## yhalf
-
-#elif XCHAL_HAVE_MUL32
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       mull    dst, xreg ## xhalf, yreg ## yhalf
-
-#elif XCHAL_HAVE_MAC16
-
-/* The preprocessor insists on inserting a space when concatenating after
-   a period in the definition of do_mul below.  These macros are a workaround
-   using underscores instead of periods when doing the concatenation.  */
-#define umul_aa_ll umul.aa.ll
-#define umul_aa_lh umul.aa.lh
-#define umul_aa_hl umul.aa.hl
-#define umul_aa_hh umul.aa.hh
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
-       rsr     dst, ACCLO
-
-#else /* no multiply hardware */
-       
-#define set_arg_l(dst, src) \
-       extui   dst, src, 0, 16
-#define set_arg_h(dst, src) \
-       srli    dst, src, 16
-
-#if __XTENSA_CALL0_ABI__
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       set_arg_ ## xhalf (a13, xreg); \
-       set_arg_ ## yhalf (a14, yreg); \
-       call0   .Lmul_mulsi3; \
-       mov     dst, a12
-#else
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       set_arg_ ## xhalf (a14, xreg); \
-       set_arg_ ## yhalf (a15, yreg); \
-       call12  .Lmul_mulsi3; \
-       mov     dst, a14
-#endif /* __XTENSA_CALL0_ABI__ */
-
-#endif /* no multiply hardware */
-
-       /* Add pp1 and pp2 into a6 with carry-out in a9.  */
-       do_mul(a6, a2, l, a3, h)        /* pp 1 */
-       do_mul(a11, a2, h, a3, l)       /* pp 2 */
-       movi    a9, 0
-       add     a6, a6, a11
-       bgeu    a6, a11, 1f
-       addi    a9, a9, 1
-1:
-       /* Shift the high half of a9/a6 into position in a9.  Note that
-          this value can be safely incremented without any carry-outs.  */
-       ssai    16
-       src     a9, a9, a6
-
-       /* Compute the low word into a6.  */
-       do_mul(a11, a2, l, a3, l)       /* pp 0 */
-       sll     a6, a6
-       add     a6, a6, a11
-       bgeu    a6, a11, 1f
-       addi    a9, a9, 1
-1:
-       /* Compute the high word into a2.  */
-       do_mul(a2, a2, h, a3, h)        /* pp 3 */
-       add     a2, a2, a9
-       
-#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
-       /* Restore values saved on the stack during the multiplication.  */
-       l32i    a0, sp, 0
-       l32i    a8, sp, 4
-#endif
-#endif /* ! XCHAL_HAVE_MUL32_HIGH */
-
-       /* Shift left by 9 bits, unless there was a carry-out from the
-          multiply, in which case, shift by 8 bits and increment the
-          exponent.  */
-       movi    a4, 9
-       srli    a5, a2, 24 - 9
-       beqz    a5, 1f
-       addi    a4, a4, -1
-       addi    a8, a8, 1
-1:     ssl     a4
-       src     a2, a2, a6
-       sll     a6, a6
-
-       /* Subtract the extra bias from the exponent sum (plus one to account
-          for the explicit "1.0" of the mantissa that will be added to the
-          exponent in the final result).  */
-       movi    a4, 0x80
-       sub     a8, a8, a4
-       
-       /* Check for over/underflow.  The value in a8 is one less than the
-          final exponent, so values in the range 0..fd are OK here.  */
-       movi    a4, 0xfe
-       bgeu    a8, a4, .Lmul_overflow
-       
-.Lmul_round:
-       /* Round.  */
-       bgez    a6, .Lmul_rounded
-       addi    a2, a2, 1
-       slli    a6, a6, 1
-       beqz    a6, .Lmul_exactlyhalf
-
-.Lmul_rounded:
-       /* Add the exponent to the mantissa.  */
-       slli    a8, a8, 23
-       add     a2, a2, a8
-
-.Lmul_addsign:
-       /* Add the sign bit.  */
-       srli    a7, a7, 31
-       slli    a7, a7, 31
-       or      a2, a2, a7
-
-.Lmul_done:
-#if __XTENSA_CALL0_ABI__
-       l32i    a12, sp, 16
-       l32i    a13, sp, 20
-       l32i    a14, sp, 24
-       l32i    a15, sp, 28
-       addi    sp, sp, 32
-#endif
-       leaf_return
-
-.Lmul_exactlyhalf:
-       /* Round down to the nearest even value.  */
-       srli    a2, a2, 1
-       slli    a2, a2, 1
-       j       .Lmul_rounded
-
-.Lmul_overflow:
-       bltz    a8, .Lmul_underflow
-       /* Return +/- Infinity.  */
-       movi    a8, 0xff
-       slli    a2, a8, 23
-       j       .Lmul_addsign
-
-.Lmul_underflow:
-       /* Create a subnormal value, where the exponent field contains zero,
-          but the effective exponent is 1.  The value of a8 is one less than
-          the actual exponent, so just negate it to get the shift amount.  */
-       neg     a8, a8
-       mov     a9, a6
-       ssr     a8
-       bgeui   a8, 32, .Lmul_flush_to_zero
-       
-       /* Shift a2 right.  Any bits that are shifted out of a2 are saved
-          in a6 (combined with the shifted-out bits currently in a6) for
-          rounding the result.  */
-       sll     a6, a2
-       srl     a2, a2
-
-       /* Set the exponent to zero.  */
-       movi    a8, 0
-
-       /* Pack any nonzero bits shifted out into a6.  */
-       beqz    a9, .Lmul_round
-       movi    a9, 1
-       or      a6, a6, a9
-       j       .Lmul_round
-       
-.Lmul_flush_to_zero:
-       /* Return zero with the appropriate sign bit.  */
-       srli    a2, a7, 31
-       slli    a2, a2, 31
-       j       .Lmul_done
-
-#if XCHAL_NO_MUL
-       
-       /* For Xtensa processors with no multiply hardware, this simplified
-          version of _mulsi3 is used for multiplying 16-bit chunks of
-          the floating-point mantissas.  When using CALL0, this function
-          uses a custom ABI: the inputs are passed in a13 and a14, the
-          result is returned in a12, and a8 and a15 are clobbered.  */
-       .align  4
-.Lmul_mulsi3:
-       leaf_entry sp, 16
-       .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
-       movi    \dst, 0
-1:     add     \tmp1, \src2, \dst
-       extui   \tmp2, \src1, 0, 1
-       movnez  \dst, \tmp1, \tmp2
-
-       do_addx2 \tmp1, \src2, \dst, \tmp1
-       extui   \tmp2, \src1, 1, 1
-       movnez  \dst, \tmp1, \tmp2
-
-       do_addx4 \tmp1, \src2, \dst, \tmp1
-       extui   \tmp2, \src1, 2, 1
-       movnez  \dst, \tmp1, \tmp2
-
-       do_addx8 \tmp1, \src2, \dst, \tmp1
-       extui   \tmp2, \src1, 3, 1
-       movnez  \dst, \tmp1, \tmp2
-
-       srli    \src1, \src1, 4
-       slli    \src2, \src2, 4
-       bnez    \src1, 1b
-       .endm
-#if __XTENSA_CALL0_ABI__
-       mul_mulsi3_body a12, a13, a14, a15, a8
-#else
-       /* The result will be written into a2, so save that argument in a4.  */
-       mov     a4, a2
-       mul_mulsi3_body a2, a4, a3, a5, a6
-#endif
-       leaf_return
-#endif /* XCHAL_NO_MUL */
-#endif /* L_mulsf3 */
-
-#ifdef L_divsf3
-
-       /* Division */
-__divsf3_aux:
-
-       /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
-          (This code is placed before the start of the function just to
-          keep it in range of the limited branch displacements.)  */
-
-.Ldiv_yexpzero:
-       /* Clear the sign bit of y.  */
-       slli    a3, a3, 1
-       srli    a3, a3, 1
-
-       /* Check for division by zero.  */
-       beqz    a3, .Ldiv_yzero
-
-       /* Normalize y.  Adjust the exponent in a9.  */
-       do_nsau a10, a3, a4, a5
-       addi    a10, a10, -8
-       ssl     a10
-       sll     a3, a3
-       movi    a9, 1
-       sub     a9, a9, a10
-       j       .Ldiv_ynormalized       
-
-.Ldiv_yzero:
-       /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
-       slli    a4, a2, 1
-       srli    a4, a4, 1
-       srli    a2, a7, 31
-       slli    a2, a2, 31
-       or      a2, a2, a6
-       bnez    a4, 1f
-       movi    a4, 0x400000    /* make it a quiet NaN */
-       or      a2, a2, a4
-1:     leaf_return
-
-.Ldiv_xexpzero:
-       /* Clear the sign bit of x.  */
-       slli    a2, a2, 1
-       srli    a2, a2, 1
-
-       /* If x is zero, return zero.  */
-       beqz    a2, .Ldiv_return_zero
-
-       /* Normalize x.  Adjust the exponent in a8.  */
-       do_nsau a10, a2, a4, a5
-       addi    a10, a10, -8
-       ssl     a10
-       sll     a2, a2
-       movi    a8, 1
-       sub     a8, a8, a10
-       j       .Ldiv_xnormalized       
-       
-.Ldiv_return_zero:
-       /* Return zero with the appropriate sign bit.  */
-       srli    a2, a7, 31
-       slli    a2, a2, 31
-       leaf_return
-
-.Ldiv_xnan_or_inf:
-       /* Set the sign bit of the result.  */
-       srli    a7, a3, 31
-       slli    a7, a7, 31
-       xor     a2, a2, a7
-       /* If y is NaN or Inf, return NaN.  */
-       bnall   a3, a6, 1f
-       movi    a4, 0x400000    /* make it a quiet NaN */
-       or      a2, a2, a4
-1:     leaf_return
-
-.Ldiv_ynan_or_inf:
-       /* If y is Infinity, return zero.  */
-       slli    a8, a3, 9
-       beqz    a8, .Ldiv_return_zero
-       /* y is NaN; return it.  */
-       mov     a2, a3
-       leaf_return
-
-       .align  4
-       .global __divsf3
-       .type   __divsf3, @function
-__divsf3:
-       leaf_entry sp, 16
-       movi    a6, 0x7f800000
-
-       /* Get the sign of the result.  */
-       xor     a7, a2, a3
-
-       /* Check for NaN and infinity.  */
-       ball    a2, a6, .Ldiv_xnan_or_inf
-       ball    a3, a6, .Ldiv_ynan_or_inf
-
-       /* Extract the exponents.  */
-       extui   a8, a2, 23, 8
-       extui   a9, a3, 23, 8
-
-       beqz    a9, .Ldiv_yexpzero
-.Ldiv_ynormalized:     
-       beqz    a8, .Ldiv_xexpzero
-.Ldiv_xnormalized:     
-
-       /* Subtract the exponents.  */
-       sub     a8, a8, a9
-
-       /* Replace sign/exponent fields with explicit "1.0".  */
-       movi    a10, 0xffffff
-       or      a2, a2, a6
-       and     a2, a2, a10
-       or      a3, a3, a6
-       and     a3, a3, a10
-
-       /* The first digit of the mantissa division must be a one.
-          Shift x (and adjust the exponent) as needed to make this true.  */
-       bltu    a3, a2, 1f
-       slli    a2, a2, 1
-       addi    a8, a8, -1
-1:
-       /* Do the first subtraction and shift.  */
-       sub     a2, a2, a3
-       slli    a2, a2, 1
-
-       /* Put the quotient into a10.  */
-       movi    a10, 1
-
-       /* Divide one bit at a time for 23 bits.  */
-       movi    a9, 23
-#if XCHAL_HAVE_LOOPS
-       loop    a9, .Ldiv_loopend
-#endif
-.Ldiv_loop:
-       /* Shift the quotient << 1.  */
-       slli    a10, a10, 1
-
-       /* Is this digit a 0 or 1?  */
-       bltu    a2, a3, 1f
-
-       /* Output a 1 and subtract.  */
-       addi    a10, a10, 1
-       sub     a2, a2, a3
-
-       /* Shift the dividend << 1.  */
-1:     slli    a2, a2, 1
-
-#if !XCHAL_HAVE_LOOPS
-       addi    a9, a9, -1
-       bnez    a9, .Ldiv_loop
-#endif
-.Ldiv_loopend:
-
-       /* Add the exponent bias (less one to account for the explicit "1.0"
-          of the mantissa that will be added to the exponent in the final
-          result).  */
-       addi    a8, a8, 0x7e
-       
-       /* Check for over/underflow.  The value in a8 is one less than the
-          final exponent, so values in the range 0..fd are OK here.  */
-       movi    a4, 0xfe
-       bgeu    a8, a4, .Ldiv_overflow
-       
-.Ldiv_round:
-       /* Round.  The remainder (<< 1) is in a2.  */
-       bltu    a2, a3, .Ldiv_rounded
-       addi    a10, a10, 1
-       beq     a2, a3, .Ldiv_exactlyhalf
-
-.Ldiv_rounded:
-       /* Add the exponent to the mantissa.  */
-       slli    a8, a8, 23
-       add     a2, a10, a8
-
-.Ldiv_addsign:
-       /* Add the sign bit.  */
-       srli    a7, a7, 31
-       slli    a7, a7, 31
-       or      a2, a2, a7
-       leaf_return
-
-.Ldiv_overflow:
-       bltz    a8, .Ldiv_underflow
-       /* Return +/- Infinity.  */
-       addi    a8, a4, 1       /* 0xff */
-       slli    a2, a8, 23
-       j       .Ldiv_addsign
-
-.Ldiv_exactlyhalf:
-       /* Remainder is exactly half the divisor.  Round even.  */
-       srli    a10, a10, 1
-       slli    a10, a10, 1
-       j       .Ldiv_rounded
-
-.Ldiv_underflow:
-       /* Create a subnormal value, where the exponent field contains zero,
-          but the effective exponent is 1.  The value of a8 is one less than
-          the actual exponent, so just negate it to get the shift amount.  */
-       neg     a8, a8
-       ssr     a8
-       bgeui   a8, 32, .Ldiv_flush_to_zero
-       
-       /* Shift a10 right.  Any bits that are shifted out of a10 are
-          saved in a6 for rounding the result.  */
-       sll     a6, a10
-       srl     a10, a10
-
-       /* Set the exponent to zero.  */
-       movi    a8, 0
-
-       /* Pack any nonzero remainder (in a2) into a6.  */
-       beqz    a2, 1f
-       movi    a9, 1
-       or      a6, a6, a9
-       
-       /* Round a10 based on the bits shifted out into a6.  */
-1:     bgez    a6, .Ldiv_rounded
-       addi    a10, a10, 1
-       slli    a6, a6, 1
-       bnez    a6, .Ldiv_rounded
-       srli    a10, a10, 1
-       slli    a10, a10, 1
-       j       .Ldiv_rounded
-
-.Ldiv_flush_to_zero:
-       /* Return zero with the appropriate sign bit.  */
-       srli    a2, a7, 31
-       slli    a2, a2, 31
-       leaf_return
-
-#endif /* L_divsf3 */
-
-#ifdef L_cmpsf2
-
-       /* Equal and Not Equal */
-
-       .align  4
-       .global __eqsf2
-       .global __nesf2
-       .set    __nesf2, __eqsf2
-       .type   __eqsf2, @function
-__eqsf2:
-       leaf_entry sp, 16
-       bne     a2, a3, 4f
-
-       /* The values are equal but NaN != NaN.  Check the exponent.  */
-       movi    a6, 0x7f800000
-       ball    a2, a6, 3f
-
-       /* Equal.  */
-       movi    a2, 0
-       leaf_return
-
-       /* Not equal.  */
-2:     movi    a2, 1
-       leaf_return
-
-       /* Check if the mantissas are nonzero.  */
-3:     slli    a7, a2, 9
-       j       5f
-
-       /* Check if x and y are zero with different signs.  */
-4:     or      a7, a2, a3
-       slli    a7, a7, 1
-
-       /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
-          or x when exponent(x) = 0x7f8 and x == y.  */
-5:     movi    a2, 0
-       movi    a3, 1
-       movnez  a2, a3, a7      
-       leaf_return
-
-
-       /* Greater Than */
-
-       .align  4
-       .global __gtsf2
-       .type   __gtsf2, @function
-__gtsf2:
-       leaf_entry sp, 16
-       movi    a6, 0x7f800000
-       ball    a2, a6, 2f
-1:     bnall   a3, a6, .Lle_cmp
-
-       /* Check if y is a NaN.  */
-       slli    a7, a3, 9
-       beqz    a7, .Lle_cmp
-       movi    a2, 0
-       leaf_return
-
-       /* Check if x is a NaN.  */
-2:     slli    a7, a2, 9
-       beqz    a7, 1b
-       movi    a2, 0
-       leaf_return
-
-
-       /* Less Than or Equal */
-
-       .align  4
-       .global __lesf2
-       .type   __lesf2, @function
-__lesf2:
-       leaf_entry sp, 16
-       movi    a6, 0x7f800000
-       ball    a2, a6, 2f
-1:     bnall   a3, a6, .Lle_cmp
-
-       /* Check if y is a NaN.  */
-       slli    a7, a3, 9
-       beqz    a7, .Lle_cmp
-       movi    a2, 1
-       leaf_return
-
-       /* Check if x is a NaN.  */
-2:     slli    a7, a2, 9
-       beqz    a7, 1b
-       movi    a2, 1
-       leaf_return
-
-.Lle_cmp:
-       /* Check if x and y have different signs.  */
-       xor     a7, a2, a3
-       bltz    a7, .Lle_diff_signs
-
-       /* Check if x is negative.  */
-       bltz    a2, .Lle_xneg
-
-       /* Check if x <= y.  */
-       bltu    a3, a2, 5f
-4:     movi    a2, 0
-       leaf_return
-
-.Lle_xneg:
-       /* Check if y <= x.  */
-       bgeu    a2, a3, 4b
-5:     movi    a2, 1
-       leaf_return
-
-.Lle_diff_signs:
-       bltz    a2, 4b
-
-       /* Check if both x and y are zero.  */
-       or      a7, a2, a3
-       slli    a7, a7, 1
-       movi    a2, 1
-       movi    a3, 0
-       moveqz  a2, a3, a7
-       leaf_return
-
-
-       /* Greater Than or Equal */
-
-       .align  4
-       .global __gesf2
-       .type   __gesf2, @function
-__gesf2:
-       leaf_entry sp, 16
-       movi    a6, 0x7f800000
-       ball    a2, a6, 2f
-1:     bnall   a3, a6, .Llt_cmp
-
-       /* Check if y is a NaN.  */
-       slli    a7, a3, 9
-       beqz    a7, .Llt_cmp
-       movi    a2, -1
-       leaf_return
-
-       /* Check if x is a NaN.  */
-2:     slli    a7, a2, 9
-       beqz    a7, 1b
-       movi    a2, -1
-       leaf_return
-
-
-       /* Less Than */
-
-       .align  4
-       .global __ltsf2
-       .type   __ltsf2, @function
-__ltsf2:
-       leaf_entry sp, 16
-       movi    a6, 0x7f800000
-       ball    a2, a6, 2f
-1:     bnall   a3, a6, .Llt_cmp
-
-       /* Check if y is a NaN.  */
-       slli    a7, a3, 9
-       beqz    a7, .Llt_cmp
-       movi    a2, 0
-       leaf_return
-
-       /* Check if x is a NaN.  */
-2:     slli    a7, a2, 9
-       beqz    a7, 1b
-       movi    a2, 0
-       leaf_return
-
-.Llt_cmp:
-       /* Check if x and y have different signs.  */
-       xor     a7, a2, a3
-       bltz    a7, .Llt_diff_signs
-
-       /* Check if x is negative.  */
-       bltz    a2, .Llt_xneg
-
-       /* Check if x < y.  */
-       bgeu    a2, a3, 5f
-4:     movi    a2, -1
-       leaf_return
-
-.Llt_xneg:
-       /* Check if y < x.  */
-       bltu    a3, a2, 4b
-5:     movi    a2, 0
-       leaf_return
-
-.Llt_diff_signs:
-       bgez    a2, 5b
-
-       /* Check if both x and y are nonzero.  */
-       or      a7, a2, a3
-       slli    a7, a7, 1
-       movi    a2, 0
-       movi    a3, -1
-       movnez  a2, a3, a7
-       leaf_return
-
-
-       /* Unordered */
-
-       .align  4
-       .global __unordsf2
-       .type   __unordsf2, @function
-__unordsf2:
-       leaf_entry sp, 16
-       movi    a6, 0x7f800000
-       ball    a2, a6, 3f
-1:     ball    a3, a6, 4f
-2:     movi    a2, 0
-       leaf_return
-
-3:     slli    a7, a2, 9
-       beqz    a7, 1b
-       movi    a2, 1
-       leaf_return
-
-4:     slli    a7, a3, 9
-       beqz    a7, 2b
-       movi    a2, 1
-       leaf_return
-
-#endif /* L_cmpsf2 */
-
-#ifdef L_fixsfsi
-
-       .align  4
-       .global __fixsfsi
-       .type   __fixsfsi, @function
-__fixsfsi:
-       leaf_entry sp, 16
-
-       /* Check for NaN and Infinity.  */
-       movi    a6, 0x7f800000
-       ball    a2, a6, .Lfixsfsi_nan_or_inf
-
-       /* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
-       extui   a4, a2, 23, 8
-       addi    a4, a4, -0x7e
-       bgei    a4, 32, .Lfixsfsi_maxint
-       blti    a4, 1, .Lfixsfsi_zero
-
-       /* Add explicit "1.0" and shift << 8.  */
-       or      a7, a2, a6
-       slli    a5, a7, 8
-
-       /* Shift back to the right, based on the exponent.  */
-       ssl     a4              /* shift by 32 - a4 */
-       srl     a5, a5
-
-       /* Negate the result if sign != 0.  */
-       neg     a2, a5
-       movgez  a2, a5, a7
-       leaf_return
-
-.Lfixsfsi_nan_or_inf:
-       /* Handle Infinity and NaN.  */
-       slli    a4, a2, 9
-       beqz    a4, .Lfixsfsi_maxint
-
-       /* Translate NaN to +maxint.  */
-       movi    a2, 0
-
-.Lfixsfsi_maxint:
-       slli    a4, a6, 8       /* 0x80000000 */
-       addi    a5, a4, -1      /* 0x7fffffff */
-       movgez  a4, a5, a2
-       mov     a2, a4
-       leaf_return
-
-.Lfixsfsi_zero:
-       movi    a2, 0
-       leaf_return
-
-#endif /* L_fixsfsi */
-
-#ifdef L_fixsfdi
-
-       .align  4
-       .global __fixsfdi
-       .type   __fixsfdi, @function
-__fixsfdi:
-       leaf_entry sp, 16
-
-       /* Check for NaN and Infinity.  */
-       movi    a6, 0x7f800000
-       ball    a2, a6, .Lfixsfdi_nan_or_inf
-
-       /* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
-       extui   a4, a2, 23, 8
-       addi    a4, a4, -0x7e
-       bgei    a4, 64, .Lfixsfdi_maxint
-       blti    a4, 1, .Lfixsfdi_zero
-
-       /* Add explicit "1.0" and shift << 8.  */
-       or      a7, a2, a6
-       slli    xh, a7, 8
-
-       /* Shift back to the right, based on the exponent.  */
-       ssl     a4              /* shift by 64 - a4 */
-       bgei    a4, 32, .Lfixsfdi_smallshift
-       srl     xl, xh
-       movi    xh, 0
-
-.Lfixsfdi_shifted:     
-       /* Negate the result if sign != 0.  */
-       bgez    a7, 1f
-       neg     xl, xl
-       neg     xh, xh
-       beqz    xl, 1f
-       addi    xh, xh, -1
-1:     leaf_return
-
-.Lfixsfdi_smallshift:
-       movi    xl, 0
-       sll     xl, xh
-       srl     xh, xh
-       j       .Lfixsfdi_shifted
-
-.Lfixsfdi_nan_or_inf:
-       /* Handle Infinity and NaN.  */
-       slli    a4, a2, 9
-       beqz    a4, .Lfixsfdi_maxint
-
-       /* Translate NaN to +maxint.  */
-       movi    a2, 0
-
-.Lfixsfdi_maxint:
-       slli    a7, a6, 8       /* 0x80000000 */
-       bgez    a2, 1f
-       mov     xh, a7
-       movi    xl, 0
-       leaf_return
-
-1:     addi    xh, a7, -1      /* 0x7fffffff */
-       movi    xl, -1
-       leaf_return
-
-.Lfixsfdi_zero:
-       movi    xh, 0
-       movi    xl, 0
-       leaf_return
-
-#endif /* L_fixsfdi */
-
-#ifdef L_fixunssfsi
-
-       .align  4
-       .global __fixunssfsi
-       .type   __fixunssfsi, @function
-__fixunssfsi:
-       leaf_entry sp, 16
-
-       /* Check for NaN and Infinity.  */
-       movi    a6, 0x7f800000
-       ball    a2, a6, .Lfixunssfsi_nan_or_inf
-
-       /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
-       extui   a4, a2, 23, 8
-       addi    a4, a4, -0x7f
-       bgei    a4, 32, .Lfixunssfsi_maxint
-       bltz    a4, .Lfixunssfsi_zero
-
-       /* Add explicit "1.0" and shift << 8.  */
-       or      a7, a2, a6
-       slli    a5, a7, 8
-
-       /* Shift back to the right, based on the exponent.  */
-       addi    a4, a4, 1
-       beqi    a4, 32, .Lfixunssfsi_bigexp
-       ssl     a4              /* shift by 32 - a4 */
-       srl     a5, a5
-
-       /* Negate the result if sign != 0.  */
-       neg     a2, a5
-       movgez  a2, a5, a7
-       leaf_return
-
-.Lfixunssfsi_nan_or_inf:
-       /* Handle Infinity and NaN.  */
-       slli    a4, a2, 9
-       beqz    a4, .Lfixunssfsi_maxint
-
-       /* Translate NaN to 0xffffffff.  */
-       movi    a2, -1
-       leaf_return
-
-.Lfixunssfsi_maxint:
-       slli    a4, a6, 8       /* 0x80000000 */
-       movi    a5, -1          /* 0xffffffff */
-       movgez  a4, a5, a2
-       mov     a2, a4
-       leaf_return
-
-.Lfixunssfsi_zero:
-       movi    a2, 0
-       leaf_return
-
-.Lfixunssfsi_bigexp:
-       /* Handle unsigned maximum exponent case.  */
-       bltz    a2, 1f
-       mov     a2, a5          /* no shift needed */
-       leaf_return
-
-       /* Return 0x80000000 if negative.  */
-1:     slli    a2, a6, 8
-       leaf_return
-
-#endif /* L_fixunssfsi */
-
-#ifdef L_fixunssfdi
-
-       .align  4
-       .global __fixunssfdi
-       .type   __fixunssfdi, @function
-__fixunssfdi:
-       leaf_entry sp, 16
-
-       /* Check for NaN and Infinity.  */
-       movi    a6, 0x7f800000
-       ball    a2, a6, .Lfixunssfdi_nan_or_inf
-
-       /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
-       extui   a4, a2, 23, 8
-       addi    a4, a4, -0x7f
-       bgei    a4, 64, .Lfixunssfdi_maxint
-       bltz    a4, .Lfixunssfdi_zero
-
-       /* Add explicit "1.0" and shift << 8.  */
-       or      a7, a2, a6
-       slli    xh, a7, 8
-
-       /* Shift back to the right, based on the exponent.  */
-       addi    a4, a4, 1
-       beqi    a4, 64, .Lfixunssfdi_bigexp
-       ssl     a4              /* shift by 64 - a4 */
-       bgei    a4, 32, .Lfixunssfdi_smallshift
-       srl     xl, xh
-       movi    xh, 0
-
-.Lfixunssfdi_shifted:
-       /* Negate the result if sign != 0.  */
-       bgez    a7, 1f
-       neg     xl, xl
-       neg     xh, xh
-       beqz    xl, 1f
-       addi    xh, xh, -1
-1:     leaf_return
-
-.Lfixunssfdi_smallshift:
-       movi    xl, 0
-       src     xl, xh, xl
-       srl     xh, xh
-       j       .Lfixunssfdi_shifted
-
-.Lfixunssfdi_nan_or_inf:
-       /* Handle Infinity and NaN.  */
-       slli    a4, a2, 9
-       beqz    a4, .Lfixunssfdi_maxint
-
-       /* Translate NaN to 0xffffffff.... */
-1:     movi    xh, -1
-       movi    xl, -1
-       leaf_return
-
-.Lfixunssfdi_maxint:
-       bgez    a2, 1b
-2:     slli    xh, a6, 8       /* 0x80000000 */
-       movi    xl, 0
-       leaf_return
-
-.Lfixunssfdi_zero:
-       movi    xh, 0
-       movi    xl, 0
-       leaf_return
-
-.Lfixunssfdi_bigexp:
-       /* Handle unsigned maximum exponent case.  */
-       bltz    a7, 2b
-       movi    xl, 0
-       leaf_return             /* no shift needed */
-
-#endif /* L_fixunssfdi */
-
-#ifdef L_floatsisf
-
-       .align  4
-       .global __floatunsisf
-       .type   __floatunsisf, @function
-__floatunsisf:
-       leaf_entry sp, 16
-       beqz    a2, .Lfloatsisf_return
-
-       /* Set the sign to zero and jump to the floatsisf code.  */
-       movi    a7, 0
-       j       .Lfloatsisf_normalize
-
-       .align  4
-       .global __floatsisf
-       .type   __floatsisf, @function
-__floatsisf:
-       leaf_entry sp, 16
-
-       /* Check for zero.  */
-       beqz    a2, .Lfloatsisf_return
-
-       /* Save the sign.  */
-       extui   a7, a2, 31, 1
-
-       /* Get the absolute value.  */
-#if XCHAL_HAVE_ABS
-       abs     a2, a2
-#else
-       neg     a4, a2
-       movltz  a2, a4, a2
-#endif
-
-.Lfloatsisf_normalize:
-       /* Normalize with the first 1 bit in the msb.  */
-       do_nsau a4, a2, a5, a6
-       ssl     a4
-       sll     a5, a2
-
-       /* Shift the mantissa into position, with rounding bits in a6.  */
-       srli    a2, a5, 8
-       slli    a6, a5, (32 - 8)
-
-       /* Set the exponent.  */
-       movi    a5, 0x9d        /* 0x7e + 31 */
-       sub     a5, a5, a4
-       slli    a5, a5, 23
-       add     a2, a2, a5
-
-       /* Add the sign.  */
-       slli    a7, a7, 31
-       or      a2, a2, a7
-
-       /* Round up if the leftover fraction is >= 1/2.  */
-       bgez    a6, .Lfloatsisf_return
-       addi    a2, a2, 1       /* Overflow to the exponent is OK.  */
-
-       /* Check if the leftover fraction is exactly 1/2.  */
-       slli    a6, a6, 1
-       beqz    a6, .Lfloatsisf_exactlyhalf
-
-.Lfloatsisf_return:
-       leaf_return
-
-.Lfloatsisf_exactlyhalf:
-       /* Round down to the nearest even value.  */
-       srli    a2, a2, 1
-       slli    a2, a2, 1
-       leaf_return
-
-#endif /* L_floatsisf */
-
-#ifdef L_floatdisf
-
-       .align  4
-       .global __floatundisf
-       .type   __floatundisf, @function
-__floatundisf:
-       leaf_entry sp, 16
-
-       /* Check for zero.  */
-       or      a4, xh, xl
-       beqz    a4, 2f
-
-       /* Set the sign to zero and jump to the floatdisf code.  */
-       movi    a7, 0
-       j       .Lfloatdisf_normalize
-
-       .align  4
-       .global __floatdisf
-       .type   __floatdisf, @function
-__floatdisf:
-       leaf_entry sp, 16
-
-       /* Check for zero.  */
-       or      a4, xh, xl
-       beqz    a4, 2f
-
-       /* Save the sign.  */
-       extui   a7, xh, 31, 1
-
-       /* Get the absolute value.  */
-       bgez    xh, .Lfloatdisf_normalize
-       neg     xl, xl
-       neg     xh, xh
-       beqz    xl, .Lfloatdisf_normalize
-       addi    xh, xh, -1
-
-.Lfloatdisf_normalize:
-       /* Normalize with the first 1 bit in the msb of xh.  */
-       beqz    xh, .Lfloatdisf_bigshift
-       do_nsau a4, xh, a5, a6
-       ssl     a4
-       src     xh, xh, xl
-       sll     xl, xl
-
-.Lfloatdisf_shifted:
-       /* Shift the mantissa into position, with rounding bits in a6.  */
-       ssai    8
-       sll     a5, xl
-       src     a6, xh, xl
-       srl     xh, xh
-       beqz    a5, 1f
-       movi    a5, 1
-       or      a6, a6, a5
-1:
-       /* Set the exponent.  */
-       movi    a5, 0xbd        /* 0x7e + 63 */
-       sub     a5, a5, a4
-       slli    a5, a5, 23
-       add     a2, xh, a5
-
-       /* Add the sign.  */
-       slli    a7, a7, 31
-       or      a2, a2, a7
-
-       /* Round up if the leftover fraction is >= 1/2.  */
-       bgez    a6, 2f
-       addi    a2, a2, 1       /* Overflow to the exponent is OK.  */
-
-       /* Check if the leftover fraction is exactly 1/2.  */
-       slli    a6, a6, 1
-       beqz    a6, .Lfloatdisf_exactlyhalf
-2:     leaf_return
-
-.Lfloatdisf_bigshift:
-       /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
-       do_nsau a4, xl, a5, a6
-       ssl     a4
-       sll     xh, xl
-       movi    xl, 0
-       addi    a4, a4, 32
-       j       .Lfloatdisf_shifted
-
-.Lfloatdisf_exactlyhalf:
-       /* Round down to the nearest even value.  */
-       srli    a2, a2, 1
-       slli    a2, a2, 1
-       leaf_return
-
-#endif /* L_floatdisf */
diff --git a/gcc/config/xtensa/lib1funcs.asm b/gcc/config/xtensa/lib1funcs.asm

deleted file mode 100644 (file)

index 071b917..0000000
--- a/gcc/config/xtensa/lib1funcs.asm
+++ /dev/null
@@ -1,845 +0,0 @@
-/* Assembly functions for the Xtensa version of libgcc1.
-   Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009
-   Free Software Foundation, Inc.
-   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-#include "xtensa-config.h"
-
-/* Define macros for the ABS and ADDX* instructions to handle cases
-   where they are not included in the Xtensa processor configuration.  */
-
-       .macro  do_abs dst, src, tmp
-#if XCHAL_HAVE_ABS
-       abs     \dst, \src
-#else
-       neg     \tmp, \src
-       movgez  \tmp, \src, \src
-       mov     \dst, \tmp
-#endif
-       .endm
-
-       .macro  do_addx2 dst, as, at, tmp
-#if XCHAL_HAVE_ADDX
-       addx2   \dst, \as, \at
-#else
-       slli    \tmp, \as, 1
-       add     \dst, \tmp, \at
-#endif
-       .endm
-
-       .macro  do_addx4 dst, as, at, tmp
-#if XCHAL_HAVE_ADDX
-       addx4   \dst, \as, \at
-#else
-       slli    \tmp, \as, 2
-       add     \dst, \tmp, \at
-#endif
-       .endm
-
-       .macro  do_addx8 dst, as, at, tmp
-#if XCHAL_HAVE_ADDX
-       addx8   \dst, \as, \at
-#else
-       slli    \tmp, \as, 3
-       add     \dst, \tmp, \at
-#endif
-       .endm
-
-/* Define macros for leaf function entry and return, supporting either the
-   standard register windowed ABI or the non-windowed call0 ABI.  These
-   macros do not allocate any extra stack space, so they only work for
-   leaf functions that do not need to spill anything to the stack.  */
-
-       .macro leaf_entry reg, size
-#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
-       entry \reg, \size
-#else
-       /* do nothing */
-#endif
-       .endm
-
-       .macro leaf_return
-#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
-       retw
-#else
-       ret
-#endif
-       .endm
-
-
-#ifdef L_mulsi3
-       .align  4
-       .global __mulsi3
-       .type   __mulsi3, @function
-__mulsi3:
-       leaf_entry sp, 16
-
-#if XCHAL_HAVE_MUL32
-       mull    a2, a2, a3
-
-#elif XCHAL_HAVE_MUL16
-       or      a4, a2, a3
-       srai    a4, a4, 16
-       bnez    a4, .LMUL16
-       mul16u  a2, a2, a3
-       leaf_return
-.LMUL16:
-       srai    a4, a2, 16
-       srai    a5, a3, 16
-       mul16u  a7, a4, a3
-       mul16u  a6, a5, a2
-       mul16u  a4, a2, a3
-       add     a7, a7, a6
-       slli    a7, a7, 16
-       add     a2, a7, a4
-
-#elif XCHAL_HAVE_MAC16
-       mul.aa.hl a2, a3
-       mula.aa.lh a2, a3
-       rsr     a5, ACCLO
-       umul.aa.ll a2, a3
-       rsr     a4, ACCLO
-       slli    a5, a5, 16
-       add     a2, a4, a5
-
-#else /* !MUL32 && !MUL16 && !MAC16 */
-
-       /* Multiply one bit at a time, but unroll the loop 4x to better
-          exploit the addx instructions and avoid overhead.
-          Peel the first iteration to save a cycle on init.  */
-
-       /* Avoid negative numbers.  */
-       xor     a5, a2, a3      /* Top bit is 1 if one input is negative.  */
-       do_abs  a3, a3, a6
-       do_abs  a2, a2, a6
-
-       /* Swap so the second argument is smaller.  */
-       sub     a7, a2, a3
-       mov     a4, a3
-       movgez  a4, a2, a7      /* a4 = max (a2, a3) */
-       movltz  a3, a2, a7      /* a3 = min (a2, a3) */
-
-       movi    a2, 0
-       extui   a6, a3, 0, 1
-       movnez  a2, a4, a6
-
-       do_addx2 a7, a4, a2, a7
-       extui   a6, a3, 1, 1
-       movnez  a2, a7, a6
-
-       do_addx4 a7, a4, a2, a7
-       extui   a6, a3, 2, 1
-       movnez  a2, a7, a6
-
-       do_addx8 a7, a4, a2, a7
-       extui   a6, a3, 3, 1
-       movnez  a2, a7, a6
-
-       bgeui   a3, 16, .Lmult_main_loop
-       neg     a3, a2
-       movltz  a2, a3, a5
-       leaf_return
-
-       .align  4
-.Lmult_main_loop:
-       srli    a3, a3, 4
-       slli    a4, a4, 4
-
-       add     a7, a4, a2
-       extui   a6, a3, 0, 1
-       movnez  a2, a7, a6
-
-       do_addx2 a7, a4, a2, a7
-       extui   a6, a3, 1, 1
-       movnez  a2, a7, a6
-
-       do_addx4 a7, a4, a2, a7
-       extui   a6, a3, 2, 1
-       movnez  a2, a7, a6
-
-       do_addx8 a7, a4, a2, a7
-       extui   a6, a3, 3, 1
-       movnez  a2, a7, a6
-
-       bgeui   a3, 16, .Lmult_main_loop
-
-       neg     a3, a2
-       movltz  a2, a3, a5
-
-#endif /* !MUL32 && !MUL16 && !MAC16 */
-
-       leaf_return
-       .size   __mulsi3, . - __mulsi3
-
-#endif /* L_mulsi3 */
-
-
-#ifdef L_umulsidi3
-
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
-#define XCHAL_NO_MUL 1
-#endif
-
-       .align  4
-       .global __umulsidi3
-       .type   __umulsidi3, @function
-__umulsidi3:
-#if __XTENSA_CALL0_ABI__
-       leaf_entry sp, 32
-       addi    sp, sp, -32
-       s32i    a12, sp, 16
-       s32i    a13, sp, 20
-       s32i    a14, sp, 24
-       s32i    a15, sp, 28
-#elif XCHAL_NO_MUL
-       /* This is not really a leaf function; allocate enough stack space
-          to allow CALL12s to a helper function.  */
-       leaf_entry sp, 48
-#else
-       leaf_entry sp, 16
-#endif
-
-#ifdef __XTENSA_EB__
-#define wh a2
-#define wl a3
-#else
-#define wh a3
-#define wl a2
-#endif /* __XTENSA_EB__ */
-
-       /* This code is taken from the mulsf3 routine in ieee754-sf.S.
-          See more comments there.  */
-
-#if XCHAL_HAVE_MUL32_HIGH
-       mull    a6, a2, a3
-       muluh   wh, a2, a3
-       mov     wl, a6
-
-#else /* ! MUL32_HIGH */
-
-#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
-       /* a0 and a8 will be clobbered by calling the multiply function
-          but a8 is not used here and need not be saved.  */
-       s32i    a0, sp, 0
-#endif
-
-#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
-
-#define a2h a4
-#define a3h a5
-
-       /* Get the high halves of the inputs into registers.  */
-       srli    a2h, a2, 16
-       srli    a3h, a3, 16
-
-#define a2l a2
-#define a3l a3
-
-#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
-       /* Clear the high halves of the inputs.  This does not matter
-          for MUL16 because the high bits are ignored.  */
-       extui   a2, a2, 0, 16
-       extui   a3, a3, 0, 16
-#endif
-#endif /* MUL16 || MUL32 */
-
-
-#if XCHAL_HAVE_MUL16
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       mul16u  dst, xreg ## xhalf, yreg ## yhalf
-
-#elif XCHAL_HAVE_MUL32
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       mull    dst, xreg ## xhalf, yreg ## yhalf
-
-#elif XCHAL_HAVE_MAC16
-
-/* The preprocessor insists on inserting a space when concatenating after
-   a period in the definition of do_mul below.  These macros are a workaround
-   using underscores instead of periods when doing the concatenation.  */
-#define umul_aa_ll umul.aa.ll
-#define umul_aa_lh umul.aa.lh
-#define umul_aa_hl umul.aa.hl
-#define umul_aa_hh umul.aa.hh
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
-       rsr     dst, ACCLO
-
-#else /* no multiply hardware */
-
-#define set_arg_l(dst, src) \
-       extui   dst, src, 0, 16
-#define set_arg_h(dst, src) \
-       srli    dst, src, 16
-
-#if __XTENSA_CALL0_ABI__
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       set_arg_ ## xhalf (a13, xreg); \
-       set_arg_ ## yhalf (a14, yreg); \
-       call0   .Lmul_mulsi3; \
-       mov     dst, a12
-#else
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
-       set_arg_ ## xhalf (a14, xreg); \
-       set_arg_ ## yhalf (a15, yreg); \
-       call12  .Lmul_mulsi3; \
-       mov     dst, a14
-#endif /* __XTENSA_CALL0_ABI__ */
-
-#endif /* no multiply hardware */
-
-       /* Add pp1 and pp2 into a6 with carry-out in a9.  */
-       do_mul(a6, a2, l, a3, h)        /* pp 1 */
-       do_mul(a11, a2, h, a3, l)       /* pp 2 */
-       movi    a9, 0
-       add     a6, a6, a11
-       bgeu    a6, a11, 1f
-       addi    a9, a9, 1
-1:
-       /* Shift the high half of a9/a6 into position in a9.  Note that
-          this value can be safely incremented without any carry-outs.  */
-       ssai    16
-       src     a9, a9, a6
-
-       /* Compute the low word into a6.  */
-       do_mul(a11, a2, l, a3, l)       /* pp 0 */
-       sll     a6, a6
-       add     a6, a6, a11
-       bgeu    a6, a11, 1f
-       addi    a9, a9, 1
-1:
-       /* Compute the high word into wh.  */
-       do_mul(wh, a2, h, a3, h)        /* pp 3 */
-       add     wh, wh, a9
-       mov     wl, a6
-
-#endif /* !MUL32_HIGH */
-
-#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
-       /* Restore the original return address.  */
-       l32i    a0, sp, 0
-#endif
-#if __XTENSA_CALL0_ABI__
-       l32i    a12, sp, 16
-       l32i    a13, sp, 20
-       l32i    a14, sp, 24
-       l32i    a15, sp, 28
-       addi    sp, sp, 32
-#endif
-       leaf_return
-
-#if XCHAL_NO_MUL
-
-       /* For Xtensa processors with no multiply hardware, this simplified
-          version of _mulsi3 is used for multiplying 16-bit chunks of
-          the floating-point mantissas.  When using CALL0, this function
-          uses a custom ABI: the inputs are passed in a13 and a14, the
-          result is returned in a12, and a8 and a15 are clobbered.  */
-       .align  4
-.Lmul_mulsi3:
-       leaf_entry sp, 16
-       .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
-       movi    \dst, 0
-1:     add     \tmp1, \src2, \dst
-       extui   \tmp2, \src1, 0, 1
-       movnez  \dst, \tmp1, \tmp2
-
-       do_addx2 \tmp1, \src2, \dst, \tmp1
-       extui   \tmp2, \src1, 1, 1
-       movnez  \dst, \tmp1, \tmp2
-
-       do_addx4 \tmp1, \src2, \dst, \tmp1
-       extui   \tmp2, \src1, 2, 1
-       movnez  \dst, \tmp1, \tmp2
-
-       do_addx8 \tmp1, \src2, \dst, \tmp1
-       extui   \tmp2, \src1, 3, 1
-       movnez  \dst, \tmp1, \tmp2
-
-       srli    \src1, \src1, 4
-       slli    \src2, \src2, 4
-       bnez    \src1, 1b
-       .endm
-#if __XTENSA_CALL0_ABI__
-       mul_mulsi3_body a12, a13, a14, a15, a8
-#else
-       /* The result will be written into a2, so save that argument in a4.  */
-       mov     a4, a2
-       mul_mulsi3_body a2, a4, a3, a5, a6
-#endif
-       leaf_return
-#endif /* XCHAL_NO_MUL */
-
-       .size   __umulsidi3, . - __umulsidi3
-
-#endif /* L_umulsidi3 */
-
-
-/* Define a macro for the NSAU (unsigned normalize shift amount)
-   instruction, which computes the number of leading zero bits,
-   to handle cases where it is not included in the Xtensa processor
-   configuration.  */
-
-       .macro  do_nsau cnt, val, tmp, a
-#if XCHAL_HAVE_NSA
-       nsau    \cnt, \val
-#else
-       mov     \a, \val
-       movi    \cnt, 0
-       extui   \tmp, \a, 16, 16
-       bnez    \tmp, 0f
-       movi    \cnt, 16
-       slli    \a, \a, 16
-0:
-       extui   \tmp, \a, 24, 8
-       bnez    \tmp, 1f
-       addi    \cnt, \cnt, 8
-       slli    \a, \a, 8
-1:
-       movi    \tmp, __nsau_data
-       extui   \a, \a, 24, 8
-       add     \tmp, \tmp, \a
-       l8ui    \tmp, \tmp, 0
-       add     \cnt, \cnt, \tmp
-#endif /* !XCHAL_HAVE_NSA */
-       .endm
-
-#ifdef L_clz
-       .section .rodata
-       .align  4
-       .global __nsau_data
-       .type   __nsau_data, @object
-__nsau_data:
-#if !XCHAL_HAVE_NSA
-       .byte   8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
-       .byte   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
-       .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
-       .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
-       .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-       .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-       .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-       .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-#endif /* !XCHAL_HAVE_NSA */
-       .size   __nsau_data, . - __nsau_data
-       .hidden __nsau_data
-#endif /* L_clz */
-
-
-#ifdef L_clzsi2
-       .align  4
-       .global __clzsi2
-       .type   __clzsi2, @function
-__clzsi2:
-       leaf_entry sp, 16
-       do_nsau a2, a2, a3, a4
-       leaf_return
-       .size   __clzsi2, . - __clzsi2
-
-#endif /* L_clzsi2 */
-
-
-#ifdef L_ctzsi2
-       .align  4
-       .global __ctzsi2
-       .type   __ctzsi2, @function
-__ctzsi2:
-       leaf_entry sp, 16
-       neg     a3, a2
-       and     a3, a3, a2
-       do_nsau a2, a3, a4, a5
-       neg     a2, a2
-       addi    a2, a2, 31
-       leaf_return
-       .size   __ctzsi2, . - __ctzsi2
-
-#endif /* L_ctzsi2 */
-
-
-#ifdef L_ffssi2
-       .align  4
-       .global __ffssi2
-       .type   __ffssi2, @function
-__ffssi2:
-       leaf_entry sp, 16
-       neg     a3, a2
-       and     a3, a3, a2
-       do_nsau a2, a3, a4, a5
-       neg     a2, a2
-       addi    a2, a2, 32
-       leaf_return
-       .size   __ffssi2, . - __ffssi2
-
-#endif /* L_ffssi2 */
-
-
-#ifdef L_udivsi3
-       .align  4
-       .global __udivsi3
-       .type   __udivsi3, @function
-__udivsi3:
-       leaf_entry sp, 16
-#if XCHAL_HAVE_DIV32
-       quou    a2, a2, a3
-#else
-       bltui   a3, 2, .Lle_one /* check if the divisor <= 1 */
-
-       mov     a6, a2          /* keep dividend in a6 */
-       do_nsau a5, a6, a2, a7  /* dividend_shift = nsau (dividend) */
-       do_nsau a4, a3, a2, a7  /* divisor_shift = nsau (divisor) */
-       bgeu    a5, a4, .Lspecial
-
-       sub     a4, a4, a5      /* count = divisor_shift - dividend_shift */
-       ssl     a4
-       sll     a3, a3          /* divisor <<= count */
-       movi    a2, 0           /* quotient = 0 */
-
-       /* test-subtract-and-shift loop; one quotient bit on each iteration */
-#if XCHAL_HAVE_LOOPS
-       loopnez a4, .Lloopend
-#endif /* XCHAL_HAVE_LOOPS */
-.Lloop:
-       bltu    a6, a3, .Lzerobit
-       sub     a6, a6, a3
-       addi    a2, a2, 1
-.Lzerobit:
-       slli    a2, a2, 1
-       srli    a3, a3, 1
-#if !XCHAL_HAVE_LOOPS
-       addi    a4, a4, -1
-       bnez    a4, .Lloop
-#endif /* !XCHAL_HAVE_LOOPS */
-.Lloopend:
-
-       bltu    a6, a3, .Lreturn
-       addi    a2, a2, 1       /* increment quotient if dividend >= divisor */
-.Lreturn:
-       leaf_return
-
-.Lle_one:
-       beqz    a3, .Lerror     /* if divisor == 1, return the dividend */
-       leaf_return
-
-.Lspecial:
-       /* return dividend >= divisor */
-       bltu    a6, a3, .Lreturn0
-       movi    a2, 1
-       leaf_return
-
-.Lerror:
-       /* Divide by zero: Use an illegal instruction to force an exception.
-          The subsequent "DIV0" string can be recognized by the exception
-          handler to identify the real cause of the exception.  */
-       ill
-       .ascii  "DIV0"
-
-.Lreturn0:
-       movi    a2, 0
-#endif /* XCHAL_HAVE_DIV32 */
-       leaf_return
-       .size   __udivsi3, . - __udivsi3
-
-#endif /* L_udivsi3 */
-
-
-#ifdef L_divsi3
-       .align  4
-       .global __divsi3
-       .type   __divsi3, @function
-__divsi3:
-       leaf_entry sp, 16
-#if XCHAL_HAVE_DIV32
-       quos    a2, a2, a3
-#else
-       xor     a7, a2, a3      /* sign = dividend ^ divisor */
-       do_abs  a6, a2, a4      /* udividend = abs (dividend) */
-       do_abs  a3, a3, a4      /* udivisor = abs (divisor) */
-       bltui   a3, 2, .Lle_one /* check if udivisor <= 1 */
-       do_nsau a5, a6, a2, a8  /* udividend_shift = nsau (udividend) */
-       do_nsau a4, a3, a2, a8  /* udivisor_shift = nsau (udivisor) */
-       bgeu    a5, a4, .Lspecial
-
-       sub     a4, a4, a5      /* count = udivisor_shift - udividend_shift */
-       ssl     a4
-       sll     a3, a3          /* udivisor <<= count */
-       movi    a2, 0           /* quotient = 0 */
-
-       /* test-subtract-and-shift loop; one quotient bit on each iteration */
-#if XCHAL_HAVE_LOOPS
-       loopnez a4, .Lloopend
-#endif /* XCHAL_HAVE_LOOPS */
-.Lloop:
-       bltu    a6, a3, .Lzerobit
-       sub     a6, a6, a3
-       addi    a2, a2, 1
-.Lzerobit:
-       slli    a2, a2, 1
-       srli    a3, a3, 1
-#if !XCHAL_HAVE_LOOPS
-       addi    a4, a4, -1
-       bnez    a4, .Lloop
-#endif /* !XCHAL_HAVE_LOOPS */
-.Lloopend:
-
-       bltu    a6, a3, .Lreturn
-       addi    a2, a2, 1       /* increment if udividend >= udivisor */
-.Lreturn:
-       neg     a5, a2
-       movltz  a2, a5, a7      /* return (sign < 0) ? -quotient : quotient */
-       leaf_return
-
-.Lle_one:
-       beqz    a3, .Lerror
-       neg     a2, a6          /* if udivisor == 1, then return... */
-       movgez  a2, a6, a7      /* (sign < 0) ? -udividend : udividend */
-       leaf_return
-
-.Lspecial:
-       bltu    a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
-       movi    a2, 1
-       movi    a4, -1
-       movltz  a2, a4, a7      /* else return (sign < 0) ? -1 : 1 */
-       leaf_return
-
-.Lerror:
-       /* Divide by zero: Use an illegal instruction to force an exception.
-          The subsequent "DIV0" string can be recognized by the exception
-          handler to identify the real cause of the exception.  */
-       ill
-       .ascii  "DIV0"
-
-.Lreturn0:
-       movi    a2, 0
-#endif /* XCHAL_HAVE_DIV32 */
-       leaf_return
-       .size   __divsi3, . - __divsi3
-
-#endif /* L_divsi3 */
-
-
-#ifdef L_umodsi3
-       .align  4
-       .global __umodsi3
-       .type   __umodsi3, @function
-__umodsi3:
-       leaf_entry sp, 16
-#if XCHAL_HAVE_DIV32
-       remu    a2, a2, a3
-#else
-       bltui   a3, 2, .Lle_one /* check if the divisor is <= 1 */
-
-       do_nsau a5, a2, a6, a7  /* dividend_shift = nsau (dividend) */
-       do_nsau a4, a3, a6, a7  /* divisor_shift = nsau (divisor) */
-       bgeu    a5, a4, .Lspecial
-
-       sub     a4, a4, a5      /* count = divisor_shift - dividend_shift */
-       ssl     a4
-       sll     a3, a3          /* divisor <<= count */
-
-       /* test-subtract-and-shift loop */
-#if XCHAL_HAVE_LOOPS
-       loopnez a4, .Lloopend
-#endif /* XCHAL_HAVE_LOOPS */
-.Lloop:
-       bltu    a2, a3, .Lzerobit
-       sub     a2, a2, a3
-.Lzerobit:
-       srli    a3, a3, 1
-#if !XCHAL_HAVE_LOOPS
-       addi    a4, a4, -1
-       bnez    a4, .Lloop
-#endif /* !XCHAL_HAVE_LOOPS */
-.Lloopend:
-
-.Lspecial:
-       bltu    a2, a3, .Lreturn
-       sub     a2, a2, a3      /* subtract once more if dividend >= divisor */
-.Lreturn:
-       leaf_return
-
-.Lle_one:
-       bnez    a3, .Lreturn0
-
-       /* Divide by zero: Use an illegal instruction to force an exception.
-          The subsequent "DIV0" string can be recognized by the exception
-          handler to identify the real cause of the exception.  */
-       ill
-       .ascii  "DIV0"
-
-.Lreturn0:
-       movi    a2, 0
-#endif /* XCHAL_HAVE_DIV32 */
-       leaf_return
-       .size   __umodsi3, . - __umodsi3
-
-#endif /* L_umodsi3 */
-
-
-#ifdef L_modsi3
-       .align  4
-       .global __modsi3
-       .type   __modsi3, @function
-__modsi3:
-       leaf_entry sp, 16
-#if XCHAL_HAVE_DIV32
-       rems    a2, a2, a3
-#else
-       mov     a7, a2          /* save original (signed) dividend */
-       do_abs  a2, a2, a4      /* udividend = abs (dividend) */
-       do_abs  a3, a3, a4      /* udivisor = abs (divisor) */
-       bltui   a3, 2, .Lle_one /* check if udivisor <= 1 */
-       do_nsau a5, a2, a6, a8  /* udividend_shift = nsau (udividend) */
-       do_nsau a4, a3, a6, a8  /* udivisor_shift = nsau (udivisor) */
-       bgeu    a5, a4, .Lspecial
-
-       sub     a4, a4, a5      /* count = udivisor_shift - udividend_shift */
-       ssl     a4
-       sll     a3, a3          /* udivisor <<= count */
-
-       /* test-subtract-and-shift loop */
-#if XCHAL_HAVE_LOOPS
-       loopnez a4, .Lloopend
-#endif /* XCHAL_HAVE_LOOPS */
-.Lloop:
-       bltu    a2, a3, .Lzerobit
-       sub     a2, a2, a3
-.Lzerobit:
-       srli    a3, a3, 1
-#if !XCHAL_HAVE_LOOPS
-       addi    a4, a4, -1
-       bnez    a4, .Lloop
-#endif /* !XCHAL_HAVE_LOOPS */
-.Lloopend:
-
-.Lspecial:
-       bltu    a2, a3, .Lreturn
-       sub     a2, a2, a3      /* subtract again if udividend >= udivisor */
-.Lreturn:
-       bgez    a7, .Lpositive
-       neg     a2, a2          /* if (dividend < 0), return -udividend */
-.Lpositive:
-       leaf_return
-
-.Lle_one:
-       bnez    a3, .Lreturn0
-
-       /* Divide by zero: Use an illegal instruction to force an exception.
-          The subsequent "DIV0" string can be recognized by the exception
-          handler to identify the real cause of the exception.  */
-       ill
-       .ascii  "DIV0"
-
-.Lreturn0:
-       movi    a2, 0
-#endif /* XCHAL_HAVE_DIV32 */
-       leaf_return
-       .size   __modsi3, . - __modsi3
-
-#endif /* L_modsi3 */
-
-
-#ifdef __XTENSA_EB__
-#define uh a2
-#define ul a3
-#else
-#define uh a3
-#define ul a2
-#endif /* __XTENSA_EB__ */
-
-
-#ifdef L_ashldi3
-       .align  4
-       .global __ashldi3
-       .type   __ashldi3, @function
-__ashldi3:
-       leaf_entry sp, 16
-       ssl     a4
-       bgei    a4, 32, .Llow_only
-       src     uh, uh, ul
-       sll     ul, ul
-       leaf_return
-
-.Llow_only:
-       sll     uh, ul
-       movi    ul, 0
-       leaf_return
-       .size   __ashldi3, . - __ashldi3
-
-#endif /* L_ashldi3 */
-
-
-#ifdef L_ashrdi3
-       .align  4
-       .global __ashrdi3
-       .type   __ashrdi3, @function
-__ashrdi3:
-       leaf_entry sp, 16
-       ssr     a4
-       bgei    a4, 32, .Lhigh_only
-       src     ul, uh, ul
-       sra     uh, uh
-       leaf_return
-
-.Lhigh_only:
-       sra     ul, uh
-       srai    uh, uh, 31
-       leaf_return
-       .size   __ashrdi3, . - __ashrdi3
-
-#endif /* L_ashrdi3 */
-
-
-#ifdef L_lshrdi3
-       .align  4
-       .global __lshrdi3
-       .type   __lshrdi3, @function
-__lshrdi3:
-       leaf_entry sp, 16
-       ssr     a4
-       bgei    a4, 32, .Lhigh_only1
-       src     ul, uh, ul
-       srl     uh, uh
-       leaf_return
-
-.Lhigh_only1:
-       srl     ul, uh
-       movi    uh, 0
-       leaf_return
-       .size   __lshrdi3, . - __lshrdi3
-
-#endif /* L_lshrdi3 */
-
-
-#include "ieee754-df.S"
-#include "ieee754-sf.S"
diff --git a/gcc/config/xtensa/t-xtensa b/gcc/config/xtensa/t-xtensa

index c0a7cb5202f75ca57c4b02d5954341abcc484f34..31ac2ad2452bd815231656fce5b1d0b91b94a842 100644 (file)
--- a/gcc/config/xtensa/t-xtensa
+++ b/gcc/config/xtensa/t-xtensa
@@ -17,18 +17,6 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
-LIB1ASMSRC = xtensa/lib1funcs.asm
-LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \
-       _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \
-       _ashldi3 _ashrdi3 _lshrdi3 \
-       _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \
-       _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \
-       _floatdisf _floatundisf \
-       _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \
-       _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \
-       _floatdidf _floatundidf \
-       _truncdfsf2 _extendsfdf2
-
  LIB2FUNCS_EXTRA = $(srcdir)/config/xtensa/lib2funcs.S
  
  $(out_object_file): gt-xtensa.h
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog

index b5d9c243a9813672091c78a92a0476335a1d7180..6b2514aba9acbfb33009c6f2ca44e388b7c10d4f 100644 (file)
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,123 @@
+2011-11-02  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
+
+       * Makefile.in ($(lib1asmfuncs-o), $(lib1asmfuncs-s-o)): Use
+       $(srcdir) to refer to $(LIB1ASMSRC).
+       Use $<.
+       * config/arm/bpabi-v6m.S, config/arm/bpabi.S,
+       config/arm/ieee754-df.S, config/arm/ieee754-sf.S,
+       config/arm/lib1funcs.S: New files.
+       * config/arm/libunwind.S [!__symbian__]: Use lib1funcs.S.
+       * config/arm/t-arm: New file.
+       * config/arm/t-bpabi (LIB1ASMFUNCS): Set.
+       * config/arm/t-elf, config/arm/t-linux, config/arm/t-linux-eabi,
+       config/arm/t-strongarm-elf: New files.
+       * config/arm/t-symbian (LIB1ASMFUNCS): Set.
+       * config/arm/t-vxworks, config/arm/t-wince-pe: New files.
+       * config/avr/lib1funcs.S: New file.
+       * config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+       * config/bfin/lib1funcs.S, config/bfin/t-bfin: New files.
+       * config/c6x/lib1funcs.S: New file.
+       * config/c6x/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+       * config/fr30/lib1funcs.S, config/fr30/t-fr30: New files.
+       * config/frv/lib1funcs.S: New file.
+       * config/frv/t-frv (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+       * config/h8300/lib1funcs.S, config/h8300/t-h8300: New files.
+       * config/i386/cygwin.S, config/i386/t-chkstk: New files.
+       * config/ia64/__divxf3.asm: Rename to ...
+       * config/ia64/__divxf3.S: ... this.
+       Adapt lib1funcs.asm filename.
+       * config/ia64/_fixtfdi.asm: Rename to ...
+       * config/ia64/_fixtfdi.S: ... this.
+       Adapt lib1funcs.asm filename.
+       * config/ia64/_fixunstfdi.asm: Rename to ...
+       * config/ia64/_fixunstfdi.S: ... this.
+       Adapt lib1funcs.asm filename.
+       * config/ia64/_floatditf.asm: Rename to ...
+       * config/ia64/_floatditf.S: ... this.
+       Adapt lib1funcs.asm filename.
+       * config/ia64/lib1funcs.S: New file.
+       * config/ia64/t-hpux (LIB1ASMFUNCS): Set.
+       * config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+       * config/ia64/t-softfp-compat (libgcc1-tf-compats): Adapt suffix.
+       * config/m32c/lib1funcs.S, config/m32c/t-m32c: New files.
+       * config/m68k/lb1sf68.S, config/m68k/t-floatlib: New files.
+       * config/mcore/lib1funcs.S, config/mcore/t-mcore: New files.
+       * config/mep/lib1funcs.S: New file.
+       * config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+       * config/mips/mips16.S: New file.
+       * config/mips/t-mips16 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+       * config/pa/milli64.S: New file.
+       * config/pa/t-linux, config/pa/t-linux64: New files.
+       * config/picochip/lib1funcs.S: New file.
+       * config/picochip/t-picochip (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+       * config/sh/lib1funcs.S, config/sh/lib1funcs.h: New files.
+       * config/sh/t-linux (LIB1ASMFUNCS_CACHE): Set.
+       * config/sh/t-netbsd: New file.
+       * config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE): Set.
+       Use $(srcdir) to refer to lib1funcs.S, adapt filename.
+       * config/sh/t-sh64: New file.
+       * config/sparc/lb1spc.S: New file.
+       * config/sparc/t-softmul (LIB1ASMSRC): Adapt sparc/lb1spc.asm
+       filename.
+       * config/v850/lib1funcs.S, config/v850/t-v850: New files.
+       * config/vax/lib1funcs.S, config/vax/t-linux: New files.
+       * config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S,
+       config/xtensa/lib1funcs.S: New files.
+       * config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+       * config.host (arm-wrs-vxworks): Add arm/t-arm, arm/t-vxworks to
+       tmake_file.
+       (arm*-*-freebsd*): Add arm/t-arm, arm/t-strongarm-elf to tmake_file.
+       (arm*-*-netbsdelf*): Add arm/t-arm to tmake_file.
+       (arm*-*-linux*): Likewise.
+       Add arm/t-elf, arm/t-bpabi, arm/t-linux-eabi to tmake_file for
+       arm*-*-linux-*eabi, add arm/t-linux otherwise.
+       (arm*-*-uclinux*): Add arm/t-arm, arm/t-elf to tmake_file.
+       (arm*-*-ecos-elf): Likewise.
+       (arm*-*-eabi*, arm*-*-symbianelf*): Likewise.
+       (arm*-*-rtems*): Likewise.
+       (arm*-*-elf): Likewise.
+       (arm*-wince-pe*): Add arm/t-arm, arm/t-wince-pe to tmake_file.
+       (avr-*-rtems*): Add to tmake_file, add avr/t-avr.
+       (bfin*-elf*): Add bfin/t-bfin to tmake_file.
+       (bfin*-uclinux*): Likewise.
+       (bfin*-linux-uclibc*): Likewise.
+       (bfin*-rtems*): Likewise.
+       (bfin*-*): Likewise.
+       (fido-*-elf): Merge into m68k-*-elf*.
+       (fr30-*-elf): Add fr30/t-fr30 to tmake_file.
+       (frv-*-*linux*): Add frv/t-frv to tmake_file.
+       (h8300-*-rtems*): Add h8300/t-h8300 to tmake_file.
+       (h8300-*-elf*): Likewise.
+       (hppa*64*-*-linux*): Add pa/t-linux, pa/t-linux64 to tmake_file.
+       (hppa*-*-linux*): Add pa/t-linux to tmake_file.
+       (i[34567]86-*-cygwin*): Add i386/t-chkstk to tmake_file.
+       (i[34567]86-*-mingw*): Likewise.
+       (x86_64-*-mingw*): Likewise.
+       (i[34567]86-*-interix3*): Likewise.
+       (ia64*-*-hpux*): Add ia64/t-ia64, ia64/t-hpux to tmake_file.
+       (ia64-hp-*vms*): Add ia64/t-ia64 to tmake_file.
+       (m68k-*-elf*): Also handle fido-*-elf.
+       Add m68k/t-floatlib to tmake_file.
+       (m68k-*-uclinux*): Add m68k/t-floatlib to tmake_file.
+       (m68k-*-linux*): Likewise.
+       (m68k-*-rtems*): Likewise.
+       (mcore-*-elf): Add mcore/t-mcore to tmake_file.
+       (sh-*-elf*, sh[12346l]*-*-elf*): Add sh/t-sh64 to tmake_file for
+       sh64*-*-*.
+       (sh-*-linux*, sh[2346lbe]*-*-linux*): Add sh/t-sh to tmake_file.
+       Add sh/t-sh64 to tmake_file for sh64*-*-linux*.
+       (sh-*-netbsdelf*, shl*-*-netbsdelf*, sh5-*-netbsd*)
+       (sh5l*-*-netbsd*, sh64-*-netbsd*, sh64l*-*-netbsd*): Add sh/t-sh,
+       sh/t-netbsd to tmake_file.
+       Add sh/t-sh64 to tmake_file for sh5*-*-netbsd*, sh64*-netbsd*.
+       (sh-*-rtems*): Add sh/t-sh to tmake_file.
+       (sh-wrs-vxworks): Likewise.
+       (sparc-*-linux*): Add sparc/t-softmul to tmake_file except for
+       *-leon[3-9]*.
+       (v850*-*-*): Add v850/t-v850 to tmake_file.
+       (vax-*-linux*): Add vax/t-linux to tmake_file.
+       (m32c-*-elf*, m32c-*-rtems*): Add m32c/t-m32c to tmake_file.
+
  2011-11-02  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
  
         * crtstuff.c: New file.
diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in

index 467901b057a202a57fb4a680b1df0f3e69dc40c3..6bbb369f8e814ddb7f1ffb9b5d8c99637465efb6 100644 (file)
--- a/libgcc/Makefile.in
+++ b/libgcc/Makefile.in
@@ -394,25 +394,22 @@ LIB2_DIVMOD_FUNCS := $(filter-out $(LIB2FUNCS_EXCLUDE) $(LIB1ASMFUNCS), \
  ifeq ($(enable_shared),yes)
  
  lib1asmfuncs-o = $(patsubst %,%$(objext),$(LIB1ASMFUNCS))
-$(lib1asmfuncs-o): %$(objext): $(gcc_srcdir)/config/$(LIB1ASMSRC) %.vis
-       $(gcc_compile) -DL$* -xassembler-with-cpp \
-         -c $(gcc_srcdir)/config/$(LIB1ASMSRC) -include $*.vis
+$(lib1asmfuncs-o): %$(objext): $(srcdir)/config/$(LIB1ASMSRC) %.vis
+       $(gcc_compile) -DL$* -xassembler-with-cpp -c $< -include $*.vis
  $(patsubst %,%.vis,$(LIB1ASMFUNCS)): %.vis: %_s$(objext)
         $(gen-hide-list)
  libgcc-objects += $(lib1asmfuncs-o)
  
  lib1asmfuncs-s-o = $(patsubst %,%_s$(objext),$(LIB1ASMFUNCS))
-$(lib1asmfuncs-s-o): %_s$(objext): $(gcc_srcdir)/config/$(LIB1ASMSRC)
-       $(gcc_s_compile) -DL$* -xassembler-with-cpp \
-         -c $(gcc_srcdir)/config/$(LIB1ASMSRC)
+$(lib1asmfuncs-s-o): %_s$(objext): $(srcdir)/config/$(LIB1ASMSRC)
+       $(gcc_s_compile) -DL$* -xassembler-with-cpp -c $<
  libgcc-s-objects += $(lib1asmfuncs-s-o)
  
  else
  
  lib1asmfuncs-o = $(patsubst %,%$(objext),$(LIB1ASMFUNCS))
-$(lib1asmfuncs-o): %$(objext): $(gcc_srcdir)/config/$(LIB1ASMSRC)
-       $(gcc_compile) -DL$* -xassembler-with-cpp \
-         -c $(gcc_srcdir)/config/$(LIB1ASMSRC)
+$(lib1asmfuncs-o): %$(objext): $(srcdir)/config/$(LIB1ASMSRC)
+       $(gcc_compile) -DL$* -xassembler-with-cpp -c $<
  libgcc-objects += $(lib1asmfuncs-o)
  
  endif
diff --git a/libgcc/config.host b/libgcc/config.host

index 01e2f21a7978bf23c0903df502489cb712b0fbd5..0a05ea184b067b3569caf48aacfb5b68f1b673cc 100644 (file)
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -306,22 +306,25 @@ alpha*-dec-*vms*)
         md_unwind_header=alpha/vms-unwind.h
         ;;
  arm-wrs-vxworks)
-       tmake_file="$tmake_file t-fdpbit"
+       tmake_file="$tmake_file arm/t-arm arm/t-vxworks t-fdpbit"
         extra_parts="$extra_parts crti.o crtn.o"
         ;;
  arm*-*-freebsd*)
-       tmake_file="$tmake_file t-fdpbit"
+       tmake_file="$tmake_file arm/t-arm arm/t-strongarm-elf t-fdpbit"
         ;;
  arm*-*-netbsdelf*)
-       tmake_file="$tmake_file t-slibgcc-gld-nover"
+       tmake_file="$tmake_file arm/t-arm t-slibgcc-gld-nover"
         ;;
  arm*-*-linux*)                 # ARM GNU/Linux with ELF
-       tmake_file="${tmake_file} t-fixedpoint-gnu-prefix"
+       tmake_file="${tmake_file} arm/t-arm t-fixedpoint-gnu-prefix"
         case ${host} in
         arm*-*-linux-*eabi)
-         tmake_file="${tmake_file} arm/t-bpabi t-slibgcc-libgcc"
+         tmake_file="${tmake_file} arm/t-elf arm/t-bpabi arm/t-linux-eabi t-slibgcc-libgcc"
           unwind_header=config/arm/unwind-arm.h
           ;;
+       *)
+         tmake_file="$tmake_file arm/t-linux"
+         ;;
         esac
         tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
         ;;
@@ -333,15 +336,15 @@ arm*-*-uclinux*)          # ARM ucLinux
           unwind_header=config/arm/unwind-arm.h
           ;;
         esac
-       tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
+       tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
         extra_parts="$extra_parts crti.o crtn.o"
         ;;
  arm*-*-ecos-elf)
-       tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
+       tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
         extra_parts="$extra_parts crti.o crtn.o"
         ;;
  arm*-*-eabi* | arm*-*-symbianelf* )
-       tmake_file="${tmake_file} t-fixedpoint-gnu-prefix"
+       tmake_file="${tmake_file} arm/t-arm arm/t-elf t-fixedpoint-gnu-prefix"
         case ${host} in
         arm*-*-eabi*)
           tmake_file="${tmake_file} arm/t-bpabi"
@@ -356,17 +359,18 @@ arm*-*-eabi* | arm*-*-symbianelf* )
         unwind_header=config/arm/unwind-arm.h
         ;;
  arm*-*-rtems*)
-       tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
+       tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
         extra_parts="$extra_parts crti.o crtn.o"
         ;;
  arm*-*-elf)
-       tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
+       tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
         extra_parts="$extra_parts crti.o crtn.o"
         ;;
  arm*-wince-pe*)
+       tmake_file="$tmake_file arm/t-arm arm/t-wince-pe"
         ;;
  avr-*-rtems*)
-       tmake_file=t-fpbit
+       tmake_file="$tmake_file avr/t-avr t-fpbit"
         # Don't use default.
         extra_parts=
         ;;
@@ -375,27 +379,27 @@ avr-*-*)
         tmake_file="${cpu_type}/t-avr t-fpbit"
         ;;
  bfin*-elf*)
-       tmake_file="bfin/t-crtlibid bfin/t-crtstuff t-fdpbit"
+       tmake_file="bfin/t-bfin bfin/t-crtlibid bfin/t-crtstuff t-fdpbit"
         extra_parts="$extra_parts crtbeginS.o crtendS.o crti.o crtn.o crtlibid.o"
          ;;
  bfin*-uclinux*)
-       tmake_file="bfin/t-crtlibid bfin/t-crtstuff t-fdpbit"
+       tmake_file="bfin/t-bfin bfin/t-crtlibid bfin/t-crtstuff t-fdpbit"
         extra_parts="$extra_parts crtbeginS.o crtendS.o crtlibid.o"
         md_unwind_header=bfin/linux-unwind.h
          ;;
  bfin*-linux-uclibc*)
-       tmake_file="$tmake_file bfin/t-crtstuff t-fdpbit bfin/t-linux"
+       tmake_file="$tmake_file bfin/t-bfin bfin/t-crtstuff t-fdpbit bfin/t-linux"
         # No need to build crtbeginT.o on uClibc systems.  Should probably
         # be moved to the OS specific section above.
         extra_parts="crtbegin.o crtbeginS.o crtend.o crtendS.o"
         md_unwind_header=bfin/linux-unwind.h
         ;;
  bfin*-rtems*)
-       tmake_file="$tmake_file t-fdpbit"
+       tmake_file="$tmake_file bfin/t-bfin t-fdpbit"
         extra_parts="$extra_parts crti.o crtn.o"
         ;;
  bfin*-*)
-       tmake_file="$tmake_file t-fdpbit"
+       tmake_file="$tmake_file bfin/t-bfin t-fdpbit"
         extra_parts="crtbegin.o crtend.o crti.o crtn.o"
          ;;
  crisv32-*-elf)
@@ -415,10 +419,8 @@ cris-*-none)
  cris-*-linux* | crisv32-*-linux*)
         tmake_file="$tmake_file t-fdpbit cris/t-linux"
         ;;
-fido-*-elf)
-       ;;
  fr30-*-elf)
-       tmake_file="$tmake_file t-fdpbit"
+       tmake_file="$tmake_file fr30/t-fr30 t-fdpbit"
         extra_parts="$extra_parts crti.o crtn.o"
         ;;
  frv-*-elf)
@@ -427,20 +429,21 @@ frv-*-elf)
         extra_parts="frvbegin.o frvend.o"
         ;;
  frv-*-*linux*)
-       tmake_file="$tmake_file t-fdpbit frv/t-linux"
+       tmake_file="$tmake_file frv/t-frv frv/t-linux t-fdpbit"
         ;;
  h8300-*-rtems*)
-       tmake_file="$tmake_file t-fpbit"
+       tmake_file="$tmake_file h8300/t-h8300 t-fpbit"
         extra_parts="$extra_parts crti.o crtn.o"
         ;;
  h8300-*-elf*)
-       tmake_file="$tmake_file t-fpbit"
+       tmake_file="$tmake_file h8300/t-h8300 t-fpbit"
         extra_parts="$extra_parts crti.o crtn.o"
         ;;
  hppa*64*-*-linux*)
+       tmake_file="$tmake_file pa/t-linux pa/t-linux64"
         ;;
  hppa*-*-linux*)
-       tmake_file="$tmake_file t-slibgcc-libgcc"
+       tmake_file="$tmake_file pa/t-linux t-slibgcc-libgcc"
         # Set the libgcc version number
         if test x$enable_sjlj_exceptions = xyes; then
             tmake_file="$tmake_file pa/t-slibgcc-sjlj-ver"
@@ -565,7 +568,7 @@ i[34567]86-*-cygwin*)
         else
                 tmake_dlldir_file="i386/t-dlldir-x"
         fi
-       tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-cygwin i386/t-crtfm t-dfprules"
+       tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-cygwin i386/t-crtfm i386/t-chkstk t-dfprules"
         case ${target_thread_file} in
           posix)
             tmake_file="i386/t-mingw-pthread $tmake_file"
@@ -586,7 +589,7 @@ i[34567]86-*-mingw*)
         else
                 tmake_dlldir_file="i386/t-dlldir-x"
         fi
-       tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-mingw32 i386/t-crtfm t-dfprules"
+       tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-mingw32 i386/t-crtfm i386/t-chkstk t-dfprules"
         md_unwind_header=i386/w32-unwind.h
         ;;
  x86_64-*-mingw*)
@@ -602,10 +605,11 @@ x86_64-*-mingw*)
         else
                 tmake_dlldir_file="i386/t-dlldir-x"
         fi
-       tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-mingw32 t-dfprules i386/t-crtfm"
+       tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-mingw32 t-dfprules i386/t-crtfm i386/t-chkstk"
         extra_parts="$extra_parts crtfastmath.o"
         ;;
  i[34567]86-*-interix3*)
+       tmake_file="$tmake_file i386/t-chkstk"
         ;;
  ia64*-*-elf*)
         extra_parts="$extra_parts crtbeginS.o crtendS.o crtfastmath.o"
@@ -625,10 +629,10 @@ ia64*-*-linux*)
         md_unwind_header=ia64/linux-unwind.h
         ;;
  ia64*-*-hpux*)
-       tmake_file="ia64/t-hpux t-slibgcc ia64/t-slibgcc-hpux t-slibgcc-hpux"
+       tmake_file="ia64/t-ia64 ia64/t-hpux t-slibgcc ia64/t-slibgcc-hpux t-slibgcc-hpux"
         ;;
  ia64-hp-*vms*)
-       tmake_file="$tmake_file ia64/t-eh-ia64 ia64/t-vms t-slibgcc-vms"
+       tmake_file="$tmake_file ia64/t-ia64 ia64/t-eh-ia64 ia64/t-vms t-slibgcc-vms"
         extra_parts="$extra_parts crtinitS.o"
         md_unwind_header=ia64/vms-unwind.h
         ;;
@@ -660,18 +664,21 @@ m32r-*-linux*)
  m32rle-*-linux*)
         tmake_file="$tmake_file m32r/t-linux t-fdpbit"
         ;;
-m68k-*-elf*)
+m68k-*-elf* | fido-*-elf)
+       tmake_file="$tmake_file m68k/t-floatlib"
         ;;
  m68k*-*-netbsdelf*)
         ;;
  m68k*-*-openbsd*)
         ;;
  m68k-*-uclinux*)               # Motorola m68k/ColdFire running uClinux with uClibc
+       tmake_file="$tmake_file m68k/t-floatlib"
         md_unwind_header=m68k/linux-unwind.h
         ;;
  m68k-*-linux*)                 # Motorola m68k's running GNU/Linux
                                 # with ELF format using glibc 2
                                 # aka the GNU/Linux C library 6.
+       tmake_file="$tmake_file m68k/t-floatlib"
         # If not configured with --enable-sjlj-exceptions, bump the
         # libgcc version number.
         if test x$enable_sjlj_exceptions != xyes; then
@@ -680,10 +687,11 @@ m68k-*-linux*)                    # Motorola m68k's running GNU/Linux
         md_unwind_header=m68k/linux-unwind.h
         ;;
  m68k-*-rtems*)
+       tmake_file="$tmake_file m68k/t-floatlib"
         extra_parts="$extra_parts crti.o crtn.o"
         ;;
  mcore-*-elf)
-       tmake_file=t-fdpbit
+       tmake_file="mcore/t-mcore t-fdpbit"
         extra_parts="$extra_parts crti.o crtn.o"
         ;;
  microblaze*-linux*)
@@ -905,6 +913,10 @@ sh-*-elf* | sh[12346l]*-*-elf*)
                 libic_invalidate_array_4-200.a \
                 libic_invalidate_array_4a.a \
                 libgcc-Os-4-200.a libgcc-4-300.a"
+       case ${host} in sh64*-*-*)
+               tmake_file="$tmake_file sh/t-sh64"
+               ;;
+       esac
         case ${host} in
         sh*-superh-elf)
                 tmake_file="$tmake_file sh/t-superh"
@@ -913,23 +925,33 @@ sh-*-elf* | sh[12346l]*-*-elf*)
         esac
         ;;
  sh-*-linux* | sh[2346lbe]*-*-linux*)
-       tmake_file="${tmake_file} t-slibgcc-libgcc sh/t-linux t-fdpbit"
+       tmake_file="${tmake_file} sh/t-sh t-slibgcc-libgcc sh/t-linux t-fdpbit"
+       case ${host} in sh64*-*-linux*)
+               tmake_file="$tmake_file sh/t-sh64"
+               ;;
+       esac
         md_unwind_header=sh/linux-unwind.h
         ;;
  sh-*-netbsdelf* | shl*-*-netbsdelf* | sh5-*-netbsd* | sh5l*-*-netbsd* | \
    sh64-*-netbsd* | sh64l*-*-netbsd*)
+       tmake_file="$tmake_file sh/t-sh sh/t-netbsd"
+       case ${host} in
+       sh5*-*-netbsd* | sh64*-netbsd*)
+               tmake_file="$tmake_file sh/t-sh64"
+               ;;
+       esac
         # NetBSD's C library includes a fast software FP library that
         # has support for setting/setting the rounding mode, exception
         # mask, etc.  Therefore, we don't want to include software FP
         # in libgcc.
         ;;
  sh-*-rtems*)
-       tmake_file="$tmake_file t-crtstuff-pic t-fdpbit"
+       tmake_file="$tmake_file sh/t-sh t-crtstuff-pic t-fdpbit"
         extra_parts="$extra_parts crt1.o crti.o crtn.o crtbeginS.o crtendS.o \
                 $sh_ic_extra_parts $sh_opt_extra_parts"
         ;;
  sh-wrs-vxworks)
-       tmake_file="$tmake_file t-crtstuff-pic t-fdpbit"
+       tmake_file="$tmake_file sh/t-sh t-crtstuff-pic t-fdpbit"
         ;;
  sparc-*-netbsdelf*)
         ;;
@@ -956,6 +978,13 @@ sparc-*-linux*)            # SPARC's running GNU/Linux, libc6
                 tmake_file="${tmake_file} sparc/t-linux"
                 ;;
         esac
+       case ${host} in
+       *-leon[3-9]*)
+               ;;
+       *)
+               tmake_file="$tmake_file sparc/t-softmul"
+               ;;
+       esac
         extra_parts="$extra_parts crtfastmath.o"
         md_unwind_header=sparc/linux-unwind.h
         ;;
@@ -1007,9 +1036,10 @@ tic6x-*-elf)
         unwind_header=config/c6x/unwind-c6x.h
         ;;
  v850*-*-*)
-       tmake_file=t-fdpbit
+       tmake_file="v850/t-v850 t-fdpbit"
         ;;
  vax-*-linux*)
+       tmake_file="$tmake_file vax/t-linux"
         ;;
  vax-*-netbsdelf*)
         ;;
@@ -1032,6 +1062,7 @@ am33_2.0-*-linux*)
         tmake_file="$tmake_file t-fdpbit"
         ;;
  m32c-*-elf*|m32c-*-rtems*)
+       tmake_file="$tmake_file m32c/t-m32c"
         ;;
  mep*-*-*)
         tmake_file="mep/t-mep t-fdpbit"
diff --git a/libgcc/config/arm/bpabi-v6m.S b/libgcc/config/arm/bpabi-v6m.S

new file mode 100644 (file)

index 0000000..4ecea6d
--- /dev/null
+++ b/libgcc/config/arm/bpabi-v6m.S
@@ -0,0 +1,318 @@
+/* Miscellaneous BPABI functions.  ARMv6M implementation
+
+   Copyright (C) 2006, 2008, 2009, 2010  Free Software Foundation, Inc.
+   Contributed by CodeSourcery.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef __ARM_EABI__
+/* Some attributes that are common to all routines in this file.  */
+       /* Tag_ABI_align_needed: This code does not require 8-byte
+          alignment from the caller.  */
+       /* .eabi_attribute 24, 0  -- default setting.  */
+       /* Tag_ABI_align_preserved: This code preserves 8-byte
+          alignment in any callee.  */
+       .eabi_attribute 25, 1
+#endif /* __ARM_EABI__ */
+
+#ifdef L_aeabi_lcmp
+
+/* __aeabi_lcmp: three-way signed comparison of the 64-bit value in
+   xxh:xxl against the one in yyh:yyl (register aliases that are not
+   defined in this file).  Leaves -1, 0 or 1 in r0.  */
+FUNC_START aeabi_lcmp
+       @ The high words are compared as signed values.
+       cmp     xxh, yyh
+       beq     1f
+       bgt     2f
+       @ xxh < yyh: build -1 as the negation of 1.
+       mov     r0, #1
+       neg     r0, r0
+       RET
+2:
+       @ xxh > yyh.
+       mov     r0, #1
+       RET
+1:
+       @ High words are equal, so the low words decide; they are compared
+       @ as unsigned values (bhi).  The branches below rely on the flags
+       @ left by this subtraction, and r0 already holds the zero result
+       @ when the low words are equal.
+       sub     r0, xxl, yyl
+       beq     1f
+       bhi     2f
+       mov     r0, #1
+       neg     r0, r0
+       RET
+2:
+       mov     r0, #1
+1:
+       RET
+       FUNC_END aeabi_lcmp
+
+#endif /* L_aeabi_lcmp */
+       
+#ifdef L_aeabi_ulcmp
+
+/* __aeabi_ulcmp: three-way unsigned comparison of the 64-bit value in
+   xxh:xxl against the one in yyh:yyl.  Leaves -1, 0 or 1 in r0.  */
+FUNC_START aeabi_ulcmp
+       cmp     xxh, yyh
+       bne     1f
+       @ High words equal: the low words decide.  r0 is already zero when
+       @ they are equal too, and the flags tested at 1: come from this
+       @ subtraction.
+       sub     r0, xxl, yyl
+       beq     2f
+1:
+       @ Reached with the flags of whichever comparison found a
+       @ difference; carry set means the first operand is the larger one.
+       bcs     1f
+       mov     r0, #1
+       neg     r0, r0
+       RET
+1:
+       mov     r0, #1
+2:
+       RET
+       FUNC_END aeabi_ulcmp
+
+#endif /* L_aeabi_ulcmp */
+
+/* test_div_by_zero SIGNED
+
+   Expands to a check of the 64-bit divisor in yyh:yyl.  When it is
+   non-zero, control falls out of the macro at label 7 with all registers
+   untouched.  When it is zero, the dividend in xxh:xxl is replaced by the
+   value handed to __aeabi_ldiv0 and control is transferred there:
+
+     unsigned:  0 stays 0, anything else becomes 0xffffffffffffffff
+     signed:    0 stays 0, negative becomes 0x8000000000000000,
+                positive becomes 0x7fffffffffffffff
+
+   SIGNED is the literal word "unsigned" to select the first behaviour;
+   any other argument selects the signed one.  */
+.macro test_div_by_zero signed
+       cmp     yyh, #0
+       bne     7f
+       cmp     yyl, #0
+       bne     7f
+       cmp     xxh, #0
+       .ifc    \signed, unsigned
+       @ Only zero versus non-zero matters here, so the low word is
+       @ tested only when the high word is zero.
+       bne     2f
+       cmp     xxl, #0
+2:
+       beq     3f
+       mov     xxh, #0
+       mvn     xxh, xxh                @ 0xffffffff
+       mov     xxl, xxh
+3:
+       .else
+       @ The sign of the dividend is decided by the high word alone.  The
+       @ low word must only be tested for zero: comparing it against zero
+       @ and then branching on "lt" would treat a positive dividend with
+       @ bit 31 of the low word set (0x0000000080000000 for instance) as
+       @ negative.
+       blt     6f
+       bgt     4f
+       cmp     xxl, #0
+       beq     5f
+4:     mov     xxl, #0
+       mvn     xxl, xxl                @ 0xffffffff
+       lsr     xxh, xxl, #1            @ 0x7fffffff
+       b       5f
+6:     mov     xxh, #0x80
+       lsl     xxh, xxh, #24           @ 0x80000000
+       mov     xxl, #0
+5:
+       .endif
+       @ tailcalls are tricky on v6-m.
+       @ Three words are pushed; the third slot is then overwritten with
+       @ the address of __aeabi_ldiv0, computed from the pc-relative
+       @ offset stored at 1:, so that the pop below restores r0 and r1 and
+       @ loads that address into pc.
+       push    {r0, r1, r2}
+       ldr     r0, 1f
+       adr     r1, 1f
+       add     r0, r1
+       str     r0, [sp, #8]
+       @ We know we are not on armv4t, so pop pc is safe.
+       pop     {r0, r1, pc}
+       .align  2
+1:
+       .word   __aeabi_ldiv0 - 1b
+7:
+.endm
+
+#ifdef L_aeabi_ldivmod
+
+/* __aeabi_ldivmod: signed 64-bit division wrapper around
+   __gnu_ldivmod_helper.  On return r2:r3 are loaded from an 8-byte stack
+   slot whose address was handed to the helper as a stack argument
+   (presumably the helper stores the remainder there -- the helper is not
+   part of this file).  */
+FUNC_START aeabi_ldivmod
+       test_div_by_zero signed
+
+       @ Reserve an 8-byte slot on the stack (its initial contents, r0 and
+       @ r1, are irrelevant) and push its address together with lr.
+       push {r0, r1}
+       mov r0, sp
+       push {r0, lr}
+       @ r0 was used as scratch above; reload its original value from the
+       @ slot.
+       ldr r0, [sp, #8]
+       bl SYM(__gnu_ldivmod_helper)
+       @ Restore lr through r3, drop the pointer/lr pair, then pop the
+       @ slot into r2 and r3.
+       ldr r3, [sp, #4]
+       mov lr, r3
+       add sp, sp, #8
+       pop {r2, r3}
+       RET
+       FUNC_END aeabi_ldivmod
+
+#endif /* L_aeabi_ldivmod */
+
+#ifdef L_aeabi_uldivmod
+
+/* __aeabi_uldivmod: unsigned 64-bit division wrapper around
+   __gnu_uldivmod_helper.  On return r2:r3 are loaded from an 8-byte stack
+   slot whose address was handed to the helper as a stack argument
+   (presumably the helper stores the remainder there -- the helper is not
+   part of this file).  */
+FUNC_START aeabi_uldivmod
+       test_div_by_zero unsigned
+
+       @ Reserve an 8-byte slot on the stack (its initial contents, r0 and
+       @ r1, are irrelevant) and push its address together with lr.
+       push {r0, r1}
+       mov r0, sp
+       push {r0, lr}
+       @ r0 was used as scratch above; reload its original value from the
+       @ slot.
+       ldr r0, [sp, #8]
+       bl SYM(__gnu_uldivmod_helper)
+       @ Restore lr through r3, drop the pointer/lr pair, then pop the
+       @ slot into r2 and r3.
+       ldr r3, [sp, #4]
+       mov lr, r3
+       add sp, sp, #8
+       pop {r2, r3}
+       RET
+       FUNC_END aeabi_uldivmod
+       
+#endif /* L_aeabi_uldivmod */
+
+#ifdef L_arm_addsubsf3
+
+/* __aeabi_frsub: reversed single-precision subtraction.  Flips bit 31 of
+   the first argument (r0) and then calls __aeabi_fadd, so the result is
+   the second argument minus the first.  */
+FUNC_START aeabi_frsub
+
+      push     {r4, lr}
+      @ r4 = 0x80000000, the sign-bit mask, built in two steps.
+      mov      r4, #1
+      lsl      r4, #31
+      eor      r0, r0, r4
+      bl       __aeabi_fadd
+      pop      {r4, pc}
+
+      FUNC_END aeabi_frsub
+
+#endif /* L_arm_addsubsf3 */
+
+#ifdef L_arm_cmpsf2
+
+/* Single-precision comparison helpers, all built on the __lesf2, __gesf2
+   and __eqsf2 routines, which are defined outside this file.  */
+
+/* __aeabi_cfrcmple: same as __aeabi_cfcmple with the two operands
+   exchanged.  Swaps r0 and r1 through ip and joins the code at 6:.  */
+FUNC_START aeabi_cfrcmple
+
+       mov     ip, r0
+       mov     r0, r1
+       mov     r1, ip
+       b       6f
+
+/* __aeabi_cfcmpeq / __aeabi_cfcmple: call __lesf2 and report the result
+   in the condition flags; r0-r4 are saved and restored around the
+   call.  */
+FUNC_START aeabi_cfcmpeq
+FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
+
+       @ The status-returning routines are required to preserve all
+       @ registers except ip, lr, and cpsr.
+6:     push    {r0, r1, r2, r3, r4, lr}
+       bl      __lesf2
+       @ Set the Z flag correctly, and the C flag unconditionally.
+       cmp     r0, #0
+       @ Clear the C flag if the return value was -1, indicating
+       @ that the first operand was smaller than the second.
+       bmi 1f
+       mov     r1, #0
+       cmn     r0, r1
+1:
+       pop     {r0, r1, r2, r3, r4, pc}
+
+       FUNC_END aeabi_cfcmple
+       FUNC_END aeabi_cfcmpeq
+       FUNC_END aeabi_cfrcmple
+
+/* __aeabi_fcmpeq: returns 1 minus the value returned by __eqsf2, i.e. 1
+   exactly when __eqsf2 returns 0.  */
+FUNC_START     aeabi_fcmpeq
+
+       push    {r4, lr}
+       bl      __eqsf2
+       neg     r0, r0
+       add     r0, r0, #1
+       pop     {r4, pc}
+
+       FUNC_END aeabi_fcmpeq
+
+/* COMPARISON cond, helper[, mode]: defines __aeabi_fcmp<cond>, which calls
+   __<helper><mode> and returns 1 in r0 when the helper result compared
+   against zero satisfies <cond>, and 0 otherwise.  */
+.macro COMPARISON cond, helper, mode=sf2
+FUNC_START     aeabi_fcmp\cond
+
+       push    {r4, lr}
+       bl      __\helper\mode
+       cmp     r0, #0
+       b\cond  1f
+       mov     r0, #0
+       pop     {r4, pc}
+1:
+       mov     r0, #1
+       pop     {r4, pc}
+
+       FUNC_END aeabi_fcmp\cond
+.endm
+
+COMPARISON lt, le
+COMPARISON le, le
+COMPARISON gt, ge
+COMPARISON ge, ge
+
+#endif /* L_arm_cmpsf2 */
+
+#ifdef L_arm_addsubdf3
+
+/* __aeabi_drsub: reversed double-precision subtraction.  Flips bit 31 of
+   xxh (a register alias that is not defined in this file; presumably the
+   high word of the first argument) and then calls __aeabi_dadd.  */
+FUNC_START aeabi_drsub
+
+      push     {r4, lr}
+      @ r4 = 0x80000000, the sign-bit mask, built in two steps.
+      mov      r4, #1
+      lsl      r4, #31
+      eor      xxh, xxh, r4
+      bl       __aeabi_dadd
+      pop      {r4, pc}
+
+      FUNC_END aeabi_drsub
+
+#endif /* L_arm_addsubdf3 */
+
+#ifdef L_arm_cmpdf2
+
+/* Double-precision comparison helpers, all built on the __ledf2, __gedf2
+   and __eqdf2 routines, which are defined outside this file.  */
+
+/* __aeabi_cdrcmple: same as __aeabi_cdcmple with the two operands
+   exchanged.  Swaps r0 with r2 and r1 with r3 through ip and joins the
+   code at 6:.  */
+FUNC_START aeabi_cdrcmple
+
+       mov     ip, r0
+       mov     r0, r2
+       mov     r2, ip
+       mov     ip, r1
+       mov     r1, r3
+       mov     r3, ip
+       b       6f
+
+/* __aeabi_cdcmpeq / __aeabi_cdcmple: call __ledf2 and report the result
+   in the condition flags; r0-r4 are saved and restored around the
+   call.  */
+FUNC_START aeabi_cdcmpeq
+FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
+
+       @ The status-returning routines are required to preserve all
+       @ registers except ip, lr, and cpsr.
+6:     push    {r0, r1, r2, r3, r4, lr}
+       bl      __ledf2
+       @ Set the Z flag correctly, and the C flag unconditionally.
+       cmp     r0, #0
+       @ Clear the C flag if the return value was -1, indicating
+       @ that the first operand was smaller than the second.
+       bmi 1f
+       mov     r1, #0
+       cmn     r0, r1
+1:
+       pop     {r0, r1, r2, r3, r4, pc}
+
+       FUNC_END aeabi_cdcmple
+       FUNC_END aeabi_cdcmpeq
+       FUNC_END aeabi_cdrcmple
+
+/* __aeabi_dcmpeq: returns 1 minus the value returned by __eqdf2, i.e. 1
+   exactly when __eqdf2 returns 0.  */
+FUNC_START     aeabi_dcmpeq
+
+       push    {r4, lr}
+       bl      __eqdf2
+       neg     r0, r0
+       add     r0, r0, #1
+       pop     {r4, pc}
+
+       FUNC_END aeabi_dcmpeq
+
+/* COMPARISON cond, helper[, mode]: defines __aeabi_dcmp<cond>, which calls
+   __<helper><mode> and returns 1 in r0 when the helper result compared
+   against zero satisfies <cond>, and 0 otherwise.  */
+.macro COMPARISON cond, helper, mode=df2
+FUNC_START     aeabi_dcmp\cond
+
+       push    {r4, lr}
+       bl      __\helper\mode
+       cmp     r0, #0
+       b\cond  1f
+       mov     r0, #0
+       pop     {r4, pc}
+1:
+       mov     r0, #1
+       pop     {r4, pc}
+
+       FUNC_END aeabi_dcmp\cond
+.endm
+
+COMPARISON lt, le
+COMPARISON le, le
+COMPARISON gt, ge
+COMPARISON ge, ge
+
+#endif /* L_arm_cmpdf2 */
diff --git a/libgcc/config/arm/bpabi.S b/libgcc/config/arm/bpabi.S

new file mode 100644 (file)

index 0000000..2ff3389
--- /dev/null
+++ b/libgcc/config/arm/bpabi.S
@@ -0,0 +1,163 @@
+/* Miscellaneous BPABI functions.
+
+   Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010
+   Free Software Foundation, Inc.
+   Contributed by CodeSourcery, LLC.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef __ARM_EABI__
+/* Some attributes that are common to all routines in this file.  */
+       /* Tag_ABI_align_needed: This code does not require 8-byte
+          alignment from the caller.  */
+       /* .eabi_attribute 24, 0  -- default setting.  */
+       /* Tag_ABI_align_preserved: This code preserves 8-byte
+          alignment in any callee.  */
+       .eabi_attribute 25, 1
+#endif /* __ARM_EABI__ */
+
+#ifdef L_aeabi_lcmp
+
+/* __aeabi_lcmp: three-way signed comparison of the 64-bit value in
+   xxh:xxl against the one in yyh:yyl (register aliases that are not
+   defined in this file).  Leaves -1, 0 or 1 in r0.  */
+ARM_FUNC_START aeabi_lcmp
+       @ High words are compared as signed values; return at once when
+       @ they differ.
+       cmp     xxh, yyh
+       do_it   lt
+       movlt   r0, #-1
+       do_it   gt
+       movgt   r0, #1
+       do_it   ne
+       RETc(ne)
+       @ High words equal: r0 becomes the difference of the low words,
+       @ which is already the right answer (0) when they are equal, and is
+       @ replaced according to an unsigned comparison otherwise.
+       subs    r0, xxl, yyl
+       do_it   lo
+       movlo   r0, #-1
+       do_it   hi
+       movhi   r0, #1
+       RET
+       FUNC_END aeabi_lcmp
+
+#endif /* L_aeabi_lcmp */
+       
+#ifdef L_aeabi_ulcmp
+
+/* __aeabi_ulcmp: three-way unsigned comparison of the 64-bit value in
+   xxh:xxl against the one in yyh:yyl.  Leaves -1, 0 or 1 in r0.  */
+ARM_FUNC_START aeabi_ulcmp
+       @ Compare the high words; return at once when they differ.
+       cmp     xxh, yyh
+       do_it   lo
+       movlo   r0, #-1
+       do_it   hi
+       movhi   r0, #1
+       do_it   ne
+       RETc(ne)
+       @ High words equal: the low words decide.
+       cmp     xxl, yyl
+       do_it   lo
+       movlo   r0, #-1
+       do_it   hi
+       movhi   r0, #1
+       do_it   eq
+       moveq   r0, #0
+       RET
+       FUNC_END aeabi_ulcmp
+
+#endif /* L_aeabi_ulcmp */
+
+/* test_div_by_zero SIGNED
+
+   Expands to a check of the 64-bit divisor in yyh:yyl.  When it is
+   non-zero, control falls out of the end of the macro with all registers
+   untouched.  When it is zero, the dividend in xxh:xxl is replaced by the
+   value handed to __aeabi_ldiv0 and control is transferred there:
+
+     unsigned:  0 stays 0, anything else becomes 0xffffffffffffffff
+     signed:    0 stays 0, negative becomes 0x8000000000000000,
+                positive becomes 0x7fffffffffffffff
+
+   SIGNED is the literal word "unsigned" to select the first behaviour;
+   any other argument selects the signed one.
+
+   In the signed case the sign is taken from the high word alone and the
+   low word is only tested for zero.  Testing the low word with a
+   conditional compare and then using the "lt"/"gt" conditions would treat
+   a positive dividend with bit 31 of the low word set (0x0000000080000000
+   for instance) as negative.  */
+.macro test_div_by_zero signed
+/* Tail-call to divide-by-zero handlers which may be overridden by the user,
+   so unwinding works properly.  */
+#if defined(__thumb2__)
+       cbnz    yyh, 1f
+       cbnz    yyl, 1f
+       cmp     xxh, #0
+       .ifc \signed, unsigned
+       do_it   eq
+       cmpeq   xxl, #0
+       beq     2f
+       mov     xxh, #0xffffffff
+       mov     xxl, xxh
+2:
+       .else
+       @ Negative dividend: saturate and go straight to the tail call.
+       do_it   lt, tt
+       movlt   xxl, #0
+       movlt   xxh, #0x80000000
+       blt     3f
+       @ High word is zero or positive: anything but an all-zero dividend
+       @ saturates to the largest positive value.
+       do_it   eq
+       cmpeq   xxl, #0
+       do_it   ne, t
+       movne   xxh, #0x7fffffff
+       movne   xxl, #0xffffffff
+3:
+       .endif
+       b       SYM (__aeabi_ldiv0) __PLT__
+1:
+#else
+       /* Note: Thumb-1 code calls via an ARM shim on processors which
+          support ARM mode.  */
+       cmp     yyh, #0
+       cmpeq   yyl, #0
+       bne     2f
+       cmp     xxh, #0
+       .ifc \signed, unsigned
+       cmpeq   xxl, #0
+       movne   xxh, #0xffffffff
+       movne   xxl, #0xffffffff
+       .else
+       @ Negative dividend: saturate and go straight to the tail call.
+       movlt   xxh, #0x80000000
+       movlt   xxl, #0
+       blt     1f
+       @ High word is zero or positive: anything but an all-zero dividend
+       @ saturates to the largest positive value.
+       cmpeq   xxl, #0
+       movne   xxh, #0x7fffffff
+       movne   xxl, #0xffffffff
+1:
+       .endif
+       b       SYM (__aeabi_ldiv0) __PLT__
+2:
+#endif
+.endm
+
+#ifdef L_aeabi_ldivmod
+
+/* __aeabi_ldivmod: signed 64-bit division wrapper around
+   __gnu_ldivmod_helper.  On return r2:r3 are loaded from an 8-byte stack
+   slot whose address was handed to the helper as a stack argument
+   (presumably the helper stores the remainder there -- the helper is not
+   part of this file).  */
+ARM_FUNC_START aeabi_ldivmod
+       test_div_by_zero signed
+
+       @ Reserve the 8-byte slot, then push its address together with lr.
+       sub sp, sp, #8
+#if defined(__thumb2__)
+       mov ip, sp
+       push {ip, lr}
+#else
+       do_push {sp, lr}
+#endif
+       bl SYM(__gnu_ldivmod_helper) __PLT__
+       @ Restore lr, drop the pointer/lr pair, then pop the slot into r2
+       @ and r3.
+       ldr lr, [sp, #4]
+       add sp, sp, #8
+       do_pop {r2, r3}
+       RET
+       
+#endif /* L_aeabi_ldivmod */
+
+#ifdef L_aeabi_uldivmod
+
+/* __aeabi_uldivmod: unsigned 64-bit division wrapper around
+   __gnu_uldivmod_helper.  On return r2:r3 are loaded from an 8-byte stack
+   slot whose address was handed to the helper as a stack argument
+   (presumably the helper stores the remainder there -- the helper is not
+   part of this file).  */
+ARM_FUNC_START aeabi_uldivmod
+       test_div_by_zero unsigned
+
+       @ Reserve the 8-byte slot, then push its address together with lr.
+       sub sp, sp, #8
+#if defined(__thumb2__)
+       mov ip, sp
+       push {ip, lr}
+#else
+       do_push {sp, lr}
+#endif
+       bl SYM(__gnu_uldivmod_helper) __PLT__
+       @ Restore lr, drop the pointer/lr pair, then pop the slot into r2
+       @ and r3.
+       ldr lr, [sp, #4]
+       add sp, sp, #8
+       do_pop {r2, r3}
+       RET
+       
+#endif /* L_aeabi_uldivmod */
+       
diff --git a/libgcc/config/arm/ieee754-df.S b/libgcc/config/arm/ieee754-df.S

new file mode 100644 (file)

index 0000000..eb0c386
--- /dev/null
+++ b/libgcc/config/arm/ieee754-df.S
@@ -0,0 +1,1447 @@
+/* ieee754-df.S double-precision floating point support for ARM
+
+   Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009  Free Software Foundation, Inc.
+   Contributed by Nicolas Pitre (nico@cam.org)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/*
+ * Notes: 
+ * 
+ * The goal of this code is to be as fast as possible.  This is
+ * not meant to be easy to understand for the casual reader.
+ * For slightly simpler code please see the single precision version
+ * of this file.
+ * 
+ * Only the default rounding mode is intended for best performances.
+ * Exceptions aren't supported yet, but that can be added quite easily
+ * if necessary without impacting performances.
+ */
+
+
+@ For FPA, float words are always big-endian.
+@ For VFP, float words follow the memory system mode.
+#if defined(__VFP_FP__) && !defined(__ARMEB__)
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#else
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#endif
+
+
+#ifdef L_arm_negdf2
+
+/* __negdf2 / __aeabi_dneg: negate a double by inverting bit 31 of its
+   high word (xh); the low word is left untouched.  */
+ARM_FUNC_START negdf2
+ARM_FUNC_ALIAS aeabi_dneg negdf2
+
+       @ flip sign bit
+       eor     xh, xh, #0x80000000
+       RET
+
+       FUNC_END aeabi_dneg
+       FUNC_END negdf2
+
+#endif
+
+#ifdef L_arm_addsubdf3
+
+@ Double-precision addition and subtraction.
+@
+@ Three entry points share one body: aeabi_drsub flips the sign of the
+@ first operand, subdf3 flips the sign of the second, and both continue
+@ into adddf3 at the local label 1.  Operands arrive in xh-xl and yh-yl
+@ and the result is returned in xh-xl.
+@
+@ Inside the body r4 holds the exponent and r5 the sign (or, early on,
+@ the exponent difference).  The labels Lad_l and Lad_p are also branched
+@ to by the integer/float conversion routines that follow this one; those
+@ push r4, r5 and lr and load r4 and r5 themselves before branching.
+ARM_FUNC_START aeabi_drsub
+
+       eor     xh, xh, #0x80000000     @ flip sign bit of first arg
+       b       1f      
+
+ARM_FUNC_START subdf3
+ARM_FUNC_ALIAS aeabi_dsub subdf3
+
+       eor     yh, yh, #0x80000000     @ flip sign bit of second arg
+#if defined(__INTERWORKING_STUBS__)
+       b       1f                      @ Skip Thumb-code prologue
+#endif
+
+ARM_FUNC_START adddf3
+ARM_FUNC_ALIAS aeabi_dadd adddf3
+
+1:     do_push {r4, r5, lr}
+
+       @ Look for zeroes, equal values, INF, or NAN.
+       shift1  lsl, r4, xh, #1
+       shift1  lsl, r5, yh, #1
+       teq     r4, r5
+       do_it   eq
+       teqeq   xl, yl
+       do_it   ne, ttt
+       COND(orr,s,ne)  ip, r4, xl
+       COND(orr,s,ne)  ip, r5, yl
+       COND(mvn,s,ne)  ip, r4, asr #21
+       COND(mvn,s,ne)  ip, r5, asr #21
+       beq     LSYM(Lad_s)
+
+       @ Compute exponent difference.  Make largest exponent in r4,
+       @ corresponding arg in xh-xl, and positive exponent difference in r5.
+       shift1  lsr, r4, r4, #21
+       rsbs    r5, r4, r5, lsr #21
+       do_it   lt
+       rsblt   r5, r5, #0
+       ble     1f
+       add     r4, r4, r5
+       @ Exchange xh-xl with yh-yl using the three-step exclusive-or swap.
+       eor     yl, xl, yl
+       eor     yh, xh, yh
+       eor     xl, yl, xl
+       eor     xh, yh, xh
+       eor     yl, xl, yl
+       eor     yh, xh, yh
+1:
+       @ If exponent difference is too large, return largest argument
+       @ already in xh-xl.  We need up to 54 bits to handle proper rounding
+       @ of 0x1p54 - 1.1.
+       cmp     r5, #54
+       do_it   hi
+       RETLDM  "r4, r5" hi
+
+       @ Convert mantissa to signed integer.
+       tst     xh, #0x80000000
+       mov     xh, xh, lsl #12
+       mov     ip, #0x00100000
+       orr     xh, ip, xh, lsr #12
+       beq     1f
+#if defined(__thumb2__)
+       negs    xl, xl
+       sbc     xh, xh, xh, lsl #1
+#else
+       rsbs    xl, xl, #0
+       rsc     xh, xh, #0
+#endif
+1:
+       tst     yh, #0x80000000
+       mov     yh, yh, lsl #12
+       orr     yh, ip, yh, lsr #12
+       beq     1f
+#if defined(__thumb2__)
+       negs    yl, yl
+       sbc     yh, yh, yh, lsl #1
+#else
+       rsbs    yl, yl, #0
+       rsc     yh, yh, #0
+#endif
+1:
+       @ If exponent == difference, one or both args were denormalized.
+       @ Since this is not common case, rescale them off line.
+       teq     r4, r5
+       beq     LSYM(Lad_d)
+LSYM(Lad_x):
+
+       @ Compensate for the exponent overlapping the mantissa MSB added later
+       sub     r4, r4, #1
+
+       @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
+       rsbs    lr, r5, #32
+       blt     1f
+       shift1  lsl, ip, yl, lr
+       shiftop adds xl xl yl lsr r5 yl
+       adc     xh, xh, #0
+       shiftop adds xl xl yh lsl lr yl
+       shiftop adcs xh xh yh asr r5 yh
+       b       2f
+1:     sub     r5, r5, #32
+       add     lr, lr, #32
+       cmp     yl, #1
+       shift1  lsl,ip, yh, lr
+       do_it   cs
+       orrcs   ip, ip, #2              @ 2 not 1, to allow lsr #1 later
+       shiftop adds xl xl yh asr r5 yh
+       adcs    xh, xh, yh, asr #31
+2:
+       @ We now have a result in xh-xl-ip.
+       @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
+       and     r5, xh, #0x80000000
+       bpl     LSYM(Lad_p)
+#if defined(__thumb2__)
+       mov     lr, #0
+       negs    ip, ip
+       sbcs    xl, lr, xl
+       sbc     xh, lr, xh
+#else
+       rsbs    ip, ip, #0
+       rscs    xl, xl, #0
+       rsc     xh, xh, #0
+#endif
+
+       @ Determine how to normalize the result.
+       @ (This label is also a branch target of floatdidf/floatundidf.)
+LSYM(Lad_p):
+       cmp     xh, #0x00100000
+       bcc     LSYM(Lad_a)
+       cmp     xh, #0x00200000
+       bcc     LSYM(Lad_e)
+
+       @ Result needs to be shifted right.
+       movs    xh, xh, lsr #1
+       movs    xl, xl, rrx
+       mov     ip, ip, rrx
+       add     r4, r4, #1
+
+       @ Make sure we did not bust our exponent.
+       mov     r2, r4, lsl #21
+       cmn     r2, #(2 << 21)
+       bcs     LSYM(Lad_o)
+
+       @ Our result is now properly aligned into xh-xl, remaining bits in ip.
+       @ Round with MSB of ip. If halfway between two numbers, round towards
+       @ LSB of xl = 0.
+       @ Pack final result together.
+LSYM(Lad_e):
+       cmp     ip, #0x80000000
+       do_it   eq
+       COND(mov,s,eq)  ip, xl, lsr #1
+       adcs    xl, xl, #0
+       adc     xh, xh, r4, lsl #20
+       orr     xh, xh, r5
+       RETLDM  "r4, r5"
+
+       @ Result must be shifted left and exponent adjusted.
+LSYM(Lad_a):
+       movs    ip, ip, lsl #1
+       adcs    xl, xl, xl
+       adc     xh, xh, xh
+       tst     xh, #0x00100000
+       sub     r4, r4, #1
+       bne     LSYM(Lad_e)
+
+       @ No rounding necessary since ip will always be 0 at this point.
+       @ (This label is also a branch target of floatunsidf, floatsidf and
+       @ extendsfdf2.)  The code below leaves in r3 the left shift needed
+       @ to normalize xh-xl.
+LSYM(Lad_l):
+
+#if __ARM_ARCH__ < 5
+
+       teq     xh, #0
+       movne   r3, #20
+       moveq   r3, #52
+       moveq   xh, xl
+       moveq   xl, #0
+       mov     r2, xh
+       cmp     r2, #(1 << 16)
+       movhs   r2, r2, lsr #16
+       subhs   r3, r3, #16
+       cmp     r2, #(1 << 8)
+       movhs   r2, r2, lsr #8
+       subhs   r3, r3, #8
+       cmp     r2, #(1 << 4)
+       movhs   r2, r2, lsr #4
+       subhs   r3, r3, #4
+       cmp     r2, #(1 << 2)
+       subhs   r3, r3, #2
+       sublo   r3, r3, r2, lsr #1
+       sub     r3, r3, r2, lsr #3
+
+#else
+
+       teq     xh, #0
+       do_it   eq, t
+       moveq   xh, xl
+       moveq   xl, #0
+       clz     r3, xh
+       do_it   eq
+       addeq   r3, r3, #32
+       sub     r3, r3, #11
+
+#endif
+
+       @ determine how to shift the value.
+       subs    r2, r3, #32
+       bge     2f
+       adds    r2, r2, #12
+       ble     1f
+
+       @ shift value left 21 to 31 bits, or actually right 11 to 1 bits
+       @ since a register switch happened above.
+       add     ip, r2, #20
+       rsb     r2, r2, #12
+       shift1  lsl, xl, xh, ip
+       shift1  lsr, xh, xh, r2
+       b       3f
+
+       @ actually shift value left 1 to 20 bits, which might also represent
+       @ 32 to 52 bits if counting the register switch that happened earlier.
+1:     add     r2, r2, #20
+2:     do_it   le
+       rsble   ip, r2, #32
+       shift1  lsl, xh, xh, r2
+#if defined(__thumb2__)
+       lsr     ip, xl, ip
+       itt     le
+       orrle   xh, xh, ip
+       lslle   xl, xl, r2
+#else
+       orrle   xh, xh, xl, lsr ip
+       movle   xl, xl, lsl r2
+#endif
+
+       @ adjust exponent accordingly.
+3:     subs    r4, r4, r3
+       do_it   ge, tt
+       addge   xh, xh, r4, lsl #20
+       orrge   xh, xh, r5
+       RETLDM  "r4, r5" ge
+
+       @ Exponent too small, denormalize result.
+       @ Find out proper shift value.
+       mvn     r4, r4
+       subs    r4, r4, #31
+       bge     2f
+       adds    r4, r4, #12
+       bgt     1f
+
+       @ shift result right of 1 to 20 bits, sign is in r5.
+       add     r4, r4, #20
+       rsb     r2, r4, #32
+       shift1  lsr, xl, xl, r4
+       shiftop orr xl xl xh lsl r2 yh
+       shiftop orr xh r5 xh lsr r4 yh
+       RETLDM  "r4, r5"
+
+       @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
+       @ a register switch from xh to xl.
+1:     rsb     r4, r4, #12
+       rsb     r2, r4, #32
+       shift1  lsr, xl, xl, r2
+       shiftop orr xl xl xh lsl r4 yh
+       mov     xh, r5
+       RETLDM  "r4, r5"
+
+       @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
+       @ from xh to xl.
+2:     shift1  lsr, xl, xh, r4
+       mov     xh, r5
+       RETLDM  "r4, r5"
+
+       @ Adjust exponents for denormalized arguments.
+       @ Note that r4 must not remain equal to 0.
+LSYM(Lad_d):
+       teq     r4, #0
+       eor     yh, yh, #0x00100000
+       do_it   eq, te
+       eoreq   xh, xh, #0x00100000
+       addeq   r4, r4, #1
+       subne   r5, r5, #1
+       b       LSYM(Lad_x)
+
+
+       @ Special operands: reached when the initial test found a zero,
+       @ two equal magnitudes, or an INF/NAN.
+LSYM(Lad_s):
+       mvns    ip, r4, asr #21
+       do_it   ne
+       COND(mvn,s,ne)  ip, r5, asr #21
+       beq     LSYM(Lad_i)
+
+       teq     r4, r5
+       do_it   eq
+       teqeq   xl, yl
+       beq     1f
+
+       @ Result is x + 0.0 = x or 0.0 + y = y.
+       orrs    ip, r4, xl
+       do_it   eq, t
+       moveq   xh, yh
+       moveq   xl, yl
+       RETLDM  "r4, r5"
+
+1:     teq     xh, yh
+
+       @ Result is x - x = 0.
+       do_it   ne, tt
+       movne   xh, #0
+       movne   xl, #0
+       RETLDM  "r4, r5" ne
+
+       @ Result is x + x = 2x.
+       movs    ip, r4, lsr #21
+       bne     2f
+       movs    xl, xl, lsl #1
+       adcs    xh, xh, xh
+       do_it   cs
+       orrcs   xh, xh, #0x80000000
+       RETLDM  "r4, r5"
+2:     adds    r4, r4, #(2 << 21)
+       do_it   cc, t
+       addcc   xh, xh, #(1 << 20)
+       RETLDM  "r4, r5" cc
+       and     r5, xh, #0x80000000
+
+       @ Overflow: return INF.
+LSYM(Lad_o):
+       orr     xh, r5, #0x7f000000
+       orr     xh, xh, #0x00f00000
+       mov     xl, #0
+       RETLDM  "r4, r5"
+
+       @ At least one of x or y is INF/NAN.
+       @   if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
+       @   if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
+       @   if either is NAN: return NAN
+       @   if opposite sign: return NAN
+       @   otherwise return xh-xl (which is INF or -INF)
+LSYM(Lad_i):
+       mvns    ip, r4, asr #21
+       do_it   ne, te
+       movne   xh, yh
+       movne   xl, yl
+       COND(mvn,s,eq)  ip, r5, asr #21
+       do_it   ne, t
+       movne   yh, xh
+       movne   yl, xl
+       orrs    r4, xl, xh, lsl #12
+       do_it   eq, te
+       COND(orr,s,eq)  r5, yl, yh, lsl #12
+       teqeq   xh, yh
+       orrne   xh, xh, #0x00080000     @ quiet NAN
+       RETLDM  "r4, r5"
+
+       FUNC_END aeabi_dsub
+       FUNC_END subdf3
+       FUNC_END aeabi_dadd
+       FUNC_END adddf3
+
+@ floatunsidf / aeabi_ui2d: convert the unsigned 32-bit integer in r0 to
+@ a double.  Zero is returned directly (r1 is cleared); any other value
+@ is placed in xl with xh = 0, r4 is loaded with the starting exponent
+@ and r5 with a zero sign, and the shared normalization code at Lad_l
+@ builds the result.
+ARM_FUNC_START floatunsidf
+ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
+
+       teq     r0, #0
+       do_it   eq, t
+       moveq   r1, #0
+       RETc(eq)
+       do_push {r4, r5, lr}
+       mov     r4, #0x400              @ initial exponent
+       add     r4, r4, #(52-1 - 1)
+       mov     r5, #0                  @ sign bit is 0
+       @ The move is only assembled when xl is not already r0.
+       .ifnc   xl, r0
+       mov     xl, r0
+       .endif
+       mov     xh, #0
+       b       LSYM(Lad_l)
+
+       FUNC_END aeabi_ui2d
+       FUNC_END floatunsidf
+
+@ floatsidf / aeabi_i2d: convert the signed 32-bit integer in r0 to a
+@ double.  Zero is returned directly (r1 is cleared); otherwise the sign
+@ bit is kept in r5, the absolute value is placed in xl with xh = 0, r4
+@ is loaded with the starting exponent, and the shared normalization code
+@ at Lad_l builds the result.
+ARM_FUNC_START floatsidf
+ARM_FUNC_ALIAS aeabi_i2d floatsidf
+
+       teq     r0, #0
+       do_it   eq, t
+       moveq   r1, #0
+       RETc(eq)
+       do_push {r4, r5, lr}
+       mov     r4, #0x400              @ initial exponent
+       add     r4, r4, #(52-1 - 1)
+       ands    r5, r0, #0x80000000     @ sign bit in r5
+       do_it   mi
+       rsbmi   r0, r0, #0              @ absolute value
+       @ The move is only assembled when xl is not already r0.
+       .ifnc   xl, r0
+       mov     xl, r0
+       .endif
+       mov     xh, #0
+       b       LSYM(Lad_l)
+
+       FUNC_END aeabi_i2d
+       FUNC_END floatsidf
+
+@ extendsfdf2 / aeabi_f2d: widen the single-precision value in r0 to a
+@ double in xh-xl.  Values with an exponent field that is neither zero
+@ nor all ones are finished inline; zero, INF and NAN return after the
+@ second test; denormals are handed to the shared normalization code at
+@ Lad_l with the exponent in r4 and the sign in r5.
+ARM_FUNC_START extendsfdf2
+ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
+
+       movs    r2, r0, lsl #1          @ toss sign bit
+       mov     xh, r2, asr #3          @ stretch exponent
+       mov     xh, xh, rrx             @ retrieve sign bit
+       mov     xl, r2, lsl #28         @ retrieve remaining bits
+       do_it   ne, ttt
+       COND(and,s,ne)  r3, r2, #0xff000000     @ isolate exponent
+       teqne   r3, #0xff000000         @ if not 0, check if INF or NAN
+       eorne   xh, xh, #0x38000000     @ fixup exponent otherwise.
+       RETc(ne)                        @ and return it.
+
+       teq     r2, #0                  @ if actually 0
+       do_it   ne, e
+       teqne   r3, #0xff000000         @ or INF or NAN
+       RETc(eq)                        @ we are done already.
+
+       @ value was denormalized.  We can normalize it now.
+       do_push {r4, r5, lr}
+       mov     r4, #0x380              @ setup corresponding exponent
+       and     r5, xh, #0x80000000     @ move sign bit in r5
+       bic     xh, xh, #0x80000000
+       b       LSYM(Lad_l)
+
+       FUNC_END aeabi_f2d
+       FUNC_END extendsfdf2
+
+@ floatundidf / aeabi_ul2d and floatdidf / aeabi_l2d: convert the 64-bit
+@ integer in r0-r1 (unsigned and signed respectively) to a double.
+@
+@ Both entry points return at once for a zero input, push the working
+@ registers, and meet at the local label 2 with the sign in r5 (always
+@ zero for the unsigned entry) and the magnitude in ah-al.  ah and al are
+@ register aliases defined outside this file.  The value is then
+@ pre-scaled if its top bits are in use and finished by the shared
+@ normalization code at Lad_p.
+ARM_FUNC_START floatundidf
+ARM_FUNC_ALIAS aeabi_ul2d floatundidf
+
+       orrs    r2, r0, r1
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+       do_it   eq, t
+       mvfeqd  f0, #0.0
+#else
+       do_it   eq
+#endif
+       RETc(eq)
+
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+       @ For hard FPA code we want to return via the tail below so that
+       @ we can return the result in f0 as well as in r0/r1 for backwards
+       @ compatibility.
+       adr     ip, LSYM(f0_ret)
+       @ Push pc as well so that RETLDM works correctly.
+       do_push {r4, r5, ip, lr, pc}
+#else
+       do_push {r4, r5, lr}
+#endif
+
+       mov     r5, #0
+       b       2f
+
+ARM_FUNC_START floatdidf
+ARM_FUNC_ALIAS aeabi_l2d floatdidf
+
+       orrs    r2, r0, r1
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+       do_it   eq, t
+       mvfeqd  f0, #0.0
+#else
+       do_it   eq
+#endif
+       RETc(eq)
+
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+       @ For hard FPA code we want to return via the tail below so that
+       @ we can return the result in f0 as well as in r0/r1 for backwards
+       @ compatibility.
+       adr     ip, LSYM(f0_ret)
+       @ Push pc as well so that RETLDM works correctly.
+       do_push {r4, r5, ip, lr, pc}
+#else
+       do_push {r4, r5, lr}
+#endif
+
+       ands    r5, ah, #0x80000000     @ sign bit in r5
+       bpl     2f
+       @ Negative input: replace ah-al by its 64-bit negation.
+#if defined(__thumb2__)
+       negs    al, al
+       sbc     ah, ah, ah, lsl #1
+#else
+       rsbs    al, al, #0
+       rsc     ah, ah, #0
+#endif
+2:
+       mov     r4, #0x400              @ initial exponent
+       add     r4, r4, #(52-1 - 1)
+
+       @ FPA little-endian: must swap the word order.
+       .ifnc   xh, ah
+       mov     ip, al
+       mov     xh, ah
+       mov     xl, ip
+       .endif
+
+       @ If none of the top ten bits of xh is set, no pre-scaling is
+       @ needed.
+       movs    ip, xh, lsr #22
+       beq     LSYM(Lad_p)
+
+       @ The value is too big.  Scale it down a bit...
+       @ r2 receives the right-shift count, derived from the bits left
+       @ in ip.
+       mov     r2, #3
+       movs    ip, ip, lsr #3
+       do_it   ne
+       addne   r2, r2, #3
+       movs    ip, ip, lsr #3
+       do_it   ne
+       addne   r2, r2, #3
+       add     r2, r2, ip, lsr #3
+
+       @ Shift xh-xl right by r2, keeping the bits shifted out in ip, and
+       @ add the shift count to the exponent.
+       rsb     r3, r2, #32
+       shift1  lsl, ip, xl, r3
+       shift1  lsr, xl, xl, r2
+       shiftop orr xl xl xh lsl r3 lr
+       shift1  lsr, xh, xh, r2
+       add     r4, r4, r2
+       b       LSYM(Lad_p)
+
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+
+       @ Legacy code expects the result to be returned in f0.  Copy it
+       @ there as well.
+LSYM(f0_ret):
+       do_push {r0, r1}
+       ldfd    f0, [sp], #8
+       RETLDM
+
+#endif
+
+       FUNC_END floatdidf
+       FUNC_END aeabi_l2d
+       FUNC_END floatundidf
+       FUNC_END aeabi_ul2d
+
+#endif /* L_arm_addsubdf3 */
+
+#ifdef L_arm_muldivdf3
+
+ARM_FUNC_START muldf3
+ARM_FUNC_ALIAS aeabi_dmul muldf3
+       do_push {r4, r5, r6, lr}
+
+       @ Mask out exponents, trap any zero/denormal/INF/NAN.
+       mov     ip, #0xff
+       orr     ip, ip, #0x700
+       ands    r4, ip, xh, lsr #20
+       do_it   ne, tte
+       COND(and,s,ne)  r5, ip, yh, lsr #20
+       teqne   r4, ip
+       teqne   r5, ip
+       bleq    LSYM(Lml_s)
+
+       @ Add exponents together
+       add     r4, r4, r5
+
+       @ Determine final sign.
+       eor     r6, xh, yh
+
+       @ Convert mantissa to unsigned integer.
+       @ If power of two, branch to a separate path.
+       bic     xh, xh, ip, lsl #21
+       bic     yh, yh, ip, lsl #21
+       orrs    r5, xl, xh, lsl #12
+       do_it   ne
+       COND(orr,s,ne)  r5, yl, yh, lsl #12
+       orr     xh, xh, #0x00100000
+       orr     yh, yh, #0x00100000
+       beq     LSYM(Lml_1)
+
+#if __ARM_ARCH__ < 4
+
+       @ Put sign bit in r6, which will be restored in yl later.
+       and   r6, r6, #0x80000000
+
+       @ Well, no way to make it shorter without the umull instruction.
+       stmfd   sp!, {r6, r7, r8, r9, sl, fp}
+       mov     r7, xl, lsr #16
+       mov     r8, yl, lsr #16
+       mov     r9, xh, lsr #16
+       mov     sl, yh, lsr #16
+       bic     xl, xl, r7, lsl #16
+       bic     yl, yl, r8, lsl #16
+       bic     xh, xh, r9, lsl #16
+       bic     yh, yh, sl, lsl #16
+       mul     ip, xl, yl
+       mul     fp, xl, r8
+       mov     lr, #0
+       adds    ip, ip, fp, lsl #16
+       adc     lr, lr, fp, lsr #16
+       mul     fp, r7, yl
+       adds    ip, ip, fp, lsl #16
+       adc     lr, lr, fp, lsr #16
+       mul     fp, xl, sl
+       mov     r5, #0
+       adds    lr, lr, fp, lsl #16
+       adc     r5, r5, fp, lsr #16
+       mul     fp, r7, yh
+       adds    lr, lr, fp, lsl #16
+       adc     r5, r5, fp, lsr #16
+       mul     fp, xh, r8
+       adds    lr, lr, fp, lsl #16
+       adc     r5, r5, fp, lsr #16
+       mul     fp, r9, yl
+       adds    lr, lr, fp, lsl #16
+       adc     r5, r5, fp, lsr #16
+       mul     fp, xh, sl
+       mul     r6, r9, sl
+       adds    r5, r5, fp, lsl #16
+       adc     r6, r6, fp, lsr #16
+       mul     fp, r9, yh
+       adds    r5, r5, fp, lsl #16
+       adc     r6, r6, fp, lsr #16
+       mul     fp, xl, yh
+       adds    lr, lr, fp
+       mul     fp, r7, sl
+       adcs    r5, r5, fp
+       mul     fp, xh, yl
+       adc     r6, r6, #0
+       adds    lr, lr, fp
+       mul     fp, r9, r8
+       adcs    r5, r5, fp
+       mul     fp, r7, r8
+       adc     r6, r6, #0
+       adds    lr, lr, fp
+       mul     fp, xh, yh
+       adcs    r5, r5, fp
+       adc     r6, r6, #0
+       ldmfd   sp!, {yl, r7, r8, r9, sl, fp}
+
+#else
+
+       @ Here is the actual multiplication.
+       umull   ip, lr, xl, yl
+       mov     r5, #0
+       umlal   lr, r5, xh, yl
+       and     yl, r6, #0x80000000
+       umlal   lr, r5, xl, yh
+       mov     r6, #0
+       umlal   r5, r6, xh, yh
+
+#endif
+
+       @ The LSBs in ip are only significant for the final rounding.
+       @ Fold them into lr.
+       teq     ip, #0
+       do_it   ne
+       orrne   lr, lr, #1
+
+       @ Adjust result upon the MSB position.
+       sub     r4, r4, #0xff
+       cmp     r6, #(1 << (20-11))
+       sbc     r4, r4, #0x300
+       bcs     1f
+       movs    lr, lr, lsl #1
+       adcs    r5, r5, r5
+       adc     r6, r6, r6
+1:
+       @ Shift to final position, add sign to result.
+       orr     xh, yl, r6, lsl #11
+       orr     xh, xh, r5, lsr #21
+       mov     xl, r5, lsl #11
+       orr     xl, xl, lr, lsr #21
+       mov     lr, lr, lsl #11
+
+       @ Check exponent range for under/overflow.
+       subs    ip, r4, #(254 - 1)
+       do_it   hi
+       cmphi   ip, #0x700
+       bhi     LSYM(Lml_u)
+
+       @ Round the result, merge final exponent.
+       cmp     lr, #0x80000000
+       do_it   eq
+       COND(mov,s,eq)  lr, xl, lsr #1
+       adcs    xl, xl, #0
+       adc     xh, xh, r4, lsl #20
+       RETLDM  "r4, r5, r6"
+
+       @ Multiplication by 0x1p*: let''s shortcut a lot of code.
+LSYM(Lml_1):
+       and     r6, r6, #0x80000000
+       orr     xh, r6, xh
+       orr     xl, xl, yl
+       eor     xh, xh, yh
+       subs    r4, r4, ip, lsr #1
+       do_it   gt, tt
+       COND(rsb,s,gt)  r5, r4, ip
+       orrgt   xh, xh, r4, lsl #20
+       RETLDM  "r4, r5, r6" gt
+
+       @ Under/overflow: fix things up for the code below.
+       orr     xh, xh, #0x00100000
+       mov     lr, #0
+       subs    r4, r4, #1
+
+LSYM(Lml_u):
+       @ Overflow?
+       bgt     LSYM(Lml_o)
+
+       @ Check if denormalized result is possible, otherwise return signed 0.
+       cmn     r4, #(53 + 1)
+       do_it   le, tt
+       movle   xl, #0
+       bicle   xh, xh, #0x7fffffff
+       RETLDM  "r4, r5, r6" le
+
+       @ Find out proper shift value.
+       rsb     r4, r4, #0
+       subs    r4, r4, #32
+       bge     2f
+       adds    r4, r4, #12
+       bgt     1f
+
+       @ shift result right of 1 to 20 bits, preserve sign bit, round, etc.
+       add     r4, r4, #20
+       rsb     r5, r4, #32
+       shift1  lsl, r3, xl, r5
+       shift1  lsr, xl, xl, r4
+       shiftop orr xl xl xh lsl r5 r2
+       and     r2, xh, #0x80000000
+       bic     xh, xh, #0x80000000
+       adds    xl, xl, r3, lsr #31
+       shiftop adc xh r2 xh lsr r4 r6
+       orrs    lr, lr, r3, lsl #1
+       do_it   eq
+       biceq   xl, xl, r3, lsr #31
+       RETLDM  "r4, r5, r6"
+
+       @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
+       @ a register switch from xh to xl. Then round.
+1:     rsb     r4, r4, #12
+       rsb     r5, r4, #32
+       shift1  lsl, r3, xl, r4
+       shift1  lsr, xl, xl, r5
+       shiftop orr xl xl xh lsl r4 r2
+       bic     xh, xh, #0x7fffffff
+       adds    xl, xl, r3, lsr #31
+       adc     xh, xh, #0
+       orrs    lr, lr, r3, lsl #1
+       do_it   eq
+       biceq   xl, xl, r3, lsr #31
+       RETLDM  "r4, r5, r6"
+
+       @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
+       @ from xh to xl.  Leftover bits are in r3-r6-lr for rounding.
+2:     rsb     r5, r4, #32
+       shiftop orr lr lr xl lsl r5 r2
+       shift1  lsr, r3, xl, r4
+       shiftop orr r3 r3 xh lsl r5 r2
+       shift1  lsr, xl, xh, r4
+       bic     xh, xh, #0x7fffffff
+       shiftop bic xl xl xh lsr r4 r2
+       add     xl, xl, r3, lsr #31
+       orrs    lr, lr, r3, lsl #1
+       do_it   eq
+       biceq   xl, xl, r3, lsr #31
+       RETLDM  "r4, r5, r6"
+
+       @ One or both arguments are denormalized.
+       @ Scale them leftwards and preserve sign bit.
+LSYM(Lml_d):
+       teq     r4, #0
+       bne     2f
+       and     r6, xh, #0x80000000
+1:     movs    xl, xl, lsl #1
+       adc     xh, xh, xh
+       tst     xh, #0x00100000
+       do_it   eq
+       subeq   r4, r4, #1
+       beq     1b
+       orr     xh, xh, r6
+       teq     r5, #0
+       do_it   ne
+       RETc(ne)
+2:     and     r6, yh, #0x80000000
+3:     movs    yl, yl, lsl #1
+       adc     yh, yh, yh
+       tst     yh, #0x00100000
+       do_it   eq
+       subeq   r5, r5, #1
+       beq     3b
+       orr     yh, yh, r6
+       RET
+
+LSYM(Lml_s):
+       @ Isolate the INF and NAN cases away
+       teq     r4, ip
+       and     r5, ip, yh, lsr #20
+       do_it   ne
+       teqne   r5, ip
+       beq     1f
+
+       @ Here, one or more arguments are either denormalized or zero.
+       orrs    r6, xl, xh, lsl #1
+       do_it   ne
+       COND(orr,s,ne)  r6, yl, yh, lsl #1
+       bne     LSYM(Lml_d)
+
+       @ Result is 0, but determine sign anyway.
+LSYM(Lml_z):
+       eor     xh, xh, yh
+       and     xh, xh, #0x80000000
+       mov     xl, #0
+       RETLDM  "r4, r5, r6"
+
+1:     @ One or both args are INF or NAN.
+       orrs    r6, xl, xh, lsl #1
+       do_it   eq, te
+       moveq   xl, yl
+       moveq   xh, yh
+       COND(orr,s,ne)  r6, yl, yh, lsl #1
+       beq     LSYM(Lml_n)             @ 0 * INF or INF * 0 -> NAN
+       teq     r4, ip
+       bne     1f
+       orrs    r6, xl, xh, lsl #12
+       bne     LSYM(Lml_n)             @ NAN * <anything> -> NAN
+1:     teq     r5, ip
+       bne     LSYM(Lml_i)
+       orrs    r6, yl, yh, lsl #12
+       do_it   ne, t
+       movne   xl, yl
+       movne   xh, yh
+       bne     LSYM(Lml_n)             @ <anything> * NAN -> NAN
+
+       @ Result is INF, but we need to determine its sign.
+LSYM(Lml_i):
+       eor     xh, xh, yh
+
+       @ Overflow: return INF (sign already in xh).
+LSYM(Lml_o):
+       and     xh, xh, #0x80000000
+       orr     xh, xh, #0x7f000000
+       orr     xh, xh, #0x00f00000
+       mov     xl, #0
+       RETLDM  "r4, r5, r6"
+
+       @ Return a quiet NAN.
+LSYM(Lml_n):
+       orr     xh, xh, #0x7f000000
+       orr     xh, xh, #0x00f80000
+       RETLDM  "r4, r5, r6"
+
+       FUNC_END aeabi_dmul
+       FUNC_END muldf3
+
+ARM_FUNC_START divdf3
+ARM_FUNC_ALIAS aeabi_ddiv divdf3
+       @ Double-precision division: x (xh-xl) divided by y (yh-yl), result in xh-xl.
+       do_push {r4, r5, r6, lr}
+
+       @ Mask out exponents, trap any zero/denormal/INF/NAN.
+       mov     ip, #0xff
+       orr     ip, ip, #0x700          @ ip = 0x7ff, the exponent field mask
+       ands    r4, ip, xh, lsr #20     @ r4 = exponent of x
+       do_it   ne, tte
+       COND(and,s,ne)  r5, ip, yh, lsr #20     @ r5 = exponent of y
+       teqne   r4, ip
+       teqne   r5, ip
+       bleq    LSYM(Ldv_s)             @ branch with link, so the special-case code may resume here
+
+       @ Subtract divisor exponent from dividend''s.
+       sub     r4, r4, r5
+
+       @ Preserve final sign into lr.
+       eor     lr, xh, yh
+
+       @ Convert mantissa to unsigned integer.
+       @ Dividend -> r5-r6, divisor -> yh-yl.
+       orrs    r5, yl, yh, lsl #12     @ Z set if the divisor mantissa bits are all zero
+       mov     xh, xh, lsl #12
+       beq     LSYM(Ldv_1)
+       mov     yh, yh, lsl #12
+       mov     r5, #0x10000000         @ the implicit leading one, at bit 28
+       orr     yh, r5, yh, lsr #4
+       orr     yh, yh, yl, lsr #24
+       mov     yl, yl, lsl #8
+       orr     r5, r5, xh, lsr #4
+       orr     r5, r5, xl, lsr #24
+       mov     r6, xl, lsl #8
+
+       @ Initialize xh with final sign bit.
+       and     xh, lr, #0x80000000
+
+       @ Ensure result will land to known bit position.
+       @ Apply exponent bias accordingly.
+       cmp     r5, yh
+       do_it   eq
+       cmpeq   r6, yl
+       adc     r4, r4, #(255 - 2)      @ r4 += 253 + C (C set if dividend >= divisor)
+       add     r4, r4, #0x300          @ flags untouched: bcs below still tests the compare
+       bcs     1f
+       movs    yh, yh, lsr #1          @ dividend < divisor: shift divisor yh-yl right by one
+       mov     yl, yl, rrx
+1:
+       @ Perform first subtraction to align result to a nibble.
+       subs    r6, r6, yl
+       sbc     r5, r5, yh
+       movs    yh, yh, lsr #1
+       mov     yl, yl, rrx
+       mov     xl, #0x00100000         @ quotient word being built, leading bit already set
+       mov     ip, #0x00080000         @ ip = next quotient bit to produce
+
+       @ The actual division loop.
+1:     subs    lr, r6, yl              @ trial subtraction of divisor from remainder r5-r6
+       sbcs    lr, r5, yh              @ lr = high word of difference, C set if no borrow
+       do_it   cs, tt
+       subcs   r6, r6, yl              @ it fits: commit the low word,
+       movcs   r5, lr                  @ the high word,
+       orrcs   xl, xl, ip              @ and set this quotient bit
+       movs    yh, yh, lsr #1          @ halve the divisor for the next bit
+       mov     yl, yl, rrx
+       subs    lr, r6, yl
+       sbcs    lr, r5, yh
+       do_it   cs, tt
+       subcs   r6, r6, yl
+       movcs   r5, lr
+       orrcs   xl, xl, ip, lsr #1
+       movs    yh, yh, lsr #1
+       mov     yl, yl, rrx
+       subs    lr, r6, yl
+       sbcs    lr, r5, yh
+       do_it   cs, tt
+       subcs   r6, r6, yl
+       movcs   r5, lr
+       orrcs   xl, xl, ip, lsr #2
+       movs    yh, yh, lsr #1
+       mov     yl, yl, rrx
+       subs    lr, r6, yl
+       sbcs    lr, r5, yh
+       do_it   cs, tt
+       subcs   r6, r6, yl
+       movcs   r5, lr
+       orrcs   xl, xl, ip, lsr #3
+
+       orrs    lr, r5, r6              @ remainder is zero: leave the loop early
+       beq     2f
+       mov     r5, r5, lsl #4          @ shift the remainder r5-r6 left by four bits
+       orr     r5, r5, r6, lsr #28
+       mov     r6, r6, lsl #4
+       mov     yh, yh, lsl #3          @ undo the three right shifts applied to the divisor above
+       orr     yh, yh, yl, lsr #29
+       mov     yl, yl, lsl #3
+       movs    ip, ip, lsr #4          @ move on to the next four quotient bits
+       bne     1b
+
+       @ We are done with a word of the result.
+       @ Loop again for the low word if this pass was for the high word.
+       tst     xh, #0x00100000         @ bit 20 of xh is set once the high word was merged in
+       bne     3f
+       orr     xh, xh, xl
+       mov     xl, #0
+       mov     ip, #0x80000000
+       b       1b
+2:
+       @ Be sure result starts in the high word.
+       tst     xh, #0x00100000
+       do_it   eq, t
+       orreq   xh, xh, xl
+       moveq   xl, #0
+3:
+       @ Check exponent range for under/overflow.
+       subs    ip, r4, #(254 - 1)
+       do_it   hi
+       cmphi   ip, #0x700
+       bhi     LSYM(Lml_u)
+
+       @ Round the result, merge final exponent.
+       subs    ip, r5, yh              @ compare remainder with the shifted divisor
+       do_it   eq, t
+       COND(sub,s,eq)  ip, r6, yl
+       COND(mov,s,eq)  ip, xl, lsr #1  @ exact tie: C = LSB of xl
+       adcs    xl, xl, #0              @ add the rounding carry
+       adc     xh, xh, r4, lsl #20     @ and merge the exponent into bits 20 and up
+       RETLDM  "r4, r5, r6"
+
+       @ Division by 0x1p*: shortcut a lot of code.
+LSYM(Ldv_1):
+       and     lr, lr, #0x80000000     @ keep only the final sign bit
+       orr     xh, lr, xh, lsr #12     @ mantissa back in place (undo the lsl #12), with sign
+       adds    r4, r4, ip, lsr #1      @ add 0x3ff to the exponent difference
+       do_it   gt, tt
+       COND(rsb,s,gt)  r5, r4, ip      @ also require r4 < 0x7ff
+       orrgt   xh, xh, r4, lsl #20
+       RETLDM  "r4, r5, r6" gt
+
+       @ Exponent out of range: set things up for the under/overflow code.
+       orr     xh, xh, #0x00100000
+       mov     lr, #0
+       subs    r4, r4, #1
+       b       LSYM(Lml_u)
+
+       @ Result might need to be denormalized: put remainder bits
+       @ in lr for rounding considerations.
+LSYM(Ldv_u):
+       orr     lr, r5, r6              @ NOTE(review): no branch to Ldv_u appears in this function; confirm it is still needed
+       b       LSYM(Lml_u)
+
+       @ One or both arguments is either INF, NAN or zero.
+LSYM(Ldv_s):
+       and     r5, ip, yh, lsr #20     @ recompute the exponent of y
+       teq     r4, ip
+       do_it   eq
+       teqeq   r5, ip
+       beq     LSYM(Lml_n)             @ INF/NAN / INF/NAN -> NAN
+       teq     r4, ip
+       bne     1f
+       orrs    r4, xl, xh, lsl #12
+       bne     LSYM(Lml_n)             @ NAN / <anything> -> NAN
+       teq     r5, ip
+       bne     LSYM(Lml_i)             @ INF / <anything> -> INF
+       mov     xl, yl
+       mov     xh, yh
+       b       LSYM(Lml_n)             @ INF / (INF or NAN) -> NAN
+1:     teq     r5, ip
+       bne     2f
+       orrs    r5, yl, yh, lsl #12
+       beq     LSYM(Lml_z)             @ <anything> / INF -> 0
+       mov     xl, yl
+       mov     xh, yh
+       b       LSYM(Lml_n)             @ <anything> / NAN -> NAN
+2:     @ If both are nonzero, we need to normalize and resume above.
+       orrs    r6, xl, xh, lsl #1
+       do_it   ne
+       COND(orr,s,ne)  r6, yl, yh, lsl #1
+       bne     LSYM(Lml_d)
+       @ One or both arguments are 0.
+       orrs    r4, xl, xh, lsl #1
+       bne     LSYM(Lml_i)             @ <non_zero> / 0 -> INF
+       orrs    r5, yl, yh, lsl #1
+       bne     LSYM(Lml_z)             @ 0 / <non_zero> -> 0
+       b       LSYM(Lml_n)             @ 0 / 0 -> NAN
+
+       FUNC_END aeabi_ddiv
+       FUNC_END divdf3
+
+#endif /* L_arm_muldivdf3 */
+
+#ifdef L_arm_cmpdf2
+
+@ Note: only r0 (return value) and ip are clobbered here.
+
+ARM_FUNC_START gtdf2
+ARM_FUNC_ALIAS gedf2 gtdf2
+       mov     ip, #-1                 @ value returned when the operands are unordered
+       b       1f
+
+ARM_FUNC_START ltdf2
+ARM_FUNC_ALIAS ledf2 ltdf2
+       mov     ip, #1                  @ value returned when the operands are unordered
+       b       1f
+
+ARM_FUNC_START cmpdf2
+ARM_FUNC_ALIAS nedf2 cmpdf2
+ARM_FUNC_ALIAS eqdf2 cmpdf2
+       mov     ip, #1                  @ how should we specify unordered here?
+
+1:     str     ip, [sp, #-4]!          @ push the unordered result code
+
+       @ Trap any INF/NAN first.
+       mov     ip, xh, lsl #1
+       mvns    ip, ip, asr #21         @ Z set if the exponent of x is all ones
+       mov     ip, yh, lsl #1
+       do_it   ne
+       COND(mvn,s,ne)  ip, ip, asr #21 @ otherwise the same test on y
+       beq     3f
+
+       @ Test for equality.
+       @ Note that 0.0 is equal to -0.0.
+2:     add     sp, sp, #4              @ discard the pushed unordered code
+       orrs    ip, xl, xh, lsl #1      @ if x == 0.0 or -0.0
+       do_it   eq, e
+       COND(orr,s,eq)  ip, yl, yh, lsl #1      @ and y == 0.0 or -0.0
+       teqne   xh, yh                  @ or xh == yh
+       do_it   eq, tt
+       teqeq   xl, yl                  @ and xl == yl
+       moveq   r0, #0                  @ then equal.
+       RETc(eq)
+
+       @ Clear C flag
+       cmn     r0, #0                  @ r0 + 0 never carries out
+
+       @ Compare sign: N set if the signs differ.
+       teq     xh, yh
+
+       @ Compare values if same sign
+       do_it   pl
+       cmppl   xh, yh
+       do_it   eq
+       cmpeq   xl, yl
+
+       @ Result:
+       do_it   cs, e
+       movcs   r0, yh, asr #31         @ 0 or -1 from the sign of y
+       mvncc   r0, yh, asr #31         @ the complement of that
+       orr     r0, r0, #1              @ turns 0 / -1 into +1 / -1
+       RET
+
+       @ Look for a NAN.
+3:     mov     ip, xh, lsl #1
+       mvns    ip, ip, asr #21
+       bne     4f
+       orrs    ip, xl, xh, lsl #12
+       bne     5f                      @ x is NAN
+4:     mov     ip, yh, lsl #1
+       mvns    ip, ip, asr #21
+       bne     2b
+       orrs    ip, yl, yh, lsl #12
+       beq     2b                      @ y is not NAN
+5:     ldr     r0, [sp], #4            @ unordered return code
+       RET
+
+       FUNC_END gedf2
+       FUNC_END gtdf2
+       FUNC_END ledf2
+       FUNC_END ltdf2
+       FUNC_END nedf2
+       FUNC_END eqdf2
+       FUNC_END cmpdf2
+
+ARM_FUNC_START aeabi_cdrcmple
+
+       mov     ip, r0                  @ swap the two operands: r0 <-> r2
+       mov     r0, r2
+       mov     r2, ip
+       mov     ip, r1                  @ and r1 <-> r3
+       mov     r1, r3
+       mov     r3, ip
+       b       6f
+       
+ARM_FUNC_START aeabi_cdcmpeq
+ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
+
+       @ The status-returning routines are required to preserve all
+       @ registers except ip, lr, and cpsr.
+6:     do_push {r0, lr}
+       ARM_CALL cmpdf2
+       @ Set the Z flag correctly, and the C flag unconditionally.
+       cmp     r0, #0
+       @ Clear the C flag if the return value was -1, indicating
+       @ that the first operand was smaller than the second.
+       do_it   mi
+       cmnmi   r0, #0
+       RETLDM  "r0"
+
+       FUNC_END aeabi_cdcmple
+       FUNC_END aeabi_cdcmpeq
+       FUNC_END aeabi_cdrcmple
+       
+ARM_FUNC_START aeabi_dcmpeq
+
+       str     lr, [sp, #-8]!          @ save lr, moving sp down by 8 bytes
+       ARM_CALL aeabi_cdcmple
+       do_it   eq, e                   @ result is derived from the Z flag left by the call
+       moveq   r0, #1  @ Equal to.
+       movne   r0, #0  @ Less than, greater than, or unordered.
+       RETLDM
+
+       FUNC_END aeabi_dcmpeq
+
+ARM_FUNC_START aeabi_dcmplt
+
+       str     lr, [sp, #-8]!          @ save lr, moving sp down by 8 bytes
+       ARM_CALL aeabi_cdcmple
+       do_it   cc, e                   @ result is derived from the C flag left by the call
+       movcc   r0, #1  @ Less than.
+       movcs   r0, #0  @ Equal to, greater than, or unordered.
+       RETLDM
+
+       FUNC_END aeabi_dcmplt
+
+ARM_FUNC_START aeabi_dcmple
+
+       str     lr, [sp, #-8]!          @ save lr, moving sp down by 8 bytes
+       ARM_CALL aeabi_cdcmple
+       do_it   ls, e                   @ ls: C clear or Z set after the call
+       movls   r0, #1  @ Less than or equal to.
+       movhi   r0, #0  @ Greater than or unordered.
+       RETLDM
+
+       FUNC_END aeabi_dcmple
+
+ARM_FUNC_START aeabi_dcmpge
+
+       str     lr, [sp, #-8]!          @ save lr, moving sp down by 8 bytes
+       ARM_CALL aeabi_cdrcmple
+       do_it   ls, e                   @ ls: C clear or Z set after the reversed compare
+       movls   r0, #1  @ Operand 2 is less than or equal to operand 1.
+       movhi   r0, #0  @ Operand 2 greater than operand 1, or unordered.
+       RETLDM
+
+       FUNC_END aeabi_dcmpge
+
+ARM_FUNC_START aeabi_dcmpgt
+
+       str     lr, [sp, #-8]!          @ save lr, moving sp down by 8 bytes
+       ARM_CALL aeabi_cdrcmple
+       do_it   cc, e                   @ result is derived from the C flag of the reversed compare
+       movcc   r0, #1  @ Operand 2 is less than operand 1.
+       movcs   r0, #0  @ Operand 2 is greater than or equal to operand 1,
+                       @ or they are unordered.
+       RETLDM
+
+       FUNC_END aeabi_dcmpgt
+
+#endif /* L_arm_cmpdf2 */
+
+#ifdef L_arm_unorddf2
+
+ARM_FUNC_START unorddf2
+ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
+
+       mov     ip, xh, lsl #1          @ drop the sign bit of x
+       mvns    ip, ip, asr #21         @ Z set if the exponent of x is all ones
+       bne     1f
+       orrs    ip, xl, xh, lsl #12     @ any mantissa bit set?
+       bne     3f                      @ x is NAN
+1:     mov     ip, yh, lsl #1          @ same two tests on y
+       mvns    ip, ip, asr #21
+       bne     2f
+       orrs    ip, yl, yh, lsl #12
+       bne     3f                      @ y is NAN
+2:     mov     r0, #0                  @ arguments are ordered.
+       RET
+
+3:     mov     r0, #1                  @ arguments are unordered.
+       RET
+
+       FUNC_END aeabi_dcmpun
+       FUNC_END unorddf2
+
+#endif /* L_arm_unorddf2 */
+
+#ifdef L_arm_fixdfsi
+
+ARM_FUNC_START fixdfsi
+ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
+
+       @ check exponent range.
+       mov     r2, xh, lsl #1          @ drop the sign: exponent now sits in bits 31..21
+       adds    r2, r2, #(1 << 21)      @ exponent + 1; carries out when exponent is 0x7ff
+       bcs     2f                      @ value is INF or NAN
+       bpl     1f                      @ value is too small
+       mov     r3, #(0xfffffc00 + 31)
+       subs    r2, r3, r2, asr #21     @ r2 = 31 - unbiased exponent, the right shift count
+       bls     3f                      @ value is too large
+
+       @ scale value
+       mov     r3, xh, lsl #11         @ mantissa left-aligned in r3,
+       orr     r3, r3, #0x80000000     @ with the implicit leading one on top
+       orr     r3, r3, xl, lsr #21
+       tst     xh, #0x80000000         @ the sign bit
+       shift1  lsr, r0, r3, r2
+       do_it   ne
+       rsbne   r0, r0, #0              @ negate the result for negative input
+       RET
+
+1:     mov     r0, #0
+       RET
+
+2:     orrs    xl, xl, xh, lsl #12
+       bne     4f                      @ x is NAN.
+3:     ands    r0, xh, #0x80000000     @ the sign bit
+       do_it   eq
+       moveq   r0, #0x7fffffff         @ maximum signed positive si
+       RET
+
+4:     mov     r0, #0                  @ How should we convert NAN?
+       RET
+
+       FUNC_END aeabi_d2iz
+       FUNC_END fixdfsi
+
+#endif /* L_arm_fixdfsi */
+
+#ifdef L_arm_fixunsdfsi
+
+ARM_FUNC_START fixunsdfsi
+ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
+
+       @ check exponent range.
+       movs    r2, xh, lsl #1          @ sign bit shifted out into C
+       bcs     1f                      @ value is negative
+       adds    r2, r2, #(1 << 21)      @ exponent + 1; carries out when exponent is 0x7ff
+       bcs     2f                      @ value is INF or NAN
+       bpl     1f                      @ value is too small
+       mov     r3, #(0xfffffc00 + 31)
+       subs    r2, r3, r2, asr #21     @ r2 = 31 - unbiased exponent, the right shift count
+       bmi     3f                      @ value is too large
+
+       @ scale value
+       mov     r3, xh, lsl #11         @ mantissa left-aligned in r3,
+       orr     r3, r3, #0x80000000     @ with the implicit leading one on top
+       orr     r3, r3, xl, lsr #21
+       shift1  lsr, r0, r3, r2
+       RET
+
+1:     mov     r0, #0
+       RET
+
+2:     orrs    xl, xl, xh, lsl #12
+       bne     4f                      @ value is NAN.
+3:     mov     r0, #0xffffffff         @ maximum unsigned si
+       RET
+
+4:     mov     r0, #0                  @ How should we convert NAN?
+       RET
+
+       FUNC_END aeabi_d2uiz
+       FUNC_END fixunsdfsi
+
+#endif /* L_arm_fixunsdfsi */
+
+#ifdef L_arm_truncdfsf2
+
+ARM_FUNC_START truncdfsf2
+ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
+
+       @ check exponent range.
+       mov     r2, xh, lsl #1          @ drop the sign: exponent now sits in bits 31..21
+       subs    r3, r2, #((1023 - 127) << 21)   @ rebias the exponent from double to single
+       do_it   cs, t
+       COND(sub,s,cs)  ip, r3, #(1 << 21)
+       COND(rsb,s,cs)  ip, ip, #(254 << 21)
+       bls     2f                      @ value is out of range
+
+1:     @ shift and round mantissa
+       and     ip, xh, #0x80000000     @ ip = sign bit
+       mov     r2, xl, lsl #3          @ r2 = the low mantissa bits that get dropped
+       orr     xl, ip, xl, lsr #29
+       cmp     r2, #0x80000000         @ C = round up, Z = exact tie
+       adc     r0, xl, r3, lsl #2      @ merge exponent and mantissa, plus the rounding carry
+       do_it   eq
+       biceq   r0, r0, #1              @ exact tie: clear the LSB (round to even)
+       RET
+
+2:     @ either overflow or underflow
+       tst     xh, #0x40000000
+       bne     3f                      @ overflow
+
+       @ check if denormalized value is possible
+       adds    r2, r3, #(23 << 21)
+       do_it   lt, t
+       andlt   r0, xh, #0x80000000     @ too small, return signed 0.
+       RETc(lt)
+
+       @ denormalize value so we can resume with the code above afterwards.
+       orr     xh, xh, #0x00100000     @ make the implicit leading one explicit
+       mov     r2, r2, lsr #21
+       rsb     r2, r2, #24             @ r2 = right shift amount
+       rsb     ip, r2, #32             @ ip = 32 - r2, the complementary left shift
+#if defined(__thumb2__)
+       lsls    r3, xl, ip
+#else
+       movs    r3, xl, lsl ip
+#endif
+       shift1  lsr, xl, xl, r2
+       do_it   ne
+       orrne   xl, xl, #1              @ fold r3 for rounding considerations. 
+       mov     r3, xh, lsl #11
+       mov     r3, r3, lsr #11         @ r3 = low 21 bits of xh (mantissa with leading one)
+       shiftop orr xl xl r3 lsl ip ip
+       shift1  lsr, r3, r3, r2
+       mov     r3, r3, lsl #1
+       b       1b
+
+3:     @ check for NAN
+       mvns    r3, r2, asr #21         @ Z set if the exponent is all ones
+       bne     5f                      @ simple overflow
+       orrs    r3, xl, xh, lsl #12
+       do_it   ne, tt
+       movne   r0, #0x7f000000
+       orrne   r0, r0, #0x00c00000     @ r0 = 0x7fc00000
+       RETc(ne)                        @ return NAN
+
+5:     @ return INF with sign
+       and     r0, xh, #0x80000000
+       orr     r0, r0, #0x7f000000
+       orr     r0, r0, #0x00800000
+       RET
+
+       FUNC_END aeabi_d2f
+       FUNC_END truncdfsf2
+
+#endif /* L_arm_truncdfsf2 */
diff --git a/libgcc/config/arm/ieee754-sf.S b/libgcc/config/arm/ieee754-sf.S

new file mode 100644 (file)

index 0000000..c93f66d
--- /dev/null
+++ b/libgcc/config/arm/ieee754-sf.S
@@ -0,0 +1,1060 @@
+/* ieee754-sf.S single-precision floating point support for ARM
+
+   Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009  Free Software Foundation, Inc.
+   Contributed by Nicolas Pitre (nico@cam.org)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/*
+ * Notes:
+ *
+ * The goal of this code is to be as fast as possible.  This is
+ * not meant to be easy to understand for the casual reader.
+ *
+ * Only the default rounding mode is intended for best performance.
+ * Exceptions aren't supported yet, but that can be added quite easily
+ * if necessary without impacting performance.
+ */
+
+#ifdef L_arm_negsf2
+       
+ARM_FUNC_START negsf2
+ARM_FUNC_ALIAS aeabi_fneg negsf2
+
+       eor     r0, r0, #0x80000000     @ flip sign bit; every other bit of r0 is left as is
+       RET
+
+       FUNC_END aeabi_fneg
+       FUNC_END negsf2
+
+#endif
+
+#ifdef L_arm_addsubsf3
+
+ARM_FUNC_START aeabi_frsub
+
+       eor     r0, r0, #0x80000000     @ flip sign bit of first arg
+       b       1f
+
+ARM_FUNC_START subsf3
+ARM_FUNC_ALIAS aeabi_fsub subsf3
+
+       eor     r1, r1, #0x80000000     @ flip sign bit of second arg
+#if defined(__INTERWORKING_STUBS__)
+       b       1f                      @ Skip Thumb-code prologue
+#endif
+
+ARM_FUNC_START addsf3
+ARM_FUNC_ALIAS aeabi_fadd addsf3
+
+1:     @ Look for zeroes, equal values, INF, or NAN.
+       movs    r2, r0, lsl #1          @ r2 = first arg without its sign bit
+       do_it   ne, ttt
+       COND(mov,s,ne)  r3, r1, lsl #1  @ r3 = second arg without its sign bit
+       teqne   r2, r3                  @ same magnitude?
+       COND(mvn,s,ne)  ip, r2, asr #24 @ Z set if the first exponent is 0xff
+       COND(mvn,s,ne)  ip, r3, asr #24 @ Z set if the second exponent is 0xff
+       beq     LSYM(Lad_s)
+
+       @ Compute exponent difference.  Make largest exponent in r2,
+       @ corresponding arg in r0, and positive exponent difference in r3.
+       mov     r2, r2, lsr #24
+       rsbs    r3, r2, r3, lsr #24
+       do_it   gt, ttt
+       addgt   r2, r2, r3
+       eorgt   r1, r0, r1              @ swap r0 and r1 with three XORs
+       eorgt   r0, r1, r0
+       eorgt   r1, r0, r1
+       do_it   lt
+       rsblt   r3, r3, #0
+
+       @ If exponent difference is too large, return largest argument
+       @ already in r0.  We need up to 25 bit to handle proper rounding
+       @ of 0x1p25 - 1.1.
+       cmp     r3, #25
+       do_it   hi
+       RETc(hi)
+
+       @ Convert mantissa to signed integer.
+       tst     r0, #0x80000000
+       orr     r0, r0, #0x00800000     @ set the implicit leading one
+       bic     r0, r0, #0xff000000     @ clear sign and exponent bits
+       do_it   ne
+       rsbne   r0, r0, #0
+       tst     r1, #0x80000000
+       orr     r1, r1, #0x00800000
+       bic     r1, r1, #0xff000000
+       do_it   ne
+       rsbne   r1, r1, #0
+
+       @ If exponent == difference, one or both args were denormalized.
+       @ Since this is not common case, rescale them off line.
+       teq     r2, r3
+       beq     LSYM(Lad_d)
+LSYM(Lad_x):
+
+       @ Compensate for the exponent overlapping the mantissa MSB added later
+       sub     r2, r2, #1
+
+       @ Shift and add second arg to first arg in r0.
+       @ Keep leftover bits into r1.
+       shiftop adds r0 r0 r1 asr r3 ip
+       rsb     r3, r3, #32
+       shift1  lsl, r1, r1, r3
+
+       @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
+       and     r3, r0, #0x80000000
+       bpl     LSYM(Lad_p)
+#if defined(__thumb2__)
+       negs    r1, r1
+       sbc     r0, r0, r0, lsl #1      @ r0 - 2*r0 - borrow: same value as the rsc below
+#else
+       rsbs    r1, r1, #0
+       rsc     r0, r0, #0
+#endif
+
+       @ Determine how to normalize the result.
+LSYM(Lad_p):
+       cmp     r0, #0x00800000
+       bcc     LSYM(Lad_a)
+       cmp     r0, #0x01000000
+       bcc     LSYM(Lad_e)
+
+       @ Result needs to be shifted right.
+       movs    r0, r0, lsr #1
+       mov     r1, r1, rrx
+       add     r2, r2, #1
+
+       @ Make sure we did not bust our exponent.
+       cmp     r2, #254
+       bhs     LSYM(Lad_o)
+
+       @ Our result is now properly aligned into r0, remaining bits in r1.
+       @ Pack final result together.
+       @ Round with MSB of r1. If halfway between two numbers, round towards
+       @ LSB of r0 = 0. 
+LSYM(Lad_e):
+       cmp     r1, #0x80000000         @ C = round up, Z = exact tie
+       adc     r0, r0, r2, lsl #23     @ add exponent and rounding carry
+       do_it   eq
+       biceq   r0, r0, #1
+       orr     r0, r0, r3              @ merge the sign
+       RET
+
+       @ Result must be shifted left and exponent adjusted.
+LSYM(Lad_a):
+       movs    r1, r1, lsl #1
+       adc     r0, r0, r0              @ shift r0 left, taking in the bit shifted out of r1
+       tst     r0, #0x00800000
+       sub     r2, r2, #1
+       bne     LSYM(Lad_e)
+       
+       @ No rounding necessary since r1 will always be 0 at this point.
+LSYM(Lad_l):
+
+#if __ARM_ARCH__ < 5
+
+       @ No clz here: find the leading one by successive halving.
+       movs    ip, r0, lsr #12         @ nothing at bit 12 or above?
+       moveq   r0, r0, lsl #12
+       subeq   r2, r2, #12
+       tst     r0, #0x00ff0000
+       moveq   r0, r0, lsl #8
+       subeq   r2, r2, #8
+       tst     r0, #0x00f00000
+       moveq   r0, r0, lsl #4
+       subeq   r2, r2, #4
+       tst     r0, #0x00c00000
+       moveq   r0, r0, lsl #2
+       subeq   r2, r2, #2
+       cmp     r0, #0x00800000
+       movcc   r0, r0, lsl #1
+       sbcs    r2, r2, #0              @ sets the flags tested after the #endif
+
+#else
+
+       clz     ip, r0
+       sub     ip, ip, #8              @ ip = left shift that brings the leading one to bit 23
+       subs    r2, r2, ip
+       shift1  lsl, r0, r0, ip
+
+#endif
+
+       @ Final result with sign
+       @ If exponent negative, denormalize result.
+       do_it   ge, et
+       addge   r0, r0, r2, lsl #23
+       rsblt   r2, r2, #0
+       orrge   r0, r0, r3
+#if defined(__thumb2__)
+       do_it   lt, t
+       lsrlt   r0, r0, r2
+       orrlt   r0, r3, r0
+#else
+       orrlt   r0, r3, r0, lsr r2
+#endif
+       RET
+
+       @ Fixup and adjust bit position for denormalized arguments.
+       @ Note that r2 must not remain equal to 0.
+LSYM(Lad_d):
+       teq     r2, #0
+       eor     r1, r1, #0x00800000     @ toggles the leading-one bit set earlier in r1
+       do_it   eq, te
+       eoreq   r0, r0, #0x00800000
+       addeq   r2, r2, #1
+       subne   r3, r3, #1
+       b       LSYM(Lad_x)
+
+LSYM(Lad_s):
+       mov     r3, r1, lsl #1          @ r3 may not have been set by the conditional code at entry
+
+       mvns    ip, r2, asr #24         @ Z set if the first exponent is 0xff
+       do_it   ne
+       COND(mvn,s,ne)  ip, r3, asr #24
+       beq     LSYM(Lad_i)
+
+       teq     r2, r3
+       beq     1f
+
+       @ Result is x + 0.0 = x or 0.0 + y = y.
+       teq     r2, #0
+       do_it   eq
+       moveq   r0, r1
+       RET
+
+1:     teq     r0, r1                  @ same magnitude: do the signs match too?
+
+       @ Result is x - x = 0.
+       do_it   ne, t
+       movne   r0, #0
+       RETc(ne)
+
+       @ Result is x + x = 2x.
+       tst     r2, #0xff000000         @ exponent zero: denormal, or zero itself
+       bne     2f
+       movs    r0, r0, lsl #1          @ double it; the sign bit falls out into C
+       do_it   cs
+       orrcs   r0, r0, #0x80000000     @ put the sign back
+       RET
+2:     adds    r2, r2, #(2 << 24)
+       do_it   cc, t
+       addcc   r0, r0, #(1 << 23)      @ add one to the exponent field
+       RETc(cc)
+       and     r3, r0, #0x80000000
+
+       @ Overflow: return INF.
+LSYM(Lad_o):
+       orr     r0, r3, #0x7f000000
+       orr     r0, r0, #0x00800000
+       RET
+
+       @ At least one of r0/r1 is INF/NAN.
+       @   if r0 != INF/NAN: return r1 (which is INF/NAN)
+       @   if r1 != INF/NAN: return r0 (which is INF/NAN)
+       @   if r0 or r1 is NAN: return NAN
+       @   if opposite sign: return NAN
+       @   otherwise return r0 (which is INF or -INF)
+LSYM(Lad_i):
+       mvns    r2, r2, asr #24
+       do_it   ne, et
+       movne   r0, r1
+       COND(mvn,s,eq)  r3, r3, asr #24
+       movne   r1, r0
+       movs    r2, r0, lsl #9          @ Z set if the mantissa of r0 is zero
+       do_it   eq, te
+       COND(mov,s,eq)  r3, r1, lsl #9
+       teqeq   r0, r1
+       orrne   r0, r0, #0x00400000     @ quiet NAN
+       RET
+
+       FUNC_END aeabi_frsub
+       FUNC_END aeabi_fadd
+       FUNC_END addsf3
+       FUNC_END aeabi_fsub
+       FUNC_END subsf3
+
+ARM_FUNC_START floatunsisf
+ARM_FUNC_ALIAS aeabi_ui2f floatunsisf
+               
+       mov     r3, #0                  @ unsigned input: no sign bit
+       b       1f
+
+ARM_FUNC_START floatsisf
+ARM_FUNC_ALIAS aeabi_i2f floatsisf
+       
+       ands    r3, r0, #0x80000000     @ r3 = sign bit
+       do_it   mi
+       rsbmi   r0, r0, #0              @ negative input: negate r0
+
+1:     movs    ip, r0
+       do_it   eq
+       RETc(eq)                        @ zero input: return r0 unchanged
+
+       @ Add initial exponent to sign
+       orr     r3, r3, #((127 + 23) << 23)
+
+       .ifnc   ah, r0
+       mov     ah, r0
+       .endif
+       mov     al, #0
+       b       2f                      @ label 2 lies past this function, in the 64-bit conversion code
+
+       FUNC_END aeabi_i2f
+       FUNC_END floatsisf
+       FUNC_END aeabi_ui2f
+       FUNC_END floatunsisf
+
+ARM_FUNC_START floatundisf
+ARM_FUNC_ALIAS aeabi_ul2f floatundisf
+
+       orrs    r2, r0, r1              @ Z set if the whole 64-bit input is zero
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+       do_it   eq, t
+       mvfeqs  f0, #0.0
+#else
+       do_it   eq
+#endif
+       RETc(eq)
+
+       mov     r3, #0                  @ unsigned input: no sign bit
+       b       1f
+
+ARM_FUNC_START floatdisf
+ARM_FUNC_ALIAS aeabi_l2f floatdisf
+
+       orrs    r2, r0, r1              @ Z set if the whole 64-bit input is zero
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+       do_it   eq, t
+       mvfeqs  f0, #0.0
+#else
+       do_it   eq
+#endif
+       RETc(eq)
+
+       ands    r3, ah, #0x80000000     @ sign bit in r3
+       bpl     1f
+#if defined(__thumb2__)
+       negs    al, al
+       sbc     ah, ah, ah, lsl #1      @ ah - 2*ah - borrow: same value as the rsc below
+#else
+       rsbs    al, al, #0
+       rsc     ah, ah, #0
+#endif
+1:
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+       @ For hard FPA code we want to return via the tail below so that
+       @ we can return the result in f0 as well as in r0 for backwards
+       @ compatibility.
+       str     lr, [sp, #-8]!
+       adr     lr, LSYM(f0_ret)
+#endif
+
+       movs    ip, ah                  @ ip = high word, Z set if it is zero
+       do_it   eq, tt
+       moveq   ip, al                  @ high word empty: work on the low word instead
+       moveq   ah, al
+       moveq   al, #0
+
+       @ Add initial exponent to sign
+       orr     r3, r3, #((127 + 23 + 32) << 23)
+       do_it   eq
+       subeq   r3, r3, #(32 << 23)     @ Z still from movs above: words were moved up by 32 bits
+2:     sub     r3, r3, #(1 << 23)      @ presumably offsets the leading one added on top of the exponent below
+
+#if __ARM_ARCH__ < 5
+
+       @ No clz here; presumably yields the same r2 and flags as the clz path below.
+       mov     r2, #23
+       cmp     ip, #(1 << 16)
+       do_it   hs, t
+       movhs   ip, ip, lsr #16
+       subhs   r2, r2, #16
+       cmp     ip, #(1 << 8)
+       do_it   hs, t
+       movhs   ip, ip, lsr #8
+       subhs   r2, r2, #8
+       cmp     ip, #(1 << 4)
+       do_it   hs, t
+       movhs   ip, ip, lsr #4
+       subhs   r2, r2, #4
+       cmp     ip, #(1 << 2)
+       do_it   hs, e
+       subhs   r2, r2, #2
+       sublo   r2, r2, ip, lsr #1
+       subs    r2, r2, ip, lsr #3
+
+#else
+
+       clz     r2, ip
+       subs    r2, r2, #8              @ r2 = left shift that brings the leading one to bit 23
+
+#endif
+
+       sub     r3, r3, r2, lsl #23     @ adjust the exponent by that shift; flags are kept
+       blt     3f                      @ r2 negative: a right shift is needed instead
+
+       shiftop add r3 r3 ah lsl r2 ip
+       shift1  lsl, ip, al, r2
+       rsb     r2, r2, #32
+       cmp     ip, #0x80000000         @ C = round up, Z = exact tie
+       shiftop adc r0 r3 al lsr r2 r2
+       do_it   eq
+       biceq   r0, r0, #1              @ exact tie: clear the LSB (round to even)
+       RET
+
+3:     add     r2, r2, #32
+       shift1  lsl, ip, ah, r2
+       rsb     r2, r2, #32
+       orrs    al, al, ip, lsl #1      @ Z set if nothing lies below the rounding bit
+       shiftop adc r0 r3 ah lsr r2 r2
+       do_it   eq
+       biceq   r0, r0, ip, lsr #31
+       RET
+
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+
+LSYM(f0_ret):
+       str     r0, [sp, #-4]!          @ pass the result through the stack
+       ldfs    f0, [sp], #4            @ to load it into f0 as well
+       RETLDM
+
+#endif
+
+       FUNC_END floatdisf
+       FUNC_END aeabi_l2f
+       FUNC_END floatundisf
+       FUNC_END aeabi_ul2f
+
+#endif /* L_addsubsf3 */
+
+#ifdef L_arm_muldivsf3
+
+ARM_FUNC_START mulsf3                   /* single-precision multiply: r0 = r0 * r1 */
+ARM_FUNC_ALIAS aeabi_fmul mulsf3
+
+       @ Mask out exponents, trap any zero/denormal/INF/NAN.
+       mov     ip, #0xff               @ ip = exponent mask, reused below
+       ands    r2, ip, r0, lsr #23     @ r2 = exponent of r0
+       do_it   ne, tt
+       COND(and,s,ne)  r3, ip, r1, lsr #23     @ r3 = exponent of r1
+       teqne   r2, ip
+       teqne   r3, ip
+       beq     LSYM(Lml_s)             @ some exponent is 0 or 0xff
+LSYM(Lml_x):
+
+       @ Add exponents together
+       add     r2, r2, r3
+
+       @ Determine final sign.
+       eor     ip, r0, r1
+
+       @ Convert mantissa to unsigned integer.
+       @ If power of two, branch to a separate path.
+       @ Make up for final alignment.
+       movs    r0, r0, lsl #9
+       do_it   ne
+       COND(mov,s,ne)  r1, r1, lsl #9
+       beq     LSYM(Lml_1)             @ one mantissa field is all zero
+       mov     r3, #0x08000000         @ leading 1 bit, at its aligned position
+       orr     r0, r3, r0, lsr #5
+       orr     r1, r3, r1, lsr #5
+
+#if __ARM_ARCH__ < 4
+
+       @ Put sign bit in r3, which will be restored into r0 later.
+       and     r3, ip, #0x80000000
+
+       @ Well, no way to make it shorter without the umull instruction.
+       do_push {r3, r4, r5}
+       mov     r4, r0, lsr #16         @ split both operands into 16-bit halves
+       mov     r5, r1, lsr #16
+       bic     r0, r0, r4, lsl #16
+       bic     r1, r1, r5, lsl #16
+       mul     ip, r4, r5              @ high * high
+       mul     r3, r0, r1              @ low * low
+       mul     r0, r5, r0              @ cross terms, summed in r0
+       mla     r0, r4, r1, r0
+       adds    r3, r3, r0, lsl #16
+       adc     r1, ip, r0, lsr #16
+       do_pop  {r0, r4, r5}
+
+#else
+
+       @ The actual multiplication.
+       umull   r3, r1, r0, r1          @ r3 = low word, r1 = high word of product
+
+       @ Put final sign in r0.
+       and     r0, ip, #0x80000000
+
+#endif
+
+       @ Adjust result upon the MSB position.
+       cmp     r1, #(1 << 23)
+       do_it   cc, tt
+       movcc   r1, r1, lsl #1
+       orrcc   r1, r1, r3, lsr #31
+       movcc   r3, r3, lsl #1
+
+       @ Add sign to result.
+       orr     r0, r0, r1
+
+       @ Apply exponent bias, check for under/overflow.
+       sbc     r2, r2, #127            @ uses C from the cmp above: one less if shifted
+       cmp     r2, #(254 - 1)
+       bhi     LSYM(Lml_u)
+
+       @ Round the result, merge final exponent.
+       cmp     r3, #0x80000000         @ C = round up, Z = exact tie
+       adc     r0, r0, r2, lsl #23
+       do_it   eq
+       biceq   r0, r0, #1              @ tie: round to even
+       RET
+
+       @ Multiplication by 0x1p*: let''s shortcut a lot of code.
+LSYM(Lml_1):
+       teq     r0, #0
+       and     ip, ip, #0x80000000
+       do_it   eq
+       moveq   r1, r1, lsl #9          @ r0 was zero, so r1 was not shifted above
+       orr     r0, ip, r0, lsr #9
+       orr     r0, r0, r1, lsr #9
+       subs    r2, r2, #127
+       do_it   gt, tt
+       COND(rsb,s,gt)  r3, r2, #255
+       orrgt   r0, r0, r2, lsl #23
+       RETc(gt)
+
+       @ Under/overflow: fix things up for the code below.
+       orr     r0, r0, #0x00800000
+       mov     r3, #0
+       subs    r2, r2, #1
+
+LSYM(Lml_u):
+       @ Overflow?
+       bgt     LSYM(Lml_o)
+
+       @ Check if denormalized result is possible, otherwise return signed 0.
+       cmn     r2, #(24 + 1)
+       do_it   le, t
+       bicle   r0, r0, #0x7fffffff
+       RETc(le)
+
+       @ Shift value right, round, etc.
+       rsb     r2, r2, #0
+       movs    r1, r0, lsl #1
+       shift1  lsr, r1, r1, r2
+       rsb     r2, r2, #32
+       shift1  lsl, ip, r0, r2
+       movs    r0, r1, rrx
+       adc     r0, r0, #0
+       orrs    r3, r3, ip, lsl #1
+       do_it   eq
+       biceq   r0, r0, ip, lsr #31
+       RET
+
+       @ One or both arguments are denormalized.
+       @ Scale them leftwards and preserve sign bit.
+LSYM(Lml_d):
+       teq     r2, #0
+       and     ip, r0, #0x80000000
+1:     do_it   eq, tt
+       moveq   r0, r0, lsl #1
+       tsteq   r0, #0x00800000
+       subeq   r2, r2, #1
+       beq     1b                      @ repeat until bit 23 of r0 is set
+       orr     r0, r0, ip
+       teq     r3, #0
+       and     ip, r1, #0x80000000
+2:     do_it   eq, tt
+       moveq   r1, r1, lsl #1
+       tsteq   r1, #0x00800000
+       subeq   r3, r3, #1
+       beq     2b                      @ repeat until bit 23 of r1 is set
+       orr     r1, r1, ip
+       b       LSYM(Lml_x)
+
+LSYM(Lml_s):
+       @ Isolate the INF and NAN cases away
+       and     r3, ip, r1, lsr #23
+       teq     r2, ip
+       do_it   ne
+       teqne   r3, ip
+       beq     1f
+
+       @ Here, one or more arguments are either denormalized or zero.
+       bics    ip, r0, #0x80000000
+       do_it   ne
+       COND(bic,s,ne)  ip, r1, #0x80000000
+       bne     LSYM(Lml_d)
+
+       @ Result is 0, but determine sign anyway.
+LSYM(Lml_z):
+       eor     r0, r0, r1
+       bic     r0, r0, #0x7fffffff
+       RET
+
+1:     @ One or both args are INF or NAN.
+       teq     r0, #0x0
+       do_it   ne, ett
+       teqne   r0, #0x80000000
+       moveq   r0, r1
+       teqne   r1, #0x0
+       teqne   r1, #0x80000000
+       beq     LSYM(Lml_n)             @ 0 * INF or INF * 0 -> NAN
+       teq     r2, ip
+       bne     1f
+       movs    r2, r0, lsl #9
+       bne     LSYM(Lml_n)             @ NAN * <anything> -> NAN
+1:     teq     r3, ip
+       bne     LSYM(Lml_i)
+       movs    r3, r1, lsl #9
+       do_it   ne
+       movne   r0, r1
+       bne     LSYM(Lml_n)             @ <anything> * NAN -> NAN
+
+       @ Result is INF, but we need to determine its sign.
+LSYM(Lml_i):
+       eor     r0, r0, r1
+
+       @ Overflow: return INF (sign already in r0).
+LSYM(Lml_o):
+       and     r0, r0, #0x80000000
+       orr     r0, r0, #0x7f000000
+       orr     r0, r0, #0x00800000
+       RET
+
+       @ Return a quiet NAN.
+LSYM(Lml_n):
+       orr     r0, r0, #0x7f000000
+       orr     r0, r0, #0x00c00000
+       RET
+
+       FUNC_END aeabi_fmul
+       FUNC_END mulsf3
+
+ARM_FUNC_START divsf3                   /* single-precision divide: r0 = r0 / r1 */
+ARM_FUNC_ALIAS aeabi_fdiv divsf3
+
+       @ Mask out exponents, trap any zero/denormal/INF/NAN.
+       mov     ip, #0xff               @ ip = exponent mask, reused below
+       ands    r2, ip, r0, lsr #23     @ r2 = exponent of r0
+       do_it   ne, tt
+       COND(and,s,ne)  r3, ip, r1, lsr #23     @ r3 = exponent of r1
+       teqne   r2, ip
+       teqne   r3, ip
+       beq     LSYM(Ldv_s)             @ some exponent is 0 or 0xff
+LSYM(Ldv_x):
+
+       @ Subtract divisor exponent from dividend''s
+       sub     r2, r2, r3
+
+       @ Preserve final sign into ip.
+       eor     ip, r0, r1
+
+       @ Convert mantissa to unsigned integer.
+       @ Dividend -> r3, divisor -> r1.
+       movs    r1, r1, lsl #9
+       mov     r0, r0, lsl #9
+       beq     LSYM(Ldv_1)             @ divisor mantissa field is all zero
+       mov     r3, #0x10000000         @ leading 1 bit, at its aligned position
+       orr     r1, r3, r1, lsr #4
+       orr     r3, r3, r0, lsr #4
+
+       @ Initialize r0 (result) with final sign bit.
+       and     r0, ip, #0x80000000
+
+       @ Ensure result will land to known bit position.
+       @ Apply exponent bias accordingly.
+       cmp     r3, r1
+       do_it   cc
+       movcc   r3, r3, lsl #1
+       adc     r2, r2, #(127 - 2)      @ adds C from the cmp above (1 if not shifted)
+
+       @ The actual division loop.
+       mov     ip, #0x00800000         @ ip = quotient bit being produced
+1:     cmp     r3, r1                  @ four quotient bits per iteration
+       do_it   cs, t
+       subcs   r3, r3, r1
+       orrcs   r0, r0, ip
+       cmp     r3, r1, lsr #1
+       do_it   cs, t
+       subcs   r3, r3, r1, lsr #1
+       orrcs   r0, r0, ip, lsr #1
+       cmp     r3, r1, lsr #2
+       do_it   cs, t
+       subcs   r3, r3, r1, lsr #2
+       orrcs   r0, r0, ip, lsr #2
+       cmp     r3, r1, lsr #3
+       do_it   cs, t
+       subcs   r3, r3, r1, lsr #3
+       orrcs   r0, r0, ip, lsr #3
+       movs    r3, r3, lsl #4
+       do_it   ne
+       COND(mov,s,ne)  ip, ip, lsr #4
+       bne     1b                      @ stop when remainder or bit mask reaches 0
+
+       @ Check exponent for under/overflow.
+       cmp     r2, #(254 - 1)
+       bhi     LSYM(Lml_u)             @ handled by the shared code in mulsf3
+
+       @ Round the result, merge final exponent.
+       cmp     r3, r1                  @ C = round up, Z = exact tie
+       adc     r0, r0, r2, lsl #23
+       do_it   eq
+       biceq   r0, r0, #1              @ tie: round to even
+       RET
+
+       @ Division by 0x1p*: let''s shortcut a lot of code.
+LSYM(Ldv_1):
+       and     ip, ip, #0x80000000
+       orr     r0, ip, r0, lsr #9
+       adds    r2, r2, #127
+       do_it   gt, tt
+       COND(rsb,s,gt)  r3, r2, #255
+       orrgt   r0, r0, r2, lsl #23
+       RETc(gt)
+
+       orr     r0, r0, #0x00800000
+       mov     r3, #0
+       subs    r2, r2, #1
+       b       LSYM(Lml_u)
+
+       @ One or both arguments are denormalized.
+       @ Scale them leftwards and preserve sign bit.
+LSYM(Ldv_d):
+       teq     r2, #0
+       and     ip, r0, #0x80000000
+1:     do_it   eq, tt
+       moveq   r0, r0, lsl #1
+       tsteq   r0, #0x00800000
+       subeq   r2, r2, #1
+       beq     1b                      @ repeat until bit 23 of r0 is set
+       orr     r0, r0, ip
+       teq     r3, #0
+       and     ip, r1, #0x80000000
+2:     do_it   eq, tt
+       moveq   r1, r1, lsl #1
+       tsteq   r1, #0x00800000
+       subeq   r3, r3, #1
+       beq     2b                      @ repeat until bit 23 of r1 is set
+       orr     r1, r1, ip
+       b       LSYM(Ldv_x)
+
+       @ One or both arguments are either INF, NAN, zero or denormalized.
+LSYM(Ldv_s):
+       and     r3, ip, r1, lsr #23
+       teq     r2, ip
+       bne     1f
+       movs    r2, r0, lsl #9
+       bne     LSYM(Lml_n)             @ NAN / <anything> -> NAN
+       teq     r3, ip
+       bne     LSYM(Lml_i)             @ INF / <anything> -> INF
+       mov     r0, r1
+       b       LSYM(Lml_n)             @ INF / (INF or NAN) -> NAN
+1:     teq     r3, ip
+       bne     2f
+       movs    r3, r1, lsl #9
+       beq     LSYM(Lml_z)             @ <anything> / INF -> 0
+       mov     r0, r1
+       b       LSYM(Lml_n)             @ <anything> / NAN -> NAN
+2:     @ If both are nonzero, we need to normalize and resume above.
+       bics    ip, r0, #0x80000000
+       do_it   ne
+       COND(bic,s,ne)  ip, r1, #0x80000000
+       bne     LSYM(Ldv_d)
+       @ One or both arguments are zero.
+       bics    r2, r0, #0x80000000
+       bne     LSYM(Lml_i)             @ <non_zero> / 0 -> INF
+       bics    r3, r1, #0x80000000
+       bne     LSYM(Lml_z)             @ 0 / <non_zero> -> 0
+       b       LSYM(Lml_n)             @ 0 / 0 -> NAN
+
+       FUNC_END aeabi_fdiv
+       FUNC_END divsf3
+
+#endif /* L_arm_muldivsf3 */
+
+#ifdef L_arm_cmpsf2
+
+       @ The return value in r0 is
+       @
+       @   0  if the operands are equal
+       @   1  if the first operand is greater than the second, or
+       @      the operands are unordered and the operation is
+       @      CMP, LT, LE, NE, or EQ.
+       @   -1 if the first operand is less than the second, or
+       @      the operands are unordered and the operation is GT
+       @      or GE.
+       @
+       @ The Z flag will be set iff the operands are equal.
+       @
+       @ The following registers are clobbered by this function:
+       @   ip, r0, r1, r2, r3
+
+ARM_FUNC_START gtsf2
+ARM_FUNC_ALIAS gesf2 gtsf2
+       mov     ip, #-1                 @ value to return if unordered
+       b       1f
+
+ARM_FUNC_START ltsf2
+ARM_FUNC_ALIAS lesf2 ltsf2
+       mov     ip, #1                  @ value to return if unordered
+       b       1f
+
+ARM_FUNC_START cmpsf2
+ARM_FUNC_ALIAS nesf2 cmpsf2
+ARM_FUNC_ALIAS eqsf2 cmpsf2
+       mov     ip, #1                  @ how should we specify unordered here?
+
+1:     str     ip, [sp, #-4]!          @ keep the unordered value on the stack
+
+       @ Trap any INF/NAN first.
+       mov     r2, r0, lsl #1          @ r2, r3 = operands with the sign bit shifted out
+       mov     r3, r1, lsl #1
+       mvns    ip, r2, asr #24         @ Z set iff the exponent bits are all ones
+       do_it   ne
+       COND(mvn,s,ne)  ip, r3, asr #24
+       beq     3f
+
+       @ Compare values.
+       @ Note that 0.0 is equal to -0.0.
+2:     add     sp, sp, #4              @ drop the saved unordered value
+       orrs    ip, r2, r3, lsr #1      @ test if both are 0, clear C flag
+       do_it   ne
+       teqne   r0, r1                  @ if not 0 compare sign
+       do_it   pl
+       COND(sub,s,pl)  r0, r2, r3              @ if same sign compare values, set r0
+
+       @ Result:
+       do_it   hi
+       movhi   r0, r1, asr #31
+       do_it   lo
+       mvnlo   r0, r1, asr #31
+       do_it   ne
+       orrne   r0, r0, #1              @ r0 is 0 or -1 here: make it +1 or -1
+       RET
+
+       @ Look for a NAN.
+3:     mvns    ip, r2, asr #24
+       bne     4f
+       movs    ip, r0, lsl #9          @ nonzero mantissa bits mean NAN, not INF
+       bne     5f                      @ r0 is NAN
+4:     mvns    ip, r3, asr #24
+       bne     2b
+       movs    ip, r1, lsl #9
+       beq     2b                      @ r1 is not NAN
+5:     ldr     r0, [sp], #4            @ return unordered code.
+       RET
+
+       FUNC_END gesf2
+       FUNC_END gtsf2
+       FUNC_END lesf2
+       FUNC_END ltsf2
+       FUNC_END nesf2
+       FUNC_END eqsf2
+       FUNC_END cmpsf2
+
+ARM_FUNC_START aeabi_cfrcmple           /* reversed compare: swaps r0 and r1 first */
+
+       mov     ip, r0                  @ swap the two operands through ip
+       mov     r0, r1
+       mov     r1, ip
+       b       6f
+
+ARM_FUNC_START aeabi_cfcmpeq            /* compare r0 with r1, result in the flags */
+ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
+
+       @ The status-returning routines are required to preserve all
+       @ registers except ip, lr, and cpsr.
+6:     do_push {r0, r1, r2, r3, lr}
+       ARM_CALL cmpsf2
+       @ Set the Z flag correctly, and the C flag unconditionally.
+       cmp     r0, #0
+       @ Clear the C flag if the return value was -1, indicating
+       @ that the first operand was smaller than the second.
+       do_it   mi
+       cmnmi   r0, #0
+       RETLDM  "r0, r1, r2, r3"
+
+       FUNC_END aeabi_cfcmple
+       FUNC_END aeabi_cfcmpeq
+       FUNC_END aeabi_cfrcmple
+
+ARM_FUNC_START aeabi_fcmpeq             /* r0 = 1 if r0 == r1, else 0 */
+
+       str     lr, [sp, #-8]!          @ save lr in an 8-byte slot
+       ARM_CALL aeabi_cfcmple          /* leaves the comparison result in the flags */
+       do_it   eq, e
+       moveq   r0, #1  @ Equal to.
+       movne   r0, #0  @ Less than, greater than, or unordered.
+       RETLDM
+
+       FUNC_END aeabi_fcmpeq
+
+ARM_FUNC_START aeabi_fcmplt             /* r0 = 1 if r0 < r1, else 0 */
+
+       str     lr, [sp, #-8]!          @ save lr in an 8-byte slot
+       ARM_CALL aeabi_cfcmple          /* leaves the comparison result in the flags */
+       do_it   cc, e
+       movcc   r0, #1  @ Less than.
+       movcs   r0, #0  @ Equal to, greater than, or unordered.
+       RETLDM
+
+       FUNC_END aeabi_fcmplt
+
+ARM_FUNC_START aeabi_fcmple             /* r0 = 1 if r0 <= r1, else 0 */
+
+       str     lr, [sp, #-8]!          @ save lr in an 8-byte slot
+       ARM_CALL aeabi_cfcmple          /* leaves the comparison result in the flags */
+       do_it   ls, e
+       movls   r0, #1  @ Less than or equal to.
+       movhi   r0, #0  @ Greater than or unordered.
+       RETLDM
+
+       FUNC_END aeabi_fcmple
+
+ARM_FUNC_START aeabi_fcmpge             /* r0 = 1 if r0 >= r1, else 0 */
+
+       str     lr, [sp, #-8]!          @ save lr in an 8-byte slot
+       ARM_CALL aeabi_cfrcmple         /* reversed compare: operands are swapped */
+       do_it   ls, e
+       movls   r0, #1  @ Operand 2 is less than or equal to operand 1.
+       movhi   r0, #0  @ Operand 2 greater than operand 1, or unordered.
+       RETLDM
+
+       FUNC_END aeabi_fcmpge
+
+ARM_FUNC_START aeabi_fcmpgt             /* r0 = 1 if r0 > r1, else 0 */
+
+       str     lr, [sp, #-8]!          @ save lr in an 8-byte slot
+       ARM_CALL aeabi_cfrcmple         /* reversed compare: operands are swapped */
+       do_it   cc, e
+       movcc   r0, #1  @ Operand 2 is less than operand 1.
+       movcs   r0, #0  @ Operand 2 is greater than or equal to operand 1,
+                       @ or they are unordered.
+       RETLDM
+
+       FUNC_END aeabi_fcmpgt
+
+#endif /* L_arm_cmpsf2 */
+
+#ifdef L_arm_unordsf2
+
+ARM_FUNC_START unordsf2                 /* r0 = 1 if r0 or r1 is a NAN, else 0 */
+ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
+
+       mov     r2, r0, lsl #1          @ r2, r3 = operands with the sign bit shifted out
+       mov     r3, r1, lsl #1
+       mvns    ip, r2, asr #24         @ Z set iff the exponent bits are all ones
+       bne     1f
+       movs    ip, r0, lsl #9          @ nonzero mantissa bits mean NAN, not INF
+       bne     3f                      @ r0 is NAN
+1:     mvns    ip, r3, asr #24
+       bne     2f
+       movs    ip, r1, lsl #9
+       bne     3f                      @ r1 is NAN
+2:     mov     r0, #0                  @ arguments are ordered.
+       RET
+3:     mov     r0, #1                  @ arguments are unordered.
+       RET
+
+       FUNC_END aeabi_fcmpun
+       FUNC_END unordsf2
+
+#endif /* L_arm_unordsf2 */
+
+#ifdef L_arm_fixsfsi
+
+ARM_FUNC_START fixsfsi                  /* float in r0 to signed 32-bit integer */
+ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
+
+       @ check exponent range.
+       mov     r2, r0, lsl #1          @ shift the sign bit out
+       cmp     r2, #(127 << 24)
+       bcc     1f                      @ value is too small
+       mov     r3, #(127 + 31)
+       subs    r2, r3, r2, lsr #24     @ r2 = (127 + 31) - exponent = shift count
+       bls     2f                      @ value is too large
+
+       @ scale value
+       mov     r3, r0, lsl #8          @ mantissa moved to the top of the word
+       orr     r3, r3, #0x80000000     @ with the leading 1 bit made explicit
+       tst     r0, #0x80000000         @ the sign bit
+       shift1  lsr, r0, r3, r2
+       do_it   ne
+       rsbne   r0, r0, #0              @ negative input: negate the result
+       RET
+
+1:     mov     r0, #0
+       RET
+
+2:     cmp     r2, #(127 + 31 - 0xff)  @ is the exponent 0xff (INF or NAN)?
+       bne     3f
+       movs    r2, r0, lsl #9
+       bne     4f                      @ r0 is NAN.
+3:     ands    r0, r0, #0x80000000     @ the sign bit
+       do_it   eq
+       moveq   r0, #0x7fffffff         @ the maximum signed positive si
+       RET
+
+4:     mov     r0, #0                  @ What should we convert NAN to?
+       RET
+
+       FUNC_END aeabi_f2iz
+       FUNC_END fixsfsi
+
+#endif /* L_arm_fixsfsi */
+
+#ifdef L_arm_fixunssfsi
+
+ARM_FUNC_START fixunssfsi               /* float in r0 to unsigned 32-bit integer */
+ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
+
+       @ check exponent range.
+       movs    r2, r0, lsl #1          @ C = sign bit
+       bcs     1f                      @ value is negative
+       cmp     r2, #(127 << 24)
+       bcc     1f                      @ value is too small
+       mov     r3, #(127 + 31)
+       subs    r2, r3, r2, lsr #24     @ r2 = (127 + 31) - exponent = shift count
+       bmi     2f                      @ value is too large
+
+       @ scale the value
+       mov     r3, r0, lsl #8          @ mantissa moved to the top of the word
+       orr     r3, r3, #0x80000000     @ with the leading 1 bit made explicit
+       shift1  lsr, r0, r3, r2
+       RET
+
+1:     mov     r0, #0
+       RET
+
+2:     cmp     r2, #(127 + 31 - 0xff)  @ is the exponent 0xff (INF or NAN)?
+       bne     3f
+       movs    r2, r0, lsl #9
+       bne     4f                      @ r0 is NAN.
+3:     mov     r0, #0xffffffff         @ maximum unsigned si
+       RET
+
+4:     mov     r0, #0                  @ What should we convert NAN to?
+       RET
+
+       FUNC_END aeabi_f2uiz
+       FUNC_END fixunssfsi
+
+#endif /* L_arm_fixunssfsi */
diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S

new file mode 100644 (file)

index 0000000..2e76c01
--- /dev/null
+++ b/libgcc/config/arm/lib1funcs.S
@@ -0,0 +1,1829 @@
+@ libgcc routines for ARM cpu.
+@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
+
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008,
+   2009, 2010 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* An executable stack is *not* required for these functions.  */
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif  /* __ELF__ and __linux__ */
+
+#ifdef __ARM_EABI__
+/* Some attributes that are common to all routines in this file.  */
+       /* Tag_ABI_align_needed: This code does not require 8-byte
+          alignment from the caller.  */
+       /* .eabi_attribute 24, 0  -- default setting.  */
+       /* Tag_ABI_align_preserved: This code preserves 8-byte
+          alignment in any callee.  */
+       .eabi_attribute 25, 1
+#endif /* __ARM_EABI__ */
+/* ------------------------------------------------------------------------ */
+
+/* We need to know what prefix to add to function names.  */
+
+#ifndef __USER_LABEL_PREFIX__
+#error  __USER_LABEL_PREFIX__ not defined
+#endif
+
+/* ANSI concatenation macros: CONCAT1 expands its arguments, CONCAT2 pastes.  */
+
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+/* Use the right prefix for global labels: SYM(x) is __USER_LABEL_PREFIX__ ## x.  */
+
+#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+
+#ifdef __ELF__
+#ifdef __thumb__
+#define __PLT__  /* Not supported in Thumb assembler (for now).  */
+#elif defined __vxworks && !defined __PIC__
+#define __PLT__ /* Not supported by the kernel loader.  */
+#else
+#define __PLT__ (PLT) /* Suffix appended to branch targets.  */
+#endif
+#define TYPE(x) .type SYM(x),function /* Mark x as a function symbol.  */
+#define SIZE(x) .size SYM(x), . - SYM(x) /* Size runs from x to here.  */
+#define LSYM(x) .x /* Local labels get a leading dot.  */
+#else /* !__ELF__: no PLT suffix, no symbol type/size, plain local labels.  */
+#define __PLT__
+#define TYPE(x)
+#define SIZE(x)
+#define LSYM(x) x
+#endif
+
+/* Function end macros.  Variants for interworking.  First, map the __ARM_ARCH_<variant>__ macros onto one numeric __ARM_ARCH__.  */
+
+#if defined(__ARM_ARCH_2__)
+# define __ARM_ARCH__ 2
+#endif
+
+#if defined(__ARM_ARCH_3__)
+# define __ARM_ARCH__ 3
+#endif
+
+#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
+       || defined(__ARM_ARCH_4T__)
+/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
+   long multiply instructions.  That includes v3M.  */
+# define __ARM_ARCH__ 4
+#endif
+       
+#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
+       || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
+       || defined(__ARM_ARCH_5TEJ__)
+# define __ARM_ARCH__ 5
+#endif
+
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+       || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+       || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
+       || defined(__ARM_ARCH_6M__)
+# define __ARM_ARCH__ 6
+#endif
+
+#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+       || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+       || defined(__ARM_ARCH_7EM__)
+# define __ARM_ARCH__ 7
+#endif
+
+#ifndef __ARM_ARCH__ /* None of the variants listed above matched.  */
+#error Unable to determine architecture.
+#endif
+
+/* There are times when we might prefer Thumb1 code even if ARM code is
+   permitted, for example, the code might be smaller, or there might be
+   interworking problems with switching to ARM state if interworking is
+   disabled.  */
+#if (defined(__thumb__)                        \
+     && !defined(__thumb2__)           \
+     && (!defined(__THUMB_INTERWORK__) \
+        || defined (__OPTIMIZE_SIZE__) \
+        || defined(__ARM_ARCH_6M__)))
+# define __prefer_thumb__ /* Never defined when __thumb2__ is defined.  */
+#endif
+
+/* How to return from a function call depends on the architecture variant.  */
+
+#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
+
+# define RET           bx      lr
+# define RETc(x)       bx##x   lr /* Conditional return; x is the condition code.  */
+
+/* Special precautions for interworking on armv4t.  */
+# if (__ARM_ARCH__ == 4)
+
+/* Always use bx, not ldr pc.  */
+#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
+#    define __INTERWORKING__
+#   endif /* __thumb__ || __THUMB_INTERWORK__ */
+
+/* Include thumb stub before arm mode code.  */
+#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
+#   define __INTERWORKING_STUBS__
+#  endif /* __thumb__ && !__THUMB_INTERWORK__ */
+
+#endif /* __ARM_ARCH__ == 4 */
+
+#else /* __ARM_ARCH__ <= 4 and not __ARM_ARCH_4T__ */
+
+# define RET           mov     pc, lr
+# define RETc(x)       mov##x  pc, lr /* Conditional return; x is the condition code.  */
+
+#endif
+
+.macro cfi_pop         advance, reg, cfa_offset /* Append to .debug_frame: advance, restore \reg, new CFA offset.  */
+#ifdef __ELF__
+       .pushsection    .debug_frame
+       .byte   0x4             /* DW_CFA_advance_loc4 */
+       .4byte  \advance
+       .byte   (0xc0 | \reg)   /* DW_CFA_restore */
+       .byte   0xe             /* DW_CFA_def_cfa_offset */
+       .uleb128 \cfa_offset
+       .popsection
+#endif
+.endm
+.macro cfi_push        advance, reg, offset, cfa_offset /* Append to .debug_frame: advance, \reg saved at \offset, new CFA offset.  */
+#ifdef __ELF__
+       .pushsection    .debug_frame
+       .byte   0x4             /* DW_CFA_advance_loc4 */
+       .4byte  \advance
+       .byte   (0x80 | \reg)   /* DW_CFA_offset */
+       .uleb128 (\offset / -4) /* Scaled by the CIE data alignment factor (-4).  */
+       .byte   0xe             /* DW_CFA_def_cfa_offset */
+       .uleb128 \cfa_offset
+       .popsection
+#endif
+.endm
+.macro cfi_start       start_label, end_label /* Emit a CIE and open an FDE for \start_label .. \end_label.  */
+#ifdef __ELF__
+       .pushsection    .debug_frame
+LSYM(Lstart_frame):
+       .4byte  LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
+LSYM(Lstart_cie):
+        .4byte 0xffffffff      @ CIE Identifier Tag
+        .byte  0x1     @ CIE Version
+        .ascii "\0"    @ CIE Augmentation
+        .uleb128 0x1   @ CIE Code Alignment Factor
+        .sleb128 -4    @ CIE Data Alignment Factor
+        .byte  0xe     @ CIE RA Column
+        .byte  0xc     @ DW_CFA_def_cfa
+        .uleb128 0xd
+        .uleb128 0x0
+
+       .align 2
+LSYM(Lend_cie):
+       .4byte  LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length
+LSYM(Lstart_fde):
+       .4byte  LSYM(Lstart_frame)      @ FDE CIE offset
+       .4byte  \start_label    @ FDE initial location
+       .4byte  \end_label-\start_label @ FDE address range
+       .popsection
+#endif
+.endm
+.macro cfi_end end_label /* Close the FDE opened by cfi_start and define \end_label here.  */
+#ifdef __ELF__
+       .pushsection    .debug_frame
+       .align  2
+LSYM(Lend_fde):
+       .popsection
+\end_label:
+#endif
+.endm
+
+/* Return by popping \regs (if any) and the return address.  Don't pass dirn, it's there just to get token pasting right.  */
+
+.macro RETLDM  regs=, cond=, unwind=, dirn=ia
+#if defined (__INTERWORKING__)
+       .ifc "\regs",""
+       ldr\cond        lr, [sp], #8 /* No extra regs: reload lr from an 8-byte slot.  */
+       .else
+# if defined(__thumb2__)
+       pop\cond        {\regs, lr}
+# else
+       ldm\cond\dirn   sp!, {\regs, lr}
+# endif
+       .endif
+       .ifnc "\unwind", ""
+       /* Mark LR as restored.  */
+97:    cfi_pop 97b - \unwind, 0xe, 0x0
+       .endif
+       bx\cond lr
+#else
+       /* Caller is responsible for providing IT instruction.  */
+       .ifc "\regs",""
+       ldr\cond        pc, [sp], #8 /* No extra regs: load pc from an 8-byte slot.  */
+       .else
+# if defined(__thumb2__)
+       pop\cond        {\regs, pc}
+# else
+       ldm\cond\dirn   sp!, {\regs, pc}
+# endif
+       .endif
+#endif
+.endm
+
+/* The Unified assembly syntax allows the same code to be assembled for both
+   ARM and Thumb-2.  However this is only supported by recent gas, so define
+   a set of macros to allow ARM code on older assemblers.  */
+#if defined(__thumb2__)
+.macro do_it cond, suffix="" /* Emits an IT instruction for the following conditional ops.  */
+       it\suffix       \cond
+.endm
+.macro shift1 op, arg0, arg1, arg2 /* arg0 = arg1 shifted by arg2 using \op.  */
+       \op     \arg0, \arg1, \arg2
+.endm
+#define do_push        push
+#define do_pop pop
+#define COND(op1, op2, cond) op1 ## op2 ## cond /* e.g. COND(and,s,ne) gives andsne.  */
+/* Perform an arithmetic operation with a variable shift operand.  This
+   requires two instructions and a scratch register on Thumb-2.  */
+.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
+       \shiftop \tmp, \src2, \shiftreg
+       \name \dest, \src1, \tmp
+.endm
+#else
+.macro do_it cond, suffix="" /* Expands to nothing when not building for Thumb-2.  */
+.endm
+.macro shift1 op, arg0, arg1, arg2 /* arg0 = arg1 shifted by arg2 using \op.  */
+       mov     \arg0, \arg1, \op \arg2
+.endm
+#define do_push        stmfd sp!,
+#define do_pop ldmfd sp!,
+#define COND(op1, op2, cond) op1 ## cond ## op2 /* e.g. COND(and,s,ne) gives andnes.  */
+.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp /* One instruction here; \tmp is unused.  */
+       \name \dest, \src1, \src2, \shiftop \shiftreg
+.endm
+#endif
+
+#ifdef __ARM_EABI__
+.macro ARM_LDIV0 name signed /* Saturate r0 by its sign, then branch to __aeabi_idiv0.  */
+       cmp     r0, #0
+       .ifc    \signed, unsigned
+       movne   r0, #0xffffffff
+       .else
+       movgt   r0, #0x7fffffff
+       movlt   r0, #0x80000000
+       .endif
+       b       SYM (__aeabi_idiv0) __PLT__
+.endm
+#else
+.macro ARM_LDIV0 name signed /* Call __div0, then return 0 in r0.  */
+       str     lr, [sp, #-8]!
+98:    cfi_push 98b - __\name, 0xe, -0x8, 0x8
+       bl      SYM (__div0) __PLT__
+       mov     r0, #0                  @ About as wrong as it could be.
+       RETLDM  unwind=98b
+.endm
+#endif
+
+
+#ifdef __ARM_EABI__
+.macro THUMB_LDIV0 name signed /* Saturate r0 by its sign, then jump to __aeabi_idiv0.  */
+#if defined(__ARM_ARCH_6M__)
+       .ifc \signed, unsigned
+       cmp     r0, #0
+       beq     1f
+       mov     r0, #0
+       mvn     r0, r0          @ 0xffffffff
+1:
+       .else
+       cmp     r0, #0
+       beq     2f
+       blt     3f
+       mov     r0, #0
+       mvn     r0, r0
+       lsr     r0, r0, #1      @ 0x7fffffff
+       b       2f
+3:     mov     r0, #0x80
+       lsl     r0, r0, #24     @ 0x80000000
+2:
+       .endif
+       push    {r0, r1, r2}    @ the r2 slot will receive the jump target
+       ldr     r0, 4f          @ r0 = offset of the handler from label 4
+       adr     r1, 4f
+       add     r0, r1          @ r0 = address of the handler
+       str     r0, [sp, #8]    @ overwrite the saved r2 slot with it
+       @ We know we are not on armv4t, so pop pc is safe.
+       pop     {r0, r1, pc}
+       .align  2
+4:
+       .word   __aeabi_idiv0 - 4b
+#elif defined(__thumb2__)
+       .syntax unified
+       .ifc \signed, unsigned
+       cbz     r0, 1f
+       mov     r0, #0xffffffff
+1:
+       .else
+       cmp     r0, #0
+       do_it   gt
+       movgt   r0, #0x7fffffff
+       do_it   lt
+       movlt   r0, #0x80000000
+       .endif
+       b.w     SYM(__aeabi_idiv0) __PLT__
+#else
+       .align  2
+       bx      pc              @ switch to ARM state for the code below
+       nop
+       .arm
+       cmp     r0, #0
+       .ifc    \signed, unsigned
+       movne   r0, #0xffffffff
+       .else
+       movgt   r0, #0x7fffffff
+       movlt   r0, #0x80000000
+       .endif
+       b       SYM(__aeabi_idiv0) __PLT__
+       .thumb
+#endif
+.endm
+#else
+.macro THUMB_LDIV0 name signed /* Call __div0, then return 0 in r0.  */
+       push    { r1, lr }
+98:    cfi_push 98b - __\name, 0xe, -0x4, 0x8
+       bl      SYM (__div0)
+       mov     r0, #0                  @ About as wrong as it could be.
+#if defined (__INTERWORKING__)
+       pop     { r1, r2 }
+       bx      r2
+#else
+       pop     { r1, pc }
+#endif
+.endm
+#endif
+
+.macro FUNC_END name /* Record the size of __\name; SIZE is empty when __ELF__ is not defined.  */
+       SIZE (__\name)
+.endm
+
+.macro DIV_FUNC_END name signed /* Emit the Ldiv0 handler with frame info, then end __\name.  */
+       cfi_start       __\name, LSYM(Lend_div0)
+LSYM(Ldiv0):
+#ifdef __thumb__
+       THUMB_LDIV0 \name \signed
+#else
+       ARM_LDIV0 \name \signed
+#endif
+       cfi_end LSYM(Lend_div0)
+       FUNC_END \name
+.endm
+
+.macro THUMB_FUNC_START name /* Start a global Thumb function; \name is used as given, no __ prefix added.  */
+       .globl  SYM (\name)
+       TYPE    (\name)
+       .thumb_func
+SYM (\name):
+.endm
+
+/* Function start macros.  Variants for ARM and Thumb.  */
+
+#ifdef __thumb__
+#define THUMB_FUNC .thumb_func
+#define THUMB_CODE .force_thumb
+# if defined(__thumb2__)
+#define THUMB_SYNTAX .syntax divided
+# else
+#define THUMB_SYNTAX
+# endif
+#else /* Not Thumb: all three expand to nothing.  */
+#define THUMB_FUNC
+#define THUMB_CODE
+#define THUMB_SYNTAX
+#endif
+
+.macro FUNC_START name /* Start the global function __\name in .text, in the default instruction set.  */
+       .text
+       .globl SYM (__\name)
+       TYPE (__\name)
+       .align 0
+       THUMB_CODE
+       THUMB_FUNC
+       THUMB_SYNTAX
+SYM (__\name):
+.endm
+
+/* Special function that will always be coded in ARM assembly, even if
+   in Thumb-only compilation.  */
+
+#if defined(__thumb2__)
+
+/* For Thumb-2 we build everything in thumb mode.  */
+.macro ARM_FUNC_START name
+       FUNC_START \name
+       .syntax unified
+.endm
+#define EQUIV .thumb_set
+.macro  ARM_CALL name
+       bl      __\name
+.endm
+
+#elif defined(__INTERWORKING_STUBS__)
+
+.macro ARM_FUNC_START name /* Thumb entry stub that switches to ARM state.  */
+       FUNC_START \name
+       bx      pc
+       nop
+       .arm
+/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
+   directly from other local arm routines.  */
+_L__\name:             
+.endm
+#define EQUIV .thumb_set
+/* Branch directly to a function declared with ARM_FUNC_START.
+   Must be called in arm mode.  */
+.macro  ARM_CALL name
+       bl      _L__\name
+.endm
+
+#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */
+
+#ifdef __ARM_ARCH_6M__ /* Only EQUIV is defined here: no ARM_FUNC_START or ARM_CALL.  */
+#define EQUIV .thumb_set
+#else
+.macro ARM_FUNC_START name /* Start the global function __\name in .text as ARM code.  */
+       .text
+       .globl SYM (__\name)
+       TYPE (__\name)
+       .align 0
+       .arm
+SYM (__\name):
+.endm
+#define EQUIV .set
+.macro  ARM_CALL name
+       bl      __\name
+.endm
+#endif
+
+#endif
+
+.macro FUNC_ALIAS new old /* Make __\new a global alias of __\old.  */
+       .globl  SYM (__\new)
+#if defined (__thumb__)
+       .thumb_set      SYM (__\new), SYM (__\old)
+#else
+       .set    SYM (__\new), SYM (__\old)
+#endif
+.endm
+
+#ifndef __ARM_ARCH_6M__
+.macro ARM_FUNC_ALIAS new old   /* Alias for a routine opened with ARM_FUNC_START; uses whichever directive EQUIV names.  */
+       .globl  SYM (__\new)
+       EQUIV   SYM (__\new), SYM (__\old)
+#if defined(__INTERWORKING_STUBS__)
+       .set    SYM (_L__\new), SYM (_L__\old)   /* Also alias the ARM-mode label that ARM_CALL targets.  */
+#endif
+.endm
+#endif
+
+#ifdef __ARMEB__   /* xxh/xxl: high/low word of a 64-bit value in r0/r1 (clzdi2 uses them so); yyh/yyl presumably the same for r2/r3.  */
+#define xxh r0
+#define xxl r1
+#define yyh r2
+#define yyl r3
+#else
+#define xxh r1
+#define xxl r0
+#define yyh r3
+#define yyl r2
+#endif 
+
+#ifdef __ARM_EABI__
+.macro WEAK name   /* Mark __\name as a weak symbol; only available when __ARM_EABI__ is defined.  */
+       .weak SYM (__\name)
+.endm
+#endif
+
+#ifdef __thumb__
+/* Register aliases used by THUMB_DIV_MOD_BODY and the Thumb division routines below.  */
+/* overdone and result share r2: the modulo variant uses only overdone, the division variant only result.  */
+work           .req    r4      @ XXXX is this safe ?
+dividend       .req    r0
+divisor                .req    r1
+overdone       .req    r2
+result         .req    r2
+curbit         .req    r3
+#endif
+#if 0   /* Disabled aliases for r12-r15.  */
+ip             .req    r12
+sp             .req    r13
+lr             .req    r14
+pc             .req    r15
+#endif
+
+/* ------------------------------------------------------------------------ */
+/*             Bodies of the division and modulo routines.                 */
+/* ------------------------------------------------------------------------ */ 
+.macro ARM_DIV_BODY dividend, divisor, result, curbit
+       /* Unsigned divide core: leaves the quotient in \result.  \dividend and \curbit are clobbered; the loop variants also modify \divisor.  */
+#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
+       /* Fully unrolled 32-step shift-and-subtract, entered part-way through by a computed jump.  */
+#if defined (__thumb2__)
+       clz     \curbit, \dividend
+       clz     \result, \divisor
+       sub     \curbit, \result, \curbit
+       rsb     \curbit, \curbit, #31   /* \curbit = 31 - (clz (divisor) - clz (dividend)).  */
+       adr     \result, 1f
+       add     \curbit, \result, \curbit, lsl #4   /* Jump target = 1f + 16 * \curbit.  */
+       mov     \result, #0
+       mov     pc, \curbit
+.p2align 3
+1:
+       .set    shift, 32
+       .rept   32
+       .set    shift, shift - 1
+       cmp.w   \dividend, \divisor, lsl #shift
+       nop.n   /* NOTE(review): presumably pads each step to the 16 bytes assumed by the "lsl #4" above -- confirm.  */
+       adc.w   \result, \result, \result
+       it      cs
+       subcs.w \dividend, \dividend, \divisor, lsl #shift
+       .endr
+#else
+       clz     \curbit, \dividend
+       clz     \result, \divisor
+       sub     \curbit, \result, \curbit
+       rsbs    \curbit, \curbit, #31
+       addne   \curbit, \curbit, \curbit, lsl #1   /* Scale by 3: each unrolled step below is three instructions.  */
+       mov     \result, #0
+       addne   pc, pc, \curbit, lsl #2   /* Computed jump into the unrolled sequence.  */
+       nop
+       .set    shift, 32
+       .rept   32
+       .set    shift, shift - 1
+       cmp     \dividend, \divisor, lsl #shift
+       adc     \result, \result, \result
+       subcs   \dividend, \dividend, \divisor, lsl #shift
+       .endr
+#endif
+
+#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
+#if __ARM_ARCH__ >= 5
+       /* Align \divisor with \dividend using the clz difference; \curbit becomes the matching quotient bit.  */
+       clz     \curbit, \divisor
+       clz     \result, \dividend
+       sub     \result, \curbit, \result
+       mov     \curbit, #1
+       mov     \divisor, \divisor, lsl \result
+       mov     \curbit, \curbit, lsl \result
+       mov     \result, #0
+       
+#else /* __ARM_ARCH__ < 5 */
+
+       @ Initially shift the divisor left 3 bits if possible,
+       @ set curbit accordingly.  This allows for curbit to be located
+       @ at the left end of each 4-bit nibbles in the division loop
+       @ to save one loop in most cases.
+       tst     \divisor, #0xe0000000
+       moveq   \divisor, \divisor, lsl #3
+       moveq   \curbit, #8
+       movne   \curbit, #1
+
+       @ Unless the divisor is very big, shift it up in multiples of
+       @ four bits, since this is the amount of unwinding in the main
+       @ division loop.  Continue shifting until the divisor is 
+       @ larger than the dividend.
+1:     cmp     \divisor, #0x10000000
+       cmplo   \divisor, \dividend
+       movlo   \divisor, \divisor, lsl #4
+       movlo   \curbit, \curbit, lsl #4
+       blo     1b
+
+       @ For very big divisors, we must shift it a bit at a time, or
+       @ we will be in danger of overflowing.
+1:     cmp     \divisor, #0x80000000
+       cmplo   \divisor, \dividend
+       movlo   \divisor, \divisor, lsl #1
+       movlo   \curbit, \curbit, lsl #1
+       blo     1b
+
+       mov     \result, #0
+
+#endif /* __ARM_ARCH__ < 5 */
+
+       @ Division loop
+1:     cmp     \dividend, \divisor
+       do_it   hs, t
+       subhs   \dividend, \dividend, \divisor
+       orrhs   \result,   \result,   \curbit
+       cmp     \dividend, \divisor,  lsr #1
+       do_it   hs, t
+       subhs   \dividend, \dividend, \divisor, lsr #1
+       orrhs   \result,   \result,   \curbit,  lsr #1
+       cmp     \dividend, \divisor,  lsr #2
+       do_it   hs, t
+       subhs   \dividend, \dividend, \divisor, lsr #2
+       orrhs   \result,   \result,   \curbit,  lsr #2
+       cmp     \dividend, \divisor,  lsr #3
+       do_it   hs, t
+       subhs   \dividend, \dividend, \divisor, lsr #3
+       orrhs   \result,   \result,   \curbit,  lsr #3
+       cmp     \dividend, #0                   @ Early termination?
+       do_it   ne, t
+       movnes  \curbit,   \curbit,  lsr #4     @ No, any more bits to do?
+       movne   \divisor,  \divisor, lsr #4
+       bne     1b
+
+#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
+
+.endm
+/* ------------------------------------------------------------------------ */ 
+.macro ARM_DIV2_ORDER divisor, order
+       /* Set \order to log2 (\divisor); the callers in this section only reach it for power-of-two divisors.  */
+#if __ARM_ARCH__ >= 5
+
+       clz     \order, \divisor
+       rsb     \order, \order, #31   /* \order = 31 - clz (\divisor).  */
+
+#else
+       /* No clz: binary search on 16/8/4-bit boundaries.  This variant shifts (clobbers) \divisor.  */
+       cmp     \divisor, #(1 << 16)
+       movhs   \divisor, \divisor, lsr #16
+       movhs   \order, #16
+       movlo   \order, #0
+
+       cmp     \divisor, #(1 << 8)
+       movhs   \divisor, \divisor, lsr #8
+       addhs   \order, \order, #8
+
+       cmp     \divisor, #(1 << 4)
+       movhs   \divisor, \divisor, lsr #4
+       addhs   \order, \order, #4
+
+       cmp     \divisor, #(1 << 2)   /* Remaining value is below 16 here.  */
+       addhi   \order, \order, #3   /* Above 4: adds 3 (exact for 8; also taken for 5..7).  */
+       addls   \order, \order, \divisor, lsr #1   /* 1, 2, 4 add 0, 1, 2.  */
+
+#endif
+
+.endm
+/* ------------------------------------------------------------------------ */
+.macro ARM_MOD_BODY dividend, divisor, order, spare
+       /* Unsigned modulo core: reduces \dividend in place to the remainder.  \order is scratch, \spare is used only by the clz variants, and the loop variants modify \divisor.  */
+#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
+       /* Fully unrolled 32-step compare-and-subtract, entered part-way through by a computed jump.  */
+       clz     \order, \divisor
+       clz     \spare, \dividend
+       sub     \order, \order, \spare
+       rsbs    \order, \order, #31
+       addne   pc, pc, \order, lsl #3   /* 8 bytes (two instructions) per unrolled step.  */
+       nop
+       .set    shift, 32
+       .rept   32
+       .set    shift, shift - 1
+       cmp     \dividend, \divisor, lsl #shift
+       subcs   \dividend, \dividend, \divisor, lsl #shift
+       .endr
+
+#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
+#if __ARM_ARCH__ >= 5
+       /* Align \divisor with \dividend; \order is the number of bit positions it was shifted.  */
+       clz     \order, \divisor
+       clz     \spare, \dividend
+       sub     \order, \order, \spare
+       mov     \divisor, \divisor, lsl \order
+       
+#else /* __ARM_ARCH__ < 5 */
+
+       mov     \order, #0
+
+       @ Unless the divisor is very big, shift it up in multiples of
+       @ four bits, since this is the amount of unwinding in the main
+       @ division loop.  Continue shifting until the divisor is 
+       @ larger than the dividend.
+1:     cmp     \divisor, #0x10000000
+       cmplo   \divisor, \dividend
+       movlo   \divisor, \divisor, lsl #4
+       addlo   \order, \order, #4
+       blo     1b
+
+       @ For very big divisors, we must shift it a bit at a time, or
+       @ we will be in danger of overflowing.
+1:     cmp     \divisor, #0x80000000
+       cmplo   \divisor, \dividend
+       movlo   \divisor, \divisor, lsl #1
+       addlo   \order, \order, #1
+       blo     1b
+
+#endif /* __ARM_ARCH__ < 5 */
+
+       @ Perform all needed subtractions to keep only the remainder.
+       @ Do comparisons in batch of 4 first.
+       subs    \order, \order, #3              @ yes, 3 is intended here
+       blt     2f
+
+1:     cmp     \dividend, \divisor
+       subhs   \dividend, \dividend, \divisor
+       cmp     \dividend, \divisor,  lsr #1
+       subhs   \dividend, \dividend, \divisor, lsr #1
+       cmp     \dividend, \divisor,  lsr #2
+       subhs   \dividend, \dividend, \divisor, lsr #2
+       cmp     \dividend, \divisor,  lsr #3
+       subhs   \dividend, \dividend, \divisor, lsr #3
+       cmp     \dividend, #1
+       mov     \divisor, \divisor, lsr #4
+       subges  \order, \order, #4
+       bge     1b
+
+       tst     \order, #3
+       teqne   \dividend, #0
+       beq     5f
+
+       @ Either 1, 2 or 3 comparison/subtractions are left.
+2:     cmn     \order, #2
+       blt     4f
+       beq     3f
+       cmp     \dividend, \divisor
+       subhs   \dividend, \dividend, \divisor
+       mov     \divisor,  \divisor,  lsr #1
+3:     cmp     \dividend, \divisor
+       subhs   \dividend, \dividend, \divisor
+       mov     \divisor,  \divisor,  lsr #1
+4:     cmp     \dividend, \divisor
+       subhs   \dividend, \dividend, \divisor
+5:
+
+#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
+
+.endm
+/* ------------------------------------------------------------------------ */
+.macro THUMB_DIV_MOD_BODY modulo   /* \modulo = 1: leave the remainder in dividend; 0: build the quotient in result.  Callers set curbit to 1 and save work first.  */
+       @ Load the constant 0x10000000 into our work register.
+       mov     work, #1
+       lsl     work, #28
+LSYM(Loop1):
+       @ Unless the divisor is very big, shift it up in multiples of
+       @ four bits, since this is the amount of unwinding in the main
+       @ division loop.  Continue shifting until the divisor is 
+       @ larger than the dividend.
+       cmp     divisor, work
+       bhs     LSYM(Lbignum)
+       cmp     divisor, dividend
+       bhs     LSYM(Lbignum)
+       lsl     divisor, #4
+       lsl     curbit,  #4
+       b       LSYM(Loop1)
+LSYM(Lbignum):
+       @ Set work to 0x80000000
+       lsl     work, #3
+LSYM(Loop2):
+       @ For very big divisors, we must shift it a bit at a time, or
+       @ we will be in danger of overflowing.
+       cmp     divisor, work
+       bhs     LSYM(Loop3)
+       cmp     divisor, dividend
+       bhs     LSYM(Loop3)
+       lsl     divisor, #1
+       lsl     curbit,  #1
+       b       LSYM(Loop2)
+LSYM(Loop3):
+       @ Test for possible subtractions ...
+  .if \modulo
+       @ ... On the final pass, this may subtract too much from the dividend, 
+       @ so keep track of which subtractions are done, we can fix them up 
+       @ afterwards.
+       mov     overdone, #0
+       cmp     dividend, divisor
+       blo     LSYM(Lover1)
+       sub     dividend, dividend, divisor
+LSYM(Lover1):
+       lsr     work, divisor, #1
+       cmp     dividend, work
+       blo     LSYM(Lover2)
+       sub     dividend, dividend, work
+       mov     ip, curbit   /* ip keeps curbit while it is rotated to form the overdone mark.  */
+       mov     work, #1
+       ror     curbit, work
+       orr     overdone, curbit
+       mov     curbit, ip
+LSYM(Lover2):
+       lsr     work, divisor, #2
+       cmp     dividend, work
+       blo     LSYM(Lover3)
+       sub     dividend, dividend, work
+       mov     ip, curbit
+       mov     work, #2
+       ror     curbit, work
+       orr     overdone, curbit
+       mov     curbit, ip
+LSYM(Lover3):
+       lsr     work, divisor, #3
+       cmp     dividend, work
+       blo     LSYM(Lover4)
+       sub     dividend, dividend, work
+       mov     ip, curbit
+       mov     work, #3
+       ror     curbit, work
+       orr     overdone, curbit
+       mov     curbit, ip
+LSYM(Lover4):
+       mov     ip, curbit   /* Remember this pass's curbit for the fix-up after the loop.  */
+  .else
+       @ ... and note which bits are done in the result.  On the final pass,
+       @ this may subtract too much from the dividend, but the result will be ok,
+       @ since the "bit" will have been shifted out at the bottom.
+       cmp     dividend, divisor
+       blo     LSYM(Lover1)
+       sub     dividend, dividend, divisor
+       orr     result, result, curbit
+LSYM(Lover1):
+       lsr     work, divisor, #1
+       cmp     dividend, work
+       blo     LSYM(Lover2)
+       sub     dividend, dividend, work
+       lsr     work, curbit, #1
+       orr     result, work
+LSYM(Lover2):
+       lsr     work, divisor, #2
+       cmp     dividend, work
+       blo     LSYM(Lover3)
+       sub     dividend, dividend, work
+       lsr     work, curbit, #2
+       orr     result, work
+LSYM(Lover3):
+       lsr     work, divisor, #3
+       cmp     dividend, work
+       blo     LSYM(Lover4)
+       sub     dividend, dividend, work
+       lsr     work, curbit, #3
+       orr     result, work
+LSYM(Lover4):
+  .endif
+       
+       cmp     dividend, #0                    @ Early termination?
+       beq     LSYM(Lover5)
+       lsr     curbit,  #4                     @ No, any more bits to do?
+       beq     LSYM(Lover5)
+       lsr     divisor, #4
+       b       LSYM(Loop3)
+LSYM(Lover5):
+  .if \modulo
+       @ Any subtractions that we should not have done will be recorded in
+       @ the top three bits of "overdone".  Exactly which were not needed
+       @ are governed by the position of the bit, stored in ip.
+       mov     work, #0xe
+       lsl     work, #28   /* work = 0xe0000000: mask for the top three bits.  */
+       and     overdone, work
+       beq     LSYM(Lgot_result)
+       
+       @ If we terminated early, because dividend became zero, then the 
+       @ bit in ip will not be in the bottom nibble, and we should not
+       @ perform the additions below.  We must test for this though
+       @ (rather relying upon the TSTs to prevent the additions) since
+       @ the bit in ip could be in the top two bits which might then match
+       @ with one of the smaller RORs.
+       mov     curbit, ip
+       mov     work, #0x7
+       tst     curbit, work
+       beq     LSYM(Lgot_result)
+       
+       mov     curbit, ip
+       mov     work, #3
+       ror     curbit, work
+       tst     overdone, curbit
+       beq     LSYM(Lover6)
+       lsr     work, divisor, #3
+       add     dividend, work   /* Undo the surplus subtraction of divisor >> 3.  */
+LSYM(Lover6):
+       mov     curbit, ip
+       mov     work, #2
+       ror     curbit, work
+       tst     overdone, curbit
+       beq     LSYM(Lover7)
+       lsr     work, divisor, #2
+       add     dividend, work   /* Undo the surplus subtraction of divisor >> 2.  */
+LSYM(Lover7):
+       mov     curbit, ip
+       mov     work, #1
+       ror     curbit, work
+       tst     overdone, curbit
+       beq     LSYM(Lgot_result)
+       lsr     work, divisor, #1
+       add     dividend, work   /* Undo the surplus subtraction of divisor >> 1.  */
+  .endif
+LSYM(Lgot_result):
+.endm  
+/* ------------------------------------------------------------------------ */
+/*             Start of the Real Functions                                 */
+/* ------------------------------------------------------------------------ */
+#ifdef L_udivsi3
+/* __udivsi3 (alias __aeabi_uidiv): unsigned 32-bit division, r0 = r0 / r1.  A zero divisor branches to Ldiv0.  */
+#if defined(__prefer_thumb__)
+
+       FUNC_START udivsi3
+       FUNC_ALIAS aeabi_uidiv udivsi3
+
+       cmp     divisor, #0
+       beq     LSYM(Ldiv0)
+LSYM(udivsi3_skip_div0_test):
+       mov     curbit, #1
+       mov     result, #0
+       
+       push    { work }
+       cmp     dividend, divisor
+       blo     LSYM(Lgot_result)   /* dividend < divisor: quotient stays 0; skip the loops inside the macro.  */
+
+       THUMB_DIV_MOD_BODY 0
+       
+       mov     r0, result
+       pop     { work }
+       RET
+
+#else /* ARM version/Thumb-2.  */
+
+       ARM_FUNC_START udivsi3
+       ARM_FUNC_ALIAS aeabi_uidiv udivsi3
+
+       /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
+          check for division-by-zero a second time.  */
+LSYM(udivsi3_skip_div0_test):
+       subs    r2, r1, #1
+       do_it   eq
+       RETc(eq)   /* Divisor == 1: r0 is already the quotient.  */
+       bcc     LSYM(Ldiv0)   /* Borrow from r1 - 1: divisor == 0.  */
+       cmp     r0, r1
+       bls     11f   /* dividend <= divisor: quotient is 1 or 0.  */
+       tst     r1, r2
+       beq     12f   /* divisor & (divisor - 1) == 0: power of two, just shift.  */
+       
+       ARM_DIV_BODY r0, r1, r2, r3
+       
+       mov     r0, r2
+       RET     
+
+11:    do_it   eq, e
+       moveq   r0, #1
+       movne   r0, #0
+       RET
+
+12:    ARM_DIV2_ORDER r1, r2
+
+       mov     r0, r0, lsr r2
+       RET
+
+#endif /* ARM version */
+
+       DIV_FUNC_END udivsi3 unsigned
+/* __aeabi_uidivmod: quotient in r0, remainder in r1 computed as dividend - quotient * divisor.  */
+#if defined(__prefer_thumb__)
+FUNC_START aeabi_uidivmod
+       cmp     r1, #0
+       beq     LSYM(Ldiv0)
+       push    {r0, r1, lr}
+       bl      LSYM(udivsi3_skip_div0_test)
+       POP     {r1, r2, r3}   /* r1 = dividend, r2 = divisor, r3 = return address.  */
+       mul     r2, r0
+       sub     r1, r1, r2
+       bx      r3
+#else
+ARM_FUNC_START aeabi_uidivmod
+       cmp     r1, #0
+       beq     LSYM(Ldiv0)
+       stmfd   sp!, { r0, r1, lr }
+       bl      LSYM(udivsi3_skip_div0_test)
+       ldmfd   sp!, { r1, r2, lr }   /* r1 = dividend, r2 = divisor.  */
+       mul     r3, r2, r0
+       sub     r1, r1, r3
+       RET
+#endif
+       FUNC_END aeabi_uidivmod
+       
+#endif /* L_udivsi3 */
+/* ------------------------------------------------------------------------ */
+#ifdef L_umodsi3
+/* __umodsi3: unsigned 32-bit remainder, r0 = r0 % r1.  A zero divisor branches to Ldiv0.  */
+       FUNC_START umodsi3
+
+#ifdef __thumb__
+
+       cmp     divisor, #0
+       beq     LSYM(Ldiv0)
+       mov     curbit, #1
+       cmp     dividend, divisor
+       bhs     LSYM(Lover10)
+       RET        /* dividend < divisor: it is already the remainder.  */
+
+LSYM(Lover10):
+       push    { work }
+
+       THUMB_DIV_MOD_BODY 1
+       
+       pop     { work }
+       RET
+       
+#else  /* ARM version.  */
+       
+       subs    r2, r1, #1                      @ compare divisor with 1
+       bcc     LSYM(Ldiv0)
+       cmpne   r0, r1                          @ compare dividend with divisor
+       moveq   r0, #0
+       tsthi   r1, r2                          @ see if divisor is power of 2
+       andeq   r0, r0, r2
+       RETc(ls)
+
+       ARM_MOD_BODY r0, r1, r2, r3
+       
+       RET     
+
+#endif /* ARM version.  */
+       
+       DIV_FUNC_END umodsi3 unsigned
+
+#endif /* L_umodsi3 */
+/* ------------------------------------------------------------------------ */
+#ifdef L_divsi3
+/* __divsi3 (alias __aeabi_idiv): signed 32-bit division, r0 = r0 / r1.  Divides the magnitudes and negates the quotient when the operand signs differ.  */
+#if defined(__prefer_thumb__)
+
+       FUNC_START divsi3       
+       FUNC_ALIAS aeabi_idiv divsi3
+
+       cmp     divisor, #0
+       beq     LSYM(Ldiv0)
+LSYM(divsi3_skip_div0_test):
+       push    { work }
+       mov     work, dividend
+       eor     work, divisor           @ Save the sign of the result.
+       mov     ip, work
+       mov     curbit, #1
+       mov     result, #0
+       cmp     divisor, #0
+       bpl     LSYM(Lover10)
+       neg     divisor, divisor        @ Loops below use unsigned.
+LSYM(Lover10):
+       cmp     dividend, #0
+       bpl     LSYM(Lover11)
+       neg     dividend, dividend
+LSYM(Lover11):
+       cmp     dividend, divisor
+       blo     LSYM(Lgot_result)
+
+       THUMB_DIV_MOD_BODY 0
+       
+       mov     r0, result
+       mov     work, ip
+       cmp     work, #0
+       bpl     LSYM(Lover12)
+       neg     r0, r0
+LSYM(Lover12):
+       pop     { work }
+       RET
+
+#else /* ARM/Thumb-2 version.  */
+       
+       ARM_FUNC_START divsi3   
+       ARM_FUNC_ALIAS aeabi_idiv divsi3
+
+       cmp     r1, #0
+       beq     LSYM(Ldiv0)
+LSYM(divsi3_skip_div0_test):
+       eor     ip, r0, r1                      @ save the sign of the result.
+       do_it   mi
+       rsbmi   r1, r1, #0                      @ loops below use unsigned.
+       subs    r2, r1, #1                      @ division by 1 or -1 ?
+       beq     10f
+       movs    r3, r0
+       do_it   mi
+       rsbmi   r3, r0, #0                      @ positive dividend value
+       cmp     r3, r1
+       bls     11f
+       tst     r1, r2                          @ divisor is power of 2 ?
+       beq     12f
+
+       ARM_DIV_BODY r3, r1, r0, r2
+       
+       cmp     ip, #0
+       do_it   mi
+       rsbmi   r0, r0, #0
+       RET     
+
+10:    teq     ip, r0                          @ same sign ?
+       do_it   mi
+       rsbmi   r0, r0, #0
+       RET     
+
+11:    do_it   lo
+       movlo   r0, #0   /* |dividend| < |divisor|: quotient is 0.  */
+       do_it   eq,t
+       moveq   r0, ip, asr #31
+       orreq   r0, r0, #1   /* Equal magnitudes: +1 or -1 according to the sign kept in ip.  */
+       RET
+
+12:    ARM_DIV2_ORDER r1, r2
+
+       cmp     ip, #0
+       mov     r0, r3, lsr r2
+       do_it   mi
+       rsbmi   r0, r0, #0
+       RET
+
+#endif /* ARM version */
+       
+       DIV_FUNC_END divsi3 signed
+/* __aeabi_idivmod: quotient in r0, remainder in r1 computed as dividend - quotient * divisor.  */
+#if defined(__prefer_thumb__)
+FUNC_START aeabi_idivmod
+       cmp     r1, #0
+       beq     LSYM(Ldiv0)
+       push    {r0, r1, lr}
+       bl      LSYM(divsi3_skip_div0_test)
+       POP     {r1, r2, r3}   /* r1 = dividend, r2 = divisor, r3 = return address.  */
+       mul     r2, r0
+       sub     r1, r1, r2
+       bx      r3
+#else
+ARM_FUNC_START aeabi_idivmod
+       cmp     r1, #0
+       beq     LSYM(Ldiv0)
+       stmfd   sp!, { r0, r1, lr }
+       bl      LSYM(divsi3_skip_div0_test)
+       ldmfd   sp!, { r1, r2, lr }   /* r1 = dividend, r2 = divisor.  */
+       mul     r3, r2, r0
+       sub     r1, r1, r3
+       RET
+#endif
+       FUNC_END aeabi_idivmod
+       
+#endif /* L_divsi3 */
+/* ------------------------------------------------------------------------ */
+#ifdef L_modsi3
+/* __modsi3: signed 32-bit remainder, r0 = r0 % r1.  Works on magnitudes; the result takes the sign of the dividend.  */
+       FUNC_START modsi3
+
+#ifdef __thumb__
+
+       mov     curbit, #1
+       cmp     divisor, #0
+       beq     LSYM(Ldiv0)
+       bpl     LSYM(Lover10)
+       neg     divisor, divisor                @ Loops below use unsigned.
+LSYM(Lover10):
+       push    { work }
+       @ Need to save the sign of the dividend, unfortunately, we need
+       @ work later on.  Must do this after saving the original value of
+       @ the work register, because we will pop this value off first.
+       push    { dividend }
+       cmp     dividend, #0
+       bpl     LSYM(Lover11)
+       neg     dividend, dividend
+LSYM(Lover11):
+       cmp     dividend, divisor
+       blo     LSYM(Lgot_result)
+
+       THUMB_DIV_MOD_BODY 1
+               
+       pop     { work }   /* work = original (signed) dividend pushed above.  */
+       cmp     work, #0
+       bpl     LSYM(Lover12)
+       neg     dividend, dividend
+LSYM(Lover12):
+       pop     { work }
+       RET     
+
+#else /* ARM version.  */
+       
+       cmp     r1, #0
+       beq     LSYM(Ldiv0)
+       rsbmi   r1, r1, #0                      @ loops below use unsigned.
+       movs    ip, r0                          @ preserve sign of dividend
+       rsbmi   r0, r0, #0                      @ if negative make positive
+       subs    r2, r1, #1                      @ compare divisor with 1
+       cmpne   r0, r1                          @ compare dividend with divisor
+       moveq   r0, #0
+       tsthi   r1, r2                          @ see if divisor is power of 2
+       andeq   r0, r0, r2
+       bls     10f
+
+       ARM_MOD_BODY r0, r1, r2, r3
+
+10:    cmp     ip, #0
+       rsbmi   r0, r0, #0
+       RET     
+
+#endif /* ARM version */
+       
+       DIV_FUNC_END modsi3 signed
+
+#endif /* L_modsi3 */
+/* ------------------------------------------------------------------------ */
+#ifdef L_dvmd_tls
+/* Default division-by-zero handlers: they simply return.  Under __ARM_EABI__ both weak entry points share one RET.  */
+#ifdef __ARM_EABI__
+       WEAK aeabi_idiv0
+       WEAK aeabi_ldiv0
+       FUNC_START aeabi_idiv0
+       FUNC_START aeabi_ldiv0
+       RET
+       FUNC_END aeabi_ldiv0
+       FUNC_END aeabi_idiv0
+#else
+       FUNC_START div0
+       RET
+       FUNC_END div0
+#endif
+       
+#endif /* L_dvmd_tls */
+/* ------------------------------------------------------------------------ */
+#ifdef L_dvmd_lnx
+@ GNU/Linux division-by zero handler.  Used in place of L_dvmd_tls
+/* The handler body calls raise (SIGFPE); under __ARM_EABI__ both weak entry points share it.  */
+/* Constant taken from <asm/signal.h>.  */
+#define SIGFPE 8
+
+#ifdef __ARM_EABI__
+       WEAK aeabi_idiv0
+       WEAK aeabi_ldiv0
+       ARM_FUNC_START aeabi_idiv0
+       ARM_FUNC_START aeabi_ldiv0
+#else
+       ARM_FUNC_START div0
+#endif
+
+       do_push {r1, lr}
+       mov     r0, #SIGFPE
+       bl      SYM(raise) __PLT__
+       RETLDM  r1
+
+#ifdef __ARM_EABI__
+       FUNC_END aeabi_ldiv0
+       FUNC_END aeabi_idiv0
+#else
+       FUNC_END div0
+#endif
+       
+#endif /* L_dvmd_lnx */
+#ifdef L_clear_cache
+#if defined __ARM_EABI__ && defined __linux__
+@ EABI GNU/Linux call to cacheflush syscall.
+       ARM_FUNC_START clear_cache   /* r0 and r1 are passed through to the swi untouched; r2 is forced to 0.  */
+       do_push {r7}
+#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
+       movw    r7, #2
+       movt    r7, #0xf   /* r7 = 0xf0002.  */
+#else
+       mov     r7, #0xf0000
+       add     r7, r7, #2   /* r7 = 0xf0002.  */
+#endif
+       mov     r2, #0
+       swi     0
+       do_pop  {r7}
+       RET
+       FUNC_END clear_cache
+#else
+#error "This is only for ARM EABI GNU/Linux"
+#endif
+#endif /* L_clear_cache */
+/* ------------------------------------------------------------------------ */
+/* Dword shift operations.  */
+/* All the following Dword shift variants rely on the fact that
+       shft xxx, Reg
+   is in fact done as
+       shft xxx, (Reg & 255)
+   so for Reg value in (32...63) and (-1...-31) we will get zero (in the
+   case of logical shifts) or the sign (for asr).  */
+
+#ifdef __ARMEB__   /* al/ah: low/high word of the 64-bit shift operand held in r0/r1.  */
+#define al     r1
+#define ah     r0
+#else
+#define al     r0
+#define ah     r1
+#endif
+
+/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
+#ifndef __symbian__
+
+#ifdef L_lshrdi3
+/* __lshrdi3 (alias __aeabi_llsr): logical right shift of the 64-bit value ah:al by r2 bits.  */
+       FUNC_START lshrdi3
+       FUNC_ALIAS aeabi_llsr lshrdi3
+       
+#ifdef __thumb__
+       lsr     al, r2
+       mov     r3, ah
+       lsr     ah, r2
+       mov     ip, r3   /* Keep the original high word for the second contribution below.  */
+       sub     r2, #32
+       lsr     r3, r2   /* High word >> (count - 32).  */
+       orr     al, r3
+       neg     r2, r2
+       mov     r3, ip
+       lsl     r3, r2   /* High word << (32 - count).  */
+       orr     al, r3
+       RET
+#else
+       subs    r3, r2, #32   /* r3 = count - 32; negative (mi) when count < 32.  */
+       rsb     ip, r2, #32   /* ip = 32 - count.  */
+       movmi   al, al, lsr r2
+       movpl   al, ah, lsr r3   /* count >= 32: low word comes entirely from the high word.  */
+       orrmi   al, al, ah, lsl ip   /* count < 32: merge in the bits shifted out of the high word.  */
+       mov     ah, ah, lsr r2
+       RET
+#endif
+       FUNC_END aeabi_llsr
+       FUNC_END lshrdi3
+
+#endif
+       
+#ifdef L_ashrdi3
+       /* __ashrdi3 (alias __aeabi_lasr): arithmetic right shift of the 64-bit value ah:al by r2 bits.  */
+       FUNC_START ashrdi3
+       FUNC_ALIAS aeabi_lasr ashrdi3
+       
+#ifdef __thumb__
+       lsr     al, r2
+       mov     r3, ah
+       asr     ah, r2
+       sub     r2, #32
+       @ If r2 is negative at this point the following step would OR
+       @ the sign bit into all of AL.  That's not what we want...
+       bmi     1f
+       mov     ip, r3
+       asr     r3, r2
+       orr     al, r3
+       mov     r3, ip
+1:
+       neg     r2, r2
+       lsl     r3, r2   /* Original high word << (32 - count).  */
+       orr     al, r3
+       RET
+#else
+       subs    r3, r2, #32   /* r3 = count - 32; negative (mi) when count < 32.  */
+       rsb     ip, r2, #32   /* ip = 32 - count.  */
+       movmi   al, al, lsr r2
+       movpl   al, ah, asr r3   /* count >= 32: low word comes entirely from the high word.  */
+       orrmi   al, al, ah, lsl ip   /* count < 32: merge in the bits shifted out of the high word.  */
+       mov     ah, ah, asr r2
+       RET
+#endif
+
+       FUNC_END aeabi_lasr
+       FUNC_END ashrdi3
+
+#endif
+
+#ifdef L_ashldi3
+/* __ashldi3 (alias __aeabi_llsl): left shift of the 64-bit value ah:al by r2 bits.  */
+       FUNC_START ashldi3
+       FUNC_ALIAS aeabi_llsl ashldi3
+       
+#ifdef __thumb__
+       lsl     ah, r2
+       mov     r3, al
+       lsl     al, r2
+       mov     ip, r3   /* Keep the original low word for the second contribution below.  */
+       sub     r2, #32
+       lsl     r3, r2   /* Low word << (count - 32).  */
+       orr     ah, r3
+       neg     r2, r2
+       mov     r3, ip
+       lsr     r3, r2   /* Low word >> (32 - count).  */
+       orr     ah, r3
+       RET
+#else
+       subs    r3, r2, #32   /* r3 = count - 32; negative (mi) when count < 32.  */
+       rsb     ip, r2, #32   /* ip = 32 - count.  */
+       movmi   ah, ah, lsl r2
+       movpl   ah, al, lsl r3   /* count >= 32: high word comes entirely from the low word.  */
+       orrmi   ah, ah, al, lsr ip   /* count < 32: merge in the bits shifted out of the low word.  */
+       mov     al, al, lsl r2
+       RET
+#endif
+       FUNC_END aeabi_llsl
+       FUNC_END ashldi3
+
+#endif
+
+#endif /* __symbian__ */
+
+#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \
+    || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
+    || defined(__ARM_ARCH_5TEJ__)
+#define HAVE_ARM_CLZ 1   /* clzsi2 and clzdi2 below use the clz instruction when this is defined.  */
+#endif
+
+#ifdef L_clzsi2   /* __clzsi2: r0 = number of leading zero bits in r0 (the table variants give 32 for zero).  */
+#if defined(__ARM_ARCH_6M__)
+FUNC_START clzsi2
+       mov     r1, #28   /* r1 = leading zeros above the 4-bit value that will be looked up.  */
+       mov     r3, #1
+       lsl     r3, r3, #16
+       cmp     r0, r3 /* 0x10000 */
+       bcc     2f
+       lsr     r0, r0, #16
+       sub     r1, r1, #16
+2:     lsr     r3, r3, #8
+       cmp     r0, r3 /* #0x100 */
+       bcc     2f
+       lsr     r0, r0, #8
+       sub     r1, r1, #8
+2:     lsr     r3, r3, #4
+       cmp     r0, r3 /* #0x10 */
+       bcc     2f
+       lsr     r0, r0, #4
+       sub     r1, r1, #4
+2:     adr     r2, 1f
+       ldrb    r0, [r2, r0]
+       add     r0, r0, r1
+       bx lr
+.align 2
+1:
+.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0   /* Leading zeros of a 4-bit value 0..15.  */
+       FUNC_END clzsi2
+#else
+ARM_FUNC_START clzsi2
+# if defined(HAVE_ARM_CLZ)
+       clz     r0, r0
+       RET
+# else
+       mov     r1, #28   /* Same table-driven scheme as the __ARM_ARCH_6M__ variant, using conditional execution.  */
+       cmp     r0, #0x10000
+       do_it   cs, t
+       movcs   r0, r0, lsr #16
+       subcs   r1, r1, #16
+       cmp     r0, #0x100
+       do_it   cs, t
+       movcs   r0, r0, lsr #8
+       subcs   r1, r1, #8
+       cmp     r0, #0x10
+       do_it   cs, t
+       movcs   r0, r0, lsr #4
+       subcs   r1, r1, #4
+       adr     r2, 1f
+       ldrb    r0, [r2, r0]
+       add     r0, r0, r1
+       RET
+.align 2
+1:
+.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0   /* Leading zeros of a 4-bit value 0..15.  */
+# endif /* !HAVE_ARM_CLZ */
+       FUNC_END clzsi2
+#endif
+#endif /* L_clzsi2 */
+
+#ifdef L_clzdi2
+#if !defined(HAVE_ARM_CLZ)
+/* __clzdi2 without clz: call __clzsi2 on the high word if it is non-zero, otherwise on the low word and add 32.  */
+# if defined(__ARM_ARCH_6M__)
+FUNC_START clzdi2
+       push    {r4, lr}
+# else
+ARM_FUNC_START clzdi2
+       do_push {r4, lr}
+# endif
+       cmp     xxh, #0
+       bne     1f
+# ifdef __ARMEB__
+       mov     r0, xxl   /* Big-endian: the low word is in r1, so move it into the argument register.  */
+       bl      __clzsi2
+       add     r0, r0, #32
+       b 2f
+1:
+       bl      __clzsi2
+# else
+       bl      __clzsi2   /* Little-endian: r0 already holds the low word.  */
+       add     r0, r0, #32
+       b 2f
+1:
+       mov     r0, xxh
+       bl      __clzsi2
+# endif
+2:
+# if defined(__ARM_ARCH_6M__)
+       pop     {r4, pc}
+# else
+       RETLDM  r4
+# endif
+       FUNC_END clzdi2
+
+#else /* HAVE_ARM_CLZ */
+
+ARM_FUNC_START clzdi2   /* With clz: count in the high word if non-zero, else 32 + count in the low word.  */
+       cmp     xxh, #0
+       do_it   eq, et
+       clzeq   r0, xxl
+       clzne   r0, xxh
+       addeq   r0, r0, #32
+       RET
+       FUNC_END clzdi2
+
+#endif
+#endif /* L_clzdi2 */
+
+/* ------------------------------------------------------------------------ */
+/* These next two sections are here despite the fact that they contain Thumb 
+   assembler because their presence allows interworked code to be linked even
+   when the GCC library is this one.  */
+               
+/* Do not build the interworking functions when the target architecture does 
+   not support Thumb instructions.  (This can be a multilib option).  */
+#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
+      || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
+      || __ARM_ARCH__ >= 6
+
+#if defined L_call_via_rX
+
+/* These labels & instructions are used by the Arm/Thumb interworking code. 
+   The address of function to be called is loaded into a register and then 
+   one of these labels is called via a BL instruction.  This puts the 
+   return address into the link register with the bottom bit set, and the 
+   code here switches to the correct mode before executing the function.  */
+       
+       .text
+       .align 0
+        .force_thumb
+
+.macro call_via register   /* Emit Thumb stub _call_via_<register>: "bx <register>" followed by a nop.  */
+       THUMB_FUNC_START _call_via_\register
+
+       bx      \register
+       nop
+
+       SIZE    (_call_via_\register)
+.endm
+
+       call_via r0
+       call_via r1
+       call_via r2
+       call_via r3
+       call_via r4
+       call_via r5
+       call_via r6
+       call_via r7
+       call_via r8
+       call_via r9
+       call_via sl
+       call_via fp
+       call_via ip
+       call_via sp
+       call_via lr
+
+#endif /* L_call_via_rX */
+
+/* Don't bother with the old interworking routines for Thumb-2.  */
+/* ??? Maybe only omit these on "m" variants.  */
+#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__)
+
+#if defined L_interwork_call_via_rX
+
+/* These labels & instructions are used by the Arm/Thumb interworking code,
+   when the target address is in an unknown instruction set.  The address 
+   of function to be called is loaded into a register and then one of these
+   labels is called via a BL instruction.  This puts the return address 
+   into the link register with the bottom bit set, and the code here 
+   switches to the correct mode before executing the function.  Unfortunately
+   the target code cannot be relied upon to return via a BX instruction, so
+   instead we have to store the return address on the stack and allow the
+   called function to return here instead.  Upon return we recover the real
+   return address and use a BX to get back to Thumb mode.
+
+   There are three variations of this code.  The first,
+   _interwork_call_via_rN(), will push the return address onto the
+   stack and pop it in _arm_return().  It should only be used if all
+   arguments are passed in registers.
+
+   The second, _interwork_r7_call_via_rN(), instead stores the return
+   address at [r7, #-4].  It is the caller's responsibility to ensure
+   that this address is valid and contains no useful data.
+
+   The third, _interwork_r11_call_via_rN(), works in the same way but
+   uses r11 instead of r7.  It is useful if the caller does not really
+   need a frame pointer.  */
+       
+       .text
+       .align 0
+
+       .code   32   /* ARM-mode return trampolines targeted by the interwork stubs below.  */
+       .globl _arm_return
+LSYM(Lstart_arm_return):
+       cfi_start       LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
+       cfi_push        0, 0xe, -0x8, 0x8
+       nop     @ This nop is for the benefit of debuggers, so that
+               @ backtraces will use the correct unwind information.
+_arm_return:
+       RETLDM  unwind=LSYM(Lstart_arm_return)
+       cfi_end LSYM(Lend_arm_return)
+
+       .globl _arm_return_r7
+_arm_return_r7:   /* Reload the return address saved at [r7, #-4] and bx to it.  */
+       ldr     lr, [r7, #-4]
+       bx      lr
+
+       .globl _arm_return_r11
+_arm_return_r11:   /* Same, with the return address saved at [r11, #-4].  */
+       ldr     lr, [r11, #-4]
+       bx      lr
+
+.macro interwork_with_frame frame, register, name, return
+       .code   16
+
+       THUMB_FUNC_START \name
+
+       bx      pc
+       nop
+
+       .code   32
+       tst     \register, #1
+       streq   lr, [\frame, #-4]
+       adreq   lr, _arm_return_\frame
+       bx      \register
+
+       SIZE    (\name)
+.endm
+
+.macro interwork register
+       .code   16
+
+       THUMB_FUNC_START _interwork_call_via_\register
+
+       bx      pc
+       nop
+
+       .code   32
+       .globl LSYM(Lchange_\register)
+LSYM(Lchange_\register):
+       tst     \register, #1
+       streq   lr, [sp, #-8]!
+       adreq   lr, _arm_return
+       bx      \register
+
+       SIZE    (_interwork_call_via_\register)
+
+       interwork_with_frame r7,\register,_interwork_r7_call_via_\register
+       interwork_with_frame r11,\register,_interwork_r11_call_via_\register
+.endm
+       
+       interwork r0
+       interwork r1
+       interwork r2
+       interwork r3
+       interwork r4
+       interwork r5
+       interwork r6
+       interwork r7
+       interwork r8
+       interwork r9
+       interwork sl
+       interwork fp
+       interwork ip
+       interwork sp
+       
+       /* The LR case has to be handled a little differently...  */
+       .code 16
+
+       THUMB_FUNC_START _interwork_call_via_lr
+
+       bx      pc
+       nop
+       
+       .code 32
+       .globl .Lchange_lr
+.Lchange_lr:
+       tst     lr, #1
+       stmeqdb r13!, {lr, pc}
+       mov     ip, lr
+       adreq   lr, _arm_return
+       bx      ip
+       
+       SIZE    (_interwork_call_via_lr)
+       
+#endif /* L_interwork_call_via_rX */
+#endif /* !__thumb2__ */
+
+/* Functions to support compact pic switch tables in thumb1 state.
+   All these routines take an index into the table in r0.  The
+   table is at LR & ~1 (but this must be rounded up in the case
+   of 32-bit entries).  They are only permitted to clobber r12
+   and r14 and r0 must be preserved on exit.  */
+#ifdef L_thumb1_case_sqi
+       
+       .text
+       .align 0
+        .force_thumb
+       .syntax unified
+       THUMB_FUNC_START __gnu_thumb1_case_sqi
+       push    {r1}
+       mov     r1, lr
+       lsrs    r1, r1, #1
+       lsls    r1, r1, #1
+       ldrsb   r1, [r1, r0]
+       lsls    r1, r1, #1
+       add     lr, lr, r1
+       pop     {r1}
+       bx      lr
+       SIZE (__gnu_thumb1_case_sqi)
+#endif
+
+#ifdef L_thumb1_case_uqi
+       
+       .text
+       .align 0
+        .force_thumb
+       .syntax unified
+       THUMB_FUNC_START __gnu_thumb1_case_uqi
+       push    {r1}
+       mov     r1, lr
+       lsrs    r1, r1, #1
+       lsls    r1, r1, #1
+       ldrb    r1, [r1, r0]
+       lsls    r1, r1, #1
+       add     lr, lr, r1
+       pop     {r1}
+       bx      lr
+       SIZE (__gnu_thumb1_case_uqi)
+#endif
+
+#ifdef L_thumb1_case_shi
+       
+       .text
+       .align 0
+        .force_thumb
+       .syntax unified
+       THUMB_FUNC_START __gnu_thumb1_case_shi
+       push    {r0, r1}
+       mov     r1, lr
+       lsrs    r1, r1, #1
+       lsls    r0, r0, #1
+       lsls    r1, r1, #1
+       ldrsh   r1, [r1, r0]
+       lsls    r1, r1, #1
+       add     lr, lr, r1
+       pop     {r0, r1}
+       bx      lr
+       SIZE (__gnu_thumb1_case_shi)
+#endif
+
+#ifdef L_thumb1_case_uhi
+       
+       .text
+       .align 0
+        .force_thumb
+       .syntax unified
+       THUMB_FUNC_START __gnu_thumb1_case_uhi
+       push    {r0, r1}
+       mov     r1, lr
+       lsrs    r1, r1, #1
+       lsls    r0, r0, #1
+       lsls    r1, r1, #1
+       ldrh    r1, [r1, r0]
+       lsls    r1, r1, #1
+       add     lr, lr, r1
+       pop     {r0, r1}
+       bx      lr
+       SIZE (__gnu_thumb1_case_uhi)
+#endif
+
+#ifdef L_thumb1_case_si
+       
+       .text
+       .align 0
+        .force_thumb
+       .syntax unified
+       THUMB_FUNC_START __gnu_thumb1_case_si
+       push    {r0, r1}
+       mov     r1, lr
+       adds.n  r1, r1, #2      /* Align to word.  */
+       lsrs    r1, r1, #2
+       lsls    r0, r0, #2
+       lsls    r1, r1, #2
+       ldr     r0, [r1, r0]
+       adds    r0, r0, r1
+       mov     lr, r0
+       pop     {r0, r1}
+       mov     pc, lr          /* We know we were called from thumb code.  */
+       SIZE (__gnu_thumb1_case_si)
+#endif
+
+#endif /* Arch supports thumb.  */
+
+#ifndef __symbian__
+#ifndef __ARM_ARCH_6M__
+#include "ieee754-df.S"
+#include "ieee754-sf.S"
+#include "bpabi.S"
+#else /* __ARM_ARCH_6M__ */
+#include "bpabi-v6m.S"
+#endif /* __ARM_ARCH_6M__ */
+#endif /* !__symbian__ */
diff --git a/libgcc/config/arm/libunwind.S b/libgcc/config/arm/libunwind.S

index a3a19daab4b2e4fc53ca3f2e8000b379b5b6d727..8166cd86e478a16d707ee81668ef0c53931f4ea4 100644 (file)
--- a/libgcc/config/arm/libunwind.S
+++ b/libgcc/config/arm/libunwind.S
@@ -40,7 +40,7 @@
  
  #ifndef __symbian__
  
-#include "config/arm/lib1funcs.asm"
+#include "lib1funcs.S"
  
  .macro UNPREFIX name
         .global SYM (\name)
diff --git a/libgcc/config/arm/t-arm b/libgcc/config/arm/t-arm

new file mode 100644 (file)

index 0000000..4e17e99
--- /dev/null
+++ b/libgcc/config/arm/t-arm
@@ -0,0 +1,3 @@
+LIB1ASMSRC = arm/lib1funcs.S
+LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \
+       _thumb1_case_uhi _thumb1_case_si
diff --git a/libgcc/config/arm/t-bpabi b/libgcc/config/arm/t-bpabi

index ebb2f9fd85d51404a4c4edf541ea28014a18f1d7..8787285ab1fbc71bc9665a6d6fb8db0fa8296d23 100644 (file)
--- a/libgcc/config/arm/t-bpabi
+++ b/libgcc/config/arm/t-bpabi
@@ -1,3 +1,6 @@
+# Add the bpabi.S functions.
+LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod
+
  LIB2ADDEH = $(srcdir)/config/arm/unwind-arm.c \
    $(srcdir)/config/arm/libunwind.S \
    $(srcdir)/config/arm/pr-support.c $(srcdir)/unwind-c.c
diff --git a/libgcc/config/arm/t-elf b/libgcc/config/arm/t-elf

new file mode 100644 (file)

index 0000000..fab32e4
--- /dev/null
+++ b/libgcc/config/arm/t-elf
@@ -0,0 +1,13 @@
+# For most CPUs we have assembly soft-float implementations.
+# However this is not true for ARMv6M.  Here we want to use the soft-fp C
+# implementation.  The soft-fp code is only built for ARMv6M.  This pulls
+# in the asm implementation for other CPUs.
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \
+       _call_via_rX _interwork_call_via_rX \
+       _lshrdi3 _ashrdi3 _ashldi3 \
+       _arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \
+       _arm_fixdfsi _arm_fixunsdfsi \
+       _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \
+       _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \
+       _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \
+       _clzsi2 _clzdi2 
diff --git a/libgcc/config/arm/t-linux b/libgcc/config/arm/t-linux

new file mode 100644 (file)

index 0000000..a154f77
--- /dev/null
+++ b/libgcc/config/arm/t-linux
@@ -0,0 +1,3 @@
+LIB1ASMSRC = arm/lib1funcs.S
+LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx _clzsi2 _clzdi2 \
+       _arm_addsubdf3 _arm_addsubsf3
diff --git a/libgcc/config/arm/t-linux-eabi b/libgcc/config/arm/t-linux-eabi

new file mode 100644 (file)

index 0000000..dfc9197
--- /dev/null
+++ b/libgcc/config/arm/t-linux-eabi
@@ -0,0 +1,2 @@
+# Use a version of div0 which raises SIGFPE, and a special __clear_cache.
+LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache
diff --git a/libgcc/config/arm/t-strongarm-elf b/libgcc/config/arm/t-strongarm-elf

new file mode 100644 (file)

index 0000000..cd9f966
--- /dev/null
+++ b/libgcc/config/arm/t-strongarm-elf
@@ -0,0 +1 @@
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2
diff --git a/libgcc/config/arm/t-symbian b/libgcc/config/arm/t-symbian

index 6788d5f40b3279588b284c792eecd3265f69eef0..1989696c8a3e4a999522fd5213e411d0adf0509a 100644 (file)
--- a/libgcc/config/arm/t-symbian
+++ b/libgcc/config/arm/t-symbian
@@ -1,2 +1,16 @@
+LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
+
+# These functions have __aeabi equivalents and will never be called by GCC.  
+# By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being
+# used -- and we make sure that definitions are not available in lib1funcs.S,
+# either, so they end up undefined.
+LIB1ASMFUNCS += \
+       _ashldi3 _ashrdi3 _divdi3 _floatdidf _udivmoddi4 _umoddi3 \
+       _udivdi3 _lshrdi3 _moddi3 _muldi3 _negdi2 _cmpdi2 \
+       _fixdfdi _fixsfdi _fixunsdfdi _fixunssfdi _floatdisf \
+       _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \
+       _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \
+       _fixsfsi _fixunssfsi
+
  # Include the gcc personality routine
  LIB2ADDEH = $(srcdir)/unwind-c.c $(srcdir)/config/arm/pr-support.c
diff --git a/libgcc/config/arm/t-vxworks b/libgcc/config/arm/t-vxworks

new file mode 100644 (file)

index 0000000..70ccdc1
--- /dev/null
+++ b/libgcc/config/arm/t-vxworks
@@ -0,0 +1 @@
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
diff --git a/libgcc/config/arm/t-wince-pe b/libgcc/config/arm/t-wince-pe

new file mode 100644 (file)

index 0000000..33ea969
--- /dev/null
+++ b/libgcc/config/arm/t-wince-pe
@@ -0,0 +1 @@
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S

new file mode 100644 (file)

index 0000000..8c369c9
--- /dev/null
+++ b/libgcc/config/avr/lib1funcs.S
@@ -0,0 +1,1533 @@
+/*  -*- Mode: Asm -*-  */
+/* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009
+   Free Software Foundation, Inc.
+   Contributed by Denis Chertykov <chertykov@gmail.com>
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#define __zero_reg__ r1
+#define __tmp_reg__ r0
+#define __SREG__ 0x3f
+#define __SP_H__ 0x3e
+#define __SP_L__ 0x3d
+#define __RAMPZ__ 0x3B
+#define __EIND__  0x3C
+
+/* Most of the functions here are called directly from avr.md
+   patterns, instead of using the standard libcall mechanisms.
+   This can make better code because GCC knows exactly which
+   of the call-used registers (not all of them) are clobbered.  */
+
+/* FIXME:  At present, there is no SORT directive in the linker
+           script so that we must not assume that different modules
+           in the same input section like .libgcc.text.mul will be
+           located close together.  Therefore, we cannot use
+           RCALL/RJMP to call a function like __udivmodhi4 from
+           __divmodhi4 and have to use lengthy XCALL/XJMP even
+           though they are in the same input section and all same
+           input sections together are small enough to reach every
+           location with a RCALL/RJMP instruction.  */
+
+       .macro  mov_l  r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+       movw    \r_dest, \r_src
+#else
+       mov     \r_dest, \r_src
+#endif
+       .endm
+
+       .macro  mov_h  r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+       ; empty
+#else
+       mov     \r_dest, \r_src
+#endif
+       .endm
+
+#if defined (__AVR_HAVE_JMP_CALL__)
+#define XCALL call
+#define XJMP  jmp
+#else
+#define XCALL rcall
+#define XJMP  rjmp
+#endif
+
+.macro DEFUN name
+.global \name
+.func \name
+\name:
+.endm
+
+.macro ENDF name
+.size \name, .-\name
+.endfunc
+.endm
+
+\f
+.section .text.libgcc.mul, "ax", @progbits
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+/* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
+#if !defined (__AVR_HAVE_MUL__)
+/*******************************************************
+    Multiplication  8 x 8  without MUL
+*******************************************************/
+#if defined (L_mulqi3)
+
+#define        r_arg2  r22             /* multiplicand */
+#define        r_arg1  r24             /* multiplier */
+#define r_res  __tmp_reg__     /* result */
+
+DEFUN __mulqi3
+       clr     r_res           ; clear result
+__mulqi3_loop:
+       sbrc    r_arg1,0
+       add     r_res,r_arg2
+       add     r_arg2,r_arg2   ; shift multiplicand
+       breq    __mulqi3_exit   ; while multiplicand != 0
+       lsr     r_arg1          ; 
+       brne    __mulqi3_loop   ; exit if multiplier = 0
+__mulqi3_exit: 
+       mov     r_arg1,r_res    ; result to return register
+       ret
+ENDF __mulqi3
+
+#undef r_arg2  
+#undef r_arg1  
+#undef r_res   
+       
+#endif         /* defined (L_mulqi3) */
+
+#if defined (L_mulqihi3)
+DEFUN __mulqihi3
+       clr     r25             ; sign-extend multiplier r24 into r25:r24
+       sbrc    r24, 7
+       dec     r25             ; r25 = 0xff when r24 is negative
+       clr     r23             ; sign-extend multiplicand r22 into r23:r22
+       sbrc    r22, 7
+       dec     r23             ; r23 = 0xff when r22 is negative
+       XJMP    __mulhi3        ; finish with the 16 x 16 multiply
+ENDF __mulqihi3
+#endif /* defined (L_mulqihi3) */
+
+#if defined (L_umulqihi3)
+DEFUN __umulqihi3
+       clr     r25
+       clr     r23
+       XJMP    __mulhi3
+ENDF __umulqihi3
+#endif /* defined (L_umulqihi3) */
+
+/*******************************************************
+    Multiplication  16 x 16  without MUL
+*******************************************************/
+#if defined (L_mulhi3)
+#define        r_arg1L r24             /* multiplier Low */
+#define        r_arg1H r25             /* multiplier High */
+#define        r_arg2L r22             /* multiplicand Low */
+#define        r_arg2H r23             /* multiplicand High */
+#define r_resL __tmp_reg__     /* result Low */
+#define r_resH  r21            /* result High */
+
+DEFUN __mulhi3
+       clr     r_resH          ; clear result
+       clr     r_resL          ; clear result
+__mulhi3_loop:
+       sbrs    r_arg1L,0
+       rjmp    __mulhi3_skip1
+       add     r_resL,r_arg2L  ; result + multiplicand
+       adc     r_resH,r_arg2H
+__mulhi3_skip1:        
+       add     r_arg2L,r_arg2L ; shift multiplicand
+       adc     r_arg2H,r_arg2H
+
+       cp      r_arg2L,__zero_reg__
+       cpc     r_arg2H,__zero_reg__
+       breq    __mulhi3_exit   ; while multiplicand != 0
+
+       lsr     r_arg1H         ; gets LSB of multiplier
+       ror     r_arg1L
+       sbiw    r_arg1L,0
+       brne    __mulhi3_loop   ; exit if multiplier = 0
+__mulhi3_exit:
+       mov     r_arg1H,r_resH  ; result to return register
+       mov     r_arg1L,r_resL
+       ret
+ENDF __mulhi3
+
+#undef r_arg1L
+#undef r_arg1H
+#undef r_arg2L
+#undef r_arg2H
+#undef r_resL  
+#undef r_resH 
+
+#endif /* defined (L_mulhi3) */
+
+/*******************************************************
+    Widening Multiplication  32 = 16 x 16  without MUL
+*******************************************************/
+
+#if defined (L_mulhisi3)
+DEFUN __mulhisi3
+;;; FIXME: This is dead code (no one calls it)
+    mov_l   r18, r24
+    mov_h   r19, r25
+    clr     r24
+    sbrc    r23, 7
+    dec     r24
+    mov     r25, r24
+    clr     r20
+    sbrc    r19, 7
+    dec     r20
+    mov     r21, r20
+    XJMP    __mulsi3
+ENDF __mulhisi3
+#endif /* defined (L_mulhisi3) */
+
+#if defined (L_umulhisi3)
+DEFUN __umulhisi3
+;;; FIXME: This is dead code (no one calls it)
+    mov_l   r18, r24
+    mov_h   r19, r25
+    clr     r24
+    clr     r25
+    mov_l   r20, r24
+    mov_h   r21, r25
+    XJMP    __mulsi3
+ENDF __umulhisi3
+#endif /* defined (L_umulhisi3) */
+
+#if defined (L_mulsi3)
+/*******************************************************
+    Multiplication  32 x 32  without MUL
+*******************************************************/
+#define r_arg1L  r22           /* multiplier Low */
+#define r_arg1H  r23
+#define        r_arg1HL r24
+#define        r_arg1HH r25            /* multiplier High */
+
+#define        r_arg2L  r18            /* multiplicand Low */
+#define        r_arg2H  r19    
+#define        r_arg2HL r20
+#define        r_arg2HH r21            /* multiplicand High */
+       
+#define r_resL  r26            /* result Low */
+#define r_resH   r27
+#define r_resHL         r30
+#define r_resHH  r31           /* result High */
+
+DEFUN __mulsi3
+       clr     r_resHH         ; clear result
+       clr     r_resHL         ; clear result
+       clr     r_resH          ; clear result
+       clr     r_resL          ; clear result
+__mulsi3_loop:
+       sbrs    r_arg1L,0
+       rjmp    __mulsi3_skip1
+       add     r_resL,r_arg2L          ; result + multiplicand
+       adc     r_resH,r_arg2H
+       adc     r_resHL,r_arg2HL
+       adc     r_resHH,r_arg2HH
+__mulsi3_skip1:
+       add     r_arg2L,r_arg2L         ; shift multiplicand
+       adc     r_arg2H,r_arg2H
+       adc     r_arg2HL,r_arg2HL
+       adc     r_arg2HH,r_arg2HH
+       
+       lsr     r_arg1HH        ; gets LSB of multiplier
+       ror     r_arg1HL
+       ror     r_arg1H
+       ror     r_arg1L
+       brne    __mulsi3_loop
+       sbiw    r_arg1HL,0
+       cpc     r_arg1H,r_arg1L
+       brne    __mulsi3_loop           ; exit if multiplier = 0
+__mulsi3_exit:
+       mov_h   r_arg1HH,r_resHH        ; result to return register
+       mov_l   r_arg1HL,r_resHL
+       mov_h   r_arg1H,r_resH
+       mov_l   r_arg1L,r_resL
+       ret
+ENDF __mulsi3
+
+#undef r_arg1L 
+#undef r_arg1H 
+#undef r_arg1HL
+#undef r_arg1HH
+             
+#undef r_arg2L 
+#undef r_arg2H 
+#undef r_arg2HL
+#undef r_arg2HH
+             
+#undef r_resL  
+#undef r_resH  
+#undef r_resHL 
+#undef r_resHH 
+
+#endif /* defined (L_mulsi3) */
+
+#endif /* !defined (__AVR_HAVE_MUL__) */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+\f
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+#if defined (__AVR_HAVE_MUL__)    
+#define A0 26
+#define B0 18
+#define C0 22
+
+#define A1 A0+1
+
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+/*******************************************************
+    Widening Multiplication  32 = 16 x 16
+*******************************************************/
+                              
+#if defined (L_mulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
+;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulhisi3
+    XCALL   __umulhisi3
+    ;; Sign-extend B
+    tst     B1
+    brpl    1f
+    sub     C2, A0
+    sbc     C3, A1
+1:  ;; Sign-extend A
+    XJMP __usmulhisi3_tail
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
+#if defined (L_usmulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __usmulhisi3
+    XCALL   __umulhisi3
+    ;; FALLTHRU
+ENDF __usmulhisi3
+
+DEFUN __usmulhisi3_tail
+    ;; Sign-extend A
+    sbrs    A1, 7
+    ret
+    sub     C2, B0
+    sbc     C3, B1
+    ret
+ENDF __usmulhisi3_tail
+#endif /* L_usmulhisi3 */
+
+#if defined (L_umulhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __umulhisi3
+    mul     A0, B0
+    movw    C0, r0
+    mul     A1, B1
+    movw    C2, r0
+    mul     A0, B1
+    rcall   1f
+    mul     A1, B0
+1:  add     C1, r0
+    adc     C2, r1
+    clr     __zero_reg__
+    adc     C3, __zero_reg__
+    ret
+ENDF __umulhisi3
+#endif /* L_umulhisi3 */
+
+/*******************************************************
+    Widening Multiplication  32 = 16 x 32
+*******************************************************/
+
+#if defined (L_mulshisi3)
+;;; R25:R22 = (signed long) R27:R26 * R21:R18
+;;; (C3:C0) = (signed long) A1:A0   * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulshisi3
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+    ;; Some cores have problems skipping a 2-word instruction
+    tst     A1
+    brmi    __mulohisi3
+#else
+    sbrs    A1, 7
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+    XJMP    __muluhisi3
+    ;; FALLTHRU
+ENDF __mulshisi3
+    
+;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
+;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulohisi3
+    XCALL   __muluhisi3
+    ;; One-extend R27:R26 (A1:A0)
+    sub     C2, B0
+    sbc     C3, B1
+    ret
+ENDF __mulohisi3
+#endif /* L_mulshisi3 */
+
+#if defined (L_muluhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
+;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __muluhisi3
+    XCALL   __umulhisi3
+    mul     A0, B3
+    add     C3, r0
+    mul     A1, B2
+    add     C3, r0
+    mul     A0, B2
+    add     C2, r0
+    adc     C3, r1
+    clr     __zero_reg__
+    ret
+ENDF __muluhisi3
+#endif /* L_muluhisi3 */
+
+/*******************************************************
+    Multiplication  32 x 32
+*******************************************************/
+
+#if defined (L_mulsi3)
+;;; R25:R22 = R25:R22 * R21:R18
+;;; (C3:C0) = C3:C0   * B3:B0
+;;; Clobbers: R26, R27, __tmp_reg__
+DEFUN __mulsi3
+    movw    A0, C0
+    push    C2
+    push    C3
+    XCALL   __muluhisi3
+    pop     A1
+    pop     A0
+    ;; A1:A0 now contains the high word of A
+    mul     A0, B0
+    add     C2, r0
+    adc     C3, r1
+    mul     A0, B1
+    add     C3, r0
+    mul     A1, B0
+    add     C3, r0
+    clr     __zero_reg__
+    ret
+ENDF __mulsi3
+#endif /* L_mulsi3 */
+
+#undef A0
+#undef A1
+
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#endif /* __AVR_HAVE_MUL__ */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+       
+\f
+.section .text.libgcc.div, "ax", @progbits
+
+/*******************************************************
+       Division 8 / 8 => (result + remainder)
+*******************************************************/
+#define        r_rem   r25     /* remainder */
+#define        r_arg1  r24     /* dividend, quotient */
+#define        r_arg2  r22     /* divisor */
+#define        r_cnt   r23     /* loop count */
+
+#if defined (L_udivmodqi4)
+DEFUN __udivmodqi4
+       sub     r_rem,r_rem     ; clear remainder and carry
+       ldi     r_cnt,9         ; init loop counter
+       rjmp    __udivmodqi4_ep ; jump to entry point
+__udivmodqi4_loop:
+       rol     r_rem           ; shift dividend into remainder
+       cp      r_rem,r_arg2    ; compare remainder & divisor
+       brcs    __udivmodqi4_ep ; remainder < divisor: skip the subtraction
+       sub     r_rem,r_arg2    ; remainder >= divisor: subtract divisor
+__udivmodqi4_ep:
+       rol     r_arg1          ; shift dividend (with CARRY)
+       dec     r_cnt           ; decrement loop counter
+       brne    __udivmodqi4_loop
+       com     r_arg1          ; complement result
+                               ; because C flag was complemented in loop
+       ret
+ENDF __udivmodqi4
+#endif /* defined (L_udivmodqi4) */
+
+#if defined (L_divmodqi4)
+DEFUN __divmodqi4
+        bst     r_arg1,7       ; store sign of dividend
+        mov     __tmp_reg__,r_arg1
+        eor     __tmp_reg__,r_arg2; r0.7 is sign of result
+        sbrc   r_arg1,7
+       neg     r_arg1          ; dividend negative : negate
+        sbrc   r_arg2,7
+       neg     r_arg2          ; divisor negative : negate
+       XCALL   __udivmodqi4    ; do the unsigned div/mod
+       brtc    __divmodqi4_1
+       neg     r_rem           ; correct remainder sign
+__divmodqi4_1:
+       sbrc    __tmp_reg__,7
+       neg     r_arg1          ; correct result sign
+__divmodqi4_exit:
+       ret
+ENDF __divmodqi4
+#endif /* defined (L_divmodqi4) */
+
+#undef r_rem
+#undef r_arg1
+#undef r_arg2
+#undef r_cnt
+       
+               
+/*******************************************************
+       Division 16 / 16 => (result + remainder)
+*******************************************************/
+#define        r_remL  r26     /* remainder Low */
+#define        r_remH  r27     /* remainder High */
+
+/* return: remainder */
+#define        r_arg1L r24     /* dividend Low */
+#define        r_arg1H r25     /* dividend High */
+
+/* return: quotient */
+#define        r_arg2L r22     /* divisor Low */
+#define        r_arg2H r23     /* divisor High */
+       
+#define        r_cnt   r21     /* loop count */
+
+#if defined (L_udivmodhi4)
+DEFUN __udivmodhi4
+       sub     r_remL,r_remL
+       sub     r_remH,r_remH   ; clear remainder and carry
+       ldi     r_cnt,17        ; init loop counter
+       rjmp    __udivmodhi4_ep ; jump to entry point
+__udivmodhi4_loop:
+        rol    r_remL          ; shift dividend into remainder
+       rol     r_remH
+        cp     r_remL,r_arg2L  ; compare remainder & divisor
+       cpc     r_remH,r_arg2H
+        brcs   __udivmodhi4_ep ; remainder < divisor
+        sub    r_remL,r_arg2L  ; restore remainder
+        sbc    r_remH,r_arg2H
+__udivmodhi4_ep:
+        rol    r_arg1L         ; shift dividend (with CARRY)
+        rol    r_arg1H
+        dec    r_cnt           ; decrement loop counter
+        brne   __udivmodhi4_loop
+       com     r_arg1L
+       com     r_arg1H
+; div/mod results to return registers, as for the div() function
+       mov_l   r_arg2L, r_arg1L        ; quotient
+       mov_h   r_arg2H, r_arg1H
+       mov_l   r_arg1L, r_remL         ; remainder
+       mov_h   r_arg1H, r_remH
+       ret
+ENDF __udivmodhi4
+#endif /* defined (L_udivmodhi4) */
+
+#if defined (L_divmodhi4)
+DEFUN __divmodhi4
+       .global _div
+_div:
+        bst     r_arg1H,7      ; store sign of dividend
+        mov     __tmp_reg__,r_arg1H
+        eor     __tmp_reg__,r_arg2H   ; r0.7 is sign of result
+       rcall   __divmodhi4_neg1 ; dividend negative : negate
+       sbrc    r_arg2H,7
+       rcall   __divmodhi4_neg2 ; divisor negative : negate
+       XCALL   __udivmodhi4    ; do the unsigned div/mod
+       rcall   __divmodhi4_neg1 ; correct remainder sign
+       tst     __tmp_reg__
+       brpl    __divmodhi4_exit
+__divmodhi4_neg2:
+       com     r_arg2H
+       neg     r_arg2L         ; correct divisor/result sign
+       sbci    r_arg2H,0xff
+__divmodhi4_exit:
+       ret
+__divmodhi4_neg1:
+       brtc    __divmodhi4_exit
+       com     r_arg1H
+       neg     r_arg1L         ; correct dividend/remainder sign
+       sbci    r_arg1H,0xff
+       ret
+ENDF __divmodhi4
+#endif /* defined (L_divmodhi4) */
+
+#undef r_remH  
+#undef r_remL  
+             
+#undef r_arg1H 
+#undef r_arg1L 
+             
+#undef r_arg2H 
+#undef r_arg2L 
+               
+#undef r_cnt           
+       
+/*******************************************************
+       Division 32 / 32 => (result + remainder)
+*******************************************************/
+#define        r_remHH r31     /* remainder High */
+#define        r_remHL r30
+#define        r_remH  r27
+#define        r_remL  r26     /* remainder Low */
+
+/* return: remainder */
+#define        r_arg1HH r25    /* dividend High */
+#define        r_arg1HL r24
+#define        r_arg1H  r23
+#define        r_arg1L  r22    /* dividend Low */
+
+/* return: quotient */
+#define        r_arg2HH r21    /* divisor High */
+#define        r_arg2HL r20
+#define        r_arg2H  r19
+#define        r_arg2L  r18    /* divisor Low */
+       
+#define        r_cnt __zero_reg__  /* loop count (0 after the loop!) */
+
+#if defined (L_udivmodsi4)
+DEFUN __udivmodsi4
+       ldi     r_remL, 33      ; init loop counter
+       mov     r_cnt, r_remL
+       sub     r_remL,r_remL
+       sub     r_remH,r_remH   ; clear remainder and carry
+       mov_l   r_remHL, r_remL
+       mov_h   r_remHH, r_remH
+       rjmp    __udivmodsi4_ep ; jump to entry point
+__udivmodsi4_loop:
+        rol    r_remL          ; shift dividend into remainder
+       rol     r_remH
+       rol     r_remHL
+       rol     r_remHH
+        cp     r_remL,r_arg2L  ; compare remainder & divisor
+       cpc     r_remH,r_arg2H
+       cpc     r_remHL,r_arg2HL
+       cpc     r_remHH,r_arg2HH
+       brcs    __udivmodsi4_ep ; remainder < divisor: skip the subtraction
+        sub    r_remL,r_arg2L  ; remainder >= divisor: subtract divisor
+        sbc    r_remH,r_arg2H
+        sbc    r_remHL,r_arg2HL
+        sbc    r_remHH,r_arg2HH
+__udivmodsi4_ep:
+        rol    r_arg1L         ; shift dividend (with CARRY)
+        rol    r_arg1H
+        rol    r_arg1HL
+        rol    r_arg1HH
+        dec    r_cnt           ; decrement loop counter
+        brne   __udivmodsi4_loop
+                               ; __zero_reg__ now restored (r_cnt == 0)
+       com     r_arg1L         ; complement: carry bits shifted in were inverted
+       com     r_arg1H
+       com     r_arg1HL
+       com     r_arg1HH
+; div/mod results to return registers, as for the ldiv() function
+       mov_l   r_arg2L,  r_arg1L       ; quotient
+       mov_h   r_arg2H,  r_arg1H
+       mov_l   r_arg2HL, r_arg1HL
+       mov_h   r_arg2HH, r_arg1HH
+       mov_l   r_arg1L,  r_remL        ; remainder
+       mov_h   r_arg1H,  r_remH
+       mov_l   r_arg1HL, r_remHL
+       mov_h   r_arg1HH, r_remHH
+       ret
+ENDF __udivmodsi4
+#endif /* defined (L_udivmodsi4) */
+
+#if defined (L_divmodsi4)
+DEFUN __divmodsi4
+        bst     r_arg1HH,7     ; store sign of dividend
+        mov     __tmp_reg__,r_arg1HH
+        eor     __tmp_reg__,r_arg2HH   ; r0.7 is sign of result
+       rcall   __divmodsi4_neg1 ; dividend negative : negate
+       sbrc    r_arg2HH,7
+       rcall   __divmodsi4_neg2 ; divisor negative : negate
+       XCALL   __udivmodsi4    ; do the unsigned div/mod
+       rcall   __divmodsi4_neg1 ; correct remainder sign
+       rol     __tmp_reg__
+       brcc    __divmodsi4_exit
+__divmodsi4_neg2:
+       com     r_arg2HH
+       com     r_arg2HL
+       com     r_arg2H
+       neg     r_arg2L         ; correct divisor/quotient sign
+       sbci    r_arg2H,0xff
+       sbci    r_arg2HL,0xff
+       sbci    r_arg2HH,0xff
+__divmodsi4_exit:
+       ret
+__divmodsi4_neg1:
+       brtc    __divmodsi4_exit
+       com     r_arg1HH
+       com     r_arg1HL
+       com     r_arg1H
+       neg     r_arg1L         ; correct dividend/remainder sign
+       sbci    r_arg1H, 0xff
+       sbci    r_arg1HL,0xff
+       sbci    r_arg1HH,0xff
+       ret
+ENDF __divmodsi4
+#endif /* defined (L_divmodsi4) */
+
+\f
+.section .text.libgcc.prologue, "ax", @progbits
+    
+/**********************************
+ * Prologue subroutine: push r2-r17, r28, r29; SP -= r27:r26; continue via Z
+ **********************************/
+#if defined (L_prologue)
+
+DEFUN __prologue_saves__
+       push r2
+       push r3
+       push r4
+       push r5
+       push r6
+       push r7
+       push r8
+       push r9
+       push r10
+       push r11
+       push r12
+       push r13
+       push r14
+       push r15
+       push r16
+       push r17
+       push r28
+       push r29
+       in      r28,__SP_L__    ; Y (r29:r28) = stack pointer after the pushes
+       in      r29,__SP_H__
+       sub     r28,r26         ; Y -= X (r27:r26)
+       sbc     r29,r27
+       in      __tmp_reg__,__SREG__    ; keep SREG so the interrupt flag can be put back
+       cli                     ; no interrupts while SP is only half updated
+       out     __SP_H__,r29
+       out     __SREG__,__tmp_reg__    ; SREG is restored between the two SP writes
+       out     __SP_L__,r28    ; SP = Y
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+       eijmp                   ; continue at the address held in Z (extended by EIND)
+#else
+       ijmp                    ; continue at the address held in Z
+#endif
+
+ENDF __prologue_saves__
+#endif /* defined (L_prologue) */
+
+/*
+ * Epilogue subroutine: reload r2-r17 from Y+18..Y+3, set SP = Y + r30, reload Y, return
+ */
+#if defined (L_epilogue)
+
+DEFUN __epilogue_restores__
+       ldd     r2,Y+18
+       ldd     r3,Y+17
+       ldd     r4,Y+16
+       ldd     r5,Y+15
+       ldd     r6,Y+14
+       ldd     r7,Y+13
+       ldd     r8,Y+12
+       ldd     r9,Y+11
+       ldd     r10,Y+10
+       ldd     r11,Y+9
+       ldd     r12,Y+8
+       ldd     r13,Y+7
+       ldd     r14,Y+6
+       ldd     r15,Y+5
+       ldd     r16,Y+4
+       ldd     r17,Y+3
+       ldd     r26,Y+2         ; X = word at Y+1/Y+2; it becomes the new Y just before ret
+       ldd     r27,Y+1
+       add     r28,r30         ; Y += r30 (8-bit amount, carry goes into r29)
+       adc     r29,__zero_reg__
+       in      __tmp_reg__,__SREG__    ; keep SREG so the interrupt flag can be put back
+       cli                     ; no interrupts while SP is only half updated
+       out     __SP_H__,r29
+       out     __SREG__,__tmp_reg__    ; SREG is restored between the two SP writes
+       out     __SP_L__,r28    ; SP = Y
+       mov_l   r28, r26        ; Y = X, the word loaded from Y+1/Y+2 above
+       mov_h   r29, r27
+       ret
+ENDF __epilogue_restores__
+#endif /* defined (L_epilogue) */
+
+#ifdef L_exit
+       .section .fini9,"ax",@progbits
+DEFUN _exit                    ; emits no instructions itself; the .fini sections that follow do the work
+       .weak   exit
+exit:                          ; weak label at the same address as _exit
+ENDF _exit
+
+       /* Code from .fini8 ... .fini1 sections inserted by ld script.  */
+
+       .section .fini0,"ax",@progbits
+       cli                     ; interrupts off before parking the CPU
+__stop_program:
+       rjmp    __stop_program  ; endless loop: execution never leaves this instruction
+#endif /* defined (L_exit) */
+
+#ifdef L_cleanup
+       .weak   _cleanup        ; weak: a definition elsewhere replaces this stub
+       .func   _cleanup
+_cleanup:
+       ret                     ; default implementation does nothing
+.endfunc
+#endif /* defined (L_cleanup) */
+
+\f
+.section .text.libgcc, "ax", @progbits
+    
+#ifdef L_tablejump
+DEFUN __tablejump2__
+       lsl     r30             ; Z = 2 * Z (presumably word index -> byte address), then use __tablejump__
+       rol     r31
+    ;; FALLTHRU
+ENDF __tablejump2__
+
+DEFUN __tablejump__            ; read a 2-byte entry from program memory at Z and jump to it
+#if defined (__AVR_HAVE_LPMX__)
+       lpm __tmp_reg__, Z+     ; first byte of the entry, destined for r30
+       lpm r31, Z              ; second byte of the entry
+       mov r30, __tmp_reg__    ; Z now holds the fetched entry
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+       eijmp
+#else
+       ijmp
+#endif
+
+#else /* !HAVE_LPMX */
+       lpm                     ; plain lpm loads r0 from (Z)
+       adiw r30, 1
+       push r0                 ; first byte of the entry
+       lpm
+       push r0                 ; second byte of the entry
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+       in   __tmp_reg__, __EIND__
+       push __tmp_reg__        ; EIND is pushed as a third byte above the entry
+#endif
+       ret                     ; pops the bytes just pushed as a return address, i.e. jumps there
+#endif /* !HAVE_LPMX */
+ENDF __tablejump__
+#endif /* defined (L_tablejump) */
+
+#ifdef L_copy_data
+       .section .init4,"ax",@progbits
+DEFUN __do_copy_data           ; copy program memory at __data_load_start to RAM from __data_start up to __data_end
+#if defined(__AVR_HAVE_ELPMX__)
+       ldi     r17, hi8(__data_end)    ; kept in a register for the cpc below
+       ldi     r26, lo8(__data_start)  ; X = RAM destination
+       ldi     r27, hi8(__data_start)
+       ldi     r30, lo8(__data_load_start)     ; Z = low 16 bits of the source address
+       ldi     r31, hi8(__data_load_start)
+       ldi     r16, hh8(__data_load_start)
+       out     __RAMPZ__, r16  ; RAMPZ = bits 16..23 of the source address
+       rjmp    .L__do_copy_data_start  ; test first, so an empty range copies nothing
+.L__do_copy_data_loop:
+       elpm    r0, Z+
+       st      X+, r0
+.L__do_copy_data_start:
+       cpi     r26, lo8(__data_end)
+       cpc     r27, r17
+       brne    .L__do_copy_data_loop   ; loop until X == __data_end
+#elif  !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
+       ldi     r17, hi8(__data_end)    ; kept in a register for the cpc below
+       ldi     r26, lo8(__data_start)  ; X = RAM destination
+       ldi     r27, hi8(__data_start)
+       ldi     r30, lo8(__data_load_start)     ; Z = low 16 bits of the source address
+       ldi     r31, hi8(__data_load_start)
+       ldi     r16, hh8(__data_load_start - 0x10000)   ; one 64 KiB page low: the inc below corrects it
+.L__do_copy_data_carry:        ; entered once at start and again each time Z wraps
+       inc     r16
+       out     __RAMPZ__, r16
+       rjmp    .L__do_copy_data_start
+.L__do_copy_data_loop:
+       elpm                    ; plain elpm loads r0 from (RAMPZ:Z)
+       st      X+, r0
+       adiw    r30, 1
+       brcs    .L__do_copy_data_carry  ; Z wrapped past 0xffff: step RAMPZ to the next page
+.L__do_copy_data_start:
+       cpi     r26, lo8(__data_end)
+       cpc     r27, r17
+       brne    .L__do_copy_data_loop   ; loop until X == __data_end
+#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
+       ldi     r17, hi8(__data_end)    ; kept in a register for the cpc below
+       ldi     r26, lo8(__data_start)  ; X = RAM destination
+       ldi     r27, hi8(__data_start)
+       ldi     r30, lo8(__data_load_start)     ; Z = source address (16 bits, no RAMPZ here)
+       ldi     r31, hi8(__data_load_start)
+       rjmp    .L__do_copy_data_start  ; test first, so an empty range copies nothing
+.L__do_copy_data_loop:
+#if defined (__AVR_HAVE_LPMX__)
+       lpm     r0, Z+
+#else
+       lpm                     ; plain lpm loads r0 from (Z); Z is advanced by hand
+       adiw    r30, 1
+#endif
+       st      X+, r0
+.L__do_copy_data_start:
+       cpi     r26, lo8(__data_end)
+       cpc     r27, r17
+       brne    .L__do_copy_data_loop   ; loop until X == __data_end
+#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
+ENDF __do_copy_data
+#endif /* L_copy_data */
+
+/* __do_clear_bss is only necessary if there is anything in .bss section.  */
+
+#ifdef L_clear_bss
+       .section .init4,"ax",@progbits
+DEFUN __do_clear_bss           ; store __zero_reg__ to every byte from __bss_start up to __bss_end
+       ldi     r17, hi8(__bss_end)     ; kept in a register for the cpc below
+       ldi     r26, lo8(__bss_start)   ; X = write pointer
+       ldi     r27, hi8(__bss_start)
+       rjmp    .do_clear_bss_start     ; test first, so an empty range writes nothing
+.do_clear_bss_loop:
+       st      X+, __zero_reg__
+.do_clear_bss_start:
+       cpi     r26, lo8(__bss_end)
+       cpc     r27, r17
+       brne    .do_clear_bss_loop      ; loop until X == __bss_end
+ENDF __do_clear_bss
+#endif /* L_clear_bss */
+
+/* __do_global_ctors and __do_global_dtors are only necessary
+   if there are any constructors/destructors.  */
+
+#ifdef L_ctors
+       .section .init6,"ax",@progbits
+DEFUN __do_global_ctors        ; dispatch each 2-byte table entry, walking from __ctors_end down to __ctors_start
+#if defined(__AVR_HAVE_RAMPZ__)
+       ldi     r17, hi8(__ctors_start) ; kept in a register for the cpc below
+       ldi     r28, lo8(__ctors_end)   ; r16:r29:r28 = 24-bit table pointer, starts at the end
+       ldi     r29, hi8(__ctors_end)
+       ldi     r16, hh8(__ctors_end)
+       rjmp    .L__do_global_ctors_start       ; test first, so an empty table calls nothing
+.L__do_global_ctors_loop:
+       sbiw    r28, 2          ; step back one entry before using it
+       sbc     r16, __zero_reg__       ; borrow into the top byte
+       mov_h   r31, r29        ; Z = entry address for the table jump
+       mov_l   r30, r28
+       out     __RAMPZ__, r16
+       XCALL   __tablejump_elpm__
+.L__do_global_ctors_start:
+       cpi     r28, lo8(__ctors_start)
+       cpc     r29, r17
+       ldi     r24, hh8(__ctors_start) ; ldi leaves the flags from cpi/cpc untouched
+       cpc     r16, r24
+       brne    .L__do_global_ctors_loop        ; loop until the pointer equals __ctors_start
+#else
+       ldi     r17, hi8(__ctors_start) ; kept in a register for the cpc below
+       ldi     r28, lo8(__ctors_end)   ; Y = table pointer, starts at the end
+       ldi     r29, hi8(__ctors_end)
+       rjmp    .L__do_global_ctors_start       ; test first, so an empty table calls nothing
+.L__do_global_ctors_loop:
+       sbiw    r28, 2          ; step back one entry before using it
+       mov_h   r31, r29        ; Z = entry address for the table jump
+       mov_l   r30, r28
+       XCALL   __tablejump__
+.L__do_global_ctors_start:
+       cpi     r28, lo8(__ctors_start)
+       cpc     r29, r17
+       brne    .L__do_global_ctors_loop        ; loop until Y == __ctors_start
+#endif /* defined(__AVR_HAVE_RAMPZ__) */
+ENDF __do_global_ctors
+#endif /* L_ctors */
+
+#ifdef L_dtors
+       .section .fini6,"ax",@progbits
+DEFUN __do_global_dtors        ; dispatch each 2-byte table entry, walking from __dtors_start up to __dtors_end
+#if defined(__AVR_HAVE_RAMPZ__)
+       ldi     r17, hi8(__dtors_end)   ; kept in a register for the cpc below
+       ldi     r28, lo8(__dtors_start) ; r16:r29:r28 = 24-bit table pointer, starts at the beginning
+       ldi     r29, hi8(__dtors_start)
+       ldi     r16, hh8(__dtors_start)
+       rjmp    .L__do_global_dtors_start       ; test first, so an empty table calls nothing
+.L__do_global_dtors_loop:
+       mov_h   r31, r29        ; Z = entry address for the table jump
+       mov_l   r30, r28
+       out     __RAMPZ__, r16
+       XCALL   __tablejump_elpm__
+       adiw    r28, 2          ; advance AFTER the call, towards __dtors_end (same order as the branch below)
+       adc     r16, __zero_reg__       ; carry into the top byte
+.L__do_global_dtors_start:
+       cpi     r28, lo8(__dtors_end)
+       cpc     r29, r17
+       ldi     r24, hh8(__dtors_end)   ; ldi leaves the flags from cpi/cpc untouched
+       cpc     r16, r24
+       brne    .L__do_global_dtors_loop        ; loop until the pointer equals __dtors_end
+#else
+       ldi     r17, hi8(__dtors_end)   ; kept in a register for the cpc below
+       ldi     r28, lo8(__dtors_start) ; Y = table pointer, starts at the beginning
+       ldi     r29, hi8(__dtors_start)
+       rjmp    .L__do_global_dtors_start       ; test first, so an empty table calls nothing
+.L__do_global_dtors_loop:
+       mov_h   r31, r29        ; Z = entry address for the table jump
+       mov_l   r30, r28
+       XCALL   __tablejump__
+       adiw    r28, 2          ; advance to the next entry after the call
+.L__do_global_dtors_start:
+       cpi     r28, lo8(__dtors_end)
+       cpc     r29, r17
+       brne    .L__do_global_dtors_loop        ; loop until Y == __dtors_end
+#endif /* defined(__AVR_HAVE_RAMPZ__) */
+ENDF __do_global_dtors
+#endif /* L_dtors */
+
+.section .text.libgcc, "ax", @progbits
+    
+#ifdef L_tablejump_elpm
+DEFUN __tablejump_elpm__       ; like __tablejump__ but fetches the entry with elpm (RAMPZ:Z)
+#if defined (__AVR_HAVE_ELPM__) /* without ELPM nothing is emitted between DEFUN and ENDF */
+#if defined (__AVR_HAVE_LPMX__)
+       elpm    __tmp_reg__, Z+ ; first byte of the entry, destined for r30
+       elpm    r31, Z          ; second byte of the entry
+       mov     r30, __tmp_reg__        ; Z now holds the fetched entry
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+       eijmp
+#else
+       ijmp
+#endif
+
+#else
+       elpm                    ; plain elpm loads r0 from (RAMPZ:Z)
+       adiw    r30, 1
+       push    r0              ; first byte of the entry
+       elpm
+       push    r0              ; second byte of the entry
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+       in      __tmp_reg__, __EIND__
+       push    __tmp_reg__     ; EIND is pushed as a third byte above the entry
+#endif
+       ret                     ; pops the bytes just pushed as a return address, i.e. jumps there
+#endif
+#endif /* defined (__AVR_HAVE_ELPM__) */
+ENDF __tablejump_elpm__
+#endif /* defined (L_tablejump_elpm) */
+
+\f
+.section .text.libgcc.builtins, "ax", @progbits
+
+/**********************************
+ * Find first set Bit (ffs)
+ **********************************/
+
+#if defined (L_ffssi2)
+;; find first set bit: locate the lowest non-zero byte, then let __loop_ffsqi2 find the bit
+;; r25:r24 = ffs32 (r25:r22)
+;; clobbers: r22, r26
+DEFUN __ffssi2
+    clr  r26                   ; r26 = bit offset of the byte under test (0, 8, 16, 24)
+    tst  r22
+    brne 1f
+    subi r26, -8               ; r26 += 8
+    or   r22, r23              ; r22 was 0, so this copies the next byte and sets Z
+    brne 1f
+    subi r26, -8
+    or   r22, r24
+    brne 1f
+    subi r26, -8
+    or   r22, r25
+    brne 1f
+    ret                        ; all four bytes are 0: r25:r24 is already 0
+1:  mov  r24, r22              ; non-zero byte goes to r24 as the helper expects
+    XJMP __loop_ffsqi2
+ENDF __ffssi2
+#endif /* defined (L_ffssi2) */
+
+#if defined (L_ffshi2)
+;; find first set bit: try the low byte, else the high byte with offset 8
+;; r25:r24 = ffs16 (r25:r24)
+;; clobbers: r26
+DEFUN __ffshi2
+    clr  r26                   ; offset 0 for the low byte
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+    ;; Some cores have problem skipping 2-word instruction
+    tst  r24
+    breq 2f
+#else
+    cpse r24, __zero_reg__     ; r24 == 0: skip the XJMP and continue at 2
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+1:  XJMP __loop_ffsqi2
+2:  ldi  r26, 8                ; low byte is 0: offset 8 for the high byte
+    or   r24, r25              ; r24 was 0, so this copies the high byte and sets Z
+    brne 1b
+    ret                        ; both bytes are 0: r25:r24 is already 0
+ENDF __ffshi2
+#endif /* defined (L_ffshi2) */
+
+#if defined (L_loop_ffsqi2)
+;; Helper for ffshi2, ffssi2: count shifts until a 1 bit drops out of r24
+;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
+;; r24 must be != 0 (otherwise the loop below would never see a carry)
+;; clobbers: r26
+DEFUN __loop_ffsqi2
+    inc  r26                   ; one more bit position examined
+    lsr  r24                   ; lowest bit goes to carry
+    brcc __loop_ffsqi2         ; it was 0: keep looking
+    mov  r24, r26
+    clr  r25
+    ret    
+ENDF __loop_ffsqi2
+#endif /* defined (L_loop_ffsqi2) */
+
+\f
+/**********************************
+ * Count trailing Zeros (ctz)
+ **********************************/
+
+#if defined (L_ctzsi2)
+;; count trailing zeros, computed as ffs32 - 1
+;; r25:r24 = ctz32 (r25:r22)
+;; clobbers: r26, r22
+;; ctz(0) = 255 (ffs returns 0 and only r24 is decremented)
+;; Note that ctz(0) is undefined for GCC
+DEFUN __ctzsi2
+    XCALL __ffssi2
+    dec  r24
+    ret
+ENDF __ctzsi2
+#endif /* defined (L_ctzsi2) */
+
+#if defined (L_ctzhi2)
+;; count trailing zeros, computed as ffs16 - 1
+;; r25:r24 = ctz16 (r25:r24)
+;; clobbers: r26
+;; ctz(0) = 255 (ffs returns 0 and only r24 is decremented)
+;; Note that ctz(0) is undefined for GCC
+DEFUN __ctzhi2
+    XCALL __ffshi2
+    dec  r24
+    ret
+ENDF __ctzhi2
+#endif /* defined (L_ctzhi2) */
+
+\f
+/**********************************
+ * Count leading Zeros (clz)
+ **********************************/
+
+#if defined (L_clzdi2)
+;; count leading zeros: clz32 of the upper half, plus clz32 of the lower half if the upper is 0
+;; r25:r24 = clz64 (r25:r18)
+;; clobbers: r22, r23, r26
+DEFUN __clzdi2
+    XCALL __clzsi2             ; upper half r25:r22
+    sbrs r24, 5                ; bit 5 is set only for a result of 32, i.e. upper half is 0
+    ret
+    mov_l r22, r18             ; move the lower half r21:r18 into r25:r22
+    mov_h r23, r19
+    mov_l r24, r20
+    mov_h r25, r21
+    XCALL __clzsi2
+    subi r24, -32              ; add the 32 zero bits of the upper half
+    ret
+ENDF __clzdi2
+#endif /* defined (L_clzdi2) */
+
+#if defined (L_clzsi2)
+;; count leading zeros: clz16 of the upper half, plus clz16 of the lower half if the upper is 0
+;; r25:r24 = clz32 (r25:r22)
+;; clobbers: r26
+DEFUN __clzsi2
+    XCALL __clzhi2             ; upper half r25:r24
+    sbrs r24, 4                ; bit 4 is set only for a result of 16, i.e. upper half is 0
+    ret
+    mov_l r24, r22             ; move the lower half r23:r22 into r25:r24
+    mov_h r25, r23
+    XCALL __clzhi2
+    subi r24, -16              ; add the 16 zero bits of the upper half
+    ret
+ENDF __clzsi2
+#endif /* defined (L_clzsi2) */
+
+#if defined (L_clzhi2)
+;; count leading zeros: skip a zero byte (8) and a zero nibble (4), then count bit by bit
+;; r25:r24 = clz16 (r25:r24); returns 16 for an input of 0
+;; clobbers: r26
+DEFUN __clzhi2
+    clr  r26                   ; r26 = running count
+    tst  r25
+    brne 1f                    ; high byte non-zero: examine it with count 0
+    subi r26, -8               ; high byte is 0: count 8 ...
+    or   r25, r24              ; ... and examine the low byte instead (r25 was 0)
+    brne 1f
+    ldi  r24, 16               ; whole value is 0: result 16, r25 is already 0
+    ret
+1:  cpi  r25, 16
+    brsh 3f                    ; upper nibble non-zero: go straight to the bit loop
+    subi r26, -3               ; upper nibble is 0: +3 here and +1 at label 2 make 4
+    swap r25                   ; bring the lower nibble to the top
+2:  inc  r26
+3:  lsl  r25                   ; top bit goes to carry
+    brcc 2b                    ; it was 0: count it and test the next bit
+    mov  r24, r26
+    clr  r25
+    ret
+ENDF __clzhi2
+#endif /* defined (L_clzhi2) */
+
+\f
+/**********************************
+ * Parity 
+ **********************************/
+
+#if defined (L_paritydi2)
+;; r25:r24 = parity64 (r25:r18)
+;; clobbers: __tmp_reg__
+DEFUN __paritydi2
+    eor  r24, r18              ; fold r18..r21 into r24 here ...
+    eor  r24, r19
+    eor  r24, r20
+    eor  r24, r21
+    XJMP __paritysi2           ; ... the remaining bytes are folded in by the jump target
+ENDF __paritydi2
+#endif /* defined (L_paritydi2) */
+
+#if defined (L_paritysi2)
+;; r25:r24 = parity32 (r25:r22)
+;; clobbers: __tmp_reg__
+DEFUN __paritysi2
+    eor  r24, r22              ; fold r22 and r23 into r24 here ...
+    eor  r24, r23
+    XJMP __parityhi2           ; ... r25 is folded in by the jump target
+ENDF __paritysi2
+#endif /* defined (L_paritysi2) */
+
+#if defined (L_parityhi2)
+;; r25:r24 = parity16 (r25:r24)
+;; clobbers: __tmp_reg__
+DEFUN __parityhi2
+    eor  r24, r25              ; fold the high byte into the low byte
+;; FALLTHRU into __parityqi2
+ENDF __parityhi2
+
+;; r25:r24 = parity8 (r24)
+;; clobbers: __tmp_reg__
+DEFUN __parityqi2
+    ;; parity is in r24[0..7]
+    mov  __tmp_reg__, r24
+    swap __tmp_reg__           ; exchange the nibbles of the copy
+    eor  r24, __tmp_reg__      ; fold the upper nibble onto the lower one
+    ;; parity is in r24[0..3]
+    subi r24, -4
+    andi r24, -5
+    subi r24, -6
+    ;; parity is in r24[0,3]
+    sbrc r24, 3
+    inc  r24
+    ;; parity is in r24[0]
+    andi r24, 1                ; keep only the parity bit
+    clr  r25
+    ret
+ENDF __parityqi2
+#endif /* defined (L_parityhi2) */
+
+\f
+/**********************************
+ * Population Count
+ **********************************/
+
+#if defined (L_popcounthi2)
+;; population count: popcount8 of each byte, partial result kept on the stack
+;; r25:r24 = popcount16 (r25:r24)
+;; clobbers: __tmp_reg__
+DEFUN __popcounthi2
+    XCALL __popcountqi2        ; count of the low byte
+    push r24                   ; park it while the high byte is counted
+    mov  r24, r25
+    XCALL __popcountqi2        ; count of the high byte
+    clr  r25
+    ;; FALLTHRU
+ENDF __popcounthi2
+
+DEFUN __popcounthi2_tail       ; shared ending: r24 += byte on top of the stack, then return
+    pop   __tmp_reg__
+    add   r24, __tmp_reg__
+    ret
+ENDF __popcounthi2_tail
+#endif /* defined (L_popcounthi2) */
+
+#if defined (L_popcountsi2)
+;; population count: popcount16 of each half, summed by __popcounthi2_tail
+;; r25:r24 = popcount32 (r25:r22)
+;; clobbers: __tmp_reg__
+DEFUN __popcountsi2
+    XCALL __popcounthi2        ; count of the upper half r25:r24
+    push  r24                  ; park it for the tail to add
+    mov_l r24, r22             ; lower half r23:r22 becomes the next argument
+    mov_h r25, r23
+    XCALL __popcounthi2
+    XJMP  __popcounthi2_tail   ; pops the parked count and adds it
+ENDF __popcountsi2
+#endif /* defined (L_popcountsi2) */
+
+#if defined (L_popcountdi2)
+;; population count: popcount32 of each half, summed by __popcounthi2_tail
+;; r25:r24 = popcount64 (r25:r18)
+;; clobbers: r22, r23, __tmp_reg__
+DEFUN __popcountdi2
+    XCALL __popcountsi2        ; count of the upper half r25:r22
+    push  r24                  ; park it for the tail to add
+    mov_l r22, r18             ; lower half r21:r18 becomes the next argument
+    mov_h r23, r19
+    mov_l r24, r20
+    mov_h r25, r21
+    XCALL __popcountsi2
+    XJMP  __popcounthi2_tail   ; pops the parked count and adds it
+ENDF __popcountdi2
+#endif /* defined (L_popcountdi2) */
+
+#if defined (L_popcountqi2)
+;; population count: shift a copy right and add each bit via the carry flag
+;; r24 = popcount8 (r24)
+;; clobbers: __tmp_reg__
+DEFUN __popcountqi2
+    mov  __tmp_reg__, r24
+    andi r24, 1                ; start the sum with bit 0
+    lsr  __tmp_reg__           ; drop bit 0 from the copy (already counted)
+    lsr  __tmp_reg__    
+    adc  r24, __zero_reg__
+    lsr  __tmp_reg__    
+    adc  r24, __zero_reg__
+    lsr  __tmp_reg__    
+    adc  r24, __zero_reg__
+    lsr  __tmp_reg__    
+    adc  r24, __zero_reg__
+    lsr  __tmp_reg__    
+    adc  r24, __zero_reg__
+    lsr  __tmp_reg__           ; carry = bit 6, the copy now holds just bit 7
+    adc  r24, __tmp_reg__      ; add bit 6 (carry) and bit 7 (copy) in one step
+    ret    
+ENDF __popcountqi2
+#endif /* defined (L_popcountqi2) */
+
+\f
+/**********************************
+ * Swap bytes
+ **********************************/
+
+;; swap two registers with different register number (XOR swap: same register for both would end up 0)
+.macro bswap a, b
+    eor \a, \b
+    eor \b, \a
+    eor \a, \b
+.endm
+
+#if defined (L_bswapsi2)
+;; swap bytes: exchange the outer pair and the inner pair of the four bytes
+;; r25:r22 = bswap32 (r25:r22)
+DEFUN __bswapsi2
+    bswap r22, r25
+    bswap r23, r24
+    ret
+ENDF __bswapsi2
+#endif /* defined (L_bswapsi2) */
+
+#if defined (L_bswapdi2)
+;; swap bytes: exchange the four mirrored byte pairs of the eight bytes
+;; r25:r18 = bswap64 (r25:r18)
+DEFUN __bswapdi2
+    bswap r18, r25
+    bswap r19, r24
+    bswap r20, r23
+    bswap r21, r22
+    ret
+ENDF __bswapdi2
+#endif /* defined (L_bswapdi2) */
+
+\f
+/**********************************
+ * 64-bit shifts
+ **********************************/
+
+#if defined (L_ashrdi3)
+;; Arithmetic shift right, one bit per loop pass; only r16 & 63 is used as the count
+;; r25:r18 = ashr64 (r25:r18, r17:r16)
+DEFUN __ashrdi3
+    push r16                   ; r16 is used as loop counter and restored before return
+    andi r16, 63
+    breq 2f                    ; count 0: value unchanged
+1:  asr  r25                   ; top byte keeps its sign bit
+    ror  r24
+    ror  r23
+    ror  r22
+    ror  r21
+    ror  r20
+    ror  r19
+    ror  r18
+    dec  r16
+    brne 1b
+2:  pop  r16
+    ret
+ENDF __ashrdi3
+#endif /* defined (L_ashrdi3) */
+
+#if defined (L_lshrdi3)
+;; Logic shift right, one bit per loop pass; only r16 & 63 is used as the count
+;; r25:r18 = lshr64 (r25:r18, r17:r16)
+DEFUN __lshrdi3
+    push r16                   ; r16 is used as loop counter and restored before return
+    andi r16, 63
+    breq 2f                    ; count 0: value unchanged
+1:  lsr  r25                   ; 0 is shifted into the top byte
+    ror  r24
+    ror  r23
+    ror  r22
+    ror  r21
+    ror  r20
+    ror  r19
+    ror  r18
+    dec  r16
+    brne 1b
+2:  pop  r16
+    ret
+ENDF __lshrdi3
+#endif /* defined (L_lshrdi3) */
+
+#if defined (L_ashldi3)
+;; Shift left, one bit per loop pass; only r16 & 63 is used as the count
+;; r25:r18 = ashl64 (r25:r18, r17:r16)
+DEFUN __ashldi3
+    push r16                   ; r16 is used as loop counter and restored before return
+    andi r16, 63
+    breq 2f                    ; count 0: value unchanged
+1:  lsl  r18                   ; 0 is shifted into the bottom byte
+    rol  r19
+    rol  r20
+    rol  r21
+    rol  r22
+    rol  r23
+    rol  r24
+    rol  r25
+    dec  r16
+    brne 1b
+2:  pop  r16
+    ret
+ENDF __ashldi3
+#endif /* defined (L_ashldi3) */
+
+\f
+.section .text.libgcc.fmul, "ax", @progbits
+
+/***********************************************************/    
+;;; Softmul versions of FMUL, FMULS and FMULSU to implement
+;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
+/***********************************************************/    
+
+#define A1 24 /* r24: first operand; upper byte of the shifted multiplicand in __fmul */
+#define B1 25 /* r25: second operand */
+#define C0 22 /* r22: low byte of the result */
+#define C1 23 /* r23: high byte of the result */
+#define A0 __tmp_reg__ /* sign marker in __fmuls/__fmulsu; lower byte of the multiplicand in __fmul */
+
+#ifdef L_fmuls
+;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmuls
+    ;; A0.7 = negate result?  (bit 7 of the XOR of both operands)
+    mov  A0, A1
+    eor  A0, B1
+    ;; B1 = |B1|
+    sbrc B1, 7
+    neg  B1
+    XJMP __fmulsu_exit         ; shared ending takes |A1|, multiplies and applies the sign
+ENDF __fmuls
+#endif /* L_fmuls */
+
+#ifdef L_fmulsu
+;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmulsu
+    ;; A0.7 = negate result?  (only the first operand contributes a sign here)
+    mov  A0, A1
+;; FALLTHRU
+ENDF __fmulsu
+
+;; Helper for __fmuls and __fmulsu: expects the sign marker in bit 7 of A0
+DEFUN __fmulsu_exit
+    ;; A1 = |A1|
+    sbrc A1, 7
+    neg  A1
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+    ;; Some cores have problem skipping 2-word instruction
+    tst  A0
+    brmi 1f
+#else
+    sbrs A0, 7                 ; marker set: skip the tail jump and take the call at 1
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+    XJMP  __fmul               ; no negation needed: __fmul returns to our caller
+1:  XCALL __fmul
+    ;; C = -C iff A0.7 = 1
+    com  C1
+    neg  C0
+    sbci C1, -1
+    ret
+ENDF __fmulsu_exit
+#endif /* L_fmulsu */
+
+
+#ifdef L_fmul
+;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmul
+    ; clear result
+    clr   C0
+    clr   C1
+    clr   A0                   ; A1:A0 is the 16-bit multiplicand, first operand in the upper byte
+1:  tst   B1                   ; sets N for the first pass; later passes use N from the lsl below
+    ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C.
+2:  brpl  3f
+    ;; C += A
+    add   C0, A0
+    adc   C1, A1
+3:  ;; A >>= 1
+    lsr   A1
+    ror   A0
+    ;; B <<= 1
+    lsl   B1
+    brne  2b                   ; stop once no bits are left in B
+    ret
+ENDF __fmul
+#endif /* L_fmul */
+
+#undef A0
+#undef A1
+#undef B1
+#undef C0
+#undef C1
diff --git a/libgcc/config/avr/t-avr b/libgcc/config/avr/t-avr

index 78829c76af4f9ed0133df574edbb2c02d339e9fc..f1c114a6dd69b218e1080a7ce6a62b077dfe3270 100644 (file)
--- a/libgcc/config/avr/t-avr
+++ b/libgcc/config/avr/t-avr
@@ -1,3 +1,51 @@
+LIB1ASMSRC = avr/lib1funcs.S
+LIB1ASMFUNCS = \
+       _mulqi3 \
+       _mulhi3 \
+       _mulhisi3 \
+       _umulhisi3 \
+       _usmulhisi3 \
+       _muluhisi3 \
+       _mulshisi3 \
+       _mulsi3 \
+       _udivmodqi4 \
+       _divmodqi4 \
+       _udivmodhi4 \
+       _divmodhi4 \
+       _udivmodsi4 \
+       _divmodsi4 \
+       _prologue \
+       _epilogue \
+       _exit \
+       _cleanup \
+       _tablejump \
+       _tablejump_elpm \
+       _copy_data \
+       _clear_bss \
+       _ctors \
+       _dtors \
+       _ffssi2 \
+       _ffshi2 \
+       _loop_ffsqi2 \
+       _ctzsi2 \
+       _ctzhi2 \
+       _clzdi2 \
+       _clzsi2 \
+       _clzhi2 \
+       _paritydi2 \
+       _paritysi2 \
+       _parityhi2 \
+       _popcounthi2 \
+       _popcountsi2 \
+       _popcountdi2 \
+       _popcountqi2 \
+       _bswapsi2 \
+       _bswapdi2 \
+       _ashldi3 \
+       _ashrdi3 \
+       _lshrdi3 \
+       _fmul _fmuls _fmulsu
+
  # Extra 16-bit integer functions.
  intfuncs16 = _absvXX2 _addvXX3 _subvXX3 _mulvXX3 _negvXX2 _clrsbXX2
  
diff --git a/libgcc/config/bfin/lib1funcs.S b/libgcc/config/bfin/lib1funcs.S

new file mode 100644 (file)

index 0000000..c7bf4f3
--- /dev/null
+++ b/libgcc/config/bfin/lib1funcs.S
@@ -0,0 +1,211 @@
+/* libgcc functions for Blackfin.
+   Copyright (C) 2005, 2009 Free Software Foundation, Inc.
+   Contributed by Analog Devices.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifdef L_divsi3 /* signed divide: take absolute values, call ___udivsi3, negate if signs differ */
+.text
+.align 2
+.global ___divsi3;
+.type ___divsi3, STT_FUNC;
+
+___divsi3:
+        [--SP]= RETS;          /* saved because of the CALL below */
+       [--SP] = R7;            /* R7 is used for the sign flag */
+
+       R2 = -R0;
+        CC = R0 < 0;
+       IF CC R0 = R2;          /* R0 = |R0| */
+       R7 = CC;                /* R7 = 1 if R0 was negative */
+
+       R2 = -R1;
+        CC = R1 < 0;
+       IF CC R1 = R2;          /* R1 = |R1| */
+       R2 = CC;
+       R7 = R7 ^ R2;           /* R7 = 1 if exactly one operand was negative */
+
+        CALL ___udivsi3;
+
+       CC = R7;
+       R1 = -R0;
+       IF CC R0 = R1;          /* negate the unsigned result when the signs differed */
+
+       R7 = [SP++];
+        RETS = [SP++];
+        RTS;
+#endif
+
+#ifdef L_modsi3        
+.align 2
+.global ___modsi3;
+.type ___modsi3, STT_FUNC;
+
+___modsi3:                     /* computes R0 - R1 * (result of ___divsi3 (R0, R1)) */
+       [--SP] = RETS;
+       [--SP] = R0;            /* keep both operands across the call */
+       [--SP] = R1;
+       CALL ___divsi3;
+       R2 = [SP++];            /* R2 = original R1 */
+       R1 = [SP++];            /* R1 = original R0 */
+       R2 *= R0;               /* original R1 times the value returned in R0 */
+       R0 = R1 - R2;
+       RETS = [SP++];
+       RTS; 
+#endif
+
+#ifdef L_udivsi3
+.align 2
+.global ___udivsi3;
+.type ___udivsi3, STT_FUNC;
+
+___udivsi3:                    /* bit-serial unsigned divide of R0 by R1 */
+        P0 = 32;
+        LSETUP (0f, 1f) LC0 = P0;      /* hardware loop over labels 0..1, 32 passes */
+       /* upper half of dividend */
+        R3 = 0;
+0:
+       /* The first time round in the loop we shift in garbage, but since we
+          perform 33 shifts, it doesn't matter.  */
+       R0 = ROT R0 BY 1;
+       R3 = ROT R3 BY 1;
+       R2 = R3 - R1;           /* trial subtraction */
+        CC = R3 < R1 (IU);     /* CC set means the subtraction must not be taken */
+1:
+       /* Last instruction of the loop.  */
+       IF ! CC R3 = R2;
+
+       /* Shift in the last bit.  */
+       R0 = ROT R0 BY 1;
+       /* R0 is the result, R3 contains the remainder.  */
+       R0 = ~ R0;              /* the bits were collected from CC, i.e. inverted */
+        RTS;
+#endif
+
+#ifdef L_umodsi3
+.align 2
+.global ___umodsi3;
+.type ___umodsi3, STT_FUNC;
+
+___umodsi3:                    /* unsigned remainder: runs ___udivsi3 and returns its R3 */
+       [--SP] = RETS;
+       CALL ___udivsi3;
+       R0 = R3;                /* ___udivsi3 leaves the remainder in R3 */
+       RETS = [SP++]; 
+       RTS;
+#endif
+
+#ifdef L_umulsi3_highpart
+.align 2
+.global ___umulsi3_highpart;
+.type ___umulsi3_highpart, STT_FUNC;
+
+___umulsi3_highpart:           /* built from four 16x16 partial products of R0 and R1 */
+       A1 = R1.L * R0.L (FU);
+       A1 = A1 >> 16;          /* keep only what carries out of the low x low product */
+       A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU);
+       A1 += R0.L * R1.H (FU); /* A1 = both cross products plus that carry */
+       A1 = A1 >> 16;
+       A0 += A1;               /* high x high plus the upper part of the cross sum */
+       R0 = A0 (FU);
+       RTS;
+#endif
+
+#ifdef L_smulsi3_highpart
+.align 2
+.global ___smulsi3_highpart;
+.type ___smulsi3_highpart, STT_FUNC;
+
+___smulsi3_highpart:           /* same partial-product scheme as the unsigned variant, with signed upper halves */
+       A1 = R1.L * R0.L (FU);
+       A1 = A1 >> 16;          /* keep only what carries out of the low x low product */
+       A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M);
+       A1 += R1.H * R0.L (IS,M);       /* A1 = both cross products plus that carry */
+       A1 = A1 >>> 16;         /* arithmetic shift here, unlike the logical one above */
+       R0 = (A0 += A1);
+       RTS;
+#endif
+
+#ifdef L_muldi3
+.align 2
+.global ___muldi3;
+.type ___muldi3, STT_FUNC;
+
+/*
+          R1:R0 * R3:R2
+        = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l
+[X]     = (R1.h * R3.h) * 2^96
+[X]       + (R1.h * R3.l + R1.l * R3.h) * 2^80
+[X]       + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64
+[T1]      + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48
+[T2]      + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32
+[T3]      + (R0.l * R2.h + R2.l * R0.h) * 2^16
+[T4]      + (R0.l * R2.l)
+
+       We can discard the first three lines marked "X" since we produce
+       only a 64 bit result.  So, we need ten 16-bit multiplies.
+
+       Individual mul-acc results:
+[E1]    =  R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h
+[E2]    =  R1.l * R2.l + R3.l * R0.l + R0.h * R2.h
+[E3]    =  R0.l * R2.h + R2.l * R0.h
+[E4]    =  R0.l * R2.l
+
+       We also need to add high parts from lower-level results to higher ones:
+       E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4
+
+       One interesting property is that all parts of the result that depend
+       on the sign of the multiplication are discarded.  Those would be the
+       multiplications involving R1.h and R3.h, but only the top 16 bit of
+       the 32 bit result depend on the sign, and since R1.h and R3.h only
+       occur in E1, the top half of these results is cut off.
+       So, we can just use FU mode for all of the 16-bit multiplies, and
+       ignore questions of when to use mixed mode.  */
+
+___muldi3:
+       /* [SP] technically is part of the caller's frame, but we can
+          use it as scratch space.  */
+       A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12];      /* E1; R3 (upper word of the 2nd operand) comes from the stack */
+       A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4;         /* E1; R4 is parked at [SP] */
+       A0 += A1;                                                       /* E1 */
+       R4 = A0.w;
+       A0 = R0.l * R3.l (FU);                                          /* E2 */
+       A0 += R2.l * R1.l (FU);                                         /* E2 */
+
+       A1 = R2.L * R0.L (FU);                                          /* E4 */
+       R3 = A1.w;
+       A1 = A1 >> 16;                                                  /* E3c */
+       A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU);                      /* E2, E3c */
+       A1 += R0.L * R2.H (FU);                                         /* E3c */
+       R0 = A1.w;
+       A1 = A1 >> 16;                                                  /* E2c */
+       A0 += A1;                                                       /* E2c */
+       R1 = A0.w;
+
+       /* low(result) = low(E3c):low(E4) */
+       R0 = PACK (R0.l, R3.l);
+       /* high(result) = E2c + (E1 << 16); the parallel load puts R4 back from [SP] */
+       R1.h = R1.h + R4.l (NS) || R4 = [SP];
+       RTS;
+
+.size ___muldi3, .-___muldi3
+#endif
diff --git a/libgcc/config/bfin/t-bfin b/libgcc/config/bfin/t-bfin

new file mode 100644 (file)

index 0000000..bc2b088
--- /dev/null
+++ b/libgcc/config/bfin/t-bfin
@@ -0,0 +1,3 @@
+LIB1ASMSRC = bfin/lib1funcs.S
+LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart
+LIB1ASMFUNCS += _smulsi3_highpart
diff --git a/libgcc/config/c6x/lib1funcs.S b/libgcc/config/c6x/lib1funcs.S

new file mode 100644 (file)

index 0000000..5bf3447
--- /dev/null
+++ b/libgcc/config/c6x/lib1funcs.S
@@ -0,0 +1,438 @@
+/* Copyright 2010, 2011  Free Software Foundation, Inc.
+   Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+       ;; ABI considerations for the divide functions
+       ;; The following registers are call-used:
+       ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
+       ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
+       ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
+       ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
+       ;;
+       ;; In our implementation, divu and remu are leaf functions,
+       ;; while both divi and remi call into divu.
+       ;; A0 is not clobbered by any of the functions.
+       ;; divu does not clobber B2 either, which is taken advantage of
+       ;; in remi.
+       ;; divi uses B5 to hold the original return address during
+       ;; the call to divu.
+       ;; remi uses B2 and A5 to hold the input values during the
+       ;; call to divu.  It stores B3 on the stack.
+
+#ifdef L_divsi3
+       ;; __c6xabi_divi: signed 32-bit divide, A4 = A4 / B4.
+       ;; Both operands are made non-negative, the unsigned work is handed
+       ;; to __c6xabi_divu, and the quotient is negated afterwards when
+       ;; the operand signs differ.
+.text
+.align 2
+.global __c6xabi_divi
+.hidden __c6xabi_divi
+.type __c6xabi_divi, STT_FUNC
+
+__c6xabi_divi:
+       call .s2        __c6xabi_divu
+||     mv .d2          B3, B5          ; B5 = original return address
+||     cmpgt .l1       0, A4, A1       ; A1 = dividend is negative
+||     cmpgt .l2       0, B4, B1       ; B1 = divisor is negative
+
+       [A1] neg .l1    A4, A4
+||     [B1] neg .l2    B4, B4
+||     xor .s1x        A1, B1, A1      ; A1 = quotient must be negated
+
+       ;; Only when the quotient needs negating is B3 redirected so that
+       ;; divu returns to label 1 below; otherwise B3 is left alone and
+       ;; divu returns straight to our caller.
+#ifdef _TMS320C6400
+       [A1] addkpc .s2 1f, B3, 4
+#else
+       [A1] mvkl .s2   1f, B3
+       [A1] mvkh .s2   1f, B3
+       nop             2
+#endif
+1:
+       ;; Negate the unsigned quotient and return through the address
+       ;; that was saved in B5 on entry.
+       neg .l1         A4, A4
+||     mv .l2          B3,B5
+||     ret .s2         B5
+       nop             5
+#endif
+
+#if defined L_modsi3 || defined L_divmodsi4
+       ;; __c6xabi_remi (L_modsi3) and __c6xabi_divremi (L_divmodsi4)
+       ;; share this body.  The quotient is obtained from __c6xabi_divu,
+       ;; given the right sign, and the remainder is then computed as
+       ;; dividend - quotient * divisor into MOD_OUTPUT_REG: A4 for remi,
+       ;; A5 for divremi (which leaves the quotient in A4).
+.align 2
+#ifdef L_modsi3
+#define MOD_OUTPUT_REG A4
+.global __c6xabi_remi
+.hidden __c6xabi_remi
+.type __c6xabi_remi, STT_FUNC
+#else
+#define MOD_OUTPUT_REG A5
+.global __c6xabi_divremi
+.hidden __c6xabi_divremi
+.type __c6xabi_divremi, STT_FUNC
+__c6xabi_divremi:
+#endif
+
+__c6xabi_remi:
+       stw .d2t2       B3, *B15--[2]   ; save the return address on the stack
+||     cmpgt .l1       0, A4, A1       ; A1 = dividend is negative
+||     cmpgt .l2       0, B4, B2       ; B2 = divisor is negative
+||     mv .s1          A4, A5          ; A5 = original dividend
+||     call .s2        __c6xabi_divu
+
+       [A1] neg .l1    A4, A4
+||     [B2] neg .l2    B4, B4
+||     xor .s2x        B2, A1, B0      ; B0 = quotient must be negated
+||     mv .d2          B4, B2          ; B2 = original divisor (see header comment)
+
+       ;; Make divu return to label 1 when the quotient must be negated
+       ;; and to label 2 when it is already correct.
+#ifdef _TMS320C6400
+       [B0] addkpc .s2 1f, B3, 1
+       [!B0] addkpc .s2 2f, B3, 1
+       nop             2
+#else
+       [B0] mvkl .s2   1f,B3
+       [!B0] mvkl .s2  2f,B3
+
+       [B0] mvkh .s2   1f,B3
+       [!B0] mvkh .s2  2f,B3
+#endif
+1:
+       neg .l1         A4, A4
+2:
+       ldw .d2t2       *++B15[2], B3   ; reload the saved return address
+
+#ifdef _TMS320C6400_PLUS
+       mpy32 .m1x      A4, B2, A6      ; A6 = quotient * divisor
+       nop             3
+       ret .s2         B3
+       sub .l1         A5, A6, MOD_OUTPUT_REG
+       nop             4
+#else
+       ;; No mpy32 here: the product is assembled in A6 from the mpyu
+       ;; result plus the sum of the two mpylhu results shifted left by 16.
+       mpyu .m1x       A4, B2, A1
+       nop             1
+       mpylhu .m1x     A4, B2, A6
+||     mpylhu .m2x     B2, A4, B2
+       nop             1
+       add .l1x        A6, B2, A6
+||     ret .s2         B3
+       shl .s1         A6, 16, A6
+       add .d1         A6, A1, A6
+       sub .l1         A5, A6, MOD_OUTPUT_REG
+       nop             2
+#endif
+
+#endif
+
+#if defined L_udivsi3 || defined L_udivmodsi4
+       ;; __c6xabi_divu: unsigned 32-bit divide, A4 = A4 / B4.
+       ;; The L_udivmodsi4 build exports the same body as __c6xabi_divremu
+       ;; and additionally leaves the remainder in A5.
+.align 2
+#ifdef L_udivsi3
+.global __c6xabi_divu
+.hidden __c6xabi_divu
+.type __c6xabi_divu, STT_FUNC
+__c6xabi_divu:
+#else
+.global __c6xabi_divremu
+.hidden __c6xabi_divremu
+.type __c6xabi_divremu, STT_FUNC
+__c6xabi_divremu:
+#endif
+       ;; We use a series of up to 31 subc instructions.  First, we find
+       ;; out how many leading zero bits there are in the divisor.  This
+       ;; gives us both a shift count for aligning (shifting) the divisor
+       ;; to the top of the register, and the number of times we have to
+       ;; execute subc.
+
+       ;; At the end, we have both the remainder and most of the quotient
+       ;; in A4.  The top bit of the quotient is computed first and is
+       ;; placed in A2.
+
+       ;; Return immediately if the dividend is zero.  Setting B4 to 1
+       ;; is a trick to allow us to leave the following insns in the jump
+       ;; delay slot without affecting the result.
+       mv      .s2x    A4, B1
+
+#ifndef _TMS320C6400
+[!b1]  mvk     .s2     1, B4
+#endif
+[b1]   lmbd    .l2     1, B4, B1
+||[!b1] b      .s2     B3      ; RETURN A
+#ifdef _TMS320C6400
+||[!b1] mvk    .d2     1, B4
+#endif
+#ifdef L_udivmodsi4
+||[!b1] zero   .s1     A5
+#endif
+       mv      .l1x    B1, A6
+||     shl     .s2     B4, B1, B4
+
+       ;; The loop performs a maximum of 28 steps, so we do the
+       ;; first 3 here.
+       cmpltu  .l1x    A4, B4, A2
+[!A2]  sub     .l1x    A4, B4, A4
+||     shru    .s2     B4, 1, B4
+||     xor     .s1     1, A2, A2
+
+       shl     .s1     A2, 31, A2
+|| [b1]        subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+[b1]   subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+
+       ;; RETURN A may happen here (note: must happen before the next branch)
+0:
+       cmpgt   .l2     B1, 7, B0
+|| [b1]        subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+[b1]   subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+|| [b0] b      .s1     0b
+[b1]   subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+[b1]   subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+[b1]   subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+[b1]   subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+[b1]   subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+       ;; loop backwards branch happens here
+
+       ;; Separate the quotient bits from the remainder held in A4 and
+       ;; merge in the top quotient bit kept in A2.
+       ret     .s2     B3
+||     mvk     .s1     32, A1
+       sub     .l1     A1, A6, A6
+#ifdef L_udivmodsi4
+||     extu    .s1     A4, A6, A5
+#endif
+       shl     .s1     A4, A6, A4
+       shru    .s1     A4, 1, A4
+||     sub     .l1     A6, 1, A6
+       or      .l1     A2, A4, A4
+       shru    .s1     A4, A6, A4
+       nop
+
+#endif
+
+#ifdef L_umodsi3
+       ;; __c6xabi_remu: unsigned 32-bit remainder, A4 = A4 % B4.
+.align 2
+.global __c6xabi_remu
+.hidden __c6xabi_remu
+.type __c6xabi_remu, STT_FUNC
+__c6xabi_remu:
+       ;; The ABI seems designed to prevent these functions calling each other,
+       ;; so we duplicate most of the udivsi3 (__c6xabi_divu) code here.
+       mv      .s2x    A4, B1
+#ifndef _TMS320C6400
+[!b1]  mvk     .s2     1, B4
+#endif
+       lmbd    .l2     1, B4, B1
+||[!b1] b      .s2     B3      ; RETURN A
+#ifdef _TMS320C6400
+||[!b1] mvk    .d2     1, B4
+#endif
+
+       mv      .l1x    B1, A7
+||     shl     .s2     B4, B1, B4
+
+       cmpltu  .l1x    A4, B4, A1
+[!a1]  sub     .l1x    A4, B4, A4
+       shru    .s2     B4, 1, B4
+
+0:
+       cmpgt   .l2     B1, 7, B0
+|| [b1]        subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+       ;; RETURN A may happen here (note: must happen before the next branch)
+[b1]   subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+|| [b0] b      .s1     0b
+[b1]   subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+[b1]   subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+[b1]   subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+[b1]   subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+[b1]   subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+       ;; loop backwards branch happens here
+
+       ret     .s2     B3
+[b1]   subc    .l1x    A4,B4,A4
+|| [b1]        add     .s2     -1, B1, B1
+[b1]   subc    .l1x    A4,B4,A4
+
+       ;; The final extu uses the shift count saved in A7 to pick the
+       ;; remainder out of A4.
+       extu    .s1     A4, A7, A4
+       nop     2
+#endif
+
+#if defined L_strasgi_64plus && defined _TMS320C6400_PLUS
+
+       ;; __c6xabi_strasgi_64plus: word-by-word copy using the
+       ;; sploopd/spkernel loop.  Destination pointer in A4, source
+       ;; pointer in B4, byte count in A6.
+.align 2
+.global __c6xabi_strasgi_64plus
+.hidden __c6xabi_strasgi_64plus
+.type __c6xabi_strasgi_64plus, STT_FUNC
+__c6xabi_strasgi_64plus:
+       shru    .s2x    a6, 2, b31      ; b31 = byte count / 4 = word count
+||     mv      .s1     a4, a30         ; a30 = running destination pointer
+||     mv      .d2     b4, b30         ; b30 = running source pointer
+
+       ;; NOTE(review): the -4 bias presumably accounts for how ILC counts
+       ;; iterations of the pipelined loop -- confirm against the ISA manual.
+       add     .s2     -4, b31, b31
+
+       sploopd         1
+||     mvc     .s2     b31, ilc
+       ldw     .d2t2   *b30++, b31
+       nop     4
+       mv      .s1x    b31,a31
+       spkernel        6, 0
+||     stw     .d1t1   a31, *a30++
+
+       ret     .s2     b3
+       nop 5
+#endif
+
+#ifdef L_strasgi
+.global __c6xabi_strasgi
+.type __c6xabi_strasgi, STT_FUNC
+__c6xabi_strasgi:
+       ;; This is essentially memcpy, with alignment known to be at least
+       ;; 4, and the size a multiple of 4 greater than or equal to 28.
+       ;; Destination pointer in A4, source pointer in B4, byte count in A6.
+       ;; Six words are preloaded into A0, A1, A5, A7, A8 and A9; the loop
+       ;; then stores them through B5 while conditionally reloading, with
+       ;; B0 recomputed each cycle from the remaining byte count.
+       ldw     .d2t1   *B4++, A0
+||     mvk     .s2     16, B1
+       ldw     .d2t1   *B4++, A1
+||     mvk     .s2     20, B2
+||     sub     .d1     A6, 24, A6      ; 24 bytes are covered by the preload
+       ldw     .d2t1   *B4++, A5
+       ldw     .d2t1   *B4++, A7
+||     mv      .l2x    A6, B7
+       ldw     .d2t1   *B4++, A8
+       ldw     .d2t1   *B4++, A9
+||     mv      .s2x    A0, B5
+||     cmpltu  .l2     B2, B7, B0
+
+0:
+       stw     .d1t2   B5, *A4++
+||[b0] ldw     .d2t1   *B4++, A0
+||     mv      .s2x    A1, B5
+||     mv      .l2     B7, B6
+
+[b0]   sub     .d2     B6, 24, B7
+||[b0] b       .s2     0b
+||     cmpltu  .l2     B1, B6, B0
+
+[b0]   ldw     .d2t1   *B4++, A1
+||     stw     .d1t2   B5, *A4++
+||     mv      .s2x    A5, B5
+||     cmpltu  .l2     12, B6, B0
+
+[b0]   ldw     .d2t1   *B4++, A5
+||     stw     .d1t2   B5, *A4++
+||     mv      .s2x    A7, B5
+||     cmpltu  .l2     8, B6, B0
+
+[b0]   ldw     .d2t1   *B4++, A7
+||     stw     .d1t2   B5, *A4++
+||     mv      .s2x    A8, B5
+||     cmpltu  .l2     4, B6, B0
+
+[b0]   ldw     .d2t1   *B4++, A8
+||     stw     .d1t2   B5, *A4++
+||     mv      .s2x    A9, B5
+||     cmpltu  .l2     0, B6, B0
+
+[b0]   ldw     .d2t1   *B4++, A9
+||     stw     .d1t2   B5, *A4++
+||     mv      .s2x    A0, B5
+||     cmpltu  .l2     B2, B7, B0
+
+       ;; loop back branch happens here
+
+       ;; Tail: store whichever of A1, A5, A7, A8, A9 still hold data,
+       ;; each store guarded by a fresh comparison against B6.
+       cmpltu  .l2     B1, B6, B0
+||     ret     .s2     b3
+
+[b0]   stw     .d1t1   A1, *A4++
+||     cmpltu  .l2     12, B6, B0
+[b0]   stw     .d1t1   A5, *A4++
+||     cmpltu  .l2     8, B6, B0
+[b0]   stw     .d1t1   A7, *A4++
+||     cmpltu  .l2     4, B6, B0
+[b0]   stw     .d1t1   A8, *A4++
+||     cmpltu  .l2     0, B6, B0
+[b0]   stw     .d1t1   A9, *A4++
+
+       ;; return happens here
+
+#endif
+
+#ifdef _TMS320C6400_PLUS
+#ifdef L_push_rts
+       ;; __c6xabi_push_rts: push B14, A15:A14, B13:B12, A13:A12, B11:B10,
+       ;; A11:A10 and B3:B2 onto the stack (B15), then continue at the
+       ;; address held in A3.  The stores after the branch are issued
+       ;; before the branch takes effect.
+.align 2
+.global __c6xabi_push_rts
+.hidden __c6xabi_push_rts
+.type __c6xabi_push_rts, STT_FUNC
+__c6xabi_push_rts:
+       stw .d2t2       B14, *B15--[2]
+       stdw .d2t1      A15:A14, *B15--
+||     b .s2x          A3              ; return to the address in A3
+       stdw .d2t2      B13:B12, *B15--
+       stdw .d2t1      A13:A12, *B15--
+       stdw .d2t2      B11:B10, *B15--
+       stdw .d2t1      A11:A10, *B15--
+       stdw .d2t2      B3:B2, *B15--
+#endif
+
+#ifdef L_pop_rts
+       ;; __c6xabi_pop_rts: inverse of __c6xabi_push_rts.  Reloads B3:B2,
+       ;; A11:A10, B11:B10, A13:A12, B13:B12, A15:A14 and B14 from the
+       ;; stack (B15) and branches to B3.
+       ;; NOTE(review): the branch presumably sees the B3 value reloaded by
+       ;; the first lddw (load latency has elapsed by then) -- confirm.
+.align 2
+.global __c6xabi_pop_rts
+.hidden __c6xabi_pop_rts
+.type __c6xabi_pop_rts, STT_FUNC
+__c6xabi_pop_rts:
+       lddw .d2t2      *++B15, B3:B2
+       lddw .d2t1      *++B15, A11:A10
+       lddw .d2t2      *++B15, B11:B10
+       lddw .d2t1      *++B15, A13:A12
+       lddw .d2t2      *++B15, B13:B12
+       lddw .d2t1      *++B15, A15:A14
+||     b .s2           B3
+       ldw .d2t2       *++B15[2], B14
+       nop             4
+#endif
+
+#ifdef L_call_stub
+       ;; __c6xabi_call_stub: call the function whose address is in B31
+       ;; while preserving A2, A7:A6, A1:A0, B7:B6, B5:B4, B1:B0 and B3:B2
+       ;; on the stack (B15).  B3 is pointed at label 1 so the callee
+       ;; returns here; the registers are then reloaded and control goes
+       ;; back through the restored B3.
+.align 2
+.global __c6xabi_call_stub
+.type __c6xabi_call_stub, STT_FUNC
+__c6xabi_call_stub:
+       stw .d2t1       A2, *B15--[2]
+       stdw .d2t1      A7:A6, *B15--
+||     call .s2        B31             ; target address supplied in B31
+       stdw .d2t1      A1:A0, *B15--
+       stdw .d2t2      B7:B6, *B15--
+       stdw .d2t2      B5:B4, *B15--
+       stdw .d2t2      B1:B0, *B15--
+       stdw .d2t2      B3:B2, *B15--
+||     addkpc .s2      1f, B3, 0       ; callee returns to label 1
+1:
+       lddw .d2t2      *++B15, B3:B2
+       lddw .d2t2      *++B15, B1:B0
+       lddw .d2t2      *++B15, B5:B4
+       lddw .d2t2      *++B15, B7:B6
+       lddw .d2t1      *++B15, A1:A0
+       lddw .d2t1      *++B15, A7:A6
+||     b .s2           B3
+       ldw .d2t1       *++B15[2], A2
+       nop             4
+#endif
+
+#endif
+
diff --git a/libgcc/config/c6x/t-elf b/libgcc/config/c6x/t-elf

index 99d0cd2d5ca8bd71db9bdade10eec73a3a96c189..e01c4109e52300aea2429559211c0424728fb94b 100644 (file)
--- a/libgcc/config/c6x/t-elf
+++ b/libgcc/config/c6x/t-elf
@@ -1,6 +1,11 @@
  # Cannot use default rules due to $(CRTSTUFF_T_CFLAGS).
  CUSTOM_CRTIN = yes
  
+LIB1ASMSRC = c6x/lib1funcs.S
+LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _udivmodsi4 _divmodsi4
+LIB1ASMFUNCS += _strasgi _strasgi_64plus _clzsi2 _clzdi2 _clz
+LIB1ASMFUNCS += _push_rts _pop_rts _call_stub
+
  # Assemble startup files.
  crti.o: $(srcdir)/config/c6x/crti.S
         $(crt_compile) -c $(CRTSTUFF_T_CFLAGS) $<
diff --git a/libgcc/config/fr30/lib1funcs.S b/libgcc/config/fr30/lib1funcs.S

new file mode 100644 (file)

index 0000000..7c63453
--- /dev/null
+++ b/libgcc/config/fr30/lib1funcs.S
@@ -0,0 +1,115 @@
+/* libgcc routines for the FR30.
+   Copyright (C) 1998, 1999, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+       ;; FUNC_START name: switch to .text and open the global function
+       ;; symbol __name.
+       .macro FUNC_START name
+       .text
+       .globl __\name
+       .type  __\name, @function
+__\name:
+       .endm
+
+       ;; FUNC_END name: record the size of __name, measured from its
+       ;; label to the current location.
+       .macro FUNC_END name
+       .size  __\name, . - __\name
+       .endm
+
+       ;; DIV_BODY reg number: recursive macro that expands to `number`
+       ;; consecutive "div1 reg" instructions (it re-invokes itself with
+       ;; number - 1 until the count reaches zero).
+       .macro DIV_BODY reg number
+       .if \number
+       DIV_BODY  \reg, "\number - 1"
+       div1    \reg
+       .endif
+       .endm
+       
+#ifdef L_udivsi3
+FUNC_START udivsi3
+       ;; Perform an unsigned division of r4 / r5 and place the result in r4.
+       ;; Does not handle overflow yet...
+       ;; The dividend is loaded into MDL, div0u plus 32 div1 steps run the
+       ;; division, and the quotient is read back from MDL.
+       mov     r4, mdl
+       div0u   r5
+       DIV_BODY r5 32
+       mov     mdl, r4
+       ret
+FUNC_END udivsi3
+#endif /* L_udivsi3 */
+
+#ifdef L_divsi3
+FUNC_START divsi3
+       ;; Perform a signed division of r4 / r5 and place the result in r4.
+       ;; Does not handle overflow yet...
+       ;; Same shape as the unsigned divide, but started with div0s and
+       ;; finished with the div2/div3/div4s sequence before the quotient
+       ;; is read back from MDL.
+       mov     r4, mdl
+       div0s   r5
+       DIV_BODY r5 32
+       div2    r5
+       div3
+       div4s
+       mov     mdl, r4
+       ret
+FUNC_END divsi3
+#endif /* L_divsi3 */
+
+#ifdef L_umodsi3
+FUNC_START umodsi3
+       ;; Perform an unsigned division of r4 / r5 and place the remainder in r4.
+       ;; Does not handle overflow yet...
+       ;; Identical to the unsigned divide except that the result is read
+       ;; from MDH instead of MDL.
+       mov     r4, mdl
+       div0u   r5
+       DIV_BODY r5 32
+       mov     mdh, r4
+       ret
+FUNC_END umodsi3
+#endif /* L_umodsi3 */
+
+#ifdef L_modsi3
+FUNC_START modsi3
+       ;; Perform a signed division of r4 / r5 and place the remainder in r4.
+       ;; Does not handle overflow yet...
+       ;; Identical to the signed divide except that the result is read
+       ;; from MDH instead of MDL.
+       mov     r4, mdl
+       div0s   r5
+       DIV_BODY r5 32
+       div2    r5
+       div3
+       div4s
+       mov     mdh, r4
+       ret
+FUNC_END modsi3
+#endif /* L_modsi3 */
+
+#ifdef L_negsi2
+FUNC_START negsi2
+       ;; Negate r4 in place: r0 is cleared, r4 is subtracted from it and
+       ;; the difference is moved back into r4.
+       ;; NOTE(review): assumes "sub r4, r0" computes r0 - r4 -- confirm
+       ;; against the FR30 operand order.
+       ldi:8   #0, r0
+       sub     r4, r0
+       mov     r0, r4
+       ret
+FUNC_END negsi2
+#endif /* L_negsi2 */
+
+#ifdef L_one_cmplsi2
+FUNC_START one_cmplsi2
+       ;; Complement r4 in place: 0xff is loaded into r0 and sign-extended
+       ;; from a byte (presumably giving all ones), then exclusive-ORed
+       ;; into r4.
+       ldi:8   #0xff, r0
+       extsb   r0
+       eor     r0, r4
+       ret
+FUNC_END one_cmplsi2
+#endif /* L_one_cmplsi2 */
+
+
diff --git a/libgcc/config/fr30/t-fr30 b/libgcc/config/fr30/t-fr30

new file mode 100644 (file)

index 0000000..ee5ed9a
--- /dev/null
+++ b/libgcc/config/fr30/t-fr30
@@ -0,0 +1,2 @@
+LIB1ASMSRC    = fr30/lib1funcs.S
+LIB1ASMFUNCS  = _udivsi3 _divsi3 _umodsi3 _modsi3
diff --git a/libgcc/config/frv/lib1funcs.S b/libgcc/config/frv/lib1funcs.S

new file mode 100644 (file)

index 0000000..d1ffcab
--- /dev/null
+++ b/libgcc/config/frv/lib1funcs.S
@@ -0,0 +1,269 @@
+/* Library functions.
+   Copyright (C) 2000, 2003, 2008, 2009 Free Software Foundation, Inc.
+   Contributed by Red Hat, Inc.
+  
+   This file is part of GCC.
+  
+   GCC is free software ; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+  
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY ; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+  
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <frv-asm.h>
+
+\f
+#ifdef L_cmpll
+/* icc0 = __cmpll (long long a, long long b)  */
+/* a is in gr8 (high word) and gr9 (low word), b in gr10 and gr11.  The
+   high words are compared first; the low words are compared only when
+   the high words are equal, so icc0 ends up describing the first
+   difference.
+   NOTE(review): P() comes from frv-asm.h, which is not visible here;
+   presumably it marks the instruction for packing -- confirm.  */
+
+       .globl  EXT(__cmpll)
+       .type   EXT(__cmpll),@function
+       .text
+       .p2align 4
+EXT(__cmpll):
+       cmp     gr8, gr10, icc0         /* compare the high words */
+       ckeq    icc0, cc4               /* cc4 = high words are equal */
+       P(ccmp) gr9, gr11, cc4, 1       /* if so, compare the low words */
+       ret
+.Lend:
+       .size   EXT(__cmpll),.Lend-EXT(__cmpll)
+#endif /* L_cmpll */
+\f
+#ifdef L_cmpf
+/* icc0 = __cmpf (float a, float b) */
+/* Note, because this function returns the result in ICC0, it means it can't
+   handle NaNs.  */
+/* a arrives in gr8 and b in gr9.  With hardware floating point the
+   operands are moved to fr0/fr1 and compared with fcmps; gr8 is then set
+   from the "less than" and "equal" conditions before the final cmpi
+   against 0 produces icc0.  Without it, a small frame is set up and the
+   comparison is delegated to __cmpsf2.  */
+
+       .globl  EXT(__cmpf)
+       .type   EXT(__cmpf),@function
+       .text
+       .p2align 4
+EXT(__cmpf):
+#ifdef __FRV_HARD_FLOAT__      /* floating point instructions available */
+       movgf   gr8, fr0
+       P(movgf) gr9, fr1
+       setlos  #1, gr8                 /* default result: 1 */
+       fcmps   fr0, fr1, fcc0
+       P(fcklt) fcc0, cc0              /* cc0 = a < b */
+       fckeq   fcc0, cc1               /* cc1 = a == b */
+       csub    gr0, gr8, gr8, cc0, 1   /* a < b: gr8 = 0 - 1 */
+       cmov    gr0, gr8, cc1, 1        /* a == b: gr8 = 0 */
+       cmpi    gr8, 0, icc0
+       ret
+#else                          /* no floating point instructions available */
+       movsg   lr, gr4
+       addi    sp, #-16, sp
+       sti     gr4, @(sp, 8)           /* save the return address */
+       st      fp, @(sp, gr0)          /* save the frame pointer */
+       mov     sp, fp
+       call    EXT(__cmpsf2)
+       cmpi    gr8, #0, icc0           /* turn the integer result into icc0 */
+       ldi     @(sp, 8), gr4
+       movgs   gr4, lr
+       ld      @(sp,gr0), fp
+       addi    sp, #16, sp
+       ret
+#endif
+.Lend:
+       .size   EXT(__cmpf),.Lend-EXT(__cmpf)
+#endif
+\f
+#ifdef L_cmpd
+/* icc0 = __cmpd (double a, double b) */
+/* Note, because this function returns the result in ICC0, it means it can't
+   handle NaNs.  */
+/* Always delegates to __cmpdf2: a 16-byte frame holds the saved return
+   address and frame pointer across the call, and the integer result in
+   gr8 is compared against 0 to produce icc0.  */
+
+       .globl  EXT(__cmpd)
+       .type   EXT(__cmpd),@function
+       .text
+       .p2align 4
+EXT(__cmpd):
+       movsg   lr, gr4
+       addi    sp, #-16, sp
+       sti     gr4, @(sp, 8)           /* save the return address */
+       st      fp, @(sp, gr0)          /* save the frame pointer */
+       mov     sp, fp
+       call    EXT(__cmpdf2)
+       cmpi    gr8, #0, icc0           /* turn the integer result into icc0 */
+       ldi     @(sp, 8), gr4
+       movgs   gr4, lr
+       ld      @(sp,gr0), fp
+       addi    sp, #16, sp
+       ret
+.Lend:
+       .size   EXT(__cmpd),.Lend-EXT(__cmpd)
+#endif
+\f
+#ifdef L_addll
+/* gr8,gr9 = __addll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+   ABI program calls it.  */
+/* a is in gr8 (high word) and gr9 (low word), b in gr10 and gr11; the
+   low words are added first and the carry in icc0 feeds the high-word
+   add.  */
+
+       .globl  EXT(__addll)
+       .type   EXT(__addll),@function
+       .text
+       /* Request the same alignment as every other routine in this file;
+          a bare .p2align with no operand asks for no alignment at all.  */
+       .p2align 4
+EXT(__addll):
+       addcc   gr9, gr11, gr9, icc0    /* low words, carry out in icc0 */
+       addx    gr8, gr10, gr8, icc0    /* high words plus that carry */
+       ret
+.Lend:
+       .size   EXT(__addll),.Lend-EXT(__addll)
+#endif
+\f
+#ifdef L_subll
+/* gr8,gr9 = __subll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+   ABI program calls it.  */
+/* a is in gr8 (high word) and gr9 (low word), b in gr10 and gr11; the
+   low words are subtracted first and the borrow in icc0 feeds the
+   high-word subtract.  */
+
+       .globl  EXT(__subll)
+       .type   EXT(__subll),@function
+       .text
+       .p2align 4
+EXT(__subll):
+       subcc   gr9, gr11, gr9, icc0    /* low words, borrow out in icc0 */
+       subx    gr8, gr10, gr8, icc0    /* high words minus that borrow */
+       ret
+.Lend:
+       .size   EXT(__subll),.Lend-EXT(__subll)
+#endif
+\f
+#ifdef L_andll
+/* gr8,gr9 = __andll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+   ABI program calls it.  */
+/* The two 32-bit halves are ANDed independently.
+   NOTE(review): P() and P2() come from frv-asm.h, which is not visible
+   here; presumably they mark the two operations for packing into one
+   bundle -- confirm.  */
+
+       .globl  EXT(__andll)
+       .type   EXT(__andll),@function
+       .text
+       .p2align 4
+EXT(__andll):
+       P(and)  gr9, gr11, gr9
+       P2(and) gr8, gr10, gr8
+       ret
+.Lend:
+       .size   EXT(__andll),.Lend-EXT(__andll)
+#endif
+\f
+#ifdef L_orll
+/* gr8,gr9 = __orll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+   ABI program calls it.  */
+/* The two 32-bit halves are ORed independently.
+   NOTE(review): P() and P2() come from frv-asm.h, which is not visible
+   here; presumably they mark the two operations for packing into one
+   bundle -- confirm.  */
+
+       .globl  EXT(__orll)
+       .type   EXT(__orll),@function
+       .text
+       .p2align 4
+EXT(__orll):
+       P(or)   gr9, gr11, gr9
+       P2(or)  gr8, gr10, gr8
+       ret
+.Lend:
+       .size   EXT(__orll),.Lend-EXT(__orll)
+#endif
+\f
+#ifdef L_xorll
+/* gr8,gr9 = __xorll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+   ABI program calls it.  */
+/* The two 32-bit halves are exclusive-ORed independently.
+   NOTE(review): P() and P2() come from frv-asm.h, which is not visible
+   here; presumably they mark the two operations for packing into one
+   bundle -- confirm.  */
+
+       .globl  EXT(__xorll)
+       .type   EXT(__xorll),@function
+       .text
+       .p2align 4
+EXT(__xorll):
+       P(xor)  gr9, gr11, gr9
+       P2(xor) gr8, gr10, gr8
+       ret
+.Lend:
+       .size   EXT(__xorll),.Lend-EXT(__xorll)
+#endif
+\f
+#ifdef L_notll
+/* gr8,gr9 = __notll (long long a) */
+/* Note, gcc will never call this function, but it is present in case an
+   ABI program calls it.  */
+/* The two 32-bit halves are complemented independently.
+   NOTE(review): P() and P2() come from frv-asm.h, which is not visible
+   here; presumably they mark the two operations for packing into one
+   bundle -- confirm.  */
+
+       .globl  EXT(__notll)
+       .type   EXT(__notll),@function
+       .text
+       .p2align 4
+EXT(__notll):
+       P(not)  gr9, gr9
+       P2(not) gr8, gr8
+       ret
+.Lend:
+       .size   EXT(__notll),.Lend-EXT(__notll)
+#endif
+\f
+#ifdef L_cmov
+/* (void) __cmov (char *dest, const char *src, size_t len) */
+/*
+ * void __cmov (char *dest, const char *src, size_t len)
+ * {
+ *   size_t i;
+ * 
+ *   if (dest < src || dest > src+len)
+ *     {
+ *      for (i = 0; i < len; i++)
+ *      dest[i] = src[i];
+ *     }
+ *   else
+ *     {
+ *      while (len-- > 0)
+ *      dest[len] = src[len];
+ *     }
+ * }
+ */
+/* Register use below: gr8 = dest, gr9 = src, gr10 = len, gr4 = src+len and
+   later the byte being moved, gr5 = forward index.  */
+
+       .globl  EXT(__cmov)
+       .type   EXT(__cmov),@function
+       .text
+       .p2align 4
+EXT(__cmov):
+       P(cmp)  gr8, gr9, icc0          /* icc0: dest versus src */
+       add     gr9, gr10, gr4          /* gr4 = src + len */
+       P(cmp)  gr8, gr4, icc1          /* icc1: dest versus src + len */
+       bc      icc0, 0, .Lfwd          /* dest < src: copy forwards */
+       bls     icc1, 0, .Lback         /* dest <= src + len: copy backwards */
+.Lfwd:
+       /* move bytes in a forward direction */
+       P(setlos) #0, gr5
+       cmp     gr0, gr10, icc0         /* is len zero? */
+       P(subi) gr9, #1, gr9            /* bias both pointers by -1 because */
+       P2(subi) gr8, #1, gr8           /* gr5 is incremented before use */
+       bnc     icc0, 0, .Lret
+.Lfloop:
+       /* forward byte move loop */
+       addi    gr5, #1, gr5
+       P(ldsb) @(gr9, gr5), gr4
+       cmp     gr5, gr10, icc0
+       P(stb)  gr4, @(gr8, gr5)
+       bc      icc0, 0, .Lfloop
+       ret
+.Lbloop:
+       /* backward byte move loop body */
+       ldsb    @(gr9,gr10),gr4
+       stb     gr4,@(gr8,gr10)
+.Lback:
+       P(cmpi) gr10, #0, icc0          /* test len, then decrement it */
+       addi    gr10, #-1, gr10
+       bne     icc0, 0, .Lbloop
+.Lret:
+       ret
+.Lend:
+       .size    EXT(__cmov),.Lend-EXT(__cmov)
+#endif
diff --git a/libgcc/config/frv/t-frv b/libgcc/config/frv/t-frv

index b364a5a25b98b32afe6d095f5c312f1e2efee776..9773722d8e7021b0724139fff284d550210a489d 100644 (file)
--- a/libgcc/config/frv/t-frv
+++ b/libgcc/config/frv/t-frv
@@ -1,3 +1,6 @@
+LIB1ASMSRC     = frv/lib1funcs.S
+LIB1ASMFUNCS   = _cmpll _cmpf _cmpd _addll _subll _andll _orll _xorll _notll _cmov
+
  # Compile two additional files that are linked with every program
  # linked using GCC on systems using COFF or ELF, for the sake of C++
  # constructors.
diff --git a/libgcc/config/h8300/lib1funcs.S b/libgcc/config/h8300/lib1funcs.S

new file mode 100644 (file)

index 0000000..1b75b73
--- /dev/null
+++ b/libgcc/config/h8300/lib1funcs.S
@@ -0,0 +1,838 @@
+;; libgcc routines for the Renesas H8/300 CPU.
+;; Contributed by Steve Chamberlain <sac@cygnus.com>
+;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>
+
+/* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004, 2009
+   Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* Assembler register definitions.  */
+
+#define A0 r0
+#define A0L r0l
+#define A0H r0h
+
+#define A1 r1
+#define A1L r1l
+#define A1H r1h
+
+#define A2 r2
+#define A2L r2l
+#define A2H r2h
+
+#define A3 r3
+#define A3L r3l
+#define A3H r3h
+
+#define S0 r4
+#define S0L r4l
+#define S0H r4h
+
+#define S1 r5
+#define S1L r5l
+#define S1H r5h
+
+#define S2 r6
+#define S2L r6l
+#define S2H r6h
+
+#ifdef __H8300__
+#define PUSHP  push
+#define POPP   pop
+
+#define A0P    r0
+#define A1P    r1
+#define A2P    r2
+#define A3P    r3
+#define S0P    r4
+#define S1P    r5
+#define S2P    r6
+#endif
+
+#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
+#define PUSHP  push.l
+#define POPP   pop.l
+
+#define A0P    er0
+#define A1P    er1
+#define A2P    er2
+#define A3P    er3
+#define S0P    er4
+#define S1P    er5
+#define S2P    er6
+
+#define A0E    e0
+#define A1E    e1
+#define A2E    e2
+#define A3E    e3
+#endif
+
+#ifdef __H8300H__
+#ifdef __NORMAL_MODE__
+       .h8300hn
+#else
+       .h8300h
+#endif
+#endif
+
+#ifdef __H8300S__
+#ifdef __NORMAL_MODE__
+       .h8300sn
+#else
+       .h8300s
+#endif
+#endif
+#ifdef __H8300SX__
+#ifdef __NORMAL_MODE__
+       .h8300sxn
+#else
+       .h8300sx
+#endif
+#endif
+
+#ifdef L_cmpsi2
+#ifdef __H8300__
+;; ___cmpsi2: three-way signed compare of the 32-bit value in A0:A1
+;; with the one in A2:A3.  The high words (A0, A2) are compared with a
+;; signed test, the low words (A1, A3) with an unsigned one.  Returns 1
+;; in A0 when the values are equal, otherwise 0 or 2.
+;; NOTE(review): presumably 0 means A0:A1 is the smaller value and 2 the
+;; larger, as for the libgcc __cmpsi2 contract -- confirm against the
+;; H8/300 cmp operand order.
+       .section .text
+       .align 2
+       .global ___cmpsi2
+___cmpsi2:
+       cmp.w   A0,A2           ; high words
+       bne     .L2
+       cmp.w   A1,A3           ; low words
+       bne     .L4
+       mov.w   #1,A0           ; equal
+       rts
+.L2:
+       bgt     .L5             ; high words differ: signed test
+.L3:
+       mov.w   #2,A0
+       rts
+.L4:
+       bls     .L3             ; low words differ: unsigned test
+.L5:
+       sub.w   A0,A0           ; A0 = 0
+       rts
+       .end
+#endif
+#endif /* L_cmpsi2 */
+
+#ifdef L_ucmpsi2
+#ifdef __H8300__
+;; ___ucmpsi2: three-way unsigned compare of the 32-bit value in A0:A1
+;; with the one in A2:A3, high words first.  Both word comparisons use
+;; unsigned tests.  Returns 1 in A0 when the values are equal, otherwise
+;; 0 or 2.
+;; NOTE(review): presumably 0 means A0:A1 is the smaller value and 2 the
+;; larger, as for the libgcc __ucmpsi2 contract -- confirm against the
+;; H8/300 cmp operand order.
+       .section .text
+       .align 2
+       .global ___ucmpsi2
+___ucmpsi2:
+       cmp.w   A0,A2           ; high words
+       bne     .L2
+       cmp.w   A1,A3           ; low words
+       bne     .L4
+       mov.w   #1,A0           ; equal
+       rts
+.L2:
+       bhi     .L5             ; high words differ: unsigned test
+.L3:
+       mov.w   #2,A0
+       rts
+.L4:
+       bls     .L3             ; low words differ: unsigned test
+.L5:
+       sub.w   A0,A0           ; A0 = 0
+       rts
+       .end
+#endif
+#endif /* L_ucmpsi2 */
+
+#ifdef L_divhi3
+
+;; HImode divides for the H8/300.
+;; We bunch all of this into one object file since there are several
+;; "supporting routines".
+
+; general purpose normalize routine
+;
+; dividend in A0
+; divisor in A1
+; (the callers below compute A0/A1 and A0%A1)
+; turns both into +ve numbers, and leaves what the answer sign
+; should be in bit 3 of A2L
+
+#ifdef __H8300__
+       .section .text
+       .align 2
+divnorm:
+       or      A0H,A0H         ; is dividend >= 0
+       stc     ccr,A2L
+       bge     _lab1
+       not     A0H             ; no - then make it +ve
+       not     A0L
+       adds    #1,A0
+_lab1: or      A1H,A1H ; look at divisor
+       bge     _lab2
+       not     A1H             ; it is -ve, make it positive
+       not     A1L
+       adds    #1,A1
+       xor     #0x8,A2L; and toggle sign of result
+_lab2: rts
+;; Basically the same, except that only the sign of the dividend (A0)
+;; determines the sign; the divisor does not toggle A2L.
+modnorm:
+       or      A0H,A0H         ; is dividend >= 0
+       stc     ccr,A2L
+       bge     _lab7
+       not     A0H             ; no - then make it +ve
+       not     A0L
+       adds    #1,A0
+_lab7: or      A1H,A1H ; look at divisor
+       bge     _lab8
+       not     A1H             ; it is -ve, make it positive
+       not     A1L
+       adds    #1,A1
+_lab8: rts
+
+; A0=A0/A1 signed
+
+       .global ___divhi3
+___divhi3:
+       bsr     divnorm
+       bsr     ___udivhi3
+negans:        btst    #3,A2L  ; should answer be negative ?
+       beq     _lab4
+       not     A0H     ; yes, so make it so
+       not     A0L
+       adds    #1,A0
+_lab4: rts
+
+; A0=A0%A1 signed
+
+       .global ___modhi3
+___modhi3:
+       bsr     modnorm
+       bsr     ___udivhi3
+       mov     A3,A0
+       bra     negans
+
+; A0=A0%A1 unsigned
+
+       .global ___umodhi3
+___umodhi3:
+       bsr     ___udivhi3
+       mov     A3,A0
+       rts
+
+; A0=A0/A1 unsigned
+; A3=A0%A1 unsigned
+; A2H trashed
+; D high 8 bits of denom
+; d low 8 bits of denom
+; N high 8 bits of num
+; n low 8 bits of num
+; M high 8 bits of mod
+; m low 8 bits of mod
+; Q high 8 bits of quot
+; q low 8 bits of quot
+; P preserve
+
+; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
+; see how to partition up the expression.
+
+       .global ___udivhi3
+___udivhi3:
+                               ; A0 A1 A2 A3
+                               ; Nn Dd       P
+       sub.w   A3,A3           ; Nn Dd xP 00
+       or      A1H,A1H
+       bne     divlongway
+       or      A0H,A0H
+       beq     _lab6
+
+; we know that D == 0 and N is != 0
+       mov.b   A0H,A3L         ; Nn Dd xP 0N
+       divxu   A1L,A3          ;          MQ
+       mov.b   A3L,A0H         ; Q
+; dealt with N, do n
+_lab6: mov.b   A0L,A3L         ;           n
+       divxu   A1L,A3          ;          mq
+       mov.b   A3L,A0L         ; Qq
+       mov.b   A3H,A3L         ;           m
+       mov.b   #0x0,A3H        ; Qq       0m
+       rts
+
+; D != 0 - which means the denominator is wider than 8 bits, so the 16/8
+;          divide cannot be used; loop around 8 times to get the result.
+
+divlongway:
+       mov.b   A0H,A3L         ; Nn Dd xP 0N
+       mov.b   #0x0,A0H        ; high byte of answer has to be zero
+       mov.b   #0x8,A2H        ;       8
+div8:  add.b   A0L,A0L         ; n*=2
+       rotxl   A3L             ; Make remainder bigger
+       rotxl   A3H
+       sub.w   A1,A3           ; remainder -= denom
+       bhs     setbit          ; set a bit ?
+       add.w   A1,A3           ;  no : too far, remainder += denom
+
+       dec     A2H
+       bne     div8            ; next bit
+       rts
+
+setbit:        inc     A0L             ; do insert bit
+       dec     A2H
+       bne     div8            ; next bit
+       rts
+
+#endif /* __H8300__ */
+#endif /* L_divhi3 */
+
+#ifdef L_divsi3
+
+;; 4 byte integer divides for the H8/300.
+;;
+;; We have one routine which does all the work and lots of
+;; little ones which prepare the args and massage the sign.
+;; We bunch all of this into one object file since there are several
+;; "supporting routines".
+
+       .section .text
+       .align 2
+
+; Put abs SIs into r0/r1 and r2/r3, and leave a 1 in r6l with sign of rest.
+; This function is here to keep branch displacements small.
+
+#ifdef __H8300__
+
+divnorm:
+       mov.b   A0H,A0H         ; is the numerator -ve
+       stc     ccr,S2L         ; keep the sign in bit 3 of S2L
+       bge     postive         ;  no : leave the numerator alone
+
+       ; negate arg : invert every byte of A0/A1, then add 1
+       not     A0H
+       not     A1H
+       not     A0L
+       not     A1L
+
+       add     #1,A1L          ; add 1 at the bottom ...
+       addx    #0,A1H          ; ... and ripple the carry upwards
+       addx    #0,A0L
+       addx    #0,A0H
+postive:
+       mov.b   A2H,A2H         ; is the denominator -ve
+       bge     postive2        ;  no : sign in S2L is already right
+       not     A2L             ; negate A2/A3 the same way
+       not     A2H
+       not     A3L
+       not     A3H
+       add.b   #1,A3L
+       addx    #0,A3H
+       addx    #0,A2L
+       addx    #0,A2H
+       xor.b   #0x08,S2L       ; toggle the result sign
+postive2:
+       rts
+
+;; Basically the same, except that the sign of the numerator alone
+;; determines the sign of the result.
+modnorm:
+       mov.b   A0H,A0H         ; is the numerator -ve
+       stc     ccr,S2L         ; keep the sign in bit 3 of S2L
+       bge     mpostive        ;  no : leave the numerator alone
+
+       ; negate arg : invert every byte of A0/A1, then add 1
+       not     A0H
+       not     A1H
+       not     A0L
+       not     A1L
+
+       add     #1,A1L          ; add 1 at the bottom ...
+       addx    #0,A1H          ; ... and ripple the carry upwards
+       addx    #0,A0L
+       addx    #0,A0H
+mpostive:
+       mov.b   A2H,A2H         ; is the denominator -ve
+       bge     mpostive2       ;  no : nothing more to do
+       not     A2L             ; negate A2/A3 the same way
+       not     A2H
+       not     A3L
+       not     A3H
+       add.b   #1,A3L
+       addx    #0,A3H
+       addx    #0,A2L
+       addx    #0,A2H          ; S2L is not toggled here, unlike divnorm
+mpostive2:
+       rts
+
+#else /* __H8300H__ */
+
+divnorm:
+       mov.l   A0P,A0P         ; is the numerator -ve
+       stc     ccr,S2L         ; keep the sign in bit 3 of S2L
+       bge     postive         ;  no : leave the numerator alone
+
+       neg.l   A0P             ; negate arg
+
+postive:
+       mov.l   A1P,A1P         ; is the denominator -ve
+       bge     postive2        ;  no : sign in S2L is already right
+
+       neg.l   A1P             ; negate arg
+       xor.b   #0x08,S2L       ; toggle the result sign
+
+postive2:
+       rts
+
+;; Basically the same, except that the sign of the numerator alone
+;; determines the sign of the result.
+modnorm:
+       mov.l   A0P,A0P         ; is the numerator -ve
+       stc     ccr,S2L         ; keep the sign in bit 3 of S2L
+       bge     mpostive        ;  no : leave the numerator alone
+
+       neg.l   A0P             ; negate arg
+
+mpostive:
+       mov.l   A1P,A1P         ; is the denominator -ve
+       bge     mpostive2       ;  no : nothing more to do
+
+       neg.l   A1P             ; negate arg (S2L is not toggled here)
+
+mpostive2:
+       rts
+
+#endif
+
+; numerator in A0/A1
+; denominator in A2/A3
+       .global ___modsi3
+___modsi3:
+#ifdef __H8300__
+       PUSHP   S2P
+       PUSHP   S0P
+       PUSHP   S1P
+       bsr     modnorm         ; make both args positive, sign goes to S2L
+       bsr     divmodsi4       ; A0/A1 = quotient, S0/S1 = remainder
+       mov     S0,A0           ; return the remainder in A0/A1
+       mov     S1,A1
+       bra     exitdiv         ; fix up the sign, restore and return
+#else
+       PUSHP   S2P
+       bsr     modnorm         ; make both args positive, sign goes to S2L
+       bsr     ___udivsi3      ; er0 = quotient, er3 = remainder
+       mov.l   er3,er0         ; return the remainder
+       bra     exitdiv         ; fix up the sign, restore and return
+#endif
+
+       ;; H8/300H and H8S version of ___udivsi3 is defined later in
+       ;; the file.
+#ifdef __H8300__
+       .global ___udivsi3
+___udivsi3:
+       PUSHP   S2P
+       PUSHP   S0P
+       PUSHP   S1P
+       bsr     divmodsi4       ; A0/A1 = quotient
+       bra     reti            ; unsigned : no sign fixup, restore and return
+#endif
+
+       .global ___umodsi3
+___umodsi3:
+#ifdef __H8300__
+       PUSHP   S2P
+       PUSHP   S0P
+       PUSHP   S1P
+       bsr     divmodsi4       ; A0/A1 = quotient, S0/S1 = remainder
+       mov     S0,A0           ; return the remainder in A0/A1
+       mov     S1,A1
+       bra     reti            ; unsigned : no sign fixup, restore and return
+#else
+       bsr     ___udivsi3      ; er0 = quotient, er3 = remainder
+       mov.l   er3,er0         ; return the remainder
+       rts
+#endif
+
+       .global ___divsi3
+___divsi3:
+#ifdef __H8300__
+       PUSHP   S2P
+       PUSHP   S0P
+       PUSHP   S1P
+       jsr     divnorm         ; make both args positive, result sign in S2L
+       jsr     divmodsi4       ; A0/A1 = quotient
+#else
+       PUSHP   S2P
+       jsr     divnorm         ; make both args positive, result sign in S2L
+       bsr     ___udivsi3      ; er0 = quotient
+#endif
+
+       ; examine what the sign should be (___divsi3 falls through to here)
+exitdiv:
+       btst    #3,S2L          ; bit 3 is the sign kept by divnorm/modnorm
+       beq     reti            ; clear : the result is already right
+
+       ; should be -ve
+#ifdef __H8300__
+       not     A0H
+       not     A1H
+       not     A0L
+       not     A1L
+
+       add     #1,A1L
+       addx    #0,A1H
+       addx    #0,A0L
+       addx    #0,A0H
+#else /* __H8300H__ */
+       neg.l   A0P
+#endif
+
+reti:                          ; restore the registers pushed on entry
+#ifdef __H8300__
+       POPP    S1P
+       POPP    S0P
+#endif
+       POPP    S2P
+       rts
+
+       ; takes A0/A1 numerator (A0P for H8/300H)
+       ; A2/A3 denominator (A1P for H8/300H)
+       ; returns A0/A1 quotient (A0P for H8/300H)
+       ; S0/S1 remainder (S0P for H8/300H)
+       ; trashes S2H
+
+#ifdef __H8300__
+
+divmodsi4:
+        sub.w  S0,S0           ; zero play area
+        mov.w  S0,S1
+        mov.b  A2H,S2H         ; OR together the top three denominator bytes
+        or     A2L,S2H
+        or     A3H,S2H
+        bne    DenHighNonZero  ; denominator needs more than 8 bits
+        mov.b  A0H,A0H         ; skip leading zero bytes of the numerator
+        bne    NumByte0Zero
+        mov.b  A0L,A0L
+        bne    NumByte1Zero
+        mov.b  A1H,A1H
+        bne    NumByte2Zero
+        bra    NumByte3Zero
+NumByte0Zero:                  ; (reached when numerator byte 0 is non-zero)
+       mov.b   A0H,S1L
+        divxu  A3L,S1          ; S1L = quotient byte, S1H = running remainder
+        mov.b  S1L,A0H
+NumByte1Zero:                  ; (reached when numerator byte 1 is non-zero)
+       mov.b   A0L,S1L
+        divxu  A3L,S1
+        mov.b  S1L,A0L
+NumByte2Zero:                  ; (reached when numerator byte 2 is non-zero)
+       mov.b   A1H,S1L
+        divxu  A3L,S1
+        mov.b  S1L,A1H
+NumByte3Zero:                  ; always divide the lowest byte
+       mov.b   A1L,S1L
+        divxu  A3L,S1
+        mov.b  S1L,A1L
+
+        mov.b  S1H,S1L         ; move the final remainder to the low byte
+        mov.b  #0x0,S1H
+        rts
+
+; have to do the divide by shift and test
+DenHighNonZero:
+       mov.b   A0H,S1L         ; top numerator byte starts off the remainder
+        mov.b  A0L,A0H         ; move the other three bytes up by one
+        mov.b  A1H,A0L
+        mov.b  A1L,A1H
+
+        mov.b  #0,A1L          ; quotient bits get inserted here
+        mov.b  #24,S2H ; only do 24 iterations
+
+nextbit:
+       add.w   A1,A1   ; double the answer guess
+        rotxl  A0L
+        rotxl  A0H
+
+        rotxl  S1L     ; double remainder
+        rotxl  S1H
+        rotxl  S0L
+        rotxl  S0H
+        sub.w  A3,S1   ; does it all fit
+        subx   A2L,S0L
+        subx   A2H,S0H
+        bhs    setone  ; yes, record a 1 bit
+
+        add.w  A3,S1   ; no, restore mistake
+        addx   A2L,S0L
+        addx   A2H,S0H
+
+        dec    S2H
+        bne    nextbit
+        rts
+
+setone:
+       inc     A1L     ; set the new low bit of the answer
+        dec    S2H
+        bne    nextbit
+        rts
+
+#else /* __H8300H__ */
+
+       ;; This function also computes the remainder and stores it in er3.
+       .global ___udivsi3
+___udivsi3:
+       mov.w   A1E,A1E         ; denominator top word 0?
+       bne     DenHighNonZero  ;  no : take the long way below
+
+       ; do it the easy way, see page 107 in manual
+       mov.w   A0E,A2
+       extu.l  A2P
+       divxu.w A1,A2P
+       mov.w   A2E,A0E
+       divxu.w A1,A0P
+       mov.w   A0E,A3
+       mov.w   A2,A0E
+       extu.l  A3P
+       rts
+
+       ; er0 = er0 / er1
+       ; er3 = er0 % er1
+       ; trashes er1 er2
+       ; expects er1 >= 2^16
+DenHighNonZero:
+       mov.l   er0,er3         ; keep the dividend in er3
+       mov.l   er1,er2         ; work on a copy of the divisor
+#ifdef __H8300H__
+divmod_L21:
+       shlr.l  er0
+       shlr.l  er2             ; make divisor < 2^16
+       mov.w   e2,e2           ; top word of the shifted divisor zero yet ?
+       bne     divmod_L21
+#else
+       shlr.l  #2,er2          ; make divisor < 2^16
+       mov.w   e2,e2
+       beq     divmod_L22A
+divmod_L21:
+       shlr.l  #2,er0
+divmod_L22:
+       shlr.l  #2,er2          ; make divisor < 2^16
+       mov.w   e2,e2
+       bne     divmod_L21
+divmod_L22A:
+       rotxl.w r2
+       bcs     divmod_L23
+       shlr.l  er0
+       bra     divmod_L24
+divmod_L23:
+       rotxr.w r2
+       shlr.l  #2,er0
+divmod_L24:
+#endif
+       ;; At this point,
+       ;;  er0 contains shifted dividend
+       ;;  er1 contains divisor
+       ;;  er2 contains shifted divisor
+       ;;  er3 contains dividend, later remainder
+       divxu.w r2,er0          ; r0 now contains the approximate quotient (AQ)
+       extu.l  er0             ; keep only AQ in er0
+       beq     divmod_L25      ; AQ == 0 : er3 still holds the dividend
+       subs    #1,er0          ; er0 = AQ - 1
+       mov.w   e1,r2
+       mulxu.w r0,er2          ; er2 = upper (AQ - 1) * divisor
+       sub.w   r2,e3           ; dividend - 65536 * er2
+       mov.w   r1,r2
+       mulxu.w r0,er2          ; compute er3 = remainder (tentative)
+       sub.l   er2,er3         ; er3 = dividend - (AQ - 1) * divisor
+divmod_L25:
+       cmp.l   er1,er3         ; is remainder >= divisor?
+       blo     divmod_L26      ;  no : quotient and remainder are final
+       adds    #1,er0          ;  yes : bump the quotient by one
+       sub.l   er1,er3         ; correct the remainder
+divmod_L26:
+       rts
+
+#endif
+#endif /* L_divsi3 */
+
+#ifdef L_mulhi3
+
+;; HImode multiply.
+; The H8/300 only has an 8*8->16 multiply.
+; The answer is the same as:
+;
+; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
+; (we can ignore A1.h * A0.h because that will all fall off the top)
+; A0 in
+; A1 in
+; A0 answer
+
+#ifdef __H8300__
+       .section .text
+       .align 2
+       .global ___mulhi3
+___mulhi3:
+       mov.b   A1L,A2L         ; A2l gets srcb.l
+       mulxu   A0L,A2          ; A2 gets first sub product (srca.l * srcb.l)
+
+       mov.b   A0H,A3L         ; prepare for
+       mulxu   A1L,A3          ; second sub product (srca.h * srcb.l)
+
+       add.b   A3L,A2H         ; sum first two terms (low byte only, * 256)
+
+       mov.b   A1H,A3L         ; third sub product (srcb.h * srca.l)
+       mulxu   A0L,A3
+
+       add.b   A3L,A2H         ; almost there
+       mov.w   A2,A0           ; that is it : the product goes back in A0
+       rts
+
+#endif
+#endif /* L_mulhi3 */
+
+#ifdef L_mulsi3
+
+;; SImode multiply.
+;;
+;; I think that shift and add may be sufficient for this.  Using the
+;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead.  This way
+;; the inner loop uses maybe 20 cycles + overhead, but terminates
+;; quickly on small args.
+;;
+;; A0/A1 src_a
+;; A2/A3 src_b
+;;
+;;  while (a)
+;;    {
+;;      if (a & 1)
+;;        r += b;
+;;      a >>= 1;
+;;      b <<= 1;
+;;    }
+
+       .section .text
+       .align 2
+
+#ifdef __H8300__
+
+       .global ___mulsi3
+___mulsi3:
+       PUSHP   S0P
+       PUSHP   S1P
+
+       sub.w   S0,S0           ; r = 0, accumulated in S0/S1
+       sub.w   S1,S1
+
+       ; while (a)
+_top:  mov.w   A0,A0           ; high word of a non-zero ?
+       bne     _more
+       mov.w   A1,A1           ; low word of a non-zero ?
+       beq     _done           ;  no : a == 0, finished
+_more: ; if (a & 1)
+       bld     #0,A1L          ; carry = lowest bit of a
+       bcc     _nobit          ; bit clear : skip the add
+       ; r += b
+       add.w   A3,S1
+       addx    A2L,S0L
+       addx    A2H,S0H
+_nobit:
+       ; a >>= 1
+       shlr    A0H
+       rotxr   A0L
+       rotxr   A1H
+       rotxr   A1L
+
+       ; b <<= 1
+       add.w   A3,A3
+       addx    A2L,A2L
+       addx    A2H,A2H
+       bra     _top
+
+_done:
+       mov.w   S0,A0           ; return r in A0/A1
+       mov.w   S1,A1
+       POPP    S1P
+       POPP    S0P
+       rts
+
+#else /* __H8300H__ */
+
+;
+; mulsi3 for H8/300H - based on Renesas SH implementation
+;
+; by Toshiyasu Morita
+;
+; Old code:
+;
+; 16b * 16b = 372 states (worst case)
+; 32b * 32b = 724 states (worst case)
+;
+; New code:
+;
+; 16b * 16b =  48 states
+; 16b * 32b =  72 states
+; 32b * 32b =  92 states
+;
+
+       .global ___mulsi3
+___mulsi3:
+       mov.w   r1,r2   ; ( 2 states) b * d
+       mulxu   r0,er2  ; (22 states) er2 = r0 * r1
+
+       mov.w   e0,r3   ; ( 2 states) a * d
+       beq     L_skip1 ; ( 4 states) e0 == 0 : nothing to add
+       mulxu   r1,er3  ; (22 states)
+       add.w   r3,e2   ; ( 2 states) only the low word reaches the result
+
+L_skip1:
+       mov.w   e1,r3   ; ( 2 states) c * b
+       beq     L_skip2 ; ( 4 states) e1 == 0 : nothing to add
+       mulxu   r0,er3  ; (22 states)
+       add.w   r3,e2   ; ( 2 states) only the low word reaches the result
+
+L_skip2:
+       mov.l   er2,er0 ; ( 2 states) return the product in er0
+       rts             ; (10 states)
+
+#endif
+#endif /* L_mulsi3 */
+#ifdef L_fixunssfsi_asm
+/* For the h8300 we use asm to save some bytes, to
+   allow more programs to fit into the tiny address
+   space.  For the H8/300H and H8S, the C version is good enough.  */
+#ifdef __H8300__
+/* We still treat NANs different than libgcc2.c, but then, the
+   behavior is undefined anyways.  */
+       .global ___fixunssfsi
+___fixunssfsi:
+       cmp.b #0x4f,r0h         ; compare the top byte of the float with 0x4f
+       bge Large_num           ; (signed) >= 0x4f : handle it here
+       jmp     @___fixsfsi     ; otherwise tail-call the signed conversion
+Large_num:
+       bhi L_huge_num          ; flags still from the cmp.b : top byte > 0x4f
+       xor.b #0x80,A0L         ; flip the top bit of the second byte
+       bmi L_shift8            ; it was clear : shift the value up one byte
+L_huge_num:
+       mov.w #65535,A0         ; return 0xffffffff
+       mov.w A0,A1
+       rts
+L_shift8:
+       mov.b A0L,A0H           ; shift A0/A1 left by 8 bits
+       mov.b A1H,A0L
+       mov.b A1L,A1H
+       mov.b #0,A1L
+       rts
+#endif
+#endif /* L_fixunssfsi_asm */
diff --git a/libgcc/config/h8300/t-h8300 b/libgcc/config/h8300/t-h8300

new file mode 100644 (file)

index 0000000..4602ff8
--- /dev/null
+++ b/libgcc/config/h8300/t-h8300
@@ -0,0 +1,3 @@
+LIB1ASMSRC = h8300/lib1funcs.S
+LIB1ASMFUNCS = _cmpsi2 _ucmpsi2 _divhi3 _divsi3 _mulhi3 _mulsi3 \
+  _fixunssfsi_asm
diff --git a/libgcc/config/i386/cygwin.S b/libgcc/config/i386/cygwin.S

new file mode 100644 (file)

index 0000000..8f9c486
--- /dev/null
+++ b/libgcc/config/i386/cygwin.S
@@ -0,0 +1,188 @@
+/* stuff needed for libgcc on win32.
+ *
+ *   Copyright (C) 1996, 1998, 2001, 2003, 2008, 2009, 2010
+ *   Free Software Foundation, Inc.
+ *   Written By Steve Chamberlain
+ * 
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ * 
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ * 
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include "auto-host.h"
+
+#ifdef HAVE_GAS_CFI_SECTIONS_DIRECTIVE
+       .cfi_sections   .debug_frame
+# define cfi_startproc()               .cfi_startproc
+# define cfi_endproc()                 .cfi_endproc
+# define cfi_adjust_cfa_offset(X)      .cfi_adjust_cfa_offset X
+# define cfi_def_cfa_register(X)       .cfi_def_cfa_register X
+# define cfi_register(D,S)             .cfi_register D, S
+# ifdef _WIN64 /* push/pop move the CFA by 8 bytes */
+#  define cfi_push(X)          .cfi_adjust_cfa_offset 8; .cfi_rel_offset X, 0
+#  define cfi_pop(X)           .cfi_adjust_cfa_offset -8; .cfi_restore X
+# else /* push/pop move the CFA by 4 bytes */
+#  define cfi_push(X)          .cfi_adjust_cfa_offset 4; .cfi_rel_offset X, 0
+#  define cfi_pop(X)           .cfi_adjust_cfa_offset -4; .cfi_restore X
+# endif
+#else /* HAVE_GAS_CFI_SECTIONS_DIRECTIVE not defined: macros expand to nothing */
+# define cfi_startproc()
+# define cfi_endproc()
+# define cfi_adjust_cfa_offset(X)
+# define cfi_def_cfa_register(X)
+# define cfi_register(D,S)
+# define cfi_push(X)
+# define cfi_pop(X)
+#endif /* HAVE_GAS_CFI_SECTIONS_DIRECTIVE */
+
+#ifdef L_chkstk
+/* Function prologue calls __chkstk to probe the stack when allocating more
+   than CHECK_STACK_LIMIT bytes in one go.  Touching the stack at 4K
+   increments is necessary to ensure that the guard pages used
+   by the OS virtual memory manager are allocated in correct sequence.  */
+
+       .global ___chkstk
+       .global __alloca
+#ifdef _WIN64
+/* __alloca is a normal function call, which uses %rcx as the argument.  */
+       cfi_startproc()
+__alloca:
+       movq    %rcx, %rax              /* size into the ___chkstk argument reg */
+       /* FALLTHRU */
+
+/* ___chkstk is a *special* function call, which uses %rax as the argument.
+   We avoid clobbering the 4 integer argument registers, %rcx, %rdx,
+   %r8 and %r9, which leaves us with %rax, %r10, and %r11 to use.  */
+       .align  4
+___chkstk:
+       popq    %r11                    /* pop return address */
+       cfi_adjust_cfa_offset(-8)       /* indicate return address in r11 */
+       cfi_register(%rip, %r11)
+       movq    %rsp, %r10              /* %r10 is the probe pointer */
+       cmpq    $0x1000, %rax           /* > 4k ?*/
+       jb      2f
+
+1:     subq    $0x1000, %r10           /* yes, move pointer down 4k*/
+       orl     $0x0, (%r10)            /* probe there */
+       subq    $0x1000, %rax           /* decrement count */
+       cmpq    $0x1000, %rax
+       ja      1b                      /* and do it again */
+
+2:     subq    %rax, %r10              /* step down by the remaining bytes */
+       movq    %rsp, %rax              /* hold CFA until return */
+       cfi_def_cfa_register(%rax)
+       orl     $0x0, (%r10)            /* less than 4k, just peek here */
+       movq    %r10, %rsp              /* decrement stack */
+
+       /* Push the return address back.  Doing this instead of just
+          jumping to %r11 preserves the cached call-return stack
+          used by most modern processors.  */
+       pushq   %r11
+       ret
+       cfi_endproc()
+#else
+       cfi_startproc()
+___chkstk:
+__alloca:
+       pushl   %ecx                    /* save temp */
+       cfi_push(%ecx)                  /* the slot just pushed holds %ecx */
+       leal    8(%esp), %ecx           /* point past return addr */
+       cmpl    $0x1000, %eax           /* > 4k ?*/
+       jb      2f
+
+1:     subl    $0x1000, %ecx           /* yes, move pointer down 4k*/
+       orl     $0x0, (%ecx)            /* probe there */
+       subl    $0x1000, %eax           /* decrement count */
+       cmpl    $0x1000, %eax
+       ja      1b                      /* and do it again */
+
+2:     subl    %eax, %ecx
+       orl     $0x0, (%ecx)            /* less than 4k, just peek here */
+       movl    %esp, %eax              /* save current stack pointer */
+       cfi_def_cfa_register(%eax)
+       movl    %ecx, %esp              /* decrement stack */
+       movl    (%eax), %ecx            /* recover saved temp */
+
+       /* Copy the return address.  Doing this instead of just jumping to
+          the address preserves the cached call-return stack used by most
+          modern processors.  */
+       pushl   4(%eax)
+       ret
+       cfi_endproc()
+#endif /* _WIN64 */
+#endif /* L_chkstk */
+
+#ifdef L_chkstk_ms
+/* ___chkstk_ms is a *special* function call, which uses %rax as the argument.
+   We avoid clobbering any registers.  Unlike ___chkstk, it just probes the
+   stack and does no stack allocation.  */
+       .global ___chkstk_ms
+#ifdef _WIN64
+       cfi_startproc()
+___chkstk_ms:
+       pushq   %rcx                    /* save temps */
+       cfi_push(%rcx)
+       pushq   %rax                    /* the count is consumed below */
+       cfi_push(%rax)
+       cmpq    $0x1000, %rax           /* > 4k ?*/
+       leaq    24(%rsp), %rcx          /* point past return addr */
+       jb      2f                      /* tests the cmpq result above */
+
+1:     subq    $0x1000, %rcx           /* yes, move pointer down 4k */
+       orq     $0x0, (%rcx)            /* probe there */
+       subq    $0x1000, %rax           /* decrement count */
+       cmpq    $0x1000, %rax
+       ja      1b                      /* and do it again */
+
+2:     subq    %rax, %rcx              /* step down by the remaining bytes */
+       orq     $0x0, (%rcx)            /* less than 4k, just peek here */
+
+       popq    %rax                    /* restore the saved temps */
+       cfi_pop(%rax)
+       popq    %rcx
+       cfi_pop(%rcx)
+       ret
+       cfi_endproc()
+#else
+       cfi_startproc()
+___chkstk_ms:
+       pushl   %ecx                    /* save temp */
+       cfi_push(%ecx)
+       pushl   %eax                    /* the count is consumed below */
+       cfi_push(%eax)
+       cmpl    $0x1000, %eax           /* > 4k ?*/
+       leal    12(%esp), %ecx          /* point past return addr */
+       jb      2f                      /* tests the cmpl result above */
+
+1:     subl    $0x1000, %ecx           /* yes, move pointer down 4k*/
+       orl     $0x0, (%ecx)            /* probe there */
+       subl    $0x1000, %eax           /* decrement count */
+       cmpl    $0x1000, %eax
+       ja      1b                      /* and do it again */
+
+2:     subl    %eax, %ecx              /* step down by the remaining bytes */
+       orl     $0x0, (%ecx)            /* less than 4k, just peek here */
+
+       popl    %eax                    /* restore the saved temps */
+       cfi_pop(%eax)
+       popl    %ecx
+       cfi_pop(%ecx)
+       ret
+       cfi_endproc()
+#endif /* _WIN64 */
+#endif /* L_chkstk_ms */
diff --git a/libgcc/config/i386/t-chkstk b/libgcc/config/i386/t-chkstk

new file mode 100644 (file)

index 0000000..822981f
--- /dev/null
+++ b/libgcc/config/i386/t-chkstk
@@ -0,0 +1,2 @@
+LIB1ASMSRC = i386/cygwin.S
+LIB1ASMFUNCS = _chkstk _chkstk_ms
diff --git a/libgcc/config/ia64/__divxf3.S b/libgcc/config/ia64/__divxf3.S

new file mode 100644 (file)

index 0000000..9cba8f5
--- /dev/null
+++ b/libgcc/config/ia64/__divxf3.S
@@ -0,0 +1,11 @@
+#ifdef SHARED
+#define __divtf3 __divtf3_compat
+#endif
+
+#define L__divxf3
+#include "config/ia64/lib1funcs.S"
+
+#ifdef SHARED
+#undef __divtf3
+.symver __divtf3_compat, __divtf3@GCC_3.0
+#endif
diff --git a/libgcc/config/ia64/__divxf3.asm b/libgcc/config/ia64/__divxf3.asm

deleted file mode 100644 (file)

index f741bda..0000000
--- a/libgcc/config/ia64/__divxf3.asm
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifdef SHARED
-#define __divtf3 __divtf3_compat
-#endif
-
-#define L__divxf3
-#include "config/ia64/lib1funcs.asm"
-
-#ifdef SHARED
-#undef __divtf3
-.symver __divtf3_compat, __divtf3@GCC_3.0
-#endif
diff --git a/libgcc/config/ia64/_fixtfdi.S b/libgcc/config/ia64/_fixtfdi.S

new file mode 100644 (file)

index 0000000..863b70f
--- /dev/null
+++ b/libgcc/config/ia64/_fixtfdi.S
@@ -0,0 +1,11 @@
+#ifdef SHARED
+#define __fixtfti __fixtfti_compat
+#endif
+
+#define L_fixtfdi
+#include "config/ia64/lib1funcs.S"
+
+#ifdef SHARED
+#undef __fixtfti
+.symver __fixtfti_compat, __fixtfti@GCC_3.0
+#endif
diff --git a/libgcc/config/ia64/_fixtfdi.asm b/libgcc/config/ia64/_fixtfdi.asm

deleted file mode 100644 (file)

index 4d13c80..0000000
--- a/libgcc/config/ia64/_fixtfdi.asm
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifdef SHARED
-#define __fixtfti __fixtfti_compat
-#endif
-
-#define L_fixtfdi
-#include "config/ia64/lib1funcs.asm"
-
-#ifdef SHARED
-#undef __fixtfti
-.symver __fixtfti_compat, __fixtfti@GCC_3.0
-#endif
diff --git a/libgcc/config/ia64/_fixunstfdi.S b/libgcc/config/ia64/_fixunstfdi.S

new file mode 100644 (file)

index 0000000..aac6a28
--- /dev/null
+++ b/libgcc/config/ia64/_fixunstfdi.S
@@ -0,0 +1,11 @@
+#ifdef SHARED
+#define __fixunstfti __fixunstfti_compat
+#endif
+
+#define L_fixunstfdi
+#include "config/ia64/lib1funcs.S"
+
+#ifdef SHARED
+#undef __fixunstfti
+.symver __fixunstfti_compat, __fixunstfti@GCC_3.0
+#endif
diff --git a/libgcc/config/ia64/_fixunstfdi.asm b/libgcc/config/ia64/_fixunstfdi.asm

deleted file mode 100644 (file)

index b722d9e..0000000
--- a/libgcc/config/ia64/_fixunstfdi.asm
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifdef SHARED
-#define __fixunstfti __fixunstfti_compat
-#endif
-
-#define L_fixunstfdi
-#include "config/ia64/lib1funcs.asm"
-
-#ifdef SHARED
-#undef __fixunstfti
-.symver __fixunstfti_compat, __fixunstfti@GCC_3.0
-#endif
diff --git a/libgcc/config/ia64/_floatditf.S b/libgcc/config/ia64/_floatditf.S

new file mode 100644 (file)

index 0000000..e37404d
--- /dev/null
+++ b/libgcc/config/ia64/_floatditf.S
@@ -0,0 +1,11 @@
+#ifdef SHARED
+#define __floattitf __floattitf_compat
+#endif
+
+#define L_floatditf
+#include "config/ia64/lib1funcs.S"
+
+#ifdef SHARED
+#undef __floattitf
+.symver __floattitf_compat, __floattitf@GCC_3.0
+#endif
diff --git a/libgcc/config/ia64/_floatditf.asm b/libgcc/config/ia64/_floatditf.asm

deleted file mode 100644 (file)

index 21d7702..0000000
--- a/libgcc/config/ia64/_floatditf.asm
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifdef SHARED
-#define __floattitf __floattitf_compat
-#endif
-
-#define L_floatditf
-#include "config/ia64/lib1funcs.asm"
-
-#ifdef SHARED
-#undef __floattitf
-.symver __floattitf_compat, __floattitf@GCC_3.0
-#endif
diff --git a/libgcc/config/ia64/lib1funcs.S b/libgcc/config/ia64/lib1funcs.S

new file mode 100644 (file)

index 0000000..b7eaa6e
--- /dev/null
+++ b/libgcc/config/ia64/lib1funcs.S
@@ -0,0 +1,795 @@
+/* Copyright (C) 2000, 2001, 2003, 2005, 2009 Free Software Foundation, Inc.
+   Contributed by James E. Wilson <wilson@cygnus.com>.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef L__divxf3
+// Compute a 80-bit IEEE double-extended quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// farg0 holds the dividend.  farg1 holds the divisor.
+//
+// __divtf3 is an alternate symbol name for backward compatibility.
+
+       .text
+       .align 16
+       .global __divxf3
+       .proc __divxf3
+__divxf3:
+#ifdef SHARED
+       .global __divtf3
+__divtf3:
+#endif
+       cmp.eq p7, p0 = r0, r0          // p7 = 1 (r0 equals itself)
+       frcpa.s0 f10, p6 = farg0, farg1 // f10 and predicate p6 come from frcpa
+       ;;
+(p6)   cmp.ne p7, p0 = r0, r0          // p6 set: clear p7
+       .pred.rel.mutex p6, p7          // at most one of p6, p7 is set
+(p6)   fnma.s1 f11 = farg1, f10, f1
+(p6)   fma.s1 f12 = farg0, f10, f0
+       ;;
+(p6)   fma.s1 f13 = f11, f11, f0
+(p6)   fma.s1 f14 = f11, f11, f11
+       ;;
+(p6)   fma.s1 f11 = f13, f13, f11
+(p6)   fma.s1 f13 = f14, f10, f10
+       ;;
+(p6)   fma.s1 f10 = f13, f11, f10
+(p6)   fnma.s1 f11 = farg1, f12, farg0
+       ;;
+(p6)   fma.s1 f11 = f11, f10, f12
+(p6)   fnma.s1 f12 = farg1, f10, f1
+       ;;
+(p6)   fma.s1 f10 = f12, f10, f10
+(p6)   fnma.s1 f12 = farg1, f11, farg0
+       ;;
+(p6)   fma.s0 fret0 = f12, f10, f11    // p6 set: return the refined quotient
+(p7)   mov fret0 = f10                 // p6 clear: return f10 from frcpa as is
+       br.ret.sptk rp
+       .endp __divxf3
+#endif
+
+#ifdef L__divdf3
+// Compute a 64-bit IEEE double quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// farg0 holds the dividend.  farg1 holds the divisor.
+
+       .text
+       .align 16
+       .global __divdf3
+       .proc __divdf3
+__divdf3:
+       cmp.eq p7, p0 = r0, r0          // p7 = 1 (r0 equals itself)
+       frcpa.s0 f10, p6 = farg0, farg1 // f10 and predicate p6 come from frcpa
+       ;;
+(p6)   cmp.ne p7, p0 = r0, r0          // p6 set: clear p7
+       .pred.rel.mutex p6, p7          // at most one of p6, p7 is set
+(p6)   fmpy.s1 f11 = farg0, f10
+(p6)   fnma.s1 f12 = farg1, f10, f1
+       ;;
+(p6)   fma.s1 f11 = f12, f11, f11
+(p6)   fmpy.s1 f13 = f12, f12
+       ;;
+(p6)   fma.s1 f10 = f12, f10, f10
+(p6)   fma.s1 f11 = f13, f11, f11
+       ;;
+(p6)   fmpy.s1 f12 = f13, f13
+(p6)   fma.s1 f10 = f13, f10, f10
+       ;;
+(p6)   fma.d.s1 f11 = f12, f11, f11
+(p6)   fma.s1 f10 = f12, f10, f10
+       ;;
+(p6)   fnma.d.s1 f8 = farg1, f11, farg0
+       ;;
+(p6)   fma.d fret0 = f8, f10, f11      // p6 set: return the refined quotient
+(p7)   mov fret0 = f10                 // p6 clear: return f10 from frcpa as is
+       br.ret.sptk rp
+       ;;
+       .endp __divdf3
+#endif
+
+#ifdef L__divsf3
+// Compute a 32-bit IEEE float quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// farg0 holds the dividend.  farg1 holds the divisor.
+
+       .text
+       .align 16
+       .global __divsf3
+       .proc __divsf3
+__divsf3:
+       cmp.eq p7, p0 = r0, r0          // p7 = 1 (r0 equals itself)
+       frcpa.s0 f10, p6 = farg0, farg1 // f10 and predicate p6 come from frcpa
+       ;;
+(p6)   cmp.ne p7, p0 = r0, r0          // p6 set: clear p7
+       .pred.rel.mutex p6, p7          // at most one of p6, p7 is set
+(p6)   fmpy.s1 f8 = farg0, f10
+(p6)   fnma.s1 f9 = farg1, f10, f1
+       ;;
+(p6)   fma.s1 f8 = f9, f8, f8
+(p6)   fmpy.s1 f9 = f9, f9
+       ;;
+(p6)   fma.s1 f8 = f9, f8, f8
+(p6)   fmpy.s1 f9 = f9, f9
+       ;;
+(p6)   fma.d.s1 f10 = f9, f8, f8
+       ;;
+(p6)   fnorm.s.s0 fret0 = f10          // p6 set: return the refined quotient
+(p7)   mov fret0 = f10                 // p6 clear: return f10 from frcpa as is
+       br.ret.sptk rp
+       ;;
+       .endp __divsf3
+#endif
+
+#ifdef L__divdi3
+// Compute a 64-bit integer quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend.  in1 holds the divisor.
+
+       .text
+       .align 16
+       .global __divdi3
+       .proc __divdi3
+__divdi3:
+       .regstk 2,0,0,0
+       // Transfer inputs to FP registers.
+       setf.sig f8 = in0
+       setf.sig f9 = in1
+       // Check divide by zero: p7 is set when in1 == 0.
+       cmp.ne.unc p0,p7=0,in1
+       ;;
+       // Convert the inputs to FP, so that they won't be treated as unsigned.
+       fcvt.xf f8 = f8
+       fcvt.xf f9 = f9
+(p7)   break 1                 // divisor is zero
+       ;;
+       // Compute the reciprocal approximation.
+       frcpa.s1 f10, p6 = f8, f9
+       ;;
+       // 3 Newton-Raphson iterations.
+(p6)   fnma.s1 f11 = f9, f10, f1
+(p6)   fmpy.s1 f12 = f8, f10
+       ;;
+(p6)   fmpy.s1 f13 = f11, f11
+(p6)   fma.s1 f12 = f11, f12, f12
+       ;;
+(p6)   fma.s1 f10 = f11, f10, f10
+(p6)   fma.s1 f11 = f13, f12, f12
+       ;;
+(p6)   fma.s1 f10 = f13, f10, f10
+(p6)   fnma.s1 f12 = f9, f11, f8
+       ;;
+(p6)   fma.s1 f10 = f12, f10, f11
+       ;;
+       // Truncate the quotient to an integer.
+       fcvt.fx.trunc.s1 f10 = f10
+       ;;
+       // Transfer result to GP registers.
+       getf.sig ret0 = f10
+       br.ret.sptk rp
+       ;;
+       .endp __divdi3
+#endif
+
+#ifdef L__moddi3
+// Compute a 64-bit integer modulus.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend (a).  in1 holds the divisor (b).
+
+       .text
+       .align 16
+       .global __moddi3
+       .proc __moddi3
+__moddi3:
+       .regstk 2,0,0,0
+       // Transfer inputs to FP registers.
+       setf.sig f14 = in0
+       setf.sig f9 = in1
+       // Check divide by zero: p7 is set when in1 == 0.
+       cmp.ne.unc p0,p7=0,in1
+       ;;
+       // Convert the inputs to FP, so that they won't be treated as unsigned.
+       fcvt.xf f8 = f14
+       fcvt.xf f9 = f9
+(p7)   break 1                 // divisor is zero
+       ;;
+       // Compute the reciprocal approximation.
+       frcpa.s1 f10, p6 = f8, f9
+       ;;
+       // 3 Newton-Raphson iterations.
+(p6)   fmpy.s1 f12 = f8, f10
+(p6)   fnma.s1 f11 = f9, f10, f1
+       ;;
+(p6)   fma.s1 f12 = f11, f12, f12
+(p6)   fmpy.s1 f13 = f11, f11
+       ;;
+(p6)   fma.s1 f10 = f11, f10, f10
+(p6)   fma.s1 f11 = f13, f12, f12
+       ;;
+       sub in1 = r0, in1       // in1 = -b
+(p6)   fma.s1 f10 = f13, f10, f10
+(p6)   fnma.s1 f12 = f9, f11, f8
+       ;;
+       setf.sig f9 = in1       // f9 = -b, for the xma below
+(p6)   fma.s1 f10 = f12, f10, f11
+       ;;
+       fcvt.fx.trunc.s1 f10 = f10      // truncate the quotient to an integer
+       ;;
+       // r = q * (-b) + a
+       xma.l f10 = f10, f9, f14
+       ;;
+       // Transfer result to GP registers.
+       getf.sig ret0 = f10
+       br.ret.sptk rp
+       ;;
+       .endp __moddi3
+#endif
+
+#ifdef L__udivdi3
+// Compute a 64-bit unsigned integer quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend.  in1 holds the divisor.
+
+       .text
+       .align 16
+       .global __udivdi3
+       .proc __udivdi3
+__udivdi3:
+       .regstk 2,0,0,0
+       // Transfer inputs to FP registers.
+       setf.sig f8 = in0
+       setf.sig f9 = in1
+       // Check divide by zero: p7 is set when in1 == 0.
+       cmp.ne.unc p0,p7=0,in1
+       ;;
+       // Convert the inputs to FP, to avoid FP software-assist faults.
+       fcvt.xuf.s1 f8 = f8
+       fcvt.xuf.s1 f9 = f9
+(p7)   break 1                 // divisor is zero
+       ;;
+       // Compute the reciprocal approximation.
+       frcpa.s1 f10, p6 = f8, f9
+       ;;
+       // 3 Newton-Raphson iterations.
+(p6)   fnma.s1 f11 = f9, f10, f1
+(p6)   fmpy.s1 f12 = f8, f10
+       ;;
+(p6)   fmpy.s1 f13 = f11, f11
+(p6)   fma.s1 f12 = f11, f12, f12
+       ;;
+(p6)   fma.s1 f10 = f11, f10, f10
+(p6)   fma.s1 f11 = f13, f12, f12
+       ;;
+(p6)   fma.s1 f10 = f13, f10, f10
+(p6)   fnma.s1 f12 = f9, f11, f8
+       ;;
+(p6)   fma.s1 f10 = f12, f10, f11
+       ;;
+       // Truncate the quotient to an unsigned integer.
+       fcvt.fxu.trunc.s1 f10 = f10
+       ;;
+       // Transfer result to GP registers.
+       getf.sig ret0 = f10
+       br.ret.sptk rp
+       ;;
+       .endp __udivdi3
+#endif
+
+#ifdef L__umoddi3
+// Compute a 64-bit unsigned integer modulus, returned in ret0.
+// A zero divisor executes "break 1".
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend (a).  in1 holds the divisor (b).
+
+       .text
+       .align 16
+       .global __umoddi3
+       .proc __umoddi3
+__umoddi3:
+       .regstk 2,0,0,0
+       // Transfer inputs to FP registers.
+       setf.sig f14 = in0
+       setf.sig f9 = in1
+       // Check divide by zero.
+       cmp.ne.unc p0,p7=0,in1
+       ;;
+       // Convert the inputs to FP, to avoid FP software assist faults.
+       fcvt.xuf.s1 f8 = f14
+       fcvt.xuf.s1 f9 = f9
+(p7)   break 1;
+       ;;
+       // Compute the reciprocal approximation.
+       frcpa.s1 f10, p6 = f8, f9
+       ;;
+       // 3 Newton-Raphson iterations, each predicated on p6 from frcpa.
+(p6)   fmpy.s1 f12 = f8, f10
+(p6)   fnma.s1 f11 = f9, f10, f1
+       ;;
+(p6)   fma.s1 f12 = f11, f12, f12
+(p6)   fmpy.s1 f13 = f11, f11
+       ;;
+(p6)   fma.s1 f10 = f11, f10, f10
+(p6)   fma.s1 f11 = f13, f12, f12
+       ;;
+       sub in1 = r0, in1               // in1 = -b, for the final xma
+(p6)   fma.s1 f10 = f13, f10, f10
+(p6)   fnma.s1 f12 = f9, f11, f8
+       ;;
+       setf.sig f9 = in1               // f9 = -b; f14 still holds a
+(p6)   fma.s1 f10 = f12, f10, f11
+       ;;
+       // Round quotient to an unsigned integer.
+       fcvt.fxu.trunc.s1 f10 = f10
+       ;;
+       // r = q * (-b) + a
+       xma.l f10 = f10, f9, f14
+       ;;
+       // Transfer result to GP registers.
+       getf.sig ret0 = f10
+       br.ret.sptk rp
+       ;;
+       .endp __umoddi3
+#endif
+
+#ifdef L__divsi3
+// Compute a 32-bit integer quotient, returned in ret0.
+// A divisor whose low 32 bits are zero executes "break 1".
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend.  in1 holds the divisor.
+
+       .text
+       .align 16
+       .global __divsi3
+       .proc __divsi3
+__divsi3:
+       .regstk 2,0,0,0
+       // Check divide by zero.  cmp4: in1 is not sign-extended yet here.
+       cmp4.ne.unc p0,p7=0,in1
+       sxt4 in0 = in0
+       sxt4 in1 = in1
+       ;;
+       setf.sig f8 = in0
+       setf.sig f9 = in1
+(p7)   break 1
+       ;;
+       mov r2 = 0x0ffdd                // exponent bits for f11 (setf.exp below)
+       fcvt.xf f8 = f8
+       fcvt.xf f9 = f9
+       ;;
+       setf.exp f11 = r2
+       frcpa.s1 f10, p6 = f8, f9       // reciprocal approximation; sets p6
+       ;;
+(p6)   fmpy.s1 f8 = f8, f10
+(p6)   fnma.s1 f9 = f9, f10, f1
+       ;;
+(p6)   fma.s1 f8 = f9, f8, f8
+(p6)   fma.s1 f9 = f9, f9, f11
+       ;;
+(p6)   fma.s1 f10 = f9, f8, f8
+       ;;
+       fcvt.fx.trunc.s1 f10 = f10      // truncate quotient to an integer
+       ;;
+       getf.sig ret0 = f10
+       br.ret.sptk rp
+       ;;
+       .endp __divsi3
+#endif
+
+#ifdef L__modsi3
+// Compute a 32-bit integer modulus, returned in ret0.
+// A zero (sign-extended) divisor executes "break 1".
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend.  in1 holds the divisor.
+
+       .text
+       .align 16
+       .global __modsi3
+       .proc __modsi3
+__modsi3:
+       .regstk 2,0,0,0
+       mov r2 = 0x0ffdd                // exponent bits for f11 (setf.exp below)
+       sxt4 in0 = in0
+       sxt4 in1 = in1
+       ;;
+       setf.sig f13 = r32              // r32 is in0: sign-extended dividend
+       setf.sig f9 = r33               // r33 is in1: sign-extended divisor
+       // Check divide by zero.
+       cmp.ne.unc p0,p7=0,in1
+       ;;
+       sub in1 = r0, in1               // in1 = -divisor, for the final xma
+       fcvt.xf f8 = f13
+       fcvt.xf f9 = f9
+       ;;
+       setf.exp f11 = r2
+       frcpa.s1 f10, p6 = f8, f9
+(p7)   break 1
+       ;;
+(p6)   fmpy.s1 f12 = f8, f10
+(p6)   fnma.s1 f10 = f9, f10, f1
+       ;;
+       setf.sig f9 = in1               // f9 = -divisor
+(p6)   fma.s1 f12 = f10, f12, f12
+(p6)   fma.s1 f10 = f10, f10, f11      
+       ;;
+(p6)   fma.s1 f10 = f10, f12, f12
+       ;;
+       fcvt.fx.trunc.s1 f10 = f10      // truncate quotient to an integer
+       ;;
+       xma.l f10 = f10, f9, f13        // remainder = q * (-divisor) + dividend
+       ;;
+       getf.sig ret0 = f10
+       br.ret.sptk rp
+       ;;
+       .endp __modsi3
+#endif
+
+#ifdef L__udivsi3
+// Compute a 32-bit unsigned integer quotient, returned in ret0.
+// A zero (zero-extended) divisor executes "break 1".
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend.  in1 holds the divisor.
+
+       .text
+       .align 16
+       .global __udivsi3
+       .proc __udivsi3
+__udivsi3:
+       .regstk 2,0,0,0
+       mov r2 = 0x0ffdd                // exponent bits for f11 (setf.exp below)
+       zxt4 in0 = in0
+       zxt4 in1 = in1
+       ;;
+       setf.sig f8 = in0
+       setf.sig f9 = in1
+       // Check divide by zero.
+       cmp.ne.unc p0,p7=0,in1
+       ;;
+       fcvt.xf f8 = f8
+       fcvt.xf f9 = f9
+(p7)   break 1
+       ;;
+       setf.exp f11 = r2
+       frcpa.s1 f10, p6 = f8, f9       // reciprocal approximation; sets p6
+       ;;
+(p6)   fmpy.s1 f8 = f8, f10
+(p6)   fnma.s1 f9 = f9, f10, f1
+       ;;
+(p6)   fma.s1 f8 = f9, f8, f8
+(p6)   fma.s1 f9 = f9, f9, f11
+       ;;
+(p6)   fma.s1 f10 = f9, f8, f8
+       ;;
+       fcvt.fxu.trunc.s1 f10 = f10     // truncate quotient to an unsigned integer
+       ;;
+       getf.sig ret0 = f10
+       br.ret.sptk rp
+       ;;
+       .endp __udivsi3
+#endif
+
+#ifdef L__umodsi3
+// Compute a 32-bit unsigned integer modulus, returned in ret0.
+// A zero (zero-extended) divisor executes "break 1".
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend.  in1 holds the divisor.
+
+       .text
+       .align 16
+       .global __umodsi3
+       .proc __umodsi3
+__umodsi3:
+       .regstk 2,0,0,0
+       mov r2 = 0x0ffdd                // exponent bits for f11 (setf.exp below)
+       zxt4 in0 = in0
+       zxt4 in1 = in1
+       ;;
+       setf.sig f13 = in0              // f13 keeps the dividend for the final xma
+       setf.sig f9 = in1
+       // Check divide by zero.
+       cmp.ne.unc p0,p7=0,in1
+       ;;
+       sub in1 = r0, in1               // in1 = -divisor, for the final xma
+       fcvt.xf f8 = f13
+       fcvt.xf f9 = f9
+       ;;
+       setf.exp f11 = r2
+       frcpa.s1 f10, p6 = f8, f9
+(p7)   break 1;
+       ;;
+(p6)   fmpy.s1 f12 = f8, f10
+(p6)   fnma.s1 f10 = f9, f10, f1
+       ;;
+       setf.sig f9 = in1               // f9 = -divisor
+(p6)   fma.s1 f12 = f10, f12, f12
+(p6)   fma.s1 f10 = f10, f10, f11
+       ;;
+(p6)   fma.s1 f10 = f10, f12, f12
+       ;;
+       fcvt.fxu.trunc.s1 f10 = f10     // truncate quotient to an unsigned integer
+       ;;
+       xma.l f10 = f10, f9, f13        // remainder = q * (-divisor) + dividend
+       ;;
+       getf.sig ret0 = f10
+       br.ret.sptk rp
+       ;;
+       .endp __umodsi3
+#endif
+
+#ifdef L__save_stack_nonlocal
+// Notes on save/restore stack nonlocal: We read ar.bsp but write
+// ar.bspstore.  This is because ar.bsp can be read at all times
+// (independent of the RSE mode) but since it's read-only we need to
+// restore the value via ar.bspstore.  This is OK because
+// ar.bsp==ar.bspstore after executing "flushrs".
+
+// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer)
+
+       .text
+       .align 16
+       .global __ia64_save_stack_nonlocal
+       .proc __ia64_save_stack_nonlocal
+__ia64_save_stack_nonlocal:
+       { .mmf
+         alloc r18 = ar.pfs, 2, 0, 0, 0
+         mov r19 = ar.rsc
+         ;;
+       }
+       { .mmi
+         flushrs
+         st8 [in0] = in1, 24           // save_area+0 = stack_pointer; in0 += 24
+         and r19 = 0x1c, r19           // keep only bits 2..4 of the ar.rsc copy
+         ;;
+       }
+       { .mmi
+         st8 [in0] = r18, -16          // save_area+24 = r18 (from alloc); in0 -= 16
+         mov ar.rsc = r19
+         or r19 = 0x3, r19             // set the low two bits for the final write
+         ;;
+       }
+       { .mmi
+         mov r16 = ar.bsp
+         mov r17 = ar.rnat
+         adds r2 = 8, in0              // r2 = save_area+16
+         ;;
+       }
+       { .mmi
+         st8 [in0] = r16               // save_area+8 = ar.bsp
+         st8 [r2] = r17                // save_area+16 = ar.rnat
+       }
+       { .mib
+         mov ar.rsc = r19
+         br.ret.sptk.few rp
+         ;;
+       }
+       .endp __ia64_save_stack_nonlocal
+#endif
+
+#ifdef L__nonlocal_goto
+// void __ia64_nonlocal_goto(void *target_label, void *save_area,
+//                          void *static_chain);
+
+       .text
+       .align 16
+       .global __ia64_nonlocal_goto
+       .proc __ia64_nonlocal_goto
+__ia64_nonlocal_goto:
+       { .mmi
+         alloc r20 = ar.pfs, 3, 0, 0, 0
+         ld8 r12 = [in1], 8            // r12 = save_area word 0
+         mov.ret.sptk rp = in0, .L0    // return address = target_label
+         ;;
+       }
+       { .mmf
+         ld8 r16 = [in1], 8            // r16 = save_area word 1 (for ar.bspstore)
+         mov r19 = ar.rsc
+         ;;
+       }
+       { .mmi
+         flushrs
+         ld8 r17 = [in1], 8            // r17 = save_area word 2 (for ar.rnat)
+         and r19 = 0x1c, r19           // keep only bits 2..4 of the ar.rsc copy
+         ;;
+       }
+       { .mmi
+         ld8 r18 = [in1]               // r18 = save_area word 3 (for ar.pfs)
+         mov ar.rsc = r19
+         or r19 = 0x3, r19             // set the low two bits for the final write
+         ;;
+       }
+       { .mmi
+         mov ar.bspstore = r16
+         ;;
+         mov ar.rnat = r17
+         ;;
+       }
+       { .mmi
+         loadrs
+         invala
+         mov r15 = in2                 // r15 = static_chain
+         ;;
+       }
+.L0:   { .mib
+         mov ar.rsc = r19
+         mov ar.pfs = r18
+         br.ret.sptk.few rp
+         ;;
+       }
+       .endp __ia64_nonlocal_goto
+#endif
+
+#ifdef L__restore_stack_nonlocal
+// This is mostly the same as nonlocal_goto above.
+// ??? This has not been tested yet.
+
+// void __ia64_restore_stack_nonlocal(void *save_area)
+
+       .text
+       .align 16
+       .global __ia64_restore_stack_nonlocal
+       .proc __ia64_restore_stack_nonlocal
+__ia64_restore_stack_nonlocal:
+       { .mmf
+         alloc r20 = ar.pfs, 4, 0, 0, 0 // NOTE(review): 4 inputs vs. 1-arg prototype - confirm
+         ld8 r12 = [in0], 8            // r12 = save_area word 0
+         ;;
+       }
+       { .mmb
+         ld8 r16=[in0], 8              // r16 = save_area word 1 (for ar.bspstore)
+         mov r19 = ar.rsc
+         ;;
+       }
+       { .mmi
+         flushrs
+         ld8 r17 = [in0], 8            // r17 = save_area word 2 (for ar.rnat)
+         and r19 = 0x1c, r19           // keep only bits 2..4 of the ar.rsc copy
+         ;;
+       }
+       { .mmf
+         ld8 r18 = [in0]               // r18 = save_area word 3 (for ar.pfs)
+         mov ar.rsc = r19
+         ;;
+       }
+       { .mmi
+         mov ar.bspstore = r16
+         ;;
+         mov ar.rnat = r17
+         or r19 = 0x3, r19             // set the low two bits for the final write
+         ;;
+       }
+       { .mmf
+         loadrs
+         invala
+         ;;
+       }
+.L0:   { .mib
+         mov ar.rsc = r19
+         mov ar.pfs = r18
+         br.ret.sptk.few rp
+         ;;
+       }
+       .endp __ia64_restore_stack_nonlocal
+#endif
+
+#ifdef L__trampoline
+// Implement the nested function trampoline.  This is out of line
+// so that we don't have to bother with flushing the icache, as
+// well as making the on-stack trampoline smaller.
+//
+// The trampoline has the following form:
+//
+//             +-------------------+ >
+//     TRAMP:  | __ia64_trampoline | |
+//             +-------------------+  > fake function descriptor
+//             | TRAMP+16          | |
+//             +-------------------+ >
+//             | target descriptor |
+//             +-------------------+
+//             | static link       |
+//             +-------------------+
+
+       .text
+       .align 16
+       .global __ia64_trampoline
+       .proc __ia64_trampoline
+__ia64_trampoline:
+       { .mmi
+         ld8 r2 = [r1], 8              // r2 = word at r1 (target descriptor slot if r1 = TRAMP+16)
+         ;;
+         ld8 r15 = [r1]                // r15 = following word (static link slot)
+       }
+       { .mmi
+         ld8 r3 = [r2], 8              // r3 = first word of the target descriptor
+         ;;
+         ld8 r1 = [r2]                 // r1 = second word of the target descriptor
+         mov b6 = r3
+       }
+       { .bbb
+         br.sptk.many b6               // jump to the address loaded into r3
+         ;;
+       }
+       .endp __ia64_trampoline
+#endif
+
+#ifdef SHARED
+// Thunks for backward compatibility: each is a lone branch to an xf routine.
+#ifdef L_fixtfdi
+       .text
+       .align 16
+       .global __fixtfti
+       .proc __fixtfti
+__fixtfti:
+       { .bbb
+         br.sptk.many __fixxfti
+         ;;
+       }
+       .endp __fixtfti
+#endif
+#ifdef L_fixunstfdi
+       .align 16
+       .global __fixunstfti
+       .proc __fixunstfti
+__fixunstfti:
+       { .bbb
+         br.sptk.many __fixunsxfti
+         ;;
+       }
+       .endp __fixunstfti
+#endif
+#ifdef L_floatditf
+       .align 16
+       .global __floattitf
+       .proc __floattitf
+__floattitf:
+       { .bbb
+         br.sptk.many __floattixf
+         ;;
+       }
+       .endp __floattitf
+#endif
+#endif
diff --git a/libgcc/config/ia64/t-hpux b/libgcc/config/ia64/t-hpux

index ef3387e7a61987384dc64699c3e3580d572a5fb5..1fee41385c0aa058b7ca43655970cd10ffa9a936 100644 (file)
--- a/libgcc/config/ia64/t-hpux
+++ b/libgcc/config/ia64/t-hpux
@@ -1 +1,6 @@
+# On HP-UX we do not want _fixtfdi, _fixunstfdi, or _floatditf from
+# LIB1ASMSRC.  These functions map the 128 bit conversion function names
+# to 80 bit conversions and were done for Linux backwards compatibility.
+LIB1ASMFUNCS := $(filter-out _fixtfdi _fixunstfdi _floatditf,$(LIB1ASMFUNCS))
+
  LIB2ADDEH = $(srcdir)/unwind-c.c
diff --git a/libgcc/config/ia64/t-ia64 b/libgcc/config/ia64/t-ia64

index 59cf3aa75f470b477637e9e502485e38a76511a1..80445d8a2a87ea993ccb3979f6d266e930c28305 100644 (file)
--- a/libgcc/config/ia64/t-ia64
+++ b/libgcc/config/ia64/t-ia64
@@ -1,3 +1,16 @@
+LIB1ASMSRC    = ia64/lib1funcs.S
+
+# We use different names for the DImode div/mod files so that they won't
+# conflict with libgcc2.c files.  We used to use __ia64 as a prefix, now
+# we use __ as the prefix.  Note that L_divdi3 in libgcc2.c actually defines
+# a TImode divide function, so there is no actual overlap here between
+# libgcc2.c and lib1funcs.S.
+LIB1ASMFUNCS  = __divxf3 __divdf3 __divsf3 \
+       __divdi3 __moddi3 __udivdi3 __umoddi3 \
+       __divsi3 __modsi3 __udivsi3 __umodsi3 __save_stack_nonlocal \
+       __nonlocal_goto __restore_stack_nonlocal __trampoline \
+       _fixtfdi _fixunstfdi _floatditf
+
  CUSTOM_CRTSTUFF = yes
  
  # Assemble startup files.
diff --git a/libgcc/config/ia64/t-softfp-compat b/libgcc/config/ia64/t-softfp-compat

index d3dad68c48f64ebe28883117132801dfaca5bd37..00f45d51cd02f18cee46e55723658a3002d3b648 100644 (file)
--- a/libgcc/config/ia64/t-softfp-compat
+++ b/libgcc/config/ia64/t-softfp-compat
@@ -3,5 +3,5 @@
  # Replace __divxf3 _fixtfdi _fixunstfdi _floatditf
  libgcc1-tf-functions = __divxf3  _fixtfdi _fixunstfdi _floatditf
  LIB1ASMFUNCS := $(filter-out $(libgcc1-tf-functions), $(LIB1ASMFUNCS))
-libgcc1-tf-compats = $(addsuffix .asm, $(libgcc1-tf-functions))
+libgcc1-tf-compats = $(addsuffix .S, $(libgcc1-tf-functions))
  LIB2ADD += $(addprefix $(srcdir)/config/ia64/, $(libgcc1-tf-compats))
diff --git a/libgcc/config/m32c/lib1funcs.S b/libgcc/config/m32c/lib1funcs.S

new file mode 100644 (file)

index 0000000..9b65778
--- /dev/null
+++ b/libgcc/config/m32c/lib1funcs.S
@@ -0,0 +1,231 @@
+/* libgcc routines for R8C/M16C/M32C
+   Copyright (C) 2005, 2009, 2010
+   Free Software Foundation, Inc.
+   Contributed by Red Hat.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if defined(__r8c_cpu__) || defined(__m16c_cpu__)
+#define A16                    /* R8C/M16C build */
+#define A(n,w) n               /* A(n,w) selects the first operand */
+#define W w                    /* size suffix: add.W becomes add.w */
+#else
+#define A24                    /* any other CPU */
+#define A(n,w) w               /* A(n,w) selects the second operand */
+#define W l                    /* size suffix: add.W becomes add.l */
+#endif
+
+
+#ifdef L__m32c_memregs
+
+/* Warning: these memory locations are used as a register bank.  They
+   *must* end up consecutive in any final executable, so you may *not*
+   use the otherwise obvious ".comm" directive to allocate space for
+   them. */
+
+       .bss
+       .global mem0
+mem0:  .space  1               /* mem0 through mem15: one byte each */
+       .global mem1
+mem1:  .space  1
+       .global mem2
+mem2:  .space  1
+       .global mem3
+mem3:  .space  1
+       .global mem4
+mem4:  .space  1
+       .global mem5
+mem5:  .space  1
+       .global mem6
+mem6:  .space  1
+       .global mem7
+mem7:  .space  1
+       .global mem8
+mem8:  .space  1
+       .global mem9
+mem9:  .space  1
+       .global mem10
+mem10: .space  1
+       .global mem11
+mem11: .space  1
+       .global mem12
+mem12: .space  1
+       .global mem13
+mem13: .space  1
+       .global mem14
+mem14: .space  1
+       .global mem15
+mem15: .space  1
+
+#endif
+
+#ifdef L__m32c_eh_return
+       .text
+       .global __m32c_eh_return
+__m32c_eh_return:      
+
+       /* At this point, r0 has the stack adjustment, r1r3 has the
+          address to return to.  The stack looks like this:
+
+          old_ra
+          old_fp
+          <- unwound sp
+          ...
+          fb
+          through
+          r0
+          <- sp
+
+          What we need to do is restore all the registers, update the
+          stack, and return to the right place.
+       */
+
+       stc     sp,a0                   /* a0 = current sp */
+       
+       add.W   A(#16,#24),a0
+       /* a0 points to the current stack, just above the register
+          save areas */
+
+       mov.w   a0,a1
+       exts.w  r0
+       sub.W   A(r0,r2r0),a1           /* a1 -= stack adjustment */
+       sub.W   A(#3,#4),a1             /* room for the return address written below */
+       /* a1 points to the new stack.  */
+
+       /* This is for the "rts" below.  */
+       mov.w   r1,[a1]
+#ifdef A16
+       mov.w   r2,r1
+       mov.b   r1l,2[a1]
+#else
+       mov.w   r2,2[a1]
+#endif
+
+       /* This is for the "popc sp" below.  */
+       mov.W   a1,[a0] 
+
+       popm    r0,r1,r2,r3,a0,a1,sb,fb
+       popc    sp
+       rts
+#endif
+
+/* SImode arguments for SI foo(SI,SI) functions.  */
+#ifdef A16
+#define SAL  5[fb]             /* first argument, low word */
+#define SAH  7[fb]             /* first argument, high word */
+#define SBL  9[fb]             /* second argument, low word */
+#define SBH 11[fb]             /* second argument, high word */
+#else
+#define SAL  8[fb]             /* same four slots at the A24 frame offsets */
+#define SAH 10[fb]
+#define SBL 12[fb]
+#define SBH 14[fb]
+#endif
+
+#ifdef L__m32c_mulsi3
+       .text
+       .global ___mulsi3
+___mulsi3:
+       enter   #0
+       push.w  r2
+       mov.w   SAL,r0
+       mulu.w  SBL,r0          /* writes to r2r0 */
+       mov.w   r0,mem0         /* low word of the product goes to mem0 */
+       mov.w   r2,mem2         /* carry-out word starts the high half in mem2 */
+       mov.w   SAL,r0
+       mulu.w  SBH,r0          /* writes to r2r0 */
+       add.w   r0,mem2         /* add low word of the cross product */
+       mov.w   SAH,r0
+       mulu.w  SBL,r0          /* writes to r2r0 */
+       add.w   r0,mem2         /* add low word of the other cross product */
+       pop.w   r2
+       exitd
+#endif
+
+#ifdef L__m32c_cmpsi2 /* signed 32-bit compare; result 0, 1 or 2 in r0 */
+       .text
+       .global ___cmpsi2
+___cmpsi2:
+       enter   #0
+       cmp.w   SBH,SAH         /* high words carry the sign: signed compare */
+       jgt     cmpsi_gt
+       jlt     cmpsi_lt
+       cmp.w   SBL,SAL         /* high words equal: low words compare unsigned */
+       jgtu    cmpsi_gt
+       jltu    cmpsi_lt
+       mov.w   #1,r0           /* equal */
+       exitd
+cmpsi_gt:
+       mov.w   #2,r0           /* greater-than result */
+       exitd
+cmpsi_lt:
+       mov.w   #0,r0           /* less-than result */
+       exitd
+#endif
+
+#ifdef L__m32c_ucmpsi2 /* unsigned 32-bit compare; result 0, 1 or 2 in r0 */
+       .text
+       .global ___ucmpsi2
+___ucmpsi2:
+       enter   #0
+       cmp.w   SBH,SAH         /* high words first */
+       jgtu    cmpsi_gt
+       jltu    cmpsi_lt
+       cmp.w   SBL,SAL         /* high words equal: decide on the low words */
+       jgtu    cmpsi_gt
+       jltu    cmpsi_lt
+       mov.w   #1,r0           /* equal */
+       exitd
+cmpsi_gt:
+       mov.w   #2,r0           /* greater-than result */
+       exitd
+cmpsi_lt:
+       mov.w   #0,r0           /* less-than result */
+       exitd
+#endif
+
+#ifdef L__m32c_jsri16
+       .text
+#ifdef A16 /* only built for the A16 (R8C/M16C) configuration */
+       .global m32c_jsri16
+m32c_jsri16:
+       add.w   #-1, sp         /* sp -= 1 */
+
+       /* Read the address (16 bits) and return address (24 bits) off
+       the stack.  */
+       mov.w   4[sp], r0
+       mov.w   1[sp], r3
+       mov.b   3[sp], a0 /* This zero-extends, so the high byte has
+                            zero in it.  */
+
+       /* Write the return address, then new address, to the stack.  */
+       mov.w   a0, 1[sp] /* Just to get the zero in 2[sp].  */
+       mov.w   r0, 0[sp]
+       mov.w   r3, 3[sp]
+       mov.b   a0, 5[sp]
+
+       /* This "returns" to the target address, leaving the pending
+       return address on the stack.  */
+       rts
+#endif
+
+#endif
diff --git a/libgcc/config/m32c/t-m32c b/libgcc/config/m32c/t-m32c

new file mode 100644 (file)

index 0000000..d214837
--- /dev/null
+++ b/libgcc/config/m32c/t-m32c
@@ -0,0 +1,9 @@
+LIB1ASMSRC = m32c/lib1funcs.S
+
+LIB1ASMFUNCS = \
+       __m32c_memregs \
+       __m32c_eh_return \
+       __m32c_mulsi3 \
+       __m32c_cmpsi2 \
+       __m32c_ucmpsi2 \
+       __m32c_jsri16
diff --git a/libgcc/config/m32r/initfini.c b/libgcc/config/m32r/initfini.c

index 6e7d58614c7fdcb46712acec95a420e7d2d989f1..5633245922313129da08881e624b42e3009435e6 100644 (file)
--- a/libgcc/config/m32r/initfini.c
+++ b/libgcc/config/m32r/initfini.c
@@ -1,5 +1,5 @@
  /* .init/.fini section handling + C++ global constructor/destructor handling.
-   This file is based on crtstuff.c, sol2-crti.asm, sol2-crtn.asm.
+   This file is based on crtstuff.c, sol2-crti.S, sol2-crtn.S.
  
     Copyright (C) 1996, 1997, 1998, 2006, 2009 Free Software Foundation, Inc.
  
diff --git a/libgcc/config/m68k/lb1sf68.S b/libgcc/config/m68k/lb1sf68.S

new file mode 100644 (file)

index 0000000..0339a09
--- /dev/null
+++ b/libgcc/config/m68k/lb1sf68.S
@@ -0,0 +1,4116 @@
+/* libgcc routines for 68000 w/o floating-point hardware.
+   Copyright (C) 1994, 1996, 1997, 1998, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* Use this one for any 680x0; assumes no floating point hardware.
+   The trailing " '" appearing on some lines is for ANSI preprocessors.  Yuk.
+   Some of this code comes from MINIX, via the folks at ericsson.
+   D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
+*/
+
+/* These are predefined by new versions of GNU cpp.  */
+
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__ _
+#endif
+
+#ifndef __REGISTER_PREFIX__
+#define __REGISTER_PREFIX__
+#endif
+
+#ifndef __IMMEDIATE_PREFIX__
+#define __IMMEDIATE_PREFIX__ #
+#endif
+
+/* ANSI concatenation macros.  CONCAT1 expands its arguments, CONCAT2 pastes.  */
+
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+/* Use the right prefix for global labels.  */
+
+#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+
+/* Note that X is a function.  */
+       
+#ifdef __ELF__
+#define FUNC(x) .type SYM(x),function
+#else
+/* The .proc pseudo-op is accepted, but ignored, by GAS.  We could just        
+   define this to the empty string for non-ELF systems, but defining it
+   to .proc means that the information is available to the assembler if
+   the need arises.  */
+#define FUNC(x) .proc
+#endif
+               
+/* Use the right prefix for registers.  */
+
+#define REG(x) CONCAT1 (__REGISTER_PREFIX__, x)
+
+/* Use the right prefix for immediate values.  */
+
+#define IMM(x) CONCAT1 (__IMMEDIATE_PREFIX__, x)
+
+#define d0 REG (d0)
+#define d1 REG (d1)
+#define d2 REG (d2)
+#define d3 REG (d3)
+#define d4 REG (d4)
+#define d5 REG (d5)
+#define d6 REG (d6)
+#define d7 REG (d7)
+#define a0 REG (a0)
+#define a1 REG (a1)
+#define a2 REG (a2)
+#define a3 REG (a3)
+#define a4 REG (a4)
+#define a5 REG (a5)
+#define a6 REG (a6)
+#define fp REG (fp)
+#define sp REG (sp)
+#define pc REG (pc)
+
+/* Provide a few macros to allow for PIC code support.
+ * With PIC, data is stored A5 relative so we've got to take a bit of special
+ * care to ensure that all loads of global data are via A5.  PIC also requires
+ * jumps and subroutine calls to be PC relative rather than absolute.  We cheat
+ * a little on this and in the PIC case, we use short offset branches and
+ * hope that the final object code is within range (which it should be).
+ */
+#ifndef __PIC__
+
+       /* Non PIC (absolute/relocatable) versions */
+
+       .macro PICCALL addr
+       jbsr    \addr
+       .endm
+
+       .macro PICJUMP addr
+       jmp     \addr
+       .endm
+
+       .macro PICLEA sym, reg
+       lea     \sym, \reg
+       .endm
+
+       .macro PICPEA sym, areg
+       pea     \sym
+       .endm
+
+#else /* __PIC__ */
+
+# if defined (__uClinux__)
+
+       /* Versions for uClinux */
+
+#  if defined(__ID_SHARED_LIBRARY__)
+
+       /* -mid-shared-library versions  */
+
+       .macro PICLEA sym, reg
+       movel   a5@(_current_shared_library_a5_offset_), \reg
+       movel   \sym@GOT(\reg), \reg
+       .endm
+
+       .macro PICPEA sym, areg
+       movel   a5@(_current_shared_library_a5_offset_), \areg
+       movel   \sym@GOT(\areg), sp@-
+       .endm
+
+       .macro PICCALL addr
+       PICLEA  \addr,a0
+       jsr     a0@
+       .endm
+
+       .macro PICJUMP addr
+       PICLEA  \addr,a0
+       jmp     a0@
+       .endm
+
+#  else /* !__ID_SHARED_LIBRARY__ */
+
+       /* Versions for -msep-data */
+
+       .macro PICLEA sym, reg
+       movel   \sym@GOT(a5), \reg
+       .endm
+
+       .macro PICPEA sym, areg
+       movel   \sym@GOT(a5), sp@-
+       .endm
+
+       .macro PICCALL addr
+#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__)
+       lea     \addr-.-8,a0
+       jsr     pc@(a0)
+#else
+       jbsr    \addr
+#endif
+       .endm
+
+       .macro PICJUMP addr
+       /* ISA C has no bra.l instruction, and since this assembly file
+          gets assembled into multiple object files, we avoid the
+          bra instruction entirely.  */
+#if defined (__mcoldfire__) && !defined (__mcfisab__)
+       lea     \addr-.-8,a0
+       jmp     pc@(a0)
+#else
+       bra     \addr
+#endif
+       .endm
+
+#  endif /* __ID_SHARED_LIBRARY__ */
+
+# else /* !__uClinux__ */
+
+       /* Versions for Linux */
+
+       .macro PICLEA sym, reg
+       movel   #_GLOBAL_OFFSET_TABLE_@GOTPC, \reg
+       lea     (-6, pc, \reg), \reg
+       movel   \sym@GOT(\reg), \reg
+       .endm
+
+       .macro PICPEA sym, areg
+       movel   #_GLOBAL_OFFSET_TABLE_@GOTPC, \areg
+       lea     (-6, pc, \areg), \areg
+       movel   \sym@GOT(\areg), sp@-
+       .endm
+
+       .macro PICCALL addr
+#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__)
+       lea     \addr-.-8,a0
+       jsr     pc@(a0)
+#else
+       jbsr    \addr
+#endif
+       .endm
+
+       .macro PICJUMP addr
+       /* ISA C has no bra.l instruction, and since this assembly file
+          gets assembled into multiple object files, we avoid the
+          bra instruction entirely.  */
+#if defined (__mcoldfire__) && !defined (__mcfisab__)
+       lea     \addr-.-8,a0
+       jmp     pc@(a0)
+#else
+       bra     \addr
+#endif
+       .endm
+
+# endif /* __uClinux__ */
+#endif /* __PIC__ */
+
+
+#ifdef L_floatex
+
+| This is an attempt at a decent floating point (single, double and 
+| extended double) code for the GNU C compiler. It should be easy to
+| adapt to other compilers (but beware of the local labels!).
+
+| Starting date: 21 October, 1990
+
+| It is convenient to introduce the notation (s,e,f) for a floating point
+| number, where s=sign, e=exponent, f=fraction. We will call a floating
+| point number fpn to abbreviate, independently of the precision.
+| Let MAX_EXP be in each case the maximum exponent (255 for floats, 1023 
+| for doubles and 16383 for long doubles). We then have the following 
+| different cases:
+|  1. Normalized fpns have 0 < e < MAX_EXP. They correspond to 
+|     (-1)^s x 1.f x 2^(e-bias-1).
+|  2. Denormalized fpns have e=0. They correspond to numbers of the form
+|     (-1)^s x 0.f x 2^(-bias).
+|  3. +/-INFINITY have e=MAX_EXP, f=0.
+|  4. Quiet NaN (Not a Number) have all bits set.
+|  5. Signaling NaN (Not a Number) have s=0, e=MAX_EXP, f=1.
+
+|=============================================================================
+|                                  exceptions
+|=============================================================================
+
+| This is the floating point condition code register (_fpCCR):
+|
+| struct {
+|   short _exception_bits;     
+|   short _trap_enable_bits;   
+|   short _sticky_bits;
+|   short _rounding_mode;
+|   short _format;
+|   short _last_operation;
+|   union {
+|     float sf;
+|     double df;
+|   } _operand1;
+|   union {
+|     float sf;
+|     double df;
+|   } _operand2;
+| } _fpCCR;
+
+       .data
+       .even
+
+       .globl  SYM (_fpCCR)
+       
+SYM (_fpCCR):
+__exception_bits:
+       .word   0
+__trap_enable_bits:
+       .word   0
+__sticky_bits:
+       .word   0
+__rounding_mode:
+       .word   ROUND_TO_NEAREST        /* initial rounding mode */
+__format:
+       .word   NIL                     /* initial format */
+__last_operation:
+       .word   NOOP                    /* initial last operation */
+__operand1:
+       .long   0                       /* two longs: room for a double */
+       .long   0
+__operand2:
+       .long   0                       /* two longs: room for a double */
+       .long   0
+
+| Offsets:
+EBITS  = __exception_bits - SYM (_fpCCR)        /* each is a byte offset from _fpCCR */
+TRAPE  = __trap_enable_bits - SYM (_fpCCR)
+STICK  = __sticky_bits - SYM (_fpCCR)
+ROUND  = __rounding_mode - SYM (_fpCCR)
+FORMT  = __format - SYM (_fpCCR)
+LASTO  = __last_operation - SYM (_fpCCR)
+OPER1  = __operand1 - SYM (_fpCCR)
+OPER2  = __operand2 - SYM (_fpCCR)
+
+| The following exception types are supported:
+INEXACT_RESULT                 = 0x0001
+UNDERFLOW              = 0x0002
+OVERFLOW               = 0x0004
+DIVIDE_BY_ZERO                 = 0x0008
+INVALID_OPERATION      = 0x0010
+
+| The allowed rounding modes are:
+UNKNOWN           = -1
+ROUND_TO_NEAREST  = 0 | round result to nearest representable value
+ROUND_TO_ZERO     = 1 | round result towards zero
+ROUND_TO_PLUS     = 2 | round result towards plus infinity
+ROUND_TO_MINUS    = 3 | round result towards minus infinity
+
+| The allowed values of format are:
+NIL          = 0
+SINGLE_FLOAT = 1
+DOUBLE_FLOAT = 2
+LONG_FLOAT   = 3
+
+| The allowed values for the last operation are:
+NOOP         = 0
+ADD          = 1
+MULTIPLY     = 2
+DIVIDE       = 3
+NEGATE       = 4
+COMPARE      = 5
+EXTENDSFDF   = 6
+TRUNCDFSF    = 7
+
+|=============================================================================
+|                           __clear_sticky_bit
+|=============================================================================
+
+| The sticky bits are normally not cleared (thus the name), whereas the 
+| exception type and exception value reflect the last computation. 
+| This routine is provided to clear them (you can also write to _fpCCR,
+| since it is globally visible).
+
+       .globl  SYM (__clear_sticky_bit)
+
+       .text
+       .even
+
+| void __clear_sticky_bit(void);
+SYM (__clear_sticky_bit):              
+       PICLEA  SYM (_fpCCR),a0         /* a0 = address of _fpCCR */
+#ifndef __mcoldfire__
+       movew   IMM (0),a0@(STICK)      /* zero the __sticky_bits word */
+#else
+       clr.w   a0@(STICK)              /* zero the __sticky_bits word */
+#endif
+       rts
+
+|=============================================================================
+|                           $_exception_handler
+|=============================================================================
+
+       .globl  $_exception_handler
+
+       .text
+       .even
+
+| This is the common exit point if an exception occurs.
+| NOTE: it is NOT callable from C!
+| It expects the exception type in d7, the format (SINGLE_FLOAT,
+| DOUBLE_FLOAT or LONG_FLOAT) in d6, and the last operation code in d5.
+| It sets the corresponding exception and sticky bits, and the format. 
+| Depending on the format it fills the corresponding slots for the 
+| operands which produced the exception (all this information is provided
+| so if you write your own exception handlers you have enough information
+| to deal with the problem).
+| Then checks to see if the corresponding exception is trap-enabled, 
+| in which case it pushes the address of _fpCCR and traps through 
+| trap FPTRAP (15 for the moment).
+
+FPTRAP = 15
+
+$_exception_handler:
+       PICLEA  SYM (_fpCCR),a0         /* a0 = address of _fpCCR */
+       movew   d7,a0@(EBITS)   | set __exception_bits
+#ifndef __mcoldfire__
+       orw     d7,a0@(STICK)   | and __sticky_bits
+#else
+       movew   a0@(STICK),d4           /* same OR, done through d4 */
+       orl     d7,d4
+       movew   d4,a0@(STICK)
+#endif
+       movew   d6,a0@(FORMT)   | and __format
+       movew   d5,a0@(LASTO)   | and __last_operation
+
+| Now put the operands in place:
+#ifndef __mcoldfire__
+       cmpw    IMM (SINGLE_FLOAT),d6
+#else
+       cmpl    IMM (SINGLE_FLOAT),d6
+#endif
+       beq     1f                      /* SINGLE_FLOAT: one long per operand */
+       movel   a6@(8),a0@(OPER1)       /* otherwise copy two longs per operand */
+       movel   a6@(12),a0@(OPER1+4)
+       movel   a6@(16),a0@(OPER2)
+       movel   a6@(20),a0@(OPER2+4)
+       bra     2f
+1:     movel   a6@(8),a0@(OPER1)
+       movel   a6@(12),a0@(OPER2)
+2:
+| And check whether the exception is trap-enabled:
+#ifndef __mcoldfire__
+       andw    a0@(TRAPE),d7   | is exception trap-enabled?
+#else
+       clrl    d6
+       movew   a0@(TRAPE),d6
+       andl    d6,d7
+#endif
+       beq     1f              | no, exit
+       PICPEA  SYM (_fpCCR),a1 | yes, push address of _fpCCR
+       trap    IMM (FPTRAP)    | and trap
+#ifndef __mcoldfire__
+1:     moveml  sp@+,d2-d7      | restore data registers
+#else
+1:     moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6              | and return
+       rts
+#endif /* L_floatex */
+
+#ifdef  L_mulsi3
+/* __mulsi3: 32-bit multiply built from 16x16 multiplies.
+   The two long arguments are read from the stack as word halves:
+   x0/x1 are the words at sp@(4)/sp@(6) and y0/y1 the words at
+   sp@(8)/sp@(10).  The value returned in d0 is
+       ((x0*y1 + x1*y0) << 16) + x1*y1
+   truncated to 32 bits.  Only d0 and d1 are modified.  */
+       .text
+       FUNC(__mulsi3)
+       .globl  SYM (__mulsi3)
+SYM (__mulsi3):
+       movew   sp@(4), d0      /* x0 -> d0 */
+       muluw   sp@(10), d0     /* x0*y1 */
+       movew   sp@(6), d1      /* x1 -> d1 */
+       muluw   sp@(8), d1      /* x1*y0 */
+#ifndef __mcoldfire__
+       addw    d1, d0
+#else
+       addl    d1, d0
+#endif
+       swap    d0              /* move the cross-term sum to the high word */
+       clrw    d0              /* and drop whatever landed in the low word */
+       movew   sp@(6), d1      /* x1 -> d1 */
+       muluw   sp@(10), d1     /* x1*y1 */
+       addl    d1, d0
+
+       rts
+#endif /* L_mulsi3 */
+
+#ifdef  L_udivsi3
+/* __udivsi3: unsigned 32-bit division.
+   The dividend is the first stack argument and the divisor the second;
+   the quotient is returned in d0.  Two implementations follow: one for
+   non-ColdFire targets built on the 32/16 divu instruction, and a
+   bit-by-bit loop for ColdFire.  */
+       .text
+       FUNC(__udivsi3)
+       .globl  SYM (__udivsi3)
+SYM (__udivsi3):
+#ifndef __mcoldfire__
+       movel   d2, sp@-        /* d2 is used as scratch; restored at L6 */
+       movel   sp@(12), d1     /* d1 = divisor */
+       movel   sp@(8), d0      /* d0 = dividend */
+
+       cmpl    IMM (0x10000), d1 /* divisor >= 2 ^ 16 ?   */
+       jcc     L3              /* then try next algorithm */
+       /* Divisor fits in 16 bits: two chained 32/16 divisions.  */
+       movel   d0, d2
+       clrw    d2
+       swap    d2              /* d2 = high word of the dividend */
+       divu    d1, d2          /* high quotient in lower word */
+       movew   d2, d0          /* save high quotient */
+       swap    d0
+       movew   sp@(10), d2     /* get low dividend + high rest */
+       divu    d1, d2          /* low quotient */
+       movew   d2, d0
+       jra     L6
+
+L3:    movel   d1, d2          /* use d2 as divisor backup */
+L4:    lsrl    IMM (1), d1     /* shift divisor */
+       lsrl    IMM (1), d0     /* shift dividend */
+       cmpl    IMM (0x10000), d1 /* still divisor >= 2 ^ 16 ?  */
+       jcc     L4
+       divu    d1, d0          /* now we have 16-bit divisor */
+       andl    IMM (0xffff), d0 /* keep the 16-bit quotient, drop the remainder */
+
+/* Multiply the 16-bit tentative quotient with the 32-bit divisor.  Because of
+   the operand ranges, this might give a 33-bit product.  If this product is
+   greater than the dividend, the tentative quotient was too large. */
+       movel   d2, d1
+       mulu    d0, d1          /* low part, 32 bits */
+       swap    d2
+       mulu    d0, d2          /* high part, at most 17 bits */
+       swap    d2              /* align high part with low part */
+       tstw    d2              /* high part 17 bits? */
+       jne     L5              /* if 17 bits, quotient was too large */
+       addl    d2, d1          /* add parts */
+       jcs     L5              /* if sum is 33 bits, quotient was too large */
+       cmpl    sp@(8), d1      /* compare the sum with the dividend */
+       jls     L6              /* if sum > dividend, quotient was too large */
+L5:    subql   IMM (1), d0     /* adjust quotient */
+
+L6:    movel   sp@+, d2
+       rts
+
+#else /* __mcoldfire__ */
+
+/* ColdFire implementation of non-restoring division algorithm from
+   Hennessy & Patterson, Appendix A. */
+/* d0 = a (dividend, becomes the quotient), d1 = b (divisor),
+   d2 = p (partial remainder), d3 = tmp, d4 = loop counter.  */
+       link    a6,IMM (-12)
+       moveml  d2-d4,sp@
+       movel   a6@(8),d0
+       movel   a6@(12),d1
+       clrl    d2              | clear p
+       moveq   IMM (31),d4
+L1:    addl    d0,d0           | shift reg pair (p,a) one bit left
+       addxl   d2,d2
+       movl    d2,d3           | subtract b from p, store in tmp.
+       subl    d1,d3
+       jcs     L2              | if no carry,
+       bset    IMM (0),d0      | set the low order bit of a to 1,
+       movl    d3,d2           | and store tmp in p.
+L2:    subql   IMM (1),d4
+       jcc     L1              | loop until the counter goes below zero
+       moveml  sp@,d2-d4       | restore data registers
+       unlk    a6              | and return
+       rts
+#endif /* __mcoldfire__ */
+
+#endif /* L_udivsi3 */
+
+#ifdef  L_divsi3
+/* __divsi3: signed 32-bit division.
+   Takes the absolute value of both stack arguments, divides them with
+   __udivsi3 and negates the quotient when exactly one argument was
+   negative.  d2 tracks the result sign and is saved/restored around the
+   routine; the quotient is returned in d0.  */
+       .text
+       FUNC(__divsi3)
+       .globl  SYM (__divsi3)
+SYM (__divsi3):
+       movel   d2, sp@-
+
+       moveq   IMM (1), d2     /* sign of result stored in d2 (=1 or =-1) */
+       movel   sp@(12), d1     /* d1 = divisor */
+       jpl     L1              /* flags come from the move just above */
+       negl    d1
+#ifndef __mcoldfire__
+       negb    d2              /* change sign because divisor <0  */
+#else
+       negl    d2              /* change sign because divisor <0  */
+#endif
+L1:    movel   sp@(8), d0      /* d0 = dividend */
+       jpl     L2
+       negl    d0
+#ifndef __mcoldfire__
+       negb    d2              /* flip the result sign again */
+#else
+       negl    d2              /* flip the result sign again */
+#endif
+
+L2:    movel   d1, sp@-
+       movel   d0, sp@-
+       PICCALL SYM (__udivsi3) /* divide abs(dividend) by abs(divisor) */
+       addql   IMM (8), sp     /* drop the two pushed arguments */
+
+       tstb    d2              /* only the low byte of d2 is examined */
+       jpl     L3
+       negl    d0
+
+L3:    movel   sp@+, d2
+       rts
+#endif /* L_divsi3 */
+
+#ifdef  L_umodsi3
+/* __umodsi3: unsigned 32-bit remainder, computed as a - (a/b)*b.
+   a is the first stack argument (dividend), b the second (divisor).
+   The quotient comes from __udivsi3; the product comes from __mulsi3
+   on non-ColdFire targets and from a single mulsl on ColdFire.  The
+   remainder is returned in d0.  */
+       .text
+       FUNC(__umodsi3)
+       .globl  SYM (__umodsi3)
+SYM (__umodsi3):
+       movel   sp@(8), d1      /* d1 = divisor */
+       movel   sp@(4), d0      /* d0 = dividend */
+       movel   d1, sp@-
+       movel   d0, sp@-
+       PICCALL SYM (__udivsi3)
+       addql   IMM (8), sp
+       movel   sp@(8), d1      /* d1 = divisor */
+#ifndef __mcoldfire__
+       movel   d1, sp@-
+       movel   d0, sp@-
+       PICCALL SYM (__mulsi3)  /* d0 = (a/b)*b */
+       addql   IMM (8), sp
+#else
+       mulsl   d1,d0           /* d0 = (a/b)*b */
+#endif
+       movel   sp@(4), d1      /* d1 = dividend */
+       subl    d0, d1          /* d1 = a - (a/b)*b */
+       movel   d1, d0
+       rts
+#endif /* L_umodsi3 */
+
+#ifdef  L_modsi3
+/* __modsi3: signed 32-bit remainder, computed as a - (a/b)*b.
+   a is the first stack argument (dividend), b the second (divisor).
+   The quotient comes from __divsi3; the product comes from __mulsi3
+   on non-ColdFire targets and from a single mulsl on ColdFire.  The
+   remainder is returned in d0.  */
+       .text
+       FUNC(__modsi3)
+       .globl  SYM (__modsi3)
+SYM (__modsi3):
+       movel   sp@(8), d1      /* d1 = divisor */
+       movel   sp@(4), d0      /* d0 = dividend */
+       movel   d1, sp@-
+       movel   d0, sp@-
+       PICCALL SYM (__divsi3)
+       addql   IMM (8), sp
+       movel   sp@(8), d1      /* d1 = divisor */
+#ifndef __mcoldfire__
+       movel   d1, sp@-
+       movel   d0, sp@-
+       PICCALL SYM (__mulsi3)  /* d0 = (a/b)*b */
+       addql   IMM (8), sp
+#else
+       mulsl   d1,d0           /* d0 = (a/b)*b */
+#endif
+       movel   sp@(4), d1      /* d1 = dividend */
+       subl    d0, d1          /* d1 = a - (a/b)*b */
+       movel   d1, d0
+       rts
+#endif /* L_modsi3 */
+
+
+#ifdef  L_double
+
+       .globl  SYM (_fpCCR)
+       .globl  $_exception_handler
+
+| High long of the value returned for invalid operations; the code that
+| returns it copies the same pattern into the low long as well.
+QUIET_NaN      = 0xffffffff
+
+| Exponent parameters of the double format.
+D_MAX_EXP      = 0x07ff
+D_BIAS         = 1022
+DBL_MAX_EXP    = D_MAX_EXP - D_BIAS
+DBL_MIN_EXP    = 1 - D_BIAS
+DBL_MANT_DIG   = 53
+
+| Exception type bits, passed to the exception handler in d7.
+INEXACT_RESULT                 = 0x0001
+UNDERFLOW              = 0x0002
+OVERFLOW               = 0x0004
+DIVIDE_BY_ZERO                 = 0x0008
+INVALID_OPERATION      = 0x0010
+
+| Format code, passed to the exception handler in d6.
+DOUBLE_FLOAT = 2
+
+| Operation codes, passed to the exception handler in d5.
+NOOP         = 0
+ADD          = 1
+MULTIPLY     = 2
+DIVIDE       = 3
+NEGATE       = 4
+COMPARE      = 5
+EXTENDSFDF   = 6
+TRUNCDFSF    = 7
+
+| Rounding modes, compared against the word read from offset 6 of _fpCCR.
+UNKNOWN           = -1
+ROUND_TO_NEAREST  = 0 | round result to nearest representable value
+ROUND_TO_ZERO     = 1 | round result towards zero
+ROUND_TO_PLUS     = 2 | round result towards plus infinity
+ROUND_TO_MINUS    = 3 | round result towards minus infinity
+
+| Entry points:
+
+       .globl SYM (__adddf3)
+       .globl SYM (__subdf3)
+       .globl SYM (__muldf3)
+       .globl SYM (__divdf3)
+       .globl SYM (__negdf2)
+       .globl SYM (__cmpdf2)
+       .globl SYM (__cmpdf2_internal)
+       .hidden SYM (__cmpdf2_internal)
+
+       .text
+       .even
+
+| These are common routines to return and signal exceptions.   
+| Each one leaves the value to return in d0-d1, loads the exception
+| type bits into d7 and the format into d6, and jumps to the exception
+| handler.  None of them sets d5: the operation code is whatever the
+| code that branched here left in d5.  Ld$den, Ld$infty/Ld$overflow and
+| Ld$div$0 OR the incoming d7 into d0 before overwriting d7, so the
+| branching code is expected to have the result sign bit in d7.
+
+Ld$den:
+| Return and signal a denormalized number
+       orl     d7,d0
+       movew   IMM (INEXACT_RESULT+UNDERFLOW),d7
+       moveq   IMM (DOUBLE_FLOAT),d6
+       PICJUMP $_exception_handler
+
+Ld$infty:
+Ld$overflow:
+| Return a properly signed INFINITY and set the exception flags 
+       movel   IMM (0x7ff00000),d0
+       movel   IMM (0),d1
+       orl     d7,d0
+       movew   IMM (INEXACT_RESULT+OVERFLOW),d7
+       moveq   IMM (DOUBLE_FLOAT),d6
+       PICJUMP $_exception_handler
+
+Ld$underflow:
+| Return 0 and set the exception flags 
+       movel   IMM (0),d0
+       movel   d0,d1
+       movew   IMM (INEXACT_RESULT+UNDERFLOW),d7
+       moveq   IMM (DOUBLE_FLOAT),d6
+       PICJUMP $_exception_handler
+
+Ld$inop:
+| Return a quiet NaN and set the exception flags
+       movel   IMM (QUIET_NaN),d0
+       movel   d0,d1           | both longs of the result are all ones
+       movew   IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+       moveq   IMM (DOUBLE_FLOAT),d6
+       PICJUMP $_exception_handler
+
+Ld$div$0:
+| Return a properly signed INFINITY and set the exception flags
+       movel   IMM (0x7ff00000),d0
+       movel   IMM (0),d1
+       orl     d7,d0
+       movew   IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7
+       moveq   IMM (DOUBLE_FLOAT),d6
+       PICJUMP $_exception_handler
+
+|=============================================================================
+|=============================================================================
+|                         double precision routines
+|=============================================================================
+|=============================================================================
+
+| A double precision floating point number (double) has the format:
+|
+| struct _double {
+|  unsigned int sign      : 1;  /* sign bit */ 
+|  unsigned int exponent  : 11; /* exponent, shifted by 1022 (D_BIAS) */
+|  unsigned int fraction  : 52; /* fraction */
+| } double;
+| 
+| Thus sizeof(double) = 8 (64 bits). 
+|
+| All the routines are callable from C programs, and return the result 
+| in the register pair d0-d1. They also preserve all registers except 
+| d0-d1 and a0-a1.
+
+|=============================================================================
+|                              __subdf3
+|=============================================================================
+
+| double __subdf3(double, double);
+| Implemented as a + (-b): the sign of the second operand is flipped in
+| place in its argument slot on the stack, then execution falls through
+| into __adddf3.
+       FUNC(__subdf3)
+SYM (__subdf3):
+       bchg    IMM (31),sp@(12) | change sign of second operand
+                               | and fall through, so we always add
+|=============================================================================
+|                              __adddf3
+|=============================================================================
+
+| double __adddf3(double, double);
+| Outline of the main path below:
+|   1. save d2-d7, load a into d0-d1 and b into d2-d3;
+|   2. shift each operand left one bit (dropping the sign bit) and leave
+|      early through Ladddf$b / Ladddf$a if one of them is zero;
+|   3. split off the exponents, branching to Ladddf$nf for INFINITY/NaN
+|      and to Ladddf$a$den / Ladddf$b$den for denormalized operands;
+|   4. widen a to d0-d3 and b to d4-d7 and shift the operand with the
+|      smaller exponent right until the exponents match;
+|   5. add (same signs) or branch to Lsubdf$0 (different signs);
+|   6. normalize, round according to the mode in _fpCCR, reassemble the
+|      result and leave through Ladddf$ret (or Ld$overflow).
+       FUNC(__adddf3)
+SYM (__adddf3):
+#ifndef __mcoldfire__
+       link    a6,IMM (0)      | everything will be done in registers
+       moveml  d2-d7,sp@-      | save data registers d2-d7 (not d0-d1)
+#else
+       link    a6,IMM (-24)
+       moveml  d2-d7,sp@
+#endif
+       movel   a6@(8),d0       | get first operand
+       movel   a6@(12),d1      | 
+       movel   a6@(16),d2      | get second operand
+       movel   a6@(20),d3      | 
+
+       movel   d0,d7           | get d0's sign bit in d7 '
+       addl    d1,d1           | check and clear sign bit of a, and gain one
+       addxl   d0,d0           | bit of extra precision
+       beq     Ladddf$b        | if zero return second operand
+
+       movel   d2,d6           | save sign in d6 
+       addl    d3,d3           | get rid of sign bit and gain one bit of
+       addxl   d2,d2           | extra precision
+       beq     Ladddf$a        | if zero return first operand
+
+       andl    IMM (0x80000000),d7 | isolate a's sign bit '
+        swap   d6              | and also b's sign bit '
+#ifndef __mcoldfire__
+       andw    IMM (0x8000),d6 |
+       orw     d6,d7           | and combine them into d7, so that a's sign '
+                               | bit is in the high word and b's is in the '
+                               | low word, so d6 is free to be used
+#else
+       andl    IMM (0x8000),d6
+       orl     d6,d7
+#endif
+       movel   d7,a0           | now save d7 into a0, so d7 is free to
+                               | be used also
+
+| Get the exponents and check for denormalized and/or infinity.
+| (Both operands have been shifted left once, so the fraction occupies
+| bits 0-20 of the high long and the exponent starts at bit 21.)
+
+       movel   IMM (0x001fffff),d6 | mask for the fraction
+       movel   IMM (0x00200000),d7 | mask to put hidden bit back
+
+       movel   d0,d4           | 
+       andl    d6,d0           | get fraction in d0
+       notl    d6              | make d6 into mask for the exponent
+       andl    d6,d4           | get exponent in d4
+       beq     Ladddf$a$den    | branch if a is denormalized
+       cmpl    d6,d4           | check for INFINITY or NaN
+       beq     Ladddf$nf       | 
+       orl     d7,d0           | and put hidden bit back
+Ladddf$1:
+       swap    d4              | shift right exponent so that it starts
+#ifndef __mcoldfire__
+       lsrw    IMM (5),d4      | in bit 0 and not bit 21
+#else
+       lsrl    IMM (5),d4      | in bit 0 and not bit 21
+#endif
+| Now we have a's exponent in d4 and fraction in d0-d1 '
+       movel   d2,d5           | save b to get exponent
+       andl    d6,d5           | get exponent in d5
+       beq     Ladddf$b$den    | branch if b is denormalized
+       cmpl    d6,d5           | check for INFINITY or NaN
+       beq     Ladddf$nf
+       notl    d6              | make d6 into mask for the fraction again
+       andl    d6,d2           | and get fraction in d2
+       orl     d7,d2           | and put hidden bit back
+Ladddf$2:
+       swap    d5              | shift right exponent so that it starts
+#ifndef __mcoldfire__
+       lsrw    IMM (5),d5      | in bit 0 and not bit 21
+#else
+       lsrl    IMM (5),d5      | in bit 0 and not bit 21
+#endif
+
+| Now we have b's exponent in d5 and fraction in d2-d3. '
+
+| The situation now is as follows: the signs are combined in a0, the 
+| numbers are in d0-d1 (a) and d2-d3 (b), and the exponents in d4 (a)
+| and d5 (b). To do the rounding correctly we need to keep all the
+| bits until the end, so we need to use d0-d1-d2-d3 for the first number
+| and d4-d5-d6-d7 for the second. To do this we store (temporarily) the
+| exponents in a2-a3.
+| (The ColdFire build also saves a4, which it uses as the temporary for
+| its three-move replacement of exg.)
+
+#ifndef __mcoldfire__
+       moveml  a2-a3,sp@-      | save the address registers
+#else
+       movel   a2,sp@- 
+       movel   a3,sp@- 
+       movel   a4,sp@- 
+#endif
+
+       movel   d4,a2           | save the exponents
+       movel   d5,a3           | 
+
+       movel   IMM (0),d7      | and move the numbers around
+       movel   d7,d6           |
+       movel   d3,d5           |
+       movel   d2,d4           |
+       movel   d7,d3           |
+       movel   d7,d2           |
+
+| Here we shift the numbers until the exponents are the same, and put 
+| the largest exponent in a2.
+#ifndef __mcoldfire__
+       exg     d4,a2           | get exponents back
+       exg     d5,a3           |
+       cmpw    d4,d5           | compare the exponents
+#else
+       movel   d4,a4           | get exponents back
+       movel   a2,d4
+       movel   a4,a2
+       movel   d5,a4
+       movel   a3,d5
+       movel   a4,a3
+       cmpl    d4,d5           | compare the exponents
+#endif
+       beq     Ladddf$3        | if equal don't shift '
+       bhi     9f              | branch if second exponent is higher
+
+| Here we have a's exponent larger than b's, so we have to shift b. We do 
+| this by using as counter d2:
+1:     movew   d4,d2           | move largest exponent to d2
+#ifndef __mcoldfire__
+       subw    d5,d2           | and subtract second exponent
+       exg     d4,a2           | get back the longs we saved
+       exg     d5,a3           |
+#else
+       subl    d5,d2           | and subtract second exponent
+       movel   d4,a4           | get back the longs we saved
+       movel   a2,d4
+       movel   a4,a2
+       movel   d5,a4
+       movel   a3,d5
+       movel   a4,a3
+#endif
+| if difference is too large we don't shift (actually, we can just exit) '
+#ifndef __mcoldfire__
+       cmpw    IMM (DBL_MANT_DIG+2),d2
+#else
+       cmpl    IMM (DBL_MANT_DIG+2),d2
+#endif
+       bge     Ladddf$b$small
+#ifndef __mcoldfire__
+       cmpw    IMM (32),d2     | if difference >= 32, shift by longs
+#else
+       cmpl    IMM (32),d2     | if difference >= 32, shift by longs
+#endif
+       bge     5f
+2:
+#ifndef __mcoldfire__
+       cmpw    IMM (16),d2     | if difference >= 16, shift by words   
+#else
+       cmpl    IMM (16),d2     | if difference >= 16, shift by words   
+#endif
+       bge     6f
+       bra     3f              | enter dbra loop
+
+4:
+| shift b (d4-d7) right by one bit
+#ifndef __mcoldfire__
+       lsrl    IMM (1),d4
+       roxrl   IMM (1),d5
+       roxrl   IMM (1),d6
+       roxrl   IMM (1),d7
+#else
+       lsrl    IMM (1),d7
+       btst    IMM (0),d6
+       beq     10f
+       bset    IMM (31),d7
+10:    lsrl    IMM (1),d6
+       btst    IMM (0),d5
+       beq     11f
+       bset    IMM (31),d6
+11:    lsrl    IMM (1),d5
+       btst    IMM (0),d4
+       beq     12f
+       bset    IMM (31),d5
+12:    lsrl    IMM (1),d4
+#endif
+3:
+#ifndef __mcoldfire__
+       dbra    d2,4b
+#else
+       subql   IMM (1),d2
+       bpl     4b      
+#endif
+       movel   IMM (0),d2      | clear the low half of a (d2-d3), which
+       movel   d2,d3           | d2 was borrowed from as the counter
+       bra     Ladddf$4
+5:
+| shift b (d4-d7) right by a whole long
+       movel   d6,d7
+       movel   d5,d6
+       movel   d4,d5
+       movel   IMM (0),d4
+#ifndef __mcoldfire__
+       subw    IMM (32),d2
+#else
+       subl    IMM (32),d2
+#endif
+       bra     2b
+6:
+| shift b (d4-d7) right by a word
+       movew   d6,d7
+       swap    d7
+       movew   d5,d6
+       swap    d6
+       movew   d4,d5
+       swap    d5
+       movew   IMM (0),d4
+       swap    d4
+#ifndef __mcoldfire__
+       subw    IMM (16),d2
+#else
+       subl    IMM (16),d2
+#endif
+       bra     3b
+       
+9:
+| Here b's exponent is the larger one, so a (d0-d3) is shifted instead,
+| using d6 as the counter.
+#ifndef __mcoldfire__
+       exg     d4,d5
+       movew   d4,d6
+       subw    d5,d6           | d6 = difference; largest exponent is now in d4
+       exg     d4,a2
+       exg     d5,a3
+#else
+       movel   d5,d6
+       movel   d4,d5
+       movel   d6,d4
+       subl    d5,d6
+       movel   d4,a4
+       movel   a2,d4
+       movel   a4,a2
+       movel   d5,a4
+       movel   a3,d5
+       movel   a4,a3
+#endif
+| if difference is too large we don't shift (actually, we can just exit) '
+#ifndef __mcoldfire__
+       cmpw    IMM (DBL_MANT_DIG+2),d6
+#else
+       cmpl    IMM (DBL_MANT_DIG+2),d6
+#endif
+       bge     Ladddf$a$small
+#ifndef __mcoldfire__
+       cmpw    IMM (32),d6     | if difference >= 32, shift by longs
+#else
+       cmpl    IMM (32),d6     | if difference >= 32, shift by longs
+#endif
+       bge     5f
+2:
+#ifndef __mcoldfire__
+       cmpw    IMM (16),d6     | if difference >= 16, shift by words   
+#else
+       cmpl    IMM (16),d6     | if difference >= 16, shift by words   
+#endif
+       bge     6f
+       bra     3f              | enter dbra loop
+
+4:
+| shift a (d0-d3) right by one bit
+#ifndef __mcoldfire__
+       lsrl    IMM (1),d0
+       roxrl   IMM (1),d1
+       roxrl   IMM (1),d2
+       roxrl   IMM (1),d3
+#else
+       lsrl    IMM (1),d3
+       btst    IMM (0),d2
+       beq     10f
+       bset    IMM (31),d3
+10:    lsrl    IMM (1),d2
+       btst    IMM (0),d1
+       beq     11f
+       bset    IMM (31),d2
+11:    lsrl    IMM (1),d1
+       btst    IMM (0),d0
+       beq     12f
+       bset    IMM (31),d1
+12:    lsrl    IMM (1),d0
+#endif
+3:
+#ifndef __mcoldfire__
+       dbra    d6,4b
+#else
+       subql   IMM (1),d6
+       bpl     4b
+#endif
+       movel   IMM (0),d7      | clear the low half of b (d6-d7), which
+       movel   d7,d6           | d6 was borrowed from as the counter
+       bra     Ladddf$4
+5:
+| shift a (d0-d3) right by a whole long
+       movel   d2,d3
+       movel   d1,d2
+       movel   d0,d1
+       movel   IMM (0),d0
+#ifndef __mcoldfire__
+       subw    IMM (32),d6
+#else
+       subl    IMM (32),d6
+#endif
+       bra     2b
+6:
+| shift a (d0-d3) right by a word
+       movew   d2,d3
+       swap    d3
+       movew   d1,d2
+       swap    d2
+       movew   d0,d1
+       swap    d1
+       movew   IMM (0),d0
+       swap    d0
+#ifndef __mcoldfire__
+       subw    IMM (16),d6
+#else
+       subl    IMM (16),d6
+#endif
+       bra     3b
+Ladddf$3:
+| Equal exponents: no shifting, just swap the exponents back out to a2-a3.
+#ifndef __mcoldfire__
+       exg     d4,a2   
+       exg     d5,a3
+#else
+       movel   d4,a4
+       movel   a2,d4
+       movel   a4,a2
+       movel   d5,a4
+       movel   a3,d5
+       movel   a4,a3
+#endif
+Ladddf$4:      
+| Now we have the numbers in d0--d3 and d4--d7, the exponent in a2, and
+| the signs in a0.
+
+| Here we have to decide whether to add or subtract the numbers:
+#ifndef __mcoldfire__
+       exg     d7,a0           | get the signs 
+       exg     d6,a3           | a3 is free to be used
+#else
+       movel   d7,a4
+       movel   a0,d7
+       movel   a4,a0
+       movel   d6,a4
+       movel   a3,d6
+       movel   a4,a3
+#endif
+       movel   d7,d6           |
+       movew   IMM (0),d7      | get a's sign in d7 '
+       swap    d6              |
+       movew   IMM (0),d6      | and b's sign in d6 '
+       eorl    d7,d6           | compare the signs
+       bmi     Lsubdf$0        | if the signs are different we have 
+                               | to subtract
+#ifndef __mcoldfire__
+       exg     d7,a0           | else we add the numbers
+       exg     d6,a3           |
+#else
+       movel   d7,a4
+       movel   a0,d7
+       movel   a4,a0
+       movel   d6,a4
+       movel   a3,d6
+       movel   a4,a3
+#endif
+       addl    d7,d3           |
+       addxl   d6,d2           |
+       addxl   d5,d1           | 
+       addxl   d4,d0           |
+
+       movel   a2,d4           | return exponent to d4
+       movel   a0,d7           | 
+       andl    IMM (0x80000000),d7 | d7 now has the sign
+
+#ifndef __mcoldfire__
+       moveml  sp@+,a2-a3      
+#else
+       movel   sp@+,a4 
+       movel   sp@+,a3 
+       movel   sp@+,a2 
+#endif
+
+| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider
+| the case of denormalized numbers in the rounding routine itself).
+| The addition (unlike the subtraction) may have carried into one more
+| bit, so we check for that and shift right once if it happened:
+       btst    IMM (DBL_MANT_DIG+1),d0 
+       beq     1f
+#ifndef __mcoldfire__
+       lsrl    IMM (1),d0
+       roxrl   IMM (1),d1
+       roxrl   IMM (1),d2
+       roxrl   IMM (1),d3
+       addw    IMM (1),d4
+#else
+       lsrl    IMM (1),d3
+       btst    IMM (0),d2
+       beq     10f
+       bset    IMM (31),d3
+10:    lsrl    IMM (1),d2
+       btst    IMM (0),d1
+       beq     11f
+       bset    IMM (31),d2
+11:    lsrl    IMM (1),d1
+       btst    IMM (0),d0
+       beq     12f
+       bset    IMM (31),d1
+12:    lsrl    IMM (1),d0
+       addl    IMM (1),d4
+#endif
+1:
+       lea     pc@(Ladddf$5),a0 | to return from rounding routine
+       PICLEA  SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+       clrl    d6
+#endif
+       movew   a1@(6),d6       | rounding mode in d6
+       beq     Lround$to$nearest
+#ifndef __mcoldfire__
+       cmpw    IMM (ROUND_TO_PLUS),d6
+#else
+       cmpl    IMM (ROUND_TO_PLUS),d6
+#endif
+       bhi     Lround$to$minus | mode above ROUND_TO_PLUS
+       blt     Lround$to$zero  | mode below ROUND_TO_PLUS (and not zero)
+       bra     Lround$to$plus
+Ladddf$5:
+| Put back the exponent and check for overflow
+#ifndef __mcoldfire__
+       cmpw    IMM (0x7ff),d4  | is the exponent big?
+#else
+       cmpl    IMM (0x7ff),d4  | is the exponent big?
+#endif
+       bge     1f
+       bclr    IMM (DBL_MANT_DIG-1),d0
+#ifndef __mcoldfire__
+       lslw    IMM (4),d4      | put exponent back into position
+#else
+       lsll    IMM (4),d4      | put exponent back into position
+#endif
+       swap    d0              | 
+#ifndef __mcoldfire__
+       orw     d4,d0           |
+#else
+       orl     d4,d0           |
+#endif
+       swap    d0              |
+       bra     Ladddf$ret
+1:
+       moveq   IMM (ADD),d5    | operation code for the exception handler
+       bra     Ld$overflow
+
+Lsubdf$0:
+| Here we do the subtraction.
+| Reached from the add/subtract decision when the operand signs differ.
+| Computes a - b on the aligned values in d0-d3 and d4-d7; if the
+| difference comes out negative it is negated and the sign bit kept in
+| a0 is flipped.
+#ifndef __mcoldfire__
+       exg     d7,a0           | put sign back in a0
+       exg     d6,a3           |
+#else
+       movel   d7,a4
+       movel   a0,d7
+       movel   a4,a0
+       movel   d6,a4
+       movel   a3,d6
+       movel   a4,a3
+#endif
+       subl    d7,d3           |
+       subxl   d6,d2           |
+       subxl   d5,d1           |
+       subxl   d4,d0           |
+       beq     Ladddf$ret$1    | if zero just exit
+       bpl     1f              | if positive skip the following
+       movel   a0,d7           |
+       bchg    IMM (31),d7     | change sign bit in d7
+       movel   d7,a0           |
+       negl    d3              |
+       negxl   d2              |
+       negxl   d1              | and negate result
+       negxl   d0              |
+1:     
+       movel   a2,d4           | return exponent to d4
+       movel   a0,d7
+       andl    IMM (0x80000000),d7 | isolate sign bit
+#ifndef __mcoldfire__
+       moveml  sp@+,a2-a3      |
+#else
+       movel   sp@+,a4
+       movel   sp@+,a3
+       movel   sp@+,a2
+#endif
+
+| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider
+| the case of denormalized numbers in the rounding routine itself).
+| As in the addition (not in the subtraction!) we could have set 
+| one more bit we check this:
+| NOTE(review): the comment above and the check below are repeated
+| verbatim from the addition path.
+       btst    IMM (DBL_MANT_DIG+1),d0 
+       beq     1f
+#ifndef __mcoldfire__
+       lsrl    IMM (1),d0
+       roxrl   IMM (1),d1
+       roxrl   IMM (1),d2
+       roxrl   IMM (1),d3
+       addw    IMM (1),d4
+#else
+       lsrl    IMM (1),d3
+       btst    IMM (0),d2
+       beq     10f
+       bset    IMM (31),d3
+10:    lsrl    IMM (1),d2
+       btst    IMM (0),d1
+       beq     11f
+       bset    IMM (31),d2
+11:    lsrl    IMM (1),d1
+       btst    IMM (0),d0
+       beq     12f
+       bset    IMM (31),d1
+12:    lsrl    IMM (1),d0
+       addl    IMM (1),d4
+#endif
+1:
+       lea     pc@(Lsubdf$1),a0 | to return from rounding routine
+       PICLEA  SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+       clrl    d6
+#endif
+       movew   a1@(6),d6       | rounding mode in d6
+       beq     Lround$to$nearest
+#ifndef __mcoldfire__
+       cmpw    IMM (ROUND_TO_PLUS),d6
+#else
+       cmpl    IMM (ROUND_TO_PLUS),d6
+#endif
+       bhi     Lround$to$minus | mode above ROUND_TO_PLUS
+       blt     Lround$to$zero  | mode below ROUND_TO_PLUS (and not zero)
+       bra     Lround$to$plus
+Lsubdf$1:
+| Put back the exponent and sign (we don't have overflow). '
+       bclr    IMM (DBL_MANT_DIG-1),d0 
+#ifndef __mcoldfire__
+       lslw    IMM (4),d4      | put exponent back into position
+#else
+       lsll    IMM (4),d4      | put exponent back into position
+#endif
+       swap    d0              | 
+#ifndef __mcoldfire__
+       orw     d4,d0           |
+#else
+       orl     d4,d0           |
+#endif
+       swap    d0              |
+       bra     Ladddf$ret
+
+| If one of the numbers was too small (difference of exponents >= 
+| DBL_MANT_DIG+2) we return the other (and now we don't have to '
+| check for finiteness or zero).
+| Both exits restore the saved address registers, reload the operand to
+| return from the stack frame, store zero into the first word of _fpCCR,
+| restore d2-d7 and return.
+Ladddf$a$small:
+| a is negligible: return b.
+#ifndef __mcoldfire__
+       moveml  sp@+,a2-a3      
+#else
+       movel   sp@+,a4
+       movel   sp@+,a3
+       movel   sp@+,a2
+#endif
+       movel   a6@(16),d0
+       movel   a6@(20),d1
+       PICLEA  SYM (_fpCCR),a0
+       movew   IMM (0),a0@
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7      | restore data registers
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6              | and return
+       rts
+
+Ladddf$b$small:
+| b is negligible: return a.
+#ifndef __mcoldfire__
+       moveml  sp@+,a2-a3      
+#else
+       movel   sp@+,a4 
+       movel   sp@+,a3 
+       movel   sp@+,a2 
+#endif
+       movel   a6@(8),d0
+       movel   a6@(12),d1
+       PICLEA  SYM (_fpCCR),a0
+       movew   IMM (0),a0@
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7      | restore data registers
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6              | and return
+       rts
+
+| Denormalized operands: the exponent field was zero, so the hidden bit
+| is not put back; instead the exponent register is loaded with
+| 0x00200000, i.e. the value 1 in the position the exponent field
+| occupies at that point, before rejoining the main path.
+Ladddf$a$den:
+       movel   d7,d4           | d7 contains 0x00200000
+       bra     Ladddf$1
+
+Ladddf$b$den:
+       movel   d7,d5           | d7 contains 0x00200000
+       notl    d6              | turn d6 back into the fraction mask
+       bra     Ladddf$2
+
+Ladddf$b:
+| Return b (if a is zero)
+| On entry d2-d3 still hold b exactly as passed in and d7 holds the high
+| long of a, sign bit included.
+       movel   d2,d0
+       movel   d3,d1
+       bne     1f                      | Check if b is -0
+       cmpl    IMM (0x80000000),d0
+       bne     1f
+       andl    IMM (0x80000000),d7     | Use the sign of a
+       clrl    d0
+       bra     Ladddf$ret
+Ladddf$a:
+| Return a (if b is zero): reload it from the stack frame.
+       movel   a6@(8),d0
+       movel   a6@(12),d1
+1:
+       moveq   IMM (ADD),d5
+| Check for NaN and +/-INFINITY.
+       movel   d0,d7                   |
+       andl    IMM (0x80000000),d7     |
+       bclr    IMM (31),d0             |
+       cmpl    IMM (0x7ff00000),d0     |
+       bge     2f                      |
+| Check for zero so that -0 is not returned by mistake.  Both longs are
+| tested (d6 is scratch here and is reloaded on exit): testing only the
+| high long would also strip the sign from a negative denormalized
+| number whose nonzero fraction bits are all in the low long.
+       movel   d0,d6                   |
+       orl     d1,d6                   |
+       bne     Ladddf$ret              | nonzero: keep the sign in d7
+       bclr    IMM (31),d7             |
+       bra     Ladddf$ret              |
+2:
+       andl    IMM (0x000fffff),d0     | check for NaN (nonzero fraction)
+       orl     d1,d0                   |
+       bne     Ld$inop                 |
+       bra     Ld$infty                |
+       
+Ladddf$ret$1:
+| Exit used while the address registers are still saved on the stack:
+| pop them, then fall through into the normal exit.
+#ifndef __mcoldfire__
+       moveml  sp@+,a2-a3      | restore regs and exit
+#else
+       movel   sp@+,a4
+       movel   sp@+,a3
+       movel   sp@+,a2
+#endif
+
+Ladddf$ret:
+| Normal exit.
+| Expects the unsigned result in d0-d1 and the sign bit in d7.  Stores
+| zero into the first word of _fpCCR, merges the sign into d0, restores
+| d2-d7 and returns.
+       PICLEA  SYM (_fpCCR),a0
+       movew   IMM (0),a0@
+       orl     d7,d0           | put sign bit back
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6
+       rts
+
+Ladddf$ret$den:
+| Return a denormalized number.
+| Shifts the 64-bit value in d0-d1 right by one bit and leaves through
+| the normal exit.  NOTE(review): the code that branches here is outside
+| this part of the file -- confirm the register state it expects.
+#ifndef __mcoldfire__
+       lsrl    IMM (1),d0      | shift right once more
+       roxrl   IMM (1),d1      |
+#else
+       lsrl    IMM (1),d1
+       btst    IMM (0),d0      | carry the low bit of d0 into bit 31 of d1
+       beq     10f
+       bset    IMM (31),d1
+10:    lsrl    IMM (1),d0
+#endif
+       bra     Ladddf$ret
+
+Ladddf$nf:
+| Reached when at least one operand has an all-ones exponent field
+| (INFINITY or NaN).  Returns NaN through Ld$inop if either operand is a
+| NaN or if the operands are infinities of opposite sign; otherwise
+| returns a signed INFINITY through Ld$infty.
+       moveq   IMM (ADD),d5
+| This could be faster but it is not worth the effort, since it is not
+| executed very often. We sacrifice speed for clarity here.
+       movel   a6@(8),d0       | get the numbers back (remember that we
+       movel   a6@(12),d1      | did some processing already)
+       movel   a6@(16),d2      | 
+       movel   a6@(20),d3      | 
+       movel   IMM (0x7ff00000),d4 | useful constant (INFINITY)
+       movel   d0,d7           | save sign bits
+       movel   d2,d6           | 
+       bclr    IMM (31),d0     | clear sign bits
+       bclr    IMM (31),d2     | 
+| We know that one of them is either NaN or +/-INFINITY
+| Check for NaN (if either one is NaN return NaN)
+       cmpl    d4,d0           | check first a (d0)
+       bhi     Ld$inop         | if d0 > 0x7ff00000 or equal and
+       bne     2f
+       tstl    d1              | d1 > 0, a is NaN
+       bne     Ld$inop         | 
+2:     cmpl    d4,d2           | check now b (d2-d3)
+       bhi     Ld$inop         | 
+       bne     3f
+       tstl    d3              | 
+       bne     Ld$inop         | 
+3:
+| Now comes the check for +/-INFINITY. We know that both are (maybe not
+| finite) numbers, but we have to check if both are infinite whether we
+| are adding or subtracting them.
+       eorl    d7,d6           | to check sign bits
+       bmi     1f
+       andl    IMM (0x80000000),d7 | get (common) sign bit
+       bra     Ld$infty
+1:
+| We know one (or both) are infinite, so we test for equality between the
+| two numbers (if they are equal they have to be infinite both, so we
+| return NaN).
+       cmpl    d2,d0           | are both infinite?
+       bne     1f              | if d0 <> d2 they are not equal
+       cmpl    d3,d1           | if d0 == d2 test d3 and d1
+       beq     Ld$inop         | if equal return NaN
+1:     
+       andl    IMM (0x80000000),d7 | get a's sign bit '
+       cmpl    d4,d0           | test now for infinity
+       beq     Ld$infty        | if a is INFINITY return with this sign
+       bchg    IMM (31),d7     | else we know b is INFINITY and has
+       bra     Ld$infty        | the opposite sign
+
+|=============================================================================
+|                              __muldf3
+|=============================================================================
+
+| double __muldf3(double, double);
+| Entry of the double multiply: save d2-d7, load a and b from the frame,
+| park the sign of the product in a0, dispatch the special operands
+| (zero, NaN, INFINITY) and unpack the exponent and fraction of a.
+       FUNC(__muldf3)
+SYM (__muldf3):
+#ifndef __mcoldfire__
+       link    a6,IMM (0)
+       moveml  d2-d7,sp@-
+#else
+       link    a6,IMM (-24)
+       moveml  d2-d7,sp@
+#endif
+       movel   a6@(8),d0               | get a into d0-d1
+       movel   a6@(12),d1              | 
+       movel   a6@(16),d2              | and b into d2-d3
+       movel   a6@(20),d3              |
+       movel   d0,d7                   | d7 will hold the sign of the product
+       eorl    d2,d7                   | (exclusive or of both sign bits)
+       andl    IMM (0x80000000),d7     | keep only bit 31
+       movel   d7,a0                   | save sign bit into a0 
+       movel   IMM (0x7ff00000),d7     | useful constant (+INFINITY)
+       movel   d7,d6                   | another (mask for fraction)
+       notl    d6                      | d6 = complement of the exponent mask
+       bclr    IMM (31),d0             | get rid of a's sign bit '
+       movel   d0,d4                   | 
+       orl     d1,d4                   | zero only if both longs of a are zero
+       beq     Lmuldf$a$0              | branch if a is zero
+       movel   d0,d4                   | d4 = high long of a (for the exponent)
+       bclr    IMM (31),d2             | get rid of b's sign bit '
+       movel   d2,d5                   |
+       orl     d3,d5                   | zero only if both longs of b are zero
+       beq     Lmuldf$b$0              | branch if b is zero
+       movel   d2,d5                   | d5 = high long of b (for the exponent)
+       cmpl    d7,d0                   | is a big?
+       bhi     Lmuldf$inop             | if a is NaN return NaN
+       beq     Lmuldf$a$nf             | we still have to check d1 and b ...
+       cmpl    d7,d2                   | now compare b with INFINITY
+       bhi     Lmuldf$inop             | is b NaN?
+       beq     Lmuldf$b$nf             | we still have to check d3 ...
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d4 and d5.
+       andl    d7,d4                   | isolate exponent in d4
+       beq     Lmuldf$a$den            | if exponent zero, have denormalized
+       andl    d6,d0                   | isolate fraction
+       orl     IMM (0x00100000),d0     | and put hidden bit back
+       swap    d4                      | I like exponents in the first byte
+#ifndef __mcoldfire__
+       lsrw    IMM (4),d4              | exponent now right-justified in d4
+#else
+       lsrl    IMM (4),d4              | exponent now right-justified in d4
+#endif
+Lmuldf$1:                      | a is unpacked (Lmuldf$a$den re-enters here)
+       andl    d7,d5                   | isolate exponent of b in d5
+       beq     Lmuldf$b$den            | if exponent zero, b is denormalized
+       andl    d6,d2                   | isolate fraction
+       orl     IMM (0x00100000),d2     | and put hidden bit back
+       swap    d5                      | exponent into the low word
+#ifndef __mcoldfire__
+       lsrw    IMM (4),d5              | and right-justify it
+#else
+       lsrl    IMM (4),d5              | and right-justify it
+#endif
+Lmuldf$2:                              | b is unpacked (Lmuldf$b$den re-enters here)
+#ifndef __mcoldfire__
+       addw    d5,d4                   | add exponents
+       subw    IMM (D_BIAS+1),d4       | and subtract bias (plus one)
+#else
+       addl    d5,d4                   | add exponents
+       subl    IMM (D_BIAS+1),d4       | and subtract bias (plus one)
+#endif
+
+| We are now ready to do the multiplication. The situation is as follows:
+| both a and b have bit 52 ( bit 20 of d0 and d2) set (even if they were 
+| denormalized to start with!), which means that in the product bit 104 
+| (which will correspond to bit 8 of the fourth long) is set.
+
+| Here we have to do the product.
+| To do it we have to juggle the registers back and forth, as there are not
+| enough to keep everything in them. So we use the address registers to keep
+| some intermediate data.
+
+#ifndef __mcoldfire__
+       moveml  a2-a3,sp@-      | save a2 and a3 for temporary use
+#else
+       movel   a2,sp@-         | save a2, a3 and also a4, which the
+       movel   a3,sp@-         | ColdFire code uses as scratch when it
+       movel   a4,sp@-         | swaps d7 with an address register
+#endif
+       movel   IMM (0),a2      | a2 is a null register
+       movel   d4,a3           | and a3 will preserve the exponent
+
+| First, shift d2-d3 so bit 20 becomes bit 31:
+#ifndef __mcoldfire__
+       rorl    IMM (5),d2      | rotate d2 5 places right
+       swap    d2              | and swap it
+       rorl    IMM (5),d3      | do the same thing with d3
+       swap    d3              |
+       movew   d3,d6           | get the rightmost 11 bits of d3
+       andw    IMM (0x07ff),d6 |
+       orw     d6,d2           | and put them into d2
+       andw    IMM (0xf800),d3 | clear those bits in d3
+#else
+       moveq   IMM (11),d7     | left shift d2 11 bits
+       lsll    d7,d2
+       movel   d3,d6           | get a copy of d3
+       lsll    d7,d3           | left shift d3 11 bits
+       andl    IMM (0xffe00000),d6 | get the top 11 bits of d3
+       moveq   IMM (21),d7     | right shift them 21 bits
+       lsrl    d7,d6
+       orl     d6,d2           | stick them at the end of d2
+#endif
+
+       movel   d2,d6           | move b into d6-d7
+       movel   d3,d7           |
+       movel   d0,d4           | move a into d4-d5
+       movel   d1,d5           |
+       movel   IMM (0),d3      | and clear d0-d1-d2-d3 (to put result)
+       movel   d3,d2           |
+       movel   d3,d1           |
+       movel   d3,d0           |
+
+| We use a1 as counter:        
+       movel   IMM (DBL_MANT_DIG-1),a1         | initial count
+| The low long of b lives in d7 inside the loop body, so the counter and
+| d7 are swapped on entry to and exit from every iteration.
+#ifndef __mcoldfire__
+       exg     d7,a1
+#else
+       movel   d7,a4
+       movel   a1,d7
+       movel   a4,a1
+#endif
+
+1:
+#ifndef __mcoldfire__
+       exg     d7,a1           | put counter back in a1
+#else
+       movel   d7,a4
+       movel   a1,d7
+       movel   a4,a1
+#endif
+       addl    d3,d3           | shift sum once left
+       addxl   d2,d2           |
+       addxl   d1,d1           |
+       addxl   d0,d0           |
+       addl    d7,d7           | shift b once left; its top bit
+       addxl   d6,d6           | drops into the carry
+       bcc     2f              | if bit clear skip the following
+| Swap d7 with the null register a2 so that the two addxl d7 below add
+| only the carry into the upper longs of the sum.
+#ifndef __mcoldfire__
+       exg     d7,a2           |
+#else
+       movel   d7,a4
+       movel   a2,d7
+       movel   a4,a2
+#endif
+       addl    d5,d3           | else add a to the sum
+       addxl   d4,d2           |
+       addxl   d7,d1           |
+       addxl   d7,d0           |
+#ifndef __mcoldfire__
+       exg     d7,a2           | 
+#else
+       movel   d7,a4
+       movel   a2,d7
+       movel   a4,a2
+#endif
+2:
+#ifndef __mcoldfire__
+       exg     d7,a1           | put counter in d7
+       dbf     d7,1b           | decrement and branch
+#else
+       movel   d7,a4
+       movel   a1,d7
+       movel   a4,a1
+       subql   IMM (1),d7
+       bpl     1b
+#endif
+
+       movel   a3,d4           | restore exponent
+#ifndef __mcoldfire__
+       moveml  sp@+,a2-a3
+#else
+       movel   sp@+,a4
+       movel   sp@+,a3
+       movel   sp@+,a2
+#endif
+
+| Now we have the product in d0-d1-d2-d3, with bit 8 of d0 set. The 
+| first thing to do now is to normalize it so bit 8 becomes bit 
+| DBL_MANT_DIG-32 (to do the rounding); later we will shift right.
+| This is done as a 16-bit left shift (the swap/movew sequence) followed
+| by a 3-bit right shift of the whole four-long value.
+       swap    d0
+       swap    d1
+       movew   d1,d0
+       swap    d2
+       movew   d2,d1
+       swap    d3
+       movew   d3,d2
+       movew   IMM (0),d3
+#ifndef __mcoldfire__
+       lsrl    IMM (1),d0
+       roxrl   IMM (1),d1
+       roxrl   IMM (1),d2
+       roxrl   IMM (1),d3
+       lsrl    IMM (1),d0
+       roxrl   IMM (1),d1
+       roxrl   IMM (1),d2
+       roxrl   IMM (1),d3
+       lsrl    IMM (1),d0
+       roxrl   IMM (1),d1
+       roxrl   IMM (1),d2
+       roxrl   IMM (1),d3
+#else
+       moveq   IMM (29),d6
+       lsrl    IMM (3),d3
+       movel   d2,d7
+       lsll    d6,d7
+       orl     d7,d3
+       lsrl    IMM (3),d2
+       movel   d1,d7
+       lsll    d6,d7
+       orl     d7,d2
+       lsrl    IMM (3),d1
+       movel   d0,d7
+       lsll    d6,d7
+       orl     d7,d1
+       lsrl    IMM (3),d0
+#endif
+       
+| Now round, check for over- and underflow, and exit.
+       movel   a0,d7           | get sign bit back into d7
+       moveq   IMM (MULTIPLY),d5
+
+| If bit DBL_MANT_DIG+1-32 of d0 is set the fraction is one bit too wide:
+| shift d0-d1 right once more and bump the exponent.
+       btst    IMM (DBL_MANT_DIG+1-32),d0
+       beq     Lround$exit
+#ifndef __mcoldfire__
+       lsrl    IMM (1),d0
+       roxrl   IMM (1),d1
+       addw    IMM (1),d4
+#else
+       lsrl    IMM (1),d1
+       btst    IMM (0),d0
+       beq     10f
+       bset    IMM (31),d1
+10:    lsrl    IMM (1),d0
+       addl    IMM (1),d4
+#endif
+       bra     Lround$exit
+
+Lmuldf$inop:
+| An operand is NaN: load the operation code and take the common
+| invalid-operation exit.
+       moveq   IMM (MULTIPLY),d5
+       bra     Ld$inop
+
+Lmuldf$b$nf:
+| b has d2 == 0x7ff00000; a was already found to be finite and nonzero.
+       moveq   IMM (MULTIPLY),d5
+       movel   a0,d7           | get sign bit back into d7
+       tstl    d3              | we know d2 == 0x7ff00000, so check d3
+       bne     Ld$inop         | if d3 <> 0 b is NaN
+       bra     Ld$overflow     | else we have overflow (since a is finite)
+
+Lmuldf$a$nf:
+| a has d0 == 0x7ff00000 (sign already cleared). b is known to be nonzero
+| but has not been classified yet, so it may still be NaN.
+       moveq   IMM (MULTIPLY),d5
+       movel   a0,d7           | get sign bit back into d7
+       tstl    d1              | we know d0 == 0x7ff00000, so check d1
+       bne     Ld$inop         | if d1 <> 0 a is NaN
+| a is +/-INFINITY. The entry code branched here before testing b, so
+| test it now: INFINITY * NaN has to give NaN, not INFINITY.
+       cmpl    IMM (0x7ff00000),d2 | d2 already has its sign bit cleared
+       bhi     Ld$inop         | b is NaN
+       bne     1f              | b is finite (and nonzero)
+       tstl    d3              | d2 == 0x7ff00000, so check d3
+       bne     Ld$inop         | if d3 <> 0 b is NaN
+1:     bra     Ld$overflow     | else signal overflow
+
+| If either number is zero return zero, unless the other is +/-INFINITY or
+| NaN, in which case we return NaN.
+| Both entry points load the operation code into d5 first, because the
+| shared tail at label 1 may leave through Ld$inop.
+Lmuldf$b$0:
+       moveq   IMM (MULTIPLY),d5
+#ifndef __mcoldfire__
+       exg     d2,d0           | put b (==0) into d0-d1
+       exg     d3,d1           | and a (with sign bit cleared) into d2-d3
+       movel   a0,d0           | set result sign
+#else
+       movel   d0,d2           | put a into d2-d3
+       movel   d1,d3
+       movel   a0,d0           | put result zero into d0-d1
+       movq    IMM(0),d1
+#endif
+       bra     1f
+Lmuldf$a$0:
+       moveq   IMM (MULTIPLY),d5 | operation code (was missing on this path)
+       movel   a0,d0           | set result sign (d1 is already zero)
+       movel   a6@(16),d2      | put b into d2-d3 again
+       movel   a6@(20),d3      |
+       bclr    IMM (31),d2     | clear sign bit
+1:     cmpl    IMM (0x7ff00000),d2 | check for non-finiteness
+       bge     Ld$inop         | in case NaN or +/-INFINITY return NaN
+       PICLEA  SYM (_fpCCR),a0 | clear the first word of _fpCCR
+       movew   IMM (0),a0@
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6
+       rts
+
+| If a number is denormalized we put an exponent of 1 but do not put the 
+| hidden bit back into the fraction; instead we shift left until bit 20
+| (the hidden bit) is set, adjusting the exponent accordingly. We do this
+| to ensure that the product of the fractions is close to 1.
+Lmuldf$a$den:
+       movel   IMM (1),d4
+       andl    d6,d0
+1:     addl    d1,d1           | shift a left until bit 20 is set
+       addxl   d0,d0           |
+#ifndef __mcoldfire__
+       subw    IMM (1),d4      | and adjust exponent
+#else
+       subl    IMM (1),d4      | and adjust exponent
+#endif
+       btst    IMM (20),d0     | hidden bit reached?
+       bne     Lmuldf$1        | yes: go on with b
+       bra     1b
+
+Lmuldf$b$den:
+| Same normalization as Lmuldf$a$den, applied to b (d2-d3, exponent in d5).
+       movel   IMM (1),d5
+       andl    d6,d2
+1:     addl    d3,d3           | shift b left until bit 20 is set
+       addxl   d2,d2           |
+#ifndef __mcoldfire__
+       subw    IMM (1),d5      | and adjust exponent
+#else
+       subql   IMM (1),d5      | and adjust exponent
+#endif
+       btst    IMM (20),d2     | hidden bit reached?
+       bne     Lmuldf$2        | yes: go on with the multiplication
+       bra     1b
+
+
+|=============================================================================
+|                              __divdf3
+|=============================================================================
+
+| double __divdf3(double, double);
+| Entry of the double divide (a / b): save d2-d7, load the operands, park
+| the sign of the quotient in a0, dispatch the special operands (zero,
+| NaN, INFINITY) and unpack the exponent and fraction of a.
+       FUNC(__divdf3)
+SYM (__divdf3):
+#ifndef __mcoldfire__
+       link    a6,IMM (0)
+       moveml  d2-d7,sp@-
+#else
+       link    a6,IMM (-24)
+       moveml  d2-d7,sp@
+#endif
+       movel   a6@(8),d0       | get a into d0-d1
+       movel   a6@(12),d1      | 
+       movel   a6@(16),d2      | and b into d2-d3
+       movel   a6@(20),d3      |
+       movel   d0,d7           | d7 will hold the sign of the result
+       eorl    d2,d7           | (exclusive or of both sign bits)
+       andl    IMM (0x80000000),d7
+       movel   d7,a0           | save sign into a0
+       movel   IMM (0x7ff00000),d7 | useful constant (+INFINITY)
+       movel   d7,d6           | another (mask for fraction)
+       notl    d6              | d6 = complement of the exponent mask
+       bclr    IMM (31),d0     | get rid of a's sign bit '
+       movel   d0,d4           |
+       orl     d1,d4           | zero only if both longs of a are zero
+       beq     Ldivdf$a$0      | branch if a is zero
+       movel   d0,d4           | d4 = high long of a (for the exponent)
+       bclr    IMM (31),d2     | get rid of b's sign bit '
+       movel   d2,d5           |
+       orl     d3,d5           | zero only if both longs of b are zero
+       beq     Ldivdf$b$0      | branch if b is zero
+       movel   d2,d5           | d5 = high long of b (for the exponent)
+       cmpl    d7,d0           | is a big?
+       bhi     Ldivdf$inop     | if a is NaN return NaN
+       beq     Ldivdf$a$nf     | if d0 == 0x7ff00000 we check d1
+       cmpl    d7,d2           | now compare b with INFINITY 
+       bhi     Ldivdf$inop     | if b is NaN return NaN
+       beq     Ldivdf$b$nf     | if d2 == 0x7ff00000 we check d3
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d4 and d5 and normalize the numbers to
+| ensure that the ratio of the fractions is around 1. We do this by
+| making sure that both numbers have bit #DBL_MANT_DIG-32-1 (hidden bit)
+| set, even if they were denormalized to start with.
+| Thus, the result will satisfy: 2 > result > 1/2.
+       andl    d7,d4           | and isolate exponent in d4
+       beq     Ldivdf$a$den    | if exponent is zero we have a denormalized
+       andl    d6,d0           | and isolate fraction
+       orl     IMM (0x00100000),d0 | and put hidden bit back
+       swap    d4              | I like exponents in the first byte
+#ifndef __mcoldfire__
+       lsrw    IMM (4),d4      | exponent now right-justified in d4
+#else
+       lsrl    IMM (4),d4      | exponent now right-justified in d4
+#endif
+Ldivdf$1:                      | a is unpacked (Ldivdf$a$den re-enters here)
+       andl    d7,d5           | isolate exponent of b in d5
+       beq     Ldivdf$b$den    | if exponent zero, b is denormalized
+       andl    d6,d2           | isolate fraction
+       orl     IMM (0x00100000),d2
+       swap    d5              | exponent into the low word
+#ifndef __mcoldfire__
+       lsrw    IMM (4),d5      | and right-justify it
+#else
+       lsrl    IMM (4),d5      | and right-justify it
+#endif
+Ldivdf$2:                      | b is unpacked (Ldivdf$b$den re-enters here)
+#ifndef __mcoldfire__
+       subw    d5,d4           | subtract exponents
+       addw    IMM (D_BIAS),d4 | and add bias
+#else
+       subl    d5,d4           | subtract exponents
+       addl    IMM (D_BIAS),d4 | and add bias
+#endif
+
+| We are now ready to do the division. We have prepared things in such a way
+| that the ratio of the fractions will be less than 2 but greater than 1/2.
+| At this point the registers in use are:
+| d0-d1        hold a (first operand, bit DBL_MANT_DIG-32=0, bit 
+| DBL_MANT_DIG-1-32=1)
+| d2-d3        hold b (second operand, bit DBL_MANT_DIG-32=1)
+| d4   holds the difference of the exponents, corrected by the bias
+| a0   holds the sign of the ratio
+
+| To do the rounding correctly we need to keep information about the
+| nonsignificant bits. One way to do this would be to do the division
+| using four registers; another is to use two registers (as originally
+| I did), but use a sticky bit to preserve information about the 
+| fractional part. Note that we can keep that info in a1, which is not
+| used.
+       movel   IMM (0),d6      | d6-d7 will hold the result
+       movel   d6,d7           | 
+       movel   IMM (0),a1      | and a1 will hold the sticky bit
+
+       movel   IMM (DBL_MANT_DIG-32+1),d5      | bit number of the first quotient bit
+       
+| First loop: restoring division, one quotient bit per pass, filling the
+| high long of the quotient (d6) from bit DBL_MANT_DIG-32+1 down to bit 0.
+1:     cmpl    d0,d2           | is a < b?
+       bhi     3f              | if b > a skip the following
+       beq     4f              | if d0==d2 check d1 and d3
+2:     subl    d3,d1           | 
+       subxl   d2,d0           | a <-- a - b
+       bset    d5,d6           | set the corresponding bit in d6
+3:     addl    d1,d1           | shift a by 1
+       addxl   d0,d0           |
+#ifndef __mcoldfire__
+       dbra    d5,1b           | and branch back
+#else
+       subql   IMM (1), d5
+       bpl     1b
+#endif
+       bra     5f                      | high long done
+4:     cmpl    d1,d3           | here d0==d2, so check d1 and d3
+       bhi     3b              | if d3 > d1 skip the subtraction
+       bra     2b              | else go do it
+5:
+| Here we have to start setting the bits in the second long.
+       movel   IMM (31),d5     | again d5 is counter
+
+1:     cmpl    d0,d2           | is a < b?
+       bhi     3f              | if b > a skip the following
+       beq     4f              | if d0==d2 check d1 and d3
+2:     subl    d3,d1           | 
+       subxl   d2,d0           | a <-- a - b
+       bset    d5,d7           | set the corresponding bit in d7
+3:     addl    d1,d1           | shift a by 1
+       addxl   d0,d0           |
+#ifndef __mcoldfire__
+       dbra    d5,1b           | and branch back
+#else
+       subql   IMM (1), d5
+       bpl     1b
+#endif
+       bra     5f                      | low long done
+4:     cmpl    d1,d3           | here d0==d2, so check d1 and d3
+       bhi     3b              | if d3 > d1 skip the subtraction
+       bra     2b              | else go do it
+5:
+| Now go ahead checking until we hit a one, which we store in d2.
+       movel   IMM (DBL_MANT_DIG),d5
+1:     cmpl    d2,d0           | is a < b?
+       bhi     4f              | if b < a, exit
+       beq     3f              | if d0==d2 check d1 and d3
+2:     addl    d1,d1           | shift a by 1
+       addxl   d0,d0           |
+#ifndef __mcoldfire__
+       dbra    d5,1b           | and branch back
+#else
+       subql   IMM (1), d5
+       bpl     1b
+#endif
+       movel   IMM (0),d2      | here no sticky bit was found
+       movel   d2,d3
+       bra     5f                      
+3:     cmpl    d1,d3           | here d0==d2, so check d1 and d3
+       bhi     2b              | if d3 > d1 go back
+4:
+| Here put the sticky bit in d2-d3 (in the position which actually corresponds
+| to it; if you don't do this the algorithm loses in some cases). '
+       movel   IMM (0),d2
+       movel   d2,d3
+#ifndef __mcoldfire__
+       subw    IMM (DBL_MANT_DIG),d5
+       addw    IMM (63),d5
+       cmpw    IMM (31),d5
+#else
+       subl    IMM (DBL_MANT_DIG),d5
+       addl    IMM (63),d5
+       cmpl    IMM (31),d5
+#endif
+       bhi     2f              | position above 31: the bit goes into d2
+1:     bset    d5,d3
+       bra     5f
+| NOTE(review): the subtraction below is never executed; it follows an
+| unconditional branch and sits before label 2, which is the branch target.
+#ifndef __mcoldfire__
+       subw    IMM (32),d5
+#else
+       subl    IMM (32),d5
+#endif
+2:     bset    d5,d2
+5:
+| Finally we are finished! Move the quotient from d6-d7 to its final
+| destination in d0-d1:
+       movel   d6,d0
+       movel   d7,d1
+| NOTE(review): this clears d3 again, so a sticky bit that was just set in
+| d3 (rather than d2) is dropped here - confirm that this is intended.
+       movel   IMM (0),d3
+
+| Here we have finished the division, with the result in d0-d1-d2-d3, with
+| 2^21 <= d6 < 2^23. Thus bit 23 is not set, but bit 22 could be set.
+| If it is not, then definitely bit 21 is set. Normalize so bit 22 is
+| not set:
+       btst    IMM (DBL_MANT_DIG-32+1),d0
+       beq     1f
+#ifndef __mcoldfire__
+       lsrl    IMM (1),d0
+       roxrl   IMM (1),d1
+       roxrl   IMM (1),d2
+       roxrl   IMM (1),d3
+       addw    IMM (1),d4
+#else
+       lsrl    IMM (1),d3
+       btst    IMM (0),d2
+       beq     10f
+       bset    IMM (31),d3
+10:    lsrl    IMM (1),d2
+       btst    IMM (0),d1
+       beq     11f
+       bset    IMM (31),d2
+11:    lsrl    IMM (1),d1
+       btst    IMM (0),d0
+       beq     12f
+       bset    IMM (31),d1
+12:    lsrl    IMM (1),d0
+       addl    IMM (1),d4
+#endif
+1:
+| Now round, check for over- and underflow, and exit.
+       movel   a0,d7           | restore sign bit to d7
+       moveq   IMM (DIVIDE),d5
+       bra     Lround$exit
+
+Ldivdf$inop:
+| An operand is NaN: load the operation code and take the common
+| invalid-operation exit.
+       moveq   IMM (DIVIDE),d5
+       bra     Ld$inop
+
+Ldivdf$a$0:
+| If a is zero check to see whether b is zero also. In that case return
+| NaN; then check if b is NaN, and return NaN also in that case. Else
+| return a properly signed zero.
+       moveq   IMM (DIVIDE),d5
+       bclr    IMM (31),d2     | the sign of b has not been cleared yet
+       movel   d2,d4           | 
+       orl     d3,d4           | zero only if both longs of b are zero
+       beq     Ld$inop         | if b is also zero return NaN
+       cmpl    IMM (0x7ff00000),d2 | check for NaN
+       bhi     Ld$inop         | 
+       blt     1f              | b is finite
+       tstl    d3              | d2 == 0x7ff00000, so check d3
+       bne     Ld$inop         | if d3 <> 0 b is NaN
+1:     movel   a0,d0           | else return signed zero
+       moveq   IMM(0),d1       | 
+       PICLEA  SYM (_fpCCR),a0 | clear exception flags
+       movew   IMM (0),a0@     |
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7      | 
+#else
+       moveml  sp@,d2-d7       | 
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6              | 
+       rts                     |       
+
+Ldivdf$b$0:
+       moveq   IMM (DIVIDE),d5
+| If we got here a is not zero. Check if a is NaN; in that case return NaN,
+| else return +/-INFINITY. Remember that a is in d0 with the sign bit 
+| cleared already.
+       movel   a0,d7           | put the sign of the result back in d7
+       cmpl    IMM (0x7ff00000),d0 | compare d0 with INFINITY
+       bhi     Ld$inop         | if larger it is NaN
+| The low long only tells NaN from INFINITY when d0 == 0x7ff00000; for a
+| finite a it is ordinary fraction bits and must not be tested.
+       blt     1f              | a is finite
+       tstl    d1              | d0 == 0x7ff00000, so check d1
+       bne     Ld$inop         | if d1 <> 0 a is NaN
+1:     bra     Ld$div$0        | else signal DIVIDE_BY_ZERO
+
+Ldivdf$b$nf:
+| b has d2 == 0x7ff00000; a was already found to be finite and nonzero.
+| NOTE(review): d7 still holds 0x7ff00000 here, not the sign saved in a0;
+| confirm that Ld$underflow does not expect the sign in d7.
+       moveq   IMM (DIVIDE),d5
+| If d2 == 0x7ff00000 we have to check d3.
+       tstl    d3              |
+       bne     Ld$inop         | if d3 <> 0, b is NaN
+       bra     Ld$underflow    | else b is +/-INFINITY, so signal underflow
+
+Ldivdf$a$nf:
+| a has d0 == 0x7ff00000 (sign already cleared). b is known to be nonzero
+| but has not been classified yet.
+       moveq   IMM (DIVIDE),d5
+| If d0 == 0x7ff00000 we have to check d1.
+       tstl    d1              |
+       bne     Ld$inop         | if d1 <> 0, a is NaN
+| If a is INFINITY we have to check b
+       cmpl    d7,d2           | compare b with INFINITY (d7 == 0x7ff00000)
+       bge     Ld$inop         | if b is NaN or INFINITY return NaN
+| b is finite here, so the quotient is INFINITY whatever the low long of
+| b holds. d7 still contains the constant, so reload the sign saved in a0
+| before leaving, as the other callers of Ld$overflow do.
+       movel   a0,d7           | get sign bit back into d7
+       bra     Ld$overflow     | else return overflow
+
+| If a number is denormalized we put an exponent of 1 but do not put the 
+| bit back into the fraction. Instead the fraction is shifted left until
+| the hidden bit position is set, decrementing the exponent on each shift.
+Ldivdf$a$den:
+       movel   IMM (1),d4
+       andl    d6,d0
+1:     addl    d1,d1           | shift a left until bit 20 is set
+       addxl   d0,d0
+#ifndef __mcoldfire__
+       subw    IMM (1),d4      | and adjust exponent
+#else
+       subl    IMM (1),d4      | and adjust exponent
+#endif
+       btst    IMM (DBL_MANT_DIG-32-1),d0
+       bne     Ldivdf$1
+       bra     1b
+
+Ldivdf$b$den:
+| Same normalization as Ldivdf$a$den, applied to b (d2-d3, exponent in d5).
+       movel   IMM (1),d5
+       andl    d6,d2
+1:     addl    d3,d3           | shift b left until bit 20 is set
+       addxl   d2,d2
+#ifndef __mcoldfire__
+       subw    IMM (1),d5      | and adjust exponent
+#else
+       subql   IMM (1),d5      | and adjust exponent
+#endif
+       btst    IMM (DBL_MANT_DIG-32-1),d2
+       bne     Ldivdf$2
+       bra     1b
+
+Lround$exit:
+| This is a common exit point for __muldf3 and __divdf3. When they enter
+| this point the sign of the result is in d7, the result in d0-d1, normalized
+| so that 2^21 <= d0 < 2^22, and the exponent is in the lower word of d4
+| (the ColdFire code compares the whole of d4).
+
+| First check for underflow in the exponent:
+#ifndef __mcoldfire__
+       cmpw    IMM (-DBL_MANT_DIG-1),d4                
+#else
+       cmpl    IMM (-DBL_MANT_DIG-1),d4                
+#endif
+       blt     Ld$underflow    
+| It could happen that the exponent is less than 1, in which case the 
+| number is denormalized. In this case we shift right and adjust the 
+| exponent until it becomes 1 or the fraction is zero (in the latter case 
+| we signal underflow and return zero).
+       movel   d7,a0           | park the sign in a0 while d7 is reused
+       movel   IMM (0),d6      | use d6-d7 to collect bits flushed right
+       movel   d6,d7           | use d6-d7 to collect bits flushed right
+#ifndef __mcoldfire__
+       cmpw    IMM (1),d4      | if the exponent is less than 1 we 
+#else
+       cmpl    IMM (1),d4      | if the exponent is less than 1 we 
+#endif
+       bge     2f              | have to shift right (denormalize)
+1:
+#ifndef __mcoldfire__
+       addw    IMM (1),d4      | adjust the exponent
+       lsrl    IMM (1),d0      | shift right once 
+       roxrl   IMM (1),d1      |
+       roxrl   IMM (1),d2      |
+       roxrl   IMM (1),d3      |
+       roxrl   IMM (1),d6      | 
+       roxrl   IMM (1),d7      |
+       cmpw    IMM (1),d4      | is the exponent 1 already?
+#else
+       addl    IMM (1),d4      | adjust the exponent
+       lsrl    IMM (1),d7
+       btst    IMM (0),d6
+       beq     13f
+       bset    IMM (31),d7
+13:    lsrl    IMM (1),d6
+       btst    IMM (0),d3
+       beq     14f
+       bset    IMM (31),d6
+14:    lsrl    IMM (1),d3
+       btst    IMM (0),d2
+       beq     10f
+       bset    IMM (31),d3
+10:    lsrl    IMM (1),d2
+       btst    IMM (0),d1
+       beq     11f
+       bset    IMM (31),d2
+11:    lsrl    IMM (1),d1
+       btst    IMM (0),d0
+       beq     12f
+       bset    IMM (31),d1
+12:    lsrl    IMM (1),d0
+       cmpl    IMM (1),d4      | is the exponent 1 already?
+#endif
+       beq     2f              | if not loop back
+       bra     1b              |
+       bra     Ld$underflow    | safety check, shouldn't execute '
+2:     orl     d6,d2           | this is a trick so we don't lose  '
+       orl     d7,d3           | the bits which were flushed right
+       movel   a0,d7           | get back sign bit into d7
+| Now call the rounding routine (which takes care of denormalized numbers):
+       lea     pc@(Lround$0),a0 | to return from rounding routine
+       PICLEA  SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+       clrl    d6
+#endif
+       movew   a1@(6),d6       | rounding mode in d6
+       beq     Lround$to$nearest
+#ifndef __mcoldfire__
+       cmpw    IMM (ROUND_TO_PLUS),d6
+#else
+       cmpl    IMM (ROUND_TO_PLUS),d6
+#endif
+       bhi     Lround$to$minus
+       blt     Lround$to$zero
+       bra     Lround$to$plus
+Lround$0:
+| Here we have a correctly rounded result (either normalized or denormalized).
+| Lround$exit loads the address of this label into a0 as the return point
+| for the rounding routine.
+
+| Here we should have either a normalized number or a denormalized one, and
+| the exponent is necessarily larger or equal to 1 (so we don't have to  '
+| check again for underflow!). We have to check for overflow or for a 
+| denormalized number (which also signals underflow).
+| Check for overflow (i.e., exponent >= 0x7ff).
+#ifndef __mcoldfire__
+       cmpw    IMM (0x07ff),d4
+#else
+       cmpl    IMM (0x07ff),d4
+#endif
+       bge     Ld$overflow
+| Now check for a denormalized number (exponent==0):
+       movew   d4,d4           | sets the condition codes from the exponent
+       beq     Ld$den
+1:
+| Put back the exponents and sign and return.
+#ifndef __mcoldfire__
+       lslw    IMM (4),d4      | exponent back to fourth byte
+#else
+       lsll    IMM (4),d4      | exponent back to fourth byte
+#endif
+       bclr    IMM (DBL_MANT_DIG-32-1),d0
+       swap    d0              | and put back exponent
+#ifndef __mcoldfire__
+       orw     d4,d0           | 
+#else
+       orl     d4,d0           | 
+#endif
+       swap    d0              |
+       orl     d7,d0           | and sign also
+
+       PICLEA  SYM (_fpCCR),a0 | clear the first word of _fpCCR
+       movew   IMM (0),a0@
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6
+       rts
+
+|=============================================================================
+|                              __negdf2
+|=============================================================================
+
+| double __negdf2(double);
+| Returns the argument with its sign bit flipped. A zero of either sign
+| gives +zero, and NaN or INFINITY leave through Ld$inop or Ld$infty.
+       FUNC(__negdf2)
+SYM (__negdf2):
+#ifndef __mcoldfire__
+       link    a6,IMM (0)
+       moveml  d2-d7,sp@-
+#else
+       link    a6,IMM (-24)
+       moveml  d2-d7,sp@
+#endif
+       moveq   IMM (NEGATE),d5
+       movel   a6@(8),d0       | get number to negate in d0-d1
+       movel   a6@(12),d1      |
+       bchg    IMM (31),d0     | negate
+       movel   d0,d2           | make a positive copy (for the tests)
+       bclr    IMM (31),d2     |
+       movel   d2,d4           | check for zero
+       orl     d1,d4           |
+       beq     2f              | if zero (either sign) return +zero
+       cmpl    IMM (0x7ff00000),d2 | compare to +INFINITY
+       blt     1f              | if finite, return
+       bhi     Ld$inop         | if larger (fraction not zero) is NaN
+       tstl    d1              | if d2 == 0x7ff00000 check d1
+       bne     Ld$inop         |
+       movel   d0,d7           | else get sign and return INFINITY
+       andl    IMM (0x80000000),d7
+       bra     Ld$infty                
+1:     PICLEA  SYM (_fpCCR),a0 | clear the first word of _fpCCR
+       movew   IMM (0),a0@
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6
+       rts
+2:     bclr    IMM (31),d0     | force the sign of the zero to plus
+       bra     1b
+
+|=============================================================================
+|                              __cmpdf2
+|=============================================================================
+
+| Values returned in d0 by the comparison.
+GREATER =  1
+LESS    = -1
+EQUAL   =  0
+
+| int __cmpdf2_internal(double, double, int);
+| Compares a (first argument) with b (second argument) and returns
+| GREATER, LESS or EQUAL. The third argument, at a6@(24), is what
+| Lcmpd$inop returns when an operand is NaN.
+SYM (__cmpdf2_internal):
+#ifndef __mcoldfire__
+       link    a6,IMM (0)
+       moveml  d2-d7,sp@-      | save registers
+#else
+       link    a6,IMM (-24)
+       moveml  d2-d7,sp@
+#endif
+       moveq   IMM (COMPARE),d5
+       movel   a6@(8),d0       | get first operand
+       movel   a6@(12),d1      |
+       movel   a6@(16),d2      | get second operand
+       movel   a6@(20),d3      |
+| First check if a and/or b are (+/-) zero and in that case clear
+| the sign bit.
+       movel   d0,d6           | copy signs into d6 (a) and d7(b)
+       bclr    IMM (31),d0     | and clear signs in d0 and d2
+       movel   d2,d7           |
+       bclr    IMM (31),d2     |
+       cmpl    IMM (0x7ff00000),d0 | check for a == NaN
+       bhi     Lcmpd$inop              | if d0 > 0x7ff00000, a is NaN
+       beq     Lcmpdf$a$nf     | if equal can be INFINITY, so check d1
+       movel   d0,d4           | copy into d4 to test for zero
+       orl     d1,d4           |
+       beq     Lcmpdf$a$0      | a is zero: drop its sign
+Lcmpdf$0:
+| a has been classified; now run the same checks on b.
+       cmpl    IMM (0x7ff00000),d2 | check for b == NaN
+       bhi     Lcmpd$inop              | if d2 > 0x7ff00000, b is NaN
+       beq     Lcmpdf$b$nf     | if equal can be INFINITY, so check d3
+       movel   d2,d4           | copy into d4 to test for zero
+       orl     d3,d4           |
+       beq     Lcmpdf$b$0      | b is zero: drop its sign
+Lcmpdf$1:
+| Neither operand is NaN. d6 and d7 hold the signs of a and b in bit 31,
+| d0-d1 and d2-d3 the magnitudes.
+| Check the signs
+       eorl    d6,d7
+       bpl     1f
+| If the signs are not equal check if a >= 0
+       tstl    d6
+       bpl     Lcmpdf$a$gt$b   | if (a >= 0 && b < 0) => a > b
+       bmi     Lcmpdf$b$gt$a   | if (a < 0 && b >= 0) => a < b
+1:
+| If the signs are equal check for < 0
+       tstl    d6
+       bpl     1f
+| If both are negative exchange them
+#ifndef __mcoldfire__
+       exg     d0,d2
+       exg     d1,d3
+#else
+       movel   d0,d7
+       movel   d2,d0
+       movel   d7,d2
+       movel   d1,d7
+       movel   d3,d1
+       movel   d7,d3
+#endif
+1:
+| Now that they are positive we just compare them as longs (does this also
+| work for denormalized numbers?).
+       cmpl    d0,d2
+       bhi     Lcmpdf$b$gt$a   | |b| > |a|
+       bne     Lcmpdf$a$gt$b   | |b| < |a|
+| If we got here d0 == d2, so we compare d1 and d3.
+       cmpl    d1,d3
+       bhi     Lcmpdf$b$gt$a   | |b| > |a|
+       bne     Lcmpdf$a$gt$b   | |b| < |a|
+| If we got here a == b.
+       movel   IMM (EQUAL),d0
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7      | put back the registers
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6
+       rts
+Lcmpdf$a$gt$b:
+       movel   IMM (GREATER),d0
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7      | put back the registers
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6
+       rts
+Lcmpdf$b$gt$a:
+       movel   IMM (LESS),d0
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7      | put back the registers
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6
+       rts
+
+Lcmpdf$a$0:    
+       bclr    IMM (31),d6
+       bra     Lcmpdf$0
+Lcmpdf$b$0:
+       bclr    IMM (31),d7
+       bra     Lcmpdf$1
+
+| The high word of a (sign cleared) equals 0x7ff00000 here: a is INFINITY
+| when its low word d1 is zero and NaN otherwise.  A NaN has to leave through
+| Lcmpd$inop like the other NaN checks of this routine, so that d0 receives
+| the caller-supplied value found at a6@(24).
+Lcmpdf$a$nf:
+       tstl    d1
+       bne     Lcmpd$inop      | low word nonzero: a is NaN
+       bra     Lcmpdf$0        | a is INFINITY, go on and classify b
+
+| Same test for b, whose low word is in d3.
+Lcmpdf$b$nf:
+       tstl    d3
+       bne     Lcmpd$inop      | low word nonzero: b is NaN
+       bra     Lcmpdf$1        | b is INFINITY, go on and compare
+
+Lcmpd$inop:
+       movl    a6@(24),d0
+       moveq   IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+       moveq   IMM (DOUBLE_FLOAT),d6
+       PICJUMP $_exception_handler
+
+| int __cmpdf2(double, double);
+| Public wrapper around __cmpdf2_internal.  It pushes the constant 1 followed
+| by a copy of both operands (the four longs at a6@(8) to a6@(20)), so the
+| internal routine sees the operands at its own a6@(8) to a6@(20) and the
+| constant at a6@(24), which is the value it loads into d0 at Lcmpd$inop.
+| The result is whatever the internal routine leaves in d0.
+       FUNC(__cmpdf2)
+SYM (__cmpdf2):
+       link    a6,IMM (0)
+       pea     1               | fifth argument, ends up at a6@(24) in the callee
+       movl    a6@(20),sp@-    | second operand, second long
+       movl    a6@(16),sp@-    | second operand, first long
+       movl    a6@(12),sp@-    | first operand, second long
+       movl    a6@(8),sp@-     | first operand, first long
+       PICCALL SYM (__cmpdf2_internal)
+       unlk    a6              | reloads sp from a6, dropping the five pushed longs
+       rts
+
+|=============================================================================
+|                           rounding routines
+|=============================================================================
+
+| The rounding routines expect the number to be normalized in registers
+| d0-d1-d2-d3, with the exponent in register d4. They assume that the 
+| exponent is larger or equal to 1. They return a properly normalized number
+| if possible, and a denormalized number otherwise. The exponent is returned
+| in d4.
+
+| Round to nearest (ties to even).  Works on the fraction in d0-d1-d2-d3 and
+| the exponent in d4, and leaves through the address held in a0.
+Lround$to$nearest:
+| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"):
+| Here we assume that the exponent is not too small (this should be checked
+| before entering the rounding routine), but the number could be denormalized.
+
+| Check for denormalized numbers:
+1:     btst    IMM (DBL_MANT_DIG-32),d0
+       bne     2f              | if set the number is normalized
+| Normalize shifting left until bit #DBL_MANT_DIG-32 is set or the exponent 
+| is one (remember that a denormalized number corresponds to an 
+| exponent of -D_BIAS+1).
+#ifndef __mcoldfire__
+       cmpw    IMM (1),d4      | remember that the exponent is at least one
+#else
+       cmpl    IMM (1),d4      | remember that the exponent is at least one
+#endif
+       beq     2f              | an exponent of one means denormalized
+       addl    d3,d3           | else shift and adjust the exponent
+       addxl   d2,d2           | (the carry ripples up through all four
+       addxl   d1,d1           | registers, so this is one 128-bit shift
+       addxl   d0,d0           | to the left)
+#ifndef __mcoldfire__
+       dbra    d4,1b           | decrement the exponent and test again
+#else
+       subql   IMM (1), d4     | decrement the exponent
+       bpl     1b              | and test again
+#endif
+2:
+| Now round.  Bit 0 of d1 is the extra bit that the right shift below throws
+| away, and d2-d3 hold everything beneath it.  Calling the discarded part
+| delta: bit 0 clear means delta < 1, bit 0 set with d2-d3 zero means
+| delta == 1 (an exact tie), bit 0 set with d2-d3 nonzero means delta > 1.
+| If delta < 1, do nothing. If delta > 1, add 1 to d0-d1. 
+| If delta == 1, add bit 1 of d1 (the last significant bit) to itself, so
+| that the last significant bit is zero (even) after the shift.
+       btst    IMM (0),d1      | is delta < 1?
+       beq     2f              | if so, do not do anything
+       orl     d2,d3           | is delta == 1 (nothing set in d2-d3)?
+       bne     1f              | no, delta > 1: go add 1
+       movel   d1,d3           | yes: round to even
+       andl    IMM (2),d3      | bit 1 is the last significant bit
+       movel   IMM (0),d2      |
+       addl    d3,d1           | add it at its own position
+       addxl   d2,d0           | and propagate the carry into d0
+       bra     2f              | 
+1:     movel   IMM (1),d3      | else add 1 
+       movel   IMM (0),d2      |
+       addl    d3,d1           |
+       addxl   d2,d0
+| Shift right once (because we used bit #DBL_MANT_DIG-32!).
+2:
+#ifndef __mcoldfire__
+       lsrl    IMM (1),d0
+       roxrl   IMM (1),d1              
+#else
+       lsrl    IMM (1),d1      | same 64-bit shift without roxrl:
+       btst    IMM (0),d0      | bit 0 of d0 becomes
+       beq     10f
+       bset    IMM (31),d1     | bit 31 of d1
+10:    lsrl    IMM (1),d0
+#endif
+
+| Now check again bit #DBL_MANT_DIG-32 (rounding could have produced a
+| 'fraction overflow' ...).
+       btst    IMM (DBL_MANT_DIG-32),d0        
+       beq     1f
+#ifndef __mcoldfire__
+       lsrl    IMM (1),d0
+       roxrl   IMM (1),d1
+       addw    IMM (1),d4      | one more right shift, so bump the exponent
+#else
+       lsrl    IMM (1),d1
+       btst    IMM (0),d0
+       beq     10f
+       bset    IMM (31),d1
+10:    lsrl    IMM (1),d0
+       addl    IMM (1),d4      | one more right shift, so bump the exponent
+#endif
+1:
+| If bit #DBL_MANT_DIG-32-1 is clear we have a denormalized number, so we 
+| have to put the exponent to zero and return a denormalized number.
+       btst    IMM (DBL_MANT_DIG-32-1),d0
+       beq     1f
+       jmp     a0@             | normalized: continue at the address in a0
+1:     movel   IMM (0),d4
+       jmp     a0@             | denormalized: exponent forced to zero
+
+| The three remaining modes perform no rounding at all here: they continue
+| straight at the address in a0 with the registers untouched.
+Lround$to$zero:
+Lround$to$plus:
+Lround$to$minus:
+       jmp     a0@
+#endif /* L_double */
+
+#ifdef  L_float
+
+| Symbols and constants shared by the single precision routines.
+
+       .globl  SYM (_fpCCR)
+       .globl  $_exception_handler
+
+| Bit patterns of special single precision values.
+QUIET_NaN    = 0xffffffff
+SIGNL_NaN    = 0x7f800001
+INFINITY     = 0x7f800000
+
+| Exponent range and mantissa width.
+F_MAX_EXP      = 0xff
+F_BIAS         = 126
+FLT_MAX_EXP    = F_MAX_EXP - F_BIAS
+FLT_MIN_EXP    = 1 - F_BIAS
+FLT_MANT_DIG   = 24
+
+| Exception flag bits.  The exception exits load sums of these into d7
+| before jumping to $_exception_handler.
+INEXACT_RESULT                 = 0x0001
+UNDERFLOW              = 0x0002
+OVERFLOW               = 0x0004
+DIVIDE_BY_ZERO                 = 0x0008
+INVALID_OPERATION      = 0x0010
+
+| Operand format code, loaded into d6 by the exception exits.
+SINGLE_FLOAT = 1
+
+| Operation codes, loaded into d5 by the routines before they branch to an
+| exception exit.
+NOOP         = 0
+ADD          = 1
+MULTIPLY     = 2
+DIVIDE       = 3
+NEGATE       = 4
+COMPARE      = 5
+EXTENDSFDF   = 6
+TRUNCDFSF    = 7
+
+UNKNOWN           = -1
+| Rounding modes, compared against the word read from offset 6 of _fpCCR.
+ROUND_TO_NEAREST  = 0 | round result to nearest representable value
+ROUND_TO_ZERO     = 1 | round result towards zero
+ROUND_TO_PLUS     = 2 | round result towards plus infinity
+ROUND_TO_MINUS    = 3 | round result towards minus infinity
+
+| Entry points:
+
+       .globl SYM (__addsf3)
+       .globl SYM (__subsf3)
+       .globl SYM (__mulsf3)
+       .globl SYM (__divsf3)
+       .globl SYM (__negsf2)
+       .globl SYM (__cmpsf2)
+       .globl SYM (__cmpsf2_internal)
+       .hidden SYM (__cmpsf2_internal)
+
+| These are common routines to return and signal exceptions.   
+
+       .text
+       .even
+
+| Each exit below puts the value to return in d0, the exception flags in d7
+| and the operand format in d6, then jumps to $_exception_handler.  The
+| routines branching here load the operation code into d5 beforehand; the
+| exits that OR d7 into d0 expect d7 to hold the sign bit of the result.
+
+Lf$den:
+| Return and signal a denormalized number
+       orl     d7,d0           | put the sign from d7 on the value already in d0
+       moveq   IMM (INEXACT_RESULT+UNDERFLOW),d7
+       moveq   IMM (SINGLE_FLOAT),d6
+       PICJUMP $_exception_handler
+
+Lf$infty:
+Lf$overflow:
+| Return a properly signed INFINITY and set the exception flags 
+       movel   IMM (INFINITY),d0
+       orl     d7,d0           | sign bit comes from d7
+       moveq   IMM (INEXACT_RESULT+OVERFLOW),d7
+       moveq   IMM (SINGLE_FLOAT),d6
+       PICJUMP $_exception_handler
+
+Lf$underflow:
+| Return 0 and set the exception flags 
+       moveq   IMM (0),d0      | d7 is not consulted: the zero is unsigned
+       moveq   IMM (INEXACT_RESULT+UNDERFLOW),d7
+       moveq   IMM (SINGLE_FLOAT),d6
+       PICJUMP $_exception_handler
+
+Lf$inop:
+| Return a quiet NaN and set the exception flags
+       movel   IMM (QUIET_NaN),d0 | d7 is not consulted here either
+       moveq   IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+       moveq   IMM (SINGLE_FLOAT),d6
+       PICJUMP $_exception_handler
+
+Lf$div$0:
+| Return a properly signed INFINITY and set the exception flags
+       movel   IMM (INFINITY),d0
+       orl     d7,d0           | sign bit comes from d7
+       moveq   IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7
+       moveq   IMM (SINGLE_FLOAT),d6
+       PICJUMP $_exception_handler
+
+|=============================================================================
+|=============================================================================
+|                         single precision routines
+|=============================================================================
+|=============================================================================
+
+| A single precision floating point number (float) has the format:
+|
+| struct _float {
+|  unsigned int sign      : 1;  /* sign bit */ 
+|  unsigned int exponent  : 8;  /* exponent, shifted by 126 */
+|  unsigned int fraction  : 23; /* fraction */
+| } float;
+| 
+| Thus sizeof(float) = 4 (32 bits). 
+|
+| All the routines are callable from C programs, and return the result 
+| in the single register d0. They also preserve all registers except 
+| d0-d1 and a0-a1.
+
+|=============================================================================
+|                              __subsf3
+|=============================================================================
+
+| float __subsf3(float, float);
+| Computes a - b as a + (-b): the sign bit of b is flipped in place in the
+| argument area and execution continues into __addsf3, which follows
+| immediately.  No frame has been set up yet, so the operands are addressed
+| from sp: sp@(4) is a and sp@(8) is b.
+       FUNC(__subsf3)
+SYM (__subsf3):
+       bchg    IMM (31),sp@(8) | change sign of second operand
+                               | and fall through
+|=============================================================================
+|                              __addsf3
+|=============================================================================
+
+| float __addsf3(float, float);
+| Entry and unpacking.  Reads a from a6@(8) and b from a6@(12), keeps the
+| original values (with their signs) in a0 and a1, and splits each operand
+| into an exponent (d6 for a, d7 for b) and a fraction with the hidden bit
+| restored (d0 for a, d2 for b).  Zero operands, denormalized operands and
+| INFINITY/NaN are dispatched to their own labels.
+       FUNC(__addsf3)
+SYM (__addsf3):
+#ifndef __mcoldfire__
+       link    a6,IMM (0)      | everything will be done in registers
+       moveml  d2-d7,sp@-      | save all data registers but d0-d1
+#else
+       link    a6,IMM (-24)    | reserve room for the six saved registers
+       moveml  d2-d7,sp@       | and store them there
+#endif
+       movel   a6@(8),d0       | get first operand
+       movel   a6@(12),d1      | get second operand
+       movel   d0,a0           | get d0's sign bit '
+       addl    d0,d0           | check and clear sign bit of a
+       beq     Laddsf$b        | if zero return second operand
+       movel   d1,a1           | save b's sign bit '
+       addl    d1,d1           | get rid of sign bit
+       beq     Laddsf$a        | if zero return first operand
+
+| Get the exponents and check for denormalized and/or infinity.
+| Both operands have been shifted left by one bit, which is why the masks
+| below sit one bit higher than in the stored format.
+
+       movel   IMM (0x00ffffff),d4     | mask to get fraction
+       movel   IMM (0x01000000),d5     | mask to put hidden bit back
+
+       movel   d0,d6           | save a to get exponent
+       andl    d4,d0           | get fraction in d0
+       notl    d4              | make d4 into a mask for the exponent
+       andl    d4,d6           | get exponent in d6
+       beq     Laddsf$a$den    | branch if a is denormalized
+       cmpl    d4,d6           | check for INFINITY or NaN
+       beq     Laddsf$nf
+       swap    d6              | put exponent into first word
+       orl     d5,d0           | and put hidden bit back
+Laddsf$1:
+| Now we have a's exponent in d6 (second byte) and the mantissa in d0. '
+       movel   d1,d7           | get exponent in d7
+       andl    d4,d7           | 
+       beq     Laddsf$b$den    | branch if b is denormalized
+       cmpl    d4,d7           | check for INFINITY or NaN
+       beq     Laddsf$nf
+       swap    d7              | put exponent into first word
+       notl    d4              | make d4 into a mask for the fraction
+       andl    d4,d1           | get fraction in d1
+       orl     d5,d1           | and put hidden bit back
+Laddsf$2:
+| Now we have b's exponent in d7 (second byte) and the mantissa in d1. '
+
+| Note that the hidden bit corresponds to bit #FLT_MANT_DIG-1, and we 
+| shifted left once, so bit #FLT_MANT_DIG is set (so we have one extra
+| bit).
+
+       movel   d1,d2           | move b to d2, since we want to use
+                               | two registers to do the sum
+       movel   IMM (0),d1      | and clear the new ones
+       movel   d1,d3           |
+
+| Here we shift the numbers in registers d0 and d1 so the exponents are the
+| same, and put the largest exponent in d6. Note that we are using two
+| registers for each number (see the discussion by D. Knuth in "Seminumerical 
+| Algorithms").
+| At this point a is in d0-d1 and b in d2-d3, with d1 and d3 still zero.
+| The operand with the smaller exponent is shifted right by the exponent
+| difference; if the difference is FLT_MANT_DIG+2 or more the other operand
+| is returned unchanged through Laddsf$a$small or Laddsf$b$small.
+#ifndef __mcoldfire__
+       cmpw    d6,d7           | compare exponents
+#else
+       cmpl    d6,d7           | compare exponents
+#endif
+       beq     Laddsf$3        | if equal don't shift '
+       bhi     5f              | branch if second exponent largest
+1:
+| The exponent of a is the larger one: b (d2-d3) gets shifted.
+       subl    d6,d7           | keep the largest exponent
+       negl    d7
+#ifndef __mcoldfire__
+       lsrw    IMM (8),d7      | put difference in lower byte
+#else
+       lsrl    IMM (8),d7      | put difference in lower byte
+#endif
+| if difference is too large we don't shift (actually, we can just exit) '
+#ifndef __mcoldfire__
+       cmpw    IMM (FLT_MANT_DIG+2),d7         
+#else
+       cmpl    IMM (FLT_MANT_DIG+2),d7         
+#endif
+       bge     Laddsf$b$small
+#ifndef __mcoldfire__
+       cmpw    IMM (16),d7     | if difference >= 16 swap
+#else
+       cmpl    IMM (16),d7     | if difference >= 16 swap
+#endif
+       bge     4f
+2:
+| The loop at 3 runs count+1 times, so take one off the count first.
+#ifndef __mcoldfire__
+       subw    IMM (1),d7
+#else
+       subql   IMM (1), d7
+#endif
+3:
+#ifndef __mcoldfire__
+       lsrl    IMM (1),d2      | shift right second operand
+       roxrl   IMM (1),d3
+       dbra    d7,3b
+#else
+       lsrl    IMM (1),d3      | same 64-bit shift without roxrl:
+       btst    IMM (0),d2      | bit 0 of d2 becomes
+       beq     10f
+       bset    IMM (31),d3     | bit 31 of d3
+10:    lsrl    IMM (1),d2
+       subql   IMM (1), d7
+       bpl     3b
+#endif
+       bra     Laddsf$3
+4:
+| Shift d2-d3 right by 16 bits at once using word moves (d3 is still zero
+| when we get here).
+       movew   d2,d3
+       swap    d3
+       movew   d3,d2
+       swap    d2
+#ifndef __mcoldfire__
+       subw    IMM (16),d7
+#else
+       subl    IMM (16),d7
+#endif
+       bne     2b              | if still more bits, go back to normal case
+       bra     Laddsf$3
+5:
+| The exponent of b is the larger one: a (d0-d1) gets shifted.
+#ifndef __mcoldfire__
+       exg     d6,d7           | exchange the exponents
+#else
+       eorl    d6,d7           | exchange the exponents with three
+       eorl    d7,d6           | exclusive ORs
+       eorl    d6,d7
+#endif
+       subl    d6,d7           | keep the largest exponent
+       negl    d7              |
+#ifndef __mcoldfire__
+       lsrw    IMM (8),d7      | put difference in lower byte
+#else
+       lsrl    IMM (8),d7      | put difference in lower byte
+#endif
+| if difference is too large we don't shift (and exit!) '
+#ifndef __mcoldfire__
+       cmpw    IMM (FLT_MANT_DIG+2),d7         
+#else
+       cmpl    IMM (FLT_MANT_DIG+2),d7         
+#endif
+       bge     Laddsf$a$small
+#ifndef __mcoldfire__
+       cmpw    IMM (16),d7     | if difference >= 16 swap
+#else
+       cmpl    IMM (16),d7     | if difference >= 16 swap
+#endif
+       bge     8f
+6:
+| The loop at 7 runs count+1 times, so take one off the count first.
+#ifndef __mcoldfire__
+       subw    IMM (1),d7
+#else
+       subl    IMM (1),d7
+#endif
+7:
+#ifndef __mcoldfire__
+       lsrl    IMM (1),d0      | shift right first operand
+       roxrl   IMM (1),d1
+       dbra    d7,7b
+#else
+       lsrl    IMM (1),d1      | same 64-bit shift without roxrl
+       btst    IMM (0),d0
+       beq     10f
+       bset    IMM (31),d1
+10:    lsrl    IMM (1),d0
+       subql   IMM (1),d7
+       bpl     7b
+#endif
+       bra     Laddsf$3
+8:
+| Shift d0-d1 right by 16 bits at once using word moves (d1 is still zero
+| when we get here).
+       movew   d0,d1
+       swap    d1
+       movew   d1,d0
+       swap    d0
+#ifndef __mcoldfire__
+       subw    IMM (16),d7
+#else
+       subl    IMM (16),d7
+#endif
+       bne     6b              | if still more bits, go back to normal case
+                               | otherwise we fall through
+
+| Now we have a in d0-d1, b in d2-d3, and the largest exponent in d6 (the
+| signs are stored in a0 and a1).
+
+Laddsf$3:
+| Here we have to decide whether to add or subtract the numbers
+#ifndef __mcoldfire__
+       exg     d6,a0           | get signs back
+       exg     d7,a1           | and save the exponents
+#else
+       movel   d6,d4           | same two exchanges, done through d4
+       movel   a0,d6
+       movel   d4,a0
+       movel   d7,d4
+       movel   a1,d7
+       movel   d4,a1
+#endif
+       eorl    d6,d7           | combine sign bits
+       bmi     Lsubsf$0        | if negative a and b have opposite 
+                               | sign so we actually subtract the
+                               | numbers
+
+| Here we have both positive or both negative
+#ifndef __mcoldfire__
+       exg     d6,a0           | now we have the exponent in d6
+#else
+       movel   d6,d4
+       movel   a0,d6
+       movel   d4,a0
+#endif
+       movel   a0,d7           | and sign in d7
+       andl    IMM (0x80000000),d7
+| Here we do the addition.
+       addl    d3,d1
+       addxl   d2,d0
+| Note: now we have d2, d3, d4 and d5 to play with! 
+
+| Put the exponent, in the first byte, in d2, to use the "standard" rounding
+| routines:
+       movel   d6,d2
+#ifndef __mcoldfire__
+       lsrw    IMM (8),d2
+#else
+       lsrl    IMM (8),d2
+#endif
+
+| Before rounding normalize so bit #FLT_MANT_DIG is set (we will consider
+| the case of denormalized numbers in the rounding routine itself).
+| As in the addition (not in the subtraction!) we could have set 
+| one more bit we check this:
+       btst    IMM (FLT_MANT_DIG+1),d0 
+       beq     1f
+#ifndef __mcoldfire__
+       lsrl    IMM (1),d0
+       roxrl   IMM (1),d1
+#else
+       lsrl    IMM (1),d1
+       btst    IMM (0),d0
+       beq     10f
+       bset    IMM (31),d1
+10:    lsrl    IMM (1),d0
+#endif
+       addl    IMM (1),d2      | shifted right once, so bump the exponent
+1:
+| Select the rounding routine from the rounding mode word of _fpCCR.  The
+| routine comes back to Laddsf$4 through a0.
+       lea     pc@(Laddsf$4),a0 | to return from rounding routine
+       PICLEA  SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+       clrl    d6
+#endif
+       movew   a1@(6),d6       | rounding mode in d6
+       beq     Lround$to$nearest | zero is ROUND_TO_NEAREST
+#ifndef __mcoldfire__
+       cmpw    IMM (ROUND_TO_PLUS),d6
+#else
+       cmpl    IMM (ROUND_TO_PLUS),d6
+#endif
+       bhi     Lround$to$minus | above ROUND_TO_PLUS
+       blt     Lround$to$zero  | below ROUND_TO_PLUS
+       bra     Lround$to$plus  | equal
+Laddsf$4:
+| Put back the exponent, but check for overflow.
+#ifndef __mcoldfire__
+       cmpw    IMM (0xff),d2
+#else
+       cmpl    IMM (0xff),d2
+#endif
+       bhi     1f
+       bclr    IMM (FLT_MANT_DIG-1),d0 | drop the hidden bit
+#ifndef __mcoldfire__
+       lslw    IMM (7),d2      | shift by 7 and swap: the exponent lands
+#else
+       lsll    IMM (7),d2      | shift by 7 and swap: the exponent lands
+#endif
+       swap    d2              | just above the fraction
+       orl     d2,d0
+       bra     Laddsf$ret
+1:
+       moveq   IMM (ADD),d5    | operation code for the exception exit
+       bra     Lf$overflow
+
+Lsubsf$0:
+| We are here if a and b have opposite signs (the exclusive OR of the two
+| sign bits was set); the magnitudes are in d0-d1 and d2-d3.
+| Here we do the subtraction.
+       movel   d6,d7           | put sign in d7
+       andl    IMM (0x80000000),d7
+
+       subl    d3,d1           | result in d0-d1
+       subxl   d2,d0           |
+       beq     Laddsf$ret      | if zero just exit
+       bpl     1f              | if positive skip the following
+       bchg    IMM (31),d7     | change sign bit in d7
+       negl    d1              | and negate the 64-bit difference
+       negxl   d0
+1:
+#ifndef __mcoldfire__
+       exg     d2,a0           | now we have the exponent in d2
+       lsrw    IMM (8),d2      | put it in the first byte
+#else
+       movel   d2,d4
+       movel   a0,d2
+       movel   d4,a0
+       lsrl    IMM (8),d2      | put it in the first byte
+#endif
+
+| Now d0-d1 is positive and the sign bit is in d7.
+
+| Note that we do not have to normalize, since in the subtraction bit
+| #FLT_MANT_DIG+1 is never set, and denormalized numbers are handled by
+| the rounding routines themselves.
+| The rounding routine is selected from the rounding mode word of _fpCCR
+| and comes back to Lsubsf$1 through a0.
+       lea     pc@(Lsubsf$1),a0 | to return from rounding routine
+       PICLEA  SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+       clrl    d6
+#endif
+       movew   a1@(6),d6       | rounding mode in d6
+       beq     Lround$to$nearest | zero is ROUND_TO_NEAREST
+#ifndef __mcoldfire__
+       cmpw    IMM (ROUND_TO_PLUS),d6
+#else
+       cmpl    IMM (ROUND_TO_PLUS),d6
+#endif
+       bhi     Lround$to$minus | above ROUND_TO_PLUS
+       blt     Lround$to$zero  | below ROUND_TO_PLUS
+       bra     Lround$to$plus  | equal
+Lsubsf$1:
+| Put back the exponent (we can't have overflow!). '
+       bclr    IMM (FLT_MANT_DIG-1),d0 | drop the hidden bit
+#ifndef __mcoldfire__
+       lslw    IMM (7),d2
+#else
+       lsll    IMM (7),d2
+#endif
+       swap    d2
+       orl     d2,d0
+       bra     Laddsf$ret
+
+| If one of the numbers was too small (difference of exponents >= 
+| FLT_MANT_DIG+2) we return the other (and now we don't have to '
+| check for finiteness or zero).
+| Both exits reload the surviving operand from the argument area, clear the
+| first word of _fpCCR, restore d2-d7 and return.
+Laddsf$a$small:
+       movel   a6@(12),d0      | a is negligible: the result is b
+       PICLEA  SYM (_fpCCR),a0
+       movew   IMM (0),a0@
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7      | restore data registers
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6              | and return
+       rts
+
+Laddsf$b$small:
+       movel   a6@(8),d0       | b is negligible: the result is a
+       PICLEA  SYM (_fpCCR),a0
+       movew   IMM (0),a0@
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7      | restore data registers
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6              | and return
+       rts
+
+| If the numbers are denormalized remember to put exponent equal to 1.
+| The hidden bit is not added to the fraction on these paths.
+
+Laddsf$a$den:
+       movel   d5,d6           | d5 contains 0x01000000
+       swap    d6              | which after the swap is an exponent of 1
+       bra     Laddsf$1
+
+Laddsf$b$den:
+       movel   d5,d7
+       swap    d7
+       notl    d4              | make d4 into a mask for the fraction
+                               | (this was not executed after the jump)
+       bra     Laddsf$2
+
+| The rest is mainly code for the different results which can be 
+| returned (checking always for +/-INFINITY and NaN).
+
+Laddsf$b:
+| Return b (if a is zero).
+       movel   a6@(12),d0
+       cmpl    IMM (0x80000000),d0     | Check if b is -0
+       bne     1f
+| Both operands are zero and b is -0: the result is a zero carrying the
+| sign of a.
+       movel   a0,d7
+       andl    IMM (0x80000000),d7     | Use the sign of a
+       clrl    d0
+       bra     Laddsf$ret
+Laddsf$a:
+| Return a (if b is zero).
+       movel   a6@(8),d0
+1:
+| Common tail: d0 holds the operand to return.
+       moveq   IMM (ADD),d5
+| We have to check for NaN and +/-infty.
+       movel   d0,d7
+       andl    IMM (0x80000000),d7     | put sign in d7
+       bclr    IMM (31),d0             | clear sign
+       cmpl    IMM (INFINITY),d0       | check for infty or NaN
+       bge     2f
+       movel   d0,d0           | check for zero (we do this because we don't '
+       bne     Laddsf$ret      | want to return -0 by mistake
+       bclr    IMM (31),d7     | if zero be sure to clear sign
+       bra     Laddsf$ret      | if everything OK just return
+2:
+| The value to be returned is either +/-infty or NaN
+       andl    IMM (0x007fffff),d0     | check for NaN
+       bne     Lf$inop                 | if mantissa not zero is NaN
+       bra     Lf$infty
+
+Laddsf$ret:
+| Normal exit (a and b nonzero, result is not NaN nor +/-infty).
+| We have to clear the exception flags (just the exception type).
+| On entry d0 holds the magnitude of the result and d7 its sign bit.
+       PICLEA  SYM (_fpCCR),a0
+       movew   IMM (0),a0@
+       orl     d7,d0           | put sign bit
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7      | restore data registers
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6              | and return
+       rts
+
+Laddsf$ret$den:
+| Return a denormalized number (for addition we don't signal underflow) '
+       lsrl    IMM (1),d0      | remember to shift right back once
+       bra     Laddsf$ret      | and return
+
+| Note: when adding two floats of the same sign if either one is 
+| NaN we return NaN without regard to whether the other is finite or 
+| not. When subtracting them (i.e., when adding two numbers of 
+| opposite signs) things are more complicated: if both are INFINITY 
+| we return NaN, if only one is INFINITY and the other is NaN we return
+| NaN, but if it is finite we return INFINITY with the corresponding sign.
+
+| Reached when at least one operand has an all-ones exponent.  Both operands
+| are reloaded from the argument area.  Result: NaN through Lf$inop, or an
+| INFINITY through Lf$infty whose sign is passed in d7.
+Laddsf$nf:
+       moveq   IMM (ADD),d5
+| This could be faster but it is not worth the effort, since it is not
+| executed very often. We sacrifice speed for clarity here.
+       movel   a6@(8),d0       | get the numbers back (remember that we
+       movel   a6@(12),d1      | did some processing already)
+       movel   IMM (INFINITY),d4 | useful constant (INFINITY)
+       movel   d0,d2           | save sign bits
+       movel   d0,d7           | keep a with its sign: the sign of the
+                               | result is derived from it below, and d0
+                               | is about to lose its sign bit
+       movel   d1,d3
+       bclr    IMM (31),d0     | clear sign bits
+       bclr    IMM (31),d1
+| We know that one of them is either NaN or +/-INFINITY
+| Check for NaN (if either one is NaN return NaN)
+       cmpl    d4,d0           | check first a (d0)
+       bhi     Lf$inop         
+       cmpl    d4,d1           | check now b (d1)
+       bhi     Lf$inop         
+| Now comes the check for +/-INFINITY. We know that both are (maybe not
+| finite) numbers, but we have to check if both are infinite whether we
+| are adding or subtracting them.
+       eorl    d3,d2           | to check sign bits
+       bmi     1f
+       andl    IMM (0x80000000),d7     | get (common) sign bit
+       bra     Lf$infty
+1:
+| We know one (or both) are infinite, so we test for equality between the
+| two numbers (if they are equal they have to be infinite both, so we
+| return NaN).
+       cmpl    d1,d0           | are both infinite?
+       beq     Lf$inop         | if so return NaN
+
+       andl    IMM (0x80000000),d7 | get a's sign bit '
+       cmpl    d4,d0           | test now for infinity
+       beq     Lf$infty        | if a is INFINITY return with this sign
+       bchg    IMM (31),d7     | else we know b is INFINITY and has
+       bra     Lf$infty        | the opposite sign
+
+|=============================================================================
+|                             __mulsf3
+|=============================================================================
+
+| float __mulsf3(float, float);
+| Reads a from a6@(8) and b from a6@(12).  The sign of the product is kept
+| in d7 throughout.  Zero, INFINITY, NaN and denormalized operands are
+| dispatched to their own labels; otherwise the two 24-bit fractions are
+| multiplied by shift-and-add into d0-d1, the exponent is accumulated in d2,
+| and control passes to Lround$exit.
+       FUNC(__mulsf3)
+SYM (__mulsf3):
+#ifndef __mcoldfire__
+       link    a6,IMM (0)
+       moveml  d2-d7,sp@-      | save d2-d7
+#else
+       link    a6,IMM (-24)    | reserve room for the six saved registers
+       moveml  d2-d7,sp@       | and store them there
+#endif
+       movel   a6@(8),d0       | get a into d0
+       movel   a6@(12),d1      | and b into d1
+       movel   d0,d7           | d7 will hold the sign of the product
+       eorl    d1,d7           |
+       andl    IMM (0x80000000),d7
+       movel   IMM (INFINITY),d6       | useful constant (+INFINITY)
+       movel   d6,d5                   | another (mask for fraction)
+       notl    d5                      |
+       movel   IMM (0x00800000),d4     | this is to put hidden bit back
+       bclr    IMM (31),d0             | get rid of a's sign bit '
+       movel   d0,d2                   | (the move also sets the flags for beq)
+       beq     Lmulsf$a$0              | branch if a is zero
+       bclr    IMM (31),d1             | get rid of b's sign bit '
+       movel   d1,d3           | (the move also sets the flags for beq)
+       beq     Lmulsf$b$0      | branch if b is zero
+       cmpl    d6,d0           | is a big?
+       bhi     Lmulsf$inop     | if a is NaN return NaN
+       beq     Lmulsf$inf      | if a is INFINITY we have to check b
+       cmpl    d6,d1           | now compare b with INFINITY
+       bhi     Lmulsf$inop     | is b NaN?
+       beq     Lmulsf$overflow | is b INFINITY?
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d2 and d3.
+       andl    d6,d2           | and isolate exponent in d2
+       beq     Lmulsf$a$den    | if exponent is zero we have a denormalized
+                               | number
+       andl    d5,d0           | and isolate fraction
+       orl     d4,d0           | and put hidden bit back
+       swap    d2              | I like exponents in the first byte
+#ifndef __mcoldfire__
+       lsrw    IMM (7),d2      | 
+#else
+       lsrl    IMM (7),d2      | 
+#endif
+Lmulsf$1:                      | same unpacking for b, into d3 and d1
+       andl    d6,d3           |
+       beq     Lmulsf$b$den    |
+       andl    d5,d1           |
+       orl     d4,d1           |
+       swap    d3              |
+#ifndef __mcoldfire__
+       lsrw    IMM (7),d3      |
+#else
+       lsrl    IMM (7),d3      |
+#endif
+Lmulsf$2:                      |
+#ifndef __mcoldfire__
+       addw    d3,d2           | add exponents
+       subw    IMM (F_BIAS+1),d2 | and subtract bias (plus one)
+#else
+       addl    d3,d2           | add exponents
+       subl    IMM (F_BIAS+1),d2 | and subtract bias (plus one)
+#endif
+
+| We are now ready to do the multiplication. The situation is as follows:
+| both a and b have bit FLT_MANT_DIG-1 set (even if they were 
+| denormalized to start with!), which means that in the product 
+| bit 2*(FLT_MANT_DIG-1) (that is, bit 2*FLT_MANT_DIG-2-32 of the 
+| high long) is set. 
+
+| To do the multiplication let us move the number a little bit around ...
+       movel   d1,d6           | second operand in d6
+       movel   d0,d5           | first operand in d4-d5
+       movel   IMM (0),d4
+       movel   d4,d1           | the sums will go in d0-d1
+       movel   d4,d0
+
+| now bit FLT_MANT_DIG-1 becomes bit 31:
+       lsll    IMM (31-FLT_MANT_DIG+1),d6              
+
+| Start the loop (we loop #FLT_MANT_DIG times):
+| each pass doubles the running sum in d0-d1, shifts the next bit of b out
+| of the top of d6, and adds a (d4-d5) when that bit was set.
+       moveq   IMM (FLT_MANT_DIG-1),d3 
+1:     addl    d1,d1           | shift sum 
+       addxl   d0,d0
+       lsll    IMM (1),d6      | get bit bn
+       bcc     2f              | if not set skip sum
+       addl    d5,d1           | add a
+       addxl   d4,d0
+2:
+#ifndef __mcoldfire__
+       dbf     d3,1b           | loop back
+#else
+       subql   IMM (1),d3
+       bpl     1b
+#endif
+
+| Now we have the product in d0-d1, with bit (FLT_MANT_DIG - 1) + FLT_MANT_DIG
+| (mod 32) of d0 set. The first thing to do now is to normalize it so bit 
+| FLT_MANT_DIG is set (to do the rounding).
+| This is a shift of d0-d1 to the left by 10 bits: d1 is rotated left by 10,
+| the 10 bits that wrapped around are collected in d3, and they are ORed
+| into d0 after d0 itself has been shifted left by 10.
+| NOTE(review): the mask applied to d1 is 0xfd00, which keeps bit 8 and is
+| therefore not the exact complement of the 0x03ff used for d3 (that would
+| be 0xfc00).  Both variants agree with each other; confirm whether keeping
+| bit 8 is intended.
+#ifndef __mcoldfire__
+       rorl    IMM (6),d1      | rotate right by 6 and swap the halves:
+       swap    d1              | a rotate left by 10
+       movew   d1,d3
+       andw    IMM (0x03ff),d3 | the 10 bits that wrapped around
+       andw    IMM (0xfd00),d1
+#else
+       movel   d1,d3
+       lsll    IMM (8),d1      | shift left by 8 + 1 + 1 = 10
+       addl    d1,d1
+       addl    d1,d1
+       moveq   IMM (22),d5
+       lsrl    d5,d3           | top 10 bits of the old d1
+       orl     d3,d1
+       andl    IMM (0xfffffd00),d1
+#endif
+       lsll    IMM (8),d0      | shift d0 left by 8 + 1 + 1 = 10
+       addl    d0,d0
+       addl    d0,d0
+#ifndef __mcoldfire__
+       orw     d3,d0
+#else
+       orl     d3,d0
+#endif
+
+       moveq   IMM (MULTIPLY),d5 | operation code
+       
+| One more bit than expected may be set; if so shift right once and bump
+| the exponent.
+       btst    IMM (FLT_MANT_DIG+1),d0
+       beq     Lround$exit
+#ifndef __mcoldfire__
+       lsrl    IMM (1),d0
+       roxrl   IMM (1),d1
+       addw    IMM (1),d2
+#else
+       lsrl    IMM (1),d1
+       btst    IMM (0),d0
+       beq     10f
+       bset    IMM (31),d1
+10:    lsrl    IMM (1),d0
+       addql   IMM (1),d2
+#endif
+       bra     Lround$exit
+
+| Exits of __mulsf3 for NaN and INFINITY operands.  Each one loads the
+| operation code into d5 and continues at one of the common exception exits;
+| the sign of the product is still in d7.
+Lmulsf$inop:
+       moveq   IMM (MULTIPLY),d5
+       bra     Lf$inop
+
+Lmulsf$overflow:
+       moveq   IMM (MULTIPLY),d5
+       bra     Lf$overflow
+
+Lmulsf$inf:
+| Reached when a is INFINITY and b is nonzero; d1 holds b without its sign
+| and d6 holds INFINITY.
+       moveq   IMM (MULTIPLY),d5
+| If either is NaN return NaN; else both are (maybe infinite) numbers, so
+| return INFINITY with the correct sign (which is in d7).
+       cmpl    d6,d1           | is b NaN?
+       bhi     Lf$inop         | if so return NaN
+       bra     Lf$overflow     | else return +/-INFINITY
+
+| If either number is zero return zero, unless the other is +/-INFINITY, 
+| or NaN, in which case we return NaN.
+| The other operand is reloaded from the argument area into d1.  The sign
+| of the product is already in d7 and becomes the sign of the zero.
+Lmulsf$b$0:
+| Here d1 (==b) is zero.
+       movel   a6@(8),d1       | get a again to check for non-finiteness
+       bra     1f
+Lmulsf$a$0:
+       movel   a6@(12),d1      | get b again to check for non-finiteness
+1:     moveq   IMM (MULTIPLY),d5 | operation code, as on every other path
+                               | into Lf$inop (d5 held the fraction mask)
+       bclr    IMM (31),d1     | clear sign bit 
+       cmpl    IMM (INFINITY),d1 | and check for a large exponent
+       bge     Lf$inop         | if it is +/-INFINITY or NaN return NaN
+       movel   d7,d0           | else return signed zero
+       PICLEA  SYM (_fpCCR),a0 |
+       movew   IMM (0),a0@     | clear the first word of _fpCCR
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7      | restore data registers
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6              | 
+       rts                     | 
+
+| If a number is denormalized we put an exponent of 1 but do not put the 
+| hidden bit back into the fraction; instead we shift left until bit 23
+| (the hidden bit) is set, adjusting the exponent accordingly. We do this
+| to ensure that the product of the fractions is close to 1.
+| The operand is known to be nonzero here, so each loop terminates.  The
+| exponent is decremented once per shift and may end up zero or negative.
+Lmulsf$a$den:
+       movel   IMM (1),d2      | start from an exponent of 1
+       andl    d5,d0           | keep only the fraction
+1:     addl    d0,d0           | shift a left (until bit 23 is set)
+#ifndef __mcoldfire__
+       subw    IMM (1),d2      | and adjust exponent
+#else
+       subql   IMM (1),d2      | and adjust exponent
+#endif
+       btst    IMM (FLT_MANT_DIG-1),d0
+       bne     Lmulsf$1        | normalized: go on and unpack b
+       bra     1b              | else loop back
+
+Lmulsf$b$den:
+       movel   IMM (1),d3      | start from an exponent of 1
+       andl    d5,d1           | keep only the fraction
+1:     addl    d1,d1           | shift b left until bit 23 is set
+#ifndef __mcoldfire__
+       subw    IMM (1),d3      | and adjust exponent
+#else
+       subql   IMM (1),d3      | and adjust exponent
+#endif
+       btst    IMM (FLT_MANT_DIG-1),d1
+       bne     Lmulsf$2        | normalized: go on and add the exponents
+       bra     1b              | else loop back
+
+|=============================================================================
+|                             __divsf3
+|=============================================================================
+
+| float __divsf3(float, float);
+       FUNC(__divsf3)
+SYM (__divsf3):
+#ifndef __mcoldfire__
+       link    a6,IMM (0)
+       moveml  d2-d7,sp@-
+#else
+       link    a6,IMM (-24)
+       moveml  d2-d7,sp@
+#endif
+       movel   a6@(8),d0               | get a into d0
+       movel   a6@(12),d1              | and b into d1
+       movel   d0,d7                   | d7 will hold the sign of the result
+       eorl    d1,d7                   | sign (a) xor sign (b)
+       andl    IMM (0x80000000),d7     | keep only the sign bit
+       movel   IMM (INFINITY),d6       | useful constant (+INFINITY)
+       movel   d6,d5                   | another (mask for fraction)
+       notl    d5                      |
+       movel   IMM (0x00800000),d4     | this is to put hidden bit back
+       bclr    IMM (31),d0             | get rid of a's sign bit '
+       movel   d0,d2                   | copy a (sets Z when a is zero)
+       beq     Ldivsf$a$0              | branch if a is zero
+       bclr    IMM (31),d1             | get rid of b's sign bit '
+       movel   d1,d3                   | copy b (sets Z when b is zero)
+       beq     Ldivsf$b$0              | branch if b is zero
+       cmpl    d6,d0                   | is a big?
+       bhi     Ldivsf$inop             | if a is NaN return NaN
+       beq     Ldivsf$inf              | if a is INFINITY we have to check b
+       cmpl    d6,d1                   | now compare b with INFINITY 
+       bhi     Ldivsf$inop             | if b is NaN return NaN
+       beq     Ldivsf$underflow        | a is finite and b is INFINITY
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d2 and d3 and normalize the numbers to
+| ensure that the ratio of the fractions is close to 1. We do this by
+| making sure that bit #FLT_MANT_DIG-1 (hidden bit) is set.
+       andl    d6,d2           | and isolate exponent in d2
+       beq     Ldivsf$a$den    | if exponent is zero we have a denormalized
+       andl    d5,d0           | and isolate fraction
+       orl     d4,d0           | and put hidden bit back
+       swap    d2              | I like exponents in the first byte
+#ifndef __mcoldfire__
+       lsrw    IMM (7),d2      | 
+#else
+       lsrl    IMM (7),d2      | 
+#endif
+Ldivsf$1:                      | same steps for b, exponent goes to d3
+       andl    d6,d3           |
+       beq     Ldivsf$b$den    |
+       andl    d5,d1           |
+       orl     d4,d1           |
+       swap    d3              |
+#ifndef __mcoldfire__
+       lsrw    IMM (7),d3      |
+#else
+       lsrl    IMM (7),d3      |
+#endif
+Ldivsf$2:                      |
+#ifndef __mcoldfire__
+       subw    d3,d2           | subtract exponents
+       addw    IMM (F_BIAS),d2 | and add bias
+#else
+       subl    d3,d2           | subtract exponents
+       addl    IMM (F_BIAS),d2 | and add bias
+#endif
+ 
+| We are now ready to do the division. We have prepared things in such a way
+| that the ratio of the fractions will be less than 2 but greater than 1/2.
+| At this point the registers in use are:
+| d0   holds a (first operand, bit FLT_MANT_DIG=0, bit FLT_MANT_DIG-1=1)
+| d1   holds b (second operand, bit FLT_MANT_DIG-1=1)
+| d2   holds the difference of the exponents, corrected by the bias
+| d7   holds the sign of the ratio
+| d4, d5, d6 hold some constants
+       movel   d7,a0           | save sign in a0 (d6-d7 will hold the ratio)
+       movel   IMM (0),d6      | 
+       movel   d6,d7
+
+       moveq   IMM (FLT_MANT_DIG+1),d3
+1:     cmpl    d0,d1           | is a < b?
+       bhi     2f              | if so, no quotient bit this round
+       bset    d3,d6           | set a bit in d6
+       subl    d1,d0           | if a >= b  a <-- a-b
+       beq     3f              | if a is zero, exit
+2:     addl    d0,d0           | multiply a by 2
+#ifndef __mcoldfire__
+       dbra    d3,1b
+#else
+       subql   IMM (1),d3
+       bpl     1b
+#endif
+
+| Now we keep going to set the sticky bit ...
+       moveq   IMM (FLT_MANT_DIG),d3
+1:     cmpl    d0,d1
+       ble     2f              | b <= a: go record a sticky bit in d1
+       addl    d0,d0
+#ifndef __mcoldfire__
+       dbra    d3,1b
+#else
+       subql   IMM(1),d3
+       bpl     1b
+#endif
+       movel   IMM (0),d1      | loop ran out: no sticky bit
+       bra     3f
+2:     movel   IMM (0),d1
+#ifndef __mcoldfire__
+       subw    IMM (FLT_MANT_DIG),d3
+       addw    IMM (31),d3
+#else
+       subl    IMM (FLT_MANT_DIG),d3
+       addl    IMM (31),d3
+#endif
+       bset    d3,d1           | bit number is d3 - FLT_MANT_DIG + 31
+3:
+       movel   d6,d0           | put the ratio in d0-d1
+       movel   a0,d7           | get sign back
+
+| Because of the normalization we did before we are guaranteed that 
+| d0 is smaller than 2^26 but larger than 2^24. Thus bit 26 is not set,
+| bit 25 could be set, and if it is not set then bit 24 is necessarily set.
+       btst    IMM (FLT_MANT_DIG+1),d0         
+       beq     1f              | if it is not set, then bit 24 is set
+       lsrl    IMM (1),d0      | else shift right once
+#ifndef __mcoldfire__
+       addw    IMM (1),d2      | and bump the exponent
+#else
+       addl    IMM (1),d2      | and bump the exponent
+#endif
+1:
+| Now round, check for over- and underflow, and exit.
+       moveq   IMM (DIVIDE),d5
+       bra     Lround$exit
+
+Ldivsf$inop:
+       moveq   IMM (DIVIDE),d5 | d5 = DIVIDE before entering the shared exit
+       bra     Lf$inop
+
+Ldivsf$overflow:
+       moveq   IMM (DIVIDE),d5
+       bra     Lf$overflow
+
+Ldivsf$underflow:
+       moveq   IMM (DIVIDE),d5
+       bra     Lf$underflow
+
+Ldivsf$a$0:
+       moveq   IMM (DIVIDE),d5
+| If a is zero check to see whether b is zero also. In that case return
+| NaN; then check if b is NaN, and return NaN also in that case. Else
+| return a properly signed zero.
+       andl    IMM (0x7fffffff),d1     | clear sign bit and test b
+       beq     Lf$inop                 | if b is also zero return NaN
+       cmpl    IMM (INFINITY),d1       | check for NaN
+       bhi     Lf$inop                 | b is NaN: return NaN
+       movel   d7,d0                   | else return signed zero
+       PICLEA  SYM (_fpCCR),a0         |
+       movew   IMM (0),a0@             | store 0 in the first word of _fpCCR
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7              | 
+#else
+       moveml  sp@,d2-d7               | 
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6                      | 
+       rts                             | 
+       
+Ldivsf$b$0:
+       moveq   IMM (DIVIDE),d5
+| If we got here a is not zero. Check if a is NaN; in that case return NaN,
+| else return +/-INFINITY. Remember that a is in d0 with the sign bit 
+| cleared already.
+       cmpl    IMM (INFINITY),d0       | compare d0 with INFINITY
+       bhi     Lf$inop                 | if larger it is NaN
+       bra     Lf$div$0                | else signal DIVIDE_BY_ZERO
+
+Ldivsf$inf:
+       moveq   IMM (DIVIDE),d5
+| If a is INFINITY we have to check b (its sign bit is already clear in d1)
+       cmpl    IMM (INFINITY),d1       | compare b with INFINITY 
+       bge     Lf$inop                 | if b is NaN or INFINITY return NaN
+       bra     Lf$overflow             | else return overflow
+
+| If a number is denormalized we put an exponent of 1 but do not put the 
+| hidden bit back into the fraction; we shift left until it gets set.
+Ldivsf$a$den:
+       movel   IMM (1),d2
+       andl    d5,d0
+1:     addl    d0,d0           | shift a left until bit FLT_MANT_DIG-1 is set
+#ifndef __mcoldfire__
+       subw    IMM (1),d2      | and adjust exponent
+#else
+       subl    IMM (1),d2      | and adjust exponent
+#endif
+       btst    IMM (FLT_MANT_DIG-1),d0
+       bne     Ldivsf$1        | hidden bit set: rejoin the main path
+       bra     1b              | else loop back
+
+Ldivsf$b$den:
+       movel   IMM (1),d3
+       andl    d5,d1
+1:     addl    d1,d1           | shift b left until bit FLT_MANT_DIG-1 is set
+#ifndef __mcoldfire__
+       subw    IMM (1),d3      | and adjust exponent
+#else
+       subl    IMM (1),d3      | and adjust exponent
+#endif
+       btst    IMM (FLT_MANT_DIG-1),d1
+       bne     Ldivsf$2        | hidden bit set: rejoin the main path
+       bra     1b              | else loop back
+
+Lround$exit:
+| This is a common exit point for __mulsf3 and __divsf3. 
+
+| First check for underflow in the exponent:
+#ifndef __mcoldfire__
+       cmpw    IMM (-FLT_MANT_DIG-1),d2                
+#else
+       cmpl    IMM (-FLT_MANT_DIG-1),d2                
+#endif
+       blt     Lf$underflow    
+| It could happen that the exponent is less than 1, in which case the 
+| number is denormalized. In this case we shift right and adjust the 
+| exponent until it becomes 1 or the fraction is zero (in the latter case 
+| we signal underflow and return zero).
+       movel   IMM (0),d6      | d6 is used temporarily
+#ifndef __mcoldfire__
+       cmpw    IMM (1),d2      | if the exponent is less than 1 we 
+#else
+       cmpl    IMM (1),d2      | if the exponent is less than 1 we 
+#endif
+       bge     2f              | have to shift right (denormalize)
+1:
+#ifndef __mcoldfire__
+       addw    IMM (1),d2      | adjust the exponent
+       lsrl    IMM (1),d0      | shift right once 
+       roxrl   IMM (1),d1      |
+       roxrl   IMM (1),d6      | d6 collect bits we would lose otherwise
+       cmpw    IMM (1),d2      | is the exponent 1 already?
+#else
+       addql   IMM (1),d2      | adjust the exponent
+       lsrl    IMM (1),d6      | same d0-d1-d6 right shift, done by hand
+       btst    IMM (0),d1
+       beq     11f
+       bset    IMM (31),d6
+11:    lsrl    IMM (1),d1
+       btst    IMM (0),d0
+       beq     10f
+       bset    IMM (31),d1
+10:    lsrl    IMM (1),d0
+       cmpl    IMM (1),d2      | is the exponent 1 already?
+#endif
+       beq     2f              | if so stop shifting
+       bra     1b              | else loop back
+       bra     Lf$underflow    | safety check, shouldn't execute '
+2:     orl     d6,d1           | this is a trick so we don't lose  '
+                               | the extra bits which were flushed right
+| Now call the rounding routine (which takes care of denormalized numbers):
+       lea     pc@(Lround$0),a0 | to return from rounding routine
+       PICLEA  SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+       clrl    d6              | the movew below only writes the low word
+#endif
+       movew   a1@(6),d6       | rounding mode in d6
+       beq     Lround$to$nearest
+#ifndef __mcoldfire__
+       cmpw    IMM (ROUND_TO_PLUS),d6
+#else
+       cmpl    IMM (ROUND_TO_PLUS),d6
+#endif
+       bhi     Lround$to$minus | mode above ROUND_TO_PLUS
+       blt     Lround$to$zero  | mode below ROUND_TO_PLUS
+       bra     Lround$to$plus  | mode equals ROUND_TO_PLUS
+Lround$0:
+| The rounding routine returns here through a0.
+
+| Here we should have either a normalized number or a denormalized one, and
+| the exponent is necessarily larger or equal to 1 (so we don't have to  '
+| check again for underflow!). We have to check for overflow or for a 
+| denormalized number (which also signals underflow).
+| Check for overflow (i.e., exponent >= 255).
+#ifndef __mcoldfire__
+       cmpw    IMM (0x00ff),d2
+#else
+       cmpl    IMM (0x00ff),d2
+#endif
+       bge     Lf$overflow
+| Now check for a denormalized number (exponent==0).
+       movew   d2,d2           | sets the condition codes from the exponent
+       beq     Lf$den
+1:
+| Put back the exponents and sign and return.
+#ifndef __mcoldfire__
+       lslw    IMM (7),d2      | exponent back to fourth byte
+#else
+       lsll    IMM (7),d2      | exponent back to fourth byte
+#endif
+       bclr    IMM (FLT_MANT_DIG-1),d0
+       swap    d0              | and put back exponent
+#ifndef __mcoldfire__
+       orw     d2,d0           | 
+#else
+       orl     d2,d0
+#endif
+       swap    d0              |
+       orl     d7,d0           | and sign also
+
+       PICLEA  SYM (_fpCCR),a0
+       movew   IMM (0),a0@     | store 0 in the first word of _fpCCR
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6
+       rts
+
+|=============================================================================
+|                             __negsf2
+|=============================================================================
+
+| This is trivial and could be shorter if we didn't bother checking for NaN '
+| and +/-INFINITY.
+
+| float __negsf2(float);
+       FUNC(__negsf2)
+SYM (__negsf2):
+#ifndef __mcoldfire__
+       link    a6,IMM (0)
+       moveml  d2-d7,sp@-
+#else
+       link    a6,IMM (-24)
+       moveml  d2-d7,sp@
+#endif
+       moveq   IMM (NEGATE),d5
+       movel   a6@(8),d0       | get number to negate in d0
+       bchg    IMM (31),d0     | negate
+       movel   d0,d1           | make a positive copy
+       bclr    IMM (31),d1     |
+       tstl    d1              | check for zero
+       beq     2f              | if zero (either sign) return +zero
+       cmpl    IMM (INFINITY),d1 | compare to +INFINITY
+       blt     1f              | finite: return the negated value in d0
+       bhi     Lf$inop         | if larger (fraction not zero) is NaN
+       movel   d0,d7           | else get sign and return INFINITY
+       andl    IMM (0x80000000),d7
+       bra     Lf$infty                
+1:     PICLEA  SYM (_fpCCR),a0
+       movew   IMM (0),a0@     | store 0 in the first word of _fpCCR
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6
+       rts
+2:     bclr    IMM (31),d0     | zero: clear the sign so +zero is returned
+       bra     1b              | and leave through the normal exit
+
+|=============================================================================
+|                             __cmpsf2
+|=============================================================================
+
+GREATER =  1
+LESS    = -1
+EQUAL   =  0
+
+| int __cmpsf2_internal(float, float, int);
+SYM (__cmpsf2_internal):
+#ifndef __mcoldfire__
+       link    a6,IMM (0)
+       moveml  d2-d7,sp@-      | save registers
+#else
+       link    a6,IMM (-24)
+       moveml  d2-d7,sp@
+#endif
+       moveq   IMM (COMPARE),d5
+       movel   a6@(8),d0       | get first operand
+       movel   a6@(12),d1      | get second operand
+| Check if either is NaN, and in that case load the third argument into d0
+| and signal INVALID_OPERATION (see Lcmpf$inop). Check also if either is
+| zero, and clear the signs if necessary.
+       movel   d0,d6
+       andl    IMM (0x7fffffff),d0
+       beq     Lcmpsf$a$0
+       cmpl    IMM (0x7f800000),d0
+       bhi     Lcmpf$inop
+Lcmpsf$1:
+       movel   d1,d7
+       andl    IMM (0x7fffffff),d1
+       beq     Lcmpsf$b$0
+       cmpl    IMM (0x7f800000),d1
+       bhi     Lcmpf$inop
+Lcmpsf$2:
+| Check the signs
+       eorl    d6,d7
+       bpl     1f              | signs are equal
+| If the signs are not equal check if a >= 0
+       tstl    d6
+       bpl     Lcmpsf$a$gt$b   | if (a >= 0 && b < 0) => a > b
+       bmi     Lcmpsf$b$gt$a   | if (a < 0 && b >= 0) => a < b
+1:
+| If the signs are equal check for < 0
+       tstl    d6
+       bpl     1f
+| If both are negative exchange them
+#ifndef __mcoldfire__
+       exg     d0,d1
+#else
+       movel   d0,d7
+       movel   d1,d0
+       movel   d7,d1
+#endif
+1:
+| Now that they are positive we just compare them as longs (does this also
+| work for denormalized numbers?).
+       cmpl    d0,d1
+       bhi     Lcmpsf$b$gt$a   | |b| > |a|
+       bne     Lcmpsf$a$gt$b   | |b| < |a|
+| If we got here a == b.
+       movel   IMM (EQUAL),d0
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7      | put back the registers
+#else
+       moveml  sp@,d2-d7
+#endif
+       unlk    a6
+       rts
+Lcmpsf$a$gt$b:
+       movel   IMM (GREATER),d0
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7      | put back the registers
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6
+       rts
+Lcmpsf$b$gt$a:
+       movel   IMM (LESS),d0
+#ifndef __mcoldfire__
+       moveml  sp@+,d2-d7      | put back the registers
+#else
+       moveml  sp@,d2-d7
+       | XXX if frame pointer is ever removed, stack pointer must
+       | be adjusted here.
+#endif
+       unlk    a6
+       rts
+
+Lcmpsf$a$0:    
+       bclr    IMM (31),d6     | a is zero: drop its saved sign
+       bra     Lcmpsf$1
+Lcmpsf$b$0:
+       bclr    IMM (31),d7     | b is zero: drop its saved sign
+       bra     Lcmpsf$2
+
+Lcmpf$inop:
+       movl    a6@(16),d0      | third argument goes into d0
+       moveq   IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+       moveq   IMM (SINGLE_FLOAT),d6
+       PICJUMP $_exception_handler
+
+| int __cmpsf2(float, float);
+       FUNC(__cmpsf2)
+SYM (__cmpsf2):
+       link    a6,IMM (0)
+       pea     1               | third argument: loaded into d0 on NaN input
+       movl    a6@(12),sp@-    | push b again
+       movl    a6@(8),sp@-     | push a again
+       PICCALL SYM (__cmpsf2_internal)
+       unlk    a6              | also drops the three pushed longwords
+       rts
+
+|=============================================================================
+|                           rounding routines
+|=============================================================================
+
+| The rounding routines expect the number to be normalized in registers
+| d0-d1, with the exponent in register d2. They assume that the 
+| exponent is larger or equal to 1. They return a properly normalized number
+| if possible, and a denormalized number otherwise. The exponent is returned
+| in d2.
+
+Lround$to$nearest:
+| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"):
+| Here we assume that the exponent is not too small (this should be checked
+| before entering the rounding routine), but the number could be denormalized.
+
+| Check for denormalized numbers:
+1:     btst    IMM (FLT_MANT_DIG),d0
+       bne     2f              | if set the number is normalized
+| Normalize shifting left until bit #FLT_MANT_DIG is set or the exponent 
+| is one (remember that a denormalized number corresponds to an 
+| exponent of -F_BIAS+1).
+#ifndef __mcoldfire__
+       cmpw    IMM (1),d2      | remember that the exponent is at least one
+#else
+       cmpl    IMM (1),d2      | remember that the exponent is at least one
+#endif
+       beq     2f              | an exponent of one means denormalized
+       addl    d1,d1           | else shift and adjust the exponent
+       addxl   d0,d0           |
+#ifndef __mcoldfire__
+       dbra    d2,1b           |
+#else
+       subql   IMM (1),d2
+       bpl     1b
+#endif
+2:
+| Now round: we do it as follows: after the shifting we can write the
+| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2.
+| If delta < 1, do nothing. If delta > 1, add 1 to f. 
+| If delta == 1, we make sure the rounded number will be even (odd?) 
+| (after shifting).
+       btst    IMM (0),d0      | is delta < 1?
+       beq     2f              | if so, do not do anything
+       tstl    d1              | is delta == 1?
+       bne     1f              | if not (delta > 1) go add 1
+       movel   d0,d1           | delta == 1: round to even
+       andl    IMM (2),d1      | bit 1 is the last significant bit
+       addl    d1,d0           | adds 2 only when bit 1 is set
+       bra     2f              | 
+1:     movel   IMM (1),d1      | else add 1 
+       addl    d1,d0           |
+| Shift right once (because we used bit #FLT_MANT_DIG!).
+2:     lsrl    IMM (1),d0              
+| Now check again bit #FLT_MANT_DIG (rounding could have produced a
+| 'fraction overflow' ...).
+       btst    IMM (FLT_MANT_DIG),d0   
+       beq     1f              | no fraction overflow
+       lsrl    IMM (1),d0
+#ifndef __mcoldfire__
+       addw    IMM (1),d2
+#else
+       addql   IMM (1),d2
+#endif
+1:
+| If bit #FLT_MANT_DIG-1 is clear we have a denormalized number, so we 
+| have to put the exponent to zero and return a denormalized number.
+       btst    IMM (FLT_MANT_DIG-1),d0
+       beq     1f
+       jmp     a0@             | normalized: return, exponent in d2
+1:     movel   IMM (0),d2      | denormalized: exponent is zero
+       jmp     a0@
+
+Lround$to$zero:
+Lround$to$plus:
+Lround$to$minus:
+       jmp     a0@             | stub: returns with d0-d1 and d2 untouched
+#endif /* L_float */
+
+| gcc expects the routines __eqdf2, __nedf2, __gtdf2, __gedf2,
+| __ledf2, __ltdf2 to all return the same value as a direct call to
+| __cmpdf2 would.  In this implementation, each of these routines
+| simply calls __cmpdf2_internal.  It would be more efficient to give the
+| __cmpdf2 routine several names, but separating them out will make it
+| easier to write efficient versions of these routines someday.
+| If the operands compare unordered, __gtdf2 and __gedf2 return -1.
+| The other routines return 1.
+
+#ifdef  L_eqdf2
+       .text
+       FUNC(__eqdf2)
+       .globl  SYM (__eqdf2)
+SYM (__eqdf2):
+       link    a6,IMM (0)
+       pea     1               | third argument: result wanted if unordered
+       movl    a6@(20),sp@-    | re-push the four argument longwords,
+       movl    a6@(16),sp@-    | last one first, so the callee sees
+       movl    a6@(12),sp@-    | them in the original order
+       movl    a6@(8),sp@-     |
+       PICCALL SYM (__cmpdf2_internal)
+       unlk    a6              | also drops the five pushed longwords
+       rts
+#endif /* L_eqdf2 */
+
+#ifdef  L_nedf2
+       .text
+       FUNC(__nedf2)
+       .globl  SYM (__nedf2)
+SYM (__nedf2):
+       link    a6,IMM (0)
+       pea     1               | third argument: result wanted if unordered
+       movl    a6@(20),sp@-    | re-push the four argument longwords,
+       movl    a6@(16),sp@-    | last one first, so the callee sees
+       movl    a6@(12),sp@-    | them in the original order
+       movl    a6@(8),sp@-     |
+       PICCALL SYM (__cmpdf2_internal)
+       unlk    a6              | also drops the five pushed longwords
+       rts
+#endif /* L_nedf2 */
+
+#ifdef  L_gtdf2
+       .text
+       FUNC(__gtdf2)
+       .globl  SYM (__gtdf2)
+SYM (__gtdf2):
+       link    a6,IMM (0)
+       pea     -1              | third argument: result wanted if unordered
+       movl    a6@(20),sp@-    | re-push the four argument longwords,
+       movl    a6@(16),sp@-    | last one first, so the callee sees
+       movl    a6@(12),sp@-    | them in the original order
+       movl    a6@(8),sp@-     |
+       PICCALL SYM (__cmpdf2_internal)
+       unlk    a6              | also drops the five pushed longwords
+       rts
+#endif /* L_gtdf2 */
+
+#ifdef  L_gedf2
+       .text
+       FUNC(__gedf2)
+       .globl  SYM (__gedf2)
+SYM (__gedf2):
+       link    a6,IMM (0)
+       pea     -1              | third argument: result wanted if unordered
+       movl    a6@(20),sp@-    | re-push the four argument longwords,
+       movl    a6@(16),sp@-    | last one first, so the callee sees
+       movl    a6@(12),sp@-    | them in the original order
+       movl    a6@(8),sp@-     |
+       PICCALL SYM (__cmpdf2_internal)
+       unlk    a6              | also drops the five pushed longwords
+       rts
+#endif /* L_gedf2 */
+
+#ifdef  L_ltdf2
+       .text
+       FUNC(__ltdf2)
+       .globl  SYM (__ltdf2)
+SYM (__ltdf2):
+       link    a6,IMM (0)
+       pea     1               | third argument: result wanted if unordered
+       movl    a6@(20),sp@-    | re-push the four argument longwords,
+       movl    a6@(16),sp@-    | last one first, so the callee sees
+       movl    a6@(12),sp@-    | them in the original order
+       movl    a6@(8),sp@-     |
+       PICCALL SYM (__cmpdf2_internal)
+       unlk    a6              | also drops the five pushed longwords
+       rts
+#endif /* L_ltdf2 */
+
+#ifdef  L_ledf2
+       .text
+       FUNC(__ledf2)
+       .globl  SYM (__ledf2)
+SYM (__ledf2):
+       link    a6,IMM (0)
+       pea     1               | third argument: result wanted if unordered
+       movl    a6@(20),sp@-    | re-push the four argument longwords,
+       movl    a6@(16),sp@-    | last one first, so the callee sees
+       movl    a6@(12),sp@-    | them in the original order
+       movl    a6@(8),sp@-     |
+       PICCALL SYM (__cmpdf2_internal)
+       unlk    a6              | also drops the five pushed longwords
+       rts
+#endif /* L_ledf2 */
+
+| The comments above about __eqdf2 et al. also apply to __eqsf2 et al.,
+| except that the latter call __cmpsf2_internal, not __cmpdf2_internal.
+
+#ifdef  L_eqsf2
+       .text
+       FUNC(__eqsf2)
+       .globl  SYM (__eqsf2)
+SYM (__eqsf2):
+       link    a6,IMM (0)
+       pea     1               | third argument: result wanted if unordered
+       movl    a6@(12),sp@-    | push b again
+       movl    a6@(8),sp@-     | push a again
+       PICCALL SYM (__cmpsf2_internal)
+       unlk    a6              | also drops the three pushed longwords
+       rts
+#endif /* L_eqsf2 */
+
+#ifdef  L_nesf2
+       .text
+       FUNC(__nesf2)
+       .globl  SYM (__nesf2)
+SYM (__nesf2):
+       link    a6,IMM (0)
+       pea     1               | third argument: result wanted if unordered
+       movl    a6@(12),sp@-    | push b again
+       movl    a6@(8),sp@-     | push a again
+       PICCALL SYM (__cmpsf2_internal)
+       unlk    a6              | also drops the three pushed longwords
+       rts
+#endif /* L_nesf2 */
+
+#ifdef  L_gtsf2
+       .text
+       FUNC(__gtsf2)
+       .globl  SYM (__gtsf2)
+SYM (__gtsf2):
+       link    a6,IMM (0)
+       pea     -1              | third argument: result wanted if unordered
+       movl    a6@(12),sp@-    | push b again
+       movl    a6@(8),sp@-     | push a again
+       PICCALL SYM (__cmpsf2_internal)
+       unlk    a6              | also drops the three pushed longwords
+       rts
+#endif /* L_gtsf2 */
+
+#ifdef  L_gesf2
+       .text
+       FUNC(__gesf2)
+       .globl  SYM (__gesf2)
+SYM (__gesf2):
+       link    a6,IMM (0)
+       pea     -1              | third argument: result wanted if unordered
+       movl    a6@(12),sp@-    | push b again
+       movl    a6@(8),sp@-     | push a again
+       PICCALL SYM (__cmpsf2_internal)
+       unlk    a6              | also drops the three pushed longwords
+       rts
+#endif /* L_gesf2 */
+
+#ifdef  L_ltsf2
+       .text
+       FUNC(__ltsf2)
+       .globl  SYM (__ltsf2)
+SYM (__ltsf2):
+       link    a6,IMM (0)
+       pea     1               | third argument: result wanted if unordered
+       movl    a6@(12),sp@-    | push b again
+       movl    a6@(8),sp@-     | push a again
+       PICCALL SYM (__cmpsf2_internal)
+       unlk    a6              | also drops the three pushed longwords
+       rts
+#endif /* L_ltsf2 */
+
+#ifdef  L_lesf2
+       .text
+       FUNC(__lesf2)
+       .globl  SYM (__lesf2)
+SYM (__lesf2):
+       link    a6,IMM (0)
+       pea     1               | third argument: result wanted if unordered
+       movl    a6@(12),sp@-    | push b again
+       movl    a6@(8),sp@-     | push a again
+       PICCALL SYM (__cmpsf2_internal)
+       unlk    a6              | also drops the three pushed longwords
+       rts
+#endif /* L_lesf2 */
+
+#if defined (__ELF__) && defined (__linux__)
+       /* Make stack non-executable for ELF linux targets.  */
+       .section        .note.GNU-stack,"",@progbits
+#endif
diff --git a/libgcc/config/m68k/t-floatlib b/libgcc/config/m68k/t-floatlib

new file mode 100644 (file)

index 0000000..4160eb9
--- /dev/null
+++ b/libgcc/config/m68k/t-floatlib
@@ -0,0 +1,5 @@
+LIB1ASMSRC = m68k/lb1sf68.S
+LIB1ASMFUNCS = _mulsi3 _udivsi3 _divsi3 _umodsi3 _modsi3 \
+   _double _float _floatex \
+   _eqdf2 _nedf2 _gtdf2 _gedf2 _ltdf2 _ledf2 \
+   _eqsf2 _nesf2 _gtsf2 _gesf2 _ltsf2 _lesf2
diff --git a/libgcc/config/mcore/lib1funcs.S b/libgcc/config/mcore/lib1funcs.S

new file mode 100644 (file)

index 0000000..701762f
--- /dev/null
+++ b/libgcc/config/mcore/lib1funcs.S
@@ -0,0 +1,303 @@
+/* libgcc routines for the MCore.
+   Copyright (C) 1993, 1999, 2000, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+/* Global labels get a two-underscore prefix: SYM (foo) is __foo.  */
+
+#define SYM(x) CONCAT1 (__, x)
+
+#ifdef __ELF__
+#define TYPE(x) .type SYM (x),@function
+#define SIZE(x) .size SYM (x), . - SYM (x)
+#else
+#define TYPE(x)
+#define SIZE(x)
+#endif
+/* FUNC_START name: switch to .text and define the global label SYM (name).  */
+.macro FUNC_START name
+       .text
+       .globl SYM (\name)
+       TYPE (\name)
+SYM (\name):
+.endm
+/* FUNC_END name: emit .size for SYM (name) under ELF, nothing otherwise.  */
+.macro FUNC_END name
+       SIZE (\name)
+.endm
+
+#ifdef L_udivsi3
+FUNC_START udiv32
+FUNC_START udivsi32    /* historical misspelling, kept as an alias */
+FUNC_START udivsi3     /* the symbol this L_udivsi3 member must define */
+       movi    r1,0            // r1-r2 form 64 bit dividend
+       movi    r4,1            // r4 is quotient (1 for a sentinel)
+
+       cmpnei  r3,0            // look for 0 divisor
+       bt      9f              // divisor is nonzero: carry on
+       trap    3               // divide by 0
+9:
+       // control iterations; skip across high order 0 bits in dividend
+       mov     r7,r2
+       cmpnei  r7,0
+       bt      8f              // dividend is nonzero: go divide
+       movi    r2,0            // 0 dividend
+       jmp     r15             // quick return
+8:
+       ff1     r7              // figure distance to skip
+       lsl     r4,r7           // move the sentinel along (with 0's behind)
+       lsl     r2,r7           // and the low 32 bits of numerator
+
+// appears to be wrong...
+// tested out incorrectly in our OS work...
+//     mov     r7,r3           // looking at divisor
+//     ff1     r7              // I can move 32-r7 more bits to left.
+//     addi    r7,1            // ok, one short of that...
+//     mov     r1,r2
+//     lsr     r1,r7           // bits that came from low order...
+//     rsubi   r7,31           // r7 == "32-n" == LEFT distance
+//     addi    r7,1            // this is (32-n)
+//     lsl     r4,r7           // fixes the high 32 (quotient)
+//     lsl     r2,r7
+//     cmpnei  r4,0
+//     bf      4f              // the sentinel went away...
+
+       // run the remaining bits
+
+1:     lslc    r2,1            // 1 bit left shift of r1-r2
+       addc    r1,r1
+       cmphs   r1,r3           // upper 32 of dividend >= divisor?
+       bf      2f              // no: skip the subtract
+       sub     r1,r3           // if yes, subtract divisor
+2:     addc    r4,r4           // shift by 1 and count subtracts
+       bf      1b              // if sentinel falls out of quotient, stop
+4:     mov     r2,r4           // return quotient
+       mov     r3,r1           // and piggyback the remainder
+       jmp     r15
+FUNC_END udiv32
+FUNC_END udivsi32
+FUNC_END udivsi3
+#endif
+
+#ifdef L_umodsi3
+FUNC_START urem32
+FUNC_START umodsi3
+       movi    r1,0            // r1-r2 form 64 bit dividend
+       movi    r4,1            // r4 is quotient (1 for a sentinel)
+       cmpnei  r3,0            // look for 0 divisor
+       bt      9f              // divisor is nonzero: carry on
+       trap    3               // divide by 0
+9:
+       // control iterations; skip across high order 0 bits in dividend
+       mov     r7,r2
+       cmpnei  r7,0
+       bt      8f              // dividend is nonzero: go divide
+       movi    r2,0            // 0 dividend
+       jmp     r15             // quick return
+8:
+       ff1     r7              // figure distance to skip
+       lsl     r4,r7           // move the sentinel along (with 0's behind)
+       lsl     r2,r7           // and the low 32 bits of numerator
+
+1:     lslc    r2,1            // 1 bit left shift of r1-r2
+       addc    r1,r1
+       cmphs   r1,r3           // upper 32 of dividend >= divisor?
+       bf      2f              // no: skip the subtract
+       sub     r1,r3           // if yes, subtract divisor
+2:     addc    r4,r4           // shift by 1 and count subtracts
+       bf      1b              // if sentinel falls out of quotient, stop
+       mov     r2,r1           // return remainder
+       jmp     r15
+FUNC_END urem32
+FUNC_END umodsi3
+#endif
+
+#ifdef L_divsi3
+FUNC_START div32
+FUNC_START divsi3
+       mov     r5,r2           // calc sign of quotient
+       xor     r5,r3           // bit 31 of r5 is set when the signs differ
+       abs     r2              // do unsigned divide
+       abs     r3
+       movi    r1,0            // r1-r2 form 64 bit dividend
+       movi    r4,1            // r4 is quotient (1 for a sentinel)
+       cmpnei  r3,0            // look for 0 divisor
+       bt      9f              // divisor is nonzero: carry on
+       trap    3               // divide by 0
+9:
+       // control iterations; skip across high order 0 bits in dividend
+       mov     r7,r2
+       cmpnei  r7,0
+       bt      8f              // dividend is nonzero: go divide
+       movi    r2,0            // 0 dividend
+       jmp     r15             // quick return (r3 is left as abs of divisor)
+8:
+       ff1     r7              // figure distance to skip
+       lsl     r4,r7           // move the sentinel along (with 0's behind)
+       lsl     r2,r7           // and the low 32 bits of numerator
+
+// tested out incorrectly in our OS work...
+//     mov     r7,r3           // looking at divisor
+//     ff1     r7              // I can move 32-r7 more bits to left.
+//     addi    r7,1            // ok, one short of that...
+//     mov     r1,r2
+//     lsr     r1,r7           // bits that came from low order...
+//     rsubi   r7,31           // r7 == "32-n" == LEFT distance
+//     addi    r7,1            // this is (32-n)
+//     lsl     r4,r7           // fixes the high 32 (quotient)
+//     lsl     r2,r7
+//     cmpnei  r4,0
+//     bf      4f              // the sentinel went away...
+
+       // run the remaining bits
+1:     lslc    r2,1            // 1 bit left shift of r1-r2
+       addc    r1,r1
+       cmphs   r1,r3           // upper 32 of dividend >= divisor?
+       bf      2f              // no: skip the subtract
+       sub     r1,r3           // if yes, subtract divisor
+2:     addc    r4,r4           // shift by 1 and count subtracts
+       bf      1b              // if sentinel falls out of quotient, stop
+
+4:     mov     r2,r4           // return quotient
+       mov     r3,r1           // piggyback the remainder
+       btsti   r5,31           // after adjusting for sign
+       bf      3f              // signs were equal: results stay positive
+       rsubi   r2,0            // negate the quotient
+       rsubi   r3,0            // and the piggybacked remainder
+3:     jmp     r15
+FUNC_END div32
+FUNC_END divsi3
+#endif
+
+#ifdef L_modsi3
+FUNC_START rem32
+FUNC_START modsi3
+       mov     r5,r2           // remainder takes the sign of the dividend
+       abs     r2              // do unsigned divide
+       abs     r3
+       movi    r1,0            // r1-r2 form 64 bit dividend
+       movi    r4,1            // r4 is quotient (1 for a sentinel)
+       cmpnei  r3,0            // look for 0 divisor
+       bt      9f              // divisor is nonzero: carry on
+       trap    3               // divide by 0
+9: 
+       // control iterations; skip across high order 0 bits in dividend
+       mov     r7,r2
+       cmpnei  r7,0
+       bt      8f              // dividend is nonzero: go divide
+       movi    r2,0            // 0 dividend
+       jmp     r15             // quick return
+8:
+       ff1     r7              // figure distance to skip
+       lsl     r4,r7           // move the sentinel along (with 0's behind)
+       lsl     r2,r7           // and the low 32 bits of numerator
+
+1:     lslc    r2,1            // 1 bit left shift of r1-r2
+       addc    r1,r1
+       cmphs   r1,r3           // upper 32 of dividend >= divisor?
+       bf      2f              // no: skip the subtract
+       sub     r1,r3           // if yes, subtract divisor
+2:     addc    r4,r4           // shift by 1 and count subtracts
+       bf      1b              // if sentinel falls out of quotient, stop
+       mov     r2,r1           // return remainder
+       btsti   r5,31           // after adjusting for sign
+       bf      3f              // dividend was not negative: done
+       rsubi   r2,0            // negate the remainder
+3:     jmp     r15
+FUNC_END rem32
+FUNC_END modsi3
+#endif
+
+
+
+
+/* GCC expects that {__eq,__ne,__gt,__ge,__le,__lt}{df2,sf2}
+   will behave as __cmpdf2. So, we stub the implementations to
+   jump on to __cmpdf2 and __cmpsf2.
+ 
+   All of these shortcircuit the return path so that __cmp{sd}f2
+   will go directly back to the caller.  Pass the bare name to the
+   macro below: FUNC_START adds the __ prefix through SYM.  */
+.macro  COMPARE_DF_JUMP name
+       .import SYM (cmpdf2)
+FUNC_START \name
+       jmpi SYM (cmpdf2)       // jump, not a call: r15 is left untouched
+FUNC_END \name
+.endm
+               
+#ifdef  L_eqdf2
+COMPARE_DF_JUMP eqdf2
+#endif /* L_eqdf2 */
+
+#ifdef  L_nedf2
+COMPARE_DF_JUMP nedf2
+#endif /* L_nedf2 */
+
+#ifdef  L_gtdf2
+COMPARE_DF_JUMP gtdf2
+#endif /* L_gtdf2 */
+
+#ifdef  L_gedf2
+COMPARE_DF_JUMP gedf2
+#endif /* L_gedf2 */
+
+#ifdef  L_ltdf2
+COMPARE_DF_JUMP ltdf2
+#endif /* L_ltdf2 */
+       
+#ifdef  L_ledf2
+COMPARE_DF_JUMP ledf2
+#endif /* L_ledf2 */
+
+/* SINGLE PRECISION FLOATING POINT STUBS.  Each stub jumps on to __cmpsf2.
+   Pass the bare name: FUNC_START adds the __ prefix through SYM.  */
+.macro  COMPARE_SF_JUMP name
+       .import SYM (cmpsf2)
+FUNC_START \name
+       jmpi SYM (cmpsf2)       // jump, not a call: r15 is left untouched
+FUNC_END \name
+.endm
+               
+#ifdef  L_eqsf2
+COMPARE_SF_JUMP eqsf2
+#endif /* L_eqsf2 */
+       
+#ifdef  L_nesf2
+COMPARE_SF_JUMP nesf2
+#endif /* L_nesf2 */
+       
+#ifdef  L_gtsf2
+COMPARE_SF_JUMP gtsf2
+#endif /* L_gtsf2 */
+       
+#ifdef  L_gesf2
+COMPARE_SF_JUMP gesf2
+#endif /* L_gesf2 */
+       
+#ifdef  L_ltsf2
+COMPARE_SF_JUMP ltsf2
+#endif /* L_ltsf2 */
+       
+#ifdef  L_lesf2
+COMPARE_SF_JUMP lesf2
+#endif /* L_lesf2 */
diff --git a/libgcc/config/mcore/t-mcore b/libgcc/config/mcore/t-mcore

new file mode 100644 (file)

index 0000000..19c4c15
--- /dev/null
+++ b/libgcc/config/mcore/t-mcore
@@ -0,0 +1,2 @@
+LIB1ASMSRC    = mcore/lib1funcs.S
+LIB1ASMFUNCS  = _divsi3 _udivsi3 _modsi3 _umodsi3
diff --git a/libgcc/config/mep/lib1funcs.S b/libgcc/config/mep/lib1funcs.S

new file mode 100644 (file)

index 0000000..0a18913
--- /dev/null
+++ b/libgcc/config/mep/lib1funcs.S
@@ -0,0 +1,125 @@
+/* libgcc routines for Toshiba Media Processor.
+   Copyright (C) 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+  
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+  
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#define SAVEALL /* save $0-$14, then $5 = $lp rounded up to a multiple of 4 */ \
+       add3    $sp, $sp, -16*4 ; /* 16-word frame; the word at 60($sp) is not written */ \
+       sw      $0, ($sp) ; \
+       sw      $1, 4($sp) ; \
+       sw      $2, 8($sp) ; \
+       sw      $3, 12($sp) ; \
+       sw      $4, 16($sp) ; \
+       sw      $5, 20($sp) ; \
+       sw      $6, 24($sp) ; \
+       sw      $7, 28($sp) ; \
+       sw      $8, 32($sp) ; \
+       sw      $9, 36($sp) ; \
+       sw      $10, 40($sp) ; \
+       sw      $11, 44($sp) ; \
+       sw      $12, 48($sp) ; \
+       sw      $13, 52($sp) ; \
+       sw      $14, 56($sp) ; \
+       ldc     $5, $lp ; /* from here on $5 and $6 no longer hold the caller values */ \
+       add     $5, 3 ; /* $5 = ($5 + 3) & -4 ... */ \
+       mov     $6, -4 ; \
+       and     $5, $6 /* ... i.e. rounded up to a 4-byte boundary */
+
+#define RESTOREALL /* $lp = $5, reload $14-$0, drop the frame, return */ \
+       stc     $5, $lp ; /* must precede the reload of $5 below */ \
+       lw      $14, 56($sp) ; \
+       lw      $13, 52($sp) ; \
+       lw      $12, 48($sp) ; \
+       lw      $11, 44($sp) ; \
+       lw      $10, 40($sp) ; \
+       lw      $9, 36($sp) ; \
+       lw      $8, 32($sp) ; \
+       lw      $7, 28($sp) ; \
+       lw      $6, 24($sp) ; \
+       lw      $5, 20($sp) ; \
+       lw      $4, 16($sp) ; \
+       lw      $3, 12($sp) ; \
+       lw      $2, 8($sp) ; \
+       lw      $1, 4($sp) ; \
+       lw      $0, ($sp) ; \
+       add3    $sp, $sp, 16*4 ; /* matches the -16*4 in SAVEALL */ \
+       ret
+
+#ifdef L_mep_profile
+       .text
+       .global __mep_mcount
+__mep_mcount:
+       SAVEALL
+       ldc     $1, $lp         /* $1 = $lp as it was on entry */
+       mov     $2, $0          /* $2 = $0 as it was on entry (SAVEALL leaves $0 alone) */
+       bsr     __mep_mcount_2  /* NOTE(review): RESTOREALL returns via $5, so this assumes the callee preserves $5 - confirm */
+       RESTOREALL
+#endif
+
+#ifdef L_mep_bb_init_trace
+       .text
+       .global __mep_bb_init_trace_func
+__mep_bb_init_trace_func:
+       SAVEALL
+       lw      $1, ($5)        /* $1 = word at the 4-byte-aligned return address */
+       lw      $2, 4($5)       /* $2 = the word after it */
+       add     $5, 8           /* RESTOREALL will return past both words */
+       bsr     __bb_init_trace_func    /* NOTE(review): assumes the callee preserves $5 - confirm */
+       RESTOREALL
+#endif
+
+#ifdef L_mep_bb_init
+       .text
+       .global __mep_bb_init_func
+__mep_bb_init_func:
+       SAVEALL
+       lw      $1, ($5)        /* $1 = word at the 4-byte-aligned return address */
+       add     $5, 4           /* RESTOREALL will return past that word */
+       bsr     __bb_init_func  /* NOTE(review): assumes the callee preserves $5 - confirm */
+       RESTOREALL
+#endif
+
+#ifdef L_mep_bb_trace
+       .text
+       .global __mep_bb_trace_func
+__mep_bb_trace_func:
+       SAVEALL
+       movu    $3, __bb        /* $3 = address of __bb */
+       lw      $1, ($5)        /* first word at the aligned return address ... */
+       sw      $1, ($3)        /* ... is copied to the first word of __bb */
+       lw      $2, 4($5)       /* second word ... */
+       sw      $2, 4($3)       /* ... is copied to the second word of __bb */
+       add     $5, 8           /* RESTOREALL will return past both words */
+       bsr     __bb_trace_func /* NOTE(review): assumes the callee preserves $5 - confirm */
+       RESTOREALL
+#endif
+
+#ifdef L_mep_bb_increment
+       .text
+       .global __mep_bb_increment_func
+__mep_bb_increment_func:
+       SAVEALL
+       lw      $1, ($5)        /* $1 = word at the aligned return address, used as a pointer */
+       lw      $0, ($1)        /* load the word it points to ... */
+       add     $0, 1           /* ... add one ... */
+       sw      $0, ($1)        /* ... and store it back */
+       add     $5, 4           /* RESTOREALL will return past the pointer word */
+       RESTOREALL
+#endif
diff --git a/libgcc/config/mep/t-mep b/libgcc/config/mep/t-mep

index 36e6f5dc7715c56f6be0cb6bd43a1c1226d48440..d1fb094a41e43e1f82e6ba94b24f0cd5a1167e94 100644 (file)
--- a/libgcc/config/mep/t-mep
+++ b/libgcc/config/mep/t-mep
@@ -1,2 +1,11 @@
+# profiling support
+LIB1ASMSRC = mep/lib1funcs.S
+
+LIB1ASMFUNCS = _mep_profile \
+              _mep_bb_init_trace \
+              _mep_bb_init \
+              _mep_bb_trace \
+              _mep_bb_increment
+
  # Use -O0 instead of -O2 so we don't get complex relocations
  CRTSTUFF_CFLAGS += -O0
diff --git a/libgcc/config/mips/mips16.S b/libgcc/config/mips/mips16.S

new file mode 100644 (file)

index 0000000..ec331b5
--- /dev/null
+++ b/libgcc/config/mips/mips16.S
@@ -0,0 +1,712 @@
+/* mips16 floating point support code
+   Copyright (C) 1996, 1997, 1998, 2008, 2009, 2010
+   Free Software Foundation, Inc.
+   Contributed by Cygnus Support
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* This file contains mips16 floating point support functions.  These
+   functions are called by mips16 code to handle floating point when
+   -msoft-float is not used.  They accept the arguments and return
+   values using the soft-float calling convention, but do the actual
+   operation using the hard floating point instructions.  */
+
+#if defined _MIPS_SIM && (_MIPS_SIM == _ABIO32 || _MIPS_SIM == _ABIO64)
+
+/* This file contains 32-bit assembly code.  */
+       .set nomips16
+
+/* Start a function: export NAME, open its .ent record and place its label.  */
+
+#define STARTFN(NAME) .globl NAME; .ent NAME; NAME:
+
+/* Finish a function: emit the .end NAME that pairs with the .ent from STARTFN.  */
+
+#define ENDFN(NAME) .end NAME
+
+/* ARG1
+       The FPR that holds the first floating-point argument.
+
+   ARG2
+       The FPR that holds the second floating-point argument ($f13 if __mips64, else $f14).
+
+   RET
+       The FPR that holds a floating-point return value.  */
+
+#define RET $f0
+#define ARG1 $f12
+#ifdef __mips64
+#define ARG2 $f13
+#else /* !__mips64 */
+#define ARG2 $f14
+#endif /* __mips64 */
+
+/* Set 64-bit register GPR so that its high 32 bits contain HIGH_FPR
+   and so that its low 32 bits contain LOW_FPR.  Overwrites $1.  */
+#define MERGE_GPRf(GPR, HIGH_FPR, LOW_FPR)     \
+       .set    noat;           /* $1 is used as scratch below */ \
+       mfc1    $1, LOW_FPR;                    \
+       mfc1    GPR, HIGH_FPR;                  \
+       dsll    $1, $1, 32;     /* with the dsrl below: clear bits 63..32 of $1 */ \
+       dsll    GPR, GPR, 32;   /* move the high word into bits 63..32 */ \
+       dsrl    $1, $1, 32;                     \
+       or      GPR, GPR, $1;                   \
+       .set    at
+
+/* Move the high 32 bits of GPR to HIGH_FPR and the low 32 bits of
+   GPR to LOW_FPR.  Overwrites $1.  */
+#define MERGE_GPRt(GPR, HIGH_FPR, LOW_FPR)     \
+       .set    noat;           /* $1 is used as scratch below */ \
+       dsrl    $1, GPR, 32;    /* $1 = high word of GPR */ \
+       mtc1    GPR, LOW_FPR;                   \
+       mtc1    $1, HIGH_FPR;                   \
+       .set    at
+
+/* Jump to T, and use "OPCODE, OP2" (one instruction, split into two macro arguments at its operand comma) to implement a delayed move.  */
+#define DELAYt(T, OPCODE, OP2)                 \
+       .set    noreorder;                      \
+       jr      T;                              \
+       OPCODE, OP2;    /* written right after jr in noreorder mode, i.e. in its delay slot */ \
+       .set    reorder
+
+/* Use "OPCODE, OP2" and then jump to T.  */
+#define DELAYf(T, OPCODE, OP2) OPCODE, OP2; jr T
+
+/* MOVE_SF_BYTE0(D)
+       Move the first single-precision floating-point argument between
+       GPRs and FPRs.
+
+   MOVE_SI_BYTE0(D)
+       Likewise the first single-precision integer argument.
+
+   MOVE_SF_BYTE4(D)
+       Move the second single-precision floating-point argument between
+       GPRs and FPRs, given that the first argument occupies 4 bytes.
+
+   MOVE_SF_BYTE8(D)
+       Move the second single-precision floating-point argument between
+       GPRs and FPRs, given that the first argument occupies 8 bytes.
+
+   MOVE_DF_BYTE0(D)
+       Move the first double-precision floating-point argument between
+       GPRs and FPRs.
+
+   MOVE_DF_BYTE8(D)
+       Likewise the second double-precision floating-point argument.
+
+   MOVE_SF_RET(D, T)
+       Likewise a single-precision floating-point return value,
+       then jump to T.
+
+   MOVE_SC_RET(D, T)
+       Likewise a complex single-precision floating-point return value.
+
+   MOVE_DF_RET(D, T)
+       Likewise a double-precision floating-point return value.
+
+   MOVE_DC_RET(D, T)
+       Likewise a complex double-precision floating-point return value.
+
+   MOVE_SI_RET(D, T)
+       Likewise a single-precision integer return value.
+
+   The D argument is "t" to move to FPRs and "f" to move from FPRs.
+   The return macros may assume that the target of the jump does not
+   use a floating-point register.  */
+
+#define MOVE_SF_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0) /* $2 and $f0, then jump to T */
+#define MOVE_SI_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0) /* same expansion as MOVE_SF_RET */
+
+#if defined(__mips64) && defined(__MIPSEB__) /* both parts packed in $2, $f0 in the high half */
+#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f0, $f1); jr T
+#elif defined(__mips64)
+/* The high 32 bits of $2 correspond to the second word in memory;
+   i.e. the imaginary part.  */
+#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f1, $f0); jr T
+#elif __mips_fpr == 64 /* parts in $2/$3 and $f0/$f1 */
+#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1)
+#else /* parts in $2/$3 and $f0/$f2 */
+#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f2)
+#endif
+
+#if defined(__mips64) /* second argument is $5 and $f13 whatever the first one's size */
+#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12
+#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f13
+#define MOVE_SF_BYTE8(D) m##D##c1 $5,$f13
+#else /* second argument is $f14, paired with $5 or $6 by the first one's size */
+#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12
+#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f14
+#define MOVE_SF_BYTE8(D) m##D##c1 $6,$f14
+#endif
+#define MOVE_SI_BYTE0(D) MOVE_SF_BYTE0(D) /* an integer argument uses the same register pair */
+
+#if defined(__mips64) /* one dm{t,f}c1 per double */
+#define MOVE_DF_BYTE0(D) dm##D##c1 $4,$f12
+#define MOVE_DF_BYTE8(D) dm##D##c1 $5,$f13
+#define MOVE_DF_RET(D, T) DELAY##D (T, dm##D##c1 $2,$f0)
+#define MOVE_DC_RET(D, T) dm##D##c1 $3,$f1; MOVE_DF_RET (D, T)
+#elif __mips_fpr == 64 && defined(__MIPSEB__) /* GPR pair per double; the lower-numbered GPR goes with m{t,f}hc1 */
+#define MOVE_DF_BYTE0(D) m##D##c1 $5,$f12; m##D##hc1 $4,$f12
+#define MOVE_DF_BYTE8(D) m##D##c1 $7,$f14; m##D##hc1 $6,$f14
+#define MOVE_DF_RET(D, T) m##D##c1 $3,$f0; DELAY##D (T, m##D##hc1 $2,$f0)
+#define MOVE_DC_RET(D, T) m##D##c1 $5,$f1; m##D##hc1 $4,$f1; MOVE_DF_RET (D, T)
+#elif __mips_fpr == 64 /* GPR pair per double; the lower-numbered GPR goes with m{t,f}c1 */
+#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##hc1 $5,$f12
+#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##hc1 $7,$f14
+#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##hc1 $3,$f0)
+#define MOVE_DC_RET(D, T) m##D##c1 $4,$f1; m##D##hc1 $5,$f1; MOVE_DF_RET (D, T)
+#elif defined(__MIPSEB__) /* GPR pair and FPR pair; lower-numbered GPR with odd FPR */
+/* FPRs are little-endian.  */
+#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f13; m##D##c1 $5,$f12
+#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f15; m##D##c1 $7,$f14
+#define MOVE_DF_RET(D, T) m##D##c1 $2,$f1; DELAY##D (T, m##D##c1 $3,$f0)
+#define MOVE_DC_RET(D, T) m##D##c1 $4,$f3; m##D##c1 $5,$f2; MOVE_DF_RET (D, T)
+#else /* GPR pair and FPR pair; lower-numbered GPR with even FPR */
+#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##c1 $5,$f13
+#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##c1 $7,$f15
+#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1)
+#define MOVE_DC_RET(D, T) m##D##c1 $4,$f2; m##D##c1 $5,$f3; MOVE_DF_RET (D, T)
+#endif
+
+/* Single-precision math.  */
+
+/* Define a function NAME that loads two single-precision values,
+   performs FPU operation OPCODE on them, and returns the single-
+   precision result.  */
+
+#define OPSF3(NAME, OPCODE)    \
+STARTFN (NAME);                        \
+       MOVE_SF_BYTE0 (t);      /* first argument: GPR to FPR */ \
+       MOVE_SF_BYTE4 (t);      /* second argument: GPR to FPR */ \
+       OPCODE  RET,ARG1,ARG2;  \
+       MOVE_SF_RET (f, $31);   /* result: FPR to GPR, then return via $31 */ \
+       ENDFN (NAME)
+
+#ifdef L_m16addsf3
+OPSF3 (__mips16_addsf3, add.s)
+#endif
+#ifdef L_m16subsf3
+OPSF3 (__mips16_subsf3, sub.s)
+#endif
+#ifdef L_m16mulsf3
+OPSF3 (__mips16_mulsf3, mul.s)
+#endif
+#ifdef L_m16divsf3
+OPSF3 (__mips16_divsf3, div.s)
+#endif
+
+/* Define a function NAME that loads a single-precision value,
+   performs FPU operation OPCODE on it, and returns the single-
+   precision result.  */
+
+#define OPSF2(NAME, OPCODE)    \
+STARTFN (NAME);                        \
+       MOVE_SF_BYTE0 (t);      /* argument: GPR to FPR */ \
+       OPCODE  RET,ARG1;       \
+       MOVE_SF_RET (f, $31);   /* result: FPR to GPR, then return via $31 */ \
+       ENDFN (NAME)
+
+#ifdef L_m16negsf2
+OPSF2 (__mips16_negsf2, neg.s)
+#endif
+#ifdef L_m16abssf2
+OPSF2 (__mips16_abssf2, abs.s)
+#endif
+
+/* Single-precision comparisons.  */
+
+/* Define a function NAME that loads two single-precision values,
+   performs floating point comparison OPCODE, and returns TRUE or
+   FALSE (in $2) depending on the result.  */
+
+#define CMPSF(NAME, OPCODE, TRUE, FALSE)       \
+STARTFN (NAME);                                        \
+       MOVE_SF_BYTE0 (t);                      \
+       MOVE_SF_BYTE4 (t);                      \
+       OPCODE  ARG1,ARG2;                      \
+       li      $2,TRUE;        /* assume the condition holds */ \
+       bc1t    1f;             /* it does: keep TRUE */ \
+       li      $2,FALSE;                       \
+1:;                                            \
+       j       $31;                            \
+       ENDFN (NAME)
+
+/* Like CMPSF, but reverse the comparison operands (OPCODE sees ARG2 first).  */
+
+#define REVCMPSF(NAME, OPCODE, TRUE, FALSE)    \
+STARTFN (NAME);                                        \
+       MOVE_SF_BYTE0 (t);                      \
+       MOVE_SF_BYTE4 (t);                      \
+       OPCODE  ARG2,ARG1;                      \
+       li      $2,TRUE;        /* assume the condition holds */ \
+       bc1t    1f;             /* it does: keep TRUE */ \
+       li      $2,FALSE;                       \
+1:;                                            \
+       j       $31;                            \
+       ENDFN (NAME)
+
+#ifdef L_m16eqsf2
+CMPSF (__mips16_eqsf2, c.eq.s, 0, 1)           /* 0 if a == b, else 1 */
+#endif
+#ifdef L_m16nesf2
+CMPSF (__mips16_nesf2, c.eq.s, 0, 1)           /* 0 if a == b, else 1 (same body as eqsf2) */
+#endif
+#ifdef L_m16gtsf2
+REVCMPSF (__mips16_gtsf2, c.lt.s, 1, 0)        /* 1 if b < a, else 0 */
+#endif
+#ifdef L_m16gesf2
+REVCMPSF (__mips16_gesf2, c.le.s, 0, -1)       /* 0 if b <= a, else -1 */
+#endif
+#ifdef L_m16lesf2
+CMPSF (__mips16_lesf2, c.le.s, 0, 1)           /* 0 if a <= b, else 1 */
+#endif
+#ifdef L_m16ltsf2
+CMPSF (__mips16_ltsf2, c.lt.s, -1, 0)          /* -1 if a < b, else 0 */
+#endif
+#ifdef L_m16unordsf2
+CMPSF(__mips16_unordsf2, c.un.s, 1, 0)         /* 1 if c.un.s holds for a, b, else 0 */
+#endif
+
+
+/* Single-precision conversions.  */
+
+#ifdef L_m16fltsisf
+STARTFN (__mips16_floatsisf)
+       MOVE_SF_BYTE0 (t)       /* integer bits from $4 into ARG1 */
+       cvt.s.w RET,ARG1        /* word to single */
+       MOVE_SF_RET (f, $31)
+       ENDFN (__mips16_floatsisf)
+#endif
+
+#ifdef L_m16fltunsisf
+STARTFN (__mips16_floatunsisf)
+       .set    noreorder
+       bltz    $4,1f           /* top bit set: cvt.s.w would read it as negative */
+       MOVE_SF_BYTE0 (t)       /* delay slot, so done on both paths */
+       .set    reorder
+       cvt.s.w RET,ARG1
+       MOVE_SF_RET (f, $31)
+1:             
+       and     $2,$4,1         /* $2 = the bit that the shift below drops */
+       srl     $3,$4,1         /* $3 = $4 / 2 (unsigned) */
+       or      $2,$2,$3        /* halved value with the dropped bit ORed back in */
+       mtc1    $2,RET
+       cvt.s.w RET,RET
+       add.s   RET,RET,RET     /* double the converted value to undo the halving */
+       MOVE_SF_RET (f, $31)
+       ENDFN (__mips16_floatunsisf)
+#endif
+       
+#ifdef L_m16fix_truncsfsi
+STARTFN (__mips16_fix_truncsfsi)
+       MOVE_SF_BYTE0 (t)
+       trunc.w.s RET,ARG1,$4   /* NOTE(review): $4 looks like the scratch GPR of the three-operand assembler form - confirm */
+       MOVE_SI_RET (f, $31)    /* integer result: FPR to $2, then return */
+       ENDFN (__mips16_fix_truncsfsi)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+
+/* Double-precision math.  */
+
+/* Define a function NAME that loads two double-precision values,
+   performs FPU operation OPCODE on them, and returns the double-
+   precision result.  */
+
+#define OPDF3(NAME, OPCODE)    \
+STARTFN (NAME);                        \
+       MOVE_DF_BYTE0 (t);      /* first argument: GPRs to FPR */ \
+       MOVE_DF_BYTE8 (t);      /* second argument: GPRs to FPR */ \
+       OPCODE RET,ARG1,ARG2;   \
+       MOVE_DF_RET (f, $31);   /* result: FPR to GPRs, then return via $31 */ \
+       ENDFN (NAME)
+
+#ifdef L_m16adddf3
+OPDF3 (__mips16_adddf3, add.d)
+#endif
+#ifdef L_m16subdf3
+OPDF3 (__mips16_subdf3, sub.d)
+#endif
+#ifdef L_m16muldf3
+OPDF3 (__mips16_muldf3, mul.d)
+#endif
+#ifdef L_m16divdf3
+OPDF3 (__mips16_divdf3, div.d)
+#endif
+
+/* Define a function NAME that loads a double-precision value,
+   performs FPU operation OPCODE on it, and returns the double-
+   precision result.  */
+
+#define OPDF2(NAME, OPCODE)    \
+STARTFN (NAME);                        \
+       MOVE_DF_BYTE0 (t);      /* argument: GPRs to FPR */ \
+       OPCODE RET,ARG1;        \
+       MOVE_DF_RET (f, $31);   /* result: FPR to GPRs, then return via $31 */ \
+       ENDFN (NAME)
+
+#ifdef L_m16negdf2
+OPDF2 (__mips16_negdf2, neg.d)
+#endif
+#ifdef L_m16absdf2
+OPDF2 (__mips16_absdf2, abs.d)
+#endif
+
+/* Conversions between single and double precision.  */
+
+#ifdef L_m16extsfdf2
+STARTFN (__mips16_extendsfdf2)
+       MOVE_SF_BYTE0 (t)
+       cvt.d.s RET,ARG1        /* single to double */
+       MOVE_DF_RET (f, $31)    /* the result is returned as a double */
+       ENDFN (__mips16_extendsfdf2)
+#endif
+
+#ifdef L_m16trdfsf2
+STARTFN (__mips16_truncdfsf2)
+       MOVE_DF_BYTE0 (t)
+       cvt.s.d RET,ARG1        /* double to single */
+       MOVE_SF_RET (f, $31)    /* the result is returned as a single */
+       ENDFN (__mips16_truncdfsf2)
+#endif
+
+/* Double-precision comparisons.  */
+
+/* Define a function NAME that loads two double-precision values,
+   performs floating point comparison OPCODE, and returns TRUE or
+   FALSE (in $2) depending on the result.  */
+
+#define CMPDF(NAME, OPCODE, TRUE, FALSE)       \
+STARTFN (NAME);                                        \
+       MOVE_DF_BYTE0 (t);                      \
+       MOVE_DF_BYTE8 (t);                      \
+       OPCODE  ARG1,ARG2;                      \
+       li      $2,TRUE;        /* assume the condition holds */ \
+       bc1t    1f;             /* it does: keep TRUE */ \
+       li      $2,FALSE;                       \
+1:;                                            \
+       j       $31;                            \
+       ENDFN (NAME)
+
+/* Like CMPDF, but reverse the comparison operands (OPCODE sees ARG2 first).  */
+
+#define REVCMPDF(NAME, OPCODE, TRUE, FALSE)    \
+STARTFN (NAME);                                        \
+       MOVE_DF_BYTE0 (t);                      \
+       MOVE_DF_BYTE8 (t);                      \
+       OPCODE  ARG2,ARG1;                      \
+       li      $2,TRUE;        /* assume the condition holds */ \
+       bc1t    1f;             /* it does: keep TRUE */ \
+       li      $2,FALSE;                       \
+1:;                                            \
+       j       $31;                            \
+       ENDFN (NAME)
+
+#ifdef L_m16eqdf2
+CMPDF (__mips16_eqdf2, c.eq.d, 0, 1)           /* 0 if a == b, else 1 */
+#endif
+#ifdef L_m16nedf2
+CMPDF (__mips16_nedf2, c.eq.d, 0, 1)           /* 0 if a == b, else 1 (same body as eqdf2) */
+#endif
+#ifdef L_m16gtdf2
+REVCMPDF (__mips16_gtdf2, c.lt.d, 1, 0)        /* 1 if b < a, else 0 */
+#endif
+#ifdef L_m16gedf2
+REVCMPDF (__mips16_gedf2, c.le.d, 0, -1)       /* 0 if b <= a, else -1 */
+#endif
+#ifdef L_m16ledf2
+CMPDF (__mips16_ledf2, c.le.d, 0, 1)           /* 0 if a <= b, else 1 */
+#endif
+#ifdef L_m16ltdf2
+CMPDF (__mips16_ltdf2, c.lt.d, -1, 0)          /* -1 if a < b, else 0 */
+#endif
+#ifdef L_m16unorddf2
+CMPDF(__mips16_unorddf2, c.un.d, 1, 0)         /* 1 if c.un.d holds for a, b, else 0 */
+#endif
+
+/* Double-precision conversions.  */
+
+#ifdef L_m16fltsidf
+STARTFN (__mips16_floatsidf)
+       MOVE_SI_BYTE0 (t)       /* integer bits from $4 into ARG1 */
+       cvt.d.w RET,ARG1        /* word to double */
+       MOVE_DF_RET (f, $31)
+       ENDFN (__mips16_floatsidf)
+#endif
+       
+#ifdef L_m16fltunsidf
+STARTFN (__mips16_floatunsidf)
+       MOVE_SI_BYTE0 (t)
+       cvt.d.w RET,ARG1        /* converts the word as a signed value */
+       bgez    $4,1f           /* non-negative as signed: nothing to correct */
+       li.d    ARG1, 4.294967296e+9    /* 2^32 */
+       add.d   RET, RET, ARG1  /* top bit was set: add 2^32 to the signed result */
+1:     MOVE_DF_RET (f, $31)
+       ENDFN (__mips16_floatunsidf)
+#endif
+       
+#ifdef L_m16fix_truncdfsi
+STARTFN (__mips16_fix_truncdfsi)
+       MOVE_DF_BYTE0 (t)
+       trunc.w.d RET,ARG1,$4   /* NOTE(review): $4 looks like the scratch GPR of the three-operand assembler form - confirm */
+       MOVE_SI_RET (f, $31)    /* integer result: FPR to $2, then return */
+       ENDFN (__mips16_fix_truncdfsi)
+#endif
+#endif /* !__mips_single_float */
+
+/* Define a function NAME that moves a return value of mode MODE from
+   GPRs to FPRs (the "t" direction) and then returns via $31.  */
+
+#define RET_FUNCTION(NAME, MODE)       \
+STARTFN (NAME);                                \
+       MOVE_##MODE##_RET (t, $31);     \
+       ENDFN (NAME)
+
+#ifdef L_m16retsf
+RET_FUNCTION (__mips16_ret_sf, SF)
+#endif
+
+#ifdef L_m16retsc
+RET_FUNCTION (__mips16_ret_sc, SC)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16retdf
+RET_FUNCTION (__mips16_ret_df, DF)
+#endif
+
+#ifdef L_m16retdc
+RET_FUNCTION (__mips16_ret_dc, DC)
+#endif
+#endif /* !__mips_single_float */
+
+/* STUB_ARGS_X copies the arguments from GPRs to FPRs for argument
+   code X.  X is calculated as ARG1 + ARG2 * 4 (so (sf, df) is code
+   9), where ARG1 and ARG2 classify the first and second arguments:
+
+       1: a single-precision argument
+       2: a double-precision argument
+       0: no argument, or not one of the above.  */
+
+#define STUB_ARGS_0                                            /* () */
+#define STUB_ARGS_1 MOVE_SF_BYTE0 (t)                          /* (sf) */
+#define STUB_ARGS_5 MOVE_SF_BYTE0 (t); MOVE_SF_BYTE4 (t)       /* (sf, sf) */
+#define STUB_ARGS_9 MOVE_SF_BYTE0 (t); MOVE_DF_BYTE8 (t)       /* (sf, df) */
+#define STUB_ARGS_2 MOVE_DF_BYTE0 (t)                          /* (df) */
+#define STUB_ARGS_6 MOVE_DF_BYTE0 (t); MOVE_SF_BYTE8 (t)       /* (df, sf) */
+#define STUB_ARGS_10 MOVE_DF_BYTE0 (t); MOVE_DF_BYTE8 (t)      /* (df, df) */
+
+/* These functions are used by 16-bit code when calling via a function
+   pointer.  They must copy the floating point arguments from the GPRs
+   to FPRs and then jump to the function whose address is in $2.  */
+
+#define CALL_STUB_NO_RET(NAME, CODE)   \
+STARTFN (NAME);                                \
+       STUB_ARGS_##CODE;               \
+       .set    noreorder;              \
+       jr      $2;             /* plain jump: the stub does not write $31 */ \
+       move    $25,$2;         /* delay slot: $25 = callee address */ \
+       .set    reorder;                \
+       ENDFN (NAME)
+
+#ifdef L_m16stub1
+CALL_STUB_NO_RET (__mips16_call_stub_1, 1)
+#endif
+
+#ifdef L_m16stub5
+CALL_STUB_NO_RET (__mips16_call_stub_5, 5)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+
+#ifdef L_m16stub2
+CALL_STUB_NO_RET (__mips16_call_stub_2, 2)
+#endif
+
+#ifdef L_m16stub6
+CALL_STUB_NO_RET (__mips16_call_stub_6, 6)
+#endif
+
+#ifdef L_m16stub9
+CALL_STUB_NO_RET (__mips16_call_stub_9, 9)
+#endif
+
+#ifdef L_m16stub10
+CALL_STUB_NO_RET (__mips16_call_stub_10, 10)
+#endif
+#endif /* !__mips_single_float */
+
+/* Now we have the same set of functions, except that this time the
+   function being called returns an SFmode, SCmode, DFmode or DCmode
+   value; we need to instantiate a set for each case.  The calling
+   function will arrange to preserve $18, so these functions are free
+   to use it to hold the return address.
+
+   Note that we do not know whether the function we are calling is 16
+   bit or 32 bit.  However, it does not matter, because 16-bit
+   functions always return floating point values in both the gp and
+   the fp regs.  It would be possible to check whether the function
+   being called is 16 bits, in which case the copy is unnecessary;
+   however, it's faster to always do the copy.  */
+
+#define CALL_STUB_RET(NAME, CODE, MODE)        \
+STARTFN (NAME);                                \
+       move    $18,$31;        /* keep our return address: jalr below overwrites $31 */ \
+       STUB_ARGS_##CODE;               \
+       .set    noreorder;              \
+       jalr    $2;                     \
+       move    $25,$2;         /* delay slot: $25 = callee address */ \
+       .set    reorder;                \
+       MOVE_##MODE##_RET (f, $18);     /* copy the result from FPRs to GPRs, return via $18 */ \
+       ENDFN (NAME)
+
+/* First, instantiate the single-float set (the callee returns an SFmode value).  */
+
+#ifdef L_m16stubsf0
+CALL_STUB_RET (__mips16_call_stub_sf_0, 0, SF)
+#endif
+
+#ifdef L_m16stubsf1
+CALL_STUB_RET (__mips16_call_stub_sf_1, 1, SF)
+#endif
+
+#ifdef L_m16stubsf5
+CALL_STUB_RET (__mips16_call_stub_sf_5, 5, SF)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16stubsf2
+CALL_STUB_RET (__mips16_call_stub_sf_2, 2, SF)
+#endif
+
+#ifdef L_m16stubsf6
+CALL_STUB_RET (__mips16_call_stub_sf_6, 6, SF)
+#endif
+
+#ifdef L_m16stubsf9
+CALL_STUB_RET (__mips16_call_stub_sf_9, 9, SF)
+#endif
+
+#ifdef L_m16stubsf10
+CALL_STUB_RET (__mips16_call_stub_sf_10, 10, SF)
+#endif
+#endif /* !__mips_single_float */
+
+
+/* Now we have the same set of functions again, except that this time
+   the function being called returns a DFmode value.  */
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16stubdf0
+CALL_STUB_RET (__mips16_call_stub_df_0, 0, DF)
+#endif
+
+#ifdef L_m16stubdf1
+CALL_STUB_RET (__mips16_call_stub_df_1, 1, DF)
+#endif
+
+#ifdef L_m16stubdf5
+CALL_STUB_RET (__mips16_call_stub_df_5, 5, DF)
+#endif
+
+#ifdef L_m16stubdf2
+CALL_STUB_RET (__mips16_call_stub_df_2, 2, DF)
+#endif
+
+#ifdef L_m16stubdf6
+CALL_STUB_RET (__mips16_call_stub_df_6, 6, DF)
+#endif
+
+#ifdef L_m16stubdf9
+CALL_STUB_RET (__mips16_call_stub_df_9, 9, DF)
+#endif
+
+#ifdef L_m16stubdf10
+CALL_STUB_RET (__mips16_call_stub_df_10, 10, DF)
+#endif
+#endif /* !__mips_single_float */
+
+
+/* Here we have the same set of functions again, this time
+   for when the function being called returns an SCmode value.  */
+
+#ifdef L_m16stubsc0
+CALL_STUB_RET (__mips16_call_stub_sc_0, 0, SC)
+#endif
+
+#ifdef L_m16stubsc1
+CALL_STUB_RET (__mips16_call_stub_sc_1, 1, SC)
+#endif
+
+#ifdef L_m16stubsc5
+CALL_STUB_RET (__mips16_call_stub_sc_5, 5, SC)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16stubsc2
+CALL_STUB_RET (__mips16_call_stub_sc_2, 2, SC)
+#endif
+
+#ifdef L_m16stubsc6
+CALL_STUB_RET (__mips16_call_stub_sc_6, 6, SC)
+#endif
+
+#ifdef L_m16stubsc9
+CALL_STUB_RET (__mips16_call_stub_sc_9, 9, SC)
+#endif
+
+#ifdef L_m16stubsc10
+CALL_STUB_RET (__mips16_call_stub_sc_10, 10, SC)
+#endif
+#endif /* !__mips_single_float */
+
+
+/* Finally, another set of functions, for callees that return a DCmode value.  */
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16stubdc0
+CALL_STUB_RET (__mips16_call_stub_dc_0, 0, DC)
+#endif
+
+#ifdef L_m16stubdc1
+CALL_STUB_RET (__mips16_call_stub_dc_1, 1, DC)
+#endif
+
+#ifdef L_m16stubdc5
+CALL_STUB_RET (__mips16_call_stub_dc_5, 5, DC)
+#endif
+
+#ifdef L_m16stubdc2
+CALL_STUB_RET (__mips16_call_stub_dc_2, 2, DC)
+#endif
+
+#ifdef L_m16stubdc6
+CALL_STUB_RET (__mips16_call_stub_dc_6, 6, DC)
+#endif
+
+#ifdef L_m16stubdc9
+CALL_STUB_RET (__mips16_call_stub_dc_9, 9, DC)
+#endif
+
+#ifdef L_m16stubdc10
+CALL_STUB_RET (__mips16_call_stub_dc_10, 10, DC)
+#endif
+#endif /* !__mips_single_float */
+#endif
diff --git a/libgcc/config/mips/t-mips16 b/libgcc/config/mips/t-mips16

index 46c7472f5f69d421c8e58e1b5b912a3d3fd757c7..5553ed76e2d5a5602fcea8bdbd8b43a04838155c 100644 (file)
--- a/libgcc/config/mips/t-mips16
+++ b/libgcc/config/mips/t-mips16
@@ -1,3 +1,43 @@
+# Copyright (C) 2007, 2008, 2011 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = mips/mips16.S
+LIB1ASMFUNCS = _m16addsf3 _m16subsf3 _m16mulsf3 _m16divsf3 \
+       _m16eqsf2 _m16nesf2 _m16gtsf2 _m16gesf2 _m16lesf2 _m16ltsf2 \
+       _m16unordsf2 \
+       _m16fltsisf _m16fix_truncsfsi _m16fltunsisf \
+       _m16adddf3 _m16subdf3 _m16muldf3 _m16divdf3 \
+       _m16extsfdf2 _m16trdfsf2 \
+       _m16eqdf2 _m16nedf2 _m16gtdf2 _m16gedf2 _m16ledf2 _m16ltdf2 \
+       _m16unorddf2 \
+       _m16fltsidf _m16fix_truncdfsi _m16fltunsidf \
+       _m16retsf _m16retdf \
+       _m16retsc _m16retdc \
+       _m16stub1 _m16stub2 _m16stub5 _m16stub6 _m16stub9 _m16stub10 \
+       _m16stubsf0 _m16stubsf1 _m16stubsf2 _m16stubsf5 _m16stubsf6 \
+       _m16stubsf9 _m16stubsf10 \
+       _m16stubdf0 _m16stubdf1 _m16stubdf2 _m16stubdf5 _m16stubdf6 \
+       _m16stubdf9 _m16stubdf10 \
+       _m16stubsc0 _m16stubsc1 _m16stubsc2 _m16stubsc5 _m16stubsc6 \
+       _m16stubsc9 _m16stubsc10 \
+       _m16stubdc0 _m16stubdc1 _m16stubdc2 _m16stubdc5 _m16stubdc6 \
+       _m16stubdc9 _m16stubdc10
+
  SYNC = yes
  SYNC_CFLAGS = -mno-mips16
  
diff --git a/libgcc/config/pa/milli64.S b/libgcc/config/pa/milli64.S

new file mode 100644 (file)

index 0000000..2e9c4f7
--- /dev/null
+++ b/libgcc/config/pa/milli64.S
@@ -0,0 +1,2134 @@
+/* 32 and 64-bit millicode, original author Hewlett-Packard
+   adapted for gcc by Paul Bame <bame@debian.org>
+   and Alan Modra <alan@linuxcare.com.au>.
+
+   Copyright 2001, 2002, 2003, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifdef pa64
+        .level  2.0w
+#endif
+
+/* Hardware General Registers.  */
+r0:    .reg    %r0
+r1:    .reg    %r1
+r2:    .reg    %r2
+r3:    .reg    %r3
+r4:    .reg    %r4
+r5:    .reg    %r5
+r6:    .reg    %r6
+r7:    .reg    %r7
+r8:    .reg    %r8
+r9:    .reg    %r9
+r10:   .reg    %r10
+r11:   .reg    %r11
+r12:   .reg    %r12
+r13:   .reg    %r13
+r14:   .reg    %r14
+r15:   .reg    %r15
+r16:   .reg    %r16
+r17:   .reg    %r17
+r18:   .reg    %r18
+r19:   .reg    %r19
+r20:   .reg    %r20
+r21:   .reg    %r21
+r22:   .reg    %r22
+r23:   .reg    %r23
+r24:   .reg    %r24
+r25:   .reg    %r25
+r26:   .reg    %r26
+r27:   .reg    %r27
+r28:   .reg    %r28
+r29:   .reg    %r29
+r30:   .reg    %r30
+r31:   .reg    %r31
+
+/* Hardware Space Registers.  */
+sr0:   .reg    %sr0
+sr1:   .reg    %sr1
+sr2:   .reg    %sr2
+sr3:   .reg    %sr3
+sr4:   .reg    %sr4
+sr5:   .reg    %sr5
+sr6:   .reg    %sr6
+sr7:   .reg    %sr7
+
+/* Hardware Floating Point Registers.  */
+fr0:   .reg    %fr0
+fr1:   .reg    %fr1
+fr2:   .reg    %fr2
+fr3:   .reg    %fr3
+fr4:   .reg    %fr4
+fr5:   .reg    %fr5
+fr6:   .reg    %fr6
+fr7:   .reg    %fr7
+fr8:   .reg    %fr8
+fr9:   .reg    %fr9
+fr10:  .reg    %fr10
+fr11:  .reg    %fr11
+fr12:  .reg    %fr12
+fr13:  .reg    %fr13
+fr14:  .reg    %fr14
+fr15:  .reg    %fr15
+
+/* Hardware Control Registers.  */
+cr11:  .reg    %cr11
+sar:   .reg    %cr11   /* Shift Amount Register */
+
+/* Software Architecture General Registers.  */
+rp:    .reg    r2      /* return pointer */
+#ifdef pa64
+mrp:   .reg    r2      /* millicode return pointer */
+#else
+mrp:   .reg    r31     /* millicode return pointer */
+#endif
+ret0:  .reg    r28     /* return value */
+ret1:  .reg    r29     /* return value (high part of double) */
+sp:    .reg    r30     /* stack pointer */
+dp:    .reg    r27     /* data pointer */
+arg0:  .reg    r26     /* argument */
+arg1:  .reg    r25     /* argument or high part of double argument */
+arg2:  .reg    r24     /* argument */
+arg3:  .reg    r23     /* argument or high part of double argument */
+
+/* Software Architecture Space Registers.  */
+/*             sr0     ; return link from BLE */
+sret:  .reg    sr1     /* return value */
+sarg:  .reg    sr1     /* argument */
+/*             sr4     ; PC SPACE tracker */
+/*             sr5     ; process private data */
+
+/* Frame Offsets (millicode convention!)  Used when calling other
+   millicode routines.  Stack unwinding is dependent upon these
+   definitions.  */
+r31_slot:      .equ    -20     /* "current RP" slot */
+sr0_slot:      .equ    -16     /* "static link" slot */
+#if defined(pa64)
+mrp_slot:       .equ    -16    /* "current RP" slot */
+psp_slot:       .equ    -8     /* "previous SP" slot */
+#else
+mrp_slot:      .equ    -20     /* "current RP" slot (replacing "r31_slot") */
+#endif
+
+
+#define DEFINE(name,value)name:        .EQU    value   /* expands to: name: .EQU value */
+#define RDEFINE(name,value)name:       .REG    value   /* expands to: name: .REG value */
+#ifdef milliext        /* milliext: branch through space register sr7, return through sr0 */
+#define MILLI_BE(lbl)   BE    lbl(sr7,r0)      /* branch to another millicode routine */
+#define MILLI_BEN(lbl)  BE,n  lbl(sr7,r0)      /* as MILLI_BE, with the ,n completer */
+#define MILLI_BLE(lbl) BLE   lbl(sr7,r0)       /* branch-and-link form */
+#define MILLI_BLEN(lbl)        BLE,n lbl(sr7,r0)       /* as MILLI_BLE, with the ,n completer */
+#define MILLIRETN      BE,n  0(sr0,mrp)        /* return through mrp, with the ,n completer */
+#define MILLIRET       BE    0(sr0,mrp)        /* return through mrp */
+#define MILLI_RETN     BE,n  0(sr0,mrp)        /* same expansion as MILLIRETN */
+#define MILLI_RET      BE    0(sr0,mrp)        /* same expansion as MILLIRET */
+#else  /* not milliext: plain B/BL/BV branches, with mrp as link and return register */
+#define MILLI_BE(lbl)  B     lbl       /* branch to another millicode routine */
+#define MILLI_BEN(lbl)  B,n   lbl      /* as MILLI_BE, with the ,n completer */
+#define MILLI_BLE(lbl) BL    lbl,mrp   /* branch and link, return address in mrp */
+#define MILLI_BLEN(lbl)        BL,n  lbl,mrp   /* as MILLI_BLE, with the ,n completer */
+#define MILLIRETN      BV,n  0(mrp)    /* return through mrp, with the ,n completer */
+#define MILLIRET       BV    0(mrp)    /* return through mrp */
+#define MILLI_RETN     BV,n  0(mrp)    /* same expansion as MILLIRETN */
+#define MILLI_RET      BV    0(mrp)    /* same expansion as MILLIRET */
+#endif /* milliext */
+
+#ifdef __STDC__        /* CAT(a,b) glues a and b into a single token */
+#define CAT(a,b)       a##b    /* ISO C preprocessor: paste operator */
+#else  /* pre-ISO preprocessor: the empty comment between a and b vanishes */
+#define CAT(a,b)       a/**/b
+#endif /* __STDC__ */
+
+#ifdef ELF     /* ELF: .section directives and .L-prefixed local labels */
+#define SUBSPA_MILLI    .section .text
+#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16
+#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16
+#define ATTR_MILLI     /* empty for ELF */
+#define SUBSPA_DATA     .section .data
+#define ATTR_DATA      /* empty for ELF */
+#define GLOBAL          $global$
+#define GSYM(sym)       !sym:  /* global label definition; leading ! separates it from preceding text, as in SUBSPA_MILLI_DIV */
+#define LSYM(sym)       !CAT(.L,sym:)  /* local label definition: .L<sym>: */
+#define LREF(sym)       CAT(.L,sym)    /* reference to the label defined by LSYM(sym) */
+
+#else  /* not ELF: .subspa/.space directives and L$-prefixed local labels */
+
+#ifdef coff
+/* This used to be .milli but since link32 places different named
+   sections in different segments millicode ends up a long ways away
+   from .text (1meg?).  This way they will be a lot closer.
+
+   The SUBSPA_MILLI_* specify locality sets for certain millicode
+   modules in order to ensure that modules that call one another are
+   placed close together. Without locality sets this is unlikely to
+   happen because of the Dynamite linker library search algorithm. We
+   want these modules close together so that short calls always reach
+   (we don't want to require long calls or use long call stubs).  */
+
+#define SUBSPA_MILLI    .subspa .text
+#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16
+#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16
+#define ATTR_MILLI      .attr code,read,execute
+#define SUBSPA_DATA     .subspa .data
+#define ATTR_DATA       .attr init_data,read,write
+#define GLOBAL          _gp
+#else  /* neither ELF nor coff: all millicode shares the $MILLICODE$ subspace */
+#define SUBSPA_MILLI    .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8
+#define SUBSPA_MILLI_DIV SUBSPA_MILLI
+#define SUBSPA_MILLI_MUL SUBSPA_MILLI
+#define ATTR_MILLI     /* empty in this configuration */
+#define SUBSPA_DATA     .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero
+#define ATTR_DATA      /* empty in this configuration */
+#define GLOBAL          $global$
+#endif /* coff */
+#define SPACE_DATA      .space $PRIVATE$,spnum=1,sort=16
+
+#define GSYM(sym)       !sym   /* global label definition (no trailing colon in this variant) */
+#define LSYM(sym)       !CAT(L$,sym)   /* local label definition: L$<sym> */
+#define LREF(sym)       CAT(L$,sym)    /* reference to the label defined by LSYM(sym) */
+#endif /* ELF */
+
+#ifdef L_dyncall       /* $$dyncall: indirect call through %r22 (plain address or plabel) */
+       SUBSPA_MILLI
+       ATTR_DATA       /* NOTE(review): $$divI/$$divU/$$remI use ATTR_MILLI here; the two differ only under coff -- confirm intent */
+GSYM($$dyncall)
+       .export $$dyncall,millicode
+       .proc
+       .callinfo       millicode
+       .entry
+       bb,>=,n %r22,30,LREF(1)         ; branch if not plabel address (tests bit 30 of %r22)
+       depi    0,31,2,%r22             ; clear the two least significant bits
+       ldw     4(%r22),%r19            ; load new LTP value (word at offset 4) into %r19
+       ldw     0(%r22),%r22            ; load address of target (word at offset 0)
+LSYM(1)
+#ifdef LINUX   /* LINUX: branch with bv, space registers are left alone */
+       bv      %r0(%r22)               ; branch to the real target
+#else  /* otherwise: load the space id of the target into sr0 and branch with be */
+       ldsid   (%sr0,%r22),%r1         ; get the "space ident" selected by r22
+       mtsp    %r1,%sr0                ; move that space identifier into sr0
+       be      0(%sr0,%r22)            ; branch to the real target
+#endif
+       stw     %r2,-24(%r30)           ; delay slot of the branch: save return address into frame marker
+       .exit
+       .procend
+#endif /* L_dyncall */
+
+#ifdef L_divI
+/* ROUTINES:   $$divI, $$divoI
+
+   Single precision divide for signed binary integers.
+
+   The quotient is truncated towards zero.
+   The sign of the quotient is the XOR of the signs of the dividend and
+   divisor.
+   Divide by zero is trapped.
+   Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.
+
+   INPUT REGISTERS:
+   .   arg0 == dividend
+   .   arg1 == divisor
+   .   mrp  == return pc
+   .   sr0  == return space when called externally
+
+   OUTPUT REGISTERS:
+   .   arg0 =  undefined
+   .   arg1 =  undefined
+   .   ret1 =  quotient
+
+   OTHER REGISTERS AFFECTED:
+   .   r1   =  undefined
+
+   SIDE EFFECTS:
+   .   Causes a trap under the following conditions:
+   .           divisor is zero  (traps with ADDIT,=  0,25,0)
+   .           dividend==-2**31  and divisor==-1 and routine is $$divoI
+   .                            (traps with ADDO  26,25,0)
+   .   Changes memory at the following places:
+   .           NONE
+
+   PERMISSIBLE CONTEXT:
+   .   Unwindable.
+   .   Suitable for internal or external millicode.
+   .   Assumes the special millicode register conventions.
+
+   DISCUSSION:
+   .   Branches to other millicode routines using MILLI_BEN (BE,n or B,n):
+   .           $$divI_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
+   .
+   .   For selected divisors, calls a divide by constant routine written by
+   .   Karl Pettis.  Eligible divisors are 1..15 excluding 11 and 13.
+   .
+   .   The only overflow case is -2**31 divided by -1.
+   .   Both routines return -2**31 but only $$divoI traps.  */
+
+RDEFINE(temp,r1)
+RDEFINE(retreg,ret1)   /*  r29 */
+RDEFINE(temp1,arg0)
+       SUBSPA_MILLI_DIV
+       ATTR_MILLI
+       .import $$divI_2,millicode
+       .import $$divI_3,millicode
+       .import $$divI_4,millicode
+       .import $$divI_5,millicode
+       .import $$divI_6,millicode
+       .import $$divI_7,millicode
+       .import $$divI_8,millicode
+       .import $$divI_9,millicode
+       .import $$divI_10,millicode
+       .import $$divI_12,millicode
+       .import $$divI_14,millicode
+       .import $$divI_15,millicode
+       .export $$divI,millicode
+       .export $$divoI,millicode
+       .proc
+       .callinfo       millicode
+       .entry
+GSYM($$divoI)
+       comib,=,n  -1,arg1,LREF(negative1)      /*  when divisor == -1 */
+GSYM($$divI)
+       ldo     -1(arg1),temp           /*  is there at most one bit set ? */
+       and,<>  arg1,temp,r0            /*  if not, don't use power of 2 divide */
+       addi,>  0,arg1,r0               /*  if divisor > 0, use power of 2 divide */
+       b,n     LREF(neg_denom)
+LSYM(pow2)
+       addi,>= 0,arg0,retreg           /*  if numerator is negative, add the */
+       add     arg0,temp,retreg        /*  (denominator -1) to correct for shifts */
+       extru,= arg1,15,16,temp         /*  test denominator with 0xffff0000 */
+       extrs   retreg,15,16,retreg     /*  retreg = retreg >> 16 */
+       or      arg1,temp,arg1          /*  arg1 = arg1 | (arg1 >> 16) */
+       ldi     0xcc,temp1              /*  setup 0xcc in temp1 */
+       extru,= arg1,23,8,temp          /*  test denominator with 0xff00 */
+       extrs   retreg,23,24,retreg     /*  retreg = retreg >> 8 */
+       or      arg1,temp,arg1          /*  arg1 = arg1 | (arg1 >> 8) */
+       ldi     0xaa,temp               /*  setup 0xaa in temp */
+       extru,= arg1,27,4,r0            /*  test denominator with 0xf0 */
+       extrs   retreg,27,28,retreg     /*  retreg = retreg >> 4 */
+       and,=   arg1,temp1,r0           /*  test denominator with 0xcc */
+       extrs   retreg,29,30,retreg     /*  retreg = retreg >> 2 */
+       and,=   arg1,temp,r0            /*  test denominator with 0xaa */
+       extrs   retreg,30,31,retreg     /*  retreg = retreg >> 1 */
+       MILLIRETN
+LSYM(neg_denom)
+       addi,<  0,arg1,r0               /*  if arg1 >= 0, it's not power of 2 */
+       b,n     LREF(regular_seq)
+       sub     r0,arg1,temp            /*  make denominator positive */
+       comb,=,n  arg1,temp,LREF(regular_seq)   /*  test against 0x80000000 and 0 */
+       ldo     -1(temp),retreg         /*  is there at most one bit set ? */
+       and,=   temp,retreg,r0          /*  if so, the denominator is power of 2 */
+       b,n     LREF(regular_seq)
+       sub     r0,arg0,retreg          /*  negate numerator */
+       comb,=,n arg0,retreg,LREF(regular_seq) /*  test against 0x80000000 */
+       copy    retreg,arg0             /*  set up arg0, arg1 and temp  */
+       copy    temp,arg1               /*  before branching to pow2 */
+       b       LREF(pow2)
+       ldo     -1(arg1),temp
+LSYM(regular_seq)
+       comib,>>=,n 15,arg1,LREF(small_divisor)
+       add,>=  0,arg0,retreg           /*  move dividend, if retreg < 0, */
+LSYM(normal)
+       subi    0,retreg,retreg         /*    make it positive */
+       sub     0,arg1,temp             /*  clear carry,  */
+                                       /*    negate the divisor */
+       ds      0,temp,0                /*  set V-bit to the comple- */
+                                       /*    ment of the divisor sign */
+       add     retreg,retreg,retreg    /*  shift msb bit into carry */
+       ds      r0,arg1,temp            /*  1st divide step, if no carry */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  2nd divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  3rd divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  4th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  5th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  6th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  7th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  8th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  9th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  10th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  11th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  12th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  13th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  14th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  15th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  16th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  17th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  18th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  19th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  20th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  21st divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  22nd divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  23rd divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  24th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  25th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  26th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  27th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  28th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  29th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  30th divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  31st divide step */
+       addc    retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds      temp,arg1,temp          /*  32nd divide step, */
+       addc    retreg,retreg,retreg    /*  shift last retreg bit into retreg */
+       xor,>=  arg0,arg1,0             /*  get correct sign of quotient */
+         sub   0,retreg,retreg         /*    based on operand signs */
+       MILLIRETN
+       nop
+
+LSYM(small_divisor)
+
+#if defined(pa64)
+/*  Clear the upper 32 bits of the arg1 register.  We are working with */
+/*  small divisors (and 32-bit integers)   We must not be misled  */
+/*  by "1" bits left in the upper 32 bits.  */
+       depd %r0,31,32,%r25
+#endif
+       blr,n   arg1,r0         /*  dispatch on arg1 into the table below (two instructions per entry) */
+       nop
+/*  table for divisor == 0,1, ... ,15 */
+       addit,= 0,arg1,r0       /*  trap if divisor == 0 */
+       nop
+       MILLIRET                /*  divisor == 1 */
+       copy    arg0,retreg
+       MILLI_BEN($$divI_2)     /*  divisor == 2 */
+       nop
+       MILLI_BEN($$divI_3)     /*  divisor == 3 */
+       nop
+       MILLI_BEN($$divI_4)     /*  divisor == 4 */
+       nop
+       MILLI_BEN($$divI_5)     /*  divisor == 5 */
+       nop
+       MILLI_BEN($$divI_6)     /*  divisor == 6 */
+       nop
+       MILLI_BEN($$divI_7)     /*  divisor == 7 */
+       nop
+       MILLI_BEN($$divI_8)     /*  divisor == 8 */
+       nop
+       MILLI_BEN($$divI_9)     /*  divisor == 9 */
+       nop
+       MILLI_BEN($$divI_10)    /*  divisor == 10 */
+       nop
+       b       LREF(normal)            /*  divisor == 11 */
+       add,>=  0,arg0,retreg
+       MILLI_BEN($$divI_12)    /*  divisor == 12 */
+       nop
+       b       LREF(normal)            /*  divisor == 13 */
+       add,>=  0,arg0,retreg
+       MILLI_BEN($$divI_14)    /*  divisor == 14 */
+       nop
+       MILLI_BEN($$divI_15)    /*  divisor == 15 */
+       nop
+
+LSYM(negative1)
+       sub     0,arg0,retreg   /*  result is negation of dividend */
+       MILLIRET
+       addo    arg0,arg1,r0    /*  trap iff dividend==0x80000000 && divisor==-1 */
+       .exit
+       .procend
+       .end
+#endif
+
+#ifdef L_divU
+/* ROUTINE:    $$divU
+   .
+   .   Single precision divide for unsigned integers.
+   .
+   .   Quotient is truncated towards zero.
+   .   Traps on divide by zero.
+
+   INPUT REGISTERS:
+   .   arg0 == dividend
+   .   arg1 == divisor
+   .   mrp  == return pc
+   .   sr0  == return space when called externally
+
+   OUTPUT REGISTERS:
+   .   arg0 =  undefined
+   .   arg1 =  undefined
+   .   ret1 =  quotient
+
+   OTHER REGISTERS AFFECTED:
+   .   r1   =  undefined
+
+   SIDE EFFECTS:
+   .   Causes a trap under the following conditions:
+   .           divisor is zero
+   .   Changes memory at the following places:
+   .           NONE
+
+   PERMISSIBLE CONTEXT:
+   .   Unwindable.
+   .   Does not create a stack frame.
+   .   Suitable for internal or external millicode.
+   .   Assumes the special millicode register conventions.
+
+   DISCUSSION:
+   .   Branches to other millicode routines using MILLI_BEN (BE,n or B,n):
+   .           $$divU_# for 3,5,6,7,9,10,12,14,15
+   .
+   .   For selected small divisors calls the special divide by constant
+   .   routines written by Karl Pettis.  These are: 3,5,6,7,9,10,12,14,15.  */
+
+RDEFINE(temp,r1)
+RDEFINE(retreg,ret1)   /* r29 */
+RDEFINE(temp1,arg0)
+       SUBSPA_MILLI_DIV
+       ATTR_MILLI
+       .export $$divU,millicode
+       .import $$divU_3,millicode
+       .import $$divU_5,millicode
+       .import $$divU_6,millicode
+       .import $$divU_7,millicode
+       .import $$divU_9,millicode
+       .import $$divU_10,millicode
+       .import $$divU_12,millicode
+       .import $$divU_14,millicode
+       .import $$divU_15,millicode
+       .proc
+       .callinfo       millicode
+       .entry
+GSYM($$divU)
+/* The subi following the comib at regular_seq is not nullified since it does
+   no harm and its result is used by the two cases that branch back to "normal".  */
+       ldo     -1(arg1),temp           /* is there at most one bit set ? */
+       and,=   arg1,temp,r0            /* if so, denominator is power of 2 */
+       b       LREF(regular_seq)
+       addit,= 0,arg1,0                /* trap for zero dvr */
+       copy    arg0,retreg
+       extru,= arg1,15,16,temp         /* test denominator with 0xffff0000 */
+       extru   retreg,15,16,retreg     /* retreg = retreg >> 16 */
+       or      arg1,temp,arg1          /* arg1 = arg1 | (arg1 >> 16) */
+       ldi     0xcc,temp1              /* setup 0xcc in temp1 */
+       extru,= arg1,23,8,temp          /* test denominator with 0xff00 */
+       extru   retreg,23,24,retreg     /* retreg = retreg >> 8 */
+       or      arg1,temp,arg1          /* arg1 = arg1 | (arg1 >> 8) */
+       ldi     0xaa,temp               /* setup 0xaa in temp */
+       extru,= arg1,27,4,r0            /* test denominator with 0xf0 */
+       extru   retreg,27,28,retreg     /* retreg = retreg >> 4 */
+       and,=   arg1,temp1,r0           /* test denominator with 0xcc */
+       extru   retreg,29,30,retreg     /* retreg = retreg >> 2 */
+       and,=   arg1,temp,r0            /* test denominator with 0xaa */
+       extru   retreg,30,31,retreg     /* retreg = retreg >> 1 */
+       MILLIRETN
+       nop     
+LSYM(regular_seq)
+       comib,>=  15,arg1,LREF(special_divisor)
+       subi    0,arg1,temp             /* clear carry, negate the divisor */
+       ds      r0,temp,r0              /* set V-bit to 1 */
+LSYM(normal)
+       add     arg0,arg0,retreg        /* shift msb bit into carry */
+       ds      r0,arg1,temp            /* 1st divide step, if no carry */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 2nd divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 3rd divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 4th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 5th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 6th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 7th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 8th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 9th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 10th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 11th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 12th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 13th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 14th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 15th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 16th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 17th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 18th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 19th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 20th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 21st divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 22nd divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 23rd divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 24th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 25th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 26th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 27th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 28th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 29th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 30th divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 31st divide step */
+       addc    retreg,retreg,retreg    /* shift retreg with/into carry */
+       ds      temp,arg1,temp          /* 32nd divide step, */
+       MILLIRET
+       addc    retreg,retreg,retreg    /* shift last retreg bit into retreg */
+
+/* Handle the cases where divisor is a small constant or has high bit on.  */
+LSYM(special_divisor)
+/*     blr     arg1,r0 */
+/*     comib,>,n  0,arg1,LREF(big_divisor) ; nullify previous instruction */
+
+/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
+   generating such a blr, comib sequence. A problem in nullification. So I
+   rewrote this code.  */
+
+#if defined(pa64)
+/* Clear the upper 32 bits of the arg1 register.  We are working with
+   small divisors (and 32-bit unsigned integers)   We must not be misled
+   by "1" bits left in the upper 32 bits.  */
+       depd %r0,31,32,%r25
+#endif
+       comib,> 0,arg1,LREF(big_divisor)
+       nop
+       blr     arg1,r0         /* dispatch on arg1 into the table below (two instructions per entry) */
+       nop
+
+LSYM(zero_divisor)     /* this label is here to provide external visibility */
+       addit,= 0,arg1,0                /* trap for zero dvr */
+       nop
+       MILLIRET                        /* divisor == 1 */
+       copy    arg0,retreg
+       MILLIRET                        /* divisor == 2 */
+       extru   arg0,30,31,retreg
+       MILLI_BEN($$divU_3)             /* divisor == 3 */
+       nop
+       MILLIRET                        /* divisor == 4 */
+       extru   arg0,29,30,retreg
+       MILLI_BEN($$divU_5)             /* divisor == 5 */
+       nop
+       MILLI_BEN($$divU_6)             /* divisor == 6 */
+       nop
+       MILLI_BEN($$divU_7)             /* divisor == 7 */
+       nop
+       MILLIRET                        /* divisor == 8 */
+       extru   arg0,28,29,retreg
+       MILLI_BEN($$divU_9)             /* divisor == 9 */
+       nop
+       MILLI_BEN($$divU_10)            /* divisor == 10 */
+       nop
+       b       LREF(normal)            /* divisor == 11 */
+       ds      r0,temp,r0              /* set V-bit to 1 */
+       MILLI_BEN($$divU_12)            /* divisor == 12 */
+       nop
+       b       LREF(normal)            /* divisor == 13 */
+       ds      r0,temp,r0              /* set V-bit to 1 */
+       MILLI_BEN($$divU_14)            /* divisor == 14 */
+       nop
+       MILLI_BEN($$divU_15)            /* divisor == 15 */
+       nop
+
+/* Handle the case where the high bit is on in the divisor.
+   Compute:    if( dividend>=divisor) quotient=1; else quotient=0;
+   Note:       dividend>=divisor iff dividend-divisor does not borrow
+   and         not borrow iff carry.  */
+LSYM(big_divisor)
+       sub     arg0,arg1,r0
+       MILLIRET
+       addc    r0,r0,retreg
+       .exit
+       .procend
+       .end
+#endif
+
+#ifdef L_remI
+/* ROUTINE:    $$remI
+
+   DESCRIPTION:
+   .   $$remI returns the remainder of the division of two signed 32-bit
+   .   integers.  The sign of the remainder is the same as the sign of
+   .   the dividend.
+
+
+   INPUT REGISTERS:
+   .   arg0 == dividend
+   .   arg1 == divisor
+   .   mrp  == return pc
+   .   sr0  == return space when called externally
+
+   OUTPUT REGISTERS:
+   .   arg0 = destroyed
+   .   arg1 = destroyed
+   .   ret1 = remainder
+
+   OTHER REGISTERS AFFECTED:
+   .   r1   = undefined
+
+   SIDE EFFECTS:
+   .   Causes a trap under the following conditions:  DIVIDE BY ZERO
+   .   Changes memory at the following places:  NONE
+
+   PERMISSIBLE CONTEXT:
+   .   Unwindable
+   .   Does not create a stack frame
+   .   Is usable for internal or external millicode
+
+   DISCUSSION:
+   .   Calls other millicode routines via mrp:  NONE
+   .   Calls other millicode routines:  NONE  */
+
+RDEFINE(tmp,r1)
+RDEFINE(retreg,ret1)
+
+       SUBSPA_MILLI
+       ATTR_MILLI
+       .proc
+       .callinfo millicode
+       .entry
+GSYM($$remI)
+GSYM($$remoI)
+       .export $$remI,MILLICODE
+       .export $$remoI,MILLICODE
+       ldo             -1(arg1),tmp            /*  is there at most one bit set ? */
+       and,<>          arg1,tmp,r0             /*  if not, don't use power of 2 */
+       addi,>          0,arg1,r0               /*  if denominator > 0, use power */
+                                               /*  of 2 */
+       b,n             LREF(neg_denom)
+LSYM(pow2)
+       comb,>,n        0,arg0,LREF(neg_num)    /*  is numerator < 0 ? */
+       and             arg0,tmp,retreg         /*  get the result */
+       MILLIRETN
+LSYM(neg_num)
+       subi            0,arg0,arg0             /*  negate numerator */
+       and             arg0,tmp,retreg         /*  get the result */
+       subi            0,retreg,retreg         /*  negate result */
+       MILLIRETN
+LSYM(neg_denom)
+       addi,<          0,arg1,r0               /*  if arg1 >= 0, it's not power */
+                                               /*  of 2 */
+       b,n             LREF(regular_seq)
+       sub             r0,arg1,tmp             /*  make denominator positive */
+       comb,=,n        arg1,tmp,LREF(regular_seq) /*  test against 0x80000000 and 0 */
+       ldo             -1(tmp),retreg          /*  is there at most one bit set ? */
+       and,=           tmp,retreg,r0           /*  if not, go to regular_seq */
+       b,n             LREF(regular_seq)
+       comb,>,n        0,arg0,LREF(neg_num_2)  /*  if arg0 < 0, negate it  */
+       and             arg0,retreg,retreg
+       MILLIRETN
+LSYM(neg_num_2)
+       subi            0,arg0,tmp              /*  test against 0x80000000 */
+       and             tmp,retreg,retreg
+       subi            0,retreg,retreg
+       MILLIRETN
+LSYM(regular_seq)
+       addit,=         0,arg1,0                /*  trap if div by zero */
+       add,>=          0,arg0,retreg           /*  move dividend, if retreg < 0, */
+       sub             0,retreg,retreg         /*    make it positive */
+       sub             0,arg1, tmp             /*  clear carry,  */
+                                               /*    negate the divisor */
+       ds              0, tmp,0                /*  set V-bit to the comple- */
+                                               /*    ment of the divisor sign */
+       or              0,0, tmp                /*  clear  tmp */
+       add             retreg,retreg,retreg    /*  shift msb bit into carry */
+       ds               tmp,arg1, tmp          /*  1st divide step, if no carry */
+                                               /*    out, msb of quotient = 0 */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+LSYM(t1)
+       ds               tmp,arg1, tmp          /*  2nd divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  3rd divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  4th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  5th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  6th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  7th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  8th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  9th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  10th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  11th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  12th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  13th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  14th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  15th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  16th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  17th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  18th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  19th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  20th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  21st divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  22nd divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  23rd divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  24th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  25th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  26th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  27th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  28th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  29th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  30th divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  31st divide step */
+       addc            retreg,retreg,retreg    /*  shift retreg with/into carry */
+       ds               tmp,arg1, tmp          /*  32nd divide step, */
+       addc            retreg,retreg,retreg    /*  shift last bit into retreg */
+       movb,>=,n        tmp,retreg,LREF(finish) /*  branch if pos.  tmp */
+       add,<           arg1,0,0                /*  if arg1 > 0, add arg1 */
+       add,tr           tmp,arg1,retreg        /*    for correcting remainder tmp */
+       sub              tmp,arg1,retreg        /*  else add absolute value arg1 */
+LSYM(finish)
+       add,>=          arg0,0,0                /*  set sign of remainder */
+       sub             0,retreg,retreg         /*    to sign of dividend */
+       MILLIRET
+       nop
+       .exit
+       .procend
+#ifdef milliext
+       .origin 0x00000200
+#endif
+       .end
+#endif
+
+#ifdef L_remU
+/* ROUTINE:    $$remU
+   .   Single precision divide for remainder with unsigned binary integers.
+   .   Power-of-2 divisors are masked; divisors above 2**31 need one subtract.
+   .   The remainder must be dividend-(dividend/divisor)*divisor.
+   .   Divide by zero is trapped.
+
+   INPUT REGISTERS:
+   .   arg0 == dividend
+   .   arg1 == divisor
+   .   mrp  == return pc
+   .   sr0  == return space when called externally
+
+   OUTPUT REGISTERS:
+   .   arg0 =  undefined
+   .   arg1 =  undefined
+   .   ret1 =  remainder
+
+   OTHER REGISTERS AFFECTED:
+   .   r1   =  undefined
+
+   SIDE EFFECTS:
+   .   Causes a trap under the following conditions:  DIVIDE BY ZERO
+   .   Changes memory at the following places:  NONE
+
+   PERMISSIBLE CONTEXT:
+   .   Unwindable.
+   .   Does not create a stack frame.
+   .   Suitable for internal or external millicode.
+   .   Assumes the special millicode register conventions.
+
+   DISCUSSION:
+   .   Calls other millicode routines using mrp: NONE
+   .   Calls other millicode routines: NONE  */
+
+
+RDEFINE(temp,r1)       /*  scratch: divisor-1 mask, later the shifted dividend */
+RDEFINE(rmndr,ret1)    /*  r29: partial remainder, returned as the result */
+       SUBSPA_MILLI
+       ATTR_MILLI
+       .export $$remU,millicode
+       .proc
+       .callinfo       millicode
+       .entry
+GSYM($$remU)
+       ldo     -1(arg1),temp           /*  is there at most one bit set ? */
+       and,=   arg1,temp,r0            /*  if not, don't use power of 2 */
+       b       LREF(regular_seq)       /*  nullified when arg1 & (arg1-1) == 0 */
+       addit,= 0,arg1,r0               /*  trap on div by zero */
+       and     arg0,temp,rmndr         /*  get the result for power of 2 */
+       MILLIRETN
+LSYM(regular_seq)
+       comib,>=,n  0,arg1,LREF(special_case)   /*  top bit of divisor set ? */
+       subi    0,arg1,rmndr            /*  clear carry, negate the divisor */
+       ds      r0,rmndr,r0             /*  set V-bit to 1 */
+       add     arg0,arg0,temp          /*  shift msb bit into carry */
+       ds      r0,arg1,rmndr           /*  1st divide step (carry = quotient bit) */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  2nd divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  3rd divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  4th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  5th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  6th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  7th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  8th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  9th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  10th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  11th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  12th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  13th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  14th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  15th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  16th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  17th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  18th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  19th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  20th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  21st divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  22nd divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  23rd divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  24th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  25th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  26th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  27th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  28th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  29th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  30th divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  31st divide step */
+       addc    temp,temp,temp          /*  shift temp with/into carry */
+       ds      rmndr,arg1,rmndr                /*  32nd (last) divide step; no ADDC follows it */
+       comiclr,<= 0,rmndr,r0           /*  remainder >= 0: skip the correction */
+         add   rmndr,arg1,rmndr        /*  correction */
+       MILLIRETN
+       nop
+
+/* Putting >= on the last DS and deleting the COMICLR above does not work!  */
+LSYM(special_case)
+       sub,>>= arg0,arg1,rmndr         /*  dividend >= divisor (unsigned): keep the difference */
+         copy  arg0,rmndr              /*  else the remainder is the dividend itself */
+       MILLIRETN
+       nop
+       .exit
+       .procend
+       .end
+#endif
+
+#ifdef L_div_const
+/* ROUTINE:    $$divI_2
+   .           $$divI_3        $$divU_3
+   .           $$divI_4
+   .           $$divI_5        $$divU_5
+   .           $$divI_6        $$divU_6
+   .           $$divI_7        $$divU_7
+   .           $$divI_8
+   .           $$divI_9        $$divU_9
+   .           $$divI_10       $$divU_10
+   .
+   .           $$divI_12       $$divU_12
+   .
+   .           $$divI_14       $$divU_14
+   .           $$divI_15       $$divU_15
+   .           $$divI_16
+   .           $$divI_17       $$divU_17
+   .
+   .   Divide by selected constants for single precision binary integers.
+
+   INPUT REGISTERS:
+   .   arg0 == dividend
+   .   mrp  == return pc
+   .   sr0  == return space when called externally
+
+   OUTPUT REGISTERS:
+   .   arg0 =  undefined
+   .   arg1 =  undefined
+   .   ret1 =  quotient
+
+   OTHER REGISTERS AFFECTED:
+   .   r1   =  undefined
+
+   SIDE EFFECTS:
+   .   Causes a trap under the following conditions: NONE
+   .   Changes memory at the following places:  NONE
+
+   PERMISSIBLE CONTEXT:
+   .   Unwindable.
+   .   Does not create a stack frame.
+   .   Suitable for internal or external millicode.
+   .   Assumes the special millicode register conventions.
+
+   DISCUSSION:
+   .   Calls other millicode routines using mrp:  NONE
+   .   Calls other millicode routines:  NONE  */
+
+
+/* TRUNCATED DIVISION BY SMALL INTEGERS
+
+   We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
+   (with y fixed).
+
+   Let a = floor(z/y), for some choice of z.  Note that z will be
+   chosen so that division by z is cheap.
+
+   Let r be the remainder(z/y).  In other words, r = z - ay.
+
+   Now, our method is to choose a value for b such that
+
+   q'(x) = floor((ax+b)/z)
+
+   is equal to q(x) over as large a range of x as possible.  If the
+   two are equal over a sufficiently large range, and if it is easy to
+   form the product (ax), and it is easy to divide by z, then we can
+   perform the division much faster than the general division algorithm.
+
+   So, we want the following to be true:
+
+   .   For x in the following range:
+   .
+   .       ky <= x < (k+1)y
+   .
+   .   implies that
+   .
+   .       k <= (ax+b)/z < (k+1)
+
+   We want to determine b such that this is true for all k in the
+   range {0..K} for some maximum K.
+
+   Since (ax+b) is an increasing function of x, we can take each
+   bound separately to determine the "best" value for b.
+
+   (ax+b)/z < (k+1)           implies
+
+   a((k+1)y-1)+b < (k+1)z      implies
+
+   b < a + (k+1)(z-ay)        implies
+
+   b < a + (k+1)r
+
+   This needs to be true for all k in the range {0..K}.  In
+   particular, it is true for k = 0 and this leads to a maximum
+   acceptable value for b.
+
+   b < a+r   or   b <= a+r-1
+
+   Taking the other bound, we have
+
+   k <= (ax+b)/z              implies
+
+   k <= (aky+b)/z             implies
+
+   k(z-ay) <= b                       implies
+
+   kr <= b
+
+   Clearly, the largest range for k will be achieved by maximizing b,
+   when r is not zero. When r is zero, then the simplest choice for b
+   is 0.  When r is not 0, set
+
+   .   b = a+r-1
+
+   Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
+   for all x in the range:
+
+   .   0 <= x < (K+1)y
+
+   We need to determine what K is.  Of our two bounds,
+
+   .   b < a+(k+1)r    is satisfied for all k >= 0, by construction.
+
+   The other bound is
+
+   .   kr <= b
+
+   This is always true if r = 0.  If r is not 0 (the usual case), then
+   K = floor((a+r-1)/r) is the maximum value for k.
+
+   Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
+   answer for q(x) = floor(x/y) when x is in the range
+
+   0 <= x <= (K+1)y-1         K = floor((a+r-1)/r)
+
+   To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
+   the formula for q'(x) yields the correct value of q(x) for all x
+   representable by a single word in HPPA.
+
+   We are also constrained in that computing the product (ax), adding
+   b, and dividing by z must all be done quickly, otherwise we will be
+   better off going through the general algorithm using the DS
+   instruction, which uses approximately 70 cycles.
+
+   For each y, there is a choice of z which satisfies the constraints
+   for (K+1)y >= 2**32.  We may not, however, be able to satisfy the
+   timing constraints for arbitrary y. It seems that z being equal to
+   a power of 2 or a power of 2 minus 1 is as good as we can do, since
+   it minimizes the time to do division by z.  We want the choice of z
+   to also result in a value for (a) that minimizes the computation of
+   the product (ax).  This is best achieved if (a) has a regular bit
+   pattern (so the multiplication can be done with shifts and adds).
+   The value of (a) also needs to be less than 2**32 so the product is
+   always guaranteed to fit in 2 words.
+
+   In actual practice, the following should be done:
+
+   1) For negative x, you should take the absolute value and remember
+   .  the fact so that the result can be negated.  This obviously does
+   .  not apply in the unsigned case.
+   2) For even y, you should factor out the power of 2 that divides y
+   .  and divide x by it.  You can then proceed by dividing by the
+   .  odd factor of y.
+
+   Here is a table of some odd values of y, and corresponding choices
+   for z which are "good".
+
+    y    z       r      a (hex)     max x (hex)
+
+    3  2**32     1     55555555      100000001
+    5  2**32     1     33333333      100000003
+    7  2**24-1   0       249249     (infinite)
+    9  2**24-1   0       1c71c7     (infinite)
+   11  2**20-1   0        1745d     (infinite)
+   13  2**24-1   0       13b13b     (infinite)
+   15  2**32     1     11111111      10000000d
+   17  2**32     1      f0f0f0f      10000000f
+
+   If r is 1, then b = a+r-1 = a.  This simplifies the computation
+   of (ax+b), since you can compute (x+1)(a) instead.  If r is 0,
+   then b = 0 is ok to use which simplifies (ax+b).
+
+   The bit patterns for 55555555, 33333333, and 11111111 are obviously
+   very regular.  The bit patterns for the other values of a above are:
+
+    y     (hex)          (binary)
+
+    7    249249  001001001001001001001001  << regular >>
+    9    1c71c7  000111000111000111000111  << regular >>
+   11     1745d  000000010111010001011101  << irregular >>
+   13    13b13b  000100111011000100111011  << irregular >>
+
+   The bit patterns for (a) corresponding to (y) of 11 and 13 may be
+   too irregular to warrant using this method.
+
+   When z is a power of 2 minus 1, then the division by z is slightly
+   more complicated, involving an iterative solution.
+
+   The code presented here solves division by 1 through 17, except for
+   11 and 13. There are algorithms for both signed and unsigned
+   quantities given.
+
+   TIMINGS (cycles)
+
+   divisor  positive  negative unsigned
+
+   .   1       2          2         2
+   .   2       4          4         2
+   .   3       19        21        19
+   .   4       4          4         2
+   .   5       18        22        19
+   .   6       19        22        19
+   .   8       4          4         2
+   .  10       18        19        17
+   .  12       18        20        18
+   .  15       16        18        16
+   .  16       4          4         2
+   .  17       16        18        16
+
+   Now, the algorithm for 7, 9, and 14 is an iterative one.  That is,
+   a loop body is executed until the tentative quotient is 0.  The
+   number of times the loop body is executed varies depending on the
+   dividend, but is never more than two times. If the dividend is
+   less than the divisor, then the loop body is not executed at all.
+   Each iteration adds 4 cycles to the timings.
+
+   divisor  positive  negative unsigned
+
+   .   7       19+4n    20+4n     20+4n    n = number of iterations
+   .   9       21+4n    22+4n     21+4n
+   .  14       21+4n    22+4n     20+4n
+
+   To give an idea of how the number of iterations varies, here is a
+   table of dividend versus number of iterations when dividing by 7.
+
+   smallest     largest       required
+   dividend    dividend      iterations
+
+   .   0            6              0
+   .   7        0x6ffffff          1
+   0x1000006   0xffffffff          2
+
+   There is some overlap in the range of numbers requiring 1 and 2
+   iterations. */
+
+RDEFINE(t2,r1)         /*  scratch: low word of a shifted copy */
+RDEFINE(x2,arg0)       /*  r26: dividend, then low word of the product */
+RDEFINE(t1,arg1)       /*  r25: scratch: high word of a shifted copy */
+RDEFINE(x1,ret1)       /*  r29: high word of the product; the returned quotient */
+
+       SUBSPA_MILLI_DIV
+       ATTR_MILLI
+
+       .proc
+       .callinfo       millicode
+       .entry
+/* NONE of these routines requires a stack frame.
+   ALL of these routines are unwindable from millicode.  */
+
+GSYM($$divide_by_constant)
+       .export $$divide_by_constant,millicode
+/*  Provides a "nice" label for the code covered by the unwind descriptor
+    for things like gprof.  */
+
+/* DIVISION BY 2 (shift by 1); signed, quotient rounds toward zero */
+GSYM($$divI_2)
+       .export         $$divI_2,millicode
+       comclr,>=       arg0,0,0        /* dividend >= 0: skip the bias */
+       addi            1,arg0,arg0     /* negative: add 2-1 before shifting */
+       MILLIRET
+       extrs           arg0,30,31,ret1 /* ret1 = arg0 >> 1 (arithmetic), in the slot after MILLIRET */
+
+
+/* DIVISION BY 4 (shift by 2); signed, quotient rounds toward zero */
+GSYM($$divI_4)
+       .export         $$divI_4,millicode
+       comclr,>=       arg0,0,0        /* dividend >= 0: skip the bias */
+       addi            3,arg0,arg0     /* negative: add 4-1 before shifting */
+       MILLIRET
+       extrs           arg0,29,30,ret1 /* ret1 = arg0 >> 2 (arithmetic), in the slot after MILLIRET */
+
+
+/* DIVISION BY 8 (shift by 3); signed, quotient rounds toward zero */
+GSYM($$divI_8)
+       .export         $$divI_8,millicode
+       comclr,>=       arg0,0,0        /* dividend >= 0: skip the bias */
+       addi            7,arg0,arg0     /* negative: add 8-1 before shifting */
+       MILLIRET
+       extrs           arg0,28,29,ret1 /* ret1 = arg0 >> 3 (arithmetic), in the slot after MILLIRET */
+
+/* DIVISION BY 16 (shift by 4); signed, quotient rounds toward zero */
+GSYM($$divI_16)
+       .export         $$divI_16,millicode
+       comclr,>=       arg0,0,0        /* dividend >= 0: skip the bias */
+       addi            15,arg0,arg0    /* negative: add 16-1 before shifting */
+       MILLIRET
+       extrs           arg0,27,28,ret1 /* ret1 = arg0 >> 4 (arithmetic), in the slot after MILLIRET */
+
+/****************************************************************************
+*
+*      DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
+*
+*      includes 3,5,15,17 and also 6,10,12
+*
+****************************************************************************/
+
+/* DIVISION BY 3 (use z = 2**32; a = 55555555 = 5 * 11111111 hex) */
+
+GSYM($$divI_3)
+       .export         $$divI_3,millicode
+       comb,<,N        x2,0,LREF(neg3) /* negative dividend: use neg3 */
+
+       addi            1,x2,x2         /* this cannot overflow */
+       extru           x2,1,2,x1       /* multiply by 5 to get started: x1 = x2 >> 30 */
+       sh2add          x2,x2,x2        /* x2 = 4*x2 + x2 (low word) */
+       b               LREF(pos)       /* pos multiplies <x1,x2> by 0x11111111 */
+       addc            x1,0,x1         /* fold the low-word carry into x1 (branch delay slot) */
+
+LSYM(neg3)
+       subi            1,x2,x2         /* x2 = 1 - x2; this cannot overflow */
+       extru           x2,1,2,x1       /* multiply by 5 to get started: x1 = x2 >> 30 */
+       sh2add          x2,x2,x2        /* x2 = 4*x2 + x2 (low word) */
+       b               LREF(neg)       /* neg is the same multiply, then negates x1 */
+       addc            x1,0,x1         /* fold the low-word carry into x1 (branch delay slot) */
+
+GSYM($$divU_3)
+       .export         $$divU_3,millicode
+       addi            1,x2,x2         /* this CAN overflow */
+       addc            0,0,x1          /* x1 = carry out of x2 + 1 */
+       shd             x1,x2,30,t1     /* multiply by 5 to get started: t1 = high word of <x1,x2> << 2 */
+       sh2add          x2,x2,x2        /* x2 = 4*x2 + x2 (low word) */
+       b               LREF(pos)       /* pos multiplies <x1,x2> by 0x11111111 */
+       addc            x1,t1,x1        /* high word = x1 + t1 + carry (branch delay slot) */
+
+/* DIVISION BY 5 (use z = 2**32; a = 33333333 = 3 * 11111111 hex) */
+
+GSYM($$divI_5)
+       .export         $$divI_5,millicode
+       comb,<,N        x2,0,LREF(neg5) /* negative dividend: use neg5 */
+
+       addi            3,x2,t1         /* this cannot overflow */
+       sh1add          x2,t1,x2        /* multiply by 3 to get started */
+       b               LREF(pos)       /* pos multiplies <x1,x2> by 0x11111111 */
+       addc            0,0,x1          /* x1 = carry of 3*(x2+1) (branch delay slot) */
+
+LSYM(neg5)
+       sub             0,x2,x2         /* negate x2                    */
+       addi            1,x2,x2         /* this cannot overflow */
+       shd             0,x2,31,x1      /* get top bit (can be 1)       */
+       sh1add          x2,x2,x2        /* multiply by 3 to get started */
+       b               LREF(neg)       /* neg is the same multiply, then negates x1 */
+       addc            x1,0,x1         /* fold the low-word carry into x1 (branch delay slot) */
+
+GSYM($$divU_5)
+       .export         $$divU_5,millicode
+       addi            1,x2,x2         /* this CAN overflow */
+       addc            0,0,x1          /* x1 = carry out of x2 + 1 */
+       shd             x1,x2,31,t1     /* multiply by 3 to get started: t1 = high word of <x1,x2> << 1 */
+       sh1add          x2,x2,x2        /* x2 = 2*x2 + x2 (low word) */
+       b               LREF(pos)       /* pos multiplies <x1,x2> by 0x11111111 */
+       addc            t1,x1,x1        /* high word = x1 + t1 + carry (branch delay slot) */
+
+/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
+GSYM($$divI_6)
+       .export         $$divI_6,millicode
+       comb,<,N        x2,0,LREF(neg6) /* negative dividend: use neg6 */
+       extru           x2,30,31,x2     /* divide by 2                  */
+       addi            5,x2,t1         /* compute 5*(x2+1) = 5*x2+5    */
+       sh2add          x2,t1,x2        /* multiply by 5 to get started */
+       b               LREF(pos)       /* pos multiplies <x1,x2> by 0x11111111 */
+       addc            0,0,x1          /* x1 = carry of the low word (branch delay slot) */
+
+LSYM(neg6)
+       subi            2,x2,x2         /* negate, divide by 2, and add 1 */
+                                       /* negation and adding 1 are done */
+                                       /* at the same time by the SUBI   */
+       extru           x2,30,31,x2     /* the divide by 2: logical shift right by 1 */
+       shd             0,x2,30,x1      /* x1 = x2 >> 30 = high word of 4*x2 */
+       sh2add          x2,x2,x2        /* multiply by 5 to get started */
+       b               LREF(neg)       /* neg is the same multiply, then negates x1 */
+       addc            x1,0,x1         /* fold the low-word carry into x1 (branch delay slot) */
+
+GSYM($$divU_6)
+       .export         $$divU_6,millicode
+       extru           x2,30,31,x2     /* divide by 2 */
+       addi            1,x2,x2         /* cannot carry */
+       shd             0,x2,30,x1      /* multiply by 5 to get started */
+       sh2add          x2,x2,x2        /* x2 = 4*x2 + x2 (low word) */
+       b               LREF(pos)       /* pos multiplies <x1,x2> by 0x11111111 */
+       addc            x1,0,x1         /* fold the low-word carry into x1 (branch delay slot) */
+
+/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
+GSYM($$divU_10)
+       .export         $$divU_10,millicode
+       extru           x2,30,31,x2     /* divide by 2 */
+       addi            3,x2,t1         /* compute 3*(x2+1) = (3*x2)+3  */
+       sh1add          x2,t1,x2        /* multiply by 3 to get started */
+       addc            0,0,x1          /* x1 = carry of the low word; falls into pos */
+LSYM(pos)
+       shd             x1,x2,28,t1     /* multiply by 0x11 */
+       shd             x2,0,28,t2      /* t2 = x2 << 4 (low word of the shifted copy) */
+       add             x2,t2,x2        /* low word of <x1,x2> * 0x11 */
+       addc            x1,t1,x1        /* high word, including the carry */
+LSYM(pos_for_17)
+       shd             x1,x2,24,t1     /* multiply by 0x101 */
+       shd             x2,0,24,t2      /* t2 = x2 << 8 */
+       add             x2,t2,x2        /* low word */
+       addc            x1,t1,x1        /* high word, including the carry */
+
+       shd             x1,x2,16,t1     /* multiply by 0x10001 */
+       shd             x2,0,16,t2      /* t2 = x2 << 16 */
+       add             x2,t2,x2        /* low word */
+       MILLIRET
+       addc            x1,t1,x1        /* x1 = high word = quotient, in the slot after MILLIRET */
+
+GSYM($$divI_10)
+       .export         $$divI_10,millicode
+       comb,<          x2,0,LREF(neg10)        /* negative dividend: use neg10 */
+       copy            0,x1            /* x1 = 0 on both paths (delay slot, not nullified) */
+       extru           x2,30,31,x2     /* divide by 2 */
+       addib,TR        1,x2,LREF(pos)  /* add 1 (cannot overflow)     */
+       sh1add          x2,x2,x2        /* multiply by 3 to get started */
+
+LSYM(neg10)
+       subi            2,x2,x2         /* negate, divide by 2, and add 1 */
+                                       /* negation and adding 1 are done */
+                                       /* at the same time by the SUBI   */
+       extru           x2,30,31,x2     /* the divide by 2: logical shift right by 1 */
+       sh1add          x2,x2,x2        /* multiply by 3 to get started */
+LSYM(neg)
+       shd             x1,x2,28,t1     /* multiply by 0x11 */
+       shd             x2,0,28,t2      /* t2 = x2 << 4 (low word of the shifted copy) */
+       add             x2,t2,x2        /* low word of <x1,x2> * 0x11 */
+       addc            x1,t1,x1        /* high word, including the carry */
+LSYM(neg_for_17)
+       shd             x1,x2,24,t1     /* multiply by 0x101 */
+       shd             x2,0,24,t2      /* t2 = x2 << 8 */
+       add             x2,t2,x2        /* low word */
+       addc            x1,t1,x1        /* high word, including the carry */
+
+       shd             x1,x2,16,t1     /* multiply by 0x10001 */
+       shd             x2,0,16,t2      /* t2 = x2 << 16 */
+       add             x2,t2,x2        /* low word */
+       addc            x1,t1,x1        /* x1 = high word = quotient of |x| */
+       MILLIRET
+       sub             0,x1,x1         /* negate the quotient, in the slot after MILLIRET */
+
+/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
+GSYM($$divI_12)
+       .export         $$divI_12,millicode
+       comb,<          x2,0,LREF(neg12)        /* negative dividend: use neg12 */
+       copy            0,x1            /* x1 = 0 on both paths (delay slot, not nullified) */
+       extru           x2,29,30,x2     /* divide by 4                  */
+       addib,tr        1,x2,LREF(pos)  /* compute 5*(x2+1) = 5*x2+5    */
+       sh2add          x2,x2,x2        /* multiply by 5 to get started */
+
+LSYM(neg12)
+       subi            4,x2,x2         /* negate, divide by 4, and add 1 */
+                                       /* negation and adding 1 are done */
+                                       /* at the same time by the SUBI   */
+       extru           x2,29,30,x2     /* the divide by 4: logical shift right by 2 */
+       b               LREF(neg)       /* neg multiplies by 0x11111111, then negates x1 */
+       sh2add          x2,x2,x2        /* multiply by 5 to get started */
+
+GSYM($$divU_12)
+       .export         $$divU_12,millicode
+       extru           x2,29,30,x2     /* divide by 4   */
+       addi            5,x2,t1         /* cannot carry */
+       sh2add          x2,t1,x2        /* multiply by 5 to get started */
+       b               LREF(pos)       /* pos multiplies <x1,x2> by 0x11111111 */
+       addc            0,0,x1          /* x1 = carry of the low word (branch delay slot) */
+
+/* DIVISION BY 15 (use z = 2**32; a = 11111111) */
+GSYM($$divI_15)
+       .export         $$divI_15,millicode
+       comb,<          x2,0,LREF(neg15)        /* negative dividend: use neg15 */
+       copy            0,x1            /* x1 = 0 on both paths (delay slot, not nullified) */
+       addib,tr        1,x2,LREF(pos)+4        /* x2 += 1; enter pos past its first insn */
+       shd             x1,x2,28,t1     /* same as the first insn of pos, done in the delay slot */
+
+LSYM(neg15)
+       b               LREF(neg)       /* neg multiplies by 0x11111111, then negates x1 */
+       subi            1,x2,x2         /* x2 = 1 - x2 (branch delay slot) */
+
+GSYM($$divU_15)
+       .export         $$divU_15,millicode
+       addi            1,x2,x2         /* this CAN overflow */
+       b               LREF(pos)       /* pos multiplies <x1,x2> by 0x11111111 */
+       addc            0,0,x1          /* x1 = carry out of x2 + 1 (branch delay slot) */
+
+/* DIVISION BY 17 (use z = 2**32; a =  f0f0f0f = f * 1010101 hex) */
+GSYM($$divI_17)
+       .export         $$divI_17,millicode
+       comb,<,n        x2,0,LREF(neg17)        /* negative dividend: use neg17 */
+       addi            1,x2,x2         /* this cannot overflow */
+       shd             0,x2,28,t1      /* multiply by 0xf to get started */
+       shd             x2,0,28,t2      /* <t1,t2> = 16 * x2 */
+       sub             t2,x2,x2        /* low word of 16*x2 - x2 */
+       b               LREF(pos_for_17)        /* skips the 0x11 step of pos */
+       subb            t1,0,x1         /* high word, less the borrow (branch delay slot) */
+
+LSYM(neg17)
+       subi            1,x2,x2         /* this cannot overflow */
+       shd             0,x2,28,t1      /* multiply by 0xf to get started */
+       shd             x2,0,28,t2      /* <t1,t2> = 16 * x2 */
+       sub             t2,x2,x2        /* low word of 16*x2 - x2 */
+       b               LREF(neg_for_17)        /* skips the 0x11 step of neg */
+       subb            t1,0,x1         /* high word, less the borrow (branch delay slot) */
+
+GSYM($$divU_17)
+       .export         $$divU_17,millicode
+       addi            1,x2,x2         /* this CAN overflow */
+       addc            0,0,x1          /* x1 = carry out of x2 + 1 */
+       shd             x1,x2,28,t1     /* multiply by 0xf to get started */
+LSYM(u17)
+       shd             x2,0,28,t2      /* t2 = x2 << 4; <t1,t2> = 16 * <x1,x2> */
+       sub             t2,x2,x2        /* low word of 16*<x1,x2> - <x1,x2> */
+       b               LREF(pos_for_17)        /* skips the 0x11 step of pos */
+       subb            t1,x1,x1        /* high word, less the borrow (branch delay slot) */
+
+
+/* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
+   includes 7,9 and also 14
+
+
+   z = 2**24-1
+   r = z mod x = 0
+
+   so choose b = 0
+
+   Also, in order to divide by z = 2**24-1, we approximate by dividing
+   by (z+1) = 2**24 (which is easy), and then correcting.
+
+   (ax) = (z+1)q' + r
+   .   = zq' + (q'+r)
+
+   So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
+   Then the true remainder of (ax)/z is (q'+r).  Repeat the process
+   with this new remainder, adding the tentative quotients together,
+   until a tentative quotient is 0 (and then we are done).  There is
+   one last correction to be done.  It is possible that (q'+r) = z.
+   If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But,
+   in fact, we need to add 1 more to the quotient.  Now, it turns
+   out that this happens if and only if the original value x is
+   an exact multiple of y.  So, to avoid a three instruction test at
+   the end, instead use 1 instruction to add 1 to x at the beginning.  */
+
+/* DIVISION BY 7 (use z = 2**24-1; a = 249249)
+   The multiplication by a is done in three steps: by 9 (sh3add), by
+   0x41 (shift left 6 and add) and by 0x1001 (shift left 12 and add);
+   9 * 0x41 * 0x1001 = 0x249249.  */
+GSYM($$divI_7)
+       .export         $$divI_7,millicode
+       comb,<,n        x2,0,LREF(neg7)         /* x2 < 0: go to neg7 */
+LSYM(7)                                        /* also entered from the divide-by-14 code */
+       addi            1,x2,x2         /* cannot overflow */
+       shd             0,x2,29,x1              /* x1 = x2 >> 29, the high word of 8*x2 */
+       sh3add          x2,x2,x2                /* x2 = 8*x2 + x2 (low word) */
+       addc            x1,0,x1                 /* add the carry of that sum into x1 */
+LSYM(pos7)                                     /* also entered from $$divU_7 and the divide-by-9 code */
+       shd             x1,x2,26,t1             /* <t1,t2> = <x1,x2> << 6 */
+       shd             x2,0,26,t2
+       add             x2,t2,x2                /* <x1,x2> += <t1,t2>: times 0x41 */
+       addc            x1,t1,x1
+
+       shd             x1,x2,20,t1             /* <t1,t2> = <x1,x2> << 12 */
+       shd             x2,0,20,t2
+       add             x2,t2,x2                /* <t1,x2> = <x1,x2> + <t1,t2>: times 0x1001 */
+       addc            x1,t1,t1
+
+       /* computed <t1,x2>.  Now divide it by (2**24 - 1)      */
+
+       copy            0,x1                    /* x1 = 0: quotient accumulator */
+       shd,=           t1,x2,24,t1     /* tentative quotient  */
+LSYM(1)
+       addb,tr         t1,x1,LREF(2)   /* add to previous quotient   */
+       extru           x2,31,24,x2     /* new remainder (unadjusted) */
+
+       MILLIRETN                               /* reached once a tentative quotient is 0; result is in x1 */
+
+LSYM(2)
+       addb,tr         t1,x2,LREF(1)   /* adjust remainder */
+       extru,=         x2,7,8,t1       /* new quotient     */
+
+LSYM(neg7)
+       subi            1,x2,x2         /* negate x2 and add 1 */
+LSYM(8)                                        /* also entered from the divide-by-14 code */
+       shd             0,x2,29,x1              /* x1 = x2 >> 29, the high word of 8*x2 */
+       sh3add          x2,x2,x2                /* x2 = 8*x2 + x2 (low word) */
+       addc            x1,0,x1                 /* add the carry of that sum into x1 */
+
+LSYM(neg7_shift)                               /* also entered from the divide-by-9 code */
+       shd             x1,x2,26,t1             /* <t1,t2> = <x1,x2> << 6 */
+       shd             x2,0,26,t2
+       add             x2,t2,x2                /* <x1,x2> += <t1,t2>: times 0x41 */
+       addc            x1,t1,x1
+
+       shd             x1,x2,20,t1             /* <t1,t2> = <x1,x2> << 12 */
+       shd             x2,0,20,t2
+       add             x2,t2,x2                /* <t1,x2> = <x1,x2> + <t1,t2>: times 0x1001 */
+       addc            x1,t1,t1
+
+       /* computed <t1,x2>.  Now divide it by (2**24 - 1)      */
+
+       copy            0,x1                    /* x1 = 0: quotient accumulator */
+       shd,=           t1,x2,24,t1     /* tentative quotient  */
+LSYM(3)
+       addb,tr         t1,x1,LREF(4)   /* add to previous quotient   */
+       extru           x2,31,24,x2     /* new remainder (unadjusted) */
+
+       MILLIRET                                /* reached once a tentative quotient is 0 */
+       sub             0,x1,x1         /* negate result    */
+
+LSYM(4)
+       addb,tr         t1,x2,LREF(3)   /* adjust remainder */
+       extru,=         x2,7,8,t1       /* new quotient     */
+
+GSYM($$divU_7)
+       .export         $$divU_7,millicode
+       addi            1,x2,x2         /* can carry */
+       addc            0,0,x1                  /* x1 = carry out of the addi */
+       shd             x1,x2,29,t1             /* t1 = high word of 8*<x1,x2> */
+       sh3add          x2,x2,x2                /* x2 = 8*x2 + x2 (low word) */
+       b               LREF(pos7)
+       addc            t1,x1,x1                /* delay slot: x1 = t1 + x1 + carry (high word of 9*<x1,x2>) */
+
+/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7)
+   The first factor is 7, formed as 8*v - v; the code then joins the
+   divide-by-7 paths at pos7/neg7_shift for the remaining factors
+   (7 * 0x41 * 0x1001 = 0x1c71c7).  */
+GSYM($$divI_9)
+       .export         $$divI_9,millicode
+       comb,<,n        x2,0,LREF(neg9)         /* x2 < 0: go to neg9 */
+       addi            1,x2,x2         /* cannot overflow */
+       shd             0,x2,29,t1              /* t1 = x2 >> 29, the high word of 8*x2 */
+       shd             x2,0,29,t2              /* t2 = x2 << 3, the low word of 8*x2 */
+       sub             t2,x2,x2                /* x2 = low word of 8*x2 - x2 */
+       b               LREF(pos7)
+       subb            t1,0,x1                 /* delay slot: x1 = high word, less the borrow */
+
+LSYM(neg9)
+       subi            1,x2,x2         /* negate and add 1 */
+       shd             0,x2,29,t1              /* t1 = x2 >> 29, the high word of 8*x2 */
+       shd             x2,0,29,t2              /* t2 = x2 << 3, the low word of 8*x2 */
+       sub             t2,x2,x2                /* x2 = low word of 8*x2 - x2 */
+       b               LREF(neg7_shift)
+       subb            t1,0,x1                 /* delay slot: x1 = high word, less the borrow */
+
+GSYM($$divU_9)
+       .export         $$divU_9,millicode
+       addi            1,x2,x2         /* can carry */
+       addc            0,0,x1                  /* x1 = carry out of the addi */
+       shd             x1,x2,29,t1             /* t1 = high word of 8*<x1,x2> */
+       shd             x2,0,29,t2              /* t2 = x2 << 3 */
+       sub             t2,x2,x2                /* x2 = low word of 8*<x1,x2> - <x1,x2> */
+       b               LREF(pos7)
+       subb            t1,x1,x1                /* delay slot: x1 = t1 - x1 - borrow (high word) */
+
+/* DIVISION BY 14 (shift to divide by 2 then divide by 7)
+   $$divU_14 doubles as the fall-through path of $$divI_14 for a
+   non-negative dividend.  */
+GSYM($$divI_14)
+       .export         $$divI_14,millicode
+       comb,<,n        x2,0,LREF(neg14)        /* x2 < 0: go to neg14 */
+GSYM($$divU_14)
+       .export         $$divU_14,millicode
+       b               LREF(7)         /* go to 7 case */
+       extru           x2,30,31,x2     /* divide by 2  */
+
+LSYM(neg14)
+       subi            2,x2,x2         /* negate (and add 2) */
+       b               LREF(8)         /* negative 7 case, entered after its own add-1 step */
+       extru           x2,30,31,x2     /* divide by 2        */
+       .exit
+       .procend
+       .end
+#endif
+
+#ifdef L_mulI
+/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
+/******************************************************************************
+This routine is used on PA2.0 processors when gcc -mno-fpregs is used
+
+ROUTINE:       $$mulI
+
+
+DESCRIPTION:   
+
+       $$mulI multiplies two single word integers, giving a single 
+       word result.  
+
+
+INPUT REGISTERS:
+
+       arg0 = Operand 1
+       arg1 = Operand 2
+       r31  == return pc
+       sr0  == return space when called externally 
+
+
+OUTPUT REGISTERS:
+
+       arg0 = undefined
+       arg1 = undefined
+       ret1 = result 
+
+OTHER REGISTERS AFFECTED:
+
+       r1   = undefined
+
+SIDE EFFECTS:
+
+       Causes a trap under the following conditions:  NONE
+       Changes memory at the following places:  NONE
+
+PERMISSIBLE CONTEXT:
+
+       Unwindable
+       Does not create a stack frame
+       Is usable for internal or external microcode
+
+DISCUSSION:
+
+       Calls other millicode routines via mrp:  NONE
+       Calls other millicode routines:  NONE
+
+***************************************************************************/
+
+
+#define        a0      %arg0           /* operand that is scaled up (a0 <<= 8 per pass) */
+#define        a1      %arg1           /* operand that is consumed (a1 >>= 8 per pass) */
+#define        t0      %r1             /* scratch for partial products */
+#define        r       %ret1           /* running result */
+
+#define        a0__128a0       zdep    a0,24,25,a0     /* X__Y names read "X = Y": a0 = a0 << 7 */
+#define        a0__256a0       zdep    a0,23,24,a0     /* a0 = a0 << 8 */
+#define        a1_ne_0_b_l0    comb,<> a1,0,LREF(l0)   /* if a1 != 0, branch to l0 (likewise l1, l2) */
+#define        a1_ne_0_b_l1    comb,<> a1,0,LREF(l1)
+#define        a1_ne_0_b_l2    comb,<> a1,0,LREF(l2)
+#define        b_n_ret_t0      b,n     LREF(ret_t0)    /* branch to ret_t0, nullifying the delay slot */
+#define        b_e_shift       b       LREF(e_shift)   /* b_e_X: branch to the exit stub for X */
+#define        b_e_t0ma0       b       LREF(e_t0ma0)
+#define        b_e_t0          b       LREF(e_t0)
+#define        b_e_t0a0        b       LREF(e_t0a0)
+#define        b_e_t02a0       b       LREF(e_t02a0)
+#define        b_e_t04a0       b       LREF(e_t04a0)
+#define        b_e_2t0         b       LREF(e_2t0)
+#define        b_e_2t0a0       b       LREF(e_2t0a0)
+#define        b_e_2t04a0      b       LREF(e2t04a0)
+#define        b_e_3t0         b       LREF(e_3t0)
+#define        b_e_4t0         b       LREF(e_4t0)
+#define        b_e_4t0a0       b       LREF(e_4t0a0)
+#define        b_e_4t08a0      b       LREF(e4t08a0)
+#define        b_e_5t0         b       LREF(e_5t0)
+#define        b_e_8t0         b       LREF(e_8t0)
+#define        b_e_8t0a0       b       LREF(e_8t0a0)
+#define        r__r_a0         add     r,a0,r          /* r = r + a0 (a single "_" reads as "+") */
+#define        r__r_2a0        sh1add  a0,r,r          /* r = r + 2*a0 */
+#define        r__r_4a0        sh2add  a0,r,r          /* r = r + 4*a0 */
+#define        r__r_8a0        sh3add  a0,r,r          /* r = r + 8*a0 */
+#define        r__r_t0         add     r,t0,r          /* r = r + t0 */
+#define        r__r_2t0        sh1add  t0,r,r
+#define        r__r_4t0        sh2add  t0,r,r
+#define        r__r_8t0        sh3add  t0,r,r
+#define        t0__3a0         sh1add  a0,a0,t0        /* t0 = 2*a0 + a0 */
+#define        t0__4a0         sh2add  a0,0,t0
+#define        t0__5a0         sh2add  a0,a0,t0
+#define        t0__8a0         sh3add  a0,0,t0
+#define        t0__9a0         sh3add  a0,a0,t0
+#define        t0__16a0        zdep    a0,27,28,t0     /* t0 = a0 << 4 */
+#define        t0__32a0        zdep    a0,26,27,t0     /* t0 = a0 << 5 */
+#define        t0__64a0        zdep    a0,25,26,t0     /* t0 = a0 << 6 */
+#define        t0__128a0       zdep    a0,24,25,t0     /* t0 = a0 << 7 */
+#define        t0__t0ma0       sub     t0,a0,t0        /* t0 = t0 - a0 ("m" reads as minus) */
+#define        t0__t0_a0       add     t0,a0,t0        /* t0 = t0 + a0 */
+#define        t0__t0_2a0      sh1add  a0,t0,t0        /* t0 = t0 + 2*a0 */
+#define        t0__t0_4a0      sh2add  a0,t0,t0
+#define        t0__t0_8a0      sh3add  a0,t0,t0
+#define        t0__2t0_a0      sh1add  t0,a0,t0        /* t0 = 2*t0 + a0 */
+#define        t0__3t0         sh1add  t0,t0,t0
+#define        t0__4t0         sh2add  t0,0,t0
+#define        t0__4t0_a0      sh2add  t0,a0,t0
+#define        t0__5t0         sh2add  t0,t0,t0
+#define        t0__8t0         sh3add  t0,0,t0
+#define        t0__8t0_a0      sh3add  t0,a0,t0
+#define        t0__9t0         sh3add  t0,t0,t0
+#define        t0__16t0        zdep    t0,27,28,t0     /* t0 = t0 << 4 */
+#define        t0__32t0        zdep    t0,26,27,t0     /* t0 = t0 << 5 */
+#define        t0__256a0       zdep    a0,23,24,t0     /* t0 = a0 << 8 */
+
+
+       SUBSPA_MILLI
+       ATTR_MILLI
+       .align 16
+       .proc
+       .callinfo millicode
+       .export $$mulI,millicode
+GSYM($$mulI)                           /* entry: ret1 = arg0 * arg1 */
+       combt,<<=       a1,a0,LREF(l4)  /* swap args if unsigned a1>a0 */
+       copy            0,r             /* zero out the result */
+       xor             a0,a1,a0        /* swap a0 & a1 using the */
+       xor             a0,a1,a1        /*  old xor trick */
+       xor             a0,a1,a0
+LSYM(l4)
+       combt,<=        0,a0,LREF(l3)           /* if a0>=0 then proceed like unsigned */
+       zdep            a1,30,8,t0      /* t0 = (a1&0xff)<<1 ********* */
+       sub,>           0,a1,t0         /* otherwise negate both and */
+       combt,<=,n      a0,t0,LREF(l2)  /*  swap back if |a0|<|a1| */
+       sub             0,a0,a1         /* a1 = -a0 */
+       movb,tr,n       t0,a0,LREF(l2)  /* 10th inst.; a0 = t0, always branch to l2 */
+
+LSYM(l0)       r__r_t0                         /* add in this partial product */
+LSYM(l1)       a0__256a0                       /* a0 <<= 8 ****************** */
+LSYM(l2)       zdep            a1,30,8,t0      /* t0 = (a1&0xff)<<1 ********* */
+LSYM(l3)       blr             t0,0            /* case on these 8 bits ****** */
+               extru           a1,23,24,a1     /* a1 >>= 8 ****************** */
+
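+/* 16 insts before this.  Each LSYM(xN) row below is the four-instruction
+   (16-byte) case that the blr above selects when the low byte of a1 is N;
+   the row adds N*a0 to r, either directly or via one of the shared exit
+   stubs that follow the table.  */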
+/*                       a0 <<= 8 ************************** */
+LSYM(x0)       a1_ne_0_b_l2    ! a0__256a0     ! MILLIRETN     ! nop
+LSYM(x1)       a1_ne_0_b_l1    ! r__r_a0       ! MILLIRETN     ! nop
+LSYM(x2)       a1_ne_0_b_l1    ! r__r_2a0      ! MILLIRETN     ! nop
+LSYM(x3)       a1_ne_0_b_l0    ! t0__3a0       ! MILLIRET      ! r__r_t0
+LSYM(x4)       a1_ne_0_b_l1    ! r__r_4a0      ! MILLIRETN     ! nop
+LSYM(x5)       a1_ne_0_b_l0    ! t0__5a0       ! MILLIRET      ! r__r_t0
+LSYM(x6)       t0__3a0         ! a1_ne_0_b_l1  ! r__r_2t0      ! MILLIRETN
+LSYM(x7)       t0__3a0         ! a1_ne_0_b_l0  ! r__r_4a0      ! b_n_ret_t0
+LSYM(x8)       a1_ne_0_b_l1    ! r__r_8a0      ! MILLIRETN     ! nop
+LSYM(x9)       a1_ne_0_b_l0    ! t0__9a0       ! MILLIRET      ! r__r_t0
+LSYM(x10)      t0__5a0         ! a1_ne_0_b_l1  ! r__r_2t0      ! MILLIRETN
+LSYM(x11)      t0__3a0         ! a1_ne_0_b_l0  ! r__r_8a0      ! b_n_ret_t0
+LSYM(x12)      t0__3a0         ! a1_ne_0_b_l1  ! r__r_4t0      ! MILLIRETN
+LSYM(x13)      t0__5a0         ! a1_ne_0_b_l0  ! r__r_8a0      ! b_n_ret_t0
+LSYM(x14)      t0__3a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_2t0
+LSYM(x15)      t0__5a0         ! a1_ne_0_b_l0  ! t0__3t0       ! b_n_ret_t0
+LSYM(x16)      t0__16a0        ! a1_ne_0_b_l1  ! r__r_t0       ! MILLIRETN
+LSYM(x17)      t0__9a0         ! a1_ne_0_b_l0  ! t0__t0_8a0    ! b_n_ret_t0
+LSYM(x18)      t0__9a0         ! a1_ne_0_b_l1  ! r__r_2t0      ! MILLIRETN
+LSYM(x19)      t0__9a0         ! a1_ne_0_b_l0  ! t0__2t0_a0    ! b_n_ret_t0
+LSYM(x20)      t0__5a0         ! a1_ne_0_b_l1  ! r__r_4t0      ! MILLIRETN
+LSYM(x21)      t0__5a0         ! a1_ne_0_b_l0  ! t0__4t0_a0    ! b_n_ret_t0
+LSYM(x22)      t0__5a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_2t0
+LSYM(x23)      t0__5a0         ! t0__2t0_a0    ! b_e_t0        ! t0__2t0_a0
+LSYM(x24)      t0__3a0         ! a1_ne_0_b_l1  ! r__r_8t0      ! MILLIRETN
+LSYM(x25)      t0__5a0         ! a1_ne_0_b_l0  ! t0__5t0       ! b_n_ret_t0
+LSYM(x26)      t0__3a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_2t0
+LSYM(x27)      t0__3a0         ! a1_ne_0_b_l0  ! t0__9t0       ! b_n_ret_t0
+LSYM(x28)      t0__3a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_4t0
+LSYM(x29)      t0__3a0         ! t0__2t0_a0    ! b_e_t0        ! t0__4t0_a0
+LSYM(x30)      t0__5a0         ! t0__3t0       ! b_e_shift     ! r__r_2t0
+LSYM(x31)      t0__32a0        ! a1_ne_0_b_l0  ! t0__t0ma0     ! b_n_ret_t0
+LSYM(x32)      t0__32a0        ! a1_ne_0_b_l1  ! r__r_t0       ! MILLIRETN
+LSYM(x33)      t0__8a0         ! a1_ne_0_b_l0  ! t0__4t0_a0    ! b_n_ret_t0
+LSYM(x34)      t0__16a0        ! t0__t0_a0     ! b_e_shift     ! r__r_2t0
+LSYM(x35)      t0__9a0         ! t0__3t0       ! b_e_t0        ! t0__t0_8a0
+LSYM(x36)      t0__9a0         ! a1_ne_0_b_l1  ! r__r_4t0      ! MILLIRETN
+LSYM(x37)      t0__9a0         ! a1_ne_0_b_l0  ! t0__4t0_a0    ! b_n_ret_t0
+LSYM(x38)      t0__9a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_2t0
+LSYM(x39)      t0__9a0         ! t0__2t0_a0    ! b_e_t0        ! t0__2t0_a0
+LSYM(x40)      t0__5a0         ! a1_ne_0_b_l1  ! r__r_8t0      ! MILLIRETN
+LSYM(x41)      t0__5a0         ! a1_ne_0_b_l0  ! t0__8t0_a0    ! b_n_ret_t0
+LSYM(x42)      t0__5a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_2t0
+LSYM(x43)      t0__5a0         ! t0__4t0_a0    ! b_e_t0        ! t0__2t0_a0
+LSYM(x44)      t0__5a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_4t0
+LSYM(x45)      t0__9a0         ! a1_ne_0_b_l0  ! t0__5t0       ! b_n_ret_t0
+LSYM(x46)      t0__9a0         ! t0__5t0       ! b_e_t0        ! t0__t0_a0
+LSYM(x47)      t0__9a0         ! t0__5t0       ! b_e_t0        ! t0__t0_2a0
+LSYM(x48)      t0__3a0         ! a1_ne_0_b_l0  ! t0__16t0      ! b_n_ret_t0
+LSYM(x49)      t0__9a0         ! t0__5t0       ! b_e_t0        ! t0__t0_4a0
+LSYM(x50)      t0__5a0         ! t0__5t0       ! b_e_shift     ! r__r_2t0
+LSYM(x51)      t0__9a0         ! t0__t0_8a0    ! b_e_t0        ! t0__3t0
+LSYM(x52)      t0__3a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_4t0
+LSYM(x53)      t0__3a0         ! t0__4t0_a0    ! b_e_t0        ! t0__4t0_a0
+LSYM(x54)      t0__9a0         ! t0__3t0       ! b_e_shift     ! r__r_2t0
+LSYM(x55)      t0__9a0         ! t0__3t0       ! b_e_t0        ! t0__2t0_a0
+LSYM(x56)      t0__3a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_8t0
+LSYM(x57)      t0__9a0         ! t0__2t0_a0    ! b_e_t0        ! t0__3t0
+LSYM(x58)      t0__3a0         ! t0__2t0_a0    ! b_e_2t0       ! t0__4t0_a0
+LSYM(x59)      t0__9a0         ! t0__2t0_a0    ! b_e_t02a0     ! t0__3t0
+LSYM(x60)      t0__5a0         ! t0__3t0       ! b_e_shift     ! r__r_4t0
+LSYM(x61)      t0__5a0         ! t0__3t0       ! b_e_t0        ! t0__4t0_a0
+LSYM(x62)      t0__32a0        ! t0__t0ma0     ! b_e_shift     ! r__r_2t0
+LSYM(x63)      t0__64a0        ! a1_ne_0_b_l0  ! t0__t0ma0     ! b_n_ret_t0
+LSYM(x64)      t0__64a0        ! a1_ne_0_b_l1  ! r__r_t0       ! MILLIRETN
+LSYM(x65)      t0__8a0         ! a1_ne_0_b_l0  ! t0__8t0_a0    ! b_n_ret_t0
+LSYM(x66)      t0__32a0        ! t0__t0_a0     ! b_e_shift     ! r__r_2t0
+LSYM(x67)      t0__8a0         ! t0__4t0_a0    ! b_e_t0        ! t0__2t0_a0
+LSYM(x68)      t0__8a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_4t0
+LSYM(x69)      t0__8a0         ! t0__2t0_a0    ! b_e_t0        ! t0__4t0_a0
+LSYM(x70)      t0__64a0        ! t0__t0_4a0    ! b_e_t0        ! t0__t0_2a0
+LSYM(x71)      t0__9a0         ! t0__8t0       ! b_e_t0        ! t0__t0ma0
+LSYM(x72)      t0__9a0         ! a1_ne_0_b_l1  ! r__r_8t0      ! MILLIRETN
+LSYM(x73)      t0__9a0         ! t0__8t0_a0    ! b_e_shift     ! r__r_t0
+LSYM(x74)      t0__9a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_2t0
+LSYM(x75)      t0__9a0         ! t0__4t0_a0    ! b_e_t0        ! t0__2t0_a0
+LSYM(x76)      t0__9a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_4t0
+LSYM(x77)      t0__9a0         ! t0__2t0_a0    ! b_e_t0        ! t0__4t0_a0
+LSYM(x78)      t0__9a0         ! t0__2t0_a0    ! b_e_2t0       ! t0__2t0_a0
+LSYM(x79)      t0__16a0        ! t0__5t0       ! b_e_t0        ! t0__t0ma0
+LSYM(x80)      t0__16a0        ! t0__5t0       ! b_e_shift     ! r__r_t0
+LSYM(x81)      t0__9a0         ! t0__9t0       ! b_e_shift     ! r__r_t0
+LSYM(x82)      t0__5a0         ! t0__8t0_a0    ! b_e_shift     ! r__r_2t0
+LSYM(x83)      t0__5a0         ! t0__8t0_a0    ! b_e_t0        ! t0__2t0_a0
+LSYM(x84)      t0__5a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_4t0
+LSYM(x85)      t0__8a0         ! t0__2t0_a0    ! b_e_t0        ! t0__5t0
+LSYM(x86)      t0__5a0         ! t0__4t0_a0    ! b_e_2t0       ! t0__2t0_a0
+LSYM(x87)      t0__9a0         ! t0__9t0       ! b_e_t02a0     ! t0__t0_4a0
+LSYM(x88)      t0__5a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_8t0
+LSYM(x89)      t0__5a0         ! t0__2t0_a0    ! b_e_t0        ! t0__8t0_a0
+LSYM(x90)      t0__9a0         ! t0__5t0       ! b_e_shift     ! r__r_2t0
+LSYM(x91)      t0__9a0         ! t0__5t0       ! b_e_t0        ! t0__2t0_a0
+LSYM(x92)      t0__5a0         ! t0__2t0_a0    ! b_e_4t0       ! t0__2t0_a0
+LSYM(x93)      t0__32a0        ! t0__t0ma0     ! b_e_t0        ! t0__3t0
+LSYM(x94)      t0__9a0         ! t0__5t0       ! b_e_2t0       ! t0__t0_2a0
+LSYM(x95)      t0__9a0         ! t0__2t0_a0    ! b_e_t0        ! t0__5t0
+LSYM(x96)      t0__8a0         ! t0__3t0       ! b_e_shift     ! r__r_4t0
+LSYM(x97)      t0__8a0         ! t0__3t0       ! b_e_t0        ! t0__4t0_a0
+LSYM(x98)      t0__32a0        ! t0__3t0       ! b_e_t0        ! t0__t0_2a0
+LSYM(x99)      t0__8a0         ! t0__4t0_a0    ! b_e_t0        ! t0__3t0
+LSYM(x100)     t0__5a0         ! t0__5t0       ! b_e_shift     ! r__r_4t0
+LSYM(x101)     t0__5a0         ! t0__5t0       ! b_e_t0        ! t0__4t0_a0
+LSYM(x102)     t0__32a0        ! t0__t0_2a0    ! b_e_t0        ! t0__3t0
+LSYM(x103)     t0__5a0         ! t0__5t0       ! b_e_t02a0     ! t0__4t0_a0
+LSYM(x104)     t0__3a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_8t0
+LSYM(x105)     t0__5a0         ! t0__4t0_a0    ! b_e_t0        ! t0__5t0
+LSYM(x106)     t0__3a0         ! t0__4t0_a0    ! b_e_2t0       ! t0__4t0_a0
+LSYM(x107)     t0__9a0         ! t0__t0_4a0    ! b_e_t02a0     ! t0__8t0_a0
+LSYM(x108)     t0__9a0         ! t0__3t0       ! b_e_shift     ! r__r_4t0
+LSYM(x109)     t0__9a0         ! t0__3t0       ! b_e_t0        ! t0__4t0_a0
+LSYM(x110)     t0__9a0         ! t0__3t0       ! b_e_2t0       ! t0__2t0_a0
+LSYM(x111)     t0__9a0         ! t0__4t0_a0    ! b_e_t0        ! t0__3t0
+LSYM(x112)     t0__3a0         ! t0__2t0_a0    ! b_e_t0        ! t0__16t0
+LSYM(x113)     t0__9a0         ! t0__4t0_a0    ! b_e_t02a0     ! t0__3t0
+LSYM(x114)     t0__9a0         ! t0__2t0_a0    ! b_e_2t0       ! t0__3t0
+LSYM(x115)     t0__9a0         ! t0__2t0_a0    ! b_e_2t0a0     ! t0__3t0
+LSYM(x116)     t0__3a0         ! t0__2t0_a0    ! b_e_4t0       ! t0__4t0_a0
+LSYM(x117)     t0__3a0         ! t0__4t0_a0    ! b_e_t0        ! t0__9t0
+LSYM(x118)     t0__3a0         ! t0__4t0_a0    ! b_e_t0a0      ! t0__9t0
+LSYM(x119)     t0__3a0         ! t0__4t0_a0    ! b_e_t02a0     ! t0__9t0
+LSYM(x120)     t0__5a0         ! t0__3t0       ! b_e_shift     ! r__r_8t0
+LSYM(x121)     t0__5a0         ! t0__3t0       ! b_e_t0        ! t0__8t0_a0
+LSYM(x122)     t0__5a0         ! t0__3t0       ! b_e_2t0       ! t0__4t0_a0
+LSYM(x123)     t0__5a0         ! t0__8t0_a0    ! b_e_t0        ! t0__3t0
+LSYM(x124)     t0__32a0        ! t0__t0ma0     ! b_e_shift     ! r__r_4t0
+LSYM(x125)     t0__5a0         ! t0__5t0       ! b_e_t0        ! t0__5t0
+LSYM(x126)     t0__64a0        ! t0__t0ma0     ! b_e_shift     ! r__r_2t0
+LSYM(x127)     t0__128a0       ! a1_ne_0_b_l0  ! t0__t0ma0     ! b_n_ret_t0
+LSYM(x128)     t0__128a0       ! a1_ne_0_b_l1  ! r__r_t0       ! MILLIRETN
+LSYM(x129)     t0__128a0       ! a1_ne_0_b_l0  ! t0__t0_a0     ! b_n_ret_t0
+LSYM(x130)     t0__64a0        ! t0__t0_a0     ! b_e_shift     ! r__r_2t0
+LSYM(x131)     t0__8a0         ! t0__8t0_a0    ! b_e_t0        ! t0__2t0_a0
+LSYM(x132)     t0__8a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_4t0
+LSYM(x133)     t0__8a0         ! t0__4t0_a0    ! b_e_t0        ! t0__4t0_a0
+LSYM(x134)     t0__8a0         ! t0__4t0_a0    ! b_e_2t0       ! t0__2t0_a0
+LSYM(x135)     t0__9a0         ! t0__5t0       ! b_e_t0        ! t0__3t0
+LSYM(x136)     t0__8a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_8t0
+LSYM(x137)     t0__8a0         ! t0__2t0_a0    ! b_e_t0        ! t0__8t0_a0
+LSYM(x138)     t0__8a0         ! t0__2t0_a0    ! b_e_2t0       ! t0__4t0_a0
+LSYM(x139)     t0__8a0         ! t0__2t0_a0    ! b_e_2t0a0     ! t0__4t0_a0
+LSYM(x140)     t0__3a0         ! t0__2t0_a0    ! b_e_4t0       ! t0__5t0
+LSYM(x141)     t0__8a0         ! t0__2t0_a0    ! b_e_4t0a0     ! t0__2t0_a0
+LSYM(x142)     t0__9a0         ! t0__8t0       ! b_e_2t0       ! t0__t0ma0
+LSYM(x143)     t0__16a0        ! t0__9t0       ! b_e_t0        ! t0__t0ma0
+LSYM(x144)     t0__9a0         ! t0__8t0       ! b_e_shift     ! r__r_2t0
+LSYM(x145)     t0__9a0         ! t0__8t0       ! b_e_t0        ! t0__2t0_a0
+LSYM(x146)     t0__9a0         ! t0__8t0_a0    ! b_e_shift     ! r__r_2t0
+LSYM(x147)     t0__9a0         ! t0__8t0_a0    ! b_e_t0        ! t0__2t0_a0
+LSYM(x148)     t0__9a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_4t0
+LSYM(x149)     t0__9a0         ! t0__4t0_a0    ! b_e_t0        ! t0__4t0_a0
+LSYM(x150)     t0__9a0         ! t0__4t0_a0    ! b_e_2t0       ! t0__2t0_a0
+LSYM(x151)     t0__9a0         ! t0__4t0_a0    ! b_e_2t0a0     ! t0__2t0_a0
+LSYM(x152)     t0__9a0         ! t0__2t0_a0    ! b_e_shift     ! r__r_8t0
+LSYM(x153)     t0__9a0         ! t0__2t0_a0    ! b_e_t0        ! t0__8t0_a0
+LSYM(x154)     t0__9a0         ! t0__2t0_a0    ! b_e_2t0       ! t0__4t0_a0
+LSYM(x155)     t0__32a0        ! t0__t0ma0     ! b_e_t0        ! t0__5t0
+LSYM(x156)     t0__9a0         ! t0__2t0_a0    ! b_e_4t0       ! t0__2t0_a0
+LSYM(x157)     t0__32a0        ! t0__t0ma0     ! b_e_t02a0     ! t0__5t0
+LSYM(x158)     t0__16a0        ! t0__5t0       ! b_e_2t0       ! t0__t0ma0
+LSYM(x159)     t0__32a0        ! t0__5t0       ! b_e_t0        ! t0__t0ma0
+LSYM(x160)     t0__5a0         ! t0__4t0       ! b_e_shift     ! r__r_8t0
+LSYM(x161)     t0__8a0         ! t0__5t0       ! b_e_t0        ! t0__4t0_a0
+LSYM(x162)     t0__9a0         ! t0__9t0       ! b_e_shift     ! r__r_2t0
+LSYM(x163)     t0__9a0         ! t0__9t0       ! b_e_t0        ! t0__2t0_a0
+LSYM(x164)     t0__5a0         ! t0__8t0_a0    ! b_e_shift     ! r__r_4t0
+LSYM(x165)     t0__8a0         ! t0__4t0_a0    ! b_e_t0        ! t0__5t0
+LSYM(x166)     t0__5a0         ! t0__8t0_a0    ! b_e_2t0       ! t0__2t0_a0
+LSYM(x167)     t0__5a0         ! t0__8t0_a0    ! b_e_2t0a0     ! t0__2t0_a0
+LSYM(x168)     t0__5a0         ! t0__4t0_a0    ! b_e_shift     ! r__r_8t0
+LSYM(x169)     t0__5a0         ! t0__4t0_a0    ! b_e_t0        ! t0__8t0_a0
+LSYM(x170)     t0__32a0        ! t0__t0_2a0    ! b_e_t0        ! t0__5t0
+LSYM(x171)     t0__9a0         ! t0__2t0_a0    ! b_e_t0        ! t0__9t0
+LSYM(x172)     t0__5a0         ! t0__4t0_a0    ! b_e_4t0       ! t0__2t0_a0
+LSYM(x173)     t0__9a0         ! t0__2t0_a0    ! b_e_t02a0     ! t0__9t0
+LSYM(x174)     t0__32a0        ! t0__t0_2a0    ! b_e_t04a0     ! t0__5t0
+LSYM(x175)     t0__8a0         ! t0__2t0_a0    ! b_e_5t0       ! t0__2t0_a0
+LSYM(x176)     t0__5a0         ! t0__4t0_a0    ! b_e_8t0       ! t0__t0_a0
+LSYM(x177)     t0__5a0         ! t0__4t0_a0    ! b_e_8t0a0     ! t0__t0_a0
+LSYM(x178)     t0__5a0         ! t0__2t0_a0    ! b_e_2t0       ! t0__8t0_a0
+LSYM(x179)     t0__5a0         ! t0__2t0_a0    ! b_e_2t0a0     ! t0__8t0_a0
+LSYM(x180)     t0__9a0         ! t0__5t0       ! b_e_shift     ! r__r_4t0
+LSYM(x181)     t0__9a0         ! t0__5t0       ! b_e_t0        ! t0__4t0_a0
+LSYM(x182)     t0__9a0         ! t0__5t0       ! b_e_2t0       ! t0__2t0_a0
+LSYM(x183)     t0__9a0         ! t0__5t0       ! b_e_2t0a0     ! t0__2t0_a0
+LSYM(x184)     t0__5a0         ! t0__9t0       ! b_e_4t0       ! t0__t0_a0
+LSYM(x185)     t0__9a0         ! t0__4t0_a0    ! b_e_t0        ! t0__5t0
+LSYM(x186)     t0__32a0        ! t0__t0ma0     ! b_e_2t0       ! t0__3t0
+LSYM(x187)     t0__9a0         ! t0__4t0_a0    ! b_e_t02a0     ! t0__5t0
+LSYM(x188)     t0__9a0         ! t0__5t0       ! b_e_4t0       ! t0__t0_2a0
+LSYM(x189)     t0__5a0         ! t0__4t0_a0    ! b_e_t0        ! t0__9t0
+LSYM(x190)     t0__9a0         ! t0__2t0_a0    ! b_e_2t0       ! t0__5t0
+LSYM(x191)     t0__64a0        ! t0__3t0       ! b_e_t0        ! t0__t0ma0
+LSYM(x192)     t0__8a0         ! t0__3t0       ! b_e_shift     ! r__r_8t0
+LSYM(x193)     t0__8a0         ! t0__3t0       ! b_e_t0        ! t0__8t0_a0
+LSYM(x194)     t0__8a0         ! t0__3t0       ! b_e_2t0       ! t0__4t0_a0
+LSYM(x195)     t0__8a0         ! t0__8t0_a0    ! b_e_t0        ! t0__3t0
+LSYM(x196)     t0__8a0         ! t0__3t0       ! b_e_4t0       ! t0__2t0_a0
+LSYM(x197)     t0__8a0         ! t0__3t0       ! b_e_4t0a0     ! t0__2t0_a0
+LSYM(x198)     t0__64a0        ! t0__t0_2a0    ! b_e_t0        ! t0__3t0
+LSYM(x199)     t0__8a0         ! t0__4t0_a0    ! b_e_2t0a0     ! t0__3t0
+LSYM(x200)     t0__5a0         ! t0__5t0       ! b_e_shift     ! r__r_8t0
+LSYM(x201)     t0__5a0         ! t0__5t0       ! b_e_t0        ! t0__8t0_a0
+LSYM(x202)     t0__5a0         ! t0__5t0       ! b_e_2t0       ! t0__4t0_a0
+LSYM(x203)     t0__5a0         ! t0__5t0       ! b_e_2t0a0     ! t0__4t0_a0
+LSYM(x204)     t0__8a0         ! t0__2t0_a0    ! b_e_4t0       ! t0__3t0
+LSYM(x205)     t0__5a0         ! t0__8t0_a0    ! b_e_t0        ! t0__5t0
+LSYM(x206)     t0__64a0        ! t0__t0_4a0    ! b_e_t02a0     ! t0__3t0
+LSYM(x207)     t0__8a0         ! t0__2t0_a0    ! b_e_3t0       ! t0__4t0_a0
+LSYM(x208)     t0__5a0         ! t0__5t0       ! b_e_8t0       ! t0__t0_a0
+LSYM(x209)     t0__5a0         ! t0__5t0       ! b_e_8t0a0     ! t0__t0_a0
+LSYM(x210)     t0__5a0         ! t0__4t0_a0    ! b_e_2t0       ! t0__5t0
+LSYM(x211)     t0__5a0         ! t0__4t0_a0    ! b_e_2t0a0     ! t0__5t0
+LSYM(x212)     t0__3a0         ! t0__4t0_a0    ! b_e_4t0       ! t0__4t0_a0
+LSYM(x213)     t0__3a0         ! t0__4t0_a0    ! b_e_4t0a0     ! t0__4t0_a0
+LSYM(x214)     t0__9a0         ! t0__t0_4a0    ! b_e_2t04a0    ! t0__8t0_a0
+LSYM(x215)     t0__5a0         ! t0__4t0_a0    ! b_e_5t0       ! t0__2t0_a0
+LSYM(x216)     t0__9a0         ! t0__3t0       ! b_e_shift     ! r__r_8t0
+LSYM(x217)     t0__9a0         ! t0__3t0       ! b_e_t0        ! t0__8t0_a0
+LSYM(x218)     t0__9a0         ! t0__3t0       ! b_e_2t0       ! t0__4t0_a0
+LSYM(x219)     t0__9a0         ! t0__8t0_a0    ! b_e_t0        ! t0__3t0
+LSYM(x220)     t0__3a0         ! t0__9t0       ! b_e_4t0       ! t0__2t0_a0
+LSYM(x221)     t0__3a0         ! t0__9t0       ! b_e_4t0a0     ! t0__2t0_a0
+LSYM(x222)     t0__9a0         ! t0__4t0_a0    ! b_e_2t0       ! t0__3t0
+LSYM(x223)     t0__9a0         ! t0__4t0_a0    ! b_e_2t0a0     ! t0__3t0
+LSYM(x224)     t0__9a0         ! t0__3t0       ! b_e_8t0       ! t0__t0_a0
+LSYM(x225)     t0__9a0         ! t0__5t0       ! b_e_t0        ! t0__5t0
+LSYM(x226)     t0__3a0         ! t0__2t0_a0    ! b_e_t02a0     ! t0__32t0
+LSYM(x227)     t0__9a0         ! t0__5t0       ! b_e_t02a0     ! t0__5t0
+LSYM(x228)     t0__9a0         ! t0__2t0_a0    ! b_e_4t0       ! t0__3t0
+LSYM(x229)     t0__9a0         ! t0__2t0_a0    ! b_e_4t0a0     ! t0__3t0
+LSYM(x230)     t0__9a0         ! t0__5t0       ! b_e_5t0       ! t0__t0_a0
+LSYM(x231)     t0__9a0         ! t0__2t0_a0    ! b_e_3t0       ! t0__4t0_a0
+LSYM(x232)     t0__3a0         ! t0__2t0_a0    ! b_e_8t0       ! t0__4t0_a0
+LSYM(x233)     t0__3a0         ! t0__2t0_a0    ! b_e_8t0a0     ! t0__4t0_a0
+LSYM(x234)     t0__3a0         ! t0__4t0_a0    ! b_e_2t0       ! t0__9t0
+LSYM(x235)     t0__3a0         ! t0__4t0_a0    ! b_e_2t0a0     ! t0__9t0
+LSYM(x236)     t0__9a0         ! t0__2t0_a0    ! b_e_4t08a0    ! t0__3t0
+LSYM(x237)     t0__16a0        ! t0__5t0       ! b_e_3t0       ! t0__t0ma0
+LSYM(x238)     t0__3a0         ! t0__4t0_a0    ! b_e_2t04a0    ! t0__9t0
+LSYM(x239)     t0__16a0        ! t0__5t0       ! b_e_t0ma0     ! t0__3t0
+LSYM(x240)     t0__9a0         ! t0__t0_a0     ! b_e_8t0       ! t0__3t0
+LSYM(x241)     t0__9a0         ! t0__t0_a0     ! b_e_8t0a0     ! t0__3t0
+LSYM(x242)     t0__5a0         ! t0__3t0       ! b_e_2t0       ! t0__8t0_a0
+LSYM(x243)     t0__9a0         ! t0__9t0       ! b_e_t0        ! t0__3t0
+LSYM(x244)     t0__5a0         ! t0__3t0       ! b_e_4t0       ! t0__4t0_a0
+LSYM(x245)     t0__8a0         ! t0__3t0       ! b_e_5t0       ! t0__2t0_a0
+LSYM(x246)     t0__5a0         ! t0__8t0_a0    ! b_e_2t0       ! t0__3t0
+LSYM(x247)     t0__5a0         ! t0__8t0_a0    ! b_e_2t0a0     ! t0__3t0
+LSYM(x248)     t0__32a0        ! t0__t0ma0     ! b_e_shift     ! r__r_8t0
+LSYM(x249)     t0__32a0        ! t0__t0ma0     ! b_e_t0        ! t0__8t0_a0
+LSYM(x250)     t0__5a0         ! t0__5t0       ! b_e_2t0       ! t0__5t0
+LSYM(x251)     t0__5a0         ! t0__5t0       ! b_e_2t0a0     ! t0__5t0
+LSYM(x252)     t0__64a0        ! t0__t0ma0     ! b_e_shift     ! r__r_4t0
+LSYM(x253)     t0__64a0        ! t0__t0ma0     ! b_e_t0        ! t0__4t0_a0
+LSYM(x254)     t0__128a0       ! t0__t0ma0     ! b_e_shift     ! r__r_2t0
+LSYM(x255)     t0__256a0       ! a1_ne_0_b_l0  ! t0__t0ma0     ! b_n_ret_t0
+/*1040 insts before this.  */
+LSYM(ret_t0)   MILLIRET                /* return; the r__r_t0 on the next line runs as its delay slot (cf. the x3 row) */
+LSYM(e_t0)     r__r_t0                 /* r += t0, then fall into e_shift */
+LSYM(e_shift)  a1_ne_0_b_l2            /* more bits left in a1: loop back to l2 */
+       a0__256a0       /* a0 <<= 8 *********** */
+       MILLIRETN                       /* a1 == 0: done */
+LSYM(e_t0ma0)  a1_ne_0_b_l0            /* net effect: r += t0 - a0 */
+       t0__t0ma0
+       MILLIRET
+       r__r_t0
+LSYM(e_t0a0)   a1_ne_0_b_l0            /* net effect: r += t0 + a0 */
+       t0__t0_a0
+       MILLIRET
+       r__r_t0
+LSYM(e_t02a0)  a1_ne_0_b_l0            /* net effect: r += t0 + 2*a0 */
+       t0__t0_2a0
+       MILLIRET
+       r__r_t0
+LSYM(e_t04a0)  a1_ne_0_b_l0            /* net effect: r += t0 + 4*a0 */
+       t0__t0_4a0
+       MILLIRET
+       r__r_t0
+LSYM(e_2t0)    a1_ne_0_b_l1            /* net effect: r += 2*t0 */
+       r__r_2t0
+       MILLIRETN
+LSYM(e_2t0a0)  a1_ne_0_b_l0            /* net effect: r += 2*t0 + a0 */
+       t0__2t0_a0
+       MILLIRET
+       r__r_t0
+LSYM(e2t04a0)  t0__t0_2a0              /* net effect: r += 2*(t0 + 2*a0) = 2*t0 + 4*a0 */
+       a1_ne_0_b_l1
+       r__r_2t0
+       MILLIRETN
+LSYM(e_3t0)    a1_ne_0_b_l0            /* net effect: r += 3*t0 */
+       t0__3t0
+       MILLIRET
+       r__r_t0
+LSYM(e_4t0)    a1_ne_0_b_l1            /* net effect: r += 4*t0 */
+       r__r_4t0
+       MILLIRETN
+LSYM(e_4t0a0)  a1_ne_0_b_l0            /* net effect: r += 4*t0 + a0 */
+       t0__4t0_a0
+       MILLIRET
+       r__r_t0
+LSYM(e4t08a0)  t0__t0_2a0              /* net effect: r += 4*(t0 + 2*a0) = 4*t0 + 8*a0 */
+       a1_ne_0_b_l1
+       r__r_4t0
+       MILLIRETN
+LSYM(e_5t0)    a1_ne_0_b_l0            /* net effect: r += 5*t0 */
+       t0__5t0
+       MILLIRET
+       r__r_t0
+LSYM(e_8t0)    a1_ne_0_b_l1            /* net effect: r += 8*t0 */
+       r__r_8t0
+       MILLIRETN
+LSYM(e_8t0a0)  a1_ne_0_b_l0            /* net effect: r += 8*t0 + a0 */
+       t0__8t0_a0
+       MILLIRET
+       r__r_t0
+
+       .procend
+       .end
+#endif
diff --git a/libgcc/config/pa/t-linux b/libgcc/config/pa/t-linux

new file mode 100644 (file)

index 0000000..d396bf7
--- /dev/null
+++ b/libgcc/config/pa/t-linux
@@ -0,0 +1,6 @@
+# Plug millicode routines into libgcc.a.  We want these on both native and
+# cross compiles.  We use the "64-bit" routines because the "32-bit" code
+# is broken for certain corner cases.
+
+LIB1ASMSRC = pa/milli64.S
+LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall
diff --git a/libgcc/config/pa/t-linux64 b/libgcc/config/pa/t-linux64

new file mode 100644 (file)

index 0000000..6cb9806
--- /dev/null
+++ b/libgcc/config/pa/t-linux64
@@ -0,0 +1,4 @@
+# Plug millicode routines into libgcc.a.  We want these on both native and
+# cross compiles.
+# FIXME: Explain.
+LIB1ASMFUNCS := $(filter-out _dyncall, $(LIB1ASMFUNCS))
diff --git a/libgcc/config/picochip/lib1funcs.S b/libgcc/config/picochip/lib1funcs.S

new file mode 100644 (file)

index 0000000..d344170
--- /dev/null
+++ b/libgcc/config/picochip/lib1funcs.S
@@ -0,0 +1,4 @@
+// picoChip ASM file
+// Fake libgcc asm file. This contains nothing, but is used to prevent gcc
+// getting upset about the lack of a lib1funcs.S file when LIB1ASMFUNCS is
+// defined to switch off the compilation of parts of libgcc.
diff --git a/libgcc/config/picochip/t-picochip b/libgcc/config/picochip/t-picochip

index 5135d500cbb8984ed7f10405eac568b3b196d649..a596ec989472cbbf9d91073981a6d0bca8040194 100644 (file)
--- a/libgcc/config/picochip/t-picochip
+++ b/libgcc/config/picochip/t-picochip
@@ -1,2 +1,9 @@
+# Prevent some of the more complicated libgcc functions from being
+# compiled.  This is because they are generally too big to fit into an
+# AE anyway, so there is no point in having them.  Also, some don't
+# compile properly so we'll ignore them for the moment.
+LIB1ASMSRC = picochip/lib1funcs.S
+LIB1ASMFUNCS = _mulsc3 _divsc3
+
  # Turn off the building of exception handling libraries.
  LIB2ADDEH =
diff --git a/libgcc/config/sh/lib1funcs.S b/libgcc/config/sh/lib1funcs.S

new file mode 100644 (file)

index 0000000..2f0ca16
--- /dev/null
+++ b/libgcc/config/sh/lib1funcs.S
@@ -0,0 +1,3933 @@
+/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+   2004, 2005, 2006, 2009
+   Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+!! libgcc routines for the Renesas / SuperH SH CPUs.
+!! Contributed by Steve Chamberlain.
+!! sac@cygnus.com
+
+!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
+!! recoded in assembly by Toshiyasu Morita
+!! tm@netcom.com
+
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif
+
+/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
+   ELF local label prefixes by J"orn Rennecke
+   amylaar@cygnus.com  */
+
+#include "lib1funcs.h"
+
+/* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
+   so it is more convenient to define NO_FPSCR_VALUES here than to
+   define it on the command line.  */
+#if defined __vxworks && defined __PIC__
+#define NO_FPSCR_VALUES
+#endif
+       
+#if ! __SH5__
+#ifdef L_ashiftrt
+       .global GLOBAL(ashiftrt_r4_0)
+       .global GLOBAL(ashiftrt_r4_1)
+       .global GLOBAL(ashiftrt_r4_2)
+       .global GLOBAL(ashiftrt_r4_3)
+       .global GLOBAL(ashiftrt_r4_4)
+       .global GLOBAL(ashiftrt_r4_5)
+       .global GLOBAL(ashiftrt_r4_6)
+       .global GLOBAL(ashiftrt_r4_7)
+       .global GLOBAL(ashiftrt_r4_8)
+       .global GLOBAL(ashiftrt_r4_9)
+       .global GLOBAL(ashiftrt_r4_10)
+       .global GLOBAL(ashiftrt_r4_11)
+       .global GLOBAL(ashiftrt_r4_12)
+       .global GLOBAL(ashiftrt_r4_13)
+       .global GLOBAL(ashiftrt_r4_14)
+       .global GLOBAL(ashiftrt_r4_15)
+       .global GLOBAL(ashiftrt_r4_16)
+       .global GLOBAL(ashiftrt_r4_17)
+       .global GLOBAL(ashiftrt_r4_18)
+       .global GLOBAL(ashiftrt_r4_19)
+       .global GLOBAL(ashiftrt_r4_20)
+       .global GLOBAL(ashiftrt_r4_21)
+       .global GLOBAL(ashiftrt_r4_22)
+       .global GLOBAL(ashiftrt_r4_23)
+       .global GLOBAL(ashiftrt_r4_24)
+       .global GLOBAL(ashiftrt_r4_25)
+       .global GLOBAL(ashiftrt_r4_26)
+       .global GLOBAL(ashiftrt_r4_27)
+       .global GLOBAL(ashiftrt_r4_28)
+       .global GLOBAL(ashiftrt_r4_29)
+       .global GLOBAL(ashiftrt_r4_30)
+       .global GLOBAL(ashiftrt_r4_31)
+       .global GLOBAL(ashiftrt_r4_32)
+
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
+       HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
+
+       .align  1
+! Ladder of entry points: ashiftrt_r4_N shifts r4 right arithmetically by
+! N bits, in place.  Most labels perform one shar and fall through into
+! the label below; the instruction after each rts is its delay slot and
+! still executes before the return.
+!
+! 31 (and 32, which shares the code): rotcl moves the sign bit of r4 into
+! T, then subc leaves r4 - r4 - T, i.e. 0 or -1.
+GLOBAL(ashiftrt_r4_32):
+GLOBAL(ashiftrt_r4_31):
+       rotcl   r4
+       rts
+       subc    r4,r4
+
+! 25..30: single-bit shar steps that fall into the 24 case.
+GLOBAL(ashiftrt_r4_30):
+       shar    r4
+GLOBAL(ashiftrt_r4_29):
+       shar    r4
+GLOBAL(ashiftrt_r4_28):
+       shar    r4
+GLOBAL(ashiftrt_r4_27):
+       shar    r4
+GLOBAL(ashiftrt_r4_26):
+       shar    r4
+GLOBAL(ashiftrt_r4_25):
+       shar    r4
+! 24: a logical shift by 16 + 8 leaves the top byte in bits 7..0; exts.b
+! in the delay slot sign-extends it again.
+GLOBAL(ashiftrt_r4_24):
+       shlr16  r4
+       shlr8   r4
+       rts
+       exts.b  r4,r4
+
+! 17..23: single-bit shar steps that fall into the 16 case.
+GLOBAL(ashiftrt_r4_23):
+       shar    r4
+GLOBAL(ashiftrt_r4_22):
+       shar    r4
+GLOBAL(ashiftrt_r4_21):
+       shar    r4
+GLOBAL(ashiftrt_r4_20):
+       shar    r4
+GLOBAL(ashiftrt_r4_19):
+       shar    r4
+GLOBAL(ashiftrt_r4_18):
+       shar    r4
+GLOBAL(ashiftrt_r4_17):
+       shar    r4
+! 16: logical shift by 16, then exts.w in the delay slot sign-extends the
+! remaining half word.
+GLOBAL(ashiftrt_r4_16):
+       shlr16  r4
+       rts
+       exts.w  r4,r4
+
+! 2..15: one shar per label, falling through to the 1 case.
+GLOBAL(ashiftrt_r4_15):
+       shar    r4
+GLOBAL(ashiftrt_r4_14):
+       shar    r4
+GLOBAL(ashiftrt_r4_13):
+       shar    r4
+GLOBAL(ashiftrt_r4_12):
+       shar    r4
+GLOBAL(ashiftrt_r4_11):
+       shar    r4
+GLOBAL(ashiftrt_r4_10):
+       shar    r4
+GLOBAL(ashiftrt_r4_9):
+       shar    r4
+GLOBAL(ashiftrt_r4_8):
+       shar    r4
+GLOBAL(ashiftrt_r4_7):
+       shar    r4
+GLOBAL(ashiftrt_r4_6):
+       shar    r4
+GLOBAL(ashiftrt_r4_5):
+       shar    r4
+GLOBAL(ashiftrt_r4_4):
+       shar    r4
+GLOBAL(ashiftrt_r4_3):
+       shar    r4
+GLOBAL(ashiftrt_r4_2):
+       shar    r4
+! 1: the final shar sits in the rts delay slot.
+GLOBAL(ashiftrt_r4_1):
+       rts
+       shar    r4
+
+! 0: nothing to shift; r4 is returned unchanged.
+GLOBAL(ashiftrt_r4_0):
+       rts
+       nop
+
+       ENDFUNC(GLOBAL(ashiftrt_r4_0))
+       ENDFUNC(GLOBAL(ashiftrt_r4_1))
+       ENDFUNC(GLOBAL(ashiftrt_r4_2))
+       ENDFUNC(GLOBAL(ashiftrt_r4_3))
+       ENDFUNC(GLOBAL(ashiftrt_r4_4))
+       ENDFUNC(GLOBAL(ashiftrt_r4_5))
+       ENDFUNC(GLOBAL(ashiftrt_r4_6))
+       ENDFUNC(GLOBAL(ashiftrt_r4_7))
+       ENDFUNC(GLOBAL(ashiftrt_r4_8))
+       ENDFUNC(GLOBAL(ashiftrt_r4_9))
+       ENDFUNC(GLOBAL(ashiftrt_r4_10))
+       ENDFUNC(GLOBAL(ashiftrt_r4_11))
+       ENDFUNC(GLOBAL(ashiftrt_r4_12))
+       ENDFUNC(GLOBAL(ashiftrt_r4_13))
+       ENDFUNC(GLOBAL(ashiftrt_r4_14))
+       ENDFUNC(GLOBAL(ashiftrt_r4_15))
+       ENDFUNC(GLOBAL(ashiftrt_r4_16))
+       ENDFUNC(GLOBAL(ashiftrt_r4_17))
+       ENDFUNC(GLOBAL(ashiftrt_r4_18))
+       ENDFUNC(GLOBAL(ashiftrt_r4_19))
+       ENDFUNC(GLOBAL(ashiftrt_r4_20))
+       ENDFUNC(GLOBAL(ashiftrt_r4_21))
+       ENDFUNC(GLOBAL(ashiftrt_r4_22))
+       ENDFUNC(GLOBAL(ashiftrt_r4_23))
+       ENDFUNC(GLOBAL(ashiftrt_r4_24))
+       ENDFUNC(GLOBAL(ashiftrt_r4_25))
+       ENDFUNC(GLOBAL(ashiftrt_r4_26))
+       ENDFUNC(GLOBAL(ashiftrt_r4_27))
+       ENDFUNC(GLOBAL(ashiftrt_r4_28))
+       ENDFUNC(GLOBAL(ashiftrt_r4_29))
+       ENDFUNC(GLOBAL(ashiftrt_r4_30))
+       ENDFUNC(GLOBAL(ashiftrt_r4_31))
+       ENDFUNC(GLOBAL(ashiftrt_r4_32))
+#endif
+
+#ifdef L_ashiftrt_n
+
+!
+! GLOBAL(ashrsi3)
+!
+! Arithmetic shift right of r4 by (r5 & 31).  The count selects an entry
+! in a table of byte offsets, which in turn selects the entry point in a
+! ladder of shift instructions.
+!
+! Entry:
+!
+! r4: Value to shift
+! r5: Shifts
+!
+! Exit:
+!
+! r0: Result
+!
+! Destroys:
+!
+! r5 (masked to 0..31, then overwritten with the table entry).
+! The T bit may also change, depending on the path taken.
+!
+
+       .global GLOBAL(ashrsi3)
+       HIDDEN_FUNC(GLOBAL(ashrsi3))
+       .align  2
+GLOBAL(ashrsi3):
+       mov     #31,r0
+       and     r0,r5           ! r5 = shift count modulo 32
+       mova    LOCAL(ashrsi3_table),r0
+       mov.b   @(r0,r5),r5     ! r5 = byte offset from the table to the code
+#ifdef __sh1__
+       add     r5,r0           ! r0 = table address + offset
+       jmp     @r0
+#else
+       ! braf target = (address of braf) + 4 + r5.  With the 4-byte aligned
+       ! entry and four 2-byte instructions before it, that +4 address is
+       ! where the table starts, so table-relative offsets work unchanged.
+       braf    r5
+#endif
+       mov     r4,r0           ! delay slot: result register starts as the input
+
+       .align  2
+LOCAL(ashrsi3_table):
+       .byte           LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
+       .byte           LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
+
+LOCAL(ashrsi3_31):
+       rotcl   r0
+       rts
+       subc    r0,r0
+
+LOCAL(ashrsi3_30):
+       shar    r0
+LOCAL(ashrsi3_29):
+       shar    r0
+LOCAL(ashrsi3_28):
+       shar    r0
+LOCAL(ashrsi3_27):
+       shar    r0
+LOCAL(ashrsi3_26):
+       shar    r0
+LOCAL(ashrsi3_25):
+       shar    r0
+LOCAL(ashrsi3_24):
+       shlr16  r0
+       shlr8   r0
+       rts
+       exts.b  r0,r0
+
+LOCAL(ashrsi3_23):
+       shar    r0
+LOCAL(ashrsi3_22):
+       shar    r0
+LOCAL(ashrsi3_21):
+       shar    r0
+LOCAL(ashrsi3_20):
+       shar    r0
+LOCAL(ashrsi3_19):
+       shar    r0
+LOCAL(ashrsi3_18):
+       shar    r0
+LOCAL(ashrsi3_17):
+       shar    r0
+LOCAL(ashrsi3_16):
+       shlr16  r0
+       rts
+       exts.w  r0,r0
+
+LOCAL(ashrsi3_15):
+       shar    r0
+LOCAL(ashrsi3_14):
+       shar    r0
+LOCAL(ashrsi3_13):
+       shar    r0
+LOCAL(ashrsi3_12):
+       shar    r0
+LOCAL(ashrsi3_11):
+       shar    r0
+LOCAL(ashrsi3_10):
+       shar    r0
+LOCAL(ashrsi3_9):
+       shar    r0
+LOCAL(ashrsi3_8):
+       shar    r0
+LOCAL(ashrsi3_7):
+       shar    r0
+LOCAL(ashrsi3_6):
+       shar    r0
+LOCAL(ashrsi3_5):
+       shar    r0
+LOCAL(ashrsi3_4):
+       shar    r0
+LOCAL(ashrsi3_3):
+       shar    r0
+LOCAL(ashrsi3_2):
+       shar    r0
+LOCAL(ashrsi3_1):
+       rts
+       shar    r0
+
+LOCAL(ashrsi3_0):
+       rts
+       nop
+
+       ENDFUNC(GLOBAL(ashrsi3))
+#endif
+
+#ifdef L_ashiftlt
+
+!
+! GLOBAL(ashlsi3)
+!
+! Shift r4 left by (r5 & 31).  The count selects an entry in a table of
+! byte offsets, which in turn selects the entry point in a ladder of
+! shift instructions.
+!
+! Entry:
+!
+! r4: Value to shift
+! r5: Shifts
+!
+! Exit:
+!
+! r0: Result
+!
+! Destroys:
+!
+! r5 (masked to 0..31, then overwritten with the table entry).
+! The T bit may also change, depending on the path taken.
+!
+       .global GLOBAL(ashlsi3)
+       HIDDEN_FUNC(GLOBAL(ashlsi3))
+       .align  2
+GLOBAL(ashlsi3):
+       mov     #31,r0
+       and     r0,r5           ! r5 = shift count modulo 32
+       mova    LOCAL(ashlsi3_table),r0
+       mov.b   @(r0,r5),r5     ! r5 = byte offset from the table to the code
+#ifdef __sh1__
+       add     r5,r0           ! r0 = table address + offset
+       jmp     @r0
+#else
+       ! braf target = (address of braf) + 4 + r5.  With the 4-byte aligned
+       ! entry and four 2-byte instructions before it, that +4 address is
+       ! where the table starts, so table-relative offsets work unchanged.
+       braf    r5
+#endif
+       mov     r4,r0           ! delay slot: result register starts as the input
+
+       .align  2
+LOCAL(ashlsi3_table):
+       .byte           LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
+       .byte           LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
+
+LOCAL(ashlsi3_6):
+       shll2   r0
+LOCAL(ashlsi3_4):
+       shll2   r0
+LOCAL(ashlsi3_2):
+       rts
+       shll2   r0
+
+LOCAL(ashlsi3_7):
+       shll2   r0
+LOCAL(ashlsi3_5):
+       shll2   r0
+LOCAL(ashlsi3_3):
+       shll2   r0
+LOCAL(ashlsi3_1):
+       rts
+       shll    r0
+
+LOCAL(ashlsi3_14):
+       shll2   r0
+LOCAL(ashlsi3_12):
+       shll2   r0
+LOCAL(ashlsi3_10):
+       shll2   r0
+LOCAL(ashlsi3_8):
+       rts
+       shll8   r0
+
+LOCAL(ashlsi3_15):
+       shll2   r0
+LOCAL(ashlsi3_13):
+       shll2   r0
+LOCAL(ashlsi3_11):
+       shll2   r0
+LOCAL(ashlsi3_9):
+       shll8   r0
+       rts
+       shll    r0
+
+LOCAL(ashlsi3_22):
+       shll2   r0
+LOCAL(ashlsi3_20):
+       shll2   r0
+LOCAL(ashlsi3_18):
+       shll2   r0
+LOCAL(ashlsi3_16):
+       rts
+       shll16  r0
+
+LOCAL(ashlsi3_23):
+       shll2   r0
+LOCAL(ashlsi3_21):
+       shll2   r0
+LOCAL(ashlsi3_19):
+       shll2   r0
+LOCAL(ashlsi3_17):
+       shll16  r0
+       rts
+       shll    r0
+
+LOCAL(ashlsi3_30):
+       shll2   r0
+LOCAL(ashlsi3_28):
+       shll2   r0
+LOCAL(ashlsi3_26):
+       shll2   r0
+LOCAL(ashlsi3_24):
+       shll16  r0
+       rts
+       shll8   r0
+
+LOCAL(ashlsi3_31):
+       shll2   r0
+LOCAL(ashlsi3_29):
+       shll2   r0
+LOCAL(ashlsi3_27):
+       shll2   r0
+LOCAL(ashlsi3_25):
+       shll16  r0
+       shll8   r0
+       rts
+       shll    r0
+
+LOCAL(ashlsi3_0):
+       rts
+       nop
+
+       ENDFUNC(GLOBAL(ashlsi3))
+#endif
+
+#ifdef L_lshiftrt
+
+!
+! GLOBAL(lshrsi3)
+!
+! Logical shift right of r4 by (r5 & 31).  The count selects an entry in
+! a table of byte offsets, which in turn selects the entry point in a
+! ladder of shift instructions.
+!
+! Entry:
+!
+! r4: Value to shift
+! r5: Shifts
+!
+! Exit:
+!
+! r0: Result
+!
+! Destroys:
+!
+! r5 (masked to 0..31, then overwritten with the table entry).
+! The T bit may also change, depending on the path taken.
+!
+       .global GLOBAL(lshrsi3)
+       HIDDEN_FUNC(GLOBAL(lshrsi3))
+       .align  2
+GLOBAL(lshrsi3):
+       mov     #31,r0
+       and     r0,r5           ! r5 = shift count modulo 32
+       mova    LOCAL(lshrsi3_table),r0
+       mov.b   @(r0,r5),r5     ! r5 = byte offset from the table to the code
+#ifdef __sh1__
+       add     r5,r0           ! r0 = table address + offset
+       jmp     @r0
+#else
+       ! braf target = (address of braf) + 4 + r5.  With the 4-byte aligned
+       ! entry and four 2-byte instructions before it, that +4 address is
+       ! where the table starts, so table-relative offsets work unchanged.
+       braf    r5
+#endif
+       mov     r4,r0           ! delay slot: result register starts as the input
+
+       .align  2
+LOCAL(lshrsi3_table):
+       .byte           LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
+       .byte           LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
+
+LOCAL(lshrsi3_6):
+       shlr2   r0
+LOCAL(lshrsi3_4):
+       shlr2   r0
+LOCAL(lshrsi3_2):
+       rts
+       shlr2   r0
+
+LOCAL(lshrsi3_7):
+       shlr2   r0
+LOCAL(lshrsi3_5):
+       shlr2   r0
+LOCAL(lshrsi3_3):
+       shlr2   r0
+LOCAL(lshrsi3_1):
+       rts
+       shlr    r0
+
+LOCAL(lshrsi3_14):
+       shlr2   r0
+LOCAL(lshrsi3_12):
+       shlr2   r0
+LOCAL(lshrsi3_10):
+       shlr2   r0
+LOCAL(lshrsi3_8):
+       rts
+       shlr8   r0
+
+LOCAL(lshrsi3_15):
+       shlr2   r0
+LOCAL(lshrsi3_13):
+       shlr2   r0
+LOCAL(lshrsi3_11):
+       shlr2   r0
+LOCAL(lshrsi3_9):
+       shlr8   r0
+       rts
+       shlr    r0
+
+LOCAL(lshrsi3_22):
+       shlr2   r0
+LOCAL(lshrsi3_20):
+       shlr2   r0
+LOCAL(lshrsi3_18):
+       shlr2   r0
+LOCAL(lshrsi3_16):
+       rts
+       shlr16  r0
+
+LOCAL(lshrsi3_23):
+       shlr2   r0
+LOCAL(lshrsi3_21):
+       shlr2   r0
+LOCAL(lshrsi3_19):
+       shlr2   r0
+LOCAL(lshrsi3_17):
+       shlr16  r0
+       rts
+       shlr    r0
+
+LOCAL(lshrsi3_30):
+       shlr2   r0
+LOCAL(lshrsi3_28):
+       shlr2   r0
+LOCAL(lshrsi3_26):
+       shlr2   r0
+LOCAL(lshrsi3_24):
+       shlr16  r0
+       rts
+       shlr8   r0
+
+LOCAL(lshrsi3_31):
+       shlr2   r0
+LOCAL(lshrsi3_29):
+       shlr2   r0
+LOCAL(lshrsi3_27):
+       shlr2   r0
+LOCAL(lshrsi3_25):
+       shlr16  r0
+       shlr8   r0
+       rts
+       shlr    r0
+
+LOCAL(lshrsi3_0):
+       rts
+       nop
+
+       ENDFUNC(GLOBAL(lshrsi3))
+#endif
+
+#ifdef L_movmem
+       .text
+       .balign 4
+       .global GLOBAL(movmem)
+       HIDDEN_FUNC(GLOBAL(movmem))
+       HIDDEN_ALIAS(movstr,movmem)
+       /* Block copy from @r5 to @r4 in groups of 64 bytes, reusing the
+          unrolled movmemSI chain further down as the copy body.
+          NOTE(review): the exact encoding of r6 is set by the compiler
+          side and is not visible here; confirm against the caller.  */
+       /* This would be a lot simpler if r6 contained the byte count
+          minus 64, and we wouldn't be called here for a byte count of 64.  */
+GLOBAL(movmem):
+       sts.l   pr,@-r15        ! save the return address; bsr below replaces pr
+       shll2   r6              ! r6 *= 4
+       ! Enter the chain one instruction past movmemSI52: the load it skips
+       ! is done in the delay slot.  The rts ending the chain comes back to
+       ! movmem_loop.
+       bsr     GLOBAL(movmemSI52+2)
+       mov.l   @(48,r5),r0
+       .balign 4
+LOCAL(movmem_loop): /* Reached with rts */
+       mov.l   @(60,r5),r0
+       add     #-64,r6
+       mov.l   r0,@(60,r4)
+       tst     r6,r6           ! T = (r6 == 0)
+       mov.l   @(56,r5),r0
+       bt      LOCAL(movmem_done)
+       mov.l   r0,@(56,r4)
+       cmp/pl  r6              ! T = (r6 > 0); kept until the bt below
+       mov.l   @(52,r5),r0
+       add     #64,r5
+       mov.l   r0,@(52,r4)     ! r4 not advanced yet, so this is the old group
+       add     #64,r4
+       ! r6 > 0: copy another group.  pr still points at movmem_loop, so
+       ! the chain returns here again.
+       bt      GLOBAL(movmemSI52)
+! done all the large groups, do the remainder
+! r6 is negative here: jump into the movmemSI chain at movmemSI4 + 4 + r6.
+! The chain then returns straight to the original caller, because the
+! delay slot of the jmp (first insn of movmem_done) restores pr.
+       mova    GLOBAL(movmemSI4)+4,r0
+       add     r6,r0
+       jmp     @r0
+LOCAL(movmem_done): ! share slot insn, works out aligned.
+       lds.l   @r15+,pr        ! restore the return address saved on entry
+       mov.l   r0,@(56,r4)
+       mov.l   @(52,r5),r0
+       rts
+       mov.l   r0,@(52,r4)     ! delay slot: last store of the group
+       .balign 4
+! ??? We need aliases movstr* for movmem* for the older libraries.  These
+! aliases will be removed at some point in the future.
+       .global GLOBAL(movmemSI64)
+       HIDDEN_FUNC(GLOBAL(movmemSI64))
+       HIDDEN_ALIAS(movstrSI64,movmemSI64)
+GLOBAL(movmemSI64):
+       mov.l   @(60,r5),r0
+       mov.l   r0,@(60,r4)
+       .global GLOBAL(movmemSI60)
+       HIDDEN_FUNC(GLOBAL(movmemSI60))
+       HIDDEN_ALIAS(movstrSI60,movmemSI60)
+GLOBAL(movmemSI60):
+       mov.l   @(56,r5),r0
+       mov.l   r0,@(56,r4)
+       .global GLOBAL(movmemSI56)
+       HIDDEN_FUNC(GLOBAL(movmemSI56))
+       HIDDEN_ALIAS(movstrSI56,movmemSI56)
+GLOBAL(movmemSI56):
+       mov.l   @(52,r5),r0
+       mov.l   r0,@(52,r4)
+       .global GLOBAL(movmemSI52)
+       HIDDEN_FUNC(GLOBAL(movmemSI52))
+       HIDDEN_ALIAS(movstrSI52,movmemSI52)
+GLOBAL(movmemSI52):
+       mov.l   @(48,r5),r0
+       mov.l   r0,@(48,r4)
+       .global GLOBAL(movmemSI48)
+       HIDDEN_FUNC(GLOBAL(movmemSI48))
+       HIDDEN_ALIAS(movstrSI48,movmemSI48)
+GLOBAL(movmemSI48):
+       mov.l   @(44,r5),r0
+       mov.l   r0,@(44,r4)
+       .global GLOBAL(movmemSI44)
+       HIDDEN_FUNC(GLOBAL(movmemSI44))
+       HIDDEN_ALIAS(movstrSI44,movmemSI44)
+GLOBAL(movmemSI44):
+       mov.l   @(40,r5),r0
+       mov.l   r0,@(40,r4)
+       .global GLOBAL(movmemSI40)
+       HIDDEN_FUNC(GLOBAL(movmemSI40))
+       HIDDEN_ALIAS(movstrSI40,movmemSI40)
+GLOBAL(movmemSI40):
+       mov.l   @(36,r5),r0
+       mov.l   r0,@(36,r4)
+       .global GLOBAL(movmemSI36)
+       HIDDEN_FUNC(GLOBAL(movmemSI36))
+       HIDDEN_ALIAS(movstrSI36,movmemSI36)
+GLOBAL(movmemSI36):
+       mov.l   @(32,r5),r0
+       mov.l   r0,@(32,r4)
+       .global GLOBAL(movmemSI32)
+       HIDDEN_FUNC(GLOBAL(movmemSI32))
+       HIDDEN_ALIAS(movstrSI32,movmemSI32)
+GLOBAL(movmemSI32):
+       mov.l   @(28,r5),r0
+       mov.l   r0,@(28,r4)
+       .global GLOBAL(movmemSI28)
+       HIDDEN_FUNC(GLOBAL(movmemSI28))
+       HIDDEN_ALIAS(movstrSI28,movmemSI28)
+GLOBAL(movmemSI28):
+       mov.l   @(24,r5),r0
+       mov.l   r0,@(24,r4)
+       .global GLOBAL(movmemSI24)
+       HIDDEN_FUNC(GLOBAL(movmemSI24))
+       HIDDEN_ALIAS(movstrSI24,movmemSI24)
+GLOBAL(movmemSI24):
+       mov.l   @(20,r5),r0
+       mov.l   r0,@(20,r4)
+       .global GLOBAL(movmemSI20)
+       HIDDEN_FUNC(GLOBAL(movmemSI20))
+       HIDDEN_ALIAS(movstrSI20,movmemSI20)
+GLOBAL(movmemSI20):
+       mov.l   @(16,r5),r0
+       mov.l   r0,@(16,r4)
+       .global GLOBAL(movmemSI16)
+       HIDDEN_FUNC(GLOBAL(movmemSI16))
+       HIDDEN_ALIAS(movstrSI16,movmemSI16)
+GLOBAL(movmemSI16):
+       mov.l   @(12,r5),r0
+       mov.l   r0,@(12,r4)
+       .global GLOBAL(movmemSI12)
+       HIDDEN_FUNC(GLOBAL(movmemSI12))
+       HIDDEN_ALIAS(movstrSI12,movmemSI12)
+GLOBAL(movmemSI12):
+       mov.l   @(8,r5),r0
+       mov.l   r0,@(8,r4)
+       .global GLOBAL(movmemSI8)
+       HIDDEN_FUNC(GLOBAL(movmemSI8))
+       HIDDEN_ALIAS(movstrSI8,movmemSI8)
+GLOBAL(movmemSI8):
+       mov.l   @(4,r5),r0
+       mov.l   r0,@(4,r4)
+       .global GLOBAL(movmemSI4)
+       HIDDEN_FUNC(GLOBAL(movmemSI4))
+       HIDDEN_ALIAS(movstrSI4,movmemSI4)
+GLOBAL(movmemSI4):
+       mov.l   @(0,r5),r0
+       rts
+       mov.l   r0,@(0,r4)
+
+       ENDFUNC(GLOBAL(movmemSI64))
+       ENDFUNC(GLOBAL(movmemSI60))
+       ENDFUNC(GLOBAL(movmemSI56))
+       ENDFUNC(GLOBAL(movmemSI52))
+       ENDFUNC(GLOBAL(movmemSI48))
+       ENDFUNC(GLOBAL(movmemSI44))
+       ENDFUNC(GLOBAL(movmemSI40))
+       ENDFUNC(GLOBAL(movmemSI36))
+       ENDFUNC(GLOBAL(movmemSI32))
+       ENDFUNC(GLOBAL(movmemSI28))
+       ENDFUNC(GLOBAL(movmemSI24))
+       ENDFUNC(GLOBAL(movmemSI20))
+       ENDFUNC(GLOBAL(movmemSI16))
+       ENDFUNC(GLOBAL(movmemSI12))
+       ENDFUNC(GLOBAL(movmemSI8))
+       ENDFUNC(GLOBAL(movmemSI4))
+       ENDFUNC(GLOBAL(movmem))
+#endif
+
+#ifdef L_movmem_i4
+       .text
+       .global GLOBAL(movmem_i4_even)
+       .global GLOBAL(movmem_i4_odd)
+       .global GLOBAL(movmemSI12_i4)
+
+       HIDDEN_FUNC(GLOBAL(movmem_i4_even))
+       HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
+       HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
+
+       HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
+       HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
+       HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
+
+! Longword copy from @r5 (post-incremented) to @r4.  r6 is a counter that
+! is decremented with dt once per pair of longwords; the copy ends when
+! it reaches zero.
+! NOTE(review): the exact initial value of r6 for each entry point is
+! chosen by the compiler side and is not visible here - confirm there.
+
+       .p2align        5
+! Loop exit taken from L_movmem_loop: r0/r1 hold the last two longwords
+! loaded, r4 has not yet been advanced by 16.
+L_movmem_2mod4_end:
+       mov.l   r0,@(16,r4)
+       rts
+       mov.l   r1,@(20,r4)     ! delay slot
+
+       .p2align        2
+
+GLOBAL(movmem_i4_even):
+       mov.l   @r5+,r0
+       bra     L_movmem_start_even
+       mov.l   @r5+,r1         ! delay slot: second longword of the pair
+
+GLOBAL(movmem_i4_odd):
+       mov.l   @r5+,r1
+       add     #-4,r4          ! bias r4 so the shared loop offsets line up
+       mov.l   @r5+,r2
+       mov.l   @r5+,r3
+       mov.l   r1,@(4,r4)
+       mov.l   r2,@(8,r4)
+
+L_movmem_loop:
+       mov.l   r3,@(12,r4)
+       dt      r6              ! r6 -= 1, T = (r6 == 0)
+       mov.l   @r5+,r0
+       bt/s    L_movmem_2mod4_end
+       mov.l   @r5+,r1         ! delay slot: executed on both paths
+       add     #16,r4
+L_movmem_start_even:
+       mov.l   @r5+,r2
+       mov.l   @r5+,r3
+       mov.l   r0,@r4
+       dt      r6              ! r6 -= 1, T = (r6 == 0)
+       mov.l   r1,@(4,r4)
+       bf/s    L_movmem_loop
+       mov.l   r2,@(8,r4)      ! delay slot: executed on both paths
+       rts
+       mov.l   r3,@(12,r4)     ! delay slot: final store
+
+       ENDFUNC(GLOBAL(movmem_i4_even))
+       ENDFUNC(GLOBAL(movmem_i4_odd))
+
+       .p2align        4
+! Fixed-size copy of three longwords (12 bytes) from @r5 to @r4.  All
+! loads are issued before the stores; r5 and r4 are left unchanged.
+GLOBAL(movmemSI12_i4):
+       mov.l   @r5,r0
+       mov.l   @(4,r5),r1
+       mov.l   @(8,r5),r2
+       mov.l   r0,@r4
+       mov.l   r1,@(4,r4)
+       rts
+       mov.l   r2,@(8,r4)      ! delay slot
+
+       ENDFUNC(GLOBAL(movmemSI12_i4))
+#endif
+
+#ifdef L_mulsi3
+
+
+       .global GLOBAL(mulsi3)
+       HIDDEN_FUNC(GLOBAL(mulsi3))
+
+! 32 x 32 -> 32 bit multiply built from 16 x 16 mulu.w products.
+!
+! r4 =       aabb
+! r5 =       ccdd
+! r0 = aabb*ccdd  via partial products
+!
+! if aa == 0 and cc == 0
+! r0 = bb*dd
+!
+! else
+! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
+!
+! The aa*cc term would be scaled by 2^32 and so never reaches the 32-bit
+! result; it is not computed.
+!
+
+GLOBAL(mulsi3):
+       mulu.w  r4,r5           ! multiply the lsws  macl=bb*dd
+       mov     r5,r3           ! r3 = ccdd
+       swap.w  r4,r2           ! r2 = bbaa
+       xtrct   r2,r3           ! r3 = aacc
+       tst     r3,r3           ! msws zero ?
+       bf      hiset
+       rts                     ! yes - then we have the answer
+       sts     macl,r0         ! delay slot: r0 = bb*dd
+
+hiset: sts     macl,r0         ! r0 = bb*dd
+       mulu.w  r2,r5           ! brewing macl = aa*dd
+       sts     macl,r1         ! r1 = aa*dd
+       mulu.w  r3,r4           ! brewing macl = cc*bb
+       sts     macl,r2         ! r2 = cc*bb
+       add     r1,r2           ! r2 = aa*dd + cc*bb
+       shll16  r2              ! scale the cross terms by 65536
+       rts
+       add     r2,r0           ! delay slot: r0 = bb*dd + cross terms
+
+       ENDFUNC(GLOBAL(mulsi3))
+#endif
+#endif /* ! __SH5__ */
+#ifdef L_sdivsi3_i4
+       .title "SH DIVIDE"
+!! 4 byte integer Divide code for the Renesas SH
+!! Signed division done in the FPU: both operands are converted to
+!! double, divided, and the quotient is truncated back to an integer.
+#ifdef __SH4__
+!! args in r4 and r5, result in fpul, clobber dr0, dr2
+
+       .global GLOBAL(sdivsi3_i4)
+       HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
+GLOBAL(sdivsi3_i4):
+       lds r4,fpul
+       float fpul,dr0          ! dr0 = dividend as a double
+       lds r5,fpul
+       float fpul,dr2          ! dr2 = divisor as a double
+       fdiv dr2,dr0            ! dr0 = dr0 / dr2
+       rts
+       ftrc dr0,fpul           ! delay slot: fpul = truncated quotient
+
+       ENDFUNC(GLOBAL(sdivsi3_i4))
+#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
+!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
+!! Same computation as the __SH4__ variant, but fpscr is saved, replaced
+!! for the duration of the divide, and restored before returning.
+
+#if ! __SH5__ || __SH5__ == 32
+#if __SH5__
+       .mode   SHcompact
+#endif
+       .global GLOBAL(sdivsi3_i4)
+       HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
+GLOBAL(sdivsi3_i4):
+       sts.l fpscr,@-r15       ! push the incoming fpscr
+       mov #8,r2
+       swap.w r2,r2            ! r2 = 8 << 16 = 0x00080000
+       ! NOTE(review): presumably this value selects double precision
+       ! (FPSCR.PR) for the dr0/dr2 operations below - confirm against
+       ! the SH-4 manual.
+       lds r2,fpscr
+       lds r4,fpul
+       float fpul,dr0          ! dr0 = dividend as a double
+       lds r5,fpul
+       float fpul,dr2          ! dr2 = divisor as a double
+       fdiv dr2,dr0            ! dr0 = dr0 / dr2
+       ftrc dr0,fpul           ! fpul = truncated quotient
+       rts
+       lds.l @r15+,fpscr       ! delay slot: pop the saved fpscr
+
+       ENDFUNC(GLOBAL(sdivsi3_i4))
+#endif /* ! __SH5__ || __SH5__ == 32 */
+#endif /* ! __SH4__ */
+#endif
+
+#ifdef L_sdivsi3
+/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
+   sh2e/sh3e code.  */
+#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
+!!
+!! Steve Chamberlain
+!! sac@cygnus.com
+!!
+!!
+
+!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
+
+       .global GLOBAL(sdivsi3)
+#if __SHMEDIA__
+#if __SH5__ == 32
+       .section        .text..SHmedia32,"ax"
+#else
+       .text
+#endif
+       .align  2
+#if 0
+/* The assembly code after this comment is a hand-optimized version of
+   the C code shown below.  Note that the registers that are modified are
+   exactly those listed as clobbered in the patterns divsi3_i1 and
+   divsi3_i1_media.
+       
+int __sdivsi3 (i, j)
+     int i, j;
+{
+  register unsigned long long r18 asm ("r18");
+  register unsigned long long r19 asm ("r19");
+  register unsigned long long r0 asm ("r0") = 0;
+  register unsigned long long r1 asm ("r1") = 1;
+  register int r2 asm ("r2") = i >> 31;
+  register int r3 asm ("r3") = j >> 31;
+
+  r2 = r2 ? r2 : r1;
+  r3 = r3 ? r3 : r1;
+  r18 = i * r2;
+  r19 = j * r3;
+  r2 *= r3;
+  
+  r19 <<= 31;
+  r1 <<= 31;
+  do
+    if (r18 >= r19)
+      r0 |= r1, r18 -= r19;
+  while (r19 >>= 1, r1 >>= 1);
+
+  return r2 * (int)r0;
+}
+*/
+GLOBAL(sdivsi3):
+       pt/l    LOCAL(sdivsi3_dontadd), tr2
+       pt/l    LOCAL(sdivsi3_loop), tr1
+       ptabs/l r18, tr0
+       movi    0, r0
+       movi    1, r1
+       shari.l r4, 31, r2
+       shari.l r5, 31, r3
+       cmveq   r2, r1, r2
+       cmveq   r3, r1, r3
+       muls.l  r4, r2, r18
+       muls.l  r5, r3, r19
+       muls.l  r2, r3, r2
+       shlli   r19, 31, r19
+       shlli   r1, 31, r1
+LOCAL(sdivsi3_loop):
+       bgtu    r19, r18, tr2
+       or      r0, r1, r0
+       sub     r18, r19, r18
+LOCAL(sdivsi3_dontadd):
+       shlri   r1, 1, r1
+       shlri   r19, 1, r19
+       bnei    r1, 0, tr1
+       muls.l  r0, r2, r0
+       add.l   r0, r63, r0
+       blink   tr0, r63
+#elif 0 /* ! 0 */
+ // inputs: r4,r5
+ // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
+ // result in r0
+GLOBAL(sdivsi3):
+ // can create absolute value without extra latency,
+ // but dependent on proper sign extension of inputs:
+ // shari.l r5,31,r2
+ // xor r5,r2,r20
+ // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
+ shari.l r5,31,r2
+ ori r2,1,r2
+ muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
+ movi 0xffffffffffffbb0c,r19 // shift count eqiv 76
+ shari.l r4,31,r3
+ nsb r20,r0
+ shlld r20,r0,r25
+ shlri r25,48,r25
+ sub r19,r25,r1
+ mmulfx.w r1,r1,r2
+ mshflo.w r1,r63,r1
+ // If r4 was to be used in-place instead of r21, could use this sequence
+ // to compute absolute:
+ // sub r63,r4,r19 // compute absolute value of r4
+ // shlri r4,32,r3 // into lower 32 bit of r4, keeping
+ // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
+ ori r3,1,r3
+ mmulfx.w r25,r2,r2
+ sub r19,r0,r0
+ muls.l r4,r3,r21
+ msub.w r1,r2,r2
+ addi r2,-2,r1
+ mulu.l r21,r1,r19
+ mmulfx.w r2,r2,r2
+ shlli r1,15,r1
+ shlrd r19,r0,r19
+ mulu.l r19,r20,r3
+ mmacnfx.wl r25,r2,r1
+ ptabs r18,tr0
+ sub r21,r3,r25
+
+ mulu.l r25,r1,r2
+ addi r0,14,r0
+ xor r4,r5,r18
+ shlrd r2,r0,r2
+ mulu.l r2,r20,r3
+ add r19,r2,r19
+ shari.l r18,31,r18
+ sub r25,r3,r25
+
+ mulu.l r25,r1,r2
+ sub r25,r20,r25
+ add r19,r18,r19
+ shlrd r2,r0,r2
+ mulu.l r2,r20,r3
+ addi r25,1,r25
+ add r19,r2,r19
+
+ cmpgt r25,r3,r25
+ add.l r19,r25,r0
+ xor r0,r18,r0
+ blink tr0,r63
+#else /* ! 0 && ! 0 */
+
+ /* Table-driven signed 32-bit division (SHmedia).
+    Non-PIC builds provide the sdivsi3 / sdivsi3_1 entry points, which load
+    the address of div_table_internal into r20 and fall through into
+    sdivsi3_2.  PIC builds provide only sdivsi3_2, which expects the table
+    address in r20 on entry.  Returns through the address held in r18.  */
+ // inputs: r4,r5
+ // clobbered: r1,r18,r19,r20,r21,r25,tr0
+ // result in r0
+       HIDDEN_FUNC(GLOBAL(sdivsi3_2))
+#ifndef __pic__
+       FUNC(GLOBAL(sdivsi3))
+GLOBAL(sdivsi3): /* this is the shcompact entry point */
+ // The special SHmedia entry point sdivsi3_1 prevents accidental linking
+ // with the SHcompact implementation, which clobbers tr1 / tr2.
+ .global GLOBAL(sdivsi3_1)
+GLOBAL(sdivsi3_1):
+ .global GLOBAL(div_table_internal)
+ // Build the table address in r20: upper 16 bits first, then the lower 16.
+ movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
+ shori GLOBAL(div_table_internal) & 65535, r20
+#endif
+ .global GLOBAL(sdivsi3_2)
+ // div_table in r20
+ // clobbered: r1,r18,r19,r21,r25,tr0
+GLOBAL(sdivsi3_2):
+ nsb r5, r1
+ shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
+ shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
+ ldx.ub r20, r21, r19 // u0.8
+ shari r25, 32, r25   // normalize to s2.30
+ shlli r21, 1, r21
+ muls.l r25, r19, r19 // s2.38
+ ldx.w r20, r21, r21  // s2.14
+  ptabs r18, tr0
+ shari r19, 24, r19   // truncate to s2.14
+ sub r21, r19, r19    // some 11 bit inverse in s1.14
+ muls.l r19, r19, r21 // u0.28
+  // The next two instructions turn r1 into 92 - r1, the shift count
+  // consumed by the final shard.
+  sub r63, r1, r1
+  addi r1, 92, r1
+ muls.l r25, r21, r18 // s2.58
+ shlli r19, 45, r19   // multiply by two and convert to s2.58
+  /* bubble */
+ sub r19, r18, r18
+ shari r18, 28, r18   // some 22 bit inverse in s1.30
+ muls.l r18, r25, r0  // s2.60
+  muls.l r18, r4, r25 // s32.30
+  /* bubble */
+ shari r0, 16, r19   // s-16.44
+ muls.l r19, r18, r19 // s-16.74
+  shari r25, 63, r0
+  shari r4, 14, r18   // s19.-14
+ shari r19, 30, r19   // s-16.44
+ muls.l r19, r18, r19 // s15.30
+  xor r21, r0, r21    // You could also use the constant 1 << 27.
+  add r21, r25, r21
+ sub r21, r19, r21
+ shard r21, r1, r21
+ sub r21, r0, r0
+ blink tr0, r63
+#ifndef __pic__
+       ENDFUNC(GLOBAL(sdivsi3))
+#endif
+       ENDFUNC(GLOBAL(sdivsi3_2))
+#endif
+#elif defined __SHMEDIA__
+/* m5compact-nofpu */
+ /* Signed 32-bit division done with a 32-step shift-and-subtract loop.
+    Dividend in r4, divisor in r5, quotient in r0; returns through the
+    address held in r18.  */
+ // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
+       .mode   SHmedia
+       .section        .text..SHmedia32,"ax"
+       .align  2
+       FUNC(GLOBAL(sdivsi3))
+GLOBAL(sdivsi3):
+       pt/l LOCAL(sdivsi3_dontsub), tr0
+       pt/l LOCAL(sdivsi3_loop), tr1
+       ptabs/l r18,tr2
+       // r18 / r19 = sign masks (0 or -1) of dividend / divisor.
+       shari.l r4,31,r18
+       shari.l r5,31,r19
+       // xor with the mask, then subtract it: r20 = |r4|, r21 = |r5|.
+       xor r4,r18,r20
+       xor r5,r19,r21
+       sub.l r20,r18,r20
+       sub.l r21,r19,r21
+       // r19 becomes the sign mask of the quotient.
+       xor r18,r19,r19
+       // r25 = |divisor| << 32; its low word is reused as the loop counter.
+       shlli r21,32,r25
+       // r21 = (|divisor| << 32) - 1: subtracting it removes the divisor
+       // from the upper half and adds 1 (a quotient bit) to the lower half.
+       addi r25,-1,r21
+       addz.l r20,r63,r20
+LOCAL(sdivsi3_loop):
+       shlli r20,1,r20
+       // Skip the subtraction while r21 >= r20 (unsigned compare).
+       bgeu/u r21,r20,tr0
+       sub r20,r21,r20
+LOCAL(sdivsi3_dontsub):
+       // The counter goes -1, -2, ...; the loop ends when it reaches -32.
+       addi.l r25,-1,r25
+       bnei r25,-32,tr1
+       // Apply the quotient sign (xor with the mask, subtract the mask).
+       xor r20,r19,r20
+       sub.l r20,r19,r0
+       blink tr2,r63
+       ENDFUNC(GLOBAL(sdivsi3))
+#else /* ! __SHMEDIA__ */
+       /* SHcompact signed 32-bit division.
+          Dividend in r4, divisor in r5, quotient returned in r0.
+          A zero divisor branches to div0 and returns 0.  */
+       FUNC(GLOBAL(sdivsi3))
+GLOBAL(sdivsi3):
+       mov     r5,r0
+       mov     r4,r1
+
+       /* Divisor == 0?  */
+       tst     r0,r0
+       bt      div0
+       mov     #0,r2
+       div0s   r2,r1
+       subc    r3,r3
+       subc    r2,r1
+       div0s   r0,r3
+       rotcl   r1
+       /* 32 identical divide steps, expanded by the assembler.  */
+       .rept   32
+       div1    r0,r3
+       rotcl   r1
+       .endr
+       addc    r2,r1
+       rts
+       mov     r1,r0
+
+
+div0:  rts
+       mov     #0,r0
+
+       ENDFUNC(GLOBAL(sdivsi3))
+#endif /* ! __SHMEDIA__ */
+#endif /* ! __SH4__ */
+#endif
+#ifdef L_udivsi3_i4
+
+       .title "SH DIVIDE"
+!! 4 byte integer Divide code for the Renesas SH
+#ifdef __SH4__
+!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
+!! and t bit
+/* Unsigned 32-bit division using the double-precision FPU.
+   Both operands have bit 31 flipped, are converted as signed integers and
+   then get 2147483648.0 (the constant at L1) added back, which yields
+   their unsigned values as doubles.  The quotient is truncated into fpul.
+   A divisor of 0 or 1 takes the trivial path, which returns the dividend
+   unchanged.  */
+
+       .global GLOBAL(udivsi3_i4)
+       HIDDEN_FUNC(GLOBAL(udivsi3_i4))
+GLOBAL(udivsi3_i4):
+       mov #1,r1
+       /* T is set only when r5 > 1 (unsigned).  */
+       cmp/hi r1,r5
+       bf trivial
+       /* Rotating 1 right gives 0x80000000, the bias mask.  */
+       rotr r1
+       xor r1,r4
+       lds r4,fpul
+       mova L1,r0
+       /* Load the 2^31 constant into dr4, as one 64-bit move or two
+          32-bit moves depending on FMOVD_WORKS.  */
+#ifdef FMOVD_WORKS
+       fmov.d @r0+,dr4
+#else
+       fmov.s @r0+,DR40
+       fmov.s @r0,DR41
+#endif
+       float fpul,dr0
+       xor r1,r5
+       lds r5,fpul
+       float fpul,dr2
+       fadd dr4,dr0
+       fadd dr4,dr2
+       fdiv dr2,dr0
+       rts
+       ftrc dr0,fpul
+
+trivial:
+       rts
+       lds r4,fpul
+
+       .align 2
+#ifdef FMOVD_WORKS
+       .align 3        ! make double below 8 byte aligned.
+#endif
+L1:
+       .double 2147483648
+
+       ENDFUNC(GLOBAL(udivsi3_i4))
+#elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
+#if ! __SH5__ || __SH5__ == 32
+!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
+/* Unsigned 32-bit division through the double-precision FPU (SHmedia).
+   Both operands are zero-extended to 64 bits, moved into FP registers,
+   converted to double and divided; the quotient is converted back to an
+   integer.  Returns through the address held in r18.
+   NOTE(review): the code leaves the result word in fr32 while the line
+   above says fpul; presumably fpul corresponds to fr32 here - confirm.  */
+       .mode   SHmedia
+       .global GLOBAL(udivsi3_i4)
+       HIDDEN_FUNC(GLOBAL(udivsi3_i4))
+GLOBAL(udivsi3_i4):
+       addz.l  r4,r63,r20
+       addz.l  r5,r63,r21
+       fmov.qd r20,dr0
+       fmov.qd r21,dr32
+       ptabs   r18,tr0
+       float.qd dr0,dr0
+       float.qd dr32,dr32
+       fdiv.d  dr0,dr32,dr0
+       ftrc.dq dr0,dr32
+       // Copy fr33 into fr32 (presumably the low word of the 64-bit result).
+       fmov.s fr33,fr32
+       blink tr0,r63
+
+       ENDFUNC(GLOBAL(udivsi3_i4))
+#endif /* ! __SH5__ || __SH5__ == 32 */
+#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
+!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
+/* Unsigned 32-bit division using double-precision FPU arithmetic.
+   FPSCR is pushed on the stack, replaced by the word stored at L1 for the
+   duration of the computation, and popped again in the delay slot of the
+   final rts.  Operands have bit 31 flipped, are converted as signed
+   integers and get 2147483648.0 added back before the divide.
+   A divisor of 0 or 1 takes the trivial path (dividend returned in fpul);
+   that branch is taken before FPSCR is touched.  */
+
+       .global GLOBAL(udivsi3_i4)
+       HIDDEN_FUNC(GLOBAL(udivsi3_i4))
+GLOBAL(udivsi3_i4):
+       mov #1,r1
+       /* T is set only when r5 > 1 (unsigned).  */
+       cmp/hi r1,r5
+       bf trivial
+       sts.l fpscr,@-r15
+       mova L1,r0
+       /* Load the temporary FPSCR value; r0 then points at the double.  */
+       lds.l @r0+,fpscr
+       /* Rotating 1 right gives 0x80000000, the bias mask.  */
+       rotr r1
+       xor r1,r4
+       lds r4,fpul
+#ifdef FMOVD_WORKS
+       fmov.d @r0+,dr4
+#else
+       fmov.s @r0+,DR40
+       fmov.s @r0,DR41
+#endif
+       float fpul,dr0
+       xor r1,r5
+       lds r5,fpul
+       float fpul,dr2
+       fadd dr4,dr0
+       fadd dr4,dr2
+       fdiv dr2,dr0
+       ftrc dr0,fpul
+       rts
+       lds.l @r15+,fpscr
+
+#ifdef FMOVD_WORKS
+       .align 3        ! make double below 8 byte aligned.
+#endif
+trivial:
+       rts
+       lds r4,fpul
+
+       .align 2
+L1:
+       /* Temporary FPSCR value: bit 19 set, plus bit 20 when FMOVD_WORKS
+          (presumably PR and SZ - confirm against the FPSCR layout).  */
+#ifndef FMOVD_WORKS
+       .long 0x80000
+#else
+       .long 0x180000
+#endif
+       .double 2147483648
+
+       ENDFUNC(GLOBAL(udivsi3_i4))
+#endif /* ! __SH4__ */
+#endif
+
+#ifdef L_udivsi3
+/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
+   sh2e/sh3e code.  */
+#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
+
+!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
+       .global GLOBAL(udivsi3)
+       HIDDEN_FUNC(GLOBAL(udivsi3))
+
+#if __SHMEDIA__
+#if __SH5__ == 32
+       .section        .text..SHmedia32,"ax"
+#else
+       .text
+#endif
+       .align  2
+#if 0
+/* The assembly code that follows is a hand-optimized version of the C
+   code that follows.  Note that the registers that are modified are
+   exactly those listed as clobbered in the patterns udivsi3_i1 and
+   udivsi3_i1_media.
+       
+unsigned 
+__udivsi3 (i, j)
+    unsigned i, j; 
+{
+  register unsigned long long r0 asm ("r0") = 0;
+  register unsigned long long r18 asm ("r18") = 1;
+  register unsigned long long r4 asm ("r4") = i;
+  register unsigned long long r19 asm ("r19") = j;
+
+  r19 <<= 31;
+  r18 <<= 31;
+  do
+    if (r4 >= r19)
+      r0 |= r18, r4 -= r19;
+  while (r19 >>= 1, r18 >>= 1);
+
+  return r0;
+}
+*/
+GLOBAL(udivsi3):
+       pt/l    LOCAL(udivsi3_dontadd), tr2
+       pt/l    LOCAL(udivsi3_loop), tr1
+       ptabs/l r18, tr0
+       movi    0, r0
+       movi    1, r18
+       addz.l  r5, r63, r19
+       addz.l  r4, r63, r4
+       shlli   r19, 31, r19
+       shlli   r18, 31, r18
+LOCAL(udivsi3_loop):
+       bgtu    r19, r4, tr2
+       or      r0, r18, r0
+       sub     r4, r19, r4
+LOCAL(udivsi3_dontadd):
+       shlri   r18, 1, r18
+       shlri   r19, 1, r19
+       bnei    r18, 0, tr1
+       blink   tr0, r63
+#else
+ /* Unsigned 32-bit division for SHmedia.  The divisor is zero-extended
+    and normalized, and the quotient is accumulated in r18 over three
+    multiply / correct rounds.  Returns through the address held in r18
+    on entry (captured in tr0 before r18 is reused).  */
+GLOBAL(udivsi3):
+ // inputs: r4,r5
+ // clobbered: r18,r19,r20,r21,r22,r25,tr0
+ // result in r0.
+ addz.l r5,r63,r22
+ nsb r22,r0
+ shlld r22,r0,r25
+ shlri r25,48,r25
+ movi 0xffffffffffffbb0c,r20 // shift count equiv 76
+ sub r20,r25,r21
+ mmulfx.w r21,r21,r19
+ mshflo.w r21,r63,r21
+ ptabs r18,tr0
+ mmulfx.w r25,r19,r19
+ sub r20,r0,r0
+ /* bubble */
+ msub.w r21,r19,r19
+ addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
+                   before the msub.w, but we need a different value for
+                   r19 to keep errors under control.  */
+ mulu.l r4,r21,r18
+ mmulfx.w r19,r19,r19
+ shlli r21,15,r21
+ shlrd r18,r0,r18
+ mulu.l r18,r22,r20
+ mmacnfx.wl r25,r19,r21
+ /* bubble */
+ sub r4,r20,r25
+
+ mulu.l r25,r21,r19
+ addi r0,14,r0
+ /* bubble */
+ shlrd r19,r0,r19
+ mulu.l r19,r22,r20
+ add r18,r19,r18
+ /* bubble */
+ sub.l r25,r20,r25
+
+ mulu.l r25,r21,r19
+ addz.l r25,r63,r25
+ sub r25,r22,r25
+ shlrd r19,r0,r19
+ mulu.l r19,r22,r20
+ addi r25,1,r25
+ add r18,r19,r18
+
+ cmpgt r25,r20,r25
+ add.l r18,r25,r0
+ blink tr0,r63
+ /* Close the function opened by HIDDEN_FUNC so this variant also gets a
+    size directive.  */
+       ENDFUNC(GLOBAL(udivsi3))
+#endif
+#elif defined (__SHMEDIA__)
+/* m5compact-nofpu - more emphasis on code size than on speed, but don't
+   ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
+   So use a short shmedia loop.
+   Dividend in r4, divisor in r5, quotient in r0; returns through the
+   address held in r18.  */
+ // clobbered: r20,r21,r25,tr0,tr1,tr2
+       .mode   SHmedia
+       .section        .text..SHmedia32,"ax"
+       .align  2
+GLOBAL(udivsi3):
+ pt/l LOCAL(udivsi3_dontsub), tr0
+ pt/l LOCAL(udivsi3_loop), tr1
+ ptabs/l r18,tr2
+ // r25 = divisor << 32; its low word is reused as the loop counter.
+ shlli r5,32,r25
+ // r21 = (divisor << 32) - 1: subtracting it removes the divisor from the
+ // upper half and adds 1 (a quotient bit) to the lower half.
+ addi r25,-1,r21
+ addz.l r4,r63,r20
+LOCAL(udivsi3_loop):
+ shlli r20,1,r20
+ // Skip the subtraction while r21 >= r20 (unsigned compare).
+ bgeu/u r21,r20,tr0
+ sub r20,r21,r20
+LOCAL(udivsi3_dontsub):
+ // The counter goes -1, -2, ...; the loop ends when it reaches -32.
+ addi.l r25,-1,r25
+ bnei r25,-32,tr1
+ add.l r20,r63,r0
+ blink tr2,r63
+ /* Close the function opened by HIDDEN_FUNC so this variant also gets a
+    size directive.  */
+       ENDFUNC(GLOBAL(udivsi3))
+#else /* ! defined (__SHMEDIA__) */
+/* SHcompact unsigned 32-bit division: dividend in r4, divisor in r5,
+   quotient in r0.  pr is pushed on entry and popped before returning
+   because the helpers below are reached with bsr.
+   div8 / div7 run eight / seven div1 steps (the last one in the rts delay
+   slot); divx4 runs four div1 steps interleaved with three rotcl r0.  */
+LOCAL(div8):
+ div1 r5,r4
+LOCAL(div7):
+ div1 r5,r4; div1 r5,r4; div1 r5,r4
+ div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
+
+LOCAL(divx4):
+ div1 r5,r4; rotcl r0
+ div1 r5,r4; rotcl r0
+ div1 r5,r4; rotcl r0
+ rts; div1 r5,r4
+
+GLOBAL(udivsi3):
+ sts.l pr,@-r15
+ /* r0 = low 16 bits of the divisor; equal to r5 only when the divisor
+    fits in 16 bits.  */
+ extu.w r5,r0
+ cmp/eq r5,r0
+ /* With __sh1__ a plain bf is used, so the div0u below is not executed
+    on the taken path and large_divisor issues its own div0u.  Otherwise
+    bf/s runs the div0u in its delay slot on both paths.  */
+#ifdef __sh1__
+ bf LOCAL(large_divisor)
+#else
+ bf/s LOCAL(large_divisor)
+#endif
+ div0u
+ /* 16-bit divisor: two rounds of div8 + div7, each div7 call carrying one
+    more div1 in the bsr delay slot.  */
+ swap.w r4,r0
+ shlr16 r4
+ bsr LOCAL(div8)
+ shll16 r5
+ bsr LOCAL(div7)
+ div1 r5,r4
+ xtrct r4,r0
+ xtrct r0,r4
+ bsr LOCAL(div8)
+ swap.w r4,r4
+ bsr LOCAL(div7)
+ div1 r5,r4
+ lds.l @r15+,pr
+ xtrct r4,r0
+ swap.w r0,r0
+ rotcl r0
+ rts
+ /* Delay slot: undo the earlier shll16 of the divisor.  */
+ shlr16 r5
+
+LOCAL(large_divisor):
+#ifdef __sh1__
+ div0u
+#endif
+ mov #0,r0
+ xtrct r4,r0
+ xtrct r0,r4
+ /* Four calls to divx4, each preceded by a rotcl r0 in the delay slot.  */
+ bsr LOCAL(divx4)
+ rotcl r0
+ bsr LOCAL(divx4)
+ rotcl r0
+ bsr LOCAL(divx4)
+ rotcl r0
+ bsr LOCAL(divx4)
+ rotcl r0
+ lds.l @r15+,pr
+ rts
+ rotcl r0
+
+       ENDFUNC(GLOBAL(udivsi3))
+#endif /* ! __SHMEDIA__ */
+#endif /* __SH4__ */
+#endif /* L_udivsi3 */
+
+#ifdef L_udivdi3
+#ifdef __SHMEDIA__
+       /* Unsigned 64-bit division for SHmedia.
+          Dividend in r2, divisor in r3; the quotient is returned in r2 and
+          control returns through the address held in r18.  The divisor is
+          not checked for zero.  A 31-bit reciprocal estimate of the
+          normalized divisor is built in r1 and used for the divide steps;
+          the large_divisor path is taken when r9 (r20 + 32) is positive.  */
+       .mode   SHmedia
+       .section        .text..SHmedia32,"ax"
+       .align  2
+       .global GLOBAL(udivdi3)
+       FUNC(GLOBAL(udivdi3))
+GLOBAL(udivdi3):
+       /* Presumably makes udivdi3_internal a hidden alias of this entry
+          point; the macro is defined elsewhere - confirm.  */
+       HIDDEN_ALIAS(udivdi3_internal,udivdi3)
+       shlri r3,1,r4
+       nsb r4,r22
+       shlld r3,r22,r6
+       shlri r6,49,r5
+       movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
+       sub r21,r5,r1
+       mmulfx.w r1,r1,r4
+       mshflo.w r1,r63,r1
+       sub r63,r22,r20 // r63 == 64 % 64
+       mmulfx.w r5,r4,r4
+       pta LOCAL(large_divisor),tr0
+       addi r20,32,r9
+       msub.w r1,r4,r1
+       madd.w r1,r1,r1
+       mmulfx.w r1,r1,r4
+       shlri r6,32,r7
+       bgt/u r9,r63,tr0 // large_divisor
+       mmulfx.w r5,r4,r4
+       shlri r2,32+14,r19
+       addi r22,-31,r0
+       msub.w r1,r4,r1
+
+       mulu.l r1,r7,r4
+       addi r1,-3,r5
+       mulu.l r5,r19,r5
+       sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+       shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+                        the case may be, %0000000000000000 000.11111111111, still */
+       muls.l r1,r4,r4 /* leaving at least one sign bit.  */
+       mulu.l r5,r3,r8
+       mshalds.l r1,r21,r1
+       shari r4,26,r4
+       shlld r8,r0,r8
+       add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+       sub r2,r8,r2
+       /* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
+
+       shlri r2,22,r21
+       mulu.l r21,r1,r21
+       shlld r5,r0,r8
+       addi r20,30-22,r0
+       shlrd r21,r0,r21
+       mulu.l r21,r3,r5
+       add r8,r21,r8
+       mcmpgt.l r21,r63,r21 // See Note 1
+       addi r20,30,r0
+       mshfhi.l r63,r21,r21
+       sub r2,r5,r2
+       andc r2,r21,r2
+
+       /* small divisor: need a third divide step */
+       mulu.l r2,r1,r7
+       ptabs r18,tr0
+       addi r2,1,r2
+       shlrd r7,r0,r7
+       mulu.l r7,r3,r5
+       add r8,r7,r8
+       sub r2,r3,r2
+       cmpgt r2,r5,r5
+       add r8,r5,r2
+       /* could test r3 here to check for divide by zero.  */
+       blink tr0,r63
+
+LOCAL(large_divisor):
+       mmulfx.w r5,r4,r4
+       shlrd r2,r9,r25
+       shlri r25,32,r8
+       msub.w r1,r4,r1
+
+       mulu.l r1,r7,r4
+       addi r1,-3,r5
+       mulu.l r5,r8,r5
+       sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+       shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+                        the case may be, %0000000000000000 000.11111111111, still */
+       muls.l r1,r4,r4 /* leaving at least one sign bit.  */
+       shlri r5,14-1,r8
+       mulu.l r8,r7,r5
+       mshalds.l r1,r21,r1
+       shari r4,26,r4
+       add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+       sub r25,r5,r25
+       /* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
+
+       shlri r25,22,r21
+       mulu.l r21,r1,r21
+       pta LOCAL(no_lo_adj),tr0
+       addi r22,32,r0
+       shlri r21,40,r21
+       mulu.l r21,r7,r5
+       add r8,r21,r8
+       shlld r2,r0,r2
+       sub r25,r5,r25
+       bgtu/u r7,r25,tr0 // no_lo_adj
+       addi r8,1,r8
+       sub r25,r7,r25
+LOCAL(no_lo_adj):
+       mextr4 r2,r25,r2
+
+       /* large_divisor: only needs a few adjustments.  */
+       mulu.l r8,r6,r5
+       ptabs r18,tr0
+       /* bubble */
+       cmpgtu r5,r2,r5
+       sub r8,r5,r2
+       blink tr0,r63
+       ENDFUNC(GLOBAL(udivdi3))
+/* Note 1: To shift the result of the second divide stage so that the result
+   always fits into 32 bits, yet we still reduce the rest sufficiently
+   would require a lot of instructions to do the shifts just right.  Using
+   the full 64 bit shift result to multiply with the divisor would require
+   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
+   Fortunately, if the upper 32 bits of the shift result are nonzero, we
+   know that the rest after taking this partial result into account will
+   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
+   upper 32 bits of the partial result are nonzero.  */
+#endif /* __SHMEDIA__ */
+#endif /* L_udivdi3 */
+
+#ifdef L_divdi3
+#ifdef __SHMEDIA__
+       /* Signed 64-bit division for SHmedia, built on udivdi3_internal.
+          Dividend in r2, divisor in r3, quotient returned in r2.
+          Both operands are replaced by their absolute values.  When their
+          signs agree the code branches straight to udivdi3_internal, which
+          then returns to the original caller through r18.  Otherwise
+          udivdi3_internal is called, its result is negated, and control
+          returns through the address saved in tr1.  */
+       .mode   SHmedia
+       .section        .text..SHmedia32,"ax"
+       .align  2
+       .global GLOBAL(divdi3)
+       FUNC(GLOBAL(divdi3))
+GLOBAL(divdi3):
+       pta GLOBAL(udivdi3_internal),tr0
+       // r22 / r23 = sign masks (0 or -1) of dividend / divisor.
+       shari r2,63,r22
+       shari r3,63,r23
+       // xor with the mask, then subtract it: absolute values.
+       xor r2,r22,r2
+       xor r3,r23,r3
+       sub r2,r22,r2
+       sub r3,r23,r3
+       // Same sign: tail-branch to the unsigned divide.
+       beq/u r22,r23,tr0
+       // Differing signs: keep the caller's return address in tr1, call
+       // the unsigned divide with the link in r18, then negate.
+       ptabs r18,tr1
+       blink tr0,r18
+       sub r63,r2,r2
+       blink tr1,r63
+       ENDFUNC(GLOBAL(divdi3))
+#endif /* __SHMEDIA__ */
+#endif /* L_divdi3 */
+
+#ifdef L_umoddi3
+#ifdef __SHMEDIA__
+       /* Unsigned 64-bit remainder for SHmedia.
+          Dividend in r2, divisor in r3; the remainder is returned in r2 and
+          control returns through the address held in r18.  The divisor is
+          not checked for zero.  A 31-bit reciprocal estimate of the
+          normalized divisor is built in r1 and used for the divide steps;
+          the large_divisor path is taken when r9 (r20 + 32) is positive.  */
+       .mode   SHmedia
+       .section        .text..SHmedia32,"ax"
+       .align  2
+       .global GLOBAL(umoddi3)
+       FUNC(GLOBAL(umoddi3))
+GLOBAL(umoddi3):
+       /* Presumably makes umoddi3_internal a hidden alias of this entry
+          point; the macro is defined elsewhere - confirm.  */
+       HIDDEN_ALIAS(umoddi3_internal,umoddi3)
+       shlri r3,1,r4
+       nsb r4,r22
+       shlld r3,r22,r6
+       shlri r6,49,r5
+       movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
+       sub r21,r5,r1
+       mmulfx.w r1,r1,r4
+       mshflo.w r1,r63,r1
+       sub r63,r22,r20 // r63 == 64 % 64
+       mmulfx.w r5,r4,r4
+       pta LOCAL(large_divisor),tr0
+       addi r20,32,r9
+       msub.w r1,r4,r1
+       madd.w r1,r1,r1
+       mmulfx.w r1,r1,r4
+       shlri r6,32,r7
+       bgt/u r9,r63,tr0 // large_divisor
+       mmulfx.w r5,r4,r4
+       shlri r2,32+14,r19
+       addi r22,-31,r0
+       msub.w r1,r4,r1
+
+       mulu.l r1,r7,r4
+       addi r1,-3,r5
+       mulu.l r5,r19,r5
+       sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+       shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+                        the case may be, %0000000000000000 000.11111111111, still */
+       muls.l r1,r4,r4 /* leaving at least one sign bit.  */
+       mulu.l r5,r3,r5
+       mshalds.l r1,r21,r1
+       shari r4,26,r4
+       shlld r5,r0,r5
+       add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+       sub r2,r5,r2
+       /* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
+
+       shlri r2,22,r21
+       mulu.l r21,r1,r21
+       addi r20,30-22,r0
+       /* bubble */ /* could test r3 here to check for divide by zero.  */
+       shlrd r21,r0,r21
+       mulu.l r21,r3,r5
+       mcmpgt.l r21,r63,r21 // See Note 1
+       addi r20,30,r0
+       mshfhi.l r63,r21,r21
+       sub r2,r5,r2
+       andc r2,r21,r2
+
+       /* small divisor: need a third divide step */
+       mulu.l r2,r1,r7
+       ptabs r18,tr0
+       sub r2,r3,r8 /* re-use r8 here for rest - r3 */
+       shlrd r7,r0,r7
+       mulu.l r7,r3,r5
+       /* bubble */
+       addi r8,1,r7
+       cmpgt r7,r5,r7
+       cmvne r7,r8,r2
+       sub r2,r5,r2
+       blink tr0,r63
+
+LOCAL(large_divisor):
+       mmulfx.w r5,r4,r4
+       shlrd r2,r9,r25
+       shlri r25,32,r8
+       msub.w r1,r4,r1
+
+       mulu.l r1,r7,r4
+       addi r1,-3,r5
+       mulu.l r5,r8,r5
+       sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+       shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+                        the case may be, %0000000000000000 000.11111111111, still */
+       muls.l r1,r4,r4 /* leaving at least one sign bit.  */
+       shlri r5,14-1,r8
+       mulu.l r8,r7,r5
+       mshalds.l r1,r21,r1
+       shari r4,26,r4
+       add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+       sub r25,r5,r25
+       /* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
+
+       shlri r25,22,r21
+       mulu.l r21,r1,r21
+       pta LOCAL(no_lo_adj),tr0
+       addi r22,32,r0
+       shlri r21,40,r21
+       mulu.l r21,r7,r5
+       add r8,r21,r8
+       shlld r2,r0,r2
+       sub r25,r5,r25
+       bgtu/u r7,r25,tr0 // no_lo_adj
+       addi r8,1,r8
+       sub r25,r7,r25
+LOCAL(no_lo_adj):
+       mextr4 r2,r25,r2
+
+       /* large_divisor: only needs a few adjustments.  */
+       mulu.l r8,r6,r5
+       ptabs r18,tr0
+       add r2,r6,r7
+       cmpgtu r5,r2,r8
+       cmvne r8,r7,r2
+       sub r2,r5,r2
+       // Shift the remainder back down by the normalization count in r22.
+       shlrd r2,r22,r2
+       blink tr0,r63
+       ENDFUNC(GLOBAL(umoddi3))
+/* Note 1: To shift the result of the second divide stage so that the result
+   always fits into 32 bits, yet we still reduce the rest sufficiently
+   would require a lot of instructions to do the shifts just right.  Using
+   the full 64 bit shift result to multiply with the divisor would require
+   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
+   Fortunately, if the upper 32 bits of the shift result are nonzero, we
+   know that the rest after taking this partial result into account will
+   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
+   upper 32 bits of the partial result are nonzero.  */
+#endif /* __SHMEDIA__ */
+#endif /* L_umoddi3 */
+
+#ifdef L_moddi3
+#ifdef __SHMEDIA__
+       /* Signed 64-bit remainder for SHmedia, built on umoddi3_internal.
+          Dividend in r2, divisor in r3, remainder returned in r2.
+          Both operands are replaced by their absolute values.  When the
+          dividend is non-negative the code branches straight to
+          umoddi3_internal, which then returns to the original caller
+          through r18.  Otherwise umoddi3_internal is called, its result is
+          negated, and control returns through the address saved in tr1.
+          Only the dividend's sign decides whether the result is negated.  */
+       .mode   SHmedia
+       .section        .text..SHmedia32,"ax"
+       .align  2
+       .global GLOBAL(moddi3)
+       FUNC(GLOBAL(moddi3))
+GLOBAL(moddi3):
+       pta GLOBAL(umoddi3_internal),tr0
+       // r22 / r23 = sign masks (0 or -1) of dividend / divisor.
+       shari r2,63,r22
+       shari r3,63,r23
+       // xor with the mask, then subtract it: absolute values.
+       xor r2,r22,r2
+       xor r3,r23,r3
+       sub r2,r22,r2
+       sub r3,r23,r3
+       // Dividend sign mask is zero: tail-branch to the unsigned remainder.
+       beq/u r22,r63,tr0
+       // Negative dividend: keep the caller's return address in tr1, call
+       // the unsigned remainder with the link in r18, then negate.
+       ptabs r18,tr1
+       blink tr0,r18
+       sub r63,r2,r2
+       blink tr1,r63
+       ENDFUNC(GLOBAL(moddi3))
+#endif /* __SHMEDIA__ */
+#endif /* L_moddi3 */
+
+#ifdef L_set_fpscr
+#if !defined (__SH2A_NOFPU__)
+#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
+/* set_fpscr: load FPSCR from r4 and rewrite both words of fpscr_values
+   with two variants of that value that differ only in bits 19 and 20.
+     - one variant has bit 19 set, and bit 20 set only when FMOVD_WORKS is
+       defined;
+     - the other has bits 19 and 20 both clear.
+   With __SH4__ or __SH2A_DOUBLE__ the bit-19-set variant goes to word 1
+   and the cleared variant to word 0; otherwise the order is reversed.
+   (Bits 19 / 20 are presumably FPSCR.PR / FPSCR.SZ - confirm.)
+   Clobbers r0-r3; in PIC code r12 is saved and restored on the stack.  */
+#ifdef __SH5__
+       .mode   SHcompact
+#endif
+       .global GLOBAL(set_fpscr)
+       HIDDEN_FUNC(GLOBAL(set_fpscr))
+GLOBAL(set_fpscr):
+       lds r4,fpscr
+       /* r1 = address of fpscr_values (through the GOT when PIC).  */
+#ifdef __PIC__
+       mov.l   r12,@-r15
+#ifdef __vxworks
+       mov.l   LOCAL(set_fpscr_L0_base),r12
+       mov.l   LOCAL(set_fpscr_L0_index),r0
+       mov.l   @r12,r12
+       mov.l   @(r0,r12),r12
+#else
+       mova    LOCAL(set_fpscr_L0),r0
+       mov.l   LOCAL(set_fpscr_L0),r12
+       add     r0,r12
+#endif
+       mov.l   LOCAL(set_fpscr_L1),r0
+       mov.l   @(r0,r12),r1
+       mov.l   @r15+,r12
+#else
+       mov.l LOCAL(set_fpscr_L1),r1
+#endif
+       /* Swap the 16-bit halves so that bits 16-23 of the FPSCR value sit
+          in the low byte of r0, where the 8-bit immediates can reach them;
+          24 = bits 3 and 4 there, i.e. bits 19 and 20 of the real value.  */
+       swap.w r4,r0
+       or #24,r0
+#ifndef FMOVD_WORKS
+       xor #16,r0
+#endif
+#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
+       swap.w r0,r3
+       mov.l r3,@(4,r1)
+#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
+       swap.w r0,r2
+       mov.l r2,@r1
+#endif
+       /* Clear whichever of the two bits is still set.  */
+#ifndef FMOVD_WORKS
+       xor #8,r0
+#else
+       xor #24,r0
+#endif
+#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
+       swap.w r0,r2
+       rts
+       mov.l r2,@r1
+#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
+       swap.w r0,r3
+       rts
+       mov.l r3,@(4,r1)
+#endif
+       .align 2
+#ifdef __PIC__
+#ifdef __vxworks
+LOCAL(set_fpscr_L0_base):
+       .long ___GOTT_BASE__
+LOCAL(set_fpscr_L0_index):
+       .long ___GOTT_INDEX__
+#else
+LOCAL(set_fpscr_L0):
+       .long _GLOBAL_OFFSET_TABLE_
+#endif
+LOCAL(set_fpscr_L1):
+       .long GLOBAL(fpscr_values@GOT)
+#else
+LOCAL(set_fpscr_L1):
+       .long GLOBAL(fpscr_values)
+#endif
+
+       ENDFUNC(GLOBAL(set_fpscr))
+/* Storage for the two words written above: 8 bytes, with 4-byte alignment
+   requested on ELF.  Omitted when NO_FPSCR_VALUES is defined.  */
+#ifndef NO_FPSCR_VALUES
+#ifdef __ELF__
+        .comm   GLOBAL(fpscr_values),8,4
+#else
+        .comm   GLOBAL(fpscr_values),8
+#endif /* ELF */
+#endif /* NO_FPSCR_VALUES */
+#endif /* SH2E / SH3E / SH4 */
+#endif /* __SH2A_NOFPU__ */
+#endif /* L_set_fpscr */
+#ifdef L_ic_invalidate
+#if __SH5__ == 32
+       /* init_trampoline: fill in the 16-byte block addressed by r0.
+          A 64-bit constant goes to offset 0 (assembled from four 16-bit
+          pieces whose order depends on endianness), r2 to offset 8 and r3
+          to offset 12.  There is no return instruction here: execution
+          falls through into ic_invalidate directly below.  */
+       .mode   SHmedia
+       .section        .text..SHmedia32,"ax"
+       .align  2
+       .global GLOBAL(init_trampoline)
+       HIDDEN_FUNC(GLOBAL(init_trampoline))
+GLOBAL(init_trampoline):
+       st.l    r0,8,r2
+#ifdef __LITTLE_ENDIAN__
+       movi    9,r20
+       shori   0x402b,r20
+       shori   0xd101,r20
+       shori   0xd002,r20
+#else
+       movi    0xffffffffffffd002,r20
+       shori   0xd101,r20
+       shori   0x402b,r20
+       shori   9,r20
+#endif
+       st.q    r0,0,r20
+       st.l    r0,12,r3
+       ENDFUNC(GLOBAL(init_trampoline))
+       /* ic_invalidate: write back the operand-cache block at r0, then
+          invalidate the instruction-cache block at r0, with synco / synci
+          around the invalidate.  Returns through the address in r18.  */
+       .global GLOBAL(ic_invalidate)
+       HIDDEN_FUNC(GLOBAL(ic_invalidate))
+GLOBAL(ic_invalidate):
+       ocbwb   r0,0
+       synco
+       icbi    r0, 0
+       ptabs   r18, tr0
+       synci
+       blink   tr0, r63
+       ENDFUNC(GLOBAL(ic_invalidate))
+#elif defined(__SH4A__)
+       /* SH4A ic_invalidate: write back the operand-cache block at the
+          address in r4, issue synco, then invalidate the instruction-cache
+          block at the same address with icbi.  */
+       .global GLOBAL(ic_invalidate)
+       HIDDEN_FUNC(GLOBAL(ic_invalidate))
+GLOBAL(ic_invalidate):
+       ocbwb   @r4
+       synco
+       icbi    @r4
+       rts
+         nop
+       ENDFUNC(GLOBAL(ic_invalidate))
+#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
+       /* For system code, we use ic_invalidate_line_i, but user code
+          needs a different mechanism.  A kernel call is generally not
+          available, and it would also be slow.  Different SH4 variants use
+          different sizes and associativities of the Icache.  We use a small
+          bit of dispatch code that can be put hidden in every shared object,
+          which calls the actual processor-specific invalidation code in a
+          separate module.
+          Or if you have operating system support, the OS could mmap the
+          processor-specific code from a single page, since it is highly
+          repetitive.
+
+          The address to invalidate is passed in r4.  The dispatcher puts
+          the address of ic_invalidate_array in r1 (through the GOT when
+          PIC), writes back the operand-cache block at r4, loads a mask
+          from offset 8 of the array, and jumps to
+          array + ((r4 - array) & mask).  The word at offset 4 of the array
+          is loaded into r0 in the jump delay slot.  On the jump r4 holds
+          r4 - array and r1 holds the array address.  */
+       .global GLOBAL(ic_invalidate)
+       HIDDEN_FUNC(GLOBAL(ic_invalidate))
+GLOBAL(ic_invalidate):
+#ifdef __pic__
+#ifdef __vxworks
+       mov.l   1f,r1
+       mov.l   2f,r0
+       mov.l   @r1,r1
+       mov.l   0f,r2
+       mov.l   @(r0,r1),r0
+#else
+       mov.l   1f,r1
+       mova    1f,r0
+       mov.l   0f,r2
+       add     r1,r0
+#endif
+       mov.l   @(r0,r2),r1
+#else
+       mov.l   0f,r1
+#endif
+       ocbwb   @r4
+       mov.l   @(8,r1),r0
+       sub     r1,r4
+       and     r4,r0
+       add     r1,r0
+       jmp     @r0
+       mov.l   @(4,r1),r0
+       .align  2
+#ifndef __pic__
+0:     .long   GLOBAL(ic_invalidate_array)
+#else /* __pic__ */
+       .global GLOBAL(ic_invalidate_array)
+0:     .long   GLOBAL(ic_invalidate_array)@GOT
+#ifdef __vxworks
+1:     .long   ___GOTT_BASE__
+2:     .long   ___GOTT_INDEX__
+#else
+1:     .long   _GLOBAL_OFFSET_TABLE_
+#endif
+#endif /* __pic__ */
+       /* Close the function for both the PIC and the non-PIC build.  */
+       ENDFUNC(GLOBAL(ic_invalidate))
+#endif /* SH4 */
+#endif /* L_ic_invalidate */
+
+#ifdef L_ic_invalidate_array
+#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
+       /* This is needed when an SH4 dso with trampolines is used on SH4A.
+          Adds r1 to r4, issues synco, and invalidates the instruction-cache
+          block at the resulting address.
+          NOTE(review): the generic SH4 ic_invalidate dispatcher in this
+          file loads a mask from offset 8 of this symbol.  With five 16-bit
+          instructions ahead of the .align, the .long 0 below appears to
+          land at offset 12 rather than 8 - confirm the layout is intended.  */
+       .global GLOBAL(ic_invalidate_array)
+       FUNC(GLOBAL(ic_invalidate_array))
+GLOBAL(ic_invalidate_array):
+       add     r1,r4
+       synco
+       icbi    @r4
+       rts
+         nop
+       .align 2
+       .long   0
+       ENDFUNC(GLOBAL(ic_invalidate_array))
+#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
+       /* Generic SH4 invalidation array.  It is built from 32-byte units
+          (two-byte instructions plus four-byte data words) and is aligned
+          to 32 bytes.  WAYS and WAY_SIZE may be predefined; the defaults
+          are 4 ways of 0x4000 bytes.  Three layouts exist, selected by
+          WAYS: 1, 2..6, and more than 6.  */
+       .global GLOBAL(ic_invalidate_array)
+       .p2align 5
+       FUNC(GLOBAL(ic_invalidate_array))
+/* This must be aligned to the beginning of a cache line.  */
+GLOBAL(ic_invalidate_array):
+#ifndef WAYS
+#define WAYS 4
+#define WAY_SIZE 0x4000
+#endif
+#if WAYS == 1
+       /* Each unit: rts + nop, then seven data words (32 bytes).  */
+       .rept   WAY_SIZE * WAYS / 32
+       rts
+       nop
+       .rept   7
+       .long   WAY_SIZE - 32
+       .endr
+       .endr
+#elif WAYS <= 6
+       /* Each unit: a braf/add pair, two data words, WAYS-2 braf/nop
+          pairs and 7-WAYS rts/nop pairs (32 bytes in total).  */
+       .rept   WAY_SIZE * WAYS / 32
+       braf    r0
+       add     #-8,r0
+       .long   WAY_SIZE + 8
+       .long   WAY_SIZE - 32
+       .rept   WAYS-2
+       braf    r0
+       nop
+       .endr
+       .rept   7 - WAYS
+       rts
+       nop
+       .endr
+       .endr
+#else /* WAYS > 6 */
+       /* This variant needs two different pages for mmap-ing.  */
+       /* First WAYS-1 ways: braf/nop followed by seven data words per unit.  */
+       .rept   WAYS-1
+       .rept   WAY_SIZE / 32
+       braf    r0
+       nop
+       .long   WAY_SIZE
+       .rept 6
+       .long   WAY_SIZE - 32
+       .endr
+       .endr
+       .endr
+       /* Last way: rts followed by fifteen nops per unit.  */
+       .rept   WAY_SIZE / 32
+       rts
+       .rept   15
+       nop
+       .endr
+       .endr
+#endif /* WAYS */
+       ENDFUNC(GLOBAL(ic_invalidate_array))
+#endif /* SH4 */
+#endif /* L_ic_invalidate_array */
+
+#if defined (__SH5__) && __SH5__ == 32
+#ifdef L_shcompact_call_trampoline
+       .section        .rodata
+       .align  1
+LOCAL(ct_main_table):
+.word  LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
+.word  LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
+       .mode   SHmedia
+       .section        .text..SHmedia32, "ax"
+       .align  2
+       
+     /* This function loads 64-bit general-purpose registers from the
+       stack, from a memory address contained in them or from an FP
+       register, according to a cookie passed in r1.  Its execution
+       time is linear in the number of registers that actually have
+       to be copied.  See sh.h for details on the actual bit pattern.
+
+       The function to be called is passed in r0.  If a 32-bit return
+       value is expected, the actual function will be tail-called,
+       otherwise the return address will be stored in r10 (that the
+       caller should expect to be clobbered) and the return value
+       will be expanded into r2/r3 upon return.
+
+       Register use below: tr0 holds the function to call, tr1 the
+       address of ct_loop, r0 the biased address of ct_main_table and
+       r1 the part of the cookie that is still to be processed.  Most
+       handlers clear the cookie bits they have dealt with and branch
+       back to ct_loop through tr1.  */
+       
+       .global GLOBAL(GCC_shcompact_call_trampoline)
+       FUNC(GLOBAL(GCC_shcompact_call_trampoline))
+GLOBAL(GCC_shcompact_call_trampoline):
+       ptabs/l r0, tr0 /* Prepare to call the actual function.  */
+       movi    ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0 /* High 16 bits of the table address, biased by 31 two-byte entries.  */
+       pt/l    LOCAL(ct_loop), tr1 /* tr1 = ct_loop, the target handlers branch back to.  */
+       addz.l  r1, r63, r1 /* Zero-extend the 32-bit cookie.  */
+       shori   ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 /* Shift in the low 16 bits of the same address.  */
+LOCAL(ct_loop):
+       nsb     r1, r28 /* Count sign bits: locates the highest cookie bit still set.  */
+       shlli   r28, 1, r29 /* Scale to a two-byte table index; with the bias, 31 selects the first entry.  */
+       ldx.w   r0, r29, r30 /* r30 = handler offset relative to ct_main_label.  */
+LOCAL(ct_main_label):
+       ptrel/l r30, tr2
+       blink   tr2, r63 /* Jump to the selected handler.  */
+LOCAL(ct_r2_fp):       /* Copy r2 from an FP register.  */
+       /* It must be dr0, so just do it.  */
+       fmov.dq dr0, r2
+       movi    7, r30
+       shlli   r30, 29, r31 /* r31 = 7 << 29, the mask of r2's cookie bits.  */
+       andc    r1, r31, r1 /* Clear those bits in the cookie.  */
+       blink   tr1, r63 /* Back to ct_loop.  */
+LOCAL(ct_r3_fp):       /* Copy r3 from an FP register.  */
+       /* It is either dr0 or dr2.  */
+       movi    7, r30
+       shlri   r1, 26, r32 /* r32 = cookie >> 26, taken before the bits are cleared.  */
+       shlli   r30, 26, r31 /* r31 = 7 << 26, the mask of r3's cookie bits.  */
+       andc    r1, r31, r1 /* Clear those bits in the cookie.  */
+       fmov.dq dr0, r3 /* Copy dr0 first ...  */
+       beqi/l  r32, 4, tr1 /* ... and keep it if cookie >> 26 was 4.  */
+       fmov.dq dr2, r3 /* Otherwise the value comes from dr2.  */
+       blink   tr1, r63 /* Back to ct_loop.  */
+LOCAL(ct_r4_fp):       /* Copy r4 from an FP register.  */
+       shlri   r1, 23 - 3, r34
+       andi    r34, 3 << 3, r33 /* r33 = (cookie bits 23-24) * 8.  */
+       addi    r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32 /* Offset of the selected entry from ct_r4_fp_base.  */
+LOCAL(ct_r4_fp_base):
+       ptrel/l r32, tr2 /* tr2 = selected entry of ct_r4_fp_copy.  */
+       movi    7, r30
+       shlli   r30, 23, r31
+       andc    r1, r31, r1 /* Clear cookie bits 23-25.  */
+       blink   tr2, r63 /* Jump to the selected entry.  */
+LOCAL(ct_r4_fp_copy): /* Entries of two instructions each: copy, then back to ct_loop.  */
+       fmov.dq dr0, r4
+       blink   tr1, r63
+       fmov.dq dr2, r4
+       blink   tr1, r63
+       fmov.dq dr4, r4
+       blink   tr1, r63
+LOCAL(ct_r5_fp):       /* Copy r5 from an FP register.  */
+       shlri   r1, 20 - 3, r34
+       andi    r34, 3 << 3, r33 /* r33 = (cookie bits 20-21) * 8.  */
+       addi    r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32 /* Offset of the selected entry from ct_r5_fp_base.  */
+LOCAL(ct_r5_fp_base):
+       ptrel/l r32, tr2 /* tr2 = selected entry of ct_r5_fp_copy.  */
+       movi    7, r30
+       shlli   r30, 20, r31
+       andc    r1, r31, r1 /* Clear cookie bits 20-22.  */
+       blink   tr2, r63 /* Jump to the selected entry.  */
+LOCAL(ct_r5_fp_copy): /* Entries of two instructions each: copy, then back to ct_loop.  */
+       fmov.dq dr0, r5
+       blink   tr1, r63
+       fmov.dq dr2, r5
+       blink   tr1, r63
+       fmov.dq dr4, r5
+       blink   tr1, r63
+       fmov.dq dr6, r5
+       blink   tr1, r63
+LOCAL(ct_r6_fph):      /* Copy r6 from a high FP register.  */
+       /* It must be dr8.  */
+       fmov.dq dr8, r6
+       movi    15, r30
+       shlli   r30, 16, r31
+       andc    r1, r31, r1 /* Clear cookie bits 16-19.  */
+       blink   tr1, r63 /* Back to ct_loop.  */
+LOCAL(ct_r6_fpl):      /* Copy r6 from a low FP register.  */
+       shlri   r1, 16 - 3, r34
+       andi    r34, 3 << 3, r33 /* r33 = (cookie bits 16-17) * 8.  */
+       addi    r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32 /* Offset of the selected entry from ct_r6_fp_base.  */
+LOCAL(ct_r6_fp_base):
+       ptrel/l r32, tr2 /* tr2 = selected entry of ct_r6_fp_copy.  */
+       movi    7, r30
+       shlli   r30, 16, r31
+       andc    r1, r31, r1 /* Clear cookie bits 16-18.  */
+       blink   tr2, r63 /* Jump to the selected entry.  */
+LOCAL(ct_r6_fp_copy): /* Entries of two instructions each: copy, then back to ct_loop.  */
+       fmov.dq dr0, r6
+       blink   tr1, r63
+       fmov.dq dr2, r6
+       blink   tr1, r63
+       fmov.dq dr4, r6
+       blink   tr1, r63
+       fmov.dq dr6, r6
+       blink   tr1, r63
+LOCAL(ct_r7_fph):      /* Copy r7 from a high FP register.  */
+       /* It is either dr8 or dr10.  */
+       movi    15 << 12, r31
+       shlri   r1, 12, r32 /* r32 = cookie >> 12, taken before the bits are cleared.  */
+       andc    r1, r31, r1 /* Clear cookie bits 12-15.  */
+       fmov.dq dr8, r7 /* Copy dr8 first ...  */
+       beqi/l  r32, 8, tr1 /* ... and keep it if cookie >> 12 was 8.  */
+       fmov.dq dr10, r7 /* Otherwise the value comes from dr10.  */
+       blink   tr1, r63 /* Back to ct_loop.  */
+LOCAL(ct_r7_fpl):      /* Copy r7 from a low FP register.  */
+       shlri   r1, 12 - 3, r34
+       andi    r34, 3 << 3, r33 /* r33 = (cookie bits 12-13) * 8.  */
+       addi    r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32 /* Offset of the selected entry from ct_r7_fp_base.  */
+LOCAL(ct_r7_fp_base):
+       ptrel/l r32, tr2 /* tr2 = selected entry of ct_r7_fp_copy.  */
+       movi    7 << 12, r31
+       andc    r1, r31, r1 /* Clear cookie bits 12-14.  */
+       blink   tr2, r63 /* Jump to the selected entry.  */
+LOCAL(ct_r7_fp_copy): /* Entries of two instructions each: copy, then back to ct_loop.  */
+       fmov.dq dr0, r7
+       blink   tr1, r63
+       fmov.dq dr2, r7
+       blink   tr1, r63
+       fmov.dq dr4, r7
+       blink   tr1, r63
+       fmov.dq dr6, r7
+       blink   tr1, r63
+LOCAL(ct_r8_fph):      /* Copy r8 from a high FP register.  */
+       /* It is either dr8 or dr10.  */
+       movi    15 << 8, r31
+       andi    r1, 1 << 8, r32 /* r32 = cookie bit 8, taken before the bits are cleared.  */
+       andc    r1, r31, r1 /* Clear cookie bits 8-11.  */
+       fmov.dq dr8, r8 /* Copy dr8 first ...  */
+       beq/l   r32, r63, tr1 /* ... and keep it if bit 8 was clear.  */
+       fmov.dq dr10, r8 /* Otherwise the value comes from dr10.  */
+       blink   tr1, r63 /* Back to ct_loop.  */
+LOCAL(ct_r8_fpl):      /* Copy r8 from a low FP register.  */
+       shlri   r1, 8 - 3, r34
+       andi    r34, 3 << 3, r33 /* r33 = (cookie bits 8-9) * 8.  */
+       addi    r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32 /* Offset of the selected entry from ct_r8_fp_base.  */
+LOCAL(ct_r8_fp_base):
+       ptrel/l r32, tr2 /* tr2 = selected entry of ct_r8_fp_copy.  */
+       movi    7 << 8, r31
+       andc    r1, r31, r1 /* Clear cookie bits 8-10.  */
+       blink   tr2, r63 /* Jump to the selected entry.  */
+LOCAL(ct_r8_fp_copy): /* Entries of two instructions each: copy, then back to ct_loop.  */
+       fmov.dq dr0, r8
+       blink   tr1, r63
+       fmov.dq dr2, r8
+       blink   tr1, r63
+       fmov.dq dr4, r8
+       blink   tr1, r63
+       fmov.dq dr6, r8
+       blink   tr1, r63
+LOCAL(ct_r9_fph):      /* Copy r9 from a high FP register.  */
+       /* It is either dr8 or dr10.  */
+       movi    15 << 4, r31
+       andi    r1, 1 << 4, r32 /* r32 = cookie bit 4, taken before the bits are cleared.  */
+       andc    r1, r31, r1 /* Clear cookie bits 4-7.  */
+       fmov.dq dr8, r9 /* Copy dr8 first ...  */
+       beq/l   r32, r63, tr1 /* ... and keep it if bit 4 was clear.  */
+       fmov.dq dr10, r9 /* Otherwise the value comes from dr10.  */
+       blink   tr1, r63 /* Back to ct_loop.  */
+LOCAL(ct_r9_fpl):      /* Copy r9 from a low FP register.  */
+       shlri   r1, 4 - 3, r34
+       andi    r34, 3 << 3, r33 /* r33 = (cookie bits 4-5) * 8.  */
+       addi    r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32 /* Offset of the selected entry from ct_r9_fp_base.  */
+LOCAL(ct_r9_fp_base):
+       ptrel/l r32, tr2 /* tr2 = selected entry of ct_r9_fp_copy.  */
+       movi    7 << 4, r31
+       andc    r1, r31, r1 /* Clear cookie bits 4-6.  */
+       blink   tr2, r63 /* Jump to the selected entry.  */
+LOCAL(ct_r9_fp_copy): /* Entries of two instructions each: copy, then back to ct_loop.  */
+       fmov.dq dr0, r9
+       blink   tr1, r63
+       fmov.dq dr2, r9
+       blink   tr1, r63
+       fmov.dq dr4, r9
+       blink   tr1, r63
+       fmov.dq dr6, r9
+       blink   tr1, r63
+LOCAL(ct_r2_ld):       /* Copy r2 from a memory address.  */
+       pt/l    LOCAL(ct_r2_load), tr2
+       movi    3, r30
+       shlli   r30, 29, r31 /* r31 = 3 << 29.  */
+       and     r1, r31, r32 /* r32 = those two cookie bits.  */
+       andc    r1, r31, r1 /* Clear them in the cookie.  */
+       beq/l   r31, r32, tr2 /* Both were set: load r2 only (ct_r2_load) and resume the loop.  */
+       addi.l  r2, 8, r3 /* Otherwise r3 = r2 + 8, the address of the next 8-byte slot, ...  */
+       ldx.q   r2, r63, r2 /* ... r2 = the 64-bit value r2 pointed to, ...  */
+       /* Fall through.  */
+LOCAL(ct_r3_ld):       /* Copy r3 from a memory address.  */
+       pt/l    LOCAL(ct_r3_load), tr2
+       movi    3, r30
+       shlli   r30, 26, r31 /* r31 = 3 << 26.  */
+       and     r1, r31, r32
+       andc    r1, r31, r1
+       beq/l   r31, r32, tr2 /* Both bits were set: load r3 only and resume the loop.  */
+       addi.l  r3, 8, r4 /* Otherwise hand r3 + 8 on to r4, load r3 and fall through.  */
+       ldx.q   r3, r63, r3
+LOCAL(ct_r4_ld):       /* Copy r4 from a memory address.  */
+       pt/l    LOCAL(ct_r4_load), tr2
+       movi    3, r30
+       shlli   r30, 23, r31 /* r31 = 3 << 23.  */
+       and     r1, r31, r32
+       andc    r1, r31, r1
+       beq/l   r31, r32, tr2 /* Both bits were set: load r4 only and resume the loop.  */
+       addi.l  r4, 8, r5 /* Otherwise hand r4 + 8 on to r5, load r4 and fall through.  */
+       ldx.q   r4, r63, r4
+LOCAL(ct_r5_ld):       /* Copy r5 from a memory address.  */
+       pt/l    LOCAL(ct_r5_load), tr2
+       movi    3, r30
+       shlli   r30, 20, r31 /* r31 = 3 << 20.  */
+       and     r1, r31, r32
+       andc    r1, r31, r1
+       beq/l   r31, r32, tr2 /* Both bits were set: load r5 only and resume the loop.  */
+       addi.l  r5, 8, r6 /* Otherwise hand r5 + 8 on to r6, load r5 and fall through.  */
+       ldx.q   r5, r63, r5
+LOCAL(ct_r6_ld):       /* Copy r6 from a memory address.  */
+       pt/l    LOCAL(ct_r6_load), tr2
+       movi    3 << 16, r31
+       and     r1, r31, r32
+       andc    r1, r31, r1
+       beq/l   r31, r32, tr2 /* Both bits were set: load r6 only and resume the loop.  */
+       addi.l  r6, 8, r7 /* Otherwise hand r6 + 8 on to r7, load r6 and fall through.  */
+       ldx.q   r6, r63, r6
+LOCAL(ct_r7_ld):       /* Copy r7 from a memory address.  */
+       pt/l    LOCAL(ct_r7_load), tr2
+       movi    3 << 12, r31
+       and     r1, r31, r32
+       andc    r1, r31, r1
+       beq/l   r31, r32, tr2 /* Both bits were set: load r7 only and resume the loop.  */
+       addi.l  r7, 8, r8 /* Otherwise hand r7 + 8 on to r8, load r7 and fall through.  */
+       ldx.q   r7, r63, r7
+LOCAL(ct_r8_ld):       /* Copy r8 from a memory address.  */
+       pt/l    LOCAL(ct_r8_load), tr2
+       movi    3 << 8, r31
+       and     r1, r31, r32
+       andc    r1, r31, r1
+       beq/l   r31, r32, tr2 /* Both bits were set: load r8 only and resume the loop.  */
+       addi.l  r8, 8, r9 /* Otherwise hand r8 + 8 on to r9, load r8 and fall through.  */
+       ldx.q   r8, r63, r8
+LOCAL(ct_r9_ld):       /* Copy r9 from a memory address.  */
+       pt/l    LOCAL(ct_check_tramp), tr2
+       ldx.q   r9, r63, r9
+       blink   tr2, r63 /* r9 is the last register: go straight to ct_check_tramp.  */
+LOCAL(ct_r2_load): /* The ct_rN_load stubs load rN through its own address and go back to ct_loop.  */
+       ldx.q   r2, r63, r2
+       blink   tr1, r63
+LOCAL(ct_r3_load):
+       ldx.q   r3, r63, r3
+       blink   tr1, r63
+LOCAL(ct_r4_load):
+       ldx.q   r4, r63, r4
+       blink   tr1, r63
+LOCAL(ct_r5_load):
+       ldx.q   r5, r63, r5
+       blink   tr1, r63
+LOCAL(ct_r6_load):
+       ldx.q   r6, r63, r6
+       blink   tr1, r63
+LOCAL(ct_r7_load):
+       ldx.q   r7, r63, r7
+       blink   tr1, r63
+LOCAL(ct_r8_load):
+       ldx.q   r8, r63, r8
+       blink   tr1, r63
+LOCAL(ct_r2_pop):      /* Pop r2 from the stack.  */
+       movi    1, r30
+       ldx.q   r15, r63, r2 /* r2 = 64-bit value at the stack pointer (r15).  */
+       shlli   r30, 29, r31 /* r31 = 1 << 29.  */
+       addi.l  r15, 8, r15 /* Drop the slot just read.  */
+       andc    r1, r31, r1 /* Clear cookie bit 29.  */
+       blink   tr1, r63 /* Back to ct_loop.  */
+LOCAL(ct_r3_pop):      /* Pop r3 from the stack.  */
+       movi    1, r30
+       ldx.q   r15, r63, r3
+       shlli   r30, 26, r31 /* r31 = 1 << 26.  */
+       addi.l  r15, 8, r15
+       andc    r1, r31, r1 /* Clear cookie bit 26.  */
+       blink   tr1, r63
+LOCAL(ct_r4_pop):      /* Pop r4 from the stack.  */
+       movi    1, r30
+       ldx.q   r15, r63, r4
+       shlli   r30, 23, r31 /* r31 = 1 << 23.  */
+       addi.l  r15, 8, r15
+       andc    r1, r31, r1 /* Clear cookie bit 23.  */
+       blink   tr1, r63
+LOCAL(ct_r5_pop):      /* Pop r5 from the stack.  */
+       movi    1, r30
+       ldx.q   r15, r63, r5
+       shlli   r30, 20, r31 /* r31 = 1 << 20.  */
+       addi.l  r15, 8, r15
+       andc    r1, r31, r1 /* Clear cookie bit 20.  */
+       blink   tr1, r63
+LOCAL(ct_r6_pop):      /* Pop r6 from the stack.  */
+       movi    1, r30
+       ldx.q   r15, r63, r6
+       shlli   r30, 16, r31 /* r31 = 1 << 16.  */
+       addi.l  r15, 8, r15
+       andc    r1, r31, r1 /* Clear cookie bit 16.  */
+       blink   tr1, r63
+LOCAL(ct_r7_pop):      /* Pop r7 from the stack.  */
+       ldx.q   r15, r63, r7
+       movi    1 << 12, r31
+       addi.l  r15, 8, r15
+       andc    r1, r31, r1 /* Clear cookie bit 12.  */
+       blink   tr1, r63
+LOCAL(ct_r8_pop):      /* Pop r8 from the stack.  */
+       ldx.q   r15, r63, r8
+       movi    1 << 8, r31
+       addi.l  r15, 8, r15
+       andc    r1, r31, r1 /* Clear cookie bit 8.  */
+       blink   tr1, r63
+LOCAL(ct_pop_seq):     /* Pop a sequence of registers off the stack.  */
+       andi    r1, 7 << 1, r30 /* r30 = cookie bits 1-3, i.e. 2 * count.  */
+       movi    (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
+       shlli   r30, 2, r31 /* r31 = 8 * count: two instructions per popped register.  */
+       shori   LOCAL(ct_end_of_pop_seq) & 65535, r32 /* r32 = address of ct_end_of_pop_seq.  */
+       sub.l   r32, r31, r33 /* Step back count load/add pairs from the end.  */
+       ptabs/l r33, tr2
+       blink   tr2, r63 /* Enter the sequence; it runs on into ct_check_tramp.  */
+LOCAL(ct_start_of_pop_seq):    /* Beginning of pop sequence.  */
+       ldx.q   r15, r63, r3
+       addi.l  r15, 8, r15
+       ldx.q   r15, r63, r4
+       addi.l  r15, 8, r15
+       ldx.q   r15, r63, r5
+       addi.l  r15, 8, r15
+       ldx.q   r15, r63, r6
+       addi.l  r15, 8, r15
+       ldx.q   r15, r63, r7
+       addi.l  r15, 8, r15
+       ldx.q   r15, r63, r8
+       addi.l  r15, 8, r15
+LOCAL(ct_r9_pop):      /* Pop r9 from the stack.  */
+       ldx.q   r15, r63, r9
+       addi.l  r15, 8, r15
+LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction.  */
+LOCAL(ct_check_tramp): /* Check whether we need a trampoline.  */
+       pt/u    LOCAL(ct_ret_wide), tr2
+       andi    r1, 1, r1 /* Keep only cookie bit 0.  */
+       bne/u   r1, r63, tr2 /* Bit 0 set: take the ct_ret_wide path.  */
+LOCAL(ct_call_func):   /* Just branch to the function.  */
+       blink   tr0, r63
+LOCAL(ct_ret_wide):    /* Call the function, so that we can unpack its 
+                          64-bit return value.  */
+       add.l   r18, r63, r10 /* Save our own return address (r18) in r10.  */
+       blink   tr0, r18 /* Call the function, linking through r18.  */
+       ptabs   r10, tr0 /* Prepare to return to our caller.  */
+#if __LITTLE_ENDIAN__
+       shari   r2, 32, r3 /* r3 = upper 32 bits of the result.  */
+       add.l   r2, r63, r2 /* r2 = lower 32 bits of the result.  */
+#else
+       add.l   r2, r63, r3 /* r3 = lower 32 bits of the result.  */
+       shari   r2, 32, r2 /* r2 = upper 32 bits of the result.  */
+#endif
+       blink   tr0, r63
+
+       ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
+#endif /* L_shcompact_call_trampoline */
+
+#ifdef L_shcompact_return_trampoline
+     /* This function does the converse of the code in `ret_wide'
+       above.  It is tail-called by SHcompact functions returning
+       64-bit non-floating-point values, to pack the 32-bit values in
+       r2 and r3 into r2.  The return address is taken from r18.  */
+
+       .mode   SHmedia
+       .section        .text..SHmedia32, "ax"
+       .align  2
+       .global GLOBAL(GCC_shcompact_return_trampoline)
+       HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
+GLOBAL(GCC_shcompact_return_trampoline):
+       ptabs/l r18, tr0 /* Prepare to return.  */
+#if __LITTLE_ENDIAN__
+       addz.l  r2, r63, r2 /* Zero-extend r2: it becomes the low half.  */
+       shlli   r3, 32, r3 /* Move r3 into the high half.  */
+#else
+       addz.l  r3, r63, r3 /* Zero-extend r3: it becomes the low half.  */
+       shlli   r2, 32, r2 /* Move r2 into the high half.  */
+#endif
+       or      r3, r2, r2 /* Combine both halves in r2.  */
+       blink   tr0, r63
+
+       ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
+#endif /* L_shcompact_return_trampoline */
+
+#ifdef L_shcompact_incoming_args
+       .section        .rodata
+       .align  1
+LOCAL(ia_main_table): /* Two-byte handler offsets from ia_main_label, indexed by the nsb result of the cookie less 31.  */
+.word  1 /* Invalid, just loop */
+.word  LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
+.word  LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
+.word  1 /* Invalid, just loop */
+.word  LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
+.word  LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
+.word  1 /* Invalid, just loop */
+.word  LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
+.word  LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
+.word  1 /* Invalid, just loop */
+.word  LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
+.word  LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
+.word  1 /* Invalid, just loop */
+.word  1 /* Invalid, just loop */
+.word  LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
+.word  LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
+.word  1 /* Invalid, just loop */
+.word  1 /* Invalid, just loop */
+.word  LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
+.word  LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
+.word  1 /* Invalid, just loop */
+.word  1 /* Invalid, just loop */
+.word  LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
+.word  LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
+.word  1 /* Invalid, just loop */
+.word  1 /* Invalid, just loop */
+.word  LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
+.word  LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
+.word  LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
+.word  LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
+.word  LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
+.word  LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
+.word  LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
+       .mode   SHmedia
+       .section        .text..SHmedia32, "ax"
+       .align  2
+       
+     /* This function stores 64-bit general-purpose registers back in
+       the stack, and loads the address in which each register
+       was stored into itself.  The lower 32 bits of r17 hold the address
+       to begin storing, and the upper 32 bits of r17 hold the cookie.
+       Its execution time is linear in the
+       number of registers that actually have to be copied, and it is
+       optimized for structures larger than 64 bits, as opposed to
+       individual `long long' arguments.  See sh.h for details on the
+       actual bit pattern.
+
+       Register use below: tr0 holds the return address (from r18),
+       tr1 the address of ia_loop, r43 the biased address of
+       ia_main_table, r0 the part of the cookie still to be processed
+       and r17 the address of the next slot to store to.  */
+       
+       .global GLOBAL(GCC_shcompact_incoming_args)
+       FUNC(GLOBAL(GCC_shcompact_incoming_args))
+GLOBAL(GCC_shcompact_incoming_args):
+       ptabs/l r18, tr0        /* Prepare to return.  */
+       shlri   r17, 32, r0     /* Load the cookie.  */
+       movi    ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 /* High 16 bits of the table address, biased by 31 two-byte entries.  */
+       pt/l    LOCAL(ia_loop), tr1 /* tr1 = ia_loop, the target handlers branch back to.  */
+       add.l   r17, r63, r17 /* Keep only the low 32 bits of r17: the store address.  */
+       shori   ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 /* Shift in the low 16 bits of the same address.  */
+LOCAL(ia_loop):
+       nsb     r0, r36 /* Count sign bits: locates the highest cookie bit still set.  */
+       shlli   r36, 1, r37 /* Scale to a two-byte table index.  */
+       ldx.w   r43, r37, r38 /* r38 = handler offset relative to ia_main_label.  */
+LOCAL(ia_main_label):
+       ptrel/l r38, tr2
+       blink   tr2, r63 /* Jump to the selected handler.  */
+LOCAL(ia_r2_ld):       /* Store r2 and load its address.  */
+       movi    3, r38
+       shlli   r38, 29, r39 /* r39 = 3 << 29.  */
+       and     r0, r39, r40 /* r40 = those two cookie bits.  */
+       andc    r0, r39, r0 /* Clear them in the cookie.  */
+       stx.q   r17, r63, r2 /* Store r2 at the current slot ...  */
+       add.l   r17, r63, r2 /* ... and replace it by the slot's address.  */
+       addi.l  r17, 8, r17 /* Advance to the next slot.  */
+       beq/u   r39, r40, tr1 /* Both bits were set: back to ia_loop; otherwise fall through to r3.  */
+LOCAL(ia_r3_ld):       /* Store r3 and load its address.  */
+       movi    3, r38
+       shlli   r38, 26, r39 /* r39 = 3 << 26.  */
+       and     r0, r39, r40
+       andc    r0, r39, r0
+       stx.q   r17, r63, r3
+       add.l   r17, r63, r3
+       addi.l  r17, 8, r17
+       beq/u   r39, r40, tr1 /* Both bits were set: back to ia_loop; otherwise fall through to r4.  */
+LOCAL(ia_r4_ld):       /* Store r4 and load its address.  */
+       movi    3, r38
+       shlli   r38, 23, r39 /* r39 = 3 << 23.  */
+       and     r0, r39, r40
+       andc    r0, r39, r0
+       stx.q   r17, r63, r4
+       add.l   r17, r63, r4
+       addi.l  r17, 8, r17
+       beq/u   r39, r40, tr1 /* Both bits were set: back to ia_loop; otherwise fall through to r5.  */
+LOCAL(ia_r5_ld):       /* Store r5 and load its address.  */
+       movi    3, r38
+       shlli   r38, 20, r39 /* r39 = 3 << 20.  */
+       and     r0, r39, r40
+       andc    r0, r39, r0
+       stx.q   r17, r63, r5
+       add.l   r17, r63, r5
+       addi.l  r17, 8, r17
+       beq/u   r39, r40, tr1 /* Both bits were set: back to ia_loop; otherwise fall through to r6.  */
+LOCAL(ia_r6_ld):       /* Store r6 and load its address.  */
+       movi    3, r38
+       shlli   r38, 16, r39 /* r39 = 3 << 16.  */
+       and     r0, r39, r40
+       andc    r0, r39, r0
+       stx.q   r17, r63, r6
+       add.l   r17, r63, r6
+       addi.l  r17, 8, r17
+       beq/u   r39, r40, tr1 /* Both bits were set: back to ia_loop; otherwise fall through to r7.  */
+LOCAL(ia_r7_ld):       /* Store r7 and load its address.  */
+       movi    3 << 12, r39
+       and     r0, r39, r40
+       andc    r0, r39, r0
+       stx.q   r17, r63, r7
+       add.l   r17, r63, r7
+       addi.l  r17, 8, r17
+       beq/u   r39, r40, tr1 /* Both bits were set: back to ia_loop; otherwise fall through to r8.  */
+LOCAL(ia_r8_ld):       /* Store r8 and load its address.  */
+       movi    3 << 8, r39
+       and     r0, r39, r40
+       andc    r0, r39, r0
+       stx.q   r17, r63, r8
+       add.l   r17, r63, r8
+       addi.l  r17, 8, r17
+       beq/u   r39, r40, tr1 /* Both bits were set: back to ia_loop; otherwise fall through to r9.  */
+LOCAL(ia_r9_ld):       /* Store r9 and load its address.  */
+       stx.q   r17, r63, r9
+       add.l   r17, r63, r9
+       blink   tr0, r63 /* r9 is the last register: return to the caller.  */
+LOCAL(ia_r2_push):     /* Push r2 onto the stack.  */
+       movi    1, r38
+       shlli   r38, 29, r39 /* r39 = 1 << 29.  */
+       andc    r0, r39, r0 /* Clear cookie bit 29.  */
+       stx.q   r17, r63, r2 /* Store r2 at the current slot.  */
+       addi.l  r17, 8, r17 /* Advance to the next slot.  */
+       blink   tr1, r63 /* Back to ia_loop.  */
+LOCAL(ia_r3_push):     /* Push r3 onto the stack.  */
+       movi    1, r38
+       shlli   r38, 26, r39 /* r39 = 1 << 26.  */
+       andc    r0, r39, r0 /* Clear cookie bit 26.  */
+       stx.q   r17, r63, r3
+       addi.l  r17, 8, r17
+       blink   tr1, r63
+LOCAL(ia_r4_push):     /* Push r4 onto the stack.  */
+       movi    1, r38
+       shlli   r38, 23, r39 /* r39 = 1 << 23.  */
+       andc    r0, r39, r0 /* Clear cookie bit 23.  */
+       stx.q   r17, r63, r4
+       addi.l  r17, 8, r17
+       blink   tr1, r63
+LOCAL(ia_r5_push):     /* Push r5 onto the stack.  */
+       movi    1, r38
+       shlli   r38, 20, r39 /* r39 = 1 << 20.  */
+       andc    r0, r39, r0 /* Clear cookie bit 20.  */
+       stx.q   r17, r63, r5
+       addi.l  r17, 8, r17
+       blink   tr1, r63
+LOCAL(ia_r6_push):     /* Push r6 onto the stack.  */
+       movi    1, r38
+       shlli   r38, 16, r39 /* r39 = 1 << 16.  */
+       andc    r0, r39, r0 /* Clear cookie bit 16.  */
+       stx.q   r17, r63, r6
+       addi.l  r17, 8, r17
+       blink   tr1, r63
+LOCAL(ia_r7_push):     /* Push r7 onto the stack.  */
+       movi    1 << 12, r39
+       andc    r0, r39, r0 /* Clear cookie bit 12.  */
+       stx.q   r17, r63, r7
+       addi.l  r17, 8, r17
+       blink   tr1, r63
+LOCAL(ia_r8_push):     /* Push r8 onto the stack.  */
+       movi    1 << 8, r39
+       andc    r0, r39, r0 /* Clear cookie bit 8.  */
+       stx.q   r17, r63, r8
+       addi.l  r17, 8, r17
+       blink   tr1, r63
+LOCAL(ia_push_seq):    /* Push a sequence of registers onto the stack.  */
+       andi    r0, 7 << 1, r38 /* r38 = cookie bits 1-3, i.e. 2 * count.  */
+       movi    (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
+       shlli   r38, 2, r39 /* r39 = 8 * count: two instructions per pushed register.  */
+       shori   LOCAL(ia_end_of_push_seq) & 65535, r40 /* r40 = address of ia_end_of_push_seq.  */
+       sub.l   r40, r39, r41 /* Step back count two-instruction pairs from the end.  */
+       ptabs/l r41, tr2
+       blink   tr2, r63 /* Enter the sequence; it ends in the return below.  */
+LOCAL(ia_stack_of_push_seq):    /* Beginning of push sequence.  NOTE(review): this label
+                                   is not referenced in the code shown here; its name was
+                                   presumably meant to read `start'.  */
+       stx.q   r17, r63, r3
+       addi.l  r17, 8, r17
+       stx.q   r17, r63, r4
+       addi.l  r17, 8, r17
+       stx.q   r17, r63, r5
+       addi.l  r17, 8, r17
+       stx.q   r17, r63, r6
+       addi.l  r17, 8, r17
+       stx.q   r17, r63, r7
+       addi.l  r17, 8, r17
+       stx.q   r17, r63, r8
+       addi.l  r17, 8, r17
+LOCAL(ia_r9_push):     /* Push r9 onto the stack.  */
+       stx.q   r17, r63, r9
+LOCAL(ia_return):      /* Return.  */
+       blink   tr0, r63
+LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction.  */
+       ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
+#endif /* L_shcompact_incoming_args */
+#endif
+#if __SH5__
+#ifdef L_nested_trampoline
+#if __SH5__ == 32
+       .section        .text..SHmedia32,"ax"
+#else
+       .text
+#endif
+       .align  3 /* It is copied in units of 8 bytes in SHmedia mode.  */
+       .global GLOBAL(GCC_nested_trampoline)
+       HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
+GLOBAL(GCC_nested_trampoline):
+       .mode   SHmedia
+       ptrel/u r63, tr0 /* PC-relative target with a zero offset (r63) ...  */
+       gettr   tr0, r0 /* ... read back into r0 as the base for the loads below.  */
+#if __SH5__ == 64
+       ld.q    r0, 24, r1 /* Fetch the branch target stored at offset 24.  */
+#else
+       ld.l    r0, 24, r1 /* Fetch the branch target stored at offset 24.  */
+#endif
+       ptabs/l r1, tr1
+#if __SH5__ == 64
+       ld.q    r0, 32, r1 /* r1 = second stored value (presumably the static chain; confirm in sh.c).  */
+#else
+       ld.l    r0, 28, r1 /* r1 = second stored value (presumably the static chain; confirm in sh.c).  */
+#endif
+       blink   tr1, r63 /* Jump to the fetched target without linking.  */
+
+       ENDFUNC(GLOBAL(GCC_nested_trampoline))
+#endif /* L_nested_trampoline */
+#endif /* __SH5__ */
+#if __SH5__ == 32
+#ifdef L_push_pop_shmedia_regs
+       .section        .text..SHmedia32,"ax"
+       .mode   SHmedia
+       .align  2
+#ifndef __SH4_NOFPU__  
+       .global GLOBAL(GCC_push_shmedia_regs)
+       FUNC(GLOBAL(GCC_push_shmedia_regs))
+GLOBAL(GCC_push_shmedia_regs): /* Save dr36-dr62 in 14 new stack slots, then
+                                 run on into the integer part below.  */
+       addi.l  r15, -14*8, r15
+       fst.d   r15, 13*8, dr62
+       fst.d   r15, 12*8, dr60
+       fst.d   r15, 11*8, dr58
+       fst.d   r15, 10*8, dr56
+       fst.d   r15,  9*8, dr54
+       fst.d   r15,  8*8, dr52
+       fst.d   r15,  7*8, dr50
+       fst.d   r15,  6*8, dr48
+       fst.d   r15,  5*8, dr46
+       fst.d   r15,  4*8, dr44
+       fst.d   r15,  3*8, dr42
+       fst.d   r15,  2*8, dr40
+       fst.d   r15,  1*8, dr38
+       fst.d   r15,  0*8, dr36
+#else /* ! __SH4_NOFPU__ */
+       .global GLOBAL(GCC_push_shmedia_regs_nofpu)
+       FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
+GLOBAL(GCC_push_shmedia_regs_nofpu):
+#endif /* ! __SH4_NOFPU__ */
+       ptabs/l r18, tr0 /* Prepare to return.  */
+       addi.l  r15, -27*8, r15 /* Allocate 27 more 8-byte slots.  */
+       gettr   tr7, r62 /* r60-r62 are overwritten here, so slots 24-26 ...  */
+       gettr   tr6, r61 /* ... receive tr5-tr7 rather than the old ...  */
+       gettr   tr5, r60 /* ... contents of r60-r62.  */
+       st.q    r15, 26*8, r62
+       st.q    r15, 25*8, r61
+       st.q    r15, 24*8, r60
+       st.q    r15, 23*8, r59
+       st.q    r15, 22*8, r58
+       st.q    r15, 21*8, r57
+       st.q    r15, 20*8, r56
+       st.q    r15, 19*8, r55
+       st.q    r15, 18*8, r54
+       st.q    r15, 17*8, r53
+       st.q    r15, 16*8, r52
+       st.q    r15, 15*8, r51
+       st.q    r15, 14*8, r50
+       st.q    r15, 13*8, r49
+       st.q    r15, 12*8, r48
+       st.q    r15, 11*8, r47
+       st.q    r15, 10*8, r46
+       st.q    r15,  9*8, r45
+       st.q    r15,  8*8, r44
+       st.q    r15,  7*8, r35 /* The saved set skips r36-r43.  */
+       st.q    r15,  6*8, r34
+       st.q    r15,  5*8, r33
+       st.q    r15,  4*8, r32
+       st.q    r15,  3*8, r31
+       st.q    r15,  2*8, r30
+       st.q    r15,  1*8, r29
+       st.q    r15,  0*8, r28
+       blink   tr0, r63
+#ifndef __SH4_NOFPU__  
+       ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
+#else
+       ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
+#endif
+#ifndef __SH4_NOFPU__  
+       .global GLOBAL(GCC_pop_shmedia_regs)
+       FUNC(GLOBAL(GCC_pop_shmedia_regs))
+GLOBAL(GCC_pop_shmedia_regs): /* Reload dr36-dr62 from slots 27-40, then
+                                join the integer part at .L0.  */
+       pt      .L0, tr1
+       movi    41*8, r0 /* Frame size to release: 27 integer + 14 FP slots.  */
+       fld.d   r15, 40*8, dr62
+       fld.d   r15, 39*8, dr60
+       fld.d   r15, 38*8, dr58
+       fld.d   r15, 37*8, dr56
+       fld.d   r15, 36*8, dr54
+       fld.d   r15, 35*8, dr52
+       fld.d   r15, 34*8, dr50
+       fld.d   r15, 33*8, dr48
+       fld.d   r15, 32*8, dr46
+       fld.d   r15, 31*8, dr44
+       fld.d   r15, 30*8, dr42
+       fld.d   r15, 29*8, dr40
+       fld.d   r15, 28*8, dr38
+       fld.d   r15, 27*8, dr36
+       blink   tr1, r63 /* Skip the movi below, keeping r0 = 41*8.  */
+#else /* ! __SH4_NOFPU__       */
+       .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
+       FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
+GLOBAL(GCC_pop_shmedia_regs_nofpu):
+#endif /* ! __SH4_NOFPU__      */
+       movi    27*8, r0 /* Frame size to release: 27 integer slots only.  */
+.L0:
+       ptabs   r18, tr0 /* Prepare to return.  */
+       ld.q    r15, 26*8, r62 /* Slots 24-26 hold the saved tr5-tr7 ...  */
+       ld.q    r15, 25*8, r61
+       ld.q    r15, 24*8, r60
+       ptabs   r62, tr7 /* ... which are restored through r60-r62.  */
+       ptabs   r61, tr6
+       ptabs   r60, tr5
+       ld.q    r15, 23*8, r59
+       ld.q    r15, 22*8, r58
+       ld.q    r15, 21*8, r57
+       ld.q    r15, 20*8, r56
+       ld.q    r15, 19*8, r55
+       ld.q    r15, 18*8, r54
+       ld.q    r15, 17*8, r53
+       ld.q    r15, 16*8, r52
+       ld.q    r15, 15*8, r51
+       ld.q    r15, 14*8, r50
+       ld.q    r15, 13*8, r49
+       ld.q    r15, 12*8, r48
+       ld.q    r15, 11*8, r47
+       ld.q    r15, 10*8, r46
+       ld.q    r15,  9*8, r45
+       ld.q    r15,  8*8, r44
+       ld.q    r15,  7*8, r35
+       ld.q    r15,  6*8, r34
+       ld.q    r15,  5*8, r33
+       ld.q    r15,  4*8, r32
+       ld.q    r15,  3*8, r31
+       ld.q    r15,  2*8, r30
+       ld.q    r15,  1*8, r29
+       ld.q    r15,  0*8, r28
+       add.l   r15, r0, r15 /* Release the whole save area.  */
+       blink   tr0, r63
+
+#ifndef __SH4_NOFPU__
+       ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
+#else
+       ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
+#endif
+#endif /* L_push_pop_shmedia_regs */
+#endif /* __SH5__ == 32 */
+
+#ifdef L_div_table
+#if __SH5__
+#if defined(__pic__) && defined(__SHMEDIA__)
+       .global GLOBAL(sdivsi3)
+       FUNC(GLOBAL(sdivsi3))
+#if __SH5__ == 32
+       .section        .text..SHmedia32,"ax"
+#else
+       .text
+#endif
+#if 0
+/* ??? FIXME: Presumably due to a linker bug, exporting data symbols
+   in a text section does not work (at least for shared libraries):
+   the linker sets the LSB of the address as if this was SHmedia code.  */
+#define TEXT_DATA_BUG
+#endif
+       .align  2
+ // inputs: r4 (dividend), r5 (divisor)
+ // clobbered: r1,r18,r19,r20,r21,r25,tr0
+ // result in r0
+ // method: a table lookup on the normalized divisor yields a first
+ // approximation of its inverse, which is refined twice and then
+ // multiplied by the dividend.
+ .global GLOBAL(sdivsi3)
+GLOBAL(sdivsi3):
+#ifdef TEXT_DATA_BUG
+ ptb datalabel Local_div_table,tr0
+#else
+ ptb GLOBAL(div_table_internal),tr0 // table address; read back with gettr below
+#endif
+ nsb r5, r1 // r1 = sign-bit count of the divisor, used as the shift count
+ shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
+ shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
+ /* bubble */
+ gettr tr0,r20 // r20 = table base
+ ldx.ub r20, r21, r19 // u0.8
+ shari r25, 32, r25   // normalize to s2.30
+ shlli r21, 1, r21 // scale the index for the 16-bit constants
+ muls.l r25, r19, r19 // s2.38
+ ldx.w r20, r21, r21  // s2.14
+  ptabs r18, tr0 // tr0 = return address
+ shari r19, 24, r19   // truncate to s2.14
+ sub r21, r19, r19    // some 11 bit inverse in s1.14
+ muls.l r19, r19, r21 // u0.28
+  sub r63, r1, r1
+  addi r1, 92, r1 // r1 = 92 - (sign-bit count): final shift count for shard below
+ muls.l r25, r21, r18 // s2.58
+ shlli r19, 45, r19   // multiply by two and convert to s2.58
+  /* bubble */
+ sub r19, r18, r18
+ shari r18, 28, r18   // some 22 bit inverse in s1.30
+ muls.l r18, r25, r0  // s2.60
+  muls.l r18, r4, r25 // s32.30
+  /* bubble */
+ shari r0, 16, r19   // s-16.44
+ muls.l r19, r18, r19 // s-16.74
+  shari r25, 63, r0 // r0 = 0 or -1 according to the sign of r25
+  shari r4, 14, r18   // s19.-14
+ shari r19, 30, r19   // s-16.44
+ muls.l r19, r18, r19 // s15.30
+  xor r21, r0, r21    // You could also use the constant 1 << 27.
+  add r21, r25, r21
+ sub r21, r19, r21
+ shard r21, r1, r21 // shift right by the count computed above
+ sub r21, r0, r0 // subtract the 0 / -1 sign word to form the result
+ blink tr0, r63
+       ENDFUNC(GLOBAL(sdivsi3))
+/* This table has been generated by divtab.c .
+Defects for bias -330:
+   Max defect: 6.081536e-07 at -1.000000e+00
+   Min defect: 2.849516e-08 at 1.030651e+00
+   Max 2nd step defect: 9.606539e-12 at -1.000000e+00
+   Min 2nd step defect: 0.000000e+00 at 0.000000e+00
+   Defect at 1: 1.238659e-07
+   Defect at -2: 1.061708e-07 */
+#else /* ! __pic__ || ! __SHMEDIA__ */
+       .section        .rodata
+#endif /* __pic__ && __SHMEDIA__ */
+#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
+       .balign 2
+       .type   Local_div_table,@object
+       .size   Local_div_table,128
+/* negative division constants */
+       .word   -16638
+       .word   -17135
+       .word   -17737
+       .word   -18433
+       .word   -19103
+       .word   -19751
+       .word   -20583
+       .word   -21383
+       .word   -22343
+       .word   -23353
+       .word   -24407
+       .word   -25582
+       .word   -26863
+       .word   -28382
+       .word   -29965
+       .word   -31800
+/* negative division factors */
+       .byte   66
+       .byte   70
+       .byte   75
+       .byte   81
+       .byte   87
+       .byte   93
+       .byte   101
+       .byte   109
+       .byte   119
+       .byte   130
+       .byte   142
+       .byte   156
+       .byte   172
+       .byte   192
+       .byte   214
+       .byte   241
+       .skip 16
+Local_div_table:
+       .skip 16
+/* positive division factors */
+       .byte   241
+       .byte   214
+       .byte   192
+       .byte   172
+       .byte   156
+       .byte   142
+       .byte   130
+       .byte   119
+       .byte   109
+       .byte   101
+       .byte   93
+       .byte   87
+       .byte   81
+       .byte   75
+       .byte   70
+       .byte   66
+/* positive division constants */
+       .word   31801
+       .word   29966
+       .word   28383
+       .word   26864
+       .word   25583
+       .word   24408
+       .word   23354
+       .word   22344
+       .word   21384
+       .word   20584
+       .word   19752
+       .word   19104
+       .word   18434
+       .word   17738
+       .word   17136
+       .word   16639
+       .section        .rodata
+#endif /* TEXT_DATA_BUG */
+       .balign 2
+       .type   GLOBAL(div_table),@object
+       .size   GLOBAL(div_table),128
+/* Layout: the div_table label sits in the middle of the 128-byte
+   object.  The PIC SHmedia sdivsi3 above forms a signed index i from
+   the top six bits of the normalized divisor and reads the 8-bit
+   factor at table + i and the 16-bit constant at table + 2*i, so the
+   entries for negative divisors precede the label and those for
+   positive divisors follow it.  The two 16-byte gaps next to the
+   label are the index range that the normalization leaves unused.
+   negative division constants */
+       .word   -16638
+       .word   -17135
+       .word   -17737
+       .word   -18433
+       .word   -19103
+       .word   -19751
+       .word   -20583
+       .word   -21383
+       .word   -22343
+       .word   -23353
+       .word   -24407
+       .word   -25582
+       .word   -26863
+       .word   -28382
+       .word   -29965
+       .word   -31800
+/* negative division factors */
+       .byte   66
+       .byte   70
+       .byte   75
+       .byte   81
+       .byte   87
+       .byte   93
+       .byte   101
+       .byte   109
+       .byte   119
+       .byte   130
+       .byte   142
+       .byte   156
+       .byte   172
+       .byte   192
+       .byte   214
+       .byte   241
+       .skip 16
+       .global GLOBAL(div_table)
+GLOBAL(div_table):
+       HIDDEN_ALIAS(div_table_internal,div_table)
+       .skip 16
+/* positive division factors */
+       .byte   241
+       .byte   214
+       .byte   192
+       .byte   172
+       .byte   156
+       .byte   142
+       .byte   130
+       .byte   119
+       .byte   109
+       .byte   101
+       .byte   93
+       .byte   87
+       .byte   81
+       .byte   75
+       .byte   70
+       .byte   66
+/* positive division constants */
+       .word   31801
+       .word   29966
+       .word   28383
+       .word   26864
+       .word   25583
+       .word   24408
+       .word   23354
+       .word   22344
+       .word   21384
+       .word   20584
+       .word   19752
+       .word   19104
+       .word   18434
+       .word   17738
+       .word   17136
+       .word   16639
+
+#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
+/* This code uses shld, and is thus not suitable for SH1 / SH2.  */
+
+/* Signed / unsigned division without use of FPU, optimized for SH4.
+   Uses a lookup table for divisors in the range -128 .. +128, and
+   div1 with case distinction for larger divisors in three more ranges.
+   The code is lumped together with the table to allow the use of mova.  */
+#ifdef __LITTLE_ENDIAN__
+#define L_LSB 0
+#define L_LSWMSB 1
+#define L_MSWLSB 2
+#else
+#define L_LSB 3
+#define L_LSWMSB 2
+#define L_MSWLSB 1
+#endif
+
+       .balign 4
+       .global GLOBAL(udivsi3_i4i)
+       FUNC(GLOBAL(udivsi3_i4i))
+GLOBAL(udivsi3_i4i): /* Unsigned 32-bit divide: r4 = dividend, r5 = divisor, quotient in r0.  */
+       mov.w LOCAL(c128_w), r1 /* r1 = 128, upper bound of the table lookup path */
+       div0u
+       mov r4,r0
+       shlr8 r0 /* r0 = dividend >> 8 */
+       cmp/hi r1,r5 /* T = (divisor > 128), unsigned */
+       extu.w r5,r1
+       bf LOCAL(udiv_le128)
+       cmp/eq r5,r1 /* T = (divisor fits in 16 bits) */
+       bf LOCAL(udiv_ge64k)
+       shlr r0 /* 128 < divisor < 0x10000: r0 = dividend >> 9 */
+       mov r5,r1 /* keep the unshifted divisor for the stack save below */
+       shll16 r5
+       mov.l r4,@-r15 /* save r4; restored before returning */
+       div1 r5,r0
+       mov.l r1,@-r15 /* save the original r5; restored before returning */
+       div1 r5,r0
+       div1 r5,r0
+       bra LOCAL(udiv_25) /* finish in the div1 sequence shared with sdivsi3_i4i */
+       div1 r5,r0 /* (delay slot) */
+
+LOCAL(div_le128): /* Entered from sdivsi3_i4i, which has already pushed r4 and r5.  */
+       mova LOCAL(div_table_ix),r0
+       bra LOCAL(div_le128_2)
+       mov.b @(r0,r5),r1 /* (delay slot) r1 = div_table_ix[divisor] */
+LOCAL(udiv_le128): /* Unsigned entry for divisor <= 128.  */
+       mov.l r4,@-r15
+       mova LOCAL(div_table_ix),r0
+       mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor], a byte offset from div_table_inv */
+       mov.l r5,@-r15
+LOCAL(div_le128_2):
+       mova LOCAL(div_table_inv),r0
+       mov.l @(r0,r1),r1 /* r1 = normalized inverse selected by the index byte */
+       mov r5,r0
+       tst #0xfe,r0 /* T = ((divisor & 0xfe) == 0), i.e. divisor is 0 or 1 */
+       mova LOCAL(div_table_clz),r0
+       dmulu.l r1,r4 /* mach:macl = inverse * dividend, unsigned 64-bit product */
+       mov.b @(r0,r5),r1 /* r1 = div_table_clz[divisor], the final shift count */
+       bt/s LOCAL(div_by_1)
+       mov r4,r0 /* (delay slot) r0 = dividend, the result on the div_by_1 path */
+       mov.l @r15+,r5
+       sts mach,r0 /* r0 = high word of the product */
+       /* clrt */
+       addc r4,r0 /* add the dividend for the implicit leading 1 of the inverse */
+       mov.l @r15+,r4
+       rotcr r0 /* rotate right through T, bringing the carry of the addc in as bit 31 */
+       rts
+       shld r1,r0 /* (delay slot) apply the shift count from div_table_clz */
+
+LOCAL(div_by_1_neg): /* Taken from div_le128_neg when (divisor & 0xfe) == 0: result is -dividend.  */
+       neg r4,r0
+LOCAL(div_by_1): /* r0 already holds the result; restore r5 and r4 and return.  */
+       mov.l @r15+,r5
+       rts
+       mov.l @r15+,r4 /* (delay slot) */
+
+LOCAL(div_ge64k): /* Entered from sdivsi3_i4i with T = (divisor > dividend >> 8).  */
+       bt/s LOCAL(div_r8)
+       div0u
+       shll8 r5
+       bra LOCAL(div_ge64k_2)
+       div1 r5,r0 /* (delay slot) */
+LOCAL(udiv_ge64k): /* Unsigned entry for divisor >= 0x10000; r0 = dividend >> 8.  */
+       cmp/hi r0,r5 /* T = (divisor > dividend >> 8) */
+       mov r5,r1
+       bt LOCAL(udiv_r8)
+       shll8 r5
+       mov.l r4,@-r15
+       div1 r5,r0
+       mov.l r1,@-r15 /* save the original r5 */
+LOCAL(div_ge64k_2):
+       div1 r5,r0
+       mov.l LOCAL(zero_l),r1
+       .rept 4
+       div1 r5,r0
+       .endr
+       mov.l r1,@-r15 /* push a zeroed scratch word */
+       div1 r5,r0
+       mov.w LOCAL(m256_w),r1 /* r1 = 0xff00 sign-extended: mask that clears the low byte */
+       div1 r5,r0
+       mov.b r0,@(L_LSWMSB,r15) /* park the low byte of r0 in the scratch word */
+       xor r4,r0
+       and r1,r0
+       bra LOCAL(div_ge64k_end)
+       xor r4,r0 /* (delay slot) r0 = upper 24 bits of r0 combined with the low byte of r4 */
+       
+LOCAL(div_r8): /* Entered from div_ge64k when divisor > dividend >> 8.  */
+       shll16 r4
+       bra LOCAL(div_r8_2)
+       shll8 r4 /* (delay slot) r4 = dividend << 24 */
+LOCAL(udiv_r8): /* Unsigned entry when divisor > dividend >> 8.  */
+       mov.l r4,@-r15
+       shll16 r4
+       clrt
+       shll8 r4
+       mov.l r5,@-r15
+LOCAL(div_r8_2):
+       rotcl r4
+       mov r0,r1
+       div1 r5,r1
+       mov r4,r0
+       rotcl r0
+       mov r5,r4 /* r4 = copy of the divisor, so r5 can be restored before the last div1 */
+       div1 r5,r1
+       .rept 5
+       rotcl r0; div1 r5,r1
+       .endr
+       rotcl r0
+       mov.l @r15+,r5
+       div1 r4,r1
+       mov.l @r15+,r4
+       rts
+       rotcl r0 /* (delay slot) rotate the final T into the result */
+
+       ENDFUNC(GLOBAL(udivsi3_i4i))
+
+       .global GLOBAL(sdivsi3_i4i)
+       FUNC(GLOBAL(sdivsi3_i4i))
+       /* This is link-compatible with a GLOBAL(sdivsi3) call,
+          but we effectively clobber only r1.  */
+GLOBAL(sdivsi3_i4i): /* Signed 32-bit divide: r4 = dividend, r5 = divisor, quotient in r0.  */
+       mov.l r4,@-r15 /* save r4; restored before returning */
+       cmp/pz r5 /* T = (divisor >= 0) */
+       mov.w LOCAL(c128_w), r1 /* r1 = 128, upper bound of the table lookup path */
+       bt/s LOCAL(pos_divisor)
+       cmp/pz r4 /* (delay slot) T = (dividend >= 0) */
+       mov.l r5,@-r15 /* negative divisor: save it, then work on its negation */
+       neg r5,r5
+       bt/s LOCAL(neg_result) /* dividend >= 0 and divisor < 0: result is negated */
+       cmp/hi r1,r5 /* (delay slot) T = (negated divisor > 128) */
+       neg r4,r4 /* both operands negative: negate the dividend too, result is not negated */
+LOCAL(pos_result): /* Result is returned as computed; T = (divisor > 128) on entry.  */
+       extu.w r5,r0
+       bf LOCAL(div_le128) /* divisor <= 128: table lookup path in udivsi3_i4i */
+       cmp/eq r5,r0 /* T = (divisor fits in 16 bits) */
+       mov r4,r0
+       shlr8 r0 /* r0 = dividend >> 8 */
+       bf/s LOCAL(div_ge64k)
+       cmp/hi r0,r5 /* (delay slot) T = (divisor > dividend >> 8) */
+       div0u
+       shll16 r5
+       div1 r5,r0
+       div1 r5,r0
+       div1 r5,r0
+LOCAL(udiv_25): /* udivsi3_i4i joins here for 128 < divisor < 0x10000.  */
+       mov.l LOCAL(zero_l),r1
+       div1 r5,r0
+       div1 r5,r0
+       mov.l r1,@-r15 /* push a zeroed scratch word that collects result bytes */
+       .rept 3
+       div1 r5,r0
+       .endr
+       mov.b r0,@(L_MSWLSB,r15) /* store the low byte of r0 into the scratch word */
+       xtrct r4,r0
+       swap.w r0,r0
+       .rept 8
+       div1 r5,r0
+       .endr
+       mov.b r0,@(L_LSWMSB,r15) /* store the next byte into the scratch word */
+LOCAL(div_ge64k_end):
+       .rept 8
+       div1 r5,r0
+       .endr
+       mov.l @r15+,r4 ! zero-extension and swap using LS unit.
+       extu.b r0,r0
+       mov.l @r15+,r5 /* restore r5 */
+       or r4,r0 /* merge the bytes collected in the scratch word */
+       mov.l @r15+,r4 /* restore r4 */
+       rts
+       rotcl r0 /* (delay slot) rotate the final T into the result */
+
+LOCAL(div_le128_neg): /* Table lookup path whose result is negated; r0 = zero-extended low half of r5.  */
+       tst #0xfe,r0 /* T = ((divisor & 0xfe) == 0), i.e. divisor is 0 or 1 */
+       mova LOCAL(div_table_ix),r0
+       mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor] */
+       mova LOCAL(div_table_inv),r0
+       bt/s LOCAL(div_by_1_neg)
+       mov.l @(r0,r1),r1 /* (delay slot) r1 = normalized inverse */
+       mova LOCAL(div_table_clz),r0
+       dmulu.l r1,r4 /* mach:macl = inverse * dividend */
+       mov.b @(r0,r5),r1 /* r1 = div_table_clz[divisor], the final shift count */
+       mov.l @r15+,r5
+       sts mach,r0
+       /* clrt */
+       addc r4,r0 /* add the dividend for the implicit leading 1 of the inverse */
+       mov.l @r15+,r4
+       rotcr r0
+       shld r1,r0
+       rts
+       neg r0,r0 /* (delay slot) negate the result */
+
+LOCAL(pos_divisor): /* divisor >= 0; T = (dividend >= 0) on entry.  */
+       mov.l r5,@-r15 /* save r5; restored before returning */
+       bt/s LOCAL(pos_result)
+       cmp/hi r1,r5 /* (delay slot) T = (divisor > 128) */
+       neg r4,r4 /* dividend < 0: negate it, result is negated */
+LOCAL(neg_result): /* Same dispatch as pos_result, but every exit negates the result.  */
+       extu.w r5,r0
+       bf LOCAL(div_le128_neg)
+       cmp/eq r5,r0 /* T = (divisor fits in 16 bits) */
+       mov r4,r0
+       shlr8 r0 /* r0 = dividend >> 8 */
+       bf/s LOCAL(div_ge64k_neg)
+       cmp/hi r0,r5 /* (delay slot) T = (divisor > dividend >> 8) */
+       div0u
+       mov.l LOCAL(zero_l),r1
+       shll16 r5
+       div1 r5,r0
+       mov.l r1,@-r15 /* push a zeroed scratch word that collects result bytes */
+       .rept 7
+       div1 r5,r0
+       .endr
+       mov.b r0,@(L_MSWLSB,r15)
+       xtrct r4,r0
+       swap.w r0,r0
+       .rept 8
+       div1 r5,r0
+       .endr
+       mov.b r0,@(L_LSWMSB,r15)
+LOCAL(div_ge64k_neg_end):
+       .rept 8
+       div1 r5,r0
+       .endr
+       mov.l @r15+,r4 ! zero-extension and swap using LS unit.
+       extu.b r0,r1
+       mov.l @r15+,r5 /* restore r5 */
+       or r4,r1 /* merge the bytes collected in the scratch word */
+LOCAL(div_r8_neg_end):
+       mov.l @r15+,r4 /* restore r4 */
+       rotcl r1
+       rts
+       neg r1,r0 /* (delay slot) r0 = negated result */
+
+LOCAL(div_ge64k_neg): /* Divisor >= 0x10000, negated result; T = (divisor > dividend >> 8).  */
+       bt/s LOCAL(div_r8_neg)
+       div0u
+       shll8 r5
+       mov.l LOCAL(zero_l),r1
+       .rept 6
+       div1 r5,r0
+       .endr
+       mov.l r1,@-r15 /* push a zeroed scratch word */
+       div1 r5,r0
+       mov.w LOCAL(m256_w),r1 /* r1 = 0xff00 sign-extended: mask that clears the low byte */
+       div1 r5,r0
+       mov.b r0,@(L_LSWMSB,r15)
+       xor r4,r0
+       and r1,r0
+       bra LOCAL(div_ge64k_neg_end)
+       xor r4,r0 /* (delay slot) r0 = upper 24 bits of r0 combined with the low byte of r4 */
+
+LOCAL(c128_w): /* Constant loaded with mov.w at both entry points.  */
+       .word 128
+
+LOCAL(div_r8_neg): /* Divisor > dividend >> 8, negated result.  */
+       clrt
+       shll16 r4
+       mov r4,r1
+       shll8 r1 /* r1 = dividend << 24 */
+       mov r5,r4 /* r4 = copy of the divisor, so r5 can be restored before the last div1 */
+       .rept 7
+       rotcl r1; div1 r5,r0
+       .endr
+       mov.l @r15+,r5
+       rotcl r1
+       bra LOCAL(div_r8_neg_end)
+       div1 r4,r0 /* (delay slot) */
+
+LOCAL(m256_w): /* Constant loaded with mov.w in the ge64k paths.  */
+       .word 0xff00
+/* This table has been generated by divtab-sh4.c.  */
+       .balign 4
+LOCAL(div_table_clz):
+       .byte   0
+       .byte   1
+       .byte   0
+       .byte   -1
+       .byte   -1
+       .byte   -2
+       .byte   -2
+       .byte   -2
+       .byte   -2
+       .byte   -3
+       .byte   -3
+       .byte   -3
+       .byte   -3
+       .byte   -3
+       .byte   -3
+       .byte   -3
+       .byte   -3
+       .byte   -4
+       .byte   -4
+       .byte   -4
+       .byte   -4
+       .byte   -4
+       .byte   -4
+       .byte   -4
+       .byte   -4
+       .byte   -4
+       .byte   -4
+       .byte   -4
+       .byte   -4
+       .byte   -4
+       .byte   -4
+       .byte   -4
+       .byte   -4
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -5
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+       .byte   -6
+/* Lookup table translating a positive divisor to a byte offset into the
+   table of normalized inverses.  N.B. the '0' entry is also the last entry of
+   the previous table, and causes an unaligned access for division by zero.  */
+LOCAL(div_table_ix):
+       .byte   -6
+       .byte   -128
+       .byte   -128
+       .byte   0
+       .byte   -128
+       .byte   -64
+       .byte   0
+       .byte   64
+       .byte   -128
+       .byte   -96
+       .byte   -64
+       .byte   -32
+       .byte   0
+       .byte   32
+       .byte   64
+       .byte   96
+       .byte   -128
+       .byte   -112
+       .byte   -96
+       .byte   -80
+       .byte   -64
+       .byte   -48
+       .byte   -32
+       .byte   -16
+       .byte   0
+       .byte   16
+       .byte   32
+       .byte   48
+       .byte   64
+       .byte   80
+       .byte   96
+       .byte   112
+       .byte   -128
+       .byte   -120
+       .byte   -112
+       .byte   -104
+       .byte   -96
+       .byte   -88
+       .byte   -80
+       .byte   -72
+       .byte   -64
+       .byte   -56
+       .byte   -48
+       .byte   -40
+       .byte   -32
+       .byte   -24
+       .byte   -16
+       .byte   -8
+       .byte   0
+       .byte   8
+       .byte   16
+       .byte   24
+       .byte   32
+       .byte   40
+       .byte   48
+       .byte   56
+       .byte   64
+       .byte   72
+       .byte   80
+       .byte   88
+       .byte   96
+       .byte   104
+       .byte   112
+       .byte   120
+       .byte   -128
+       .byte   -124
+       .byte   -120
+       .byte   -116
+       .byte   -112
+       .byte   -108
+       .byte   -104
+       .byte   -100
+       .byte   -96
+       .byte   -92
+       .byte   -88
+       .byte   -84
+       .byte   -80
+       .byte   -76
+       .byte   -72
+       .byte   -68
+       .byte   -64
+       .byte   -60
+       .byte   -56
+       .byte   -52
+       .byte   -48
+       .byte   -44
+       .byte   -40
+       .byte   -36
+       .byte   -32
+       .byte   -28
+       .byte   -24
+       .byte   -20
+       .byte   -16
+       .byte   -12
+       .byte   -8
+       .byte   -4
+       .byte   0
+       .byte   4
+       .byte   8
+       .byte   12
+       .byte   16
+       .byte   20
+       .byte   24
+       .byte   28
+       .byte   32
+       .byte   36
+       .byte   40
+       .byte   44
+       .byte   48
+       .byte   52
+       .byte   56
+       .byte   60
+       .byte   64
+       .byte   68
+       .byte   72
+       .byte   76
+       .byte   80
+       .byte   84
+       .byte   88
+       .byte   92
+       .byte   96
+       .byte   100
+       .byte   104
+       .byte   108
+       .byte   112
+       .byte   116
+       .byte   120
+       .byte   124
+       .byte   -128
+/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
+       .balign 4
+LOCAL(zero_l):
+       .long   0x0
+       .long   0xF81F81F9
+       .long   0xF07C1F08
+       .long   0xE9131AC0
+       .long   0xE1E1E1E2
+       .long   0xDAE6076C
+       .long   0xD41D41D5
+       .long   0xCD856891
+       .long   0xC71C71C8
+       .long   0xC0E07039
+       .long   0xBACF914D
+       .long   0xB4E81B4F
+       .long   0xAF286BCB
+       .long   0xA98EF607
+       .long   0xA41A41A5
+       .long   0x9EC8E952
+       .long   0x9999999A
+       .long   0x948B0FCE
+       .long   0x8F9C18FA
+       .long   0x8ACB90F7
+       .long   0x86186187
+       .long   0x81818182
+       .long   0x7D05F418
+       .long   0x78A4C818
+       .long   0x745D1746
+       .long   0x702E05C1
+       .long   0x6C16C16D
+       .long   0x68168169
+       .long   0x642C8591
+       .long   0x60581606
+       .long   0x5C9882BA
+       .long   0x58ED2309
+LOCAL(div_table_inv):
+       .long   0x55555556
+       .long   0x51D07EAF
+       .long   0x4E5E0A73
+       .long   0x4AFD6A06
+       .long   0x47AE147B
+       .long   0x446F8657
+       .long   0x41414142
+       .long   0x3E22CBCF
+       .long   0x3B13B13C
+       .long   0x38138139
+       .long   0x3521CFB3
+       .long   0x323E34A3
+       .long   0x2F684BDB
+       .long   0x2C9FB4D9
+       .long   0x29E4129F
+       .long   0x27350B89
+       .long   0x24924925
+       .long   0x21FB7813
+       .long   0x1F7047DD
+       .long   0x1CF06ADB
+       .long   0x1A7B9612
+       .long   0x18118119
+       .long   0x15B1E5F8
+       .long   0x135C8114
+       .long   0x11111112
+       .long   0xECF56BF
+       .long   0xC9714FC
+       .long   0xA6810A7
+       .long   0x8421085
+       .long   0x624DD30
+       .long   0x4104105
+       .long   0x2040811
+       /* maximum error: 0.987342 scaled: 0.921875*/
+
+       ENDFUNC(GLOBAL(sdivsi3_i4i))
+#endif /* SH3 / SH4 */
+
+#endif /* L_div_table */
+
+#ifdef L_udiv_qrnnd_16
+#if !__SHMEDIA__
+       HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
+       /* Out: r0 = rn, r1 = qn.  In: r0 = n1, r4 = n0, r5 = d, r6 = d1.  Scratch: r2 = __m.  */
+       /* n1 < d, but n1 might be larger than d1.  */
+       .global GLOBAL(udiv_qrnnd_16)
+       .balign 8
+GLOBAL(udiv_qrnnd_16):
+       div0u
+       cmp/hi r6,r0 /* T = (n1 > d1), unsigned */
+       bt .Lots /* handled separately at .Lots below */
+       .rept 16
+       div1 r6,r0 
+       .endr
+       extu.w r0,r1 /* r1 = low 16 bits of r0 */
+       bt 0f
+       add r6,r0 /* only when the last div1 left T clear */
+0:     rotcl r1 /* rotate T into the low end of r1 */
+       mulu.w r1,r5 /* macl = low 16 bits of r1 times low 16 bits of r5 */
+       xtrct r4,r0
+       swap.w r0,r0
+       sts macl,r2 /* r2 = the 16x16 product (__m) */
+       cmp/hs r2,r0 /* T = (r0 >= r2), unsigned: the subtraction below does not borrow */
+       sub r2,r0
+       bt 0f /* no borrow: return through the rts at the final 0: label */
+       addc r5,r0 /* borrowed: add d back and lower the quotient */
+       add #-1,r1
+       bt 0f /* carry out of the addc: one correction was enough */
+1:     add #-1,r1 /* second correction step; also reached from .Lots */
+       rts
+       add r5,r0 /* (delay slot) */
+       .balign 8
+.Lots: /* n1 > d1 on entry.  */
+       sub r5,r0
+       swap.w r4,r1
+       xtrct r0,r1
+       clrt
+       mov r1,r0
+       addc r5,r0
+       mov #-1,r1
+       SL1(bf, 1b,
+       shlr16 r1)
+0:     rts
+       nop
+       ENDFUNC(GLOBAL(udiv_qrnnd_16))
+#endif /* !__SHMEDIA__ */
+#endif /* L_udiv_qrnnd_16 */
diff --git a/libgcc/config/sh/lib1funcs.h b/libgcc/config/sh/lib1funcs.h

new file mode 100644 (file)

index 0000000..af4b41c
--- /dev/null
+++ b/libgcc/config/sh/lib1funcs.h
@@ -0,0 +1,76 @@
+/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+   2004, 2005, 2006, 2009
+   Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifdef __ELF__ /* ELF: emit symbol type, size and visibility directives.  */
+#define LOCAL(X)       .L_##X /* file-local label name: X with a .L_ prefix */
+#define FUNC(X)                .type X,@function /* mark X as a function symbol */
+#define HIDDEN_FUNC(X) FUNC(X); .hidden X /* FUNC plus hidden visibility */
+#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X) /* alias GLOBAL(X) to GLOBAL(Y) and hide it */
+#define ENDFUNC0(X)    .Lfe_##X: .size X,.Lfe_##X-X /* end label, then size = end label - X */
+#define ENDFUNC(X)     ENDFUNC0(X) /* extra level so X is macro-expanded before the ## paste */
+#else /* not ELF: plain L_ labels, no type/size/visibility directives */
+#define LOCAL(X)       L_##X
+#define FUNC(X)
+#define HIDDEN_FUNC(X)
+#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y)
+#define ENDFUNC(X)
+#endif
+
+#define        CONCAT(A,B)     A##B /* paste two already-expanded tokens */
+#define        GLOBAL0(U,X)    CONCAT(U,__##X) /* U followed by __X */
+#define        GLOBAL(X)       GLOBAL0(__USER_LABEL_PREFIX__,X) /* external symbol name: user label prefix, then __X */
+
+#define ALIAS(X,Y)     .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y) /* export GLOBAL(X) as another name for GLOBAL(Y) */
+
+#if defined __SH2A__ && defined __FMOVD_ENABLED__ /* in this configuration FMOVD_WORKS is always defined */
+#undef  FMOVD_WORKS /* drop any earlier definition so the define below cannot clash */
+#define FMOVD_WORKS
+#endif
+
+#ifdef __LITTLE_ENDIAN__ /* Aliases for fr0..fr5 in pairs; each pair is swapped between the two endiannesses.  */
+#define DR00 fr1 /* NOTE(review): presumably the two halves of dr0, dr2 and dr4 -- confirm against the users */
+#define DR01 fr0
+#define DR20 fr3
+#define DR21 fr2
+#define DR40 fr5
+#define DR41 fr4
+#else /* !__LITTLE_ENDIAN__ */
+#define DR00 fr0
+#define DR01 fr1
+#define DR20 fr2
+#define DR21 fr3
+#define DR40 fr4
+#define DR41 fr5
+#endif /* !__LITTLE_ENDIAN__ */
+
+#ifdef __sh1__ /* SL / SL1 pair a branch with one instruction; here the instruction is emitted first, then the plain branch.  */
+#define SL(branch, dest, in_slot, in_slot_arg2) \
+       in_slot, in_slot_arg2; branch dest
+#define SL1(branch, dest, in_slot) \
+       in_slot; branch dest
+#else /* ! __sh1__: emit the branch with a .s (SL) or /s (SL1) suffix, followed by the instruction */
+#define SL(branch, dest, in_slot, in_slot_arg2) \
+       branch##.s dest; in_slot, in_slot_arg2
+#define SL1(branch, dest, in_slot) \
+       branch##/s dest; in_slot
+#endif /* !__sh1__ */
diff --git a/libgcc/config/sh/t-linux b/libgcc/config/sh/t-linux

index af618e260c671f592df961e5d170bcecaa9f2a7d..9b1feacd1f319dabf0360fbf42ff48b37113e581 100644 (file)
--- a/libgcc/config/sh/t-linux
+++ b/libgcc/config/sh/t-linux
@@ -1,3 +1,5 @@
+LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array
+
  HOST_LIBGCC2_CFLAGS = -fpic -mieee -DNO_FPSCR_VALUES
  
  # Override t-slibgcc-elf-ver to export some libgcc symbols with
diff --git a/libgcc/config/sh/t-netbsd b/libgcc/config/sh/t-netbsd

new file mode 100644 (file)

index 0000000..663edbf
--- /dev/null
+++ b/libgcc/config/sh/t-netbsd
@@ -0,0 +1 @@
+LIB1ASMFUNCS_CACHE = _ic_invalidate
diff --git a/libgcc/config/sh/t-sh b/libgcc/config/sh/t-sh

index ab4d98089b12dafc1c1af2a10b5eb233b1839177..2319adbef1d0b1b8544e43cf3d54f7dc71d442ef 100644 (file)
--- a/libgcc/config/sh/t-sh
+++ b/libgcc/config/sh/t-sh
@@ -17,26 +17,33 @@
  # along with GCC; see the file COPYING3.  If not see
  # <http://www.gnu.org/licenses/>.
  
+LIB1ASMSRC = sh/lib1funcs.S
+LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt \
+  _movmem _movmem_i4 _mulsi3 \
+  _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
+  _div_table _udiv_qrnnd_16 $(LIB1ASMFUNCS_CACHE)
+LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array
+
  crt1.o: $(srcdir)/config/sh/crt1.S
         $(gcc_compile) -c $<
  
-ic_invalidate_array_4-100.o: $(gcc_srcdir)/config/sh/lib1funcs.asm
+ic_invalidate_array_4-100.o: $(srcdir)/config/sh/lib1funcs.S
         $(gcc_compile) -c -DL_ic_invalidate_array -DWAYS=1 -DWAY_SIZE=0x2000 $<
  libic_invalidate_array_4-100.a: ic_invalidate_array_4-100.o
         $(AR_CREATE_FOR_TARGET) $@ $<
  
-ic_invalidate_array_4-200.o: $(gcc_srcdir)/config/sh/lib1funcs.asm
+ic_invalidate_array_4-200.o: $(srcdir)/config/sh/lib1funcs.S
         $(gcc_compile) -c -DL_ic_invalidate_array -DWAYS=2 -DWAY_SIZE=0x2000 $<
  libic_invalidate_array_4-200.a: ic_invalidate_array_4-200.o
         $(AR_CREATE_FOR_TARGET) $@ $<
  
-ic_invalidate_array_4a.o: $(gcc_srcdir)/config/sh/lib1funcs.asm
+ic_invalidate_array_4a.o: $(srcdir)/config/sh/lib1funcs.S
         $(gcc_compile) -c -DL_ic_invalidate_array -D__FORCE_SH4A__ $<
  libic_invalidate_array_4a.a: ic_invalidate_array_4a.o
         $(AR_CREATE_FOR_TARGET) $@ $<
  
  sdivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.S
-       $(gcc_compile) -c -DL_sdivsi3_i4i $<
+       $(compile) -c -DL_sdivsi3_i4i $<
  udivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.S
         $(gcc_compile) -c -DL_udivsi3_i4i $<
  unwind-dw2-Os-4-200.o: $(gcc_srcdir)/unwind-dw2.c
diff --git a/libgcc/config/sh/t-sh64 b/libgcc/config/sh/t-sh64

new file mode 100644 (file)

index 0000000..fa9950e
--- /dev/null
+++ b/libgcc/config/sh/t-sh64
@@ -0,0 +1,6 @@
+LIB1ASMFUNCS = _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 \
+  _set_fpscr \
+  _shcompact_call_trampoline _shcompact_return_trampoline \
+  _shcompact_incoming_args \
+  _ic_invalidate _nested_trampoline _push_pop_shmedia_regs \
+  _udivdi3 _divdi3 _umoddi3 _moddi3 _div_table
diff --git a/libgcc/config/sparc/lb1spc.S b/libgcc/config/sparc/lb1spc.S

new file mode 100644 (file)

index 0000000..b60bd57
--- /dev/null
+++ b/libgcc/config/sparc/lb1spc.S
@@ -0,0 +1,784 @@
+/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
+   for the sparc processor.
+
+   These routines are derived from the SPARC Architecture Manual, version 8,
+   slightly edited to match the desired calling convention, and also to
+   optimize them for our purposes.  */
+
+#ifdef L_mulsi3
+.text
+       .align 4
+       .global .umul
+       .proc 4
+.umul: ! %o0 = multiplier, %o1 = multiplicand; low 32 bits of the product are returned in %o0
+       or      %o0, %o1, %o4   ! logical or of multiplier and multiplicand
+       mov     %o0, %y         ! multiplier to Y register
+       andncc  %o4, 0xfff, %o5 ! mask out lower 12 bits
+       be      mul_shortway    ! can do it the short way (neither operand has a bit set above bit 11)
+       andcc   %g0, %g0, %o4   ! zero the partial product and clear NV cc
+       !
+       ! long multiply
+       !
+       mulscc  %o4, %o1, %o4   ! first iteration of 33
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4   ! 32nd iteration
+       mulscc  %o4, %g0, %o4   ! last iteration only shifts
+       ! the upper 32 bits of product are wrong, but we do not care
+       retl
+       rd      %y, %o0         ! (delay slot) result is read back from the Y register
+       !
+       ! short multiply
+       !
+mul_shortway:
+       mulscc  %o4, %o1, %o4   ! first iteration of 13
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4
+       mulscc  %o4, %o1, %o4   ! 12th iteration
+       mulscc  %o4, %g0, %o4   ! last iteration only shifts
+       rd      %y, %o5
+       sll     %o4, 12, %o4    ! left shift partial product by 12 bits
+       srl     %o5, 20, %o5    ! right shift partial product by 20 bits
+       retl
+       or      %o5, %o4, %o0   ! merge for true product
+#endif
+
+#ifdef L_divsi3
+/*
+ * Division and remainder, from Appendix E of the SPARC Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively; the quotient is returned in %o0.
+ *
+ * m4 parameters (apparently already substituted here, so each name shows the value used for .div):
+ *  .div       name of function to generate
+ *  div                div=div => %o0 / %o1; div=rem => %o0 % %o1
+ *  true               true=true => signed; true=false => unsigned
+ *
+ * Algorithm parameters:
+ *  N          how many bits per iteration we try to get (4)
+ *  WORDSIZE   total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS    number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q          the partial quotient under development (initially 0); kept in %o2
+ *  R          the remainder so far, initially the dividend; kept in %o3
+ *  ITER       number of main division loop iterations required; kept in %o4;
+ *             equal to ceil(log2(quotient) / N).  Note that this
+ *             is the log base (2^N) of the quotient.
+ *  V          the current comparand, initially divisor*2^(ITER*N-1); kept in %o5
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *     ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+        .global .udiv
+        .align 4
+        .proc 4
+        .text
+.udiv: ! unsigned entry point: skips the sign handling done by .div
+         b ready_to_divide
+         mov 0, %g3             ! result is always positive
+
+        .global .div
+        .align 4
+        .proc 4
+        .text
+.div: ! signed entry point: leaves the result sign in %g3 and makes both operands nonnegative
+       ! compute sign of result; if neither is negative, no problem
+       orcc    %o1, %o0, %g0   ! either negative?
+       bge     ready_to_divide ! no, go do the divide
+       xor     %o1, %o0, %g3   ! compute sign in any case
+       tst     %o1
+       bge     1f
+       tst     %o0             ! (delay slot) condition codes for the bge below
+       ! %o1 is definitely negative; %o0 might also be negative
+       bge     ready_to_divide ! if %o0 not negative...
+       sub     %g0, %o1, %o1   ! in any case, make %o1 nonneg
+1:     ! %o0 is negative, %o1 is nonnegative
+       sub     %g0, %o0, %o0   ! make %o0 nonnegative
+
+
+ready_to_divide:
+
+       ! Ready to divide.  Compute size of quotient; scale comparand.
+       orcc    %o1, %g0, %o5   ! %o5 = divisor (V); sets Z when the divisor is zero
+       bne     1f
+       mov     %o0, %o3        ! (delay slot) %o3 = dividend (R)
+
+       ! Divide by zero trap.  If it returns, return 0 (about as
+       ! wrong as possible, but that is what SunOS does...).
+       ta      0x2             ! ST_DIV0
+       retl
+       clr     %o0
+
+1:
+       cmp     %o3, %o5                ! if %o1 exceeds %o0, done
+       blu     got_result              ! (and algorithm fails otherwise)
+       clr     %o2                     ! (delay slot) Q = 0
+       sethi   %hi(1 << (32 - 4 - 1)), %g1     ! %g1 = 1 << 27
+       cmp     %o3, %g1
+       blu     not_really_big          ! dividend below 1 << 27: use the N-bit loop
+       clr     %o4                     ! (delay slot) ITER = 0
+
+       ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+       ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+       ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+       ! Compute ITER in an unorthodox manner: we know we need to shift V into
+       ! the top decade: so do not even bother to compare to R.
+       1:
+               cmp     %o5, %g1
+               bgeu    3f
+               mov     1, %g2          ! (delay slot) %g2 = 1, the count of single-bit steps
+               sll     %o5, 4, %o5
+               b       1b
+               add     %o4, 1, %o4     ! (delay slot) ITER += 1
+
+       ! Now compute %g2.
+       2:      addcc   %o5, %o5, %o5
+               bcc     not_too_big
+               add     %g2, 1, %g2
+
+               ! We get here if the copy of %o1 in %o5 overflowed while shifting.
+               ! This means that %o3 has the high-order bit set.
+               ! Restore %o5 and subtract from %o3.
+               sll     %g1, 4, %g1     ! high order bit
+               srl     %o5, 1, %o5     ! rest of %o5
+               add     %o5, %g1, %o5
+               b       do_single_div
+               sub     %g2, 1, %g2
+
+       not_too_big:
+       3:      cmp     %o5, %o3
+               blu     2b
+               nop
+               be      do_single_div
+               nop
+       /* NB: these are commented out in the V8-SPARC manual as well */
+       /* (I do not understand this) */
+       ! %o5 > %o3: went too far: back up 1 step
+       !       srl     %o5, 1, %o5
+       !       dec     %g2
+       ! do single-bit divide steps
+       !
+       ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+       ! first divide step without thinking.  BUT, the others are conditional,
+       ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+       ! order bit set in the first step, just falling into the regular
+       ! division loop will mess up the first time around.
+       ! So we unroll slightly...
+       do_single_div:
+               subcc   %g2, 1, %g2
+               bl      end_regular_divide
+               nop
+               sub     %o3, %o5, %o3
+               mov     1, %o2
+               b       end_single_divloop
+               nop
+       single_divloop:
+               sll     %o2, 1, %o2
+               bl      1f
+               srl     %o5, 1, %o5
+               ! %o3 >= 0
+               sub     %o3, %o5, %o3
+               b       2f
+               add     %o2, 1, %o2
+       1:      ! %o3 < 0
+               add     %o3, %o5, %o3
+               sub     %o2, 1, %o2
+       2:
+       end_single_divloop:
+               subcc   %g2, 1, %g2
+               bge     single_divloop
+               tst     %o3             ! (delay slot) condition codes for the bl at single_divloop
+               b,a     end_regular_divide      ! annulled: the following instruction is not executed
+
+not_really_big:
+1:
+       sll     %o5, 4, %o5     ! scale the comparand V up by N=4 bits per pass
+       cmp     %o5, %o3
+       bleu    1b              ! repeat while V <= R
+       addcc   %o4, 1, %o4     ! (delay slot) count the pass in ITER
+       be      got_result      ! taken only if the addcc above produced zero
+       sub     %o4, 1, %o4     ! (delay slot) take back the count added on the final pass
+
+       tst     %o3     ! set up for initial iteration
+divloop:
+       sll     %o2, 4, %o2     ! make room in Q for the next N=4 quotient bits
+       ! depth 1, accumulated bits 0
+       bl      L1.16
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       ! depth 2, accumulated bits 1
+       bl      L2.17
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       ! depth 3, accumulated bits 3
+       bl      L3.19
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits 7
+       bl      L4.23
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (7*2+1), %o2
+       
+L4.23:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (7*2-1), %o2
+       
+       
+L3.19:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits 5
+       bl      L4.21
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (5*2+1), %o2
+       
+L4.21:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (5*2-1), %o2
+       
+L2.17:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       ! depth 3, accumulated bits 1
+       bl      L3.17
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits 3
+       bl      L4.19
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (3*2+1), %o2
+       
+L4.19:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (3*2-1), %o2
+
+L3.17:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits 1
+       bl      L4.17
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (1*2+1), %o2
+
+L4.17:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (1*2-1), %o2
+       
+L1.16:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       ! depth 2, accumulated bits -1
+       bl      L2.15
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       ! depth 3, accumulated bits -1
+       bl      L3.15
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits -1
+       bl      L4.15
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-1*2+1), %o2
+       
+L4.15:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-1*2-1), %o2
+       
+L3.15:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits -3
+       bl      L4.13
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-3*2+1), %o2
+       
+L4.13:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-3*2-1), %o2
+       
+L2.15:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       ! depth 3, accumulated bits -3
+       bl      L3.13
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits -5
+       bl      L4.11
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-5*2+1), %o2
+       
+L4.11:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-5*2-1), %o2
+       
+L3.13:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits -7
+       bl      L4.9
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-7*2+1), %o2
+
+L4.9:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-7*2-1), %o2
+       
+       9:
+end_regular_divide:
+       subcc   %o4, 1, %o4
+       bge     divloop
+       tst     %o3
+       bl,a    got_result
+       ! non-restoring fixup here (one instruction only!)
+       sub     %o2, 1, %o2
+
+
+got_result:
+       ! check to see if answer should be < 0
+       tst     %g3
+       bl,a    1f
+       sub %g0, %o2, %o2
+1:
+       retl
+       mov %o2, %o0
+#endif
+
+#ifdef L_modsi3
+/* This implementation was taken from glibc:
+ *
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * Algorithm parameters:
+ *  N          how many bits per iteration we try to get (4)
+ *  WORDSIZE   total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS    number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q          the partial quotient under development (initially 0)
+ *  R          the remainder so far, initially the dividend
+ *  ITER       number of main division loop iterations required;
+ *             equal to ceil(log2(quotient) / N).  Note that this
+ *             is the log base (2^N) of the quotient.
+ *  V          the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *     ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+.text
+       .align 4
+       .global .urem
+       .proc 4
+.urem:                         ! unsigned remainder of %o0 by %o1, returned in %o0
+       b       divide
+       mov     0, %g3          ! (delay slot) %g3 = 0: got_result never negates, result always positive
+
+        .align 4
+       .global .rem
+       .proc 4
+.rem:                          ! signed remainder of %o0 by %o1, returned in %o0
+       ! compute sign of result; if neither is negative, no problem
+       orcc    %o1, %o0, %g0   ! either negative?
+       bge     2f                      ! no, go do the divide
+       mov     %o0, %g3                ! (delay slot) sign of remainder matches %o0
+       tst     %o1                     ! is the divisor the negative one?
+       bge     1f                      ! no, so it must be %o0
+       tst     %o0                     ! (delay slot) flags on %o0 for the bge below
+       ! %o1 is definitely negative; %o0 might also be negative
+       bge     2f                      ! if %o0 not negative...
+       sub     %g0, %o1, %o1   ! (delay slot) in any case, make %o1 nonneg
+1:     ! %o0 is negative, %o1 is nonnegative
+       sub     %g0, %o0, %o0   ! make %o0 nonnegative
+2:                             ! operands negated as needed; continue at divide
+
+       ! Ready to divide.  Compute size of quotient; scale comparand.
+divide:
+       orcc    %o1, %g0, %o5   ! V (%o5) = divisor; Z is set if it is zero
+       bne     1f
+       mov     %o0, %o3        ! (delay slot) R (%o3) = dividend
+
+               ! Divide by zero trap.  If it returns, return 0 (about as
+               ! wrong as possible, but that is what SunOS does...).
+               ta      0x2   !ST_DIV0
+               retl
+               clr     %o0
+
+1:
+       cmp     %o3, %o5                ! if %o1 exceeds %o0, done
+       blu     got_result              ! (and algorithm fails otherwise)
+       clr     %o2             ! (delay slot) Q (%o2) = 0
+       sethi   %hi(1 << (32 - 4 - 1)), %g1     ! %g1 = 2^27
+       cmp     %o3, %g1
+       blu     not_really_big  ! R below 2^27: go straight to the 4-bit loop
+       clr     %o4             ! (delay slot) ITER (%o4) = 0
+
+       ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+       ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+       ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+       ! Compute ITER in an unorthodox manner: know we need to shift V into
+       ! the top decade: so do not even bother to compare to R.
+       1:
+               cmp     %o5, %g1
+               bgeu    3f
+               mov     1, %g2          ! (delay slot) %g2 = 1 on every pass
+               sll     %o5, 4, %o5     ! V <<= 4
+               b       1b
+               add     %o4, 1, %o4     ! (delay slot) ITER++
+
+       ! Now compute %g2.
+       2:      addcc   %o5, %o5, %o5   ! V <<= 1, carry = bit shifted out
+               bcc     not_too_big
+               add     %g2, 1, %g2     ! (delay slot) one more single-bit step
+
+               ! We get here if the %o1 overflowed while shifting.
+               ! This means that %o3 has the high-order bit set.
+               ! Restore %o5 and subtract from %o3.
+               sll     %g1, 4, %g1     ! high order bit
+               srl     %o5, 1, %o5             ! rest of %o5
+               add     %o5, %g1, %o5
+               b       do_single_div
+               sub     %g2, 1, %g2     ! (delay slot) undo the increment above
+
+       not_too_big:
+       3:      cmp     %o5, %o3
+               blu     2b
+               nop
+               be      do_single_div
+               nop
+       /* NB: these are commented out in the V8-SPARC manual as well */
+       /* (I do not understand this) */
+       ! %o5 > %o3: went too far: back up 1 step
+       !       srl     %o5, 1, %o5
+       !       dec     %g2
+       ! do single-bit divide steps
+       !
+       ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+       ! first divide step without thinking.  BUT, the others are conditional,
+       ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+       ! order bit set in the first step, just falling into the regular
+       ! division loop will mess up the first time around.
+       ! So we unroll slightly...
+       do_single_div:
+               subcc   %g2, 1, %g2
+               bl      end_regular_divide
+               nop
+               sub     %o3, %o5, %o3   ! first step, unconditional: R -= V
+               mov     1, %o2          ! Q = 1
+               b       end_single_divloop
+               nop
+       single_divloop:
+               sll     %o2, 1, %o2     ! Q <<= 1 (sll leaves the condition codes alone)
+               bl      1f              ! tests the flags set by tst %o3 below
+               srl     %o5, 1, %o5     ! (delay slot) V >>= 1 on both paths
+               ! %o3 >= 0
+               sub     %o3, %o5, %o3
+               b       2f
+               add     %o2, 1, %o2     ! (delay slot) Q += 1
+       1:      ! %o3 < 0
+               add     %o3, %o5, %o3
+               sub     %o2, 1, %o2
+       2:
+       end_single_divloop:
+               subcc   %g2, 1, %g2
+               bge     single_divloop
+               tst     %o3             ! (delay slot) flags on R for the next bl
+               b,a     end_regular_divide      ! annulled: no delay-slot instruction runs
+
+not_really_big:
+1:
+       sll     %o5, 4, %o5     ! V <<= 4
+       cmp     %o5, %o3
+       bleu    1b              ! keep scaling while V <= R
+       addcc   %o4, 1, %o4     ! (delay slot) ITER++ on every pass, the last one included
+       be      got_result      ! NOTE(review): flags here are those of the addcc above
+       sub     %o4, 1, %o4     ! (delay slot) take back the extra increment
+
+       tst     %o3     ! set up for initial iteration
+divloop:       ! 4 steps per pass; label Ld.k = negative-remainder target at depth d, k = 16 + accumulated bits
+       sll     %o2, 4, %o2     ! Q <<= 4; the add at each leaf fills in this pass's bits
+               ! depth 1, accumulated bits 0
+       bl      L1.16           ! flags come from tst %o3 (above, or in the bge divloop delay slot)
+       srl     %o5,1,%o5       ! (delay slot) V >>= 1 whichever way the branch goes
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       ! depth 2, accumulated bits 1
+       bl      L2.17
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       ! depth 3, accumulated bits 3
+       bl      L3.19
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits 7
+       bl      L4.23
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (7*2+1), %o2
+L4.23:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (7*2-1), %o2
+       
+L3.19:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits 5
+       bl      L4.21
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (5*2+1), %o2
+       
+L4.21:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (5*2-1), %o2
+       
+L2.17:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       ! depth 3, accumulated bits 1
+       bl      L3.17
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits 3
+       bl      L4.19
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (3*2+1), %o2
+       
+L4.19:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (3*2-1), %o2
+       
+L3.17:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits 1
+       bl      L4.17
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (1*2+1), %o2
+       
+L4.17:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (1*2-1), %o2
+       
+L1.16:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       ! depth 2, accumulated bits -1
+       bl      L2.15
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       ! depth 3, accumulated bits -1
+       bl      L3.15
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits -1
+       bl      L4.15
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-1*2+1), %o2
+       
+L4.15:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-1*2-1), %o2
+       
+L3.15:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits -3
+       bl      L4.13
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-3*2+1), %o2
+       
+L4.13:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-3*2-1), %o2
+       
+L2.15:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       ! depth 3, accumulated bits -3
+       bl      L3.13
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits -5
+       bl      L4.11
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-5*2+1), %o2
+       
+L4.11:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-5*2-1), %o2
+       
+L3.13:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       ! depth 4, accumulated bits -7
+       bl      L4.9
+       srl     %o5,1,%o5
+       ! remainder is positive
+       subcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-7*2+1), %o2
+       
+L4.9:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+       b       9f
+       add     %o2, (-7*2-1), %o2
+       
+       9:
+end_regular_divide:
+       subcc   %o4, 1, %o4     ! --ITER
+       bge     divloop         ! another 4-bit pass while ITER >= 0
+       tst     %o3             ! (delay slot) flags on R, for divloop or for the bl,a below
+       bl,a    got_result      ! annulled branch: the add runs only when R < 0
+       ! non-restoring fixup here (one instruction only!)
+       add     %o3, %o1, %o3   ! R += divisor, turning the negative remainder positive
+
+got_result:
+       ! check to see if answer should be < 0
+       tst     %g3
+       bl,a    1f              ! annulled branch: negate only when %g3 < 0
+       sub %g0, %o3, %o3
+1:
+       retl
+       mov %o3, %o0            ! (delay slot) return the remainder in %o0
+
+#endif
+
diff --git a/libgcc/config/sparc/t-softmul b/libgcc/config/sparc/t-softmul

index 49faae47c534e2b27de5f1383e29777305cdb206..7142200600f461e1507890a17bc04f4134893b68 100644 (file)
--- a/libgcc/config/sparc/t-softmul
+++ b/libgcc/config/sparc/t-softmul
@@ -1,2 +1,2 @@
-LIB1ASMSRC = sparc/lb1spc.asm
+LIB1ASMSRC = sparc/lb1spc.S
  LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3
diff --git a/libgcc/config/v850/lib1funcs.S b/libgcc/config/v850/lib1funcs.S

new file mode 100644 (file)

index 0000000..04e9b1e
--- /dev/null
+++ b/libgcc/config/v850/lib1funcs.S
@@ -0,0 +1,2330 @@
+/* libgcc routines for NEC V850.
+   Copyright (C) 1996, 1997, 2002, 2005, 2009, 2010
+   Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifdef L_mulsi3
+       .text
+       .globl ___mulsi3
+       .type  ___mulsi3,@function
+___mulsi3:
+#ifdef __v850__        
+/* 32-bit multiply built from 16-bit mulh on three 15-bit slices (a in r6, b in r7, result in r10):
+   #define SHIFT 15
+   #define MASK ((1 << SHIFT) - 1)
+    
+   #define STEP(i, j)                               \
+   ({                                               \
+       short a_part = (a >> (i)) & MASK;            \
+       short b_part = (b >> (j)) & MASK;            \
+       int res = (((int) a_part) * ((int) b_part)); \
+       res;                                         \
+   })
+  
+   int
+   __mulsi3 (unsigned a, unsigned b)
+   {
+      return STEP (0, 0) +
+          ((STEP (SHIFT, 0) + STEP (0, SHIFT)) << SHIFT) +
+          ((STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) + STEP (2 * SHIFT, 0))
+           << (2 * SHIFT));
+   }
+*/
+        mov   r6, r14
+        movea lo(32767), r0, r10        /* r10 = MASK (0x7fff) */
+        and   r10, r14                  /* r14 = a0 = a & MASK */
+        mov   r7,  r15
+        and   r10, r15                  /* r15 = b0 = b & MASK */
+        shr   15,  r6
+        mov   r6,  r13
+        and   r10, r13                  /* r13 = a1 = (a >> 15) & MASK */
+        shr   15,  r7
+        mov   r7,  r12
+        and   r10, r12                  /* r12 = b1 = (b >> 15) & MASK */
+        shr   15,  r6                   /* r6 = a2 = a >> 30 */
+        shr   15,  r7                   /* r7 = b2 = b >> 30 */
+        mov   r14, r10
+        mulh  r15, r10                  /* r10 = a0 * b0 */
+        mov   r14, r11
+        mulh  r12, r11                  /* r11 = a0 * b1 */
+        mov   r13, r16
+        mulh  r15, r16                  /* r16 = a1 * b0 */
+        mulh  r14, r7                   /* r7 = a0 * b2 */
+        mulh  r15, r6                   /* r6 = a2 * b0 */
+        add   r16, r11                  /* r11 = a0*b1 + a1*b0 */
+        mulh  r13, r12                  /* r12 = a1 * b1 */
+        shl   15,  r11
+        add   r11, r10                  /* r10 += (a0*b1 + a1*b0) << 15 */
+        add   r12, r7
+        add   r6,  r7                   /* r7 = a0*b2 + a1*b1 + a2*b0 */
+        shl   30,  r7
+        add   r7,  r10                  /* r10 += r7 << 30 */
+        jmp   [r31]                     /* return with the product in r10 */
+#endif /* __v850__ */
+#if defined(__v850e__) || defined(__v850ea__) || defined(__v850e2__) || defined(__v850e2v3__)
+        /* This routine is almost unnecessary because gcc
+           generates the MUL instruction for the RTX mulsi3.
+           But if someone wants to link their application with
+           previously compiled v850 objects then they will
+          need this function.  */
+ 
+        /* It isn't good to put the inst sequence as below;
+              mul r7, r6, r0
+              mov r6, r10
+           In this case, there is a RAW hazard between them.
+           MUL inst takes 2 cycle in EX stage, then MOV inst
+           must wait 1cycle.  */
+        mov   r7, r10
+        mul   r6, r10, r0               /* r10 = r10 * r6; r0 as third operand discards the high word */
+        jmp   [r31]
+#endif /* __v850e__ */
+       .size ___mulsi3,.-___mulsi3
+#endif /* L_mulsi3 */
+
+
+#ifdef L_udivsi3
+       .text
+       .global ___udivsi3
+       .type   ___udivsi3,@function
+___udivsi3:
+#ifdef __v850__
+       mov 1,r12               /* r12 = quotient bit currently being tried */
+       mov 0,r10               /* r10 = quotient accumulated so far */
+       cmp r6,r7               /* flags from r7 - r6 (divisor - dividend) */
+       bnl .L12                /* divisor >= dividend (unsigned): no scaling needed */
+       movhi hi(-2147483648),r0,r13    /* r13 = 0x80000000 */
+       cmp r0,r7
+       blt .L12                /* divisor already has its top bit set */
+.L4:
+       shl 1,r7                /* scale the divisor and the bit up together */
+       shl 1,r12
+       cmp r6,r7
+       bnl .L12                /* stop once divisor >= dividend */
+       cmp r0,r12
+       be .L8                  /* bit was shifted out of r12: return r10 as it is */
+       mov r7,r19
+       and r13,r19
+       be .L4                  /* top bit of divisor still clear: keep scaling */
+       br .L12
+.L9:
+       cmp r7,r6               /* flags from r6 - r7 */
+       bl .L10                 /* what is left of the dividend < divisor: bit stays 0 */
+       sub r7,r6               /* r6 -= r7 */
+       or r12,r10              /* record this quotient bit */
+.L10:
+       shr 1,r12               /* step down to the next lower bit */
+       shr 1,r7
+.L12:
+       cmp r0,r12
+       bne .L9                 /* loop until the bit has been shifted out of r12 */
+.L8:
+       jmp [r31]               /* quotient is in r10 */
+
+#else /* defined(__v850e__) */
+
+       /* See comments at end of __mulsi3.  */
+       mov   r6, r10   
+       divu  r7, r10, r0       /* r10 = r10 / r7 (unsigned); remainder goes to r0, i.e. is dropped */
+       jmp   [r31]             
+
+#endif /* __v850e__ */
+
+       .size ___udivsi3,.-___udivsi3
+#endif
+
+#ifdef L_divsi3
+       .text
+       .globl ___divsi3
+       .type  ___divsi3,@function
+___divsi3:
+#ifdef __v850__
+       add -8,sp
+       st.w r31,4[sp]          /* the jarl below overwrites r31, so keep our return address */
+       st.w r22,0[sp]          /* r22 is borrowed for the sign flag */
+       mov 1,r22               /* r22 = 1; negated once for each negative operand */
+       tst r7,r7
+       bp .L3                  /* divisor is not negative */
+       subr r0,r7              /* r7 = 0 - r7 */
+       subr r0,r22
+.L3:
+       tst r6,r6
+       bp .L4                  /* dividend is not negative */
+       subr r0,r6              /* r6 = 0 - r6 */
+       subr r0,r22
+.L4:
+       jarl ___udivsi3,r31     /* r10 = unsigned quotient of the adjusted operands */
+       cmp r0,r22
+       bp .L7                  /* r22 still non-negative: signs agreed, keep r10 */
+       subr r0,r10             /* r10 = 0 - r10 */
+.L7:
+       ld.w 0[sp],r22
+       ld.w 4[sp],r31
+       add 8,sp
+       jmp [r31]               /* quotient is in r10 */
+
+#else /* defined(__v850e__) */
+
+       /* See comments at end of __mulsi3.  */
+       mov   r6, r10
+       div   r7, r10, r0       /* r10 = r10 / r7 (signed); remainder goes to r0, i.e. is dropped */
+       jmp   [r31]
+
+#endif /* __v850e__ */
+
+       .size ___divsi3,.-___divsi3
+#endif
+
+#ifdef  L_umodsi3
+       .text
+       .globl ___umodsi3
+       .type  ___umodsi3,@function
+___umodsi3:
+#ifdef __v850__
+       add -12,sp
+       st.w r31,8[sp]          /* the jarl calls below overwrite r31 */
+       st.w r7,4[sp]           /* keep the divisor for the multiply */
+       st.w r6,0[sp]           /* keep the dividend for the final subtraction */
+       jarl ___udivsi3,r31     /* r10 = quotient */
+       ld.w 4[sp],r7           /* r7 = original divisor */
+       mov r10,r6
+       jarl ___mulsi3,r31      /* r10 = quotient * divisor */
+       ld.w 0[sp],r6           /* r6 = original dividend */
+       subr r6,r10             /* r10 = r6 - r10 = dividend - quotient * divisor */
+       ld.w 8[sp],r31
+       add 12,sp
+       jmp [r31]               /* remainder is in r10 */
+
+#else /* defined(__v850e__) */
+
+       /* See comments at end of __mulsi3.  */
+       divu  r7, r6, r10       /* r6 = r6 / r7 (unsigned); the remainder lands in r10 */
+       jmp   [r31]
+
+#endif /* __v850e__ */
+
+       .size ___umodsi3,.-___umodsi3
+#endif /* L_umodsi3 */
+
+#ifdef  L_modsi3
+       .text
+       .globl ___modsi3
+       .type  ___modsi3,@function
+___modsi3:
+#ifdef __v850__        
+       add -12,sp
+       st.w r31,8[sp]          /* the jarl calls below overwrite r31 */
+       st.w r7,4[sp]           /* keep the divisor for the multiply */
+       st.w r6,0[sp]           /* keep the dividend for the final subtraction */
+       jarl ___divsi3,r31      /* r10 = signed quotient */
+       ld.w 4[sp],r7           /* r7 = original divisor */
+       mov r10,r6
+       jarl ___mulsi3,r31      /* r10 = quotient * divisor */
+       ld.w 0[sp],r6           /* r6 = original dividend */
+       subr r6,r10             /* r10 = r6 - r10 = dividend - quotient * divisor */
+       ld.w 8[sp],r31
+       add 12,sp
+       jmp [r31]               /* remainder is in r10 */
+
+#else /* defined(__v850e__) */
+
+       /* See comments at end of __mulsi3.  */
+       div  r7, r6, r10        /* r6 = r6 / r7 (signed); the remainder lands in r10 */
+       jmp [r31]
+
+#endif /* __v850e__ */
+
+       .size ___modsi3,.-___modsi3
+#endif /* L_modsi3 */
+
+#ifdef L_save_2
+       .text
+       .align  2
+       .globl  __save_r2_r29
+       .type   __save_r2_r29,@function
+       /* Allocate 44 bytes of stack and save registers 2, 20 .. 29 there (r29 at 0[sp], r2 at 40[sp]).  */
+       /* Called via:  jarl __save_r2_r29,r10 (the routine returns through r10).  */
+__save_r2_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       addi    -44,sp,sp
+       mov     sp,ep           /* ep = new frame, so the short sst.w form can address it */
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r25,16[ep]
+       sst.w   r24,20[ep]
+       sst.w   r23,24[ep]
+       sst.w   r22,28[ep]
+       sst.w   r21,32[ep]
+       sst.w   r20,36[ep]
+       sst.w   r2,40[ep]
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       addi    -44,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r25,16[sp]
+       st.w    r24,20[sp]
+       st.w    r23,24[sp]
+       st.w    r22,28[sp]
+       st.w    r21,32[sp]
+       st.w    r20,36[sp]
+       st.w    r2,40[sp]
+#endif
+       jmp     [r10]
+       .size   __save_r2_r29,.-__save_r2_r29
+
+       /* Reload registers 2, 20 .. 29 from the frame laid out above, release its 44 bytes, and return through r31.  */
+       /* Called via:  jr __return_r2_r29.  */
+       .align  2
+       .globl  __return_r2_r29
+       .type   __return_r2_r29,@function
+__return_r2_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       mov     sp,ep           /* ep = frame, so the short sld.w form can address it */
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r25
+       sld.w   20[ep],r24
+       sld.w   24[ep],r23
+       sld.w   28[ep],r22
+       sld.w   32[ep],r21
+       sld.w   36[ep],r20
+       sld.w   40[ep],r2
+       addi    44,sp,sp
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r25
+       ld.w    20[sp],r24
+       ld.w    24[sp],r23
+       ld.w    28[sp],r22
+       ld.w    32[sp],r21
+       ld.w    36[sp],r20
+       ld.w    40[sp],r2
+       addi    44,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r2_r29,.-__return_r2_r29
+#endif /* L_save_2 */
+
+#ifdef L_save_20
+       .text
+       .align  2
+       .globl  __save_r20_r29
+       .type   __save_r20_r29,@function
+       /* Allocate 40 bytes of stack and save registers 20 .. 29 there (r29 at 0[sp], r20 at 36[sp]).  */
+       /* Called via:  jarl __save_r20_r29,r10 (the routine returns through r10).  */
+__save_r20_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       addi    -40,sp,sp
+       mov     sp,ep           /* ep = new frame, so the short sst.w form can address it */
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r25,16[ep]
+       sst.w   r24,20[ep]
+       sst.w   r23,24[ep]
+       sst.w   r22,28[ep]
+       sst.w   r21,32[ep]
+       sst.w   r20,36[ep]
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       addi    -40,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r25,16[sp]
+       st.w    r24,20[sp]
+       st.w    r23,24[sp]
+       st.w    r22,28[sp]
+       st.w    r21,32[sp]
+       st.w    r20,36[sp]
+#endif
+       jmp     [r10]
+       .size   __save_r20_r29,.-__save_r20_r29
+
+       /* Reload registers 20 .. 29 from the frame laid out above, release its 40 bytes, and return through r31.  */
+       /* Called via:  jr __return_r20_r29.  */
+       .align  2
+       .globl  __return_r20_r29
+       .type   __return_r20_r29,@function
+__return_r20_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       mov     sp,ep           /* ep = frame, so the short sld.w form can address it */
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r25
+       sld.w   20[ep],r24
+       sld.w   24[ep],r23
+       sld.w   28[ep],r22
+       sld.w   32[ep],r21
+       sld.w   36[ep],r20
+       addi    40,sp,sp
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r25
+       ld.w    20[sp],r24
+       ld.w    24[sp],r23
+       ld.w    28[sp],r22
+       ld.w    32[sp],r21
+       ld.w    36[sp],r20
+       addi    40,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r20_r29,.-__return_r20_r29
+#endif /* L_save_20 */
+
+#ifdef L_save_21
+       .text
+       .align  2
+       .globl  __save_r21_r29
+       .type   __save_r21_r29,@function
+       /* Allocate 36 bytes of stack and save registers 21 .. 29 there (r29 at 0[sp], r21 at 32[sp]).  */
+       /* Called via:  jarl __save_r21_r29,r10 (the routine returns through r10).  */
+__save_r21_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       addi    -36,sp,sp
+       mov     sp,ep           /* ep = new frame, so the short sst.w form can address it */
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r25,16[ep]
+       sst.w   r24,20[ep]
+       sst.w   r23,24[ep]
+       sst.w   r22,28[ep]
+       sst.w   r21,32[ep]
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       addi    -36,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r25,16[sp]
+       st.w    r24,20[sp]
+       st.w    r23,24[sp]
+       st.w    r22,28[sp]
+       st.w    r21,32[sp]
+#endif
+       jmp     [r10]
+       .size   __save_r21_r29,.-__save_r21_r29
+
+       /* Reload registers 21 .. 29 from the frame laid out above, release its 36 bytes, and return through r31.  */
+       /* Called via:  jr __return_r21_r29.  */
+       .align  2
+       .globl  __return_r21_r29
+       .type   __return_r21_r29,@function
+__return_r21_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       mov     sp,ep           /* ep = frame, so the short sld.w form can address it */
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r25
+       sld.w   20[ep],r24
+       sld.w   24[ep],r23
+       sld.w   28[ep],r22
+       sld.w   32[ep],r21
+       addi    36,sp,sp
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r25
+       ld.w    20[sp],r24
+       ld.w    24[sp],r23
+       ld.w    28[sp],r22
+       ld.w    32[sp],r21
+       addi    36,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r21_r29,.-__return_r21_r29
+#endif /* L_save_21 */
+
+#ifdef L_save_22
+       .text
+       .align  2
+       .globl  __save_r22_r29
+       .type   __save_r22_r29,@function
+       /* Allocate 32 bytes of stack and save registers 22 .. 29 there (r29 at 0[sp], r22 at 28[sp]).  */
+       /* Called via:  jarl __save_r22_r29,r10 (the routine returns through r10).  */
+__save_r22_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       addi    -32,sp,sp
+       mov     sp,ep           /* ep = new frame, so the short sst.w form can address it */
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r25,16[ep]
+       sst.w   r24,20[ep]
+       sst.w   r23,24[ep]
+       sst.w   r22,28[ep]
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       addi    -32,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r25,16[sp]
+       st.w    r24,20[sp]
+       st.w    r23,24[sp]
+       st.w    r22,28[sp]
+#endif
+       jmp     [r10]
+       .size   __save_r22_r29,.-__save_r22_r29
+
+       /* Reload registers 22 .. 29 from the frame laid out above, release its 32 bytes, and return through r31.  */
+       /* Called via:  jr __return_r22_r29.  */
+       .align  2
+       .globl  __return_r22_r29
+       .type   __return_r22_r29,@function
+__return_r22_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       mov     sp,ep           /* ep = frame, so the short sld.w form can address it */
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r25
+       sld.w   20[ep],r24
+       sld.w   24[ep],r23
+       sld.w   28[ep],r22
+       addi    32,sp,sp
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r25
+       ld.w    20[sp],r24
+       ld.w    24[sp],r23
+       ld.w    28[sp],r22
+       addi    32,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r22_r29,.-__return_r22_r29
+#endif /* L_save_22 */
+
+#ifdef L_save_23
+       .text
+       .align  2
+       .globl  __save_r23_r29
+       .type   __save_r23_r29,@function
+       /* Allocate 28 bytes of stack and save registers 23 .. 29 there (r29 at 0[sp], r23 at 24[sp]).  */
+       /* Called via:  jarl __save_r23_r29,r10 (the routine returns through r10).  */
+__save_r23_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       addi    -28,sp,sp
+       mov     sp,ep           /* ep = new frame, so the short sst.w form can address it */
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r25,16[ep]
+       sst.w   r24,20[ep]
+       sst.w   r23,24[ep]
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       addi    -28,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r25,16[sp]
+       st.w    r24,20[sp]
+       st.w    r23,24[sp]
+#endif
+       jmp     [r10]
+       .size   __save_r23_r29,.-__save_r23_r29
+
+       /* Reload registers 23 .. 29 from the frame laid out above, release its 28 bytes, and return through r31.  */
+       /* Called via:  jr __return_r23_r29.  */
+       .align  2
+       .globl  __return_r23_r29
+       .type   __return_r23_r29,@function
+__return_r23_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       mov     sp,ep           /* ep = frame, so the short sld.w form can address it */
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r25
+       sld.w   20[ep],r24
+       sld.w   24[ep],r23
+       addi    28,sp,sp
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r25
+       ld.w    20[sp],r24
+       ld.w    24[sp],r23
+       addi    28,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r23_r29,.-__return_r23_r29
+#endif /* L_save_23 */
+
+#ifdef L_save_24
+       .text
+       .align  2
+       .globl  __save_r24_r29
+       .type   __save_r24_r29,@function
+       /* Allocate 24 bytes of stack and save registers 24 .. 29 there (r29 at 0[sp], r24 at 20[sp]).  */
+       /* Called via:  jarl __save_r24_r29,r10 (the routine returns through r10).  */
+__save_r24_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       addi    -24,sp,sp
+       mov     sp,ep           /* ep = new frame, so the short sst.w form can address it */
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r25,16[ep]
+       sst.w   r24,20[ep]
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       addi    -24,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r25,16[sp]
+       st.w    r24,20[sp]
+#endif
+       jmp     [r10]
+       .size   __save_r24_r29,.-__save_r24_r29
+
+       /* Reload registers 24 .. 29 from the frame laid out above, release its 24 bytes, and return through r31.  */
+       /* Called via:  jr __return_r24_r29.  */
+       .align  2
+       .globl  __return_r24_r29
+       .type   __return_r24_r29,@function
+__return_r24_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       mov     sp,ep           /* ep = frame, so the short sld.w form can address it */
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r25
+       sld.w   20[ep],r24
+       addi    24,sp,sp
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r25
+       ld.w    20[sp],r24
+       addi    24,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r24_r29,.-__return_r24_r29
+#endif /* L_save_24 */
+
+#ifdef L_save_25
+       .text
+       .align  2
+       .globl  __save_r25_r29
+       .type   __save_r25_r29,@function
+       /* Allocate 20 bytes of stack and save registers 25 .. 29 there (r29 at 0[sp], r25 at 16[sp]).  */
+       /* Called via:  jarl __save_r25_r29,r10 (the routine returns through r10).  */
+__save_r25_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       addi    -20,sp,sp
+       mov     sp,ep           /* ep = new frame, so the short sst.w form can address it */
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r25,16[ep]
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       addi    -20,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r25,16[sp]
+#endif
+       jmp     [r10]
+       .size   __save_r25_r29,.-__save_r25_r29
+
+       /* Reload registers 25 .. 29 from the frame laid out above, release its 20 bytes, and return through r31.  */
+       /* Called via:  jr __return_r25_r29.  */
+       .align  2
+       .globl  __return_r25_r29
+       .type   __return_r25_r29,@function
+__return_r25_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       mov     sp,ep           /* ep = frame, so the short sld.w form can address it */
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r25
+       addi    20,sp,sp
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       ld.w    0[sp],r29       /* sp-relative: ep is not set up to point at the frame in this branch */
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r25
+       addi    20,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r25_r29,.-__return_r25_r29
+#endif /* L_save_25 */
+
+#ifdef L_save_26
+       .text
+       .align  2
+       .globl  __save_r26_r29
+       .type   __save_r26_r29,@function
+       /* Allocate 16 bytes of stack and save registers 26 .. 29 there (r29 at 0[sp], r26 at 12[sp]).  */
+       /* Called via:  jarl __save_r26_r29,r10 (the routine returns through r10).  */
+__save_r26_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       add     -16,sp
+       mov     sp,ep           /* ep = new frame, so the short sst.w form can address it */
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       add     -16,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+#endif
+       jmp     [r10]
+       .size   __save_r26_r29,.-__save_r26_r29
+
+       /* Reload registers 26 .. 29 from the frame laid out above, release its 16 bytes, and return through r31.  */
+       /* Called via:  jr __return_r26_r29.  */
+       .align  2
+       .globl  __return_r26_r29
+       .type   __return_r26_r29,@function
+__return_r26_r29:
+#ifdef __EP__
+       mov     ep,r1           /* keep the caller's ep in r1 */
+       mov     sp,ep           /* ep = frame, so the short sld.w form can address it */
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       addi    16,sp,sp
+       mov     r1,ep           /* put the caller's ep back */
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       addi    16,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r26_r29,.-__return_r26_r29
+#endif /* L_save_26 */
+
+#ifdef L_save_27
+       .text
+       .align  2
+       .globl  __save_r27_r29
+       .type   __save_r27_r29,@function
+       /* Allocate 12 bytes of stack and save registers 27 .. 29 in it:
+          r29 at offset 0, r28 at 4, r27 at 8.  */
+       /* Called via:  jalr __save_r27_r29,r10.  */
+__save_r27_r29:
+       add     -12,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_r27_r29,.-__save_r27_r29
+
+       /* Reload registers 27 .. 29 from the offsets used by
+          __save_r27_r29, deallocate the 12-byte frame and return
+          through r31.  */
+       /* Called via:  jr __return_r27_r29.  */
+       .align  2
+       .globl  __return_r27_r29
+       .type   __return_r27_r29,@function
+__return_r27_r29:
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       add     12,sp
+       jmp     [r31]
+       .size   __return_r27_r29,.-__return_r27_r29
+#endif /* L_save_27 */
+
+#ifdef L_save_28
+       .text
+       .align  2
+       .globl  __save_r28_r29
+       .type   __save_r28_r29,@function
+       /* Allocate 8 bytes of stack and save registers 28 and 29 in it:
+          r29 at offset 0, r28 at offset 4.  */
+       /* Called via:  jalr __save_r28_r29,r10.  */
+__save_r28_r29:
+       add     -8,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_r28_r29,.-__save_r28_r29
+
+       /* Reload registers 28 and 29, deallocate the 8-byte frame and
+          return through r31.  */
+       /* Called via:  jr __return_r28_r29.  */
+       .align  2
+       .globl  __return_r28_r29
+       .type   __return_r28_r29,@function
+__return_r28_r29:
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       add     8,sp
+       jmp     [r31]
+       .size   __return_r28_r29,.-__return_r28_r29
+#endif /* L_save_28 */
+
+#ifdef L_save_29
+       .text
+       .align  2
+       .globl  __save_r29
+       .type   __save_r29,@function
+       /* Allocate 4 bytes of stack and save register 29 at offset 0.  */
+       /* Called via:  jalr __save_r29,r10.  */
+__save_r29:
+       add     -4,sp
+       st.w    r29,0[sp]
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_r29,.-__save_r29
+
+       /* Restore saved register 29, deallocate the 4-byte frame and
+          return through r31.  */
+       /* Called via:  jr __return_r29.  */
+       .align  2
+       .globl  __return_r29
+       .type   __return_r29,@function
+__return_r29:
+       ld.w    0[sp],r29
+       add     4,sp
+       jmp     [r31]
+       .size   __return_r29,.-__return_r29
+#endif /* L_save_29 */
+
+#ifdef L_save_2c
+       .text
+       .align  2
+       .globl  __save_r2_r31
+       .type   __save_r2_r31,@function
+       /* Allocate space and save registers 2, 20 .. 29, 31 on the stack.  */
+       /* Frame layout (48 bytes, 12 words): r29 at offset 0 up to r20 at
+          offset 36, then r2 at 40 and r31 at 44.  No additional area is
+          allocated beyond these 12 words.  */
+       /* Called via:  jalr __save_r2_r31,r10.  */
+__save_r2_r31:
+#ifdef __EP__
+       /* Use ep as the base for the sst.w stores; r1 carries the
+          caller's ep across and is copied back at the end.  */
+       mov     ep,r1
+       addi    -48,sp,sp
+       mov     sp,ep
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r25,16[ep]
+       sst.w   r24,20[ep]
+       sst.w   r23,24[ep]
+       sst.w   r22,28[ep]
+       sst.w   r21,32[ep]
+       sst.w   r20,36[ep]
+       sst.w   r2,40[ep]
+       sst.w   r31,44[ep]
+       mov     r1,ep
+#else
+       addi    -48,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r25,16[sp]
+       st.w    r24,20[sp]
+       st.w    r23,24[sp]
+       st.w    r22,28[sp]
+       st.w    r21,32[sp]
+       st.w    r20,36[sp]
+       st.w    r2,40[sp]
+       st.w    r31,44[sp]
+#endif
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_r2_r31,.-__save_r2_r31
+
+       /* Restore saved registers, deallocate stack and return to the user.  */
+       /* Each register is reloaded from the offset __save_r2_r31 stored
+          it at; r31 is reloaded before the final jump uses it.  */
+       /* Called via:  jr __return_r2_r31.  */
+       .align  2
+       .globl  __return_r2_r31
+       .type   __return_r2_r31,@function
+__return_r2_r31:
+#ifdef __EP__
+       /* Same ep/r1 swap as in the save routine, here for sld.w.  */
+       mov     ep,r1
+       mov     sp,ep
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r25
+       sld.w   20[ep],r24
+       sld.w   24[ep],r23
+       sld.w   28[ep],r22
+       sld.w   32[ep],r21
+       sld.w   36[ep],r20
+       sld.w   40[ep],r2
+       sld.w   44[ep],r31
+       addi    48,sp,sp
+       mov     r1,ep
+#else
+       /* Offsets must mirror the st.w sequence in __save_r2_r31
+          (r29 at 0 ... r31 at 44), exactly as the __EP__ path does.  */
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r25
+       ld.w    20[sp],r24
+       ld.w    24[sp],r23
+       ld.w    28[sp],r22
+       ld.w    32[sp],r21
+       ld.w    36[sp],r20
+       ld.w    40[sp],r2
+       ld.w    44[sp],r31
+       addi    48,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r2_r31,.-__return_r2_r31
+#endif /* L_save_2c */
+
+#ifdef L_save_20c
+       .text
+       .align  2
+       .globl  __save_r20_r31
+       .type   __save_r20_r31,@function
+       /* Allocate space and save registers 20 .. 29, 31 on the stack.  */
+       /* The frame is 44 bytes (11 words): r29 at offset 0 up to r20 at
+          36, then r31 at 40.  No additional area is allocated here.  */
+       /* Called via:  jalr __save_r20_r31,r10.  */
+__save_r20_r31:
+#ifdef __EP__
+       /* Use ep as the base for the sst.w stores; r1 carries the
+          caller's ep across and is copied back at the end.  */
+       mov     ep,r1
+       addi    -44,sp,sp
+       mov     sp,ep
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r25,16[ep]
+       sst.w   r24,20[ep]
+       sst.w   r23,24[ep]
+       sst.w   r22,28[ep]
+       sst.w   r21,32[ep]
+       sst.w   r20,36[ep]
+       sst.w   r31,40[ep]
+       mov     r1,ep
+#else
+       addi    -44,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r25,16[sp]
+       st.w    r24,20[sp]
+       st.w    r23,24[sp]
+       st.w    r22,28[sp]
+       st.w    r21,32[sp]
+       st.w    r20,36[sp]
+       st.w    r31,40[sp]
+#endif
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_r20_r31,.-__save_r20_r31
+
+       /* Restore saved registers, deallocate stack and return to the user.  */
+       /* r31 is reloaded from the frame before the final jump uses it.  */
+       /* Called via:  jr __return_r20_r31.  */
+       .align  2
+       .globl  __return_r20_r31
+       .type   __return_r20_r31,@function
+__return_r20_r31:
+#ifdef __EP__
+       /* Same ep/r1 swap as in the save routine, here for sld.w.  */
+       mov     ep,r1
+       mov     sp,ep
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r25
+       sld.w   20[ep],r24
+       sld.w   24[ep],r23
+       sld.w   28[ep],r22
+       sld.w   32[ep],r21
+       sld.w   36[ep],r20
+       sld.w   40[ep],r31
+       addi    44,sp,sp
+       mov     r1,ep
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r25
+       ld.w    20[sp],r24
+       ld.w    24[sp],r23
+       ld.w    28[sp],r22
+       ld.w    32[sp],r21
+       ld.w    36[sp],r20
+       ld.w    40[sp],r31
+       addi    44,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r20_r31,.-__return_r20_r31
+#endif /* L_save_20c */
+
+#ifdef L_save_21c
+       .text
+       .align  2
+       .globl  __save_r21_r31
+       .type   __save_r21_r31,@function
+       /* Allocate space and save registers 21 .. 29, 31 on the stack.  */
+       /* The frame is 40 bytes (10 words): r29 at offset 0 up to r21 at
+          32, then r31 at 36.  No additional area is allocated here.  */
+       /* Called via:  jalr __save_r21_r31,r10.  */
+__save_r21_r31:
+#ifdef __EP__  
+       /* Use ep as the base for the sst.w stores; r1 carries the
+          caller's ep across and is copied back at the end.  */
+       mov     ep,r1
+       addi    -40,sp,sp
+       mov     sp,ep
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r25,16[ep]
+       sst.w   r24,20[ep]
+       sst.w   r23,24[ep]
+       sst.w   r22,28[ep]
+       sst.w   r21,32[ep]
+       sst.w   r31,36[ep]
+       mov     r1,ep
+       /* Unlike the sibling routines, the return jump is written inside
+          each preprocessor branch here.  */
+       jmp     [r10]
+#else  
+       addi    -40,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r25,16[sp]
+       st.w    r24,20[sp]
+       st.w    r23,24[sp]
+       st.w    r22,28[sp]
+       st.w    r21,32[sp]
+       st.w    r31,36[sp]
+       jmp     [r10]
+#endif 
+       .size   __save_r21_r31,.-__save_r21_r31
+
+       /* Restore saved registers, deallocate stack and return to the user.  */
+       /* r31 is reloaded from the frame before the final jump uses it.  */
+       /* Called via:  jr __return_r21_r31.  */
+       .align  2
+       .globl  __return_r21_r31
+       .type   __return_r21_r31,@function
+__return_r21_r31:
+#ifdef __EP__
+       /* Same ep/r1 swap as in the save routine, here for sld.w.  */
+       mov     ep,r1
+       mov     sp,ep
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r25
+       sld.w   20[ep],r24
+       sld.w   24[ep],r23
+       sld.w   28[ep],r22
+       sld.w   32[ep],r21
+       sld.w   36[ep],r31
+       addi    40,sp,sp
+       mov     r1,ep
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r25
+       ld.w    20[sp],r24
+       ld.w    24[sp],r23
+       ld.w    28[sp],r22
+       ld.w    32[sp],r21
+       ld.w    36[sp],r31
+       addi    40,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r21_r31,.-__return_r21_r31
+#endif /* L_save_21c */
+
+#ifdef L_save_22c
+       .text
+       .align  2
+       .globl  __save_r22_r31
+       .type   __save_r22_r31,@function
+       /* Allocate space and save registers 22 .. 29, 31 on the stack.  */
+       /* The frame is 36 bytes (9 words): r29 at offset 0 up to r22 at
+          28, then r31 at 32.  No additional area is allocated here.  */
+       /* Called via:  jalr __save_r22_r31,r10.  */
+__save_r22_r31:
+#ifdef __EP__
+       /* Use ep as the base for the sst.w stores; r1 carries the
+          caller's ep across and is copied back at the end.  */
+       mov     ep,r1
+       addi    -36,sp,sp
+       mov     sp,ep
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r25,16[ep]
+       sst.w   r24,20[ep]
+       sst.w   r23,24[ep]
+       sst.w   r22,28[ep]
+       sst.w   r31,32[ep]
+       mov     r1,ep
+#else
+       addi    -36,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r25,16[sp]
+       st.w    r24,20[sp]
+       st.w    r23,24[sp]
+       st.w    r22,28[sp]
+       st.w    r31,32[sp]
+#endif
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_r22_r31,.-__save_r22_r31
+
+       /* Restore saved registers, deallocate stack and return to the user.  */
+       /* r31 is reloaded from the frame before the final jump uses it.  */
+       /* Called via:  jr __return_r22_r31.  */
+       .align  2
+       .globl  __return_r22_r31
+       .type   __return_r22_r31,@function
+__return_r22_r31:
+#ifdef __EP__
+       /* Same ep/r1 swap as in the save routine, here for sld.w.  */
+       mov     ep,r1
+       mov     sp,ep
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r25
+       sld.w   20[ep],r24
+       sld.w   24[ep],r23
+       sld.w   28[ep],r22
+       sld.w   32[ep],r31
+       addi    36,sp,sp
+       mov     r1,ep
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r25
+       ld.w    20[sp],r24
+       ld.w    24[sp],r23
+       ld.w    28[sp],r22
+       ld.w    32[sp],r31
+       addi    36,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r22_r31,.-__return_r22_r31
+#endif /* L_save_22c */
+
+#ifdef L_save_23c
+       .text
+       .align  2
+       .globl  __save_r23_r31
+       .type   __save_r23_r31,@function
+       /* Allocate space and save registers 23 .. 29, 31 on the stack.  */
+       /* The frame is 32 bytes (8 words): r29 at offset 0 up to r23 at
+          24, then r31 at 28.  No additional area is allocated here.  */
+       /* Called via:  jalr __save_r23_r31,r10.  */
+__save_r23_r31:
+#ifdef __EP__
+       /* Use ep as the base for the sst.w stores; r1 carries the
+          caller's ep across and is copied back at the end.  */
+       mov     ep,r1
+       addi    -32,sp,sp
+       mov     sp,ep
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r25,16[ep]
+       sst.w   r24,20[ep]
+       sst.w   r23,24[ep]
+       sst.w   r31,28[ep]
+       mov     r1,ep
+#else
+       addi    -32,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r25,16[sp]
+       st.w    r24,20[sp]
+       st.w    r23,24[sp]
+       st.w    r31,28[sp]
+#endif
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_r23_r31,.-__save_r23_r31
+
+       /* Restore saved registers, deallocate stack and return to the user.  */
+       /* r31 is reloaded from the frame before the final jump uses it.  */
+       /* Called via:  jr __return_r23_r31.  */
+       .align  2
+       .globl  __return_r23_r31
+       .type   __return_r23_r31,@function
+__return_r23_r31:
+#ifdef __EP__
+       /* Same ep/r1 swap as in the save routine, here for sld.w.  */
+       mov     ep,r1
+       mov     sp,ep
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r25
+       sld.w   20[ep],r24
+       sld.w   24[ep],r23
+       sld.w   28[ep],r31
+       addi    32,sp,sp
+       mov     r1,ep
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r25
+       ld.w    20[sp],r24
+       ld.w    24[sp],r23
+       ld.w    28[sp],r31
+       addi    32,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r23_r31,.-__return_r23_r31
+#endif /* L_save_23c */
+
+#ifdef L_save_24c
+       .text
+       .align  2
+       .globl  __save_r24_r31
+       .type   __save_r24_r31,@function
+       /* Allocate space and save registers 24 .. 29, 31 on the stack.  */
+       /* The frame is 28 bytes (7 words): r29 at offset 0 up to r24 at
+          20, then r31 at 24.  No additional area is allocated here.  */
+       /* Called via:  jalr __save_r24_r31,r10.  */
+__save_r24_r31:
+#ifdef __EP__
+       /* Use ep as the base for the sst.w stores; r1 carries the
+          caller's ep across and is copied back at the end.  */
+       mov     ep,r1
+       addi    -28,sp,sp
+       mov     sp,ep
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r25,16[ep]
+       sst.w   r24,20[ep]
+       sst.w   r31,24[ep]
+       mov     r1,ep
+#else
+       addi    -28,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r25,16[sp]
+       st.w    r24,20[sp]
+       st.w    r31,24[sp]
+#endif
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_r24_r31,.-__save_r24_r31
+
+       /* Restore saved registers, deallocate stack and return to the user.  */
+       /* r31 is reloaded from the frame before the final jump uses it.  */
+       /* Called via:  jr __return_r24_r31.  */
+       .align  2
+       .globl  __return_r24_r31
+       .type   __return_r24_r31,@function
+__return_r24_r31:
+#ifdef __EP__
+       /* Same ep/r1 swap as in the save routine, here for sld.w.  */
+       mov     ep,r1
+       mov     sp,ep
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r25
+       sld.w   20[ep],r24
+       sld.w   24[ep],r31
+       addi    28,sp,sp
+       mov     r1,ep
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r25
+       ld.w    20[sp],r24
+       ld.w    24[sp],r31
+       addi    28,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r24_r31,.-__return_r24_r31
+#endif /* L_save_24c */
+
+#ifdef L_save_25c
+       .text
+       .align  2
+       .globl  __save_r25_r31
+       .type   __save_r25_r31,@function
+       /* Allocate space and save registers 25 .. 29, 31 on the stack.  */
+       /* The frame is 24 bytes (6 words): r29 at offset 0 up to r25 at
+          16, then r31 at 20.  No additional area is allocated here.  */
+       /* Called via:  jalr __save_r25_r31,r10.  */
+__save_r25_r31:
+#ifdef __EP__
+       /* Use ep as the base for the sst.w stores; r1 carries the
+          caller's ep across and is copied back at the end.  */
+       mov     ep,r1
+       addi    -24,sp,sp
+       mov     sp,ep
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r25,16[ep]
+       sst.w   r31,20[ep]
+       mov     r1,ep
+#else
+       addi    -24,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r25,16[sp]
+       st.w    r31,20[sp]
+#endif
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_r25_r31,.-__save_r25_r31
+
+       /* Restore saved registers, deallocate stack and return to the user.  */
+       /* r31 is reloaded from the frame before the final jump uses it.  */
+       /* Called via:  jr __return_r25_r31.  */
+       .align  2
+       .globl  __return_r25_r31
+       .type   __return_r25_r31,@function
+__return_r25_r31:
+#ifdef __EP__
+       /* Same ep/r1 swap as in the save routine, here for sld.w.  */
+       mov     ep,r1
+       mov     sp,ep
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r25
+       sld.w   20[ep],r31
+       addi    24,sp,sp
+       mov     r1,ep
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r25
+       ld.w    20[sp],r31
+       addi    24,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r25_r31,.-__return_r25_r31
+#endif /* L_save_25c */
+
+#ifdef L_save_26c
+       .text
+       .align  2
+       .globl  __save_r26_r31
+       .type   __save_r26_r31,@function
+       /* Allocate space and save registers 26 .. 29, 31 on the stack.  */
+       /* The frame is 20 bytes (5 words): r29 at offset 0 up to r26 at
+          12, then r31 at 16.  No additional area is allocated here.  */
+       /* Called via:  jalr __save_r26_r31,r10.  */
+__save_r26_r31:
+#ifdef __EP__
+       /* Use ep as the base for the sst.w stores; r1 carries the
+          caller's ep across and is copied back at the end.  */
+       mov     ep,r1
+       addi    -20,sp,sp
+       mov     sp,ep
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r26,12[ep]
+       sst.w   r31,16[ep]
+       mov     r1,ep
+#else
+       addi    -20,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r26,12[sp]
+       st.w    r31,16[sp]
+#endif
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_r26_r31,.-__save_r26_r31
+
+       /* Restore saved registers, deallocate stack and return to the user.  */
+       /* r31 is reloaded from the frame before the final jump uses it.  */
+       /* Called via:  jr __return_r26_r31.  */
+       .align  2
+       .globl  __return_r26_r31
+       .type   __return_r26_r31,@function
+__return_r26_r31:
+#ifdef __EP__
+       /* Same ep/r1 swap as in the save routine, here for sld.w.  */
+       mov     ep,r1
+       mov     sp,ep
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r26
+       sld.w   16[ep],r31
+       addi    20,sp,sp
+       mov     r1,ep
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r26
+       ld.w    16[sp],r31
+       addi    20,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r26_r31,.-__return_r26_r31
+#endif /* L_save_26c */
+
+#ifdef L_save_27c
+       .text
+       .align  2
+       .globl  __save_r27_r31
+       .type   __save_r27_r31,@function
+       /* Allocate space and save registers 27 .. 29, 31 on the stack.  */
+       /* The frame is 16 bytes (4 words): r29 at offset 0, r28 at 4,
+          r27 at 8, r31 at 12.  No additional area is allocated here.  */
+       /* Called via:  jalr __save_r27_r31,r10.  */
+__save_r27_r31:
+#ifdef __EP__
+       /* Use ep as the base for the sst.w stores; r1 carries the
+          caller's ep across and is copied back at the end.  */
+       mov     ep,r1
+       addi    -16,sp,sp
+       mov     sp,ep
+       sst.w   r29,0[ep]
+       sst.w   r28,4[ep]
+       sst.w   r27,8[ep]
+       sst.w   r31,12[ep]
+       mov     r1,ep
+#else
+       addi    -16,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r27,8[sp]
+       st.w    r31,12[sp]
+#endif
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_r27_r31,.-__save_r27_r31
+
+       /* Restore saved registers, deallocate stack and return to the user.  */
+       /* r31 is reloaded from the frame before the final jump uses it.  */
+       /* Called via:  jr __return_r27_r31.  */
+       .align  2
+       .globl  __return_r27_r31
+       .type   __return_r27_r31,@function
+__return_r27_r31:
+#ifdef __EP__
+       /* Same ep/r1 swap as in the save routine, here for sld.w.  */
+       mov     ep,r1
+       mov     sp,ep
+       sld.w   0[ep],r29
+       sld.w   4[ep],r28
+       sld.w   8[ep],r27
+       sld.w   12[ep],r31
+       addi    16,sp,sp
+       mov     r1,ep
+#else
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r27
+       ld.w    12[sp],r31
+       addi    16,sp,sp
+#endif
+       jmp     [r31]
+       .size   __return_r27_r31,.-__return_r27_r31
+#endif /* L_save_27c */
+
+#ifdef L_save_28c
+       .text
+       .align  2
+       .globl  __save_r28_r31
+       .type   __save_r28_r31,@function
+       /* Allocate space and save registers 28 .. 29, 31 on the stack.  */
+       /* The frame is 12 bytes (3 words): r29 at offset 0, r28 at 4,
+          r31 at 8.  No additional area is allocated here.  */
+       /* Called via:  jalr __save_r28_r31,r10.  */
+__save_r28_r31:
+       addi    -12,sp,sp
+       st.w    r29,0[sp]
+       st.w    r28,4[sp]
+       st.w    r31,8[sp]
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_r28_r31,.-__save_r28_r31
+
+       /* Restore saved registers, deallocate stack and return to the user.  */
+       /* r31 is reloaded from the frame before the final jump uses it.  */
+       /* Called via:  jr __return_r28_r31.  */
+       .align  2
+       .globl  __return_r28_r31
+       .type   __return_r28_r31,@function
+__return_r28_r31:
+       ld.w    0[sp],r29
+       ld.w    4[sp],r28
+       ld.w    8[sp],r31
+       addi    12,sp,sp
+       jmp     [r31]
+       .size   __return_r28_r31,.-__return_r28_r31
+#endif /* L_save_28c */
+
+#ifdef L_save_29c
+       .text
+       .align  2
+       .globl  __save_r29_r31
+       .type   __save_r29_r31,@function
+       /* Allocate space and save registers 29 & 31 on the stack.  */
+       /* The frame is 8 bytes (2 words): r29 at offset 0, r31 at 4.
+          No additional area is allocated here.  */
+       /* Called via:  jalr __save_r29_r31,r10.  */
+__save_r29_r31:
+       addi    -8,sp,sp
+       st.w    r29,0[sp]
+       st.w    r31,4[sp]
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_r29_r31,.-__save_r29_r31
+
+       /* Restore saved registers, deallocate stack and return to the user.  */
+       /* r31 is reloaded from the frame before the final jump uses it.  */
+       /* Called via:  jr __return_r29_r31.  */
+       .align  2
+       .globl  __return_r29_r31
+       .type   __return_r29_r31,@function
+__return_r29_r31:
+       ld.w    0[sp],r29
+       ld.w    4[sp],r31
+       addi    8,sp,sp
+       jmp     [r31]
+       .size   __return_r29_r31,.-__return_r29_r31
+#endif /* L_save_29c */
+
+#ifdef L_save_31c
+       .text
+       .align  2
+       .globl  __save_r31
+       .type   __save_r31,@function
+       /* Allocate space and save register 31 on the stack.  */
+       /* The frame is a single 4-byte word holding r31 at offset 0.
+          No additional area is allocated here.  */
+       /* Called via:  jalr __save_r31,r10.  */
+__save_r31:
+       addi    -4,sp,sp
+       st.w    r31,0[sp]
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_r31,.-__save_r31
+
+       /* Restore saved register 31, deallocate stack and return to the user.  */
+       /* r31 is reloaded from the frame before the final jump uses it.  */
+       /* Called via:  jr __return_r31.  */
+       .align  2
+       .globl  __return_r31
+       .type   __return_r31,@function
+__return_r31:
+       ld.w    0[sp],r31
+       addi    4,sp,sp
+       jmp     [r31]
+        .size   __return_r31,.-__return_r31
+#endif /* L_save_31c */
+
+#ifdef L_save_interrupt
+       .text
+       .align  2
+       .globl  __save_interrupt
+       .type   __save_interrupt,@function
+       /* Save ep, gp and r1 in the first three words of the frame, then
+          load ep and gp with the addresses of __ep and __gp.  */
+       /* Note, 20 bytes of stack have already been allocated.  */
+       /* Called via:  jalr __save_interrupt,r10.  */
+__save_interrupt:
+       /* Presumably already done by the caller (matches the offsets
+          __return_interrupt reloads r10/r11 from):
+          add -20,sp ; st.w r11,16[sp] ; st.w r10,12[sp] ; */
+       st.w    ep,0[sp]
+       st.w    gp,4[sp]
+       st.w    r1,8[sp]
+       /* Build the full addresses from their hi()/lo() halves.  */
+       movhi   hi(__ep),r0,ep
+       movea   lo(__ep),ep,ep
+       movhi   hi(__gp),r0,gp
+       movea   lo(__gp),gp,gp
+       jmp     [r10]
+       .size   __save_interrupt,.-__save_interrupt
+
+       /* Restore saved registers, deallocate stack and return from the interrupt.  */
+       /* Reloads ep, gp, r1, r10 and r11 from offsets 0 .. 16, releases
+          the whole 20-byte frame and leaves via reti.  */
+       /* Called via:  jr __return_interrupt.  */
+       .align  2
+       .globl  __return_interrupt
+       .type   __return_interrupt,@function
+__return_interrupt:
+       ld.w    0[sp],ep
+       ld.w    4[sp],gp
+       ld.w    8[sp],r1
+       ld.w    12[sp],r10
+       ld.w    16[sp],r11
+       addi    20,sp,sp
+       reti
+       .size   __return_interrupt,.-__return_interrupt
+#endif /* L_save_interrupt */
+
+#ifdef L_save_all_interrupt
+       .text
+       .align  2
+       .globl  __save_all_interrupt
+       .type   __save_all_interrupt,@function
+       /* Save all registers except for those saved in __save_interrupt.  */
+       /* Allocates 104 bytes, which is exactly the 26 words stored below
+          (r29 at offset 0 up to r31 at offset 100); no spare area is
+          left in this frame.  */
+       /* Called via:  jalr __save_all_interrupt,r10.  */
+__save_all_interrupt:
+       addi    -104,sp,sp
+#ifdef __EP__
+       /* Use ep as the base for the sst.w stores; r1 is overwritten
+          with the incoming ep and used to put it back afterwards.  */
+       mov     ep,r1
+       mov     sp,ep
+       sst.w   r31,100[ep]
+       sst.w   r2,96[ep]
+       sst.w   gp,92[ep]
+       sst.w   r6,88[ep]
+       sst.w   r7,84[ep]
+       sst.w   r8,80[ep]
+       sst.w   r9,76[ep]
+       sst.w   r11,72[ep]
+       sst.w   r12,68[ep]
+       sst.w   r13,64[ep]
+       sst.w   r14,60[ep]
+       sst.w   r15,56[ep]
+       sst.w   r16,52[ep]
+       sst.w   r17,48[ep]
+       sst.w   r18,44[ep]
+       sst.w   r19,40[ep]
+       sst.w   r20,36[ep]
+       sst.w   r21,32[ep]
+       sst.w   r22,28[ep]
+       sst.w   r23,24[ep]
+       sst.w   r24,20[ep]
+       sst.w   r25,16[ep]
+       sst.w   r26,12[ep]
+       sst.w   r27,8[ep]
+       sst.w   r28,4[ep]
+       sst.w   r29,0[ep]
+       mov     r1,ep
+#else
+       st.w    r31,100[sp]
+       st.w    r2,96[sp]
+       st.w    gp,92[sp]
+       st.w    r6,88[sp]
+       st.w    r7,84[sp]
+       st.w    r8,80[sp]
+       st.w    r9,76[sp]
+       st.w    r11,72[sp]
+       st.w    r12,68[sp]
+       st.w    r13,64[sp]
+       st.w    r14,60[sp]
+       st.w    r15,56[sp]
+       st.w    r16,52[sp]
+       st.w    r17,48[sp]
+       st.w    r18,44[sp]
+       st.w    r19,40[sp]
+       st.w    r20,36[sp]
+       st.w    r21,32[sp]
+       st.w    r22,28[sp]
+       st.w    r23,24[sp]
+       st.w    r24,20[sp]
+       st.w    r25,16[sp]
+       st.w    r26,12[sp]
+       st.w    r27,8[sp]
+       st.w    r28,4[sp]
+       st.w    r29,0[sp]
+#endif
+       /* Return through r10, the link register named in the jalr above.  */
+       jmp     [r10]
+       .size   __save_all_interrupt,.-__save_all_interrupt
+
+       .globl  __restore_all_interrupt
+       .type   __restore_all_interrupt,@function
+       /* Restore all registers saved in __save_all_interrupt and
+          deallocate the stack space.  */
+       /* Offsets mirror the save routine; the 104-byte frame is released
+          after the loads and control returns through r10, not r31.  */
+       /* Called via:  jalr __restore_all_interrupt,r10.  */
+__restore_all_interrupt:
+#ifdef __EP__
+       /* Same ep/r1 swap as in the save routine, here for sld.w.  */
+       mov     ep,r1
+       mov     sp,ep
+       sld.w   100[ep],r31
+       sld.w   96[ep],r2
+       sld.w   92[ep],gp
+       sld.w   88[ep],r6
+       sld.w   84[ep],r7
+       sld.w   80[ep],r8
+       sld.w   76[ep],r9
+       sld.w   72[ep],r11
+       sld.w   68[ep],r12
+       sld.w   64[ep],r13
+       sld.w   60[ep],r14
+       sld.w   56[ep],r15
+       sld.w   52[ep],r16
+       sld.w   48[ep],r17
+       sld.w   44[ep],r18
+       sld.w   40[ep],r19
+       sld.w   36[ep],r20
+       sld.w   32[ep],r21
+       sld.w   28[ep],r22
+       sld.w   24[ep],r23
+       sld.w   20[ep],r24
+       sld.w   16[ep],r25
+       sld.w   12[ep],r26
+       sld.w   8[ep],r27
+       sld.w   4[ep],r28
+       sld.w   0[ep],r29
+       mov     r1,ep
+#else
+       ld.w    100[sp],r31
+       ld.w    96[sp],r2
+       ld.w    92[sp],gp
+       ld.w    88[sp],r6
+       ld.w    84[sp],r7
+       ld.w    80[sp],r8
+       ld.w    76[sp],r9
+       ld.w    72[sp],r11
+       ld.w    68[sp],r12
+       ld.w    64[sp],r13
+       ld.w    60[sp],r14
+       ld.w    56[sp],r15
+       ld.w    52[sp],r16
+       ld.w    48[sp],r17
+       ld.w    44[sp],r18
+       ld.w    40[sp],r19
+       ld.w    36[sp],r20
+       ld.w    32[sp],r21
+       ld.w    28[sp],r22
+       ld.w    24[sp],r23
+       ld.w    20[sp],r24
+       ld.w    16[sp],r25
+       ld.w    12[sp],r26
+       ld.w    8[sp],r27
+       ld.w    4[sp],r28
+       ld.w    0[sp],r29
+#endif
+       addi    104,sp,sp       
+       jmp     [r10]
+       .size   __restore_all_interrupt,.-__restore_all_interrupt
+#endif /* L_save_all_interrupt */
+       
+#if defined(__v850e__) || defined(__v850e1__) || defined(__v850e2__) || defined(__v850e2v3__)
+#ifdef L_callt_save_r2_r29
+       /* Put these functions into the call table area.  */
+       .call_table_text
+       
+       /* Allocate space and save registers 2, 20 .. 29 on the stack.  */
+       /* r2 is stored by hand in its own 4-byte slot first; r20 .. r29
+          are then handled by the prepare instruction.  */
+       /* Called via:  callt ctoff(__callt_save_r2_r29).  */
+       .align  2
+.L_save_r2_r29:
+       add     -4, sp
+       st.w    r2, 0[sp]
+       prepare {r20 - r29}, 0
+       ctret
+
+       /* Restore saved registers, deallocate stack and return to the user.  */
+       /* Mirror image of the save path: dispose for r20 .. r29, then r2
+          is reloaded and its slot released; returns through r31.  */
+       /* Called via:  callt ctoff(__callt_return_r2_r29).  */
+       .align  2
+.L_return_r2_r29:
+       dispose 0, {r20-r29}
+       ld.w    0[sp], r2
+       add     4, sp
+       jmp     [r31]
+
+       /* Place the offsets of the start of these routines into the call table.  */
+       .call_table_data
+
+       /* Each global symbol labels a 16-bit table entry holding the
+          ctoff() offset of the corresponding local routine above.  */
+       .global __callt_save_r2_r29
+       .type   __callt_save_r2_r29,@function
+__callt_save_r2_r29:   .short ctoff(.L_save_r2_r29)
+       
+       .global __callt_return_r2_r29
+       .type   __callt_return_r2_r29,@function
+__callt_return_r2_r29: .short ctoff(.L_return_r2_r29)
+       
+#endif /* L_callt_save_r2_r29.  */
+
+#ifdef L_callt_save_r2_r31
+       /* Put these functions into the call table area.  */
+       .call_table_text
+       
+       /* Allocate space and save registers 2 and 20 .. 29, 31 on the stack.  */
+       /* r2 is stored by hand in its own 4-byte slot first; the rest is
+          handled by prepare.  NOTE(review): the last prepare operand is
+          0, so presumably no extra argument save area is reserved here
+          - confirm against the V850E instruction set manual.  */
+       /* Called via:  callt ctoff(__callt_save_r2_r31).  */
+       .align  2
+.L_save_r2_r31:
+       add     -4, sp
+       st.w    r2, 0[sp]
+       prepare {r20 - r29, r31}, 0
+       ctret
+
+       /* Restore saved registers, deallocate stack and return to the user.  */
+       /* Mirror image of the save path: dispose for r20 .. r29 and r31,
+          then r2 is reloaded and its slot released; returns through r31.  */
+       /* Called via:  callt ctoff(__callt_return_r2_r31).  */
+       .align  2
+.L_return_r2_r31:
+       dispose 0, {r20 - r29, r31}
+       ld.w    0[sp], r2
+       addi    4, sp, sp
+       jmp     [r31]
+
+       /* Place the offsets of the start of these routines into the call table.  */
+       .call_table_data
+
+       /* Each global symbol labels a 16-bit table entry holding the
+          ctoff() offset of the corresponding local routine above.  */
+       .global __callt_save_r2_r31
+       .type   __callt_save_r2_r31,@function
+__callt_save_r2_r31:   .short ctoff(.L_save_r2_r31)
+       
+       .global __callt_return_r2_r31
+       .type   __callt_return_r2_r31,@function
+__callt_return_r2_r31: .short ctoff(.L_return_r2_r31)
+       
+#endif /* L_callt_save_r2_r31 */
+
+#ifdef L_callt_save_interrupt
+       /* Put these functions into the call table area.  */
+       .call_table_text
+       
+       /* Save ep, gp and r1 on the stack and load ep and gp with the
+          addresses of __ep and __gp.  */
+       /* Called via:  callt ctoff(__callt_save_interrupt).  */
+       .align  2
+.L_save_interrupt:
+        /* SP has already been moved before callt ctoff(_save_interrupt).  */
+        /* R1, R10, R11, ctpc and ctpsw have already been saved before callt ctoff(_save_interrupt).  */
+        /* NOTE(review): the sequence quoted below is kept as found; its
+           r1 slot (24[sp]) disagrees with the st.w r1, 8[sp] done here
+           and with the offsets .L_return_interrupt reads - confirm
+           against the code the compiler actually emits.  */
+        /* addi -28, sp, sp  */
+        /* st.w r1,    24[sp] */
+        /* st.w r10,   12[sp] */
+        /* st.w r11,   16[sp] */
+        /* stsr ctpc,  r10    */
+        /* st.w r10,   20[sp] */
+        /* stsr ctpsw, r10    */
+        /* st.w r10,   24[sp] */
+        st.w    ep,  0[sp]
+        st.w    gp,  4[sp]
+        st.w    r1,  8[sp]
+       mov     hilo(__ep),ep
+       mov     hilo(__gp),gp
+       ctret
+
+        .call_table_text
+       /* Restore saved registers, deallocate stack and return from the interrupt.  */
+       /* Reloads ctpsw and ctpc (via r1) from offsets 24 and 20, then
+          r11, r10, r1, gp and ep, releases the 28-byte frame and
+          leaves via reti.  */
+        /* Called via:  callt ctoff(__callt_return_interrupt).  */
+       .align  2
+       /* NOTE(review): these two directives name __return_interrupt, but
+          the label defined below is the local .L_return_interrupt -
+          confirm whether they are intentional.  */
+       .globl  __return_interrupt
+       .type   __return_interrupt,@function
+.L_return_interrupt:
+        ld.w    24[sp], r1
+        ldsr    r1,     ctpsw
+        ld.w    20[sp], r1
+        ldsr    r1,     ctpc
+        ld.w    16[sp], r11
+        ld.w    12[sp], r10
+        ld.w     8[sp], r1
+        ld.w     4[sp], gp
+        ld.w     0[sp], ep
+        addi    28, sp, sp
+        reti
+
+       /* Place the offsets of the start of these routines into the call table.  */
+       .call_table_data
+
+        /* Each global symbol labels a 16-bit table entry holding the
+           ctoff() offset of the corresponding local routine above.  */
+        .global __callt_save_interrupt
+        .type   __callt_save_interrupt,@function
+__callt_save_interrupt:         .short ctoff(.L_save_interrupt)
+
+        .global __callt_return_interrupt
+        .type   __callt_return_interrupt,@function
+__callt_return_interrupt:       .short ctoff(.L_return_interrupt)
+       
+#endif /* L_callt_save_interrupt */
+
+#ifdef L_callt_save_all_interrupt
+       /* Put these functions into the call table area.  */
+       .call_table_text
+       
+       /* Save all registers except for those saved in __save_interrupt.  */
+       /* Allocate enough stack for all of the registers & 16 bytes of space.  */
+       /* Called via:  callt ctoff(__callt_save_all_interrupt).  */
+       .align  2
+.L_save_all_interrupt:
+       addi    -60, sp, sp
+#ifdef __EP__
+       mov     ep,  r1
+       mov     sp,  ep
+       sst.w   r2,  56[ep]
+       sst.w   r5,  52[ep]
+       sst.w   r6,  48[ep]
+       sst.w   r7,  44[ep]
+       sst.w   r8,  40[ep]
+       sst.w   r9,  36[ep]
+       sst.w   r11, 32[ep]
+       sst.w   r12, 28[ep]
+       sst.w   r13, 24[ep]
+       sst.w   r14, 20[ep]
+       sst.w   r15, 16[ep]
+       sst.w   r16, 12[ep]
+       sst.w   r17, 8[ep]
+       sst.w   r18, 4[ep]
+       sst.w   r19, 0[ep]
+       mov     r1,  ep
+#else
+       st.w    r2,  56[sp]
+       st.w    r5,  52[sp]
+       st.w    r6,  48[sp]
+       st.w    r7,  44[sp]
+       st.w    r8,  40[sp]
+       st.w    r9,  36[sp]
+       st.w    r11, 32[sp]
+       st.w    r12, 28[sp]
+       st.w    r13, 24[sp]
+       st.w    r14, 20[sp]
+       st.w    r15, 16[sp]
+       st.w    r16, 12[sp]
+       st.w    r17, 8[sp]
+       st.w    r18, 4[sp]
+       st.w    r19, 0[sp]
+#endif
+       prepare {r20 - r29, r31}, 0
+       ctret   
+
+       /* Restore all registers saved in __save_all_interrupt and
+          deallocate the stack space.  */
+       /* Called via:  callt ctoff(__callt_restore_all_interrupt).  */
+       .align 2
+.L_restore_all_interrupt:
+       dispose 0, {r20 - r29, r31}
+#ifdef __EP__
+       mov     ep, r1
+       mov     sp, ep
+       sld.w   0 [ep], r19
+       sld.w   4 [ep], r18
+       sld.w   8 [ep], r17
+       sld.w   12[ep], r16
+       sld.w   16[ep], r15
+       sld.w   20[ep], r14
+       sld.w   24[ep], r13
+       sld.w   28[ep], r12
+       sld.w   32[ep], r11
+       sld.w   36[ep], r9
+       sld.w   40[ep], r8
+       sld.w   44[ep], r7
+       sld.w   48[ep], r6
+       sld.w   52[ep], r5
+       sld.w   56[ep], r2
+       mov     r1, ep
+#else
+       ld.w    0 [sp], r19
+       ld.w    4 [sp], r18
+       ld.w    8 [sp], r17
+       ld.w    12[sp], r16
+       ld.w    16[sp], r15
+       ld.w    20[sp], r14
+       ld.w    24[sp], r13
+       ld.w    28[sp], r12
+       ld.w    32[sp], r11
+       ld.w    36[sp], r9
+       ld.w    40[sp], r8
+       ld.w    44[sp], r7
+       ld.w    48[sp], r6
+       ld.w    52[sp], r5
+       ld.w    56[sp], r2
+#endif
+       addi    60, sp, sp
+       ctret
+
+       /* Place the offsets of the start of these routines into the call table.  */
+       .call_table_data
+
+       .global __callt_save_all_interrupt
+       .type   __callt_save_all_interrupt,@function
+__callt_save_all_interrupt:    .short ctoff(.L_save_all_interrupt)
+       
+       .global __callt_restore_all_interrupt
+       .type   __callt_restore_all_interrupt,@function
+__callt_restore_all_interrupt: .short ctoff(.L_restore_all_interrupt)
+       
+#endif /* L_callt_save_all_interrupt */
+
+
+#define MAKE_CALLT_FUNCS( START )                                              \
+       .call_table_text                                                        ;\
+       .align  2                                                               ;\
+       /* Allocate space and save registers START .. r29 on the stack.  */     ;\
+       /* Called via:  callt ctoff(__callt_save_START_r29).  */                ;\
+.L_save_##START##_r29:                                                         ;\
+       prepare { START - r29 }, 0                                              ;\
+       ctret                                                                   ;\
+                                                                               ;\
+       /* Restore saved registers, deallocate stack and return.  */            ;\
+       /* Called via:  callt ctoff(__return_START_r29).  */                    ;\
+       .align  2                                                               ;\
+.L_return_##START##_r29:                                                       ;\
+       dispose 0, { START - r29 }, r31                                         ;\
+                                                                               ;\
+       /* Place the offsets of the start of these funcs into the call table.  */;\
+       .call_table_data                                                        ;\
+                                                                               ;\
+       .global __callt_save_##START##_r29                                      ;\
+       .type   __callt_save_##START##_r29,@function                            ;\
+__callt_save_##START##_r29:    .short ctoff(.L_save_##START##_r29 )            ;\
+                                                                               ;\
+       .global __callt_return_##START##_r29                                    ;\
+       .type   __callt_return_##START##_r29,@function                          ;\
+__callt_return_##START##_r29:  .short ctoff(.L_return_##START##_r29 )  
+
+
+#define MAKE_CALLT_CFUNCS( START )                                             \
+       .call_table_text                                                        ;\
+       .align  2                                                               ;\
+       /* Allocate space and save registers START .. r31 on the stack.  */     ;\
+       /* Called via:  callt ctoff(__callt_save_START_r31c).  */               ;\
+.L_save_##START##_r31c:                                                                ;\
+       prepare { START - r29, r31}, 0                                          ;\
+       ctret                                                                   ;\
+                                                                               ;\
+       /* Restore saved registers, deallocate stack and return.  */            ;\
+       /* Called via:  callt ctoff(__return_START_r31c).  */                   ;\
+       .align  2                                                               ;\
+.L_return_##START##_r31c:                                                      ;\
+       dispose 0, { START - r29, r31}, r31                                     ;\
+                                                                               ;\
+       /* Place the offsets of the start of these funcs into the call table.  */;\
+       .call_table_data                                                        ;\
+                                                                               ;\
+       .global __callt_save_##START##_r31c                                     ;\
+       .type   __callt_save_##START##_r31c,@function                           ;\
+__callt_save_##START##_r31c:    .short ctoff(.L_save_##START##_r31c )          ;\
+                                                                               ;\
+       .global __callt_return_##START##_r31c                                   ;\
+       .type   __callt_return_##START##_r31c,@function                         ;\
+__callt_return_##START##_r31c:  .short ctoff(.L_return_##START##_r31c )        
+
+       
+#ifdef L_callt_save_20
+       MAKE_CALLT_FUNCS (r20)
+#endif
+#ifdef L_callt_save_21
+       MAKE_CALLT_FUNCS (r21)
+#endif
+#ifdef L_callt_save_22
+       MAKE_CALLT_FUNCS (r22)
+#endif
+#ifdef L_callt_save_23
+       MAKE_CALLT_FUNCS (r23)
+#endif
+#ifdef L_callt_save_24
+       MAKE_CALLT_FUNCS (r24)
+#endif
+#ifdef L_callt_save_25
+       MAKE_CALLT_FUNCS (r25)
+#endif
+#ifdef L_callt_save_26
+       MAKE_CALLT_FUNCS (r26)
+#endif
+#ifdef L_callt_save_27
+       MAKE_CALLT_FUNCS (r27)
+#endif
+#ifdef L_callt_save_28
+       MAKE_CALLT_FUNCS (r28)
+#endif
+#ifdef L_callt_save_29
+       MAKE_CALLT_FUNCS (r29)
+#endif
+
+#ifdef L_callt_save_20c
+       MAKE_CALLT_CFUNCS (r20)
+#endif
+#ifdef L_callt_save_21c
+       MAKE_CALLT_CFUNCS (r21)
+#endif
+#ifdef L_callt_save_22c
+       MAKE_CALLT_CFUNCS (r22)
+#endif
+#ifdef L_callt_save_23c
+       MAKE_CALLT_CFUNCS (r23)
+#endif
+#ifdef L_callt_save_24c
+       MAKE_CALLT_CFUNCS (r24)
+#endif
+#ifdef L_callt_save_25c
+       MAKE_CALLT_CFUNCS (r25)
+#endif
+#ifdef L_callt_save_26c
+       MAKE_CALLT_CFUNCS (r26)
+#endif
+#ifdef L_callt_save_27c
+       MAKE_CALLT_CFUNCS (r27)
+#endif
+#ifdef L_callt_save_28c
+       MAKE_CALLT_CFUNCS (r28)
+#endif
+#ifdef L_callt_save_29c
+       MAKE_CALLT_CFUNCS (r29)
+#endif
+
+       
+#ifdef L_callt_save_31c
+       .call_table_text
+       .align  2
+       /* Allocate space and save register r31 on the stack.  */
+       /* Called via:  callt ctoff(__callt_save_r31c).  */
+.L_callt_save_r31c:
+       prepare {r31}, 0
+       ctret
+
+       /* Restore saved registers, deallocate stack and return.  */
+       /* Called via:  callt ctoff(__callt_return_r31c).  */
+       .align  2
+.L_callt_return_r31c:
+       dispose 0, {r31}, r31
+       
+       /* Place the offsets of the start of these funcs into the call table.  */
+       .call_table_data
+
+       .global __callt_save_r31c
+       .type   __callt_save_r31c,@function
+__callt_save_r31c:     .short ctoff(.L_callt_save_r31c)
+
+       .global __callt_return_r31c
+       .type   __callt_return_r31c,@function
+__callt_return_r31c:   .short ctoff(.L_callt_return_r31c)              
+#endif
+
+#endif /* __v850e__ */
+
+/*  libgcc2 routines for NEC V850.  */
+/*  Double Integer Arithmetical Operation.  */
+
+#ifdef L_negdi2
+       .text
+       .global ___negdi2
+       .type   ___negdi2, @function
+___negdi2:
+       not     r6, r10
+       add     1,  r10
+       setf    l,  r6
+       not     r7, r11
+       add     r6, r11
+       jmp     [lp]
+
+       .size ___negdi2,.-___negdi2
+#endif
+
+#ifdef L_cmpdi2
+       .text
+       .global ___cmpdi2
+       .type   ___cmpdi2,@function
+___cmpdi2:
+       # Signed comparison between the high words.
+       cmp     r9, r7
+       be      .L_cmpdi_cmp_low
+       setf    ge, r10
+       setf    gt, r6
+       add     r6, r10
+       jmp     [lp]
+.L_cmpdi_cmp_low:
+       # Unsigned comparison between the low words.
+       cmp     r8, r6
+       setf    nl, r10
+       setf    h,  r6
+       add     r6, r10
+       jmp     [lp]    
+       .size ___cmpdi2, . - ___cmpdi2  
+#endif
+
+#ifdef L_ucmpdi2
+       .text
+       .global ___ucmpdi2
+       .type   ___ucmpdi2,@function
+___ucmpdi2:
+       cmp     r9, r7  # Check whether the high words are equal.
+       bne     .L_ucmpdi_check_psw
+       cmp     r8, r6  # High words equal: compare the low words.
+.L_ucmpdi_check_psw:
+       setf    nl, r10 # r10 = 1 if condition NL (not lower) holds, else 0.
+       setf    h,  r6  # r6 = 1 if condition H (higher) holds, else 0.
+       add     r6, r10 # Add the result of comparison NL and comparison H.
+       jmp     [lp]    
+       .size ___ucmpdi2, . - ___ucmpdi2
+#endif
+
+#ifdef L_muldi3
+       .text
+       .global ___muldi3
+       .type   ___muldi3,@function
+___muldi3:
+#ifdef __v850__
+        jarl  __save_r26_r31, r10
+        addi  16,  sp, sp
+        mov   r6,  r28
+        shr   15,  r28
+        movea lo(32767), r0, r14
+        and   r14, r28
+        mov   r8,  r10
+        shr   15,  r10
+        and   r14, r10
+        mov   r6,  r19
+        shr   30,  r19
+        mov   r7,  r12
+        shl   2,   r12
+        or    r12, r19
+        and   r14, r19
+        mov   r8,  r13
+        shr   30,  r13
+        mov   r9,  r12
+        shl   2,   r12
+        or    r12, r13
+        and   r14, r13
+        mov   r7,  r11
+        shr   13,  r11
+        and   r14, r11
+        mov   r9,  r31
+        shr   13,  r31
+        and   r14, r31
+        mov   r7,  r29
+        shr   28,  r29
+        and   r14, r29
+        mov   r9,  r12
+        shr   28,  r12
+        and   r14, r12
+        and   r14, r6
+        and   r14, r8
+        mov   r6,  r14
+        mulh  r8,  r14
+        mov   r6,  r16
+        mulh  r10, r16
+        mov   r6,  r18
+        mulh  r13, r18
+        mov   r6,  r15
+        mulh  r31, r15
+        mulh  r12, r6
+        mov   r28,  r17
+        mulh  r10, r17
+        add   -16, sp
+        mov   r28,  r12
+        mulh  r8,  r12
+        add   r17, r18
+        mov   r28,  r17
+        mulh  r31, r17
+        add   r12, r16
+        mov   r28,  r12
+        mulh  r13, r12
+        add   r17, r6
+        mov   r19, r17
+        add   r12, r15
+        mov   r19, r12
+        mulh  r8,  r12
+        mulh  r10, r17
+        add   r12, r18
+        mov   r19, r12
+        mulh  r13, r12
+        add   r17, r15
+        mov   r11, r13
+        mulh  r8,  r13
+        add   r12, r6
+        mov   r11, r12
+        mulh  r10, r12
+        add   r13, r15
+        mulh  r29, r8
+        add   r12, r6
+        mov   r16, r13
+        shl   15,  r13
+        add   r14, r13
+        mov   r18, r12
+        shl   30,  r12
+        mov   r13, r26
+        add   r12, r26
+        shr   15,  r14
+        movhi hi(131071), r0,  r12
+        movea lo(131071), r12, r13
+        and   r13, r14
+        mov   r16, r12
+        and   r13, r12
+        add   r12, r14
+        mov   r18, r12
+        shl   15,  r12
+        and   r13, r12
+        add   r12, r14
+        shr   17,  r14
+        shr   17,  r16
+        add   r14, r16
+        shl   13,  r15
+        shr   2,   r18
+        add   r18, r15
+        add   r15, r16
+        mov   r16, r27
+        add   r8,  r6
+        shl   28,  r6
+        add   r6,  r27
+        mov   r26, r10
+        mov   r27, r11
+        jr    __return_r26_r31
+#else /* defined(__v850e__) */
+       /*  (Ahi << 32 + Alo) * (Bhi << 32 + Blo) */
+       /*   r7           r6      r9         r8   */
+       mov  r8, r10
+       mulu r7, r8,  r0                /* Ahi * Blo */
+       mulu r6, r9,  r0                /* Alo * Bhi */
+       mulu r6, r10, r11               /* Alo * Blo */
+       add  r8, r11
+       add  r9, r11
+       jmp  [r31]
+#endif /* defined(__v850e__) */
+       .size ___muldi3, . - ___muldi3
+#endif
+       
diff --git a/libgcc/config/v850/t-v850 b/libgcc/config/v850/t-v850

new file mode 100644 (file)

index 0000000..b61703a
--- /dev/null
+++ b/libgcc/config/v850/t-v850
@@ -0,0 +1,60 @@
+LIB1ASMSRC = v850/lib1funcs.S
+LIB1ASMFUNCS   = _mulsi3 \
+                 _divsi3 \
+                 _udivsi3 \
+                 _modsi3 \
+                 _umodsi3 \
+                 _save_2 \
+                 _save_20 \
+                 _save_21 \
+                 _save_22 \
+                 _save_23 \
+                 _save_24 \
+                 _save_25 \
+                 _save_26 \
+                 _save_27 \
+                 _save_28 \
+                 _save_29 \
+                 _save_2c \
+                 _save_20c \
+                 _save_21c \
+                 _save_22c \
+                 _save_23c \
+                 _save_24c \
+                 _save_25c \
+                 _save_26c \
+                 _save_27c \
+                 _save_28c \
+                 _save_29c \
+                 _save_31c \
+                 _save_interrupt \
+                 _save_all_interrupt \
+                  _callt_save_20 \
+                 _callt_save_21 \
+                 _callt_save_22 \
+                 _callt_save_23 \
+                 _callt_save_24 \
+                 _callt_save_25 \
+                 _callt_save_26 \
+                 _callt_save_27 \
+                 _callt_save_28 \
+                 _callt_save_29 \
+                 _callt_save_20c \
+                 _callt_save_21c \
+                 _callt_save_22c \
+                 _callt_save_23c \
+                 _callt_save_24c \
+                 _callt_save_25c \
+                 _callt_save_26c \
+                 _callt_save_27c \
+                 _callt_save_28c \
+                 _callt_save_29c \
+                 _callt_save_31c \
+                 _callt_save_interrupt \
+                 _callt_save_all_interrupt \
+                 _callt_save_r2_r29 \
+                 _callt_save_r2_r31 \
+                 _negdi2 \
+                 _cmpdi2 \
+                 _ucmpdi2 \
+                 _muldi3
diff --git a/libgcc/config/vax/lib1funcs.S b/libgcc/config/vax/lib1funcs.S

new file mode 100644 (file)

index 0000000..1d57b56
--- /dev/null
+++ b/libgcc/config/vax/lib1funcs.S
@@ -0,0 +1,92 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+   This file is part of GCC.
+   Contributed by Maciej W. Rozycki <macro@linux-mips.org>.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef L_udivsi3
+       .text
+       .globl  __udivsi3
+       .type   __udivsi3, @function
+__udivsi3:
+       .word   0
+       movl    8(%ap), %r1
+       blss    0f                      /* Check bit #31 of divisor.  */
+       movl    4(%ap), %r2
+       blss    1f                      /* Check bit #31 of dividend.  */
+
+       /* Bit #31 clear in both operands, do a standard division.  */
+
+       divl3   %r1, %r2, %r0
+       ret
+
+       /* MSB of divisor set, only 1 or 0 may result.  */
+0:
+       decl    %r1
+       clrl    %r0
+       cmpl    %r1, 4(%ap)
+       adwc    $0, %r0
+       ret
+
+       /* MSB of dividend set, do an extended division.  */
+1:
+       clrl    %r3
+       ediv    %r1, %r2, %r0, %r3
+       ret
+       .size   __udivsi3, . - __udivsi3
+       .previous
+#endif
+
+#ifdef L_umodsi3
+       .text
+       .globl  __umodsi3
+       .type   __umodsi3, @function
+__umodsi3:
+       .word   0
+       movl    8(%ap), %r1
+       blss    0f                      /* Check bit #31 of divisor.  */
+       movl    4(%ap), %r2
+       blss    1f                      /* Check bit #31 of dividend.  */
+
+       /* Bit #31 clear in both operands, do a standard division.  */
+
+       divl3   %r1, %r2, %r0
+       mull2   %r0, %r1
+       subl3   %r1, %r2, %r0
+       ret
+
+       /* MSB of divisor set, subtract the divisor at most once.  */
+0:
+       movl    4(%ap), %r2
+       clrl    %r0
+       cmpl    %r2, %r1
+       sbwc    $0, %r0
+       bicl2   %r0, %r1
+       subl3   %r1, %r2, %r0
+       ret
+
+       /* MSB of dividend set, do an extended division.  */
+1:
+       clrl    %r3
+       ediv    %r1, %r2, %r3, %r0
+       ret
+       .size   __umodsi3, . - __umodsi3
+       .previous
+#endif
diff --git a/libgcc/config/vax/t-linux b/libgcc/config/vax/t-linux

new file mode 100644 (file)

index 0000000..17929c8
--- /dev/null
+++ b/libgcc/config/vax/t-linux
@@ -0,0 +1,2 @@
+LIB1ASMSRC = vax/lib1funcs.S
+LIB1ASMFUNCS = _udivsi3 _umodsi3
diff --git a/libgcc/config/xtensa/ieee754-df.S b/libgcc/config/xtensa/ieee754-df.S

new file mode 100644 (file)

index 0000000..9b46889
--- /dev/null
+++ b/libgcc/config/xtensa/ieee754-df.S
@@ -0,0 +1,2388 @@
+/* IEEE-754 double-precision functions for Xtensa
+   Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
+   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef __XTENSA_EB__
+#define xh a2
+#define xl a3
+#define yh a4
+#define yl a5
+#else
+#define xh a3
+#define xl a2
+#define yh a5
+#define yl a4
+#endif
+
+/*  Warning!  The branch displacements for some Xtensa branch instructions
+    are quite small, and this code has been carefully laid out to keep
+    branch targets in range.  If you change anything, be sure to check that
+    the assembler is not relaxing anything to branch over a jump.  */
+
+#ifdef L_negdf2
+
+       .align  4
+       .global __negdf2
+       .type   __negdf2, @function
+__negdf2:
+       leaf_entry sp, 16
+       movi    a4, 0x80000000
+       xor     xh, xh, a4
+       leaf_return
+
+#endif /* L_negdf2 */
+
+#ifdef L_addsubdf3
+
+       /* Addition */
+__adddf3_aux:
+       
+       /* Handle NaNs and Infinities.  (This code is placed before the
+          start of the function just to keep it in range of the limited
+          branch displacements.)  */
+
+.Ladd_xnan_or_inf:
+       /* If y is neither Infinity nor NaN, return x.  */
+       bnall   yh, a6, 1f
+       /* If x is a NaN, return it.  Otherwise, return y.  */
+       slli    a7, xh, 12
+       or      a7, a7, xl
+       beqz    a7, .Ladd_ynan_or_inf
+1:     leaf_return
+
+.Ladd_ynan_or_inf:
+       /* Return y.  */
+       mov     xh, yh
+       mov     xl, yl
+       leaf_return
+
+.Ladd_opposite_signs:
+       /* Operand signs differ.  Do a subtraction.  */
+       slli    a7, a6, 11
+       xor     yh, yh, a7
+       j       .Lsub_same_sign
+
+       .align  4
+       .global __adddf3
+       .type   __adddf3, @function
+__adddf3:
+       leaf_entry sp, 16
+       movi    a6, 0x7ff00000
+
+       /* Check if the two operands have the same sign.  */
+       xor     a7, xh, yh
+       bltz    a7, .Ladd_opposite_signs
+
+.Ladd_same_sign:       
+       /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
+       ball    xh, a6, .Ladd_xnan_or_inf
+       ball    yh, a6, .Ladd_ynan_or_inf
+
+       /* Compare the exponents.  The smaller operand will be shifted
+          right by the exponent difference and added to the larger
+          one.  */
+       extui   a7, xh, 20, 12
+       extui   a8, yh, 20, 12
+       bltu    a7, a8, .Ladd_shiftx
+
+.Ladd_shifty:
+       /* Check if the smaller (or equal) exponent is zero.  */
+       bnone   yh, a6, .Ladd_yexpzero
+
+       /* Replace yh sign/exponent with 0x001.  */
+       or      yh, yh, a6
+       slli    yh, yh, 11
+       srli    yh, yh, 11
+
+.Ladd_yexpdiff:
+       /* Compute the exponent difference.  Optimize for difference < 32.  */
+       sub     a10, a7, a8
+       bgeui   a10, 32, .Ladd_bigshifty
+       
+       /* Shift yh/yl right by the exponent difference.  Any bits that are
+          shifted out of yl are saved in a9 for rounding the result.  */
+       ssr     a10
+       movi    a9, 0
+       src     a9, yl, a9
+       src     yl, yh, yl
+       srl     yh, yh
+
+.Ladd_addy:
+       /* Do the 64-bit addition.  */
+       add     xl, xl, yl
+       add     xh, xh, yh
+       bgeu    xl, yl, 1f
+       addi    xh, xh, 1
+1:
+       /* Check if the add overflowed into the exponent.  */
+       extui   a10, xh, 20, 12
+       beq     a10, a7, .Ladd_round
+       mov     a8, a7
+       j       .Ladd_carry
+
+.Ladd_yexpzero:
+       /* y is a subnormal value.  Replace its sign/exponent with zero,
+          i.e., no implicit "1.0", and increment the apparent exponent
+          because subnormals behave as if they had the minimum (nonzero)
+          exponent.  Test for the case when both exponents are zero.  */
+       slli    yh, yh, 12
+       srli    yh, yh, 12
+       bnone   xh, a6, .Ladd_bothexpzero
+       addi    a8, a8, 1
+       j       .Ladd_yexpdiff
+
+.Ladd_bothexpzero:
+       /* Both exponents are zero.  Handle this as a special case.  There
+          is no need to shift or round, and the normal code for handling
+          a carry into the exponent field will not work because it
+          assumes there is an implicit "1.0" that needs to be added.  */
+       add     xl, xl, yl
+       add     xh, xh, yh
+       bgeu    xl, yl, 1f
+       addi    xh, xh, 1
+1:     leaf_return
+
+.Ladd_bigshifty:
+       /* Exponent difference >= 64 -- just return the bigger value.  */
+       bgeui   a10, 64, 1b
+
+       /* Shift yh/yl right by the exponent difference.  Any bits that are
+          shifted out are saved in a9 for rounding the result.  */
+       ssr     a10
+       sll     a11, yl         /* lost bits shifted out of yl */
+       src     a9, yh, yl
+       srl     yl, yh
+       movi    yh, 0
+       beqz    a11, .Ladd_addy
+       or      a9, a9, a10     /* any positive, nonzero value will work */
+       j       .Ladd_addy
+
+.Ladd_xexpzero:
+       /* Same as "yexpzero" except skip handling the case when both
+          exponents are zero.  */
+       slli    xh, xh, 12
+       srli    xh, xh, 12
+       addi    a7, a7, 1
+       j       .Ladd_xexpdiff
+
+.Ladd_shiftx:
+       /* Same thing as the "shifty" code, but with x and y swapped.  Also,
+          because the exponent difference is always nonzero in this version,
+          the shift sequence can use SLL and skip loading a constant zero.  */
+       bnone   xh, a6, .Ladd_xexpzero
+
+       or      xh, xh, a6
+       slli    xh, xh, 11
+       srli    xh, xh, 11
+
+.Ladd_xexpdiff:
+       sub     a10, a8, a7
+       bgeui   a10, 32, .Ladd_bigshiftx
+       
+       ssr     a10
+       sll     a9, xl
+       src     xl, xh, xl
+       srl     xh, xh
+
+.Ladd_addx:
+       add     xl, xl, yl
+       add     xh, xh, yh
+       bgeu    xl, yl, 1f
+       addi    xh, xh, 1
+1:
+       /* Check if the add overflowed into the exponent.  */
+       extui   a10, xh, 20, 12
+       bne     a10, a8, .Ladd_carry
+
+.Ladd_round:
+       /* Round up if the leftover fraction is >= 1/2.  */
+       bgez    a9, 1f
+       addi    xl, xl, 1
+       beqz    xl, .Ladd_roundcarry
+
+       /* Check if the leftover fraction is exactly 1/2.  */
+       slli    a9, a9, 1
+       beqz    a9, .Ladd_exactlyhalf
+1:     leaf_return
+
+.Ladd_bigshiftx:
+       /* Mostly the same thing as "bigshifty"....  */
+       bgeui   a10, 64, .Ladd_returny
+
+       ssr     a10
+       sll     a11, xl
+       src     a9, xh, xl
+       srl     xl, xh
+       movi    xh, 0
+       beqz    a11, .Ladd_addx
+       or      a9, a9, a10
+       j       .Ladd_addx
+
+.Ladd_returny:
+       mov     xh, yh
+       mov     xl, yl
+       leaf_return
+
+.Ladd_carry:   
+       /* The addition has overflowed into the exponent field, so the
+          value needs to be renormalized.  The mantissa of the result
+          can be recovered by subtracting the original exponent and
+          adding 0x100000 (which is the explicit "1.0" for the
+          mantissa of the non-shifted operand -- the "1.0" for the
+          shifted operand was already added).  The mantissa can then
+          be shifted right by one bit.  The explicit "1.0" of the
+          shifted mantissa then needs to be replaced by the exponent,
+          incremented by one to account for the normalizing shift.
+          It is faster to combine these operations: do the shift first
+          and combine the additions and subtractions.  If x is the
+          original exponent, the result is:
+              shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
+          or:
+              shifted mantissa + ((x + 1) << 19)
+          Note that the exponent is incremented here by leaving the
+          explicit "1.0" of the mantissa in the exponent field.  */
+
+       /* Shift xh/xl right by one bit.  Save the lsb of xl.  */
+       mov     a10, xl
+       ssai    1
+       src     xl, xh, xl
+       srl     xh, xh
+
+       /* See explanation above.  The original exponent is in a8.  */
+       addi    a8, a8, 1
+       slli    a8, a8, 19
+       add     xh, xh, a8
+
+       /* Return an Infinity if the exponent overflowed.  */
+       ball    xh, a6, .Ladd_infinity
+       
+       /* Same thing as the "round" code except the msb of the leftover
+          fraction is bit 0 of a10, with the rest of the fraction in a9.  */
+       bbci.l  a10, 0, 1f
+       addi    xl, xl, 1
+       beqz    xl, .Ladd_roundcarry
+       beqz    a9, .Ladd_exactlyhalf
+1:     leaf_return
+
+.Ladd_infinity:
+       /* Clear the mantissa.  */
+       movi    xl, 0
+       srli    xh, xh, 20
+       slli    xh, xh, 20
+
+       /* The sign bit may have been lost in a carry-out.  Put it back.  */
+       slli    a8, a8, 1
+       or      xh, xh, a8
+       leaf_return
+
+.Ladd_exactlyhalf:
+       /* Round down to the nearest even value.  */
+       srli    xl, xl, 1
+       slli    xl, xl, 1
+       leaf_return
+
+.Ladd_roundcarry:
+       /* xl is always zero when the rounding increment overflows, so
+          there's no need to round it to an even value.  */
+       addi    xh, xh, 1
+       /* Overflow to the exponent is OK.  */
+       leaf_return
+
+
+       /* Subtraction */
+__subdf3_aux:
+       
+       /* Handle NaNs and Infinities.  (This code is placed before the
+          start of the function just to keep it in range of the limited
+          branch displacements.)  */
+
+.Lsub_xnan_or_inf:
+       /* If y is neither Infinity nor NaN, return x.  */
+       bnall   yh, a6, 1f
+       /* Both x and y are either NaN or Inf, so the result is NaN.  */
+       movi    a4, 0x80000     /* make it a quiet NaN */
+       or      xh, xh, a4
+1:     leaf_return
+
+.Lsub_ynan_or_inf:
+       /* Negate y and return it.  */
+       slli    a7, a6, 11
+       xor     xh, yh, a7
+       mov     xl, yl
+       leaf_return
+
+.Lsub_opposite_signs:
+       /* Operand signs differ.  Do an addition.  */
+       slli    a7, a6, 11
+       xor     yh, yh, a7
+       j       .Ladd_same_sign
+
+       /* __subdf3: double-precision subtraction, x - y.  The operands are
+          read from xh/xl and yh/yl and the result is left in xh/xl.
+          Operands with opposite signs branch to .Lsub_opposite_signs; the
+          NaN/Infinity cases are handled by out-of-line code placed before
+          this entry point.  */
+       .align  4
+       .global __subdf3
+       .type   __subdf3, @function
+__subdf3:
+       leaf_entry sp, 16
+       movi    a6, 0x7ff00000  /* exponent field mask for the high word */
+
+       /* Check if the two operands have the same sign.  */
+       xor     a7, xh, yh
+       bltz    a7, .Lsub_opposite_signs
+
+.Lsub_same_sign:       
+       /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
+       ball    xh, a6, .Lsub_xnan_or_inf
+       ball    yh, a6, .Lsub_ynan_or_inf
+
+       /* Compare the operands.  In contrast to addition, the entire
+          value matters here.  a7 and a8 receive the exponent fields of
+          x and y.  The signs are equal at this point, so an unsigned
+          comparison of the high words orders the magnitudes.  */
+       extui   a7, xh, 20, 11
+       extui   a8, yh, 20, 11
+       bltu    xh, yh, .Lsub_xsmaller
+       beq     xh, yh, .Lsub_compare_low
+
+.Lsub_ysmaller:
+       /* Check if the smaller (or equal) exponent is zero.  */
+       bnone   yh, a6, .Lsub_yexpzero
+
+       /* Replace yh sign/exponent with 0x001.  (Setting all exponent
+          bits and then clearing the top 11 bits leaves only bit 20, the
+          explicit leading one of the mantissa.)  */
+       or      yh, yh, a6
+       slli    yh, yh, 11
+       srli    yh, yh, 11
+
+.Lsub_yexpdiff:
+       /* Compute the exponent difference.  Optimize for difference < 32.  */
+       sub     a10, a7, a8
+       bgeui   a10, 32, .Lsub_bigshifty
+       
+       /* Shift yh/yl right by the exponent difference.  Any bits that are
+          shifted out of yl are saved in a9 for rounding the result.  */
+       ssr     a10
+       movi    a9, 0
+       src     a9, yl, a9
+       src     yl, yh, yl
+       srl     yh, yh
+
+.Lsub_suby:
+       /* Do the 64-bit subtraction.  The high word is decremented when
+          the low-word subtraction borrows (xl < yl).  */
+       sub     xh, xh, yh
+       bgeu    xl, yl, 1f
+       addi    xh, xh, -1
+1:     sub     xl, xl, yl
+
+       /* Subtract the leftover bits in a9 from zero and propagate any
+          borrow from xh/xl.  When a9 is nonzero, xh/xl is decremented by
+          one as a 64-bit value: xh changes only if xl is zero.  */
+       neg     a9, a9
+       beqz    a9, 1f
+       addi    a5, xh, -1
+       moveqz  xh, a5, xl
+       addi    xl, xl, -1
+1:
+       /* Check if the subtract underflowed into the exponent.  The
+          exponent field of the result is compared with the original
+          exponent of x saved in a7.  */
+       extui   a10, xh, 20, 11
+       beq     a10, a7, .Lsub_round
+       j       .Lsub_borrow
+
+.Lsub_compare_low:
+       /* The high words are equal.  Compare the low words.  */
+       bltu    xl, yl, .Lsub_xsmaller
+       bltu    yl, xl, .Lsub_ysmaller
+       /* The operands are equal.  Return 0.0.  */
+       movi    xh, 0
+       movi    xl, 0
+       /* This return is also the target of the "1b" branch in
+          .Lsub_bigshifty below.  */
+1:     leaf_return
+
+.Lsub_yexpzero:
+       /* y is a subnormal value.  Replace its sign/exponent with zero,
+          i.e., no implicit "1.0".  Unless x is also a subnormal, increment
+          y's apparent exponent because subnormals behave as if they had
+          the minimum (nonzero) exponent.  */
+       slli    yh, yh, 12
+       srli    yh, yh, 12
+       bnone   xh, a6, .Lsub_yexpdiff
+       addi    a8, a8, 1
+       j       .Lsub_yexpdiff
+
+.Lsub_bigshifty:
+       /* Reached when the exponent difference in a10 is at least 32.  */
+       /* Exponent difference >= 64 -- just return the bigger value.  */
+       bgeui   a10, 64, 1b
+
+       /* Shift yh/yl right by the exponent difference.  Any bits that are
+          shifted out are saved in a9 for rounding the result.  */
+       ssr     a10
+       sll     a11, yl         /* lost bits shifted out of yl */
+       src     a9, yh, yl
+       srl     yl, yh
+       movi    yh, 0
+       beqz    a11, .Lsub_suby
+       or      a9, a9, a10     /* any positive, nonzero value will work */
+       j       .Lsub_suby
+
+.Lsub_xsmaller:
+       /* Same thing as the "ysmaller" code, but with x and y swapped and
+          with y negated.  */
+       bnone   xh, a6, .Lsub_xexpzero
+
+       /* Replace xh sign/exponent with 0x001 (explicit leading one).  */
+       or      xh, xh, a6
+       slli    xh, xh, 11
+       srli    xh, xh, 11
+
+.Lsub_xexpdiff:
+       /* Exponent difference (y's exponent in a8 minus x's in a7).  */
+       sub     a10, a8, a7
+       bgeui   a10, 32, .Lsub_bigshiftx
+       
+       /* Shift xh/xl right by the difference; the bits shifted out of
+          xl are collected in a9 for rounding.  */
+       ssr     a10
+       movi    a9, 0
+       src     a9, xl, a9
+       src     xl, xh, xl
+       srl     xh, xh
+
+       /* Negate y.  */
+       slli    a11, a6, 11
+       xor     yh, yh, a11
+
+.Lsub_subx:
+       /* Compute y - x into xh/xl.  xl already holds the low-word
+          difference when the borrow is tested: the subtraction wrapped
+          around exactly when that difference is larger than yl.  */
+       sub     xl, yl, xl
+       sub     xh, yh, xh
+       bgeu    yl, xl, 1f
+       addi    xh, xh, -1
+1:
+       /* Subtract the leftover bits in a9 from zero and propagate any
+          borrow from xh/xl.  */
+       neg     a9, a9
+       beqz    a9, 1f
+       addi    a5, xh, -1
+       moveqz  xh, a5, xl
+       addi    xl, xl, -1
+1:
+       /* Check if the subtract underflowed into the exponent.  Here the
+          reference exponent is that of y (a8), the larger operand.  */
+       extui   a10, xh, 20, 11
+       bne     a10, a8, .Lsub_borrow
+
+.Lsub_round:
+       /* Round up if the leftover fraction is >= 1/2.  The leftover
+          bits are in a9; its most-significant bit is the 1/2 position.  */
+       bgez    a9, 1f
+       addi    xl, xl, 1
+       beqz    xl, .Lsub_roundcarry
+
+       /* Check if the leftover fraction is exactly 1/2.  */
+       slli    a9, a9, 1
+       beqz    a9, .Lsub_exactlyhalf
+1:     leaf_return
+
+.Lsub_xexpzero:
+       /* Same as "yexpzero".  */
+       slli    xh, xh, 12
+       srli    xh, xh, 12
+       bnone   yh, a6, .Lsub_xexpdiff
+       addi    a7, a7, 1
+       j       .Lsub_xexpdiff
+
+.Lsub_bigshiftx:
+       /* Mostly the same thing as "bigshifty", but with the sign bit of the
+          shifted value set so that the subsequent subtraction flips the
+          sign of y.  */
+       bgeui   a10, 64, .Lsub_returny
+
+       ssr     a10
+       sll     a11, xl         /* lost bits shifted out of xl */
+       src     a9, xh, xl
+       srl     xl, xh
+       slli    xh, a6, 11      /* set sign bit of xh */
+       beqz    a11, .Lsub_subx
+       or      a9, a9, a10     /* any positive, nonzero value will work */
+       j       .Lsub_subx
+
+.Lsub_returny:
+       /* Negate and return y.  */
+       slli    a7, a6, 11
+       xor     xh, yh, a7
+       mov     xl, yl
+       leaf_return
+
+.Lsub_borrow:  
+       /* The subtraction has underflowed into the exponent field, so the
+          value needs to be renormalized.  Shift the mantissa left as
+          needed to remove any leading zeros and adjust the exponent
+          accordingly.  If the exponent is not large enough to remove
+          all the leading zeros, the result will be a subnormal value.
+
+          On entry a10 holds the exponent field of the result and a9 the
+          leftover rounding bits.  do_nsau is a macro defined elsewhere
+          in the file; it is used here as a leading-zero count of its
+          second operand, with the last two operands as scratch
+          registers (NOTE(review): confirm against the macro).  */
+
+       slli    a8, xh, 12
+       beqz    a8, .Lsub_xhzero
+       do_nsau a6, a8, a7, a11
+       srli    a8, a8, 12
+       bge     a6, a10, .Lsub_subnormal
+       addi    a6, a6, 1
+
+.Lsub_shift_lt32:
+       /* Shift the mantissa (a8/xl/a9) left by a6.  */
+       ssl     a6
+       src     a8, a8, xl
+       src     xl, xl, a9
+       sll     a9, a9
+
+       /* Combine the shifted mantissa with the sign and exponent,
+          decrementing the exponent by a6.  (The exponent has already
+          been decremented by one due to the borrow from the subtraction,
+          but adding the mantissa will increment the exponent by one.)  */
+       srli    xh, xh, 20
+       sub     xh, xh, a6
+       slli    xh, xh, 20
+       add     xh, xh, a8
+       j       .Lsub_round
+
+.Lsub_exactlyhalf:
+       /* Round down to the nearest even value.  (Clears the low bit of
+          xl after the rounding increment has been applied.)  */
+       srli    xl, xl, 1
+       slli    xl, xl, 1
+       leaf_return
+
+.Lsub_roundcarry:
+       /* xl is always zero when the rounding increment overflows, so
+          there's no need to round it to an even value.  */
+       addi    xh, xh, 1
+       /* Overflow to the exponent is OK.  */
+       leaf_return
+
+.Lsub_xhzero:
+       /* When normalizing the result, all the mantissa bits in the high
+          word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
+       do_nsau a6, xl, a7, a11
+       addi    a6, a6, 21
+       blt     a10, a6, .Lsub_subnormal
+
+.Lsub_normalize_shift:
+       bltui   a6, 32, .Lsub_shift_lt32
+
+       /* Shift of 32 or more: the high mantissa word (a8) is zero on
+          this path, so the result words are formed from xl/a9 alone.  */
+       ssl     a6
+       src     a8, xl, a9
+       sll     xl, a9
+       movi    a9, 0
+
+       /* Combine with the sign and exponent as in .Lsub_shift_lt32.  */
+       srli    xh, xh, 20
+       sub     xh, xh, a6
+       slli    xh, xh, 20
+       add     xh, xh, a8
+       j       .Lsub_round
+
+.Lsub_subnormal:
+       /* The exponent is too small to shift away all the leading zeros.
+          Set a6 to the current exponent (which has already been
+          decremented by the borrow) so that the exponent of the result
+          will be zero.  Do not add 1 to a6 in this case, because: (1)
+          adding the mantissa will not increment the exponent, so there is
+          no need to subtract anything extra from the exponent to
+          compensate, and (2) the effective exponent of a subnormal is 1
+          not 0 so the shift amount must be 1 smaller than normal. */
+       mov     a6, a10
+       j       .Lsub_normalize_shift
+
+#endif /* L_addsubdf3 */
+
+#ifdef L_muldf3
+
+       /* Multiplication */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+__muldf3_aux:
+
+       /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+          (This code is placed before the start of the function just to
+          keep it in range of the limited branch displacements.)
+
+          Register state set up by __muldf3 before branching here:
+          a6 = 0x7ff00000 (exponent mask), a7 = xh ^ yh (sign of the
+          result in bit 31), a8/a9 = exponent fields of x/y.  do_nsau is
+          a macro defined elsewhere in the file, used here as a
+          leading-zero count with the last two operands as scratch
+          (NOTE(review): confirm against the macro).  */
+
+.Lmul_xexpzero:
+       /* x has a zero exponent field: it is zero or subnormal.  */
+       /* Clear the sign bit of x.  */
+       slli    xh, xh, 1
+       srli    xh, xh, 1
+
+       /* If x is zero, return zero.  */
+       or      a10, xh, xl
+       beqz    a10, .Lmul_return_zero
+
+       /* Normalize x.  Adjust the exponent in a8.  The shift count is
+          the leading-zero count less 11, which moves the leading one of
+          the mantissa to bit 20 of xh.  */
+       beqz    xh, .Lmul_xh_zero
+       do_nsau a10, xh, a11, a12
+       addi    a10, a10, -11
+       ssl     a10
+       src     xh, xh, xl
+       sll     xl, xl
+       movi    a8, 1
+       sub     a8, a8, a10
+       j       .Lmul_xnormalized       
+.Lmul_xh_zero:
+       /* All mantissa bits of xh are zero; normalize from xl.  The
+          count in a10 is negative when the leading one lies in the top
+          11 bits of xl, in which case xl must be split with a right
+          shift into xh (.Lmul_xl_srl).  */
+       do_nsau a10, xl, a11, a12
+       addi    a10, a10, -11
+       movi    a8, -31
+       sub     a8, a8, a10
+       ssl     a10
+       bltz    a10, .Lmul_xl_srl
+       sll     xh, xl
+       movi    xl, 0
+       j       .Lmul_xnormalized
+.Lmul_xl_srl:
+       srl     xh, xl
+       sll     xl, xl
+       j       .Lmul_xnormalized
+       
+.Lmul_yexpzero:
+       /* y has a zero exponent field; same treatment as for x above,
+          with the adjusted exponent left in a9.  */
+       /* Clear the sign bit of y.  */
+       slli    yh, yh, 1
+       srli    yh, yh, 1
+
+       /* If y is zero, return zero.  */
+       or      a10, yh, yl
+       beqz    a10, .Lmul_return_zero
+
+       /* Normalize y.  Adjust the exponent in a9.  */
+       beqz    yh, .Lmul_yh_zero
+       do_nsau a10, yh, a11, a12
+       addi    a10, a10, -11
+       ssl     a10
+       src     yh, yh, yl
+       sll     yl, yl
+       movi    a9, 1
+       sub     a9, a9, a10
+       j       .Lmul_ynormalized       
+.Lmul_yh_zero:
+       do_nsau a10, yl, a11, a12
+       addi    a10, a10, -11
+       movi    a9, -31
+       sub     a9, a9, a10
+       ssl     a10
+       bltz    a10, .Lmul_yl_srl
+       sll     yh, yl
+       movi    yl, 0
+       j       .Lmul_ynormalized
+.Lmul_yl_srl:
+       srl     yh, yl
+       sll     yl, yl
+       j       .Lmul_ynormalized       
+
+.Lmul_return_zero:
+       /* Return zero with the appropriate sign bit.  (Only bit 31 of
+          a7 is kept.)  */
+       srli    xh, a7, 31
+       slli    xh, xh, 31
+       movi    xl, 0
+       j       .Lmul_done
+
+.Lmul_xnan_or_inf:
+       /* x is NaN or Infinity.  */
+       /* If y is zero, return NaN.  */
+       bnez    yl, 1f
+       slli    a8, yh, 1       /* ignore the sign bit of y */
+       bnez    a8, 1f
+       movi    a4, 0x80000     /* make it a quiet NaN */
+       or      xh, xh, a4
+       j       .Lmul_done
+1:
+       /* If y is NaN, return y.  (y is NaN when its exponent bits are
+          all set and its mantissa is nonzero; otherwise x is returned.)  */
+       bnall   yh, a6, .Lmul_returnx
+       slli    a8, yh, 12
+       or      a8, a8, yl
+       beqz    a8, .Lmul_returnx
+
+.Lmul_returny:
+       mov     xh, yh
+       mov     xl, yl
+
+.Lmul_returnx:
+       /* Set the sign bit and return.  The sign of the result (bit 31
+          of a7) is reduced to a single bit, the old sign of xh is
+          shifted out, and a one-bit funnel shift of a7:xh puts the new
+          sign back into bit 31.  */
+       extui   a7, a7, 31, 1
+       slli    xh, xh, 1
+       ssai    1
+       src     xh, a7, xh
+       j       .Lmul_done
+
+.Lmul_ynan_or_inf:
+       /* y is NaN or Infinity (and x is neither).  */
+       /* If x is zero, return NaN.  */
+       bnez    xl, .Lmul_returny
+       slli    a8, xh, 1       /* ignore the sign bit of x */
+       bnez    a8, .Lmul_returny
+       movi    a7, 0x80000     /* make it a quiet NaN */
+       or      xh, yh, a7
+       j       .Lmul_done
+
+       /* __muldf3: double-precision multiplication, x * y.  The operands
+          are read from xh/xl and yh/yl and the result is left in xh/xl.
+          Zeros, subnormals, NaNs and Infinities are handled by the
+          out-of-line code placed before this entry point.  */
+       .align  4
+       .global __muldf3
+       .type   __muldf3, @function
+__muldf3:
+#if __XTENSA_CALL0_ABI__
+       /* a12-a15 are used as temporaries below, so they are saved in a
+          32-byte frame here and restored at .Lmul_done.  */
+       leaf_entry sp, 32
+       addi    sp, sp, -32
+       s32i    a12, sp, 16
+       s32i    a13, sp, 20
+       s32i    a14, sp, 24
+       s32i    a15, sp, 28
+#elif XCHAL_NO_MUL
+       /* This is not really a leaf function; allocate enough stack space
+          to allow CALL12s to a helper function.  */
+       leaf_entry sp, 64
+#else
+       leaf_entry sp, 32
+#endif
+       movi    a6, 0x7ff00000  /* exponent field mask for the high word */
+
+       /* Get the sign of the result.  (Only bit 31 of a7 is meaningful.)  */
+       xor     a7, xh, yh
+
+       /* Check for NaN and infinity.  */
+       ball    xh, a6, .Lmul_xnan_or_inf
+       ball    yh, a6, .Lmul_ynan_or_inf
+
+       /* Extract the exponents.  */
+       extui   a8, xh, 20, 11
+       extui   a9, yh, 20, 11
+
+       /* A zero exponent field means zero or subnormal; the handlers
+          either return zero or normalize the operand and jump back to
+          the label that follows the branch.  */
+       beqz    a8, .Lmul_xexpzero
+.Lmul_xnormalized:     
+       beqz    a9, .Lmul_yexpzero
+.Lmul_ynormalized:     
+
+       /* Add the exponents.  */
+       add     a8, a8, a9
+
+       /* Replace sign/exponent fields with explicit "1.0".  (Setting the
+          exponent bits and masking with 0x1fffff keeps the 20 mantissa
+          bits and leaves bit 20 set.)  */
+       movi    a10, 0x1fffff
+       or      xh, xh, a6
+       and     xh, xh, a10
+       or      yh, yh, a6
+       and     yh, yh, a10
+
+       /* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
+          The least-significant word of the result is thrown away except
+          that if it is nonzero, the lsb of a6 is set to 1.
+
+          Throughout this section a carry out of an unsigned addition is
+          detected with "bgeu sum, addend": the sum is smaller than the
+          addend exactly when the addition wrapped around.  */
+#if XCHAL_HAVE_MUL32_HIGH
+
+       /* Compute a6 with any carry-outs in a10.  */
+       movi    a10, 0
+       mull    a6, xl, yh
+       mull    a11, xh, yl
+       add     a6, a6, a11
+       bgeu    a6, a11, 1f
+       addi    a10, a10, 1
+1:
+       muluh   a11, xl, yl
+       add     a6, a6, a11
+       bgeu    a6, a11, 1f
+       addi    a10, a10, 1
+1:     
+       /* If the low word of the result is nonzero, set the lsb of a6.  */
+       mull    a11, xl, yl
+       beqz    a11, 1f
+       movi    a9, 1
+       or      a6, a6, a9
+1:
+       /* Compute xl with any carry-outs in a9.  */
+       movi    a9, 0
+       mull    a11, xh, yh
+       add     a10, a10, a11
+       bgeu    a10, a11, 1f
+       addi    a9, a9, 1
+1:     
+       muluh   a11, xh, yl
+       add     a10, a10, a11
+       bgeu    a10, a11, 1f
+       addi    a9, a9, 1
+1:     
+       muluh   xl, xl, yh
+       add     xl, xl, a10
+       bgeu    xl, a10, 1f
+       addi    a9, a9, 1
+1:
+       /* Compute xh.  */
+       muluh   xh, xh, yh
+       add     xh, xh, a9
+
+#else /* ! XCHAL_HAVE_MUL32_HIGH */
+
+       /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
+          products.  These partial products are:
+
+               0 xll * yll
+
+               1 xll * ylh
+               2 xlh * yll
+
+               3 xll * yhl
+               4 xlh * ylh
+               5 xhl * yll
+
+               6 xll * yhh
+               7 xlh * yhl
+               8 xhl * ylh
+               9 xhh * yll
+
+               10 xlh * yhh
+               11 xhl * yhl
+               12 xhh * ylh
+
+               13 xhl * yhh
+               14 xhh * yhl
+
+               15 xhh * yhh
+
+          where the input chunks are (hh, hl, lh, ll).  If using the Mul16
+          or Mul32 multiplier options, these input chunks must be stored in
+          separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
+          that the inputs come from either half of the registers, so there
+          is no need to shift them out ahead of time.  If there is no
+          multiply hardware, the 16-bit chunks can be extracted when setting
+          up the arguments to the separate multiply function.  */
+
+       /* Save a7 since it is needed to hold a temporary value.  */
+       s32i    a7, sp, 4
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+       /* Calling a separate multiply function will clobber a0 and requires
+          use of a8 as a temporary, so save those values now.  (The function
+          uses a custom ABI so nothing else needs to be saved.)  */
+       s32i    a0, sp, 0
+       s32i    a8, sp, 8
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define xlh a12
+#define ylh a13
+#define xhh a14
+#define yhh a15
+
+       /* Get the high halves of the inputs into registers.  */
+       srli    xlh, xl, 16
+       srli    ylh, yl, 16
+       srli    xhh, xh, 16
+       srli    yhh, yh, 16
+
+/* The low halves stay in the original registers.  */
+#define xll xl
+#define yll yl
+#define xhl xh
+#define yhl yh
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+       /* Clear the high halves of the inputs.  This does not matter
+          for MUL16 because the high bits are ignored.  */
+       extui   xl, xl, 0, 16
+       extui   xh, xh, 0, 16
+       extui   yl, yl, 0, 16
+       extui   yh, yh, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+/* do_mul(dst, xreg, xhalf, yreg, yhalf) computes one partial product:
+   dst = (half "xhalf" of xreg) * (half "yhalf" of yreg), where each
+   half selector is "l" or "h".  For MUL16/MUL32 the register name and
+   the selector are pasted together to name one of the chunk registers
+   defined above.  */
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       mul16u  dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       mull    dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+   a period in the definition of do_mul below.  These macros are a workaround
+   using underscores instead of periods when doing the concatenation.  */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
+       rsr     dst, ACCLO
+
+#else /* no multiply hardware */
+       
+/* Extract the low or high 16-bit chunk of src into dst.  */
+#define set_arg_l(dst, src) \
+       extui   dst, src, 0, 16
+#define set_arg_h(dst, src) \
+       srli    dst, src, 16
+
+/* Call the .Lmul_mulsi3 helper.  With CALL0 the arguments go in a13/a14
+   and the result comes back in a12; otherwise CALL12 is used with the
+   arguments in a14/a15 and the result read back from a14.  */
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       set_arg_ ## xhalf (a13, xreg); \
+       set_arg_ ## yhalf (a14, yreg); \
+       call0   .Lmul_mulsi3; \
+       mov     dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       set_arg_ ## xhalf (a14, xreg); \
+       set_arg_ ## yhalf (a15, yreg); \
+       call12  .Lmul_mulsi3; \
+       mov     dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+       /* Add pp1 and pp2 into a10 with carry-out in a9.  */
+       do_mul(a10, xl, l, yl, h)       /* pp 1 */
+       do_mul(a11, xl, h, yl, l)       /* pp 2 */
+       movi    a9, 0
+       add     a10, a10, a11
+       bgeu    a10, a11, 1f
+       addi    a9, a9, 1
+1:
+       /* Initialize a6 with a9/a10 shifted into position.  Note that
+          this value can be safely incremented without any carry-outs.  */
+       ssai    16
+       src     a6, a9, a10
+
+       /* Compute the low word into a10.  */
+       do_mul(a11, xl, l, yl, l)       /* pp 0 */
+       sll     a10, a10
+       add     a10, a10, a11
+       bgeu    a10, a11, 1f
+       addi    a6, a6, 1
+1:
+       /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
+          This is good enough to determine the low half of a6, so that any
+          nonzero bits from the low word of the result can be collapsed
+          into a6, freeing up a register.  */
+       movi    a9, 0
+       do_mul(a11, xl, l, yh, l)       /* pp 3 */
+       add     a6, a6, a11
+       bgeu    a6, a11, 1f
+       addi    a9, a9, 1
+1:
+       do_mul(a11, xl, h, yl, h)       /* pp 4 */
+       add     a6, a6, a11
+       bgeu    a6, a11, 1f
+       addi    a9, a9, 1
+1:
+       do_mul(a11, xh, l, yl, l)       /* pp 5 */
+       add     a6, a6, a11
+       bgeu    a6, a11, 1f
+       addi    a9, a9, 1
+1:
+       /* Collapse any nonzero bits from the low word into a6.  */
+       beqz    a10, 1f
+       movi    a11, 1
+       or      a6, a6, a11
+1:
+       /* Add pp6-9 into a11 with carry-outs in a10.  */
+       do_mul(a7, xl, l, yh, h)        /* pp 6 */
+       do_mul(a11, xh, h, yl, l)       /* pp 9 */
+       movi    a10, 0
+       add     a11, a11, a7
+       bgeu    a11, a7, 1f
+       addi    a10, a10, 1
+1:     
+       do_mul(a7, xl, h, yh, l)        /* pp 7 */
+       add     a11, a11, a7
+       bgeu    a11, a7, 1f
+       addi    a10, a10, 1
+1:     
+       do_mul(a7, xh, l, yl, h)        /* pp 8 */
+       add     a11, a11, a7
+       bgeu    a11, a7, 1f
+       addi    a10, a10, 1
+1:     
+       /* Shift a10/a11 into position, and add low half of a11 to a6.  */
+       src     a10, a10, a11
+       add     a10, a10, a9
+       sll     a11, a11
+       add     a6, a6, a11
+       bgeu    a6, a11, 1f
+       addi    a10, a10, 1
+1:
+       /* Add pp10-12 into xl with carry-outs in a9.  (xl is not used as
+          a multiplier input after pp10, so it can receive the result.)  */
+       movi    a9, 0
+       do_mul(xl, xl, h, yh, h)        /* pp 10 */
+       add     xl, xl, a10
+       bgeu    xl, a10, 1f
+       addi    a9, a9, 1
+1:
+       do_mul(a10, xh, l, yh, l)       /* pp 11 */
+       add     xl, xl, a10
+       bgeu    xl, a10, 1f
+       addi    a9, a9, 1
+1:
+       do_mul(a10, xh, h, yl, h)       /* pp 12 */
+       add     xl, xl, a10
+       bgeu    xl, a10, 1f
+       addi    a9, a9, 1
+1:
+       /* Add pp13-14 into a11 with carry-outs in a10.  */
+       do_mul(a11, xh, l, yh, h)       /* pp 13 */
+       do_mul(a7, xh, h, yh, l)        /* pp 14 */
+       movi    a10, 0
+       add     a11, a11, a7
+       bgeu    a11, a7, 1f
+       addi    a10, a10, 1
+1:
+       /* Shift a10/a11 into position, and add low half of a11 to xl.  */
+       src     a10, a10, a11
+       add     a10, a10, a9
+       sll     a11, a11
+       add     xl, xl, a11
+       bgeu    xl, a11, 1f
+       addi    a10, a10, 1
+1:
+       /* Compute xh.  */
+       do_mul(xh, xh, h, yh, h)        /* pp 15 */
+       add     xh, xh, a10
+
+       /* Restore values saved on the stack during the multiplication.  */
+       l32i    a7, sp, 4
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+       l32i    a0, sp, 0
+       l32i    a8, sp, 8
+#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
+
+       /* At this point the product is in xh/xl/a6, the exponent sum is
+          in a8 and the sign of the result is in bit 31 of a7.  */
+       /* Shift left by 12 bits, unless there was a carry-out from the
+          multiply, in which case, shift by 11 bits and increment the
+          exponent.  Note: It is convenient to use the constant 0x3ff
+          instead of 0x400 when removing the extra exponent bias (so that
+          it is easy to construct 0x7fe for the overflow check).  Reverse
+          the logic here to decrement the exponent sum by one unless there
+          was a carry-out.  */
+       movi    a4, 11
+       srli    a5, xh, 21 - 12
+       bnez    a5, 1f
+       addi    a4, a4, 1
+       addi    a8, a8, -1
+1:     ssl     a4
+       src     xh, xh, xl
+       src     xl, xl, a6
+       sll     a6, a6
+
+       /* Subtract the extra bias from the exponent sum (plus one to account
+          for the explicit "1.0" of the mantissa that will be added to the
+          exponent in the final result).  */
+       movi    a4, 0x3ff
+       sub     a8, a8, a4
+       
+       /* Check for over/underflow.  The value in a8 is one less than the
+          final exponent, so values in the range 0..7fd are OK here.  The
+          comparison is unsigned, so a negative a8 (underflow) also takes
+          the branch; .Lmul_overflow tells the two cases apart.  */
+       slli    a4, a4, 1       /* 0x7fe */
+       bgeu    a8, a4, .Lmul_overflow
+       
+.Lmul_round:
+       /* Round.  The bits below the result are in a6; its
+          most-significant bit is the 1/2 position.  */
+       bgez    a6, .Lmul_rounded
+       addi    xl, xl, 1
+       beqz    xl, .Lmul_roundcarry
+       slli    a6, a6, 1
+       beqz    a6, .Lmul_exactlyhalf
+
+.Lmul_rounded:
+       /* Add the exponent to the mantissa.  */
+       slli    a8, a8, 20
+       add     xh, xh, a8
+
+.Lmul_addsign:
+       /* Add the sign bit.  */
+       srli    a7, a7, 31
+       slli    a7, a7, 31
+       or      xh, xh, a7
+
+.Lmul_done:
+#if __XTENSA_CALL0_ABI__
+       /* Restore the registers saved at the function entry and release
+          the frame.  */
+       l32i    a12, sp, 16
+       l32i    a13, sp, 20
+       l32i    a14, sp, 24
+       l32i    a15, sp, 28
+       addi    sp, sp, 32
+#endif
+       leaf_return
+
+.Lmul_exactlyhalf:
+       /* Round down to the nearest even value.  */
+       srli    xl, xl, 1
+       slli    xl, xl, 1
+       j       .Lmul_rounded
+
+.Lmul_roundcarry:
+       /* xl is always zero when the rounding increment overflows, so
+          there's no need to round it to an even value.  */
+       addi    xh, xh, 1
+       /* Overflow is OK -- it will be added to the exponent.  */
+       j       .Lmul_rounded
+
+.Lmul_overflow:
+       /* a8 is outside 0..0x7fd.  A negative value means underflow.  */
+       bltz    a8, .Lmul_underflow
+       /* Return +/- Infinity.  */
+       addi    a8, a4, 1       /* 0x7ff */
+       slli    xh, a8, 20
+       movi    xl, 0
+       j       .Lmul_addsign
+
+.Lmul_underflow:
+       /* Create a subnormal value, where the exponent field contains zero,
+          but the effective exponent is 1.  The value of a8 is one less than
+          the actual exponent, so just negate it to get the shift amount.
+          The rounding bits currently in a6 are kept in a9 so that they
+          can be folded back in below.  */
+       neg     a8, a8
+       mov     a9, a6
+       ssr     a8
+       bgeui   a8, 32, .Lmul_bigshift
+       
+       /* Shift xh/xl right.  Any bits that are shifted out of xl are saved
+          in a6 (combined with the shifted-out bits currently in a6) for
+          rounding the result.  */
+       sll     a6, xl
+       src     xl, xh, xl
+       srl     xh, xh
+       j       1f
+
+.Lmul_bigshift:
+       /* Shift amount of 32 or more; 64 or more leaves nothing.  */
+       bgeui   a8, 64, .Lmul_flush_to_zero
+       sll     a10, xl         /* lost bits shifted out of xl */
+       src     a6, xh, xl
+       srl     xl, xh
+       movi    xh, 0
+       or      a9, a9, a10
+
+       /* Set the exponent to zero.  */
+1:     movi    a8, 0
+
+       /* Pack any nonzero bits shifted out into a6.  */
+       beqz    a9, .Lmul_round
+       movi    a9, 1
+       or      a6, a6, a9
+       j       .Lmul_round
+       
+.Lmul_flush_to_zero:
+       /* Return zero with the appropriate sign bit.  */
+       srli    xh, a7, 31
+       slli    xh, xh, 31
+       movi    xl, 0
+       j       .Lmul_done
+
+
+#if XCHAL_NO_MUL
+       
+       /* For Xtensa processors with no multiply hardware, this simplified
+          version of _mulsi3 is used for multiplying 16-bit chunks of
+          the floating-point mantissas.  When using CALL0, this function
+          uses a custom ABI: the inputs are passed in a13 and a14, the
+          result is returned in a12, and a8 and a15 are clobbered.
+          Otherwise the code below takes its inputs in a2 and a3 and
+          returns the result in a2, using a4-a6 as temporaries.  */
+       .align  4
+.Lmul_mulsi3:
+       leaf_entry sp, 16
+       /* mul_mulsi3_body: shift-and-add multiply, dst = src1 * src2.
+          Each pass of the loop handles the low four bits of src1: for
+          every bit that is set, the conditional move commits a
+          candidate sum into dst.  src1 is then shifted right by four
+          and src2 left by four until src1 is zero.  Both src1 and src2
+          are destroyed.  do_addx2/4/8 are macros defined elsewhere in
+          the file; presumably they compute src2 * {2,4,8} + dst into
+          tmp1 (NOTE(review): confirm against the macros).  */
+       .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+       movi    \dst, 0
+1:     add     \tmp1, \src2, \dst
+       extui   \tmp2, \src1, 0, 1
+       movnez  \dst, \tmp1, \tmp2
+
+       do_addx2 \tmp1, \src2, \dst, \tmp1
+       extui   \tmp2, \src1, 1, 1
+       movnez  \dst, \tmp1, \tmp2
+
+       do_addx4 \tmp1, \src2, \dst, \tmp1
+       extui   \tmp2, \src1, 2, 1
+       movnez  \dst, \tmp1, \tmp2
+
+       do_addx8 \tmp1, \src2, \dst, \tmp1
+       extui   \tmp2, \src1, 3, 1
+       movnez  \dst, \tmp1, \tmp2
+
+       srli    \src1, \src1, 4
+       slli    \src2, \src2, 4
+       bnez    \src1, 1b
+       .endm
+#if __XTENSA_CALL0_ABI__
+       mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+       /* The result will be written into a2, so save that argument in a4.  */
+       mov     a4, a2
+       mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+       leaf_return
+#endif /* XCHAL_NO_MUL */
+#endif /* L_muldf3 */
+
+#ifdef L_divdf3
+
+       /* Division */
+__divdf3_aux:
+
+       /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+          (This code is placed before the start of the function just to
+          keep it in range of the limited branch displacements.)  */
+
+.Ldiv_yexpzero:
+       /* Clear the sign bit of y.  */
+       slli    yh, yh, 1
+       srli    yh, yh, 1
+
+       /* Check for division by zero.  */
+       or      a10, yh, yl
+       beqz    a10, .Ldiv_yzero
+
+       /* Normalize y.  Adjust the exponent in a9.  */
+       beqz    yh, .Ldiv_yh_zero
+       do_nsau a10, yh, a11, a9
+       addi    a10, a10, -11
+       ssl     a10
+       src     yh, yh, yl
+       sll     yl, yl
+       movi    a9, 1
+       sub     a9, a9, a10
+       j       .Ldiv_ynormalized       
+.Ldiv_yh_zero:
+       do_nsau a10, yl, a11, a9
+       addi    a10, a10, -11
+       movi    a9, -31
+       sub     a9, a9, a10
+       ssl     a10
+       bltz    a10, .Ldiv_yl_srl
+       sll     yh, yl
+       movi    yl, 0
+       j       .Ldiv_ynormalized
+.Ldiv_yl_srl:
+       srl     yh, yl
+       sll     yl, yl
+       j       .Ldiv_ynormalized       
+
+.Ldiv_yzero:
+       /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
+       slli    xh, xh, 1
+       srli    xh, xh, 1
+       or      xl, xl, xh
+       srli    xh, a7, 31
+       slli    xh, xh, 31
+       or      xh, xh, a6
+       bnez    xl, 1f
+       movi    a4, 0x80000     /* make it a quiet NaN */
+       or      xh, xh, a4
+1:     movi    xl, 0
+       leaf_return
+
+.Ldiv_xexpzero:
+       /* Clear the sign bit of x.  */
+       slli    xh, xh, 1
+       srli    xh, xh, 1
+
+       /* If x is zero, return zero.  */
+       or      a10, xh, xl
+       beqz    a10, .Ldiv_return_zero
+
+       /* Normalize x.  Adjust the exponent in a8.  */
+       beqz    xh, .Ldiv_xh_zero
+       do_nsau a10, xh, a11, a8
+       addi    a10, a10, -11
+       ssl     a10
+       src     xh, xh, xl
+       sll     xl, xl
+       movi    a8, 1
+       sub     a8, a8, a10
+       j       .Ldiv_xnormalized       
+.Ldiv_xh_zero:
+       do_nsau a10, xl, a11, a8
+       addi    a10, a10, -11
+       movi    a8, -31
+       sub     a8, a8, a10
+       ssl     a10
+       bltz    a10, .Ldiv_xl_srl
+       sll     xh, xl
+       movi    xl, 0
+       j       .Ldiv_xnormalized
+.Ldiv_xl_srl:
+       srl     xh, xl
+       sll     xl, xl
+       j       .Ldiv_xnormalized
+       
+.Ldiv_return_zero:
+       /* Return zero with the appropriate sign bit.  */
+       srli    xh, a7, 31
+       slli    xh, xh, 31
+       movi    xl, 0
+       leaf_return
+
+.Ldiv_xnan_or_inf:
+       /* Set the sign bit of the result.  */
+       srli    a7, yh, 31
+       slli    a7, a7, 31
+       xor     xh, xh, a7
+       /* If y is NaN or Inf, return NaN.  */
+       bnall   yh, a6, 1f
+       movi    a4, 0x80000     /* make it a quiet NaN */
+       or      xh, xh, a4
+1:     leaf_return
+
+.Ldiv_ynan_or_inf:
+       /* If y is Infinity, return zero.  */
+       slli    a8, yh, 12
+       or      a8, a8, yl
+       beqz    a8, .Ldiv_return_zero
+       /* y is NaN; return it.  */
+       mov     xh, yh
+       mov     xl, yl
+       leaf_return
+
+.Ldiv_highequal1:
+       bltu    xl, yl, 2f
+       j       3f
+
+       .align  4
+       .global __divdf3
+       .type   __divdf3, @function
+__divdf3:
+       leaf_entry sp, 16
+       movi    a6, 0x7ff00000
+
+       /* Get the sign of the result.  */
+       xor     a7, xh, yh
+
+       /* Check for NaN and infinity.  */
+       ball    xh, a6, .Ldiv_xnan_or_inf
+       ball    yh, a6, .Ldiv_ynan_or_inf
+
+       /* Extract the exponents.  */
+       extui   a8, xh, 20, 11
+       extui   a9, yh, 20, 11
+
+       beqz    a9, .Ldiv_yexpzero
+.Ldiv_ynormalized:     
+       beqz    a8, .Ldiv_xexpzero
+.Ldiv_xnormalized:     
+
+       /* Subtract the exponents.  */
+       sub     a8, a8, a9
+
+       /* Replace sign/exponent fields with explicit "1.0".  */
+       movi    a10, 0x1fffff
+       or      xh, xh, a6
+       and     xh, xh, a10
+       or      yh, yh, a6
+       and     yh, yh, a10
+
+       /* Set SAR for left shift by one.  */
+       ssai    (32 - 1)
+
+       /* The first digit of the mantissa division must be a one.
+          Shift x (and adjust the exponent) as needed to make this true.  */
+       bltu    yh, xh, 3f
+       beq     yh, xh, .Ldiv_highequal1
+2:     src     xh, xh, xl
+       sll     xl, xl
+       addi    a8, a8, -1
+3:
+       /* Do the first subtraction and shift.  */
+       sub     xh, xh, yh
+       bgeu    xl, yl, 1f
+       addi    xh, xh, -1
+1:     sub     xl, xl, yl
+       src     xh, xh, xl
+       sll     xl, xl
+
+       /* Put the quotient into a10/a11.  */
+       movi    a10, 0
+       movi    a11, 1
+
+       /* Divide one bit at a time for 52 bits.  */
+       movi    a9, 52
+#if XCHAL_HAVE_LOOPS
+       loop    a9, .Ldiv_loopend
+#endif
+.Ldiv_loop:
+       /* Shift the quotient << 1.  */
+       src     a10, a10, a11
+       sll     a11, a11
+
+       /* Is this digit a 0 or 1?  */
+       bltu    xh, yh, 3f
+       beq     xh, yh, .Ldiv_highequal2
+
+       /* Output a 1 and subtract.  */
+2:     addi    a11, a11, 1
+       sub     xh, xh, yh
+       bgeu    xl, yl, 1f
+       addi    xh, xh, -1
+1:     sub     xl, xl, yl
+
+       /* Shift the dividend << 1.  */
+3:     src     xh, xh, xl
+       sll     xl, xl
+
+#if !XCHAL_HAVE_LOOPS
+       addi    a9, a9, -1
+       bnez    a9, .Ldiv_loop
+#endif
+.Ldiv_loopend:
+
+       /* Add the exponent bias (less one to account for the explicit "1.0"
+          of the mantissa that will be added to the exponent in the final
+          result).  */
+       movi    a9, 0x3fe
+       add     a8, a8, a9
+       
+       /* Check for over/underflow.  The value in a8 is one less than the
+          final exponent, so values in the range 0..7fd are OK here.  */
+       addmi   a9, a9, 0x400   /* 0x7fe */
+       bgeu    a8, a9, .Ldiv_overflow
+
+.Ldiv_round:
+       /* Round.  The remainder (<< 1) is in xh/xl.  */
+       bltu    xh, yh, .Ldiv_rounded
+       beq     xh, yh, .Ldiv_highequal3
+.Ldiv_roundup:
+       addi    a11, a11, 1
+       beqz    a11, .Ldiv_roundcarry
+
+.Ldiv_rounded:
+       mov     xl, a11
+       /* Add the exponent to the mantissa.  */
+       slli    a8, a8, 20
+       add     xh, a10, a8
+
+.Ldiv_addsign:
+       /* Add the sign bit.  */
+       srli    a7, a7, 31
+       slli    a7, a7, 31
+       or      xh, xh, a7
+       leaf_return
+
+.Ldiv_highequal2:
+       bgeu    xl, yl, 2b
+       j       3b
+
+.Ldiv_highequal3:
+       bltu    xl, yl, .Ldiv_rounded
+       bne     xl, yl, .Ldiv_roundup
+
+       /* Remainder is exactly half the divisor.  Round even.  */
+       addi    a11, a11, 1
+       beqz    a11, .Ldiv_roundcarry
+       srli    a11, a11, 1
+       slli    a11, a11, 1
+       j       .Ldiv_rounded
+
+.Ldiv_overflow:
+       bltz    a8, .Ldiv_underflow
+       /* Return +/- Infinity.  */
+       addi    a8, a9, 1       /* 0x7ff */
+       slli    xh, a8, 20
+       movi    xl, 0
+       j       .Ldiv_addsign
+
+.Ldiv_underflow:
+       /* Create a subnormal value, where the exponent field contains zero,
+          but the effective exponent is 1.  The value of a8 is one less than
+          the actual exponent, so just negate it to get the shift amount.  */
+       neg     a8, a8
+       ssr     a8
+       bgeui   a8, 32, .Ldiv_bigshift
+       
+       /* Shift a10/a11 right.  Any bits that are shifted out of a11 are
+          saved in a6 for rounding the result.  */
+       sll     a6, a11
+       src     a11, a10, a11
+       srl     a10, a10
+       j       1f
+
+.Ldiv_bigshift:
+       bgeui   a8, 64, .Ldiv_flush_to_zero
+       sll     a9, a11         /* lost bits shifted out of a11 */
+       src     a6, a10, a11
+       srl     a11, a10
+       movi    a10, 0
+       or      xl, xl, a9
+
+       /* Set the exponent to zero.  */
+1:     movi    a8, 0
+
+       /* Pack any nonzero remainder (in xh/xl) into a6.  */
+       or      xh, xh, xl
+       beqz    xh, 1f
+       movi    a9, 1
+       or      a6, a6, a9
+       
+       /* Round a10/a11 based on the bits shifted out into a6.  */
+1:     bgez    a6, .Ldiv_rounded
+       addi    a11, a11, 1
+       beqz    a11, .Ldiv_roundcarry
+       slli    a6, a6, 1
+       bnez    a6, .Ldiv_rounded
+       srli    a11, a11, 1
+       slli    a11, a11, 1
+       j       .Ldiv_rounded
+
+.Ldiv_roundcarry:
+       /* a11 is always zero when the rounding increment overflows, so
+          there's no need to round it to an even value.  */
+       addi    a10, a10, 1
+       /* Overflow to the exponent field is OK.  */
+       j       .Ldiv_rounded
+
+.Ldiv_flush_to_zero:
+       /* Return zero with the appropriate sign bit.  */
+       srli    xh, a7, 31
+       slli    xh, xh, 31
+       movi    xl, 0
+       leaf_return
+
+#endif /* L_divdf3 */
+
+#ifdef L_cmpdf2
+
+       /* Equal and Not Equal.  Returns 0 in a2 if x == y, 1 otherwise.  */
+       /* A NaN never compares equal; +0 and -0 do compare equal.  */
+       .align  4
+       .global __eqdf2
+       .global __nedf2
+       .set    __nedf2, __eqdf2        /* both names share one entry point */
+       .type   __eqdf2, @function
+__eqdf2:
+       leaf_entry sp, 16
+       bne     xl, yl, 2f      /* low words differ: not equal */
+       bne     xh, yh, 4f      /* only the high words differ */
+
+       /* The values are equal but NaN != NaN.  Check the exponent.  */
+       movi    a6, 0x7ff00000
+       ball    xh, a6, 3f      /* all exponent bits set: Inf or NaN */
+
+       /* Equal.  */
+       movi    a2, 0
+       leaf_return
+
+       /* Not equal.  */
+2:     movi    a2, 1
+       leaf_return
+
+       /* Check if the mantissas are nonzero.  */
+3:     slli    a7, xh, 12      /* shift out the sign and exponent */
+       or      a7, a7, xl
+       j       5f
+
+       /* Check if x and y are zero with different signs.  */
+4:     or      a7, xh, yh
+       slli    a7, a7, 1       /* shift out the sign bit */
+       or      a7, a7, xl      /* xl == yl here */
+
+       /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
+          of x when exponent(x) = 0x7ff and x == y.  */
+5:     movi    a2, 0
+       movi    a3, 1
+       movnez  a2, a3, a7      /* a2 = 1 if a7 != 0 */
+       leaf_return
+
+
+       /* Greater Than.  Returns 1 in a2 if x > y, else 0 (also for NaNs).  */
+
+       .align  4
+       .global __gtdf2
+       .type   __gtdf2, @function
+__gtdf2:
+       leaf_entry sp, 16
+       movi    a6, 0x7ff00000
+       ball    xh, a6, 2f      /* x is Inf or NaN */
+1:     bnall   yh, a6, .Lle_cmp        /* y is finite: use the shared compare */
+
+       /* Check if y is a NaN.  */
+       slli    a7, yh, 12
+       or      a7, a7, yl
+       beqz    a7, .Lle_cmp    /* y is Infinity: still ordered */
+       movi    a2, 0           /* unordered: "x > y" is false */
+       leaf_return
+
+       /* Check if x is a NaN.  */
+2:     slli    a7, xh, 12
+       or      a7, a7, xl
+       beqz    a7, 1b          /* x is Infinity: go check y */
+       movi    a2, 0           /* unordered: "x > y" is false */
+       leaf_return
+
+
+       /* Less Than or Equal.  Returns 0 in a2 if x <= y, else 1 (also for NaNs).  */
+
+       .align  4
+       .global __ledf2
+       .type   __ledf2, @function
+__ledf2:
+       leaf_entry sp, 16
+       movi    a6, 0x7ff00000
+       ball    xh, a6, 2f      /* x is Inf or NaN */
+1:     bnall   yh, a6, .Lle_cmp        /* y is finite: compare */
+
+       /* Check if y is a NaN.  */
+       slli    a7, yh, 12
+       or      a7, a7, yl
+       beqz    a7, .Lle_cmp    /* y is Infinity: still ordered */
+       movi    a2, 1           /* unordered: "x <= y" is false */
+       leaf_return
+
+       /* Check if x is a NaN.  */
+2:     slli    a7, xh, 12
+       or      a7, a7, xl
+       beqz    a7, 1b          /* x is Infinity: go check y */
+       movi    a2, 1           /* unordered: "x <= y" is false */
+       leaf_return
+
+.Lle_cmp:
+       /* Check if x and y have different signs.  */
+       xor     a7, xh, yh
+       bltz    a7, .Lle_diff_signs
+
+       /* Check if x is negative.  */
+       bltz    xh, .Lle_xneg
+
+       /* Check if x <= y.  */
+       bltu    xh, yh, 4f
+       bne     xh, yh, 5f
+       bltu    yl, xl, 5f
+4:     movi    a2, 0           /* x <= y */
+       leaf_return
+
+.Lle_xneg:
+       /* Both are negative, so check if y <= x as unsigned bit patterns.  */
+       bltu    yh, xh, 4b
+       bne     yh, xh, 5f
+       bgeu    xl, yl, 4b
+5:     movi    a2, 1           /* x > y */
+       leaf_return
+
+.Lle_diff_signs:
+       bltz    xh, 4b          /* x is negative and y is not: x <= y */
+
+       /* Check if both x and y are zero.  */
+       or      a7, xh, yh
+       slli    a7, a7, 1       /* shift out the sign bit */
+       or      a7, a7, xl
+       or      a7, a7, yl
+       movi    a2, 1
+       movi    a3, 0
+       moveqz  a2, a3, a7      /* a2 = 0 if a7 == 0 (+0 <= -0) */
+       leaf_return
+
+
+       /* Greater Than or Equal.  Returns 0 in a2 if x >= y, else -1 (also for NaNs).  */
+
+       .align  4
+       .global __gedf2
+       .type   __gedf2, @function
+__gedf2:
+       leaf_entry sp, 16
+       movi    a6, 0x7ff00000
+       ball    xh, a6, 2f      /* x is Inf or NaN */
+1:     bnall   yh, a6, .Llt_cmp        /* y is finite: use the shared compare */
+
+       /* Check if y is a NaN.  */
+       slli    a7, yh, 12
+       or      a7, a7, yl
+       beqz    a7, .Llt_cmp    /* y is Infinity: still ordered */
+       movi    a2, -1          /* unordered: "x >= y" is false */
+       leaf_return
+
+       /* Check if x is a NaN.  */
+2:     slli    a7, xh, 12
+       or      a7, a7, xl
+       beqz    a7, 1b          /* x is Infinity: go check y */
+       movi    a2, -1          /* unordered: "x >= y" is false */
+       leaf_return
+
+
+       /* Less Than.  Returns -1 in a2 if x < y, else 0 (also for NaNs).  */
+
+       .align  4
+       .global __ltdf2
+       .type   __ltdf2, @function
+__ltdf2:
+       leaf_entry sp, 16
+       movi    a6, 0x7ff00000
+       ball    xh, a6, 2f      /* x is Inf or NaN */
+1:     bnall   yh, a6, .Llt_cmp        /* y is finite: compare */
+
+       /* Check if y is a NaN.  */
+       slli    a7, yh, 12
+       or      a7, a7, yl
+       beqz    a7, .Llt_cmp    /* y is Infinity: still ordered */
+       movi    a2, 0           /* unordered: "x < y" is false */
+       leaf_return
+
+       /* Check if x is a NaN.  */
+2:     slli    a7, xh, 12
+       or      a7, a7, xl
+       beqz    a7, 1b          /* x is Infinity: go check y */
+       movi    a2, 0           /* unordered: "x < y" is false */
+       leaf_return
+
+.Llt_cmp:
+       /* Check if x and y have different signs.  */
+       xor     a7, xh, yh
+       bltz    a7, .Llt_diff_signs
+
+       /* Check if x is negative.  */
+       bltz    xh, .Llt_xneg
+
+       /* Check if x < y.  */
+       bltu    xh, yh, 4f
+       bne     xh, yh, 5f
+       bgeu    xl, yl, 5f
+4:     movi    a2, -1          /* x < y */
+       leaf_return
+
+.Llt_xneg:
+       /* Both are negative, so check if y < x as unsigned bit patterns.  */
+       bltu    yh, xh, 4b
+       bne     yh, xh, 5f
+       bltu    yl, xl, 4b
+5:     movi    a2, 0           /* x >= y */
+       leaf_return
+
+.Llt_diff_signs:
+       bgez    xh, 5b          /* x is non-negative and y is negative */
+
+       /* x is negative and y is not: x < y unless both are zero.  */
+       or      a7, xh, yh
+       slli    a7, a7, 1       /* shift out the sign bit */
+       or      a7, a7, xl
+       or      a7, a7, yl
+       movi    a2, 0
+       movi    a3, -1
+       movnez  a2, a3, a7      /* a2 = -1 if a7 != 0 */
+       leaf_return
+
+
+       /* Unordered.  Returns 1 in a2 if x or y is a NaN, else 0.  */
+
+       .align  4
+       .global __unorddf2
+       .type   __unorddf2, @function
+__unorddf2:
+       leaf_entry sp, 16
+       movi    a6, 0x7ff00000
+       ball    xh, a6, 3f      /* x is Inf or NaN */
+1:     ball    yh, a6, 4f      /* y is Inf or NaN */
+2:     movi    a2, 0           /* neither operand is a NaN */
+       leaf_return
+
+3:     slli    a7, xh, 12      /* isolate the mantissa bits of x */
+       or      a7, a7, xl
+       beqz    a7, 1b          /* x is Infinity: go check y */
+       movi    a2, 1           /* x is a NaN */
+       leaf_return
+
+4:     slli    a7, yh, 12      /* isolate the mantissa bits of y */
+       or      a7, a7, yl
+       beqz    a7, 2b          /* y is Infinity: ordered */
+       movi    a2, 1           /* y is a NaN */
+       leaf_return
+
+#endif /* L_cmpdf2 */
+
+#ifdef L_fixdfsi
+       /* double -> signed 32-bit in a2, truncating; saturates on overflow/Inf.  */
+       .align  4
+       .global __fixdfsi
+       .type   __fixdfsi, @function
+__fixdfsi:
+       leaf_entry sp, 16
+
+       /* Check for NaN and Infinity.  */
+       movi    a6, 0x7ff00000
+       ball    xh, a6, .Lfixdfsi_nan_or_inf
+
+       /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
+       extui   a4, xh, 20, 11
+       extui   a5, a6, 19, 10  /* 0x3fe */
+       sub     a4, a4, a5
+       bgei    a4, 32, .Lfixdfsi_maxint        /* too large: saturate */
+       blti    a4, 1, .Lfixdfsi_zero   /* magnitude below 1.0 */
+
+       /* Add explicit "1.0" and shift << 11.  */
+       or      a7, xh, a6      /* a7 keeps the sign of x in its msb */
+       ssai    (32 - 11)
+       src     a5, a7, xl
+
+       /* Shift back to the right, based on the exponent.  */
+       ssl     a4              /* shift by 32 - a4 */
+       srl     a5, a5
+
+       /* Negate the result if sign != 0.  */
+       neg     a2, a5
+       movgez  a2, a5, a7      /* keep a5 when the sign bit is clear */
+       leaf_return
+
+.Lfixdfsi_nan_or_inf:
+       /* Handle Infinity and NaN.  */
+       slli    a4, xh, 12
+       or      a4, a4, xl
+       beqz    a4, .Lfixdfsi_maxint    /* zero mantissa: Infinity */
+
+       /* Translate NaN to +maxint.  */
+       movi    xh, 0
+
+.Lfixdfsi_maxint:
+       slli    a4, a6, 11      /* 0x80000000 */
+       addi    a5, a4, -1      /* 0x7fffffff */
+       movgez  a4, a5, xh      /* non-negative x gets 0x7fffffff */
+       mov     a2, a4
+       leaf_return
+
+.Lfixdfsi_zero:
+       movi    a2, 0
+       leaf_return
+
+#endif /* L_fixdfsi */
+
+#ifdef L_fixdfdi
+       /* double -> signed 64-bit in xh/xl, truncating; saturates on overflow/Inf.  */
+       .align  4
+       .global __fixdfdi
+       .type   __fixdfdi, @function
+__fixdfdi:
+       leaf_entry sp, 16
+
+       /* Check for NaN and Infinity.  */
+       movi    a6, 0x7ff00000
+       ball    xh, a6, .Lfixdfdi_nan_or_inf
+
+       /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
+       extui   a4, xh, 20, 11
+       extui   a5, a6, 19, 10  /* 0x3fe */
+       sub     a4, a4, a5
+       bgei    a4, 64, .Lfixdfdi_maxint        /* too large: saturate */
+       blti    a4, 1, .Lfixdfdi_zero   /* magnitude below 1.0 */
+
+       /* Add explicit "1.0" and shift << 11.  */
+       or      a7, xh, a6      /* a7 keeps the sign of x in its msb */
+       ssai    (32 - 11)
+       src     xh, a7, xl
+       sll     xl, xl
+
+       /* Shift back to the right, based on the exponent.  */
+       ssl     a4              /* shift by 64 - a4 */
+       bgei    a4, 32, .Lfixdfdi_smallshift
+       srl     xl, xh          /* shift of 32 or more: result fits in xl */
+       movi    xh, 0
+
+.Lfixdfdi_shifted:     
+       /* Negate the result if sign != 0.  */
+       bgez    a7, 1f
+       neg     xl, xl
+       neg     xh, xh
+       beqz    xl, 1f          /* no borrow from the low word */
+       addi    xh, xh, -1
+1:     leaf_return
+
+.Lfixdfdi_smallshift:
+       src     xl, xh, xl      /* shift of less than 32 across both words */
+       srl     xh, xh
+       j       .Lfixdfdi_shifted
+
+.Lfixdfdi_nan_or_inf:
+       /* Handle Infinity and NaN.  */
+       slli    a4, xh, 12
+       or      a4, a4, xl
+       beqz    a4, .Lfixdfdi_maxint    /* zero mantissa: Infinity */
+
+       /* Translate NaN to +maxint.  */
+       movi    xh, 0
+
+.Lfixdfdi_maxint:
+       slli    a7, a6, 11      /* 0x80000000 */
+       bgez    xh, 1f
+       mov     xh, a7          /* negative: 0x8000000000000000 */
+       movi    xl, 0
+       leaf_return
+
+1:     addi    xh, a7, -1      /* 0x7fffffff */
+       movi    xl, -1
+       leaf_return
+
+.Lfixdfdi_zero:
+       movi    xh, 0
+       movi    xl, 0
+       leaf_return
+
+#endif /* L_fixdfdi */
+
+#ifdef L_fixunsdfsi
+       /* double -> unsigned 32-bit in a2, truncating; see below for Inf/NaN/negatives.  */
+       .align  4
+       .global __fixunsdfsi
+       .type   __fixunsdfsi, @function
+__fixunsdfsi:
+       leaf_entry sp, 16
+
+       /* Check for NaN and Infinity.  */
+       movi    a6, 0x7ff00000
+       ball    xh, a6, .Lfixunsdfsi_nan_or_inf
+
+       /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
+       extui   a4, xh, 20, 11
+       extui   a5, a6, 20, 10  /* 0x3ff */
+       sub     a4, a4, a5
+       bgei    a4, 32, .Lfixunsdfsi_maxint     /* too large: saturate */
+       bltz    a4, .Lfixunsdfsi_zero   /* magnitude below 1.0 */
+
+       /* Add explicit "1.0" and shift << 11.  */
+       or      a7, xh, a6      /* a7 keeps the sign of x in its msb */
+       ssai    (32 - 11)
+       src     a5, a7, xl
+
+       /* Shift back to the right, based on the exponent.  */
+       addi    a4, a4, 1
+       beqi    a4, 32, .Lfixunsdfsi_bigexp
+       ssl     a4              /* shift by 32 - a4 */
+       srl     a5, a5
+
+       /* Negate the result if sign != 0.  */
+       neg     a2, a5
+       movgez  a2, a5, a7      /* keep a5 when the sign bit is clear */
+       leaf_return
+
+.Lfixunsdfsi_nan_or_inf:
+       /* Handle Infinity and NaN.  */
+       slli    a4, xh, 12
+       or      a4, a4, xl
+       beqz    a4, .Lfixunsdfsi_maxint /* zero mantissa: Infinity */
+
+       /* Translate NaN to 0xffffffff.  */
+       movi    a2, -1
+       leaf_return
+
+.Lfixunsdfsi_maxint:
+       slli    a4, a6, 11      /* 0x80000000 */
+       movi    a5, -1          /* 0xffffffff */
+       movgez  a4, a5, xh      /* non-negative x gets 0xffffffff */
+       mov     a2, a4
+       leaf_return
+
+.Lfixunsdfsi_zero:
+       movi    a2, 0
+       leaf_return
+
+.Lfixunsdfsi_bigexp:
+       /* Handle unsigned maximum exponent case.  */
+       bltz    xh, 1f
+       mov     a2, a5          /* no shift needed */
+       leaf_return
+
+       /* Return 0x80000000 if negative.  */
+1:     slli    a2, a6, 11
+       leaf_return
+
+#endif /* L_fixunsdfsi */
+
+#ifdef L_fixunsdfdi
+       /* double -> unsigned 64-bit in xh/xl, truncating; see below for Inf/NaN/negatives.  */
+       .align  4
+       .global __fixunsdfdi
+       .type   __fixunsdfdi, @function
+__fixunsdfdi:
+       leaf_entry sp, 16
+
+       /* Check for NaN and Infinity.  */
+       movi    a6, 0x7ff00000
+       ball    xh, a6, .Lfixunsdfdi_nan_or_inf
+
+       /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
+       extui   a4, xh, 20, 11
+       extui   a5, a6, 20, 10  /* 0x3ff */
+       sub     a4, a4, a5
+       bgei    a4, 64, .Lfixunsdfdi_maxint     /* too large: saturate */
+       bltz    a4, .Lfixunsdfdi_zero   /* magnitude below 1.0 */
+
+       /* Add explicit "1.0" and shift << 11.  */
+       or      a7, xh, a6      /* a7 keeps the sign of x in its msb */
+       ssai    (32 - 11)
+       src     xh, a7, xl
+       sll     xl, xl
+
+       /* Shift back to the right, based on the exponent.  */
+       addi    a4, a4, 1
+       beqi    a4, 64, .Lfixunsdfdi_bigexp
+       ssl     a4              /* shift by 64 - a4 */
+       bgei    a4, 32, .Lfixunsdfdi_smallshift
+       srl     xl, xh          /* shift of 32 or more: result fits in xl */
+       movi    xh, 0
+
+.Lfixunsdfdi_shifted:
+       /* Negate the result if sign != 0.  */
+       bgez    a7, 1f
+       neg     xl, xl
+       neg     xh, xh
+       beqz    xl, 1f          /* no borrow from the low word */
+       addi    xh, xh, -1
+1:     leaf_return
+
+.Lfixunsdfdi_smallshift:
+       src     xl, xh, xl      /* shift of less than 32 across both words */
+       srl     xh, xh
+       j       .Lfixunsdfdi_shifted
+
+.Lfixunsdfdi_nan_or_inf:
+       /* Handle Infinity and NaN.  */
+       slli    a4, xh, 12
+       or      a4, a4, xl
+       beqz    a4, .Lfixunsdfdi_maxint /* zero mantissa: Infinity */
+
+       /* Translate NaN to 0xffffffff.... */
+1:     movi    xh, -1
+       movi    xl, -1
+       leaf_return
+
+.Lfixunsdfdi_maxint:
+       bgez    xh, 1b          /* non-negative: all ones */
+2:     slli    xh, a6, 11      /* 0x80000000 */
+       movi    xl, 0
+       leaf_return
+
+.Lfixunsdfdi_zero:
+       movi    xh, 0
+       movi    xl, 0
+       leaf_return
+
+.Lfixunsdfdi_bigexp:
+       /* Handle unsigned maximum exponent case.  */
+       bltz    a7, 2b          /* negative: return 0x8000000000000000 */
+       leaf_return             /* no shift needed */
+
+#endif /* L_fixunsdfdi */
+
+#ifdef L_floatsidf
+       /* Convert the 32-bit integer in a2 (unsigned/signed entry) to a double in xh/xl.  */
+       .align  4
+       .global __floatunsidf
+       .type   __floatunsidf, @function
+__floatunsidf:
+       leaf_entry sp, 16
+       beqz    a2, .Lfloatsidf_return_zero
+
+       /* Set the sign to zero and jump to the floatsidf code.  */
+       movi    a7, 0
+       j       .Lfloatsidf_normalize
+
+       .align  4
+       .global __floatsidf
+       .type   __floatsidf, @function
+__floatsidf:
+       leaf_entry sp, 16
+
+       /* Check for zero.  */
+       beqz    a2, .Lfloatsidf_return_zero
+
+       /* Save the sign.  */
+       extui   a7, a2, 31, 1
+
+       /* Get the absolute value.  */
+#if XCHAL_HAVE_ABS
+       abs     a2, a2
+#else
+       neg     a4, a2
+       movltz  a2, a4, a2      /* a2 = -a2 if a2 < 0 */
+#endif
+
+.Lfloatsidf_normalize:
+       /* Normalize with the first 1 bit in the msb.  */
+       do_nsau a4, a2, a5, a6  /* macro defined elsewhere; a4 is used as the shift count */
+       ssl     a4
+       sll     a5, a2
+
+       /* Shift the mantissa into position.  */
+       srli    xh, a5, 11
+       slli    xl, a5, (32 - 11)
+
+       /* Set the exponent.  */
+       movi    a5, 0x41d       /* 0x3fe + 31 */
+       sub     a5, a5, a4
+       slli    a5, a5, 20
+       add     xh, xh, a5      /* the leading 1 of the mantissa carries into the exponent */
+
+       /* Add the sign and return. */
+       slli    a7, a7, 31
+       or      xh, xh, a7
+       leaf_return
+
+.Lfloatsidf_return_zero:
+       movi    a3, 0           /* a2 is already zero here */
+       leaf_return
+
+#endif /* L_floatsidf */
+
+#ifdef L_floatdidf
+       /* Convert the 64-bit integer in xh/xl (unsigned/signed entry) to a double, rounding to nearest even.  */
+       .align  4
+       .global __floatundidf
+       .type   __floatundidf, @function
+__floatundidf:
+       leaf_entry sp, 16
+
+       /* Check for zero.  */
+       or      a4, xh, xl
+       beqz    a4, 2f          /* zero input is returned unchanged */
+
+       /* Set the sign to zero and jump to the floatdidf code.  */
+       movi    a7, 0
+       j       .Lfloatdidf_normalize
+
+       .align  4
+       .global __floatdidf
+       .type   __floatdidf, @function
+__floatdidf:
+       leaf_entry sp, 16
+
+       /* Check for zero.  */
+       or      a4, xh, xl
+       beqz    a4, 2f          /* zero input is returned unchanged */
+
+       /* Save the sign.  */
+       extui   a7, xh, 31, 1
+
+       /* Get the absolute value.  */
+       bgez    xh, .Lfloatdidf_normalize
+       neg     xl, xl
+       neg     xh, xh
+       beqz    xl, .Lfloatdidf_normalize       /* no borrow from the low word */
+       addi    xh, xh, -1
+
+.Lfloatdidf_normalize:
+       /* Normalize with the first 1 bit in the msb of xh.  */
+       beqz    xh, .Lfloatdidf_bigshift
+       do_nsau a4, xh, a5, a6  /* macro defined elsewhere; a4 is used as the shift count */
+       ssl     a4
+       src     xh, xh, xl
+       sll     xl, xl
+
+.Lfloatdidf_shifted:
+       /* Shift the mantissa into position, with rounding bits in a6.  */
+       ssai    11
+       sll     a6, xl          /* the 11 bits about to be shifted out of xl */
+       src     xl, xh, xl
+       srl     xh, xh
+
+       /* Set the exponent.  */
+       movi    a5, 0x43d       /* 0x3fe + 63 */
+       sub     a5, a5, a4
+       slli    a5, a5, 20
+       add     xh, xh, a5      /* the leading 1 of the mantissa carries into the exponent */
+
+       /* Add the sign.  */
+       slli    a7, a7, 31
+       or      xh, xh, a7
+
+       /* Round up if the leftover fraction is >= 1/2.  */
+       bgez    a6, 2f
+       addi    xl, xl, 1
+       beqz    xl, .Lfloatdidf_roundcarry
+
+       /* Check if the leftover fraction is exactly 1/2.  */
+       slli    a6, a6, 1
+       beqz    a6, .Lfloatdidf_exactlyhalf
+2:     leaf_return
+
+.Lfloatdidf_bigshift:
+       /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
+       do_nsau a4, xl, a5, a6
+       ssl     a4
+       sll     xh, xl
+       movi    xl, 0
+       addi    a4, a4, 32      /* account for the whole-word shift */
+       j       .Lfloatdidf_shifted
+
+.Lfloatdidf_exactlyhalf:
+       /* Round down to the nearest even value.  */
+       srli    xl, xl, 1
+       slli    xl, xl, 1
+       leaf_return
+
+.Lfloatdidf_roundcarry:
+       /* xl is always zero when the rounding increment overflows, so
+          there's no need to round it to an even value.  */
+       addi    xh, xh, 1
+       /* Overflow to the exponent is OK.  */
+       leaf_return
+
+#endif /* L_floatdidf */
+
+#ifdef L_truncdfsf2
+       /* Convert the double in xh/xl to a float in a2, rounding to nearest even.  */
+       .align  4
+       .global __truncdfsf2
+       .type   __truncdfsf2, @function
+__truncdfsf2:
+       leaf_entry sp, 16
+
+       /* Adjust the exponent bias.  */
+       movi    a4, (0x3ff - 0x7f) << 20
+       sub     a5, xh, a4
+
+       /* Check for underflow.  */
+       xor     a6, xh, a5
+       bltz    a6, .Ltrunc_underflow   /* the subtraction flipped the sign bit */
+       extui   a6, a5, 20, 11
+       beqz    a6, .Ltrunc_underflow   /* rebiased exponent is zero */
+
+       /* Check for overflow.  */
+       movi    a4, 255
+       bge     a6, a4, .Ltrunc_overflow
+
+       /* Shift a5/xl << 3 into a5/a4.  */
+       ssai    (32 - 3)
+       src     a5, a5, xl
+       sll     a4, xl          /* a4 holds the leftover fraction bits */
+
+.Ltrunc_addsign:
+       /* Add the sign bit.  */
+       extui   a6, xh, 31, 1
+       slli    a6, a6, 31
+       or      a2, a6, a5
+
+       /* Round up if the leftover fraction is >= 1/2.  */
+       bgez    a4, 1f
+       addi    a2, a2, 1
+       /* Overflow to the exponent is OK.  The answer will be correct.  */
+
+       /* Check if the leftover fraction is exactly 1/2.  */
+       slli    a4, a4, 1
+       beqz    a4, .Ltrunc_exactlyhalf
+1:     leaf_return
+
+.Ltrunc_exactlyhalf:
+       /* Round down to the nearest even value.  */
+       srli    a2, a2, 1
+       slli    a2, a2, 1
+       leaf_return
+
+.Ltrunc_overflow:
+       /* Check if exponent == 0x7ff.  */
+       movi    a4, 0x7ff00000
+       bnall   xh, a4, 1f      /* finite but too large: return Infinity */
+
+       /* Check if mantissa is nonzero.  */
+       slli    a5, xh, 12
+       or      a5, a5, xl
+       beqz    a5, 1f          /* input is Infinity */
+
+       /* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
+       srli    a4, a4, 1
+
+1:     slli    a4, a4, 4       /* 0xff000000 or 0xff800000 */
+       /* Add the sign bit.  */
+       extui   a6, xh, 31, 1
+       ssai    1
+       src     a2, a6, a4      /* shift the sign in above the exponent */
+       leaf_return
+
+.Ltrunc_underflow:
+       /* Find shift count for a subnormal.  Flush to zero if >= 32.  */
+       extui   a6, xh, 20, 11
+       movi    a5, 0x3ff - 0x7f
+       sub     a6, a5, a6
+       addi    a6, a6, 1
+       bgeui   a6, 32, 1f
+
+       /* Replace the exponent with an explicit "1.0".  */
+       slli    a5, a5, 13      /* 0x700000 */
+       or      a5, a5, xh
+       slli    a5, a5, 11      /* clear everything above bit 20 */
+       srli    a5, a5, 11
+
+       /* Shift the mantissa left by 3 bits (into a5/a4).  */
+       ssai    (32 - 3)
+       src     a5, a5, xl
+       sll     a4, xl
+
+       /* Shift right by a6.  */
+       ssr     a6
+       sll     a7, a4          /* bits shifted out of a4 */
+       src     a4, a5, a4
+       srl     a5, a5
+       beqz    a7, .Ltrunc_addsign
+       or      a4, a4, a6      /* any positive, nonzero value will work */
+       j       .Ltrunc_addsign
+
+       /* Return +/- zero.  */
+1:     extui   a2, xh, 31, 1
+       slli    a2, a2, 31
+       leaf_return
+
+#endif /* L_truncdfsf2 */
+
+#ifdef L_extendsfdf2
+       /* Convert the float in a2 to a double in xh/xl.  */
+       .align  4
+       .global __extendsfdf2
+       .type   __extendsfdf2, @function
+__extendsfdf2:
+       leaf_entry sp, 16
+
+       /* Save the sign bit and then shift it off.  */
+       extui   a5, a2, 31, 1
+       slli    a5, a5, 31
+       slli    a4, a2, 1
+
+       /* Extract and check the exponent.  */
+       extui   a6, a2, 23, 8
+       beqz    a6, .Lextend_expzero
+       addi    a6, a6, 1
+       beqi    a6, 256, .Lextend_nan_or_inf    /* exponent field was 255 */
+
+       /* Shift >> 3 into a4/xl.  */
+       srli    a4, a4, 4       /* a4 was already shifted left by 1 */
+       slli    xl, a2, (32 - 3)
+
+       /* Adjust the exponent bias.  */
+       movi    a6, (0x3ff - 0x7f) << 20
+       add     a4, a4, a6
+
+       /* Add the sign bit.  */
+       or      xh, a4, a5
+       leaf_return
+
+.Lextend_nan_or_inf:
+       movi    a4, 0x7ff00000
+
+       /* Check for NaN.  */
+       slli    a7, a2, 9       /* shift out the sign and exponent */
+       beqz    a7, 1f
+
+       slli    a6, a6, 11      /* 0x80000 */
+       or      a4, a4, a6      /* set the top mantissa bit for the NaN result */
+
+       /* Add the sign and return.  */
+1:     or      xh, a4, a5
+       movi    xl, 0
+       leaf_return
+
+.Lextend_expzero:
+       beqz    a4, 1b          /* +/- zero: a4 == 0, so only the sign is returned */
+
+       /* Normalize it to have 8 zero bits before the first 1 bit.  */
+       do_nsau a7, a4, a2, a3  /* macro defined elsewhere; a7 is used as the shift count */
+       addi    a7, a7, -8
+       ssl     a7
+       sll     a4, a4
+       
+       /* Shift >> 3 into a4/xl.  */
+       slli    xl, a4, (32 - 3)
+       srli    a4, a4, 3
+
+       /* Set the exponent.  */
+       movi    a6, 0x3fe - 0x7f
+       sub     a6, a6, a7
+       slli    a6, a6, 20
+       add     a4, a4, a6
+
+       /* Add the sign and return.  */
+       or      xh, a4, a5
+       leaf_return
+
+#endif /* L_extendsfdf2 */
+
+
diff --git a/libgcc/config/xtensa/ieee754-sf.S b/libgcc/config/xtensa/ieee754-sf.S

new file mode 100644 (file)

index 0000000..d75be0e
--- /dev/null
+++ b/libgcc/config/xtensa/ieee754-sf.S
@@ -0,0 +1,1757 @@
+/* IEEE-754 single-precision functions for Xtensa
+   Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
+   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef __XTENSA_EB__   /* NOTE(review): xh/xl, yh/yl presumably name high/low words of 64-bit operands -- confirm */
+#define xh a2
+#define xl a3
+#define yh a4
+#define yl a5
+#else  /* !__XTENSA_EB__: the registers of each pair are swapped */
+#define xh a3
+#define xl a2
+#define yh a5
+#define yl a4
+#endif /* __XTENSA_EB__ */
+
+/*  Warning!  The branch displacements for some Xtensa branch instructions
+    are quite small, and this code has been carefully laid out to keep
+    branch targets in range.  If you change anything, be sure to check that
+    the assembler is not relaxing anything to branch over a jump.  */
+
+#ifdef L_negsf2
+       /* Negate the float in a2 by flipping its sign bit.  */
+       .align  4
+       .global __negsf2
+       .type   __negsf2, @function
+__negsf2:
+       leaf_entry sp, 16
+       movi    a4, 0x80000000  /* mask for bit 31, the sign bit */
+       xor     a2, a2, a4
+       leaf_return
+
+#endif /* L_negsf2 */
+
+#ifdef L_addsubsf3
+
+       /* Addition.  __addsf3 returns x + y in a2 (x in a2, y in a3).  */
+__addsf3_aux:
+
+       /* Handle NaNs and Infinities.  (This code is placed before the
+          start of the function just to keep it in range of the limited
+          branch displacements.)  */
+
+.Ladd_xnan_or_inf:
+       /* If y is neither Infinity nor NaN, return x.  */
+       bnall   a3, a6, 1f
+       /* If x is a NaN, return it.  Otherwise, return y.  */
+       slli    a7, a2, 9       /* shift out the sign and exponent */
+       beqz    a7, .Ladd_ynan_or_inf
+1:     leaf_return
+
+.Ladd_ynan_or_inf:
+       /* Return y.  */
+       mov     a2, a3
+       leaf_return
+
+.Ladd_opposite_signs:
+       /* Operand signs differ.  Do a subtraction.  */
+       slli    a7, a6, 8       /* 0x80000000 */
+       xor     a3, a3, a7      /* flip the sign of y */
+       j       .Lsub_same_sign
+
+       .align  4
+       .global __addsf3
+       .type   __addsf3, @function
+__addsf3:
+       leaf_entry sp, 16
+       movi    a6, 0x7f800000
+
+       /* Check if the two operands have the same sign.  */
+       xor     a7, a2, a3
+       bltz    a7, .Ladd_opposite_signs
+
+.Ladd_same_sign:       
+       /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
+       ball    a2, a6, .Ladd_xnan_or_inf
+       ball    a3, a6, .Ladd_ynan_or_inf
+
+       /* Compare the exponents.  The smaller operand will be shifted
+          right by the exponent difference and added to the larger
+          one.  */
+       extui   a7, a2, 23, 9   /* sign and exponent of x */
+       extui   a8, a3, 23, 9   /* sign and exponent of y */
+       bltu    a7, a8, .Ladd_shiftx
+
+.Ladd_shifty:
+       /* Check if the smaller (or equal) exponent is zero.  */
+       bnone   a3, a6, .Ladd_yexpzero
+
+       /* Replace y sign/exponent with 0x008.  */
+       or      a3, a3, a6
+       slli    a3, a3, 8
+       srli    a3, a3, 8
+
+.Ladd_yexpdiff:
+       /* Compute the exponent difference.  */
+       sub     a10, a7, a8
+
+       /* Exponent difference >= 32 -- just return the bigger value.  */
+       bgeui   a10, 32, 1f
+       
+       /* Shift y right by the exponent difference.  Any bits that are
+          shifted out of y are saved in a9 for rounding the result.  */
+       ssr     a10
+       movi    a9, 0
+       src     a9, a3, a9
+       srl     a3, a3
+
+       /* Do the addition.  */
+       add     a2, a2, a3
+
+       /* Check if the add overflowed into the exponent.  */
+       extui   a10, a2, 23, 9
+       beq     a10, a7, .Ladd_round
+       mov     a8, a7          /* .Ladd_carry expects the exponent in a8 */
+       j       .Ladd_carry
+
+.Ladd_yexpzero:
+       /* y is a subnormal value.  Replace its sign/exponent with zero,
+          i.e., no implicit "1.0", and increment the apparent exponent
+          because subnormals behave as if they had the minimum (nonzero)
+          exponent.  Test for the case when both exponents are zero.  */
+       slli    a3, a3, 9
+       srli    a3, a3, 9
+       bnone   a2, a6, .Ladd_bothexpzero
+       addi    a8, a8, 1
+       j       .Ladd_yexpdiff
+
+.Ladd_bothexpzero:
+       /* Both exponents are zero.  Handle this as a special case.  There
+          is no need to shift or round, and the normal code for handling
+          a carry into the exponent field will not work because it
+          assumes there is an implicit "1.0" that needs to be added.  */
+       add     a2, a2, a3
+1:     leaf_return
+
+.Ladd_xexpzero:
+       /* Same as "yexpzero" except skip handling the case when both
+          exponents are zero.  */
+       slli    a2, a2, 9
+       srli    a2, a2, 9
+       addi    a7, a7, 1
+       j       .Ladd_xexpdiff
+
+.Ladd_shiftx:
+       /* Same thing as the "shifty" code, but with x and y swapped.  Also,
+          because the exponent difference is always nonzero in this version,
+          the shift sequence can use SLL and skip loading a constant zero.  */
+       bnone   a2, a6, .Ladd_xexpzero
+
+       or      a2, a2, a6
+       slli    a2, a2, 8
+       srli    a2, a2, 8
+
+.Ladd_xexpdiff:
+       sub     a10, a8, a7
+       bgeui   a10, 32, .Ladd_returny  /* x is too small to affect the sum */
+       
+       ssr     a10
+       sll     a9, a2          /* bits shifted out of x, kept for rounding */
+       srl     a2, a2
+
+       add     a2, a2, a3
+
+       /* Check if the add overflowed into the exponent.  */
+       extui   a10, a2, 23, 9
+       bne     a10, a8, .Ladd_carry
+
+.Ladd_round:
+       /* Round up if the leftover fraction is >= 1/2.  */
+       bgez    a9, 1f
+       addi    a2, a2, 1
+
+       /* Check if the leftover fraction is exactly 1/2.  */
+       slli    a9, a9, 1
+       beqz    a9, .Ladd_exactlyhalf
+1:     leaf_return
+
+.Ladd_returny:
+       mov     a2, a3
+       leaf_return
+
+.Ladd_carry:   
+       /* The addition has overflowed into the exponent field, so the
+          value needs to be renormalized.  The mantissa of the result
+          can be recovered by subtracting the original exponent and
+          adding 0x800000 (which is the explicit "1.0" for the
+          mantissa of the non-shifted operand -- the "1.0" for the
+          shifted operand was already added).  The mantissa can then
+          be shifted right by one bit.  The explicit "1.0" of the
+          shifted mantissa then needs to be replaced by the exponent,
+          incremented by one to account for the normalizing shift.
+          It is faster to combine these operations: do the shift first
+          and combine the additions and subtractions.  If x is the
+          original exponent, the result is:
+              shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
+          or:
+              shifted mantissa + ((x + 1) << 22)
+          Note that the exponent is incremented here by leaving the
+          explicit "1.0" of the mantissa in the exponent field.  */
+
+       /* Shift x right by one bit.  Save the lsb.  */
+       mov     a10, a2
+       srli    a2, a2, 1
+
+       /* See explanation above.  The original exponent is in a8.  */
+       addi    a8, a8, 1
+       slli    a8, a8, 22
+       add     a2, a2, a8
+
+       /* Return an Infinity if the exponent overflowed.  */
+       ball    a2, a6, .Ladd_infinity
+       
+       /* Same thing as the "round" code except the msb of the leftover
+          fraction is bit 0 of a10, with the rest of the fraction in a9.  */
+       bbci.l  a10, 0, 1f
+       addi    a2, a2, 1
+       beqz    a9, .Ladd_exactlyhalf
+1:     leaf_return
+
+.Ladd_infinity:
+       /* Clear the mantissa.  */
+       srli    a2, a2, 23
+       slli    a2, a2, 23
+
+       /* The sign bit may have been lost in a carry-out.  Put it back.  */
+       slli    a8, a8, 1
+       or      a2, a2, a8
+       leaf_return
+
+.Ladd_exactlyhalf:
+       /* Round down to the nearest even value.  */
+       srli    a2, a2, 1
+       slli    a2, a2, 1
+       leaf_return
+
+
+       /* Subtraction: __subsf3 computes x - y, with x in a2 and y in a3;
+          the result is returned in a2.  */
+__subsf3_aux:
+       
+       /* Handle NaNs and Infinities.  (This code is placed before the
+          start of the function just to keep it in range of the limited
+          branch displacements.)  These handlers are entered from __subsf3
+          with a6 == 0x7f800000 (the exponent field mask).  */
+
+.Lsub_xnan_or_inf:
+       /* If y is neither Infinity nor NaN, return x.  */
+       bnall   a3, a6, 1f
+       /* Both x and y are either NaN or Inf, so the result is NaN.  */
+       movi    a4, 0x400000    /* make it a quiet NaN */
+       or      a2, a2, a4
+1:     leaf_return
+
+.Lsub_ynan_or_inf:
+       /* Negate y and return it.  */
+       slli    a7, a6, 8       /* 0x80000000, the sign bit */
+       xor     a2, a3, a7
+       leaf_return
+
+.Lsub_opposite_signs:
+       /* Operand signs differ.  Do an addition.  Flipping the sign of y
+          turns x - y into x + (-y), where both operands now have the
+          same sign.  */
+       slli    a7, a6, 8       /* 0x80000000, the sign bit */
+       xor     a3, a3, a7
+       j       .Ladd_same_sign
+
+       .align  4
+       .global __subsf3
+       .type   __subsf3, @function
+__subsf3:
+       leaf_entry sp, 16
+       movi    a6, 0x7f800000  /* exponent field mask */
+
+       /* Check if the two operands have the same sign.  */
+       xor     a7, a2, a3
+       bltz    a7, .Lsub_opposite_signs
+
+.Lsub_same_sign:       
+       /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
+       ball    a2, a6, .Lsub_xnan_or_inf
+       ball    a3, a6, .Lsub_ynan_or_inf
+
+       /* Compare the operands.  In contrast to addition, the entire
+          value matters here.  The exponents of x and y are extracted
+          into a7 and a8, respectively.  */
+       extui   a7, a2, 23, 8
+       extui   a8, a3, 23, 8
+       bltu    a2, a3, .Lsub_xsmaller
+
+.Lsub_ysmaller:
+       /* Check if the smaller (or equal) exponent is zero.  */
+       bnone   a3, a6, .Lsub_yexpzero
+
+       /* Replace y sign/exponent with 0x008, i.e., clear bits 24-31 and
+          leave bit 23 set as the explicit "1.0".  */
+       or      a3, a3, a6
+       slli    a3, a3, 8
+       srli    a3, a3, 8
+
+.Lsub_yexpdiff:
+       /* Compute the exponent difference.  */
+       sub     a10, a7, a8
+
+       /* Exponent difference >= 32 -- just return the bigger value
+          (x, which is still intact in a2).  */
+       bgeui   a10, 32, 1f
+       
+       /* Shift y right by the exponent difference.  Any bits that are
+          shifted out of y are saved in a9 for rounding the result.  */
+       ssr     a10
+       movi    a9, 0
+       src     a9, a3, a9
+       srl     a3, a3
+
+       sub     a2, a2, a3
+
+       /* Subtract the leftover bits in a9 from zero and propagate any
+          borrow from a2.  (a2 is decremented only when a9 is nonzero.)  */
+       neg     a9, a9
+       addi    a10, a2, -1
+       movnez  a2, a10, a9
+
+       /* Check if the subtract underflowed into the exponent.  If the
+          exponent field still equals x's original exponent (a7), there
+          was no borrow.  */
+       extui   a10, a2, 23, 8
+       beq     a10, a7, .Lsub_round
+       j       .Lsub_borrow
+
+.Lsub_yexpzero:
+       /* Return zero if the inputs are equal.  (For the non-subnormal
+          case, subtracting the "1.0" will cause a borrow from the exponent
+          and this case can be detected when handling the borrow.)  */
+       beq     a2, a3, .Lsub_return_zero
+
+       /* y is a subnormal value (or zero).  Replace its sign/exponent
+          with zero,
+          i.e., no implicit "1.0".  Unless x is also a subnormal, increment
+          y's apparent exponent because subnormals behave as if they had
+          the minimum (nonzero) exponent.  */
+       slli    a3, a3, 9
+       srli    a3, a3, 9
+       bnone   a2, a6, .Lsub_yexpdiff
+       addi    a8, a8, 1
+       j       .Lsub_yexpdiff
+
+.Lsub_returny:
+       /* Negate and return y.  */
+       slli    a7, a6, 8       /* 0x80000000, the sign bit */
+       xor     a2, a3, a7
+1:     leaf_return
+
+.Lsub_xsmaller:
+       /* Same thing as the "ysmaller" code, but with x and y swapped and
+          with y negated.  */
+       bnone   a2, a6, .Lsub_xexpzero
+
+       or      a2, a2, a6      /* give x an explicit "1.0" in bit 23 */
+       slli    a2, a2, 8
+       srli    a2, a2, 8
+
+.Lsub_xexpdiff:
+       sub     a10, a8, a7     /* exponent difference */
+       bgeui   a10, 32, .Lsub_returny
+       
+       ssr     a10             /* shift x right; shifted-out bits go to a9 */
+       movi    a9, 0
+       src     a9, a2, a9
+       srl     a2, a2
+
+       /* Negate y.  */
+       slli    a11, a6, 8
+       xor     a3, a3, a11
+
+       sub     a2, a3, a2
+
+       neg     a9, a9          /* propagate a borrow from the leftover bits */
+       addi    a10, a2, -1
+       movnez  a2, a10, a9
+
+       /* Check if the subtract underflowed into the exponent.  */
+       extui   a10, a2, 23, 8
+       bne     a10, a8, .Lsub_borrow
+
+.Lsub_round:
+       /* Round up if the leftover fraction is >= 1/2.  (The msb of a9
+          is the 1/2 bit.)  */
+       bgez    a9, 1f
+       addi    a2, a2, 1
+
+       /* Check if the leftover fraction is exactly 1/2.  */
+       slli    a9, a9, 1
+       beqz    a9, .Lsub_exactlyhalf
+1:     leaf_return
+
+.Lsub_xexpzero:
+       /* Same as "yexpzero".  */
+       beq     a2, a3, .Lsub_return_zero
+       slli    a2, a2, 9
+       srli    a2, a2, 9
+       bnone   a3, a6, .Lsub_xexpdiff
+       addi    a7, a7, 1
+       j       .Lsub_xexpdiff
+
+.Lsub_return_zero:
+       movi    a2, 0           /* all-zero bit pattern, i.e., +0.0 */
+       leaf_return
+
+.Lsub_borrow:  
+       /* The subtraction has underflowed into the exponent field, so the
+          value needs to be renormalized.  Shift the mantissa left as
+          needed to remove any leading zeros and adjust the exponent
+          accordingly.  If the exponent is not large enough to remove
+          all the leading zeros, the result will be a subnormal value.
+          On entry a10 holds the exponent field of a2 after the
+          subtraction and a9 holds the leftover (guard) bits.
+          NOTE(review): do_nsau is a macro defined outside this view;
+          presumably it sets a6 to the number of leading zeros in a8,
+          using a7 and a11 as temporaries -- confirm.  */
+
+       slli    a8, a2, 9
+       beqz    a8, .Lsub_xzero
+       do_nsau a6, a8, a7, a11
+       srli    a8, a8, 9
+       bge     a6, a10, .Lsub_subnormal
+       addi    a6, a6, 1
+
+.Lsub_normalize_shift:
+       /* Shift the mantissa (a8/a9) left by a6.  */
+       ssl     a6
+       src     a8, a8, a9
+       sll     a9, a9
+
+       /* Combine the shifted mantissa with the sign and exponent,
+          decrementing the exponent by a6.  (The exponent has already
+          been decremented by one due to the borrow from the subtraction,
+          but adding the mantissa will increment the exponent by one.)  */
+       srli    a2, a2, 23
+       sub     a2, a2, a6
+       slli    a2, a2, 23
+       add     a2, a2, a8
+       j       .Lsub_round
+
+.Lsub_exactlyhalf:
+       /* Round down to the nearest even value.  */
+       srli    a2, a2, 1
+       slli    a2, a2, 1
+       leaf_return
+
+.Lsub_xzero:
+       /* If there was a borrow from the exponent, and the mantissa and
+          guard digits are all zero, then the inputs were equal and the
+          result should be zero.  */
+       beqz    a9, .Lsub_return_zero
+
+       /* Only the guard digit is nonzero.  Shift by min(24, a10).  */
+       addi    a11, a10, -24
+       movi    a6, 24
+       movltz  a6, a10, a11    /* a6 = a10 if a10 < 24 */
+       j       .Lsub_normalize_shift
+
+.Lsub_subnormal:
+       /* The exponent is too small to shift away all the leading zeros.
+          Set a6 to the current exponent (which has already been
+          decremented by the borrow) so that the exponent of the result
+          will be zero.  Do not add 1 to a6 in this case, because: (1)
+          adding the mantissa will not increment the exponent, so there is
+          no need to subtract anything extra from the exponent to
+          compensate, and (2) the effective exponent of a subnormal is 1
+          not 0 so the shift amount must be 1 smaller than normal. */
+       mov     a6, a10
+       j       .Lsub_normalize_shift
+
+#endif /* L_addsubsf3 */
+
+#ifdef L_mulsf3
+
+       /* Multiplication */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+__mulsf3_aux:
+
+       /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+          (This code is placed before the start of the function just to
+          keep it in range of the limited branch displacements.)
+          These handlers are entered from __mulsf3 with x in a2, y in a3,
+          a6 == 0x7f800000 and a7 == x ^ y (whose msb is the sign of the
+          result).  Exits go through .Lmul_done so that the registers
+          saved at entry under the CALL0 ABI are restored.
+          NOTE(review): do_nsau is a macro defined outside this view;
+          presumably it counts the leading zeros of its second operand
+          into its first, using the last two as temporaries -- confirm.  */
+
+.Lmul_xexpzero:
+       /* Clear the sign bit of x.  */
+       slli    a2, a2, 1
+       srli    a2, a2, 1
+
+       /* If x is zero, return zero.  */
+       beqz    a2, .Lmul_return_zero
+
+       /* Normalize x.  Adjust the exponent in a8.  */
+       do_nsau a10, a2, a11, a12
+       addi    a10, a10, -8
+       ssl     a10
+       sll     a2, a2 
+       movi    a8, 1
+       sub     a8, a8, a10     /* a8 = 1 - shift amount */
+       j       .Lmul_xnormalized       
+       
+.Lmul_yexpzero:
+       /* Clear the sign bit of y.  */
+       slli    a3, a3, 1
+       srli    a3, a3, 1
+
+       /* If y is zero, return zero.  */
+       beqz    a3, .Lmul_return_zero
+
+       /* Normalize y.  Adjust the exponent in a9.  */
+       do_nsau a10, a3, a11, a12
+       addi    a10, a10, -8
+       ssl     a10
+       sll     a3, a3
+       movi    a9, 1
+       sub     a9, a9, a10     /* a9 = 1 - shift amount */
+       j       .Lmul_ynormalized       
+
+.Lmul_return_zero:
+       /* Return zero with the appropriate sign bit.  */
+       srli    a2, a7, 31
+       slli    a2, a2, 31
+       j       .Lmul_done
+
+.Lmul_xnan_or_inf:
+       /* If y is zero, return NaN.  */
+       slli    a8, a3, 1
+       bnez    a8, 1f
+       movi    a4, 0x400000    /* make it a quiet NaN */
+       or      a2, a2, a4
+       j       .Lmul_done
+1:
+       /* If y is NaN, return y.  Otherwise (y is finite or Infinity),
+          return x.  */
+       bnall   a3, a6, .Lmul_returnx
+       slli    a8, a3, 9
+       beqz    a8, .Lmul_returnx
+
+.Lmul_returny:
+       mov     a2, a3
+
+.Lmul_returnx:
+       /* Set the sign bit and return.  The sign (msb of a7) replaces
+          the msb of a2 via a 1-bit funnel shift.  */
+       extui   a7, a7, 31, 1
+       slli    a2, a2, 1
+       ssai    1
+       src     a2, a7, a2
+       j       .Lmul_done
+
+.Lmul_ynan_or_inf:
+       /* If x is zero, return NaN.  */
+       slli    a8, a2, 1
+       bnez    a8, .Lmul_returny
+       movi    a7, 0x400000    /* make it a quiet NaN */
+       or      a2, a3, a7
+       j       .Lmul_done
+
+       .align  4
+       .global __mulsf3
+       .type   __mulsf3, @function
+__mulsf3:
+#if __XTENSA_CALL0_ABI__
+       leaf_entry sp, 32
+       addi    sp, sp, -32
+       s32i    a12, sp, 16     /* a12-a15 are reloaded at .Lmul_done */
+       s32i    a13, sp, 20
+       s32i    a14, sp, 24
+       s32i    a15, sp, 28
+#elif XCHAL_NO_MUL
+       /* This is not really a leaf function; allocate enough stack space
+          to allow CALL12s to a helper function.  */
+       leaf_entry sp, 64
+#else
+       leaf_entry sp, 32
+#endif
+       movi    a6, 0x7f800000  /* exponent field mask */
+
+       /* Get the sign of the result.  (Only the msb of a7 is used.)  */
+       xor     a7, a2, a3
+
+       /* Check for NaN and infinity.  */
+       ball    a2, a6, .Lmul_xnan_or_inf
+       ball    a3, a6, .Lmul_ynan_or_inf
+
+       /* Extract the exponents: x's into a8 and y's into a9.  */
+       extui   a8, a2, 23, 8
+       extui   a9, a3, 23, 8
+
+       beqz    a8, .Lmul_xexpzero
+.Lmul_xnormalized:     
+       beqz    a9, .Lmul_yexpzero
+.Lmul_ynormalized:     
+
+       /* Add the exponents.  */
+       add     a8, a8, a9
+
+       /* Replace sign/exponent fields with explicit "1.0".  After this,
+          a2 and a3 each hold a 24-bit mantissa with bit 23 set.  */
+       movi    a10, 0xffffff
+       or      a2, a2, a6
+       and     a2, a2, a10
+       or      a3, a3, a6
+       and     a3, a3, a10
+
+       /* Multiply 32x32 to 64 bits.  The result ends up in a2/a6, with
+          the high word in a2 and the low word in a6.  */
+
+#if XCHAL_HAVE_MUL32_HIGH
+
+       mull    a6, a2, a3
+       muluh   a2, a2, a3
+
+#else
+
+       /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
+          products.  These partial products are:
+
+               0 xl * yl
+
+               1 xl * yh
+               2 xh * yl
+
+               3 xh * yh
+
+          If using the Mul16 or Mul32 multiplier options, these input
+          chunks must be stored in separate registers.  For Mac16, the
+          UMUL.AA.* opcodes can specify that the inputs come from either
+          half of the registers, so there is no need to shift them out
+          ahead of time.  If there is no multiply hardware, the 16-bit
+          chunks can be extracted when setting up the arguments to the
+          separate multiply function.
+
+          Each variant below defines do_mul(dst, xreg, xhalf, yreg, yhalf),
+          which multiplies the "xhalf" (l or h) 16 bits of xreg by the
+          "yhalf" 16 bits of yreg and leaves the product in dst.  */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+       /* Calling a separate multiply function will clobber a0 and requires
+          use of a8 as a temporary, so save those values now.  (The function
+          uses a custom ABI so nothing else needs to be saved.)  */
+       s32i    a0, sp, 0
+       s32i    a8, sp, 4
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define a2h a4
+#define a3h a5
+
+       /* Get the high halves of the inputs into registers.  */
+       srli    a2h, a2, 16
+       srli    a3h, a3, 16
+
+#define a2l a2
+#define a3l a3
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+       /* Clear the high halves of the inputs.  This does not matter
+          for MUL16 because the high bits are ignored.  */
+       extui   a2, a2, 0, 16
+       extui   a3, a3, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       mul16u  dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       mull    dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+   a period in the definition of do_mul below.  These macros are a workaround
+   using underscores instead of periods when doing the concatenation.  */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
+       rsr     dst, ACCLO
+
+#else /* no multiply hardware */
+       
+#define set_arg_l(dst, src) \
+       extui   dst, src, 0, 16
+#define set_arg_h(dst, src) \
+       srli    dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       set_arg_ ## xhalf (a13, xreg); \
+       set_arg_ ## yhalf (a14, yreg); \
+       call0   .Lmul_mulsi3; \
+       mov     dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       set_arg_ ## xhalf (a14, xreg); \
+       set_arg_ ## yhalf (a15, yreg); \
+       call12  .Lmul_mulsi3; \
+       mov     dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+       /* Add pp1 and pp2 into a6 with carry-out in a9.  */
+       do_mul(a6, a2, l, a3, h)        /* pp 1 */
+       do_mul(a11, a2, h, a3, l)       /* pp 2 */
+       movi    a9, 0
+       add     a6, a6, a11
+       bgeu    a6, a11, 1f
+       addi    a9, a9, 1
+1:
+       /* Shift the high half of a9/a6 into position in a9.  Note that
+          this value can be safely incremented without any carry-outs.  */
+       ssai    16
+       src     a9, a9, a6
+
+       /* Compute the low word into a6.  */
+       do_mul(a11, a2, l, a3, l)       /* pp 0 */
+       sll     a6, a6
+       add     a6, a6, a11
+       bgeu    a6, a11, 1f
+       addi    a9, a9, 1
+1:
+       /* Compute the high word into a2.  */
+       do_mul(a2, a2, h, a3, h)        /* pp 3 */
+       add     a2, a2, a9
+       
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+       /* Restore values saved on the stack during the multiplication.  */
+       l32i    a0, sp, 0
+       l32i    a8, sp, 4
+#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
+
+       /* Shift left by 9 bits, unless there was a carry-out from the
+          multiply, in which case, shift by 8 bits and increment the
+          exponent.  (a5 is nonzero when bit 15 or above of the high word
+          is set, i.e., when the product of the two 24-bit mantissas
+          needs 48 bits rather than 47.)  */
+       movi    a4, 9
+       srli    a5, a2, 24 - 9
+       beqz    a5, 1f
+       addi    a4, a4, -1
+       addi    a8, a8, 1
+1:     ssl     a4
+       src     a2, a2, a6
+       sll     a6, a6
+
+       /* Subtract the extra bias from the exponent sum (plus one to account
+          for the explicit "1.0" of the mantissa that will be added to the
+          exponent in the final result).  */
+       movi    a4, 0x80
+       sub     a8, a8, a4
+       
+       /* Check for over/underflow.  The value in a8 is one less than the
+          final exponent, so values in the range 0..fd are OK here.  The
+          unsigned comparison also catches negative values of a8; the
+          code at .Lmul_overflow tells the two cases apart.  */
+       movi    a4, 0xfe
+       bgeu    a8, a4, .Lmul_overflow
+       
+.Lmul_round:
+       /* Round.  The msb of a6 is the 1/2 bit of the leftover fraction.  */
+       bgez    a6, .Lmul_rounded
+       addi    a2, a2, 1
+       slli    a6, a6, 1
+       beqz    a6, .Lmul_exactlyhalf
+
+.Lmul_rounded:
+       /* Add the exponent to the mantissa.  */
+       slli    a8, a8, 23
+       add     a2, a2, a8
+
+.Lmul_addsign:
+       /* Add the sign bit.  */
+       srli    a7, a7, 31
+       slli    a7, a7, 31
+       or      a2, a2, a7
+
+.Lmul_done:
+#if __XTENSA_CALL0_ABI__
+       l32i    a12, sp, 16     /* reload the registers saved at entry */
+       l32i    a13, sp, 20
+       l32i    a14, sp, 24
+       l32i    a15, sp, 28
+       addi    sp, sp, 32
+#endif
+       leaf_return
+
+.Lmul_exactlyhalf:
+       /* Round down to the nearest even value.  */
+       srli    a2, a2, 1
+       slli    a2, a2, 1
+       j       .Lmul_rounded
+
+.Lmul_overflow:
+       bltz    a8, .Lmul_underflow
+       /* Return +/- Infinity.  */
+       movi    a8, 0xff
+       slli    a2, a8, 23
+       j       .Lmul_addsign
+
+.Lmul_underflow:
+       /* Create a subnormal value, where the exponent field contains zero,
+          but the effective exponent is 1.  The value of a8 is one less than
+          the actual exponent, so just negate it to get the shift amount.  */
+       neg     a8, a8
+       mov     a9, a6          /* keep the old leftover bits for later */
+       ssr     a8
+       bgeui   a8, 32, .Lmul_flush_to_zero
+       
+       /* Shift a2 right.  Any bits that are shifted out of a2 are saved
+          in a6 (combined with the shifted-out bits currently in a6) for
+          rounding the result.  */
+       sll     a6, a2
+       srl     a2, a2
+
+       /* Set the exponent to zero.  */
+       movi    a8, 0
+
+       /* Pack any nonzero bits shifted out into a6.  (The previous
+          leftover bits, saved in a9, are folded into the lsb of a6 as
+          a sticky bit.)  */
+       beqz    a9, .Lmul_round
+       movi    a9, 1
+       or      a6, a6, a9
+       j       .Lmul_round
+       
+.Lmul_flush_to_zero:
+       /* Return zero with the appropriate sign bit.  */
+       srli    a2, a7, 31
+       slli    a2, a2, 31
+       j       .Lmul_done
+
+#if XCHAL_NO_MUL
+       
+       /* For Xtensa processors with no multiply hardware, this simplified
+          version of _mulsi3 is used for multiplying 16-bit chunks of
+          the floating-point mantissas.  When using CALL0, this function
+          uses a custom ABI: the inputs are passed in a13 and a14, the
+          result is returned in a12, and a8 and a15 are clobbered.
+          Otherwise the body works on a2 and a3 and leaves the result in
+          a2, using a4, a5 and a6 as scratch registers.
+
+          The body is a shift-and-add loop: each iteration examines the
+          low four bits of the first operand, conditionally adds the
+          second operand scaled by 1, 2, 4 and 8, then shifts the first
+          operand right and the second left by four bits.  The loop ends
+          when the first operand becomes zero.
+          NOTE(review): do_addx2/do_addx4/do_addx8 are macros defined
+          outside this view; presumably they compute (src << k) + addend
+          -- confirm.  */
+       .align  4
+.Lmul_mulsi3:
+       leaf_entry sp, 16
+       .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+       movi    \dst, 0
+1:     add     \tmp1, \src2, \dst
+       extui   \tmp2, \src1, 0, 1
+       movnez  \dst, \tmp1, \tmp2
+
+       do_addx2 \tmp1, \src2, \dst, \tmp1
+       extui   \tmp2, \src1, 1, 1
+       movnez  \dst, \tmp1, \tmp2
+
+       do_addx4 \tmp1, \src2, \dst, \tmp1
+       extui   \tmp2, \src1, 2, 1
+       movnez  \dst, \tmp1, \tmp2
+
+       do_addx8 \tmp1, \src2, \dst, \tmp1
+       extui   \tmp2, \src1, 3, 1
+       movnez  \dst, \tmp1, \tmp2
+
+       srli    \src1, \src1, 4
+       slli    \src2, \src2, 4
+       bnez    \src1, 1b
+       .endm
+#if __XTENSA_CALL0_ABI__
+       mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+       /* The result will be written into a2, so save that argument in a4.  */
+       mov     a4, a2
+       mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+       leaf_return
+#endif /* XCHAL_NO_MUL */
+#endif /* L_mulsf3 */
+
+#ifdef L_divsf3
+
+       /* Division: __divsf3 computes x / y, with x in a2 and y in a3;
+          the result is returned in a2.  */
+__divsf3_aux:
+
+       /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+          (This code is placed before the start of the function just to
+          keep it in range of the limited branch displacements.)
+          These handlers are entered from __divsf3 with
+          a6 == 0x7f800000 and a7 == x ^ y (whose msb is the sign of the
+          result).
+          NOTE(review): do_nsau is a macro defined outside this view;
+          presumably it counts the leading zeros of its second operand
+          into its first, using the last two as temporaries -- confirm.  */
+
+.Ldiv_yexpzero:
+       /* Clear the sign bit of y.  */
+       slli    a3, a3, 1
+       srli    a3, a3, 1
+
+       /* Check for division by zero.  */
+       beqz    a3, .Ldiv_yzero
+
+       /* Normalize y.  Adjust the exponent in a9.  */
+       do_nsau a10, a3, a4, a5
+       addi    a10, a10, -8
+       ssl     a10
+       sll     a3, a3
+       movi    a9, 1
+       sub     a9, a9, a10     /* a9 = 1 - shift amount */
+       j       .Ldiv_ynormalized       
+
+.Ldiv_yzero:
+       /* y is zero.  Return NaN if x is also zero; otherwise, infinity.
+          In both cases the result carries the sign taken from a7.  */
+       slli    a4, a2, 1
+       srli    a4, a4, 1
+       srli    a2, a7, 31
+       slli    a2, a2, 31
+       or      a2, a2, a6
+       bnez    a4, 1f
+       movi    a4, 0x400000    /* make it a quiet NaN */
+       or      a2, a2, a4
+1:     leaf_return
+
+.Ldiv_xexpzero:
+       /* Clear the sign bit of x.  */
+       slli    a2, a2, 1
+       srli    a2, a2, 1
+
+       /* If x is zero, return zero.  */
+       beqz    a2, .Ldiv_return_zero
+
+       /* Normalize x.  Adjust the exponent in a8.  */
+       do_nsau a10, a2, a4, a5
+       addi    a10, a10, -8
+       ssl     a10
+       sll     a2, a2
+       movi    a8, 1
+       sub     a8, a8, a10     /* a8 = 1 - shift amount */
+       j       .Ldiv_xnormalized       
+       
+.Ldiv_return_zero:
+       /* Return zero with the appropriate sign bit.  */
+       srli    a2, a7, 31
+       slli    a2, a2, 31
+       leaf_return
+
+.Ldiv_xnan_or_inf:
+       /* Set the sign bit of the result.  (x's sign is flipped when y
+          is negative.)  */
+       srli    a7, a3, 31
+       slli    a7, a7, 31
+       xor     a2, a2, a7
+       /* If y is NaN or Inf, return NaN.  */
+       bnall   a3, a6, 1f
+       movi    a4, 0x400000    /* make it a quiet NaN */
+       or      a2, a2, a4
+1:     leaf_return
+
+.Ldiv_ynan_or_inf:
+       /* If y is Infinity, return zero.  */
+       slli    a8, a3, 9
+       beqz    a8, .Ldiv_return_zero
+       /* y is NaN; return it.  */
+       mov     a2, a3
+       leaf_return
+
+       .align  4
+       .global __divsf3
+       .type   __divsf3, @function
+__divsf3:
+       leaf_entry sp, 16
+       movi    a6, 0x7f800000  /* exponent field mask */
+
+       /* Get the sign of the result.  (Only the msb of a7 is used.)  */
+       xor     a7, a2, a3
+
+       /* Check for NaN and infinity.  */
+       ball    a2, a6, .Ldiv_xnan_or_inf
+       ball    a3, a6, .Ldiv_ynan_or_inf
+
+       /* Extract the exponents: x's into a8 and y's into a9.  */
+       extui   a8, a2, 23, 8
+       extui   a9, a3, 23, 8
+
+       beqz    a9, .Ldiv_yexpzero
+.Ldiv_ynormalized:     
+       beqz    a8, .Ldiv_xexpzero
+.Ldiv_xnormalized:     
+
+       /* Subtract the exponents.  */
+       sub     a8, a8, a9
+
+       /* Replace sign/exponent fields with explicit "1.0".  After this,
+          a2 and a3 each hold a 24-bit mantissa with bit 23 set.  */
+       movi    a10, 0xffffff
+       or      a2, a2, a6
+       and     a2, a2, a10
+       or      a3, a3, a6
+       and     a3, a3, a10
+
+       /* The first digit of the mantissa division must be a one.
+          Shift x (and adjust the exponent) as needed to make this true.  */
+       bltu    a3, a2, 1f
+       slli    a2, a2, 1
+       addi    a8, a8, -1
+1:
+       /* Do the first subtraction and shift.  */
+       sub     a2, a2, a3
+       slli    a2, a2, 1
+
+       /* Put the quotient into a10.  */
+       movi    a10, 1
+
+       /* Divide one bit at a time for 23 bits.  With XCHAL_HAVE_LOOPS
+          the iteration count in a9 drives a hardware loop; otherwise a9
+          is decremented explicitly at the bottom of the loop.  */
+       movi    a9, 23
+#if XCHAL_HAVE_LOOPS
+       loop    a9, .Ldiv_loopend
+#endif
+.Ldiv_loop:
+       /* Shift the quotient << 1.  */
+       slli    a10, a10, 1
+
+       /* Is this digit a 0 or 1?  */
+       bltu    a2, a3, 1f
+
+       /* Output a 1 and subtract.  */
+       addi    a10, a10, 1
+       sub     a2, a2, a3
+
+       /* Shift the dividend << 1.  */
+1:     slli    a2, a2, 1
+
+#if !XCHAL_HAVE_LOOPS
+       addi    a9, a9, -1
+       bnez    a9, .Ldiv_loop
+#endif
+.Ldiv_loopend:
+
+       /* Add the exponent bias (less one to account for the explicit "1.0"
+          of the mantissa that will be added to the exponent in the final
+          result).  */
+       addi    a8, a8, 0x7e
+       
+       /* Check for over/underflow.  The value in a8 is one less than the
+          final exponent, so values in the range 0..fd are OK here.  The
+          unsigned comparison also catches negative values of a8; the
+          code at .Ldiv_overflow tells the two cases apart.  */
+       movi    a4, 0xfe
+       bgeu    a8, a4, .Ldiv_overflow
+       
+.Ldiv_round:
+       /* Round.  The remainder (<< 1) is in a2.  Round up when it is at
+          least the divisor, i.e., when the remainder is >= 1/2 of the
+          divisor.  */
+       bltu    a2, a3, .Ldiv_rounded
+       addi    a10, a10, 1
+       beq     a2, a3, .Ldiv_exactlyhalf
+
+.Ldiv_rounded:
+       /* Add the exponent to the mantissa.  */
+       slli    a8, a8, 23
+       add     a2, a10, a8
+
+.Ldiv_addsign:
+       /* Add the sign bit.  */
+       srli    a7, a7, 31
+       slli    a7, a7, 31
+       or      a2, a2, a7
+       leaf_return
+
+.Ldiv_overflow:
+       bltz    a8, .Ldiv_underflow
+       /* Return +/- Infinity.  (a4 still holds 0xfe from the range
+          check in __divsf3.)  */
+       addi    a8, a4, 1       /* 0xff */
+       slli    a2, a8, 23
+       j       .Ldiv_addsign
+
+.Ldiv_exactlyhalf:
+       /* Remainder is exactly half the divisor.  Round even.  */
+       srli    a10, a10, 1
+       slli    a10, a10, 1
+       j       .Ldiv_rounded
+
+.Ldiv_underflow:
+       /* Create a subnormal value, where the exponent field contains zero,
+          but the effective exponent is 1.  The value of a8 is one less than
+          the actual exponent, so just negate it to get the shift amount.  */
+       neg     a8, a8
+       ssr     a8
+       bgeui   a8, 32, .Ldiv_flush_to_zero
+       
+       /* Shift a10 right.  Any bits that are shifted out of a10 are
+          saved in a6 for rounding the result.  */
+       sll     a6, a10
+       srl     a10, a10
+
+       /* Set the exponent to zero.  */
+       movi    a8, 0
+
+       /* Pack any nonzero remainder (in a2) into a6.  (It is folded
+          into the lsb of a6 as a sticky bit.)  */
+       beqz    a2, 1f
+       movi    a9, 1
+       or      a6, a6, a9
+       
+       /* Round a10 based on the bits shifted out into a6.  The msb of
+          a6 is the 1/2 bit; if the rest is zero the value is exactly
+          half and the result is rounded to even.  */
+1:     bgez    a6, .Ldiv_rounded
+       addi    a10, a10, 1
+       slli    a6, a6, 1
+       bnez    a6, .Ldiv_rounded
+       srli    a10, a10, 1
+       slli    a10, a10, 1
+       j       .Ldiv_rounded
+
+.Ldiv_flush_to_zero:
+       /* Return zero with the appropriate sign bit.  */
+       srli    a2, a7, 31
+       slli    a2, a2, 31
+       leaf_return
+
+#endif /* L_divsf3 */
+
+#ifdef L_cmpsf2
+
+       /* Equal and Not Equal: __eqsf2 (with __nesf2 as an alias for the
+          same code) compares x in a2 with y in a3 and returns 0 in a2
+          if they are equal, 1 otherwise.  +0 and -0 compare equal; a
+          NaN operand always yields 1.  */
+
+       .align  4
+       .global __eqsf2
+       .global __nesf2
+       .set    __nesf2, __eqsf2
+       .type   __eqsf2, @function
+__eqsf2:
+       leaf_entry sp, 16
+       bne     a2, a3, 4f
+
+       /* The values are equal but NaN != NaN.  Check the exponent.  */
+       movi    a6, 0x7f800000
+       ball    a2, a6, 3f
+
+       /* Equal.  */
+       movi    a2, 0
+       leaf_return
+
+       /* Not equal.  (No branch in this function targets label 2, so
+          these two instructions are not reached from __eqsf2.)  */
+2:     movi    a2, 1
+       leaf_return
+
+       /* Check if the mantissas are nonzero.  */
+3:     slli    a7, a2, 9
+       j       5f
+
+       /* Check if x and y are zero with different signs.  */
+4:     or      a7, a2, a3
+       slli    a7, a7, 1
+
+       /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
+          of x when exponent(x) = 0x7f8 and x == y.  */
+5:     movi    a2, 0
+       movi    a3, 1
+       movnez  a2, a3, a7      
+       leaf_return
+
+
+       /* Greater Than: __gtsf2 compares x in a2 with y in a3.  If either
+          operand is a NaN it returns 0 in a2; otherwise the result comes
+          from the comparison at .Lle_cmp, which returns 1 if x > y and
+          0 if x <= y.  */
+
+       .align  4
+       .global __gtsf2
+       .type   __gtsf2, @function
+__gtsf2:
+       leaf_entry sp, 16
+       movi    a6, 0x7f800000
+       ball    a2, a6, 2f
+1:     bnall   a3, a6, .Lle_cmp
+
+       /* Check if y is a NaN.  */
+       slli    a7, a3, 9
+       beqz    a7, .Lle_cmp
+       movi    a2, 0
+       leaf_return
+
+       /* Check if x is a NaN.  */
+2:     slli    a7, a2, 9
+       beqz    a7, 1b
+       movi    a2, 0
+       leaf_return
+
+
+       /* Less Than or Equal: __lesf2 compares x in a2 with y in a3 and
+          returns 0 in a2 if x <= y, 1 otherwise.  If either operand is
+          a NaN the result is 1.  */
+
+       .align  4
+       .global __lesf2
+       .type   __lesf2, @function
+__lesf2:
+       leaf_entry sp, 16
+       movi    a6, 0x7f800000
+       ball    a2, a6, 2f
+1:     bnall   a3, a6, .Lle_cmp
+
+       /* Check if y is a NaN.  */
+       slli    a7, a3, 9
+       beqz    a7, .Lle_cmp
+       movi    a2, 1
+       leaf_return
+
+       /* Check if x is a NaN.  */
+2:     slli    a7, a2, 9
+       beqz    a7, 1b
+       movi    a2, 1
+       leaf_return
+
+.Lle_cmp:
+       /* Check if x and y have different signs.  */
+       xor     a7, a2, a3
+       bltz    a7, .Lle_diff_signs
+
+       /* Check if x is negative.  */
+       bltz    a2, .Lle_xneg
+
+       /* Check if x <= y.  (Both are non-negative, so the bit patterns
+          compare like unsigned integers.)  */
+       bltu    a3, a2, 5f
+4:     movi    a2, 0
+       leaf_return
+
+.Lle_xneg:
+       /* Check if y <= x.  (Both are negative, so the unsigned
+          comparison of the bit patterns is reversed.)  */
+       bgeu    a2, a3, 4b
+5:     movi    a2, 1
+       leaf_return
+
+.Lle_diff_signs:
+       bltz    a2, 4b          /* x is negative and y is not: x <= y */
+
+       /* Check if both x and y are zero.  */
+       or      a7, a2, a3
+       slli    a7, a7, 1
+       movi    a2, 1
+       movi    a3, 0
+       moveqz  a2, a3, a7
+       leaf_return
+
+
+       /* Greater Than or Equal: __gesf2 compares x in a2 with y in a3.
+          If either operand is a NaN it returns -1 in a2; otherwise the
+          result comes from the comparison at .Llt_cmp, which returns 0
+          if x >= y and -1 if x < y.  */
+
+       .align  4
+       .global __gesf2
+       .type   __gesf2, @function
+__gesf2:
+       leaf_entry sp, 16
+       movi    a6, 0x7f800000
+       ball    a2, a6, 2f
+1:     bnall   a3, a6, .Llt_cmp
+
+       /* Check if y is a NaN.  */
+       slli    a7, a3, 9
+       beqz    a7, .Llt_cmp
+       movi    a2, -1
+       leaf_return
+
+       /* Check if x is a NaN.  */
+2:     slli    a7, a2, 9
+       beqz    a7, 1b
+       movi    a2, -1
+       leaf_return
+
+
+       /* Less Than: __ltsf2 compares x in a2 with y in a3 and returns
+          -1 in a2 if x < y, 0 otherwise.  If either operand is a NaN
+          the result is 0.  */
+
+       .align  4
+       .global __ltsf2
+       .type   __ltsf2, @function
+__ltsf2:
+       leaf_entry sp, 16
+       movi    a6, 0x7f800000
+       ball    a2, a6, 2f
+1:     bnall   a3, a6, .Llt_cmp
+
+       /* Check if y is a NaN.  */
+       slli    a7, a3, 9
+       beqz    a7, .Llt_cmp
+       movi    a2, 0
+       leaf_return
+
+       /* Check if x is a NaN.  */
+2:     slli    a7, a2, 9
+       beqz    a7, 1b
+       movi    a2, 0
+       leaf_return
+
+.Llt_cmp:
+       /* Check if x and y have different signs.  */
+       xor     a7, a2, a3
+       bltz    a7, .Llt_diff_signs
+
+       /* Check if x is negative.  */
+       bltz    a2, .Llt_xneg
+
+       /* Check if x < y.  (Both are non-negative, so the bit patterns
+          compare like unsigned integers.)  */
+       bgeu    a2, a3, 5f
+4:     movi    a2, -1
+       leaf_return
+
+.Llt_xneg:
+       /* Check if y < x.  (Both are negative, so the unsigned
+          comparison of the bit patterns is reversed.)  */
+       bltu    a3, a2, 4b
+5:     movi    a2, 0
+       leaf_return
+
+.Llt_diff_signs:
+       bgez    a2, 5b          /* x is not negative but y is: not x < y */
+
+       /* x is negative and y is not, so x < y unless both x and y are
+          zero (-0 and +0 compare equal).  */
+       or      a7, a2, a3
+       slli    a7, a7, 1
+       movi    a2, 0
+       movi    a3, -1
+       movnez  a2, a3, a7
+       leaf_return
+
+
+       /* Unordered: __unordsf2 returns 1 in a2 if either x (a2) or
+          y (a3) is a NaN, i.e., has all exponent bits set and a nonzero
+          mantissa; otherwise it returns 0.  */
+
+       .align  4
+       .global __unordsf2
+       .type   __unordsf2, @function
+__unordsf2:
+       leaf_entry sp, 16
+       movi    a6, 0x7f800000
+       ball    a2, a6, 3f
+1:     ball    a3, a6, 4f
+2:     movi    a2, 0
+       leaf_return
+
+3:     slli    a7, a2, 9       /* x: NaN or Infinity?  Test the mantissa. */
+       beqz    a7, 1b
+       movi    a2, 1
+       leaf_return
+
+4:     slli    a7, a3, 9       /* y: NaN or Infinity?  Test the mantissa. */
+       beqz    a7, 2b
+       movi    a2, 1
+       leaf_return
+
+#endif /* L_cmpsf2 */
+
+#ifdef L_fixsfsi
+
+       .align  4
+       .global __fixsfsi
+       .type   __fixsfsi, @function
+__fixsfsi:
+       leaf_entry sp, 16
+
+       /* Convert the single-precision value in a2 to a signed 32-bit
+          integer, returned in a2.  The fraction is discarded (truncation
+          toward zero).  Values too large in magnitude, and Infinities,
+          saturate to 0x7fffffff or 0x80000000 according to the sign; a
+          NaN yields 0x7fffffff.
+
+          Check for NaN and Infinity.  */
+       movi    a6, 0x7f800000
+       ball    a2, a6, .Lfixsfsi_nan_or_inf
+
+       /* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
+       extui   a4, a2, 23, 8
+       addi    a4, a4, -0x7e
+       bgei    a4, 32, .Lfixsfsi_maxint
+       blti    a4, 1, .Lfixsfsi_zero
+
+       /* Add explicit "1.0" and shift << 8.  (a7 keeps the original
+          sign bit for the final negation.)  */
+       or      a7, a2, a6
+       slli    a5, a7, 8
+
+       /* Shift back to the right, based on the exponent.  */
+       ssl     a4              /* shift by 32 - a4 */
+       srl     a5, a5
+
+       /* Negate the result if sign != 0.  */
+       neg     a2, a5
+       movgez  a2, a5, a7
+       leaf_return
+
+.Lfixsfsi_nan_or_inf:
+       /* Handle Infinity and NaN.  */
+       slli    a4, a2, 9
+       beqz    a4, .Lfixsfsi_maxint
+
+       /* Translate NaN to +maxint.  */
+       movi    a2, 0
+
+.Lfixsfsi_maxint:
+       slli    a4, a6, 8       /* 0x80000000 */
+       addi    a5, a4, -1      /* 0x7fffffff */
+       movgez  a4, a5, a2      /* pick 0x7fffffff when a2 is non-negative */
+       mov     a2, a4
+       leaf_return
+
+.Lfixsfsi_zero:
+       movi    a2, 0
+       leaf_return
+
+#endif /* L_fixsfsi */
+
+#ifdef L_fixsfdi
+
+       .align  4
+       .global __fixsfdi
+       .type   __fixsfdi, @function
+__fixsfdi:
+       leaf_entry sp, 16
+
+       /* Convert the single-precision value in a2 to a signed 64-bit
+          integer, returned in xh/xl.  The fraction is discarded
+          (truncation toward zero).  Values too large in magnitude, and
+          Infinities, saturate according to the sign; a NaN yields the
+          largest positive value.
+          NOTE(review): xh and xl are names defined outside this view;
+          presumably the registers holding the high and low words of the
+          64-bit result -- confirm.
+
+          Check for NaN and Infinity.  */
+       movi    a6, 0x7f800000
+       ball    a2, a6, .Lfixsfdi_nan_or_inf
+
+       /* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
+       extui   a4, a2, 23, 8
+       addi    a4, a4, -0x7e
+       bgei    a4, 64, .Lfixsfdi_maxint
+       blti    a4, 1, .Lfixsfdi_zero
+
+       /* Add explicit "1.0" and shift << 8.  (a7 keeps the original
+          sign bit for the final negation.)  */
+       or      a7, a2, a6
+       slli    xh, a7, 8
+
+       /* Shift back to the right, based on the exponent.  */
+       ssl     a4              /* shift by 64 - a4 */
+       bgei    a4, 32, .Lfixsfdi_smallshift
+       srl     xl, xh
+       movi    xh, 0
+
+.Lfixsfdi_shifted:     
+       /* Negate the result if sign != 0.  (64-bit negation: negate both
+          words, then borrow one from the high word unless the low word
+          is zero.)  */
+       bgez    a7, 1f
+       neg     xl, xl
+       neg     xh, xh
+       beqz    xl, 1f
+       addi    xh, xh, -1
+1:     leaf_return
+
+.Lfixsfdi_smallshift:
+       movi    xl, 0
+       sll     xl, xh
+       srl     xh, xh
+       j       .Lfixsfdi_shifted
+
+.Lfixsfdi_nan_or_inf:
+       /* Handle Infinity and NaN.  */
+       slli    a4, a2, 9
+       beqz    a4, .Lfixsfdi_maxint
+
+       /* Translate NaN to +maxint.  */
+       movi    a2, 0
+
+.Lfixsfdi_maxint:
+       slli    a7, a6, 8       /* 0x80000000 */
+       bgez    a2, 1f
+       mov     xh, a7          /* negative: 0x80000000:00000000 */
+       movi    xl, 0
+       leaf_return
+
+1:     addi    xh, a7, -1      /* 0x7fffffff */
+       movi    xl, -1
+       leaf_return
+
+.Lfixsfdi_zero:
+       movi    xh, 0
+       movi    xl, 0
+       leaf_return
+
+#endif /* L_fixsfdi */
+
+#ifdef L_fixunssfsi
+/* __fixunssfsi: float in a2 -> unsigned 32-bit int in a2 (truncates).  */
+       .align  4
+       .global __fixunssfsi
+       .type   __fixunssfsi, @function
+__fixunssfsi:
+       leaf_entry sp, 16
+
+       /* Check for NaN and Infinity.  */
+       movi    a6, 0x7f800000  /* mask of the 8 exponent bits */
+       ball    a2, a6, .Lfixunssfsi_nan_or_inf /* all exponent bits set */
+
+       /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
+       extui   a4, a2, 23, 8
+       addi    a4, a4, -0x7f
+       bgei    a4, 32, .Lfixunssfsi_maxint     /* too large: saturate */
+       bltz    a4, .Lfixunssfsi_zero           /* magnitude below 1: return 0 */
+
+       /* Add explicit "1.0" and shift << 8.  */
+       or      a7, a2, a6      /* a7 still carries the sign in bit 31 */
+       slli    a5, a7, 8
+
+       /* Shift back to the right, based on the exponent.  */
+       addi    a4, a4, 1
+       beqi    a4, 32, .Lfixunssfsi_bigexp     /* value uses all 32 bits */
+       ssl     a4              /* shift by 32 - a4 */
+       srl     a5, a5
+
+       /* Negate the result if sign != 0.  */
+       neg     a2, a5
+       movgez  a2, a5, a7      /* keep the positive value when a7 >= 0 */
+       leaf_return
+
+.Lfixunssfsi_nan_or_inf:
+       /* Handle Infinity and NaN.  */
+       slli    a4, a2, 9       /* shift out sign and exponent */
+       beqz    a4, .Lfixunssfsi_maxint /* zero mantissa: Infinity */
+
+       /* Translate NaN to 0xffffffff.  */
+       movi    a2, -1
+       leaf_return
+
+.Lfixunssfsi_maxint:
+       slli    a4, a6, 8       /* 0x80000000 */
+       movi    a5, -1          /* 0xffffffff */
+       movgez  a4, a5, a2      /* a2 >= 0: 0xffffffff, else 0x80000000 */
+       mov     a2, a4
+       leaf_return
+
+.Lfixunssfsi_zero:
+       movi    a2, 0
+       leaf_return
+
+.Lfixunssfsi_bigexp:
+       /* Handle unsigned maximum exponent case.  */
+       bltz    a2, 1f
+       mov     a2, a5          /* no shift needed */
+       leaf_return
+
+       /* Return 0x80000000 if negative.  */
+1:     slli    a2, a6, 8
+       leaf_return
+
+#endif /* L_fixunssfsi */
+
+#ifdef L_fixunssfdi
+/* __fixunssfdi: float in a2 -> unsigned 64-bit int in xh:xl (truncates).  */
+       .align  4
+       .global __fixunssfdi
+       .type   __fixunssfdi, @function
+__fixunssfdi:
+       leaf_entry sp, 16
+
+       /* Check for NaN and Infinity.  */
+       movi    a6, 0x7f800000  /* mask of the 8 exponent bits */
+       ball    a2, a6, .Lfixunssfdi_nan_or_inf /* all exponent bits set */
+
+       /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
+       extui   a4, a2, 23, 8
+       addi    a4, a4, -0x7f
+       bgei    a4, 64, .Lfixunssfdi_maxint     /* too large: saturate */
+       bltz    a4, .Lfixunssfdi_zero           /* magnitude below 1: return 0 */
+
+       /* Add explicit "1.0" and shift << 8.  */
+       or      a7, a2, a6      /* a7 still carries the sign in bit 31 */
+       slli    xh, a7, 8
+
+       /* Shift back to the right, based on the exponent.  */
+       addi    a4, a4, 1
+       beqi    a4, 64, .Lfixunssfdi_bigexp     /* value uses all 64 bits */
+       ssl     a4              /* shift by 64 - a4 */
+       bgei    a4, 32, .Lfixunssfdi_smallshift
+       srl     xl, xh          /* a4 < 32: the whole result fits in xl */
+       movi    xh, 0
+
+.Lfixunssfdi_shifted:
+       /* Negate the result if sign != 0.  */
+       bgez    a7, 1f
+       neg     xl, xl          /* 64-bit negate of xh:xl ... */
+       neg     xh, xh
+       beqz    xl, 1f
+       addi    xh, xh, -1      /* ... with a borrow into xh when xl != 0 */
+1:     leaf_return
+
+.Lfixunssfdi_smallshift:
+       movi    xl, 0
+       src     xl, xh, xl      /* funnel shift of xh:0 gives the low word */
+       srl     xh, xh
+       j       .Lfixunssfdi_shifted
+
+.Lfixunssfdi_nan_or_inf:
+       /* Handle Infinity and NaN.  */
+       slli    a4, a2, 9       /* shift out sign and exponent */
+       beqz    a4, .Lfixunssfdi_maxint /* zero mantissa: Infinity */
+
+       /* Translate NaN to 0xffffffff.... */
+1:     movi    xh, -1
+       movi    xl, -1
+       leaf_return
+
+.Lfixunssfdi_maxint:
+       bgez    a2, 1b          /* non-negative: all ones */
+2:     slli    xh, a6, 8       /* 0x80000000 */
+       movi    xl, 0
+       leaf_return
+
+.Lfixunssfdi_zero:
+       movi    xh, 0
+       movi    xl, 0
+       leaf_return
+
+.Lfixunssfdi_bigexp:
+       /* Handle unsigned maximum exponent case.  */
+       bltz    a7, 2b          /* negative: return 0x80000000:00000000 */
+       movi    xl, 0
+       leaf_return             /* no shift needed */
+
+#endif /* L_fixunssfdi */
+
+#ifdef L_floatsisf
+/* __floatunsisf / __floatsisf: 32-bit int in a2 -> float in a2, round to nearest even.  */
+       .align  4
+       .global __floatunsisf
+       .type   __floatunsisf, @function
+__floatunsisf:
+       leaf_entry sp, 16
+       beqz    a2, .Lfloatsisf_return  /* zero input is returned unchanged */
+
+       /* Set the sign to zero and jump to the floatsisf code.  */
+       movi    a7, 0
+       j       .Lfloatsisf_normalize
+
+       .align  4
+       .global __floatsisf
+       .type   __floatsisf, @function
+__floatsisf:
+       leaf_entry sp, 16
+
+       /* Check for zero.  */
+       beqz    a2, .Lfloatsisf_return
+
+       /* Save the sign.  */
+       extui   a7, a2, 31, 1
+
+       /* Get the absolute value.  */
+#if XCHAL_HAVE_ABS
+       abs     a2, a2
+#else
+       neg     a4, a2
+       movltz  a2, a4, a2      /* a2 < 0: replace it with -a2 */
+#endif
+
+.Lfloatsisf_normalize:
+       /* Normalize with the first 1 bit in the msb.  */
+       do_nsau a4, a2, a5, a6  /* a4 = number of leading zero bits in a2 */
+       ssl     a4
+       sll     a5, a2
+
+       /* Shift the mantissa into position, with rounding bits in a6.  */
+       srli    a2, a5, 8
+       slli    a6, a5, (32 - 8)        /* the 8 bits dropped by the srli */
+
+       /* Set the exponent.  */
+       movi    a5, 0x9d        /* 0x7e + 31 */
+       sub     a5, a5, a4
+       slli    a5, a5, 23
+       add     a2, a2, a5      /* the leading 1 in bit 23 adds 1 to the exponent */
+
+       /* Add the sign.  */
+       slli    a7, a7, 31
+       or      a2, a2, a7
+
+       /* Round up if the leftover fraction is >= 1/2.  */
+       bgez    a6, .Lfloatsisf_return  /* msb of a6 clear: fraction < 1/2 */
+       addi    a2, a2, 1       /* Overflow to the exponent is OK.  */
+
+       /* Check if the leftover fraction is exactly 1/2.  */
+       slli    a6, a6, 1
+       beqz    a6, .Lfloatsisf_exactlyhalf
+
+.Lfloatsisf_return:
+       leaf_return
+
+.Lfloatsisf_exactlyhalf:
+       /* Round down to the nearest even value.  */
+       srli    a2, a2, 1       /* shifting right then left clears bit 0 */
+       slli    a2, a2, 1
+       leaf_return
+
+#endif /* L_floatsisf */
+
+#ifdef L_floatdisf
+/* __floatundisf / __floatdisf: 64-bit int in xh:xl -> float in a2, round to nearest even.  */
+       .align  4
+       .global __floatundisf
+       .type   __floatundisf, @function
+__floatundisf:
+       leaf_entry sp, 16
+
+       /* Check for zero.  */
+       or      a4, xh, xl
+       beqz    a4, 2f
+
+       /* Set the sign to zero and jump to the floatdisf code.  */
+       movi    a7, 0
+       j       .Lfloatdisf_normalize
+
+       .align  4
+       .global __floatdisf
+       .type   __floatdisf, @function
+__floatdisf:
+       leaf_entry sp, 16
+
+       /* Check for zero.  */
+       or      a4, xh, xl
+       beqz    a4, 2f
+
+       /* Save the sign.  */
+       extui   a7, xh, 31, 1
+
+       /* Get the absolute value.  */
+       bgez    xh, .Lfloatdisf_normalize
+       neg     xl, xl          /* 64-bit negate of xh:xl ... */
+       neg     xh, xh
+       beqz    xl, .Lfloatdisf_normalize
+       addi    xh, xh, -1      /* ... with a borrow into xh when xl != 0 */
+
+.Lfloatdisf_normalize:
+       /* Normalize with the first 1 bit in the msb of xh.  */
+       beqz    xh, .Lfloatdisf_bigshift
+       do_nsau a4, xh, a5, a6  /* a4 = number of leading zero bits in xh */
+       ssl     a4
+       src     xh, xh, xl
+       sll     xl, xl
+
+.Lfloatdisf_shifted:
+       /* Shift the mantissa into position, with rounding bits in a6.  */
+       ssai    8
+       sll     a5, xl          /* a5 = bits of xl that fall below a6 */
+       src     a6, xh, xl
+       srl     xh, xh
+       beqz    a5, 1f
+       movi    a5, 1           /* any such bit set: record it as a sticky bit */
+       or      a6, a6, a5
+1:
+       /* Set the exponent.  */
+       movi    a5, 0xbd        /* 0x7e + 63 */
+       sub     a5, a5, a4
+       slli    a5, a5, 23
+       add     a2, xh, a5      /* the leading 1 in bit 23 adds 1 to the exponent */
+
+       /* Add the sign.  */
+       slli    a7, a7, 31
+       or      a2, a2, a7
+
+       /* Round up if the leftover fraction is >= 1/2.  */
+       bgez    a6, 2f          /* msb of a6 clear: fraction < 1/2 */
+       addi    a2, a2, 1       /* Overflow to the exponent is OK.  */
+
+       /* Check if the leftover fraction is exactly 1/2.  */
+       slli    a6, a6, 1
+       beqz    a6, .Lfloatdisf_exactlyhalf
+2:     leaf_return
+
+.Lfloatdisf_bigshift:
+       /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
+       do_nsau a4, xl, a5, a6
+       ssl     a4
+       sll     xh, xl
+       movi    xl, 0
+       addi    a4, a4, 32      /* count the 32 zero bits of the old xh too */
+       j       .Lfloatdisf_shifted
+
+.Lfloatdisf_exactlyhalf:
+       /* Round down to the nearest even value.  */
+       srli    a2, a2, 1       /* shifting right then left clears bit 0 */
+       slli    a2, a2, 1
+       leaf_return
+
+#endif /* L_floatdisf */
diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S

new file mode 100644 (file)

index 0000000..071b917
--- /dev/null
+++ b/libgcc/config/xtensa/lib1funcs.S
@@ -0,0 +1,845 @@
+/* Assembly functions for the Xtensa version of libgcc1.
+   Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009
+   Free Software Foundation, Inc.
+   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "xtensa-config.h"
+
+/* Define macros for the ABS and ADDX* instructions to handle cases
+   where they are not included in the Xtensa processor configuration.  */
+
+       .macro  do_abs dst, src, tmp
+#if XCHAL_HAVE_ABS
+       abs     \dst, \src
+#else
+       neg     \tmp, \src              /* tmp = -src */
+       movgez  \tmp, \src, \src        /* src >= 0: tmp = src instead */
+       mov     \dst, \tmp              /* dst = absolute value of src */
+#endif
+       .endm
+
+       .macro  do_addx2 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+       addx2   \dst, \as, \at
+#else
+       slli    \tmp, \as, 1            /* tmp = as * 2 */
+       add     \dst, \tmp, \at         /* dst = as * 2 + at */
+#endif
+       .endm
+
+       .macro  do_addx4 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+       addx4   \dst, \as, \at
+#else
+       slli    \tmp, \as, 2            /* tmp = as * 4 */
+       add     \dst, \tmp, \at         /* dst = as * 4 + at */
+#endif
+       .endm
+
+       .macro  do_addx8 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+       addx8   \dst, \as, \at
+#else
+       slli    \tmp, \as, 3            /* tmp = as * 8 */
+       add     \dst, \tmp, \at         /* dst = as * 8 + at */
+#endif
+       .endm
+
+/* Define macros for leaf function entry and return, supporting either the
+   standard register windowed ABI or the non-windowed call0 ABI.  These
+   macros do not allocate any extra stack space, so they only work for
+   leaf functions that do not need to spill anything to the stack.  */
+
+       .macro leaf_entry reg, size
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+       entry \reg, \size
+#else
+       /* do nothing: no instruction is emitted for the call0 ABI */
+#endif
+       .endm
+
+       .macro leaf_return
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+       retw            /* return that pairs with the entry in leaf_entry */
+#else
+       ret             /* plain return; leaf_entry emitted nothing */
+#endif
+       .endm
+
+
+#ifdef L_mulsi3
+       .align  4
+       .global __mulsi3
+       .type   __mulsi3, @function
+__mulsi3:       /* a2 = low 32 bits of a2 * a3 */
+       leaf_entry sp, 16
+
+#if XCHAL_HAVE_MUL32
+       mull    a2, a2, a3
+
+#elif XCHAL_HAVE_MUL16
+       or      a4, a2, a3
+       srai    a4, a4, 16
+       bnez    a4, .LMUL16     /* an operand has bits set above bit 15 */
+       mul16u  a2, a2, a3      /* both fit in 16 bits: one multiply */
+       leaf_return
+.LMUL16:
+       srai    a4, a2, 16      /* a4 = high half of a2 */
+       srai    a5, a3, 16      /* a5 = high half of a3 */
+       mul16u  a7, a4, a3      /* the two cross products ... */
+       mul16u  a6, a5, a2
+       mul16u  a4, a2, a3      /* ... and low half times low half */
+       add     a7, a7, a6
+       slli    a7, a7, 16
+       add     a2, a7, a4      /* (cross sum << 16) + low product */
+
+#elif XCHAL_HAVE_MAC16
+       mul.aa.hl a2, a3
+       mula.aa.lh a2, a3
+       rsr     a5, ACCLO       /* a5 = accumulated cross products */
+       umul.aa.ll a2, a3
+       rsr     a4, ACCLO       /* a4 = low half times low half */
+       slli    a5, a5, 16
+       add     a2, a4, a5
+
+#else /* !MUL32 && !MUL16 && !MAC16 */
+
+       /* Multiply one bit at a time, but unroll the loop 4x to better
+          exploit the addx instructions and avoid overhead.
+          Peel the first iteration to save a cycle on init.  */
+
+       /* Avoid negative numbers.  */
+       xor     a5, a2, a3      /* Top bit is 1 if one input is negative.  */
+       do_abs  a3, a3, a6
+       do_abs  a2, a2, a6
+
+       /* Swap so the second argument is smaller.  */
+       sub     a7, a2, a3
+       mov     a4, a3
+       movgez  a4, a2, a7      /* a4 = max (a2, a3) */
+       movltz  a3, a2, a7      /* a3 = min (a2, a3) */
+
+       movi    a2, 0           /* a2 accumulates the product */
+       extui   a6, a3, 0, 1
+       movnez  a2, a4, a6      /* bit 0 of a3 set: add a4 */
+
+       do_addx2 a7, a4, a2, a7
+       extui   a6, a3, 1, 1
+       movnez  a2, a7, a6      /* bit 1 set: add a4 * 2 */
+
+       do_addx4 a7, a4, a2, a7
+       extui   a6, a3, 2, 1
+       movnez  a2, a7, a6      /* bit 2 set: add a4 * 4 */
+
+       do_addx8 a7, a4, a2, a7
+       extui   a6, a3, 3, 1
+       movnez  a2, a7, a6      /* bit 3 set: add a4 * 8 */
+
+       bgeui   a3, 16, .Lmult_main_loop        /* more bits of a3 remain */
+       neg     a3, a2
+       movltz  a2, a3, a5      /* negate if the input signs differed */
+       leaf_return
+
+       .align  4
+.Lmult_main_loop:
+       srli    a3, a3, 4       /* advance to the next 4 bits of a3 */
+       slli    a4, a4, 4
+
+       add     a7, a4, a2
+       extui   a6, a3, 0, 1
+       movnez  a2, a7, a6
+
+       do_addx2 a7, a4, a2, a7
+       extui   a6, a3, 1, 1
+       movnez  a2, a7, a6
+
+       do_addx4 a7, a4, a2, a7
+       extui   a6, a3, 2, 1
+       movnez  a2, a7, a6
+
+       do_addx8 a7, a4, a2, a7
+       extui   a6, a3, 3, 1
+       movnez  a2, a7, a6
+
+       bgeui   a3, 16, .Lmult_main_loop
+
+       neg     a3, a2
+       movltz  a2, a3, a5      /* negate if the input signs differed */
+
+#endif /* !MUL32 && !MUL16 && !MAC16 */
+
+       leaf_return
+       .size   __mulsi3, . - __mulsi3
+
+#endif /* L_mulsi3 */
+
+
+#ifdef L_umulsidi3
+/* __umulsidi3: unsigned 32 x 32 -> 64-bit product of a2 and a3, returned in wh:wl.  */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+       .align  4
+       .global __umulsidi3
+       .type   __umulsidi3, @function
+__umulsidi3:
+#if __XTENSA_CALL0_ABI__
+       leaf_entry sp, 32
+       addi    sp, sp, -32     /* frame for the registers saved below */
+       s32i    a12, sp, 16     /* a12-a15 are reloaded before returning */
+       s32i    a13, sp, 20
+       s32i    a14, sp, 24
+       s32i    a15, sp, 28
+#elif XCHAL_NO_MUL
+       /* This is not really a leaf function; allocate enough stack space
+          to allow CALL12s to a helper function.  */
+       leaf_entry sp, 48
+#else
+       leaf_entry sp, 16
+#endif
+
+#ifdef __XTENSA_EB__
+#define wh a2
+#define wl a3
+#else
+#define wh a3
+#define wl a2
+#endif /* __XTENSA_EB__ */
+
+       /* This code is taken from the mulsf3 routine in ieee754-sf.S.
+          See more comments there.  */
+
+#if XCHAL_HAVE_MUL32_HIGH
+       mull    a6, a2, a3      /* low word goes via a6: wl aliases an input */
+       muluh   wh, a2, a3
+       mov     wl, a6
+
+#else /* ! MUL32_HIGH */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+       /* a0 and a8 will be clobbered by calling the multiply function
+          but a8 is not used here and need not be saved.  */
+       s32i    a0, sp, 0
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define a2h a4
+#define a3h a5
+
+       /* Get the high halves of the inputs into registers.  */
+       srli    a2h, a2, 16
+       srli    a3h, a3, 16
+
+#define a2l a2
+#define a3l a3
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+       /* Clear the high halves of the inputs.  This does not matter
+          for MUL16 because the high bits are ignored.  */
+       extui   a2, a2, 0, 16
+       extui   a3, a3, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       mul16u  dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       mull    dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+   a period in the definition of do_mul below.  These macros are a workaround
+   using underscores instead of periods when doing the concatenation.  */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
+       rsr     dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+       extui   dst, src, 0, 16
+#define set_arg_h(dst, src) \
+       srli    dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       set_arg_ ## xhalf (a13, xreg); \
+       set_arg_ ## yhalf (a14, yreg); \
+       call0   .Lmul_mulsi3; \
+       mov     dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+       set_arg_ ## xhalf (a14, xreg); \
+       set_arg_ ## yhalf (a15, yreg); \
+       call12  .Lmul_mulsi3; \
+       mov     dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+       /* Add pp1 and pp2 into a6 with carry-out in a9.  */
+       do_mul(a6, a2, l, a3, h)        /* pp 1 */
+       do_mul(a11, a2, h, a3, l)       /* pp 2 */
+       movi    a9, 0
+       add     a6, a6, a11
+       bgeu    a6, a11, 1f     /* sum >= addend: the add did not wrap */
+       addi    a9, a9, 1
+1:
+       /* Shift the high half of a9/a6 into position in a9.  Note that
+          this value can be safely incremented without any carry-outs.  */
+       ssai    16
+       src     a9, a9, a6
+
+       /* Compute the low word into a6.  */
+       do_mul(a11, a2, l, a3, l)       /* pp 0 */
+       sll     a6, a6
+       add     a6, a6, a11
+       bgeu    a6, a11, 1f     /* sum >= addend: the add did not wrap */
+       addi    a9, a9, 1
+1:
+       /* Compute the high word into wh.  */
+       do_mul(wh, a2, h, a3, h)        /* pp 3 */
+       add     wh, wh, a9
+       mov     wl, a6
+
+#endif /* !MUL32_HIGH */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+       /* Restore the original return address.  */
+       l32i    a0, sp, 0
+#endif
+#if __XTENSA_CALL0_ABI__
+       l32i    a12, sp, 16     /* reload the registers saved on entry */
+       l32i    a13, sp, 20
+       l32i    a14, sp, 24
+       l32i    a15, sp, 28
+       addi    sp, sp, 32      /* release the frame */
+#endif
+       leaf_return
+
+#if XCHAL_NO_MUL
+
+       /* For Xtensa processors with no multiply hardware, this simplified
+          version of _mulsi3 is used for multiplying 16-bit chunks of
+          the two 32-bit operands.  When using CALL0, this function
+          uses a custom ABI: the inputs are passed in a13 and a14, the
+          result is returned in a12, and a8 and a15 are clobbered.  */
+       .align  4
+.Lmul_mulsi3:
+       leaf_entry sp, 16
+       .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+       movi    \dst, 0
+1:     add     \tmp1, \src2, \dst
+       extui   \tmp2, \src1, 0, 1
+       movnez  \dst, \tmp1, \tmp2
+
+       do_addx2 \tmp1, \src2, \dst, \tmp1
+       extui   \tmp2, \src1, 1, 1
+       movnez  \dst, \tmp1, \tmp2
+
+       do_addx4 \tmp1, \src2, \dst, \tmp1
+       extui   \tmp2, \src1, 2, 1
+       movnez  \dst, \tmp1, \tmp2
+
+       do_addx8 \tmp1, \src2, \dst, \tmp1
+       extui   \tmp2, \src1, 3, 1
+       movnez  \dst, \tmp1, \tmp2
+
+       srli    \src1, \src1, 4
+       slli    \src2, \src2, 4
+       bnez    \src1, 1b
+       .endm
+#if __XTENSA_CALL0_ABI__
+       mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+       /* The result will be written into a2, so save that argument in a4.  */
+       mov     a4, a2
+       mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+       leaf_return
+#endif /* XCHAL_NO_MUL */
+
+       .size   __umulsidi3, . - __umulsidi3
+
+#endif /* L_umulsidi3 */
+
+
+/* Define a macro for the NSAU (unsigned normalize shift amount)
+   instruction, which computes the number of leading zero bits,
+   to handle cases where it is not included in the Xtensa processor
+   configuration.  */
+
+       .macro  do_nsau cnt, val, tmp, a
+#if XCHAL_HAVE_NSA
+       nsau    \cnt, \val
+#else
+       mov     \a, \val
+       movi    \cnt, 0
+       extui   \tmp, \a, 16, 16
+       bnez    \tmp, 0f        /* upper 16 bits nonzero: count stays 0 */
+       movi    \cnt, 16        /* upper half is zero: count 16 ... */
+       slli    \a, \a, 16      /* ... and examine the lower half */
+0:
+       extui   \tmp, \a, 24, 8
+       bnez    \tmp, 1f
+       addi    \cnt, \cnt, 8   /* top byte is zero: count 8 more */
+       slli    \a, \a, 8
+1:
+       movi    \tmp, __nsau_data
+       extui   \a, \a, 24, 8   /* top byte indexes the lookup table */
+       add     \tmp, \tmp, \a
+       l8ui    \tmp, \tmp, 0
+       add     \cnt, \cnt, \tmp        /* add the table value for that byte */
+#endif /* !XCHAL_HAVE_NSA */
+       .endm
+
+#ifdef L_clz
+       .section .rodata
+       .align  4
+       .global __nsau_data
+       .type   __nsau_data, @object
+__nsau_data:    /* entry i = number of leading zero bits in the 8-bit value i */
+#if !XCHAL_HAVE_NSA
+       .byte   8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
+       .byte   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
+       .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+       .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+       .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+       .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+       .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+       .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+       .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+#endif /* !XCHAL_HAVE_NSA */
+       .size   __nsau_data, . - __nsau_data    /* zero when the table is omitted */
+       .hidden __nsau_data
+#endif /* L_clz */
+
+
+#ifdef L_clzsi2
+       .align  4
+       .global __clzsi2
+       .type   __clzsi2, @function
+__clzsi2:       /* a2 = number of leading zero bits in a2 */
+       leaf_entry sp, 16
+       do_nsau a2, a2, a3, a4  /* a3 and a4 are scratch */
+       leaf_return
+       .size   __clzsi2, . - __clzsi2
+
+#endif /* L_clzsi2 */
+
+
+#ifdef L_ctzsi2
+       .align  4
+       .global __ctzsi2
+       .type   __ctzsi2, @function
+__ctzsi2:       /* a2 = number of trailing zero bits in a2 */
+       leaf_entry sp, 16
+       neg     a3, a2
+       and     a3, a3, a2      /* a3 = a2 & -a2: only the lowest set bit */
+       do_nsau a2, a3, a4, a5  /* a2 = leading zeros of that single bit */
+       neg     a2, a2
+       addi    a2, a2, 31      /* result = 31 - leading zeros */
+       leaf_return
+       .size   __ctzsi2, . - __ctzsi2
+
+#endif /* L_ctzsi2 */
+
+
+#ifdef L_ffssi2
+       .align  4
+       .global __ffssi2
+       .type   __ffssi2, @function
+__ffssi2:       /* a2 = 1-based index of the lowest set bit of a2 */
+       leaf_entry sp, 16
+       neg     a3, a2
+       and     a3, a3, a2      /* a3 = a2 & -a2: only the lowest set bit */
+       do_nsau a2, a3, a4, a5  /* a2 = leading zeros of that single bit */
+       neg     a2, a2
+       addi    a2, a2, 32      /* result = 32 - leading zeros */
+       leaf_return
+       .size   __ffssi2, . - __ffssi2
+
+#endif /* L_ffssi2 */
+
+
+#ifdef L_udivsi3
+       .align  4
+       .global __udivsi3
+       .type   __udivsi3, @function
+__udivsi3:      /* a2 = a2 / a3, unsigned; traps when a3 == 0 */
+       leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+       quou    a2, a2, a3
+#else
+       bltui   a3, 2, .Lle_one /* check if the divisor <= 1 */
+
+       mov     a6, a2          /* keep dividend in a6 */
+       do_nsau a5, a6, a2, a7  /* dividend_shift = nsau (dividend) */
+       do_nsau a4, a3, a2, a7  /* divisor_shift = nsau (divisor) */
+       bgeu    a5, a4, .Lspecial
+
+       sub     a4, a4, a5      /* count = divisor_shift - dividend_shift */
+       ssl     a4
+       sll     a3, a3          /* divisor <<= count */
+       movi    a2, 0           /* quotient = 0 */
+
+       /* test-subtract-and-shift loop; one quotient bit on each iteration */
+#if XCHAL_HAVE_LOOPS
+       loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+       bltu    a6, a3, .Lzerobit
+       sub     a6, a6, a3
+       addi    a2, a2, 1       /* this quotient bit is 1 */
+.Lzerobit:
+       slli    a2, a2, 1
+       srli    a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+       addi    a4, a4, -1
+       bnez    a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+       bltu    a6, a3, .Lreturn
+       addi    a2, a2, 1       /* increment quotient if dividend >= divisor */
+.Lreturn:
+       leaf_return
+
+.Lle_one:
+       beqz    a3, .Lerror     /* divisor == 0: trap; else it is 1, so ... */
+       leaf_return             /* ... return the dividend, still in a2 */
+
+.Lspecial:
+       /* return dividend >= divisor */
+       bltu    a6, a3, .Lreturn0
+       movi    a2, 1
+       leaf_return
+
+.Lerror:
+       /* Divide by zero: Use an illegal instruction to force an exception.
+          The subsequent "DIV0" string can be recognized by the exception
+          handler to identify the real cause of the exception.  */
+       ill
+       .ascii  "DIV0"
+
+.Lreturn0:
+       movi    a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+       leaf_return
+       .size   __udivsi3, . - __udivsi3
+
+#endif /* L_udivsi3 */
+
+
+#ifdef L_divsi3
+       .align  4
+       .global __divsi3
+       .type   __divsi3, @function
+__divsi3:       /* a2 = a2 / a3, signed; traps when a3 == 0 */
+       leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+       quos    a2, a2, a3
+#else
+       xor     a7, a2, a3      /* sign = dividend ^ divisor */
+       do_abs  a6, a2, a4      /* udividend = abs (dividend) */
+       do_abs  a3, a3, a4      /* udivisor = abs (divisor) */
+       bltui   a3, 2, .Lle_one /* check if udivisor <= 1 */
+       do_nsau a5, a6, a2, a8  /* udividend_shift = nsau (udividend) */
+       do_nsau a4, a3, a2, a8  /* udivisor_shift = nsau (udivisor) */
+       bgeu    a5, a4, .Lspecial
+
+       sub     a4, a4, a5      /* count = udivisor_shift - udividend_shift */
+       ssl     a4
+       sll     a3, a3          /* udivisor <<= count */
+       movi    a2, 0           /* quotient = 0 */
+
+       /* test-subtract-and-shift loop; one quotient bit on each iteration */
+#if XCHAL_HAVE_LOOPS
+       loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+       bltu    a6, a3, .Lzerobit
+       sub     a6, a6, a3
+       addi    a2, a2, 1       /* this quotient bit is 1 */
+.Lzerobit:
+       slli    a2, a2, 1
+       srli    a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+       addi    a4, a4, -1
+       bnez    a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+       bltu    a6, a3, .Lreturn
+       addi    a2, a2, 1       /* increment if udividend >= udivisor */
+.Lreturn:
+       neg     a5, a2
+       movltz  a2, a5, a7      /* return (sign < 0) ? -quotient : quotient */
+       leaf_return
+
+.Lle_one:
+       beqz    a3, .Lerror     /* udivisor == 0: trap */
+       neg     a2, a6          /* if udivisor == 1, then return... */
+       movgez  a2, a6, a7      /* (sign < 0) ? -udividend : udividend */
+       leaf_return
+
+.Lspecial:
+       bltu    a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
+       movi    a2, 1
+       movi    a4, -1
+       movltz  a2, a4, a7      /* else return (sign < 0) ? -1 : 1 */
+       leaf_return
+
+.Lerror:
+       /* Divide by zero: Use an illegal instruction to force an exception.
+          The subsequent "DIV0" string can be recognized by the exception
+          handler to identify the real cause of the exception.  */
+       ill
+       .ascii  "DIV0"
+
+.Lreturn0:
+       movi    a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+       leaf_return
+       .size   __divsi3, . - __divsi3
+
+#endif /* L_divsi3 */
+
+
+#ifdef L_umodsi3
+       .align  4
+       .global __umodsi3
+       .type   __umodsi3, @function
+__umodsi3:      /* a2 = a2 % a3, unsigned; traps when a3 == 0 */
+       leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+       remu    a2, a2, a3
+#else
+       bltui   a3, 2, .Lle_one /* check if the divisor is <= 1 */
+
+       do_nsau a5, a2, a6, a7  /* dividend_shift = nsau (dividend) */
+       do_nsau a4, a3, a6, a7  /* divisor_shift = nsau (divisor) */
+       bgeu    a5, a4, .Lspecial
+
+       sub     a4, a4, a5      /* count = divisor_shift - dividend_shift */
+       ssl     a4
+       sll     a3, a3          /* divisor <<= count */
+
+       /* test-subtract-and-shift loop */
+#if XCHAL_HAVE_LOOPS
+       loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+       bltu    a2, a3, .Lzerobit
+       sub     a2, a2, a3      /* the remainder is reduced in place in a2 */
+.Lzerobit:
+       srli    a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+       addi    a4, a4, -1
+       bnez    a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+.Lspecial:
+       bltu    a2, a3, .Lreturn
+       sub     a2, a2, a3      /* subtract once more if dividend >= divisor */
+.Lreturn:
+       leaf_return
+
+.Lle_one:
+       bnez    a3, .Lreturn0   /* divisor == 1: the remainder is 0 */
+
+       /* Divide by zero: Use an illegal instruction to force an exception.
+          The subsequent "DIV0" string can be recognized by the exception
+          handler to identify the real cause of the exception.  */
+       ill
+       .ascii  "DIV0"
+
+.Lreturn0:
+       movi    a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+       leaf_return
+       .size   __umodsi3, . - __umodsi3
+
+#endif /* L_umodsi3 */
+
+
+#ifdef L_modsi3
+       .align  4
+       .global __modsi3
+       .type   __modsi3, @function
+__modsi3:       /* a2 = a2 % a3, signed; traps when a3 == 0 */
+       leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+       rems    a2, a2, a3
+#else
+       mov     a7, a2          /* save original (signed) dividend */
+       do_abs  a2, a2, a4      /* udividend = abs (dividend) */
+       do_abs  a3, a3, a4      /* udivisor = abs (divisor) */
+       bltui   a3, 2, .Lle_one /* check if udivisor <= 1 */
+       do_nsau a5, a2, a6, a8  /* udividend_shift = nsau (udividend) */
+       do_nsau a4, a3, a6, a8  /* udivisor_shift = nsau (udivisor) */
+       bgeu    a5, a4, .Lspecial
+
+       sub     a4, a4, a5      /* count = udivisor_shift - udividend_shift */
+       ssl     a4
+       sll     a3, a3          /* udivisor <<= count */
+
+       /* test-subtract-and-shift loop */
+#if XCHAL_HAVE_LOOPS
+       loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+       bltu    a2, a3, .Lzerobit
+       sub     a2, a2, a3      /* the remainder is reduced in place in a2 */
+.Lzerobit:
+       srli    a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+       addi    a4, a4, -1
+       bnez    a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+.Lspecial:
+       bltu    a2, a3, .Lreturn
+       sub     a2, a2, a3      /* subtract again if udividend >= udivisor */
+.Lreturn:
+       bgez    a7, .Lpositive  /* the result takes the sign of the dividend */
+       neg     a2, a2          /* if (dividend < 0), return -udividend */
+.Lpositive:
+       leaf_return
+
+.Lle_one:
+       bnez    a3, .Lreturn0   /* udivisor == 1: the remainder is 0 */
+
+       /* Divide by zero: Use an illegal instruction to force an exception.
+          The subsequent "DIV0" string can be recognized by the exception
+          handler to identify the real cause of the exception.  */
+       ill
+       .ascii  "DIV0"
+
+.Lreturn0:
+       movi    a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+       leaf_return
+       .size   __modsi3, . - __modsi3
+
+#endif /* L_modsi3 */
+
+
+#ifdef __XTENSA_EB__
+#define uh a2   /* uh = high word of the 64-bit shift operand */
+#define ul a3   /* ul = low word of the 64-bit shift operand */
+#else
+#define uh a3   /* same roles, with the two registers swapped */
+#define ul a2
+#endif /* __XTENSA_EB__ */
+
+
+#ifdef L_ashldi3
+       .align  4
+       .global __ashldi3
+       .type   __ashldi3, @function
+__ashldi3:      /* uh:ul = uh:ul << a4 */
+       leaf_entry sp, 16
+       ssl     a4
+       bgei    a4, 32, .Llow_only
+       src     uh, uh, ul      /* high word takes in the top bits of ul */
+       sll     ul, ul
+       leaf_return
+
+.Llow_only:
+       sll     uh, ul          /* count >= 32: only bits of ul reach uh */
+       movi    ul, 0
+       leaf_return
+       .size   __ashldi3, . - __ashldi3
+
+#endif /* L_ashldi3 */
+
+
+#ifdef L_ashrdi3
+       .align  4
+       .global __ashrdi3
+       .type   __ashrdi3, @function
+__ashrdi3:      /* uh:ul = uh:ul >> a4, arithmetic (sign-filling) */
+       leaf_entry sp, 16
+       ssr     a4
+       bgei    a4, 32, .Lhigh_only
+       src     ul, uh, ul      /* low word takes in the low bits of uh */
+       sra     uh, uh
+       leaf_return
+
+.Lhigh_only:
+       sra     ul, uh          /* count >= 32: only bits of uh reach ul */
+       srai    uh, uh, 31      /* fill the high word with the sign bit */
+       leaf_return
+       .size   __ashrdi3, . - __ashrdi3
+
+#endif /* L_ashrdi3 */
+
+
+#ifdef L_lshrdi3
+       .align  4
+       .global __lshrdi3
+       .type   __lshrdi3, @function
+__lshrdi3:      /* uh:ul = uh:ul >> a4, logical (zero-filling) */
+       leaf_entry sp, 16
+       ssr     a4
+       bgei    a4, 32, .Lhigh_only1
+       src     ul, uh, ul      /* low word takes in the low bits of uh */
+       srl     uh, uh
+       leaf_return
+
+.Lhigh_only1:
+       srl     ul, uh          /* count >= 32: only bits of uh reach ul */
+       movi    uh, 0
+       leaf_return
+       .size   __lshrdi3, . - __lshrdi3
+
+#endif /* L_lshrdi3 */
+
+
+#include "ieee754-df.S"
+#include "ieee754-sf.S"
diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa

index 7d9e9db0487eef6b9ee1fb7492bfc180dcc3ca0d..5bcc0946243767dd5f24238795dec831420a07f7 100644 (file)
--- a/libgcc/config/xtensa/t-xtensa
+++ b/libgcc/config/xtensa/t-xtensa
@@ -1,2 +1,14 @@
+LIB1ASMSRC = xtensa/lib1funcs.S
+LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \
+       _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \
+       _ashldi3 _ashrdi3 _lshrdi3 \
+       _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \
+       _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \
+       _floatdisf _floatundisf \
+       _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \
+       _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \
+       _floatdidf _floatundidf \
+       _truncdfsf2 _extendsfdf2
+
  LIB2ADDEH = $(srcdir)/config/xtensa/unwind-dw2-xtensa.c \
     $(srcdir)/unwind-dw2-fde.c $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
author	Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
	Wed, 2 Nov 2011 15:03:19 +0000 (15:03 +0000)
committer	Rainer Orth <ro@gcc.gnu.org>
	Wed, 2 Nov 2011 15:03:19 +0000 (15:03 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/Makefile.in		patch \| blob \| history
gcc/config.gcc		patch \| blob \| history
gcc/config/arm/arm.c		patch \| blob \| history
gcc/config/arm/bpabi-v6m.S	[deleted file]	patch \| blob \| history
gcc/config/arm/bpabi.S	[deleted file]	patch \| blob \| history
gcc/config/arm/ieee754-df.S	[deleted file]	patch \| blob \| history
gcc/config/arm/ieee754-sf.S	[deleted file]	patch \| blob \| history
gcc/config/arm/lib1funcs.asm	[deleted file]	patch \| blob \| history
gcc/config/arm/linux-eabi.h		patch \| blob \| history
gcc/config/arm/t-arm		patch \| blob \| history
gcc/config/arm/t-arm-elf		patch \| blob \| history
gcc/config/arm/t-bpabi		patch \| blob \| history
gcc/config/arm/t-linux		patch \| blob \| history
gcc/config/arm/t-linux-eabi		patch \| blob \| history
gcc/config/arm/t-strongarm-elf		patch \| blob \| history
gcc/config/arm/t-symbian		patch \| blob \| history
gcc/config/arm/t-vxworks		patch \| blob \| history
gcc/config/arm/t-wince-pe		patch \| blob \| history
gcc/config/avr/libgcc.S	[deleted file]	patch \| blob \| history
gcc/config/avr/t-avr		patch \| blob \| history
gcc/config/bfin/lib1funcs.asm	[deleted file]	patch \| blob \| history
gcc/config/bfin/t-bfin	[deleted file]	patch \| blob \| history
gcc/config/bfin/t-bfin-elf		patch \| blob \| history
gcc/config/bfin/t-bfin-linux		patch \| blob \| history
gcc/config/bfin/t-bfin-uclinux		patch \| blob \| history
gcc/config/c6x/lib1funcs.asm	[deleted file]	patch \| blob \| history
gcc/config/c6x/t-c6x-elf		patch \| blob \| history
gcc/config/fr30/lib1funcs.asm	[deleted file]	patch \| blob \| history
gcc/config/fr30/t-fr30		patch \| blob \| history
gcc/config/frv/lib1funcs.asm	[deleted file]	patch \| blob \| history
gcc/config/frv/t-frv		patch \| blob \| history
gcc/config/h8300/fixunssfsi.c		patch \| blob \| history
gcc/config/h8300/lib1funcs.asm	[deleted file]	patch \| blob \| history
gcc/config/h8300/t-h8300		patch \| blob \| history
gcc/config/i386/cygwin.asm	[deleted file]	patch \| blob \| history
gcc/config/i386/t-cygming		patch \| blob \| history
gcc/config/i386/t-interix		patch \| blob \| history
gcc/config/ia64/lib1funcs.asm	[deleted file]	patch \| blob \| history
gcc/config/ia64/t-hpux		patch \| blob \| history
gcc/config/ia64/t-ia64		patch \| blob \| history
gcc/config/iq2000/t-iq2000		patch \| blob \| history
gcc/config/m32c/m32c-lib1.S	[deleted file]	patch \| blob \| history
gcc/config/m32c/m32c.c		patch \| blob \| history
gcc/config/m32c/t-m32c		patch \| blob \| history
gcc/config/m32r/t-linux		patch \| blob \| history
gcc/config/m68k/lb1sf68.asm	[deleted file]	patch \| blob \| history
gcc/config/m68k/t-floatlib		patch \| blob \| history
gcc/config/mcore/lib1.asm	[deleted file]	patch \| blob \| history
gcc/config/mcore/t-mcore		patch \| blob \| history
gcc/config/mep/mep-lib1.asm	[deleted file]	patch \| blob \| history
gcc/config/mep/t-mep		patch \| blob \| history
gcc/config/mips/mips16.S	[deleted file]	patch \| blob \| history
gcc/config/mips/t-libgcc-mips16	[deleted file]	patch \| blob \| history
gcc/config/mips/t-sr71k		patch \| blob \| history
gcc/config/pa/milli64.S	[deleted file]	patch \| blob \| history
gcc/config/pa/t-linux		patch \| blob \| history
gcc/config/pa/t-linux64		patch \| blob \| history
gcc/config/picochip/libgccExtras/fake_libgcc.asm	[deleted file]	patch \| blob \| history
gcc/config/picochip/t-picochip		patch \| blob \| history
gcc/config/sh/lib1funcs.asm	[deleted file]	patch \| blob \| history
gcc/config/sh/lib1funcs.h	[deleted file]	patch \| blob \| history
gcc/config/sh/sh.h		patch \| blob \| history
gcc/config/sh/t-linux		patch \| blob \| history
gcc/config/sh/t-netbsd		patch \| blob \| history
gcc/config/sh/t-sh		patch \| blob \| history
gcc/config/sh/t-sh64		patch \| blob \| history
gcc/config/sparc/lb1spc.asm	[deleted file]	patch \| blob \| history
gcc/config/sparc/lb1spl.asm	[deleted file]	patch \| blob \| history
gcc/config/sparc/t-elf		patch \| blob \| history
gcc/config/sparc/t-leon		patch \| blob \| history
gcc/config/spu/t-spu-elf		patch \| blob \| history
gcc/config/v850/lib1funcs.asm	[deleted file]	patch \| blob \| history
gcc/config/v850/t-v850		patch \| blob \| history
gcc/config/vax/lib1funcs.asm	[deleted file]	patch \| blob \| history
gcc/config/vax/t-linux	[deleted file]	patch \| blob \| history
gcc/config/xtensa/ieee754-df.S	[deleted file]	patch \| blob \| history
gcc/config/xtensa/ieee754-sf.S	[deleted file]	patch \| blob \| history
gcc/config/xtensa/lib1funcs.asm	[deleted file]	patch \| blob \| history
gcc/config/xtensa/t-xtensa		patch \| blob \| history
libgcc/ChangeLog		patch \| blob \| history
libgcc/Makefile.in		patch \| blob \| history
libgcc/config.host		patch \| blob \| history
libgcc/config/arm/bpabi-v6m.S	[new file with mode: 0644]	patch \| blob
libgcc/config/arm/bpabi.S	[new file with mode: 0644]	patch \| blob
libgcc/config/arm/ieee754-df.S	[new file with mode: 0644]	patch \| blob
libgcc/config/arm/ieee754-sf.S	[new file with mode: 0644]	patch \| blob
libgcc/config/arm/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/arm/libunwind.S		patch \| blob \| history
libgcc/config/arm/t-arm	[new file with mode: 0644]	patch \| blob
libgcc/config/arm/t-bpabi		patch \| blob \| history
libgcc/config/arm/t-elf	[new file with mode: 0644]	patch \| blob
libgcc/config/arm/t-linux	[new file with mode: 0644]	patch \| blob
libgcc/config/arm/t-linux-eabi	[new file with mode: 0644]	patch \| blob
libgcc/config/arm/t-strongarm-elf	[new file with mode: 0644]	patch \| blob
libgcc/config/arm/t-symbian		patch \| blob \| history
libgcc/config/arm/t-vxworks	[new file with mode: 0644]	patch \| blob
libgcc/config/arm/t-wince-pe	[new file with mode: 0644]	patch \| blob
libgcc/config/avr/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/avr/t-avr		patch \| blob \| history
libgcc/config/bfin/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/bfin/t-bfin	[new file with mode: 0644]	patch \| blob
libgcc/config/c6x/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/c6x/t-elf		patch \| blob \| history
libgcc/config/fr30/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/fr30/t-fr30	[new file with mode: 0644]	patch \| blob
libgcc/config/frv/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/frv/t-frv		patch \| blob \| history
libgcc/config/h8300/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/h8300/t-h8300	[new file with mode: 0644]	patch \| blob
libgcc/config/i386/cygwin.S	[new file with mode: 0644]	patch \| blob
libgcc/config/i386/t-chkstk	[new file with mode: 0644]	patch \| blob
libgcc/config/ia64/__divxf3.S	[new file with mode: 0644]	patch \| blob
libgcc/config/ia64/__divxf3.asm	[deleted file]	patch \| blob \| history
libgcc/config/ia64/_fixtfdi.S	[new file with mode: 0644]	patch \| blob
libgcc/config/ia64/_fixtfdi.asm	[deleted file]	patch \| blob \| history
libgcc/config/ia64/_fixunstfdi.S	[new file with mode: 0644]	patch \| blob
libgcc/config/ia64/_fixunstfdi.asm	[deleted file]	patch \| blob \| history
libgcc/config/ia64/_floatditf.S	[new file with mode: 0644]	patch \| blob
libgcc/config/ia64/_floatditf.asm	[deleted file]	patch \| blob \| history
libgcc/config/ia64/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/ia64/t-hpux		patch \| blob \| history
libgcc/config/ia64/t-ia64		patch \| blob \| history
libgcc/config/ia64/t-softfp-compat		patch \| blob \| history
libgcc/config/m32c/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/m32c/t-m32c	[new file with mode: 0644]	patch \| blob
libgcc/config/m32r/initfini.c		patch \| blob \| history
libgcc/config/m68k/lb1sf68.S	[new file with mode: 0644]	patch \| blob
libgcc/config/m68k/t-floatlib	[new file with mode: 0644]	patch \| blob
libgcc/config/mcore/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/mcore/t-mcore	[new file with mode: 0644]	patch \| blob
libgcc/config/mep/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/mep/t-mep		patch \| blob \| history
libgcc/config/mips/mips16.S	[new file with mode: 0644]	patch \| blob
libgcc/config/mips/t-mips16		patch \| blob \| history
libgcc/config/pa/milli64.S	[new file with mode: 0644]	patch \| blob
libgcc/config/pa/t-linux	[new file with mode: 0644]	patch \| blob
libgcc/config/pa/t-linux64	[new file with mode: 0644]	patch \| blob
libgcc/config/picochip/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/picochip/t-picochip		patch \| blob \| history
libgcc/config/sh/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/sh/lib1funcs.h	[new file with mode: 0644]	patch \| blob
libgcc/config/sh/t-linux		patch \| blob \| history
libgcc/config/sh/t-netbsd	[new file with mode: 0644]	patch \| blob
libgcc/config/sh/t-sh		patch \| blob \| history
libgcc/config/sh/t-sh64	[new file with mode: 0644]	patch \| blob
libgcc/config/sparc/lb1spc.S	[new file with mode: 0644]	patch \| blob
libgcc/config/sparc/t-softmul		patch \| blob \| history
libgcc/config/v850/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/v850/t-v850	[new file with mode: 0644]	patch \| blob
libgcc/config/vax/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/vax/t-linux	[new file with mode: 0644]	patch \| blob
libgcc/config/xtensa/ieee754-df.S	[new file with mode: 0644]	patch \| blob
libgcc/config/xtensa/ieee754-sf.S	[new file with mode: 0644]	patch \| blob
libgcc/config/xtensa/lib1funcs.S	[new file with mode: 0644]	patch \| blob
libgcc/config/xtensa/t-xtensa		patch \| blob \| history