From: Rainer Orth
Date: Wed, 2 Nov 2011 15:03:19 +0000 (+0000)
Subject: Move libgcc1 to toplevel libgcc
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=45b86625d7edd2278c0cdcf335e007a47671813f;p=gcc.git

Move libgcc1 to toplevel libgcc

gcc:
	* Makefile.in (LIB1ASMSRC): Don't export.
	(libgcc.mvars): Don't emit LIB1ASMFUNCS, LIB1ASMSRC.
	* config/arm/arm.c: Update lib1funcs.asm filename.
	* config/arm/linux-eabi.h: Likewise.
	* config/arm/bpabi-v6m.S, config/arm/bpabi.S,
	config/arm/ieee754-df.S, config/arm/ieee754-sf.S: Move to
	../libgcc/config/arm.
	* config/arm/lib1funcs.asm: Move to ../libgcc/config/arm/lib1funcs.S.
	* config/arm/t-arm (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/arm/t-arm-elf (LIB1ASMFUNCS): Remove.
	* config/arm/t-bpabi: Likewise.
	* config/arm/t-linux (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/arm/t-linux-eabi (LIB1ASMFUNCS): Remove.
	* config/arm/t-strongarm-elf: Likewise.
	* config/arm/t-symbian: Likewise.
	* config/arm/t-vxworks: Likewise.
	* config/arm/t-wince-pe: Likewise.
	* config/avr/libgcc.S: Move to ../libgcc/config/avr.
	* config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/bfin/lib1funcs.asm: Move to
	../libgcc/config/bfin/lib1funcs.S.
	* config/bfin/t-bfin: Remove.
	* config/bfin/t-bfin-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/bfin/t-bfin-linux: Likewise.
	* config/bfin/t-bfin-uclinux: Likewise.
	* config/c6x/lib1funcs.asm: Move to
	../libgcc/config/c6x/lib1funcs.S.
	* config/c6x/t-c6x-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/fr30/lib1funcs.asm: Move to
	../libgcc/config/fr30/lib1funcs.S.
	* config/fr30/t-fr30 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/frv/lib1funcs.asm: Move to
	../libgcc/config/frv/lib1funcs.S.
	* config/frv/t-frv (CROSS_LIBGCC1, LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/h8300/fixunssfsi.c: Update lib1funcs.asm filename.
	* config/h8300/lib1funcs.asm: Move to
	../libgcc/config/h8300/lib1funcs.S.
	* config/h8300/t-h8300 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/i386/cygwin.asm: Move to ../libgcc/config/i386/cygwin.S.
	* config/i386/t-cygming (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/i386/t-interix: Likewise.
	* config/ia64/lib1funcs.asm: Move to
	../libgcc/config/ia64/lib1funcs.S.
	* config/ia64/t-hpux (LIB1ASMFUNCS, LIBGCC1_TEST): Remove.
	* config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/iq2000/t-iq2000 (LIBGCC1, CROSS_LIBGCC1): Remove.
	* config/m32c/m32c.c: Update m32c-lib1.S filename.
	* config/m32c/m32c-lib1.S: Move to ../libgcc/config/m32c/lib1funcs.S.
	* config/m32c/t-m32c (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/m32r/t-linux (CROSS_LIBGCC1, LIBGCC1, LIBGCC1_TEST): Remove.
	* config/m68k/lb1sf68.asm: Move to ../libgcc/config/m68k/lb1sf68.S.
	* config/m68k/t-floatlib (LIB1ASMSRC, LIB1ASMFUNCS): New file.
	* config/mcore/lib1.asm: Move to ../libgcc/config/mcore/lib1funcs.S.
	* config/mcore/t-mcore (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/mep/mep-lib1.asm: Move to ../libgcc/config/mep/lib1funcs.S.
	* config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/mips/mips16.S: Move to ../libgcc/config/mips.
	* config/mips/t-libgcc-mips16: Remove.
	* config/mips/t-sr71k (LIBGCC1, CROSS_LIBGCC1): Remove.
	* config/pa/milli64.S: Move to ../libgcc/config/pa.
	* config/pa/t-linux (LIB1ASMFUNCS, LIB1ASMSRC): Remove.
	* config/pa/t-linux64: Likewise.
	* config/picochip/libgccExtras/fake_libgcc.asm: Move to
	../libgcc/config/picochip/lib1funcs.S.
	* config/picochip/t-picochip (LIB1ASMFUNCS, LIB1ASMSRC): Remove.
	* config/sh/lib1funcs.asm: Move to ../libgcc/config/sh/lib1funcs.S.
	* config/sh/lib1funcs.h: Move to ../libgcc/config/sh.
	* config/sh/sh.h: Update lib1funcs.asm filename.
	* config/sh/t-linux (LIB1ASMFUNCS_CACHE): Remove.
	* config/sh/t-netbsd: Likewise.
	* config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE):
	Remove.
	* config/sh/t-sh64 (LIB1ASMFUNCS): Remove.
	* config/sparc/lb1spc.asm: Move to ../libgcc/config/sparc/lb1spc.S.
	* config/sparc/lb1spl.asm: Remove.
	* config/sparc/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/sparc/t-leon: Likewise.
	* config/spu/t-spu-elf (LIBGCC1, CROSS_LIBGCC1): Remove.
	* config/v850/lib1funcs.asm: Move to ../libgcc/config/v850/lib1funcs.S.
	* config/v850/t-v850 (LIB1ASMSRC, LIB1ASMFUNCS): Remove
	* config/vax/lib1funcs.asm: Move to ../libgcc/config/vax/lib1funcs.S.
	* config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S: Move to
	../libgcc/config/xtensa.
	* config/vax/t-linux: Remove.
	* config/xtensa/lib1funcs.asm: Move to
	../libgcc/config/xtensa/lib1funcs.S.
	* config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config.gcc (bfin*-rtems*): Remove bfin/t-bfin from tmake_file.
	(bfin*-*): Likewise.
	(mips64*-*-linux*, mipsisa64*-*-linux*): Remove
	mips/t-libgcc-mips16 from tmake_file.
	(mips*-*-linux*): Likewise.
	(mips*-sde-elf*): Likewise.
	(mipsisa32-*-elf*, mipsisa32el-*-elf*, mipsisa32r2-*-elf*)
	(mipsisa32r2el-*-elf*, mipsisa64-*-elf*, mipsisa64el-*-elf*)
	(mipsisa64r2-*-elf*, mipsisa64r2el-*-elf*): Likewise.
	(mipsisa64sb1-*-elf*, mipsisa64sb1el-*-elf*): Likewise.
	(mips-*-elf*, mipsel-*-elf*): Likewise.
	(mips64-*-elf*, mips64el-*-elf*): Likewise.
	(mips64orion-*-elf*, mips64orionel-*-elf*): Likewise.
	(mips*-*-rtems*): Likewise.
	(mipstx39-*-elf*, mipstx39el-*-elf*): Likewise.
	(vax-*-linux*): Remove vax/t-linux from tmake_file.

libgcc:
	* Makefile.in ($(lib1asmfuncs-o), $(lib1asmfuncs-s-o)): Use
	$(srcdir) to refer to $(LIB1ASMSRC). Use $<.
	* config/arm/bpabi-v6m.S, config/arm/bpabi.S,
	config/arm/ieee754-df.S, config/arm/ieee754-sf.S,
	config/arm/lib1funcs.S: New files.
	* config/arm/libunwind.S [!__symbian__]: Use lib1funcs.S.
	* config/arm/t-arm: New file.
	* config/arm/t-bpabi (LIB1ASMFUNCS): Set.
	* config/arm/t-elf, config/arm/t-linux, config/arm/t-linux-eabi,
	config/arm/t-strongarm-elf: New files.
	* config/arm/t-symbian (LIB1ASMFUNCS): Set.
	* config/arm/t-vxworks, config/arm/t-wince-pe: New files.
	* config/avr/lib1funcs.S: New file.
	* config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/bfin/lib1funcs.S, config/bfin/t-bfin: New files.
	* config/c6x/lib1funcs.S: New file.
	* config/c6x/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/fr30/lib1funcs.S, config/fr30/t-fr30: New files.
	* config/frv/lib1funcs.S: New file.
	* config/frv/t-frv (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/h8300/lib1funcs.S, config/h8300/t-h8300: New files.
	* config/i386/cygwin.S, config/i386/t-chkstk: New files.
	* config/ia64/__divxf3.asm: Rename to ...
	* config/ia64/__divxf3.S: ... this. Adapt lib1funcs.asm filename.
	* config/ia64/_fixtfdi.asm: Rename to ...
	* config/ia64/_fixtfdi.S: ... this. Adapt lib1funcs.asm filename.
	* config/ia64/_fixunstfdi.asm: Rename to ...
	* config/ia64/_fixunstfdi.S: ... this. Adapt lib1funcs.asm filename.
	* config/ia64/_floatditf.asm: Rename to ...
	* config/ia64/_floatditf.S: ... this. Adapt lib1funcs.asm filename.
	* config/ia64/lib1funcs.S: New file.
	* config/ia64/t-hpux (LIB1ASMFUNCS): Set.
	* config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/ia64/t-softfp-compat (libgcc1-tf-compats): Adapt suffix.
	* config/m32c/lib1funcs.S, config/m32c/t-m32c: New files.
	* config/m68k/lb1sf68.S, config/m68k/t-floatlib: New files.
	* config/mcore/lib1funcs.S, config/mcore/t-mcore: New files.
	* config/mep/lib1funcs.S: New file.
	* config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/mips/mips16.S: New file.
	* config/mips/t-mips16 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/pa/milli64.S: New file.
	* config/pa/t-linux, config/pa/t-linux64: New files.
	* config/picochip/lib1funcs.S: New file.
	* config/picochip/t-picochip (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/sh/lib1funcs.S, config/sh/lib1funcs.h: New files.
	* config/sh/t-linux (LIB1ASMFUNCS_CACHE): Set.
	* config/sh/t-netbsd: New file.
	* config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE):
	Set. Use $(srcdir) to refer to lib1funcs.S, adapt filename.
	* config/sh/t-sh64: New file.
	* config/sparc/lb1spc.S: New file.
	* config/sparc/t-softmul (LIB1ASMSRC): Adapt sparc/lb1spc.asm
	filename.
	* config/v850/lib1funcs.S, config/v850/t-v850: New files.
	* config/vax/lib1funcs.S, config/vax/t-linux: New files.
	* config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S,
	config/xtensa/lib1funcs.S: New files.
	* config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config.host (arm-wrs-vxworks): Add arm/t-arm, arm/t-vxworks to
	tmake_file.
	(arm*-*-freebsd*): Add arm/t-arm, arm/t-strongarm-elf to tmake_file.
	(arm*-*-netbsdelf*): Add arm/t-arm to tmake_file.
	(arm*-*-linux*): Likewise. Add arm/t-elf, arm/t-bpabi,
	arm/t-linux-eabi to tmake_file for arm*-*-linux-*eabi,
	add arm/t-linux otherwise.
	(arm*-*-uclinux*): Add arm/t-arm, arm/t-elf to tmake_file.
	(arm*-*-ecos-elf): Likewise.
	(arm*-*-eabi*, arm*-*-symbianelf*): Likewise.
	(arm*-*-rtems*): Likewise.
	(arm*-*-elf): Likewise.
	(arm*-wince-pe*): Add arm/t-arm, arm/t-wince-pe to tmake_file.
	(avr-*-rtems*): Add to tmake_file, add avr/t-avr.
	(bfin*-elf*): Add bfin/t-bfin to tmake_file.
	(bfin*-uclinux*): Likewise.
	(bfin*-linux-uclibc*): Likewise.
	(bfin*-rtems*): Likewise.
	(bfin*-*): Likewise.
	(fido-*-elf): Merge into m68k-*-elf*.
	(fr30-*-elf)): Add fr30/t-fr30 to tmake_file.
	(frv-*-*linux*): Add frv/t-frv to tmake_file.
	(h8300-*-rtems*): Add h8300/t-h8300 to tmake_file.
	(h8300-*-elf*): Likewise.
	(hppa*64*-*-linux*): Add pa/t-linux, pa/t-linux64 to tmake_file.
	(hppa*-*-linux*): Add pa/t-linux to tmake_file.
	(i[34567]86-*-cygwin*): Add i386/t-chkstk to tmake_file.
	(i[34567]86-*-mingw*): Likewise.
	(x86_64-*-mingw*): Likewise.
	(i[34567]86-*-interix3*): Likewise.
	(ia64*-*-hpux*): Add ia64/t-ia64, ia64/t-hpux to tmake_file.
	(ia64-hp-*vms*): Add ia64/t-ia64 to tmake_file.
	(m68k-*-elf*): Also handle fido-*-elf. Add m68k/t-floatlib to
	tmake_file.
	(m68k-*-uclinux*): Add m68k/t-floatlib to tmake_file.
	(m68k-*-linux*): Likewise.
	(m68k-*-rtems*): Likewise.
	(mcore-*-elf): Add mcore/t-mcore to tmake_file.
	(sh-*-elf*, sh[12346l]*-*-elf*): Add sh/t-sh64 to tmake_file for
	sh64*-*-*.
	(sh-*-linux*, sh[2346lbe]*-*-linux*): Add sh/t-sh to tmake_file.
	Add sh/t-sh64 to tmake_file for sh64*-*-linux*.
	(sh-*-netbsdelf*, shl*-*-netbsdelf*, sh5-*-netbsd*)
	(sh5l*-*-netbsd*, sh64-*-netbsd*, sh64l*-*-netbsd*): Add sh/t-sh,
	sh/t-netbsd to tmake_file.
	Add sh/t-sh64 to tmake_file for sh5*-*-netbsd*, sh64*-netbsd*.
	(sh-*-rtems*): Add sh/t-sh to tmake_file.
	(sh-wrs-vxworks): Likewise.
	(sparc-*-linux*): Add sparc/t-softmul to tmake_file except for
	*-leon[3-9]*.
	(v850*-*-*): Add v850/t-v850 to tmake_file.
	(vax-*-linux*): Add vax/t-linux to tmake_file.
	(m32c-*-elf*, m32c-*-rtems*): Add m32c/t-m32c to tmake_file.
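For readers unfamiliar with the LIB1ASM machinery the entries above refer to: after this change each target names its combined assembly source (LIB1ASMSRC) and the per-function sections to build from it (LIB1ASMFUNCS) in a libgcc tmake_file fragment, and libgcc's Makefile.in compiles one object per listed function from that single source. The fragment and rule below are only a minimal sketch of that pattern under those assumptions, not the exact contents of any file in this patch; the ARM function list shown is illustrative.

# Sketch of a libgcc tmake_file fragment in the new layout
# (shape of e.g. libgcc/config/arm/t-arm; function names are illustrative).
LIB1ASMSRC = arm/lib1funcs.S
LIB1ASMFUNCS = _udivsi3 _divsi3 _dvmd_tls        # illustrative subset

# Sketch of the consuming rule in libgcc/Makefile.in: one object per
# function, each assembled from $(srcdir)/$(LIB1ASMSRC) with -DL<func>
# so the source emits only that routine (this is the "$(srcdir)"/"$<"
# usage the libgcc Makefile.in entry above describes).
lib1asmfuncs-o = $(patsubst %,%$(objext),$(LIB1ASMFUNCS))
$(lib1asmfuncs-o): %$(objext): $(srcdir)/$(LIB1ASMSRC)
	$(gcc_compile) -DL$* -xassembler-with-cpp -c $<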
From-SVN: r180773 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5c3a91da561..071cce6c29c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,109 @@ +2011-11-02 Rainer Orth + + * Makefile.in (LIB1ASMSRC): Don't export. + (libgcc.mvars): Don't emit LIB1ASMFUNCS, LIB1ASMSRC. + * config/arm/arm.c: Update lib1funcs.asm filename. + * config/arm/linux-eabi.h: Likewise. + * config/arm/bpabi-v6m.S, config/arm/bpabi.S, + config/arm/ieee754-df.S, config/arm/ieee754-sf.S: Move to + ../libgcc/config/arm. + * config/arm/lib1funcs.asm: Move to ../libgcc/config/arm/lib1funcs.S. + * config/arm/t-arm (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/arm/t-arm-elf (LIB1ASMFUNCS): Remove. + * config/arm/t-bpabi: Likewise. + * config/arm/t-linux (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/arm/t-linux-eabi (LIB1ASMFUNCS): Remove. + * config/arm/t-strongarm-elf: Likewise. + * config/arm/t-symbian: Likewise. + * config/arm/t-vxworks: Likewise. + * config/arm/t-wince-pe: Likewise. + * config/avr/libgcc.S: Move to ../libgcc/config/avr. + * config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/bfin/lib1funcs.asm: Move to + ../libgcc/config/bfin/lib1funcs.S. + * config/bfin/t-bfin: Remove. + * config/bfin/t-bfin-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/bfin/t-bfin-linux: Likewise. + * config/bfin/t-bfin-uclinux: Likewise. + * config/c6x/lib1funcs.asm: Move to + ../libgcc/config/c6x/lib1funcs.S. + * config/c6x/t-c6x-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/fr30/lib1funcs.asm: Move to + ../libgcc/config/fr30/lib1funcs.S. + * config/fr30/t-fr30 (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/frv/lib1funcs.asm: Move to + ../libgcc/config/frv/lib1funcs.S. + * config/frv/t-frv (CROSS_LIBGCC1, LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/h8300/fixunssfsi.c: Update lib1funcs.asm filename. + * config/h8300/lib1funcs.asm: Move to + ../libgcc/config/h8300/lib1funcs.S. + * config/h8300/t-h8300 (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/i386/cygwin.asm: Move to ../libgcc/config/i386/cygwin.S. + * config/i386/t-cygming (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/i386/t-interix: Likewise. + * config/ia64/lib1funcs.asm: Move to + ../libgcc/config/ia64/lib1funcs.S. + * config/ia64/t-hpux (LIB1ASMFUNCS, LIBGCC1_TEST): Remove. + * config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/iq2000/t-iq2000 (LIBGCC1, CROSS_LIBGCC1): Remove. + * config/m32c/m32c.c: Update m32c-lib1.S filename. + * config/m32c/m32c-lib1.S: Move to ../libgcc/config/m32c/lib1funcs.S. + * config/m32c/t-m32c (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/m32r/t-linux (CROSS_LIBGCC1, LIBGCC1, LIBGCC1_TEST): Remove. + * config/m68k/lb1sf68.asm: Move to ../libgcc/config/m68k/lb1sf68.S. + * config/m68k/t-floatlib (LIB1ASMSRC, LIB1ASMFUNCS): New file. + * config/mcore/lib1.asm: Move to ../libgcc/config/mcore/lib1funcs.S. + * config/mcore/t-mcore (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/mep/mep-lib1.asm: Move to ../libgcc/config/mep/lib1funcs.S. + * config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/mips/mips16.S: Move to ../libgcc/config/mips. + * config/mips/t-libgcc-mips16: Remove. + * config/mips/t-sr71k (LIBGCC1, CROSS_LIBGCC1): Remove. + * config/pa/milli64.S: Move to ../libgcc/config/pa. + * config/pa/t-linux (LIB1ASMFUNCS, LIB1ASMSRC): Remove. + * config/pa/t-linux64: Likewise. + * config/picochip/libgccExtras/fake_libgcc.asm: Move to + ../libgcc/config/picochip/lib1funcs.S. + * config/picochip/t-picochip (LIB1ASMFUNCS, LIB1ASMSRC): Remove. 
+ * config/sh/lib1funcs.asm: Move to ../libgcc/config/sh/lib1funcs.S. + * config/sh/lib1funcs.h: Move to ../libgcc/config/sh. + * config/sh/sh.h: Update lib1funcs.asm filename. + * config/sh/t-linux (LIB1ASMFUNCS_CACHE): Remove. + * config/sh/t-netbsd: Likewise. + * config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE): + Remove. + * config/sh/t-sh64 (LIB1ASMFUNCS): Remove. + * config/sparc/lb1spc.asm: Move to ../libgcc/config/sparc/lb1spc.S. + * config/sparc/lb1spl.asm: Remove. + * config/sparc/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/sparc/t-leon: Likewise. + * config/spu/t-spu-elf (LIBGCC1, CROSS_LIBGCC1): Remove. + * config/v850/lib1funcs.asm: Move to ../libgcc/config/v850/lib1funcs.S. + * config/v850/t-v850 (LIB1ASMSRC, LIB1ASMFUNCS): Remove + * config/vax/lib1funcs.asm: Move to ../libgcc/config/vax/lib1funcs.S. + * config/vax/t-linux: Remove. + * config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S: Move to + ../libgcc/config/xtensa. + * config/xtensa/lib1funcs.asm: Move to + ../libgcc/config/xtensa/lib1funcs.S. + * config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config.gcc (bfin*-rtems*): Remove bfin/t-bfin from tmake_file. + (bfin*-*): Likewise. + (mips64*-*-linux*, mipsisa64*-*-linux*): Remove + mips/t-libgcc-mips16 from tmake_file. + (mips*-*-linux*): Likewise. + (mips*-sde-elf*): Likewise. + (mipsisa32-*-elf*, mipsisa32el-*-elf*, mipsisa32r2-*-elf*) + (mipsisa32r2el-*-elf*, mipsisa64-*-elf*, mipsisa64el-*-elf*) + (mipsisa64r2-*-elf*, mipsisa64r2el-*-elf*): Likewise. + (mipsisa64sb1-*-elf*, mipsisa64sb1el-*-elf*): Likewise. + (mips-*-elf*, mipsel-*-elf*): Likewise. + (mips64-*-elf*, mips64el-*-elf*): Likewise. + (mips64orion-*-elf*, mips64orionel-*-elf*): Likewise. + (mips*-*-rtems*): Likewise. + (mipstx39-*-elf*, mipstx39el-*-elf*): Likewise. + (vax-*-linux*): Remove vax/t-linux from tmake_file. + 2011-11-02 Rainer Orth * config.gcc (extra_parts): Remove. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index b6951dc1486..38449d7c30e 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1110,7 +1110,6 @@ export DESTDIR export GCC_FOR_TARGET export INCLUDES export INSTALL_DATA -export LIB1ASMSRC export LIBGCC2_CFLAGS export LIPO_FOR_TARGET export MACHMODE_H @@ -1878,8 +1877,6 @@ libgcc-support: libgcc.mvars stmp-int-hdrs $(TCONFIG_H) \ libgcc.mvars: config.status Makefile $(LIB2ADD) $(LIB2ADD_ST) specs \ xgcc$(exeext) : > tmp-libgcc.mvars - echo LIB1ASMFUNCS = '$(LIB1ASMFUNCS)' >> tmp-libgcc.mvars - echo LIB1ASMSRC = '$(LIB1ASMSRC)' >> tmp-libgcc.mvars echo LIB2FUNCS_ST = '$(LIB2FUNCS_ST)' >> tmp-libgcc.mvars echo LIB2FUNCS_EXCLUDE = '$(LIB2FUNCS_EXCLUDE)' >> tmp-libgcc.mvars echo LIB2ADD = '$(call srcdirify,$(LIB2ADD))' >> tmp-libgcc.mvars diff --git a/gcc/config.gcc b/gcc/config.gcc index 6bbec7db39e..79230a6935a 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -950,11 +950,10 @@ bfin*-linux-uclibc*) ;; bfin*-rtems*) tm_file="${tm_file} dbxelf.h elfos.h bfin/elf.h bfin/rtems.h rtems.h newlib-stdint.h" - tmake_file="bfin/t-bfin t-rtems bfin/t-rtems" + tmake_file="t-rtems bfin/t-rtems" ;; bfin*-*) tm_file="${tm_file} dbxelf.h elfos.h newlib-stdint.h bfin/elf.h" - tmake_file=bfin/t-bfin use_collect2=no use_gcc_stdint=wrap ;; @@ -1737,7 +1736,7 @@ mips*-*-netbsd*) # NetBSD/mips, either endian. 
;; mips64*-*-linux* | mipsisa64*-*-linux*) tm_file="dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h ${tm_file} mips/gnu-user.h mips/gnu-user64.h mips/linux64.h" - tmake_file="${tmake_file} mips/t-linux64 mips/t-libgcc-mips16" + tmake_file="${tmake_file} mips/t-linux64" tm_defines="${tm_defines} MIPS_ABI_DEFAULT=ABI_N32" case ${target} in mips64el-st-linux-gnu) @@ -1758,7 +1757,6 @@ mips64*-*-linux* | mipsisa64*-*-linux*) ;; mips*-*-linux*) # Linux MIPS, either endian. tm_file="dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h ${tm_file} mips/gnu-user.h mips/linux.h" - tmake_file="${tmake_file} mips/t-libgcc-mips16" if test x$enable_targets = xall; then tm_file="${tm_file} mips/gnu-user64.h mips/linux64.h" tmake_file="${tmake_file} mips/t-linux64" @@ -1785,7 +1783,7 @@ mips*-*-openbsd*) ;; mips*-sde-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h mips/sde.h" - tmake_file="mips/t-sde mips/t-libgcc-mips16" + tmake_file="mips/t-sde" extra_options="${extra_options} mips/sde.opt" case "${with_newlib}" in yes) @@ -1822,7 +1820,7 @@ mipsisa32r2-*-elf* | mipsisa32r2el-*-elf* | \ mipsisa64-*-elf* | mipsisa64el-*-elf* | \ mipsisa64r2-*-elf* | mipsisa64r2el-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h" - tmake_file="mips/t-isa3264 mips/t-libgcc-mips16" + tmake_file="mips/t-isa3264" case ${target} in mipsisa32r2*) tm_defines="${tm_defines} MIPS_ISA_DEFAULT=33" @@ -1859,17 +1857,17 @@ mipsisa64sr71k-*-elf*) ;; mipsisa64sb1-*-elf* | mipsisa64sb1el-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16 mips/t-sb1" + tmake_file="mips/t-elf mips/t-sb1" target_cpu_default="MASK_64BIT|MASK_FLOAT64" tm_defines="${tm_defines} MIPS_ISA_DEFAULT=64 MIPS_CPU_STRING_DEFAULT=\\\"sb1\\\" MIPS_ABI_DEFAULT=ABI_O64" ;; mips-*-elf* | mipsel-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16" + tmake_file="mips/t-elf" ;; mips64-*-elf* | mips64el-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16" + tmake_file="mips/t-elf" target_cpu_default="MASK_64BIT|MASK_FLOAT64" tm_defines="${tm_defines} MIPS_ISA_DEFAULT=3 MIPS_ABI_DEFAULT=ABI_O64" ;; @@ -1880,13 +1878,13 @@ mips64vr-*-elf* | mips64vrel-*-elf*) ;; mips64orion-*-elf* | mips64orionel-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elforion.h mips/elf.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16" + tmake_file="mips/t-elf" target_cpu_default="MASK_64BIT|MASK_FLOAT64" tm_defines="${tm_defines} MIPS_ISA_DEFAULT=3 MIPS_ABI_DEFAULT=ABI_O64" ;; mips*-*-rtems*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h mips/rtems.h rtems.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16 t-rtems mips/t-rtems" + tmake_file="mips/t-elf t-rtems mips/t-rtems" ;; mips-wrs-vxworks) tm_file="elfos.h ${tm_file} mips/elf.h vx-common.h vxworks.h mips/vxworks.h" @@ -1894,7 +1892,7 @@ mips-wrs-vxworks) ;; mipstx39-*-elf* | mipstx39el-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/r3900.h mips/elf.h" - tmake_file="mips/t-r3900 mips/t-libgcc-mips16" + tmake_file="mips/t-r3900" ;; mmix-knuth-mmixware) tm_file="${tm_file} newlib-stdint.h" @@ -2511,7 +2509,6 @@ v850*-*-*) vax-*-linux*) tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h vax/elf.h vax/linux.h" extra_options="${extra_options} vax/elf.opt" - tmake_file="${tmake_file} vax/t-linux" ;; vax-*-netbsdelf*) tm_file="${tm_file} elfos.h netbsd.h netbsd-elf.h vax/elf.h vax/netbsd-elf.h" diff --git 
a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index e07c8c328c6..5f0d5629462 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -23495,7 +23495,7 @@ arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED) /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal ARM insns and therefore guarantee that the shift count is modulo 256. - DImode shifts (those implemented by lib1funcs.asm or by optabs.c) + DImode shifts (those implemented by lib1funcs.S or by optabs.c) guarantee no particular behavior for out-of-range counts. */ static unsigned HOST_WIDE_INT diff --git a/gcc/config/arm/bpabi-v6m.S b/gcc/config/arm/bpabi-v6m.S deleted file mode 100644 index 4ecea6da5a6..00000000000 --- a/gcc/config/arm/bpabi-v6m.S +++ /dev/null @@ -1,318 +0,0 @@ -/* Miscellaneous BPABI functions. ARMv6M implementation - - Copyright (C) 2006, 2008, 2009, 2010 Free Software Foundation, Inc. - Contributed by CodeSourcery. - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#ifdef __ARM_EABI__ -/* Some attributes that are common to all routines in this file. */ - /* Tag_ABI_align_needed: This code does not require 8-byte - alignment from the caller. */ - /* .eabi_attribute 24, 0 -- default setting. */ - /* Tag_ABI_align_preserved: This code preserves 8-byte - alignment in any callee. */ - .eabi_attribute 25, 1 -#endif /* __ARM_EABI__ */ - -#ifdef L_aeabi_lcmp - -FUNC_START aeabi_lcmp - cmp xxh, yyh - beq 1f - bgt 2f - mov r0, #1 - neg r0, r0 - RET -2: - mov r0, #1 - RET -1: - sub r0, xxl, yyl - beq 1f - bhi 2f - mov r0, #1 - neg r0, r0 - RET -2: - mov r0, #1 -1: - RET - FUNC_END aeabi_lcmp - -#endif /* L_aeabi_lcmp */ - -#ifdef L_aeabi_ulcmp - -FUNC_START aeabi_ulcmp - cmp xxh, yyh - bne 1f - sub r0, xxl, yyl - beq 2f -1: - bcs 1f - mov r0, #1 - neg r0, r0 - RET -1: - mov r0, #1 -2: - RET - FUNC_END aeabi_ulcmp - -#endif /* L_aeabi_ulcmp */ - -.macro test_div_by_zero signed - cmp yyh, #0 - bne 7f - cmp yyl, #0 - bne 7f - cmp xxh, #0 - bne 2f - cmp xxl, #0 -2: - .ifc \signed, unsigned - beq 3f - mov xxh, #0 - mvn xxh, xxh @ 0xffffffff - mov xxl, xxh -3: - .else - beq 5f - blt 6f - mov xxl, #0 - mvn xxl, xxl @ 0xffffffff - lsr xxh, xxl, #1 @ 0x7fffffff - b 5f -6: mov xxh, #0x80 - lsl xxh, xxh, #24 @ 0x80000000 - mov xxl, #0 -5: - .endif - @ tailcalls are tricky on v6-m. - push {r0, r1, r2} - ldr r0, 1f - adr r1, 1f - add r0, r1 - str r0, [sp, #8] - @ We know we are not on armv4t, so pop pc is safe. 
- pop {r0, r1, pc} - .align 2 -1: - .word __aeabi_ldiv0 - 1b -7: -.endm - -#ifdef L_aeabi_ldivmod - -FUNC_START aeabi_ldivmod - test_div_by_zero signed - - push {r0, r1} - mov r0, sp - push {r0, lr} - ldr r0, [sp, #8] - bl SYM(__gnu_ldivmod_helper) - ldr r3, [sp, #4] - mov lr, r3 - add sp, sp, #8 - pop {r2, r3} - RET - FUNC_END aeabi_ldivmod - -#endif /* L_aeabi_ldivmod */ - -#ifdef L_aeabi_uldivmod - -FUNC_START aeabi_uldivmod - test_div_by_zero unsigned - - push {r0, r1} - mov r0, sp - push {r0, lr} - ldr r0, [sp, #8] - bl SYM(__gnu_uldivmod_helper) - ldr r3, [sp, #4] - mov lr, r3 - add sp, sp, #8 - pop {r2, r3} - RET - FUNC_END aeabi_uldivmod - -#endif /* L_aeabi_uldivmod */ - -#ifdef L_arm_addsubsf3 - -FUNC_START aeabi_frsub - - push {r4, lr} - mov r4, #1 - lsl r4, #31 - eor r0, r0, r4 - bl __aeabi_fadd - pop {r4, pc} - - FUNC_END aeabi_frsub - -#endif /* L_arm_addsubsf3 */ - -#ifdef L_arm_cmpsf2 - -FUNC_START aeabi_cfrcmple - - mov ip, r0 - mov r0, r1 - mov r1, ip - b 6f - -FUNC_START aeabi_cfcmpeq -FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq - - @ The status-returning routines are required to preserve all - @ registers except ip, lr, and cpsr. -6: push {r0, r1, r2, r3, r4, lr} - bl __lesf2 - @ Set the Z flag correctly, and the C flag unconditionally. - cmp r0, #0 - @ Clear the C flag if the return value was -1, indicating - @ that the first operand was smaller than the second. - bmi 1f - mov r1, #0 - cmn r0, r1 -1: - pop {r0, r1, r2, r3, r4, pc} - - FUNC_END aeabi_cfcmple - FUNC_END aeabi_cfcmpeq - FUNC_END aeabi_cfrcmple - -FUNC_START aeabi_fcmpeq - - push {r4, lr} - bl __eqsf2 - neg r0, r0 - add r0, r0, #1 - pop {r4, pc} - - FUNC_END aeabi_fcmpeq - -.macro COMPARISON cond, helper, mode=sf2 -FUNC_START aeabi_fcmp\cond - - push {r4, lr} - bl __\helper\mode - cmp r0, #0 - b\cond 1f - mov r0, #0 - pop {r4, pc} -1: - mov r0, #1 - pop {r4, pc} - - FUNC_END aeabi_fcmp\cond -.endm - -COMPARISON lt, le -COMPARISON le, le -COMPARISON gt, ge -COMPARISON ge, ge - -#endif /* L_arm_cmpsf2 */ - -#ifdef L_arm_addsubdf3 - -FUNC_START aeabi_drsub - - push {r4, lr} - mov r4, #1 - lsl r4, #31 - eor xxh, xxh, r4 - bl __aeabi_dadd - pop {r4, pc} - - FUNC_END aeabi_drsub - -#endif /* L_arm_addsubdf3 */ - -#ifdef L_arm_cmpdf2 - -FUNC_START aeabi_cdrcmple - - mov ip, r0 - mov r0, r2 - mov r2, ip - mov ip, r1 - mov r1, r3 - mov r3, ip - b 6f - -FUNC_START aeabi_cdcmpeq -FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq - - @ The status-returning routines are required to preserve all - @ registers except ip, lr, and cpsr. -6: push {r0, r1, r2, r3, r4, lr} - bl __ledf2 - @ Set the Z flag correctly, and the C flag unconditionally. - cmp r0, #0 - @ Clear the C flag if the return value was -1, indicating - @ that the first operand was smaller than the second. 
- bmi 1f - mov r1, #0 - cmn r0, r1 -1: - pop {r0, r1, r2, r3, r4, pc} - - FUNC_END aeabi_cdcmple - FUNC_END aeabi_cdcmpeq - FUNC_END aeabi_cdrcmple - -FUNC_START aeabi_dcmpeq - - push {r4, lr} - bl __eqdf2 - neg r0, r0 - add r0, r0, #1 - pop {r4, pc} - - FUNC_END aeabi_dcmpeq - -.macro COMPARISON cond, helper, mode=df2 -FUNC_START aeabi_dcmp\cond - - push {r4, lr} - bl __\helper\mode - cmp r0, #0 - b\cond 1f - mov r0, #0 - pop {r4, pc} -1: - mov r0, #1 - pop {r4, pc} - - FUNC_END aeabi_dcmp\cond -.endm - -COMPARISON lt, le -COMPARISON le, le -COMPARISON gt, ge -COMPARISON ge, ge - -#endif /* L_arm_cmpdf2 */ diff --git a/gcc/config/arm/bpabi.S b/gcc/config/arm/bpabi.S deleted file mode 100644 index 2ff338927fa..00000000000 --- a/gcc/config/arm/bpabi.S +++ /dev/null @@ -1,163 +0,0 @@ -/* Miscellaneous BPABI functions. - - Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010 - Free Software Foundation, Inc. - Contributed by CodeSourcery, LLC. - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#ifdef __ARM_EABI__ -/* Some attributes that are common to all routines in this file. */ - /* Tag_ABI_align_needed: This code does not require 8-byte - alignment from the caller. */ - /* .eabi_attribute 24, 0 -- default setting. */ - /* Tag_ABI_align_preserved: This code preserves 8-byte - alignment in any callee. */ - .eabi_attribute 25, 1 -#endif /* __ARM_EABI__ */ - -#ifdef L_aeabi_lcmp - -ARM_FUNC_START aeabi_lcmp - cmp xxh, yyh - do_it lt - movlt r0, #-1 - do_it gt - movgt r0, #1 - do_it ne - RETc(ne) - subs r0, xxl, yyl - do_it lo - movlo r0, #-1 - do_it hi - movhi r0, #1 - RET - FUNC_END aeabi_lcmp - -#endif /* L_aeabi_lcmp */ - -#ifdef L_aeabi_ulcmp - -ARM_FUNC_START aeabi_ulcmp - cmp xxh, yyh - do_it lo - movlo r0, #-1 - do_it hi - movhi r0, #1 - do_it ne - RETc(ne) - cmp xxl, yyl - do_it lo - movlo r0, #-1 - do_it hi - movhi r0, #1 - do_it eq - moveq r0, #0 - RET - FUNC_END aeabi_ulcmp - -#endif /* L_aeabi_ulcmp */ - -.macro test_div_by_zero signed -/* Tail-call to divide-by-zero handlers which may be overridden by the user, - so unwinding works properly. */ -#if defined(__thumb2__) - cbnz yyh, 1f - cbnz yyl, 1f - cmp xxh, #0 - do_it eq - cmpeq xxl, #0 - .ifc \signed, unsigned - beq 2f - mov xxh, #0xffffffff - mov xxl, xxh -2: - .else - do_it lt, t - movlt xxl, #0 - movlt xxh, #0x80000000 - do_it gt, t - movgt xxh, #0x7fffffff - movgt xxl, #0xffffffff - .endif - b SYM (__aeabi_ldiv0) __PLT__ -1: -#else - /* Note: Thumb-1 code calls via an ARM shim on processors which - support ARM mode. 
*/ - cmp yyh, #0 - cmpeq yyl, #0 - bne 2f - cmp xxh, #0 - cmpeq xxl, #0 - .ifc \signed, unsigned - movne xxh, #0xffffffff - movne xxl, #0xffffffff - .else - movlt xxh, #0x80000000 - movlt xxl, #0 - movgt xxh, #0x7fffffff - movgt xxl, #0xffffffff - .endif - b SYM (__aeabi_ldiv0) __PLT__ -2: -#endif -.endm - -#ifdef L_aeabi_ldivmod - -ARM_FUNC_START aeabi_ldivmod - test_div_by_zero signed - - sub sp, sp, #8 -#if defined(__thumb2__) - mov ip, sp - push {ip, lr} -#else - do_push {sp, lr} -#endif - bl SYM(__gnu_ldivmod_helper) __PLT__ - ldr lr, [sp, #4] - add sp, sp, #8 - do_pop {r2, r3} - RET - -#endif /* L_aeabi_ldivmod */ - -#ifdef L_aeabi_uldivmod - -ARM_FUNC_START aeabi_uldivmod - test_div_by_zero unsigned - - sub sp, sp, #8 -#if defined(__thumb2__) - mov ip, sp - push {ip, lr} -#else - do_push {sp, lr} -#endif - bl SYM(__gnu_uldivmod_helper) __PLT__ - ldr lr, [sp, #4] - add sp, sp, #8 - do_pop {r2, r3} - RET - -#endif /* L_aeabi_divmod */ - diff --git a/gcc/config/arm/ieee754-df.S b/gcc/config/arm/ieee754-df.S deleted file mode 100644 index eb0c38632d0..00000000000 --- a/gcc/config/arm/ieee754-df.S +++ /dev/null @@ -1,1447 +0,0 @@ -/* ieee754-df.S double-precision floating point support for ARM - - Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. - Contributed by Nicolas Pitre (nico@cam.org) - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -/* - * Notes: - * - * The goal of this code is to be as fast as possible. This is - * not meant to be easy to understand for the casual reader. - * For slightly simpler code please see the single precision version - * of this file. - * - * Only the default rounding mode is intended for best performances. - * Exceptions aren't supported yet, but that can be added quite easily - * if necessary without impacting performances. - */ - - -@ For FPA, float words are always big-endian. -@ For VFP, floats words follow the memory system mode. 
-#if defined(__VFP_FP__) && !defined(__ARMEB__) -#define xl r0 -#define xh r1 -#define yl r2 -#define yh r3 -#else -#define xh r0 -#define xl r1 -#define yh r2 -#define yl r3 -#endif - - -#ifdef L_arm_negdf2 - -ARM_FUNC_START negdf2 -ARM_FUNC_ALIAS aeabi_dneg negdf2 - - @ flip sign bit - eor xh, xh, #0x80000000 - RET - - FUNC_END aeabi_dneg - FUNC_END negdf2 - -#endif - -#ifdef L_arm_addsubdf3 - -ARM_FUNC_START aeabi_drsub - - eor xh, xh, #0x80000000 @ flip sign bit of first arg - b 1f - -ARM_FUNC_START subdf3 -ARM_FUNC_ALIAS aeabi_dsub subdf3 - - eor yh, yh, #0x80000000 @ flip sign bit of second arg -#if defined(__INTERWORKING_STUBS__) - b 1f @ Skip Thumb-code prologue -#endif - -ARM_FUNC_START adddf3 -ARM_FUNC_ALIAS aeabi_dadd adddf3 - -1: do_push {r4, r5, lr} - - @ Look for zeroes, equal values, INF, or NAN. - shift1 lsl, r4, xh, #1 - shift1 lsl, r5, yh, #1 - teq r4, r5 - do_it eq - teqeq xl, yl - do_it ne, ttt - COND(orr,s,ne) ip, r4, xl - COND(orr,s,ne) ip, r5, yl - COND(mvn,s,ne) ip, r4, asr #21 - COND(mvn,s,ne) ip, r5, asr #21 - beq LSYM(Lad_s) - - @ Compute exponent difference. Make largest exponent in r4, - @ corresponding arg in xh-xl, and positive exponent difference in r5. - shift1 lsr, r4, r4, #21 - rsbs r5, r4, r5, lsr #21 - do_it lt - rsblt r5, r5, #0 - ble 1f - add r4, r4, r5 - eor yl, xl, yl - eor yh, xh, yh - eor xl, yl, xl - eor xh, yh, xh - eor yl, xl, yl - eor yh, xh, yh -1: - @ If exponent difference is too large, return largest argument - @ already in xh-xl. We need up to 54 bit to handle proper rounding - @ of 0x1p54 - 1.1. - cmp r5, #54 - do_it hi - RETLDM "r4, r5" hi - - @ Convert mantissa to signed integer. - tst xh, #0x80000000 - mov xh, xh, lsl #12 - mov ip, #0x00100000 - orr xh, ip, xh, lsr #12 - beq 1f -#if defined(__thumb2__) - negs xl, xl - sbc xh, xh, xh, lsl #1 -#else - rsbs xl, xl, #0 - rsc xh, xh, #0 -#endif -1: - tst yh, #0x80000000 - mov yh, yh, lsl #12 - orr yh, ip, yh, lsr #12 - beq 1f -#if defined(__thumb2__) - negs yl, yl - sbc yh, yh, yh, lsl #1 -#else - rsbs yl, yl, #0 - rsc yh, yh, #0 -#endif -1: - @ If exponent == difference, one or both args were denormalized. - @ Since this is not common case, rescale them off line. - teq r4, r5 - beq LSYM(Lad_d) -LSYM(Lad_x): - - @ Compensate for the exponent overlapping the mantissa MSB added later - sub r4, r4, #1 - - @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip. - rsbs lr, r5, #32 - blt 1f - shift1 lsl, ip, yl, lr - shiftop adds xl xl yl lsr r5 yl - adc xh, xh, #0 - shiftop adds xl xl yh lsl lr yl - shiftop adcs xh xh yh asr r5 yh - b 2f -1: sub r5, r5, #32 - add lr, lr, #32 - cmp yl, #1 - shift1 lsl,ip, yh, lr - do_it cs - orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later - shiftop adds xl xl yh asr r5 yh - adcs xh, xh, yh, asr #31 -2: - @ We now have a result in xh-xl-ip. - @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above) - and r5, xh, #0x80000000 - bpl LSYM(Lad_p) -#if defined(__thumb2__) - mov lr, #0 - negs ip, ip - sbcs xl, lr, xl - sbc xh, lr, xh -#else - rsbs ip, ip, #0 - rscs xl, xl, #0 - rsc xh, xh, #0 -#endif - - @ Determine how to normalize the result. -LSYM(Lad_p): - cmp xh, #0x00100000 - bcc LSYM(Lad_a) - cmp xh, #0x00200000 - bcc LSYM(Lad_e) - - @ Result needs to be shifted right. - movs xh, xh, lsr #1 - movs xl, xl, rrx - mov ip, ip, rrx - add r4, r4, #1 - - @ Make sure we did not bust our exponent. - mov r2, r4, lsl #21 - cmn r2, #(2 << 21) - bcs LSYM(Lad_o) - - @ Our result is now properly aligned into xh-xl, remaining bits in ip. 
- @ Round with MSB of ip. If halfway between two numbers, round towards - @ LSB of xl = 0. - @ Pack final result together. -LSYM(Lad_e): - cmp ip, #0x80000000 - do_it eq - COND(mov,s,eq) ip, xl, lsr #1 - adcs xl, xl, #0 - adc xh, xh, r4, lsl #20 - orr xh, xh, r5 - RETLDM "r4, r5" - - @ Result must be shifted left and exponent adjusted. -LSYM(Lad_a): - movs ip, ip, lsl #1 - adcs xl, xl, xl - adc xh, xh, xh - tst xh, #0x00100000 - sub r4, r4, #1 - bne LSYM(Lad_e) - - @ No rounding necessary since ip will always be 0 at this point. -LSYM(Lad_l): - -#if __ARM_ARCH__ < 5 - - teq xh, #0 - movne r3, #20 - moveq r3, #52 - moveq xh, xl - moveq xl, #0 - mov r2, xh - cmp r2, #(1 << 16) - movhs r2, r2, lsr #16 - subhs r3, r3, #16 - cmp r2, #(1 << 8) - movhs r2, r2, lsr #8 - subhs r3, r3, #8 - cmp r2, #(1 << 4) - movhs r2, r2, lsr #4 - subhs r3, r3, #4 - cmp r2, #(1 << 2) - subhs r3, r3, #2 - sublo r3, r3, r2, lsr #1 - sub r3, r3, r2, lsr #3 - -#else - - teq xh, #0 - do_it eq, t - moveq xh, xl - moveq xl, #0 - clz r3, xh - do_it eq - addeq r3, r3, #32 - sub r3, r3, #11 - -#endif - - @ determine how to shift the value. - subs r2, r3, #32 - bge 2f - adds r2, r2, #12 - ble 1f - - @ shift value left 21 to 31 bits, or actually right 11 to 1 bits - @ since a register switch happened above. - add ip, r2, #20 - rsb r2, r2, #12 - shift1 lsl, xl, xh, ip - shift1 lsr, xh, xh, r2 - b 3f - - @ actually shift value left 1 to 20 bits, which might also represent - @ 32 to 52 bits if counting the register switch that happened earlier. -1: add r2, r2, #20 -2: do_it le - rsble ip, r2, #32 - shift1 lsl, xh, xh, r2 -#if defined(__thumb2__) - lsr ip, xl, ip - itt le - orrle xh, xh, ip - lslle xl, xl, r2 -#else - orrle xh, xh, xl, lsr ip - movle xl, xl, lsl r2 -#endif - - @ adjust exponent accordingly. -3: subs r4, r4, r3 - do_it ge, tt - addge xh, xh, r4, lsl #20 - orrge xh, xh, r5 - RETLDM "r4, r5" ge - - @ Exponent too small, denormalize result. - @ Find out proper shift value. - mvn r4, r4 - subs r4, r4, #31 - bge 2f - adds r4, r4, #12 - bgt 1f - - @ shift result right of 1 to 20 bits, sign is in r5. - add r4, r4, #20 - rsb r2, r4, #32 - shift1 lsr, xl, xl, r4 - shiftop orr xl xl xh lsl r2 yh - shiftop orr xh r5 xh lsr r4 yh - RETLDM "r4, r5" - - @ shift result right of 21 to 31 bits, or left 11 to 1 bits after - @ a register switch from xh to xl. -1: rsb r4, r4, #12 - rsb r2, r4, #32 - shift1 lsr, xl, xl, r2 - shiftop orr xl xl xh lsl r4 yh - mov xh, r5 - RETLDM "r4, r5" - - @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch - @ from xh to xl. -2: shift1 lsr, xl, xh, r4 - mov xh, r5 - RETLDM "r4, r5" - - @ Adjust exponents for denormalized arguments. - @ Note that r4 must not remain equal to 0. -LSYM(Lad_d): - teq r4, #0 - eor yh, yh, #0x00100000 - do_it eq, te - eoreq xh, xh, #0x00100000 - addeq r4, r4, #1 - subne r5, r5, #1 - b LSYM(Lad_x) - - -LSYM(Lad_s): - mvns ip, r4, asr #21 - do_it ne - COND(mvn,s,ne) ip, r5, asr #21 - beq LSYM(Lad_i) - - teq r4, r5 - do_it eq - teqeq xl, yl - beq 1f - - @ Result is x + 0.0 = x or 0.0 + y = y. - orrs ip, r4, xl - do_it eq, t - moveq xh, yh - moveq xl, yl - RETLDM "r4, r5" - -1: teq xh, yh - - @ Result is x - x = 0. - do_it ne, tt - movne xh, #0 - movne xl, #0 - RETLDM "r4, r5" ne - - @ Result is x + x = 2x. 
- movs ip, r4, lsr #21 - bne 2f - movs xl, xl, lsl #1 - adcs xh, xh, xh - do_it cs - orrcs xh, xh, #0x80000000 - RETLDM "r4, r5" -2: adds r4, r4, #(2 << 21) - do_it cc, t - addcc xh, xh, #(1 << 20) - RETLDM "r4, r5" cc - and r5, xh, #0x80000000 - - @ Overflow: return INF. -LSYM(Lad_o): - orr xh, r5, #0x7f000000 - orr xh, xh, #0x00f00000 - mov xl, #0 - RETLDM "r4, r5" - - @ At least one of x or y is INF/NAN. - @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN) - @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN) - @ if either is NAN: return NAN - @ if opposite sign: return NAN - @ otherwise return xh-xl (which is INF or -INF) -LSYM(Lad_i): - mvns ip, r4, asr #21 - do_it ne, te - movne xh, yh - movne xl, yl - COND(mvn,s,eq) ip, r5, asr #21 - do_it ne, t - movne yh, xh - movne yl, xl - orrs r4, xl, xh, lsl #12 - do_it eq, te - COND(orr,s,eq) r5, yl, yh, lsl #12 - teqeq xh, yh - orrne xh, xh, #0x00080000 @ quiet NAN - RETLDM "r4, r5" - - FUNC_END aeabi_dsub - FUNC_END subdf3 - FUNC_END aeabi_dadd - FUNC_END adddf3 - -ARM_FUNC_START floatunsidf -ARM_FUNC_ALIAS aeabi_ui2d floatunsidf - - teq r0, #0 - do_it eq, t - moveq r1, #0 - RETc(eq) - do_push {r4, r5, lr} - mov r4, #0x400 @ initial exponent - add r4, r4, #(52-1 - 1) - mov r5, #0 @ sign bit is 0 - .ifnc xl, r0 - mov xl, r0 - .endif - mov xh, #0 - b LSYM(Lad_l) - - FUNC_END aeabi_ui2d - FUNC_END floatunsidf - -ARM_FUNC_START floatsidf -ARM_FUNC_ALIAS aeabi_i2d floatsidf - - teq r0, #0 - do_it eq, t - moveq r1, #0 - RETc(eq) - do_push {r4, r5, lr} - mov r4, #0x400 @ initial exponent - add r4, r4, #(52-1 - 1) - ands r5, r0, #0x80000000 @ sign bit in r5 - do_it mi - rsbmi r0, r0, #0 @ absolute value - .ifnc xl, r0 - mov xl, r0 - .endif - mov xh, #0 - b LSYM(Lad_l) - - FUNC_END aeabi_i2d - FUNC_END floatsidf - -ARM_FUNC_START extendsfdf2 -ARM_FUNC_ALIAS aeabi_f2d extendsfdf2 - - movs r2, r0, lsl #1 @ toss sign bit - mov xh, r2, asr #3 @ stretch exponent - mov xh, xh, rrx @ retrieve sign bit - mov xl, r2, lsl #28 @ retrieve remaining bits - do_it ne, ttt - COND(and,s,ne) r3, r2, #0xff000000 @ isolate exponent - teqne r3, #0xff000000 @ if not 0, check if INF or NAN - eorne xh, xh, #0x38000000 @ fixup exponent otherwise. - RETc(ne) @ and return it. - - teq r2, #0 @ if actually 0 - do_it ne, e - teqne r3, #0xff000000 @ or INF or NAN - RETc(eq) @ we are done already. - - @ value was denormalized. We can normalize it now. - do_push {r4, r5, lr} - mov r4, #0x380 @ setup corresponding exponent - and r5, xh, #0x80000000 @ move sign bit in r5 - bic xh, xh, #0x80000000 - b LSYM(Lad_l) - - FUNC_END aeabi_f2d - FUNC_END extendsfdf2 - -ARM_FUNC_START floatundidf -ARM_FUNC_ALIAS aeabi_ul2d floatundidf - - orrs r2, r0, r1 -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - do_it eq, t - mvfeqd f0, #0.0 -#else - do_it eq -#endif - RETc(eq) - -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - @ For hard FPA code we want to return via the tail below so that - @ we can return the result in f0 as well as in r0/r1 for backwards - @ compatibility. - adr ip, LSYM(f0_ret) - @ Push pc as well so that RETLDM works correctly. 
- do_push {r4, r5, ip, lr, pc} -#else - do_push {r4, r5, lr} -#endif - - mov r5, #0 - b 2f - -ARM_FUNC_START floatdidf -ARM_FUNC_ALIAS aeabi_l2d floatdidf - - orrs r2, r0, r1 -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - do_it eq, t - mvfeqd f0, #0.0 -#else - do_it eq -#endif - RETc(eq) - -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - @ For hard FPA code we want to return via the tail below so that - @ we can return the result in f0 as well as in r0/r1 for backwards - @ compatibility. - adr ip, LSYM(f0_ret) - @ Push pc as well so that RETLDM works correctly. - do_push {r4, r5, ip, lr, pc} -#else - do_push {r4, r5, lr} -#endif - - ands r5, ah, #0x80000000 @ sign bit in r5 - bpl 2f -#if defined(__thumb2__) - negs al, al - sbc ah, ah, ah, lsl #1 -#else - rsbs al, al, #0 - rsc ah, ah, #0 -#endif -2: - mov r4, #0x400 @ initial exponent - add r4, r4, #(52-1 - 1) - - @ FPA little-endian: must swap the word order. - .ifnc xh, ah - mov ip, al - mov xh, ah - mov xl, ip - .endif - - movs ip, xh, lsr #22 - beq LSYM(Lad_p) - - @ The value is too big. Scale it down a bit... - mov r2, #3 - movs ip, ip, lsr #3 - do_it ne - addne r2, r2, #3 - movs ip, ip, lsr #3 - do_it ne - addne r2, r2, #3 - add r2, r2, ip, lsr #3 - - rsb r3, r2, #32 - shift1 lsl, ip, xl, r3 - shift1 lsr, xl, xl, r2 - shiftop orr xl xl xh lsl r3 lr - shift1 lsr, xh, xh, r2 - add r4, r4, r2 - b LSYM(Lad_p) - -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - - @ Legacy code expects the result to be returned in f0. Copy it - @ there as well. -LSYM(f0_ret): - do_push {r0, r1} - ldfd f0, [sp], #8 - RETLDM - -#endif - - FUNC_END floatdidf - FUNC_END aeabi_l2d - FUNC_END floatundidf - FUNC_END aeabi_ul2d - -#endif /* L_addsubdf3 */ - -#ifdef L_arm_muldivdf3 - -ARM_FUNC_START muldf3 -ARM_FUNC_ALIAS aeabi_dmul muldf3 - do_push {r4, r5, r6, lr} - - @ Mask out exponents, trap any zero/denormal/INF/NAN. - mov ip, #0xff - orr ip, ip, #0x700 - ands r4, ip, xh, lsr #20 - do_it ne, tte - COND(and,s,ne) r5, ip, yh, lsr #20 - teqne r4, ip - teqne r5, ip - bleq LSYM(Lml_s) - - @ Add exponents together - add r4, r4, r5 - - @ Determine final sign. - eor r6, xh, yh - - @ Convert mantissa to unsigned integer. - @ If power of two, branch to a separate path. - bic xh, xh, ip, lsl #21 - bic yh, yh, ip, lsl #21 - orrs r5, xl, xh, lsl #12 - do_it ne - COND(orr,s,ne) r5, yl, yh, lsl #12 - orr xh, xh, #0x00100000 - orr yh, yh, #0x00100000 - beq LSYM(Lml_1) - -#if __ARM_ARCH__ < 4 - - @ Put sign bit in r6, which will be restored in yl later. - and r6, r6, #0x80000000 - - @ Well, no way to make it shorter without the umull instruction. 
- stmfd sp!, {r6, r7, r8, r9, sl, fp} - mov r7, xl, lsr #16 - mov r8, yl, lsr #16 - mov r9, xh, lsr #16 - mov sl, yh, lsr #16 - bic xl, xl, r7, lsl #16 - bic yl, yl, r8, lsl #16 - bic xh, xh, r9, lsl #16 - bic yh, yh, sl, lsl #16 - mul ip, xl, yl - mul fp, xl, r8 - mov lr, #0 - adds ip, ip, fp, lsl #16 - adc lr, lr, fp, lsr #16 - mul fp, r7, yl - adds ip, ip, fp, lsl #16 - adc lr, lr, fp, lsr #16 - mul fp, xl, sl - mov r5, #0 - adds lr, lr, fp, lsl #16 - adc r5, r5, fp, lsr #16 - mul fp, r7, yh - adds lr, lr, fp, lsl #16 - adc r5, r5, fp, lsr #16 - mul fp, xh, r8 - adds lr, lr, fp, lsl #16 - adc r5, r5, fp, lsr #16 - mul fp, r9, yl - adds lr, lr, fp, lsl #16 - adc r5, r5, fp, lsr #16 - mul fp, xh, sl - mul r6, r9, sl - adds r5, r5, fp, lsl #16 - adc r6, r6, fp, lsr #16 - mul fp, r9, yh - adds r5, r5, fp, lsl #16 - adc r6, r6, fp, lsr #16 - mul fp, xl, yh - adds lr, lr, fp - mul fp, r7, sl - adcs r5, r5, fp - mul fp, xh, yl - adc r6, r6, #0 - adds lr, lr, fp - mul fp, r9, r8 - adcs r5, r5, fp - mul fp, r7, r8 - adc r6, r6, #0 - adds lr, lr, fp - mul fp, xh, yh - adcs r5, r5, fp - adc r6, r6, #0 - ldmfd sp!, {yl, r7, r8, r9, sl, fp} - -#else - - @ Here is the actual multiplication. - umull ip, lr, xl, yl - mov r5, #0 - umlal lr, r5, xh, yl - and yl, r6, #0x80000000 - umlal lr, r5, xl, yh - mov r6, #0 - umlal r5, r6, xh, yh - -#endif - - @ The LSBs in ip are only significant for the final rounding. - @ Fold them into lr. - teq ip, #0 - do_it ne - orrne lr, lr, #1 - - @ Adjust result upon the MSB position. - sub r4, r4, #0xff - cmp r6, #(1 << (20-11)) - sbc r4, r4, #0x300 - bcs 1f - movs lr, lr, lsl #1 - adcs r5, r5, r5 - adc r6, r6, r6 -1: - @ Shift to final position, add sign to result. - orr xh, yl, r6, lsl #11 - orr xh, xh, r5, lsr #21 - mov xl, r5, lsl #11 - orr xl, xl, lr, lsr #21 - mov lr, lr, lsl #11 - - @ Check exponent range for under/overflow. - subs ip, r4, #(254 - 1) - do_it hi - cmphi ip, #0x700 - bhi LSYM(Lml_u) - - @ Round the result, merge final exponent. - cmp lr, #0x80000000 - do_it eq - COND(mov,s,eq) lr, xl, lsr #1 - adcs xl, xl, #0 - adc xh, xh, r4, lsl #20 - RETLDM "r4, r5, r6" - - @ Multiplication by 0x1p*: let''s shortcut a lot of code. -LSYM(Lml_1): - and r6, r6, #0x80000000 - orr xh, r6, xh - orr xl, xl, yl - eor xh, xh, yh - subs r4, r4, ip, lsr #1 - do_it gt, tt - COND(rsb,s,gt) r5, r4, ip - orrgt xh, xh, r4, lsl #20 - RETLDM "r4, r5, r6" gt - - @ Under/overflow: fix things up for the code below. - orr xh, xh, #0x00100000 - mov lr, #0 - subs r4, r4, #1 - -LSYM(Lml_u): - @ Overflow? - bgt LSYM(Lml_o) - - @ Check if denormalized result is possible, otherwise return signed 0. - cmn r4, #(53 + 1) - do_it le, tt - movle xl, #0 - bicle xh, xh, #0x7fffffff - RETLDM "r4, r5, r6" le - - @ Find out proper shift value. - rsb r4, r4, #0 - subs r4, r4, #32 - bge 2f - adds r4, r4, #12 - bgt 1f - - @ shift result right of 1 to 20 bits, preserve sign bit, round, etc. - add r4, r4, #20 - rsb r5, r4, #32 - shift1 lsl, r3, xl, r5 - shift1 lsr, xl, xl, r4 - shiftop orr xl xl xh lsl r5 r2 - and r2, xh, #0x80000000 - bic xh, xh, #0x80000000 - adds xl, xl, r3, lsr #31 - shiftop adc xh r2 xh lsr r4 r6 - orrs lr, lr, r3, lsl #1 - do_it eq - biceq xl, xl, r3, lsr #31 - RETLDM "r4, r5, r6" - - @ shift result right of 21 to 31 bits, or left 11 to 1 bits after - @ a register switch from xh to xl. Then round. 
-1: rsb r4, r4, #12 - rsb r5, r4, #32 - shift1 lsl, r3, xl, r4 - shift1 lsr, xl, xl, r5 - shiftop orr xl xl xh lsl r4 r2 - bic xh, xh, #0x7fffffff - adds xl, xl, r3, lsr #31 - adc xh, xh, #0 - orrs lr, lr, r3, lsl #1 - do_it eq - biceq xl, xl, r3, lsr #31 - RETLDM "r4, r5, r6" - - @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch - @ from xh to xl. Leftover bits are in r3-r6-lr for rounding. -2: rsb r5, r4, #32 - shiftop orr lr lr xl lsl r5 r2 - shift1 lsr, r3, xl, r4 - shiftop orr r3 r3 xh lsl r5 r2 - shift1 lsr, xl, xh, r4 - bic xh, xh, #0x7fffffff - shiftop bic xl xl xh lsr r4 r2 - add xl, xl, r3, lsr #31 - orrs lr, lr, r3, lsl #1 - do_it eq - biceq xl, xl, r3, lsr #31 - RETLDM "r4, r5, r6" - - @ One or both arguments are denormalized. - @ Scale them leftwards and preserve sign bit. -LSYM(Lml_d): - teq r4, #0 - bne 2f - and r6, xh, #0x80000000 -1: movs xl, xl, lsl #1 - adc xh, xh, xh - tst xh, #0x00100000 - do_it eq - subeq r4, r4, #1 - beq 1b - orr xh, xh, r6 - teq r5, #0 - do_it ne - RETc(ne) -2: and r6, yh, #0x80000000 -3: movs yl, yl, lsl #1 - adc yh, yh, yh - tst yh, #0x00100000 - do_it eq - subeq r5, r5, #1 - beq 3b - orr yh, yh, r6 - RET - -LSYM(Lml_s): - @ Isolate the INF and NAN cases away - teq r4, ip - and r5, ip, yh, lsr #20 - do_it ne - teqne r5, ip - beq 1f - - @ Here, one or more arguments are either denormalized or zero. - orrs r6, xl, xh, lsl #1 - do_it ne - COND(orr,s,ne) r6, yl, yh, lsl #1 - bne LSYM(Lml_d) - - @ Result is 0, but determine sign anyway. -LSYM(Lml_z): - eor xh, xh, yh - and xh, xh, #0x80000000 - mov xl, #0 - RETLDM "r4, r5, r6" - -1: @ One or both args are INF or NAN. - orrs r6, xl, xh, lsl #1 - do_it eq, te - moveq xl, yl - moveq xh, yh - COND(orr,s,ne) r6, yl, yh, lsl #1 - beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN - teq r4, ip - bne 1f - orrs r6, xl, xh, lsl #12 - bne LSYM(Lml_n) @ NAN * -> NAN -1: teq r5, ip - bne LSYM(Lml_i) - orrs r6, yl, yh, lsl #12 - do_it ne, t - movne xl, yl - movne xh, yh - bne LSYM(Lml_n) @ * NAN -> NAN - - @ Result is INF, but we need to determine its sign. -LSYM(Lml_i): - eor xh, xh, yh - - @ Overflow: return INF (sign already in xh). -LSYM(Lml_o): - and xh, xh, #0x80000000 - orr xh, xh, #0x7f000000 - orr xh, xh, #0x00f00000 - mov xl, #0 - RETLDM "r4, r5, r6" - - @ Return a quiet NAN. -LSYM(Lml_n): - orr xh, xh, #0x7f000000 - orr xh, xh, #0x00f80000 - RETLDM "r4, r5, r6" - - FUNC_END aeabi_dmul - FUNC_END muldf3 - -ARM_FUNC_START divdf3 -ARM_FUNC_ALIAS aeabi_ddiv divdf3 - - do_push {r4, r5, r6, lr} - - @ Mask out exponents, trap any zero/denormal/INF/NAN. - mov ip, #0xff - orr ip, ip, #0x700 - ands r4, ip, xh, lsr #20 - do_it ne, tte - COND(and,s,ne) r5, ip, yh, lsr #20 - teqne r4, ip - teqne r5, ip - bleq LSYM(Ldv_s) - - @ Substract divisor exponent from dividend''s. - sub r4, r4, r5 - - @ Preserve final sign into lr. - eor lr, xh, yh - - @ Convert mantissa to unsigned integer. - @ Dividend -> r5-r6, divisor -> yh-yl. - orrs r5, yl, yh, lsl #12 - mov xh, xh, lsl #12 - beq LSYM(Ldv_1) - mov yh, yh, lsl #12 - mov r5, #0x10000000 - orr yh, r5, yh, lsr #4 - orr yh, yh, yl, lsr #24 - mov yl, yl, lsl #8 - orr r5, r5, xh, lsr #4 - orr r5, r5, xl, lsr #24 - mov r6, xl, lsl #8 - - @ Initialize xh with final sign bit. - and xh, lr, #0x80000000 - - @ Ensure result will land to known bit position. - @ Apply exponent bias accordingly. 
- cmp r5, yh - do_it eq - cmpeq r6, yl - adc r4, r4, #(255 - 2) - add r4, r4, #0x300 - bcs 1f - movs yh, yh, lsr #1 - mov yl, yl, rrx -1: - @ Perform first substraction to align result to a nibble. - subs r6, r6, yl - sbc r5, r5, yh - movs yh, yh, lsr #1 - mov yl, yl, rrx - mov xl, #0x00100000 - mov ip, #0x00080000 - - @ The actual division loop. -1: subs lr, r6, yl - sbcs lr, r5, yh - do_it cs, tt - subcs r6, r6, yl - movcs r5, lr - orrcs xl, xl, ip - movs yh, yh, lsr #1 - mov yl, yl, rrx - subs lr, r6, yl - sbcs lr, r5, yh - do_it cs, tt - subcs r6, r6, yl - movcs r5, lr - orrcs xl, xl, ip, lsr #1 - movs yh, yh, lsr #1 - mov yl, yl, rrx - subs lr, r6, yl - sbcs lr, r5, yh - do_it cs, tt - subcs r6, r6, yl - movcs r5, lr - orrcs xl, xl, ip, lsr #2 - movs yh, yh, lsr #1 - mov yl, yl, rrx - subs lr, r6, yl - sbcs lr, r5, yh - do_it cs, tt - subcs r6, r6, yl - movcs r5, lr - orrcs xl, xl, ip, lsr #3 - - orrs lr, r5, r6 - beq 2f - mov r5, r5, lsl #4 - orr r5, r5, r6, lsr #28 - mov r6, r6, lsl #4 - mov yh, yh, lsl #3 - orr yh, yh, yl, lsr #29 - mov yl, yl, lsl #3 - movs ip, ip, lsr #4 - bne 1b - - @ We are done with a word of the result. - @ Loop again for the low word if this pass was for the high word. - tst xh, #0x00100000 - bne 3f - orr xh, xh, xl - mov xl, #0 - mov ip, #0x80000000 - b 1b -2: - @ Be sure result starts in the high word. - tst xh, #0x00100000 - do_it eq, t - orreq xh, xh, xl - moveq xl, #0 -3: - @ Check exponent range for under/overflow. - subs ip, r4, #(254 - 1) - do_it hi - cmphi ip, #0x700 - bhi LSYM(Lml_u) - - @ Round the result, merge final exponent. - subs ip, r5, yh - do_it eq, t - COND(sub,s,eq) ip, r6, yl - COND(mov,s,eq) ip, xl, lsr #1 - adcs xl, xl, #0 - adc xh, xh, r4, lsl #20 - RETLDM "r4, r5, r6" - - @ Division by 0x1p*: shortcut a lot of code. -LSYM(Ldv_1): - and lr, lr, #0x80000000 - orr xh, lr, xh, lsr #12 - adds r4, r4, ip, lsr #1 - do_it gt, tt - COND(rsb,s,gt) r5, r4, ip - orrgt xh, xh, r4, lsl #20 - RETLDM "r4, r5, r6" gt - - orr xh, xh, #0x00100000 - mov lr, #0 - subs r4, r4, #1 - b LSYM(Lml_u) - - @ Result mightt need to be denormalized: put remainder bits - @ in lr for rounding considerations. -LSYM(Ldv_u): - orr lr, r5, r6 - b LSYM(Lml_u) - - @ One or both arguments is either INF, NAN or zero. -LSYM(Ldv_s): - and r5, ip, yh, lsr #20 - teq r4, ip - do_it eq - teqeq r5, ip - beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN - teq r4, ip - bne 1f - orrs r4, xl, xh, lsl #12 - bne LSYM(Lml_n) @ NAN / -> NAN - teq r5, ip - bne LSYM(Lml_i) @ INF / -> INF - mov xl, yl - mov xh, yh - b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN -1: teq r5, ip - bne 2f - orrs r5, yl, yh, lsl #12 - beq LSYM(Lml_z) @ / INF -> 0 - mov xl, yl - mov xh, yh - b LSYM(Lml_n) @ / NAN -> NAN -2: @ If both are nonzero, we need to normalize and resume above. - orrs r6, xl, xh, lsl #1 - do_it ne - COND(orr,s,ne) r6, yl, yh, lsl #1 - bne LSYM(Lml_d) - @ One or both arguments are 0. - orrs r4, xl, xh, lsl #1 - bne LSYM(Lml_i) @ / 0 -> INF - orrs r5, yl, yh, lsl #1 - bne LSYM(Lml_z) @ 0 / -> 0 - b LSYM(Lml_n) @ 0 / 0 -> NAN - - FUNC_END aeabi_ddiv - FUNC_END divdf3 - -#endif /* L_muldivdf3 */ - -#ifdef L_arm_cmpdf2 - -@ Note: only r0 (return value) and ip are clobbered here. - -ARM_FUNC_START gtdf2 -ARM_FUNC_ALIAS gedf2 gtdf2 - mov ip, #-1 - b 1f - -ARM_FUNC_START ltdf2 -ARM_FUNC_ALIAS ledf2 ltdf2 - mov ip, #1 - b 1f - -ARM_FUNC_START cmpdf2 -ARM_FUNC_ALIAS nedf2 cmpdf2 -ARM_FUNC_ALIAS eqdf2 cmpdf2 - mov ip, #1 @ how should we specify unordered here? - -1: str ip, [sp, #-4]! 
- - @ Trap any INF/NAN first. - mov ip, xh, lsl #1 - mvns ip, ip, asr #21 - mov ip, yh, lsl #1 - do_it ne - COND(mvn,s,ne) ip, ip, asr #21 - beq 3f - - @ Test for equality. - @ Note that 0.0 is equal to -0.0. -2: add sp, sp, #4 - orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 - do_it eq, e - COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0 - teqne xh, yh @ or xh == yh - do_it eq, tt - teqeq xl, yl @ and xl == yl - moveq r0, #0 @ then equal. - RETc(eq) - - @ Clear C flag - cmn r0, #0 - - @ Compare sign, - teq xh, yh - - @ Compare values if same sign - do_it pl - cmppl xh, yh - do_it eq - cmpeq xl, yl - - @ Result: - do_it cs, e - movcs r0, yh, asr #31 - mvncc r0, yh, asr #31 - orr r0, r0, #1 - RET - - @ Look for a NAN. -3: mov ip, xh, lsl #1 - mvns ip, ip, asr #21 - bne 4f - orrs ip, xl, xh, lsl #12 - bne 5f @ x is NAN -4: mov ip, yh, lsl #1 - mvns ip, ip, asr #21 - bne 2b - orrs ip, yl, yh, lsl #12 - beq 2b @ y is not NAN -5: ldr r0, [sp], #4 @ unordered return code - RET - - FUNC_END gedf2 - FUNC_END gtdf2 - FUNC_END ledf2 - FUNC_END ltdf2 - FUNC_END nedf2 - FUNC_END eqdf2 - FUNC_END cmpdf2 - -ARM_FUNC_START aeabi_cdrcmple - - mov ip, r0 - mov r0, r2 - mov r2, ip - mov ip, r1 - mov r1, r3 - mov r3, ip - b 6f - -ARM_FUNC_START aeabi_cdcmpeq -ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq - - @ The status-returning routines are required to preserve all - @ registers except ip, lr, and cpsr. -6: do_push {r0, lr} - ARM_CALL cmpdf2 - @ Set the Z flag correctly, and the C flag unconditionally. - cmp r0, #0 - @ Clear the C flag if the return value was -1, indicating - @ that the first operand was smaller than the second. - do_it mi - cmnmi r0, #0 - RETLDM "r0" - - FUNC_END aeabi_cdcmple - FUNC_END aeabi_cdcmpeq - FUNC_END aeabi_cdrcmple - -ARM_FUNC_START aeabi_dcmpeq - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdcmple - do_it eq, e - moveq r0, #1 @ Equal to. - movne r0, #0 @ Less than, greater than, or unordered. - RETLDM - - FUNC_END aeabi_dcmpeq - -ARM_FUNC_START aeabi_dcmplt - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdcmple - do_it cc, e - movcc r0, #1 @ Less than. - movcs r0, #0 @ Equal to, greater than, or unordered. - RETLDM - - FUNC_END aeabi_dcmplt - -ARM_FUNC_START aeabi_dcmple - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdcmple - do_it ls, e - movls r0, #1 @ Less than or equal to. - movhi r0, #0 @ Greater than or unordered. - RETLDM - - FUNC_END aeabi_dcmple - -ARM_FUNC_START aeabi_dcmpge - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdrcmple - do_it ls, e - movls r0, #1 @ Operand 2 is less than or equal to operand 1. - movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. - RETLDM - - FUNC_END aeabi_dcmpge - -ARM_FUNC_START aeabi_dcmpgt - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdrcmple - do_it cc, e - movcc r0, #1 @ Operand 2 is less than operand 1. - movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, - @ or they are unordered. - RETLDM - - FUNC_END aeabi_dcmpgt - -#endif /* L_cmpdf2 */ - -#ifdef L_arm_unorddf2 - -ARM_FUNC_START unorddf2 -ARM_FUNC_ALIAS aeabi_dcmpun unorddf2 - - mov ip, xh, lsl #1 - mvns ip, ip, asr #21 - bne 1f - orrs ip, xl, xh, lsl #12 - bne 3f @ x is NAN -1: mov ip, yh, lsl #1 - mvns ip, ip, asr #21 - bne 2f - orrs ip, yl, yh, lsl #12 - bne 3f @ y is NAN -2: mov r0, #0 @ arguments are ordered. - RET - -3: mov r0, #1 @ arguments are unordered. - RET - - FUNC_END aeabi_dcmpun - FUNC_END unorddf2 - -#endif /* L_unorddf2 */ - -#ifdef L_arm_fixdfsi - -ARM_FUNC_START fixdfsi -ARM_FUNC_ALIAS aeabi_d2iz fixdfsi - - @ check exponent range. 
- mov r2, xh, lsl #1 - adds r2, r2, #(1 << 21) - bcs 2f @ value is INF or NAN - bpl 1f @ value is too small - mov r3, #(0xfffffc00 + 31) - subs r2, r3, r2, asr #21 - bls 3f @ value is too large - - @ scale value - mov r3, xh, lsl #11 - orr r3, r3, #0x80000000 - orr r3, r3, xl, lsr #21 - tst xh, #0x80000000 @ the sign bit - shift1 lsr, r0, r3, r2 - do_it ne - rsbne r0, r0, #0 - RET - -1: mov r0, #0 - RET - -2: orrs xl, xl, xh, lsl #12 - bne 4f @ x is NAN. -3: ands r0, xh, #0x80000000 @ the sign bit - do_it eq - moveq r0, #0x7fffffff @ maximum signed positive si - RET - -4: mov r0, #0 @ How should we convert NAN? - RET - - FUNC_END aeabi_d2iz - FUNC_END fixdfsi - -#endif /* L_fixdfsi */ - -#ifdef L_arm_fixunsdfsi - -ARM_FUNC_START fixunsdfsi -ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi - - @ check exponent range. - movs r2, xh, lsl #1 - bcs 1f @ value is negative - adds r2, r2, #(1 << 21) - bcs 2f @ value is INF or NAN - bpl 1f @ value is too small - mov r3, #(0xfffffc00 + 31) - subs r2, r3, r2, asr #21 - bmi 3f @ value is too large - - @ scale value - mov r3, xh, lsl #11 - orr r3, r3, #0x80000000 - orr r3, r3, xl, lsr #21 - shift1 lsr, r0, r3, r2 - RET - -1: mov r0, #0 - RET - -2: orrs xl, xl, xh, lsl #12 - bne 4f @ value is NAN. -3: mov r0, #0xffffffff @ maximum unsigned si - RET - -4: mov r0, #0 @ How should we convert NAN? - RET - - FUNC_END aeabi_d2uiz - FUNC_END fixunsdfsi - -#endif /* L_fixunsdfsi */ - -#ifdef L_arm_truncdfsf2 - -ARM_FUNC_START truncdfsf2 -ARM_FUNC_ALIAS aeabi_d2f truncdfsf2 - - @ check exponent range. - mov r2, xh, lsl #1 - subs r3, r2, #((1023 - 127) << 21) - do_it cs, t - COND(sub,s,cs) ip, r3, #(1 << 21) - COND(rsb,s,cs) ip, ip, #(254 << 21) - bls 2f @ value is out of range - -1: @ shift and round mantissa - and ip, xh, #0x80000000 - mov r2, xl, lsl #3 - orr xl, ip, xl, lsr #29 - cmp r2, #0x80000000 - adc r0, xl, r3, lsl #2 - do_it eq - biceq r0, r0, #1 - RET - -2: @ either overflow or underflow - tst xh, #0x40000000 - bne 3f @ overflow - - @ check if denormalized value is possible - adds r2, r3, #(23 << 21) - do_it lt, t - andlt r0, xh, #0x80000000 @ too small, return signed 0. - RETc(lt) - - @ denormalize value so we can resume with the code above afterwards. - orr xh, xh, #0x00100000 - mov r2, r2, lsr #21 - rsb r2, r2, #24 - rsb ip, r2, #32 -#if defined(__thumb2__) - lsls r3, xl, ip -#else - movs r3, xl, lsl ip -#endif - shift1 lsr, xl, xl, r2 - do_it ne - orrne xl, xl, #1 @ fold r3 for rounding considerations. - mov r3, xh, lsl #11 - mov r3, r3, lsr #11 - shiftop orr xl xl r3 lsl ip ip - shift1 lsr, r3, r3, r2 - mov r3, r3, lsl #1 - b 1b - -3: @ chech for NAN - mvns r3, r2, asr #21 - bne 5f @ simple overflow - orrs r3, xl, xh, lsl #12 - do_it ne, tt - movne r0, #0x7f000000 - orrne r0, r0, #0x00c00000 - RETc(ne) @ return NAN - -5: @ return INF with sign - and r0, xh, #0x80000000 - orr r0, r0, #0x7f000000 - orr r0, r0, #0x00800000 - RET - - FUNC_END aeabi_d2f - FUNC_END truncdfsf2 - -#endif /* L_truncdfsf2 */ diff --git a/gcc/config/arm/ieee754-sf.S b/gcc/config/arm/ieee754-sf.S deleted file mode 100644 index c93f66d8ff8..00000000000 --- a/gcc/config/arm/ieee754-sf.S +++ /dev/null @@ -1,1060 +0,0 @@ -/* ieee754-sf.S single-precision floating point support for ARM - - Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. 
- Contributed by Nicolas Pitre (nico@cam.org) - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -/* - * Notes: - * - * The goal of this code is to be as fast as possible. This is - * not meant to be easy to understand for the casual reader. - * - * Only the default rounding mode is intended for best performances. - * Exceptions aren't supported yet, but that can be added quite easily - * if necessary without impacting performances. - */ - -#ifdef L_arm_negsf2 - -ARM_FUNC_START negsf2 -ARM_FUNC_ALIAS aeabi_fneg negsf2 - - eor r0, r0, #0x80000000 @ flip sign bit - RET - - FUNC_END aeabi_fneg - FUNC_END negsf2 - -#endif - -#ifdef L_arm_addsubsf3 - -ARM_FUNC_START aeabi_frsub - - eor r0, r0, #0x80000000 @ flip sign bit of first arg - b 1f - -ARM_FUNC_START subsf3 -ARM_FUNC_ALIAS aeabi_fsub subsf3 - - eor r1, r1, #0x80000000 @ flip sign bit of second arg -#if defined(__INTERWORKING_STUBS__) - b 1f @ Skip Thumb-code prologue -#endif - -ARM_FUNC_START addsf3 -ARM_FUNC_ALIAS aeabi_fadd addsf3 - -1: @ Look for zeroes, equal values, INF, or NAN. - movs r2, r0, lsl #1 - do_it ne, ttt - COND(mov,s,ne) r3, r1, lsl #1 - teqne r2, r3 - COND(mvn,s,ne) ip, r2, asr #24 - COND(mvn,s,ne) ip, r3, asr #24 - beq LSYM(Lad_s) - - @ Compute exponent difference. Make largest exponent in r2, - @ corresponding arg in r0, and positive exponent difference in r3. - mov r2, r2, lsr #24 - rsbs r3, r2, r3, lsr #24 - do_it gt, ttt - addgt r2, r2, r3 - eorgt r1, r0, r1 - eorgt r0, r1, r0 - eorgt r1, r0, r1 - do_it lt - rsblt r3, r3, #0 - - @ If exponent difference is too large, return largest argument - @ already in r0. We need up to 25 bit to handle proper rounding - @ of 0x1p25 - 1.1. - cmp r3, #25 - do_it hi - RETc(hi) - - @ Convert mantissa to signed integer. - tst r0, #0x80000000 - orr r0, r0, #0x00800000 - bic r0, r0, #0xff000000 - do_it ne - rsbne r0, r0, #0 - tst r1, #0x80000000 - orr r1, r1, #0x00800000 - bic r1, r1, #0xff000000 - do_it ne - rsbne r1, r1, #0 - - @ If exponent == difference, one or both args were denormalized. - @ Since this is not common case, rescale them off line. - teq r2, r3 - beq LSYM(Lad_d) -LSYM(Lad_x): - - @ Compensate for the exponent overlapping the mantissa MSB added later - sub r2, r2, #1 - - @ Shift and add second arg to first arg in r0. - @ Keep leftover bits into r1. - shiftop adds r0 r0 r1 asr r3 ip - rsb r3, r3, #32 - shift1 lsl, r1, r1, r3 - - @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above) - and r3, r0, #0x80000000 - bpl LSYM(Lad_p) -#if defined(__thumb2__) - negs r1, r1 - sbc r0, r0, r0, lsl #1 -#else - rsbs r1, r1, #0 - rsc r0, r0, #0 -#endif - - @ Determine how to normalize the result. 
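
[Note, not part of the patch: the addition path above works on the raw IEEE-754 bit patterns. It splits off the exponent field, restores the hidden mantissa bit, and negates the mantissa of a negative operand so the sum can be formed as a plain signed integer. A C sketch of that unpacking step, illustrative only; denormals, zeros, INF and NaN take the separate paths shown in the code:]

#include <stdint.h>

struct sf_parts { int exp; int32_t mant; };

static struct sf_parts sf_unpack_sketch(uint32_t bits)
{
    struct sf_parts p;
    p.exp  = (bits >> 23) & 0xff;                  /* biased exponent */
    p.mant = (bits & 0x007fffff) | 0x00800000;     /* restore the hidden bit */
    if (bits & 0x80000000u)                        /* negative operand:      */
        p.mant = -p.mant;                          /* use a signed mantissa  */
    return p;
}
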
-LSYM(Lad_p): - cmp r0, #0x00800000 - bcc LSYM(Lad_a) - cmp r0, #0x01000000 - bcc LSYM(Lad_e) - - @ Result needs to be shifted right. - movs r0, r0, lsr #1 - mov r1, r1, rrx - add r2, r2, #1 - - @ Make sure we did not bust our exponent. - cmp r2, #254 - bhs LSYM(Lad_o) - - @ Our result is now properly aligned into r0, remaining bits in r1. - @ Pack final result together. - @ Round with MSB of r1. If halfway between two numbers, round towards - @ LSB of r0 = 0. -LSYM(Lad_e): - cmp r1, #0x80000000 - adc r0, r0, r2, lsl #23 - do_it eq - biceq r0, r0, #1 - orr r0, r0, r3 - RET - - @ Result must be shifted left and exponent adjusted. -LSYM(Lad_a): - movs r1, r1, lsl #1 - adc r0, r0, r0 - tst r0, #0x00800000 - sub r2, r2, #1 - bne LSYM(Lad_e) - - @ No rounding necessary since r1 will always be 0 at this point. -LSYM(Lad_l): - -#if __ARM_ARCH__ < 5 - - movs ip, r0, lsr #12 - moveq r0, r0, lsl #12 - subeq r2, r2, #12 - tst r0, #0x00ff0000 - moveq r0, r0, lsl #8 - subeq r2, r2, #8 - tst r0, #0x00f00000 - moveq r0, r0, lsl #4 - subeq r2, r2, #4 - tst r0, #0x00c00000 - moveq r0, r0, lsl #2 - subeq r2, r2, #2 - cmp r0, #0x00800000 - movcc r0, r0, lsl #1 - sbcs r2, r2, #0 - -#else - - clz ip, r0 - sub ip, ip, #8 - subs r2, r2, ip - shift1 lsl, r0, r0, ip - -#endif - - @ Final result with sign - @ If exponent negative, denormalize result. - do_it ge, et - addge r0, r0, r2, lsl #23 - rsblt r2, r2, #0 - orrge r0, r0, r3 -#if defined(__thumb2__) - do_it lt, t - lsrlt r0, r0, r2 - orrlt r0, r3, r0 -#else - orrlt r0, r3, r0, lsr r2 -#endif - RET - - @ Fixup and adjust bit position for denormalized arguments. - @ Note that r2 must not remain equal to 0. -LSYM(Lad_d): - teq r2, #0 - eor r1, r1, #0x00800000 - do_it eq, te - eoreq r0, r0, #0x00800000 - addeq r2, r2, #1 - subne r3, r3, #1 - b LSYM(Lad_x) - -LSYM(Lad_s): - mov r3, r1, lsl #1 - - mvns ip, r2, asr #24 - do_it ne - COND(mvn,s,ne) ip, r3, asr #24 - beq LSYM(Lad_i) - - teq r2, r3 - beq 1f - - @ Result is x + 0.0 = x or 0.0 + y = y. - teq r2, #0 - do_it eq - moveq r0, r1 - RET - -1: teq r0, r1 - - @ Result is x - x = 0. - do_it ne, t - movne r0, #0 - RETc(ne) - - @ Result is x + x = 2x. - tst r2, #0xff000000 - bne 2f - movs r0, r0, lsl #1 - do_it cs - orrcs r0, r0, #0x80000000 - RET -2: adds r2, r2, #(2 << 24) - do_it cc, t - addcc r0, r0, #(1 << 23) - RETc(cc) - and r3, r0, #0x80000000 - - @ Overflow: return INF. -LSYM(Lad_o): - orr r0, r3, #0x7f000000 - orr r0, r0, #0x00800000 - RET - - @ At least one of r0/r1 is INF/NAN. 
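
[Note, not part of the patch: normalization after the add (label Lad_l above) needs a leading-zero count. ARMv5 and later use the clz instruction; older cores get the same answer from a cascade of shift tests. A generic C equivalent of that fallback, as a sketch:]

#include <stdint.h>

static int clz32_sketch(uint32_t x)
{
    int n = 0;
    if (x == 0) return 32;
    if (!(x & 0xffff0000u)) { n += 16; x <<= 16; }
    if (!(x & 0xff000000u)) { n += 8;  x <<= 8;  }
    if (!(x & 0xf0000000u)) { n += 4;  x <<= 4;  }
    if (!(x & 0xc0000000u)) { n += 2;  x <<= 2;  }
    if (!(x & 0x80000000u)) { n += 1; }
    return n;
}
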
- @ if r0 != INF/NAN: return r1 (which is INF/NAN) - @ if r1 != INF/NAN: return r0 (which is INF/NAN) - @ if r0 or r1 is NAN: return NAN - @ if opposite sign: return NAN - @ otherwise return r0 (which is INF or -INF) -LSYM(Lad_i): - mvns r2, r2, asr #24 - do_it ne, et - movne r0, r1 - COND(mvn,s,eq) r3, r3, asr #24 - movne r1, r0 - movs r2, r0, lsl #9 - do_it eq, te - COND(mov,s,eq) r3, r1, lsl #9 - teqeq r0, r1 - orrne r0, r0, #0x00400000 @ quiet NAN - RET - - FUNC_END aeabi_frsub - FUNC_END aeabi_fadd - FUNC_END addsf3 - FUNC_END aeabi_fsub - FUNC_END subsf3 - -ARM_FUNC_START floatunsisf -ARM_FUNC_ALIAS aeabi_ui2f floatunsisf - - mov r3, #0 - b 1f - -ARM_FUNC_START floatsisf -ARM_FUNC_ALIAS aeabi_i2f floatsisf - - ands r3, r0, #0x80000000 - do_it mi - rsbmi r0, r0, #0 - -1: movs ip, r0 - do_it eq - RETc(eq) - - @ Add initial exponent to sign - orr r3, r3, #((127 + 23) << 23) - - .ifnc ah, r0 - mov ah, r0 - .endif - mov al, #0 - b 2f - - FUNC_END aeabi_i2f - FUNC_END floatsisf - FUNC_END aeabi_ui2f - FUNC_END floatunsisf - -ARM_FUNC_START floatundisf -ARM_FUNC_ALIAS aeabi_ul2f floatundisf - - orrs r2, r0, r1 -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - do_it eq, t - mvfeqs f0, #0.0 -#else - do_it eq -#endif - RETc(eq) - - mov r3, #0 - b 1f - -ARM_FUNC_START floatdisf -ARM_FUNC_ALIAS aeabi_l2f floatdisf - - orrs r2, r0, r1 -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - do_it eq, t - mvfeqs f0, #0.0 -#else - do_it eq -#endif - RETc(eq) - - ands r3, ah, #0x80000000 @ sign bit in r3 - bpl 1f -#if defined(__thumb2__) - negs al, al - sbc ah, ah, ah, lsl #1 -#else - rsbs al, al, #0 - rsc ah, ah, #0 -#endif -1: -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - @ For hard FPA code we want to return via the tail below so that - @ we can return the result in f0 as well as in r0 for backwards - @ compatibility. - str lr, [sp, #-8]! - adr lr, LSYM(f0_ret) -#endif - - movs ip, ah - do_it eq, tt - moveq ip, al - moveq ah, al - moveq al, #0 - - @ Add initial exponent to sign - orr r3, r3, #((127 + 23 + 32) << 23) - do_it eq - subeq r3, r3, #(32 << 23) -2: sub r3, r3, #(1 << 23) - -#if __ARM_ARCH__ < 5 - - mov r2, #23 - cmp ip, #(1 << 16) - do_it hs, t - movhs ip, ip, lsr #16 - subhs r2, r2, #16 - cmp ip, #(1 << 8) - do_it hs, t - movhs ip, ip, lsr #8 - subhs r2, r2, #8 - cmp ip, #(1 << 4) - do_it hs, t - movhs ip, ip, lsr #4 - subhs r2, r2, #4 - cmp ip, #(1 << 2) - do_it hs, e - subhs r2, r2, #2 - sublo r2, r2, ip, lsr #1 - subs r2, r2, ip, lsr #3 - -#else - - clz r2, ip - subs r2, r2, #8 - -#endif - - sub r3, r3, r2, lsl #23 - blt 3f - - shiftop add r3 r3 ah lsl r2 ip - shift1 lsl, ip, al, r2 - rsb r2, r2, #32 - cmp ip, #0x80000000 - shiftop adc r0 r3 al lsr r2 r2 - do_it eq - biceq r0, r0, #1 - RET - -3: add r2, r2, #32 - shift1 lsl, ip, ah, r2 - rsb r2, r2, #32 - orrs al, al, ip, lsl #1 - shiftop adc r0 r3 ah lsr r2 r2 - do_it eq - biceq r0, r0, ip, lsr #31 - RET - -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - -LSYM(f0_ret): - str r0, [sp, #-4]! - ldfs f0, [sp], #4 - RETLDM - -#endif - - FUNC_END floatdisf - FUNC_END aeabi_l2f - FUNC_END floatundisf - FUNC_END aeabi_ul2f - -#endif /* L_addsubsf3 */ - -#ifdef L_arm_muldivsf3 - -ARM_FUNC_START mulsf3 -ARM_FUNC_ALIAS aeabi_fmul mulsf3 - - @ Mask out exponents, trap any zero/denormal/INF/NAN. - mov ip, #0xff - ands r2, ip, r0, lsr #23 - do_it ne, tt - COND(and,s,ne) r3, ip, r1, lsr #23 - teqne r2, ip - teqne r3, ip - beq LSYM(Lml_s) -LSYM(Lml_x): - - @ Add exponents together - add r2, r2, r3 - - @ Determine final sign. 
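
[Note, not part of the patch: floatdisf/floatundisf above seed the exponent with 127 + 23 + 32, subtract one for every position the 64-bit value must be shifted to bring its leading bit into place, and round the dropped bits to nearest-even (the adc/biceq pair). The same idea in C, as a rough sketch with illustrative names:]

#include <stdint.h>

static float u64_to_float_sketch(uint64_t v)
{
    if (v == 0)
        return 0.0f;

    int lz = 0;
    while (!(v >> 63)) { v <<= 1; lz++; }     /* normalize: leading bit to bit 63 */

    uint32_t exp  = 127 + 63 - lz;            /* biased exponent */
    uint32_t mant = (uint32_t)(v >> 40);      /* top 24 bits, hidden bit included */
    uint64_t rest = v << 24;                  /* dropped bits, kept for rounding */

    uint32_t bits = (exp << 23) | (mant & 0x007fffffu);

    /* Round to nearest, ties to even; a carry here may bump the
       exponent field, which is exactly what is wanted. */
    if (rest > 0x8000000000000000ull ||
        (rest == 0x8000000000000000ull && (bits & 1)))
        bits++;

    union { uint32_t u; float f; } pun = { bits };
    return pun.f;
}
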
- eor ip, r0, r1 - - @ Convert mantissa to unsigned integer. - @ If power of two, branch to a separate path. - @ Make up for final alignment. - movs r0, r0, lsl #9 - do_it ne - COND(mov,s,ne) r1, r1, lsl #9 - beq LSYM(Lml_1) - mov r3, #0x08000000 - orr r0, r3, r0, lsr #5 - orr r1, r3, r1, lsr #5 - -#if __ARM_ARCH__ < 4 - - @ Put sign bit in r3, which will be restored into r0 later. - and r3, ip, #0x80000000 - - @ Well, no way to make it shorter without the umull instruction. - do_push {r3, r4, r5} - mov r4, r0, lsr #16 - mov r5, r1, lsr #16 - bic r0, r0, r4, lsl #16 - bic r1, r1, r5, lsl #16 - mul ip, r4, r5 - mul r3, r0, r1 - mul r0, r5, r0 - mla r0, r4, r1, r0 - adds r3, r3, r0, lsl #16 - adc r1, ip, r0, lsr #16 - do_pop {r0, r4, r5} - -#else - - @ The actual multiplication. - umull r3, r1, r0, r1 - - @ Put final sign in r0. - and r0, ip, #0x80000000 - -#endif - - @ Adjust result upon the MSB position. - cmp r1, #(1 << 23) - do_it cc, tt - movcc r1, r1, lsl #1 - orrcc r1, r1, r3, lsr #31 - movcc r3, r3, lsl #1 - - @ Add sign to result. - orr r0, r0, r1 - - @ Apply exponent bias, check for under/overflow. - sbc r2, r2, #127 - cmp r2, #(254 - 1) - bhi LSYM(Lml_u) - - @ Round the result, merge final exponent. - cmp r3, #0x80000000 - adc r0, r0, r2, lsl #23 - do_it eq - biceq r0, r0, #1 - RET - - @ Multiplication by 0x1p*: let''s shortcut a lot of code. -LSYM(Lml_1): - teq r0, #0 - and ip, ip, #0x80000000 - do_it eq - moveq r1, r1, lsl #9 - orr r0, ip, r0, lsr #9 - orr r0, r0, r1, lsr #9 - subs r2, r2, #127 - do_it gt, tt - COND(rsb,s,gt) r3, r2, #255 - orrgt r0, r0, r2, lsl #23 - RETc(gt) - - @ Under/overflow: fix things up for the code below. - orr r0, r0, #0x00800000 - mov r3, #0 - subs r2, r2, #1 - -LSYM(Lml_u): - @ Overflow? - bgt LSYM(Lml_o) - - @ Check if denormalized result is possible, otherwise return signed 0. - cmn r2, #(24 + 1) - do_it le, t - bicle r0, r0, #0x7fffffff - RETc(le) - - @ Shift value right, round, etc. - rsb r2, r2, #0 - movs r1, r0, lsl #1 - shift1 lsr, r1, r1, r2 - rsb r2, r2, #32 - shift1 lsl, ip, r0, r2 - movs r0, r1, rrx - adc r0, r0, #0 - orrs r3, r3, ip, lsl #1 - do_it eq - biceq r0, r0, ip, lsr #31 - RET - - @ One or both arguments are denormalized. - @ Scale them leftwards and preserve sign bit. -LSYM(Lml_d): - teq r2, #0 - and ip, r0, #0x80000000 -1: do_it eq, tt - moveq r0, r0, lsl #1 - tsteq r0, #0x00800000 - subeq r2, r2, #1 - beq 1b - orr r0, r0, ip - teq r3, #0 - and ip, r1, #0x80000000 -2: do_it eq, tt - moveq r1, r1, lsl #1 - tsteq r1, #0x00800000 - subeq r3, r3, #1 - beq 2b - orr r1, r1, ip - b LSYM(Lml_x) - -LSYM(Lml_s): - @ Isolate the INF and NAN cases away - and r3, ip, r1, lsr #23 - teq r2, ip - do_it ne - teqne r3, ip - beq 1f - - @ Here, one or more arguments are either denormalized or zero. - bics ip, r0, #0x80000000 - do_it ne - COND(bic,s,ne) ip, r1, #0x80000000 - bne LSYM(Lml_d) - - @ Result is 0, but determine sign anyway. -LSYM(Lml_z): - eor r0, r0, r1 - bic r0, r0, #0x7fffffff - RET - -1: @ One or both args are INF or NAN. - teq r0, #0x0 - do_it ne, ett - teqne r0, #0x80000000 - moveq r0, r1 - teqne r1, #0x0 - teqne r1, #0x80000000 - beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN - teq r2, ip - bne 1f - movs r2, r0, lsl #9 - bne LSYM(Lml_n) @ NAN * -> NAN -1: teq r3, ip - bne LSYM(Lml_i) - movs r3, r1, lsl #9 - do_it ne - movne r0, r1 - bne LSYM(Lml_n) @ * NAN -> NAN - - @ Result is INF, but we need to determine its sign. -LSYM(Lml_i): - eor r0, r0, r1 - - @ Overflow: return INF (sign already in r0). 
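
[Note, not part of the patch: on cores without umull (the __ARM_ARCH__ < 4 path above), mulsf3 builds the 64-bit product of the two mantissas from four 16x16 partial products. A general C rendering of that trick, names illustrative:]

#include <stdint.h>

static void umull32_sketch(uint32_t a, uint32_t b, uint32_t *hi, uint32_t *lo)
{
    uint32_t a_hi = a >> 16, a_lo = a & 0xffffu;
    uint32_t b_hi = b >> 16, b_lo = b & 0xffffu;

    uint32_t low  = a_lo * b_lo;              /* bits  0..31 */
    uint32_t m1   = a_hi * b_lo;              /* bits 16..47 */
    uint32_t m2   = a_lo * b_hi;              /* bits 16..47 */
    uint32_t high = a_hi * b_hi;              /* bits 32..63 */

    uint32_t mid = m1 + m2;
    /* The mid sum can carry into bit 48 in general; the mantissas the
       library multiplies are short enough that it never does there. */
    uint32_t mid_carry = (mid < m1) ? 0x10000u : 0;

    *lo = low + (mid << 16);
    *hi = high + (mid >> 16) + mid_carry + (*lo < low ? 1u : 0u);
}
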
-LSYM(Lml_o): - and r0, r0, #0x80000000 - orr r0, r0, #0x7f000000 - orr r0, r0, #0x00800000 - RET - - @ Return a quiet NAN. -LSYM(Lml_n): - orr r0, r0, #0x7f000000 - orr r0, r0, #0x00c00000 - RET - - FUNC_END aeabi_fmul - FUNC_END mulsf3 - -ARM_FUNC_START divsf3 -ARM_FUNC_ALIAS aeabi_fdiv divsf3 - - @ Mask out exponents, trap any zero/denormal/INF/NAN. - mov ip, #0xff - ands r2, ip, r0, lsr #23 - do_it ne, tt - COND(and,s,ne) r3, ip, r1, lsr #23 - teqne r2, ip - teqne r3, ip - beq LSYM(Ldv_s) -LSYM(Ldv_x): - - @ Substract divisor exponent from dividend''s - sub r2, r2, r3 - - @ Preserve final sign into ip. - eor ip, r0, r1 - - @ Convert mantissa to unsigned integer. - @ Dividend -> r3, divisor -> r1. - movs r1, r1, lsl #9 - mov r0, r0, lsl #9 - beq LSYM(Ldv_1) - mov r3, #0x10000000 - orr r1, r3, r1, lsr #4 - orr r3, r3, r0, lsr #4 - - @ Initialize r0 (result) with final sign bit. - and r0, ip, #0x80000000 - - @ Ensure result will land to known bit position. - @ Apply exponent bias accordingly. - cmp r3, r1 - do_it cc - movcc r3, r3, lsl #1 - adc r2, r2, #(127 - 2) - - @ The actual division loop. - mov ip, #0x00800000 -1: cmp r3, r1 - do_it cs, t - subcs r3, r3, r1 - orrcs r0, r0, ip - cmp r3, r1, lsr #1 - do_it cs, t - subcs r3, r3, r1, lsr #1 - orrcs r0, r0, ip, lsr #1 - cmp r3, r1, lsr #2 - do_it cs, t - subcs r3, r3, r1, lsr #2 - orrcs r0, r0, ip, lsr #2 - cmp r3, r1, lsr #3 - do_it cs, t - subcs r3, r3, r1, lsr #3 - orrcs r0, r0, ip, lsr #3 - movs r3, r3, lsl #4 - do_it ne - COND(mov,s,ne) ip, ip, lsr #4 - bne 1b - - @ Check exponent for under/overflow. - cmp r2, #(254 - 1) - bhi LSYM(Lml_u) - - @ Round the result, merge final exponent. - cmp r3, r1 - adc r0, r0, r2, lsl #23 - do_it eq - biceq r0, r0, #1 - RET - - @ Division by 0x1p*: let''s shortcut a lot of code. -LSYM(Ldv_1): - and ip, ip, #0x80000000 - orr r0, ip, r0, lsr #9 - adds r2, r2, #127 - do_it gt, tt - COND(rsb,s,gt) r3, r2, #255 - orrgt r0, r0, r2, lsl #23 - RETc(gt) - - orr r0, r0, #0x00800000 - mov r3, #0 - subs r2, r2, #1 - b LSYM(Lml_u) - - @ One or both arguments are denormalized. - @ Scale them leftwards and preserve sign bit. -LSYM(Ldv_d): - teq r2, #0 - and ip, r0, #0x80000000 -1: do_it eq, tt - moveq r0, r0, lsl #1 - tsteq r0, #0x00800000 - subeq r2, r2, #1 - beq 1b - orr r0, r0, ip - teq r3, #0 - and ip, r1, #0x80000000 -2: do_it eq, tt - moveq r1, r1, lsl #1 - tsteq r1, #0x00800000 - subeq r3, r3, #1 - beq 2b - orr r1, r1, ip - b LSYM(Ldv_x) - - @ One or both arguments are either INF, NAN, zero or denormalized. -LSYM(Ldv_s): - and r3, ip, r1, lsr #23 - teq r2, ip - bne 1f - movs r2, r0, lsl #9 - bne LSYM(Lml_n) @ NAN / -> NAN - teq r3, ip - bne LSYM(Lml_i) @ INF / -> INF - mov r0, r1 - b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN -1: teq r3, ip - bne 2f - movs r3, r1, lsl #9 - beq LSYM(Lml_z) @ / INF -> 0 - mov r0, r1 - b LSYM(Lml_n) @ / NAN -> NAN -2: @ If both are nonzero, we need to normalize and resume above. - bics ip, r0, #0x80000000 - do_it ne - COND(bic,s,ne) ip, r1, #0x80000000 - bne LSYM(Ldv_d) - @ One or both arguments are zero. - bics r2, r0, #0x80000000 - bne LSYM(Lml_i) @ / 0 -> INF - bics r3, r1, #0x80000000 - bne LSYM(Lml_z) @ 0 / -> 0 - b LSYM(Lml_n) @ 0 / 0 -> NAN - - FUNC_END aeabi_fdiv - FUNC_END divsf3 - -#endif /* L_muldivsf3 */ - -#ifdef L_arm_cmpsf2 - - @ The return value in r0 is - @ - @ 0 if the operands are equal - @ 1 if the first operand is greater than the second, or - @ the operands are unordered and the operation is - @ CMP, LT, LE, NE, or EQ. 
- @ -1 if the first operand is less than the second, or - @ the operands are unordered and the operation is GT - @ or GE. - @ - @ The Z flag will be set iff the operands are equal. - @ - @ The following registers are clobbered by this function: - @ ip, r0, r1, r2, r3 - -ARM_FUNC_START gtsf2 -ARM_FUNC_ALIAS gesf2 gtsf2 - mov ip, #-1 - b 1f - -ARM_FUNC_START ltsf2 -ARM_FUNC_ALIAS lesf2 ltsf2 - mov ip, #1 - b 1f - -ARM_FUNC_START cmpsf2 -ARM_FUNC_ALIAS nesf2 cmpsf2 -ARM_FUNC_ALIAS eqsf2 cmpsf2 - mov ip, #1 @ how should we specify unordered here? - -1: str ip, [sp, #-4]! - - @ Trap any INF/NAN first. - mov r2, r0, lsl #1 - mov r3, r1, lsl #1 - mvns ip, r2, asr #24 - do_it ne - COND(mvn,s,ne) ip, r3, asr #24 - beq 3f - - @ Compare values. - @ Note that 0.0 is equal to -0.0. -2: add sp, sp, #4 - orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag - do_it ne - teqne r0, r1 @ if not 0 compare sign - do_it pl - COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0 - - @ Result: - do_it hi - movhi r0, r1, asr #31 - do_it lo - mvnlo r0, r1, asr #31 - do_it ne - orrne r0, r0, #1 - RET - - @ Look for a NAN. -3: mvns ip, r2, asr #24 - bne 4f - movs ip, r0, lsl #9 - bne 5f @ r0 is NAN -4: mvns ip, r3, asr #24 - bne 2b - movs ip, r1, lsl #9 - beq 2b @ r1 is not NAN -5: ldr r0, [sp], #4 @ return unordered code. - RET - - FUNC_END gesf2 - FUNC_END gtsf2 - FUNC_END lesf2 - FUNC_END ltsf2 - FUNC_END nesf2 - FUNC_END eqsf2 - FUNC_END cmpsf2 - -ARM_FUNC_START aeabi_cfrcmple - - mov ip, r0 - mov r0, r1 - mov r1, ip - b 6f - -ARM_FUNC_START aeabi_cfcmpeq -ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq - - @ The status-returning routines are required to preserve all - @ registers except ip, lr, and cpsr. -6: do_push {r0, r1, r2, r3, lr} - ARM_CALL cmpsf2 - @ Set the Z flag correctly, and the C flag unconditionally. - cmp r0, #0 - @ Clear the C flag if the return value was -1, indicating - @ that the first operand was smaller than the second. - do_it mi - cmnmi r0, #0 - RETLDM "r0, r1, r2, r3" - - FUNC_END aeabi_cfcmple - FUNC_END aeabi_cfcmpeq - FUNC_END aeabi_cfrcmple - -ARM_FUNC_START aeabi_fcmpeq - - str lr, [sp, #-8]! - ARM_CALL aeabi_cfcmple - do_it eq, e - moveq r0, #1 @ Equal to. - movne r0, #0 @ Less than, greater than, or unordered. - RETLDM - - FUNC_END aeabi_fcmpeq - -ARM_FUNC_START aeabi_fcmplt - - str lr, [sp, #-8]! - ARM_CALL aeabi_cfcmple - do_it cc, e - movcc r0, #1 @ Less than. - movcs r0, #0 @ Equal to, greater than, or unordered. - RETLDM - - FUNC_END aeabi_fcmplt - -ARM_FUNC_START aeabi_fcmple - - str lr, [sp, #-8]! - ARM_CALL aeabi_cfcmple - do_it ls, e - movls r0, #1 @ Less than or equal to. - movhi r0, #0 @ Greater than or unordered. - RETLDM - - FUNC_END aeabi_fcmple - -ARM_FUNC_START aeabi_fcmpge - - str lr, [sp, #-8]! - ARM_CALL aeabi_cfrcmple - do_it ls, e - movls r0, #1 @ Operand 2 is less than or equal to operand 1. - movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. - RETLDM - - FUNC_END aeabi_fcmpge - -ARM_FUNC_START aeabi_fcmpgt - - str lr, [sp, #-8]! - ARM_CALL aeabi_cfrcmple - do_it cc, e - movcc r0, #1 @ Operand 2 is less than operand 1. - movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, - @ or they are unordered. 
- RETLDM - - FUNC_END aeabi_fcmpgt - -#endif /* L_cmpsf2 */ - -#ifdef L_arm_unordsf2 - -ARM_FUNC_START unordsf2 -ARM_FUNC_ALIAS aeabi_fcmpun unordsf2 - - mov r2, r0, lsl #1 - mov r3, r1, lsl #1 - mvns ip, r2, asr #24 - bne 1f - movs ip, r0, lsl #9 - bne 3f @ r0 is NAN -1: mvns ip, r3, asr #24 - bne 2f - movs ip, r1, lsl #9 - bne 3f @ r1 is NAN -2: mov r0, #0 @ arguments are ordered. - RET -3: mov r0, #1 @ arguments are unordered. - RET - - FUNC_END aeabi_fcmpun - FUNC_END unordsf2 - -#endif /* L_unordsf2 */ - -#ifdef L_arm_fixsfsi - -ARM_FUNC_START fixsfsi -ARM_FUNC_ALIAS aeabi_f2iz fixsfsi - - @ check exponent range. - mov r2, r0, lsl #1 - cmp r2, #(127 << 24) - bcc 1f @ value is too small - mov r3, #(127 + 31) - subs r2, r3, r2, lsr #24 - bls 2f @ value is too large - - @ scale value - mov r3, r0, lsl #8 - orr r3, r3, #0x80000000 - tst r0, #0x80000000 @ the sign bit - shift1 lsr, r0, r3, r2 - do_it ne - rsbne r0, r0, #0 - RET - -1: mov r0, #0 - RET - -2: cmp r2, #(127 + 31 - 0xff) - bne 3f - movs r2, r0, lsl #9 - bne 4f @ r0 is NAN. -3: ands r0, r0, #0x80000000 @ the sign bit - do_it eq - moveq r0, #0x7fffffff @ the maximum signed positive si - RET - -4: mov r0, #0 @ What should we convert NAN to? - RET - - FUNC_END aeabi_f2iz - FUNC_END fixsfsi - -#endif /* L_fixsfsi */ - -#ifdef L_arm_fixunssfsi - -ARM_FUNC_START fixunssfsi -ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi - - @ check exponent range. - movs r2, r0, lsl #1 - bcs 1f @ value is negative - cmp r2, #(127 << 24) - bcc 1f @ value is too small - mov r3, #(127 + 31) - subs r2, r3, r2, lsr #24 - bmi 2f @ value is too large - - @ scale the value - mov r3, r0, lsl #8 - orr r3, r3, #0x80000000 - shift1 lsr, r0, r3, r2 - RET - -1: mov r0, #0 - RET - -2: cmp r2, #(127 + 31 - 0xff) - bne 3f - movs r2, r0, lsl #9 - bne 4f @ r0 is NAN. -3: mov r0, #0xffffffff @ maximum unsigned si - RET - -4: mov r0, #0 @ What should we convert NAN to? - RET - - FUNC_END aeabi_f2uiz - FUNC_END fixunssfsi - -#endif /* L_fixunssfsi */ diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm deleted file mode 100644 index 2e76c01df4b..00000000000 --- a/gcc/config/arm/lib1funcs.asm +++ /dev/null @@ -1,1829 +0,0 @@ -@ libgcc routines for ARM cpu. -@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) - -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008, - 2009, 2010 Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -/* An executable stack is *not* required for these functions. 
*/ -#if defined(__ELF__) && defined(__linux__) -.section .note.GNU-stack,"",%progbits -.previous -#endif /* __ELF__ and __linux__ */ - -#ifdef __ARM_EABI__ -/* Some attributes that are common to all routines in this file. */ - /* Tag_ABI_align_needed: This code does not require 8-byte - alignment from the caller. */ - /* .eabi_attribute 24, 0 -- default setting. */ - /* Tag_ABI_align_preserved: This code preserves 8-byte - alignment in any callee. */ - .eabi_attribute 25, 1 -#endif /* __ARM_EABI__ */ -/* ------------------------------------------------------------------------ */ - -/* We need to know what prefix to add to function names. */ - -#ifndef __USER_LABEL_PREFIX__ -#error __USER_LABEL_PREFIX__ not defined -#endif - -/* ANSI concatenation macros. */ - -#define CONCAT1(a, b) CONCAT2(a, b) -#define CONCAT2(a, b) a ## b - -/* Use the right prefix for global labels. */ - -#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) - -#ifdef __ELF__ -#ifdef __thumb__ -#define __PLT__ /* Not supported in Thumb assembler (for now). */ -#elif defined __vxworks && !defined __PIC__ -#define __PLT__ /* Not supported by the kernel loader. */ -#else -#define __PLT__ (PLT) -#endif -#define TYPE(x) .type SYM(x),function -#define SIZE(x) .size SYM(x), . - SYM(x) -#define LSYM(x) .x -#else -#define __PLT__ -#define TYPE(x) -#define SIZE(x) -#define LSYM(x) x -#endif - -/* Function end macros. Variants for interworking. */ - -#if defined(__ARM_ARCH_2__) -# define __ARM_ARCH__ 2 -#endif - -#if defined(__ARM_ARCH_3__) -# define __ARM_ARCH__ 3 -#endif - -#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \ - || defined(__ARM_ARCH_4T__) -/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with - long multiply instructions. That includes v3M. */ -# define __ARM_ARCH__ 4 -#endif - -#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \ - || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ - || defined(__ARM_ARCH_5TEJ__) -# define __ARM_ARCH__ 5 -#endif - -#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ - || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ - || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \ - || defined(__ARM_ARCH_6M__) -# define __ARM_ARCH__ 6 -#endif - -#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ - || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ - || defined(__ARM_ARCH_7EM__) -# define __ARM_ARCH__ 7 -#endif - -#ifndef __ARM_ARCH__ -#error Unable to determine architecture. -#endif - -/* There are times when we might prefer Thumb1 code even if ARM code is - permitted, for example, the code might be smaller, or there might be - interworking problems with switching to ARM state if interworking is - disabled. */ -#if (defined(__thumb__) \ - && !defined(__thumb2__) \ - && (!defined(__THUMB_INTERWORK__) \ - || defined (__OPTIMIZE_SIZE__) \ - || defined(__ARM_ARCH_6M__))) -# define __prefer_thumb__ -#endif - -/* How to return from a function call depends on the architecture variant. */ - -#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__) - -# define RET bx lr -# define RETc(x) bx##x lr - -/* Special precautions for interworking on armv4t. */ -# if (__ARM_ARCH__ == 4) - -/* Always use bx, not ldr pc. */ -# if (defined(__thumb__) || defined(__THUMB_INTERWORK__)) -# define __INTERWORKING__ -# endif /* __THUMB__ || __THUMB_INTERWORK__ */ - -/* Include thumb stub before arm mode code. 
*/ -# if defined(__thumb__) && !defined(__THUMB_INTERWORK__) -# define __INTERWORKING_STUBS__ -# endif /* __thumb__ && !__THUMB_INTERWORK__ */ - -#endif /* __ARM_ARCH == 4 */ - -#else - -# define RET mov pc, lr -# define RETc(x) mov##x pc, lr - -#endif - -.macro cfi_pop advance, reg, cfa_offset -#ifdef __ELF__ - .pushsection .debug_frame - .byte 0x4 /* DW_CFA_advance_loc4 */ - .4byte \advance - .byte (0xc0 | \reg) /* DW_CFA_restore */ - .byte 0xe /* DW_CFA_def_cfa_offset */ - .uleb128 \cfa_offset - .popsection -#endif -.endm -.macro cfi_push advance, reg, offset, cfa_offset -#ifdef __ELF__ - .pushsection .debug_frame - .byte 0x4 /* DW_CFA_advance_loc4 */ - .4byte \advance - .byte (0x80 | \reg) /* DW_CFA_offset */ - .uleb128 (\offset / -4) - .byte 0xe /* DW_CFA_def_cfa_offset */ - .uleb128 \cfa_offset - .popsection -#endif -.endm -.macro cfi_start start_label, end_label -#ifdef __ELF__ - .pushsection .debug_frame -LSYM(Lstart_frame): - .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE -LSYM(Lstart_cie): - .4byte 0xffffffff @ CIE Identifier Tag - .byte 0x1 @ CIE Version - .ascii "\0" @ CIE Augmentation - .uleb128 0x1 @ CIE Code Alignment Factor - .sleb128 -4 @ CIE Data Alignment Factor - .byte 0xe @ CIE RA Column - .byte 0xc @ DW_CFA_def_cfa - .uleb128 0xd - .uleb128 0x0 - - .align 2 -LSYM(Lend_cie): - .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length -LSYM(Lstart_fde): - .4byte LSYM(Lstart_frame) @ FDE CIE offset - .4byte \start_label @ FDE initial location - .4byte \end_label-\start_label @ FDE address range - .popsection -#endif -.endm -.macro cfi_end end_label -#ifdef __ELF__ - .pushsection .debug_frame - .align 2 -LSYM(Lend_fde): - .popsection -\end_label: -#endif -.endm - -/* Don't pass dirn, it's there just to get token pasting right. */ - -.macro RETLDM regs=, cond=, unwind=, dirn=ia -#if defined (__INTERWORKING__) - .ifc "\regs","" - ldr\cond lr, [sp], #8 - .else -# if defined(__thumb2__) - pop\cond {\regs, lr} -# else - ldm\cond\dirn sp!, {\regs, lr} -# endif - .endif - .ifnc "\unwind", "" - /* Mark LR as restored. */ -97: cfi_pop 97b - \unwind, 0xe, 0x0 - .endif - bx\cond lr -#else - /* Caller is responsible for providing IT instruction. */ - .ifc "\regs","" - ldr\cond pc, [sp], #8 - .else -# if defined(__thumb2__) - pop\cond {\regs, pc} -# else - ldm\cond\dirn sp!, {\regs, pc} -# endif - .endif -#endif -.endm - -/* The Unified assembly syntax allows the same code to be assembled for both - ARM and Thumb-2. However this is only supported by recent gas, so define - a set of macros to allow ARM code on older assemblers. */ -#if defined(__thumb2__) -.macro do_it cond, suffix="" - it\suffix \cond -.endm -.macro shift1 op, arg0, arg1, arg2 - \op \arg0, \arg1, \arg2 -.endm -#define do_push push -#define do_pop pop -#define COND(op1, op2, cond) op1 ## op2 ## cond -/* Perform an arithmetic operation with a variable shift operand. This - requires two instructions and a scratch register on Thumb-2. 
*/ -.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp - \shiftop \tmp, \src2, \shiftreg - \name \dest, \src1, \tmp -.endm -#else -.macro do_it cond, suffix="" -.endm -.macro shift1 op, arg0, arg1, arg2 - mov \arg0, \arg1, \op \arg2 -.endm -#define do_push stmfd sp!, -#define do_pop ldmfd sp!, -#define COND(op1, op2, cond) op1 ## cond ## op2 -.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp - \name \dest, \src1, \src2, \shiftop \shiftreg -.endm -#endif - -#ifdef __ARM_EABI__ -.macro ARM_LDIV0 name signed - cmp r0, #0 - .ifc \signed, unsigned - movne r0, #0xffffffff - .else - movgt r0, #0x7fffffff - movlt r0, #0x80000000 - .endif - b SYM (__aeabi_idiv0) __PLT__ -.endm -#else -.macro ARM_LDIV0 name signed - str lr, [sp, #-8]! -98: cfi_push 98b - __\name, 0xe, -0x8, 0x8 - bl SYM (__div0) __PLT__ - mov r0, #0 @ About as wrong as it could be. - RETLDM unwind=98b -.endm -#endif - - -#ifdef __ARM_EABI__ -.macro THUMB_LDIV0 name signed -#if defined(__ARM_ARCH_6M__) - .ifc \signed, unsigned - cmp r0, #0 - beq 1f - mov r0, #0 - mvn r0, r0 @ 0xffffffff -1: - .else - cmp r0, #0 - beq 2f - blt 3f - mov r0, #0 - mvn r0, r0 - lsr r0, r0, #1 @ 0x7fffffff - b 2f -3: mov r0, #0x80 - lsl r0, r0, #24 @ 0x80000000 -2: - .endif - push {r0, r1, r2} - ldr r0, 4f - adr r1, 4f - add r0, r1 - str r0, [sp, #8] - @ We know we are not on armv4t, so pop pc is safe. - pop {r0, r1, pc} - .align 2 -4: - .word __aeabi_idiv0 - 4b -#elif defined(__thumb2__) - .syntax unified - .ifc \signed, unsigned - cbz r0, 1f - mov r0, #0xffffffff -1: - .else - cmp r0, #0 - do_it gt - movgt r0, #0x7fffffff - do_it lt - movlt r0, #0x80000000 - .endif - b.w SYM(__aeabi_idiv0) __PLT__ -#else - .align 2 - bx pc - nop - .arm - cmp r0, #0 - .ifc \signed, unsigned - movne r0, #0xffffffff - .else - movgt r0, #0x7fffffff - movlt r0, #0x80000000 - .endif - b SYM(__aeabi_idiv0) __PLT__ - .thumb -#endif -.endm -#else -.macro THUMB_LDIV0 name signed - push { r1, lr } -98: cfi_push 98b - __\name, 0xe, -0x4, 0x8 - bl SYM (__div0) - mov r0, #0 @ About as wrong as it could be. -#if defined (__INTERWORKING__) - pop { r1, r2 } - bx r2 -#else - pop { r1, pc } -#endif -.endm -#endif - -.macro FUNC_END name - SIZE (__\name) -.endm - -.macro DIV_FUNC_END name signed - cfi_start __\name, LSYM(Lend_div0) -LSYM(Ldiv0): -#ifdef __thumb__ - THUMB_LDIV0 \name \signed -#else - ARM_LDIV0 \name \signed -#endif - cfi_end LSYM(Lend_div0) - FUNC_END \name -.endm - -.macro THUMB_FUNC_START name - .globl SYM (\name) - TYPE (\name) - .thumb_func -SYM (\name): -.endm - -/* Function start macros. Variants for ARM and Thumb. */ - -#ifdef __thumb__ -#define THUMB_FUNC .thumb_func -#define THUMB_CODE .force_thumb -# if defined(__thumb2__) -#define THUMB_SYNTAX .syntax divided -# else -#define THUMB_SYNTAX -# endif -#else -#define THUMB_FUNC -#define THUMB_CODE -#define THUMB_SYNTAX -#endif - -.macro FUNC_START name - .text - .globl SYM (__\name) - TYPE (__\name) - .align 0 - THUMB_CODE - THUMB_FUNC - THUMB_SYNTAX -SYM (__\name): -.endm - -/* Special function that will always be coded in ARM assembly, even if - in Thumb-only compilation. */ - -#if defined(__thumb2__) - -/* For Thumb-2 we build everything in thumb mode. */ -.macro ARM_FUNC_START name - FUNC_START \name - .syntax unified -.endm -#define EQUIV .thumb_set -.macro ARM_CALL name - bl __\name -.endm - -#elif defined(__INTERWORKING_STUBS__) - -.macro ARM_FUNC_START name - FUNC_START \name - bx pc - nop - .arm -/* A hook to tell gdb that we've switched to ARM mode. 
Also used to call - directly from other local arm routines. */ -_L__\name: -.endm -#define EQUIV .thumb_set -/* Branch directly to a function declared with ARM_FUNC_START. - Must be called in arm mode. */ -.macro ARM_CALL name - bl _L__\name -.endm - -#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */ - -#ifdef __ARM_ARCH_6M__ -#define EQUIV .thumb_set -#else -.macro ARM_FUNC_START name - .text - .globl SYM (__\name) - TYPE (__\name) - .align 0 - .arm -SYM (__\name): -.endm -#define EQUIV .set -.macro ARM_CALL name - bl __\name -.endm -#endif - -#endif - -.macro FUNC_ALIAS new old - .globl SYM (__\new) -#if defined (__thumb__) - .thumb_set SYM (__\new), SYM (__\old) -#else - .set SYM (__\new), SYM (__\old) -#endif -.endm - -#ifndef __ARM_ARCH_6M__ -.macro ARM_FUNC_ALIAS new old - .globl SYM (__\new) - EQUIV SYM (__\new), SYM (__\old) -#if defined(__INTERWORKING_STUBS__) - .set SYM (_L__\new), SYM (_L__\old) -#endif -.endm -#endif - -#ifdef __ARMEB__ -#define xxh r0 -#define xxl r1 -#define yyh r2 -#define yyl r3 -#else -#define xxh r1 -#define xxl r0 -#define yyh r3 -#define yyl r2 -#endif - -#ifdef __ARM_EABI__ -.macro WEAK name - .weak SYM (__\name) -.endm -#endif - -#ifdef __thumb__ -/* Register aliases. */ - -work .req r4 @ XXXX is this safe ? -dividend .req r0 -divisor .req r1 -overdone .req r2 -result .req r2 -curbit .req r3 -#endif -#if 0 -ip .req r12 -sp .req r13 -lr .req r14 -pc .req r15 -#endif - -/* ------------------------------------------------------------------------ */ -/* Bodies of the division and modulo routines. */ -/* ------------------------------------------------------------------------ */ -.macro ARM_DIV_BODY dividend, divisor, result, curbit - -#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) - -#if defined (__thumb2__) - clz \curbit, \dividend - clz \result, \divisor - sub \curbit, \result, \curbit - rsb \curbit, \curbit, #31 - adr \result, 1f - add \curbit, \result, \curbit, lsl #4 - mov \result, #0 - mov pc, \curbit -.p2align 3 -1: - .set shift, 32 - .rept 32 - .set shift, shift - 1 - cmp.w \dividend, \divisor, lsl #shift - nop.n - adc.w \result, \result, \result - it cs - subcs.w \dividend, \dividend, \divisor, lsl #shift - .endr -#else - clz \curbit, \dividend - clz \result, \divisor - sub \curbit, \result, \curbit - rsbs \curbit, \curbit, #31 - addne \curbit, \curbit, \curbit, lsl #1 - mov \result, #0 - addne pc, pc, \curbit, lsl #2 - nop - .set shift, 32 - .rept 32 - .set shift, shift - 1 - cmp \dividend, \divisor, lsl #shift - adc \result, \result, \result - subcs \dividend, \dividend, \divisor, lsl #shift - .endr -#endif - -#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ -#if __ARM_ARCH__ >= 5 - - clz \curbit, \divisor - clz \result, \dividend - sub \result, \curbit, \result - mov \curbit, #1 - mov \divisor, \divisor, lsl \result - mov \curbit, \curbit, lsl \result - mov \result, #0 - -#else /* __ARM_ARCH__ < 5 */ - - @ Initially shift the divisor left 3 bits if possible, - @ set curbit accordingly. This allows for curbit to be located - @ at the left end of each 4-bit nibbles in the division loop - @ to save one loop in most cases. - tst \divisor, #0xe0000000 - moveq \divisor, \divisor, lsl #3 - moveq \curbit, #8 - movne \curbit, #1 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. 
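
[Note, not part of the patch: ARM_DIV_BODY above is classic shift-and-subtract division. It lines the divisor up under the dividend's leading bit, with clz where available or with the shift loops that follow, then walks back down one bit position at a time, subtracting wherever the shifted divisor still fits. In C, roughly; the zero-divisor case is handled separately through the __aeabi_idiv0 paths:]

#include <stdint.h>

static uint32_t udiv32_sketch(uint32_t dividend, uint32_t divisor)
{
    uint32_t result = 0, curbit = 1;

    if (divisor == 0)
        return 0;                   /* the real code traps this case */

    /* Shift the divisor, and the quotient bit it represents, up. */
    while (divisor < dividend && !(divisor & 0x80000000u)) {
        divisor <<= 1;
        curbit  <<= 1;
    }
    /* Walk back down, subtracting wherever it fits. */
    while (curbit) {
        if (dividend >= divisor) {
            dividend -= divisor;
            result   |= curbit;
        }
        divisor >>= 1;
        curbit  >>= 1;
    }
    return result;                  /* dividend is left holding the remainder */
}
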
-1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - movlo \curbit, \curbit, lsl #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - movlo \curbit, \curbit, lsl #1 - blo 1b - - mov \result, #0 - -#endif /* __ARM_ARCH__ < 5 */ - - @ Division loop -1: cmp \dividend, \divisor - do_it hs, t - subhs \dividend, \dividend, \divisor - orrhs \result, \result, \curbit - cmp \dividend, \divisor, lsr #1 - do_it hs, t - subhs \dividend, \dividend, \divisor, lsr #1 - orrhs \result, \result, \curbit, lsr #1 - cmp \dividend, \divisor, lsr #2 - do_it hs, t - subhs \dividend, \dividend, \divisor, lsr #2 - orrhs \result, \result, \curbit, lsr #2 - cmp \dividend, \divisor, lsr #3 - do_it hs, t - subhs \dividend, \dividend, \divisor, lsr #3 - orrhs \result, \result, \curbit, lsr #3 - cmp \dividend, #0 @ Early termination? - do_it ne, t - movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? - movne \divisor, \divisor, lsr #4 - bne 1b - -#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ - -.endm -/* ------------------------------------------------------------------------ */ -.macro ARM_DIV2_ORDER divisor, order - -#if __ARM_ARCH__ >= 5 - - clz \order, \divisor - rsb \order, \order, #31 - -#else - - cmp \divisor, #(1 << 16) - movhs \divisor, \divisor, lsr #16 - movhs \order, #16 - movlo \order, #0 - - cmp \divisor, #(1 << 8) - movhs \divisor, \divisor, lsr #8 - addhs \order, \order, #8 - - cmp \divisor, #(1 << 4) - movhs \divisor, \divisor, lsr #4 - addhs \order, \order, #4 - - cmp \divisor, #(1 << 2) - addhi \order, \order, #3 - addls \order, \order, \divisor, lsr #1 - -#endif - -.endm -/* ------------------------------------------------------------------------ */ -.macro ARM_MOD_BODY dividend, divisor, order, spare - -#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) - - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - rsbs \order, \order, #31 - addne pc, pc, \order, lsl #3 - nop - .set shift, 32 - .rept 32 - .set shift, shift - 1 - cmp \dividend, \divisor, lsl #shift - subcs \dividend, \dividend, \divisor, lsl #shift - .endr - -#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ -#if __ARM_ARCH__ >= 5 - - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - mov \divisor, \divisor, lsl \order - -#else /* __ARM_ARCH__ < 5 */ - - mov \order, #0 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - addlo \order, \order, #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - addlo \order, \order, #1 - blo 1b - -#endif /* __ARM_ARCH__ < 5 */ - - @ Perform all needed substractions to keep only the reminder. - @ Do comparisons in batch of 4 first. 
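
[Note, not part of the patch: ARM_MOD_BODY uses the same alignment scheme but keeps only the remainder; no quotient bits are recorded, and the four trial subtractions per pass are purely an unrolling optimization. Stripped of the unrolling, a C sketch of the algorithm:]

#include <stdint.h>

static uint32_t umod32_sketch(uint32_t dividend, uint32_t divisor)
{
    uint32_t shifted = divisor;

    if (divisor == 0)
        return 0;                   /* the real code branches to the div0 handler */

    /* Align the divisor with the dividend. */
    while (shifted < dividend && !(shifted & 0x80000000u))
        shifted <<= 1;

    /* Subtract the shifted divisor back down to the original scale. */
    while (shifted >= divisor) {
        if (dividend >= shifted)
            dividend -= shifted;
        shifted >>= 1;
    }
    return dividend;
}
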
- subs \order, \order, #3 @ yes, 3 is intended here - blt 2f - -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - cmp \dividend, #1 - mov \divisor, \divisor, lsr #4 - subges \order, \order, #4 - bge 1b - - tst \order, #3 - teqne \dividend, #0 - beq 5f - - @ Either 1, 2 or 3 comparison/substractions are left. -2: cmn \order, #2 - blt 4f - beq 3f - cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -3: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -4: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor -5: - -#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ - -.endm -/* ------------------------------------------------------------------------ */ -.macro THUMB_DIV_MOD_BODY modulo - @ Load the constant 0x10000000 into our work register. - mov work, #1 - lsl work, #28 -LSYM(Loop1): - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, work - bhs LSYM(Lbignum) - cmp divisor, dividend - bhs LSYM(Lbignum) - lsl divisor, #4 - lsl curbit, #4 - b LSYM(Loop1) -LSYM(Lbignum): - @ Set work to 0x80000000 - lsl work, #3 -LSYM(Loop2): - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, work - bhs LSYM(Loop3) - cmp divisor, dividend - bhs LSYM(Loop3) - lsl divisor, #1 - lsl curbit, #1 - b LSYM(Loop2) -LSYM(Loop3): - @ Test for possible subtractions ... - .if \modulo - @ ... On the final pass, this may subtract too much from the dividend, - @ so keep track of which subtractions are done, we can fix them up - @ afterwards. - mov overdone, #0 - cmp dividend, divisor - blo LSYM(Lover1) - sub dividend, dividend, divisor -LSYM(Lover1): - lsr work, divisor, #1 - cmp dividend, work - blo LSYM(Lover2) - sub dividend, dividend, work - mov ip, curbit - mov work, #1 - ror curbit, work - orr overdone, curbit - mov curbit, ip -LSYM(Lover2): - lsr work, divisor, #2 - cmp dividend, work - blo LSYM(Lover3) - sub dividend, dividend, work - mov ip, curbit - mov work, #2 - ror curbit, work - orr overdone, curbit - mov curbit, ip -LSYM(Lover3): - lsr work, divisor, #3 - cmp dividend, work - blo LSYM(Lover4) - sub dividend, dividend, work - mov ip, curbit - mov work, #3 - ror curbit, work - orr overdone, curbit - mov curbit, ip -LSYM(Lover4): - mov ip, curbit - .else - @ ... and note which bits are done in the result. On the final pass, - @ this may subtract too much from the dividend, but the result will be ok, - @ since the "bit" will have been shifted out at the bottom. 
- cmp dividend, divisor - blo LSYM(Lover1) - sub dividend, dividend, divisor - orr result, result, curbit -LSYM(Lover1): - lsr work, divisor, #1 - cmp dividend, work - blo LSYM(Lover2) - sub dividend, dividend, work - lsr work, curbit, #1 - orr result, work -LSYM(Lover2): - lsr work, divisor, #2 - cmp dividend, work - blo LSYM(Lover3) - sub dividend, dividend, work - lsr work, curbit, #2 - orr result, work -LSYM(Lover3): - lsr work, divisor, #3 - cmp dividend, work - blo LSYM(Lover4) - sub dividend, dividend, work - lsr work, curbit, #3 - orr result, work -LSYM(Lover4): - .endif - - cmp dividend, #0 @ Early termination? - beq LSYM(Lover5) - lsr curbit, #4 @ No, any more bits to do? - beq LSYM(Lover5) - lsr divisor, #4 - b LSYM(Loop3) -LSYM(Lover5): - .if \modulo - @ Any subtractions that we should not have done will be recorded in - @ the top three bits of "overdone". Exactly which were not needed - @ are governed by the position of the bit, stored in ip. - mov work, #0xe - lsl work, #28 - and overdone, work - beq LSYM(Lgot_result) - - @ If we terminated early, because dividend became zero, then the - @ bit in ip will not be in the bottom nibble, and we should not - @ perform the additions below. We must test for this though - @ (rather relying upon the TSTs to prevent the additions) since - @ the bit in ip could be in the top two bits which might then match - @ with one of the smaller RORs. - mov curbit, ip - mov work, #0x7 - tst curbit, work - beq LSYM(Lgot_result) - - mov curbit, ip - mov work, #3 - ror curbit, work - tst overdone, curbit - beq LSYM(Lover6) - lsr work, divisor, #3 - add dividend, work -LSYM(Lover6): - mov curbit, ip - mov work, #2 - ror curbit, work - tst overdone, curbit - beq LSYM(Lover7) - lsr work, divisor, #2 - add dividend, work -LSYM(Lover7): - mov curbit, ip - mov work, #1 - ror curbit, work - tst overdone, curbit - beq LSYM(Lgot_result) - lsr work, divisor, #1 - add dividend, work - .endif -LSYM(Lgot_result): -.endm -/* ------------------------------------------------------------------------ */ -/* Start of the Real Functions */ -/* ------------------------------------------------------------------------ */ -#ifdef L_udivsi3 - -#if defined(__prefer_thumb__) - - FUNC_START udivsi3 - FUNC_ALIAS aeabi_uidiv udivsi3 - - cmp divisor, #0 - beq LSYM(Ldiv0) -LSYM(udivsi3_skip_div0_test): - mov curbit, #1 - mov result, #0 - - push { work } - cmp dividend, divisor - blo LSYM(Lgot_result) - - THUMB_DIV_MOD_BODY 0 - - mov r0, result - pop { work } - RET - -#else /* ARM version/Thumb-2. */ - - ARM_FUNC_START udivsi3 - ARM_FUNC_ALIAS aeabi_uidiv udivsi3 - - /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily - check for division-by-zero a second time. 
*/ -LSYM(udivsi3_skip_div0_test): - subs r2, r1, #1 - do_it eq - RETc(eq) - bcc LSYM(Ldiv0) - cmp r0, r1 - bls 11f - tst r1, r2 - beq 12f - - ARM_DIV_BODY r0, r1, r2, r3 - - mov r0, r2 - RET - -11: do_it eq, e - moveq r0, #1 - movne r0, #0 - RET - -12: ARM_DIV2_ORDER r1, r2 - - mov r0, r0, lsr r2 - RET - -#endif /* ARM version */ - - DIV_FUNC_END udivsi3 unsigned - -#if defined(__prefer_thumb__) -FUNC_START aeabi_uidivmod - cmp r1, #0 - beq LSYM(Ldiv0) - push {r0, r1, lr} - bl LSYM(udivsi3_skip_div0_test) - POP {r1, r2, r3} - mul r2, r0 - sub r1, r1, r2 - bx r3 -#else -ARM_FUNC_START aeabi_uidivmod - cmp r1, #0 - beq LSYM(Ldiv0) - stmfd sp!, { r0, r1, lr } - bl LSYM(udivsi3_skip_div0_test) - ldmfd sp!, { r1, r2, lr } - mul r3, r2, r0 - sub r1, r1, r3 - RET -#endif - FUNC_END aeabi_uidivmod - -#endif /* L_udivsi3 */ -/* ------------------------------------------------------------------------ */ -#ifdef L_umodsi3 - - FUNC_START umodsi3 - -#ifdef __thumb__ - - cmp divisor, #0 - beq LSYM(Ldiv0) - mov curbit, #1 - cmp dividend, divisor - bhs LSYM(Lover10) - RET - -LSYM(Lover10): - push { work } - - THUMB_DIV_MOD_BODY 1 - - pop { work } - RET - -#else /* ARM version. */ - - subs r2, r1, #1 @ compare divisor with 1 - bcc LSYM(Ldiv0) - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - RETc(ls) - - ARM_MOD_BODY r0, r1, r2, r3 - - RET - -#endif /* ARM version. */ - - DIV_FUNC_END umodsi3 unsigned - -#endif /* L_umodsi3 */ -/* ------------------------------------------------------------------------ */ -#ifdef L_divsi3 - -#if defined(__prefer_thumb__) - - FUNC_START divsi3 - FUNC_ALIAS aeabi_idiv divsi3 - - cmp divisor, #0 - beq LSYM(Ldiv0) -LSYM(divsi3_skip_div0_test): - push { work } - mov work, dividend - eor work, divisor @ Save the sign of the result. - mov ip, work - mov curbit, #1 - mov result, #0 - cmp divisor, #0 - bpl LSYM(Lover10) - neg divisor, divisor @ Loops below use unsigned. -LSYM(Lover10): - cmp dividend, #0 - bpl LSYM(Lover11) - neg dividend, dividend -LSYM(Lover11): - cmp dividend, divisor - blo LSYM(Lgot_result) - - THUMB_DIV_MOD_BODY 0 - - mov r0, result - mov work, ip - cmp work, #0 - bpl LSYM(Lover12) - neg r0, r0 -LSYM(Lover12): - pop { work } - RET - -#else /* ARM/Thumb-2 version. */ - - ARM_FUNC_START divsi3 - ARM_FUNC_ALIAS aeabi_idiv divsi3 - - cmp r1, #0 - beq LSYM(Ldiv0) -LSYM(divsi3_skip_div0_test): - eor ip, r0, r1 @ save the sign of the result. - do_it mi - rsbmi r1, r1, #0 @ loops below use unsigned. - subs r2, r1, #1 @ division by 1 or -1 ? - beq 10f - movs r3, r0 - do_it mi - rsbmi r3, r0, #0 @ positive dividend value - cmp r3, r1 - bls 11f - tst r1, r2 @ divisor is power of 2 ? - beq 12f - - ARM_DIV_BODY r3, r1, r0, r2 - - cmp ip, #0 - do_it mi - rsbmi r0, r0, #0 - RET - -10: teq ip, r0 @ same sign ? 
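
[Note, not part of the patch: __aeabi_uidivmod above returns the quotient in r0 and the remainder in r1. Rather than running a second loop it reconstructs the remainder as n - q*d after the ordinary division, which is the mul/sub pair following the call. In C terms, roughly:]

#include <stdint.h>

typedef struct { uint32_t quot, rem; } uidivmod_sketch_t;

static uidivmod_sketch_t uidivmod_sketch(uint32_t n, uint32_t d)
{
    /* d != 0 assumed; the library routes zero to __aeabi_idiv0 first. */
    uidivmod_sketch_t r;
    r.quot = n / d;                 /* __udivsi3 does this part */
    r.rem  = n - r.quot * d;        /* the trailing mul/sub */
    return r;
}
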
- do_it mi - rsbmi r0, r0, #0 - RET - -11: do_it lo - movlo r0, #0 - do_it eq,t - moveq r0, ip, asr #31 - orreq r0, r0, #1 - RET - -12: ARM_DIV2_ORDER r1, r2 - - cmp ip, #0 - mov r0, r3, lsr r2 - do_it mi - rsbmi r0, r0, #0 - RET - -#endif /* ARM version */ - - DIV_FUNC_END divsi3 signed - -#if defined(__prefer_thumb__) -FUNC_START aeabi_idivmod - cmp r1, #0 - beq LSYM(Ldiv0) - push {r0, r1, lr} - bl LSYM(divsi3_skip_div0_test) - POP {r1, r2, r3} - mul r2, r0 - sub r1, r1, r2 - bx r3 -#else -ARM_FUNC_START aeabi_idivmod - cmp r1, #0 - beq LSYM(Ldiv0) - stmfd sp!, { r0, r1, lr } - bl LSYM(divsi3_skip_div0_test) - ldmfd sp!, { r1, r2, lr } - mul r3, r2, r0 - sub r1, r1, r3 - RET -#endif - FUNC_END aeabi_idivmod - -#endif /* L_divsi3 */ -/* ------------------------------------------------------------------------ */ -#ifdef L_modsi3 - - FUNC_START modsi3 - -#ifdef __thumb__ - - mov curbit, #1 - cmp divisor, #0 - beq LSYM(Ldiv0) - bpl LSYM(Lover10) - neg divisor, divisor @ Loops below use unsigned. -LSYM(Lover10): - push { work } - @ Need to save the sign of the dividend, unfortunately, we need - @ work later on. Must do this after saving the original value of - @ the work register, because we will pop this value off first. - push { dividend } - cmp dividend, #0 - bpl LSYM(Lover11) - neg dividend, dividend -LSYM(Lover11): - cmp dividend, divisor - blo LSYM(Lgot_result) - - THUMB_DIV_MOD_BODY 1 - - pop { work } - cmp work, #0 - bpl LSYM(Lover12) - neg dividend, dividend -LSYM(Lover12): - pop { work } - RET - -#else /* ARM version. */ - - cmp r1, #0 - beq LSYM(Ldiv0) - rsbmi r1, r1, #0 @ loops below use unsigned. - movs ip, r0 @ preserve sign of dividend - rsbmi r0, r0, #0 @ if negative make positive - subs r2, r1, #1 @ compare divisor with 1 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - bls 10f - - ARM_MOD_BODY r0, r1, r2, r3 - -10: cmp ip, #0 - rsbmi r0, r0, #0 - RET - -#endif /* ARM version */ - - DIV_FUNC_END modsi3 signed - -#endif /* L_modsi3 */ -/* ------------------------------------------------------------------------ */ -#ifdef L_dvmd_tls - -#ifdef __ARM_EABI__ - WEAK aeabi_idiv0 - WEAK aeabi_ldiv0 - FUNC_START aeabi_idiv0 - FUNC_START aeabi_ldiv0 - RET - FUNC_END aeabi_ldiv0 - FUNC_END aeabi_idiv0 -#else - FUNC_START div0 - RET - FUNC_END div0 -#endif - -#endif /* L_divmodsi_tools */ -/* ------------------------------------------------------------------------ */ -#ifdef L_dvmd_lnx -@ GNU/Linux division-by zero handler. Used in place of L_dvmd_tls - -/* Constant taken from . */ -#define SIGFPE 8 - -#ifdef __ARM_EABI__ - WEAK aeabi_idiv0 - WEAK aeabi_ldiv0 - ARM_FUNC_START aeabi_idiv0 - ARM_FUNC_START aeabi_ldiv0 -#else - ARM_FUNC_START div0 -#endif - - do_push {r1, lr} - mov r0, #SIGFPE - bl SYM(raise) __PLT__ - RETLDM r1 - -#ifdef __ARM_EABI__ - FUNC_END aeabi_ldiv0 - FUNC_END aeabi_idiv0 -#else - FUNC_END div0 -#endif - -#endif /* L_dvmd_lnx */ -#ifdef L_clear_cache -#if defined __ARM_EABI__ && defined __linux__ -@ EABI GNU/Linux call to cacheflush syscall. - ARM_FUNC_START clear_cache - do_push {r7} -#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__) - movw r7, #2 - movt r7, #0xf -#else - mov r7, #0xf0000 - add r7, r7, #2 -#endif - mov r2, #0 - swi 0 - do_pop {r7} - RET - FUNC_END clear_cache -#else -#error "This is only for ARM EABI GNU/Linux" -#endif -#endif /* L_clear_cache */ -/* ------------------------------------------------------------------------ */ -/* Dword shift operations. 
*/ -/* All the following Dword shift variants rely on the fact that - shft xxx, Reg - is in fact done as - shft xxx, (Reg & 255) - so for Reg value in (32...63) and (-1...-31) we will get zero (in the - case of logical shifts) or the sign (for asr). */ - -#ifdef __ARMEB__ -#define al r1 -#define ah r0 -#else -#define al r0 -#define ah r1 -#endif - -/* Prevent __aeabi double-word shifts from being produced on SymbianOS. */ -#ifndef __symbian__ - -#ifdef L_lshrdi3 - - FUNC_START lshrdi3 - FUNC_ALIAS aeabi_llsr lshrdi3 - -#ifdef __thumb__ - lsr al, r2 - mov r3, ah - lsr ah, r2 - mov ip, r3 - sub r2, #32 - lsr r3, r2 - orr al, r3 - neg r2, r2 - mov r3, ip - lsl r3, r2 - orr al, r3 - RET -#else - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, lsr r3 - orrmi al, al, ah, lsl ip - mov ah, ah, lsr r2 - RET -#endif - FUNC_END aeabi_llsr - FUNC_END lshrdi3 - -#endif - -#ifdef L_ashrdi3 - - FUNC_START ashrdi3 - FUNC_ALIAS aeabi_lasr ashrdi3 - -#ifdef __thumb__ - lsr al, r2 - mov r3, ah - asr ah, r2 - sub r2, #32 - @ If r2 is negative at this point the following step would OR - @ the sign bit into all of AL. That's not what we want... - bmi 1f - mov ip, r3 - asr r3, r2 - orr al, r3 - mov r3, ip -1: - neg r2, r2 - lsl r3, r2 - orr al, r3 - RET -#else - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, asr r3 - orrmi al, al, ah, lsl ip - mov ah, ah, asr r2 - RET -#endif - - FUNC_END aeabi_lasr - FUNC_END ashrdi3 - -#endif - -#ifdef L_ashldi3 - - FUNC_START ashldi3 - FUNC_ALIAS aeabi_llsl ashldi3 - -#ifdef __thumb__ - lsl ah, r2 - mov r3, al - lsl al, r2 - mov ip, r3 - sub r2, #32 - lsl r3, r2 - orr ah, r3 - neg r2, r2 - mov r3, ip - lsr r3, r2 - orr ah, r3 - RET -#else - subs r3, r2, #32 - rsb ip, r2, #32 - movmi ah, ah, lsl r2 - movpl ah, al, lsl r3 - orrmi ah, ah, al, lsr ip - mov al, al, lsl r2 - RET -#endif - FUNC_END aeabi_llsl - FUNC_END ashldi3 - -#endif - -#endif /* __symbian__ */ - -#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \ - || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ - || defined(__ARM_ARCH_5TEJ__) -#define HAVE_ARM_CLZ 1 -#endif - -#ifdef L_clzsi2 -#if defined(__ARM_ARCH_6M__) -FUNC_START clzsi2 - mov r1, #28 - mov r3, #1 - lsl r3, r3, #16 - cmp r0, r3 /* 0x10000 */ - bcc 2f - lsr r0, r0, #16 - sub r1, r1, #16 -2: lsr r3, r3, #8 - cmp r0, r3 /* #0x100 */ - bcc 2f - lsr r0, r0, #8 - sub r1, r1, #8 -2: lsr r3, r3, #4 - cmp r0, r3 /* #0x10 */ - bcc 2f - lsr r0, r0, #4 - sub r1, r1, #4 -2: adr r2, 1f - ldrb r0, [r2, r0] - add r0, r0, r1 - bx lr -.align 2 -1: -.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 - FUNC_END clzsi2 -#else -ARM_FUNC_START clzsi2 -# if defined(HAVE_ARM_CLZ) - clz r0, r0 - RET -# else - mov r1, #28 - cmp r0, #0x10000 - do_it cs, t - movcs r0, r0, lsr #16 - subcs r1, r1, #16 - cmp r0, #0x100 - do_it cs, t - movcs r0, r0, lsr #8 - subcs r1, r1, #8 - cmp r0, #0x10 - do_it cs, t - movcs r0, r0, lsr #4 - subcs r1, r1, #4 - adr r2, 1f - ldrb r0, [r2, r0] - add r0, r0, r1 - RET -.align 2 -1: -.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 -# endif /* !HAVE_ARM_CLZ */ - FUNC_END clzsi2 -#endif -#endif /* L_clzsi2 */ - -#ifdef L_clzdi2 -#if !defined(HAVE_ARM_CLZ) - -# if defined(__ARM_ARCH_6M__) -FUNC_START clzdi2 - push {r4, lr} -# else -ARM_FUNC_START clzdi2 - do_push {r4, lr} -# endif - cmp xxh, #0 - bne 1f -# ifdef __ARMEB__ - mov r0, xxl - bl __clzsi2 - add r0, r0, #32 - b 2f -1: - bl __clzsi2 -# else - bl __clzsi2 - add r0, r0, #32 - b 2f -1: - mov r0, xxh - bl __clzsi2 -# endif -2: -# 
if defined(__ARM_ARCH_6M__) - pop {r4, pc} -# else - RETLDM r4 -# endif - FUNC_END clzdi2 - -#else /* HAVE_ARM_CLZ */ - -ARM_FUNC_START clzdi2 - cmp xxh, #0 - do_it eq, et - clzeq r0, xxl - clzne r0, xxh - addeq r0, r0, #32 - RET - FUNC_END clzdi2 - -#endif -#endif /* L_clzdi2 */ - -/* ------------------------------------------------------------------------ */ -/* These next two sections are here despite the fact that they contain Thumb - assembler because their presence allows interworked code to be linked even - when the GCC library is this one. */ - -/* Do not build the interworking functions when the target architecture does - not support Thumb instructions. (This can be a multilib option). */ -#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\ - || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \ - || __ARM_ARCH__ >= 6 - -#if defined L_call_via_rX - -/* These labels & instructions are used by the Arm/Thumb interworking code. - The address of function to be called is loaded into a register and then - one of these labels is called via a BL instruction. This puts the - return address into the link register with the bottom bit set, and the - code here switches to the correct mode before executing the function. */ - - .text - .align 0 - .force_thumb - -.macro call_via register - THUMB_FUNC_START _call_via_\register - - bx \register - nop - - SIZE (_call_via_\register) -.endm - - call_via r0 - call_via r1 - call_via r2 - call_via r3 - call_via r4 - call_via r5 - call_via r6 - call_via r7 - call_via r8 - call_via r9 - call_via sl - call_via fp - call_via ip - call_via sp - call_via lr - -#endif /* L_call_via_rX */ - -/* Don't bother with the old interworking routines for Thumb-2. */ -/* ??? Maybe only omit these on "m" variants. */ -#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__) - -#if defined L_interwork_call_via_rX - -/* These labels & instructions are used by the Arm/Thumb interworking code, - when the target address is in an unknown instruction set. The address - of function to be called is loaded into a register and then one of these - labels is called via a BL instruction. This puts the return address - into the link register with the bottom bit set, and the code here - switches to the correct mode before executing the function. Unfortunately - the target code cannot be relied upon to return via a BX instruction, so - instead we have to store the resturn address on the stack and allow the - called function to return here instead. Upon return we recover the real - return address and use a BX to get back to Thumb mode. - - There are three variations of this code. The first, - _interwork_call_via_rN(), will push the return address onto the - stack and pop it in _arm_return(). It should only be used if all - arguments are passed in registers. - - The second, _interwork_r7_call_via_rN(), instead stores the return - address at [r7, #-4]. It is the caller's responsibility to ensure - that this address is valid and contains no useful data. - - The third, _interwork_r11_call_via_rN(), works in the same way but - uses r11 instead of r7. It is useful if the caller does not really - need a frame pointer. */ - - .text - .align 0 - - .code 32 - .globl _arm_return -LSYM(Lstart_arm_return): - cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return) - cfi_push 0, 0xe, -0x8, 0x8 - nop @ This nop is for the benefit of debuggers, so that - @ backtraces will use the correct unwind information. 
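@ Editorial note (not part of the patch): the interworking stubs below make
@ one simple decision that is easier to see in C.  A hedged sketch with
@ invented names; the BX state switch itself has no C equivalent.
/*
#include <stdint.h>

typedef struct {
  uintptr_t lr;         // return-address register
  uintptr_t saved_lr;   // stack slot, or [r7/r11, #-4] in the framed variants
} call_ctx;

static void
interwork_call_model (call_ctx *ctx, uintptr_t target, uintptr_t arm_return)
{
  if ((target & 1) == 0)
    {
      // Destination is ARM code that may not return with BX: park the real
      // return address and make the callee return into _arm_return, which
      // recovers it and BXes back to Thumb.
      ctx->saved_lr = ctx->lr;
      ctx->lr = arm_return;
    }
  // bx target  -- transfers control, switching state according to bit 0.
}
*/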
-_arm_return: - RETLDM unwind=LSYM(Lstart_arm_return) - cfi_end LSYM(Lend_arm_return) - - .globl _arm_return_r7 -_arm_return_r7: - ldr lr, [r7, #-4] - bx lr - - .globl _arm_return_r11 -_arm_return_r11: - ldr lr, [r11, #-4] - bx lr - -.macro interwork_with_frame frame, register, name, return - .code 16 - - THUMB_FUNC_START \name - - bx pc - nop - - .code 32 - tst \register, #1 - streq lr, [\frame, #-4] - adreq lr, _arm_return_\frame - bx \register - - SIZE (\name) -.endm - -.macro interwork register - .code 16 - - THUMB_FUNC_START _interwork_call_via_\register - - bx pc - nop - - .code 32 - .globl LSYM(Lchange_\register) -LSYM(Lchange_\register): - tst \register, #1 - streq lr, [sp, #-8]! - adreq lr, _arm_return - bx \register - - SIZE (_interwork_call_via_\register) - - interwork_with_frame r7,\register,_interwork_r7_call_via_\register - interwork_with_frame r11,\register,_interwork_r11_call_via_\register -.endm - - interwork r0 - interwork r1 - interwork r2 - interwork r3 - interwork r4 - interwork r5 - interwork r6 - interwork r7 - interwork r8 - interwork r9 - interwork sl - interwork fp - interwork ip - interwork sp - - /* The LR case has to be handled a little differently... */ - .code 16 - - THUMB_FUNC_START _interwork_call_via_lr - - bx pc - nop - - .code 32 - .globl .Lchange_lr -.Lchange_lr: - tst lr, #1 - stmeqdb r13!, {lr, pc} - mov ip, lr - adreq lr, _arm_return - bx ip - - SIZE (_interwork_call_via_lr) - -#endif /* L_interwork_call_via_rX */ -#endif /* !__thumb2__ */ - -/* Functions to support compact pic switch tables in thumb1 state. - All these routines take an index into the table in r0. The - table is at LR & ~1 (but this must be rounded up in the case - of 32-bit entires). They are only permitted to clobber r12 - and r14 and r0 must be preserved on exit. */ -#ifdef L_thumb1_case_sqi - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_sqi - push {r1} - mov r1, lr - lsrs r1, r1, #1 - lsls r1, r1, #1 - ldrsb r1, [r1, r0] - lsls r1, r1, #1 - add lr, lr, r1 - pop {r1} - bx lr - SIZE (__gnu_thumb1_case_sqi) -#endif - -#ifdef L_thumb1_case_uqi - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_uqi - push {r1} - mov r1, lr - lsrs r1, r1, #1 - lsls r1, r1, #1 - ldrb r1, [r1, r0] - lsls r1, r1, #1 - add lr, lr, r1 - pop {r1} - bx lr - SIZE (__gnu_thumb1_case_uqi) -#endif - -#ifdef L_thumb1_case_shi - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_shi - push {r0, r1} - mov r1, lr - lsrs r1, r1, #1 - lsls r0, r0, #1 - lsls r1, r1, #1 - ldrsh r1, [r1, r0] - lsls r1, r1, #1 - add lr, lr, r1 - pop {r0, r1} - bx lr - SIZE (__gnu_thumb1_case_shi) -#endif - -#ifdef L_thumb1_case_uhi - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_uhi - push {r0, r1} - mov r1, lr - lsrs r1, r1, #1 - lsls r0, r0, #1 - lsls r1, r1, #1 - ldrh r1, [r1, r0] - lsls r1, r1, #1 - add lr, lr, r1 - pop {r0, r1} - bx lr - SIZE (__gnu_thumb1_case_uhi) -#endif - -#ifdef L_thumb1_case_si - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_si - push {r0, r1} - mov r1, lr - adds.n r1, r1, #2 /* Align to word. */ - lsrs r1, r1, #2 - lsls r0, r0, #2 - lsls r1, r1, #2 - ldr r0, [r1, r0] - adds r0, r0, r1 - mov lr, r0 - pop {r0, r1} - mov pc, lr /* We know we were called from thumb code. */ - SIZE (__gnu_thumb1_case_si) -#endif - -#endif /* Arch supports thumb. 
*/ - -#ifndef __symbian__ -#ifndef __ARM_ARCH_6M__ -#include "ieee754-df.S" -#include "ieee754-sf.S" -#include "bpabi.S" -#else /* __ARM_ARCH_6M__ */ -#include "bpabi-v6m.S" -#endif /* __ARM_ARCH_6M__ */ -#endif /* !__symbian__ */ diff --git a/gcc/config/arm/linux-eabi.h b/gcc/config/arm/linux-eabi.h index a3830955948..80bd8259375 100644 --- a/gcc/config/arm/linux-eabi.h +++ b/gcc/config/arm/linux-eabi.h @@ -97,7 +97,7 @@ #undef LIBGCC_SPEC /* Clear the instruction cache from `beg' to `end'. This is - implemented in lib1funcs.asm, so ensure an error if this definition + implemented in lib1funcs.S, so ensure an error if this definition is used. */ #undef CLEAR_INSN_CACHE #define CLEAR_INSN_CACHE(BEG, END) not_used diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index b970ec26a35..a9a174d473d 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -40,9 +40,6 @@ MD_INCLUDES= $(srcdir)/config/arm/arm-tune.md \ $(srcdir)/config/arm/thumb2.md \ $(srcdir)/config/arm/arm-fixed.md -LIB1ASMSRC = arm/lib1funcs.asm -LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \ - _thumb1_case_uhi _thumb1_case_si s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \ s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES) diff --git a/gcc/config/arm/t-arm-elf b/gcc/config/arm/t-arm-elf index bfcf6ffd939..a605d26244f 100644 --- a/gcc/config/arm/t-arm-elf +++ b/gcc/config/arm/t-arm-elf @@ -17,20 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . -# For most CPUs we have an assembly soft-float implementations. -# However this is not true for ARMv6M. Here we want to use the soft-fp C -# implementation. The soft-fp code is only build for ARMv6M. This pulls -# in the asm implementation for other CPUs. -LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \ - _call_via_rX _interwork_call_via_rX \ - _lshrdi3 _ashrdi3 _ashldi3 \ - _arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \ - _arm_fixdfsi _arm_fixunsdfsi \ - _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \ - _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \ - _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \ - _clzsi2 _clzdi2 - MULTILIB_OPTIONS = marm/mthumb MULTILIB_DIRNAMES = arm thumb MULTILIB_EXCEPTIONS = diff --git a/gcc/config/arm/t-bpabi b/gcc/config/arm/t-bpabi index 047525682fc..c9d5ed4d674 100644 --- a/gcc/config/arm/t-bpabi +++ b/gcc/config/arm/t-bpabi @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -# Add the bpabi.S functions. -LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod - # Add the BPABI C functions. LIB2FUNCS_EXTRA = $(srcdir)/config/arm/bpabi.c \ $(srcdir)/config/arm/unaligned-funcs.c diff --git a/gcc/config/arm/t-linux b/gcc/config/arm/t-linux index a0c5110f0c0..a204834014e 100644 --- a/gcc/config/arm/t-linux +++ b/gcc/config/arm/t-linux @@ -21,10 +21,6 @@ # difference. 
TARGET_LIBGCC2_CFLAGS = -fomit-frame-pointer -fPIC -LIB1ASMSRC = arm/lib1funcs.asm -LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx _clzsi2 _clzdi2 \ - _arm_addsubdf3 _arm_addsubsf3 - # MULTILIB_OPTIONS = mfloat-abi=hard/mfloat-abi=soft # MULTILIB_DIRNAMES = hard-float soft-float diff --git a/gcc/config/arm/t-linux-eabi b/gcc/config/arm/t-linux-eabi index fed979e980b..3030229fafa 100644 --- a/gcc/config/arm/t-linux-eabi +++ b/gcc/config/arm/t-linux-eabi @@ -28,8 +28,5 @@ MULTILIB_DIRNAMES = #MULTILIB_DIRNAMES += fa606te fa626te fmp626 fa726te #MULTILIB_EXCEPTIONS += *mthumb/*mcpu=fa606te *mthumb/*mcpu=fa626te *mthumb/*mcpu=fmp626 *mthumb/*mcpu=fa726te* -# Use a version of div0 which raises SIGFPE, and a special __clear_cache. -LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache - LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic.c LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic-64bit.c diff --git a/gcc/config/arm/t-strongarm-elf b/gcc/config/arm/t-strongarm-elf index 95680031e54..4d51e660c8b 100644 --- a/gcc/config/arm/t-strongarm-elf +++ b/gcc/config/arm/t-strongarm-elf @@ -17,8 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2 - MULTILIB_OPTIONS = mlittle-endian/mbig-endian mfloat-abi=hard/mfloat-abi=soft MULTILIB_DIRNAMES = le be fpu soft MULTILIB_EXCEPTIONS = diff --git a/gcc/config/arm/t-symbian b/gcc/config/arm/t-symbian index cf716147849..736a01d10f4 100644 --- a/gcc/config/arm/t-symbian +++ b/gcc/config/arm/t-symbian @@ -16,20 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 - -# These functions have __aeabi equivalents and will never be called by GCC. -# By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being -# used -- and we make sure that definitions are not available in lib1funcs.asm, -# either, so they end up undefined. -LIB1ASMFUNCS += \ - _ashldi3 _ashrdi3 _divdi3 _floatdidf _udivmoddi4 _umoddi3 \ - _udivdi3 _lshrdi3 _moddi3 _muldi3 _negdi2 _cmpdi2 \ - _fixdfdi _fixsfdi _fixunsdfdi _fixunssfdi _floatdisf \ - _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \ - _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \ - _fixsfsi _fixunssfsi - EXTRA_HEADERS += $(srcdir)/ginclude/unwind-arm-common.h # Include half-float helpers. LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c diff --git a/gcc/config/arm/t-vxworks b/gcc/config/arm/t-vxworks index 8ac0d9bcec5..0900ffe15ed 100644 --- a/gcc/config/arm/t-vxworks +++ b/gcc/config/arm/t-vxworks @@ -16,8 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 - MULTILIB_OPTIONS = \ mrtp fPIC \ t4/t4be/t4t/t4tbe/t5/t5be/t5t/t5tbe/tstrongarm/txscale/txscalebe diff --git a/gcc/config/arm/t-wince-pe b/gcc/config/arm/t-wince-pe index 9ce1f313140..8a8c65fd396 100644 --- a/gcc/config/arm/t-wince-pe +++ b/gcc/config/arm/t-wince-pe @@ -17,8 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . 
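# Editorial note (not part of the patch): in these makefile fragments
# LIB1ASMSRC names the assembler source holding the "libgcc1" routines and
# LIB1ASMFUNCS lists the L_<name> groups it provides; the build compiles
# that one source once per listed member (with a -DL<member> define such as
# -DL_udivsi3), so each routine ends up in its own object file.  The
# removals below drop these settings from the gcc/ fragments because the
# toplevel libgcc now carries them in its own config/*/t-* files.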
-LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 - pe.o: $(srcdir)/config/arm/pe.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(RTL_H) output.h flags.h $(TREE_H) expr.h $(TM_P_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S deleted file mode 100644 index 8c369c96a77..00000000000 --- a/gcc/config/avr/libgcc.S +++ /dev/null @@ -1,1533 +0,0 @@ -/* -*- Mode: Asm -*- */ -/* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009 - Free Software Foundation, Inc. - Contributed by Denis Chertykov - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -#define __zero_reg__ r1 -#define __tmp_reg__ r0 -#define __SREG__ 0x3f -#define __SP_H__ 0x3e -#define __SP_L__ 0x3d -#define __RAMPZ__ 0x3B -#define __EIND__ 0x3C - -/* Most of the functions here are called directly from avr.md - patterns, instead of using the standard libcall mechanisms. - This can make better code because GCC knows exactly which - of the call-used registers (not all of them) are clobbered. */ - -/* FIXME: At present, there is no SORT directive in the linker - script so that we must not assume that different modules - in the same input section like .libgcc.text.mul will be - located close together. Therefore, we cannot use - RCALL/RJMP to call a function like __udivmodhi4 from - __divmodhi4 and have to use lengthy XCALL/XJMP even - though they are in the same input section and all same - input sections together are small enough to reach every - location with a RCALL/RJMP instruction. */ - - .macro mov_l r_dest, r_src -#if defined (__AVR_HAVE_MOVW__) - movw \r_dest, \r_src -#else - mov \r_dest, \r_src -#endif - .endm - - .macro mov_h r_dest, r_src -#if defined (__AVR_HAVE_MOVW__) - ; empty -#else - mov \r_dest, \r_src -#endif - .endm - -#if defined (__AVR_HAVE_JMP_CALL__) -#define XCALL call -#define XJMP jmp -#else -#define XCALL rcall -#define XJMP rjmp -#endif - -.macro DEFUN name -.global \name -.func \name -\name: -.endm - -.macro ENDF name -.size \name, .-\name -.endfunc -.endm - - -.section .text.libgcc.mul, "ax", @progbits - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. 
*/ -#if !defined (__AVR_HAVE_MUL__) -/******************************************************* - Multiplication 8 x 8 without MUL -*******************************************************/ -#if defined (L_mulqi3) - -#define r_arg2 r22 /* multiplicand */ -#define r_arg1 r24 /* multiplier */ -#define r_res __tmp_reg__ /* result */ - -DEFUN __mulqi3 - clr r_res ; clear result -__mulqi3_loop: - sbrc r_arg1,0 - add r_res,r_arg2 - add r_arg2,r_arg2 ; shift multiplicand - breq __mulqi3_exit ; while multiplicand != 0 - lsr r_arg1 ; - brne __mulqi3_loop ; exit if multiplier = 0 -__mulqi3_exit: - mov r_arg1,r_res ; result to return register - ret -ENDF __mulqi3 - -#undef r_arg2 -#undef r_arg1 -#undef r_res - -#endif /* defined (L_mulqi3) */ - -#if defined (L_mulqihi3) -DEFUN __mulqihi3 - clr r25 - sbrc r24, 7 - dec r25 - clr r23 - sbrc r22, 7 - dec r22 - XJMP __mulhi3 -ENDF __mulqihi3: -#endif /* defined (L_mulqihi3) */ - -#if defined (L_umulqihi3) -DEFUN __umulqihi3 - clr r25 - clr r23 - XJMP __mulhi3 -ENDF __umulqihi3 -#endif /* defined (L_umulqihi3) */ - -/******************************************************* - Multiplication 16 x 16 without MUL -*******************************************************/ -#if defined (L_mulhi3) -#define r_arg1L r24 /* multiplier Low */ -#define r_arg1H r25 /* multiplier High */ -#define r_arg2L r22 /* multiplicand Low */ -#define r_arg2H r23 /* multiplicand High */ -#define r_resL __tmp_reg__ /* result Low */ -#define r_resH r21 /* result High */ - -DEFUN __mulhi3 - clr r_resH ; clear result - clr r_resL ; clear result -__mulhi3_loop: - sbrs r_arg1L,0 - rjmp __mulhi3_skip1 - add r_resL,r_arg2L ; result + multiplicand - adc r_resH,r_arg2H -__mulhi3_skip1: - add r_arg2L,r_arg2L ; shift multiplicand - adc r_arg2H,r_arg2H - - cp r_arg2L,__zero_reg__ - cpc r_arg2H,__zero_reg__ - breq __mulhi3_exit ; while multiplicand != 0 - - lsr r_arg1H ; gets LSB of multiplier - ror r_arg1L - sbiw r_arg1L,0 - brne __mulhi3_loop ; exit if multiplier = 0 -__mulhi3_exit: - mov r_arg1H,r_resH ; result to return register - mov r_arg1L,r_resL - ret -ENDF __mulhi3 - -#undef r_arg1L -#undef r_arg1H -#undef r_arg2L -#undef r_arg2H -#undef r_resL -#undef r_resH - -#endif /* defined (L_mulhi3) */ - -/******************************************************* - Widening Multiplication 32 = 16 x 16 without MUL -*******************************************************/ - -#if defined (L_mulhisi3) -DEFUN __mulhisi3 -;;; FIXME: This is dead code (noone calls it) - mov_l r18, r24 - mov_h r19, r25 - clr r24 - sbrc r23, 7 - dec r24 - mov r25, r24 - clr r20 - sbrc r19, 7 - dec r20 - mov r21, r20 - XJMP __mulsi3 -ENDF __mulhisi3 -#endif /* defined (L_mulhisi3) */ - -#if defined (L_umulhisi3) -DEFUN __umulhisi3 -;;; FIXME: This is dead code (noone calls it) - mov_l r18, r24 - mov_h r19, r25 - clr r24 - clr r25 - mov_l r20, r24 - mov_h r21, r25 - XJMP __mulsi3 -ENDF __umulhisi3 -#endif /* defined (L_umulhisi3) */ - -#if defined (L_mulsi3) -/******************************************************* - Multiplication 32 x 32 without MUL -*******************************************************/ -#define r_arg1L r22 /* multiplier Low */ -#define r_arg1H r23 -#define r_arg1HL r24 -#define r_arg1HH r25 /* multiplier High */ - -#define r_arg2L r18 /* multiplicand Low */ -#define r_arg2H r19 -#define r_arg2HL r20 -#define r_arg2HH r21 /* multiplicand High */ - -#define r_resL r26 /* result Low */ -#define r_resH r27 -#define r_resHL r30 -#define r_resHH r31 /* result High */ - -DEFUN __mulsi3 - clr r_resHH ; clear result 
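;; Editorial note (not part of the patch): the no-MUL multiply routines in
;; this file (__mulqi3, __mulhi3, __mulsi3) all use the same shift-and-add
;; loop.  A hedged C model of the 16-bit case, with invented names:
/*
#include <stdint.h>

static uint16_t
mulhi3_model (uint16_t a, uint16_t b)
{
  uint16_t res = 0;
  while (a != 0 && b != 0)      // both exit tests appear in the assembly
    {
      if (a & 1)                // low multiplier bit set?
        res += b;               // ... then add the shifted multiplicand
      b <<= 1;                  // shift multiplicand up
      a >>= 1;                  // consume one multiplier bit
    }
  return res;
}
*/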
- clr r_resHL ; clear result - clr r_resH ; clear result - clr r_resL ; clear result -__mulsi3_loop: - sbrs r_arg1L,0 - rjmp __mulsi3_skip1 - add r_resL,r_arg2L ; result + multiplicand - adc r_resH,r_arg2H - adc r_resHL,r_arg2HL - adc r_resHH,r_arg2HH -__mulsi3_skip1: - add r_arg2L,r_arg2L ; shift multiplicand - adc r_arg2H,r_arg2H - adc r_arg2HL,r_arg2HL - adc r_arg2HH,r_arg2HH - - lsr r_arg1HH ; gets LSB of multiplier - ror r_arg1HL - ror r_arg1H - ror r_arg1L - brne __mulsi3_loop - sbiw r_arg1HL,0 - cpc r_arg1H,r_arg1L - brne __mulsi3_loop ; exit if multiplier = 0 -__mulsi3_exit: - mov_h r_arg1HH,r_resHH ; result to return register - mov_l r_arg1HL,r_resHL - mov_h r_arg1H,r_resH - mov_l r_arg1L,r_resL - ret -ENDF __mulsi3 - -#undef r_arg1L -#undef r_arg1H -#undef r_arg1HL -#undef r_arg1HH - -#undef r_arg2L -#undef r_arg2H -#undef r_arg2HL -#undef r_arg2HH - -#undef r_resL -#undef r_resH -#undef r_resHL -#undef r_resHH - -#endif /* defined (L_mulsi3) */ - -#endif /* !defined (__AVR_HAVE_MUL__) */ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -#if defined (__AVR_HAVE_MUL__) -#define A0 26 -#define B0 18 -#define C0 22 - -#define A1 A0+1 - -#define B1 B0+1 -#define B2 B0+2 -#define B3 B0+3 - -#define C1 C0+1 -#define C2 C0+2 -#define C3 C0+3 - -/******************************************************* - Widening Multiplication 32 = 16 x 16 -*******************************************************/ - -#if defined (L_mulhisi3) -;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18 -;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __mulhisi3 - XCALL __umulhisi3 - ;; Sign-extend B - tst B1 - brpl 1f - sub C2, A0 - sbc C3, A1 -1: ;; Sign-extend A - XJMP __usmulhisi3_tail -ENDF __mulhisi3 -#endif /* L_mulhisi3 */ - -#if defined (L_usmulhisi3) -;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18 -;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __usmulhisi3 - XCALL __umulhisi3 - ;; FALLTHRU -ENDF __usmulhisi3 - -DEFUN __usmulhisi3_tail - ;; Sign-extend A - sbrs A1, 7 - ret - sub C2, B0 - sbc C3, B1 - ret -ENDF __usmulhisi3_tail -#endif /* L_usmulhisi3 */ - -#if defined (L_umulhisi3) -;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18 -;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __umulhisi3 - mul A0, B0 - movw C0, r0 - mul A1, B1 - movw C2, r0 - mul A0, B1 - rcall 1f - mul A1, B0 -1: add C1, r0 - adc C2, r1 - clr __zero_reg__ - adc C3, __zero_reg__ - ret -ENDF __umulhisi3 -#endif /* L_umulhisi3 */ - -/******************************************************* - Widening Multiplication 32 = 16 x 32 -*******************************************************/ - -#if defined (L_mulshisi3) -;;; R25:R22 = (signed long) R27:R26 * R21:R18 -;;; (C3:C0) = (signed long) A1:A0 * B3:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __mulshisi3 -#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ - ;; Some cores have problem skipping 2-word instruction - tst A1 - brmi __mulohisi3 -#else - sbrs A1, 7 -#endif /* __AVR_HAVE_JMP_CALL__ */ - XJMP __muluhisi3 - ;; FALLTHRU -ENDF __mulshisi3 - -;;; R25:R22 = (one-extended long) R27:R26 * R21:R18 -;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __mulohisi3 - XCALL __muluhisi3 - ;; One-extend R27:R26 (A1:A0) - sub C2, B0 - sbc C3, B1 - ret -ENDF __mulohisi3 -#endif /* L_mulshisi3 */ - -#if defined (L_muluhisi3) -;;; R25:R22 = (unsigned long) R27:R26 * 
R21:R18 -;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __muluhisi3 - XCALL __umulhisi3 - mul A0, B3 - add C3, r0 - mul A1, B2 - add C3, r0 - mul A0, B2 - add C2, r0 - adc C3, r1 - clr __zero_reg__ - ret -ENDF __muluhisi3 -#endif /* L_muluhisi3 */ - -/******************************************************* - Multiplication 32 x 32 -*******************************************************/ - -#if defined (L_mulsi3) -;;; R25:R22 = R25:R22 * R21:R18 -;;; (C3:C0) = C3:C0 * B3:B0 -;;; Clobbers: R26, R27, __tmp_reg__ -DEFUN __mulsi3 - movw A0, C0 - push C2 - push C3 - XCALL __muluhisi3 - pop A1 - pop A0 - ;; A1:A0 now contains the high word of A - mul A0, B0 - add C2, r0 - adc C3, r1 - mul A0, B1 - add C3, r0 - mul A1, B0 - add C3, r0 - clr __zero_reg__ - ret -ENDF __mulsi3 -#endif /* L_mulsi3 */ - -#undef A0 -#undef A1 - -#undef B0 -#undef B1 -#undef B2 -#undef B3 - -#undef C0 -#undef C1 -#undef C2 -#undef C3 - -#endif /* __AVR_HAVE_MUL__ */ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - -.section .text.libgcc.div, "ax", @progbits - -/******************************************************* - Division 8 / 8 => (result + remainder) -*******************************************************/ -#define r_rem r25 /* remainder */ -#define r_arg1 r24 /* dividend, quotient */ -#define r_arg2 r22 /* divisor */ -#define r_cnt r23 /* loop count */ - -#if defined (L_udivmodqi4) -DEFUN __udivmodqi4 - sub r_rem,r_rem ; clear remainder and carry - ldi r_cnt,9 ; init loop counter - rjmp __udivmodqi4_ep ; jump to entry point -__udivmodqi4_loop: - rol r_rem ; shift dividend into remainder - cp r_rem,r_arg2 ; compare remainder & divisor - brcs __udivmodqi4_ep ; remainder <= divisor - sub r_rem,r_arg2 ; restore remainder -__udivmodqi4_ep: - rol r_arg1 ; shift dividend (with CARRY) - dec r_cnt ; decrement loop counter - brne __udivmodqi4_loop - com r_arg1 ; complement result - ; because C flag was complemented in loop - ret -ENDF __udivmodqi4 -#endif /* defined (L_udivmodqi4) */ - -#if defined (L_divmodqi4) -DEFUN __divmodqi4 - bst r_arg1,7 ; store sign of dividend - mov __tmp_reg__,r_arg1 - eor __tmp_reg__,r_arg2; r0.7 is sign of result - sbrc r_arg1,7 - neg r_arg1 ; dividend negative : negate - sbrc r_arg2,7 - neg r_arg2 ; divisor negative : negate - XCALL __udivmodqi4 ; do the unsigned div/mod - brtc __divmodqi4_1 - neg r_rem ; correct remainder sign -__divmodqi4_1: - sbrc __tmp_reg__,7 - neg r_arg1 ; correct result sign -__divmodqi4_exit: - ret -ENDF __divmodqi4 -#endif /* defined (L_divmodqi4) */ - -#undef r_rem -#undef r_arg1 -#undef r_arg2 -#undef r_cnt - - -/******************************************************* - Division 16 / 16 => (result + remainder) -*******************************************************/ -#define r_remL r26 /* remainder Low */ -#define r_remH r27 /* remainder High */ - -/* return: remainder */ -#define r_arg1L r24 /* dividend Low */ -#define r_arg1H r25 /* dividend High */ - -/* return: quotient */ -#define r_arg2L r22 /* divisor Low */ -#define r_arg2H r23 /* divisor High */ - -#define r_cnt r21 /* loop count */ - -#if defined (L_udivmodhi4) -DEFUN __udivmodhi4 - sub r_remL,r_remL - sub r_remH,r_remH ; clear remainder and carry - ldi r_cnt,17 ; init loop counter - rjmp __udivmodhi4_ep ; jump to entry point -__udivmodhi4_loop: - rol r_remL ; shift dividend into remainder - rol r_remH - cp r_remL,r_arg2L ; compare remainder & divisor - cpc r_remH,r_arg2H - brcs __udivmodhi4_ep ; remainder < divisor - sub r_remL,r_arg2L ; restore remainder - sbc 
r_remH,r_arg2H -__udivmodhi4_ep: - rol r_arg1L ; shift dividend (with CARRY) - rol r_arg1H - dec r_cnt ; decrement loop counter - brne __udivmodhi4_loop - com r_arg1L - com r_arg1H -; div/mod results to return registers, as for the div() function - mov_l r_arg2L, r_arg1L ; quotient - mov_h r_arg2H, r_arg1H - mov_l r_arg1L, r_remL ; remainder - mov_h r_arg1H, r_remH - ret -ENDF __udivmodhi4 -#endif /* defined (L_udivmodhi4) */ - -#if defined (L_divmodhi4) -DEFUN __divmodhi4 - .global _div -_div: - bst r_arg1H,7 ; store sign of dividend - mov __tmp_reg__,r_arg1H - eor __tmp_reg__,r_arg2H ; r0.7 is sign of result - rcall __divmodhi4_neg1 ; dividend negative : negate - sbrc r_arg2H,7 - rcall __divmodhi4_neg2 ; divisor negative : negate - XCALL __udivmodhi4 ; do the unsigned div/mod - rcall __divmodhi4_neg1 ; correct remainder sign - tst __tmp_reg__ - brpl __divmodhi4_exit -__divmodhi4_neg2: - com r_arg2H - neg r_arg2L ; correct divisor/result sign - sbci r_arg2H,0xff -__divmodhi4_exit: - ret -__divmodhi4_neg1: - brtc __divmodhi4_exit - com r_arg1H - neg r_arg1L ; correct dividend/remainder sign - sbci r_arg1H,0xff - ret -ENDF __divmodhi4 -#endif /* defined (L_divmodhi4) */ - -#undef r_remH -#undef r_remL - -#undef r_arg1H -#undef r_arg1L - -#undef r_arg2H -#undef r_arg2L - -#undef r_cnt - -/******************************************************* - Division 32 / 32 => (result + remainder) -*******************************************************/ -#define r_remHH r31 /* remainder High */ -#define r_remHL r30 -#define r_remH r27 -#define r_remL r26 /* remainder Low */ - -/* return: remainder */ -#define r_arg1HH r25 /* dividend High */ -#define r_arg1HL r24 -#define r_arg1H r23 -#define r_arg1L r22 /* dividend Low */ - -/* return: quotient */ -#define r_arg2HH r21 /* divisor High */ -#define r_arg2HL r20 -#define r_arg2H r19 -#define r_arg2L r18 /* divisor Low */ - -#define r_cnt __zero_reg__ /* loop count (0 after the loop!) 
*/ - -#if defined (L_udivmodsi4) -DEFUN __udivmodsi4 - ldi r_remL, 33 ; init loop counter - mov r_cnt, r_remL - sub r_remL,r_remL - sub r_remH,r_remH ; clear remainder and carry - mov_l r_remHL, r_remL - mov_h r_remHH, r_remH - rjmp __udivmodsi4_ep ; jump to entry point -__udivmodsi4_loop: - rol r_remL ; shift dividend into remainder - rol r_remH - rol r_remHL - rol r_remHH - cp r_remL,r_arg2L ; compare remainder & divisor - cpc r_remH,r_arg2H - cpc r_remHL,r_arg2HL - cpc r_remHH,r_arg2HH - brcs __udivmodsi4_ep ; remainder <= divisor - sub r_remL,r_arg2L ; restore remainder - sbc r_remH,r_arg2H - sbc r_remHL,r_arg2HL - sbc r_remHH,r_arg2HH -__udivmodsi4_ep: - rol r_arg1L ; shift dividend (with CARRY) - rol r_arg1H - rol r_arg1HL - rol r_arg1HH - dec r_cnt ; decrement loop counter - brne __udivmodsi4_loop - ; __zero_reg__ now restored (r_cnt == 0) - com r_arg1L - com r_arg1H - com r_arg1HL - com r_arg1HH -; div/mod results to return registers, as for the ldiv() function - mov_l r_arg2L, r_arg1L ; quotient - mov_h r_arg2H, r_arg1H - mov_l r_arg2HL, r_arg1HL - mov_h r_arg2HH, r_arg1HH - mov_l r_arg1L, r_remL ; remainder - mov_h r_arg1H, r_remH - mov_l r_arg1HL, r_remHL - mov_h r_arg1HH, r_remHH - ret -ENDF __udivmodsi4 -#endif /* defined (L_udivmodsi4) */ - -#if defined (L_divmodsi4) -DEFUN __divmodsi4 - bst r_arg1HH,7 ; store sign of dividend - mov __tmp_reg__,r_arg1HH - eor __tmp_reg__,r_arg2HH ; r0.7 is sign of result - rcall __divmodsi4_neg1 ; dividend negative : negate - sbrc r_arg2HH,7 - rcall __divmodsi4_neg2 ; divisor negative : negate - XCALL __udivmodsi4 ; do the unsigned div/mod - rcall __divmodsi4_neg1 ; correct remainder sign - rol __tmp_reg__ - brcc __divmodsi4_exit -__divmodsi4_neg2: - com r_arg2HH - com r_arg2HL - com r_arg2H - neg r_arg2L ; correct divisor/quotient sign - sbci r_arg2H,0xff - sbci r_arg2HL,0xff - sbci r_arg2HH,0xff -__divmodsi4_exit: - ret -__divmodsi4_neg1: - brtc __divmodsi4_exit - com r_arg1HH - com r_arg1HL - com r_arg1H - neg r_arg1L ; correct dividend/remainder sign - sbci r_arg1H, 0xff - sbci r_arg1HL,0xff - sbci r_arg1HH,0xff - ret -ENDF __divmodsi4 -#endif /* defined (L_divmodsi4) */ - - -.section .text.libgcc.prologue, "ax", @progbits - -/********************************** - * This is a prologue subroutine - **********************************/ -#if defined (L_prologue) - -DEFUN __prologue_saves__ - push r2 - push r3 - push r4 - push r5 - push r6 - push r7 - push r8 - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push r16 - push r17 - push r28 - push r29 - in r28,__SP_L__ - in r29,__SP_H__ - sub r28,r26 - sbc r29,r27 - in __tmp_reg__,__SREG__ - cli - out __SP_H__,r29 - out __SREG__,__tmp_reg__ - out __SP_L__,r28 -#if defined (__AVR_HAVE_EIJMP_EICALL__) - eijmp -#else - ijmp -#endif - -ENDF __prologue_saves__ -#endif /* defined (L_prologue) */ - -/* - * This is an epilogue subroutine - */ -#if defined (L_epilogue) - -DEFUN __epilogue_restores__ - ldd r2,Y+18 - ldd r3,Y+17 - ldd r4,Y+16 - ldd r5,Y+15 - ldd r6,Y+14 - ldd r7,Y+13 - ldd r8,Y+12 - ldd r9,Y+11 - ldd r10,Y+10 - ldd r11,Y+9 - ldd r12,Y+8 - ldd r13,Y+7 - ldd r14,Y+6 - ldd r15,Y+5 - ldd r16,Y+4 - ldd r17,Y+3 - ldd r26,Y+2 - ldd r27,Y+1 - add r28,r30 - adc r29,__zero_reg__ - in __tmp_reg__,__SREG__ - cli - out __SP_H__,r29 - out __SREG__,__tmp_reg__ - out __SP_L__,r28 - mov_l r28, r26 - mov_h r29, r27 - ret -ENDF __epilogue_restores__ -#endif /* defined (L_epilogue) */ - -#ifdef L_exit - .section .fini9,"ax",@progbits -DEFUN _exit - .weak exit -exit: -ENDF _exit - - 
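;; Editorial note (not part of the patch): __divmodqi4, __divmodhi4 and
;; __divmodsi4 above all divide unsigned and then patch up the signs -- the
;; remainder takes the dividend's sign, the quotient is negated when the
;; operand signs differ.  A hedged C model with invented names (division by
;; zero and the INT32_MIN corner are left aside here):
/*
#include <stdint.h>

static void
divmodsi4_model (int32_t a, int32_t b, int32_t *q, int32_t *r)
{
  uint32_t ua = (a < 0) ? 0u - (uint32_t) a : (uint32_t) a;
  uint32_t ub = (b < 0) ? 0u - (uint32_t) b : (uint32_t) b;
  uint32_t uq = ua / ub;        // the assembly calls __udivmodsi4 here
  uint32_t ur = ua % ub;
  *q = ((a < 0) != (b < 0)) ? -(int32_t) uq : (int32_t) uq;
  *r = (a < 0) ? -(int32_t) ur : (int32_t) ur;
}
*/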
/* Code from .fini8 ... .fini1 sections inserted by ld script. */ - - .section .fini0,"ax",@progbits - cli -__stop_program: - rjmp __stop_program -#endif /* defined (L_exit) */ - -#ifdef L_cleanup - .weak _cleanup - .func _cleanup -_cleanup: - ret -.endfunc -#endif /* defined (L_cleanup) */ - - -.section .text.libgcc, "ax", @progbits - -#ifdef L_tablejump -DEFUN __tablejump2__ - lsl r30 - rol r31 - ;; FALLTHRU -ENDF __tablejump2__ - -DEFUN __tablejump__ -#if defined (__AVR_HAVE_LPMX__) - lpm __tmp_reg__, Z+ - lpm r31, Z - mov r30, __tmp_reg__ -#if defined (__AVR_HAVE_EIJMP_EICALL__) - eijmp -#else - ijmp -#endif - -#else /* !HAVE_LPMX */ - lpm - adiw r30, 1 - push r0 - lpm - push r0 -#if defined (__AVR_HAVE_EIJMP_EICALL__) - in __tmp_reg__, __EIND__ - push __tmp_reg__ -#endif - ret -#endif /* !HAVE_LPMX */ -ENDF __tablejump__ -#endif /* defined (L_tablejump) */ - -#ifdef L_copy_data - .section .init4,"ax",@progbits -DEFUN __do_copy_data -#if defined(__AVR_HAVE_ELPMX__) - ldi r17, hi8(__data_end) - ldi r26, lo8(__data_start) - ldi r27, hi8(__data_start) - ldi r30, lo8(__data_load_start) - ldi r31, hi8(__data_load_start) - ldi r16, hh8(__data_load_start) - out __RAMPZ__, r16 - rjmp .L__do_copy_data_start -.L__do_copy_data_loop: - elpm r0, Z+ - st X+, r0 -.L__do_copy_data_start: - cpi r26, lo8(__data_end) - cpc r27, r17 - brne .L__do_copy_data_loop -#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__) - ldi r17, hi8(__data_end) - ldi r26, lo8(__data_start) - ldi r27, hi8(__data_start) - ldi r30, lo8(__data_load_start) - ldi r31, hi8(__data_load_start) - ldi r16, hh8(__data_load_start - 0x10000) -.L__do_copy_data_carry: - inc r16 - out __RAMPZ__, r16 - rjmp .L__do_copy_data_start -.L__do_copy_data_loop: - elpm - st X+, r0 - adiw r30, 1 - brcs .L__do_copy_data_carry -.L__do_copy_data_start: - cpi r26, lo8(__data_end) - cpc r27, r17 - brne .L__do_copy_data_loop -#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) - ldi r17, hi8(__data_end) - ldi r26, lo8(__data_start) - ldi r27, hi8(__data_start) - ldi r30, lo8(__data_load_start) - ldi r31, hi8(__data_load_start) - rjmp .L__do_copy_data_start -.L__do_copy_data_loop: -#if defined (__AVR_HAVE_LPMX__) - lpm r0, Z+ -#else - lpm - adiw r30, 1 -#endif - st X+, r0 -.L__do_copy_data_start: - cpi r26, lo8(__data_end) - cpc r27, r17 - brne .L__do_copy_data_loop -#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */ -ENDF __do_copy_data -#endif /* L_copy_data */ - -/* __do_clear_bss is only necessary if there is anything in .bss section. */ - -#ifdef L_clear_bss - .section .init4,"ax",@progbits -DEFUN __do_clear_bss - ldi r17, hi8(__bss_end) - ldi r26, lo8(__bss_start) - ldi r27, hi8(__bss_start) - rjmp .do_clear_bss_start -.do_clear_bss_loop: - st X+, __zero_reg__ -.do_clear_bss_start: - cpi r26, lo8(__bss_end) - cpc r27, r17 - brne .do_clear_bss_loop -ENDF __do_clear_bss -#endif /* L_clear_bss */ - -/* __do_global_ctors and __do_global_dtors are only necessary - if there are any constructors/destructors. 
*/ - -#ifdef L_ctors - .section .init6,"ax",@progbits -DEFUN __do_global_ctors -#if defined(__AVR_HAVE_RAMPZ__) - ldi r17, hi8(__ctors_start) - ldi r28, lo8(__ctors_end) - ldi r29, hi8(__ctors_end) - ldi r16, hh8(__ctors_end) - rjmp .L__do_global_ctors_start -.L__do_global_ctors_loop: - sbiw r28, 2 - sbc r16, __zero_reg__ - mov_h r31, r29 - mov_l r30, r28 - out __RAMPZ__, r16 - XCALL __tablejump_elpm__ -.L__do_global_ctors_start: - cpi r28, lo8(__ctors_start) - cpc r29, r17 - ldi r24, hh8(__ctors_start) - cpc r16, r24 - brne .L__do_global_ctors_loop -#else - ldi r17, hi8(__ctors_start) - ldi r28, lo8(__ctors_end) - ldi r29, hi8(__ctors_end) - rjmp .L__do_global_ctors_start -.L__do_global_ctors_loop: - sbiw r28, 2 - mov_h r31, r29 - mov_l r30, r28 - XCALL __tablejump__ -.L__do_global_ctors_start: - cpi r28, lo8(__ctors_start) - cpc r29, r17 - brne .L__do_global_ctors_loop -#endif /* defined(__AVR_HAVE_RAMPZ__) */ -ENDF __do_global_ctors -#endif /* L_ctors */ - -#ifdef L_dtors - .section .fini6,"ax",@progbits -DEFUN __do_global_dtors -#if defined(__AVR_HAVE_RAMPZ__) - ldi r17, hi8(__dtors_end) - ldi r28, lo8(__dtors_start) - ldi r29, hi8(__dtors_start) - ldi r16, hh8(__dtors_start) - rjmp .L__do_global_dtors_start -.L__do_global_dtors_loop: - sbiw r28, 2 - sbc r16, __zero_reg__ - mov_h r31, r29 - mov_l r30, r28 - out __RAMPZ__, r16 - XCALL __tablejump_elpm__ -.L__do_global_dtors_start: - cpi r28, lo8(__dtors_end) - cpc r29, r17 - ldi r24, hh8(__dtors_end) - cpc r16, r24 - brne .L__do_global_dtors_loop -#else - ldi r17, hi8(__dtors_end) - ldi r28, lo8(__dtors_start) - ldi r29, hi8(__dtors_start) - rjmp .L__do_global_dtors_start -.L__do_global_dtors_loop: - mov_h r31, r29 - mov_l r30, r28 - XCALL __tablejump__ - adiw r28, 2 -.L__do_global_dtors_start: - cpi r28, lo8(__dtors_end) - cpc r29, r17 - brne .L__do_global_dtors_loop -#endif /* defined(__AVR_HAVE_RAMPZ__) */ -ENDF __do_global_dtors -#endif /* L_dtors */ - -.section .text.libgcc, "ax", @progbits - -#ifdef L_tablejump_elpm -DEFUN __tablejump_elpm__ -#if defined (__AVR_HAVE_ELPM__) -#if defined (__AVR_HAVE_LPMX__) - elpm __tmp_reg__, Z+ - elpm r31, Z - mov r30, __tmp_reg__ -#if defined (__AVR_HAVE_EIJMP_EICALL__) - eijmp -#else - ijmp -#endif - -#else - elpm - adiw r30, 1 - push r0 - elpm - push r0 -#if defined (__AVR_HAVE_EIJMP_EICALL__) - in __tmp_reg__, __EIND__ - push __tmp_reg__ -#endif - ret -#endif -#endif /* defined (__AVR_HAVE_ELPM__) */ -ENDF __tablejump_elpm__ -#endif /* defined (L_tablejump_elpm) */ - - -.section .text.libgcc.builtins, "ax", @progbits - -/********************************** - * Find first set Bit (ffs) - **********************************/ - -#if defined (L_ffssi2) -;; find first set bit -;; r25:r24 = ffs32 (r25:r22) -;; clobbers: r22, r26 -DEFUN __ffssi2 - clr r26 - tst r22 - brne 1f - subi r26, -8 - or r22, r23 - brne 1f - subi r26, -8 - or r22, r24 - brne 1f - subi r26, -8 - or r22, r25 - brne 1f - ret -1: mov r24, r22 - XJMP __loop_ffsqi2 -ENDF __ffssi2 -#endif /* defined (L_ffssi2) */ - -#if defined (L_ffshi2) -;; find first set bit -;; r25:r24 = ffs16 (r25:r24) -;; clobbers: r26 -DEFUN __ffshi2 - clr r26 -#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ - ;; Some cores have problem skipping 2-word instruction - tst r24 - breq 2f -#else - cpse r24, __zero_reg__ -#endif /* __AVR_HAVE_JMP_CALL__ */ -1: XJMP __loop_ffsqi2 -2: ldi r26, 8 - or r24, r25 - brne 1b - ret -ENDF __ffshi2 -#endif /* defined (L_ffshi2) */ - -#if defined (L_loop_ffsqi2) -;; Helper for ffshi2, ffssi2 -;; r25:r24 = r26 + zero_extend16 (ffs8(r24)) -;; 
r24 must be != 0 -;; clobbers: r26 -DEFUN __loop_ffsqi2 - inc r26 - lsr r24 - brcc __loop_ffsqi2 - mov r24, r26 - clr r25 - ret -ENDF __loop_ffsqi2 -#endif /* defined (L_loop_ffsqi2) */ - - -/********************************** - * Count trailing Zeros (ctz) - **********************************/ - -#if defined (L_ctzsi2) -;; count trailing zeros -;; r25:r24 = ctz32 (r25:r22) -;; clobbers: r26, r22 -;; ctz(0) = 255 -;; Note that ctz(0) in undefined for GCC -DEFUN __ctzsi2 - XCALL __ffssi2 - dec r24 - ret -ENDF __ctzsi2 -#endif /* defined (L_ctzsi2) */ - -#if defined (L_ctzhi2) -;; count trailing zeros -;; r25:r24 = ctz16 (r25:r24) -;; clobbers: r26 -;; ctz(0) = 255 -;; Note that ctz(0) in undefined for GCC -DEFUN __ctzhi2 - XCALL __ffshi2 - dec r24 - ret -ENDF __ctzhi2 -#endif /* defined (L_ctzhi2) */ - - -/********************************** - * Count leading Zeros (clz) - **********************************/ - -#if defined (L_clzdi2) -;; count leading zeros -;; r25:r24 = clz64 (r25:r18) -;; clobbers: r22, r23, r26 -DEFUN __clzdi2 - XCALL __clzsi2 - sbrs r24, 5 - ret - mov_l r22, r18 - mov_h r23, r19 - mov_l r24, r20 - mov_h r25, r21 - XCALL __clzsi2 - subi r24, -32 - ret -ENDF __clzdi2 -#endif /* defined (L_clzdi2) */ - -#if defined (L_clzsi2) -;; count leading zeros -;; r25:r24 = clz32 (r25:r22) -;; clobbers: r26 -DEFUN __clzsi2 - XCALL __clzhi2 - sbrs r24, 4 - ret - mov_l r24, r22 - mov_h r25, r23 - XCALL __clzhi2 - subi r24, -16 - ret -ENDF __clzsi2 -#endif /* defined (L_clzsi2) */ - -#if defined (L_clzhi2) -;; count leading zeros -;; r25:r24 = clz16 (r25:r24) -;; clobbers: r26 -DEFUN __clzhi2 - clr r26 - tst r25 - brne 1f - subi r26, -8 - or r25, r24 - brne 1f - ldi r24, 16 - ret -1: cpi r25, 16 - brsh 3f - subi r26, -3 - swap r25 -2: inc r26 -3: lsl r25 - brcc 2b - mov r24, r26 - clr r25 - ret -ENDF __clzhi2 -#endif /* defined (L_clzhi2) */ - - -/********************************** - * Parity - **********************************/ - -#if defined (L_paritydi2) -;; r25:r24 = parity64 (r25:r18) -;; clobbers: __tmp_reg__ -DEFUN __paritydi2 - eor r24, r18 - eor r24, r19 - eor r24, r20 - eor r24, r21 - XJMP __paritysi2 -ENDF __paritydi2 -#endif /* defined (L_paritydi2) */ - -#if defined (L_paritysi2) -;; r25:r24 = parity32 (r25:r22) -;; clobbers: __tmp_reg__ -DEFUN __paritysi2 - eor r24, r22 - eor r24, r23 - XJMP __parityhi2 -ENDF __paritysi2 -#endif /* defined (L_paritysi2) */ - -#if defined (L_parityhi2) -;; r25:r24 = parity16 (r25:r24) -;; clobbers: __tmp_reg__ -DEFUN __parityhi2 - eor r24, r25 -;; FALLTHRU -ENDF __parityhi2 - -;; r25:r24 = parity8 (r24) -;; clobbers: __tmp_reg__ -DEFUN __parityqi2 - ;; parity is in r24[0..7] - mov __tmp_reg__, r24 - swap __tmp_reg__ - eor r24, __tmp_reg__ - ;; parity is in r24[0..3] - subi r24, -4 - andi r24, -5 - subi r24, -6 - ;; parity is in r24[0,3] - sbrc r24, 3 - inc r24 - ;; parity is in r24[0] - andi r24, 1 - clr r25 - ret -ENDF __parityqi2 -#endif /* defined (L_parityhi2) */ - - -/********************************** - * Population Count - **********************************/ - -#if defined (L_popcounthi2) -;; population count -;; r25:r24 = popcount16 (r25:r24) -;; clobbers: __tmp_reg__ -DEFUN __popcounthi2 - XCALL __popcountqi2 - push r24 - mov r24, r25 - XCALL __popcountqi2 - clr r25 - ;; FALLTHRU -ENDF __popcounthi2 - -DEFUN __popcounthi2_tail - pop __tmp_reg__ - add r24, __tmp_reg__ - ret -ENDF __popcounthi2_tail -#endif /* defined (L_popcounthi2) */ - -#if defined (L_popcountsi2) -;; population count -;; r25:r24 = popcount32 (r25:r22) -;; 
clobbers: __tmp_reg__ -DEFUN __popcountsi2 - XCALL __popcounthi2 - push r24 - mov_l r24, r22 - mov_h r25, r23 - XCALL __popcounthi2 - XJMP __popcounthi2_tail -ENDF __popcountsi2 -#endif /* defined (L_popcountsi2) */ - -#if defined (L_popcountdi2) -;; population count -;; r25:r24 = popcount64 (r25:r18) -;; clobbers: r22, r23, __tmp_reg__ -DEFUN __popcountdi2 - XCALL __popcountsi2 - push r24 - mov_l r22, r18 - mov_h r23, r19 - mov_l r24, r20 - mov_h r25, r21 - XCALL __popcountsi2 - XJMP __popcounthi2_tail -ENDF __popcountdi2 -#endif /* defined (L_popcountdi2) */ - -#if defined (L_popcountqi2) -;; population count -;; r24 = popcount8 (r24) -;; clobbers: __tmp_reg__ -DEFUN __popcountqi2 - mov __tmp_reg__, r24 - andi r24, 1 - lsr __tmp_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __tmp_reg__ - ret -ENDF __popcountqi2 -#endif /* defined (L_popcountqi2) */ - - -/********************************** - * Swap bytes - **********************************/ - -;; swap two registers with different register number -.macro bswap a, b - eor \a, \b - eor \b, \a - eor \a, \b -.endm - -#if defined (L_bswapsi2) -;; swap bytes -;; r25:r22 = bswap32 (r25:r22) -DEFUN __bswapsi2 - bswap r22, r25 - bswap r23, r24 - ret -ENDF __bswapsi2 -#endif /* defined (L_bswapsi2) */ - -#if defined (L_bswapdi2) -;; swap bytes -;; r25:r18 = bswap64 (r25:r18) -DEFUN __bswapdi2 - bswap r18, r25 - bswap r19, r24 - bswap r20, r23 - bswap r21, r22 - ret -ENDF __bswapdi2 -#endif /* defined (L_bswapdi2) */ - - -/********************************** - * 64-bit shifts - **********************************/ - -#if defined (L_ashrdi3) -;; Arithmetic shift right -;; r25:r18 = ashr64 (r25:r18, r17:r16) -DEFUN __ashrdi3 - push r16 - andi r16, 63 - breq 2f -1: asr r25 - ror r24 - ror r23 - ror r22 - ror r21 - ror r20 - ror r19 - ror r18 - dec r16 - brne 1b -2: pop r16 - ret -ENDF __ashrdi3 -#endif /* defined (L_ashrdi3) */ - -#if defined (L_lshrdi3) -;; Logic shift right -;; r25:r18 = lshr64 (r25:r18, r17:r16) -DEFUN __lshrdi3 - push r16 - andi r16, 63 - breq 2f -1: lsr r25 - ror r24 - ror r23 - ror r22 - ror r21 - ror r20 - ror r19 - ror r18 - dec r16 - brne 1b -2: pop r16 - ret -ENDF __lshrdi3 -#endif /* defined (L_lshrdi3) */ - -#if defined (L_ashldi3) -;; Shift left -;; r25:r18 = ashl64 (r25:r18, r17:r16) -DEFUN __ashldi3 - push r16 - andi r16, 63 - breq 2f -1: lsl r18 - rol r19 - rol r20 - rol r21 - rol r22 - rol r23 - rol r24 - rol r25 - dec r16 - brne 1b -2: pop r16 - ret -ENDF __ashldi3 -#endif /* defined (L_ashldi3) */ - - -.section .text.libgcc.fmul, "ax", @progbits - -/***********************************************************/ -;;; Softmul versions of FMUL, FMULS and FMULSU to implement -;;; __builtin_avr_fmul* if !AVR_HAVE_MUL -/***********************************************************/ - -#define A1 24 -#define B1 25 -#define C0 22 -#define C1 23 -#define A0 __tmp_reg__ - -#ifdef L_fmuls -;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction -;;; Clobbers: r24, r25, __tmp_reg__ -DEFUN __fmuls - ;; A0.7 = negate result? - mov A0, A1 - eor A0, B1 - ;; B1 = |B1| - sbrc B1, 7 - neg B1 - XJMP __fmulsu_exit -ENDF __fmuls -#endif /* L_fmuls */ - -#ifdef L_fmulsu -;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction -;;; Clobbers: r24, r25, __tmp_reg__ -DEFUN __fmulsu - ;; A0.7 = negate result? 
- mov A0, A1 -;; FALLTHRU -ENDF __fmulsu - -;; Helper for __fmuls and __fmulsu -DEFUN __fmulsu_exit - ;; A1 = |A1| - sbrc A1, 7 - neg A1 -#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ - ;; Some cores have problem skipping 2-word instruction - tst A0 - brmi 1f -#else - sbrs A0, 7 -#endif /* __AVR_HAVE_JMP_CALL__ */ - XJMP __fmul -1: XCALL __fmul - ;; C = -C iff A0.7 = 1 - com C1 - neg C0 - sbci C1, -1 - ret -ENDF __fmulsu_exit -#endif /* L_fmulsu */ - - -#ifdef L_fmul -;;; r22:r23 = fmul (r24, r25) like in FMUL instruction -;;; Clobbers: r24, r25, __tmp_reg__ -DEFUN __fmul - ; clear result - clr C0 - clr C1 - clr A0 -1: tst B1 - ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C. -2: brpl 3f - ;; C += A - add C0, A0 - adc C1, A1 -3: ;; A >>= 1 - lsr A1 - ror A0 - ;; B <<= 1 - lsl B1 - brne 2b - ret -ENDF __fmul -#endif /* L_fmul */ - -#undef A0 -#undef A1 -#undef B1 -#undef C0 -#undef C1 diff --git a/gcc/config/avr/t-avr b/gcc/config/avr/t-avr index 30e8d96447e..3f37e591f8e 100644 --- a/gcc/config/avr/t-avr +++ b/gcc/config/avr/t-avr @@ -39,54 +39,6 @@ $(srcdir)/config/avr/avr-tables.opt: $(srcdir)/config/avr/genopt.sh \ $(SHELL) $(srcdir)/config/avr/genopt.sh $(srcdir)/config/avr > \ $(srcdir)/config/avr/avr-tables.opt -LIB1ASMSRC = avr/libgcc.S -LIB1ASMFUNCS = \ - _mulqi3 \ - _mulhi3 \ - _mulhisi3 \ - _umulhisi3 \ - _usmulhisi3 \ - _muluhisi3 \ - _mulshisi3 \ - _mulsi3 \ - _udivmodqi4 \ - _divmodqi4 \ - _udivmodhi4 \ - _divmodhi4 \ - _udivmodsi4 \ - _divmodsi4 \ - _prologue \ - _epilogue \ - _exit \ - _cleanup \ - _tablejump \ - _tablejump_elpm \ - _copy_data \ - _clear_bss \ - _ctors \ - _dtors \ - _ffssi2 \ - _ffshi2 \ - _loop_ffsqi2 \ - _ctzsi2 \ - _ctzhi2 \ - _clzdi2 \ - _clzsi2 \ - _clzhi2 \ - _paritydi2 \ - _paritysi2 \ - _parityhi2 \ - _popcounthi2 \ - _popcountsi2 \ - _popcountdi2 \ - _popcountqi2 \ - _bswapsi2 \ - _bswapdi2 \ - _ashldi3 \ - _ashrdi3 \ - _lshrdi3 \ - _fmul _fmuls _fmulsu - LIB2FUNCS_EXCLUDE = \ _clz diff --git a/gcc/config/bfin/lib1funcs.asm b/gcc/config/bfin/lib1funcs.asm deleted file mode 100644 index c7bf4f3f05c..00000000000 --- a/gcc/config/bfin/lib1funcs.asm +++ /dev/null @@ -1,211 +0,0 @@ -/* libgcc functions for Blackfin. - Copyright (C) 2005, 2009 Free Software Foundation, Inc. - Contributed by Analog Devices. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. 
*/ - -#ifdef L_divsi3 -.text -.align 2 -.global ___divsi3; -.type ___divsi3, STT_FUNC; - -___divsi3: - [--SP]= RETS; - [--SP] = R7; - - R2 = -R0; - CC = R0 < 0; - IF CC R0 = R2; - R7 = CC; - - R2 = -R1; - CC = R1 < 0; - IF CC R1 = R2; - R2 = CC; - R7 = R7 ^ R2; - - CALL ___udivsi3; - - CC = R7; - R1 = -R0; - IF CC R0 = R1; - - R7 = [SP++]; - RETS = [SP++]; - RTS; -#endif - -#ifdef L_modsi3 -.align 2 -.global ___modsi3; -.type ___modsi3, STT_FUNC; - -___modsi3: - [--SP] = RETS; - [--SP] = R0; - [--SP] = R1; - CALL ___divsi3; - R2 = [SP++]; - R1 = [SP++]; - R2 *= R0; - R0 = R1 - R2; - RETS = [SP++]; - RTS; -#endif - -#ifdef L_udivsi3 -.align 2 -.global ___udivsi3; -.type ___udivsi3, STT_FUNC; - -___udivsi3: - P0 = 32; - LSETUP (0f, 1f) LC0 = P0; - /* upper half of dividend */ - R3 = 0; -0: - /* The first time round in the loop we shift in garbage, but since we - perform 33 shifts, it doesn't matter. */ - R0 = ROT R0 BY 1; - R3 = ROT R3 BY 1; - R2 = R3 - R1; - CC = R3 < R1 (IU); -1: - /* Last instruction of the loop. */ - IF ! CC R3 = R2; - - /* Shift in the last bit. */ - R0 = ROT R0 BY 1; - /* R0 is the result, R3 contains the remainder. */ - R0 = ~ R0; - RTS; -#endif - -#ifdef L_umodsi3 -.align 2 -.global ___umodsi3; -.type ___umodsi3, STT_FUNC; - -___umodsi3: - [--SP] = RETS; - CALL ___udivsi3; - R0 = R3; - RETS = [SP++]; - RTS; -#endif - -#ifdef L_umulsi3_highpart -.align 2 -.global ___umulsi3_highpart; -.type ___umulsi3_highpart, STT_FUNC; - -___umulsi3_highpart: - A1 = R1.L * R0.L (FU); - A1 = A1 >> 16; - A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU); - A1 += R0.L * R1.H (FU); - A1 = A1 >> 16; - A0 += A1; - R0 = A0 (FU); - RTS; -#endif - -#ifdef L_smulsi3_highpart -.align 2 -.global ___smulsi3_highpart; -.type ___smulsi3_highpart, STT_FUNC; - -___smulsi3_highpart: - A1 = R1.L * R0.L (FU); - A1 = A1 >> 16; - A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M); - A1 += R1.H * R0.L (IS,M); - A1 = A1 >>> 16; - R0 = (A0 += A1); - RTS; -#endif - -#ifdef L_muldi3 -.align 2 -.global ___muldi3; -.type ___muldi3, STT_FUNC; - -/* - R1:R0 * R3:R2 - = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l -[X] = (R1.h * R3.h) * 2^96 -[X] + (R1.h * R3.l + R1.l * R3.h) * 2^80 -[X] + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64 -[T1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48 -[T2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32 -[T3] + (R0.l * R2.h + R2.l * R0.h) * 2^16 -[T4] + (R0.l * R2.l) - - We can discard the first three lines marked "X" since we produce - only a 64 bit result. So, we need ten 16-bit multiplies. - - Individual mul-acc results: -[E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h -[E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h -[E3] = R0.l * R2.h + R2.l * R0.h -[E4] = R0.l * R2.l - - We also need to add high parts from lower-level results to higher ones: - E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4 - - One interesting property is that all parts of the result that depend - on the sign of the multiplication are discarded. Those would be the - multiplications involving R1.h and R3.h, but only the top 16 bit of - the 32 bit result depend on the sign, and since R1.h and R3.h only - occur in E1, the top half of these results is cut off. - So, we can just use FU mode for all of the 16-bit multiplies, and - ignore questions of when to use mixed mode. */ - -___muldi3: - /* [SP] technically is part of the caller's frame, but we can - use it as scratch space. 
*/ - A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12]; /* E1 */ - A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4; /* E1 */ - A0 += A1; /* E1 */ - R4 = A0.w; - A0 = R0.l * R3.l (FU); /* E2 */ - A0 += R2.l * R1.l (FU); /* E2 */ - - A1 = R2.L * R0.L (FU); /* E4 */ - R3 = A1.w; - A1 = A1 >> 16; /* E3c */ - A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU); /* E2, E3c */ - A1 += R0.L * R2.H (FU); /* E3c */ - R0 = A1.w; - A1 = A1 >> 16; /* E2c */ - A0 += A1; /* E2c */ - R1 = A0.w; - - /* low(result) = low(E3c):low(E4) */ - R0 = PACK (R0.l, R3.l); - /* high(result) = E2c + (E1 << 16) */ - R1.h = R1.h + R4.l (NS) || R4 = [SP]; - RTS; - -.size ___muldi3, .-___muldi3 -#endif diff --git a/gcc/config/bfin/t-bfin b/gcc/config/bfin/t-bfin deleted file mode 100644 index bb95ab4139e..00000000000 --- a/gcc/config/bfin/t-bfin +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2005, 2007, 2011 Free Software Foundation, Inc. -# -# This file is part of GCC. -# -# GCC is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GCC is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GCC; see the file COPYING3. If not see -# . - -## Target part of the Makefile - -LIB1ASMSRC = bfin/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart -LIB1ASMFUNCS += _smulsi3_highpart diff --git a/gcc/config/bfin/t-bfin-elf b/gcc/config/bfin/t-bfin-elf index fcf76c4ddfe..5cbcfeeb87f 100644 --- a/gcc/config/bfin/t-bfin-elf +++ b/gcc/config/bfin/t-bfin-elf @@ -18,10 +18,6 @@ ## Target part of the Makefile -LIB1ASMSRC = bfin/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart -LIB1ASMFUNCS += _smulsi3_highpart - TARGET_LIBGCC2_CFLAGS = -fpic MULTILIB_OPTIONS=mcpu=bf532-none diff --git a/gcc/config/bfin/t-bfin-linux b/gcc/config/bfin/t-bfin-linux index a83f9f2da83..9a1d6a09437 100644 --- a/gcc/config/bfin/t-bfin-linux +++ b/gcc/config/bfin/t-bfin-linux @@ -18,10 +18,6 @@ ## Target part of the Makefile -LIB1ASMSRC = bfin/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart -LIB1ASMFUNCS += _smulsi3_highpart - TARGET_LIBGCC2_CFLAGS = -fpic MULTILIB_OPTIONS=mcpu=bf532-none diff --git a/gcc/config/bfin/t-bfin-uclinux b/gcc/config/bfin/t-bfin-uclinux index 1be0796987b..b9fca803e0a 100644 --- a/gcc/config/bfin/t-bfin-uclinux +++ b/gcc/config/bfin/t-bfin-uclinux @@ -18,10 +18,6 @@ ## Target part of the Makefile -LIB1ASMSRC = bfin/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart -LIB1ASMFUNCS += _smulsi3_highpart - TARGET_LIBGCC2_CFLAGS = -fpic MULTILIB_OPTIONS=mcpu=bf532-none diff --git a/gcc/config/c6x/lib1funcs.asm b/gcc/config/c6x/lib1funcs.asm deleted file mode 100644 index 5bf34474bbd..00000000000 --- a/gcc/config/c6x/lib1funcs.asm +++ /dev/null @@ -1,438 +0,0 @@ -/* Copyright 2010, 2011 Free Software Foundation, Inc. - Contributed by Bernd Schmidt . 
- -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - - ;; ABI considerations for the divide functions - ;; The following registers are call-used: - ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 - ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 - ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 - ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 - ;; - ;; In our implementation, divu and remu are leaf functions, - ;; while both divi and remi call into divu. - ;; A0 is not clobbered by any of the functions. - ;; divu does not clobber B2 either, which is taken advantage of - ;; in remi. - ;; divi uses B5 to hold the original return address during - ;; the call to divu. - ;; remi uses B2 and A5 to hold the input values during the - ;; call to divu. It stores B3 in on the stack. - -#ifdef L_divsi3 -.text -.align 2 -.global __c6xabi_divi -.hidden __c6xabi_divi -.type __c6xabi_divi, STT_FUNC - -__c6xabi_divi: - call .s2 __c6xabi_divu -|| mv .d2 B3, B5 -|| cmpgt .l1 0, A4, A1 -|| cmpgt .l2 0, B4, B1 - - [A1] neg .l1 A4, A4 -|| [B1] neg .l2 B4, B4 -|| xor .s1x A1, B1, A1 - -#ifdef _TMS320C6400 - [A1] addkpc .s2 1f, B3, 4 -#else - [A1] mvkl .s2 1f, B3 - [A1] mvkh .s2 1f, B3 - nop 2 -#endif -1: - neg .l1 A4, A4 -|| mv .l2 B3,B5 -|| ret .s2 B5 - nop 5 -#endif - -#if defined L_modsi3 || defined L_divmodsi4 -.align 2 -#ifdef L_modsi3 -#define MOD_OUTPUT_REG A4 -.global __c6xabi_remi -.hidden __c6xabi_remi -.type __c6xabi_remi, STT_FUNC -#else -#define MOD_OUTPUT_REG A5 -.global __c6xabi_divremi -.hidden __c6xabi_divremi -.type __c6xabi_divremi, STT_FUNC -__c6xabi_divremi: -#endif - -__c6xabi_remi: - stw .d2t2 B3, *B15--[2] -|| cmpgt .l1 0, A4, A1 -|| cmpgt .l2 0, B4, B2 -|| mv .s1 A4, A5 -|| call .s2 __c6xabi_divu - - [A1] neg .l1 A4, A4 -|| [B2] neg .l2 B4, B4 -|| xor .s2x B2, A1, B0 -|| mv .d2 B4, B2 - -#ifdef _TMS320C6400 - [B0] addkpc .s2 1f, B3, 1 - [!B0] addkpc .s2 2f, B3, 1 - nop 2 -#else - [B0] mvkl .s2 1f,B3 - [!B0] mvkl .s2 2f,B3 - - [B0] mvkh .s2 1f,B3 - [!B0] mvkh .s2 2f,B3 -#endif -1: - neg .l1 A4, A4 -2: - ldw .d2t2 *++B15[2], B3 - -#ifdef _TMS320C6400_PLUS - mpy32 .m1x A4, B2, A6 - nop 3 - ret .s2 B3 - sub .l1 A5, A6, MOD_OUTPUT_REG - nop 4 -#else - mpyu .m1x A4, B2, A1 - nop 1 - mpylhu .m1x A4, B2, A6 -|| mpylhu .m2x B2, A4, B2 - nop 1 - add .l1x A6, B2, A6 -|| ret .s2 B3 - shl .s1 A6, 16, A6 - add .d1 A6, A1, A6 - sub .l1 A5, A6, MOD_OUTPUT_REG - nop 2 -#endif - -#endif - -#if defined L_udivsi3 || defined L_udivmodsi4 -.align 2 -#ifdef L_udivsi3 -.global __c6xabi_divu -.hidden __c6xabi_divu -.type __c6xabi_divu, STT_FUNC -__c6xabi_divu: -#else -.global __c6xabi_divremu -.hidden __c6xabi_divremu -.type __c6xabi_divremu, STT_FUNC -__c6xabi_divremu: -#endif - ;; We use a series of up to 31 subc 
instructions. First, we find - ;; out how many leading zero bits there are in the divisor. This - ;; gives us both a shift count for aligning (shifting) the divisor - ;; to the, and the number of times we have to execute subc. - - ;; At the end, we have both the remainder and most of the quotient - ;; in A4. The top bit of the quotient is computed first and is - ;; placed in A2. - - ;; Return immediately if the dividend is zero. Setting B4 to 1 - ;; is a trick to allow us to leave the following insns in the jump - ;; delay slot without affecting the result. - mv .s2x A4, B1 - -#ifndef _TMS320C6400 -[!b1] mvk .s2 1, B4 -#endif -[b1] lmbd .l2 1, B4, B1 -||[!b1] b .s2 B3 ; RETURN A -#ifdef _TMS320C6400 -||[!b1] mvk .d2 1, B4 -#endif -#ifdef L_udivmodsi4 -||[!b1] zero .s1 A5 -#endif - mv .l1x B1, A6 -|| shl .s2 B4, B1, B4 - - ;; The loop performs a maximum of 28 steps, so we do the - ;; first 3 here. - cmpltu .l1x A4, B4, A2 -[!A2] sub .l1x A4, B4, A4 -|| shru .s2 B4, 1, B4 -|| xor .s1 1, A2, A2 - - shl .s1 A2, 31, A2 -|| [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - - ;; RETURN A may happen here (note: must happen before the next branch) -0: - cmpgt .l2 B1, 7, B0 -|| [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -|| [b0] b .s1 0b -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - ;; loop backwards branch happens here - - ret .s2 B3 -|| mvk .s1 32, A1 - sub .l1 A1, A6, A6 -#ifdef L_udivmodsi4 -|| extu .s1 A4, A6, A5 -#endif - shl .s1 A4, A6, A4 - shru .s1 A4, 1, A4 -|| sub .l1 A6, 1, A6 - or .l1 A2, A4, A4 - shru .s1 A4, A6, A4 - nop - -#endif - -#ifdef L_umodsi3 -.align 2 -.global __c6xabi_remu -.hidden __c6xabi_remu -.type __c6xabi_remu, STT_FUNC -__c6xabi_remu: - ;; The ABI seems designed to prevent these functions calling each other, - ;; so we duplicate most of the divsi3 code here. 
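The comments above describe the classic normalize-then-conditionally-subtract scheme: count the divisor's leading zeros (LMBD), shift it up to the dividend, then peel off one quotient bit per SUBC. A stand-alone C model of that loop is sketched below; it is illustrative only: the helper name is made up, __builtin_clz stands in for LMBD, and the zero-divisor case, which is undefined behaviour in C, is simply skipped. __c6xabi_remu, whose body follows, runs the same loop but hands back the remainder instead of the quotient.

#include <stdint.h>

/* Illustrative sketch of the LMBD + SUBC division loop; not the
   patch's code.  Returns the quotient, stores the remainder.  */
static uint32_t
udivmodsi4_sketch (uint32_t n, uint32_t d, uint32_t *rem)
{
  uint32_t q = 0;

  if (d != 0 && n != 0)
    {
      /* Align the divisor with the dividend's top set bit;
         the assembly gets this count from LMBD.  */
      int steps = __builtin_clz (d) - __builtin_clz (n);
      if (steps < 0)
        steps = 0;
      d <<= steps;

      /* One conditional subtract per step, as SUBC does.  */
      for (int i = 0; i <= steps; i++)
        {
          q <<= 1;
          if (n >= d)
            {
              n -= d;
              q |= 1;
            }
          d >>= 1;
        }
    }

  if (rem)
    *rem = n;                 /* whatever is left of the dividend */
  return q;
}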
- mv .s2x A4, B1 -#ifndef _TMS320C6400 -[!b1] mvk .s2 1, B4 -#endif - lmbd .l2 1, B4, B1 -||[!b1] b .s2 B3 ; RETURN A -#ifdef _TMS320C6400 -||[!b1] mvk .d2 1, B4 -#endif - - mv .l1x B1, A7 -|| shl .s2 B4, B1, B4 - - cmpltu .l1x A4, B4, A1 -[!a1] sub .l1x A4, B4, A4 - shru .s2 B4, 1, B4 - -0: - cmpgt .l2 B1, 7, B0 -|| [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - ;; RETURN A may happen here (note: must happen before the next branch) -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -|| [b0] b .s1 0b -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - ;; loop backwards branch happens here - - ret .s2 B3 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 - - extu .s1 A4, A7, A4 - nop 2 -#endif - -#if defined L_strasgi_64plus && defined _TMS320C6400_PLUS - -.align 2 -.global __c6xabi_strasgi_64plus -.hidden __c6xabi_strasgi_64plus -.type __c6xabi_strasgi_64plus, STT_FUNC -__c6xabi_strasgi_64plus: - shru .s2x a6, 2, b31 -|| mv .s1 a4, a30 -|| mv .d2 b4, b30 - - add .s2 -4, b31, b31 - - sploopd 1 -|| mvc .s2 b31, ilc - ldw .d2t2 *b30++, b31 - nop 4 - mv .s1x b31,a31 - spkernel 6, 0 -|| stw .d1t1 a31, *a30++ - - ret .s2 b3 - nop 5 -#endif - -#ifdef L_strasgi -.global __c6xabi_strasgi -.type __c6xabi_strasgi, STT_FUNC -__c6xabi_strasgi: - ;; This is essentially memcpy, with alignment known to be at least - ;; 4, and the size a multiple of 4 greater than or equal to 28. - ldw .d2t1 *B4++, A0 -|| mvk .s2 16, B1 - ldw .d2t1 *B4++, A1 -|| mvk .s2 20, B2 -|| sub .d1 A6, 24, A6 - ldw .d2t1 *B4++, A5 - ldw .d2t1 *B4++, A7 -|| mv .l2x A6, B7 - ldw .d2t1 *B4++, A8 - ldw .d2t1 *B4++, A9 -|| mv .s2x A0, B5 -|| cmpltu .l2 B2, B7, B0 - -0: - stw .d1t2 B5, *A4++ -||[b0] ldw .d2t1 *B4++, A0 -|| mv .s2x A1, B5 -|| mv .l2 B7, B6 - -[b0] sub .d2 B6, 24, B7 -||[b0] b .s2 0b -|| cmpltu .l2 B1, B6, B0 - -[b0] ldw .d2t1 *B4++, A1 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A5, B5 -|| cmpltu .l2 12, B6, B0 - -[b0] ldw .d2t1 *B4++, A5 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A7, B5 -|| cmpltu .l2 8, B6, B0 - -[b0] ldw .d2t1 *B4++, A7 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A8, B5 -|| cmpltu .l2 4, B6, B0 - -[b0] ldw .d2t1 *B4++, A8 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A9, B5 -|| cmpltu .l2 0, B6, B0 - -[b0] ldw .d2t1 *B4++, A9 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A0, B5 -|| cmpltu .l2 B2, B7, B0 - - ;; loop back branch happens here - - cmpltu .l2 B1, B6, B0 -|| ret .s2 b3 - -[b0] stw .d1t1 A1, *A4++ -|| cmpltu .l2 12, B6, B0 -[b0] stw .d1t1 A5, *A4++ -|| cmpltu .l2 8, B6, B0 -[b0] stw .d1t1 A7, *A4++ -|| cmpltu .l2 4, B6, B0 -[b0] stw .d1t1 A8, *A4++ -|| cmpltu .l2 0, B6, B0 -[b0] stw .d1t1 A9, *A4++ - - ;; return happens here - -#endif - -#ifdef _TMS320C6400_PLUS -#ifdef L_push_rts -.align 2 -.global __c6xabi_push_rts -.hidden __c6xabi_push_rts -.type __c6xabi_push_rts, STT_FUNC -__c6xabi_push_rts: - stw .d2t2 B14, *B15--[2] - stdw .d2t1 A15:A14, *B15-- -|| b .s2x A3 - stdw .d2t2 B13:B12, *B15-- - stdw .d2t1 A13:A12, *B15-- - stdw .d2t2 B11:B10, *B15-- - stdw .d2t1 A11:A10, *B15-- - stdw .d2t2 B3:B2, *B15-- -#endif - -#ifdef L_pop_rts -.align 2 -.global __c6xabi_pop_rts -.hidden __c6xabi_pop_rts -.type __c6xabi_pop_rts, STT_FUNC -__c6xabi_pop_rts: - lddw .d2t2 *++B15, B3:B2 - lddw .d2t1 *++B15, A11:A10 - lddw .d2t2 *++B15, B11:B10 - lddw .d2t1 *++B15, A13:A12 - lddw .d2t2 *++B15, B13:B12 - 
lddw .d2t1 *++B15, A15:A14 -|| b .s2 B3 - ldw .d2t2 *++B15[2], B14 - nop 4 -#endif - -#ifdef L_call_stub -.align 2 -.global __c6xabi_call_stub -.type __c6xabi_call_stub, STT_FUNC -__c6xabi_call_stub: - stw .d2t1 A2, *B15--[2] - stdw .d2t1 A7:A6, *B15-- -|| call .s2 B31 - stdw .d2t1 A1:A0, *B15-- - stdw .d2t2 B7:B6, *B15-- - stdw .d2t2 B5:B4, *B15-- - stdw .d2t2 B1:B0, *B15-- - stdw .d2t2 B3:B2, *B15-- -|| addkpc .s2 1f, B3, 0 -1: - lddw .d2t2 *++B15, B3:B2 - lddw .d2t2 *++B15, B1:B0 - lddw .d2t2 *++B15, B5:B4 - lddw .d2t2 *++B15, B7:B6 - lddw .d2t1 *++B15, A1:A0 - lddw .d2t1 *++B15, A7:A6 -|| b .s2 B3 - ldw .d2t1 *++B15[2], A2 - nop 4 -#endif - -#endif - diff --git a/gcc/config/c6x/t-c6x-elf b/gcc/config/c6x/t-c6x-elf index b3b4b850fe8..6bc2832026d 100644 --- a/gcc/config/c6x/t-c6x-elf +++ b/gcc/config/c6x/t-c6x-elf @@ -18,11 +18,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = c6x/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _udivmodsi4 _divmodsi4 -LIB1ASMFUNCS += _strasgi _strasgi_64plus _clzsi2 _clzdi2 _clz -LIB1ASMFUNCS += _push_rts _pop_rts _call_stub - LIB2FUNCS_EXCLUDE = _cmpdi2 _ucmpdi2 _gcc_bcmp _eprintf _clzsi _clzdi EXTRA_HEADERS += $(srcdir)/ginclude/unwind-arm-common.h diff --git a/gcc/config/fr30/lib1funcs.asm b/gcc/config/fr30/lib1funcs.asm deleted file mode 100644 index 7c63453123a..00000000000 --- a/gcc/config/fr30/lib1funcs.asm +++ /dev/null @@ -1,115 +0,0 @@ -/* libgcc routines for the FR30. - Copyright (C) 1998, 1999, 2009 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - - .macro FUNC_START name - .text - .globl __\name - .type __\name, @function -__\name: - .endm - - .macro FUNC_END name - .size __\name, . - __\name - .endm - - .macro DIV_BODY reg number - .if \number - DIV_BODY \reg, "\number - 1" - div1 \reg - .endif - .endm - -#ifdef L_udivsi3 -FUNC_START udivsi3 - ;; Perform an unsiged division of r4 / r5 and place the result in r4. - ;; Does not handle overflow yet... - mov r4, mdl - div0u r5 - DIV_BODY r5 32 - mov mdl, r4 - ret -FUNC_END udivsi3 -#endif /* L_udivsi3 */ - -#ifdef L_divsi3 -FUNC_START divsi3 - ;; Perform a siged division of r4 / r5 and place the result in r4. - ;; Does not handle overflow yet... - mov r4, mdl - div0s r5 - DIV_BODY r5 32 - div2 r5 - div3 - div4s - mov mdl, r4 - ret -FUNC_END divsi3 -#endif /* L_divsi3 */ - -#ifdef L_umodsi3 -FUNC_START umodsi3 - ;; Perform an unsiged division of r4 / r5 and places the remainder in r4. - ;; Does not handle overflow yet... 
- mov r4, mdl - div0u r5 - DIV_BODY r5 32 - mov mdh, r4 - ret -FUNC_END umodsi3 -#endif /* L_umodsi3 */ - -#ifdef L_modsi3 -FUNC_START modsi3 - ;; Perform a siged division of r4 / r5 and place the remainder in r4. - ;; Does not handle overflow yet... - mov r4, mdl - div0s r5 - DIV_BODY r5 32 - div2 r5 - div3 - div4s - mov mdh, r4 - ret -FUNC_END modsi3 -#endif /* L_modsi3 */ - -#ifdef L_negsi2 -FUNC_START negsi2 - ldi:8 #0, r0 - sub r4, r0 - mov r0, r4 - ret -FUNC_END negsi2 -#endif /* L_negsi2 */ - -#ifdef L_one_cmplsi2 -FUNC_START one_cmplsi2 - ldi:8 #0xff, r0 - extsb r0 - eor r0, r4 - ret -FUNC_END one_cmplsi2 -#endif /* L_one_cmplsi2 */ - - diff --git a/gcc/config/fr30/t-fr30 b/gcc/config/fr30/t-fr30 index 75009d4eb70..e37921681d0 100644 --- a/gcc/config/fr30/t-fr30 +++ b/gcc/config/fr30/t-fr30 @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = fr30/lib1funcs.asm -LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 - # If any special flags are necessary when building libgcc2 put them here. # # TARGET_LIBGCC2_CFLAGS diff --git a/gcc/config/frv/lib1funcs.asm b/gcc/config/frv/lib1funcs.asm deleted file mode 100644 index d1ffcab6133..00000000000 --- a/gcc/config/frv/lib1funcs.asm +++ /dev/null @@ -1,269 +0,0 @@ -/* Library functions. - Copyright (C) 2000, 2003, 2008, 2009 Free Software Foundation, Inc. - Contributed by Red Hat, Inc. - - This file is part of GCC. - - GCC is free software ; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY ; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#include - - -#ifdef L_cmpll -/* icc0 = __cmpll (long long a, long long b) */ - - .globl EXT(__cmpll) - .type EXT(__cmpll),@function - .text - .p2align 4 -EXT(__cmpll): - cmp gr8, gr10, icc0 - ckeq icc0, cc4 - P(ccmp) gr9, gr11, cc4, 1 - ret -.Lend: - .size EXT(__cmpll),.Lend-EXT(__cmpll) -#endif /* L_cmpll */ - -#ifdef L_cmpf -/* icc0 = __cmpf (float a, float b) */ -/* Note, because this function returns the result in ICC0, it means it can't - handle NaNs. 
*/ - - .globl EXT(__cmpf) - .type EXT(__cmpf),@function - .text - .p2align 4 -EXT(__cmpf): -#ifdef __FRV_HARD_FLOAT__ /* floating point instructions available */ - movgf gr8, fr0 - P(movgf) gr9, fr1 - setlos #1, gr8 - fcmps fr0, fr1, fcc0 - P(fcklt) fcc0, cc0 - fckeq fcc0, cc1 - csub gr0, gr8, gr8, cc0, 1 - cmov gr0, gr8, cc1, 1 - cmpi gr8, 0, icc0 - ret -#else /* no floating point instructions available */ - movsg lr, gr4 - addi sp, #-16, sp - sti gr4, @(sp, 8) - st fp, @(sp, gr0) - mov sp, fp - call EXT(__cmpsf2) - cmpi gr8, #0, icc0 - ldi @(sp, 8), gr4 - movgs gr4, lr - ld @(sp,gr0), fp - addi sp, #16, sp - ret -#endif -.Lend: - .size EXT(__cmpf),.Lend-EXT(__cmpf) -#endif - -#ifdef L_cmpd -/* icc0 = __cmpd (double a, double b) */ -/* Note, because this function returns the result in ICC0, it means it can't - handle NaNs. */ - - .globl EXT(__cmpd) - .type EXT(__cmpd),@function - .text - .p2align 4 -EXT(__cmpd): - movsg lr, gr4 - addi sp, #-16, sp - sti gr4, @(sp, 8) - st fp, @(sp, gr0) - mov sp, fp - call EXT(__cmpdf2) - cmpi gr8, #0, icc0 - ldi @(sp, 8), gr4 - movgs gr4, lr - ld @(sp,gr0), fp - addi sp, #16, sp - ret -.Lend: - .size EXT(__cmpd),.Lend-EXT(__cmpd) -#endif - -#ifdef L_addll -/* gr8,gr9 = __addll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__addll) - .type EXT(__addll),@function - .text - .p2align -EXT(__addll): - addcc gr9, gr11, gr9, icc0 - addx gr8, gr10, gr8, icc0 - ret -.Lend: - .size EXT(__addll),.Lend-EXT(__addll) -#endif - -#ifdef L_subll -/* gr8,gr9 = __subll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__subll) - .type EXT(__subll),@function - .text - .p2align 4 -EXT(__subll): - subcc gr9, gr11, gr9, icc0 - subx gr8, gr10, gr8, icc0 - ret -.Lend: - .size EXT(__subll),.Lend-EXT(__subll) -#endif - -#ifdef L_andll -/* gr8,gr9 = __andll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__andll) - .type EXT(__andll),@function - .text - .p2align 4 -EXT(__andll): - P(and) gr9, gr11, gr9 - P2(and) gr8, gr10, gr8 - ret -.Lend: - .size EXT(__andll),.Lend-EXT(__andll) -#endif - -#ifdef L_orll -/* gr8,gr9 = __orll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__orll) - .type EXT(__orll),@function - .text - .p2align 4 -EXT(__orll): - P(or) gr9, gr11, gr9 - P2(or) gr8, gr10, gr8 - ret -.Lend: - .size EXT(__orll),.Lend-EXT(__orll) -#endif - -#ifdef L_xorll -/* gr8,gr9 = __xorll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__xorll) - .type EXT(__xorll),@function - .text - .p2align 4 -EXT(__xorll): - P(xor) gr9, gr11, gr9 - P2(xor) gr8, gr10, gr8 - ret -.Lend: - .size EXT(__xorll),.Lend-EXT(__xorll) -#endif - -#ifdef L_notll -/* gr8,gr9 = __notll (long long a) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. 
*/ - - .globl EXT(__notll) - .type EXT(__notll),@function - .text - .p2align 4 -EXT(__notll): - P(not) gr9, gr9 - P2(not) gr8, gr8 - ret -.Lend: - .size EXT(__notll),.Lend-EXT(__notll) -#endif - -#ifdef L_cmov -/* (void) __cmov (char *dest, const char *src, size_t len) */ -/* - * void __cmov (char *dest, const char *src, size_t len) - * { - * size_t i; - * - * if (dest < src || dest > src+len) - * { - * for (i = 0; i < len; i++) - * dest[i] = src[i]; - * } - * else - * { - * while (len-- > 0) - * dest[len] = src[len]; - * } - * } - */ - - .globl EXT(__cmov) - .type EXT(__cmov),@function - .text - .p2align 4 -EXT(__cmov): - P(cmp) gr8, gr9, icc0 - add gr9, gr10, gr4 - P(cmp) gr8, gr4, icc1 - bc icc0, 0, .Lfwd - bls icc1, 0, .Lback -.Lfwd: - /* move bytes in a forward direction */ - P(setlos) #0, gr5 - cmp gr0, gr10, icc0 - P(subi) gr9, #1, gr9 - P2(subi) gr8, #1, gr8 - bnc icc0, 0, .Lret -.Lfloop: - /* forward byte move loop */ - addi gr5, #1, gr5 - P(ldsb) @(gr9, gr5), gr4 - cmp gr5, gr10, icc0 - P(stb) gr4, @(gr8, gr5) - bc icc0, 0, .Lfloop - ret -.Lbloop: - /* backward byte move loop body */ - ldsb @(gr9,gr10),gr4 - stb gr4,@(gr8,gr10) -.Lback: - P(cmpi) gr10, #0, icc0 - addi gr10, #-1, gr10 - bne icc0, 0, .Lbloop -.Lret: - ret -.Lend: - .size EXT(__cmov),.Lend-EXT(__cmov) -#endif diff --git a/gcc/config/frv/t-frv b/gcc/config/frv/t-frv index 03f3cd8cde1..e31f823c30a 100644 --- a/gcc/config/frv/t-frv +++ b/gcc/config/frv/t-frv @@ -16,15 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -# Name of assembly file containing libgcc1 functions. -# This entry must be present, but it can be empty if the target does -# not need any assembler functions to support its code generation. -# -# Alternatively if assembler functions *are* needed then define the -# entries below: -CROSS_LIBGCC1 = libgcc1-asm.a -LIB1ASMSRC = frv/lib1funcs.asm -LIB1ASMFUNCS = _cmpll _cmpf _cmpd _addll _subll _andll _orll _xorll _notll _cmov LIB2FUNCS_EXTRA = cmovh.c cmovw.c cmovd.c modi.c umodi.c uitof.c uitod.c ulltof.c ulltod.c # If any special flags are necessary when building libgcc2 put them here. diff --git a/gcc/config/h8300/fixunssfsi.c b/gcc/config/h8300/fixunssfsi.c index 2fe62b7a1a8..940d0c6dc6a 100644 --- a/gcc/config/h8300/fixunssfsi.c +++ b/gcc/config/h8300/fixunssfsi.c @@ -1,6 +1,6 @@ /* More subroutines needed by GCC output code on some machines. */ /* Compile this one with gcc. */ -/* Copyright (C) 1989, 1992, 2001, 2002, 2003, 2004, 2009 +/* Copyright (C) 1989, 1992, 2001, 2002, 2003, 2004, 2009, 2011 Free Software Foundation, Inc. This file is part of GCC. @@ -26,7 +26,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* The libgcc2.c implementation gets confused by our type setup and creates a directly recursive call, so we do our own implementation. For - the H8/300, that's in lib1funcs.asm, for H8/300H and H8S, it's here. */ + the H8/300, that's in lib1funcs.S, for H8/300H and H8S, it's here. */ #ifndef __H8300__ long __fixunssfsi (float a); diff --git a/gcc/config/h8300/lib1funcs.asm b/gcc/config/h8300/lib1funcs.asm deleted file mode 100644 index 1b75b73269d..00000000000 --- a/gcc/config/h8300/lib1funcs.asm +++ /dev/null @@ -1,838 +0,0 @@ -;; libgcc routines for the Renesas H8/300 CPU. -;; Contributed by Steve Chamberlain -;; Optimizations by Toshiyasu Morita - -/* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004, 2009 - Free Software Foundation, Inc. 
- -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -/* Assembler register definitions. */ - -#define A0 r0 -#define A0L r0l -#define A0H r0h - -#define A1 r1 -#define A1L r1l -#define A1H r1h - -#define A2 r2 -#define A2L r2l -#define A2H r2h - -#define A3 r3 -#define A3L r3l -#define A3H r3h - -#define S0 r4 -#define S0L r4l -#define S0H r4h - -#define S1 r5 -#define S1L r5l -#define S1H r5h - -#define S2 r6 -#define S2L r6l -#define S2H r6h - -#ifdef __H8300__ -#define PUSHP push -#define POPP pop - -#define A0P r0 -#define A1P r1 -#define A2P r2 -#define A3P r3 -#define S0P r4 -#define S1P r5 -#define S2P r6 -#endif - -#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__) -#define PUSHP push.l -#define POPP pop.l - -#define A0P er0 -#define A1P er1 -#define A2P er2 -#define A3P er3 -#define S0P er4 -#define S1P er5 -#define S2P er6 - -#define A0E e0 -#define A1E e1 -#define A2E e2 -#define A3E e3 -#endif - -#ifdef __H8300H__ -#ifdef __NORMAL_MODE__ - .h8300hn -#else - .h8300h -#endif -#endif - -#ifdef __H8300S__ -#ifdef __NORMAL_MODE__ - .h8300sn -#else - .h8300s -#endif -#endif -#ifdef __H8300SX__ -#ifdef __NORMAL_MODE__ - .h8300sxn -#else - .h8300sx -#endif -#endif - -#ifdef L_cmpsi2 -#ifdef __H8300__ - .section .text - .align 2 - .global ___cmpsi2 -___cmpsi2: - cmp.w A0,A2 - bne .L2 - cmp.w A1,A3 - bne .L4 - mov.w #1,A0 - rts -.L2: - bgt .L5 -.L3: - mov.w #2,A0 - rts -.L4: - bls .L3 -.L5: - sub.w A0,A0 - rts - .end -#endif -#endif /* L_cmpsi2 */ - -#ifdef L_ucmpsi2 -#ifdef __H8300__ - .section .text - .align 2 - .global ___ucmpsi2 -___ucmpsi2: - cmp.w A0,A2 - bne .L2 - cmp.w A1,A3 - bne .L4 - mov.w #1,A0 - rts -.L2: - bhi .L5 -.L3: - mov.w #2,A0 - rts -.L4: - bls .L3 -.L5: - sub.w A0,A0 - rts - .end -#endif -#endif /* L_ucmpsi2 */ - -#ifdef L_divhi3 - -;; HImode divides for the H8/300. -;; We bunch all of this into one object file since there are several -;; "supporting routines". - -; general purpose normalize routine -; -; divisor in A0 -; dividend in A1 -; turns both into +ve numbers, and leaves what the answer sign -; should be in A2L - -#ifdef __H8300__ - .section .text - .align 2 -divnorm: - or A0H,A0H ; is divisor > 0 - stc ccr,A2L - bge _lab1 - not A0H ; no - then make it +ve - not A0L - adds #1,A0 -_lab1: or A1H,A1H ; look at dividend - bge _lab2 - not A1H ; it is -ve, make it positive - not A1L - adds #1,A1 - xor #0x8,A2L; and toggle sign of result -_lab2: rts -;; Basically the same, except that the sign of the divisor determines -;; the sign. 
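In C terms, divnorm/negans above, and the modnorm variant that follows, wrap the unsigned helpers with sign bookkeeping: strip the signs, remember them (the bit parked in A2L here, S2L in the SImode copy), run the unsigned operation, then negate the result as needed. The sketch below is illustrative only; the *_sketch helpers are stand-ins rather than the real ___udivhi3/___umodhi3, and the same pattern reappears for the SImode routines later in this file. For C truncating division the remainder takes the dividend's sign.

#include <stdint.h>

/* Stand-ins for the unsigned helpers; illustrative only.  */
static uint32_t udiv_sketch (uint32_t a, uint32_t b) { return a / b; }
static uint32_t umod_sketch (uint32_t a, uint32_t b) { return a % b; }

int32_t
div_sketch (int32_t a, int32_t b)
{
  /* divnorm: the result must be negated iff exactly one input is
     negative (the flag kept in A2L/S2L).  */
  int negate = (a < 0) != (b < 0);
  uint32_t ua = a < 0 ? 0u - (uint32_t) a : (uint32_t) a;
  uint32_t ub = b < 0 ? 0u - (uint32_t) b : (uint32_t) b;
  uint32_t q = udiv_sketch (ua, ub);
  /* negans: conditionally negate the unsigned result.  */
  return negate ? (int32_t) (0u - q) : (int32_t) q;
}

int32_t
mod_sketch (int32_t a, int32_t b)
{
  /* modnorm path: only the dividend's sign is propagated here,
     matching C truncating-division semantics.  */
  uint32_t ua = a < 0 ? 0u - (uint32_t) a : (uint32_t) a;
  uint32_t ub = b < 0 ? 0u - (uint32_t) b : (uint32_t) b;
  uint32_t r = umod_sketch (ua, ub);
  return a < 0 ? (int32_t) (0u - r) : (int32_t) r;
}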
-modnorm: - or A0H,A0H ; is divisor > 0 - stc ccr,A2L - bge _lab7 - not A0H ; no - then make it +ve - not A0L - adds #1,A0 -_lab7: or A1H,A1H ; look at dividend - bge _lab8 - not A1H ; it is -ve, make it positive - not A1L - adds #1,A1 -_lab8: rts - -; A0=A0/A1 signed - - .global ___divhi3 -___divhi3: - bsr divnorm - bsr ___udivhi3 -negans: btst #3,A2L ; should answer be negative ? - beq _lab4 - not A0H ; yes, so make it so - not A0L - adds #1,A0 -_lab4: rts - -; A0=A0%A1 signed - - .global ___modhi3 -___modhi3: - bsr modnorm - bsr ___udivhi3 - mov A3,A0 - bra negans - -; A0=A0%A1 unsigned - - .global ___umodhi3 -___umodhi3: - bsr ___udivhi3 - mov A3,A0 - rts - -; A0=A0/A1 unsigned -; A3=A0%A1 unsigned -; A2H trashed -; D high 8 bits of denom -; d low 8 bits of denom -; N high 8 bits of num -; n low 8 bits of num -; M high 8 bits of mod -; m low 8 bits of mod -; Q high 8 bits of quot -; q low 8 bits of quot -; P preserve - -; The H8/300 only has a 16/8 bit divide, so we look at the incoming and -; see how to partition up the expression. - - .global ___udivhi3 -___udivhi3: - ; A0 A1 A2 A3 - ; Nn Dd P - sub.w A3,A3 ; Nn Dd xP 00 - or A1H,A1H - bne divlongway - or A0H,A0H - beq _lab6 - -; we know that D == 0 and N is != 0 - mov.b A0H,A3L ; Nn Dd xP 0N - divxu A1L,A3 ; MQ - mov.b A3L,A0H ; Q -; dealt with N, do n -_lab6: mov.b A0L,A3L ; n - divxu A1L,A3 ; mq - mov.b A3L,A0L ; Qq - mov.b A3H,A3L ; m - mov.b #0x0,A3H ; Qq 0m - rts - -; D != 0 - which means the denominator is -; loop around to get the result. - -divlongway: - mov.b A0H,A3L ; Nn Dd xP 0N - mov.b #0x0,A0H ; high byte of answer has to be zero - mov.b #0x8,A2H ; 8 -div8: add.b A0L,A0L ; n*=2 - rotxl A3L ; Make remainder bigger - rotxl A3H - sub.w A1,A3 ; Q-=N - bhs setbit ; set a bit ? - add.w A1,A3 ; no : too far , Q+=N - - dec A2H - bne div8 ; next bit - rts - -setbit: inc A0L ; do insert bit - dec A2H - bne div8 ; next bit - rts - -#endif /* __H8300__ */ -#endif /* L_divhi3 */ - -#ifdef L_divsi3 - -;; 4 byte integer divides for the H8/300. -;; -;; We have one routine which does all the work and lots of -;; little ones which prepare the args and massage the sign. -;; We bunch all of this into one object file since there are several -;; "supporting routines". - - .section .text - .align 2 - -; Put abs SIs into r0/r1 and r2/r3, and leave a 1 in r6l with sign of rest. -; This function is here to keep branch displacements small. - -#ifdef __H8300__ - -divnorm: - mov.b A0H,A0H ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge postive - - ; negate arg - not A0H - not A1H - not A0L - not A1L - - add #1,A1L - addx #0,A1H - addx #0,A0L - addx #0,A0H -postive: - mov.b A2H,A2H ; is the denominator -ve - bge postive2 - not A2L - not A2H - not A3L - not A3H - add.b #1,A3L - addx #0,A3H - addx #0,A2L - addx #0,A2H - xor.b #0x08,S2L ; toggle the result sign -postive2: - rts - -;; Basically the same, except that the sign of the divisor determines -;; the sign. 
-modnorm: - mov.b A0H,A0H ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge mpostive - - ; negate arg - not A0H - not A1H - not A0L - not A1L - - add #1,A1L - addx #0,A1H - addx #0,A0L - addx #0,A0H -mpostive: - mov.b A2H,A2H ; is the denominator -ve - bge mpostive2 - not A2L - not A2H - not A3L - not A3H - add.b #1,A3L - addx #0,A3H - addx #0,A2L - addx #0,A2H -mpostive2: - rts - -#else /* __H8300H__ */ - -divnorm: - mov.l A0P,A0P ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge postive - - neg.l A0P ; negate arg - -postive: - mov.l A1P,A1P ; is the denominator -ve - bge postive2 - - neg.l A1P ; negate arg - xor.b #0x08,S2L ; toggle the result sign - -postive2: - rts - -;; Basically the same, except that the sign of the divisor determines -;; the sign. -modnorm: - mov.l A0P,A0P ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge mpostive - - neg.l A0P ; negate arg - -mpostive: - mov.l A1P,A1P ; is the denominator -ve - bge mpostive2 - - neg.l A1P ; negate arg - -mpostive2: - rts - -#endif - -; numerator in A0/A1 -; denominator in A2/A3 - .global ___modsi3 -___modsi3: -#ifdef __H8300__ - PUSHP S2P - PUSHP S0P - PUSHP S1P - bsr modnorm - bsr divmodsi4 - mov S0,A0 - mov S1,A1 - bra exitdiv -#else - PUSHP S2P - bsr modnorm - bsr ___udivsi3 - mov.l er3,er0 - bra exitdiv -#endif - - ;; H8/300H and H8S version of ___udivsi3 is defined later in - ;; the file. -#ifdef __H8300__ - .global ___udivsi3 -___udivsi3: - PUSHP S2P - PUSHP S0P - PUSHP S1P - bsr divmodsi4 - bra reti -#endif - - .global ___umodsi3 -___umodsi3: -#ifdef __H8300__ - PUSHP S2P - PUSHP S0P - PUSHP S1P - bsr divmodsi4 - mov S0,A0 - mov S1,A1 - bra reti -#else - bsr ___udivsi3 - mov.l er3,er0 - rts -#endif - - .global ___divsi3 -___divsi3: -#ifdef __H8300__ - PUSHP S2P - PUSHP S0P - PUSHP S1P - jsr divnorm - jsr divmodsi4 -#else - PUSHP S2P - jsr divnorm - bsr ___udivsi3 -#endif - - ; examine what the sign should be -exitdiv: - btst #3,S2L - beq reti - - ; should be -ve -#ifdef __H8300__ - not A0H - not A1H - not A0L - not A1L - - add #1,A1L - addx #0,A1H - addx #0,A0L - addx #0,A0H -#else /* __H8300H__ */ - neg.l A0P -#endif - -reti: -#ifdef __H8300__ - POPP S1P - POPP S0P -#endif - POPP S2P - rts - - ; takes A0/A1 numerator (A0P for H8/300H) - ; A2/A3 denominator (A1P for H8/300H) - ; returns A0/A1 quotient (A0P for H8/300H) - ; S0/S1 remainder (S0P for H8/300H) - ; trashes S2H - -#ifdef __H8300__ - -divmodsi4: - sub.w S0,S0 ; zero play area - mov.w S0,S1 - mov.b A2H,S2H - or A2L,S2H - or A3H,S2H - bne DenHighNonZero - mov.b A0H,A0H - bne NumByte0Zero - mov.b A0L,A0L - bne NumByte1Zero - mov.b A1H,A1H - bne NumByte2Zero - bra NumByte3Zero -NumByte0Zero: - mov.b A0H,S1L - divxu A3L,S1 - mov.b S1L,A0H -NumByte1Zero: - mov.b A0L,S1L - divxu A3L,S1 - mov.b S1L,A0L -NumByte2Zero: - mov.b A1H,S1L - divxu A3L,S1 - mov.b S1L,A1H -NumByte3Zero: - mov.b A1L,S1L - divxu A3L,S1 - mov.b S1L,A1L - - mov.b S1H,S1L - mov.b #0x0,S1H - rts - -; have to do the divide by shift and test -DenHighNonZero: - mov.b A0H,S1L - mov.b A0L,A0H - mov.b A1H,A0L - mov.b A1L,A1H - - mov.b #0,A1L - mov.b #24,S2H ; only do 24 iterations - -nextbit: - add.w A1,A1 ; double the answer guess - rotxl A0L - rotxl A0H - - rotxl S1L ; double remainder - rotxl S1H - rotxl S0L - rotxl S0H - sub.w A3,S1 ; does it all fit - subx A2L,S0L - subx A2H,S0H - bhs setone - - add.w A3,S1 ; no, restore mistake - addx A2L,S0L - addx A2H,S0H - - dec S2H - bne nextbit - rts - -setone: - inc A1L - dec S2H - bne 
nextbit - rts - -#else /* __H8300H__ */ - - ;; This function also computes the remainder and stores it in er3. - .global ___udivsi3 -___udivsi3: - mov.w A1E,A1E ; denominator top word 0? - bne DenHighNonZero - - ; do it the easy way, see page 107 in manual - mov.w A0E,A2 - extu.l A2P - divxu.w A1,A2P - mov.w A2E,A0E - divxu.w A1,A0P - mov.w A0E,A3 - mov.w A2,A0E - extu.l A3P - rts - - ; er0 = er0 / er1 - ; er3 = er0 % er1 - ; trashes er1 er2 - ; expects er1 >= 2^16 -DenHighNonZero: - mov.l er0,er3 - mov.l er1,er2 -#ifdef __H8300H__ -divmod_L21: - shlr.l er0 - shlr.l er2 ; make divisor < 2^16 - mov.w e2,e2 - bne divmod_L21 -#else - shlr.l #2,er2 ; make divisor < 2^16 - mov.w e2,e2 - beq divmod_L22A -divmod_L21: - shlr.l #2,er0 -divmod_L22: - shlr.l #2,er2 ; make divisor < 2^16 - mov.w e2,e2 - bne divmod_L21 -divmod_L22A: - rotxl.w r2 - bcs divmod_L23 - shlr.l er0 - bra divmod_L24 -divmod_L23: - rotxr.w r2 - shlr.l #2,er0 -divmod_L24: -#endif - ;; At this point, - ;; er0 contains shifted dividend - ;; er1 contains divisor - ;; er2 contains shifted divisor - ;; er3 contains dividend, later remainder - divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ) - extu.l er0 - beq divmod_L25 - subs #1,er0 ; er0 = AQ - 1 - mov.w e1,r2 - mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor - sub.w r2,e3 ; dividend - 65536 * er2 - mov.w r1,r2 - mulxu.w r0,er2 ; compute er3 = remainder (tentative) - sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor -divmod_L25: - cmp.l er1,er3 ; is divisor < remainder? - blo divmod_L26 - adds #1,er0 - sub.l er1,er3 ; correct the remainder -divmod_L26: - rts - -#endif -#endif /* L_divsi3 */ - -#ifdef L_mulhi3 - -;; HImode multiply. -; The H8/300 only has an 8*8->16 multiply. -; The answer is the same as: -; -; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256 -; (we can ignore A1.h * A0.h cause that will all off the top) -; A0 in -; A1 in -; A0 answer - -#ifdef __H8300__ - .section .text - .align 2 - .global ___mulhi3 -___mulhi3: - mov.b A1L,A2L ; A2l gets srcb.l - mulxu A0L,A2 ; A2 gets first sub product - - mov.b A0H,A3L ; prepare for - mulxu A1L,A3 ; second sub product - - add.b A3L,A2H ; sum first two terms - - mov.b A1H,A3L ; third sub product - mulxu A0L,A3 - - add.b A3L,A2H ; almost there - mov.w A2,A0 ; that is - rts - -#endif -#endif /* L_mulhi3 */ - -#ifdef L_mulsi3 - -;; SImode multiply. -;; -;; I think that shift and add may be sufficient for this. Using the -;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way -;; the inner loop uses maybe 20 cycles + overhead, but terminates -;; quickly on small args. 
-;; -;; A0/A1 src_a -;; A2/A3 src_b -;; -;; while (a) -;; { -;; if (a & 1) -;; r += b; -;; a >>= 1; -;; b <<= 1; -;; } - - .section .text - .align 2 - -#ifdef __H8300__ - - .global ___mulsi3 -___mulsi3: - PUSHP S0P - PUSHP S1P - - sub.w S0,S0 - sub.w S1,S1 - - ; while (a) -_top: mov.w A0,A0 - bne _more - mov.w A1,A1 - beq _done -_more: ; if (a & 1) - bld #0,A1L - bcc _nobit - ; r += b - add.w A3,S1 - addx A2L,S0L - addx A2H,S0H -_nobit: - ; a >>= 1 - shlr A0H - rotxr A0L - rotxr A1H - rotxr A1L - - ; b <<= 1 - add.w A3,A3 - addx A2L,A2L - addx A2H,A2H - bra _top - -_done: - mov.w S0,A0 - mov.w S1,A1 - POPP S1P - POPP S0P - rts - -#else /* __H8300H__ */ - -; -; mulsi3 for H8/300H - based on Renesas SH implementation -; -; by Toshiyasu Morita -; -; Old code: -; -; 16b * 16b = 372 states (worst case) -; 32b * 32b = 724 states (worst case) -; -; New code: -; -; 16b * 16b = 48 states -; 16b * 32b = 72 states -; 32b * 32b = 92 states -; - - .global ___mulsi3 -___mulsi3: - mov.w r1,r2 ; ( 2 states) b * d - mulxu r0,er2 ; (22 states) - - mov.w e0,r3 ; ( 2 states) a * d - beq L_skip1 ; ( 4 states) - mulxu r1,er3 ; (22 states) - add.w r3,e2 ; ( 2 states) - -L_skip1: - mov.w e1,r3 ; ( 2 states) c * b - beq L_skip2 ; ( 4 states) - mulxu r0,er3 ; (22 states) - add.w r3,e2 ; ( 2 states) - -L_skip2: - mov.l er2,er0 ; ( 2 states) - rts ; (10 states) - -#endif -#endif /* L_mulsi3 */ -#ifdef L_fixunssfsi_asm -/* For the h8300 we use asm to save some bytes, to - allow more programs to fit into the tiny address - space. For the H8/300H and H8S, the C version is good enough. */ -#ifdef __H8300__ -/* We still treat NANs different than libgcc2.c, but then, the - behavior is undefined anyways. */ - .global ___fixunssfsi -___fixunssfsi: - cmp.b #0x4f,r0h - bge Large_num - jmp @___fixsfsi -Large_num: - bhi L_huge_num - xor.b #0x80,A0L - bmi L_shift8 -L_huge_num: - mov.w #65535,A0 - mov.w A0,A1 - rts -L_shift8: - mov.b A0L,A0H - mov.b A1H,A0L - mov.b A1L,A1H - mov.b #0,A1L - rts -#endif -#endif /* L_fixunssfsi_asm */ diff --git a/gcc/config/h8300/t-h8300 b/gcc/config/h8300/t-h8300 index 616849007b4..7083c673acf 100644 --- a/gcc/config/h8300/t-h8300 +++ b/gcc/config/h8300/t-h8300 @@ -17,10 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = h8300/lib1funcs.asm -LIB1ASMFUNCS = _cmpsi2 _ucmpsi2 _divhi3 _divsi3 _mulhi3 _mulsi3 \ - _fixunssfsi_asm - LIB2FUNCS_EXTRA = \ $(srcdir)/config/h8300/clzhi2.c \ $(srcdir)/config/h8300/ctzhi2.c \ diff --git a/gcc/config/i386/cygwin.asm b/gcc/config/i386/cygwin.asm deleted file mode 100644 index 8f9c486850e..00000000000 --- a/gcc/config/i386/cygwin.asm +++ /dev/null @@ -1,188 +0,0 @@ -/* stuff needed for libgcc on win32. - * - * Copyright (C) 1996, 1998, 2001, 2003, 2008, 2009, 2010 - * Free Software Foundation, Inc. - * Written By Steve Chamberlain - * - * This file is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) any - * later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. 
- * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * . - */ - -#include "auto-host.h" - -#ifdef HAVE_GAS_CFI_SECTIONS_DIRECTIVE - .cfi_sections .debug_frame -# define cfi_startproc() .cfi_startproc -# define cfi_endproc() .cfi_endproc -# define cfi_adjust_cfa_offset(X) .cfi_adjust_cfa_offset X -# define cfi_def_cfa_register(X) .cfi_def_cfa_register X -# define cfi_register(D,S) .cfi_register D, S -# ifdef _WIN64 -# define cfi_push(X) .cfi_adjust_cfa_offset 8; .cfi_rel_offset X, 0 -# define cfi_pop(X) .cfi_adjust_cfa_offset -8; .cfi_restore X -# else -# define cfi_push(X) .cfi_adjust_cfa_offset 4; .cfi_rel_offset X, 0 -# define cfi_pop(X) .cfi_adjust_cfa_offset -4; .cfi_restore X -# endif -#else -# define cfi_startproc() -# define cfi_endproc() -# define cfi_adjust_cfa_offset(X) -# define cfi_def_cfa_register(X) -# define cfi_register(D,S) -# define cfi_push(X) -# define cfi_pop(X) -#endif /* HAVE_GAS_CFI_SECTIONS_DIRECTIVE */ - -#ifdef L_chkstk -/* Function prologue calls __chkstk to probe the stack when allocating more - than CHECK_STACK_LIMIT bytes in one go. Touching the stack at 4K - increments is necessary to ensure that the guard pages used - by the OS virtual memory manger are allocated in correct sequence. */ - - .global ___chkstk - .global __alloca -#ifdef _WIN64 -/* __alloca is a normal function call, which uses %rcx as the argument. */ - cfi_startproc() -__alloca: - movq %rcx, %rax - /* FALLTHRU */ - -/* ___chkstk is a *special* function call, which uses %rax as the argument. - We avoid clobbering the 4 integer argument registers, %rcx, %rdx, - %r8 and %r9, which leaves us with %rax, %r10, and %r11 to use. */ - .align 4 -___chkstk: - popq %r11 /* pop return address */ - cfi_adjust_cfa_offset(-8) /* indicate return address in r11 */ - cfi_register(%rip, %r11) - movq %rsp, %r10 - cmpq $0x1000, %rax /* > 4k ?*/ - jb 2f - -1: subq $0x1000, %r10 /* yes, move pointer down 4k*/ - orl $0x0, (%r10) /* probe there */ - subq $0x1000, %rax /* decrement count */ - cmpq $0x1000, %rax - ja 1b /* and do it again */ - -2: subq %rax, %r10 - movq %rsp, %rax /* hold CFA until return */ - cfi_def_cfa_register(%rax) - orl $0x0, (%r10) /* less than 4k, just peek here */ - movq %r10, %rsp /* decrement stack */ - - /* Push the return value back. Doing this instead of just - jumping to %r11 preserves the cached call-return stack - used by most modern processors. */ - pushq %r11 - ret - cfi_endproc() -#else - cfi_startproc() -___chkstk: -__alloca: - pushl %ecx /* save temp */ - cfi_push(%eax) - leal 8(%esp), %ecx /* point past return addr */ - cmpl $0x1000, %eax /* > 4k ?*/ - jb 2f - -1: subl $0x1000, %ecx /* yes, move pointer down 4k*/ - orl $0x0, (%ecx) /* probe there */ - subl $0x1000, %eax /* decrement count */ - cmpl $0x1000, %eax - ja 1b /* and do it again */ - -2: subl %eax, %ecx - orl $0x0, (%ecx) /* less than 4k, just peek here */ - movl %esp, %eax /* save current stack pointer */ - cfi_def_cfa_register(%eax) - movl %ecx, %esp /* decrement stack */ - movl (%eax), %ecx /* recover saved temp */ - - /* Copy the return register. Doing this instead of just jumping to - the address preserves the cached call-return stack used by most - modern processors. 
*/ - pushl 4(%eax) - ret - cfi_endproc() -#endif /* _WIN64 */ -#endif /* L_chkstk */ - -#ifdef L_chkstk_ms -/* ___chkstk_ms is a *special* function call, which uses %rax as the argument. - We avoid clobbering any registers. Unlike ___chkstk, it just probes the - stack and does no stack allocation. */ - .global ___chkstk_ms -#ifdef _WIN64 - cfi_startproc() -___chkstk_ms: - pushq %rcx /* save temps */ - cfi_push(%rcx) - pushq %rax - cfi_push(%rax) - cmpq $0x1000, %rax /* > 4k ?*/ - leaq 24(%rsp), %rcx /* point past return addr */ - jb 2f - -1: subq $0x1000, %rcx /* yes, move pointer down 4k */ - orq $0x0, (%rcx) /* probe there */ - subq $0x1000, %rax /* decrement count */ - cmpq $0x1000, %rax - ja 1b /* and do it again */ - -2: subq %rax, %rcx - orq $0x0, (%rcx) /* less than 4k, just peek here */ - - popq %rax - cfi_pop(%rax) - popq %rcx - cfi_pop(%rcx) - ret - cfi_endproc() -#else - cfi_startproc() -___chkstk_ms: - pushl %ecx /* save temp */ - cfi_push(%ecx) - pushl %eax - cfi_push(%eax) - cmpl $0x1000, %eax /* > 4k ?*/ - leal 12(%esp), %ecx /* point past return addr */ - jb 2f - -1: subl $0x1000, %ecx /* yes, move pointer down 4k*/ - orl $0x0, (%ecx) /* probe there */ - subl $0x1000, %eax /* decrement count */ - cmpl $0x1000, %eax - ja 1b /* and do it again */ - -2: subl %eax, %ecx - orl $0x0, (%ecx) /* less than 4k, just peek here */ - - popl %eax - cfi_pop(%eax) - popl %ecx - cfi_pop(%ecx) - ret - cfi_endproc() -#endif /* _WIN64 */ -#endif /* L_chkstk_ms */ diff --git a/gcc/config/i386/t-cygming b/gcc/config/i386/t-cygming index 242d7f27f65..3e7f7cdd036 100644 --- a/gcc/config/i386/t-cygming +++ b/gcc/config/i386/t-cygming @@ -17,9 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = i386/cygwin.asm -LIB1ASMFUNCS = _chkstk _chkstk_ms - # cygwin and mingw always have a limits.h, but, depending upon how we are # doing the build, it may not be installed yet. LIMITS_H_TEST = true diff --git a/gcc/config/i386/t-interix b/gcc/config/i386/t-interix index e7b016f1e7a..09c9127f6af 100644 --- a/gcc/config/i386/t-interix +++ b/gcc/config/i386/t-interix @@ -1,6 +1,3 @@ -LIB1ASMSRC = i386/cygwin.asm -LIB1ASMFUNCS = _chkstk _chkstk_ms - winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \ $(TM_P_H) $(HASHTAB_H) $(GGC_H) diff --git a/gcc/config/ia64/lib1funcs.asm b/gcc/config/ia64/lib1funcs.asm deleted file mode 100644 index b7eaa6eca3c..00000000000 --- a/gcc/config/ia64/lib1funcs.asm +++ /dev/null @@ -1,795 +0,0 @@ -/* Copyright (C) 2000, 2001, 2003, 2005, 2009 Free Software Foundation, Inc. - Contributed by James E. Wilson . - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. 
- - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#ifdef L__divxf3 -// Compute a 80-bit IEEE double-extended quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// farg0 holds the dividend. farg1 holds the divisor. -// -// __divtf3 is an alternate symbol name for backward compatibility. - - .text - .align 16 - .global __divxf3 - .proc __divxf3 -__divxf3: -#ifdef SHARED - .global __divtf3 -__divtf3: -#endif - cmp.eq p7, p0 = r0, r0 - frcpa.s0 f10, p6 = farg0, farg1 - ;; -(p6) cmp.ne p7, p0 = r0, r0 - .pred.rel.mutex p6, p7 -(p6) fnma.s1 f11 = farg1, f10, f1 -(p6) fma.s1 f12 = farg0, f10, f0 - ;; -(p6) fma.s1 f13 = f11, f11, f0 -(p6) fma.s1 f14 = f11, f11, f11 - ;; -(p6) fma.s1 f11 = f13, f13, f11 -(p6) fma.s1 f13 = f14, f10, f10 - ;; -(p6) fma.s1 f10 = f13, f11, f10 -(p6) fnma.s1 f11 = farg1, f12, farg0 - ;; -(p6) fma.s1 f11 = f11, f10, f12 -(p6) fnma.s1 f12 = farg1, f10, f1 - ;; -(p6) fma.s1 f10 = f12, f10, f10 -(p6) fnma.s1 f12 = farg1, f11, farg0 - ;; -(p6) fma.s0 fret0 = f12, f10, f11 -(p7) mov fret0 = f10 - br.ret.sptk rp - .endp __divxf3 -#endif - -#ifdef L__divdf3 -// Compute a 64-bit IEEE double quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// farg0 holds the dividend. farg1 holds the divisor. - - .text - .align 16 - .global __divdf3 - .proc __divdf3 -__divdf3: - cmp.eq p7, p0 = r0, r0 - frcpa.s0 f10, p6 = farg0, farg1 - ;; -(p6) cmp.ne p7, p0 = r0, r0 - .pred.rel.mutex p6, p7 -(p6) fmpy.s1 f11 = farg0, f10 -(p6) fnma.s1 f12 = farg1, f10, f1 - ;; -(p6) fma.s1 f11 = f12, f11, f11 -(p6) fmpy.s1 f13 = f12, f12 - ;; -(p6) fma.s1 f10 = f12, f10, f10 -(p6) fma.s1 f11 = f13, f11, f11 - ;; -(p6) fmpy.s1 f12 = f13, f13 -(p6) fma.s1 f10 = f13, f10, f10 - ;; -(p6) fma.d.s1 f11 = f12, f11, f11 -(p6) fma.s1 f10 = f12, f10, f10 - ;; -(p6) fnma.d.s1 f8 = farg1, f11, farg0 - ;; -(p6) fma.d fret0 = f8, f10, f11 -(p7) mov fret0 = f10 - br.ret.sptk rp - ;; - .endp __divdf3 -#endif - -#ifdef L__divsf3 -// Compute a 32-bit IEEE float quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// farg0 holds the dividend. farg1 holds the divisor. - - .text - .align 16 - .global __divsf3 - .proc __divsf3 -__divsf3: - cmp.eq p7, p0 = r0, r0 - frcpa.s0 f10, p6 = farg0, farg1 - ;; -(p6) cmp.ne p7, p0 = r0, r0 - .pred.rel.mutex p6, p7 -(p6) fmpy.s1 f8 = farg0, f10 -(p6) fnma.s1 f9 = farg1, f10, f1 - ;; -(p6) fma.s1 f8 = f9, f8, f8 -(p6) fmpy.s1 f9 = f9, f9 - ;; -(p6) fma.s1 f8 = f9, f8, f8 -(p6) fmpy.s1 f9 = f9, f9 - ;; -(p6) fma.d.s1 f10 = f9, f8, f8 - ;; -(p6) fnorm.s.s0 fret0 = f10 -(p7) mov fret0 = f10 - br.ret.sptk rp - ;; - .endp __divsf3 -#endif - -#ifdef L__divdi3 -// Compute a 64-bit integer quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __divdi3 - .proc __divdi3 -__divdi3: - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f8 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - // Convert the inputs to FP, so that they won't be treated as unsigned. - fcvt.xf f8 = f8 - fcvt.xf f9 = f9 -(p7) break 1 - ;; - // Compute the reciprocal approximation. 
- frcpa.s1 f10, p6 = f8, f9 - ;; - // 3 Newton-Raphson iterations. -(p6) fnma.s1 f11 = f9, f10, f1 -(p6) fmpy.s1 f12 = f8, f10 - ;; -(p6) fmpy.s1 f13 = f11, f11 -(p6) fma.s1 f12 = f11, f12, f12 - ;; -(p6) fma.s1 f10 = f11, f10, f10 -(p6) fma.s1 f11 = f13, f12, f12 - ;; -(p6) fma.s1 f10 = f13, f10, f10 -(p6) fnma.s1 f12 = f9, f11, f8 - ;; -(p6) fma.s1 f10 = f12, f10, f11 - ;; - // Round quotient to an integer. - fcvt.fx.trunc.s1 f10 = f10 - ;; - // Transfer result to GP registers. - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __divdi3 -#endif - -#ifdef L__moddi3 -// Compute a 64-bit integer modulus. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend (a). in1 holds the divisor (b). - - .text - .align 16 - .global __moddi3 - .proc __moddi3 -__moddi3: - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f14 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - // Convert the inputs to FP, so that they won't be treated as unsigned. - fcvt.xf f8 = f14 - fcvt.xf f9 = f9 -(p7) break 1 - ;; - // Compute the reciprocal approximation. - frcpa.s1 f10, p6 = f8, f9 - ;; - // 3 Newton-Raphson iterations. -(p6) fmpy.s1 f12 = f8, f10 -(p6) fnma.s1 f11 = f9, f10, f1 - ;; -(p6) fma.s1 f12 = f11, f12, f12 -(p6) fmpy.s1 f13 = f11, f11 - ;; -(p6) fma.s1 f10 = f11, f10, f10 -(p6) fma.s1 f11 = f13, f12, f12 - ;; - sub in1 = r0, in1 -(p6) fma.s1 f10 = f13, f10, f10 -(p6) fnma.s1 f12 = f9, f11, f8 - ;; - setf.sig f9 = in1 -(p6) fma.s1 f10 = f12, f10, f11 - ;; - fcvt.fx.trunc.s1 f10 = f10 - ;; - // r = q * (-b) + a - xma.l f10 = f10, f9, f14 - ;; - // Transfer result to GP registers. - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __moddi3 -#endif - -#ifdef L__udivdi3 -// Compute a 64-bit unsigned integer quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __udivdi3 - .proc __udivdi3 -__udivdi3: - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f8 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - // Convert the inputs to FP, to avoid FP software-assist faults. - fcvt.xuf.s1 f8 = f8 - fcvt.xuf.s1 f9 = f9 -(p7) break 1 - ;; - // Compute the reciprocal approximation. - frcpa.s1 f10, p6 = f8, f9 - ;; - // 3 Newton-Raphson iterations. -(p6) fnma.s1 f11 = f9, f10, f1 -(p6) fmpy.s1 f12 = f8, f10 - ;; -(p6) fmpy.s1 f13 = f11, f11 -(p6) fma.s1 f12 = f11, f12, f12 - ;; -(p6) fma.s1 f10 = f11, f10, f10 -(p6) fma.s1 f11 = f13, f12, f12 - ;; -(p6) fma.s1 f10 = f13, f10, f10 -(p6) fnma.s1 f12 = f9, f11, f8 - ;; -(p6) fma.s1 f10 = f12, f10, f11 - ;; - // Round quotient to an unsigned integer. - fcvt.fxu.trunc.s1 f10 = f10 - ;; - // Transfer result to GP registers. - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __udivdi3 -#endif - -#ifdef L__umoddi3 -// Compute a 64-bit unsigned integer modulus. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend (a). in1 holds the divisor (b). - - .text - .align 16 - .global __umoddi3 - .proc __umoddi3 -__umoddi3: - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f14 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - // Convert the inputs to FP, to avoid FP software assist faults. 
- fcvt.xuf.s1 f8 = f14 - fcvt.xuf.s1 f9 = f9 -(p7) break 1; - ;; - // Compute the reciprocal approximation. - frcpa.s1 f10, p6 = f8, f9 - ;; - // 3 Newton-Raphson iterations. -(p6) fmpy.s1 f12 = f8, f10 -(p6) fnma.s1 f11 = f9, f10, f1 - ;; -(p6) fma.s1 f12 = f11, f12, f12 -(p6) fmpy.s1 f13 = f11, f11 - ;; -(p6) fma.s1 f10 = f11, f10, f10 -(p6) fma.s1 f11 = f13, f12, f12 - ;; - sub in1 = r0, in1 -(p6) fma.s1 f10 = f13, f10, f10 -(p6) fnma.s1 f12 = f9, f11, f8 - ;; - setf.sig f9 = in1 -(p6) fma.s1 f10 = f12, f10, f11 - ;; - // Round quotient to an unsigned integer. - fcvt.fxu.trunc.s1 f10 = f10 - ;; - // r = q * (-b) + a - xma.l f10 = f10, f9, f14 - ;; - // Transfer result to GP registers. - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __umoddi3 -#endif - -#ifdef L__divsi3 -// Compute a 32-bit integer quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __divsi3 - .proc __divsi3 -__divsi3: - .regstk 2,0,0,0 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - sxt4 in0 = in0 - sxt4 in1 = in1 - ;; - setf.sig f8 = in0 - setf.sig f9 = in1 -(p7) break 1 - ;; - mov r2 = 0x0ffdd - fcvt.xf f8 = f8 - fcvt.xf f9 = f9 - ;; - setf.exp f11 = r2 - frcpa.s1 f10, p6 = f8, f9 - ;; -(p6) fmpy.s1 f8 = f8, f10 -(p6) fnma.s1 f9 = f9, f10, f1 - ;; -(p6) fma.s1 f8 = f9, f8, f8 -(p6) fma.s1 f9 = f9, f9, f11 - ;; -(p6) fma.s1 f10 = f9, f8, f8 - ;; - fcvt.fx.trunc.s1 f10 = f10 - ;; - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __divsi3 -#endif - -#ifdef L__modsi3 -// Compute a 32-bit integer modulus. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __modsi3 - .proc __modsi3 -__modsi3: - .regstk 2,0,0,0 - mov r2 = 0x0ffdd - sxt4 in0 = in0 - sxt4 in1 = in1 - ;; - setf.sig f13 = r32 - setf.sig f9 = r33 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - sub in1 = r0, in1 - fcvt.xf f8 = f13 - fcvt.xf f9 = f9 - ;; - setf.exp f11 = r2 - frcpa.s1 f10, p6 = f8, f9 -(p7) break 1 - ;; -(p6) fmpy.s1 f12 = f8, f10 -(p6) fnma.s1 f10 = f9, f10, f1 - ;; - setf.sig f9 = in1 -(p6) fma.s1 f12 = f10, f12, f12 -(p6) fma.s1 f10 = f10, f10, f11 - ;; -(p6) fma.s1 f10 = f10, f12, f12 - ;; - fcvt.fx.trunc.s1 f10 = f10 - ;; - xma.l f10 = f10, f9, f13 - ;; - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __modsi3 -#endif - -#ifdef L__udivsi3 -// Compute a 32-bit unsigned integer quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __udivsi3 - .proc __udivsi3 -__udivsi3: - .regstk 2,0,0,0 - mov r2 = 0x0ffdd - zxt4 in0 = in0 - zxt4 in1 = in1 - ;; - setf.sig f8 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - fcvt.xf f8 = f8 - fcvt.xf f9 = f9 -(p7) break 1 - ;; - setf.exp f11 = r2 - frcpa.s1 f10, p6 = f8, f9 - ;; -(p6) fmpy.s1 f8 = f8, f10 -(p6) fnma.s1 f9 = f9, f10, f1 - ;; -(p6) fma.s1 f8 = f9, f8, f8 -(p6) fma.s1 f9 = f9, f9, f11 - ;; -(p6) fma.s1 f10 = f9, f8, f8 - ;; - fcvt.fxu.trunc.s1 f10 = f10 - ;; - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __udivsi3 -#endif - -#ifdef L__umodsi3 -// Compute a 32-bit unsigned integer modulus. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. 
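All of the IA-64 divide and modulus entry points above share one recipe: move the operands to floating point, take the hardware's frcpa reciprocal seed, sharpen it with a few Newton-Raphson fma steps, multiply, and truncate back to an integer (the modulus variants then fold in r = a - q*b with a single xma against the negated divisor). The C sketch below only models the idea: frcpa_seed_sketch is a made-up stand-in for the roughly 8-bit hardware approximation, double arithmetic replaces the 82-bit register format, and a final integer fix-up takes the place of the carefully ordered fma tail, so it is not the routine's actual error analysis. The __umodsi3 body follows.

#include <stdint.h>

/* Made-up stand-in for frcpa's reciprocal seed; any rough
   approximation works, because each iteration below roughly doubles
   the number of correct bits.  */
static double
frcpa_seed_sketch (double b)
{
  return 1.0 / b;
}

static uint32_t
udivsi3_nr_sketch (uint32_t a, uint32_t b)
{
  if (b == 0)
    return 0;                   /* the real routines trap (break 1) */

  double fa = (double) a, fb = (double) b;
  double y = frcpa_seed_sketch (fb);          /* y ~= 1/b */

  /* Newton-Raphson refinement: e = 1 - b*y, then y += y*e.  The
     assembly expresses the same update as fnma/fma chains.  */
  for (int i = 0; i < 3; i++)
    {
      double e = 1.0 - fb * y;
      y = y + y * e;
    }

  /* Quotient estimate, truncated toward zero (fcvt.fxu.trunc),
     then at most one step of integer correction either way.  */
  uint64_t q = (uint64_t) (fa * y);
  if (q > 0 && (uint64_t) b * q > a)
    q--;
  else if ((uint64_t) b * (q + 1) <= a)
    q++;
  return (uint32_t) q;
}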
- - .text - .align 16 - .global __umodsi3 - .proc __umodsi3 -__umodsi3: - .regstk 2,0,0,0 - mov r2 = 0x0ffdd - zxt4 in0 = in0 - zxt4 in1 = in1 - ;; - setf.sig f13 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - sub in1 = r0, in1 - fcvt.xf f8 = f13 - fcvt.xf f9 = f9 - ;; - setf.exp f11 = r2 - frcpa.s1 f10, p6 = f8, f9 -(p7) break 1; - ;; -(p6) fmpy.s1 f12 = f8, f10 -(p6) fnma.s1 f10 = f9, f10, f1 - ;; - setf.sig f9 = in1 -(p6) fma.s1 f12 = f10, f12, f12 -(p6) fma.s1 f10 = f10, f10, f11 - ;; -(p6) fma.s1 f10 = f10, f12, f12 - ;; - fcvt.fxu.trunc.s1 f10 = f10 - ;; - xma.l f10 = f10, f9, f13 - ;; - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __umodsi3 -#endif - -#ifdef L__save_stack_nonlocal -// Notes on save/restore stack nonlocal: We read ar.bsp but write -// ar.bspstore. This is because ar.bsp can be read at all times -// (independent of the RSE mode) but since it's read-only we need to -// restore the value via ar.bspstore. This is OK because -// ar.bsp==ar.bspstore after executing "flushrs". - -// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer) - - .text - .align 16 - .global __ia64_save_stack_nonlocal - .proc __ia64_save_stack_nonlocal -__ia64_save_stack_nonlocal: - { .mmf - alloc r18 = ar.pfs, 2, 0, 0, 0 - mov r19 = ar.rsc - ;; - } - { .mmi - flushrs - st8 [in0] = in1, 24 - and r19 = 0x1c, r19 - ;; - } - { .mmi - st8 [in0] = r18, -16 - mov ar.rsc = r19 - or r19 = 0x3, r19 - ;; - } - { .mmi - mov r16 = ar.bsp - mov r17 = ar.rnat - adds r2 = 8, in0 - ;; - } - { .mmi - st8 [in0] = r16 - st8 [r2] = r17 - } - { .mib - mov ar.rsc = r19 - br.ret.sptk.few rp - ;; - } - .endp __ia64_save_stack_nonlocal -#endif - -#ifdef L__nonlocal_goto -// void __ia64_nonlocal_goto(void *target_label, void *save_area, -// void *static_chain); - - .text - .align 16 - .global __ia64_nonlocal_goto - .proc __ia64_nonlocal_goto -__ia64_nonlocal_goto: - { .mmi - alloc r20 = ar.pfs, 3, 0, 0, 0 - ld8 r12 = [in1], 8 - mov.ret.sptk rp = in0, .L0 - ;; - } - { .mmf - ld8 r16 = [in1], 8 - mov r19 = ar.rsc - ;; - } - { .mmi - flushrs - ld8 r17 = [in1], 8 - and r19 = 0x1c, r19 - ;; - } - { .mmi - ld8 r18 = [in1] - mov ar.rsc = r19 - or r19 = 0x3, r19 - ;; - } - { .mmi - mov ar.bspstore = r16 - ;; - mov ar.rnat = r17 - ;; - } - { .mmi - loadrs - invala - mov r15 = in2 - ;; - } -.L0: { .mib - mov ar.rsc = r19 - mov ar.pfs = r18 - br.ret.sptk.few rp - ;; - } - .endp __ia64_nonlocal_goto -#endif - -#ifdef L__restore_stack_nonlocal -// This is mostly the same as nonlocal_goto above. -// ??? This has not been tested yet. - -// void __ia64_restore_stack_nonlocal(void *save_area) - - .text - .align 16 - .global __ia64_restore_stack_nonlocal - .proc __ia64_restore_stack_nonlocal -__ia64_restore_stack_nonlocal: - { .mmf - alloc r20 = ar.pfs, 4, 0, 0, 0 - ld8 r12 = [in0], 8 - ;; - } - { .mmb - ld8 r16=[in0], 8 - mov r19 = ar.rsc - ;; - } - { .mmi - flushrs - ld8 r17 = [in0], 8 - and r19 = 0x1c, r19 - ;; - } - { .mmf - ld8 r18 = [in0] - mov ar.rsc = r19 - ;; - } - { .mmi - mov ar.bspstore = r16 - ;; - mov ar.rnat = r17 - or r19 = 0x3, r19 - ;; - } - { .mmf - loadrs - invala - ;; - } -.L0: { .mib - mov ar.rsc = r19 - mov ar.pfs = r18 - br.ret.sptk.few rp - ;; - } - .endp __ia64_restore_stack_nonlocal -#endif - -#ifdef L__trampoline -// Implement the nested function trampoline. This is out of line -// so that we don't have to bother with flushing the icache, as -// well as making the on-stack trampoline smaller. 
-// -// The trampoline has the following form: -// -// +-------------------+ > -// TRAMP: | __ia64_trampoline | | -// +-------------------+ > fake function descriptor -// | TRAMP+16 | | -// +-------------------+ > -// | target descriptor | -// +-------------------+ -// | static link | -// +-------------------+ - - .text - .align 16 - .global __ia64_trampoline - .proc __ia64_trampoline -__ia64_trampoline: - { .mmi - ld8 r2 = [r1], 8 - ;; - ld8 r15 = [r1] - } - { .mmi - ld8 r3 = [r2], 8 - ;; - ld8 r1 = [r2] - mov b6 = r3 - } - { .bbb - br.sptk.many b6 - ;; - } - .endp __ia64_trampoline -#endif - -#ifdef SHARED -// Thunks for backward compatibility. -#ifdef L_fixtfdi - .text - .align 16 - .global __fixtfti - .proc __fixtfti -__fixtfti: - { .bbb - br.sptk.many __fixxfti - ;; - } - .endp __fixtfti -#endif -#ifdef L_fixunstfdi - .align 16 - .global __fixunstfti - .proc __fixunstfti -__fixunstfti: - { .bbb - br.sptk.many __fixunsxfti - ;; - } - .endp __fixunstfti -#endif -#ifdef L_floatditf - .align 16 - .global __floattitf - .proc __floattitf -__floattitf: - { .bbb - br.sptk.many __floattixf - ;; - } - .endp __floattitf -#endif -#endif diff --git a/gcc/config/ia64/t-hpux b/gcc/config/ia64/t-hpux index e1554861d18..23691f3856c 100644 --- a/gcc/config/ia64/t-hpux +++ b/gcc/config/ia64/t-hpux @@ -26,12 +26,6 @@ MULTILIB_OPTIONS = milp32/mlp64 MULTILIB_DIRNAMES = hpux32 hpux64 MULTILIB_MATCHES = -# On HP-UX we do not want _fixtfdi, _fixunstfdi, or _floatditf from -# LIB1ASMSRC. These functions map the 128 bit conversion function names -# to 80 bit conversions and were done for Linux backwards compatibility. - -LIB1ASMFUNCS := $(filter-out _fixtfdi _fixunstfdi _floatditf,$(LIB1ASMFUNCS)) - # Support routines for HP-UX 128 bit floats. LIB2FUNCS_EXTRA=quadlib.c $(srcdir)/config/floatunsitf.c @@ -39,12 +33,6 @@ LIB2FUNCS_EXTRA=quadlib.c $(srcdir)/config/floatunsitf.c quadlib.c: $(srcdir)/config/ia64/quadlib.c cat $(srcdir)/config/ia64/quadlib.c > quadlib.c -# We get an undefined main when building a cross compiler because our -# linkspec has "-u main" and we want that for linking but it makes -# LIBGCC1_TEST fail because it uses -nostdlib -nostartup. - -LIBGCC1_TEST = - # We do not want to include the EH stuff that linux uses, we want to use # the HP-UX libunwind library. diff --git a/gcc/config/ia64/t-ia64 b/gcc/config/ia64/t-ia64 index a143d43d56c..8a54d46b458 100644 --- a/gcc/config/ia64/t-ia64 +++ b/gcc/config/ia64/t-ia64 @@ -18,19 +18,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = ia64/lib1funcs.asm - -# We use different names for the DImode div/mod files so that they won't -# conflict with libgcc2.c files. We used to use __ia64 as a prefix, now -# we use __ as the prefix. Note that L_divdi3 in libgcc2.c actually defines -# a TImode divide function, so there is no actual overlap here between -# libgcc2.c and lib1funcs.asm. -LIB1ASMFUNCS = __divxf3 __divdf3 __divsf3 \ - __divdi3 __moddi3 __udivdi3 __umoddi3 \ - __divsi3 __modsi3 __udivsi3 __umodsi3 __save_stack_nonlocal \ - __nonlocal_goto __restore_stack_nonlocal __trampoline \ - _fixtfdi _fixunstfdi _floatditf - # ??? Hack to get -P option used when compiling lib1funcs.asm, because Intel # assembler does not accept # line number as a comment. # ??? 
This breaks C++ pragma interface/implementation, which is used in the diff --git a/gcc/config/iq2000/t-iq2000 b/gcc/config/iq2000/t-iq2000 index 03d8c703f86..c634e58646e 100644 --- a/gcc/config/iq2000/t-iq2000 +++ b/gcc/config/iq2000/t-iq2000 @@ -16,11 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -# Suppress building libgcc1.a, since the MIPS compiler port is complete -# and does not need anything from libgcc1.a. -LIBGCC1 = -CROSS_LIBGCC1 = - LIB2FUNCS_EXTRA = $(srcdir)/config/udivmod.c $(srcdir)/config/divmod.c $(srcdir)/config/udivmodsi4.c $(srcdir)/config/iq2000/lib2extra-funcs.c # Enable the following if multilibs are needed. diff --git a/gcc/config/m32c/m32c-lib1.S b/gcc/config/m32c/m32c-lib1.S deleted file mode 100644 index 9b657787187..00000000000 --- a/gcc/config/m32c/m32c-lib1.S +++ /dev/null @@ -1,231 +0,0 @@ -/* libgcc routines for R8C/M16C/M32C - Copyright (C) 2005, 2009, 2010 - Free Software Foundation, Inc. - Contributed by Red Hat. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#if defined(__r8c_cpu__) || defined(__m16c_cpu__) -#define A16 -#define A(n,w) n -#define W w -#else -#define A24 -#define A(n,w) w -#define W l -#endif - - -#ifdef L__m32c_memregs - -/* Warning: these memory locations are used as a register bank. They - *must* end up consecutive in any final executable, so you may *not* - use the otherwise obvious ".comm" directive to allocate space for - them. */ - - .bss - .global mem0 -mem0: .space 1 - .global mem1 -mem1: .space 1 - .global mem2 -mem2: .space 1 - .global mem3 -mem3: .space 1 - .global mem4 -mem4: .space 1 - .global mem5 -mem5: .space 1 - .global mem6 -mem6: .space 1 - .global mem7 -mem7: .space 1 - .global mem8 -mem8: .space 1 - .global mem9 -mem9: .space 1 - .global mem10 -mem10: .space 1 - .global mem11 -mem11: .space 1 - .global mem12 -mem12: .space 1 - .global mem13 -mem13: .space 1 - .global mem14 -mem14: .space 1 - .global mem15 -mem15: .space 1 - -#endif - -#ifdef L__m32c_eh_return - .text - .global __m32c_eh_return -__m32c_eh_return: - - /* At this point, r0 has the stack adjustment, r1r3 has the - address to return to. The stack looks like this: - - old_ra - old_fp - <- unwound sp - ... - fb - through - r0 - <- sp - - What we need to do is restore all the registers, update the - stack, and return to the right place. - */ - - stc sp,a0 - - add.W A(#16,#24),a0 - /* a0 points to the current stack, just above the register - save areas */ - - mov.w a0,a1 - exts.w r0 - sub.W A(r0,r2r0),a1 - sub.W A(#3,#4),a1 - /* a1 points to the new stack. */ - - /* This is for the "rts" below. 
*/ - mov.w r1,[a1] -#ifdef A16 - mov.w r2,r1 - mov.b r1l,2[a1] -#else - mov.w r2,2[a1] -#endif - - /* This is for the "popc sp" below. */ - mov.W a1,[a0] - - popm r0,r1,r2,r3,a0,a1,sb,fb - popc sp - rts -#endif - -/* SImode arguments for SI foo(SI,SI) functions. */ -#ifdef A16 -#define SAL 5[fb] -#define SAH 7[fb] -#define SBL 9[fb] -#define SBH 11[fb] -#else -#define SAL 8[fb] -#define SAH 10[fb] -#define SBL 12[fb] -#define SBH 14[fb] -#endif - -#ifdef L__m32c_mulsi3 - .text - .global ___mulsi3 -___mulsi3: - enter #0 - push.w r2 - mov.w SAL,r0 - mulu.w SBL,r0 /* writes to r2r0 */ - mov.w r0,mem0 - mov.w r2,mem2 - mov.w SAL,r0 - mulu.w SBH,r0 /* writes to r2r0 */ - add.w r0,mem2 - mov.w SAH,r0 - mulu.w SBL,r0 /* writes to r2r0 */ - add.w r0,mem2 - pop.w r2 - exitd -#endif - -#ifdef L__m32c_cmpsi2 - .text - .global ___cmpsi2 -___cmpsi2: - enter #0 - cmp.w SBH,SAH - jgt cmpsi_gt - jlt cmpsi_lt - cmp.w SBL,SAL - jgt cmpsi_gt - jlt cmpsi_lt - mov.w #1,r0 - exitd -cmpsi_gt: - mov.w #2,r0 - exitd -cmpsi_lt: - mov.w #0,r0 - exitd -#endif - -#ifdef L__m32c_ucmpsi2 - .text - .global ___ucmpsi2 -___ucmpsi2: - enter #0 - cmp.w SBH,SAH - jgtu cmpsi_gt - jltu cmpsi_lt - cmp.w SBL,SAL - jgtu cmpsi_gt - jltu cmpsi_lt - mov.w #1,r0 - exitd -cmpsi_gt: - mov.w #2,r0 - exitd -cmpsi_lt: - mov.w #0,r0 - exitd -#endif - -#ifdef L__m32c_jsri16 - .text -#ifdef A16 - .global m32c_jsri16 -m32c_jsri16: - add.w #-1, sp - - /* Read the address (16 bits) and return address (24 bits) off - the stack. */ - mov.w 4[sp], r0 - mov.w 1[sp], r3 - mov.b 3[sp], a0 /* This zero-extends, so the high byte has - zero in it. */ - - /* Write the return address, then new address, to the stack. */ - mov.w a0, 1[sp] /* Just to get the zero in 2[sp]. */ - mov.w r0, 0[sp] - mov.w r3, 3[sp] - mov.b a0, 5[sp] - - /* This "returns" to the target address, leaving the pending - return address on the stack. */ - rts -#endif - -#endif diff --git a/gcc/config/m32c/m32c.c b/gcc/config/m32c/m32c.c index 7040df69fcf..04f69050609 100644 --- a/gcc/config/m32c/m32c.c +++ b/gcc/config/m32c/m32c.c @@ -391,7 +391,7 @@ class_can_hold_mode (reg_class_t rclass, enum machine_mode mode) we allow the user to limit the number of memregs available, in order to try to persuade gcc to try harder to use real registers. - Memregs are provided by m32c-lib1.S. + Memregs are provided by lib1funcs.S. */ int ok_to_change_target_memregs = TRUE; diff --git a/gcc/config/m32c/t-m32c b/gcc/config/m32c/t-m32c index b11f34d674f..aad972a2575 100644 --- a/gcc/config/m32c/t-m32c +++ b/gcc/config/m32c/t-m32c @@ -19,16 +19,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = m32c/m32c-lib1.S - -LIB1ASMFUNCS = \ - __m32c_memregs \ - __m32c_eh_return \ - __m32c_mulsi3 \ - __m32c_cmpsi2 \ - __m32c_ucmpsi2 \ - __m32c_jsri16 - LIB2FUNCS_EXTRA = $(srcdir)/config/m32c/m32c-lib2.c $(srcdir)/config/m32c/m32c-lib2-trapv.c # target-specific files diff --git a/gcc/config/m32r/t-linux b/gcc/config/m32r/t-linux index 487c0198786..f3b89d21d0b 100644 --- a/gcc/config/m32r/t-linux +++ b/gcc/config/m32r/t-linux @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -# lib1funcs.asm is currently empty. -CROSS_LIBGCC1 = - # Turn off the SDA while compiling libgcc2. There are no headers for it # and we want maximal upward compatibility here. @@ -26,9 +23,3 @@ TARGET_LIBGCC2_CFLAGS = -G 0 -fPIC # Don't install "assert.h" in gcc. We use the one in glibc. INSTALL_ASSERT_H = - -# Do not build libgcc1. Let gcc generate those functions. 
The GNU/Linux -# C library can handle them. -LIBGCC1 = -CROSS_LIBGCC1 = -LIBGCC1_TEST = diff --git a/gcc/config/m68k/lb1sf68.asm b/gcc/config/m68k/lb1sf68.asm deleted file mode 100644 index 0339a092c4f..00000000000 --- a/gcc/config/m68k/lb1sf68.asm +++ /dev/null @@ -1,4116 +0,0 @@ -/* libgcc routines for 68000 w/o floating-point hardware. - Copyright (C) 1994, 1996, 1997, 1998, 2008, 2009 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -/* Use this one for any 680x0; assumes no floating point hardware. - The trailing " '" appearing on some lines is for ANSI preprocessors. Yuk. - Some of this code comes from MINIX, via the folks at ericsson. - D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992 -*/ - -/* These are predefined by new versions of GNU cpp. */ - -#ifndef __USER_LABEL_PREFIX__ -#define __USER_LABEL_PREFIX__ _ -#endif - -#ifndef __REGISTER_PREFIX__ -#define __REGISTER_PREFIX__ -#endif - -#ifndef __IMMEDIATE_PREFIX__ -#define __IMMEDIATE_PREFIX__ # -#endif - -/* ANSI concatenation macros. */ - -#define CONCAT1(a, b) CONCAT2(a, b) -#define CONCAT2(a, b) a ## b - -/* Use the right prefix for global labels. */ - -#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) - -/* Note that X is a function. */ - -#ifdef __ELF__ -#define FUNC(x) .type SYM(x),function -#else -/* The .proc pseudo-op is accepted, but ignored, by GAS. We could just - define this to the empty string for non-ELF systems, but defining it - to .proc means that the information is available to the assembler if - the need arises. */ -#define FUNC(x) .proc -#endif - -/* Use the right prefix for registers. */ - -#define REG(x) CONCAT1 (__REGISTER_PREFIX__, x) - -/* Use the right prefix for immediate values. */ - -#define IMM(x) CONCAT1 (__IMMEDIATE_PREFIX__, x) - -#define d0 REG (d0) -#define d1 REG (d1) -#define d2 REG (d2) -#define d3 REG (d3) -#define d4 REG (d4) -#define d5 REG (d5) -#define d6 REG (d6) -#define d7 REG (d7) -#define a0 REG (a0) -#define a1 REG (a1) -#define a2 REG (a2) -#define a3 REG (a3) -#define a4 REG (a4) -#define a5 REG (a5) -#define a6 REG (a6) -#define fp REG (fp) -#define sp REG (sp) -#define pc REG (pc) - -/* Provide a few macros to allow for PIC code support. - * With PIC, data is stored A5 relative so we've got to take a bit of special - * care to ensure that all loads of global data is via A5. PIC also requires - * jumps and subroutine calls to be PC relative rather than absolute. We cheat - * a little on this and in the PIC case, we use short offset branches and - * hope that the final object code is within range (which it should be). 
- */ -#ifndef __PIC__ - - /* Non PIC (absolute/relocatable) versions */ - - .macro PICCALL addr - jbsr \addr - .endm - - .macro PICJUMP addr - jmp \addr - .endm - - .macro PICLEA sym, reg - lea \sym, \reg - .endm - - .macro PICPEA sym, areg - pea \sym - .endm - -#else /* __PIC__ */ - -# if defined (__uClinux__) - - /* Versions for uClinux */ - -# if defined(__ID_SHARED_LIBRARY__) - - /* -mid-shared-library versions */ - - .macro PICLEA sym, reg - movel a5@(_current_shared_library_a5_offset_), \reg - movel \sym@GOT(\reg), \reg - .endm - - .macro PICPEA sym, areg - movel a5@(_current_shared_library_a5_offset_), \areg - movel \sym@GOT(\areg), sp@- - .endm - - .macro PICCALL addr - PICLEA \addr,a0 - jsr a0@ - .endm - - .macro PICJUMP addr - PICLEA \addr,a0 - jmp a0@ - .endm - -# else /* !__ID_SHARED_LIBRARY__ */ - - /* Versions for -msep-data */ - - .macro PICLEA sym, reg - movel \sym@GOT(a5), \reg - .endm - - .macro PICPEA sym, areg - movel \sym@GOT(a5), sp@- - .endm - - .macro PICCALL addr -#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__) - lea \addr-.-8,a0 - jsr pc@(a0) -#else - jbsr \addr -#endif - .endm - - .macro PICJUMP addr - /* ISA C has no bra.l instruction, and since this assembly file - gets assembled into multiple object files, we avoid the - bra instruction entirely. */ -#if defined (__mcoldfire__) && !defined (__mcfisab__) - lea \addr-.-8,a0 - jmp pc@(a0) -#else - bra \addr -#endif - .endm - -# endif - -# else /* !__uClinux__ */ - - /* Versions for Linux */ - - .macro PICLEA sym, reg - movel #_GLOBAL_OFFSET_TABLE_@GOTPC, \reg - lea (-6, pc, \reg), \reg - movel \sym@GOT(\reg), \reg - .endm - - .macro PICPEA sym, areg - movel #_GLOBAL_OFFSET_TABLE_@GOTPC, \areg - lea (-6, pc, \areg), \areg - movel \sym@GOT(\areg), sp@- - .endm - - .macro PICCALL addr -#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__) - lea \addr-.-8,a0 - jsr pc@(a0) -#else - jbsr \addr -#endif - .endm - - .macro PICJUMP addr - /* ISA C has no bra.l instruction, and since this assembly file - gets assembled into multiple object files, we avoid the - bra instruction entirely. */ -#if defined (__mcoldfire__) && !defined (__mcfisab__) - lea \addr-.-8,a0 - jmp pc@(a0) -#else - bra \addr -#endif - .endm - -# endif -#endif /* __PIC__ */ - - -#ifdef L_floatex - -| This is an attempt at a decent floating point (single, double and -| extended double) code for the GNU C compiler. It should be easy to -| adapt to other compilers (but beware of the local labels!). - -| Starting date: 21 October, 1990 - -| It is convenient to introduce the notation (s,e,f) for a floating point -| number, where s=sign, e=exponent, f=fraction. We will call a floating -| point number fpn to abbreviate, independently of the precision. -| Let MAX_EXP be in each case the maximum exponent (255 for floats, 1023 -| for doubles and 16383 for long doubles). We then have the following -| different cases: -| 1. Normalized fpns have 0 < e < MAX_EXP. They correspond to -| (-1)^s x 1.f x 2^(e-bias-1). -| 2. Denormalized fpns have e=0. They correspond to numbers of the form -| (-1)^s x 0.f x 2^(-bias). -| 3. +/-INFINITY have e=MAX_EXP, f=0. -| 4. Quiet NaN (Not a Number) have all bits set. -| 5. Signaling NaN (Not a Number) have s=0, e=MAX_EXP, f=1. 
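For reference, the five cases above correspond directly to the bit pattern of an IEEE double. A minimal C sketch of the same classification (illustration only, not part of the sources being moved; the helper name is made up):

    #include <stdint.h>
    #include <string.h>

    /* Split a double into (s, e, f) and name the case, following the
       comment above.  MAX_EXP for doubles is 0x7ff.  */
    static const char *
    classify_double (double x)
    {
      uint64_t bits, s, e, f;

      memcpy (&bits, &x, sizeof bits);        /* reinterpret the bit pattern */
      s = bits >> 63;                         /* sign */
      e = (bits >> 52) & 0x7ff;               /* 11-bit exponent */
      f = bits & 0xfffffffffffffULL;          /* 52-bit fraction */
      (void) s;

      if (e == 0)
        return f ? "denormalized" : "zero";   /* case 2 (f == 0 is +/-0) */
      if (e == 0x7ff)
        return f ? "NaN" : "infinity";        /* cases 3, 4 and 5 */
      return "normalized";                    /* case 1: (-1)^s x 1.f x 2^(e-bias-1) */
    }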
- -|============================================================================= -| exceptions -|============================================================================= - -| This is the floating point condition code register (_fpCCR): -| -| struct { -| short _exception_bits; -| short _trap_enable_bits; -| short _sticky_bits; -| short _rounding_mode; -| short _format; -| short _last_operation; -| union { -| float sf; -| double df; -| } _operand1; -| union { -| float sf; -| double df; -| } _operand2; -| } _fpCCR; - - .data - .even - - .globl SYM (_fpCCR) - -SYM (_fpCCR): -__exception_bits: - .word 0 -__trap_enable_bits: - .word 0 -__sticky_bits: - .word 0 -__rounding_mode: - .word ROUND_TO_NEAREST -__format: - .word NIL -__last_operation: - .word NOOP -__operand1: - .long 0 - .long 0 -__operand2: - .long 0 - .long 0 - -| Offsets: -EBITS = __exception_bits - SYM (_fpCCR) -TRAPE = __trap_enable_bits - SYM (_fpCCR) -STICK = __sticky_bits - SYM (_fpCCR) -ROUND = __rounding_mode - SYM (_fpCCR) -FORMT = __format - SYM (_fpCCR) -LASTO = __last_operation - SYM (_fpCCR) -OPER1 = __operand1 - SYM (_fpCCR) -OPER2 = __operand2 - SYM (_fpCCR) - -| The following exception types are supported: -INEXACT_RESULT = 0x0001 -UNDERFLOW = 0x0002 -OVERFLOW = 0x0004 -DIVIDE_BY_ZERO = 0x0008 -INVALID_OPERATION = 0x0010 - -| The allowed rounding modes are: -UNKNOWN = -1 -ROUND_TO_NEAREST = 0 | round result to nearest representable value -ROUND_TO_ZERO = 1 | round result towards zero -ROUND_TO_PLUS = 2 | round result towards plus infinity -ROUND_TO_MINUS = 3 | round result towards minus infinity - -| The allowed values of format are: -NIL = 0 -SINGLE_FLOAT = 1 -DOUBLE_FLOAT = 2 -LONG_FLOAT = 3 - -| The allowed values for the last operation are: -NOOP = 0 -ADD = 1 -MULTIPLY = 2 -DIVIDE = 3 -NEGATE = 4 -COMPARE = 5 -EXTENDSFDF = 6 -TRUNCDFSF = 7 - -|============================================================================= -| __clear_sticky_bits -|============================================================================= - -| The sticky bits are normally not cleared (thus the name), whereas the -| exception type and exception value reflect the last computation. -| This routine is provided to clear them (you can also write to _fpCCR, -| since it is globally visible). - - .globl SYM (__clear_sticky_bit) - - .text - .even - -| void __clear_sticky_bits(void); -SYM (__clear_sticky_bit): - PICLEA SYM (_fpCCR),a0 -#ifndef __mcoldfire__ - movew IMM (0),a0@(STICK) -#else - clr.w a0@(STICK) -#endif - rts - -|============================================================================= -| $_exception_handler -|============================================================================= - - .globl $_exception_handler - - .text - .even - -| This is the common exit point if an exception occurs. -| NOTE: it is NOT callable from C! -| It expects the exception type in d7, the format (SINGLE_FLOAT, -| DOUBLE_FLOAT or LONG_FLOAT) in d6, and the last operation code in d5. -| It sets the corresponding exception and sticky bits, and the format. -| Depending on the format if fills the corresponding slots for the -| operands which produced the exception (all this information is provided -| so if you write your own exception handlers you have enough information -| to deal with the problem). -| Then checks to see if the corresponding exception is trap-enabled, -| in which case it pushes the address of _fpCCR and traps through -| trap FPTRAP (15 for the moment). 
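Restated in C, the bookkeeping performed by $_exception_handler looks roughly like the sketch below (the struct mirrors the _fpCCR layout given in the comment above; the function name and the use of raise() as a stand-in for "trap #15" are illustrative only):

    #include <signal.h>

    struct fpCCR
    {
      short exception_bits;
      short trap_enable_bits;
      short sticky_bits;
      short rounding_mode;
      short format;
      short last_operation;
      union { float sf; double df; } operand1;
      union { float sf; double df; } operand2;
    };

    struct fpCCR fpCCR;                  /* the assembly exports this as _fpCCR */

    /* Record an exception and trap if it is trap-enabled, as described above.  */
    static void
    record_exception (short exc, short fmt, short op, double a, double b)
    {
      fpCCR.exception_bits = exc;        /* set __exception_bits */
      fpCCR.sticky_bits   |= exc;        /* and __sticky_bits */
      fpCCR.format         = fmt;        /* SINGLE_FLOAT, DOUBLE_FLOAT or LONG_FLOAT */
      fpCCR.last_operation = op;
      fpCCR.operand1.df    = a;          /* operands that produced the exception */
      fpCCR.operand2.df    = b;
      if (fpCCR.trap_enable_bits & exc)  /* is the exception trap-enabled?  */
        raise (SIGFPE);                  /* the real code does "trap #15" */
    }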
- -FPTRAP = 15 - -$_exception_handler: - PICLEA SYM (_fpCCR),a0 - movew d7,a0@(EBITS) | set __exception_bits -#ifndef __mcoldfire__ - orw d7,a0@(STICK) | and __sticky_bits -#else - movew a0@(STICK),d4 - orl d7,d4 - movew d4,a0@(STICK) -#endif - movew d6,a0@(FORMT) | and __format - movew d5,a0@(LASTO) | and __last_operation - -| Now put the operands in place: -#ifndef __mcoldfire__ - cmpw IMM (SINGLE_FLOAT),d6 -#else - cmpl IMM (SINGLE_FLOAT),d6 -#endif - beq 1f - movel a6@(8),a0@(OPER1) - movel a6@(12),a0@(OPER1+4) - movel a6@(16),a0@(OPER2) - movel a6@(20),a0@(OPER2+4) - bra 2f -1: movel a6@(8),a0@(OPER1) - movel a6@(12),a0@(OPER2) -2: -| And check whether the exception is trap-enabled: -#ifndef __mcoldfire__ - andw a0@(TRAPE),d7 | is exception trap-enabled? -#else - clrl d6 - movew a0@(TRAPE),d6 - andl d6,d7 -#endif - beq 1f | no, exit - PICPEA SYM (_fpCCR),a1 | yes, push address of _fpCCR - trap IMM (FPTRAP) | and trap -#ifndef __mcoldfire__ -1: moveml sp@+,d2-d7 | restore data registers -#else -1: moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts -#endif /* L_floatex */ - -#ifdef L_mulsi3 - .text - FUNC(__mulsi3) - .globl SYM (__mulsi3) -SYM (__mulsi3): - movew sp@(4), d0 /* x0 -> d0 */ - muluw sp@(10), d0 /* x0*y1 */ - movew sp@(6), d1 /* x1 -> d1 */ - muluw sp@(8), d1 /* x1*y0 */ -#ifndef __mcoldfire__ - addw d1, d0 -#else - addl d1, d0 -#endif - swap d0 - clrw d0 - movew sp@(6), d1 /* x1 -> d1 */ - muluw sp@(10), d1 /* x1*y1 */ - addl d1, d0 - - rts -#endif /* L_mulsi3 */ - -#ifdef L_udivsi3 - .text - FUNC(__udivsi3) - .globl SYM (__udivsi3) -SYM (__udivsi3): -#ifndef __mcoldfire__ - movel d2, sp@- - movel sp@(12), d1 /* d1 = divisor */ - movel sp@(8), d0 /* d0 = dividend */ - - cmpl IMM (0x10000), d1 /* divisor >= 2 ^ 16 ? */ - jcc L3 /* then try next algorithm */ - movel d0, d2 - clrw d2 - swap d2 - divu d1, d2 /* high quotient in lower word */ - movew d2, d0 /* save high quotient */ - swap d0 - movew sp@(10), d2 /* get low dividend + high rest */ - divu d1, d2 /* low quotient */ - movew d2, d0 - jra L6 - -L3: movel d1, d2 /* use d2 as divisor backup */ -L4: lsrl IMM (1), d1 /* shift divisor */ - lsrl IMM (1), d0 /* shift dividend */ - cmpl IMM (0x10000), d1 /* still divisor >= 2 ^ 16 ? */ - jcc L4 - divu d1, d0 /* now we have 16-bit divisor */ - andl IMM (0xffff), d0 /* mask out divisor, ignore remainder */ - -/* Multiply the 16-bit tentative quotient with the 32-bit divisor. Because of - the operand ranges, this might give a 33-bit product. If this product is - greater than the dividend, the tentative quotient was too large. */ - movel d2, d1 - mulu d0, d1 /* low part, 32 bits */ - swap d2 - mulu d0, d2 /* high part, at most 17 bits */ - swap d2 /* align high part with low part */ - tstw d2 /* high part 17 bits? */ - jne L5 /* if 17 bits, quotient was too large */ - addl d2, d1 /* add parts */ - jcs L5 /* if sum is 33 bits, quotient was too large */ - cmpl sp@(8), d1 /* compare the sum with the dividend */ - jls L6 /* if sum > dividend, quotient was too large */ -L5: subql IMM (1), d0 /* adjust quotient */ - -L6: movel sp@+, d2 - rts - -#else /* __mcoldfire__ */ - -/* ColdFire implementation of non-restoring division algorithm from - Hennessy & Patterson, Appendix A. 
*/ - link a6,IMM (-12) - moveml d2-d4,sp@ - movel a6@(8),d0 - movel a6@(12),d1 - clrl d2 | clear p - moveq IMM (31),d4 -L1: addl d0,d0 | shift reg pair (p,a) one bit left - addxl d2,d2 - movl d2,d3 | subtract b from p, store in tmp. - subl d1,d3 - jcs L2 | if no carry, - bset IMM (0),d0 | set the low order bit of a to 1, - movl d3,d2 | and store tmp in p. -L2: subql IMM (1),d4 - jcc L1 - moveml sp@,d2-d4 | restore data registers - unlk a6 | and return - rts -#endif /* __mcoldfire__ */ - -#endif /* L_udivsi3 */ - -#ifdef L_divsi3 - .text - FUNC(__divsi3) - .globl SYM (__divsi3) -SYM (__divsi3): - movel d2, sp@- - - moveq IMM (1), d2 /* sign of result stored in d2 (=1 or =-1) */ - movel sp@(12), d1 /* d1 = divisor */ - jpl L1 - negl d1 -#ifndef __mcoldfire__ - negb d2 /* change sign because divisor <0 */ -#else - negl d2 /* change sign because divisor <0 */ -#endif -L1: movel sp@(8), d0 /* d0 = dividend */ - jpl L2 - negl d0 -#ifndef __mcoldfire__ - negb d2 -#else - negl d2 -#endif - -L2: movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__udivsi3) /* divide abs(dividend) by abs(divisor) */ - addql IMM (8), sp - - tstb d2 - jpl L3 - negl d0 - -L3: movel sp@+, d2 - rts -#endif /* L_divsi3 */ - -#ifdef L_umodsi3 - .text - FUNC(__umodsi3) - .globl SYM (__umodsi3) -SYM (__umodsi3): - movel sp@(8), d1 /* d1 = divisor */ - movel sp@(4), d0 /* d0 = dividend */ - movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__udivsi3) - addql IMM (8), sp - movel sp@(8), d1 /* d1 = divisor */ -#ifndef __mcoldfire__ - movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__mulsi3) /* d0 = (a/b)*b */ - addql IMM (8), sp -#else - mulsl d1,d0 -#endif - movel sp@(4), d1 /* d1 = dividend */ - subl d0, d1 /* d1 = a - (a/b)*b */ - movel d1, d0 - rts -#endif /* L_umodsi3 */ - -#ifdef L_modsi3 - .text - FUNC(__modsi3) - .globl SYM (__modsi3) -SYM (__modsi3): - movel sp@(8), d1 /* d1 = divisor */ - movel sp@(4), d0 /* d0 = dividend */ - movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__divsi3) - addql IMM (8), sp - movel sp@(8), d1 /* d1 = divisor */ -#ifndef __mcoldfire__ - movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__mulsi3) /* d0 = (a/b)*b */ - addql IMM (8), sp -#else - mulsl d1,d0 -#endif - movel sp@(4), d1 /* d1 = dividend */ - subl d0, d1 /* d1 = a - (a/b)*b */ - movel d1, d0 - rts -#endif /* L_modsi3 */ - - -#ifdef L_double - - .globl SYM (_fpCCR) - .globl $_exception_handler - -QUIET_NaN = 0xffffffff - -D_MAX_EXP = 0x07ff -D_BIAS = 1022 -DBL_MAX_EXP = D_MAX_EXP - D_BIAS -DBL_MIN_EXP = 1 - D_BIAS -DBL_MANT_DIG = 53 - -INEXACT_RESULT = 0x0001 -UNDERFLOW = 0x0002 -OVERFLOW = 0x0004 -DIVIDE_BY_ZERO = 0x0008 -INVALID_OPERATION = 0x0010 - -DOUBLE_FLOAT = 2 - -NOOP = 0 -ADD = 1 -MULTIPLY = 2 -DIVIDE = 3 -NEGATE = 4 -COMPARE = 5 -EXTENDSFDF = 6 -TRUNCDFSF = 7 - -UNKNOWN = -1 -ROUND_TO_NEAREST = 0 | round result to nearest representable value -ROUND_TO_ZERO = 1 | round result towards zero -ROUND_TO_PLUS = 2 | round result towards plus infinity -ROUND_TO_MINUS = 3 | round result towards minus infinity - -| Entry points: - - .globl SYM (__adddf3) - .globl SYM (__subdf3) - .globl SYM (__muldf3) - .globl SYM (__divdf3) - .globl SYM (__negdf2) - .globl SYM (__cmpdf2) - .globl SYM (__cmpdf2_internal) - .hidden SYM (__cmpdf2_internal) - - .text - .even - -| These are common routines to return and signal exceptions. 
- -Ld$den: -| Return and signal a denormalized number - orl d7,d0 - movew IMM (INEXACT_RESULT+UNDERFLOW),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -Ld$infty: -Ld$overflow: -| Return a properly signed INFINITY and set the exception flags - movel IMM (0x7ff00000),d0 - movel IMM (0),d1 - orl d7,d0 - movew IMM (INEXACT_RESULT+OVERFLOW),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -Ld$underflow: -| Return 0 and set the exception flags - movel IMM (0),d0 - movel d0,d1 - movew IMM (INEXACT_RESULT+UNDERFLOW),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -Ld$inop: -| Return a quiet NaN and set the exception flags - movel IMM (QUIET_NaN),d0 - movel d0,d1 - movew IMM (INEXACT_RESULT+INVALID_OPERATION),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -Ld$div$0: -| Return a properly signed INFINITY and set the exception flags - movel IMM (0x7ff00000),d0 - movel IMM (0),d1 - orl d7,d0 - movew IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -|============================================================================= -|============================================================================= -| double precision routines -|============================================================================= -|============================================================================= - -| A double precision floating point number (double) has the format: -| -| struct _double { -| unsigned int sign : 1; /* sign bit */ -| unsigned int exponent : 11; /* exponent, shifted by 126 */ -| unsigned int fraction : 52; /* fraction */ -| } double; -| -| Thus sizeof(double) = 8 (64 bits). -| -| All the routines are callable from C programs, and return the result -| in the register pair d0-d1. They also preserve all registers except -| d0-d1 and a0-a1. 
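In C terms, the layout and entry points described above amount to the following (a sketch only: the 52-bit fraction is split in two because a single bit-field may not exceed the width of unsigned int, and the field ordering assumed here is the big-endian m68k one):

    /* Bit layout of a double, per the comment above.  */
    struct _double
    {
      unsigned int sign : 1;             /* sign bit */
      unsigned int exponent : 11;        /* biased exponent */
      unsigned int fraction_hi : 20;     /* upper 20 bits of the 52-bit fraction */
      unsigned int fraction_lo : 32;     /* lower 32 bits */
    };

    /* The routines exported by this file, as called from C; on m68k the
       result is returned in the register pair d0-d1.  */
    double __adddf3 (double, double);
    double __subdf3 (double, double);
    double __muldf3 (double, double);
    double __divdf3 (double, double);
    double __negdf2 (double);
    int __cmpdf2 (double, double);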
- -|============================================================================= -| __subdf3 -|============================================================================= - -| double __subdf3(double, double); - FUNC(__subdf3) -SYM (__subdf3): - bchg IMM (31),sp@(12) | change sign of second operand - | and fall through, so we always add -|============================================================================= -| __adddf3 -|============================================================================= - -| double __adddf3(double, double); - FUNC(__adddf3) -SYM (__adddf3): -#ifndef __mcoldfire__ - link a6,IMM (0) | everything will be done in registers - moveml d2-d7,sp@- | save all data registers and a2 (but d0-d1) -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get first operand - movel a6@(12),d1 | - movel a6@(16),d2 | get second operand - movel a6@(20),d3 | - - movel d0,d7 | get d0's sign bit in d7 ' - addl d1,d1 | check and clear sign bit of a, and gain one - addxl d0,d0 | bit of extra precision - beq Ladddf$b | if zero return second operand - - movel d2,d6 | save sign in d6 - addl d3,d3 | get rid of sign bit and gain one bit of - addxl d2,d2 | extra precision - beq Ladddf$a | if zero return first operand - - andl IMM (0x80000000),d7 | isolate a's sign bit ' - swap d6 | and also b's sign bit ' -#ifndef __mcoldfire__ - andw IMM (0x8000),d6 | - orw d6,d7 | and combine them into d7, so that a's sign ' - | bit is in the high word and b's is in the ' - | low word, so d6 is free to be used -#else - andl IMM (0x8000),d6 - orl d6,d7 -#endif - movel d7,a0 | now save d7 into a0, so d7 is free to - | be used also - -| Get the exponents and check for denormalized and/or infinity. - - movel IMM (0x001fffff),d6 | mask for the fraction - movel IMM (0x00200000),d7 | mask to put hidden bit back - - movel d0,d4 | - andl d6,d0 | get fraction in d0 - notl d6 | make d6 into mask for the exponent - andl d6,d4 | get exponent in d4 - beq Ladddf$a$den | branch if a is denormalized - cmpl d6,d4 | check for INFINITY or NaN - beq Ladddf$nf | - orl d7,d0 | and put hidden bit back -Ladddf$1: - swap d4 | shift right exponent so that it starts -#ifndef __mcoldfire__ - lsrw IMM (5),d4 | in bit 0 and not bit 20 -#else - lsrl IMM (5),d4 | in bit 0 and not bit 20 -#endif -| Now we have a's exponent in d4 and fraction in d0-d1 ' - movel d2,d5 | save b to get exponent - andl d6,d5 | get exponent in d5 - beq Ladddf$b$den | branch if b is denormalized - cmpl d6,d5 | check for INFINITY or NaN - beq Ladddf$nf - notl d6 | make d6 into mask for the fraction again - andl d6,d2 | and get fraction in d2 - orl d7,d2 | and put hidden bit back -Ladddf$2: - swap d5 | shift right exponent so that it starts -#ifndef __mcoldfire__ - lsrw IMM (5),d5 | in bit 0 and not bit 20 -#else - lsrl IMM (5),d5 | in bit 0 and not bit 20 -#endif - -| Now we have b's exponent in d5 and fraction in d2-d3. ' - -| The situation now is as follows: the signs are combined in a0, the -| numbers are in d0-d1 (a) and d2-d3 (b), and the exponents in d4 (a) -| and d5 (b). To do the rounding correctly we need to keep all the -| bits until the end, so we need to use d0-d1-d2-d3 for the first number -| and d4-d5-d6-d7 for the second. To do this we store (temporarily) the -| exponents in a2-a3. 
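The effect of all this shuffling is a plain align-then-add: the operand with the smaller exponent is shifted right by the exponent difference into a working mantissa wide enough that nothing is lost before rounding. A compact sketch, using GCC's unsigned __int128 in place of the four data registers (names are illustrative; the real code also returns the larger operand unchanged when the difference is DBL_MANT_DIG+2 or more):

    typedef unsigned __int128 u128;

    /* Align two working mantissas (hidden bit set, fraction shifted toward
       the high end so right shifts only move bits into guard positions)
       to a common exponent before adding or subtracting.  */
    static void
    align_operands (u128 *ma, int *ea, u128 *mb, int *eb)
    {
      if (*ea < *eb)                /* make (ma, ea) the larger-exponent operand */
        {
          u128 *tm = ma; ma = mb; mb = tm;
          int *te = ea; ea = eb; eb = te;
        }
      int diff = *ea - *eb;
      *mb = diff < 128 ? *mb >> diff : 0;
      *eb = *ea;                    /* both operands now share one exponent */
    }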
- -#ifndef __mcoldfire__ - moveml a2-a3,sp@- | save the address registers -#else - movel a2,sp@- - movel a3,sp@- - movel a4,sp@- -#endif - - movel d4,a2 | save the exponents - movel d5,a3 | - - movel IMM (0),d7 | and move the numbers around - movel d7,d6 | - movel d3,d5 | - movel d2,d4 | - movel d7,d3 | - movel d7,d2 | - -| Here we shift the numbers until the exponents are the same, and put -| the largest exponent in a2. -#ifndef __mcoldfire__ - exg d4,a2 | get exponents back - exg d5,a3 | - cmpw d4,d5 | compare the exponents -#else - movel d4,a4 | get exponents back - movel a2,d4 - movel a4,a2 - movel d5,a4 - movel a3,d5 - movel a4,a3 - cmpl d4,d5 | compare the exponents -#endif - beq Ladddf$3 | if equal don't shift ' - bhi 9f | branch if second exponent is higher - -| Here we have a's exponent larger than b's, so we have to shift b. We do -| this by using as counter d2: -1: movew d4,d2 | move largest exponent to d2 -#ifndef __mcoldfire__ - subw d5,d2 | and subtract second exponent - exg d4,a2 | get back the longs we saved - exg d5,a3 | -#else - subl d5,d2 | and subtract second exponent - movel d4,a4 | get back the longs we saved - movel a2,d4 - movel a4,a2 - movel d5,a4 - movel a3,d5 - movel a4,a3 -#endif -| if difference is too large we don't shift (actually, we can just exit) ' -#ifndef __mcoldfire__ - cmpw IMM (DBL_MANT_DIG+2),d2 -#else - cmpl IMM (DBL_MANT_DIG+2),d2 -#endif - bge Ladddf$b$small -#ifndef __mcoldfire__ - cmpw IMM (32),d2 | if difference >= 32, shift by longs -#else - cmpl IMM (32),d2 | if difference >= 32, shift by longs -#endif - bge 5f -2: -#ifndef __mcoldfire__ - cmpw IMM (16),d2 | if difference >= 16, shift by words -#else - cmpl IMM (16),d2 | if difference >= 16, shift by words -#endif - bge 6f - bra 3f | enter dbra loop - -4: -#ifndef __mcoldfire__ - lsrl IMM (1),d4 - roxrl IMM (1),d5 - roxrl IMM (1),d6 - roxrl IMM (1),d7 -#else - lsrl IMM (1),d7 - btst IMM (0),d6 - beq 10f - bset IMM (31),d7 -10: lsrl IMM (1),d6 - btst IMM (0),d5 - beq 11f - bset IMM (31),d6 -11: lsrl IMM (1),d5 - btst IMM (0),d4 - beq 12f - bset IMM (31),d5 -12: lsrl IMM (1),d4 -#endif -3: -#ifndef __mcoldfire__ - dbra d2,4b -#else - subql IMM (1),d2 - bpl 4b -#endif - movel IMM (0),d2 - movel d2,d3 - bra Ladddf$4 -5: - movel d6,d7 - movel d5,d6 - movel d4,d5 - movel IMM (0),d4 -#ifndef __mcoldfire__ - subw IMM (32),d2 -#else - subl IMM (32),d2 -#endif - bra 2b -6: - movew d6,d7 - swap d7 - movew d5,d6 - swap d6 - movew d4,d5 - swap d5 - movew IMM (0),d4 - swap d4 -#ifndef __mcoldfire__ - subw IMM (16),d2 -#else - subl IMM (16),d2 -#endif - bra 3b - -9: -#ifndef __mcoldfire__ - exg d4,d5 - movew d4,d6 - subw d5,d6 | keep d5 (largest exponent) in d4 - exg d4,a2 - exg d5,a3 -#else - movel d5,d6 - movel d4,d5 - movel d6,d4 - subl d5,d6 - movel d4,a4 - movel a2,d4 - movel a4,a2 - movel d5,a4 - movel a3,d5 - movel a4,a3 -#endif -| if difference is too large we don't shift (actually, we can just exit) ' -#ifndef __mcoldfire__ - cmpw IMM (DBL_MANT_DIG+2),d6 -#else - cmpl IMM (DBL_MANT_DIG+2),d6 -#endif - bge Ladddf$a$small -#ifndef __mcoldfire__ - cmpw IMM (32),d6 | if difference >= 32, shift by longs -#else - cmpl IMM (32),d6 | if difference >= 32, shift by longs -#endif - bge 5f -2: -#ifndef __mcoldfire__ - cmpw IMM (16),d6 | if difference >= 16, shift by words -#else - cmpl IMM (16),d6 | if difference >= 16, shift by words -#endif - bge 6f - bra 3f | enter dbra loop - -4: -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 -#else - lsrl IMM (1),d3 - 
btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 -#endif -3: -#ifndef __mcoldfire__ - dbra d6,4b -#else - subql IMM (1),d6 - bpl 4b -#endif - movel IMM (0),d7 - movel d7,d6 - bra Ladddf$4 -5: - movel d2,d3 - movel d1,d2 - movel d0,d1 - movel IMM (0),d0 -#ifndef __mcoldfire__ - subw IMM (32),d6 -#else - subl IMM (32),d6 -#endif - bra 2b -6: - movew d2,d3 - swap d3 - movew d1,d2 - swap d2 - movew d0,d1 - swap d1 - movew IMM (0),d0 - swap d0 -#ifndef __mcoldfire__ - subw IMM (16),d6 -#else - subl IMM (16),d6 -#endif - bra 3b -Ladddf$3: -#ifndef __mcoldfire__ - exg d4,a2 - exg d5,a3 -#else - movel d4,a4 - movel a2,d4 - movel a4,a2 - movel d5,a4 - movel a3,d5 - movel a4,a3 -#endif -Ladddf$4: -| Now we have the numbers in d0--d3 and d4--d7, the exponent in a2, and -| the signs in a4. - -| Here we have to decide whether to add or subtract the numbers: -#ifndef __mcoldfire__ - exg d7,a0 | get the signs - exg d6,a3 | a3 is free to be used -#else - movel d7,a4 - movel a0,d7 - movel a4,a0 - movel d6,a4 - movel a3,d6 - movel a4,a3 -#endif - movel d7,d6 | - movew IMM (0),d7 | get a's sign in d7 ' - swap d6 | - movew IMM (0),d6 | and b's sign in d6 ' - eorl d7,d6 | compare the signs - bmi Lsubdf$0 | if the signs are different we have - | to subtract -#ifndef __mcoldfire__ - exg d7,a0 | else we add the numbers - exg d6,a3 | -#else - movel d7,a4 - movel a0,d7 - movel a4,a0 - movel d6,a4 - movel a3,d6 - movel a4,a3 -#endif - addl d7,d3 | - addxl d6,d2 | - addxl d5,d1 | - addxl d4,d0 | - - movel a2,d4 | return exponent to d4 - movel a0,d7 | - andl IMM (0x80000000),d7 | d7 now has the sign - -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - -| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider -| the case of denormalized numbers in the rounding routine itself). -| As in the addition (not in the subtraction!) we could have set -| one more bit we check this: - btst IMM (DBL_MANT_DIG+1),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - addw IMM (1),d4 -#else - lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif -1: - lea pc@(Ladddf$5),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Ladddf$5: -| Put back the exponent and check for overflow -#ifndef __mcoldfire__ - cmpw IMM (0x7ff),d4 | is the exponent big? -#else - cmpl IMM (0x7ff),d4 | is the exponent big? -#endif - bge 1f - bclr IMM (DBL_MANT_DIG-1),d0 -#ifndef __mcoldfire__ - lslw IMM (4),d4 | put exponent back into position -#else - lsll IMM (4),d4 | put exponent back into position -#endif - swap d0 | -#ifndef __mcoldfire__ - orw d4,d0 | -#else - orl d4,d0 | -#endif - swap d0 | - bra Ladddf$ret -1: - moveq IMM (ADD),d5 - bra Ld$overflow - -Lsubdf$0: -| Here we do the subtraction. 
-#ifndef __mcoldfire__ - exg d7,a0 | put sign back in a0 - exg d6,a3 | -#else - movel d7,a4 - movel a0,d7 - movel a4,a0 - movel d6,a4 - movel a3,d6 - movel a4,a3 -#endif - subl d7,d3 | - subxl d6,d2 | - subxl d5,d1 | - subxl d4,d0 | - beq Ladddf$ret$1 | if zero just exit - bpl 1f | if positive skip the following - movel a0,d7 | - bchg IMM (31),d7 | change sign bit in d7 - movel d7,a0 | - negl d3 | - negxl d2 | - negxl d1 | and negate result - negxl d0 | -1: - movel a2,d4 | return exponent to d4 - movel a0,d7 - andl IMM (0x80000000),d7 | isolate sign bit -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 | -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - -| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider -| the case of denormalized numbers in the rounding routine itself). -| As in the addition (not in the subtraction!) we could have set -| one more bit we check this: - btst IMM (DBL_MANT_DIG+1),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - addw IMM (1),d4 -#else - lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif -1: - lea pc@(Lsubdf$1),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Lsubdf$1: -| Put back the exponent and sign (we don't have overflow). ' - bclr IMM (DBL_MANT_DIG-1),d0 -#ifndef __mcoldfire__ - lslw IMM (4),d4 | put exponent back into position -#else - lsll IMM (4),d4 | put exponent back into position -#endif - swap d0 | -#ifndef __mcoldfire__ - orw d4,d0 | -#else - orl d4,d0 | -#endif - swap d0 | - bra Ladddf$ret - -| If one of the numbers was too small (difference of exponents >= -| DBL_MANT_DIG+1) we return the other (and now we don't have to ' -| check for finiteness or zero). -Ladddf$a$small: -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - movel a6@(16),d0 - movel a6@(20),d1 - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts - -Ladddf$b$small: -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - movel a6@(8),d0 - movel a6@(12),d1 - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts - -Ladddf$a$den: - movel d7,d4 | d7 contains 0x00200000 - bra Ladddf$1 - -Ladddf$b$den: - movel d7,d5 | d7 contains 0x00200000 - notl d6 - bra Ladddf$2 - -Ladddf$b: -| Return b (if a is zero) - movel d2,d0 - movel d3,d1 - bne 1f | Check if b is -0 - cmpl IMM (0x80000000),d0 - bne 1f - andl IMM (0x80000000),d7 | Use the sign of a - clrl d0 - bra Ladddf$ret -Ladddf$a: - movel a6@(8),d0 - movel a6@(12),d1 -1: - moveq IMM (ADD),d5 -| Check for NaN and +/-INFINITY. 
- movel d0,d7 | - andl IMM (0x80000000),d7 | - bclr IMM (31),d0 | - cmpl IMM (0x7ff00000),d0 | - bge 2f | - movel d0,d0 | check for zero, since we don't ' - bne Ladddf$ret | want to return -0 by mistake - bclr IMM (31),d7 | - bra Ladddf$ret | -2: - andl IMM (0x000fffff),d0 | check for NaN (nonzero fraction) - orl d1,d0 | - bne Ld$inop | - bra Ld$infty | - -Ladddf$ret$1: -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 | restore regs and exit -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - -Ladddf$ret: -| Normal exit. - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ - orl d7,d0 | put sign bit back -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts - -Ladddf$ret$den: -| Return a denormalized number. -#ifndef __mcoldfire__ - lsrl IMM (1),d0 | shift right once more - roxrl IMM (1),d1 | -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 -#endif - bra Ladddf$ret - -Ladddf$nf: - moveq IMM (ADD),d5 -| This could be faster but it is not worth the effort, since it is not -| executed very often. We sacrifice speed for clarity here. - movel a6@(8),d0 | get the numbers back (remember that we - movel a6@(12),d1 | did some processing already) - movel a6@(16),d2 | - movel a6@(20),d3 | - movel IMM (0x7ff00000),d4 | useful constant (INFINITY) - movel d0,d7 | save sign bits - movel d2,d6 | - bclr IMM (31),d0 | clear sign bits - bclr IMM (31),d2 | -| We know that one of them is either NaN of +/-INFINITY -| Check for NaN (if either one is NaN return NaN) - cmpl d4,d0 | check first a (d0) - bhi Ld$inop | if d0 > 0x7ff00000 or equal and - bne 2f - tstl d1 | d1 > 0, a is NaN - bne Ld$inop | -2: cmpl d4,d2 | check now b (d1) - bhi Ld$inop | - bne 3f - tstl d3 | - bne Ld$inop | -3: -| Now comes the check for +/-INFINITY. We know that both are (maybe not -| finite) numbers, but we have to check if both are infinite whether we -| are adding or subtracting them. - eorl d7,d6 | to check sign bits - bmi 1f - andl IMM (0x80000000),d7 | get (common) sign bit - bra Ld$infty -1: -| We know one (or both) are infinite, so we test for equality between the -| two numbers (if they are equal they have to be infinite both, so we -| return NaN). - cmpl d2,d0 | are both infinite? 
- bne 1f | if d0 <> d2 they are not equal - cmpl d3,d1 | if d0 == d2 test d3 and d1 - beq Ld$inop | if equal return NaN -1: - andl IMM (0x80000000),d7 | get a's sign bit ' - cmpl d4,d0 | test now for infinity - beq Ld$infty | if a is INFINITY return with this sign - bchg IMM (31),d7 | else we know b is INFINITY and has - bra Ld$infty | the opposite sign - -|============================================================================= -| __muldf3 -|============================================================================= - -| double __muldf3(double, double); - FUNC(__muldf3) -SYM (__muldf3): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get a into d0-d1 - movel a6@(12),d1 | - movel a6@(16),d2 | and b into d2-d3 - movel a6@(20),d3 | - movel d0,d7 | d7 will hold the sign of the product - eorl d2,d7 | - andl IMM (0x80000000),d7 | - movel d7,a0 | save sign bit into a0 - movel IMM (0x7ff00000),d7 | useful constant (+INFINITY) - movel d7,d6 | another (mask for fraction) - notl d6 | - bclr IMM (31),d0 | get rid of a's sign bit ' - movel d0,d4 | - orl d1,d4 | - beq Lmuldf$a$0 | branch if a is zero - movel d0,d4 | - bclr IMM (31),d2 | get rid of b's sign bit ' - movel d2,d5 | - orl d3,d5 | - beq Lmuldf$b$0 | branch if b is zero - movel d2,d5 | - cmpl d7,d0 | is a big? - bhi Lmuldf$inop | if a is NaN return NaN - beq Lmuldf$a$nf | we still have to check d1 and b ... - cmpl d7,d2 | now compare b with INFINITY - bhi Lmuldf$inop | is b NaN? - beq Lmuldf$b$nf | we still have to check d3 ... -| Here we have both numbers finite and nonzero (and with no sign bit). -| Now we get the exponents into d4 and d5. - andl d7,d4 | isolate exponent in d4 - beq Lmuldf$a$den | if exponent zero, have denormalized - andl d6,d0 | isolate fraction - orl IMM (0x00100000),d0 | and put hidden bit back - swap d4 | I like exponents in the first byte -#ifndef __mcoldfire__ - lsrw IMM (4),d4 | -#else - lsrl IMM (4),d4 | -#endif -Lmuldf$1: - andl d7,d5 | - beq Lmuldf$b$den | - andl d6,d2 | - orl IMM (0x00100000),d2 | and put hidden bit back - swap d5 | -#ifndef __mcoldfire__ - lsrw IMM (4),d5 | -#else - lsrl IMM (4),d5 | -#endif -Lmuldf$2: | -#ifndef __mcoldfire__ - addw d5,d4 | add exponents - subw IMM (D_BIAS+1),d4 | and subtract bias (plus one) -#else - addl d5,d4 | add exponents - subl IMM (D_BIAS+1),d4 | and subtract bias (plus one) -#endif - -| We are now ready to do the multiplication. The situation is as follows: -| both a and b have bit 52 ( bit 20 of d0 and d2) set (even if they were -| denormalized to start with!), which means that in the product bit 104 -| (which will correspond to bit 8 of the fourth long) is set. - -| Here we have to do the product. -| To do it we have to juggle the registers back and forth, as there are not -| enough to keep everything in them. So we use the address registers to keep -| some intermediate data. 
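The loop that follows is a most-significant-bit-first shift-and-add multiply of the two 53-bit fractions into a product of roughly 106 bits. The same algorithm in C (a sketch; uint64_t and __int128 stand in for the register pairs):

    #include <stdint.h>

    typedef unsigned __int128 u128;

    /* Multiply two fractions with the hidden bit (bit 52) set, as the loop
       below does with d0-d3 as the sum and d4-d5/d6-d7 as the operands:
       shift the partial product left once per multiplier bit and add the
       multiplicand whenever that bit is set.  */
    static u128
    mul_fractions (uint64_t a, uint64_t b)
    {
      u128 product = 0;
      int i;

      for (i = 52; i >= 0; i--)          /* DBL_MANT_DIG iterations */
        {
          product <<= 1;                 /* shift sum once left */
          if ((b >> i) & 1)              /* was the bit shifted out of b set?  */
            product += a;                /* then add a to the sum */
        }
      return product;                    /* lies in [2^104, 2^106) */
    }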
- -#ifndef __mcoldfire__ - moveml a2-a3,sp@- | save a2 and a3 for temporary use -#else - movel a2,sp@- - movel a3,sp@- - movel a4,sp@- -#endif - movel IMM (0),a2 | a2 is a null register - movel d4,a3 | and a3 will preserve the exponent - -| First, shift d2-d3 so bit 20 becomes bit 31: -#ifndef __mcoldfire__ - rorl IMM (5),d2 | rotate d2 5 places right - swap d2 | and swap it - rorl IMM (5),d3 | do the same thing with d3 - swap d3 | - movew d3,d6 | get the rightmost 11 bits of d3 - andw IMM (0x07ff),d6 | - orw d6,d2 | and put them into d2 - andw IMM (0xf800),d3 | clear those bits in d3 -#else - moveq IMM (11),d7 | left shift d2 11 bits - lsll d7,d2 - movel d3,d6 | get a copy of d3 - lsll d7,d3 | left shift d3 11 bits - andl IMM (0xffe00000),d6 | get the top 11 bits of d3 - moveq IMM (21),d7 | right shift them 21 bits - lsrl d7,d6 - orl d6,d2 | stick them at the end of d2 -#endif - - movel d2,d6 | move b into d6-d7 - movel d3,d7 | move a into d4-d5 - movel d0,d4 | and clear d0-d1-d2-d3 (to put result) - movel d1,d5 | - movel IMM (0),d3 | - movel d3,d2 | - movel d3,d1 | - movel d3,d0 | - -| We use a1 as counter: - movel IMM (DBL_MANT_DIG-1),a1 -#ifndef __mcoldfire__ - exg d7,a1 -#else - movel d7,a4 - movel a1,d7 - movel a4,a1 -#endif - -1: -#ifndef __mcoldfire__ - exg d7,a1 | put counter back in a1 -#else - movel d7,a4 - movel a1,d7 - movel a4,a1 -#endif - addl d3,d3 | shift sum once left - addxl d2,d2 | - addxl d1,d1 | - addxl d0,d0 | - addl d7,d7 | - addxl d6,d6 | - bcc 2f | if bit clear skip the following -#ifndef __mcoldfire__ - exg d7,a2 | -#else - movel d7,a4 - movel a2,d7 - movel a4,a2 -#endif - addl d5,d3 | else add a to the sum - addxl d4,d2 | - addxl d7,d1 | - addxl d7,d0 | -#ifndef __mcoldfire__ - exg d7,a2 | -#else - movel d7,a4 - movel a2,d7 - movel a4,a2 -#endif -2: -#ifndef __mcoldfire__ - exg d7,a1 | put counter in d7 - dbf d7,1b | decrement and branch -#else - movel d7,a4 - movel a1,d7 - movel a4,a1 - subql IMM (1),d7 - bpl 1b -#endif - - movel a3,d4 | restore exponent -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - -| Now we have the product in d0-d1-d2-d3, with bit 8 of d0 set. The -| first thing to do now is to normalize it so bit 8 becomes bit -| DBL_MANT_DIG-32 (to do the rounding); later we will shift right. - swap d0 - swap d1 - movew d1,d0 - swap d2 - movew d2,d1 - swap d3 - movew d3,d2 - movew IMM (0),d3 -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 -#else - moveq IMM (29),d6 - lsrl IMM (3),d3 - movel d2,d7 - lsll d6,d7 - orl d7,d3 - lsrl IMM (3),d2 - movel d1,d7 - lsll d6,d7 - orl d7,d2 - lsrl IMM (3),d1 - movel d0,d7 - lsll d6,d7 - orl d7,d1 - lsrl IMM (3),d0 -#endif - -| Now round, check for over- and underflow, and exit. 
- movel a0,d7 | get sign bit back into d7 - moveq IMM (MULTIPLY),d5 - - btst IMM (DBL_MANT_DIG+1-32),d0 - beq Lround$exit -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - addw IMM (1),d4 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif - bra Lround$exit - -Lmuldf$inop: - moveq IMM (MULTIPLY),d5 - bra Ld$inop - -Lmuldf$b$nf: - moveq IMM (MULTIPLY),d5 - movel a0,d7 | get sign bit back into d7 - tstl d3 | we know d2 == 0x7ff00000, so check d3 - bne Ld$inop | if d3 <> 0 b is NaN - bra Ld$overflow | else we have overflow (since a is finite) - -Lmuldf$a$nf: - moveq IMM (MULTIPLY),d5 - movel a0,d7 | get sign bit back into d7 - tstl d1 | we know d0 == 0x7ff00000, so check d1 - bne Ld$inop | if d1 <> 0 a is NaN - bra Ld$overflow | else signal overflow - -| If either number is zero return zero, unless the other is +/-INFINITY or -| NaN, in which case we return NaN. -Lmuldf$b$0: - moveq IMM (MULTIPLY),d5 -#ifndef __mcoldfire__ - exg d2,d0 | put b (==0) into d0-d1 - exg d3,d1 | and a (with sign bit cleared) into d2-d3 - movel a0,d0 | set result sign -#else - movel d0,d2 | put a into d2-d3 - movel d1,d3 - movel a0,d0 | put result zero into d0-d1 - movq IMM(0),d1 -#endif - bra 1f -Lmuldf$a$0: - movel a0,d0 | set result sign - movel a6@(16),d2 | put b into d2-d3 again - movel a6@(20),d3 | - bclr IMM (31),d2 | clear sign bit -1: cmpl IMM (0x7ff00000),d2 | check for non-finiteness - bge Ld$inop | in case NaN or +/-INFINITY return NaN - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts - -| If a number is denormalized we put an exponent of 1 but do not put the -| hidden bit back into the fraction; instead we shift left until bit 21 -| (the hidden bit) is set, adjusting the exponent accordingly. We do this -| to ensure that the product of the fractions is close to 1. 
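A C sketch of that pre-normalization step (illustrative only; here the mantissa is a single 64-bit word, so the hidden bit is bit 52):

    #include <stdint.h>

    /* Give a denormalized operand an exponent of 1, then shift the fraction
       left until the hidden bit is set, adjusting the exponent for each
       shift.  The fraction is known to be nonzero: zero operands were
       handled earlier.  */
    static uint64_t
    normalize_subnormal (uint64_t fraction, int *exp)
    {
      *exp = 1;
      while (!(fraction & ((uint64_t) 1 << 52)))
        {
          fraction <<= 1;
          (*exp)--;                 /* may go to zero or below; over/underflow
                                       is checked later */
        }
      return fraction;
    }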
-Lmuldf$a$den: - movel IMM (1),d4 - andl d6,d0 -1: addl d1,d1 | shift a left until bit 20 is set - addxl d0,d0 | -#ifndef __mcoldfire__ - subw IMM (1),d4 | and adjust exponent -#else - subl IMM (1),d4 | and adjust exponent -#endif - btst IMM (20),d0 | - bne Lmuldf$1 | - bra 1b - -Lmuldf$b$den: - movel IMM (1),d5 - andl d6,d2 -1: addl d3,d3 | shift b left until bit 20 is set - addxl d2,d2 | -#ifndef __mcoldfire__ - subw IMM (1),d5 | and adjust exponent -#else - subql IMM (1),d5 | and adjust exponent -#endif - btst IMM (20),d2 | - bne Lmuldf$2 | - bra 1b - - -|============================================================================= -| __divdf3 -|============================================================================= - -| double __divdf3(double, double); - FUNC(__divdf3) -SYM (__divdf3): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get a into d0-d1 - movel a6@(12),d1 | - movel a6@(16),d2 | and b into d2-d3 - movel a6@(20),d3 | - movel d0,d7 | d7 will hold the sign of the result - eorl d2,d7 | - andl IMM (0x80000000),d7 - movel d7,a0 | save sign into a0 - movel IMM (0x7ff00000),d7 | useful constant (+INFINITY) - movel d7,d6 | another (mask for fraction) - notl d6 | - bclr IMM (31),d0 | get rid of a's sign bit ' - movel d0,d4 | - orl d1,d4 | - beq Ldivdf$a$0 | branch if a is zero - movel d0,d4 | - bclr IMM (31),d2 | get rid of b's sign bit ' - movel d2,d5 | - orl d3,d5 | - beq Ldivdf$b$0 | branch if b is zero - movel d2,d5 - cmpl d7,d0 | is a big? - bhi Ldivdf$inop | if a is NaN return NaN - beq Ldivdf$a$nf | if d0 == 0x7ff00000 we check d1 - cmpl d7,d2 | now compare b with INFINITY - bhi Ldivdf$inop | if b is NaN return NaN - beq Ldivdf$b$nf | if d2 == 0x7ff00000 we check d3 -| Here we have both numbers finite and nonzero (and with no sign bit). -| Now we get the exponents into d4 and d5 and normalize the numbers to -| ensure that the ratio of the fractions is around 1. We do this by -| making sure that both numbers have bit #DBL_MANT_DIG-32-1 (hidden bit) -| set, even if they were denormalized to start with. -| Thus, the result will satisfy: 2 > result > 1/2. - andl d7,d4 | and isolate exponent in d4 - beq Ldivdf$a$den | if exponent is zero we have a denormalized - andl d6,d0 | and isolate fraction - orl IMM (0x00100000),d0 | and put hidden bit back - swap d4 | I like exponents in the first byte -#ifndef __mcoldfire__ - lsrw IMM (4),d4 | -#else - lsrl IMM (4),d4 | -#endif -Ldivdf$1: | - andl d7,d5 | - beq Ldivdf$b$den | - andl d6,d2 | - orl IMM (0x00100000),d2 - swap d5 | -#ifndef __mcoldfire__ - lsrw IMM (4),d5 | -#else - lsrl IMM (4),d5 | -#endif -Ldivdf$2: | -#ifndef __mcoldfire__ - subw d5,d4 | subtract exponents - addw IMM (D_BIAS),d4 | and add bias -#else - subl d5,d4 | subtract exponents - addl IMM (D_BIAS),d4 | and add bias -#endif - -| We are now ready to do the division. We have prepared things in such a way -| that the ratio of the fractions will be less than 2 but greater than 1/2. -| At this point the registers in use are: -| d0-d1 hold a (first operand, bit DBL_MANT_DIG-32=0, bit -| DBL_MANT_DIG-1-32=1) -| d2-d3 hold b (second operand, bit DBL_MANT_DIG-32=1) -| d4 holds the difference of the exponents, corrected by the bias -| a0 holds the sign of the ratio - -| To do the rounding correctly we need to keep information about the -| nonsignificant bits. 
One way to do this would be to do the division -| using four registers; another is to use two registers (as originally -| I did), but use a sticky bit to preserve information about the -| fractional part. Note that we can keep that info in a1, which is not -| used. - movel IMM (0),d6 | d6-d7 will hold the result - movel d6,d7 | - movel IMM (0),a1 | and a1 will hold the sticky bit - - movel IMM (DBL_MANT_DIG-32+1),d5 - -1: cmpl d0,d2 | is a < b? - bhi 3f | if b > a skip the following - beq 4f | if d0==d2 check d1 and d3 -2: subl d3,d1 | - subxl d2,d0 | a <-- a - b - bset d5,d6 | set the corresponding bit in d6 -3: addl d1,d1 | shift a by 1 - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d5,1b | and branch back -#else - subql IMM (1), d5 - bpl 1b -#endif - bra 5f -4: cmpl d1,d3 | here d0==d2, so check d1 and d3 - bhi 3b | if d1 > d2 skip the subtraction - bra 2b | else go do it -5: -| Here we have to start setting the bits in the second long. - movel IMM (31),d5 | again d5 is counter - -1: cmpl d0,d2 | is a < b? - bhi 3f | if b > a skip the following - beq 4f | if d0==d2 check d1 and d3 -2: subl d3,d1 | - subxl d2,d0 | a <-- a - b - bset d5,d7 | set the corresponding bit in d7 -3: addl d1,d1 | shift a by 1 - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d5,1b | and branch back -#else - subql IMM (1), d5 - bpl 1b -#endif - bra 5f -4: cmpl d1,d3 | here d0==d2, so check d1 and d3 - bhi 3b | if d1 > d2 skip the subtraction - bra 2b | else go do it -5: -| Now go ahead checking until we hit a one, which we store in d2. - movel IMM (DBL_MANT_DIG),d5 -1: cmpl d2,d0 | is a < b? - bhi 4f | if b < a, exit - beq 3f | if d0==d2 check d1 and d3 -2: addl d1,d1 | shift a by 1 - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d5,1b | and branch back -#else - subql IMM (1), d5 - bpl 1b -#endif - movel IMM (0),d2 | here no sticky bit was found - movel d2,d3 - bra 5f -3: cmpl d1,d3 | here d0==d2, so check d1 and d3 - bhi 2b | if d1 > d2 go back -4: -| Here put the sticky bit in d2-d3 (in the position which actually corresponds -| to it; if you don't do this the algorithm loses in some cases). ' - movel IMM (0),d2 - movel d2,d3 -#ifndef __mcoldfire__ - subw IMM (DBL_MANT_DIG),d5 - addw IMM (63),d5 - cmpw IMM (31),d5 -#else - subl IMM (DBL_MANT_DIG),d5 - addl IMM (63),d5 - cmpl IMM (31),d5 -#endif - bhi 2f -1: bset d5,d3 - bra 5f -#ifndef __mcoldfire__ - subw IMM (32),d5 -#else - subl IMM (32),d5 -#endif -2: bset d5,d2 -5: -| Finally we are finished! Move the longs in the address registers to -| their final destination: - movel d6,d0 - movel d7,d1 - movel IMM (0),d3 - -| Here we have finished the division, with the result in d0-d1-d2-d3, with -| 2^21 <= d6 < 2^23. Thus bit 23 is not set, but bit 22 could be set. -| If it is not, then definitely bit 21 is set. Normalize so bit 22 is -| not set: - btst IMM (DBL_MANT_DIG-32+1),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - addw IMM (1),d4 -#else - lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif -1: -| Now round, check for over- and underflow, and exit. - movel a0,d7 | restore sign bit to d7 - moveq IMM (DIVIDE),d5 - bra Lround$exit - -Ldivdf$inop: - moveq IMM (DIVIDE),d5 - bra Ld$inop - -Ldivdf$a$0: -| If a is zero check to see whether b is zero also. 
In that case return -| NaN; then check if b is NaN, and return NaN also in that case. Else -| return a properly signed zero. - moveq IMM (DIVIDE),d5 - bclr IMM (31),d2 | - movel d2,d4 | - orl d3,d4 | - beq Ld$inop | if b is also zero return NaN - cmpl IMM (0x7ff00000),d2 | check for NaN - bhi Ld$inop | - blt 1f | - tstl d3 | - bne Ld$inop | -1: movel a0,d0 | else return signed zero - moveq IMM(0),d1 | - PICLEA SYM (_fpCCR),a0 | clear exception flags - movew IMM (0),a0@ | -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | -#else - moveml sp@,d2-d7 | - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | - rts | - -Ldivdf$b$0: - moveq IMM (DIVIDE),d5 -| If we got here a is not zero. Check if a is NaN; in that case return NaN, -| else return +/-INFINITY. Remember that a is in d0 with the sign bit -| cleared already. - movel a0,d7 | put a's sign bit back in d7 ' - cmpl IMM (0x7ff00000),d0 | compare d0 with INFINITY - bhi Ld$inop | if larger it is NaN - tstl d1 | - bne Ld$inop | - bra Ld$div$0 | else signal DIVIDE_BY_ZERO - -Ldivdf$b$nf: - moveq IMM (DIVIDE),d5 -| If d2 == 0x7ff00000 we have to check d3. - tstl d3 | - bne Ld$inop | if d3 <> 0, b is NaN - bra Ld$underflow | else b is +/-INFINITY, so signal underflow - -Ldivdf$a$nf: - moveq IMM (DIVIDE),d5 -| If d0 == 0x7ff00000 we have to check d1. - tstl d1 | - bne Ld$inop | if d1 <> 0, a is NaN -| If a is INFINITY we have to check b - cmpl d7,d2 | compare b with INFINITY - bge Ld$inop | if b is NaN or INFINITY return NaN - tstl d3 | - bne Ld$inop | - bra Ld$overflow | else return overflow - -| If a number is denormalized we put an exponent of 1 but do not put the -| bit back into the fraction. -Ldivdf$a$den: - movel IMM (1),d4 - andl d6,d0 -1: addl d1,d1 | shift a left until bit 20 is set - addxl d0,d0 -#ifndef __mcoldfire__ - subw IMM (1),d4 | and adjust exponent -#else - subl IMM (1),d4 | and adjust exponent -#endif - btst IMM (DBL_MANT_DIG-32-1),d0 - bne Ldivdf$1 - bra 1b - -Ldivdf$b$den: - movel IMM (1),d5 - andl d6,d2 -1: addl d3,d3 | shift b left until bit 20 is set - addxl d2,d2 -#ifndef __mcoldfire__ - subw IMM (1),d5 | and adjust exponent -#else - subql IMM (1),d5 | and adjust exponent -#endif - btst IMM (DBL_MANT_DIG-32-1),d2 - bne Ldivdf$2 - bra 1b - -Lround$exit: -| This is a common exit point for __muldf3 and __divdf3. When they enter -| this point the sign of the result is in d7, the result in d0-d1, normalized -| so that 2^21 <= d0 < 2^22, and the exponent is in the lower byte of d4. - -| First check for underlow in the exponent: -#ifndef __mcoldfire__ - cmpw IMM (-DBL_MANT_DIG-1),d4 -#else - cmpl IMM (-DBL_MANT_DIG-1),d4 -#endif - blt Ld$underflow -| It could happen that the exponent is less than 1, in which case the -| number is denormalized. In this case we shift right and adjust the -| exponent until it becomes 1 or the fraction is zero (in the latter case -| we signal underflow and return zero). - movel d7,a0 | - movel IMM (0),d6 | use d6-d7 to collect bits flushed right - movel d6,d7 | use d6-d7 to collect bits flushed right -#ifndef __mcoldfire__ - cmpw IMM (1),d4 | if the exponent is less than 1 we -#else - cmpl IMM (1),d4 | if the exponent is less than 1 we -#endif - bge 2f | have to shift right (denormalize) -1: -#ifndef __mcoldfire__ - addw IMM (1),d4 | adjust the exponent - lsrl IMM (1),d0 | shift right once - roxrl IMM (1),d1 | - roxrl IMM (1),d2 | - roxrl IMM (1),d3 | - roxrl IMM (1),d6 | - roxrl IMM (1),d7 | - cmpw IMM (1),d4 | is the exponent 1 already? 
-#else - addl IMM (1),d4 | adjust the exponent - lsrl IMM (1),d7 - btst IMM (0),d6 - beq 13f - bset IMM (31),d7 -13: lsrl IMM (1),d6 - btst IMM (0),d3 - beq 14f - bset IMM (31),d6 -14: lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 - cmpl IMM (1),d4 | is the exponent 1 already? -#endif - beq 2f | if not loop back - bra 1b | - bra Ld$underflow | safety check, shouldn't execute ' -2: orl d6,d2 | this is a trick so we don't lose ' - orl d7,d3 | the bits which were flushed right - movel a0,d7 | get back sign bit into d7 -| Now call the rounding routine (which takes care of denormalized numbers): - lea pc@(Lround$0),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Lround$0: -| Here we have a correctly rounded result (either normalized or denormalized). - -| Here we should have either a normalized number or a denormalized one, and -| the exponent is necessarily larger or equal to 1 (so we don't have to ' -| check again for underflow!). We have to check for overflow or for a -| denormalized number (which also signals underflow). -| Check for overflow (i.e., exponent >= 0x7ff). -#ifndef __mcoldfire__ - cmpw IMM (0x07ff),d4 -#else - cmpl IMM (0x07ff),d4 -#endif - bge Ld$overflow -| Now check for a denormalized number (exponent==0): - movew d4,d4 - beq Ld$den -1: -| Put back the exponents and sign and return. -#ifndef __mcoldfire__ - lslw IMM (4),d4 | exponent back to fourth byte -#else - lsll IMM (4),d4 | exponent back to fourth byte -#endif - bclr IMM (DBL_MANT_DIG-32-1),d0 - swap d0 | and put back exponent -#ifndef __mcoldfire__ - orw d4,d0 | -#else - orl d4,d0 | -#endif - swap d0 | - orl d7,d0 | and sign also - - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts - -|============================================================================= -| __negdf2 -|============================================================================= - -| double __negdf2(double, double); - FUNC(__negdf2) -SYM (__negdf2): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - moveq IMM (NEGATE),d5 - movel a6@(8),d0 | get number to negate in d0-d1 - movel a6@(12),d1 | - bchg IMM (31),d0 | negate - movel d0,d2 | make a positive copy (for the tests) - bclr IMM (31),d2 | - movel d2,d4 | check for zero - orl d1,d4 | - beq 2f | if zero (either sign) return +zero - cmpl IMM (0x7ff00000),d2 | compare to +INFINITY - blt 1f | if finite, return - bhi Ld$inop | if larger (fraction not zero) is NaN - tstl d1 | if d2 == 0x7ff00000 check d1 - bne Ld$inop | - movel d0,d7 | else get sign and return INFINITY - andl IMM (0x80000000),d7 - bra Ld$infty -1: PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. 
-#endif - unlk a6 - rts -2: bclr IMM (31),d0 - bra 1b - -|============================================================================= -| __cmpdf2 -|============================================================================= - -GREATER = 1 -LESS = -1 -EQUAL = 0 - -| int __cmpdf2_internal(double, double, int); -SYM (__cmpdf2_internal): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- | save registers -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - moveq IMM (COMPARE),d5 - movel a6@(8),d0 | get first operand - movel a6@(12),d1 | - movel a6@(16),d2 | get second operand - movel a6@(20),d3 | -| First check if a and/or b are (+/-) zero and in that case clear -| the sign bit. - movel d0,d6 | copy signs into d6 (a) and d7(b) - bclr IMM (31),d0 | and clear signs in d0 and d2 - movel d2,d7 | - bclr IMM (31),d2 | - cmpl IMM (0x7ff00000),d0 | check for a == NaN - bhi Lcmpd$inop | if d0 > 0x7ff00000, a is NaN - beq Lcmpdf$a$nf | if equal can be INFINITY, so check d1 - movel d0,d4 | copy into d4 to test for zero - orl d1,d4 | - beq Lcmpdf$a$0 | -Lcmpdf$0: - cmpl IMM (0x7ff00000),d2 | check for b == NaN - bhi Lcmpd$inop | if d2 > 0x7ff00000, b is NaN - beq Lcmpdf$b$nf | if equal can be INFINITY, so check d3 - movel d2,d4 | - orl d3,d4 | - beq Lcmpdf$b$0 | -Lcmpdf$1: -| Check the signs - eorl d6,d7 - bpl 1f -| If the signs are not equal check if a >= 0 - tstl d6 - bpl Lcmpdf$a$gt$b | if (a >= 0 && b < 0) => a > b - bmi Lcmpdf$b$gt$a | if (a < 0 && b >= 0) => a < b -1: -| If the signs are equal check for < 0 - tstl d6 - bpl 1f -| If both are negative exchange them -#ifndef __mcoldfire__ - exg d0,d2 - exg d1,d3 -#else - movel d0,d7 - movel d2,d0 - movel d7,d2 - movel d1,d7 - movel d3,d1 - movel d7,d3 -#endif -1: -| Now that they are positive we just compare them as longs (does this also -| work for denormalized numbers?). - cmpl d0,d2 - bhi Lcmpdf$b$gt$a | |b| > |a| - bne Lcmpdf$a$gt$b | |b| < |a| -| If we got here d0 == d2, so we compare d1 and d3. - cmpl d1,d3 - bhi Lcmpdf$b$gt$a | |b| > |a| - bne Lcmpdf$a$gt$b | |b| < |a| -| If we got here a == b. - movel IMM (EQUAL),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts -Lcmpdf$a$gt$b: - movel IMM (GREATER),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts -Lcmpdf$b$gt$a: - movel IMM (LESS),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. 
-#endif - unlk a6 - rts - -Lcmpdf$a$0: - bclr IMM (31),d6 - bra Lcmpdf$0 -Lcmpdf$b$0: - bclr IMM (31),d7 - bra Lcmpdf$1 - -Lcmpdf$a$nf: - tstl d1 - bne Ld$inop - bra Lcmpdf$0 - -Lcmpdf$b$nf: - tstl d3 - bne Ld$inop - bra Lcmpdf$1 - -Lcmpd$inop: - movl a6@(24),d0 - moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -| int __cmpdf2(double, double); - FUNC(__cmpdf2) -SYM (__cmpdf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts - -|============================================================================= -| rounding routines -|============================================================================= - -| The rounding routines expect the number to be normalized in registers -| d0-d1-d2-d3, with the exponent in register d4. They assume that the -| exponent is larger or equal to 1. They return a properly normalized number -| if possible, and a denormalized number otherwise. The exponent is returned -| in d4. - -Lround$to$nearest: -| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"): -| Here we assume that the exponent is not too small (this should be checked -| before entering the rounding routine), but the number could be denormalized. - -| Check for denormalized numbers: -1: btst IMM (DBL_MANT_DIG-32),d0 - bne 2f | if set the number is normalized -| Normalize shifting left until bit #DBL_MANT_DIG-32 is set or the exponent -| is one (remember that a denormalized number corresponds to an -| exponent of -D_BIAS+1). -#ifndef __mcoldfire__ - cmpw IMM (1),d4 | remember that the exponent is at least one -#else - cmpl IMM (1),d4 | remember that the exponent is at least one -#endif - beq 2f | an exponent of one means denormalized - addl d3,d3 | else shift and adjust the exponent - addxl d2,d2 | - addxl d1,d1 | - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d4,1b | -#else - subql IMM (1), d4 - bpl 1b -#endif -2: -| Now round: we do it as follows: after the shifting we can write the -| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2. -| If delta < 1, do nothing. If delta > 1, add 1 to f. -| If delta == 1, we make sure the rounded number will be even (odd?) -| (after shifting). - btst IMM (0),d1 | is delta < 1? - beq 2f | if so, do not do anything - orl d2,d3 | is delta == 1? - bne 1f | if so round to even - movel d1,d3 | - andl IMM (2),d3 | bit 1 is the last significant bit - movel IMM (0),d2 | - addl d3,d1 | - addxl d2,d0 | - bra 2f | -1: movel IMM (1),d3 | else add 1 - movel IMM (0),d2 | - addl d3,d1 | - addxl d2,d0 -| Shift right once (because we used bit #DBL_MANT_DIG-32!). -2: -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 -#endif - -| Now check again bit #DBL_MANT_DIG-32 (rounding could have produced a -| 'fraction overflow' ...). - btst IMM (DBL_MANT_DIG-32),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - addw IMM (1),d4 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif -1: -| If bit #DBL_MANT_DIG-32-1 is clear we have a denormalized number, so we -| have to put the exponent to zero and return a denormalized number. 
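[Aside: the "f + delta" rounding rule described in the comments above amounts to ordinary round-to-nearest-even. A minimal C sketch of that rule (not of the exact d0-d3 register usage), with illustrative names, assuming one guard bit at bit 0 and a sticky flag for the lower bits:

    #include <stdint.h>

    /* Sketch only: 'mant' carries one extra guard bit at bit 0; 'sticky' is
       nonzero if any lower-order bits were lost earlier.  delta > 1 means
       guard and sticky are both set; delta == 1 means only the guard bit. */
    static uint64_t round_to_nearest_even(uint64_t mant, int sticky)
    {
        int guard = (int)(mant & 1);          /* is delta >= 1 ?                  */
        mant >>= 1;                           /* drop the guard bit               */
        if (guard && (sticky || (mant & 1)))  /* delta > 1, or a tie, result odd  */
            mant += 1;                        /* round up; ties end up even       */
        return mant;
    }
]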
- btst IMM (DBL_MANT_DIG-32-1),d0 - beq 1f - jmp a0@ -1: movel IMM (0),d4 - jmp a0@ - -Lround$to$zero: -Lround$to$plus: -Lround$to$minus: - jmp a0@ -#endif /* L_double */ - -#ifdef L_float - - .globl SYM (_fpCCR) - .globl $_exception_handler - -QUIET_NaN = 0xffffffff -SIGNL_NaN = 0x7f800001 -INFINITY = 0x7f800000 - -F_MAX_EXP = 0xff -F_BIAS = 126 -FLT_MAX_EXP = F_MAX_EXP - F_BIAS -FLT_MIN_EXP = 1 - F_BIAS -FLT_MANT_DIG = 24 - -INEXACT_RESULT = 0x0001 -UNDERFLOW = 0x0002 -OVERFLOW = 0x0004 -DIVIDE_BY_ZERO = 0x0008 -INVALID_OPERATION = 0x0010 - -SINGLE_FLOAT = 1 - -NOOP = 0 -ADD = 1 -MULTIPLY = 2 -DIVIDE = 3 -NEGATE = 4 -COMPARE = 5 -EXTENDSFDF = 6 -TRUNCDFSF = 7 - -UNKNOWN = -1 -ROUND_TO_NEAREST = 0 | round result to nearest representable value -ROUND_TO_ZERO = 1 | round result towards zero -ROUND_TO_PLUS = 2 | round result towards plus infinity -ROUND_TO_MINUS = 3 | round result towards minus infinity - -| Entry points: - - .globl SYM (__addsf3) - .globl SYM (__subsf3) - .globl SYM (__mulsf3) - .globl SYM (__divsf3) - .globl SYM (__negsf2) - .globl SYM (__cmpsf2) - .globl SYM (__cmpsf2_internal) - .hidden SYM (__cmpsf2_internal) - -| These are common routines to return and signal exceptions. - - .text - .even - -Lf$den: -| Return and signal a denormalized number - orl d7,d0 - moveq IMM (INEXACT_RESULT+UNDERFLOW),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -Lf$infty: -Lf$overflow: -| Return a properly signed INFINITY and set the exception flags - movel IMM (INFINITY),d0 - orl d7,d0 - moveq IMM (INEXACT_RESULT+OVERFLOW),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -Lf$underflow: -| Return 0 and set the exception flags - moveq IMM (0),d0 - moveq IMM (INEXACT_RESULT+UNDERFLOW),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -Lf$inop: -| Return a quiet NaN and set the exception flags - movel IMM (QUIET_NaN),d0 - moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -Lf$div$0: -| Return a properly signed INFINITY and set the exception flags - movel IMM (INFINITY),d0 - orl d7,d0 - moveq IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -|============================================================================= -|============================================================================= -| single precision routines -|============================================================================= -|============================================================================= - -| A single precision floating point number (float) has the format: -| -| struct _float { -| unsigned int sign : 1; /* sign bit */ -| unsigned int exponent : 8; /* exponent, shifted by 126 */ -| unsigned int fraction : 23; /* fraction */ -| } float; -| -| Thus sizeof(float) = 4 (32 bits). -| -| All the routines are callable from C programs, and return the result -| in the single register d0. They also preserve all registers except -| d0-d1 and a0-a1. 
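[Aside: the single-precision layout described in the comment above can be read off directly in C. A minimal sketch; the helper name and the 32-bit copy are illustrative, not part of this file:

    #include <stdint.h>
    #include <string.h>

    /* Split a float into the three fields described above. */
    static void split_float(float f, unsigned *sign, unsigned *exponent,
                            unsigned *fraction)
    {
        uint32_t bits;
        memcpy(&bits, &f, sizeof bits);   /* reinterpret the 32 bits of the float   */
        *sign     = bits >> 31;           /* 1 sign bit                             */
        *exponent = (bits >> 23) & 0xff;  /* 8 biased exponent bits                 */
        *fraction = bits & 0x007fffff;    /* 23 fraction bits (hidden bit unstored) */
    }
]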
- -|============================================================================= -| __subsf3 -|============================================================================= - -| float __subsf3(float, float); - FUNC(__subsf3) -SYM (__subsf3): - bchg IMM (31),sp@(8) | change sign of second operand - | and fall through -|============================================================================= -| __addsf3 -|============================================================================= - -| float __addsf3(float, float); - FUNC(__addsf3) -SYM (__addsf3): -#ifndef __mcoldfire__ - link a6,IMM (0) | everything will be done in registers - moveml d2-d7,sp@- | save all data registers but d0-d1 -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get first operand - movel a6@(12),d1 | get second operand - movel d0,a0 | get d0's sign bit ' - addl d0,d0 | check and clear sign bit of a - beq Laddsf$b | if zero return second operand - movel d1,a1 | save b's sign bit ' - addl d1,d1 | get rid of sign bit - beq Laddsf$a | if zero return first operand - -| Get the exponents and check for denormalized and/or infinity. - - movel IMM (0x00ffffff),d4 | mask to get fraction - movel IMM (0x01000000),d5 | mask to put hidden bit back - - movel d0,d6 | save a to get exponent - andl d4,d0 | get fraction in d0 - notl d4 | make d4 into a mask for the exponent - andl d4,d6 | get exponent in d6 - beq Laddsf$a$den | branch if a is denormalized - cmpl d4,d6 | check for INFINITY or NaN - beq Laddsf$nf - swap d6 | put exponent into first word - orl d5,d0 | and put hidden bit back -Laddsf$1: -| Now we have a's exponent in d6 (second byte) and the mantissa in d0. ' - movel d1,d7 | get exponent in d7 - andl d4,d7 | - beq Laddsf$b$den | branch if b is denormalized - cmpl d4,d7 | check for INFINITY or NaN - beq Laddsf$nf - swap d7 | put exponent into first word - notl d4 | make d4 into a mask for the fraction - andl d4,d1 | get fraction in d1 - orl d5,d1 | and put hidden bit back -Laddsf$2: -| Now we have b's exponent in d7 (second byte) and the mantissa in d1. ' - -| Note that the hidden bit corresponds to bit #FLT_MANT_DIG-1, and we -| shifted right once, so bit #FLT_MANT_DIG is set (so we have one extra -| bit). - - movel d1,d2 | move b to d2, since we want to use - | two registers to do the sum - movel IMM (0),d1 | and clear the new ones - movel d1,d3 | - -| Here we shift the numbers in registers d0 and d1 so the exponents are the -| same, and put the largest exponent in d6. Note that we are using two -| registers for each number (see the discussion by D. Knuth in "Seminumerical -| Algorithms"). 
-#ifndef __mcoldfire__ - cmpw d6,d7 | compare exponents -#else - cmpl d6,d7 | compare exponents -#endif - beq Laddsf$3 | if equal don't shift ' - bhi 5f | branch if second exponent largest -1: - subl d6,d7 | keep the largest exponent - negl d7 -#ifndef __mcoldfire__ - lsrw IMM (8),d7 | put difference in lower byte -#else - lsrl IMM (8),d7 | put difference in lower byte -#endif -| if difference is too large we don't shift (actually, we can just exit) ' -#ifndef __mcoldfire__ - cmpw IMM (FLT_MANT_DIG+2),d7 -#else - cmpl IMM (FLT_MANT_DIG+2),d7 -#endif - bge Laddsf$b$small -#ifndef __mcoldfire__ - cmpw IMM (16),d7 | if difference >= 16 swap -#else - cmpl IMM (16),d7 | if difference >= 16 swap -#endif - bge 4f -2: -#ifndef __mcoldfire__ - subw IMM (1),d7 -#else - subql IMM (1), d7 -#endif -3: -#ifndef __mcoldfire__ - lsrl IMM (1),d2 | shift right second operand - roxrl IMM (1),d3 - dbra d7,3b -#else - lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - subql IMM (1), d7 - bpl 3b -#endif - bra Laddsf$3 -4: - movew d2,d3 - swap d3 - movew d3,d2 - swap d2 -#ifndef __mcoldfire__ - subw IMM (16),d7 -#else - subl IMM (16),d7 -#endif - bne 2b | if still more bits, go back to normal case - bra Laddsf$3 -5: -#ifndef __mcoldfire__ - exg d6,d7 | exchange the exponents -#else - eorl d6,d7 - eorl d7,d6 - eorl d6,d7 -#endif - subl d6,d7 | keep the largest exponent - negl d7 | -#ifndef __mcoldfire__ - lsrw IMM (8),d7 | put difference in lower byte -#else - lsrl IMM (8),d7 | put difference in lower byte -#endif -| if difference is too large we don't shift (and exit!) ' -#ifndef __mcoldfire__ - cmpw IMM (FLT_MANT_DIG+2),d7 -#else - cmpl IMM (FLT_MANT_DIG+2),d7 -#endif - bge Laddsf$a$small -#ifndef __mcoldfire__ - cmpw IMM (16),d7 | if difference >= 16 swap -#else - cmpl IMM (16),d7 | if difference >= 16 swap -#endif - bge 8f -6: -#ifndef __mcoldfire__ - subw IMM (1),d7 -#else - subl IMM (1),d7 -#endif -7: -#ifndef __mcoldfire__ - lsrl IMM (1),d0 | shift right first operand - roxrl IMM (1),d1 - dbra d7,7b -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - subql IMM (1),d7 - bpl 7b -#endif - bra Laddsf$3 -8: - movew d0,d1 - swap d1 - movew d1,d0 - swap d0 -#ifndef __mcoldfire__ - subw IMM (16),d7 -#else - subl IMM (16),d7 -#endif - bne 6b | if still more bits, go back to normal case - | otherwise we fall through - -| Now we have a in d0-d1, b in d2-d3, and the largest exponent in d6 (the -| signs are stored in a0 and a1). - -Laddsf$3: -| Here we have to decide whether to add or subtract the numbers -#ifndef __mcoldfire__ - exg d6,a0 | get signs back - exg d7,a1 | and save the exponents -#else - movel d6,d4 - movel a0,d6 - movel d4,a0 - movel d7,d4 - movel a1,d7 - movel d4,a1 -#endif - eorl d6,d7 | combine sign bits - bmi Lsubsf$0 | if negative a and b have opposite - | sign so we actually subtract the - | numbers - -| Here we have both positive or both negative -#ifndef __mcoldfire__ - exg d6,a0 | now we have the exponent in d6 -#else - movel d6,d4 - movel a0,d6 - movel d4,a0 -#endif - movel a0,d7 | and sign in d7 - andl IMM (0x80000000),d7 -| Here we do the addition. - addl d3,d1 - addxl d2,d0 -| Note: now we have d2, d3, d4 and d5 to play with! 
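[Aside: the alignment-and-add step just performed can be summarized by the following C sketch, under illustrative names and types. Note that the real routine keeps the bits shifted out (in d1/d3) so the rounding code can still see them; this sketch simply drops them:

    /* Align two positive fractions on a common exponent, then add them. */
    static unsigned long add_aligned(unsigned long frac_a, int exp_a,
                                     unsigned long frac_b, int exp_b, int *exp_out)
    {
        if (exp_a < exp_b) {                   /* make 'a' the larger-exponent operand */
            unsigned long tf = frac_a; frac_a = frac_b; frac_b = tf;
            int te = exp_a; exp_a = exp_b; exp_b = te;
        }
        int diff = exp_a - exp_b;
        frac_b = diff >= (int)(8 * sizeof frac_b) ? 0 : frac_b >> diff;
        *exp_out = exp_a;                      /* the result keeps the larger exponent */
        return frac_a + frac_b;
    }
]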
- -| Put the exponent, in the first byte, in d2, to use the "standard" rounding -| routines: - movel d6,d2 -#ifndef __mcoldfire__ - lsrw IMM (8),d2 -#else - lsrl IMM (8),d2 -#endif - -| Before rounding normalize so bit #FLT_MANT_DIG is set (we will consider -| the case of denormalized numbers in the rounding routine itself). -| As in the addition (not in the subtraction!) we could have set -| one more bit we check this: - btst IMM (FLT_MANT_DIG+1),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 -#endif - addl IMM (1),d2 -1: - lea pc@(Laddsf$4),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Laddsf$4: -| Put back the exponent, but check for overflow. -#ifndef __mcoldfire__ - cmpw IMM (0xff),d2 -#else - cmpl IMM (0xff),d2 -#endif - bhi 1f - bclr IMM (FLT_MANT_DIG-1),d0 -#ifndef __mcoldfire__ - lslw IMM (7),d2 -#else - lsll IMM (7),d2 -#endif - swap d2 - orl d2,d0 - bra Laddsf$ret -1: - moveq IMM (ADD),d5 - bra Lf$overflow - -Lsubsf$0: -| We are here if a > 0 and b < 0 (sign bits cleared). -| Here we do the subtraction. - movel d6,d7 | put sign in d7 - andl IMM (0x80000000),d7 - - subl d3,d1 | result in d0-d1 - subxl d2,d0 | - beq Laddsf$ret | if zero just exit - bpl 1f | if positive skip the following - bchg IMM (31),d7 | change sign bit in d7 - negl d1 - negxl d0 -1: -#ifndef __mcoldfire__ - exg d2,a0 | now we have the exponent in d2 - lsrw IMM (8),d2 | put it in the first byte -#else - movel d2,d4 - movel a0,d2 - movel d4,a0 - lsrl IMM (8),d2 | put it in the first byte -#endif - -| Now d0-d1 is positive and the sign bit is in d7. - -| Note that we do not have to normalize, since in the subtraction bit -| #FLT_MANT_DIG+1 is never set, and denormalized numbers are handled by -| the rounding routines themselves. - lea pc@(Lsubsf$1),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Lsubsf$1: -| Put back the exponent (we can't have overflow!). ' - bclr IMM (FLT_MANT_DIG-1),d0 -#ifndef __mcoldfire__ - lslw IMM (7),d2 -#else - lsll IMM (7),d2 -#endif - swap d2 - orl d2,d0 - bra Laddsf$ret - -| If one of the numbers was too small (difference of exponents >= -| FLT_MANT_DIG+2) we return the other (and now we don't have to ' -| check for finiteness or zero). -Laddsf$a$small: - movel a6@(12),d0 - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts - -Laddsf$b$small: - movel a6@(8),d0 - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. 
-#endif - unlk a6 | and return - rts - -| If the numbers are denormalized remember to put exponent equal to 1. - -Laddsf$a$den: - movel d5,d6 | d5 contains 0x01000000 - swap d6 - bra Laddsf$1 - -Laddsf$b$den: - movel d5,d7 - swap d7 - notl d4 | make d4 into a mask for the fraction - | (this was not executed after the jump) - bra Laddsf$2 - -| The rest is mainly code for the different results which can be -| returned (checking always for +/-INFINITY and NaN). - -Laddsf$b: -| Return b (if a is zero). - movel a6@(12),d0 - cmpl IMM (0x80000000),d0 | Check if b is -0 - bne 1f - movel a0,d7 - andl IMM (0x80000000),d7 | Use the sign of a - clrl d0 - bra Laddsf$ret -Laddsf$a: -| Return a (if b is zero). - movel a6@(8),d0 -1: - moveq IMM (ADD),d5 -| We have to check for NaN and +/-infty. - movel d0,d7 - andl IMM (0x80000000),d7 | put sign in d7 - bclr IMM (31),d0 | clear sign - cmpl IMM (INFINITY),d0 | check for infty or NaN - bge 2f - movel d0,d0 | check for zero (we do this because we don't ' - bne Laddsf$ret | want to return -0 by mistake - bclr IMM (31),d7 | if zero be sure to clear sign - bra Laddsf$ret | if everything OK just return -2: -| The value to be returned is either +/-infty or NaN - andl IMM (0x007fffff),d0 | check for NaN - bne Lf$inop | if mantissa not zero is NaN - bra Lf$infty - -Laddsf$ret: -| Normal exit (a and b nonzero, result is not NaN nor +/-infty). -| We have to clear the exception flags (just the exception type). - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ - orl d7,d0 | put sign bit -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts - -Laddsf$ret$den: -| Return a denormalized number (for addition we don't signal underflow) ' - lsrl IMM (1),d0 | remember to shift right back once - bra Laddsf$ret | and return - -| Note: when adding two floats of the same sign if either one is -| NaN we return NaN without regard to whether the other is finite or -| not. When subtracting them (i.e., when adding two numbers of -| opposite signs) things are more complicated: if both are INFINITY -| we return NaN, if only one is INFINITY and the other is NaN we return -| NaN, but if it is finite we return INFINITY with the corresponding sign. - -Laddsf$nf: - moveq IMM (ADD),d5 -| This could be faster but it is not worth the effort, since it is not -| executed very often. We sacrifice speed for clarity here. - movel a6@(8),d0 | get the numbers back (remember that we - movel a6@(12),d1 | did some processing already) - movel IMM (INFINITY),d4 | useful constant (INFINITY) - movel d0,d2 | save sign bits - movel d1,d3 - bclr IMM (31),d0 | clear sign bits - bclr IMM (31),d1 -| We know that one of them is either NaN of +/-INFINITY -| Check for NaN (if either one is NaN return NaN) - cmpl d4,d0 | check first a (d0) - bhi Lf$inop - cmpl d4,d1 | check now b (d1) - bhi Lf$inop -| Now comes the check for +/-INFINITY. We know that both are (maybe not -| finite) numbers, but we have to check if both are infinite whether we -| are adding or subtracting them. - eorl d3,d2 | to check sign bits - bmi 1f - movel d0,d7 - andl IMM (0x80000000),d7 | get (common) sign bit - bra Lf$infty -1: -| We know one (or both) are infinite, so we test for equality between the -| two numbers (if they are equal they have to be infinite both, so we -| return NaN). - cmpl d1,d0 | are both infinite? 
- beq Lf$inop | if so return NaN - - movel d0,d7 - andl IMM (0x80000000),d7 | get a's sign bit ' - cmpl d4,d0 | test now for infinity - beq Lf$infty | if a is INFINITY return with this sign - bchg IMM (31),d7 | else we know b is INFINITY and has - bra Lf$infty | the opposite sign - -|============================================================================= -| __mulsf3 -|============================================================================= - -| float __mulsf3(float, float); - FUNC(__mulsf3) -SYM (__mulsf3): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get a into d0 - movel a6@(12),d1 | and b into d1 - movel d0,d7 | d7 will hold the sign of the product - eorl d1,d7 | - andl IMM (0x80000000),d7 - movel IMM (INFINITY),d6 | useful constant (+INFINITY) - movel d6,d5 | another (mask for fraction) - notl d5 | - movel IMM (0x00800000),d4 | this is to put hidden bit back - bclr IMM (31),d0 | get rid of a's sign bit ' - movel d0,d2 | - beq Lmulsf$a$0 | branch if a is zero - bclr IMM (31),d1 | get rid of b's sign bit ' - movel d1,d3 | - beq Lmulsf$b$0 | branch if b is zero - cmpl d6,d0 | is a big? - bhi Lmulsf$inop | if a is NaN return NaN - beq Lmulsf$inf | if a is INFINITY we have to check b - cmpl d6,d1 | now compare b with INFINITY - bhi Lmulsf$inop | is b NaN? - beq Lmulsf$overflow | is b INFINITY? -| Here we have both numbers finite and nonzero (and with no sign bit). -| Now we get the exponents into d2 and d3. - andl d6,d2 | and isolate exponent in d2 - beq Lmulsf$a$den | if exponent is zero we have a denormalized - andl d5,d0 | and isolate fraction - orl d4,d0 | and put hidden bit back - swap d2 | I like exponents in the first byte -#ifndef __mcoldfire__ - lsrw IMM (7),d2 | -#else - lsrl IMM (7),d2 | -#endif -Lmulsf$1: | number - andl d6,d3 | - beq Lmulsf$b$den | - andl d5,d1 | - orl d4,d1 | - swap d3 | -#ifndef __mcoldfire__ - lsrw IMM (7),d3 | -#else - lsrl IMM (7),d3 | -#endif -Lmulsf$2: | -#ifndef __mcoldfire__ - addw d3,d2 | add exponents - subw IMM (F_BIAS+1),d2 | and subtract bias (plus one) -#else - addl d3,d2 | add exponents - subl IMM (F_BIAS+1),d2 | and subtract bias (plus one) -#endif - -| We are now ready to do the multiplication. The situation is as follows: -| both a and b have bit FLT_MANT_DIG-1 set (even if they were -| denormalized to start with!), which means that in the product -| bit 2*(FLT_MANT_DIG-1) (that is, bit 2*FLT_MANT_DIG-2-32 of the -| high long) is set. - -| To do the multiplication let us move the number a little bit around ... - movel d1,d6 | second operand in d6 - movel d0,d5 | first operand in d4-d5 - movel IMM (0),d4 - movel d4,d1 | the sums will go in d0-d1 - movel d4,d0 - -| now bit FLT_MANT_DIG-1 becomes bit 31: - lsll IMM (31-FLT_MANT_DIG+1),d6 - -| Start the loop (we loop #FLT_MANT_DIG times): - moveq IMM (FLT_MANT_DIG-1),d3 -1: addl d1,d1 | shift sum - addxl d0,d0 - lsll IMM (1),d6 | get bit bn - bcc 2f | if not set skip sum - addl d5,d1 | add a - addxl d4,d0 -2: -#ifndef __mcoldfire__ - dbf d3,1b | loop back -#else - subql IMM (1),d3 - bpl 1b -#endif - -| Now we have the product in d0-d1, with bit (FLT_MANT_DIG - 1) + FLT_MANT_DIG -| (mod 32) of d0 set. The first thing to do now is to normalize it so bit -| FLT_MANT_DIG is set (to do the rounding). 
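[Aside: the shift-and-add loop above corresponds to the following C sketch, which multiplies two 24-bit fractions by scanning the multiplier from its most significant bit; names are illustrative only:

    /* Shift-and-add multiplication of two FLT_MANT_DIG-bit fractions. */
    static unsigned long long mul_frac24(unsigned long a, unsigned long b)
    {
        unsigned long long acc = 0;
        int i;
        for (i = 23; i >= 0; i--) {
            acc <<= 1;                /* shift the partial sum left        */
            if ((b >> i) & 1)         /* next multiplier bit set?          */
                acc += a;             /* ... then add the multiplicand     */
        }
        return acc;                   /* at most 48 significant bits       */
    }
]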
-#ifndef __mcoldfire__ - rorl IMM (6),d1 - swap d1 - movew d1,d3 - andw IMM (0x03ff),d3 - andw IMM (0xfd00),d1 -#else - movel d1,d3 - lsll IMM (8),d1 - addl d1,d1 - addl d1,d1 - moveq IMM (22),d5 - lsrl d5,d3 - orl d3,d1 - andl IMM (0xfffffd00),d1 -#endif - lsll IMM (8),d0 - addl d0,d0 - addl d0,d0 -#ifndef __mcoldfire__ - orw d3,d0 -#else - orl d3,d0 -#endif - - moveq IMM (MULTIPLY),d5 - - btst IMM (FLT_MANT_DIG+1),d0 - beq Lround$exit -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - addw IMM (1),d2 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - addql IMM (1),d2 -#endif - bra Lround$exit - -Lmulsf$inop: - moveq IMM (MULTIPLY),d5 - bra Lf$inop - -Lmulsf$overflow: - moveq IMM (MULTIPLY),d5 - bra Lf$overflow - -Lmulsf$inf: - moveq IMM (MULTIPLY),d5 -| If either is NaN return NaN; else both are (maybe infinite) numbers, so -| return INFINITY with the correct sign (which is in d7). - cmpl d6,d1 | is b NaN? - bhi Lf$inop | if so return NaN - bra Lf$overflow | else return +/-INFINITY - -| If either number is zero return zero, unless the other is +/-INFINITY, -| or NaN, in which case we return NaN. -Lmulsf$b$0: -| Here d1 (==b) is zero. - movel a6@(8),d1 | get a again to check for non-finiteness - bra 1f -Lmulsf$a$0: - movel a6@(12),d1 | get b again to check for non-finiteness -1: bclr IMM (31),d1 | clear sign bit - cmpl IMM (INFINITY),d1 | and check for a large exponent - bge Lf$inop | if b is +/-INFINITY or NaN return NaN - movel d7,d0 | else return signed zero - PICLEA SYM (_fpCCR),a0 | - movew IMM (0),a0@ | -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | - rts | - -| If a number is denormalized we put an exponent of 1 but do not put the -| hidden bit back into the fraction; instead we shift left until bit 23 -| (the hidden bit) is set, adjusting the exponent accordingly. We do this -| to ensure that the product of the fractions is close to 1. 
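[Aside: a hedged C sketch of the pre-normalization performed by Lmulsf$a$den / Lmulsf$b$den below: give the denormal an exponent of 1, then shift left until the hidden-bit position (bit 23) is set, adjusting the exponent on the way. The name is ours; 'frac' is assumed nonzero, since zero operands were handled earlier:

    static unsigned long normalize_denormal(unsigned long frac, int *exp)
    {
        *exp = 1;                              /* denormals start at the minimum exponent */
        while (!(frac & (1UL << 23))) {
            frac <<= 1;
            (*exp)--;                          /* the exponent may become <= 0 here       */
        }
        return frac;
    }
]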
-Lmulsf$a$den: - movel IMM (1),d2 - andl d5,d0 -1: addl d0,d0 | shift a left (until bit 23 is set) -#ifndef __mcoldfire__ - subw IMM (1),d2 | and adjust exponent -#else - subql IMM (1),d2 | and adjust exponent -#endif - btst IMM (FLT_MANT_DIG-1),d0 - bne Lmulsf$1 | - bra 1b | else loop back - -Lmulsf$b$den: - movel IMM (1),d3 - andl d5,d1 -1: addl d1,d1 | shift b left until bit 23 is set -#ifndef __mcoldfire__ - subw IMM (1),d3 | and adjust exponent -#else - subql IMM (1),d3 | and adjust exponent -#endif - btst IMM (FLT_MANT_DIG-1),d1 - bne Lmulsf$2 | - bra 1b | else loop back - -|============================================================================= -| __divsf3 -|============================================================================= - -| float __divsf3(float, float); - FUNC(__divsf3) -SYM (__divsf3): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get a into d0 - movel a6@(12),d1 | and b into d1 - movel d0,d7 | d7 will hold the sign of the result - eorl d1,d7 | - andl IMM (0x80000000),d7 | - movel IMM (INFINITY),d6 | useful constant (+INFINITY) - movel d6,d5 | another (mask for fraction) - notl d5 | - movel IMM (0x00800000),d4 | this is to put hidden bit back - bclr IMM (31),d0 | get rid of a's sign bit ' - movel d0,d2 | - beq Ldivsf$a$0 | branch if a is zero - bclr IMM (31),d1 | get rid of b's sign bit ' - movel d1,d3 | - beq Ldivsf$b$0 | branch if b is zero - cmpl d6,d0 | is a big? - bhi Ldivsf$inop | if a is NaN return NaN - beq Ldivsf$inf | if a is INFINITY we have to check b - cmpl d6,d1 | now compare b with INFINITY - bhi Ldivsf$inop | if b is NaN return NaN - beq Ldivsf$underflow -| Here we have both numbers finite and nonzero (and with no sign bit). -| Now we get the exponents into d2 and d3 and normalize the numbers to -| ensure that the ratio of the fractions is close to 1. We do this by -| making sure that bit #FLT_MANT_DIG-1 (hidden bit) is set. - andl d6,d2 | and isolate exponent in d2 - beq Ldivsf$a$den | if exponent is zero we have a denormalized - andl d5,d0 | and isolate fraction - orl d4,d0 | and put hidden bit back - swap d2 | I like exponents in the first byte -#ifndef __mcoldfire__ - lsrw IMM (7),d2 | -#else - lsrl IMM (7),d2 | -#endif -Ldivsf$1: | - andl d6,d3 | - beq Ldivsf$b$den | - andl d5,d1 | - orl d4,d1 | - swap d3 | -#ifndef __mcoldfire__ - lsrw IMM (7),d3 | -#else - lsrl IMM (7),d3 | -#endif -Ldivsf$2: | -#ifndef __mcoldfire__ - subw d3,d2 | subtract exponents - addw IMM (F_BIAS),d2 | and add bias -#else - subl d3,d2 | subtract exponents - addl IMM (F_BIAS),d2 | and add bias -#endif - -| We are now ready to do the division. We have prepared things in such a way -| that the ratio of the fractions will be less than 2 but greater than 1/2. -| At this point the registers in use are: -| d0 holds a (first operand, bit FLT_MANT_DIG=0, bit FLT_MANT_DIG-1=1) -| d1 holds b (second operand, bit FLT_MANT_DIG=1) -| d2 holds the difference of the exponents, corrected by the bias -| d7 holds the sign of the ratio -| d4, d5, d6 hold some constants - movel d7,a0 | d6-d7 will hold the ratio of the fractions - movel IMM (0),d6 | - movel d6,d7 - - moveq IMM (FLT_MANT_DIG+1),d3 -1: cmpl d0,d1 | is a < b? - bhi 2f | - bset d3,d6 | set a bit in d6 - subl d1,d0 | if a >= b a <-- a-b - beq 3f | if a is zero, exit -2: addl d0,d0 | multiply a by 2 -#ifndef __mcoldfire__ - dbra d3,1b -#else - subql IMM (1),d3 - bpl 1b -#endif - -| Now we keep going to set the sticky bit ... 
- moveq IMM (FLT_MANT_DIG),d3 -1: cmpl d0,d1 - ble 2f - addl d0,d0 -#ifndef __mcoldfire__ - dbra d3,1b -#else - subql IMM(1),d3 - bpl 1b -#endif - movel IMM (0),d1 - bra 3f -2: movel IMM (0),d1 -#ifndef __mcoldfire__ - subw IMM (FLT_MANT_DIG),d3 - addw IMM (31),d3 -#else - subl IMM (FLT_MANT_DIG),d3 - addl IMM (31),d3 -#endif - bset d3,d1 -3: - movel d6,d0 | put the ratio in d0-d1 - movel a0,d7 | get sign back - -| Because of the normalization we did before we are guaranteed that -| d0 is smaller than 2^26 but larger than 2^24. Thus bit 26 is not set, -| bit 25 could be set, and if it is not set then bit 24 is necessarily set. - btst IMM (FLT_MANT_DIG+1),d0 - beq 1f | if it is not set, then bit 24 is set - lsrl IMM (1),d0 | -#ifndef __mcoldfire__ - addw IMM (1),d2 | -#else - addl IMM (1),d2 | -#endif -1: -| Now round, check for over- and underflow, and exit. - moveq IMM (DIVIDE),d5 - bra Lround$exit - -Ldivsf$inop: - moveq IMM (DIVIDE),d5 - bra Lf$inop - -Ldivsf$overflow: - moveq IMM (DIVIDE),d5 - bra Lf$overflow - -Ldivsf$underflow: - moveq IMM (DIVIDE),d5 - bra Lf$underflow - -Ldivsf$a$0: - moveq IMM (DIVIDE),d5 -| If a is zero check to see whether b is zero also. In that case return -| NaN; then check if b is NaN, and return NaN also in that case. Else -| return a properly signed zero. - andl IMM (0x7fffffff),d1 | clear sign bit and test b - beq Lf$inop | if b is also zero return NaN - cmpl IMM (INFINITY),d1 | check for NaN - bhi Lf$inop | - movel d7,d0 | else return signed zero - PICLEA SYM (_fpCCR),a0 | - movew IMM (0),a0@ | -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | -#else - moveml sp@,d2-d7 | - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | - rts | - -Ldivsf$b$0: - moveq IMM (DIVIDE),d5 -| If we got here a is not zero. Check if a is NaN; in that case return NaN, -| else return +/-INFINITY. Remember that a is in d0 with the sign bit -| cleared already. - cmpl IMM (INFINITY),d0 | compare d0 with INFINITY - bhi Lf$inop | if larger it is NaN - bra Lf$div$0 | else signal DIVIDE_BY_ZERO - -Ldivsf$inf: - moveq IMM (DIVIDE),d5 -| If a is INFINITY we have to check b - cmpl IMM (INFINITY),d1 | compare b with INFINITY - bge Lf$inop | if b is NaN or INFINITY return NaN - bra Lf$overflow | else return overflow - -| If a number is denormalized we put an exponent of 1 but do not put the -| bit back into the fraction. -Ldivsf$a$den: - movel IMM (1),d2 - andl d5,d0 -1: addl d0,d0 | shift a left until bit FLT_MANT_DIG-1 is set -#ifndef __mcoldfire__ - subw IMM (1),d2 | and adjust exponent -#else - subl IMM (1),d2 | and adjust exponent -#endif - btst IMM (FLT_MANT_DIG-1),d0 - bne Ldivsf$1 - bra 1b - -Ldivsf$b$den: - movel IMM (1),d3 - andl d5,d1 -1: addl d1,d1 | shift b left until bit FLT_MANT_DIG is set -#ifndef __mcoldfire__ - subw IMM (1),d3 | and adjust exponent -#else - subl IMM (1),d3 | and adjust exponent -#endif - btst IMM (FLT_MANT_DIG-1),d1 - bne Ldivsf$2 - bra 1b - -Lround$exit: -| This is a common exit point for __mulsf3 and __divsf3. - -| First check for underlow in the exponent: -#ifndef __mcoldfire__ - cmpw IMM (-FLT_MANT_DIG-1),d2 -#else - cmpl IMM (-FLT_MANT_DIG-1),d2 -#endif - blt Lf$underflow -| It could happen that the exponent is less than 1, in which case the -| number is denormalized. In this case we shift right and adjust the -| exponent until it becomes 1 or the fraction is zero (in the latter case -| we signal underflow and return zero). 
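[Aside: a hedged C sketch of the denormalization step that follows: while the exponent is below 1, shift right and collect the flushed-out bits so rounding can still see them. 'sticky' stands in for register d6; the names are illustrative, not from this file:

    static unsigned long denormalize(unsigned long frac, int *exp,
                                     unsigned long *sticky)
    {
        while (*exp < 1) {
            *sticky = (*sticky >> 1) | ((frac & 1UL) << 31);  /* keep the bit being lost */
            frac >>= 1;
            (*exp)++;
        }
        return frac;
    }
]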
- movel IMM (0),d6 | d6 is used temporarily -#ifndef __mcoldfire__ - cmpw IMM (1),d2 | if the exponent is less than 1 we -#else - cmpl IMM (1),d2 | if the exponent is less than 1 we -#endif - bge 2f | have to shift right (denormalize) -1: -#ifndef __mcoldfire__ - addw IMM (1),d2 | adjust the exponent - lsrl IMM (1),d0 | shift right once - roxrl IMM (1),d1 | - roxrl IMM (1),d6 | d6 collect bits we would lose otherwise - cmpw IMM (1),d2 | is the exponent 1 already? -#else - addql IMM (1),d2 | adjust the exponent - lsrl IMM (1),d6 - btst IMM (0),d1 - beq 11f - bset IMM (31),d6 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - cmpl IMM (1),d2 | is the exponent 1 already? -#endif - beq 2f | if not loop back - bra 1b | - bra Lf$underflow | safety check, shouldn't execute ' -2: orl d6,d1 | this is a trick so we don't lose ' - | the extra bits which were flushed right -| Now call the rounding routine (which takes care of denormalized numbers): - lea pc@(Lround$0),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Lround$0: -| Here we have a correctly rounded result (either normalized or denormalized). - -| Here we should have either a normalized number or a denormalized one, and -| the exponent is necessarily larger or equal to 1 (so we don't have to ' -| check again for underflow!). We have to check for overflow or for a -| denormalized number (which also signals underflow). -| Check for overflow (i.e., exponent >= 255). -#ifndef __mcoldfire__ - cmpw IMM (0x00ff),d2 -#else - cmpl IMM (0x00ff),d2 -#endif - bge Lf$overflow -| Now check for a denormalized number (exponent==0). - movew d2,d2 - beq Lf$den -1: -| Put back the exponents and sign and return. -#ifndef __mcoldfire__ - lslw IMM (7),d2 | exponent back to fourth byte -#else - lsll IMM (7),d2 | exponent back to fourth byte -#endif - bclr IMM (FLT_MANT_DIG-1),d0 - swap d0 | and put back exponent -#ifndef __mcoldfire__ - orw d2,d0 | -#else - orl d2,d0 -#endif - swap d0 | - orl d7,d0 | and sign also - - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts - -|============================================================================= -| __negsf2 -|============================================================================= - -| This is trivial and could be shorter if we didn't bother checking for NaN ' -| and +/-INFINITY. 
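[Aside: as the comment says, the core operation is a single bit flip. A minimal C sketch, illustrative only; the routine below additionally screens zero, NaN and +/-INFINITY before returning:

    #include <stdint.h>
    #include <string.h>

    static float negate_float(float f)
    {
        uint32_t bits;
        memcpy(&bits, &f, sizeof bits);
        bits ^= UINT32_C(0x80000000);      /* flip the sign bit */
        memcpy(&f, &bits, sizeof bits);
        return f;
    }
]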
- -| float __negsf2(float); - FUNC(__negsf2) -SYM (__negsf2): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - moveq IMM (NEGATE),d5 - movel a6@(8),d0 | get number to negate in d0 - bchg IMM (31),d0 | negate - movel d0,d1 | make a positive copy - bclr IMM (31),d1 | - tstl d1 | check for zero - beq 2f | if zero (either sign) return +zero - cmpl IMM (INFINITY),d1 | compare to +INFINITY - blt 1f | - bhi Lf$inop | if larger (fraction not zero) is NaN - movel d0,d7 | else get sign and return INFINITY - andl IMM (0x80000000),d7 - bra Lf$infty -1: PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts -2: bclr IMM (31),d0 - bra 1b - -|============================================================================= -| __cmpsf2 -|============================================================================= - -GREATER = 1 -LESS = -1 -EQUAL = 0 - -| int __cmpsf2_internal(float, float, int); -SYM (__cmpsf2_internal): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- | save registers -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - moveq IMM (COMPARE),d5 - movel a6@(8),d0 | get first operand - movel a6@(12),d1 | get second operand -| Check if either is NaN, and in that case return garbage and signal -| INVALID_OPERATION. Check also if either is zero, and clear the signs -| if necessary. - movel d0,d6 - andl IMM (0x7fffffff),d0 - beq Lcmpsf$a$0 - cmpl IMM (0x7f800000),d0 - bhi Lcmpf$inop -Lcmpsf$1: - movel d1,d7 - andl IMM (0x7fffffff),d1 - beq Lcmpsf$b$0 - cmpl IMM (0x7f800000),d1 - bhi Lcmpf$inop -Lcmpsf$2: -| Check the signs - eorl d6,d7 - bpl 1f -| If the signs are not equal check if a >= 0 - tstl d6 - bpl Lcmpsf$a$gt$b | if (a >= 0 && b < 0) => a > b - bmi Lcmpsf$b$gt$a | if (a < 0 && b >= 0) => a < b -1: -| If the signs are equal check for < 0 - tstl d6 - bpl 1f -| If both are negative exchange them -#ifndef __mcoldfire__ - exg d0,d1 -#else - movel d0,d7 - movel d1,d0 - movel d7,d1 -#endif -1: -| Now that they are positive we just compare them as longs (does this also -| work for denormalized numbers?). - cmpl d0,d1 - bhi Lcmpsf$b$gt$a | |b| > |a| - bne Lcmpsf$a$gt$b | |b| < |a| -| If we got here a == b. - movel IMM (EQUAL),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 -#endif - unlk a6 - rts -Lcmpsf$a$gt$b: - movel IMM (GREATER),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts -Lcmpsf$b$gt$a: - movel IMM (LESS),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. 
-#endif - unlk a6 - rts - -Lcmpsf$a$0: - bclr IMM (31),d6 - bra Lcmpsf$1 -Lcmpsf$b$0: - bclr IMM (31),d7 - bra Lcmpsf$2 - -Lcmpf$inop: - movl a6@(16),d0 - moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -| int __cmpsf2(float, float); - FUNC(__cmpsf2) -SYM (__cmpsf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts - -|============================================================================= -| rounding routines -|============================================================================= - -| The rounding routines expect the number to be normalized in registers -| d0-d1, with the exponent in register d2. They assume that the -| exponent is larger or equal to 1. They return a properly normalized number -| if possible, and a denormalized number otherwise. The exponent is returned -| in d2. - -Lround$to$nearest: -| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"): -| Here we assume that the exponent is not too small (this should be checked -| before entering the rounding routine), but the number could be denormalized. - -| Check for denormalized numbers: -1: btst IMM (FLT_MANT_DIG),d0 - bne 2f | if set the number is normalized -| Normalize shifting left until bit #FLT_MANT_DIG is set or the exponent -| is one (remember that a denormalized number corresponds to an -| exponent of -F_BIAS+1). -#ifndef __mcoldfire__ - cmpw IMM (1),d2 | remember that the exponent is at least one -#else - cmpl IMM (1),d2 | remember that the exponent is at least one -#endif - beq 2f | an exponent of one means denormalized - addl d1,d1 | else shift and adjust the exponent - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d2,1b | -#else - subql IMM (1),d2 - bpl 1b -#endif -2: -| Now round: we do it as follows: after the shifting we can write the -| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2. -| If delta < 1, do nothing. If delta > 1, add 1 to f. -| If delta == 1, we make sure the rounded number will be even (odd?) -| (after shifting). - btst IMM (0),d0 | is delta < 1? - beq 2f | if so, do not do anything - tstl d1 | is delta == 1? - bne 1f | if so round to even - movel d0,d1 | - andl IMM (2),d1 | bit 1 is the last significant bit - addl d1,d0 | - bra 2f | -1: movel IMM (1),d1 | else add 1 - addl d1,d0 | -| Shift right once (because we used bit #FLT_MANT_DIG!). -2: lsrl IMM (1),d0 -| Now check again bit #FLT_MANT_DIG (rounding could have produced a -| 'fraction overflow' ...). - btst IMM (FLT_MANT_DIG),d0 - beq 1f - lsrl IMM (1),d0 -#ifndef __mcoldfire__ - addw IMM (1),d2 -#else - addql IMM (1),d2 -#endif -1: -| If bit #FLT_MANT_DIG-1 is clear we have a denormalized number, so we -| have to put the exponent to zero and return a denormalized number. - btst IMM (FLT_MANT_DIG-1),d0 - beq 1f - jmp a0@ -1: movel IMM (0),d2 - jmp a0@ - -Lround$to$zero: -Lround$to$plus: -Lround$to$minus: - jmp a0@ -#endif /* L_float */ - -| gcc expects the routines __eqdf2, __nedf2, __gtdf2, __gedf2, -| __ledf2, __ltdf2 to all return the same value as a direct call to -| __cmpdf2 would. In this implementation, each of these routines -| simply calls __cmpdf2. It would be more efficient to give the -| __cmpdf2 routine several names, but separating them out will make it -| easier to write efficient versions of these routines someday. -| If the operands recompare unordered unordered __gtdf2 and __gedf2 return -1. -| The other routines return 1. 
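[Aside: the stubs that follow all share one pattern, sketched here in C. The third argument of the internal comparison (whose prototype appears in the comments earlier in this file) is the value returned when either operand is a NaN, so __gtdf2/__gedf2 pass -1 and the remaining wrappers pass 1. The _sketch names are ours, not part of this file:

    extern int __cmpdf2_internal(double a, double b, int unordered_result);

    static int gtdf2_sketch(double a, double b) { return __cmpdf2_internal(a, b, -1); }
    static int ledf2_sketch(double a, double b) { return __cmpdf2_internal(a, b,  1); }
]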
- -#ifdef L_eqdf2 - .text - FUNC(__eqdf2) - .globl SYM (__eqdf2) -SYM (__eqdf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_eqdf2 */ - -#ifdef L_nedf2 - .text - FUNC(__nedf2) - .globl SYM (__nedf2) -SYM (__nedf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_nedf2 */ - -#ifdef L_gtdf2 - .text - FUNC(__gtdf2) - .globl SYM (__gtdf2) -SYM (__gtdf2): - link a6,IMM (0) - pea -1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_gtdf2 */ - -#ifdef L_gedf2 - .text - FUNC(__gedf2) - .globl SYM (__gedf2) -SYM (__gedf2): - link a6,IMM (0) - pea -1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_gedf2 */ - -#ifdef L_ltdf2 - .text - FUNC(__ltdf2) - .globl SYM (__ltdf2) -SYM (__ltdf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_ltdf2 */ - -#ifdef L_ledf2 - .text - FUNC(__ledf2) - .globl SYM (__ledf2) -SYM (__ledf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_ledf2 */ - -| The comments above about __eqdf2, et. al., also apply to __eqsf2, -| et. al., except that the latter call __cmpsf2 rather than __cmpdf2. - -#ifdef L_eqsf2 - .text - FUNC(__eqsf2) - .globl SYM (__eqsf2) -SYM (__eqsf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_eqsf2 */ - -#ifdef L_nesf2 - .text - FUNC(__nesf2) - .globl SYM (__nesf2) -SYM (__nesf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_nesf2 */ - -#ifdef L_gtsf2 - .text - FUNC(__gtsf2) - .globl SYM (__gtsf2) -SYM (__gtsf2): - link a6,IMM (0) - pea -1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_gtsf2 */ - -#ifdef L_gesf2 - .text - FUNC(__gesf2) - .globl SYM (__gesf2) -SYM (__gesf2): - link a6,IMM (0) - pea -1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_gesf2 */ - -#ifdef L_ltsf2 - .text - FUNC(__ltsf2) - .globl SYM (__ltsf2) -SYM (__ltsf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_ltsf2 */ - -#ifdef L_lesf2 - .text - FUNC(__lesf2) - .globl SYM (__lesf2) -SYM (__lesf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_lesf2 */ - -#if defined (__ELF__) && defined (__linux__) - /* Make stack non-executable for ELF linux targets. */ - .section .note.GNU-stack,"",@progbits -#endif diff --git a/gcc/config/m68k/t-floatlib b/gcc/config/m68k/t-floatlib index 2039d1d0dc4..23734be40bd 100644 --- a/gcc/config/m68k/t-floatlib +++ b/gcc/config/m68k/t-floatlib @@ -1,4 +1,4 @@ -# Copyright (C) 2007 Free Software Foundation, Inc. +# Copyright (C) 2007, 2011 Free Software Foundation, Inc. # # This file is part of GCC. # @@ -16,12 +16,6 @@ # along with GCC; see the file COPYING3. 
If not see # . -LIB1ASMSRC = m68k/lb1sf68.asm -LIB1ASMFUNCS = _mulsi3 _udivsi3 _divsi3 _umodsi3 _modsi3 \ - _double _float _floatex \ - _eqdf2 _nedf2 _gtdf2 _gedf2 _ltdf2 _ledf2 \ - _eqsf2 _nesf2 _gtsf2 _gesf2 _ltsf2 _lesf2 - LIB2FUNCS_EXTRA = fpgnulib.c xfgnulib.c fpgnulib.c: $(srcdir)/config/m68k/fpgnulib.c diff --git a/gcc/config/mcore/lib1.asm b/gcc/config/mcore/lib1.asm deleted file mode 100644 index 701762f2a3c..00000000000 --- a/gcc/config/mcore/lib1.asm +++ /dev/null @@ -1,303 +0,0 @@ -/* libgcc routines for the MCore. - Copyright (C) 1993, 1999, 2000, 2009 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -#define CONCAT1(a, b) CONCAT2(a, b) -#define CONCAT2(a, b) a ## b - -/* Use the right prefix for global labels. */ - -#define SYM(x) CONCAT1 (__, x) - -#ifdef __ELF__ -#define TYPE(x) .type SYM (x),@function -#define SIZE(x) .size SYM (x), . - SYM (x) -#else -#define TYPE(x) -#define SIZE(x) -#endif - -.macro FUNC_START name - .text - .globl SYM (\name) - TYPE (\name) -SYM (\name): -.endm - -.macro FUNC_END name - SIZE (\name) -.endm - -#ifdef L_udivsi3 -FUNC_START udiv32 -FUNC_START udivsi32 - - movi r1,0 // r1-r2 form 64 bit dividend - movi r4,1 // r4 is quotient (1 for a sentinel) - - cmpnei r3,0 // look for 0 divisor - bt 9f - trap 3 // divide by 0 -9: - // control iterations; skip across high order 0 bits in dividend - mov r7,r2 - cmpnei r7,0 - bt 8f - movi r2,0 // 0 dividend - jmp r15 // quick return -8: - ff1 r7 // figure distance to skip - lsl r4,r7 // move the sentinel along (with 0's behind) - lsl r2,r7 // and the low 32 bits of numerator - -// appears to be wrong... -// tested out incorrectly in our OS work... -// mov r7,r3 // looking at divisor -// ff1 r7 // I can move 32-r7 more bits to left. -// addi r7,1 // ok, one short of that... -// mov r1,r2 -// lsr r1,r7 // bits that came from low order... -// rsubi r7,31 // r7 == "32-n" == LEFT distance -// addi r7,1 // this is (32-n) -// lsl r4,r7 // fixes the high 32 (quotient) -// lsl r2,r7 -// cmpnei r4,0 -// bf 4f // the sentinel went away... - - // run the remaining bits - -1: lslc r2,1 // 1 bit left shift of r1-r2 - addc r1,r1 - cmphs r1,r3 // upper 32 of dividend >= divisor? 
- bf 2f - sub r1,r3 // if yes, subtract divisor -2: addc r4,r4 // shift by 1 and count subtracts - bf 1b // if sentinel falls out of quotient, stop - -4: mov r2,r4 // return quotient - mov r3,r1 // and piggyback the remainder - jmp r15 -FUNC_END udiv32 -FUNC_END udivsi32 -#endif - -#ifdef L_umodsi3 -FUNC_START urem32 -FUNC_START umodsi3 - movi r1,0 // r1-r2 form 64 bit dividend - movi r4,1 // r4 is quotient (1 for a sentinel) - cmpnei r3,0 // look for 0 divisor - bt 9f - trap 3 // divide by 0 -9: - // control iterations; skip across high order 0 bits in dividend - mov r7,r2 - cmpnei r7,0 - bt 8f - movi r2,0 // 0 dividend - jmp r15 // quick return -8: - ff1 r7 // figure distance to skip - lsl r4,r7 // move the sentinel along (with 0's behind) - lsl r2,r7 // and the low 32 bits of numerator - -1: lslc r2,1 // 1 bit left shift of r1-r2 - addc r1,r1 - cmphs r1,r3 // upper 32 of dividend >= divisor? - bf 2f - sub r1,r3 // if yes, subtract divisor -2: addc r4,r4 // shift by 1 and count subtracts - bf 1b // if sentinel falls out of quotient, stop - mov r2,r1 // return remainder - jmp r15 -FUNC_END urem32 -FUNC_END umodsi3 -#endif - -#ifdef L_divsi3 -FUNC_START div32 -FUNC_START divsi3 - mov r5,r2 // calc sign of quotient - xor r5,r3 - abs r2 // do unsigned divide - abs r3 - movi r1,0 // r1-r2 form 64 bit dividend - movi r4,1 // r4 is quotient (1 for a sentinel) - cmpnei r3,0 // look for 0 divisor - bt 9f - trap 3 // divide by 0 -9: - // control iterations; skip across high order 0 bits in dividend - mov r7,r2 - cmpnei r7,0 - bt 8f - movi r2,0 // 0 dividend - jmp r15 // quick return -8: - ff1 r7 // figure distance to skip - lsl r4,r7 // move the sentinel along (with 0's behind) - lsl r2,r7 // and the low 32 bits of numerator - -// tested out incorrectly in our OS work... -// mov r7,r3 // looking at divisor -// ff1 r7 // I can move 32-r7 more bits to left. -// addi r7,1 // ok, one short of that... -// mov r1,r2 -// lsr r1,r7 // bits that came from low order... -// rsubi r7,31 // r7 == "32-n" == LEFT distance -// addi r7,1 // this is (32-n) -// lsl r4,r7 // fixes the high 32 (quotient) -// lsl r2,r7 -// cmpnei r4,0 -// bf 4f // the sentinel went away... - - // run the remaining bits -1: lslc r2,1 // 1 bit left shift of r1-r2 - addc r1,r1 - cmphs r1,r3 // upper 32 of dividend >= divisor? - bf 2f - sub r1,r3 // if yes, subtract divisor -2: addc r4,r4 // shift by 1 and count subtracts - bf 1b // if sentinel falls out of quotient, stop - -4: mov r2,r4 // return quotient - mov r3,r1 // piggyback the remainder - btsti r5,31 // after adjusting for sign - bf 3f - rsubi r2,0 - rsubi r3,0 -3: jmp r15 -FUNC_END div32 -FUNC_END divsi3 -#endif - -#ifdef L_modsi3 -FUNC_START rem32 -FUNC_START modsi3 - mov r5,r2 // calc sign of remainder - abs r2 // do unsigned divide - abs r3 - movi r1,0 // r1-r2 form 64 bit dividend - movi r4,1 // r4 is quotient (1 for a sentinel) - cmpnei r3,0 // look for 0 divisor - bt 9f - trap 3 // divide by 0 -9: - // control iterations; skip across high order 0 bits in dividend - mov r7,r2 - cmpnei r7,0 - bt 8f - movi r2,0 // 0 dividend - jmp r15 // quick return -8: - ff1 r7 // figure distance to skip - lsl r4,r7 // move the sentinel along (with 0's behind) - lsl r2,r7 // and the low 32 bits of numerator - -1: lslc r2,1 // 1 bit left shift of r1-r2 - addc r1,r1 - cmphs r1,r3 // upper 32 of dividend >= divisor? 
- bf 2f - sub r1,r3 // if yes, subtract divisor -2: addc r4,r4 // shift by 1 and count subtracts - bf 1b // if sentinel falls out of quotient, stop - mov r2,r1 // return remainder - btsti r5,31 // after adjusting for sign - bf 3f - rsubi r2,0 -3: jmp r15 -FUNC_END rem32 -FUNC_END modsi3 -#endif - - -/* GCC expects that {__eq,__ne,__gt,__ge,__le,__lt}{df2,sf2} - will behave as __cmpdf2. So, we stub the implementations to - jump on to __cmpdf2 and __cmpsf2. - - All of these shortcircuit the return path so that __cmp{sd}f2 - will go directly back to the caller. */ - -.macro COMPARE_DF_JUMP name - .import SYM (cmpdf2) -FUNC_START \name - jmpi SYM (cmpdf2) -FUNC_END \name -.endm - -#ifdef L_eqdf2 -COMPARE_DF_JUMP eqdf2 -#endif /* L_eqdf2 */ - -#ifdef L_nedf2 -COMPARE_DF_JUMP nedf2 -#endif /* L_nedf2 */ - -#ifdef L_gtdf2 -COMPARE_DF_JUMP gtdf2 -#endif /* L_gtdf2 */ - -#ifdef L_gedf2 -COMPARE_DF_JUMP gedf2 -#endif /* L_gedf2 */ - -#ifdef L_ltdf2 -COMPARE_DF_JUMP ltdf2 -#endif /* L_ltdf2 */ - -#ifdef L_ledf2 -COMPARE_DF_JUMP ledf2 -#endif /* L_ledf2 */ - -/* SINGLE PRECISION FLOATING POINT STUBS */ - -.macro COMPARE_SF_JUMP name - .import SYM (cmpsf2) -FUNC_START \name - jmpi SYM (cmpsf2) -FUNC_END \name -.endm - -#ifdef L_eqsf2 -COMPARE_SF_JUMP eqsf2 -#endif /* L_eqsf2 */ - -#ifdef L_nesf2 -COMPARE_SF_JUMP nesf2 -#endif /* L_nesf2 */ - -#ifdef L_gtsf2 -COMPARE_SF_JUMP gtsf2 -#endif /* L_gtsf2 */ - -#ifdef L_gesf2 -COMPARE_SF_JUMP __gesf2 -#endif /* L_gesf2 */ - -#ifdef L_ltsf2 -COMPARE_SF_JUMP __ltsf2 -#endif /* L_ltsf2 */ - -#ifdef L_lesf2 -COMPARE_SF_JUMP lesf2 -#endif /* L_lesf2 */ diff --git a/gcc/config/mcore/t-mcore b/gcc/config/mcore/t-mcore index 9c84d850f20..265399cecfe 100644 --- a/gcc/config/mcore/t-mcore +++ b/gcc/config/mcore/t-mcore @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = mcore/lib1.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _modsi3 _umodsi3 - # could use -msifilter to be safe from interrupt/jmp interactions and others. TARGET_LIBGCC2_CFLAGS=-O3 -DNO_FLOATLIB_FIXUNSDFSI #-msifilter diff --git a/gcc/config/mep/mep-lib1.asm b/gcc/config/mep/mep-lib1.asm deleted file mode 100644 index 0a18913f927..00000000000 --- a/gcc/config/mep/mep-lib1.asm +++ /dev/null @@ -1,125 +0,0 @@ -/* libgcc routines for Toshiba Media Processor. - Copyright (C) 2001, 2002, 2005, 2009 Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3 of the License, or (at your -option) any later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. 
*/ - -#define SAVEALL \ - add3 $sp, $sp, -16*4 ; \ - sw $0, ($sp) ; \ - sw $1, 4($sp) ; \ - sw $2, 8($sp) ; \ - sw $3, 12($sp) ; \ - sw $4, 16($sp) ; \ - sw $5, 20($sp) ; \ - sw $6, 24($sp) ; \ - sw $7, 28($sp) ; \ - sw $8, 32($sp) ; \ - sw $9, 36($sp) ; \ - sw $10, 40($sp) ; \ - sw $11, 44($sp) ; \ - sw $12, 48($sp) ; \ - sw $13, 52($sp) ; \ - sw $14, 56($sp) ; \ - ldc $5, $lp ; \ - add $5, 3 ; \ - mov $6, -4 ; \ - and $5, $6 - -#define RESTOREALL \ - stc $5, $lp ; \ - lw $14, 56($sp) ; \ - lw $13, 52($sp) ; \ - lw $12, 48($sp) ; \ - lw $11, 44($sp) ; \ - lw $10, 40($sp) ; \ - lw $9, 36($sp) ; \ - lw $8, 32($sp) ; \ - lw $7, 28($sp) ; \ - lw $6, 24($sp) ; \ - lw $5, 20($sp) ; \ - lw $4, 16($sp) ; \ - lw $3, 12($sp) ; \ - lw $2, 8($sp) ; \ - lw $1, 4($sp) ; \ - lw $0, ($sp) ; \ - add3 $sp, $sp, 16*4 ; \ - ret - -#ifdef L_mep_profile - .text - .global __mep_mcount -__mep_mcount: - SAVEALL - ldc $1, $lp - mov $2, $0 - bsr __mep_mcount_2 - RESTOREALL -#endif - -#ifdef L_mep_bb_init_trace - .text - .global __mep_bb_init_trace_func -__mep_bb_init_trace_func: - SAVEALL - lw $1, ($5) - lw $2, 4($5) - add $5, 8 - bsr __bb_init_trace_func - RESTOREALL -#endif - -#ifdef L_mep_bb_init - .text - .global __mep_bb_init_func -__mep_bb_init_func: - SAVEALL - lw $1, ($5) - add $5, 4 - bsr __bb_init_func - RESTOREALL -#endif - -#ifdef L_mep_bb_trace - .text - .global __mep_bb_trace_func -__mep_bb_trace_func: - SAVEALL - movu $3, __bb - lw $1, ($5) - sw $1, ($3) - lw $2, 4($5) - sw $2, 4($3) - add $5, 8 - bsr __bb_trace_func - RESTOREALL -#endif - -#ifdef L_mep_bb_increment - .text - .global __mep_bb_increment_func -__mep_bb_increment_func: - SAVEALL - lw $1, ($5) - lw $0, ($1) - add $0, 1 - sw $0, ($1) - add $5, 4 - RESTOREALL -#endif diff --git a/gcc/config/mep/t-mep b/gcc/config/mep/t-mep index d560db0aa4b..ac4ad95bc87 100644 --- a/gcc/config/mep/t-mep +++ b/gcc/config/mep/t-mep @@ -32,16 +32,6 @@ mep-pragma.o: $(srcdir)/config/mep/mep-pragma.c $(CONFIG_H) $(SYSTEM_H) \ function.h insn-config.h reload.h $(TARGET_H) $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< -# profiling support - -LIB1ASMSRC = mep/mep-lib1.asm - -LIB1ASMFUNCS = _mep_profile \ - _mep_bb_init_trace \ - _mep_bb_init \ - _mep_bb_trace \ - _mep_bb_increment - # multiply and divide routines LIB2FUNCS_EXTRA = \ diff --git a/gcc/config/mips/mips16.S b/gcc/config/mips/mips16.S deleted file mode 100644 index ec331b5f65e..00000000000 --- a/gcc/config/mips/mips16.S +++ /dev/null @@ -1,712 +0,0 @@ -/* mips16 floating point support code - Copyright (C) 1996, 1997, 1998, 2008, 2009, 2010 - Free Software Foundation, Inc. - Contributed by Cygnus Support - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. 
*/ - -/* This file contains mips16 floating point support functions. These - functions are called by mips16 code to handle floating point when - -msoft-float is not used. They accept the arguments and return - values using the soft-float calling convention, but do the actual - operation using the hard floating point instructions. */ - -#if defined _MIPS_SIM && (_MIPS_SIM == _ABIO32 || _MIPS_SIM == _ABIO64) - -/* This file contains 32-bit assembly code. */ - .set nomips16 - -/* Start a function. */ - -#define STARTFN(NAME) .globl NAME; .ent NAME; NAME: - -/* Finish a function. */ - -#define ENDFN(NAME) .end NAME - -/* ARG1 - The FPR that holds the first floating-point argument. - - ARG2 - The FPR that holds the second floating-point argument. - - RET - The FPR that holds a floating-point return value. */ - -#define RET $f0 -#define ARG1 $f12 -#ifdef __mips64 -#define ARG2 $f13 -#else -#define ARG2 $f14 -#endif - -/* Set 64-bit register GPR so that its high 32 bits contain HIGH_FPR - and so that its low 32 bits contain LOW_FPR. */ -#define MERGE_GPRf(GPR, HIGH_FPR, LOW_FPR) \ - .set noat; \ - mfc1 $1, LOW_FPR; \ - mfc1 GPR, HIGH_FPR; \ - dsll $1, $1, 32; \ - dsll GPR, GPR, 32; \ - dsrl $1, $1, 32; \ - or GPR, GPR, $1; \ - .set at - -/* Move the high 32 bits of GPR to HIGH_FPR and the low 32 bits of - GPR to LOW_FPR. */ -#define MERGE_GPRt(GPR, HIGH_FPR, LOW_FPR) \ - .set noat; \ - dsrl $1, GPR, 32; \ - mtc1 GPR, LOW_FPR; \ - mtc1 $1, HIGH_FPR; \ - .set at - -/* Jump to T, and use "OPCODE, OP2" to implement a delayed move. */ -#define DELAYt(T, OPCODE, OP2) \ - .set noreorder; \ - jr T; \ - OPCODE, OP2; \ - .set reorder - -/* Use "OPCODE. OP2" and jump to T. */ -#define DELAYf(T, OPCODE, OP2) OPCODE, OP2; jr T - -/* MOVE_SF_BYTE0(D) - Move the first single-precision floating-point argument between - GPRs and FPRs. - - MOVE_SI_BYTE0(D) - Likewise the first single-precision integer argument. - - MOVE_SF_BYTE4(D) - Move the second single-precision floating-point argument between - GPRs and FPRs, given that the first argument occupies 4 bytes. - - MOVE_SF_BYTE8(D) - Move the second single-precision floating-point argument between - GPRs and FPRs, given that the first argument occupies 8 bytes. - - MOVE_DF_BYTE0(D) - Move the first double-precision floating-point argument between - GPRs and FPRs. - - MOVE_DF_BYTE8(D) - Likewise the second double-precision floating-point argument. - - MOVE_SF_RET(D, T) - Likewise a single-precision floating-point return value, - then jump to T. - - MOVE_SC_RET(D, T) - Likewise a complex single-precision floating-point return value. - - MOVE_DF_RET(D, T) - Likewise a double-precision floating-point return value. - - MOVE_DC_RET(D, T) - Likewise a complex double-precision floating-point return value. - - MOVE_SI_RET(D, T) - Likewise a single-precision integer return value. - - The D argument is "t" to move to FPRs and "f" to move from FPRs. - The return macros may assume that the target of the jump does not - use a floating-point register. */ - -#define MOVE_SF_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0) -#define MOVE_SI_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0) - -#if defined(__mips64) && defined(__MIPSEB__) -#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f0, $f1); jr T -#elif defined(__mips64) -/* The high 32 bits of $2 correspond to the second word in memory; - i.e. the imaginary part. 
*/ -#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f1, $f0); jr T -#elif __mips_fpr == 64 -#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1) -#else -#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f2) -#endif - -#if defined(__mips64) -#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12 -#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f13 -#define MOVE_SF_BYTE8(D) m##D##c1 $5,$f13 -#else -#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12 -#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f14 -#define MOVE_SF_BYTE8(D) m##D##c1 $6,$f14 -#endif -#define MOVE_SI_BYTE0(D) MOVE_SF_BYTE0(D) - -#if defined(__mips64) -#define MOVE_DF_BYTE0(D) dm##D##c1 $4,$f12 -#define MOVE_DF_BYTE8(D) dm##D##c1 $5,$f13 -#define MOVE_DF_RET(D, T) DELAY##D (T, dm##D##c1 $2,$f0) -#define MOVE_DC_RET(D, T) dm##D##c1 $3,$f1; MOVE_DF_RET (D, T) -#elif __mips_fpr == 64 && defined(__MIPSEB__) -#define MOVE_DF_BYTE0(D) m##D##c1 $5,$f12; m##D##hc1 $4,$f12 -#define MOVE_DF_BYTE8(D) m##D##c1 $7,$f14; m##D##hc1 $6,$f14 -#define MOVE_DF_RET(D, T) m##D##c1 $3,$f0; DELAY##D (T, m##D##hc1 $2,$f0) -#define MOVE_DC_RET(D, T) m##D##c1 $5,$f1; m##D##hc1 $4,$f1; MOVE_DF_RET (D, T) -#elif __mips_fpr == 64 -#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##hc1 $5,$f12 -#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##hc1 $7,$f14 -#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##hc1 $3,$f0) -#define MOVE_DC_RET(D, T) m##D##c1 $4,$f1; m##D##hc1 $5,$f1; MOVE_DF_RET (D, T) -#elif defined(__MIPSEB__) -/* FPRs are little-endian. */ -#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f13; m##D##c1 $5,$f12 -#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f15; m##D##c1 $7,$f14 -#define MOVE_DF_RET(D, T) m##D##c1 $2,$f1; DELAY##D (T, m##D##c1 $3,$f0) -#define MOVE_DC_RET(D, T) m##D##c1 $4,$f3; m##D##c1 $5,$f2; MOVE_DF_RET (D, T) -#else -#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##c1 $5,$f13 -#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##c1 $7,$f15 -#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1) -#define MOVE_DC_RET(D, T) m##D##c1 $4,$f2; m##D##c1 $5,$f3; MOVE_DF_RET (D, T) -#endif - -/* Single-precision math. */ - -/* Define a function NAME that loads two single-precision values, - performs FPU operation OPCODE on them, and returns the single- - precision result. */ - -#define OPSF3(NAME, OPCODE) \ -STARTFN (NAME); \ - MOVE_SF_BYTE0 (t); \ - MOVE_SF_BYTE4 (t); \ - OPCODE RET,ARG1,ARG2; \ - MOVE_SF_RET (f, $31); \ - ENDFN (NAME) - -#ifdef L_m16addsf3 -OPSF3 (__mips16_addsf3, add.s) -#endif -#ifdef L_m16subsf3 -OPSF3 (__mips16_subsf3, sub.s) -#endif -#ifdef L_m16mulsf3 -OPSF3 (__mips16_mulsf3, mul.s) -#endif -#ifdef L_m16divsf3 -OPSF3 (__mips16_divsf3, div.s) -#endif - -/* Define a function NAME that loads a single-precision value, - performs FPU operation OPCODE on it, and returns the single- - precision result. */ - -#define OPSF2(NAME, OPCODE) \ -STARTFN (NAME); \ - MOVE_SF_BYTE0 (t); \ - OPCODE RET,ARG1; \ - MOVE_SF_RET (f, $31); \ - ENDFN (NAME) - -#ifdef L_m16negsf2 -OPSF2 (__mips16_negsf2, neg.s) -#endif -#ifdef L_m16abssf2 -OPSF2 (__mips16_abssf2, abs.s) -#endif - -/* Single-precision comparisons. */ - -/* Define a function NAME that loads two single-precision values, - performs floating point comparison OPCODE, and returns TRUE or - FALSE depending on the result. 
*/ - -#define CMPSF(NAME, OPCODE, TRUE, FALSE) \ -STARTFN (NAME); \ - MOVE_SF_BYTE0 (t); \ - MOVE_SF_BYTE4 (t); \ - OPCODE ARG1,ARG2; \ - li $2,TRUE; \ - bc1t 1f; \ - li $2,FALSE; \ -1:; \ - j $31; \ - ENDFN (NAME) - -/* Like CMPSF, but reverse the comparison operands. */ - -#define REVCMPSF(NAME, OPCODE, TRUE, FALSE) \ -STARTFN (NAME); \ - MOVE_SF_BYTE0 (t); \ - MOVE_SF_BYTE4 (t); \ - OPCODE ARG2,ARG1; \ - li $2,TRUE; \ - bc1t 1f; \ - li $2,FALSE; \ -1:; \ - j $31; \ - ENDFN (NAME) - -#ifdef L_m16eqsf2 -CMPSF (__mips16_eqsf2, c.eq.s, 0, 1) -#endif -#ifdef L_m16nesf2 -CMPSF (__mips16_nesf2, c.eq.s, 0, 1) -#endif -#ifdef L_m16gtsf2 -REVCMPSF (__mips16_gtsf2, c.lt.s, 1, 0) -#endif -#ifdef L_m16gesf2 -REVCMPSF (__mips16_gesf2, c.le.s, 0, -1) -#endif -#ifdef L_m16lesf2 -CMPSF (__mips16_lesf2, c.le.s, 0, 1) -#endif -#ifdef L_m16ltsf2 -CMPSF (__mips16_ltsf2, c.lt.s, -1, 0) -#endif -#ifdef L_m16unordsf2 -CMPSF(__mips16_unordsf2, c.un.s, 1, 0) -#endif - - -/* Single-precision conversions. */ - -#ifdef L_m16fltsisf -STARTFN (__mips16_floatsisf) - MOVE_SF_BYTE0 (t) - cvt.s.w RET,ARG1 - MOVE_SF_RET (f, $31) - ENDFN (__mips16_floatsisf) -#endif - -#ifdef L_m16fltunsisf -STARTFN (__mips16_floatunsisf) - .set noreorder - bltz $4,1f - MOVE_SF_BYTE0 (t) - .set reorder - cvt.s.w RET,ARG1 - MOVE_SF_RET (f, $31) -1: - and $2,$4,1 - srl $3,$4,1 - or $2,$2,$3 - mtc1 $2,RET - cvt.s.w RET,RET - add.s RET,RET,RET - MOVE_SF_RET (f, $31) - ENDFN (__mips16_floatunsisf) -#endif - -#ifdef L_m16fix_truncsfsi -STARTFN (__mips16_fix_truncsfsi) - MOVE_SF_BYTE0 (t) - trunc.w.s RET,ARG1,$4 - MOVE_SI_RET (f, $31) - ENDFN (__mips16_fix_truncsfsi) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) - -/* Double-precision math. */ - -/* Define a function NAME that loads two double-precision values, - performs FPU operation OPCODE on them, and returns the double- - precision result. */ - -#define OPDF3(NAME, OPCODE) \ -STARTFN (NAME); \ - MOVE_DF_BYTE0 (t); \ - MOVE_DF_BYTE8 (t); \ - OPCODE RET,ARG1,ARG2; \ - MOVE_DF_RET (f, $31); \ - ENDFN (NAME) - -#ifdef L_m16adddf3 -OPDF3 (__mips16_adddf3, add.d) -#endif -#ifdef L_m16subdf3 -OPDF3 (__mips16_subdf3, sub.d) -#endif -#ifdef L_m16muldf3 -OPDF3 (__mips16_muldf3, mul.d) -#endif -#ifdef L_m16divdf3 -OPDF3 (__mips16_divdf3, div.d) -#endif - -/* Define a function NAME that loads a double-precision value, - performs FPU operation OPCODE on it, and returns the double- - precision result. */ - -#define OPDF2(NAME, OPCODE) \ -STARTFN (NAME); \ - MOVE_DF_BYTE0 (t); \ - OPCODE RET,ARG1; \ - MOVE_DF_RET (f, $31); \ - ENDFN (NAME) - -#ifdef L_m16negdf2 -OPDF2 (__mips16_negdf2, neg.d) -#endif -#ifdef L_m16absdf2 -OPDF2 (__mips16_absdf2, abs.d) -#endif - -/* Conversions between single and double precision. */ - -#ifdef L_m16extsfdf2 -STARTFN (__mips16_extendsfdf2) - MOVE_SF_BYTE0 (t) - cvt.d.s RET,ARG1 - MOVE_DF_RET (f, $31) - ENDFN (__mips16_extendsfdf2) -#endif - -#ifdef L_m16trdfsf2 -STARTFN (__mips16_truncdfsf2) - MOVE_DF_BYTE0 (t) - cvt.s.d RET,ARG1 - MOVE_SF_RET (f, $31) - ENDFN (__mips16_truncdfsf2) -#endif - -/* Double-precision comparisons. */ - -/* Define a function NAME that loads two double-precision values, - performs floating point comparison OPCODE, and returns TRUE or - FALSE depending on the result. 
*/ - -#define CMPDF(NAME, OPCODE, TRUE, FALSE) \ -STARTFN (NAME); \ - MOVE_DF_BYTE0 (t); \ - MOVE_DF_BYTE8 (t); \ - OPCODE ARG1,ARG2; \ - li $2,TRUE; \ - bc1t 1f; \ - li $2,FALSE; \ -1:; \ - j $31; \ - ENDFN (NAME) - -/* Like CMPDF, but reverse the comparison operands. */ - -#define REVCMPDF(NAME, OPCODE, TRUE, FALSE) \ -STARTFN (NAME); \ - MOVE_DF_BYTE0 (t); \ - MOVE_DF_BYTE8 (t); \ - OPCODE ARG2,ARG1; \ - li $2,TRUE; \ - bc1t 1f; \ - li $2,FALSE; \ -1:; \ - j $31; \ - ENDFN (NAME) - -#ifdef L_m16eqdf2 -CMPDF (__mips16_eqdf2, c.eq.d, 0, 1) -#endif -#ifdef L_m16nedf2 -CMPDF (__mips16_nedf2, c.eq.d, 0, 1) -#endif -#ifdef L_m16gtdf2 -REVCMPDF (__mips16_gtdf2, c.lt.d, 1, 0) -#endif -#ifdef L_m16gedf2 -REVCMPDF (__mips16_gedf2, c.le.d, 0, -1) -#endif -#ifdef L_m16ledf2 -CMPDF (__mips16_ledf2, c.le.d, 0, 1) -#endif -#ifdef L_m16ltdf2 -CMPDF (__mips16_ltdf2, c.lt.d, -1, 0) -#endif -#ifdef L_m16unorddf2 -CMPDF(__mips16_unorddf2, c.un.d, 1, 0) -#endif - -/* Double-precision conversions. */ - -#ifdef L_m16fltsidf -STARTFN (__mips16_floatsidf) - MOVE_SI_BYTE0 (t) - cvt.d.w RET,ARG1 - MOVE_DF_RET (f, $31) - ENDFN (__mips16_floatsidf) -#endif - -#ifdef L_m16fltunsidf -STARTFN (__mips16_floatunsidf) - MOVE_SI_BYTE0 (t) - cvt.d.w RET,ARG1 - bgez $4,1f - li.d ARG1, 4.294967296e+9 - add.d RET, RET, ARG1 -1: MOVE_DF_RET (f, $31) - ENDFN (__mips16_floatunsidf) -#endif - -#ifdef L_m16fix_truncdfsi -STARTFN (__mips16_fix_truncdfsi) - MOVE_DF_BYTE0 (t) - trunc.w.d RET,ARG1,$4 - MOVE_SI_RET (f, $31) - ENDFN (__mips16_fix_truncdfsi) -#endif -#endif /* !__mips_single_float */ - -/* Define a function NAME that moves a return value of mode MODE from - FPRs to GPRs. */ - -#define RET_FUNCTION(NAME, MODE) \ -STARTFN (NAME); \ - MOVE_##MODE##_RET (t, $31); \ - ENDFN (NAME) - -#ifdef L_m16retsf -RET_FUNCTION (__mips16_ret_sf, SF) -#endif - -#ifdef L_m16retsc -RET_FUNCTION (__mips16_ret_sc, SC) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16retdf -RET_FUNCTION (__mips16_ret_df, DF) -#endif - -#ifdef L_m16retdc -RET_FUNCTION (__mips16_ret_dc, DC) -#endif -#endif /* !__mips_single_float */ - -/* STUB_ARGS_X copies the arguments from GPRs to FPRs for argument - code X. X is calculated as ARG1 + ARG2 * 4, where ARG1 and ARG2 - classify the first and second arguments as follows: - - 1: a single-precision argument - 2: a double-precision argument - 0: no argument, or not one of the above. */ - -#define STUB_ARGS_0 /* () */ -#define STUB_ARGS_1 MOVE_SF_BYTE0 (t) /* (sf) */ -#define STUB_ARGS_5 MOVE_SF_BYTE0 (t); MOVE_SF_BYTE4 (t) /* (sf, sf) */ -#define STUB_ARGS_9 MOVE_SF_BYTE0 (t); MOVE_DF_BYTE8 (t) /* (sf, df) */ -#define STUB_ARGS_2 MOVE_DF_BYTE0 (t) /* (df) */ -#define STUB_ARGS_6 MOVE_DF_BYTE0 (t); MOVE_SF_BYTE8 (t) /* (df, sf) */ -#define STUB_ARGS_10 MOVE_DF_BYTE0 (t); MOVE_DF_BYTE8 (t) /* (df, df) */ - -/* These functions are used by 16-bit code when calling via a function - pointer. They must copy the floating point arguments from the GPRs - to FPRs and then call function $2. 
*/ - -#define CALL_STUB_NO_RET(NAME, CODE) \ -STARTFN (NAME); \ - STUB_ARGS_##CODE; \ - .set noreorder; \ - jr $2; \ - move $25,$2; \ - .set reorder; \ - ENDFN (NAME) - -#ifdef L_m16stub1 -CALL_STUB_NO_RET (__mips16_call_stub_1, 1) -#endif - -#ifdef L_m16stub5 -CALL_STUB_NO_RET (__mips16_call_stub_5, 5) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) - -#ifdef L_m16stub2 -CALL_STUB_NO_RET (__mips16_call_stub_2, 2) -#endif - -#ifdef L_m16stub6 -CALL_STUB_NO_RET (__mips16_call_stub_6, 6) -#endif - -#ifdef L_m16stub9 -CALL_STUB_NO_RET (__mips16_call_stub_9, 9) -#endif - -#ifdef L_m16stub10 -CALL_STUB_NO_RET (__mips16_call_stub_10, 10) -#endif -#endif /* !__mips_single_float */ - -/* Now we have the same set of functions, except that this time the - function being called returns an SFmode, SCmode, DFmode or DCmode - value; we need to instantiate a set for each case. The calling - function will arrange to preserve $18, so these functions are free - to use it to hold the return address. - - Note that we do not know whether the function we are calling is 16 - bit or 32 bit. However, it does not matter, because 16-bit - functions always return floating point values in both the gp and - the fp regs. It would be possible to check whether the function - being called is 16 bits, in which case the copy is unnecessary; - however, it's faster to always do the copy. */ - -#define CALL_STUB_RET(NAME, CODE, MODE) \ -STARTFN (NAME); \ - move $18,$31; \ - STUB_ARGS_##CODE; \ - .set noreorder; \ - jalr $2; \ - move $25,$2; \ - .set reorder; \ - MOVE_##MODE##_RET (f, $18); \ - ENDFN (NAME) - -/* First, instantiate the single-float set. */ - -#ifdef L_m16stubsf0 -CALL_STUB_RET (__mips16_call_stub_sf_0, 0, SF) -#endif - -#ifdef L_m16stubsf1 -CALL_STUB_RET (__mips16_call_stub_sf_1, 1, SF) -#endif - -#ifdef L_m16stubsf5 -CALL_STUB_RET (__mips16_call_stub_sf_5, 5, SF) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16stubsf2 -CALL_STUB_RET (__mips16_call_stub_sf_2, 2, SF) -#endif - -#ifdef L_m16stubsf6 -CALL_STUB_RET (__mips16_call_stub_sf_6, 6, SF) -#endif - -#ifdef L_m16stubsf9 -CALL_STUB_RET (__mips16_call_stub_sf_9, 9, SF) -#endif - -#ifdef L_m16stubsf10 -CALL_STUB_RET (__mips16_call_stub_sf_10, 10, SF) -#endif -#endif /* !__mips_single_float */ - - -/* Now we have the same set of functions again, except that this time - the function being called returns an DFmode value. */ - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16stubdf0 -CALL_STUB_RET (__mips16_call_stub_df_0, 0, DF) -#endif - -#ifdef L_m16stubdf1 -CALL_STUB_RET (__mips16_call_stub_df_1, 1, DF) -#endif - -#ifdef L_m16stubdf5 -CALL_STUB_RET (__mips16_call_stub_df_5, 5, DF) -#endif - -#ifdef L_m16stubdf2 -CALL_STUB_RET (__mips16_call_stub_df_2, 2, DF) -#endif - -#ifdef L_m16stubdf6 -CALL_STUB_RET (__mips16_call_stub_df_6, 6, DF) -#endif - -#ifdef L_m16stubdf9 -CALL_STUB_RET (__mips16_call_stub_df_9, 9, DF) -#endif - -#ifdef L_m16stubdf10 -CALL_STUB_RET (__mips16_call_stub_df_10, 10, DF) -#endif -#endif /* !__mips_single_float */ - - -/* Ho hum. Here we have the same set of functions again, this time - for when the function being called returns an SCmode value. 
*/ - -#ifdef L_m16stubsc0 -CALL_STUB_RET (__mips16_call_stub_sc_0, 0, SC) -#endif - -#ifdef L_m16stubsc1 -CALL_STUB_RET (__mips16_call_stub_sc_1, 1, SC) -#endif - -#ifdef L_m16stubsc5 -CALL_STUB_RET (__mips16_call_stub_sc_5, 5, SC) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16stubsc2 -CALL_STUB_RET (__mips16_call_stub_sc_2, 2, SC) -#endif - -#ifdef L_m16stubsc6 -CALL_STUB_RET (__mips16_call_stub_sc_6, 6, SC) -#endif - -#ifdef L_m16stubsc9 -CALL_STUB_RET (__mips16_call_stub_sc_9, 9, SC) -#endif - -#ifdef L_m16stubsc10 -CALL_STUB_RET (__mips16_call_stub_sc_10, 10, SC) -#endif -#endif /* !__mips_single_float */ - - -/* Finally, another set of functions for DCmode. */ - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16stubdc0 -CALL_STUB_RET (__mips16_call_stub_dc_0, 0, DC) -#endif - -#ifdef L_m16stubdc1 -CALL_STUB_RET (__mips16_call_stub_dc_1, 1, DC) -#endif - -#ifdef L_m16stubdc5 -CALL_STUB_RET (__mips16_call_stub_dc_5, 5, DC) -#endif - -#ifdef L_m16stubdc2 -CALL_STUB_RET (__mips16_call_stub_dc_2, 2, DC) -#endif - -#ifdef L_m16stubdc6 -CALL_STUB_RET (__mips16_call_stub_dc_6, 6, DC) -#endif - -#ifdef L_m16stubdc9 -CALL_STUB_RET (__mips16_call_stub_dc_9, 9, DC) -#endif - -#ifdef L_m16stubdc10 -CALL_STUB_RET (__mips16_call_stub_dc_10, 10, DC) -#endif -#endif /* !__mips_single_float */ -#endif diff --git a/gcc/config/mips/t-libgcc-mips16 b/gcc/config/mips/t-libgcc-mips16 deleted file mode 100644 index 31a042bb75e..00000000000 --- a/gcc/config/mips/t-libgcc-mips16 +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2007, 2008, 2011 Free Software Foundation, Inc. -# -# This file is part of GCC. -# -# GCC is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GCC is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GCC; see the file COPYING3. If not see -# . - -LIB1ASMSRC = mips/mips16.S -LIB1ASMFUNCS = _m16addsf3 _m16subsf3 _m16mulsf3 _m16divsf3 \ - _m16eqsf2 _m16nesf2 _m16gtsf2 _m16gesf2 _m16lesf2 _m16ltsf2 \ - _m16unordsf2 \ - _m16fltsisf _m16fix_truncsfsi _m16fltunsisf \ - _m16adddf3 _m16subdf3 _m16muldf3 _m16divdf3 \ - _m16extsfdf2 _m16trdfsf2 \ - _m16eqdf2 _m16nedf2 _m16gtdf2 _m16gedf2 _m16ledf2 _m16ltdf2 \ - _m16unorddf2 \ - _m16fltsidf _m16fix_truncdfsi _m16fltunsidf \ - _m16retsf _m16retdf \ - _m16retsc _m16retdc \ - _m16stub1 _m16stub2 _m16stub5 _m16stub6 _m16stub9 _m16stub10 \ - _m16stubsf0 _m16stubsf1 _m16stubsf2 _m16stubsf5 _m16stubsf6 \ - _m16stubsf9 _m16stubsf10 \ - _m16stubdf0 _m16stubdf1 _m16stubdf2 _m16stubdf5 _m16stubdf6 \ - _m16stubdf9 _m16stubdf10 \ - _m16stubsc0 _m16stubsc1 _m16stubsc2 _m16stubsc5 _m16stubsc6 \ - _m16stubsc9 _m16stubsc10 \ - _m16stubdc0 _m16stubdc1 _m16stubdc2 _m16stubdc5 _m16stubdc6 \ - _m16stubdc9 _m16stubdc10 diff --git a/gcc/config/mips/t-sr71k b/gcc/config/mips/t-sr71k index 7b8669fefd2..f204017faa8 100644 --- a/gcc/config/mips/t-sr71k +++ b/gcc/config/mips/t-sr71k @@ -16,11 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -# Suppress building libgcc1.a, since the MIPS compiler port is complete -# and does not need anything from libgcc1.a. 
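
As background on the LIB1ASMSRC / LIB1ASMFUNCS variables being removed from these makefile fragments: each entry in LIB1ASMFUNCS is built as its own object file by compiling LIB1ASMSRC once per entry with the matching L_* macro defined, so only that routine's #ifdef block is assembled into that object. A minimal sketch of the pattern in C, with hypothetical entries _foo and _bar (the file name, function names and compile lines below are illustrative, not taken from this patch):

    /* lib1.c -- stands in for a LIB1ASMSRC file.  Built roughly as:
         cc -DL_foo -c lib1.c -o _foo.o
         cc -DL_bar -c lib1.c -o _bar.o
       so each object ends up holding exactly one routine.  */
    #ifdef L_foo
    int foo (int x) { return x + 1; }   /* only present in _foo.o */
    #endif

    #ifdef L_bar
    int bar (int x) { return x - 1; }   /* only present in _bar.o */
    #endif

The same guards are visible in the deleted sources above: mips16.S wraps every routine in #ifdef L_m16..., and t-libgcc-mips16 simply lists which of those guards get built.
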
-LIBGCC1 = -CROSS_LIBGCC1 = - # We must build libgcc2.a with -G 0, in case the user wants to link # without the $gp register. TARGET_LIBGCC2_CFLAGS = -G 0 diff --git a/gcc/config/pa/milli64.S b/gcc/config/pa/milli64.S deleted file mode 100644 index 2e9c4f741b6..00000000000 --- a/gcc/config/pa/milli64.S +++ /dev/null @@ -1,2134 +0,0 @@ -/* 32 and 64-bit millicode, original author Hewlett-Packard - adapted for gcc by Paul Bame - and Alan Modra . - - Copyright 2001, 2002, 2003, 2007, 2009 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 3, or (at your option) any later -version. - -GCC is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -#ifdef pa64 - .level 2.0w -#endif - -/* Hardware General Registers. */ -r0: .reg %r0 -r1: .reg %r1 -r2: .reg %r2 -r3: .reg %r3 -r4: .reg %r4 -r5: .reg %r5 -r6: .reg %r6 -r7: .reg %r7 -r8: .reg %r8 -r9: .reg %r9 -r10: .reg %r10 -r11: .reg %r11 -r12: .reg %r12 -r13: .reg %r13 -r14: .reg %r14 -r15: .reg %r15 -r16: .reg %r16 -r17: .reg %r17 -r18: .reg %r18 -r19: .reg %r19 -r20: .reg %r20 -r21: .reg %r21 -r22: .reg %r22 -r23: .reg %r23 -r24: .reg %r24 -r25: .reg %r25 -r26: .reg %r26 -r27: .reg %r27 -r28: .reg %r28 -r29: .reg %r29 -r30: .reg %r30 -r31: .reg %r31 - -/* Hardware Space Registers. */ -sr0: .reg %sr0 -sr1: .reg %sr1 -sr2: .reg %sr2 -sr3: .reg %sr3 -sr4: .reg %sr4 -sr5: .reg %sr5 -sr6: .reg %sr6 -sr7: .reg %sr7 - -/* Hardware Floating Point Registers. */ -fr0: .reg %fr0 -fr1: .reg %fr1 -fr2: .reg %fr2 -fr3: .reg %fr3 -fr4: .reg %fr4 -fr5: .reg %fr5 -fr6: .reg %fr6 -fr7: .reg %fr7 -fr8: .reg %fr8 -fr9: .reg %fr9 -fr10: .reg %fr10 -fr11: .reg %fr11 -fr12: .reg %fr12 -fr13: .reg %fr13 -fr14: .reg %fr14 -fr15: .reg %fr15 - -/* Hardware Control Registers. */ -cr11: .reg %cr11 -sar: .reg %cr11 /* Shift Amount Register */ - -/* Software Architecture General Registers. */ -rp: .reg r2 /* return pointer */ -#ifdef pa64 -mrp: .reg r2 /* millicode return pointer */ -#else -mrp: .reg r31 /* millicode return pointer */ -#endif -ret0: .reg r28 /* return value */ -ret1: .reg r29 /* return value (high part of double) */ -sp: .reg r30 /* stack pointer */ -dp: .reg r27 /* data pointer */ -arg0: .reg r26 /* argument */ -arg1: .reg r25 /* argument or high part of double argument */ -arg2: .reg r24 /* argument */ -arg3: .reg r23 /* argument or high part of double argument */ - -/* Software Architecture Space Registers. */ -/* sr0 ; return link from BLE */ -sret: .reg sr1 /* return value */ -sarg: .reg sr1 /* argument */ -/* sr4 ; PC SPACE tracker */ -/* sr5 ; process private data */ - -/* Frame Offsets (millicode convention!) Used when calling other - millicode routines. Stack unwinding is dependent upon these - definitions. 
*/ -r31_slot: .equ -20 /* "current RP" slot */ -sr0_slot: .equ -16 /* "static link" slot */ -#if defined(pa64) -mrp_slot: .equ -16 /* "current RP" slot */ -psp_slot: .equ -8 /* "previous SP" slot */ -#else -mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */ -#endif - - -#define DEFINE(name,value)name: .EQU value -#define RDEFINE(name,value)name: .REG value -#ifdef milliext -#define MILLI_BE(lbl) BE lbl(sr7,r0) -#define MILLI_BEN(lbl) BE,n lbl(sr7,r0) -#define MILLI_BLE(lbl) BLE lbl(sr7,r0) -#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0) -#define MILLIRETN BE,n 0(sr0,mrp) -#define MILLIRET BE 0(sr0,mrp) -#define MILLI_RETN BE,n 0(sr0,mrp) -#define MILLI_RET BE 0(sr0,mrp) -#else -#define MILLI_BE(lbl) B lbl -#define MILLI_BEN(lbl) B,n lbl -#define MILLI_BLE(lbl) BL lbl,mrp -#define MILLI_BLEN(lbl) BL,n lbl,mrp -#define MILLIRETN BV,n 0(mrp) -#define MILLIRET BV 0(mrp) -#define MILLI_RETN BV,n 0(mrp) -#define MILLI_RET BV 0(mrp) -#endif - -#ifdef __STDC__ -#define CAT(a,b) a##b -#else -#define CAT(a,b) a/**/b -#endif - -#ifdef ELF -#define SUBSPA_MILLI .section .text -#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16 -#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16 -#define ATTR_MILLI -#define SUBSPA_DATA .section .data -#define ATTR_DATA -#define GLOBAL $global$ -#define GSYM(sym) !sym: -#define LSYM(sym) !CAT(.L,sym:) -#define LREF(sym) CAT(.L,sym) - -#else - -#ifdef coff -/* This used to be .milli but since link32 places different named - sections in different segments millicode ends up a long ways away - from .text (1meg?). This way they will be a lot closer. - - The SUBSPA_MILLI_* specify locality sets for certain millicode - modules in order to ensure that modules that call one another are - placed close together. Without locality sets this is unlikely to - happen because of the Dynamite linker library search algorithm. We - want these modules close together so that short calls always reach - (we don't want to require long calls or use long call stubs). 
*/ - -#define SUBSPA_MILLI .subspa .text -#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16 -#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16 -#define ATTR_MILLI .attr code,read,execute -#define SUBSPA_DATA .subspa .data -#define ATTR_DATA .attr init_data,read,write -#define GLOBAL _gp -#else -#define SUBSPA_MILLI .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8 -#define SUBSPA_MILLI_DIV SUBSPA_MILLI -#define SUBSPA_MILLI_MUL SUBSPA_MILLI -#define ATTR_MILLI -#define SUBSPA_DATA .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero -#define ATTR_DATA -#define GLOBAL $global$ -#endif -#define SPACE_DATA .space $PRIVATE$,spnum=1,sort=16 - -#define GSYM(sym) !sym -#define LSYM(sym) !CAT(L$,sym) -#define LREF(sym) CAT(L$,sym) -#endif - -#ifdef L_dyncall - SUBSPA_MILLI - ATTR_DATA -GSYM($$dyncall) - .export $$dyncall,millicode - .proc - .callinfo millicode - .entry - bb,>=,n %r22,30,LREF(1) ; branch if not plabel address - depi 0,31,2,%r22 ; clear the two least significant bits - ldw 4(%r22),%r19 ; load new LTP value - ldw 0(%r22),%r22 ; load address of target -LSYM(1) -#ifdef LINUX - bv %r0(%r22) ; branch to the real target -#else - ldsid (%sr0,%r22),%r1 ; get the "space ident" selected by r22 - mtsp %r1,%sr0 ; move that space identifier into sr0 - be 0(%sr0,%r22) ; branch to the real target -#endif - stw %r2,-24(%r30) ; save return address into frame marker - .exit - .procend -#endif - -#ifdef L_divI -/* ROUTINES: $$divI, $$divoI - - Single precision divide for signed binary integers. - - The quotient is truncated towards zero. - The sign of the quotient is the XOR of the signs of the dividend and - divisor. - Divide by zero is trapped. - Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI. - - INPUT REGISTERS: - . arg0 == dividend - . arg1 == divisor - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = undefined - . arg1 = undefined - . ret1 = quotient - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: - . divisor is zero (traps with ADDIT,= 0,25,0) - . dividend==-2**31 and divisor==-1 and routine is $$divoI - . (traps with ADDO 26,25,0) - . Changes memory at the following places: - . NONE - - PERMISSIBLE CONTEXT: - . Unwindable. - . Suitable for internal or external millicode. - . Assumes the special millicode register conventions. - - DISCUSSION: - . Branchs to other millicode routines using BE - . $$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15 - . - . For selected divisors, calls a divide by constant routine written by - . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13. - . - . The only overflow case is -2**31 divided by -1. - . Both routines return -2**31 but only $$divoI traps. */ - -RDEFINE(temp,r1) -RDEFINE(retreg,ret1) /* r29 */ -RDEFINE(temp1,arg0) - SUBSPA_MILLI_DIV - ATTR_MILLI - .import $$divI_2,millicode - .import $$divI_3,millicode - .import $$divI_4,millicode - .import $$divI_5,millicode - .import $$divI_6,millicode - .import $$divI_7,millicode - .import $$divI_8,millicode - .import $$divI_9,millicode - .import $$divI_10,millicode - .import $$divI_12,millicode - .import $$divI_14,millicode - .import $$divI_15,millicode - .export $$divI,millicode - .export $$divoI,millicode - .proc - .callinfo millicode - .entry -GSYM($$divoI) - comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1 */ -GSYM($$divI) - ldo -1(arg1),temp /* is there at most one bit set ? 
*/ - and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */ - addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */ - b,n LREF(neg_denom) -LSYM(pow2) - addi,>= 0,arg0,retreg /* if numerator is negative, add the */ - add arg0,temp,retreg /* (denominaotr -1) to correct for shifts */ - extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */ - extrs retreg,15,16,retreg /* retreg = retreg >> 16 */ - or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */ - ldi 0xcc,temp1 /* setup 0xcc in temp1 */ - extru,= arg1,23,8,temp /* test denominator with 0xff00 */ - extrs retreg,23,24,retreg /* retreg = retreg >> 8 */ - or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */ - ldi 0xaa,temp /* setup 0xaa in temp */ - extru,= arg1,27,4,r0 /* test denominator with 0xf0 */ - extrs retreg,27,28,retreg /* retreg = retreg >> 4 */ - and,= arg1,temp1,r0 /* test denominator with 0xcc */ - extrs retreg,29,30,retreg /* retreg = retreg >> 2 */ - and,= arg1,temp,r0 /* test denominator with 0xaa */ - extrs retreg,30,31,retreg /* retreg = retreg >> 1 */ - MILLIRETN -LSYM(neg_denom) - addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */ - b,n LREF(regular_seq) - sub r0,arg1,temp /* make denominator positive */ - comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */ - ldo -1(temp),retreg /* is there at most one bit set ? */ - and,= temp,retreg,r0 /* if so, the denominator is power of 2 */ - b,n LREF(regular_seq) - sub r0,arg0,retreg /* negate numerator */ - comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */ - copy retreg,arg0 /* set up arg0, arg1 and temp */ - copy temp,arg1 /* before branching to pow2 */ - b LREF(pow2) - ldo -1(arg1),temp -LSYM(regular_seq) - comib,>>=,n 15,arg1,LREF(small_divisor) - add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */ -LSYM(normal) - subi 0,retreg,retreg /* make it positive */ - sub 0,arg1,temp /* clear carry, */ - /* negate the divisor */ - ds 0,temp,0 /* set V-bit to the comple- */ - /* ment of the divisor sign */ - add retreg,retreg,retreg /* shift msb bit into carry */ - ds r0,arg1,temp /* 1st divide step, if no carry */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 2nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 3rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 4th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 5th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 6th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 7th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 8th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 9th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 10th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 11th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 12th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 13th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 14th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ 
- ds temp,arg1,temp /* 15th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 16th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 17th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 18th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 19th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 20th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 21st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 22nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 23rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 24th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 25th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 26th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 27th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 28th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 29th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 30th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 31st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 32nd divide step, */ - addc retreg,retreg,retreg /* shift last retreg bit into retreg */ - xor,>= arg0,arg1,0 /* get correct sign of quotient */ - sub 0,retreg,retreg /* based on operand signs */ - MILLIRETN - nop - -LSYM(small_divisor) - -#if defined(pa64) -/* Clear the upper 32 bits of the arg1 register. We are working with */ -/* small divisors (and 32-bit integers) We must not be mislead */ -/* by "1" bits left in the upper 32 bits. */ - depd %r0,31,32,%r25 -#endif - blr,n arg1,r0 - nop -/* table for divisor == 0,1, ... ,15 */ - addit,= 0,arg1,r0 /* trap if divisor == 0 */ - nop - MILLIRET /* divisor == 1 */ - copy arg0,retreg - MILLI_BEN($$divI_2) /* divisor == 2 */ - nop - MILLI_BEN($$divI_3) /* divisor == 3 */ - nop - MILLI_BEN($$divI_4) /* divisor == 4 */ - nop - MILLI_BEN($$divI_5) /* divisor == 5 */ - nop - MILLI_BEN($$divI_6) /* divisor == 6 */ - nop - MILLI_BEN($$divI_7) /* divisor == 7 */ - nop - MILLI_BEN($$divI_8) /* divisor == 8 */ - nop - MILLI_BEN($$divI_9) /* divisor == 9 */ - nop - MILLI_BEN($$divI_10) /* divisor == 10 */ - nop - b LREF(normal) /* divisor == 11 */ - add,>= 0,arg0,retreg - MILLI_BEN($$divI_12) /* divisor == 12 */ - nop - b LREF(normal) /* divisor == 13 */ - add,>= 0,arg0,retreg - MILLI_BEN($$divI_14) /* divisor == 14 */ - nop - MILLI_BEN($$divI_15) /* divisor == 15 */ - nop - -LSYM(negative1) - sub 0,arg0,retreg /* result is negation of dividend */ - MILLIRET - addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */ - .exit - .procend - .end -#endif - -#ifdef L_divU -/* ROUTINE: $$divU - . - . Single precision divide for unsigned integers. - . - . Quotient is truncated towards zero. - . Traps on divide by zero. - - INPUT REGISTERS: - . 
arg0 == dividend - . arg1 == divisor - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = undefined - . arg1 = undefined - . ret1 = quotient - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: - . divisor is zero - . Changes memory at the following places: - . NONE - - PERMISSIBLE CONTEXT: - . Unwindable. - . Does not create a stack frame. - . Suitable for internal or external millicode. - . Assumes the special millicode register conventions. - - DISCUSSION: - . Branchs to other millicode routines using BE: - . $$divU_# for 3,5,6,7,9,10,12,14,15 - . - . For selected small divisors calls the special divide by constant - . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */ - -RDEFINE(temp,r1) -RDEFINE(retreg,ret1) /* r29 */ -RDEFINE(temp1,arg0) - SUBSPA_MILLI_DIV - ATTR_MILLI - .export $$divU,millicode - .import $$divU_3,millicode - .import $$divU_5,millicode - .import $$divU_6,millicode - .import $$divU_7,millicode - .import $$divU_9,millicode - .import $$divU_10,millicode - .import $$divU_12,millicode - .import $$divU_14,millicode - .import $$divU_15,millicode - .proc - .callinfo millicode - .entry -GSYM($$divU) -/* The subtract is not nullified since it does no harm and can be used - by the two cases that branch back to "normal". */ - ldo -1(arg1),temp /* is there at most one bit set ? */ - and,= arg1,temp,r0 /* if so, denominator is power of 2 */ - b LREF(regular_seq) - addit,= 0,arg1,0 /* trap for zero dvr */ - copy arg0,retreg - extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */ - extru retreg,15,16,retreg /* retreg = retreg >> 16 */ - or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */ - ldi 0xcc,temp1 /* setup 0xcc in temp1 */ - extru,= arg1,23,8,temp /* test denominator with 0xff00 */ - extru retreg,23,24,retreg /* retreg = retreg >> 8 */ - or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */ - ldi 0xaa,temp /* setup 0xaa in temp */ - extru,= arg1,27,4,r0 /* test denominator with 0xf0 */ - extru retreg,27,28,retreg /* retreg = retreg >> 4 */ - and,= arg1,temp1,r0 /* test denominator with 0xcc */ - extru retreg,29,30,retreg /* retreg = retreg >> 2 */ - and,= arg1,temp,r0 /* test denominator with 0xaa */ - extru retreg,30,31,retreg /* retreg = retreg >> 1 */ - MILLIRETN - nop -LSYM(regular_seq) - comib,>= 15,arg1,LREF(special_divisor) - subi 0,arg1,temp /* clear carry, negate the divisor */ - ds r0,temp,r0 /* set V-bit to 1 */ -LSYM(normal) - add arg0,arg0,retreg /* shift msb bit into carry */ - ds r0,arg1,temp /* 1st divide step, if no carry */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 2nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 3rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 4th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 5th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 6th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 7th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 8th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 9th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds 
temp,arg1,temp /* 10th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 11th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 12th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 13th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 14th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 15th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 16th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 17th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 18th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 19th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 20th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 21st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 22nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 23rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 24th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 25th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 26th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 27th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 28th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 29th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 30th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 31st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 32nd divide step, */ - MILLIRET - addc retreg,retreg,retreg /* shift last retreg bit into retreg */ - -/* Handle the cases where divisor is a small constant or has high bit on. */ -LSYM(special_divisor) -/* blr arg1,r0 */ -/* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */ - -/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from - generating such a blr, comib sequence. A problem in nullification. So I - rewrote this code. */ - -#if defined(pa64) -/* Clear the upper 32 bits of the arg1 register. We are working with - small divisors (and 32-bit unsigned integers) We must not be mislead - by "1" bits left in the upper 32 bits. 
*/ - depd %r0,31,32,%r25 -#endif - comib,> 0,arg1,LREF(big_divisor) - nop - blr arg1,r0 - nop - -LSYM(zero_divisor) /* this label is here to provide external visibility */ - addit,= 0,arg1,0 /* trap for zero dvr */ - nop - MILLIRET /* divisor == 1 */ - copy arg0,retreg - MILLIRET /* divisor == 2 */ - extru arg0,30,31,retreg - MILLI_BEN($$divU_3) /* divisor == 3 */ - nop - MILLIRET /* divisor == 4 */ - extru arg0,29,30,retreg - MILLI_BEN($$divU_5) /* divisor == 5 */ - nop - MILLI_BEN($$divU_6) /* divisor == 6 */ - nop - MILLI_BEN($$divU_7) /* divisor == 7 */ - nop - MILLIRET /* divisor == 8 */ - extru arg0,28,29,retreg - MILLI_BEN($$divU_9) /* divisor == 9 */ - nop - MILLI_BEN($$divU_10) /* divisor == 10 */ - nop - b LREF(normal) /* divisor == 11 */ - ds r0,temp,r0 /* set V-bit to 1 */ - MILLI_BEN($$divU_12) /* divisor == 12 */ - nop - b LREF(normal) /* divisor == 13 */ - ds r0,temp,r0 /* set V-bit to 1 */ - MILLI_BEN($$divU_14) /* divisor == 14 */ - nop - MILLI_BEN($$divU_15) /* divisor == 15 */ - nop - -/* Handle the case where the high bit is on in the divisor. - Compute: if( dividend>=divisor) quotient=1; else quotient=0; - Note: dividend>==divisor iff dividend-divisor does not borrow - and not borrow iff carry. */ -LSYM(big_divisor) - sub arg0,arg1,r0 - MILLIRET - addc r0,r0,retreg - .exit - .procend - .end -#endif - -#ifdef L_remI -/* ROUTINE: $$remI - - DESCRIPTION: - . $$remI returns the remainder of the division of two signed 32-bit - . integers. The sign of the remainder is the same as the sign of - . the dividend. - - - INPUT REGISTERS: - . arg0 == dividend - . arg1 == divisor - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = destroyed - . arg1 = destroyed - . ret1 = remainder - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: DIVIDE BY ZERO - . Changes memory at the following places: NONE - - PERMISSIBLE CONTEXT: - . Unwindable - . Does not create a stack frame - . Is usable for internal or external microcode - - DISCUSSION: - . Calls other millicode routines via mrp: NONE - . Calls other millicode routines: NONE */ - -RDEFINE(tmp,r1) -RDEFINE(retreg,ret1) - - SUBSPA_MILLI - ATTR_MILLI - .proc - .callinfo millicode - .entry -GSYM($$remI) -GSYM($$remoI) - .export $$remI,MILLICODE - .export $$remoI,MILLICODE - ldo -1(arg1),tmp /* is there at most one bit set ? */ - and,<> arg1,tmp,r0 /* if not, don't use power of 2 */ - addi,> 0,arg1,r0 /* if denominator > 0, use power */ - /* of 2 */ - b,n LREF(neg_denom) -LSYM(pow2) - comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */ - and arg0,tmp,retreg /* get the result */ - MILLIRETN -LSYM(neg_num) - subi 0,arg0,arg0 /* negate numerator */ - and arg0,tmp,retreg /* get the result */ - subi 0,retreg,retreg /* negate result */ - MILLIRETN -LSYM(neg_denom) - addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */ - /* of 2 */ - b,n LREF(regular_seq) - sub r0,arg1,tmp /* make denominator positive */ - comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */ - ldo -1(tmp),retreg /* is there at most one bit set ? 
*/ - and,= tmp,retreg,r0 /* if not, go to regular_seq */ - b,n LREF(regular_seq) - comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */ - and arg0,retreg,retreg - MILLIRETN -LSYM(neg_num_2) - subi 0,arg0,tmp /* test against 0x80000000 */ - and tmp,retreg,retreg - subi 0,retreg,retreg - MILLIRETN -LSYM(regular_seq) - addit,= 0,arg1,0 /* trap if div by zero */ - add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */ - sub 0,retreg,retreg /* make it positive */ - sub 0,arg1, tmp /* clear carry, */ - /* negate the divisor */ - ds 0, tmp,0 /* set V-bit to the comple- */ - /* ment of the divisor sign */ - or 0,0, tmp /* clear tmp */ - add retreg,retreg,retreg /* shift msb bit into carry */ - ds tmp,arg1, tmp /* 1st divide step, if no carry */ - /* out, msb of quotient = 0 */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ -LSYM(t1) - ds tmp,arg1, tmp /* 2nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 3rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 4th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 5th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 6th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 7th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 8th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 9th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 10th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 11th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 12th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 13th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 14th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 15th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 16th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 17th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 18th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 19th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 20th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 21st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 22nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 23rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 24th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 25th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 26th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 27th divide step */ - addc retreg,retreg,retreg /* 
shift retreg with/into carry */ - ds tmp,arg1, tmp /* 28th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 29th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 30th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 31st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 32nd divide step, */ - addc retreg,retreg,retreg /* shift last bit into retreg */ - movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */ - add,< arg1,0,0 /* if arg1 > 0, add arg1 */ - add,tr tmp,arg1,retreg /* for correcting remainder tmp */ - sub tmp,arg1,retreg /* else add absolute value arg1 */ -LSYM(finish) - add,>= arg0,0,0 /* set sign of remainder */ - sub 0,retreg,retreg /* to sign of dividend */ - MILLIRET - nop - .exit - .procend -#ifdef milliext - .origin 0x00000200 -#endif - .end -#endif - -#ifdef L_remU -/* ROUTINE: $$remU - . Single precision divide for remainder with unsigned binary integers. - . - . The remainder must be dividend-(dividend/divisor)*divisor. - . Divide by zero is trapped. - - INPUT REGISTERS: - . arg0 == dividend - . arg1 == divisor - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = undefined - . arg1 = undefined - . ret1 = remainder - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: DIVIDE BY ZERO - . Changes memory at the following places: NONE - - PERMISSIBLE CONTEXT: - . Unwindable. - . Does not create a stack frame. - . Suitable for internal or external millicode. - . Assumes the special millicode register conventions. - - DISCUSSION: - . Calls other millicode routines using mrp: NONE - . Calls other millicode routines: NONE */ - - -RDEFINE(temp,r1) -RDEFINE(rmndr,ret1) /* r29 */ - SUBSPA_MILLI - ATTR_MILLI - .export $$remU,millicode - .proc - .callinfo millicode - .entry -GSYM($$remU) - ldo -1(arg1),temp /* is there at most one bit set ? 
*/ - and,= arg1,temp,r0 /* if not, don't use power of 2 */ - b LREF(regular_seq) - addit,= 0,arg1,r0 /* trap on div by zero */ - and arg0,temp,rmndr /* get the result for power of 2 */ - MILLIRETN -LSYM(regular_seq) - comib,>=,n 0,arg1,LREF(special_case) - subi 0,arg1,rmndr /* clear carry, negate the divisor */ - ds r0,rmndr,r0 /* set V-bit to 1 */ - add arg0,arg0,temp /* shift msb bit into carry */ - ds r0,arg1,rmndr /* 1st divide step, if no carry */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 2nd divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 3rd divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 4th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 5th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 6th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 7th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 8th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 9th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 10th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 11th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 12th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 13th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 14th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 15th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 16th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 17th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 18th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 19th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 20th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 21st divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 22nd divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 23rd divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 24th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 25th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 26th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 27th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 28th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 29th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 30th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 31st divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 32nd divide step, */ - 
comiclr,<= 0,rmndr,r0 - add rmndr,arg1,rmndr /* correction */ - MILLIRETN - nop - -/* Putting >= on the last DS and deleting COMICLR does not work! */ -LSYM(special_case) - sub,>>= arg0,arg1,rmndr - copy arg0,rmndr - MILLIRETN - nop - .exit - .procend - .end -#endif - -#ifdef L_div_const -/* ROUTINE: $$divI_2 - . $$divI_3 $$divU_3 - . $$divI_4 - . $$divI_5 $$divU_5 - . $$divI_6 $$divU_6 - . $$divI_7 $$divU_7 - . $$divI_8 - . $$divI_9 $$divU_9 - . $$divI_10 $$divU_10 - . - . $$divI_12 $$divU_12 - . - . $$divI_14 $$divU_14 - . $$divI_15 $$divU_15 - . $$divI_16 - . $$divI_17 $$divU_17 - . - . Divide by selected constants for single precision binary integers. - - INPUT REGISTERS: - . arg0 == dividend - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = undefined - . arg1 = undefined - . ret1 = quotient - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: NONE - . Changes memory at the following places: NONE - - PERMISSIBLE CONTEXT: - . Unwindable. - . Does not create a stack frame. - . Suitable for internal or external millicode. - . Assumes the special millicode register conventions. - - DISCUSSION: - . Calls other millicode routines using mrp: NONE - . Calls other millicode routines: NONE */ - - -/* TRUNCATED DIVISION BY SMALL INTEGERS - - We are interested in q(x) = floor(x/y), where x >= 0 and y > 0 - (with y fixed). - - Let a = floor(z/y), for some choice of z. Note that z will be - chosen so that division by z is cheap. - - Let r be the remainder(z/y). In other words, r = z - ay. - - Now, our method is to choose a value for b such that - - q'(x) = floor((ax+b)/z) - - is equal to q(x) over as large a range of x as possible. If the - two are equal over a sufficiently large range, and if it is easy to - form the product (ax), and it is easy to divide by z, then we can - perform the division much faster than the general division algorithm. - - So, we want the following to be true: - - . For x in the following range: - . - . ky <= x < (k+1)y - . - . implies that - . - . k <= (ax+b)/z < (k+1) - - We want to determine b such that this is true for all k in the - range {0..K} for some maximum K. - - Since (ax+b) is an increasing function of x, we can take each - bound separately to determine the "best" value for b. - - (ax+b)/z < (k+1) implies - - (a((k+1)y-1)+b < (k+1)z implies - - b < a + (k+1)(z-ay) implies - - b < a + (k+1)r - - This needs to be true for all k in the range {0..K}. In - particular, it is true for k = 0 and this leads to a maximum - acceptable value for b. - - b < a+r or b <= a+r-1 - - Taking the other bound, we have - - k <= (ax+b)/z implies - - k <= (aky+b)/z implies - - k(z-ay) <= b implies - - kr <= b - - Clearly, the largest range for k will be achieved by maximizing b, - when r is not zero. When r is zero, then the simplest choice for b - is 0. When r is not 0, set - - . b = a+r-1 - - Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y) - for all x in the range: - - . 0 <= x < (K+1)y - - We need to determine what K is. Of our two bounds, - - . b < a+(k+1)r is satisfied for all k >= 0, by construction. - - The other bound is - - . kr <= b - - This is always true if r = 0. If r is not 0 (the usual case), then - K = floor((a+r-1)/r), is the maximum value for k. 
- - Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct - answer for q(x) = floor(x/y) when x is in the range - - (0,(K+1)y-1) K = floor((a+r-1)/r) - - To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that - the formula for q'(x) yields the correct value of q(x) for all x - representable by a single word in HPPA. - - We are also constrained in that computing the product (ax), adding - b, and dividing by z must all be done quickly, otherwise we will be - better off going through the general algorithm using the DS - instruction, which uses approximately 70 cycles. - - For each y, there is a choice of z which satisfies the constraints - for (K+1)y >= 2**32. We may not, however, be able to satisfy the - timing constraints for arbitrary y. It seems that z being equal to - a power of 2 or a power of 2 minus 1 is as good as we can do, since - it minimizes the time to do division by z. We want the choice of z - to also result in a value for (a) that minimizes the computation of - the product (ax). This is best achieved if (a) has a regular bit - pattern (so the multiplication can be done with shifts and adds). - The value of (a) also needs to be less than 2**32 so the product is - always guaranteed to fit in 2 words. - - In actual practice, the following should be done: - - 1) For negative x, you should take the absolute value and remember - . the fact so that the result can be negated. This obviously does - . not apply in the unsigned case. - 2) For even y, you should factor out the power of 2 that divides y - . and divide x by it. You can then proceed by dividing by the - . odd factor of y. - - Here is a table of some odd values of y, and corresponding choices - for z which are "good". - - y z r a (hex) max x (hex) - - 3 2**32 1 55555555 100000001 - 5 2**32 1 33333333 100000003 - 7 2**24-1 0 249249 (infinite) - 9 2**24-1 0 1c71c7 (infinite) - 11 2**20-1 0 1745d (infinite) - 13 2**24-1 0 13b13b (infinite) - 15 2**32 1 11111111 10000000d - 17 2**32 1 f0f0f0f 10000000f - - If r is 1, then b = a+r-1 = a. This simplifies the computation - of (ax+b), since you can compute (x+1)(a) instead. If r is 0, - then b = 0 is ok to use which simplifies (ax+b). - - The bit patterns for 55555555, 33333333, and 11111111 are obviously - very regular. The bit patterns for the other values of a above are: - - y (hex) (binary) - - 7 249249 001001001001001001001001 << regular >> - 9 1c71c7 000111000111000111000111 << regular >> - 11 1745d 000000010111010001011101 << irregular >> - 13 13b13b 000100111011000100111011 << irregular >> - - The bit patterns for (a) corresponding to (y) of 11 and 13 may be - too irregular to warrant using this method. - - When z is a power of 2 minus 1, then the division by z is slightly - more complicated, involving an iterative solution. - - The code presented here solves division by 1 through 17, except for - 11 and 13. There are algorithms for both signed and unsigned - quantities given. - - TIMINGS (cycles) - - divisor positive negative unsigned - - . 1 2 2 2 - . 2 4 4 2 - . 3 19 21 19 - . 4 4 4 2 - . 5 18 22 19 - . 6 19 22 19 - . 8 4 4 2 - . 10 18 19 17 - . 12 18 20 18 - . 15 16 18 16 - . 16 4 4 2 - . 17 16 18 16 - - Now, the algorithm for 7, 9, and 14 is an iterative one. That is, - a loop body is executed until the tentative quotient is 0. The - number of times the loop body is executed varies depending on the - dividend, but is never more than two times. If the dividend is - less than the divisor, then the loop body is not executed at all. 
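As a concrete check of the derivation above, here is a small, self-contained C program. It is only a sketch, not part of the millicode, and it assumes nothing beyond the z = 2**32 rows of the table (y = 3, 5, 15 and 17, where r = 1 and hence b = a + r - 1 = a); the z = 2**24-1 rows (7, 9, 14) need the iterative correction discussed around here and are not covered by it.

/* Check floor((a*x + b) / 2**32) == x / y for the z = 2**32 constants
   tabulated above.  r = 1 for all of them, so b = a and (a*x + b) is
   just a*(x + 1), which is the simplification the text points out.  */

#include <stdint.h>
#include <stdio.h>

static const struct { uint32_t y; uint64_t a; } recip[] = {
  { 3,  0x55555555u },
  { 5,  0x33333333u },
  { 15, 0x11111111u },
  { 17, 0x0f0f0f0fu },
};

static uint32_t div_by_recip (uint32_t x, uint64_t a)
{
  return (uint32_t) ((a * (x + 1ull)) >> 32);  /* floor((a*x + a) / 2**32) */
}

int main (void)
{
  for (unsigned i = 0; i < sizeof recip / sizeof recip[0]; i++)
    for (uint64_t x = 0; x <= 0xffffffffull; x += 0x10001)  /* sparse sweep */
      if (div_by_recip ((uint32_t) x, recip[i].a) != (uint32_t) x / recip[i].y)
        {
          printf ("mismatch at x=%llu, y=%u\n",
                  (unsigned long long) x, recip[i].y);
          return 1;
        }
  puts ("reciprocal constants agree with x / y");
  return 0;
}

The timing and iteration-count details for the iterative 7/9/14 cases continue below.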
- Each iteration adds 4 cycles to the timings. - - divisor positive negative unsigned - - . 7 19+4n 20+4n 20+4n n = number of iterations - . 9 21+4n 22+4n 21+4n - . 14 21+4n 22+4n 20+4n - - To give an idea of how the number of iterations varies, here is a - table of dividend versus number of iterations when dividing by 7. - - smallest largest required - dividend dividend iterations - - . 0 6 0 - . 7 0x6ffffff 1 - 0x1000006 0xffffffff 2 - - There is some overlap in the range of numbers requiring 1 and 2 - iterations. */ - -RDEFINE(t2,r1) -RDEFINE(x2,arg0) /* r26 */ -RDEFINE(t1,arg1) /* r25 */ -RDEFINE(x1,ret1) /* r29 */ - - SUBSPA_MILLI_DIV - ATTR_MILLI - - .proc - .callinfo millicode - .entry -/* NONE of these routines require a stack frame - ALL of these routines are unwindable from millicode */ - -GSYM($$divide_by_constant) - .export $$divide_by_constant,millicode -/* Provides a "nice" label for the code covered by the unwind descriptor - for things like gprof. */ - -/* DIVISION BY 2 (shift by 1) */ -GSYM($$divI_2) - .export $$divI_2,millicode - comclr,>= arg0,0,0 - addi 1,arg0,arg0 - MILLIRET - extrs arg0,30,31,ret1 - - -/* DIVISION BY 4 (shift by 2) */ -GSYM($$divI_4) - .export $$divI_4,millicode - comclr,>= arg0,0,0 - addi 3,arg0,arg0 - MILLIRET - extrs arg0,29,30,ret1 - - -/* DIVISION BY 8 (shift by 3) */ -GSYM($$divI_8) - .export $$divI_8,millicode - comclr,>= arg0,0,0 - addi 7,arg0,arg0 - MILLIRET - extrs arg0,28,29,ret1 - -/* DIVISION BY 16 (shift by 4) */ -GSYM($$divI_16) - .export $$divI_16,millicode - comclr,>= arg0,0,0 - addi 15,arg0,arg0 - MILLIRET - extrs arg0,27,28,ret1 - -/**************************************************************************** -* -* DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these -* -* includes 3,5,15,17 and also 6,10,12 -* -****************************************************************************/ - -/* DIVISION BY 3 (use z = 2**32; a = 55555555) */ - -GSYM($$divI_3) - .export $$divI_3,millicode - comb,<,N x2,0,LREF(neg3) - - addi 1,x2,x2 /* this cannot overflow */ - extru x2,1,2,x1 /* multiply by 5 to get started */ - sh2add x2,x2,x2 - b LREF(pos) - addc x1,0,x1 - -LSYM(neg3) - subi 1,x2,x2 /* this cannot overflow */ - extru x2,1,2,x1 /* multiply by 5 to get started */ - sh2add x2,x2,x2 - b LREF(neg) - addc x1,0,x1 - -GSYM($$divU_3) - .export $$divU_3,millicode - addi 1,x2,x2 /* this CAN overflow */ - addc 0,0,x1 - shd x1,x2,30,t1 /* multiply by 5 to get started */ - sh2add x2,x2,x2 - b LREF(pos) - addc x1,t1,x1 - -/* DIVISION BY 5 (use z = 2**32; a = 33333333) */ - -GSYM($$divI_5) - .export $$divI_5,millicode - comb,<,N x2,0,LREF(neg5) - - addi 3,x2,t1 /* this cannot overflow */ - sh1add x2,t1,x2 /* multiply by 3 to get started */ - b LREF(pos) - addc 0,0,x1 - -LSYM(neg5) - sub 0,x2,x2 /* negate x2 */ - addi 1,x2,x2 /* this cannot overflow */ - shd 0,x2,31,x1 /* get top bit (can be 1) */ - sh1add x2,x2,x2 /* multiply by 3 to get started */ - b LREF(neg) - addc x1,0,x1 - -GSYM($$divU_5) - .export $$divU_5,millicode - addi 1,x2,x2 /* this CAN overflow */ - addc 0,0,x1 - shd x1,x2,31,t1 /* multiply by 3 to get started */ - sh1add x2,x2,x2 - b LREF(pos) - addc t1,x1,x1 - -/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */ -GSYM($$divI_6) - .export $$divI_6,millicode - comb,<,N x2,0,LREF(neg6) - extru x2,30,31,x2 /* divide by 2 */ - addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */ - sh2add x2,t1,x2 /* multiply by 5 to get started */ - b LREF(pos) - addc 0,0,x1 - -LSYM(neg6) - subi 2,x2,x2 /* negate, divide by 2, and add 1 */ - /* 
negation and adding 1 are done */ - /* at the same time by the SUBI */ - extru x2,30,31,x2 - shd 0,x2,30,x1 - sh2add x2,x2,x2 /* multiply by 5 to get started */ - b LREF(neg) - addc x1,0,x1 - -GSYM($$divU_6) - .export $$divU_6,millicode - extru x2,30,31,x2 /* divide by 2 */ - addi 1,x2,x2 /* cannot carry */ - shd 0,x2,30,x1 /* multiply by 5 to get started */ - sh2add x2,x2,x2 - b LREF(pos) - addc x1,0,x1 - -/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */ -GSYM($$divU_10) - .export $$divU_10,millicode - extru x2,30,31,x2 /* divide by 2 */ - addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */ - sh1add x2,t1,x2 /* multiply by 3 to get started */ - addc 0,0,x1 -LSYM(pos) - shd x1,x2,28,t1 /* multiply by 0x11 */ - shd x2,0,28,t2 - add x2,t2,x2 - addc x1,t1,x1 -LSYM(pos_for_17) - shd x1,x2,24,t1 /* multiply by 0x101 */ - shd x2,0,24,t2 - add x2,t2,x2 - addc x1,t1,x1 - - shd x1,x2,16,t1 /* multiply by 0x10001 */ - shd x2,0,16,t2 - add x2,t2,x2 - MILLIRET - addc x1,t1,x1 - -GSYM($$divI_10) - .export $$divI_10,millicode - comb,< x2,0,LREF(neg10) - copy 0,x1 - extru x2,30,31,x2 /* divide by 2 */ - addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */ - sh1add x2,x2,x2 /* multiply by 3 to get started */ - -LSYM(neg10) - subi 2,x2,x2 /* negate, divide by 2, and add 1 */ - /* negation and adding 1 are done */ - /* at the same time by the SUBI */ - extru x2,30,31,x2 - sh1add x2,x2,x2 /* multiply by 3 to get started */ -LSYM(neg) - shd x1,x2,28,t1 /* multiply by 0x11 */ - shd x2,0,28,t2 - add x2,t2,x2 - addc x1,t1,x1 -LSYM(neg_for_17) - shd x1,x2,24,t1 /* multiply by 0x101 */ - shd x2,0,24,t2 - add x2,t2,x2 - addc x1,t1,x1 - - shd x1,x2,16,t1 /* multiply by 0x10001 */ - shd x2,0,16,t2 - add x2,t2,x2 - addc x1,t1,x1 - MILLIRET - sub 0,x1,x1 - -/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */ -GSYM($$divI_12) - .export $$divI_12,millicode - comb,< x2,0,LREF(neg12) - copy 0,x1 - extru x2,29,30,x2 /* divide by 4 */ - addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */ - sh2add x2,x2,x2 /* multiply by 5 to get started */ - -LSYM(neg12) - subi 4,x2,x2 /* negate, divide by 4, and add 1 */ - /* negation and adding 1 are done */ - /* at the same time by the SUBI */ - extru x2,29,30,x2 - b LREF(neg) - sh2add x2,x2,x2 /* multiply by 5 to get started */ - -GSYM($$divU_12) - .export $$divU_12,millicode - extru x2,29,30,x2 /* divide by 4 */ - addi 5,x2,t1 /* cannot carry */ - sh2add x2,t1,x2 /* multiply by 5 to get started */ - b LREF(pos) - addc 0,0,x1 - -/* DIVISION BY 15 (use z = 2**32; a = 11111111) */ -GSYM($$divI_15) - .export $$divI_15,millicode - comb,< x2,0,LREF(neg15) - copy 0,x1 - addib,tr 1,x2,LREF(pos)+4 - shd x1,x2,28,t1 - -LSYM(neg15) - b LREF(neg) - subi 1,x2,x2 - -GSYM($$divU_15) - .export $$divU_15,millicode - addi 1,x2,x2 /* this CAN overflow */ - b LREF(pos) - addc 0,0,x1 - -/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */ -GSYM($$divI_17) - .export $$divI_17,millicode - comb,<,n x2,0,LREF(neg17) - addi 1,x2,x2 /* this cannot overflow */ - shd 0,x2,28,t1 /* multiply by 0xf to get started */ - shd x2,0,28,t2 - sub t2,x2,x2 - b LREF(pos_for_17) - subb t1,0,x1 - -LSYM(neg17) - subi 1,x2,x2 /* this cannot overflow */ - shd 0,x2,28,t1 /* multiply by 0xf to get started */ - shd x2,0,28,t2 - sub t2,x2,x2 - b LREF(neg_for_17) - subb t1,0,x1 - -GSYM($$divU_17) - .export $$divU_17,millicode - addi 1,x2,x2 /* this CAN overflow */ - addc 0,0,x1 - shd x1,x2,28,t1 /* multiply by 0xf to get started */ -LSYM(u17) - shd x2,0,28,t2 - sub t2,x2,x2 - b LREF(pos_for_17) - subb t1,x1,x1 - - -/* 
DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these - includes 7,9 and also 14 - - - z = 2**24-1 - r = z mod x = 0 - - so choose b = 0 - - Also, in order to divide by z = 2**24-1, we approximate by dividing - by (z+1) = 2**24 (which is easy), and then correcting. - - (ax) = (z+1)q' + r - . = zq' + (q'+r) - - So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1) - Then the true remainder of (ax)/z is (q'+r). Repeat the process - with this new remainder, adding the tentative quotients together, - until a tentative quotient is 0 (and then we are done). There is - one last correction to be done. It is possible that (q'+r) = z. - If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But, - in fact, we need to add 1 more to the quotient. Now, it turns - out that this happens if and only if the original value x is - an exact multiple of y. So, to avoid a three instruction test at - the end, instead use 1 instruction to add 1 to x at the beginning. */ - -/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */ -GSYM($$divI_7) - .export $$divI_7,millicode - comb,<,n x2,0,LREF(neg7) -LSYM(7) - addi 1,x2,x2 /* cannot overflow */ - shd 0,x2,29,x1 - sh3add x2,x2,x2 - addc x1,0,x1 -LSYM(pos7) - shd x1,x2,26,t1 - shd x2,0,26,t2 - add x2,t2,x2 - addc x1,t1,x1 - - shd x1,x2,20,t1 - shd x2,0,20,t2 - add x2,t2,x2 - addc x1,t1,t1 - - /* computed . Now divide it by (2**24 - 1) */ - - copy 0,x1 - shd,= t1,x2,24,t1 /* tentative quotient */ -LSYM(1) - addb,tr t1,x1,LREF(2) /* add to previous quotient */ - extru x2,31,24,x2 /* new remainder (unadjusted) */ - - MILLIRETN - -LSYM(2) - addb,tr t1,x2,LREF(1) /* adjust remainder */ - extru,= x2,7,8,t1 /* new quotient */ - -LSYM(neg7) - subi 1,x2,x2 /* negate x2 and add 1 */ -LSYM(8) - shd 0,x2,29,x1 - sh3add x2,x2,x2 - addc x1,0,x1 - -LSYM(neg7_shift) - shd x1,x2,26,t1 - shd x2,0,26,t2 - add x2,t2,x2 - addc x1,t1,x1 - - shd x1,x2,20,t1 - shd x2,0,20,t2 - add x2,t2,x2 - addc x1,t1,t1 - - /* computed . 
Now divide it by (2**24 - 1) */ - - copy 0,x1 - shd,= t1,x2,24,t1 /* tentative quotient */ -LSYM(3) - addb,tr t1,x1,LREF(4) /* add to previous quotient */ - extru x2,31,24,x2 /* new remainder (unadjusted) */ - - MILLIRET - sub 0,x1,x1 /* negate result */ - -LSYM(4) - addb,tr t1,x2,LREF(3) /* adjust remainder */ - extru,= x2,7,8,t1 /* new quotient */ - -GSYM($$divU_7) - .export $$divU_7,millicode - addi 1,x2,x2 /* can carry */ - addc 0,0,x1 - shd x1,x2,29,t1 - sh3add x2,x2,x2 - b LREF(pos7) - addc t1,x1,x1 - -/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */ -GSYM($$divI_9) - .export $$divI_9,millicode - comb,<,n x2,0,LREF(neg9) - addi 1,x2,x2 /* cannot overflow */ - shd 0,x2,29,t1 - shd x2,0,29,t2 - sub t2,x2,x2 - b LREF(pos7) - subb t1,0,x1 - -LSYM(neg9) - subi 1,x2,x2 /* negate and add 1 */ - shd 0,x2,29,t1 - shd x2,0,29,t2 - sub t2,x2,x2 - b LREF(neg7_shift) - subb t1,0,x1 - -GSYM($$divU_9) - .export $$divU_9,millicode - addi 1,x2,x2 /* can carry */ - addc 0,0,x1 - shd x1,x2,29,t1 - shd x2,0,29,t2 - sub t2,x2,x2 - b LREF(pos7) - subb t1,x1,x1 - -/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */ -GSYM($$divI_14) - .export $$divI_14,millicode - comb,<,n x2,0,LREF(neg14) -GSYM($$divU_14) - .export $$divU_14,millicode - b LREF(7) /* go to 7 case */ - extru x2,30,31,x2 /* divide by 2 */ - -LSYM(neg14) - subi 2,x2,x2 /* negate (and add 2) */ - b LREF(8) - extru x2,30,31,x2 /* divide by 2 */ - .exit - .procend - .end -#endif - -#ifdef L_mulI -/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */ -/****************************************************************************** -This routine is used on PA2.0 processors when gcc -mno-fpregs is used - -ROUTINE: $$mulI - - -DESCRIPTION: - - $$mulI multiplies two single word integers, giving a single - word result. 
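The strategy $$mulI uses, implemented by the 256-entry dispatch table further down, is to consume the multiplier eight bits at a time and to build each partial product with shift-and-add instructions only; for example, the x45 entry below forms 45*a0 as 5*(9*a0) with one sh3add and one sh2add. The C below is only a sketch of that loop structure: it omits the operand swapping and negation done by the millicode's first few instructions and replaces the hand-picked per-byte sequences with a generic bit loop, so the helper name and layout here are illustrative, not the routine itself.

#include <stdint.h>
#include <stdio.h>

/* Partial product byte*x using shifts and adds only.  The millicode
   instead branches (blr) into a table of hand-chosen sh1add/sh2add/
   sh3add sequences, a few instructions per byte value.  */
static uint32_t byte_times (uint32_t x, uint32_t byte)
{
  uint32_t r = 0;
  for (int bit = 0; bit < 8; bit++)
    if (byte & (1u << bit))
      r += x << bit;
  return r;
}

/* Skeleton of the multiply loop: peel one byte of the multiplier per
   iteration, accumulate its partial product, scale the multiplicand
   by 256 (a0__256a0) and shift the multiplier down by 8 bits.  */
static uint32_t muli_sketch (uint32_t a0, uint32_t a1)
{
  uint32_t r = 0;
  while (a1 != 0)
    {
      r += byte_times (a0, a1 & 0xff);
      a0 <<= 8;
      a1 >>= 8;
    }
  return r;
}

int main (void)
{
  printf ("%u\n", muli_sketch (12345, 6789));   /* prints 83810205 */
  return 0;
}

The register interface and the dispatch table itself follow.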
- - -INPUT REGISTERS: - - arg0 = Operand 1 - arg1 = Operand 2 - r31 == return pc - sr0 == return space when called externally - - -OUTPUT REGISTERS: - - arg0 = undefined - arg1 = undefined - ret1 = result - -OTHER REGISTERS AFFECTED: - - r1 = undefined - -SIDE EFFECTS: - - Causes a trap under the following conditions: NONE - Changes memory at the following places: NONE - -PERMISSIBLE CONTEXT: - - Unwindable - Does not create a stack frame - Is usable for internal or external microcode - -DISCUSSION: - - Calls other millicode routines via mrp: NONE - Calls other millicode routines: NONE - -***************************************************************************/ - - -#define a0 %arg0 -#define a1 %arg1 -#define t0 %r1 -#define r %ret1 - -#define a0__128a0 zdep a0,24,25,a0 -#define a0__256a0 zdep a0,23,24,a0 -#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0) -#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1) -#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2) -#define b_n_ret_t0 b,n LREF(ret_t0) -#define b_e_shift b LREF(e_shift) -#define b_e_t0ma0 b LREF(e_t0ma0) -#define b_e_t0 b LREF(e_t0) -#define b_e_t0a0 b LREF(e_t0a0) -#define b_e_t02a0 b LREF(e_t02a0) -#define b_e_t04a0 b LREF(e_t04a0) -#define b_e_2t0 b LREF(e_2t0) -#define b_e_2t0a0 b LREF(e_2t0a0) -#define b_e_2t04a0 b LREF(e2t04a0) -#define b_e_3t0 b LREF(e_3t0) -#define b_e_4t0 b LREF(e_4t0) -#define b_e_4t0a0 b LREF(e_4t0a0) -#define b_e_4t08a0 b LREF(e4t08a0) -#define b_e_5t0 b LREF(e_5t0) -#define b_e_8t0 b LREF(e_8t0) -#define b_e_8t0a0 b LREF(e_8t0a0) -#define r__r_a0 add r,a0,r -#define r__r_2a0 sh1add a0,r,r -#define r__r_4a0 sh2add a0,r,r -#define r__r_8a0 sh3add a0,r,r -#define r__r_t0 add r,t0,r -#define r__r_2t0 sh1add t0,r,r -#define r__r_4t0 sh2add t0,r,r -#define r__r_8t0 sh3add t0,r,r -#define t0__3a0 sh1add a0,a0,t0 -#define t0__4a0 sh2add a0,0,t0 -#define t0__5a0 sh2add a0,a0,t0 -#define t0__8a0 sh3add a0,0,t0 -#define t0__9a0 sh3add a0,a0,t0 -#define t0__16a0 zdep a0,27,28,t0 -#define t0__32a0 zdep a0,26,27,t0 -#define t0__64a0 zdep a0,25,26,t0 -#define t0__128a0 zdep a0,24,25,t0 -#define t0__t0ma0 sub t0,a0,t0 -#define t0__t0_a0 add t0,a0,t0 -#define t0__t0_2a0 sh1add a0,t0,t0 -#define t0__t0_4a0 sh2add a0,t0,t0 -#define t0__t0_8a0 sh3add a0,t0,t0 -#define t0__2t0_a0 sh1add t0,a0,t0 -#define t0__3t0 sh1add t0,t0,t0 -#define t0__4t0 sh2add t0,0,t0 -#define t0__4t0_a0 sh2add t0,a0,t0 -#define t0__5t0 sh2add t0,t0,t0 -#define t0__8t0 sh3add t0,0,t0 -#define t0__8t0_a0 sh3add t0,a0,t0 -#define t0__9t0 sh3add t0,t0,t0 -#define t0__16t0 zdep t0,27,28,t0 -#define t0__32t0 zdep t0,26,27,t0 -#define t0__256a0 zdep a0,23,24,t0 - - - SUBSPA_MILLI - ATTR_MILLI - .align 16 - .proc - .callinfo millicode - .export $$mulI,millicode -GSYM($$mulI) - combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */ - copy 0,r /* zero out the result */ - xor a0,a1,a0 /* swap a0 & a1 using the */ - xor a0,a1,a1 /* old xor trick */ - xor a0,a1,a0 -LSYM(l4) - combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */ - zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ - sub,> 0,a1,t0 /* otherwise negate both and */ - combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */ - sub 0,a0,a1 - movb,tr,n t0,a0,LREF(l2) /* 10th inst. */ - -LSYM(l0) r__r_t0 /* add in this partial product */ -LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */ -LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ -LSYM(l3) blr t0,0 /* case on these 8 bits ****** */ - extru a1,23,24,a1 /* a1 >>= 8 ****************** */ - -/*16 insts before this. 
*/ -/* a0 <<= 8 ************************** */ -LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop -LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop -LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop -LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0 -LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop -LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0 -LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN -LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0 -LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop -LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0 -LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN -LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 -LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN -LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 -LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0 -LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN -LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0 -LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN -LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0 -LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN -LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 -LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN -LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 -LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0 -LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 -LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 -LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN -LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 -LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0 -LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN -LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 -LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN -LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0 -LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 -LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0 -LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0 -LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0 -LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0 -LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0 -LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0 -LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 -LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0 -LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 -LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 -LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! 
t0__t0ma0 ! b_n_ret_t0 -LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN -LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0 -LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0 -LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN -LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0 -LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0 -LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0 -LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0 -LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0 -LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0 -LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0 -LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 -LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0 -LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0 -LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0 -LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0 -LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0 -LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 -LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0 -LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0 -LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0 -LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0 -LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0 -LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0 -LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0 -LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0 -LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 -LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0 -LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0 -LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 -LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 -LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN -LSYM(x129) t0__128a0 ! 
a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0 -LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0 -LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0 -LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0 -LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0 -LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0 -LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0 -LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0 -LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0 -LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0 -LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0 -LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0 -LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0 -LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0 -LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0 -LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0 -LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0 -LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0 -LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0 -LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0 -LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0 -LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0 -LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0 -LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0 -LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0 -LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0 -LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0 -LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0 -LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0 -LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0 -LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0 -LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 -LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x194) t0__8a0 ! t0__3t0 ! 
b_e_2t0 ! t0__4t0_a0 -LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0 -LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0 -LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0 -LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0 -LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0 -LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0 -LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0 -LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0 -LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0 -LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0 -LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0 -LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0 -LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0 -LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0 -LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0 -LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0 -LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 -LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0 -LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0 -LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0 -LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0 -LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0 -LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0 -LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0 -LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0 -LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0 -LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0 -LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0 -LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0 -LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0 -LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0 -LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0 -LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0 -LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0 -LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0 -LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0 -LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0 -LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0 -LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0 -LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0 -LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0 -LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0 -LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0 -LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0 -LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0 -LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0 -LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0 -LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0 -LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 -LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 -/*1040 insts before this. 
*/ -LSYM(ret_t0) MILLIRET -LSYM(e_t0) r__r_t0 -LSYM(e_shift) a1_ne_0_b_l2 - a0__256a0 /* a0 <<= 8 *********** */ - MILLIRETN -LSYM(e_t0ma0) a1_ne_0_b_l0 - t0__t0ma0 - MILLIRET - r__r_t0 -LSYM(e_t0a0) a1_ne_0_b_l0 - t0__t0_a0 - MILLIRET - r__r_t0 -LSYM(e_t02a0) a1_ne_0_b_l0 - t0__t0_2a0 - MILLIRET - r__r_t0 -LSYM(e_t04a0) a1_ne_0_b_l0 - t0__t0_4a0 - MILLIRET - r__r_t0 -LSYM(e_2t0) a1_ne_0_b_l1 - r__r_2t0 - MILLIRETN -LSYM(e_2t0a0) a1_ne_0_b_l0 - t0__2t0_a0 - MILLIRET - r__r_t0 -LSYM(e2t04a0) t0__t0_2a0 - a1_ne_0_b_l1 - r__r_2t0 - MILLIRETN -LSYM(e_3t0) a1_ne_0_b_l0 - t0__3t0 - MILLIRET - r__r_t0 -LSYM(e_4t0) a1_ne_0_b_l1 - r__r_4t0 - MILLIRETN -LSYM(e_4t0a0) a1_ne_0_b_l0 - t0__4t0_a0 - MILLIRET - r__r_t0 -LSYM(e4t08a0) t0__t0_2a0 - a1_ne_0_b_l1 - r__r_4t0 - MILLIRETN -LSYM(e_5t0) a1_ne_0_b_l0 - t0__5t0 - MILLIRET - r__r_t0 -LSYM(e_8t0) a1_ne_0_b_l1 - r__r_8t0 - MILLIRETN -LSYM(e_8t0a0) a1_ne_0_b_l0 - t0__8t0_a0 - MILLIRET - r__r_t0 - - .procend - .end -#endif diff --git a/gcc/config/pa/t-linux b/gcc/config/pa/t-linux index df351e11458..b94ebd250a8 100644 --- a/gcc/config/pa/t-linux +++ b/gcc/config/pa/t-linux @@ -16,13 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -#Plug millicode routines into libgcc.a We want these on both native and -#cross compiles. We use the "64-bit" routines because the "32-bit" code -#is broken for certain corner cases. - -LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall -LIB1ASMSRC = pa/milli64.S - # Compile libgcc2.a as PIC. TARGET_LIBGCC2_CFLAGS = -fPIC -DELF=1 -DLINUX=1 diff --git a/gcc/config/pa/t-linux64 b/gcc/config/pa/t-linux64 index d40546cabcc..af803a27ed3 100644 --- a/gcc/config/pa/t-linux64 +++ b/gcc/config/pa/t-linux64 @@ -16,12 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -#Plug millicode routines into libgcc.a We want these on both native and -#cross compiles. - -LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI -LIB1ASMSRC = pa/milli64.S - LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/pa/linux-atomic.c # Compile libgcc2.a as PIC. diff --git a/gcc/config/picochip/libgccExtras/fake_libgcc.asm b/gcc/config/picochip/libgccExtras/fake_libgcc.asm deleted file mode 100644 index e4b78f1e1f1..00000000000 --- a/gcc/config/picochip/libgccExtras/fake_libgcc.asm +++ /dev/null @@ -1,6 +0,0 @@ -// picoChip ASM file -// Fake libgcc asm file. This contains nothing, but is used to prevent gcc -// getting upset about the lack of a libgcc.S file when LIB1ASMFUNCS is defined -// to switch off the compilation of parts of libgcc. - - diff --git a/gcc/config/picochip/t-picochip b/gcc/config/picochip/t-picochip index 222d7a646b9..0f3fe8c3d81 100644 --- a/gcc/config/picochip/t-picochip +++ b/gcc/config/picochip/t-picochip @@ -35,14 +35,6 @@ LIB2FUNCS_EXTRA = \ $(srcdir)/config/picochip/libgccExtras/parityhi2.asm \ $(srcdir)/config/picochip/libgccExtras/popcounthi2.asm -# Prevent some of the more complicated libgcc functions from being -# compiled. This is because they are generally too big to fit into an -# AE anyway, so there is no point in having them. Also, some don't -# compile properly so we'll ignore them for the moment. - -LIB1ASMFUNCS = _mulsc3 _divsc3 -LIB1ASMSRC = picochip/libgccExtras/fake_libgcc.asm - # Turn off ranlib on target libraries. 
RANLIB_FOR_TARGET = cat diff --git a/gcc/config/sh/lib1funcs.asm b/gcc/config/sh/lib1funcs.asm deleted file mode 100644 index 2f0ca16cd91..00000000000 --- a/gcc/config/sh/lib1funcs.asm +++ /dev/null @@ -1,3933 +0,0 @@ -/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2009 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - - -!! libgcc routines for the Renesas / SuperH SH CPUs. -!! Contributed by Steve Chamberlain. -!! sac@cygnus.com - -!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines -!! recoded in assembly by Toshiyasu Morita -!! tm@netcom.com - -#if defined(__ELF__) && defined(__linux__) -.section .note.GNU-stack,"",%progbits -.previous -#endif - -/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and - ELF local label prefixes by J"orn Rennecke - amylaar@cygnus.com */ - -#include "lib1funcs.h" - -/* t-vxworks needs to build both PIC and non-PIC versions of libgcc, - so it is more convenient to define NO_FPSCR_VALUES here than to - define it on the command line. */ -#if defined __vxworks && defined __PIC__ -#define NO_FPSCR_VALUES -#endif - -#if ! 
__SH5__ -#ifdef L_ashiftrt - .global GLOBAL(ashiftrt_r4_0) - .global GLOBAL(ashiftrt_r4_1) - .global GLOBAL(ashiftrt_r4_2) - .global GLOBAL(ashiftrt_r4_3) - .global GLOBAL(ashiftrt_r4_4) - .global GLOBAL(ashiftrt_r4_5) - .global GLOBAL(ashiftrt_r4_6) - .global GLOBAL(ashiftrt_r4_7) - .global GLOBAL(ashiftrt_r4_8) - .global GLOBAL(ashiftrt_r4_9) - .global GLOBAL(ashiftrt_r4_10) - .global GLOBAL(ashiftrt_r4_11) - .global GLOBAL(ashiftrt_r4_12) - .global GLOBAL(ashiftrt_r4_13) - .global GLOBAL(ashiftrt_r4_14) - .global GLOBAL(ashiftrt_r4_15) - .global GLOBAL(ashiftrt_r4_16) - .global GLOBAL(ashiftrt_r4_17) - .global GLOBAL(ashiftrt_r4_18) - .global GLOBAL(ashiftrt_r4_19) - .global GLOBAL(ashiftrt_r4_20) - .global GLOBAL(ashiftrt_r4_21) - .global GLOBAL(ashiftrt_r4_22) - .global GLOBAL(ashiftrt_r4_23) - .global GLOBAL(ashiftrt_r4_24) - .global GLOBAL(ashiftrt_r4_25) - .global GLOBAL(ashiftrt_r4_26) - .global GLOBAL(ashiftrt_r4_27) - .global GLOBAL(ashiftrt_r4_28) - .global GLOBAL(ashiftrt_r4_29) - .global GLOBAL(ashiftrt_r4_30) - .global GLOBAL(ashiftrt_r4_31) - .global GLOBAL(ashiftrt_r4_32) - - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32)) - - .align 1 -GLOBAL(ashiftrt_r4_32): -GLOBAL(ashiftrt_r4_31): - rotcl r4 - rts - subc r4,r4 - -GLOBAL(ashiftrt_r4_30): - shar r4 -GLOBAL(ashiftrt_r4_29): - shar r4 -GLOBAL(ashiftrt_r4_28): - shar r4 -GLOBAL(ashiftrt_r4_27): - shar r4 -GLOBAL(ashiftrt_r4_26): - shar r4 -GLOBAL(ashiftrt_r4_25): - shar r4 -GLOBAL(ashiftrt_r4_24): - shlr16 r4 - shlr8 r4 - rts - exts.b r4,r4 - -GLOBAL(ashiftrt_r4_23): - shar r4 -GLOBAL(ashiftrt_r4_22): - shar r4 -GLOBAL(ashiftrt_r4_21): - shar r4 -GLOBAL(ashiftrt_r4_20): - shar r4 -GLOBAL(ashiftrt_r4_19): - shar r4 -GLOBAL(ashiftrt_r4_18): - shar r4 -GLOBAL(ashiftrt_r4_17): - shar r4 -GLOBAL(ashiftrt_r4_16): - shlr16 r4 - rts - exts.w r4,r4 - -GLOBAL(ashiftrt_r4_15): - shar r4 -GLOBAL(ashiftrt_r4_14): - shar r4 -GLOBAL(ashiftrt_r4_13): - shar r4 -GLOBAL(ashiftrt_r4_12): - shar r4 -GLOBAL(ashiftrt_r4_11): - shar r4 -GLOBAL(ashiftrt_r4_10): - shar r4 -GLOBAL(ashiftrt_r4_9): - shar r4 -GLOBAL(ashiftrt_r4_8): - shar r4 -GLOBAL(ashiftrt_r4_7): - shar r4 -GLOBAL(ashiftrt_r4_6): - shar r4 -GLOBAL(ashiftrt_r4_5): - shar r4 -GLOBAL(ashiftrt_r4_4): - shar r4 -GLOBAL(ashiftrt_r4_3): - shar r4 -GLOBAL(ashiftrt_r4_2): - shar r4 -GLOBAL(ashiftrt_r4_1): - rts - shar r4 
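The ashiftrt_r4_N entry points above give one label per shift count: entering at ashiftrt_r4_N falls through enough single-bit shar steps (with shortcuts at 16 and 24) to reach the common return having shifted by exactly N. GLOBAL(ashrsi3) below gets the same effect for a variable count with a byte offset table and a computed branch (mova/braf), and GLOBAL(movmem) later in this file reuses the trick to jump into an unrolled copy sequence. A rough C analogue, assuming GNU C's labels-as-values extension and unrolling only four steps to keep the sketch short:

#include <stdio.h>

/* Sketch of the "enter an unrolled chain at a count-dependent label"
   pattern: entry[n] points n single-bit shifts before the end, the way
   ashiftrt_r4_n enters the shar chain n steps before its rts.  Note
   that >> on a negative int is arithmetic on GCC, matching shar.  */
static int ashiftrt_sketch (int value, unsigned n)
{
  static void *const entry[] = { &&do0, &&do1, &&do2, &&do3, &&do4 };
  if (n > 4)
    n = 4;                      /* this sketch only unrolls four steps */
  goto *entry[n];
do4: value >>= 1;
do3: value >>= 1;
do2: value >>= 1;
do1: value >>= 1;
do0: return value;
}

int main (void)
{
  printf ("%d %d\n", ashiftrt_sketch (-64, 3), ashiftrt_sketch (64, 2));
  return 0;                     /* prints "-8 16" */
}

The zero-count entry and the ENDFUNC markers for these routines follow.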
- -GLOBAL(ashiftrt_r4_0): - rts - nop - - ENDFUNC(GLOBAL(ashiftrt_r4_0)) - ENDFUNC(GLOBAL(ashiftrt_r4_1)) - ENDFUNC(GLOBAL(ashiftrt_r4_2)) - ENDFUNC(GLOBAL(ashiftrt_r4_3)) - ENDFUNC(GLOBAL(ashiftrt_r4_4)) - ENDFUNC(GLOBAL(ashiftrt_r4_5)) - ENDFUNC(GLOBAL(ashiftrt_r4_6)) - ENDFUNC(GLOBAL(ashiftrt_r4_7)) - ENDFUNC(GLOBAL(ashiftrt_r4_8)) - ENDFUNC(GLOBAL(ashiftrt_r4_9)) - ENDFUNC(GLOBAL(ashiftrt_r4_10)) - ENDFUNC(GLOBAL(ashiftrt_r4_11)) - ENDFUNC(GLOBAL(ashiftrt_r4_12)) - ENDFUNC(GLOBAL(ashiftrt_r4_13)) - ENDFUNC(GLOBAL(ashiftrt_r4_14)) - ENDFUNC(GLOBAL(ashiftrt_r4_15)) - ENDFUNC(GLOBAL(ashiftrt_r4_16)) - ENDFUNC(GLOBAL(ashiftrt_r4_17)) - ENDFUNC(GLOBAL(ashiftrt_r4_18)) - ENDFUNC(GLOBAL(ashiftrt_r4_19)) - ENDFUNC(GLOBAL(ashiftrt_r4_20)) - ENDFUNC(GLOBAL(ashiftrt_r4_21)) - ENDFUNC(GLOBAL(ashiftrt_r4_22)) - ENDFUNC(GLOBAL(ashiftrt_r4_23)) - ENDFUNC(GLOBAL(ashiftrt_r4_24)) - ENDFUNC(GLOBAL(ashiftrt_r4_25)) - ENDFUNC(GLOBAL(ashiftrt_r4_26)) - ENDFUNC(GLOBAL(ashiftrt_r4_27)) - ENDFUNC(GLOBAL(ashiftrt_r4_28)) - ENDFUNC(GLOBAL(ashiftrt_r4_29)) - ENDFUNC(GLOBAL(ashiftrt_r4_30)) - ENDFUNC(GLOBAL(ashiftrt_r4_31)) - ENDFUNC(GLOBAL(ashiftrt_r4_32)) -#endif - -#ifdef L_ashiftrt_n - -! -! GLOBAL(ashrsi3) -! -! Entry: -! -! r4: Value to shift -! r5: Shifts -! -! Exit: -! -! r0: Result -! -! Destroys: -! -! (none) -! - - .global GLOBAL(ashrsi3) - HIDDEN_FUNC(GLOBAL(ashrsi3)) - .align 2 -GLOBAL(ashrsi3): - mov #31,r0 - and r0,r5 - mova LOCAL(ashrsi3_table),r0 - mov.b @(r0,r5),r5 -#ifdef __sh1__ - add r5,r0 - jmp @r0 -#else - braf r5 -#endif - mov r4,r0 - - .align 2 -LOCAL(ashrsi3_table): - .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table) - -LOCAL(ashrsi3_31): - rotcl r0 - rts - subc r0,r0 - -LOCAL(ashrsi3_30): - shar r0 -LOCAL(ashrsi3_29): - shar r0 -LOCAL(ashrsi3_28): - shar r0 -LOCAL(ashrsi3_27): - shar r0 -LOCAL(ashrsi3_26): - shar r0 -LOCAL(ashrsi3_25): - shar r0 -LOCAL(ashrsi3_24): - shlr16 r0 - shlr8 r0 - rts - exts.b r0,r0 - -LOCAL(ashrsi3_23): - shar r0 -LOCAL(ashrsi3_22): - shar r0 -LOCAL(ashrsi3_21): - shar r0 -LOCAL(ashrsi3_20): - shar r0 -LOCAL(ashrsi3_19): - shar r0 
-LOCAL(ashrsi3_18): - shar r0 -LOCAL(ashrsi3_17): - shar r0 -LOCAL(ashrsi3_16): - shlr16 r0 - rts - exts.w r0,r0 - -LOCAL(ashrsi3_15): - shar r0 -LOCAL(ashrsi3_14): - shar r0 -LOCAL(ashrsi3_13): - shar r0 -LOCAL(ashrsi3_12): - shar r0 -LOCAL(ashrsi3_11): - shar r0 -LOCAL(ashrsi3_10): - shar r0 -LOCAL(ashrsi3_9): - shar r0 -LOCAL(ashrsi3_8): - shar r0 -LOCAL(ashrsi3_7): - shar r0 -LOCAL(ashrsi3_6): - shar r0 -LOCAL(ashrsi3_5): - shar r0 -LOCAL(ashrsi3_4): - shar r0 -LOCAL(ashrsi3_3): - shar r0 -LOCAL(ashrsi3_2): - shar r0 -LOCAL(ashrsi3_1): - rts - shar r0 - -LOCAL(ashrsi3_0): - rts - nop - - ENDFUNC(GLOBAL(ashrsi3)) -#endif - -#ifdef L_ashiftlt - -! -! GLOBAL(ashlsi3) -! -! Entry: -! -! r4: Value to shift -! r5: Shifts -! -! Exit: -! -! r0: Result -! -! Destroys: -! -! (none) -! - .global GLOBAL(ashlsi3) - HIDDEN_FUNC(GLOBAL(ashlsi3)) - .align 2 -GLOBAL(ashlsi3): - mov #31,r0 - and r0,r5 - mova LOCAL(ashlsi3_table),r0 - mov.b @(r0,r5),r5 -#ifdef __sh1__ - add r5,r0 - jmp @r0 -#else - braf r5 -#endif - mov r4,r0 - - .align 2 -LOCAL(ashlsi3_table): - .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table) - -LOCAL(ashlsi3_6): - shll2 r0 -LOCAL(ashlsi3_4): - shll2 r0 -LOCAL(ashlsi3_2): - rts - shll2 r0 - -LOCAL(ashlsi3_7): - shll2 r0 -LOCAL(ashlsi3_5): - shll2 r0 -LOCAL(ashlsi3_3): - shll2 r0 -LOCAL(ashlsi3_1): - rts - shll r0 - -LOCAL(ashlsi3_14): - shll2 r0 -LOCAL(ashlsi3_12): - shll2 r0 -LOCAL(ashlsi3_10): - shll2 r0 -LOCAL(ashlsi3_8): - rts - shll8 r0 - -LOCAL(ashlsi3_15): - shll2 r0 -LOCAL(ashlsi3_13): - shll2 r0 -LOCAL(ashlsi3_11): - shll2 r0 -LOCAL(ashlsi3_9): - shll8 r0 - rts - shll r0 - -LOCAL(ashlsi3_22): - shll2 r0 -LOCAL(ashlsi3_20): - shll2 r0 -LOCAL(ashlsi3_18): - shll2 r0 -LOCAL(ashlsi3_16): - rts - shll16 r0 - -LOCAL(ashlsi3_23): - shll2 r0 -LOCAL(ashlsi3_21): - shll2 r0 -LOCAL(ashlsi3_19): - shll2 r0 -LOCAL(ashlsi3_17): - shll16 r0 - rts - shll r0 - -LOCAL(ashlsi3_30): - shll2 r0 -LOCAL(ashlsi3_28): - shll2 r0 -LOCAL(ashlsi3_26): - shll2 r0 -LOCAL(ashlsi3_24): - shll16 r0 - rts - shll8 r0 - -LOCAL(ashlsi3_31): - shll2 r0 -LOCAL(ashlsi3_29): - shll2 r0 
-LOCAL(ashlsi3_27): - shll2 r0 -LOCAL(ashlsi3_25): - shll16 r0 - shll8 r0 - rts - shll r0 - -LOCAL(ashlsi3_0): - rts - nop - - ENDFUNC(GLOBAL(ashlsi3)) -#endif - -#ifdef L_lshiftrt - -! -! GLOBAL(lshrsi3) -! -! Entry: -! -! r4: Value to shift -! r5: Shifts -! -! Exit: -! -! r0: Result -! -! Destroys: -! -! (none) -! - .global GLOBAL(lshrsi3) - HIDDEN_FUNC(GLOBAL(lshrsi3)) - .align 2 -GLOBAL(lshrsi3): - mov #31,r0 - and r0,r5 - mova LOCAL(lshrsi3_table),r0 - mov.b @(r0,r5),r5 -#ifdef __sh1__ - add r5,r0 - jmp @r0 -#else - braf r5 -#endif - mov r4,r0 - - .align 2 -LOCAL(lshrsi3_table): - .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table) - -LOCAL(lshrsi3_6): - shlr2 r0 -LOCAL(lshrsi3_4): - shlr2 r0 -LOCAL(lshrsi3_2): - rts - shlr2 r0 - -LOCAL(lshrsi3_7): - shlr2 r0 -LOCAL(lshrsi3_5): - shlr2 r0 -LOCAL(lshrsi3_3): - shlr2 r0 -LOCAL(lshrsi3_1): - rts - shlr r0 - -LOCAL(lshrsi3_14): - shlr2 r0 -LOCAL(lshrsi3_12): - shlr2 r0 -LOCAL(lshrsi3_10): - shlr2 r0 -LOCAL(lshrsi3_8): - rts - shlr8 r0 - -LOCAL(lshrsi3_15): - shlr2 r0 -LOCAL(lshrsi3_13): - shlr2 r0 -LOCAL(lshrsi3_11): - shlr2 r0 -LOCAL(lshrsi3_9): - shlr8 r0 - rts - shlr r0 - -LOCAL(lshrsi3_22): - shlr2 r0 -LOCAL(lshrsi3_20): - shlr2 r0 -LOCAL(lshrsi3_18): - shlr2 r0 -LOCAL(lshrsi3_16): - rts - shlr16 r0 - -LOCAL(lshrsi3_23): - shlr2 r0 -LOCAL(lshrsi3_21): - shlr2 r0 -LOCAL(lshrsi3_19): - shlr2 r0 -LOCAL(lshrsi3_17): - shlr16 r0 - rts - shlr r0 - -LOCAL(lshrsi3_30): - shlr2 r0 -LOCAL(lshrsi3_28): - shlr2 r0 -LOCAL(lshrsi3_26): - shlr2 r0 -LOCAL(lshrsi3_24): - shlr16 r0 - rts - shlr8 r0 - -LOCAL(lshrsi3_31): - shlr2 r0 -LOCAL(lshrsi3_29): - shlr2 r0 -LOCAL(lshrsi3_27): - shlr2 r0 -LOCAL(lshrsi3_25): - shlr16 r0 - shlr8 r0 - rts - shlr r0 - -LOCAL(lshrsi3_0): - rts - nop - - ENDFUNC(GLOBAL(lshrsi3)) -#endif - -#ifdef L_movmem - .text - .balign 4 - .global GLOBAL(movmem) - HIDDEN_FUNC(GLOBAL(movmem)) - HIDDEN_ALIAS(movstr,movmem) - /* This would be a lot simpler if r6 contained the byte count - minus 64, and we wouldn't be called here for a byte count of 64. 
*/ -GLOBAL(movmem): - sts.l pr,@-r15 - shll2 r6 - bsr GLOBAL(movmemSI52+2) - mov.l @(48,r5),r0 - .balign 4 -LOCAL(movmem_loop): /* Reached with rts */ - mov.l @(60,r5),r0 - add #-64,r6 - mov.l r0,@(60,r4) - tst r6,r6 - mov.l @(56,r5),r0 - bt LOCAL(movmem_done) - mov.l r0,@(56,r4) - cmp/pl r6 - mov.l @(52,r5),r0 - add #64,r5 - mov.l r0,@(52,r4) - add #64,r4 - bt GLOBAL(movmemSI52) -! done all the large groups, do the remainder -! jump to movmem+ - mova GLOBAL(movmemSI4)+4,r0 - add r6,r0 - jmp @r0 -LOCAL(movmem_done): ! share slot insn, works out aligned. - lds.l @r15+,pr - mov.l r0,@(56,r4) - mov.l @(52,r5),r0 - rts - mov.l r0,@(52,r4) - .balign 4 -! ??? We need aliases movstr* for movmem* for the older libraries. These -! aliases will be removed at the some point in the future. - .global GLOBAL(movmemSI64) - HIDDEN_FUNC(GLOBAL(movmemSI64)) - HIDDEN_ALIAS(movstrSI64,movmemSI64) -GLOBAL(movmemSI64): - mov.l @(60,r5),r0 - mov.l r0,@(60,r4) - .global GLOBAL(movmemSI60) - HIDDEN_FUNC(GLOBAL(movmemSI60)) - HIDDEN_ALIAS(movstrSI60,movmemSI60) -GLOBAL(movmemSI60): - mov.l @(56,r5),r0 - mov.l r0,@(56,r4) - .global GLOBAL(movmemSI56) - HIDDEN_FUNC(GLOBAL(movmemSI56)) - HIDDEN_ALIAS(movstrSI56,movmemSI56) -GLOBAL(movmemSI56): - mov.l @(52,r5),r0 - mov.l r0,@(52,r4) - .global GLOBAL(movmemSI52) - HIDDEN_FUNC(GLOBAL(movmemSI52)) - HIDDEN_ALIAS(movstrSI52,movmemSI52) -GLOBAL(movmemSI52): - mov.l @(48,r5),r0 - mov.l r0,@(48,r4) - .global GLOBAL(movmemSI48) - HIDDEN_FUNC(GLOBAL(movmemSI48)) - HIDDEN_ALIAS(movstrSI48,movmemSI48) -GLOBAL(movmemSI48): - mov.l @(44,r5),r0 - mov.l r0,@(44,r4) - .global GLOBAL(movmemSI44) - HIDDEN_FUNC(GLOBAL(movmemSI44)) - HIDDEN_ALIAS(movstrSI44,movmemSI44) -GLOBAL(movmemSI44): - mov.l @(40,r5),r0 - mov.l r0,@(40,r4) - .global GLOBAL(movmemSI40) - HIDDEN_FUNC(GLOBAL(movmemSI40)) - HIDDEN_ALIAS(movstrSI40,movmemSI40) -GLOBAL(movmemSI40): - mov.l @(36,r5),r0 - mov.l r0,@(36,r4) - .global GLOBAL(movmemSI36) - HIDDEN_FUNC(GLOBAL(movmemSI36)) - HIDDEN_ALIAS(movstrSI36,movmemSI36) -GLOBAL(movmemSI36): - mov.l @(32,r5),r0 - mov.l r0,@(32,r4) - .global GLOBAL(movmemSI32) - HIDDEN_FUNC(GLOBAL(movmemSI32)) - HIDDEN_ALIAS(movstrSI32,movmemSI32) -GLOBAL(movmemSI32): - mov.l @(28,r5),r0 - mov.l r0,@(28,r4) - .global GLOBAL(movmemSI28) - HIDDEN_FUNC(GLOBAL(movmemSI28)) - HIDDEN_ALIAS(movstrSI28,movmemSI28) -GLOBAL(movmemSI28): - mov.l @(24,r5),r0 - mov.l r0,@(24,r4) - .global GLOBAL(movmemSI24) - HIDDEN_FUNC(GLOBAL(movmemSI24)) - HIDDEN_ALIAS(movstrSI24,movmemSI24) -GLOBAL(movmemSI24): - mov.l @(20,r5),r0 - mov.l r0,@(20,r4) - .global GLOBAL(movmemSI20) - HIDDEN_FUNC(GLOBAL(movmemSI20)) - HIDDEN_ALIAS(movstrSI20,movmemSI20) -GLOBAL(movmemSI20): - mov.l @(16,r5),r0 - mov.l r0,@(16,r4) - .global GLOBAL(movmemSI16) - HIDDEN_FUNC(GLOBAL(movmemSI16)) - HIDDEN_ALIAS(movstrSI16,movmemSI16) -GLOBAL(movmemSI16): - mov.l @(12,r5),r0 - mov.l r0,@(12,r4) - .global GLOBAL(movmemSI12) - HIDDEN_FUNC(GLOBAL(movmemSI12)) - HIDDEN_ALIAS(movstrSI12,movmemSI12) -GLOBAL(movmemSI12): - mov.l @(8,r5),r0 - mov.l r0,@(8,r4) - .global GLOBAL(movmemSI8) - HIDDEN_FUNC(GLOBAL(movmemSI8)) - HIDDEN_ALIAS(movstrSI8,movmemSI8) -GLOBAL(movmemSI8): - mov.l @(4,r5),r0 - mov.l r0,@(4,r4) - .global GLOBAL(movmemSI4) - HIDDEN_FUNC(GLOBAL(movmemSI4)) - HIDDEN_ALIAS(movstrSI4,movmemSI4) -GLOBAL(movmemSI4): - mov.l @(0,r5),r0 - rts - mov.l r0,@(0,r4) - - ENDFUNC(GLOBAL(movmemSI64)) - ENDFUNC(GLOBAL(movmemSI60)) - ENDFUNC(GLOBAL(movmemSI56)) - ENDFUNC(GLOBAL(movmemSI52)) - ENDFUNC(GLOBAL(movmemSI48)) - ENDFUNC(GLOBAL(movmemSI44)) - 
ENDFUNC(GLOBAL(movmemSI40)) - ENDFUNC(GLOBAL(movmemSI36)) - ENDFUNC(GLOBAL(movmemSI32)) - ENDFUNC(GLOBAL(movmemSI28)) - ENDFUNC(GLOBAL(movmemSI24)) - ENDFUNC(GLOBAL(movmemSI20)) - ENDFUNC(GLOBAL(movmemSI16)) - ENDFUNC(GLOBAL(movmemSI12)) - ENDFUNC(GLOBAL(movmemSI8)) - ENDFUNC(GLOBAL(movmemSI4)) - ENDFUNC(GLOBAL(movmem)) -#endif - -#ifdef L_movmem_i4 - .text - .global GLOBAL(movmem_i4_even) - .global GLOBAL(movmem_i4_odd) - .global GLOBAL(movmemSI12_i4) - - HIDDEN_FUNC(GLOBAL(movmem_i4_even)) - HIDDEN_FUNC(GLOBAL(movmem_i4_odd)) - HIDDEN_FUNC(GLOBAL(movmemSI12_i4)) - - HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even) - HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd) - HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4) - - .p2align 5 -L_movmem_2mod4_end: - mov.l r0,@(16,r4) - rts - mov.l r1,@(20,r4) - - .p2align 2 - -GLOBAL(movmem_i4_even): - mov.l @r5+,r0 - bra L_movmem_start_even - mov.l @r5+,r1 - -GLOBAL(movmem_i4_odd): - mov.l @r5+,r1 - add #-4,r4 - mov.l @r5+,r2 - mov.l @r5+,r3 - mov.l r1,@(4,r4) - mov.l r2,@(8,r4) - -L_movmem_loop: - mov.l r3,@(12,r4) - dt r6 - mov.l @r5+,r0 - bt/s L_movmem_2mod4_end - mov.l @r5+,r1 - add #16,r4 -L_movmem_start_even: - mov.l @r5+,r2 - mov.l @r5+,r3 - mov.l r0,@r4 - dt r6 - mov.l r1,@(4,r4) - bf/s L_movmem_loop - mov.l r2,@(8,r4) - rts - mov.l r3,@(12,r4) - - ENDFUNC(GLOBAL(movmem_i4_even)) - ENDFUNC(GLOBAL(movmem_i4_odd)) - - .p2align 4 -GLOBAL(movmemSI12_i4): - mov.l @r5,r0 - mov.l @(4,r5),r1 - mov.l @(8,r5),r2 - mov.l r0,@r4 - mov.l r1,@(4,r4) - rts - mov.l r2,@(8,r4) - - ENDFUNC(GLOBAL(movmemSI12_i4)) -#endif - -#ifdef L_mulsi3 - - - .global GLOBAL(mulsi3) - HIDDEN_FUNC(GLOBAL(mulsi3)) - -! r4 = aabb -! r5 = ccdd -! r0 = aabb*ccdd via partial products -! -! if aa == 0 and cc = 0 -! r0 = bb*dd -! -! else -! aa = bb*dd + (aa*dd*65536) + (cc*bb*65536) -! - -GLOBAL(mulsi3): - mulu.w r4,r5 ! multiply the lsws macl=bb*dd - mov r5,r3 ! r3 = ccdd - swap.w r4,r2 ! r2 = bbaa - xtrct r2,r3 ! r3 = aacc - tst r3,r3 ! msws zero ? - bf hiset - rts ! yes - then we have the answer - sts macl,r0 - -hiset: sts macl,r0 ! r0 = bb*dd - mulu.w r2,r5 ! brewing macl = aa*dd - sts macl,r1 - mulu.w r3,r4 ! brewing macl = cc*bb - sts macl,r2 - add r1,r2 - shll16 r2 - rts - add r2,r0 - - ENDFUNC(GLOBAL(mulsi3)) -#endif -#endif /* ! __SH5__ */ -#ifdef L_sdivsi3_i4 - .title "SH DIVIDE" -!! 4 byte integer Divide code for the Renesas SH -#ifdef __SH4__ -!! args in r4 and r5, result in fpul, clobber dr0, dr2 - - .global GLOBAL(sdivsi3_i4) - HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) -GLOBAL(sdivsi3_i4): - lds r4,fpul - float fpul,dr0 - lds r5,fpul - float fpul,dr2 - fdiv dr2,dr0 - rts - ftrc dr0,fpul - - ENDFUNC(GLOBAL(sdivsi3_i4)) -#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__) -!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2 - -#if ! __SH5__ || __SH5__ == 32 -#if __SH5__ - .mode SHcompact -#endif - .global GLOBAL(sdivsi3_i4) - HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) -GLOBAL(sdivsi3_i4): - sts.l fpscr,@-r15 - mov #8,r2 - swap.w r2,r2 - lds r2,fpscr - lds r4,fpul - float fpul,dr0 - lds r5,fpul - float fpul,dr2 - fdiv dr2,dr0 - ftrc dr0,fpul - rts - lds.l @r15+,fpscr - - ENDFUNC(GLOBAL(sdivsi3_i4)) -#endif /* ! __SH5__ || __SH5__ == 32 */ -#endif /* ! __SH4__ */ -#endif - -#ifdef L_sdivsi3 -/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with - sh2e/sh3e code. */ -#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) -!! -!! Steve Chamberlain -!! sac@cygnus.com -!! -!! - -!! 
args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit - - .global GLOBAL(sdivsi3) -#if __SHMEDIA__ -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif - .align 2 -#if 0 -/* The assembly code that follows is a hand-optimized version of the C - code that follows. Note that the registers that are modified are - exactly those listed as clobbered in the patterns divsi3_i1 and - divsi3_i1_media. - -int __sdivsi3 (i, j) - int i, j; -{ - register unsigned long long r18 asm ("r18"); - register unsigned long long r19 asm ("r19"); - register unsigned long long r0 asm ("r0") = 0; - register unsigned long long r1 asm ("r1") = 1; - register int r2 asm ("r2") = i >> 31; - register int r3 asm ("r3") = j >> 31; - - r2 = r2 ? r2 : r1; - r3 = r3 ? r3 : r1; - r18 = i * r2; - r19 = j * r3; - r2 *= r3; - - r19 <<= 31; - r1 <<= 31; - do - if (r18 >= r19) - r0 |= r1, r18 -= r19; - while (r19 >>= 1, r1 >>= 1); - - return r2 * (int)r0; -} -*/ -GLOBAL(sdivsi3): - pt/l LOCAL(sdivsi3_dontadd), tr2 - pt/l LOCAL(sdivsi3_loop), tr1 - ptabs/l r18, tr0 - movi 0, r0 - movi 1, r1 - shari.l r4, 31, r2 - shari.l r5, 31, r3 - cmveq r2, r1, r2 - cmveq r3, r1, r3 - muls.l r4, r2, r18 - muls.l r5, r3, r19 - muls.l r2, r3, r2 - shlli r19, 31, r19 - shlli r1, 31, r1 -LOCAL(sdivsi3_loop): - bgtu r19, r18, tr2 - or r0, r1, r0 - sub r18, r19, r18 -LOCAL(sdivsi3_dontadd): - shlri r1, 1, r1 - shlri r19, 1, r19 - bnei r1, 0, tr1 - muls.l r0, r2, r0 - add.l r0, r63, r0 - blink tr0, r63 -#elif 0 /* ! 0 */ - // inputs: r4,r5 - // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0 - // result in r0 -GLOBAL(sdivsi3): - // can create absolute value without extra latency, - // but dependent on proper sign extension of inputs: - // shari.l r5,31,r2 - // xor r5,r2,r20 - // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended. - shari.l r5,31,r2 - ori r2,1,r2 - muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended. - movi 0xffffffffffffbb0c,r19 // shift count eqiv 76 - shari.l r4,31,r3 - nsb r20,r0 - shlld r20,r0,r25 - shlri r25,48,r25 - sub r19,r25,r1 - mmulfx.w r1,r1,r2 - mshflo.w r1,r63,r1 - // If r4 was to be used in-place instead of r21, could use this sequence - // to compute absolute: - // sub r63,r4,r19 // compute absolute value of r4 - // shlri r4,32,r3 // into lower 32 bit of r4, keeping - // mcmv r19,r3,r4 // the sign in the upper 32 bits intact. - ori r3,1,r3 - mmulfx.w r25,r2,r2 - sub r19,r0,r0 - muls.l r4,r3,r21 - msub.w r1,r2,r2 - addi r2,-2,r1 - mulu.l r21,r1,r19 - mmulfx.w r2,r2,r2 - shlli r1,15,r1 - shlrd r19,r0,r19 - mulu.l r19,r20,r3 - mmacnfx.wl r25,r2,r1 - ptabs r18,tr0 - sub r21,r3,r25 - - mulu.l r25,r1,r2 - addi r0,14,r0 - xor r4,r5,r18 - shlrd r2,r0,r2 - mulu.l r2,r20,r3 - add r19,r2,r19 - shari.l r18,31,r18 - sub r25,r3,r25 - - mulu.l r25,r1,r2 - sub r25,r20,r25 - add r19,r18,r19 - shlrd r2,r0,r2 - mulu.l r2,r20,r3 - addi r25,1,r25 - add r19,r2,r19 - - cmpgt r25,r3,r25 - add.l r19,r25,r0 - xor r0,r18,r0 - blink tr0,r63 -#else /* ! 0 && ! 0 */ - - // inputs: r4,r5 - // clobbered: r1,r18,r19,r20,r21,r25,tr0 - // result in r0 - HIDDEN_FUNC(GLOBAL(sdivsi3_2)) -#ifndef __pic__ - FUNC(GLOBAL(sdivsi3)) -GLOBAL(sdivsi3): /* this is the shcompact entry point */ - // The special SHmedia entry point sdivsi3_1 prevents accidental linking - // with the SHcompact implementation, which clobbers tr1 / tr2. 
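The table-driven SHmedia implementation that follows (sdivsi3_2) folds the sign handling into its fixed-point computation, but the commented-out variant above and the divdi3 wrapper later in this file rest on a simpler reduction: take the absolute values, divide them as unsigned numbers, and make the quotient negative exactly when the XOR of the operands' sign bits is set. A minimal C sketch of that reduction, assuming nothing about the unsigned core (udiv32 below is a stand-in used only for illustration, not a routine in this file):

#include <stdint.h>

/* Stand-in for the unsigned divide that the real code implements with a
   reciprocal estimate; any correct unsigned division works for the sketch.  */
static uint32_t
udiv32 (uint32_t n, uint32_t d)
{
  return n / d;
}

int32_t
sdiv32_sketch (int32_t i, int32_t j)
{
  uint32_t n = i < 0 ? 0u - (uint32_t) i : (uint32_t) i;  /* |i| */
  uint32_t d = j < 0 ? 0u - (uint32_t) j : (uint32_t) j;  /* |j| */
  uint32_t q = udiv32 (n, d);
  /* The result is negative iff exactly one operand was negative,
     i.e. iff the XOR of the two sign bits is set.  */
  return (i ^ j) < 0 ? -(int32_t) q : (int32_t) q;
}

For example, sdiv32_sketch (-7, 2) divides 7 by 2 and then negates, giving the truncated quotient -3 that these routines produce.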
- .global GLOBAL(sdivsi3_1) -GLOBAL(sdivsi3_1): - .global GLOBAL(div_table_internal) - movi (GLOBAL(div_table_internal) >> 16) & 65535, r20 - shori GLOBAL(div_table_internal) & 65535, r20 -#endif - .global GLOBAL(sdivsi3_2) - // div_table in r20 - // clobbered: r1,r18,r19,r21,r25,tr0 -GLOBAL(sdivsi3_2): - nsb r5, r1 - shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 - shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) - ldx.ub r20, r21, r19 // u0.8 - shari r25, 32, r25 // normalize to s2.30 - shlli r21, 1, r21 - muls.l r25, r19, r19 // s2.38 - ldx.w r20, r21, r21 // s2.14 - ptabs r18, tr0 - shari r19, 24, r19 // truncate to s2.14 - sub r21, r19, r19 // some 11 bit inverse in s1.14 - muls.l r19, r19, r21 // u0.28 - sub r63, r1, r1 - addi r1, 92, r1 - muls.l r25, r21, r18 // s2.58 - shlli r19, 45, r19 // multiply by two and convert to s2.58 - /* bubble */ - sub r19, r18, r18 - shari r18, 28, r18 // some 22 bit inverse in s1.30 - muls.l r18, r25, r0 // s2.60 - muls.l r18, r4, r25 // s32.30 - /* bubble */ - shari r0, 16, r19 // s-16.44 - muls.l r19, r18, r19 // s-16.74 - shari r25, 63, r0 - shari r4, 14, r18 // s19.-14 - shari r19, 30, r19 // s-16.44 - muls.l r19, r18, r19 // s15.30 - xor r21, r0, r21 // You could also use the constant 1 << 27. - add r21, r25, r21 - sub r21, r19, r21 - shard r21, r1, r21 - sub r21, r0, r0 - blink tr0, r63 -#ifndef __pic__ - ENDFUNC(GLOBAL(sdivsi3)) -#endif - ENDFUNC(GLOBAL(sdivsi3_2)) -#endif -#elif defined __SHMEDIA__ -/* m5compact-nofpu */ - // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2 - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - FUNC(GLOBAL(sdivsi3)) -GLOBAL(sdivsi3): - pt/l LOCAL(sdivsi3_dontsub), tr0 - pt/l LOCAL(sdivsi3_loop), tr1 - ptabs/l r18,tr2 - shari.l r4,31,r18 - shari.l r5,31,r19 - xor r4,r18,r20 - xor r5,r19,r21 - sub.l r20,r18,r20 - sub.l r21,r19,r21 - xor r18,r19,r19 - shlli r21,32,r25 - addi r25,-1,r21 - addz.l r20,r63,r20 -LOCAL(sdivsi3_loop): - shlli r20,1,r20 - bgeu/u r21,r20,tr0 - sub r20,r21,r20 -LOCAL(sdivsi3_dontsub): - addi.l r25,-1,r25 - bnei r25,-32,tr1 - xor r20,r19,r20 - sub.l r20,r19,r0 - blink tr2,r63 - ENDFUNC(GLOBAL(sdivsi3)) -#else /* ! __SHMEDIA__ */ - FUNC(GLOBAL(sdivsi3)) -GLOBAL(sdivsi3): - mov r4,r1 - mov r5,r0 - - tst r0,r0 - bt div0 - mov #0,r2 - div0s r2,r1 - subc r3,r3 - subc r2,r1 - div0s r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - addc r2,r1 - rts - mov r1,r0 - - -div0: rts - mov #0,r0 - - ENDFUNC(GLOBAL(sdivsi3)) -#endif /* ! __SHMEDIA__ */ -#endif /* ! __SH4__ */ -#endif -#ifdef L_udivsi3_i4 - - .title "SH DIVIDE" -!! 4 byte integer Divide code for the Renesas SH -#ifdef __SH4__ -!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4, -!! 
and t bit - - .global GLOBAL(udivsi3_i4) - HIDDEN_FUNC(GLOBAL(udivsi3_i4)) -GLOBAL(udivsi3_i4): - mov #1,r1 - cmp/hi r1,r5 - bf trivial - rotr r1 - xor r1,r4 - lds r4,fpul - mova L1,r0 -#ifdef FMOVD_WORKS - fmov.d @r0+,dr4 -#else - fmov.s @r0+,DR40 - fmov.s @r0,DR41 -#endif - float fpul,dr0 - xor r1,r5 - lds r5,fpul - float fpul,dr2 - fadd dr4,dr0 - fadd dr4,dr2 - fdiv dr2,dr0 - rts - ftrc dr0,fpul - -trivial: - rts - lds r4,fpul - - .align 2 -#ifdef FMOVD_WORKS - .align 3 ! make double below 8 byte aligned. -#endif -L1: - .double 2147483648 - - ENDFUNC(GLOBAL(udivsi3_i4)) -#elif defined (__SH5__) && ! defined (__SH4_NOFPU__) -#if ! __SH5__ || __SH5__ == 32 -!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33 - .mode SHmedia - .global GLOBAL(udivsi3_i4) - HIDDEN_FUNC(GLOBAL(udivsi3_i4)) -GLOBAL(udivsi3_i4): - addz.l r4,r63,r20 - addz.l r5,r63,r21 - fmov.qd r20,dr0 - fmov.qd r21,dr32 - ptabs r18,tr0 - float.qd dr0,dr0 - float.qd dr32,dr32 - fdiv.d dr0,dr32,dr0 - ftrc.dq dr0,dr32 - fmov.s fr33,fr32 - blink tr0,r63 - - ENDFUNC(GLOBAL(udivsi3_i4)) -#endif /* ! __SH5__ || __SH5__ == 32 */ -#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) -!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 - - .global GLOBAL(udivsi3_i4) - HIDDEN_FUNC(GLOBAL(udivsi3_i4)) -GLOBAL(udivsi3_i4): - mov #1,r1 - cmp/hi r1,r5 - bf trivial - sts.l fpscr,@-r15 - mova L1,r0 - lds.l @r0+,fpscr - rotr r1 - xor r1,r4 - lds r4,fpul -#ifdef FMOVD_WORKS - fmov.d @r0+,dr4 -#else - fmov.s @r0+,DR40 - fmov.s @r0,DR41 -#endif - float fpul,dr0 - xor r1,r5 - lds r5,fpul - float fpul,dr2 - fadd dr4,dr0 - fadd dr4,dr2 - fdiv dr2,dr0 - ftrc dr0,fpul - rts - lds.l @r15+,fpscr - -#ifdef FMOVD_WORKS - .align 3 ! make double below 8 byte aligned. -#endif -trivial: - rts - lds r4,fpul - - .align 2 -L1: -#ifndef FMOVD_WORKS - .long 0x80000 -#else - .long 0x180000 -#endif - .double 2147483648 - - ENDFUNC(GLOBAL(udivsi3_i4)) -#endif /* ! __SH4__ */ -#endif - -#ifdef L_udivsi3 -/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with - sh2e/sh3e code. */ -#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) - -!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit - .global GLOBAL(udivsi3) - HIDDEN_FUNC(GLOBAL(udivsi3)) - -#if __SHMEDIA__ -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif - .align 2 -#if 0 -/* The assembly code that follows is a hand-optimized version of the C - code that follows. Note that the registers that are modified are - exactly those listed as clobbered in the patterns udivsi3_i1 and - udivsi3_i1_media. - -unsigned -__udivsi3 (i, j) - unsigned i, j; -{ - register unsigned long long r0 asm ("r0") = 0; - register unsigned long long r18 asm ("r18") = 1; - register unsigned long long r4 asm ("r4") = i; - register unsigned long long r19 asm ("r19") = j; - - r19 <<= 31; - r18 <<= 31; - do - if (r4 >= r19) - r0 |= r18, r4 -= r19; - while (r19 >>= 1, r18 >>= 1); - - return r0; -} -*/ -GLOBAL(udivsi3): - pt/l LOCAL(udivsi3_dontadd), tr2 - pt/l LOCAL(udivsi3_loop), tr1 - ptabs/l r18, tr0 - movi 0, r0 - movi 1, r18 - addz.l r5, r63, r19 - addz.l r4, r63, r4 - shlli r19, 31, r19 - shlli r18, 31, r18 -LOCAL(udivsi3_loop): - bgtu r19, r4, tr2 - or r0, r18, r0 - sub r4, r19, r4 -LOCAL(udivsi3_dontadd): - shlri r18, 1, r18 - shlri r19, 1, r19 - bnei r18, 0, tr1 - blink tr0, r63 -#else -GLOBAL(udivsi3): - // inputs: r4,r5 - // clobbered: r18,r19,r20,r21,r22,r25,tr0 - // result in r0. 
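The FPU-based udivsi3_i4 variants above have only a signed integer-to-double conversion available, so they flip the sign bit of each operand before loading it into the FPU (the rotr turns the 1 in r1 into 0x80000000) and add 2^31 (the .double 2147483648 constant) back after the conversion; since every 32-bit value converts to double exactly, the sum is exactly the original unsigned operand, and the routine relies on the truncated double quotient matching the integer quotient. A C model of the same bias trick, for illustration only (udiv_via_double is not a routine in this file):

#include <stdint.h>

uint32_t
udiv_via_double (uint32_t n, uint32_t d)
{
  if (d <= 1)
    return n;  /* 'trivial' path, as in the assembly: d == 1 (and d == 0) just returns n */
  /* Interpreted as a signed value, x ^ 0x80000000 equals x - 2^31 (this
     relies on the usual two's-complement wrap, which is what the hardware
     does), so converting it and adding 2^31.0 gives (double) x exactly.  */
  double dn = (double) (int32_t) (n ^ 0x80000000u) + 2147483648.0;
  double dd = (double) (int32_t) (d ^ 0x80000000u) + 2147483648.0;
  return (uint32_t) (dn / dd);  /* ftrc truncates toward zero, like this cast */
}

For example, udiv_via_double (0xFFFFFFFFu, 10) returns 429496729, the same quotient integer division gives.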
- addz.l r5,r63,r22 - nsb r22,r0 - shlld r22,r0,r25 - shlri r25,48,r25 - movi 0xffffffffffffbb0c,r20 // shift count eqiv 76 - sub r20,r25,r21 - mmulfx.w r21,r21,r19 - mshflo.w r21,r63,r21 - ptabs r18,tr0 - mmulfx.w r25,r19,r19 - sub r20,r0,r0 - /* bubble */ - msub.w r21,r19,r19 - addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21 - before the msub.w, but we need a different value for - r19 to keep errors under control. */ - mulu.l r4,r21,r18 - mmulfx.w r19,r19,r19 - shlli r21,15,r21 - shlrd r18,r0,r18 - mulu.l r18,r22,r20 - mmacnfx.wl r25,r19,r21 - /* bubble */ - sub r4,r20,r25 - - mulu.l r25,r21,r19 - addi r0,14,r0 - /* bubble */ - shlrd r19,r0,r19 - mulu.l r19,r22,r20 - add r18,r19,r18 - /* bubble */ - sub.l r25,r20,r25 - - mulu.l r25,r21,r19 - addz.l r25,r63,r25 - sub r25,r22,r25 - shlrd r19,r0,r19 - mulu.l r19,r22,r20 - addi r25,1,r25 - add r18,r19,r18 - - cmpgt r25,r20,r25 - add.l r18,r25,r0 - blink tr0,r63 -#endif -#elif defined (__SHMEDIA__) -/* m5compact-nofpu - more emphasis on code size than on speed, but don't - ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4. - So use a short shmedia loop. */ - // clobbered: r20,r21,r25,tr0,tr1,tr2 - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 -GLOBAL(udivsi3): - pt/l LOCAL(udivsi3_dontsub), tr0 - pt/l LOCAL(udivsi3_loop), tr1 - ptabs/l r18,tr2 - shlli r5,32,r25 - addi r25,-1,r21 - addz.l r4,r63,r20 -LOCAL(udivsi3_loop): - shlli r20,1,r20 - bgeu/u r21,r20,tr0 - sub r20,r21,r20 -LOCAL(udivsi3_dontsub): - addi.l r25,-1,r25 - bnei r25,-32,tr1 - add.l r20,r63,r0 - blink tr2,r63 -#else /* ! defined (__SHMEDIA__) */ -LOCAL(div8): - div1 r5,r4 -LOCAL(div7): - div1 r5,r4; div1 r5,r4; div1 r5,r4 - div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 - -LOCAL(divx4): - div1 r5,r4; rotcl r0 - div1 r5,r4; rotcl r0 - div1 r5,r4; rotcl r0 - rts; div1 r5,r4 - -GLOBAL(udivsi3): - sts.l pr,@-r15 - extu.w r5,r0 - cmp/eq r5,r0 -#ifdef __sh1__ - bf LOCAL(large_divisor) -#else - bf/s LOCAL(large_divisor) -#endif - div0u - swap.w r4,r0 - shlr16 r4 - bsr LOCAL(div8) - shll16 r5 - bsr LOCAL(div7) - div1 r5,r4 - xtrct r4,r0 - xtrct r0,r4 - bsr LOCAL(div8) - swap.w r4,r4 - bsr LOCAL(div7) - div1 r5,r4 - lds.l @r15+,pr - xtrct r4,r0 - swap.w r0,r0 - rotcl r0 - rts - shlr16 r5 - -LOCAL(large_divisor): -#ifdef __sh1__ - div0u -#endif - mov #0,r0 - xtrct r4,r0 - xtrct r0,r4 - bsr LOCAL(divx4) - rotcl r0 - bsr LOCAL(divx4) - rotcl r0 - bsr LOCAL(divx4) - rotcl r0 - bsr LOCAL(divx4) - rotcl r0 - lds.l @r15+,pr - rts - rotcl r0 - - ENDFUNC(GLOBAL(udivsi3)) -#endif /* ! __SHMEDIA__ */ -#endif /* __SH4__ */ -#endif /* L_udivsi3 */ - -#ifdef L_udivdi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(udivdi3) - FUNC(GLOBAL(udivdi3)) -GLOBAL(udivdi3): - HIDDEN_ALIAS(udivdi3_internal,udivdi3) - shlri r3,1,r4 - nsb r4,r22 - shlld r3,r22,r6 - shlri r6,49,r5 - movi 0xffffffffffffbaf1,r21 /* .l shift count 17. 
*/ - sub r21,r5,r1 - mmulfx.w r1,r1,r4 - mshflo.w r1,r63,r1 - sub r63,r22,r20 // r63 == 64 % 64 - mmulfx.w r5,r4,r4 - pta LOCAL(large_divisor),tr0 - addi r20,32,r9 - msub.w r1,r4,r1 - madd.w r1,r1,r1 - mmulfx.w r1,r1,r4 - shlri r6,32,r7 - bgt/u r9,r63,tr0 // large_divisor - mmulfx.w r5,r4,r4 - shlri r2,32+14,r19 - addi r22,-31,r0 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r19,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - mulu.l r5,r3,r8 - mshalds.l r1,r21,r1 - shari r4,26,r4 - shlld r8,r0,r8 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r2,r8,r2 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ - - shlri r2,22,r21 - mulu.l r21,r1,r21 - shlld r5,r0,r8 - addi r20,30-22,r0 - shlrd r21,r0,r21 - mulu.l r21,r3,r5 - add r8,r21,r8 - mcmpgt.l r21,r63,r21 // See Note 1 - addi r20,30,r0 - mshfhi.l r63,r21,r21 - sub r2,r5,r2 - andc r2,r21,r2 - - /* small divisor: need a third divide step */ - mulu.l r2,r1,r7 - ptabs r18,tr0 - addi r2,1,r2 - shlrd r7,r0,r7 - mulu.l r7,r3,r5 - add r8,r7,r8 - sub r2,r3,r2 - cmpgt r2,r5,r5 - add r8,r5,r2 - /* could test r3 here to check for divide by zero. */ - blink tr0,r63 - -LOCAL(large_divisor): - mmulfx.w r5,r4,r4 - shlrd r2,r9,r25 - shlri r25,32,r8 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r8,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - shlri r5,14-1,r8 - mulu.l r8,r7,r5 - mshalds.l r1,r21,r1 - shari r4,26,r4 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r25,r5,r25 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ - - shlri r25,22,r21 - mulu.l r21,r1,r21 - pta LOCAL(no_lo_adj),tr0 - addi r22,32,r0 - shlri r21,40,r21 - mulu.l r21,r7,r5 - add r8,r21,r8 - shlld r2,r0,r2 - sub r25,r5,r25 - bgtu/u r7,r25,tr0 // no_lo_adj - addi r8,1,r8 - sub r25,r7,r25 -LOCAL(no_lo_adj): - mextr4 r2,r25,r2 - - /* large_divisor: only needs a few adjustments. */ - mulu.l r8,r6,r5 - ptabs r18,tr0 - /* bubble */ - cmpgtu r5,r2,r5 - sub r8,r5,r2 - blink tr0,r63 - ENDFUNC(GLOBAL(udivdi3)) -/* Note 1: To shift the result of the second divide stage so that the result - always fits into 32 bits, yet we still reduce the rest sufficiently - would require a lot of instructions to do the shifts just right. Using - the full 64 bit shift result to multiply with the divisor would require - four extra instructions for the upper 32 bits (shift / mulu / shift / sub). - Fortunately, if the upper 32 bits of the shift result are nonzero, we - know that the rest after taking this partial result into account will - fit into 32 bits. So we just clear the upper 32 bits of the rest if the - upper 32 bits of the partial result are nonzero. 
*/ -#endif /* __SHMEDIA__ */ -#endif /* L_udivdi3 */ - -#ifdef L_divdi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(divdi3) - FUNC(GLOBAL(divdi3)) -GLOBAL(divdi3): - pta GLOBAL(udivdi3_internal),tr0 - shari r2,63,r22 - shari r3,63,r23 - xor r2,r22,r2 - xor r3,r23,r3 - sub r2,r22,r2 - sub r3,r23,r3 - beq/u r22,r23,tr0 - ptabs r18,tr1 - blink tr0,r18 - sub r63,r2,r2 - blink tr1,r63 - ENDFUNC(GLOBAL(divdi3)) -#endif /* __SHMEDIA__ */ -#endif /* L_divdi3 */ - -#ifdef L_umoddi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(umoddi3) - FUNC(GLOBAL(umoddi3)) -GLOBAL(umoddi3): - HIDDEN_ALIAS(umoddi3_internal,umoddi3) - shlri r3,1,r4 - nsb r4,r22 - shlld r3,r22,r6 - shlri r6,49,r5 - movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ - sub r21,r5,r1 - mmulfx.w r1,r1,r4 - mshflo.w r1,r63,r1 - sub r63,r22,r20 // r63 == 64 % 64 - mmulfx.w r5,r4,r4 - pta LOCAL(large_divisor),tr0 - addi r20,32,r9 - msub.w r1,r4,r1 - madd.w r1,r1,r1 - mmulfx.w r1,r1,r4 - shlri r6,32,r7 - bgt/u r9,r63,tr0 // large_divisor - mmulfx.w r5,r4,r4 - shlri r2,32+14,r19 - addi r22,-31,r0 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r19,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - mulu.l r5,r3,r5 - mshalds.l r1,r21,r1 - shari r4,26,r4 - shlld r5,r0,r5 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r2,r5,r2 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ - - shlri r2,22,r21 - mulu.l r21,r1,r21 - addi r20,30-22,r0 - /* bubble */ /* could test r3 here to check for divide by zero. */ - shlrd r21,r0,r21 - mulu.l r21,r3,r5 - mcmpgt.l r21,r63,r21 // See Note 1 - addi r20,30,r0 - mshfhi.l r63,r21,r21 - sub r2,r5,r2 - andc r2,r21,r2 - - /* small divisor: need a third divide step */ - mulu.l r2,r1,r7 - ptabs r18,tr0 - sub r2,r3,r8 /* re-use r8 here for rest - r3 */ - shlrd r7,r0,r7 - mulu.l r7,r3,r5 - /* bubble */ - addi r8,1,r7 - cmpgt r7,r5,r7 - cmvne r7,r8,r2 - sub r2,r5,r2 - blink tr0,r63 - -LOCAL(large_divisor): - mmulfx.w r5,r4,r4 - shlrd r2,r9,r25 - shlri r25,32,r8 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r8,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - shlri r5,14-1,r8 - mulu.l r8,r7,r5 - mshalds.l r1,r21,r1 - shari r4,26,r4 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r25,r5,r25 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ - - shlri r25,22,r21 - mulu.l r21,r1,r21 - pta LOCAL(no_lo_adj),tr0 - addi r22,32,r0 - shlri r21,40,r21 - mulu.l r21,r7,r5 - add r8,r21,r8 - shlld r2,r0,r2 - sub r25,r5,r25 - bgtu/u r7,r25,tr0 // no_lo_adj - addi r8,1,r8 - sub r25,r7,r25 -LOCAL(no_lo_adj): - mextr4 r2,r25,r2 - - /* large_divisor: only needs a few adjustments. 
*/ - mulu.l r8,r6,r5 - ptabs r18,tr0 - add r2,r6,r7 - cmpgtu r5,r2,r8 - cmvne r8,r7,r2 - sub r2,r5,r2 - shlrd r2,r22,r2 - blink tr0,r63 - ENDFUNC(GLOBAL(umoddi3)) -/* Note 1: To shift the result of the second divide stage so that the result - always fits into 32 bits, yet we still reduce the rest sufficiently - would require a lot of instructions to do the shifts just right. Using - the full 64 bit shift result to multiply with the divisor would require - four extra instructions for the upper 32 bits (shift / mulu / shift / sub). - Fortunately, if the upper 32 bits of the shift result are nonzero, we - know that the rest after taking this partial result into account will - fit into 32 bits. So we just clear the upper 32 bits of the rest if the - upper 32 bits of the partial result are nonzero. */ -#endif /* __SHMEDIA__ */ -#endif /* L_umoddi3 */ - -#ifdef L_moddi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(moddi3) - FUNC(GLOBAL(moddi3)) -GLOBAL(moddi3): - pta GLOBAL(umoddi3_internal),tr0 - shari r2,63,r22 - shari r3,63,r23 - xor r2,r22,r2 - xor r3,r23,r3 - sub r2,r22,r2 - sub r3,r23,r3 - beq/u r22,r63,tr0 - ptabs r18,tr1 - blink tr0,r18 - sub r63,r2,r2 - blink tr1,r63 - ENDFUNC(GLOBAL(moddi3)) -#endif /* __SHMEDIA__ */ -#endif /* L_moddi3 */ - -#ifdef L_set_fpscr -#if !defined (__SH2A_NOFPU__) -#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32 -#ifdef __SH5__ - .mode SHcompact -#endif - .global GLOBAL(set_fpscr) - HIDDEN_FUNC(GLOBAL(set_fpscr)) -GLOBAL(set_fpscr): - lds r4,fpscr -#ifdef __PIC__ - mov.l r12,@-r15 -#ifdef __vxworks - mov.l LOCAL(set_fpscr_L0_base),r12 - mov.l LOCAL(set_fpscr_L0_index),r0 - mov.l @r12,r12 - mov.l @(r0,r12),r12 -#else - mova LOCAL(set_fpscr_L0),r0 - mov.l LOCAL(set_fpscr_L0),r12 - add r0,r12 -#endif - mov.l LOCAL(set_fpscr_L1),r0 - mov.l @(r0,r12),r1 - mov.l @r15+,r12 -#else - mov.l LOCAL(set_fpscr_L1),r1 -#endif - swap.w r4,r0 - or #24,r0 -#ifndef FMOVD_WORKS - xor #16,r0 -#endif -#if defined(__SH4__) || defined (__SH2A_DOUBLE__) - swap.w r0,r3 - mov.l r3,@(4,r1) -#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ - swap.w r0,r2 - mov.l r2,@r1 -#endif -#ifndef FMOVD_WORKS - xor #8,r0 -#else - xor #24,r0 -#endif -#if defined(__SH4__) || defined (__SH2A_DOUBLE__) - swap.w r0,r2 - rts - mov.l r2,@r1 -#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ - swap.w r0,r3 - rts - mov.l r3,@(4,r1) -#endif - .align 2 -#ifdef __PIC__ -#ifdef __vxworks -LOCAL(set_fpscr_L0_base): - .long ___GOTT_BASE__ -LOCAL(set_fpscr_L0_index): - .long ___GOTT_INDEX__ -#else -LOCAL(set_fpscr_L0): - .long _GLOBAL_OFFSET_TABLE_ -#endif -LOCAL(set_fpscr_L1): - .long GLOBAL(fpscr_values@GOT) -#else -LOCAL(set_fpscr_L1): - .long GLOBAL(fpscr_values) -#endif - - ENDFUNC(GLOBAL(set_fpscr)) -#ifndef NO_FPSCR_VALUES -#ifdef __ELF__ - .comm GLOBAL(fpscr_values),8,4 -#else - .comm GLOBAL(fpscr_values),8 -#endif /* ELF */ -#endif /* NO_FPSCR_VALUES */ -#endif /* SH2E / SH3E / SH4 */ -#endif /* __SH2A_NOFPU__ */ -#endif /* L_set_fpscr */ -#ifdef L_ic_invalidate -#if __SH5__ == 32 - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(init_trampoline) - HIDDEN_FUNC(GLOBAL(init_trampoline)) -GLOBAL(init_trampoline): - st.l r0,8,r2 -#ifdef __LITTLE_ENDIAN__ - movi 9,r20 - shori 0x402b,r20 - shori 0xd101,r20 - shori 0xd002,r20 -#else - movi 0xffffffffffffd002,r20 
- shori 0xd101,r20 - shori 0x402b,r20 - shori 9,r20 -#endif - st.q r0,0,r20 - st.l r0,12,r3 - ENDFUNC(GLOBAL(init_trampoline)) - .global GLOBAL(ic_invalidate) - HIDDEN_FUNC(GLOBAL(ic_invalidate)) -GLOBAL(ic_invalidate): - ocbwb r0,0 - synco - icbi r0, 0 - ptabs r18, tr0 - synci - blink tr0, r63 - ENDFUNC(GLOBAL(ic_invalidate)) -#elif defined(__SH4A__) - .global GLOBAL(ic_invalidate) - HIDDEN_FUNC(GLOBAL(ic_invalidate)) -GLOBAL(ic_invalidate): - ocbwb @r4 - synco - icbi @r4 - rts - nop - ENDFUNC(GLOBAL(ic_invalidate)) -#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) - /* For system code, we use ic_invalidate_line_i, but user code - needs a different mechanism. A kernel call is generally not - available, and it would also be slow. Different SH4 variants use - different sizes and associativities of the Icache. We use a small - bit of dispatch code that can be put hidden in every shared object, - which calls the actual processor-specific invalidation code in a - separate module. - Or if you have operating system support, the OS could mmap the - procesor-specific code from a single page, since it is highly - repetitive. */ - .global GLOBAL(ic_invalidate) - HIDDEN_FUNC(GLOBAL(ic_invalidate)) -GLOBAL(ic_invalidate): -#ifdef __pic__ -#ifdef __vxworks - mov.l 1f,r1 - mov.l 2f,r0 - mov.l @r1,r1 - mov.l 0f,r2 - mov.l @(r0,r1),r0 -#else - mov.l 1f,r1 - mova 1f,r0 - mov.l 0f,r2 - add r1,r0 -#endif - mov.l @(r0,r2),r1 -#else - mov.l 0f,r1 -#endif - ocbwb @r4 - mov.l @(8,r1),r0 - sub r1,r4 - and r4,r0 - add r1,r0 - jmp @r0 - mov.l @(4,r1),r0 - .align 2 -#ifndef __pic__ -0: .long GLOBAL(ic_invalidate_array) -#else /* __pic__ */ - .global GLOBAL(ic_invalidate_array) -0: .long GLOBAL(ic_invalidate_array)@GOT -#ifdef __vxworks -1: .long ___GOTT_BASE__ -2: .long ___GOTT_INDEX__ -#else -1: .long _GLOBAL_OFFSET_TABLE_ -#endif - ENDFUNC(GLOBAL(ic_invalidate)) -#endif /* __pic__ */ -#endif /* SH4 */ -#endif /* L_ic_invalidate */ - -#ifdef L_ic_invalidate_array -#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)))) - .global GLOBAL(ic_invalidate_array) - /* This is needed when an SH4 dso with trampolines is used on SH4A. */ - .global GLOBAL(ic_invalidate_array) - FUNC(GLOBAL(ic_invalidate_array)) -GLOBAL(ic_invalidate_array): - add r1,r4 - synco - icbi @r4 - rts - nop - .align 2 - .long 0 - ENDFUNC(GLOBAL(ic_invalidate_array)) -#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) - .global GLOBAL(ic_invalidate_array) - .p2align 5 - FUNC(GLOBAL(ic_invalidate_array)) -/* This must be aligned to the beginning of a cache line. */ -GLOBAL(ic_invalidate_array): -#ifndef WAYS -#define WAYS 4 -#define WAY_SIZE 0x4000 -#endif -#if WAYS == 1 - .rept WAY_SIZE * WAYS / 32 - rts - nop - .rept 7 - .long WAY_SIZE - 32 - .endr - .endr -#elif WAYS <= 6 - .rept WAY_SIZE * WAYS / 32 - braf r0 - add #-8,r0 - .long WAY_SIZE + 8 - .long WAY_SIZE - 32 - .rept WAYS-2 - braf r0 - nop - .endr - .rept 7 - WAYS - rts - nop - .endr - .endr -#else /* WAYS > 6 */ - /* This variant needs two different pages for mmap-ing. 
*/ - .rept WAYS-1 - .rept WAY_SIZE / 32 - braf r0 - nop - .long WAY_SIZE - .rept 6 - .long WAY_SIZE - 32 - .endr - .endr - .endr - .rept WAY_SIZE / 32 - rts - .rept 15 - nop - .endr - .endr -#endif /* WAYS */ - ENDFUNC(GLOBAL(ic_invalidate_array)) -#endif /* SH4 */ -#endif /* L_ic_invalidate_array */ - -#if defined (__SH5__) && __SH5__ == 32 -#ifdef L_shcompact_call_trampoline - .section .rodata - .align 1 -LOCAL(ct_main_table): -.word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label) - .mode SHmedia - .section .text..SHmedia32, "ax" - .align 2 - - /* This function loads 64-bit general-purpose registers from the - stack, from a memory address contained in them or from an FP - register, according to a cookie passed in r1. Its execution - time is linear on the number of registers that actually have - to be copied. See sh.h for details on the actual bit pattern. - - The function to be called is passed in r0. If a 32-bit return - value is expected, the actual function will be tail-called, - otherwise the return address will be stored in r10 (that the - caller should expect to be clobbered) and the return value - will be expanded into r2/r3 upon return. */ - - .global GLOBAL(GCC_shcompact_call_trampoline) - FUNC(GLOBAL(GCC_shcompact_call_trampoline)) -GLOBAL(GCC_shcompact_call_trampoline): - ptabs/l r0, tr0 /* Prepare to call the actual function. */ - movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0 - pt/l LOCAL(ct_loop), tr1 - addz.l r1, r63, r1 - shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 -LOCAL(ct_loop): - nsb r1, r28 - shlli r28, 1, r29 - ldx.w r0, r29, r30 -LOCAL(ct_main_label): - ptrel/l r30, tr2 - blink tr2, r63 -LOCAL(ct_r2_fp): /* Copy r2 from an FP register. 
*/ - /* It must be dr0, so just do it. */ - fmov.dq dr0, r2 - movi 7, r30 - shlli r30, 29, r31 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */ - /* It is either dr0 or dr2. */ - movi 7, r30 - shlri r1, 26, r32 - shlli r30, 26, r31 - andc r1, r31, r1 - fmov.dq dr0, r3 - beqi/l r32, 4, tr1 - fmov.dq dr2, r3 - blink tr1, r63 -LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */ - shlri r1, 23 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32 -LOCAL(ct_r4_fp_base): - ptrel/l r32, tr2 - movi 7, r30 - shlli r30, 23, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r4_fp_copy): - fmov.dq dr0, r4 - blink tr1, r63 - fmov.dq dr2, r4 - blink tr1, r63 - fmov.dq dr4, r4 - blink tr1, r63 -LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */ - shlri r1, 20 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32 -LOCAL(ct_r5_fp_base): - ptrel/l r32, tr2 - movi 7, r30 - shlli r30, 20, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r5_fp_copy): - fmov.dq dr0, r5 - blink tr1, r63 - fmov.dq dr2, r5 - blink tr1, r63 - fmov.dq dr4, r5 - blink tr1, r63 - fmov.dq dr6, r5 - blink tr1, r63 -LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */ - /* It must be dr8. */ - fmov.dq dr8, r6 - movi 15, r30 - shlli r30, 16, r31 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */ - shlri r1, 16 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32 -LOCAL(ct_r6_fp_base): - ptrel/l r32, tr2 - movi 7, r30 - shlli r30, 16, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r6_fp_copy): - fmov.dq dr0, r6 - blink tr1, r63 - fmov.dq dr2, r6 - blink tr1, r63 - fmov.dq dr4, r6 - blink tr1, r63 - fmov.dq dr6, r6 - blink tr1, r63 -LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */ - /* It is either dr8 or dr10. */ - movi 15 << 12, r31 - shlri r1, 12, r32 - andc r1, r31, r1 - fmov.dq dr8, r7 - beqi/l r32, 8, tr1 - fmov.dq dr10, r7 - blink tr1, r63 -LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */ - shlri r1, 12 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32 -LOCAL(ct_r7_fp_base): - ptrel/l r32, tr2 - movi 7 << 12, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r7_fp_copy): - fmov.dq dr0, r7 - blink tr1, r63 - fmov.dq dr2, r7 - blink tr1, r63 - fmov.dq dr4, r7 - blink tr1, r63 - fmov.dq dr6, r7 - blink tr1, r63 -LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */ - /* It is either dr8 or dr10. */ - movi 15 << 8, r31 - andi r1, 1 << 8, r32 - andc r1, r31, r1 - fmov.dq dr8, r8 - beq/l r32, r63, tr1 - fmov.dq dr10, r8 - blink tr1, r63 -LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */ - shlri r1, 8 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32 -LOCAL(ct_r8_fp_base): - ptrel/l r32, tr2 - movi 7 << 8, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r8_fp_copy): - fmov.dq dr0, r8 - blink tr1, r63 - fmov.dq dr2, r8 - blink tr1, r63 - fmov.dq dr4, r8 - blink tr1, r63 - fmov.dq dr6, r8 - blink tr1, r63 -LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */ - /* It is either dr8 or dr10. */ - movi 15 << 4, r31 - andi r1, 1 << 4, r32 - andc r1, r31, r1 - fmov.dq dr8, r9 - beq/l r32, r63, tr1 - fmov.dq dr10, r9 - blink tr1, r63 -LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. 
*/ - shlri r1, 4 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32 -LOCAL(ct_r9_fp_base): - ptrel/l r32, tr2 - movi 7 << 4, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r9_fp_copy): - fmov.dq dr0, r9 - blink tr1, r63 - fmov.dq dr2, r9 - blink tr1, r63 - fmov.dq dr4, r9 - blink tr1, r63 - fmov.dq dr6, r9 - blink tr1, r63 -LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */ - pt/l LOCAL(ct_r2_load), tr2 - movi 3, r30 - shlli r30, 29, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r2, 8, r3 - ldx.q r2, r63, r2 - /* Fall through. */ -LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */ - pt/l LOCAL(ct_r3_load), tr2 - movi 3, r30 - shlli r30, 26, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r3, 8, r4 - ldx.q r3, r63, r3 -LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */ - pt/l LOCAL(ct_r4_load), tr2 - movi 3, r30 - shlli r30, 23, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r4, 8, r5 - ldx.q r4, r63, r4 -LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */ - pt/l LOCAL(ct_r5_load), tr2 - movi 3, r30 - shlli r30, 20, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r5, 8, r6 - ldx.q r5, r63, r5 -LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */ - pt/l LOCAL(ct_r6_load), tr2 - movi 3 << 16, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r6, 8, r7 - ldx.q r6, r63, r6 -LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */ - pt/l LOCAL(ct_r7_load), tr2 - movi 3 << 12, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r7, 8, r8 - ldx.q r7, r63, r7 -LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */ - pt/l LOCAL(ct_r8_load), tr2 - movi 3 << 8, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r8, 8, r9 - ldx.q r8, r63, r8 -LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */ - pt/l LOCAL(ct_check_tramp), tr2 - ldx.q r9, r63, r9 - blink tr2, r63 -LOCAL(ct_r2_load): - ldx.q r2, r63, r2 - blink tr1, r63 -LOCAL(ct_r3_load): - ldx.q r3, r63, r3 - blink tr1, r63 -LOCAL(ct_r4_load): - ldx.q r4, r63, r4 - blink tr1, r63 -LOCAL(ct_r5_load): - ldx.q r5, r63, r5 - blink tr1, r63 -LOCAL(ct_r6_load): - ldx.q r6, r63, r6 - blink tr1, r63 -LOCAL(ct_r7_load): - ldx.q r7, r63, r7 - blink tr1, r63 -LOCAL(ct_r8_load): - ldx.q r8, r63, r8 - blink tr1, r63 -LOCAL(ct_r2_pop): /* Pop r2 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r2 - shlli r30, 29, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r3_pop): /* Pop r3 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r3 - shlli r30, 26, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r4_pop): /* Pop r4 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r4 - shlli r30, 23, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r5_pop): /* Pop r5 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r5 - shlli r30, 20, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r6_pop): /* Pop r6 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r6 - shlli r30, 16, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r7_pop): /* Pop r7 from the stack. */ - ldx.q r15, r63, r7 - movi 1 << 12, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r8_pop): /* Pop r8 from the stack. 
*/ - ldx.q r15, r63, r8 - movi 1 << 8, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */ - andi r1, 7 << 1, r30 - movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32 - shlli r30, 2, r31 - shori LOCAL(ct_end_of_pop_seq) & 65535, r32 - sub.l r32, r31, r33 - ptabs/l r33, tr2 - blink tr2, r63 -LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */ - ldx.q r15, r63, r3 - addi.l r15, 8, r15 - ldx.q r15, r63, r4 - addi.l r15, 8, r15 - ldx.q r15, r63, r5 - addi.l r15, 8, r15 - ldx.q r15, r63, r6 - addi.l r15, 8, r15 - ldx.q r15, r63, r7 - addi.l r15, 8, r15 - ldx.q r15, r63, r8 - addi.l r15, 8, r15 -LOCAL(ct_r9_pop): /* Pop r9 from the stack. */ - ldx.q r15, r63, r9 - addi.l r15, 8, r15 -LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */ -LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */ - pt/u LOCAL(ct_ret_wide), tr2 - andi r1, 1, r1 - bne/u r1, r63, tr2 -LOCAL(ct_call_func): /* Just branch to the function. */ - blink tr0, r63 -LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its - 64-bit return value. */ - add.l r18, r63, r10 - blink tr0, r18 - ptabs r10, tr0 -#if __LITTLE_ENDIAN__ - shari r2, 32, r3 - add.l r2, r63, r2 -#else - add.l r2, r63, r3 - shari r2, 32, r2 -#endif - blink tr0, r63 - - ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline)) -#endif /* L_shcompact_call_trampoline */ - -#ifdef L_shcompact_return_trampoline - /* This function does the converse of the code in `ret_wide' - above. It is tail-called by SHcompact functions returning - 64-bit non-floating-point values, to pack the 32-bit values in - r2 and r3 into r2. */ - - .mode SHmedia - .section .text..SHmedia32, "ax" - .align 2 - .global GLOBAL(GCC_shcompact_return_trampoline) - HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline)) -GLOBAL(GCC_shcompact_return_trampoline): - ptabs/l r18, tr0 -#if __LITTLE_ENDIAN__ - addz.l r2, r63, r2 - shlli r3, 32, r3 -#else - addz.l r3, r63, r3 - shlli r2, 32, r2 -#endif - or r3, r2, r2 - blink tr0, r63 - - ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline)) -#endif /* L_shcompact_return_trampoline */ - -#ifdef L_shcompact_incoming_args - .section .rodata - .align 1 -LOCAL(ia_main_table): -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r9_push) - datalabel 
LOCAL(ia_main_label) -.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) - .mode SHmedia - .section .text..SHmedia32, "ax" - .align 2 - - /* This function stores 64-bit general-purpose registers back in - the stack, and loads the address in which each register - was stored into itself. The lower 32 bits of r17 hold the address - to begin storing, and the upper 32 bits of r17 hold the cookie. - Its execution time is linear on the - number of registers that actually have to be copied, and it is - optimized for structures larger than 64 bits, as opposed to - individual `long long' arguments. See sh.h for details on the - actual bit pattern. */ - - .global GLOBAL(GCC_shcompact_incoming_args) - FUNC(GLOBAL(GCC_shcompact_incoming_args)) -GLOBAL(GCC_shcompact_incoming_args): - ptabs/l r18, tr0 /* Prepare to return. */ - shlri r17, 32, r0 /* Load the cookie. */ - movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 - pt/l LOCAL(ia_loop), tr1 - add.l r17, r63, r17 - shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 -LOCAL(ia_loop): - nsb r0, r36 - shlli r36, 1, r37 - ldx.w r43, r37, r38 -LOCAL(ia_main_label): - ptrel/l r38, tr2 - blink tr2, r63 -LOCAL(ia_r2_ld): /* Store r2 and load its address. */ - movi 3, r38 - shlli r38, 29, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r2 - add.l r17, r63, r2 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r3_ld): /* Store r3 and load its address. */ - movi 3, r38 - shlli r38, 26, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r3 - add.l r17, r63, r3 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r4_ld): /* Store r4 and load its address. */ - movi 3, r38 - shlli r38, 23, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r4 - add.l r17, r63, r4 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r5_ld): /* Store r5 and load its address. */ - movi 3, r38 - shlli r38, 20, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r5 - add.l r17, r63, r5 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r6_ld): /* Store r6 and load its address. */ - movi 3, r38 - shlli r38, 16, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r6 - add.l r17, r63, r6 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r7_ld): /* Store r7 and load its address. */ - movi 3 << 12, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r7 - add.l r17, r63, r7 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r8_ld): /* Store r8 and load its address. */ - movi 3 << 8, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r8 - add.l r17, r63, r8 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r9_ld): /* Store r9 and load its address. */ - stx.q r17, r63, r9 - add.l r17, r63, r9 - blink tr0, r63 -LOCAL(ia_r2_push): /* Push r2 onto the stack. */ - movi 1, r38 - shlli r38, 29, r39 - andc r0, r39, r0 - stx.q r17, r63, r2 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r3_push): /* Push r3 onto the stack. */ - movi 1, r38 - shlli r38, 26, r39 - andc r0, r39, r0 - stx.q r17, r63, r3 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r4_push): /* Push r4 onto the stack. */ - movi 1, r38 - shlli r38, 23, r39 - andc r0, r39, r0 - stx.q r17, r63, r4 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r5_push): /* Push r5 onto the stack. 
*/ - movi 1, r38 - shlli r38, 20, r39 - andc r0, r39, r0 - stx.q r17, r63, r5 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r6_push): /* Push r6 onto the stack. */ - movi 1, r38 - shlli r38, 16, r39 - andc r0, r39, r0 - stx.q r17, r63, r6 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r7_push): /* Push r7 onto the stack. */ - movi 1 << 12, r39 - andc r0, r39, r0 - stx.q r17, r63, r7 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r8_push): /* Push r8 onto the stack. */ - movi 1 << 8, r39 - andc r0, r39, r0 - stx.q r17, r63, r8 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */ - andi r0, 7 << 1, r38 - movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 - shlli r38, 2, r39 - shori LOCAL(ia_end_of_push_seq) & 65535, r40 - sub.l r40, r39, r41 - ptabs/l r41, tr2 - blink tr2, r63 -LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */ - stx.q r17, r63, r3 - addi.l r17, 8, r17 - stx.q r17, r63, r4 - addi.l r17, 8, r17 - stx.q r17, r63, r5 - addi.l r17, 8, r17 - stx.q r17, r63, r6 - addi.l r17, 8, r17 - stx.q r17, r63, r7 - addi.l r17, 8, r17 - stx.q r17, r63, r8 - addi.l r17, 8, r17 -LOCAL(ia_r9_push): /* Push r9 onto the stack. */ - stx.q r17, r63, r9 -LOCAL(ia_return): /* Return. */ - blink tr0, r63 -LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */ - ENDFUNC(GLOBAL(GCC_shcompact_incoming_args)) -#endif /* L_shcompact_incoming_args */ -#endif -#if __SH5__ -#ifdef L_nested_trampoline -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif - .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */ - .global GLOBAL(GCC_nested_trampoline) - HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline)) -GLOBAL(GCC_nested_trampoline): - .mode SHmedia - ptrel/u r63, tr0 - gettr tr0, r0 -#if __SH5__ == 64 - ld.q r0, 24, r1 -#else - ld.l r0, 24, r1 -#endif - ptabs/l r1, tr1 -#if __SH5__ == 64 - ld.q r0, 32, r1 -#else - ld.l r0, 28, r1 -#endif - blink tr1, r63 - - ENDFUNC(GLOBAL(GCC_nested_trampoline)) -#endif /* L_nested_trampoline */ -#endif /* __SH5__ */ -#if __SH5__ == 32 -#ifdef L_push_pop_shmedia_regs - .section .text..SHmedia32,"ax" - .mode SHmedia - .align 2 -#ifndef __SH4_NOFPU__ - .global GLOBAL(GCC_push_shmedia_regs) - FUNC(GLOBAL(GCC_push_shmedia_regs)) -GLOBAL(GCC_push_shmedia_regs): - addi.l r15, -14*8, r15 - fst.d r15, 13*8, dr62 - fst.d r15, 12*8, dr60 - fst.d r15, 11*8, dr58 - fst.d r15, 10*8, dr56 - fst.d r15, 9*8, dr54 - fst.d r15, 8*8, dr52 - fst.d r15, 7*8, dr50 - fst.d r15, 6*8, dr48 - fst.d r15, 5*8, dr46 - fst.d r15, 4*8, dr44 - fst.d r15, 3*8, dr42 - fst.d r15, 2*8, dr40 - fst.d r15, 1*8, dr38 - fst.d r15, 0*8, dr36 -#else /* ! __SH4_NOFPU__ */ - .global GLOBAL(GCC_push_shmedia_regs_nofpu) - FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) -GLOBAL(GCC_push_shmedia_regs_nofpu): -#endif /* ! 
__SH4_NOFPU__ */ - ptabs/l r18, tr0 - addi.l r15, -27*8, r15 - gettr tr7, r62 - gettr tr6, r61 - gettr tr5, r60 - st.q r15, 26*8, r62 - st.q r15, 25*8, r61 - st.q r15, 24*8, r60 - st.q r15, 23*8, r59 - st.q r15, 22*8, r58 - st.q r15, 21*8, r57 - st.q r15, 20*8, r56 - st.q r15, 19*8, r55 - st.q r15, 18*8, r54 - st.q r15, 17*8, r53 - st.q r15, 16*8, r52 - st.q r15, 15*8, r51 - st.q r15, 14*8, r50 - st.q r15, 13*8, r49 - st.q r15, 12*8, r48 - st.q r15, 11*8, r47 - st.q r15, 10*8, r46 - st.q r15, 9*8, r45 - st.q r15, 8*8, r44 - st.q r15, 7*8, r35 - st.q r15, 6*8, r34 - st.q r15, 5*8, r33 - st.q r15, 4*8, r32 - st.q r15, 3*8, r31 - st.q r15, 2*8, r30 - st.q r15, 1*8, r29 - st.q r15, 0*8, r28 - blink tr0, r63 -#ifndef __SH4_NOFPU__ - ENDFUNC(GLOBAL(GCC_push_shmedia_regs)) -#else - ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) -#endif -#ifndef __SH4_NOFPU__ - .global GLOBAL(GCC_pop_shmedia_regs) - FUNC(GLOBAL(GCC_pop_shmedia_regs)) -GLOBAL(GCC_pop_shmedia_regs): - pt .L0, tr1 - movi 41*8, r0 - fld.d r15, 40*8, dr62 - fld.d r15, 39*8, dr60 - fld.d r15, 38*8, dr58 - fld.d r15, 37*8, dr56 - fld.d r15, 36*8, dr54 - fld.d r15, 35*8, dr52 - fld.d r15, 34*8, dr50 - fld.d r15, 33*8, dr48 - fld.d r15, 32*8, dr46 - fld.d r15, 31*8, dr44 - fld.d r15, 30*8, dr42 - fld.d r15, 29*8, dr40 - fld.d r15, 28*8, dr38 - fld.d r15, 27*8, dr36 - blink tr1, r63 -#else /* ! __SH4_NOFPU__ */ - .global GLOBAL(GCC_pop_shmedia_regs_nofpu) - FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) -GLOBAL(GCC_pop_shmedia_regs_nofpu): -#endif /* ! __SH4_NOFPU__ */ - movi 27*8, r0 -.L0: - ptabs r18, tr0 - ld.q r15, 26*8, r62 - ld.q r15, 25*8, r61 - ld.q r15, 24*8, r60 - ptabs r62, tr7 - ptabs r61, tr6 - ptabs r60, tr5 - ld.q r15, 23*8, r59 - ld.q r15, 22*8, r58 - ld.q r15, 21*8, r57 - ld.q r15, 20*8, r56 - ld.q r15, 19*8, r55 - ld.q r15, 18*8, r54 - ld.q r15, 17*8, r53 - ld.q r15, 16*8, r52 - ld.q r15, 15*8, r51 - ld.q r15, 14*8, r50 - ld.q r15, 13*8, r49 - ld.q r15, 12*8, r48 - ld.q r15, 11*8, r47 - ld.q r15, 10*8, r46 - ld.q r15, 9*8, r45 - ld.q r15, 8*8, r44 - ld.q r15, 7*8, r35 - ld.q r15, 6*8, r34 - ld.q r15, 5*8, r33 - ld.q r15, 4*8, r32 - ld.q r15, 3*8, r31 - ld.q r15, 2*8, r30 - ld.q r15, 1*8, r29 - ld.q r15, 0*8, r28 - add.l r15, r0, r15 - blink tr0, r63 - -#ifndef __SH4_NOFPU__ - ENDFUNC(GLOBAL(GCC_pop_shmedia_regs)) -#else - ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) -#endif -#endif /* __SH5__ == 32 */ -#endif /* L_push_pop_shmedia_regs */ - -#ifdef L_div_table -#if __SH5__ -#if defined(__pic__) && defined(__SHMEDIA__) - .global GLOBAL(sdivsi3) - FUNC(GLOBAL(sdivsi3)) -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif -#if 0 -/* ??? FIXME: Presumably due to a linker bug, exporting data symbols - in a text section does not work (at least for shared libraries): - the linker sets the LSB of the address as if this was SHmedia code. 
*/ -#define TEXT_DATA_BUG -#endif - .align 2 - // inputs: r4,r5 - // clobbered: r1,r18,r19,r20,r21,r25,tr0 - // result in r0 - .global GLOBAL(sdivsi3) -GLOBAL(sdivsi3): -#ifdef TEXT_DATA_BUG - ptb datalabel Local_div_table,tr0 -#else - ptb GLOBAL(div_table_internal),tr0 -#endif - nsb r5, r1 - shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 - shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) - /* bubble */ - gettr tr0,r20 - ldx.ub r20, r21, r19 // u0.8 - shari r25, 32, r25 // normalize to s2.30 - shlli r21, 1, r21 - muls.l r25, r19, r19 // s2.38 - ldx.w r20, r21, r21 // s2.14 - ptabs r18, tr0 - shari r19, 24, r19 // truncate to s2.14 - sub r21, r19, r19 // some 11 bit inverse in s1.14 - muls.l r19, r19, r21 // u0.28 - sub r63, r1, r1 - addi r1, 92, r1 - muls.l r25, r21, r18 // s2.58 - shlli r19, 45, r19 // multiply by two and convert to s2.58 - /* bubble */ - sub r19, r18, r18 - shari r18, 28, r18 // some 22 bit inverse in s1.30 - muls.l r18, r25, r0 // s2.60 - muls.l r18, r4, r25 // s32.30 - /* bubble */ - shari r0, 16, r19 // s-16.44 - muls.l r19, r18, r19 // s-16.74 - shari r25, 63, r0 - shari r4, 14, r18 // s19.-14 - shari r19, 30, r19 // s-16.44 - muls.l r19, r18, r19 // s15.30 - xor r21, r0, r21 // You could also use the constant 1 << 27. - add r21, r25, r21 - sub r21, r19, r21 - shard r21, r1, r21 - sub r21, r0, r0 - blink tr0, r63 - ENDFUNC(GLOBAL(sdivsi3)) -/* This table has been generated by divtab.c . -Defects for bias -330: - Max defect: 6.081536e-07 at -1.000000e+00 - Min defect: 2.849516e-08 at 1.030651e+00 - Max 2nd step defect: 9.606539e-12 at -1.000000e+00 - Min 2nd step defect: 0.000000e+00 at 0.000000e+00 - Defect at 1: 1.238659e-07 - Defect at -2: 1.061708e-07 */ -#else /* ! __pic__ || ! __SHMEDIA__ */ - .section .rodata -#endif /* __pic__ */ -#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__) - .balign 2 - .type Local_div_table,@object - .size Local_div_table,128 -/* negative division constants */ - .word -16638 - .word -17135 - .word -17737 - .word -18433 - .word -19103 - .word -19751 - .word -20583 - .word -21383 - .word -22343 - .word -23353 - .word -24407 - .word -25582 - .word -26863 - .word -28382 - .word -29965 - .word -31800 -/* negative division factors */ - .byte 66 - .byte 70 - .byte 75 - .byte 81 - .byte 87 - .byte 93 - .byte 101 - .byte 109 - .byte 119 - .byte 130 - .byte 142 - .byte 156 - .byte 172 - .byte 192 - .byte 214 - .byte 241 - .skip 16 -Local_div_table: - .skip 16 -/* positive division factors */ - .byte 241 - .byte 214 - .byte 192 - .byte 172 - .byte 156 - .byte 142 - .byte 130 - .byte 119 - .byte 109 - .byte 101 - .byte 93 - .byte 87 - .byte 81 - .byte 75 - .byte 70 - .byte 66 -/* positive division constants */ - .word 31801 - .word 29966 - .word 28383 - .word 26864 - .word 25583 - .word 24408 - .word 23354 - .word 22344 - .word 21384 - .word 20584 - .word 19752 - .word 19104 - .word 18434 - .word 17738 - .word 17136 - .word 16639 - .section .rodata -#endif /* TEXT_DATA_BUG */ - .balign 2 - .type GLOBAL(div_table),@object - .size GLOBAL(div_table),128 -/* negative division constants */ - .word -16638 - .word -17135 - .word -17737 - .word -18433 - .word -19103 - .word -19751 - .word -20583 - .word -21383 - .word -22343 - .word -23353 - .word -24407 - .word -25582 - .word -26863 - .word -28382 - .word -29965 - .word -31800 -/* negative division factors */ - .byte 66 - .byte 70 - .byte 75 - .byte 81 - .byte 87 - .byte 93 - .byte 101 - .byte 109 - .byte 119 - .byte 130 - .byte 142 - .byte 156 - .byte 172 
- .byte 192 - .byte 214 - .byte 241 - .skip 16 - .global GLOBAL(div_table) -GLOBAL(div_table): - HIDDEN_ALIAS(div_table_internal,div_table) - .skip 16 -/* positive division factors */ - .byte 241 - .byte 214 - .byte 192 - .byte 172 - .byte 156 - .byte 142 - .byte 130 - .byte 119 - .byte 109 - .byte 101 - .byte 93 - .byte 87 - .byte 81 - .byte 75 - .byte 70 - .byte 66 -/* positive division constants */ - .word 31801 - .word 29966 - .word 28383 - .word 26864 - .word 25583 - .word 24408 - .word 23354 - .word 22344 - .word 21384 - .word 20584 - .word 19752 - .word 19104 - .word 18434 - .word 17738 - .word 17136 - .word 16639 - -#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) -/* This code used shld, thus is not suitable for SH1 / SH2. */ - -/* Signed / unsigned division without use of FPU, optimized for SH4. - Uses a lookup table for divisors in the range -128 .. +128, and - div1 with case distinction for larger divisors in three more ranges. - The code is lumped together with the table to allow the use of mova. */ -#ifdef __LITTLE_ENDIAN__ -#define L_LSB 0 -#define L_LSWMSB 1 -#define L_MSWLSB 2 -#else -#define L_LSB 3 -#define L_LSWMSB 2 -#define L_MSWLSB 1 -#endif - - .balign 4 - .global GLOBAL(udivsi3_i4i) - FUNC(GLOBAL(udivsi3_i4i)) -GLOBAL(udivsi3_i4i): - mov.w LOCAL(c128_w), r1 - div0u - mov r4,r0 - shlr8 r0 - cmp/hi r1,r5 - extu.w r5,r1 - bf LOCAL(udiv_le128) - cmp/eq r5,r1 - bf LOCAL(udiv_ge64k) - shlr r0 - mov r5,r1 - shll16 r5 - mov.l r4,@-r15 - div1 r5,r0 - mov.l r1,@-r15 - div1 r5,r0 - div1 r5,r0 - bra LOCAL(udiv_25) - div1 r5,r0 - -LOCAL(div_le128): - mova LOCAL(div_table_ix),r0 - bra LOCAL(div_le128_2) - mov.b @(r0,r5),r1 -LOCAL(udiv_le128): - mov.l r4,@-r15 - mova LOCAL(div_table_ix),r0 - mov.b @(r0,r5),r1 - mov.l r5,@-r15 -LOCAL(div_le128_2): - mova LOCAL(div_table_inv),r0 - mov.l @(r0,r1),r1 - mov r5,r0 - tst #0xfe,r0 - mova LOCAL(div_table_clz),r0 - dmulu.l r1,r4 - mov.b @(r0,r5),r1 - bt/s LOCAL(div_by_1) - mov r4,r0 - mov.l @r15+,r5 - sts mach,r0 - /* clrt */ - addc r4,r0 - mov.l @r15+,r4 - rotcr r0 - rts - shld r1,r0 - -LOCAL(div_by_1_neg): - neg r4,r0 -LOCAL(div_by_1): - mov.l @r15+,r5 - rts - mov.l @r15+,r4 - -LOCAL(div_ge64k): - bt/s LOCAL(div_r8) - div0u - shll8 r5 - bra LOCAL(div_ge64k_2) - div1 r5,r0 -LOCAL(udiv_ge64k): - cmp/hi r0,r5 - mov r5,r1 - bt LOCAL(udiv_r8) - shll8 r5 - mov.l r4,@-r15 - div1 r5,r0 - mov.l r1,@-r15 -LOCAL(div_ge64k_2): - div1 r5,r0 - mov.l LOCAL(zero_l),r1 - .rept 4 - div1 r5,r0 - .endr - mov.l r1,@-r15 - div1 r5,r0 - mov.w LOCAL(m256_w),r1 - div1 r5,r0 - mov.b r0,@(L_LSWMSB,r15) - xor r4,r0 - and r1,r0 - bra LOCAL(div_ge64k_end) - xor r4,r0 - -LOCAL(div_r8): - shll16 r4 - bra LOCAL(div_r8_2) - shll8 r4 -LOCAL(udiv_r8): - mov.l r4,@-r15 - shll16 r4 - clrt - shll8 r4 - mov.l r5,@-r15 -LOCAL(div_r8_2): - rotcl r4 - mov r0,r1 - div1 r5,r1 - mov r4,r0 - rotcl r0 - mov r5,r4 - div1 r5,r1 - .rept 5 - rotcl r0; div1 r5,r1 - .endr - rotcl r0 - mov.l @r15+,r5 - div1 r4,r1 - mov.l @r15+,r4 - rts - rotcl r0 - - ENDFUNC(GLOBAL(udivsi3_i4i)) - - .global GLOBAL(sdivsi3_i4i) - FUNC(GLOBAL(sdivsi3_i4i)) - /* This is link-compatible with a GLOBAL(sdivsi3) call, - but we effectively clobber only r1. 
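The udiv_le128/div_le128 paths above turn division by a small divisor into one dmulu.l by a precomputed normalized inverse followed by a shift and a small adjustment. A self-contained illustration of that general trick; the divisor 10, the constant 0xCCCCCCCD (ceil(2^35/10)) and the 35-bit shift are made up for this example and are not taken from the tables below:

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
        const uint64_t inv = 0xCCCCCCCDull;     /* ceil(2^35 / 10) */
        /* sampled check that (n * inv) >> 35 reproduces n / 10 */
        for (uint64_t n = 0; n <= 0xFFFFFFFFull; n += 12347)
            assert((uint32_t)((n * inv) >> 35) == (uint32_t)(n / 10));
        assert((uint32_t)((0xFFFFFFFFull * inv) >> 35) == 0xFFFFFFFFu / 10);
        return 0;
    }
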
*/ -GLOBAL(sdivsi3_i4i): - mov.l r4,@-r15 - cmp/pz r5 - mov.w LOCAL(c128_w), r1 - bt/s LOCAL(pos_divisor) - cmp/pz r4 - mov.l r5,@-r15 - neg r5,r5 - bt/s LOCAL(neg_result) - cmp/hi r1,r5 - neg r4,r4 -LOCAL(pos_result): - extu.w r5,r0 - bf LOCAL(div_le128) - cmp/eq r5,r0 - mov r4,r0 - shlr8 r0 - bf/s LOCAL(div_ge64k) - cmp/hi r0,r5 - div0u - shll16 r5 - div1 r5,r0 - div1 r5,r0 - div1 r5,r0 -LOCAL(udiv_25): - mov.l LOCAL(zero_l),r1 - div1 r5,r0 - div1 r5,r0 - mov.l r1,@-r15 - .rept 3 - div1 r5,r0 - .endr - mov.b r0,@(L_MSWLSB,r15) - xtrct r4,r0 - swap.w r0,r0 - .rept 8 - div1 r5,r0 - .endr - mov.b r0,@(L_LSWMSB,r15) -LOCAL(div_ge64k_end): - .rept 8 - div1 r5,r0 - .endr - mov.l @r15+,r4 ! zero-extension and swap using LS unit. - extu.b r0,r0 - mov.l @r15+,r5 - or r4,r0 - mov.l @r15+,r4 - rts - rotcl r0 - -LOCAL(div_le128_neg): - tst #0xfe,r0 - mova LOCAL(div_table_ix),r0 - mov.b @(r0,r5),r1 - mova LOCAL(div_table_inv),r0 - bt/s LOCAL(div_by_1_neg) - mov.l @(r0,r1),r1 - mova LOCAL(div_table_clz),r0 - dmulu.l r1,r4 - mov.b @(r0,r5),r1 - mov.l @r15+,r5 - sts mach,r0 - /* clrt */ - addc r4,r0 - mov.l @r15+,r4 - rotcr r0 - shld r1,r0 - rts - neg r0,r0 - -LOCAL(pos_divisor): - mov.l r5,@-r15 - bt/s LOCAL(pos_result) - cmp/hi r1,r5 - neg r4,r4 -LOCAL(neg_result): - extu.w r5,r0 - bf LOCAL(div_le128_neg) - cmp/eq r5,r0 - mov r4,r0 - shlr8 r0 - bf/s LOCAL(div_ge64k_neg) - cmp/hi r0,r5 - div0u - mov.l LOCAL(zero_l),r1 - shll16 r5 - div1 r5,r0 - mov.l r1,@-r15 - .rept 7 - div1 r5,r0 - .endr - mov.b r0,@(L_MSWLSB,r15) - xtrct r4,r0 - swap.w r0,r0 - .rept 8 - div1 r5,r0 - .endr - mov.b r0,@(L_LSWMSB,r15) -LOCAL(div_ge64k_neg_end): - .rept 8 - div1 r5,r0 - .endr - mov.l @r15+,r4 ! zero-extension and swap using LS unit. - extu.b r0,r1 - mov.l @r15+,r5 - or r4,r1 -LOCAL(div_r8_neg_end): - mov.l @r15+,r4 - rotcl r1 - rts - neg r1,r0 - -LOCAL(div_ge64k_neg): - bt/s LOCAL(div_r8_neg) - div0u - shll8 r5 - mov.l LOCAL(zero_l),r1 - .rept 6 - div1 r5,r0 - .endr - mov.l r1,@-r15 - div1 r5,r0 - mov.w LOCAL(m256_w),r1 - div1 r5,r0 - mov.b r0,@(L_LSWMSB,r15) - xor r4,r0 - and r1,r0 - bra LOCAL(div_ge64k_neg_end) - xor r4,r0 - -LOCAL(c128_w): - .word 128 - -LOCAL(div_r8_neg): - clrt - shll16 r4 - mov r4,r1 - shll8 r1 - mov r5,r4 - .rept 7 - rotcl r1; div1 r5,r0 - .endr - mov.l @r15+,r5 - rotcl r1 - bra LOCAL(div_r8_neg_end) - div1 r4,r0 - -LOCAL(m256_w): - .word 0xff00 -/* This table has been generated by divtab-sh4.c. 
*/ - .balign 4 -LOCAL(div_table_clz): - .byte 0 - .byte 1 - .byte 0 - .byte -1 - .byte -1 - .byte -2 - .byte -2 - .byte -2 - .byte -2 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 -/* Lookup table translating positive divisor to index into table of - normalized inverse. N.B. the '0' entry is also the last entry of the - previous table, and causes an unaligned access for division by zero. */ -LOCAL(div_table_ix): - .byte -6 - .byte -128 - .byte -128 - .byte 0 - .byte -128 - .byte -64 - .byte 0 - .byte 64 - .byte -128 - .byte -96 - .byte -64 - .byte -32 - .byte 0 - .byte 32 - .byte 64 - .byte 96 - .byte -128 - .byte -112 - .byte -96 - .byte -80 - .byte -64 - .byte -48 - .byte -32 - .byte -16 - .byte 0 - .byte 16 - .byte 32 - .byte 48 - .byte 64 - .byte 80 - .byte 96 - .byte 112 - .byte -128 - .byte -120 - .byte -112 - .byte -104 - .byte -96 - .byte -88 - .byte -80 - .byte -72 - .byte -64 - .byte -56 - .byte -48 - .byte -40 - .byte -32 - .byte -24 - .byte -16 - .byte -8 - .byte 0 - .byte 8 - .byte 16 - .byte 24 - .byte 32 - .byte 40 - .byte 48 - .byte 56 - .byte 64 - .byte 72 - .byte 80 - .byte 88 - .byte 96 - .byte 104 - .byte 112 - .byte 120 - .byte -128 - .byte -124 - .byte -120 - .byte -116 - .byte -112 - .byte -108 - .byte -104 - .byte -100 - .byte -96 - .byte -92 - .byte -88 - .byte -84 - .byte -80 - .byte -76 - .byte -72 - .byte -68 - .byte -64 - .byte -60 - .byte -56 - .byte -52 - .byte -48 - .byte -44 - .byte -40 - .byte -36 - .byte -32 - .byte -28 - .byte -24 - .byte -20 - .byte -16 - .byte -12 - .byte -8 - .byte -4 - .byte 0 - .byte 4 - .byte 8 - .byte 12 - .byte 16 - .byte 20 - .byte 24 - .byte 28 - .byte 32 - .byte 36 - .byte 40 - .byte 44 - .byte 48 - .byte 52 - .byte 56 - .byte 60 - .byte 64 - .byte 68 - .byte 72 - .byte 76 - .byte 80 - .byte 84 - .byte 88 - .byte 92 - .byte 96 - .byte 100 - .byte 104 - .byte 108 - .byte 112 - .byte 116 - .byte 120 - .byte 124 - .byte -128 -/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. 
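To make the comment above concrete: each 32-bit word of the 1/64 .. 1/127 table that follows stores only the fraction of the normalized reciprocal 128/d (the leading 1 being implicit), scaled by 2^32; spot checks suggest the generator rounds up, and d = 64 is exactly 1.0 so its entry is 0. inv_entry below is a made-up reconstruction for illustration, not divtab-sh4.c itself:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t inv_entry(uint32_t d)           /* 65 <= d <= 127 */
    {
        uint64_t num = (uint64_t)(128 - d) << 32;   /* ((128/d) - 1) * 2^32, times d */
        return (uint32_t)((num + d - 1) / d);       /* divide by d, rounding up */
    }

    int main(void)
    {
        printf("%#010x %#010x\n", inv_entry(65), inv_entry(96));
        /* prints 0xf81f81f9 0x55555556: the second word under zero_l and the
           first word under div_table_inv below. */
        return 0;
    }
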
*/ - .balign 4 -LOCAL(zero_l): - .long 0x0 - .long 0xF81F81F9 - .long 0xF07C1F08 - .long 0xE9131AC0 - .long 0xE1E1E1E2 - .long 0xDAE6076C - .long 0xD41D41D5 - .long 0xCD856891 - .long 0xC71C71C8 - .long 0xC0E07039 - .long 0xBACF914D - .long 0xB4E81B4F - .long 0xAF286BCB - .long 0xA98EF607 - .long 0xA41A41A5 - .long 0x9EC8E952 - .long 0x9999999A - .long 0x948B0FCE - .long 0x8F9C18FA - .long 0x8ACB90F7 - .long 0x86186187 - .long 0x81818182 - .long 0x7D05F418 - .long 0x78A4C818 - .long 0x745D1746 - .long 0x702E05C1 - .long 0x6C16C16D - .long 0x68168169 - .long 0x642C8591 - .long 0x60581606 - .long 0x5C9882BA - .long 0x58ED2309 -LOCAL(div_table_inv): - .long 0x55555556 - .long 0x51D07EAF - .long 0x4E5E0A73 - .long 0x4AFD6A06 - .long 0x47AE147B - .long 0x446F8657 - .long 0x41414142 - .long 0x3E22CBCF - .long 0x3B13B13C - .long 0x38138139 - .long 0x3521CFB3 - .long 0x323E34A3 - .long 0x2F684BDB - .long 0x2C9FB4D9 - .long 0x29E4129F - .long 0x27350B89 - .long 0x24924925 - .long 0x21FB7813 - .long 0x1F7047DD - .long 0x1CF06ADB - .long 0x1A7B9612 - .long 0x18118119 - .long 0x15B1E5F8 - .long 0x135C8114 - .long 0x11111112 - .long 0xECF56BF - .long 0xC9714FC - .long 0xA6810A7 - .long 0x8421085 - .long 0x624DD30 - .long 0x4104105 - .long 0x2040811 - /* maximum error: 0.987342 scaled: 0.921875*/ - - ENDFUNC(GLOBAL(sdivsi3_i4i)) -#endif /* SH3 / SH4 */ - -#endif /* L_div_table */ - -#ifdef L_udiv_qrnnd_16 -#if !__SHMEDIA__ - HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16)) - /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ - /* n1 < d, but n1 might be larger than d1. */ - .global GLOBAL(udiv_qrnnd_16) - .balign 8 -GLOBAL(udiv_qrnnd_16): - div0u - cmp/hi r6,r0 - bt .Lots - .rept 16 - div1 r6,r0 - .endr - extu.w r0,r1 - bt 0f - add r6,r0 -0: rotcl r1 - mulu.w r1,r5 - xtrct r4,r0 - swap.w r0,r0 - sts macl,r2 - cmp/hs r2,r0 - sub r2,r0 - bt 0f - addc r5,r0 - add #-1,r1 - bt 0f -1: add #-1,r1 - rts - add r5,r0 - .balign 8 -.Lots: - sub r5,r0 - swap.w r4,r1 - xtrct r0,r1 - clrt - mov r1,r0 - addc r5,r0 - mov #-1,r1 - SL1(bf, 1b, - shlr16 r1) -0: rts - nop - ENDFUNC(GLOBAL(udiv_qrnnd_16)) -#endif /* !__SHMEDIA__ */ -#endif /* L_udiv_qrnnd_16 */ diff --git a/gcc/config/sh/lib1funcs.h b/gcc/config/sh/lib1funcs.h deleted file mode 100644 index af4b41cc314..00000000000 --- a/gcc/config/sh/lib1funcs.h +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2009 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. 
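A side note on the naming macros defined in the rest of this header just below: GLOBAL() pastes __USER_LABEL_PREFIX__ onto a double-underscore name, which is why the same routine is __udivsi3 on plain ELF targets but gains a third underscore on targets whose user labels carry a leading '_' (the v850 file later in this patch spells its entry points ___udivsi3 and so on for the same reason). A preprocessor-only sketch; the macro bodies are copied from the header, the expansions are illustrative:

    #define CONCAT(A,B) A##B
    #define GLOBAL0(U,X) CONCAT(U,__##X)
    #define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)

    /* __USER_LABEL_PREFIX__ empty:        GLOBAL(udivsi3) expands to __udivsi3
       __USER_LABEL_PREFIX__ defined as _: GLOBAL(udivsi3) expands to ___udivsi3 */
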
*/ - -#ifdef __ELF__ -#define LOCAL(X) .L_##X -#define FUNC(X) .type X,@function -#define HIDDEN_FUNC(X) FUNC(X); .hidden X -#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X) -#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X -#define ENDFUNC(X) ENDFUNC0(X) -#else -#define LOCAL(X) L_##X -#define FUNC(X) -#define HIDDEN_FUNC(X) -#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y) -#define ENDFUNC(X) -#endif - -#define CONCAT(A,B) A##B -#define GLOBAL0(U,X) CONCAT(U,__##X) -#define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X) - -#define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y) - -#if defined __SH2A__ && defined __FMOVD_ENABLED__ -#undef FMOVD_WORKS -#define FMOVD_WORKS -#endif - -#ifdef __LITTLE_ENDIAN__ -#define DR00 fr1 -#define DR01 fr0 -#define DR20 fr3 -#define DR21 fr2 -#define DR40 fr5 -#define DR41 fr4 -#else /* !__LITTLE_ENDIAN__ */ -#define DR00 fr0 -#define DR01 fr1 -#define DR20 fr2 -#define DR21 fr3 -#define DR40 fr4 -#define DR41 fr5 -#endif /* !__LITTLE_ENDIAN__ */ - -#ifdef __sh1__ -#define SL(branch, dest, in_slot, in_slot_arg2) \ - in_slot, in_slot_arg2; branch dest -#define SL1(branch, dest, in_slot) \ - in_slot; branch dest -#else /* ! __sh1__ */ -#define SL(branch, dest, in_slot, in_slot_arg2) \ - branch##.s dest; in_slot, in_slot_arg2 -#define SL1(branch, dest, in_slot) \ - branch##/s dest; in_slot -#endif /* !__sh1__ */ diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index 1e654801334..cc26e05a764 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -1983,7 +1983,7 @@ struct sh_args { that the native compiler puts too large (> 32) immediate shift counts into a register and shifts by the register, letting the SH decide what to do instead of doing that itself. */ -/* ??? The library routines in lib1funcs.asm truncate the shift count. +/* ??? The library routines in lib1funcs.S truncate the shift count. However, the SH3 has hardware shifts that do not truncate exactly as gcc expects - the sign bit is significant - so it appears that we need to leave this zero for correct SH3 code. */ diff --git a/gcc/config/sh/t-linux b/gcc/config/sh/t-linux index a5c711618c6..2304fb176cb 100644 --- a/gcc/config/sh/t-linux +++ b/gcc/config/sh/t-linux @@ -1,5 +1,3 @@ -LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array - LIB2FUNCS_EXTRA= $(srcdir)/config/sh/linux-atomic.asm MULTILIB_DIRNAMES= diff --git a/gcc/config/sh/t-netbsd b/gcc/config/sh/t-netbsd index de172d3f73f..dea1c478cb5 100644 --- a/gcc/config/sh/t-netbsd +++ b/gcc/config/sh/t-netbsd @@ -17,6 +17,5 @@ # . TARGET_LIBGCC2_CFLAGS = -fpic -mieee -LIB1ASMFUNCS_CACHE = _ic_invalidate LIB2FUNCS_EXTRA= diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh index 6eaf784e8ae..56ea83e0697 100644 --- a/gcc/config/sh/t-sh +++ b/gcc/config/sh/t-sh @@ -22,13 +22,6 @@ sh-c.o: $(srcdir)/config/sh/sh-c.c \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/sh/sh-c.c -LIB1ASMSRC = sh/lib1funcs.asm -LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movmem \ - _movmem_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ - _div_table _udiv_qrnnd_16 \ - $(LIB1ASMFUNCS_CACHE) -LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array - TARGET_LIBGCC2_CFLAGS = -mieee DEFAULT_ENDIAN = $(word 1,$(TM_ENDIAN_CONFIG)) diff --git a/gcc/config/sh/t-sh64 b/gcc/config/sh/t-sh64 index d88f929fd7a..3bd9205079b 100644 --- a/gcc/config/sh/t-sh64 +++ b/gcc/config/sh/t-sh64 @@ -1,4 +1,4 @@ -# Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc. 
+# Copyright (C) 2002, 2004, 2005, 2011 Free Software Foundation, Inc. # # This file is part of GCC. # @@ -16,13 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMFUNCS = \ - _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ - _shcompact_call_trampoline _shcompact_return_trampoline \ - _shcompact_incoming_args _ic_invalidate _nested_trampoline \ - _push_pop_shmedia_regs \ - _udivdi3 _divdi3 _umoddi3 _moddi3 _div_table - MULTILIB_CPU_DIRS= $(ML_sh1) $(ML_sh2e) $(ML_sh2) $(ML_sh3e) $(ML_sh3) $(ML_sh4_nofpu) $(ML_sh4_single_only) $(ML_sh4_single) $(ML_sh4) $(ML_sh5_32media:m5-32media/=media32) $(ML_sh5_32media_nofpu:m5-32media-nofpu/=nofpu/media32) $(ML_sh5_compact:m5-compact/=compact) $(ML_sh5_compact_nofpu:m5-compact-nofpu/=nofpu/compact) $(ML_sh5_64media:m5-64media/=media64) $(ML_sh5_64media_nofpu:m5-64media-nofpu/=nofpu/media64) MULTILIB_RAW_DIRNAMES= $(MULTILIB_ENDIAN:/mb= mb) $(MULTILIB_CPU_DIRS:/=) diff --git a/gcc/config/sparc/lb1spc.asm b/gcc/config/sparc/lb1spc.asm deleted file mode 100644 index b60bd5740e7..00000000000 --- a/gcc/config/sparc/lb1spc.asm +++ /dev/null @@ -1,784 +0,0 @@ -/* This is an assembly language implementation of mulsi3, divsi3, and modsi3 - for the sparc processor. - - These routines are derived from the SPARC Architecture Manual, version 8, - slightly edited to match the desired calling convention, and also to - optimize them for our purposes. */ - -#ifdef L_mulsi3 -.text - .align 4 - .global .umul - .proc 4 -.umul: - or %o0, %o1, %o4 ! logical or of multiplier and multiplicand - mov %o0, %y ! multiplier to Y register - andncc %o4, 0xfff, %o5 ! mask out lower 12 bits - be mul_shortway ! can do it the short way - andcc %g0, %g0, %o4 ! zero the partial product and clear NV cc - ! - ! long multiply - ! - mulscc %o4, %o1, %o4 ! first iteration of 33 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 ! 32nd iteration - mulscc %o4, %g0, %o4 ! last iteration only shifts - ! the upper 32 bits of product are wrong, but we do not care - retl - rd %y, %o0 - ! - ! short multiply - ! -mul_shortway: - mulscc %o4, %o1, %o4 ! first iteration of 13 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 ! 12th iteration - mulscc %o4, %g0, %o4 ! last iteration only shifts - rd %y, %o5 - sll %o4, 12, %o4 ! left shift partial product by 12 bits - srl %o5, 20, %o5 ! right shift partial product by 20 bits - retl - or %o5, %o4, %o0 ! merge for true product -#endif - -#ifdef L_divsi3 -/* - * Division and remainder, from Appendix E of the SPARC Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. 
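The parameter description that the comment continues with below (N, WORDSIZE, ITER, V) is for a loop that develops the quotient four bits per iteration in non-restoring form, with a one-instruction fixup at the end. As a much simpler reference point, here is a radix-2 restoring version of the same quotient/remainder development; udiv_sketch is a made-up name and this is a sketch of the idea, not the SPARC sequence:

    #include <stdint.h>
    #include <assert.h>

    static uint32_t udiv_sketch(uint32_t n, uint32_t d, uint32_t *rem)
    {
        uint64_t r = 0;                        /* partial remainder (d != 0) */
        uint32_t q = 0;
        for (int i = 31; i >= 0; i--) {
            r = (r << 1) | ((n >> i) & 1);     /* bring down the next dividend bit */
            q <<= 1;
            if (r >= d) {                      /* one conditional subtract per bit */
                r -= d;
                q |= 1;
            }
        }
        *rem = (uint32_t)r;
        return q;                              /* .udiv wants q, .urem wants r */
    }

    int main(void)
    {
        uint32_t r;
        assert(udiv_sketch(1000000007u, 10007u, &r) == 1000000007u / 10007u);
        assert(r == 1000000007u % 10007u);
        return 0;
    }
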
- * - * m4 parameters: - * .div name of function to generate - * div div=div => %o0 / %o1; div=rem => %o0 % %o1 - * true true=true => signed; true=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - .global .udiv - .align 4 - .proc 4 - .text -.udiv: - b ready_to_divide - mov 0, %g3 ! result is always positive - - .global .div - .align 4 - .proc 4 - .text -.div: - ! compute sign of result; if neither is negative, no problem - orcc %o1, %o0, %g0 ! either negative? - bge ready_to_divide ! no, go do the divide - xor %o1, %o0, %g3 ! compute sign in any case - tst %o1 - bge 1f - tst %o0 - ! %o1 is definitely negative; %o0 might also be negative - bge ready_to_divide ! if %o0 not negative... - sub %g0, %o1, %o1 ! in any case, make %o1 nonneg -1: ! %o0 is negative, %o1 is nonnegative - sub %g0, %o0, %o0 ! make %o0 nonnegative - - -ready_to_divide: - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta 0x2 ! ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu got_result ! (and algorithm fails otherwise) - clr %o2 - sethi %hi(1 << (32 - 4 - 1)), %g1 - cmp %o3, %g1 - blu not_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g2 - sll %o5, 4, %o5 - b 1b - add %o4, 1, %o4 - - ! Now compute %g2. - 2: addcc %o5, %o5, %o5 - bcc not_too_big - add %g2, 1, %g2 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - b do_single_div - sub %g2, 1, %g2 - - not_too_big: - 3: cmp %o5, %o3 - blu 2b - nop - be do_single_div - nop - /* NB: these are commented out in the V8-SPARC manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g2 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! 
So we unroll slightly... - do_single_div: - subcc %g2, 1, %g2 - bl end_regular_divide - nop - sub %o3, %o5, %o3 - mov 1, %o2 - b end_single_divloop - nop - single_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - end_single_divloop: - subcc %g2, 1, %g2 - bge single_divloop - tst %o3 - b,a end_regular_divide - -not_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - be got_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -divloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 - -L4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - - -L3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - -L3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - -L3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L4.9: - ! 
remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -end_regular_divide: - subcc %o4, 1, %o4 - bge divloop - tst %o3 - bl,a got_result - ! non-restoring fixup here (one instruction only!) - sub %o2, 1, %o2 - - -got_result: - ! check to see if answer should be < 0 - tst %g3 - bl,a 1f - sub %g0, %o2, %o2 -1: - retl - mov %o2, %o0 -#endif - -#ifdef L_modsi3 -/* This implementation was taken from glibc: - * - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ -.text - .align 4 - .global .urem - .proc 4 -.urem: - b divide - mov 0, %g3 ! result always positive - - .align 4 - .global .rem - .proc 4 -.rem: - ! compute sign of result; if neither is negative, no problem - orcc %o1, %o0, %g0 ! either negative? - bge 2f ! no, go do the divide - mov %o0, %g3 ! sign of remainder matches %o0 - tst %o1 - bge 1f - tst %o0 - ! %o1 is definitely negative; %o0 might also be negative - bge 2f ! if %o0 not negative... - sub %g0, %o1, %o1 ! in any case, make %o1 nonneg -1: ! %o0 is negative, %o1 is nonnegative - sub %g0, %o0, %o0 ! make %o0 nonnegative -2: - - ! Ready to divide. Compute size of quotient; scale comparand. -divide: - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta 0x2 !ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu got_result ! (and algorithm fails otherwise) - clr %o2 - sethi %hi(1 << (32 - 4 - 1)), %g1 - cmp %o3, %g1 - blu not_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g2 - sll %o5, 4, %o5 - b 1b - add %o4, 1, %o4 - - ! Now compute %g2. - 2: addcc %o5, %o5, %o5 - bcc not_too_big - add %g2, 1, %g2 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - b do_single_div - sub %g2, 1, %g2 - - not_too_big: - 3: cmp %o5, %o3 - blu 2b - nop - be do_single_div - nop - /* NB: these are commented out in the V8-SPARC manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g2 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! 
first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - do_single_div: - subcc %g2, 1, %g2 - bl end_regular_divide - nop - sub %o3, %o5, %o3 - mov 1, %o2 - b end_single_divloop - nop - single_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - end_single_divloop: - subcc %g2, 1, %g2 - bge single_divloop - tst %o3 - b,a end_regular_divide - -not_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - be got_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -divloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 -L4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - -L3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - -L3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - -L3.13: - ! 
remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L4.9: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -end_regular_divide: - subcc %o4, 1, %o4 - bge divloop - tst %o3 - bl,a got_result - ! non-restoring fixup here (one instruction only!) - add %o3, %o1, %o3 - -got_result: - ! check to see if answer should be < 0 - tst %g3 - bl,a 1f - sub %g0, %o3, %o3 -1: - retl - mov %o3, %o0 - -#endif - diff --git a/gcc/config/sparc/lb1spl.asm b/gcc/config/sparc/lb1spl.asm deleted file mode 100644 index 973401f8018..00000000000 --- a/gcc/config/sparc/lb1spl.asm +++ /dev/null @@ -1,246 +0,0 @@ -/* This is an assembly language implementation of mulsi3, divsi3, and modsi3 - for the sparclite processor. - - These routines are all from the SPARClite User's Guide, slightly edited - to match the desired calling convention, and also to optimize them. */ - -#ifdef L_udivsi3 -.text - .align 4 - .global .udiv - .proc 04 -.udiv: - wr %g0,%g0,%y ! Not a delayed write for sparclite - tst %g0 - divscc %o0,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - retl - divscc %g1,%o1,%o0 -#endif - -#ifdef L_umodsi3 -.text - .align 4 - .global .urem - .proc 04 -.urem: - wr %g0,%g0,%y ! Not a delayed write for sparclite - tst %g0 - divscc %o0,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - bl 1f - rd %y,%o0 - retl - nop -1: retl - add %o0,%o1,%o0 -#endif - -#ifdef L_divsi3 -.text - .align 4 - .global .div - .proc 04 -! ??? This routine could be made faster if was optimized, and if it was -! rewritten to only calculate the quotient. -.div: - wr %g0,%g0,%y ! 
Not a delayed write for sparclite - mov %o1,%o4 - tst %o1 - bl,a 1f - sub %g0,%o4,%o4 -1: tst %o0 - bl,a 2f - mov -1,%y -2: divscc %o0,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - be 6f - mov %y,%o3 - bg 4f - addcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bl 5f - tst %g1 - ba 5f - add %o3,%o4,%o3 -4: subcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bge 5f - tst %g1 - sub %o3,%o4,%o3 -5: bl,a 6f - add %g1,1,%g1 -6: tst %o1 - bl,a 7f - sub %g0,%g1,%g1 -7: retl - mov %g1,%o0 ! Quotient is in %g1. -#endif - -#ifdef L_modsi3 -.text - .align 4 - .global .rem - .proc 04 -! ??? This routine could be made faster if was optimized, and if it was -! rewritten to only calculate the remainder. -.rem: - wr %g0,%g0,%y ! Not a delayed write for sparclite - mov %o1,%o4 - tst %o1 - bl,a 1f - sub %g0,%o4,%o4 -1: tst %o0 - bl,a 2f - mov -1,%y -2: divscc %o0,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - be 6f - mov %y,%o3 - bg 4f - addcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bl 5f - tst %g1 - ba 5f - add %o3,%o4,%o3 -4: subcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bge 5f - tst %g1 - sub %o3,%o4,%o3 -5: bl,a 6f - add %g1,1,%g1 -6: tst %o1 - bl,a 7f - sub %g0,%g1,%g1 -7: retl - mov %o3,%o0 ! Remainder is in %o3. -#endif diff --git a/gcc/config/sparc/t-elf b/gcc/config/sparc/t-elf index 7073bcb7721..be926585481 100644 --- a/gcc/config/sparc/t-elf +++ b/gcc/config/sparc/t-elf @@ -17,9 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = sparc/lb1spc.asm -LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 - MULTILIB_OPTIONS = msoft-float mcpu=v8 mflat MULTILIB_DIRNAMES = soft v8 flat MULTILIB_MATCHES = msoft-float=mno-fpu diff --git a/gcc/config/sparc/t-leon b/gcc/config/sparc/t-leon index 4f9d0a9e797..8e5e30f7ff7 100644 --- a/gcc/config/sparc/t-leon +++ b/gcc/config/sparc/t-leon @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = sparc/lb1spc.asm -LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 - # Multilibs for LEON # LEON is a SPARC-V8, but the AT697 implementation has a bug in the # V8-specific instructions. diff --git a/gcc/config/spu/t-spu-elf b/gcc/config/spu/t-spu-elf index b1660353ee6..45802499525 100644 --- a/gcc/config/spu/t-spu-elf +++ b/gcc/config/spu/t-spu-elf @@ -15,10 +15,6 @@ # along with GCC; see the file COPYING3. 
If not see # . -# Suppress building libgcc1.a -LIBGCC1 = -CROSS_LIBGCC1 = - TARGET_LIBGCC2_CFLAGS = -fPIC -mwarn-reloc -D__IN_LIBGCC2 # We exclude those because the libgcc2.c default versions do not support diff --git a/gcc/config/v850/lib1funcs.asm b/gcc/config/v850/lib1funcs.asm deleted file mode 100644 index 04e9b1e0ad4..00000000000 --- a/gcc/config/v850/lib1funcs.asm +++ /dev/null @@ -1,2330 +0,0 @@ -/* libgcc routines for NEC V850. - Copyright (C) 1996, 1997, 2002, 2005, 2009, 2010 - Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -#ifdef L_mulsi3 - .text - .globl ___mulsi3 - .type ___mulsi3,@function -___mulsi3: -#ifdef __v850__ -/* - #define SHIFT 12 - #define MASK ((1 << SHIFT) - 1) - - #define STEP(i, j) \ - ({ \ - short a_part = (a >> (i)) & MASK; \ - short b_part = (b >> (j)) & MASK; \ - int res = (((int) a_part) * ((int) b_part)); \ - res; \ - }) - - int - __mulsi3 (unsigned a, unsigned b) - { - return STEP (0, 0) + - ((STEP (SHIFT, 0) + STEP (0, SHIFT)) << SHIFT) + - ((STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) + STEP (2 * SHIFT, 0)) - << (2 * SHIFT)); - } -*/ - mov r6, r14 - movea lo(32767), r0, r10 - and r10, r14 - mov r7, r15 - and r10, r15 - shr 15, r6 - mov r6, r13 - and r10, r13 - shr 15, r7 - mov r7, r12 - and r10, r12 - shr 15, r6 - shr 15, r7 - mov r14, r10 - mulh r15, r10 - mov r14, r11 - mulh r12, r11 - mov r13, r16 - mulh r15, r16 - mulh r14, r7 - mulh r15, r6 - add r16, r11 - mulh r13, r12 - shl 15, r11 - add r11, r10 - add r12, r7 - add r6, r7 - shl 30, r7 - add r7, r10 - jmp [r31] -#endif /* __v850__ */ -#if defined(__v850e__) || defined(__v850ea__) || defined(__v850e2__) || defined(__v850e2v3__) - /* This routine is almost unneccesarry because gcc - generates the MUL instruction for the RTX mulsi3. - But if someone wants to link his application with - previsously compiled v850 objects then they will - need this function. */ - - /* It isn't good to put the inst sequence as below; - mul r7, r6, - mov r6, r10, r0 - In this case, there is a RAW hazard between them. - MUL inst takes 2 cycle in EX stage, then MOV inst - must wait 1cycle. 
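Aside on the __v850__ path of ___mulsi3 above: it splits each operand into small pieces, forms the partial products with mulh, and keeps only the low 32 bits of the result, so the highest-order partial products never have to be computed. The same identity with 16-bit halves (the assembly uses 15-bit chunks, presumably so every piece stays non-negative for the signed mulh; mulsi3_sketch is a made-up name):

    #include <stdint.h>
    #include <assert.h>

    static uint32_t mulsi3_sketch(uint32_t a, uint32_t b)
    {
        uint32_t a0 = a & 0xFFFF, a1 = a >> 16;
        uint32_t b0 = b & 0xFFFF, b1 = b >> 16;
        /* a1*b1 only contributes at bit 32 and above, so mod 2^32 it vanishes */
        return a0 * b0 + ((a0 * b1 + a1 * b0) << 16);
    }

    int main(void)
    {
        assert(mulsi3_sketch(123456789u, 987654321u) == 123456789u * 987654321u);
        return 0;
    }
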
*/ - mov r7, r10 - mul r6, r10, r0 - jmp [r31] -#endif /* __v850e__ */ - .size ___mulsi3,.-___mulsi3 -#endif /* L_mulsi3 */ - - -#ifdef L_udivsi3 - .text - .global ___udivsi3 - .type ___udivsi3,@function -___udivsi3: -#ifdef __v850__ - mov 1,r12 - mov 0,r10 - cmp r6,r7 - bnl .L12 - movhi hi(-2147483648),r0,r13 - cmp r0,r7 - blt .L12 -.L4: - shl 1,r7 - shl 1,r12 - cmp r6,r7 - bnl .L12 - cmp r0,r12 - be .L8 - mov r7,r19 - and r13,r19 - be .L4 - br .L12 -.L9: - cmp r7,r6 - bl .L10 - sub r7,r6 - or r12,r10 -.L10: - shr 1,r12 - shr 1,r7 -.L12: - cmp r0,r12 - bne .L9 -.L8: - jmp [r31] - -#else /* defined(__v850e__) */ - - /* See comments at end of __mulsi3. */ - mov r6, r10 - divu r7, r10, r0 - jmp [r31] - -#endif /* __v850e__ */ - - .size ___udivsi3,.-___udivsi3 -#endif - -#ifdef L_divsi3 - .text - .globl ___divsi3 - .type ___divsi3,@function -___divsi3: -#ifdef __v850__ - add -8,sp - st.w r31,4[sp] - st.w r22,0[sp] - mov 1,r22 - tst r7,r7 - bp .L3 - subr r0,r7 - subr r0,r22 -.L3: - tst r6,r6 - bp .L4 - subr r0,r6 - subr r0,r22 -.L4: - jarl ___udivsi3,r31 - cmp r0,r22 - bp .L7 - subr r0,r10 -.L7: - ld.w 0[sp],r22 - ld.w 4[sp],r31 - add 8,sp - jmp [r31] - -#else /* defined(__v850e__) */ - - /* See comments at end of __mulsi3. */ - mov r6, r10 - div r7, r10, r0 - jmp [r31] - -#endif /* __v850e__ */ - - .size ___divsi3,.-___divsi3 -#endif - -#ifdef L_umodsi3 - .text - .globl ___umodsi3 - .type ___umodsi3,@function -___umodsi3: -#ifdef __v850__ - add -12,sp - st.w r31,8[sp] - st.w r7,4[sp] - st.w r6,0[sp] - jarl ___udivsi3,r31 - ld.w 4[sp],r7 - mov r10,r6 - jarl ___mulsi3,r31 - ld.w 0[sp],r6 - subr r6,r10 - ld.w 8[sp],r31 - add 12,sp - jmp [r31] - -#else /* defined(__v850e__) */ - - /* See comments at end of __mulsi3. */ - divu r7, r6, r10 - jmp [r31] - -#endif /* __v850e__ */ - - .size ___umodsi3,.-___umodsi3 -#endif /* L_umodsi3 */ - -#ifdef L_modsi3 - .text - .globl ___modsi3 - .type ___modsi3,@function -___modsi3: -#ifdef __v850__ - add -12,sp - st.w r31,8[sp] - st.w r7,4[sp] - st.w r6,0[sp] - jarl ___divsi3,r31 - ld.w 4[sp],r7 - mov r10,r6 - jarl ___mulsi3,r31 - ld.w 0[sp],r6 - subr r6,r10 - ld.w 8[sp],r31 - add 12,sp - jmp [r31] - -#else /* defined(__v850e__) */ - - /* See comments at end of __mulsi3. */ - div r7, r6, r10 - jmp [r31] - -#endif /* __v850e__ */ - - .size ___modsi3,.-___modsi3 -#endif /* L_modsi3 */ - -#ifdef L_save_2 - .text - .align 2 - .globl __save_r2_r29 - .type __save_r2_r29,@function - /* Allocate space and save registers 2, 20 .. 29 on the stack. */ - /* Called via: jalr __save_r2_r29,r10. */ -__save_r2_r29: -#ifdef __EP__ - mov ep,r1 - addi -44,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r20,36[ep] - sst.w r2,40[ep] - mov r1,ep -#else - addi -44,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r20,36[sp] - st.w r2,40[sp] -#endif - jmp [r10] - .size __save_r2_r29,.-__save_r2_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r2_r29. 
*/ - .align 2 - .globl __return_r2_r29 - .type __return_r2_r29,@function -__return_r2_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r20 - sld.w 40[ep],r2 - addi 44,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - ld.w 36[sp],r20 - ld.w 40[sp],r2 - addi 44,sp,sp -#endif - jmp [r31] - .size __return_r2_r29,.-__return_r2_r29 -#endif /* L_save_2 */ - -#ifdef L_save_20 - .text - .align 2 - .globl __save_r20_r29 - .type __save_r20_r29,@function - /* Allocate space and save registers 20 .. 29 on the stack. */ - /* Called via: jalr __save_r20_r29,r10. */ -__save_r20_r29: -#ifdef __EP__ - mov ep,r1 - addi -40,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r20,36[ep] - mov r1,ep -#else - addi -40,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r20,36[sp] -#endif - jmp [r10] - .size __save_r20_r29,.-__save_r20_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r20_r29. */ - .align 2 - .globl __return_r20_r29 - .type __return_r20_r29,@function -__return_r20_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r20 - addi 40,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - ld.w 36[sp],r20 - addi 40,sp,sp -#endif - jmp [r31] - .size __return_r20_r29,.-__return_r20_r29 -#endif /* L_save_20 */ - -#ifdef L_save_21 - .text - .align 2 - .globl __save_r21_r29 - .type __save_r21_r29,@function - /* Allocate space and save registers 21 .. 29 on the stack. */ - /* Called via: jalr __save_r21_r29,r10. */ -__save_r21_r29: -#ifdef __EP__ - mov ep,r1 - addi -36,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - mov r1,ep -#else - addi -36,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] -#endif - jmp [r10] - .size __save_r21_r29,.-__save_r21_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r21_r29. 
*/ - .align 2 - .globl __return_r21_r29 - .type __return_r21_r29,@function -__return_r21_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - addi 36,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - addi 36,sp,sp -#endif - jmp [r31] - .size __return_r21_r29,.-__return_r21_r29 -#endif /* L_save_21 */ - -#ifdef L_save_22 - .text - .align 2 - .globl __save_r22_r29 - .type __save_r22_r29,@function - /* Allocate space and save registers 22 .. 29 on the stack. */ - /* Called via: jalr __save_r22_r29,r10. */ -__save_r22_r29: -#ifdef __EP__ - mov ep,r1 - addi -32,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - mov r1,ep -#else - addi -32,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] -#endif - jmp [r10] - .size __save_r22_r29,.-__save_r22_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r22_r29. */ - .align 2 - .globl __return_r22_r29 - .type __return_r22_r29,@function -__return_r22_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - addi 32,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - addi 32,sp,sp -#endif - jmp [r31] - .size __return_r22_r29,.-__return_r22_r29 -#endif /* L_save_22 */ - -#ifdef L_save_23 - .text - .align 2 - .globl __save_r23_r29 - .type __save_r23_r29,@function - /* Allocate space and save registers 23 .. 29 on the stack. */ - /* Called via: jalr __save_r23_r29,r10. */ -__save_r23_r29: -#ifdef __EP__ - mov ep,r1 - addi -28,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - mov r1,ep -#else - addi -28,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] -#endif - jmp [r10] - .size __save_r23_r29,.-__save_r23_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r23_r29. */ - .align 2 - .globl __return_r23_r29 - .type __return_r23_r29,@function -__return_r23_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - addi 28,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - addi 28,sp,sp -#endif - jmp [r31] - .size __return_r23_r29,.-__return_r23_r29 -#endif /* L_save_23 */ - -#ifdef L_save_24 - .text - .align 2 - .globl __save_r24_r29 - .type __save_r24_r29,@function - /* Allocate space and save registers 24 .. 29 on the stack. */ - /* Called via: jalr __save_r24_r29,r10. 
*/ -__save_r24_r29: -#ifdef __EP__ - mov ep,r1 - addi -24,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - mov r1,ep -#else - addi -24,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] -#endif - jmp [r10] - .size __save_r24_r29,.-__save_r24_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r24_r29. */ - .align 2 - .globl __return_r24_r29 - .type __return_r24_r29,@function -__return_r24_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - addi 24,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - addi 24,sp,sp -#endif - jmp [r31] - .size __return_r24_r29,.-__return_r24_r29 -#endif /* L_save_24 */ - -#ifdef L_save_25 - .text - .align 2 - .globl __save_r25_r29 - .type __save_r25_r29,@function - /* Allocate space and save registers 25 .. 29 on the stack. */ - /* Called via: jalr __save_r25_r29,r10. */ -__save_r25_r29: -#ifdef __EP__ - mov ep,r1 - addi -20,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - mov r1,ep -#else - addi -20,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] -#endif - jmp [r10] - .size __save_r25_r29,.-__save_r25_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r25_r29. */ - .align 2 - .globl __return_r25_r29 - .type __return_r25_r29,@function -__return_r25_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - addi 20,sp,sp - mov r1,ep -#else - ld.w 0[ep],r29 - ld.w 4[ep],r28 - ld.w 8[ep],r27 - ld.w 12[ep],r26 - ld.w 16[ep],r25 - addi 20,sp,sp -#endif - jmp [r31] - .size __return_r25_r29,.-__return_r25_r29 -#endif /* L_save_25 */ - -#ifdef L_save_26 - .text - .align 2 - .globl __save_r26_r29 - .type __save_r26_r29,@function - /* Allocate space and save registers 26 .. 29 on the stack. */ - /* Called via: jalr __save_r26_r29,r10. */ -__save_r26_r29: -#ifdef __EP__ - mov ep,r1 - add -16,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - mov r1,ep -#else - add -16,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] -#endif - jmp [r10] - .size __save_r26_r29,.-__save_r26_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r26_r29. */ - .align 2 - .globl __return_r26_r29 - .type __return_r26_r29,@function -__return_r26_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - addi 16,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - addi 16,sp,sp -#endif - jmp [r31] - .size __return_r26_r29,.-__return_r26_r29 -#endif /* L_save_26 */ - -#ifdef L_save_27 - .text - .align 2 - .globl __save_r27_r29 - .type __save_r27_r29,@function - /* Allocate space and save registers 27 .. 29 on the stack. */ - /* Called via: jalr __save_r27_r29,r10. 
*/ -__save_r27_r29: - add -12,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - jmp [r10] - .size __save_r27_r29,.-__save_r27_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r27_r29. */ - .align 2 - .globl __return_r27_r29 - .type __return_r27_r29,@function -__return_r27_r29: - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - add 12,sp - jmp [r31] - .size __return_r27_r29,.-__return_r27_r29 -#endif /* L_save_27 */ - -#ifdef L_save_28 - .text - .align 2 - .globl __save_r28_r29 - .type __save_r28_r29,@function - /* Allocate space and save registers 28,29 on the stack. */ - /* Called via: jalr __save_r28_r29,r10. */ -__save_r28_r29: - add -8,sp - st.w r29,0[sp] - st.w r28,4[sp] - jmp [r10] - .size __save_r28_r29,.-__save_r28_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r28_r29. */ - .align 2 - .globl __return_r28_r29 - .type __return_r28_r29,@function -__return_r28_r29: - ld.w 0[sp],r29 - ld.w 4[sp],r28 - add 8,sp - jmp [r31] - .size __return_r28_r29,.-__return_r28_r29 -#endif /* L_save_28 */ - -#ifdef L_save_29 - .text - .align 2 - .globl __save_r29 - .type __save_r29,@function - /* Allocate space and save register 29 on the stack. */ - /* Called via: jalr __save_r29,r10. */ -__save_r29: - add -4,sp - st.w r29,0[sp] - jmp [r10] - .size __save_r29,.-__save_r29 - - /* Restore saved register 29, deallocate stack and return to the user. */ - /* Called via: jr __return_r29. */ - .align 2 - .globl __return_r29 - .type __return_r29,@function -__return_r29: - ld.w 0[sp],r29 - add 4,sp - jmp [r31] - .size __return_r29,.-__return_r29 -#endif /* L_save_28 */ - -#ifdef L_save_2c - .text - .align 2 - .globl __save_r2_r31 - .type __save_r2_r31,@function - /* Allocate space and save registers 20 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r2_r31,r10. */ -__save_r2_r31: -#ifdef __EP__ - mov ep,r1 - addi -48,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r20,36[ep] - sst.w r2,40[ep] - sst.w r31,44[ep] - mov r1,ep -#else - addi -48,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r20,36[sp] - st.w r2,40[sp] - st.w r31,44[sp] -#endif - jmp [r10] - .size __save_r2_r31,.-__save_r2_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r20_r31. */ - .align 2 - .globl __return_r2_r31 - .type __return_r2_r31,@function -__return_r2_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r20 - sld.w 40[ep],r2 - sld.w 44[ep],r31 - addi 48,sp,sp - mov r1,ep -#else - ld.w 44[sp],r29 - ld.w 40[sp],r28 - ld.w 36[sp],r27 - ld.w 32[sp],r26 - ld.w 28[sp],r25 - ld.w 24[sp],r24 - ld.w 20[sp],r23 - ld.w 16[sp],r22 - ld.w 12[sp],r21 - ld.w 8[sp],r20 - ld.w 4[sp],r2 - ld.w 0[sp],r31 - addi 48,sp,sp -#endif - jmp [r31] - .size __return_r2_r31,.-__return_r2_r31 -#endif /* L_save_2c */ - -#ifdef L_save_20c - .text - .align 2 - .globl __save_r20_r31 - .type __save_r20_r31,@function - /* Allocate space and save registers 20 .. 29, 31 on the stack. 
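As an aside (not part of this patch), the 48-byte frame that __save_r2_r31 above lays out can be pictured as the following C struct; the struct and field names are purely illustrative, and unsigned int is assumed to be 32 bits wide.

    /* Illustrative layout of the frame created by `addi -48,sp,sp' in
       __save_r2_r31; the offsets follow the st.w/sst.w sequence above
       (r29 at 0[sp] ... r31 at 44[sp]), and __return_r2_r31 pops it again.  */
    struct save_r2_r31_frame
    {
      unsigned int r29;   /*  0[sp] */
      unsigned int r28;   /*  4[sp] */
      unsigned int r27;   /*  8[sp] */
      unsigned int r26;   /* 12[sp] */
      unsigned int r25;   /* 16[sp] */
      unsigned int r24;   /* 20[sp] */
      unsigned int r23;   /* 24[sp] */
      unsigned int r22;   /* 28[sp] */
      unsigned int r21;   /* 32[sp] */
      unsigned int r20;   /* 36[sp] */
      unsigned int r2;    /* 40[sp] */
      unsigned int r31;   /* 44[sp] */
    };  /* sizeof == 48, matching the addi -48,sp,sp adjustment */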
*/ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r20_r31,r10. */ -__save_r20_r31: -#ifdef __EP__ - mov ep,r1 - addi -44,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r20,36[ep] - sst.w r31,40[ep] - mov r1,ep -#else - addi -44,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r20,36[sp] - st.w r31,40[sp] -#endif - jmp [r10] - .size __save_r20_r31,.-__save_r20_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r20_r31. */ - .align 2 - .globl __return_r20_r31 - .type __return_r20_r31,@function -__return_r20_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r20 - sld.w 40[ep],r31 - addi 44,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - ld.w 36[sp],r20 - ld.w 40[sp],r31 - addi 44,sp,sp -#endif - jmp [r31] - .size __return_r20_r31,.-__return_r20_r31 -#endif /* L_save_20c */ - -#ifdef L_save_21c - .text - .align 2 - .globl __save_r21_r31 - .type __save_r21_r31,@function - /* Allocate space and save registers 21 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r21_r31,r10. */ -__save_r21_r31: -#ifdef __EP__ - mov ep,r1 - addi -40,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r31,36[ep] - mov r1,ep - jmp [r10] -#else - addi -40,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r31,36[sp] - jmp [r10] -#endif - .size __save_r21_r31,.-__save_r21_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r21_r31. */ - .align 2 - .globl __return_r21_r31 - .type __return_r21_r31,@function -__return_r21_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r31 - addi 40,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - ld.w 36[sp],r31 - addi 40,sp,sp -#endif - jmp [r31] - .size __return_r21_r31,.-__return_r21_r31 -#endif /* L_save_21c */ - -#ifdef L_save_22c - .text - .align 2 - .globl __save_r22_r31 - .type __save_r22_r31,@function - /* Allocate space and save registers 22 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r22_r31,r10. 
*/ -__save_r22_r31: -#ifdef __EP__ - mov ep,r1 - addi -36,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r31,32[ep] - mov r1,ep -#else - addi -36,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r31,32[sp] -#endif - jmp [r10] - .size __save_r22_r31,.-__save_r22_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r22_r31. */ - .align 2 - .globl __return_r22_r31 - .type __return_r22_r31,@function -__return_r22_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r31 - addi 36,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r31 - addi 36,sp,sp -#endif - jmp [r31] - .size __return_r22_r31,.-__return_r22_r31 -#endif /* L_save_22c */ - -#ifdef L_save_23c - .text - .align 2 - .globl __save_r23_r31 - .type __save_r23_r31,@function - /* Allocate space and save registers 23 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r23_r31,r10. */ -__save_r23_r31: -#ifdef __EP__ - mov ep,r1 - addi -32,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r31,28[ep] - mov r1,ep -#else - addi -32,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r31,28[sp] -#endif - jmp [r10] - .size __save_r23_r31,.-__save_r23_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r23_r31. */ - .align 2 - .globl __return_r23_r31 - .type __return_r23_r31,@function -__return_r23_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r31 - addi 32,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r31 - addi 32,sp,sp -#endif - jmp [r31] - .size __return_r23_r31,.-__return_r23_r31 -#endif /* L_save_23c */ - -#ifdef L_save_24c - .text - .align 2 - .globl __save_r24_r31 - .type __save_r24_r31,@function - /* Allocate space and save registers 24 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r24_r31,r10. */ -__save_r24_r31: -#ifdef __EP__ - mov ep,r1 - addi -28,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r31,24[ep] - mov r1,ep -#else - addi -28,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r31,24[sp] -#endif - jmp [r10] - .size __save_r24_r31,.-__save_r24_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r24_r31. 
*/ - .align 2 - .globl __return_r24_r31 - .type __return_r24_r31,@function -__return_r24_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r31 - addi 28,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r31 - addi 28,sp,sp -#endif - jmp [r31] - .size __return_r24_r31,.-__return_r24_r31 -#endif /* L_save_24c */ - -#ifdef L_save_25c - .text - .align 2 - .globl __save_r25_r31 - .type __save_r25_r31,@function - /* Allocate space and save registers 25 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r25_r31,r10. */ -__save_r25_r31: -#ifdef __EP__ - mov ep,r1 - addi -24,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r31,20[ep] - mov r1,ep -#else - addi -24,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r31,20[sp] -#endif - jmp [r10] - .size __save_r25_r31,.-__save_r25_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r25_r31. */ - .align 2 - .globl __return_r25_r31 - .type __return_r25_r31,@function -__return_r25_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r31 - addi 24,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r31 - addi 24,sp,sp -#endif - jmp [r31] - .size __return_r25_r31,.-__return_r25_r31 -#endif /* L_save_25c */ - -#ifdef L_save_26c - .text - .align 2 - .globl __save_r26_r31 - .type __save_r26_r31,@function - /* Allocate space and save registers 26 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r26_r31,r10. */ -__save_r26_r31: -#ifdef __EP__ - mov ep,r1 - addi -20,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r31,16[ep] - mov r1,ep -#else - addi -20,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r31,16[sp] -#endif - jmp [r10] - .size __save_r26_r31,.-__save_r26_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r26_r31. */ - .align 2 - .globl __return_r26_r31 - .type __return_r26_r31,@function -__return_r26_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r31 - addi 20,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r31 - addi 20,sp,sp -#endif - jmp [r31] - .size __return_r26_r31,.-__return_r26_r31 -#endif /* L_save_26c */ - -#ifdef L_save_27c - .text - .align 2 - .globl __save_r27_r31 - .type __save_r27_r31,@function - /* Allocate space and save registers 27 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r27_r31,r10. 
*/ -__save_r27_r31: -#ifdef __EP__ - mov ep,r1 - addi -16,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r31,12[ep] - mov r1,ep -#else - addi -16,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r31,12[sp] -#endif - jmp [r10] - .size __save_r27_r31,.-__save_r27_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r27_r31. */ - .align 2 - .globl __return_r27_r31 - .type __return_r27_r31,@function -__return_r27_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r31 - addi 16,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r31 - addi 16,sp,sp -#endif - jmp [r31] - .size __return_r27_r31,.-__return_r27_r31 -#endif /* L_save_27c */ - -#ifdef L_save_28c - .text - .align 2 - .globl __save_r28_r31 - .type __save_r28_r31,@function - /* Allocate space and save registers 28 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r28_r31,r10. */ -__save_r28_r31: - addi -12,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r31,8[sp] - jmp [r10] - .size __save_r28_r31,.-__save_r28_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r28_r31. */ - .align 2 - .globl __return_r28_r31 - .type __return_r28_r31,@function -__return_r28_r31: - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r31 - addi 12,sp,sp - jmp [r31] - .size __return_r28_r31,.-__return_r28_r31 -#endif /* L_save_28c */ - -#ifdef L_save_29c - .text - .align 2 - .globl __save_r29_r31 - .type __save_r29_r31,@function - /* Allocate space and save registers 29 & 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r29_r31,r10. */ -__save_r29_r31: - addi -8,sp,sp - st.w r29,0[sp] - st.w r31,4[sp] - jmp [r10] - .size __save_r29_r31,.-__save_r29_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r29_r31. */ - .align 2 - .globl __return_r29_r31 - .type __return_r29_r31,@function -__return_r29_r31: - ld.w 0[sp],r29 - ld.w 4[sp],r31 - addi 8,sp,sp - jmp [r31] - .size __return_r29_r31,.-__return_r29_r31 -#endif /* L_save_29c */ - -#ifdef L_save_31c - .text - .align 2 - .globl __save_r31 - .type __save_r31,@function - /* Allocate space and save register 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r31,r10. */ -__save_r31: - addi -4,sp,sp - st.w r31,0[sp] - jmp [r10] - .size __save_r31,.-__save_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r31. */ - .align 2 - .globl __return_r31 - .type __return_r31,@function -__return_r31: - ld.w 0[sp],r31 - addi 4,sp,sp - jmp [r31] - .size __return_r31,.-__return_r31 -#endif /* L_save_31c */ - -#ifdef L_save_interrupt - .text - .align 2 - .globl __save_interrupt - .type __save_interrupt,@function - /* Save registers r1, r4 on stack and load up with expected values. */ - /* Note, 20 bytes of stack have already been allocated. */ - /* Called via: jalr __save_interrupt,r10. 
*/ -__save_interrupt: - /* add -20,sp ; st.w r11,16[sp] ; st.w r10,12[sp] ; */ - st.w ep,0[sp] - st.w gp,4[sp] - st.w r1,8[sp] - movhi hi(__ep),r0,ep - movea lo(__ep),ep,ep - movhi hi(__gp),r0,gp - movea lo(__gp),gp,gp - jmp [r10] - .size __save_interrupt,.-__save_interrupt - - /* Restore saved registers, deallocate stack and return from the interrupt. */ - /* Called via: jr __return_interrupt. */ - .align 2 - .globl __return_interrupt - .type __return_interrupt,@function -__return_interrupt: - ld.w 0[sp],ep - ld.w 4[sp],gp - ld.w 8[sp],r1 - ld.w 12[sp],r10 - ld.w 16[sp],r11 - addi 20,sp,sp - reti - .size __return_interrupt,.-__return_interrupt -#endif /* L_save_interrupt */ - -#ifdef L_save_all_interrupt - .text - .align 2 - .globl __save_all_interrupt - .type __save_all_interrupt,@function - /* Save all registers except for those saved in __save_interrupt. */ - /* Allocate enough stack for all of the registers & 16 bytes of space. */ - /* Called via: jalr __save_all_interrupt,r10. */ -__save_all_interrupt: - addi -104,sp,sp -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sst.w r31,100[ep] - sst.w r2,96[ep] - sst.w gp,92[ep] - sst.w r6,88[ep] - sst.w r7,84[ep] - sst.w r8,80[ep] - sst.w r9,76[ep] - sst.w r11,72[ep] - sst.w r12,68[ep] - sst.w r13,64[ep] - sst.w r14,60[ep] - sst.w r15,56[ep] - sst.w r16,52[ep] - sst.w r17,48[ep] - sst.w r18,44[ep] - sst.w r19,40[ep] - sst.w r20,36[ep] - sst.w r21,32[ep] - sst.w r22,28[ep] - sst.w r23,24[ep] - sst.w r24,20[ep] - sst.w r25,16[ep] - sst.w r26,12[ep] - sst.w r27,8[ep] - sst.w r28,4[ep] - sst.w r29,0[ep] - mov r1,ep -#else - st.w r31,100[sp] - st.w r2,96[sp] - st.w gp,92[sp] - st.w r6,88[sp] - st.w r7,84[sp] - st.w r8,80[sp] - st.w r9,76[sp] - st.w r11,72[sp] - st.w r12,68[sp] - st.w r13,64[sp] - st.w r14,60[sp] - st.w r15,56[sp] - st.w r16,52[sp] - st.w r17,48[sp] - st.w r18,44[sp] - st.w r19,40[sp] - st.w r20,36[sp] - st.w r21,32[sp] - st.w r22,28[sp] - st.w r23,24[sp] - st.w r24,20[sp] - st.w r25,16[sp] - st.w r26,12[sp] - st.w r27,8[sp] - st.w r28,4[sp] - st.w r29,0[sp] -#endif - jmp [r10] - .size __save_all_interrupt,.-__save_all_interrupt - - .globl __restore_all_interrupt - .type __restore_all_interrupt,@function - /* Restore all registers saved in __save_all_interrupt and - deallocate the stack space. */ - /* Called via: jalr __restore_all_interrupt,r10. 
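As an editorial aside (not part of the patch), the 20-byte frame shared between the compiler-generated interrupt prologue and __save_interrupt/__return_interrupt above can be sketched as the C struct below; the names are illustrative and unsigned int is assumed to be 32 bits wide.

    /* Illustrative layout of the 20-byte interrupt frame above.  The
       compiler-generated code allocates the space and stores r10/r11 before
       the jalr; __save_interrupt then fills in ep, gp and r1 and reloads
       ep/gp from hi/lo(__ep) and hi/lo(__gp); __return_interrupt restores
       all five registers before the reti.  */
    struct save_interrupt_frame
    {
      unsigned int ep;    /*  0[sp], stored by __save_interrupt */
      unsigned int gp;    /*  4[sp], stored by __save_interrupt */
      unsigned int r1;    /*  8[sp], stored by __save_interrupt */
      unsigned int r10;   /* 12[sp], stored by compiler-generated code */
      unsigned int r11;   /* 16[sp], stored by compiler-generated code */
    };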
*/ -__restore_all_interrupt: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 100[ep],r31 - sld.w 96[ep],r2 - sld.w 92[ep],gp - sld.w 88[ep],r6 - sld.w 84[ep],r7 - sld.w 80[ep],r8 - sld.w 76[ep],r9 - sld.w 72[ep],r11 - sld.w 68[ep],r12 - sld.w 64[ep],r13 - sld.w 60[ep],r14 - sld.w 56[ep],r15 - sld.w 52[ep],r16 - sld.w 48[ep],r17 - sld.w 44[ep],r18 - sld.w 40[ep],r19 - sld.w 36[ep],r20 - sld.w 32[ep],r21 - sld.w 28[ep],r22 - sld.w 24[ep],r23 - sld.w 20[ep],r24 - sld.w 16[ep],r25 - sld.w 12[ep],r26 - sld.w 8[ep],r27 - sld.w 4[ep],r28 - sld.w 0[ep],r29 - mov r1,ep -#else - ld.w 100[sp],r31 - ld.w 96[sp],r2 - ld.w 92[sp],gp - ld.w 88[sp],r6 - ld.w 84[sp],r7 - ld.w 80[sp],r8 - ld.w 76[sp],r9 - ld.w 72[sp],r11 - ld.w 68[sp],r12 - ld.w 64[sp],r13 - ld.w 60[sp],r14 - ld.w 56[sp],r15 - ld.w 52[sp],r16 - ld.w 48[sp],r17 - ld.w 44[sp],r18 - ld.w 40[sp],r19 - ld.w 36[sp],r20 - ld.w 32[sp],r21 - ld.w 28[sp],r22 - ld.w 24[sp],r23 - ld.w 20[sp],r24 - ld.w 16[sp],r25 - ld.w 12[sp],r26 - ld.w 8[sp],r27 - ld.w 4[sp],r28 - ld.w 0[sp],r29 -#endif - addi 104,sp,sp - jmp [r10] - .size __restore_all_interrupt,.-__restore_all_interrupt -#endif /* L_save_all_interrupt */ - -#if defined(__v850e__) || defined(__v850e1__) || defined(__v850e2__) || defined(__v850e2v3__) -#ifdef L_callt_save_r2_r29 - /* Put these functions into the call table area. */ - .call_table_text - - /* Allocate space and save registers 2, 20 .. 29 on the stack. */ - /* Called via: callt ctoff(__callt_save_r2_r29). */ - .align 2 -.L_save_r2_r29: - add -4, sp - st.w r2, 0[sp] - prepare {r20 - r29}, 0 - ctret - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: callt ctoff(__callt_return_r2_r29). */ - .align 2 -.L_return_r2_r29: - dispose 0, {r20-r29} - ld.w 0[sp], r2 - add 4, sp - jmp [r31] - - /* Place the offsets of the start of these routines into the call table. */ - .call_table_data - - .global __callt_save_r2_r29 - .type __callt_save_r2_r29,@function -__callt_save_r2_r29: .short ctoff(.L_save_r2_r29) - - .global __callt_return_r2_r29 - .type __callt_return_r2_r29,@function -__callt_return_r2_r29: .short ctoff(.L_return_r2_r29) - -#endif /* L_callt_save_r2_r29. */ - -#ifdef L_callt_save_r2_r31 - /* Put these functions into the call table area. */ - .call_table_text - - /* Allocate space and save registers 2 and 20 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: callt ctoff(__callt_save_r2_r31). */ - .align 2 -.L_save_r2_r31: - add -4, sp - st.w r2, 0[sp] - prepare {r20 - r29, r31}, 0 - ctret - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: callt ctoff(__callt_return_r2_r31). */ - .align 2 -.L_return_r2_r31: - dispose 0, {r20 - r29, r31} - ld.w 0[sp], r2 - addi 4, sp, sp - jmp [r31] - - /* Place the offsets of the start of these routines into the call table. */ - .call_table_data - - .global __callt_save_r2_r31 - .type __callt_save_r2_r31,@function -__callt_save_r2_r31: .short ctoff(.L_save_r2_r31) - - .global __callt_return_r2_r31 - .type __callt_return_r2_r31,@function -__callt_return_r2_r31: .short ctoff(.L_return_r2_r31) - -#endif /* L_callt_save_r2_r31 */ - -#ifdef L_callt_save_interrupt - /* Put these functions into the call table area. */ - .call_table_text - - /* Save registers r1, ep, gp, r10 on stack and load up with expected values. */ - /* Called via: callt ctoff(__callt_save_interrupt). */ - .align 2 -.L_save_interrupt: - /* SP has already been moved before callt ctoff(_save_interrupt). 
*/ - /* R1,R10,R11,ctpc,ctpsw has alread been saved bofore callt ctoff(_save_interrupt). */ - /* addi -28, sp, sp */ - /* st.w r1, 24[sp] */ - /* st.w r10, 12[sp] */ - /* st.w r11, 16[sp] */ - /* stsr ctpc, r10 */ - /* st.w r10, 20[sp] */ - /* stsr ctpsw, r10 */ - /* st.w r10, 24[sp] */ - st.w ep, 0[sp] - st.w gp, 4[sp] - st.w r1, 8[sp] - mov hilo(__ep),ep - mov hilo(__gp),gp - ctret - - .call_table_text - /* Restore saved registers, deallocate stack and return from the interrupt. */ - /* Called via: callt ctoff(__callt_restore_interrupt). */ - .align 2 - .globl __return_interrupt - .type __return_interrupt,@function -.L_return_interrupt: - ld.w 24[sp], r1 - ldsr r1, ctpsw - ld.w 20[sp], r1 - ldsr r1, ctpc - ld.w 16[sp], r11 - ld.w 12[sp], r10 - ld.w 8[sp], r1 - ld.w 4[sp], gp - ld.w 0[sp], ep - addi 28, sp, sp - reti - - /* Place the offsets of the start of these routines into the call table. */ - .call_table_data - - .global __callt_save_interrupt - .type __callt_save_interrupt,@function -__callt_save_interrupt: .short ctoff(.L_save_interrupt) - - .global __callt_return_interrupt - .type __callt_return_interrupt,@function -__callt_return_interrupt: .short ctoff(.L_return_interrupt) - -#endif /* L_callt_save_interrupt */ - -#ifdef L_callt_save_all_interrupt - /* Put these functions into the call table area. */ - .call_table_text - - /* Save all registers except for those saved in __save_interrupt. */ - /* Allocate enough stack for all of the registers & 16 bytes of space. */ - /* Called via: callt ctoff(__callt_save_all_interrupt). */ - .align 2 -.L_save_all_interrupt: - addi -60, sp, sp -#ifdef __EP__ - mov ep, r1 - mov sp, ep - sst.w r2, 56[ep] - sst.w r5, 52[ep] - sst.w r6, 48[ep] - sst.w r7, 44[ep] - sst.w r8, 40[ep] - sst.w r9, 36[ep] - sst.w r11, 32[ep] - sst.w r12, 28[ep] - sst.w r13, 24[ep] - sst.w r14, 20[ep] - sst.w r15, 16[ep] - sst.w r16, 12[ep] - sst.w r17, 8[ep] - sst.w r18, 4[ep] - sst.w r19, 0[ep] - mov r1, ep -#else - st.w r2, 56[sp] - st.w r5, 52[sp] - st.w r6, 48[sp] - st.w r7, 44[sp] - st.w r8, 40[sp] - st.w r9, 36[sp] - st.w r11, 32[sp] - st.w r12, 28[sp] - st.w r13, 24[sp] - st.w r14, 20[sp] - st.w r15, 16[sp] - st.w r16, 12[sp] - st.w r17, 8[sp] - st.w r18, 4[sp] - st.w r19, 0[sp] -#endif - prepare {r20 - r29, r31}, 0 - ctret - - /* Restore all registers saved in __save_all_interrupt - deallocate the stack space. */ - /* Called via: callt ctoff(__callt_restore_all_interrupt). */ - .align 2 -.L_restore_all_interrupt: - dispose 0, {r20 - r29, r31} -#ifdef __EP__ - mov ep, r1 - mov sp, ep - sld.w 0 [ep], r19 - sld.w 4 [ep], r18 - sld.w 8 [ep], r17 - sld.w 12[ep], r16 - sld.w 16[ep], r15 - sld.w 20[ep], r14 - sld.w 24[ep], r13 - sld.w 28[ep], r12 - sld.w 32[ep], r11 - sld.w 36[ep], r9 - sld.w 40[ep], r8 - sld.w 44[ep], r7 - sld.w 48[ep], r6 - sld.w 52[ep], r5 - sld.w 56[ep], r2 - mov r1, ep -#else - ld.w 0 [sp], r19 - ld.w 4 [sp], r18 - ld.w 8 [sp], r17 - ld.w 12[sp], r16 - ld.w 16[sp], r15 - ld.w 20[sp], r14 - ld.w 24[sp], r13 - ld.w 28[sp], r12 - ld.w 32[sp], r11 - ld.w 36[sp], r9 - ld.w 40[sp], r8 - ld.w 44[sp], r7 - ld.w 48[sp], r6 - ld.w 52[sp], r5 - ld.w 56[sp], r2 -#endif - addi 60, sp, sp - ctret - - /* Place the offsets of the start of these routines into the call table. 
*/ - .call_table_data - - .global __callt_save_all_interrupt - .type __callt_save_all_interrupt,@function -__callt_save_all_interrupt: .short ctoff(.L_save_all_interrupt) - - .global __callt_restore_all_interrupt - .type __callt_restore_all_interrupt,@function -__callt_restore_all_interrupt: .short ctoff(.L_restore_all_interrupt) - -#endif /* L_callt_save_all_interrupt */ - - -#define MAKE_CALLT_FUNCS( START ) \ - .call_table_text ;\ - .align 2 ;\ - /* Allocate space and save registers START .. r29 on the stack. */ ;\ - /* Called via: callt ctoff(__callt_save_START_r29). */ ;\ -.L_save_##START##_r29: ;\ - prepare { START - r29 }, 0 ;\ - ctret ;\ - ;\ - /* Restore saved registers, deallocate stack and return. */ ;\ - /* Called via: callt ctoff(__return_START_r29). */ ;\ - .align 2 ;\ -.L_return_##START##_r29: ;\ - dispose 0, { START - r29 }, r31 ;\ - ;\ - /* Place the offsets of the start of these funcs into the call table. */;\ - .call_table_data ;\ - ;\ - .global __callt_save_##START##_r29 ;\ - .type __callt_save_##START##_r29,@function ;\ -__callt_save_##START##_r29: .short ctoff(.L_save_##START##_r29 ) ;\ - ;\ - .global __callt_return_##START##_r29 ;\ - .type __callt_return_##START##_r29,@function ;\ -__callt_return_##START##_r29: .short ctoff(.L_return_##START##_r29 ) - - -#define MAKE_CALLT_CFUNCS( START ) \ - .call_table_text ;\ - .align 2 ;\ - /* Allocate space and save registers START .. r31 on the stack. */ ;\ - /* Called via: callt ctoff(__callt_save_START_r31c). */ ;\ -.L_save_##START##_r31c: ;\ - prepare { START - r29, r31}, 0 ;\ - ctret ;\ - ;\ - /* Restore saved registers, deallocate stack and return. */ ;\ - /* Called via: callt ctoff(__return_START_r31c). */ ;\ - .align 2 ;\ -.L_return_##START##_r31c: ;\ - dispose 0, { START - r29, r31}, r31 ;\ - ;\ - /* Place the offsets of the start of these funcs into the call table. */;\ - .call_table_data ;\ - ;\ - .global __callt_save_##START##_r31c ;\ - .type __callt_save_##START##_r31c,@function ;\ -__callt_save_##START##_r31c: .short ctoff(.L_save_##START##_r31c ) ;\ - ;\ - .global __callt_return_##START##_r31c ;\ - .type __callt_return_##START##_r31c,@function ;\ -__callt_return_##START##_r31c: .short ctoff(.L_return_##START##_r31c ) - - -#ifdef L_callt_save_20 - MAKE_CALLT_FUNCS (r20) -#endif -#ifdef L_callt_save_21 - MAKE_CALLT_FUNCS (r21) -#endif -#ifdef L_callt_save_22 - MAKE_CALLT_FUNCS (r22) -#endif -#ifdef L_callt_save_23 - MAKE_CALLT_FUNCS (r23) -#endif -#ifdef L_callt_save_24 - MAKE_CALLT_FUNCS (r24) -#endif -#ifdef L_callt_save_25 - MAKE_CALLT_FUNCS (r25) -#endif -#ifdef L_callt_save_26 - MAKE_CALLT_FUNCS (r26) -#endif -#ifdef L_callt_save_27 - MAKE_CALLT_FUNCS (r27) -#endif -#ifdef L_callt_save_28 - MAKE_CALLT_FUNCS (r28) -#endif -#ifdef L_callt_save_29 - MAKE_CALLT_FUNCS (r29) -#endif - -#ifdef L_callt_save_20c - MAKE_CALLT_CFUNCS (r20) -#endif -#ifdef L_callt_save_21c - MAKE_CALLT_CFUNCS (r21) -#endif -#ifdef L_callt_save_22c - MAKE_CALLT_CFUNCS (r22) -#endif -#ifdef L_callt_save_23c - MAKE_CALLT_CFUNCS (r23) -#endif -#ifdef L_callt_save_24c - MAKE_CALLT_CFUNCS (r24) -#endif -#ifdef L_callt_save_25c - MAKE_CALLT_CFUNCS (r25) -#endif -#ifdef L_callt_save_26c - MAKE_CALLT_CFUNCS (r26) -#endif -#ifdef L_callt_save_27c - MAKE_CALLT_CFUNCS (r27) -#endif -#ifdef L_callt_save_28c - MAKE_CALLT_CFUNCS (r28) -#endif -#ifdef L_callt_save_29c - MAKE_CALLT_CFUNCS (r29) -#endif - - -#ifdef L_callt_save_31c - .call_table_text - .align 2 - /* Allocate space and save register r31 on the stack. 
*/ - /* Called via: callt ctoff(__callt_save_r31c). */ -.L_callt_save_r31c: - prepare {r31}, 0 - ctret - - /* Restore saved registers, deallocate stack and return. */ - /* Called via: callt ctoff(__return_r31c). */ - .align 2 -.L_callt_return_r31c: - dispose 0, {r31}, r31 - - /* Place the offsets of the start of these funcs into the call table. */ - .call_table_data - - .global __callt_save_r31c - .type __callt_save_r31c,@function -__callt_save_r31c: .short ctoff(.L_callt_save_r31c) - - .global __callt_return_r31c - .type __callt_return_r31c,@function -__callt_return_r31c: .short ctoff(.L_callt_return_r31c) -#endif - -#endif /* __v850e__ */ - -/* libgcc2 routines for NEC V850. */ -/* Double Integer Arithmetical Operation. */ - -#ifdef L_negdi2 - .text - .global ___negdi2 - .type ___negdi2, @function -___negdi2: - not r6, r10 - add 1, r10 - setf l, r6 - not r7, r11 - add r6, r11 - jmp [lp] - - .size ___negdi2,.-___negdi2 -#endif - -#ifdef L_cmpdi2 - .text - .global ___cmpdi2 - .type ___cmpdi2,@function -___cmpdi2: - # Signed comparison bitween each high word. - cmp r9, r7 - be .L_cmpdi_cmp_low - setf ge, r10 - setf gt, r6 - add r6, r10 - jmp [lp] -.L_cmpdi_cmp_low: - # Unsigned comparigon bitween each low word. - cmp r8, r6 - setf nl, r10 - setf h, r6 - add r6, r10 - jmp [lp] - .size ___cmpdi2, . - ___cmpdi2 -#endif - -#ifdef L_ucmpdi2 - .text - .global ___ucmpdi2 - .type ___ucmpdi2,@function -___ucmpdi2: - cmp r9, r7 # Check if each high word are same. - bne .L_ucmpdi_check_psw - cmp r8, r6 # Compare the word. -.L_ucmpdi_check_psw: - setf nl, r10 # - setf h, r6 # - add r6, r10 # Add the result of comparison NL and comparison H. - jmp [lp] - .size ___ucmpdi2, . - ___ucmpdi2 -#endif - -#ifdef L_muldi3 - .text - .global ___muldi3 - .type ___muldi3,@function -___muldi3: -#ifdef __v850__ - jarl __save_r26_r31, r10 - addi 16, sp, sp - mov r6, r28 - shr 15, r28 - movea lo(32767), r0, r14 - and r14, r28 - mov r8, r10 - shr 15, r10 - and r14, r10 - mov r6, r19 - shr 30, r19 - mov r7, r12 - shl 2, r12 - or r12, r19 - and r14, r19 - mov r8, r13 - shr 30, r13 - mov r9, r12 - shl 2, r12 - or r12, r13 - and r14, r13 - mov r7, r11 - shr 13, r11 - and r14, r11 - mov r9, r31 - shr 13, r31 - and r14, r31 - mov r7, r29 - shr 28, r29 - and r14, r29 - mov r9, r12 - shr 28, r12 - and r14, r12 - and r14, r6 - and r14, r8 - mov r6, r14 - mulh r8, r14 - mov r6, r16 - mulh r10, r16 - mov r6, r18 - mulh r13, r18 - mov r6, r15 - mulh r31, r15 - mulh r12, r6 - mov r28, r17 - mulh r10, r17 - add -16, sp - mov r28, r12 - mulh r8, r12 - add r17, r18 - mov r28, r17 - mulh r31, r17 - add r12, r16 - mov r28, r12 - mulh r13, r12 - add r17, r6 - mov r19, r17 - add r12, r15 - mov r19, r12 - mulh r8, r12 - mulh r10, r17 - add r12, r18 - mov r19, r12 - mulh r13, r12 - add r17, r15 - mov r11, r13 - mulh r8, r13 - add r12, r6 - mov r11, r12 - mulh r10, r12 - add r13, r15 - mulh r29, r8 - add r12, r6 - mov r16, r13 - shl 15, r13 - add r14, r13 - mov r18, r12 - shl 30, r12 - mov r13, r26 - add r12, r26 - shr 15, r14 - movhi hi(131071), r0, r12 - movea lo(131071), r12, r13 - and r13, r14 - mov r16, r12 - and r13, r12 - add r12, r14 - mov r18, r12 - shl 15, r12 - and r13, r12 - add r12, r14 - shr 17, r14 - shr 17, r16 - add r14, r16 - shl 13, r15 - shr 2, r18 - add r18, r15 - add r15, r16 - mov r16, r27 - add r8, r6 - shl 28, r6 - add r6, r27 - mov r26, r10 - mov r27, r11 - jr __return_r26_r31 -#else /* defined(__v850e__) */ - /* (Ahi << 32 + Alo) * (Bhi << 32 + Blo) */ - /* r7 r6 r9 r8 */ - mov r8, r10 - mulu r7, r8, r0 /* Ahi * Blo 
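For reference (and outside the patch proper), the DImode helpers above implement the usual libgcc2 semantics; the minimal C sketch below models what each routine computes, not how the assembly computes it, and the *_model names are made up.

    /* C models of the v850 DImode helpers: negation, signed and unsigned
       three-way comparison (0: a < b, 1: a == b, 2: a > b), and multiply.
       `long long' is assumed to be the 64-bit DImode type.  */
    long long negdi2_model (long long a)
    {
      return -a;
    }

    int cmpdi2_model (long long a, long long b)
    {
      return a < b ? 0 : a == b ? 1 : 2;
    }

    int ucmpdi2_model (unsigned long long a, unsigned long long b)
    {
      return a < b ? 0 : a == b ? 1 : 2;
    }

    long long muldi3_model (long long a, long long b)
    {
      return a * b;
    }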
*/ - mulu r6, r9, r0 /* Alo * Bhi */ - mulu r6, r10, r11 /* Alo * Blo */ - add r8, r11 - add r9, r11 - jmp [r31] -#endif /* defined(__v850e__) */ - .size ___muldi3, . - ___muldi3 -#endif - diff --git a/gcc/config/v850/t-v850 b/gcc/config/v850/t-v850 index fcd3b841e30..7885229e631 100644 --- a/gcc/config/v850/t-v850 +++ b/gcc/config/v850/t-v850 @@ -17,67 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = v850/lib1funcs.asm -LIB1ASMFUNCS = _mulsi3 \ - _divsi3 \ - _udivsi3 \ - _modsi3 \ - _umodsi3 \ - _save_2 \ - _save_20 \ - _save_21 \ - _save_22 \ - _save_23 \ - _save_24 \ - _save_25 \ - _save_26 \ - _save_27 \ - _save_28 \ - _save_29 \ - _save_2c \ - _save_20c \ - _save_21c \ - _save_22c \ - _save_23c \ - _save_24c \ - _save_25c \ - _save_26c \ - _save_27c \ - _save_28c \ - _save_29c \ - _save_31c \ - _save_interrupt \ - _save_all_interrupt \ - _callt_save_20 \ - _callt_save_21 \ - _callt_save_22 \ - _callt_save_23 \ - _callt_save_24 \ - _callt_save_25 \ - _callt_save_26 \ - _callt_save_27 \ - _callt_save_28 \ - _callt_save_29 \ - _callt_save_20c \ - _callt_save_21c \ - _callt_save_22c \ - _callt_save_23c \ - _callt_save_24c \ - _callt_save_25c \ - _callt_save_26c \ - _callt_save_27c \ - _callt_save_28c \ - _callt_save_29c \ - _callt_save_31c \ - _callt_save_interrupt \ - _callt_save_all_interrupt \ - _callt_save_r2_r29 \ - _callt_save_r2_r31 \ - _negdi2 \ - _cmpdi2 \ - _ucmpdi2 \ - _muldi3 - # Create target-specific versions of the libraries MULTILIB_OPTIONS = mv850/mv850e/mv850e2/mv850e2v3 MULTILIB_DIRNAMES = v850 v850e v850e2 v850e2v3 diff --git a/gcc/config/vax/lib1funcs.asm b/gcc/config/vax/lib1funcs.asm deleted file mode 100644 index 1d57b56dad9..00000000000 --- a/gcc/config/vax/lib1funcs.asm +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright (C) 2009 Free Software Foundation, Inc. - This file is part of GCC. - Contributed by Maciej W. Rozycki . - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#ifdef L_udivsi3 - .text - .globl __udivsi3 - .type __udivsi3, @function -__udivsi3: - .word 0 - movl 8(%ap), %r1 - blss 0f /* Check bit #31 of divisor. */ - movl 4(%ap), %r2 - blss 1f /* Check bit #31 of dividend. */ - - /* Both zero, do a standard division. */ - - divl3 %r1, %r2, %r0 - ret - - /* MSB of divisor set, only 1 or 0 may result. */ -0: - decl %r1 - clrl %r0 - cmpl %r1, 4(%ap) - adwc $0, %r0 - ret - - /* MSB of dividend set, do an extended division. */ -1: - clrl %r3 - ediv %r1, %r2, %r0, %r3 - ret - .size __udivsi3, . - __udivsi3 - .previous -#endif - -#ifdef L_umodsi3 - .text - .globl __umodsi3 - .type __umodsi3, @function -__umodsi3: - .word 0 - movl 8(%ap), %r1 - blss 0f /* Check bit #31 of divisor. 
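The case split in the VAX __udivsi3 above (DIVL3 is a signed 32-bit divide, EDIV divides a 64-bit dividend) corresponds roughly to the following C sketch, offered only as a reading aid; the function name is invented.

    /* Rough C model of the VAX __udivsi3 above.  DIVL3 only copes with
       operands that are non-negative as signed 32-bit values, so the two
       "bit #31 set" cases are handled separately: a divisor with the MSB set
       can only produce a quotient of 0 or 1, and a dividend with the MSB set
       is widened and divided with EDIV.  */
    unsigned int udivsi3_model (unsigned int num, unsigned int den)
    {
      if (den & 0x80000000u)
        return num >= den;                                       /* 0 or 1 */
      if (num & 0x80000000u)
        return (unsigned int) ((unsigned long long) num / den);  /* EDIV */
      return (unsigned int) ((int) num / (int) den);             /* DIVL3 */
    }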
*/ - movl 4(%ap), %r2 - blss 1f /* Check bit #31 of dividend. */ - - /* Both zero, do a standard division. */ - - divl3 %r1, %r2, %r0 - mull2 %r0, %r1 - subl3 %r1, %r2, %r0 - ret - - /* MSB of divisor set, subtract the divisor at most once. */ -0: - movl 4(%ap), %r2 - clrl %r0 - cmpl %r2, %r1 - sbwc $0, %r0 - bicl2 %r0, %r1 - subl3 %r1, %r2, %r0 - ret - - /* MSB of dividend set, do an extended division. */ -1: - clrl %r3 - ediv %r1, %r2, %r3, %r0 - ret - .size __umodsi3, . - __umodsi3 - .previous -#endif diff --git a/gcc/config/vax/t-linux b/gcc/config/vax/t-linux deleted file mode 100644 index 9af1edb0fab..00000000000 --- a/gcc/config/vax/t-linux +++ /dev/null @@ -1,2 +0,0 @@ -LIB1ASMSRC = vax/lib1funcs.asm -LIB1ASMFUNCS = _udivsi3 _umodsi3 diff --git a/gcc/config/xtensa/ieee754-df.S b/gcc/config/xtensa/ieee754-df.S deleted file mode 100644 index 9b46889bdc2..00000000000 --- a/gcc/config/xtensa/ieee754-df.S +++ /dev/null @@ -1,2388 +0,0 @@ -/* IEEE-754 double-precision functions for Xtensa - Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. - Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#ifdef __XTENSA_EB__ -#define xh a2 -#define xl a3 -#define yh a4 -#define yl a5 -#else -#define xh a3 -#define xl a2 -#define yh a5 -#define yl a4 -#endif - -/* Warning! The branch displacements for some Xtensa branch instructions - are quite small, and this code has been carefully laid out to keep - branch targets in range. If you change anything, be sure to check that - the assembler is not relaxing anything to branch over a jump. */ - -#ifdef L_negdf2 - - .align 4 - .global __negdf2 - .type __negdf2, @function -__negdf2: - leaf_entry sp, 16 - movi a4, 0x80000000 - xor xh, xh, a4 - leaf_return - -#endif /* L_negdf2 */ - -#ifdef L_addsubdf3 - - /* Addition */ -__adddf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Ladd_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall yh, a6, 1f - /* If x is a NaN, return it. Otherwise, return y. */ - slli a7, xh, 12 - or a7, a7, xl - beqz a7, .Ladd_ynan_or_inf -1: leaf_return - -.Ladd_ynan_or_inf: - /* Return y. */ - mov xh, yh - mov xl, yl - leaf_return - -.Ladd_opposite_signs: - /* Operand signs differ. Do a subtraction. */ - slli a7, a6, 11 - xor yh, yh, a7 - j .Lsub_same_sign - - .align 4 - .global __adddf3 - .type __adddf3, @function -__adddf3: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - - /* Check if the two operands have the same sign. 
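The xh/xl word pair used throughout the Xtensa ieee754-df.S code above follows the standard IEEE-754 binary64 layout, with the sign, 11-bit exponent and top 20 mantissa bits in the high word; a small illustrative C sketch, with invented names, is:

    #include <stdint.h>

    /* Model of the xh/xl split: the high word holds sign, exponent and the
       top 20 mantissa bits; the low word holds the remaining 32 bits.  */
    typedef struct { uint32_t xh, xl; } df_words;

    /* __negdf2 above simply flips the sign bit of the high word.  */
    static df_words negdf2_model (df_words x)
    {
      x.xh ^= 0x80000000u;
      return x;
    }

    /* The 0x7ff00000 tests above check for an all-ones exponent field,
       i.e. NaN or Infinity.  */
    static int nan_or_inf_p (df_words x)
    {
      return (x.xh & 0x7ff00000u) == 0x7ff00000u;
    }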
*/ - xor a7, xh, yh - bltz a7, .Ladd_opposite_signs - -.Ladd_same_sign: - /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ - ball xh, a6, .Ladd_xnan_or_inf - ball yh, a6, .Ladd_ynan_or_inf - - /* Compare the exponents. The smaller operand will be shifted - right by the exponent difference and added to the larger - one. */ - extui a7, xh, 20, 12 - extui a8, yh, 20, 12 - bltu a7, a8, .Ladd_shiftx - -.Ladd_shifty: - /* Check if the smaller (or equal) exponent is zero. */ - bnone yh, a6, .Ladd_yexpzero - - /* Replace yh sign/exponent with 0x001. */ - or yh, yh, a6 - slli yh, yh, 11 - srli yh, yh, 11 - -.Ladd_yexpdiff: - /* Compute the exponent difference. Optimize for difference < 32. */ - sub a10, a7, a8 - bgeui a10, 32, .Ladd_bigshifty - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out of yl are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, yl, a9 - src yl, yh, yl - srl yh, yh - -.Ladd_addy: - /* Do the 64-bit addition. */ - add xl, xl, yl - add xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, 1 -1: - /* Check if the add overflowed into the exponent. */ - extui a10, xh, 20, 12 - beq a10, a7, .Ladd_round - mov a8, a7 - j .Ladd_carry - -.Ladd_yexpzero: - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0", and increment the apparent exponent - because subnormals behave as if they had the minimum (nonzero) - exponent. Test for the case when both exponents are zero. */ - slli yh, yh, 12 - srli yh, yh, 12 - bnone xh, a6, .Ladd_bothexpzero - addi a8, a8, 1 - j .Ladd_yexpdiff - -.Ladd_bothexpzero: - /* Both exponents are zero. Handle this as a special case. There - is no need to shift or round, and the normal code for handling - a carry into the exponent field will not work because it - assumes there is an implicit "1.0" that needs to be added. */ - add xl, xl, yl - add xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, 1 -1: leaf_return - -.Ladd_bigshifty: - /* Exponent difference > 64 -- just return the bigger value. */ - bgeui a10, 64, 1b - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out are saved in a9 for rounding the result. */ - ssr a10 - sll a11, yl /* lost bits shifted out of yl */ - src a9, yh, yl - srl yl, yh - movi yh, 0 - beqz a11, .Ladd_addy - or a9, a9, a10 /* any positive, nonzero value will work */ - j .Ladd_addy - -.Ladd_xexpzero: - /* Same as "yexpzero" except skip handling the case when both - exponents are zero. */ - slli xh, xh, 12 - srli xh, xh, 12 - addi a7, a7, 1 - j .Ladd_xexpdiff - -.Ladd_shiftx: - /* Same thing as the "shifty" code, but with x and y swapped. Also, - because the exponent difference is always nonzero in this version, - the shift sequence can use SLL and skip loading a constant zero. */ - bnone xh, a6, .Ladd_xexpzero - - or xh, xh, a6 - slli xh, xh, 11 - srli xh, xh, 11 - -.Ladd_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Ladd_bigshiftx - - ssr a10 - sll a9, xl - src xl, xh, xl - srl xh, xh - -.Ladd_addx: - add xl, xl, yl - add xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, 1 -1: - /* Check if the add overflowed into the exponent. */ - extui a10, xh, 20, 12 - bne a10, a8, .Ladd_carry - -.Ladd_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi xl, xl, 1 - beqz xl, .Ladd_roundcarry - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_bigshiftx: - /* Mostly the same thing as "bigshifty".... 
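The code at .Ladd_round/.Ladd_exactlyhalf above keeps every bit shifted out of the mantissa in one register (a9), with the guard bit in the most significant position; the hedged C sketch below, with invented names, shows the round-to-nearest-even decision it implements.

    #include <stdint.h>

    /* Sketch of .Ladd_round: `mant' stands for the 64-bit mantissa in xh:xl,
       `frac' for the shifted-out bits in a9 (MSB = guard bit, the remaining
       bits act as a sticky indicator).  Round up when frac >= 1/2; when it
       is exactly 1/2, clear the low bit so the result is even.  */
    static uint64_t round_nearest_even (uint64_t mant, uint32_t frac)
    {
      if (frac & 0x80000000u)
        {
          mant += 1;
          if ((uint32_t) (frac << 1) == 0)
            mant &= ~(uint64_t) 1;
        }
      return mant;
    }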
*/ - bgeui a10, 64, .Ladd_returny - - ssr a10 - sll a11, xl - src a9, xh, xl - srl xl, xh - movi xh, 0 - beqz a11, .Ladd_addx - or a9, a9, a10 - j .Ladd_addx - -.Ladd_returny: - mov xh, yh - mov xl, yl - leaf_return - -.Ladd_carry: - /* The addition has overflowed into the exponent field, so the - value needs to be renormalized. The mantissa of the result - can be recovered by subtracting the original exponent and - adding 0x100000 (which is the explicit "1.0" for the - mantissa of the non-shifted operand -- the "1.0" for the - shifted operand was already added). The mantissa can then - be shifted right by one bit. The explicit "1.0" of the - shifted mantissa then needs to be replaced by the exponent, - incremented by one to account for the normalizing shift. - It is faster to combine these operations: do the shift first - and combine the additions and subtractions. If x is the - original exponent, the result is: - shifted mantissa - (x << 19) + (1 << 19) + (x << 20) - or: - shifted mantissa + ((x + 1) << 19) - Note that the exponent is incremented here by leaving the - explicit "1.0" of the mantissa in the exponent field. */ - - /* Shift xh/xl right by one bit. Save the lsb of xl. */ - mov a10, xl - ssai 1 - src xl, xh, xl - srl xh, xh - - /* See explanation above. The original exponent is in a8. */ - addi a8, a8, 1 - slli a8, a8, 19 - add xh, xh, a8 - - /* Return an Infinity if the exponent overflowed. */ - ball xh, a6, .Ladd_infinity - - /* Same thing as the "round" code except the msb of the leftover - fraction is bit 0 of a10, with the rest of the fraction in a9. */ - bbci.l a10, 0, 1f - addi xl, xl, 1 - beqz xl, .Ladd_roundcarry - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_infinity: - /* Clear the mantissa. */ - movi xl, 0 - srli xh, xh, 20 - slli xh, xh, 20 - - /* The sign bit may have been lost in a carry-out. Put it back. */ - slli a8, a8, 1 - or xh, xh, a8 - leaf_return - -.Ladd_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - leaf_return - -.Ladd_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow to the exponent is OK. */ - leaf_return - - - /* Subtraction */ -__subdf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Lsub_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall yh, a6, 1f - /* Both x and y are either NaN or Inf, so the result is NaN. */ - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 -1: leaf_return - -.Lsub_ynan_or_inf: - /* Negate y and return it. */ - slli a7, a6, 11 - xor xh, yh, a7 - mov xl, yl - leaf_return - -.Lsub_opposite_signs: - /* Operand signs differ. Do an addition. */ - slli a7, a6, 11 - xor yh, yh, a7 - j .Ladd_same_sign - - .align 4 - .global __subdf3 - .type __subdf3, @function -__subdf3: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - - /* Check if the two operands have the same sign. */ - xor a7, xh, yh - bltz a7, .Lsub_opposite_signs - -.Lsub_same_sign: - /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ - ball xh, a6, .Lsub_xnan_or_inf - ball yh, a6, .Lsub_ynan_or_inf - - /* Compare the operands. In contrast to addition, the entire - value matters here. 
*/ - extui a7, xh, 20, 11 - extui a8, yh, 20, 11 - bltu xh, yh, .Lsub_xsmaller - beq xh, yh, .Lsub_compare_low - -.Lsub_ysmaller: - /* Check if the smaller (or equal) exponent is zero. */ - bnone yh, a6, .Lsub_yexpzero - - /* Replace yh sign/exponent with 0x001. */ - or yh, yh, a6 - slli yh, yh, 11 - srli yh, yh, 11 - -.Lsub_yexpdiff: - /* Compute the exponent difference. Optimize for difference < 32. */ - sub a10, a7, a8 - bgeui a10, 32, .Lsub_bigshifty - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out of yl are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, yl, a9 - src yl, yh, yl - srl yh, yh - -.Lsub_suby: - /* Do the 64-bit subtraction. */ - sub xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, -1 -1: sub xl, xl, yl - - /* Subtract the leftover bits in a9 from zero and propagate any - borrow from xh/xl. */ - neg a9, a9 - beqz a9, 1f - addi a5, xh, -1 - moveqz xh, a5, xl - addi xl, xl, -1 -1: - /* Check if the subtract underflowed into the exponent. */ - extui a10, xh, 20, 11 - beq a10, a7, .Lsub_round - j .Lsub_borrow - -.Lsub_compare_low: - /* The high words are equal. Compare the low words. */ - bltu xl, yl, .Lsub_xsmaller - bltu yl, xl, .Lsub_ysmaller - /* The operands are equal. Return 0.0. */ - movi xh, 0 - movi xl, 0 -1: leaf_return - -.Lsub_yexpzero: - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0". Unless x is also a subnormal, increment - y's apparent exponent because subnormals behave as if they had - the minimum (nonzero) exponent. */ - slli yh, yh, 12 - srli yh, yh, 12 - bnone xh, a6, .Lsub_yexpdiff - addi a8, a8, 1 - j .Lsub_yexpdiff - -.Lsub_bigshifty: - /* Exponent difference > 64 -- just return the bigger value. */ - bgeui a10, 64, 1b - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out are saved in a9 for rounding the result. */ - ssr a10 - sll a11, yl /* lost bits shifted out of yl */ - src a9, yh, yl - srl yl, yh - movi yh, 0 - beqz a11, .Lsub_suby - or a9, a9, a10 /* any positive, nonzero value will work */ - j .Lsub_suby - -.Lsub_xsmaller: - /* Same thing as the "ysmaller" code, but with x and y swapped and - with y negated. */ - bnone xh, a6, .Lsub_xexpzero - - or xh, xh, a6 - slli xh, xh, 11 - srli xh, xh, 11 - -.Lsub_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Lsub_bigshiftx - - ssr a10 - movi a9, 0 - src a9, xl, a9 - src xl, xh, xl - srl xh, xh - - /* Negate y. */ - slli a11, a6, 11 - xor yh, yh, a11 - -.Lsub_subx: - sub xl, yl, xl - sub xh, yh, xh - bgeu yl, xl, 1f - addi xh, xh, -1 -1: - /* Subtract the leftover bits in a9 from zero and propagate any - borrow from xh/xl. */ - neg a9, a9 - beqz a9, 1f - addi a5, xh, -1 - moveqz xh, a5, xl - addi xl, xl, -1 -1: - /* Check if the subtract underflowed into the exponent. */ - extui a10, xh, 20, 11 - bne a10, a8, .Lsub_borrow - -.Lsub_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi xl, xl, 1 - beqz xl, .Lsub_roundcarry - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Lsub_exactlyhalf -1: leaf_return - -.Lsub_xexpzero: - /* Same as "yexpzero". */ - slli xh, xh, 12 - srli xh, xh, 12 - bnone yh, a6, .Lsub_xexpdiff - addi a7, a7, 1 - j .Lsub_xexpdiff - -.Lsub_bigshiftx: - /* Mostly the same thing as "bigshifty", but with the sign bit of the - shifted value set so that the subsequent subtraction flips the - sign of y. 
*/ - bgeui a10, 64, .Lsub_returny - - ssr a10 - sll a11, xl - src a9, xh, xl - srl xl, xh - slli xh, a6, 11 /* set sign bit of xh */ - beqz a11, .Lsub_subx - or a9, a9, a10 - j .Lsub_subx - -.Lsub_returny: - /* Negate and return y. */ - slli a7, a6, 11 - xor xh, yh, a7 - mov xl, yl - leaf_return - -.Lsub_borrow: - /* The subtraction has underflowed into the exponent field, so the - value needs to be renormalized. Shift the mantissa left as - needed to remove any leading zeros and adjust the exponent - accordingly. If the exponent is not large enough to remove - all the leading zeros, the result will be a subnormal value. */ - - slli a8, xh, 12 - beqz a8, .Lsub_xhzero - do_nsau a6, a8, a7, a11 - srli a8, a8, 12 - bge a6, a10, .Lsub_subnormal - addi a6, a6, 1 - -.Lsub_shift_lt32: - /* Shift the mantissa (a8/xl/a9) left by a6. */ - ssl a6 - src a8, a8, xl - src xl, xl, a9 - sll a9, a9 - - /* Combine the shifted mantissa with the sign and exponent, - decrementing the exponent by a6. (The exponent has already - been decremented by one due to the borrow from the subtraction, - but adding the mantissa will increment the exponent by one.) */ - srli xh, xh, 20 - sub xh, xh, a6 - slli xh, xh, 20 - add xh, xh, a8 - j .Lsub_round - -.Lsub_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - leaf_return - -.Lsub_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow to the exponent is OK. */ - leaf_return - -.Lsub_xhzero: - /* When normalizing the result, all the mantissa bits in the high - word are zero. Shift by "20 + (leading zero count of xl) + 1". */ - do_nsau a6, xl, a7, a11 - addi a6, a6, 21 - blt a10, a6, .Lsub_subnormal - -.Lsub_normalize_shift: - bltui a6, 32, .Lsub_shift_lt32 - - ssl a6 - src a8, xl, a9 - sll xl, a9 - movi a9, 0 - - srli xh, xh, 20 - sub xh, xh, a6 - slli xh, xh, 20 - add xh, xh, a8 - j .Lsub_round - -.Lsub_subnormal: - /* The exponent is too small to shift away all the leading zeros. - Set a6 to the current exponent (which has already been - decremented by the borrow) so that the exponent of the result - will be zero. Do not add 1 to a6 in this case, because: (1) - adding the mantissa will not increment the exponent, so there is - no need to subtract anything extra from the exponent to - compensate, and (2) the effective exponent of a subnormal is 1 - not 0 so the shift amount must be 1 smaller than normal. */ - mov a6, a10 - j .Lsub_normalize_shift - -#endif /* L_addsubdf3 */ - -#ifdef L_muldf3 - - /* Multiplication */ -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 -#define XCHAL_NO_MUL 1 -#endif - -__muldf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). - (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) */ - -.Lmul_xexpzero: - /* Clear the sign bit of x. */ - slli xh, xh, 1 - srli xh, xh, 1 - - /* If x is zero, return zero. */ - or a10, xh, xl - beqz a10, .Lmul_return_zero - - /* Normalize x. Adjust the exponent in a8. 
*/ - beqz xh, .Lmul_xh_zero - do_nsau a10, xh, a11, a12 - addi a10, a10, -11 - ssl a10 - src xh, xh, xl - sll xl, xl - movi a8, 1 - sub a8, a8, a10 - j .Lmul_xnormalized -.Lmul_xh_zero: - do_nsau a10, xl, a11, a12 - addi a10, a10, -11 - movi a8, -31 - sub a8, a8, a10 - ssl a10 - bltz a10, .Lmul_xl_srl - sll xh, xl - movi xl, 0 - j .Lmul_xnormalized -.Lmul_xl_srl: - srl xh, xl - sll xl, xl - j .Lmul_xnormalized - -.Lmul_yexpzero: - /* Clear the sign bit of y. */ - slli yh, yh, 1 - srli yh, yh, 1 - - /* If y is zero, return zero. */ - or a10, yh, yl - beqz a10, .Lmul_return_zero - - /* Normalize y. Adjust the exponent in a9. */ - beqz yh, .Lmul_yh_zero - do_nsau a10, yh, a11, a12 - addi a10, a10, -11 - ssl a10 - src yh, yh, yl - sll yl, yl - movi a9, 1 - sub a9, a9, a10 - j .Lmul_ynormalized -.Lmul_yh_zero: - do_nsau a10, yl, a11, a12 - addi a10, a10, -11 - movi a9, -31 - sub a9, a9, a10 - ssl a10 - bltz a10, .Lmul_yl_srl - sll yh, yl - movi yl, 0 - j .Lmul_ynormalized -.Lmul_yl_srl: - srl yh, yl - sll yl, yl - j .Lmul_ynormalized - -.Lmul_return_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - j .Lmul_done - -.Lmul_xnan_or_inf: - /* If y is zero, return NaN. */ - bnez yl, 1f - slli a8, yh, 1 - bnez a8, 1f - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 - j .Lmul_done -1: - /* If y is NaN, return y. */ - bnall yh, a6, .Lmul_returnx - slli a8, yh, 12 - or a8, a8, yl - beqz a8, .Lmul_returnx - -.Lmul_returny: - mov xh, yh - mov xl, yl - -.Lmul_returnx: - /* Set the sign bit and return. */ - extui a7, a7, 31, 1 - slli xh, xh, 1 - ssai 1 - src xh, a7, xh - j .Lmul_done - -.Lmul_ynan_or_inf: - /* If x is zero, return NaN. */ - bnez xl, .Lmul_returny - slli a8, xh, 1 - bnez a8, .Lmul_returny - movi a7, 0x80000 /* make it a quiet NaN */ - or xh, yh, a7 - j .Lmul_done - - .align 4 - .global __muldf3 - .type __muldf3, @function -__muldf3: -#if __XTENSA_CALL0_ABI__ - leaf_entry sp, 32 - addi sp, sp, -32 - s32i a12, sp, 16 - s32i a13, sp, 20 - s32i a14, sp, 24 - s32i a15, sp, 28 -#elif XCHAL_NO_MUL - /* This is not really a leaf function; allocate enough stack space - to allow CALL12s to a helper function. */ - leaf_entry sp, 64 -#else - leaf_entry sp, 32 -#endif - movi a6, 0x7ff00000 - - /* Get the sign of the result. */ - xor a7, xh, yh - - /* Check for NaN and infinity. */ - ball xh, a6, .Lmul_xnan_or_inf - ball yh, a6, .Lmul_ynan_or_inf - - /* Extract the exponents. */ - extui a8, xh, 20, 11 - extui a9, yh, 20, 11 - - beqz a8, .Lmul_xexpzero -.Lmul_xnormalized: - beqz a9, .Lmul_yexpzero -.Lmul_ynormalized: - - /* Add the exponents. */ - add a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0x1fffff - or xh, xh, a6 - and xh, xh, a10 - or yh, yh, a6 - and yh, yh, a10 - - /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6. - The least-significant word of the result is thrown away except - that if it is nonzero, the lsb of a6 is set to 1. */ -#if XCHAL_HAVE_MUL32_HIGH - - /* Compute a6 with any carry-outs in a10. */ - movi a10, 0 - mull a6, xl, yh - mull a11, xh, yl - add a6, a6, a11 - bgeu a6, a11, 1f - addi a10, a10, 1 -1: - muluh a11, xl, yl - add a6, a6, a11 - bgeu a6, a11, 1f - addi a10, a10, 1 -1: - /* If the low word of the result is nonzero, set the lsb of a6. */ - mull a11, xl, yl - beqz a11, 1f - movi a9, 1 - or a6, a6, a9 -1: - /* Compute xl with any carry-outs in a9. 
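The .Lmul_xexpzero/.Lmul_yexpzero paths above normalize a subnormal operand before multiplying: count its leading zeros, shift the mantissa left, and adjust the exponent by the shift amount. A loose C sketch of that step (invented names, assuming a nonzero mantissa since zero operands were already returned above) is:

    #include <stdint.h>

    /* Stand-in for the do_nsau leading-zero count; assumes v != 0.  */
    static int clz64 (uint64_t v)
    {
      int n = 0;
      while (!(v >> 63))
        {
          v <<= 1;
          n++;
        }
      return n;
    }

    /* Sketch of subnormal normalization: bring the leading 1 up to bit 52,
       where the implicit "1.0" of a normal value lives, and compensate in
       the exponent, which for a subnormal behaves as if it were 1.  This
       mirrors `movi a8, 1; sub a8, a8, a10' above.  */
    static void normalize_subnormal (uint64_t *mant, int *exp)
    {
      int shift = clz64 (*mant) - 11;
      *mant <<= shift;
      *exp = 1 - shift;
    }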
*/ - movi a9, 0 - mull a11, xh, yh - add a10, a10, a11 - bgeu a10, a11, 1f - addi a9, a9, 1 -1: - muluh a11, xh, yl - add a10, a10, a11 - bgeu a10, a11, 1f - addi a9, a9, 1 -1: - muluh xl, xl, yh - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - /* Compute xh. */ - muluh xh, xh, yh - add xh, xh, a9 - -#else /* ! XCHAL_HAVE_MUL32_HIGH */ - - /* Break the inputs into 16-bit chunks and compute 16 32-bit partial - products. These partial products are: - - 0 xll * yll - - 1 xll * ylh - 2 xlh * yll - - 3 xll * yhl - 4 xlh * ylh - 5 xhl * yll - - 6 xll * yhh - 7 xlh * yhl - 8 xhl * ylh - 9 xhh * yll - - 10 xlh * yhh - 11 xhl * yhl - 12 xhh * ylh - - 13 xhl * yhh - 14 xhh * yhl - - 15 xhh * yhh - - where the input chunks are (hh, hl, lh, ll). If using the Mul16 - or Mul32 multiplier options, these input chunks must be stored in - separate registers. For Mac16, the UMUL.AA.* opcodes can specify - that the inputs come from either half of the registers, so there - is no need to shift them out ahead of time. If there is no - multiply hardware, the 16-bit chunks can be extracted when setting - up the arguments to the separate multiply function. */ - - /* Save a7 since it is needed to hold a temporary value. */ - s32i a7, sp, 4 -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Calling a separate multiply function will clobber a0 and requires - use of a8 as a temporary, so save those values now. (The function - uses a custom ABI so nothing else needs to be saved.) */ - s32i a0, sp, 0 - s32i a8, sp, 8 -#endif - -#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 - -#define xlh a12 -#define ylh a13 -#define xhh a14 -#define yhh a15 - - /* Get the high halves of the inputs into registers. */ - srli xlh, xl, 16 - srli ylh, yl, 16 - srli xhh, xh, 16 - srli yhh, yh, 16 - -#define xll xl -#define yll yl -#define xhl xh -#define yhl yh - -#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 - /* Clear the high halves of the inputs. This does not matter - for MUL16 because the high bits are ignored. */ - extui xl, xl, 0, 16 - extui xh, xh, 0, 16 - extui yl, yl, 0, 16 - extui yh, yh, 0, 16 -#endif -#endif /* MUL16 || MUL32 */ - - -#if XCHAL_HAVE_MUL16 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mul16u dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MUL32 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mull dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MAC16 - -/* The preprocessor insists on inserting a space when concatenating after - a period in the definition of do_mul below. These macros are a workaround - using underscores instead of periods when doing the concatenation. */ -#define umul_aa_ll umul.aa.ll -#define umul_aa_lh umul.aa.lh -#define umul_aa_hl umul.aa.hl -#define umul_aa_hh umul.aa.hh - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - umul_aa_ ## xhalf ## yhalf xreg, yreg; \ - rsr dst, ACCLO - -#else /* no multiply hardware */ - -#define set_arg_l(dst, src) \ - extui dst, src, 0, 16 -#define set_arg_h(dst, src) \ - srli dst, src, 16 - -#if __XTENSA_CALL0_ABI__ -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a13, xreg); \ - set_arg_ ## yhalf (a14, yreg); \ - call0 .Lmul_mulsi3; \ - mov dst, a12 -#else -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a14, xreg); \ - set_arg_ ## yhalf (a15, yreg); \ - call12 .Lmul_mulsi3; \ - mov dst, a14 -#endif /* __XTENSA_CALL0_ABI__ */ - -#endif /* no multiply hardware */ - - /* Add pp1 and pp2 into a10 with carry-out in a9. 
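The partial-product table above is an ordinary schoolbook 64x64 to 128-bit multiply carried out in 16-bit chunks, sized to whatever multiplier option the configuration provides. A minimal C sketch of the same idea using 32-bit chunks, so four partial products stand in for the sixteen listed above; mul64x64 and u128 are illustrative names, not libgcc interfaces:

#include <stdint.h>

typedef struct { uint64_t hi, lo; } u128;

/* Schoolbook 64x64 -> 128 multiply from 32-bit halves; the assembly
   does the same thing with 16-bit halves and explicit bgeu carry
   checks. */
static u128 mul64x64(uint64_t x, uint64_t y)
{
    uint64_t xl = (uint32_t)x, xh = x >> 32;
    uint64_t yl = (uint32_t)y, yh = y >> 32;

    uint64_t ll = xl * yl;   /* pp0 */
    uint64_t lh = xl * yh;   /* pp1 */
    uint64_t hl = xh * yl;   /* pp2 */
    uint64_t hh = xh * yh;   /* pp3 */

    /* Sum the middle column, keeping its carry explicitly. */
    uint64_t mid = (ll >> 32) + (uint32_t)lh + (uint32_t)hl;

    u128 r;
    r.lo = (mid << 32) | (uint32_t)ll;
    r.hi = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
    return r;
}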
*/ - do_mul(a10, xl, l, yl, h) /* pp 1 */ - do_mul(a11, xl, h, yl, l) /* pp 2 */ - movi a9, 0 - add a10, a10, a11 - bgeu a10, a11, 1f - addi a9, a9, 1 -1: - /* Initialize a6 with a9/a10 shifted into position. Note that - this value can be safely incremented without any carry-outs. */ - ssai 16 - src a6, a9, a10 - - /* Compute the low word into a10. */ - do_mul(a11, xl, l, yl, l) /* pp 0 */ - sll a10, a10 - add a10, a10, a11 - bgeu a10, a11, 1f - addi a6, a6, 1 -1: - /* Compute the contributions of pp0-5 to a6, with carry-outs in a9. - This is good enough to determine the low half of a6, so that any - nonzero bits from the low word of the result can be collapsed - into a6, freeing up a register. */ - movi a9, 0 - do_mul(a11, xl, l, yh, l) /* pp 3 */ - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - do_mul(a11, xl, h, yl, h) /* pp 4 */ - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - do_mul(a11, xh, l, yl, l) /* pp 5 */ - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Collapse any nonzero bits from the low word into a6. */ - beqz a10, 1f - movi a11, 1 - or a6, a6, a11 -1: - /* Add pp6-9 into a11 with carry-outs in a10. */ - do_mul(a7, xl, l, yh, h) /* pp 6 */ - do_mul(a11, xh, h, yl, l) /* pp 9 */ - movi a10, 0 - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - do_mul(a7, xl, h, yh, l) /* pp 7 */ - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - do_mul(a7, xh, l, yl, h) /* pp 8 */ - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - /* Shift a10/a11 into position, and add low half of a11 to a6. */ - src a10, a10, a11 - add a10, a10, a9 - sll a11, a11 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a10, a10, 1 -1: - /* Add pp10-12 into xl with carry-outs in a9. */ - movi a9, 0 - do_mul(xl, xl, h, yh, h) /* pp 10 */ - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - do_mul(a10, xh, l, yh, l) /* pp 11 */ - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - do_mul(a10, xh, h, yl, h) /* pp 12 */ - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - /* Add pp13-14 into a11 with carry-outs in a10. */ - do_mul(a11, xh, l, yh, h) /* pp 13 */ - do_mul(a7, xh, h, yh, l) /* pp 14 */ - movi a10, 0 - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - /* Shift a10/a11 into position, and add low half of a11 to a6. */ - src a10, a10, a11 - add a10, a10, a9 - sll a11, a11 - add xl, xl, a11 - bgeu xl, a11, 1f - addi a10, a10, 1 -1: - /* Compute xh. */ - do_mul(xh, xh, h, yh, h) /* pp 15 */ - add xh, xh, a10 - - /* Restore values saved on the stack during the multiplication. */ - l32i a7, sp, 4 -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - l32i a0, sp, 0 - l32i a8, sp, 8 -#endif -#endif /* ! XCHAL_HAVE_MUL32_HIGH */ - - /* Shift left by 12 bits, unless there was a carry-out from the - multiply, in which case, shift by 11 bits and increment the - exponent. Note: It is convenient to use the constant 0x3ff - instead of 0x400 when removing the extra exponent bias (so that - it is easy to construct 0x7fe for the overflow check). Reverse - the logic here to decrement the exponent sum by one unless there - was a carry-out. */ - movi a4, 11 - srli a5, xh, 21 - 12 - bnez a5, 1f - addi a4, a4, 1 - addi a8, a8, -1 -1: ssl a4 - src xh, xh, xl - src xl, xl, a6 - sll a6, a6 - - /* Subtract the extra bias from the exponent sum (plus one to account - for the explicit "1.0" of the mantissa that will be added to the - exponent in the final result). */ - movi a4, 0x3ff - sub a8, a8, a4 - - /* Check for over/underflow. 
The value in a8 is one less than the - final exponent, so values in the range 0..7fd are OK here. */ - slli a4, a4, 1 /* 0x7fe */ - bgeu a8, a4, .Lmul_overflow - -.Lmul_round: - /* Round. */ - bgez a6, .Lmul_rounded - addi xl, xl, 1 - beqz xl, .Lmul_roundcarry - slli a6, a6, 1 - beqz a6, .Lmul_exactlyhalf - -.Lmul_rounded: - /* Add the exponent to the mantissa. */ - slli a8, a8, 20 - add xh, xh, a8 - -.Lmul_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or xh, xh, a7 - -.Lmul_done: -#if __XTENSA_CALL0_ABI__ - l32i a12, sp, 16 - l32i a13, sp, 20 - l32i a14, sp, 24 - l32i a15, sp, 28 - addi sp, sp, 32 -#endif - leaf_return - -.Lmul_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - j .Lmul_rounded - -.Lmul_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow is OK -- it will be added to the exponent. */ - j .Lmul_rounded - -.Lmul_overflow: - bltz a8, .Lmul_underflow - /* Return +/- Infinity. */ - addi a8, a4, 1 /* 0x7ff */ - slli xh, a8, 20 - movi xl, 0 - j .Lmul_addsign - -.Lmul_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - mov a9, a6 - ssr a8 - bgeui a8, 32, .Lmul_bigshift - - /* Shift xh/xl right. Any bits that are shifted out of xl are saved - in a6 (combined with the shifted-out bits currently in a6) for - rounding the result. */ - sll a6, xl - src xl, xh, xl - srl xh, xh - j 1f - -.Lmul_bigshift: - bgeui a8, 64, .Lmul_flush_to_zero - sll a10, xl /* lost bits shifted out of xl */ - src a6, xh, xl - srl xl, xh - movi xh, 0 - or a9, a9, a10 - - /* Set the exponent to zero. */ -1: movi a8, 0 - - /* Pack any nonzero bits shifted out into a6. */ - beqz a9, .Lmul_round - movi a9, 1 - or a6, a6, a9 - j .Lmul_round - -.Lmul_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - j .Lmul_done - -#if XCHAL_NO_MUL - - /* For Xtensa processors with no multiply hardware, this simplified - version of _mulsi3 is used for multiplying 16-bit chunks of - the floating-point mantissas. When using CALL0, this function - uses a custom ABI: the inputs are passed in a13 and a14, the - result is returned in a12, and a8 and a15 are clobbered. */ - .align 4 -.Lmul_mulsi3: - leaf_entry sp, 16 - .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 - movi \dst, 0 -1: add \tmp1, \src2, \dst - extui \tmp2, \src1, 0, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx2 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 1, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx4 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 2, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx8 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 3, 1 - movnez \dst, \tmp1, \tmp2 - - srli \src1, \src1, 4 - slli \src2, \src2, 4 - bnez \src1, 1b - .endm -#if __XTENSA_CALL0_ABI__ - mul_mulsi3_body a12, a13, a14, a15, a8 -#else - /* The result will be written into a2, so save that argument in a4. */ - mov a4, a2 - mul_mulsi3_body a2, a4, a3, a5, a6 -#endif - leaf_return -#endif /* XCHAL_NO_MUL */ -#endif /* L_muldf3 */ - -#ifdef L_divdf3 - - /* Division */ -__divdf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). - (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) 
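The .Lmul_round / .Lmul_exactlyhalf / .Lmul_roundcarry sequence above implements round-to-nearest with ties to even, with a6 acting as a combined guard and sticky word. A minimal C model, collapsing the xh/xl pair into one 64-bit mantissa so the carry case is handled by ordinary addition (round_nearest_even is an illustrative name):

#include <stdint.h>

/* mant: the result mantissa; guard: the shifted-out bits, with the
   rounding bit in the msb and the sticky bits below it. */
static uint64_t round_nearest_even(uint64_t mant, uint32_t guard)
{
    if (guard & 0x80000000u) {              /* fraction >= 1/2: round up */
        mant += 1;
        if ((uint32_t)(guard << 1) == 0)    /* exactly 1/2: force an even lsb */
            mant &= ~(uint64_t)1;
    }
    return mant;
}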
*/ - -.Ldiv_yexpzero: - /* Clear the sign bit of y. */ - slli yh, yh, 1 - srli yh, yh, 1 - - /* Check for division by zero. */ - or a10, yh, yl - beqz a10, .Ldiv_yzero - - /* Normalize y. Adjust the exponent in a9. */ - beqz yh, .Ldiv_yh_zero - do_nsau a10, yh, a11, a9 - addi a10, a10, -11 - ssl a10 - src yh, yh, yl - sll yl, yl - movi a9, 1 - sub a9, a9, a10 - j .Ldiv_ynormalized -.Ldiv_yh_zero: - do_nsau a10, yl, a11, a9 - addi a10, a10, -11 - movi a9, -31 - sub a9, a9, a10 - ssl a10 - bltz a10, .Ldiv_yl_srl - sll yh, yl - movi yl, 0 - j .Ldiv_ynormalized -.Ldiv_yl_srl: - srl yh, yl - sll yl, yl - j .Ldiv_ynormalized - -.Ldiv_yzero: - /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ - slli xh, xh, 1 - srli xh, xh, 1 - or xl, xl, xh - srli xh, a7, 31 - slli xh, xh, 31 - or xh, xh, a6 - bnez xl, 1f - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 -1: movi xl, 0 - leaf_return - -.Ldiv_xexpzero: - /* Clear the sign bit of x. */ - slli xh, xh, 1 - srli xh, xh, 1 - - /* If x is zero, return zero. */ - or a10, xh, xl - beqz a10, .Ldiv_return_zero - - /* Normalize x. Adjust the exponent in a8. */ - beqz xh, .Ldiv_xh_zero - do_nsau a10, xh, a11, a8 - addi a10, a10, -11 - ssl a10 - src xh, xh, xl - sll xl, xl - movi a8, 1 - sub a8, a8, a10 - j .Ldiv_xnormalized -.Ldiv_xh_zero: - do_nsau a10, xl, a11, a8 - addi a10, a10, -11 - movi a8, -31 - sub a8, a8, a10 - ssl a10 - bltz a10, .Ldiv_xl_srl - sll xh, xl - movi xl, 0 - j .Ldiv_xnormalized -.Ldiv_xl_srl: - srl xh, xl - sll xl, xl - j .Ldiv_xnormalized - -.Ldiv_return_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - leaf_return - -.Ldiv_xnan_or_inf: - /* Set the sign bit of the result. */ - srli a7, yh, 31 - slli a7, a7, 31 - xor xh, xh, a7 - /* If y is NaN or Inf, return NaN. */ - bnall yh, a6, 1f - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 -1: leaf_return - -.Ldiv_ynan_or_inf: - /* If y is Infinity, return zero. */ - slli a8, yh, 12 - or a8, a8, yl - beqz a8, .Ldiv_return_zero - /* y is NaN; return it. */ - mov xh, yh - mov xl, yl - leaf_return - -.Ldiv_highequal1: - bltu xl, yl, 2f - j 3f - - .align 4 - .global __divdf3 - .type __divdf3, @function -__divdf3: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - - /* Get the sign of the result. */ - xor a7, xh, yh - - /* Check for NaN and infinity. */ - ball xh, a6, .Ldiv_xnan_or_inf - ball yh, a6, .Ldiv_ynan_or_inf - - /* Extract the exponents. */ - extui a8, xh, 20, 11 - extui a9, yh, 20, 11 - - beqz a9, .Ldiv_yexpzero -.Ldiv_ynormalized: - beqz a8, .Ldiv_xexpzero -.Ldiv_xnormalized: - - /* Subtract the exponents. */ - sub a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0x1fffff - or xh, xh, a6 - and xh, xh, a10 - or yh, yh, a6 - and yh, yh, a10 - - /* Set SAR for left shift by one. */ - ssai (32 - 1) - - /* The first digit of the mantissa division must be a one. - Shift x (and adjust the exponent) as needed to make this true. */ - bltu yh, xh, 3f - beq yh, xh, .Ldiv_highequal1 -2: src xh, xh, xl - sll xl, xl - addi a8, a8, -1 -3: - /* Do the first subtraction and shift. */ - sub xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, -1 -1: sub xl, xl, yl - src xh, xh, xl - sll xl, xl - - /* Put the quotient into a10/a11. */ - movi a10, 0 - movi a11, 1 - - /* Divide one bit at a time for 52 bits. */ - movi a9, 52 -#if XCHAL_HAVE_LOOPS - loop a9, .Ldiv_loopend -#endif -.Ldiv_loop: - /* Shift the quotient << 1. */ - src a10, a10, a11 - sll a11, a11 - - /* Is this digit a 0 or 1? 
*/ - bltu xh, yh, 3f - beq xh, yh, .Ldiv_highequal2 - - /* Output a 1 and subtract. */ -2: addi a11, a11, 1 - sub xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, -1 -1: sub xl, xl, yl - - /* Shift the dividend << 1. */ -3: src xh, xh, xl - sll xl, xl - -#if !XCHAL_HAVE_LOOPS - addi a9, a9, -1 - bnez a9, .Ldiv_loop -#endif -.Ldiv_loopend: - - /* Add the exponent bias (less one to account for the explicit "1.0" - of the mantissa that will be added to the exponent in the final - result). */ - movi a9, 0x3fe - add a8, a8, a9 - - /* Check for over/underflow. The value in a8 is one less than the - final exponent, so values in the range 0..7fd are OK here. */ - addmi a9, a9, 0x400 /* 0x7fe */ - bgeu a8, a9, .Ldiv_overflow - -.Ldiv_round: - /* Round. The remainder (<< 1) is in xh/xl. */ - bltu xh, yh, .Ldiv_rounded - beq xh, yh, .Ldiv_highequal3 -.Ldiv_roundup: - addi a11, a11, 1 - beqz a11, .Ldiv_roundcarry - -.Ldiv_rounded: - mov xl, a11 - /* Add the exponent to the mantissa. */ - slli a8, a8, 20 - add xh, a10, a8 - -.Ldiv_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or xh, xh, a7 - leaf_return - -.Ldiv_highequal2: - bgeu xl, yl, 2b - j 3b - -.Ldiv_highequal3: - bltu xl, yl, .Ldiv_rounded - bne xl, yl, .Ldiv_roundup - - /* Remainder is exactly half the divisor. Round even. */ - addi a11, a11, 1 - beqz a11, .Ldiv_roundcarry - srli a11, a11, 1 - slli a11, a11, 1 - j .Ldiv_rounded - -.Ldiv_overflow: - bltz a8, .Ldiv_underflow - /* Return +/- Infinity. */ - addi a8, a9, 1 /* 0x7ff */ - slli xh, a8, 20 - movi xl, 0 - j .Ldiv_addsign - -.Ldiv_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - ssr a8 - bgeui a8, 32, .Ldiv_bigshift - - /* Shift a10/a11 right. Any bits that are shifted out of a11 are - saved in a6 for rounding the result. */ - sll a6, a11 - src a11, a10, a11 - srl a10, a10 - j 1f - -.Ldiv_bigshift: - bgeui a8, 64, .Ldiv_flush_to_zero - sll a9, a11 /* lost bits shifted out of a11 */ - src a6, a10, a11 - srl a11, a10 - movi a10, 0 - or xl, xl, a9 - - /* Set the exponent to zero. */ -1: movi a8, 0 - - /* Pack any nonzero remainder (in xh/xl) into a6. */ - or xh, xh, xl - beqz xh, 1f - movi a9, 1 - or a6, a6, a9 - - /* Round a10/a11 based on the bits shifted out into a6. */ -1: bgez a6, .Ldiv_rounded - addi a11, a11, 1 - beqz a11, .Ldiv_roundcarry - slli a6, a6, 1 - bnez a6, .Ldiv_rounded - srli a11, a11, 1 - slli a11, a11, 1 - j .Ldiv_rounded - -.Ldiv_roundcarry: - /* a11 is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi a10, a10, 1 - /* Overflow to the exponent field is OK. */ - j .Ldiv_rounded - -.Ldiv_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - leaf_return - -#endif /* L_divdf3 */ - -#ifdef L_cmpdf2 - - /* Equal and Not Equal */ - - .align 4 - .global __eqdf2 - .global __nedf2 - .set __nedf2, __eqdf2 - .type __eqdf2, @function -__eqdf2: - leaf_entry sp, 16 - bne xl, yl, 2f - bne xh, yh, 4f - - /* The values are equal but NaN != NaN. Check the exponent. */ - movi a6, 0x7ff00000 - ball xh, a6, 3f - - /* Equal. */ - movi a2, 0 - leaf_return - - /* Not equal. */ -2: movi a2, 1 - leaf_return - - /* Check if the mantissas are nonzero. */ -3: slli a7, xh, 12 - or a7, a7, xl - j 5f - - /* Check if x and y are zero with different signs. 
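__divdf3's quotient loop above is restoring division on the already-normalized mantissas, one result bit per iteration (52 iterations for the double format, after the first bit is produced ahead of the loop). A minimal C model of the loop, ignoring the register-pair bookkeeping; div_mantissa is an illustrative name:

#include <stdint.h>

/* x, y: normalized mantissas with x < 2*y, as the setup code above
   guarantees (the real mantissas are 53 bits, so the shifts cannot
   wrap).  Returns `bits` quotient bits; the remainder, shifted left
   once per iteration, is what the rounding step then inspects. */
static uint64_t div_mantissa(uint64_t x, uint64_t y, int bits)
{
    uint64_t q = 0;
    for (int i = 0; i < bits; i++) {
        q <<= 1;                  /* shift the quotient */
        if (x >= y) {             /* this digit is a 1 */
            q |= 1;
            x -= y;
        }
        x <<= 1;                  /* shift the dividend */
    }
    return q;
}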
*/ -4: or a7, xh, yh - slli a7, a7, 1 - or a7, a7, xl /* xl == yl here */ - - /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa - or x when exponent(x) = 0x7ff and x == y. */ -5: movi a2, 0 - movi a3, 1 - movnez a2, a3, a7 - leaf_return - - - /* Greater Than */ - - .align 4 - .global __gtdf2 - .type __gtdf2, @function -__gtdf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Lle_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 0 - leaf_return - - - /* Less Than or Equal */ - - .align 4 - .global __ledf2 - .type __ledf2, @function -__ledf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Lle_cmp - movi a2, 1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 1 - leaf_return - -.Lle_cmp: - /* Check if x and y have different signs. */ - xor a7, xh, yh - bltz a7, .Lle_diff_signs - - /* Check if x is negative. */ - bltz xh, .Lle_xneg - - /* Check if x <= y. */ - bltu xh, yh, 4f - bne xh, yh, 5f - bltu yl, xl, 5f -4: movi a2, 0 - leaf_return - -.Lle_xneg: - /* Check if y <= x. */ - bltu yh, xh, 4b - bne yh, xh, 5f - bgeu xl, yl, 4b -5: movi a2, 1 - leaf_return - -.Lle_diff_signs: - bltz xh, 4b - - /* Check if both x and y are zero. */ - or a7, xh, yh - slli a7, a7, 1 - or a7, a7, xl - or a7, a7, yl - movi a2, 1 - movi a3, 0 - moveqz a2, a3, a7 - leaf_return - - - /* Greater Than or Equal */ - - .align 4 - .global __gedf2 - .type __gedf2, @function -__gedf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Llt_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Llt_cmp - movi a2, -1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, -1 - leaf_return - - - /* Less Than */ - - .align 4 - .global __ltdf2 - .type __ltdf2, @function -__ltdf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Llt_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Llt_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 0 - leaf_return - -.Llt_cmp: - /* Check if x and y have different signs. */ - xor a7, xh, yh - bltz a7, .Llt_diff_signs - - /* Check if x is negative. */ - bltz xh, .Llt_xneg - - /* Check if x < y. */ - bltu xh, yh, 4f - bne xh, yh, 5f - bgeu xl, yl, 5f -4: movi a2, -1 - leaf_return - -.Llt_xneg: - /* Check if y < x. */ - bltu yh, xh, 4b - bne yh, xh, 5f - bltu yl, xl, 4b -5: movi a2, 0 - leaf_return - -.Llt_diff_signs: - bgez xh, 5b - - /* Check if both x and y are nonzero. 
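The comparison routines above follow the usual libgcc soft-float convention: each returns an int whose sign encodes the relation, and the value chosen for a NaN operand makes the natural test come out false (zero for __gtdf2 and __ltdf2, -1 for __gedf2, 1 for __ledf2 and __eqdf2). For reference, this is roughly how compiled code on a soft-float target consumes them; the two static wrappers are purely illustrative:

/* Comparison helpers provided by the code above. */
extern int __eqdf2(double, double);   /* == 0 iff x == y and neither is NaN */
extern int __nedf2(double, double);   /* same code as __eqdf2 */
extern int __gtdf2(double, double);   /* >  0 iff x >  y */
extern int __gedf2(double, double);   /* >= 0 iff x >= y */
extern int __ltdf2(double, double);   /* <  0 iff x <  y */
extern int __ledf2(double, double);   /* <= 0 iff x <= y */

/* With a NaN operand both of these are false, because the helpers
   return 0 in that case. */
static int soft_gt(double a, double b) { return __gtdf2(a, b) > 0; }
static int soft_lt(double a, double b) { return __ltdf2(a, b) < 0; }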
*/ - or a7, xh, yh - slli a7, a7, 1 - or a7, a7, xl - or a7, a7, yl - movi a2, 0 - movi a3, -1 - movnez a2, a3, a7 - leaf_return - - - /* Unordered */ - - .align 4 - .global __unorddf2 - .type __unorddf2, @function -__unorddf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 3f -1: ball yh, a6, 4f -2: movi a2, 0 - leaf_return - -3: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 1 - leaf_return - -4: slli a7, yh, 12 - or a7, a7, yl - beqz a7, 2b - movi a2, 1 - leaf_return - -#endif /* L_cmpdf2 */ - -#ifdef L_fixdfsi - - .align 4 - .global __fixdfsi - .type __fixdfsi, @function -__fixdfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixdfsi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */ - extui a4, xh, 20, 11 - extui a5, a6, 19, 10 /* 0x3fe */ - sub a4, a4, a5 - bgei a4, 32, .Lfixdfsi_maxint - blti a4, 1, .Lfixdfsi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src a5, a7, xl - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixdfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixdfsi_maxint - - /* Translate NaN to +maxint. */ - movi xh, 0 - -.Lfixdfsi_maxint: - slli a4, a6, 11 /* 0x80000000 */ - addi a5, a4, -1 /* 0x7fffffff */ - movgez a4, a5, xh - mov a2, a4 - leaf_return - -.Lfixdfsi_zero: - movi a2, 0 - leaf_return - -#endif /* L_fixdfsi */ - -#ifdef L_fixdfdi - - .align 4 - .global __fixdfdi - .type __fixdfdi, @function -__fixdfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixdfdi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */ - extui a4, xh, 20, 11 - extui a5, a6, 19, 10 /* 0x3fe */ - sub a4, a4, a5 - bgei a4, 64, .Lfixdfdi_maxint - blti a4, 1, .Lfixdfdi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src xh, a7, xl - sll xl, xl - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixdfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixdfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixdfdi_smallshift: - src xl, xh, xl - srl xh, xh - j .Lfixdfdi_shifted - -.Lfixdfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixdfdi_maxint - - /* Translate NaN to +maxint. */ - movi xh, 0 - -.Lfixdfdi_maxint: - slli a7, a6, 11 /* 0x80000000 */ - bgez xh, 1f - mov xh, a7 - movi xl, 0 - leaf_return - -1: addi xh, a7, -1 /* 0x7fffffff */ - movi xl, -1 - leaf_return - -.Lfixdfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -#endif /* L_fixdfdi */ - -#ifdef L_fixunsdfsi - - .align 4 - .global __fixunsdfsi - .type __fixunsdfsi, @function -__fixunsdfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixunsdfsi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */ - extui a4, xh, 20, 11 - extui a5, a6, 20, 10 /* 0x3ff */ - sub a4, a4, a5 - bgei a4, 32, .Lfixunsdfsi_maxint - bltz a4, .Lfixunsdfsi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src a5, a7, xl - - /* Shift back to the right, based on the exponent. 
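__fixdfsi above extracts the biased exponent, saturates when the value is too large (or infinite), maps NaN to the positive maximum, and otherwise shifts the mantissa with its implicit 1 into place before applying the sign. The same computation on the raw bit image with 64-bit arithmetic; fixdfsi_model is an illustrative name, not a libgcc entry point:

#include <stdint.h>

/* bits: the raw IEEE-754 double image (what lives in xh/xl above). */
static int32_t fixdfsi_model(uint64_t bits)
{
    int      sign = (int)(bits >> 63);
    int      exp  = (int)((bits >> 52) & 0x7ff);
    uint64_t mant = bits & 0x000fffffffffffffULL;

    if (exp == 0x7ff && mant != 0)            /* NaN -> +maxint */
        return INT32_MAX;

    int shift = exp - 0x3fe;
    if (shift >= 32)                          /* too big, or +/-Inf */
        return sign ? INT32_MIN : INT32_MAX;
    if (shift < 1)                            /* |value| < 1 */
        return 0;

    uint64_t val = (mant | (1ULL << 52)) >> (53 - shift);
    return sign ? -(int32_t)val : (int32_t)val;
}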
*/ - addi a4, a4, 1 - beqi a4, 32, .Lfixunsdfsi_bigexp - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixunsdfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixunsdfsi_maxint - - /* Translate NaN to 0xffffffff. */ - movi a2, -1 - leaf_return - -.Lfixunsdfsi_maxint: - slli a4, a6, 11 /* 0x80000000 */ - movi a5, -1 /* 0xffffffff */ - movgez a4, a5, xh - mov a2, a4 - leaf_return - -.Lfixunsdfsi_zero: - movi a2, 0 - leaf_return - -.Lfixunsdfsi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz xh, 1f - mov a2, a5 /* no shift needed */ - leaf_return - - /* Return 0x80000000 if negative. */ -1: slli a2, a6, 11 - leaf_return - -#endif /* L_fixunsdfsi */ - -#ifdef L_fixunsdfdi - - .align 4 - .global __fixunsdfdi - .type __fixunsdfdi, @function -__fixunsdfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixunsdfdi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */ - extui a4, xh, 20, 11 - extui a5, a6, 20, 10 /* 0x3ff */ - sub a4, a4, a5 - bgei a4, 64, .Lfixunsdfdi_maxint - bltz a4, .Lfixunsdfdi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src xh, a7, xl - sll xl, xl - - /* Shift back to the right, based on the exponent. */ - addi a4, a4, 1 - beqi a4, 64, .Lfixunsdfdi_bigexp - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixunsdfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixunsdfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixunsdfdi_smallshift: - src xl, xh, xl - srl xh, xh - j .Lfixunsdfdi_shifted - -.Lfixunsdfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixunsdfdi_maxint - - /* Translate NaN to 0xffffffff.... */ -1: movi xh, -1 - movi xl, -1 - leaf_return - -.Lfixunsdfdi_maxint: - bgez xh, 1b -2: slli xh, a6, 11 /* 0x80000000 */ - movi xl, 0 - leaf_return - -.Lfixunsdfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -.Lfixunsdfdi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz a7, 2b - leaf_return /* no shift needed */ - -#endif /* L_fixunsdfdi */ - -#ifdef L_floatsidf - - .align 4 - .global __floatunsidf - .type __floatunsidf, @function -__floatunsidf: - leaf_entry sp, 16 - beqz a2, .Lfloatsidf_return_zero - - /* Set the sign to zero and jump to the floatsidf code. */ - movi a7, 0 - j .Lfloatsidf_normalize - - .align 4 - .global __floatsidf - .type __floatsidf, @function -__floatsidf: - leaf_entry sp, 16 - - /* Check for zero. */ - beqz a2, .Lfloatsidf_return_zero - - /* Save the sign. */ - extui a7, a2, 31, 1 - - /* Get the absolute value. */ -#if XCHAL_HAVE_ABS - abs a2, a2 -#else - neg a4, a2 - movltz a2, a4, a2 -#endif - -.Lfloatsidf_normalize: - /* Normalize with the first 1 bit in the msb. */ - do_nsau a4, a2, a5, a6 - ssl a4 - sll a5, a2 - - /* Shift the mantissa into position. */ - srli xh, a5, 11 - slli xl, a5, (32 - 11) - - /* Set the exponent. */ - movi a5, 0x41d /* 0x3fe + 31 */ - sub a5, a5, a4 - slli a5, a5, 20 - add xh, xh, a5 - - /* Add the sign and return. */ - slli a7, a7, 31 - or xh, xh, a7 - leaf_return - -.Lfloatsidf_return_zero: - movi a3, 0 - leaf_return - -#endif /* L_floatsidf */ - -#ifdef L_floatdidf - - .align 4 - .global __floatundidf - .type __floatundidf, @function -__floatundidf: - leaf_entry sp, 16 - - /* Check for zero. 
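__floatsidf above goes the other way: normalize the absolute value with NSAU (count leading zeros), place the mantissa, and derive the exponent from the shift count. The exponent constant is 0x3fe + 31 rather than 0x3ff + 31 because the implicit 1 is still present in the mantissa when the two are added, which bumps the stored exponent by one. A C restatement using GCC's __builtin_clz in place of do_nsau; floatsidf_model is an illustrative name:

#include <stdint.h>

/* Returns the raw IEEE-754 double image for the int32 argument. */
static uint64_t floatsidf_model(int32_t i)
{
    if (i == 0)
        return 0;

    uint64_t sign = (i < 0) ? (1ULL << 63) : 0;
    uint32_t mag  = (i < 0) ? 0u - (uint32_t)i : (uint32_t)i;

    int clz = __builtin_clz(mag);                    /* do_nsau */
    uint64_t mant  = (uint64_t)mag << (clz + 21);    /* leading 1 at bit 52 */
    uint64_t expm1 = (uint64_t)(0x3fe + 31 - clz) << 52;

    /* Adding the mantissa with its leading 1 still in place increments
       the exponent field, giving the final 0x3ff + 31 - clz. */
    return sign | (expm1 + mant);
}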
*/ - or a4, xh, xl - beqz a4, 2f - - /* Set the sign to zero and jump to the floatdidf code. */ - movi a7, 0 - j .Lfloatdidf_normalize - - .align 4 - .global __floatdidf - .type __floatdidf, @function -__floatdidf: - leaf_entry sp, 16 - - /* Check for zero. */ - or a4, xh, xl - beqz a4, 2f - - /* Save the sign. */ - extui a7, xh, 31, 1 - - /* Get the absolute value. */ - bgez xh, .Lfloatdidf_normalize - neg xl, xl - neg xh, xh - beqz xl, .Lfloatdidf_normalize - addi xh, xh, -1 - -.Lfloatdidf_normalize: - /* Normalize with the first 1 bit in the msb of xh. */ - beqz xh, .Lfloatdidf_bigshift - do_nsau a4, xh, a5, a6 - ssl a4 - src xh, xh, xl - sll xl, xl - -.Lfloatdidf_shifted: - /* Shift the mantissa into position, with rounding bits in a6. */ - ssai 11 - sll a6, xl - src xl, xh, xl - srl xh, xh - - /* Set the exponent. */ - movi a5, 0x43d /* 0x3fe + 63 */ - sub a5, a5, a4 - slli a5, a5, 20 - add xh, xh, a5 - - /* Add the sign. */ - slli a7, a7, 31 - or xh, xh, a7 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a6, 2f - addi xl, xl, 1 - beqz xl, .Lfloatdidf_roundcarry - - /* Check if the leftover fraction is exactly 1/2. */ - slli a6, a6, 1 - beqz a6, .Lfloatdidf_exactlyhalf -2: leaf_return - -.Lfloatdidf_bigshift: - /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ - do_nsau a4, xl, a5, a6 - ssl a4 - sll xh, xl - movi xl, 0 - addi a4, a4, 32 - j .Lfloatdidf_shifted - -.Lfloatdidf_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - leaf_return - -.Lfloatdidf_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow to the exponent is OK. */ - leaf_return - -#endif /* L_floatdidf */ - -#ifdef L_truncdfsf2 - - .align 4 - .global __truncdfsf2 - .type __truncdfsf2, @function -__truncdfsf2: - leaf_entry sp, 16 - - /* Adjust the exponent bias. */ - movi a4, (0x3ff - 0x7f) << 20 - sub a5, xh, a4 - - /* Check for underflow. */ - xor a6, xh, a5 - bltz a6, .Ltrunc_underflow - extui a6, a5, 20, 11 - beqz a6, .Ltrunc_underflow - - /* Check for overflow. */ - movi a4, 255 - bge a6, a4, .Ltrunc_overflow - - /* Shift a5/xl << 3 into a5/a4. */ - ssai (32 - 3) - src a5, a5, xl - sll a4, xl - -.Ltrunc_addsign: - /* Add the sign bit. */ - extui a6, xh, 31, 1 - slli a6, a6, 31 - or a2, a6, a5 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a4, 1f - addi a2, a2, 1 - /* Overflow to the exponent is OK. The answer will be correct. */ - - /* Check if the leftover fraction is exactly 1/2. */ - slli a4, a4, 1 - beqz a4, .Ltrunc_exactlyhalf -1: leaf_return - -.Ltrunc_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -.Ltrunc_overflow: - /* Check if exponent == 0x7ff. */ - movi a4, 0x7ff00000 - bnall xh, a4, 1f - - /* Check if mantissa is nonzero. */ - slli a5, xh, 12 - or a5, a5, xl - beqz a5, 1f - - /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */ - srli a4, a4, 1 - -1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */ - /* Add the sign bit. */ - extui a6, xh, 31, 1 - ssai 1 - src a2, a6, a4 - leaf_return - -.Ltrunc_underflow: - /* Find shift count for a subnormal. Flush to zero if >= 32. */ - extui a6, xh, 20, 11 - movi a5, 0x3ff - 0x7f - sub a6, a5, a6 - addi a6, a6, 1 - bgeui a6, 32, 1f - - /* Replace the exponent with an explicit "1.0". 
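In the common case (the result neither overflows nor goes subnormal), __truncdfsf2 above reduces to subtracting the difference of the two exponent biases, dropping 29 mantissa bits, and rounding to nearest-even on the bits that fall off. A C sketch of just that fast path on the raw bit images; truncdfsf2_core is an illustrative name and deliberately omits the overflow, underflow and NaN handling shown above:

#include <stdint.h>

/* d: raw double image, assumed to be a normal value whose result is
   also a normal float (the special cases are handled separately). */
static uint32_t truncdfsf2_core(uint64_t d)
{
    uint32_t sign    = (uint32_t)(d >> 32) & 0x80000000u;
    uint64_t rebased = d - ((uint64_t)(0x3ff - 0x7f) << 52);

    uint32_t f    = sign | (uint32_t)(rebased >> 29); /* 8-bit exp + 23 bits */
    uint32_t rest = (uint32_t)rebased << 3;           /* the 29 dropped bits */

    if (rest & 0x80000000u) {        /* fraction >= 1/2: round up */
        f += 1;
        if ((rest << 1) == 0)        /* exactly 1/2: ties to even */
            f &= ~1u;
    }
    return f;
}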
*/ - slli a5, a5, 13 /* 0x700000 */ - or a5, a5, xh - slli a5, a5, 11 - srli a5, a5, 11 - - /* Shift the mantissa left by 3 bits (into a5/a4). */ - ssai (32 - 3) - src a5, a5, xl - sll a4, xl - - /* Shift right by a6. */ - ssr a6 - sll a7, a4 - src a4, a5, a4 - srl a5, a5 - beqz a7, .Ltrunc_addsign - or a4, a4, a6 /* any positive, nonzero value will work */ - j .Ltrunc_addsign - - /* Return +/- zero. */ -1: extui a2, xh, 31, 1 - slli a2, a2, 31 - leaf_return - -#endif /* L_truncdfsf2 */ - -#ifdef L_extendsfdf2 - - .align 4 - .global __extendsfdf2 - .type __extendsfdf2, @function -__extendsfdf2: - leaf_entry sp, 16 - - /* Save the sign bit and then shift it off. */ - extui a5, a2, 31, 1 - slli a5, a5, 31 - slli a4, a2, 1 - - /* Extract and check the exponent. */ - extui a6, a2, 23, 8 - beqz a6, .Lextend_expzero - addi a6, a6, 1 - beqi a6, 256, .Lextend_nan_or_inf - - /* Shift >> 3 into a4/xl. */ - srli a4, a4, 4 - slli xl, a2, (32 - 3) - - /* Adjust the exponent bias. */ - movi a6, (0x3ff - 0x7f) << 20 - add a4, a4, a6 - - /* Add the sign bit. */ - or xh, a4, a5 - leaf_return - -.Lextend_nan_or_inf: - movi a4, 0x7ff00000 - - /* Check for NaN. */ - slli a7, a2, 9 - beqz a7, 1f - - slli a6, a6, 11 /* 0x80000 */ - or a4, a4, a6 - - /* Add the sign and return. */ -1: or xh, a4, a5 - movi xl, 0 - leaf_return - -.Lextend_expzero: - beqz a4, 1b - - /* Normalize it to have 8 zero bits before the first 1 bit. */ - do_nsau a7, a4, a2, a3 - addi a7, a7, -8 - ssl a7 - sll a4, a4 - - /* Shift >> 3 into a4/xl. */ - slli xl, a4, (32 - 3) - srli a4, a4, 3 - - /* Set the exponent. */ - movi a6, 0x3fe - 0x7f - sub a6, a6, a7 - slli a6, a6, 20 - add a4, a4, a6 - - /* Add the sign and return. */ - or xh, a4, a5 - leaf_return - -#endif /* L_extendsfdf2 */ - - diff --git a/gcc/config/xtensa/ieee754-sf.S b/gcc/config/xtensa/ieee754-sf.S deleted file mode 100644 index d75be0e5ae5..00000000000 --- a/gcc/config/xtensa/ieee754-sf.S +++ /dev/null @@ -1,1757 +0,0 @@ -/* IEEE-754 single-precision functions for Xtensa - Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. - Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#ifdef __XTENSA_EB__ -#define xh a2 -#define xl a3 -#define yh a4 -#define yl a5 -#else -#define xh a3 -#define xl a2 -#define yh a5 -#define yl a4 -#endif - -/* Warning! The branch displacements for some Xtensa branch instructions - are quite small, and this code has been carefully laid out to keep - branch targets in range. If you change anything, be sure to check that - the assembler is not relaxing anything to branch over a jump. 
*/ - -#ifdef L_negsf2 - - .align 4 - .global __negsf2 - .type __negsf2, @function -__negsf2: - leaf_entry sp, 16 - movi a4, 0x80000000 - xor a2, a2, a4 - leaf_return - -#endif /* L_negsf2 */ - -#ifdef L_addsubsf3 - - /* Addition */ -__addsf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Ladd_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall a3, a6, 1f - /* If x is a NaN, return it. Otherwise, return y. */ - slli a7, a2, 9 - beqz a7, .Ladd_ynan_or_inf -1: leaf_return - -.Ladd_ynan_or_inf: - /* Return y. */ - mov a2, a3 - leaf_return - -.Ladd_opposite_signs: - /* Operand signs differ. Do a subtraction. */ - slli a7, a6, 8 - xor a3, a3, a7 - j .Lsub_same_sign - - .align 4 - .global __addsf3 - .type __addsf3, @function -__addsf3: - leaf_entry sp, 16 - movi a6, 0x7f800000 - - /* Check if the two operands have the same sign. */ - xor a7, a2, a3 - bltz a7, .Ladd_opposite_signs - -.Ladd_same_sign: - /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ - ball a2, a6, .Ladd_xnan_or_inf - ball a3, a6, .Ladd_ynan_or_inf - - /* Compare the exponents. The smaller operand will be shifted - right by the exponent difference and added to the larger - one. */ - extui a7, a2, 23, 9 - extui a8, a3, 23, 9 - bltu a7, a8, .Ladd_shiftx - -.Ladd_shifty: - /* Check if the smaller (or equal) exponent is zero. */ - bnone a3, a6, .Ladd_yexpzero - - /* Replace y sign/exponent with 0x008. */ - or a3, a3, a6 - slli a3, a3, 8 - srli a3, a3, 8 - -.Ladd_yexpdiff: - /* Compute the exponent difference. */ - sub a10, a7, a8 - - /* Exponent difference > 32 -- just return the bigger value. */ - bgeui a10, 32, 1f - - /* Shift y right by the exponent difference. Any bits that are - shifted out of y are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, a3, a9 - srl a3, a3 - - /* Do the addition. */ - add a2, a2, a3 - - /* Check if the add overflowed into the exponent. */ - extui a10, a2, 23, 9 - beq a10, a7, .Ladd_round - mov a8, a7 - j .Ladd_carry - -.Ladd_yexpzero: - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0", and increment the apparent exponent - because subnormals behave as if they had the minimum (nonzero) - exponent. Test for the case when both exponents are zero. */ - slli a3, a3, 9 - srli a3, a3, 9 - bnone a2, a6, .Ladd_bothexpzero - addi a8, a8, 1 - j .Ladd_yexpdiff - -.Ladd_bothexpzero: - /* Both exponents are zero. Handle this as a special case. There - is no need to shift or round, and the normal code for handling - a carry into the exponent field will not work because it - assumes there is an implicit "1.0" that needs to be added. */ - add a2, a2, a3 -1: leaf_return - -.Ladd_xexpzero: - /* Same as "yexpzero" except skip handling the case when both - exponents are zero. */ - slli a2, a2, 9 - srli a2, a2, 9 - addi a7, a7, 1 - j .Ladd_xexpdiff - -.Ladd_shiftx: - /* Same thing as the "shifty" code, but with x and y swapped. Also, - because the exponent difference is always nonzero in this version, - the shift sequence can use SLL and skip loading a constant zero. */ - bnone a2, a6, .Ladd_xexpzero - - or a2, a2, a6 - slli a2, a2, 8 - srli a2, a2, 8 - -.Ladd_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Ladd_returny - - ssr a10 - sll a9, a2 - srl a2, a2 - - add a2, a2, a3 - - /* Check if the add overflowed into the exponent. 
*/ - extui a10, a2, 23, 9 - bne a10, a8, .Ladd_carry - -.Ladd_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi a2, a2, 1 - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_returny: - mov a2, a3 - leaf_return - -.Ladd_carry: - /* The addition has overflowed into the exponent field, so the - value needs to be renormalized. The mantissa of the result - can be recovered by subtracting the original exponent and - adding 0x800000 (which is the explicit "1.0" for the - mantissa of the non-shifted operand -- the "1.0" for the - shifted operand was already added). The mantissa can then - be shifted right by one bit. The explicit "1.0" of the - shifted mantissa then needs to be replaced by the exponent, - incremented by one to account for the normalizing shift. - It is faster to combine these operations: do the shift first - and combine the additions and subtractions. If x is the - original exponent, the result is: - shifted mantissa - (x << 22) + (1 << 22) + (x << 23) - or: - shifted mantissa + ((x + 1) << 22) - Note that the exponent is incremented here by leaving the - explicit "1.0" of the mantissa in the exponent field. */ - - /* Shift x right by one bit. Save the lsb. */ - mov a10, a2 - srli a2, a2, 1 - - /* See explanation above. The original exponent is in a8. */ - addi a8, a8, 1 - slli a8, a8, 22 - add a2, a2, a8 - - /* Return an Infinity if the exponent overflowed. */ - ball a2, a6, .Ladd_infinity - - /* Same thing as the "round" code except the msb of the leftover - fraction is bit 0 of a10, with the rest of the fraction in a9. */ - bbci.l a10, 0, 1f - addi a2, a2, 1 - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_infinity: - /* Clear the mantissa. */ - srli a2, a2, 23 - slli a2, a2, 23 - - /* The sign bit may have been lost in a carry-out. Put it back. */ - slli a8, a8, 1 - or a2, a2, a8 - leaf_return - -.Ladd_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - - - /* Subtraction */ -__subsf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Lsub_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall a3, a6, 1f - /* Both x and y are either NaN or Inf, so the result is NaN. */ - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 -1: leaf_return - -.Lsub_ynan_or_inf: - /* Negate y and return it. */ - slli a7, a6, 8 - xor a2, a3, a7 - leaf_return - -.Lsub_opposite_signs: - /* Operand signs differ. Do an addition. */ - slli a7, a6, 8 - xor a3, a3, a7 - j .Ladd_same_sign - - .align 4 - .global __subsf3 - .type __subsf3, @function -__subsf3: - leaf_entry sp, 16 - movi a6, 0x7f800000 - - /* Check if the two operands have the same sign. */ - xor a7, a2, a3 - bltz a7, .Lsub_opposite_signs - -.Lsub_same_sign: - /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ - ball a2, a6, .Lsub_xnan_or_inf - ball a3, a6, .Lsub_ynan_or_inf - - /* Compare the operands. In contrast to addition, the entire - value matters here. */ - extui a7, a2, 23, 8 - extui a8, a3, 23, 8 - bltu a2, a3, .Lsub_xsmaller - -.Lsub_ysmaller: - /* Check if the smaller (or equal) exponent is zero. */ - bnone a3, a6, .Lsub_yexpzero - - /* Replace y sign/exponent with 0x008. */ - or a3, a3, a6 - slli a3, a3, 8 - srli a3, a3, 8 - -.Lsub_yexpdiff: - /* Compute the exponent difference. 
*/ - sub a10, a7, a8 - - /* Exponent difference > 32 -- just return the bigger value. */ - bgeui a10, 32, 1f - - /* Shift y right by the exponent difference. Any bits that are - shifted out of y are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, a3, a9 - srl a3, a3 - - sub a2, a2, a3 - - /* Subtract the leftover bits in a9 from zero and propagate any - borrow from a2. */ - neg a9, a9 - addi a10, a2, -1 - movnez a2, a10, a9 - - /* Check if the subtract underflowed into the exponent. */ - extui a10, a2, 23, 8 - beq a10, a7, .Lsub_round - j .Lsub_borrow - -.Lsub_yexpzero: - /* Return zero if the inputs are equal. (For the non-subnormal - case, subtracting the "1.0" will cause a borrow from the exponent - and this case can be detected when handling the borrow.) */ - beq a2, a3, .Lsub_return_zero - - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0". Unless x is also a subnormal, increment - y's apparent exponent because subnormals behave as if they had - the minimum (nonzero) exponent. */ - slli a3, a3, 9 - srli a3, a3, 9 - bnone a2, a6, .Lsub_yexpdiff - addi a8, a8, 1 - j .Lsub_yexpdiff - -.Lsub_returny: - /* Negate and return y. */ - slli a7, a6, 8 - xor a2, a3, a7 -1: leaf_return - -.Lsub_xsmaller: - /* Same thing as the "ysmaller" code, but with x and y swapped and - with y negated. */ - bnone a2, a6, .Lsub_xexpzero - - or a2, a2, a6 - slli a2, a2, 8 - srli a2, a2, 8 - -.Lsub_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Lsub_returny - - ssr a10 - movi a9, 0 - src a9, a2, a9 - srl a2, a2 - - /* Negate y. */ - slli a11, a6, 8 - xor a3, a3, a11 - - sub a2, a3, a2 - - neg a9, a9 - addi a10, a2, -1 - movnez a2, a10, a9 - - /* Check if the subtract underflowed into the exponent. */ - extui a10, a2, 23, 8 - bne a10, a8, .Lsub_borrow - -.Lsub_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi a2, a2, 1 - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Lsub_exactlyhalf -1: leaf_return - -.Lsub_xexpzero: - /* Same as "yexpzero". */ - beq a2, a3, .Lsub_return_zero - slli a2, a2, 9 - srli a2, a2, 9 - bnone a3, a6, .Lsub_xexpdiff - addi a7, a7, 1 - j .Lsub_xexpdiff - -.Lsub_return_zero: - movi a2, 0 - leaf_return - -.Lsub_borrow: - /* The subtraction has underflowed into the exponent field, so the - value needs to be renormalized. Shift the mantissa left as - needed to remove any leading zeros and adjust the exponent - accordingly. If the exponent is not large enough to remove - all the leading zeros, the result will be a subnormal value. */ - - slli a8, a2, 9 - beqz a8, .Lsub_xzero - do_nsau a6, a8, a7, a11 - srli a8, a8, 9 - bge a6, a10, .Lsub_subnormal - addi a6, a6, 1 - -.Lsub_normalize_shift: - /* Shift the mantissa (a8/a9) left by a6. */ - ssl a6 - src a8, a8, a9 - sll a9, a9 - - /* Combine the shifted mantissa with the sign and exponent, - decrementing the exponent by a6. (The exponent has already - been decremented by one due to the borrow from the subtraction, - but adding the mantissa will increment the exponent by one.) */ - srli a2, a2, 23 - sub a2, a2, a6 - slli a2, a2, 23 - add a2, a2, a8 - j .Lsub_round - -.Lsub_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -.Lsub_xzero: - /* If there was a borrow from the exponent, and the mantissa and - guard digits are all zero, then the inputs were equal and the - result should be zero. */ - beqz a9, .Lsub_return_zero - - /* Only the guard digit is nonzero. 
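One detail of the subtraction path above is worth a note: the bits shifted out of y are kept in a separate guard word, and the neg / addi / movnez sequence subtracts that fraction from zero while folding the resulting borrow into the main difference, again without a carry flag. A minimal C sketch (sub_with_guard is an illustrative name):

#include <stdint.h>

/* Compute (x.0) - (y.yguard): negate the guard fraction and, if it was
   nonzero, borrow one from the integer difference, exactly as the
   neg/addi/movnez sequence above does. */
static void sub_with_guard(uint32_t x, uint32_t y, uint32_t yguard,
                           uint32_t *diff, uint32_t *guard)
{
    *diff  = x - y;
    *guard = 0u - yguard;
    if (yguard != 0)
        *diff -= 1;
}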
Shift by min(24, a10). */ - addi a11, a10, -24 - movi a6, 24 - movltz a6, a10, a11 - j .Lsub_normalize_shift - -.Lsub_subnormal: - /* The exponent is too small to shift away all the leading zeros. - Set a6 to the current exponent (which has already been - decremented by the borrow) so that the exponent of the result - will be zero. Do not add 1 to a6 in this case, because: (1) - adding the mantissa will not increment the exponent, so there is - no need to subtract anything extra from the exponent to - compensate, and (2) the effective exponent of a subnormal is 1 - not 0 so the shift amount must be 1 smaller than normal. */ - mov a6, a10 - j .Lsub_normalize_shift - -#endif /* L_addsubsf3 */ - -#ifdef L_mulsf3 - - /* Multiplication */ -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 -#define XCHAL_NO_MUL 1 -#endif - -__mulsf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). - (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) */ - -.Lmul_xexpzero: - /* Clear the sign bit of x. */ - slli a2, a2, 1 - srli a2, a2, 1 - - /* If x is zero, return zero. */ - beqz a2, .Lmul_return_zero - - /* Normalize x. Adjust the exponent in a8. */ - do_nsau a10, a2, a11, a12 - addi a10, a10, -8 - ssl a10 - sll a2, a2 - movi a8, 1 - sub a8, a8, a10 - j .Lmul_xnormalized - -.Lmul_yexpzero: - /* Clear the sign bit of y. */ - slli a3, a3, 1 - srli a3, a3, 1 - - /* If y is zero, return zero. */ - beqz a3, .Lmul_return_zero - - /* Normalize y. Adjust the exponent in a9. */ - do_nsau a10, a3, a11, a12 - addi a10, a10, -8 - ssl a10 - sll a3, a3 - movi a9, 1 - sub a9, a9, a10 - j .Lmul_ynormalized - -.Lmul_return_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - j .Lmul_done - -.Lmul_xnan_or_inf: - /* If y is zero, return NaN. */ - slli a8, a3, 1 - bnez a8, 1f - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 - j .Lmul_done -1: - /* If y is NaN, return y. */ - bnall a3, a6, .Lmul_returnx - slli a8, a3, 9 - beqz a8, .Lmul_returnx - -.Lmul_returny: - mov a2, a3 - -.Lmul_returnx: - /* Set the sign bit and return. */ - extui a7, a7, 31, 1 - slli a2, a2, 1 - ssai 1 - src a2, a7, a2 - j .Lmul_done - -.Lmul_ynan_or_inf: - /* If x is zero, return NaN. */ - slli a8, a2, 1 - bnez a8, .Lmul_returny - movi a7, 0x400000 /* make it a quiet NaN */ - or a2, a3, a7 - j .Lmul_done - - .align 4 - .global __mulsf3 - .type __mulsf3, @function -__mulsf3: -#if __XTENSA_CALL0_ABI__ - leaf_entry sp, 32 - addi sp, sp, -32 - s32i a12, sp, 16 - s32i a13, sp, 20 - s32i a14, sp, 24 - s32i a15, sp, 28 -#elif XCHAL_NO_MUL - /* This is not really a leaf function; allocate enough stack space - to allow CALL12s to a helper function. */ - leaf_entry sp, 64 -#else - leaf_entry sp, 32 -#endif - movi a6, 0x7f800000 - - /* Get the sign of the result. */ - xor a7, a2, a3 - - /* Check for NaN and infinity. */ - ball a2, a6, .Lmul_xnan_or_inf - ball a3, a6, .Lmul_ynan_or_inf - - /* Extract the exponents. */ - extui a8, a2, 23, 8 - extui a9, a3, 23, 8 - - beqz a8, .Lmul_xexpzero -.Lmul_xnormalized: - beqz a9, .Lmul_yexpzero -.Lmul_ynormalized: - - /* Add the exponents. */ - add a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0xffffff - or a2, a2, a6 - and a2, a2, a10 - or a3, a3, a6 - and a3, a3, a10 - - /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. 
*/ - -#if XCHAL_HAVE_MUL32_HIGH - - mull a6, a2, a3 - muluh a2, a2, a3 - -#else - - /* Break the inputs into 16-bit chunks and compute 4 32-bit partial - products. These partial products are: - - 0 xl * yl - - 1 xl * yh - 2 xh * yl - - 3 xh * yh - - If using the Mul16 or Mul32 multiplier options, these input - chunks must be stored in separate registers. For Mac16, the - UMUL.AA.* opcodes can specify that the inputs come from either - half of the registers, so there is no need to shift them out - ahead of time. If there is no multiply hardware, the 16-bit - chunks can be extracted when setting up the arguments to the - separate multiply function. */ - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Calling a separate multiply function will clobber a0 and requires - use of a8 as a temporary, so save those values now. (The function - uses a custom ABI so nothing else needs to be saved.) */ - s32i a0, sp, 0 - s32i a8, sp, 4 -#endif - -#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 - -#define a2h a4 -#define a3h a5 - - /* Get the high halves of the inputs into registers. */ - srli a2h, a2, 16 - srli a3h, a3, 16 - -#define a2l a2 -#define a3l a3 - -#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 - /* Clear the high halves of the inputs. This does not matter - for MUL16 because the high bits are ignored. */ - extui a2, a2, 0, 16 - extui a3, a3, 0, 16 -#endif -#endif /* MUL16 || MUL32 */ - - -#if XCHAL_HAVE_MUL16 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mul16u dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MUL32 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mull dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MAC16 - -/* The preprocessor insists on inserting a space when concatenating after - a period in the definition of do_mul below. These macros are a workaround - using underscores instead of periods when doing the concatenation. */ -#define umul_aa_ll umul.aa.ll -#define umul_aa_lh umul.aa.lh -#define umul_aa_hl umul.aa.hl -#define umul_aa_hh umul.aa.hh - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - umul_aa_ ## xhalf ## yhalf xreg, yreg; \ - rsr dst, ACCLO - -#else /* no multiply hardware */ - -#define set_arg_l(dst, src) \ - extui dst, src, 0, 16 -#define set_arg_h(dst, src) \ - srli dst, src, 16 - -#if __XTENSA_CALL0_ABI__ -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a13, xreg); \ - set_arg_ ## yhalf (a14, yreg); \ - call0 .Lmul_mulsi3; \ - mov dst, a12 -#else -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a14, xreg); \ - set_arg_ ## yhalf (a15, yreg); \ - call12 .Lmul_mulsi3; \ - mov dst, a14 -#endif /* __XTENSA_CALL0_ABI__ */ - -#endif /* no multiply hardware */ - - /* Add pp1 and pp2 into a6 with carry-out in a9. */ - do_mul(a6, a2, l, a3, h) /* pp 1 */ - do_mul(a11, a2, h, a3, l) /* pp 2 */ - movi a9, 0 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Shift the high half of a9/a6 into position in a9. Note that - this value can be safely incremented without any carry-outs. */ - ssai 16 - src a9, a9, a6 - - /* Compute the low word into a6. */ - do_mul(a11, a2, l, a3, l) /* pp 0 */ - sll a6, a6 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Compute the high word into a2. */ - do_mul(a2, a2, h, a3, h) /* pp 3 */ - add a2, a2, a9 - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Restore values saved on the stack during the multiplication. */ - l32i a0, sp, 0 - l32i a8, sp, 4 -#endif -#endif /* ! 
XCHAL_HAVE_MUL32_HIGH */ - - /* Shift left by 9 bits, unless there was a carry-out from the - multiply, in which case, shift by 8 bits and increment the - exponent. */ - movi a4, 9 - srli a5, a2, 24 - 9 - beqz a5, 1f - addi a4, a4, -1 - addi a8, a8, 1 -1: ssl a4 - src a2, a2, a6 - sll a6, a6 - - /* Subtract the extra bias from the exponent sum (plus one to account - for the explicit "1.0" of the mantissa that will be added to the - exponent in the final result). */ - movi a4, 0x80 - sub a8, a8, a4 - - /* Check for over/underflow. The value in a8 is one less than the - final exponent, so values in the range 0..fd are OK here. */ - movi a4, 0xfe - bgeu a8, a4, .Lmul_overflow - -.Lmul_round: - /* Round. */ - bgez a6, .Lmul_rounded - addi a2, a2, 1 - slli a6, a6, 1 - beqz a6, .Lmul_exactlyhalf - -.Lmul_rounded: - /* Add the exponent to the mantissa. */ - slli a8, a8, 23 - add a2, a2, a8 - -.Lmul_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or a2, a2, a7 - -.Lmul_done: -#if __XTENSA_CALL0_ABI__ - l32i a12, sp, 16 - l32i a13, sp, 20 - l32i a14, sp, 24 - l32i a15, sp, 28 - addi sp, sp, 32 -#endif - leaf_return - -.Lmul_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - j .Lmul_rounded - -.Lmul_overflow: - bltz a8, .Lmul_underflow - /* Return +/- Infinity. */ - movi a8, 0xff - slli a2, a8, 23 - j .Lmul_addsign - -.Lmul_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - mov a9, a6 - ssr a8 - bgeui a8, 32, .Lmul_flush_to_zero - - /* Shift a2 right. Any bits that are shifted out of a2 are saved - in a6 (combined with the shifted-out bits currently in a6) for - rounding the result. */ - sll a6, a2 - srl a2, a2 - - /* Set the exponent to zero. */ - movi a8, 0 - - /* Pack any nonzero bits shifted out into a6. */ - beqz a9, .Lmul_round - movi a9, 1 - or a6, a6, a9 - j .Lmul_round - -.Lmul_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - j .Lmul_done - -#if XCHAL_NO_MUL - - /* For Xtensa processors with no multiply hardware, this simplified - version of _mulsi3 is used for multiplying 16-bit chunks of - the floating-point mantissas. When using CALL0, this function - uses a custom ABI: the inputs are passed in a13 and a14, the - result is returned in a12, and a8 and a15 are clobbered. */ - .align 4 -.Lmul_mulsi3: - leaf_entry sp, 16 - .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 - movi \dst, 0 -1: add \tmp1, \src2, \dst - extui \tmp2, \src1, 0, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx2 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 1, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx4 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 2, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx8 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 3, 1 - movnez \dst, \tmp1, \tmp2 - - srli \src1, \src1, 4 - slli \src2, \src2, 4 - bnez \src1, 1b - .endm -#if __XTENSA_CALL0_ABI__ - mul_mulsi3_body a12, a13, a14, a15, a8 -#else - /* The result will be written into a2, so save that argument in a4. */ - mov a4, a2 - mul_mulsi3_body a2, a4, a3, a5, a6 -#endif - leaf_return -#endif /* XCHAL_NO_MUL */ -#endif /* L_mulsf3 */ - -#ifdef L_divsf3 - - /* Division */ -__divsf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). 
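On the normalization step in the __mulsf3 code above: the product of two mantissas in [1.0, 2.0) lies in [1.0, 4.0), so its leading 1 sits in bit 46 or bit 47 of the 48-bit product. Shifting by 9 normally, or by 8 with an exponent increment when the product reached 2.0, lands that bit in the implicit-one position either way. Roughly, with the product in a single 64-bit word and the guard bits ignored; normalize_product is an illustrative name:

#include <stdint.h>

/* prod: the 47- or 48-bit product of two 24-bit mantissas.  Returns the
   24-bit result mantissa (leading 1 in bit 23) and bumps *exp when the
   product reached 2.0. */
static uint32_t normalize_product(uint64_t prod, int *exp)
{
    if (prod >> 47) {                  /* carry-out: product >= 2.0 */
        *exp += 1;
        return (uint32_t)(prod >> 24);
    }
    return (uint32_t)(prod >> 23);
}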
- (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) */ - -.Ldiv_yexpzero: - /* Clear the sign bit of y. */ - slli a3, a3, 1 - srli a3, a3, 1 - - /* Check for division by zero. */ - beqz a3, .Ldiv_yzero - - /* Normalize y. Adjust the exponent in a9. */ - do_nsau a10, a3, a4, a5 - addi a10, a10, -8 - ssl a10 - sll a3, a3 - movi a9, 1 - sub a9, a9, a10 - j .Ldiv_ynormalized - -.Ldiv_yzero: - /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ - slli a4, a2, 1 - srli a4, a4, 1 - srli a2, a7, 31 - slli a2, a2, 31 - or a2, a2, a6 - bnez a4, 1f - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 -1: leaf_return - -.Ldiv_xexpzero: - /* Clear the sign bit of x. */ - slli a2, a2, 1 - srli a2, a2, 1 - - /* If x is zero, return zero. */ - beqz a2, .Ldiv_return_zero - - /* Normalize x. Adjust the exponent in a8. */ - do_nsau a10, a2, a4, a5 - addi a10, a10, -8 - ssl a10 - sll a2, a2 - movi a8, 1 - sub a8, a8, a10 - j .Ldiv_xnormalized - -.Ldiv_return_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - leaf_return - -.Ldiv_xnan_or_inf: - /* Set the sign bit of the result. */ - srli a7, a3, 31 - slli a7, a7, 31 - xor a2, a2, a7 - /* If y is NaN or Inf, return NaN. */ - bnall a3, a6, 1f - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 -1: leaf_return - -.Ldiv_ynan_or_inf: - /* If y is Infinity, return zero. */ - slli a8, a3, 9 - beqz a8, .Ldiv_return_zero - /* y is NaN; return it. */ - mov a2, a3 - leaf_return - - .align 4 - .global __divsf3 - .type __divsf3, @function -__divsf3: - leaf_entry sp, 16 - movi a6, 0x7f800000 - - /* Get the sign of the result. */ - xor a7, a2, a3 - - /* Check for NaN and infinity. */ - ball a2, a6, .Ldiv_xnan_or_inf - ball a3, a6, .Ldiv_ynan_or_inf - - /* Extract the exponents. */ - extui a8, a2, 23, 8 - extui a9, a3, 23, 8 - - beqz a9, .Ldiv_yexpzero -.Ldiv_ynormalized: - beqz a8, .Ldiv_xexpzero -.Ldiv_xnormalized: - - /* Subtract the exponents. */ - sub a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0xffffff - or a2, a2, a6 - and a2, a2, a10 - or a3, a3, a6 - and a3, a3, a10 - - /* The first digit of the mantissa division must be a one. - Shift x (and adjust the exponent) as needed to make this true. */ - bltu a3, a2, 1f - slli a2, a2, 1 - addi a8, a8, -1 -1: - /* Do the first subtraction and shift. */ - sub a2, a2, a3 - slli a2, a2, 1 - - /* Put the quotient into a10. */ - movi a10, 1 - - /* Divide one bit at a time for 23 bits. */ - movi a9, 23 -#if XCHAL_HAVE_LOOPS - loop a9, .Ldiv_loopend -#endif -.Ldiv_loop: - /* Shift the quotient << 1. */ - slli a10, a10, 1 - - /* Is this digit a 0 or 1? */ - bltu a2, a3, 1f - - /* Output a 1 and subtract. */ - addi a10, a10, 1 - sub a2, a2, a3 - - /* Shift the dividend << 1. */ -1: slli a2, a2, 1 - -#if !XCHAL_HAVE_LOOPS - addi a9, a9, -1 - bnez a9, .Ldiv_loop -#endif -.Ldiv_loopend: - - /* Add the exponent bias (less one to account for the explicit "1.0" - of the mantissa that will be added to the exponent in the final - result). */ - addi a8, a8, 0x7e - - /* Check for over/underflow. The value in a8 is one less than the - final exponent, so values in the range 0..fd are OK here. */ - movi a4, 0xfe - bgeu a8, a4, .Ldiv_overflow - -.Ldiv_round: - /* Round. The remainder (<< 1) is in a2. */ - bltu a2, a3, .Ldiv_rounded - addi a10, a10, 1 - beq a2, a3, .Ldiv_exactlyhalf - -.Ldiv_rounded: - /* Add the exponent to the mantissa. 
*/ - slli a8, a8, 23 - add a2, a10, a8 - -.Ldiv_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or a2, a2, a7 - leaf_return - -.Ldiv_overflow: - bltz a8, .Ldiv_underflow - /* Return +/- Infinity. */ - addi a8, a4, 1 /* 0xff */ - slli a2, a8, 23 - j .Ldiv_addsign - -.Ldiv_exactlyhalf: - /* Remainder is exactly half the divisor. Round even. */ - srli a10, a10, 1 - slli a10, a10, 1 - j .Ldiv_rounded - -.Ldiv_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - ssr a8 - bgeui a8, 32, .Ldiv_flush_to_zero - - /* Shift a10 right. Any bits that are shifted out of a10 are - saved in a6 for rounding the result. */ - sll a6, a10 - srl a10, a10 - - /* Set the exponent to zero. */ - movi a8, 0 - - /* Pack any nonzero remainder (in a2) into a6. */ - beqz a2, 1f - movi a9, 1 - or a6, a6, a9 - - /* Round a10 based on the bits shifted out into a6. */ -1: bgez a6, .Ldiv_rounded - addi a10, a10, 1 - slli a6, a6, 1 - bnez a6, .Ldiv_rounded - srli a10, a10, 1 - slli a10, a10, 1 - j .Ldiv_rounded - -.Ldiv_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - leaf_return - -#endif /* L_divsf3 */ - -#ifdef L_cmpsf2 - - /* Equal and Not Equal */ - - .align 4 - .global __eqsf2 - .global __nesf2 - .set __nesf2, __eqsf2 - .type __eqsf2, @function -__eqsf2: - leaf_entry sp, 16 - bne a2, a3, 4f - - /* The values are equal but NaN != NaN. Check the exponent. */ - movi a6, 0x7f800000 - ball a2, a6, 3f - - /* Equal. */ - movi a2, 0 - leaf_return - - /* Not equal. */ -2: movi a2, 1 - leaf_return - - /* Check if the mantissas are nonzero. */ -3: slli a7, a2, 9 - j 5f - - /* Check if x and y are zero with different signs. */ -4: or a7, a2, a3 - slli a7, a7, 1 - - /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa - or x when exponent(x) = 0x7f8 and x == y. */ -5: movi a2, 0 - movi a3, 1 - movnez a2, a3, a7 - leaf_return - - - /* Greater Than */ - - .align 4 - .global __gtsf2 - .type __gtsf2, @function -__gtsf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, a3, 9 - beqz a7, .Lle_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, 0 - leaf_return - - - /* Less Than or Equal */ - - .align 4 - .global __lesf2 - .type __lesf2, @function -__lesf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, a3, 9 - beqz a7, .Lle_cmp - movi a2, 1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, 1 - leaf_return - -.Lle_cmp: - /* Check if x and y have different signs. */ - xor a7, a2, a3 - bltz a7, .Lle_diff_signs - - /* Check if x is negative. */ - bltz a2, .Lle_xneg - - /* Check if x <= y. */ - bltu a3, a2, 5f -4: movi a2, 0 - leaf_return - -.Lle_xneg: - /* Check if y <= x. */ - bgeu a2, a3, 4b -5: movi a2, 1 - leaf_return - -.Lle_diff_signs: - bltz a2, 4b - - /* Check if both x and y are zero. */ - or a7, a2, a3 - slli a7, a7, 1 - movi a2, 1 - movi a3, 0 - moveqz a2, a3, a7 - leaf_return - - - /* Greater Than or Equal */ - - .align 4 - .global __gesf2 - .type __gesf2, @function -__gesf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Llt_cmp - - /* Check if y is a NaN. 
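For the comparison helpers above, once NaNs have been filtered out, single-precision values compare like sign-magnitude integers. A rough C model of the ordered comparison (illustrative name; it returns negative/zero/positive in the spirit of the __lesf2 contract rather than the exact 0/1 values the assembly produces):

#include <stdint.h>

// Illustrative model of the ordered-compare logic: same sign means the raw
// bit patterns order the values (reversed when both are negative);
// different signs mean the non-negative one is larger, except that +0.0
// and -0.0 must compare equal.  NaNs are assumed screened out beforehand.
int lesf2_model(uint32_t x, uint32_t y)
{
    if ((x ^ y) & 0x80000000u) {          // different signs
        if (((x | y) << 1) == 0)          // +0.0 and -0.0 compare equal
            return 0;
        return (x & 0x80000000u) ? -1 : 1;
    }
    if (x & 0x80000000u)                  // both negative: reverse order
        return (x > y) ? -1 : (x < y) ? 1 : 0;
    return (x < y) ? -1 : (x > y) ? 1 : 0;
}

The only subtlety is the pair of zeros, which is why the different-sign path in the assembly checks (x | y) << 1 before deciding.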
*/ - slli a7, a3, 9 - beqz a7, .Llt_cmp - movi a2, -1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, -1 - leaf_return - - - /* Less Than */ - - .align 4 - .global __ltsf2 - .type __ltsf2, @function -__ltsf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Llt_cmp - - /* Check if y is a NaN. */ - slli a7, a3, 9 - beqz a7, .Llt_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, 0 - leaf_return - -.Llt_cmp: - /* Check if x and y have different signs. */ - xor a7, a2, a3 - bltz a7, .Llt_diff_signs - - /* Check if x is negative. */ - bltz a2, .Llt_xneg - - /* Check if x < y. */ - bgeu a2, a3, 5f -4: movi a2, -1 - leaf_return - -.Llt_xneg: - /* Check if y < x. */ - bltu a3, a2, 4b -5: movi a2, 0 - leaf_return - -.Llt_diff_signs: - bgez a2, 5b - - /* Check if both x and y are nonzero. */ - or a7, a2, a3 - slli a7, a7, 1 - movi a2, 0 - movi a3, -1 - movnez a2, a3, a7 - leaf_return - - - /* Unordered */ - - .align 4 - .global __unordsf2 - .type __unordsf2, @function -__unordsf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 3f -1: ball a3, a6, 4f -2: movi a2, 0 - leaf_return - -3: slli a7, a2, 9 - beqz a7, 1b - movi a2, 1 - leaf_return - -4: slli a7, a3, 9 - beqz a7, 2b - movi a2, 1 - leaf_return - -#endif /* L_cmpsf2 */ - -#ifdef L_fixsfsi - - .align 4 - .global __fixsfsi - .type __fixsfsi, @function -__fixsfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixsfsi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7e - bgei a4, 32, .Lfixsfsi_maxint - blti a4, 1, .Lfixsfsi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli a5, a7, 8 - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixsfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixsfsi_maxint - - /* Translate NaN to +maxint. */ - movi a2, 0 - -.Lfixsfsi_maxint: - slli a4, a6, 8 /* 0x80000000 */ - addi a5, a4, -1 /* 0x7fffffff */ - movgez a4, a5, a2 - mov a2, a4 - leaf_return - -.Lfixsfsi_zero: - movi a2, 0 - leaf_return - -#endif /* L_fixsfsi */ - -#ifdef L_fixsfdi - - .align 4 - .global __fixsfdi - .type __fixsfdi, @function -__fixsfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixsfdi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7e - bgei a4, 64, .Lfixsfdi_maxint - blti a4, 1, .Lfixsfdi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli xh, a7, 8 - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixsfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixsfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixsfdi_smallshift: - movi xl, 0 - sll xl, xh - srl xh, xh - j .Lfixsfdi_shifted - -.Lfixsfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixsfdi_maxint - - /* Translate NaN to +maxint. 
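__fixsfsi above follows the usual float-to-int pattern: range-check the exponent, shift the mantissa (implicit one restored) into place, then negate if the sign bit was set. A rough C equivalent with the same saturation behaviour (NaN treated like a large positive value; the _model name is illustrative):

#include <stdint.h>

// Illustrative model of __fixsfsi: "a" is the raw IEEE single bit pattern.
int32_t fixsfsi_model(uint32_t a)
{
    uint32_t exp  = (a >> 23) & 0xff;
    uint32_t mant = (a & 0x007fffffu) | 0x00800000u;
    int neg = (a >> 31) != 0;

    if (exp == 0xff && (a << 9) != 0)            // NaN -> INT32_MAX
        return INT32_MAX;
    int shift = (int)exp - 0x7e;                 // exp - 126
    if (shift >= 32)                             // too large (or +/-Inf)
        return neg ? INT32_MIN : INT32_MAX;
    if (shift < 1)                               // |a| < 1.0 -> 0
        return 0;
    uint32_t val = (mant << 8) >> (32 - shift);  // same effect as ssl/srl
    return neg ? -(int32_t)val : (int32_t)val;
}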
*/ - movi a2, 0 - -.Lfixsfdi_maxint: - slli a7, a6, 8 /* 0x80000000 */ - bgez a2, 1f - mov xh, a7 - movi xl, 0 - leaf_return - -1: addi xh, a7, -1 /* 0x7fffffff */ - movi xl, -1 - leaf_return - -.Lfixsfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -#endif /* L_fixsfdi */ - -#ifdef L_fixunssfsi - - .align 4 - .global __fixunssfsi - .type __fixunssfsi, @function -__fixunssfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixunssfsi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7f - bgei a4, 32, .Lfixunssfsi_maxint - bltz a4, .Lfixunssfsi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli a5, a7, 8 - - /* Shift back to the right, based on the exponent. */ - addi a4, a4, 1 - beqi a4, 32, .Lfixunssfsi_bigexp - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixunssfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixunssfsi_maxint - - /* Translate NaN to 0xffffffff. */ - movi a2, -1 - leaf_return - -.Lfixunssfsi_maxint: - slli a4, a6, 8 /* 0x80000000 */ - movi a5, -1 /* 0xffffffff */ - movgez a4, a5, a2 - mov a2, a4 - leaf_return - -.Lfixunssfsi_zero: - movi a2, 0 - leaf_return - -.Lfixunssfsi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz a2, 1f - mov a2, a5 /* no shift needed */ - leaf_return - - /* Return 0x80000000 if negative. */ -1: slli a2, a6, 8 - leaf_return - -#endif /* L_fixunssfsi */ - -#ifdef L_fixunssfdi - - .align 4 - .global __fixunssfdi - .type __fixunssfdi, @function -__fixunssfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixunssfdi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7f - bgei a4, 64, .Lfixunssfdi_maxint - bltz a4, .Lfixunssfdi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli xh, a7, 8 - - /* Shift back to the right, based on the exponent. */ - addi a4, a4, 1 - beqi a4, 64, .Lfixunssfdi_bigexp - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixunssfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixunssfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixunssfdi_smallshift: - movi xl, 0 - src xl, xh, xl - srl xh, xh - j .Lfixunssfdi_shifted - -.Lfixunssfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixunssfdi_maxint - - /* Translate NaN to 0xffffffff.... */ -1: movi xh, -1 - movi xl, -1 - leaf_return - -.Lfixunssfdi_maxint: - bgez a2, 1b -2: slli xh, a6, 8 /* 0x80000000 */ - movi xl, 0 - leaf_return - -.Lfixunssfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -.Lfixunssfdi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz a7, 2b - movi xl, 0 - leaf_return /* no shift needed */ - -#endif /* L_fixunssfdi */ - -#ifdef L_floatsisf - - .align 4 - .global __floatunsisf - .type __floatunsisf, @function -__floatunsisf: - leaf_entry sp, 16 - beqz a2, .Lfloatsisf_return - - /* Set the sign to zero and jump to the floatsisf code. */ - movi a7, 0 - j .Lfloatsisf_normalize - - .align 4 - .global __floatsisf - .type __floatsisf, @function -__floatsisf: - leaf_entry sp, 16 - - /* Check for zero. */ - beqz a2, .Lfloatsisf_return - - /* Save the sign. */ - extui a7, a2, 31, 1 - - /* Get the absolute value. 
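The __floatsisf/__floatunsisf conversion that begins above (and continues below) normalizes the magnitude by its leading-zero count, derives the exponent from that shift amount (0x9d is 0x7e + 31), and rounds to nearest even. A rough, self-contained C model of the whole routine; the helper and function names are illustrative:

#include <stdint.h>

int clz32_model(uint32_t x)            // portable count-leading-zeros
{
    int n = 0;
    if (x == 0) return 32;
    while (!(x & 0x80000000u)) { x <<= 1; n++; }
    return n;
}

uint32_t floatsisf_model(int32_t i)
{
    if (i == 0)
        return 0;
    uint32_t sign = (i < 0) ? 0x80000000u : 0;
    uint32_t mag  = (i < 0) ? 0u - (uint32_t)i : (uint32_t)i;

    int n = clz32_model(mag);
    uint32_t norm = mag << n;          // leading one in bit 31
    uint32_t mant = norm >> 8;         // 24-bit mantissa, implicit 1 at bit 23
    uint32_t rest = norm << 24;        // bits shifted out, used for rounding

    uint32_t exp = 0x9d - (uint32_t)n; // 0x7e + 31 - clz
    uint32_t r = mant + (exp << 23);   // implicit one bumps the exponent
    r |= sign;

    if (rest & 0x80000000u) {          // round to nearest ...
        r += 1;                        // (carry into the exponent is fine)
        if ((rest << 1) == 0)          // ... with ties to even
            r &= ~1u;
    }
    return r;
}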
*/ -#if XCHAL_HAVE_ABS - abs a2, a2 -#else - neg a4, a2 - movltz a2, a4, a2 -#endif - -.Lfloatsisf_normalize: - /* Normalize with the first 1 bit in the msb. */ - do_nsau a4, a2, a5, a6 - ssl a4 - sll a5, a2 - - /* Shift the mantissa into position, with rounding bits in a6. */ - srli a2, a5, 8 - slli a6, a5, (32 - 8) - - /* Set the exponent. */ - movi a5, 0x9d /* 0x7e + 31 */ - sub a5, a5, a4 - slli a5, a5, 23 - add a2, a2, a5 - - /* Add the sign. */ - slli a7, a7, 31 - or a2, a2, a7 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a6, .Lfloatsisf_return - addi a2, a2, 1 /* Overflow to the exponent is OK. */ - - /* Check if the leftover fraction is exactly 1/2. */ - slli a6, a6, 1 - beqz a6, .Lfloatsisf_exactlyhalf - -.Lfloatsisf_return: - leaf_return - -.Lfloatsisf_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -#endif /* L_floatsisf */ - -#ifdef L_floatdisf - - .align 4 - .global __floatundisf - .type __floatundisf, @function -__floatundisf: - leaf_entry sp, 16 - - /* Check for zero. */ - or a4, xh, xl - beqz a4, 2f - - /* Set the sign to zero and jump to the floatdisf code. */ - movi a7, 0 - j .Lfloatdisf_normalize - - .align 4 - .global __floatdisf - .type __floatdisf, @function -__floatdisf: - leaf_entry sp, 16 - - /* Check for zero. */ - or a4, xh, xl - beqz a4, 2f - - /* Save the sign. */ - extui a7, xh, 31, 1 - - /* Get the absolute value. */ - bgez xh, .Lfloatdisf_normalize - neg xl, xl - neg xh, xh - beqz xl, .Lfloatdisf_normalize - addi xh, xh, -1 - -.Lfloatdisf_normalize: - /* Normalize with the first 1 bit in the msb of xh. */ - beqz xh, .Lfloatdisf_bigshift - do_nsau a4, xh, a5, a6 - ssl a4 - src xh, xh, xl - sll xl, xl - -.Lfloatdisf_shifted: - /* Shift the mantissa into position, with rounding bits in a6. */ - ssai 8 - sll a5, xl - src a6, xh, xl - srl xh, xh - beqz a5, 1f - movi a5, 1 - or a6, a6, a5 -1: - /* Set the exponent. */ - movi a5, 0xbd /* 0x7e + 63 */ - sub a5, a5, a4 - slli a5, a5, 23 - add a2, xh, a5 - - /* Add the sign. */ - slli a7, a7, 31 - or a2, a2, a7 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a6, 2f - addi a2, a2, 1 /* Overflow to the exponent is OK. */ - - /* Check if the leftover fraction is exactly 1/2. */ - slli a6, a6, 1 - beqz a6, .Lfloatdisf_exactlyhalf -2: leaf_return - -.Lfloatdisf_bigshift: - /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ - do_nsau a4, xl, a5, a6 - ssl a4 - sll xh, xl - movi xl, 0 - addi a4, a4, 32 - j .Lfloatdisf_shifted - -.Lfloatdisf_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -#endif /* L_floatdisf */ diff --git a/gcc/config/xtensa/lib1funcs.asm b/gcc/config/xtensa/lib1funcs.asm deleted file mode 100644 index 071b9171177..00000000000 --- a/gcc/config/xtensa/lib1funcs.asm +++ /dev/null @@ -1,845 +0,0 @@ -/* Assembly functions for the Xtensa version of libgcc1. - Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009 - Free Software Foundation, Inc. - Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 3, or (at your option) any later -version. - -GCC is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -#include "xtensa-config.h" - -/* Define macros for the ABS and ADDX* instructions to handle cases - where they are not included in the Xtensa processor configuration. */ - - .macro do_abs dst, src, tmp -#if XCHAL_HAVE_ABS - abs \dst, \src -#else - neg \tmp, \src - movgez \tmp, \src, \src - mov \dst, \tmp -#endif - .endm - - .macro do_addx2 dst, as, at, tmp -#if XCHAL_HAVE_ADDX - addx2 \dst, \as, \at -#else - slli \tmp, \as, 1 - add \dst, \tmp, \at -#endif - .endm - - .macro do_addx4 dst, as, at, tmp -#if XCHAL_HAVE_ADDX - addx4 \dst, \as, \at -#else - slli \tmp, \as, 2 - add \dst, \tmp, \at -#endif - .endm - - .macro do_addx8 dst, as, at, tmp -#if XCHAL_HAVE_ADDX - addx8 \dst, \as, \at -#else - slli \tmp, \as, 3 - add \dst, \tmp, \at -#endif - .endm - -/* Define macros for leaf function entry and return, supporting either the - standard register windowed ABI or the non-windowed call0 ABI. These - macros do not allocate any extra stack space, so they only work for - leaf functions that do not need to spill anything to the stack. */ - - .macro leaf_entry reg, size -#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ - entry \reg, \size -#else - /* do nothing */ -#endif - .endm - - .macro leaf_return -#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ - retw -#else - ret -#endif - .endm - - -#ifdef L_mulsi3 - .align 4 - .global __mulsi3 - .type __mulsi3, @function -__mulsi3: - leaf_entry sp, 16 - -#if XCHAL_HAVE_MUL32 - mull a2, a2, a3 - -#elif XCHAL_HAVE_MUL16 - or a4, a2, a3 - srai a4, a4, 16 - bnez a4, .LMUL16 - mul16u a2, a2, a3 - leaf_return -.LMUL16: - srai a4, a2, 16 - srai a5, a3, 16 - mul16u a7, a4, a3 - mul16u a6, a5, a2 - mul16u a4, a2, a3 - add a7, a7, a6 - slli a7, a7, 16 - add a2, a7, a4 - -#elif XCHAL_HAVE_MAC16 - mul.aa.hl a2, a3 - mula.aa.lh a2, a3 - rsr a5, ACCLO - umul.aa.ll a2, a3 - rsr a4, ACCLO - slli a5, a5, 16 - add a2, a4, a5 - -#else /* !MUL32 && !MUL16 && !MAC16 */ - - /* Multiply one bit at a time, but unroll the loop 4x to better - exploit the addx instructions and avoid overhead. - Peel the first iteration to save a cycle on init. */ - - /* Avoid negative numbers. */ - xor a5, a2, a3 /* Top bit is 1 if one input is negative. */ - do_abs a3, a3, a6 - do_abs a2, a2, a6 - - /* Swap so the second argument is smaller. 
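The software __mulsi3 path above (continuing just below) reduces to a shift-and-add loop that consumes four multiplier bits per iteration, using the addx2/addx4/addx8 forms where the configuration provides them. A rough C model, with an illustrative name and conditional adds standing in for the movnez sequences:

#include <stdint.h>

// Illustrative model of the no-hardware-multiply __mulsi3 path: work on
// absolute values, keep the larger operand as the addend, and consume the
// smaller one four bits at a time.
int32_t mulsi3_model(int32_t a, int32_t b)
{
    uint32_t sign = (uint32_t)(a ^ b) & 0x80000000u;   // sign of the result
    uint32_t ua = (a < 0) ? 0u - (uint32_t)a : (uint32_t)a;
    uint32_t ub = (b < 0) ? 0u - (uint32_t)b : (uint32_t)b;
    if (ua < ub) { uint32_t t = ua; ua = ub; ub = t; } // ub is the smaller

    uint32_t acc = 0;
    while (ub != 0) {
        if (ub & 1) acc += ua;          // bit 0
        if (ub & 2) acc += ua << 1;     // bit 1 (addx2 in the assembly)
        if (ub & 4) acc += ua << 2;     // bit 2 (addx4)
        if (ub & 8) acc += ua << 3;     // bit 3 (addx8)
        ub >>= 4;
        ua <<= 4;
    }
    return (int32_t)(sign ? 0u - acc : acc);
}

Driving the loop with the smaller operand bounds the iteration count; the assembly additionally peels the first group of four bits to save a cycle on setup.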
*/ - sub a7, a2, a3 - mov a4, a3 - movgez a4, a2, a7 /* a4 = max (a2, a3) */ - movltz a3, a2, a7 /* a3 = min (a2, a3) */ - - movi a2, 0 - extui a6, a3, 0, 1 - movnez a2, a4, a6 - - do_addx2 a7, a4, a2, a7 - extui a6, a3, 1, 1 - movnez a2, a7, a6 - - do_addx4 a7, a4, a2, a7 - extui a6, a3, 2, 1 - movnez a2, a7, a6 - - do_addx8 a7, a4, a2, a7 - extui a6, a3, 3, 1 - movnez a2, a7, a6 - - bgeui a3, 16, .Lmult_main_loop - neg a3, a2 - movltz a2, a3, a5 - leaf_return - - .align 4 -.Lmult_main_loop: - srli a3, a3, 4 - slli a4, a4, 4 - - add a7, a4, a2 - extui a6, a3, 0, 1 - movnez a2, a7, a6 - - do_addx2 a7, a4, a2, a7 - extui a6, a3, 1, 1 - movnez a2, a7, a6 - - do_addx4 a7, a4, a2, a7 - extui a6, a3, 2, 1 - movnez a2, a7, a6 - - do_addx8 a7, a4, a2, a7 - extui a6, a3, 3, 1 - movnez a2, a7, a6 - - bgeui a3, 16, .Lmult_main_loop - - neg a3, a2 - movltz a2, a3, a5 - -#endif /* !MUL32 && !MUL16 && !MAC16 */ - - leaf_return - .size __mulsi3, . - __mulsi3 - -#endif /* L_mulsi3 */ - - -#ifdef L_umulsidi3 - -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 -#define XCHAL_NO_MUL 1 -#endif - - .align 4 - .global __umulsidi3 - .type __umulsidi3, @function -__umulsidi3: -#if __XTENSA_CALL0_ABI__ - leaf_entry sp, 32 - addi sp, sp, -32 - s32i a12, sp, 16 - s32i a13, sp, 20 - s32i a14, sp, 24 - s32i a15, sp, 28 -#elif XCHAL_NO_MUL - /* This is not really a leaf function; allocate enough stack space - to allow CALL12s to a helper function. */ - leaf_entry sp, 48 -#else - leaf_entry sp, 16 -#endif - -#ifdef __XTENSA_EB__ -#define wh a2 -#define wl a3 -#else -#define wh a3 -#define wl a2 -#endif /* __XTENSA_EB__ */ - - /* This code is taken from the mulsf3 routine in ieee754-sf.S. - See more comments there. */ - -#if XCHAL_HAVE_MUL32_HIGH - mull a6, a2, a3 - muluh wh, a2, a3 - mov wl, a6 - -#else /* ! MUL32_HIGH */ - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* a0 and a8 will be clobbered by calling the multiply function - but a8 is not used here and need not be saved. */ - s32i a0, sp, 0 -#endif - -#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 - -#define a2h a4 -#define a3h a5 - - /* Get the high halves of the inputs into registers. */ - srli a2h, a2, 16 - srli a3h, a3, 16 - -#define a2l a2 -#define a3l a3 - -#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 - /* Clear the high halves of the inputs. This does not matter - for MUL16 because the high bits are ignored. */ - extui a2, a2, 0, 16 - extui a3, a3, 0, 16 -#endif -#endif /* MUL16 || MUL32 */ - - -#if XCHAL_HAVE_MUL16 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mul16u dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MUL32 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mull dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MAC16 - -/* The preprocessor insists on inserting a space when concatenating after - a period in the definition of do_mul below. These macros are a workaround - using underscores instead of periods when doing the concatenation. 
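__umulsidi3, set up above and carried out just below, builds the 64-bit product from four 16x16 partial products, adding the two middle products with an explicit carry into the high word. A rough C model of that scheme (names are illustrative; the real code funnels the carry and the high half together with src):

#include <stdint.h>

// Illustrative model of the partial-product scheme used by __umulsidi3.
uint64_t umulsidi3_model(uint32_t x, uint32_t y)
{
    uint32_t xl = x & 0xffff, xh = x >> 16;
    uint32_t yl = y & 0xffff, yh = y >> 16;

    uint32_t pp0 = xl * yl;
    uint32_t pp1 = xl * yh;
    uint32_t pp2 = xh * yl;
    uint32_t pp3 = xh * yh;

    uint32_t mid   = pp1 + pp2;                // may carry out ...
    uint32_t carry = (mid < pp1) ? 1u : 0u;    // ... so record the carry

    uint32_t lo = pp0 + (mid << 16);
    uint32_t hi = pp3 + (mid >> 16) + (carry << 16)
                  + ((lo < pp0) ? 1u : 0u);    // carry from the low word

    return ((uint64_t)hi << 32) | lo;
}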
*/ -#define umul_aa_ll umul.aa.ll -#define umul_aa_lh umul.aa.lh -#define umul_aa_hl umul.aa.hl -#define umul_aa_hh umul.aa.hh - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - umul_aa_ ## xhalf ## yhalf xreg, yreg; \ - rsr dst, ACCLO - -#else /* no multiply hardware */ - -#define set_arg_l(dst, src) \ - extui dst, src, 0, 16 -#define set_arg_h(dst, src) \ - srli dst, src, 16 - -#if __XTENSA_CALL0_ABI__ -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a13, xreg); \ - set_arg_ ## yhalf (a14, yreg); \ - call0 .Lmul_mulsi3; \ - mov dst, a12 -#else -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a14, xreg); \ - set_arg_ ## yhalf (a15, yreg); \ - call12 .Lmul_mulsi3; \ - mov dst, a14 -#endif /* __XTENSA_CALL0_ABI__ */ - -#endif /* no multiply hardware */ - - /* Add pp1 and pp2 into a6 with carry-out in a9. */ - do_mul(a6, a2, l, a3, h) /* pp 1 */ - do_mul(a11, a2, h, a3, l) /* pp 2 */ - movi a9, 0 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Shift the high half of a9/a6 into position in a9. Note that - this value can be safely incremented without any carry-outs. */ - ssai 16 - src a9, a9, a6 - - /* Compute the low word into a6. */ - do_mul(a11, a2, l, a3, l) /* pp 0 */ - sll a6, a6 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Compute the high word into wh. */ - do_mul(wh, a2, h, a3, h) /* pp 3 */ - add wh, wh, a9 - mov wl, a6 - -#endif /* !MUL32_HIGH */ - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Restore the original return address. */ - l32i a0, sp, 0 -#endif -#if __XTENSA_CALL0_ABI__ - l32i a12, sp, 16 - l32i a13, sp, 20 - l32i a14, sp, 24 - l32i a15, sp, 28 - addi sp, sp, 32 -#endif - leaf_return - -#if XCHAL_NO_MUL - - /* For Xtensa processors with no multiply hardware, this simplified - version of _mulsi3 is used for multiplying 16-bit chunks of - the floating-point mantissas. When using CALL0, this function - uses a custom ABI: the inputs are passed in a13 and a14, the - result is returned in a12, and a8 and a15 are clobbered. */ - .align 4 -.Lmul_mulsi3: - leaf_entry sp, 16 - .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 - movi \dst, 0 -1: add \tmp1, \src2, \dst - extui \tmp2, \src1, 0, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx2 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 1, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx4 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 2, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx8 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 3, 1 - movnez \dst, \tmp1, \tmp2 - - srli \src1, \src1, 4 - slli \src2, \src2, 4 - bnez \src1, 1b - .endm -#if __XTENSA_CALL0_ABI__ - mul_mulsi3_body a12, a13, a14, a15, a8 -#else - /* The result will be written into a2, so save that argument in a4. */ - mov a4, a2 - mul_mulsi3_body a2, a4, a3, a5, a6 -#endif - leaf_return -#endif /* XCHAL_NO_MUL */ - - .size __umulsidi3, . - __umulsidi3 - -#endif /* L_umulsidi3 */ - - -/* Define a macro for the NSAU (unsigned normalize shift amount) - instruction, which computes the number of leading zero bits, - to handle cases where it is not included in the Xtensa processor - configuration. 
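The do_nsau fallback described above narrows the search for the leading one bit in two steps and finishes with the 256-entry __nsau_data table. A rough C model (illustrative name; a small loop stands in for the table lookup):

#include <stdint.h>

// Illustrative model of the software normalize-shift-amount computation:
// skip the empty top half, then the empty top byte, then count within the
// remaining top byte (the assembly uses the __nsau_data table for this).
int nsau_model(uint32_t val)
{
    int cnt = 0;
    if ((val >> 16) == 0) { cnt = 16; val <<= 16; }   // top half empty
    if ((val >> 24) == 0) { cnt += 8; val <<= 8;  }   // top byte empty
    uint32_t byte = val >> 24;
    int n = 8;
    while (byte) { byte >>= 1; n--; }                 // leading zeros in byte
    return cnt + n;
}

For val == 0 this yields 32, matching the first table entry of 8 after both skips.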
*/ - - .macro do_nsau cnt, val, tmp, a -#if XCHAL_HAVE_NSA - nsau \cnt, \val -#else - mov \a, \val - movi \cnt, 0 - extui \tmp, \a, 16, 16 - bnez \tmp, 0f - movi \cnt, 16 - slli \a, \a, 16 -0: - extui \tmp, \a, 24, 8 - bnez \tmp, 1f - addi \cnt, \cnt, 8 - slli \a, \a, 8 -1: - movi \tmp, __nsau_data - extui \a, \a, 24, 8 - add \tmp, \tmp, \a - l8ui \tmp, \tmp, 0 - add \cnt, \cnt, \tmp -#endif /* !XCHAL_HAVE_NSA */ - .endm - -#ifdef L_clz - .section .rodata - .align 4 - .global __nsau_data - .type __nsau_data, @object -__nsau_data: -#if !XCHAL_HAVE_NSA - .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 - .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 - .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 - .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 - .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 - .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 - .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 - .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -#endif /* !XCHAL_HAVE_NSA */ - .size __nsau_data, . - __nsau_data - .hidden __nsau_data -#endif /* L_clz */ - - -#ifdef L_clzsi2 - .align 4 - .global __clzsi2 - .type __clzsi2, @function -__clzsi2: - leaf_entry sp, 16 - do_nsau a2, a2, a3, a4 - leaf_return - .size __clzsi2, . - __clzsi2 - -#endif /* L_clzsi2 */ - - -#ifdef L_ctzsi2 - .align 4 - .global __ctzsi2 - .type __ctzsi2, @function -__ctzsi2: - leaf_entry sp, 16 - neg a3, a2 - and a3, a3, a2 - do_nsau a2, a3, a4, a5 - neg a2, a2 - addi a2, a2, 31 - leaf_return - .size __ctzsi2, . - __ctzsi2 - -#endif /* L_ctzsi2 */ - - -#ifdef L_ffssi2 - .align 4 - .global __ffssi2 - .type __ffssi2, @function -__ffssi2: - leaf_entry sp, 16 - neg a3, a2 - and a3, a3, a2 - do_nsau a2, a3, a4, a5 - neg a2, a2 - addi a2, a2, 32 - leaf_return - .size __ffssi2, . 
- __ffssi2 - -#endif /* L_ffssi2 */ - - -#ifdef L_udivsi3 - .align 4 - .global __udivsi3 - .type __udivsi3, @function -__udivsi3: - leaf_entry sp, 16 -#if XCHAL_HAVE_DIV32 - quou a2, a2, a3 -#else - bltui a3, 2, .Lle_one /* check if the divisor <= 1 */ - - mov a6, a2 /* keep dividend in a6 */ - do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */ - do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */ - bgeu a5, a4, .Lspecial - - sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ - ssl a4 - sll a3, a3 /* divisor <<= count */ - movi a2, 0 /* quotient = 0 */ - - /* test-subtract-and-shift loop; one quotient bit on each iteration */ -#if XCHAL_HAVE_LOOPS - loopnez a4, .Lloopend -#endif /* XCHAL_HAVE_LOOPS */ -.Lloop: - bltu a6, a3, .Lzerobit - sub a6, a6, a3 - addi a2, a2, 1 -.Lzerobit: - slli a2, a2, 1 - srli a3, a3, 1 -#if !XCHAL_HAVE_LOOPS - addi a4, a4, -1 - bnez a4, .Lloop -#endif /* !XCHAL_HAVE_LOOPS */ -.Lloopend: - - bltu a6, a3, .Lreturn - addi a2, a2, 1 /* increment quotient if dividend >= divisor */ -.Lreturn: - leaf_return - -.Lle_one: - beqz a3, .Lerror /* if divisor == 1, return the dividend */ - leaf_return - -.Lspecial: - /* return dividend >= divisor */ - bltu a6, a3, .Lreturn0 - movi a2, 1 - leaf_return - -.Lerror: - /* Divide by zero: Use an illegal instruction to force an exception. - The subsequent "DIV0" string can be recognized by the exception - handler to identify the real cause of the exception. */ - ill - .ascii "DIV0" - -.Lreturn0: - movi a2, 0 -#endif /* XCHAL_HAVE_DIV32 */ - leaf_return - .size __udivsi3, . - __udivsi3 - -#endif /* L_udivsi3 */ - - -#ifdef L_divsi3 - .align 4 - .global __divsi3 - .type __divsi3, @function -__divsi3: - leaf_entry sp, 16 -#if XCHAL_HAVE_DIV32 - quos a2, a2, a3 -#else - xor a7, a2, a3 /* sign = dividend ^ divisor */ - do_abs a6, a2, a4 /* udividend = abs (dividend) */ - do_abs a3, a3, a4 /* udivisor = abs (divisor) */ - bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ - do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */ - do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */ - bgeu a5, a4, .Lspecial - - sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ - ssl a4 - sll a3, a3 /* udivisor <<= count */ - movi a2, 0 /* quotient = 0 */ - - /* test-subtract-and-shift loop; one quotient bit on each iteration */ -#if XCHAL_HAVE_LOOPS - loopnez a4, .Lloopend -#endif /* XCHAL_HAVE_LOOPS */ -.Lloop: - bltu a6, a3, .Lzerobit - sub a6, a6, a3 - addi a2, a2, 1 -.Lzerobit: - slli a2, a2, 1 - srli a3, a3, 1 -#if !XCHAL_HAVE_LOOPS - addi a4, a4, -1 - bnez a4, .Lloop -#endif /* !XCHAL_HAVE_LOOPS */ -.Lloopend: - - bltu a6, a3, .Lreturn - addi a2, a2, 1 /* increment if udividend >= udivisor */ -.Lreturn: - neg a5, a2 - movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */ - leaf_return - -.Lle_one: - beqz a3, .Lerror - neg a2, a6 /* if udivisor == 1, then return... */ - movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */ - leaf_return - -.Lspecial: - bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */ - movi a2, 1 - movi a4, -1 - movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */ - leaf_return - -.Lerror: - /* Divide by zero: Use an illegal instruction to force an exception. - The subsequent "DIV0" string can be recognized by the exception - handler to identify the real cause of the exception. */ - ill - .ascii "DIV0" - -.Lreturn0: - movi a2, 0 -#endif /* XCHAL_HAVE_DIV32 */ - leaf_return - .size __divsi3, . 
- __divsi3 - -#endif /* L_divsi3 */ - - -#ifdef L_umodsi3 - .align 4 - .global __umodsi3 - .type __umodsi3, @function -__umodsi3: - leaf_entry sp, 16 -#if XCHAL_HAVE_DIV32 - remu a2, a2, a3 -#else - bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */ - - do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */ - do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */ - bgeu a5, a4, .Lspecial - - sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ - ssl a4 - sll a3, a3 /* divisor <<= count */ - - /* test-subtract-and-shift loop */ -#if XCHAL_HAVE_LOOPS - loopnez a4, .Lloopend -#endif /* XCHAL_HAVE_LOOPS */ -.Lloop: - bltu a2, a3, .Lzerobit - sub a2, a2, a3 -.Lzerobit: - srli a3, a3, 1 -#if !XCHAL_HAVE_LOOPS - addi a4, a4, -1 - bnez a4, .Lloop -#endif /* !XCHAL_HAVE_LOOPS */ -.Lloopend: - -.Lspecial: - bltu a2, a3, .Lreturn - sub a2, a2, a3 /* subtract once more if dividend >= divisor */ -.Lreturn: - leaf_return - -.Lle_one: - bnez a3, .Lreturn0 - - /* Divide by zero: Use an illegal instruction to force an exception. - The subsequent "DIV0" string can be recognized by the exception - handler to identify the real cause of the exception. */ - ill - .ascii "DIV0" - -.Lreturn0: - movi a2, 0 -#endif /* XCHAL_HAVE_DIV32 */ - leaf_return - .size __umodsi3, . - __umodsi3 - -#endif /* L_umodsi3 */ - - -#ifdef L_modsi3 - .align 4 - .global __modsi3 - .type __modsi3, @function -__modsi3: - leaf_entry sp, 16 -#if XCHAL_HAVE_DIV32 - rems a2, a2, a3 -#else - mov a7, a2 /* save original (signed) dividend */ - do_abs a2, a2, a4 /* udividend = abs (dividend) */ - do_abs a3, a3, a4 /* udivisor = abs (divisor) */ - bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ - do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */ - do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */ - bgeu a5, a4, .Lspecial - - sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ - ssl a4 - sll a3, a3 /* udivisor <<= count */ - - /* test-subtract-and-shift loop */ -#if XCHAL_HAVE_LOOPS - loopnez a4, .Lloopend -#endif /* XCHAL_HAVE_LOOPS */ -.Lloop: - bltu a2, a3, .Lzerobit - sub a2, a2, a3 -.Lzerobit: - srli a3, a3, 1 -#if !XCHAL_HAVE_LOOPS - addi a4, a4, -1 - bnez a4, .Lloop -#endif /* !XCHAL_HAVE_LOOPS */ -.Lloopend: - -.Lspecial: - bltu a2, a3, .Lreturn - sub a2, a2, a3 /* subtract again if udividend >= udivisor */ -.Lreturn: - bgez a7, .Lpositive - neg a2, a2 /* if (dividend < 0), return -udividend */ -.Lpositive: - leaf_return - -.Lle_one: - bnez a3, .Lreturn0 - - /* Divide by zero: Use an illegal instruction to force an exception. - The subsequent "DIV0" string can be recognized by the exception - handler to identify the real cause of the exception. */ - ill - .ascii "DIV0" - -.Lreturn0: - movi a2, 0 -#endif /* XCHAL_HAVE_DIV32 */ - leaf_return - .size __modsi3, . - __modsi3 - -#endif /* L_modsi3 */ - - -#ifdef __XTENSA_EB__ -#define uh a2 -#define ul a3 -#else -#define uh a3 -#define ul a2 -#endif /* __XTENSA_EB__ */ - - -#ifdef L_ashldi3 - .align 4 - .global __ashldi3 - .type __ashldi3, @function -__ashldi3: - leaf_entry sp, 16 - ssl a4 - bgei a4, 32, .Llow_only - src uh, uh, ul - sll ul, ul - leaf_return - -.Llow_only: - sll uh, ul - movi ul, 0 - leaf_return - .size __ashldi3, . 
- __ashldi3 - -#endif /* L_ashldi3 */ - - -#ifdef L_ashrdi3 - .align 4 - .global __ashrdi3 - .type __ashrdi3, @function -__ashrdi3: - leaf_entry sp, 16 - ssr a4 - bgei a4, 32, .Lhigh_only - src ul, uh, ul - sra uh, uh - leaf_return - -.Lhigh_only: - sra ul, uh - srai uh, uh, 31 - leaf_return - .size __ashrdi3, . - __ashrdi3 - -#endif /* L_ashrdi3 */ - - -#ifdef L_lshrdi3 - .align 4 - .global __lshrdi3 - .type __lshrdi3, @function -__lshrdi3: - leaf_entry sp, 16 - ssr a4 - bgei a4, 32, .Lhigh_only1 - src ul, uh, ul - srl uh, uh - leaf_return - -.Lhigh_only1: - srl ul, uh - movi uh, 0 - leaf_return - .size __lshrdi3, . - __lshrdi3 - -#endif /* L_lshrdi3 */ - - -#include "ieee754-df.S" -#include "ieee754-sf.S" diff --git a/gcc/config/xtensa/t-xtensa b/gcc/config/xtensa/t-xtensa index c0a7cb5202f..31ac2ad2452 100644 --- a/gcc/config/xtensa/t-xtensa +++ b/gcc/config/xtensa/t-xtensa @@ -17,18 +17,6 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = xtensa/lib1funcs.asm -LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ - _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ - _ashldi3 _ashrdi3 _lshrdi3 \ - _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ - _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \ - _floatdisf _floatundisf \ - _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \ - _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \ - _floatdidf _floatundidf \ - _truncdfsf2 _extendsfdf2 - LIB2FUNCS_EXTRA = $(srcdir)/config/xtensa/lib2funcs.S $(out_object_file): gt-xtensa.h diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index b5d9c243a98..6b2514aba9a 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,123 @@ +2011-11-02 Rainer Orth + + * Makefile.in ($(lib1asmfuncs-o), $(lib1asmfuncs-s-o)): Use + $(srcdir) to refer to $(LIB1ASMSRC). + Use $<. + * config/arm/bpabi-v6m.S, config/arm/bpabi.S, + config/arm/ieee754-df.S, config/arm/ieee754-sf.S, + config/arm/lib1funcs.S: New files. + * config/arm/libunwind.S [!__symbian__]: Use lib1funcs.S. + * config/arm/t-arm: New file. + * config/arm/t-bpabi (LIB1ASMFUNCS): Set. + * config/arm/t-elf, config/arm/t-linux, config/arm/t-linux-eabi, + config/arm/t-strongarm-elf: New files. + * config/arm/t-symbian (LIB1ASMFUNCS): Set. + * config/arm/t-vxworks, config/arm/t-wince-pe: New files. + * config/avr/lib1funcs.S: New file. + * config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config/bfin/lib1funcs.S, config/bfin/t-bfin: New files. + * config/c6x/lib1funcs.S: New file. + * config/c6x/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config/fr30/lib1funcs.S, config/fr30/t-fr30: New files. + * config/frv/lib1funcs.S: New file. + * config/frv/t-frv (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config/h8300/lib1funcs.S, config/h8300/t-h8300: New files. + * config/i386/cygwin.S, config/i386/t-chkstk: New files. + * config/ia64/__divxf3.asm: Rename to ... + * config/ia64/__divxf3.S: ... this. + Adapt lib1funcs.asm filename. + * config/ia64/_fixtfdi.asm: Rename to ... + * config/ia64/_fixtfdi.S: ... this. + Adapt lib1funcs.asm filename. + * config/ia64/_fixunstfdi.asm: Rename to ... + * config/ia64/_fixunstfdi.S: ... this. + Adapt lib1funcs.asm filename. + * config/ia64/_floatditf.asm: Rename to ... + * config/ia64/_floatditf.S: ... this. + Adapt lib1funcs.asm filename. + * config/ia64/lib1funcs.S: New file. + * config/ia64/t-hpux (LIB1ASMFUNCS): Set. + * config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config/ia64/t-softfp-compat (libgcc1-tf-compats): Adapt suffix. 
+ * config/m32c/lib1funcs.S, config/m32c/t-m32c: New files. + * config/m68k/lb1sf68.S, config/m68k/t-floatlib: New files. + * config/mcore/lib1funcs.S, config/mcore/t-mcore: New files. + * config/mep/lib1funcs.S: New file. + * config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config/mips/mips16.S: New file. + * config/mips/t-mips16 (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config/pa/milli64.S: New file. + * config/pa/t-linux, config/pa/t-linux64: New files. + * config/picochip/lib1funcs.S: New file. + * config/picochip/t-picochip (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config/sh/lib1funcs.S, config/sh/lib1funcs.h: New files. + * config/sh/t-linux (LIB1ASMFUNCS_CACHE): Set. + * config/sh/t-netbsd: New file. + * config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE): Set. + Use $(srcdir) to refer to lib1funcs.S, adapt filename. + * config/sh/t-sh64: New file. + * config/sparc/lb1spc.S: New file. + * config/sparc/t-softmul (LIB1ASMSRC): Adapt sparc/lb1spc.asm + filename. + * config/v850/lib1funcs.S, config/v850/t-v850: New files. + * config/vax/lib1funcs.S, config/vax/t-linux: New files. + * config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S, + config/xtensa/lib1funcs.S: New files. + * config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Set. + * config.host (arm-wrs-vxworks): Add arm/t-arm, arm/t-vxworks to + tmake_file. + (arm*-*-freebsd*): Add arm/t-arm, arm/t-strongarm-elf to tmake_file. + (arm*-*-netbsdelf*): Add arm/t-arm to tmake_file. + (arm*-*-linux*): Likewise. + Add arm/t-elf, arm/t-bpabi, arm/t-linux-eabi to tmake_file for + arm*-*-linux-*eabi, add arm/t-linux otherwise. + (arm*-*-uclinux*): Add arm/t-arm, arm/t-elf to tmake_file. + (arm*-*-ecos-elf): Likewise. + (arm*-*-eabi*, arm*-*-symbianelf*): Likewise. + (arm*-*-rtems*): Likewise. + (arm*-*-elf): Likewise. + (arm*-wince-pe*): Add arm/t-arm, arm/t-wince-pe to tmake_file. + (avr-*-rtems*): Add to tmake_file, add avr/t-avr. + (bfin*-elf*): Add bfin/t-bfin to tmake_file. + (bfin*-uclinux*): Likewise. + (bfin*-linux-uclibc*): Likewise. + (bfin*-rtems*): Likewise. + (bfin*-*): Likewise. + (fido-*-elf): Merge into m68k-*-elf*. + (fr30-*-elf)): Add fr30/t-fr30 to tmake_file. + (frv-*-*linux*): Add frv/t-frv to tmake_file. + (h8300-*-rtems*): Add h8300/t-h8300 to tmake_file. + (h8300-*-elf*): Likewise. + (hppa*64*-*-linux*): Add pa/t-linux, pa/t-linux64 to tmake_file. + (hppa*-*-linux*): Add pa/t-linux to tmake_file. + (i[34567]86-*-cygwin*): Add i386/t-chkstk to tmake_file. + (i[34567]86-*-mingw*): Likewise. + (x86_64-*-mingw*): Likewise. + (i[34567]86-*-interix3*): Likewise. + (ia64*-*-hpux*): Add ia64/t-ia64, ia64/t-hpux to tmake_file. + (ia64-hp-*vms*): Add ia64/t-ia64 to tmake_file. + (m68k-*-elf*): Also handle fido-*-elf. + Add m68k/t-floatlib to tmake_file. + (m68k-*-uclinux*): Add m68k/t-floatlib to tmake_file. + (m68k-*-linux*): Likewise. + (m68k-*-rtems*): Likewise. + (mcore-*-elf): Add mcore/t-mcore to tmake_file. + (sh-*-elf*, sh[12346l]*-*-elf*): Add sh/t-sh64 to tmake_file for + sh64*-*-*. + (sh-*-linux*, sh[2346lbe]*-*-linux*): Add sh/t-sh to tmake_file. + Add sh/t-sh64 to tmake_file for sh64*-*-linux*. + (sh-*-netbsdelf*, shl*-*-netbsdelf*, sh5-*-netbsd*) + (sh5l*-*-netbsd*, sh64-*-netbsd*, sh64l*-*-netbsd*): Add sh/t-sh, + sh/t-netbsd to tmake_file. + Add sh/t-sh64 to tmake_file for sh5*-*-netbsd*, sh64*-netbsd*. + (sh-*-rtems*): Add sh/t-sh to tmake_file. + (sh-wrs-vxworks): Likewise. + (sparc-*-linux*): Add sparc/t-softmul to tmake_file except for + *-leon[3-9]*. + (v850*-*-*): Add v850/t-v850 to tmake_file. 
+ (vax-*-linux*): Add vax/t-linux to tmake_file. + (m32c-*-elf*, m32c-*-rtems*): Add m32c/t-m32c to tmake_file. + 2011-11-02 Rainer Orth * crtstuff.c: New file. diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in index 467901b057a..6bbb369f8e8 100644 --- a/libgcc/Makefile.in +++ b/libgcc/Makefile.in @@ -394,25 +394,22 @@ LIB2_DIVMOD_FUNCS := $(filter-out $(LIB2FUNCS_EXCLUDE) $(LIB1ASMFUNCS), \ ifeq ($(enable_shared),yes) lib1asmfuncs-o = $(patsubst %,%$(objext),$(LIB1ASMFUNCS)) -$(lib1asmfuncs-o): %$(objext): $(gcc_srcdir)/config/$(LIB1ASMSRC) %.vis - $(gcc_compile) -DL$* -xassembler-with-cpp \ - -c $(gcc_srcdir)/config/$(LIB1ASMSRC) -include $*.vis +$(lib1asmfuncs-o): %$(objext): $(srcdir)/config/$(LIB1ASMSRC) %.vis + $(gcc_compile) -DL$* -xassembler-with-cpp -c $< -include $*.vis $(patsubst %,%.vis,$(LIB1ASMFUNCS)): %.vis: %_s$(objext) $(gen-hide-list) libgcc-objects += $(lib1asmfuncs-o) lib1asmfuncs-s-o = $(patsubst %,%_s$(objext),$(LIB1ASMFUNCS)) -$(lib1asmfuncs-s-o): %_s$(objext): $(gcc_srcdir)/config/$(LIB1ASMSRC) - $(gcc_s_compile) -DL$* -xassembler-with-cpp \ - -c $(gcc_srcdir)/config/$(LIB1ASMSRC) +$(lib1asmfuncs-s-o): %_s$(objext): $(srcdir)/config/$(LIB1ASMSRC) + $(gcc_s_compile) -DL$* -xassembler-with-cpp -c $< libgcc-s-objects += $(lib1asmfuncs-s-o) else lib1asmfuncs-o = $(patsubst %,%$(objext),$(LIB1ASMFUNCS)) -$(lib1asmfuncs-o): %$(objext): $(gcc_srcdir)/config/$(LIB1ASMSRC) - $(gcc_compile) -DL$* -xassembler-with-cpp \ - -c $(gcc_srcdir)/config/$(LIB1ASMSRC) +$(lib1asmfuncs-o): %$(objext): $(srcdir)/config/$(LIB1ASMSRC) + $(gcc_compile) -DL$* -xassembler-with-cpp -c $< libgcc-objects += $(lib1asmfuncs-o) endif diff --git a/libgcc/config.host b/libgcc/config.host index 01e2f21a797..0a05ea184b0 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -306,22 +306,25 @@ alpha*-dec-*vms*) md_unwind_header=alpha/vms-unwind.h ;; arm-wrs-vxworks) - tmake_file="$tmake_file t-fdpbit" + tmake_file="$tmake_file arm/t-arm arm/t-vxworks t-fdpbit" extra_parts="$extra_parts crti.o crtn.o" ;; arm*-*-freebsd*) - tmake_file="$tmake_file t-fdpbit" + tmake_file="$tmake_file arm/t-arm arm/t-strongarm-elf t-fdpbit" ;; arm*-*-netbsdelf*) - tmake_file="$tmake_file t-slibgcc-gld-nover" + tmake_file="$tmake_file arm/t-arm t-slibgcc-gld-nover" ;; arm*-*-linux*) # ARM GNU/Linux with ELF - tmake_file="${tmake_file} t-fixedpoint-gnu-prefix" + tmake_file="${tmake_file} arm/t-arm t-fixedpoint-gnu-prefix" case ${host} in arm*-*-linux-*eabi) - tmake_file="${tmake_file} arm/t-bpabi t-slibgcc-libgcc" + tmake_file="${tmake_file} arm/t-elf arm/t-bpabi arm/t-linux-eabi t-slibgcc-libgcc" unwind_header=config/arm/unwind-arm.h ;; + *) + tmake_file="$tmake_file arm/t-linux" + ;; esac tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" ;; @@ -333,15 +336,15 @@ arm*-*-uclinux*) # ARM ucLinux unwind_header=config/arm/unwind-arm.h ;; esac - tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" + tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" extra_parts="$extra_parts crti.o crtn.o" ;; arm*-*-ecos-elf) - tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" + tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" extra_parts="$extra_parts crti.o crtn.o" ;; arm*-*-eabi* | arm*-*-symbianelf* ) - tmake_file="${tmake_file} t-fixedpoint-gnu-prefix" + tmake_file="${tmake_file} arm/t-arm arm/t-elf t-fixedpoint-gnu-prefix" case ${host} in arm*-*-eabi*) 
tmake_file="${tmake_file} arm/t-bpabi" @@ -356,17 +359,18 @@ arm*-*-eabi* | arm*-*-symbianelf* ) unwind_header=config/arm/unwind-arm.h ;; arm*-*-rtems*) - tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" + tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" extra_parts="$extra_parts crti.o crtn.o" ;; arm*-*-elf) - tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" + tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" extra_parts="$extra_parts crti.o crtn.o" ;; arm*-wince-pe*) + tmake_file="$tmake_file arm/t-arm arm/t-wince-pe" ;; avr-*-rtems*) - tmake_file=t-fpbit + tmake_file="$tmake_file avr/t-avr t-fpbit" # Don't use default. extra_parts= ;; @@ -375,27 +379,27 @@ avr-*-*) tmake_file="${cpu_type}/t-avr t-fpbit" ;; bfin*-elf*) - tmake_file="bfin/t-crtlibid bfin/t-crtstuff t-fdpbit" + tmake_file="bfin/t-bfin bfin/t-crtlibid bfin/t-crtstuff t-fdpbit" extra_parts="$extra_parts crtbeginS.o crtendS.o crti.o crtn.o crtlibid.o" ;; bfin*-uclinux*) - tmake_file="bfin/t-crtlibid bfin/t-crtstuff t-fdpbit" + tmake_file="bfin/t-bfin bfin/t-crtlibid bfin/t-crtstuff t-fdpbit" extra_parts="$extra_parts crtbeginS.o crtendS.o crtlibid.o" md_unwind_header=bfin/linux-unwind.h ;; bfin*-linux-uclibc*) - tmake_file="$tmake_file bfin/t-crtstuff t-fdpbit bfin/t-linux" + tmake_file="$tmake_file bfin/t-bfin bfin/t-crtstuff t-fdpbit bfin/t-linux" # No need to build crtbeginT.o on uClibc systems. Should probably # be moved to the OS specific section above. extra_parts="crtbegin.o crtbeginS.o crtend.o crtendS.o" md_unwind_header=bfin/linux-unwind.h ;; bfin*-rtems*) - tmake_file="$tmake_file t-fdpbit" + tmake_file="$tmake_file bfin/t-bfin t-fdpbit" extra_parts="$extra_parts crti.o crtn.o" ;; bfin*-*) - tmake_file="$tmake_file t-fdpbit" + tmake_file="$tmake_file bfin/t-bfin t-fdpbit" extra_parts="crtbegin.o crtend.o crti.o crtn.o" ;; crisv32-*-elf) @@ -415,10 +419,8 @@ cris-*-none) cris-*-linux* | crisv32-*-linux*) tmake_file="$tmake_file t-fdpbit cris/t-linux" ;; -fido-*-elf) - ;; fr30-*-elf) - tmake_file="$tmake_file t-fdpbit" + tmake_file="$tmake_file fr30/t-fr30 t-fdpbit" extra_parts="$extra_parts crti.o crtn.o" ;; frv-*-elf) @@ -427,20 +429,21 @@ frv-*-elf) extra_parts="frvbegin.o frvend.o" ;; frv-*-*linux*) - tmake_file="$tmake_file t-fdpbit frv/t-linux" + tmake_file="$tmake_file frv/t-frv frv/t-linux t-fdpbit" ;; h8300-*-rtems*) - tmake_file="$tmake_file t-fpbit" + tmake_file="$tmake_file h8300/t-h8300 t-fpbit" extra_parts="$extra_parts crti.o crtn.o" ;; h8300-*-elf*) - tmake_file="$tmake_file t-fpbit" + tmake_file="$tmake_file h8300/t-h8300 t-fpbit" extra_parts="$extra_parts crti.o crtn.o" ;; hppa*64*-*-linux*) + tmake_file="$tmake_file pa/t-linux pa/t-linux64" ;; hppa*-*-linux*) - tmake_file="$tmake_file t-slibgcc-libgcc" + tmake_file="$tmake_file pa/t-linux t-slibgcc-libgcc" # Set the libgcc version number if test x$enable_sjlj_exceptions = xyes; then tmake_file="$tmake_file pa/t-slibgcc-sjlj-ver" @@ -565,7 +568,7 @@ i[34567]86-*-cygwin*) else tmake_dlldir_file="i386/t-dlldir-x" fi - tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-cygwin i386/t-crtfm t-dfprules" + tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-cygwin i386/t-crtfm i386/t-chkstk t-dfprules" case ${target_thread_file} in posix) tmake_file="i386/t-mingw-pthread $tmake_file" @@ -586,7 +589,7 @@ 
i[34567]86-*-mingw*) else tmake_dlldir_file="i386/t-dlldir-x" fi - tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-mingw32 i386/t-crtfm t-dfprules" + tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-mingw32 i386/t-crtfm i386/t-chkstk t-dfprules" md_unwind_header=i386/w32-unwind.h ;; x86_64-*-mingw*) @@ -602,10 +605,11 @@ x86_64-*-mingw*) else tmake_dlldir_file="i386/t-dlldir-x" fi - tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-mingw32 t-dfprules i386/t-crtfm" + tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-mingw32 t-dfprules i386/t-crtfm i386/t-chkstk" extra_parts="$extra_parts crtfastmath.o" ;; i[34567]86-*-interix3*) + tmake_file="$tmake_file i386/t-chkstk" ;; ia64*-*-elf*) extra_parts="$extra_parts crtbeginS.o crtendS.o crtfastmath.o" @@ -625,10 +629,10 @@ ia64*-*-linux*) md_unwind_header=ia64/linux-unwind.h ;; ia64*-*-hpux*) - tmake_file="ia64/t-hpux t-slibgcc ia64/t-slibgcc-hpux t-slibgcc-hpux" + tmake_file="ia64/t-ia64 ia64/t-hpux t-slibgcc ia64/t-slibgcc-hpux t-slibgcc-hpux" ;; ia64-hp-*vms*) - tmake_file="$tmake_file ia64/t-eh-ia64 ia64/t-vms t-slibgcc-vms" + tmake_file="$tmake_file ia64/t-ia64 ia64/t-eh-ia64 ia64/t-vms t-slibgcc-vms" extra_parts="$extra_parts crtinitS.o" md_unwind_header=ia64/vms-unwind.h ;; @@ -660,18 +664,21 @@ m32r-*-linux*) m32rle-*-linux*) tmake_file="$tmake_file m32r/t-linux t-fdpbit" ;; -m68k-*-elf*) +m68k-*-elf* | fido-*-elf) + tmake_file="$tmake_file m68k/t-floatlib" ;; m68k*-*-netbsdelf*) ;; m68k*-*-openbsd*) ;; m68k-*-uclinux*) # Motorola m68k/ColdFire running uClinux with uClibc + tmake_file="$tmake_file m68k/t-floatlib" md_unwind_header=m68k/linux-unwind.h ;; m68k-*-linux*) # Motorola m68k's running GNU/Linux # with ELF format using glibc 2 # aka the GNU/Linux C library 6. + tmake_file="$tmake_file m68k/t-floatlib" # If not configured with --enable-sjlj-exceptions, bump the # libgcc version number. if test x$enable_sjlj_exceptions != xyes; then @@ -680,10 +687,11 @@ m68k-*-linux*) # Motorola m68k's running GNU/Linux md_unwind_header=m68k/linux-unwind.h ;; m68k-*-rtems*) + tmake_file="$tmake_file m68k/t-floatlib" extra_parts="$extra_parts crti.o crtn.o" ;; mcore-*-elf) - tmake_file=t-fdpbit + tmake_file="mcore/t-mcore t-fdpbit" extra_parts="$extra_parts crti.o crtn.o" ;; microblaze*-linux*) @@ -905,6 +913,10 @@ sh-*-elf* | sh[12346l]*-*-elf*) libic_invalidate_array_4-200.a \ libic_invalidate_array_4a.a \ libgcc-Os-4-200.a libgcc-4-300.a" + case ${host} in sh64*-*-*) + tmake_file="$tmake_file sh/t-sh64" + ;; + esac case ${host} in sh*-superh-elf) tmake_file="$tmake_file sh/t-superh" @@ -913,23 +925,33 @@ sh-*-elf* | sh[12346l]*-*-elf*) esac ;; sh-*-linux* | sh[2346lbe]*-*-linux*) - tmake_file="${tmake_file} t-slibgcc-libgcc sh/t-linux t-fdpbit" + tmake_file="${tmake_file} sh/t-sh t-slibgcc-libgcc sh/t-linux t-fdpbit" + case ${host} in sh64*-*-linux*) + tmake_file="$tmake_file sh/t-sh64" + ;; + esac md_unwind_header=sh/linux-unwind.h ;; sh-*-netbsdelf* | shl*-*-netbsdelf* | sh5-*-netbsd* | sh5l*-*-netbsd* | \ sh64-*-netbsd* | sh64l*-*-netbsd*) + tmake_file="$tmake_file sh/t-sh sh/t-netbsd" + case ${host} in + sh5*-*-netbsd* | sh64*-netbsd*) + tmake_file="$tmake_file sh/t-sh64" + ;; + esac # NetBSD's C library includes a fast software FP library that # has support for setting/setting the rounding mode, exception # mask, etc. 
Therefore, we don't want to include software FP # in libgcc. ;; sh-*-rtems*) - tmake_file="$tmake_file t-crtstuff-pic t-fdpbit" + tmake_file="$tmake_file sh/t-sh t-crtstuff-pic t-fdpbit" extra_parts="$extra_parts crt1.o crti.o crtn.o crtbeginS.o crtendS.o \ $sh_ic_extra_parts $sh_opt_extra_parts" ;; sh-wrs-vxworks) - tmake_file="$tmake_file t-crtstuff-pic t-fdpbit" + tmake_file="$tmake_file sh/t-sh t-crtstuff-pic t-fdpbit" ;; sparc-*-netbsdelf*) ;; @@ -956,6 +978,13 @@ sparc-*-linux*) # SPARC's running GNU/Linux, libc6 tmake_file="${tmake_file} sparc/t-linux" ;; esac + case ${host} in + *-leon[3-9]*) + ;; + *) + tmake_file="$tmake_file sparc/t-softmul" + ;; + esac extra_parts="$extra_parts crtfastmath.o" md_unwind_header=sparc/linux-unwind.h ;; @@ -1007,9 +1036,10 @@ tic6x-*-elf) unwind_header=config/c6x/unwind-c6x.h ;; v850*-*-*) - tmake_file=t-fdpbit + tmake_file="v850/t-v850 t-fdpbit" ;; vax-*-linux*) + tmake_file="$tmake_file vax/t-linux" ;; vax-*-netbsdelf*) ;; @@ -1032,6 +1062,7 @@ am33_2.0-*-linux*) tmake_file="$tmake_file t-fdpbit" ;; m32c-*-elf*|m32c-*-rtems*) + tmake_file="$tmake_file m32c/t-m32c" ;; mep*-*-*) tmake_file="mep/t-mep t-fdpbit" diff --git a/libgcc/config/arm/bpabi-v6m.S b/libgcc/config/arm/bpabi-v6m.S new file mode 100644 index 00000000000..4ecea6da5a6 --- /dev/null +++ b/libgcc/config/arm/bpabi-v6m.S @@ -0,0 +1,318 @@ +/* Miscellaneous BPABI functions. ARMv6M implementation + + Copyright (C) 2006, 2008, 2009, 2010 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. 
*/ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ + +#ifdef L_aeabi_lcmp + +FUNC_START aeabi_lcmp + cmp xxh, yyh + beq 1f + bgt 2f + mov r0, #1 + neg r0, r0 + RET +2: + mov r0, #1 + RET +1: + sub r0, xxl, yyl + beq 1f + bhi 2f + mov r0, #1 + neg r0, r0 + RET +2: + mov r0, #1 +1: + RET + FUNC_END aeabi_lcmp + +#endif /* L_aeabi_lcmp */ + +#ifdef L_aeabi_ulcmp + +FUNC_START aeabi_ulcmp + cmp xxh, yyh + bne 1f + sub r0, xxl, yyl + beq 2f +1: + bcs 1f + mov r0, #1 + neg r0, r0 + RET +1: + mov r0, #1 +2: + RET + FUNC_END aeabi_ulcmp + +#endif /* L_aeabi_ulcmp */ + +.macro test_div_by_zero signed + cmp yyh, #0 + bne 7f + cmp yyl, #0 + bne 7f + cmp xxh, #0 + bne 2f + cmp xxl, #0 +2: + .ifc \signed, unsigned + beq 3f + mov xxh, #0 + mvn xxh, xxh @ 0xffffffff + mov xxl, xxh +3: + .else + beq 5f + blt 6f + mov xxl, #0 + mvn xxl, xxl @ 0xffffffff + lsr xxh, xxl, #1 @ 0x7fffffff + b 5f +6: mov xxh, #0x80 + lsl xxh, xxh, #24 @ 0x80000000 + mov xxl, #0 +5: + .endif + @ tailcalls are tricky on v6-m. + push {r0, r1, r2} + ldr r0, 1f + adr r1, 1f + add r0, r1 + str r0, [sp, #8] + @ We know we are not on armv4t, so pop pc is safe. + pop {r0, r1, pc} + .align 2 +1: + .word __aeabi_ldiv0 - 1b +7: +.endm + +#ifdef L_aeabi_ldivmod + +FUNC_START aeabi_ldivmod + test_div_by_zero signed + + push {r0, r1} + mov r0, sp + push {r0, lr} + ldr r0, [sp, #8] + bl SYM(__gnu_ldivmod_helper) + ldr r3, [sp, #4] + mov lr, r3 + add sp, sp, #8 + pop {r2, r3} + RET + FUNC_END aeabi_ldivmod + +#endif /* L_aeabi_ldivmod */ + +#ifdef L_aeabi_uldivmod + +FUNC_START aeabi_uldivmod + test_div_by_zero unsigned + + push {r0, r1} + mov r0, sp + push {r0, lr} + ldr r0, [sp, #8] + bl SYM(__gnu_uldivmod_helper) + ldr r3, [sp, #4] + mov lr, r3 + add sp, sp, #8 + pop {r2, r3} + RET + FUNC_END aeabi_uldivmod + +#endif /* L_aeabi_uldivmod */ + +#ifdef L_arm_addsubsf3 + +FUNC_START aeabi_frsub + + push {r4, lr} + mov r4, #1 + lsl r4, #31 + eor r0, r0, r4 + bl __aeabi_fadd + pop {r4, pc} + + FUNC_END aeabi_frsub + +#endif /* L_arm_addsubsf3 */ + +#ifdef L_arm_cmpsf2 + +FUNC_START aeabi_cfrcmple + + mov ip, r0 + mov r0, r1 + mov r1, ip + b 6f + +FUNC_START aeabi_cfcmpeq +FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: push {r0, r1, r2, r3, r4, lr} + bl __lesf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. 
+ bmi 1f + mov r1, #0 + cmn r0, r1 +1: + pop {r0, r1, r2, r3, r4, pc} + + FUNC_END aeabi_cfcmple + FUNC_END aeabi_cfcmpeq + FUNC_END aeabi_cfrcmple + +FUNC_START aeabi_fcmpeq + + push {r4, lr} + bl __eqsf2 + neg r0, r0 + add r0, r0, #1 + pop {r4, pc} + + FUNC_END aeabi_fcmpeq + +.macro COMPARISON cond, helper, mode=sf2 +FUNC_START aeabi_fcmp\cond + + push {r4, lr} + bl __\helper\mode + cmp r0, #0 + b\cond 1f + mov r0, #0 + pop {r4, pc} +1: + mov r0, #1 + pop {r4, pc} + + FUNC_END aeabi_fcmp\cond +.endm + +COMPARISON lt, le +COMPARISON le, le +COMPARISON gt, ge +COMPARISON ge, ge + +#endif /* L_arm_cmpsf2 */ + +#ifdef L_arm_addsubdf3 + +FUNC_START aeabi_drsub + + push {r4, lr} + mov r4, #1 + lsl r4, #31 + eor xxh, xxh, r4 + bl __aeabi_dadd + pop {r4, pc} + + FUNC_END aeabi_drsub + +#endif /* L_arm_addsubdf3 */ + +#ifdef L_arm_cmpdf2 + +FUNC_START aeabi_cdrcmple + + mov ip, r0 + mov r0, r2 + mov r2, ip + mov ip, r1 + mov r1, r3 + mov r3, ip + b 6f + +FUNC_START aeabi_cdcmpeq +FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: push {r0, r1, r2, r3, r4, lr} + bl __ledf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. + bmi 1f + mov r1, #0 + cmn r0, r1 +1: + pop {r0, r1, r2, r3, r4, pc} + + FUNC_END aeabi_cdcmple + FUNC_END aeabi_cdcmpeq + FUNC_END aeabi_cdrcmple + +FUNC_START aeabi_dcmpeq + + push {r4, lr} + bl __eqdf2 + neg r0, r0 + add r0, r0, #1 + pop {r4, pc} + + FUNC_END aeabi_dcmpeq + +.macro COMPARISON cond, helper, mode=df2 +FUNC_START aeabi_dcmp\cond + + push {r4, lr} + bl __\helper\mode + cmp r0, #0 + b\cond 1f + mov r0, #0 + pop {r4, pc} +1: + mov r0, #1 + pop {r4, pc} + + FUNC_END aeabi_dcmp\cond +.endm + +COMPARISON lt, le +COMPARISON le, le +COMPARISON gt, ge +COMPARISON ge, ge + +#endif /* L_arm_cmpdf2 */ diff --git a/libgcc/config/arm/bpabi.S b/libgcc/config/arm/bpabi.S new file mode 100644 index 00000000000..2ff338927fa --- /dev/null +++ b/libgcc/config/arm/bpabi.S @@ -0,0 +1,163 @@ +/* Miscellaneous BPABI functions. + + Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. 
*/ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ + +#ifdef L_aeabi_lcmp + +ARM_FUNC_START aeabi_lcmp + cmp xxh, yyh + do_it lt + movlt r0, #-1 + do_it gt + movgt r0, #1 + do_it ne + RETc(ne) + subs r0, xxl, yyl + do_it lo + movlo r0, #-1 + do_it hi + movhi r0, #1 + RET + FUNC_END aeabi_lcmp + +#endif /* L_aeabi_lcmp */ + +#ifdef L_aeabi_ulcmp + +ARM_FUNC_START aeabi_ulcmp + cmp xxh, yyh + do_it lo + movlo r0, #-1 + do_it hi + movhi r0, #1 + do_it ne + RETc(ne) + cmp xxl, yyl + do_it lo + movlo r0, #-1 + do_it hi + movhi r0, #1 + do_it eq + moveq r0, #0 + RET + FUNC_END aeabi_ulcmp + +#endif /* L_aeabi_ulcmp */ + +.macro test_div_by_zero signed +/* Tail-call to divide-by-zero handlers which may be overridden by the user, + so unwinding works properly. */ +#if defined(__thumb2__) + cbnz yyh, 1f + cbnz yyl, 1f + cmp xxh, #0 + do_it eq + cmpeq xxl, #0 + .ifc \signed, unsigned + beq 2f + mov xxh, #0xffffffff + mov xxl, xxh +2: + .else + do_it lt, t + movlt xxl, #0 + movlt xxh, #0x80000000 + do_it gt, t + movgt xxh, #0x7fffffff + movgt xxl, #0xffffffff + .endif + b SYM (__aeabi_ldiv0) __PLT__ +1: +#else + /* Note: Thumb-1 code calls via an ARM shim on processors which + support ARM mode. */ + cmp yyh, #0 + cmpeq yyl, #0 + bne 2f + cmp xxh, #0 + cmpeq xxl, #0 + .ifc \signed, unsigned + movne xxh, #0xffffffff + movne xxl, #0xffffffff + .else + movlt xxh, #0x80000000 + movlt xxl, #0 + movgt xxh, #0x7fffffff + movgt xxl, #0xffffffff + .endif + b SYM (__aeabi_ldiv0) __PLT__ +2: +#endif +.endm + +#ifdef L_aeabi_ldivmod + +ARM_FUNC_START aeabi_ldivmod + test_div_by_zero signed + + sub sp, sp, #8 +#if defined(__thumb2__) + mov ip, sp + push {ip, lr} +#else + do_push {sp, lr} +#endif + bl SYM(__gnu_ldivmod_helper) __PLT__ + ldr lr, [sp, #4] + add sp, sp, #8 + do_pop {r2, r3} + RET + +#endif /* L_aeabi_ldivmod */ + +#ifdef L_aeabi_uldivmod + +ARM_FUNC_START aeabi_uldivmod + test_div_by_zero unsigned + + sub sp, sp, #8 +#if defined(__thumb2__) + mov ip, sp + push {ip, lr} +#else + do_push {sp, lr} +#endif + bl SYM(__gnu_uldivmod_helper) __PLT__ + ldr lr, [sp, #4] + add sp, sp, #8 + do_pop {r2, r3} + RET + +#endif /* L_aeabi_divmod */ + diff --git a/libgcc/config/arm/ieee754-df.S b/libgcc/config/arm/ieee754-df.S new file mode 100644 index 00000000000..eb0c38632d0 --- /dev/null +++ b/libgcc/config/arm/ieee754-df.S @@ -0,0 +1,1447 @@ +/* ieee754-df.S double-precision floating point support for ARM + + Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. + Contributed by Nicolas Pitre (nico@cam.org) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* + * Notes: + * + * The goal of this code is to be as fast as possible. 
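In both the v6-M and the ARM/Thumb-2 variants of test_div_by_zero above, the macro pre-loads a saturated quotient before tail-calling __aeabi_ldiv0, so that the default handler (which simply returns) yields the conventional results. A sketch of the values being built with the mov/mvn/lsl sequences, assuming two's-complement 64-bit integers:

    #include <stdint.h>

    /* Quotient left in the registers before the tail call to __aeabi_ldiv0.  */
    static int64_t saturated_signed_quotient (int64_t numerator)
    {
      if (numerator > 0)
        return INT64_MAX;          /* 0x7fffffffffffffff */
      if (numerator < 0)
        return INT64_MIN;          /* 0x8000000000000000 */
      return 0;                    /* 0 / 0 */
    }

    static uint64_t saturated_unsigned_quotient (uint64_t numerator)
    {
      return numerator ? UINT64_MAX : 0;
    }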
This is + * not meant to be easy to understand for the casual reader. + * For slightly simpler code please see the single precision version + * of this file. + * + * Only the default rounding mode is intended for best performances. + * Exceptions aren't supported yet, but that can be added quite easily + * if necessary without impacting performances. + */ + + +@ For FPA, float words are always big-endian. +@ For VFP, floats words follow the memory system mode. +#if defined(__VFP_FP__) && !defined(__ARMEB__) +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#else +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#endif + + +#ifdef L_arm_negdf2 + +ARM_FUNC_START negdf2 +ARM_FUNC_ALIAS aeabi_dneg negdf2 + + @ flip sign bit + eor xh, xh, #0x80000000 + RET + + FUNC_END aeabi_dneg + FUNC_END negdf2 + +#endif + +#ifdef L_arm_addsubdf3 + +ARM_FUNC_START aeabi_drsub + + eor xh, xh, #0x80000000 @ flip sign bit of first arg + b 1f + +ARM_FUNC_START subdf3 +ARM_FUNC_ALIAS aeabi_dsub subdf3 + + eor yh, yh, #0x80000000 @ flip sign bit of second arg +#if defined(__INTERWORKING_STUBS__) + b 1f @ Skip Thumb-code prologue +#endif + +ARM_FUNC_START adddf3 +ARM_FUNC_ALIAS aeabi_dadd adddf3 + +1: do_push {r4, r5, lr} + + @ Look for zeroes, equal values, INF, or NAN. + shift1 lsl, r4, xh, #1 + shift1 lsl, r5, yh, #1 + teq r4, r5 + do_it eq + teqeq xl, yl + do_it ne, ttt + COND(orr,s,ne) ip, r4, xl + COND(orr,s,ne) ip, r5, yl + COND(mvn,s,ne) ip, r4, asr #21 + COND(mvn,s,ne) ip, r5, asr #21 + beq LSYM(Lad_s) + + @ Compute exponent difference. Make largest exponent in r4, + @ corresponding arg in xh-xl, and positive exponent difference in r5. + shift1 lsr, r4, r4, #21 + rsbs r5, r4, r5, lsr #21 + do_it lt + rsblt r5, r5, #0 + ble 1f + add r4, r4, r5 + eor yl, xl, yl + eor yh, xh, yh + eor xl, yl, xl + eor xh, yh, xh + eor yl, xl, yl + eor yh, xh, yh +1: + @ If exponent difference is too large, return largest argument + @ already in xh-xl. We need up to 54 bit to handle proper rounding + @ of 0x1p54 - 1.1. + cmp r5, #54 + do_it hi + RETLDM "r4, r5" hi + + @ Convert mantissa to signed integer. + tst xh, #0x80000000 + mov xh, xh, lsl #12 + mov ip, #0x00100000 + orr xh, ip, xh, lsr #12 + beq 1f +#if defined(__thumb2__) + negs xl, xl + sbc xh, xh, xh, lsl #1 +#else + rsbs xl, xl, #0 + rsc xh, xh, #0 +#endif +1: + tst yh, #0x80000000 + mov yh, yh, lsl #12 + orr yh, ip, yh, lsr #12 + beq 1f +#if defined(__thumb2__) + negs yl, yl + sbc yh, yh, yh, lsl #1 +#else + rsbs yl, yl, #0 + rsc yh, yh, #0 +#endif +1: + @ If exponent == difference, one or both args were denormalized. + @ Since this is not common case, rescale them off line. + teq r4, r5 + beq LSYM(Lad_d) +LSYM(Lad_x): + + @ Compensate for the exponent overlapping the mantissa MSB added later + sub r4, r4, #1 + + @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip. + rsbs lr, r5, #32 + blt 1f + shift1 lsl, ip, yl, lr + shiftop adds xl xl yl lsr r5 yl + adc xh, xh, #0 + shiftop adds xl xl yh lsl lr yl + shiftop adcs xh xh yh asr r5 yh + b 2f +1: sub r5, r5, #32 + add lr, lr, #32 + cmp yl, #1 + shift1 lsl,ip, yh, lr + do_it cs + orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later + shiftop adds xl xl yh asr r5 yh + adcs xh, xh, yh, asr #31 +2: + @ We now have a result in xh-xl-ip. 
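The xh/xl register aliases above reflect that only the word order of a double differs between the FPA and VFP/endianness configurations: the sign, the 11-bit exponent and the top 20 mantissa bits always live in the high word, the remaining 32 mantissa bits in the low word. That is why negdf2 and aeabi_drsub only need to touch bit 31 of the high word. A minimal C model of the same bit-level view (helper names are for illustration only):

    #include <stdint.h>
    #include <string.h>

    static double model_negdf2 (double x)
    {
      uint64_t bits;
      memcpy (&bits, &x, sizeof bits);
      bits ^= 0x8000000000000000ULL;   /* flip only the sign bit */
      memcpy (&x, &bits, sizeof bits);
      return x;
    }

    /* __aeabi_drsub (x, y) is "reverse subtract": it returns y - x,
       implemented above by flipping the sign of the first operand and
       falling through into the addition path.  */
    static double model_drsub (double x, double y)
    {
      return model_negdf2 (x) + y;
    }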
+ @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above) + and r5, xh, #0x80000000 + bpl LSYM(Lad_p) +#if defined(__thumb2__) + mov lr, #0 + negs ip, ip + sbcs xl, lr, xl + sbc xh, lr, xh +#else + rsbs ip, ip, #0 + rscs xl, xl, #0 + rsc xh, xh, #0 +#endif + + @ Determine how to normalize the result. +LSYM(Lad_p): + cmp xh, #0x00100000 + bcc LSYM(Lad_a) + cmp xh, #0x00200000 + bcc LSYM(Lad_e) + + @ Result needs to be shifted right. + movs xh, xh, lsr #1 + movs xl, xl, rrx + mov ip, ip, rrx + add r4, r4, #1 + + @ Make sure we did not bust our exponent. + mov r2, r4, lsl #21 + cmn r2, #(2 << 21) + bcs LSYM(Lad_o) + + @ Our result is now properly aligned into xh-xl, remaining bits in ip. + @ Round with MSB of ip. If halfway between two numbers, round towards + @ LSB of xl = 0. + @ Pack final result together. +LSYM(Lad_e): + cmp ip, #0x80000000 + do_it eq + COND(mov,s,eq) ip, xl, lsr #1 + adcs xl, xl, #0 + adc xh, xh, r4, lsl #20 + orr xh, xh, r5 + RETLDM "r4, r5" + + @ Result must be shifted left and exponent adjusted. +LSYM(Lad_a): + movs ip, ip, lsl #1 + adcs xl, xl, xl + adc xh, xh, xh + tst xh, #0x00100000 + sub r4, r4, #1 + bne LSYM(Lad_e) + + @ No rounding necessary since ip will always be 0 at this point. +LSYM(Lad_l): + +#if __ARM_ARCH__ < 5 + + teq xh, #0 + movne r3, #20 + moveq r3, #52 + moveq xh, xl + moveq xl, #0 + mov r2, xh + cmp r2, #(1 << 16) + movhs r2, r2, lsr #16 + subhs r3, r3, #16 + cmp r2, #(1 << 8) + movhs r2, r2, lsr #8 + subhs r3, r3, #8 + cmp r2, #(1 << 4) + movhs r2, r2, lsr #4 + subhs r3, r3, #4 + cmp r2, #(1 << 2) + subhs r3, r3, #2 + sublo r3, r3, r2, lsr #1 + sub r3, r3, r2, lsr #3 + +#else + + teq xh, #0 + do_it eq, t + moveq xh, xl + moveq xl, #0 + clz r3, xh + do_it eq + addeq r3, r3, #32 + sub r3, r3, #11 + +#endif + + @ determine how to shift the value. + subs r2, r3, #32 + bge 2f + adds r2, r2, #12 + ble 1f + + @ shift value left 21 to 31 bits, or actually right 11 to 1 bits + @ since a register switch happened above. + add ip, r2, #20 + rsb r2, r2, #12 + shift1 lsl, xl, xh, ip + shift1 lsr, xh, xh, r2 + b 3f + + @ actually shift value left 1 to 20 bits, which might also represent + @ 32 to 52 bits if counting the register switch that happened earlier. +1: add r2, r2, #20 +2: do_it le + rsble ip, r2, #32 + shift1 lsl, xh, xh, r2 +#if defined(__thumb2__) + lsr ip, xl, ip + itt le + orrle xh, xh, ip + lslle xl, xl, r2 +#else + orrle xh, xh, xl, lsr ip + movle xl, xl, lsl r2 +#endif + + @ adjust exponent accordingly. +3: subs r4, r4, r3 + do_it ge, tt + addge xh, xh, r4, lsl #20 + orrge xh, xh, r5 + RETLDM "r4, r5" ge + + @ Exponent too small, denormalize result. + @ Find out proper shift value. + mvn r4, r4 + subs r4, r4, #31 + bge 2f + adds r4, r4, #12 + bgt 1f + + @ shift result right of 1 to 20 bits, sign is in r5. + add r4, r4, #20 + rsb r2, r4, #32 + shift1 lsr, xl, xl, r4 + shiftop orr xl xl xh lsl r2 yh + shiftop orr xh r5 xh lsr r4 yh + RETLDM "r4, r5" + + @ shift result right of 21 to 31 bits, or left 11 to 1 bits after + @ a register switch from xh to xl. +1: rsb r4, r4, #12 + rsb r2, r4, #32 + shift1 lsr, xl, xl, r2 + shiftop orr xl xl xh lsl r4 yh + mov xh, r5 + RETLDM "r4, r5" + + @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch + @ from xh to xl. +2: shift1 lsr, xl, xh, r4 + mov xh, r5 + RETLDM "r4, r5" + + @ Adjust exponents for denormalized arguments. + @ Note that r4 must not remain equal to 0. 
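The pre-ARMv5 normalization sequence above has no clz instruction to lean on, so it locates the leading set bit with a branch-free binary search over halving bit ranges. The same idea in C, as an illustration (soft_clz32 is a hypothetical helper, not part of libgcc):

    #include <stdint.h>

    /* Count leading zeros by narrowing the search interval, in the style
       of the __ARM_ARCH__ < 5 sequence above.  Returns 32 for x == 0.  */
    static int soft_clz32 (uint32_t x)
    {
      int n = 32;
      if (x >> 16) { x >>= 16; n -= 16; }
      if (x >> 8)  { x >>= 8;  n -= 8;  }
      if (x >> 4)  { x >>= 4;  n -= 4;  }
      if (x >> 2)  { x >>= 2;  n -= 2;  }
      if (x >> 1)  { n -= 2; } else { n -= x; }
      return n;
    }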
+LSYM(Lad_d): + teq r4, #0 + eor yh, yh, #0x00100000 + do_it eq, te + eoreq xh, xh, #0x00100000 + addeq r4, r4, #1 + subne r5, r5, #1 + b LSYM(Lad_x) + + +LSYM(Lad_s): + mvns ip, r4, asr #21 + do_it ne + COND(mvn,s,ne) ip, r5, asr #21 + beq LSYM(Lad_i) + + teq r4, r5 + do_it eq + teqeq xl, yl + beq 1f + + @ Result is x + 0.0 = x or 0.0 + y = y. + orrs ip, r4, xl + do_it eq, t + moveq xh, yh + moveq xl, yl + RETLDM "r4, r5" + +1: teq xh, yh + + @ Result is x - x = 0. + do_it ne, tt + movne xh, #0 + movne xl, #0 + RETLDM "r4, r5" ne + + @ Result is x + x = 2x. + movs ip, r4, lsr #21 + bne 2f + movs xl, xl, lsl #1 + adcs xh, xh, xh + do_it cs + orrcs xh, xh, #0x80000000 + RETLDM "r4, r5" +2: adds r4, r4, #(2 << 21) + do_it cc, t + addcc xh, xh, #(1 << 20) + RETLDM "r4, r5" cc + and r5, xh, #0x80000000 + + @ Overflow: return INF. +LSYM(Lad_o): + orr xh, r5, #0x7f000000 + orr xh, xh, #0x00f00000 + mov xl, #0 + RETLDM "r4, r5" + + @ At least one of x or y is INF/NAN. + @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN) + @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN) + @ if either is NAN: return NAN + @ if opposite sign: return NAN + @ otherwise return xh-xl (which is INF or -INF) +LSYM(Lad_i): + mvns ip, r4, asr #21 + do_it ne, te + movne xh, yh + movne xl, yl + COND(mvn,s,eq) ip, r5, asr #21 + do_it ne, t + movne yh, xh + movne yl, xl + orrs r4, xl, xh, lsl #12 + do_it eq, te + COND(orr,s,eq) r5, yl, yh, lsl #12 + teqeq xh, yh + orrne xh, xh, #0x00080000 @ quiet NAN + RETLDM "r4, r5" + + FUNC_END aeabi_dsub + FUNC_END subdf3 + FUNC_END aeabi_dadd + FUNC_END adddf3 + +ARM_FUNC_START floatunsidf +ARM_FUNC_ALIAS aeabi_ui2d floatunsidf + + teq r0, #0 + do_it eq, t + moveq r1, #0 + RETc(eq) + do_push {r4, r5, lr} + mov r4, #0x400 @ initial exponent + add r4, r4, #(52-1 - 1) + mov r5, #0 @ sign bit is 0 + .ifnc xl, r0 + mov xl, r0 + .endif + mov xh, #0 + b LSYM(Lad_l) + + FUNC_END aeabi_ui2d + FUNC_END floatunsidf + +ARM_FUNC_START floatsidf +ARM_FUNC_ALIAS aeabi_i2d floatsidf + + teq r0, #0 + do_it eq, t + moveq r1, #0 + RETc(eq) + do_push {r4, r5, lr} + mov r4, #0x400 @ initial exponent + add r4, r4, #(52-1 - 1) + ands r5, r0, #0x80000000 @ sign bit in r5 + do_it mi + rsbmi r0, r0, #0 @ absolute value + .ifnc xl, r0 + mov xl, r0 + .endif + mov xh, #0 + b LSYM(Lad_l) + + FUNC_END aeabi_i2d + FUNC_END floatsidf + +ARM_FUNC_START extendsfdf2 +ARM_FUNC_ALIAS aeabi_f2d extendsfdf2 + + movs r2, r0, lsl #1 @ toss sign bit + mov xh, r2, asr #3 @ stretch exponent + mov xh, xh, rrx @ retrieve sign bit + mov xl, r2, lsl #28 @ retrieve remaining bits + do_it ne, ttt + COND(and,s,ne) r3, r2, #0xff000000 @ isolate exponent + teqne r3, #0xff000000 @ if not 0, check if INF or NAN + eorne xh, xh, #0x38000000 @ fixup exponent otherwise. + RETc(ne) @ and return it. + + teq r2, #0 @ if actually 0 + do_it ne, e + teqne r3, #0xff000000 @ or INF or NAN + RETc(eq) @ we are done already. + + @ value was denormalized. We can normalize it now. 
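For normal, finite inputs, aeabi_f2d/extendsfdf2 above amounts to rebasing the exponent from bias 127 to bias 1023 and moving the 23-bit mantissa to the top of the 52-bit field; zeros, denormals, infinities and NaNs take the separate paths shown. A field-level sketch of the common case, operating on raw encodings (illustration only, not the register-level trick the assembly uses):

    #include <stdint.h>

    static uint64_t model_f2d_normal (uint32_t f)
    {
      uint64_t sign = (uint64_t)(f >> 31) << 63;
      uint64_t exp  = ((f >> 23) & 0xff) - 127 + 1023;   /* rebias by 896 */
      uint64_t mant = f & 0x007fffff;
      return sign | (exp << 52) | (mant << 29);          /* 52 - 23 = 29 */
    }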
+ do_push {r4, r5, lr} + mov r4, #0x380 @ setup corresponding exponent + and r5, xh, #0x80000000 @ move sign bit in r5 + bic xh, xh, #0x80000000 + b LSYM(Lad_l) + + FUNC_END aeabi_f2d + FUNC_END extendsfdf2 + +ARM_FUNC_START floatundidf +ARM_FUNC_ALIAS aeabi_ul2d floatundidf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqd f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + @ For hard FPA code we want to return via the tail below so that + @ we can return the result in f0 as well as in r0/r1 for backwards + @ compatibility. + adr ip, LSYM(f0_ret) + @ Push pc as well so that RETLDM works correctly. + do_push {r4, r5, ip, lr, pc} +#else + do_push {r4, r5, lr} +#endif + + mov r5, #0 + b 2f + +ARM_FUNC_START floatdidf +ARM_FUNC_ALIAS aeabi_l2d floatdidf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqd f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + @ For hard FPA code we want to return via the tail below so that + @ we can return the result in f0 as well as in r0/r1 for backwards + @ compatibility. + adr ip, LSYM(f0_ret) + @ Push pc as well so that RETLDM works correctly. + do_push {r4, r5, ip, lr, pc} +#else + do_push {r4, r5, lr} +#endif + + ands r5, ah, #0x80000000 @ sign bit in r5 + bpl 2f +#if defined(__thumb2__) + negs al, al + sbc ah, ah, ah, lsl #1 +#else + rsbs al, al, #0 + rsc ah, ah, #0 +#endif +2: + mov r4, #0x400 @ initial exponent + add r4, r4, #(52-1 - 1) + + @ FPA little-endian: must swap the word order. + .ifnc xh, ah + mov ip, al + mov xh, ah + mov xl, ip + .endif + + movs ip, xh, lsr #22 + beq LSYM(Lad_p) + + @ The value is too big. Scale it down a bit... + mov r2, #3 + movs ip, ip, lsr #3 + do_it ne + addne r2, r2, #3 + movs ip, ip, lsr #3 + do_it ne + addne r2, r2, #3 + add r2, r2, ip, lsr #3 + + rsb r3, r2, #32 + shift1 lsl, ip, xl, r3 + shift1 lsr, xl, xl, r2 + shiftop orr xl xl xh lsl r3 lr + shift1 lsr, xh, xh, r2 + add r4, r4, r2 + b LSYM(Lad_p) + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + + @ Legacy code expects the result to be returned in f0. Copy it + @ there as well. +LSYM(f0_ret): + do_push {r0, r1} + ldfd f0, [sp], #8 + RETLDM + +#endif + + FUNC_END floatdidf + FUNC_END aeabi_l2d + FUNC_END floatundidf + FUNC_END aeabi_ul2d + +#endif /* L_addsubdf3 */ + +#ifdef L_arm_muldivdf3 + +ARM_FUNC_START muldf3 +ARM_FUNC_ALIAS aeabi_dmul muldf3 + do_push {r4, r5, r6, lr} + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + orr ip, ip, #0x700 + ands r4, ip, xh, lsr #20 + do_it ne, tte + COND(and,s,ne) r5, ip, yh, lsr #20 + teqne r4, ip + teqne r5, ip + bleq LSYM(Lml_s) + + @ Add exponents together + add r4, r4, r5 + + @ Determine final sign. + eor r6, xh, yh + + @ Convert mantissa to unsigned integer. + @ If power of two, branch to a separate path. + bic xh, xh, ip, lsl #21 + bic yh, yh, ip, lsl #21 + orrs r5, xl, xh, lsl #12 + do_it ne + COND(orr,s,ne) r5, yl, yh, lsl #12 + orr xh, xh, #0x00100000 + orr yh, yh, #0x00100000 + beq LSYM(Lml_1) + +#if __ARM_ARCH__ < 4 + + @ Put sign bit in r6, which will be restored in yl later. + and r6, r6, #0x80000000 + + @ Well, no way to make it shorter without the umull instruction. 
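The block that follows assembles the wide significand product from 16-bit halves because pre-ARMv4 cores have no umull. The same schoolbook scheme is easier to follow in C; the sketch below shows it for a 64 x 64 -> 128 bit product built from 32-bit halves (a hypothetical helper, shown only to illustrate the partial-product layout):

    #include <stdint.h>

    static void mul64x64_128 (uint64_t a, uint64_t b,
                              uint64_t *hi, uint64_t *lo)
    {
      uint64_t a_lo = (uint32_t)a, a_hi = a >> 32;
      uint64_t b_lo = (uint32_t)b, b_hi = b >> 32;

      uint64_t p0 = a_lo * b_lo;          /* contributes to bits 0..63   */
      uint64_t p1 = a_lo * b_hi;          /* contributes to bits 32..95  */
      uint64_t p2 = a_hi * b_lo;          /* contributes to bits 32..95  */
      uint64_t p3 = a_hi * b_hi;          /* contributes to bits 64..127 */

      uint64_t mid = (p0 >> 32) + (uint32_t)p1 + (uint32_t)p2;

      *lo = (mid << 32) | (uint32_t)p0;
      *hi = p3 + (p1 >> 32) + (p2 >> 32) + (mid >> 32);
    }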
+ stmfd sp!, {r6, r7, r8, r9, sl, fp} + mov r7, xl, lsr #16 + mov r8, yl, lsr #16 + mov r9, xh, lsr #16 + mov sl, yh, lsr #16 + bic xl, xl, r7, lsl #16 + bic yl, yl, r8, lsl #16 + bic xh, xh, r9, lsl #16 + bic yh, yh, sl, lsl #16 + mul ip, xl, yl + mul fp, xl, r8 + mov lr, #0 + adds ip, ip, fp, lsl #16 + adc lr, lr, fp, lsr #16 + mul fp, r7, yl + adds ip, ip, fp, lsl #16 + adc lr, lr, fp, lsr #16 + mul fp, xl, sl + mov r5, #0 + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, r7, yh + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, xh, r8 + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, r9, yl + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, xh, sl + mul r6, r9, sl + adds r5, r5, fp, lsl #16 + adc r6, r6, fp, lsr #16 + mul fp, r9, yh + adds r5, r5, fp, lsl #16 + adc r6, r6, fp, lsr #16 + mul fp, xl, yh + adds lr, lr, fp + mul fp, r7, sl + adcs r5, r5, fp + mul fp, xh, yl + adc r6, r6, #0 + adds lr, lr, fp + mul fp, r9, r8 + adcs r5, r5, fp + mul fp, r7, r8 + adc r6, r6, #0 + adds lr, lr, fp + mul fp, xh, yh + adcs r5, r5, fp + adc r6, r6, #0 + ldmfd sp!, {yl, r7, r8, r9, sl, fp} + +#else + + @ Here is the actual multiplication. + umull ip, lr, xl, yl + mov r5, #0 + umlal lr, r5, xh, yl + and yl, r6, #0x80000000 + umlal lr, r5, xl, yh + mov r6, #0 + umlal r5, r6, xh, yh + +#endif + + @ The LSBs in ip are only significant for the final rounding. + @ Fold them into lr. + teq ip, #0 + do_it ne + orrne lr, lr, #1 + + @ Adjust result upon the MSB position. + sub r4, r4, #0xff + cmp r6, #(1 << (20-11)) + sbc r4, r4, #0x300 + bcs 1f + movs lr, lr, lsl #1 + adcs r5, r5, r5 + adc r6, r6, r6 +1: + @ Shift to final position, add sign to result. + orr xh, yl, r6, lsl #11 + orr xh, xh, r5, lsr #21 + mov xl, r5, lsl #11 + orr xl, xl, lr, lsr #21 + mov lr, lr, lsl #11 + + @ Check exponent range for under/overflow. + subs ip, r4, #(254 - 1) + do_it hi + cmphi ip, #0x700 + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + cmp lr, #0x80000000 + do_it eq + COND(mov,s,eq) lr, xl, lsr #1 + adcs xl, xl, #0 + adc xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" + + @ Multiplication by 0x1p*: let''s shortcut a lot of code. +LSYM(Lml_1): + and r6, r6, #0x80000000 + orr xh, r6, xh + orr xl, xl, yl + eor xh, xh, yh + subs r4, r4, ip, lsr #1 + do_it gt, tt + COND(rsb,s,gt) r5, r4, ip + orrgt xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" gt + + @ Under/overflow: fix things up for the code below. + orr xh, xh, #0x00100000 + mov lr, #0 + subs r4, r4, #1 + +LSYM(Lml_u): + @ Overflow? + bgt LSYM(Lml_o) + + @ Check if denormalized result is possible, otherwise return signed 0. + cmn r4, #(53 + 1) + do_it le, tt + movle xl, #0 + bicle xh, xh, #0x7fffffff + RETLDM "r4, r5, r6" le + + @ Find out proper shift value. + rsb r4, r4, #0 + subs r4, r4, #32 + bge 2f + adds r4, r4, #12 + bgt 1f + + @ shift result right of 1 to 20 bits, preserve sign bit, round, etc. + add r4, r4, #20 + rsb r5, r4, #32 + shift1 lsl, r3, xl, r5 + shift1 lsr, xl, xl, r4 + shiftop orr xl xl xh lsl r5 r2 + and r2, xh, #0x80000000 + bic xh, xh, #0x80000000 + adds xl, xl, r3, lsr #31 + shiftop adc xh r2 xh lsr r4 r6 + orrs lr, lr, r3, lsl #1 + do_it eq + biceq xl, xl, r3, lsr #31 + RETLDM "r4, r5, r6" + + @ shift result right of 21 to 31 bits, or left 11 to 1 bits after + @ a register switch from xh to xl. Then round. 
+1: rsb r4, r4, #12 + rsb r5, r4, #32 + shift1 lsl, r3, xl, r4 + shift1 lsr, xl, xl, r5 + shiftop orr xl xl xh lsl r4 r2 + bic xh, xh, #0x7fffffff + adds xl, xl, r3, lsr #31 + adc xh, xh, #0 + orrs lr, lr, r3, lsl #1 + do_it eq + biceq xl, xl, r3, lsr #31 + RETLDM "r4, r5, r6" + + @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch + @ from xh to xl. Leftover bits are in r3-r6-lr for rounding. +2: rsb r5, r4, #32 + shiftop orr lr lr xl lsl r5 r2 + shift1 lsr, r3, xl, r4 + shiftop orr r3 r3 xh lsl r5 r2 + shift1 lsr, xl, xh, r4 + bic xh, xh, #0x7fffffff + shiftop bic xl xl xh lsr r4 r2 + add xl, xl, r3, lsr #31 + orrs lr, lr, r3, lsl #1 + do_it eq + biceq xl, xl, r3, lsr #31 + RETLDM "r4, r5, r6" + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Lml_d): + teq r4, #0 + bne 2f + and r6, xh, #0x80000000 +1: movs xl, xl, lsl #1 + adc xh, xh, xh + tst xh, #0x00100000 + do_it eq + subeq r4, r4, #1 + beq 1b + orr xh, xh, r6 + teq r5, #0 + do_it ne + RETc(ne) +2: and r6, yh, #0x80000000 +3: movs yl, yl, lsl #1 + adc yh, yh, yh + tst yh, #0x00100000 + do_it eq + subeq r5, r5, #1 + beq 3b + orr yh, yh, r6 + RET + +LSYM(Lml_s): + @ Isolate the INF and NAN cases away + teq r4, ip + and r5, ip, yh, lsr #20 + do_it ne + teqne r5, ip + beq 1f + + @ Here, one or more arguments are either denormalized or zero. + orrs r6, xl, xh, lsl #1 + do_it ne + COND(orr,s,ne) r6, yl, yh, lsl #1 + bne LSYM(Lml_d) + + @ Result is 0, but determine sign anyway. +LSYM(Lml_z): + eor xh, xh, yh + and xh, xh, #0x80000000 + mov xl, #0 + RETLDM "r4, r5, r6" + +1: @ One or both args are INF or NAN. + orrs r6, xl, xh, lsl #1 + do_it eq, te + moveq xl, yl + moveq xh, yh + COND(orr,s,ne) r6, yl, yh, lsl #1 + beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN + teq r4, ip + bne 1f + orrs r6, xl, xh, lsl #12 + bne LSYM(Lml_n) @ NAN * -> NAN +1: teq r5, ip + bne LSYM(Lml_i) + orrs r6, yl, yh, lsl #12 + do_it ne, t + movne xl, yl + movne xh, yh + bne LSYM(Lml_n) @ * NAN -> NAN + + @ Result is INF, but we need to determine its sign. +LSYM(Lml_i): + eor xh, xh, yh + + @ Overflow: return INF (sign already in xh). +LSYM(Lml_o): + and xh, xh, #0x80000000 + orr xh, xh, #0x7f000000 + orr xh, xh, #0x00f00000 + mov xl, #0 + RETLDM "r4, r5, r6" + + @ Return a quiet NAN. +LSYM(Lml_n): + orr xh, xh, #0x7f000000 + orr xh, xh, #0x00f80000 + RETLDM "r4, r5, r6" + + FUNC_END aeabi_dmul + FUNC_END muldf3 + +ARM_FUNC_START divdf3 +ARM_FUNC_ALIAS aeabi_ddiv divdf3 + + do_push {r4, r5, r6, lr} + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + orr ip, ip, #0x700 + ands r4, ip, xh, lsr #20 + do_it ne, tte + COND(and,s,ne) r5, ip, yh, lsr #20 + teqne r4, ip + teqne r5, ip + bleq LSYM(Ldv_s) + + @ Substract divisor exponent from dividend''s. + sub r4, r4, r5 + + @ Preserve final sign into lr. + eor lr, xh, yh + + @ Convert mantissa to unsigned integer. + @ Dividend -> r5-r6, divisor -> yh-yl. + orrs r5, yl, yh, lsl #12 + mov xh, xh, lsl #12 + beq LSYM(Ldv_1) + mov yh, yh, lsl #12 + mov r5, #0x10000000 + orr yh, r5, yh, lsr #4 + orr yh, yh, yl, lsr #24 + mov yl, yl, lsl #8 + orr r5, r5, xh, lsr #4 + orr r5, r5, xl, lsr #24 + mov r6, xl, lsl #8 + + @ Initialize xh with final sign bit. + and xh, lr, #0x80000000 + + @ Ensure result will land to known bit position. + @ Apply exponent bias accordingly. 
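The division loop that follows produces the quotient mantissa by restoring division, four quotient bits per pass with the divisor shifted right between steps. A one-bit-per-step C version of the same technique (a simplified illustration, not the unrolled form used above):

    #include <stdint.h>

    /* Restoring division: returns floor((n << bits) / d), assuming
       n < d and d < 2^63 so the doubling below cannot overflow.  */
    static uint64_t mantissa_divide (uint64_t n, uint64_t d, int bits)
    {
      uint64_t q = 0;
      for (int i = 0; i < bits; i++)
        {
          n <<= 1;                 /* bring down the next quotient bit */
          q <<= 1;
          if (n >= d)
            {
              n -= d;              /* the divisor fits: record a 1 bit */
              q |= 1;
            }
        }
      return q;
    }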
+ cmp r5, yh + do_it eq + cmpeq r6, yl + adc r4, r4, #(255 - 2) + add r4, r4, #0x300 + bcs 1f + movs yh, yh, lsr #1 + mov yl, yl, rrx +1: + @ Perform first substraction to align result to a nibble. + subs r6, r6, yl + sbc r5, r5, yh + movs yh, yh, lsr #1 + mov yl, yl, rrx + mov xl, #0x00100000 + mov ip, #0x00080000 + + @ The actual division loop. +1: subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #1 + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #2 + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #3 + + orrs lr, r5, r6 + beq 2f + mov r5, r5, lsl #4 + orr r5, r5, r6, lsr #28 + mov r6, r6, lsl #4 + mov yh, yh, lsl #3 + orr yh, yh, yl, lsr #29 + mov yl, yl, lsl #3 + movs ip, ip, lsr #4 + bne 1b + + @ We are done with a word of the result. + @ Loop again for the low word if this pass was for the high word. + tst xh, #0x00100000 + bne 3f + orr xh, xh, xl + mov xl, #0 + mov ip, #0x80000000 + b 1b +2: + @ Be sure result starts in the high word. + tst xh, #0x00100000 + do_it eq, t + orreq xh, xh, xl + moveq xl, #0 +3: + @ Check exponent range for under/overflow. + subs ip, r4, #(254 - 1) + do_it hi + cmphi ip, #0x700 + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + subs ip, r5, yh + do_it eq, t + COND(sub,s,eq) ip, r6, yl + COND(mov,s,eq) ip, xl, lsr #1 + adcs xl, xl, #0 + adc xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" + + @ Division by 0x1p*: shortcut a lot of code. +LSYM(Ldv_1): + and lr, lr, #0x80000000 + orr xh, lr, xh, lsr #12 + adds r4, r4, ip, lsr #1 + do_it gt, tt + COND(rsb,s,gt) r5, r4, ip + orrgt xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" gt + + orr xh, xh, #0x00100000 + mov lr, #0 + subs r4, r4, #1 + b LSYM(Lml_u) + + @ Result mightt need to be denormalized: put remainder bits + @ in lr for rounding considerations. +LSYM(Ldv_u): + orr lr, r5, r6 + b LSYM(Lml_u) + + @ One or both arguments is either INF, NAN or zero. +LSYM(Ldv_s): + and r5, ip, yh, lsr #20 + teq r4, ip + do_it eq + teqeq r5, ip + beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN + teq r4, ip + bne 1f + orrs r4, xl, xh, lsl #12 + bne LSYM(Lml_n) @ NAN / -> NAN + teq r5, ip + bne LSYM(Lml_i) @ INF / -> INF + mov xl, yl + mov xh, yh + b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN +1: teq r5, ip + bne 2f + orrs r5, yl, yh, lsl #12 + beq LSYM(Lml_z) @ / INF -> 0 + mov xl, yl + mov xh, yh + b LSYM(Lml_n) @ / NAN -> NAN +2: @ If both are nonzero, we need to normalize and resume above. + orrs r6, xl, xh, lsl #1 + do_it ne + COND(orr,s,ne) r6, yl, yh, lsl #1 + bne LSYM(Lml_d) + @ One or both arguments are 0. + orrs r4, xl, xh, lsl #1 + bne LSYM(Lml_i) @ / 0 -> INF + orrs r5, yl, yh, lsl #1 + bne LSYM(Lml_z) @ 0 / -> 0 + b LSYM(Lml_n) @ 0 / 0 -> NAN + + FUNC_END aeabi_ddiv + FUNC_END divdf3 + +#endif /* L_muldivdf3 */ + +#ifdef L_arm_cmpdf2 + +@ Note: only r0 (return value) and ip are clobbered here. + +ARM_FUNC_START gtdf2 +ARM_FUNC_ALIAS gedf2 gtdf2 + mov ip, #-1 + b 1f + +ARM_FUNC_START ltdf2 +ARM_FUNC_ALIAS ledf2 ltdf2 + mov ip, #1 + b 1f + +ARM_FUNC_START cmpdf2 +ARM_FUNC_ALIAS nedf2 cmpdf2 +ARM_FUNC_ALIAS eqdf2 cmpdf2 + mov ip, #1 @ how should we specify unordered here? + +1: str ip, [sp, #-4]! 
+ + @ Trap any INF/NAN first. + mov ip, xh, lsl #1 + mvns ip, ip, asr #21 + mov ip, yh, lsl #1 + do_it ne + COND(mvn,s,ne) ip, ip, asr #21 + beq 3f + + @ Test for equality. + @ Note that 0.0 is equal to -0.0. +2: add sp, sp, #4 + orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 + do_it eq, e + COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0 + teqne xh, yh @ or xh == yh + do_it eq, tt + teqeq xl, yl @ and xl == yl + moveq r0, #0 @ then equal. + RETc(eq) + + @ Clear C flag + cmn r0, #0 + + @ Compare sign, + teq xh, yh + + @ Compare values if same sign + do_it pl + cmppl xh, yh + do_it eq + cmpeq xl, yl + + @ Result: + do_it cs, e + movcs r0, yh, asr #31 + mvncc r0, yh, asr #31 + orr r0, r0, #1 + RET + + @ Look for a NAN. +3: mov ip, xh, lsl #1 + mvns ip, ip, asr #21 + bne 4f + orrs ip, xl, xh, lsl #12 + bne 5f @ x is NAN +4: mov ip, yh, lsl #1 + mvns ip, ip, asr #21 + bne 2b + orrs ip, yl, yh, lsl #12 + beq 2b @ y is not NAN +5: ldr r0, [sp], #4 @ unordered return code + RET + + FUNC_END gedf2 + FUNC_END gtdf2 + FUNC_END ledf2 + FUNC_END ltdf2 + FUNC_END nedf2 + FUNC_END eqdf2 + FUNC_END cmpdf2 + +ARM_FUNC_START aeabi_cdrcmple + + mov ip, r0 + mov r0, r2 + mov r2, ip + mov ip, r1 + mov r1, r3 + mov r3, ip + b 6f + +ARM_FUNC_START aeabi_cdcmpeq +ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: do_push {r0, lr} + ARM_CALL cmpdf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. + do_it mi + cmnmi r0, #0 + RETLDM "r0" + + FUNC_END aeabi_cdcmple + FUNC_END aeabi_cdcmpeq + FUNC_END aeabi_cdrcmple + +ARM_FUNC_START aeabi_dcmpeq + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdcmple + do_it eq, e + moveq r0, #1 @ Equal to. + movne r0, #0 @ Less than, greater than, or unordered. + RETLDM + + FUNC_END aeabi_dcmpeq + +ARM_FUNC_START aeabi_dcmplt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdcmple + do_it cc, e + movcc r0, #1 @ Less than. + movcs r0, #0 @ Equal to, greater than, or unordered. + RETLDM + + FUNC_END aeabi_dcmplt + +ARM_FUNC_START aeabi_dcmple + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdcmple + do_it ls, e + movls r0, #1 @ Less than or equal to. + movhi r0, #0 @ Greater than or unordered. + RETLDM + + FUNC_END aeabi_dcmple + +ARM_FUNC_START aeabi_dcmpge + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdrcmple + do_it ls, e + movls r0, #1 @ Operand 2 is less than or equal to operand 1. + movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. + RETLDM + + FUNC_END aeabi_dcmpge + +ARM_FUNC_START aeabi_dcmpgt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdrcmple + do_it cc, e + movcc r0, #1 @ Operand 2 is less than operand 1. + movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, + @ or they are unordered. + RETLDM + + FUNC_END aeabi_dcmpgt + +#endif /* L_cmpdf2 */ + +#ifdef L_arm_unorddf2 + +ARM_FUNC_START unorddf2 +ARM_FUNC_ALIAS aeabi_dcmpun unorddf2 + + mov ip, xh, lsl #1 + mvns ip, ip, asr #21 + bne 1f + orrs ip, xl, xh, lsl #12 + bne 3f @ x is NAN +1: mov ip, yh, lsl #1 + mvns ip, ip, asr #21 + bne 2f + orrs ip, yl, yh, lsl #12 + bne 3f @ y is NAN +2: mov r0, #0 @ arguments are ordered. + RET + +3: mov r0, #1 @ arguments are unordered. + RET + + FUNC_END aeabi_dcmpun + FUNC_END unorddf2 + +#endif /* L_unorddf2 */ + +#ifdef L_arm_fixdfsi + +ARM_FUNC_START fixdfsi +ARM_FUNC_ALIAS aeabi_d2iz fixdfsi + + @ check exponent range. 
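The aeabi_dcmp* entry points above are thin wrappers around the flag-setting cdcmple/cdrcmple routines: each returns nonzero when its relation holds and 0 otherwise, and an unordered comparison (either operand a NaN) yields 0 for all of them. That matches ordinary C comparison semantics, so a reference model is simply:

    /* Reference model of the boolean comparison wrappers above.
       NaN operands make every relation false, hence a 0 return.  */
    static int model_dcmpeq (double a, double b) { return a == b; }
    static int model_dcmplt (double a, double b) { return a <  b; }
    static int model_dcmple (double a, double b) { return a <= b; }
    static int model_dcmpge (double a, double b) { return a >= b; }
    static int model_dcmpgt (double a, double b) { return a >  b; }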
+ mov r2, xh, lsl #1 + adds r2, r2, #(1 << 21) + bcs 2f @ value is INF or NAN + bpl 1f @ value is too small + mov r3, #(0xfffffc00 + 31) + subs r2, r3, r2, asr #21 + bls 3f @ value is too large + + @ scale value + mov r3, xh, lsl #11 + orr r3, r3, #0x80000000 + orr r3, r3, xl, lsr #21 + tst xh, #0x80000000 @ the sign bit + shift1 lsr, r0, r3, r2 + do_it ne + rsbne r0, r0, #0 + RET + +1: mov r0, #0 + RET + +2: orrs xl, xl, xh, lsl #12 + bne 4f @ x is NAN. +3: ands r0, xh, #0x80000000 @ the sign bit + do_it eq + moveq r0, #0x7fffffff @ maximum signed positive si + RET + +4: mov r0, #0 @ How should we convert NAN? + RET + + FUNC_END aeabi_d2iz + FUNC_END fixdfsi + +#endif /* L_fixdfsi */ + +#ifdef L_arm_fixunsdfsi + +ARM_FUNC_START fixunsdfsi +ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi + + @ check exponent range. + movs r2, xh, lsl #1 + bcs 1f @ value is negative + adds r2, r2, #(1 << 21) + bcs 2f @ value is INF or NAN + bpl 1f @ value is too small + mov r3, #(0xfffffc00 + 31) + subs r2, r3, r2, asr #21 + bmi 3f @ value is too large + + @ scale value + mov r3, xh, lsl #11 + orr r3, r3, #0x80000000 + orr r3, r3, xl, lsr #21 + shift1 lsr, r0, r3, r2 + RET + +1: mov r0, #0 + RET + +2: orrs xl, xl, xh, lsl #12 + bne 4f @ value is NAN. +3: mov r0, #0xffffffff @ maximum unsigned si + RET + +4: mov r0, #0 @ How should we convert NAN? + RET + + FUNC_END aeabi_d2uiz + FUNC_END fixunsdfsi + +#endif /* L_fixunsdfsi */ + +#ifdef L_arm_truncdfsf2 + +ARM_FUNC_START truncdfsf2 +ARM_FUNC_ALIAS aeabi_d2f truncdfsf2 + + @ check exponent range. + mov r2, xh, lsl #1 + subs r3, r2, #((1023 - 127) << 21) + do_it cs, t + COND(sub,s,cs) ip, r3, #(1 << 21) + COND(rsb,s,cs) ip, ip, #(254 << 21) + bls 2f @ value is out of range + +1: @ shift and round mantissa + and ip, xh, #0x80000000 + mov r2, xl, lsl #3 + orr xl, ip, xl, lsr #29 + cmp r2, #0x80000000 + adc r0, xl, r3, lsl #2 + do_it eq + biceq r0, r0, #1 + RET + +2: @ either overflow or underflow + tst xh, #0x40000000 + bne 3f @ overflow + + @ check if denormalized value is possible + adds r2, r3, #(23 << 21) + do_it lt, t + andlt r0, xh, #0x80000000 @ too small, return signed 0. + RETc(lt) + + @ denormalize value so we can resume with the code above afterwards. + orr xh, xh, #0x00100000 + mov r2, r2, lsr #21 + rsb r2, r2, #24 + rsb ip, r2, #32 +#if defined(__thumb2__) + lsls r3, xl, ip +#else + movs r3, xl, lsl ip +#endif + shift1 lsr, xl, xl, r2 + do_it ne + orrne xl, xl, #1 @ fold r3 for rounding considerations. + mov r3, xh, lsl #11 + mov r3, r3, lsr #11 + shiftop orr xl xl r3 lsl ip ip + shift1 lsr, r3, r3, r2 + mov r3, r3, lsl #1 + b 1b + +3: @ chech for NAN + mvns r3, r2, asr #21 + bne 5f @ simple overflow + orrs r3, xl, xh, lsl #12 + do_it ne, tt + movne r0, #0x7f000000 + orrne r0, r0, #0x00c00000 + RETc(ne) @ return NAN + +5: @ return INF with sign + and r0, xh, #0x80000000 + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + FUNC_END aeabi_d2f + FUNC_END truncdfsf2 + +#endif /* L_truncdfsf2 */ diff --git a/libgcc/config/arm/ieee754-sf.S b/libgcc/config/arm/ieee754-sf.S new file mode 100644 index 00000000000..c93f66d8ff8 --- /dev/null +++ b/libgcc/config/arm/ieee754-sf.S @@ -0,0 +1,1060 @@ +/* ieee754-sf.S single-precision floating point support for ARM + + Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. 
+ Contributed by Nicolas Pitre (nico@cam.org) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* + * Notes: + * + * The goal of this code is to be as fast as possible. This is + * not meant to be easy to understand for the casual reader. + * + * Only the default rounding mode is intended for best performances. + * Exceptions aren't supported yet, but that can be added quite easily + * if necessary without impacting performances. + */ + +#ifdef L_arm_negsf2 + +ARM_FUNC_START negsf2 +ARM_FUNC_ALIAS aeabi_fneg negsf2 + + eor r0, r0, #0x80000000 @ flip sign bit + RET + + FUNC_END aeabi_fneg + FUNC_END negsf2 + +#endif + +#ifdef L_arm_addsubsf3 + +ARM_FUNC_START aeabi_frsub + + eor r0, r0, #0x80000000 @ flip sign bit of first arg + b 1f + +ARM_FUNC_START subsf3 +ARM_FUNC_ALIAS aeabi_fsub subsf3 + + eor r1, r1, #0x80000000 @ flip sign bit of second arg +#if defined(__INTERWORKING_STUBS__) + b 1f @ Skip Thumb-code prologue +#endif + +ARM_FUNC_START addsf3 +ARM_FUNC_ALIAS aeabi_fadd addsf3 + +1: @ Look for zeroes, equal values, INF, or NAN. + movs r2, r0, lsl #1 + do_it ne, ttt + COND(mov,s,ne) r3, r1, lsl #1 + teqne r2, r3 + COND(mvn,s,ne) ip, r2, asr #24 + COND(mvn,s,ne) ip, r3, asr #24 + beq LSYM(Lad_s) + + @ Compute exponent difference. Make largest exponent in r2, + @ corresponding arg in r0, and positive exponent difference in r3. + mov r2, r2, lsr #24 + rsbs r3, r2, r3, lsr #24 + do_it gt, ttt + addgt r2, r2, r3 + eorgt r1, r0, r1 + eorgt r0, r1, r0 + eorgt r1, r0, r1 + do_it lt + rsblt r3, r3, #0 + + @ If exponent difference is too large, return largest argument + @ already in r0. We need up to 25 bit to handle proper rounding + @ of 0x1p25 - 1.1. + cmp r3, #25 + do_it hi + RETc(hi) + + @ Convert mantissa to signed integer. + tst r0, #0x80000000 + orr r0, r0, #0x00800000 + bic r0, r0, #0xff000000 + do_it ne + rsbne r0, r0, #0 + tst r1, #0x80000000 + orr r1, r1, #0x00800000 + bic r1, r1, #0xff000000 + do_it ne + rsbne r1, r1, #0 + + @ If exponent == difference, one or both args were denormalized. + @ Since this is not common case, rescale them off line. + teq r2, r3 + beq LSYM(Lad_d) +LSYM(Lad_x): + + @ Compensate for the exponent overlapping the mantissa MSB added later + sub r2, r2, #1 + + @ Shift and add second arg to first arg in r0. + @ Keep leftover bits into r1. + shiftop adds r0 r0 r1 asr r3 ip + rsb r3, r3, #32 + shift1 lsl, r1, r1, r3 + + @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above) + and r3, r0, #0x80000000 + bpl LSYM(Lad_p) +#if defined(__thumb2__) + negs r1, r1 + sbc r0, r0, r0, lsl #1 +#else + rsbs r1, r1, #0 + rsc r0, r0, #0 +#endif + + @ Determine how to normalize the result. 
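The single-precision addition above keeps each operand as a sign, an 8-bit exponent and a 24-bit significand spread across r0-r3/ip; once the exponent difference exceeds 25 bits the smaller operand cannot even influence rounding, so the larger one is returned as-is. For reference, the same unpacking written plainly in C (a hypothetical helper, for illustration):

    #include <stdint.h>

    struct sf_fields { uint32_t sign; int32_t exp; uint32_t mant; };

    static struct sf_fields unpack_sf (uint32_t bits)
    {
      struct sf_fields f;
      f.sign = bits >> 31;
      f.exp  = (bits >> 23) & 0xff;
      f.mant = bits & 0x007fffff;
      if (f.exp != 0)
        f.mant |= 0x00800000;      /* implicit leading 1 of normal numbers */
      return f;
    }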
+LSYM(Lad_p): + cmp r0, #0x00800000 + bcc LSYM(Lad_a) + cmp r0, #0x01000000 + bcc LSYM(Lad_e) + + @ Result needs to be shifted right. + movs r0, r0, lsr #1 + mov r1, r1, rrx + add r2, r2, #1 + + @ Make sure we did not bust our exponent. + cmp r2, #254 + bhs LSYM(Lad_o) + + @ Our result is now properly aligned into r0, remaining bits in r1. + @ Pack final result together. + @ Round with MSB of r1. If halfway between two numbers, round towards + @ LSB of r0 = 0. +LSYM(Lad_e): + cmp r1, #0x80000000 + adc r0, r0, r2, lsl #23 + do_it eq + biceq r0, r0, #1 + orr r0, r0, r3 + RET + + @ Result must be shifted left and exponent adjusted. +LSYM(Lad_a): + movs r1, r1, lsl #1 + adc r0, r0, r0 + tst r0, #0x00800000 + sub r2, r2, #1 + bne LSYM(Lad_e) + + @ No rounding necessary since r1 will always be 0 at this point. +LSYM(Lad_l): + +#if __ARM_ARCH__ < 5 + + movs ip, r0, lsr #12 + moveq r0, r0, lsl #12 + subeq r2, r2, #12 + tst r0, #0x00ff0000 + moveq r0, r0, lsl #8 + subeq r2, r2, #8 + tst r0, #0x00f00000 + moveq r0, r0, lsl #4 + subeq r2, r2, #4 + tst r0, #0x00c00000 + moveq r0, r0, lsl #2 + subeq r2, r2, #2 + cmp r0, #0x00800000 + movcc r0, r0, lsl #1 + sbcs r2, r2, #0 + +#else + + clz ip, r0 + sub ip, ip, #8 + subs r2, r2, ip + shift1 lsl, r0, r0, ip + +#endif + + @ Final result with sign + @ If exponent negative, denormalize result. + do_it ge, et + addge r0, r0, r2, lsl #23 + rsblt r2, r2, #0 + orrge r0, r0, r3 +#if defined(__thumb2__) + do_it lt, t + lsrlt r0, r0, r2 + orrlt r0, r3, r0 +#else + orrlt r0, r3, r0, lsr r2 +#endif + RET + + @ Fixup and adjust bit position for denormalized arguments. + @ Note that r2 must not remain equal to 0. +LSYM(Lad_d): + teq r2, #0 + eor r1, r1, #0x00800000 + do_it eq, te + eoreq r0, r0, #0x00800000 + addeq r2, r2, #1 + subne r3, r3, #1 + b LSYM(Lad_x) + +LSYM(Lad_s): + mov r3, r1, lsl #1 + + mvns ip, r2, asr #24 + do_it ne + COND(mvn,s,ne) ip, r3, asr #24 + beq LSYM(Lad_i) + + teq r2, r3 + beq 1f + + @ Result is x + 0.0 = x or 0.0 + y = y. + teq r2, #0 + do_it eq + moveq r0, r1 + RET + +1: teq r0, r1 + + @ Result is x - x = 0. + do_it ne, t + movne r0, #0 + RETc(ne) + + @ Result is x + x = 2x. + tst r2, #0xff000000 + bne 2f + movs r0, r0, lsl #1 + do_it cs + orrcs r0, r0, #0x80000000 + RET +2: adds r2, r2, #(2 << 24) + do_it cc, t + addcc r0, r0, #(1 << 23) + RETc(cc) + and r3, r0, #0x80000000 + + @ Overflow: return INF. +LSYM(Lad_o): + orr r0, r3, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + @ At least one of r0/r1 is INF/NAN. 
+ @ if r0 != INF/NAN: return r1 (which is INF/NAN) + @ if r1 != INF/NAN: return r0 (which is INF/NAN) + @ if r0 or r1 is NAN: return NAN + @ if opposite sign: return NAN + @ otherwise return r0 (which is INF or -INF) +LSYM(Lad_i): + mvns r2, r2, asr #24 + do_it ne, et + movne r0, r1 + COND(mvn,s,eq) r3, r3, asr #24 + movne r1, r0 + movs r2, r0, lsl #9 + do_it eq, te + COND(mov,s,eq) r3, r1, lsl #9 + teqeq r0, r1 + orrne r0, r0, #0x00400000 @ quiet NAN + RET + + FUNC_END aeabi_frsub + FUNC_END aeabi_fadd + FUNC_END addsf3 + FUNC_END aeabi_fsub + FUNC_END subsf3 + +ARM_FUNC_START floatunsisf +ARM_FUNC_ALIAS aeabi_ui2f floatunsisf + + mov r3, #0 + b 1f + +ARM_FUNC_START floatsisf +ARM_FUNC_ALIAS aeabi_i2f floatsisf + + ands r3, r0, #0x80000000 + do_it mi + rsbmi r0, r0, #0 + +1: movs ip, r0 + do_it eq + RETc(eq) + + @ Add initial exponent to sign + orr r3, r3, #((127 + 23) << 23) + + .ifnc ah, r0 + mov ah, r0 + .endif + mov al, #0 + b 2f + + FUNC_END aeabi_i2f + FUNC_END floatsisf + FUNC_END aeabi_ui2f + FUNC_END floatunsisf + +ARM_FUNC_START floatundisf +ARM_FUNC_ALIAS aeabi_ul2f floatundisf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqs f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + + mov r3, #0 + b 1f + +ARM_FUNC_START floatdisf +ARM_FUNC_ALIAS aeabi_l2f floatdisf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqs f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + + ands r3, ah, #0x80000000 @ sign bit in r3 + bpl 1f +#if defined(__thumb2__) + negs al, al + sbc ah, ah, ah, lsl #1 +#else + rsbs al, al, #0 + rsc ah, ah, #0 +#endif +1: +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + @ For hard FPA code we want to return via the tail below so that + @ we can return the result in f0 as well as in r0 for backwards + @ compatibility. + str lr, [sp, #-8]! + adr lr, LSYM(f0_ret) +#endif + + movs ip, ah + do_it eq, tt + moveq ip, al + moveq ah, al + moveq al, #0 + + @ Add initial exponent to sign + orr r3, r3, #((127 + 23 + 32) << 23) + do_it eq + subeq r3, r3, #(32 << 23) +2: sub r3, r3, #(1 << 23) + +#if __ARM_ARCH__ < 5 + + mov r2, #23 + cmp ip, #(1 << 16) + do_it hs, t + movhs ip, ip, lsr #16 + subhs r2, r2, #16 + cmp ip, #(1 << 8) + do_it hs, t + movhs ip, ip, lsr #8 + subhs r2, r2, #8 + cmp ip, #(1 << 4) + do_it hs, t + movhs ip, ip, lsr #4 + subhs r2, r2, #4 + cmp ip, #(1 << 2) + do_it hs, e + subhs r2, r2, #2 + sublo r2, r2, ip, lsr #1 + subs r2, r2, ip, lsr #3 + +#else + + clz r2, ip + subs r2, r2, #8 + +#endif + + sub r3, r3, r2, lsl #23 + blt 3f + + shiftop add r3 r3 ah lsl r2 ip + shift1 lsl, ip, al, r2 + rsb r2, r2, #32 + cmp ip, #0x80000000 + shiftop adc r0 r3 al lsr r2 r2 + do_it eq + biceq r0, r0, #1 + RET + +3: add r2, r2, #32 + shift1 lsl, ip, ah, r2 + rsb r2, r2, #32 + orrs al, al, ip, lsl #1 + shiftop adc r0 r3 ah lsr r2 r2 + do_it eq + biceq r0, r0, ip, lsr #31 + RET + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + +LSYM(f0_ret): + str r0, [sp, #-4]! + ldfs f0, [sp], #4 + RETLDM + +#endif + + FUNC_END floatdisf + FUNC_END aeabi_l2f + FUNC_END floatundisf + FUNC_END aeabi_ul2f + +#endif /* L_addsubsf3 */ + +#ifdef L_arm_muldivsf3 + +ARM_FUNC_START mulsf3 +ARM_FUNC_ALIAS aeabi_fmul mulsf3 + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + ands r2, ip, r0, lsr #23 + do_it ne, tt + COND(and,s,ne) r3, ip, r1, lsr #23 + teqne r2, ip + teqne r3, ip + beq LSYM(Lml_s) +LSYM(Lml_x): + + @ Add exponents together + add r2, r2, r3 + + @ Determine final sign. 
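The integer-to-float conversions above normalize the operand with a leading-zero count, derive the exponent from the shift amount, and round to nearest with ties to even using the bits shifted out (the carry and "sticky" tests in the assembly). A C sketch of what aeabi_ui2f computes, returning the raw encoding (illustration only; model_ui2f is not a real libgcc symbol):

    #include <stdint.h>

    static uint32_t model_ui2f (uint32_t x)
    {
      if (x == 0)
        return 0;

      int shift = 0;
      while (!(x & 0x80000000u)) { x <<= 1; shift++; }   /* normalize */

      uint32_t exp  = 127 + 31 - shift;      /* unbiased exponent is 31 - shift */
      uint32_t rest = x << 24;               /* the 8 bits that get shifted out */
      uint32_t r    = (exp << 23) + ((x >> 8) & 0x007fffff);

      /* Round to nearest, ties to even; a carry out of the mantissa
         field correctly bumps the exponent.  */
      if (rest > 0x80000000u || (rest == 0x80000000u && (r & 1)))
        r += 1;
      return r;
    }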
+ eor ip, r0, r1 + + @ Convert mantissa to unsigned integer. + @ If power of two, branch to a separate path. + @ Make up for final alignment. + movs r0, r0, lsl #9 + do_it ne + COND(mov,s,ne) r1, r1, lsl #9 + beq LSYM(Lml_1) + mov r3, #0x08000000 + orr r0, r3, r0, lsr #5 + orr r1, r3, r1, lsr #5 + +#if __ARM_ARCH__ < 4 + + @ Put sign bit in r3, which will be restored into r0 later. + and r3, ip, #0x80000000 + + @ Well, no way to make it shorter without the umull instruction. + do_push {r3, r4, r5} + mov r4, r0, lsr #16 + mov r5, r1, lsr #16 + bic r0, r0, r4, lsl #16 + bic r1, r1, r5, lsl #16 + mul ip, r4, r5 + mul r3, r0, r1 + mul r0, r5, r0 + mla r0, r4, r1, r0 + adds r3, r3, r0, lsl #16 + adc r1, ip, r0, lsr #16 + do_pop {r0, r4, r5} + +#else + + @ The actual multiplication. + umull r3, r1, r0, r1 + + @ Put final sign in r0. + and r0, ip, #0x80000000 + +#endif + + @ Adjust result upon the MSB position. + cmp r1, #(1 << 23) + do_it cc, tt + movcc r1, r1, lsl #1 + orrcc r1, r1, r3, lsr #31 + movcc r3, r3, lsl #1 + + @ Add sign to result. + orr r0, r0, r1 + + @ Apply exponent bias, check for under/overflow. + sbc r2, r2, #127 + cmp r2, #(254 - 1) + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + cmp r3, #0x80000000 + adc r0, r0, r2, lsl #23 + do_it eq + biceq r0, r0, #1 + RET + + @ Multiplication by 0x1p*: let''s shortcut a lot of code. +LSYM(Lml_1): + teq r0, #0 + and ip, ip, #0x80000000 + do_it eq + moveq r1, r1, lsl #9 + orr r0, ip, r0, lsr #9 + orr r0, r0, r1, lsr #9 + subs r2, r2, #127 + do_it gt, tt + COND(rsb,s,gt) r3, r2, #255 + orrgt r0, r0, r2, lsl #23 + RETc(gt) + + @ Under/overflow: fix things up for the code below. + orr r0, r0, #0x00800000 + mov r3, #0 + subs r2, r2, #1 + +LSYM(Lml_u): + @ Overflow? + bgt LSYM(Lml_o) + + @ Check if denormalized result is possible, otherwise return signed 0. + cmn r2, #(24 + 1) + do_it le, t + bicle r0, r0, #0x7fffffff + RETc(le) + + @ Shift value right, round, etc. + rsb r2, r2, #0 + movs r1, r0, lsl #1 + shift1 lsr, r1, r1, r2 + rsb r2, r2, #32 + shift1 lsl, ip, r0, r2 + movs r0, r1, rrx + adc r0, r0, #0 + orrs r3, r3, ip, lsl #1 + do_it eq + biceq r0, r0, ip, lsr #31 + RET + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Lml_d): + teq r2, #0 + and ip, r0, #0x80000000 +1: do_it eq, tt + moveq r0, r0, lsl #1 + tsteq r0, #0x00800000 + subeq r2, r2, #1 + beq 1b + orr r0, r0, ip + teq r3, #0 + and ip, r1, #0x80000000 +2: do_it eq, tt + moveq r1, r1, lsl #1 + tsteq r1, #0x00800000 + subeq r3, r3, #1 + beq 2b + orr r1, r1, ip + b LSYM(Lml_x) + +LSYM(Lml_s): + @ Isolate the INF and NAN cases away + and r3, ip, r1, lsr #23 + teq r2, ip + do_it ne + teqne r3, ip + beq 1f + + @ Here, one or more arguments are either denormalized or zero. + bics ip, r0, #0x80000000 + do_it ne + COND(bic,s,ne) ip, r1, #0x80000000 + bne LSYM(Lml_d) + + @ Result is 0, but determine sign anyway. +LSYM(Lml_z): + eor r0, r0, r1 + bic r0, r0, #0x7fffffff + RET + +1: @ One or both args are INF or NAN. + teq r0, #0x0 + do_it ne, ett + teqne r0, #0x80000000 + moveq r0, r1 + teqne r1, #0x0 + teqne r1, #0x80000000 + beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN + teq r2, ip + bne 1f + movs r2, r0, lsl #9 + bne LSYM(Lml_n) @ NAN * -> NAN +1: teq r3, ip + bne LSYM(Lml_i) + movs r3, r1, lsl #9 + do_it ne + movne r0, r1 + bne LSYM(Lml_n) @ * NAN -> NAN + + @ Result is INF, but we need to determine its sign. +LSYM(Lml_i): + eor r0, r0, r1 + + @ Overflow: return INF (sign already in r0). 
+LSYM(Lml_o): + and r0, r0, #0x80000000 + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + @ Return a quiet NAN. +LSYM(Lml_n): + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00c00000 + RET + + FUNC_END aeabi_fmul + FUNC_END mulsf3 + +ARM_FUNC_START divsf3 +ARM_FUNC_ALIAS aeabi_fdiv divsf3 + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + ands r2, ip, r0, lsr #23 + do_it ne, tt + COND(and,s,ne) r3, ip, r1, lsr #23 + teqne r2, ip + teqne r3, ip + beq LSYM(Ldv_s) +LSYM(Ldv_x): + + @ Substract divisor exponent from dividend''s + sub r2, r2, r3 + + @ Preserve final sign into ip. + eor ip, r0, r1 + + @ Convert mantissa to unsigned integer. + @ Dividend -> r3, divisor -> r1. + movs r1, r1, lsl #9 + mov r0, r0, lsl #9 + beq LSYM(Ldv_1) + mov r3, #0x10000000 + orr r1, r3, r1, lsr #4 + orr r3, r3, r0, lsr #4 + + @ Initialize r0 (result) with final sign bit. + and r0, ip, #0x80000000 + + @ Ensure result will land to known bit position. + @ Apply exponent bias accordingly. + cmp r3, r1 + do_it cc + movcc r3, r3, lsl #1 + adc r2, r2, #(127 - 2) + + @ The actual division loop. + mov ip, #0x00800000 +1: cmp r3, r1 + do_it cs, t + subcs r3, r3, r1 + orrcs r0, r0, ip + cmp r3, r1, lsr #1 + do_it cs, t + subcs r3, r3, r1, lsr #1 + orrcs r0, r0, ip, lsr #1 + cmp r3, r1, lsr #2 + do_it cs, t + subcs r3, r3, r1, lsr #2 + orrcs r0, r0, ip, lsr #2 + cmp r3, r1, lsr #3 + do_it cs, t + subcs r3, r3, r1, lsr #3 + orrcs r0, r0, ip, lsr #3 + movs r3, r3, lsl #4 + do_it ne + COND(mov,s,ne) ip, ip, lsr #4 + bne 1b + + @ Check exponent for under/overflow. + cmp r2, #(254 - 1) + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + cmp r3, r1 + adc r0, r0, r2, lsl #23 + do_it eq + biceq r0, r0, #1 + RET + + @ Division by 0x1p*: let''s shortcut a lot of code. +LSYM(Ldv_1): + and ip, ip, #0x80000000 + orr r0, ip, r0, lsr #9 + adds r2, r2, #127 + do_it gt, tt + COND(rsb,s,gt) r3, r2, #255 + orrgt r0, r0, r2, lsl #23 + RETc(gt) + + orr r0, r0, #0x00800000 + mov r3, #0 + subs r2, r2, #1 + b LSYM(Lml_u) + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Ldv_d): + teq r2, #0 + and ip, r0, #0x80000000 +1: do_it eq, tt + moveq r0, r0, lsl #1 + tsteq r0, #0x00800000 + subeq r2, r2, #1 + beq 1b + orr r0, r0, ip + teq r3, #0 + and ip, r1, #0x80000000 +2: do_it eq, tt + moveq r1, r1, lsl #1 + tsteq r1, #0x00800000 + subeq r3, r3, #1 + beq 2b + orr r1, r1, ip + b LSYM(Ldv_x) + + @ One or both arguments are either INF, NAN, zero or denormalized. +LSYM(Ldv_s): + and r3, ip, r1, lsr #23 + teq r2, ip + bne 1f + movs r2, r0, lsl #9 + bne LSYM(Lml_n) @ NAN / -> NAN + teq r3, ip + bne LSYM(Lml_i) @ INF / -> INF + mov r0, r1 + b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN +1: teq r3, ip + bne 2f + movs r3, r1, lsl #9 + beq LSYM(Lml_z) @ / INF -> 0 + mov r0, r1 + b LSYM(Lml_n) @ / NAN -> NAN +2: @ If both are nonzero, we need to normalize and resume above. + bics ip, r0, #0x80000000 + do_it ne + COND(bic,s,ne) ip, r1, #0x80000000 + bne LSYM(Ldv_d) + @ One or both arguments are zero. + bics r2, r0, #0x80000000 + bne LSYM(Lml_i) @ / 0 -> INF + bics r3, r1, #0x80000000 + bne LSYM(Lml_z) @ 0 / -> 0 + b LSYM(Lml_n) @ 0 / 0 -> NAN + + FUNC_END aeabi_fdiv + FUNC_END divsf3 + +#endif /* L_muldivsf3 */ + +#ifdef L_arm_cmpsf2 + + @ The return value in r0 is + @ + @ 0 if the operands are equal + @ 1 if the first operand is greater than the second, or + @ the operands are unordered and the operation is + @ CMP, LT, LE, NE, or EQ. 
+ @ -1 if the first operand is less than the second, or + @ the operands are unordered and the operation is GT + @ or GE. + @ + @ The Z flag will be set iff the operands are equal. + @ + @ The following registers are clobbered by this function: + @ ip, r0, r1, r2, r3 + +ARM_FUNC_START gtsf2 +ARM_FUNC_ALIAS gesf2 gtsf2 + mov ip, #-1 + b 1f + +ARM_FUNC_START ltsf2 +ARM_FUNC_ALIAS lesf2 ltsf2 + mov ip, #1 + b 1f + +ARM_FUNC_START cmpsf2 +ARM_FUNC_ALIAS nesf2 cmpsf2 +ARM_FUNC_ALIAS eqsf2 cmpsf2 + mov ip, #1 @ how should we specify unordered here? + +1: str ip, [sp, #-4]! + + @ Trap any INF/NAN first. + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + mvns ip, r2, asr #24 + do_it ne + COND(mvn,s,ne) ip, r3, asr #24 + beq 3f + + @ Compare values. + @ Note that 0.0 is equal to -0.0. +2: add sp, sp, #4 + orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag + do_it ne + teqne r0, r1 @ if not 0 compare sign + do_it pl + COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0 + + @ Result: + do_it hi + movhi r0, r1, asr #31 + do_it lo + mvnlo r0, r1, asr #31 + do_it ne + orrne r0, r0, #1 + RET + + @ Look for a NAN. +3: mvns ip, r2, asr #24 + bne 4f + movs ip, r0, lsl #9 + bne 5f @ r0 is NAN +4: mvns ip, r3, asr #24 + bne 2b + movs ip, r1, lsl #9 + beq 2b @ r1 is not NAN +5: ldr r0, [sp], #4 @ return unordered code. + RET + + FUNC_END gesf2 + FUNC_END gtsf2 + FUNC_END lesf2 + FUNC_END ltsf2 + FUNC_END nesf2 + FUNC_END eqsf2 + FUNC_END cmpsf2 + +ARM_FUNC_START aeabi_cfrcmple + + mov ip, r0 + mov r0, r1 + mov r1, ip + b 6f + +ARM_FUNC_START aeabi_cfcmpeq +ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: do_push {r0, r1, r2, r3, lr} + ARM_CALL cmpsf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. + do_it mi + cmnmi r0, #0 + RETLDM "r0, r1, r2, r3" + + FUNC_END aeabi_cfcmple + FUNC_END aeabi_cfcmpeq + FUNC_END aeabi_cfrcmple + +ARM_FUNC_START aeabi_fcmpeq + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfcmple + do_it eq, e + moveq r0, #1 @ Equal to. + movne r0, #0 @ Less than, greater than, or unordered. + RETLDM + + FUNC_END aeabi_fcmpeq + +ARM_FUNC_START aeabi_fcmplt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfcmple + do_it cc, e + movcc r0, #1 @ Less than. + movcs r0, #0 @ Equal to, greater than, or unordered. + RETLDM + + FUNC_END aeabi_fcmplt + +ARM_FUNC_START aeabi_fcmple + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfcmple + do_it ls, e + movls r0, #1 @ Less than or equal to. + movhi r0, #0 @ Greater than or unordered. + RETLDM + + FUNC_END aeabi_fcmple + +ARM_FUNC_START aeabi_fcmpge + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfrcmple + do_it ls, e + movls r0, #1 @ Operand 2 is less than or equal to operand 1. + movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. + RETLDM + + FUNC_END aeabi_fcmpge + +ARM_FUNC_START aeabi_fcmpgt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfrcmple + do_it cc, e + movcc r0, #1 @ Operand 2 is less than operand 1. + movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, + @ or they are unordered. 
+ RETLDM + + FUNC_END aeabi_fcmpgt + +#endif /* L_cmpsf2 */ + +#ifdef L_arm_unordsf2 + +ARM_FUNC_START unordsf2 +ARM_FUNC_ALIAS aeabi_fcmpun unordsf2 + + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + mvns ip, r2, asr #24 + bne 1f + movs ip, r0, lsl #9 + bne 3f @ r0 is NAN +1: mvns ip, r3, asr #24 + bne 2f + movs ip, r1, lsl #9 + bne 3f @ r1 is NAN +2: mov r0, #0 @ arguments are ordered. + RET +3: mov r0, #1 @ arguments are unordered. + RET + + FUNC_END aeabi_fcmpun + FUNC_END unordsf2 + +#endif /* L_unordsf2 */ + +#ifdef L_arm_fixsfsi + +ARM_FUNC_START fixsfsi +ARM_FUNC_ALIAS aeabi_f2iz fixsfsi + + @ check exponent range. + mov r2, r0, lsl #1 + cmp r2, #(127 << 24) + bcc 1f @ value is too small + mov r3, #(127 + 31) + subs r2, r3, r2, lsr #24 + bls 2f @ value is too large + + @ scale value + mov r3, r0, lsl #8 + orr r3, r3, #0x80000000 + tst r0, #0x80000000 @ the sign bit + shift1 lsr, r0, r3, r2 + do_it ne + rsbne r0, r0, #0 + RET + +1: mov r0, #0 + RET + +2: cmp r2, #(127 + 31 - 0xff) + bne 3f + movs r2, r0, lsl #9 + bne 4f @ r0 is NAN. +3: ands r0, r0, #0x80000000 @ the sign bit + do_it eq + moveq r0, #0x7fffffff @ the maximum signed positive si + RET + +4: mov r0, #0 @ What should we convert NAN to? + RET + + FUNC_END aeabi_f2iz + FUNC_END fixsfsi + +#endif /* L_fixsfsi */ + +#ifdef L_arm_fixunssfsi + +ARM_FUNC_START fixunssfsi +ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi + + @ check exponent range. + movs r2, r0, lsl #1 + bcs 1f @ value is negative + cmp r2, #(127 << 24) + bcc 1f @ value is too small + mov r3, #(127 + 31) + subs r2, r3, r2, lsr #24 + bmi 2f @ value is too large + + @ scale the value + mov r3, r0, lsl #8 + orr r3, r3, #0x80000000 + shift1 lsr, r0, r3, r2 + RET + +1: mov r0, #0 + RET + +2: cmp r2, #(127 + 31 - 0xff) + bne 3f + movs r2, r0, lsl #9 + bne 4f @ r0 is NAN. +3: mov r0, #0xffffffff @ maximum unsigned si + RET + +4: mov r0, #0 @ What should we convert NAN to? + RET + + FUNC_END aeabi_f2uiz + FUNC_END fixunssfsi + +#endif /* L_fixunssfsi */ diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S new file mode 100644 index 00000000000..2e76c01df4b --- /dev/null +++ b/libgcc/config/arm/lib1funcs.S @@ -0,0 +1,1829 @@ +@ libgcc routines for ARM cpu. +@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) + +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008, + 2009, 2010 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* An executable stack is *not* required for these functions. 
*/ +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif /* __ELF__ and __linux__ */ + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. */ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ +/* ------------------------------------------------------------------------ */ + +/* We need to know what prefix to add to function names. */ + +#ifndef __USER_LABEL_PREFIX__ +#error __USER_LABEL_PREFIX__ not defined +#endif + +/* ANSI concatenation macros. */ + +#define CONCAT1(a, b) CONCAT2(a, b) +#define CONCAT2(a, b) a ## b + +/* Use the right prefix for global labels. */ + +#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) + +#ifdef __ELF__ +#ifdef __thumb__ +#define __PLT__ /* Not supported in Thumb assembler (for now). */ +#elif defined __vxworks && !defined __PIC__ +#define __PLT__ /* Not supported by the kernel loader. */ +#else +#define __PLT__ (PLT) +#endif +#define TYPE(x) .type SYM(x),function +#define SIZE(x) .size SYM(x), . - SYM(x) +#define LSYM(x) .x +#else +#define __PLT__ +#define TYPE(x) +#define SIZE(x) +#define LSYM(x) x +#endif + +/* Function end macros. Variants for interworking. */ + +#if defined(__ARM_ARCH_2__) +# define __ARM_ARCH__ 2 +#endif + +#if defined(__ARM_ARCH_3__) +# define __ARM_ARCH__ 3 +#endif + +#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \ + || defined(__ARM_ARCH_4T__) +/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with + long multiply instructions. That includes v3M. */ +# define __ARM_ARCH__ 4 +#endif + +#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \ + || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +# define __ARM_ARCH__ 5 +#endif + +#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ + || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \ + || defined(__ARM_ARCH_6M__) +# define __ARM_ARCH__ 6 +#endif + +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__) +# define __ARM_ARCH__ 7 +#endif + +#ifndef __ARM_ARCH__ +#error Unable to determine architecture. +#endif + +/* There are times when we might prefer Thumb1 code even if ARM code is + permitted, for example, the code might be smaller, or there might be + interworking problems with switching to ARM state if interworking is + disabled. */ +#if (defined(__thumb__) \ + && !defined(__thumb2__) \ + && (!defined(__THUMB_INTERWORK__) \ + || defined (__OPTIMIZE_SIZE__) \ + || defined(__ARM_ARCH_6M__))) +# define __prefer_thumb__ +#endif + +/* How to return from a function call depends on the architecture variant. */ + +#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__) + +# define RET bx lr +# define RETc(x) bx##x lr + +/* Special precautions for interworking on armv4t. */ +# if (__ARM_ARCH__ == 4) + +/* Always use bx, not ldr pc. */ +# if (defined(__thumb__) || defined(__THUMB_INTERWORK__)) +# define __INTERWORKING__ +# endif /* __THUMB__ || __THUMB_INTERWORK__ */ + +/* Include thumb stub before arm mode code. 
*/ +# if defined(__thumb__) && !defined(__THUMB_INTERWORK__) +# define __INTERWORKING_STUBS__ +# endif /* __thumb__ && !__THUMB_INTERWORK__ */ + +#endif /* __ARM_ARCH == 4 */ + +#else + +# define RET mov pc, lr +# define RETc(x) mov##x pc, lr + +#endif + +.macro cfi_pop advance, reg, cfa_offset +#ifdef __ELF__ + .pushsection .debug_frame + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte \advance + .byte (0xc0 | \reg) /* DW_CFA_restore */ + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 \cfa_offset + .popsection +#endif +.endm +.macro cfi_push advance, reg, offset, cfa_offset +#ifdef __ELF__ + .pushsection .debug_frame + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte \advance + .byte (0x80 | \reg) /* DW_CFA_offset */ + .uleb128 (\offset / -4) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 \cfa_offset + .popsection +#endif +.endm +.macro cfi_start start_label, end_label +#ifdef __ELF__ + .pushsection .debug_frame +LSYM(Lstart_frame): + .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE +LSYM(Lstart_cie): + .4byte 0xffffffff @ CIE Identifier Tag + .byte 0x1 @ CIE Version + .ascii "\0" @ CIE Augmentation + .uleb128 0x1 @ CIE Code Alignment Factor + .sleb128 -4 @ CIE Data Alignment Factor + .byte 0xe @ CIE RA Column + .byte 0xc @ DW_CFA_def_cfa + .uleb128 0xd + .uleb128 0x0 + + .align 2 +LSYM(Lend_cie): + .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length +LSYM(Lstart_fde): + .4byte LSYM(Lstart_frame) @ FDE CIE offset + .4byte \start_label @ FDE initial location + .4byte \end_label-\start_label @ FDE address range + .popsection +#endif +.endm +.macro cfi_end end_label +#ifdef __ELF__ + .pushsection .debug_frame + .align 2 +LSYM(Lend_fde): + .popsection +\end_label: +#endif +.endm + +/* Don't pass dirn, it's there just to get token pasting right. */ + +.macro RETLDM regs=, cond=, unwind=, dirn=ia +#if defined (__INTERWORKING__) + .ifc "\regs","" + ldr\cond lr, [sp], #8 + .else +# if defined(__thumb2__) + pop\cond {\regs, lr} +# else + ldm\cond\dirn sp!, {\regs, lr} +# endif + .endif + .ifnc "\unwind", "" + /* Mark LR as restored. */ +97: cfi_pop 97b - \unwind, 0xe, 0x0 + .endif + bx\cond lr +#else + /* Caller is responsible for providing IT instruction. */ + .ifc "\regs","" + ldr\cond pc, [sp], #8 + .else +# if defined(__thumb2__) + pop\cond {\regs, pc} +# else + ldm\cond\dirn sp!, {\regs, pc} +# endif + .endif +#endif +.endm + +/* The Unified assembly syntax allows the same code to be assembled for both + ARM and Thumb-2. However this is only supported by recent gas, so define + a set of macros to allow ARM code on older assemblers. */ +#if defined(__thumb2__) +.macro do_it cond, suffix="" + it\suffix \cond +.endm +.macro shift1 op, arg0, arg1, arg2 + \op \arg0, \arg1, \arg2 +.endm +#define do_push push +#define do_pop pop +#define COND(op1, op2, cond) op1 ## op2 ## cond +/* Perform an arithmetic operation with a variable shift operand. This + requires two instructions and a scratch register on Thumb-2. 
*/ +.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp + \shiftop \tmp, \src2, \shiftreg + \name \dest, \src1, \tmp +.endm +#else +.macro do_it cond, suffix="" +.endm +.macro shift1 op, arg0, arg1, arg2 + mov \arg0, \arg1, \op \arg2 +.endm +#define do_push stmfd sp!, +#define do_pop ldmfd sp!, +#define COND(op1, op2, cond) op1 ## cond ## op2 +.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp + \name \dest, \src1, \src2, \shiftop \shiftreg +.endm +#endif + +#ifdef __ARM_EABI__ +.macro ARM_LDIV0 name signed + cmp r0, #0 + .ifc \signed, unsigned + movne r0, #0xffffffff + .else + movgt r0, #0x7fffffff + movlt r0, #0x80000000 + .endif + b SYM (__aeabi_idiv0) __PLT__ +.endm +#else +.macro ARM_LDIV0 name signed + str lr, [sp, #-8]! +98: cfi_push 98b - __\name, 0xe, -0x8, 0x8 + bl SYM (__div0) __PLT__ + mov r0, #0 @ About as wrong as it could be. + RETLDM unwind=98b +.endm +#endif + + +#ifdef __ARM_EABI__ +.macro THUMB_LDIV0 name signed +#if defined(__ARM_ARCH_6M__) + .ifc \signed, unsigned + cmp r0, #0 + beq 1f + mov r0, #0 + mvn r0, r0 @ 0xffffffff +1: + .else + cmp r0, #0 + beq 2f + blt 3f + mov r0, #0 + mvn r0, r0 + lsr r0, r0, #1 @ 0x7fffffff + b 2f +3: mov r0, #0x80 + lsl r0, r0, #24 @ 0x80000000 +2: + .endif + push {r0, r1, r2} + ldr r0, 4f + adr r1, 4f + add r0, r1 + str r0, [sp, #8] + @ We know we are not on armv4t, so pop pc is safe. + pop {r0, r1, pc} + .align 2 +4: + .word __aeabi_idiv0 - 4b +#elif defined(__thumb2__) + .syntax unified + .ifc \signed, unsigned + cbz r0, 1f + mov r0, #0xffffffff +1: + .else + cmp r0, #0 + do_it gt + movgt r0, #0x7fffffff + do_it lt + movlt r0, #0x80000000 + .endif + b.w SYM(__aeabi_idiv0) __PLT__ +#else + .align 2 + bx pc + nop + .arm + cmp r0, #0 + .ifc \signed, unsigned + movne r0, #0xffffffff + .else + movgt r0, #0x7fffffff + movlt r0, #0x80000000 + .endif + b SYM(__aeabi_idiv0) __PLT__ + .thumb +#endif +.endm +#else +.macro THUMB_LDIV0 name signed + push { r1, lr } +98: cfi_push 98b - __\name, 0xe, -0x4, 0x8 + bl SYM (__div0) + mov r0, #0 @ About as wrong as it could be. +#if defined (__INTERWORKING__) + pop { r1, r2 } + bx r2 +#else + pop { r1, pc } +#endif +.endm +#endif + +.macro FUNC_END name + SIZE (__\name) +.endm + +.macro DIV_FUNC_END name signed + cfi_start __\name, LSYM(Lend_div0) +LSYM(Ldiv0): +#ifdef __thumb__ + THUMB_LDIV0 \name \signed +#else + ARM_LDIV0 \name \signed +#endif + cfi_end LSYM(Lend_div0) + FUNC_END \name +.endm + +.macro THUMB_FUNC_START name + .globl SYM (\name) + TYPE (\name) + .thumb_func +SYM (\name): +.endm + +/* Function start macros. Variants for ARM and Thumb. */ + +#ifdef __thumb__ +#define THUMB_FUNC .thumb_func +#define THUMB_CODE .force_thumb +# if defined(__thumb2__) +#define THUMB_SYNTAX .syntax divided +# else +#define THUMB_SYNTAX +# endif +#else +#define THUMB_FUNC +#define THUMB_CODE +#define THUMB_SYNTAX +#endif + +.macro FUNC_START name + .text + .globl SYM (__\name) + TYPE (__\name) + .align 0 + THUMB_CODE + THUMB_FUNC + THUMB_SYNTAX +SYM (__\name): +.endm + +/* Special function that will always be coded in ARM assembly, even if + in Thumb-only compilation. */ + +#if defined(__thumb2__) + +/* For Thumb-2 we build everything in thumb mode. */ +.macro ARM_FUNC_START name + FUNC_START \name + .syntax unified +.endm +#define EQUIV .thumb_set +.macro ARM_CALL name + bl __\name +.endm + +#elif defined(__INTERWORKING_STUBS__) + +.macro ARM_FUNC_START name + FUNC_START \name + bx pc + nop + .arm +/* A hook to tell gdb that we've switched to ARM mode. 
Also used to call + directly from other local arm routines. */ +_L__\name: +.endm +#define EQUIV .thumb_set +/* Branch directly to a function declared with ARM_FUNC_START. + Must be called in arm mode. */ +.macro ARM_CALL name + bl _L__\name +.endm + +#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */ + +#ifdef __ARM_ARCH_6M__ +#define EQUIV .thumb_set +#else +.macro ARM_FUNC_START name + .text + .globl SYM (__\name) + TYPE (__\name) + .align 0 + .arm +SYM (__\name): +.endm +#define EQUIV .set +.macro ARM_CALL name + bl __\name +.endm +#endif + +#endif + +.macro FUNC_ALIAS new old + .globl SYM (__\new) +#if defined (__thumb__) + .thumb_set SYM (__\new), SYM (__\old) +#else + .set SYM (__\new), SYM (__\old) +#endif +.endm + +#ifndef __ARM_ARCH_6M__ +.macro ARM_FUNC_ALIAS new old + .globl SYM (__\new) + EQUIV SYM (__\new), SYM (__\old) +#if defined(__INTERWORKING_STUBS__) + .set SYM (_L__\new), SYM (_L__\old) +#endif +.endm +#endif + +#ifdef __ARMEB__ +#define xxh r0 +#define xxl r1 +#define yyh r2 +#define yyl r3 +#else +#define xxh r1 +#define xxl r0 +#define yyh r3 +#define yyl r2 +#endif + +#ifdef __ARM_EABI__ +.macro WEAK name + .weak SYM (__\name) +.endm +#endif + +#ifdef __thumb__ +/* Register aliases. */ + +work .req r4 @ XXXX is this safe ? +dividend .req r0 +divisor .req r1 +overdone .req r2 +result .req r2 +curbit .req r3 +#endif +#if 0 +ip .req r12 +sp .req r13 +lr .req r14 +pc .req r15 +#endif + +/* ------------------------------------------------------------------------ */ +/* Bodies of the division and modulo routines. */ +/* ------------------------------------------------------------------------ */ +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) + +#if defined (__thumb2__) + clz \curbit, \dividend + clz \result, \divisor + sub \curbit, \result, \curbit + rsb \curbit, \curbit, #31 + adr \result, 1f + add \curbit, \result, \curbit, lsl #4 + mov \result, #0 + mov pc, \curbit +.p2align 3 +1: + .set shift, 32 + .rept 32 + .set shift, shift - 1 + cmp.w \dividend, \divisor, lsl #shift + nop.n + adc.w \result, \result, \result + it cs + subcs.w \dividend, \dividend, \divisor, lsl #shift + .endr +#else + clz \curbit, \dividend + clz \result, \divisor + sub \curbit, \result, \curbit + rsbs \curbit, \curbit, #31 + addne \curbit, \curbit, \curbit, lsl #1 + mov \result, #0 + addne pc, pc, \curbit, lsl #2 + nop + .set shift, 32 + .rept 32 + .set shift, shift - 1 + cmp \dividend, \divisor, lsl #shift + adc \result, \result, \result + subcs \dividend, \dividend, \divisor, lsl #shift + .endr +#endif + +#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ +#if __ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else /* __ARM_ARCH__ < 5 */ + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4-bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. 
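+	@ Illustrative sketch only (comment, not assembled): in C terms the two
+	@ alignment loops below behave roughly like
+	@   while (divisor < 0x10000000 && divisor < dividend) { divisor <<= 4; curbit <<= 4; }
+	@   while (divisor < 0x80000000 && divisor < dividend) { divisor <<= 1; curbit <<= 1; }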
+1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b + + mov \result, #0 + +#endif /* __ARM_ARCH__ < 5 */ + + @ Division loop +1: cmp \dividend, \divisor + do_it hs, t + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + do_it ne, t + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ + +.endm +/* ------------------------------------------------------------------------ */ +.macro ARM_DIV2_ORDER divisor, order + +#if __ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm +/* ------------------------------------------------------------------------ */ +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + rsbs \order, \order, #31 + addne pc, pc, \order, lsl #3 + nop + .set shift, 32 + .rept 32 + .set shift, shift - 1 + cmp \dividend, \divisor, lsl #shift + subcs \dividend, \dividend, \divisor, lsl #shift + .endr + +#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ +#if __ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else /* __ARM_ARCH__ < 5 */ + + mov \order, #0 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b + +#endif /* __ARM_ARCH__ < 5 */ + + @ Perform all needed substractions to keep only the reminder. + @ Do comparisons in batch of 4 first. 
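+	@ Illustrative sketch only (comment, not assembled): each pass of the
+	@ batched loop below is roughly
+	@   for (k = 0; k < 4; k++)
+	@     if (dividend >= (divisor >> k)) dividend -= divisor >> k;
+	@   divisor >>= 4;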
+ subs \order, \order, #3 @ yes, 3 is intended here + blt 2f + +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subges \order, \order, #4 + bge 1b + + tst \order, #3 + teqne \dividend, #0 + beq 5f + + @ Either 1, 2 or 3 comparison/substractions are left. +2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: + +#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ + +.endm +/* ------------------------------------------------------------------------ */ +.macro THUMB_DIV_MOD_BODY modulo + @ Load the constant 0x10000000 into our work register. + mov work, #1 + lsl work, #28 +LSYM(Loop1): + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. + cmp divisor, work + bhs LSYM(Lbignum) + cmp divisor, dividend + bhs LSYM(Lbignum) + lsl divisor, #4 + lsl curbit, #4 + b LSYM(Loop1) +LSYM(Lbignum): + @ Set work to 0x80000000 + lsl work, #3 +LSYM(Loop2): + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. + cmp divisor, work + bhs LSYM(Loop3) + cmp divisor, dividend + bhs LSYM(Loop3) + lsl divisor, #1 + lsl curbit, #1 + b LSYM(Loop2) +LSYM(Loop3): + @ Test for possible subtractions ... + .if \modulo + @ ... On the final pass, this may subtract too much from the dividend, + @ so keep track of which subtractions are done, we can fix them up + @ afterwards. + mov overdone, #0 + cmp dividend, divisor + blo LSYM(Lover1) + sub dividend, dividend, divisor +LSYM(Lover1): + lsr work, divisor, #1 + cmp dividend, work + blo LSYM(Lover2) + sub dividend, dividend, work + mov ip, curbit + mov work, #1 + ror curbit, work + orr overdone, curbit + mov curbit, ip +LSYM(Lover2): + lsr work, divisor, #2 + cmp dividend, work + blo LSYM(Lover3) + sub dividend, dividend, work + mov ip, curbit + mov work, #2 + ror curbit, work + orr overdone, curbit + mov curbit, ip +LSYM(Lover3): + lsr work, divisor, #3 + cmp dividend, work + blo LSYM(Lover4) + sub dividend, dividend, work + mov ip, curbit + mov work, #3 + ror curbit, work + orr overdone, curbit + mov curbit, ip +LSYM(Lover4): + mov ip, curbit + .else + @ ... and note which bits are done in the result. On the final pass, + @ this may subtract too much from the dividend, but the result will be ok, + @ since the "bit" will have been shifted out at the bottom. 
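+	@ Illustrative sketch only (comment, not assembled): each unrolled step
+	@ below is roughly, for k = 0..3,
+	@   if (dividend >= (divisor >> k)) { dividend -= divisor >> k; result |= curbit >> k; }
+	@ after which divisor and curbit are both shifted right by four bits.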
+ cmp dividend, divisor + blo LSYM(Lover1) + sub dividend, dividend, divisor + orr result, result, curbit +LSYM(Lover1): + lsr work, divisor, #1 + cmp dividend, work + blo LSYM(Lover2) + sub dividend, dividend, work + lsr work, curbit, #1 + orr result, work +LSYM(Lover2): + lsr work, divisor, #2 + cmp dividend, work + blo LSYM(Lover3) + sub dividend, dividend, work + lsr work, curbit, #2 + orr result, work +LSYM(Lover3): + lsr work, divisor, #3 + cmp dividend, work + blo LSYM(Lover4) + sub dividend, dividend, work + lsr work, curbit, #3 + orr result, work +LSYM(Lover4): + .endif + + cmp dividend, #0 @ Early termination? + beq LSYM(Lover5) + lsr curbit, #4 @ No, any more bits to do? + beq LSYM(Lover5) + lsr divisor, #4 + b LSYM(Loop3) +LSYM(Lover5): + .if \modulo + @ Any subtractions that we should not have done will be recorded in + @ the top three bits of "overdone". Exactly which were not needed + @ are governed by the position of the bit, stored in ip. + mov work, #0xe + lsl work, #28 + and overdone, work + beq LSYM(Lgot_result) + + @ If we terminated early, because dividend became zero, then the + @ bit in ip will not be in the bottom nibble, and we should not + @ perform the additions below. We must test for this though + @ (rather relying upon the TSTs to prevent the additions) since + @ the bit in ip could be in the top two bits which might then match + @ with one of the smaller RORs. + mov curbit, ip + mov work, #0x7 + tst curbit, work + beq LSYM(Lgot_result) + + mov curbit, ip + mov work, #3 + ror curbit, work + tst overdone, curbit + beq LSYM(Lover6) + lsr work, divisor, #3 + add dividend, work +LSYM(Lover6): + mov curbit, ip + mov work, #2 + ror curbit, work + tst overdone, curbit + beq LSYM(Lover7) + lsr work, divisor, #2 + add dividend, work +LSYM(Lover7): + mov curbit, ip + mov work, #1 + ror curbit, work + tst overdone, curbit + beq LSYM(Lgot_result) + lsr work, divisor, #1 + add dividend, work + .endif +LSYM(Lgot_result): +.endm +/* ------------------------------------------------------------------------ */ +/* Start of the Real Functions */ +/* ------------------------------------------------------------------------ */ +#ifdef L_udivsi3 + +#if defined(__prefer_thumb__) + + FUNC_START udivsi3 + FUNC_ALIAS aeabi_uidiv udivsi3 + + cmp divisor, #0 + beq LSYM(Ldiv0) +LSYM(udivsi3_skip_div0_test): + mov curbit, #1 + mov result, #0 + + push { work } + cmp dividend, divisor + blo LSYM(Lgot_result) + + THUMB_DIV_MOD_BODY 0 + + mov r0, result + pop { work } + RET + +#else /* ARM version/Thumb-2. */ + + ARM_FUNC_START udivsi3 + ARM_FUNC_ALIAS aeabi_uidiv udivsi3 + + /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily + check for division-by-zero a second time. 
*/ +LSYM(udivsi3_skip_div0_test): + subs r2, r1, #1 + do_it eq + RETc(eq) + bcc LSYM(Ldiv0) + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + RET + +11: do_it eq, e + moveq r0, #1 + movne r0, #0 + RET + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + RET + +#endif /* ARM version */ + + DIV_FUNC_END udivsi3 unsigned + +#if defined(__prefer_thumb__) +FUNC_START aeabi_uidivmod + cmp r1, #0 + beq LSYM(Ldiv0) + push {r0, r1, lr} + bl LSYM(udivsi3_skip_div0_test) + POP {r1, r2, r3} + mul r2, r0 + sub r1, r1, r2 + bx r3 +#else +ARM_FUNC_START aeabi_uidivmod + cmp r1, #0 + beq LSYM(Ldiv0) + stmfd sp!, { r0, r1, lr } + bl LSYM(udivsi3_skip_div0_test) + ldmfd sp!, { r1, r2, lr } + mul r3, r2, r0 + sub r1, r1, r3 + RET +#endif + FUNC_END aeabi_uidivmod + +#endif /* L_udivsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_umodsi3 + + FUNC_START umodsi3 + +#ifdef __thumb__ + + cmp divisor, #0 + beq LSYM(Ldiv0) + mov curbit, #1 + cmp dividend, divisor + bhs LSYM(Lover10) + RET + +LSYM(Lover10): + push { work } + + THUMB_DIV_MOD_BODY 1 + + pop { work } + RET + +#else /* ARM version. */ + + subs r2, r1, #1 @ compare divisor with 1 + bcc LSYM(Ldiv0) + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + RETc(ls) + + ARM_MOD_BODY r0, r1, r2, r3 + + RET + +#endif /* ARM version. */ + + DIV_FUNC_END umodsi3 unsigned + +#endif /* L_umodsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_divsi3 + +#if defined(__prefer_thumb__) + + FUNC_START divsi3 + FUNC_ALIAS aeabi_idiv divsi3 + + cmp divisor, #0 + beq LSYM(Ldiv0) +LSYM(divsi3_skip_div0_test): + push { work } + mov work, dividend + eor work, divisor @ Save the sign of the result. + mov ip, work + mov curbit, #1 + mov result, #0 + cmp divisor, #0 + bpl LSYM(Lover10) + neg divisor, divisor @ Loops below use unsigned. +LSYM(Lover10): + cmp dividend, #0 + bpl LSYM(Lover11) + neg dividend, dividend +LSYM(Lover11): + cmp dividend, divisor + blo LSYM(Lgot_result) + + THUMB_DIV_MOD_BODY 0 + + mov r0, result + mov work, ip + cmp work, #0 + bpl LSYM(Lover12) + neg r0, r0 +LSYM(Lover12): + pop { work } + RET + +#else /* ARM/Thumb-2 version. */ + + ARM_FUNC_START divsi3 + ARM_FUNC_ALIAS aeabi_idiv divsi3 + + cmp r1, #0 + beq LSYM(Ldiv0) +LSYM(divsi3_skip_div0_test): + eor ip, r0, r1 @ save the sign of the result. + do_it mi + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + do_it mi + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + do_it mi + rsbmi r0, r0, #0 + RET + +10: teq ip, r0 @ same sign ? 
+ do_it mi + rsbmi r0, r0, #0 + RET + +11: do_it lo + movlo r0, #0 + do_it eq,t + moveq r0, ip, asr #31 + orreq r0, r0, #1 + RET + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + do_it mi + rsbmi r0, r0, #0 + RET + +#endif /* ARM version */ + + DIV_FUNC_END divsi3 signed + +#if defined(__prefer_thumb__) +FUNC_START aeabi_idivmod + cmp r1, #0 + beq LSYM(Ldiv0) + push {r0, r1, lr} + bl LSYM(divsi3_skip_div0_test) + POP {r1, r2, r3} + mul r2, r0 + sub r1, r1, r2 + bx r3 +#else +ARM_FUNC_START aeabi_idivmod + cmp r1, #0 + beq LSYM(Ldiv0) + stmfd sp!, { r0, r1, lr } + bl LSYM(divsi3_skip_div0_test) + ldmfd sp!, { r1, r2, lr } + mul r3, r2, r0 + sub r1, r1, r3 + RET +#endif + FUNC_END aeabi_idivmod + +#endif /* L_divsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_modsi3 + + FUNC_START modsi3 + +#ifdef __thumb__ + + mov curbit, #1 + cmp divisor, #0 + beq LSYM(Ldiv0) + bpl LSYM(Lover10) + neg divisor, divisor @ Loops below use unsigned. +LSYM(Lover10): + push { work } + @ Need to save the sign of the dividend, unfortunately, we need + @ work later on. Must do this after saving the original value of + @ the work register, because we will pop this value off first. + push { dividend } + cmp dividend, #0 + bpl LSYM(Lover11) + neg dividend, dividend +LSYM(Lover11): + cmp dividend, divisor + blo LSYM(Lgot_result) + + THUMB_DIV_MOD_BODY 1 + + pop { work } + cmp work, #0 + bpl LSYM(Lover12) + neg dividend, dividend +LSYM(Lover12): + pop { work } + RET + +#else /* ARM version. */ + + cmp r1, #0 + beq LSYM(Ldiv0) + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + RET + +#endif /* ARM version */ + + DIV_FUNC_END modsi3 signed + +#endif /* L_modsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_dvmd_tls + +#ifdef __ARM_EABI__ + WEAK aeabi_idiv0 + WEAK aeabi_ldiv0 + FUNC_START aeabi_idiv0 + FUNC_START aeabi_ldiv0 + RET + FUNC_END aeabi_ldiv0 + FUNC_END aeabi_idiv0 +#else + FUNC_START div0 + RET + FUNC_END div0 +#endif + +#endif /* L_divmodsi_tools */ +/* ------------------------------------------------------------------------ */ +#ifdef L_dvmd_lnx +@ GNU/Linux division-by zero handler. Used in place of L_dvmd_tls + +/* Constant taken from . */ +#define SIGFPE 8 + +#ifdef __ARM_EABI__ + WEAK aeabi_idiv0 + WEAK aeabi_ldiv0 + ARM_FUNC_START aeabi_idiv0 + ARM_FUNC_START aeabi_ldiv0 +#else + ARM_FUNC_START div0 +#endif + + do_push {r1, lr} + mov r0, #SIGFPE + bl SYM(raise) __PLT__ + RETLDM r1 + +#ifdef __ARM_EABI__ + FUNC_END aeabi_ldiv0 + FUNC_END aeabi_idiv0 +#else + FUNC_END div0 +#endif + +#endif /* L_dvmd_lnx */ +#ifdef L_clear_cache +#if defined __ARM_EABI__ && defined __linux__ +@ EABI GNU/Linux call to cacheflush syscall. + ARM_FUNC_START clear_cache + do_push {r7} +#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__) + movw r7, #2 + movt r7, #0xf +#else + mov r7, #0xf0000 + add r7, r7, #2 +#endif + mov r2, #0 + swi 0 + do_pop {r7} + RET + FUNC_END clear_cache +#else +#error "This is only for ARM EABI GNU/Linux" +#endif +#endif /* L_clear_cache */ +/* ------------------------------------------------------------------------ */ +/* Dword shift operations. 
*/ +/* All the following Dword shift variants rely on the fact that + shft xxx, Reg + is in fact done as + shft xxx, (Reg & 255) + so for Reg value in (32...63) and (-1...-31) we will get zero (in the + case of logical shifts) or the sign (for asr). */ + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +/* Prevent __aeabi double-word shifts from being produced on SymbianOS. */ +#ifndef __symbian__ + +#ifdef L_lshrdi3 + + FUNC_START lshrdi3 + FUNC_ALIAS aeabi_llsr lshrdi3 + +#ifdef __thumb__ + lsr al, r2 + mov r3, ah + lsr ah, r2 + mov ip, r3 + sub r2, #32 + lsr r3, r2 + orr al, r3 + neg r2, r2 + mov r3, ip + lsl r3, r2 + orr al, r3 + RET +#else + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, lsr r3 + orrmi al, al, ah, lsl ip + mov ah, ah, lsr r2 + RET +#endif + FUNC_END aeabi_llsr + FUNC_END lshrdi3 + +#endif + +#ifdef L_ashrdi3 + + FUNC_START ashrdi3 + FUNC_ALIAS aeabi_lasr ashrdi3 + +#ifdef __thumb__ + lsr al, r2 + mov r3, ah + asr ah, r2 + sub r2, #32 + @ If r2 is negative at this point the following step would OR + @ the sign bit into all of AL. That's not what we want... + bmi 1f + mov ip, r3 + asr r3, r2 + orr al, r3 + mov r3, ip +1: + neg r2, r2 + lsl r3, r2 + orr al, r3 + RET +#else + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, asr r3 + orrmi al, al, ah, lsl ip + mov ah, ah, asr r2 + RET +#endif + + FUNC_END aeabi_lasr + FUNC_END ashrdi3 + +#endif + +#ifdef L_ashldi3 + + FUNC_START ashldi3 + FUNC_ALIAS aeabi_llsl ashldi3 + +#ifdef __thumb__ + lsl ah, r2 + mov r3, al + lsl al, r2 + mov ip, r3 + sub r2, #32 + lsl r3, r2 + orr ah, r3 + neg r2, r2 + mov r3, ip + lsr r3, r2 + orr ah, r3 + RET +#else + subs r3, r2, #32 + rsb ip, r2, #32 + movmi ah, ah, lsl r2 + movpl ah, al, lsl r3 + orrmi ah, ah, al, lsr ip + mov al, al, lsl r2 + RET +#endif + FUNC_END aeabi_llsl + FUNC_END ashldi3 + +#endif + +#endif /* __symbian__ */ + +#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \ + || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +#define HAVE_ARM_CLZ 1 +#endif + +#ifdef L_clzsi2 +#if defined(__ARM_ARCH_6M__) +FUNC_START clzsi2 + mov r1, #28 + mov r3, #1 + lsl r3, r3, #16 + cmp r0, r3 /* 0x10000 */ + bcc 2f + lsr r0, r0, #16 + sub r1, r1, #16 +2: lsr r3, r3, #8 + cmp r0, r3 /* #0x100 */ + bcc 2f + lsr r0, r0, #8 + sub r1, r1, #8 +2: lsr r3, r3, #4 + cmp r0, r3 /* #0x10 */ + bcc 2f + lsr r0, r0, #4 + sub r1, r1, #4 +2: adr r2, 1f + ldrb r0, [r2, r0] + add r0, r0, r1 + bx lr +.align 2 +1: +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 + FUNC_END clzsi2 +#else +ARM_FUNC_START clzsi2 +# if defined(HAVE_ARM_CLZ) + clz r0, r0 + RET +# else + mov r1, #28 + cmp r0, #0x10000 + do_it cs, t + movcs r0, r0, lsr #16 + subcs r1, r1, #16 + cmp r0, #0x100 + do_it cs, t + movcs r0, r0, lsr #8 + subcs r1, r1, #8 + cmp r0, #0x10 + do_it cs, t + movcs r0, r0, lsr #4 + subcs r1, r1, #4 + adr r2, 1f + ldrb r0, [r2, r0] + add r0, r0, r1 + RET +.align 2 +1: +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 +# endif /* !HAVE_ARM_CLZ */ + FUNC_END clzsi2 +#endif +#endif /* L_clzsi2 */ + +#ifdef L_clzdi2 +#if !defined(HAVE_ARM_CLZ) + +# if defined(__ARM_ARCH_6M__) +FUNC_START clzdi2 + push {r4, lr} +# else +ARM_FUNC_START clzdi2 + do_push {r4, lr} +# endif + cmp xxh, #0 + bne 1f +# ifdef __ARMEB__ + mov r0, xxl + bl __clzsi2 + add r0, r0, #32 + b 2f +1: + bl __clzsi2 +# else + bl __clzsi2 + add r0, r0, #32 + b 2f +1: + mov r0, xxh + bl __clzsi2 +# endif +2: +# 
if defined(__ARM_ARCH_6M__) + pop {r4, pc} +# else + RETLDM r4 +# endif + FUNC_END clzdi2 + +#else /* HAVE_ARM_CLZ */ + +ARM_FUNC_START clzdi2 + cmp xxh, #0 + do_it eq, et + clzeq r0, xxl + clzne r0, xxh + addeq r0, r0, #32 + RET + FUNC_END clzdi2 + +#endif +#endif /* L_clzdi2 */ + +/* ------------------------------------------------------------------------ */ +/* These next two sections are here despite the fact that they contain Thumb + assembler because their presence allows interworked code to be linked even + when the GCC library is this one. */ + +/* Do not build the interworking functions when the target architecture does + not support Thumb instructions. (This can be a multilib option). */ +#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\ + || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \ + || __ARM_ARCH__ >= 6 + +#if defined L_call_via_rX + +/* These labels & instructions are used by the Arm/Thumb interworking code. + The address of function to be called is loaded into a register and then + one of these labels is called via a BL instruction. This puts the + return address into the link register with the bottom bit set, and the + code here switches to the correct mode before executing the function. */ + + .text + .align 0 + .force_thumb + +.macro call_via register + THUMB_FUNC_START _call_via_\register + + bx \register + nop + + SIZE (_call_via_\register) +.endm + + call_via r0 + call_via r1 + call_via r2 + call_via r3 + call_via r4 + call_via r5 + call_via r6 + call_via r7 + call_via r8 + call_via r9 + call_via sl + call_via fp + call_via ip + call_via sp + call_via lr + +#endif /* L_call_via_rX */ + +/* Don't bother with the old interworking routines for Thumb-2. */ +/* ??? Maybe only omit these on "m" variants. */ +#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__) + +#if defined L_interwork_call_via_rX + +/* These labels & instructions are used by the Arm/Thumb interworking code, + when the target address is in an unknown instruction set. The address + of function to be called is loaded into a register and then one of these + labels is called via a BL instruction. This puts the return address + into the link register with the bottom bit set, and the code here + switches to the correct mode before executing the function. Unfortunately + the target code cannot be relied upon to return via a BX instruction, so + instead we have to store the resturn address on the stack and allow the + called function to return here instead. Upon return we recover the real + return address and use a BX to get back to Thumb mode. + + There are three variations of this code. The first, + _interwork_call_via_rN(), will push the return address onto the + stack and pop it in _arm_return(). It should only be used if all + arguments are passed in registers. + + The second, _interwork_r7_call_via_rN(), instead stores the return + address at [r7, #-4]. It is the caller's responsibility to ensure + that this address is valid and contains no useful data. + + The third, _interwork_r11_call_via_rN(), works in the same way but + uses r11 instead of r7. It is useful if the caller does not really + need a frame pointer. */ + + .text + .align 0 + + .code 32 + .globl _arm_return +LSYM(Lstart_arm_return): + cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return) + cfi_push 0, 0xe, -0x8, 0x8 + nop @ This nop is for the benefit of debuggers, so that + @ backtraces will use the correct unwind information. 
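+@ _arm_return reloads the return address that the interworking stub saved on
+@ the stack and branches back through it, restoring the caller's instruction
+@ set.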
+_arm_return: + RETLDM unwind=LSYM(Lstart_arm_return) + cfi_end LSYM(Lend_arm_return) + + .globl _arm_return_r7 +_arm_return_r7: + ldr lr, [r7, #-4] + bx lr + + .globl _arm_return_r11 +_arm_return_r11: + ldr lr, [r11, #-4] + bx lr + +.macro interwork_with_frame frame, register, name, return + .code 16 + + THUMB_FUNC_START \name + + bx pc + nop + + .code 32 + tst \register, #1 + streq lr, [\frame, #-4] + adreq lr, _arm_return_\frame + bx \register + + SIZE (\name) +.endm + +.macro interwork register + .code 16 + + THUMB_FUNC_START _interwork_call_via_\register + + bx pc + nop + + .code 32 + .globl LSYM(Lchange_\register) +LSYM(Lchange_\register): + tst \register, #1 + streq lr, [sp, #-8]! + adreq lr, _arm_return + bx \register + + SIZE (_interwork_call_via_\register) + + interwork_with_frame r7,\register,_interwork_r7_call_via_\register + interwork_with_frame r11,\register,_interwork_r11_call_via_\register +.endm + + interwork r0 + interwork r1 + interwork r2 + interwork r3 + interwork r4 + interwork r5 + interwork r6 + interwork r7 + interwork r8 + interwork r9 + interwork sl + interwork fp + interwork ip + interwork sp + + /* The LR case has to be handled a little differently... */ + .code 16 + + THUMB_FUNC_START _interwork_call_via_lr + + bx pc + nop + + .code 32 + .globl .Lchange_lr +.Lchange_lr: + tst lr, #1 + stmeqdb r13!, {lr, pc} + mov ip, lr + adreq lr, _arm_return + bx ip + + SIZE (_interwork_call_via_lr) + +#endif /* L_interwork_call_via_rX */ +#endif /* !__thumb2__ */ + +/* Functions to support compact pic switch tables in thumb1 state. + All these routines take an index into the table in r0. The + table is at LR & ~1 (but this must be rounded up in the case + of 32-bit entires). They are only permitted to clobber r12 + and r14 and r0 must be preserved on exit. */ +#ifdef L_thumb1_case_sqi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_sqi + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrsb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr + SIZE (__gnu_thumb1_case_sqi) +#endif + +#ifdef L_thumb1_case_uqi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_uqi + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr + SIZE (__gnu_thumb1_case_uqi) +#endif + +#ifdef L_thumb1_case_shi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_shi + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrsh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr + SIZE (__gnu_thumb1_case_shi) +#endif + +#ifdef L_thumb1_case_uhi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_uhi + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr + SIZE (__gnu_thumb1_case_uhi) +#endif + +#ifdef L_thumb1_case_si + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_si + push {r0, r1} + mov r1, lr + adds.n r1, r1, #2 /* Align to word. */ + lsrs r1, r1, #2 + lsls r0, r0, #2 + lsls r1, r1, #2 + ldr r0, [r1, r0] + adds r0, r0, r1 + mov lr, r0 + pop {r0, r1} + mov pc, lr /* We know we were called from thumb code. */ + SIZE (__gnu_thumb1_case_si) +#endif + +#endif /* Arch supports thumb. 
*/ + +#ifndef __symbian__ +#ifndef __ARM_ARCH_6M__ +#include "ieee754-df.S" +#include "ieee754-sf.S" +#include "bpabi.S" +#else /* __ARM_ARCH_6M__ */ +#include "bpabi-v6m.S" +#endif /* __ARM_ARCH_6M__ */ +#endif /* !__symbian__ */ diff --git a/libgcc/config/arm/libunwind.S b/libgcc/config/arm/libunwind.S index a3a19daab4b..8166cd86e47 100644 --- a/libgcc/config/arm/libunwind.S +++ b/libgcc/config/arm/libunwind.S @@ -40,7 +40,7 @@ #ifndef __symbian__ -#include "config/arm/lib1funcs.asm" +#include "lib1funcs.S" .macro UNPREFIX name .global SYM (\name) diff --git a/libgcc/config/arm/t-arm b/libgcc/config/arm/t-arm new file mode 100644 index 00000000000..4e17e99b4a5 --- /dev/null +++ b/libgcc/config/arm/t-arm @@ -0,0 +1,3 @@ +LIB1ASMSRC = arm/lib1funcs.S +LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \ + _thumb1_case_uhi _thumb1_case_si diff --git a/libgcc/config/arm/t-bpabi b/libgcc/config/arm/t-bpabi index ebb2f9fd85d..8787285ab1f 100644 --- a/libgcc/config/arm/t-bpabi +++ b/libgcc/config/arm/t-bpabi @@ -1,3 +1,6 @@ +# Add the bpabi.S functions. +LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod + LIB2ADDEH = $(srcdir)/config/arm/unwind-arm.c \ $(srcdir)/config/arm/libunwind.S \ $(srcdir)/config/arm/pr-support.c $(srcdir)/unwind-c.c diff --git a/libgcc/config/arm/t-elf b/libgcc/config/arm/t-elf new file mode 100644 index 00000000000..fab32e445be --- /dev/null +++ b/libgcc/config/arm/t-elf @@ -0,0 +1,13 @@ +# For most CPUs we have an assembly soft-float implementations. +# However this is not true for ARMv6M. Here we want to use the soft-fp C +# implementation. The soft-fp code is only build for ARMv6M. This pulls +# in the asm implementation for other CPUs. +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \ + _call_via_rX _interwork_call_via_rX \ + _lshrdi3 _ashrdi3 _ashldi3 \ + _arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \ + _arm_fixdfsi _arm_fixunsdfsi \ + _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \ + _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \ + _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \ + _clzsi2 _clzdi2 diff --git a/libgcc/config/arm/t-linux b/libgcc/config/arm/t-linux new file mode 100644 index 00000000000..a154f775a0f --- /dev/null +++ b/libgcc/config/arm/t-linux @@ -0,0 +1,3 @@ +LIB1ASMSRC = arm/lib1funcs.S +LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx _clzsi2 _clzdi2 \ + _arm_addsubdf3 _arm_addsubsf3 diff --git a/libgcc/config/arm/t-linux-eabi b/libgcc/config/arm/t-linux-eabi new file mode 100644 index 00000000000..dfc9197ea45 --- /dev/null +++ b/libgcc/config/arm/t-linux-eabi @@ -0,0 +1,2 @@ +# Use a version of div0 which raises SIGFPE, and a special __clear_cache. +LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache diff --git a/libgcc/config/arm/t-strongarm-elf b/libgcc/config/arm/t-strongarm-elf new file mode 100644 index 00000000000..cd9f9667ddf --- /dev/null +++ b/libgcc/config/arm/t-strongarm-elf @@ -0,0 +1 @@ +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2 diff --git a/libgcc/config/arm/t-symbian b/libgcc/config/arm/t-symbian index 6788d5f40b3..1989696c8a3 100644 --- a/libgcc/config/arm/t-symbian +++ b/libgcc/config/arm/t-symbian @@ -1,2 +1,16 @@ +LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 + +# These functions have __aeabi equivalents and will never be called by GCC. 
+# By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being +# used -- and we make sure that definitions are not available in lib1funcs.S, +# either, so they end up undefined. +LIB1ASMFUNCS += \ + _ashldi3 _ashrdi3 _divdi3 _floatdidf _udivmoddi4 _umoddi3 \ + _udivdi3 _lshrdi3 _moddi3 _muldi3 _negdi2 _cmpdi2 \ + _fixdfdi _fixsfdi _fixunsdfdi _fixunssfdi _floatdisf \ + _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \ + _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \ + _fixsfsi _fixunssfsi + # Include the gcc personality routine LIB2ADDEH = $(srcdir)/unwind-c.c $(srcdir)/config/arm/pr-support.c diff --git a/libgcc/config/arm/t-vxworks b/libgcc/config/arm/t-vxworks new file mode 100644 index 00000000000..70ccdc1556a --- /dev/null +++ b/libgcc/config/arm/t-vxworks @@ -0,0 +1 @@ +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 diff --git a/libgcc/config/arm/t-wince-pe b/libgcc/config/arm/t-wince-pe new file mode 100644 index 00000000000..33ea969ccf4 --- /dev/null +++ b/libgcc/config/arm/t-wince-pe @@ -0,0 +1 @@ +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S new file mode 100644 index 00000000000..8c369c96a77 --- /dev/null +++ b/libgcc/config/avr/lib1funcs.S @@ -0,0 +1,1533 @@ +/* -*- Mode: Asm -*- */ +/* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009 + Free Software Foundation, Inc. + Contributed by Denis Chertykov + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#define __zero_reg__ r1 +#define __tmp_reg__ r0 +#define __SREG__ 0x3f +#define __SP_H__ 0x3e +#define __SP_L__ 0x3d +#define __RAMPZ__ 0x3B +#define __EIND__ 0x3C + +/* Most of the functions here are called directly from avr.md + patterns, instead of using the standard libcall mechanisms. + This can make better code because GCC knows exactly which + of the call-used registers (not all of them) are clobbered. */ + +/* FIXME: At present, there is no SORT directive in the linker + script so that we must not assume that different modules + in the same input section like .libgcc.text.mul will be + located close together. Therefore, we cannot use + RCALL/RJMP to call a function like __udivmodhi4 from + __divmodhi4 and have to use lengthy XCALL/XJMP even + though they are in the same input section and all same + input sections together are small enough to reach every + location with a RCALL/RJMP instruction. 
*/ + + .macro mov_l r_dest, r_src +#if defined (__AVR_HAVE_MOVW__) + movw \r_dest, \r_src +#else + mov \r_dest, \r_src +#endif + .endm + + .macro mov_h r_dest, r_src +#if defined (__AVR_HAVE_MOVW__) + ; empty +#else + mov \r_dest, \r_src +#endif + .endm + +#if defined (__AVR_HAVE_JMP_CALL__) +#define XCALL call +#define XJMP jmp +#else +#define XCALL rcall +#define XJMP rjmp +#endif + +.macro DEFUN name +.global \name +.func \name +\name: +.endm + +.macro ENDF name +.size \name, .-\name +.endfunc +.endm + + +.section .text.libgcc.mul, "ax", @progbits + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */ +#if !defined (__AVR_HAVE_MUL__) +/******************************************************* + Multiplication 8 x 8 without MUL +*******************************************************/ +#if defined (L_mulqi3) + +#define r_arg2 r22 /* multiplicand */ +#define r_arg1 r24 /* multiplier */ +#define r_res __tmp_reg__ /* result */ + +DEFUN __mulqi3 + clr r_res ; clear result +__mulqi3_loop: + sbrc r_arg1,0 + add r_res,r_arg2 + add r_arg2,r_arg2 ; shift multiplicand + breq __mulqi3_exit ; while multiplicand != 0 + lsr r_arg1 ; + brne __mulqi3_loop ; exit if multiplier = 0 +__mulqi3_exit: + mov r_arg1,r_res ; result to return register + ret +ENDF __mulqi3 + +#undef r_arg2 +#undef r_arg1 +#undef r_res + +#endif /* defined (L_mulqi3) */ + +#if defined (L_mulqihi3) +DEFUN __mulqihi3 + clr r25 + sbrc r24, 7 + dec r25 + clr r23 + sbrc r22, 7 + dec r22 + XJMP __mulhi3 +ENDF __mulqihi3: +#endif /* defined (L_mulqihi3) */ + +#if defined (L_umulqihi3) +DEFUN __umulqihi3 + clr r25 + clr r23 + XJMP __mulhi3 +ENDF __umulqihi3 +#endif /* defined (L_umulqihi3) */ + +/******************************************************* + Multiplication 16 x 16 without MUL +*******************************************************/ +#if defined (L_mulhi3) +#define r_arg1L r24 /* multiplier Low */ +#define r_arg1H r25 /* multiplier High */ +#define r_arg2L r22 /* multiplicand Low */ +#define r_arg2H r23 /* multiplicand High */ +#define r_resL __tmp_reg__ /* result Low */ +#define r_resH r21 /* result High */ + +DEFUN __mulhi3 + clr r_resH ; clear result + clr r_resL ; clear result +__mulhi3_loop: + sbrs r_arg1L,0 + rjmp __mulhi3_skip1 + add r_resL,r_arg2L ; result + multiplicand + adc r_resH,r_arg2H +__mulhi3_skip1: + add r_arg2L,r_arg2L ; shift multiplicand + adc r_arg2H,r_arg2H + + cp r_arg2L,__zero_reg__ + cpc r_arg2H,__zero_reg__ + breq __mulhi3_exit ; while multiplicand != 0 + + lsr r_arg1H ; gets LSB of multiplier + ror r_arg1L + sbiw r_arg1L,0 + brne __mulhi3_loop ; exit if multiplier = 0 +__mulhi3_exit: + mov r_arg1H,r_resH ; result to return register + mov r_arg1L,r_resL + ret +ENDF __mulhi3 + +#undef r_arg1L +#undef r_arg1H +#undef r_arg2L +#undef r_arg2H +#undef r_resL +#undef r_resH + +#endif /* defined (L_mulhi3) */ + +/******************************************************* + Widening Multiplication 32 = 16 x 16 without MUL +*******************************************************/ + +#if defined (L_mulhisi3) +DEFUN __mulhisi3 +;;; FIXME: This is dead code (noone calls it) + mov_l r18, r24 + mov_h r19, r25 + clr r24 + sbrc r23, 7 + dec r24 + mov r25, r24 + clr r20 + sbrc r19, 7 + dec r20 + mov r21, r20 + XJMP __mulsi3 +ENDF __mulhisi3 +#endif /* defined (L_mulhisi3) */ + +#if defined (L_umulhisi3) +DEFUN __umulhisi3 +;;; FIXME: This is dead code (noone calls it) + mov_l r18, r24 + mov_h r19, r25 + clr r24 + clr r25 + mov_l r20, r24 + mov_h r21, r25 + 
XJMP __mulsi3 +ENDF __umulhisi3 +#endif /* defined (L_umulhisi3) */ + +#if defined (L_mulsi3) +/******************************************************* + Multiplication 32 x 32 without MUL +*******************************************************/ +#define r_arg1L r22 /* multiplier Low */ +#define r_arg1H r23 +#define r_arg1HL r24 +#define r_arg1HH r25 /* multiplier High */ + +#define r_arg2L r18 /* multiplicand Low */ +#define r_arg2H r19 +#define r_arg2HL r20 +#define r_arg2HH r21 /* multiplicand High */ + +#define r_resL r26 /* result Low */ +#define r_resH r27 +#define r_resHL r30 +#define r_resHH r31 /* result High */ + +DEFUN __mulsi3 + clr r_resHH ; clear result + clr r_resHL ; clear result + clr r_resH ; clear result + clr r_resL ; clear result +__mulsi3_loop: + sbrs r_arg1L,0 + rjmp __mulsi3_skip1 + add r_resL,r_arg2L ; result + multiplicand + adc r_resH,r_arg2H + adc r_resHL,r_arg2HL + adc r_resHH,r_arg2HH +__mulsi3_skip1: + add r_arg2L,r_arg2L ; shift multiplicand + adc r_arg2H,r_arg2H + adc r_arg2HL,r_arg2HL + adc r_arg2HH,r_arg2HH + + lsr r_arg1HH ; gets LSB of multiplier + ror r_arg1HL + ror r_arg1H + ror r_arg1L + brne __mulsi3_loop + sbiw r_arg1HL,0 + cpc r_arg1H,r_arg1L + brne __mulsi3_loop ; exit if multiplier = 0 +__mulsi3_exit: + mov_h r_arg1HH,r_resHH ; result to return register + mov_l r_arg1HL,r_resHL + mov_h r_arg1H,r_resH + mov_l r_arg1L,r_resL + ret +ENDF __mulsi3 + +#undef r_arg1L +#undef r_arg1H +#undef r_arg1HL +#undef r_arg1HH + +#undef r_arg2L +#undef r_arg2H +#undef r_arg2HL +#undef r_arg2HH + +#undef r_resL +#undef r_resH +#undef r_resHL +#undef r_resHH + +#endif /* defined (L_mulsi3) */ + +#endif /* !defined (__AVR_HAVE_MUL__) */ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +#if defined (__AVR_HAVE_MUL__) +#define A0 26 +#define B0 18 +#define C0 22 + +#define A1 A0+1 + +#define B1 B0+1 +#define B2 B0+2 +#define B3 B0+3 + +#define C1 C0+1 +#define C2 C0+2 +#define C3 C0+3 + +/******************************************************* + Widening Multiplication 32 = 16 x 16 +*******************************************************/ + +#if defined (L_mulhisi3) +;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18 +;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __mulhisi3 + XCALL __umulhisi3 + ;; Sign-extend B + tst B1 + brpl 1f + sub C2, A0 + sbc C3, A1 +1: ;; Sign-extend A + XJMP __usmulhisi3_tail +ENDF __mulhisi3 +#endif /* L_mulhisi3 */ + +#if defined (L_usmulhisi3) +;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18 +;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __usmulhisi3 + XCALL __umulhisi3 + ;; FALLTHRU +ENDF __usmulhisi3 + +DEFUN __usmulhisi3_tail + ;; Sign-extend A + sbrs A1, 7 + ret + sub C2, B0 + sbc C3, B1 + ret +ENDF __usmulhisi3_tail +#endif /* L_usmulhisi3 */ + +#if defined (L_umulhisi3) +;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18 +;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __umulhisi3 + mul A0, B0 + movw C0, r0 + mul A1, B1 + movw C2, r0 + mul A0, B1 + rcall 1f + mul A1, B0 +1: add C1, r0 + adc C2, r1 + clr __zero_reg__ + adc C3, __zero_reg__ + ret +ENDF __umulhisi3 +#endif /* L_umulhisi3 */ + +/******************************************************* + Widening Multiplication 32 = 16 x 32 +*******************************************************/ + +#if defined (L_mulshisi3) +;;; R25:R22 = (signed long) 
R27:R26 * R21:R18 +;;; (C3:C0) = (signed long) A1:A0 * B3:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __mulshisi3 +#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ + ;; Some cores have problem skipping 2-word instruction + tst A1 + brmi __mulohisi3 +#else + sbrs A1, 7 +#endif /* __AVR_HAVE_JMP_CALL__ */ + XJMP __muluhisi3 + ;; FALLTHRU +ENDF __mulshisi3 + +;;; R25:R22 = (one-extended long) R27:R26 * R21:R18 +;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __mulohisi3 + XCALL __muluhisi3 + ;; One-extend R27:R26 (A1:A0) + sub C2, B0 + sbc C3, B1 + ret +ENDF __mulohisi3 +#endif /* L_mulshisi3 */ + +#if defined (L_muluhisi3) +;;; R25:R22 = (unsigned long) R27:R26 * R21:R18 +;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __muluhisi3 + XCALL __umulhisi3 + mul A0, B3 + add C3, r0 + mul A1, B2 + add C3, r0 + mul A0, B2 + add C2, r0 + adc C3, r1 + clr __zero_reg__ + ret +ENDF __muluhisi3 +#endif /* L_muluhisi3 */ + +/******************************************************* + Multiplication 32 x 32 +*******************************************************/ + +#if defined (L_mulsi3) +;;; R25:R22 = R25:R22 * R21:R18 +;;; (C3:C0) = C3:C0 * B3:B0 +;;; Clobbers: R26, R27, __tmp_reg__ +DEFUN __mulsi3 + movw A0, C0 + push C2 + push C3 + XCALL __muluhisi3 + pop A1 + pop A0 + ;; A1:A0 now contains the high word of A + mul A0, B0 + add C2, r0 + adc C3, r1 + mul A0, B1 + add C3, r0 + mul A1, B0 + add C3, r0 + clr __zero_reg__ + ret +ENDF __mulsi3 +#endif /* L_mulsi3 */ + +#undef A0 +#undef A1 + +#undef B0 +#undef B1 +#undef B2 +#undef B3 + +#undef C0 +#undef C1 +#undef C2 +#undef C3 + +#endif /* __AVR_HAVE_MUL__ */ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +.section .text.libgcc.div, "ax", @progbits + +/******************************************************* + Division 8 / 8 => (result + remainder) +*******************************************************/ +#define r_rem r25 /* remainder */ +#define r_arg1 r24 /* dividend, quotient */ +#define r_arg2 r22 /* divisor */ +#define r_cnt r23 /* loop count */ + +#if defined (L_udivmodqi4) +DEFUN __udivmodqi4 + sub r_rem,r_rem ; clear remainder and carry + ldi r_cnt,9 ; init loop counter + rjmp __udivmodqi4_ep ; jump to entry point +__udivmodqi4_loop: + rol r_rem ; shift dividend into remainder + cp r_rem,r_arg2 ; compare remainder & divisor + brcs __udivmodqi4_ep ; remainder <= divisor + sub r_rem,r_arg2 ; restore remainder +__udivmodqi4_ep: + rol r_arg1 ; shift dividend (with CARRY) + dec r_cnt ; decrement loop counter + brne __udivmodqi4_loop + com r_arg1 ; complement result + ; because C flag was complemented in loop + ret +ENDF __udivmodqi4 +#endif /* defined (L_udivmodqi4) */ + +#if defined (L_divmodqi4) +DEFUN __divmodqi4 + bst r_arg1,7 ; store sign of dividend + mov __tmp_reg__,r_arg1 + eor __tmp_reg__,r_arg2; r0.7 is sign of result + sbrc r_arg1,7 + neg r_arg1 ; dividend negative : negate + sbrc r_arg2,7 + neg r_arg2 ; divisor negative : negate + XCALL __udivmodqi4 ; do the unsigned div/mod + brtc __divmodqi4_1 + neg r_rem ; correct remainder sign +__divmodqi4_1: + sbrc __tmp_reg__,7 + neg r_arg1 ; correct result sign +__divmodqi4_exit: + ret +ENDF __divmodqi4 +#endif /* defined (L_divmodqi4) */ + +#undef r_rem +#undef r_arg1 +#undef r_arg2 +#undef r_cnt + + +/******************************************************* + Division 16 / 16 => (result + remainder) +*******************************************************/ +#define r_remL r26 /* remainder Low */ +#define r_remH r27 /* remainder High */ + +/* 
return: remainder */ +#define r_arg1L r24 /* dividend Low */ +#define r_arg1H r25 /* dividend High */ + +/* return: quotient */ +#define r_arg2L r22 /* divisor Low */ +#define r_arg2H r23 /* divisor High */ + +#define r_cnt r21 /* loop count */ + +#if defined (L_udivmodhi4) +DEFUN __udivmodhi4 + sub r_remL,r_remL + sub r_remH,r_remH ; clear remainder and carry + ldi r_cnt,17 ; init loop counter + rjmp __udivmodhi4_ep ; jump to entry point +__udivmodhi4_loop: + rol r_remL ; shift dividend into remainder + rol r_remH + cp r_remL,r_arg2L ; compare remainder & divisor + cpc r_remH,r_arg2H + brcs __udivmodhi4_ep ; remainder < divisor + sub r_remL,r_arg2L ; restore remainder + sbc r_remH,r_arg2H +__udivmodhi4_ep: + rol r_arg1L ; shift dividend (with CARRY) + rol r_arg1H + dec r_cnt ; decrement loop counter + brne __udivmodhi4_loop + com r_arg1L + com r_arg1H +; div/mod results to return registers, as for the div() function + mov_l r_arg2L, r_arg1L ; quotient + mov_h r_arg2H, r_arg1H + mov_l r_arg1L, r_remL ; remainder + mov_h r_arg1H, r_remH + ret +ENDF __udivmodhi4 +#endif /* defined (L_udivmodhi4) */ + +#if defined (L_divmodhi4) +DEFUN __divmodhi4 + .global _div +_div: + bst r_arg1H,7 ; store sign of dividend + mov __tmp_reg__,r_arg1H + eor __tmp_reg__,r_arg2H ; r0.7 is sign of result + rcall __divmodhi4_neg1 ; dividend negative : negate + sbrc r_arg2H,7 + rcall __divmodhi4_neg2 ; divisor negative : negate + XCALL __udivmodhi4 ; do the unsigned div/mod + rcall __divmodhi4_neg1 ; correct remainder sign + tst __tmp_reg__ + brpl __divmodhi4_exit +__divmodhi4_neg2: + com r_arg2H + neg r_arg2L ; correct divisor/result sign + sbci r_arg2H,0xff +__divmodhi4_exit: + ret +__divmodhi4_neg1: + brtc __divmodhi4_exit + com r_arg1H + neg r_arg1L ; correct dividend/remainder sign + sbci r_arg1H,0xff + ret +ENDF __divmodhi4 +#endif /* defined (L_divmodhi4) */ + +#undef r_remH +#undef r_remL + +#undef r_arg1H +#undef r_arg1L + +#undef r_arg2H +#undef r_arg2L + +#undef r_cnt + +/******************************************************* + Division 32 / 32 => (result + remainder) +*******************************************************/ +#define r_remHH r31 /* remainder High */ +#define r_remHL r30 +#define r_remH r27 +#define r_remL r26 /* remainder Low */ + +/* return: remainder */ +#define r_arg1HH r25 /* dividend High */ +#define r_arg1HL r24 +#define r_arg1H r23 +#define r_arg1L r22 /* dividend Low */ + +/* return: quotient */ +#define r_arg2HH r21 /* divisor High */ +#define r_arg2HL r20 +#define r_arg2H r19 +#define r_arg2L r18 /* divisor Low */ + +#define r_cnt __zero_reg__ /* loop count (0 after the loop!) 
*/ + +#if defined (L_udivmodsi4) +DEFUN __udivmodsi4 + ldi r_remL, 33 ; init loop counter + mov r_cnt, r_remL + sub r_remL,r_remL + sub r_remH,r_remH ; clear remainder and carry + mov_l r_remHL, r_remL + mov_h r_remHH, r_remH + rjmp __udivmodsi4_ep ; jump to entry point +__udivmodsi4_loop: + rol r_remL ; shift dividend into remainder + rol r_remH + rol r_remHL + rol r_remHH + cp r_remL,r_arg2L ; compare remainder & divisor + cpc r_remH,r_arg2H + cpc r_remHL,r_arg2HL + cpc r_remHH,r_arg2HH + brcs __udivmodsi4_ep ; remainder <= divisor + sub r_remL,r_arg2L ; restore remainder + sbc r_remH,r_arg2H + sbc r_remHL,r_arg2HL + sbc r_remHH,r_arg2HH +__udivmodsi4_ep: + rol r_arg1L ; shift dividend (with CARRY) + rol r_arg1H + rol r_arg1HL + rol r_arg1HH + dec r_cnt ; decrement loop counter + brne __udivmodsi4_loop + ; __zero_reg__ now restored (r_cnt == 0) + com r_arg1L + com r_arg1H + com r_arg1HL + com r_arg1HH +; div/mod results to return registers, as for the ldiv() function + mov_l r_arg2L, r_arg1L ; quotient + mov_h r_arg2H, r_arg1H + mov_l r_arg2HL, r_arg1HL + mov_h r_arg2HH, r_arg1HH + mov_l r_arg1L, r_remL ; remainder + mov_h r_arg1H, r_remH + mov_l r_arg1HL, r_remHL + mov_h r_arg1HH, r_remHH + ret +ENDF __udivmodsi4 +#endif /* defined (L_udivmodsi4) */ + +#if defined (L_divmodsi4) +DEFUN __divmodsi4 + bst r_arg1HH,7 ; store sign of dividend + mov __tmp_reg__,r_arg1HH + eor __tmp_reg__,r_arg2HH ; r0.7 is sign of result + rcall __divmodsi4_neg1 ; dividend negative : negate + sbrc r_arg2HH,7 + rcall __divmodsi4_neg2 ; divisor negative : negate + XCALL __udivmodsi4 ; do the unsigned div/mod + rcall __divmodsi4_neg1 ; correct remainder sign + rol __tmp_reg__ + brcc __divmodsi4_exit +__divmodsi4_neg2: + com r_arg2HH + com r_arg2HL + com r_arg2H + neg r_arg2L ; correct divisor/quotient sign + sbci r_arg2H,0xff + sbci r_arg2HL,0xff + sbci r_arg2HH,0xff +__divmodsi4_exit: + ret +__divmodsi4_neg1: + brtc __divmodsi4_exit + com r_arg1HH + com r_arg1HL + com r_arg1H + neg r_arg1L ; correct dividend/remainder sign + sbci r_arg1H, 0xff + sbci r_arg1HL,0xff + sbci r_arg1HH,0xff + ret +ENDF __divmodsi4 +#endif /* defined (L_divmodsi4) */ + + +.section .text.libgcc.prologue, "ax", @progbits + +/********************************** + * This is a prologue subroutine + **********************************/ +#if defined (L_prologue) + +DEFUN __prologue_saves__ + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 + in r28,__SP_L__ + in r29,__SP_H__ + sub r28,r26 + sbc r29,r27 + in __tmp_reg__,__SREG__ + cli + out __SP_H__,r29 + out __SREG__,__tmp_reg__ + out __SP_L__,r28 +#if defined (__AVR_HAVE_EIJMP_EICALL__) + eijmp +#else + ijmp +#endif + +ENDF __prologue_saves__ +#endif /* defined (L_prologue) */ + +/* + * This is an epilogue subroutine + */ +#if defined (L_epilogue) + +DEFUN __epilogue_restores__ + ldd r2,Y+18 + ldd r3,Y+17 + ldd r4,Y+16 + ldd r5,Y+15 + ldd r6,Y+14 + ldd r7,Y+13 + ldd r8,Y+12 + ldd r9,Y+11 + ldd r10,Y+10 + ldd r11,Y+9 + ldd r12,Y+8 + ldd r13,Y+7 + ldd r14,Y+6 + ldd r15,Y+5 + ldd r16,Y+4 + ldd r17,Y+3 + ldd r26,Y+2 + ldd r27,Y+1 + add r28,r30 + adc r29,__zero_reg__ + in __tmp_reg__,__SREG__ + cli + out __SP_H__,r29 + out __SREG__,__tmp_reg__ + out __SP_L__,r28 + mov_l r28, r26 + mov_h r29, r27 + ret +ENDF __epilogue_restores__ +#endif /* defined (L_epilogue) */ + +#ifdef L_exit + .section .fini9,"ax",@progbits +DEFUN _exit + .weak exit +exit: +ENDF _exit + + 
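+	/* _exit and its weak alias exit are empty stubs; after the .fini
+	   sections below run, control ends up in the __stop_program loop.  */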
/* Code from .fini8 ... .fini1 sections inserted by ld script. */ + + .section .fini0,"ax",@progbits + cli +__stop_program: + rjmp __stop_program +#endif /* defined (L_exit) */ + +#ifdef L_cleanup + .weak _cleanup + .func _cleanup +_cleanup: + ret +.endfunc +#endif /* defined (L_cleanup) */ + + +.section .text.libgcc, "ax", @progbits + +#ifdef L_tablejump +DEFUN __tablejump2__ + lsl r30 + rol r31 + ;; FALLTHRU +ENDF __tablejump2__ + +DEFUN __tablejump__ +#if defined (__AVR_HAVE_LPMX__) + lpm __tmp_reg__, Z+ + lpm r31, Z + mov r30, __tmp_reg__ +#if defined (__AVR_HAVE_EIJMP_EICALL__) + eijmp +#else + ijmp +#endif + +#else /* !HAVE_LPMX */ + lpm + adiw r30, 1 + push r0 + lpm + push r0 +#if defined (__AVR_HAVE_EIJMP_EICALL__) + in __tmp_reg__, __EIND__ + push __tmp_reg__ +#endif + ret +#endif /* !HAVE_LPMX */ +ENDF __tablejump__ +#endif /* defined (L_tablejump) */ + +#ifdef L_copy_data + .section .init4,"ax",@progbits +DEFUN __do_copy_data +#if defined(__AVR_HAVE_ELPMX__) + ldi r17, hi8(__data_end) + ldi r26, lo8(__data_start) + ldi r27, hi8(__data_start) + ldi r30, lo8(__data_load_start) + ldi r31, hi8(__data_load_start) + ldi r16, hh8(__data_load_start) + out __RAMPZ__, r16 + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: + elpm r0, Z+ + st X+, r0 +.L__do_copy_data_start: + cpi r26, lo8(__data_end) + cpc r27, r17 + brne .L__do_copy_data_loop +#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__) + ldi r17, hi8(__data_end) + ldi r26, lo8(__data_start) + ldi r27, hi8(__data_start) + ldi r30, lo8(__data_load_start) + ldi r31, hi8(__data_load_start) + ldi r16, hh8(__data_load_start - 0x10000) +.L__do_copy_data_carry: + inc r16 + out __RAMPZ__, r16 + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: + elpm + st X+, r0 + adiw r30, 1 + brcs .L__do_copy_data_carry +.L__do_copy_data_start: + cpi r26, lo8(__data_end) + cpc r27, r17 + brne .L__do_copy_data_loop +#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) + ldi r17, hi8(__data_end) + ldi r26, lo8(__data_start) + ldi r27, hi8(__data_start) + ldi r30, lo8(__data_load_start) + ldi r31, hi8(__data_load_start) + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: +#if defined (__AVR_HAVE_LPMX__) + lpm r0, Z+ +#else + lpm + adiw r30, 1 +#endif + st X+, r0 +.L__do_copy_data_start: + cpi r26, lo8(__data_end) + cpc r27, r17 + brne .L__do_copy_data_loop +#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */ +ENDF __do_copy_data +#endif /* L_copy_data */ + +/* __do_clear_bss is only necessary if there is anything in .bss section. */ + +#ifdef L_clear_bss + .section .init4,"ax",@progbits +DEFUN __do_clear_bss + ldi r17, hi8(__bss_end) + ldi r26, lo8(__bss_start) + ldi r27, hi8(__bss_start) + rjmp .do_clear_bss_start +.do_clear_bss_loop: + st X+, __zero_reg__ +.do_clear_bss_start: + cpi r26, lo8(__bss_end) + cpc r27, r17 + brne .do_clear_bss_loop +ENDF __do_clear_bss +#endif /* L_clear_bss */ + +/* __do_global_ctors and __do_global_dtors are only necessary + if there are any constructors/destructors. 
*/ + +#ifdef L_ctors + .section .init6,"ax",@progbits +DEFUN __do_global_ctors +#if defined(__AVR_HAVE_RAMPZ__) + ldi r17, hi8(__ctors_start) + ldi r28, lo8(__ctors_end) + ldi r29, hi8(__ctors_end) + ldi r16, hh8(__ctors_end) + rjmp .L__do_global_ctors_start +.L__do_global_ctors_loop: + sbiw r28, 2 + sbc r16, __zero_reg__ + mov_h r31, r29 + mov_l r30, r28 + out __RAMPZ__, r16 + XCALL __tablejump_elpm__ +.L__do_global_ctors_start: + cpi r28, lo8(__ctors_start) + cpc r29, r17 + ldi r24, hh8(__ctors_start) + cpc r16, r24 + brne .L__do_global_ctors_loop +#else + ldi r17, hi8(__ctors_start) + ldi r28, lo8(__ctors_end) + ldi r29, hi8(__ctors_end) + rjmp .L__do_global_ctors_start +.L__do_global_ctors_loop: + sbiw r28, 2 + mov_h r31, r29 + mov_l r30, r28 + XCALL __tablejump__ +.L__do_global_ctors_start: + cpi r28, lo8(__ctors_start) + cpc r29, r17 + brne .L__do_global_ctors_loop +#endif /* defined(__AVR_HAVE_RAMPZ__) */ +ENDF __do_global_ctors +#endif /* L_ctors */ + +#ifdef L_dtors + .section .fini6,"ax",@progbits +DEFUN __do_global_dtors +#if defined(__AVR_HAVE_RAMPZ__) + ldi r17, hi8(__dtors_end) + ldi r28, lo8(__dtors_start) + ldi r29, hi8(__dtors_start) + ldi r16, hh8(__dtors_start) + rjmp .L__do_global_dtors_start +.L__do_global_dtors_loop: + sbiw r28, 2 + sbc r16, __zero_reg__ + mov_h r31, r29 + mov_l r30, r28 + out __RAMPZ__, r16 + XCALL __tablejump_elpm__ +.L__do_global_dtors_start: + cpi r28, lo8(__dtors_end) + cpc r29, r17 + ldi r24, hh8(__dtors_end) + cpc r16, r24 + brne .L__do_global_dtors_loop +#else + ldi r17, hi8(__dtors_end) + ldi r28, lo8(__dtors_start) + ldi r29, hi8(__dtors_start) + rjmp .L__do_global_dtors_start +.L__do_global_dtors_loop: + mov_h r31, r29 + mov_l r30, r28 + XCALL __tablejump__ + adiw r28, 2 +.L__do_global_dtors_start: + cpi r28, lo8(__dtors_end) + cpc r29, r17 + brne .L__do_global_dtors_loop +#endif /* defined(__AVR_HAVE_RAMPZ__) */ +ENDF __do_global_dtors +#endif /* L_dtors */ + +.section .text.libgcc, "ax", @progbits + +#ifdef L_tablejump_elpm +DEFUN __tablejump_elpm__ +#if defined (__AVR_HAVE_ELPM__) +#if defined (__AVR_HAVE_LPMX__) + elpm __tmp_reg__, Z+ + elpm r31, Z + mov r30, __tmp_reg__ +#if defined (__AVR_HAVE_EIJMP_EICALL__) + eijmp +#else + ijmp +#endif + +#else + elpm + adiw r30, 1 + push r0 + elpm + push r0 +#if defined (__AVR_HAVE_EIJMP_EICALL__) + in __tmp_reg__, __EIND__ + push __tmp_reg__ +#endif + ret +#endif +#endif /* defined (__AVR_HAVE_ELPM__) */ +ENDF __tablejump_elpm__ +#endif /* defined (L_tablejump_elpm) */ + + +.section .text.libgcc.builtins, "ax", @progbits + +/********************************** + * Find first set Bit (ffs) + **********************************/ + +#if defined (L_ffssi2) +;; find first set bit +;; r25:r24 = ffs32 (r25:r22) +;; clobbers: r22, r26 +DEFUN __ffssi2 + clr r26 + tst r22 + brne 1f + subi r26, -8 + or r22, r23 + brne 1f + subi r26, -8 + or r22, r24 + brne 1f + subi r26, -8 + or r22, r25 + brne 1f + ret +1: mov r24, r22 + XJMP __loop_ffsqi2 +ENDF __ffssi2 +#endif /* defined (L_ffssi2) */ + +#if defined (L_ffshi2) +;; find first set bit +;; r25:r24 = ffs16 (r25:r24) +;; clobbers: r26 +DEFUN __ffshi2 + clr r26 +#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ + ;; Some cores have problem skipping 2-word instruction + tst r24 + breq 2f +#else + cpse r24, __zero_reg__ +#endif /* __AVR_HAVE_JMP_CALL__ */ +1: XJMP __loop_ffsqi2 +2: ldi r26, 8 + or r24, r25 + brne 1b + ret +ENDF __ffshi2 +#endif /* defined (L_ffshi2) */ + +#if defined (L_loop_ffsqi2) +;; Helper for ffshi2, ffssi2 +;; r25:r24 = r26 + zero_extend16 (ffs8(r24)) +;; 
r24 must be != 0 +;; clobbers: r26 +DEFUN __loop_ffsqi2 + inc r26 + lsr r24 + brcc __loop_ffsqi2 + mov r24, r26 + clr r25 + ret +ENDF __loop_ffsqi2 +#endif /* defined (L_loop_ffsqi2) */ + + +/********************************** + * Count trailing Zeros (ctz) + **********************************/ + +#if defined (L_ctzsi2) +;; count trailing zeros +;; r25:r24 = ctz32 (r25:r22) +;; clobbers: r26, r22 +;; ctz(0) = 255 +;; Note that ctz(0) in undefined for GCC +DEFUN __ctzsi2 + XCALL __ffssi2 + dec r24 + ret +ENDF __ctzsi2 +#endif /* defined (L_ctzsi2) */ + +#if defined (L_ctzhi2) +;; count trailing zeros +;; r25:r24 = ctz16 (r25:r24) +;; clobbers: r26 +;; ctz(0) = 255 +;; Note that ctz(0) in undefined for GCC +DEFUN __ctzhi2 + XCALL __ffshi2 + dec r24 + ret +ENDF __ctzhi2 +#endif /* defined (L_ctzhi2) */ + + +/********************************** + * Count leading Zeros (clz) + **********************************/ + +#if defined (L_clzdi2) +;; count leading zeros +;; r25:r24 = clz64 (r25:r18) +;; clobbers: r22, r23, r26 +DEFUN __clzdi2 + XCALL __clzsi2 + sbrs r24, 5 + ret + mov_l r22, r18 + mov_h r23, r19 + mov_l r24, r20 + mov_h r25, r21 + XCALL __clzsi2 + subi r24, -32 + ret +ENDF __clzdi2 +#endif /* defined (L_clzdi2) */ + +#if defined (L_clzsi2) +;; count leading zeros +;; r25:r24 = clz32 (r25:r22) +;; clobbers: r26 +DEFUN __clzsi2 + XCALL __clzhi2 + sbrs r24, 4 + ret + mov_l r24, r22 + mov_h r25, r23 + XCALL __clzhi2 + subi r24, -16 + ret +ENDF __clzsi2 +#endif /* defined (L_clzsi2) */ + +#if defined (L_clzhi2) +;; count leading zeros +;; r25:r24 = clz16 (r25:r24) +;; clobbers: r26 +DEFUN __clzhi2 + clr r26 + tst r25 + brne 1f + subi r26, -8 + or r25, r24 + brne 1f + ldi r24, 16 + ret +1: cpi r25, 16 + brsh 3f + subi r26, -3 + swap r25 +2: inc r26 +3: lsl r25 + brcc 2b + mov r24, r26 + clr r25 + ret +ENDF __clzhi2 +#endif /* defined (L_clzhi2) */ + + +/********************************** + * Parity + **********************************/ + +#if defined (L_paritydi2) +;; r25:r24 = parity64 (r25:r18) +;; clobbers: __tmp_reg__ +DEFUN __paritydi2 + eor r24, r18 + eor r24, r19 + eor r24, r20 + eor r24, r21 + XJMP __paritysi2 +ENDF __paritydi2 +#endif /* defined (L_paritydi2) */ + +#if defined (L_paritysi2) +;; r25:r24 = parity32 (r25:r22) +;; clobbers: __tmp_reg__ +DEFUN __paritysi2 + eor r24, r22 + eor r24, r23 + XJMP __parityhi2 +ENDF __paritysi2 +#endif /* defined (L_paritysi2) */ + +#if defined (L_parityhi2) +;; r25:r24 = parity16 (r25:r24) +;; clobbers: __tmp_reg__ +DEFUN __parityhi2 + eor r24, r25 +;; FALLTHRU +ENDF __parityhi2 + +;; r25:r24 = parity8 (r24) +;; clobbers: __tmp_reg__ +DEFUN __parityqi2 + ;; parity is in r24[0..7] + mov __tmp_reg__, r24 + swap __tmp_reg__ + eor r24, __tmp_reg__ + ;; parity is in r24[0..3] + subi r24, -4 + andi r24, -5 + subi r24, -6 + ;; parity is in r24[0,3] + sbrc r24, 3 + inc r24 + ;; parity is in r24[0] + andi r24, 1 + clr r25 + ret +ENDF __parityqi2 +#endif /* defined (L_parityhi2) */ + + +/********************************** + * Population Count + **********************************/ + +#if defined (L_popcounthi2) +;; population count +;; r25:r24 = popcount16 (r25:r24) +;; clobbers: __tmp_reg__ +DEFUN __popcounthi2 + XCALL __popcountqi2 + push r24 + mov r24, r25 + XCALL __popcountqi2 + clr r25 + ;; FALLTHRU +ENDF __popcounthi2 + +DEFUN __popcounthi2_tail + pop __tmp_reg__ + add r24, __tmp_reg__ + ret +ENDF __popcounthi2_tail +#endif /* defined (L_popcounthi2) */ + +#if defined (L_popcountsi2) +;; population count +;; r25:r24 = popcount32 (r25:r22) +;; 
clobbers: __tmp_reg__ +DEFUN __popcountsi2 + XCALL __popcounthi2 + push r24 + mov_l r24, r22 + mov_h r25, r23 + XCALL __popcounthi2 + XJMP __popcounthi2_tail +ENDF __popcountsi2 +#endif /* defined (L_popcountsi2) */ + +#if defined (L_popcountdi2) +;; population count +;; r25:r24 = popcount64 (r25:r18) +;; clobbers: r22, r23, __tmp_reg__ +DEFUN __popcountdi2 + XCALL __popcountsi2 + push r24 + mov_l r22, r18 + mov_h r23, r19 + mov_l r24, r20 + mov_h r25, r21 + XCALL __popcountsi2 + XJMP __popcounthi2_tail +ENDF __popcountdi2 +#endif /* defined (L_popcountdi2) */ + +#if defined (L_popcountqi2) +;; population count +;; r24 = popcount8 (r24) +;; clobbers: __tmp_reg__ +DEFUN __popcountqi2 + mov __tmp_reg__, r24 + andi r24, 1 + lsr __tmp_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __tmp_reg__ + ret +ENDF __popcountqi2 +#endif /* defined (L_popcountqi2) */ + + +/********************************** + * Swap bytes + **********************************/ + +;; swap two registers with different register number +.macro bswap a, b + eor \a, \b + eor \b, \a + eor \a, \b +.endm + +#if defined (L_bswapsi2) +;; swap bytes +;; r25:r22 = bswap32 (r25:r22) +DEFUN __bswapsi2 + bswap r22, r25 + bswap r23, r24 + ret +ENDF __bswapsi2 +#endif /* defined (L_bswapsi2) */ + +#if defined (L_bswapdi2) +;; swap bytes +;; r25:r18 = bswap64 (r25:r18) +DEFUN __bswapdi2 + bswap r18, r25 + bswap r19, r24 + bswap r20, r23 + bswap r21, r22 + ret +ENDF __bswapdi2 +#endif /* defined (L_bswapdi2) */ + + +/********************************** + * 64-bit shifts + **********************************/ + +#if defined (L_ashrdi3) +;; Arithmetic shift right +;; r25:r18 = ashr64 (r25:r18, r17:r16) +DEFUN __ashrdi3 + push r16 + andi r16, 63 + breq 2f +1: asr r25 + ror r24 + ror r23 + ror r22 + ror r21 + ror r20 + ror r19 + ror r18 + dec r16 + brne 1b +2: pop r16 + ret +ENDF __ashrdi3 +#endif /* defined (L_ashrdi3) */ + +#if defined (L_lshrdi3) +;; Logic shift right +;; r25:r18 = lshr64 (r25:r18, r17:r16) +DEFUN __lshrdi3 + push r16 + andi r16, 63 + breq 2f +1: lsr r25 + ror r24 + ror r23 + ror r22 + ror r21 + ror r20 + ror r19 + ror r18 + dec r16 + brne 1b +2: pop r16 + ret +ENDF __lshrdi3 +#endif /* defined (L_lshrdi3) */ + +#if defined (L_ashldi3) +;; Shift left +;; r25:r18 = ashl64 (r25:r18, r17:r16) +DEFUN __ashldi3 + push r16 + andi r16, 63 + breq 2f +1: lsl r18 + rol r19 + rol r20 + rol r21 + rol r22 + rol r23 + rol r24 + rol r25 + dec r16 + brne 1b +2: pop r16 + ret +ENDF __ashldi3 +#endif /* defined (L_ashldi3) */ + + +.section .text.libgcc.fmul, "ax", @progbits + +/***********************************************************/ +;;; Softmul versions of FMUL, FMULS and FMULSU to implement +;;; __builtin_avr_fmul* if !AVR_HAVE_MUL +/***********************************************************/ + +#define A1 24 +#define B1 25 +#define C0 22 +#define C1 23 +#define A0 __tmp_reg__ + +#ifdef L_fmuls +;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction +;;; Clobbers: r24, r25, __tmp_reg__ +DEFUN __fmuls + ;; A0.7 = negate result? + mov A0, A1 + eor A0, B1 + ;; B1 = |B1| + sbrc B1, 7 + neg B1 + XJMP __fmulsu_exit +ENDF __fmuls +#endif /* L_fmuls */ + +#ifdef L_fmulsu +;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction +;;; Clobbers: r24, r25, __tmp_reg__ +DEFUN __fmulsu + ;; A0.7 = negate result? 
+ mov A0, A1 +;; FALLTHRU +ENDF __fmulsu + +;; Helper for __fmuls and __fmulsu +DEFUN __fmulsu_exit + ;; A1 = |A1| + sbrc A1, 7 + neg A1 +#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ + ;; Some cores have problem skipping 2-word instruction + tst A0 + brmi 1f +#else + sbrs A0, 7 +#endif /* __AVR_HAVE_JMP_CALL__ */ + XJMP __fmul +1: XCALL __fmul + ;; C = -C iff A0.7 = 1 + com C1 + neg C0 + sbci C1, -1 + ret +ENDF __fmulsu_exit +#endif /* L_fmulsu */ + + +#ifdef L_fmul +;;; r22:r23 = fmul (r24, r25) like in FMUL instruction +;;; Clobbers: r24, r25, __tmp_reg__ +DEFUN __fmul + ; clear result + clr C0 + clr C1 + clr A0 +1: tst B1 + ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C. +2: brpl 3f + ;; C += A + add C0, A0 + adc C1, A1 +3: ;; A >>= 1 + lsr A1 + ror A0 + ;; B <<= 1 + lsl B1 + brne 2b + ret +ENDF __fmul +#endif /* L_fmul */ + +#undef A0 +#undef A1 +#undef B1 +#undef C0 +#undef C1 diff --git a/libgcc/config/avr/t-avr b/libgcc/config/avr/t-avr index 78829c76af4..f1c114a6dd6 100644 --- a/libgcc/config/avr/t-avr +++ b/libgcc/config/avr/t-avr @@ -1,3 +1,51 @@ +LIB1ASMSRC = avr/lib1funcs.S +LIB1ASMFUNCS = \ + _mulqi3 \ + _mulhi3 \ + _mulhisi3 \ + _umulhisi3 \ + _usmulhisi3 \ + _muluhisi3 \ + _mulshisi3 \ + _mulsi3 \ + _udivmodqi4 \ + _divmodqi4 \ + _udivmodhi4 \ + _divmodhi4 \ + _udivmodsi4 \ + _divmodsi4 \ + _prologue \ + _epilogue \ + _exit \ + _cleanup \ + _tablejump \ + _tablejump_elpm \ + _copy_data \ + _clear_bss \ + _ctors \ + _dtors \ + _ffssi2 \ + _ffshi2 \ + _loop_ffsqi2 \ + _ctzsi2 \ + _ctzhi2 \ + _clzdi2 \ + _clzsi2 \ + _clzhi2 \ + _paritydi2 \ + _paritysi2 \ + _parityhi2 \ + _popcounthi2 \ + _popcountsi2 \ + _popcountdi2 \ + _popcountqi2 \ + _bswapsi2 \ + _bswapdi2 \ + _ashldi3 \ + _ashrdi3 \ + _lshrdi3 \ + _fmul _fmuls _fmulsu + # Extra 16-bit integer functions. intfuncs16 = _absvXX2 _addvXX3 _subvXX3 _mulvXX3 _negvXX2 _clrsbXX2 diff --git a/libgcc/config/bfin/lib1funcs.S b/libgcc/config/bfin/lib1funcs.S new file mode 100644 index 00000000000..c7bf4f3f05c --- /dev/null +++ b/libgcc/config/bfin/lib1funcs.S @@ -0,0 +1,211 @@ +/* libgcc functions for Blackfin. + Copyright (C) 2005, 2009 Free Software Foundation, Inc. + Contributed by Analog Devices. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +#ifdef L_divsi3 +.text +.align 2 +.global ___divsi3; +.type ___divsi3, STT_FUNC; + +___divsi3: + [--SP]= RETS; + [--SP] = R7; + + R2 = -R0; + CC = R0 < 0; + IF CC R0 = R2; + R7 = CC; + + R2 = -R1; + CC = R1 < 0; + IF CC R1 = R2; + R2 = CC; + R7 = R7 ^ R2; + + CALL ___udivsi3; + + CC = R7; + R1 = -R0; + IF CC R0 = R1; + + R7 = [SP++]; + RETS = [SP++]; + RTS; +#endif + +#ifdef L_modsi3 +.align 2 +.global ___modsi3; +.type ___modsi3, STT_FUNC; + +___modsi3: + [--SP] = RETS; + [--SP] = R0; + [--SP] = R1; + CALL ___divsi3; + R2 = [SP++]; + R1 = [SP++]; + R2 *= R0; + R0 = R1 - R2; + RETS = [SP++]; + RTS; +#endif + +#ifdef L_udivsi3 +.align 2 +.global ___udivsi3; +.type ___udivsi3, STT_FUNC; + +___udivsi3: + P0 = 32; + LSETUP (0f, 1f) LC0 = P0; + /* upper half of dividend */ + R3 = 0; +0: + /* The first time round in the loop we shift in garbage, but since we + perform 33 shifts, it doesn't matter. */ + R0 = ROT R0 BY 1; + R3 = ROT R3 BY 1; + R2 = R3 - R1; + CC = R3 < R1 (IU); +1: + /* Last instruction of the loop. */ + IF ! CC R3 = R2; + + /* Shift in the last bit. */ + R0 = ROT R0 BY 1; + /* R0 is the result, R3 contains the remainder. */ + R0 = ~ R0; + RTS; +#endif + +#ifdef L_umodsi3 +.align 2 +.global ___umodsi3; +.type ___umodsi3, STT_FUNC; + +___umodsi3: + [--SP] = RETS; + CALL ___udivsi3; + R0 = R3; + RETS = [SP++]; + RTS; +#endif + +#ifdef L_umulsi3_highpart +.align 2 +.global ___umulsi3_highpart; +.type ___umulsi3_highpart, STT_FUNC; + +___umulsi3_highpart: + A1 = R1.L * R0.L (FU); + A1 = A1 >> 16; + A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU); + A1 += R0.L * R1.H (FU); + A1 = A1 >> 16; + A0 += A1; + R0 = A0 (FU); + RTS; +#endif + +#ifdef L_smulsi3_highpart +.align 2 +.global ___smulsi3_highpart; +.type ___smulsi3_highpart, STT_FUNC; + +___smulsi3_highpart: + A1 = R1.L * R0.L (FU); + A1 = A1 >> 16; + A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M); + A1 += R1.H * R0.L (IS,M); + A1 = A1 >>> 16; + R0 = (A0 += A1); + RTS; +#endif + +#ifdef L_muldi3 +.align 2 +.global ___muldi3; +.type ___muldi3, STT_FUNC; + +/* + R1:R0 * R3:R2 + = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l +[X] = (R1.h * R3.h) * 2^96 +[X] + (R1.h * R3.l + R1.l * R3.h) * 2^80 +[X] + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64 +[T1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48 +[T2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32 +[T3] + (R0.l * R2.h + R2.l * R0.h) * 2^16 +[T4] + (R0.l * R2.l) + + We can discard the first three lines marked "X" since we produce + only a 64 bit result. So, we need ten 16-bit multiplies. + + Individual mul-acc results: +[E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h +[E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h +[E3] = R0.l * R2.h + R2.l * R0.h +[E4] = R0.l * R2.l + + We also need to add high parts from lower-level results to higher ones: + E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4 + + One interesting property is that all parts of the result that depend + on the sign of the multiplication are discarded. Those would be the + multiplications involving R1.h and R3.h, but only the top 16 bit of + the 32 bit result depend on the sign, and since R1.h and R3.h only + occur in E1, the top half of these results is cut off. + So, we can just use FU mode for all of the 16-bit multiplies, and + ignore questions of when to use mixed mode. */ + +___muldi3: + /* [SP] technically is part of the caller's frame, but we can + use it as scratch space. 
*/ + A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12]; /* E1 */ + A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4; /* E1 */ + A0 += A1; /* E1 */ + R4 = A0.w; + A0 = R0.l * R3.l (FU); /* E2 */ + A0 += R2.l * R1.l (FU); /* E2 */ + + A1 = R2.L * R0.L (FU); /* E4 */ + R3 = A1.w; + A1 = A1 >> 16; /* E3c */ + A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU); /* E2, E3c */ + A1 += R0.L * R2.H (FU); /* E3c */ + R0 = A1.w; + A1 = A1 >> 16; /* E2c */ + A0 += A1; /* E2c */ + R1 = A0.w; + + /* low(result) = low(E3c):low(E4) */ + R0 = PACK (R0.l, R3.l); + /* high(result) = E2c + (E1 << 16) */ + R1.h = R1.h + R4.l (NS) || R4 = [SP]; + RTS; + +.size ___muldi3, .-___muldi3 +#endif diff --git a/libgcc/config/bfin/t-bfin b/libgcc/config/bfin/t-bfin new file mode 100644 index 00000000000..bc2b088ffc1 --- /dev/null +++ b/libgcc/config/bfin/t-bfin @@ -0,0 +1,3 @@ +LIB1ASMSRC = bfin/lib1funcs.S +LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart +LIB1ASMFUNCS += _smulsi3_highpart diff --git a/libgcc/config/c6x/lib1funcs.S b/libgcc/config/c6x/lib1funcs.S new file mode 100644 index 00000000000..5bf34474bbd --- /dev/null +++ b/libgcc/config/c6x/lib1funcs.S @@ -0,0 +1,438 @@ +/* Copyright 2010, 2011 Free Software Foundation, Inc. + Contributed by Bernd Schmidt . + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + + ;; ABI considerations for the divide functions + ;; The following registers are call-used: + ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 + ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 + ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 + ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 + ;; + ;; In our implementation, divu and remu are leaf functions, + ;; while both divi and remi call into divu. + ;; A0 is not clobbered by any of the functions. + ;; divu does not clobber B2 either, which is taken advantage of + ;; in remi. + ;; divi uses B5 to hold the original return address during + ;; the call to divu. + ;; remi uses B2 and A5 to hold the input values during the + ;; call to divu. It stores B3 in on the stack. 
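The sketch below spells out, in C, how the signed entry points that follow are layered on the unsigned divide, as the ABI notes above describe: __c6xabi_divi negates its operands, calls __c6xabi_divu, and flips the sign of the quotient, while __c6xabi_remi recovers the remainder with a multiply and subtract (the mpy32/sub, or mpyu/mpylhu, sequence at its tail). This is an illustrative model only, not code from the patch; the function names are invented stand-ins for the real symbols.

    #include <stdint.h>

    /* Stand-in for __c6xabi_divu (unsigned 32-bit divide).  */
    extern uint32_t c6x_divu_ref(uint32_t n, uint32_t d);

    /* Stand-in for __c6xabi_divi.  */
    int32_t c6x_divi_ref(int32_t n, int32_t d)
    {
        uint32_t un = n < 0 ? 0u - (uint32_t)n : (uint32_t)n;
        uint32_t ud = d < 0 ? 0u - (uint32_t)d : (uint32_t)d;
        uint32_t q = c6x_divu_ref(un, ud);
        /* Quotient sign is the XOR of the operand signs.  */
        return (int32_t)(((n < 0) != (d < 0)) ? 0u - q : q);
    }

    /* Stand-in for __c6xabi_remi: remainder = dividend - quotient * divisor,
       so it carries the dividend's sign.  */
    int32_t c6x_remi_ref(int32_t n, int32_t d)
    {
        return (int32_t)((uint32_t)n - (uint32_t)c6x_divi_ref(n, d) * (uint32_t)d);
    }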
+ +#ifdef L_divsi3 +.text +.align 2 +.global __c6xabi_divi +.hidden __c6xabi_divi +.type __c6xabi_divi, STT_FUNC + +__c6xabi_divi: + call .s2 __c6xabi_divu +|| mv .d2 B3, B5 +|| cmpgt .l1 0, A4, A1 +|| cmpgt .l2 0, B4, B1 + + [A1] neg .l1 A4, A4 +|| [B1] neg .l2 B4, B4 +|| xor .s1x A1, B1, A1 + +#ifdef _TMS320C6400 + [A1] addkpc .s2 1f, B3, 4 +#else + [A1] mvkl .s2 1f, B3 + [A1] mvkh .s2 1f, B3 + nop 2 +#endif +1: + neg .l1 A4, A4 +|| mv .l2 B3,B5 +|| ret .s2 B5 + nop 5 +#endif + +#if defined L_modsi3 || defined L_divmodsi4 +.align 2 +#ifdef L_modsi3 +#define MOD_OUTPUT_REG A4 +.global __c6xabi_remi +.hidden __c6xabi_remi +.type __c6xabi_remi, STT_FUNC +#else +#define MOD_OUTPUT_REG A5 +.global __c6xabi_divremi +.hidden __c6xabi_divremi +.type __c6xabi_divremi, STT_FUNC +__c6xabi_divremi: +#endif + +__c6xabi_remi: + stw .d2t2 B3, *B15--[2] +|| cmpgt .l1 0, A4, A1 +|| cmpgt .l2 0, B4, B2 +|| mv .s1 A4, A5 +|| call .s2 __c6xabi_divu + + [A1] neg .l1 A4, A4 +|| [B2] neg .l2 B4, B4 +|| xor .s2x B2, A1, B0 +|| mv .d2 B4, B2 + +#ifdef _TMS320C6400 + [B0] addkpc .s2 1f, B3, 1 + [!B0] addkpc .s2 2f, B3, 1 + nop 2 +#else + [B0] mvkl .s2 1f,B3 + [!B0] mvkl .s2 2f,B3 + + [B0] mvkh .s2 1f,B3 + [!B0] mvkh .s2 2f,B3 +#endif +1: + neg .l1 A4, A4 +2: + ldw .d2t2 *++B15[2], B3 + +#ifdef _TMS320C6400_PLUS + mpy32 .m1x A4, B2, A6 + nop 3 + ret .s2 B3 + sub .l1 A5, A6, MOD_OUTPUT_REG + nop 4 +#else + mpyu .m1x A4, B2, A1 + nop 1 + mpylhu .m1x A4, B2, A6 +|| mpylhu .m2x B2, A4, B2 + nop 1 + add .l1x A6, B2, A6 +|| ret .s2 B3 + shl .s1 A6, 16, A6 + add .d1 A6, A1, A6 + sub .l1 A5, A6, MOD_OUTPUT_REG + nop 2 +#endif + +#endif + +#if defined L_udivsi3 || defined L_udivmodsi4 +.align 2 +#ifdef L_udivsi3 +.global __c6xabi_divu +.hidden __c6xabi_divu +.type __c6xabi_divu, STT_FUNC +__c6xabi_divu: +#else +.global __c6xabi_divremu +.hidden __c6xabi_divremu +.type __c6xabi_divremu, STT_FUNC +__c6xabi_divremu: +#endif + ;; We use a series of up to 31 subc instructions. First, we find + ;; out how many leading zero bits there are in the divisor. This + ;; gives us both a shift count for aligning (shifting) the divisor + ;; to the, and the number of times we have to execute subc. + + ;; At the end, we have both the remainder and most of the quotient + ;; in A4. The top bit of the quotient is computed first and is + ;; placed in A2. + + ;; Return immediately if the dividend is zero. Setting B4 to 1 + ;; is a trick to allow us to leave the following insns in the jump + ;; delay slot without affecting the result. + mv .s2x A4, B1 + +#ifndef _TMS320C6400 +[!b1] mvk .s2 1, B4 +#endif +[b1] lmbd .l2 1, B4, B1 +||[!b1] b .s2 B3 ; RETURN A +#ifdef _TMS320C6400 +||[!b1] mvk .d2 1, B4 +#endif +#ifdef L_udivmodsi4 +||[!b1] zero .s1 A5 +#endif + mv .l1x B1, A6 +|| shl .s2 B4, B1, B4 + + ;; The loop performs a maximum of 28 steps, so we do the + ;; first 3 here. 
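The conditional-subtract loop that follows can be hard to read in its unrolled form, so here is the idea in C. This is a sketch of the underlying algorithm only, not a transcription of the routine (the real code packs quotient bits and remainder into A4 together, runs the steps in unrolled groups of eight, and unpacks at the end); the function name is an invented stand-in for __c6xabi_divu.

    #include <stdint.h>

    /* Count the divisor's leading zeros (lmbd in the assembly), align it
       with the top of the word, then do one compare/subtract step per
       remaining bit.  Assumes d != 0.  */
    uint32_t c6x_divu_ref(uint32_t n, uint32_t d)
    {
        if (n == 0)
            return 0;                        /* the routine returns early here too */
        uint32_t q = 0;
        int steps = __builtin_clz(d);
        d <<= steps;
        for (int i = steps; i >= 0; i--) {
            q <<= 1;
            if (n >= d) {                    /* one "subc" step */
                n -= d;
                q |= 1;
            }
            d >>= 1;
        }
        return q;                            /* n now holds the remainder */
    }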
+ cmpltu .l1x A4, B4, A2 +[!A2] sub .l1x A4, B4, A4 +|| shru .s2 B4, 1, B4 +|| xor .s1 1, A2, A2 + + shl .s1 A2, 31, A2 +|| [b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 + + ;; RETURN A may happen here (note: must happen before the next branch) +0: + cmpgt .l2 B1, 7, B0 +|| [b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +|| [b0] b .s1 0b +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 + ;; loop backwards branch happens here + + ret .s2 B3 +|| mvk .s1 32, A1 + sub .l1 A1, A6, A6 +#ifdef L_udivmodsi4 +|| extu .s1 A4, A6, A5 +#endif + shl .s1 A4, A6, A4 + shru .s1 A4, 1, A4 +|| sub .l1 A6, 1, A6 + or .l1 A2, A4, A4 + shru .s1 A4, A6, A4 + nop + +#endif + +#ifdef L_umodsi3 +.align 2 +.global __c6xabi_remu +.hidden __c6xabi_remu +.type __c6xabi_remu, STT_FUNC +__c6xabi_remu: + ;; The ABI seems designed to prevent these functions calling each other, + ;; so we duplicate most of the divsi3 code here. + mv .s2x A4, B1 +#ifndef _TMS320C6400 +[!b1] mvk .s2 1, B4 +#endif + lmbd .l2 1, B4, B1 +||[!b1] b .s2 B3 ; RETURN A +#ifdef _TMS320C6400 +||[!b1] mvk .d2 1, B4 +#endif + + mv .l1x B1, A7 +|| shl .s2 B4, B1, B4 + + cmpltu .l1x A4, B4, A1 +[!a1] sub .l1x A4, B4, A4 + shru .s2 B4, 1, B4 + +0: + cmpgt .l2 B1, 7, B0 +|| [b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 + ;; RETURN A may happen here (note: must happen before the next branch) +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +|| [b0] b .s1 0b +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 + ;; loop backwards branch happens here + + ret .s2 B3 +[b1] subc .l1x A4,B4,A4 +|| [b1] add .s2 -1, B1, B1 +[b1] subc .l1x A4,B4,A4 + + extu .s1 A4, A7, A4 + nop 2 +#endif + +#if defined L_strasgi_64plus && defined _TMS320C6400_PLUS + +.align 2 +.global __c6xabi_strasgi_64plus +.hidden __c6xabi_strasgi_64plus +.type __c6xabi_strasgi_64plus, STT_FUNC +__c6xabi_strasgi_64plus: + shru .s2x a6, 2, b31 +|| mv .s1 a4, a30 +|| mv .d2 b4, b30 + + add .s2 -4, b31, b31 + + sploopd 1 +|| mvc .s2 b31, ilc + ldw .d2t2 *b30++, b31 + nop 4 + mv .s1x b31,a31 + spkernel 6, 0 +|| stw .d1t1 a31, *a30++ + + ret .s2 b3 + nop 5 +#endif + +#ifdef L_strasgi +.global __c6xabi_strasgi +.type __c6xabi_strasgi, STT_FUNC +__c6xabi_strasgi: + ;; This is essentially memcpy, with alignment known to be at least + ;; 4, and the size a multiple of 4 greater than or equal to 28. 
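In other words, __c6xabi_strasgi provides an aligned block copy; the unrolled loop below keeps several words in flight to hide load latency. A minimal C statement of the contract (an illustrative sketch, not code from the patch; the name is invented):

    #include <stddef.h>
    #include <stdint.h>

    /* Both pointers are 4-byte aligned; nbytes is a multiple of 4 and
       at least 28, as the comment above requires.  */
    void c6x_strasgi_ref(uint32_t *dst, const uint32_t *src, size_t nbytes)
    {
        for (size_t i = 0; i < nbytes / 4; i++)
            dst[i] = src[i];
    }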
+ ldw .d2t1 *B4++, A0 +|| mvk .s2 16, B1 + ldw .d2t1 *B4++, A1 +|| mvk .s2 20, B2 +|| sub .d1 A6, 24, A6 + ldw .d2t1 *B4++, A5 + ldw .d2t1 *B4++, A7 +|| mv .l2x A6, B7 + ldw .d2t1 *B4++, A8 + ldw .d2t1 *B4++, A9 +|| mv .s2x A0, B5 +|| cmpltu .l2 B2, B7, B0 + +0: + stw .d1t2 B5, *A4++ +||[b0] ldw .d2t1 *B4++, A0 +|| mv .s2x A1, B5 +|| mv .l2 B7, B6 + +[b0] sub .d2 B6, 24, B7 +||[b0] b .s2 0b +|| cmpltu .l2 B1, B6, B0 + +[b0] ldw .d2t1 *B4++, A1 +|| stw .d1t2 B5, *A4++ +|| mv .s2x A5, B5 +|| cmpltu .l2 12, B6, B0 + +[b0] ldw .d2t1 *B4++, A5 +|| stw .d1t2 B5, *A4++ +|| mv .s2x A7, B5 +|| cmpltu .l2 8, B6, B0 + +[b0] ldw .d2t1 *B4++, A7 +|| stw .d1t2 B5, *A4++ +|| mv .s2x A8, B5 +|| cmpltu .l2 4, B6, B0 + +[b0] ldw .d2t1 *B4++, A8 +|| stw .d1t2 B5, *A4++ +|| mv .s2x A9, B5 +|| cmpltu .l2 0, B6, B0 + +[b0] ldw .d2t1 *B4++, A9 +|| stw .d1t2 B5, *A4++ +|| mv .s2x A0, B5 +|| cmpltu .l2 B2, B7, B0 + + ;; loop back branch happens here + + cmpltu .l2 B1, B6, B0 +|| ret .s2 b3 + +[b0] stw .d1t1 A1, *A4++ +|| cmpltu .l2 12, B6, B0 +[b0] stw .d1t1 A5, *A4++ +|| cmpltu .l2 8, B6, B0 +[b0] stw .d1t1 A7, *A4++ +|| cmpltu .l2 4, B6, B0 +[b0] stw .d1t1 A8, *A4++ +|| cmpltu .l2 0, B6, B0 +[b0] stw .d1t1 A9, *A4++ + + ;; return happens here + +#endif + +#ifdef _TMS320C6400_PLUS +#ifdef L_push_rts +.align 2 +.global __c6xabi_push_rts +.hidden __c6xabi_push_rts +.type __c6xabi_push_rts, STT_FUNC +__c6xabi_push_rts: + stw .d2t2 B14, *B15--[2] + stdw .d2t1 A15:A14, *B15-- +|| b .s2x A3 + stdw .d2t2 B13:B12, *B15-- + stdw .d2t1 A13:A12, *B15-- + stdw .d2t2 B11:B10, *B15-- + stdw .d2t1 A11:A10, *B15-- + stdw .d2t2 B3:B2, *B15-- +#endif + +#ifdef L_pop_rts +.align 2 +.global __c6xabi_pop_rts +.hidden __c6xabi_pop_rts +.type __c6xabi_pop_rts, STT_FUNC +__c6xabi_pop_rts: + lddw .d2t2 *++B15, B3:B2 + lddw .d2t1 *++B15, A11:A10 + lddw .d2t2 *++B15, B11:B10 + lddw .d2t1 *++B15, A13:A12 + lddw .d2t2 *++B15, B13:B12 + lddw .d2t1 *++B15, A15:A14 +|| b .s2 B3 + ldw .d2t2 *++B15[2], B14 + nop 4 +#endif + +#ifdef L_call_stub +.align 2 +.global __c6xabi_call_stub +.type __c6xabi_call_stub, STT_FUNC +__c6xabi_call_stub: + stw .d2t1 A2, *B15--[2] + stdw .d2t1 A7:A6, *B15-- +|| call .s2 B31 + stdw .d2t1 A1:A0, *B15-- + stdw .d2t2 B7:B6, *B15-- + stdw .d2t2 B5:B4, *B15-- + stdw .d2t2 B1:B0, *B15-- + stdw .d2t2 B3:B2, *B15-- +|| addkpc .s2 1f, B3, 0 +1: + lddw .d2t2 *++B15, B3:B2 + lddw .d2t2 *++B15, B1:B0 + lddw .d2t2 *++B15, B5:B4 + lddw .d2t2 *++B15, B7:B6 + lddw .d2t1 *++B15, A1:A0 + lddw .d2t1 *++B15, A7:A6 +|| b .s2 B3 + ldw .d2t1 *++B15[2], A2 + nop 4 +#endif + +#endif + diff --git a/libgcc/config/c6x/t-elf b/libgcc/config/c6x/t-elf index 99d0cd2d5ca..e01c4109e52 100644 --- a/libgcc/config/c6x/t-elf +++ b/libgcc/config/c6x/t-elf @@ -1,6 +1,11 @@ # Cannot use default rules due to $(CRTSTUFF_T_CFLAGS). CUSTOM_CRTIN = yes +LIB1ASMSRC = c6x/lib1funcs.S +LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _udivmodsi4 _divmodsi4 +LIB1ASMFUNCS += _strasgi _strasgi_64plus _clzsi2 _clzdi2 _clz +LIB1ASMFUNCS += _push_rts _pop_rts _call_stub + # Assemble startup files. crti.o: $(srcdir)/config/c6x/crti.S $(crt_compile) -c $(CRTSTUFF_T_CFLAGS) $< diff --git a/libgcc/config/fr30/lib1funcs.S b/libgcc/config/fr30/lib1funcs.S new file mode 100644 index 00000000000..7c63453123a --- /dev/null +++ b/libgcc/config/fr30/lib1funcs.S @@ -0,0 +1,115 @@ +/* libgcc routines for the FR30. + Copyright (C) 1998, 1999, 2009 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + + .macro FUNC_START name + .text + .globl __\name + .type __\name, @function +__\name: + .endm + + .macro FUNC_END name + .size __\name, . - __\name + .endm + + .macro DIV_BODY reg number + .if \number + DIV_BODY \reg, "\number - 1" + div1 \reg + .endif + .endm + +#ifdef L_udivsi3 +FUNC_START udivsi3 + ;; Perform an unsiged division of r4 / r5 and place the result in r4. + ;; Does not handle overflow yet... + mov r4, mdl + div0u r5 + DIV_BODY r5 32 + mov mdl, r4 + ret +FUNC_END udivsi3 +#endif /* L_udivsi3 */ + +#ifdef L_divsi3 +FUNC_START divsi3 + ;; Perform a siged division of r4 / r5 and place the result in r4. + ;; Does not handle overflow yet... + mov r4, mdl + div0s r5 + DIV_BODY r5 32 + div2 r5 + div3 + div4s + mov mdl, r4 + ret +FUNC_END divsi3 +#endif /* L_divsi3 */ + +#ifdef L_umodsi3 +FUNC_START umodsi3 + ;; Perform an unsiged division of r4 / r5 and places the remainder in r4. + ;; Does not handle overflow yet... + mov r4, mdl + div0u r5 + DIV_BODY r5 32 + mov mdh, r4 + ret +FUNC_END umodsi3 +#endif /* L_umodsi3 */ + +#ifdef L_modsi3 +FUNC_START modsi3 + ;; Perform a siged division of r4 / r5 and place the remainder in r4. + ;; Does not handle overflow yet... + mov r4, mdl + div0s r5 + DIV_BODY r5 32 + div2 r5 + div3 + div4s + mov mdh, r4 + ret +FUNC_END modsi3 +#endif /* L_modsi3 */ + +#ifdef L_negsi2 +FUNC_START negsi2 + ldi:8 #0, r0 + sub r4, r0 + mov r0, r4 + ret +FUNC_END negsi2 +#endif /* L_negsi2 */ + +#ifdef L_one_cmplsi2 +FUNC_START one_cmplsi2 + ldi:8 #0xff, r0 + extsb r0 + eor r0, r4 + ret +FUNC_END one_cmplsi2 +#endif /* L_one_cmplsi2 */ + + diff --git a/libgcc/config/fr30/t-fr30 b/libgcc/config/fr30/t-fr30 new file mode 100644 index 00000000000..ee5ed9a127e --- /dev/null +++ b/libgcc/config/fr30/t-fr30 @@ -0,0 +1,2 @@ +LIB1ASMSRC = fr30/lib1funcs.S +LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 diff --git a/libgcc/config/frv/lib1funcs.S b/libgcc/config/frv/lib1funcs.S new file mode 100644 index 00000000000..d1ffcab6133 --- /dev/null +++ b/libgcc/config/frv/lib1funcs.S @@ -0,0 +1,269 @@ +/* Library functions. + Copyright (C) 2000, 2003, 2008, 2009 Free Software Foundation, Inc. + Contributed by Red Hat, Inc. + + This file is part of GCC. + + GCC is free software ; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY ; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include + + +#ifdef L_cmpll +/* icc0 = __cmpll (long long a, long long b) */ + + .globl EXT(__cmpll) + .type EXT(__cmpll),@function + .text + .p2align 4 +EXT(__cmpll): + cmp gr8, gr10, icc0 + ckeq icc0, cc4 + P(ccmp) gr9, gr11, cc4, 1 + ret +.Lend: + .size EXT(__cmpll),.Lend-EXT(__cmpll) +#endif /* L_cmpll */ + +#ifdef L_cmpf +/* icc0 = __cmpf (float a, float b) */ +/* Note, because this function returns the result in ICC0, it means it can't + handle NaNs. */ + + .globl EXT(__cmpf) + .type EXT(__cmpf),@function + .text + .p2align 4 +EXT(__cmpf): +#ifdef __FRV_HARD_FLOAT__ /* floating point instructions available */ + movgf gr8, fr0 + P(movgf) gr9, fr1 + setlos #1, gr8 + fcmps fr0, fr1, fcc0 + P(fcklt) fcc0, cc0 + fckeq fcc0, cc1 + csub gr0, gr8, gr8, cc0, 1 + cmov gr0, gr8, cc1, 1 + cmpi gr8, 0, icc0 + ret +#else /* no floating point instructions available */ + movsg lr, gr4 + addi sp, #-16, sp + sti gr4, @(sp, 8) + st fp, @(sp, gr0) + mov sp, fp + call EXT(__cmpsf2) + cmpi gr8, #0, icc0 + ldi @(sp, 8), gr4 + movgs gr4, lr + ld @(sp,gr0), fp + addi sp, #16, sp + ret +#endif +.Lend: + .size EXT(__cmpf),.Lend-EXT(__cmpf) +#endif + +#ifdef L_cmpd +/* icc0 = __cmpd (double a, double b) */ +/* Note, because this function returns the result in ICC0, it means it can't + handle NaNs. */ + + .globl EXT(__cmpd) + .type EXT(__cmpd),@function + .text + .p2align 4 +EXT(__cmpd): + movsg lr, gr4 + addi sp, #-16, sp + sti gr4, @(sp, 8) + st fp, @(sp, gr0) + mov sp, fp + call EXT(__cmpdf2) + cmpi gr8, #0, icc0 + ldi @(sp, 8), gr4 + movgs gr4, lr + ld @(sp,gr0), fp + addi sp, #16, sp + ret +.Lend: + .size EXT(__cmpd),.Lend-EXT(__cmpd) +#endif + +#ifdef L_addll +/* gr8,gr9 = __addll (long long a, long long b) */ +/* Note, gcc will never call this function, but it is present in case an + ABI program calls it. */ + + .globl EXT(__addll) + .type EXT(__addll),@function + .text + .p2align +EXT(__addll): + addcc gr9, gr11, gr9, icc0 + addx gr8, gr10, gr8, icc0 + ret +.Lend: + .size EXT(__addll),.Lend-EXT(__addll) +#endif + +#ifdef L_subll +/* gr8,gr9 = __subll (long long a, long long b) */ +/* Note, gcc will never call this function, but it is present in case an + ABI program calls it. */ + + .globl EXT(__subll) + .type EXT(__subll),@function + .text + .p2align 4 +EXT(__subll): + subcc gr9, gr11, gr9, icc0 + subx gr8, gr10, gr8, icc0 + ret +.Lend: + .size EXT(__subll),.Lend-EXT(__subll) +#endif + +#ifdef L_andll +/* gr8,gr9 = __andll (long long a, long long b) */ +/* Note, gcc will never call this function, but it is present in case an + ABI program calls it. */ + + .globl EXT(__andll) + .type EXT(__andll),@function + .text + .p2align 4 +EXT(__andll): + P(and) gr9, gr11, gr9 + P2(and) gr8, gr10, gr8 + ret +.Lend: + .size EXT(__andll),.Lend-EXT(__andll) +#endif + +#ifdef L_orll +/* gr8,gr9 = __orll (long long a, long long b) */ +/* Note, gcc will never call this function, but it is present in case an + ABI program calls it. 
*/ + + .globl EXT(__orll) + .type EXT(__orll),@function + .text + .p2align 4 +EXT(__orll): + P(or) gr9, gr11, gr9 + P2(or) gr8, gr10, gr8 + ret +.Lend: + .size EXT(__orll),.Lend-EXT(__orll) +#endif + +#ifdef L_xorll +/* gr8,gr9 = __xorll (long long a, long long b) */ +/* Note, gcc will never call this function, but it is present in case an + ABI program calls it. */ + + .globl EXT(__xorll) + .type EXT(__xorll),@function + .text + .p2align 4 +EXT(__xorll): + P(xor) gr9, gr11, gr9 + P2(xor) gr8, gr10, gr8 + ret +.Lend: + .size EXT(__xorll),.Lend-EXT(__xorll) +#endif + +#ifdef L_notll +/* gr8,gr9 = __notll (long long a) */ +/* Note, gcc will never call this function, but it is present in case an + ABI program calls it. */ + + .globl EXT(__notll) + .type EXT(__notll),@function + .text + .p2align 4 +EXT(__notll): + P(not) gr9, gr9 + P2(not) gr8, gr8 + ret +.Lend: + .size EXT(__notll),.Lend-EXT(__notll) +#endif + +#ifdef L_cmov +/* (void) __cmov (char *dest, const char *src, size_t len) */ +/* + * void __cmov (char *dest, const char *src, size_t len) + * { + * size_t i; + * + * if (dest < src || dest > src+len) + * { + * for (i = 0; i < len; i++) + * dest[i] = src[i]; + * } + * else + * { + * while (len-- > 0) + * dest[len] = src[len]; + * } + * } + */ + + .globl EXT(__cmov) + .type EXT(__cmov),@function + .text + .p2align 4 +EXT(__cmov): + P(cmp) gr8, gr9, icc0 + add gr9, gr10, gr4 + P(cmp) gr8, gr4, icc1 + bc icc0, 0, .Lfwd + bls icc1, 0, .Lback +.Lfwd: + /* move bytes in a forward direction */ + P(setlos) #0, gr5 + cmp gr0, gr10, icc0 + P(subi) gr9, #1, gr9 + P2(subi) gr8, #1, gr8 + bnc icc0, 0, .Lret +.Lfloop: + /* forward byte move loop */ + addi gr5, #1, gr5 + P(ldsb) @(gr9, gr5), gr4 + cmp gr5, gr10, icc0 + P(stb) gr4, @(gr8, gr5) + bc icc0, 0, .Lfloop + ret +.Lbloop: + /* backward byte move loop body */ + ldsb @(gr9,gr10),gr4 + stb gr4,@(gr8,gr10) +.Lback: + P(cmpi) gr10, #0, icc0 + addi gr10, #-1, gr10 + bne icc0, 0, .Lbloop +.Lret: + ret +.Lend: + .size EXT(__cmov),.Lend-EXT(__cmov) +#endif diff --git a/libgcc/config/frv/t-frv b/libgcc/config/frv/t-frv index b364a5a25b9..9773722d8e7 100644 --- a/libgcc/config/frv/t-frv +++ b/libgcc/config/frv/t-frv @@ -1,3 +1,6 @@ +LIB1ASMSRC = frv/lib1funcs.S +LIB1ASMFUNCS = _cmpll _cmpf _cmpd _addll _subll _andll _orll _xorll _notll _cmov + # Compile two additional files that are linked with every program # linked using GCC on systems using COFF or ELF, for the sake of C++ # constructors. diff --git a/libgcc/config/h8300/lib1funcs.S b/libgcc/config/h8300/lib1funcs.S new file mode 100644 index 00000000000..1b75b73269d --- /dev/null +++ b/libgcc/config/h8300/lib1funcs.S @@ -0,0 +1,838 @@ +;; libgcc routines for the Renesas H8/300 CPU. +;; Contributed by Steve Chamberlain +;; Optimizations by Toshiyasu Morita + +/* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004, 2009 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Assembler register definitions. */ + +#define A0 r0 +#define A0L r0l +#define A0H r0h + +#define A1 r1 +#define A1L r1l +#define A1H r1h + +#define A2 r2 +#define A2L r2l +#define A2H r2h + +#define A3 r3 +#define A3L r3l +#define A3H r3h + +#define S0 r4 +#define S0L r4l +#define S0H r4h + +#define S1 r5 +#define S1L r5l +#define S1H r5h + +#define S2 r6 +#define S2L r6l +#define S2H r6h + +#ifdef __H8300__ +#define PUSHP push +#define POPP pop + +#define A0P r0 +#define A1P r1 +#define A2P r2 +#define A3P r3 +#define S0P r4 +#define S1P r5 +#define S2P r6 +#endif + +#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__) +#define PUSHP push.l +#define POPP pop.l + +#define A0P er0 +#define A1P er1 +#define A2P er2 +#define A3P er3 +#define S0P er4 +#define S1P er5 +#define S2P er6 + +#define A0E e0 +#define A1E e1 +#define A2E e2 +#define A3E e3 +#endif + +#ifdef __H8300H__ +#ifdef __NORMAL_MODE__ + .h8300hn +#else + .h8300h +#endif +#endif + +#ifdef __H8300S__ +#ifdef __NORMAL_MODE__ + .h8300sn +#else + .h8300s +#endif +#endif +#ifdef __H8300SX__ +#ifdef __NORMAL_MODE__ + .h8300sxn +#else + .h8300sx +#endif +#endif + +#ifdef L_cmpsi2 +#ifdef __H8300__ + .section .text + .align 2 + .global ___cmpsi2 +___cmpsi2: + cmp.w A0,A2 + bne .L2 + cmp.w A1,A3 + bne .L4 + mov.w #1,A0 + rts +.L2: + bgt .L5 +.L3: + mov.w #2,A0 + rts +.L4: + bls .L3 +.L5: + sub.w A0,A0 + rts + .end +#endif +#endif /* L_cmpsi2 */ + +#ifdef L_ucmpsi2 +#ifdef __H8300__ + .section .text + .align 2 + .global ___ucmpsi2 +___ucmpsi2: + cmp.w A0,A2 + bne .L2 + cmp.w A1,A3 + bne .L4 + mov.w #1,A0 + rts +.L2: + bhi .L5 +.L3: + mov.w #2,A0 + rts +.L4: + bls .L3 +.L5: + sub.w A0,A0 + rts + .end +#endif +#endif /* L_ucmpsi2 */ + +#ifdef L_divhi3 + +;; HImode divides for the H8/300. +;; We bunch all of this into one object file since there are several +;; "supporting routines". + +; general purpose normalize routine +; +; divisor in A0 +; dividend in A1 +; turns both into +ve numbers, and leaves what the answer sign +; should be in A2L + +#ifdef __H8300__ + .section .text + .align 2 +divnorm: + or A0H,A0H ; is divisor > 0 + stc ccr,A2L + bge _lab1 + not A0H ; no - then make it +ve + not A0L + adds #1,A0 +_lab1: or A1H,A1H ; look at dividend + bge _lab2 + not A1H ; it is -ve, make it positive + not A1L + adds #1,A1 + xor #0x8,A2L; and toggle sign of result +_lab2: rts +;; Basically the same, except that the sign of the divisor determines +;; the sign. +modnorm: + or A0H,A0H ; is divisor > 0 + stc ccr,A2L + bge _lab7 + not A0H ; no - then make it +ve + not A0L + adds #1,A0 +_lab7: or A1H,A1H ; look at dividend + bge _lab8 + not A1H ; it is -ve, make it positive + not A1L + adds #1,A1 +_lab8: rts + +; A0=A0/A1 signed + + .global ___divhi3 +___divhi3: + bsr divnorm + bsr ___udivhi3 +negans: btst #3,A2L ; should answer be negative ? 
+ beq _lab4 + not A0H ; yes, so make it so + not A0L + adds #1,A0 +_lab4: rts + +; A0=A0%A1 signed + + .global ___modhi3 +___modhi3: + bsr modnorm + bsr ___udivhi3 + mov A3,A0 + bra negans + +; A0=A0%A1 unsigned + + .global ___umodhi3 +___umodhi3: + bsr ___udivhi3 + mov A3,A0 + rts + +; A0=A0/A1 unsigned +; A3=A0%A1 unsigned +; A2H trashed +; D high 8 bits of denom +; d low 8 bits of denom +; N high 8 bits of num +; n low 8 bits of num +; M high 8 bits of mod +; m low 8 bits of mod +; Q high 8 bits of quot +; q low 8 bits of quot +; P preserve + +; The H8/300 only has a 16/8 bit divide, so we look at the incoming and +; see how to partition up the expression. + + .global ___udivhi3 +___udivhi3: + ; A0 A1 A2 A3 + ; Nn Dd P + sub.w A3,A3 ; Nn Dd xP 00 + or A1H,A1H + bne divlongway + or A0H,A0H + beq _lab6 + +; we know that D == 0 and N is != 0 + mov.b A0H,A3L ; Nn Dd xP 0N + divxu A1L,A3 ; MQ + mov.b A3L,A0H ; Q +; dealt with N, do n +_lab6: mov.b A0L,A3L ; n + divxu A1L,A3 ; mq + mov.b A3L,A0L ; Qq + mov.b A3H,A3L ; m + mov.b #0x0,A3H ; Qq 0m + rts + +; D != 0 - which means the denominator is +; loop around to get the result. + +divlongway: + mov.b A0H,A3L ; Nn Dd xP 0N + mov.b #0x0,A0H ; high byte of answer has to be zero + mov.b #0x8,A2H ; 8 +div8: add.b A0L,A0L ; n*=2 + rotxl A3L ; Make remainder bigger + rotxl A3H + sub.w A1,A3 ; Q-=N + bhs setbit ; set a bit ? + add.w A1,A3 ; no : too far , Q+=N + + dec A2H + bne div8 ; next bit + rts + +setbit: inc A0L ; do insert bit + dec A2H + bne div8 ; next bit + rts + +#endif /* __H8300__ */ +#endif /* L_divhi3 */ + +#ifdef L_divsi3 + +;; 4 byte integer divides for the H8/300. +;; +;; We have one routine which does all the work and lots of +;; little ones which prepare the args and massage the sign. +;; We bunch all of this into one object file since there are several +;; "supporting routines". + + .section .text + .align 2 + +; Put abs SIs into r0/r1 and r2/r3, and leave a 1 in r6l with sign of rest. +; This function is here to keep branch displacements small. + +#ifdef __H8300__ + +divnorm: + mov.b A0H,A0H ; is the numerator -ve + stc ccr,S2L ; keep the sign in bit 3 of S2L + bge postive + + ; negate arg + not A0H + not A1H + not A0L + not A1L + + add #1,A1L + addx #0,A1H + addx #0,A0L + addx #0,A0H +postive: + mov.b A2H,A2H ; is the denominator -ve + bge postive2 + not A2L + not A2H + not A3L + not A3H + add.b #1,A3L + addx #0,A3H + addx #0,A2L + addx #0,A2H + xor.b #0x08,S2L ; toggle the result sign +postive2: + rts + +;; Basically the same, except that the sign of the divisor determines +;; the sign. +modnorm: + mov.b A0H,A0H ; is the numerator -ve + stc ccr,S2L ; keep the sign in bit 3 of S2L + bge mpostive + + ; negate arg + not A0H + not A1H + not A0L + not A1L + + add #1,A1L + addx #0,A1H + addx #0,A0L + addx #0,A0H +mpostive: + mov.b A2H,A2H ; is the denominator -ve + bge mpostive2 + not A2L + not A2H + not A3L + not A3H + add.b #1,A3L + addx #0,A3H + addx #0,A2L + addx #0,A2H +mpostive2: + rts + +#else /* __H8300H__ */ + +divnorm: + mov.l A0P,A0P ; is the numerator -ve + stc ccr,S2L ; keep the sign in bit 3 of S2L + bge postive + + neg.l A0P ; negate arg + +postive: + mov.l A1P,A1P ; is the denominator -ve + bge postive2 + + neg.l A1P ; negate arg + xor.b #0x08,S2L ; toggle the result sign + +postive2: + rts + +;; Basically the same, except that the sign of the divisor determines +;; the sign. 
+modnorm: + mov.l A0P,A0P ; is the numerator -ve + stc ccr,S2L ; keep the sign in bit 3 of S2L + bge mpostive + + neg.l A0P ; negate arg + +mpostive: + mov.l A1P,A1P ; is the denominator -ve + bge mpostive2 + + neg.l A1P ; negate arg + +mpostive2: + rts + +#endif + +; numerator in A0/A1 +; denominator in A2/A3 + .global ___modsi3 +___modsi3: +#ifdef __H8300__ + PUSHP S2P + PUSHP S0P + PUSHP S1P + bsr modnorm + bsr divmodsi4 + mov S0,A0 + mov S1,A1 + bra exitdiv +#else + PUSHP S2P + bsr modnorm + bsr ___udivsi3 + mov.l er3,er0 + bra exitdiv +#endif + + ;; H8/300H and H8S version of ___udivsi3 is defined later in + ;; the file. +#ifdef __H8300__ + .global ___udivsi3 +___udivsi3: + PUSHP S2P + PUSHP S0P + PUSHP S1P + bsr divmodsi4 + bra reti +#endif + + .global ___umodsi3 +___umodsi3: +#ifdef __H8300__ + PUSHP S2P + PUSHP S0P + PUSHP S1P + bsr divmodsi4 + mov S0,A0 + mov S1,A1 + bra reti +#else + bsr ___udivsi3 + mov.l er3,er0 + rts +#endif + + .global ___divsi3 +___divsi3: +#ifdef __H8300__ + PUSHP S2P + PUSHP S0P + PUSHP S1P + jsr divnorm + jsr divmodsi4 +#else + PUSHP S2P + jsr divnorm + bsr ___udivsi3 +#endif + + ; examine what the sign should be +exitdiv: + btst #3,S2L + beq reti + + ; should be -ve +#ifdef __H8300__ + not A0H + not A1H + not A0L + not A1L + + add #1,A1L + addx #0,A1H + addx #0,A0L + addx #0,A0H +#else /* __H8300H__ */ + neg.l A0P +#endif + +reti: +#ifdef __H8300__ + POPP S1P + POPP S0P +#endif + POPP S2P + rts + + ; takes A0/A1 numerator (A0P for H8/300H) + ; A2/A3 denominator (A1P for H8/300H) + ; returns A0/A1 quotient (A0P for H8/300H) + ; S0/S1 remainder (S0P for H8/300H) + ; trashes S2H + +#ifdef __H8300__ + +divmodsi4: + sub.w S0,S0 ; zero play area + mov.w S0,S1 + mov.b A2H,S2H + or A2L,S2H + or A3H,S2H + bne DenHighNonZero + mov.b A0H,A0H + bne NumByte0Zero + mov.b A0L,A0L + bne NumByte1Zero + mov.b A1H,A1H + bne NumByte2Zero + bra NumByte3Zero +NumByte0Zero: + mov.b A0H,S1L + divxu A3L,S1 + mov.b S1L,A0H +NumByte1Zero: + mov.b A0L,S1L + divxu A3L,S1 + mov.b S1L,A0L +NumByte2Zero: + mov.b A1H,S1L + divxu A3L,S1 + mov.b S1L,A1H +NumByte3Zero: + mov.b A1L,S1L + divxu A3L,S1 + mov.b S1L,A1L + + mov.b S1H,S1L + mov.b #0x0,S1H + rts + +; have to do the divide by shift and test +DenHighNonZero: + mov.b A0H,S1L + mov.b A0L,A0H + mov.b A1H,A0L + mov.b A1L,A1H + + mov.b #0,A1L + mov.b #24,S2H ; only do 24 iterations + +nextbit: + add.w A1,A1 ; double the answer guess + rotxl A0L + rotxl A0H + + rotxl S1L ; double remainder + rotxl S1H + rotxl S0L + rotxl S0H + sub.w A3,S1 ; does it all fit + subx A2L,S0L + subx A2H,S0H + bhs setone + + add.w A3,S1 ; no, restore mistake + addx A2L,S0L + addx A2H,S0H + + dec S2H + bne nextbit + rts + +setone: + inc A1L + dec S2H + bne nextbit + rts + +#else /* __H8300H__ */ + + ;; This function also computes the remainder and stores it in er3. + .global ___udivsi3 +___udivsi3: + mov.w A1E,A1E ; denominator top word 0? 
+ bne DenHighNonZero + + ; do it the easy way, see page 107 in manual + mov.w A0E,A2 + extu.l A2P + divxu.w A1,A2P + mov.w A2E,A0E + divxu.w A1,A0P + mov.w A0E,A3 + mov.w A2,A0E + extu.l A3P + rts + + ; er0 = er0 / er1 + ; er3 = er0 % er1 + ; trashes er1 er2 + ; expects er1 >= 2^16 +DenHighNonZero: + mov.l er0,er3 + mov.l er1,er2 +#ifdef __H8300H__ +divmod_L21: + shlr.l er0 + shlr.l er2 ; make divisor < 2^16 + mov.w e2,e2 + bne divmod_L21 +#else + shlr.l #2,er2 ; make divisor < 2^16 + mov.w e2,e2 + beq divmod_L22A +divmod_L21: + shlr.l #2,er0 +divmod_L22: + shlr.l #2,er2 ; make divisor < 2^16 + mov.w e2,e2 + bne divmod_L21 +divmod_L22A: + rotxl.w r2 + bcs divmod_L23 + shlr.l er0 + bra divmod_L24 +divmod_L23: + rotxr.w r2 + shlr.l #2,er0 +divmod_L24: +#endif + ;; At this point, + ;; er0 contains shifted dividend + ;; er1 contains divisor + ;; er2 contains shifted divisor + ;; er3 contains dividend, later remainder + divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ) + extu.l er0 + beq divmod_L25 + subs #1,er0 ; er0 = AQ - 1 + mov.w e1,r2 + mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor + sub.w r2,e3 ; dividend - 65536 * er2 + mov.w r1,r2 + mulxu.w r0,er2 ; compute er3 = remainder (tentative) + sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor +divmod_L25: + cmp.l er1,er3 ; is divisor < remainder? + blo divmod_L26 + adds #1,er0 + sub.l er1,er3 ; correct the remainder +divmod_L26: + rts + +#endif +#endif /* L_divsi3 */ + +#ifdef L_mulhi3 + +;; HImode multiply. +; The H8/300 only has an 8*8->16 multiply. +; The answer is the same as: +; +; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256 +; (we can ignore A1.h * A0.h cause that will all off the top) +; A0 in +; A1 in +; A0 answer + +#ifdef __H8300__ + .section .text + .align 2 + .global ___mulhi3 +___mulhi3: + mov.b A1L,A2L ; A2l gets srcb.l + mulxu A0L,A2 ; A2 gets first sub product + + mov.b A0H,A3L ; prepare for + mulxu A1L,A3 ; second sub product + + add.b A3L,A2H ; sum first two terms + + mov.b A1H,A3L ; third sub product + mulxu A0L,A3 + + add.b A3L,A2H ; almost there + mov.w A2,A0 ; that is + rts + +#endif +#endif /* L_mulhi3 */ + +#ifdef L_mulsi3 + +;; SImode multiply. +;; +;; I think that shift and add may be sufficient for this. Using the +;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way +;; the inner loop uses maybe 20 cycles + overhead, but terminates +;; quickly on small args. 
+;; +;; A0/A1 src_a +;; A2/A3 src_b +;; +;; while (a) +;; { +;; if (a & 1) +;; r += b; +;; a >>= 1; +;; b <<= 1; +;; } + + .section .text + .align 2 + +#ifdef __H8300__ + + .global ___mulsi3 +___mulsi3: + PUSHP S0P + PUSHP S1P + + sub.w S0,S0 + sub.w S1,S1 + + ; while (a) +_top: mov.w A0,A0 + bne _more + mov.w A1,A1 + beq _done +_more: ; if (a & 1) + bld #0,A1L + bcc _nobit + ; r += b + add.w A3,S1 + addx A2L,S0L + addx A2H,S0H +_nobit: + ; a >>= 1 + shlr A0H + rotxr A0L + rotxr A1H + rotxr A1L + + ; b <<= 1 + add.w A3,A3 + addx A2L,A2L + addx A2H,A2H + bra _top + +_done: + mov.w S0,A0 + mov.w S1,A1 + POPP S1P + POPP S0P + rts + +#else /* __H8300H__ */ + +; +; mulsi3 for H8/300H - based on Renesas SH implementation +; +; by Toshiyasu Morita +; +; Old code: +; +; 16b * 16b = 372 states (worst case) +; 32b * 32b = 724 states (worst case) +; +; New code: +; +; 16b * 16b = 48 states +; 16b * 32b = 72 states +; 32b * 32b = 92 states +; + + .global ___mulsi3 +___mulsi3: + mov.w r1,r2 ; ( 2 states) b * d + mulxu r0,er2 ; (22 states) + + mov.w e0,r3 ; ( 2 states) a * d + beq L_skip1 ; ( 4 states) + mulxu r1,er3 ; (22 states) + add.w r3,e2 ; ( 2 states) + +L_skip1: + mov.w e1,r3 ; ( 2 states) c * b + beq L_skip2 ; ( 4 states) + mulxu r0,er3 ; (22 states) + add.w r3,e2 ; ( 2 states) + +L_skip2: + mov.l er2,er0 ; ( 2 states) + rts ; (10 states) + +#endif +#endif /* L_mulsi3 */ +#ifdef L_fixunssfsi_asm +/* For the h8300 we use asm to save some bytes, to + allow more programs to fit into the tiny address + space. For the H8/300H and H8S, the C version is good enough. */ +#ifdef __H8300__ +/* We still treat NANs different than libgcc2.c, but then, the + behavior is undefined anyways. */ + .global ___fixunssfsi +___fixunssfsi: + cmp.b #0x4f,r0h + bge Large_num + jmp @___fixsfsi +Large_num: + bhi L_huge_num + xor.b #0x80,A0L + bmi L_shift8 +L_huge_num: + mov.w #65535,A0 + mov.w A0,A1 + rts +L_shift8: + mov.b A0L,A0H + mov.b A1H,A0L + mov.b A1L,A1H + mov.b #0,A1L + rts +#endif +#endif /* L_fixunssfsi_asm */ diff --git a/libgcc/config/h8300/t-h8300 b/libgcc/config/h8300/t-h8300 new file mode 100644 index 00000000000..4602ff8b9ef --- /dev/null +++ b/libgcc/config/h8300/t-h8300 @@ -0,0 +1,3 @@ +LIB1ASMSRC = h8300/lib1funcs.S +LIB1ASMFUNCS = _cmpsi2 _ucmpsi2 _divhi3 _divsi3 _mulhi3 _mulsi3 \ + _fixunssfsi_asm diff --git a/libgcc/config/i386/cygwin.S b/libgcc/config/i386/cygwin.S new file mode 100644 index 00000000000..8f9c486850e --- /dev/null +++ b/libgcc/config/i386/cygwin.S @@ -0,0 +1,188 @@ +/* stuff needed for libgcc on win32. + * + * Copyright (C) 1996, 1998, 2001, 2003, 2008, 2009, 2010 + * Free Software Foundation, Inc. + * Written By Steve Chamberlain + * + * This file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) any + * later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * . + */ + +#include "auto-host.h" + +#ifdef HAVE_GAS_CFI_SECTIONS_DIRECTIVE + .cfi_sections .debug_frame +# define cfi_startproc() .cfi_startproc +# define cfi_endproc() .cfi_endproc +# define cfi_adjust_cfa_offset(X) .cfi_adjust_cfa_offset X +# define cfi_def_cfa_register(X) .cfi_def_cfa_register X +# define cfi_register(D,S) .cfi_register D, S +# ifdef _WIN64 +# define cfi_push(X) .cfi_adjust_cfa_offset 8; .cfi_rel_offset X, 0 +# define cfi_pop(X) .cfi_adjust_cfa_offset -8; .cfi_restore X +# else +# define cfi_push(X) .cfi_adjust_cfa_offset 4; .cfi_rel_offset X, 0 +# define cfi_pop(X) .cfi_adjust_cfa_offset -4; .cfi_restore X +# endif +#else +# define cfi_startproc() +# define cfi_endproc() +# define cfi_adjust_cfa_offset(X) +# define cfi_def_cfa_register(X) +# define cfi_register(D,S) +# define cfi_push(X) +# define cfi_pop(X) +#endif /* HAVE_GAS_CFI_SECTIONS_DIRECTIVE */ + +#ifdef L_chkstk +/* Function prologue calls __chkstk to probe the stack when allocating more + than CHECK_STACK_LIMIT bytes in one go. Touching the stack at 4K + increments is necessary to ensure that the guard pages used + by the OS virtual memory manger are allocated in correct sequence. */ + + .global ___chkstk + .global __alloca +#ifdef _WIN64 +/* __alloca is a normal function call, which uses %rcx as the argument. */ + cfi_startproc() +__alloca: + movq %rcx, %rax + /* FALLTHRU */ + +/* ___chkstk is a *special* function call, which uses %rax as the argument. + We avoid clobbering the 4 integer argument registers, %rcx, %rdx, + %r8 and %r9, which leaves us with %rax, %r10, and %r11 to use. */ + .align 4 +___chkstk: + popq %r11 /* pop return address */ + cfi_adjust_cfa_offset(-8) /* indicate return address in r11 */ + cfi_register(%rip, %r11) + movq %rsp, %r10 + cmpq $0x1000, %rax /* > 4k ?*/ + jb 2f + +1: subq $0x1000, %r10 /* yes, move pointer down 4k*/ + orl $0x0, (%r10) /* probe there */ + subq $0x1000, %rax /* decrement count */ + cmpq $0x1000, %rax + ja 1b /* and do it again */ + +2: subq %rax, %r10 + movq %rsp, %rax /* hold CFA until return */ + cfi_def_cfa_register(%rax) + orl $0x0, (%r10) /* less than 4k, just peek here */ + movq %r10, %rsp /* decrement stack */ + + /* Push the return value back. Doing this instead of just + jumping to %r11 preserves the cached call-return stack + used by most modern processors. */ + pushq %r11 + ret + cfi_endproc() +#else + cfi_startproc() +___chkstk: +__alloca: + pushl %ecx /* save temp */ + cfi_push(%eax) + leal 8(%esp), %ecx /* point past return addr */ + cmpl $0x1000, %eax /* > 4k ?*/ + jb 2f + +1: subl $0x1000, %ecx /* yes, move pointer down 4k*/ + orl $0x0, (%ecx) /* probe there */ + subl $0x1000, %eax /* decrement count */ + cmpl $0x1000, %eax + ja 1b /* and do it again */ + +2: subl %eax, %ecx + orl $0x0, (%ecx) /* less than 4k, just peek here */ + movl %esp, %eax /* save current stack pointer */ + cfi_def_cfa_register(%eax) + movl %ecx, %esp /* decrement stack */ + movl (%eax), %ecx /* recover saved temp */ + + /* Copy the return register. Doing this instead of just jumping to + the address preserves the cached call-return stack used by most + modern processors. 
*/ + pushl 4(%eax) + ret + cfi_endproc() +#endif /* _WIN64 */ +#endif /* L_chkstk */ + +#ifdef L_chkstk_ms +/* ___chkstk_ms is a *special* function call, which uses %rax as the argument. + We avoid clobbering any registers. Unlike ___chkstk, it just probes the + stack and does no stack allocation. */ + .global ___chkstk_ms +#ifdef _WIN64 + cfi_startproc() +___chkstk_ms: + pushq %rcx /* save temps */ + cfi_push(%rcx) + pushq %rax + cfi_push(%rax) + cmpq $0x1000, %rax /* > 4k ?*/ + leaq 24(%rsp), %rcx /* point past return addr */ + jb 2f + +1: subq $0x1000, %rcx /* yes, move pointer down 4k */ + orq $0x0, (%rcx) /* probe there */ + subq $0x1000, %rax /* decrement count */ + cmpq $0x1000, %rax + ja 1b /* and do it again */ + +2: subq %rax, %rcx + orq $0x0, (%rcx) /* less than 4k, just peek here */ + + popq %rax + cfi_pop(%rax) + popq %rcx + cfi_pop(%rcx) + ret + cfi_endproc() +#else + cfi_startproc() +___chkstk_ms: + pushl %ecx /* save temp */ + cfi_push(%ecx) + pushl %eax + cfi_push(%eax) + cmpl $0x1000, %eax /* > 4k ?*/ + leal 12(%esp), %ecx /* point past return addr */ + jb 2f + +1: subl $0x1000, %ecx /* yes, move pointer down 4k*/ + orl $0x0, (%ecx) /* probe there */ + subl $0x1000, %eax /* decrement count */ + cmpl $0x1000, %eax + ja 1b /* and do it again */ + +2: subl %eax, %ecx + orl $0x0, (%ecx) /* less than 4k, just peek here */ + + popl %eax + cfi_pop(%eax) + popl %ecx + cfi_pop(%ecx) + ret + cfi_endproc() +#endif /* _WIN64 */ +#endif /* L_chkstk_ms */ diff --git a/libgcc/config/i386/t-chkstk b/libgcc/config/i386/t-chkstk new file mode 100644 index 00000000000..822981faab8 --- /dev/null +++ b/libgcc/config/i386/t-chkstk @@ -0,0 +1,2 @@ +LIB1ASMSRC = i386/cygwin.S +LIB1ASMFUNCS = _chkstk _chkstk_ms diff --git a/libgcc/config/ia64/__divxf3.S b/libgcc/config/ia64/__divxf3.S new file mode 100644 index 00000000000..9cba8f59423 --- /dev/null +++ b/libgcc/config/ia64/__divxf3.S @@ -0,0 +1,11 @@ +#ifdef SHARED +#define __divtf3 __divtf3_compat +#endif + +#define L__divxf3 +#include "config/ia64/lib1funcs.S" + +#ifdef SHARED +#undef __divtf3 +.symver __divtf3_compat, __divtf3@GCC_3.0 +#endif diff --git a/libgcc/config/ia64/__divxf3.asm b/libgcc/config/ia64/__divxf3.asm deleted file mode 100644 index f741bdaf9bc..00000000000 --- a/libgcc/config/ia64/__divxf3.asm +++ /dev/null @@ -1,11 +0,0 @@ -#ifdef SHARED -#define __divtf3 __divtf3_compat -#endif - -#define L__divxf3 -#include "config/ia64/lib1funcs.asm" - -#ifdef SHARED -#undef __divtf3 -.symver __divtf3_compat, __divtf3@GCC_3.0 -#endif diff --git a/libgcc/config/ia64/_fixtfdi.S b/libgcc/config/ia64/_fixtfdi.S new file mode 100644 index 00000000000..863b70f7edc --- /dev/null +++ b/libgcc/config/ia64/_fixtfdi.S @@ -0,0 +1,11 @@ +#ifdef SHARED +#define __fixtfti __fixtfti_compat +#endif + +#define L_fixtfdi +#include "config/ia64/lib1funcs.S" + +#ifdef SHARED +#undef __fixtfti +.symver __fixtfti_compat, __fixtfti@GCC_3.0 +#endif diff --git a/libgcc/config/ia64/_fixtfdi.asm b/libgcc/config/ia64/_fixtfdi.asm deleted file mode 100644 index 4d13c808c51..00000000000 --- a/libgcc/config/ia64/_fixtfdi.asm +++ /dev/null @@ -1,11 +0,0 @@ -#ifdef SHARED -#define __fixtfti __fixtfti_compat -#endif - -#define L_fixtfdi -#include "config/ia64/lib1funcs.asm" - -#ifdef SHARED -#undef __fixtfti -.symver __fixtfti_compat, __fixtfti@GCC_3.0 -#endif diff --git a/libgcc/config/ia64/_fixunstfdi.S b/libgcc/config/ia64/_fixunstfdi.S new file mode 100644 index 00000000000..aac6a284eaa --- /dev/null +++ b/libgcc/config/ia64/_fixunstfdi.S @@ -0,0 +1,11 @@ +#ifdef 
SHARED +#define __fixunstfti __fixunstfti_compat +#endif + +#define L_fixunstfdi +#include "config/ia64/lib1funcs.S" + +#ifdef SHARED +#undef __fixunstfti +.symver __fixunstfti_compat, __fixunstfti@GCC_3.0 +#endif diff --git a/libgcc/config/ia64/_fixunstfdi.asm b/libgcc/config/ia64/_fixunstfdi.asm deleted file mode 100644 index b722d9e90dc..00000000000 --- a/libgcc/config/ia64/_fixunstfdi.asm +++ /dev/null @@ -1,11 +0,0 @@ -#ifdef SHARED -#define __fixunstfti __fixunstfti_compat -#endif - -#define L_fixunstfdi -#include "config/ia64/lib1funcs.asm" - -#ifdef SHARED -#undef __fixunstfti -.symver __fixunstfti_compat, __fixunstfti@GCC_3.0 -#endif diff --git a/libgcc/config/ia64/_floatditf.S b/libgcc/config/ia64/_floatditf.S new file mode 100644 index 00000000000..e37404d26d5 --- /dev/null +++ b/libgcc/config/ia64/_floatditf.S @@ -0,0 +1,11 @@ +#ifdef SHARED +#define __floattitf __floattitf_compat +#endif + +#define L_floatditf +#include "config/ia64/lib1funcs.S" + +#ifdef SHARED +#undef __floattitf +.symver __floattitf_compat, __floattitf@GCC_3.0 +#endif diff --git a/libgcc/config/ia64/_floatditf.asm b/libgcc/config/ia64/_floatditf.asm deleted file mode 100644 index 21d77028176..00000000000 --- a/libgcc/config/ia64/_floatditf.asm +++ /dev/null @@ -1,11 +0,0 @@ -#ifdef SHARED -#define __floattitf __floattitf_compat -#endif - -#define L_floatditf -#include "config/ia64/lib1funcs.asm" - -#ifdef SHARED -#undef __floattitf -.symver __floattitf_compat, __floattitf@GCC_3.0 -#endif diff --git a/libgcc/config/ia64/lib1funcs.S b/libgcc/config/ia64/lib1funcs.S new file mode 100644 index 00000000000..b7eaa6eca3c --- /dev/null +++ b/libgcc/config/ia64/lib1funcs.S @@ -0,0 +1,795 @@ +/* Copyright (C) 2000, 2001, 2003, 2005, 2009 Free Software Foundation, Inc. + Contributed by James E. Wilson . + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef L__divxf3 +// Compute a 80-bit IEEE double-extended quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. +// +// __divtf3 is an alternate symbol name for backward compatibility. 
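The frcpa-based sequences in this file all follow the same pattern: the hardware supplies a low-precision reciprocal approximation, and a short chain of fma/fnma instructions refines both the reciprocal and the quotient estimate. A rough C model of one refinement step (illustrative only; the function name is invented, and the real code runs several such steps in extended precision, rounding only at the end):

    /* One Newton-Raphson refinement of a reciprocal and quotient:
       y' = y + y*(1 - b*y),  q' = q + y'*(a - b*q).
       The assembly below expresses these with fnma/fma.  */
    double
    refine_quotient (double a, double b, double y /* ~ 1/b */, double q /* ~ a/b */)
    {
      double e  = 1.0 - b * y;   /* fnma: reciprocal residual */
      double y1 = y + e * y;     /* fma:  improved reciprocal */
      double r  = a - b * q;     /* fnma: quotient residual   */
      return q + r * y1;         /* fma:  improved quotient   */
    }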
+ + .text + .align 16 + .global __divxf3 + .proc __divxf3 +__divxf3: +#ifdef SHARED + .global __divtf3 +__divtf3: +#endif + cmp.eq p7, p0 = r0, r0 + frcpa.s0 f10, p6 = farg0, farg1 + ;; +(p6) cmp.ne p7, p0 = r0, r0 + .pred.rel.mutex p6, p7 +(p6) fnma.s1 f11 = farg1, f10, f1 +(p6) fma.s1 f12 = farg0, f10, f0 + ;; +(p6) fma.s1 f13 = f11, f11, f0 +(p6) fma.s1 f14 = f11, f11, f11 + ;; +(p6) fma.s1 f11 = f13, f13, f11 +(p6) fma.s1 f13 = f14, f10, f10 + ;; +(p6) fma.s1 f10 = f13, f11, f10 +(p6) fnma.s1 f11 = farg1, f12, farg0 + ;; +(p6) fma.s1 f11 = f11, f10, f12 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f10 = f12, f10, f10 +(p6) fnma.s1 f12 = farg1, f11, farg0 + ;; +(p6) fma.s0 fret0 = f12, f10, f11 +(p7) mov fret0 = f10 + br.ret.sptk rp + .endp __divxf3 +#endif + +#ifdef L__divdf3 +// Compute a 64-bit IEEE double quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. + + .text + .align 16 + .global __divdf3 + .proc __divdf3 +__divdf3: + cmp.eq p7, p0 = r0, r0 + frcpa.s0 f10, p6 = farg0, farg1 + ;; +(p6) cmp.ne p7, p0 = r0, r0 + .pred.rel.mutex p6, p7 +(p6) fmpy.s1 f11 = farg0, f10 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fmpy.s1 f13 = f12, f12 + ;; +(p6) fma.s1 f10 = f12, f10, f10 +(p6) fma.s1 f11 = f13, f11, f11 + ;; +(p6) fmpy.s1 f12 = f13, f13 +(p6) fma.s1 f10 = f13, f10, f10 + ;; +(p6) fma.d.s1 f11 = f12, f11, f11 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fnma.d.s1 f8 = farg1, f11, farg0 + ;; +(p6) fma.d fret0 = f8, f10, f11 +(p7) mov fret0 = f10 + br.ret.sptk rp + ;; + .endp __divdf3 +#endif + +#ifdef L__divsf3 +// Compute a 32-bit IEEE float quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. + + .text + .align 16 + .global __divsf3 + .proc __divsf3 +__divsf3: + cmp.eq p7, p0 = r0, r0 + frcpa.s0 f10, p6 = farg0, farg1 + ;; +(p6) cmp.ne p7, p0 = r0, r0 + .pred.rel.mutex p6, p7 +(p6) fmpy.s1 f8 = farg0, f10 +(p6) fnma.s1 f9 = farg1, f10, f1 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fmpy.s1 f9 = f9, f9 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fmpy.s1 f9 = f9, f9 + ;; +(p6) fma.d.s1 f10 = f9, f8, f8 + ;; +(p6) fnorm.s.s0 fret0 = f10 +(p7) mov fret0 = f10 + br.ret.sptk rp + ;; + .endp __divsf3 +#endif + +#ifdef L__divdi3 +// Compute a 64-bit integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __divdi3 + .proc __divdi3 +__divdi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + // Convert the inputs to FP, so that they won't be treated as unsigned. + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 +(p7) break 1 + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fnma.s1 f11 = f9, f10, f1 +(p6) fmpy.s1 f12 = f8, f10 + ;; +(p6) fmpy.s1 f13 = f11, f11 +(p6) fma.s1 f12 = f11, f12, f12 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; +(p6) fma.s1 f10 = f12, f10, f11 + ;; + // Round quotient to an integer. + fcvt.fx.trunc.s1 f10 = f10 + ;; + // Transfer result to GP registers. 
+ getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __divdi3 +#endif + +#ifdef L__moddi3 +// Compute a 64-bit integer modulus. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend (a). in1 holds the divisor (b). + + .text + .align 16 + .global __moddi3 + .proc __moddi3 +__moddi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f14 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + // Convert the inputs to FP, so that they won't be treated as unsigned. + fcvt.xf f8 = f14 + fcvt.xf f9 = f9 +(p7) break 1 + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f11 = f9, f10, f1 + ;; +(p6) fma.s1 f12 = f11, f12, f12 +(p6) fmpy.s1 f13 = f11, f11 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; + sub in1 = r0, in1 +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f10 = f12, f10, f11 + ;; + fcvt.fx.trunc.s1 f10 = f10 + ;; + // r = q * (-b) + a + xma.l f10 = f10, f9, f14 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __moddi3 +#endif + +#ifdef L__udivdi3 +// Compute a 64-bit unsigned integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __udivdi3 + .proc __udivdi3 +__udivdi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + // Convert the inputs to FP, to avoid FP software-assist faults. + fcvt.xuf.s1 f8 = f8 + fcvt.xuf.s1 f9 = f9 +(p7) break 1 + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fnma.s1 f11 = f9, f10, f1 +(p6) fmpy.s1 f12 = f8, f10 + ;; +(p6) fmpy.s1 f13 = f11, f11 +(p6) fma.s1 f12 = f11, f12, f12 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; +(p6) fma.s1 f10 = f12, f10, f11 + ;; + // Round quotient to an unsigned integer. + fcvt.fxu.trunc.s1 f10 = f10 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __udivdi3 +#endif + +#ifdef L__umoddi3 +// Compute a 64-bit unsigned integer modulus. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend (a). in1 holds the divisor (b). + + .text + .align 16 + .global __umoddi3 + .proc __umoddi3 +__umoddi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f14 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + // Convert the inputs to FP, to avoid FP software assist faults. + fcvt.xuf.s1 f8 = f14 + fcvt.xuf.s1 f9 = f9 +(p7) break 1; + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. 
+(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f11 = f9, f10, f1 + ;; +(p6) fma.s1 f12 = f11, f12, f12 +(p6) fmpy.s1 f13 = f11, f11 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; + sub in1 = r0, in1 +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f10 = f12, f10, f11 + ;; + // Round quotient to an unsigned integer. + fcvt.fxu.trunc.s1 f10 = f10 + ;; + // r = q * (-b) + a + xma.l f10 = f10, f9, f14 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __umoddi3 +#endif + +#ifdef L__divsi3 +// Compute a 32-bit integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __divsi3 + .proc __divsi3 +__divsi3: + .regstk 2,0,0,0 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + sxt4 in0 = in0 + sxt4 in1 = in1 + ;; + setf.sig f8 = in0 + setf.sig f9 = in1 +(p7) break 1 + ;; + mov r2 = 0x0ffdd + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 + ;; +(p6) fmpy.s1 f8 = f8, f10 +(p6) fnma.s1 f9 = f9, f10, f1 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fma.s1 f9 = f9, f9, f11 + ;; +(p6) fma.s1 f10 = f9, f8, f8 + ;; + fcvt.fx.trunc.s1 f10 = f10 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __divsi3 +#endif + +#ifdef L__modsi3 +// Compute a 32-bit integer modulus. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __modsi3 + .proc __modsi3 +__modsi3: + .regstk 2,0,0,0 + mov r2 = 0x0ffdd + sxt4 in0 = in0 + sxt4 in1 = in1 + ;; + setf.sig f13 = r32 + setf.sig f9 = r33 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + sub in1 = r0, in1 + fcvt.xf f8 = f13 + fcvt.xf f9 = f9 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 +(p7) break 1 + ;; +(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f10 = f9, f10, f1 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f12 = f10, f12, f12 +(p6) fma.s1 f10 = f10, f10, f11 + ;; +(p6) fma.s1 f10 = f10, f12, f12 + ;; + fcvt.fx.trunc.s1 f10 = f10 + ;; + xma.l f10 = f10, f9, f13 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __modsi3 +#endif + +#ifdef L__udivsi3 +// Compute a 32-bit unsigned integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __udivsi3 + .proc __udivsi3 +__udivsi3: + .regstk 2,0,0,0 + mov r2 = 0x0ffdd + zxt4 in0 = in0 + zxt4 in1 = in1 + ;; + setf.sig f8 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 +(p7) break 1 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 + ;; +(p6) fmpy.s1 f8 = f8, f10 +(p6) fnma.s1 f9 = f9, f10, f1 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fma.s1 f9 = f9, f9, f11 + ;; +(p6) fma.s1 f10 = f9, f8, f8 + ;; + fcvt.fxu.trunc.s1 f10 = f10 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __udivsi3 +#endif + +#ifdef L__umodsi3 +// Compute a 32-bit unsigned integer modulus. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. 
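As in __moddi3 and __umoddi3 above, the body that follows computes the quotient with the reciprocal sequence and then recovers the remainder with a single multiply-add against the negated divisor. In C terms (an illustrative identity, not part of the moved file; the function name is invented for the sketch), the final xma.l step is:

    /* Remainder from a truncated quotient with one multiply-add,
       matching the "r = q * (-b) + a" step used by these routines.
       All arithmetic is modulo 2^32, so the wrap-around is harmless.  */
    unsigned int
    umod_from_quotient (unsigned int a, unsigned int b)
    {
      unsigned int q = a / b;      /* stands in for the frcpa/fma sequence */
      return q * (0u - b) + a;     /* == a - q * b, i.e. a % b */
    }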
+ + .text + .align 16 + .global __umodsi3 + .proc __umodsi3 +__umodsi3: + .regstk 2,0,0,0 + mov r2 = 0x0ffdd + zxt4 in0 = in0 + zxt4 in1 = in1 + ;; + setf.sig f13 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + sub in1 = r0, in1 + fcvt.xf f8 = f13 + fcvt.xf f9 = f9 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 +(p7) break 1; + ;; +(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f10 = f9, f10, f1 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f12 = f10, f12, f12 +(p6) fma.s1 f10 = f10, f10, f11 + ;; +(p6) fma.s1 f10 = f10, f12, f12 + ;; + fcvt.fxu.trunc.s1 f10 = f10 + ;; + xma.l f10 = f10, f9, f13 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __umodsi3 +#endif + +#ifdef L__save_stack_nonlocal +// Notes on save/restore stack nonlocal: We read ar.bsp but write +// ar.bspstore. This is because ar.bsp can be read at all times +// (independent of the RSE mode) but since it's read-only we need to +// restore the value via ar.bspstore. This is OK because +// ar.bsp==ar.bspstore after executing "flushrs". + +// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer) + + .text + .align 16 + .global __ia64_save_stack_nonlocal + .proc __ia64_save_stack_nonlocal +__ia64_save_stack_nonlocal: + { .mmf + alloc r18 = ar.pfs, 2, 0, 0, 0 + mov r19 = ar.rsc + ;; + } + { .mmi + flushrs + st8 [in0] = in1, 24 + and r19 = 0x1c, r19 + ;; + } + { .mmi + st8 [in0] = r18, -16 + mov ar.rsc = r19 + or r19 = 0x3, r19 + ;; + } + { .mmi + mov r16 = ar.bsp + mov r17 = ar.rnat + adds r2 = 8, in0 + ;; + } + { .mmi + st8 [in0] = r16 + st8 [r2] = r17 + } + { .mib + mov ar.rsc = r19 + br.ret.sptk.few rp + ;; + } + .endp __ia64_save_stack_nonlocal +#endif + +#ifdef L__nonlocal_goto +// void __ia64_nonlocal_goto(void *target_label, void *save_area, +// void *static_chain); + + .text + .align 16 + .global __ia64_nonlocal_goto + .proc __ia64_nonlocal_goto +__ia64_nonlocal_goto: + { .mmi + alloc r20 = ar.pfs, 3, 0, 0, 0 + ld8 r12 = [in1], 8 + mov.ret.sptk rp = in0, .L0 + ;; + } + { .mmf + ld8 r16 = [in1], 8 + mov r19 = ar.rsc + ;; + } + { .mmi + flushrs + ld8 r17 = [in1], 8 + and r19 = 0x1c, r19 + ;; + } + { .mmi + ld8 r18 = [in1] + mov ar.rsc = r19 + or r19 = 0x3, r19 + ;; + } + { .mmi + mov ar.bspstore = r16 + ;; + mov ar.rnat = r17 + ;; + } + { .mmi + loadrs + invala + mov r15 = in2 + ;; + } +.L0: { .mib + mov ar.rsc = r19 + mov ar.pfs = r18 + br.ret.sptk.few rp + ;; + } + .endp __ia64_nonlocal_goto +#endif + +#ifdef L__restore_stack_nonlocal +// This is mostly the same as nonlocal_goto above. +// ??? This has not been tested yet. + +// void __ia64_restore_stack_nonlocal(void *save_area) + + .text + .align 16 + .global __ia64_restore_stack_nonlocal + .proc __ia64_restore_stack_nonlocal +__ia64_restore_stack_nonlocal: + { .mmf + alloc r20 = ar.pfs, 4, 0, 0, 0 + ld8 r12 = [in0], 8 + ;; + } + { .mmb + ld8 r16=[in0], 8 + mov r19 = ar.rsc + ;; + } + { .mmi + flushrs + ld8 r17 = [in0], 8 + and r19 = 0x1c, r19 + ;; + } + { .mmf + ld8 r18 = [in0] + mov ar.rsc = r19 + ;; + } + { .mmi + mov ar.bspstore = r16 + ;; + mov ar.rnat = r17 + or r19 = 0x3, r19 + ;; + } + { .mmf + loadrs + invala + ;; + } +.L0: { .mib + mov ar.rsc = r19 + mov ar.pfs = r18 + br.ret.sptk.few rp + ;; + } + .endp __ia64_restore_stack_nonlocal +#endif + +#ifdef L__trampoline +// Implement the nested function trampoline. This is out of line +// so that we don't have to bother with flushing the icache, as +// well as making the on-stack trampoline smaller. 
+// +// The trampoline has the following form: +// +// +-------------------+ > +// TRAMP: | __ia64_trampoline | | +// +-------------------+ > fake function descriptor +// | TRAMP+16 | | +// +-------------------+ > +// | target descriptor | +// +-------------------+ +// | static link | +// +-------------------+ + + .text + .align 16 + .global __ia64_trampoline + .proc __ia64_trampoline +__ia64_trampoline: + { .mmi + ld8 r2 = [r1], 8 + ;; + ld8 r15 = [r1] + } + { .mmi + ld8 r3 = [r2], 8 + ;; + ld8 r1 = [r2] + mov b6 = r3 + } + { .bbb + br.sptk.many b6 + ;; + } + .endp __ia64_trampoline +#endif + +#ifdef SHARED +// Thunks for backward compatibility. +#ifdef L_fixtfdi + .text + .align 16 + .global __fixtfti + .proc __fixtfti +__fixtfti: + { .bbb + br.sptk.many __fixxfti + ;; + } + .endp __fixtfti +#endif +#ifdef L_fixunstfdi + .align 16 + .global __fixunstfti + .proc __fixunstfti +__fixunstfti: + { .bbb + br.sptk.many __fixunsxfti + ;; + } + .endp __fixunstfti +#endif +#ifdef L_floatditf + .align 16 + .global __floattitf + .proc __floattitf +__floattitf: + { .bbb + br.sptk.many __floattixf + ;; + } + .endp __floattitf +#endif +#endif diff --git a/libgcc/config/ia64/t-hpux b/libgcc/config/ia64/t-hpux index ef3387e7a61..1fee41385c0 100644 --- a/libgcc/config/ia64/t-hpux +++ b/libgcc/config/ia64/t-hpux @@ -1 +1,6 @@ +# On HP-UX we do not want _fixtfdi, _fixunstfdi, or _floatditf from +# LIB1ASMSRC. These functions map the 128 bit conversion function names +# to 80 bit conversions and were done for Linux backwards compatibility. +LIB1ASMFUNCS := $(filter-out _fixtfdi _fixunstfdi _floatditf,$(LIB1ASMFUNCS)) + LIB2ADDEH = $(srcdir)/unwind-c.c diff --git a/libgcc/config/ia64/t-ia64 b/libgcc/config/ia64/t-ia64 index 59cf3aa75f4..80445d8a2a8 100644 --- a/libgcc/config/ia64/t-ia64 +++ b/libgcc/config/ia64/t-ia64 @@ -1,3 +1,16 @@ +LIB1ASMSRC = ia64/lib1funcs.S + +# We use different names for the DImode div/mod files so that they won't +# conflict with libgcc2.c files. We used to use __ia64 as a prefix, now +# we use __ as the prefix. Note that L_divdi3 in libgcc2.c actually defines +# a TImode divide function, so there is no actual overlap here between +# libgcc2.c and lib1funcs.S. +LIB1ASMFUNCS = __divxf3 __divdf3 __divsf3 \ + __divdi3 __moddi3 __udivdi3 __umoddi3 \ + __divsi3 __modsi3 __udivsi3 __umodsi3 __save_stack_nonlocal \ + __nonlocal_goto __restore_stack_nonlocal __trampoline \ + _fixtfdi _fixunstfdi _floatditf + CUSTOM_CRTSTUFF = yes # Assemble startup files. diff --git a/libgcc/config/ia64/t-softfp-compat b/libgcc/config/ia64/t-softfp-compat index d3dad68c48f..00f45d51cd0 100644 --- a/libgcc/config/ia64/t-softfp-compat +++ b/libgcc/config/ia64/t-softfp-compat @@ -3,5 +3,5 @@ # Replace __dvxf3 _fixtfdi _fixunstfdi _floatditf libgcc1-tf-functions = __divxf3 _fixtfdi _fixunstfdi _floatditf LIB1ASMFUNCS := $(filter-out $(libgcc1-tf-functions), $(LIB1ASMFUNCS)) -libgcc1-tf-compats = $(addsuffix .asm, $(libgcc1-tf-functions)) +libgcc1-tf-compats = $(addsuffix .S, $(libgcc1-tf-functions)) LIB2ADD += $(addprefix $(srcdir)/config/ia64/, $(libgcc1-tf-compats)) diff --git a/libgcc/config/m32c/lib1funcs.S b/libgcc/config/m32c/lib1funcs.S new file mode 100644 index 00000000000..9b657787187 --- /dev/null +++ b/libgcc/config/m32c/lib1funcs.S @@ -0,0 +1,231 @@ +/* libgcc routines for R8C/M16C/M32C + Copyright (C) 2005, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Red Hat. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#if defined(__r8c_cpu__) || defined(__m16c_cpu__) +#define A16 +#define A(n,w) n +#define W w +#else +#define A24 +#define A(n,w) w +#define W l +#endif + + +#ifdef L__m32c_memregs + +/* Warning: these memory locations are used as a register bank. They + *must* end up consecutive in any final executable, so you may *not* + use the otherwise obvious ".comm" directive to allocate space for + them. */ + + .bss + .global mem0 +mem0: .space 1 + .global mem1 +mem1: .space 1 + .global mem2 +mem2: .space 1 + .global mem3 +mem3: .space 1 + .global mem4 +mem4: .space 1 + .global mem5 +mem5: .space 1 + .global mem6 +mem6: .space 1 + .global mem7 +mem7: .space 1 + .global mem8 +mem8: .space 1 + .global mem9 +mem9: .space 1 + .global mem10 +mem10: .space 1 + .global mem11 +mem11: .space 1 + .global mem12 +mem12: .space 1 + .global mem13 +mem13: .space 1 + .global mem14 +mem14: .space 1 + .global mem15 +mem15: .space 1 + +#endif + +#ifdef L__m32c_eh_return + .text + .global __m32c_eh_return +__m32c_eh_return: + + /* At this point, r0 has the stack adjustment, r1r3 has the + address to return to. The stack looks like this: + + old_ra + old_fp + <- unwound sp + ... + fb + through + r0 + <- sp + + What we need to do is restore all the registers, update the + stack, and return to the right place. + */ + + stc sp,a0 + + add.W A(#16,#24),a0 + /* a0 points to the current stack, just above the register + save areas */ + + mov.w a0,a1 + exts.w r0 + sub.W A(r0,r2r0),a1 + sub.W A(#3,#4),a1 + /* a1 points to the new stack. */ + + /* This is for the "rts" below. */ + mov.w r1,[a1] +#ifdef A16 + mov.w r2,r1 + mov.b r1l,2[a1] +#else + mov.w r2,2[a1] +#endif + + /* This is for the "popc sp" below. */ + mov.W a1,[a0] + + popm r0,r1,r2,r3,a0,a1,sb,fb + popc sp + rts +#endif + +/* SImode arguments for SI foo(SI,SI) functions. 
*/ +#ifdef A16 +#define SAL 5[fb] +#define SAH 7[fb] +#define SBL 9[fb] +#define SBH 11[fb] +#else +#define SAL 8[fb] +#define SAH 10[fb] +#define SBL 12[fb] +#define SBH 14[fb] +#endif + +#ifdef L__m32c_mulsi3 + .text + .global ___mulsi3 +___mulsi3: + enter #0 + push.w r2 + mov.w SAL,r0 + mulu.w SBL,r0 /* writes to r2r0 */ + mov.w r0,mem0 + mov.w r2,mem2 + mov.w SAL,r0 + mulu.w SBH,r0 /* writes to r2r0 */ + add.w r0,mem2 + mov.w SAH,r0 + mulu.w SBL,r0 /* writes to r2r0 */ + add.w r0,mem2 + pop.w r2 + exitd +#endif + +#ifdef L__m32c_cmpsi2 + .text + .global ___cmpsi2 +___cmpsi2: + enter #0 + cmp.w SBH,SAH + jgt cmpsi_gt + jlt cmpsi_lt + cmp.w SBL,SAL + jgt cmpsi_gt + jlt cmpsi_lt + mov.w #1,r0 + exitd +cmpsi_gt: + mov.w #2,r0 + exitd +cmpsi_lt: + mov.w #0,r0 + exitd +#endif + +#ifdef L__m32c_ucmpsi2 + .text + .global ___ucmpsi2 +___ucmpsi2: + enter #0 + cmp.w SBH,SAH + jgtu cmpsi_gt + jltu cmpsi_lt + cmp.w SBL,SAL + jgtu cmpsi_gt + jltu cmpsi_lt + mov.w #1,r0 + exitd +cmpsi_gt: + mov.w #2,r0 + exitd +cmpsi_lt: + mov.w #0,r0 + exitd +#endif + +#ifdef L__m32c_jsri16 + .text +#ifdef A16 + .global m32c_jsri16 +m32c_jsri16: + add.w #-1, sp + + /* Read the address (16 bits) and return address (24 bits) off + the stack. */ + mov.w 4[sp], r0 + mov.w 1[sp], r3 + mov.b 3[sp], a0 /* This zero-extends, so the high byte has + zero in it. */ + + /* Write the return address, then new address, to the stack. */ + mov.w a0, 1[sp] /* Just to get the zero in 2[sp]. */ + mov.w r0, 0[sp] + mov.w r3, 3[sp] + mov.b a0, 5[sp] + + /* This "returns" to the target address, leaving the pending + return address on the stack. */ + rts +#endif + +#endif diff --git a/libgcc/config/m32c/t-m32c b/libgcc/config/m32c/t-m32c new file mode 100644 index 00000000000..d21483750fd --- /dev/null +++ b/libgcc/config/m32c/t-m32c @@ -0,0 +1,9 @@ +LIB1ASMSRC = m32c/lib1funcs.S + +LIB1ASMFUNCS = \ + __m32c_memregs \ + __m32c_eh_return \ + __m32c_mulsi3 \ + __m32c_cmpsi2 \ + __m32c_ucmpsi2 \ + __m32c_jsri16 diff --git a/libgcc/config/m32r/initfini.c b/libgcc/config/m32r/initfini.c index 6e7d58614c7..56332459223 100644 --- a/libgcc/config/m32r/initfini.c +++ b/libgcc/config/m32r/initfini.c @@ -1,5 +1,5 @@ /* .init/.fini section handling + C++ global constructor/destructor handling. - This file is based on crtstuff.c, sol2-crti.asm, sol2-crtn.asm. + This file is based on crtstuff.c, sol2-crti.S, sol2-crtn.S. Copyright (C) 1996, 1997, 1998, 2006, 2009 Free Software Foundation, Inc. diff --git a/libgcc/config/m68k/lb1sf68.S b/libgcc/config/m68k/lb1sf68.S new file mode 100644 index 00000000000..0339a092c4f --- /dev/null +++ b/libgcc/config/m68k/lb1sf68.S @@ -0,0 +1,4116 @@ +/* libgcc routines for 68000 w/o floating-point hardware. + Copyright (C) 1994, 1996, 1997, 1998, 2008, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Use this one for any 680x0; assumes no floating point hardware. + The trailing " '" appearing on some lines is for ANSI preprocessors. Yuk. + Some of this code comes from MINIX, via the folks at ericsson. + D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992 +*/ + +/* These are predefined by new versions of GNU cpp. */ + +#ifndef __USER_LABEL_PREFIX__ +#define __USER_LABEL_PREFIX__ _ +#endif + +#ifndef __REGISTER_PREFIX__ +#define __REGISTER_PREFIX__ +#endif + +#ifndef __IMMEDIATE_PREFIX__ +#define __IMMEDIATE_PREFIX__ # +#endif + +/* ANSI concatenation macros. */ + +#define CONCAT1(a, b) CONCAT2(a, b) +#define CONCAT2(a, b) a ## b + +/* Use the right prefix for global labels. */ + +#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) + +/* Note that X is a function. */ + +#ifdef __ELF__ +#define FUNC(x) .type SYM(x),function +#else +/* The .proc pseudo-op is accepted, but ignored, by GAS. We could just + define this to the empty string for non-ELF systems, but defining it + to .proc means that the information is available to the assembler if + the need arises. */ +#define FUNC(x) .proc +#endif + +/* Use the right prefix for registers. */ + +#define REG(x) CONCAT1 (__REGISTER_PREFIX__, x) + +/* Use the right prefix for immediate values. */ + +#define IMM(x) CONCAT1 (__IMMEDIATE_PREFIX__, x) + +#define d0 REG (d0) +#define d1 REG (d1) +#define d2 REG (d2) +#define d3 REG (d3) +#define d4 REG (d4) +#define d5 REG (d5) +#define d6 REG (d6) +#define d7 REG (d7) +#define a0 REG (a0) +#define a1 REG (a1) +#define a2 REG (a2) +#define a3 REG (a3) +#define a4 REG (a4) +#define a5 REG (a5) +#define a6 REG (a6) +#define fp REG (fp) +#define sp REG (sp) +#define pc REG (pc) + +/* Provide a few macros to allow for PIC code support. + * With PIC, data is stored A5 relative so we've got to take a bit of special + * care to ensure that all loads of global data is via A5. PIC also requires + * jumps and subroutine calls to be PC relative rather than absolute. We cheat + * a little on this and in the PIC case, we use short offset branches and + * hope that the final object code is within range (which it should be). 
+ */ +#ifndef __PIC__ + + /* Non PIC (absolute/relocatable) versions */ + + .macro PICCALL addr + jbsr \addr + .endm + + .macro PICJUMP addr + jmp \addr + .endm + + .macro PICLEA sym, reg + lea \sym, \reg + .endm + + .macro PICPEA sym, areg + pea \sym + .endm + +#else /* __PIC__ */ + +# if defined (__uClinux__) + + /* Versions for uClinux */ + +# if defined(__ID_SHARED_LIBRARY__) + + /* -mid-shared-library versions */ + + .macro PICLEA sym, reg + movel a5@(_current_shared_library_a5_offset_), \reg + movel \sym@GOT(\reg), \reg + .endm + + .macro PICPEA sym, areg + movel a5@(_current_shared_library_a5_offset_), \areg + movel \sym@GOT(\areg), sp@- + .endm + + .macro PICCALL addr + PICLEA \addr,a0 + jsr a0@ + .endm + + .macro PICJUMP addr + PICLEA \addr,a0 + jmp a0@ + .endm + +# else /* !__ID_SHARED_LIBRARY__ */ + + /* Versions for -msep-data */ + + .macro PICLEA sym, reg + movel \sym@GOT(a5), \reg + .endm + + .macro PICPEA sym, areg + movel \sym@GOT(a5), sp@- + .endm + + .macro PICCALL addr +#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__) + lea \addr-.-8,a0 + jsr pc@(a0) +#else + jbsr \addr +#endif + .endm + + .macro PICJUMP addr + /* ISA C has no bra.l instruction, and since this assembly file + gets assembled into multiple object files, we avoid the + bra instruction entirely. */ +#if defined (__mcoldfire__) && !defined (__mcfisab__) + lea \addr-.-8,a0 + jmp pc@(a0) +#else + bra \addr +#endif + .endm + +# endif + +# else /* !__uClinux__ */ + + /* Versions for Linux */ + + .macro PICLEA sym, reg + movel #_GLOBAL_OFFSET_TABLE_@GOTPC, \reg + lea (-6, pc, \reg), \reg + movel \sym@GOT(\reg), \reg + .endm + + .macro PICPEA sym, areg + movel #_GLOBAL_OFFSET_TABLE_@GOTPC, \areg + lea (-6, pc, \areg), \areg + movel \sym@GOT(\areg), sp@- + .endm + + .macro PICCALL addr +#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__) + lea \addr-.-8,a0 + jsr pc@(a0) +#else + jbsr \addr +#endif + .endm + + .macro PICJUMP addr + /* ISA C has no bra.l instruction, and since this assembly file + gets assembled into multiple object files, we avoid the + bra instruction entirely. */ +#if defined (__mcoldfire__) && !defined (__mcfisab__) + lea \addr-.-8,a0 + jmp pc@(a0) +#else + bra \addr +#endif + .endm + +# endif +#endif /* __PIC__ */ + + +#ifdef L_floatex + +| This is an attempt at a decent floating point (single, double and +| extended double) code for the GNU C compiler. It should be easy to +| adapt to other compilers (but beware of the local labels!). + +| Starting date: 21 October, 1990 + +| It is convenient to introduce the notation (s,e,f) for a floating point +| number, where s=sign, e=exponent, f=fraction. We will call a floating +| point number fpn to abbreviate, independently of the precision. +| Let MAX_EXP be in each case the maximum exponent (255 for floats, 1023 +| for doubles and 16383 for long doubles). We then have the following +| different cases: +| 1. Normalized fpns have 0 < e < MAX_EXP. They correspond to +| (-1)^s x 1.f x 2^(e-bias-1). +| 2. Denormalized fpns have e=0. They correspond to numbers of the form +| (-1)^s x 0.f x 2^(-bias). +| 3. +/-INFINITY have e=MAX_EXP, f=0. +| 4. Quiet NaN (Not a Number) have all bits set. +| 5. Signaling NaN (Not a Number) have s=0, e=MAX_EXP, f=1. 
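For orientation, the classification above corresponds to the usual test on the raw exponent and fraction fields. A small C sketch for the single-precision case (MAX_EXP = 255; illustrative only, with its own enum and function names to avoid clashing with <math.h>):

    #include <stdint.h>

    enum sf_class { SF_ZERO, SF_DENORMAL, SF_NORMAL, SF_INFINITE, SF_NAN };

    static enum sf_class
    classify_sf (uint32_t bits)
    {
      uint32_t e = (bits >> 23) & 0xff;   /* exponent field */
      uint32_t f = bits & 0x7fffff;       /* fraction field */
      if (e == 0)
        return f ? SF_DENORMAL : SF_ZERO;
      if (e == 0xff)                      /* MAX_EXP */
        return f ? SF_NAN : SF_INFINITE;
      return SF_NORMAL;
    }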
+ +|============================================================================= +| exceptions +|============================================================================= + +| This is the floating point condition code register (_fpCCR): +| +| struct { +| short _exception_bits; +| short _trap_enable_bits; +| short _sticky_bits; +| short _rounding_mode; +| short _format; +| short _last_operation; +| union { +| float sf; +| double df; +| } _operand1; +| union { +| float sf; +| double df; +| } _operand2; +| } _fpCCR; + + .data + .even + + .globl SYM (_fpCCR) + +SYM (_fpCCR): +__exception_bits: + .word 0 +__trap_enable_bits: + .word 0 +__sticky_bits: + .word 0 +__rounding_mode: + .word ROUND_TO_NEAREST +__format: + .word NIL +__last_operation: + .word NOOP +__operand1: + .long 0 + .long 0 +__operand2: + .long 0 + .long 0 + +| Offsets: +EBITS = __exception_bits - SYM (_fpCCR) +TRAPE = __trap_enable_bits - SYM (_fpCCR) +STICK = __sticky_bits - SYM (_fpCCR) +ROUND = __rounding_mode - SYM (_fpCCR) +FORMT = __format - SYM (_fpCCR) +LASTO = __last_operation - SYM (_fpCCR) +OPER1 = __operand1 - SYM (_fpCCR) +OPER2 = __operand2 - SYM (_fpCCR) + +| The following exception types are supported: +INEXACT_RESULT = 0x0001 +UNDERFLOW = 0x0002 +OVERFLOW = 0x0004 +DIVIDE_BY_ZERO = 0x0008 +INVALID_OPERATION = 0x0010 + +| The allowed rounding modes are: +UNKNOWN = -1 +ROUND_TO_NEAREST = 0 | round result to nearest representable value +ROUND_TO_ZERO = 1 | round result towards zero +ROUND_TO_PLUS = 2 | round result towards plus infinity +ROUND_TO_MINUS = 3 | round result towards minus infinity + +| The allowed values of format are: +NIL = 0 +SINGLE_FLOAT = 1 +DOUBLE_FLOAT = 2 +LONG_FLOAT = 3 + +| The allowed values for the last operation are: +NOOP = 0 +ADD = 1 +MULTIPLY = 2 +DIVIDE = 3 +NEGATE = 4 +COMPARE = 5 +EXTENDSFDF = 6 +TRUNCDFSF = 7 + +|============================================================================= +| __clear_sticky_bits +|============================================================================= + +| The sticky bits are normally not cleared (thus the name), whereas the +| exception type and exception value reflect the last computation. +| This routine is provided to clear them (you can also write to _fpCCR, +| since it is globally visible). + + .globl SYM (__clear_sticky_bit) + + .text + .even + +| void __clear_sticky_bits(void); +SYM (__clear_sticky_bit): + PICLEA SYM (_fpCCR),a0 +#ifndef __mcoldfire__ + movew IMM (0),a0@(STICK) +#else + clr.w a0@(STICK) +#endif + rts + +|============================================================================= +| $_exception_handler +|============================================================================= + + .globl $_exception_handler + + .text + .even + +| This is the common exit point if an exception occurs. +| NOTE: it is NOT callable from C! +| It expects the exception type in d7, the format (SINGLE_FLOAT, +| DOUBLE_FLOAT or LONG_FLOAT) in d6, and the last operation code in d5. +| It sets the corresponding exception and sticky bits, and the format. +| Depending on the format if fills the corresponding slots for the +| operands which produced the exception (all this information is provided +| so if you write your own exception handlers you have enough information +| to deal with the problem). +| Then checks to see if the corresponding exception is trap-enabled, +| in which case it pushes the address of _fpCCR and traps through +| trap FPTRAP (15 for the moment). 
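Since _fpCCR is ordinary data, C code on the same target can examine it after calling these routines. A hedged sketch follows: the struct layout is copied from the comment above, the tag and function names are the sketch's own, and it assumes the C compiler applies the same user-label prefix as the SYM() convention in this file (which holds by construction):

    /* Mirror of the _fpCCR layout documented above.  Illustrative only.  */
    struct fpccr
    {
      short exception_bits;
      short trap_enable_bits;
      short sticky_bits;
      short rounding_mode;
      short format;
      short last_operation;
      union { float sf; double df; } operand1;
      union { float sf; double df; } operand2;
    };

    extern struct fpccr _fpCCR;            /* defined in this file */

    /* Example: has any operation since the last clear divided by zero?  */
    int
    saw_divide_by_zero (void)
    {
      return (_fpCCR.sticky_bits & 0x0008) != 0;   /* DIVIDE_BY_ZERO */
    }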
+ +FPTRAP = 15 + +$_exception_handler: + PICLEA SYM (_fpCCR),a0 + movew d7,a0@(EBITS) | set __exception_bits +#ifndef __mcoldfire__ + orw d7,a0@(STICK) | and __sticky_bits +#else + movew a0@(STICK),d4 + orl d7,d4 + movew d4,a0@(STICK) +#endif + movew d6,a0@(FORMT) | and __format + movew d5,a0@(LASTO) | and __last_operation + +| Now put the operands in place: +#ifndef __mcoldfire__ + cmpw IMM (SINGLE_FLOAT),d6 +#else + cmpl IMM (SINGLE_FLOAT),d6 +#endif + beq 1f + movel a6@(8),a0@(OPER1) + movel a6@(12),a0@(OPER1+4) + movel a6@(16),a0@(OPER2) + movel a6@(20),a0@(OPER2+4) + bra 2f +1: movel a6@(8),a0@(OPER1) + movel a6@(12),a0@(OPER2) +2: +| And check whether the exception is trap-enabled: +#ifndef __mcoldfire__ + andw a0@(TRAPE),d7 | is exception trap-enabled? +#else + clrl d6 + movew a0@(TRAPE),d6 + andl d6,d7 +#endif + beq 1f | no, exit + PICPEA SYM (_fpCCR),a1 | yes, push address of _fpCCR + trap IMM (FPTRAP) | and trap +#ifndef __mcoldfire__ +1: moveml sp@+,d2-d7 | restore data registers +#else +1: moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | and return + rts +#endif /* L_floatex */ + +#ifdef L_mulsi3 + .text + FUNC(__mulsi3) + .globl SYM (__mulsi3) +SYM (__mulsi3): + movew sp@(4), d0 /* x0 -> d0 */ + muluw sp@(10), d0 /* x0*y1 */ + movew sp@(6), d1 /* x1 -> d1 */ + muluw sp@(8), d1 /* x1*y0 */ +#ifndef __mcoldfire__ + addw d1, d0 +#else + addl d1, d0 +#endif + swap d0 + clrw d0 + movew sp@(6), d1 /* x1 -> d1 */ + muluw sp@(10), d1 /* x1*y1 */ + addl d1, d0 + + rts +#endif /* L_mulsi3 */ + +#ifdef L_udivsi3 + .text + FUNC(__udivsi3) + .globl SYM (__udivsi3) +SYM (__udivsi3): +#ifndef __mcoldfire__ + movel d2, sp@- + movel sp@(12), d1 /* d1 = divisor */ + movel sp@(8), d0 /* d0 = dividend */ + + cmpl IMM (0x10000), d1 /* divisor >= 2 ^ 16 ? */ + jcc L3 /* then try next algorithm */ + movel d0, d2 + clrw d2 + swap d2 + divu d1, d2 /* high quotient in lower word */ + movew d2, d0 /* save high quotient */ + swap d0 + movew sp@(10), d2 /* get low dividend + high rest */ + divu d1, d2 /* low quotient */ + movew d2, d0 + jra L6 + +L3: movel d1, d2 /* use d2 as divisor backup */ +L4: lsrl IMM (1), d1 /* shift divisor */ + lsrl IMM (1), d0 /* shift dividend */ + cmpl IMM (0x10000), d1 /* still divisor >= 2 ^ 16 ? */ + jcc L4 + divu d1, d0 /* now we have 16-bit divisor */ + andl IMM (0xffff), d0 /* mask out divisor, ignore remainder */ + +/* Multiply the 16-bit tentative quotient with the 32-bit divisor. Because of + the operand ranges, this might give a 33-bit product. If this product is + greater than the dividend, the tentative quotient was too large. */ + movel d2, d1 + mulu d0, d1 /* low part, 32 bits */ + swap d2 + mulu d0, d2 /* high part, at most 17 bits */ + swap d2 /* align high part with low part */ + tstw d2 /* high part 17 bits? */ + jne L5 /* if 17 bits, quotient was too large */ + addl d2, d1 /* add parts */ + jcs L5 /* if sum is 33 bits, quotient was too large */ + cmpl sp@(8), d1 /* compare the sum with the dividend */ + jls L6 /* if sum > dividend, quotient was too large */ +L5: subql IMM (1), d0 /* adjust quotient */ + +L6: movel sp@+, d2 + rts + +#else /* __mcoldfire__ */ + +/* ColdFire implementation of non-restoring division algorithm from + Hennessy & Patterson, Appendix A. 
*/ + link a6,IMM (-12) + moveml d2-d4,sp@ + movel a6@(8),d0 + movel a6@(12),d1 + clrl d2 | clear p + moveq IMM (31),d4 +L1: addl d0,d0 | shift reg pair (p,a) one bit left + addxl d2,d2 + movl d2,d3 | subtract b from p, store in tmp. + subl d1,d3 + jcs L2 | if no carry, + bset IMM (0),d0 | set the low order bit of a to 1, + movl d3,d2 | and store tmp in p. +L2: subql IMM (1),d4 + jcc L1 + moveml sp@,d2-d4 | restore data registers + unlk a6 | and return + rts +#endif /* __mcoldfire__ */ + +#endif /* L_udivsi3 */ + +#ifdef L_divsi3 + .text + FUNC(__divsi3) + .globl SYM (__divsi3) +SYM (__divsi3): + movel d2, sp@- + + moveq IMM (1), d2 /* sign of result stored in d2 (=1 or =-1) */ + movel sp@(12), d1 /* d1 = divisor */ + jpl L1 + negl d1 +#ifndef __mcoldfire__ + negb d2 /* change sign because divisor <0 */ +#else + negl d2 /* change sign because divisor <0 */ +#endif +L1: movel sp@(8), d0 /* d0 = dividend */ + jpl L2 + negl d0 +#ifndef __mcoldfire__ + negb d2 +#else + negl d2 +#endif + +L2: movel d1, sp@- + movel d0, sp@- + PICCALL SYM (__udivsi3) /* divide abs(dividend) by abs(divisor) */ + addql IMM (8), sp + + tstb d2 + jpl L3 + negl d0 + +L3: movel sp@+, d2 + rts +#endif /* L_divsi3 */ + +#ifdef L_umodsi3 + .text + FUNC(__umodsi3) + .globl SYM (__umodsi3) +SYM (__umodsi3): + movel sp@(8), d1 /* d1 = divisor */ + movel sp@(4), d0 /* d0 = dividend */ + movel d1, sp@- + movel d0, sp@- + PICCALL SYM (__udivsi3) + addql IMM (8), sp + movel sp@(8), d1 /* d1 = divisor */ +#ifndef __mcoldfire__ + movel d1, sp@- + movel d0, sp@- + PICCALL SYM (__mulsi3) /* d0 = (a/b)*b */ + addql IMM (8), sp +#else + mulsl d1,d0 +#endif + movel sp@(4), d1 /* d1 = dividend */ + subl d0, d1 /* d1 = a - (a/b)*b */ + movel d1, d0 + rts +#endif /* L_umodsi3 */ + +#ifdef L_modsi3 + .text + FUNC(__modsi3) + .globl SYM (__modsi3) +SYM (__modsi3): + movel sp@(8), d1 /* d1 = divisor */ + movel sp@(4), d0 /* d0 = dividend */ + movel d1, sp@- + movel d0, sp@- + PICCALL SYM (__divsi3) + addql IMM (8), sp + movel sp@(8), d1 /* d1 = divisor */ +#ifndef __mcoldfire__ + movel d1, sp@- + movel d0, sp@- + PICCALL SYM (__mulsi3) /* d0 = (a/b)*b */ + addql IMM (8), sp +#else + mulsl d1,d0 +#endif + movel sp@(4), d1 /* d1 = dividend */ + subl d0, d1 /* d1 = a - (a/b)*b */ + movel d1, d0 + rts +#endif /* L_modsi3 */ + + +#ifdef L_double + + .globl SYM (_fpCCR) + .globl $_exception_handler + +QUIET_NaN = 0xffffffff + +D_MAX_EXP = 0x07ff +D_BIAS = 1022 +DBL_MAX_EXP = D_MAX_EXP - D_BIAS +DBL_MIN_EXP = 1 - D_BIAS +DBL_MANT_DIG = 53 + +INEXACT_RESULT = 0x0001 +UNDERFLOW = 0x0002 +OVERFLOW = 0x0004 +DIVIDE_BY_ZERO = 0x0008 +INVALID_OPERATION = 0x0010 + +DOUBLE_FLOAT = 2 + +NOOP = 0 +ADD = 1 +MULTIPLY = 2 +DIVIDE = 3 +NEGATE = 4 +COMPARE = 5 +EXTENDSFDF = 6 +TRUNCDFSF = 7 + +UNKNOWN = -1 +ROUND_TO_NEAREST = 0 | round result to nearest representable value +ROUND_TO_ZERO = 1 | round result towards zero +ROUND_TO_PLUS = 2 | round result towards plus infinity +ROUND_TO_MINUS = 3 | round result towards minus infinity + +| Entry points: + + .globl SYM (__adddf3) + .globl SYM (__subdf3) + .globl SYM (__muldf3) + .globl SYM (__divdf3) + .globl SYM (__negdf2) + .globl SYM (__cmpdf2) + .globl SYM (__cmpdf2_internal) + .hidden SYM (__cmpdf2_internal) + + .text + .even + +| These are common routines to return and signal exceptions. 
+ +Ld$den: +| Return and signal a denormalized number + orl d7,d0 + movew IMM (INEXACT_RESULT+UNDERFLOW),d7 + moveq IMM (DOUBLE_FLOAT),d6 + PICJUMP $_exception_handler + +Ld$infty: +Ld$overflow: +| Return a properly signed INFINITY and set the exception flags + movel IMM (0x7ff00000),d0 + movel IMM (0),d1 + orl d7,d0 + movew IMM (INEXACT_RESULT+OVERFLOW),d7 + moveq IMM (DOUBLE_FLOAT),d6 + PICJUMP $_exception_handler + +Ld$underflow: +| Return 0 and set the exception flags + movel IMM (0),d0 + movel d0,d1 + movew IMM (INEXACT_RESULT+UNDERFLOW),d7 + moveq IMM (DOUBLE_FLOAT),d6 + PICJUMP $_exception_handler + +Ld$inop: +| Return a quiet NaN and set the exception flags + movel IMM (QUIET_NaN),d0 + movel d0,d1 + movew IMM (INEXACT_RESULT+INVALID_OPERATION),d7 + moveq IMM (DOUBLE_FLOAT),d6 + PICJUMP $_exception_handler + +Ld$div$0: +| Return a properly signed INFINITY and set the exception flags + movel IMM (0x7ff00000),d0 + movel IMM (0),d1 + orl d7,d0 + movew IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7 + moveq IMM (DOUBLE_FLOAT),d6 + PICJUMP $_exception_handler + +|============================================================================= +|============================================================================= +| double precision routines +|============================================================================= +|============================================================================= + +| A double precision floating point number (double) has the format: +| +| struct _double { +| unsigned int sign : 1; /* sign bit */ +| unsigned int exponent : 11; /* exponent, shifted by 126 */ +| unsigned int fraction : 52; /* fraction */ +| } double; +| +| Thus sizeof(double) = 8 (64 bits). +| +| All the routines are callable from C programs, and return the result +| in the register pair d0-d1. They also preserve all registers except +| d0-d1 and a0-a1. 
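The double-precision routines below all begin by splitting their operands into the sign/exponent/fraction fields described above. An illustrative C helper doing the same split on the high/low word pair (the d0-d1 pairing mentioned above); the struct and function names are invented for the sketch:

    #include <stdint.h>

    struct dbl_fields { int sign; int exponent; uint64_t fraction; };

    /* Split an IEEE double given as its high and low 32-bit words.
       For a normalized number the value is
       (-1)^sign * 1.fraction * 2^(exponent - 1023).  Illustrative only.  */
    static struct dbl_fields
    split_double (uint32_t hi, uint32_t lo)
    {
      struct dbl_fields r;
      r.sign     = hi >> 31;
      r.exponent = (hi >> 20) & 0x7ff;
      r.fraction = (((uint64_t) hi & 0xfffff) << 32) | lo;
      return r;
    }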
+ +|============================================================================= +| __subdf3 +|============================================================================= + +| double __subdf3(double, double); + FUNC(__subdf3) +SYM (__subdf3): + bchg IMM (31),sp@(12) | change sign of second operand + | and fall through, so we always add +|============================================================================= +| __adddf3 +|============================================================================= + +| double __adddf3(double, double); + FUNC(__adddf3) +SYM (__adddf3): +#ifndef __mcoldfire__ + link a6,IMM (0) | everything will be done in registers + moveml d2-d7,sp@- | save all data registers and a2 (but d0-d1) +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + movel a6@(8),d0 | get first operand + movel a6@(12),d1 | + movel a6@(16),d2 | get second operand + movel a6@(20),d3 | + + movel d0,d7 | get d0's sign bit in d7 ' + addl d1,d1 | check and clear sign bit of a, and gain one + addxl d0,d0 | bit of extra precision + beq Ladddf$b | if zero return second operand + + movel d2,d6 | save sign in d6 + addl d3,d3 | get rid of sign bit and gain one bit of + addxl d2,d2 | extra precision + beq Ladddf$a | if zero return first operand + + andl IMM (0x80000000),d7 | isolate a's sign bit ' + swap d6 | and also b's sign bit ' +#ifndef __mcoldfire__ + andw IMM (0x8000),d6 | + orw d6,d7 | and combine them into d7, so that a's sign ' + | bit is in the high word and b's is in the ' + | low word, so d6 is free to be used +#else + andl IMM (0x8000),d6 + orl d6,d7 +#endif + movel d7,a0 | now save d7 into a0, so d7 is free to + | be used also + +| Get the exponents and check for denormalized and/or infinity. + + movel IMM (0x001fffff),d6 | mask for the fraction + movel IMM (0x00200000),d7 | mask to put hidden bit back + + movel d0,d4 | + andl d6,d0 | get fraction in d0 + notl d6 | make d6 into mask for the exponent + andl d6,d4 | get exponent in d4 + beq Ladddf$a$den | branch if a is denormalized + cmpl d6,d4 | check for INFINITY or NaN + beq Ladddf$nf | + orl d7,d0 | and put hidden bit back +Ladddf$1: + swap d4 | shift right exponent so that it starts +#ifndef __mcoldfire__ + lsrw IMM (5),d4 | in bit 0 and not bit 20 +#else + lsrl IMM (5),d4 | in bit 0 and not bit 20 +#endif +| Now we have a's exponent in d4 and fraction in d0-d1 ' + movel d2,d5 | save b to get exponent + andl d6,d5 | get exponent in d5 + beq Ladddf$b$den | branch if b is denormalized + cmpl d6,d5 | check for INFINITY or NaN + beq Ladddf$nf + notl d6 | make d6 into mask for the fraction again + andl d6,d2 | and get fraction in d2 + orl d7,d2 | and put hidden bit back +Ladddf$2: + swap d5 | shift right exponent so that it starts +#ifndef __mcoldfire__ + lsrw IMM (5),d5 | in bit 0 and not bit 20 +#else + lsrl IMM (5),d5 | in bit 0 and not bit 20 +#endif + +| Now we have b's exponent in d5 and fraction in d2-d3. ' + +| The situation now is as follows: the signs are combined in a0, the +| numbers are in d0-d1 (a) and d2-d3 (b), and the exponents in d4 (a) +| and d5 (b). To do the rounding correctly we need to keep all the +| bits until the end, so we need to use d0-d1-d2-d3 for the first number +| and d4-d5-d6-d7 for the second. To do this we store (temporarily) the +| exponents in a2-a3. 
+ +#ifndef __mcoldfire__ + moveml a2-a3,sp@- | save the address registers +#else + movel a2,sp@- + movel a3,sp@- + movel a4,sp@- +#endif + + movel d4,a2 | save the exponents + movel d5,a3 | + + movel IMM (0),d7 | and move the numbers around + movel d7,d6 | + movel d3,d5 | + movel d2,d4 | + movel d7,d3 | + movel d7,d2 | + +| Here we shift the numbers until the exponents are the same, and put +| the largest exponent in a2. +#ifndef __mcoldfire__ + exg d4,a2 | get exponents back + exg d5,a3 | + cmpw d4,d5 | compare the exponents +#else + movel d4,a4 | get exponents back + movel a2,d4 + movel a4,a2 + movel d5,a4 + movel a3,d5 + movel a4,a3 + cmpl d4,d5 | compare the exponents +#endif + beq Ladddf$3 | if equal don't shift ' + bhi 9f | branch if second exponent is higher + +| Here we have a's exponent larger than b's, so we have to shift b. We do +| this by using as counter d2: +1: movew d4,d2 | move largest exponent to d2 +#ifndef __mcoldfire__ + subw d5,d2 | and subtract second exponent + exg d4,a2 | get back the longs we saved + exg d5,a3 | +#else + subl d5,d2 | and subtract second exponent + movel d4,a4 | get back the longs we saved + movel a2,d4 + movel a4,a2 + movel d5,a4 + movel a3,d5 + movel a4,a3 +#endif +| if difference is too large we don't shift (actually, we can just exit) ' +#ifndef __mcoldfire__ + cmpw IMM (DBL_MANT_DIG+2),d2 +#else + cmpl IMM (DBL_MANT_DIG+2),d2 +#endif + bge Ladddf$b$small +#ifndef __mcoldfire__ + cmpw IMM (32),d2 | if difference >= 32, shift by longs +#else + cmpl IMM (32),d2 | if difference >= 32, shift by longs +#endif + bge 5f +2: +#ifndef __mcoldfire__ + cmpw IMM (16),d2 | if difference >= 16, shift by words +#else + cmpl IMM (16),d2 | if difference >= 16, shift by words +#endif + bge 6f + bra 3f | enter dbra loop + +4: +#ifndef __mcoldfire__ + lsrl IMM (1),d4 + roxrl IMM (1),d5 + roxrl IMM (1),d6 + roxrl IMM (1),d7 +#else + lsrl IMM (1),d7 + btst IMM (0),d6 + beq 10f + bset IMM (31),d7 +10: lsrl IMM (1),d6 + btst IMM (0),d5 + beq 11f + bset IMM (31),d6 +11: lsrl IMM (1),d5 + btst IMM (0),d4 + beq 12f + bset IMM (31),d5 +12: lsrl IMM (1),d4 +#endif +3: +#ifndef __mcoldfire__ + dbra d2,4b +#else + subql IMM (1),d2 + bpl 4b +#endif + movel IMM (0),d2 + movel d2,d3 + bra Ladddf$4 +5: + movel d6,d7 + movel d5,d6 + movel d4,d5 + movel IMM (0),d4 +#ifndef __mcoldfire__ + subw IMM (32),d2 +#else + subl IMM (32),d2 +#endif + bra 2b +6: + movew d6,d7 + swap d7 + movew d5,d6 + swap d6 + movew d4,d5 + swap d5 + movew IMM (0),d4 + swap d4 +#ifndef __mcoldfire__ + subw IMM (16),d2 +#else + subl IMM (16),d2 +#endif + bra 3b + +9: +#ifndef __mcoldfire__ + exg d4,d5 + movew d4,d6 + subw d5,d6 | keep d5 (largest exponent) in d4 + exg d4,a2 + exg d5,a3 +#else + movel d5,d6 + movel d4,d5 + movel d6,d4 + subl d5,d6 + movel d4,a4 + movel a2,d4 + movel a4,a2 + movel d5,a4 + movel a3,d5 + movel a4,a3 +#endif +| if difference is too large we don't shift (actually, we can just exit) ' +#ifndef __mcoldfire__ + cmpw IMM (DBL_MANT_DIG+2),d6 +#else + cmpl IMM (DBL_MANT_DIG+2),d6 +#endif + bge Ladddf$a$small +#ifndef __mcoldfire__ + cmpw IMM (32),d6 | if difference >= 32, shift by longs +#else + cmpl IMM (32),d6 | if difference >= 32, shift by longs +#endif + bge 5f +2: +#ifndef __mcoldfire__ + cmpw IMM (16),d6 | if difference >= 16, shift by words +#else + cmpl IMM (16),d6 | if difference >= 16, shift by words +#endif + bge 6f + bra 3f | enter dbra loop + +4: +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + roxrl IMM (1),d2 + roxrl IMM (1),d3 +#else + lsrl IMM (1),d3 + 
btst IMM (0),d2 + beq 10f + bset IMM (31),d3 +10: lsrl IMM (1),d2 + btst IMM (0),d1 + beq 11f + bset IMM (31),d2 +11: lsrl IMM (1),d1 + btst IMM (0),d0 + beq 12f + bset IMM (31),d1 +12: lsrl IMM (1),d0 +#endif +3: +#ifndef __mcoldfire__ + dbra d6,4b +#else + subql IMM (1),d6 + bpl 4b +#endif + movel IMM (0),d7 + movel d7,d6 + bra Ladddf$4 +5: + movel d2,d3 + movel d1,d2 + movel d0,d1 + movel IMM (0),d0 +#ifndef __mcoldfire__ + subw IMM (32),d6 +#else + subl IMM (32),d6 +#endif + bra 2b +6: + movew d2,d3 + swap d3 + movew d1,d2 + swap d2 + movew d0,d1 + swap d1 + movew IMM (0),d0 + swap d0 +#ifndef __mcoldfire__ + subw IMM (16),d6 +#else + subl IMM (16),d6 +#endif + bra 3b +Ladddf$3: +#ifndef __mcoldfire__ + exg d4,a2 + exg d5,a3 +#else + movel d4,a4 + movel a2,d4 + movel a4,a2 + movel d5,a4 + movel a3,d5 + movel a4,a3 +#endif +Ladddf$4: +| Now we have the numbers in d0--d3 and d4--d7, the exponent in a2, and +| the signs in a4. + +| Here we have to decide whether to add or subtract the numbers: +#ifndef __mcoldfire__ + exg d7,a0 | get the signs + exg d6,a3 | a3 is free to be used +#else + movel d7,a4 + movel a0,d7 + movel a4,a0 + movel d6,a4 + movel a3,d6 + movel a4,a3 +#endif + movel d7,d6 | + movew IMM (0),d7 | get a's sign in d7 ' + swap d6 | + movew IMM (0),d6 | and b's sign in d6 ' + eorl d7,d6 | compare the signs + bmi Lsubdf$0 | if the signs are different we have + | to subtract +#ifndef __mcoldfire__ + exg d7,a0 | else we add the numbers + exg d6,a3 | +#else + movel d7,a4 + movel a0,d7 + movel a4,a0 + movel d6,a4 + movel a3,d6 + movel a4,a3 +#endif + addl d7,d3 | + addxl d6,d2 | + addxl d5,d1 | + addxl d4,d0 | + + movel a2,d4 | return exponent to d4 + movel a0,d7 | + andl IMM (0x80000000),d7 | d7 now has the sign + +#ifndef __mcoldfire__ + moveml sp@+,a2-a3 +#else + movel sp@+,a4 + movel sp@+,a3 + movel sp@+,a2 +#endif + +| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider +| the case of denormalized numbers in the rounding routine itself). +| As in the addition (not in the subtraction!) we could have set +| one more bit we check this: + btst IMM (DBL_MANT_DIG+1),d0 + beq 1f +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + roxrl IMM (1),d2 + roxrl IMM (1),d3 + addw IMM (1),d4 +#else + lsrl IMM (1),d3 + btst IMM (0),d2 + beq 10f + bset IMM (31),d3 +10: lsrl IMM (1),d2 + btst IMM (0),d1 + beq 11f + bset IMM (31),d2 +11: lsrl IMM (1),d1 + btst IMM (0),d0 + beq 12f + bset IMM (31),d1 +12: lsrl IMM (1),d0 + addl IMM (1),d4 +#endif +1: + lea pc@(Ladddf$5),a0 | to return from rounding routine + PICLEA SYM (_fpCCR),a1 | check the rounding mode +#ifdef __mcoldfire__ + clrl d6 +#endif + movew a1@(6),d6 | rounding mode in d6 + beq Lround$to$nearest +#ifndef __mcoldfire__ + cmpw IMM (ROUND_TO_PLUS),d6 +#else + cmpl IMM (ROUND_TO_PLUS),d6 +#endif + bhi Lround$to$minus + blt Lround$to$zero + bra Lround$to$plus +Ladddf$5: +| Put back the exponent and check for overflow +#ifndef __mcoldfire__ + cmpw IMM (0x7ff),d4 | is the exponent big? +#else + cmpl IMM (0x7ff),d4 | is the exponent big? +#endif + bge 1f + bclr IMM (DBL_MANT_DIG-1),d0 +#ifndef __mcoldfire__ + lslw IMM (4),d4 | put exponent back into position +#else + lsll IMM (4),d4 | put exponent back into position +#endif + swap d0 | +#ifndef __mcoldfire__ + orw d4,d0 | +#else + orl d4,d0 | +#endif + swap d0 | + bra Ladddf$ret +1: + moveq IMM (ADD),d5 + bra Ld$overflow + +Lsubdf$0: +| Here we do the subtraction. 
+#ifndef __mcoldfire__ + exg d7,a0 | put sign back in a0 + exg d6,a3 | +#else + movel d7,a4 + movel a0,d7 + movel a4,a0 + movel d6,a4 + movel a3,d6 + movel a4,a3 +#endif + subl d7,d3 | + subxl d6,d2 | + subxl d5,d1 | + subxl d4,d0 | + beq Ladddf$ret$1 | if zero just exit + bpl 1f | if positive skip the following + movel a0,d7 | + bchg IMM (31),d7 | change sign bit in d7 + movel d7,a0 | + negl d3 | + negxl d2 | + negxl d1 | and negate result + negxl d0 | +1: + movel a2,d4 | return exponent to d4 + movel a0,d7 + andl IMM (0x80000000),d7 | isolate sign bit +#ifndef __mcoldfire__ + moveml sp@+,a2-a3 | +#else + movel sp@+,a4 + movel sp@+,a3 + movel sp@+,a2 +#endif + +| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider +| the case of denormalized numbers in the rounding routine itself). +| As in the addition (not in the subtraction!) we could have set +| one more bit we check this: + btst IMM (DBL_MANT_DIG+1),d0 + beq 1f +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + roxrl IMM (1),d2 + roxrl IMM (1),d3 + addw IMM (1),d4 +#else + lsrl IMM (1),d3 + btst IMM (0),d2 + beq 10f + bset IMM (31),d3 +10: lsrl IMM (1),d2 + btst IMM (0),d1 + beq 11f + bset IMM (31),d2 +11: lsrl IMM (1),d1 + btst IMM (0),d0 + beq 12f + bset IMM (31),d1 +12: lsrl IMM (1),d0 + addl IMM (1),d4 +#endif +1: + lea pc@(Lsubdf$1),a0 | to return from rounding routine + PICLEA SYM (_fpCCR),a1 | check the rounding mode +#ifdef __mcoldfire__ + clrl d6 +#endif + movew a1@(6),d6 | rounding mode in d6 + beq Lround$to$nearest +#ifndef __mcoldfire__ + cmpw IMM (ROUND_TO_PLUS),d6 +#else + cmpl IMM (ROUND_TO_PLUS),d6 +#endif + bhi Lround$to$minus + blt Lround$to$zero + bra Lround$to$plus +Lsubdf$1: +| Put back the exponent and sign (we don't have overflow). ' + bclr IMM (DBL_MANT_DIG-1),d0 +#ifndef __mcoldfire__ + lslw IMM (4),d4 | put exponent back into position +#else + lsll IMM (4),d4 | put exponent back into position +#endif + swap d0 | +#ifndef __mcoldfire__ + orw d4,d0 | +#else + orl d4,d0 | +#endif + swap d0 | + bra Ladddf$ret + +| If one of the numbers was too small (difference of exponents >= +| DBL_MANT_DIG+1) we return the other (and now we don't have to ' +| check for finiteness or zero). +Ladddf$a$small: +#ifndef __mcoldfire__ + moveml sp@+,a2-a3 +#else + movel sp@+,a4 + movel sp@+,a3 + movel sp@+,a2 +#endif + movel a6@(16),d0 + movel a6@(20),d1 + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | restore data registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | and return + rts + +Ladddf$b$small: +#ifndef __mcoldfire__ + moveml sp@+,a2-a3 +#else + movel sp@+,a4 + movel sp@+,a3 + movel sp@+,a2 +#endif + movel a6@(8),d0 + movel a6@(12),d1 + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | restore data registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | and return + rts + +Ladddf$a$den: + movel d7,d4 | d7 contains 0x00200000 + bra Ladddf$1 + +Ladddf$b$den: + movel d7,d5 | d7 contains 0x00200000 + notl d6 + bra Ladddf$2 + +Ladddf$b: +| Return b (if a is zero) + movel d2,d0 + movel d3,d1 + bne 1f | Check if b is -0 + cmpl IMM (0x80000000),d0 + bne 1f + andl IMM (0x80000000),d7 | Use the sign of a + clrl d0 + bra Ladddf$ret +Ladddf$a: + movel a6@(8),d0 + movel a6@(12),d1 +1: + moveq IMM (ADD),d5 +| Check for NaN and +/-INFINITY. 
+ movel d0,d7 | + andl IMM (0x80000000),d7 | + bclr IMM (31),d0 | + cmpl IMM (0x7ff00000),d0 | + bge 2f | + movel d0,d0 | check for zero, since we don't ' + bne Ladddf$ret | want to return -0 by mistake + bclr IMM (31),d7 | + bra Ladddf$ret | +2: + andl IMM (0x000fffff),d0 | check for NaN (nonzero fraction) + orl d1,d0 | + bne Ld$inop | + bra Ld$infty | + +Ladddf$ret$1: +#ifndef __mcoldfire__ + moveml sp@+,a2-a3 | restore regs and exit +#else + movel sp@+,a4 + movel sp@+,a3 + movel sp@+,a2 +#endif + +Ladddf$ret: +| Normal exit. + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ + orl d7,d0 | put sign bit back +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts + +Ladddf$ret$den: +| Return a denormalized number. +#ifndef __mcoldfire__ + lsrl IMM (1),d0 | shift right once more + roxrl IMM (1),d1 | +#else + lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 +#endif + bra Ladddf$ret + +Ladddf$nf: + moveq IMM (ADD),d5 +| This could be faster but it is not worth the effort, since it is not +| executed very often. We sacrifice speed for clarity here. + movel a6@(8),d0 | get the numbers back (remember that we + movel a6@(12),d1 | did some processing already) + movel a6@(16),d2 | + movel a6@(20),d3 | + movel IMM (0x7ff00000),d4 | useful constant (INFINITY) + movel d0,d7 | save sign bits + movel d2,d6 | + bclr IMM (31),d0 | clear sign bits + bclr IMM (31),d2 | +| We know that one of them is either NaN of +/-INFINITY +| Check for NaN (if either one is NaN return NaN) + cmpl d4,d0 | check first a (d0) + bhi Ld$inop | if d0 > 0x7ff00000 or equal and + bne 2f + tstl d1 | d1 > 0, a is NaN + bne Ld$inop | +2: cmpl d4,d2 | check now b (d1) + bhi Ld$inop | + bne 3f + tstl d3 | + bne Ld$inop | +3: +| Now comes the check for +/-INFINITY. We know that both are (maybe not +| finite) numbers, but we have to check if both are infinite whether we +| are adding or subtracting them. + eorl d7,d6 | to check sign bits + bmi 1f + andl IMM (0x80000000),d7 | get (common) sign bit + bra Ld$infty +1: +| We know one (or both) are infinite, so we test for equality between the +| two numbers (if they are equal they have to be infinite both, so we +| return NaN). + cmpl d2,d0 | are both infinite? 
+ bne 1f | if d0 <> d2 they are not equal + cmpl d3,d1 | if d0 == d2 test d3 and d1 + beq Ld$inop | if equal return NaN +1: + andl IMM (0x80000000),d7 | get a's sign bit ' + cmpl d4,d0 | test now for infinity + beq Ld$infty | if a is INFINITY return with this sign + bchg IMM (31),d7 | else we know b is INFINITY and has + bra Ld$infty | the opposite sign + +|============================================================================= +| __muldf3 +|============================================================================= + +| double __muldf3(double, double); + FUNC(__muldf3) +SYM (__muldf3): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + movel a6@(8),d0 | get a into d0-d1 + movel a6@(12),d1 | + movel a6@(16),d2 | and b into d2-d3 + movel a6@(20),d3 | + movel d0,d7 | d7 will hold the sign of the product + eorl d2,d7 | + andl IMM (0x80000000),d7 | + movel d7,a0 | save sign bit into a0 + movel IMM (0x7ff00000),d7 | useful constant (+INFINITY) + movel d7,d6 | another (mask for fraction) + notl d6 | + bclr IMM (31),d0 | get rid of a's sign bit ' + movel d0,d4 | + orl d1,d4 | + beq Lmuldf$a$0 | branch if a is zero + movel d0,d4 | + bclr IMM (31),d2 | get rid of b's sign bit ' + movel d2,d5 | + orl d3,d5 | + beq Lmuldf$b$0 | branch if b is zero + movel d2,d5 | + cmpl d7,d0 | is a big? + bhi Lmuldf$inop | if a is NaN return NaN + beq Lmuldf$a$nf | we still have to check d1 and b ... + cmpl d7,d2 | now compare b with INFINITY + bhi Lmuldf$inop | is b NaN? + beq Lmuldf$b$nf | we still have to check d3 ... +| Here we have both numbers finite and nonzero (and with no sign bit). +| Now we get the exponents into d4 and d5. + andl d7,d4 | isolate exponent in d4 + beq Lmuldf$a$den | if exponent zero, have denormalized + andl d6,d0 | isolate fraction + orl IMM (0x00100000),d0 | and put hidden bit back + swap d4 | I like exponents in the first byte +#ifndef __mcoldfire__ + lsrw IMM (4),d4 | +#else + lsrl IMM (4),d4 | +#endif +Lmuldf$1: + andl d7,d5 | + beq Lmuldf$b$den | + andl d6,d2 | + orl IMM (0x00100000),d2 | and put hidden bit back + swap d5 | +#ifndef __mcoldfire__ + lsrw IMM (4),d5 | +#else + lsrl IMM (4),d5 | +#endif +Lmuldf$2: | +#ifndef __mcoldfire__ + addw d5,d4 | add exponents + subw IMM (D_BIAS+1),d4 | and subtract bias (plus one) +#else + addl d5,d4 | add exponents + subl IMM (D_BIAS+1),d4 | and subtract bias (plus one) +#endif + +| We are now ready to do the multiplication. The situation is as follows: +| both a and b have bit 52 ( bit 20 of d0 and d2) set (even if they were +| denormalized to start with!), which means that in the product bit 104 +| (which will correspond to bit 8 of the fourth long) is set. + +| Here we have to do the product. +| To do it we have to juggle the registers back and forth, as there are not +| enough to keep everything in them. So we use the address registers to keep +| some intermediate data. 
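The comments above describe the multiplication itself: both mantissas have their top (hidden) bit set, so the wide product lies between 2^104 and 2^106, and it is built up bit by bit, most significant bit of b first, adding a into the running sum whenever the current bit is set. A compact C sketch of that shift-and-add loop, using a pair of 64-bit words for the wide result, follows; mul_mant is a made-up name and the sketch leaves out the register juggling the assembly needs.

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative sketch only: most-significant-bit-first shift-and-add,
       multiplying two 53-bit mantissas into a 128-bit (hi,lo) product. */
    static void mul_mant(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
    {
        uint64_t h = 0, l = 0;
        for (int i = 52; i >= 0; i--) {      /* DBL_MANT_DIG = 53 bits of b */
            h = (h << 1) | (l >> 63);        /* shift the partial product */
            l <<= 1;
            if ((b >> i) & 1) {              /* this bit of b is set: add a */
                uint64_t t = l + a;
                h += (t < l);                /* propagate the carry */
                l = t;
            }
        }
        *hi = h; *lo = l;
    }

    int main(void)
    {
        uint64_t hi, lo, m = 1ULL << 52;     /* mantissa of 1.0, hidden bit set */
        mul_mant(m, m, &hi, &lo);            /* 2^52 * 2^52 = 2^104 */
        printf("hi=%#llx lo=%#llx\n", (unsigned long long)hi,
               (unsigned long long)lo);
        return 0;
    }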
+ +#ifndef __mcoldfire__ + moveml a2-a3,sp@- | save a2 and a3 for temporary use +#else + movel a2,sp@- + movel a3,sp@- + movel a4,sp@- +#endif + movel IMM (0),a2 | a2 is a null register + movel d4,a3 | and a3 will preserve the exponent + +| First, shift d2-d3 so bit 20 becomes bit 31: +#ifndef __mcoldfire__ + rorl IMM (5),d2 | rotate d2 5 places right + swap d2 | and swap it + rorl IMM (5),d3 | do the same thing with d3 + swap d3 | + movew d3,d6 | get the rightmost 11 bits of d3 + andw IMM (0x07ff),d6 | + orw d6,d2 | and put them into d2 + andw IMM (0xf800),d3 | clear those bits in d3 +#else + moveq IMM (11),d7 | left shift d2 11 bits + lsll d7,d2 + movel d3,d6 | get a copy of d3 + lsll d7,d3 | left shift d3 11 bits + andl IMM (0xffe00000),d6 | get the top 11 bits of d3 + moveq IMM (21),d7 | right shift them 21 bits + lsrl d7,d6 + orl d6,d2 | stick them at the end of d2 +#endif + + movel d2,d6 | move b into d6-d7 + movel d3,d7 | move a into d4-d5 + movel d0,d4 | and clear d0-d1-d2-d3 (to put result) + movel d1,d5 | + movel IMM (0),d3 | + movel d3,d2 | + movel d3,d1 | + movel d3,d0 | + +| We use a1 as counter: + movel IMM (DBL_MANT_DIG-1),a1 +#ifndef __mcoldfire__ + exg d7,a1 +#else + movel d7,a4 + movel a1,d7 + movel a4,a1 +#endif + +1: +#ifndef __mcoldfire__ + exg d7,a1 | put counter back in a1 +#else + movel d7,a4 + movel a1,d7 + movel a4,a1 +#endif + addl d3,d3 | shift sum once left + addxl d2,d2 | + addxl d1,d1 | + addxl d0,d0 | + addl d7,d7 | + addxl d6,d6 | + bcc 2f | if bit clear skip the following +#ifndef __mcoldfire__ + exg d7,a2 | +#else + movel d7,a4 + movel a2,d7 + movel a4,a2 +#endif + addl d5,d3 | else add a to the sum + addxl d4,d2 | + addxl d7,d1 | + addxl d7,d0 | +#ifndef __mcoldfire__ + exg d7,a2 | +#else + movel d7,a4 + movel a2,d7 + movel a4,a2 +#endif +2: +#ifndef __mcoldfire__ + exg d7,a1 | put counter in d7 + dbf d7,1b | decrement and branch +#else + movel d7,a4 + movel a1,d7 + movel a4,a1 + subql IMM (1),d7 + bpl 1b +#endif + + movel a3,d4 | restore exponent +#ifndef __mcoldfire__ + moveml sp@+,a2-a3 +#else + movel sp@+,a4 + movel sp@+,a3 + movel sp@+,a2 +#endif + +| Now we have the product in d0-d1-d2-d3, with bit 8 of d0 set. The +| first thing to do now is to normalize it so bit 8 becomes bit +| DBL_MANT_DIG-32 (to do the rounding); later we will shift right. + swap d0 + swap d1 + movew d1,d0 + swap d2 + movew d2,d1 + swap d3 + movew d3,d2 + movew IMM (0),d3 +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + roxrl IMM (1),d2 + roxrl IMM (1),d3 + lsrl IMM (1),d0 + roxrl IMM (1),d1 + roxrl IMM (1),d2 + roxrl IMM (1),d3 + lsrl IMM (1),d0 + roxrl IMM (1),d1 + roxrl IMM (1),d2 + roxrl IMM (1),d3 +#else + moveq IMM (29),d6 + lsrl IMM (3),d3 + movel d2,d7 + lsll d6,d7 + orl d7,d3 + lsrl IMM (3),d2 + movel d1,d7 + lsll d6,d7 + orl d7,d2 + lsrl IMM (3),d1 + movel d0,d7 + lsll d6,d7 + orl d7,d1 + lsrl IMM (3),d0 +#endif + +| Now round, check for over- and underflow, and exit. 
+ movel a0,d7 | get sign bit back into d7 + moveq IMM (MULTIPLY),d5 + + btst IMM (DBL_MANT_DIG+1-32),d0 + beq Lround$exit +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + addw IMM (1),d4 +#else + lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 + addl IMM (1),d4 +#endif + bra Lround$exit + +Lmuldf$inop: + moveq IMM (MULTIPLY),d5 + bra Ld$inop + +Lmuldf$b$nf: + moveq IMM (MULTIPLY),d5 + movel a0,d7 | get sign bit back into d7 + tstl d3 | we know d2 == 0x7ff00000, so check d3 + bne Ld$inop | if d3 <> 0 b is NaN + bra Ld$overflow | else we have overflow (since a is finite) + +Lmuldf$a$nf: + moveq IMM (MULTIPLY),d5 + movel a0,d7 | get sign bit back into d7 + tstl d1 | we know d0 == 0x7ff00000, so check d1 + bne Ld$inop | if d1 <> 0 a is NaN + bra Ld$overflow | else signal overflow + +| If either number is zero return zero, unless the other is +/-INFINITY or +| NaN, in which case we return NaN. +Lmuldf$b$0: + moveq IMM (MULTIPLY),d5 +#ifndef __mcoldfire__ + exg d2,d0 | put b (==0) into d0-d1 + exg d3,d1 | and a (with sign bit cleared) into d2-d3 + movel a0,d0 | set result sign +#else + movel d0,d2 | put a into d2-d3 + movel d1,d3 + movel a0,d0 | put result zero into d0-d1 + movq IMM(0),d1 +#endif + bra 1f +Lmuldf$a$0: + movel a0,d0 | set result sign + movel a6@(16),d2 | put b into d2-d3 again + movel a6@(20),d3 | + bclr IMM (31),d2 | clear sign bit +1: cmpl IMM (0x7ff00000),d2 | check for non-finiteness + bge Ld$inop | in case NaN or +/-INFINITY return NaN + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts + +| If a number is denormalized we put an exponent of 1 but do not put the +| hidden bit back into the fraction; instead we shift left until bit 21 +| (the hidden bit) is set, adjusting the exponent accordingly. We do this +| to ensure that the product of the fractions is close to 1. 
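As the comment above says, a denormalized operand gets a working exponent of 1 and is then shifted left until its would-be hidden bit appears, decrementing the exponent on the way, so that the multiply loop always sees a mantissa with the top bit set. A hedged C sketch of that step (normalize_subnormal is a made-up name; a zero operand is assumed to have been filtered out earlier, as the assembly does):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative sketch only: pre-normalize a subnormal mantissa.
       frac must be nonzero; zero operands are handled before this point. */
    static uint64_t normalize_subnormal(uint64_t frac, int *exp)
    {
        *exp = 1;                            /* denormals use exponent 1 */
        while (!(frac & (1ULL << 52))) {     /* until the hidden bit is set */
            frac <<= 1;
            (*exp)--;                        /* working exponent may drop below 1 */
        }
        return frac;
    }

    int main(void)
    {
        int e;
        uint64_t f = normalize_subnormal(1, &e);  /* smallest subnormal */
        printf("frac=%#llx exp=%d\n", (unsigned long long)f, e);
        return 0;
    }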
+Lmuldf$a$den: + movel IMM (1),d4 + andl d6,d0 +1: addl d1,d1 | shift a left until bit 20 is set + addxl d0,d0 | +#ifndef __mcoldfire__ + subw IMM (1),d4 | and adjust exponent +#else + subl IMM (1),d4 | and adjust exponent +#endif + btst IMM (20),d0 | + bne Lmuldf$1 | + bra 1b + +Lmuldf$b$den: + movel IMM (1),d5 + andl d6,d2 +1: addl d3,d3 | shift b left until bit 20 is set + addxl d2,d2 | +#ifndef __mcoldfire__ + subw IMM (1),d5 | and adjust exponent +#else + subql IMM (1),d5 | and adjust exponent +#endif + btst IMM (20),d2 | + bne Lmuldf$2 | + bra 1b + + +|============================================================================= +| __divdf3 +|============================================================================= + +| double __divdf3(double, double); + FUNC(__divdf3) +SYM (__divdf3): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + movel a6@(8),d0 | get a into d0-d1 + movel a6@(12),d1 | + movel a6@(16),d2 | and b into d2-d3 + movel a6@(20),d3 | + movel d0,d7 | d7 will hold the sign of the result + eorl d2,d7 | + andl IMM (0x80000000),d7 + movel d7,a0 | save sign into a0 + movel IMM (0x7ff00000),d7 | useful constant (+INFINITY) + movel d7,d6 | another (mask for fraction) + notl d6 | + bclr IMM (31),d0 | get rid of a's sign bit ' + movel d0,d4 | + orl d1,d4 | + beq Ldivdf$a$0 | branch if a is zero + movel d0,d4 | + bclr IMM (31),d2 | get rid of b's sign bit ' + movel d2,d5 | + orl d3,d5 | + beq Ldivdf$b$0 | branch if b is zero + movel d2,d5 + cmpl d7,d0 | is a big? + bhi Ldivdf$inop | if a is NaN return NaN + beq Ldivdf$a$nf | if d0 == 0x7ff00000 we check d1 + cmpl d7,d2 | now compare b with INFINITY + bhi Ldivdf$inop | if b is NaN return NaN + beq Ldivdf$b$nf | if d2 == 0x7ff00000 we check d3 +| Here we have both numbers finite and nonzero (and with no sign bit). +| Now we get the exponents into d4 and d5 and normalize the numbers to +| ensure that the ratio of the fractions is around 1. We do this by +| making sure that both numbers have bit #DBL_MANT_DIG-32-1 (hidden bit) +| set, even if they were denormalized to start with. +| Thus, the result will satisfy: 2 > result > 1/2. + andl d7,d4 | and isolate exponent in d4 + beq Ldivdf$a$den | if exponent is zero we have a denormalized + andl d6,d0 | and isolate fraction + orl IMM (0x00100000),d0 | and put hidden bit back + swap d4 | I like exponents in the first byte +#ifndef __mcoldfire__ + lsrw IMM (4),d4 | +#else + lsrl IMM (4),d4 | +#endif +Ldivdf$1: | + andl d7,d5 | + beq Ldivdf$b$den | + andl d6,d2 | + orl IMM (0x00100000),d2 + swap d5 | +#ifndef __mcoldfire__ + lsrw IMM (4),d5 | +#else + lsrl IMM (4),d5 | +#endif +Ldivdf$2: | +#ifndef __mcoldfire__ + subw d5,d4 | subtract exponents + addw IMM (D_BIAS),d4 | and add bias +#else + subl d5,d4 | subtract exponents + addl IMM (D_BIAS),d4 | and add bias +#endif + +| We are now ready to do the division. We have prepared things in such a way +| that the ratio of the fractions will be less than 2 but greater than 1/2. +| At this point the registers in use are: +| d0-d1 hold a (first operand, bit DBL_MANT_DIG-32=0, bit +| DBL_MANT_DIG-1-32=1) +| d2-d3 hold b (second operand, bit DBL_MANT_DIG-32=1) +| d4 holds the difference of the exponents, corrected by the bias +| a0 holds the sign of the ratio + +| To do the rounding correctly we need to keep information about the +| nonsignificant bits. 
One way to do this would be to do the division +| using four registers; another is to use two registers (as originally +| I did), but use a sticky bit to preserve information about the +| fractional part. Note that we can keep that info in a1, which is not +| used. + movel IMM (0),d6 | d6-d7 will hold the result + movel d6,d7 | + movel IMM (0),a1 | and a1 will hold the sticky bit + + movel IMM (DBL_MANT_DIG-32+1),d5 + +1: cmpl d0,d2 | is a < b? + bhi 3f | if b > a skip the following + beq 4f | if d0==d2 check d1 and d3 +2: subl d3,d1 | + subxl d2,d0 | a <-- a - b + bset d5,d6 | set the corresponding bit in d6 +3: addl d1,d1 | shift a by 1 + addxl d0,d0 | +#ifndef __mcoldfire__ + dbra d5,1b | and branch back +#else + subql IMM (1), d5 + bpl 1b +#endif + bra 5f +4: cmpl d1,d3 | here d0==d2, so check d1 and d3 + bhi 3b | if d1 > d2 skip the subtraction + bra 2b | else go do it +5: +| Here we have to start setting the bits in the second long. + movel IMM (31),d5 | again d5 is counter + +1: cmpl d0,d2 | is a < b? + bhi 3f | if b > a skip the following + beq 4f | if d0==d2 check d1 and d3 +2: subl d3,d1 | + subxl d2,d0 | a <-- a - b + bset d5,d7 | set the corresponding bit in d7 +3: addl d1,d1 | shift a by 1 + addxl d0,d0 | +#ifndef __mcoldfire__ + dbra d5,1b | and branch back +#else + subql IMM (1), d5 + bpl 1b +#endif + bra 5f +4: cmpl d1,d3 | here d0==d2, so check d1 and d3 + bhi 3b | if d1 > d2 skip the subtraction + bra 2b | else go do it +5: +| Now go ahead checking until we hit a one, which we store in d2. + movel IMM (DBL_MANT_DIG),d5 +1: cmpl d2,d0 | is a < b? + bhi 4f | if b < a, exit + beq 3f | if d0==d2 check d1 and d3 +2: addl d1,d1 | shift a by 1 + addxl d0,d0 | +#ifndef __mcoldfire__ + dbra d5,1b | and branch back +#else + subql IMM (1), d5 + bpl 1b +#endif + movel IMM (0),d2 | here no sticky bit was found + movel d2,d3 + bra 5f +3: cmpl d1,d3 | here d0==d2, so check d1 and d3 + bhi 2b | if d1 > d2 go back +4: +| Here put the sticky bit in d2-d3 (in the position which actually corresponds +| to it; if you don't do this the algorithm loses in some cases). ' + movel IMM (0),d2 + movel d2,d3 +#ifndef __mcoldfire__ + subw IMM (DBL_MANT_DIG),d5 + addw IMM (63),d5 + cmpw IMM (31),d5 +#else + subl IMM (DBL_MANT_DIG),d5 + addl IMM (63),d5 + cmpl IMM (31),d5 +#endif + bhi 2f +1: bset d5,d3 + bra 5f +#ifndef __mcoldfire__ + subw IMM (32),d5 +#else + subl IMM (32),d5 +#endif +2: bset d5,d2 +5: +| Finally we are finished! Move the longs in the address registers to +| their final destination: + movel d6,d0 + movel d7,d1 + movel IMM (0),d3 + +| Here we have finished the division, with the result in d0-d1-d2-d3, with +| 2^21 <= d6 < 2^23. Thus bit 23 is not set, but bit 22 could be set. +| If it is not, then definitely bit 21 is set. Normalize so bit 22 is +| not set: + btst IMM (DBL_MANT_DIG-32+1),d0 + beq 1f +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + roxrl IMM (1),d2 + roxrl IMM (1),d3 + addw IMM (1),d4 +#else + lsrl IMM (1),d3 + btst IMM (0),d2 + beq 10f + bset IMM (31),d3 +10: lsrl IMM (1),d2 + btst IMM (0),d1 + beq 11f + bset IMM (31),d2 +11: lsrl IMM (1),d1 + btst IMM (0),d0 + beq 12f + bset IMM (31),d1 +12: lsrl IMM (1),d0 + addl IMM (1),d4 +#endif +1: +| Now round, check for over- and underflow, and exit. + movel a0,d7 | restore sign bit to d7 + moveq IMM (DIVIDE),d5 + bra Lround$exit + +Ldivdf$inop: + moveq IMM (DIVIDE),d5 + bra Ld$inop + +Ldivdf$a$0: +| If a is zero check to see whether b is zero also. 
In that case return +| NaN; then check if b is NaN, and return NaN also in that case. Else +| return a properly signed zero. + moveq IMM (DIVIDE),d5 + bclr IMM (31),d2 | + movel d2,d4 | + orl d3,d4 | + beq Ld$inop | if b is also zero return NaN + cmpl IMM (0x7ff00000),d2 | check for NaN + bhi Ld$inop | + blt 1f | + tstl d3 | + bne Ld$inop | +1: movel a0,d0 | else return signed zero + moveq IMM(0),d1 | + PICLEA SYM (_fpCCR),a0 | clear exception flags + movew IMM (0),a0@ | +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | +#else + moveml sp@,d2-d7 | + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | + rts | + +Ldivdf$b$0: + moveq IMM (DIVIDE),d5 +| If we got here a is not zero. Check if a is NaN; in that case return NaN, +| else return +/-INFINITY. Remember that a is in d0 with the sign bit +| cleared already. + movel a0,d7 | put a's sign bit back in d7 ' + cmpl IMM (0x7ff00000),d0 | compare d0 with INFINITY + bhi Ld$inop | if larger it is NaN + tstl d1 | + bne Ld$inop | + bra Ld$div$0 | else signal DIVIDE_BY_ZERO + +Ldivdf$b$nf: + moveq IMM (DIVIDE),d5 +| If d2 == 0x7ff00000 we have to check d3. + tstl d3 | + bne Ld$inop | if d3 <> 0, b is NaN + bra Ld$underflow | else b is +/-INFINITY, so signal underflow + +Ldivdf$a$nf: + moveq IMM (DIVIDE),d5 +| If d0 == 0x7ff00000 we have to check d1. + tstl d1 | + bne Ld$inop | if d1 <> 0, a is NaN +| If a is INFINITY we have to check b + cmpl d7,d2 | compare b with INFINITY + bge Ld$inop | if b is NaN or INFINITY return NaN + tstl d3 | + bne Ld$inop | + bra Ld$overflow | else return overflow + +| If a number is denormalized we put an exponent of 1 but do not put the +| bit back into the fraction. +Ldivdf$a$den: + movel IMM (1),d4 + andl d6,d0 +1: addl d1,d1 | shift a left until bit 20 is set + addxl d0,d0 +#ifndef __mcoldfire__ + subw IMM (1),d4 | and adjust exponent +#else + subl IMM (1),d4 | and adjust exponent +#endif + btst IMM (DBL_MANT_DIG-32-1),d0 + bne Ldivdf$1 + bra 1b + +Ldivdf$b$den: + movel IMM (1),d5 + andl d6,d2 +1: addl d3,d3 | shift b left until bit 20 is set + addxl d2,d2 +#ifndef __mcoldfire__ + subw IMM (1),d5 | and adjust exponent +#else + subql IMM (1),d5 | and adjust exponent +#endif + btst IMM (DBL_MANT_DIG-32-1),d2 + bne Ldivdf$2 + bra 1b + +Lround$exit: +| This is a common exit point for __muldf3 and __divdf3. When they enter +| this point the sign of the result is in d7, the result in d0-d1, normalized +| so that 2^21 <= d0 < 2^22, and the exponent is in the lower byte of d4. + +| First check for underlow in the exponent: +#ifndef __mcoldfire__ + cmpw IMM (-DBL_MANT_DIG-1),d4 +#else + cmpl IMM (-DBL_MANT_DIG-1),d4 +#endif + blt Ld$underflow +| It could happen that the exponent is less than 1, in which case the +| number is denormalized. In this case we shift right and adjust the +| exponent until it becomes 1 or the fraction is zero (in the latter case +| we signal underflow and return zero). + movel d7,a0 | + movel IMM (0),d6 | use d6-d7 to collect bits flushed right + movel d6,d7 | use d6-d7 to collect bits flushed right +#ifndef __mcoldfire__ + cmpw IMM (1),d4 | if the exponent is less than 1 we +#else + cmpl IMM (1),d4 | if the exponent is less than 1 we +#endif + bge 2f | have to shift right (denormalize) +1: +#ifndef __mcoldfire__ + addw IMM (1),d4 | adjust the exponent + lsrl IMM (1),d0 | shift right once + roxrl IMM (1),d1 | + roxrl IMM (1),d2 | + roxrl IMM (1),d3 | + roxrl IMM (1),d6 | + roxrl IMM (1),d7 | + cmpw IMM (1),d4 | is the exponent 1 already? 
+#else + addl IMM (1),d4 | adjust the exponent + lsrl IMM (1),d7 + btst IMM (0),d6 + beq 13f + bset IMM (31),d7 +13: lsrl IMM (1),d6 + btst IMM (0),d3 + beq 14f + bset IMM (31),d6 +14: lsrl IMM (1),d3 + btst IMM (0),d2 + beq 10f + bset IMM (31),d3 +10: lsrl IMM (1),d2 + btst IMM (0),d1 + beq 11f + bset IMM (31),d2 +11: lsrl IMM (1),d1 + btst IMM (0),d0 + beq 12f + bset IMM (31),d1 +12: lsrl IMM (1),d0 + cmpl IMM (1),d4 | is the exponent 1 already? +#endif + beq 2f | if not loop back + bra 1b | + bra Ld$underflow | safety check, shouldn't execute ' +2: orl d6,d2 | this is a trick so we don't lose ' + orl d7,d3 | the bits which were flushed right + movel a0,d7 | get back sign bit into d7 +| Now call the rounding routine (which takes care of denormalized numbers): + lea pc@(Lround$0),a0 | to return from rounding routine + PICLEA SYM (_fpCCR),a1 | check the rounding mode +#ifdef __mcoldfire__ + clrl d6 +#endif + movew a1@(6),d6 | rounding mode in d6 + beq Lround$to$nearest +#ifndef __mcoldfire__ + cmpw IMM (ROUND_TO_PLUS),d6 +#else + cmpl IMM (ROUND_TO_PLUS),d6 +#endif + bhi Lround$to$minus + blt Lround$to$zero + bra Lround$to$plus +Lround$0: +| Here we have a correctly rounded result (either normalized or denormalized). + +| Here we should have either a normalized number or a denormalized one, and +| the exponent is necessarily larger or equal to 1 (so we don't have to ' +| check again for underflow!). We have to check for overflow or for a +| denormalized number (which also signals underflow). +| Check for overflow (i.e., exponent >= 0x7ff). +#ifndef __mcoldfire__ + cmpw IMM (0x07ff),d4 +#else + cmpl IMM (0x07ff),d4 +#endif + bge Ld$overflow +| Now check for a denormalized number (exponent==0): + movew d4,d4 + beq Ld$den +1: +| Put back the exponents and sign and return. +#ifndef __mcoldfire__ + lslw IMM (4),d4 | exponent back to fourth byte +#else + lsll IMM (4),d4 | exponent back to fourth byte +#endif + bclr IMM (DBL_MANT_DIG-32-1),d0 + swap d0 | and put back exponent +#ifndef __mcoldfire__ + orw d4,d0 | +#else + orl d4,d0 | +#endif + swap d0 | + orl d7,d0 | and sign also + + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts + +|============================================================================= +| __negdf2 +|============================================================================= + +| double __negdf2(double, double); + FUNC(__negdf2) +SYM (__negdf2): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + moveq IMM (NEGATE),d5 + movel a6@(8),d0 | get number to negate in d0-d1 + movel a6@(12),d1 | + bchg IMM (31),d0 | negate + movel d0,d2 | make a positive copy (for the tests) + bclr IMM (31),d2 | + movel d2,d4 | check for zero + orl d1,d4 | + beq 2f | if zero (either sign) return +zero + cmpl IMM (0x7ff00000),d2 | compare to +INFINITY + blt 1f | if finite, return + bhi Ld$inop | if larger (fraction not zero) is NaN + tstl d1 | if d2 == 0x7ff00000 check d1 + bne Ld$inop | + movel d0,d7 | else get sign and return INFINITY + andl IMM (0x80000000),d7 + bra Ld$infty +1: PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. 
+#endif + unlk a6 + rts +2: bclr IMM (31),d0 + bra 1b + +|============================================================================= +| __cmpdf2 +|============================================================================= + +GREATER = 1 +LESS = -1 +EQUAL = 0 + +| int __cmpdf2_internal(double, double, int); +SYM (__cmpdf2_internal): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- | save registers +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + moveq IMM (COMPARE),d5 + movel a6@(8),d0 | get first operand + movel a6@(12),d1 | + movel a6@(16),d2 | get second operand + movel a6@(20),d3 | +| First check if a and/or b are (+/-) zero and in that case clear +| the sign bit. + movel d0,d6 | copy signs into d6 (a) and d7(b) + bclr IMM (31),d0 | and clear signs in d0 and d2 + movel d2,d7 | + bclr IMM (31),d2 | + cmpl IMM (0x7ff00000),d0 | check for a == NaN + bhi Lcmpd$inop | if d0 > 0x7ff00000, a is NaN + beq Lcmpdf$a$nf | if equal can be INFINITY, so check d1 + movel d0,d4 | copy into d4 to test for zero + orl d1,d4 | + beq Lcmpdf$a$0 | +Lcmpdf$0: + cmpl IMM (0x7ff00000),d2 | check for b == NaN + bhi Lcmpd$inop | if d2 > 0x7ff00000, b is NaN + beq Lcmpdf$b$nf | if equal can be INFINITY, so check d3 + movel d2,d4 | + orl d3,d4 | + beq Lcmpdf$b$0 | +Lcmpdf$1: +| Check the signs + eorl d6,d7 + bpl 1f +| If the signs are not equal check if a >= 0 + tstl d6 + bpl Lcmpdf$a$gt$b | if (a >= 0 && b < 0) => a > b + bmi Lcmpdf$b$gt$a | if (a < 0 && b >= 0) => a < b +1: +| If the signs are equal check for < 0 + tstl d6 + bpl 1f +| If both are negative exchange them +#ifndef __mcoldfire__ + exg d0,d2 + exg d1,d3 +#else + movel d0,d7 + movel d2,d0 + movel d7,d2 + movel d1,d7 + movel d3,d1 + movel d7,d3 +#endif +1: +| Now that they are positive we just compare them as longs (does this also +| work for denormalized numbers?). + cmpl d0,d2 + bhi Lcmpdf$b$gt$a | |b| > |a| + bne Lcmpdf$a$gt$b | |b| < |a| +| If we got here d0 == d2, so we compare d1 and d3. + cmpl d1,d3 + bhi Lcmpdf$b$gt$a | |b| > |a| + bne Lcmpdf$a$gt$b | |b| < |a| +| If we got here a == b. + movel IMM (EQUAL),d0 +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | put back the registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts +Lcmpdf$a$gt$b: + movel IMM (GREATER),d0 +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | put back the registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts +Lcmpdf$b$gt$a: + movel IMM (LESS),d0 +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | put back the registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. 
+#endif + unlk a6 + rts + +Lcmpdf$a$0: + bclr IMM (31),d6 + bra Lcmpdf$0 +Lcmpdf$b$0: + bclr IMM (31),d7 + bra Lcmpdf$1 + +Lcmpdf$a$nf: + tstl d1 + bne Ld$inop + bra Lcmpdf$0 + +Lcmpdf$b$nf: + tstl d3 + bne Ld$inop + bra Lcmpdf$1 + +Lcmpd$inop: + movl a6@(24),d0 + moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 + moveq IMM (DOUBLE_FLOAT),d6 + PICJUMP $_exception_handler + +| int __cmpdf2(double, double); + FUNC(__cmpdf2) +SYM (__cmpdf2): + link a6,IMM (0) + pea 1 + movl a6@(20),sp@- + movl a6@(16),sp@- + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpdf2_internal) + unlk a6 + rts + +|============================================================================= +| rounding routines +|============================================================================= + +| The rounding routines expect the number to be normalized in registers +| d0-d1-d2-d3, with the exponent in register d4. They assume that the +| exponent is larger or equal to 1. They return a properly normalized number +| if possible, and a denormalized number otherwise. The exponent is returned +| in d4. + +Lround$to$nearest: +| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"): +| Here we assume that the exponent is not too small (this should be checked +| before entering the rounding routine), but the number could be denormalized. + +| Check for denormalized numbers: +1: btst IMM (DBL_MANT_DIG-32),d0 + bne 2f | if set the number is normalized +| Normalize shifting left until bit #DBL_MANT_DIG-32 is set or the exponent +| is one (remember that a denormalized number corresponds to an +| exponent of -D_BIAS+1). +#ifndef __mcoldfire__ + cmpw IMM (1),d4 | remember that the exponent is at least one +#else + cmpl IMM (1),d4 | remember that the exponent is at least one +#endif + beq 2f | an exponent of one means denormalized + addl d3,d3 | else shift and adjust the exponent + addxl d2,d2 | + addxl d1,d1 | + addxl d0,d0 | +#ifndef __mcoldfire__ + dbra d4,1b | +#else + subql IMM (1), d4 + bpl 1b +#endif +2: +| Now round: we do it as follows: after the shifting we can write the +| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2. +| If delta < 1, do nothing. If delta > 1, add 1 to f. +| If delta == 1, we make sure the rounded number will be even (odd?) +| (after shifting). + btst IMM (0),d1 | is delta < 1? + beq 2f | if so, do not do anything + orl d2,d3 | is delta == 1? + bne 1f | if so round to even + movel d1,d3 | + andl IMM (2),d3 | bit 1 is the last significant bit + movel IMM (0),d2 | + addl d3,d1 | + addxl d2,d0 | + bra 2f | +1: movel IMM (1),d3 | else add 1 + movel IMM (0),d2 | + addl d3,d1 | + addxl d2,d0 +| Shift right once (because we used bit #DBL_MANT_DIG-32!). +2: +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 +#else + lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 +#endif + +| Now check again bit #DBL_MANT_DIG-32 (rounding could have produced a +| 'fraction overflow' ...). + btst IMM (DBL_MANT_DIG-32),d0 + beq 1f +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + addw IMM (1),d4 +#else + lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 + addl IMM (1),d4 +#endif +1: +| If bit #DBL_MANT_DIG-32-1 is clear we have a denormalized number, so we +| have to put the exponent to zero and return a denormalized number. 
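The Lround$to$nearest comments above state the policy in terms of a fraction f plus a remainder delta: round down when delta < 1, up when delta > 1, and to an even result on the delta == 1 tie. A small C sketch of the same decision, assuming the extra precision has already been reduced to one guard bit plus a sticky flag (round_nearest_even is a made-up name, not a routine from this file):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative sketch only: round to nearest, ties to even.  'mant'
       carries one extra guard bit below the final LSB; 'sticky' records
       whether any lower bits were nonzero. */
    static uint64_t round_nearest_even(uint64_t mant, int sticky)
    {
        uint64_t guard = mant & 1;           /* bit just below the result LSB */
        mant >>= 1;                          /* drop the guard bit */
        if (guard && (sticky || (mant & 1))) /* above half, or a tie and odd */
            mant += 1;                       /* round up (to even on ties) */
        return mant;
    }

    int main(void)
    {
        /* binary 101.1, a tie: rounds to the even value 110 (6) */
        printf("%llu\n", (unsigned long long)round_nearest_even(0xB, 0));
        /* binary 101.0: stays 101 (5) */
        printf("%llu\n", (unsigned long long)round_nearest_even(0xA, 0));
        return 0;
    }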
+ btst IMM (DBL_MANT_DIG-32-1),d0 + beq 1f + jmp a0@ +1: movel IMM (0),d4 + jmp a0@ + +Lround$to$zero: +Lround$to$plus: +Lround$to$minus: + jmp a0@ +#endif /* L_double */ + +#ifdef L_float + + .globl SYM (_fpCCR) + .globl $_exception_handler + +QUIET_NaN = 0xffffffff +SIGNL_NaN = 0x7f800001 +INFINITY = 0x7f800000 + +F_MAX_EXP = 0xff +F_BIAS = 126 +FLT_MAX_EXP = F_MAX_EXP - F_BIAS +FLT_MIN_EXP = 1 - F_BIAS +FLT_MANT_DIG = 24 + +INEXACT_RESULT = 0x0001 +UNDERFLOW = 0x0002 +OVERFLOW = 0x0004 +DIVIDE_BY_ZERO = 0x0008 +INVALID_OPERATION = 0x0010 + +SINGLE_FLOAT = 1 + +NOOP = 0 +ADD = 1 +MULTIPLY = 2 +DIVIDE = 3 +NEGATE = 4 +COMPARE = 5 +EXTENDSFDF = 6 +TRUNCDFSF = 7 + +UNKNOWN = -1 +ROUND_TO_NEAREST = 0 | round result to nearest representable value +ROUND_TO_ZERO = 1 | round result towards zero +ROUND_TO_PLUS = 2 | round result towards plus infinity +ROUND_TO_MINUS = 3 | round result towards minus infinity + +| Entry points: + + .globl SYM (__addsf3) + .globl SYM (__subsf3) + .globl SYM (__mulsf3) + .globl SYM (__divsf3) + .globl SYM (__negsf2) + .globl SYM (__cmpsf2) + .globl SYM (__cmpsf2_internal) + .hidden SYM (__cmpsf2_internal) + +| These are common routines to return and signal exceptions. + + .text + .even + +Lf$den: +| Return and signal a denormalized number + orl d7,d0 + moveq IMM (INEXACT_RESULT+UNDERFLOW),d7 + moveq IMM (SINGLE_FLOAT),d6 + PICJUMP $_exception_handler + +Lf$infty: +Lf$overflow: +| Return a properly signed INFINITY and set the exception flags + movel IMM (INFINITY),d0 + orl d7,d0 + moveq IMM (INEXACT_RESULT+OVERFLOW),d7 + moveq IMM (SINGLE_FLOAT),d6 + PICJUMP $_exception_handler + +Lf$underflow: +| Return 0 and set the exception flags + moveq IMM (0),d0 + moveq IMM (INEXACT_RESULT+UNDERFLOW),d7 + moveq IMM (SINGLE_FLOAT),d6 + PICJUMP $_exception_handler + +Lf$inop: +| Return a quiet NaN and set the exception flags + movel IMM (QUIET_NaN),d0 + moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 + moveq IMM (SINGLE_FLOAT),d6 + PICJUMP $_exception_handler + +Lf$div$0: +| Return a properly signed INFINITY and set the exception flags + movel IMM (INFINITY),d0 + orl d7,d0 + moveq IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7 + moveq IMM (SINGLE_FLOAT),d6 + PICJUMP $_exception_handler + +|============================================================================= +|============================================================================= +| single precision routines +|============================================================================= +|============================================================================= + +| A single precision floating point number (float) has the format: +| +| struct _float { +| unsigned int sign : 1; /* sign bit */ +| unsigned int exponent : 8; /* exponent, shifted by 126 */ +| unsigned int fraction : 23; /* fraction */ +| } float; +| +| Thus sizeof(float) = 4 (32 bits). +| +| All the routines are callable from C programs, and return the result +| in the single register d0. They also preserve all registers except +| d0-d1 and a0-a1. 
+ +|============================================================================= +| __subsf3 +|============================================================================= + +| float __subsf3(float, float); + FUNC(__subsf3) +SYM (__subsf3): + bchg IMM (31),sp@(8) | change sign of second operand + | and fall through +|============================================================================= +| __addsf3 +|============================================================================= + +| float __addsf3(float, float); + FUNC(__addsf3) +SYM (__addsf3): +#ifndef __mcoldfire__ + link a6,IMM (0) | everything will be done in registers + moveml d2-d7,sp@- | save all data registers but d0-d1 +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + movel a6@(8),d0 | get first operand + movel a6@(12),d1 | get second operand + movel d0,a0 | get d0's sign bit ' + addl d0,d0 | check and clear sign bit of a + beq Laddsf$b | if zero return second operand + movel d1,a1 | save b's sign bit ' + addl d1,d1 | get rid of sign bit + beq Laddsf$a | if zero return first operand + +| Get the exponents and check for denormalized and/or infinity. + + movel IMM (0x00ffffff),d4 | mask to get fraction + movel IMM (0x01000000),d5 | mask to put hidden bit back + + movel d0,d6 | save a to get exponent + andl d4,d0 | get fraction in d0 + notl d4 | make d4 into a mask for the exponent + andl d4,d6 | get exponent in d6 + beq Laddsf$a$den | branch if a is denormalized + cmpl d4,d6 | check for INFINITY or NaN + beq Laddsf$nf + swap d6 | put exponent into first word + orl d5,d0 | and put hidden bit back +Laddsf$1: +| Now we have a's exponent in d6 (second byte) and the mantissa in d0. ' + movel d1,d7 | get exponent in d7 + andl d4,d7 | + beq Laddsf$b$den | branch if b is denormalized + cmpl d4,d7 | check for INFINITY or NaN + beq Laddsf$nf + swap d7 | put exponent into first word + notl d4 | make d4 into a mask for the fraction + andl d4,d1 | get fraction in d1 + orl d5,d1 | and put hidden bit back +Laddsf$2: +| Now we have b's exponent in d7 (second byte) and the mantissa in d1. ' + +| Note that the hidden bit corresponds to bit #FLT_MANT_DIG-1, and we +| shifted right once, so bit #FLT_MANT_DIG is set (so we have one extra +| bit). + + movel d1,d2 | move b to d2, since we want to use + | two registers to do the sum + movel IMM (0),d1 | and clear the new ones + movel d1,d3 | + +| Here we shift the numbers in registers d0 and d1 so the exponents are the +| same, and put the largest exponent in d6. Note that we are using two +| registers for each number (see the discussion by D. Knuth in "Seminumerical +| Algorithms"). 
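The alignment described above shifts the operand with the smaller exponent right by the exponent difference so that both share the larger exponent; the assembly keeps the shifted-out bits in a second register, and when the difference reaches FLT_MANT_DIG+2 it gives up and returns the larger operand directly (Laddsf$a$small / Laddsf$b$small). A simplified C sketch that collapses the guard register into a single sticky bit (align_operands is a made-up name):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative sketch only: bring two (mantissa, exponent) pairs to a
       common exponent, ORing shifted-out bits into a sticky bit. */
    static void align_operands(uint64_t *ma, int *ea, uint64_t *mb, int *eb)
    {
        if (*ea < *eb) {                     /* make a the larger-exponent one */
            uint64_t tm = *ma; *ma = *mb; *mb = tm;
            int te = *ea; *ea = *eb; *eb = te;
        }
        int diff = *ea - *eb;
        if (diff >= 64) {
            *mb = (*mb != 0);                /* only a sticky bit survives */
        } else if (diff > 0) {
            uint64_t lost = *mb & ((1ULL << diff) - 1);
            *mb = (*mb >> diff) | (lost != 0);
        }
        *eb = *ea;
    }

    int main(void)
    {
        uint64_t ma = 0x1800000, mb = 0x1000000; /* 24-bit mantissas, hidden bit set */
        int ea = 5, eb = 2;
        align_operands(&ma, &ea, &mb, &eb);
        printf("ma=%#llx ea=%d  mb=%#llx eb=%d\n",
               (unsigned long long)ma, ea, (unsigned long long)mb, eb);
        return 0;
    }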
+#ifndef __mcoldfire__ + cmpw d6,d7 | compare exponents +#else + cmpl d6,d7 | compare exponents +#endif + beq Laddsf$3 | if equal don't shift ' + bhi 5f | branch if second exponent largest +1: + subl d6,d7 | keep the largest exponent + negl d7 +#ifndef __mcoldfire__ + lsrw IMM (8),d7 | put difference in lower byte +#else + lsrl IMM (8),d7 | put difference in lower byte +#endif +| if difference is too large we don't shift (actually, we can just exit) ' +#ifndef __mcoldfire__ + cmpw IMM (FLT_MANT_DIG+2),d7 +#else + cmpl IMM (FLT_MANT_DIG+2),d7 +#endif + bge Laddsf$b$small +#ifndef __mcoldfire__ + cmpw IMM (16),d7 | if difference >= 16 swap +#else + cmpl IMM (16),d7 | if difference >= 16 swap +#endif + bge 4f +2: +#ifndef __mcoldfire__ + subw IMM (1),d7 +#else + subql IMM (1), d7 +#endif +3: +#ifndef __mcoldfire__ + lsrl IMM (1),d2 | shift right second operand + roxrl IMM (1),d3 + dbra d7,3b +#else + lsrl IMM (1),d3 + btst IMM (0),d2 + beq 10f + bset IMM (31),d3 +10: lsrl IMM (1),d2 + subql IMM (1), d7 + bpl 3b +#endif + bra Laddsf$3 +4: + movew d2,d3 + swap d3 + movew d3,d2 + swap d2 +#ifndef __mcoldfire__ + subw IMM (16),d7 +#else + subl IMM (16),d7 +#endif + bne 2b | if still more bits, go back to normal case + bra Laddsf$3 +5: +#ifndef __mcoldfire__ + exg d6,d7 | exchange the exponents +#else + eorl d6,d7 + eorl d7,d6 + eorl d6,d7 +#endif + subl d6,d7 | keep the largest exponent + negl d7 | +#ifndef __mcoldfire__ + lsrw IMM (8),d7 | put difference in lower byte +#else + lsrl IMM (8),d7 | put difference in lower byte +#endif +| if difference is too large we don't shift (and exit!) ' +#ifndef __mcoldfire__ + cmpw IMM (FLT_MANT_DIG+2),d7 +#else + cmpl IMM (FLT_MANT_DIG+2),d7 +#endif + bge Laddsf$a$small +#ifndef __mcoldfire__ + cmpw IMM (16),d7 | if difference >= 16 swap +#else + cmpl IMM (16),d7 | if difference >= 16 swap +#endif + bge 8f +6: +#ifndef __mcoldfire__ + subw IMM (1),d7 +#else + subl IMM (1),d7 +#endif +7: +#ifndef __mcoldfire__ + lsrl IMM (1),d0 | shift right first operand + roxrl IMM (1),d1 + dbra d7,7b +#else + lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 + subql IMM (1),d7 + bpl 7b +#endif + bra Laddsf$3 +8: + movew d0,d1 + swap d1 + movew d1,d0 + swap d0 +#ifndef __mcoldfire__ + subw IMM (16),d7 +#else + subl IMM (16),d7 +#endif + bne 6b | if still more bits, go back to normal case + | otherwise we fall through + +| Now we have a in d0-d1, b in d2-d3, and the largest exponent in d6 (the +| signs are stored in a0 and a1). + +Laddsf$3: +| Here we have to decide whether to add or subtract the numbers +#ifndef __mcoldfire__ + exg d6,a0 | get signs back + exg d7,a1 | and save the exponents +#else + movel d6,d4 + movel a0,d6 + movel d4,a0 + movel d7,d4 + movel a1,d7 + movel d4,a1 +#endif + eorl d6,d7 | combine sign bits + bmi Lsubsf$0 | if negative a and b have opposite + | sign so we actually subtract the + | numbers + +| Here we have both positive or both negative +#ifndef __mcoldfire__ + exg d6,a0 | now we have the exponent in d6 +#else + movel d6,d4 + movel a0,d6 + movel d4,a0 +#endif + movel a0,d7 | and sign in d7 + andl IMM (0x80000000),d7 +| Here we do the addition. + addl d3,d1 + addxl d2,d0 +| Note: now we have d2, d3, d4 and d5 to play with! 
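The addl/addxl pair just above is the 68k idiom for a multiword add: the first instruction adds the low words and leaves the carry in the X flag, and the second adds the high words together with X. Roughly, in C (add64 is a made-up name):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative sketch only: a 64-bit add built from two 32-bit adds
       with the carry propagated by hand, as addl/addxl do. */
    static void add64(uint32_t ahi, uint32_t alo, uint32_t bhi, uint32_t blo,
                      uint32_t *rhi, uint32_t *rlo)
    {
        uint32_t lo = alo + blo;             /* addl: low words */
        uint32_t carry = lo < alo;           /* did the low add wrap? */
        *rhi = ahi + bhi + carry;            /* addxl: high words plus X */
        *rlo = lo;
    }

    int main(void)
    {
        uint32_t hi, lo;
        add64(0x0000ffff, 0xffffffff, 0, 1, &hi, &lo);
        printf("%#x %#x\n", (unsigned)hi, (unsigned)lo);   /* 0x10000 0 */
        return 0;
    }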
+ +| Put the exponent, in the first byte, in d2, to use the "standard" rounding +| routines: + movel d6,d2 +#ifndef __mcoldfire__ + lsrw IMM (8),d2 +#else + lsrl IMM (8),d2 +#endif + +| Before rounding normalize so bit #FLT_MANT_DIG is set (we will consider +| the case of denormalized numbers in the rounding routine itself). +| As in the addition (not in the subtraction!) we could have set +| one more bit we check this: + btst IMM (FLT_MANT_DIG+1),d0 + beq 1f +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 +#else + lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 +#endif + addl IMM (1),d2 +1: + lea pc@(Laddsf$4),a0 | to return from rounding routine + PICLEA SYM (_fpCCR),a1 | check the rounding mode +#ifdef __mcoldfire__ + clrl d6 +#endif + movew a1@(6),d6 | rounding mode in d6 + beq Lround$to$nearest +#ifndef __mcoldfire__ + cmpw IMM (ROUND_TO_PLUS),d6 +#else + cmpl IMM (ROUND_TO_PLUS),d6 +#endif + bhi Lround$to$minus + blt Lround$to$zero + bra Lround$to$plus +Laddsf$4: +| Put back the exponent, but check for overflow. +#ifndef __mcoldfire__ + cmpw IMM (0xff),d2 +#else + cmpl IMM (0xff),d2 +#endif + bhi 1f + bclr IMM (FLT_MANT_DIG-1),d0 +#ifndef __mcoldfire__ + lslw IMM (7),d2 +#else + lsll IMM (7),d2 +#endif + swap d2 + orl d2,d0 + bra Laddsf$ret +1: + moveq IMM (ADD),d5 + bra Lf$overflow + +Lsubsf$0: +| We are here if a > 0 and b < 0 (sign bits cleared). +| Here we do the subtraction. + movel d6,d7 | put sign in d7 + andl IMM (0x80000000),d7 + + subl d3,d1 | result in d0-d1 + subxl d2,d0 | + beq Laddsf$ret | if zero just exit + bpl 1f | if positive skip the following + bchg IMM (31),d7 | change sign bit in d7 + negl d1 + negxl d0 +1: +#ifndef __mcoldfire__ + exg d2,a0 | now we have the exponent in d2 + lsrw IMM (8),d2 | put it in the first byte +#else + movel d2,d4 + movel a0,d2 + movel d4,a0 + lsrl IMM (8),d2 | put it in the first byte +#endif + +| Now d0-d1 is positive and the sign bit is in d7. + +| Note that we do not have to normalize, since in the subtraction bit +| #FLT_MANT_DIG+1 is never set, and denormalized numbers are handled by +| the rounding routines themselves. + lea pc@(Lsubsf$1),a0 | to return from rounding routine + PICLEA SYM (_fpCCR),a1 | check the rounding mode +#ifdef __mcoldfire__ + clrl d6 +#endif + movew a1@(6),d6 | rounding mode in d6 + beq Lround$to$nearest +#ifndef __mcoldfire__ + cmpw IMM (ROUND_TO_PLUS),d6 +#else + cmpl IMM (ROUND_TO_PLUS),d6 +#endif + bhi Lround$to$minus + blt Lround$to$zero + bra Lround$to$plus +Lsubsf$1: +| Put back the exponent (we can't have overflow!). ' + bclr IMM (FLT_MANT_DIG-1),d0 +#ifndef __mcoldfire__ + lslw IMM (7),d2 +#else + lsll IMM (7),d2 +#endif + swap d2 + orl d2,d0 + bra Laddsf$ret + +| If one of the numbers was too small (difference of exponents >= +| FLT_MANT_DIG+2) we return the other (and now we don't have to ' +| check for finiteness or zero). +Laddsf$a$small: + movel a6@(12),d0 + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | restore data registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | and return + rts + +Laddsf$b$small: + movel a6@(8),d0 + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | restore data registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. 
+#endif + unlk a6 | and return + rts + +| If the numbers are denormalized remember to put exponent equal to 1. + +Laddsf$a$den: + movel d5,d6 | d5 contains 0x01000000 + swap d6 + bra Laddsf$1 + +Laddsf$b$den: + movel d5,d7 + swap d7 + notl d4 | make d4 into a mask for the fraction + | (this was not executed after the jump) + bra Laddsf$2 + +| The rest is mainly code for the different results which can be +| returned (checking always for +/-INFINITY and NaN). + +Laddsf$b: +| Return b (if a is zero). + movel a6@(12),d0 + cmpl IMM (0x80000000),d0 | Check if b is -0 + bne 1f + movel a0,d7 + andl IMM (0x80000000),d7 | Use the sign of a + clrl d0 + bra Laddsf$ret +Laddsf$a: +| Return a (if b is zero). + movel a6@(8),d0 +1: + moveq IMM (ADD),d5 +| We have to check for NaN and +/-infty. + movel d0,d7 + andl IMM (0x80000000),d7 | put sign in d7 + bclr IMM (31),d0 | clear sign + cmpl IMM (INFINITY),d0 | check for infty or NaN + bge 2f + movel d0,d0 | check for zero (we do this because we don't ' + bne Laddsf$ret | want to return -0 by mistake + bclr IMM (31),d7 | if zero be sure to clear sign + bra Laddsf$ret | if everything OK just return +2: +| The value to be returned is either +/-infty or NaN + andl IMM (0x007fffff),d0 | check for NaN + bne Lf$inop | if mantissa not zero is NaN + bra Lf$infty + +Laddsf$ret: +| Normal exit (a and b nonzero, result is not NaN nor +/-infty). +| We have to clear the exception flags (just the exception type). + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ + orl d7,d0 | put sign bit +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | restore data registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | and return + rts + +Laddsf$ret$den: +| Return a denormalized number (for addition we don't signal underflow) ' + lsrl IMM (1),d0 | remember to shift right back once + bra Laddsf$ret | and return + +| Note: when adding two floats of the same sign if either one is +| NaN we return NaN without regard to whether the other is finite or +| not. When subtracting them (i.e., when adding two numbers of +| opposite signs) things are more complicated: if both are INFINITY +| we return NaN, if only one is INFINITY and the other is NaN we return +| NaN, but if it is finite we return INFINITY with the corresponding sign. + +Laddsf$nf: + moveq IMM (ADD),d5 +| This could be faster but it is not worth the effort, since it is not +| executed very often. We sacrifice speed for clarity here. + movel a6@(8),d0 | get the numbers back (remember that we + movel a6@(12),d1 | did some processing already) + movel IMM (INFINITY),d4 | useful constant (INFINITY) + movel d0,d2 | save sign bits + movel d1,d3 + bclr IMM (31),d0 | clear sign bits + bclr IMM (31),d1 +| We know that one of them is either NaN of +/-INFINITY +| Check for NaN (if either one is NaN return NaN) + cmpl d4,d0 | check first a (d0) + bhi Lf$inop + cmpl d4,d1 | check now b (d1) + bhi Lf$inop +| Now comes the check for +/-INFINITY. We know that both are (maybe not +| finite) numbers, but we have to check if both are infinite whether we +| are adding or subtracting them. + eorl d3,d2 | to check sign bits + bmi 1f + movel d0,d7 + andl IMM (0x80000000),d7 | get (common) sign bit + bra Lf$infty +1: +| We know one (or both) are infinite, so we test for equality between the +| two numbers (if they are equal they have to be infinite both, so we +| return NaN). + cmpl d1,d0 | are both infinite? 
+ beq Lf$inop | if so return NaN + + movel d0,d7 + andl IMM (0x80000000),d7 | get a's sign bit ' + cmpl d4,d0 | test now for infinity + beq Lf$infty | if a is INFINITY return with this sign + bchg IMM (31),d7 | else we know b is INFINITY and has + bra Lf$infty | the opposite sign + +|============================================================================= +| __mulsf3 +|============================================================================= + +| float __mulsf3(float, float); + FUNC(__mulsf3) +SYM (__mulsf3): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + movel a6@(8),d0 | get a into d0 + movel a6@(12),d1 | and b into d1 + movel d0,d7 | d7 will hold the sign of the product + eorl d1,d7 | + andl IMM (0x80000000),d7 + movel IMM (INFINITY),d6 | useful constant (+INFINITY) + movel d6,d5 | another (mask for fraction) + notl d5 | + movel IMM (0x00800000),d4 | this is to put hidden bit back + bclr IMM (31),d0 | get rid of a's sign bit ' + movel d0,d2 | + beq Lmulsf$a$0 | branch if a is zero + bclr IMM (31),d1 | get rid of b's sign bit ' + movel d1,d3 | + beq Lmulsf$b$0 | branch if b is zero + cmpl d6,d0 | is a big? + bhi Lmulsf$inop | if a is NaN return NaN + beq Lmulsf$inf | if a is INFINITY we have to check b + cmpl d6,d1 | now compare b with INFINITY + bhi Lmulsf$inop | is b NaN? + beq Lmulsf$overflow | is b INFINITY? +| Here we have both numbers finite and nonzero (and with no sign bit). +| Now we get the exponents into d2 and d3. + andl d6,d2 | and isolate exponent in d2 + beq Lmulsf$a$den | if exponent is zero we have a denormalized + andl d5,d0 | and isolate fraction + orl d4,d0 | and put hidden bit back + swap d2 | I like exponents in the first byte +#ifndef __mcoldfire__ + lsrw IMM (7),d2 | +#else + lsrl IMM (7),d2 | +#endif +Lmulsf$1: | number + andl d6,d3 | + beq Lmulsf$b$den | + andl d5,d1 | + orl d4,d1 | + swap d3 | +#ifndef __mcoldfire__ + lsrw IMM (7),d3 | +#else + lsrl IMM (7),d3 | +#endif +Lmulsf$2: | +#ifndef __mcoldfire__ + addw d3,d2 | add exponents + subw IMM (F_BIAS+1),d2 | and subtract bias (plus one) +#else + addl d3,d2 | add exponents + subl IMM (F_BIAS+1),d2 | and subtract bias (plus one) +#endif + +| We are now ready to do the multiplication. The situation is as follows: +| both a and b have bit FLT_MANT_DIG-1 set (even if they were +| denormalized to start with!), which means that in the product +| bit 2*(FLT_MANT_DIG-1) (that is, bit 2*FLT_MANT_DIG-2-32 of the +| high long) is set. + +| To do the multiplication let us move the number a little bit around ... + movel d1,d6 | second operand in d6 + movel d0,d5 | first operand in d4-d5 + movel IMM (0),d4 + movel d4,d1 | the sums will go in d0-d1 + movel d4,d0 + +| now bit FLT_MANT_DIG-1 becomes bit 31: + lsll IMM (31-FLT_MANT_DIG+1),d6 + +| Start the loop (we loop #FLT_MANT_DIG times): + moveq IMM (FLT_MANT_DIG-1),d3 +1: addl d1,d1 | shift sum + addxl d0,d0 + lsll IMM (1),d6 | get bit bn + bcc 2f | if not set skip sum + addl d5,d1 | add a + addxl d4,d0 +2: +#ifndef __mcoldfire__ + dbf d3,1b | loop back +#else + subql IMM (1),d3 + bpl 1b +#endif + +| Now we have the product in d0-d1, with bit (FLT_MANT_DIG - 1) + FLT_MANT_DIG +| (mod 32) of d0 set. The first thing to do now is to normalize it so bit +| FLT_MANT_DIG is set (to do the rounding). 
+#ifndef __mcoldfire__ + rorl IMM (6),d1 + swap d1 + movew d1,d3 + andw IMM (0x03ff),d3 + andw IMM (0xfd00),d1 +#else + movel d1,d3 + lsll IMM (8),d1 + addl d1,d1 + addl d1,d1 + moveq IMM (22),d5 + lsrl d5,d3 + orl d3,d1 + andl IMM (0xfffffd00),d1 +#endif + lsll IMM (8),d0 + addl d0,d0 + addl d0,d0 +#ifndef __mcoldfire__ + orw d3,d0 +#else + orl d3,d0 +#endif + + moveq IMM (MULTIPLY),d5 + + btst IMM (FLT_MANT_DIG+1),d0 + beq Lround$exit +#ifndef __mcoldfire__ + lsrl IMM (1),d0 + roxrl IMM (1),d1 + addw IMM (1),d2 +#else + lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 + addql IMM (1),d2 +#endif + bra Lround$exit + +Lmulsf$inop: + moveq IMM (MULTIPLY),d5 + bra Lf$inop + +Lmulsf$overflow: + moveq IMM (MULTIPLY),d5 + bra Lf$overflow + +Lmulsf$inf: + moveq IMM (MULTIPLY),d5 +| If either is NaN return NaN; else both are (maybe infinite) numbers, so +| return INFINITY with the correct sign (which is in d7). + cmpl d6,d1 | is b NaN? + bhi Lf$inop | if so return NaN + bra Lf$overflow | else return +/-INFINITY + +| If either number is zero return zero, unless the other is +/-INFINITY, +| or NaN, in which case we return NaN. +Lmulsf$b$0: +| Here d1 (==b) is zero. + movel a6@(8),d1 | get a again to check for non-finiteness + bra 1f +Lmulsf$a$0: + movel a6@(12),d1 | get b again to check for non-finiteness +1: bclr IMM (31),d1 | clear sign bit + cmpl IMM (INFINITY),d1 | and check for a large exponent + bge Lf$inop | if b is +/-INFINITY or NaN return NaN + movel d7,d0 | else return signed zero + PICLEA SYM (_fpCCR),a0 | + movew IMM (0),a0@ | +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | + rts | + +| If a number is denormalized we put an exponent of 1 but do not put the +| hidden bit back into the fraction; instead we shift left until bit 23 +| (the hidden bit) is set, adjusting the exponent accordingly. We do this +| to ensure that the product of the fractions is close to 1. 
+Lmulsf$a$den: + movel IMM (1),d2 + andl d5,d0 +1: addl d0,d0 | shift a left (until bit 23 is set) +#ifndef __mcoldfire__ + subw IMM (1),d2 | and adjust exponent +#else + subql IMM (1),d2 | and adjust exponent +#endif + btst IMM (FLT_MANT_DIG-1),d0 + bne Lmulsf$1 | + bra 1b | else loop back + +Lmulsf$b$den: + movel IMM (1),d3 + andl d5,d1 +1: addl d1,d1 | shift b left until bit 23 is set +#ifndef __mcoldfire__ + subw IMM (1),d3 | and adjust exponent +#else + subql IMM (1),d3 | and adjust exponent +#endif + btst IMM (FLT_MANT_DIG-1),d1 + bne Lmulsf$2 | + bra 1b | else loop back + +|============================================================================= +| __divsf3 +|============================================================================= + +| float __divsf3(float, float); + FUNC(__divsf3) +SYM (__divsf3): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + movel a6@(8),d0 | get a into d0 + movel a6@(12),d1 | and b into d1 + movel d0,d7 | d7 will hold the sign of the result + eorl d1,d7 | + andl IMM (0x80000000),d7 | + movel IMM (INFINITY),d6 | useful constant (+INFINITY) + movel d6,d5 | another (mask for fraction) + notl d5 | + movel IMM (0x00800000),d4 | this is to put hidden bit back + bclr IMM (31),d0 | get rid of a's sign bit ' + movel d0,d2 | + beq Ldivsf$a$0 | branch if a is zero + bclr IMM (31),d1 | get rid of b's sign bit ' + movel d1,d3 | + beq Ldivsf$b$0 | branch if b is zero + cmpl d6,d0 | is a big? + bhi Ldivsf$inop | if a is NaN return NaN + beq Ldivsf$inf | if a is INFINITY we have to check b + cmpl d6,d1 | now compare b with INFINITY + bhi Ldivsf$inop | if b is NaN return NaN + beq Ldivsf$underflow +| Here we have both numbers finite and nonzero (and with no sign bit). +| Now we get the exponents into d2 and d3 and normalize the numbers to +| ensure that the ratio of the fractions is close to 1. We do this by +| making sure that bit #FLT_MANT_DIG-1 (hidden bit) is set. + andl d6,d2 | and isolate exponent in d2 + beq Ldivsf$a$den | if exponent is zero we have a denormalized + andl d5,d0 | and isolate fraction + orl d4,d0 | and put hidden bit back + swap d2 | I like exponents in the first byte +#ifndef __mcoldfire__ + lsrw IMM (7),d2 | +#else + lsrl IMM (7),d2 | +#endif +Ldivsf$1: | + andl d6,d3 | + beq Ldivsf$b$den | + andl d5,d1 | + orl d4,d1 | + swap d3 | +#ifndef __mcoldfire__ + lsrw IMM (7),d3 | +#else + lsrl IMM (7),d3 | +#endif +Ldivsf$2: | +#ifndef __mcoldfire__ + subw d3,d2 | subtract exponents + addw IMM (F_BIAS),d2 | and add bias +#else + subl d3,d2 | subtract exponents + addl IMM (F_BIAS),d2 | and add bias +#endif + +| We are now ready to do the division. We have prepared things in such a way +| that the ratio of the fractions will be less than 2 but greater than 1/2. +| At this point the registers in use are: +| d0 holds a (first operand, bit FLT_MANT_DIG=0, bit FLT_MANT_DIG-1=1) +| d1 holds b (second operand, bit FLT_MANT_DIG=1) +| d2 holds the difference of the exponents, corrected by the bias +| d7 holds the sign of the ratio +| d4, d5, d6 hold some constants + movel d7,a0 | d6-d7 will hold the ratio of the fractions + movel IMM (0),d6 | + movel d6,d7 + + moveq IMM (FLT_MANT_DIG+1),d3 +1: cmpl d0,d1 | is a < b? + bhi 2f | + bset d3,d6 | set a bit in d6 + subl d1,d0 | if a >= b a <-- a-b + beq 3f | if a is zero, exit +2: addl d0,d0 | multiply a by 2 +#ifndef __mcoldfire__ + dbra d3,1b +#else + subql IMM (1),d3 + bpl 1b +#endif + +| Now we keep going to set the sticky bit ... 
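+| (That second pass below only records whether any remainder is left, so
+| the rounding code still sees the bits we can no longer keep.)
+|
+| A rough C model of the quotient loop above (illustrative only; a and b
+| are the prepared fractions, with the ratio a/b between 1/2 and 2):
+|
+|   uint32_t q = 0;
+|   for (int i = 25; i >= 0; i--)      /* FLT_MANT_DIG+2 quotient bits    */
+|     {
+|       if (a >= b)
+|         {
+|           q |= 1u << i;              /* record a quotient bit           */
+|           a -= b;                    /* keep the remainder              */
+|           if (a == 0)
+|             break;                   /* division came out exact         */
+|         }
+|       a <<= 1;                       /* bring down the next bit         */
+|     }
+|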
+ moveq IMM (FLT_MANT_DIG),d3 +1: cmpl d0,d1 + ble 2f + addl d0,d0 +#ifndef __mcoldfire__ + dbra d3,1b +#else + subql IMM(1),d3 + bpl 1b +#endif + movel IMM (0),d1 + bra 3f +2: movel IMM (0),d1 +#ifndef __mcoldfire__ + subw IMM (FLT_MANT_DIG),d3 + addw IMM (31),d3 +#else + subl IMM (FLT_MANT_DIG),d3 + addl IMM (31),d3 +#endif + bset d3,d1 +3: + movel d6,d0 | put the ratio in d0-d1 + movel a0,d7 | get sign back + +| Because of the normalization we did before we are guaranteed that +| d0 is smaller than 2^26 but larger than 2^24. Thus bit 26 is not set, +| bit 25 could be set, and if it is not set then bit 24 is necessarily set. + btst IMM (FLT_MANT_DIG+1),d0 + beq 1f | if it is not set, then bit 24 is set + lsrl IMM (1),d0 | +#ifndef __mcoldfire__ + addw IMM (1),d2 | +#else + addl IMM (1),d2 | +#endif +1: +| Now round, check for over- and underflow, and exit. + moveq IMM (DIVIDE),d5 + bra Lround$exit + +Ldivsf$inop: + moveq IMM (DIVIDE),d5 + bra Lf$inop + +Ldivsf$overflow: + moveq IMM (DIVIDE),d5 + bra Lf$overflow + +Ldivsf$underflow: + moveq IMM (DIVIDE),d5 + bra Lf$underflow + +Ldivsf$a$0: + moveq IMM (DIVIDE),d5 +| If a is zero check to see whether b is zero also. In that case return +| NaN; then check if b is NaN, and return NaN also in that case. Else +| return a properly signed zero. + andl IMM (0x7fffffff),d1 | clear sign bit and test b + beq Lf$inop | if b is also zero return NaN + cmpl IMM (INFINITY),d1 | check for NaN + bhi Lf$inop | + movel d7,d0 | else return signed zero + PICLEA SYM (_fpCCR),a0 | + movew IMM (0),a0@ | +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | +#else + moveml sp@,d2-d7 | + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 | + rts | + +Ldivsf$b$0: + moveq IMM (DIVIDE),d5 +| If we got here a is not zero. Check if a is NaN; in that case return NaN, +| else return +/-INFINITY. Remember that a is in d0 with the sign bit +| cleared already. + cmpl IMM (INFINITY),d0 | compare d0 with INFINITY + bhi Lf$inop | if larger it is NaN + bra Lf$div$0 | else signal DIVIDE_BY_ZERO + +Ldivsf$inf: + moveq IMM (DIVIDE),d5 +| If a is INFINITY we have to check b + cmpl IMM (INFINITY),d1 | compare b with INFINITY + bge Lf$inop | if b is NaN or INFINITY return NaN + bra Lf$overflow | else return overflow + +| If a number is denormalized we put an exponent of 1 but do not put the +| bit back into the fraction. +Ldivsf$a$den: + movel IMM (1),d2 + andl d5,d0 +1: addl d0,d0 | shift a left until bit FLT_MANT_DIG-1 is set +#ifndef __mcoldfire__ + subw IMM (1),d2 | and adjust exponent +#else + subl IMM (1),d2 | and adjust exponent +#endif + btst IMM (FLT_MANT_DIG-1),d0 + bne Ldivsf$1 + bra 1b + +Ldivsf$b$den: + movel IMM (1),d3 + andl d5,d1 +1: addl d1,d1 | shift b left until bit FLT_MANT_DIG is set +#ifndef __mcoldfire__ + subw IMM (1),d3 | and adjust exponent +#else + subl IMM (1),d3 | and adjust exponent +#endif + btst IMM (FLT_MANT_DIG-1),d1 + bne Ldivsf$2 + bra 1b + +Lround$exit: +| This is a common exit point for __mulsf3 and __divsf3. + +| First check for underlow in the exponent: +#ifndef __mcoldfire__ + cmpw IMM (-FLT_MANT_DIG-1),d2 +#else + cmpl IMM (-FLT_MANT_DIG-1),d2 +#endif + blt Lf$underflow +| It could happen that the exponent is less than 1, in which case the +| number is denormalized. In this case we shift right and adjust the +| exponent until it becomes 1 or the fraction is zero (in the latter case +| we signal underflow and return zero). 
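+|
+| Roughly, in C (illustrative only; hi/lo are the two halves of the
+| fraction held in d0/d1 and exp is the exponent held in d2):
+|
+|   uint32_t lost = 0;
+|   while (exp < 1)
+|     {
+|       lost = (lost >> 1) | ((lo & 1) << 31);   /* catch the bit we drop */
+|       lo   = (lo   >> 1) | ((hi & 1) << 31);
+|       hi >>= 1;
+|       exp++;
+|     }
+|   lo |= lost;    /* fold the dropped bits back in so rounding sees them */
+|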
+ movel IMM (0),d6 | d6 is used temporarily +#ifndef __mcoldfire__ + cmpw IMM (1),d2 | if the exponent is less than 1 we +#else + cmpl IMM (1),d2 | if the exponent is less than 1 we +#endif + bge 2f | have to shift right (denormalize) +1: +#ifndef __mcoldfire__ + addw IMM (1),d2 | adjust the exponent + lsrl IMM (1),d0 | shift right once + roxrl IMM (1),d1 | + roxrl IMM (1),d6 | d6 collect bits we would lose otherwise + cmpw IMM (1),d2 | is the exponent 1 already? +#else + addql IMM (1),d2 | adjust the exponent + lsrl IMM (1),d6 + btst IMM (0),d1 + beq 11f + bset IMM (31),d6 +11: lsrl IMM (1),d1 + btst IMM (0),d0 + beq 10f + bset IMM (31),d1 +10: lsrl IMM (1),d0 + cmpl IMM (1),d2 | is the exponent 1 already? +#endif + beq 2f | if not loop back + bra 1b | + bra Lf$underflow | safety check, shouldn't execute ' +2: orl d6,d1 | this is a trick so we don't lose ' + | the extra bits which were flushed right +| Now call the rounding routine (which takes care of denormalized numbers): + lea pc@(Lround$0),a0 | to return from rounding routine + PICLEA SYM (_fpCCR),a1 | check the rounding mode +#ifdef __mcoldfire__ + clrl d6 +#endif + movew a1@(6),d6 | rounding mode in d6 + beq Lround$to$nearest +#ifndef __mcoldfire__ + cmpw IMM (ROUND_TO_PLUS),d6 +#else + cmpl IMM (ROUND_TO_PLUS),d6 +#endif + bhi Lround$to$minus + blt Lround$to$zero + bra Lround$to$plus +Lround$0: +| Here we have a correctly rounded result (either normalized or denormalized). + +| Here we should have either a normalized number or a denormalized one, and +| the exponent is necessarily larger or equal to 1 (so we don't have to ' +| check again for underflow!). We have to check for overflow or for a +| denormalized number (which also signals underflow). +| Check for overflow (i.e., exponent >= 255). +#ifndef __mcoldfire__ + cmpw IMM (0x00ff),d2 +#else + cmpl IMM (0x00ff),d2 +#endif + bge Lf$overflow +| Now check for a denormalized number (exponent==0). + movew d2,d2 + beq Lf$den +1: +| Put back the exponents and sign and return. +#ifndef __mcoldfire__ + lslw IMM (7),d2 | exponent back to fourth byte +#else + lsll IMM (7),d2 | exponent back to fourth byte +#endif + bclr IMM (FLT_MANT_DIG-1),d0 + swap d0 | and put back exponent +#ifndef __mcoldfire__ + orw d2,d0 | +#else + orl d2,d0 +#endif + swap d0 | + orl d7,d0 | and sign also + + PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts + +|============================================================================= +| __negsf2 +|============================================================================= + +| This is trivial and could be shorter if we didn't bother checking for NaN ' +| and +/-INFINITY. 
+ +| float __negsf2(float); + FUNC(__negsf2) +SYM (__negsf2): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + moveq IMM (NEGATE),d5 + movel a6@(8),d0 | get number to negate in d0 + bchg IMM (31),d0 | negate + movel d0,d1 | make a positive copy + bclr IMM (31),d1 | + tstl d1 | check for zero + beq 2f | if zero (either sign) return +zero + cmpl IMM (INFINITY),d1 | compare to +INFINITY + blt 1f | + bhi Lf$inop | if larger (fraction not zero) is NaN + movel d0,d7 | else get sign and return INFINITY + andl IMM (0x80000000),d7 + bra Lf$infty +1: PICLEA SYM (_fpCCR),a0 + movew IMM (0),a0@ +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts +2: bclr IMM (31),d0 + bra 1b + +|============================================================================= +| __cmpsf2 +|============================================================================= + +GREATER = 1 +LESS = -1 +EQUAL = 0 + +| int __cmpsf2_internal(float, float, int); +SYM (__cmpsf2_internal): +#ifndef __mcoldfire__ + link a6,IMM (0) + moveml d2-d7,sp@- | save registers +#else + link a6,IMM (-24) + moveml d2-d7,sp@ +#endif + moveq IMM (COMPARE),d5 + movel a6@(8),d0 | get first operand + movel a6@(12),d1 | get second operand +| Check if either is NaN, and in that case return garbage and signal +| INVALID_OPERATION. Check also if either is zero, and clear the signs +| if necessary. + movel d0,d6 + andl IMM (0x7fffffff),d0 + beq Lcmpsf$a$0 + cmpl IMM (0x7f800000),d0 + bhi Lcmpf$inop +Lcmpsf$1: + movel d1,d7 + andl IMM (0x7fffffff),d1 + beq Lcmpsf$b$0 + cmpl IMM (0x7f800000),d1 + bhi Lcmpf$inop +Lcmpsf$2: +| Check the signs + eorl d6,d7 + bpl 1f +| If the signs are not equal check if a >= 0 + tstl d6 + bpl Lcmpsf$a$gt$b | if (a >= 0 && b < 0) => a > b + bmi Lcmpsf$b$gt$a | if (a < 0 && b >= 0) => a < b +1: +| If the signs are equal check for < 0 + tstl d6 + bpl 1f +| If both are negative exchange them +#ifndef __mcoldfire__ + exg d0,d1 +#else + movel d0,d7 + movel d1,d0 + movel d7,d1 +#endif +1: +| Now that they are positive we just compare them as longs (does this also +| work for denormalized numbers?). + cmpl d0,d1 + bhi Lcmpsf$b$gt$a | |b| > |a| + bne Lcmpsf$a$gt$b | |b| < |a| +| If we got here a == b. + movel IMM (EQUAL),d0 +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | put back the registers +#else + moveml sp@,d2-d7 +#endif + unlk a6 + rts +Lcmpsf$a$gt$b: + movel IMM (GREATER),d0 +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | put back the registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. +#endif + unlk a6 + rts +Lcmpsf$b$gt$a: + movel IMM (LESS),d0 +#ifndef __mcoldfire__ + moveml sp@+,d2-d7 | put back the registers +#else + moveml sp@,d2-d7 + | XXX if frame pointer is ever removed, stack pointer must + | be adjusted here. 
+#endif + unlk a6 + rts + +Lcmpsf$a$0: + bclr IMM (31),d6 + bra Lcmpsf$1 +Lcmpsf$b$0: + bclr IMM (31),d7 + bra Lcmpsf$2 + +Lcmpf$inop: + movl a6@(16),d0 + moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 + moveq IMM (SINGLE_FLOAT),d6 + PICJUMP $_exception_handler + +| int __cmpsf2(float, float); + FUNC(__cmpsf2) +SYM (__cmpsf2): + link a6,IMM (0) + pea 1 + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpsf2_internal) + unlk a6 + rts + +|============================================================================= +| rounding routines +|============================================================================= + +| The rounding routines expect the number to be normalized in registers +| d0-d1, with the exponent in register d2. They assume that the +| exponent is larger or equal to 1. They return a properly normalized number +| if possible, and a denormalized number otherwise. The exponent is returned +| in d2. + +Lround$to$nearest: +| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"): +| Here we assume that the exponent is not too small (this should be checked +| before entering the rounding routine), but the number could be denormalized. + +| Check for denormalized numbers: +1: btst IMM (FLT_MANT_DIG),d0 + bne 2f | if set the number is normalized +| Normalize shifting left until bit #FLT_MANT_DIG is set or the exponent +| is one (remember that a denormalized number corresponds to an +| exponent of -F_BIAS+1). +#ifndef __mcoldfire__ + cmpw IMM (1),d2 | remember that the exponent is at least one +#else + cmpl IMM (1),d2 | remember that the exponent is at least one +#endif + beq 2f | an exponent of one means denormalized + addl d1,d1 | else shift and adjust the exponent + addxl d0,d0 | +#ifndef __mcoldfire__ + dbra d2,1b | +#else + subql IMM (1),d2 + bpl 1b +#endif +2: +| Now round: we do it as follows: after the shifting we can write the +| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2. +| If delta < 1, do nothing. If delta > 1, add 1 to f. +| If delta == 1, we make sure the rounded number will be even (odd?) +| (after shifting). + btst IMM (0),d0 | is delta < 1? + beq 2f | if so, do not do anything + tstl d1 | is delta == 1? + bne 1f | if so round to even + movel d0,d1 | + andl IMM (2),d1 | bit 1 is the last significant bit + addl d1,d0 | + bra 2f | +1: movel IMM (1),d1 | else add 1 + addl d1,d0 | +| Shift right once (because we used bit #FLT_MANT_DIG!). +2: lsrl IMM (1),d0 +| Now check again bit #FLT_MANT_DIG (rounding could have produced a +| 'fraction overflow' ...). + btst IMM (FLT_MANT_DIG),d0 + beq 1f + lsrl IMM (1),d0 +#ifndef __mcoldfire__ + addw IMM (1),d2 +#else + addql IMM (1),d2 +#endif +1: +| If bit #FLT_MANT_DIG-1 is clear we have a denormalized number, so we +| have to put the exponent to zero and return a denormalized number. + btst IMM (FLT_MANT_DIG-1),d0 + beq 1f + jmp a0@ +1: movel IMM (0),d2 + jmp a0@ + +Lround$to$zero: +Lround$to$plus: +Lround$to$minus: + jmp a0@ +#endif /* L_float */ + +| gcc expects the routines __eqdf2, __nedf2, __gtdf2, __gedf2, +| __ledf2, __ltdf2 to all return the same value as a direct call to +| __cmpdf2 would. In this implementation, each of these routines +| simply calls __cmpdf2. It would be more efficient to give the +| __cmpdf2 routine several names, but separating them out will make it +| easier to write efficient versions of these routines someday. +| If the operands recompare unordered unordered __gtdf2 and __gedf2 return -1. +| The other routines return 1. 
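+|
+| Shape of the wrappers that follow, in C (illustrative only; the third
+| argument of __cmpdf2_internal is the value to return for an unordered,
+| i.e. NaN, comparison):
+|
+|   int __gtdf2 (double a, double b)
+|   {
+|     return __cmpdf2_internal (a, b, -1);   /* NaN  =>  "not greater"   */
+|   }
+|
+|   int __ledf2 (double a, double b)
+|   {
+|     return __cmpdf2_internal (a, b, 1);    /* NaN  =>  "not less/equal" */
+|   }
+|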
+ +#ifdef L_eqdf2 + .text + FUNC(__eqdf2) + .globl SYM (__eqdf2) +SYM (__eqdf2): + link a6,IMM (0) + pea 1 + movl a6@(20),sp@- + movl a6@(16),sp@- + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpdf2_internal) + unlk a6 + rts +#endif /* L_eqdf2 */ + +#ifdef L_nedf2 + .text + FUNC(__nedf2) + .globl SYM (__nedf2) +SYM (__nedf2): + link a6,IMM (0) + pea 1 + movl a6@(20),sp@- + movl a6@(16),sp@- + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpdf2_internal) + unlk a6 + rts +#endif /* L_nedf2 */ + +#ifdef L_gtdf2 + .text + FUNC(__gtdf2) + .globl SYM (__gtdf2) +SYM (__gtdf2): + link a6,IMM (0) + pea -1 + movl a6@(20),sp@- + movl a6@(16),sp@- + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpdf2_internal) + unlk a6 + rts +#endif /* L_gtdf2 */ + +#ifdef L_gedf2 + .text + FUNC(__gedf2) + .globl SYM (__gedf2) +SYM (__gedf2): + link a6,IMM (0) + pea -1 + movl a6@(20),sp@- + movl a6@(16),sp@- + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpdf2_internal) + unlk a6 + rts +#endif /* L_gedf2 */ + +#ifdef L_ltdf2 + .text + FUNC(__ltdf2) + .globl SYM (__ltdf2) +SYM (__ltdf2): + link a6,IMM (0) + pea 1 + movl a6@(20),sp@- + movl a6@(16),sp@- + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpdf2_internal) + unlk a6 + rts +#endif /* L_ltdf2 */ + +#ifdef L_ledf2 + .text + FUNC(__ledf2) + .globl SYM (__ledf2) +SYM (__ledf2): + link a6,IMM (0) + pea 1 + movl a6@(20),sp@- + movl a6@(16),sp@- + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpdf2_internal) + unlk a6 + rts +#endif /* L_ledf2 */ + +| The comments above about __eqdf2, et. al., also apply to __eqsf2, +| et. al., except that the latter call __cmpsf2 rather than __cmpdf2. + +#ifdef L_eqsf2 + .text + FUNC(__eqsf2) + .globl SYM (__eqsf2) +SYM (__eqsf2): + link a6,IMM (0) + pea 1 + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpsf2_internal) + unlk a6 + rts +#endif /* L_eqsf2 */ + +#ifdef L_nesf2 + .text + FUNC(__nesf2) + .globl SYM (__nesf2) +SYM (__nesf2): + link a6,IMM (0) + pea 1 + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpsf2_internal) + unlk a6 + rts +#endif /* L_nesf2 */ + +#ifdef L_gtsf2 + .text + FUNC(__gtsf2) + .globl SYM (__gtsf2) +SYM (__gtsf2): + link a6,IMM (0) + pea -1 + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpsf2_internal) + unlk a6 + rts +#endif /* L_gtsf2 */ + +#ifdef L_gesf2 + .text + FUNC(__gesf2) + .globl SYM (__gesf2) +SYM (__gesf2): + link a6,IMM (0) + pea -1 + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpsf2_internal) + unlk a6 + rts +#endif /* L_gesf2 */ + +#ifdef L_ltsf2 + .text + FUNC(__ltsf2) + .globl SYM (__ltsf2) +SYM (__ltsf2): + link a6,IMM (0) + pea 1 + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpsf2_internal) + unlk a6 + rts +#endif /* L_ltsf2 */ + +#ifdef L_lesf2 + .text + FUNC(__lesf2) + .globl SYM (__lesf2) +SYM (__lesf2): + link a6,IMM (0) + pea 1 + movl a6@(12),sp@- + movl a6@(8),sp@- + PICCALL SYM (__cmpsf2_internal) + unlk a6 + rts +#endif /* L_lesf2 */ + +#if defined (__ELF__) && defined (__linux__) + /* Make stack non-executable for ELF linux targets. 
*/ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libgcc/config/m68k/t-floatlib b/libgcc/config/m68k/t-floatlib new file mode 100644 index 00000000000..4160eb9f537 --- /dev/null +++ b/libgcc/config/m68k/t-floatlib @@ -0,0 +1,5 @@ +LIB1ASMSRC = m68k/lb1sf68.S +LIB1ASMFUNCS = _mulsi3 _udivsi3 _divsi3 _umodsi3 _modsi3 \ + _double _float _floatex \ + _eqdf2 _nedf2 _gtdf2 _gedf2 _ltdf2 _ledf2 \ + _eqsf2 _nesf2 _gtsf2 _gesf2 _ltsf2 _lesf2 diff --git a/libgcc/config/mcore/lib1funcs.S b/libgcc/config/mcore/lib1funcs.S new file mode 100644 index 00000000000..701762f2a3c --- /dev/null +++ b/libgcc/config/mcore/lib1funcs.S @@ -0,0 +1,303 @@ +/* libgcc routines for the MCore. + Copyright (C) 1993, 1999, 2000, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#define CONCAT1(a, b) CONCAT2(a, b) +#define CONCAT2(a, b) a ## b + +/* Use the right prefix for global labels. */ + +#define SYM(x) CONCAT1 (__, x) + +#ifdef __ELF__ +#define TYPE(x) .type SYM (x),@function +#define SIZE(x) .size SYM (x), . - SYM (x) +#else +#define TYPE(x) +#define SIZE(x) +#endif + +.macro FUNC_START name + .text + .globl SYM (\name) + TYPE (\name) +SYM (\name): +.endm + +.macro FUNC_END name + SIZE (\name) +.endm + +#ifdef L_udivsi3 +FUNC_START udiv32 +FUNC_START udivsi32 + + movi r1,0 // r1-r2 form 64 bit dividend + movi r4,1 // r4 is quotient (1 for a sentinel) + + cmpnei r3,0 // look for 0 divisor + bt 9f + trap 3 // divide by 0 +9: + // control iterations; skip across high order 0 bits in dividend + mov r7,r2 + cmpnei r7,0 + bt 8f + movi r2,0 // 0 dividend + jmp r15 // quick return +8: + ff1 r7 // figure distance to skip + lsl r4,r7 // move the sentinel along (with 0's behind) + lsl r2,r7 // and the low 32 bits of numerator + +// appears to be wrong... +// tested out incorrectly in our OS work... +// mov r7,r3 // looking at divisor +// ff1 r7 // I can move 32-r7 more bits to left. +// addi r7,1 // ok, one short of that... +// mov r1,r2 +// lsr r1,r7 // bits that came from low order... +// rsubi r7,31 // r7 == "32-n" == LEFT distance +// addi r7,1 // this is (32-n) +// lsl r4,r7 // fixes the high 32 (quotient) +// lsl r2,r7 +// cmpnei r4,0 +// bf 4f // the sentinel went away... + + // run the remaining bits + +1: lslc r2,1 // 1 bit left shift of r1-r2 + addc r1,r1 + cmphs r1,r3 // upper 32 of dividend >= divisor? 
+ bf 2f + sub r1,r3 // if yes, subtract divisor +2: addc r4,r4 // shift by 1 and count subtracts + bf 1b // if sentinel falls out of quotient, stop + +4: mov r2,r4 // return quotient + mov r3,r1 // and piggyback the remainder + jmp r15 +FUNC_END udiv32 +FUNC_END udivsi32 +#endif + +#ifdef L_umodsi3 +FUNC_START urem32 +FUNC_START umodsi3 + movi r1,0 // r1-r2 form 64 bit dividend + movi r4,1 // r4 is quotient (1 for a sentinel) + cmpnei r3,0 // look for 0 divisor + bt 9f + trap 3 // divide by 0 +9: + // control iterations; skip across high order 0 bits in dividend + mov r7,r2 + cmpnei r7,0 + bt 8f + movi r2,0 // 0 dividend + jmp r15 // quick return +8: + ff1 r7 // figure distance to skip + lsl r4,r7 // move the sentinel along (with 0's behind) + lsl r2,r7 // and the low 32 bits of numerator + +1: lslc r2,1 // 1 bit left shift of r1-r2 + addc r1,r1 + cmphs r1,r3 // upper 32 of dividend >= divisor? + bf 2f + sub r1,r3 // if yes, subtract divisor +2: addc r4,r4 // shift by 1 and count subtracts + bf 1b // if sentinel falls out of quotient, stop + mov r2,r1 // return remainder + jmp r15 +FUNC_END urem32 +FUNC_END umodsi3 +#endif + +#ifdef L_divsi3 +FUNC_START div32 +FUNC_START divsi3 + mov r5,r2 // calc sign of quotient + xor r5,r3 + abs r2 // do unsigned divide + abs r3 + movi r1,0 // r1-r2 form 64 bit dividend + movi r4,1 // r4 is quotient (1 for a sentinel) + cmpnei r3,0 // look for 0 divisor + bt 9f + trap 3 // divide by 0 +9: + // control iterations; skip across high order 0 bits in dividend + mov r7,r2 + cmpnei r7,0 + bt 8f + movi r2,0 // 0 dividend + jmp r15 // quick return +8: + ff1 r7 // figure distance to skip + lsl r4,r7 // move the sentinel along (with 0's behind) + lsl r2,r7 // and the low 32 bits of numerator + +// tested out incorrectly in our OS work... +// mov r7,r3 // looking at divisor +// ff1 r7 // I can move 32-r7 more bits to left. +// addi r7,1 // ok, one short of that... +// mov r1,r2 +// lsr r1,r7 // bits that came from low order... +// rsubi r7,31 // r7 == "32-n" == LEFT distance +// addi r7,1 // this is (32-n) +// lsl r4,r7 // fixes the high 32 (quotient) +// lsl r2,r7 +// cmpnei r4,0 +// bf 4f // the sentinel went away... + + // run the remaining bits +1: lslc r2,1 // 1 bit left shift of r1-r2 + addc r1,r1 + cmphs r1,r3 // upper 32 of dividend >= divisor? + bf 2f + sub r1,r3 // if yes, subtract divisor +2: addc r4,r4 // shift by 1 and count subtracts + bf 1b // if sentinel falls out of quotient, stop + +4: mov r2,r4 // return quotient + mov r3,r1 // piggyback the remainder + btsti r5,31 // after adjusting for sign + bf 3f + rsubi r2,0 + rsubi r3,0 +3: jmp r15 +FUNC_END div32 +FUNC_END divsi3 +#endif + +#ifdef L_modsi3 +FUNC_START rem32 +FUNC_START modsi3 + mov r5,r2 // calc sign of remainder + abs r2 // do unsigned divide + abs r3 + movi r1,0 // r1-r2 form 64 bit dividend + movi r4,1 // r4 is quotient (1 for a sentinel) + cmpnei r3,0 // look for 0 divisor + bt 9f + trap 3 // divide by 0 +9: + // control iterations; skip across high order 0 bits in dividend + mov r7,r2 + cmpnei r7,0 + bt 8f + movi r2,0 // 0 dividend + jmp r15 // quick return +8: + ff1 r7 // figure distance to skip + lsl r4,r7 // move the sentinel along (with 0's behind) + lsl r2,r7 // and the low 32 bits of numerator + +1: lslc r2,1 // 1 bit left shift of r1-r2 + addc r1,r1 + cmphs r1,r3 // upper 32 of dividend >= divisor? 
+ bf 2f + sub r1,r3 // if yes, subtract divisor +2: addc r4,r4 // shift by 1 and count subtracts + bf 1b // if sentinel falls out of quotient, stop + mov r2,r1 // return remainder + btsti r5,31 // after adjusting for sign + bf 3f + rsubi r2,0 +3: jmp r15 +FUNC_END rem32 +FUNC_END modsi3 +#endif + + +/* GCC expects that {__eq,__ne,__gt,__ge,__le,__lt}{df2,sf2} + will behave as __cmpdf2. So, we stub the implementations to + jump on to __cmpdf2 and __cmpsf2. + + All of these shortcircuit the return path so that __cmp{sd}f2 + will go directly back to the caller. */ + +.macro COMPARE_DF_JUMP name + .import SYM (cmpdf2) +FUNC_START \name + jmpi SYM (cmpdf2) +FUNC_END \name +.endm + +#ifdef L_eqdf2 +COMPARE_DF_JUMP eqdf2 +#endif /* L_eqdf2 */ + +#ifdef L_nedf2 +COMPARE_DF_JUMP nedf2 +#endif /* L_nedf2 */ + +#ifdef L_gtdf2 +COMPARE_DF_JUMP gtdf2 +#endif /* L_gtdf2 */ + +#ifdef L_gedf2 +COMPARE_DF_JUMP gedf2 +#endif /* L_gedf2 */ + +#ifdef L_ltdf2 +COMPARE_DF_JUMP ltdf2 +#endif /* L_ltdf2 */ + +#ifdef L_ledf2 +COMPARE_DF_JUMP ledf2 +#endif /* L_ledf2 */ + +/* SINGLE PRECISION FLOATING POINT STUBS */ + +.macro COMPARE_SF_JUMP name + .import SYM (cmpsf2) +FUNC_START \name + jmpi SYM (cmpsf2) +FUNC_END \name +.endm + +#ifdef L_eqsf2 +COMPARE_SF_JUMP eqsf2 +#endif /* L_eqsf2 */ + +#ifdef L_nesf2 +COMPARE_SF_JUMP nesf2 +#endif /* L_nesf2 */ + +#ifdef L_gtsf2 +COMPARE_SF_JUMP gtsf2 +#endif /* L_gtsf2 */ + +#ifdef L_gesf2 +COMPARE_SF_JUMP __gesf2 +#endif /* L_gesf2 */ + +#ifdef L_ltsf2 +COMPARE_SF_JUMP __ltsf2 +#endif /* L_ltsf2 */ + +#ifdef L_lesf2 +COMPARE_SF_JUMP lesf2 +#endif /* L_lesf2 */ diff --git a/libgcc/config/mcore/t-mcore b/libgcc/config/mcore/t-mcore new file mode 100644 index 00000000000..19c4c15cd0b --- /dev/null +++ b/libgcc/config/mcore/t-mcore @@ -0,0 +1,2 @@ +LIB1ASMSRC = mcore/lib1funcs.S +LIB1ASMFUNCS = _divsi3 _udivsi3 _modsi3 _umodsi3 diff --git a/libgcc/config/mep/lib1funcs.S b/libgcc/config/mep/lib1funcs.S new file mode 100644 index 00000000000..0a18913f927 --- /dev/null +++ b/libgcc/config/mep/lib1funcs.S @@ -0,0 +1,125 @@ +/* libgcc routines for Toshiba Media Processor. + Copyright (C) 2001, 2002, 2005, 2009 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +#define SAVEALL \ + add3 $sp, $sp, -16*4 ; \ + sw $0, ($sp) ; \ + sw $1, 4($sp) ; \ + sw $2, 8($sp) ; \ + sw $3, 12($sp) ; \ + sw $4, 16($sp) ; \ + sw $5, 20($sp) ; \ + sw $6, 24($sp) ; \ + sw $7, 28($sp) ; \ + sw $8, 32($sp) ; \ + sw $9, 36($sp) ; \ + sw $10, 40($sp) ; \ + sw $11, 44($sp) ; \ + sw $12, 48($sp) ; \ + sw $13, 52($sp) ; \ + sw $14, 56($sp) ; \ + ldc $5, $lp ; \ + add $5, 3 ; \ + mov $6, -4 ; \ + and $5, $6 + +#define RESTOREALL \ + stc $5, $lp ; \ + lw $14, 56($sp) ; \ + lw $13, 52($sp) ; \ + lw $12, 48($sp) ; \ + lw $11, 44($sp) ; \ + lw $10, 40($sp) ; \ + lw $9, 36($sp) ; \ + lw $8, 32($sp) ; \ + lw $7, 28($sp) ; \ + lw $6, 24($sp) ; \ + lw $5, 20($sp) ; \ + lw $4, 16($sp) ; \ + lw $3, 12($sp) ; \ + lw $2, 8($sp) ; \ + lw $1, 4($sp) ; \ + lw $0, ($sp) ; \ + add3 $sp, $sp, 16*4 ; \ + ret + +#ifdef L_mep_profile + .text + .global __mep_mcount +__mep_mcount: + SAVEALL + ldc $1, $lp + mov $2, $0 + bsr __mep_mcount_2 + RESTOREALL +#endif + +#ifdef L_mep_bb_init_trace + .text + .global __mep_bb_init_trace_func +__mep_bb_init_trace_func: + SAVEALL + lw $1, ($5) + lw $2, 4($5) + add $5, 8 + bsr __bb_init_trace_func + RESTOREALL +#endif + +#ifdef L_mep_bb_init + .text + .global __mep_bb_init_func +__mep_bb_init_func: + SAVEALL + lw $1, ($5) + add $5, 4 + bsr __bb_init_func + RESTOREALL +#endif + +#ifdef L_mep_bb_trace + .text + .global __mep_bb_trace_func +__mep_bb_trace_func: + SAVEALL + movu $3, __bb + lw $1, ($5) + sw $1, ($3) + lw $2, 4($5) + sw $2, 4($3) + add $5, 8 + bsr __bb_trace_func + RESTOREALL +#endif + +#ifdef L_mep_bb_increment + .text + .global __mep_bb_increment_func +__mep_bb_increment_func: + SAVEALL + lw $1, ($5) + lw $0, ($1) + add $0, 1 + sw $0, ($1) + add $5, 4 + RESTOREALL +#endif diff --git a/libgcc/config/mep/t-mep b/libgcc/config/mep/t-mep index 36e6f5dc771..d1fb094a41e 100644 --- a/libgcc/config/mep/t-mep +++ b/libgcc/config/mep/t-mep @@ -1,2 +1,11 @@ +# profiling support +LIB1ASMSRC = mep/lib1funcs.S + +LIB1ASMFUNCS = _mep_profile \ + _mep_bb_init_trace \ + _mep_bb_init \ + _mep_bb_trace \ + _mep_bb_increment + # Use -O0 instead of -O2 so we don't get complex relocations CRTSTUFF_CFLAGS += -O0 diff --git a/libgcc/config/mips/mips16.S b/libgcc/config/mips/mips16.S new file mode 100644 index 00000000000..ec331b5f65e --- /dev/null +++ b/libgcc/config/mips/mips16.S @@ -0,0 +1,712 @@ +/* mips16 floating point support code + Copyright (C) 1996, 1997, 1998, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Cygnus Support + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* This file contains mips16 floating point support functions. These + functions are called by mips16 code to handle floating point when + -msoft-float is not used. 
They accept the arguments and return + values using the soft-float calling convention, but do the actual + operation using the hard floating point instructions. */ + +#if defined _MIPS_SIM && (_MIPS_SIM == _ABIO32 || _MIPS_SIM == _ABIO64) + +/* This file contains 32-bit assembly code. */ + .set nomips16 + +/* Start a function. */ + +#define STARTFN(NAME) .globl NAME; .ent NAME; NAME: + +/* Finish a function. */ + +#define ENDFN(NAME) .end NAME + +/* ARG1 + The FPR that holds the first floating-point argument. + + ARG2 + The FPR that holds the second floating-point argument. + + RET + The FPR that holds a floating-point return value. */ + +#define RET $f0 +#define ARG1 $f12 +#ifdef __mips64 +#define ARG2 $f13 +#else +#define ARG2 $f14 +#endif + +/* Set 64-bit register GPR so that its high 32 bits contain HIGH_FPR + and so that its low 32 bits contain LOW_FPR. */ +#define MERGE_GPRf(GPR, HIGH_FPR, LOW_FPR) \ + .set noat; \ + mfc1 $1, LOW_FPR; \ + mfc1 GPR, HIGH_FPR; \ + dsll $1, $1, 32; \ + dsll GPR, GPR, 32; \ + dsrl $1, $1, 32; \ + or GPR, GPR, $1; \ + .set at + +/* Move the high 32 bits of GPR to HIGH_FPR and the low 32 bits of + GPR to LOW_FPR. */ +#define MERGE_GPRt(GPR, HIGH_FPR, LOW_FPR) \ + .set noat; \ + dsrl $1, GPR, 32; \ + mtc1 GPR, LOW_FPR; \ + mtc1 $1, HIGH_FPR; \ + .set at + +/* Jump to T, and use "OPCODE, OP2" to implement a delayed move. */ +#define DELAYt(T, OPCODE, OP2) \ + .set noreorder; \ + jr T; \ + OPCODE, OP2; \ + .set reorder + +/* Use "OPCODE. OP2" and jump to T. */ +#define DELAYf(T, OPCODE, OP2) OPCODE, OP2; jr T + +/* MOVE_SF_BYTE0(D) + Move the first single-precision floating-point argument between + GPRs and FPRs. + + MOVE_SI_BYTE0(D) + Likewise the first single-precision integer argument. + + MOVE_SF_BYTE4(D) + Move the second single-precision floating-point argument between + GPRs and FPRs, given that the first argument occupies 4 bytes. + + MOVE_SF_BYTE8(D) + Move the second single-precision floating-point argument between + GPRs and FPRs, given that the first argument occupies 8 bytes. + + MOVE_DF_BYTE0(D) + Move the first double-precision floating-point argument between + GPRs and FPRs. + + MOVE_DF_BYTE8(D) + Likewise the second double-precision floating-point argument. + + MOVE_SF_RET(D, T) + Likewise a single-precision floating-point return value, + then jump to T. + + MOVE_SC_RET(D, T) + Likewise a complex single-precision floating-point return value. + + MOVE_DF_RET(D, T) + Likewise a double-precision floating-point return value. + + MOVE_DC_RET(D, T) + Likewise a complex double-precision floating-point return value. + + MOVE_SI_RET(D, T) + Likewise a single-precision integer return value. + + The D argument is "t" to move to FPRs and "f" to move from FPRs. + The return macros may assume that the target of the jump does not + use a floating-point register. */ + +#define MOVE_SF_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0) +#define MOVE_SI_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0) + +#if defined(__mips64) && defined(__MIPSEB__) +#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f0, $f1); jr T +#elif defined(__mips64) +/* The high 32 bits of $2 correspond to the second word in memory; + i.e. the imaginary part. 
*/ +#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f1, $f0); jr T +#elif __mips_fpr == 64 +#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1) +#else +#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f2) +#endif + +#if defined(__mips64) +#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12 +#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f13 +#define MOVE_SF_BYTE8(D) m##D##c1 $5,$f13 +#else +#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12 +#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f14 +#define MOVE_SF_BYTE8(D) m##D##c1 $6,$f14 +#endif +#define MOVE_SI_BYTE0(D) MOVE_SF_BYTE0(D) + +#if defined(__mips64) +#define MOVE_DF_BYTE0(D) dm##D##c1 $4,$f12 +#define MOVE_DF_BYTE8(D) dm##D##c1 $5,$f13 +#define MOVE_DF_RET(D, T) DELAY##D (T, dm##D##c1 $2,$f0) +#define MOVE_DC_RET(D, T) dm##D##c1 $3,$f1; MOVE_DF_RET (D, T) +#elif __mips_fpr == 64 && defined(__MIPSEB__) +#define MOVE_DF_BYTE0(D) m##D##c1 $5,$f12; m##D##hc1 $4,$f12 +#define MOVE_DF_BYTE8(D) m##D##c1 $7,$f14; m##D##hc1 $6,$f14 +#define MOVE_DF_RET(D, T) m##D##c1 $3,$f0; DELAY##D (T, m##D##hc1 $2,$f0) +#define MOVE_DC_RET(D, T) m##D##c1 $5,$f1; m##D##hc1 $4,$f1; MOVE_DF_RET (D, T) +#elif __mips_fpr == 64 +#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##hc1 $5,$f12 +#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##hc1 $7,$f14 +#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##hc1 $3,$f0) +#define MOVE_DC_RET(D, T) m##D##c1 $4,$f1; m##D##hc1 $5,$f1; MOVE_DF_RET (D, T) +#elif defined(__MIPSEB__) +/* FPRs are little-endian. */ +#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f13; m##D##c1 $5,$f12 +#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f15; m##D##c1 $7,$f14 +#define MOVE_DF_RET(D, T) m##D##c1 $2,$f1; DELAY##D (T, m##D##c1 $3,$f0) +#define MOVE_DC_RET(D, T) m##D##c1 $4,$f3; m##D##c1 $5,$f2; MOVE_DF_RET (D, T) +#else +#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##c1 $5,$f13 +#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##c1 $7,$f15 +#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1) +#define MOVE_DC_RET(D, T) m##D##c1 $4,$f2; m##D##c1 $5,$f3; MOVE_DF_RET (D, T) +#endif + +/* Single-precision math. */ + +/* Define a function NAME that loads two single-precision values, + performs FPU operation OPCODE on them, and returns the single- + precision result. */ + +#define OPSF3(NAME, OPCODE) \ +STARTFN (NAME); \ + MOVE_SF_BYTE0 (t); \ + MOVE_SF_BYTE4 (t); \ + OPCODE RET,ARG1,ARG2; \ + MOVE_SF_RET (f, $31); \ + ENDFN (NAME) + +#ifdef L_m16addsf3 +OPSF3 (__mips16_addsf3, add.s) +#endif +#ifdef L_m16subsf3 +OPSF3 (__mips16_subsf3, sub.s) +#endif +#ifdef L_m16mulsf3 +OPSF3 (__mips16_mulsf3, mul.s) +#endif +#ifdef L_m16divsf3 +OPSF3 (__mips16_divsf3, div.s) +#endif + +/* Define a function NAME that loads a single-precision value, + performs FPU operation OPCODE on it, and returns the single- + precision result. */ + +#define OPSF2(NAME, OPCODE) \ +STARTFN (NAME); \ + MOVE_SF_BYTE0 (t); \ + OPCODE RET,ARG1; \ + MOVE_SF_RET (f, $31); \ + ENDFN (NAME) + +#ifdef L_m16negsf2 +OPSF2 (__mips16_negsf2, neg.s) +#endif +#ifdef L_m16abssf2 +OPSF2 (__mips16_abssf2, abs.s) +#endif + +/* Single-precision comparisons. */ + +/* Define a function NAME that loads two single-precision values, + performs floating point comparison OPCODE, and returns TRUE or + FALSE depending on the result. 
*/ + +#define CMPSF(NAME, OPCODE, TRUE, FALSE) \ +STARTFN (NAME); \ + MOVE_SF_BYTE0 (t); \ + MOVE_SF_BYTE4 (t); \ + OPCODE ARG1,ARG2; \ + li $2,TRUE; \ + bc1t 1f; \ + li $2,FALSE; \ +1:; \ + j $31; \ + ENDFN (NAME) + +/* Like CMPSF, but reverse the comparison operands. */ + +#define REVCMPSF(NAME, OPCODE, TRUE, FALSE) \ +STARTFN (NAME); \ + MOVE_SF_BYTE0 (t); \ + MOVE_SF_BYTE4 (t); \ + OPCODE ARG2,ARG1; \ + li $2,TRUE; \ + bc1t 1f; \ + li $2,FALSE; \ +1:; \ + j $31; \ + ENDFN (NAME) + +#ifdef L_m16eqsf2 +CMPSF (__mips16_eqsf2, c.eq.s, 0, 1) +#endif +#ifdef L_m16nesf2 +CMPSF (__mips16_nesf2, c.eq.s, 0, 1) +#endif +#ifdef L_m16gtsf2 +REVCMPSF (__mips16_gtsf2, c.lt.s, 1, 0) +#endif +#ifdef L_m16gesf2 +REVCMPSF (__mips16_gesf2, c.le.s, 0, -1) +#endif +#ifdef L_m16lesf2 +CMPSF (__mips16_lesf2, c.le.s, 0, 1) +#endif +#ifdef L_m16ltsf2 +CMPSF (__mips16_ltsf2, c.lt.s, -1, 0) +#endif +#ifdef L_m16unordsf2 +CMPSF(__mips16_unordsf2, c.un.s, 1, 0) +#endif + + +/* Single-precision conversions. */ + +#ifdef L_m16fltsisf +STARTFN (__mips16_floatsisf) + MOVE_SF_BYTE0 (t) + cvt.s.w RET,ARG1 + MOVE_SF_RET (f, $31) + ENDFN (__mips16_floatsisf) +#endif + +#ifdef L_m16fltunsisf +STARTFN (__mips16_floatunsisf) + .set noreorder + bltz $4,1f + MOVE_SF_BYTE0 (t) + .set reorder + cvt.s.w RET,ARG1 + MOVE_SF_RET (f, $31) +1: + and $2,$4,1 + srl $3,$4,1 + or $2,$2,$3 + mtc1 $2,RET + cvt.s.w RET,RET + add.s RET,RET,RET + MOVE_SF_RET (f, $31) + ENDFN (__mips16_floatunsisf) +#endif + +#ifdef L_m16fix_truncsfsi +STARTFN (__mips16_fix_truncsfsi) + MOVE_SF_BYTE0 (t) + trunc.w.s RET,ARG1,$4 + MOVE_SI_RET (f, $31) + ENDFN (__mips16_fix_truncsfsi) +#endif + +#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) + +/* Double-precision math. */ + +/* Define a function NAME that loads two double-precision values, + performs FPU operation OPCODE on them, and returns the double- + precision result. */ + +#define OPDF3(NAME, OPCODE) \ +STARTFN (NAME); \ + MOVE_DF_BYTE0 (t); \ + MOVE_DF_BYTE8 (t); \ + OPCODE RET,ARG1,ARG2; \ + MOVE_DF_RET (f, $31); \ + ENDFN (NAME) + +#ifdef L_m16adddf3 +OPDF3 (__mips16_adddf3, add.d) +#endif +#ifdef L_m16subdf3 +OPDF3 (__mips16_subdf3, sub.d) +#endif +#ifdef L_m16muldf3 +OPDF3 (__mips16_muldf3, mul.d) +#endif +#ifdef L_m16divdf3 +OPDF3 (__mips16_divdf3, div.d) +#endif + +/* Define a function NAME that loads a double-precision value, + performs FPU operation OPCODE on it, and returns the double- + precision result. */ + +#define OPDF2(NAME, OPCODE) \ +STARTFN (NAME); \ + MOVE_DF_BYTE0 (t); \ + OPCODE RET,ARG1; \ + MOVE_DF_RET (f, $31); \ + ENDFN (NAME) + +#ifdef L_m16negdf2 +OPDF2 (__mips16_negdf2, neg.d) +#endif +#ifdef L_m16absdf2 +OPDF2 (__mips16_absdf2, abs.d) +#endif + +/* Conversions between single and double precision. */ + +#ifdef L_m16extsfdf2 +STARTFN (__mips16_extendsfdf2) + MOVE_SF_BYTE0 (t) + cvt.d.s RET,ARG1 + MOVE_DF_RET (f, $31) + ENDFN (__mips16_extendsfdf2) +#endif + +#ifdef L_m16trdfsf2 +STARTFN (__mips16_truncdfsf2) + MOVE_DF_BYTE0 (t) + cvt.s.d RET,ARG1 + MOVE_SF_RET (f, $31) + ENDFN (__mips16_truncdfsf2) +#endif + +/* Double-precision comparisons. */ + +/* Define a function NAME that loads two double-precision values, + performs floating point comparison OPCODE, and returns TRUE or + FALSE depending on the result. 
*/ + +#define CMPDF(NAME, OPCODE, TRUE, FALSE) \ +STARTFN (NAME); \ + MOVE_DF_BYTE0 (t); \ + MOVE_DF_BYTE8 (t); \ + OPCODE ARG1,ARG2; \ + li $2,TRUE; \ + bc1t 1f; \ + li $2,FALSE; \ +1:; \ + j $31; \ + ENDFN (NAME) + +/* Like CMPDF, but reverse the comparison operands. */ + +#define REVCMPDF(NAME, OPCODE, TRUE, FALSE) \ +STARTFN (NAME); \ + MOVE_DF_BYTE0 (t); \ + MOVE_DF_BYTE8 (t); \ + OPCODE ARG2,ARG1; \ + li $2,TRUE; \ + bc1t 1f; \ + li $2,FALSE; \ +1:; \ + j $31; \ + ENDFN (NAME) + +#ifdef L_m16eqdf2 +CMPDF (__mips16_eqdf2, c.eq.d, 0, 1) +#endif +#ifdef L_m16nedf2 +CMPDF (__mips16_nedf2, c.eq.d, 0, 1) +#endif +#ifdef L_m16gtdf2 +REVCMPDF (__mips16_gtdf2, c.lt.d, 1, 0) +#endif +#ifdef L_m16gedf2 +REVCMPDF (__mips16_gedf2, c.le.d, 0, -1) +#endif +#ifdef L_m16ledf2 +CMPDF (__mips16_ledf2, c.le.d, 0, 1) +#endif +#ifdef L_m16ltdf2 +CMPDF (__mips16_ltdf2, c.lt.d, -1, 0) +#endif +#ifdef L_m16unorddf2 +CMPDF(__mips16_unorddf2, c.un.d, 1, 0) +#endif + +/* Double-precision conversions. */ + +#ifdef L_m16fltsidf +STARTFN (__mips16_floatsidf) + MOVE_SI_BYTE0 (t) + cvt.d.w RET,ARG1 + MOVE_DF_RET (f, $31) + ENDFN (__mips16_floatsidf) +#endif + +#ifdef L_m16fltunsidf +STARTFN (__mips16_floatunsidf) + MOVE_SI_BYTE0 (t) + cvt.d.w RET,ARG1 + bgez $4,1f + li.d ARG1, 4.294967296e+9 + add.d RET, RET, ARG1 +1: MOVE_DF_RET (f, $31) + ENDFN (__mips16_floatunsidf) +#endif + +#ifdef L_m16fix_truncdfsi +STARTFN (__mips16_fix_truncdfsi) + MOVE_DF_BYTE0 (t) + trunc.w.d RET,ARG1,$4 + MOVE_SI_RET (f, $31) + ENDFN (__mips16_fix_truncdfsi) +#endif +#endif /* !__mips_single_float */ + +/* Define a function NAME that moves a return value of mode MODE from + FPRs to GPRs. */ + +#define RET_FUNCTION(NAME, MODE) \ +STARTFN (NAME); \ + MOVE_##MODE##_RET (t, $31); \ + ENDFN (NAME) + +#ifdef L_m16retsf +RET_FUNCTION (__mips16_ret_sf, SF) +#endif + +#ifdef L_m16retsc +RET_FUNCTION (__mips16_ret_sc, SC) +#endif + +#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) +#ifdef L_m16retdf +RET_FUNCTION (__mips16_ret_df, DF) +#endif + +#ifdef L_m16retdc +RET_FUNCTION (__mips16_ret_dc, DC) +#endif +#endif /* !__mips_single_float */ + +/* STUB_ARGS_X copies the arguments from GPRs to FPRs for argument + code X. X is calculated as ARG1 + ARG2 * 4, where ARG1 and ARG2 + classify the first and second arguments as follows: + + 1: a single-precision argument + 2: a double-precision argument + 0: no argument, or not one of the above. */ + +#define STUB_ARGS_0 /* () */ +#define STUB_ARGS_1 MOVE_SF_BYTE0 (t) /* (sf) */ +#define STUB_ARGS_5 MOVE_SF_BYTE0 (t); MOVE_SF_BYTE4 (t) /* (sf, sf) */ +#define STUB_ARGS_9 MOVE_SF_BYTE0 (t); MOVE_DF_BYTE8 (t) /* (sf, df) */ +#define STUB_ARGS_2 MOVE_DF_BYTE0 (t) /* (df) */ +#define STUB_ARGS_6 MOVE_DF_BYTE0 (t); MOVE_SF_BYTE8 (t) /* (df, sf) */ +#define STUB_ARGS_10 MOVE_DF_BYTE0 (t); MOVE_DF_BYTE8 (t) /* (df, df) */ + +/* These functions are used by 16-bit code when calling via a function + pointer. They must copy the floating point arguments from the GPRs + to FPRs and then call function $2. 
*/ + +#define CALL_STUB_NO_RET(NAME, CODE) \ +STARTFN (NAME); \ + STUB_ARGS_##CODE; \ + .set noreorder; \ + jr $2; \ + move $25,$2; \ + .set reorder; \ + ENDFN (NAME) + +#ifdef L_m16stub1 +CALL_STUB_NO_RET (__mips16_call_stub_1, 1) +#endif + +#ifdef L_m16stub5 +CALL_STUB_NO_RET (__mips16_call_stub_5, 5) +#endif + +#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) + +#ifdef L_m16stub2 +CALL_STUB_NO_RET (__mips16_call_stub_2, 2) +#endif + +#ifdef L_m16stub6 +CALL_STUB_NO_RET (__mips16_call_stub_6, 6) +#endif + +#ifdef L_m16stub9 +CALL_STUB_NO_RET (__mips16_call_stub_9, 9) +#endif + +#ifdef L_m16stub10 +CALL_STUB_NO_RET (__mips16_call_stub_10, 10) +#endif +#endif /* !__mips_single_float */ + +/* Now we have the same set of functions, except that this time the + function being called returns an SFmode, SCmode, DFmode or DCmode + value; we need to instantiate a set for each case. The calling + function will arrange to preserve $18, so these functions are free + to use it to hold the return address. + + Note that we do not know whether the function we are calling is 16 + bit or 32 bit. However, it does not matter, because 16-bit + functions always return floating point values in both the gp and + the fp regs. It would be possible to check whether the function + being called is 16 bits, in which case the copy is unnecessary; + however, it's faster to always do the copy. */ + +#define CALL_STUB_RET(NAME, CODE, MODE) \ +STARTFN (NAME); \ + move $18,$31; \ + STUB_ARGS_##CODE; \ + .set noreorder; \ + jalr $2; \ + move $25,$2; \ + .set reorder; \ + MOVE_##MODE##_RET (f, $18); \ + ENDFN (NAME) + +/* First, instantiate the single-float set. */ + +#ifdef L_m16stubsf0 +CALL_STUB_RET (__mips16_call_stub_sf_0, 0, SF) +#endif + +#ifdef L_m16stubsf1 +CALL_STUB_RET (__mips16_call_stub_sf_1, 1, SF) +#endif + +#ifdef L_m16stubsf5 +CALL_STUB_RET (__mips16_call_stub_sf_5, 5, SF) +#endif + +#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) +#ifdef L_m16stubsf2 +CALL_STUB_RET (__mips16_call_stub_sf_2, 2, SF) +#endif + +#ifdef L_m16stubsf6 +CALL_STUB_RET (__mips16_call_stub_sf_6, 6, SF) +#endif + +#ifdef L_m16stubsf9 +CALL_STUB_RET (__mips16_call_stub_sf_9, 9, SF) +#endif + +#ifdef L_m16stubsf10 +CALL_STUB_RET (__mips16_call_stub_sf_10, 10, SF) +#endif +#endif /* !__mips_single_float */ + + +/* Now we have the same set of functions again, except that this time + the function being called returns an DFmode value. */ + +#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) +#ifdef L_m16stubdf0 +CALL_STUB_RET (__mips16_call_stub_df_0, 0, DF) +#endif + +#ifdef L_m16stubdf1 +CALL_STUB_RET (__mips16_call_stub_df_1, 1, DF) +#endif + +#ifdef L_m16stubdf5 +CALL_STUB_RET (__mips16_call_stub_df_5, 5, DF) +#endif + +#ifdef L_m16stubdf2 +CALL_STUB_RET (__mips16_call_stub_df_2, 2, DF) +#endif + +#ifdef L_m16stubdf6 +CALL_STUB_RET (__mips16_call_stub_df_6, 6, DF) +#endif + +#ifdef L_m16stubdf9 +CALL_STUB_RET (__mips16_call_stub_df_9, 9, DF) +#endif + +#ifdef L_m16stubdf10 +CALL_STUB_RET (__mips16_call_stub_df_10, 10, DF) +#endif +#endif /* !__mips_single_float */ + + +/* Ho hum. Here we have the same set of functions again, this time + for when the function being called returns an SCmode value. 
*/ + +#ifdef L_m16stubsc0 +CALL_STUB_RET (__mips16_call_stub_sc_0, 0, SC) +#endif + +#ifdef L_m16stubsc1 +CALL_STUB_RET (__mips16_call_stub_sc_1, 1, SC) +#endif + +#ifdef L_m16stubsc5 +CALL_STUB_RET (__mips16_call_stub_sc_5, 5, SC) +#endif + +#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) +#ifdef L_m16stubsc2 +CALL_STUB_RET (__mips16_call_stub_sc_2, 2, SC) +#endif + +#ifdef L_m16stubsc6 +CALL_STUB_RET (__mips16_call_stub_sc_6, 6, SC) +#endif + +#ifdef L_m16stubsc9 +CALL_STUB_RET (__mips16_call_stub_sc_9, 9, SC) +#endif + +#ifdef L_m16stubsc10 +CALL_STUB_RET (__mips16_call_stub_sc_10, 10, SC) +#endif +#endif /* !__mips_single_float */ + + +/* Finally, another set of functions for DCmode. */ + +#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) +#ifdef L_m16stubdc0 +CALL_STUB_RET (__mips16_call_stub_dc_0, 0, DC) +#endif + +#ifdef L_m16stubdc1 +CALL_STUB_RET (__mips16_call_stub_dc_1, 1, DC) +#endif + +#ifdef L_m16stubdc5 +CALL_STUB_RET (__mips16_call_stub_dc_5, 5, DC) +#endif + +#ifdef L_m16stubdc2 +CALL_STUB_RET (__mips16_call_stub_dc_2, 2, DC) +#endif + +#ifdef L_m16stubdc6 +CALL_STUB_RET (__mips16_call_stub_dc_6, 6, DC) +#endif + +#ifdef L_m16stubdc9 +CALL_STUB_RET (__mips16_call_stub_dc_9, 9, DC) +#endif + +#ifdef L_m16stubdc10 +CALL_STUB_RET (__mips16_call_stub_dc_10, 10, DC) +#endif +#endif /* !__mips_single_float */ +#endif diff --git a/libgcc/config/mips/t-mips16 b/libgcc/config/mips/t-mips16 index 46c7472f5f6..5553ed76e2d 100644 --- a/libgcc/config/mips/t-mips16 +++ b/libgcc/config/mips/t-mips16 @@ -1,3 +1,43 @@ +# Copyright (C) 2007, 2008, 2011 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB1ASMSRC = mips/mips16.S +LIB1ASMFUNCS = _m16addsf3 _m16subsf3 _m16mulsf3 _m16divsf3 \ + _m16eqsf2 _m16nesf2 _m16gtsf2 _m16gesf2 _m16lesf2 _m16ltsf2 \ + _m16unordsf2 \ + _m16fltsisf _m16fix_truncsfsi _m16fltunsisf \ + _m16adddf3 _m16subdf3 _m16muldf3 _m16divdf3 \ + _m16extsfdf2 _m16trdfsf2 \ + _m16eqdf2 _m16nedf2 _m16gtdf2 _m16gedf2 _m16ledf2 _m16ltdf2 \ + _m16unorddf2 \ + _m16fltsidf _m16fix_truncdfsi _m16fltunsidf \ + _m16retsf _m16retdf \ + _m16retsc _m16retdc \ + _m16stub1 _m16stub2 _m16stub5 _m16stub6 _m16stub9 _m16stub10 \ + _m16stubsf0 _m16stubsf1 _m16stubsf2 _m16stubsf5 _m16stubsf6 \ + _m16stubsf9 _m16stubsf10 \ + _m16stubdf0 _m16stubdf1 _m16stubdf2 _m16stubdf5 _m16stubdf6 \ + _m16stubdf9 _m16stubdf10 \ + _m16stubsc0 _m16stubsc1 _m16stubsc2 _m16stubsc5 _m16stubsc6 \ + _m16stubsc9 _m16stubsc10 \ + _m16stubdc0 _m16stubdc1 _m16stubdc2 _m16stubdc5 _m16stubdc6 \ + _m16stubdc9 _m16stubdc10 + SYNC = yes SYNC_CFLAGS = -mno-mips16 diff --git a/libgcc/config/pa/milli64.S b/libgcc/config/pa/milli64.S new file mode 100644 index 00000000000..2e9c4f741b6 --- /dev/null +++ b/libgcc/config/pa/milli64.S @@ -0,0 +1,2134 @@ +/* 32 and 64-bit millicode, original author Hewlett-Packard + adapted for gcc by Paul Bame + and Alan Modra . 
+ + Copyright 2001, 2002, 2003, 2007, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifdef pa64 + .level 2.0w +#endif + +/* Hardware General Registers. */ +r0: .reg %r0 +r1: .reg %r1 +r2: .reg %r2 +r3: .reg %r3 +r4: .reg %r4 +r5: .reg %r5 +r6: .reg %r6 +r7: .reg %r7 +r8: .reg %r8 +r9: .reg %r9 +r10: .reg %r10 +r11: .reg %r11 +r12: .reg %r12 +r13: .reg %r13 +r14: .reg %r14 +r15: .reg %r15 +r16: .reg %r16 +r17: .reg %r17 +r18: .reg %r18 +r19: .reg %r19 +r20: .reg %r20 +r21: .reg %r21 +r22: .reg %r22 +r23: .reg %r23 +r24: .reg %r24 +r25: .reg %r25 +r26: .reg %r26 +r27: .reg %r27 +r28: .reg %r28 +r29: .reg %r29 +r30: .reg %r30 +r31: .reg %r31 + +/* Hardware Space Registers. */ +sr0: .reg %sr0 +sr1: .reg %sr1 +sr2: .reg %sr2 +sr3: .reg %sr3 +sr4: .reg %sr4 +sr5: .reg %sr5 +sr6: .reg %sr6 +sr7: .reg %sr7 + +/* Hardware Floating Point Registers. */ +fr0: .reg %fr0 +fr1: .reg %fr1 +fr2: .reg %fr2 +fr3: .reg %fr3 +fr4: .reg %fr4 +fr5: .reg %fr5 +fr6: .reg %fr6 +fr7: .reg %fr7 +fr8: .reg %fr8 +fr9: .reg %fr9 +fr10: .reg %fr10 +fr11: .reg %fr11 +fr12: .reg %fr12 +fr13: .reg %fr13 +fr14: .reg %fr14 +fr15: .reg %fr15 + +/* Hardware Control Registers. */ +cr11: .reg %cr11 +sar: .reg %cr11 /* Shift Amount Register */ + +/* Software Architecture General Registers. */ +rp: .reg r2 /* return pointer */ +#ifdef pa64 +mrp: .reg r2 /* millicode return pointer */ +#else +mrp: .reg r31 /* millicode return pointer */ +#endif +ret0: .reg r28 /* return value */ +ret1: .reg r29 /* return value (high part of double) */ +sp: .reg r30 /* stack pointer */ +dp: .reg r27 /* data pointer */ +arg0: .reg r26 /* argument */ +arg1: .reg r25 /* argument or high part of double argument */ +arg2: .reg r24 /* argument */ +arg3: .reg r23 /* argument or high part of double argument */ + +/* Software Architecture Space Registers. */ +/* sr0 ; return link from BLE */ +sret: .reg sr1 /* return value */ +sarg: .reg sr1 /* argument */ +/* sr4 ; PC SPACE tracker */ +/* sr5 ; process private data */ + +/* Frame Offsets (millicode convention!) Used when calling other + millicode routines. Stack unwinding is dependent upon these + definitions. 
*/ +r31_slot: .equ -20 /* "current RP" slot */ +sr0_slot: .equ -16 /* "static link" slot */ +#if defined(pa64) +mrp_slot: .equ -16 /* "current RP" slot */ +psp_slot: .equ -8 /* "previous SP" slot */ +#else +mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */ +#endif + + +#define DEFINE(name,value)name: .EQU value +#define RDEFINE(name,value)name: .REG value +#ifdef milliext +#define MILLI_BE(lbl) BE lbl(sr7,r0) +#define MILLI_BEN(lbl) BE,n lbl(sr7,r0) +#define MILLI_BLE(lbl) BLE lbl(sr7,r0) +#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0) +#define MILLIRETN BE,n 0(sr0,mrp) +#define MILLIRET BE 0(sr0,mrp) +#define MILLI_RETN BE,n 0(sr0,mrp) +#define MILLI_RET BE 0(sr0,mrp) +#else +#define MILLI_BE(lbl) B lbl +#define MILLI_BEN(lbl) B,n lbl +#define MILLI_BLE(lbl) BL lbl,mrp +#define MILLI_BLEN(lbl) BL,n lbl,mrp +#define MILLIRETN BV,n 0(mrp) +#define MILLIRET BV 0(mrp) +#define MILLI_RETN BV,n 0(mrp) +#define MILLI_RET BV 0(mrp) +#endif + +#ifdef __STDC__ +#define CAT(a,b) a##b +#else +#define CAT(a,b) a/**/b +#endif + +#ifdef ELF +#define SUBSPA_MILLI .section .text +#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16 +#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16 +#define ATTR_MILLI +#define SUBSPA_DATA .section .data +#define ATTR_DATA +#define GLOBAL $global$ +#define GSYM(sym) !sym: +#define LSYM(sym) !CAT(.L,sym:) +#define LREF(sym) CAT(.L,sym) + +#else + +#ifdef coff +/* This used to be .milli but since link32 places different named + sections in different segments millicode ends up a long ways away + from .text (1meg?). This way they will be a lot closer. + + The SUBSPA_MILLI_* specify locality sets for certain millicode + modules in order to ensure that modules that call one another are + placed close together. Without locality sets this is unlikely to + happen because of the Dynamite linker library search algorithm. We + want these modules close together so that short calls always reach + (we don't want to require long calls or use long call stubs). 
*/ + +#define SUBSPA_MILLI .subspa .text +#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16 +#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16 +#define ATTR_MILLI .attr code,read,execute +#define SUBSPA_DATA .subspa .data +#define ATTR_DATA .attr init_data,read,write +#define GLOBAL _gp +#else +#define SUBSPA_MILLI .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8 +#define SUBSPA_MILLI_DIV SUBSPA_MILLI +#define SUBSPA_MILLI_MUL SUBSPA_MILLI +#define ATTR_MILLI +#define SUBSPA_DATA .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero +#define ATTR_DATA +#define GLOBAL $global$ +#endif +#define SPACE_DATA .space $PRIVATE$,spnum=1,sort=16 + +#define GSYM(sym) !sym +#define LSYM(sym) !CAT(L$,sym) +#define LREF(sym) CAT(L$,sym) +#endif + +#ifdef L_dyncall + SUBSPA_MILLI + ATTR_DATA +GSYM($$dyncall) + .export $$dyncall,millicode + .proc + .callinfo millicode + .entry + bb,>=,n %r22,30,LREF(1) ; branch if not plabel address + depi 0,31,2,%r22 ; clear the two least significant bits + ldw 4(%r22),%r19 ; load new LTP value + ldw 0(%r22),%r22 ; load address of target +LSYM(1) +#ifdef LINUX + bv %r0(%r22) ; branch to the real target +#else + ldsid (%sr0,%r22),%r1 ; get the "space ident" selected by r22 + mtsp %r1,%sr0 ; move that space identifier into sr0 + be 0(%sr0,%r22) ; branch to the real target +#endif + stw %r2,-24(%r30) ; save return address into frame marker + .exit + .procend +#endif + +#ifdef L_divI +/* ROUTINES: $$divI, $$divoI + + Single precision divide for signed binary integers. + + The quotient is truncated towards zero. + The sign of the quotient is the XOR of the signs of the dividend and + divisor. + Divide by zero is trapped. + Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI. + + INPUT REGISTERS: + . arg0 == dividend + . arg1 == divisor + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = undefined + . arg1 = undefined + . ret1 = quotient + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: + . divisor is zero (traps with ADDIT,= 0,25,0) + . dividend==-2**31 and divisor==-1 and routine is $$divoI + . (traps with ADDO 26,25,0) + . Changes memory at the following places: + . NONE + + PERMISSIBLE CONTEXT: + . Unwindable. + . Suitable for internal or external millicode. + . Assumes the special millicode register conventions. + + DISCUSSION: + . Branchs to other millicode routines using BE + . $$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15 + . + . For selected divisors, calls a divide by constant routine written by + . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13. + . + . The only overflow case is -2**31 divided by -1. + . Both routines return -2**31 but only $$divoI traps. */ + +RDEFINE(temp,r1) +RDEFINE(retreg,ret1) /* r29 */ +RDEFINE(temp1,arg0) + SUBSPA_MILLI_DIV + ATTR_MILLI + .import $$divI_2,millicode + .import $$divI_3,millicode + .import $$divI_4,millicode + .import $$divI_5,millicode + .import $$divI_6,millicode + .import $$divI_7,millicode + .import $$divI_8,millicode + .import $$divI_9,millicode + .import $$divI_10,millicode + .import $$divI_12,millicode + .import $$divI_14,millicode + .import $$divI_15,millicode + .export $$divI,millicode + .export $$divoI,millicode + .proc + .callinfo millicode + .entry +GSYM($$divoI) + comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1 */ +GSYM($$divI) + ldo -1(arg1),temp /* is there at most one bit set ? 
*/ + and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */ + addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */ + b,n LREF(neg_denom) +LSYM(pow2) + addi,>= 0,arg0,retreg /* if numerator is negative, add the */ + add arg0,temp,retreg /* (denominaotr -1) to correct for shifts */ + extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */ + extrs retreg,15,16,retreg /* retreg = retreg >> 16 */ + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */ + ldi 0xcc,temp1 /* setup 0xcc in temp1 */ + extru,= arg1,23,8,temp /* test denominator with 0xff00 */ + extrs retreg,23,24,retreg /* retreg = retreg >> 8 */ + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */ + ldi 0xaa,temp /* setup 0xaa in temp */ + extru,= arg1,27,4,r0 /* test denominator with 0xf0 */ + extrs retreg,27,28,retreg /* retreg = retreg >> 4 */ + and,= arg1,temp1,r0 /* test denominator with 0xcc */ + extrs retreg,29,30,retreg /* retreg = retreg >> 2 */ + and,= arg1,temp,r0 /* test denominator with 0xaa */ + extrs retreg,30,31,retreg /* retreg = retreg >> 1 */ + MILLIRETN +LSYM(neg_denom) + addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */ + b,n LREF(regular_seq) + sub r0,arg1,temp /* make denominator positive */ + comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */ + ldo -1(temp),retreg /* is there at most one bit set ? */ + and,= temp,retreg,r0 /* if so, the denominator is power of 2 */ + b,n LREF(regular_seq) + sub r0,arg0,retreg /* negate numerator */ + comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */ + copy retreg,arg0 /* set up arg0, arg1 and temp */ + copy temp,arg1 /* before branching to pow2 */ + b LREF(pow2) + ldo -1(arg1),temp +LSYM(regular_seq) + comib,>>=,n 15,arg1,LREF(small_divisor) + add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */ +LSYM(normal) + subi 0,retreg,retreg /* make it positive */ + sub 0,arg1,temp /* clear carry, */ + /* negate the divisor */ + ds 0,temp,0 /* set V-bit to the comple- */ + /* ment of the divisor sign */ + add retreg,retreg,retreg /* shift msb bit into carry */ + ds r0,arg1,temp /* 1st divide step, if no carry */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 2nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 3rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 4th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 5th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 6th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 7th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 8th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 9th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 10th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 11th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 12th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 13th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 14th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ 
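[Illustrative aside, not part of the original milli64 source: the 32-step ds/addc ladder running through this routine (and the matching sequences in $$divU, $$remI and $$remU below) produces one quotient bit per divide step.  The C sketch that follows is a minimal model of what that ladder computes for the unsigned core case; the function name is invented here, the model is a plain restoring divide, and the real code is non-restoring (its state lives in the carry and PSW V bits, and $$divI/$$divoI first make the operands positive and fix the result sign afterwards, as the header above describes), but the final quotient and remainder agree.]

#include <stdint.h>

/* Model of the unrolled divide-step sequence: shift the next dividend
   bit into a running remainder, subtract the divisor when it fits, and
   shift the corresponding quotient bit in from the right.  */
static void
divide_step_model (uint32_t n, uint32_t d, uint32_t *quot, uint32_t *rem)
{
  uint32_t q = 0, r = 0;
  /* The millicode traps on d == 0 (ADDIT,=); the model assumes d != 0.  */
  for (int i = 31; i >= 0; i--)
    {
      r = (r << 1) | ((n >> i) & 1);   /* bring in the next dividend bit */
      q <<= 1;
      if (r >= d)                      /* one conditional divide step */
        {
          r -= d;
          q |= 1;
        }
    }
  *quot = q;   /* what $$divU leaves in ret1 */
  *rem = r;    /* what $$remU leaves in ret1 */
}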
+ ds temp,arg1,temp /* 15th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 16th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 17th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 18th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 19th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 20th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 21st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 22nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 23rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 24th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 25th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 26th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 27th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 28th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 29th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 30th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 31st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 32nd divide step, */ + addc retreg,retreg,retreg /* shift last retreg bit into retreg */ + xor,>= arg0,arg1,0 /* get correct sign of quotient */ + sub 0,retreg,retreg /* based on operand signs */ + MILLIRETN + nop + +LSYM(small_divisor) + +#if defined(pa64) +/* Clear the upper 32 bits of the arg1 register. We are working with */ +/* small divisors (and 32-bit integers) We must not be mislead */ +/* by "1" bits left in the upper 32 bits. */ + depd %r0,31,32,%r25 +#endif + blr,n arg1,r0 + nop +/* table for divisor == 0,1, ... ,15 */ + addit,= 0,arg1,r0 /* trap if divisor == 0 */ + nop + MILLIRET /* divisor == 1 */ + copy arg0,retreg + MILLI_BEN($$divI_2) /* divisor == 2 */ + nop + MILLI_BEN($$divI_3) /* divisor == 3 */ + nop + MILLI_BEN($$divI_4) /* divisor == 4 */ + nop + MILLI_BEN($$divI_5) /* divisor == 5 */ + nop + MILLI_BEN($$divI_6) /* divisor == 6 */ + nop + MILLI_BEN($$divI_7) /* divisor == 7 */ + nop + MILLI_BEN($$divI_8) /* divisor == 8 */ + nop + MILLI_BEN($$divI_9) /* divisor == 9 */ + nop + MILLI_BEN($$divI_10) /* divisor == 10 */ + nop + b LREF(normal) /* divisor == 11 */ + add,>= 0,arg0,retreg + MILLI_BEN($$divI_12) /* divisor == 12 */ + nop + b LREF(normal) /* divisor == 13 */ + add,>= 0,arg0,retreg + MILLI_BEN($$divI_14) /* divisor == 14 */ + nop + MILLI_BEN($$divI_15) /* divisor == 15 */ + nop + +LSYM(negative1) + sub 0,arg0,retreg /* result is negation of dividend */ + MILLIRET + addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */ + .exit + .procend + .end +#endif + +#ifdef L_divU +/* ROUTINE: $$divU + . + . Single precision divide for unsigned integers. + . + . Quotient is truncated towards zero. + . Traps on divide by zero. + + INPUT REGISTERS: + . 
arg0 == dividend + . arg1 == divisor + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = undefined + . arg1 = undefined + . ret1 = quotient + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: + . divisor is zero + . Changes memory at the following places: + . NONE + + PERMISSIBLE CONTEXT: + . Unwindable. + . Does not create a stack frame. + . Suitable for internal or external millicode. + . Assumes the special millicode register conventions. + + DISCUSSION: + . Branchs to other millicode routines using BE: + . $$divU_# for 3,5,6,7,9,10,12,14,15 + . + . For selected small divisors calls the special divide by constant + . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */ + +RDEFINE(temp,r1) +RDEFINE(retreg,ret1) /* r29 */ +RDEFINE(temp1,arg0) + SUBSPA_MILLI_DIV + ATTR_MILLI + .export $$divU,millicode + .import $$divU_3,millicode + .import $$divU_5,millicode + .import $$divU_6,millicode + .import $$divU_7,millicode + .import $$divU_9,millicode + .import $$divU_10,millicode + .import $$divU_12,millicode + .import $$divU_14,millicode + .import $$divU_15,millicode + .proc + .callinfo millicode + .entry +GSYM($$divU) +/* The subtract is not nullified since it does no harm and can be used + by the two cases that branch back to "normal". */ + ldo -1(arg1),temp /* is there at most one bit set ? */ + and,= arg1,temp,r0 /* if so, denominator is power of 2 */ + b LREF(regular_seq) + addit,= 0,arg1,0 /* trap for zero dvr */ + copy arg0,retreg + extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */ + extru retreg,15,16,retreg /* retreg = retreg >> 16 */ + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */ + ldi 0xcc,temp1 /* setup 0xcc in temp1 */ + extru,= arg1,23,8,temp /* test denominator with 0xff00 */ + extru retreg,23,24,retreg /* retreg = retreg >> 8 */ + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */ + ldi 0xaa,temp /* setup 0xaa in temp */ + extru,= arg1,27,4,r0 /* test denominator with 0xf0 */ + extru retreg,27,28,retreg /* retreg = retreg >> 4 */ + and,= arg1,temp1,r0 /* test denominator with 0xcc */ + extru retreg,29,30,retreg /* retreg = retreg >> 2 */ + and,= arg1,temp,r0 /* test denominator with 0xaa */ + extru retreg,30,31,retreg /* retreg = retreg >> 1 */ + MILLIRETN + nop +LSYM(regular_seq) + comib,>= 15,arg1,LREF(special_divisor) + subi 0,arg1,temp /* clear carry, negate the divisor */ + ds r0,temp,r0 /* set V-bit to 1 */ +LSYM(normal) + add arg0,arg0,retreg /* shift msb bit into carry */ + ds r0,arg1,temp /* 1st divide step, if no carry */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 2nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 3rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 4th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 5th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 6th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 7th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 8th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 9th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds 
temp,arg1,temp /* 10th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 11th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 12th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 13th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 14th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 15th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 16th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 17th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 18th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 19th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 20th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 21st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 22nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 23rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 24th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 25th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 26th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 27th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 28th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 29th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 30th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 31st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds temp,arg1,temp /* 32nd divide step, */ + MILLIRET + addc retreg,retreg,retreg /* shift last retreg bit into retreg */ + +/* Handle the cases where divisor is a small constant or has high bit on. */ +LSYM(special_divisor) +/* blr arg1,r0 */ +/* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */ + +/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from + generating such a blr, comib sequence. A problem in nullification. So I + rewrote this code. */ + +#if defined(pa64) +/* Clear the upper 32 bits of the arg1 register. We are working with + small divisors (and 32-bit unsigned integers) We must not be mislead + by "1" bits left in the upper 32 bits. 
*/ + depd %r0,31,32,%r25 +#endif + comib,> 0,arg1,LREF(big_divisor) + nop + blr arg1,r0 + nop + +LSYM(zero_divisor) /* this label is here to provide external visibility */ + addit,= 0,arg1,0 /* trap for zero dvr */ + nop + MILLIRET /* divisor == 1 */ + copy arg0,retreg + MILLIRET /* divisor == 2 */ + extru arg0,30,31,retreg + MILLI_BEN($$divU_3) /* divisor == 3 */ + nop + MILLIRET /* divisor == 4 */ + extru arg0,29,30,retreg + MILLI_BEN($$divU_5) /* divisor == 5 */ + nop + MILLI_BEN($$divU_6) /* divisor == 6 */ + nop + MILLI_BEN($$divU_7) /* divisor == 7 */ + nop + MILLIRET /* divisor == 8 */ + extru arg0,28,29,retreg + MILLI_BEN($$divU_9) /* divisor == 9 */ + nop + MILLI_BEN($$divU_10) /* divisor == 10 */ + nop + b LREF(normal) /* divisor == 11 */ + ds r0,temp,r0 /* set V-bit to 1 */ + MILLI_BEN($$divU_12) /* divisor == 12 */ + nop + b LREF(normal) /* divisor == 13 */ + ds r0,temp,r0 /* set V-bit to 1 */ + MILLI_BEN($$divU_14) /* divisor == 14 */ + nop + MILLI_BEN($$divU_15) /* divisor == 15 */ + nop + +/* Handle the case where the high bit is on in the divisor. + Compute: if( dividend>=divisor) quotient=1; else quotient=0; + Note: dividend>==divisor iff dividend-divisor does not borrow + and not borrow iff carry. */ +LSYM(big_divisor) + sub arg0,arg1,r0 + MILLIRET + addc r0,r0,retreg + .exit + .procend + .end +#endif + +#ifdef L_remI +/* ROUTINE: $$remI + + DESCRIPTION: + . $$remI returns the remainder of the division of two signed 32-bit + . integers. The sign of the remainder is the same as the sign of + . the dividend. + + + INPUT REGISTERS: + . arg0 == dividend + . arg1 == divisor + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = destroyed + . arg1 = destroyed + . ret1 = remainder + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: DIVIDE BY ZERO + . Changes memory at the following places: NONE + + PERMISSIBLE CONTEXT: + . Unwindable + . Does not create a stack frame + . Is usable for internal or external microcode + + DISCUSSION: + . Calls other millicode routines via mrp: NONE + . Calls other millicode routines: NONE */ + +RDEFINE(tmp,r1) +RDEFINE(retreg,ret1) + + SUBSPA_MILLI + ATTR_MILLI + .proc + .callinfo millicode + .entry +GSYM($$remI) +GSYM($$remoI) + .export $$remI,MILLICODE + .export $$remoI,MILLICODE + ldo -1(arg1),tmp /* is there at most one bit set ? */ + and,<> arg1,tmp,r0 /* if not, don't use power of 2 */ + addi,> 0,arg1,r0 /* if denominator > 0, use power */ + /* of 2 */ + b,n LREF(neg_denom) +LSYM(pow2) + comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */ + and arg0,tmp,retreg /* get the result */ + MILLIRETN +LSYM(neg_num) + subi 0,arg0,arg0 /* negate numerator */ + and arg0,tmp,retreg /* get the result */ + subi 0,retreg,retreg /* negate result */ + MILLIRETN +LSYM(neg_denom) + addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */ + /* of 2 */ + b,n LREF(regular_seq) + sub r0,arg1,tmp /* make denominator positive */ + comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */ + ldo -1(tmp),retreg /* is there at most one bit set ? 
*/ + and,= tmp,retreg,r0 /* if not, go to regular_seq */ + b,n LREF(regular_seq) + comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */ + and arg0,retreg,retreg + MILLIRETN +LSYM(neg_num_2) + subi 0,arg0,tmp /* test against 0x80000000 */ + and tmp,retreg,retreg + subi 0,retreg,retreg + MILLIRETN +LSYM(regular_seq) + addit,= 0,arg1,0 /* trap if div by zero */ + add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */ + sub 0,retreg,retreg /* make it positive */ + sub 0,arg1, tmp /* clear carry, */ + /* negate the divisor */ + ds 0, tmp,0 /* set V-bit to the comple- */ + /* ment of the divisor sign */ + or 0,0, tmp /* clear tmp */ + add retreg,retreg,retreg /* shift msb bit into carry */ + ds tmp,arg1, tmp /* 1st divide step, if no carry */ + /* out, msb of quotient = 0 */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ +LSYM(t1) + ds tmp,arg1, tmp /* 2nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 3rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 4th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 5th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 6th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 7th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 8th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 9th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 10th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 11th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 12th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 13th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 14th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 15th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 16th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 17th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 18th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 19th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 20th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 21st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 22nd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 23rd divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 24th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 25th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 26th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 27th divide step */ + addc retreg,retreg,retreg /* 
shift retreg with/into carry */ + ds tmp,arg1, tmp /* 28th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 29th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 30th divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 31st divide step */ + addc retreg,retreg,retreg /* shift retreg with/into carry */ + ds tmp,arg1, tmp /* 32nd divide step, */ + addc retreg,retreg,retreg /* shift last bit into retreg */ + movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */ + add,< arg1,0,0 /* if arg1 > 0, add arg1 */ + add,tr tmp,arg1,retreg /* for correcting remainder tmp */ + sub tmp,arg1,retreg /* else add absolute value arg1 */ +LSYM(finish) + add,>= arg0,0,0 /* set sign of remainder */ + sub 0,retreg,retreg /* to sign of dividend */ + MILLIRET + nop + .exit + .procend +#ifdef milliext + .origin 0x00000200 +#endif + .end +#endif + +#ifdef L_remU +/* ROUTINE: $$remU + . Single precision divide for remainder with unsigned binary integers. + . + . The remainder must be dividend-(dividend/divisor)*divisor. + . Divide by zero is trapped. + + INPUT REGISTERS: + . arg0 == dividend + . arg1 == divisor + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = undefined + . arg1 = undefined + . ret1 = remainder + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: DIVIDE BY ZERO + . Changes memory at the following places: NONE + + PERMISSIBLE CONTEXT: + . Unwindable. + . Does not create a stack frame. + . Suitable for internal or external millicode. + . Assumes the special millicode register conventions. + + DISCUSSION: + . Calls other millicode routines using mrp: NONE + . Calls other millicode routines: NONE */ + + +RDEFINE(temp,r1) +RDEFINE(rmndr,ret1) /* r29 */ + SUBSPA_MILLI + ATTR_MILLI + .export $$remU,millicode + .proc + .callinfo millicode + .entry +GSYM($$remU) + ldo -1(arg1),temp /* is there at most one bit set ? 
*/ + and,= arg1,temp,r0 /* if not, don't use power of 2 */ + b LREF(regular_seq) + addit,= 0,arg1,r0 /* trap on div by zero */ + and arg0,temp,rmndr /* get the result for power of 2 */ + MILLIRETN +LSYM(regular_seq) + comib,>=,n 0,arg1,LREF(special_case) + subi 0,arg1,rmndr /* clear carry, negate the divisor */ + ds r0,rmndr,r0 /* set V-bit to 1 */ + add arg0,arg0,temp /* shift msb bit into carry */ + ds r0,arg1,rmndr /* 1st divide step, if no carry */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 2nd divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 3rd divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 4th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 5th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 6th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 7th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 8th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 9th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 10th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 11th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 12th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 13th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 14th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 15th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 16th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 17th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 18th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 19th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 20th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 21st divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 22nd divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 23rd divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 24th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 25th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 26th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 27th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 28th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 29th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 30th divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 31st divide step */ + addc temp,temp,temp /* shift temp with/into carry */ + ds rmndr,arg1,rmndr /* 32nd divide step, */ + 
comiclr,<= 0,rmndr,r0 + add rmndr,arg1,rmndr /* correction */ + MILLIRETN + nop + +/* Putting >= on the last DS and deleting COMICLR does not work! */ +LSYM(special_case) + sub,>>= arg0,arg1,rmndr + copy arg0,rmndr + MILLIRETN + nop + .exit + .procend + .end +#endif + +#ifdef L_div_const +/* ROUTINE: $$divI_2 + . $$divI_3 $$divU_3 + . $$divI_4 + . $$divI_5 $$divU_5 + . $$divI_6 $$divU_6 + . $$divI_7 $$divU_7 + . $$divI_8 + . $$divI_9 $$divU_9 + . $$divI_10 $$divU_10 + . + . $$divI_12 $$divU_12 + . + . $$divI_14 $$divU_14 + . $$divI_15 $$divU_15 + . $$divI_16 + . $$divI_17 $$divU_17 + . + . Divide by selected constants for single precision binary integers. + + INPUT REGISTERS: + . arg0 == dividend + . mrp == return pc + . sr0 == return space when called externally + + OUTPUT REGISTERS: + . arg0 = undefined + . arg1 = undefined + . ret1 = quotient + + OTHER REGISTERS AFFECTED: + . r1 = undefined + + SIDE EFFECTS: + . Causes a trap under the following conditions: NONE + . Changes memory at the following places: NONE + + PERMISSIBLE CONTEXT: + . Unwindable. + . Does not create a stack frame. + . Suitable for internal or external millicode. + . Assumes the special millicode register conventions. + + DISCUSSION: + . Calls other millicode routines using mrp: NONE + . Calls other millicode routines: NONE */ + + +/* TRUNCATED DIVISION BY SMALL INTEGERS + + We are interested in q(x) = floor(x/y), where x >= 0 and y > 0 + (with y fixed). + + Let a = floor(z/y), for some choice of z. Note that z will be + chosen so that division by z is cheap. + + Let r be the remainder(z/y). In other words, r = z - ay. + + Now, our method is to choose a value for b such that + + q'(x) = floor((ax+b)/z) + + is equal to q(x) over as large a range of x as possible. If the + two are equal over a sufficiently large range, and if it is easy to + form the product (ax), and it is easy to divide by z, then we can + perform the division much faster than the general division algorithm. + + So, we want the following to be true: + + . For x in the following range: + . + . ky <= x < (k+1)y + . + . implies that + . + . k <= (ax+b)/z < (k+1) + + We want to determine b such that this is true for all k in the + range {0..K} for some maximum K. + + Since (ax+b) is an increasing function of x, we can take each + bound separately to determine the "best" value for b. + + (ax+b)/z < (k+1) implies + + (a((k+1)y-1)+b < (k+1)z implies + + b < a + (k+1)(z-ay) implies + + b < a + (k+1)r + + This needs to be true for all k in the range {0..K}. In + particular, it is true for k = 0 and this leads to a maximum + acceptable value for b. + + b < a+r or b <= a+r-1 + + Taking the other bound, we have + + k <= (ax+b)/z implies + + k <= (aky+b)/z implies + + k(z-ay) <= b implies + + kr <= b + + Clearly, the largest range for k will be achieved by maximizing b, + when r is not zero. When r is zero, then the simplest choice for b + is 0. When r is not 0, set + + . b = a+r-1 + + Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y) + for all x in the range: + + . 0 <= x < (K+1)y + + We need to determine what K is. Of our two bounds, + + . b < a+(k+1)r is satisfied for all k >= 0, by construction. + + The other bound is + + . kr <= b + + This is always true if r = 0. If r is not 0 (the usual case), then + K = floor((a+r-1)/r), is the maximum value for k. 
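[Illustrative aside, not part of the original milli64 source: as a concrete instance of the recipe just derived, take y = 3 with z = 2**32.  Then a = floor(z/y) = 0x55555555, r = z - a*y = 1, b = a + r - 1 = a, and K = floor((a+r-1)/r) = a, so (K+1)*y - 1 = 0x100000001 and every 32-bit x is covered.  The C check below, with a name invented for this note, samples q'(x) = floor((a*x + b)/z) against x/3, using a 64-bit multiply in place of the shift-and-add sequence that $$divI_3/$$divU_3 perform.]

#include <assert.h>
#include <stdint.h>

/* floor ((a*x + b) / 2**32) with a = floor (2**32 / 3) and b = a + r - 1.  */
static uint32_t
div3_by_reciprocal (uint32_t x)
{
  const uint64_t a = 0x55555555u;   /* floor (2**32 / 3) */
  const uint64_t b = a;             /* r == 1, so b = a + r - 1 = a */
  return (uint32_t) ((a * x + b) >> 32);
}

int
main (void)
{
  /* Spot-check q'(x) == floor (x/3) over sampled 32-bit x.  */
  for (uint64_t x = 0; x <= 0xffffffffu; x += 12345)
    assert (div3_by_reciprocal ((uint32_t) x) == (uint32_t) (x / 3));
  return 0;
}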
+ + Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct + answer for q(x) = floor(x/y) when x is in the range + + (0,(K+1)y-1) K = floor((a+r-1)/r) + + To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that + the formula for q'(x) yields the correct value of q(x) for all x + representable by a single word in HPPA. + + We are also constrained in that computing the product (ax), adding + b, and dividing by z must all be done quickly, otherwise we will be + better off going through the general algorithm using the DS + instruction, which uses approximately 70 cycles. + + For each y, there is a choice of z which satisfies the constraints + for (K+1)y >= 2**32. We may not, however, be able to satisfy the + timing constraints for arbitrary y. It seems that z being equal to + a power of 2 or a power of 2 minus 1 is as good as we can do, since + it minimizes the time to do division by z. We want the choice of z + to also result in a value for (a) that minimizes the computation of + the product (ax). This is best achieved if (a) has a regular bit + pattern (so the multiplication can be done with shifts and adds). + The value of (a) also needs to be less than 2**32 so the product is + always guaranteed to fit in 2 words. + + In actual practice, the following should be done: + + 1) For negative x, you should take the absolute value and remember + . the fact so that the result can be negated. This obviously does + . not apply in the unsigned case. + 2) For even y, you should factor out the power of 2 that divides y + . and divide x by it. You can then proceed by dividing by the + . odd factor of y. + + Here is a table of some odd values of y, and corresponding choices + for z which are "good". + + y z r a (hex) max x (hex) + + 3 2**32 1 55555555 100000001 + 5 2**32 1 33333333 100000003 + 7 2**24-1 0 249249 (infinite) + 9 2**24-1 0 1c71c7 (infinite) + 11 2**20-1 0 1745d (infinite) + 13 2**24-1 0 13b13b (infinite) + 15 2**32 1 11111111 10000000d + 17 2**32 1 f0f0f0f 10000000f + + If r is 1, then b = a+r-1 = a. This simplifies the computation + of (ax+b), since you can compute (x+1)(a) instead. If r is 0, + then b = 0 is ok to use which simplifies (ax+b). + + The bit patterns for 55555555, 33333333, and 11111111 are obviously + very regular. The bit patterns for the other values of a above are: + + y (hex) (binary) + + 7 249249 001001001001001001001001 << regular >> + 9 1c71c7 000111000111000111000111 << regular >> + 11 1745d 000000010111010001011101 << irregular >> + 13 13b13b 000100111011000100111011 << irregular >> + + The bit patterns for (a) corresponding to (y) of 11 and 13 may be + too irregular to warrant using this method. + + When z is a power of 2 minus 1, then the division by z is slightly + more complicated, involving an iterative solution. + + The code presented here solves division by 1 through 17, except for + 11 and 13. There are algorithms for both signed and unsigned + quantities given. + + TIMINGS (cycles) + + divisor positive negative unsigned + + . 1 2 2 2 + . 2 4 4 2 + . 3 19 21 19 + . 4 4 4 2 + . 5 18 22 19 + . 6 19 22 19 + . 8 4 4 2 + . 10 18 19 17 + . 12 18 20 18 + . 15 16 18 16 + . 16 4 4 2 + . 17 16 18 16 + + Now, the algorithm for 7, 9, and 14 is an iterative one. That is, + a loop body is executed until the tentative quotient is 0. The + number of times the loop body is executed varies depending on the + dividend, but is never more than two times. If the dividend is + less than the divisor, then the loop body is not executed at all. 
+ Each iteration adds 4 cycles to the timings. + + divisor positive negative unsigned + + . 7 19+4n 20+4n 20+4n n = number of iterations + . 9 21+4n 22+4n 21+4n + . 14 21+4n 22+4n 20+4n + + To give an idea of how the number of iterations varies, here is a + table of dividend versus number of iterations when dividing by 7. + + smallest largest required + dividend dividend iterations + + . 0 6 0 + . 7 0x6ffffff 1 + 0x1000006 0xffffffff 2 + + There is some overlap in the range of numbers requiring 1 and 2 + iterations. */ + +RDEFINE(t2,r1) +RDEFINE(x2,arg0) /* r26 */ +RDEFINE(t1,arg1) /* r25 */ +RDEFINE(x1,ret1) /* r29 */ + + SUBSPA_MILLI_DIV + ATTR_MILLI + + .proc + .callinfo millicode + .entry +/* NONE of these routines require a stack frame + ALL of these routines are unwindable from millicode */ + +GSYM($$divide_by_constant) + .export $$divide_by_constant,millicode +/* Provides a "nice" label for the code covered by the unwind descriptor + for things like gprof. */ + +/* DIVISION BY 2 (shift by 1) */ +GSYM($$divI_2) + .export $$divI_2,millicode + comclr,>= arg0,0,0 + addi 1,arg0,arg0 + MILLIRET + extrs arg0,30,31,ret1 + + +/* DIVISION BY 4 (shift by 2) */ +GSYM($$divI_4) + .export $$divI_4,millicode + comclr,>= arg0,0,0 + addi 3,arg0,arg0 + MILLIRET + extrs arg0,29,30,ret1 + + +/* DIVISION BY 8 (shift by 3) */ +GSYM($$divI_8) + .export $$divI_8,millicode + comclr,>= arg0,0,0 + addi 7,arg0,arg0 + MILLIRET + extrs arg0,28,29,ret1 + +/* DIVISION BY 16 (shift by 4) */ +GSYM($$divI_16) + .export $$divI_16,millicode + comclr,>= arg0,0,0 + addi 15,arg0,arg0 + MILLIRET + extrs arg0,27,28,ret1 + +/**************************************************************************** +* +* DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these +* +* includes 3,5,15,17 and also 6,10,12 +* +****************************************************************************/ + +/* DIVISION BY 3 (use z = 2**32; a = 55555555) */ + +GSYM($$divI_3) + .export $$divI_3,millicode + comb,<,N x2,0,LREF(neg3) + + addi 1,x2,x2 /* this cannot overflow */ + extru x2,1,2,x1 /* multiply by 5 to get started */ + sh2add x2,x2,x2 + b LREF(pos) + addc x1,0,x1 + +LSYM(neg3) + subi 1,x2,x2 /* this cannot overflow */ + extru x2,1,2,x1 /* multiply by 5 to get started */ + sh2add x2,x2,x2 + b LREF(neg) + addc x1,0,x1 + +GSYM($$divU_3) + .export $$divU_3,millicode + addi 1,x2,x2 /* this CAN overflow */ + addc 0,0,x1 + shd x1,x2,30,t1 /* multiply by 5 to get started */ + sh2add x2,x2,x2 + b LREF(pos) + addc x1,t1,x1 + +/* DIVISION BY 5 (use z = 2**32; a = 33333333) */ + +GSYM($$divI_5) + .export $$divI_5,millicode + comb,<,N x2,0,LREF(neg5) + + addi 3,x2,t1 /* this cannot overflow */ + sh1add x2,t1,x2 /* multiply by 3 to get started */ + b LREF(pos) + addc 0,0,x1 + +LSYM(neg5) + sub 0,x2,x2 /* negate x2 */ + addi 1,x2,x2 /* this cannot overflow */ + shd 0,x2,31,x1 /* get top bit (can be 1) */ + sh1add x2,x2,x2 /* multiply by 3 to get started */ + b LREF(neg) + addc x1,0,x1 + +GSYM($$divU_5) + .export $$divU_5,millicode + addi 1,x2,x2 /* this CAN overflow */ + addc 0,0,x1 + shd x1,x2,31,t1 /* multiply by 3 to get started */ + sh1add x2,x2,x2 + b LREF(pos) + addc t1,x1,x1 + +/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */ +GSYM($$divI_6) + .export $$divI_6,millicode + comb,<,N x2,0,LREF(neg6) + extru x2,30,31,x2 /* divide by 2 */ + addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */ + sh2add x2,t1,x2 /* multiply by 5 to get started */ + b LREF(pos) + addc 0,0,x1 + +LSYM(neg6) + subi 2,x2,x2 /* negate, divide by 2, and add 1 */ + /* 
negation and adding 1 are done */ + /* at the same time by the SUBI */ + extru x2,30,31,x2 + shd 0,x2,30,x1 + sh2add x2,x2,x2 /* multiply by 5 to get started */ + b LREF(neg) + addc x1,0,x1 + +GSYM($$divU_6) + .export $$divU_6,millicode + extru x2,30,31,x2 /* divide by 2 */ + addi 1,x2,x2 /* cannot carry */ + shd 0,x2,30,x1 /* multiply by 5 to get started */ + sh2add x2,x2,x2 + b LREF(pos) + addc x1,0,x1 + +/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */ +GSYM($$divU_10) + .export $$divU_10,millicode + extru x2,30,31,x2 /* divide by 2 */ + addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */ + sh1add x2,t1,x2 /* multiply by 3 to get started */ + addc 0,0,x1 +LSYM(pos) + shd x1,x2,28,t1 /* multiply by 0x11 */ + shd x2,0,28,t2 + add x2,t2,x2 + addc x1,t1,x1 +LSYM(pos_for_17) + shd x1,x2,24,t1 /* multiply by 0x101 */ + shd x2,0,24,t2 + add x2,t2,x2 + addc x1,t1,x1 + + shd x1,x2,16,t1 /* multiply by 0x10001 */ + shd x2,0,16,t2 + add x2,t2,x2 + MILLIRET + addc x1,t1,x1 + +GSYM($$divI_10) + .export $$divI_10,millicode + comb,< x2,0,LREF(neg10) + copy 0,x1 + extru x2,30,31,x2 /* divide by 2 */ + addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */ + sh1add x2,x2,x2 /* multiply by 3 to get started */ + +LSYM(neg10) + subi 2,x2,x2 /* negate, divide by 2, and add 1 */ + /* negation and adding 1 are done */ + /* at the same time by the SUBI */ + extru x2,30,31,x2 + sh1add x2,x2,x2 /* multiply by 3 to get started */ +LSYM(neg) + shd x1,x2,28,t1 /* multiply by 0x11 */ + shd x2,0,28,t2 + add x2,t2,x2 + addc x1,t1,x1 +LSYM(neg_for_17) + shd x1,x2,24,t1 /* multiply by 0x101 */ + shd x2,0,24,t2 + add x2,t2,x2 + addc x1,t1,x1 + + shd x1,x2,16,t1 /* multiply by 0x10001 */ + shd x2,0,16,t2 + add x2,t2,x2 + addc x1,t1,x1 + MILLIRET + sub 0,x1,x1 + +/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */ +GSYM($$divI_12) + .export $$divI_12,millicode + comb,< x2,0,LREF(neg12) + copy 0,x1 + extru x2,29,30,x2 /* divide by 4 */ + addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */ + sh2add x2,x2,x2 /* multiply by 5 to get started */ + +LSYM(neg12) + subi 4,x2,x2 /* negate, divide by 4, and add 1 */ + /* negation and adding 1 are done */ + /* at the same time by the SUBI */ + extru x2,29,30,x2 + b LREF(neg) + sh2add x2,x2,x2 /* multiply by 5 to get started */ + +GSYM($$divU_12) + .export $$divU_12,millicode + extru x2,29,30,x2 /* divide by 4 */ + addi 5,x2,t1 /* cannot carry */ + sh2add x2,t1,x2 /* multiply by 5 to get started */ + b LREF(pos) + addc 0,0,x1 + +/* DIVISION BY 15 (use z = 2**32; a = 11111111) */ +GSYM($$divI_15) + .export $$divI_15,millicode + comb,< x2,0,LREF(neg15) + copy 0,x1 + addib,tr 1,x2,LREF(pos)+4 + shd x1,x2,28,t1 + +LSYM(neg15) + b LREF(neg) + subi 1,x2,x2 + +GSYM($$divU_15) + .export $$divU_15,millicode + addi 1,x2,x2 /* this CAN overflow */ + b LREF(pos) + addc 0,0,x1 + +/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */ +GSYM($$divI_17) + .export $$divI_17,millicode + comb,<,n x2,0,LREF(neg17) + addi 1,x2,x2 /* this cannot overflow */ + shd 0,x2,28,t1 /* multiply by 0xf to get started */ + shd x2,0,28,t2 + sub t2,x2,x2 + b LREF(pos_for_17) + subb t1,0,x1 + +LSYM(neg17) + subi 1,x2,x2 /* this cannot overflow */ + shd 0,x2,28,t1 /* multiply by 0xf to get started */ + shd x2,0,28,t2 + sub t2,x2,x2 + b LREF(neg_for_17) + subb t1,0,x1 + +GSYM($$divU_17) + .export $$divU_17,millicode + addi 1,x2,x2 /* this CAN overflow */ + addc 0,0,x1 + shd x1,x2,28,t1 /* multiply by 0xf to get started */ +LSYM(u17) + shd x2,0,28,t2 + sub t2,x2,x2 + b LREF(pos_for_17) + subb t1,x1,x1 + + +/* 
DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these + includes 7,9 and also 14 + + + z = 2**24-1 + r = z mod x = 0 + + so choose b = 0 + + Also, in order to divide by z = 2**24-1, we approximate by dividing + by (z+1) = 2**24 (which is easy), and then correcting. + + (ax) = (z+1)q' + r + . = zq' + (q'+r) + + So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1) + Then the true remainder of (ax)/z is (q'+r). Repeat the process + with this new remainder, adding the tentative quotients together, + until a tentative quotient is 0 (and then we are done). There is + one last correction to be done. It is possible that (q'+r) = z. + If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But, + in fact, we need to add 1 more to the quotient. Now, it turns + out that this happens if and only if the original value x is + an exact multiple of y. So, to avoid a three instruction test at + the end, instead use 1 instruction to add 1 to x at the beginning. */ + +/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */ +GSYM($$divI_7) + .export $$divI_7,millicode + comb,<,n x2,0,LREF(neg7) +LSYM(7) + addi 1,x2,x2 /* cannot overflow */ + shd 0,x2,29,x1 + sh3add x2,x2,x2 + addc x1,0,x1 +LSYM(pos7) + shd x1,x2,26,t1 + shd x2,0,26,t2 + add x2,t2,x2 + addc x1,t1,x1 + + shd x1,x2,20,t1 + shd x2,0,20,t2 + add x2,t2,x2 + addc x1,t1,t1 + + /* computed . Now divide it by (2**24 - 1) */ + + copy 0,x1 + shd,= t1,x2,24,t1 /* tentative quotient */ +LSYM(1) + addb,tr t1,x1,LREF(2) /* add to previous quotient */ + extru x2,31,24,x2 /* new remainder (unadjusted) */ + + MILLIRETN + +LSYM(2) + addb,tr t1,x2,LREF(1) /* adjust remainder */ + extru,= x2,7,8,t1 /* new quotient */ + +LSYM(neg7) + subi 1,x2,x2 /* negate x2 and add 1 */ +LSYM(8) + shd 0,x2,29,x1 + sh3add x2,x2,x2 + addc x1,0,x1 + +LSYM(neg7_shift) + shd x1,x2,26,t1 + shd x2,0,26,t2 + add x2,t2,x2 + addc x1,t1,x1 + + shd x1,x2,20,t1 + shd x2,0,20,t2 + add x2,t2,x2 + addc x1,t1,t1 + + /* computed . 
Now divide it by (2**24 - 1) */ + + copy 0,x1 + shd,= t1,x2,24,t1 /* tentative quotient */ +LSYM(3) + addb,tr t1,x1,LREF(4) /* add to previous quotient */ + extru x2,31,24,x2 /* new remainder (unadjusted) */ + + MILLIRET + sub 0,x1,x1 /* negate result */ + +LSYM(4) + addb,tr t1,x2,LREF(3) /* adjust remainder */ + extru,= x2,7,8,t1 /* new quotient */ + +GSYM($$divU_7) + .export $$divU_7,millicode + addi 1,x2,x2 /* can carry */ + addc 0,0,x1 + shd x1,x2,29,t1 + sh3add x2,x2,x2 + b LREF(pos7) + addc t1,x1,x1 + +/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */ +GSYM($$divI_9) + .export $$divI_9,millicode + comb,<,n x2,0,LREF(neg9) + addi 1,x2,x2 /* cannot overflow */ + shd 0,x2,29,t1 + shd x2,0,29,t2 + sub t2,x2,x2 + b LREF(pos7) + subb t1,0,x1 + +LSYM(neg9) + subi 1,x2,x2 /* negate and add 1 */ + shd 0,x2,29,t1 + shd x2,0,29,t2 + sub t2,x2,x2 + b LREF(neg7_shift) + subb t1,0,x1 + +GSYM($$divU_9) + .export $$divU_9,millicode + addi 1,x2,x2 /* can carry */ + addc 0,0,x1 + shd x1,x2,29,t1 + shd x2,0,29,t2 + sub t2,x2,x2 + b LREF(pos7) + subb t1,x1,x1 + +/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */ +GSYM($$divI_14) + .export $$divI_14,millicode + comb,<,n x2,0,LREF(neg14) +GSYM($$divU_14) + .export $$divU_14,millicode + b LREF(7) /* go to 7 case */ + extru x2,30,31,x2 /* divide by 2 */ + +LSYM(neg14) + subi 2,x2,x2 /* negate (and add 2) */ + b LREF(8) + extru x2,30,31,x2 /* divide by 2 */ + .exit + .procend + .end +#endif + +#ifdef L_mulI +/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */ +/****************************************************************************** +This routine is used on PA2.0 processors when gcc -mno-fpregs is used + +ROUTINE: $$mulI + + +DESCRIPTION: + + $$mulI multiplies two single word integers, giving a single + word result. 
+ + +INPUT REGISTERS: + + arg0 = Operand 1 + arg1 = Operand 2 + r31 == return pc + sr0 == return space when called externally + + +OUTPUT REGISTERS: + + arg0 = undefined + arg1 = undefined + ret1 = result + +OTHER REGISTERS AFFECTED: + + r1 = undefined + +SIDE EFFECTS: + + Causes a trap under the following conditions: NONE + Changes memory at the following places: NONE + +PERMISSIBLE CONTEXT: + + Unwindable + Does not create a stack frame + Is usable for internal or external microcode + +DISCUSSION: + + Calls other millicode routines via mrp: NONE + Calls other millicode routines: NONE + +***************************************************************************/ + + +#define a0 %arg0 +#define a1 %arg1 +#define t0 %r1 +#define r %ret1 + +#define a0__128a0 zdep a0,24,25,a0 +#define a0__256a0 zdep a0,23,24,a0 +#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0) +#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1) +#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2) +#define b_n_ret_t0 b,n LREF(ret_t0) +#define b_e_shift b LREF(e_shift) +#define b_e_t0ma0 b LREF(e_t0ma0) +#define b_e_t0 b LREF(e_t0) +#define b_e_t0a0 b LREF(e_t0a0) +#define b_e_t02a0 b LREF(e_t02a0) +#define b_e_t04a0 b LREF(e_t04a0) +#define b_e_2t0 b LREF(e_2t0) +#define b_e_2t0a0 b LREF(e_2t0a0) +#define b_e_2t04a0 b LREF(e2t04a0) +#define b_e_3t0 b LREF(e_3t0) +#define b_e_4t0 b LREF(e_4t0) +#define b_e_4t0a0 b LREF(e_4t0a0) +#define b_e_4t08a0 b LREF(e4t08a0) +#define b_e_5t0 b LREF(e_5t0) +#define b_e_8t0 b LREF(e_8t0) +#define b_e_8t0a0 b LREF(e_8t0a0) +#define r__r_a0 add r,a0,r +#define r__r_2a0 sh1add a0,r,r +#define r__r_4a0 sh2add a0,r,r +#define r__r_8a0 sh3add a0,r,r +#define r__r_t0 add r,t0,r +#define r__r_2t0 sh1add t0,r,r +#define r__r_4t0 sh2add t0,r,r +#define r__r_8t0 sh3add t0,r,r +#define t0__3a0 sh1add a0,a0,t0 +#define t0__4a0 sh2add a0,0,t0 +#define t0__5a0 sh2add a0,a0,t0 +#define t0__8a0 sh3add a0,0,t0 +#define t0__9a0 sh3add a0,a0,t0 +#define t0__16a0 zdep a0,27,28,t0 +#define t0__32a0 zdep a0,26,27,t0 +#define t0__64a0 zdep a0,25,26,t0 +#define t0__128a0 zdep a0,24,25,t0 +#define t0__t0ma0 sub t0,a0,t0 +#define t0__t0_a0 add t0,a0,t0 +#define t0__t0_2a0 sh1add a0,t0,t0 +#define t0__t0_4a0 sh2add a0,t0,t0 +#define t0__t0_8a0 sh3add a0,t0,t0 +#define t0__2t0_a0 sh1add t0,a0,t0 +#define t0__3t0 sh1add t0,t0,t0 +#define t0__4t0 sh2add t0,0,t0 +#define t0__4t0_a0 sh2add t0,a0,t0 +#define t0__5t0 sh2add t0,t0,t0 +#define t0__8t0 sh3add t0,0,t0 +#define t0__8t0_a0 sh3add t0,a0,t0 +#define t0__9t0 sh3add t0,t0,t0 +#define t0__16t0 zdep t0,27,28,t0 +#define t0__32t0 zdep t0,26,27,t0 +#define t0__256a0 zdep a0,23,24,t0 + + + SUBSPA_MILLI + ATTR_MILLI + .align 16 + .proc + .callinfo millicode + .export $$mulI,millicode +GSYM($$mulI) + combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */ + copy 0,r /* zero out the result */ + xor a0,a1,a0 /* swap a0 & a1 using the */ + xor a0,a1,a1 /* old xor trick */ + xor a0,a1,a0 +LSYM(l4) + combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */ + zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ + sub,> 0,a1,t0 /* otherwise negate both and */ + combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */ + sub 0,a0,a1 + movb,tr,n t0,a0,LREF(l2) /* 10th inst. */ + +LSYM(l0) r__r_t0 /* add in this partial product */ +LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */ +LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ +LSYM(l3) blr t0,0 /* case on these 8 bits ****** */ + extru a1,23,24,a1 /* a1 >>= 8 ****************** */ + +/*16 insts before this. 
*/ +/* a0 <<= 8 ************************** */ +LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop +LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop +LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop +LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0 +LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop +LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0 +LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN +LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0 +LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop +LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0 +LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN +LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 +LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN +LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 +LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0 +LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN +LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0 +LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN +LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0 +LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN +LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 +LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN +LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 +LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0 +LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 +LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 +LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN +LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 +LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0 +LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN +LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 +LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN +LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0 +LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 +LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0 +LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0 +LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0 +LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0 +LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0 +LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0 +LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 +LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0 +LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 +LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 +LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! 
t0__t0ma0 ! b_n_ret_t0 +LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN +LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0 +LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0 +LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN +LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0 +LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0 +LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0 +LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0 +LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0 +LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0 +LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0 +LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 +LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0 +LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0 +LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0 +LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0 +LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0 +LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 +LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0 +LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0 +LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0 +LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0 +LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0 +LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0 +LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0 +LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0 +LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 +LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0 +LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0 +LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 +LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 +LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN +LSYM(x129) t0__128a0 ! 
a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0 +LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0 +LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0 +LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0 +LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0 +LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0 +LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0 +LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0 +LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0 +LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0 +LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0 +LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0 +LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0 +LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0 +LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0 +LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0 +LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0 +LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0 +LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0 +LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0 +LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0 +LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0 +LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0 +LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0 +LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0 +LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0 +LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0 +LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0 +LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0 +LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0 +LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0 +LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0 +LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0 +LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0 +LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0 +LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0 +LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0 +LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 +LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x194) t0__8a0 ! t0__3t0 ! 
b_e_2t0 ! t0__4t0_a0 +LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0 +LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0 +LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0 +LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0 +LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0 +LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0 +LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0 +LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0 +LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0 +LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0 +LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0 +LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0 +LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0 +LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0 +LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0 +LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0 +LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0 +LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 +LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 +LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 +LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0 +LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0 +LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0 +LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0 +LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0 +LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0 +LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0 +LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0 +LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0 +LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0 +LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0 +LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0 +LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0 +LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0 +LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0 +LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0 +LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0 +LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0 +LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0 +LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0 +LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0 +LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0 +LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0 +LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0 +LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0 +LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0 +LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0 +LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0 +LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0 +LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0 +LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0 +LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0 +LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0 +LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0 +LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 +LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 +/*1040 insts before this. 
*/
+LSYM(ret_t0)	MILLIRET
+LSYM(e_t0)	r__r_t0
+LSYM(e_shift)	a1_ne_0_b_l2
+	a0__256a0	/* a0 <<= 8 *********** */
+	MILLIRETN
+LSYM(e_t0ma0)	a1_ne_0_b_l0
+	t0__t0ma0
+	MILLIRET
+	r__r_t0
+LSYM(e_t0a0)	a1_ne_0_b_l0
+	t0__t0_a0
+	MILLIRET
+	r__r_t0
+LSYM(e_t02a0)	a1_ne_0_b_l0
+	t0__t0_2a0
+	MILLIRET
+	r__r_t0
+LSYM(e_t04a0)	a1_ne_0_b_l0
+	t0__t0_4a0
+	MILLIRET
+	r__r_t0
+LSYM(e_2t0)	a1_ne_0_b_l1
+	r__r_2t0
+	MILLIRETN
+LSYM(e_2t0a0)	a1_ne_0_b_l0
+	t0__2t0_a0
+	MILLIRET
+	r__r_t0
+LSYM(e2t04a0)	t0__t0_2a0
+	a1_ne_0_b_l1
+	r__r_2t0
+	MILLIRETN
+LSYM(e_3t0)	a1_ne_0_b_l0
+	t0__3t0
+	MILLIRET
+	r__r_t0
+LSYM(e_4t0)	a1_ne_0_b_l1
+	r__r_4t0
+	MILLIRETN
+LSYM(e_4t0a0)	a1_ne_0_b_l0
+	t0__4t0_a0
+	MILLIRET
+	r__r_t0
+LSYM(e4t08a0)	t0__t0_2a0
+	a1_ne_0_b_l1
+	r__r_4t0
+	MILLIRETN
+LSYM(e_5t0)	a1_ne_0_b_l0
+	t0__5t0
+	MILLIRET
+	r__r_t0
+LSYM(e_8t0)	a1_ne_0_b_l1
+	r__r_8t0
+	MILLIRETN
+LSYM(e_8t0a0)	a1_ne_0_b_l0
+	t0__8t0_a0
+	MILLIRET
+	r__r_t0
+
+	.procend
+	.end
+#endif
diff --git a/libgcc/config/pa/t-linux b/libgcc/config/pa/t-linux
new file mode 100644
index 00000000000..d396bf7705a
--- /dev/null
+++ b/libgcc/config/pa/t-linux
@@ -0,0 +1,6 @@
+#Plug millicode routines into libgcc.a We want these on both native and
+#cross compiles. We use the "64-bit" routines because the "32-bit" code
+#is broken for certain corner cases.
+
+LIB1ASMSRC = pa/milli64.S
+LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall
diff --git a/libgcc/config/pa/t-linux64 b/libgcc/config/pa/t-linux64
new file mode 100644
index 00000000000..6cb9806ff2e
--- /dev/null
+++ b/libgcc/config/pa/t-linux64
@@ -0,0 +1,4 @@
+# Plug millicode routines into libgcc.a We want these on both native and
+# cross compiles.
+# FIXME: Explain.
+LIB1ASMFUNCS := $(filter-out _dyncall, $(LIB1ASMFUNCS))
diff --git a/libgcc/config/picochip/lib1funcs.S b/libgcc/config/picochip/lib1funcs.S
new file mode 100644
index 00000000000..d344170d248
--- /dev/null
+++ b/libgcc/config/picochip/lib1funcs.S
@@ -0,0 +1,4 @@
+// picoChip ASM file
+// Fake libgcc asm file. This contains nothing, but is used to prevent gcc
+// getting upset about the lack of a lib1funcs.S file when LIB1ASMFUNCS is
+// defined to switch off the compilation of parts of libgcc.
diff --git a/libgcc/config/picochip/t-picochip b/libgcc/config/picochip/t-picochip
index 5135d500cbb..a596ec98947 100644
--- a/libgcc/config/picochip/t-picochip
+++ b/libgcc/config/picochip/t-picochip
@@ -1,2 +1,9 @@
+# Prevent some of the more complicated libgcc functions from being
+# compiled. This is because they are generally too big to fit into an
+# AE anyway, so there is no point in having them. Also, some don't
+# compile properly so we'll ignore them for the moment.
+LIB1ASMSRC = picochip/lib1funcs.S
+LIB1ASMFUNCS = _mulsc3 _divsc3
+
 # Turn off the building of exception handling libraries.
 LIB2ADDEH =
diff --git a/libgcc/config/sh/lib1funcs.S b/libgcc/config/sh/lib1funcs.S
new file mode 100644
index 00000000000..2f0ca16cd91
--- /dev/null
+++ b/libgcc/config/sh/lib1funcs.S
@@ -0,0 +1,3933 @@
+/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+   2004, 2005, 2006, 2009
+   Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+ +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +#include "lib1funcs.h" + +/* t-vxworks needs to build both PIC and non-PIC versions of libgcc, + so it is more convenient to define NO_FPSCR_VALUES here than to + define it on the command line. */ +#if defined __vxworks && defined __PIC__ +#define NO_FPSCR_VALUES +#endif + +#if ! __SH5__ +#ifdef L_ashiftrt + .global GLOBAL(ashiftrt_r4_0) + .global GLOBAL(ashiftrt_r4_1) + .global GLOBAL(ashiftrt_r4_2) + .global GLOBAL(ashiftrt_r4_3) + .global GLOBAL(ashiftrt_r4_4) + .global GLOBAL(ashiftrt_r4_5) + .global GLOBAL(ashiftrt_r4_6) + .global GLOBAL(ashiftrt_r4_7) + .global GLOBAL(ashiftrt_r4_8) + .global GLOBAL(ashiftrt_r4_9) + .global GLOBAL(ashiftrt_r4_10) + .global GLOBAL(ashiftrt_r4_11) + .global GLOBAL(ashiftrt_r4_12) + .global GLOBAL(ashiftrt_r4_13) + .global GLOBAL(ashiftrt_r4_14) + .global GLOBAL(ashiftrt_r4_15) + .global GLOBAL(ashiftrt_r4_16) + .global GLOBAL(ashiftrt_r4_17) + .global GLOBAL(ashiftrt_r4_18) + .global GLOBAL(ashiftrt_r4_19) + .global GLOBAL(ashiftrt_r4_20) + .global GLOBAL(ashiftrt_r4_21) + .global GLOBAL(ashiftrt_r4_22) + .global GLOBAL(ashiftrt_r4_23) + .global GLOBAL(ashiftrt_r4_24) + .global GLOBAL(ashiftrt_r4_25) + .global GLOBAL(ashiftrt_r4_26) + .global GLOBAL(ashiftrt_r4_27) + .global GLOBAL(ashiftrt_r4_28) + .global GLOBAL(ashiftrt_r4_29) + .global GLOBAL(ashiftrt_r4_30) + .global GLOBAL(ashiftrt_r4_31) + .global GLOBAL(ashiftrt_r4_32) + + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26)) + 
HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32)) + + .align 1 +GLOBAL(ashiftrt_r4_32): +GLOBAL(ashiftrt_r4_31): + rotcl r4 + rts + subc r4,r4 + +GLOBAL(ashiftrt_r4_30): + shar r4 +GLOBAL(ashiftrt_r4_29): + shar r4 +GLOBAL(ashiftrt_r4_28): + shar r4 +GLOBAL(ashiftrt_r4_27): + shar r4 +GLOBAL(ashiftrt_r4_26): + shar r4 +GLOBAL(ashiftrt_r4_25): + shar r4 +GLOBAL(ashiftrt_r4_24): + shlr16 r4 + shlr8 r4 + rts + exts.b r4,r4 + +GLOBAL(ashiftrt_r4_23): + shar r4 +GLOBAL(ashiftrt_r4_22): + shar r4 +GLOBAL(ashiftrt_r4_21): + shar r4 +GLOBAL(ashiftrt_r4_20): + shar r4 +GLOBAL(ashiftrt_r4_19): + shar r4 +GLOBAL(ashiftrt_r4_18): + shar r4 +GLOBAL(ashiftrt_r4_17): + shar r4 +GLOBAL(ashiftrt_r4_16): + shlr16 r4 + rts + exts.w r4,r4 + +GLOBAL(ashiftrt_r4_15): + shar r4 +GLOBAL(ashiftrt_r4_14): + shar r4 +GLOBAL(ashiftrt_r4_13): + shar r4 +GLOBAL(ashiftrt_r4_12): + shar r4 +GLOBAL(ashiftrt_r4_11): + shar r4 +GLOBAL(ashiftrt_r4_10): + shar r4 +GLOBAL(ashiftrt_r4_9): + shar r4 +GLOBAL(ashiftrt_r4_8): + shar r4 +GLOBAL(ashiftrt_r4_7): + shar r4 +GLOBAL(ashiftrt_r4_6): + shar r4 +GLOBAL(ashiftrt_r4_5): + shar r4 +GLOBAL(ashiftrt_r4_4): + shar r4 +GLOBAL(ashiftrt_r4_3): + shar r4 +GLOBAL(ashiftrt_r4_2): + shar r4 +GLOBAL(ashiftrt_r4_1): + rts + shar r4 + +GLOBAL(ashiftrt_r4_0): + rts + nop + + ENDFUNC(GLOBAL(ashiftrt_r4_0)) + ENDFUNC(GLOBAL(ashiftrt_r4_1)) + ENDFUNC(GLOBAL(ashiftrt_r4_2)) + ENDFUNC(GLOBAL(ashiftrt_r4_3)) + ENDFUNC(GLOBAL(ashiftrt_r4_4)) + ENDFUNC(GLOBAL(ashiftrt_r4_5)) + ENDFUNC(GLOBAL(ashiftrt_r4_6)) + ENDFUNC(GLOBAL(ashiftrt_r4_7)) + ENDFUNC(GLOBAL(ashiftrt_r4_8)) + ENDFUNC(GLOBAL(ashiftrt_r4_9)) + ENDFUNC(GLOBAL(ashiftrt_r4_10)) + ENDFUNC(GLOBAL(ashiftrt_r4_11)) + ENDFUNC(GLOBAL(ashiftrt_r4_12)) + ENDFUNC(GLOBAL(ashiftrt_r4_13)) + ENDFUNC(GLOBAL(ashiftrt_r4_14)) + ENDFUNC(GLOBAL(ashiftrt_r4_15)) + ENDFUNC(GLOBAL(ashiftrt_r4_16)) + ENDFUNC(GLOBAL(ashiftrt_r4_17)) + ENDFUNC(GLOBAL(ashiftrt_r4_18)) + ENDFUNC(GLOBAL(ashiftrt_r4_19)) + ENDFUNC(GLOBAL(ashiftrt_r4_20)) + ENDFUNC(GLOBAL(ashiftrt_r4_21)) + ENDFUNC(GLOBAL(ashiftrt_r4_22)) + ENDFUNC(GLOBAL(ashiftrt_r4_23)) + ENDFUNC(GLOBAL(ashiftrt_r4_24)) + ENDFUNC(GLOBAL(ashiftrt_r4_25)) + ENDFUNC(GLOBAL(ashiftrt_r4_26)) + ENDFUNC(GLOBAL(ashiftrt_r4_27)) + ENDFUNC(GLOBAL(ashiftrt_r4_28)) + ENDFUNC(GLOBAL(ashiftrt_r4_29)) + ENDFUNC(GLOBAL(ashiftrt_r4_30)) + ENDFUNC(GLOBAL(ashiftrt_r4_31)) + ENDFUNC(GLOBAL(ashiftrt_r4_32)) +#endif + +#ifdef L_ashiftrt_n + +! +! GLOBAL(ashrsi3) +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
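+! Informally, the routine below computes
+!
+!	long __ashrsi3 (long value, long count)
+!	{
+!	  return value >> (count & 31);
+!	}
+!
+! (assuming the usual arithmetic behaviour of ">>" on negative values;
+! parameter names are only illustrative).  There is no variable-shift
+! instruction here, so the byte table below stores (label - table) offsets:
+! "mov.b @(r0,r5),r5" fetches the offset for the masked count and "braf r5"
+! (or add/jmp on SH1) drops into a chain of single-bit "shar" instructions,
+! with shortcuts at counts 16 and 24 and a rotcl/subc sequence for 31 that
+! yields 0 or -1.
+!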
+ + .global GLOBAL(ashrsi3) + HIDDEN_FUNC(GLOBAL(ashrsi3)) + .align 2 +GLOBAL(ashrsi3): + mov #31,r0 + and r0,r5 + mova LOCAL(ashrsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(ashrsi3_table): + .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table) + +LOCAL(ashrsi3_31): + rotcl r0 + rts + subc r0,r0 + +LOCAL(ashrsi3_30): + shar r0 +LOCAL(ashrsi3_29): + shar r0 +LOCAL(ashrsi3_28): + shar r0 +LOCAL(ashrsi3_27): + shar r0 +LOCAL(ashrsi3_26): + shar r0 +LOCAL(ashrsi3_25): + shar r0 +LOCAL(ashrsi3_24): + shlr16 r0 + shlr8 r0 + rts + exts.b r0,r0 + +LOCAL(ashrsi3_23): + shar r0 +LOCAL(ashrsi3_22): + shar r0 +LOCAL(ashrsi3_21): + shar r0 +LOCAL(ashrsi3_20): + shar r0 +LOCAL(ashrsi3_19): + shar r0 +LOCAL(ashrsi3_18): + shar r0 +LOCAL(ashrsi3_17): + shar r0 +LOCAL(ashrsi3_16): + shlr16 r0 + rts + exts.w r0,r0 + +LOCAL(ashrsi3_15): + shar r0 +LOCAL(ashrsi3_14): + shar r0 +LOCAL(ashrsi3_13): + shar r0 +LOCAL(ashrsi3_12): + shar r0 +LOCAL(ashrsi3_11): + shar r0 +LOCAL(ashrsi3_10): + shar r0 +LOCAL(ashrsi3_9): + shar r0 +LOCAL(ashrsi3_8): + shar r0 +LOCAL(ashrsi3_7): + shar r0 +LOCAL(ashrsi3_6): + shar r0 +LOCAL(ashrsi3_5): + shar r0 +LOCAL(ashrsi3_4): + shar r0 +LOCAL(ashrsi3_3): + shar r0 +LOCAL(ashrsi3_2): + shar r0 +LOCAL(ashrsi3_1): + rts + shar r0 + +LOCAL(ashrsi3_0): + rts + nop + + ENDFUNC(GLOBAL(ashrsi3)) +#endif + +#ifdef L_ashiftlt + +! +! GLOBAL(ashlsi3) +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
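+! Informally: return value << (count & 31).  Same byte-offset dispatch as
+! __ashrsi3 above, but each table target is a short chain composed from
+! shll16/shll8/shll2/shll, so any count takes only a handful of shift
+! instructions rather than one per bit.
+!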
+ .global GLOBAL(ashlsi3) + HIDDEN_FUNC(GLOBAL(ashlsi3)) + .align 2 +GLOBAL(ashlsi3): + mov #31,r0 + and r0,r5 + mova LOCAL(ashlsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(ashlsi3_table): + .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table) + +LOCAL(ashlsi3_6): + shll2 r0 +LOCAL(ashlsi3_4): + shll2 r0 +LOCAL(ashlsi3_2): + rts + shll2 r0 + +LOCAL(ashlsi3_7): + shll2 r0 +LOCAL(ashlsi3_5): + shll2 r0 +LOCAL(ashlsi3_3): + shll2 r0 +LOCAL(ashlsi3_1): + rts + shll r0 + +LOCAL(ashlsi3_14): + shll2 r0 +LOCAL(ashlsi3_12): + shll2 r0 +LOCAL(ashlsi3_10): + shll2 r0 +LOCAL(ashlsi3_8): + rts + shll8 r0 + +LOCAL(ashlsi3_15): + shll2 r0 +LOCAL(ashlsi3_13): + shll2 r0 +LOCAL(ashlsi3_11): + shll2 r0 +LOCAL(ashlsi3_9): + shll8 r0 + rts + shll r0 + +LOCAL(ashlsi3_22): + shll2 r0 +LOCAL(ashlsi3_20): + shll2 r0 +LOCAL(ashlsi3_18): + shll2 r0 +LOCAL(ashlsi3_16): + rts + shll16 r0 + +LOCAL(ashlsi3_23): + shll2 r0 +LOCAL(ashlsi3_21): + shll2 r0 +LOCAL(ashlsi3_19): + shll2 r0 +LOCAL(ashlsi3_17): + shll16 r0 + rts + shll r0 + +LOCAL(ashlsi3_30): + shll2 r0 +LOCAL(ashlsi3_28): + shll2 r0 +LOCAL(ashlsi3_26): + shll2 r0 +LOCAL(ashlsi3_24): + shll16 r0 + rts + shll8 r0 + +LOCAL(ashlsi3_31): + shll2 r0 +LOCAL(ashlsi3_29): + shll2 r0 +LOCAL(ashlsi3_27): + shll2 r0 +LOCAL(ashlsi3_25): + shll16 r0 + shll8 r0 + rts + shll r0 + +LOCAL(ashlsi3_0): + rts + nop + + ENDFUNC(GLOBAL(ashlsi3)) +#endif + +#ifdef L_lshiftrt + +! +! GLOBAL(lshrsi3) +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
+ .global GLOBAL(lshrsi3) + HIDDEN_FUNC(GLOBAL(lshrsi3)) + .align 2 +GLOBAL(lshrsi3): + mov #31,r0 + and r0,r5 + mova LOCAL(lshrsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(lshrsi3_table): + .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table) + +LOCAL(lshrsi3_6): + shlr2 r0 +LOCAL(lshrsi3_4): + shlr2 r0 +LOCAL(lshrsi3_2): + rts + shlr2 r0 + +LOCAL(lshrsi3_7): + shlr2 r0 +LOCAL(lshrsi3_5): + shlr2 r0 +LOCAL(lshrsi3_3): + shlr2 r0 +LOCAL(lshrsi3_1): + rts + shlr r0 + +LOCAL(lshrsi3_14): + shlr2 r0 +LOCAL(lshrsi3_12): + shlr2 r0 +LOCAL(lshrsi3_10): + shlr2 r0 +LOCAL(lshrsi3_8): + rts + shlr8 r0 + +LOCAL(lshrsi3_15): + shlr2 r0 +LOCAL(lshrsi3_13): + shlr2 r0 +LOCAL(lshrsi3_11): + shlr2 r0 +LOCAL(lshrsi3_9): + shlr8 r0 + rts + shlr r0 + +LOCAL(lshrsi3_22): + shlr2 r0 +LOCAL(lshrsi3_20): + shlr2 r0 +LOCAL(lshrsi3_18): + shlr2 r0 +LOCAL(lshrsi3_16): + rts + shlr16 r0 + +LOCAL(lshrsi3_23): + shlr2 r0 +LOCAL(lshrsi3_21): + shlr2 r0 +LOCAL(lshrsi3_19): + shlr2 r0 +LOCAL(lshrsi3_17): + shlr16 r0 + rts + shlr r0 + +LOCAL(lshrsi3_30): + shlr2 r0 +LOCAL(lshrsi3_28): + shlr2 r0 +LOCAL(lshrsi3_26): + shlr2 r0 +LOCAL(lshrsi3_24): + shlr16 r0 + rts + shlr8 r0 + +LOCAL(lshrsi3_31): + shlr2 r0 +LOCAL(lshrsi3_29): + shlr2 r0 +LOCAL(lshrsi3_27): + shlr2 r0 +LOCAL(lshrsi3_25): + shlr16 r0 + shlr8 r0 + rts + shlr r0 + +LOCAL(lshrsi3_0): + rts + nop + + ENDFUNC(GLOBAL(lshrsi3)) +#endif + +#ifdef L_movmem + .text + .balign 4 + .global GLOBAL(movmem) + HIDDEN_FUNC(GLOBAL(movmem)) + HIDDEN_ALIAS(movstr,movmem) + /* This would be a lot simpler if r6 contained the byte count + minus 64, and we wouldn't be called here for a byte count of 64. */ +GLOBAL(movmem): + sts.l pr,@-r15 + shll2 r6 + bsr GLOBAL(movmemSI52+2) + mov.l @(48,r5),r0 + .balign 4 +LOCAL(movmem_loop): /* Reached with rts */ + mov.l @(60,r5),r0 + add #-64,r6 + mov.l r0,@(60,r4) + tst r6,r6 + mov.l @(56,r5),r0 + bt LOCAL(movmem_done) + mov.l r0,@(56,r4) + cmp/pl r6 + mov.l @(52,r5),r0 + add #64,r5 + mov.l r0,@(52,r4) + add #64,r4 + bt GLOBAL(movmemSI52) +! 
done all the large groups, do the remainder +! jump to movmem+ + mova GLOBAL(movmemSI4)+4,r0 + add r6,r0 + jmp @r0 +LOCAL(movmem_done): ! share slot insn, works out aligned. + lds.l @r15+,pr + mov.l r0,@(56,r4) + mov.l @(52,r5),r0 + rts + mov.l r0,@(52,r4) + .balign 4 +! ??? We need aliases movstr* for movmem* for the older libraries. These +! aliases will be removed at the some point in the future. + .global GLOBAL(movmemSI64) + HIDDEN_FUNC(GLOBAL(movmemSI64)) + HIDDEN_ALIAS(movstrSI64,movmemSI64) +GLOBAL(movmemSI64): + mov.l @(60,r5),r0 + mov.l r0,@(60,r4) + .global GLOBAL(movmemSI60) + HIDDEN_FUNC(GLOBAL(movmemSI60)) + HIDDEN_ALIAS(movstrSI60,movmemSI60) +GLOBAL(movmemSI60): + mov.l @(56,r5),r0 + mov.l r0,@(56,r4) + .global GLOBAL(movmemSI56) + HIDDEN_FUNC(GLOBAL(movmemSI56)) + HIDDEN_ALIAS(movstrSI56,movmemSI56) +GLOBAL(movmemSI56): + mov.l @(52,r5),r0 + mov.l r0,@(52,r4) + .global GLOBAL(movmemSI52) + HIDDEN_FUNC(GLOBAL(movmemSI52)) + HIDDEN_ALIAS(movstrSI52,movmemSI52) +GLOBAL(movmemSI52): + mov.l @(48,r5),r0 + mov.l r0,@(48,r4) + .global GLOBAL(movmemSI48) + HIDDEN_FUNC(GLOBAL(movmemSI48)) + HIDDEN_ALIAS(movstrSI48,movmemSI48) +GLOBAL(movmemSI48): + mov.l @(44,r5),r0 + mov.l r0,@(44,r4) + .global GLOBAL(movmemSI44) + HIDDEN_FUNC(GLOBAL(movmemSI44)) + HIDDEN_ALIAS(movstrSI44,movmemSI44) +GLOBAL(movmemSI44): + mov.l @(40,r5),r0 + mov.l r0,@(40,r4) + .global GLOBAL(movmemSI40) + HIDDEN_FUNC(GLOBAL(movmemSI40)) + HIDDEN_ALIAS(movstrSI40,movmemSI40) +GLOBAL(movmemSI40): + mov.l @(36,r5),r0 + mov.l r0,@(36,r4) + .global GLOBAL(movmemSI36) + HIDDEN_FUNC(GLOBAL(movmemSI36)) + HIDDEN_ALIAS(movstrSI36,movmemSI36) +GLOBAL(movmemSI36): + mov.l @(32,r5),r0 + mov.l r0,@(32,r4) + .global GLOBAL(movmemSI32) + HIDDEN_FUNC(GLOBAL(movmemSI32)) + HIDDEN_ALIAS(movstrSI32,movmemSI32) +GLOBAL(movmemSI32): + mov.l @(28,r5),r0 + mov.l r0,@(28,r4) + .global GLOBAL(movmemSI28) + HIDDEN_FUNC(GLOBAL(movmemSI28)) + HIDDEN_ALIAS(movstrSI28,movmemSI28) +GLOBAL(movmemSI28): + mov.l @(24,r5),r0 + mov.l r0,@(24,r4) + .global GLOBAL(movmemSI24) + HIDDEN_FUNC(GLOBAL(movmemSI24)) + HIDDEN_ALIAS(movstrSI24,movmemSI24) +GLOBAL(movmemSI24): + mov.l @(20,r5),r0 + mov.l r0,@(20,r4) + .global GLOBAL(movmemSI20) + HIDDEN_FUNC(GLOBAL(movmemSI20)) + HIDDEN_ALIAS(movstrSI20,movmemSI20) +GLOBAL(movmemSI20): + mov.l @(16,r5),r0 + mov.l r0,@(16,r4) + .global GLOBAL(movmemSI16) + HIDDEN_FUNC(GLOBAL(movmemSI16)) + HIDDEN_ALIAS(movstrSI16,movmemSI16) +GLOBAL(movmemSI16): + mov.l @(12,r5),r0 + mov.l r0,@(12,r4) + .global GLOBAL(movmemSI12) + HIDDEN_FUNC(GLOBAL(movmemSI12)) + HIDDEN_ALIAS(movstrSI12,movmemSI12) +GLOBAL(movmemSI12): + mov.l @(8,r5),r0 + mov.l r0,@(8,r4) + .global GLOBAL(movmemSI8) + HIDDEN_FUNC(GLOBAL(movmemSI8)) + HIDDEN_ALIAS(movstrSI8,movmemSI8) +GLOBAL(movmemSI8): + mov.l @(4,r5),r0 + mov.l r0,@(4,r4) + .global GLOBAL(movmemSI4) + HIDDEN_FUNC(GLOBAL(movmemSI4)) + HIDDEN_ALIAS(movstrSI4,movmemSI4) +GLOBAL(movmemSI4): + mov.l @(0,r5),r0 + rts + mov.l r0,@(0,r4) + + ENDFUNC(GLOBAL(movmemSI64)) + ENDFUNC(GLOBAL(movmemSI60)) + ENDFUNC(GLOBAL(movmemSI56)) + ENDFUNC(GLOBAL(movmemSI52)) + ENDFUNC(GLOBAL(movmemSI48)) + ENDFUNC(GLOBAL(movmemSI44)) + ENDFUNC(GLOBAL(movmemSI40)) + ENDFUNC(GLOBAL(movmemSI36)) + ENDFUNC(GLOBAL(movmemSI32)) + ENDFUNC(GLOBAL(movmemSI28)) + ENDFUNC(GLOBAL(movmemSI24)) + ENDFUNC(GLOBAL(movmemSI20)) + ENDFUNC(GLOBAL(movmemSI16)) + ENDFUNC(GLOBAL(movmemSI12)) + ENDFUNC(GLOBAL(movmemSI8)) + ENDFUNC(GLOBAL(movmemSI4)) + ENDFUNC(GLOBAL(movmem)) +#endif + +#ifdef L_movmem_i4 + .text + .global 
GLOBAL(movmem_i4_even) + .global GLOBAL(movmem_i4_odd) + .global GLOBAL(movmemSI12_i4) + + HIDDEN_FUNC(GLOBAL(movmem_i4_even)) + HIDDEN_FUNC(GLOBAL(movmem_i4_odd)) + HIDDEN_FUNC(GLOBAL(movmemSI12_i4)) + + HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even) + HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd) + HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4) + + .p2align 5 +L_movmem_2mod4_end: + mov.l r0,@(16,r4) + rts + mov.l r1,@(20,r4) + + .p2align 2 + +GLOBAL(movmem_i4_even): + mov.l @r5+,r0 + bra L_movmem_start_even + mov.l @r5+,r1 + +GLOBAL(movmem_i4_odd): + mov.l @r5+,r1 + add #-4,r4 + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r1,@(4,r4) + mov.l r2,@(8,r4) + +L_movmem_loop: + mov.l r3,@(12,r4) + dt r6 + mov.l @r5+,r0 + bt/s L_movmem_2mod4_end + mov.l @r5+,r1 + add #16,r4 +L_movmem_start_even: + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r0,@r4 + dt r6 + mov.l r1,@(4,r4) + bf/s L_movmem_loop + mov.l r2,@(8,r4) + rts + mov.l r3,@(12,r4) + + ENDFUNC(GLOBAL(movmem_i4_even)) + ENDFUNC(GLOBAL(movmem_i4_odd)) + + .p2align 4 +GLOBAL(movmemSI12_i4): + mov.l @r5,r0 + mov.l @(4,r5),r1 + mov.l @(8,r5),r2 + mov.l r0,@r4 + mov.l r1,@(4,r4) + rts + mov.l r2,@(8,r4) + + ENDFUNC(GLOBAL(movmemSI12_i4)) +#endif + +#ifdef L_mulsi3 + + + .global GLOBAL(mulsi3) + HIDDEN_FUNC(GLOBAL(mulsi3)) + +! r4 = aabb +! r5 = ccdd +! r0 = aabb*ccdd via partial products +! +! if aa == 0 and cc = 0 +! r0 = bb*dd +! +! else +! aa = bb*dd + (aa*dd*65536) + (cc*bb*65536) +! + +GLOBAL(mulsi3): + mulu.w r4,r5 ! multiply the lsws macl=bb*dd + mov r5,r3 ! r3 = ccdd + swap.w r4,r2 ! r2 = bbaa + xtrct r2,r3 ! r3 = aacc + tst r3,r3 ! msws zero ? + bf hiset + rts ! yes - then we have the answer + sts macl,r0 + +hiset: sts macl,r0 ! r0 = bb*dd + mulu.w r2,r5 ! brewing macl = aa*dd + sts macl,r1 + mulu.w r3,r4 ! brewing macl = cc*bb + sts macl,r2 + add r1,r2 + shll16 r2 + rts + add r2,r0 + + ENDFUNC(GLOBAL(mulsi3)) +#endif +#endif /* ! __SH5__ */ +#ifdef L_sdivsi3_i4 + .title "SH DIVIDE" +!! 4 byte integer Divide code for the Renesas SH +#ifdef __SH4__ +!! args in r4 and r5, result in fpul, clobber dr0, dr2 + + .global GLOBAL(sdivsi3_i4) + HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) +GLOBAL(sdivsi3_i4): + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + rts + ftrc dr0,fpul + + ENDFUNC(GLOBAL(sdivsi3_i4)) +#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__) +!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2 + +#if ! __SH5__ || __SH5__ == 32 +#if __SH5__ + .mode SHcompact +#endif + .global GLOBAL(sdivsi3_i4) + HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) +GLOBAL(sdivsi3_i4): + sts.l fpscr,@-r15 + mov #8,r2 + swap.w r2,r2 + lds r2,fpscr + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + lds.l @r15+,fpscr + + ENDFUNC(GLOBAL(sdivsi3_i4)) +#endif /* ! __SH5__ || __SH5__ == 32 */ +#endif /* ! __SH4__ */ +#endif + +#ifdef L_sdivsi3 +/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with + sh2e/sh3e code. */ +#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) +!! +!! Steve Chamberlain +!! sac@cygnus.com +!! +!! + +!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit + + .global GLOBAL(sdivsi3) +#if __SHMEDIA__ +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif + .align 2 +#if 0 +/* The assembly code that follows is a hand-optimized version of the C + code that follows. 
Note that the registers that are modified are + exactly those listed as clobbered in the patterns divsi3_i1 and + divsi3_i1_media. + +int __sdivsi3 (i, j) + int i, j; +{ + register unsigned long long r18 asm ("r18"); + register unsigned long long r19 asm ("r19"); + register unsigned long long r0 asm ("r0") = 0; + register unsigned long long r1 asm ("r1") = 1; + register int r2 asm ("r2") = i >> 31; + register int r3 asm ("r3") = j >> 31; + + r2 = r2 ? r2 : r1; + r3 = r3 ? r3 : r1; + r18 = i * r2; + r19 = j * r3; + r2 *= r3; + + r19 <<= 31; + r1 <<= 31; + do + if (r18 >= r19) + r0 |= r1, r18 -= r19; + while (r19 >>= 1, r1 >>= 1); + + return r2 * (int)r0; +} +*/ +GLOBAL(sdivsi3): + pt/l LOCAL(sdivsi3_dontadd), tr2 + pt/l LOCAL(sdivsi3_loop), tr1 + ptabs/l r18, tr0 + movi 0, r0 + movi 1, r1 + shari.l r4, 31, r2 + shari.l r5, 31, r3 + cmveq r2, r1, r2 + cmveq r3, r1, r3 + muls.l r4, r2, r18 + muls.l r5, r3, r19 + muls.l r2, r3, r2 + shlli r19, 31, r19 + shlli r1, 31, r1 +LOCAL(sdivsi3_loop): + bgtu r19, r18, tr2 + or r0, r1, r0 + sub r18, r19, r18 +LOCAL(sdivsi3_dontadd): + shlri r1, 1, r1 + shlri r19, 1, r19 + bnei r1, 0, tr1 + muls.l r0, r2, r0 + add.l r0, r63, r0 + blink tr0, r63 +#elif 0 /* ! 0 */ + // inputs: r4,r5 + // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0 + // result in r0 +GLOBAL(sdivsi3): + // can create absolute value without extra latency, + // but dependent on proper sign extension of inputs: + // shari.l r5,31,r2 + // xor r5,r2,r20 + // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended. + shari.l r5,31,r2 + ori r2,1,r2 + muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended. + movi 0xffffffffffffbb0c,r19 // shift count eqiv 76 + shari.l r4,31,r3 + nsb r20,r0 + shlld r20,r0,r25 + shlri r25,48,r25 + sub r19,r25,r1 + mmulfx.w r1,r1,r2 + mshflo.w r1,r63,r1 + // If r4 was to be used in-place instead of r21, could use this sequence + // to compute absolute: + // sub r63,r4,r19 // compute absolute value of r4 + // shlri r4,32,r3 // into lower 32 bit of r4, keeping + // mcmv r19,r3,r4 // the sign in the upper 32 bits intact. + ori r3,1,r3 + mmulfx.w r25,r2,r2 + sub r19,r0,r0 + muls.l r4,r3,r21 + msub.w r1,r2,r2 + addi r2,-2,r1 + mulu.l r21,r1,r19 + mmulfx.w r2,r2,r2 + shlli r1,15,r1 + shlrd r19,r0,r19 + mulu.l r19,r20,r3 + mmacnfx.wl r25,r2,r1 + ptabs r18,tr0 + sub r21,r3,r25 + + mulu.l r25,r1,r2 + addi r0,14,r0 + xor r4,r5,r18 + shlrd r2,r0,r2 + mulu.l r2,r20,r3 + add r19,r2,r19 + shari.l r18,31,r18 + sub r25,r3,r25 + + mulu.l r25,r1,r2 + sub r25,r20,r25 + add r19,r18,r19 + shlrd r2,r0,r2 + mulu.l r2,r20,r3 + addi r25,1,r25 + add r19,r2,r19 + + cmpgt r25,r3,r25 + add.l r19,r25,r0 + xor r0,r18,r0 + blink tr0,r63 +#else /* ! 0 && ! 0 */ + + // inputs: r4,r5 + // clobbered: r1,r18,r19,r20,r21,r25,tr0 + // result in r0 + HIDDEN_FUNC(GLOBAL(sdivsi3_2)) +#ifndef __pic__ + FUNC(GLOBAL(sdivsi3)) +GLOBAL(sdivsi3): /* this is the shcompact entry point */ + // The special SHmedia entry point sdivsi3_1 prevents accidental linking + // with the SHcompact implementation, which clobbers tr1 / tr2. 
+ .global GLOBAL(sdivsi3_1) +GLOBAL(sdivsi3_1): + .global GLOBAL(div_table_internal) + movi (GLOBAL(div_table_internal) >> 16) & 65535, r20 + shori GLOBAL(div_table_internal) & 65535, r20 +#endif + .global GLOBAL(sdivsi3_2) + // div_table in r20 + // clobbered: r1,r18,r19,r21,r25,tr0 +GLOBAL(sdivsi3_2): + nsb r5, r1 + shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 + shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) + ldx.ub r20, r21, r19 // u0.8 + shari r25, 32, r25 // normalize to s2.30 + shlli r21, 1, r21 + muls.l r25, r19, r19 // s2.38 + ldx.w r20, r21, r21 // s2.14 + ptabs r18, tr0 + shari r19, 24, r19 // truncate to s2.14 + sub r21, r19, r19 // some 11 bit inverse in s1.14 + muls.l r19, r19, r21 // u0.28 + sub r63, r1, r1 + addi r1, 92, r1 + muls.l r25, r21, r18 // s2.58 + shlli r19, 45, r19 // multiply by two and convert to s2.58 + /* bubble */ + sub r19, r18, r18 + shari r18, 28, r18 // some 22 bit inverse in s1.30 + muls.l r18, r25, r0 // s2.60 + muls.l r18, r4, r25 // s32.30 + /* bubble */ + shari r0, 16, r19 // s-16.44 + muls.l r19, r18, r19 // s-16.74 + shari r25, 63, r0 + shari r4, 14, r18 // s19.-14 + shari r19, 30, r19 // s-16.44 + muls.l r19, r18, r19 // s15.30 + xor r21, r0, r21 // You could also use the constant 1 << 27. + add r21, r25, r21 + sub r21, r19, r21 + shard r21, r1, r21 + sub r21, r0, r0 + blink tr0, r63 +#ifndef __pic__ + ENDFUNC(GLOBAL(sdivsi3)) +#endif + ENDFUNC(GLOBAL(sdivsi3_2)) +#endif +#elif defined __SHMEDIA__ +/* m5compact-nofpu */ + // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2 + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + FUNC(GLOBAL(sdivsi3)) +GLOBAL(sdivsi3): + pt/l LOCAL(sdivsi3_dontsub), tr0 + pt/l LOCAL(sdivsi3_loop), tr1 + ptabs/l r18,tr2 + shari.l r4,31,r18 + shari.l r5,31,r19 + xor r4,r18,r20 + xor r5,r19,r21 + sub.l r20,r18,r20 + sub.l r21,r19,r21 + xor r18,r19,r19 + shlli r21,32,r25 + addi r25,-1,r21 + addz.l r20,r63,r20 +LOCAL(sdivsi3_loop): + shlli r20,1,r20 + bgeu/u r21,r20,tr0 + sub r20,r21,r20 +LOCAL(sdivsi3_dontsub): + addi.l r25,-1,r25 + bnei r25,-32,tr1 + xor r20,r19,r20 + sub.l r20,r19,r0 + blink tr2,r63 + ENDFUNC(GLOBAL(sdivsi3)) +#else /* ! __SHMEDIA__ */ + FUNC(GLOBAL(sdivsi3)) +GLOBAL(sdivsi3): + mov r4,r1 + mov r5,r0 + + tst r0,r0 + bt div0 + mov #0,r2 + div0s r2,r1 + subc r3,r3 + subc r2,r1 + div0s r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + addc r2,r1 + rts + mov r1,r0 + + +div0: rts + mov #0,r0 + + ENDFUNC(GLOBAL(sdivsi3)) +#endif /* ! __SHMEDIA__ */ +#endif /* ! __SH4__ */ +#endif +#ifdef L_udivsi3_i4 + + .title "SH DIVIDE" +!! 4 byte integer Divide code for the Renesas SH +#ifdef __SH4__ +!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4, +!! 
and t bit + + .global GLOBAL(udivsi3_i4) + HIDDEN_FUNC(GLOBAL(udivsi3_i4)) +GLOBAL(udivsi3_i4): + mov #1,r1 + cmp/hi r1,r5 + bf trivial + rotr r1 + xor r1,r4 + lds r4,fpul + mova L1,r0 +#ifdef FMOVD_WORKS + fmov.d @r0+,dr4 +#else + fmov.s @r0+,DR40 + fmov.s @r0,DR41 +#endif + float fpul,dr0 + xor r1,r5 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + rts + ftrc dr0,fpul + +trivial: + rts + lds r4,fpul + + .align 2 +#ifdef FMOVD_WORKS + .align 3 ! make double below 8 byte aligned. +#endif +L1: + .double 2147483648 + + ENDFUNC(GLOBAL(udivsi3_i4)) +#elif defined (__SH5__) && ! defined (__SH4_NOFPU__) +#if ! __SH5__ || __SH5__ == 32 +!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33 + .mode SHmedia + .global GLOBAL(udivsi3_i4) + HIDDEN_FUNC(GLOBAL(udivsi3_i4)) +GLOBAL(udivsi3_i4): + addz.l r4,r63,r20 + addz.l r5,r63,r21 + fmov.qd r20,dr0 + fmov.qd r21,dr32 + ptabs r18,tr0 + float.qd dr0,dr0 + float.qd dr32,dr32 + fdiv.d dr0,dr32,dr0 + ftrc.dq dr0,dr32 + fmov.s fr33,fr32 + blink tr0,r63 + + ENDFUNC(GLOBAL(udivsi3_i4)) +#endif /* ! __SH5__ || __SH5__ == 32 */ +#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) +!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 + + .global GLOBAL(udivsi3_i4) + HIDDEN_FUNC(GLOBAL(udivsi3_i4)) +GLOBAL(udivsi3_i4): + mov #1,r1 + cmp/hi r1,r5 + bf trivial + sts.l fpscr,@-r15 + mova L1,r0 + lds.l @r0+,fpscr + rotr r1 + xor r1,r4 + lds r4,fpul +#ifdef FMOVD_WORKS + fmov.d @r0+,dr4 +#else + fmov.s @r0+,DR40 + fmov.s @r0,DR41 +#endif + float fpul,dr0 + xor r1,r5 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + lds.l @r15+,fpscr + +#ifdef FMOVD_WORKS + .align 3 ! make double below 8 byte aligned. +#endif +trivial: + rts + lds r4,fpul + + .align 2 +L1: +#ifndef FMOVD_WORKS + .long 0x80000 +#else + .long 0x180000 +#endif + .double 2147483648 + + ENDFUNC(GLOBAL(udivsi3_i4)) +#endif /* ! __SH4__ */ +#endif + +#ifdef L_udivsi3 +/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with + sh2e/sh3e code. */ +#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) + +!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit + .global GLOBAL(udivsi3) + HIDDEN_FUNC(GLOBAL(udivsi3)) + +#if __SHMEDIA__ +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif + .align 2 +#if 0 +/* The assembly code that follows is a hand-optimized version of the C + code that follows. Note that the registers that are modified are + exactly those listed as clobbered in the patterns udivsi3_i1 and + udivsi3_i1_media. + +unsigned +__udivsi3 (i, j) + unsigned i, j; +{ + register unsigned long long r0 asm ("r0") = 0; + register unsigned long long r18 asm ("r18") = 1; + register unsigned long long r4 asm ("r4") = i; + register unsigned long long r19 asm ("r19") = j; + + r19 <<= 31; + r18 <<= 31; + do + if (r4 >= r19) + r0 |= r18, r4 -= r19; + while (r19 >>= 1, r18 >>= 1); + + return r0; +} +*/ +GLOBAL(udivsi3): + pt/l LOCAL(udivsi3_dontadd), tr2 + pt/l LOCAL(udivsi3_loop), tr1 + ptabs/l r18, tr0 + movi 0, r0 + movi 1, r18 + addz.l r5, r63, r19 + addz.l r4, r63, r4 + shlli r19, 31, r19 + shlli r18, 31, r18 +LOCAL(udivsi3_loop): + bgtu r19, r4, tr2 + or r0, r18, r0 + sub r4, r19, r4 +LOCAL(udivsi3_dontadd): + shlri r18, 1, r18 + shlri r19, 1, r19 + bnei r18, 0, tr1 + blink tr0, r63 +#else +GLOBAL(udivsi3): + // inputs: r4,r5 + // clobbered: r18,r19,r20,r21,r22,r25,tr0 + // result in r0. 
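+	// Rough outline of the computation below: normalize the divisor
+	// (nsb/shlld), build a fixed-point reciprocal estimate with the
+	// mmulfx.w/msub.w steps, multiply to get a first quotient estimate,
+	// then refine the remainder twice so that the final cmpgt/add.l
+	// corrects the quotient by at most one.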
+ addz.l r5,r63,r22 + nsb r22,r0 + shlld r22,r0,r25 + shlri r25,48,r25 + movi 0xffffffffffffbb0c,r20 // shift count eqiv 76 + sub r20,r25,r21 + mmulfx.w r21,r21,r19 + mshflo.w r21,r63,r21 + ptabs r18,tr0 + mmulfx.w r25,r19,r19 + sub r20,r0,r0 + /* bubble */ + msub.w r21,r19,r19 + addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21 + before the msub.w, but we need a different value for + r19 to keep errors under control. */ + mulu.l r4,r21,r18 + mmulfx.w r19,r19,r19 + shlli r21,15,r21 + shlrd r18,r0,r18 + mulu.l r18,r22,r20 + mmacnfx.wl r25,r19,r21 + /* bubble */ + sub r4,r20,r25 + + mulu.l r25,r21,r19 + addi r0,14,r0 + /* bubble */ + shlrd r19,r0,r19 + mulu.l r19,r22,r20 + add r18,r19,r18 + /* bubble */ + sub.l r25,r20,r25 + + mulu.l r25,r21,r19 + addz.l r25,r63,r25 + sub r25,r22,r25 + shlrd r19,r0,r19 + mulu.l r19,r22,r20 + addi r25,1,r25 + add r18,r19,r18 + + cmpgt r25,r20,r25 + add.l r18,r25,r0 + blink tr0,r63 +#endif +#elif defined (__SHMEDIA__) +/* m5compact-nofpu - more emphasis on code size than on speed, but don't + ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4. + So use a short shmedia loop. */ + // clobbered: r20,r21,r25,tr0,tr1,tr2 + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 +GLOBAL(udivsi3): + pt/l LOCAL(udivsi3_dontsub), tr0 + pt/l LOCAL(udivsi3_loop), tr1 + ptabs/l r18,tr2 + shlli r5,32,r25 + addi r25,-1,r21 + addz.l r4,r63,r20 +LOCAL(udivsi3_loop): + shlli r20,1,r20 + bgeu/u r21,r20,tr0 + sub r20,r21,r20 +LOCAL(udivsi3_dontsub): + addi.l r25,-1,r25 + bnei r25,-32,tr1 + add.l r20,r63,r0 + blink tr2,r63 +#else /* ! defined (__SHMEDIA__) */ +LOCAL(div8): + div1 r5,r4 +LOCAL(div7): + div1 r5,r4; div1 r5,r4; div1 r5,r4 + div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 + +LOCAL(divx4): + div1 r5,r4; rotcl r0 + div1 r5,r4; rotcl r0 + div1 r5,r4; rotcl r0 + rts; div1 r5,r4 + +GLOBAL(udivsi3): + sts.l pr,@-r15 + extu.w r5,r0 + cmp/eq r5,r0 +#ifdef __sh1__ + bf LOCAL(large_divisor) +#else + bf/s LOCAL(large_divisor) +#endif + div0u + swap.w r4,r0 + shlr16 r4 + bsr LOCAL(div8) + shll16 r5 + bsr LOCAL(div7) + div1 r5,r4 + xtrct r4,r0 + xtrct r0,r4 + bsr LOCAL(div8) + swap.w r4,r4 + bsr LOCAL(div7) + div1 r5,r4 + lds.l @r15+,pr + xtrct r4,r0 + swap.w r0,r0 + rotcl r0 + rts + shlr16 r5 + +LOCAL(large_divisor): +#ifdef __sh1__ + div0u +#endif + mov #0,r0 + xtrct r4,r0 + xtrct r0,r4 + bsr LOCAL(divx4) + rotcl r0 + bsr LOCAL(divx4) + rotcl r0 + bsr LOCAL(divx4) + rotcl r0 + bsr LOCAL(divx4) + rotcl r0 + lds.l @r15+,pr + rts + rotcl r0 + + ENDFUNC(GLOBAL(udivsi3)) +#endif /* ! __SHMEDIA__ */ +#endif /* __SH4__ */ +#endif /* L_udivsi3 */ + +#ifdef L_udivdi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(udivdi3) + FUNC(GLOBAL(udivdi3)) +GLOBAL(udivdi3): + HIDDEN_ALIAS(udivdi3_internal,udivdi3) + shlri r3,1,r4 + nsb r4,r22 + shlld r3,r22,r6 + shlri r6,49,r5 + movi 0xffffffffffffbaf1,r21 /* .l shift count 17. 
*/ + sub r21,r5,r1 + mmulfx.w r1,r1,r4 + mshflo.w r1,r63,r1 + sub r63,r22,r20 // r63 == 64 % 64 + mmulfx.w r5,r4,r4 + pta LOCAL(large_divisor),tr0 + addi r20,32,r9 + msub.w r1,r4,r1 + madd.w r1,r1,r1 + mmulfx.w r1,r1,r4 + shlri r6,32,r7 + bgt/u r9,r63,tr0 // large_divisor + mmulfx.w r5,r4,r4 + shlri r2,32+14,r19 + addi r22,-31,r0 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r19,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + mulu.l r5,r3,r8 + mshalds.l r1,r21,r1 + shari r4,26,r4 + shlld r8,r0,r8 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r2,r8,r2 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ + + shlri r2,22,r21 + mulu.l r21,r1,r21 + shlld r5,r0,r8 + addi r20,30-22,r0 + shlrd r21,r0,r21 + mulu.l r21,r3,r5 + add r8,r21,r8 + mcmpgt.l r21,r63,r21 // See Note 1 + addi r20,30,r0 + mshfhi.l r63,r21,r21 + sub r2,r5,r2 + andc r2,r21,r2 + + /* small divisor: need a third divide step */ + mulu.l r2,r1,r7 + ptabs r18,tr0 + addi r2,1,r2 + shlrd r7,r0,r7 + mulu.l r7,r3,r5 + add r8,r7,r8 + sub r2,r3,r2 + cmpgt r2,r5,r5 + add r8,r5,r2 + /* could test r3 here to check for divide by zero. */ + blink tr0,r63 + +LOCAL(large_divisor): + mmulfx.w r5,r4,r4 + shlrd r2,r9,r25 + shlri r25,32,r8 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r8,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + shlri r5,14-1,r8 + mulu.l r8,r7,r5 + mshalds.l r1,r21,r1 + shari r4,26,r4 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r25,r5,r25 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ + + shlri r25,22,r21 + mulu.l r21,r1,r21 + pta LOCAL(no_lo_adj),tr0 + addi r22,32,r0 + shlri r21,40,r21 + mulu.l r21,r7,r5 + add r8,r21,r8 + shlld r2,r0,r2 + sub r25,r5,r25 + bgtu/u r7,r25,tr0 // no_lo_adj + addi r8,1,r8 + sub r25,r7,r25 +LOCAL(no_lo_adj): + mextr4 r2,r25,r2 + + /* large_divisor: only needs a few adjustments. */ + mulu.l r8,r6,r5 + ptabs r18,tr0 + /* bubble */ + cmpgtu r5,r2,r5 + sub r8,r5,r2 + blink tr0,r63 + ENDFUNC(GLOBAL(udivdi3)) +/* Note 1: To shift the result of the second divide stage so that the result + always fits into 32 bits, yet we still reduce the rest sufficiently + would require a lot of instructions to do the shifts just right. Using + the full 64 bit shift result to multiply with the divisor would require + four extra instructions for the upper 32 bits (shift / mulu / shift / sub). + Fortunately, if the upper 32 bits of the shift result are nonzero, we + know that the rest after taking this partial result into account will + fit into 32 bits. So we just clear the upper 32 bits of the rest if the + upper 32 bits of the partial result are nonzero. 
*/ +#endif /* __SHMEDIA__ */ +#endif /* L_udivdi3 */ + +#ifdef L_divdi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(divdi3) + FUNC(GLOBAL(divdi3)) +GLOBAL(divdi3): + pta GLOBAL(udivdi3_internal),tr0 + shari r2,63,r22 + shari r3,63,r23 + xor r2,r22,r2 + xor r3,r23,r3 + sub r2,r22,r2 + sub r3,r23,r3 + beq/u r22,r23,tr0 + ptabs r18,tr1 + blink tr0,r18 + sub r63,r2,r2 + blink tr1,r63 + ENDFUNC(GLOBAL(divdi3)) +#endif /* __SHMEDIA__ */ +#endif /* L_divdi3 */ + +#ifdef L_umoddi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(umoddi3) + FUNC(GLOBAL(umoddi3)) +GLOBAL(umoddi3): + HIDDEN_ALIAS(umoddi3_internal,umoddi3) + shlri r3,1,r4 + nsb r4,r22 + shlld r3,r22,r6 + shlri r6,49,r5 + movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ + sub r21,r5,r1 + mmulfx.w r1,r1,r4 + mshflo.w r1,r63,r1 + sub r63,r22,r20 // r63 == 64 % 64 + mmulfx.w r5,r4,r4 + pta LOCAL(large_divisor),tr0 + addi r20,32,r9 + msub.w r1,r4,r1 + madd.w r1,r1,r1 + mmulfx.w r1,r1,r4 + shlri r6,32,r7 + bgt/u r9,r63,tr0 // large_divisor + mmulfx.w r5,r4,r4 + shlri r2,32+14,r19 + addi r22,-31,r0 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r19,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + mulu.l r5,r3,r5 + mshalds.l r1,r21,r1 + shari r4,26,r4 + shlld r5,r0,r5 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r2,r5,r2 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ + + shlri r2,22,r21 + mulu.l r21,r1,r21 + addi r20,30-22,r0 + /* bubble */ /* could test r3 here to check for divide by zero. */ + shlrd r21,r0,r21 + mulu.l r21,r3,r5 + mcmpgt.l r21,r63,r21 // See Note 1 + addi r20,30,r0 + mshfhi.l r63,r21,r21 + sub r2,r5,r2 + andc r2,r21,r2 + + /* small divisor: need a third divide step */ + mulu.l r2,r1,r7 + ptabs r18,tr0 + sub r2,r3,r8 /* re-use r8 here for rest - r3 */ + shlrd r7,r0,r7 + mulu.l r7,r3,r5 + /* bubble */ + addi r8,1,r7 + cmpgt r7,r5,r7 + cmvne r7,r8,r2 + sub r2,r5,r2 + blink tr0,r63 + +LOCAL(large_divisor): + mmulfx.w r5,r4,r4 + shlrd r2,r9,r25 + shlri r25,32,r8 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r8,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + shlri r5,14-1,r8 + mulu.l r8,r7,r5 + mshalds.l r1,r21,r1 + shari r4,26,r4 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r25,r5,r25 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ + + shlri r25,22,r21 + mulu.l r21,r1,r21 + pta LOCAL(no_lo_adj),tr0 + addi r22,32,r0 + shlri r21,40,r21 + mulu.l r21,r7,r5 + add r8,r21,r8 + shlld r2,r0,r2 + sub r25,r5,r25 + bgtu/u r7,r25,tr0 // no_lo_adj + addi r8,1,r8 + sub r25,r7,r25 +LOCAL(no_lo_adj): + mextr4 r2,r25,r2 + + /* large_divisor: only needs a few adjustments. 
*/ + mulu.l r8,r6,r5 + ptabs r18,tr0 + add r2,r6,r7 + cmpgtu r5,r2,r8 + cmvne r8,r7,r2 + sub r2,r5,r2 + shlrd r2,r22,r2 + blink tr0,r63 + ENDFUNC(GLOBAL(umoddi3)) +/* Note 1: To shift the result of the second divide stage so that the result + always fits into 32 bits, yet we still reduce the rest sufficiently + would require a lot of instructions to do the shifts just right. Using + the full 64 bit shift result to multiply with the divisor would require + four extra instructions for the upper 32 bits (shift / mulu / shift / sub). + Fortunately, if the upper 32 bits of the shift result are nonzero, we + know that the rest after taking this partial result into account will + fit into 32 bits. So we just clear the upper 32 bits of the rest if the + upper 32 bits of the partial result are nonzero. */ +#endif /* __SHMEDIA__ */ +#endif /* L_umoddi3 */ + +#ifdef L_moddi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(moddi3) + FUNC(GLOBAL(moddi3)) +GLOBAL(moddi3): + pta GLOBAL(umoddi3_internal),tr0 + shari r2,63,r22 + shari r3,63,r23 + xor r2,r22,r2 + xor r3,r23,r3 + sub r2,r22,r2 + sub r3,r23,r3 + beq/u r22,r63,tr0 + ptabs r18,tr1 + blink tr0,r18 + sub r63,r2,r2 + blink tr1,r63 + ENDFUNC(GLOBAL(moddi3)) +#endif /* __SHMEDIA__ */ +#endif /* L_moddi3 */ + +#ifdef L_set_fpscr +#if !defined (__SH2A_NOFPU__) +#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32 +#ifdef __SH5__ + .mode SHcompact +#endif + .global GLOBAL(set_fpscr) + HIDDEN_FUNC(GLOBAL(set_fpscr)) +GLOBAL(set_fpscr): + lds r4,fpscr +#ifdef __PIC__ + mov.l r12,@-r15 +#ifdef __vxworks + mov.l LOCAL(set_fpscr_L0_base),r12 + mov.l LOCAL(set_fpscr_L0_index),r0 + mov.l @r12,r12 + mov.l @(r0,r12),r12 +#else + mova LOCAL(set_fpscr_L0),r0 + mov.l LOCAL(set_fpscr_L0),r12 + add r0,r12 +#endif + mov.l LOCAL(set_fpscr_L1),r0 + mov.l @(r0,r12),r1 + mov.l @r15+,r12 +#else + mov.l LOCAL(set_fpscr_L1),r1 +#endif + swap.w r4,r0 + or #24,r0 +#ifndef FMOVD_WORKS + xor #16,r0 +#endif +#if defined(__SH4__) || defined (__SH2A_DOUBLE__) + swap.w r0,r3 + mov.l r3,@(4,r1) +#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ + swap.w r0,r2 + mov.l r2,@r1 +#endif +#ifndef FMOVD_WORKS + xor #8,r0 +#else + xor #24,r0 +#endif +#if defined(__SH4__) || defined (__SH2A_DOUBLE__) + swap.w r0,r2 + rts + mov.l r2,@r1 +#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ + swap.w r0,r3 + rts + mov.l r3,@(4,r1) +#endif + .align 2 +#ifdef __PIC__ +#ifdef __vxworks +LOCAL(set_fpscr_L0_base): + .long ___GOTT_BASE__ +LOCAL(set_fpscr_L0_index): + .long ___GOTT_INDEX__ +#else +LOCAL(set_fpscr_L0): + .long _GLOBAL_OFFSET_TABLE_ +#endif +LOCAL(set_fpscr_L1): + .long GLOBAL(fpscr_values@GOT) +#else +LOCAL(set_fpscr_L1): + .long GLOBAL(fpscr_values) +#endif + + ENDFUNC(GLOBAL(set_fpscr)) +#ifndef NO_FPSCR_VALUES +#ifdef __ELF__ + .comm GLOBAL(fpscr_values),8,4 +#else + .comm GLOBAL(fpscr_values),8 +#endif /* ELF */ +#endif /* NO_FPSCR_VALUES */ +#endif /* SH2E / SH3E / SH4 */ +#endif /* __SH2A_NOFPU__ */ +#endif /* L_set_fpscr */ +#ifdef L_ic_invalidate +#if __SH5__ == 32 + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(init_trampoline) + HIDDEN_FUNC(GLOBAL(init_trampoline)) +GLOBAL(init_trampoline): + st.l r0,8,r2 +#ifdef __LITTLE_ENDIAN__ + movi 9,r20 + shori 0x402b,r20 + shori 0xd101,r20 + shori 0xd002,r20 +#else + movi 0xffffffffffffd002,r20 
+ shori 0xd101,r20 + shori 0x402b,r20 + shori 9,r20 +#endif + st.q r0,0,r20 + st.l r0,12,r3 + ENDFUNC(GLOBAL(init_trampoline)) + .global GLOBAL(ic_invalidate) + HIDDEN_FUNC(GLOBAL(ic_invalidate)) +GLOBAL(ic_invalidate): + ocbwb r0,0 + synco + icbi r0, 0 + ptabs r18, tr0 + synci + blink tr0, r63 + ENDFUNC(GLOBAL(ic_invalidate)) +#elif defined(__SH4A__) + .global GLOBAL(ic_invalidate) + HIDDEN_FUNC(GLOBAL(ic_invalidate)) +GLOBAL(ic_invalidate): + ocbwb @r4 + synco + icbi @r4 + rts + nop + ENDFUNC(GLOBAL(ic_invalidate)) +#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) + /* For system code, we use ic_invalidate_line_i, but user code + needs a different mechanism. A kernel call is generally not + available, and it would also be slow. Different SH4 variants use + different sizes and associativities of the Icache. We use a small + bit of dispatch code that can be put hidden in every shared object, + which calls the actual processor-specific invalidation code in a + separate module. + Or if you have operating system support, the OS could mmap the + procesor-specific code from a single page, since it is highly + repetitive. */ + .global GLOBAL(ic_invalidate) + HIDDEN_FUNC(GLOBAL(ic_invalidate)) +GLOBAL(ic_invalidate): +#ifdef __pic__ +#ifdef __vxworks + mov.l 1f,r1 + mov.l 2f,r0 + mov.l @r1,r1 + mov.l 0f,r2 + mov.l @(r0,r1),r0 +#else + mov.l 1f,r1 + mova 1f,r0 + mov.l 0f,r2 + add r1,r0 +#endif + mov.l @(r0,r2),r1 +#else + mov.l 0f,r1 +#endif + ocbwb @r4 + mov.l @(8,r1),r0 + sub r1,r4 + and r4,r0 + add r1,r0 + jmp @r0 + mov.l @(4,r1),r0 + .align 2 +#ifndef __pic__ +0: .long GLOBAL(ic_invalidate_array) +#else /* __pic__ */ + .global GLOBAL(ic_invalidate_array) +0: .long GLOBAL(ic_invalidate_array)@GOT +#ifdef __vxworks +1: .long ___GOTT_BASE__ +2: .long ___GOTT_INDEX__ +#else +1: .long _GLOBAL_OFFSET_TABLE_ +#endif + ENDFUNC(GLOBAL(ic_invalidate)) +#endif /* __pic__ */ +#endif /* SH4 */ +#endif /* L_ic_invalidate */ + +#ifdef L_ic_invalidate_array +#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)))) + .global GLOBAL(ic_invalidate_array) + /* This is needed when an SH4 dso with trampolines is used on SH4A. */ + .global GLOBAL(ic_invalidate_array) + FUNC(GLOBAL(ic_invalidate_array)) +GLOBAL(ic_invalidate_array): + add r1,r4 + synco + icbi @r4 + rts + nop + .align 2 + .long 0 + ENDFUNC(GLOBAL(ic_invalidate_array)) +#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) + .global GLOBAL(ic_invalidate_array) + .p2align 5 + FUNC(GLOBAL(ic_invalidate_array)) +/* This must be aligned to the beginning of a cache line. */ +GLOBAL(ic_invalidate_array): +#ifndef WAYS +#define WAYS 4 +#define WAY_SIZE 0x4000 +#endif +#if WAYS == 1 + .rept WAY_SIZE * WAYS / 32 + rts + nop + .rept 7 + .long WAY_SIZE - 32 + .endr + .endr +#elif WAYS <= 6 + .rept WAY_SIZE * WAYS / 32 + braf r0 + add #-8,r0 + .long WAY_SIZE + 8 + .long WAY_SIZE - 32 + .rept WAYS-2 + braf r0 + nop + .endr + .rept 7 - WAYS + rts + nop + .endr + .endr +#else /* WAYS > 6 */ + /* This variant needs two different pages for mmap-ing. 
*/ + .rept WAYS-1 + .rept WAY_SIZE / 32 + braf r0 + nop + .long WAY_SIZE + .rept 6 + .long WAY_SIZE - 32 + .endr + .endr + .endr + .rept WAY_SIZE / 32 + rts + .rept 15 + nop + .endr + .endr +#endif /* WAYS */ + ENDFUNC(GLOBAL(ic_invalidate_array)) +#endif /* SH4 */ +#endif /* L_ic_invalidate_array */ + +#if defined (__SH5__) && __SH5__ == 32 +#ifdef L_shcompact_call_trampoline + .section .rodata + .align 1 +LOCAL(ct_main_table): +.word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label) + .mode SHmedia + .section .text..SHmedia32, "ax" + .align 2 + + /* This function loads 64-bit general-purpose registers from the + stack, from a memory address contained in them or from an FP + register, according to a cookie passed in r1. Its execution + time is linear on the number of registers that actually have + to be copied. See sh.h for details on the actual bit pattern. + + The function to be called is passed in r0. If a 32-bit return + value is expected, the actual function will be tail-called, + otherwise the return address will be stored in r10 (that the + caller should expect to be clobbered) and the return value + will be expanded into r2/r3 upon return. */ + + .global GLOBAL(GCC_shcompact_call_trampoline) + FUNC(GLOBAL(GCC_shcompact_call_trampoline)) +GLOBAL(GCC_shcompact_call_trampoline): + ptabs/l r0, tr0 /* Prepare to call the actual function. */ + movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0 + pt/l LOCAL(ct_loop), tr1 + addz.l r1, r63, r1 + shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 +LOCAL(ct_loop): + nsb r1, r28 + shlli r28, 1, r29 + ldx.w r0, r29, r30 +LOCAL(ct_main_label): + ptrel/l r30, tr2 + blink tr2, r63 +LOCAL(ct_r2_fp): /* Copy r2 from an FP register. 
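The dispatch loop of GCC_shcompact_call_trampoline above (the nsb / ldx.w / ptrel sequence between LOCAL(ct_loop) and LOCAL(ct_main_label)) can be read as the C skeleton below. The actual field encoding of the cookie lives in gcc/config/sh/sh.h; handle_field() here is a placeholder name, not the real decoder.

    /* Skeleton of the cookie-driven dispatch: each ct_* handler clears the
       bits of the field it consumed, so the loop runs once per register
       that actually needs fixing up.  */
    extern unsigned int handle_field (unsigned int cookie, int msb);

    static void call_trampoline_model (unsigned int cookie)
    {
      while (cookie != 0)
        {
          /* nsb + ldx.w: pick a jump-table entry from the position of the
             most significant set bit of the cookie.  */
          int msb = 31 - __builtin_clz (cookie);
          cookie = handle_field (cookie, msb);
        }
      /* The target passed in r0 is then tail-called, or called with the
         return address in r10 when the result must be expanded into r2/r3.  */
    }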
*/ + /* It must be dr0, so just do it. */ + fmov.dq dr0, r2 + movi 7, r30 + shlli r30, 29, r31 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */ + /* It is either dr0 or dr2. */ + movi 7, r30 + shlri r1, 26, r32 + shlli r30, 26, r31 + andc r1, r31, r1 + fmov.dq dr0, r3 + beqi/l r32, 4, tr1 + fmov.dq dr2, r3 + blink tr1, r63 +LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */ + shlri r1, 23 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32 +LOCAL(ct_r4_fp_base): + ptrel/l r32, tr2 + movi 7, r30 + shlli r30, 23, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r4_fp_copy): + fmov.dq dr0, r4 + blink tr1, r63 + fmov.dq dr2, r4 + blink tr1, r63 + fmov.dq dr4, r4 + blink tr1, r63 +LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */ + shlri r1, 20 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32 +LOCAL(ct_r5_fp_base): + ptrel/l r32, tr2 + movi 7, r30 + shlli r30, 20, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r5_fp_copy): + fmov.dq dr0, r5 + blink tr1, r63 + fmov.dq dr2, r5 + blink tr1, r63 + fmov.dq dr4, r5 + blink tr1, r63 + fmov.dq dr6, r5 + blink tr1, r63 +LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */ + /* It must be dr8. */ + fmov.dq dr8, r6 + movi 15, r30 + shlli r30, 16, r31 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */ + shlri r1, 16 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32 +LOCAL(ct_r6_fp_base): + ptrel/l r32, tr2 + movi 7, r30 + shlli r30, 16, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r6_fp_copy): + fmov.dq dr0, r6 + blink tr1, r63 + fmov.dq dr2, r6 + blink tr1, r63 + fmov.dq dr4, r6 + blink tr1, r63 + fmov.dq dr6, r6 + blink tr1, r63 +LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */ + /* It is either dr8 or dr10. */ + movi 15 << 12, r31 + shlri r1, 12, r32 + andc r1, r31, r1 + fmov.dq dr8, r7 + beqi/l r32, 8, tr1 + fmov.dq dr10, r7 + blink tr1, r63 +LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */ + shlri r1, 12 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32 +LOCAL(ct_r7_fp_base): + ptrel/l r32, tr2 + movi 7 << 12, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r7_fp_copy): + fmov.dq dr0, r7 + blink tr1, r63 + fmov.dq dr2, r7 + blink tr1, r63 + fmov.dq dr4, r7 + blink tr1, r63 + fmov.dq dr6, r7 + blink tr1, r63 +LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */ + /* It is either dr8 or dr10. */ + movi 15 << 8, r31 + andi r1, 1 << 8, r32 + andc r1, r31, r1 + fmov.dq dr8, r8 + beq/l r32, r63, tr1 + fmov.dq dr10, r8 + blink tr1, r63 +LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */ + shlri r1, 8 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32 +LOCAL(ct_r8_fp_base): + ptrel/l r32, tr2 + movi 7 << 8, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r8_fp_copy): + fmov.dq dr0, r8 + blink tr1, r63 + fmov.dq dr2, r8 + blink tr1, r63 + fmov.dq dr4, r8 + blink tr1, r63 + fmov.dq dr6, r8 + blink tr1, r63 +LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */ + /* It is either dr8 or dr10. */ + movi 15 << 4, r31 + andi r1, 1 << 4, r32 + andc r1, r31, r1 + fmov.dq dr8, r9 + beq/l r32, r63, tr1 + fmov.dq dr10, r9 + blink tr1, r63 +LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. 
*/ + shlri r1, 4 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32 +LOCAL(ct_r9_fp_base): + ptrel/l r32, tr2 + movi 7 << 4, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r9_fp_copy): + fmov.dq dr0, r9 + blink tr1, r63 + fmov.dq dr2, r9 + blink tr1, r63 + fmov.dq dr4, r9 + blink tr1, r63 + fmov.dq dr6, r9 + blink tr1, r63 +LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */ + pt/l LOCAL(ct_r2_load), tr2 + movi 3, r30 + shlli r30, 29, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r2, 8, r3 + ldx.q r2, r63, r2 + /* Fall through. */ +LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */ + pt/l LOCAL(ct_r3_load), tr2 + movi 3, r30 + shlli r30, 26, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r3, 8, r4 + ldx.q r3, r63, r3 +LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */ + pt/l LOCAL(ct_r4_load), tr2 + movi 3, r30 + shlli r30, 23, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r4, 8, r5 + ldx.q r4, r63, r4 +LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */ + pt/l LOCAL(ct_r5_load), tr2 + movi 3, r30 + shlli r30, 20, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r5, 8, r6 + ldx.q r5, r63, r5 +LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */ + pt/l LOCAL(ct_r6_load), tr2 + movi 3 << 16, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r6, 8, r7 + ldx.q r6, r63, r6 +LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */ + pt/l LOCAL(ct_r7_load), tr2 + movi 3 << 12, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r7, 8, r8 + ldx.q r7, r63, r7 +LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */ + pt/l LOCAL(ct_r8_load), tr2 + movi 3 << 8, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r8, 8, r9 + ldx.q r8, r63, r8 +LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */ + pt/l LOCAL(ct_check_tramp), tr2 + ldx.q r9, r63, r9 + blink tr2, r63 +LOCAL(ct_r2_load): + ldx.q r2, r63, r2 + blink tr1, r63 +LOCAL(ct_r3_load): + ldx.q r3, r63, r3 + blink tr1, r63 +LOCAL(ct_r4_load): + ldx.q r4, r63, r4 + blink tr1, r63 +LOCAL(ct_r5_load): + ldx.q r5, r63, r5 + blink tr1, r63 +LOCAL(ct_r6_load): + ldx.q r6, r63, r6 + blink tr1, r63 +LOCAL(ct_r7_load): + ldx.q r7, r63, r7 + blink tr1, r63 +LOCAL(ct_r8_load): + ldx.q r8, r63, r8 + blink tr1, r63 +LOCAL(ct_r2_pop): /* Pop r2 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r2 + shlli r30, 29, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r3_pop): /* Pop r3 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r3 + shlli r30, 26, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r4_pop): /* Pop r4 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r4 + shlli r30, 23, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r5_pop): /* Pop r5 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r5 + shlli r30, 20, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r6_pop): /* Pop r6 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r6 + shlli r30, 16, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r7_pop): /* Pop r7 from the stack. */ + ldx.q r15, r63, r7 + movi 1 << 12, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r8_pop): /* Pop r8 from the stack. 
*/ + ldx.q r15, r63, r8 + movi 1 << 8, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */ + andi r1, 7 << 1, r30 + movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32 + shlli r30, 2, r31 + shori LOCAL(ct_end_of_pop_seq) & 65535, r32 + sub.l r32, r31, r33 + ptabs/l r33, tr2 + blink tr2, r63 +LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */ + ldx.q r15, r63, r3 + addi.l r15, 8, r15 + ldx.q r15, r63, r4 + addi.l r15, 8, r15 + ldx.q r15, r63, r5 + addi.l r15, 8, r15 + ldx.q r15, r63, r6 + addi.l r15, 8, r15 + ldx.q r15, r63, r7 + addi.l r15, 8, r15 + ldx.q r15, r63, r8 + addi.l r15, 8, r15 +LOCAL(ct_r9_pop): /* Pop r9 from the stack. */ + ldx.q r15, r63, r9 + addi.l r15, 8, r15 +LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */ +LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */ + pt/u LOCAL(ct_ret_wide), tr2 + andi r1, 1, r1 + bne/u r1, r63, tr2 +LOCAL(ct_call_func): /* Just branch to the function. */ + blink tr0, r63 +LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its + 64-bit return value. */ + add.l r18, r63, r10 + blink tr0, r18 + ptabs r10, tr0 +#if __LITTLE_ENDIAN__ + shari r2, 32, r3 + add.l r2, r63, r2 +#else + add.l r2, r63, r3 + shari r2, 32, r2 +#endif + blink tr0, r63 + + ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline)) +#endif /* L_shcompact_call_trampoline */ + +#ifdef L_shcompact_return_trampoline + /* This function does the converse of the code in `ret_wide' + above. It is tail-called by SHcompact functions returning + 64-bit non-floating-point values, to pack the 32-bit values in + r2 and r3 into r2. */ + + .mode SHmedia + .section .text..SHmedia32, "ax" + .align 2 + .global GLOBAL(GCC_shcompact_return_trampoline) + HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline)) +GLOBAL(GCC_shcompact_return_trampoline): + ptabs/l r18, tr0 +#if __LITTLE_ENDIAN__ + addz.l r2, r63, r2 + shlli r3, 32, r3 +#else + addz.l r3, r63, r3 + shlli r2, 32, r2 +#endif + or r3, r2, r2 + blink tr0, r63 + + ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline)) +#endif /* L_shcompact_return_trampoline */ + +#ifdef L_shcompact_incoming_args + .section .rodata + .align 1 +LOCAL(ia_main_table): +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r9_push) - datalabel 
LOCAL(ia_main_label) +.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) + .mode SHmedia + .section .text..SHmedia32, "ax" + .align 2 + + /* This function stores 64-bit general-purpose registers back in + the stack, and loads the address in which each register + was stored into itself. The lower 32 bits of r17 hold the address + to begin storing, and the upper 32 bits of r17 hold the cookie. + Its execution time is linear on the + number of registers that actually have to be copied, and it is + optimized for structures larger than 64 bits, as opposed to + individual `long long' arguments. See sh.h for details on the + actual bit pattern. */ + + .global GLOBAL(GCC_shcompact_incoming_args) + FUNC(GLOBAL(GCC_shcompact_incoming_args)) +GLOBAL(GCC_shcompact_incoming_args): + ptabs/l r18, tr0 /* Prepare to return. */ + shlri r17, 32, r0 /* Load the cookie. */ + movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 + pt/l LOCAL(ia_loop), tr1 + add.l r17, r63, r17 + shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 +LOCAL(ia_loop): + nsb r0, r36 + shlli r36, 1, r37 + ldx.w r43, r37, r38 +LOCAL(ia_main_label): + ptrel/l r38, tr2 + blink tr2, r63 +LOCAL(ia_r2_ld): /* Store r2 and load its address. */ + movi 3, r38 + shlli r38, 29, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r2 + add.l r17, r63, r2 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r3_ld): /* Store r3 and load its address. */ + movi 3, r38 + shlli r38, 26, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r3 + add.l r17, r63, r3 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r4_ld): /* Store r4 and load its address. */ + movi 3, r38 + shlli r38, 23, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r4 + add.l r17, r63, r4 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r5_ld): /* Store r5 and load its address. */ + movi 3, r38 + shlli r38, 20, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r5 + add.l r17, r63, r5 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r6_ld): /* Store r6 and load its address. */ + movi 3, r38 + shlli r38, 16, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r6 + add.l r17, r63, r6 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r7_ld): /* Store r7 and load its address. */ + movi 3 << 12, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r7 + add.l r17, r63, r7 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r8_ld): /* Store r8 and load its address. */ + movi 3 << 8, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r8 + add.l r17, r63, r8 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r9_ld): /* Store r9 and load its address. */ + stx.q r17, r63, r9 + add.l r17, r63, r9 + blink tr0, r63 +LOCAL(ia_r2_push): /* Push r2 onto the stack. */ + movi 1, r38 + shlli r38, 29, r39 + andc r0, r39, r0 + stx.q r17, r63, r2 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r3_push): /* Push r3 onto the stack. */ + movi 1, r38 + shlli r38, 26, r39 + andc r0, r39, r0 + stx.q r17, r63, r3 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r4_push): /* Push r4 onto the stack. */ + movi 1, r38 + shlli r38, 23, r39 + andc r0, r39, r0 + stx.q r17, r63, r4 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r5_push): /* Push r5 onto the stack. 
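The ia_*_ld handlers above implement the "store the register, then make it point at its save slot" step described in the function comment. In rough C terms each one does the following; the helper name is invented for the sketch, and only the store-and-replace step is shown (on entry the low 32 bits of r17 hold the first slot, the high 32 bits the cookie).

    typedef unsigned long long reg64;

    static reg64 *spill_and_relocate (reg64 *slot, reg64 *reg)
    {
      *slot = *reg;                          /* stx.q  r17, r63, rN            */
      *reg = (reg64) (unsigned long) slot;   /* add.l  r17, r63, rN: the
                                                register now holds the address
                                                it was saved at                 */
      return slot + 1;                       /* addi.l r17, 8, r17             */
    }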
*/ + movi 1, r38 + shlli r38, 20, r39 + andc r0, r39, r0 + stx.q r17, r63, r5 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r6_push): /* Push r6 onto the stack. */ + movi 1, r38 + shlli r38, 16, r39 + andc r0, r39, r0 + stx.q r17, r63, r6 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r7_push): /* Push r7 onto the stack. */ + movi 1 << 12, r39 + andc r0, r39, r0 + stx.q r17, r63, r7 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r8_push): /* Push r8 onto the stack. */ + movi 1 << 8, r39 + andc r0, r39, r0 + stx.q r17, r63, r8 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */ + andi r0, 7 << 1, r38 + movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 + shlli r38, 2, r39 + shori LOCAL(ia_end_of_push_seq) & 65535, r40 + sub.l r40, r39, r41 + ptabs/l r41, tr2 + blink tr2, r63 +LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */ + stx.q r17, r63, r3 + addi.l r17, 8, r17 + stx.q r17, r63, r4 + addi.l r17, 8, r17 + stx.q r17, r63, r5 + addi.l r17, 8, r17 + stx.q r17, r63, r6 + addi.l r17, 8, r17 + stx.q r17, r63, r7 + addi.l r17, 8, r17 + stx.q r17, r63, r8 + addi.l r17, 8, r17 +LOCAL(ia_r9_push): /* Push r9 onto the stack. */ + stx.q r17, r63, r9 +LOCAL(ia_return): /* Return. */ + blink tr0, r63 +LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */ + ENDFUNC(GLOBAL(GCC_shcompact_incoming_args)) +#endif /* L_shcompact_incoming_args */ +#endif +#if __SH5__ +#ifdef L_nested_trampoline +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif + .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */ + .global GLOBAL(GCC_nested_trampoline) + HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline)) +GLOBAL(GCC_nested_trampoline): + .mode SHmedia + ptrel/u r63, tr0 + gettr tr0, r0 +#if __SH5__ == 64 + ld.q r0, 24, r1 +#else + ld.l r0, 24, r1 +#endif + ptabs/l r1, tr1 +#if __SH5__ == 64 + ld.q r0, 32, r1 +#else + ld.l r0, 28, r1 +#endif + blink tr1, r63 + + ENDFUNC(GLOBAL(GCC_nested_trampoline)) +#endif /* L_nested_trampoline */ +#endif /* __SH5__ */ +#if __SH5__ == 32 +#ifdef L_push_pop_shmedia_regs + .section .text..SHmedia32,"ax" + .mode SHmedia + .align 2 +#ifndef __SH4_NOFPU__ + .global GLOBAL(GCC_push_shmedia_regs) + FUNC(GLOBAL(GCC_push_shmedia_regs)) +GLOBAL(GCC_push_shmedia_regs): + addi.l r15, -14*8, r15 + fst.d r15, 13*8, dr62 + fst.d r15, 12*8, dr60 + fst.d r15, 11*8, dr58 + fst.d r15, 10*8, dr56 + fst.d r15, 9*8, dr54 + fst.d r15, 8*8, dr52 + fst.d r15, 7*8, dr50 + fst.d r15, 6*8, dr48 + fst.d r15, 5*8, dr46 + fst.d r15, 4*8, dr44 + fst.d r15, 3*8, dr42 + fst.d r15, 2*8, dr40 + fst.d r15, 1*8, dr38 + fst.d r15, 0*8, dr36 +#else /* ! __SH4_NOFPU__ */ + .global GLOBAL(GCC_push_shmedia_regs_nofpu) + FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) +GLOBAL(GCC_push_shmedia_regs_nofpu): +#endif /* ! 
__SH4_NOFPU__ */ + ptabs/l r18, tr0 + addi.l r15, -27*8, r15 + gettr tr7, r62 + gettr tr6, r61 + gettr tr5, r60 + st.q r15, 26*8, r62 + st.q r15, 25*8, r61 + st.q r15, 24*8, r60 + st.q r15, 23*8, r59 + st.q r15, 22*8, r58 + st.q r15, 21*8, r57 + st.q r15, 20*8, r56 + st.q r15, 19*8, r55 + st.q r15, 18*8, r54 + st.q r15, 17*8, r53 + st.q r15, 16*8, r52 + st.q r15, 15*8, r51 + st.q r15, 14*8, r50 + st.q r15, 13*8, r49 + st.q r15, 12*8, r48 + st.q r15, 11*8, r47 + st.q r15, 10*8, r46 + st.q r15, 9*8, r45 + st.q r15, 8*8, r44 + st.q r15, 7*8, r35 + st.q r15, 6*8, r34 + st.q r15, 5*8, r33 + st.q r15, 4*8, r32 + st.q r15, 3*8, r31 + st.q r15, 2*8, r30 + st.q r15, 1*8, r29 + st.q r15, 0*8, r28 + blink tr0, r63 +#ifndef __SH4_NOFPU__ + ENDFUNC(GLOBAL(GCC_push_shmedia_regs)) +#else + ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) +#endif +#ifndef __SH4_NOFPU__ + .global GLOBAL(GCC_pop_shmedia_regs) + FUNC(GLOBAL(GCC_pop_shmedia_regs)) +GLOBAL(GCC_pop_shmedia_regs): + pt .L0, tr1 + movi 41*8, r0 + fld.d r15, 40*8, dr62 + fld.d r15, 39*8, dr60 + fld.d r15, 38*8, dr58 + fld.d r15, 37*8, dr56 + fld.d r15, 36*8, dr54 + fld.d r15, 35*8, dr52 + fld.d r15, 34*8, dr50 + fld.d r15, 33*8, dr48 + fld.d r15, 32*8, dr46 + fld.d r15, 31*8, dr44 + fld.d r15, 30*8, dr42 + fld.d r15, 29*8, dr40 + fld.d r15, 28*8, dr38 + fld.d r15, 27*8, dr36 + blink tr1, r63 +#else /* ! __SH4_NOFPU__ */ + .global GLOBAL(GCC_pop_shmedia_regs_nofpu) + FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) +GLOBAL(GCC_pop_shmedia_regs_nofpu): +#endif /* ! __SH4_NOFPU__ */ + movi 27*8, r0 +.L0: + ptabs r18, tr0 + ld.q r15, 26*8, r62 + ld.q r15, 25*8, r61 + ld.q r15, 24*8, r60 + ptabs r62, tr7 + ptabs r61, tr6 + ptabs r60, tr5 + ld.q r15, 23*8, r59 + ld.q r15, 22*8, r58 + ld.q r15, 21*8, r57 + ld.q r15, 20*8, r56 + ld.q r15, 19*8, r55 + ld.q r15, 18*8, r54 + ld.q r15, 17*8, r53 + ld.q r15, 16*8, r52 + ld.q r15, 15*8, r51 + ld.q r15, 14*8, r50 + ld.q r15, 13*8, r49 + ld.q r15, 12*8, r48 + ld.q r15, 11*8, r47 + ld.q r15, 10*8, r46 + ld.q r15, 9*8, r45 + ld.q r15, 8*8, r44 + ld.q r15, 7*8, r35 + ld.q r15, 6*8, r34 + ld.q r15, 5*8, r33 + ld.q r15, 4*8, r32 + ld.q r15, 3*8, r31 + ld.q r15, 2*8, r30 + ld.q r15, 1*8, r29 + ld.q r15, 0*8, r28 + add.l r15, r0, r15 + blink tr0, r63 + +#ifndef __SH4_NOFPU__ + ENDFUNC(GLOBAL(GCC_pop_shmedia_regs)) +#else + ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) +#endif +#endif /* __SH5__ == 32 */ +#endif /* L_push_pop_shmedia_regs */ + +#ifdef L_div_table +#if __SH5__ +#if defined(__pic__) && defined(__SHMEDIA__) + .global GLOBAL(sdivsi3) + FUNC(GLOBAL(sdivsi3)) +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif +#if 0 +/* ??? FIXME: Presumably due to a linker bug, exporting data symbols + in a text section does not work (at least for shared libraries): + the linker sets the LSB of the address as if this was SHmedia code. 
*/ +#define TEXT_DATA_BUG +#endif + .align 2 + // inputs: r4,r5 + // clobbered: r1,r18,r19,r20,r21,r25,tr0 + // result in r0 + .global GLOBAL(sdivsi3) +GLOBAL(sdivsi3): +#ifdef TEXT_DATA_BUG + ptb datalabel Local_div_table,tr0 +#else + ptb GLOBAL(div_table_internal),tr0 +#endif + nsb r5, r1 + shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 + shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) + /* bubble */ + gettr tr0,r20 + ldx.ub r20, r21, r19 // u0.8 + shari r25, 32, r25 // normalize to s2.30 + shlli r21, 1, r21 + muls.l r25, r19, r19 // s2.38 + ldx.w r20, r21, r21 // s2.14 + ptabs r18, tr0 + shari r19, 24, r19 // truncate to s2.14 + sub r21, r19, r19 // some 11 bit inverse in s1.14 + muls.l r19, r19, r21 // u0.28 + sub r63, r1, r1 + addi r1, 92, r1 + muls.l r25, r21, r18 // s2.58 + shlli r19, 45, r19 // multiply by two and convert to s2.58 + /* bubble */ + sub r19, r18, r18 + shari r18, 28, r18 // some 22 bit inverse in s1.30 + muls.l r18, r25, r0 // s2.60 + muls.l r18, r4, r25 // s32.30 + /* bubble */ + shari r0, 16, r19 // s-16.44 + muls.l r19, r18, r19 // s-16.74 + shari r25, 63, r0 + shari r4, 14, r18 // s19.-14 + shari r19, 30, r19 // s-16.44 + muls.l r19, r18, r19 // s15.30 + xor r21, r0, r21 // You could also use the constant 1 << 27. + add r21, r25, r21 + sub r21, r19, r21 + shard r21, r1, r21 + sub r21, r0, r0 + blink tr0, r63 + ENDFUNC(GLOBAL(sdivsi3)) +/* This table has been generated by divtab.c . +Defects for bias -330: + Max defect: 6.081536e-07 at -1.000000e+00 + Min defect: 2.849516e-08 at 1.030651e+00 + Max 2nd step defect: 9.606539e-12 at -1.000000e+00 + Min 2nd step defect: 0.000000e+00 at 0.000000e+00 + Defect at 1: 1.238659e-07 + Defect at -2: 1.061708e-07 */ +#else /* ! __pic__ || ! __SHMEDIA__ */ + .section .rodata +#endif /* __pic__ */ +#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__) + .balign 2 + .type Local_div_table,@object + .size Local_div_table,128 +/* negative division constants */ + .word -16638 + .word -17135 + .word -17737 + .word -18433 + .word -19103 + .word -19751 + .word -20583 + .word -21383 + .word -22343 + .word -23353 + .word -24407 + .word -25582 + .word -26863 + .word -28382 + .word -29965 + .word -31800 +/* negative division factors */ + .byte 66 + .byte 70 + .byte 75 + .byte 81 + .byte 87 + .byte 93 + .byte 101 + .byte 109 + .byte 119 + .byte 130 + .byte 142 + .byte 156 + .byte 172 + .byte 192 + .byte 214 + .byte 241 + .skip 16 +Local_div_table: + .skip 16 +/* positive division factors */ + .byte 241 + .byte 214 + .byte 192 + .byte 172 + .byte 156 + .byte 142 + .byte 130 + .byte 119 + .byte 109 + .byte 101 + .byte 93 + .byte 87 + .byte 81 + .byte 75 + .byte 70 + .byte 66 +/* positive division constants */ + .word 31801 + .word 29966 + .word 28383 + .word 26864 + .word 25583 + .word 24408 + .word 23354 + .word 22344 + .word 21384 + .word 20584 + .word 19752 + .word 19104 + .word 18434 + .word 17738 + .word 17136 + .word 16639 + .section .rodata +#endif /* TEXT_DATA_BUG */ + .balign 2 + .type GLOBAL(div_table),@object + .size GLOBAL(div_table),128 +/* negative division constants */ + .word -16638 + .word -17135 + .word -17737 + .word -18433 + .word -19103 + .word -19751 + .word -20583 + .word -21383 + .word -22343 + .word -23353 + .word -24407 + .word -25582 + .word -26863 + .word -28382 + .word -29965 + .word -31800 +/* negative division factors */ + .byte 66 + .byte 70 + .byte 75 + .byte 81 + .byte 87 + .byte 93 + .byte 101 + .byte 109 + .byte 119 + .byte 130 + .byte 142 + .byte 156 + .byte 172 
+ .byte 192 + .byte 214 + .byte 241 + .skip 16 + .global GLOBAL(div_table) +GLOBAL(div_table): + HIDDEN_ALIAS(div_table_internal,div_table) + .skip 16 +/* positive division factors */ + .byte 241 + .byte 214 + .byte 192 + .byte 172 + .byte 156 + .byte 142 + .byte 130 + .byte 119 + .byte 109 + .byte 101 + .byte 93 + .byte 87 + .byte 81 + .byte 75 + .byte 70 + .byte 66 +/* positive division constants */ + .word 31801 + .word 29966 + .word 28383 + .word 26864 + .word 25583 + .word 24408 + .word 23354 + .word 22344 + .word 21384 + .word 20584 + .word 19752 + .word 19104 + .word 18434 + .word 17738 + .word 17136 + .word 16639 + +#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) +/* This code used shld, thus is not suitable for SH1 / SH2. */ + +/* Signed / unsigned division without use of FPU, optimized for SH4. + Uses a lookup table for divisors in the range -128 .. +128, and + div1 with case distinction for larger divisors in three more ranges. + The code is lumped together with the table to allow the use of mova. */ +#ifdef __LITTLE_ENDIAN__ +#define L_LSB 0 +#define L_LSWMSB 1 +#define L_MSWLSB 2 +#else +#define L_LSB 3 +#define L_LSWMSB 2 +#define L_MSWLSB 1 +#endif + + .balign 4 + .global GLOBAL(udivsi3_i4i) + FUNC(GLOBAL(udivsi3_i4i)) +GLOBAL(udivsi3_i4i): + mov.w LOCAL(c128_w), r1 + div0u + mov r4,r0 + shlr8 r0 + cmp/hi r1,r5 + extu.w r5,r1 + bf LOCAL(udiv_le128) + cmp/eq r5,r1 + bf LOCAL(udiv_ge64k) + shlr r0 + mov r5,r1 + shll16 r5 + mov.l r4,@-r15 + div1 r5,r0 + mov.l r1,@-r15 + div1 r5,r0 + div1 r5,r0 + bra LOCAL(udiv_25) + div1 r5,r0 + +LOCAL(div_le128): + mova LOCAL(div_table_ix),r0 + bra LOCAL(div_le128_2) + mov.b @(r0,r5),r1 +LOCAL(udiv_le128): + mov.l r4,@-r15 + mova LOCAL(div_table_ix),r0 + mov.b @(r0,r5),r1 + mov.l r5,@-r15 +LOCAL(div_le128_2): + mova LOCAL(div_table_inv),r0 + mov.l @(r0,r1),r1 + mov r5,r0 + tst #0xfe,r0 + mova LOCAL(div_table_clz),r0 + dmulu.l r1,r4 + mov.b @(r0,r5),r1 + bt/s LOCAL(div_by_1) + mov r4,r0 + mov.l @r15+,r5 + sts mach,r0 + /* clrt */ + addc r4,r0 + mov.l @r15+,r4 + rotcr r0 + rts + shld r1,r0 + +LOCAL(div_by_1_neg): + neg r4,r0 +LOCAL(div_by_1): + mov.l @r15+,r5 + rts + mov.l @r15+,r4 + +LOCAL(div_ge64k): + bt/s LOCAL(div_r8) + div0u + shll8 r5 + bra LOCAL(div_ge64k_2) + div1 r5,r0 +LOCAL(udiv_ge64k): + cmp/hi r0,r5 + mov r5,r1 + bt LOCAL(udiv_r8) + shll8 r5 + mov.l r4,@-r15 + div1 r5,r0 + mov.l r1,@-r15 +LOCAL(div_ge64k_2): + div1 r5,r0 + mov.l LOCAL(zero_l),r1 + .rept 4 + div1 r5,r0 + .endr + mov.l r1,@-r15 + div1 r5,r0 + mov.w LOCAL(m256_w),r1 + div1 r5,r0 + mov.b r0,@(L_LSWMSB,r15) + xor r4,r0 + and r1,r0 + bra LOCAL(div_ge64k_end) + xor r4,r0 + +LOCAL(div_r8): + shll16 r4 + bra LOCAL(div_r8_2) + shll8 r4 +LOCAL(udiv_r8): + mov.l r4,@-r15 + shll16 r4 + clrt + shll8 r4 + mov.l r5,@-r15 +LOCAL(div_r8_2): + rotcl r4 + mov r0,r1 + div1 r5,r1 + mov r4,r0 + rotcl r0 + mov r5,r4 + div1 r5,r1 + .rept 5 + rotcl r0; div1 r5,r1 + .endr + rotcl r0 + mov.l @r15+,r5 + div1 r4,r1 + mov.l @r15+,r4 + rts + rotcl r0 + + ENDFUNC(GLOBAL(udivsi3_i4i)) + + .global GLOBAL(sdivsi3_i4i) + FUNC(GLOBAL(sdivsi3_i4i)) + /* This is link-compatible with a GLOBAL(sdivsi3) call, + but we effectively clobber only r1. 
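Both GLOBAL(udivsi3_i4i) above and GLOBAL(sdivsi3_i4i) below follow the range split described at the top of this block (reciprocal table for small divisors, div1 loops of different lengths otherwise). An approximate C outline of the unsigned routine is sketched here; the helper names are invented summaries of the paths, and the div1 step counts are rough, not exact.

    extern unsigned div_by_reciprocal (unsigned n, unsigned d);  /* table below */
    extern unsigned div_by_div1 (unsigned n, unsigned d, int steps);

    static unsigned udivsi3_i4i_model (unsigned n, unsigned d)
    {
      if (d <= 128)
        return div_by_reciprocal (n, d);   /* LOCAL(udiv_le128)                  */
      if (d < (1u << 16))
        return div_by_div1 (n, d, 25);     /* LOCAL(udiv_25)                     */
      if (d > (n >> 8))
        return div_by_div1 (n, d, 8);      /* LOCAL(udiv_r8): quotient < 256     */
      return div_by_div1 (n, d, 16);       /* LOCAL(udiv_ge64k)                  */
    }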
*/ +GLOBAL(sdivsi3_i4i): + mov.l r4,@-r15 + cmp/pz r5 + mov.w LOCAL(c128_w), r1 + bt/s LOCAL(pos_divisor) + cmp/pz r4 + mov.l r5,@-r15 + neg r5,r5 + bt/s LOCAL(neg_result) + cmp/hi r1,r5 + neg r4,r4 +LOCAL(pos_result): + extu.w r5,r0 + bf LOCAL(div_le128) + cmp/eq r5,r0 + mov r4,r0 + shlr8 r0 + bf/s LOCAL(div_ge64k) + cmp/hi r0,r5 + div0u + shll16 r5 + div1 r5,r0 + div1 r5,r0 + div1 r5,r0 +LOCAL(udiv_25): + mov.l LOCAL(zero_l),r1 + div1 r5,r0 + div1 r5,r0 + mov.l r1,@-r15 + .rept 3 + div1 r5,r0 + .endr + mov.b r0,@(L_MSWLSB,r15) + xtrct r4,r0 + swap.w r0,r0 + .rept 8 + div1 r5,r0 + .endr + mov.b r0,@(L_LSWMSB,r15) +LOCAL(div_ge64k_end): + .rept 8 + div1 r5,r0 + .endr + mov.l @r15+,r4 ! zero-extension and swap using LS unit. + extu.b r0,r0 + mov.l @r15+,r5 + or r4,r0 + mov.l @r15+,r4 + rts + rotcl r0 + +LOCAL(div_le128_neg): + tst #0xfe,r0 + mova LOCAL(div_table_ix),r0 + mov.b @(r0,r5),r1 + mova LOCAL(div_table_inv),r0 + bt/s LOCAL(div_by_1_neg) + mov.l @(r0,r1),r1 + mova LOCAL(div_table_clz),r0 + dmulu.l r1,r4 + mov.b @(r0,r5),r1 + mov.l @r15+,r5 + sts mach,r0 + /* clrt */ + addc r4,r0 + mov.l @r15+,r4 + rotcr r0 + shld r1,r0 + rts + neg r0,r0 + +LOCAL(pos_divisor): + mov.l r5,@-r15 + bt/s LOCAL(pos_result) + cmp/hi r1,r5 + neg r4,r4 +LOCAL(neg_result): + extu.w r5,r0 + bf LOCAL(div_le128_neg) + cmp/eq r5,r0 + mov r4,r0 + shlr8 r0 + bf/s LOCAL(div_ge64k_neg) + cmp/hi r0,r5 + div0u + mov.l LOCAL(zero_l),r1 + shll16 r5 + div1 r5,r0 + mov.l r1,@-r15 + .rept 7 + div1 r5,r0 + .endr + mov.b r0,@(L_MSWLSB,r15) + xtrct r4,r0 + swap.w r0,r0 + .rept 8 + div1 r5,r0 + .endr + mov.b r0,@(L_LSWMSB,r15) +LOCAL(div_ge64k_neg_end): + .rept 8 + div1 r5,r0 + .endr + mov.l @r15+,r4 ! zero-extension and swap using LS unit. + extu.b r0,r1 + mov.l @r15+,r5 + or r4,r1 +LOCAL(div_r8_neg_end): + mov.l @r15+,r4 + rotcl r1 + rts + neg r1,r0 + +LOCAL(div_ge64k_neg): + bt/s LOCAL(div_r8_neg) + div0u + shll8 r5 + mov.l LOCAL(zero_l),r1 + .rept 6 + div1 r5,r0 + .endr + mov.l r1,@-r15 + div1 r5,r0 + mov.w LOCAL(m256_w),r1 + div1 r5,r0 + mov.b r0,@(L_LSWMSB,r15) + xor r4,r0 + and r1,r0 + bra LOCAL(div_ge64k_neg_end) + xor r4,r0 + +LOCAL(c128_w): + .word 128 + +LOCAL(div_r8_neg): + clrt + shll16 r4 + mov r4,r1 + shll8 r1 + mov r5,r4 + .rept 7 + rotcl r1; div1 r5,r0 + .endr + mov.l @r15+,r5 + rotcl r1 + bra LOCAL(div_r8_neg_end) + div1 r4,r0 + +LOCAL(m256_w): + .word 0xff00 +/* This table has been generated by divtab-sh4.c. 
*/ + .balign 4 +LOCAL(div_table_clz): + .byte 0 + .byte 1 + .byte 0 + .byte -1 + .byte -1 + .byte -2 + .byte -2 + .byte -2 + .byte -2 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 +/* Lookup table translating positive divisor to index into table of + normalized inverse. N.B. the '0' entry is also the last entry of the + previous table, and causes an unaligned access for division by zero. */ +LOCAL(div_table_ix): + .byte -6 + .byte -128 + .byte -128 + .byte 0 + .byte -128 + .byte -64 + .byte 0 + .byte 64 + .byte -128 + .byte -96 + .byte -64 + .byte -32 + .byte 0 + .byte 32 + .byte 64 + .byte 96 + .byte -128 + .byte -112 + .byte -96 + .byte -80 + .byte -64 + .byte -48 + .byte -32 + .byte -16 + .byte 0 + .byte 16 + .byte 32 + .byte 48 + .byte 64 + .byte 80 + .byte 96 + .byte 112 + .byte -128 + .byte -120 + .byte -112 + .byte -104 + .byte -96 + .byte -88 + .byte -80 + .byte -72 + .byte -64 + .byte -56 + .byte -48 + .byte -40 + .byte -32 + .byte -24 + .byte -16 + .byte -8 + .byte 0 + .byte 8 + .byte 16 + .byte 24 + .byte 32 + .byte 40 + .byte 48 + .byte 56 + .byte 64 + .byte 72 + .byte 80 + .byte 88 + .byte 96 + .byte 104 + .byte 112 + .byte 120 + .byte -128 + .byte -124 + .byte -120 + .byte -116 + .byte -112 + .byte -108 + .byte -104 + .byte -100 + .byte -96 + .byte -92 + .byte -88 + .byte -84 + .byte -80 + .byte -76 + .byte -72 + .byte -68 + .byte -64 + .byte -60 + .byte -56 + .byte -52 + .byte -48 + .byte -44 + .byte -40 + .byte -36 + .byte -32 + .byte -28 + .byte -24 + .byte -20 + .byte -16 + .byte -12 + .byte -8 + .byte -4 + .byte 0 + .byte 4 + .byte 8 + .byte 12 + .byte 16 + .byte 20 + .byte 24 + .byte 28 + .byte 32 + .byte 36 + .byte 40 + .byte 44 + .byte 48 + .byte 52 + .byte 56 + .byte 60 + .byte 64 + .byte 68 + .byte 72 + .byte 76 + .byte 80 + .byte 84 + .byte 88 + .byte 92 + .byte 96 + .byte 100 + .byte 104 + .byte 108 + .byte 112 + .byte 116 + .byte 120 + .byte 124 + .byte -128 +/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. 
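A small worked example shows how the "implicit leading 1 in bit 32" convention is used. For a divisor d, scale it to d' in [64,128); the table stores the 32 fractional bits of 128/d'. The sketch below only models the multiply/shift arithmetic: the rounding done with addc/rotcr and the shift selection via div_table_clz in the real code are glossed over, and the post-shift is supplied by hand for the example.

    #include <stdint.h>

    /* q ~= n/d, with frac_inv taken from the table below.  */
    static uint32_t udiv_by_inverse (uint32_t n, uint32_t frac_inv, int post_shift)
    {
      /* (2^32 + frac_inv) ~= (128/d') * 2^32, so q0 ~= n * 128 / d'.  */
      uint64_t q0 = (((uint64_t) n * frac_inv) >> 32) + n;
      return (uint32_t) (q0 >> post_shift);
    }

    /* d = 3: d' = 96 = 3 << 5, table entry 0x55555556 = frac(128/96) * 2^32,
       and 128/d' is 4 times 1/d, so post_shift = 2:
       udiv_by_inverse (100, 0x55555556, 2) == 33.  */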
*/ + .balign 4 +LOCAL(zero_l): + .long 0x0 + .long 0xF81F81F9 + .long 0xF07C1F08 + .long 0xE9131AC0 + .long 0xE1E1E1E2 + .long 0xDAE6076C + .long 0xD41D41D5 + .long 0xCD856891 + .long 0xC71C71C8 + .long 0xC0E07039 + .long 0xBACF914D + .long 0xB4E81B4F + .long 0xAF286BCB + .long 0xA98EF607 + .long 0xA41A41A5 + .long 0x9EC8E952 + .long 0x9999999A + .long 0x948B0FCE + .long 0x8F9C18FA + .long 0x8ACB90F7 + .long 0x86186187 + .long 0x81818182 + .long 0x7D05F418 + .long 0x78A4C818 + .long 0x745D1746 + .long 0x702E05C1 + .long 0x6C16C16D + .long 0x68168169 + .long 0x642C8591 + .long 0x60581606 + .long 0x5C9882BA + .long 0x58ED2309 +LOCAL(div_table_inv): + .long 0x55555556 + .long 0x51D07EAF + .long 0x4E5E0A73 + .long 0x4AFD6A06 + .long 0x47AE147B + .long 0x446F8657 + .long 0x41414142 + .long 0x3E22CBCF + .long 0x3B13B13C + .long 0x38138139 + .long 0x3521CFB3 + .long 0x323E34A3 + .long 0x2F684BDB + .long 0x2C9FB4D9 + .long 0x29E4129F + .long 0x27350B89 + .long 0x24924925 + .long 0x21FB7813 + .long 0x1F7047DD + .long 0x1CF06ADB + .long 0x1A7B9612 + .long 0x18118119 + .long 0x15B1E5F8 + .long 0x135C8114 + .long 0x11111112 + .long 0xECF56BF + .long 0xC9714FC + .long 0xA6810A7 + .long 0x8421085 + .long 0x624DD30 + .long 0x4104105 + .long 0x2040811 + /* maximum error: 0.987342 scaled: 0.921875*/ + + ENDFUNC(GLOBAL(sdivsi3_i4i)) +#endif /* SH3 / SH4 */ + +#endif /* L_div_table */ + +#ifdef L_udiv_qrnnd_16 +#if !__SHMEDIA__ + HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16)) + /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ + /* n1 < d, but n1 might be larger than d1. */ + .global GLOBAL(udiv_qrnnd_16) + .balign 8 +GLOBAL(udiv_qrnnd_16): + div0u + cmp/hi r6,r0 + bt .Lots + .rept 16 + div1 r6,r0 + .endr + extu.w r0,r1 + bt 0f + add r6,r0 +0: rotcl r1 + mulu.w r1,r5 + xtrct r4,r0 + swap.w r0,r0 + sts macl,r2 + cmp/hs r2,r0 + sub r2,r0 + bt 0f + addc r5,r0 + add #-1,r1 + bt 0f +1: add #-1,r1 + rts + add r5,r0 + .balign 8 +.Lots: + sub r5,r0 + swap.w r4,r1 + xtrct r0,r1 + clrt + mov r1,r0 + addc r5,r0 + mov #-1,r1 + SL1(bf, 1b, + shlr16 r1) +0: rts + nop + ENDFUNC(GLOBAL(udiv_qrnnd_16)) +#endif /* !__SHMEDIA__ */ +#endif /* L_udiv_qrnnd_16 */ diff --git a/libgcc/config/sh/lib1funcs.h b/libgcc/config/sh/lib1funcs.h new file mode 100644 index 00000000000..af4b41cc314 --- /dev/null +++ b/libgcc/config/sh/lib1funcs.h @@ -0,0 +1,76 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006, 2009 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +#ifdef __ELF__ +#define LOCAL(X) .L_##X +#define FUNC(X) .type X,@function +#define HIDDEN_FUNC(X) FUNC(X); .hidden X +#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X) +#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X +#define ENDFUNC(X) ENDFUNC0(X) +#else +#define LOCAL(X) L_##X +#define FUNC(X) +#define HIDDEN_FUNC(X) +#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y) +#define ENDFUNC(X) +#endif + +#define CONCAT(A,B) A##B +#define GLOBAL0(U,X) CONCAT(U,__##X) +#define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X) + +#define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y) + +#if defined __SH2A__ && defined __FMOVD_ENABLED__ +#undef FMOVD_WORKS +#define FMOVD_WORKS +#endif + +#ifdef __LITTLE_ENDIAN__ +#define DR00 fr1 +#define DR01 fr0 +#define DR20 fr3 +#define DR21 fr2 +#define DR40 fr5 +#define DR41 fr4 +#else /* !__LITTLE_ENDIAN__ */ +#define DR00 fr0 +#define DR01 fr1 +#define DR20 fr2 +#define DR21 fr3 +#define DR40 fr4 +#define DR41 fr5 +#endif /* !__LITTLE_ENDIAN__ */ + +#ifdef __sh1__ +#define SL(branch, dest, in_slot, in_slot_arg2) \ + in_slot, in_slot_arg2; branch dest +#define SL1(branch, dest, in_slot) \ + in_slot; branch dest +#else /* ! __sh1__ */ +#define SL(branch, dest, in_slot, in_slot_arg2) \ + branch##.s dest; in_slot, in_slot_arg2 +#define SL1(branch, dest, in_slot) \ + branch##/s dest; in_slot +#endif /* !__sh1__ */ diff --git a/libgcc/config/sh/t-linux b/libgcc/config/sh/t-linux index af618e260c6..9b1feacd1f3 100644 --- a/libgcc/config/sh/t-linux +++ b/libgcc/config/sh/t-linux @@ -1,3 +1,5 @@ +LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array + HOST_LIBGCC2_CFLAGS = -fpic -mieee -DNO_FPSCR_VALUES # Override t-slibgcc-elf-ver to export some libgcc symbols with diff --git a/libgcc/config/sh/t-netbsd b/libgcc/config/sh/t-netbsd new file mode 100644 index 00000000000..663edbf4187 --- /dev/null +++ b/libgcc/config/sh/t-netbsd @@ -0,0 +1 @@ +LIB1ASMFUNCS_CACHE = _ic_invalidate diff --git a/libgcc/config/sh/t-sh b/libgcc/config/sh/t-sh index ab4d98089b1..2319adbef1d 100644 --- a/libgcc/config/sh/t-sh +++ b/libgcc/config/sh/t-sh @@ -17,26 +17,33 @@ # along with GCC; see the file COPYING3. If not see # . 
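The lib1funcs.h macros above and the LIB1ASMFUNCS list added just below are two halves of the same convention: each name in LIB1ASMFUNCS becomes its own object, compiled from LIB1ASMSRC with -DL_<name> on the command line (hence the #ifdef L_udivsi3 and similar guards throughout lib1funcs.S), while GLOBAL() pastes __USER_LABEL_PREFIX__ onto the double-underscore libgcc name. A small preprocessor illustration, using udivsi3 as an example symbol:

    /* Definitions copied from lib1funcs.h above.  */
    #define CONCAT(A,B)  A##B
    #define GLOBAL0(U,X) CONCAT(U,__##X)
    #define GLOBAL(X)    GLOBAL0(__USER_LABEL_PREFIX__,X)

    /* On ELF targets __USER_LABEL_PREFIX__ expands to nothing, so this
       declares __udivsi3; on targets that prefix user labels with `_' it
       declares ___udivsi3 (compare the V850 code further down).  */
    extern unsigned int GLOBAL(udivsi3) (unsigned int, unsigned int);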
+LIB1ASMSRC = sh/lib1funcs.S +LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movmem \ + _movmem_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ + _div_table _udiv_qrnnd_16 \ + $(LIB1ASMFUNCS_CACHE) +LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array + crt1.o: $(srcdir)/config/sh/crt1.S $(gcc_compile) -c $< -ic_invalidate_array_4-100.o: $(gcc_srcdir)/config/sh/lib1funcs.asm +ic_invalidate_array_4-100.o: $(srcdir)/config/sh/lib1funcs.S $(gcc_compile) -c -DL_ic_invalidate_array -DWAYS=1 -DWAY_SIZE=0x2000 $< libic_invalidate_array_4-100.a: ic_invalidate_array_4-100.o $(AR_CREATE_FOR_TARGET) $@ $< -ic_invalidate_array_4-200.o: $(gcc_srcdir)/config/sh/lib1funcs.asm +ic_invalidate_array_4-200.o: $(srcdir)/config/sh/lib1funcs.S $(gcc_compile) -c -DL_ic_invalidate_array -DWAYS=2 -DWAY_SIZE=0x2000 $< libic_invalidate_array_4-200.a: ic_invalidate_array_4-200.o $(AR_CREATE_FOR_TARGET) $@ $< -ic_invalidate_array_4a.o: $(gcc_srcdir)/config/sh/lib1funcs.asm +ic_invalidate_array_4a.o: $(srcdir)/config/sh/lib1funcs.S $(gcc_compile) -c -DL_ic_invalidate_array -D__FORCE_SH4A__ $< libic_invalidate_array_4a.a: ic_invalidate_array_4a.o $(AR_CREATE_FOR_TARGET) $@ $< sdivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.S - $(gcc_compile) -c -DL_sdivsi3_i4i $< + $(compile) -c -DL_sdivsi3_i4i $< udivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.S $(gcc_compile) -c -DL_udivsi3_i4i $< unwind-dw2-Os-4-200.o: $(gcc_srcdir)/unwind-dw2.c diff --git a/libgcc/config/sh/t-sh64 b/libgcc/config/sh/t-sh64 new file mode 100644 index 00000000000..fa9950e03b2 --- /dev/null +++ b/libgcc/config/sh/t-sh64 @@ -0,0 +1,6 @@ +LIB1ASMFUNCS = \ + _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ + _shcompact_call_trampoline _shcompact_return_trampoline \ + _shcompact_incoming_args _ic_invalidate _nested_trampoline \ + _push_pop_shmedia_regs \ + _udivdi3 _divdi3 _umoddi3 _moddi3 _div_table diff --git a/libgcc/config/sparc/lb1spc.S b/libgcc/config/sparc/lb1spc.S new file mode 100644 index 00000000000..b60bd5740e7 --- /dev/null +++ b/libgcc/config/sparc/lb1spc.S @@ -0,0 +1,784 @@ +/* This is an assembly language implementation of mulsi3, divsi3, and modsi3 + for the sparc processor. + + These routines are derived from the SPARC Architecture Manual, version 8, + slightly edited to match the desired calling convention, and also to + optimize them for our purposes. */ + +#ifdef L_mulsi3 +.text + .align 4 + .global .umul + .proc 4 +.umul: + or %o0, %o1, %o4 ! logical or of multiplier and multiplicand + mov %o0, %y ! multiplier to Y register + andncc %o4, 0xfff, %o5 ! mask out lower 12 bits + be mul_shortway ! can do it the short way + andcc %g0, %g0, %o4 ! zero the partial product and clear NV cc + ! + ! long multiply + ! + mulscc %o4, %o1, %o4 ! first iteration of 33 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 ! 
32nd iteration + mulscc %o4, %g0, %o4 ! last iteration only shifts + ! the upper 32 bits of product are wrong, but we do not care + retl + rd %y, %o0 + ! + ! short multiply + ! +mul_shortway: + mulscc %o4, %o1, %o4 ! first iteration of 13 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 ! 12th iteration + mulscc %o4, %g0, %o4 ! last iteration only shifts + rd %y, %o5 + sll %o4, 12, %o4 ! left shift partial product by 12 bits + srl %o5, 20, %o5 ! right shift partial product by 20 bits + retl + or %o5, %o4, %o0 ! merge for true product +#endif + +#ifdef L_divsi3 +/* + * Division and remainder, from Appendix E of the SPARC Version 8 + * Architecture Manual, with fixes from Gordon Irlam. + */ + +/* + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * m4 parameters: + * .div name of function to generate + * div div=div => %o0 / %o1; div=rem => %o0 % %o1 + * true true=true => signed; true=false => unsigned + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top decade of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ + .global .udiv + .align 4 + .proc 4 + .text +.udiv: + b ready_to_divide + mov 0, %g3 ! result is always positive + + .global .div + .align 4 + .proc 4 + .text +.div: + ! compute sign of result; if neither is negative, no problem + orcc %o1, %o0, %g0 ! either negative? + bge ready_to_divide ! no, go do the divide + xor %o1, %o0, %g3 ! compute sign in any case + tst %o1 + bge 1f + tst %o0 + ! %o1 is definitely negative; %o0 might also be negative + bge ready_to_divide ! if %o0 not negative... + sub %g0, %o1, %o1 ! in any case, make %o1 nonneg +1: ! %o0 is negative, %o1 is nonnegative + sub %g0, %o0, %o0 ! make %o0 nonnegative + + +ready_to_divide: + + ! Ready to divide. Compute size of quotient; scale comparand. + orcc %o1, %g0, %o5 + bne 1f + mov %o0, %o3 + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta 0x2 ! ST_DIV0 + retl + clr %o0 + +1: + cmp %o3, %o5 ! if %o1 exceeds %o0, done + blu got_result ! (and algorithm fails otherwise) + clr %o2 + sethi %hi(1 << (32 - 4 - 1)), %g1 + cmp %o3, %g1 + blu not_really_big + clr %o4 + + ! Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R. + 1: + cmp %o5, %g1 + bgeu 3f + mov 1, %g2 + sll %o5, 4, %o5 + b 1b + add %o4, 1, %o4 + + ! Now compute %g2. 
+ 2: addcc %o5, %o5, %o5 + bcc not_too_big + add %g2, 1, %g2 + + ! We get here if the %o1 overflowed while shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1, 4, %g1 ! high order bit + srl %o5, 1, %o5 ! rest of %o5 + add %o5, %g1, %o5 + b do_single_div + sub %g2, 1, %g2 + + not_too_big: + 3: cmp %o5, %o3 + blu 2b + nop + be do_single_div + nop + /* NB: these are commented out in the V8-SPARC manual as well */ + /* (I do not understand this) */ + ! %o5 > %o3: went too far: back up 1 step + ! srl %o5, 1, %o5 + ! dec %g2 + ! do single-bit divide steps + ! + ! We have to be careful here. We know that %o3 >= %o5, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + do_single_div: + subcc %g2, 1, %g2 + bl end_regular_divide + nop + sub %o3, %o5, %o3 + mov 1, %o2 + b end_single_divloop + nop + single_divloop: + sll %o2, 1, %o2 + bl 1f + srl %o5, 1, %o5 + ! %o3 >= 0 + sub %o3, %o5, %o3 + b 2f + add %o2, 1, %o2 + 1: ! %o3 < 0 + add %o3, %o5, %o3 + sub %o2, 1, %o2 + 2: + end_single_divloop: + subcc %g2, 1, %g2 + bge single_divloop + tst %o3 + b,a end_regular_divide + +not_really_big: +1: + sll %o5, 4, %o5 + cmp %o5, %o3 + bleu 1b + addcc %o4, 1, %o4 + be got_result + sub %o4, 1, %o4 + + tst %o3 ! set up for initial iteration +divloop: + sll %o2, 4, %o2 + ! depth 1, accumulated bits 0 + bl L1.16 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 2, accumulated bits 1 + bl L2.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits 3 + bl L3.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 7 + bl L4.23 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 + +L4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 + + +L3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 5 + bl L4.21 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 + +L4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 + +L2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits 1 + bl L3.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 3 + bl L4.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 + +L4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 + +L3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 1 + bl L4.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 + +L4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 + +L1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 2, accumulated bits -1 + bl L2.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits -1 + bl L3.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -1 + bl L4.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 + +L4.15: + ! 
remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 + +L3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -3 + bl L4.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 + +L4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 + +L2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits -3 + bl L3.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -5 + bl L4.11 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 + +L4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 + +L3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -7 + bl L4.9 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 + +L4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + + 9: +end_regular_divide: + subcc %o4, 1, %o4 + bge divloop + tst %o3 + bl,a got_result + ! non-restoring fixup here (one instruction only!) + sub %o2, 1, %o2 + + +got_result: + ! check to see if answer should be < 0 + tst %g3 + bl,a 1f + sub %g0, %o2, %o2 +1: + retl + mov %o2, %o0 +#endif + +#ifdef L_modsi3 +/* This implementation was taken from glibc: + * + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top decade of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ +.text + .align 4 + .global .urem + .proc 4 +.urem: + b divide + mov 0, %g3 ! result always positive + + .align 4 + .global .rem + .proc 4 +.rem: + ! compute sign of result; if neither is negative, no problem + orcc %o1, %o0, %g0 ! either negative? + bge 2f ! no, go do the divide + mov %o0, %g3 ! sign of remainder matches %o0 + tst %o1 + bge 1f + tst %o0 + ! %o1 is definitely negative; %o0 might also be negative + bge 2f ! if %o0 not negative... + sub %g0, %o1, %o1 ! in any case, make %o1 nonneg +1: ! %o0 is negative, %o1 is nonnegative + sub %g0, %o0, %o0 ! make %o0 nonnegative +2: + + ! Ready to divide. Compute size of quotient; scale comparand. +divide: + orcc %o1, %g0, %o5 + bne 1f + mov %o0, %o3 + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta 0x2 !ST_DIV0 + retl + clr %o0 + +1: + cmp %o3, %o5 ! if %o1 exceeds %o0, done + blu got_result ! (and algorithm fails otherwise) + clr %o2 + sethi %hi(1 << (32 - 4 - 1)), %g1 + cmp %o3, %g1 + blu not_really_big + clr %o4 + + ! Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! 
Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R. + 1: + cmp %o5, %g1 + bgeu 3f + mov 1, %g2 + sll %o5, 4, %o5 + b 1b + add %o4, 1, %o4 + + ! Now compute %g2. + 2: addcc %o5, %o5, %o5 + bcc not_too_big + add %g2, 1, %g2 + + ! We get here if the %o1 overflowed while shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1, 4, %g1 ! high order bit + srl %o5, 1, %o5 ! rest of %o5 + add %o5, %g1, %o5 + b do_single_div + sub %g2, 1, %g2 + + not_too_big: + 3: cmp %o5, %o3 + blu 2b + nop + be do_single_div + nop + /* NB: these are commented out in the V8-SPARC manual as well */ + /* (I do not understand this) */ + ! %o5 > %o3: went too far: back up 1 step + ! srl %o5, 1, %o5 + ! dec %g2 + ! do single-bit divide steps + ! + ! We have to be careful here. We know that %o3 >= %o5, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + do_single_div: + subcc %g2, 1, %g2 + bl end_regular_divide + nop + sub %o3, %o5, %o3 + mov 1, %o2 + b end_single_divloop + nop + single_divloop: + sll %o2, 1, %o2 + bl 1f + srl %o5, 1, %o5 + ! %o3 >= 0 + sub %o3, %o5, %o3 + b 2f + add %o2, 1, %o2 + 1: ! %o3 < 0 + add %o3, %o5, %o3 + sub %o2, 1, %o2 + 2: + end_single_divloop: + subcc %g2, 1, %g2 + bge single_divloop + tst %o3 + b,a end_regular_divide + +not_really_big: +1: + sll %o5, 4, %o5 + cmp %o5, %o3 + bleu 1b + addcc %o4, 1, %o4 + be got_result + sub %o4, 1, %o4 + + tst %o3 ! set up for initial iteration +divloop: + sll %o2, 4, %o2 + ! depth 1, accumulated bits 0 + bl L1.16 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 2, accumulated bits 1 + bl L2.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits 3 + bl L3.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 7 + bl L4.23 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 +L4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 + +L3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 5 + bl L4.21 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 + +L4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 + +L2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits 1 + bl L3.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 3 + bl L4.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 + +L4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 + +L3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 1 + bl L4.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 + +L4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 + +L1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 2, accumulated bits -1 + bl L2.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits -1 + bl L3.15 + srl %o5,1,%o5 + ! 
remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -1 + bl L4.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 + +L4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 + +L3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -3 + bl L4.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 + +L4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 + +L2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits -3 + bl L3.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -5 + bl L4.11 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 + +L4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 + +L3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -7 + bl L4.9 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 + +L4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + + 9: +end_regular_divide: + subcc %o4, 1, %o4 + bge divloop + tst %o3 + bl,a got_result + ! non-restoring fixup here (one instruction only!) + add %o3, %o1, %o3 + +got_result: + ! check to see if answer should be < 0 + tst %g3 + bl,a 1f + sub %g0, %o3, %o3 +1: + retl + mov %o3, %o0 + +#endif + diff --git a/libgcc/config/sparc/t-softmul b/libgcc/config/sparc/t-softmul index 49faae47c53..7142200600f 100644 --- a/libgcc/config/sparc/t-softmul +++ b/libgcc/config/sparc/t-softmul @@ -1,2 +1,2 @@ -LIB1ASMSRC = sparc/lb1spc.asm +LIB1ASMSRC = sparc/lb1spc.S LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 diff --git a/libgcc/config/v850/lib1funcs.S b/libgcc/config/v850/lib1funcs.S new file mode 100644 index 00000000000..04e9b1e0ad4 --- /dev/null +++ b/libgcc/config/v850/lib1funcs.S @@ -0,0 +1,2330 @@ +/* libgcc routines for NEC V850. + Copyright (C) 1996, 1997, 2002, 2005, 2009, 2010 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
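A note on the SPARC .urem/.rem code a little further up: it is the non-restoring scheme described in its own header comment, developing N = 4 quotient bits per main-loop iteration, letting the running remainder go negative, and applying the single-instruction fixup at end_regular_divide. The C sketch below models the same idea one bit at a time and only for the "not really big" dividend path; every name in it is invented for illustration and none of it is part of the moved sources.

/* Rough model of the non-restoring remainder loop (sketch only).
   Assumes divisor != 0 (the assembly traps on zero) and a dividend
   below 1 << 27, i.e. the not_really_big path.  */
unsigned int
urem_model (unsigned int dividend, unsigned int divisor)
{
  long long rem = dividend;      /* R: allowed to go negative */
  unsigned int v = divisor;      /* V: the comparand */
  int iter = 0;

  while (v <= dividend)          /* scale V up above the dividend */
    {
      v <<= 1;
      iter++;
    }
  while (iter-- > 0)             /* non-restoring steps */
    {
      v >>= 1;
      if (rem >= 0)
        rem -= v;
      else
        rem += v;
    }
  if (rem < 0)                   /* the one-instruction fixup */
    rem += divisor;
  return (unsigned int) rem;
}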
*/ + +#ifdef L_mulsi3 + .text + .globl ___mulsi3 + .type ___mulsi3,@function +___mulsi3: +#ifdef __v850__ +/* + #define SHIFT 12 + #define MASK ((1 << SHIFT) - 1) + + #define STEP(i, j) \ + ({ \ + short a_part = (a >> (i)) & MASK; \ + short b_part = (b >> (j)) & MASK; \ + int res = (((int) a_part) * ((int) b_part)); \ + res; \ + }) + + int + __mulsi3 (unsigned a, unsigned b) + { + return STEP (0, 0) + + ((STEP (SHIFT, 0) + STEP (0, SHIFT)) << SHIFT) + + ((STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) + STEP (2 * SHIFT, 0)) + << (2 * SHIFT)); + } +*/ + mov r6, r14 + movea lo(32767), r0, r10 + and r10, r14 + mov r7, r15 + and r10, r15 + shr 15, r6 + mov r6, r13 + and r10, r13 + shr 15, r7 + mov r7, r12 + and r10, r12 + shr 15, r6 + shr 15, r7 + mov r14, r10 + mulh r15, r10 + mov r14, r11 + mulh r12, r11 + mov r13, r16 + mulh r15, r16 + mulh r14, r7 + mulh r15, r6 + add r16, r11 + mulh r13, r12 + shl 15, r11 + add r11, r10 + add r12, r7 + add r6, r7 + shl 30, r7 + add r7, r10 + jmp [r31] +#endif /* __v850__ */ +#if defined(__v850e__) || defined(__v850ea__) || defined(__v850e2__) || defined(__v850e2v3__) + /* This routine is almost unneccesarry because gcc + generates the MUL instruction for the RTX mulsi3. + But if someone wants to link his application with + previsously compiled v850 objects then they will + need this function. */ + + /* It isn't good to put the inst sequence as below; + mul r7, r6, + mov r6, r10, r0 + In this case, there is a RAW hazard between them. + MUL inst takes 2 cycle in EX stage, then MOV inst + must wait 1cycle. */ + mov r7, r10 + mul r6, r10, r0 + jmp [r31] +#endif /* __v850e__ */ + .size ___mulsi3,.-___mulsi3 +#endif /* L_mulsi3 */ + + +#ifdef L_udivsi3 + .text + .global ___udivsi3 + .type ___udivsi3,@function +___udivsi3: +#ifdef __v850__ + mov 1,r12 + mov 0,r10 + cmp r6,r7 + bnl .L12 + movhi hi(-2147483648),r0,r13 + cmp r0,r7 + blt .L12 +.L4: + shl 1,r7 + shl 1,r12 + cmp r6,r7 + bnl .L12 + cmp r0,r12 + be .L8 + mov r7,r19 + and r13,r19 + be .L4 + br .L12 +.L9: + cmp r7,r6 + bl .L10 + sub r7,r6 + or r12,r10 +.L10: + shr 1,r12 + shr 1,r7 +.L12: + cmp r0,r12 + bne .L9 +.L8: + jmp [r31] + +#else /* defined(__v850e__) */ + + /* See comments at end of __mulsi3. */ + mov r6, r10 + divu r7, r10, r0 + jmp [r31] + +#endif /* __v850e__ */ + + .size ___udivsi3,.-___udivsi3 +#endif + +#ifdef L_divsi3 + .text + .globl ___divsi3 + .type ___divsi3,@function +___divsi3: +#ifdef __v850__ + add -8,sp + st.w r31,4[sp] + st.w r22,0[sp] + mov 1,r22 + tst r7,r7 + bp .L3 + subr r0,r7 + subr r0,r22 +.L3: + tst r6,r6 + bp .L4 + subr r0,r6 + subr r0,r22 +.L4: + jarl ___udivsi3,r31 + cmp r0,r22 + bp .L7 + subr r0,r10 +.L7: + ld.w 0[sp],r22 + ld.w 4[sp],r31 + add 8,sp + jmp [r31] + +#else /* defined(__v850e__) */ + + /* See comments at end of __mulsi3. */ + mov r6, r10 + div r7, r10, r0 + jmp [r31] + +#endif /* __v850e__ */ + + .size ___divsi3,.-___divsi3 +#endif + +#ifdef L_umodsi3 + .text + .globl ___umodsi3 + .type ___umodsi3,@function +___umodsi3: +#ifdef __v850__ + add -12,sp + st.w r31,8[sp] + st.w r7,4[sp] + st.w r6,0[sp] + jarl ___udivsi3,r31 + ld.w 4[sp],r7 + mov r10,r6 + jarl ___mulsi3,r31 + ld.w 0[sp],r6 + subr r6,r10 + ld.w 8[sp],r31 + add 12,sp + jmp [r31] + +#else /* defined(__v850e__) */ + + /* See comments at end of __mulsi3. 
*/ + divu r7, r6, r10 + jmp [r31] + +#endif /* __v850e__ */ + + .size ___umodsi3,.-___umodsi3 +#endif /* L_umodsi3 */ + +#ifdef L_modsi3 + .text + .globl ___modsi3 + .type ___modsi3,@function +___modsi3: +#ifdef __v850__ + add -12,sp + st.w r31,8[sp] + st.w r7,4[sp] + st.w r6,0[sp] + jarl ___divsi3,r31 + ld.w 4[sp],r7 + mov r10,r6 + jarl ___mulsi3,r31 + ld.w 0[sp],r6 + subr r6,r10 + ld.w 8[sp],r31 + add 12,sp + jmp [r31] + +#else /* defined(__v850e__) */ + + /* See comments at end of __mulsi3. */ + div r7, r6, r10 + jmp [r31] + +#endif /* __v850e__ */ + + .size ___modsi3,.-___modsi3 +#endif /* L_modsi3 */ + +#ifdef L_save_2 + .text + .align 2 + .globl __save_r2_r29 + .type __save_r2_r29,@function + /* Allocate space and save registers 2, 20 .. 29 on the stack. */ + /* Called via: jalr __save_r2_r29,r10. */ +__save_r2_r29: +#ifdef __EP__ + mov ep,r1 + addi -44,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + sst.w r21,32[ep] + sst.w r20,36[ep] + sst.w r2,40[ep] + mov r1,ep +#else + addi -44,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] + st.w r21,32[sp] + st.w r20,36[sp] + st.w r2,40[sp] +#endif + jmp [r10] + .size __save_r2_r29,.-__save_r2_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r2_r29. */ + .align 2 + .globl __return_r2_r29 + .type __return_r2_r29,@function +__return_r2_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + sld.w 32[ep],r21 + sld.w 36[ep],r20 + sld.w 40[ep],r2 + addi 44,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r22 + ld.w 32[sp],r21 + ld.w 36[sp],r20 + ld.w 40[sp],r2 + addi 44,sp,sp +#endif + jmp [r31] + .size __return_r2_r29,.-__return_r2_r29 +#endif /* L_save_2 */ + +#ifdef L_save_20 + .text + .align 2 + .globl __save_r20_r29 + .type __save_r20_r29,@function + /* Allocate space and save registers 20 .. 29 on the stack. */ + /* Called via: jalr __save_r20_r29,r10. */ +__save_r20_r29: +#ifdef __EP__ + mov ep,r1 + addi -40,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + sst.w r21,32[ep] + sst.w r20,36[ep] + mov r1,ep +#else + addi -40,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] + st.w r21,32[sp] + st.w r20,36[sp] +#endif + jmp [r10] + .size __save_r20_r29,.-__save_r20_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r20_r29. 
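The V850 integer helpers just above follow two simple patterns: __udivsi3 is the classic shift-and-subtract divider (scale the divisor up to the dividend, then walk it back down, setting a quotient bit whenever a subtraction fits), while __umodsi3 and __modsi3 do not divide at all; they call the divide helper, multiply the quotient back with ___mulsi3 and subtract the product from the saved dividend. On V850E parts each of these collapses to a single div or divu, as the #else branches show. Below is a C sketch of both patterns with made-up names, not taken from the patch; the signed __modsi3 has the same shape around ___divsi3.

/* Shift-and-subtract unsigned divide, as in the __v850__ branch of
   __udivsi3 (sketch only, b != 0 assumed).  */
unsigned int
udivsi3_model (unsigned int a, unsigned int b)
{
  unsigned int q = 0, bit = 1;

  while (b < a && (b & 0x80000000u) == 0)   /* scale the divisor up */
    {
      b <<= 1;
      bit <<= 1;
    }
  while (bit != 0)                          /* walk it back down */
    {
      if (a >= b)
        {
          a -= b;
          q |= bit;
        }
      b >>= 1;
      bit >>= 1;
    }
  return q;
}

/* Remainder via divide and multiply, as in __umodsi3.  */
unsigned int
umodsi3_model (unsigned int a, unsigned int b)
{
  unsigned int q = udivsi3_model (a, b);    /* jarl ___udivsi3 */
  return a - q * b;                         /* jarl ___mulsi3, subr */
}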
*/ + .align 2 + .globl __return_r20_r29 + .type __return_r20_r29,@function +__return_r20_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + sld.w 32[ep],r21 + sld.w 36[ep],r20 + addi 40,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r22 + ld.w 32[sp],r21 + ld.w 36[sp],r20 + addi 40,sp,sp +#endif + jmp [r31] + .size __return_r20_r29,.-__return_r20_r29 +#endif /* L_save_20 */ + +#ifdef L_save_21 + .text + .align 2 + .globl __save_r21_r29 + .type __save_r21_r29,@function + /* Allocate space and save registers 21 .. 29 on the stack. */ + /* Called via: jalr __save_r21_r29,r10. */ +__save_r21_r29: +#ifdef __EP__ + mov ep,r1 + addi -36,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + sst.w r21,32[ep] + mov r1,ep +#else + addi -36,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] + st.w r21,32[sp] +#endif + jmp [r10] + .size __save_r21_r29,.-__save_r21_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r21_r29. */ + .align 2 + .globl __return_r21_r29 + .type __return_r21_r29,@function +__return_r21_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + sld.w 32[ep],r21 + addi 36,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r22 + ld.w 32[sp],r21 + addi 36,sp,sp +#endif + jmp [r31] + .size __return_r21_r29,.-__return_r21_r29 +#endif /* L_save_21 */ + +#ifdef L_save_22 + .text + .align 2 + .globl __save_r22_r29 + .type __save_r22_r29,@function + /* Allocate space and save registers 22 .. 29 on the stack. */ + /* Called via: jalr __save_r22_r29,r10. */ +__save_r22_r29: +#ifdef __EP__ + mov ep,r1 + addi -32,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + mov r1,ep +#else + addi -32,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] +#endif + jmp [r10] + .size __save_r22_r29,.-__save_r22_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r22_r29. */ + .align 2 + .globl __return_r22_r29 + .type __return_r22_r29,@function +__return_r22_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + addi 32,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r22 + addi 32,sp,sp +#endif + jmp [r31] + .size __return_r22_r29,.-__return_r22_r29 +#endif /* L_save_22 */ + +#ifdef L_save_23 + .text + .align 2 + .globl __save_r23_r29 + .type __save_r23_r29,@function + /* Allocate space and save registers 23 .. 29 on the stack. 
*/ + /* Called via: jalr __save_r23_r29,r10. */ +__save_r23_r29: +#ifdef __EP__ + mov ep,r1 + addi -28,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + mov r1,ep +#else + addi -28,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] +#endif + jmp [r10] + .size __save_r23_r29,.-__save_r23_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r23_r29. */ + .align 2 + .globl __return_r23_r29 + .type __return_r23_r29,@function +__return_r23_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + addi 28,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + addi 28,sp,sp +#endif + jmp [r31] + .size __return_r23_r29,.-__return_r23_r29 +#endif /* L_save_23 */ + +#ifdef L_save_24 + .text + .align 2 + .globl __save_r24_r29 + .type __save_r24_r29,@function + /* Allocate space and save registers 24 .. 29 on the stack. */ + /* Called via: jalr __save_r24_r29,r10. */ +__save_r24_r29: +#ifdef __EP__ + mov ep,r1 + addi -24,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + mov r1,ep +#else + addi -24,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] +#endif + jmp [r10] + .size __save_r24_r29,.-__save_r24_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r24_r29. */ + .align 2 + .globl __return_r24_r29 + .type __return_r24_r29,@function +__return_r24_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + addi 24,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + addi 24,sp,sp +#endif + jmp [r31] + .size __return_r24_r29,.-__return_r24_r29 +#endif /* L_save_24 */ + +#ifdef L_save_25 + .text + .align 2 + .globl __save_r25_r29 + .type __save_r25_r29,@function + /* Allocate space and save registers 25 .. 29 on the stack. */ + /* Called via: jalr __save_r25_r29,r10. */ +__save_r25_r29: +#ifdef __EP__ + mov ep,r1 + addi -20,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + mov r1,ep +#else + addi -20,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] +#endif + jmp [r10] + .size __save_r25_r29,.-__save_r25_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r25_r29. 
*/ + .align 2 + .globl __return_r25_r29 + .type __return_r25_r29,@function +__return_r25_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + addi 20,sp,sp + mov r1,ep +#else + ld.w 0[ep],r29 + ld.w 4[ep],r28 + ld.w 8[ep],r27 + ld.w 12[ep],r26 + ld.w 16[ep],r25 + addi 20,sp,sp +#endif + jmp [r31] + .size __return_r25_r29,.-__return_r25_r29 +#endif /* L_save_25 */ + +#ifdef L_save_26 + .text + .align 2 + .globl __save_r26_r29 + .type __save_r26_r29,@function + /* Allocate space and save registers 26 .. 29 on the stack. */ + /* Called via: jalr __save_r26_r29,r10. */ +__save_r26_r29: +#ifdef __EP__ + mov ep,r1 + add -16,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + mov r1,ep +#else + add -16,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] +#endif + jmp [r10] + .size __save_r26_r29,.-__save_r26_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r26_r29. */ + .align 2 + .globl __return_r26_r29 + .type __return_r26_r29,@function +__return_r26_r29: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + addi 16,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + addi 16,sp,sp +#endif + jmp [r31] + .size __return_r26_r29,.-__return_r26_r29 +#endif /* L_save_26 */ + +#ifdef L_save_27 + .text + .align 2 + .globl __save_r27_r29 + .type __save_r27_r29,@function + /* Allocate space and save registers 27 .. 29 on the stack. */ + /* Called via: jalr __save_r27_r29,r10. */ +__save_r27_r29: + add -12,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + jmp [r10] + .size __save_r27_r29,.-__save_r27_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r27_r29. */ + .align 2 + .globl __return_r27_r29 + .type __return_r27_r29,@function +__return_r27_r29: + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + add 12,sp + jmp [r31] + .size __return_r27_r29,.-__return_r27_r29 +#endif /* L_save_27 */ + +#ifdef L_save_28 + .text + .align 2 + .globl __save_r28_r29 + .type __save_r28_r29,@function + /* Allocate space and save registers 28,29 on the stack. */ + /* Called via: jalr __save_r28_r29,r10. */ +__save_r28_r29: + add -8,sp + st.w r29,0[sp] + st.w r28,4[sp] + jmp [r10] + .size __save_r28_r29,.-__save_r28_r29 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r28_r29. */ + .align 2 + .globl __return_r28_r29 + .type __return_r28_r29,@function +__return_r28_r29: + ld.w 0[sp],r29 + ld.w 4[sp],r28 + add 8,sp + jmp [r31] + .size __return_r28_r29,.-__return_r28_r29 +#endif /* L_save_28 */ + +#ifdef L_save_29 + .text + .align 2 + .globl __save_r29 + .type __save_r29,@function + /* Allocate space and save register 29 on the stack. */ + /* Called via: jalr __save_r29,r10. */ +__save_r29: + add -4,sp + st.w r29,0[sp] + jmp [r10] + .size __save_r29,.-__save_r29 + + /* Restore saved register 29, deallocate stack and return to the user. */ + /* Called via: jr __return_r29. */ + .align 2 + .globl __return_r29 + .type __return_r29,@function +__return_r29: + ld.w 0[sp],r29 + add 4,sp + jmp [r31] + .size __return_r29,.-__return_r29 +#endif /* L_save_28 */ + +#ifdef L_save_2c + .text + .align 2 + .globl __save_r2_r31 + .type __save_r2_r31,@function + /* Allocate space and save registers 20 .. 
29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r2_r31,r10. */ +__save_r2_r31: +#ifdef __EP__ + mov ep,r1 + addi -48,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + sst.w r21,32[ep] + sst.w r20,36[ep] + sst.w r2,40[ep] + sst.w r31,44[ep] + mov r1,ep +#else + addi -48,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] + st.w r21,32[sp] + st.w r20,36[sp] + st.w r2,40[sp] + st.w r31,44[sp] +#endif + jmp [r10] + .size __save_r2_r31,.-__save_r2_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r20_r31. */ + .align 2 + .globl __return_r2_r31 + .type __return_r2_r31,@function +__return_r2_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + sld.w 32[ep],r21 + sld.w 36[ep],r20 + sld.w 40[ep],r2 + sld.w 44[ep],r31 + addi 48,sp,sp + mov r1,ep +#else + ld.w 44[sp],r29 + ld.w 40[sp],r28 + ld.w 36[sp],r27 + ld.w 32[sp],r26 + ld.w 28[sp],r25 + ld.w 24[sp],r24 + ld.w 20[sp],r23 + ld.w 16[sp],r22 + ld.w 12[sp],r21 + ld.w 8[sp],r20 + ld.w 4[sp],r2 + ld.w 0[sp],r31 + addi 48,sp,sp +#endif + jmp [r31] + .size __return_r2_r31,.-__return_r2_r31 +#endif /* L_save_2c */ + +#ifdef L_save_20c + .text + .align 2 + .globl __save_r20_r31 + .type __save_r20_r31,@function + /* Allocate space and save registers 20 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r20_r31,r10. */ +__save_r20_r31: +#ifdef __EP__ + mov ep,r1 + addi -44,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + sst.w r21,32[ep] + sst.w r20,36[ep] + sst.w r31,40[ep] + mov r1,ep +#else + addi -44,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] + st.w r21,32[sp] + st.w r20,36[sp] + st.w r31,40[sp] +#endif + jmp [r10] + .size __save_r20_r31,.-__save_r20_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r20_r31. */ + .align 2 + .globl __return_r20_r31 + .type __return_r20_r31,@function +__return_r20_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + sld.w 32[ep],r21 + sld.w 36[ep],r20 + sld.w 40[ep],r31 + addi 44,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r22 + ld.w 32[sp],r21 + ld.w 36[sp],r20 + ld.w 40[sp],r31 + addi 44,sp,sp +#endif + jmp [r31] + .size __return_r20_r31,.-__return_r20_r31 +#endif /* L_save_20c */ + +#ifdef L_save_21c + .text + .align 2 + .globl __save_r21_r31 + .type __save_r21_r31,@function + /* Allocate space and save registers 21 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r21_r31,r10. 
*/ +__save_r21_r31: +#ifdef __EP__ + mov ep,r1 + addi -40,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + sst.w r21,32[ep] + sst.w r31,36[ep] + mov r1,ep + jmp [r10] +#else + addi -40,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] + st.w r21,32[sp] + st.w r31,36[sp] + jmp [r10] +#endif + .size __save_r21_r31,.-__save_r21_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r21_r31. */ + .align 2 + .globl __return_r21_r31 + .type __return_r21_r31,@function +__return_r21_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + sld.w 32[ep],r21 + sld.w 36[ep],r31 + addi 40,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r22 + ld.w 32[sp],r21 + ld.w 36[sp],r31 + addi 40,sp,sp +#endif + jmp [r31] + .size __return_r21_r31,.-__return_r21_r31 +#endif /* L_save_21c */ + +#ifdef L_save_22c + .text + .align 2 + .globl __save_r22_r31 + .type __save_r22_r31,@function + /* Allocate space and save registers 22 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r22_r31,r10. */ +__save_r22_r31: +#ifdef __EP__ + mov ep,r1 + addi -36,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r22,28[ep] + sst.w r31,32[ep] + mov r1,ep +#else + addi -36,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r22,28[sp] + st.w r31,32[sp] +#endif + jmp [r10] + .size __save_r22_r31,.-__save_r22_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r22_r31. */ + .align 2 + .globl __return_r22_r31 + .type __return_r22_r31,@function +__return_r22_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r22 + sld.w 32[ep],r31 + addi 36,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r22 + ld.w 32[sp],r31 + addi 36,sp,sp +#endif + jmp [r31] + .size __return_r22_r31,.-__return_r22_r31 +#endif /* L_save_22c */ + +#ifdef L_save_23c + .text + .align 2 + .globl __save_r23_r31 + .type __save_r23_r31,@function + /* Allocate space and save registers 23 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r23_r31,r10. 
*/ +__save_r23_r31: +#ifdef __EP__ + mov ep,r1 + addi -32,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r23,24[ep] + sst.w r31,28[ep] + mov r1,ep +#else + addi -32,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r23,24[sp] + st.w r31,28[sp] +#endif + jmp [r10] + .size __save_r23_r31,.-__save_r23_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r23_r31. */ + .align 2 + .globl __return_r23_r31 + .type __return_r23_r31,@function +__return_r23_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r23 + sld.w 28[ep],r31 + addi 32,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r23 + ld.w 28[sp],r31 + addi 32,sp,sp +#endif + jmp [r31] + .size __return_r23_r31,.-__return_r23_r31 +#endif /* L_save_23c */ + +#ifdef L_save_24c + .text + .align 2 + .globl __save_r24_r31 + .type __save_r24_r31,@function + /* Allocate space and save registers 24 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r24_r31,r10. */ +__save_r24_r31: +#ifdef __EP__ + mov ep,r1 + addi -28,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r24,20[ep] + sst.w r31,24[ep] + mov r1,ep +#else + addi -28,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r24,20[sp] + st.w r31,24[sp] +#endif + jmp [r10] + .size __save_r24_r31,.-__save_r24_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r24_r31. */ + .align 2 + .globl __return_r24_r31 + .type __return_r24_r31,@function +__return_r24_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r24 + sld.w 24[ep],r31 + addi 28,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r24 + ld.w 24[sp],r31 + addi 28,sp,sp +#endif + jmp [r31] + .size __return_r24_r31,.-__return_r24_r31 +#endif /* L_save_24c */ + +#ifdef L_save_25c + .text + .align 2 + .globl __save_r25_r31 + .type __save_r25_r31,@function + /* Allocate space and save registers 25 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r25_r31,r10. */ +__save_r25_r31: +#ifdef __EP__ + mov ep,r1 + addi -24,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r25,16[ep] + sst.w r31,20[ep] + mov r1,ep +#else + addi -24,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r25,16[sp] + st.w r31,20[sp] +#endif + jmp [r10] + .size __save_r25_r31,.-__save_r25_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r25_r31. 
*/ + .align 2 + .globl __return_r25_r31 + .type __return_r25_r31,@function +__return_r25_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r25 + sld.w 20[ep],r31 + addi 24,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r25 + ld.w 20[sp],r31 + addi 24,sp,sp +#endif + jmp [r31] + .size __return_r25_r31,.-__return_r25_r31 +#endif /* L_save_25c */ + +#ifdef L_save_26c + .text + .align 2 + .globl __save_r26_r31 + .type __save_r26_r31,@function + /* Allocate space and save registers 26 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r26_r31,r10. */ +__save_r26_r31: +#ifdef __EP__ + mov ep,r1 + addi -20,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r26,12[ep] + sst.w r31,16[ep] + mov r1,ep +#else + addi -20,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r26,12[sp] + st.w r31,16[sp] +#endif + jmp [r10] + .size __save_r26_r31,.-__save_r26_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r26_r31. */ + .align 2 + .globl __return_r26_r31 + .type __return_r26_r31,@function +__return_r26_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r26 + sld.w 16[ep],r31 + addi 20,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r26 + ld.w 16[sp],r31 + addi 20,sp,sp +#endif + jmp [r31] + .size __return_r26_r31,.-__return_r26_r31 +#endif /* L_save_26c */ + +#ifdef L_save_27c + .text + .align 2 + .globl __save_r27_r31 + .type __save_r27_r31,@function + /* Allocate space and save registers 27 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r27_r31,r10. */ +__save_r27_r31: +#ifdef __EP__ + mov ep,r1 + addi -16,sp,sp + mov sp,ep + sst.w r29,0[ep] + sst.w r28,4[ep] + sst.w r27,8[ep] + sst.w r31,12[ep] + mov r1,ep +#else + addi -16,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r27,8[sp] + st.w r31,12[sp] +#endif + jmp [r10] + .size __save_r27_r31,.-__save_r27_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r27_r31. */ + .align 2 + .globl __return_r27_r31 + .type __return_r27_r31,@function +__return_r27_r31: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 0[ep],r29 + sld.w 4[ep],r28 + sld.w 8[ep],r27 + sld.w 12[ep],r31 + addi 16,sp,sp + mov r1,ep +#else + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r27 + ld.w 12[sp],r31 + addi 16,sp,sp +#endif + jmp [r31] + .size __return_r27_r31,.-__return_r27_r31 +#endif /* L_save_27c */ + +#ifdef L_save_28c + .text + .align 2 + .globl __save_r28_r31 + .type __save_r28_r31,@function + /* Allocate space and save registers 28 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r28_r31,r10. */ +__save_r28_r31: + addi -12,sp,sp + st.w r29,0[sp] + st.w r28,4[sp] + st.w r31,8[sp] + jmp [r10] + .size __save_r28_r31,.-__save_r28_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r28_r31. 
*/ + .align 2 + .globl __return_r28_r31 + .type __return_r28_r31,@function +__return_r28_r31: + ld.w 0[sp],r29 + ld.w 4[sp],r28 + ld.w 8[sp],r31 + addi 12,sp,sp + jmp [r31] + .size __return_r28_r31,.-__return_r28_r31 +#endif /* L_save_28c */ + +#ifdef L_save_29c + .text + .align 2 + .globl __save_r29_r31 + .type __save_r29_r31,@function + /* Allocate space and save registers 29 & 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r29_r31,r10. */ +__save_r29_r31: + addi -8,sp,sp + st.w r29,0[sp] + st.w r31,4[sp] + jmp [r10] + .size __save_r29_r31,.-__save_r29_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r29_r31. */ + .align 2 + .globl __return_r29_r31 + .type __return_r29_r31,@function +__return_r29_r31: + ld.w 0[sp],r29 + ld.w 4[sp],r31 + addi 8,sp,sp + jmp [r31] + .size __return_r29_r31,.-__return_r29_r31 +#endif /* L_save_29c */ + +#ifdef L_save_31c + .text + .align 2 + .globl __save_r31 + .type __save_r31,@function + /* Allocate space and save register 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: jalr __save_r31,r10. */ +__save_r31: + addi -4,sp,sp + st.w r31,0[sp] + jmp [r10] + .size __save_r31,.-__save_r31 + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: jr __return_r31. */ + .align 2 + .globl __return_r31 + .type __return_r31,@function +__return_r31: + ld.w 0[sp],r31 + addi 4,sp,sp + jmp [r31] + .size __return_r31,.-__return_r31 +#endif /* L_save_31c */ + +#ifdef L_save_interrupt + .text + .align 2 + .globl __save_interrupt + .type __save_interrupt,@function + /* Save registers r1, r4 on stack and load up with expected values. */ + /* Note, 20 bytes of stack have already been allocated. */ + /* Called via: jalr __save_interrupt,r10. */ +__save_interrupt: + /* add -20,sp ; st.w r11,16[sp] ; st.w r10,12[sp] ; */ + st.w ep,0[sp] + st.w gp,4[sp] + st.w r1,8[sp] + movhi hi(__ep),r0,ep + movea lo(__ep),ep,ep + movhi hi(__gp),r0,gp + movea lo(__gp),gp,gp + jmp [r10] + .size __save_interrupt,.-__save_interrupt + + /* Restore saved registers, deallocate stack and return from the interrupt. */ + /* Called via: jr __return_interrupt. */ + .align 2 + .globl __return_interrupt + .type __return_interrupt,@function +__return_interrupt: + ld.w 0[sp],ep + ld.w 4[sp],gp + ld.w 8[sp],r1 + ld.w 12[sp],r10 + ld.w 16[sp],r11 + addi 20,sp,sp + reti + .size __return_interrupt,.-__return_interrupt +#endif /* L_save_interrupt */ + +#ifdef L_save_all_interrupt + .text + .align 2 + .globl __save_all_interrupt + .type __save_all_interrupt,@function + /* Save all registers except for those saved in __save_interrupt. */ + /* Allocate enough stack for all of the registers & 16 bytes of space. */ + /* Called via: jalr __save_all_interrupt,r10. 
*/ +__save_all_interrupt: + addi -104,sp,sp +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sst.w r31,100[ep] + sst.w r2,96[ep] + sst.w gp,92[ep] + sst.w r6,88[ep] + sst.w r7,84[ep] + sst.w r8,80[ep] + sst.w r9,76[ep] + sst.w r11,72[ep] + sst.w r12,68[ep] + sst.w r13,64[ep] + sst.w r14,60[ep] + sst.w r15,56[ep] + sst.w r16,52[ep] + sst.w r17,48[ep] + sst.w r18,44[ep] + sst.w r19,40[ep] + sst.w r20,36[ep] + sst.w r21,32[ep] + sst.w r22,28[ep] + sst.w r23,24[ep] + sst.w r24,20[ep] + sst.w r25,16[ep] + sst.w r26,12[ep] + sst.w r27,8[ep] + sst.w r28,4[ep] + sst.w r29,0[ep] + mov r1,ep +#else + st.w r31,100[sp] + st.w r2,96[sp] + st.w gp,92[sp] + st.w r6,88[sp] + st.w r7,84[sp] + st.w r8,80[sp] + st.w r9,76[sp] + st.w r11,72[sp] + st.w r12,68[sp] + st.w r13,64[sp] + st.w r14,60[sp] + st.w r15,56[sp] + st.w r16,52[sp] + st.w r17,48[sp] + st.w r18,44[sp] + st.w r19,40[sp] + st.w r20,36[sp] + st.w r21,32[sp] + st.w r22,28[sp] + st.w r23,24[sp] + st.w r24,20[sp] + st.w r25,16[sp] + st.w r26,12[sp] + st.w r27,8[sp] + st.w r28,4[sp] + st.w r29,0[sp] +#endif + jmp [r10] + .size __save_all_interrupt,.-__save_all_interrupt + + .globl __restore_all_interrupt + .type __restore_all_interrupt,@function + /* Restore all registers saved in __save_all_interrupt and + deallocate the stack space. */ + /* Called via: jalr __restore_all_interrupt,r10. */ +__restore_all_interrupt: +#ifdef __EP__ + mov ep,r1 + mov sp,ep + sld.w 100[ep],r31 + sld.w 96[ep],r2 + sld.w 92[ep],gp + sld.w 88[ep],r6 + sld.w 84[ep],r7 + sld.w 80[ep],r8 + sld.w 76[ep],r9 + sld.w 72[ep],r11 + sld.w 68[ep],r12 + sld.w 64[ep],r13 + sld.w 60[ep],r14 + sld.w 56[ep],r15 + sld.w 52[ep],r16 + sld.w 48[ep],r17 + sld.w 44[ep],r18 + sld.w 40[ep],r19 + sld.w 36[ep],r20 + sld.w 32[ep],r21 + sld.w 28[ep],r22 + sld.w 24[ep],r23 + sld.w 20[ep],r24 + sld.w 16[ep],r25 + sld.w 12[ep],r26 + sld.w 8[ep],r27 + sld.w 4[ep],r28 + sld.w 0[ep],r29 + mov r1,ep +#else + ld.w 100[sp],r31 + ld.w 96[sp],r2 + ld.w 92[sp],gp + ld.w 88[sp],r6 + ld.w 84[sp],r7 + ld.w 80[sp],r8 + ld.w 76[sp],r9 + ld.w 72[sp],r11 + ld.w 68[sp],r12 + ld.w 64[sp],r13 + ld.w 60[sp],r14 + ld.w 56[sp],r15 + ld.w 52[sp],r16 + ld.w 48[sp],r17 + ld.w 44[sp],r18 + ld.w 40[sp],r19 + ld.w 36[sp],r20 + ld.w 32[sp],r21 + ld.w 28[sp],r22 + ld.w 24[sp],r23 + ld.w 20[sp],r24 + ld.w 16[sp],r25 + ld.w 12[sp],r26 + ld.w 8[sp],r27 + ld.w 4[sp],r28 + ld.w 0[sp],r29 +#endif + addi 104,sp,sp + jmp [r10] + .size __restore_all_interrupt,.-__restore_all_interrupt +#endif /* L_save_all_interrupt */ + +#if defined(__v850e__) || defined(__v850e1__) || defined(__v850e2__) || defined(__v850e2v3__) +#ifdef L_callt_save_r2_r29 + /* Put these functions into the call table area. */ + .call_table_text + + /* Allocate space and save registers 2, 20 .. 29 on the stack. */ + /* Called via: callt ctoff(__callt_save_r2_r29). */ + .align 2 +.L_save_r2_r29: + add -4, sp + st.w r2, 0[sp] + prepare {r20 - r29}, 0 + ctret + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: callt ctoff(__callt_return_r2_r29). */ + .align 2 +.L_return_r2_r29: + dispose 0, {r20-r29} + ld.w 0[sp], r2 + add 4, sp + jmp [r31] + + /* Place the offsets of the start of these routines into the call table. */ + .call_table_data + + .global __callt_save_r2_r29 + .type __callt_save_r2_r29,@function +__callt_save_r2_r29: .short ctoff(.L_save_r2_r29) + + .global __callt_return_r2_r29 + .type __callt_return_r2_r29,@function +__callt_return_r2_r29: .short ctoff(.L_return_r2_r29) + +#endif /* L_callt_save_r2_r29. 
*/ + +#ifdef L_callt_save_r2_r31 + /* Put these functions into the call table area. */ + .call_table_text + + /* Allocate space and save registers 2 and 20 .. 29, 31 on the stack. */ + /* Also allocate space for the argument save area. */ + /* Called via: callt ctoff(__callt_save_r2_r31). */ + .align 2 +.L_save_r2_r31: + add -4, sp + st.w r2, 0[sp] + prepare {r20 - r29, r31}, 0 + ctret + + /* Restore saved registers, deallocate stack and return to the user. */ + /* Called via: callt ctoff(__callt_return_r2_r31). */ + .align 2 +.L_return_r2_r31: + dispose 0, {r20 - r29, r31} + ld.w 0[sp], r2 + addi 4, sp, sp + jmp [r31] + + /* Place the offsets of the start of these routines into the call table. */ + .call_table_data + + .global __callt_save_r2_r31 + .type __callt_save_r2_r31,@function +__callt_save_r2_r31: .short ctoff(.L_save_r2_r31) + + .global __callt_return_r2_r31 + .type __callt_return_r2_r31,@function +__callt_return_r2_r31: .short ctoff(.L_return_r2_r31) + +#endif /* L_callt_save_r2_r31 */ + +#ifdef L_callt_save_interrupt + /* Put these functions into the call table area. */ + .call_table_text + + /* Save registers r1, ep, gp, r10 on stack and load up with expected values. */ + /* Called via: callt ctoff(__callt_save_interrupt). */ + .align 2 +.L_save_interrupt: + /* SP has already been moved before callt ctoff(_save_interrupt). */ + /* R1,R10,R11,ctpc,ctpsw has alread been saved bofore callt ctoff(_save_interrupt). */ + /* addi -28, sp, sp */ + /* st.w r1, 24[sp] */ + /* st.w r10, 12[sp] */ + /* st.w r11, 16[sp] */ + /* stsr ctpc, r10 */ + /* st.w r10, 20[sp] */ + /* stsr ctpsw, r10 */ + /* st.w r10, 24[sp] */ + st.w ep, 0[sp] + st.w gp, 4[sp] + st.w r1, 8[sp] + mov hilo(__ep),ep + mov hilo(__gp),gp + ctret + + .call_table_text + /* Restore saved registers, deallocate stack and return from the interrupt. */ + /* Called via: callt ctoff(__callt_restore_interrupt). */ + .align 2 + .globl __return_interrupt + .type __return_interrupt,@function +.L_return_interrupt: + ld.w 24[sp], r1 + ldsr r1, ctpsw + ld.w 20[sp], r1 + ldsr r1, ctpc + ld.w 16[sp], r11 + ld.w 12[sp], r10 + ld.w 8[sp], r1 + ld.w 4[sp], gp + ld.w 0[sp], ep + addi 28, sp, sp + reti + + /* Place the offsets of the start of these routines into the call table. */ + .call_table_data + + .global __callt_save_interrupt + .type __callt_save_interrupt,@function +__callt_save_interrupt: .short ctoff(.L_save_interrupt) + + .global __callt_return_interrupt + .type __callt_return_interrupt,@function +__callt_return_interrupt: .short ctoff(.L_return_interrupt) + +#endif /* L_callt_save_interrupt */ + +#ifdef L_callt_save_all_interrupt + /* Put these functions into the call table area. */ + .call_table_text + + /* Save all registers except for those saved in __save_interrupt. */ + /* Allocate enough stack for all of the registers & 16 bytes of space. */ + /* Called via: callt ctoff(__callt_save_all_interrupt). 
*/ + .align 2 +.L_save_all_interrupt: + addi -60, sp, sp +#ifdef __EP__ + mov ep, r1 + mov sp, ep + sst.w r2, 56[ep] + sst.w r5, 52[ep] + sst.w r6, 48[ep] + sst.w r7, 44[ep] + sst.w r8, 40[ep] + sst.w r9, 36[ep] + sst.w r11, 32[ep] + sst.w r12, 28[ep] + sst.w r13, 24[ep] + sst.w r14, 20[ep] + sst.w r15, 16[ep] + sst.w r16, 12[ep] + sst.w r17, 8[ep] + sst.w r18, 4[ep] + sst.w r19, 0[ep] + mov r1, ep +#else + st.w r2, 56[sp] + st.w r5, 52[sp] + st.w r6, 48[sp] + st.w r7, 44[sp] + st.w r8, 40[sp] + st.w r9, 36[sp] + st.w r11, 32[sp] + st.w r12, 28[sp] + st.w r13, 24[sp] + st.w r14, 20[sp] + st.w r15, 16[sp] + st.w r16, 12[sp] + st.w r17, 8[sp] + st.w r18, 4[sp] + st.w r19, 0[sp] +#endif + prepare {r20 - r29, r31}, 0 + ctret + + /* Restore all registers saved in __save_all_interrupt + deallocate the stack space. */ + /* Called via: callt ctoff(__callt_restore_all_interrupt). */ + .align 2 +.L_restore_all_interrupt: + dispose 0, {r20 - r29, r31} +#ifdef __EP__ + mov ep, r1 + mov sp, ep + sld.w 0 [ep], r19 + sld.w 4 [ep], r18 + sld.w 8 [ep], r17 + sld.w 12[ep], r16 + sld.w 16[ep], r15 + sld.w 20[ep], r14 + sld.w 24[ep], r13 + sld.w 28[ep], r12 + sld.w 32[ep], r11 + sld.w 36[ep], r9 + sld.w 40[ep], r8 + sld.w 44[ep], r7 + sld.w 48[ep], r6 + sld.w 52[ep], r5 + sld.w 56[ep], r2 + mov r1, ep +#else + ld.w 0 [sp], r19 + ld.w 4 [sp], r18 + ld.w 8 [sp], r17 + ld.w 12[sp], r16 + ld.w 16[sp], r15 + ld.w 20[sp], r14 + ld.w 24[sp], r13 + ld.w 28[sp], r12 + ld.w 32[sp], r11 + ld.w 36[sp], r9 + ld.w 40[sp], r8 + ld.w 44[sp], r7 + ld.w 48[sp], r6 + ld.w 52[sp], r5 + ld.w 56[sp], r2 +#endif + addi 60, sp, sp + ctret + + /* Place the offsets of the start of these routines into the call table. */ + .call_table_data + + .global __callt_save_all_interrupt + .type __callt_save_all_interrupt,@function +__callt_save_all_interrupt: .short ctoff(.L_save_all_interrupt) + + .global __callt_restore_all_interrupt + .type __callt_restore_all_interrupt,@function +__callt_restore_all_interrupt: .short ctoff(.L_restore_all_interrupt) + +#endif /* L_callt_save_all_interrupt */ + + +#define MAKE_CALLT_FUNCS( START ) \ + .call_table_text ;\ + .align 2 ;\ + /* Allocate space and save registers START .. r29 on the stack. */ ;\ + /* Called via: callt ctoff(__callt_save_START_r29). */ ;\ +.L_save_##START##_r29: ;\ + prepare { START - r29 }, 0 ;\ + ctret ;\ + ;\ + /* Restore saved registers, deallocate stack and return. */ ;\ + /* Called via: callt ctoff(__return_START_r29). */ ;\ + .align 2 ;\ +.L_return_##START##_r29: ;\ + dispose 0, { START - r29 }, r31 ;\ + ;\ + /* Place the offsets of the start of these funcs into the call table. */;\ + .call_table_data ;\ + ;\ + .global __callt_save_##START##_r29 ;\ + .type __callt_save_##START##_r29,@function ;\ +__callt_save_##START##_r29: .short ctoff(.L_save_##START##_r29 ) ;\ + ;\ + .global __callt_return_##START##_r29 ;\ + .type __callt_return_##START##_r29,@function ;\ +__callt_return_##START##_r29: .short ctoff(.L_return_##START##_r29 ) + + +#define MAKE_CALLT_CFUNCS( START ) \ + .call_table_text ;\ + .align 2 ;\ + /* Allocate space and save registers START .. r31 on the stack. */ ;\ + /* Called via: callt ctoff(__callt_save_START_r31c). */ ;\ +.L_save_##START##_r31c: ;\ + prepare { START - r29, r31}, 0 ;\ + ctret ;\ + ;\ + /* Restore saved registers, deallocate stack and return. */ ;\ + /* Called via: callt ctoff(__return_START_r31c). 
*/ ;\ + .align 2 ;\ +.L_return_##START##_r31c: ;\ + dispose 0, { START - r29, r31}, r31 ;\ + ;\ + /* Place the offsets of the start of these funcs into the call table. */;\ + .call_table_data ;\ + ;\ + .global __callt_save_##START##_r31c ;\ + .type __callt_save_##START##_r31c,@function ;\ +__callt_save_##START##_r31c: .short ctoff(.L_save_##START##_r31c ) ;\ + ;\ + .global __callt_return_##START##_r31c ;\ + .type __callt_return_##START##_r31c,@function ;\ +__callt_return_##START##_r31c: .short ctoff(.L_return_##START##_r31c ) + + +#ifdef L_callt_save_20 + MAKE_CALLT_FUNCS (r20) +#endif +#ifdef L_callt_save_21 + MAKE_CALLT_FUNCS (r21) +#endif +#ifdef L_callt_save_22 + MAKE_CALLT_FUNCS (r22) +#endif +#ifdef L_callt_save_23 + MAKE_CALLT_FUNCS (r23) +#endif +#ifdef L_callt_save_24 + MAKE_CALLT_FUNCS (r24) +#endif +#ifdef L_callt_save_25 + MAKE_CALLT_FUNCS (r25) +#endif +#ifdef L_callt_save_26 + MAKE_CALLT_FUNCS (r26) +#endif +#ifdef L_callt_save_27 + MAKE_CALLT_FUNCS (r27) +#endif +#ifdef L_callt_save_28 + MAKE_CALLT_FUNCS (r28) +#endif +#ifdef L_callt_save_29 + MAKE_CALLT_FUNCS (r29) +#endif + +#ifdef L_callt_save_20c + MAKE_CALLT_CFUNCS (r20) +#endif +#ifdef L_callt_save_21c + MAKE_CALLT_CFUNCS (r21) +#endif +#ifdef L_callt_save_22c + MAKE_CALLT_CFUNCS (r22) +#endif +#ifdef L_callt_save_23c + MAKE_CALLT_CFUNCS (r23) +#endif +#ifdef L_callt_save_24c + MAKE_CALLT_CFUNCS (r24) +#endif +#ifdef L_callt_save_25c + MAKE_CALLT_CFUNCS (r25) +#endif +#ifdef L_callt_save_26c + MAKE_CALLT_CFUNCS (r26) +#endif +#ifdef L_callt_save_27c + MAKE_CALLT_CFUNCS (r27) +#endif +#ifdef L_callt_save_28c + MAKE_CALLT_CFUNCS (r28) +#endif +#ifdef L_callt_save_29c + MAKE_CALLT_CFUNCS (r29) +#endif + + +#ifdef L_callt_save_31c + .call_table_text + .align 2 + /* Allocate space and save register r31 on the stack. */ + /* Called via: callt ctoff(__callt_save_r31c). */ +.L_callt_save_r31c: + prepare {r31}, 0 + ctret + + /* Restore saved registers, deallocate stack and return. */ + /* Called via: callt ctoff(__return_r31c). */ + .align 2 +.L_callt_return_r31c: + dispose 0, {r31}, r31 + + /* Place the offsets of the start of these funcs into the call table. */ + .call_table_data + + .global __callt_save_r31c + .type __callt_save_r31c,@function +__callt_save_r31c: .short ctoff(.L_callt_save_r31c) + + .global __callt_return_r31c + .type __callt_return_r31c,@function +__callt_return_r31c: .short ctoff(.L_callt_return_r31c) +#endif + +#endif /* __v850e__ */ + +/* libgcc2 routines for NEC V850. */ +/* Double Integer Arithmetical Operation. */ + +#ifdef L_negdi2 + .text + .global ___negdi2 + .type ___negdi2, @function +___negdi2: + not r6, r10 + add 1, r10 + setf l, r6 + not r7, r11 + add r6, r11 + jmp [lp] + + .size ___negdi2,.-___negdi2 +#endif + +#ifdef L_cmpdi2 + .text + .global ___cmpdi2 + .type ___cmpdi2,@function +___cmpdi2: + # Signed comparison bitween each high word. + cmp r9, r7 + be .L_cmpdi_cmp_low + setf ge, r10 + setf gt, r6 + add r6, r10 + jmp [lp] +.L_cmpdi_cmp_low: + # Unsigned comparigon bitween each low word. + cmp r8, r6 + setf nl, r10 + setf h, r6 + add r6, r10 + jmp [lp] + .size ___cmpdi2, . - ___cmpdi2 +#endif + +#ifdef L_ucmpdi2 + .text + .global ___ucmpdi2 + .type ___ucmpdi2,@function +___ucmpdi2: + cmp r9, r7 # Check if each high word are same. + bne .L_ucmpdi_check_psw + cmp r8, r6 # Compare the word. +.L_ucmpdi_check_psw: + setf nl, r10 # + setf h, r6 # + add r6, r10 # Add the result of comparison NL and comparison H. + jmp [lp] + .size ___ucmpdi2, . 
- ___ucmpdi2 +#endif + +#ifdef L_muldi3 + .text + .global ___muldi3 + .type ___muldi3,@function +___muldi3: +#ifdef __v850__ + jarl __save_r26_r31, r10 + addi 16, sp, sp + mov r6, r28 + shr 15, r28 + movea lo(32767), r0, r14 + and r14, r28 + mov r8, r10 + shr 15, r10 + and r14, r10 + mov r6, r19 + shr 30, r19 + mov r7, r12 + shl 2, r12 + or r12, r19 + and r14, r19 + mov r8, r13 + shr 30, r13 + mov r9, r12 + shl 2, r12 + or r12, r13 + and r14, r13 + mov r7, r11 + shr 13, r11 + and r14, r11 + mov r9, r31 + shr 13, r31 + and r14, r31 + mov r7, r29 + shr 28, r29 + and r14, r29 + mov r9, r12 + shr 28, r12 + and r14, r12 + and r14, r6 + and r14, r8 + mov r6, r14 + mulh r8, r14 + mov r6, r16 + mulh r10, r16 + mov r6, r18 + mulh r13, r18 + mov r6, r15 + mulh r31, r15 + mulh r12, r6 + mov r28, r17 + mulh r10, r17 + add -16, sp + mov r28, r12 + mulh r8, r12 + add r17, r18 + mov r28, r17 + mulh r31, r17 + add r12, r16 + mov r28, r12 + mulh r13, r12 + add r17, r6 + mov r19, r17 + add r12, r15 + mov r19, r12 + mulh r8, r12 + mulh r10, r17 + add r12, r18 + mov r19, r12 + mulh r13, r12 + add r17, r15 + mov r11, r13 + mulh r8, r13 + add r12, r6 + mov r11, r12 + mulh r10, r12 + add r13, r15 + mulh r29, r8 + add r12, r6 + mov r16, r13 + shl 15, r13 + add r14, r13 + mov r18, r12 + shl 30, r12 + mov r13, r26 + add r12, r26 + shr 15, r14 + movhi hi(131071), r0, r12 + movea lo(131071), r12, r13 + and r13, r14 + mov r16, r12 + and r13, r12 + add r12, r14 + mov r18, r12 + shl 15, r12 + and r13, r12 + add r12, r14 + shr 17, r14 + shr 17, r16 + add r14, r16 + shl 13, r15 + shr 2, r18 + add r18, r15 + add r15, r16 + mov r16, r27 + add r8, r6 + shl 28, r6 + add r6, r27 + mov r26, r10 + mov r27, r11 + jr __return_r26_r31 +#else /* defined(__v850e__) */ + /* (Ahi << 32 + Alo) * (Bhi << 32 + Blo) */ + /* r7 r6 r9 r8 */ + mov r8, r10 + mulu r7, r8, r0 /* Ahi * Blo */ + mulu r6, r9, r0 /* Alo * Bhi */ + mulu r6, r10, r11 /* Alo * Blo */ + add r8, r11 + add r9, r11 + jmp [r31] +#endif /* defined(__v850e__) */ + .size ___muldi3, . - ___muldi3 +#endif + diff --git a/libgcc/config/v850/t-v850 b/libgcc/config/v850/t-v850 new file mode 100644 index 00000000000..b61703ace09 --- /dev/null +++ b/libgcc/config/v850/t-v850 @@ -0,0 +1,60 @@ +LIB1ASMSRC = v850/lib1funcs.S +LIB1ASMFUNCS = _mulsi3 \ + _divsi3 \ + _udivsi3 \ + _modsi3 \ + _umodsi3 \ + _save_2 \ + _save_20 \ + _save_21 \ + _save_22 \ + _save_23 \ + _save_24 \ + _save_25 \ + _save_26 \ + _save_27 \ + _save_28 \ + _save_29 \ + _save_2c \ + _save_20c \ + _save_21c \ + _save_22c \ + _save_23c \ + _save_24c \ + _save_25c \ + _save_26c \ + _save_27c \ + _save_28c \ + _save_29c \ + _save_31c \ + _save_interrupt \ + _save_all_interrupt \ + _callt_save_20 \ + _callt_save_21 \ + _callt_save_22 \ + _callt_save_23 \ + _callt_save_24 \ + _callt_save_25 \ + _callt_save_26 \ + _callt_save_27 \ + _callt_save_28 \ + _callt_save_29 \ + _callt_save_20c \ + _callt_save_21c \ + _callt_save_22c \ + _callt_save_23c \ + _callt_save_24c \ + _callt_save_25c \ + _callt_save_26c \ + _callt_save_27c \ + _callt_save_28c \ + _callt_save_29c \ + _callt_save_31c \ + _callt_save_interrupt \ + _callt_save_all_interrupt \ + _callt_save_r2_r29 \ + _callt_save_r2_r31 \ + _negdi2 \ + _cmpdi2 \ + _ucmpdi2 \ + _muldi3 diff --git a/libgcc/config/vax/lib1funcs.S b/libgcc/config/vax/lib1funcs.S new file mode 100644 index 00000000000..1d57b56dad9 --- /dev/null +++ b/libgcc/config/vax/lib1funcs.S @@ -0,0 +1,92 @@ +/* Copyright (C) 2009 Free Software Foundation, Inc. + This file is part of GCC. 
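Two notes on the DImode helpers shown above. __cmpdi2 and __ucmpdi2 follow the usual libgcc convention of returning 0, 1 or 2 for less-than, equal and greater-than; the setf/setf/add pairs build exactly that sum. The V850E branch of __muldi3 forms the low 64 bits of the product from three 32x32 multiplies, since the Ahi*Bhi term only affects bits 64 and up. A C sketch of both, with invented names and not taken from the patch:

/* 0 if a < b, 1 if a == b, 2 if a > b.  */
int
cmpdi2_model (long long a, long long b)
{
  return (a >= b) + (a > b);              /* setf ge + setf gt */
}

int
ucmpdi2_model (unsigned long long a, unsigned long long b)
{
  return (a >= b) + (a > b);              /* setf nl + setf h */
}

/* Low 64 bits of a * b from three 32x32 multiplies; only the low
   halves of the two cross products are needed.  */
unsigned long long
muldi3_model (unsigned long long a, unsigned long long b)
{
  unsigned int alo = (unsigned int) a, ahi = (unsigned int) (a >> 32);
  unsigned int blo = (unsigned int) b, bhi = (unsigned int) (b >> 32);
  unsigned long long lo = (unsigned long long) alo * blo;
  unsigned int hi = (unsigned int) (lo >> 32) + ahi * blo + alo * bhi;

  return ((unsigned long long) hi << 32) | (unsigned int) lo;
}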
+ Contributed by Maciej W. Rozycki . + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef L_udivsi3 + .text + .globl __udivsi3 + .type __udivsi3, @function +__udivsi3: + .word 0 + movl 8(%ap), %r1 + blss 0f /* Check bit #31 of divisor. */ + movl 4(%ap), %r2 + blss 1f /* Check bit #31 of dividend. */ + + /* Both zero, do a standard division. */ + + divl3 %r1, %r2, %r0 + ret + + /* MSB of divisor set, only 1 or 0 may result. */ +0: + decl %r1 + clrl %r0 + cmpl %r1, 4(%ap) + adwc $0, %r0 + ret + + /* MSB of dividend set, do an extended division. */ +1: + clrl %r3 + ediv %r1, %r2, %r0, %r3 + ret + .size __udivsi3, . - __udivsi3 + .previous +#endif + +#ifdef L_umodsi3 + .text + .globl __umodsi3 + .type __umodsi3, @function +__umodsi3: + .word 0 + movl 8(%ap), %r1 + blss 0f /* Check bit #31 of divisor. */ + movl 4(%ap), %r2 + blss 1f /* Check bit #31 of dividend. */ + + /* Both zero, do a standard division. */ + + divl3 %r1, %r2, %r0 + mull2 %r0, %r1 + subl3 %r1, %r2, %r0 + ret + + /* MSB of divisor set, subtract the divisor at most once. */ +0: + movl 4(%ap), %r2 + clrl %r0 + cmpl %r2, %r1 + sbwc $0, %r0 + bicl2 %r0, %r1 + subl3 %r1, %r2, %r0 + ret + + /* MSB of dividend set, do an extended division. */ +1: + clrl %r3 + ediv %r1, %r2, %r3, %r0 + ret + .size __umodsi3, . - __umodsi3 + .previous +#endif diff --git a/libgcc/config/vax/t-linux b/libgcc/config/vax/t-linux new file mode 100644 index 00000000000..17929c8717c --- /dev/null +++ b/libgcc/config/vax/t-linux @@ -0,0 +1,2 @@ +LIB1ASMSRC = vax/lib1funcs.S +LIB1ASMFUNCS = _udivsi3 _umodsi3 diff --git a/libgcc/config/xtensa/ieee754-df.S b/libgcc/config/xtensa/ieee754-df.S new file mode 100644 index 00000000000..9b46889bdc2 --- /dev/null +++ b/libgcc/config/xtensa/ieee754-df.S @@ -0,0 +1,2388 @@ +/* IEEE-754 double-precision functions for Xtensa + Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
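The VAX routines above need the case analysis because divl3 and ediv are signed operations. If the divisor has bit 31 set, the quotient can only be 0 or 1; if the dividend has bit 31 set, the value is widened to 64 bits and handed to ediv; otherwise an ordinary divl3 suffices. Roughly, in C (a sketch with invented names, not part of the patch):

unsigned int
vax_udiv_model (unsigned int x, unsigned int y)
{
  if (y & 0x80000000u)           /* 0: huge divisor, quotient 0 or 1 */
    return x >= y;
  if (x & 0x80000000u)           /* 1: huge dividend, extended divide */
    return (unsigned int) ((unsigned long long) x / y);
  return x / y;                  /* plain divl3 */
}

unsigned int
vax_urem_model (unsigned int x, unsigned int y)
{
  return x - vax_udiv_model (x, y) * y;   /* what __umodsi3 computes */
}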
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef __XTENSA_EB__ +#define xh a2 +#define xl a3 +#define yh a4 +#define yl a5 +#else +#define xh a3 +#define xl a2 +#define yh a5 +#define yl a4 +#endif + +/* Warning! The branch displacements for some Xtensa branch instructions + are quite small, and this code has been carefully laid out to keep + branch targets in range. If you change anything, be sure to check that + the assembler is not relaxing anything to branch over a jump. */ + +#ifdef L_negdf2 + + .align 4 + .global __negdf2 + .type __negdf2, @function +__negdf2: + leaf_entry sp, 16 + movi a4, 0x80000000 + xor xh, xh, a4 + leaf_return + +#endif /* L_negdf2 */ + +#ifdef L_addsubdf3 + + /* Addition */ +__adddf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Ladd_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall yh, a6, 1f + /* If x is a NaN, return it. Otherwise, return y. */ + slli a7, xh, 12 + or a7, a7, xl + beqz a7, .Ladd_ynan_or_inf +1: leaf_return + +.Ladd_ynan_or_inf: + /* Return y. */ + mov xh, yh + mov xl, yl + leaf_return + +.Ladd_opposite_signs: + /* Operand signs differ. Do a subtraction. */ + slli a7, a6, 11 + xor yh, yh, a7 + j .Lsub_same_sign + + .align 4 + .global __adddf3 + .type __adddf3, @function +__adddf3: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + + /* Check if the two operands have the same sign. */ + xor a7, xh, yh + bltz a7, .Ladd_opposite_signs + +.Ladd_same_sign: + /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ + ball xh, a6, .Ladd_xnan_or_inf + ball yh, a6, .Ladd_ynan_or_inf + + /* Compare the exponents. The smaller operand will be shifted + right by the exponent difference and added to the larger + one. */ + extui a7, xh, 20, 12 + extui a8, yh, 20, 12 + bltu a7, a8, .Ladd_shiftx + +.Ladd_shifty: + /* Check if the smaller (or equal) exponent is zero. */ + bnone yh, a6, .Ladd_yexpzero + + /* Replace yh sign/exponent with 0x001. */ + or yh, yh, a6 + slli yh, yh, 11 + srli yh, yh, 11 + +.Ladd_yexpdiff: + /* Compute the exponent difference. Optimize for difference < 32. */ + sub a10, a7, a8 + bgeui a10, 32, .Ladd_bigshifty + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out of yl are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, yl, a9 + src yl, yh, yl + srl yh, yh + +.Ladd_addy: + /* Do the 64-bit addition. */ + add xl, xl, yl + add xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, 1 +1: + /* Check if the add overflowed into the exponent. */ + extui a10, xh, 20, 12 + beq a10, a7, .Ladd_round + mov a8, a7 + j .Ladd_carry + +.Ladd_yexpzero: + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0", and increment the apparent exponent + because subnormals behave as if they had the minimum (nonzero) + exponent. Test for the case when both exponents are zero. */ + slli yh, yh, 12 + srli yh, yh, 12 + bnone xh, a6, .Ladd_bothexpzero + addi a8, a8, 1 + j .Ladd_yexpdiff + +.Ladd_bothexpzero: + /* Both exponents are zero. Handle this as a special case. There + is no need to shift or round, and the normal code for handling + a carry into the exponent field will not work because it + assumes there is an implicit "1.0" that needs to be added. 
*/ + add xl, xl, yl + add xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, 1 +1: leaf_return + +.Ladd_bigshifty: + /* Exponent difference > 64 -- just return the bigger value. */ + bgeui a10, 64, 1b + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out are saved in a9 for rounding the result. */ + ssr a10 + sll a11, yl /* lost bits shifted out of yl */ + src a9, yh, yl + srl yl, yh + movi yh, 0 + beqz a11, .Ladd_addy + or a9, a9, a10 /* any positive, nonzero value will work */ + j .Ladd_addy + +.Ladd_xexpzero: + /* Same as "yexpzero" except skip handling the case when both + exponents are zero. */ + slli xh, xh, 12 + srli xh, xh, 12 + addi a7, a7, 1 + j .Ladd_xexpdiff + +.Ladd_shiftx: + /* Same thing as the "shifty" code, but with x and y swapped. Also, + because the exponent difference is always nonzero in this version, + the shift sequence can use SLL and skip loading a constant zero. */ + bnone xh, a6, .Ladd_xexpzero + + or xh, xh, a6 + slli xh, xh, 11 + srli xh, xh, 11 + +.Ladd_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Ladd_bigshiftx + + ssr a10 + sll a9, xl + src xl, xh, xl + srl xh, xh + +.Ladd_addx: + add xl, xl, yl + add xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, 1 +1: + /* Check if the add overflowed into the exponent. */ + extui a10, xh, 20, 12 + bne a10, a8, .Ladd_carry + +.Ladd_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi xl, xl, 1 + beqz xl, .Ladd_roundcarry + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_bigshiftx: + /* Mostly the same thing as "bigshifty".... */ + bgeui a10, 64, .Ladd_returny + + ssr a10 + sll a11, xl + src a9, xh, xl + srl xl, xh + movi xh, 0 + beqz a11, .Ladd_addx + or a9, a9, a10 + j .Ladd_addx + +.Ladd_returny: + mov xh, yh + mov xl, yl + leaf_return + +.Ladd_carry: + /* The addition has overflowed into the exponent field, so the + value needs to be renormalized. The mantissa of the result + can be recovered by subtracting the original exponent and + adding 0x100000 (which is the explicit "1.0" for the + mantissa of the non-shifted operand -- the "1.0" for the + shifted operand was already added). The mantissa can then + be shifted right by one bit. The explicit "1.0" of the + shifted mantissa then needs to be replaced by the exponent, + incremented by one to account for the normalizing shift. + It is faster to combine these operations: do the shift first + and combine the additions and subtractions. If x is the + original exponent, the result is: + shifted mantissa - (x << 19) + (1 << 19) + (x << 20) + or: + shifted mantissa + ((x + 1) << 19) + Note that the exponent is incremented here by leaving the + explicit "1.0" of the mantissa in the exponent field. */ + + /* Shift xh/xl right by one bit. Save the lsb of xl. */ + mov a10, xl + ssai 1 + src xl, xh, xl + srl xh, xh + + /* See explanation above. The original exponent is in a8. */ + addi a8, a8, 1 + slli a8, a8, 19 + add xh, xh, a8 + + /* Return an Infinity if the exponent overflowed. */ + ball xh, a6, .Ladd_infinity + + /* Same thing as the "round" code except the msb of the leftover + fraction is bit 0 of a10, with the rest of the fraction in a9. */ + bbci.l a10, 0, 1f + addi xl, xl, 1 + beqz xl, .Ladd_roundcarry + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_infinity: + /* Clear the mantissa. */ + movi xl, 0 + srli xh, xh, 20 + slli xh, xh, 20 + + /* The sign bit may have been lost in a carry-out. Put it back. 
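The rounding performed at .Ladd_round above is ordinary round-to-nearest-even, with the guard word acting as the discarded fraction (its msb stands for 1/2 ulp). A minimal C sketch with an invented helper name, not part of the patch:

#include <stdint.h>

static uint64_t
round_nearest_even (uint64_t mant, uint64_t guard)
{
  if (guard & (1ULL << 63))        /* leftover fraction >= 1/2 */
    {
      mant++;                      /* round up */
      if ((guard << 1) == 0)       /* exactly 1/2: round down to even */
        mant &= ~(uint64_t) 1;
    }
  return mant;
}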
*/ + slli a8, a8, 1 + or xh, xh, a8 + leaf_return + +.Ladd_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + leaf_return + +.Ladd_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow to the exponent is OK. */ + leaf_return + + + /* Subtraction */ +__subdf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Lsub_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall yh, a6, 1f + /* Both x and y are either NaN or Inf, so the result is NaN. */ + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 +1: leaf_return + +.Lsub_ynan_or_inf: + /* Negate y and return it. */ + slli a7, a6, 11 + xor xh, yh, a7 + mov xl, yl + leaf_return + +.Lsub_opposite_signs: + /* Operand signs differ. Do an addition. */ + slli a7, a6, 11 + xor yh, yh, a7 + j .Ladd_same_sign + + .align 4 + .global __subdf3 + .type __subdf3, @function +__subdf3: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + + /* Check if the two operands have the same sign. */ + xor a7, xh, yh + bltz a7, .Lsub_opposite_signs + +.Lsub_same_sign: + /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ + ball xh, a6, .Lsub_xnan_or_inf + ball yh, a6, .Lsub_ynan_or_inf + + /* Compare the operands. In contrast to addition, the entire + value matters here. */ + extui a7, xh, 20, 11 + extui a8, yh, 20, 11 + bltu xh, yh, .Lsub_xsmaller + beq xh, yh, .Lsub_compare_low + +.Lsub_ysmaller: + /* Check if the smaller (or equal) exponent is zero. */ + bnone yh, a6, .Lsub_yexpzero + + /* Replace yh sign/exponent with 0x001. */ + or yh, yh, a6 + slli yh, yh, 11 + srli yh, yh, 11 + +.Lsub_yexpdiff: + /* Compute the exponent difference. Optimize for difference < 32. */ + sub a10, a7, a8 + bgeui a10, 32, .Lsub_bigshifty + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out of yl are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, yl, a9 + src yl, yh, yl + srl yh, yh + +.Lsub_suby: + /* Do the 64-bit subtraction. */ + sub xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, -1 +1: sub xl, xl, yl + + /* Subtract the leftover bits in a9 from zero and propagate any + borrow from xh/xl. */ + neg a9, a9 + beqz a9, 1f + addi a5, xh, -1 + moveqz xh, a5, xl + addi xl, xl, -1 +1: + /* Check if the subtract underflowed into the exponent. */ + extui a10, xh, 20, 11 + beq a10, a7, .Lsub_round + j .Lsub_borrow + +.Lsub_compare_low: + /* The high words are equal. Compare the low words. */ + bltu xl, yl, .Lsub_xsmaller + bltu yl, xl, .Lsub_ysmaller + /* The operands are equal. Return 0.0. */ + movi xh, 0 + movi xl, 0 +1: leaf_return + +.Lsub_yexpzero: + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0". Unless x is also a subnormal, increment + y's apparent exponent because subnormals behave as if they had + the minimum (nonzero) exponent. */ + slli yh, yh, 12 + srli yh, yh, 12 + bnone xh, a6, .Lsub_yexpdiff + addi a8, a8, 1 + j .Lsub_yexpdiff + +.Lsub_bigshifty: + /* Exponent difference > 64 -- just return the bigger value. */ + bgeui a10, 64, 1b + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out are saved in a9 for rounding the result. 
*/ + ssr a10 + sll a11, yl /* lost bits shifted out of yl */ + src a9, yh, yl + srl yl, yh + movi yh, 0 + beqz a11, .Lsub_suby + or a9, a9, a10 /* any positive, nonzero value will work */ + j .Lsub_suby + +.Lsub_xsmaller: + /* Same thing as the "ysmaller" code, but with x and y swapped and + with y negated. */ + bnone xh, a6, .Lsub_xexpzero + + or xh, xh, a6 + slli xh, xh, 11 + srli xh, xh, 11 + +.Lsub_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Lsub_bigshiftx + + ssr a10 + movi a9, 0 + src a9, xl, a9 + src xl, xh, xl + srl xh, xh + + /* Negate y. */ + slli a11, a6, 11 + xor yh, yh, a11 + +.Lsub_subx: + sub xl, yl, xl + sub xh, yh, xh + bgeu yl, xl, 1f + addi xh, xh, -1 +1: + /* Subtract the leftover bits in a9 from zero and propagate any + borrow from xh/xl. */ + neg a9, a9 + beqz a9, 1f + addi a5, xh, -1 + moveqz xh, a5, xl + addi xl, xl, -1 +1: + /* Check if the subtract underflowed into the exponent. */ + extui a10, xh, 20, 11 + bne a10, a8, .Lsub_borrow + +.Lsub_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi xl, xl, 1 + beqz xl, .Lsub_roundcarry + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Lsub_exactlyhalf +1: leaf_return + +.Lsub_xexpzero: + /* Same as "yexpzero". */ + slli xh, xh, 12 + srli xh, xh, 12 + bnone yh, a6, .Lsub_xexpdiff + addi a7, a7, 1 + j .Lsub_xexpdiff + +.Lsub_bigshiftx: + /* Mostly the same thing as "bigshifty", but with the sign bit of the + shifted value set so that the subsequent subtraction flips the + sign of y. */ + bgeui a10, 64, .Lsub_returny + + ssr a10 + sll a11, xl + src a9, xh, xl + srl xl, xh + slli xh, a6, 11 /* set sign bit of xh */ + beqz a11, .Lsub_subx + or a9, a9, a10 + j .Lsub_subx + +.Lsub_returny: + /* Negate and return y. */ + slli a7, a6, 11 + xor xh, yh, a7 + mov xl, yl + leaf_return + +.Lsub_borrow: + /* The subtraction has underflowed into the exponent field, so the + value needs to be renormalized. Shift the mantissa left as + needed to remove any leading zeros and adjust the exponent + accordingly. If the exponent is not large enough to remove + all the leading zeros, the result will be a subnormal value. */ + + slli a8, xh, 12 + beqz a8, .Lsub_xhzero + do_nsau a6, a8, a7, a11 + srli a8, a8, 12 + bge a6, a10, .Lsub_subnormal + addi a6, a6, 1 + +.Lsub_shift_lt32: + /* Shift the mantissa (a8/xl/a9) left by a6. */ + ssl a6 + src a8, a8, xl + src xl, xl, a9 + sll a9, a9 + + /* Combine the shifted mantissa with the sign and exponent, + decrementing the exponent by a6. (The exponent has already + been decremented by one due to the borrow from the subtraction, + but adding the mantissa will increment the exponent by one.) */ + srli xh, xh, 20 + sub xh, xh, a6 + slli xh, xh, 20 + add xh, xh, a8 + j .Lsub_round + +.Lsub_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + leaf_return + +.Lsub_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow to the exponent is OK. */ + leaf_return + +.Lsub_xhzero: + /* When normalizing the result, all the mantissa bits in the high + word are zero. Shift by "20 + (leading zero count of xl) + 1". 
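The renormalization described above (shift by 20 + clz(xl) + 1 when the high word is empty) is what a C version would do with a count-leading-zeros primitive, which is the job NSAU performs via do_nsau. A rough sketch assuming the fraction fits in 53 bits and ignoring the subnormal clamp; the helper is hypothetical and not part of the patch:

#include <stdint.h>

static uint64_t
renormalize (uint64_t frac, int *exp)
{
  if (frac == 0)
    return 0;                                /* exact zero */
  int shift = __builtin_clzll (frac) - 11;   /* move the leading 1 to bit 52 */
  *exp -= shift;                             /* frac < 2^53, so shift >= 0 */
  return frac << shift;
}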
*/ + do_nsau a6, xl, a7, a11 + addi a6, a6, 21 + blt a10, a6, .Lsub_subnormal + +.Lsub_normalize_shift: + bltui a6, 32, .Lsub_shift_lt32 + + ssl a6 + src a8, xl, a9 + sll xl, a9 + movi a9, 0 + + srli xh, xh, 20 + sub xh, xh, a6 + slli xh, xh, 20 + add xh, xh, a8 + j .Lsub_round + +.Lsub_subnormal: + /* The exponent is too small to shift away all the leading zeros. + Set a6 to the current exponent (which has already been + decremented by the borrow) so that the exponent of the result + will be zero. Do not add 1 to a6 in this case, because: (1) + adding the mantissa will not increment the exponent, so there is + no need to subtract anything extra from the exponent to + compensate, and (2) the effective exponent of a subnormal is 1 + not 0 so the shift amount must be 1 smaller than normal. */ + mov a6, a10 + j .Lsub_normalize_shift + +#endif /* L_addsubdf3 */ + +#ifdef L_muldf3 + + /* Multiplication */ +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + +__muldf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Lmul_xexpzero: + /* Clear the sign bit of x. */ + slli xh, xh, 1 + srli xh, xh, 1 + + /* If x is zero, return zero. */ + or a10, xh, xl + beqz a10, .Lmul_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + beqz xh, .Lmul_xh_zero + do_nsau a10, xh, a11, a12 + addi a10, a10, -11 + ssl a10 + src xh, xh, xl + sll xl, xl + movi a8, 1 + sub a8, a8, a10 + j .Lmul_xnormalized +.Lmul_xh_zero: + do_nsau a10, xl, a11, a12 + addi a10, a10, -11 + movi a8, -31 + sub a8, a8, a10 + ssl a10 + bltz a10, .Lmul_xl_srl + sll xh, xl + movi xl, 0 + j .Lmul_xnormalized +.Lmul_xl_srl: + srl xh, xl + sll xl, xl + j .Lmul_xnormalized + +.Lmul_yexpzero: + /* Clear the sign bit of y. */ + slli yh, yh, 1 + srli yh, yh, 1 + + /* If y is zero, return zero. */ + or a10, yh, yl + beqz a10, .Lmul_return_zero + + /* Normalize y. Adjust the exponent in a9. */ + beqz yh, .Lmul_yh_zero + do_nsau a10, yh, a11, a12 + addi a10, a10, -11 + ssl a10 + src yh, yh, yl + sll yl, yl + movi a9, 1 + sub a9, a9, a10 + j .Lmul_ynormalized +.Lmul_yh_zero: + do_nsau a10, yl, a11, a12 + addi a10, a10, -11 + movi a9, -31 + sub a9, a9, a10 + ssl a10 + bltz a10, .Lmul_yl_srl + sll yh, yl + movi yl, 0 + j .Lmul_ynormalized +.Lmul_yl_srl: + srl yh, yl + sll yl, yl + j .Lmul_ynormalized + +.Lmul_return_zero: + /* Return zero with the appropriate sign bit. */ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + j .Lmul_done + +.Lmul_xnan_or_inf: + /* If y is zero, return NaN. */ + bnez yl, 1f + slli a8, yh, 1 + bnez a8, 1f + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 + j .Lmul_done +1: + /* If y is NaN, return y. */ + bnall yh, a6, .Lmul_returnx + slli a8, yh, 12 + or a8, a8, yl + beqz a8, .Lmul_returnx + +.Lmul_returny: + mov xh, yh + mov xl, yl + +.Lmul_returnx: + /* Set the sign bit and return. */ + extui a7, a7, 31, 1 + slli xh, xh, 1 + ssai 1 + src xh, a7, xh + j .Lmul_done + +.Lmul_ynan_or_inf: + /* If x is zero, return NaN. 
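The "make it a quiet NaN" steps above just force the top mantissa bit (the 0x80000 OR-ed into the high word) on a value whose exponent is already all ones. Expressed on a full 64-bit pattern, as an illustrative helper that is not part of the patch:

#include <stdint.h>

static uint64_t
quiet_nan_from (uint64_t bits)
{
  /* Keep the sign; force exponent all ones and the quiet bit (bit 51).  */
  return bits | 0x7ff8000000000000ULL;
}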
*/ + bnez xl, .Lmul_returny + slli a8, xh, 1 + bnez a8, .Lmul_returny + movi a7, 0x80000 /* make it a quiet NaN */ + or xh, yh, a7 + j .Lmul_done + + .align 4 + .global __muldf3 + .type __muldf3, @function +__muldf3: +#if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 + addi sp, sp, -32 + s32i a12, sp, 16 + s32i a13, sp, 20 + s32i a14, sp, 24 + s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 64 +#else + leaf_entry sp, 32 +#endif + movi a6, 0x7ff00000 + + /* Get the sign of the result. */ + xor a7, xh, yh + + /* Check for NaN and infinity. */ + ball xh, a6, .Lmul_xnan_or_inf + ball yh, a6, .Lmul_ynan_or_inf + + /* Extract the exponents. */ + extui a8, xh, 20, 11 + extui a9, yh, 20, 11 + + beqz a8, .Lmul_xexpzero +.Lmul_xnormalized: + beqz a9, .Lmul_yexpzero +.Lmul_ynormalized: + + /* Add the exponents. */ + add a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0x1fffff + or xh, xh, a6 + and xh, xh, a10 + or yh, yh, a6 + and yh, yh, a10 + + /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6. + The least-significant word of the result is thrown away except + that if it is nonzero, the lsb of a6 is set to 1. */ +#if XCHAL_HAVE_MUL32_HIGH + + /* Compute a6 with any carry-outs in a10. */ + movi a10, 0 + mull a6, xl, yh + mull a11, xh, yl + add a6, a6, a11 + bgeu a6, a11, 1f + addi a10, a10, 1 +1: + muluh a11, xl, yl + add a6, a6, a11 + bgeu a6, a11, 1f + addi a10, a10, 1 +1: + /* If the low word of the result is nonzero, set the lsb of a6. */ + mull a11, xl, yl + beqz a11, 1f + movi a9, 1 + or a6, a6, a9 +1: + /* Compute xl with any carry-outs in a9. */ + movi a9, 0 + mull a11, xh, yh + add a10, a10, a11 + bgeu a10, a11, 1f + addi a9, a9, 1 +1: + muluh a11, xh, yl + add a10, a10, a11 + bgeu a10, a11, 1f + addi a9, a9, 1 +1: + muluh xl, xl, yh + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + /* Compute xh. */ + muluh xh, xh, yh + add xh, xh, a9 + +#else /* ! XCHAL_HAVE_MUL32_HIGH */ + + /* Break the inputs into 16-bit chunks and compute 16 32-bit partial + products. These partial products are: + + 0 xll * yll + + 1 xll * ylh + 2 xlh * yll + + 3 xll * yhl + 4 xlh * ylh + 5 xhl * yll + + 6 xll * yhh + 7 xlh * yhl + 8 xhl * ylh + 9 xhh * yll + + 10 xlh * yhh + 11 xhl * yhl + 12 xhh * ylh + + 13 xhl * yhh + 14 xhh * yhl + + 15 xhh * yhh + + where the input chunks are (hh, hl, lh, ll). If using the Mul16 + or Mul32 multiplier options, these input chunks must be stored in + separate registers. For Mac16, the UMUL.AA.* opcodes can specify + that the inputs come from either half of the registers, so there + is no need to shift them out ahead of time. If there is no + multiply hardware, the 16-bit chunks can be extracted when setting + up the arguments to the separate multiply function. */ + + /* Save a7 since it is needed to hold a temporary value. */ + s32i a7, sp, 4 +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Calling a separate multiply function will clobber a0 and requires + use of a8 as a temporary, so save those values now. (The function + uses a custom ABI so nothing else needs to be saved.) */ + s32i a0, sp, 0 + s32i a8, sp, 8 +#endif + +#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 + +#define xlh a12 +#define ylh a13 +#define xhh a14 +#define yhh a15 + + /* Get the high halves of the inputs into registers. 
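The partial-product table above is the classic schoolbook widening multiply. The same accumulation idea written in C with 32-bit chunks (four partial products instead of sixteen), purely for illustration; the helper is hypothetical and not part of the patch:

#include <stdint.h>

static void
mul64x64_128 (uint64_t x, uint64_t y, uint64_t *hi, uint64_t *lo)
{
  uint64_t x_lo = (uint32_t) x, x_hi = x >> 32;
  uint64_t y_lo = (uint32_t) y, y_hi = y >> 32;

  uint64_t ll = x_lo * y_lo;
  uint64_t lh = x_lo * y_hi;
  uint64_t hl = x_hi * y_lo;
  uint64_t hh = x_hi * y_hi;

  uint64_t mid = (ll >> 32) + (uint32_t) lh + (uint32_t) hl;
  *lo = (mid << 32) | (uint32_t) ll;
  *hi = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
}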
*/ + srli xlh, xl, 16 + srli ylh, yl, 16 + srli xhh, xh, 16 + srli yhh, yh, 16 + +#define xll xl +#define yll yl +#define xhl xh +#define yhl yh + +#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 + /* Clear the high halves of the inputs. This does not matter + for MUL16 because the high bits are ignored. */ + extui xl, xl, 0, 16 + extui xh, xh, 0, 16 + extui yl, yl, 0, 16 + extui yh, yh, 0, 16 +#endif +#endif /* MUL16 || MUL32 */ + + +#if XCHAL_HAVE_MUL16 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mul16u dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MUL32 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mull dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MAC16 + +/* The preprocessor insists on inserting a space when concatenating after + a period in the definition of do_mul below. These macros are a workaround + using underscores instead of periods when doing the concatenation. */ +#define umul_aa_ll umul.aa.ll +#define umul_aa_lh umul.aa.lh +#define umul_aa_hl umul.aa.hl +#define umul_aa_hh umul.aa.hh + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + umul_aa_ ## xhalf ## yhalf xreg, yreg; \ + rsr dst, ACCLO + +#else /* no multiply hardware */ + +#define set_arg_l(dst, src) \ + extui dst, src, 0, 16 +#define set_arg_h(dst, src) \ + srli dst, src, 16 + +#if __XTENSA_CALL0_ABI__ +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a13, xreg); \ + set_arg_ ## yhalf (a14, yreg); \ + call0 .Lmul_mulsi3; \ + mov dst, a12 +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ + + /* Add pp1 and pp2 into a10 with carry-out in a9. */ + do_mul(a10, xl, l, yl, h) /* pp 1 */ + do_mul(a11, xl, h, yl, l) /* pp 2 */ + movi a9, 0 + add a10, a10, a11 + bgeu a10, a11, 1f + addi a9, a9, 1 +1: + /* Initialize a6 with a9/a10 shifted into position. Note that + this value can be safely incremented without any carry-outs. */ + ssai 16 + src a6, a9, a10 + + /* Compute the low word into a10. */ + do_mul(a11, xl, l, yl, l) /* pp 0 */ + sll a10, a10 + add a10, a10, a11 + bgeu a10, a11, 1f + addi a6, a6, 1 +1: + /* Compute the contributions of pp0-5 to a6, with carry-outs in a9. + This is good enough to determine the low half of a6, so that any + nonzero bits from the low word of the result can be collapsed + into a6, freeing up a register. */ + movi a9, 0 + do_mul(a11, xl, l, yh, l) /* pp 3 */ + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + do_mul(a11, xl, h, yl, h) /* pp 4 */ + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + do_mul(a11, xh, l, yl, l) /* pp 5 */ + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Collapse any nonzero bits from the low word into a6. */ + beqz a10, 1f + movi a11, 1 + or a6, a6, a11 +1: + /* Add pp6-9 into a11 with carry-outs in a10. */ + do_mul(a7, xl, l, yh, h) /* pp 6 */ + do_mul(a11, xh, h, yl, l) /* pp 9 */ + movi a10, 0 + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + do_mul(a7, xl, h, yh, l) /* pp 7 */ + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + do_mul(a7, xh, l, yl, h) /* pp 8 */ + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + /* Shift a10/a11 into position, and add low half of a11 to a6. */ + src a10, a10, a11 + add a10, a10, a9 + sll a11, a11 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a10, a10, 1 +1: + /* Add pp10-12 into xl with carry-outs in a9. 
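Each of the add/bgeu/addi triples above is the standard carry-out test for unsigned addition: after the add, a carry occurred exactly when the wrapped sum is smaller than one of the addends. In C, as an illustrative helper that is not part of the patch:

#include <stdint.h>

static uint32_t
add_with_carry_out (uint32_t a, uint32_t b, uint32_t *carry)
{
  uint32_t sum = a + b;
  *carry += (sum < b);   /* same condition the bgeu above falls through on */
  return sum;
}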
*/ + movi a9, 0 + do_mul(xl, xl, h, yh, h) /* pp 10 */ + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + do_mul(a10, xh, l, yh, l) /* pp 11 */ + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + do_mul(a10, xh, h, yl, h) /* pp 12 */ + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + /* Add pp13-14 into a11 with carry-outs in a10. */ + do_mul(a11, xh, l, yh, h) /* pp 13 */ + do_mul(a7, xh, h, yh, l) /* pp 14 */ + movi a10, 0 + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + /* Shift a10/a11 into position, and add low half of a11 to a6. */ + src a10, a10, a11 + add a10, a10, a9 + sll a11, a11 + add xl, xl, a11 + bgeu xl, a11, 1f + addi a10, a10, 1 +1: + /* Compute xh. */ + do_mul(xh, xh, h, yh, h) /* pp 15 */ + add xh, xh, a10 + + /* Restore values saved on the stack during the multiplication. */ + l32i a7, sp, 4 +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + l32i a0, sp, 0 + l32i a8, sp, 8 +#endif +#endif /* ! XCHAL_HAVE_MUL32_HIGH */ + + /* Shift left by 12 bits, unless there was a carry-out from the + multiply, in which case, shift by 11 bits and increment the + exponent. Note: It is convenient to use the constant 0x3ff + instead of 0x400 when removing the extra exponent bias (so that + it is easy to construct 0x7fe for the overflow check). Reverse + the logic here to decrement the exponent sum by one unless there + was a carry-out. */ + movi a4, 11 + srli a5, xh, 21 - 12 + bnez a5, 1f + addi a4, a4, 1 + addi a8, a8, -1 +1: ssl a4 + src xh, xh, xl + src xl, xl, a6 + sll a6, a6 + + /* Subtract the extra bias from the exponent sum (plus one to account + for the explicit "1.0" of the mantissa that will be added to the + exponent in the final result). */ + movi a4, 0x3ff + sub a8, a8, a4 + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..7fd are OK here. */ + slli a4, a4, 1 /* 0x7fe */ + bgeu a8, a4, .Lmul_overflow + +.Lmul_round: + /* Round. */ + bgez a6, .Lmul_rounded + addi xl, xl, 1 + beqz xl, .Lmul_roundcarry + slli a6, a6, 1 + beqz a6, .Lmul_exactlyhalf + +.Lmul_rounded: + /* Add the exponent to the mantissa. */ + slli a8, a8, 20 + add xh, xh, a8 + +.Lmul_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or xh, xh, a7 + +.Lmul_done: +#if __XTENSA_CALL0_ABI__ + l32i a12, sp, 16 + l32i a13, sp, 20 + l32i a14, sp, 24 + l32i a15, sp, 28 + addi sp, sp, 32 +#endif + leaf_return + +.Lmul_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + j .Lmul_rounded + +.Lmul_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow is OK -- it will be added to the exponent. */ + j .Lmul_rounded + +.Lmul_overflow: + bltz a8, .Lmul_underflow + /* Return +/- Infinity. */ + addi a8, a4, 1 /* 0x7ff */ + slli xh, a8, 20 + movi xl, 0 + j .Lmul_addsign + +.Lmul_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + mov a9, a6 + ssr a8 + bgeui a8, 32, .Lmul_bigshift + + /* Shift xh/xl right. Any bits that are shifted out of xl are saved + in a6 (combined with the shifted-out bits currently in a6) for + rounding the result. 
*/ + sll a6, xl + src xl, xh, xl + srl xh, xh + j 1f + +.Lmul_bigshift: + bgeui a8, 64, .Lmul_flush_to_zero + sll a10, xl /* lost bits shifted out of xl */ + src a6, xh, xl + srl xl, xh + movi xh, 0 + or a9, a9, a10 + + /* Set the exponent to zero. */ +1: movi a8, 0 + + /* Pack any nonzero bits shifted out into a6. */ + beqz a9, .Lmul_round + movi a9, 1 + or a6, a6, a9 + j .Lmul_round + +.Lmul_flush_to_zero: + /* Return zero with the appropriate sign bit. */ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + j .Lmul_done + +#if XCHAL_NO_MUL + + /* For Xtensa processors with no multiply hardware, this simplified + version of _mulsi3 is used for multiplying 16-bit chunks of + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ + .align 4 +.Lmul_mulsi3: + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ +#endif /* L_muldf3 */ + +#ifdef L_divdf3 + + /* Division */ +__divdf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Ldiv_yexpzero: + /* Clear the sign bit of y. */ + slli yh, yh, 1 + srli yh, yh, 1 + + /* Check for division by zero. */ + or a10, yh, yl + beqz a10, .Ldiv_yzero + + /* Normalize y. Adjust the exponent in a9. */ + beqz yh, .Ldiv_yh_zero + do_nsau a10, yh, a11, a9 + addi a10, a10, -11 + ssl a10 + src yh, yh, yl + sll yl, yl + movi a9, 1 + sub a9, a9, a10 + j .Ldiv_ynormalized +.Ldiv_yh_zero: + do_nsau a10, yl, a11, a9 + addi a10, a10, -11 + movi a9, -31 + sub a9, a9, a10 + ssl a10 + bltz a10, .Ldiv_yl_srl + sll yh, yl + movi yl, 0 + j .Ldiv_ynormalized +.Ldiv_yl_srl: + srl yh, yl + sll yl, yl + j .Ldiv_ynormalized + +.Ldiv_yzero: + /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ + slli xh, xh, 1 + srli xh, xh, 1 + or xl, xl, xh + srli xh, a7, 31 + slli xh, xh, 31 + or xh, xh, a6 + bnez xl, 1f + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 +1: movi xl, 0 + leaf_return + +.Ldiv_xexpzero: + /* Clear the sign bit of x. */ + slli xh, xh, 1 + srli xh, xh, 1 + + /* If x is zero, return zero. */ + or a10, xh, xl + beqz a10, .Ldiv_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + beqz xh, .Ldiv_xh_zero + do_nsau a10, xh, a11, a8 + addi a10, a10, -11 + ssl a10 + src xh, xh, xl + sll xl, xl + movi a8, 1 + sub a8, a8, a10 + j .Ldiv_xnormalized +.Ldiv_xh_zero: + do_nsau a10, xl, a11, a8 + addi a10, a10, -11 + movi a8, -31 + sub a8, a8, a10 + ssl a10 + bltz a10, .Ldiv_xl_srl + sll xh, xl + movi xl, 0 + j .Ldiv_xnormalized +.Ldiv_xl_srl: + srl xh, xl + sll xl, xl + j .Ldiv_xnormalized + +.Ldiv_return_zero: + /* Return zero with the appropriate sign bit. 
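The .Lmul_mulsi3 helper above is a shift-and-add multiply; the Xtensa macro retires four multiplier bits per loop iteration with ADDX2/ADDX4/ADDX8. A one-bit-per-iteration C equivalent, for illustration only and not part of the patch:

#include <stdint.h>

static uint32_t
shift_add_mul (uint32_t a, uint32_t b)
{
  uint32_t result = 0;
  while (a != 0)
    {
      if (a & 1)
        result += b;      /* add the shifted multiplicand for each set bit */
      a >>= 1;
      b <<= 1;
    }
  return result;
}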
*/ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + leaf_return + +.Ldiv_xnan_or_inf: + /* Set the sign bit of the result. */ + srli a7, yh, 31 + slli a7, a7, 31 + xor xh, xh, a7 + /* If y is NaN or Inf, return NaN. */ + bnall yh, a6, 1f + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 +1: leaf_return + +.Ldiv_ynan_or_inf: + /* If y is Infinity, return zero. */ + slli a8, yh, 12 + or a8, a8, yl + beqz a8, .Ldiv_return_zero + /* y is NaN; return it. */ + mov xh, yh + mov xl, yl + leaf_return + +.Ldiv_highequal1: + bltu xl, yl, 2f + j 3f + + .align 4 + .global __divdf3 + .type __divdf3, @function +__divdf3: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + + /* Get the sign of the result. */ + xor a7, xh, yh + + /* Check for NaN and infinity. */ + ball xh, a6, .Ldiv_xnan_or_inf + ball yh, a6, .Ldiv_ynan_or_inf + + /* Extract the exponents. */ + extui a8, xh, 20, 11 + extui a9, yh, 20, 11 + + beqz a9, .Ldiv_yexpzero +.Ldiv_ynormalized: + beqz a8, .Ldiv_xexpzero +.Ldiv_xnormalized: + + /* Subtract the exponents. */ + sub a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0x1fffff + or xh, xh, a6 + and xh, xh, a10 + or yh, yh, a6 + and yh, yh, a10 + + /* Set SAR for left shift by one. */ + ssai (32 - 1) + + /* The first digit of the mantissa division must be a one. + Shift x (and adjust the exponent) as needed to make this true. */ + bltu yh, xh, 3f + beq yh, xh, .Ldiv_highequal1 +2: src xh, xh, xl + sll xl, xl + addi a8, a8, -1 +3: + /* Do the first subtraction and shift. */ + sub xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, -1 +1: sub xl, xl, yl + src xh, xh, xl + sll xl, xl + + /* Put the quotient into a10/a11. */ + movi a10, 0 + movi a11, 1 + + /* Divide one bit at a time for 52 bits. */ + movi a9, 52 +#if XCHAL_HAVE_LOOPS + loop a9, .Ldiv_loopend +#endif +.Ldiv_loop: + /* Shift the quotient << 1. */ + src a10, a10, a11 + sll a11, a11 + + /* Is this digit a 0 or 1? */ + bltu xh, yh, 3f + beq xh, yh, .Ldiv_highequal2 + + /* Output a 1 and subtract. */ +2: addi a11, a11, 1 + sub xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, -1 +1: sub xl, xl, yl + + /* Shift the dividend << 1. */ +3: src xh, xh, xl + sll xl, xl + +#if !XCHAL_HAVE_LOOPS + addi a9, a9, -1 + bnez a9, .Ldiv_loop +#endif +.Ldiv_loopend: + + /* Add the exponent bias (less one to account for the explicit "1.0" + of the mantissa that will be added to the exponent in the final + result). */ + movi a9, 0x3fe + add a8, a8, a9 + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..7fd are OK here. */ + addmi a9, a9, 0x400 /* 0x7fe */ + bgeu a8, a9, .Ldiv_overflow + +.Ldiv_round: + /* Round. The remainder (<< 1) is in xh/xl. */ + bltu xh, yh, .Ldiv_rounded + beq xh, yh, .Ldiv_highequal3 +.Ldiv_roundup: + addi a11, a11, 1 + beqz a11, .Ldiv_roundcarry + +.Ldiv_rounded: + mov xl, a11 + /* Add the exponent to the mantissa. */ + slli a8, a8, 20 + add xh, a10, a8 + +.Ldiv_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or xh, xh, a7 + leaf_return + +.Ldiv_highequal2: + bgeu xl, yl, 2b + j 3b + +.Ldiv_highequal3: + bltu xl, yl, .Ldiv_rounded + bne xl, yl, .Ldiv_roundup + + /* Remainder is exactly half the divisor. Round even. */ + addi a11, a11, 1 + beqz a11, .Ldiv_roundcarry + srli a11, a11, 1 + slli a11, a11, 1 + j .Ldiv_rounded + +.Ldiv_overflow: + bltz a8, .Ldiv_underflow + /* Return +/- Infinity. 
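The quotient loop above is restoring division, producing one mantissa bit per iteration: compare the remainder with the divisor, emit a 1 and subtract when it fits, then shift the remainder left. A small C analogue (8 quotient bits for brevity, assuming the operands are normalized as in the code above so the shifts cannot overflow); the helper is hypothetical and not part of the patch:

#include <stdint.h>

static uint8_t
divide_bits (uint32_t rem, uint32_t div)
{
  uint8_t quotient = 0;
  for (int i = 0; i < 8; i++)
    {
      quotient <<= 1;
      if (rem >= div)
        {
          quotient |= 1;
          rem -= div;
        }
      rem <<= 1;
    }
  return quotient;
}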
*/ + addi a8, a9, 1 /* 0x7ff */ + slli xh, a8, 20 + movi xl, 0 + j .Ldiv_addsign + +.Ldiv_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + ssr a8 + bgeui a8, 32, .Ldiv_bigshift + + /* Shift a10/a11 right. Any bits that are shifted out of a11 are + saved in a6 for rounding the result. */ + sll a6, a11 + src a11, a10, a11 + srl a10, a10 + j 1f + +.Ldiv_bigshift: + bgeui a8, 64, .Ldiv_flush_to_zero + sll a9, a11 /* lost bits shifted out of a11 */ + src a6, a10, a11 + srl a11, a10 + movi a10, 0 + or xl, xl, a9 + + /* Set the exponent to zero. */ +1: movi a8, 0 + + /* Pack any nonzero remainder (in xh/xl) into a6. */ + or xh, xh, xl + beqz xh, 1f + movi a9, 1 + or a6, a6, a9 + + /* Round a10/a11 based on the bits shifted out into a6. */ +1: bgez a6, .Ldiv_rounded + addi a11, a11, 1 + beqz a11, .Ldiv_roundcarry + slli a6, a6, 1 + bnez a6, .Ldiv_rounded + srli a11, a11, 1 + slli a11, a11, 1 + j .Ldiv_rounded + +.Ldiv_roundcarry: + /* a11 is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi a10, a10, 1 + /* Overflow to the exponent field is OK. */ + j .Ldiv_rounded + +.Ldiv_flush_to_zero: + /* Return zero with the appropriate sign bit. */ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + leaf_return + +#endif /* L_divdf3 */ + +#ifdef L_cmpdf2 + + /* Equal and Not Equal */ + + .align 4 + .global __eqdf2 + .global __nedf2 + .set __nedf2, __eqdf2 + .type __eqdf2, @function +__eqdf2: + leaf_entry sp, 16 + bne xl, yl, 2f + bne xh, yh, 4f + + /* The values are equal but NaN != NaN. Check the exponent. */ + movi a6, 0x7ff00000 + ball xh, a6, 3f + + /* Equal. */ + movi a2, 0 + leaf_return + + /* Not equal. */ +2: movi a2, 1 + leaf_return + + /* Check if the mantissas are nonzero. */ +3: slli a7, xh, 12 + or a7, a7, xl + j 5f + + /* Check if x and y are zero with different signs. */ +4: or a7, xh, yh + slli a7, a7, 1 + or a7, a7, xl /* xl == yl here */ + + /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa + or x when exponent(x) = 0x7ff and x == y. */ +5: movi a2, 0 + movi a3, 1 + movnez a2, a3, a7 + leaf_return + + + /* Greater Than */ + + .align 4 + .global __gtdf2 + .type __gtdf2, @function +__gtdf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Lle_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 0 + leaf_return + + + /* Less Than or Equal */ + + .align 4 + .global __ledf2 + .type __ledf2, @function +__ledf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Lle_cmp + movi a2, 1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 1 + leaf_return + +.Lle_cmp: + /* Check if x and y have different signs. */ + xor a7, xh, yh + bltz a7, .Lle_diff_signs + + /* Check if x is negative. */ + bltz xh, .Lle_xneg + + /* Check if x <= y. */ + bltu xh, yh, 4f + bne xh, yh, 5f + bltu yl, xl, 5f +4: movi a2, 0 + leaf_return + +.Lle_xneg: + /* Check if y <= x. 
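The comparisons above keep re-doing the same NaN test: the exponent field is all ones and the 52 mantissa bits (high word shifted left by 12, OR-ed with the low word) are nonzero. In C, as an illustrative helper that is not part of the patch:

#include <stdint.h>

static int
is_nan_bits (uint32_t hi, uint32_t lo)
{
  return (hi & 0x7ff00000) == 0x7ff00000    /* exponent == 0x7ff */
         && (((hi << 12) | lo) != 0);       /* mantissa != 0 */
}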
*/ + bltu yh, xh, 4b + bne yh, xh, 5f + bgeu xl, yl, 4b +5: movi a2, 1 + leaf_return + +.Lle_diff_signs: + bltz xh, 4b + + /* Check if both x and y are zero. */ + or a7, xh, yh + slli a7, a7, 1 + or a7, a7, xl + or a7, a7, yl + movi a2, 1 + movi a3, 0 + moveqz a2, a3, a7 + leaf_return + + + /* Greater Than or Equal */ + + .align 4 + .global __gedf2 + .type __gedf2, @function +__gedf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Llt_cmp + movi a2, -1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, -1 + leaf_return + + + /* Less Than */ + + .align 4 + .global __ltdf2 + .type __ltdf2, @function +__ltdf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Llt_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 0 + leaf_return + +.Llt_cmp: + /* Check if x and y have different signs. */ + xor a7, xh, yh + bltz a7, .Llt_diff_signs + + /* Check if x is negative. */ + bltz xh, .Llt_xneg + + /* Check if x < y. */ + bltu xh, yh, 4f + bne xh, yh, 5f + bgeu xl, yl, 5f +4: movi a2, -1 + leaf_return + +.Llt_xneg: + /* Check if y < x. */ + bltu yh, xh, 4b + bne yh, xh, 5f + bltu yl, xl, 4b +5: movi a2, 0 + leaf_return + +.Llt_diff_signs: + bgez xh, 5b + + /* Check if both x and y are nonzero. */ + or a7, xh, yh + slli a7, a7, 1 + or a7, a7, xl + or a7, a7, yl + movi a2, 0 + movi a3, -1 + movnez a2, a3, a7 + leaf_return + + + /* Unordered */ + + .align 4 + .global __unorddf2 + .type __unorddf2, @function +__unorddf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 3f +1: ball yh, a6, 4f +2: movi a2, 0 + leaf_return + +3: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 1 + leaf_return + +4: slli a7, yh, 12 + or a7, a7, yl + beqz a7, 2b + movi a2, 1 + leaf_return + +#endif /* L_cmpdf2 */ + +#ifdef L_fixdfsi + + .align 4 + .global __fixdfsi + .type __fixdfsi, @function +__fixdfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixdfsi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */ + extui a4, xh, 20, 11 + extui a5, a6, 19, 10 /* 0x3fe */ + sub a4, a4, a5 + bgei a4, 32, .Lfixdfsi_maxint + blti a4, 1, .Lfixdfsi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src a5, a7, xl + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixdfsi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixdfsi_maxint + + /* Translate NaN to +maxint. */ + movi xh, 0 + +.Lfixdfsi_maxint: + slli a4, a6, 11 /* 0x80000000 */ + addi a5, a4, -1 /* 0x7fffffff */ + movgez a4, a5, xh + mov a2, a4 + leaf_return + +.Lfixdfsi_zero: + movi a2, 0 + leaf_return + +#endif /* L_fixdfsi */ + +#ifdef L_fixdfdi + + .align 4 + .global __fixdfdi + .type __fixdfdi, @function +__fixdfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixdfdi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. 
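The double-to-integer conversions here all follow the same outline: extract the biased exponent, reject out-of-range inputs (saturating, with NaN mapped to the maximum), prepend the implicit "1.0" and shift the mantissa into place. A rough 32-bit C analogue, hypothetical and not part of the patch:

#include <stdint.h>

static int32_t
double_bits_to_int32 (uint64_t bits)
{
  int neg = bits >> 63;
  int exp = (bits >> 52) & 0x7ff;
  uint64_t frac = (bits & 0xfffffffffffffULL) | (1ULL << 52);
  int unbiased = exp - 0x3ff;

  if (exp == 0x7ff && (bits << 12) != 0)    /* NaN */
    return INT32_MAX;
  if (unbiased >= 31)                       /* Inf or too big: saturate */
    return neg ? INT32_MIN : INT32_MAX;
  if (unbiased < 0)                         /* |value| < 1 */
    return 0;
  int32_t mag = (int32_t) (frac >> (52 - unbiased));
  return neg ? -mag : mag;
}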
*/ + extui a4, xh, 20, 11 + extui a5, a6, 19, 10 /* 0x3fe */ + sub a4, a4, a5 + bgei a4, 64, .Lfixdfdi_maxint + blti a4, 1, .Lfixdfdi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src xh, a7, xl + sll xl, xl + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixdfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixdfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixdfdi_smallshift: + src xl, xh, xl + srl xh, xh + j .Lfixdfdi_shifted + +.Lfixdfdi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixdfdi_maxint + + /* Translate NaN to +maxint. */ + movi xh, 0 + +.Lfixdfdi_maxint: + slli a7, a6, 11 /* 0x80000000 */ + bgez xh, 1f + mov xh, a7 + movi xl, 0 + leaf_return + +1: addi xh, a7, -1 /* 0x7fffffff */ + movi xl, -1 + leaf_return + +.Lfixdfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +#endif /* L_fixdfdi */ + +#ifdef L_fixunsdfsi + + .align 4 + .global __fixunsdfsi + .type __fixunsdfsi, @function +__fixunsdfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixunsdfsi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */ + extui a4, xh, 20, 11 + extui a5, a6, 20, 10 /* 0x3ff */ + sub a4, a4, a5 + bgei a4, 32, .Lfixunsdfsi_maxint + bltz a4, .Lfixunsdfsi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src a5, a7, xl + + /* Shift back to the right, based on the exponent. */ + addi a4, a4, 1 + beqi a4, 32, .Lfixunsdfsi_bigexp + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixunsdfsi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixunsdfsi_maxint + + /* Translate NaN to 0xffffffff. */ + movi a2, -1 + leaf_return + +.Lfixunsdfsi_maxint: + slli a4, a6, 11 /* 0x80000000 */ + movi a5, -1 /* 0xffffffff */ + movgez a4, a5, xh + mov a2, a4 + leaf_return + +.Lfixunsdfsi_zero: + movi a2, 0 + leaf_return + +.Lfixunsdfsi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz xh, 1f + mov a2, a5 /* no shift needed */ + leaf_return + + /* Return 0x80000000 if negative. */ +1: slli a2, a6, 11 + leaf_return + +#endif /* L_fixunsdfsi */ + +#ifdef L_fixunsdfdi + + .align 4 + .global __fixunsdfdi + .type __fixunsdfdi, @function +__fixunsdfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixunsdfdi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */ + extui a4, xh, 20, 11 + extui a5, a6, 20, 10 /* 0x3ff */ + sub a4, a4, a5 + bgei a4, 64, .Lfixunsdfdi_maxint + bltz a4, .Lfixunsdfdi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src xh, a7, xl + sll xl, xl + + /* Shift back to the right, based on the exponent. */ + addi a4, a4, 1 + beqi a4, 64, .Lfixunsdfdi_bigexp + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixunsdfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixunsdfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixunsdfdi_smallshift: + src xl, xh, xl + srl xh, xh + j .Lfixunsdfdi_shifted + +.Lfixunsdfdi_nan_or_inf: + /* Handle Infinity and NaN. 
*/ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixunsdfdi_maxint + + /* Translate NaN to 0xffffffff.... */ +1: movi xh, -1 + movi xl, -1 + leaf_return + +.Lfixunsdfdi_maxint: + bgez xh, 1b +2: slli xh, a6, 11 /* 0x80000000 */ + movi xl, 0 + leaf_return + +.Lfixunsdfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +.Lfixunsdfdi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz a7, 2b + leaf_return /* no shift needed */ + +#endif /* L_fixunsdfdi */ + +#ifdef L_floatsidf + + .align 4 + .global __floatunsidf + .type __floatunsidf, @function +__floatunsidf: + leaf_entry sp, 16 + beqz a2, .Lfloatsidf_return_zero + + /* Set the sign to zero and jump to the floatsidf code. */ + movi a7, 0 + j .Lfloatsidf_normalize + + .align 4 + .global __floatsidf + .type __floatsidf, @function +__floatsidf: + leaf_entry sp, 16 + + /* Check for zero. */ + beqz a2, .Lfloatsidf_return_zero + + /* Save the sign. */ + extui a7, a2, 31, 1 + + /* Get the absolute value. */ +#if XCHAL_HAVE_ABS + abs a2, a2 +#else + neg a4, a2 + movltz a2, a4, a2 +#endif + +.Lfloatsidf_normalize: + /* Normalize with the first 1 bit in the msb. */ + do_nsau a4, a2, a5, a6 + ssl a4 + sll a5, a2 + + /* Shift the mantissa into position. */ + srli xh, a5, 11 + slli xl, a5, (32 - 11) + + /* Set the exponent. */ + movi a5, 0x41d /* 0x3fe + 31 */ + sub a5, a5, a4 + slli a5, a5, 20 + add xh, xh, a5 + + /* Add the sign and return. */ + slli a7, a7, 31 + or xh, xh, a7 + leaf_return + +.Lfloatsidf_return_zero: + movi a3, 0 + leaf_return + +#endif /* L_floatsidf */ + +#ifdef L_floatdidf + + .align 4 + .global __floatundidf + .type __floatundidf, @function +__floatundidf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Set the sign to zero and jump to the floatdidf code. */ + movi a7, 0 + j .Lfloatdidf_normalize + + .align 4 + .global __floatdidf + .type __floatdidf, @function +__floatdidf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Save the sign. */ + extui a7, xh, 31, 1 + + /* Get the absolute value. */ + bgez xh, .Lfloatdidf_normalize + neg xl, xl + neg xh, xh + beqz xl, .Lfloatdidf_normalize + addi xh, xh, -1 + +.Lfloatdidf_normalize: + /* Normalize with the first 1 bit in the msb of xh. */ + beqz xh, .Lfloatdidf_bigshift + do_nsau a4, xh, a5, a6 + ssl a4 + src xh, xh, xl + sll xl, xl + +.Lfloatdidf_shifted: + /* Shift the mantissa into position, with rounding bits in a6. */ + ssai 11 + sll a6, xl + src xl, xh, xl + srl xh, xh + + /* Set the exponent. */ + movi a5, 0x43d /* 0x3fe + 63 */ + sub a5, a5, a4 + slli a5, a5, 20 + add xh, xh, a5 + + /* Add the sign. */ + slli a7, a7, 31 + or xh, xh, a7 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a6, 2f + addi xl, xl, 1 + beqz xl, .Lfloatdidf_roundcarry + + /* Check if the leftover fraction is exactly 1/2. */ + slli a6, a6, 1 + beqz a6, .Lfloatdidf_exactlyhalf +2: leaf_return + +.Lfloatdidf_bigshift: + /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ + do_nsau a4, xl, a5, a6 + ssl a4 + sll xh, xl + movi xl, 0 + addi a4, a4, 32 + j .Lfloatdidf_shifted + +.Lfloatdidf_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + leaf_return + +.Lfloatdidf_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow to the exponent is OK. 
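__floatsidf above takes the absolute value, counts leading zeros with NSAU, shifts the leading 1 up to the implicit-bit position and adds (0x3fe + 31 - lz) in the exponent field; the still-explicit leading 1 then bumps the exponent by one, which is why 0x3fe rather than 0x3ff appears in the code. A C sketch of the same trick, with an invented helper name and not part of the patch:

#include <stdint.h>

static uint64_t
int32_to_double_bits (int32_t value)
{
  if (value == 0)
    return 0;
  uint64_t sign = value < 0 ? 1ULL << 63 : 0;
  uint32_t mag = value < 0 ? -(uint32_t) value : (uint32_t) value;

  int lz = __builtin_clz (mag);                  /* 0..31 */
  uint64_t frac = (uint64_t) mag << (lz + 21);   /* leading 1 at bit 52 */
  uint64_t exp = 0x3fe + 31 - lz;
  return sign | ((exp << 52) + frac);            /* explicit 1 bumps the exponent */
}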
*/ + leaf_return + +#endif /* L_floatdidf */ + +#ifdef L_truncdfsf2 + + .align 4 + .global __truncdfsf2 + .type __truncdfsf2, @function +__truncdfsf2: + leaf_entry sp, 16 + + /* Adjust the exponent bias. */ + movi a4, (0x3ff - 0x7f) << 20 + sub a5, xh, a4 + + /* Check for underflow. */ + xor a6, xh, a5 + bltz a6, .Ltrunc_underflow + extui a6, a5, 20, 11 + beqz a6, .Ltrunc_underflow + + /* Check for overflow. */ + movi a4, 255 + bge a6, a4, .Ltrunc_overflow + + /* Shift a5/xl << 3 into a5/a4. */ + ssai (32 - 3) + src a5, a5, xl + sll a4, xl + +.Ltrunc_addsign: + /* Add the sign bit. */ + extui a6, xh, 31, 1 + slli a6, a6, 31 + or a2, a6, a5 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a4, 1f + addi a2, a2, 1 + /* Overflow to the exponent is OK. The answer will be correct. */ + + /* Check if the leftover fraction is exactly 1/2. */ + slli a4, a4, 1 + beqz a4, .Ltrunc_exactlyhalf +1: leaf_return + +.Ltrunc_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +.Ltrunc_overflow: + /* Check if exponent == 0x7ff. */ + movi a4, 0x7ff00000 + bnall xh, a4, 1f + + /* Check if mantissa is nonzero. */ + slli a5, xh, 12 + or a5, a5, xl + beqz a5, 1f + + /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */ + srli a4, a4, 1 + +1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */ + /* Add the sign bit. */ + extui a6, xh, 31, 1 + ssai 1 + src a2, a6, a4 + leaf_return + +.Ltrunc_underflow: + /* Find shift count for a subnormal. Flush to zero if >= 32. */ + extui a6, xh, 20, 11 + movi a5, 0x3ff - 0x7f + sub a6, a5, a6 + addi a6, a6, 1 + bgeui a6, 32, 1f + + /* Replace the exponent with an explicit "1.0". */ + slli a5, a5, 13 /* 0x700000 */ + or a5, a5, xh + slli a5, a5, 11 + srli a5, a5, 11 + + /* Shift the mantissa left by 3 bits (into a5/a4). */ + ssai (32 - 3) + src a5, a5, xl + sll a4, xl + + /* Shift right by a6. */ + ssr a6 + sll a7, a4 + src a4, a5, a4 + srl a5, a5 + beqz a7, .Ltrunc_addsign + or a4, a4, a6 /* any positive, nonzero value will work */ + j .Ltrunc_addsign + + /* Return +/- zero. */ +1: extui a2, xh, 31, 1 + slli a2, a2, 31 + leaf_return + +#endif /* L_truncdfsf2 */ + +#ifdef L_extendsfdf2 + + .align 4 + .global __extendsfdf2 + .type __extendsfdf2, @function +__extendsfdf2: + leaf_entry sp, 16 + + /* Save the sign bit and then shift it off. */ + extui a5, a2, 31, 1 + slli a5, a5, 31 + slli a4, a2, 1 + + /* Extract and check the exponent. */ + extui a6, a2, 23, 8 + beqz a6, .Lextend_expzero + addi a6, a6, 1 + beqi a6, 256, .Lextend_nan_or_inf + + /* Shift >> 3 into a4/xl. */ + srli a4, a4, 4 + slli xl, a2, (32 - 3) + + /* Adjust the exponent bias. */ + movi a6, (0x3ff - 0x7f) << 20 + add a4, a4, a6 + + /* Add the sign bit. */ + or xh, a4, a5 + leaf_return + +.Lextend_nan_or_inf: + movi a4, 0x7ff00000 + + /* Check for NaN. */ + slli a7, a2, 9 + beqz a7, 1f + + slli a6, a6, 11 /* 0x80000 */ + or a4, a4, a6 + + /* Add the sign and return. */ +1: or xh, a4, a5 + movi xl, 0 + leaf_return + +.Lextend_expzero: + beqz a4, 1b + + /* Normalize it to have 8 zero bits before the first 1 bit. */ + do_nsau a7, a4, a2, a3 + addi a7, a7, -8 + ssl a7 + sll a4, a4 + + /* Shift >> 3 into a4/xl. */ + slli xl, a4, (32 - 3) + srli a4, a4, 3 + + /* Set the exponent. */ + movi a6, 0x3fe - 0x7f + sub a6, a6, a7 + slli a6, a6, 20 + add a4, a4, a6 + + /* Add the sign and return. 
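The conversions between single and double precision above mostly amount to re-biasing the exponent by (0x3ff - 0x7f) and moving the mantissa between 23 and 52 bits; __truncdfsf2 additionally rounds and handles overflow and subnormals. A C sketch of the extend direction for normal inputs only (hypothetical helper, not part of the patch):

#include <stdint.h>

static uint64_t
extend_sf_bits (uint32_t f)
{
  uint64_t sign = (uint64_t) (f >> 31) << 63;
  uint64_t exp  = (f >> 23) & 0xff;              /* assumes 0 < exp < 255 */
  uint64_t frac = f & 0x7fffff;
  return sign | ((exp + (0x3ff - 0x7f)) << 52) | (frac << 29);
}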
*/ + or xh, a4, a5 + leaf_return + +#endif /* L_extendsfdf2 */ + + diff --git a/libgcc/config/xtensa/ieee754-sf.S b/libgcc/config/xtensa/ieee754-sf.S new file mode 100644 index 00000000000..d75be0e5ae5 --- /dev/null +++ b/libgcc/config/xtensa/ieee754-sf.S @@ -0,0 +1,1757 @@ +/* IEEE-754 single-precision functions for Xtensa + Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef __XTENSA_EB__ +#define xh a2 +#define xl a3 +#define yh a4 +#define yl a5 +#else +#define xh a3 +#define xl a2 +#define yh a5 +#define yl a4 +#endif + +/* Warning! The branch displacements for some Xtensa branch instructions + are quite small, and this code has been carefully laid out to keep + branch targets in range. If you change anything, be sure to check that + the assembler is not relaxing anything to branch over a jump. */ + +#ifdef L_negsf2 + + .align 4 + .global __negsf2 + .type __negsf2, @function +__negsf2: + leaf_entry sp, 16 + movi a4, 0x80000000 + xor a2, a2, a4 + leaf_return + +#endif /* L_negsf2 */ + +#ifdef L_addsubsf3 + + /* Addition */ +__addsf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Ladd_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall a3, a6, 1f + /* If x is a NaN, return it. Otherwise, return y. */ + slli a7, a2, 9 + beqz a7, .Ladd_ynan_or_inf +1: leaf_return + +.Ladd_ynan_or_inf: + /* Return y. */ + mov a2, a3 + leaf_return + +.Ladd_opposite_signs: + /* Operand signs differ. Do a subtraction. */ + slli a7, a6, 8 + xor a3, a3, a7 + j .Lsub_same_sign + + .align 4 + .global __addsf3 + .type __addsf3, @function +__addsf3: + leaf_entry sp, 16 + movi a6, 0x7f800000 + + /* Check if the two operands have the same sign. */ + xor a7, a2, a3 + bltz a7, .Ladd_opposite_signs + +.Ladd_same_sign: + /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ + ball a2, a6, .Ladd_xnan_or_inf + ball a3, a6, .Ladd_ynan_or_inf + + /* Compare the exponents. The smaller operand will be shifted + right by the exponent difference and added to the larger + one. */ + extui a7, a2, 23, 9 + extui a8, a3, 23, 9 + bltu a7, a8, .Ladd_shiftx + +.Ladd_shifty: + /* Check if the smaller (or equal) exponent is zero. */ + bnone a3, a6, .Ladd_yexpzero + + /* Replace y sign/exponent with 0x008. */ + or a3, a3, a6 + slli a3, a3, 8 + srli a3, a3, 8 + +.Ladd_yexpdiff: + /* Compute the exponent difference. */ + sub a10, a7, a8 + + /* Exponent difference > 32 -- just return the bigger value. 
*/ + bgeui a10, 32, 1f + + /* Shift y right by the exponent difference. Any bits that are + shifted out of y are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, a3, a9 + srl a3, a3 + + /* Do the addition. */ + add a2, a2, a3 + + /* Check if the add overflowed into the exponent. */ + extui a10, a2, 23, 9 + beq a10, a7, .Ladd_round + mov a8, a7 + j .Ladd_carry + +.Ladd_yexpzero: + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0", and increment the apparent exponent + because subnormals behave as if they had the minimum (nonzero) + exponent. Test for the case when both exponents are zero. */ + slli a3, a3, 9 + srli a3, a3, 9 + bnone a2, a6, .Ladd_bothexpzero + addi a8, a8, 1 + j .Ladd_yexpdiff + +.Ladd_bothexpzero: + /* Both exponents are zero. Handle this as a special case. There + is no need to shift or round, and the normal code for handling + a carry into the exponent field will not work because it + assumes there is an implicit "1.0" that needs to be added. */ + add a2, a2, a3 +1: leaf_return + +.Ladd_xexpzero: + /* Same as "yexpzero" except skip handling the case when both + exponents are zero. */ + slli a2, a2, 9 + srli a2, a2, 9 + addi a7, a7, 1 + j .Ladd_xexpdiff + +.Ladd_shiftx: + /* Same thing as the "shifty" code, but with x and y swapped. Also, + because the exponent difference is always nonzero in this version, + the shift sequence can use SLL and skip loading a constant zero. */ + bnone a2, a6, .Ladd_xexpzero + + or a2, a2, a6 + slli a2, a2, 8 + srli a2, a2, 8 + +.Ladd_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Ladd_returny + + ssr a10 + sll a9, a2 + srl a2, a2 + + add a2, a2, a3 + + /* Check if the add overflowed into the exponent. */ + extui a10, a2, 23, 9 + bne a10, a8, .Ladd_carry + +.Ladd_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi a2, a2, 1 + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_returny: + mov a2, a3 + leaf_return + +.Ladd_carry: + /* The addition has overflowed into the exponent field, so the + value needs to be renormalized. The mantissa of the result + can be recovered by subtracting the original exponent and + adding 0x800000 (which is the explicit "1.0" for the + mantissa of the non-shifted operand -- the "1.0" for the + shifted operand was already added). The mantissa can then + be shifted right by one bit. The explicit "1.0" of the + shifted mantissa then needs to be replaced by the exponent, + incremented by one to account for the normalizing shift. + It is faster to combine these operations: do the shift first + and combine the additions and subtractions. If x is the + original exponent, the result is: + shifted mantissa - (x << 22) + (1 << 22) + (x << 23) + or: + shifted mantissa + ((x + 1) << 22) + Note that the exponent is incremented here by leaving the + explicit "1.0" of the mantissa in the exponent field. */ + + /* Shift x right by one bit. Save the lsb. */ + mov a10, a2 + srli a2, a2, 1 + + /* See explanation above. The original exponent is in a8. */ + addi a8, a8, 1 + slli a8, a8, 22 + add a2, a2, a8 + + /* Return an Infinity if the exponent overflowed. */ + ball a2, a6, .Ladd_infinity + + /* Same thing as the "round" code except the msb of the leftover + fraction is bit 0 of a10, with the rest of the fraction in a9. */ + bbci.l a10, 0, 1f + addi a2, a2, 1 + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_infinity: + /* Clear the mantissa. 
*/ + srli a2, a2, 23 + slli a2, a2, 23 + + /* The sign bit may have been lost in a carry-out. Put it back. */ + slli a8, a8, 1 + or a2, a2, a8 + leaf_return + +.Ladd_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + + + /* Subtraction */ +__subsf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Lsub_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall a3, a6, 1f + /* Both x and y are either NaN or Inf, so the result is NaN. */ + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 +1: leaf_return + +.Lsub_ynan_or_inf: + /* Negate y and return it. */ + slli a7, a6, 8 + xor a2, a3, a7 + leaf_return + +.Lsub_opposite_signs: + /* Operand signs differ. Do an addition. */ + slli a7, a6, 8 + xor a3, a3, a7 + j .Ladd_same_sign + + .align 4 + .global __subsf3 + .type __subsf3, @function +__subsf3: + leaf_entry sp, 16 + movi a6, 0x7f800000 + + /* Check if the two operands have the same sign. */ + xor a7, a2, a3 + bltz a7, .Lsub_opposite_signs + +.Lsub_same_sign: + /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ + ball a2, a6, .Lsub_xnan_or_inf + ball a3, a6, .Lsub_ynan_or_inf + + /* Compare the operands. In contrast to addition, the entire + value matters here. */ + extui a7, a2, 23, 8 + extui a8, a3, 23, 8 + bltu a2, a3, .Lsub_xsmaller + +.Lsub_ysmaller: + /* Check if the smaller (or equal) exponent is zero. */ + bnone a3, a6, .Lsub_yexpzero + + /* Replace y sign/exponent with 0x008. */ + or a3, a3, a6 + slli a3, a3, 8 + srli a3, a3, 8 + +.Lsub_yexpdiff: + /* Compute the exponent difference. */ + sub a10, a7, a8 + + /* Exponent difference > 32 -- just return the bigger value. */ + bgeui a10, 32, 1f + + /* Shift y right by the exponent difference. Any bits that are + shifted out of y are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, a3, a9 + srl a3, a3 + + sub a2, a2, a3 + + /* Subtract the leftover bits in a9 from zero and propagate any + borrow from a2. */ + neg a9, a9 + addi a10, a2, -1 + movnez a2, a10, a9 + + /* Check if the subtract underflowed into the exponent. */ + extui a10, a2, 23, 8 + beq a10, a7, .Lsub_round + j .Lsub_borrow + +.Lsub_yexpzero: + /* Return zero if the inputs are equal. (For the non-subnormal + case, subtracting the "1.0" will cause a borrow from the exponent + and this case can be detected when handling the borrow.) */ + beq a2, a3, .Lsub_return_zero + + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0". Unless x is also a subnormal, increment + y's apparent exponent because subnormals behave as if they had + the minimum (nonzero) exponent. */ + slli a3, a3, 9 + srli a3, a3, 9 + bnone a2, a6, .Lsub_yexpdiff + addi a8, a8, 1 + j .Lsub_yexpdiff + +.Lsub_returny: + /* Negate and return y. */ + slli a7, a6, 8 + xor a2, a3, a7 +1: leaf_return + +.Lsub_xsmaller: + /* Same thing as the "ysmaller" code, but with x and y swapped and + with y negated. */ + bnone a2, a6, .Lsub_xexpzero + + or a2, a2, a6 + slli a2, a2, 8 + srli a2, a2, 8 + +.Lsub_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Lsub_returny + + ssr a10 + movi a9, 0 + src a9, a2, a9 + srl a2, a2 + + /* Negate y. */ + slli a11, a6, 8 + xor a3, a3, a11 + + sub a2, a3, a2 + + neg a9, a9 + addi a10, a2, -1 + movnez a2, a10, a9 + + /* Check if the subtract underflowed into the exponent. 
*/ + extui a10, a2, 23, 8 + bne a10, a8, .Lsub_borrow + +.Lsub_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi a2, a2, 1 + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Lsub_exactlyhalf +1: leaf_return + +.Lsub_xexpzero: + /* Same as "yexpzero". */ + beq a2, a3, .Lsub_return_zero + slli a2, a2, 9 + srli a2, a2, 9 + bnone a3, a6, .Lsub_xexpdiff + addi a7, a7, 1 + j .Lsub_xexpdiff + +.Lsub_return_zero: + movi a2, 0 + leaf_return + +.Lsub_borrow: + /* The subtraction has underflowed into the exponent field, so the + value needs to be renormalized. Shift the mantissa left as + needed to remove any leading zeros and adjust the exponent + accordingly. If the exponent is not large enough to remove + all the leading zeros, the result will be a subnormal value. */ + + slli a8, a2, 9 + beqz a8, .Lsub_xzero + do_nsau a6, a8, a7, a11 + srli a8, a8, 9 + bge a6, a10, .Lsub_subnormal + addi a6, a6, 1 + +.Lsub_normalize_shift: + /* Shift the mantissa (a8/a9) left by a6. */ + ssl a6 + src a8, a8, a9 + sll a9, a9 + + /* Combine the shifted mantissa with the sign and exponent, + decrementing the exponent by a6. (The exponent has already + been decremented by one due to the borrow from the subtraction, + but adding the mantissa will increment the exponent by one.) */ + srli a2, a2, 23 + sub a2, a2, a6 + slli a2, a2, 23 + add a2, a2, a8 + j .Lsub_round + +.Lsub_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +.Lsub_xzero: + /* If there was a borrow from the exponent, and the mantissa and + guard digits are all zero, then the inputs were equal and the + result should be zero. */ + beqz a9, .Lsub_return_zero + + /* Only the guard digit is nonzero. Shift by min(24, a10). */ + addi a11, a10, -24 + movi a6, 24 + movltz a6, a10, a11 + j .Lsub_normalize_shift + +.Lsub_subnormal: + /* The exponent is too small to shift away all the leading zeros. + Set a6 to the current exponent (which has already been + decremented by the borrow) so that the exponent of the result + will be zero. Do not add 1 to a6 in this case, because: (1) + adding the mantissa will not increment the exponent, so there is + no need to subtract anything extra from the exponent to + compensate, and (2) the effective exponent of a subnormal is 1 + not 0 so the shift amount must be 1 smaller than normal. */ + mov a6, a10 + j .Lsub_normalize_shift + +#endif /* L_addsubsf3 */ + +#ifdef L_mulsf3 + + /* Multiplication */ +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + +__mulsf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Lmul_xexpzero: + /* Clear the sign bit of x. */ + slli a2, a2, 1 + srli a2, a2, 1 + + /* If x is zero, return zero. */ + beqz a2, .Lmul_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + do_nsau a10, a2, a11, a12 + addi a10, a10, -8 + ssl a10 + sll a2, a2 + movi a8, 1 + sub a8, a8, a10 + j .Lmul_xnormalized + +.Lmul_yexpzero: + /* Clear the sign bit of y. */ + slli a3, a3, 1 + srli a3, a3, 1 + + /* If y is zero, return zero. */ + beqz a3, .Lmul_return_zero + + /* Normalize y. Adjust the exponent in a9. 
*/ + do_nsau a10, a3, a11, a12 + addi a10, a10, -8 + ssl a10 + sll a3, a3 + movi a9, 1 + sub a9, a9, a10 + j .Lmul_ynormalized + +.Lmul_return_zero: + /* Return zero with the appropriate sign bit. */ + srli a2, a7, 31 + slli a2, a2, 31 + j .Lmul_done + +.Lmul_xnan_or_inf: + /* If y is zero, return NaN. */ + slli a8, a3, 1 + bnez a8, 1f + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 + j .Lmul_done +1: + /* If y is NaN, return y. */ + bnall a3, a6, .Lmul_returnx + slli a8, a3, 9 + beqz a8, .Lmul_returnx + +.Lmul_returny: + mov a2, a3 + +.Lmul_returnx: + /* Set the sign bit and return. */ + extui a7, a7, 31, 1 + slli a2, a2, 1 + ssai 1 + src a2, a7, a2 + j .Lmul_done + +.Lmul_ynan_or_inf: + /* If x is zero, return NaN. */ + slli a8, a2, 1 + bnez a8, .Lmul_returny + movi a7, 0x400000 /* make it a quiet NaN */ + or a2, a3, a7 + j .Lmul_done + + .align 4 + .global __mulsf3 + .type __mulsf3, @function +__mulsf3: +#if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 + addi sp, sp, -32 + s32i a12, sp, 16 + s32i a13, sp, 20 + s32i a14, sp, 24 + s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 64 +#else + leaf_entry sp, 32 +#endif + movi a6, 0x7f800000 + + /* Get the sign of the result. */ + xor a7, a2, a3 + + /* Check for NaN and infinity. */ + ball a2, a6, .Lmul_xnan_or_inf + ball a3, a6, .Lmul_ynan_or_inf + + /* Extract the exponents. */ + extui a8, a2, 23, 8 + extui a9, a3, 23, 8 + + beqz a8, .Lmul_xexpzero +.Lmul_xnormalized: + beqz a9, .Lmul_yexpzero +.Lmul_ynormalized: + + /* Add the exponents. */ + add a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0xffffff + or a2, a2, a6 + and a2, a2, a10 + or a3, a3, a6 + and a3, a3, a10 + + /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */ + +#if XCHAL_HAVE_MUL32_HIGH + + mull a6, a2, a3 + muluh a2, a2, a3 + +#else + + /* Break the inputs into 16-bit chunks and compute 4 32-bit partial + products. These partial products are: + + 0 xl * yl + + 1 xl * yh + 2 xh * yl + + 3 xh * yh + + If using the Mul16 or Mul32 multiplier options, these input + chunks must be stored in separate registers. For Mac16, the + UMUL.AA.* opcodes can specify that the inputs come from either + half of the registers, so there is no need to shift them out + ahead of time. If there is no multiply hardware, the 16-bit + chunks can be extracted when setting up the arguments to the + separate multiply function. */ + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Calling a separate multiply function will clobber a0 and requires + use of a8 as a temporary, so save those values now. (The function + uses a custom ABI so nothing else needs to be saved.) */ + s32i a0, sp, 0 + s32i a8, sp, 4 +#endif + +#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 + +#define a2h a4 +#define a3h a5 + + /* Get the high halves of the inputs into registers. */ + srli a2h, a2, 16 + srli a3h, a3, 16 + +#define a2l a2 +#define a3l a3 + +#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 + /* Clear the high halves of the inputs. This does not matter + for MUL16 because the high bits are ignored. 
*/ + extui a2, a2, 0, 16 + extui a3, a3, 0, 16 +#endif +#endif /* MUL16 || MUL32 */ + + +#if XCHAL_HAVE_MUL16 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mul16u dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MUL32 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mull dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MAC16 + +/* The preprocessor insists on inserting a space when concatenating after + a period in the definition of do_mul below. These macros are a workaround + using underscores instead of periods when doing the concatenation. */ +#define umul_aa_ll umul.aa.ll +#define umul_aa_lh umul.aa.lh +#define umul_aa_hl umul.aa.hl +#define umul_aa_hh umul.aa.hh + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + umul_aa_ ## xhalf ## yhalf xreg, yreg; \ + rsr dst, ACCLO + +#else /* no multiply hardware */ + +#define set_arg_l(dst, src) \ + extui dst, src, 0, 16 +#define set_arg_h(dst, src) \ + srli dst, src, 16 + +#if __XTENSA_CALL0_ABI__ +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a13, xreg); \ + set_arg_ ## yhalf (a14, yreg); \ + call0 .Lmul_mulsi3; \ + mov dst, a12 +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ + + /* Add pp1 and pp2 into a6 with carry-out in a9. */ + do_mul(a6, a2, l, a3, h) /* pp 1 */ + do_mul(a11, a2, h, a3, l) /* pp 2 */ + movi a9, 0 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Shift the high half of a9/a6 into position in a9. Note that + this value can be safely incremented without any carry-outs. */ + ssai 16 + src a9, a9, a6 + + /* Compute the low word into a6. */ + do_mul(a11, a2, l, a3, l) /* pp 0 */ + sll a6, a6 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Compute the high word into a2. */ + do_mul(a2, a2, h, a3, h) /* pp 3 */ + add a2, a2, a9 + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Restore values saved on the stack during the multiplication. */ + l32i a0, sp, 0 + l32i a8, sp, 4 +#endif +#endif /* ! XCHAL_HAVE_MUL32_HIGH */ + + /* Shift left by 9 bits, unless there was a carry-out from the + multiply, in which case, shift by 8 bits and increment the + exponent. */ + movi a4, 9 + srli a5, a2, 24 - 9 + beqz a5, 1f + addi a4, a4, -1 + addi a8, a8, 1 +1: ssl a4 + src a2, a2, a6 + sll a6, a6 + + /* Subtract the extra bias from the exponent sum (plus one to account + for the explicit "1.0" of the mantissa that will be added to the + exponent in the final result). */ + movi a4, 0x80 + sub a8, a8, a4 + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..fd are OK here. */ + movi a4, 0xfe + bgeu a8, a4, .Lmul_overflow + +.Lmul_round: + /* Round. */ + bgez a6, .Lmul_rounded + addi a2, a2, 1 + slli a6, a6, 1 + beqz a6, .Lmul_exactlyhalf + +.Lmul_rounded: + /* Add the exponent to the mantissa. */ + slli a8, a8, 23 + add a2, a2, a8 + +.Lmul_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or a2, a2, a7 + +.Lmul_done: +#if __XTENSA_CALL0_ABI__ + l32i a12, sp, 16 + l32i a13, sp, 20 + l32i a14, sp, 24 + l32i a15, sp, 28 + addi sp, sp, 32 +#endif + leaf_return + +.Lmul_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + j .Lmul_rounded + +.Lmul_overflow: + bltz a8, .Lmul_underflow + /* Return +/- Infinity. 
*/ + movi a8, 0xff + slli a2, a8, 23 + j .Lmul_addsign + +.Lmul_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + mov a9, a6 + ssr a8 + bgeui a8, 32, .Lmul_flush_to_zero + + /* Shift a2 right. Any bits that are shifted out of a2 are saved + in a6 (combined with the shifted-out bits currently in a6) for + rounding the result. */ + sll a6, a2 + srl a2, a2 + + /* Set the exponent to zero. */ + movi a8, 0 + + /* Pack any nonzero bits shifted out into a6. */ + beqz a9, .Lmul_round + movi a9, 1 + or a6, a6, a9 + j .Lmul_round + +.Lmul_flush_to_zero: + /* Return zero with the appropriate sign bit. */ + srli a2, a7, 31 + slli a2, a2, 31 + j .Lmul_done + +#if XCHAL_NO_MUL + + /* For Xtensa processors with no multiply hardware, this simplified + version of _mulsi3 is used for multiplying 16-bit chunks of + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ + .align 4 +.Lmul_mulsi3: + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ +#endif /* L_mulsf3 */ + +#ifdef L_divsf3 + + /* Division */ +__divsf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Ldiv_yexpzero: + /* Clear the sign bit of y. */ + slli a3, a3, 1 + srli a3, a3, 1 + + /* Check for division by zero. */ + beqz a3, .Ldiv_yzero + + /* Normalize y. Adjust the exponent in a9. */ + do_nsau a10, a3, a4, a5 + addi a10, a10, -8 + ssl a10 + sll a3, a3 + movi a9, 1 + sub a9, a9, a10 + j .Ldiv_ynormalized + +.Ldiv_yzero: + /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ + slli a4, a2, 1 + srli a4, a4, 1 + srli a2, a7, 31 + slli a2, a2, 31 + or a2, a2, a6 + bnez a4, 1f + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 +1: leaf_return + +.Ldiv_xexpzero: + /* Clear the sign bit of x. */ + slli a2, a2, 1 + srli a2, a2, 1 + + /* If x is zero, return zero. */ + beqz a2, .Ldiv_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + do_nsau a10, a2, a4, a5 + addi a10, a10, -8 + ssl a10 + sll a2, a2 + movi a8, 1 + sub a8, a8, a10 + j .Ldiv_xnormalized + +.Ldiv_return_zero: + /* Return zero with the appropriate sign bit. */ + srli a2, a7, 31 + slli a2, a2, 31 + leaf_return + +.Ldiv_xnan_or_inf: + /* Set the sign bit of the result. */ + srli a7, a3, 31 + slli a7, a7, 31 + xor a2, a2, a7 + /* If y is NaN or Inf, return NaN. 
*/ + bnall a3, a6, 1f + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 +1: leaf_return + +.Ldiv_ynan_or_inf: + /* If y is Infinity, return zero. */ + slli a8, a3, 9 + beqz a8, .Ldiv_return_zero + /* y is NaN; return it. */ + mov a2, a3 + leaf_return + + .align 4 + .global __divsf3 + .type __divsf3, @function +__divsf3: + leaf_entry sp, 16 + movi a6, 0x7f800000 + + /* Get the sign of the result. */ + xor a7, a2, a3 + + /* Check for NaN and infinity. */ + ball a2, a6, .Ldiv_xnan_or_inf + ball a3, a6, .Ldiv_ynan_or_inf + + /* Extract the exponents. */ + extui a8, a2, 23, 8 + extui a9, a3, 23, 8 + + beqz a9, .Ldiv_yexpzero +.Ldiv_ynormalized: + beqz a8, .Ldiv_xexpzero +.Ldiv_xnormalized: + + /* Subtract the exponents. */ + sub a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0xffffff + or a2, a2, a6 + and a2, a2, a10 + or a3, a3, a6 + and a3, a3, a10 + + /* The first digit of the mantissa division must be a one. + Shift x (and adjust the exponent) as needed to make this true. */ + bltu a3, a2, 1f + slli a2, a2, 1 + addi a8, a8, -1 +1: + /* Do the first subtraction and shift. */ + sub a2, a2, a3 + slli a2, a2, 1 + + /* Put the quotient into a10. */ + movi a10, 1 + + /* Divide one bit at a time for 23 bits. */ + movi a9, 23 +#if XCHAL_HAVE_LOOPS + loop a9, .Ldiv_loopend +#endif +.Ldiv_loop: + /* Shift the quotient << 1. */ + slli a10, a10, 1 + + /* Is this digit a 0 or 1? */ + bltu a2, a3, 1f + + /* Output a 1 and subtract. */ + addi a10, a10, 1 + sub a2, a2, a3 + + /* Shift the dividend << 1. */ +1: slli a2, a2, 1 + +#if !XCHAL_HAVE_LOOPS + addi a9, a9, -1 + bnez a9, .Ldiv_loop +#endif +.Ldiv_loopend: + + /* Add the exponent bias (less one to account for the explicit "1.0" + of the mantissa that will be added to the exponent in the final + result). */ + addi a8, a8, 0x7e + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..fd are OK here. */ + movi a4, 0xfe + bgeu a8, a4, .Ldiv_overflow + +.Ldiv_round: + /* Round. The remainder (<< 1) is in a2. */ + bltu a2, a3, .Ldiv_rounded + addi a10, a10, 1 + beq a2, a3, .Ldiv_exactlyhalf + +.Ldiv_rounded: + /* Add the exponent to the mantissa. */ + slli a8, a8, 23 + add a2, a10, a8 + +.Ldiv_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or a2, a2, a7 + leaf_return + +.Ldiv_overflow: + bltz a8, .Ldiv_underflow + /* Return +/- Infinity. */ + addi a8, a4, 1 /* 0xff */ + slli a2, a8, 23 + j .Ldiv_addsign + +.Ldiv_exactlyhalf: + /* Remainder is exactly half the divisor. Round even. */ + srli a10, a10, 1 + slli a10, a10, 1 + j .Ldiv_rounded + +.Ldiv_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + ssr a8 + bgeui a8, 32, .Ldiv_flush_to_zero + + /* Shift a10 right. Any bits that are shifted out of a10 are + saved in a6 for rounding the result. */ + sll a6, a10 + srl a10, a10 + + /* Set the exponent to zero. */ + movi a8, 0 + + /* Pack any nonzero remainder (in a2) into a6. */ + beqz a2, 1f + movi a9, 1 + or a6, a6, a9 + + /* Round a10 based on the bits shifted out into a6. */ +1: bgez a6, .Ldiv_rounded + addi a10, a10, 1 + slli a6, a6, 1 + bnez a6, .Ldiv_rounded + srli a10, a10, 1 + slli a10, a10, 1 + j .Ldiv_rounded + +.Ldiv_flush_to_zero: + /* Return zero with the appropriate sign bit. 
*/ + srli a2, a7, 31 + slli a2, a2, 31 + leaf_return + +#endif /* L_divsf3 */ + +#ifdef L_cmpsf2 + + /* Equal and Not Equal */ + + .align 4 + .global __eqsf2 + .global __nesf2 + .set __nesf2, __eqsf2 + .type __eqsf2, @function +__eqsf2: + leaf_entry sp, 16 + bne a2, a3, 4f + + /* The values are equal but NaN != NaN. Check the exponent. */ + movi a6, 0x7f800000 + ball a2, a6, 3f + + /* Equal. */ + movi a2, 0 + leaf_return + + /* Not equal. */ +2: movi a2, 1 + leaf_return + + /* Check if the mantissas are nonzero. */ +3: slli a7, a2, 9 + j 5f + + /* Check if x and y are zero with different signs. */ +4: or a7, a2, a3 + slli a7, a7, 1 + + /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa + or x when exponent(x) = 0x7f8 and x == y. */ +5: movi a2, 0 + movi a3, 1 + movnez a2, a3, a7 + leaf_return + + + /* Greater Than */ + + .align 4 + .global __gtsf2 + .type __gtsf2, @function +__gtsf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Lle_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, 0 + leaf_return + + + /* Less Than or Equal */ + + .align 4 + .global __lesf2 + .type __lesf2, @function +__lesf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Lle_cmp + movi a2, 1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, 1 + leaf_return + +.Lle_cmp: + /* Check if x and y have different signs. */ + xor a7, a2, a3 + bltz a7, .Lle_diff_signs + + /* Check if x is negative. */ + bltz a2, .Lle_xneg + + /* Check if x <= y. */ + bltu a3, a2, 5f +4: movi a2, 0 + leaf_return + +.Lle_xneg: + /* Check if y <= x. */ + bgeu a2, a3, 4b +5: movi a2, 1 + leaf_return + +.Lle_diff_signs: + bltz a2, 4b + + /* Check if both x and y are zero. */ + or a7, a2, a3 + slli a7, a7, 1 + movi a2, 1 + movi a3, 0 + moveqz a2, a3, a7 + leaf_return + + + /* Greater Than or Equal */ + + .align 4 + .global __gesf2 + .type __gesf2, @function +__gesf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Llt_cmp + movi a2, -1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, -1 + leaf_return + + + /* Less Than */ + + .align 4 + .global __ltsf2 + .type __ltsf2, @function +__ltsf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Llt_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, 0 + leaf_return + +.Llt_cmp: + /* Check if x and y have different signs. */ + xor a7, a2, a3 + bltz a7, .Llt_diff_signs + + /* Check if x is negative. */ + bltz a2, .Llt_xneg + + /* Check if x < y. */ + bgeu a2, a3, 5f +4: movi a2, -1 + leaf_return + +.Llt_xneg: + /* Check if y < x. */ + bltu a3, a2, 4b +5: movi a2, 0 + leaf_return + +.Llt_diff_signs: + bgez a2, 5b + + /* Check if both x and y are nonzero. 
*/ + or a7, a2, a3 + slli a7, a7, 1 + movi a2, 0 + movi a3, -1 + movnez a2, a3, a7 + leaf_return + + + /* Unordered */ + + .align 4 + .global __unordsf2 + .type __unordsf2, @function +__unordsf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 3f +1: ball a3, a6, 4f +2: movi a2, 0 + leaf_return + +3: slli a7, a2, 9 + beqz a7, 1b + movi a2, 1 + leaf_return + +4: slli a7, a3, 9 + beqz a7, 2b + movi a2, 1 + leaf_return + +#endif /* L_cmpsf2 */ + +#ifdef L_fixsfsi + + .align 4 + .global __fixsfsi + .type __fixsfsi, @function +__fixsfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixsfsi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7e + bgei a4, 32, .Lfixsfsi_maxint + blti a4, 1, .Lfixsfsi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli a5, a7, 8 + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixsfsi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, a2, 9 + beqz a4, .Lfixsfsi_maxint + + /* Translate NaN to +maxint. */ + movi a2, 0 + +.Lfixsfsi_maxint: + slli a4, a6, 8 /* 0x80000000 */ + addi a5, a4, -1 /* 0x7fffffff */ + movgez a4, a5, a2 + mov a2, a4 + leaf_return + +.Lfixsfsi_zero: + movi a2, 0 + leaf_return + +#endif /* L_fixsfsi */ + +#ifdef L_fixsfdi + + .align 4 + .global __fixsfdi + .type __fixsfdi, @function +__fixsfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixsfdi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7e + bgei a4, 64, .Lfixsfdi_maxint + blti a4, 1, .Lfixsfdi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli xh, a7, 8 + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixsfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixsfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixsfdi_smallshift: + movi xl, 0 + sll xl, xh + srl xh, xh + j .Lfixsfdi_shifted + +.Lfixsfdi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, a2, 9 + beqz a4, .Lfixsfdi_maxint + + /* Translate NaN to +maxint. */ + movi a2, 0 + +.Lfixsfdi_maxint: + slli a7, a6, 8 /* 0x80000000 */ + bgez a2, 1f + mov xh, a7 + movi xl, 0 + leaf_return + +1: addi xh, a7, -1 /* 0x7fffffff */ + movi xl, -1 + leaf_return + +.Lfixsfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +#endif /* L_fixsfdi */ + +#ifdef L_fixunssfsi + + .align 4 + .global __fixunssfsi + .type __fixunssfsi, @function +__fixunssfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixunssfsi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7f + bgei a4, 32, .Lfixunssfsi_maxint + bltz a4, .Lfixunssfsi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli a5, a7, 8 + + /* Shift back to the right, based on the exponent. */ + addi a4, a4, 1 + beqi a4, 32, .Lfixunssfsi_bigexp + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixunssfsi_nan_or_inf: + /* Handle Infinity and NaN. 
*/ + slli a4, a2, 9 + beqz a4, .Lfixunssfsi_maxint + + /* Translate NaN to 0xffffffff. */ + movi a2, -1 + leaf_return + +.Lfixunssfsi_maxint: + slli a4, a6, 8 /* 0x80000000 */ + movi a5, -1 /* 0xffffffff */ + movgez a4, a5, a2 + mov a2, a4 + leaf_return + +.Lfixunssfsi_zero: + movi a2, 0 + leaf_return + +.Lfixunssfsi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz a2, 1f + mov a2, a5 /* no shift needed */ + leaf_return + + /* Return 0x80000000 if negative. */ +1: slli a2, a6, 8 + leaf_return + +#endif /* L_fixunssfsi */ + +#ifdef L_fixunssfdi + + .align 4 + .global __fixunssfdi + .type __fixunssfdi, @function +__fixunssfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixunssfdi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7f + bgei a4, 64, .Lfixunssfdi_maxint + bltz a4, .Lfixunssfdi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli xh, a7, 8 + + /* Shift back to the right, based on the exponent. */ + addi a4, a4, 1 + beqi a4, 64, .Lfixunssfdi_bigexp + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixunssfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixunssfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixunssfdi_smallshift: + movi xl, 0 + src xl, xh, xl + srl xh, xh + j .Lfixunssfdi_shifted + +.Lfixunssfdi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, a2, 9 + beqz a4, .Lfixunssfdi_maxint + + /* Translate NaN to 0xffffffff.... */ +1: movi xh, -1 + movi xl, -1 + leaf_return + +.Lfixunssfdi_maxint: + bgez a2, 1b +2: slli xh, a6, 8 /* 0x80000000 */ + movi xl, 0 + leaf_return + +.Lfixunssfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +.Lfixunssfdi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz a7, 2b + movi xl, 0 + leaf_return /* no shift needed */ + +#endif /* L_fixunssfdi */ + +#ifdef L_floatsisf + + .align 4 + .global __floatunsisf + .type __floatunsisf, @function +__floatunsisf: + leaf_entry sp, 16 + beqz a2, .Lfloatsisf_return + + /* Set the sign to zero and jump to the floatsisf code. */ + movi a7, 0 + j .Lfloatsisf_normalize + + .align 4 + .global __floatsisf + .type __floatsisf, @function +__floatsisf: + leaf_entry sp, 16 + + /* Check for zero. */ + beqz a2, .Lfloatsisf_return + + /* Save the sign. */ + extui a7, a2, 31, 1 + + /* Get the absolute value. */ +#if XCHAL_HAVE_ABS + abs a2, a2 +#else + neg a4, a2 + movltz a2, a4, a2 +#endif + +.Lfloatsisf_normalize: + /* Normalize with the first 1 bit in the msb. */ + do_nsau a4, a2, a5, a6 + ssl a4 + sll a5, a2 + + /* Shift the mantissa into position, with rounding bits in a6. */ + srli a2, a5, 8 + slli a6, a5, (32 - 8) + + /* Set the exponent. */ + movi a5, 0x9d /* 0x7e + 31 */ + sub a5, a5, a4 + slli a5, a5, 23 + add a2, a2, a5 + + /* Add the sign. */ + slli a7, a7, 31 + or a2, a2, a7 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a6, .Lfloatsisf_return + addi a2, a2, 1 /* Overflow to the exponent is OK. */ + + /* Check if the leftover fraction is exactly 1/2. */ + slli a6, a6, 1 + beqz a6, .Lfloatsisf_exactlyhalf + +.Lfloatsisf_return: + leaf_return + +.Lfloatsisf_exactlyhalf: + /* Round down to the nearest even value. 
*/ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +#endif /* L_floatsisf */ + +#ifdef L_floatdisf + + .align 4 + .global __floatundisf + .type __floatundisf, @function +__floatundisf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Set the sign to zero and jump to the floatdisf code. */ + movi a7, 0 + j .Lfloatdisf_normalize + + .align 4 + .global __floatdisf + .type __floatdisf, @function +__floatdisf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Save the sign. */ + extui a7, xh, 31, 1 + + /* Get the absolute value. */ + bgez xh, .Lfloatdisf_normalize + neg xl, xl + neg xh, xh + beqz xl, .Lfloatdisf_normalize + addi xh, xh, -1 + +.Lfloatdisf_normalize: + /* Normalize with the first 1 bit in the msb of xh. */ + beqz xh, .Lfloatdisf_bigshift + do_nsau a4, xh, a5, a6 + ssl a4 + src xh, xh, xl + sll xl, xl + +.Lfloatdisf_shifted: + /* Shift the mantissa into position, with rounding bits in a6. */ + ssai 8 + sll a5, xl + src a6, xh, xl + srl xh, xh + beqz a5, 1f + movi a5, 1 + or a6, a6, a5 +1: + /* Set the exponent. */ + movi a5, 0xbd /* 0x7e + 63 */ + sub a5, a5, a4 + slli a5, a5, 23 + add a2, xh, a5 + + /* Add the sign. */ + slli a7, a7, 31 + or a2, a2, a7 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a6, 2f + addi a2, a2, 1 /* Overflow to the exponent is OK. */ + + /* Check if the leftover fraction is exactly 1/2. */ + slli a6, a6, 1 + beqz a6, .Lfloatdisf_exactlyhalf +2: leaf_return + +.Lfloatdisf_bigshift: + /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ + do_nsau a4, xl, a5, a6 + ssl a4 + sll xh, xl + movi xl, 0 + addi a4, a4, 32 + j .Lfloatdisf_shifted + +.Lfloatdisf_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +#endif /* L_floatdisf */ diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S new file mode 100644 index 00000000000..071b9171177 --- /dev/null +++ b/libgcc/config/xtensa/lib1funcs.S @@ -0,0 +1,845 @@ +/* Assembly functions for the Xtensa version of libgcc1. + Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009 + Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#include "xtensa-config.h" + +/* Define macros for the ABS and ADDX* instructions to handle cases + where they are not included in the Xtensa processor configuration. 
*/ + + .macro do_abs dst, src, tmp +#if XCHAL_HAVE_ABS + abs \dst, \src +#else + neg \tmp, \src + movgez \tmp, \src, \src + mov \dst, \tmp +#endif + .endm + + .macro do_addx2 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx2 \dst, \as, \at +#else + slli \tmp, \as, 1 + add \dst, \tmp, \at +#endif + .endm + + .macro do_addx4 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx4 \dst, \as, \at +#else + slli \tmp, \as, 2 + add \dst, \tmp, \at +#endif + .endm + + .macro do_addx8 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx8 \dst, \as, \at +#else + slli \tmp, \as, 3 + add \dst, \tmp, \at +#endif + .endm + +/* Define macros for leaf function entry and return, supporting either the + standard register windowed ABI or the non-windowed call0 ABI. These + macros do not allocate any extra stack space, so they only work for + leaf functions that do not need to spill anything to the stack. */ + + .macro leaf_entry reg, size +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + entry \reg, \size +#else + /* do nothing */ +#endif + .endm + + .macro leaf_return +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + retw +#else + ret +#endif + .endm + + +#ifdef L_mulsi3 + .align 4 + .global __mulsi3 + .type __mulsi3, @function +__mulsi3: + leaf_entry sp, 16 + +#if XCHAL_HAVE_MUL32 + mull a2, a2, a3 + +#elif XCHAL_HAVE_MUL16 + or a4, a2, a3 + srai a4, a4, 16 + bnez a4, .LMUL16 + mul16u a2, a2, a3 + leaf_return +.LMUL16: + srai a4, a2, 16 + srai a5, a3, 16 + mul16u a7, a4, a3 + mul16u a6, a5, a2 + mul16u a4, a2, a3 + add a7, a7, a6 + slli a7, a7, 16 + add a2, a7, a4 + +#elif XCHAL_HAVE_MAC16 + mul.aa.hl a2, a3 + mula.aa.lh a2, a3 + rsr a5, ACCLO + umul.aa.ll a2, a3 + rsr a4, ACCLO + slli a5, a5, 16 + add a2, a4, a5 + +#else /* !MUL32 && !MUL16 && !MAC16 */ + + /* Multiply one bit at a time, but unroll the loop 4x to better + exploit the addx instructions and avoid overhead. + Peel the first iteration to save a cycle on init. */ + + /* Avoid negative numbers. */ + xor a5, a2, a3 /* Top bit is 1 if one input is negative. */ + do_abs a3, a3, a6 + do_abs a2, a2, a6 + + /* Swap so the second argument is smaller. */ + sub a7, a2, a3 + mov a4, a3 + movgez a4, a2, a7 /* a4 = max (a2, a3) */ + movltz a3, a2, a7 /* a3 = min (a2, a3) */ + + movi a2, 0 + extui a6, a3, 0, 1 + movnez a2, a4, a6 + + do_addx2 a7, a4, a2, a7 + extui a6, a3, 1, 1 + movnez a2, a7, a6 + + do_addx4 a7, a4, a2, a7 + extui a6, a3, 2, 1 + movnez a2, a7, a6 + + do_addx8 a7, a4, a2, a7 + extui a6, a3, 3, 1 + movnez a2, a7, a6 + + bgeui a3, 16, .Lmult_main_loop + neg a3, a2 + movltz a2, a3, a5 + leaf_return + + .align 4 +.Lmult_main_loop: + srli a3, a3, 4 + slli a4, a4, 4 + + add a7, a4, a2 + extui a6, a3, 0, 1 + movnez a2, a7, a6 + + do_addx2 a7, a4, a2, a7 + extui a6, a3, 1, 1 + movnez a2, a7, a6 + + do_addx4 a7, a4, a2, a7 + extui a6, a3, 2, 1 + movnez a2, a7, a6 + + do_addx8 a7, a4, a2, a7 + extui a6, a3, 3, 1 + movnez a2, a7, a6 + + bgeui a3, 16, .Lmult_main_loop + + neg a3, a2 + movltz a2, a3, a5 + +#endif /* !MUL32 && !MUL16 && !MAC16 */ + + leaf_return + .size __mulsi3, . 
- __mulsi3 + +#endif /* L_mulsi3 */ + + +#ifdef L_umulsidi3 + +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + + .align 4 + .global __umulsidi3 + .type __umulsidi3, @function +__umulsidi3: +#if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 + addi sp, sp, -32 + s32i a12, sp, 16 + s32i a13, sp, 20 + s32i a14, sp, 24 + s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 48 +#else + leaf_entry sp, 16 +#endif + +#ifdef __XTENSA_EB__ +#define wh a2 +#define wl a3 +#else +#define wh a3 +#define wl a2 +#endif /* __XTENSA_EB__ */ + + /* This code is taken from the mulsf3 routine in ieee754-sf.S. + See more comments there. */ + +#if XCHAL_HAVE_MUL32_HIGH + mull a6, a2, a3 + muluh wh, a2, a3 + mov wl, a6 + +#else /* ! MUL32_HIGH */ + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* a0 and a8 will be clobbered by calling the multiply function + but a8 is not used here and need not be saved. */ + s32i a0, sp, 0 +#endif + +#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 + +#define a2h a4 +#define a3h a5 + + /* Get the high halves of the inputs into registers. */ + srli a2h, a2, 16 + srli a3h, a3, 16 + +#define a2l a2 +#define a3l a3 + +#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 + /* Clear the high halves of the inputs. This does not matter + for MUL16 because the high bits are ignored. */ + extui a2, a2, 0, 16 + extui a3, a3, 0, 16 +#endif +#endif /* MUL16 || MUL32 */ + + +#if XCHAL_HAVE_MUL16 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mul16u dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MUL32 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mull dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MAC16 + +/* The preprocessor insists on inserting a space when concatenating after + a period in the definition of do_mul below. These macros are a workaround + using underscores instead of periods when doing the concatenation. */ +#define umul_aa_ll umul.aa.ll +#define umul_aa_lh umul.aa.lh +#define umul_aa_hl umul.aa.hl +#define umul_aa_hh umul.aa.hh + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + umul_aa_ ## xhalf ## yhalf xreg, yreg; \ + rsr dst, ACCLO + +#else /* no multiply hardware */ + +#define set_arg_l(dst, src) \ + extui dst, src, 0, 16 +#define set_arg_h(dst, src) \ + srli dst, src, 16 + +#if __XTENSA_CALL0_ABI__ +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a13, xreg); \ + set_arg_ ## yhalf (a14, yreg); \ + call0 .Lmul_mulsi3; \ + mov dst, a12 +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ + + /* Add pp1 and pp2 into a6 with carry-out in a9. */ + do_mul(a6, a2, l, a3, h) /* pp 1 */ + do_mul(a11, a2, h, a3, l) /* pp 2 */ + movi a9, 0 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Shift the high half of a9/a6 into position in a9. Note that + this value can be safely incremented without any carry-outs. */ + ssai 16 + src a9, a9, a6 + + /* Compute the low word into a6. */ + do_mul(a11, a2, l, a3, l) /* pp 0 */ + sll a6, a6 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Compute the high word into wh. */ + do_mul(wh, a2, h, a3, h) /* pp 3 */ + add wh, wh, a9 + mov wl, a6 + +#endif /* !MUL32_HIGH */ + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Restore the original return address. 
*/ + l32i a0, sp, 0 +#endif +#if __XTENSA_CALL0_ABI__ + l32i a12, sp, 16 + l32i a13, sp, 20 + l32i a14, sp, 24 + l32i a15, sp, 28 + addi sp, sp, 32 +#endif + leaf_return + +#if XCHAL_NO_MUL + + /* For Xtensa processors with no multiply hardware, this simplified + version of _mulsi3 is used for multiplying 16-bit chunks of + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ + .align 4 +.Lmul_mulsi3: + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ + + .size __umulsidi3, . - __umulsidi3 + +#endif /* L_umulsidi3 */ + + +/* Define a macro for the NSAU (unsigned normalize shift amount) + instruction, which computes the number of leading zero bits, + to handle cases where it is not included in the Xtensa processor + configuration. */ + + .macro do_nsau cnt, val, tmp, a +#if XCHAL_HAVE_NSA + nsau \cnt, \val +#else + mov \a, \val + movi \cnt, 0 + extui \tmp, \a, 16, 16 + bnez \tmp, 0f + movi \cnt, 16 + slli \a, \a, 16 +0: + extui \tmp, \a, 24, 8 + bnez \tmp, 1f + addi \cnt, \cnt, 8 + slli \a, \a, 8 +1: + movi \tmp, __nsau_data + extui \a, \a, 24, 8 + add \tmp, \tmp, \a + l8ui \tmp, \tmp, 0 + add \cnt, \cnt, \tmp +#endif /* !XCHAL_HAVE_NSA */ + .endm + +#ifdef L_clz + .section .rodata + .align 4 + .global __nsau_data + .type __nsau_data, @object +__nsau_data: +#if !XCHAL_HAVE_NSA + .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 + .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 + .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +#endif /* !XCHAL_HAVE_NSA */ + .size __nsau_data, . - __nsau_data + .hidden __nsau_data +#endif /* L_clz */ + + +#ifdef L_clzsi2 + .align 4 + .global __clzsi2 + .type __clzsi2, @function +__clzsi2: + leaf_entry sp, 16 + do_nsau a2, a2, a3, a4 + leaf_return + .size __clzsi2, . 
- __clzsi2 + +#endif /* L_clzsi2 */ + + +#ifdef L_ctzsi2 + .align 4 + .global __ctzsi2 + .type __ctzsi2, @function +__ctzsi2: + leaf_entry sp, 16 + neg a3, a2 + and a3, a3, a2 + do_nsau a2, a3, a4, a5 + neg a2, a2 + addi a2, a2, 31 + leaf_return + .size __ctzsi2, . - __ctzsi2 + +#endif /* L_ctzsi2 */ + + +#ifdef L_ffssi2 + .align 4 + .global __ffssi2 + .type __ffssi2, @function +__ffssi2: + leaf_entry sp, 16 + neg a3, a2 + and a3, a3, a2 + do_nsau a2, a3, a4, a5 + neg a2, a2 + addi a2, a2, 32 + leaf_return + .size __ffssi2, . - __ffssi2 + +#endif /* L_ffssi2 */ + + +#ifdef L_udivsi3 + .align 4 + .global __udivsi3 + .type __udivsi3, @function +__udivsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + quou a2, a2, a3 +#else + bltui a3, 2, .Lle_one /* check if the divisor <= 1 */ + + mov a6, a2 /* keep dividend in a6 */ + do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */ + do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ + ssl a4 + sll a3, a3 /* divisor <<= count */ + movi a2, 0 /* quotient = 0 */ + + /* test-subtract-and-shift loop; one quotient bit on each iteration */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a6, a3, .Lzerobit + sub a6, a6, a3 + addi a2, a2, 1 +.Lzerobit: + slli a2, a2, 1 + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + + bltu a6, a3, .Lreturn + addi a2, a2, 1 /* increment quotient if dividend >= divisor */ +.Lreturn: + leaf_return + +.Lle_one: + beqz a3, .Lerror /* if divisor == 1, return the dividend */ + leaf_return + +.Lspecial: + /* return dividend >= divisor */ + bltu a6, a3, .Lreturn0 + movi a2, 1 + leaf_return + +.Lerror: + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __udivsi3, . - __udivsi3 + +#endif /* L_udivsi3 */ + + +#ifdef L_divsi3 + .align 4 + .global __divsi3 + .type __divsi3, @function +__divsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + quos a2, a2, a3 +#else + xor a7, a2, a3 /* sign = dividend ^ divisor */ + do_abs a6, a2, a4 /* udividend = abs (dividend) */ + do_abs a3, a3, a4 /* udivisor = abs (divisor) */ + bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ + do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */ + do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ + ssl a4 + sll a3, a3 /* udivisor <<= count */ + movi a2, 0 /* quotient = 0 */ + + /* test-subtract-and-shift loop; one quotient bit on each iteration */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a6, a3, .Lzerobit + sub a6, a6, a3 + addi a2, a2, 1 +.Lzerobit: + slli a2, a2, 1 + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + + bltu a6, a3, .Lreturn + addi a2, a2, 1 /* increment if udividend >= udivisor */ +.Lreturn: + neg a5, a2 + movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */ + leaf_return + +.Lle_one: + beqz a3, .Lerror + neg a2, a6 /* if udivisor == 1, then return... */ + movgez a2, a6, a7 /* (sign < 0) ? 
-udividend : udividend */ + leaf_return + +.Lspecial: + bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */ + movi a2, 1 + movi a4, -1 + movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */ + leaf_return + +.Lerror: + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __divsi3, . - __divsi3 + +#endif /* L_divsi3 */ + + +#ifdef L_umodsi3 + .align 4 + .global __umodsi3 + .type __umodsi3, @function +__umodsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + remu a2, a2, a3 +#else + bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */ + + do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */ + do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ + ssl a4 + sll a3, a3 /* divisor <<= count */ + + /* test-subtract-and-shift loop */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a2, a3, .Lzerobit + sub a2, a2, a3 +.Lzerobit: + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + +.Lspecial: + bltu a2, a3, .Lreturn + sub a2, a2, a3 /* subtract once more if dividend >= divisor */ +.Lreturn: + leaf_return + +.Lle_one: + bnez a3, .Lreturn0 + + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __umodsi3, . - __umodsi3 + +#endif /* L_umodsi3 */ + + +#ifdef L_modsi3 + .align 4 + .global __modsi3 + .type __modsi3, @function +__modsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + rems a2, a2, a3 +#else + mov a7, a2 /* save original (signed) dividend */ + do_abs a2, a2, a4 /* udividend = abs (dividend) */ + do_abs a3, a3, a4 /* udivisor = abs (divisor) */ + bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ + do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */ + do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ + ssl a4 + sll a3, a3 /* udivisor <<= count */ + + /* test-subtract-and-shift loop */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a2, a3, .Lzerobit + sub a2, a2, a3 +.Lzerobit: + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + +.Lspecial: + bltu a2, a3, .Lreturn + sub a2, a2, a3 /* subtract again if udividend >= udivisor */ +.Lreturn: + bgez a7, .Lpositive + neg a2, a2 /* if (dividend < 0), return -udividend */ +.Lpositive: + leaf_return + +.Lle_one: + bnez a3, .Lreturn0 + + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __modsi3, . 
- __modsi3 + +#endif /* L_modsi3 */ + + +#ifdef __XTENSA_EB__ +#define uh a2 +#define ul a3 +#else +#define uh a3 +#define ul a2 +#endif /* __XTENSA_EB__ */ + + +#ifdef L_ashldi3 + .align 4 + .global __ashldi3 + .type __ashldi3, @function +__ashldi3: + leaf_entry sp, 16 + ssl a4 + bgei a4, 32, .Llow_only + src uh, uh, ul + sll ul, ul + leaf_return + +.Llow_only: + sll uh, ul + movi ul, 0 + leaf_return + .size __ashldi3, . - __ashldi3 + +#endif /* L_ashldi3 */ + + +#ifdef L_ashrdi3 + .align 4 + .global __ashrdi3 + .type __ashrdi3, @function +__ashrdi3: + leaf_entry sp, 16 + ssr a4 + bgei a4, 32, .Lhigh_only + src ul, uh, ul + sra uh, uh + leaf_return + +.Lhigh_only: + sra ul, uh + srai uh, uh, 31 + leaf_return + .size __ashrdi3, . - __ashrdi3 + +#endif /* L_ashrdi3 */ + + +#ifdef L_lshrdi3 + .align 4 + .global __lshrdi3 + .type __lshrdi3, @function +__lshrdi3: + leaf_entry sp, 16 + ssr a4 + bgei a4, 32, .Lhigh_only1 + src ul, uh, ul + srl uh, uh + leaf_return + +.Lhigh_only1: + srl ul, uh + movi uh, 0 + leaf_return + .size __lshrdi3, . - __lshrdi3 + +#endif /* L_lshrdi3 */ + + +#include "ieee754-df.S" +#include "ieee754-sf.S" diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa index 7d9e9db0487..5bcc0946243 100644 --- a/libgcc/config/xtensa/t-xtensa +++ b/libgcc/config/xtensa/t-xtensa @@ -1,2 +1,14 @@ +LIB1ASMSRC = xtensa/lib1funcs.S +LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ + _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ + _ashldi3 _ashrdi3 _lshrdi3 \ + _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ + _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \ + _floatdisf _floatundisf \ + _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \ + _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \ + _floatdidf _floatundidf \ + _truncdfsf2 _extendsfdf2 + LIB2ADDEH = $(srcdir)/config/xtensa/unwind-dw2-xtensa.c \ $(srcdir)/unwind-dw2-fde.c $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
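
Editor's note (not part of the patch): the single-precision routines above all share the same
round-to-nearest-even step at their .L*_round / .L*_exactlyhalf labels: round up when the bits
shifted out are >= 1/2 ulp, and if they are exactly 1/2 ulp, clear the low bit of the result so it
ends up even. The following is a minimal, hypothetical C sketch of that step under those assumptions;
the names round_nearest_even, frac and guard are illustrative only and do not appear in the patch.

#include <stdint.h>

/* Hypothetical sketch of round-to-nearest-even as performed by the
   .L*_round / .L*_exactlyhalf paths in ieee754-sf.S above.  'frac' is the
   result being assembled (implicit 1.0 included) and 'guard' holds the bits
   shifted out of it, with the half-ulp bit in the msb -- the role played by
   a9 (add/sub) or a6 (mul) in the assembly.  */
static uint32_t
round_nearest_even (uint32_t frac, uint32_t guard)
{
  if (guard & 0x80000000u)	/* leftover fraction >= 1/2: round up.  */
    {
      frac += 1;		/* carry into the exponent field is allowed.  */
      if ((guard << 1) == 0)	/* exactly 1/2: round down to even.  */
	frac &= ~1u;
    }
  return frac;
}

In the assembly the same test is a bgez on the guard register (msb check), then an slli/beqz to
detect the exact-half case, and a final srli/slli pair that clears the low bit to force an even
result.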