From 312209c6a5ff3828335e698a9d0c872a48411fdf Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Wed, 28 Jul 2004 09:13:58 +0000 Subject: [PATCH] Introduce sh4a support. gcc/ChangeLog: Introduce sh4a support. * config.gcc: Handle sh4a multilibs and cpu selection. * config/sh/sh.h: Likewise. Handle sh4a command line flags. * config/sh/t-mlib-sh4a: New. * config/sh/t-mlib-sh4al: New. * config/sh/t-mlib-sh4a-nofpu: New. * config/sh/t-mlib-sh4a-single: New. * config/sh/t-mlib-sh4a-single-only: New. 2004-02-20 DJ Delorie * config/sh/sh.md ("movua"): Change constraint from "m" to "Sua". * config/sh/sh.h (EXTRA_CONSTRAINT_S): Add "Sua" support. 2003-08-22 Eric Christopher * config/sh/sh4a.md: Update for chip errata. 2003-08-07 Eric Christopher * config/sh/sh4a.md: New file. sh4a processor description. 2003-07-08 Alexandre Oliva * config/sh/sh.h (TARGET_SWITCHES): Added 4al. Adjust description of -m4a-nofpu. (SH_ASM_SPEC): Pass -dsp for -m4al, not -m4a-nofpu. * config/sh/t-sh (MULTILIB_MATCHES): Map -m4al to -m4a-nofpu. * doc/invoke.texi (SH Options): Document -m4al. 2003-07-03 Alexandre Oliva * config/sh/sh.c (expand_block_move): Remove commented-out code checked in by mistake. (sh_cannot_change_mode_class): Enable SUBREGs to be used to select single elements from SFmode vectors. * config/sh/sh.md (fsca): Use VEC_CONCAT to initialize the output register. (sinsf2, cossf2, sindf2, cosdf2): Don't emit CLOBBER. 2003-07-01 Alexandre Oliva * config/sh/sh.h (sh_fsca_sf2int, sh_fsca_df2int, sh_fsca_int2sf): Remove variable declarations. * config/sh/sh.c (sh_fsca_sf2int, sh_fsca_df2int, sh_fsca_int2sf): New functions. (sh_fsca_sf2int_rtx, sh_fsca_df2int_rtx, sh_fsca_int2sf_rtx): New static variables. * config/sh/sh-protos.h (sh_fsca_sf2int, sh_fsca_df2int, sh_fsca_int2sf): Declare. * config/sh/sh.md: Adjust. * doc/invoke.texi (SH Options): Document new options. * config/sh/lib1funcs.asm (ic_invalidate): Remove SH4a forward compatibility from SH4 code. 
2003-06-27 Alexandre Oliva * config/sh/sh.c (expand_block_move): Don't emit POST_INC too early. (memory_movsrc_operand): Renamed to... (unaligned_load_operand): ... this. Simplified. * config/sh/sh.h (PREDICATE_CODES): Adjust. * config/sh/sh.md (movua, extv, extzv): Likewise. Change movua's input operand to SImode, and adjust the others. Introduce post-increment by peephole. * config/sh/sh.c (expand_block_move): Give the target address the same mode as the temp reg. * config/sh/sh.c (expand_block_move): Use a temp reg for unaligned copying. 2003-06-26 Alexandre Oliva Introduce support for SH4a. * config/sh/lib1funcs.asm (ic_invalidate): Use icbi if __SH4A__. Emit 4 4kb blocks and touch all of them otherwise. * config/sh/sh.c (sh_fsca_sf2int, sh_fsca_df2int, sh_fsca_int2sf): New. (sh_init_builtins): Initialize them. (print_operand): Support `d'. (expand_block_move): Use movua if src is misaligned. (memory_movsrc_operand): New. * config/sh/sh.h (TARGET_CPU_CPP_BUILTINS): Define __SH4A__ and one of the SH4 macros. (SH4A_BIT, TARGET_SH4A_ARCH, TARGET_SH4A_FP, SELECT_SH4A_NOFPU, SELECT_SH4A_SINGLE_ONLY, SELECT_SH4A, SELECT_SH4A_SINGLE): New. (TARGET_NONE): Add SH4A_BIT. (TARGET_SWITCHES): Add 4a-single-only, 4a-single, 4a-nofpu and 4a. (SH_ASM_SPEC): Pass -dsp if -m4a-nofpu. (sh_fsca_sf2int, sh_fsca_df2int, sh_fsca_int2sf): Declare. (OVERRIDE_OPTIONS): Set cpu to CPU_SH4A when appropriate. (enum processor_type): Added PROCESSOR_SH4A. (PREDICATE_CODES): Add memory_movsrc_operand. * config/sh/sh.md: Removed unused variables. (attr cpu): Add sh4a. (attr type): Add movua, fsrra and fsca. (prefetch): New, for SH4. (ic_invalidate_line, ic_invalidate_line_sh4a): Use icbi. (toggle_sz): Set type to fp. (toggle_pr, rsqrtsf2, fsca, sinsf2, cossf2, sindf2, cosdf2): New. (movua, extv, extzv): New. * config/sh/t-sh: Add multilibs for 4a, 4a-nofpu, 4a-single and 4a-single-only. gcc/testsuite/ChangeLog: 2003-07-06 Alexandre Oliva * gcc.dg/sh4a-memmovua.c: Tweak regular expression. 
2003-07-01 Alexandre Oliva * gcc.dg/sh4a-bitmovua.c: New. * gcc.dg/sh4a-cos.c: New. * gcc.dg/sh4a-cosf.c: New. * gcc.dg/sh4a-fprun.c: New. * gcc.dg/sh4a-fsrra.c: New. * gcc.dg/sh4a-memmovua.c: New. * gcc.dg/sh4a-sin.c: New. * gcc.dg/sh4a-sincos.c: New. * gcc.dg/sh4a-sincosf.c: New. * gcc.dg/sh4a-sinf.c: New. libstdc++-v3/ChangeLog: 2003-10-01 Eric Christopher * config/cpu/sh/atomicity.h (__exchange_and_add): Remove 'm' constraint. 2003-07-09 Alexandre Oliva * config/cpu/sh/atomicity.h: New. Use movli and movco on SH4a. From-SVN: r85257 --- gcc/ChangeLog | 90 ++++++++++ gcc/config.gcc | 10 ++ gcc/config/sh/lib1funcs.asm | 27 +++ gcc/config/sh/sh-protos.h | 3 + gcc/config/sh/sh.c | 137 +++++++++++++- gcc/config/sh/sh.h | 80 ++++++++- gcc/config/sh/sh.md | 214 +++++++++++++++++++++- gcc/config/sh/sh4a.md | 235 +++++++++++++++++++++++++ gcc/config/sh/t-mlib-sh4a | 1 + gcc/config/sh/t-mlib-sh4a-nofpu | 1 + gcc/config/sh/t-mlib-sh4a-single | 1 + gcc/config/sh/t-mlib-sh4a-single-only | 1 + gcc/config/sh/t-mlib-sh4al | 1 + gcc/config/sh/t-sh | 7 +- gcc/doc/invoke.texi | 26 +++ gcc/testsuite/ChangeLog | 16 ++ gcc/testsuite/gcc.dg/sh4a-bitmovua.c | 73 ++++++++ gcc/testsuite/gcc.dg/sh4a-cos.c | 13 ++ gcc/testsuite/gcc.dg/sh4a-cosf.c | 13 ++ gcc/testsuite/gcc.dg/sh4a-fprun.c | 35 ++++ gcc/testsuite/gcc.dg/sh4a-fsrra.c | 13 ++ gcc/testsuite/gcc.dg/sh4a-memmovua.c | 17 ++ gcc/testsuite/gcc.dg/sh4a-sin.c | 13 ++ gcc/testsuite/gcc.dg/sh4a-sincos.c | 14 ++ gcc/testsuite/gcc.dg/sh4a-sincosf.c | 14 ++ gcc/testsuite/gcc.dg/sh4a-sinf.c | 13 ++ libstdc++-v3/ChangeLog | 8 + libstdc++-v3/config/cpu/sh/atomicity.h | 123 +++++++++++++ 28 files changed, 1189 insertions(+), 10 deletions(-) create mode 100644 gcc/config/sh/sh4a.md create mode 100644 gcc/config/sh/t-mlib-sh4a create mode 100644 gcc/config/sh/t-mlib-sh4a-nofpu create mode 100644 gcc/config/sh/t-mlib-sh4a-single create mode 100644 gcc/config/sh/t-mlib-sh4a-single-only create mode 100644 gcc/config/sh/t-mlib-sh4al create mode 
100644 gcc/testsuite/gcc.dg/sh4a-bitmovua.c create mode 100644 gcc/testsuite/gcc.dg/sh4a-cos.c create mode 100644 gcc/testsuite/gcc.dg/sh4a-cosf.c create mode 100644 gcc/testsuite/gcc.dg/sh4a-fprun.c create mode 100644 gcc/testsuite/gcc.dg/sh4a-fsrra.c create mode 100644 gcc/testsuite/gcc.dg/sh4a-memmovua.c create mode 100644 gcc/testsuite/gcc.dg/sh4a-sin.c create mode 100644 gcc/testsuite/gcc.dg/sh4a-sincos.c create mode 100644 gcc/testsuite/gcc.dg/sh4a-sincosf.c create mode 100644 gcc/testsuite/gcc.dg/sh4a-sinf.c create mode 100644 libstdc++-v3/config/cpu/sh/atomicity.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a3f30ff9d3d..ddea34766fa 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,93 @@ +2004-07-28 Alexandre Oliva + + Introduce sh4a support. + * config.gcc: Handle sh4a multilibs and cpu selection. + * config/sh/sh.h: Likewise. Handle sh4a command line flags. + * config/sh/t-mlib-sh4a: New. + * config/sh/t-mlib-sh4al: New. + * config/sh/t-mlib-sh4a-nofpu: New. + * config/sh/t-mlib-sh4a-single: New. + * config/sh/t-mlib-sh4a-single-only: New. + 2004-02-20 DJ Delorie + * config/sh/sh.md ("movua"): Change constraint from "m" to "Sua". + * config/sh/sh.h (EXTRA_CONSTRAINT_S): Add "Sua" support. + 2003-08-22 Eric Christopher + * config/sh/sh4a.md: Update for chip errata. + 2003-08-07 Eric Christopher + * config/sh/sh4a.md: New file. sh4a processor description. + 2003-07-08 Alexandre Oliva + * config/sh/sh.h (TARGET_SWITCHES): Added 4al. Adjust description + of -m4a-nofpu. + (SH_ASM_SPEC): Pass -dsp for -m4al, not -m4a-nofpu. + * config/sh/t-sh (MULTILIB_MATCHES): Map -m4al to -m4a-nofpu. + * doc/invoke.texi (SH Options): Document -m4al. + 2003-07-03 Alexandre Oliva + * config/sh/sh.c (expand_block_move): Remove commented-out code + checked in by mistake. + (sh_cannot_change_mode_class): Enable SUBREGs to be used to select + single elements from SFmode vectors. + * config/sh/sh.md (fsca): Use VEC_CONCAT to initialize the output + register. 
+ (sinsf2, cossf2, sindf2, cosdf2): Don't emit CLOBBER. + 2003-07-01 Alexandre Oliva + * config/sh/sh.h (sh_fsca_sf2int, sh_fsca_df2int, + sh_fsca_int2sf): Remove variable declarations. + * config/sh/sh.c (sh_fsca_sf2int, sh_fsca_df2int, + sh_fsca_int2sf): New functions. + (sh_fsca_sf2int_rtx, sh_fsca_df2int_rtx, + sh_fsca_int2sf_rtx): New static variables. + * config/sh/sh-protos.h (sh_fsca_sf2int, sh_fsca_df2int, + sh_fsca_int2sf): Declare. + * config/sh/sh.md: Adjust. + * doc/invoke.texi (SH Options): Document new options. + * config/sh/lib1funcs.asm (ic_invalidate): Remove SH4a forward + compatibility from SH4 code. + 2003-06-27 Alexandre Oliva + * config/sh/sh.c (expand_block_move): Don't emit POST_INC too + early. + (memory_movsrc_operand): Renamed to... + (unaligned_load_operand): ... this. Simplified. + * config/sh/sh.h (PREDICATE_CODES): Adjust. + * config/sh/sh.md (movua, extv, extzv): Likewise. Change movua's + input operand to SImode, and adjust the others. Introduce + post-increment by peephole. + * config/sh/sh.c (expand_block_move): Give the target address the + same mode as the temp reg. + * config/sh/sh.c (expand_block_move): Use a temp reg for unaligned + copying. + 2003-06-26 Alexandre Oliva + Introduce support for SH4a. + * config/sh/lib1funcs.asm (ic_invalidate): Use icbi if + __SH4A__. Emit 4 4kb blocks and touch all of them otherwise. + * config/sh/sh.c (sh_fsca_sf2int, sh_fsca_df2int, + sh_fsca_int2sf): New. + (sh_init_builtins): Initialize them. + (print_operand): Support `d'. + (expand_block_move): Use movua if src is misaligned. + (memory_movsrc_operand): New. + * config/sh/sh.h (TARGET_CPU_CPP_BUILTINS): Define __SH4A__ + and one of the SH4 macros. + (SH4A_BIT, TARGET_SH4A_ARCH, TARGET_SH4A_FP, + SELECT_SH4A_NOFPU, SELECT_SH4A_SINGLE_ONLY, SELECT_SH4A, + SELECT_SH4A_SINGLE): New. + (TARGET_NONE): Add SH4A_BIT. + (TARGET_SWITCHES): Add 4a-single-only, 4a-single, 4a-nofpu and 4a. + (SH_ASM_SPEC): Pass -dsp if -m4a-nofpu. 
+ (sh_fsca_sf2int, sh_fsca_df2int, sh_fsca_int2sf): Declare. + (OVERRIDE_OPTIONS): Set cpu to CPU_SH4A when appropriate. + (enum processor_type): Added PROCESSOR_SH4A. + (PREDICATE_CODES): Add memory_movsrc_operand. + * config/sh/sh.md: Removed unused variables. + (attr cpu): Add sh4a. + (attr type): Add movua, fsrra and fsca. + (prefetch): New, for SH4. + (ic_invalidate_line, ic_invalidate_line_sh4a): Use icbi. + (toggle_sz): Set type to fp. + (toggle_pr, rsqrtsf2, fsca, sinsf2, cossf2, sindf2, cosdf2): New. + (movua, extv, extzv): New. + * config/sh/t-sh: Add multilibs for 4a, 4a-nofpu, 4a-single + and 4a-single-only. + 2004-07-28 Diego Novillo * tree-optimize.c (init_tree_optimization_passes): Schedule diff --git a/gcc/config.gcc b/gcc/config.gcc index 8fc8a99fbbc..1360b072a48 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -1746,6 +1746,11 @@ sh-*-symbianelf* | sh[12346l]*-*-symbianelf* | \ case `echo ${target} | sed 's/e[lb]-/-/'` in sh64*-*-netbsd*) sh_cpu_target=sh5-64media ;; sh64* | sh5*-*-netbsd*) sh_cpu_target=sh5-32media ;; + sh4a_single_only*) sh_cpu_target=sh4a-single-only ;; + sh4a_single*) sh_cpu_target=sh4a-single ;; + sh4a_nofpu*) sh_cpu_target=sh4a-nofpu ;; + sh4al) sh_cpu_target=sh4al ;; + sh4a*) sh_cpu_target=sh4a ;; sh4_single_only*) sh_cpu_target=sh4-single-only ;; sh4_single*) sh_cpu_target=sh4-single ;; sh4_nofpu*) sh_cpu_target=sh4-nofpu ;; @@ -1760,6 +1765,7 @@ sh-*-symbianelf* | sh[12346l]*-*-symbianelf* | \ case $sh_cpu_default in sh5-64media-nofpu | sh5-64media | \ sh5-32media-nofpu | sh5-32media | sh5-compact-nofpu | sh5-compact | \ + sh4a-single-only | sh4a-single | sh4a-nofpu | sh4a | sh4al | \ sh4-single-only | sh4-single | sh4-nofpu | sh4 | \ sh3e | sh3 | sh2e | sh2 | sh1) ;; "") sh_cpu_default=${sh_cpu_target} ;; @@ -1783,6 +1789,7 @@ sh-*-symbianelf* | sh[12346l]*-*-symbianelf* | \ case ${sh_multilib} in sh1 | sh2 | sh2e | sh3 | sh3e | \ sh4 | sh4-single | sh4-single-only | sh4-nofpu | \ + sh4a | sh4a-single | 
sh4a-single-only | sh4a-nofpu | sh4al | \ sh5-64media | sh5-64media-nofpu | \ sh5-32media | sh5-32media-nofpu | \ sh5-compact | sh5-compact-nofpu) @@ -2476,9 +2483,12 @@ fi "" | m1 | m2 | m2e | m3 | m3e | m4 | m4-single | m4-single-only | m4-nofpu ) # OK ;; + m4a | m4a-single | m4a-single-only | m4a-nofpu | m4al) + ;; *) echo "Unknown CPU used in --with-cpu=$with_cpu, known values:" 1>&2 echo "m1 m2 m2e m3 m3e m4 m4-single m4-single-only m4-nofpu" 1>&2 + echo "m4a m4a-single m4a-single-only m4a-nofpu m4al" 1>&2 exit 1 ;; esac diff --git a/gcc/config/sh/lib1funcs.asm b/gcc/config/sh/lib1funcs.asm index 7619f0c67d7..239438fd06b 100644 --- a/gcc/config/sh/lib1funcs.asm +++ b/gcc/config/sh/lib1funcs.asm @@ -2036,7 +2036,34 @@ GLOBAL(ic_invalidate): ENDFUNC(GLOBAL(ic_invalidate)) ENDFUNC(GLOBAL(init_trampoline)) +#elif defined(__SH4A__) + .global GLOBAL(ic_invalidate) + FUNC(GLOBAL(ic_invalidate)) +GLOBAL(ic_invalidate): + ocbwb @r4 + synco + rts + icbi @r4 + ENDFUNC(GLOBAL(ic_invalidate)) #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) + /* This assumes a direct-mapped cache, which is the case for + the first SH4, but not for the second version of SH4, that + uses a 2-way set-associative cache, nor SH4a, that is 4-way. + SH4a fortunately offers an instruction to invalidate the + instruction cache, and we use it above, but SH4 doesn't. + However, since the libraries don't contain any nested + functions (the only case in which GCC would emit this pattern) + and we actually emit the ic_invalidate_line_i pattern for + cache invalidation on all SH4 multilibs (even 4-nofpu, that + isn't even covered here), and pre-SH4 cores don't have + caches, it seems like this code is pointless, unless it's + meant for backward binary compatibility or for userland-only + cache invalidation for say sh4-*-linux-gnu. 
Such a feature + should probably be moved into a system call, such that the + kernel could do whatever it takes to invalidate a cache line + on the core it's actually running on. I.e., this hideous :-) + piece of code should go away at some point. */ + .global GLOBAL(ic_invalidate) FUNC(GLOBAL(ic_invalidate)) GLOBAL(ic_invalidate): diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h index 0d1733c3717..d5ddb7b20b7 100644 --- a/gcc/config/sh/sh-protos.h +++ b/gcc/config/sh/sh-protos.h @@ -25,6 +25,9 @@ Boston, MA 02111-1307, USA. */ #define GCC_SH_PROTOS_H #ifdef RTX_CODE +extern rtx sh_fsca_sf2int (void); +extern rtx sh_fsca_df2int (void); +extern rtx sh_fsca_int2sf (void); extern struct rtx_def *prepare_scc_operands (enum rtx_code); /* Declare functions defined in sh.c and used in templates. */ diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index 913bb34d253..010e5dcadc3 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -537,6 +537,7 @@ print_operand_address (FILE *stream, rtx x) 'T' print the next word of a dp value - same as 'R' in big endian mode. 'M' print an `x' if `m' will print `base,index'. 'N' print 'r63' if the operand is (const_int 0). + 'd' print a V2SF reg as dN instead of fpN. 'm' print a pair `base,offset' or `base,index', for LD and ST. 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value. 'o' output an operator. */ @@ -651,6 +652,13 @@ print_operand (FILE *stream, rtx x, int code) } break; + case 'd': + if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode) + abort (); + + fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1); + break; + case 'N': if (x == CONST0_RTX (GET_MODE (x))) { @@ -772,9 +780,48 @@ expand_block_move (rtx *operands) int constp = (GET_CODE (operands[2]) == CONST_INT); int bytes = (constp ? INTVAL (operands[2]) : 0); + if (! 
constp) + return 0; + + /* If we could use mov.l to move words and dest is word-aligned, we + can use movua.l for loads and still generate a relatively short + and efficient sequence. */ + if (TARGET_SH4A_ARCH && align < 4 + && MEM_ALIGN (operands[0]) >= 32 + && can_move_by_pieces (bytes, 32)) + { + rtx dest = copy_rtx (operands[0]); + rtx src = copy_rtx (operands[1]); + /* We could use different pseudos for each copied word, but + since movua can only load into r0, it's kind of + pointless. */ + rtx temp = gen_reg_rtx (SImode); + rtx src_addr = copy_addr_to_reg (XEXP (src, 0)); + int copied = 0; + + while (copied + 4 <= bytes) + { + rtx to = adjust_address (dest, SImode, copied); + rtx from = adjust_automodify_address (src, SImode, src_addr, copied); + + emit_insn (gen_movua (temp, from)); + emit_move_insn (src_addr, plus_constant (src_addr, 4)); + emit_move_insn (to, temp); + copied += 4; + } + + if (copied < bytes) + move_by_pieces (adjust_address (dest, BLKmode, copied), + adjust_automodify_address (src, BLKmode, + src_addr, copied), + bytes - copied, align, 0); + + return 1; + } + /* If it isn't a constant number of bytes, or if it doesn't have 4 byte alignment, or if it isn't a multiple of 4 bytes, then fail. */ - if (! constp || align < 4 || (bytes % 4 != 0)) + if (align < 4 || (bytes % 4 != 0)) return 0; if (TARGET_HARD_SH4) @@ -9397,6 +9444,11 @@ bool sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, enum reg_class class) { + /* We want to enable the use of SUBREGs as a means to + VEC_SELECT a single element of a vector. */ + if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode) + return (reg_classes_intersect_p (GENERAL_REGS, class)); + if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to)) { if (TARGET_LITTLE_ENDIAN) @@ -9821,4 +9873,87 @@ check_use_sfunc_addr (rtx insn, rtx reg) abort (); } +/* Returns 1 if OP is a MEM that can be source of a simple move operation. 
*/ + +int +unaligned_load_operand (rtx op, enum machine_mode mode) +{ + rtx inside; + + if (GET_CODE (op) != MEM || GET_MODE (op) != mode) + return 0; + + inside = XEXP (op, 0); + + if (GET_CODE (inside) == POST_INC) + inside = XEXP (inside, 0); + + if (GET_CODE (inside) == REG) + return 1; + + return 0; +} + +/* This function returns a constant rtx that represents 2**15 / pi in + SFmode. It's used to scale SFmode angles, in radians, to a + fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi + maps to 0x10000. */ + +static GTY(()) rtx sh_fsca_sf2int_rtx; + +rtx +sh_fsca_sf2int (void) +{ + if (! sh_fsca_sf2int_rtx) + { + REAL_VALUE_TYPE rv; + + real_from_string (&rv, "10430.378350470453"); + sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode); + } + + return sh_fsca_sf2int_rtx; +} + +/* This function returns a constant rtx that represents 2**15 / pi in + DFmode. It's used to scale DFmode angles, in radians, to a + fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi + maps to 0x10000. */ + +static GTY(()) rtx sh_fsca_df2int_rtx; + +rtx +sh_fsca_df2int (void) +{ + if (! sh_fsca_df2int_rtx) + { + REAL_VALUE_TYPE rv; + + real_from_string (&rv, "10430.378350470453"); + sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode); + } + + return sh_fsca_df2int_rtx; +} + +/* This function returns a constant rtx that represents pi / 2**15 in + SFmode. It's used to scale a fixed-point signed 16.16-bit fraction + of a full circle back to an SFmode value, i.e., 0x10000 maps to + 2*pi. */ + +static GTY(()) rtx sh_fsca_int2sf_rtx; + +rtx +sh_fsca_int2sf (void) +{ + if (! 
sh_fsca_int2sf_rtx) + { + REAL_VALUE_TYPE rv; + + real_from_string (&rv, "9.587379924285257e-5"); + sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode); + } + + return sh_fsca_int2sf_rtx; +} #include "gt-sh.h" diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index e38361f186a..71546facf9d 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -60,6 +60,13 @@ do { \ case PROCESSOR_SH4: \ builtin_define (TARGET_FPU_SINGLE ? "__SH4_SINGLE__" : "__SH4__"); \ break; \ + case PROCESSOR_SH4A: \ + builtin_define ("__SH4A__"); \ + builtin_define (TARGET_SH4 \ + ? (TARGET_FPU_SINGLE ? "__SH4_SINGLE__" : "__SH4__") \ + : TARGET_FPU_ANY ? "__SH4_SINGLE_ONLY__" \ + : "__SH4_NOFPU__"); \ + break; \ case PROCESSOR_SH5: \ { \ builtin_define_with_value ("__SH5__", \ @@ -138,6 +145,7 @@ extern int target_flags; #define HARD_SH4_BIT (1<<5) #define FPU_SINGLE_BIT (1<<7) #define SH4_BIT (1<<12) +#define SH4A_BIT (1<<3) #define FMOVD_BIT (1<<4) #define SH5_BIT (1<<0) #define SPACE_BIT (1<<13) @@ -200,6 +208,14 @@ extern int target_flags; /* Nonzero if we should generate code using type 4 insns. */ #define TARGET_SH4 ((target_flags & SH4_BIT) && (target_flags & SH1_BIT)) +/* Nonzero if we're generating code for the common subset of + instructions present on both SH4a and SH4al-dsp. */ +#define TARGET_SH4A_ARCH (target_flags & SH4A_BIT) + +/* Nonzero if we're generating code for SH4a, unless the use of the + FPU is disabled (which makes it compatible with SH4al-dsp). */ +#define TARGET_SH4A_FP (TARGET_SH4A_ARCH && TARGET_FPU_ANY) + /* Nonzero if we should generate code for a SH5 CPU (either ISA). 
*/ #define TARGET_SH5 (target_flags & SH5_BIT) @@ -285,6 +301,10 @@ extern int target_flags; #define SELECT_SH4_SINGLE_ONLY (HARD_SH4_BIT | SELECT_SH3E) #define SELECT_SH4 (SH4_BIT | SH_E_BIT | HARD_SH4_BIT | SELECT_SH3) #define SELECT_SH4_SINGLE (FPU_SINGLE_BIT | SELECT_SH4) +#define SELECT_SH4A_NOFPU (SH4A_BIT | SELECT_SH4_NOFPU) +#define SELECT_SH4A_SINGLE_ONLY (SH4A_BIT | SELECT_SH4_SINGLE_ONLY) +#define SELECT_SH4A (SH4A_BIT | SELECT_SH4) +#define SELECT_SH4A_SINGLE (SH4A_BIT | SELECT_SH4_SINGLE) #define SELECT_SH5_64MEDIA (SH5_BIT | SH4_BIT) #define SELECT_SH5_64MEDIA_NOFPU (SH5_BIT) #define SELECT_SH5_32MEDIA (SH5_BIT | SH4_BIT | SH_E_BIT) @@ -302,6 +322,12 @@ extern int target_flags; #ifndef SUPPORT_SH4_NOFPU #define TARGET_SWITCH_SH4_NOFPU #endif +#ifndef SUPPORT_SH4A_NOFPU +#define TARGET_SWITCH_SH4A_NOFPU +#endif +#ifndef SUPPORT_SH4AL +#define TARGET_SWITCH_SH4AL +#endif #endif #endif #endif @@ -313,15 +339,24 @@ extern int target_flags; #ifndef SUPPORT_SH4_SINGLE_ONLY #define TARGET_SWITCH_SH4_SINGLE_ONLY #endif +#ifndef SUPPORT_SH4A_SINGLE_ONLY +#define TARGET_SWITCH_SH4A_SINGLE_ONLY +#endif #endif #endif #ifndef SUPPORT_SH4 #define TARGET_SWITCH_SH4 +#ifndef SUPPORT_SH4A +#define TARGET_SWITCH_SH4A +#endif #endif #ifndef SUPPORT_SH4_SINGLE #define TARGET_SWITCH_SH4_SINGLE +#ifndef SUPPORT_SH4A_SINGLE +#define TARGET_SWITCH_SH4A_SINGLE +#endif #endif #ifndef SUPPORT_SH5_64MEDIA @@ -342,7 +377,7 @@ extern int target_flags; /* Reset all target-selection flags. 
*/ #define TARGET_NONE -(SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | SH4_BIT \ - | HARD_SH4_BIT | FPU_SINGLE_BIT | SH5_BIT) + | SH4A_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT | SH5_BIT) #ifndef TARGET_SWITCH_SH1 #define TARGET_SWITCH_SH1 \ @@ -389,6 +424,31 @@ extern int target_flags; {"4", TARGET_NONE, "" }, \ {"4", SELECT_SH4, "Generate SH4 code" }, #endif +#ifndef TARGET_SWITCH_SH4A +#define TARGET_SWITCH_SH4A \ + {"4a", TARGET_NONE, "" }, \ + {"4a", SELECT_SH4A, "Generate SH4a code" }, +#endif +#ifndef TARGET_SWITCH_SH4A_SINGLE_ONLY +#define TARGET_SWITCH_SH4A_SINGLE_ONLY \ + {"4a-single-only", TARGET_NONE, "" }, \ + {"4a-single-only", SELECT_SH4A_SINGLE_ONLY, "Generate only single-precision SH4a code" }, +#endif +#ifndef TARGET_SWITCH_SH4A_SINGLE +#define TARGET_SWITCH_SH4A_SINGLE \ + {"4a-single", TARGET_NONE, "" },\ + {"4a-single", SELECT_SH4A_SINGLE, "Generate default single-precision SH4a code" }, +#endif +#ifndef TARGET_SWITCH_SH4A_NOFPU +#define TARGET_SWITCH_SH4A_NOFPU \ + {"4a-nofpu", TARGET_NONE, "" },\ + {"4a-nofpu", SELECT_SH4A_NOFPU, "Generate SH4a FPU-less code" }, +#endif +#ifndef TARGET_SWITCH_SH4AL +#define TARGET_SWITCH_SH4AL \ + {"4al", TARGET_NONE, "" },\ + {"4al", SELECT_SH4A_NOFPU, "Generate SH4al-dsp code" }, +#endif #ifndef TARGET_SWITCH_SH5_64MEDIA #define TARGET_SWITCH_SH5_64MEDIA \ {"5-64media", TARGET_NONE, "" }, \ @@ -424,6 +484,11 @@ extern int target_flags; TARGET_SWITCH_SH4_SINGLE \ TARGET_SWITCH_SH4_NOFPU \ TARGET_SWITCH_SH4 \ + TARGET_SWITCH_SH4A_SINGLE_ONLY \ + TARGET_SWITCH_SH4A_SINGLE \ + TARGET_SWITCH_SH4A_NOFPU \ + TARGET_SWITCH_SH4A \ + TARGET_SWITCH_SH4AL \ TARGET_SWITCH_SH5_64MEDIA \ TARGET_SWITCH_SH5_64MEDIA_NOFPU \ TARGET_SWITCHES_SH5_32MEDIA \ @@ -497,7 +562,7 @@ extern int target_flags; #define SH_ASM_SPEC \ "%(subtarget_asm_endian_spec) %{mrelax:-relax %(subtarget_asm_relax_spec)}\ -%(subtarget_asm_isa_spec)" +%(subtarget_asm_isa_spec) %{m4al:-dsp}" #define ASM_SPEC SH_ASM_SPEC @@ -584,6 +649,11 @@ do { \ assembler_dialect 
= 1; \ sh_cpu = CPU_SH4; \ } \ + if (TARGET_SH4A_ARCH) \ + { \ + assembler_dialect = 1; \ + sh_cpu = CPU_SH4A; \ + } \ if (TARGET_SH5) \ { \ sh_cpu = CPU_SH5; \ @@ -2441,8 +2511,12 @@ struct sh_args { #define EXTRA_CONSTRAINT_Sr0(OP) \ (memory_operand((OP), GET_MODE (OP)) \ && ! refers_to_regno_p (R0_REG, R0_REG + 1, OP, (rtx *)0)) +#define EXTRA_CONSTRAINT_Sua(OP) \ + (memory_operand((OP), GET_MODE (OP)) \ + && GET_CODE (XEXP (OP, 0)) != PLUS) #define EXTRA_CONSTRAINT_S(OP, STR) \ ((STR)[1] == 'r' && (STR)[2] == '0' ? EXTRA_CONSTRAINT_Sr0 (OP) \ + : (STR)[1] == 'u' && (STR)[2] == 'a' ? EXTRA_CONSTRAINT_Sua (OP) \ : 0) #define EXTRA_CONSTRAINT_STR(OP, C, STR) \ @@ -3175,6 +3249,7 @@ enum processor_type { PROCESSOR_SH3, PROCESSOR_SH3E, PROCESSOR_SH4, + PROCESSOR_SH4A, PROCESSOR_SH5 }; @@ -3245,6 +3320,7 @@ extern int rtx_equal_function_value_matters; {"general_extend_operand", {SUBREG, REG, MEM, TRUNCATE}}, \ {"general_movsrc_operand", {SUBREG, REG, CONST_INT, CONST_DOUBLE, MEM}}, \ {"general_movdst_operand", {SUBREG, REG, MEM}}, \ + {"unaligned_load_operand", {MEM}}, \ {"greater_comparison_operator", {GT,GE,GTU,GEU}}, \ {"int_gpr_dest", {SUBREG, REG}}, \ {"inqhi_operand", {TRUNCATE}}, \ diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index 4af9cf9a14b..77ba4d59d74 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -161,7 +161,7 @@ ;; Target CPU. 
(define_attr "cpu" - "sh1,sh2,sh2e,sh3,sh3e,sh4,sh5" + "sh1,sh2,sh2e,sh3,sh3e,sh4,sh4a,sh5" (const (symbol_ref "sh_cpu_attr"))) (define_attr "endian" "big,little" @@ -218,6 +218,9 @@ ;; ftrc_s fix_truncsfsi2_i4 ;; dfdiv double precision floating point divide (or square root) ;; cwb ic_invalidate_line_i +;; movua SH4a unaligned load +;; fsrra square root reciprocal approximate +;; fsca sine and cosine approximate ;; tls_load load TLS related address ;; arith_media SHmedia arithmetic, logical, and shift instructions ;; cbranch_media SHmedia conditional branch instructions @@ -249,7 +252,7 @@ ;; nil no-op move, will be deleted. (define_attr "type" - "mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,fload,store,move,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fdiv,ftrc_s,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,mem_fpscr,gp_fpscr,cwb,tls_load,arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media,nil,other" + "mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,fload,store,move,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fdiv,ftrc_s,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,mem_fpscr,gp_fpscr,cwb,movua,fsrra,fsca,tls_load,arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media,nil,other" (const_string "other")) ;; We define a new attribute namely 
"insn_class".We use @@ -3488,6 +3491,11 @@ emit_insn (gen_ic_invalidate_line_compact (operands[0], operands[1])); DONE; } + else if (TARGET_SH4A_ARCH) + { + emit_insn (gen_ic_invalidate_line_sh4a (operands[0])); + DONE; + } operands[0] = force_reg (Pmode, operands[0]); operands[1] = force_reg (Pmode, GEN_INT (trunc_int_for_mode (0xf0000008, Pmode))); @@ -3508,6 +3516,14 @@ [(set_attr "length" "8") (set_attr "type" "cwb")]) +(define_insn "ic_invalidate_line_sh4a" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_ICACHE)] + "TARGET_SH4A_ARCH" + "ocbwb\\t@%0\;synco\;icbi\\t@%0" + [(set_attr "length" "16") + (set_attr "type" "cwb")]) + ;; ??? could make arg 0 an offsettable memory operand to allow to save ;; an add in the code that calculates the address. (define_insn "ic_invalidate_line_media" @@ -8151,7 +8167,19 @@ mov.l\\t1f,r0\\n\\ (xor:PSI (reg:PSI FPSCR_REG) (const_int 1048576)))] "TARGET_SH4" "fschg" - [(set_attr "fp_set" "unknown")]) + [(set_attr "type" "fp") (set_attr "fp_set" "unknown")]) + +;; There's no way we can use it today, since optimize mode switching +;; doesn't enable us to know from which mode we're switching to the +;; mode it requests, to tell whether we can use a relative mode switch +;; (like toggle_pr) or an absolute switch (like loading fpscr from +;; memory). +(define_insn "toggle_pr" + [(set (reg:PSI FPSCR_REG) + (xor:PSI (reg:PSI FPSCR_REG) (const_int 524288)))] + "TARGET_SH4A_FP && ! 
TARGET_FPU_SINGLE" + "fpchg" + [(set_attr "type" "fp")]) (define_expand "addsf3" [(set (match_operand:SF 0 "arith_reg_operand" "") @@ -8650,6 +8678,117 @@ mov.l\\t1f,r0\\n\\ [(set_attr "type" "fdiv") (set_attr "fp_mode" "single")]) +(define_insn "rsqrtsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (div:SF (match_operand:SF 1 "immediate_operand" "i") + (sqrt:SF (match_operand:SF 2 "register_operand" "0")))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH4A_FP && flag_unsafe_math_optimizations + && operands[1] == CONST1_RTX (SFmode)" + "fsrra %0" + [(set_attr "type" "fsrra") + (set_attr "fp_mode" "single")]) + +(define_insn "fsca" + [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f") + (vec_concat:V2SF + (unspec:SF [(mult:SF + (float:SF (match_operand:SI 1 "fpul_operand" "y")) + (match_operand:SF 2 "immediate_operand" "i")) + ] UNSPEC_FSINA) + (unspec:SF [(mult:SF (float:SF (match_dup 1)) (match_dup 2)) + ] UNSPEC_FCOSA))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH4A_FP && flag_unsafe_math_optimizations + && operands[2] == sh_fsca_int2sf ()" + "fsca fpul,%d0" + [(set_attr "type" "fsca") + (set_attr "fp_mode" "single")]) + +(define_expand "sinsf2" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "")] + UNSPEC_FSINA))] + "TARGET_SH4A_FP && flag_unsafe_math_optimizations" + " +{ + rtx scaled = gen_reg_rtx (SFmode); + rtx truncated = gen_reg_rtx (SImode); + rtx fsca = gen_reg_rtx (V2SFmode); + rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ()); + + emit_sf_insn (gen_mulsf3 (scaled, operands[1], scale_reg)); + emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled)); + emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (), + get_fpscr_rtx ())); + emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 0)); + DONE; +}") + +(define_expand "cossf2" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (unspec:SF [(match_operand:SF 1 
"fp_arith_reg_operand" "")] + UNSPEC_FCOSA))] + "TARGET_SH4A_FP && flag_unsafe_math_optimizations" + " +{ + rtx scaled = gen_reg_rtx (SFmode); + rtx truncated = gen_reg_rtx (SImode); + rtx fsca = gen_reg_rtx (V2SFmode); + rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ()); + + emit_sf_insn (gen_mulsf3 (scaled, operands[1], scale_reg)); + emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled)); + emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (), + get_fpscr_rtx ())); + emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 4)); + DONE; +}") + +(define_expand "sindf2" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (unspec:DF [(match_operand:DF 1 "fp_arith_reg_operand" "")] + UNSPEC_FSINA))] + "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE && flag_unsafe_math_optimizations" + " +{ + rtx scaled = gen_reg_rtx (DFmode); + rtx truncated = gen_reg_rtx (SImode); + rtx fsca = gen_reg_rtx (V2SFmode); + rtx scale_reg = force_reg (DFmode, sh_fsca_df2int ()); + rtx sfresult = gen_reg_rtx (SFmode); + + emit_df_insn (gen_muldf3 (scaled, operands[1], scale_reg)); + emit_df_insn (gen_fix_truncdfsi2 (truncated, scaled)); + emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (), + get_fpscr_rtx ())); + emit_move_insn (sfresult, gen_rtx_SUBREG (SFmode, fsca, 0)); + emit_df_insn (gen_extendsfdf2 (operands[0], sfresult)); + DONE; +}") + +(define_expand "cosdf2" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (unspec:DF [(match_operand:DF 1 "fp_arith_reg_operand" "")] + UNSPEC_FCOSA))] + "TARGET_SH4A_FP && ! 
TARGET_FPU_SINGLE && flag_unsafe_math_optimizations" + " +{ + rtx scaled = gen_reg_rtx (DFmode); + rtx truncated = gen_reg_rtx (SImode); + rtx fsca = gen_reg_rtx (V2SFmode); + rtx scale_reg = force_reg (DFmode, sh_fsca_df2int ()); + rtx sfresult = gen_reg_rtx (SFmode); + + emit_df_insn (gen_muldf3 (scaled, operands[1], scale_reg)); + emit_df_insn (gen_fix_truncdfsi2 (truncated, scaled)); + emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (), + get_fpscr_rtx ())); + emit_move_insn (sfresult, gen_rtx_SUBREG (SFmode, fsca, 4)); + emit_df_insn (gen_extendsfdf2 (operands[0], sfresult)); + DONE; +}") + (define_expand "abssf2" [(set (match_operand:SF 0 "fp_arith_reg_operand" "") (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))] @@ -9188,6 +9327,71 @@ mov.l\\t1f,r0\\n\\ DONE; }") + +(define_insn "movua" + [(set (match_operand:SI 0 "register_operand" "=z") + (sign_extract:SI (match_operand:SI 1 "unaligned_load_operand" "Sua>") + (const_int 32) (const_int 0)))] + "TARGET_SH4A_ARCH" + "movua.l %1,%0" + [(set_attr "type" "movua")]) + +;; We shouldn't need this, but cse replaces increments with references +;; to other regs before flow has a chance to create post_inc +;; addressing modes, and only postreload's cse_move2add brings the +;; increments back to a usable form. 
+(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (mem:SI (match_operand:SI 1 "register_operand" "")) + (const_int 32) (const_int 0))) + (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))] + "TARGET_SH4A_ARCH && REGNO (operands[0]) != REGNO (operands[1])" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (mem:SI (post_inc:SI + (match_operand:SI 1 "register_operand" ""))) + (const_int 32) (const_int 0)))] + "") + +(define_expand "extv" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (match_operand:QI 1 "unaligned_load_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "" +{ + if (TARGET_SH4A_ARCH + && INTVAL (operands[2]) == 32 + && INTVAL (operands[3]) == -24 * (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) + && GET_CODE (operands[1]) == MEM && MEM_ALIGN (operands[1]) < 32) + { + emit_insn (gen_movua (operands[0], + adjust_address (operands[1], SImode, 0))); + DONE; + } + + FAIL; +}) + +(define_expand "extzv" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extract:SI (match_operand:QI 1 "unaligned_load_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "" +{ + if (TARGET_SH4A_ARCH + && INTVAL (operands[2]) == 32 + && INTVAL (operands[3]) == -24 * (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) + && GET_CODE (operands[1]) == MEM && MEM_ALIGN (operands[1]) < 32) + { + emit_insn (gen_movua (operands[0], + adjust_address (operands[1], SImode, 0))); + DONE; + } + + FAIL; +}) + ;; ------------------------------------------------------------------------- ;; Peepholes @@ -10661,9 +10865,11 @@ mov.l\\t1f,r0\\n\\ [(prefetch (match_operand:QI 0 "address_operand" "p") (match_operand:SI 1 "const_int_operand" "n") (match_operand:SI 2 "const_int_operand" "n"))] - "TARGET_SHMEDIA" + "TARGET_SHMEDIA || TARGET_HARD_SH4" "* { + if (TARGET_HARD_SH4) + return \"pref @%0\"; operands[0] = gen_rtx_MEM 
(QImode, operands[0]); output_asm_insn (\"ld%M0.b %m0,r63\", operands); return \"\"; diff --git a/gcc/config/sh/sh4a.md b/gcc/config/sh/sh4a.md new file mode 100644 index 00000000000..b9bac220559 --- /dev/null +++ b/gcc/config/sh/sh4a.md @@ -0,0 +1,235 @@ +;; Scheduling description for Renesas SH4a +;; Copyright (C) 2003 Free Software Foundation, Inc. +;; +;; This file is part of GNU CC. +;; +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GNU CC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;; The following description models the SH4A pipeline +;; using the DFA based scheduler. + +(define_automaton "sh4a") + +(define_cpu_unit "sh4a_ex" "sh4a") +(define_cpu_unit "sh4a_ls" "sh4a") +(define_cpu_unit "sh4a_fex" "sh4a") +(define_cpu_unit "sh4a_fls" "sh4a") +(define_cpu_unit "sh4a_mult" "sh4a") +(define_cpu_unit "sh4a_fdiv" "sh4a") + +;; Decoding is done on the integer pipeline like the +;; sh4. Define issue to be the | of the two pipelines +;; to control how often instructions are issued. +(define_reservation "ID_or" "sh4a_ex|sh4a_ls") +(define_reservation "ID_and" "sh4a_ex+sh4a_ls") + + +;; ======================================================= +;; Locking Descriptions + +;; Sh4a_Memory access on the LS pipeline. +(define_cpu_unit "sh4a_memory" "sh4a") + +;; Other access on the LS pipeline. 
+(define_cpu_unit "sh4a_load_store" "sh4a") + +;; The address calculator used for branch instructions. +;; This will be reserved after "issue" of branch instructions +;; and this is to make sure that no two branch instructions +;; can be issued in parallel. +(define_reservation "sh4a_addrcalc" "sh4a_ex") + +;; ======================================================= +;; Reservations + +;; Branch (BF,BF/S,BT,BT/S,BRA,BSR) +;; Group: BR +;; Latency when taken: 2 +(define_insn_reservation "sh4a_branch" 2 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "cbranch,jump")) + "ID_or+sh4a_addrcalc") + +;; Jump (JSR,JMP,RTS) +;; Group: BR +;; Latency: 3 +(define_insn_reservation "sh4a_jump" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "return,jump_ind")) + "ID_or+sh4a_addrcalc") + +;; RTE +;; Group: CO +;; Latency: 3 +(define_insn_reservation "sh4a_rte" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "rte")) + "ID_and*4") + +;; EX Group Single +;; Group: EX +;; Latency: 0 +(define_insn_reservation "sh4a_ex" 0 + (and (eq_attr "cpu" "sh4a") + (eq_attr "insn_class" "ex_group")) + "sh4a_ex") + +;; MOVA +;; Group: LS +;; Latency: 1 +(define_insn_reservation "sh4a_mova" 1 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "mova")) + "sh4a_ls+sh4a_load_store") + +;; MOV +;; Group: MT +;; Latency: 0 +(define_insn_reservation "sh4a_mov" 0 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "move")) + "ID_or") + +;; Load +;; Group: LS +;; Latency: 3 +(define_insn_reservation "sh4a_load" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "load,pcload")) + "sh4a_ls+sh4a_memory") + +(define_insn_reservation "sh4a_load_si" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "load_si,pcload_si")) + "sh4a_ls+sh4a_memory") + +;; Store +;; Group: LS +;; Latency: 0 +(define_insn_reservation "sh4a_store" 0 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "store")) + "sh4a_ls+sh4a_memory") + +;; CWB TYPE + +;; MOVUA +;; Group: LS +;; Latency: 3 +(define_insn_reservation "sh4a_movua" 3 + (and (eq_attr 
"cpu" "sh4a") + (eq_attr "type" "movua")) + "sh4a_ls+sh4a_memory*2") + +;; Fixed point multiplication (single) +;; Group: CO +;; Latency: 2 +(define_insn_reservation "sh4a_smult" 2 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "smpy")) + "ID_or+sh4a_mult") + +;; Fixed point multiplication (double) +;; Group: CO +;; Latency: 3 +(define_insn_reservation "sh4a_dmult" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "dmpy")) + "ID_or+sh4a_mult") + +(define_insn_reservation "sh4a_mac_gp" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "mac_gp")) + "ID_and") + +;; Other MT group instructions(1 step operations) +;; Group: MT +;; Latency: 1 +(define_insn_reservation "sh4a_mt" 1 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "mt_group")) + "ID_or") + +;; Floating point reg move +;; Group: LS +;; Latency: 2 +(define_insn_reservation "sh4a_freg_mov" 2 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "fmove")) + "sh4a_ls,sh4a_fls") + +;; Single precision floating point computation FCMP/EQ, +;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRVHG, FSCHG +;; Group: FE +;; Latency: 3 +(define_insn_reservation "sh4a_fp_arith" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "fp")) + "ID_or,sh4a_fex") + +(define_insn_reservation "sh4a_fp_arith_ftrc" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "ftrc_s")) + "ID_or,sh4a_fex") + +;; Single-precision FDIV/FSQRT +;; Group: FE +;; Latency: 20 +(define_insn_reservation "sh4a_fdiv" 20 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "fdiv")) + "ID_or,sh4a_fex+sh4a_fdiv,sh4a_fex") + +;; Double Precision floating point computation +;; (FCNVDS, FCNVSD, FLOAT, FTRC) +;; Group: FE +;; Latency: 3 +(define_insn_reservation "sh4a_dp_float" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "dfp_conv")) + "ID_or,sh4a_fex") + +;; Double-precision floating-point (FADD,FMUL,FSUB) +;; Group: FE +;; Latency: 5 +(define_insn_reservation "sh4a_fp_double_arith" 5 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "dfp_arith")) + "ID_or,sh4a_fex*3") + 
+;; Double precision FDIV/SQRT +;; Group: FE +;; Latency: 36 +(define_insn_reservation "sh4a_dp_div" 36 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "dfdiv")) + "ID_or,sh4a_fex+sh4a_fdiv,sh4a_fex*2") + +;; FSRRA +;; Group: FE +;; Latency: 5 +(define_insn_reservation "sh4a_fsrra" 5 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "fsrra")) + "ID_or,sh4a_fex") + +;; FSCA +;; Group: FE +;; Latency: 7 +(define_insn_reservation "sh4a_fsca" 7 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "fsca")) + "ID_or,sh4a_fex*3") diff --git a/gcc/config/sh/t-mlib-sh4a b/gcc/config/sh/t-mlib-sh4a new file mode 100644 index 00000000000..788b852962d --- /dev/null +++ b/gcc/config/sh/t-mlib-sh4a @@ -0,0 +1 @@ +ML_sh4a=m4a/ diff --git a/gcc/config/sh/t-mlib-sh4a-nofpu b/gcc/config/sh/t-mlib-sh4a-nofpu new file mode 100644 index 00000000000..c9dc28bb886 --- /dev/null +++ b/gcc/config/sh/t-mlib-sh4a-nofpu @@ -0,0 +1 @@ +ML_sh4a_nofpu=m4a-nofpu/ diff --git a/gcc/config/sh/t-mlib-sh4a-single b/gcc/config/sh/t-mlib-sh4a-single new file mode 100644 index 00000000000..036a4cc0a36 --- /dev/null +++ b/gcc/config/sh/t-mlib-sh4a-single @@ -0,0 +1 @@ +ML_sh4a_single=m4a-single/ diff --git a/gcc/config/sh/t-mlib-sh4a-single-only b/gcc/config/sh/t-mlib-sh4a-single-only new file mode 100644 index 00000000000..5709e8ef789 --- /dev/null +++ b/gcc/config/sh/t-mlib-sh4a-single-only @@ -0,0 +1 @@ +ML_sh4a_single_only=m4a-single-only/ diff --git a/gcc/config/sh/t-mlib-sh4al b/gcc/config/sh/t-mlib-sh4al new file mode 100644 index 00000000000..e8e36ba5b86 --- /dev/null +++ b/gcc/config/sh/t-mlib-sh4al @@ -0,0 +1 @@ +ML_sh4al=m4al/ diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh index 8e262fa7e00..97dd99bf101 100644 --- a/gcc/config/sh/t-sh +++ b/gcc/config/sh/t-sh @@ -22,15 +22,16 @@ fp-bit.c: $(srcdir)/config/fp-bit.c cat $(srcdir)/config/fp-bit.c >> fp-bit.c MULTILIB_ENDIAN = ml/mb -MULTILIB_CPUS= 
$(ML_sh1)$(ML_sh2e)$(ML_sh2)$(ML_sh3e)$(ML_sh3)$(ML_sh4_nofpu)$(ML_sh4_single_only)$(ML_sh4_single)$(ML_sh4)$(ML_m5_32media)$(ML_m5_32media_nofpu)$(ML_m5_compact)$(ML_m5_compact_nofpu)$(ML_m5_64media)$(ML_m5_64media_nofpu) +MULTILIB_CPUS= $(ML_sh1)$(ML_sh2e)$(ML_sh2)$(ML_sh3e)$(ML_sh3)$(ML_sh4_nofpu)$(ML_sh4_single_only)$(ML_sh4_single)$(ML_sh4)$(ML_sh4a_nofpu)$(ML_sh4a_single_only)$(ML_sh4a_single)$(ML_sh4a)$(ML_m5_32media)$(ML_m5_32media_nofpu)$(ML_m5_compact)$(ML_m5_compact_nofpu)$(ML_m5_64media)$(ML_m5_64media_nofpu) MULTILIB_OPTIONS= $(MULTILIB_ENDIAN) $(MULTILIB_CPUS:/=) MULTILIB_DIRNAMES= #MULTILIB_MATCHES = m2=m3 m2e=m3e m2=m4-nofpu MULTILIB_MATCHES = $(shell \ multilibs="$(MULTILIB_OPTIONS)" ; \ - for abi in m1,m2,m3,m4-nofpu \ - m2e,m3e,m4-single-only \ + for abi in m1,m2,m3,m4-nofpu,m4al,m4a-nofpu \ + m2e,m3e,m4-single-only,m4a-single-only \ + m4-single,m4a-single m4,m4a \ m5-32media,m5-compact,m5-32media \ m5-32media-nofpu,m5-compact-nofpu,m5-32media-nofpu; do \ subst= ; \ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 2dc69698307..204c27bca7b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -621,6 +621,7 @@ See RS/6000 and PowerPC Options. @emph{SH Options} @gccoptlist{-m1 -m2 -m2e -m3 -m3e @gol -m4-nofpu -m4-single-only -m4-single -m4 @gol +-m4a-nofpu -m4a-single-only -m4a-single -m4a -m4al @gol -m5-64media -m5-64media-nofpu @gol -m5-32media -m5-32media-nofpu @gol -m5-compact -m5-compact-nofpu @gol @@ -10518,6 +10519,31 @@ single-precision mode by default. @opindex m4 Generate code for the SH4. +@item -m4a-nofpu +@opindex m4a-nofpu +Generate code for the SH4al-dsp, or for a SH4a in such a way that the +floating-point unit is not used. + +@item -m4a-single-only +@opindex m4a-single-only +Generate code for the SH4a, in such a way that no double-precision +floating point operations are used. 
+ +@item -m4a-single +@opindex m4a-single +Generate code for the SH4a assuming the floating-point unit is in +single-precision mode by default. + +@item -m4a +@opindex m4a +Generate code for the SH4a. + +@item -m4al +@opindex m4al +Same as @option{-m4a-nofpu}, except that it implicitly passes +@option{-dsp} to the assembler. GCC doesn't generate any DSP +instructions at the moment. + @item -mb @opindex mb Compile code for the processor in big endian mode. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8b99613999f..78fa42055bf 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,19 @@ +2004-07-28 Alexandre Oliva + + 2003-07-06 Alexandre Oliva + * gcc.dg/sh4a-memmovua.c: Tweak regular expression. + 2003-07-01 Alexandre Oliva + * gcc.dg/sh4a-bitmovua.c: New. + * gcc.dg/sh4a-cos.c: New. + * gcc.dg/sh4a-cosf.c: New. + * gcc.dg/sh4a-fprun.c: New. + * gcc.dg/sh4a-fsrra.c: New. + * gcc.dg/sh4a-memmovua.c: New. + * gcc.dg/sh4a-sin.c: New. + * gcc.dg/sh4a-sincos.c: New. + * gcc.dg/sh4a-sincosf.c: New. + * gcc.dg/sh4a-sinf.c: New. + 2004-07-28 Diego Novillo * gcc.dg/tree-ssa/20030714-2.c: Adjust number of expected diff --git a/gcc/testsuite/gcc.dg/sh4a-bitmovua.c b/gcc/testsuite/gcc.dg/sh4a-bitmovua.c new file mode 100644 index 00000000000..b7081bf7186 --- /dev/null +++ b/gcc/testsuite/gcc.dg/sh4a-bitmovua.c @@ -0,0 +1,73 @@ +/* Verify that we generate movua to load unaligned 32-bit values. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O" } */ +/* { dg-final { scan-assembler-times "\tmovua\\.l\t" 6 } } */ + +#ifdef __SH4A__ +/* Aligned. */ +struct s0 { long long d : 32; } x0; +long long f0() { + return x0.d; +} + +/* Unaligned load. */ +struct s1 { long long c : 8; long long d : 32; } x1; +long long f1() { + return x1.d; +} + +/* Unaligned load. */ +struct s2 { long long c : 16; long long d : 32; } x2; +long long f2() { + return x2.d; +} + +/* Unaligned load. 
*/ +struct s3 { long long c : 24; long long d : 32; } x3; +long long f3() { + return x3.d; +} + +/* Aligned. */ +struct s4 { long long c : 32; long long d : 32; } x4; +long long f4() { + return x4.d; +} + +/* Aligned. */ +struct u0 { unsigned long long d : 32; } y0; +unsigned long long g0() { + return y0.d; +} + +/* Unaligned load. */ +struct u1 { long long c : 8; unsigned long long d : 32; } y1; +unsigned long long g1() { + return y1.d; +} + +/* Unaligned load. */ +struct u2 { long long c : 16; unsigned long long d : 32; } y2; +unsigned long long g2() { + return y2.d; +} + +/* Unaligned load. */ +struct u3 { long long c : 24; unsigned long long d : 32; } y3; +unsigned long long g3() { + return y3.d; +} + +/* Aligned. */ +struct u4 { long long c : 32; unsigned long long d : 32; } y4; +unsigned long long g4() { + return y4.d; +} +#else +asm ("movua.l\t"); +asm ("movua.l\t"); +asm ("movua.l\t"); +asm ("movua.l\t"); +asm ("movua.l\t"); +asm ("movua.l\t"); +#endif diff --git a/gcc/testsuite/gcc.dg/sh4a-cos.c b/gcc/testsuite/gcc.dg/sh4a-cos.c new file mode 100644 index 00000000000..198d41f8675 --- /dev/null +++ b/gcc/testsuite/gcc.dg/sh4a-cos.c @@ -0,0 +1,13 @@ +/* Verify that we generate single-precision sine and cosine approximate + (fsca) in fast math mode. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O -ffast-math" } */ +/* { dg-final { scan-assembler "\tfsca\t" } } */ + +#if defined __SH4A__ && ! defined __SH4_NOFPU__ +#include <math.h> + +double test(double f) { return cos(f); } +#else +asm ("fsca\t"); +#endif diff --git a/gcc/testsuite/gcc.dg/sh4a-cosf.c b/gcc/testsuite/gcc.dg/sh4a-cosf.c new file mode 100644 index 00000000000..f78c140d501 --- /dev/null +++ b/gcc/testsuite/gcc.dg/sh4a-cosf.c @@ -0,0 +1,13 @@ +/* Verify that we generate single-precision sine and cosine approximate + (fsca) in fast math mode. 
*/ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O -ffast-math" } */ +/* { dg-final { scan-assembler "\tfsca\t" } } */ + +#if defined __SH4A__ && ! defined __SH4_NOFPU__ +#include <math.h> + +float test(float f) { return cosf(f); } +#else +asm ("fsca\t"); +#endif diff --git a/gcc/testsuite/gcc.dg/sh4a-fprun.c b/gcc/testsuite/gcc.dg/sh4a-fprun.c new file mode 100644 index 00000000000..8e26dc170a1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/sh4a-fprun.c @@ -0,0 +1,35 @@ +/* Verify that fsca and fsrra yield reasonable results. */ +/* { dg-do run { target "sh*-*-*" } } */ +/* { dg-options "-O -ffast-math" } */ + +#include <math.h> +#include <stdlib.h> + +float sqrt_arg = 4.0f, sqrt_res = 2.0f; +float dg2rad_f; +double dg2rad_d; + +void check_f (float res, float expected) { + if (res >= expected - 0.001f && res <= expected + 0.001f) + return; + + abort (); +} + +void check_d (double res, double expected) { + if (res >= expected - 0.001 && res <= expected + 0.001) + return; + + abort (); +} + +int main() { + check_f (sqrtf(sqrt_arg), sqrt_res); + dg2rad_f = dg2rad_d = atan(1) / 45; + check_f (sinf(90*dg2rad_f), 1); + check_f (cosf(90*dg2rad_f), 0); + check_d (sin(-90*dg2rad_d), -1); + check_d (cos(180*dg2rad_d), -1); + check_d (sin(-45*dg2rad_d) * cosf(135*dg2rad_f), 0.5); + exit (0); +} diff --git a/gcc/testsuite/gcc.dg/sh4a-fsrra.c b/gcc/testsuite/gcc.dg/sh4a-fsrra.c new file mode 100644 index 00000000000..c8f04e4d2e2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/sh4a-fsrra.c @@ -0,0 +1,13 @@ +/* Verify that we generate single-precision square root reciprocal + approximate (fsrra) in fast math mode. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O -ffast-math" } */ +/* { dg-final { scan-assembler "\tfsrra\t" } } */ + +#if defined __SH4A__ && ! 
defined __SH4_NOFPU__ +#include <math.h> + +float test(float f) { return 1 / sqrtf(f); } +#else +asm ("fsrra\t"); +#endif diff --git a/gcc/testsuite/gcc.dg/sh4a-memmovua.c b/gcc/testsuite/gcc.dg/sh4a-memmovua.c new file mode 100644 index 00000000000..68927929854 --- /dev/null +++ b/gcc/testsuite/gcc.dg/sh4a-memmovua.c @@ -0,0 +1,17 @@ +/* Verify that we generate movua to copy unaligned memory regions to + 32-bit-aligned addresses. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O" } */ +/* { dg-final { scan-assembler-times "\tmovua\\.l\t(.*)+" 2 } } */ + +#ifdef __SH4A__ +#include <string.h> + +struct s { int i; char a[10], b[10]; } x; +int f() { + memcpy(x.a, x.b, 10); +} +#else +asm ("movua.l\t+"); +asm ("movua.l\t+"); +#endif diff --git a/gcc/testsuite/gcc.dg/sh4a-sin.c b/gcc/testsuite/gcc.dg/sh4a-sin.c new file mode 100644 index 00000000000..9f46f600763 --- /dev/null +++ b/gcc/testsuite/gcc.dg/sh4a-sin.c @@ -0,0 +1,13 @@ +/* Verify that we generate single-precision sine and cosine approximate + (fsca) in fast math mode. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O -ffast-math" } */ +/* { dg-final { scan-assembler "\tfsca\t" } } */ + +#if defined __SH4A__ && ! defined __SH4_NOFPU__ +#include <math.h> + +double test(double f) { return sin(f); } +#else +asm ("fsca\t"); +#endif diff --git a/gcc/testsuite/gcc.dg/sh4a-sincos.c b/gcc/testsuite/gcc.dg/sh4a-sincos.c new file mode 100644 index 00000000000..f4293797534 --- /dev/null +++ b/gcc/testsuite/gcc.dg/sh4a-sincos.c @@ -0,0 +1,14 @@ +/* Verify that we generate a single single-precision sine and cosine + approximate (fsca) in fast math mode when a function computes both + sine and cosine. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O -ffast-math" } */ +/* { dg-final { scan-assembler-times "\tfsca\t" 1 } } */ + +#if defined __SH4A__ && ! 
defined __SH4_NOFPU__ +#include <math.h> + +double test(double f) { return sin(f) + cos(f); } +#else +asm ("fsca\t"); +#endif diff --git a/gcc/testsuite/gcc.dg/sh4a-sincosf.c b/gcc/testsuite/gcc.dg/sh4a-sincosf.c new file mode 100644 index 00000000000..42913dbd59e --- /dev/null +++ b/gcc/testsuite/gcc.dg/sh4a-sincosf.c @@ -0,0 +1,14 @@ +/* Verify that we generate a single single-precision sine and cosine + approximate (fsca) in fast math mode when a function computes both + sine and cosine. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O -ffast-math" } */ +/* { dg-final { scan-assembler-times "\tfsca\t" 1 } } */ + +#if defined __SH4A__ && ! defined __SH4_NOFPU__ +#include <math.h> + +float test(float f) { return sinf(f) + cosf(f); } +#else +asm ("fsca\t"); +#endif diff --git a/gcc/testsuite/gcc.dg/sh4a-sinf.c b/gcc/testsuite/gcc.dg/sh4a-sinf.c new file mode 100644 index 00000000000..2a2343fd73a --- /dev/null +++ b/gcc/testsuite/gcc.dg/sh4a-sinf.c @@ -0,0 +1,13 @@ +/* Verify that we generate single-precision sine and cosine approximate + (fsca) in fast math mode. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O -ffast-math" } */ +/* { dg-final { scan-assembler "\tfsca\t" } } */ + +#if defined __SH4A__ && ! defined __SH4_NOFPU__ +#include <math.h> + +float test(float f) { return sinf(f); } +#else +asm ("fsca\t"); +#endif diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 3a16fce910e..aed1079e74d 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,11 @@ +2004-07-28 Alexandre Oliva + + 2003-10-01 Eric Christopher + * config/cpu/sh/atomicity.h (__exchange_and_add): Remove 'm' + constraint. + 2003-07-09 Alexandre Oliva + * config/cpu/sh/atomicity.h: New. Use movli and movco on SH4a. 
+ 2004-07-23 Benjamin Kosnik PR libstdc++/16678 diff --git a/libstdc++-v3/config/cpu/sh/atomicity.h b/libstdc++-v3/config/cpu/sh/atomicity.h new file mode 100644 index 00000000000..b7d6c605d72 --- /dev/null +++ b/libstdc++-v3/config/cpu/sh/atomicity.h @@ -0,0 +1,123 @@ +// Low-level functions for atomic operations: Generic version -*- C++ -*- + +// Copyright (C) 1999, 2001, 2002, 2003 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, +// USA. + +// As a special exception, you may use this file as part of a free software +// library without restriction. Specifically, if other files instantiate +// templates or use macros or inline functions from this file, or you compile +// this file and link it with other files to produce an executable, this +// file does not by itself cause the resulting executable to be covered by +// the GNU General Public License. This exception does not however +// invalidate any other reasons why the executable file might be covered by +// the GNU General Public License. 
+ +#ifndef _BITS_ATOMICITY_H +#define _BITS_ATOMICITY_H 1 + +#ifdef __SH4A__ + +typedef int _Atomic_word; +/* Add __val to *__mem with a movli.l/movco.l retry loop and return the value *__mem held before the addition. */ +static inline _Atomic_word +__attribute__ ((__unused__)) +__exchange_and_add (volatile _Atomic_word* __mem, int __val) +{ + _Atomic_word __result; + + __asm__ __volatile__ + ("0:\n" + "\tmovli.l\t@%2,r0\n" + "\tmov\tr0,%1\n" + "\tadd\t%3,r0\n" + "\tmovco.l\tr0,@%2\n" + "\tbf\t0b" + : "+m" (*__mem), "=&r" (__result) /* earlyclobber: %1 is written before %2 and %3 are last read */ + : "r" (__mem), "rI08" (__val) + : "r0", "t"); /* movco.l reports success in the T bit, which bf then tests */ + + return __result; +} + +/* Add __val to *__mem with the same movli.l/movco.l retry loop; no value is returned. */ +static inline void +__attribute__ ((__unused__)) +__atomic_add (volatile _Atomic_word* __mem, int __val) +{ + __asm__ __volatile__ ("0:\n" + "\tmovli.l\t@%1,r0\n" + "\tadd\t%2,r0\n" + "\tmovco.l\tr0,@%1\n" + "\tbf\t0b" + : "+m" (*__mem) + : "r" (__mem), "rI08" (__val) + : "r0", "t"); /* movco.l rewrites the T bit */ +} + +#else + +/* This is generic/atomicity.h */ + +#include <bits/gthr.h> + +#define _GLIBCPP_NEED_GENERIC_MUTEX + +typedef int _Atomic_word; + +namespace __gnu_cxx +{ + extern __gthread_mutex_t _Atomic_add_mutex; + +#ifndef __GTHREAD_MUTEX_INIT + extern __gthread_once_t _Atomic_add_mutex_once; + extern void __gthread_atomic_add_mutex_once(); +#endif +} +/* Fallback: read *__mem and add __val to it while holding _Atomic_add_mutex; return the value read. */ +static inline _Atomic_word +__attribute__ ((__unused__)) +__exchange_and_add (volatile _Atomic_word* __mem, int __val) +{ +#ifndef __GTHREAD_MUTEX_INIT + __gthread_once (&__gnu_cxx::_Atomic_add_mutex_once, + __gnu_cxx::__gthread_atomic_add_mutex_once); +#endif + + _Atomic_word __result; + + __gthread_mutex_lock (&__gnu_cxx::_Atomic_add_mutex); + + __result = *__mem; + *__mem += __val; + + __gthread_mutex_unlock (&__gnu_cxx::_Atomic_add_mutex); + return __result; +} + +/* Fallback: perform the addition through __exchange_and_add and discard its result. */ +static inline void +__attribute__ ((__unused__)) +__atomic_add (volatile _Atomic_word* __mem, int __val) +{ + (void) __exchange_and_add (__mem, __val); +} + + +#endif + +#endif /* atomicity.h */ -- 2.30.2