+2004-07-28 Alexandre Oliva <aoliva@redhat.com>
+
+ Introduce sh4a support.
+ * config.gcc: Handle sh4a multilibs and cpu selection.
+ * config/sh/sh.h: Likewise. Handle sh4a command line flags.
+ * config/sh/t-mlib-sh4a: New.
+ * config/sh/t-mlib-sh4al: New.
+ * config/sh/t-mlib-sh4a-nofpu: New.
+ * config/sh/t-mlib-sh4a-single: New.
+ * config/sh/t-mlib-sh4a-single-only: New.
+
+ 2004-02-20 DJ Delorie <dj@redhat.com>
+
+ * config/sh/sh.md ("movua"): Change constraint from "m" to "Sua".
+ * config/sh/sh.h (EXTRA_CONSTRAINT_S): Add "Sua" support.
+
+ 2003-08-22 Eric Christopher <echristo@redhat.com>
+
+ * config/sh/sh4a.md: Update for chip errata.
+
+ 2003-08-07 Eric Christopher <echristo@redhat.com>
+
+ * config/sh/sh4a.md: New file. sh4a processor description.
+
+ 2003-07-08 Alexandre Oliva <aoliva@redhat.com>
+
+ * config/sh/sh.h (TARGET_SWITCHES): Add 4al. Adjust description
+ of -m4a-nofpu.
+ (SH_ASM_SPEC): Pass -dsp for -m4al, not -m4a-nofpu.
+ * config/sh/t-sh (MULTILIB_MATCHES): Map -m4al to -m4a-nofpu.
+ * doc/invoke.texi (SH Options): Document -m4al.
+
+ 2003-07-03 Alexandre Oliva <aoliva@redhat.com>
+
+ * config/sh/sh.c (expand_block_move): Remove commented-out code
+ checked in by mistake.
+ (sh_cannot_change_mode_class): Enable SUBREGs to be used to select
+ single elements from SFmode vectors.
+ * config/sh/sh.md (fsca): Use VEC_CONCAT to initialize the output
+ register.
+ (sinsf2, cossf2, sindf2, cosdf2): Don't emit CLOBBER.
+
+ 2003-07-01 Alexandre Oliva <aoliva@redhat.com>
+
+ * config/sh/sh.h (sh_fsca_sf2int, sh_fsca_df2int,
+ sh_fsca_int2sf): Remove variable declarations.
+ * config/sh/sh.c (sh_fsca_sf2int, sh_fsca_df2int,
+ sh_fsca_int2sf): New functions.
+ (sh_fsca_sf2int_rtx, sh_fsca_df2int_rtx,
+ sh_fsca_int2sf_rtx): New static variables.
+ * config/sh/sh-protos.h (sh_fsca_sf2int, sh_fsca_df2int,
+ sh_fsca_int2sf): Declare.
+ * config/sh/sh.md: Adjust.
+ * doc/invoke.texi (SH Options): Document new options.
+ * config/sh/lib1funcs.asm (ic_invalidate): Remove SH4a forward
+ compatibility from SH4 code.
+
+ 2003-06-27 Alexandre Oliva <aoliva@redhat.com>
+
+ * config/sh/sh.c (expand_block_move): Don't emit POST_INC too
+ early.
+ (memory_movsrc_operand): Renamed to...
+ (unaligned_load_operand): ... this. Simplified.
+ * config/sh/sh.h (PREDICATE_CODES): Adjust.
+ * config/sh/sh.md (movua, extv, extzv): Likewise. Change movua's
+ input operand to SImode, and adjust the others. Introduce
+ post-increment by peephole.
+ * config/sh/sh.c (expand_block_move): Give the target address the
+ same mode as the temp reg.
+ * config/sh/sh.c (expand_block_move): Use a temp reg for unaligned
+ copying.
+
+ 2003-06-26 Alexandre Oliva <aoliva@redhat.com>
+
+ Introduce support for SH4a.
+ * config/sh/lib1funcs.asm (ic_invalidate): Use icbi if
+ __SH4A__. Emit 4 4kb blocks and touch all of them otherwise.
+ * config/sh/sh.c (sh_fsca_sf2int, sh_fsca_df2int,
+ sh_fsca_int2sf): New.
+ (sh_init_builtins): Initialize them.
+ (print_operand): Support `d'.
+ (expand_block_move): Use movua if src is misaligned.
+ (memory_movsrc_operand): New.
+ * config/sh/sh.h (TARGET_CPU_CPP_BUILTINS): Define __SH4A__
+ and one of the SH4 macros.
+ (SH4A_BIT, TARGET_SH4A_ARCH, TARGET_SH4A_FP,
+ SELECT_SH4A_NOFPU, SELECT_SH4A_SINGLE_ONLY, SELECT_SH4A,
+ SELECT_SH4A_SINGLE): New.
+ (TARGET_NONE): Add SH4A_BIT.
+ (TARGET_SWITCHES): Add 4a-single-only, 4a-single, 4a-nofpu and 4a.
+ (SH_ASM_SPEC): Pass -dsp if -m4a-nofpu.
+ (sh_fsca_sf2int, sh_fsca_df2int, sh_fsca_int2sf): Declare.
+ (OVERRIDE_OPTIONS): Set cpu to CPU_SH4A when appropriate.
+ (enum processor_type): Add PROCESSOR_SH4A.
+ (PREDICATE_CODES): Add memory_movsrc_operand.
+ * config/sh/sh.md: Remove unused variables.
+ (attr cpu): Add sh4a.
+ (attr type): Add movua, fsrra and fsca.
+ (prefetch): New, for SH4.
+ (ic_invalidate_line, ic_invalidate_line_sh4a): Use icbi.
+ (toggle_sz): Set type to fp.
+ (toggle_pr, rsqrtsf2, fsca, sinsf2, cossf2, sindf2, cosdf2): New.
+ (movua, extv, extzv): New.
+ * config/sh/t-sh: Add multilibs for 4a, 4a-nofpu, 4a-single
+ and 4a-single-only.
+
2004-07-28 Diego Novillo <dnovillo@redhat.com>
* tree-optimize.c (init_tree_optimization_passes): Schedule
case `echo ${target} | sed 's/e[lb]-/-/'` in
sh64*-*-netbsd*) sh_cpu_target=sh5-64media ;;
sh64* | sh5*-*-netbsd*) sh_cpu_target=sh5-32media ;;
+ sh4a_single_only*) sh_cpu_target=sh4a-single-only ;;
+ sh4a_single*) sh_cpu_target=sh4a-single ;;
+ sh4a_nofpu*) sh_cpu_target=sh4a-nofpu ;;
+ sh4al) sh_cpu_target=sh4al ;;
+ sh4a*) sh_cpu_target=sh4a ;;
sh4_single_only*) sh_cpu_target=sh4-single-only ;;
sh4_single*) sh_cpu_target=sh4-single ;;
sh4_nofpu*) sh_cpu_target=sh4-nofpu ;;
case $sh_cpu_default in
sh5-64media-nofpu | sh5-64media | \
sh5-32media-nofpu | sh5-32media | sh5-compact-nofpu | sh5-compact | \
+ sh4a-single-only | sh4a-single | sh4a-nofpu | sh4a | sh4al | \
sh4-single-only | sh4-single | sh4-nofpu | sh4 | \
sh3e | sh3 | sh2e | sh2 | sh1) ;;
"") sh_cpu_default=${sh_cpu_target} ;;
case ${sh_multilib} in
sh1 | sh2 | sh2e | sh3 | sh3e | \
sh4 | sh4-single | sh4-single-only | sh4-nofpu | \
+ sh4a | sh4a-single | sh4a-single-only | sh4a-nofpu | sh4al | \
sh5-64media | sh5-64media-nofpu | \
sh5-32media | sh5-32media-nofpu | \
sh5-compact | sh5-compact-nofpu)
"" | m1 | m2 | m2e | m3 | m3e | m4 | m4-single | m4-single-only | m4-nofpu )
# OK
;;
+ m4a | m4a-single | m4a-single-only | m4a-nofpu | m4al)
+ ;;
*)
echo "Unknown CPU used in --with-cpu=$with_cpu, known values:" 1>&2
echo "m1 m2 m2e m3 m3e m4 m4-single m4-single-only m4-nofpu" 1>&2
+ echo "m4a m4a-single m4a-single-only m4a-nofpu m4al" 1>&2
exit 1
;;
esac
ENDFUNC(GLOBAL(ic_invalidate))
ENDFUNC(GLOBAL(init_trampoline))
+#elif defined(__SH4A__)
+ .global GLOBAL(ic_invalidate)
+ FUNC(GLOBAL(ic_invalidate))
+GLOBAL(ic_invalidate):
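+	/* ocbwb writes the operand (data) cache block for @r4 back to
+	   memory, synco waits for the write-back to complete, and icbi,
+	   in the delay slot of rts, invalidates the corresponding
+	   instruction cache block.  */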
+ ocbwb @r4
+ synco
+ rts
+ icbi @r4
+ ENDFUNC(GLOBAL(ic_invalidate))
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
+	/* This assumes a direct-mapped cache, which is the case for
+	   the original SH4, but not for the second version of the SH4,
+	   which uses a 2-way set-associative cache, nor for the SH4a,
+	   whose cache is 4-way.  The SH4a fortunately offers an
+	   instruction to invalidate the instruction cache, and we use
+	   it above, but the SH4 doesn't.  However, since the libraries
+	   don't contain any nested functions (the only case in which
+	   GCC would emit this pattern), since we actually emit the
+	   ic_invalidate_line_i pattern for cache invalidation on all
+	   SH4 multilibs (even 4-nofpu, which isn't even covered here),
+	   and since pre-SH4 cores don't have caches, this code seems
+	   pointless, unless it's meant for backward binary
+	   compatibility or for userland-only cache invalidation on,
+	   say, sh4-*-linux-gnu.  Such a feature should probably be
+	   moved into a system call, so that the kernel could do
+	   whatever it takes to invalidate a cache line on the core
+	   it's actually running on.  I.e., this hideous :-) piece of
+	   code should go away at some point. */
+
.global GLOBAL(ic_invalidate)
FUNC(GLOBAL(ic_invalidate))
GLOBAL(ic_invalidate):
#define GCC_SH_PROTOS_H
#ifdef RTX_CODE
+extern rtx sh_fsca_sf2int (void);
+extern rtx sh_fsca_df2int (void);
+extern rtx sh_fsca_int2sf (void);
extern struct rtx_def *prepare_scc_operands (enum rtx_code);
/* Declare functions defined in sh.c and used in templates. */
'T' print the next word of a dp value - same as 'R' in big endian mode.
'M' print an `x' if `m' will print `base,index'.
'N' print 'r63' if the operand is (const_int 0).
+ 'd' print a V2SF reg as dN instead of fpN.
'm' print a pair `base,offset' or `base,index', for LD and ST.
'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
'o' output an operator. */
}
break;
+ case 'd':
+ if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode)
+ abort ();
+
+ fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
+ break;
+
case 'N':
if (x == CONST0_RTX (GET_MODE (x)))
{
int constp = (GET_CODE (operands[2]) == CONST_INT);
int bytes = (constp ? INTVAL (operands[2]) : 0);
+ if (! constp)
+ return 0;
+
+ /* If we could use mov.l to move words and dest is word-aligned, we
+ can use movua.l for loads and still generate a relatively short
+ and efficient sequence. */
+ if (TARGET_SH4A_ARCH && align < 4
+ && MEM_ALIGN (operands[0]) >= 32
+ && can_move_by_pieces (bytes, 32))
+ {
+ rtx dest = copy_rtx (operands[0]);
+ rtx src = copy_rtx (operands[1]);
+ /* We could use different pseudos for each copied word, but
+ since movua can only load into r0, it's kind of
+ pointless. */
+ rtx temp = gen_reg_rtx (SImode);
+ rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
+ int copied = 0;
+
+ while (copied + 4 <= bytes)
+ {
+ rtx to = adjust_address (dest, SImode, copied);
+ rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
+
+ emit_insn (gen_movua (temp, from));
+ emit_move_insn (src_addr, plus_constant (src_addr, 4));
+ emit_move_insn (to, temp);
+ copied += 4;
+ }
+
+ if (copied < bytes)
+ move_by_pieces (adjust_address (dest, BLKmode, copied),
+ adjust_automodify_address (src, BLKmode,
+ src_addr, copied),
+ bytes - copied, align, 0);
+
+ return 1;
+ }
+
/* If it isn't a constant number of bytes, or if it doesn't have 4 byte
alignment, or if it isn't a multiple of 4 bytes, then fail. */
- if (! constp || align < 4 || (bytes % 4 != 0))
+ if (align < 4 || (bytes % 4 != 0))
return 0;
if (TARGET_HARD_SH4)
sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
enum reg_class class)
{
+ /* We want to enable the use of SUBREGs as a means to
+ VEC_SELECT a single element of a vector. */
+ if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
+ return (reg_classes_intersect_p (GENERAL_REGS, class));
+
if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
{
if (TARGET_LITTLE_ENDIAN)
abort ();
}
+/* Returns 1 if OP is a MEM that can be the source of a simple move operation. */
+
+int
+unaligned_load_operand (rtx op, enum machine_mode mode)
+{
+ rtx inside;
+
+ if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
+ return 0;
+
+ inside = XEXP (op, 0);
+
+ if (GET_CODE (inside) == POST_INC)
+ inside = XEXP (inside, 0);
+
+ if (GET_CODE (inside) == REG)
+ return 1;
+
+ return 0;
+}
+
+/* This function returns a constant rtx that represents 2**15 / pi in
+   SFmode.  It's used to scale SFmode angles, in radians, to a
+   fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
+   maps to 0x10000).  */
+
+static GTY(()) rtx sh_fsca_sf2int_rtx;
+
+rtx
+sh_fsca_sf2int (void)
+{
+ if (! sh_fsca_sf2int_rtx)
+ {
+ REAL_VALUE_TYPE rv;
+
+ real_from_string (&rv, "10430.378350470453");
+ sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
+ }
+
+ return sh_fsca_sf2int_rtx;
+}
+
+/* This function returns a constant rtx that represents 2**15 / pi in
+   DFmode.  It's used to scale DFmode angles, in radians, to a
+   fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
+   maps to 0x10000).  */
+
+static GTY(()) rtx sh_fsca_df2int_rtx;
+
+rtx
+sh_fsca_df2int (void)
+{
+ if (! sh_fsca_df2int_rtx)
+ {
+ REAL_VALUE_TYPE rv;
+
+ real_from_string (&rv, "10430.378350470453");
+ sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
+ }
+
+ return sh_fsca_df2int_rtx;
+}
+
+/* This function returns a constant rtx that represents pi / 2**15 in
+   SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
+   of a full circle back to an SFmode value in radians (i.e., 0x10000
+   maps to 2*pi).  */
+
+static GTY(()) rtx sh_fsca_int2sf_rtx;
+
+rtx
+sh_fsca_int2sf (void)
+{
+ if (! sh_fsca_int2sf_rtx)
+ {
+ REAL_VALUE_TYPE rv;
+
+ real_from_string (&rv, "9.587379924285257e-5");
+ sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
+ }
+
+ return sh_fsca_int2sf_rtx;
+}
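+
+/* Illustration (a standalone sketch, not part of the patch): the
+   helpers above encode the convention that fsca takes its angle as a
+   fixed-point fraction of a full circle, with 2*pi radians mapping to
+   0x10000.  The two constants are inverses of each other:
+
+     #include <math.h>
+     #include <stdio.h>
+
+     int main (void)
+     {
+       double sf2int = 32768.0 / M_PI;   // 10430.378350470453
+       double int2sf = M_PI / 32768.0;   // 9.587379924285257e-5
+
+       // A full circle in radians scales to 0x10000 fsca units.
+       printf ("0x%x\n", (int) (2 * M_PI * sf2int + 0.5));
+
+       // Scaling to fsca units and back recovers the angle.
+       printf ("%f\n", (M_PI / 2) * sf2int * int2sf);
+       return 0;
+     }
+*/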
#include "gt-sh.h"
case PROCESSOR_SH4: \
builtin_define (TARGET_FPU_SINGLE ? "__SH4_SINGLE__" : "__SH4__"); \
break; \
+ case PROCESSOR_SH4A: \
+ builtin_define ("__SH4A__"); \
+ builtin_define (TARGET_SH4 \
+ ? (TARGET_FPU_SINGLE ? "__SH4_SINGLE__" : "__SH4__") \
+ : TARGET_FPU_ANY ? "__SH4_SINGLE_ONLY__" \
+ : "__SH4_NOFPU__"); \
+ break; \
case PROCESSOR_SH5: \
{ \
builtin_define_with_value ("__SH5__", \
#define HARD_SH4_BIT (1<<5)
#define FPU_SINGLE_BIT (1<<7)
#define SH4_BIT (1<<12)
+#define SH4A_BIT (1<<3)
#define FMOVD_BIT (1<<4)
#define SH5_BIT (1<<0)
#define SPACE_BIT (1<<13)
/* Nonzero if we should generate code using type 4 insns. */
#define TARGET_SH4 ((target_flags & SH4_BIT) && (target_flags & SH1_BIT))
+/* Nonzero if we're generating code for the common subset of
+ instructions present on both SH4a and SH4al-dsp. */
+#define TARGET_SH4A_ARCH (target_flags & SH4A_BIT)
+
+/* Nonzero if we're generating code for SH4a, unless the use of the
+ FPU is disabled (which makes it compatible with SH4al-dsp). */
+#define TARGET_SH4A_FP (TARGET_SH4A_ARCH && TARGET_FPU_ANY)
+
/* Nonzero if we should generate code for a SH5 CPU (either ISA). */
#define TARGET_SH5 (target_flags & SH5_BIT)
#define SELECT_SH4_SINGLE_ONLY (HARD_SH4_BIT | SELECT_SH3E)
#define SELECT_SH4 (SH4_BIT | SH_E_BIT | HARD_SH4_BIT | SELECT_SH3)
#define SELECT_SH4_SINGLE (FPU_SINGLE_BIT | SELECT_SH4)
+#define SELECT_SH4A_NOFPU (SH4A_BIT | SELECT_SH4_NOFPU)
+#define SELECT_SH4A_SINGLE_ONLY (SH4A_BIT | SELECT_SH4_SINGLE_ONLY)
+#define SELECT_SH4A (SH4A_BIT | SELECT_SH4)
+#define SELECT_SH4A_SINGLE (SH4A_BIT | SELECT_SH4_SINGLE)
#define SELECT_SH5_64MEDIA (SH5_BIT | SH4_BIT)
#define SELECT_SH5_64MEDIA_NOFPU (SH5_BIT)
#define SELECT_SH5_32MEDIA (SH5_BIT | SH4_BIT | SH_E_BIT)
#ifndef SUPPORT_SH4_NOFPU
#define TARGET_SWITCH_SH4_NOFPU
#endif
+#ifndef SUPPORT_SH4A_NOFPU
+#define TARGET_SWITCH_SH4A_NOFPU
+#endif
+#ifndef SUPPORT_SH4AL
+#define TARGET_SWITCH_SH4AL
+#endif
#endif
#endif
#endif
#ifndef SUPPORT_SH4_SINGLE_ONLY
#define TARGET_SWITCH_SH4_SINGLE_ONLY
#endif
+#ifndef SUPPORT_SH4A_SINGLE_ONLY
+#define TARGET_SWITCH_SH4A_SINGLE_ONLY
+#endif
#endif
#endif
#ifndef SUPPORT_SH4
#define TARGET_SWITCH_SH4
+#ifndef SUPPORT_SH4A
+#define TARGET_SWITCH_SH4A
+#endif
#endif
#ifndef SUPPORT_SH4_SINGLE
#define TARGET_SWITCH_SH4_SINGLE
+#ifndef SUPPORT_SH4A_SINGLE
+#define TARGET_SWITCH_SH4A_SINGLE
+#endif
#endif
#ifndef SUPPORT_SH5_64MEDIA
/* Reset all target-selection flags. */
#define TARGET_NONE -(SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | SH4_BIT \
- | HARD_SH4_BIT | FPU_SINGLE_BIT | SH5_BIT)
+ | SH4A_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT | SH5_BIT)
#ifndef TARGET_SWITCH_SH1
#define TARGET_SWITCH_SH1 \
{"4", TARGET_NONE, "" }, \
{"4", SELECT_SH4, "Generate SH4 code" },
#endif
+#ifndef TARGET_SWITCH_SH4A
+#define TARGET_SWITCH_SH4A \
+ {"4a", TARGET_NONE, "" }, \
+ {"4a", SELECT_SH4A, "Generate SH4a code" },
+#endif
+#ifndef TARGET_SWITCH_SH4A_SINGLE_ONLY
+#define TARGET_SWITCH_SH4A_SINGLE_ONLY \
+ {"4a-single-only", TARGET_NONE, "" }, \
+ {"4a-single-only", SELECT_SH4A_SINGLE_ONLY, "Generate only single-precision SH4a code" },
+#endif
+#ifndef TARGET_SWITCH_SH4A_SINGLE
+#define TARGET_SWITCH_SH4A_SINGLE \
+ {"4a-single", TARGET_NONE, "" },\
+ {"4a-single", SELECT_SH4A_SINGLE, "Generate default single-precision SH4a code" },
+#endif
+#ifndef TARGET_SWITCH_SH4A_NOFPU
+#define TARGET_SWITCH_SH4A_NOFPU \
+ {"4a-nofpu", TARGET_NONE, "" },\
+ {"4a-nofpu", SELECT_SH4A_NOFPU, "Generate SH4a FPU-less code" },
+#endif
+#ifndef TARGET_SWITCH_SH4AL
+#define TARGET_SWITCH_SH4AL \
+ {"4al", TARGET_NONE, "" },\
+ {"4al", SELECT_SH4A_NOFPU, "Generate SH4al-dsp code" },
+#endif
#ifndef TARGET_SWITCH_SH5_64MEDIA
#define TARGET_SWITCH_SH5_64MEDIA \
{"5-64media", TARGET_NONE, "" }, \
TARGET_SWITCH_SH4_SINGLE \
TARGET_SWITCH_SH4_NOFPU \
TARGET_SWITCH_SH4 \
+ TARGET_SWITCH_SH4A_SINGLE_ONLY \
+ TARGET_SWITCH_SH4A_SINGLE \
+ TARGET_SWITCH_SH4A_NOFPU \
+ TARGET_SWITCH_SH4A \
+ TARGET_SWITCH_SH4AL \
TARGET_SWITCH_SH5_64MEDIA \
TARGET_SWITCH_SH5_64MEDIA_NOFPU \
TARGET_SWITCHES_SH5_32MEDIA \
#define SH_ASM_SPEC \
"%(subtarget_asm_endian_spec) %{mrelax:-relax %(subtarget_asm_relax_spec)}\
-%(subtarget_asm_isa_spec)"
+%(subtarget_asm_isa_spec) %{m4al:-dsp}"
#define ASM_SPEC SH_ASM_SPEC
assembler_dialect = 1; \
sh_cpu = CPU_SH4; \
} \
+ if (TARGET_SH4A_ARCH) \
+ { \
+ assembler_dialect = 1; \
+ sh_cpu = CPU_SH4A; \
+ } \
if (TARGET_SH5) \
{ \
sh_cpu = CPU_SH5; \
#define EXTRA_CONSTRAINT_Sr0(OP) \
(memory_operand((OP), GET_MODE (OP)) \
&& ! refers_to_regno_p (R0_REG, R0_REG + 1, OP, (rtx *)0))
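+/* `Sua' matches a MEM whose address does not involve a PLUS, i.e. it
+   rejects base+displacement and base+index addresses; movua only
+   accepts @Rn and @Rn+ addressing.  */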
+#define EXTRA_CONSTRAINT_Sua(OP) \
+ (memory_operand((OP), GET_MODE (OP)) \
+ && GET_CODE (XEXP (OP, 0)) != PLUS)
#define EXTRA_CONSTRAINT_S(OP, STR) \
((STR)[1] == 'r' && (STR)[2] == '0' ? EXTRA_CONSTRAINT_Sr0 (OP) \
+ : (STR)[1] == 'u' && (STR)[2] == 'a' ? EXTRA_CONSTRAINT_Sua (OP) \
: 0)
#define EXTRA_CONSTRAINT_STR(OP, C, STR) \
PROCESSOR_SH3,
PROCESSOR_SH3E,
PROCESSOR_SH4,
+ PROCESSOR_SH4A,
PROCESSOR_SH5
};
{"general_extend_operand", {SUBREG, REG, MEM, TRUNCATE}}, \
{"general_movsrc_operand", {SUBREG, REG, CONST_INT, CONST_DOUBLE, MEM}}, \
{"general_movdst_operand", {SUBREG, REG, MEM}}, \
+ {"unaligned_load_operand", {MEM}}, \
{"greater_comparison_operator", {GT,GE,GTU,GEU}}, \
{"int_gpr_dest", {SUBREG, REG}}, \
{"inqhi_operand", {TRUNCATE}}, \
;; Target CPU.
(define_attr "cpu"
- "sh1,sh2,sh2e,sh3,sh3e,sh4,sh5"
+ "sh1,sh2,sh2e,sh3,sh3e,sh4,sh4a,sh5"
(const (symbol_ref "sh_cpu_attr")))
(define_attr "endian" "big,little"
;; ftrc_s fix_truncsfsi2_i4
;; dfdiv double precision floating point divide (or square root)
;; cwb ic_invalidate_line_i
+;; movua SH4a unaligned load
+;; fsrra square root reciprocal approximate
+;; fsca sine and cosine approximate
;; tls_load load TLS related address
;; arith_media SHmedia arithmetic, logical, and shift instructions
;; cbranch_media SHmedia conditional branch instructions
;; nil no-op move, will be deleted.
(define_attr "type"
- "mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,fload,store,move,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fdiv,ftrc_s,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,mem_fpscr,gp_fpscr,cwb,tls_load,arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media,nil,other"
+ "mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,fload,store,move,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fdiv,ftrc_s,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,mem_fpscr,gp_fpscr,cwb,movua,fsrra,fsca,tls_load,arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media,nil,other"
(const_string "other"))
;; We define a new attribute, namely "insn_class".  We use
emit_insn (gen_ic_invalidate_line_compact (operands[0], operands[1]));
DONE;
}
+ else if (TARGET_SH4A_ARCH)
+ {
+ emit_insn (gen_ic_invalidate_line_sh4a (operands[0]));
+ DONE;
+ }
operands[0] = force_reg (Pmode, operands[0]);
operands[1] = force_reg (Pmode, GEN_INT (trunc_int_for_mode (0xf0000008,
Pmode)));
[(set_attr "length" "8")
(set_attr "type" "cwb")])
+(define_insn "ic_invalidate_line_sh4a"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_ICACHE)]
+ "TARGET_SH4A_ARCH"
+ "ocbwb\\t@%0\;synco\;icbi\\t@%0"
+ [(set_attr "length" "16")
+ (set_attr "type" "cwb")])
+
;; ??? could make arg 0 an offsettable memory operand to allow to save
;; an add in the code that calculates the address.
(define_insn "ic_invalidate_line_media"
(xor:PSI (reg:PSI FPSCR_REG) (const_int 1048576)))]
"TARGET_SH4"
"fschg"
- [(set_attr "fp_set" "unknown")])
+ [(set_attr "type" "fp") (set_attr "fp_set" "unknown")])
+
+;; There's no way we can use it today, since the mode-switching
+;; optimization doesn't tell us which mode we're switching from when
+;; it requests a given mode, so we can't tell whether we can use a
+;; relative mode switch (like toggle_pr) or an absolute switch (like
+;; loading fpscr from memory).
+(define_insn "toggle_pr"
+ [(set (reg:PSI FPSCR_REG)
+ (xor:PSI (reg:PSI FPSCR_REG) (const_int 524288)))]
+ "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE"
+ "fpchg"
+ [(set_attr "type" "fp")])
(define_expand "addsf3"
[(set (match_operand:SF 0 "arith_reg_operand" "")
[(set_attr "type" "fdiv")
(set_attr "fp_mode" "single")])
+(define_insn "rsqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (div:SF (match_operand:SF 1 "immediate_operand" "i")
+ (sqrt:SF (match_operand:SF 2 "register_operand" "0"))))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH4A_FP && flag_unsafe_math_optimizations
+ && operands[1] == CONST1_RTX (SFmode)"
+ "fsrra %0"
+ [(set_attr "type" "fsrra")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "fsca"
+ [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f")
+ (vec_concat:V2SF
+ (unspec:SF [(mult:SF
+ (float:SF (match_operand:SI 1 "fpul_operand" "y"))
+ (match_operand:SF 2 "immediate_operand" "i"))
+ ] UNSPEC_FSINA)
+ (unspec:SF [(mult:SF (float:SF (match_dup 1)) (match_dup 2))
+ ] UNSPEC_FCOSA)))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH4A_FP && flag_unsafe_math_optimizations
+ && operands[2] == sh_fsca_int2sf ()"
+ "fsca fpul,%d0"
+ [(set_attr "type" "fsca")
+ (set_attr "fp_mode" "single")])
+
+(define_expand "sinsf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "")]
+ UNSPEC_FSINA))]
+ "TARGET_SH4A_FP && flag_unsafe_math_optimizations"
+ "
+{
+ rtx scaled = gen_reg_rtx (SFmode);
+ rtx truncated = gen_reg_rtx (SImode);
+ rtx fsca = gen_reg_rtx (V2SFmode);
+ rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ());
+
+ emit_sf_insn (gen_mulsf3 (scaled, operands[1], scale_reg));
+ emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled));
+ emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
+ get_fpscr_rtx ()));
+ emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 0));
+ DONE;
+}")
+
+(define_expand "cossf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "")]
+ UNSPEC_FCOSA))]
+ "TARGET_SH4A_FP && flag_unsafe_math_optimizations"
+ "
+{
+ rtx scaled = gen_reg_rtx (SFmode);
+ rtx truncated = gen_reg_rtx (SImode);
+ rtx fsca = gen_reg_rtx (V2SFmode);
+ rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ());
+
+ emit_sf_insn (gen_mulsf3 (scaled, operands[1], scale_reg));
+ emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled));
+ emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
+ get_fpscr_rtx ()));
+ emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 4));
+ DONE;
+}")
+
+(define_expand "sindf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (unspec:DF [(match_operand:DF 1 "fp_arith_reg_operand" "")]
+ UNSPEC_FSINA))]
+ "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE && flag_unsafe_math_optimizations"
+ "
+{
+ rtx scaled = gen_reg_rtx (DFmode);
+ rtx truncated = gen_reg_rtx (SImode);
+ rtx fsca = gen_reg_rtx (V2SFmode);
+ rtx scale_reg = force_reg (DFmode, sh_fsca_df2int ());
+ rtx sfresult = gen_reg_rtx (SFmode);
+
+ emit_df_insn (gen_muldf3 (scaled, operands[1], scale_reg));
+ emit_df_insn (gen_fix_truncdfsi2 (truncated, scaled));
+ emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
+ get_fpscr_rtx ()));
+ emit_move_insn (sfresult, gen_rtx_SUBREG (SFmode, fsca, 0));
+ emit_df_insn (gen_extendsfdf2 (operands[0], sfresult));
+ DONE;
+}")
+
+(define_expand "cosdf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (unspec:DF [(match_operand:DF 1 "fp_arith_reg_operand" "")]
+ UNSPEC_FCOSA))]
+ "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE && flag_unsafe_math_optimizations"
+ "
+{
+ rtx scaled = gen_reg_rtx (DFmode);
+ rtx truncated = gen_reg_rtx (SImode);
+ rtx fsca = gen_reg_rtx (V2SFmode);
+ rtx scale_reg = force_reg (DFmode, sh_fsca_df2int ());
+ rtx sfresult = gen_reg_rtx (SFmode);
+
+ emit_df_insn (gen_muldf3 (scaled, operands[1], scale_reg));
+ emit_df_insn (gen_fix_truncdfsi2 (truncated, scaled));
+ emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
+ get_fpscr_rtx ()));
+ emit_move_insn (sfresult, gen_rtx_SUBREG (SFmode, fsca, 4));
+ emit_df_insn (gen_extendsfdf2 (operands[0], sfresult));
+ DONE;
+}")
+
(define_expand "abssf2"
[(set (match_operand:SF 0 "fp_arith_reg_operand" "")
(abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))]
DONE;
}")
+
+(define_insn "movua"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (sign_extract:SI (match_operand:SI 1 "unaligned_load_operand" "Sua>")
+ (const_int 32) (const_int 0)))]
+ "TARGET_SH4A_ARCH"
+ "movua.l %1,%0"
+ [(set_attr "type" "movua")])
+
+;; We shouldn't need this, but cse replaces increments with references
+;; to other regs before flow has a chance to create post_inc
+;; addressing modes, and only postreload's cse_move2add brings the
+;; increments back to a usable form.
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (mem:SI (match_operand:SI 1 "register_operand" ""))
+ (const_int 32) (const_int 0)))
+ (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))]
+ "TARGET_SH4A_ARCH && REGNO (operands[0]) != REGNO (operands[1])"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (mem:SI (post_inc:SI
+ (match_operand:SI 1 "register_operand" "")))
+ (const_int 32) (const_int 0)))]
+ "")
+
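+;; Hypothetical illustration (not part of the patch): on SH4a, a
+;; word-copy loop over an unaligned source, such as the C below, is
+;; the kind of input whose movua loads the peephole above rewrites
+;; into post-increment form:
+;;
+;;   int sum_unaligned (const char *p, int n)
+;;   {
+;;     int i, s = 0, w;
+;;     for (i = 0; i < n; i++, p += 4)
+;;       {
+;;         __builtin_memcpy (&w, p, 4);  /* unaligned 32-bit load */
+;;         s += w;
+;;       }
+;;     return s;
+;;   }
+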
+(define_expand "extv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (match_operand:QI 1 "unaligned_load_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ ""
+{
+ if (TARGET_SH4A_ARCH
+ && INTVAL (operands[2]) == 32
+ && INTVAL (operands[3]) == -24 * (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ && GET_CODE (operands[1]) == MEM && MEM_ALIGN (operands[1]) < 32)
+ {
+ emit_insn (gen_movua (operands[0],
+ adjust_address (operands[1], SImode, 0)));
+ DONE;
+ }
+
+ FAIL;
+})
+
+(define_expand "extzv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extract:SI (match_operand:QI 1 "unaligned_load_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ ""
+{
+ if (TARGET_SH4A_ARCH
+ && INTVAL (operands[2]) == 32
+ && INTVAL (operands[3]) == -24 * (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ && GET_CODE (operands[1]) == MEM && MEM_ALIGN (operands[1]) < 32)
+ {
+ emit_insn (gen_movua (operands[0],
+ adjust_address (operands[1], SImode, 0)));
+ DONE;
+ }
+
+ FAIL;
+})
+
\f
;; -------------------------------------------------------------------------
;; Peepholes
[(prefetch (match_operand:QI 0 "address_operand" "p")
(match_operand:SI 1 "const_int_operand" "n")
(match_operand:SI 2 "const_int_operand" "n"))]
- "TARGET_SHMEDIA"
+ "TARGET_SHMEDIA || TARGET_HARD_SH4"
"*
{
+ if (TARGET_HARD_SH4)
+ return \"pref @%0\";
operands[0] = gen_rtx_MEM (QImode, operands[0]);
output_asm_insn (\"ld%M0.b %m0,r63\", operands);
return \"\";
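+;; Small illustration (not part of the patch): with the pattern now
+;; accepting TARGET_HARD_SH4, __builtin_prefetch expands to a
+;; `pref @rN' insn on SH4 and SH4a as well as SHmedia:
+;;
+;;   void warm (const char *p)
+;;   {
+;;     __builtin_prefetch (p, 0, 3);  /* read, high temporal locality */
+;;   }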
--- /dev/null
+;; Scheduling description for Renesas SH4a
+;; Copyright (C) 2003 Free Software Foundation, Inc.
+;;
+;; This file is part of GNU CC.
+;;
+;; GNU CC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GNU CC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GNU CC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;; The following description models the SH4A pipeline
+;; using the DFA-based scheduler.
+
+(define_automaton "sh4a")
+
+(define_cpu_unit "sh4a_ex" "sh4a")
+(define_cpu_unit "sh4a_ls" "sh4a")
+(define_cpu_unit "sh4a_fex" "sh4a")
+(define_cpu_unit "sh4a_fls" "sh4a")
+(define_cpu_unit "sh4a_mult" "sh4a")
+(define_cpu_unit "sh4a_fdiv" "sh4a")
+
+;; Decoding is done on the integer pipeline, as on the
+;; SH4.  Define issue to be the | of the two pipelines
+;; to control how often instructions are issued.
+(define_reservation "ID_or" "sh4a_ex|sh4a_ls")
+(define_reservation "ID_and" "sh4a_ex+sh4a_ls")
+
+
+;; =======================================================
+;; Locking Descriptions
+
+;; Memory access on the LS pipeline.
+(define_cpu_unit "sh4a_memory" "sh4a")
+
+;; Other access on the LS pipeline.
+(define_cpu_unit "sh4a_load_store" "sh4a")
+
+;; The address calculator used for branch instructions.
+;; It is reserved when a branch instruction issues, which
+;; makes sure that no two branch instructions can be
+;; issued in parallel.
+(define_reservation "sh4a_addrcalc" "sh4a_ex")
+
+;; =======================================================
+;; Reservations
+
+;; Branch (BF,BF/S,BT,BT/S,BRA,BSR)
+;; Group: BR
+;; Latency when taken: 2
+(define_insn_reservation "sh4a_branch" 2
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "cbranch,jump"))
+ "ID_or+sh4a_addrcalc")
+
+;; Jump (JSR,JMP,RTS)
+;; Group: BR
+;; Latency: 3
+(define_insn_reservation "sh4a_jump" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "return,jump_ind"))
+ "ID_or+sh4a_addrcalc")
+
+;; RTE
+;; Group: CO
+;; Latency: 3
+(define_insn_reservation "sh4a_rte" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "rte"))
+ "ID_and*4")
+
+;; EX Group Single
+;; Group: EX
+;; Latency: 0
+(define_insn_reservation "sh4a_ex" 0
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "insn_class" "ex_group"))
+ "sh4a_ex")
+
+;; MOVA
+;; Group: LS
+;; Latency: 1
+(define_insn_reservation "sh4a_mova" 1
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "mova"))
+ "sh4a_ls+sh4a_load_store")
+
+;; MOV
+;; Group: MT
+;; Latency: 0
+(define_insn_reservation "sh4a_mov" 0
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "move"))
+ "ID_or")
+
+;; Load
+;; Group: LS
+;; Latency: 3
+(define_insn_reservation "sh4a_load" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "load,pcload"))
+ "sh4a_ls+sh4a_memory")
+
+(define_insn_reservation "sh4a_load_si" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "load_si,pcload_si"))
+ "sh4a_ls+sh4a_memory")
+
+;; Store
+;; Group: LS
+;; Latency: 0
+(define_insn_reservation "sh4a_store" 0
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "store"))
+ "sh4a_ls+sh4a_memory")
+
+;; CWB TYPE
+
+;; MOVUA
+;; Group: LS
+;; Latency: 3
+(define_insn_reservation "sh4a_movua" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "movua"))
+ "sh4a_ls+sh4a_memory*2")
+
+;; Fixed point multiplication (single)
+;; Group: CO
+;; Latency: 2
+(define_insn_reservation "sh4a_smult" 2
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "smpy"))
+ "ID_or+sh4a_mult")
+
+;; Fixed point multiplication (double)
+;; Group: CO
+;; Latency: 3
+(define_insn_reservation "sh4a_dmult" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "dmpy"))
+ "ID_or+sh4a_mult")
+
+(define_insn_reservation "sh4a_mac_gp" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "mac_gp"))
+ "ID_and")
+
+;; Other MT group instructions (1-step operations)
+;; Group: MT
+;; Latency: 1
+(define_insn_reservation "sh4a_mt" 1
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "mt_group"))
+ "ID_or")
+
+;; Floating point reg move
+;; Group: LS
+;; Latency: 2
+(define_insn_reservation "sh4a_freg_mov" 2
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fmove"))
+ "sh4a_ls,sh4a_fls")
+
+;; Single-precision floating point computation: FCMP/EQ,
+;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRCHG, FSCHG
+;; Group: FE
+;; Latency: 3
+(define_insn_reservation "sh4a_fp_arith" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fp"))
+ "ID_or,sh4a_fex")
+
+(define_insn_reservation "sh4a_fp_arith_ftrc" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "ftrc_s"))
+ "ID_or,sh4a_fex")
+
+;; Single-precision FDIV/FSQRT
+;; Group: FE
+;; Latency: 20
+(define_insn_reservation "sh4a_fdiv" 20
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fdiv"))
+ "ID_or,sh4a_fex+sh4a_fdiv,sh4a_fex")
+
+;; Double Precision floating point computation
+;; (FCNVDS, FCNVSD, FLOAT, FTRC)
+;; Group: FE
+;; Latency: 3
+(define_insn_reservation "sh4a_dp_float" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "dfp_conv"))
+ "ID_or,sh4a_fex")
+
+;; Double-precision floating-point (FADD,FMUL,FSUB)
+;; Group: FE
+;; Latency: 5
+(define_insn_reservation "sh4a_fp_double_arith" 5
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "dfp_arith"))
+ "ID_or,sh4a_fex*3")
+
+;; Double precision FDIV/SQRT
+;; Group: FE
+;; Latency: 36
+(define_insn_reservation "sh4a_dp_div" 36
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "dfdiv"))
+ "ID_or,sh4a_fex+sh4a_fdiv,sh4a_fex*2")
+
+;; FSRRA
+;; Group: FE
+;; Latency: 5
+(define_insn_reservation "sh4a_fsrra" 5
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fsrra"))
+ "ID_or,sh4a_fex")
+
+;; FSCA
+;; Group: FE
+;; Latency: 7
+(define_insn_reservation "sh4a_fsca" 7
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fsca"))
+ "ID_or,sh4a_fex*3")
--- /dev/null
+ML_sh4a=m4a/
--- /dev/null
+ML_sh4a_nofpu=m4a-nofpu/
--- /dev/null
+ML_sh4a_single=m4a-single/
--- /dev/null
+ML_sh4a_single_only=m4a-single-only/
--- /dev/null
+ML_sh4al=m4al/
cat $(srcdir)/config/fp-bit.c >> fp-bit.c
MULTILIB_ENDIAN = ml/mb
-MULTILIB_CPUS= $(ML_sh1)$(ML_sh2e)$(ML_sh2)$(ML_sh3e)$(ML_sh3)$(ML_sh4_nofpu)$(ML_sh4_single_only)$(ML_sh4_single)$(ML_sh4)$(ML_m5_32media)$(ML_m5_32media_nofpu)$(ML_m5_compact)$(ML_m5_compact_nofpu)$(ML_m5_64media)$(ML_m5_64media_nofpu)
+MULTILIB_CPUS= $(ML_sh1)$(ML_sh2e)$(ML_sh2)$(ML_sh3e)$(ML_sh3)$(ML_sh4_nofpu)$(ML_sh4_single_only)$(ML_sh4_single)$(ML_sh4)$(ML_sh4a_nofpu)$(ML_sh4a_single_only)$(ML_sh4a_single)$(ML_sh4a)$(ML_m5_32media)$(ML_m5_32media_nofpu)$(ML_m5_compact)$(ML_m5_compact_nofpu)$(ML_m5_64media)$(ML_m5_64media_nofpu)
MULTILIB_OPTIONS= $(MULTILIB_ENDIAN) $(MULTILIB_CPUS:/=)
MULTILIB_DIRNAMES=
#MULTILIB_MATCHES = m2=m3 m2e=m3e m2=m4-nofpu
MULTILIB_MATCHES = $(shell \
multilibs="$(MULTILIB_OPTIONS)" ; \
- for abi in m1,m2,m3,m4-nofpu \
- m2e,m3e,m4-single-only \
+ for abi in m1,m2,m3,m4-nofpu,m4al,m4a-nofpu \
+ m2e,m3e,m4-single-only,m4a-single-only \
+ m4-single,m4a-single m4,m4a \
m5-32media,m5-compact,m5-32media \
m5-32media-nofpu,m5-compact-nofpu,m5-32media-nofpu; do \
subst= ; \
@emph{SH Options}
@gccoptlist{-m1 -m2 -m2e -m3 -m3e @gol
-m4-nofpu -m4-single-only -m4-single -m4 @gol
+-m4a-nofpu -m4a-single-only -m4a-single -m4a -m4al @gol
-m5-64media -m5-64media-nofpu @gol
-m5-32media -m5-32media-nofpu @gol
-m5-compact -m5-compact-nofpu @gol
@opindex m4
Generate code for the SH4.
+@item -m4a-nofpu
+@opindex m4a-nofpu
+Generate code for the SH4al-dsp, or for an SH4a in such a way that the
+floating-point unit is not used.
+
+@item -m4a-single-only
+@opindex m4a-single-only
+Generate code for the SH4a in such a way that no double-precision
+floating-point operations are used.
+
+@item -m4a-single
+@opindex m4a-single
+Generate code for the SH4a assuming the floating-point unit is in
+single-precision mode by default.
+
+@item -m4a
+@opindex m4a
+Generate code for the SH4a.
+
+@item -m4al
+@opindex m4al
+Same as @option{-m4a-nofpu}, except that it implicitly passes
+@option{-dsp} to the assembler. GCC doesn't generate any DSP
+instructions at the moment.
+
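+As a quick sketch, the preprocessor macros these options define can be
+used to tell the variants apart in source code:
+
+@smallexample
+#ifdef __SH4A__
+# ifdef __SH4_NOFPU__
+/* -m4a-nofpu or -m4al: SH4a insns, FPU not used.  */
+# elif defined __SH4_SINGLE_ONLY__
+/* -m4a-single-only: single-precision FP only.  */
+# elif defined __SH4_SINGLE__
+/* -m4a-single: FPU defaults to single precision.  */
+# else
+/* -m4a: double precision by default.  */
+# endif
+#endif
+@end smallexample
+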
@item -mb
@opindex mb
Compile code for the processor in big endian mode.
+2004-07-28 Alexandre Oliva <aoliva@redhat.com>
+
+ 2003-07-06 Alexandre Oliva <aoliva@redhat.com>
+
+ * gcc.dg/sh4a-memmovua.c: Tweak regular expression.
+
+ 2003-07-01 Alexandre Oliva <aoliva@redhat.com>
+
+ * gcc.dg/sh4a-bitmovua.c: New.
+ * gcc.dg/sh4a-cos.c: New.
+ * gcc.dg/sh4a-cosf.c: New.
+ * gcc.dg/sh4a-fprun.c: New.
+ * gcc.dg/sh4a-fsrra.c: New.
+ * gcc.dg/sh4a-memmovua.c: New.
+ * gcc.dg/sh4a-sin.c: New.
+ * gcc.dg/sh4a-sincos.c: New.
+ * gcc.dg/sh4a-sincosf.c: New.
+ * gcc.dg/sh4a-sinf.c: New.
+
2004-07-28 Diego Novillo <dnovillo@redhat.com>
* gcc.dg/tree-ssa/20030714-2.c: Adjust number of expected
--- /dev/null
+/* Verify that we generate movua to load unaligned 32-bit values. */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O" } */
+/* { dg-final { scan-assembler-times "\tmovua\\.l\t" 6 } } */
+
+#ifdef __SH4A__
+/* Aligned. */
+struct s0 { long long d : 32; } x0;
+long long f0() {
+ return x0.d;
+}
+
+/* Unaligned load. */
+struct s1 { long long c : 8; long long d : 32; } x1;
+long long f1() {
+ return x1.d;
+}
+
+/* Unaligned load. */
+struct s2 { long long c : 16; long long d : 32; } x2;
+long long f2() {
+ return x2.d;
+}
+
+/* Unaligned load. */
+struct s3 { long long c : 24; long long d : 32; } x3;
+long long f3() {
+ return x3.d;
+}
+
+/* Aligned. */
+struct s4 { long long c : 32; long long d : 32; } x4;
+long long f4() {
+ return x4.d;
+}
+
+/* Aligned. */
+struct u0 { unsigned long long d : 32; } y0;
+unsigned long long g0() {
+ return y0.d;
+}
+
+/* Unaligned load. */
+struct u1 { long long c : 8; unsigned long long d : 32; } y1;
+unsigned long long g1() {
+ return y1.d;
+}
+
+/* Unaligned load. */
+struct u2 { long long c : 16; unsigned long long d : 32; } y2;
+unsigned long long g2() {
+ return y2.d;
+}
+
+/* Unaligned load. */
+struct u3 { long long c : 24; unsigned long long d : 32; } y3;
+unsigned long long g3() {
+ return y3.d;
+}
+
+/* Aligned. */
+struct u4 { long long c : 32; unsigned long long d : 32; } y4;
+unsigned long long g4() {
+ return y4.d;
+}
+#else
+asm ("movua.l\t");
+asm ("movua.l\t");
+asm ("movua.l\t");
+asm ("movua.l\t");
+asm ("movua.l\t");
+asm ("movua.l\t");
+#endif
--- /dev/null
+/* Verify that we generate single-precision sine and cosine approximate
+ (fsca) in fast math mode. */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O -ffast-math" } */
+/* { dg-final { scan-assembler "\tfsca\t" } } */
+
+#if defined __SH4A__ && ! defined __SH4_NOFPU__
+#include <math.h>
+
+double test(double f) { return cos(f); }
+#else
+asm ("fsca\t");
+#endif
--- /dev/null
+/* Verify that we generate single-precision sine and cosine approximate
+ (fsca) in fast math mode. */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O -ffast-math" } */
+/* { dg-final { scan-assembler "\tfsca\t" } } */
+
+#if defined __SH4A__ && ! defined __SH4_NOFPU__
+#include <math.h>
+
+float test(float f) { return cosf(f); }
+#else
+asm ("fsca\t");
+#endif
--- /dev/null
+/* Verify that fsca and fsrra yield reasonable results. */
+/* { dg-do run { target "sh*-*-*" } } */
+/* { dg-options "-O -ffast-math" } */
+
+#include <math.h>
+#include <stdlib.h>
+
+float sqrt_arg = 4.0f, sqrt_res = 2.0f;
+float dg2rad_f;
+double dg2rad_d;
+
+void check_f (float res, float expected) {
+ if (res >= expected - 0.001f && res <= expected + 0.001f)
+ return;
+
+ abort ();
+}
+
+void check_d (double res, double expected) {
+ if (res >= expected - 0.001 && res <= expected + 0.001)
+ return;
+
+ abort ();
+}
+
+int main() {
+ check_f (sqrtf(sqrt_arg), sqrt_res);
+ dg2rad_f = dg2rad_d = atan(1) / 45;
+ check_f (sinf(90*dg2rad_f), 1);
+ check_f (cosf(90*dg2rad_f), 0);
+ check_d (sin(-90*dg2rad_d), -1);
+ check_d (cos(180*dg2rad_d), -1);
+ check_d (sin(-45*dg2rad_d) * cosf(135*dg2rad_f), 0.5);
+ exit (0);
+}
--- /dev/null
+/* Verify that we generate single-precision square root reciprocal
+ approximate (fsrra) in fast math mode. */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O -ffast-math" } */
+/* { dg-final { scan-assembler "\tfsrra\t" } } */
+
+#if defined __SH4A__ && ! defined __SH4_NOFPU__
+#include <math.h>
+
+float test(float f) { return 1 / sqrtf(f); }
+#else
+asm ("fsrra\t");
+#endif
--- /dev/null
+/* Verify that we generate movua to copy unaligned memory regions to
+ 32-bit-aligned addresses. */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O" } */
+/* { dg-final { scan-assembler-times "\tmovua\\.l\t(.*)+" 2 } } */
+
+#ifdef __SH4A__
+#include <string.h>
+
+struct s { int i; char a[10], b[10]; } x;
+int f() {
+ memcpy(x.a, x.b, 10);
+}
+#else
+asm ("movua.l\t+");
+asm ("movua.l\t+");
+#endif
--- /dev/null
+/* Verify that we generate single-precision sine and cosine approximate
+ (fsca) in fast math mode. */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O -ffast-math" } */
+/* { dg-final { scan-assembler "\tfsca\t" } } */
+
+#if defined __SH4A__ && ! defined __SH4_NOFPU__
+#include <math.h>
+
+double test(double f) { return sin(f); }
+#else
+asm ("fsca\t");
+#endif
--- /dev/null
+/* Verify that we generate a single single-precision sine and cosine
+ approximate (fsca) in fast math mode when a function computes both
+ sine and cosine. */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O -ffast-math" } */
+/* { dg-final { scan-assembler-times "\tfsca\t" 1 } } */
+
+#if defined __SH4A__ && ! defined __SH4_NOFPU__
+#include <math.h>
+
+double test(double f) { return sin(f) + cos(f); }
+#else
+asm ("fsca\t");
+#endif
--- /dev/null
+/* Verify that we generate a single single-precision sine and cosine
+ approximate (fsca) in fast math mode when a function computes both
+ sine and cosine. */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O -ffast-math" } */
+/* { dg-final { scan-assembler-times "\tfsca\t" 1 } } */
+
+#if defined __SH4A__ && ! defined __SH4_NOFPU__
+#include <math.h>
+
+float test(float f) { return sinf(f) + cosf(f); }
+#else
+asm ("fsca\t");
+#endif
--- /dev/null
+/* Verify that we generate single-precision sine and cosine approximate
+ (fsca) in fast math mode. */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O -ffast-math" } */
+/* { dg-final { scan-assembler "\tfsca\t" } } */
+
+#if defined __SH4A__ && ! defined __SH4_NOFPU__
+#include <math.h>
+
+float test(float f) { return sinf(f); }
+#else
+asm ("fsca\t");
+#endif
+2004-07-28 Alexandre Oliva <aoliva@redhat.com>
+
+ 2003-10-01 Eric Christopher <echristo@redhat.com>
+
+ * config/cpu/sh/atomicity.h (__exchange_and_add): Remove 'm'
+ constraint.
+
+ 2003-07-09 Alexandre Oliva <aoliva@redhat.com>
+
+ * config/cpu/sh/atomicity.h: New. Use movli and movco on SH4a.
+
2004-07-23 Benjamin Kosnik <bkoz@redhat.com>
PR libstdc++/16678
--- /dev/null
+// Low-level functions for atomic operations: SH version -*- C++ -*-
+
+// Copyright (C) 1999, 2001, 2002, 2003 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 2, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING. If not, write to the Free
+// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+// USA.
+
+// As a special exception, you may use this file as part of a free software
+// library without restriction. Specifically, if other files instantiate
+// templates or use macros or inline functions from this file, or you compile
+// this file and link it with other files to produce an executable, this
+// file does not by itself cause the resulting executable to be covered by
+// the GNU General Public License. This exception does not however
+// invalidate any other reasons why the executable file might be covered by
+// the GNU General Public License.
+
+#ifndef _BITS_ATOMICITY_H
+#define _BITS_ATOMICITY_H 1
+
+#ifdef __SH4A__
+
+typedef int _Atomic_word;
+
+static inline _Atomic_word
+__attribute__ ((__unused__))
+__exchange_and_add (volatile _Atomic_word* __mem, int __val)
+{
+ _Atomic_word __result;
+
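+  /* movli.l and movco.l form SH4a's load-linked/store-conditional
+     pair: movco.l succeeds (sets the T bit) only if nothing has
+     disturbed the reservation established by movli.l, and bf loops
+     back to retry the sequence on failure.  */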
+ __asm__ __volatile__
+ ("0:\n"
+ "\tmovli.l\t@%2,r0\n"
+ "\tmov\tr0,%1\n"
+ "\tadd\t%3,r0\n"
+ "\tmovco.l\tr0,@%2\n"
+ "\tbf\t0b"
+ : "+m" (*__mem), "=r" (__result)
+ : "r" (__mem), "rI08" (__val)
+ : "r0");
+
+ return __result;
+}
+
+
+static inline void
+__attribute__ ((__unused__))
+__atomic_add (volatile _Atomic_word* __mem, int __val)
+{
+ asm("0:\n"
+ "\tmovli.l\t@%1,r0\n"
+ "\tadd\t%2,r0\n"
+ "\tmovco.l\tr0,@%1\n"
+ "\tbf\t0b"
+ : "+m" (*__mem)
+ : "r" (__mem), "rI08" (__val)
+ : "r0");
+}
+
+#else
+
+/* This is generic/atomicity.h */
+
+#include <bits/gthr.h>
+
+#define _GLIBCPP_NEED_GENERIC_MUTEX
+
+typedef int _Atomic_word;
+
+namespace __gnu_cxx
+{
+ extern __gthread_mutex_t _Atomic_add_mutex;
+
+#ifndef __GTHREAD_MUTEX_INIT
+ extern __gthread_once_t _Atomic_add_mutex_once;
+ extern void __gthread_atomic_add_mutex_once();
+#endif
+}
+
+static inline _Atomic_word
+__attribute__ ((__unused__))
+__exchange_and_add (volatile _Atomic_word* __mem, int __val)
+{
+#ifndef __GTHREAD_MUTEX_INIT
+ __gthread_once (&__gnu_cxx::_Atomic_add_mutex_once,
+ __gnu_cxx::__gthread_atomic_add_mutex_once);
+#endif
+
+ _Atomic_word __result;
+
+ __gthread_mutex_lock (&__gnu_cxx::_Atomic_add_mutex);
+
+ __result = *__mem;
+ *__mem += __val;
+
+ __gthread_mutex_unlock (&__gnu_cxx::_Atomic_add_mutex);
+ return __result;
+}
+
+
+static inline void
+__attribute__ ((__unused__))
+__atomic_add (volatile _Atomic_word* __mem, int __val)
+{
+ (void) __exchange_and_add (__mem, __val);
+}
+
+
+#endif
+
+#endif /* atomicity.h */