* alpha.c (alpha_cpu_name): New variable.
(alpha_mlat_string): Likewise.
(alpha_memory_latency): Likewise.
(override_options): Handle -mmemory-latency.
(alpha_adjust_cost): Adjust load cost for latency.
* alpha.h (TARGET_OPTIONS): Add memory-latency.
(REGISTER_MOVE_COST): Define in terms of memory_latency. Take
TARGET_CIX into account.
(MEMORY_MOVE_COST): Define in terms of memory_latency.
* invoke.texi (DEC Alpha Options): Document -mmemory-latency.
* alpha.h (ASM_COMMENT_START): New macro.
From-SVN: r17106
-Mon Dec 15 17:48:05 1997 Ricahrd Henderson <rth@cygnus.com>
+Mon Dec 15 18:31:43 1997 Richard Henderson <rth@cygnus.com>
+
+ * alpha.c (alpha_cpu_name): New variable.
+ (alpha_mlat_string): Likewise.
+ (alpha_memory_latency): Likewise.
+ (override_options): Handle -mmemory-latency.
+ (alpha_adjust_cost): Adjust load cost for latency.
+ * alpha.h (TARGET_OPTIONS): Add memory-latency.
+ (REGISTER_MOVE_COST): Define in terms of memory_latency. Take
+ TARGET_CIX into account.
+ (MEMORY_MOVE_COST): Define in terms of memory_latency.
+ * invoke.texi (DEC Alpha Options): Document -mmemory-latency.
+
+ * alpha.h (ASM_COMMENT_START): New macro.
+
+Mon Dec 15 17:48:05 1997 Richard Henderson <rth@cygnus.com>
* reload.h, reload1.c (eliminate_regs), caller-save.c, dbxout.c,
dwarfout.c, dwarf2out.c, reload.c, sdbout.c: Revert March 15 change.
#include "config.h"
#include <stdio.h>
+#include <ctype.h>
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
/* Specify which cpu to schedule for. */
enum processor_type alpha_cpu;
+static char* const alpha_cpu_name[] =
+{
+ "ev4", "ev5", "ev6"
+};
/* Specify how accurate floating-point traps need to be. */
/* Strings decoded into the above options. */
-char *alpha_cpu_string; /* -mcpu=ev[4|5] */
+char *alpha_cpu_string; /* -mcpu= */
char *alpha_tp_string; /* -mtrap-precision=[p|s|i] */
char *alpha_fprm_string; /* -mfp-rounding-mode=[n|m|c|d] */
char *alpha_fptm_string; /* -mfp-trap-mode=[n|u|su|sui] */
+char *alpha_mlat_string; /* -mmemory-latency= */
/* Save information from a "cmpxx" operation until the branch or scc is
emitted. */
static rtx alpha_return_addr_rtx;
+/* The number of cycles of latency we should assume on memory reads. */
+
+int alpha_memory_latency = 3;
+
/* Declarations of static functions. */
static void alpha_set_memflags_1 PROTO((rtx, int, int, int));
static rtx alpha_emit_set_const_1 PROTO((rtx, enum machine_mode,
alpha_fptm = ALPHA_FPTM_SU;
}
}
+
+ {
+ /* Decode the -mmemory-latency= string into alpha_memory_latency.
+ Accepted forms are a plain decimal cycle count, `L<digit>' (upper or
+ lower case `L') looked up per cpu in cache_latency below, or `main'.
+ Anything else draws a warning and falls back to 3 cycles. */
+ char *end;
+ int lat;
+
+ if (!alpha_mlat_string)
+ alpha_mlat_string = "L1";
+
+ /* The <ctype.h> predicates are only defined for unsigned char values
+ and EOF, so cast before testing bytes of a user-supplied string. */
+ if (isdigit ((unsigned char) alpha_mlat_string[0])
+ && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
+ ;
+ else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
+ && isdigit ((unsigned char) alpha_mlat_string[1])
+ && alpha_mlat_string[2] == '\0')
+ {
+ /* Rows are indexed by alpha_cpu, columns by cache level minus one;
+ -1 marks a level whose latency is not known. */
+ static int const cache_latency[][4] =
+ {
+ { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
+ { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
+ { 3, 12, -1 }, /* ev6 -- Ho hum, doesn't exist yet */
+ };
+
+ lat = alpha_mlat_string[1] - '0';
+ /* Levels start at 1: `L0' must be rejected here, otherwise the
+ lookup below would read cache_latency[alpha_cpu][-1]. */
+ if (lat < 1 || lat > 3 || cache_latency[alpha_cpu][lat-1] == -1)
+ {
+ warning ("L%d cache latency unknown for %s",
+ lat, alpha_cpu_name[alpha_cpu]);
+ lat = 3;
+ }
+ else
+ lat = cache_latency[alpha_cpu][lat-1];
+ }
+ else if (! strcmp (alpha_mlat_string, "main"))
+ {
+ /* Most current memories have about 370ns latency. This is
+ a reasonable guess for a fast cpu. */
+ lat = 150;
+ }
+ else
+ {
+ warning ("bad value `%s' for -mmemory-latency", alpha_mlat_string);
+ lat = 3;
+ }
+
+ alpha_memory_latency = lat;
+ }
}
\f
/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
insn_type = get_attr_type (insn);
dep_insn_type = get_attr_type (dep_insn);
+ /* Bring in the user-defined memory latency. */
+ if (dep_insn_type == TYPE_LD || dep_insn_type == TYPE_LDSYM)
+ cost += alpha_memory_latency-1;
+
if (alpha_cpu == PROCESSOR_EV5)
{
/* And the lord DEC saith: "A special bypass provides an effective
extern char *m88k_short_data;
#define TARGET_OPTIONS { { "short-data-", &m88k_short_data } } */
-extern char *alpha_cpu_string; /* For -mcpu=ev[4|5] */
+extern char *alpha_cpu_string; /* For -mcpu= */
extern char *alpha_fprm_string; /* For -mfp-rounding-mode=[n|m|c|d] */
extern char *alpha_fptm_string; /* For -mfp-trap-mode=[n|u|su|sui] */
extern char *alpha_tp_string; /* For -mtrap-precision=[p|f|i] */
+extern char *alpha_mlat_string; /* For -mmemory-latency= */
#define TARGET_OPTIONS \
{ \
{"fp-rounding-mode=", &alpha_fprm_string}, \
{"fp-trap-mode=", &alpha_fptm_string}, \
{"trap-precision=", &alpha_tp_string}, \
+ {"memory-latency=", &alpha_mlat_string}, \
}
/* Sometimes certain combinations of command options do not make sense
reduce the impact of not being able to allocate a pseudo to a
hard register. */
-#define REGISTER_MOVE_COST(CLASS1, CLASS2) \
- (((CLASS1) == FLOAT_REGS) == ((CLASS2) == FLOAT_REGS) ? 2 : 20)
+#define REGISTER_MOVE_COST(CLASS1, CLASS2) \
+ (TARGET_CIX || ((CLASS1) == FLOAT_REGS) == ((CLASS2) == FLOAT_REGS) \
+ ? 2 : 4+2*alpha_memory_latency)
/* A C expressions returning the cost of moving data of MODE from a register to
or from memory.
On the Alpha, bump this up a bit. */
-#define MEMORY_MOVE_COST(MODE) 6
+extern int alpha_memory_latency;
+#define MEMORY_MOVE_COST(MODE) (2*alpha_memory_latency)
/* Provide the cost of a branch. Exact meaning under development. */
#define BRANCH_COST 5
IS_LOCAL is 0 if name is used in call, 1 if name is used in definition. */
extern void alpha_need_linkage ();
+/* This macro defines the start of an assembly comment. */
+
+#define ASM_COMMENT_START " #"
+
/* This macro produces the initial definition of a function name. On the
Alpha, we need to save the function name for the prologue and epilogue. */
;; the address, BBOX, used for branches, EBOX, used for integer
;; operations, and FBOX, used for FP operations.
-;; Memory delivers its result in three cycles.
+;; Memory delivers its result in three cycles. Actually return one and
+;; take care of this in adjust_cost, since we want to handle user-defined
+;; memory latencies.
(define_function_unit "ev4_abox" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "ld,ldsym,st"))
- 3 1)
+ 1 1)
;; Branches have no delay cost, but do tie up the unit for two cycles.
(define_function_unit "ev4_bbox" 1 1
1 1)
;; Memory takes at least 2 clocks, and load cannot dual issue with stores.
+;; Return one from here and fix up with user-defined latencies in adjust_cost.
(define_function_unit "ev5_ebox" 2 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "ld,ldsym"))
- 2 1)
+ 1 1)
(define_function_unit "ev5_e0" 1 0
(and (eq_attr "cpu" "ev5")