[ARM] Refactor costs calculation for MEM.
author: Charles Baylis <charles.baylis@linaro.org>
Thu, 23 Nov 2017 18:37:18 +0000 (18:37 +0000)
committer: Charles Baylis <cbaylis@gcc.gnu.org>
Thu, 23 Nov 2017 18:37:18 +0000 (18:37 +0000)
This patch moves the calculation of costs for MEM into a
separate function, and restructures the calculation into two
parts. First, any additional cost of the addressing mode
is calculated, and then the cost of the memory access itself
is added.

In this patch, the calculation of the cost of the addressing
mode is omitted, to be added in a subsequent patch.

gcc/ChangeLog:

<date>  Charles Baylis  <charles.baylis@linaro.org>

        * config/arm/arm.c (arm_mem_costs): New function.
        (arm_rtx_costs_internal): Use arm_mem_costs.

gcc/testsuite/ChangeLog:

<date>  Charles Baylis  <charles.baylis@linaro.org>

        * gcc.target/arm/addr-modes-float.c: New test.
        * gcc.target/arm/addr-modes-int.c: New test.
        * gcc.target/arm/addr-modes.h: New header.

From-SVN: r255111

gcc/ChangeLog
gcc/config/arm/arm.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/arm/addr-modes-float.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/addr-modes-int.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/addr-modes.h [new file with mode: 0644]

index 7986f134a618af6d832332d020ef488d9b5c0330..e41ae9f2dc40c4c8166a4ea79f25d8acfdeaf8fe 100644 (file)
@@ -1,3 +1,8 @@
+2017-11-23  Charles Baylis  <charles.baylis@linaro.org>
+
+       * config/arm/arm.c (arm_mem_costs): New function.
+       (arm_rtx_costs_internal): Use arm_mem_costs.
+
 2017-11-23  Mark Wielaard  <mark@klomp.org>
 
        * dwarf2out.c (init_sections_and_labels): Use generation to create
index bed4d6cdd4825d67c1b2472e4a19441ec2068398..5bdf7ff7e047bc32819221daafb75c075494e136 100644 (file)
@@ -9242,8 +9242,52 @@ arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
          }                                                             \
        while (0)
 
+/* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
+   considering the costs of the addressing mode and memory access
+   separately.
+
+   X is the MEM rtx being costed: its mode selects which access cost is
+   used, and its address, XEXP (x, 0), is inspected for the PIC case.
+   EXTRA_COST is the CPU cost table whose ldst fields supply the access
+   cost; it is only read when SPEED_P is true.  The result is stored in
+   *COST, which is overwritten rather than accumulated into.
+
+   Only the load entries of the table (loadd, loadf, loadv, ldrd, load)
+   are used here.
+
+   Always returns true; arm_rtx_costs_internal returns this value directly
+   for its MEM case.  */
+static bool
+arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
+              int *cost, bool speed_p)
+{
+  machine_mode mode = GET_MODE (x);
+
+  *cost = COSTS_N_INSNS (1);  /* Base cost: one instruction.  */
+
+  if (flag_pic
+      && GET_CODE (XEXP (x, 0)) == PLUS
+      && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
+    /* This will be split into two instructions.  Add the cost of the
+       additional instruction here.  The cost of the memory access is computed
+       below.  See arm.md:calculate_pic_address.  */
+    *cost += COSTS_N_INSNS (1);
+
+  /* Calculate cost of memory access.  This is only added when costing for
+     speed, and is picked from EXTRA_COST by mode class and mode size.  */
+  if (speed_p)
+    {
+      if (FLOAT_MODE_P (mode))
+       {
+         if (GET_MODE_SIZE (mode) == 8)
+           *cost += extra_cost->ldst.loadd;
+         else
+           *cost += extra_cost->ldst.loadf;
+       }
+      else if (VECTOR_MODE_P (mode))
+       *cost += extra_cost->ldst.loadv;
+      else
+       {
+         /* Integer modes, i.e. everything that is neither float nor vector:
+            a mode of size 8 uses the ldrd cost, anything else the plain
+            load cost.  */
+         if (GET_MODE_SIZE (mode) == 8)
+           *cost += extra_cost->ldst.ldrd;
+         else
+           *cost += extra_cost->ldst.load;
+       }
+    }
+
+  return true;
+}
+
 /* RTX costs.  Make an estimate of the cost of executing the operation
-   X, which is contained with an operation with code OUTER_CODE.
+   X, which is contained within an operation with code OUTER_CODE.
    SPEED_P indicates whether the cost desired is the performance cost,
    or the size cost.  The estimate is stored in COST and the return
    value is TRUE if the cost calculation is final, or FALSE if the
@@ -9322,30 +9366,7 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
       return false;
 
     case MEM:
-      /* A memory access costs 1 insn if the mode is small, or the address is
-        a single register, otherwise it costs one insn per word.  */
-      if (REG_P (XEXP (x, 0)))
-       *cost = COSTS_N_INSNS (1);
-      else if (flag_pic
-              && GET_CODE (XEXP (x, 0)) == PLUS
-              && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
-       /* This will be split into two instructions.
-          See arm.md:calculate_pic_address.  */
-       *cost = COSTS_N_INSNS (2);
-      else
-       *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
-
-      /* For speed optimizations, add the costs of the address and
-        accessing memory.  */
-      if (speed_p)
-#ifdef NOT_YET
-       *cost += (extra_cost->ldst.load
-                 + arm_address_cost (XEXP (x, 0), mode,
-                                     ADDR_SPACE_GENERIC, speed_p));
-#else
-        *cost += extra_cost->ldst.load;
-#endif
-      return true;
+      return arm_mem_costs (x, extra_cost, cost, speed_p);
 
     case PARALLEL:
     {
index 34d31b88760915e758e8c5d8aaf720b240e9a3db..a89696fe45d32a7c2dfaef8b1663c3d0e71b00dd 100644 (file)
@@ -1,3 +1,9 @@
+2017-11-23  Charles Baylis  <charles.baylis@linaro.org>
+
+       * gcc.target/arm/addr-modes-float.c: New test.
+       * gcc.target/arm/addr-modes-int.c: New test.
+       * gcc.target/arm/addr-modes.h: New header.
+
 2017-11-23  Eric Botcazou  <ebotcazou@adacore.com>
 
        * testsuite/gcc.dg/pr64277.c: Adjust scan.
diff --git a/gcc/testsuite/gcc.target/arm/addr-modes-float.c b/gcc/testsuite/gcc.target/arm/addr-modes-float.c
new file mode 100644 (file)
index 0000000..3b4235c
--- /dev/null
@@ -0,0 +1,42 @@
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-do compile } */
+/* Instantiates the POST_STORE, POST_LOAD and POST_STORE_VEC generators from
+   addr-modes.h for float, double and NEON vector types, and scans the
+   assembly for vstmia/vldmia and for vst1-vst4 with an "[rN]!" address.
+   The last directive expects no "add" in the output and is currently
+   marked xfail.  */
+#include <arm_neon.h>
+
+#include "addr-modes.h"
+
+POST_STORE(float)
+/* { dg-final { scan-assembler "vstmia.32" } } */
+POST_STORE(double)
+/* { dg-final { scan-assembler "vstmia.64" } } */
+
+POST_LOAD(float)
+/* { dg-final { scan-assembler "vldmia.32" } } */
+POST_LOAD(double)
+/* { dg-final { scan-assembler "vldmia.64" } } */
+
+POST_STORE_VEC (int8_t, int8x8_t, vst1_s8)
+/* { dg-final { scan-assembler "vst1.8\t\{.*\}, \\\[r\[0-9\]+\\\]!" } } */
+POST_STORE_VEC (int8_t, int8x16_t, vst1q_s8)
+/* { dg-final { scan-assembler "vst1.8\t\{.*\[-,\]d.*\}, \\\[r\[0-9\]+\\\]!" } } */
+
+POST_STORE_VEC (int8_t, int8x8x2_t, vst2_s8)
+/* { dg-final { scan-assembler "vst2.8\t\{.*\}, \\\[r\[0-9\]+\\\]!" } } */
+POST_STORE_VEC (int8_t, int8x16x2_t, vst2q_s8)
+/* { dg-final { scan-assembler "vst2.8\t\{.*-d.*\}, \\\[r\[0-9\]+\\\]!" } } */
+
+POST_STORE_VEC (int8_t, int8x8x3_t, vst3_s8)
+/* { dg-final { scan-assembler "vst3.8\t\{.*\}, \\\[r\[0-9\]+\\\]!" } } */
+POST_STORE_VEC (int8_t, int8x16x3_t, vst3q_s8)
+/* { dg-final { scan-assembler "vst3.8\t\{d\[02468\], d\[02468\], d\[02468\]\}, \\\[r\[0-9\]+\\\]!" } } */
+/* { dg-final { scan-assembler "vst3.8\t\{d\[13579\], d\[13579\], d\[13579\]\}, \\\[r\[0-9\]+\\\]!" { xfail *-*-* } } } */
+
+POST_STORE_VEC (int8_t, int8x8x4_t, vst4_s8)
+/* { dg-final { scan-assembler "vst4.8\t\{.*\}, \\\[r\[0-9\]+\\\]!" } } */
+POST_STORE_VEC (int8_t, int8x16x4_t, vst4q_s8)
+/* { dg-final { scan-assembler "vst4.8\t\{d\[02468\], d\[02468\], d\[02468\], d\[02468\]\}, \\\[r\[0-9\]+\\\]!" } } */
+/* { dg-final { scan-assembler "vst4.8\t\{d\[13579\], d\[13579\], d\[13579\], d\[13579\]\}, \\\[r\[0-9\]+\\\]!" { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-not "add" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/arm/addr-modes-int.c b/gcc/testsuite/gcc.target/arm/addr-modes-int.c
new file mode 100644 (file)
index 0000000..e3e1e6a
--- /dev/null
@@ -0,0 +1,46 @@
+/* { dg-options "-O2 -march=armv7-a" } */
+/* { dg-add-options arm_neon } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-do compile } */
+/* Instantiates the PRE_/POST_ STORE and LOAD generators from addr-modes.h
+   for char, short, int and long long, and scans the assembly for the
+   matching str/ldr variant with a "#N]!" or "], #N" address, where N is
+   1, 2, 4 or 8 respectively.  The last directive expects no "\tadd" in
+   the output.  */
+#include "addr-modes.h"
+
+typedef long long ll;
+
+PRE_STORE(char)
+/* { dg-final { scan-assembler "strb.*#1]!" } } */
+PRE_STORE(short)
+/* { dg-final { scan-assembler "strh.*#2]!" } } */
+PRE_STORE(int)
+/* { dg-final { scan-assembler "str.*#4]!" } } */
+PRE_STORE(ll)
+/* { dg-final { scan-assembler "strd.*#8]!" } } */
+
+POST_STORE(char)
+/* { dg-final { scan-assembler "strb.*], #1" } } */
+POST_STORE(short)
+/* { dg-final { scan-assembler "strh.*], #2" } } */
+POST_STORE(int)
+/* { dg-final { scan-assembler "str.*], #4" } } */
+POST_STORE(ll)
+/* { dg-final { scan-assembler "strd.*], #8" } } */
+
+PRE_LOAD(char)
+/* { dg-final { scan-assembler "ldrb.*#1]!" } } */
+PRE_LOAD(short)
+/* { dg-final { scan-assembler "ldrsh.*#2]!" } } */
+PRE_LOAD(int)
+/* { dg-final { scan-assembler "ldr.*#4]!" } } */
+PRE_LOAD(ll)
+/* { dg-final { scan-assembler "ldrd.*#8]!" } } */
+
+POST_LOAD(char)
+/* { dg-final { scan-assembler "ldrb.*], #1" } } */
+POST_LOAD(short)
+/* { dg-final { scan-assembler "ldrsh.*], #2" } } */
+POST_LOAD(int)
+/* { dg-final { scan-assembler "ldr.*], #4" } } */
+POST_LOAD(ll)
+/* { dg-final { scan-assembler "ldrd.*], #8" } } */
+
+/* { dg-final { scan-assembler-not "\tadd" } } */
diff --git a/gcc/testsuite/gcc.target/arm/addr-modes.h b/gcc/testsuite/gcc.target/arm/addr-modes.h
new file mode 100644 (file)
index 0000000..eac4678
--- /dev/null
@@ -0,0 +1,53 @@
+
+#define PRE_STORE(T)                   \
+  T *                                  \
+  T ## _pre_store (T *p, T v)          \
+  {                                    \
+    *++p = v;                          \
+    return p;                          \
+  }                                    \
+
+#define POST_STORE(T)                  \
+  T *                                  \
+  T ## _post_store (T *p, T v)         \
+  {                                    \
+    *p++ = v;                          \
+    return p;                          \
+  }
+
+#define POST_STORE_VEC(T, VT, OP)      \
+  T *                                  \
+  VT ## _post_store (T * p, VT v)      \
+  {                                    \
+    OP (p, v);                         \
+    p += sizeof (VT) / sizeof (T);     \
+    return p;                          \
+  }
+
+#define PRE_LOAD(T)                    \
+  void                                 \
+  T ## _pre_load (T *p)                        \
+  {                                    \
+    extern void f ## T (T*,T);         \
+    T x = *++p;                                \
+    f ## T (p, x);                     \
+  }
+
+#define POST_LOAD(T)                   \
+  void                                 \
+  T ## _post_load (T *p)               \
+  {                                    \
+    extern void f ## T (T*,T);         \
+    T x = *p++;                                \
+    f ## T (p, x);                     \
+  }
+
+#define POST_LOAD_VEC(T, VT, OP)       \
+  void                                 \
+  VT ## _post_load (T * p)             \
+  {                                    \
+    extern void f ## T (T*,T);         \
+    VT x = OP (p, v);                  \
+    p += sizeof (VT) / sizeof (T);     \
+    f ## T (p, x);                     \
+  }