[AArch64][1/2] Add fmul-by-power-of-2+fcvt optimisation
author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Tue, 20 Oct 2015 16:01:53 +0000 (16:01 +0000)
committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>
Tue, 20 Oct 2015 16:01:53 +0000 (16:01 +0000)
* config/aarch64/aarch64.md
(*aarch64_fcvt<su_optab><GPF:mode><GPI:mode>2_mult): New pattern.
* config/aarch64/aarch64-simd.md
(*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult): Likewise.
* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle above patterns.
(aarch64_fpconst_pow_of_2): New function.
(aarch64_vec_fpconst_pow_of_2): Likewise.
* config/aarch64/aarch64-protos.h (aarch64_fpconst_pow_of_2): Declare
prototype.
(aarch64_vec_fpconst_pow_of_2): Likewise.
* config/aarch64/predicates.md (aarch64_fp_pow2): New predicate.
(aarch64_fp_vec_pow2): Likewise.

* gcc.target/aarch64/fmul_fcvt_1.c: New test.
* gcc.target/aarch64/fmul_fcvt_2.c: Likewise.

From-SVN: r229085

gcc/ChangeLog
gcc/config/aarch64/aarch64-protos.h
gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/aarch64.c
gcc/config/aarch64/aarch64.md
gcc/config/aarch64/predicates.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/fmul_fcvt_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/fmul_fcvt_2.c [new file with mode: 0644]

index 1badb9bdceabaf3978a940c0463cc64e45081fa9..862b68b9ce682cb01a2af5ddbc6ae32f537735d6 100644 (file)
@@ -1,3 +1,18 @@
+2015-10-20  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * config/aarch64/aarch64.md
+       (*aarch64_fcvt<su_optab><GPF:mode><GPI:mode>2_mult): New pattern.
+       * config/aarch64/aarch64-simd.md
+       (*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult): Likewise.
+       * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle above patterns.
+       (aarch64_fpconst_pow_of_2): New function.
+       (aarch64_vec_fpconst_pow_of_2): Likewise.
+       * config/aarch64/aarch64-protos.h (aarch64_fpconst_pow_of_2): Declare
+       prototype.
+       (aarch64_vec_fpconst_pow_of_2): Likewise.
+       * config/aarch64/predicates.md (aarch64_fp_pow2): New predicate.
+       (aarch64_fp_vec_pow2): Likewise.
+
 2015-10-20  Uros Bizjak  <ubizjak@gmail.com>
 
        * config/alpha/alpha.h (HARD_REGNO_NREGS): Use CEIL macro.
index baaf1bd776d759affc05edf897fc4517e0f83c7d..2a969adf5d303c336d7ae26f4ba5ae683c871735 100644 (file)
@@ -294,12 +294,14 @@ enum aarch64_symbol_type aarch64_classify_symbol (rtx, rtx);
 enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
 enum reg_class aarch64_regno_regclass (unsigned);
 int aarch64_asm_preferred_eh_data_format (int, int);
+int aarch64_fpconst_pow_of_2 (rtx);
 machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
                                                       machine_mode);
 int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_simd_attr_length_move (rtx_insn *);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
+int aarch64_vec_fpconst_pow_of_2 (rtx);
 rtx aarch64_final_eh_return_addr (void);
 rtx aarch64_legitimize_reload_address (rtx *, machine_mode, int, int, int);
 const char *aarch64_output_move_struct (rtx *operands);
index 167277e91a3fb38e255fbab99160a0b012e10e54..cf1ff6d7da346e545f86c27b2fa1ecfd86cf757b 100644 (file)
   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
 )
 
+(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult" ;; Vector fmul by a power of 2 feeding a float->int conversion, emitted as one fcvtz<su> with an #fbits operand.
+  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
+       (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
+                              [(mult:VDQF
+        (match_operand:VDQF 1 "register_operand" "w")
+        (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))] ;; Operand 2 is the constant multiplier; its log2 becomes #fbits.
+                              UNSPEC_FRINTZ)))]
+  "TARGET_SIMD
+   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
+               GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))" ;; Only match when log2 of the multiplier lies between 1 and the element size in bits.
+  {
+    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
+    char buf[64];
+    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
+    output_asm_insn (buf, operands);
+    return "";
+  }
+  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
+)
+
 (define_expand "<optab><VDQF:mode><fcvt_target>2"
   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
        (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
index aba5b56071d673816b267203500ad343e24eede1..6197a367a7ab7f6a09dfe31e5cc05077ed6efef0 100644 (file)
@@ -6786,6 +6786,19 @@ cost_plus:
          else
            *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
        }
+
+      /* We can combine fmul by a power of 2 followed by a fcvt into a single
+        fixed-point fcvt.  */
+      if (GET_CODE (x) == MULT
+         && ((VECTOR_MODE_P (mode)
+              && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
+             || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
+       {
+         *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
+                            0, speed);
+         return true;
+       }
+
       *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
       return true;
 
@@ -13250,6 +13263,52 @@ aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
   return default_unspec_may_trap_p (x, flags);
 }
 
+
+/* If X is a positive CONST_DOUBLE with a value that is a power of 2
+   return the log2 of that value.  Otherwise return -1.
+
+   Besides being a power of 2, the value must be an integer: anything
+   that is not a CONST_DOUBLE, and any value that is negative, a NaN,
+   an infinity or not an integer (as tested by real_isinteger in
+   DFmode), yields -1.  For the remaining values the result is
+   exact_log2 of the value converted with real_to_integer.  */
+
+int
+aarch64_fpconst_pow_of_2 (rtx x)
+{
+  const REAL_VALUE_TYPE *r;
+
+  if (!CONST_DOUBLE_P (x))
+    return -1;
+
+  r = CONST_DOUBLE_REAL_VALUE (x);
+
+  if (REAL_VALUE_NEGATIVE (*r)
+      || REAL_VALUE_ISNAN (*r)
+      || REAL_VALUE_ISINF (*r)
+      || !real_isinteger (r, DFmode))
+    return -1;
+
+  return exact_log2 (real_to_integer (r));
+}
+
+/* If X is a vector of equal CONST_DOUBLE values and that value is
+   Y, return the aarch64_fpconst_pow_of_2 of Y.  Otherwise return -1.
+
+   X must be a CONST_VECTOR whose mode class is MODE_VECTOR_FLOAT.
+   Note that -1 is also returned when aarch64_fpconst_pow_of_2 of the
+   first element is zero or negative, so a successful result is always
+   strictly positive.  Every other element must give the same result
+   as the first one.  */
+
+int
+aarch64_vec_fpconst_pow_of_2 (rtx x)
+{
+  if (GET_CODE (x) != CONST_VECTOR)
+    return -1;
+
+  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
+    return -1;
+
+  int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
+  if (firstval <= 0)
+    return -1;
+
+  for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
+    if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
+      return -1;
+
+  return firstval;
+}
+
 /* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float.  */
 static tree
 aarch64_promoted_type (const_tree t)
index 208f58f6dcdb291cead5e8e024808b712651f239..c3c1e9db852bf23b3e69515561741d82ae66416d 100644 (file)
   [(set_attr "type" "f_cvtf2i")]
 )
 
+(define_insn "*aarch64_fcvt<su_optab><GPF:mode><GPI:mode>2_mult" ;; Scalar fmul by a power of 2 feeding a float->int conversion, emitted as one fcvtz<su> with an #fbits operand.
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (FIXUORS:GPI
+         (mult:GPF
+           (match_operand:GPF 1 "register_operand" "w")
+           (match_operand:GPF 2 "aarch64_fp_pow2" "F"))))] ;; Operand 2 is the constant multiplier; its log2 becomes #fbits.
+  "TARGET_FLOAT
+   && IN_RANGE (aarch64_fpconst_pow_of_2 (operands[2]), 1,
+               GET_MODE_BITSIZE (<GPI:MODE>mode))" ;; Only match when log2 of the multiplier lies between 1 and the destination size in bits.
+  {
+    int fbits = aarch64_fpconst_pow_of_2 (operands[2]);
+    char buf[64];
+    snprintf (buf, 64, "fcvtz<su>\\t%%<GPI:w>0, %%<GPF:s>1, #%d", fbits);
+    output_asm_insn (buf, operands);
+    return "";
+  }
+  [(set_attr "type" "f_cvtf2i")]
+)
+
 ;; fma - no throw
 
 (define_insn "fma<mode>4"
index 7841378a5dacb0531c898d80b0eb02733f8fc901..046f852b1d35f2678282ee59545867dca96313b1 100644 (file)
        (and (match_code "const_double")
            (match_test "aarch64_float_const_zero_rtx_p (op)"))))
 
+(define_predicate "aarch64_fp_pow2" ;; A const_double for which aarch64_fpconst_pow_of_2 returns a positive value.
+  (and (match_code "const_double")
+       (match_test "aarch64_fpconst_pow_of_2 (op) > 0")))
+
+(define_predicate "aarch64_fp_vec_pow2" ;; Any operand for which aarch64_vec_fpconst_pow_of_2 returns a positive value.
+  (match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0"))
+
 (define_predicate "aarch64_plus_immediate"
   (and (match_code "const_int")
        (ior (match_test "aarch64_uimm12_shift (INTVAL (op))")
index 1d8f1cb8c9ecc53839b8090642ee15249ca6ba1c..d32bf0c0df7c4e4eb9b81eec7c66fdda2ef48850 100644 (file)
@@ -1,3 +1,8 @@
+2015-10-20  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * gcc.target/aarch64/fmul_fcvt_1.c: New test.
+       * gcc.target/aarch64/fmul_fcvt_2.c: Likewise.
+
 2015-10-20  H.J. Lu  <hongjiu.lu@intel.com>
 
        PR target/66810
diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_fcvt_1.c b/gcc/testsuite/gcc.target/aarch64/fmul_fcvt_1.c
new file mode 100644 (file)
index 0000000..4e3ace7
--- /dev/null
@@ -0,0 +1,129 @@
+/* { dg-do run } */
+/* { dg-options "-save-temps -O2 -fno-inline" } */
+
+#define FUNC_DEFS(__a) /* Define four functions taking a float X and returning X * __a.0f as int, unsigned int, long and unsigned long.  */ \
+int                    \
+sffoo##__a (float x)   \
+{                      \
+  return x * __a##.0f; \
+}                      \
+                       \
+unsigned int           \
+usffoo##__a (float x)  \
+{                      \
+  return x * __a##.0f; \
+}                      \
+                       \
+long                   \
+lsffoo##__a (float x)  \
+{                      \
+  return x * __a##.0f; \
+}                      \
+                       \
+unsigned long          \
+ulsffoo##__a (float x) \
+{                      \
+  return x * __a##.0f; \
+}
+
+#define FUNC_DEFD(__a) /* Define four functions taking a double X and returning X * __a.0 as long, unsigned long, int and unsigned int.  */ \
+long                   \
+dffoo##__a (double x)  \
+{                      \
+  return x * __a##.0;  \
+}                      \
+                       \
+unsigned long          \
+udffoo##__a (double x) \
+{                      \
+  return x * __a##.0;  \
+}                      \
+int                    \
+sdffoo##__a (double x) \
+{                      \
+  return x * __a##.0;  \
+}                      \
+                       \
+unsigned int           \
+usdffoo##__a (double x)        \
+{                      \
+  return x * __a##.0;  \
+}
+
+FUNC_DEFS (4)
+FUNC_DEFD (4)
+/* { dg-final { scan-assembler-times "fcvtzs\tw\[0-9\], s\[0-9\]*.*#2" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzs\tx\[0-9\], s\[0-9\]*.*#2" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzs\tx\[0-9\], d\[0-9\]*.*#2" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzs\tw\[0-9\], d\[0-9\]*.*#2" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzu\tw\[0-9\], s\[0-9\]*.*#2" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzu\tx\[0-9\], s\[0-9\]*.*#2" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzu\tx\[0-9\], d\[0-9\]*.*#2" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzu\tw\[0-9\], d\[0-9\]*.*#2" 1 } } */
+
+FUNC_DEFS (8)
+FUNC_DEFD (8)
+/* { dg-final { scan-assembler-times "fcvtzs\tw\[0-9\], s\[0-9\]*.*#3" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzs\tx\[0-9\], s\[0-9\]*.*#3" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzs\tx\[0-9\], d\[0-9\]*.*#3" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzs\tw\[0-9\], d\[0-9\]*.*#3" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzu\tw\[0-9\], s\[0-9\]*.*#3" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzu\tx\[0-9\], s\[0-9\]*.*#3" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzu\tx\[0-9\], d\[0-9\]*.*#3" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzu\tw\[0-9\], d\[0-9\]*.*#3" 1 } } */
+
+FUNC_DEFS (16)
+FUNC_DEFD (16)
+/* { dg-final { scan-assembler-times "fcvtzs\tw\[0-9\], s\[0-9\]*.*#4" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzs\tx\[0-9\], s\[0-9\]*.*#4" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzs\tx\[0-9\], d\[0-9\]*.*#4" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzs\tw\[0-9\], d\[0-9\]*.*#4" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzu\tw\[0-9\], s\[0-9\]*.*#4" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzu\tx\[0-9\], s\[0-9\]*.*#4" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzu\tx\[0-9\], d\[0-9\]*.*#4" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzu\tw\[0-9\], d\[0-9\]*.*#4" 1 } } */
+
+
+#define FUNC_TESTS(__a, __b) /* Abort unless each FUNC_DEFS (__a) function applied to __b equals __b * __a cast to its return type.  */ \
+do                                                             \
+  {                                                            \
+    if (sffoo##__a (__b) != (int)(__b * __a))                  \
+      __builtin_abort ();                                      \
+    if (usffoo##__a (__b) != (unsigned int)(__b * __a))        \
+      __builtin_abort ();                                      \
+    if (lsffoo##__a (__b) != (long)(__b * __a))                \
+      __builtin_abort ();                                      \
+    if (ulsffoo##__a (__b) != (unsigned long)(__b * __a))      \
+      __builtin_abort ();                                      \
+  } while (0)
+
+#define FUNC_TESTD(__a, __b) /* Abort unless each FUNC_DEFD (__a) function applied to __b equals __b * __a cast to its return type.  */ \
+do                                                             \
+  {                                                            \
+    if (dffoo##__a (__b) != (long)(__b * __a))                 \
+      __builtin_abort ();                                      \
+    if (udffoo##__a (__b) != (unsigned long)(__b * __a))       \
+      __builtin_abort ();                                      \
+    if (sdffoo##__a (__b) != (int)(__b * __a))                 \
+      __builtin_abort ();                                      \
+    if (usdffoo##__a (__b) != (unsigned int)(__b * __a))       \
+      __builtin_abort ();                                      \
+  } while (0)
+
+int
+main (void)  /* Run every generated function for multipliers 4, 8 and 16 over a range of inputs.  */
+{
+  float i;
+
+  for (i = -0.001; i < 32.0; i += 1.0f)  /* The first input is just below zero; later ones step by 1.0f.  */
+    {
+      FUNC_TESTS (4, i);
+      FUNC_TESTS (8, i);
+      FUNC_TESTS (16, i);
+
+      FUNC_TESTD (4, i);  /* The float I is also passed to the functions taking a double.  */
+      FUNC_TESTD (8, i);
+      FUNC_TESTD (16, i);
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_fcvt_2.c b/gcc/testsuite/gcc.target/aarch64/fmul_fcvt_2.c
new file mode 100644 (file)
index 0000000..d8a9335
--- /dev/null
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-options "-save-temps -O2 -ftree-vectorize -fno-inline" } */
+
+#define N 1024
+
+#define FUNC_DEF(__a) /* Define foo<__a>, which stores a[i] * __a.0f into the int array b for every i below N.  */ \
+void                           \
+foo##__a (float *a, int *b)    \
+{                              \
+  int i;                       \
+  for (i = 0; i < N; i++)      \
+    b[i] = a[i] * __a##.0f;    \
+}
+
+FUNC_DEF (4)
+FUNC_DEF (8)
+FUNC_DEF (16)
+
+int ints[N];
+float floats[N];
+
+void
+reset_ints (int *arr)  /* Set the first N elements of ARR to zero.  */
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    arr[i] = 0;
+}
+
+void
+check_result (int *is, int n)  /* Abort unless is[i] == i * n for each of the first N (the macro) elements; n is the multiplier.  */
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    if (is[i] != i * n)
+      __builtin_abort ();
+}
+
+#define FUNC_CHECK(__a) /* Clear ints, run foo<__a> from floats into ints, then verify the results against multiplier __a.  */ \
+do                             \
+  {                            \
+    reset_ints (ints);         \
+    foo##__a (floats, ints);   \
+    check_result (ints, __a);  \
+  } while (0)
+
+
+int
+main (void)  /* Fill floats with 0 .. N-1, then check the multipliers 4, 8 and 16.  */
+{
+  int i;
+  for (i = 0; i < N; i++)
+    floats[i] = (float) i;  /* check_result expects input element i to hold the value i.  */
+
+  FUNC_CHECK (4);
+  FUNC_CHECK (8);
+  FUNC_CHECK (16);
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "fmul\tv\[0-9\]*.*" } } */
+/* { dg-final { scan-assembler-times "fcvtzs\tv\[0-9\].4s, v\[0-9\].4s*.*#2" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzs\tv\[0-9\].4s, v\[0-9\].4s*.*#3" 1 } } */
+/* { dg-final { scan-assembler-times "fcvtzs\tv\[0-9\].4s, v\[0-9\].4s*.*#4" 1 } } */