From: Bilyan Borisov
Date: Mon, 23 Nov 2015 14:23:20 +0000 (+0000)
Subject: [AARCH64] Adding constant folding for __builtin_fmulx* with scalar 32 and 64 bit...
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=546e500c8f90f9adf4187d07f8e6d33d82194f3c;p=gcc.git

[AARCH64] Adding constant folding for __builtin_fmulx* with scalar 32 and 64 bit arguments

gcc/

	* config/aarch64/aarch64-builtins.c
	(aarch64_gimple_fold_builtin): Added constant folding.

gcc/testsuite/

	* gcc.target/aarch64/simd/vmulx.x: New.
	* gcc.target/aarch64/simd/vmulx_f64_2.c: Likewise.
	* gcc.target/aarch64/simd/vmulxd_f64_2.c: Likewise.
	* gcc.target/aarch64/simd/vmulxs_f32_2.c: Likewise.

From-SVN: r230758
---
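For reference, a minimal sketch of the FMULX semantics that the new fold encodes (illustration only, not part of the patch itself; the helper name fmulx_ref is hypothetical): (+/-0) * (+/-Inf) yields +/-2.0, with the result's sign being the exclusive OR of the operand signs, and every other case multiplies exactly like FMUL, which is why those cases fold to MULT_EXPR.

/* Hypothetical reference model of FMULX, for illustration only.  */
#include <math.h>

static double
fmulx_ref (double a, double b)
{
  /* Special case: (+/-0) * (+/-Inf) yields +/-2.0 instead of the NaN an
     ordinary multiply would produce.  */
  if ((a == 0.0 && isinf (b)) || (isinf (a) && b == 0.0))
    {
      /* The result is negative iff exactly one operand is negative.  */
      int neg = (signbit (a) != 0) != (signbit (b) != 0);
      return neg ? -2.0 : 2.0;
    }
  /* All other cases behave like an ordinary multiply (FMUL).  */
  return a * b;
}

This is why, when only one operand is a constant, the code below folds to MULT_EXPR only if that constant is neither +/-0 nor +/-Inf: otherwise the special case could still be hit at run time.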
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b1d0918ced9..988f7238ec5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2015-11-23  Bilyan Borisov
+
+	* config/aarch64/aarch64-builtins.c
+	(aarch64_gimple_fold_builtin): Fold FMULX.
+
 2015-11-23  Richard Biener
 	Jiong Wang
 
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index c4cda4f31a3..45011f61d9b 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1468,7 +1468,7 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       if (fndecl)
 	{
 	  int fcode = DECL_FUNCTION_CODE (fndecl);
-	  int nargs = gimple_call_num_args (stmt);
+	  unsigned nargs = gimple_call_num_args (stmt);
 	  tree *args = (nargs > 0
 			? gimple_call_arg_ptr (stmt, 0)
 			: &error_mark_node);
@@ -1492,7 +1492,54 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	    new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
 					    REDUC_MIN_EXPR, args[0]);
 	    break;
-
+	  BUILTIN_GPF (BINOP, fmulx, 0)
+	    {
+	      gcc_assert (nargs == 2);
+	      bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST;
+	      bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST;
+	      if (a0_cst_p || a1_cst_p)
+		{
+		  if (a0_cst_p && a1_cst_p)
+		    {
+		      tree t0 = TREE_TYPE (args[0]);
+		      real_value a0 = (TREE_REAL_CST (args[0]));
+		      real_value a1 = (TREE_REAL_CST (args[1]));
+		      if (real_equal (&a1, &dconst0))
+			std::swap (a0, a1);
+		      /* According to real_equal (), +0 equals -0.  */
+		      if (real_equal (&a0, &dconst0) && real_isinf (&a1))
+			{
+			  real_value res = dconst2;
+			  res.sign = a0.sign ^ a1.sign;
+			  new_stmt =
+			    gimple_build_assign (gimple_call_lhs (stmt),
+						 REAL_CST,
+						 build_real (t0, res));
+			}
+		      else
+			new_stmt =
+			  gimple_build_assign (gimple_call_lhs (stmt),
+					       MULT_EXPR,
+					       args[0], args[1]);
+		    }
+		  else /* a0_cst_p ^ a1_cst_p.  */
+		    {
+		      real_value const_part = a0_cst_p
+			? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]);
+		      if (!real_equal (&const_part, &dconst0)
+			  && !real_isinf (&const_part))
+			new_stmt =
+			  gimple_build_assign (gimple_call_lhs (stmt),
+					       MULT_EXPR, args[0], args[1]);
+		    }
+		}
+	      if (new_stmt)
+		{
+		  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+		  gimple_set_vdef (new_stmt, gimple_vdef (stmt));
+		}
+	      break;
+	    }
 	  default:
 	    break;
 	  }
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ac6f5ef6f8e..2a5d8930aa0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2015-11-23  Bilyan Borisov
+
+	* gcc.target/aarch64/simd/vmulx.x: New.
+	* gcc.target/aarch64/simd/vmulx_f64_2.c: Likewise.
+	* gcc.target/aarch64/simd/vmulxd_f64_2.c: Likewise.
+	* gcc.target/aarch64/simd/vmulxs_f32_2.c: Likewise.
+
 2015-11-23  Igor Zamyatin
 
 	PR c++/68001
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulx.x b/gcc/testsuite/gcc.target/aarch64/simd/vmulx.x
new file mode 100644
index 00000000000..8968a64a95c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulx.x
@@ -0,0 +1,46 @@
+#define PASS_ARRAY(...) {__VA_ARGS__}
+
+#define SETUP_TEST_CASE_VEC(I, INTRINSIC, BASE_TYPE, TYPE1, TYPE2,      \
+                            VALS1, VALS2, EXPS, LEN, FM, Q_LD, Q_ST,    \
+                            V1, V2)                                     \
+  do                                                                    \
+    {                                                                   \
+      int i##I;                                                         \
+      BASE_TYPE vec##I##_1_data[] = VALS1;                              \
+      BASE_TYPE vec##I##_2_data[] = VALS2;                              \
+      V1 TYPE1 vec##I##_1 = vld1##Q_LD##_##FM (vec##I##_1_data);        \
+      V2 TYPE2 vec##I##_2 = vld1##Q_LD##_##FM (vec##I##_2_data);        \
+      TYPE1 actual##I##_v = INTRINSIC (vec##I##_1, vec##I##_2);         \
+      volatile BASE_TYPE expected##I[] = EXPS;                          \
+      BASE_TYPE actual##I[LEN];                                         \
+      vst1##Q_ST##_##FM (actual##I, actual##I##_v);                     \
+      for (i##I = 0; i##I < LEN; ++i##I)                                \
+        if (actual##I[i##I] != expected##I[i##I])                       \
+          abort ();                                                     \
+    }                                                                   \
+  while (0)                                                             \
+
+#define SETUP_TEST_CASE_SCALAR(I, INTRINSIC, TYPE, VAL1, VAL2, EXP)     \
+  do                                                                    \
+    {                                                                   \
+      TYPE vec_##I##_1 = VAL1;                                          \
+      TYPE vec_##I##_2 = VAL2;                                          \
+      TYPE expected_##I = EXP;                                          \
+      volatile TYPE actual_##I = INTRINSIC (vec_##I##_1, vec_##I##_2);  \
+      if (actual_##I != expected_##I)                                   \
+        abort ();                                                       \
+    }                                                                   \
+  while (0)                                                             \
+
+/* Functions used to return values that won't be optimised away.  */
+float32_t __attribute__ ((noinline))
+foo32 ()
+{
+  return 1.0;
+}
+
+float64_t __attribute__ ((noinline))
+foo64 ()
+{
+  return 1.0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulx_f64_2.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_f64_2.c
new file mode 100644
index 00000000000..2d11675ed0b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_f64_2.c
@@ -0,0 +1,59 @@
+/* Test the vmulx_f64 AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+#include "vmulx.x"
+
+extern void abort (void);
+
+int
+main (void)
+{
+  float64_t v1 = 3.14159265359;
+  float64_t v2 = 1.383894;
+
+  /* Constant * constant, shouldn't generate fmulx or fmul, only fmov.  */
+  SETUP_TEST_CASE_VEC (1, vmulx_f64, float64_t, float64x1_t, float64x1_t,
+                       PASS_ARRAY (v1), PASS_ARRAY (v2), PASS_ARRAY (v1 * v2),
+                       1, f64, , , ,);
+  SETUP_TEST_CASE_VEC (2, vmulx_f64, float64_t, float64x1_t, float64x1_t,
+                       PASS_ARRAY (0.0), PASS_ARRAY (__builtin_huge_val ()),
+                       PASS_ARRAY (2.0), 1, f64, , , ,);
+  SETUP_TEST_CASE_VEC (3, vmulx_f64, float64_t, float64x1_t, float64x1_t,
+                       PASS_ARRAY (0.0), PASS_ARRAY (-__builtin_huge_val ()),
+                       PASS_ARRAY (-2.0), 1, f64, , , ,);
+  SETUP_TEST_CASE_VEC (4, vmulx_f64, float64_t, float64x1_t, float64x1_t,
+                       PASS_ARRAY (-0.0), PASS_ARRAY (__builtin_huge_val ()),
+                       PASS_ARRAY (-2.0), 1, f64, , , ,);
+  SETUP_TEST_CASE_VEC (5, vmulx_f64, float64_t, float64x1_t, float64x1_t,
+                       PASS_ARRAY (-0.0), PASS_ARRAY (-__builtin_huge_val ()),
+                       PASS_ARRAY (2.0), 1, f64, , , ,);
+  /* Constant +/- 0 or +/- inf * non-constant should generate fmulx.  */
+  SETUP_TEST_CASE_VEC (6, vmulx_f64, float64_t, float64x1_t, float64x1_t,
+                       PASS_ARRAY (/* volatile. */1.0),
+                       PASS_ARRAY (-__builtin_huge_val ()),
+                       PASS_ARRAY (-__builtin_huge_val ()), 1, f64, , , volatile
+                       ,);
+  SETUP_TEST_CASE_VEC (7, vmulx_f64, float64_t, float64x1_t, float64x1_t,
+                       PASS_ARRAY (/* volatile. */1.0),
+                       PASS_ARRAY (__builtin_huge_val ()),
+                       PASS_ARRAY (__builtin_huge_val ()), 1, f64, , , volatile
+                       ,);
+  SETUP_TEST_CASE_VEC (8, vmulx_f64, float64_t, float64x1_t, float64x1_t,
+                       PASS_ARRAY (/* volatile. */1.0), PASS_ARRAY (0.0),
+                       PASS_ARRAY (0.0), 1, f64, , , volatile,);
+  SETUP_TEST_CASE_VEC (9, vmulx_f64, float64_t, float64x1_t, float64x1_t,
+                       PASS_ARRAY (/* volatile. */1.0), PASS_ARRAY (-0.0),
+                       PASS_ARRAY (-0.0), 1, f64, , , volatile,);
+  /* Constant non +/- 0 or non +/- inf * non-constant should generate fmul.  */
+  SETUP_TEST_CASE_VEC (10, vmulx_f64, float64_t, float64x1_t, float64x1_t,
+                       PASS_ARRAY (/* volatile. */1.0), PASS_ARRAY (v1),
+                       PASS_ARRAY (v1), 1, f64, , , volatile,);
+  return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 4 } } */
+/* { dg-final { scan-assembler-times "fmul\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmov\[ \t\]+\[dD\]\[0-9\]+, ?2.0e\\+0\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmov\[ \t\]+\[dD\]\[0-9\]+, ?-2.0e\\+0\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_f64_2.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_f64_2.c
new file mode 100644
index 00000000000..b1f4bcd33fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_f64_2.c
@@ -0,0 +1,45 @@
+/* Test the vmulxd_f64 AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+#include "vmulx.x"
+
+extern void abort (void);
+
+int
+main (void)
+{
+  float64_t v1 = 3.14159265359;
+  float64_t v2 = 1.383894;
+
+  /* Constant * constant, shouldn't generate fmulx or fmul, only fmov.  */
+  SETUP_TEST_CASE_SCALAR (1, vmulxd_f64, float64_t, v1, v2, v1 * v2);
+  SETUP_TEST_CASE_SCALAR (2, vmulxd_f64, float64_t, 0.0,
+                          __builtin_huge_val (), 2.0);
+  SETUP_TEST_CASE_SCALAR (3, vmulxd_f64, float64_t, 0.0,
+                          -__builtin_huge_val (), -2.0);
+  SETUP_TEST_CASE_SCALAR (4, vmulxd_f64, float64_t, -0.0,
+                          __builtin_huge_val (), -2.0);
+  SETUP_TEST_CASE_SCALAR (5, vmulxd_f64, float64_t, -0.0,
+                          -__builtin_huge_val (), 2.0);
+  /* Constant +/- 0 or +/- inf * non-constant should generate fmulx.  */
+  SETUP_TEST_CASE_SCALAR (6, vmulxd_f64, float64_t, foo64 (),
+                          -__builtin_huge_val (), -__builtin_huge_val ());
+  SETUP_TEST_CASE_SCALAR (7, vmulxd_f64, float64_t, foo64 (),
+                          __builtin_huge_val (), __builtin_huge_val ());
+  SETUP_TEST_CASE_SCALAR (8, vmulxd_f64, float64_t, foo64 (),
+                          0, 0);
+  SETUP_TEST_CASE_SCALAR (9, vmulxd_f64, float64_t, foo64 (),
+                          -0.0, -0.0);
+  /* Constant non +/- 0 or non +/- inf * non-constant should generate fmul.  */
+  SETUP_TEST_CASE_SCALAR (10, vmulxd_f64, float64_t, foo64 (),
+                          v1, v1);
+
+  return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 4 } } */
+/* { dg-final { scan-assembler-times "fmul\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmov\[ \t\]+\[dD\]\[0-9\]+, ?2.0e\\+0\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmov\[ \t\]+\[dD\]\[0-9\]+, ?-2.0e\\+0\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_f32_2.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_f32_2.c
new file mode 100644
index 00000000000..3d9139859ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_f32_2.c
@@ -0,0 +1,44 @@
+/* Test the vmulxs_f32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+#include "vmulx.x"
+
+extern void abort (void);
+
+int
+main (void)
+{
+  float32_t v1 = 3.14159265359;
+  float32_t v2 = 1.383894;
+
+  /* Constant * constant, shouldn't generate fmulx or fmul, only fmov.  */
+  SETUP_TEST_CASE_SCALAR (1, vmulxs_f32, float32_t, v1, v2, v1 * v2);
+  SETUP_TEST_CASE_SCALAR (2, vmulxs_f32, float32_t, 0.0,
+                          __builtin_huge_valf (), 2.0);
+  SETUP_TEST_CASE_SCALAR (3, vmulxs_f32, float32_t, 0.0,
+                          -__builtin_huge_valf (), -2.0);
+  SETUP_TEST_CASE_SCALAR (4, vmulxs_f32, float32_t, -0.0,
+                          __builtin_huge_valf (), -2.0);
+  SETUP_TEST_CASE_SCALAR (5, vmulxs_f32, float32_t, -0.0,
+                          -__builtin_huge_valf (), 2.0);
+  /* Constant +/- 0 or +/- inf * non-constant should generate fmulx.  */
+  SETUP_TEST_CASE_SCALAR (6, vmulxs_f32, float32_t, foo32 (),
+                          -__builtin_huge_valf (), -__builtin_huge_valf ());
+  SETUP_TEST_CASE_SCALAR (7, vmulxs_f32, float32_t, foo32 (),
+                          __builtin_huge_valf (), __builtin_huge_valf ());
+  SETUP_TEST_CASE_SCALAR (8, vmulxs_f32, float32_t, foo32 (),
+                          0, 0);
+  SETUP_TEST_CASE_SCALAR (9, vmulxs_f32, float32_t, foo32 (),
+                          -0.0, -0.0);
+  /* Constant non +/- 0 or non +/- inf * non-constant should generate fmul.  */
+  SETUP_TEST_CASE_SCALAR (10, vmulxs_f32, float32_t, foo32 (),
+                          v1, v1);
+  return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+\n" 4 } } */
+/* { dg-final { scan-assembler-times "fmul\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmov\[ \t\]+\[sS\]\[0-9\]+, ?2.0e\\+0\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmov\[ \t\]+\[sS\]\[0-9\]+, ?-2.0e\\+0\n" 1 } } */
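As a usage-level illustration of what the new tests check (a sketch only, not part of the patch; the function names below are hypothetical and the snippet assumes an AArch64 compiler with optimisation enabled, as the tests above use -O3):

#include <arm_neon.h>

/* Both operands are constants and hit the 0.0 * +Inf special case, so the
   call is expected to fold to the constant 2.0 (an fmov, no fmulx).  */
float64_t
fold_to_constant (void)
{
  return vmulxd_f64 (0.0, __builtin_huge_val ());
}

/* One operand is a constant that is neither +/-0 nor +/-Inf, so the call
   is expected to fold to an ordinary multiply (fmul rather than fmulx).  */
float64_t
fold_to_fmul (float64_t x)
{
  return vmulxd_f64 (x, 3.0);
}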