From 9b1de7e2e8e99eabf2b8d1ef74eb57fbd41bc730 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 1 Nov 2017 09:41:48 +0000 Subject: [PATCH] Add more vec_duplicate simplifications This patch adds a vec_duplicate_p helper that tests for constant or non-constant vector duplicates. Together with the existing const_vec_duplicate_p, this complements the gen_vec_duplicate and gen_const_vec_duplicate added by a previous patch. The patch uses the new routines to add more rtx simplifications involving vector duplicates. These mirror simplifications that we already do for CONST_VECTOR broadcasts and are needed for variable-length SVE, which uses: (const:M (vec_duplicate:M X)) to represent constant broadcasts instead. The simplifications do trigger on the testsuite for variable duplicates too, and in each case I saw the change was an improvement. The best way of testing the new simplifications seemed to be via selftests. The patch cribs part of David's patch here: https://gcc.gnu.org/ml/gcc-patches/2016-07/msg00270.html . 2017-11-01 Richard Sandiford David Malcolm Alan Hayward David Sherwood gcc/ * rtl.h (vec_duplicate_p): New function. * selftest-rtl.c (assert_rtx_eq_at): New function. * selftest-rtl.h (ASSERT_RTX_EQ): New macro. (assert_rtx_eq_at): Declare. * selftest.h (selftest::simplify_rtx_c_tests): Declare. * selftest-run-tests.c (selftest::run_tests): Call it. * simplify-rtx.c: Include selftest.h and selftest-rtl.h. (simplify_unary_operation_1): Recursively handle vector duplicates. (simplify_binary_operation_1): Likewise. Handle VEC_SELECTs of vector duplicates. (simplify_subreg): Handle subregs of vector duplicates. (make_test_reg, test_vector_ops_duplicate, test_vector_ops) (selftest::simplify_rtx_c_tests): New functions. 
Co-Authored-By: Alan Hayward Co-Authored-By: David Malcolm Co-Authored-By: David Sherwood From-SVN: r254294 --- gcc/ChangeLog | 19 ++++ gcc/rtl.h | 15 +++ gcc/selftest-rtl.c | 23 +++++ gcc/selftest-rtl.h | 9 ++ gcc/selftest-run-tests.c | 1 + gcc/selftest.h | 1 + gcc/simplify-rtx.c | 201 ++++++++++++++++++++++++++++++++++++++- 7 files changed, 264 insertions(+), 5 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d04af4e0cf7..0dde53bfa5f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2017-11-01 Richard Sandiford + David Malcolm + Alan Hayward + David Sherwood + + * rtl.h (vec_duplicate_p): New function. + * selftest-rtl.c (assert_rtx_eq_at): New function. + * selftest-rtl.h (ASSERT_RTX_EQ): New macro. + (assert_rtx_eq_at): Declare. + * selftest.h (selftest::simplify_rtx_c_tests): Declare. + * selftest-run-tests.c (selftest::run_tests): Call it. + * simplify-rtx.c: Include selftest.h and selftest-rtl.h. + (simplify_unary_operation_1): Recursively handle vector duplicates. + (simplify_binary_operation_1): Likewise. Handle VEC_SELECTs of + vector duplicates. + (simplify_subreg): Handle subregs of vector duplicates. + (make_test_reg, test_vector_ops_duplicate, test_vector_ops) + (selftest::simplify_rtx_c_tests): New functions. + 2017-11-01 Richard Sandiford Alan Hayward David Sherwood diff --git a/gcc/rtl.h b/gcc/rtl.h index 8e82f045402..b01e306c395 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -2772,6 +2772,21 @@ const_vec_duplicate_p (T x, T *elt) return false; } +/* Return true if X is a vector with a duplicated element value, either + constant or nonconstant. Store the duplicated element in *ELT if so. */ + +template <typename T> +inline bool +vec_duplicate_p (T x, T *elt) +{ + if (GET_CODE (x) == VEC_DUPLICATE) + { + *elt = XEXP (x, 0); + return true; + } + return const_vec_duplicate_p (x, elt); +} + /* If X is a vector constant with a duplicated element value, return that element value, otherwise return X. 
*/ diff --git a/gcc/selftest-rtl.c b/gcc/selftest-rtl.c index bfb1ab733a8..7e748575825 100644 --- a/gcc/selftest-rtl.c +++ b/gcc/selftest-rtl.c @@ -35,6 +35,29 @@ along with GCC; see the file COPYING3. If not see namespace selftest { +/* Compare rtx EXPECTED and ACTUAL using rtx_equal_p, calling + ::selftest::pass if they are equal, aborting if they are non-equal. + LOC is the effective location of the assertion, MSG describes it. */ + +void +assert_rtx_eq_at (const location &loc, const char *msg, + rtx expected, rtx actual) +{ + if (rtx_equal_p (expected, actual)) + ::selftest::pass (loc, msg); + else + { + fprintf (stderr, "%s:%i: %s: FAIL: %s\n", loc.m_file, loc.m_line, + loc.m_function, msg); + fprintf (stderr, " expected: "); + print_rtl (stderr, expected); + fprintf (stderr, "\n actual: "); + print_rtl (stderr, actual); + fprintf (stderr, "\n"); + abort (); + } +} + /* Compare rtx EXPECTED and ACTUAL by pointer equality, calling ::selftest::pass if they are equal, aborting if they are non-equal. LOC is the effective location of the assertion, MSG describes it. */ diff --git a/gcc/selftest-rtl.h b/gcc/selftest-rtl.h index 2218f9c5280..fe928e78da8 100644 --- a/gcc/selftest-rtl.h +++ b/gcc/selftest-rtl.h @@ -47,6 +47,15 @@ assert_rtl_dump_eq (const location &loc, const char *expected_dump, rtx x, assert_rtl_dump_eq (SELFTEST_LOCATION, (EXPECTED_DUMP), (RTX), \ (REUSE_MANAGER)) +#define ASSERT_RTX_EQ(EXPECTED, ACTUAL) \ + SELFTEST_BEGIN_STMT \ + const char *desc = "ASSERT_RTX_EQ (" #EXPECTED ", " #ACTUAL ")"; \ + ::selftest::assert_rtx_eq_at (SELFTEST_LOCATION, desc, (EXPECTED), \ + (ACTUAL)); \ + SELFTEST_END_STMT + +extern void assert_rtx_eq_at (const location &, const char *, rtx, rtx); + /* Evaluate rtx EXPECTED and ACTUAL and compare them with == (i.e. pointer equality), calling ::selftest::pass if they are equal, aborting if they are non-equal. 
*/ diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c index 11bf0cc9cb7..6030d3b22e7 100644 --- a/gcc/selftest-run-tests.c +++ b/gcc/selftest-run-tests.c @@ -95,6 +95,7 @@ selftest::run_tests () store_merging_c_tests (); predict_c_tests (); + simplify_rtx_c_tests (); /* Run any lang-specific selftests. */ lang_hooks.run_lang_selftests (); diff --git a/gcc/selftest.h b/gcc/selftest.h index 6478922cd2c..cdad939ce68 100644 --- a/gcc/selftest.h +++ b/gcc/selftest.h @@ -199,6 +199,7 @@ extern void unique_ptr_tests_cc_tests (); extern void vec_c_tests (); extern void wide_int_cc_tests (); extern void predict_c_tests (); +extern void simplify_rtx_c_tests (); extern int num_passes; diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 9736bccdaf0..fea5a8ad17b 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -33,6 +33,8 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic-core.h" #include "varasm.h" #include "flags.h" +#include "selftest.h" +#include "selftest-rtl.h" /* Simplification and canonicalization of RTL. */ @@ -925,7 +927,7 @@ static rtx simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) { enum rtx_code reversed; - rtx temp; + rtx temp, elt; scalar_int_mode inner, int_mode, op_mode, op0_mode; switch (code) @@ -1684,6 +1686,28 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) break; } + if (VECTOR_MODE_P (mode) && vec_duplicate_p (op, &elt)) + { + /* Try applying the operator to ELT and see if that simplifies. + We can duplicate the result if so. + + The reason we don't use simplify_gen_unary is that it isn't + necessarily a win to convert things like: + + (neg:V (vec_duplicate:V (reg:S R))) + + to: + + (vec_duplicate:V (neg:S (reg:S R))) + + The first might be done entirely in vector registers while the + second might need a move between register files. 
*/ + temp = simplify_unary_operation (code, GET_MODE_INNER (mode), + elt, GET_MODE_INNER (GET_MODE (op))); + if (temp) + return gen_vec_duplicate (mode, temp); + } + return 0; } @@ -2141,7 +2165,7 @@ static rtx simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, rtx op0, rtx op1, rtx trueop0, rtx trueop1) { - rtx tem, reversed, opleft, opright; + rtx tem, reversed, opleft, opright, elt0, elt1; HOST_WIDE_INT val; scalar_int_mode int_mode, inner_mode; @@ -3484,6 +3508,9 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, gcc_assert (XVECLEN (trueop1, 0) == 1); gcc_assert (CONST_INT_P (XVECEXP (trueop1, 0, 0))); + if (vec_duplicate_p (trueop0, &elt0)) + return elt0; + if (GET_CODE (trueop0) == CONST_VECTOR) return CONST_VECTOR_ELT (trueop0, INTVAL (XVECEXP (trueop1, 0, 0))); @@ -3566,9 +3593,6 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, tmp_op, gen_rtx_PARALLEL (VOIDmode, vec)); return tmp; } - if (GET_CODE (trueop0) == VEC_DUPLICATE - && GET_MODE (XEXP (trueop0, 0)) == mode) - return XEXP (trueop0, 0); } else { @@ -3577,6 +3601,11 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, == GET_MODE_INNER (GET_MODE (trueop0))); gcc_assert (GET_CODE (trueop1) == PARALLEL); + if (vec_duplicate_p (trueop0, &elt0)) + /* It doesn't matter which elements are selected by trueop1, + because they are all the same. */ + return gen_vec_duplicate (mode, elt0); + if (GET_CODE (trueop0) == CONST_VECTOR) { int elt_size = GET_MODE_UNIT_SIZE (mode); @@ -3877,6 +3906,32 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, gcc_unreachable (); } + if (mode == GET_MODE (op0) + && mode == GET_MODE (op1) + && vec_duplicate_p (op0, &elt0) + && vec_duplicate_p (op1, &elt1)) + { + /* Try applying the operator to ELT and see if that simplifies. + We can duplicate the result if so. 
+ + The reason we don't use simplify_gen_binary is that it isn't + necessarily a win to convert things like: + + (plus:V (vec_duplicate:V (reg:S R1)) + (vec_duplicate:V (reg:S R2))) + + to: + + (vec_duplicate:V (plus:S (reg:S R1) (reg:S R2))) + + The first might be done entirely in vector registers while the + second might need a move between register files. */ + tem = simplify_binary_operation (code, GET_MODE_INNER (mode), + elt0, elt1); + if (tem) + return gen_vec_duplicate (mode, tem); + } + return 0; } @@ -6025,6 +6080,20 @@ simplify_subreg (machine_mode outermode, rtx op, if (outermode == innermode && !byte) return op; + if (byte % GET_MODE_UNIT_SIZE (innermode) == 0) + { + rtx elt; + + if (VECTOR_MODE_P (outermode) + && GET_MODE_INNER (outermode) == GET_MODE_INNER (innermode) + && vec_duplicate_p (op, &elt)) + return gen_vec_duplicate (outermode, elt); + + if (outermode == GET_MODE_INNER (innermode) + && vec_duplicate_p (op, &elt)) + return elt; + } + if (CONST_SCALAR_INT_P (op) || CONST_DOUBLE_AS_FLOAT_P (op) || GET_CODE (op) == CONST_FIXED @@ -6330,3 +6399,125 @@ simplify_rtx (const_rtx x) } return NULL; } + +#if CHECKING_P + +namespace selftest { + +/* Make a unique pseudo REG of mode MODE for use by selftests. */ + +static rtx +make_test_reg (machine_mode mode) +{ + static int test_reg_num = LAST_VIRTUAL_REGISTER + 1; + + return gen_rtx_REG (mode, test_reg_num++); +} + +/* Test vector simplifications involving VEC_DUPLICATE in which the + operands and result have vector mode MODE. SCALAR_REG is a pseudo + register that holds one element of MODE. */ + +static void +test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg) +{ + scalar_mode inner_mode = GET_MODE_INNER (mode); + rtx duplicate = gen_rtx_VEC_DUPLICATE (mode, scalar_reg); + unsigned int nunits = GET_MODE_NUNITS (mode); + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + /* Test some simple unary cases with VEC_DUPLICATE arguments. 
*/ + rtx not_scalar_reg = gen_rtx_NOT (inner_mode, scalar_reg); + rtx duplicate_not = gen_rtx_VEC_DUPLICATE (mode, not_scalar_reg); + ASSERT_RTX_EQ (duplicate, + simplify_unary_operation (NOT, mode, + duplicate_not, mode)); + + rtx neg_scalar_reg = gen_rtx_NEG (inner_mode, scalar_reg); + rtx duplicate_neg = gen_rtx_VEC_DUPLICATE (mode, neg_scalar_reg); + ASSERT_RTX_EQ (duplicate, + simplify_unary_operation (NEG, mode, + duplicate_neg, mode)); + + /* Test some simple binary cases with VEC_DUPLICATE arguments. */ + ASSERT_RTX_EQ (duplicate, + simplify_binary_operation (PLUS, mode, duplicate, + CONST0_RTX (mode))); + + ASSERT_RTX_EQ (duplicate, + simplify_binary_operation (MINUS, mode, duplicate, + CONST0_RTX (mode))); + + ASSERT_RTX_PTR_EQ (CONST0_RTX (mode), + simplify_binary_operation (MINUS, mode, duplicate, + duplicate)); + } + + /* Test a scalar VEC_SELECT of a VEC_DUPLICATE. */ + rtx zero_par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); + ASSERT_RTX_PTR_EQ (scalar_reg, + simplify_binary_operation (VEC_SELECT, inner_mode, + duplicate, zero_par)); + + /* And again with the final element. */ + rtx last_index = gen_int_mode (GET_MODE_NUNITS (mode) - 1, word_mode); + rtx last_par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, last_index)); + ASSERT_RTX_PTR_EQ (scalar_reg, + simplify_binary_operation (VEC_SELECT, inner_mode, + duplicate, last_par)); + + /* Test a scalar subreg of a VEC_DUPLICATE. */ + unsigned int offset = subreg_lowpart_offset (inner_mode, mode); + ASSERT_RTX_EQ (scalar_reg, + simplify_gen_subreg (inner_mode, duplicate, + mode, offset)); + + machine_mode narrower_mode; + if (nunits > 2 + && mode_for_vector (inner_mode, 2).exists (&narrower_mode) + && VECTOR_MODE_P (narrower_mode)) + { + /* Test VEC_SELECT of a vector. 
*/ + rtx vec_par + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, const1_rtx, const0_rtx)); + rtx narrower_duplicate + = gen_rtx_VEC_DUPLICATE (narrower_mode, scalar_reg); + ASSERT_RTX_EQ (narrower_duplicate, + simplify_binary_operation (VEC_SELECT, narrower_mode, + duplicate, vec_par)); + + /* Test a vector subreg of a VEC_DUPLICATE. */ + unsigned int offset = subreg_lowpart_offset (narrower_mode, mode); + ASSERT_RTX_EQ (narrower_duplicate, + simplify_gen_subreg (narrower_mode, duplicate, + mode, offset)); + } +} + +/* Verify some simplifications involving vectors. */ + +static void +test_vector_ops () +{ + for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i) + { + machine_mode mode = (machine_mode) i; + if (VECTOR_MODE_P (mode)) + { + rtx scalar_reg = make_test_reg (GET_MODE_INNER (mode)); + test_vector_ops_duplicate (mode, scalar_reg); + } + } +} + +/* Run all of the selftests within this file. */ + +void +simplify_rtx_c_tests () +{ + test_vector_ops (); +} + +} // namespace selftest + +#endif /* CHECKING_P */ -- 2.30.2