From 294973a49751a7fc2d6a7a9f2749ce851a368c04 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Thu, 18 Oct 2018 08:18:42 +0000 Subject: [PATCH] Simplify subreg of vec_merge of vec_duplicate We can simplify (subreg (vec_merge (vec_duplicate X) (vector) (const_int ((1 << N) | M))) (N * sizeof (X))) to X when the mode of X is the same as the mode of the subreg. gcc/ PR target/87537 * simplify-rtx.c (simplify_subreg): Simplify subreg of vec_merge of vec_duplicate. (test_vector_ops_duplicate): Add test for a scalar subreg of a VEC_MERGE of a VEC_DUPLICATE. gcc/testsuite/ PR target/87537 * gcc.target/i386/pr87537-1.c: New test. From-SVN: r265260 --- gcc/ChangeLog | 8 +++++++ gcc/simplify-rtx.c | 29 ++++++++++++++++++++++- gcc/testsuite/ChangeLog | 5 ++++ gcc/testsuite/gcc.target/i386/pr87537-1.c | 12 ++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr87537-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f4e147c26ee..41a1dd13b67 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2018-10-18 H.J. Lu + + PR target/87537 + * simplify-rtx.c (simplify_subreg): Simplify subreg of vec_merge + of vec_duplicate. + (test_vector_ops_duplicate): Add test for a scalar subreg of a + VEC_MERGE of a VEC_DUPLICATE. + 2018-10-17 Joseph Myers * doc/cpp.texi (__STDC_VERSION__): Document C2X handling. 
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 9bc53866b9f..b0cf3bbb2a9 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -6601,6 +6601,21 @@ simplify_subreg (machine_mode outermode, rtx op, return NULL_RTX; } + /* Return X for + (subreg (vec_merge (vec_duplicate X) + (vector) + (const_int ((1 << N) | M))) + (N * sizeof (X))) + */ + unsigned int idx; + if (constant_multiple_p (byte, GET_MODE_SIZE (outermode), &idx) + && GET_CODE (op) == VEC_MERGE + && GET_CODE (XEXP (op, 0)) == VEC_DUPLICATE + && GET_MODE (XEXP (XEXP (op, 0), 0)) == outermode + && CONST_INT_P (XEXP (op, 2)) + && (UINTVAL (XEXP (op, 2)) & (HOST_WIDE_INT_1U << idx)) != 0) + return XEXP (XEXP (op, 0), 0); + /* A SUBREG resulting from a zero extension may fold to zero if it extracts higher bits that the ZERO_EXTEND's source bits. */ if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode)) @@ -6831,15 +6846,27 @@ test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg) simplify_binary_operation (VEC_SELECT, inner_mode, duplicate, zero_par)); - /* And again with the final element. */ unsigned HOST_WIDE_INT const_nunits; if (nunits.is_constant (&const_nunits)) { + /* And again with the final element. */ rtx last_index = gen_int_mode (const_nunits - 1, word_mode); rtx last_par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, last_index)); ASSERT_RTX_PTR_EQ (scalar_reg, simplify_binary_operation (VEC_SELECT, inner_mode, duplicate, last_par)); + + /* Test a scalar subreg of a VEC_MERGE of a VEC_DUPLICATE. */ + rtx vector_reg = make_test_reg (mode); + for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++) + { + rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1)); + rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask); + poly_uint64 offset = i * GET_MODE_SIZE (inner_mode); + ASSERT_RTX_EQ (scalar_reg, + simplify_gen_subreg (inner_mode, vm, + mode, offset)); + } } /* Test a scalar subreg of a VEC_DUPLICATE. 
*/ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f6dbcf731c2..5e1ad063d9b 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2018-10-18 H.J. Lu + + PR target/87537 + * gcc.target/i386/pr87537-1.c: New test. + 2018-10-17 Joseph Myers * gcc.dg/c11-static-assert-7.c, gcc.dg/c11-static-assert-8.c, diff --git a/gcc/testsuite/gcc.target/i386/pr87537-1.c b/gcc/testsuite/gcc.target/i386/pr87537-1.c new file mode 100644 index 00000000000..df849b032e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr87537-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O2" } */ +/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vmovss" } } */ + +#include <immintrin.h> + +__m128 +foo (float *x) +{ + return _mm_broadcastss_ps(_mm_load_ss(x)); +} -- 2.30.2