From: Richard Biener Date: Thu, 16 May 2019 08:03:49 +0000 (+0000) Subject: re PR target/90424 (memcpy into vector builtin not optimized) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1bf2a0b90f2457f6d9301535560eb5e05978261b;p=gcc.git re PR target/90424 (memcpy into vector builtin not optimized) 2019-05-16 Richard Biener PR tree-optimization/90424 * tree-ssa.c (non_rewritable_lvalue_p): Handle inserts from aligned subvectors. (execute_update_addresses_taken): Likewise. * tree-cfg.c (verify_gimple_assign_ternary): Likewise. * g++.target/i386/pr90424-1.C: New testcase. * g++.target/i386/pr90424-2.C: Likewise. From-SVN: r271279 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b8c658d1c83..1ddc8bfdbb3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2019-05-16 Richard Biener + + PR tree-optimization/90424 + * tree-ssa.c (non_rewritable_lvalue_p): Handle inserts from + aligned subvectors. + (execute_update_addresses_taken): Likewise. + * tree-cfg.c (verify_gimple_assign_ternary): Likewise. + 2019-05-16 Richard Biener * gimple-pretty-print.c (dump_ternary_rhs): Dump BIT_INSERT_EXPR diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2ca1d277804..5f01fdf15a8 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2019-05-16 Richard Biener + + PR tree-optimization/90424 + * g++.target/i386/pr90424-1.C: New testcase. + * g++.target/i386/pr90424-2.C: Likewise. + 2019-05-16 Richard Biener * gcc.dg/gimplefe-40.c: Amend again. 
diff --git a/gcc/testsuite/g++.target/i386/pr90424-1.C b/gcc/testsuite/g++.target/i386/pr90424-1.C new file mode 100644 index 00000000000..9df8c089ba2 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr90424-1.C @@ -0,0 +1,32 @@ +/* { dg-do compile { target c++11 } } */ +/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */ + +template <class T> +using V [[gnu::vector_size(16)]] = T; + +template <class T, unsigned M = sizeof(V<T>)> +V<T> load(const void *p) { + using W = V<T>; + W r; + __builtin_memcpy(&r, p, M); + return r; +} + +// movq or movsd +template V<char> load<char, 8>(const void *); // bad +template V<short> load<short, 8>(const void *); // bad +template V<int> load<int, 8>(const void *); // bad +template V<long> load<long, 8>(const void *); // good +// the following is disabled because V2SF isn't a supported mode +// template V<float> load<float, 8>(const void *); // bad +template V<double> load<double, 8>(const void *); // good (movsd?) + +// movd or movss +template V<char> load<char, 4>(const void *); // bad +template V<short> load<short, 4>(const void *); // bad +template V<int> load<int, 4>(const void *); // good +template V<float> load<float, 4>(const void *); // good + +/* We should end up with one load and one insert for each function. 
*/ +/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */ diff --git a/gcc/testsuite/g++.target/i386/pr90424-2.C b/gcc/testsuite/g++.target/i386/pr90424-2.C new file mode 100644 index 00000000000..3abb65f4591 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr90424-2.C @@ -0,0 +1,31 @@ +/* { dg-do compile { target c++11 } } */ +/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */ + +template <class T> +using V [[gnu::vector_size(16)]] = T; + +template <class T, unsigned M = sizeof(V<T>)> +V<T> load(const void *p) { + V<T> r = {}; + __builtin_memcpy(&r, p, M); + return r; +} + +// movq or movsd +template V<char> load<char, 8>(const void *); // bad +template V<short> load<short, 8>(const void *); // bad +template V<int> load<int, 8>(const void *); // bad +template V<long> load<long, 8>(const void *); // good +// the following is disabled because V2SF isn't a supported mode +// template V<float> load<float, 8>(const void *); // bad +template V<double> load<double, 8>(const void *); // good (movsd?) + +// movd or movss +template V<char> load<char, 4>(const void *); // bad +template V<short> load<short, 4>(const void *); // bad +template V<int> load<int, 4>(const void *); // good +template V<float> load<float, 4>(const void *); // good + +/* We should end up with one load and one insert for each function. */ +/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */ diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 966ce5a373c..c6a70c8f10b 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -4263,8 +4263,17 @@ verify_gimple_assign_ternary (gassign *stmt) } if (! ((INTEGRAL_TYPE_P (rhs1_type) && INTEGRAL_TYPE_P (rhs2_type)) + /* Vector element insert. */ || (VECTOR_TYPE_P (rhs1_type) - && types_compatible_p (TREE_TYPE (rhs1_type), rhs2_type)))) + && types_compatible_p (TREE_TYPE (rhs1_type), rhs2_type)) + /* Aligned sub-vector insert. 
*/ + || (VECTOR_TYPE_P (rhs1_type) + && VECTOR_TYPE_P (rhs2_type) + && types_compatible_p (TREE_TYPE (rhs1_type), + TREE_TYPE (rhs2_type)) + && multiple_p (TYPE_VECTOR_SUBPARTS (rhs1_type), + TYPE_VECTOR_SUBPARTS (rhs2_type)) + && multiple_of_p (bitsizetype, rhs3, TYPE_SIZE (rhs2_type))))) { error ("not allowed type combination in BIT_INSERT_EXPR"); debug_generic_expr (rhs1_type); diff --git a/gcc/tree-ssa.c b/gcc/tree-ssa.c index 489f6dc1501..8e3aec1e0a6 100644 --- a/gcc/tree-ssa.c +++ b/gcc/tree-ssa.c @@ -1521,14 +1521,29 @@ non_rewritable_lvalue_p (tree lhs) if (DECL_P (decl) && VECTOR_TYPE_P (TREE_TYPE (decl)) && TYPE_MODE (TREE_TYPE (decl)) != BLKmode - && operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (lhs)), - TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (decl))), 0) && known_ge (mem_ref_offset (lhs), 0) && known_gt (wi::to_poly_offset (TYPE_SIZE_UNIT (TREE_TYPE (decl))), mem_ref_offset (lhs)) && multiple_of_p (sizetype, TREE_OPERAND (lhs, 1), TYPE_SIZE_UNIT (TREE_TYPE (lhs)))) - return false; + { + poly_uint64 lhs_bits, nelts; + if (poly_int_tree_p (TYPE_SIZE (TREE_TYPE (lhs)), &lhs_bits) + && multiple_p (lhs_bits, + tree_to_uhwi + (TYPE_SIZE (TREE_TYPE (TREE_TYPE (decl)))), + &nelts)) + { + if (known_eq (nelts, 1u)) + return false; + /* For sub-vector inserts the insert vector mode has to be + supported. 
*/ + tree vtype = build_vector_type (TREE_TYPE (TREE_TYPE (decl)), + nelts); + if (TYPE_MODE (vtype) != BLKmode) + return false; + } + } } /* A vector-insert using a BIT_FIELD_REF is rewritable using @@ -1866,20 +1881,30 @@ execute_update_addresses_taken (void) && bitmap_bit_p (suitable_for_renaming, DECL_UID (sym)) && VECTOR_TYPE_P (TREE_TYPE (sym)) && TYPE_MODE (TREE_TYPE (sym)) != BLKmode - && operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (lhs)), - TYPE_SIZE_UNIT - (TREE_TYPE (TREE_TYPE (sym))), 0) - && tree_fits_uhwi_p (TREE_OPERAND (lhs, 1)) - && tree_int_cst_lt (TREE_OPERAND (lhs, 1), - TYPE_SIZE_UNIT (TREE_TYPE (sym))) - && (tree_to_uhwi (TREE_OPERAND (lhs, 1)) - % tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (lhs)))) == 0) + && known_ge (mem_ref_offset (lhs), 0) + && known_gt (wi::to_poly_offset + (TYPE_SIZE_UNIT (TREE_TYPE (sym))), + mem_ref_offset (lhs)) + && multiple_of_p (sizetype, + TREE_OPERAND (lhs, 1), + TYPE_SIZE_UNIT (TREE_TYPE (lhs)))) { tree val = gimple_assign_rhs1 (stmt); if (! types_compatible_p (TREE_TYPE (val), TREE_TYPE (TREE_TYPE (sym)))) { - tree tem = make_ssa_name (TREE_TYPE (TREE_TYPE (sym))); + poly_uint64 lhs_bits, nelts; + tree temtype = TREE_TYPE (TREE_TYPE (sym)); + if (poly_int_tree_p (TYPE_SIZE (TREE_TYPE (lhs)), + &lhs_bits) + && multiple_p (lhs_bits, + tree_to_uhwi + (TYPE_SIZE (TREE_TYPE + (TREE_TYPE (sym)))), + &nelts) + && maybe_ne (nelts, 1u)) + temtype = build_vector_type (temtype, nelts); + tree tem = make_ssa_name (temtype); gimple *pun = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,