From: Richard Sandiford Date: Sat, 16 Dec 2017 14:03:30 +0000 (+0000) Subject: Add VEC_DUPLICATE_EXPR and associated optab X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=be4c1d4a42c5c7dc8bffbc5c9e3250f02be0d922;p=gcc.git Add VEC_DUPLICATE_EXPR and associated optab SVE needs a way of broadcasting a scalar to a variable-length vector. This patch adds VEC_DUPLICATE_EXPR for when CONSTRUCTOR would be used for fixed-length vectors; this is the tree equivalent of the existing rtl code VEC_DUPLICATE. The patch also adds a vec_duplicate_optab to go with VEC_DUPLICATE_EXPR. 2017-12-16 Richard Sandiford Alan Hayward David Sherwood gcc/ * doc/generic.texi (VEC_DUPLICATE_EXPR): Document. (VEC_COND_EXPR): Add missing @tindex. * doc/md.texi (vec_duplicate@var{m}): Document. * tree.def (VEC_DUPLICATE_EXPR): New tree codes. * tree.c (build_vector_from_val): Add stubbed-out handling of variable-length vectors, using VEC_DUPLICATE_EXPR. (uniform_vector_p): Handle VEC_DUPLICATE_EXPR. * cfgexpand.c (expand_debug_expr): Likewise. * tree-cfg.c (verify_gimple_assign_unary): Likewise. * tree-inline.c (estimate_operator_cost): Likewise. * tree-pretty-print.c (dump_generic_node): Likewise. * tree-vect-generic.c (ssa_uniform_vector_p): Likewise. * fold-const.c (const_unop): Fold VEC_DUPLICATE_EXPRs of a constant. (test_vec_duplicate_folding): New function. (fold_const_c_tests): Call it. * optabs.def (vec_duplicate_optab): New optab. * optabs-tree.c (optab_for_tree_code): Handle VEC_DUPLICATE_EXPR. * optabs.h (expand_vector_broadcast): Declare. * optabs.c (expand_vector_broadcast): Make non-static. Try using vec_duplicate_optab. * expr.c (store_constructor): Try using vec_duplicate_optab for uniform vectors. (expand_expr_real_2): Handle VEC_DUPLICATE_EXPR. 
Co-Authored-By: Alan Hayward Co-Authored-By: David Sherwood From-SVN: r255740 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c2d037a9f8d..b3cbc1c5418 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,31 @@ +2017-12-16 Richard Sandiford + Alan Hayward + David Sherwood + + * doc/generic.texi (VEC_DUPLICATE_EXPR): Document. + (VEC_COND_EXPR): Add missing @tindex. + * doc/md.texi (vec_duplicate@var{m}): Document. + * tree.def (VEC_DUPLICATE_EXPR): New tree codes. + * tree.c (build_vector_from_val): Add stubbed-out handling of + variable-length vectors, using VEC_DUPLICATE_EXPR. + (uniform_vector_p): Handle VEC_DUPLICATE_EXPR. + * cfgexpand.c (expand_debug_expr): Likewise. + * tree-cfg.c (verify_gimple_assign_unary): Likewise. + * tree-inline.c (estimate_operator_cost): Likewise. + * tree-pretty-print.c (dump_generic_node): Likewise. + * tree-vect-generic.c (ssa_uniform_vector_p): Likewise. + * fold-const.c (const_unop): Fold VEC_DUPLICATE_EXPRs of a constant. + (test_vec_duplicate_folding): New function. + (fold_const_c_tests): Call it. + * optabs.def (vec_duplicate_optab): New optab. + * optabs-tree.c (optab_for_tree_code): Handle VEC_DUPLICATE_EXPR. + * optabs.h (expand_vector_broadcast): Declare. + * optabs.c (expand_vector_broadcast): Make non-static. Try using + vec_duplicate_optab. + * expr.c (store_constructor): Try using vec_duplicate_optab for + uniform vectors. + (expand_expr_real_2): Handle VEC_DUPLICATE_EXPR. + 2017-12-15 Markus Trippelsdorf PR target/83358 diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index ce98264214a..bde2119f9b4 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -5069,6 +5069,7 @@ expand_debug_expr (tree exp) case VEC_WIDEN_LSHIFT_HI_EXPR: case VEC_WIDEN_LSHIFT_LO_EXPR: case VEC_PERM_EXPR: + case VEC_DUPLICATE_EXPR: return NULL; /* Misc codes. 
*/ diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi index b01cdaa4d88..640eb3bee84 100644 --- a/gcc/doc/generic.texi +++ b/gcc/doc/generic.texi @@ -1768,6 +1768,7 @@ a value from @code{enum annot_expr_kind}, the third is an @code{INTEGER_CST}. @node Vectors @subsection Vectors +@tindex VEC_DUPLICATE_EXPR @tindex VEC_LSHIFT_EXPR @tindex VEC_RSHIFT_EXPR @tindex VEC_WIDEN_MULT_HI_EXPR @@ -1779,9 +1780,14 @@ a value from @code{enum annot_expr_kind}, the third is an @code{INTEGER_CST}. @tindex VEC_PACK_TRUNC_EXPR @tindex VEC_PACK_SAT_EXPR @tindex VEC_PACK_FIX_TRUNC_EXPR +@tindex VEC_COND_EXPR @tindex SAD_EXPR @table @code +@item VEC_DUPLICATE_EXPR +This node has a single operand and represents a vector in which every +element is equal to that operand. + @item VEC_LSHIFT_EXPR @itemx VEC_RSHIFT_EXPR These nodes represent whole vector left and right shifts, respectively. diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 9e0540a1e8b..f9d997af95b 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -4888,6 +4888,17 @@ and operand 1 is parallel containing values for individual fields. The the vector mode @var{m}, or a vector mode with the same element mode and smaller number of elements. +@cindex @code{vec_duplicate@var{m}} instruction pattern +@item @samp{vec_duplicate@var{m}} +Initialize vector output operand 0 so that each element has the value given +by scalar input operand 1. The vector has mode @var{m} and the scalar has +the mode appropriate for one element of @var{m}. + +This pattern only handles duplicates of non-constant inputs. Constant +vectors go through the @code{mov@var{m}} pattern instead. + +This pattern is not allowed to @code{FAIL}. + @cindex @code{vec_cmp@var{m}@var{n}} instruction pattern @item @samp{vec_cmp@var{m}@var{n}} Output a vector comparison. 
Operand 0 of mode @var{n} is the destination for diff --git a/gcc/expr.c b/gcc/expr.c index 80116381f1b..5f7c7e4f036 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -6598,7 +6598,8 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size, constructor_elt *ce; int i; int need_to_clear; - int icode = CODE_FOR_nothing; + insn_code icode = CODE_FOR_nothing; + tree elt; tree elttype = TREE_TYPE (type); int elt_size = tree_to_uhwi (TYPE_SIZE (elttype)); machine_mode eltmode = TYPE_MODE (elttype); @@ -6608,13 +6609,30 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size, unsigned n_elts; alias_set_type alias; bool vec_vec_init_p = false; + machine_mode mode = GET_MODE (target); gcc_assert (eltmode != BLKmode); + /* Try using vec_duplicate_optab for uniform vectors. */ + if (!TREE_SIDE_EFFECTS (exp) + && VECTOR_MODE_P (mode) + && eltmode == GET_MODE_INNER (mode) + && ((icode = optab_handler (vec_duplicate_optab, mode)) + != CODE_FOR_nothing) + && (elt = uniform_vector_p (exp))) + { + struct expand_operand ops[2]; + create_output_operand (&ops[0], target, mode); + create_input_operand (&ops[1], expand_normal (elt), eltmode); + expand_insn (icode, 2, ops); + if (!rtx_equal_p (target, ops[0].value)) + emit_move_insn (target, ops[0].value); + break; + } + n_elts = TYPE_VECTOR_SUBPARTS (type); - if (REG_P (target) && VECTOR_MODE_P (GET_MODE (target))) + if (REG_P (target) && VECTOR_MODE_P (mode)) { - machine_mode mode = GET_MODE (target); machine_mode emode = eltmode; if (CONSTRUCTOR_NELTS (exp) @@ -6626,7 +6644,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size, == n_elts); emode = TYPE_MODE (etype); } - icode = (int) convert_optab_handler (vec_init_optab, mode, emode); + icode = convert_optab_handler (vec_init_optab, mode, emode); if (icode != CODE_FOR_nothing) { unsigned int i, n = n_elts; @@ -6674,7 +6692,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size, if (need_to_clear && size > 
0 && !vector) { if (REG_P (target)) - emit_move_insn (target, CONST0_RTX (GET_MODE (target))); + emit_move_insn (target, CONST0_RTX (mode)); else clear_storage (target, GEN_INT (size), BLOCK_OP_NORMAL); cleared = 1; @@ -6682,7 +6700,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size, /* Inform later passes that the old value is dead. */ if (!cleared && !vector && REG_P (target)) - emit_move_insn (target, CONST0_RTX (GET_MODE (target))); + emit_move_insn (target, CONST0_RTX (mode)); if (MEM_P (target)) alias = MEM_ALIAS_SET (target); @@ -6733,8 +6751,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size, if (vector) emit_insn (GEN_FCN (icode) (target, - gen_rtx_PARALLEL (GET_MODE (target), - vector))); + gen_rtx_PARALLEL (mode, vector))); break; } @@ -9567,6 +9584,12 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target); return target; + case VEC_DUPLICATE_EXPR: + op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier); + target = expand_vector_broadcast (mode, op0); + gcc_assert (target); + return target; + case BIT_INSERT_EXPR: { unsigned bitpos = tree_to_uhwi (treeop2); diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 9fc69e8058b..6ce9ea111a0 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -1770,6 +1770,11 @@ const_unop (enum tree_code code, tree type, tree arg0) return elts.build (); } + case VEC_DUPLICATE_EXPR: + if (CONSTANT_CLASS_P (arg0)) + return build_vector_from_val (type, arg0); + return NULL_TREE; + default: break; } @@ -14477,6 +14482,22 @@ test_vector_folding () ASSERT_FALSE (integer_nonzerop (fold_build2 (NE_EXPR, res_type, one, one))); } +/* Verify folding of VEC_DUPLICATE_EXPRs. 
*/ + +static void +test_vec_duplicate_folding () +{ + scalar_int_mode int_mode = SCALAR_INT_TYPE_MODE (ssizetype); + machine_mode vec_mode = targetm.vectorize.preferred_simd_mode (int_mode); + /* This will be 1 if VEC_MODE isn't a vector mode. */ + unsigned int nunits = GET_MODE_NUNITS (vec_mode); + + tree type = build_vector_type (ssizetype, nunits); + tree dup5_expr = fold_unary (VEC_DUPLICATE_EXPR, type, ssize_int (5)); + tree dup5_cst = build_vector_from_val (type, ssize_int (5)); + ASSERT_TRUE (operand_equal_p (dup5_expr, dup5_cst, 0)); +} + /* Run all of the selftests within this file. */ void @@ -14484,6 +14505,7 @@ fold_const_c_tests () { test_arithmetic_folding (); test_vector_folding (); + test_vec_duplicate_folding (); } } // namespace selftest diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c index a510c165b05..e0eb20cdb26 100644 --- a/gcc/optabs-tree.c +++ b/gcc/optabs-tree.c @@ -199,6 +199,9 @@ optab_for_tree_code (enum tree_code code, const_tree type, return TYPE_UNSIGNED (type) ? vec_pack_ufix_trunc_optab : vec_pack_sfix_trunc_optab; + case VEC_DUPLICATE_EXPR: + return vec_duplicate_optab; + default: break; } diff --git a/gcc/optabs.c b/gcc/optabs.c index 518ce7a972c..30fe996ed53 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -367,7 +367,7 @@ force_expand_binop (machine_mode mode, optab binoptab, mode of OP must be the element mode of VMODE. If OP is a constant, then the return value will be a constant. 
*/ -static rtx +rtx expand_vector_broadcast (machine_mode vmode, rtx op) { enum insn_code icode; @@ -380,6 +380,16 @@ expand_vector_broadcast (machine_mode vmode, rtx op) if (valid_for_const_vec_duplicate_p (vmode, op)) return gen_const_vec_duplicate (vmode, op); + icode = optab_handler (vec_duplicate_optab, vmode); + if (icode != CODE_FOR_nothing) + { + struct expand_operand ops[2]; + create_output_operand (&ops[0], NULL_RTX, vmode); + create_input_operand (&ops[1], op, GET_MODE (op)); + expand_insn (icode, 2, ops); + return ops[0].value; + } + /* ??? If the target doesn't have a vec_init, then we have no easy way of performing this operation. Most of this sort of generic support is hidden away in the vector lowering support in gimple. */ diff --git a/gcc/optabs.def b/gcc/optabs.def index 54afe2d796e..f3f4bc896c4 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -364,3 +364,5 @@ OPTAB_D (atomic_xor_optab, "atomic_xor$I$a") OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a") OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a") + +OPTAB_DC (vec_duplicate_optab, "vec_duplicate$a", VEC_DUPLICATE) diff --git a/gcc/optabs.h b/gcc/optabs.h index 07d07fe8366..32f876a2e05 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -181,6 +181,7 @@ extern rtx simplify_expand_binop (machine_mode mode, optab binoptab, enum optab_methods methods); extern bool force_expand_binop (machine_mode, optab, rtx, rtx, rtx, int, enum optab_methods); +extern rtx expand_vector_broadcast (machine_mode, rtx); /* Generate code for a simple binary or unary operation. 
"Simple" in this case means "can be unambiguously described by a (mode, code) diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 3b16c101a6f..2b331d6003f 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -3880,6 +3880,17 @@ verify_gimple_assign_unary (gassign *stmt) case CONJ_EXPR: break; + case VEC_DUPLICATE_EXPR: + if (TREE_CODE (lhs_type) != VECTOR_TYPE + || !useless_type_conversion_p (TREE_TYPE (lhs_type), rhs1_type)) + { + error ("vec_duplicate should be from a scalar to a like vector"); + debug_generic_expr (lhs_type); + debug_generic_expr (rhs1_type); + return true; + } + return false; + default: gcc_unreachable (); } diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index 8604ba14a00..99546be8c18 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -3928,6 +3928,7 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights, case VEC_PACK_FIX_TRUNC_EXPR: case VEC_WIDEN_LSHIFT_HI_EXPR: case VEC_WIDEN_LSHIFT_LO_EXPR: + case VEC_DUPLICATE_EXPR: return 1; diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 6519f3e3548..31ed9004576 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -3178,6 +3178,15 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, dump_flags_t flags, pp_string (pp, " > "); break; + case VEC_DUPLICATE_EXPR: + pp_space (pp); + for (str = get_tree_code_name (code); *str; str++) + pp_character (pp, TOUPPER (*str)); + pp_string (pp, " < "); + dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false); + pp_string (pp, " > "); + break; + case VEC_UNPACK_HI_EXPR: pp_string (pp, " VEC_UNPACK_HI_EXPR < "); dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false); diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index aa5542d5dc6..b214208894e 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -1419,6 +1419,7 @@ static tree ssa_uniform_vector_p (tree op) { if (TREE_CODE (op) == VECTOR_CST + || TREE_CODE (op) == VEC_DUPLICATE_EXPR || TREE_CODE 
(op) == CONSTRUCTOR) return uniform_vector_p (op); if (TREE_CODE (op) == SSA_NAME) diff --git a/gcc/tree.c b/gcc/tree.c index ed1852b3e66..8e0313ceeb5 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -1785,6 +1785,8 @@ build_vector_from_val (tree vectype, tree sc) v.quick_push (sc); return v.build (); } + else if (0) + return fold_build1 (VEC_DUPLICATE_EXPR, vectype, sc); else { vec<constructor_elt, va_gc> *v; @@ -10468,7 +10470,10 @@ uniform_vector_p (const_tree vec) gcc_assert (VECTOR_TYPE_P (TREE_TYPE (vec))); - if (TREE_CODE (vec) == VECTOR_CST) + if (TREE_CODE (vec) == VEC_DUPLICATE_EXPR) + return TREE_OPERAND (vec, 0); + + else if (TREE_CODE (vec) == VECTOR_CST) { if (VECTOR_CST_NPATTERNS (vec) == 1 && VECTOR_CST_DUPLICATE_P (vec)) return VECTOR_CST_ENCODED_ELT (vec, 0); diff --git a/gcc/tree.def b/gcc/tree.def index 137e63f255f..c3af82461f7 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -537,6 +537,9 @@ DEFTREECODE (TARGET_EXPR, "target_expr", tcc_expression, 4) 1 and 2 are NULL. The operands are then taken from the cfg edges. */ DEFTREECODE (COND_EXPR, "cond_expr", tcc_expression, 3) +/* Represents a vector in which every element is equal to operand 0. */ +DEFTREECODE (VEC_DUPLICATE_EXPR, "vec_duplicate_expr", tcc_unary, 1) + /* Vector conditional expression. It is like COND_EXPR, but with vector operands.