From 86ce59b4f05d8f68ec4d9a14a7732acb370412db Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 25 Jun 2020 11:21:20 +0200 Subject: [PATCH] tree-optimization/95866 - avoid using scalar ops for vectorized shift This avoids using the original scalar SSA operand when vectorizing a shift with a vectorized shift operand where we know all vector components have the same value and thus we can use a vector by scalar shift. Using the scalar SSA operand causes a possibly long chain of scalar computation to be retained so it's better to simply extract lane zero from the available vectorized shift operand. 2020-06-25 Richard Biener PR tree-optimization/95866 * tree-vect-stmts.c (vectorizable_shift): Reject incompatible vectorized shift operands. For scalar shifts use lane zero of a vectorized shift operand. * gcc.dg/vect/bb-slp-pr95866.c: New testcase. --- gcc/testsuite/gcc.dg/vect/bb-slp-pr95866.c | 17 ++++++++++++ gcc/tree-vect-stmts.c | 30 +++++++++++++++++++--- 2 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr95866.c diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr95866.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95866.c new file mode 100644 index 00000000000..5de46719101 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95866.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_shift } */ + +int x[4]; +int j[4]; +void foo() +{ + x[0] = (x[0] << j[0]) + j[0]; + x[1] = (x[1] << j[0]) + j[1]; + x[2] = (x[2] << j[0]) + j[2]; + x[3] = (x[3] << j[0]) + j[3]; +} + +/* The scalar shift argument should be extracted from the available vector. */ +/* { dg-final { scan-tree-dump "BIT_FIELD_REF" "slp2" } } */ +/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } */ diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index de7d77f3872..edd28534cb0 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -5413,6 +5413,15 @@ vectorizable_shift (vec_info *vinfo, = (!op1_vectype || !tree_nop_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op1))); + if (incompatible_op1_vectype_p + && dt[1] == vect_internal_def) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "unusable type for last operand in" + " vector/vector shift/rotate.\n"); + return false; + } } } } @@ -5457,7 +5466,7 @@ vectorizable_shift (vec_info *vinfo, { if (slp_node && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype) - || (!scalar_shift_arg + || ((!scalar_shift_arg || dt[1] == vect_internal_def) && (!incompatible_op1_vectype_p || dt[1] == vect_constant_def) && !vect_maybe_update_slp_op_vectype @@ -5499,6 +5508,7 @@ vectorizable_shift (vec_info *vinfo, if (incompatible_op1_vectype_p && !slp_node) { + gcc_assert (!scalar_shift_arg && was_scalar_shift_arg); op1 = fold_convert (TREE_TYPE (vectype), op1); if (dt[1] != vect_constant_def) op1 = vect_init_vector (vinfo, stmt_info, op1, @@ -5508,7 +5518,7 @@ vectorizable_shift (vec_info *vinfo, /* Handle def. */ vec_dest = vect_create_destination_var (scalar_dest, vectype); - if (scalar_shift_arg) + if (scalar_shift_arg && dt[1] != vect_internal_def) { /* Vector shl and shr insn patterns can be defined with scalar operand 2 (shift operand). In this case, use constant or loop @@ -5533,7 +5543,7 @@ vectorizable_shift (vec_info *vinfo, vec_oprnds1.quick_push (vec_oprnd1); } } - else if (slp_node && incompatible_op1_vectype_p) + else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p) { if (was_scalar_shift_arg) { @@ -5566,6 +5576,20 @@ vectorizable_shift (vec_info *vinfo, FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) { vop1 = vec_oprnds1[i]; + /* For internal defs where we need to use a scalar shift arg + extract the first lane. */ + if (scalar_shift_arg && dt[1] == vect_internal_def) + { + new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1))); + gassign *new_stmt + = gimple_build_assign (new_temp, + build3 (BIT_FIELD_REF, TREE_TYPE (new_temp), + vop1, + TYPE_SIZE (TREE_TYPE (new_temp)), + bitsize_zero_node)); + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + vop1 = new_temp; + } gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1); new_temp = make_ssa_name (vec_dest, new_stmt); gimple_assign_set_lhs (new_stmt, new_temp); -- 2.30.2