+2018-07-03 Richard Sandiford <richard.sandiford@arm.com>
+
+ PR tree-optimization/85694
+ * doc/md.texi (avgM3_floor, uavgM3_floor, avgM3_ceil)
+ (uavgM3_ceil): Document new optabs.
+ * doc/sourcebuild.texi (vect_avg_qi): Document new target selector.
+ * internal-fn.def (IFN_AVG_FLOOR, IFN_AVG_CEIL): New internal
+ functions.
+ * optabs.def (savg_floor_optab, uavg_floor_optab, savg_ceil_optab)
+	(uavg_ceil_optab): New optabs.
+ * tree-vect-patterns.c (vect_recog_average_pattern): New function.
+ (vect_vect_recog_func_ptrs): Add it.
+ * tree-vect-stmts.c (vectorizable_call): Get the type of the zero
+ constant directly from the associated lhs.
+
2018-07-03 Richard Sandiford <richard.sandiford@arm.com>
* tree-vect-patterns.c (vect_split_statement): New function.
Vector shift and rotate instructions that take vectors as operand 2
instead of a scalar type.
+@cindex @code{avg@var{m}3_floor} instruction pattern
+@cindex @code{uavg@var{m}3_floor} instruction pattern
+@item @samp{avg@var{m}3_floor}
+@itemx @samp{uavg@var{m}3_floor}
+Signed and unsigned average instructions. These instructions add
+operands 1 and 2 without truncation, divide the result by 2,
+round towards -Inf, and store the result in operand 0. This is
+equivalent to the C code:
+@smallexample
+narrow op0, op1, op2;
+@dots{}
+op0 = (narrow) (((wide) op1 + (wide) op2) >> 1);
+@end smallexample
+where the signedness of @samp{narrow} determines whether this is a signed
+or unsigned operation.
+
+@cindex @code{avg@var{m}3_ceil} instruction pattern
+@cindex @code{uavg@var{m}3_ceil} instruction pattern
+@item @samp{avg@var{m}3_ceil}
+@itemx @samp{uavg@var{m}3_ceil}
+Like @samp{avg@var{m}3_floor} and @samp{uavg@var{m}3_floor}, but round
+towards +Inf. This is equivalent to the C code:
+@smallexample
+narrow op0, op1, op2;
+@dots{}
+op0 = (narrow) (((wide) op1 + (wide) op2 + 1) >> 1);
+@end smallexample
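+
+For example, with element values 5 and 8:
+@smallexample
+floor:  (5 + 8) >> 1      /* 6 */
+ceil:   (5 + 8 + 1) >> 1  /* 7 */
+@end smallexample
+With signed values -5 and -8 the results are -7 and -6 respectively,
+matching the rounding towards -Inf and +Inf.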
+
@cindex @code{bswap@var{m}2} instruction pattern
@item @samp{bswap@var{m}2}
Reverse the order of bytes of operand 1 and store the result in operand 0.
The target's ABI allows stack variables to be aligned to the preferred
vector alignment.
+@item vect_avg_qi
+Target supports both signed and unsigned averaging operations on vectors
+of bytes.
+
@item vect_condition
Target supports vector conditional operations.
DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
+DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
+ savg_floor, uavg_floor, binary)
+DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
+ savg_ceil, uavg_ceil, binary)
+
DEF_INTERNAL_OPTAB_FN (COND_ADD, ECF_CONST, cond_add, cond_binary)
DEF_INTERNAL_OPTAB_FN (COND_SUB, ECF_CONST, cond_sub, cond_binary)
DEF_INTERNAL_OPTAB_FN (COND_MUL, ECF_CONST, cond_smul, cond_binary)
OPTAB_D (extract_last_optab, "extract_last_$a")
OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
+OPTAB_D (savg_floor_optab, "avg$a3_floor")
+OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
+OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
+OPTAB_D (uavg_ceil_optab, "uavg$a3_ceil")
OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
OPTAB_D (udot_prod_optab, "udot_prod$I$a")
+2018-07-03 Richard Sandiford <richard.sandiford@arm.com>
+
+ PR tree-optimization/85694
+ * lib/target-supports.exp (check_effective_target_vect_avg_qi): New
+ proc.
+ * gcc.dg/vect/vect-avg-1.c: New test.
+ * gcc.dg/vect/vect-avg-2.c: Likewise.
+ * gcc.dg/vect/vect-avg-3.c: Likewise.
+ * gcc.dg/vect/vect-avg-4.c: Likewise.
+ * gcc.dg/vect/vect-avg-5.c: Likewise.
+ * gcc.dg/vect/vect-avg-6.c: Likewise.
+ * gcc.dg/vect/vect-avg-7.c: Likewise.
+ * gcc.dg/vect/vect-avg-8.c: Likewise.
+ * gcc.dg/vect/vect-avg-9.c: Likewise.
+ * gcc.dg/vect/vect-avg-10.c: Likewise.
+ * gcc.dg/vect/vect-avg-11.c: Likewise.
+ * gcc.dg/vect/vect-avg-12.c: Likewise.
+ * gcc.dg/vect/vect-avg-13.c: Likewise.
+ * gcc.dg/vect/vect-avg-14.c: Likewise.
+
2018-07-03 Richard Sandiford <richard.sandiford@arm.com>
* gcc.dg/vect/vect-over-widen-5.c: Test that the extensions
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 50
+
+#ifndef SIGNEDNESS
+#define SIGNEDNESS unsigned
+#endif
+#ifndef BIAS
+#define BIAS 0
+#endif
+
+void __attribute__ ((noipa))
+f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b,
+ SIGNEDNESS char *restrict c)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ a[i] = (b[i] + c[i] + BIAS) >> 1;
+}
+
+#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4)
+#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26)
+
+int
+main (void)
+{
+ check_vect ();
+
+ SIGNEDNESS char a[N], b[N], c[N];
+ for (int i = 0; i < N; ++i)
+ {
+ b[i] = BASE1 + i * 5;
+ c[i] = BASE2 + i * 4;
+ asm volatile ("" ::: "memory");
+ }
+ f (a, b, c);
+ for (int i = 0; i < N; ++i)
+ if (a[i] != ((BASE1 + BASE2 + i * 9 + BIAS) >> 1))
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+#define BIAS 2
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump-not "vect_recog_average_pattern: detected" "vect" } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 50
+
+#ifndef SIGNEDNESS
+#define SIGNEDNESS unsigned
+#endif
+#ifndef BIAS
+#define BIAS 0
+#endif
+
+void __attribute__ ((noipa))
+f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b,
+ SIGNEDNESS char *restrict c)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ {
+ int tmp = b[i];
+ tmp ^= 0x55;
+ tmp += BIAS;
+ tmp += c[i];
+ tmp >>= 1;
+ tmp |= 0x40;
+ a[i] = tmp;
+ }
+}
+
+#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4)
+#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26)
+
+int
+main (void)
+{
+ check_vect ();
+
+ SIGNEDNESS char a[N], b[N], c[N];
+ for (int i = 0; i < N; ++i)
+ {
+ b[i] = BASE1 + i * 5;
+ c[i] = BASE2 + i * 4;
+ asm volatile ("" ::: "memory");
+ }
+ f (a, b, c);
+ for (int i = 0; i < N; ++i)
+ if (a[i] != (((((BASE1 + i * 5) ^ 0x55)
+ + (BASE2 + i * 4)
+ + BIAS) >> 1) | 0x40))
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+
+#include "vect-avg-11.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS unsigned
+#define BIAS 1
+
+#include "vect-avg-11.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+#define BIAS 1
+
+#include "vect-avg-11.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+
+#include "vect-avg-1.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS unsigned
+#define BIAS 1
+
+#include "vect-avg-1.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+#define BIAS 1
+
+#include "vect-avg-1.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 50
+
+#ifndef SIGNEDNESS
+#define SIGNEDNESS unsigned
+#endif
+#ifndef BIAS
+#define BIAS 0
+#endif
+
+void __attribute__ ((noipa))
+f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b,
+ SIGNEDNESS char *restrict c)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ {
+ int tmp1 = b[i] + BIAS;
+ int tmp2 = tmp1 + c[i];
+ a[i] = tmp2 >> 1;
+ }
+}
+
+#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4)
+#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26)
+
+int
+main (void)
+{
+ check_vect ();
+
+ SIGNEDNESS char a[N], b[N], c[N];
+ for (int i = 0; i < N; ++i)
+ {
+ b[i] = BASE1 + i * 5;
+ c[i] = BASE2 + i * 4;
+ asm volatile ("" ::: "memory");
+ }
+ f (a, b, c);
+ for (int i = 0; i < N; ++i)
+ if (a[i] != ((BASE1 + BASE2 + i * 9 + BIAS) >> 1))
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS unsigned
+#define BIAS 1
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+#define BIAS 1
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS unsigned
+#define BIAS 2
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump-not "vect_recog_average_pattern: detected" "vect" } } */
return $et_vect_usad_char_saved($et_index)
}
+# Return 1 if the target plus current options supports both signed
+# and unsigned average operations on vectors of bytes.
+
+proc check_effective_target_vect_avg_qi {} {
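+    # No target reports support yet; this is expected to return 1 for a
+    # target once it provides the new avg optabs for QImode vectors.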
+ return 0
+}
+
# Return 1 if the target plus current options supports a vector
# demotion (packing) of shorts (to chars) and ints (to shorts)
# using modulo arithmetic, 0 otherwise.
return pattern_stmt;
}
+/* Recognize the patterns:
+
+ ATYPE a; // narrower than TYPE
+ BTYPE b; // narrower than TYPE
+ (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
+ or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
+
+ where only the bottom half of avg is used. Try to transform them into:
+
+ (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
+ or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
+
+ followed by:
+
+ TYPE avg = (TYPE) avg';
+
+ where NTYPE is no wider than half of TYPE. Since only the bottom half
+ of avg is used, all or part of the cast of avg' should become redundant. */
+
+static gimple *
+vect_recog_average_pattern (vec<gimple *> *stmts, tree *type_out)
+{
+ /* Check for a shift right by one bit. */
+ gassign *last_stmt = dyn_cast <gassign *> (stmts->pop ());
+ if (!last_stmt
+ || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
+ || !integer_onep (gimple_assign_rhs2 (last_stmt)))
+ return NULL;
+
+ stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
+ vec_info *vinfo = last_stmt_info->vinfo;
+
+ /* Check that the shift result is wider than the users of the
+ result need (i.e. that narrowing would be a natural choice). */
+ tree lhs = gimple_assign_lhs (last_stmt);
+ tree type = TREE_TYPE (lhs);
+ unsigned int target_precision
+ = vect_element_precision (last_stmt_info->min_output_precision);
+ if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
+ return NULL;
+
+ /* Get the definition of the shift input. */
+ tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
+ stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
+ if (!plus_stmt_info)
+ return NULL;
+
+ /* Check whether the shift input can be seen as a tree of additions on
+ 2 or 3 widened inputs.
+
+ Note that the pattern should be a win even if the result of one or
+ more additions is reused elsewhere: if the pattern matches, we'd be
+ replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
+ internal_fn ifn = IFN_AVG_FLOOR;
+ vect_unpromoted_value unprom[3];
+ tree new_type;
+ unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
+ PLUS_EXPR, false, 3,
+ unprom, &new_type);
+ if (nops == 0)
+ return NULL;
+ if (nops == 3)
+ {
+ /* Check that one operand is 1. */
+ unsigned int i;
+ for (i = 0; i < 3; ++i)
+ if (integer_onep (unprom[i].op))
+ break;
+ if (i == 3)
+ return NULL;
+ /* Throw away the 1 operand and keep the other two. */
+ if (i < 2)
+ unprom[i] = unprom[2];
+ ifn = IFN_AVG_CEIL;
+ }
+
+ vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
+
+ /* We know that:
+
+ (a) the operation can be viewed as:
+
+ TYPE widened0 = (TYPE) UNPROM[0];
+ TYPE widened1 = (TYPE) UNPROM[1];
+ TYPE tmp1 = widened0 + widened1 {+ 1};
+ TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
+
+ (b) the first two statements are equivalent to:
+
+ TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
+ TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
+
+ (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
+ where sensible;
+
+ (d) all the operations can be performed correctly at twice the width of
+ NEW_TYPE, due to the nature of the average operation; and
+
+ (e) users of the result of the right shift need only TARGET_PRECISION
+ bits, where TARGET_PRECISION is no more than half of TYPE's
+ precision.
+
+ Under these circumstances, the only situation in which NEW_TYPE
+ could be narrower than TARGET_PRECISION is if widened0, widened1
+ and an addition result are all used more than once. Thus we can
+ treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
+ as "free", whereas widening the result of the average instruction
+ from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
+ therefore better not to go narrower than TARGET_PRECISION. */
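+  /* For example, if the inputs are unsigned chars (so NEW_TYPE has
+     precision 8) but TARGET_PRECISION is 16, use a 16-bit NEW_TYPE here
+     so that the average result never needs a separate widening
+     statement.  */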
+ if (TYPE_PRECISION (new_type) < target_precision)
+ new_type = build_nonstandard_integer_type (target_precision,
+ TYPE_UNSIGNED (new_type));
+
+ /* Check for target support. */
+ tree new_vectype = get_vectype_for_scalar_type (new_type);
+ if (!new_vectype
+ || !direct_internal_fn_supported_p (ifn, new_vectype,
+ OPTIMIZE_FOR_SPEED))
+ return NULL;
+
+ /* The IR requires a valid vector type for the cast result, even though
+ it's likely to be discarded. */
+ *type_out = get_vectype_for_scalar_type (type);
+ if (!*type_out)
+ return NULL;
+
+ /* Generate the IFN_AVG* call. */
+ tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
+ tree new_ops[2];
+ vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
+ unprom, new_vectype);
+ gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
+ new_ops[1]);
+ gimple_call_set_lhs (average_stmt, new_var);
+ gimple_set_location (average_stmt, gimple_location (last_stmt));
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "created pattern stmt: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, average_stmt, 0);
+ }
+
+ stmts->safe_push (last_stmt);
+ return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
+}
+
/* Recognize cases in which the input to a cast is wider than its
output, and the input is fed by a widening operation. Fold this
by removing the unnecessary intermediate widening. E.g.:
   less complex ones (widen_sum only after dot_prod or sad for example). */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
{ vect_recog_over_widening_pattern, "over_widening" },
+ /* Must come after over_widening, which narrows the shift as much as
+ possible beforehand. */
+ { vect_recog_average_pattern, "average" },
{ vect_recog_cast_forwprop_pattern, "cast_forwprop" },
{ vect_recog_widen_mult_pattern, "widen_mult" },
{ vect_recog_dot_prod_pattern, "dot_prod" },
gcall *stmt;
tree vec_dest;
tree scalar_dest;
- tree op, type;
+ tree op;
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
tree vectype_out, vectype_in;
if (slp_node)
return true;
- type = TREE_TYPE (scalar_dest);
if (is_pattern_stmt_p (stmt_info))
stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
lhs = gimple_get_lhs (stmt_info->stmt);
- new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
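+  /* Build the zero in the type of LHS: with pattern statements, LHS is
+     the lhs of the original scalar statement, whose type can differ
+     from that of SCALAR_DEST.  */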
+ new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
set_vinfo_for_stmt (new_stmt, stmt_info);
set_vinfo_for_stmt (stmt_info->stmt, NULL);
STMT_VINFO_STMT (stmt_info) = new_stmt;