--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_double } */
+
+double a[2];
+
+void __GIMPLE (ssa,startwith ("fix_loops"))
+foo (double x) /* a[0] += x * 3.0; a[1] += x + 1.0, in one block.  */
+{
+ double tem2;
+ double tem1;
+ double _1;
+ double _2;
+ double _3;
+ double _4;
+
+ __BB(2):
+ _1 = a[0];
+ _2 = x_6(D) * 3.0e+0;
+ tem1_7 = _1 + _2; /* Loaded value first, computed value second.  */
+ _3 = x_6(D) + 1.0e+0;
+ _4 = a[1];
+ tem2_8 = _4 + _3; /* Same operand order as for tem1_7.  */
+ a[0] = tem1_7;
+ a[1] = tem2_8;
+ return;
+}
+
+void __GIMPLE (ssa,startwith ("fix_loops"))
+bar (double x) /* a[0] += x * 3.0; a[1] += x + 1.0, in one block.  */
+{
+ double tem2;
+ double tem1;
+ double _1;
+ double _2;
+ double _3;
+ double _4;
+
+ __BB(2):
+ _1 = a[0];
+ _2 = x_6(D) * 3.0e+0;
+ tem1_7 = _1 + _2; /* Loaded value first, computed value second.  */
+ _3 = x_6(D) + 1.0e+0;
+ _4 = a[1];
+ /* Once with operands swapped: unlike the add feeding a[0] above, the
+    computed value _3 is the first operand here and the loaded value _4
+    the second.  */
+ tem2_8 = _3 + _4;
+ a[0] = tem1_7;
+ a[1] = tem2_8;
+ return;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block part vectorized" 2 "slp2" } } */
+/* We want to vectorize as { a[0], a[1] } + { x*3, x+1 } and thus
+ elide one add in each function. */
+/* { dg-final { scan-tree-dump-times " \\+ " 4 "optimized" } } */
continue;
}
- /* If the SLP build failed fatally and we analyze a basic-block
- simply treat nodes we fail to build as externally defined
- (and thus build vectors from the scalar defs).
- The cost model will reject outright expensive cases.
- ??? This doesn't treat cases where permutation ultimatively
- fails (or we don't try permutation below). Ideally we'd
- even compute a permutation that will end up with the maximum
- SLP tree size... */
- if (is_a <bb_vec_info> (vinfo)
- && !matches[0]
- /* ??? Rejecting patterns this way doesn't work. We'd have to
- do extra work to cancel the pattern so the uses see the
- scalar version. */
- && !is_pattern_stmt_p (stmt_info)
- && !oprnd_info->any_pattern)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Building vector operands from scalars\n");
- this_tree_size++;
- child = vect_create_new_slp_node (oprnd_info->ops);
- children.safe_push (child);
- oprnd_info->ops = vNULL;
- oprnd_info->def_stmts = vNULL;
- continue;
- }
-
/* If the SLP build for operand zero failed and operand zero
and one can be commutated try that for the scalar stmts
that failed the match. */
children.safe_push (child);
continue;
}
-
+ /* We do not undo the swapping here since it might still be
+ the better order for the second operand in case we build
+ the first one from scalars below. */
++*npermutes;
}
-
fail:
+
+ /* If the SLP build failed and we analyze a basic-block
+ simply treat nodes we fail to build as externally defined
+ (and thus build vectors from the scalar defs).
+ The cost model will reject outright expensive cases.
+ ??? This doesn't treat cases where permutation ultimately
+ fails (or we don't try permutation below). Ideally we'd
+ even compute a permutation that will end up with the maximum
+ SLP tree size... */
+ if (is_a <bb_vec_info> (vinfo)
+ /* ??? Rejecting patterns this way doesn't work. We'd have to
+ do extra work to cancel the pattern so the uses see the
+ scalar version. */
+ && !is_pattern_stmt_p (stmt_info)
+ && !oprnd_info->any_pattern)
+ {
+ /* But if there's a leading vector sized set of matching stmts
+ fail here so we can split the group. This matches the condition
+ vect_analyze_slp_instance uses. */
+ /* ??? We might want to split here and combine the results to support
+ multiple vector sizes better. */
+ for (j = 0; j < group_size; ++j)
+ if (!matches[j])
+ break;
+ if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Building vector operands from scalars\n");
+ this_tree_size++;
+ child = vect_create_new_slp_node (oprnd_info->ops);
+ children.safe_push (child);
+ oprnd_info->ops = vNULL;
+ oprnd_info->def_stmts = vNULL;
+ continue;
+ }
+ }
+
gcc_assert (child == NULL);
FOR_EACH_VEC_ELT (children, j, child)
vect_free_slp_tree (child, false);