--- /dev/null
+/* { dg-require-effective-target vect_usad_char } */
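+/* Verify that the unrolled sum-of-absolute-differences reduction in foo
+   is recognized by the SAD pattern and vectorized with SLP; the
+   scan-tree-dump directives at the end check for both.  */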
+
+#include "tree-vect.h"
+
+typedef unsigned int uint32_t;
+typedef unsigned short uint16_t;
+typedef unsigned char uint8_t;
+
+extern int abs (int);
+extern void abort (void);
+
+int __attribute__((noinline,noclone))
+foo (uint8_t *pix1, uint8_t *pix2, int i_stride_pix2)
+{
+ int i_sum = 0;
+ for( int y = 0; y < 16; y++ )
+ {
+ i_sum += abs ( pix1[0] - pix2[0] );
+ i_sum += abs ( pix1[1] - pix2[1] );
+ i_sum += abs ( pix1[2] - pix2[2] );
+ i_sum += abs ( pix1[3] - pix2[3] );
+ i_sum += abs ( pix1[4] - pix2[4] );
+ i_sum += abs ( pix1[5] - pix2[5] );
+ i_sum += abs ( pix1[6] - pix2[6] );
+ i_sum += abs ( pix1[7] - pix2[7] );
+ i_sum += abs ( pix1[8] - pix2[8] );
+ i_sum += abs ( pix1[9] - pix2[9] );
+ i_sum += abs ( pix1[10] - pix2[10] );
+ i_sum += abs ( pix1[11] - pix2[11] );
+ i_sum += abs ( pix1[12] - pix2[12] );
+ i_sum += abs ( pix1[13] - pix2[13] );
+ i_sum += abs ( pix1[14] - pix2[14] );
+ i_sum += abs ( pix1[15] - pix2[15] );
+ pix1 += 16;
+ pix2 += i_stride_pix2;
+ }
+ return i_sum;
+}
+
+int
+main ()
+{
+ check_vect ();
+
+ uint8_t X[16*16];
+ uint8_t Y[16*16];
+
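+  /* Initialize the inputs; the empty volatile asm keeps this loop from
+     being vectorized, so only the loop in foo is of interest.  */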
+ for (int i = 0; i < 16*16; ++i)
+ {
+ X[i] = i;
+ Y[i] = 16*16 - i;
+ __asm__ volatile ("");
+ }
+
+ if (foo (X, Y, 16) != 32512)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_sad_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}
+/* Transfer group and reduction information from STMT to its pattern stmt. */
+
+static void
+vect_fixup_reduc_chain (gimple stmt)
+{
+ gimple firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+ gimple stmtp;
+ gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
+ && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
+ GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
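+  /* Walk the chain, redirecting each element's group links to the
+     corresponding pattern stmts rooted at FIRSTP.  */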
+ do
+ {
+ stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+ GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
+ stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
+ if (stmt)
+ GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
+ = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+ }
+ while (stmt);
+ STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
+}
+
+/* Fix up scalar cycles that now have their stmts detected as patterns.  */
+
+static void
+vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
+{
+ gimple first;
+ unsigned i;
+
+ FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
+ if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
+ {
+ vect_fixup_reduc_chain (first);
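+        /* The chain now lives in the pattern stmts; record its new head.  */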
+ LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
+ = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
+ }
+}
/* Function vect_get_loop_niters.
vect_pattern_recog (loop_vinfo, NULL);
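+  /* Pattern recognition may have replaced stmts that participate in
+     reduction chains; update the recorded chains to the pattern stmts.  */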
+ vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
+
/* Analyze the access patterns of the data-refs in the loop (consecutive,
complex, etc.). FORNOW: Only handle consecutive access pattern. */
exit phi node. */
if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
- scalar_dest = gimple_assign_lhs (
- SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
+ gimple dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
+ /* Handle reduction patterns. */
+ if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
+ dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
+
+ scalar_dest = gimple_assign_lhs (dest_stmt);
group_size = 1;
}
auto_vec<gimple> phis;
int vec_num;
tree def0, def1, tem, op0, op1 = NULL_TREE;
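+  /* Set to false below if STMT is replaced by the head of its reduction
+     chain; the reduction cost is only recorded when FIRST_P is true.  */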
+ bool first_p = true;
/* In case of reduction chain we switch to the first stmt in the chain, but
we don't update STMT_INFO, since only the last stmt is marked as reduction
and has reduction properties. */
- if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
- stmt = GROUP_FIRST_ELEMENT (stmt_info);
+ if (GROUP_FIRST_ELEMENT (stmt_info)
+ && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
+ {
+ stmt = GROUP_FIRST_ELEMENT (stmt_info);
+ first_p = false;
+ }
if (nested_in_vect_loop_p (loop, stmt))
{
return false;
/* Make sure it was already recognized as a reduction computation. */
- if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
- && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
+ if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
+ && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
return false;
/* 2. Has this been recognized as a reduction pattern?
the STMT_VINFO_RELATED_STMT field records the last stmt in
the original sequence that constitutes the pattern. */
- orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+ orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
if (orig_stmt)
{
orig_stmt_info = vinfo_for_stmt (orig_stmt);
return false;
}
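+  /* Re-detect the reduction so it can be checked against what was
+     recorded, for both the pattern and the non-pattern case.  */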
+ gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
+ !nested_cycle, &dummy);
if (orig_stmt)
- gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
- reduc_def_stmt,
- !nested_cycle,
- &dummy));
+ gcc_assert (tmp == orig_stmt
+ || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
else
- {
- gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
- !nested_cycle, &dummy);
- /* We changed STMT to be the first stmt in reduction chain, hence we
- check that in this case the first element in the chain is STMT. */
- gcc_assert (stmt == tmp
- || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
- }
+    /* We changed STMT to be the first stmt in the reduction chain, hence
+       we check that in this case the first element in the chain is STMT.  */
+ gcc_assert (stmt == tmp
+ || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
return false;
if (!vec_stmt) /* transformation not required. */
{
- if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
- reduc_index))
+ if (first_p
+ && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
+ reduc_index))
return false;
STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
return true;
prev_stmt_info = NULL;
prev_phi_info = NULL;
if (slp_node)
- {
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
- == TYPE_VECTOR_SUBPARTS (vectype_in));
- }
+ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
else
{
vec_num = 1;
scalar_stmts.safe_push (next);
next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
}
+  /* Mark the first element of the reduction chain as a reduction to properly
+     transform the node.  In the reduction analysis phase only the last
+     element of the chain is marked as a reduction.  */
+ if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
+ STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
}
else
{
(s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
{s5, s6, s7, s8}. */
- number_of_copies = least_common_multiple (nunits, group_size) / group_size;
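+  /* NUMBER_OF_VECTORS is what the caller requests, so derive the number
+     of scalar copies from it rather than from GROUP_SIZE and NUNITS alone.  */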
+ number_of_copies = nunits * number_of_vectors / group_size;
number_of_places_left_in_vector = nunits;
elts = XALLOCAVEC (tree, nunits);
for the scalar stmts in each node of the SLP tree. Number of vector
elements in one vector iteration is the number of scalar elements in
one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
- size. */
- vec_stmts_size = (vectorization_factor * group_size) / nunits;
+ size.
+     Unless this is an SLP reduction, in which case the number of vector
+     stmts is equal to the number of vector stmts of the children.  */
+ if (GROUP_FIRST_ELEMENT (stmt_info)
+ && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
+ else
+ vec_stmts_size = (vectorization_factor * group_size) / nunits;
if (!SLP_TREE_VEC_STMTS (node).exists ())
{