+2015-07-17 Yuri Rumyantsev <ysrumyan@gmail.com>
+
+ * tree-vect-loop-manip.c (rename_variables_in_bb): Add argument
+ to allow renaming of PHI arguments on edges incoming from outer
+ loop header, add corresponding check before starting the PHI iterator.
+ (slpeel_tree_duplicate_loop_to_edge_cfg): Introduce new bool
+ variable DUPLICATE_OUTER_LOOP and set it to true for outer loops
+ with true force_vectorize. Set-up dominator for outer loop too.
+ Pass DUPLICATE_OUTER_LOOP as argument to rename_variables_in_bb.
+ (slpeel_can_duplicate_loop_p): Allow duplication of outer loop if it
+ was marked with force_vectorize and has restricted cfg.
+ (slpeel_tree_peel_loop_to_edge): Do not rename exit PHI uses in
+ inner loop.
+ * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not
+ do peeling for outer loops.
+
2015-07-17 Yvan Roux <yvan.roux@linaro.org>
Matthias Klose <doko@ubuntu.com>
--- /dev/null
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -ffast-math" } */
+#include <stdlib.h>
+#include "tree-vect.h"
+#define N 64
+
+float *px, *py;
+float *tx, *ty;
+float *x1, *z1, *t1, *t2;
+
+static void inline bar (const float cx, float cy,
+ float *vx, float *vy)
+{
+ int j;
+ for (j = 0; j < N; ++j)
+ {
+ const float dx = cx - px[j];
+ const float dy = cy - py[j];
+ *vx -= dx * tx[j];
+ *vy -= dy * ty[j];
+ }
+}
+
+__attribute__((noinline, noclone)) void foo1 (int n)
+{
+ int i;
+#pragma omp simd
+ for (i=0; i<n; i++)
+ bar (px[i], py[i], x1+i, z1+i);
+}
+
+__attribute__((noinline, noclone)) void foo2 (int n)
+{
+ volatile int i;
+ for (i=0; i<n; i++)
+ bar (px[i], py[i], x1+i, z1+i);
+}
+
+
+int main ()
+{
+ float *X = (float*)malloc (N * 8 * sizeof (float));
+ int i;
+ int n = N - 1;
+ check_vect ();
+ px = &X[0];
+ py = &X[N * 1];
+ tx = &X[N * 2];
+ ty = &X[N * 3];
+ x1 = &X[N * 4];
+ z1 = &X[N * 5];
+ t1 = &X[N * 6];
+ t2 = &X[N * 7];
+
+ for (i=0; i<N; i++)
+ {
+ px[i] = (float) (i+2);
+ tx[i] = (float) (i+1);
+ py[i] = (float) (i+4);
+ ty[i] = (float) (i+3);
+ x1[i] = z1[i] = 1.0f;
+ }
+ foo1 (n); /* vector variant. */
+ for (i=0; i<N;i++)
+ {
+ t1[i] = x1[i]; x1[i] = 1.0f;
+ t2[i] = z1[i]; z1[i] = 1.0f;
+ }
+ foo2 (n); /* scalar variant. */
+ for (i=0; i<N; i++)
+ if (x1[i] != t1[i] || z1[i] != t2[i])
+ abort ();
+ return 0;
+}
+/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" } } */
}
-/* Renames the variables in basic block BB. */
+/* Renames the variables in basic block BB. Allow renaming of PHI arguments
+ on edges incoming from outer-loop header if RENAME_FROM_OUTER_LOOP is
+ true. */
static void
-rename_variables_in_bb (basic_block bb)
+rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
gimple stmt;
use_operand_p use_p;
edge e;
edge_iterator ei;
struct loop *loop = bb->loop_father;
+ struct loop *outer_loop = NULL;
+
+ if (rename_from_outer_loop)
+ {
+ gcc_assert (loop);
+ outer_loop = loop_outer (loop);
+ }
for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
gsi_next (&gsi))
FOR_EACH_EDGE (e, ei, bb->preds)
{
- if (!flow_bb_inside_loop_p (loop, e->src))
+ if (!flow_bb_inside_loop_p (loop, e->src)
+ && (!rename_from_outer_loop || e->src != outer_loop->header))
continue;
for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
gsi_next (&gsi))
bool was_imm_dom;
basic_block exit_dest;
edge exit, new_exit;
+ bool duplicate_outer_loop = false;
exit = single_exit (loop);
at_exit = (e == exit);
bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
-
+ /* Allow duplication of outer loops. */
+ if (scalar_loop->inner)
+ duplicate_outer_loop = true;
/* Check whether duplication is possible. */
if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
{
redirect_edge_and_branch_force (e, new_preheader);
flush_pending_stmts (e);
set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
- if (was_imm_dom)
+ if (was_imm_dom || duplicate_outer_loop)
set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
/* And remove the non-necessary forwarder again. Keep the other
}
for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
- rename_variables_in_bb (new_bbs[i]);
+ rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
if (scalar_loop != loop)
{
/* This function verifies that the following restrictions apply to LOOP:
- (1) it is innermost
- (2) it consists of exactly 2 basic blocks - header, and an empty latch.
- (3) it is single entry, single exit
- (4) its exit condition is the last stmt in the header
- (5) E is the entry/exit edge of LOOP.
+ (1) it consists of exactly 2 basic blocks (header and an empty latch)
+ if it is an innermost loop, or exactly 5 basic blocks if it is an outer loop.
+ (2) it is single entry, single exit
+ (3) its exit condition is the last stmt in the header
+ (4) E is the entry/exit edge of LOOP.
*/
bool
edge entry_e = loop_preheader_edge (loop);
gcond *orig_cond = get_loop_exit_condition (loop);
gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
+ unsigned int num_bb = loop->inner? 5 : 2;
- if (loop->inner
/* All loops have an outer scope; the only case loop->outer is NULL is for
the function itself. */
- || !loop_outer (loop)
- || loop->num_nodes != 2
+ if (!loop_outer (loop)
+ || loop->num_nodes != num_bb
|| !empty_block_p (loop->latch)
|| !single_exit (loop)
/* Verify that new loop exit condition can be trivially modified. */
int bound1, int bound2)
{
struct loop *new_loop = NULL, *first_loop, *second_loop;
+ struct loop *inner_loop = NULL;
edge skip_e;
tree pre_condition = NULL_TREE;
basic_block bb_before_second_loop, bb_after_second_loop;
if (!slpeel_can_duplicate_loop_p (loop, e))
return NULL;
+ if (loop->inner)
+ inner_loop = loop->inner;
+
/* We might have a queued need to update virtual SSA form. As we
delete the update SSA machinery below after doing a regular
incremental SSA update during loop copying make sure we don't
add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
gimple_phi_set_result (new_phi, new_vop);
FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
- if (stmt != new_phi && gimple_bb (stmt) != loop->header)
+ if (stmt != new_phi && gimple_bb (stmt) != loop->header
+ /* Do not rename PHI arguments in inner-loop. */
+ && (!inner_loop || gimple_bb (stmt) != inner_loop->header))
FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
SET_USE (use_p, new_vop);
}