loops, do this only for the rectangular loops. Then pick
the loops which reference outer vars in their bound expressions
and the loops which they refer to and for this sub-nest compute
- number of iterations. For triangular loops use Faulhaber's formula
- (TBD.), otherwise as a fallback, compute by iterating the loops.
+ number of iterations. For triangular loops use Faulhaber's formula,
+ otherwise as a fallback, compute by iterating the loops.
If e.g. the sub-nest is
for (I = N11; I COND1 N12; I += STEP1)
for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
into its _looptemp_ temporaries instead.
For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
inclusive), use the count of all those loops together, and either
- find quadratic etc. equation roots (TBD), or as a fallback, do:
+ find quadratic etc. equation roots, or as a fallback, do:
COUNT = 0;
for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
for (tmpj = M21 * tmpi + N21;
if (V cond N2) goto L0; else goto L2;
L2:
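As a hedged aside (not part of the patch; the helper names below are invented), the two counting strategies named above agree for a simple triangular sub-nest: Faulhaber's formula for the sum of first powers gives a closed form, and the fallback simply iterates the loops:

/* Iteration count of
     for (i = 0; i < n; i++)
       for (j = 0; j < m * i; j++)
   The inner trip counts form the arithmetic series 0, m, 2*m, ..., m*(n-1),
   so Faulhaber's formula for first powers gives m * n * (n - 1) / 2.  */
static unsigned long long
triangular_count_closed_form (unsigned long long n, unsigned long long m)
{
  return m * n * (n - 1) / 2;
}

/* The fallback: compute the count by iterating the loops.  */
static unsigned long long
triangular_count_fallback (unsigned long long n, unsigned long long m)
{
  unsigned long long count = 0;
  for (unsigned long long i = 0; i < n; i++)
    for (unsigned long long j = 0; j < m * i; j++)
      count++;
  return count;
}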
- For collapsed loops, given parameters:
- collapse(3)
- for (V1 = N11; V1 cond1 N12; V1 += STEP1)
- for (V2 = N21; V2 cond2 N22; V2 += STEP2)
- for (V3 = N31; V3 cond3 N32; V3 += STEP3)
- BODY;
-
- we generate pseudocode
-
- if (cond3 is <)
- adj = STEP3 - 1;
- else
- adj = STEP3 + 1;
- count3 = (adj + N32 - N31) / STEP3;
- if (cond2 is <)
- adj = STEP2 - 1;
- else
- adj = STEP2 + 1;
- count2 = (adj + N22 - N21) / STEP2;
- if (cond1 is <)
- adj = STEP1 - 1;
- else
- adj = STEP1 + 1;
- count1 = (adj + N12 - N11) / STEP1;
- count = count1 * count2 * count3;
- V = 0;
- V1 = N11;
- V2 = N21;
- V3 = N31;
- goto L1;
- L0:
- BODY;
- V += 1;
- V3 += STEP3;
- V2 += (V3 cond3 N32) ? 0 : STEP2;
- V3 = (V3 cond3 N32) ? V3 : N31;
- V1 += (V2 cond2 N22) ? 0 : STEP1;
- V2 = (V2 cond2 N22) ? V2 : N21;
- L1:
- if (V < count) goto L0; else goto L2;
- L2:
-
- */
+ For collapsed loops, emit the outer loops as scalar
+ and only try to vectorize the innermost loop. */
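A rough sketch of what the new strategy produces for a collapse(2) nest, in the same N11/STEP1 notation as the removed pseudocode above (illustrative only, nothing below is emitted verbatim; M21/M22 are simply absent for rectangular loops):

/* For
     #pragma omp simd collapse(2)
     for (V1 = N11; V1 cond1 N12; V1 += STEP1)
       for (V2 = M21 * V1 + N21; V2 cond2 M22 * V1 + N22; V2 += STEP2)
         BODY;
   the expansion now roughly produces
     for (V1 = N11; V1 cond1 N12; V1 += STEP1)
       {
         V2 = M21 * V1 + N21;
         N2INNER = M22 * V1 + N22;
         for (; V2 cond2 N2INNER; V2 += STEP2)
           BODY;
       }
   so only the innermost loop is presented to the vectorizer, instead of a
   single loop over the collapsed iteration space.  */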
static void
expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
/* Not needed in SSA form right now. */
gcc_assert (!gimple_in_ssa_p (cfun));
- if (fd->collapse > 1)
+ if (fd->collapse > 1
+ && (gimple_omp_for_combined_into_p (fd->for_stmt)
+ || broken_loop))
{
int first_zero_iter = -1, dummy = -1;
basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
}
- expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
+ tree n2var = NULL_TREE;
+ tree n2v = NULL_TREE;
+ tree *nonrect_bounds = NULL;
if (fd->collapse > 1)
{
- if (gimple_omp_for_combined_into_p (fd->for_stmt))
+ if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
{
+ if (fd->non_rect)
+ {
+ nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
+ memset (nonrect_bounds, 0,
+ sizeof (tree) * (fd->last_nonrect + 1));
+ }
+ expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
+ gcc_assert (entry_bb == gsi_bb (gsi));
+ gcc_assert (fd->for_stmt == gsi_stmt (gsi));
gsi_prev (&gsi);
- expand_omp_for_init_vars (fd, &gsi, counts, NULL, NULL, n1);
- gsi_next (&gsi);
+ entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
+ expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
+ NULL, n1);
+ gsi = gsi_for_stmt (fd->for_stmt);
+ }
+ if (broken_loop)
+ ;
+ else if (gimple_omp_for_combined_into_p (fd->for_stmt))
+ {
+ /* Compute in n2var the limit for the first innermost loop,
+ i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
+ where cnt is how many iterations the loop would have if all
+ further iterations were assigned to the current task. */
+ n2var = create_tmp_var (type);
+ i = fd->collapse - 1;
+ tree itype = TREE_TYPE (fd->loops[i].v);
+ if (POINTER_TYPE_P (itype))
+ itype = signed_type_for (itype);
+ t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
+ ? -1 : 1));
+ t = fold_build2 (PLUS_EXPR, itype,
+ fold_convert (itype, fd->loops[i].step), t);
+ t = fold_build2 (PLUS_EXPR, itype, t,
+ fold_convert (itype, fd->loops[i].n2));
+ if (fd->loops[i].m2)
+ {
+ tree t2 = fold_convert (itype,
+ fd->loops[i - fd->loops[i].outer].v);
+ tree t3 = fold_convert (itype, fd->loops[i].m2);
+ t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
+ t = fold_build2 (PLUS_EXPR, itype, t, t2);
+ }
+ t = fold_build2 (MINUS_EXPR, itype, t,
+ fold_convert (itype, fd->loops[i].v));
+ if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
+ t = fold_build2 (TRUNC_DIV_EXPR, itype,
+ fold_build1 (NEGATE_EXPR, itype, t),
+ fold_build1 (NEGATE_EXPR, itype,
+ fold_convert (itype,
+ fd->loops[i].step)));
+ else
+ t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
+ fold_convert (itype, fd->loops[i].step));
+ t = fold_convert (type, t);
+ tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
+ t = fold_build2 (MIN_EXPR, type, t2, t);
+ t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
+ expand_omp_build_assign (&gsi, n2var, t);
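/* Worked example of the MIN above (illustrative numbers only, not from the
   patch): if the current task's chunk of the collapsed space is [V, V + 7)
   (so n2 - fd->loop.v == 7) but the innermost loop only has 3 iterations
   left from its current position (cnt == 3), then n2var = V + 3; the
   vectorized inner loop stops there, and the remaining 4 iterations are
   reached after the outer iterators advance and the inner one is
   re-initialized.  */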
}
else
- for (i = 0; i < fd->collapse; i++)
- {
- tree itype = TREE_TYPE (fd->loops[i].v);
- if (POINTER_TYPE_P (itype))
- itype = signed_type_for (itype);
- t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
- expand_omp_build_assign (&gsi, fd->loops[i].v, t);
- }
+ {
+ if (TREE_CODE (n2) == INTEGER_CST)
+ {
+ /* Indicate for lastprivate handling that at least one iteration
+ has been performed, without wasting runtime. */
+ if (integer_nonzerop (n2))
+ expand_omp_build_assign (&gsi, fd->loop.v,
+ fold_convert (type, n2));
+ else
+ /* Indicate that no iteration has been performed. */
+ expand_omp_build_assign (&gsi, fd->loop.v,
+ build_one_cst (type));
+ }
+ else
+ {
+ expand_omp_build_assign (&gsi, fd->loop.v,
+ build_zero_cst (type));
+ expand_omp_build_assign (&gsi, n2, build_one_cst (type));
+ }
+ for (i = 0; i < fd->collapse; i++)
+ {
+ t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
+ if (fd->loops[i].m1)
+ {
+ tree t2
+ = fold_convert (TREE_TYPE (t),
+ fd->loops[i - fd->loops[i].outer].v);
+ tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
+ t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
+ t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
+ }
+ expand_omp_build_assign (&gsi, fd->loops[i].v, t);
+ /* For normal non-combined collapsed loops just initialize
+ the outermost iterator in the entry_bb. */
+ if (!broken_loop)
+ break;
+ }
+ }
}
+ else
+ expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
if (cond_var)
{
if (POINTER_TYPE_P (type)
stmt = gsi_stmt (gsi);
gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
- if (POINTER_TYPE_P (type))
- t = fold_build_pointer_plus (fd->loop.v, step);
- else
- t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
- expand_omp_build_assign (&gsi, fd->loop.v, t);
+ if (fd->collapse == 1
+ || gimple_omp_for_combined_into_p (fd->for_stmt))
+ {
+ if (POINTER_TYPE_P (type))
+ t = fold_build_pointer_plus (fd->loop.v, step);
+ else
+ t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
+ expand_omp_build_assign (&gsi, fd->loop.v, t);
+ }
+ else if (TREE_CODE (n2) != INTEGER_CST)
+ expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
if (fd->collapse > 1)
{
fd->loops[i].v, t);
}
expand_omp_build_assign (&gsi, fd->loops[i].v, t);
-
- for (i = fd->collapse - 1; i > 0; i--)
- {
- tree itype = TREE_TYPE (fd->loops[i].v);
- tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
- if (POINTER_TYPE_P (itype2))
- itype2 = signed_type_for (itype2);
- t = fold_convert (itype2, fd->loops[i - 1].step);
- t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
- GSI_SAME_STMT);
- t = build3 (COND_EXPR, itype2,
- build2 (fd->loops[i].cond_code, boolean_type_node,
- fd->loops[i].v,
- fold_convert (itype, fd->loops[i].n2)),
- build_int_cst (itype2, 0), t);
- if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
- t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
- else
- t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
- expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
-
- t = fold_convert (itype, fd->loops[i].n1);
- t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
- GSI_SAME_STMT);
- t = build3 (COND_EXPR, itype,
- build2 (fd->loops[i].cond_code, boolean_type_node,
- fd->loops[i].v,
- fold_convert (itype, fd->loops[i].n2)),
- fd->loops[i].v, t);
- expand_omp_build_assign (&gsi, fd->loops[i].v, t);
- }
}
if (cond_var)
{
/* Emit the condition in L1_BB. */
gsi = gsi_start_bb (l1_bb);
- t = fold_convert (type, n2);
- t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
- false, GSI_CONTINUE_LINKING);
- tree v = fd->loop.v;
- if (DECL_P (v) && TREE_ADDRESSABLE (v))
- v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
- false, GSI_CONTINUE_LINKING);
- t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
+ if (fd->collapse > 1
+ && !gimple_omp_for_combined_into_p (fd->for_stmt)
+ && !broken_loop)
+ {
+ i = fd->collapse - 1;
+ tree itype = TREE_TYPE (fd->loops[i].v);
+ if (fd->loops[i].m2)
+ t = n2v = create_tmp_var (itype);
+ else
+ t = fold_convert (itype, fd->loops[i].n2);
+ t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ tree v = fd->loops[i].v;
+ if (DECL_P (v) && TREE_ADDRESSABLE (v))
+ v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
+ }
+ else
+ {
+ if (fd->collapse > 1 && !broken_loop)
+ t = n2var;
+ else
+ t = fold_convert (type, n2);
+ t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ tree v = fd->loop.v;
+ if (DECL_P (v) && TREE_ADDRESSABLE (v))
+ v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
+ }
cond_stmt = gimple_build_cond_empty (t);
gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
FALLTHRU_EDGE (entry_bb)->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
- BRANCH_EDGE (entry_bb)->probability
+ BRANCH_EDGE (entry_bb)->probability
= FALLTHRU_EDGE (entry_bb)->probability.invert ();
l2_dom_bb = entry_bb;
}
set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
+ if (!broken_loop && fd->collapse > 1)
+ {
+ basic_block last_bb = l1_bb;
+ basic_block init_bb = NULL;
+ for (i = fd->collapse - 2; i >= 0; i--)
+ {
+ tree nextn2v = NULL_TREE;
+ if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
+ e = EDGE_SUCC (last_bb, 0);
+ else
+ e = EDGE_SUCC (last_bb, 1);
+ basic_block bb = split_edge (e);
+ if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
+ {
+ t = fold_convert (sizetype, fd->loops[i].step);
+ t = fold_build_pointer_plus (fd->loops[i].v, t);
+ }
+ else
+ {
+ t = fold_convert (TREE_TYPE (fd->loops[i].v),
+ fd->loops[i].step);
+ t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
+ fd->loops[i].v, t);
+ }
+ gsi = gsi_after_labels (bb);
+ expand_omp_build_assign (&gsi, fd->loops[i].v, t);
+
+ bb = split_block (bb, last_stmt (bb))->dest;
+ gsi = gsi_start_bb (bb);
+ tree itype = TREE_TYPE (fd->loops[i].v);
+ if (fd->loops[i].m2)
+ t = nextn2v = create_tmp_var (itype);
+ else
+ t = fold_convert (itype, fd->loops[i].n2);
+ t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ tree v = fd->loops[i].v;
+ if (DECL_P (v) && TREE_ADDRESSABLE (v))
+ v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
+ cond_stmt = gimple_build_cond_empty (t);
+ gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
+ if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
+ expand_omp_regimplify_p, NULL, NULL)
+ || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
+ expand_omp_regimplify_p, NULL, NULL))
+ {
+ gsi = gsi_for_stmt (cond_stmt);
+ gimple_regimplify_operands (cond_stmt, &gsi);
+ }
+ ne = single_succ_edge (bb);
+ ne->flags = EDGE_FALSE_VALUE;
+
+ init_bb = create_empty_bb (bb);
+ set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
+ add_bb_to_loop (init_bb, bb->loop_father);
+ e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
+ e->probability
+ = profile_probability::guessed_always ().apply_scale (7, 8);
+ ne->probability = e->probability.invert ();
+
+ gsi = gsi_after_labels (init_bb);
+ t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
+ fd->loops[i + 1].n1);
+ if (fd->loops[i + 1].m1)
+ {
+ tree t2 = fold_convert (TREE_TYPE (t),
+ fd->loops[i + 1
+ - fd->loops[i + 1].outer].v);
+ tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
+ t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
+ t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
+ }
+ expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
+ if (fd->loops[i + 1].m2)
+ {
+ if (i + 2 == fd->collapse && n2var)
+ {
+ gcc_assert (n2v == NULL_TREE);
+ n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
+ }
+ t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
+ fd->loops[i + 1].n2);
+ tree t2 = fold_convert (TREE_TYPE (t),
+ fd->loops[i + 1
+ - fd->loops[i + 1].outer].v);
+ tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
+ t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
+ t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
+ expand_omp_build_assign (&gsi, n2v, t);
+ }
+ if (i + 2 == fd->collapse && n2var)
+ {
+ /* For composite simd, n2 is the first iteration the current
+ task shouldn't already handle, so we effectively want to use
+ for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
+ as the vectorized loop. Except the vectorizer will not
+ vectorize that, so instead compute N2VAR as
+ N2VAR = V + MIN (N2 - V, COUNTS3) and use
+ for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
+ as the loop to vectorize. */
+ tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
+ if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
+ {
+ t = build_int_cst (itype, (fd->loops[i + 1].cond_code
+ == LT_EXPR ? -1 : 1));
+ t = fold_build2 (PLUS_EXPR, itype,
+ fold_convert (itype,
+ fd->loops[i + 1].step), t);
+ if (fd->loops[i + 1].m2)
+ t = fold_build2 (PLUS_EXPR, itype, t, n2v);
+ else
+ t = fold_build2 (PLUS_EXPR, itype, t,
+ fold_convert (itype,
+ fd->loops[i + 1].n2));
+ t = fold_build2 (MINUS_EXPR, itype, t,
+ fold_convert (itype, fd->loops[i + 1].v));
+ tree step = fold_convert (itype, fd->loops[i + 1].step);
+ if (TYPE_UNSIGNED (itype)
+ && fd->loops[i + 1].cond_code == GT_EXPR)
+ t = fold_build2 (TRUNC_DIV_EXPR, itype,
+ fold_build1 (NEGATE_EXPR, itype, t),
+ fold_build1 (NEGATE_EXPR, itype, step));
+ else
+ t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
+ t = fold_convert (type, t);
+ }
+ else
+ t = counts[i + 1];
+ t = fold_build2 (MIN_EXPR, type, t2, t);
+ t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
+ expand_omp_build_assign (&gsi, n2var, t);
+ }
+ n2v = nextn2v;
+
+ make_edge (init_bb, last_bb, EDGE_FALLTHRU);
+ if (!gimple_omp_for_combined_into_p (fd->for_stmt))
+ {
+ e = find_edge (entry_bb, last_bb);
+ redirect_edge_succ (e, bb);
+ set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
+ set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
+ }
+
+ last_bb = bb;
+ }
+ }
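/* Shape of the control flow built above for a non-combined collapse(2)
   simd (sketch only, labels invented):
     L0:  BODY; V2 += STEP2;
     L1:  if (V2 cond2 N22) goto L0;    <- the only loop the vectorizer sees
          V1 += STEP1;
          if (V1 cond1 N12)
            { V2 = M21 * V1 + N21; goto L1; }
     L2:  ;
   i.e. the outer iterator is advanced and the inner one re-initialized in
   the blocks created above, and control then falls back to the inner
   condition in L1_BB.  */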
if (!broken_loop)
{
class loop *loop = alloc_loop ();
loops_state_set (LOOPS_NEED_FIXUP);
if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
- {
- if (fd.non_rect)
- sorry_at (gimple_location (fd.for_stmt),
- "non-rectangular %<simd%> not supported yet");
- expand_omp_simd (region, &fd);
- }
+ expand_omp_simd (region, &fd);
else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
{
gcc_assert (!inner_stmt && !fd.non_rect);
--- /dev/null
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump "vectorized \(\[4-9]\|1\[0-2]\) loops" "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#include "tree-vect.h"
+
+int x, i, j;
+volatile int a, b, c, d, e, f, g, h;
+int k[11][101];
+
+__attribute__((noipa)) void
+doit (void)
+{
+ int niters, err = 0;
+ for (i = 1; i <= 10; i++)
+ for (j = 1; j <= 10 * i; j++)
+ {
+ k[i][j] = 1;
+ asm volatile ("" : : : "memory");
+ }
+ a = 1; b = 11; c = 1; d = 0; e = 1; f = 10; g = 1; h = 1;
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = 1; i <= 10; i++)
+ for (j = 1; j <= 10 * i; j++)
+ {
+ err |= (i < 1);
+ err |= (i > 10);
+ err |= (j < 1);
+ err |= (j > 10 * i);
+ err |= (k[i][j] != 1);
+ k[i][j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 11 || j != 101 || x != 10340 || niters != 550 || err)
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = a; i < b; i += c)
+ for (j = d * i + e; j < g + i * f; j += h)
+ {
+ err |= (i < 1);
+ err |= (i > 10);
+ err |= (j < 1);
+ err |= (j > 10 * i);
+ err |= (k[i][j] != 2);
+ k[i][j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 11 || j != 101 || x != 10340 || niters != 550 || err)
+ abort ();
+ for (i = 1; i <= 10; i++)
+ for (j = 1; j <= 10 * i; j++)
+ if (k[i][j] == 3)
+ k[i][j] = 0;
+ else
+ abort ();
+ for (i = 0; i < 11; i++)
+ for (j = 0; j < 101; j++)
+ if (k[i][j] != 0)
+ abort ();
+ for (i = 0; i < 10; i++)
+ for (j = 0; j < 10 * i; j++)
+ {
+ k[i][j] = 1;
+ asm volatile ("" : : : "memory");
+ }
+ a = 0; b = 10; c = 1; d = 0; e = 0; f = 10; g = 0; h = 1;
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = 0; i < 10; i++)
+ for (j = 0; j < 10 * i; j++)
+ {
+ err |= (i < 0);
+ err |= (i >= 10);
+ err |= (j < 0);
+ err |= (j >= 10 * i);
+ err |= (k[i][j] != 1);
+ k[i][j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 10 || j != 90 || x != 9305 || niters != 450 || err)
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = a; i < b; i += c)
+ for (j = d * i + e; j < g + i * f; j += h)
+ {
+ err |= (i < 0);
+ err |= (i >= 10);
+ err |= (j < 0);
+ err |= (j >= 10 * i);
+ err |= (k[i][j] != 2);
+ k[i][j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 10 || j != 90 || x != 9305 || niters != 450 || err)
+ abort ();
+ for (i = 0; i < 10; i++)
+ for (j = 0; j < 10 * i; j++)
+ if (k[i][j] == 3)
+ k[i][j] = 0;
+ else
+ abort ();
+ for (i = 0; i < 11; i++)
+ for (j = 0; j < 101; j++)
+ if (k[i][j] != 0)
+ abort ();
+ for (i = 4; i < 10; i++)
+ for (j = -9 + 2 * i; j < i; j++)
+ {
+ k[i][j + 1] = 1;
+ asm volatile ("" : : : "memory");
+ }
+ a = 4; b = 10; c = 1; d = 2; e = -9; f = 1; g = 0; h = 1;
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = 4; i < 10; i++)
+ for (j = -9 + 2 * i; j < i; j++)
+ {
+ err |= (i < 4);
+ err |= (i >= 10);
+ err |= (j < -9 + 2 * i);
+ err |= (j >= i);
+ err |= (k[i][j + 1] != 1);
+ k[i][j + 1]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (/*i != 10 || j != 9 || */x != 8199 || niters != 15 || err)
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = a; i < b; i += c)
+ for (j = d * i + e; j < g + i * f; j += h)
+ {
+ err |= (i < 4);
+ err |= (i >= 10);
+ err |= (j < -9 + 2 * i);
+ err |= (j >= i);
+ err |= (k[i][j + 1] != 2);
+ k[i][j + 1]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (/*i != 10 || j != 9 || */x != 8199 || niters != 15 || err)
+ abort ();
+ for (i = 4; i < 10; i++)
+ for (j = -9 + 2 * i; j < i; j++)
+ if (k[i][j + 1] == 3)
+ k[i][j + 1] = 0;
+ else
+ abort ();
+ for (i = 0; i < 11; i++)
+ for (j = 0; j < 101; j++)
+ if (k[i][j] != 0)
+ abort ();
+ for (i = 1; i < 10; i += 2)
+ for (j = 1; j < i + 1; j++)
+ {
+ k[i][j] = 1;
+ asm volatile ("" : : : "memory");
+ }
+ a = 1; b = 10; c = 2; d = 0; e = 1; f = 1; g = 1; h = 1;
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = 1; i < 10; i += 2)
+ for (j = 1; j < i + 1; j++)
+ {
+ err |= (i < 1);
+ err |= (i >= 10);
+ err |= (j < 1);
+ err |= (j >= i + 1);
+ err |= (k[i][j] != 1);
+ k[i][j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 11 || j != 10 || x != 9225 || niters != 25 || err)
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = a; i < b; i += c)
+ for (j = d * i + e; j < g + i * f; j += h)
+ {
+ err |= (i < 1);
+ err |= (i >= 10);
+ err |= (j < 1);
+ err |= (j >= i + 1);
+ err |= (k[i][j] != 2);
+ k[i][j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 11 || j != 10 || x != 9225 || niters != 25 || err)
+ abort ();
+ for (i = 1; i < 10; i += 2)
+ for (j = 1; j < i + 1; j++)
+ if (k[i][j] == 3)
+ k[i][j] = 0;
+ else
+ abort ();
+ for (i = 0; i < 11; i++)
+ for (j = 0; j < 101; j++)
+ if (k[i][j] != 0)
+ abort ();
+ for (j = -11; j >= -41; j -= 15)
+ {
+ k[0][-j] = 1;
+ asm volatile ("" : : : "memory");
+ }
+ a = 4; b = 8; c = 12; d = -8; e = -9; f = -3; g = 6; h = 15;
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = 4; i < 8; i += 12)
+ for (j = -8 * i - 9; j < i * -3 + 6; j += 15)
+ {
+ err |= (i != 4);
+ err |= (j < -41);
+ err |= (j > -11);
+ err |= (k[0][-j] != 1);
+ k[0][-j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 16 || j != 4 || x != 5109 || niters != 3 || err)
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = a; i < b; i += c)
+ for (j = d * i + e; j < g + i * f; j += h)
+ {
+ err |= (i != 4);
+ err |= (j < -41);
+ err |= (j > -11);
+ err |= (k[0][-j] != 2);
+ k[0][-j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 16 || j != 4 || x != 5109 || niters != 3 || err)
+ abort ();
+ for (j = -11; j >= -41; j -= 15)
+ if (k[0][-j] == 3)
+ k[0][-j] = 0;
+ else
+ abort ();
+ for (j = -11; j >= -41; j--)
+ if (k[0][-j] != 0)
+ abort ();
+ for (j = -34; j <= -7; j++)
+ {
+ k[0][-j] = 1;
+ asm volatile ("" : : : "memory");
+ }
+ a = -13; b = 7; c = 12; d = 3; e = 5; f = 0; g = -6; h = 1;
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = -13; i < 7; i += 12)
+ for (j = 3 * i + 5; j < -6; j++)
+ {
+ err |= (i != -13);
+ err |= (j < -34);
+ err |= (j > -7);
+ err |= (k[0][-j] != 1);
+ k[0][-j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (/*i != 11 || j != 2 || */x != -12295 || niters != 28 || err)
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = a; i < b; i += c)
+ for (j = d * i + e; j < g + i * f; j += h)
+ {
+ err |= (i != -13);
+ err |= (j < -34);
+ err |= (j > -7);
+ err |= (k[0][-j] != 2);
+ k[0][-j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (/*i != 11 || j != 2 || */x != -12295 || niters != 28 || err)
+ abort ();
+ for (j = -34; j <= -7; j++)
+ if (k[0][-j] == 3)
+ k[0][-j] = 0;
+ else
+ abort ();
+}
+
+int
+main ()
+{
+ check_vect ();
+ doit ();
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+int x, i, j;
+volatile int a, b, c, d, e, f, g, h;
+int k[11][101];
+extern void abort (void);
+
+int
+main ()
+{
+ int niters, err = 0;
+ for (i = 1; i <= 10; i++)
+ for (j = 1; j <= 10 * i; j++)
+ {
+ k[i][j] = 1;
+ asm volatile ("" : : : "memory");
+ }
+ a = 1; b = 11; c = 1; d = 0; e = 1; f = 10; g = 1; h = 1;
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = 1; i <= 10; i++)
+ for (j = 1; j <= 10 * i; j++)
+ {
+ err |= (i < 1);
+ err |= (i > 10);
+ err |= (j < 1);
+ err |= (j > 10 * i);
+ err |= (k[i][j] != 1);
+ k[i][j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 11 || j != 101 || x != 10340 || niters != 550 || err)
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = a; i < b; i += c)
+ for (j = d * i + e; j < g + i * f; j += h)
+ {
+ err |= (i < 1);
+ err |= (i > 10);
+ err |= (j < 1);
+ err |= (j > 10 * i);
+ err |= (k[i][j] != 2);
+ k[i][j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 11 || j != 101 || x != 10340 || niters != 550 || err)
+ abort ();
+ for (i = 1; i <= 10; i++)
+ for (j = 1; j <= 10 * i; j++)
+ if (k[i][j] == 3)
+ k[i][j] = 0;
+ else
+ abort ();
+ for (i = 0; i < 11; i++)
+ for (j = 0; j < 101; j++)
+ if (k[i][j] != 0)
+ abort ();
+ for (i = 0; i < 10; i++)
+ for (j = 0; j < 10 * i; j++)
+ {
+ k[i][j] = 1;
+ asm volatile ("" : : : "memory");
+ }
+ a = 0; b = 10; c = 1; d = 0; e = 0; f = 10; g = 0; h = 1;
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = 0; i < 10; i++)
+ for (j = 0; j < 10 * i; j++)
+ {
+ err |= (i < 0);
+ err |= (i >= 10);
+ err |= (j < 0);
+ err |= (j >= 10 * i);
+ err |= (k[i][j] != 1);
+ k[i][j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 10 || j != 90 || x != 9305 || niters != 450 || err)
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = a; i < b; i += c)
+ for (j = d * i + e; j < g + i * f; j += h)
+ {
+ err |= (i < 0);
+ err |= (i >= 10);
+ err |= (j < 0);
+ err |= (j >= 10 * i);
+ err |= (k[i][j] != 2);
+ k[i][j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 10 || j != 90 || x != 9305 || niters != 450 || err)
+ abort ();
+ for (i = 0; i < 10; i++)
+ for (j = 0; j < 10 * i; j++)
+ if (k[i][j] == 3)
+ k[i][j] = 0;
+ else
+ abort ();
+ for (i = 0; i < 11; i++)
+ for (j = 0; j < 101; j++)
+ if (k[i][j] != 0)
+ abort ();
+ for (i = 4; i < 10; i++)
+ for (j = -9 + 2 * i; j < i; j++)
+ {
+ k[i][j + 1] = 1;
+ asm volatile ("" : : : "memory");
+ }
+ a = 4; b = 10; c = 1; d = 2; e = -9; f = 1; g = 0; h = 1;
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = 4; i < 10; i++)
+ for (j = -9 + 2 * i; j < i; j++)
+ {
+ err |= (i < 4);
+ err |= (i >= 10);
+ err |= (j < -9 + 2 * i);
+ err |= (j >= i);
+ err |= (k[i][j + 1] != 1);
+ k[i][j + 1]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (/*i != 10 || j != 9 || */x != 8199 || niters != 15 || err)
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = a; i < b; i += c)
+ for (j = d * i + e; j < g + i * f; j += h)
+ {
+ err |= (i < 4);
+ err |= (i >= 10);
+ err |= (j < -9 + 2 * i);
+ err |= (j >= i);
+ err |= (k[i][j + 1] != 2);
+ k[i][j + 1]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (/*i != 10 || j != 9 || */x != 8199 || niters != 15 || err)
+ abort ();
+ for (i = 4; i < 10; i++)
+ for (j = -9 + 2 * i; j < i; j++)
+ if (k[i][j + 1] == 3)
+ k[i][j + 1] = 0;
+ else
+ abort ();
+ for (i = 0; i < 11; i++)
+ for (j = 0; j < 101; j++)
+ if (k[i][j] != 0)
+ abort ();
+ for (i = 1; i < 10; i += 2)
+ for (j = 1; j < i + 1; j++)
+ {
+ k[i][j] = 1;
+ asm volatile ("" : : : "memory");
+ }
+ a = 1; b = 10; c = 2; d = 0; e = 1; f = 1; g = 1; h = 1;
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = 1; i < 10; i += 2)
+ for (j = 1; j < i + 1; j++)
+ {
+ err |= (i < 1);
+ err |= (i >= 10);
+ err |= (j < 1);
+ err |= (j >= i + 1);
+ err |= (k[i][j] != 1);
+ k[i][j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 11 || j != 10 || x != 9225 || niters != 25 || err)
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = a; i < b; i += c)
+ for (j = d * i + e; j < g + i * f; j += h)
+ {
+ err |= (i < 1);
+ err |= (i >= 10);
+ err |= (j < 1);
+ err |= (j >= i + 1);
+ err |= (k[i][j] != 2);
+ k[i][j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 11 || j != 10 || x != 9225 || niters != 25 || err)
+ abort ();
+ for (i = 1; i < 10; i += 2)
+ for (j = 1; j < i + 1; j++)
+ if (k[i][j] == 3)
+ k[i][j] = 0;
+ else
+ abort ();
+ for (i = 0; i < 11; i++)
+ for (j = 0; j < 101; j++)
+ if (k[i][j] != 0)
+ abort ();
+ for (j = -11; j >= -41; j -= 15)
+ {
+ k[0][-j] = 1;
+ asm volatile ("" : : : "memory");
+ }
+ a = 4; b = 8; c = 12; d = -8; e = -9; f = -3; g = 6; h = 15;
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = 4; i < 8; i += 12)
+ for (j = -8 * i - 9; j < i * -3 + 6; j += 15)
+ {
+ err |= (i != 4);
+ err |= (j < -41);
+ err |= (j > -11);
+ err |= (k[0][-j] != 1);
+ k[0][-j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 16 || j != 4 || x != 5109 || niters != 3 || err)
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = a; i < b; i += c)
+ for (j = d * i + e; j < g + i * f; j += h)
+ {
+ err |= (i != 4);
+ err |= (j < -41);
+ err |= (j > -11);
+ err |= (k[0][-j] != 2);
+ k[0][-j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 16 || j != 4 || x != 5109 || niters != 3 || err)
+ abort ();
+ for (j = -11; j >= -41; j -= 15)
+ if (k[0][-j] == 3)
+ k[0][-j] = 0;
+ else
+ abort ();
+ for (j = -11; j >= -41; j--)
+ if (k[0][-j] != 0)
+ abort ();
+ for (j = -34; j <= -7; j++)
+ {
+ k[0][-j] = 1;
+ asm volatile ("" : : : "memory");
+ }
+ a = -13; b = 7; c = 12; d = 3; e = 5; f = 0; g = -6; h = 1;
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = -13; i < 7; i += 12)
+ for (j = 3 * i + 5; j < -6; j++)
+ {
+ err |= (i != -13);
+ err |= (j < -34);
+ err |= (j > -7);
+ err |= (k[0][-j] != 1);
+ k[0][-j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (/*i != 11 || j != 2 || */x != -12295 || niters != 28 || err)
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err)
+ for (i = a; i < b; i += c)
+ for (j = d * i + e; j < g + i * f; j += h)
+ {
+ err |= (i != -13);
+ err |= (j < -34);
+ err |= (j > -7);
+ err |= (k[0][-j] != 2);
+ k[0][-j]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (/*i != 11 || j != 2 || */x != -12295 || niters != 28 || err)
+ abort ();
+ for (j = -34; j <= -7; j++)
+ if (k[0][-j] == 3)
+ k[0][-j] = 0;
+ else
+ abort ();
+ return 0;
+}