&& (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
!= CODE_FOR_nothing))
{
+ tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
tree itype = TREE_TYPE (fd->loops[i].v);
tree min_inner_iterations = fd->min_inner_iterations;
tree factor = fd->factor;
*gsi = gsi_after_labels (e->dest);
t = fold_convert (itype, c);
t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
- t = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t);
+ t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
GSI_CONTINUE_LINKING);
expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
fd->simd_schedule = false;
fd->min_inner_iterations = NULL_TREE;
fd->factor = NULL_TREE;
+ fd->adjn1 = NULL_TREE;
collapse_iter = NULL;
collapse_count = NULL;
continue;
if (single_nonrect == -1
|| (loop->m1 && TREE_CODE (loop->m1) != INTEGER_CST)
- || (loop->m2 && TREE_CODE (loop->m2) != INTEGER_CST))
+ || (loop->m2 && TREE_CODE (loop->m2) != INTEGER_CST)
+ || TREE_CODE (loop->n1) != INTEGER_CST
+ || TREE_CODE (loop->n2) != INTEGER_CST
+ || TREE_CODE (loop->step) != INTEGER_CST)
{
count = NULL_TREE;
continue;
else if (t && t2 && integer_zerop (t) && integer_zerop (t2))
/* No iterations of the inner loop. count will be set to
zero cst below. */;
- else
+ else if (TYPE_UNSIGNED (itype)
+ || t == NULL_TREE
+ || t2 == NULL_TREE
+ || TREE_CODE (t) != INTEGER_CST
+ || TREE_CODE (t2) != INTEGER_CST)
{
/* Punt (for now). */
count = NULL_TREE;
continue;
}
+ else
+ {
+ /* Some iterations of the outer loop have zero iterations
+ of the inner loop, while others have at least one.
+ In this case, we need to adjust one of those outer
+ loop bounds. If ADJ_FIRST, we need to adjust outer n1
+ (first), otherwise outer n2 (last). */
+ bool adj_first = integer_zerop (t);
+ tree n1 = fold_convert (itype, loop->n1);
+ tree n2 = fold_convert (itype, loop->n2);
+ tree m1 = loop->m1 ? fold_convert (itype, loop->m1)
+ : build_zero_cst (itype);
+ tree m2 = loop->m2 ? fold_convert (itype, loop->m2)
+ : build_zero_cst (itype);
+ t = fold_binary (MINUS_EXPR, itype, n1, n2);
+ t2 = fold_binary (MINUS_EXPR, itype, m2, m1);
+ t = fold_binary (TRUNC_DIV_EXPR, itype, t, t2);
+ t2 = fold_binary (MINUS_EXPR, itype, t, first);
+ t2 = fold_binary (TRUNC_MOD_EXPR, itype, t2, ostep);
+ t = fold_binary (MINUS_EXPR, itype, t, t2);
+ tree n1cur
+ = fold_binary (PLUS_EXPR, itype, n1,
+ fold_binary (MULT_EXPR, itype, m1, t));
+ tree n2cur
+ = fold_binary (PLUS_EXPR, itype, n2,
+ fold_binary (MULT_EXPR, itype, m2, t));
+ t2 = fold_binary (loop->cond_code, boolean_type_node,
+ n1cur, n2cur);
+ tree t3 = fold_binary (MULT_EXPR, itype, m1, ostep);
+ tree t4 = fold_binary (MULT_EXPR, itype, m2, ostep);
+ tree diff;
+ if (adj_first)
+ {
+ tree new_first;
+ if (integer_nonzerop (t2))
+ {
+ new_first = t;
+ n1first = n1cur;
+ n2first = n2cur;
+ if (flag_checking)
+ {
+ t3 = fold_binary (MINUS_EXPR, itype, n1cur, t3);
+ t4 = fold_binary (MINUS_EXPR, itype, n2cur, t4);
+ t3 = fold_binary (loop->cond_code,
+ boolean_type_node, t3, t4);
+ gcc_assert (integer_zerop (t3));
+ }
+ }
+ else
+ {
+ t3 = fold_binary (PLUS_EXPR, itype, n1cur, t3);
+ t4 = fold_binary (PLUS_EXPR, itype, n2cur, t4);
+ new_first = fold_binary (PLUS_EXPR, itype, t, ostep);
+ n1first = t3;
+ n2first = t4;
+ if (flag_checking)
+ {
+ t3 = fold_binary (loop->cond_code,
+ boolean_type_node, t3, t4);
+ gcc_assert (integer_nonzerop (t3));
+ }
+ }
+ diff = fold_binary (MINUS_EXPR, itype, new_first, first);
+ first = new_first;
+ fd->adjn1 = first;
+ }
+ else
+ {
+ tree new_last;
+ if (integer_zerop (t2))
+ {
+ t3 = fold_binary (MINUS_EXPR, itype, n1cur, t3);
+ t4 = fold_binary (MINUS_EXPR, itype, n2cur, t4);
+ new_last = fold_binary (MINUS_EXPR, itype, t, ostep);
+ n1last = t3;
+ n2last = t4;
+ if (flag_checking)
+ {
+ t3 = fold_binary (loop->cond_code,
+ boolean_type_node, t3, t4);
+ gcc_assert (integer_nonzerop (t3));
+ }
+ }
+ else
+ {
+ new_last = t;
+ n1last = n1cur;
+ n2last = n2cur;
+ if (flag_checking)
+ {
+ t3 = fold_binary (PLUS_EXPR, itype, n1cur, t3);
+ t4 = fold_binary (PLUS_EXPR, itype, n2cur, t4);
+ t3 = fold_binary (loop->cond_code,
+ boolean_type_node, t3, t4);
+ gcc_assert (integer_zerop (t3));
+ }
+ }
+ diff = fold_binary (MINUS_EXPR, itype, last, new_last);
+ }
+ if (TYPE_UNSIGNED (itype)
+ && single_nonrect_cond_code == GT_EXPR)
+ diff = fold_binary (TRUNC_DIV_EXPR, itype,
+ fold_unary (NEGATE_EXPR, itype, diff),
+ fold_unary (NEGATE_EXPR, itype,
+ ostep));
+ else
+ diff = fold_binary (TRUNC_DIV_EXPR, itype, diff, ostep);
+ diff = fold_convert (long_long_unsigned_type_node, diff);
+ single_nonrect_count
+ = fold_binary (MINUS_EXPR, long_long_unsigned_type_node,
+ single_nonrect_count, diff);
+ t = NULL_TREE;
+ }
}
else
t = fold_binary (loop->cond_code, boolean_type_node,
*collapse_count = fold_convert_loc (loc, iter_type, count);
if (fd->min_inner_iterations && fd->factor)
{
- t = make_tree_vec (3);
+ t = make_tree_vec (4);
TREE_VEC_ELT (t, 0) = *collapse_count;
TREE_VEC_ELT (t, 1) = fd->min_inner_iterations;
TREE_VEC_ELT (t, 2) = fd->factor;
+ TREE_VEC_ELT (t, 3) = fd->adjn1;
*collapse_count = t;
}
}
gcc_assert (fd->non_rect);
fd->min_inner_iterations = TREE_VEC_ELT (fd->loop.n2, 1);
fd->factor = TREE_VEC_ELT (fd->loop.n2, 2);
+ fd->adjn1 = TREE_VEC_ELT (fd->loop.n2, 3);
fd->loop.n2 = TREE_VEC_ELT (fd->loop.n2, 0);
}
fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1);
outer iterator, depending on which
results in fewer iterations. */
tree factor; /* (m2 - m1) * outer_step / inner_step. */
+ /* Adjusted n1 of the outer loop in such loop nests (if needed). */
+ tree adjn1;
};
#define OACC_FN_ATTRIB "oacc function"
--- /dev/null
+/* { dg-do run } */
+
+extern void abort (void);
+
+int x, i, j; /* i and j are the loop iterators; x records a value derived from the current (i, j).  All three appear in lastprivate clauses below.  */
+volatile int a, b, c, d, e, f, g, h; /* Bounds, multipliers, offsets and steps for the variants of each nest that do not use literal constants.  */
+int k[13][27]; /* Visit map; every access below is k[i + 5][j + 5].  */
+
+int
+main ()
+{
+ int niters; /* Iteration count accumulated by each parallel nest through reduction(+:niters).  */
+ for (i = -4; i < 8; i++) /* Nest 1, serial reference pass: store 1 in every cell the loops reach.  */
+ for (j = 3 * i; j > 2 * i; j--)
+ k[i + 5][j + 5] = 1;
+ a = -4; b = 8; c = 1; d = 3; e = 0; f = 2; g = 0; h = -1; /* Nest 1 parameters for the form i = a; i < b; i += c and j = d * i + e; j > g + i * f; j += h.  */
+ niters = 0; i = -100; j = -100; x = -100; /* Reset the counter and store -100 in i, j and x before the checked region.  */
+ #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+ for (i = -4; i < 8; i++) /* Nest 1 with literal bounds.  */
+ for (j = 3 * i; j > 2 * i; j--)
+ {
+ if (i < -4 || i >= 8 || j > 3 * i || j <= i * 2 || k[i + 5][j + 5] != 1) /* Abort on an iteration outside the nest's bounds or on a cell that does not hold 1.  */
+ abort ();
+ k[i + 5][j + 5]++; /* 1 -> 2; the second run of this nest expects 2.  */
+ x = i * 1024 + (j & 1023); /* Value derived from (i, j); its lastprivate copy is compared after the loop.  */
+ niters++;
+ }
+ if (i != 8 || j != 14 || x != 7183 || niters != 28) /* 7183 == 7 * 1024 + 15; 28 == 1 + 2 + ... + 7, since only i = 1..7 have inner iterations.  */
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+ for (i = a; i < b; i += c) /* Nest 1 again, with bounds and steps taken from the volatile variables.  */
+ for (j = d * i + e; j > g + i * f; j += h)
+ {
+ if (i < -4 || i >= 8 || j > 3 * i || j <= i * 2 || k[i + 5][j + 5] != 2) /* Same checks, but each cell must now hold 2.  */
+ abort ();
+ k[i + 5][j + 5]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != 8 || j != 14 || x != 7183 || niters != 28) /* Same expected results as the literal-bound run.  */
+ abort ();
+ for (int i = -4; i < 8; i++) /* Every reachable cell must now be 3 and is cleared to 0.  This loop declares its own i and j, shadowing the globals.  */
+ for (int j = 3 * i; j > 2 * i; j--)
+ if (k[i + 5][j + 5] == 3)
+ k[i + 5][j + 5] = 0;
+ else
+ abort ();
+ for (i = -2; i < 4; i++) /* Nest 2, serial reference pass: inner step is -2 and the inner upper bound does not depend on i.  */
+ for (j = -2 * i + 3; j > -3; j -= 2)
+ k[i + 5][j + 5] = 1;
+ a = -2; b = 4; c = 1; d = -2; e = 3; f = 0; g = -3; h = -2; /* Nest 2 parameters; f == 0 makes g + i * f the constant -3.  */
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+ for (i = -2; i < 4; i++) /* Nest 2 with literal bounds.  */
+ for (j = -2 * i + 3; j > -3; j -= 2)
+ {
+ if (i < -2 || i >= 4 || j <= -3 || j > -2 * i + 3 || k[i + 5][j + 5] != 1)
+ abort ();
+ k[i + 5][j + 5]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (/* i != 4 || j != -3 || */x != 3071 || niters != 15) /* 3071 == 2 * 1024 + (-1 & 1023); 15 == 5 + 4 + 3 + 2 + 1, since i = 3 has no inner iterations.  The checks on i and j are disabled in the original; the reason is not stated here.  */
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+ for (i = a; i < b; i += c) /* Nest 2 with bounds and steps taken from the volatile variables.  */
+ for (j = d * i + e; j > g + i * f; j += h)
+ {
+ if (i < -2 || i >= 4 || j <= -3 || j > -2 * i + 3 || k[i + 5][j + 5] != 2)
+ abort ();
+ k[i + 5][j + 5]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (/*i != 4 || j != -3 || */x != 3071 || niters != 15) /* Same expected results as the literal-bound run.  */
+ abort ();
+ for (i = -2; i < 4; i++) /* Every reachable cell must now be 3 and is cleared to 0.  */
+ for (j = -2 * i + 3; j > -3; j -= 2)
+ if (k[i + 5][j + 5] == 3)
+ k[i + 5][j + 5] = 0;
+ else
+ abort ();
+ for (i = 3; i > -3; i--) /* Nest 3, serial reference pass: the outer loop counts down and the first outer iterations (i = 3, 2) have no inner iterations.  */
+ for (j = -2 * i + 7; j > 2 * i + 1; j--)
+ k[i + 5][j + 5] = 1;
+ a = 3; b = -3; c = -1; d = -2; e = 7; f = 2; g = 1; h = -1; /* Nest 3 parameters.  */
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+ for (i = 3; i > -3; i--) /* Nest 3 with literal bounds.  */
+ for (j = -2 * i + 7; j > 2 * i + 1; j--)
+ {
+ if (i <= -3 || i > 3 || j <= 2 * i + 1 || j > -2 * i + 7 || k[i + 5][j + 5] != 1)
+ abort ();
+ k[i + 5][j + 5]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != -3 || j != -3 || x != -1026 || niters != 32) /* -1026 == -2 * 1024 + (-2 & 1023); 32 == 2 + 6 + 10 + 14 for i = 1, 0, -1, -2.  */
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+ for (i = a; i > b; i += c) /* Nest 3 with bounds and steps taken from the volatile variables.  */
+ for (j = d * i + e; j > g + i * f; j += h)
+ {
+ if (i <= -3 || i > 3 || j <= 2 * i + 1 || j > -2 * i + 7 || k[i + 5][j + 5] != 2)
+ abort ();
+ k[i + 5][j + 5]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != -3 || j != -3 || x != -1026 || niters != 32) /* Same expected results as the literal-bound run.  */
+ abort ();
+ for (i = 3; i > -3; i--) /* Every reachable cell must now be 3 and is cleared to 0.  */
+ for (j = -2 * i + 7; j > 2 * i + 1; j--)
+ if (k[i + 5][j + 5] == 3)
+ k[i + 5][j + 5] = 0;
+ else
+ abort ();
+ for (i = 3; i > -3; i--) /* Nest 4, serial reference pass: like nest 3 with the signs of the i multipliers swapped, so the last outer iteration (i = -2) has no inner iterations.  */
+ for (j = 2 * i + 7; j > -2 * i + 1; j--)
+ k[i + 5][j + 5] = 1;
+ a = 3; b = -3; c = -1; d = 2; e = 7; f = -2; g = 1; h = -1; /* Nest 4 parameters.  */
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+ for (i = 3; i > -3; i--) /* Nest 4 with literal bounds.  */
+ for (j = 2 * i + 7; j > -2 * i + 1; j--)
+ {
+ if (i <= -3 || i > 3 || j <= -2 * i + 1 || j > 2 * i + 7 || k[i + 5][j + 5] != 1)
+ abort ();
+ k[i + 5][j + 5]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (/*i != -3 || j != 3 || */x != -1020 || niters != 50) /* -1020 == -1 * 1024 + 4; 50 == 18 + 14 + 10 + 6 + 2 for i = 3..-1.  The checks on i and j are disabled in the original; the reason is not stated here.  */
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+ for (i = a; i > b; i += c) /* Nest 4 with bounds and steps taken from the volatile variables.  */
+ for (j = d * i + e; j > g + i * f; j += h)
+ {
+ if (i <= -3 || i > 3 || j <= -2 * i + 1 || j > 2 * i + 7 || k[i + 5][j + 5] != 2)
+ abort ();
+ k[i + 5][j + 5]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (/*i != -3 || j != 3 || */x != -1020 || niters != 50) /* Same expected results as the literal-bound run.  */
+ abort ();
+ for (i = 3; i > -3; i--) /* Every reachable cell must now be 3 and is cleared to 0.  */
+ for (j = 2 * i + 7; j > -2 * i + 1; j--)
+ if (k[i + 5][j + 5] == 3)
+ k[i + 5][j + 5] = 0;
+ else
+ abort ();
+ for (i = 6; i > -6; i--) /* Nest 5, serial reference pass: the inner loop counts up with a <= condition; only i = -2..-5 have inner iterations.  */
+ for (j = 2 * i + 7; j <= -2 * i + 1; j++)
+ k[i + 5][j + 5] = 1;
+ a = 6; b = -6; c = -1; d = 2; e = 7; f = -2; g = 2; h = 1; /* Nest 5 parameters; g is 2 because the volatile form below tests j < g + i * f where the literal form tests j <= -2 * i + 1.  */
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+ for (i = 6; i > -6; i--) /* Nest 5 with literal bounds.  */
+ for (j = 2 * i + 7; j <= -2 * i + 1; j++)
+ {
+ if (i <= -6 || i > 6 || j < 2 * i + 7 || j >= -2 * i + 2 || k[i + 5][j + 5] != 1)
+ abort ();
+ k[i + 5][j + 5]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != -6 || j != 12 || x != -5109 || niters != 36) /* -5109 == -5 * 1024 + 11; 36 == 3 + 7 + 11 + 15 for i = -2, -3, -4, -5.  */
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+ for (i = a; i > b; i += c) /* Nest 5 with bounds and steps taken from the volatile variables.  */
+ for (j = d * i + e; j < g + i * f; j += h)
+ {
+ if (i <= -6 || i > 6 || j < 2 * i + 7 || j >= -2 * i + 2 || k[i + 5][j + 5] != 2)
+ abort ();
+ k[i + 5][j + 5]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (i != -6 || j != 12 || x != -5109 || niters != 36) /* Same expected results as the literal-bound run.  */
+ abort ();
+ for (i = 6; i > -6; i--) /* Every reachable cell must now be 3 and is cleared to 0.  */
+ for (j = 2 * i + 7; j <= -2 * i + 1; j++)
+ if (k[i + 5][j + 5] == 3)
+ k[i + 5][j + 5] = 0;
+ else
+ abort ();
+ for (i = 6; i > -6; i -= 2) /* Nest 6, serial reference pass: outer step is -2; only i = 6, 4, 2 have inner iterations.  */
+ for (j = -2 * i + 7; j <= 2 * i + 1; j++)
+ k[i + 5][j + 5] = 1;
+ a = 6; b = -6; c = -2; d = -2; e = 7; f = 2; g = 2; h = 1; /* Nest 6 parameters; as in nest 5, g is 2 because the volatile form uses < where the literal form uses <=.  */
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+ for (i = 6; i > -6; i -= 2) /* Nest 6 with literal bounds.  */
+ for (j = -2 * i + 7; j <= 2 * i + 1; j++)
+ {
+ if (i <= -6 || i > 6 || j < -2 * i + 7 || j >= 2 * i + 2 || k[i + 5][j + 5] != 1)
+ abort ();
+ k[i + 5][j + 5]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (/*i != -6 || j != 15 || */x != 2053 || niters != 33) /* 2053 == 2 * 1024 + 5; 33 == 19 + 11 + 3 for i = 6, 4, 2.  The checks on i and j are disabled in the original; the reason is not stated here.  */
+ abort ();
+ niters = 0; i = -100; j = -100; x = -100;
+ #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+ for (i = a; i > b; i += c) /* Nest 6 with bounds and steps taken from the volatile variables.  */
+ for (j = d * i + e; j < g + i * f; j += h)
+ {
+ if (i <= -6 || i > 6 || j < -2 * i + 7 || j >= 2 * i + 2 || k[i + 5][j + 5] != 2)
+ abort ();
+ k[i + 5][j + 5]++;
+ x = i * 1024 + (j & 1023);
+ niters++;
+ }
+ if (/*i != -6 || j != 15 || */x != 2053 || niters != 33) /* Same expected results as the literal-bound run.  */
+ abort ();
+ for (i = 6; i > -6; i -= 2) /* Every reachable cell must now be 3 and is cleared to 0.  */
+ for (j = -2 * i + 7; j <= 2 * i + 1; j++)
+ if (k[i + 5][j + 5] == 3)
+ k[i + 5][j + 5] = 0;
+ else
+ abort ();
+ return 0;
+}