+2004-10-27  Zdenek Dvorak  <dvorakz@suse.cz>
+
+ PR tree-optimization/18048
+ * fold-const.c (try_move_mult_to_index): New function.
+ (fold): Use try_move_mult_to_index.
+ * tree-ssa-loop-ivopts.c (try_add_cand_for): Prefer common candidates.
+ * tree-ssa-loop-niter.c (number_of_iterations_cond): Produce
+ an all-ones unsigned constant without extra bits.
+ * tree.c (build_low_bits_mask): New function.
+ * tree.h (build_low_bits_mask): Declare.
+
2004-10-27 David Edelsohn <edelsohn@gnu.org>
PR target/17956
return 0;
}
+/* Tries to replace &a[idx] CODE s * delta with &a[idx CODE delta], if s is
+   the step of the array (e.g. for an array of 4-byte elements,
+   &a[i] + 4 * j becomes &a[i + j]).  TYPE is the type of the expression.
+   ADDR is the address.  MULT is the multiplicative expression.  If the
+   function succeeds, the new address expression is returned.  Otherwise
+   NULL_TREE is returned. */
+
+static tree
+try_move_mult_to_index (tree type, enum tree_code code, tree addr, tree mult)
+{
+ tree s, delta, step;
+ tree arg0 = TREE_OPERAND (mult, 0), arg1 = TREE_OPERAND (mult, 1);
+ tree ref = TREE_OPERAND (addr, 0), pref;
+ tree ret, pos;
+ tree itype;
+
+ STRIP_NOPS (arg0);
+ STRIP_NOPS (arg1);
+
+ if (TREE_CODE (arg0) == INTEGER_CST)
+ {
+ s = arg0;
+ delta = arg1;
+ }
+ else if (TREE_CODE (arg1) == INTEGER_CST)
+ {
+ s = arg1;
+ delta = arg0;
+ }
+ else
+ return NULL_TREE;
+
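+ /* Look for an ARRAY_REF among the handled components of the address
+    whose element size matches S.  */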
+ for (;; ref = TREE_OPERAND (ref, 0))
+ {
+ if (TREE_CODE (ref) == ARRAY_REF)
+ {
+ step = array_ref_element_size (ref);
+
+ if (TREE_CODE (step) != INTEGER_CST)
+ continue;
+
+ itype = TREE_TYPE (step);
+
+ /* If the type sizes do not match, we might run into problems
+    if one of them overflows. */
+ if (TYPE_PRECISION (itype) != TYPE_PRECISION (type))
+ continue;
+
+ if (!operand_equal_p (step, fold_convert (itype, s), 0))
+ continue;
+
+ delta = fold_convert (itype, delta);
+ break;
+ }
+
+ if (!handled_component_p (ref))
+ return NULL_TREE;
+ }
+
+ /* We found a suitable array reference, so copy everything up to it
+    and replace the index. */
+
+ pref = TREE_OPERAND (addr, 0);
+ ret = copy_node (pref);
+ pos = ret;
+
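+ /* Duplicate each intermediate component so that the original expression
+    is left untouched.  */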
+ while (pref != ref)
+ {
+ pref = TREE_OPERAND (pref, 0);
+ TREE_OPERAND (pos, 0) = copy_node (pref);
+ pos = TREE_OPERAND (pos, 0);
+ }
+
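+ /* POS is now the copy of the ARRAY_REF found above; adjust its index
+    by DELTA.  */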
+ TREE_OPERAND (pos, 1) = fold (build2 (code, itype,
+ TREE_OPERAND (pos, 1),
+ delta));
+
+ return build1 (ADDR_EXPR, type, ret);
+}
+
/* Perform constant folding and related simplification of EXPR.
The related simplifications include x*1 => x, x*0 => 0, etc.,
and application of the associative law.
alt0, alt1)),
same));
}
+
+ /* Try replacing &a[i1] + c * i2 with &a[i1 + i2], if c is the step
+    of the array.  The loop optimizer sometimes produces this type of
+    expression. */
+ if (TREE_CODE (arg0) == ADDR_EXPR
+ && TREE_CODE (arg1) == MULT_EXPR)
+ {
+ tem = try_move_mult_to_index (type, PLUS_EXPR, arg0, arg1);
+ if (tem)
+ return fold (tem);
+ }
+ else if (TREE_CODE (arg1) == ADDR_EXPR
+ && TREE_CODE (arg0) == MULT_EXPR)
+ {
+ tem = try_move_mult_to_index (type, PLUS_EXPR, arg1, arg0);
+ if (tem)
+ return fold (tem);
+ }
}
else
{
&diff))
return build_int_cst_type (type, diff);
}
+
+ /* Try replacing &a[i1] - c * i2 with &a[i1 - i2], if c is the step
+    of the array.  The loop optimizer sometimes produces this type of
+    expression. */
+ if (TREE_CODE (arg0) == ADDR_EXPR
+ && TREE_CODE (arg1) == MULT_EXPR)
+ {
+ tem = try_move_mult_to_index (type, MINUS_EXPR, arg0, arg1);
+ if (tem)
+ return fold (tem);
+ }
if (TREE_CODE (arg0) == MULT_EXPR
&& TREE_CODE (arg1) == MULT_EXPR
bitmap act_inv = BITMAP_XMALLOC ();
unsigned i;
struct cost_pair *cp;
+ bitmap_iterator bi;
+ struct iv_cand *cand;
+ bitmap depends_on;
bitmap_copy (best_ivs, ivs);
bitmap_copy (best_inv, inv);
- for (i = 0; i < use->n_map_members; i++)
+ /* First try the important candidates.  Only if that fails, try the
+    use-specific ones.  Rationale -- in loops with many variables the best
+    choice often is to use just one generic biv.  If we added many ivs
+    specific to the uses here, the optimization algorithm would later be
+    likely to get stuck in a local minimum, causing us to create too many
+    ivs.  Going from few ivs to more seems more likely to be successful --
+    starting from few ivs, replacing an expensive use by a use-specific iv
+    should always be a win. */
+ EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
{
- cp = use->cost_map + i;
- if (cp->cost == INFTY)
+ cand = iv_cand (data, i);
+
+ if (get_use_iv_cost (data, use, cand, &depends_on) == INFTY)
continue;
bitmap_copy (act_ivs, ivs);
- bitmap_set_bit (act_ivs, cp->cand->id);
- if (cp->depends_on)
- bitmap_a_or_b (act_inv, inv, cp->depends_on);
+ bitmap_set_bit (act_ivs, cand->id);
+ if (depends_on)
+ bitmap_a_or_b (act_inv, inv, depends_on);
else
bitmap_copy (act_inv, inv);
act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
}
}
+ if (best_cost == INFTY)
+ {
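+ /* No finite-cost solution was found among the important candidates,
+    so fall back to the candidates specific to the uses.  */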
+ for (i = 0; i < use->n_map_members; i++)
+ {
+ cp = use->cost_map + i;
+ if (cp->cost == INFTY)
+ continue;
+
+ /* Already tried this. */
+ if (cp->cand->important)
+ continue;
+
+ bitmap_copy (act_ivs, ivs);
+ bitmap_set_bit (act_ivs, cp->cand->id);
+ if (cp->depends_on)
+ bitmap_a_or_b (act_inv, inv, cp->depends_on);
+ else
+ bitmap_copy (act_inv, inv);
+ act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
+
+ if (act_cost < best_cost)
+ {
+ best_cost = act_cost;
+ bitmap_copy (best_ivs, act_ivs);
+ bitmap_copy (best_inv, act_inv);
+ }
+ }
+ }
+
bitmap_copy (ivs, best_ivs);
bitmap_copy (inv, best_inv);
d = EXEC_BINARY (LSHIFT_EXPR, niter_type,
build_int_cst_type (niter_type, 1), bits);
s = EXEC_BINARY (RSHIFT_EXPR, niter_type, step0, bits);
- bound = EXEC_BINARY (RSHIFT_EXPR, niter_type,
- build_int_cst (niter_type, -1),
- bits);
+
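+ /* Build the mask of the low TYPE_PRECISION - BITS bits, so that no bits
+    above the precision of NITER_TYPE are set.  */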
+ bound = build_low_bits_mask (niter_type,
+ (TYPE_PRECISION (niter_type)
+ - tree_low_cst (bits, 1)));
assumption = fold (build2 (FLOOR_MOD_EXPR, niter_type, base1, d));
assumption = fold (build2 (EQ_EXPR, boolean_type_node,
return t;
}
+/* Builds an integer constant in TYPE such that the lowest BITS bits are ones
+   and the rest are zeros. */
+
+tree
+build_low_bits_mask (tree type, unsigned bits)
+{
+ unsigned HOST_WIDE_INT low;
+ HOST_WIDE_INT high;
+ unsigned HOST_WIDE_INT all_ones = ~(unsigned HOST_WIDE_INT) 0;
+
+ gcc_assert (bits <= TYPE_PRECISION (type));
+
+ if (bits == TYPE_PRECISION (type)
+ && !TYPE_UNSIGNED (type))
+ {
+ /* Sign extended all-ones mask. */
+ low = all_ones;
+ high = -1;
+ }
+ else if (bits <= HOST_BITS_PER_WIDE_INT)
+ {
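+ /* The mask fits entirely into the low HOST_WIDE_INT.  */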
+ low = all_ones >> (HOST_BITS_PER_WIDE_INT - bits);
+ high = 0;
+ }
+ else
+ {
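+ /* The mask spans both words; the low word is all ones.  */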
+ bits -= HOST_BITS_PER_WIDE_INT;
+ low = all_ones;
+ high = all_ones >> (HOST_BITS_PER_WIDE_INT - bits);
+ }
+
+ return build_int_cst_wide (type, low, high);
+}
+
/* Checks that X is integer constant that can be expressed in (unsigned)
HOST_WIDE_INT without loss of precision. */
extern tree build_fold_addr_expr_with_type (tree, tree);
extern tree build_fold_indirect_ref (tree);
extern tree constant_boolean_node (int, tree);
+extern tree build_low_bits_mask (tree, unsigned);
extern bool tree_swap_operands_p (tree, tree, bool);
extern enum tree_code swap_tree_comparison (enum tree_code);