/* SCC value numbering for trees
- Copyright (C) 2006-2018 Free Software Foundation, Inc.
+ Copyright (C) 2006-2020 Free Software Foundation, Inc.
Contributed by Daniel Berlin <dan@dberlin.org>
This file is part of GCC.
#include "config.h"
#include "system.h"
#include "coretypes.h"
+#include "splay-tree.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "tree-ssa.h"
#include "dumpfile.h"
#include "cfgloop.h"
-#include "params.h"
#include "tree-ssa-propagate.h"
#include "tree-cfg.h"
#include "domwalk.h"
#include "tree-cfgcleanup.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
+#include "tree-ssa-loop-niter.h"
+#include "builtins.h"
#include "tree-ssa-sccvn.h"
/* This algorithm is based on the SCC algorithm presented by Keith
/* There's no BB_EXECUTABLE but we can use BB_VISITED. */
#define BB_EXECUTABLE BB_VISITED
-static tree *last_vuse_ptr;
-static vn_lookup_kind vn_walk_kind;
static vn_lookup_kind default_vn_walk_kind;
-bitmap const_parms;
/* vn_nary_op hashtable helpers. */
/* Valueization hook. Valueize NAME if it is an SSA name, otherwise
just return it. */
tree (*vn_valueize) (tree);
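+
+/* Wrapper around vn_valueize for use as a callback that receives an
+ extra, unused context argument.  */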
+tree vn_valueize_wrapper (tree t, void* context ATTRIBUTE_UNUSED)
+{
+ return vn_valueize (t);
+}
/* This represents the top of the VN lattice, which is the universal
static inline hashval_t hash (const value_type &);
static inline bool equal (const value_type &, const compare_type &);
static inline void mark_deleted (value_type &) {}
+ static const bool empty_zero_p = true;
static inline void mark_empty (value_type &e) { e = NULL; }
static inline bool is_deleted (value_type &) { return false; }
static inline bool is_empty (value_type &e) { return e == NULL; }
static void init_vn_nary_op_from_pieces (vn_nary_op_t, unsigned int,
enum tree_code, tree, tree *);
static tree vn_lookup_simplify_result (gimple_match_op *);
+static vn_reference_t vn_reference_lookup_or_insert_for_pieces
+ (tree, alias_set_type, tree, vec<vn_reference_op_s, va_heap>, tree);
/* Return whether there is value numbering information for a given SSA name. */
/* Return the SSA value of X. */
inline tree
-SSA_VAL (tree x)
+SSA_VAL (tree x, bool *visited = NULL)
{
vn_ssa_aux_t tem = vn_ssa_aux_hash->find_with_hash (x, SSA_NAME_VERSION (x));
+ if (visited)
+ *visited = tem && tem->visited;
return tem && tem->visited ? tem->valnum : x;
}
do
{
- tree tem = SSA_VAL (x);
- /* stmt walking can walk over a backedge and reach code we didn't
- value-number yet. */
- if (tem == VN_TOP)
- return x;
- x = tem;
+ x = SSA_VAL (x);
+ gcc_assert (x != VN_TOP);
}
while (SSA_NAME_IN_FREE_LIST (x));
return x;
}
+/* Similar to the above but used as callback for walk_non_aliases_vuses
+ and thus should stop at unvisited VUSE to not walk across region
+ boundaries. */
+
+static tree
+vuse_valueize (tree vuse)
+{
+ do
+ {
+ bool visited;
+ vuse = SSA_VAL (vuse, &visited);
+ if (!visited)
+ return NULL_TREE;
+ gcc_assert (vuse != VN_TOP);
+ }
+ while (SSA_NAME_IN_FREE_LIST (vuse));
+ return vuse;
+}
+
/* Return the vn_kind the expression computed by the stmt should be
associated with. */
static void
copy_reference_ops_from_ref (tree ref, vec<vn_reference_op_s> *result)
{
- if (TREE_CODE (ref) == TARGET_MEM_REF)
- {
- vn_reference_op_s temp;
-
- result->reserve (3);
-
- memset (&temp, 0, sizeof (temp));
- temp.type = TREE_TYPE (ref);
- temp.opcode = TREE_CODE (ref);
- temp.op0 = TMR_INDEX (ref);
- temp.op1 = TMR_STEP (ref);
- temp.op2 = TMR_OFFSET (ref);
- temp.off = -1;
- temp.clique = MR_DEPENDENCE_CLIQUE (ref);
- temp.base = MR_DEPENDENCE_BASE (ref);
- result->quick_push (temp);
-
- memset (&temp, 0, sizeof (temp));
- temp.type = NULL_TREE;
- temp.opcode = ERROR_MARK;
- temp.op0 = TMR_INDEX2 (ref);
- temp.off = -1;
- result->quick_push (temp);
-
- memset (&temp, 0, sizeof (temp));
- temp.type = NULL_TREE;
- temp.opcode = TREE_CODE (TMR_BASE (ref));
- temp.op0 = TMR_BASE (ref);
- temp.off = -1;
- result->quick_push (temp);
- return;
- }
-
/* For non-calls, store the information that makes up the address. */
tree orig = ref;
while (ref)
temp.base = MR_DEPENDENCE_BASE (ref);
temp.reverse = REF_REVERSE_STORAGE_ORDER (ref);
break;
+ case TARGET_MEM_REF:
+ /* The base address gets its own vn_reference_op_s structure. */
+ temp.op0 = TMR_INDEX (ref);
+ temp.op1 = TMR_STEP (ref);
+ temp.op2 = TMR_OFFSET (ref);
+ temp.clique = MR_DEPENDENCE_CLIQUE (ref);
+ temp.base = MR_DEPENDENCE_BASE (ref);
+ result->safe_push (temp);
+ memset (&temp, 0, sizeof (temp));
+ temp.type = NULL_TREE;
+ temp.opcode = ERROR_MARK;
+ temp.op0 = TMR_INDEX2 (ref);
+ temp.off = -1;
+ break;
case BIT_FIELD_REF:
/* Record bits, position and storage order. */
temp.op0 = TREE_OPERAND (ref, 1);
break;
case STRING_CST:
case INTEGER_CST:
+ case POLY_INT_CST:
case COMPLEX_CST:
case VECTOR_CST:
case REAL_CST:
/* Copy the type, opcode, function, static chain and EH region, if any. */
memset (&temp, 0, sizeof (temp));
- temp.type = gimple_call_return_type (call);
+ temp.type = gimple_call_fntype (call);
temp.opcode = CALL_EXPR;
temp.op0 = gimple_call_fn (call);
temp.op1 = gimple_call_chain (call);
- if (stmt_could_throw_p (call) && (lr = lookup_stmt_eh_lp (call)) > 0)
+ if (stmt_could_throw_p (cfun, call) && (lr = lookup_stmt_eh_lp (call)) > 0)
temp.op2 = size_int (lr);
temp.off = -1;
result->safe_push (temp);
vn_reference_maybe_forwprop_address (vec<vn_reference_op_s> *ops,
unsigned int *i_p)
{
- unsigned int i = *i_p;
- vn_reference_op_t op = &(*ops)[i];
- vn_reference_op_t mem_op = &(*ops)[i - 1];
- gimple *def_stmt;
- enum tree_code code;
- poly_offset_int off;
-
- def_stmt = SSA_NAME_DEF_STMT (op->op0);
- if (!is_gimple_assign (def_stmt))
- return false;
-
- code = gimple_assign_rhs_code (def_stmt);
- if (code != ADDR_EXPR
- && code != POINTER_PLUS_EXPR)
- return false;
-
- off = poly_offset_int::from (wi::to_poly_wide (mem_op->op0), SIGNED);
+ bool changed = false;
+ vn_reference_op_t op;
- /* The only thing we have to do is from &OBJ.foo.bar add the offset
- from .foo.bar to the preceding MEM_REF offset and replace the
- address with &OBJ. */
- if (code == ADDR_EXPR)
- {
- tree addr, addr_base;
- poly_int64 addr_offset;
-
- addr = gimple_assign_rhs1 (def_stmt);
- addr_base = get_addr_base_and_unit_offset (TREE_OPERAND (addr, 0),
- &addr_offset);
- /* If that didn't work because the address isn't invariant propagate
- the reference tree from the address operation in case the current
- dereference isn't offsetted. */
- if (!addr_base
- && *i_p == ops->length () - 1
- && known_eq (off, 0)
- /* This makes us disable this transform for PRE where the
- reference ops might be also used for code insertion which
- is invalid. */
- && default_vn_walk_kind == VN_WALKREWRITE)
+ do
+ {
+ unsigned int i = *i_p;
+ op = &(*ops)[i];
+ vn_reference_op_t mem_op = &(*ops)[i - 1];
+ gimple *def_stmt;
+ enum tree_code code;
+ poly_offset_int off;
+
+ def_stmt = SSA_NAME_DEF_STMT (op->op0);
+ if (!is_gimple_assign (def_stmt))
+ return changed;
+
+ code = gimple_assign_rhs_code (def_stmt);
+ if (code != ADDR_EXPR
+ && code != POINTER_PLUS_EXPR)
+ return changed;
+
+ off = poly_offset_int::from (wi::to_poly_wide (mem_op->op0), SIGNED);
+
+ /* The only thing we have to do is, from &OBJ.foo.bar, add the offset
+ from .foo.bar to the preceding MEM_REF offset and replace the
+ address with &OBJ.  */
+ if (code == ADDR_EXPR)
{
- auto_vec<vn_reference_op_s, 32> tem;
- copy_reference_ops_from_ref (TREE_OPERAND (addr, 0), &tem);
- /* Make sure to preserve TBAA info. The only objects not
- wrapped in MEM_REFs that can have their address taken are
- STRING_CSTs. */
- if (tem.length () >= 2
- && tem[tem.length () - 2].opcode == MEM_REF)
+ tree addr, addr_base;
+ poly_int64 addr_offset;
+
+ addr = gimple_assign_rhs1 (def_stmt);
+ addr_base = get_addr_base_and_unit_offset (TREE_OPERAND (addr, 0),
+ &addr_offset);
+ /* If that didn't work because the address isn't invariant propagate
+ the reference tree from the address operation in case the current
+ dereference isn't offsetted. */
+ if (!addr_base
+ && *i_p == ops->length () - 1
+ && known_eq (off, 0)
+ /* This makes us disable this transform for PRE where the
+ reference ops might be also used for code insertion which
+ is invalid. */
+ && default_vn_walk_kind == VN_WALKREWRITE)
{
- vn_reference_op_t new_mem_op = &tem[tem.length () - 2];
- new_mem_op->op0
- = wide_int_to_tree (TREE_TYPE (mem_op->op0),
- wi::to_poly_wide (new_mem_op->op0));
+ auto_vec<vn_reference_op_s, 32> tem;
+ copy_reference_ops_from_ref (TREE_OPERAND (addr, 0), &tem);
+ /* Make sure to preserve TBAA info. The only objects not
+ wrapped in MEM_REFs that can have their address taken are
+ STRING_CSTs. */
+ if (tem.length () >= 2
+ && tem[tem.length () - 2].opcode == MEM_REF)
+ {
+ vn_reference_op_t new_mem_op = &tem[tem.length () - 2];
+ new_mem_op->op0
+ = wide_int_to_tree (TREE_TYPE (mem_op->op0),
+ wi::to_poly_wide (new_mem_op->op0));
+ }
+ else
+ gcc_assert (tem.last ().opcode == STRING_CST);
+ ops->pop ();
+ ops->pop ();
+ ops->safe_splice (tem);
+ --*i_p;
+ return true;
}
- else
- gcc_assert (tem.last ().opcode == STRING_CST);
- ops->pop ();
- ops->pop ();
- ops->safe_splice (tem);
- --*i_p;
- return true;
+ if (!addr_base
+ || TREE_CODE (addr_base) != MEM_REF
+ || (TREE_CODE (TREE_OPERAND (addr_base, 0)) == SSA_NAME
+ && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (TREE_OPERAND (addr_base,
+ 0))))
+ return changed;
+
+ off += addr_offset;
+ off += mem_ref_offset (addr_base);
+ op->op0 = TREE_OPERAND (addr_base, 0);
+ }
+ else
+ {
+ tree ptr, ptroff;
+ ptr = gimple_assign_rhs1 (def_stmt);
+ ptroff = gimple_assign_rhs2 (def_stmt);
+ if (TREE_CODE (ptr) != SSA_NAME
+ || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr)
+ /* Make sure to not endlessly recurse.
+ See gcc.dg/tree-ssa/20040408-1.c for an example. Can easily
+ happen when we value-number a PHI to its backedge value. */
+ || SSA_VAL (ptr) == op->op0
+ || !poly_int_tree_p (ptroff))
+ return changed;
+
+ off += wi::to_poly_offset (ptroff);
+ op->op0 = ptr;
}
- if (!addr_base
- || TREE_CODE (addr_base) != MEM_REF
- || (TREE_CODE (TREE_OPERAND (addr_base, 0)) == SSA_NAME
- && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (TREE_OPERAND (addr_base, 0))))
- return false;
-
- off += addr_offset;
- off += mem_ref_offset (addr_base);
- op->op0 = TREE_OPERAND (addr_base, 0);
- }
- else
- {
- tree ptr, ptroff;
- ptr = gimple_assign_rhs1 (def_stmt);
- ptroff = gimple_assign_rhs2 (def_stmt);
- if (TREE_CODE (ptr) != SSA_NAME
- || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr)
- /* Make sure to not endlessly recurse.
- See gcc.dg/tree-ssa/20040408-1.c for an example. Can easily
- happen when we value-number a PHI to its backedge value. */
- || SSA_VAL (ptr) == op->op0
- || !poly_int_tree_p (ptroff))
- return false;
- off += wi::to_poly_offset (ptroff);
- op->op0 = ptr;
+ mem_op->op0 = wide_int_to_tree (TREE_TYPE (mem_op->op0), off);
+ if (tree_fits_shwi_p (mem_op->op0))
+ mem_op->off = tree_to_shwi (mem_op->op0);
+ else
+ mem_op->off = -1;
+ /* ??? Can end up with endless recursion here!?
+ gcc.c-torture/execute/strcmp-1.c */
+ if (TREE_CODE (op->op0) == SSA_NAME)
+ op->op0 = SSA_VAL (op->op0);
+ if (TREE_CODE (op->op0) != SSA_NAME)
+ op->opcode = TREE_CODE (op->op0);
+
+ changed = true;
}
+ /* Tail-recurse. */
+ while (TREE_CODE (op->op0) == SSA_NAME);
- mem_op->op0 = wide_int_to_tree (TREE_TYPE (mem_op->op0), off);
- if (tree_fits_shwi_p (mem_op->op0))
- mem_op->off = tree_to_shwi (mem_op->op0);
- else
- mem_op->off = -1;
- /* ??? Can end up with endless recursion here!?
- gcc.c-torture/execute/strcmp-1.c */
- if (TREE_CODE (op->op0) == SSA_NAME)
- op->op0 = SSA_VAL (op->op0);
- if (TREE_CODE (op->op0) != SSA_NAME)
- op->opcode = TREE_CODE (op->op0);
-
- /* And recurse. */
- if (TREE_CODE (op->op0) == SSA_NAME)
- vn_reference_maybe_forwprop_address (ops, i_p);
- else if (TREE_CODE (op->op0) == ADDR_EXPR)
+ /* Fold a remaining *&. */
+ if (TREE_CODE (op->op0) == ADDR_EXPR)
vn_reference_fold_indirect (ops, i_p);
- return true;
+
+ return changed;
}
/* Optimize the reference REF to a constant if possible or return
/* Simplify reads from constants or constant initializers. */
else if (BITS_PER_UNIT == 8
- && is_gimple_reg_type (ref->type)
- && (!INTEGRAL_TYPE_P (ref->type)
- || TYPE_PRECISION (ref->type) % BITS_PER_UNIT == 0))
+ && COMPLETE_TYPE_P (ref->type)
+ && is_gimple_reg_type (ref->type))
{
poly_int64 off = 0;
HOST_WIDE_INT size;
if (INTEGRAL_TYPE_P (ref->type))
size = TYPE_PRECISION (ref->type);
- else
+ else if (tree_fits_shwi_p (TYPE_SIZE (ref->type)))
size = tree_to_shwi (TYPE_SIZE (ref->type));
+ else
+ return NULL_TREE;
if (size % BITS_PER_UNIT != 0
|| size > MAX_BITSIZE_MODE_ANY_MODE)
return NULL_TREE;
return NULL_TREE;
}
+
+/* Partial definition tracking support. */
+
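+/* A contiguous bit range [offset, offset + size) of the looked-up
+ reference that is already covered by partial definitions.  */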
+struct pd_range
+{
+ HOST_WIDE_INT offset;
+ HOST_WIDE_INT size;
+};
+
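+/* A single partial definition: the value RHS and the bit range
+ [offset, offset + size) it covers, relative to the looked-up reference.  */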
+struct pd_data
+{
+ tree rhs;
+ HOST_WIDE_INT offset;
+ HOST_WIDE_INT size;
+};
+
+/* Context for alias walking. */
+
+struct vn_walk_cb_data
+{
+ vn_walk_cb_data (vn_reference_t vr_, tree orig_ref_, tree *last_vuse_ptr_,
+ vn_lookup_kind vn_walk_kind_, bool tbaa_p_)
+ : vr (vr_), last_vuse_ptr (last_vuse_ptr_), last_vuse (NULL_TREE),
+ vn_walk_kind (vn_walk_kind_), tbaa_p (tbaa_p_),
+ saved_operands (vNULL), first_set (-2), known_ranges (NULL)
+ {
+ if (!last_vuse_ptr)
+ last_vuse_ptr = &last_vuse;
+ ao_ref_init (&orig_ref, orig_ref_);
+ }
+ ~vn_walk_cb_data ();
+ void *finish (alias_set_type, tree);
+ void *push_partial_def (const pd_data& pd, alias_set_type, HOST_WIDE_INT);
+
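+ /* The reference we look up, the original access path, where to record
+ the last VUSE visited, the walk kind, whether TBAA applies and the
+ operands saved before any translation.  */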
+ vn_reference_t vr;
+ ao_ref orig_ref;
+ tree *last_vuse_ptr;
+ tree last_vuse;
+ vn_lookup_kind vn_walk_kind;
+ bool tbaa_p;
+ vec<vn_reference_op_s> saved_operands;
+
+ /* The partial defs we come across during the walk.  */
+ auto_vec<pd_data, 2> partial_defs;
+ /* The first def's range, to avoid splay tree setup in most cases.  */
+ pd_range first_range;
+ alias_set_type first_set;
+ splay_tree known_ranges;
+ obstack ranges_obstack;
+};
+
+vn_walk_cb_data::~vn_walk_cb_data ()
+{
+ if (known_ranges)
+ {
+ splay_tree_delete (known_ranges);
+ obstack_free (&ranges_obstack, NULL);
+ }
+ saved_operands.release ();
+}
+
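+/* Record VAL as the result of the current lookup and enter it into the
+ reference table keyed off the last VUSE and the original (saved)
+ operands.  SET is overridden by the alias set of the first partial
+ def, if any.  */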
+void *
+vn_walk_cb_data::finish (alias_set_type set, tree val)
+{
+ if (first_set != -2)
+ set = first_set;
+ return vn_reference_lookup_or_insert_for_pieces
+ (last_vuse, set, vr->type,
+ saved_operands.exists () ? saved_operands : vr->operands, val);
+}
+
+/* pd_range splay-tree helpers. */
+
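+/* Compare splay-tree keys, each pointing to the HOST_WIDE_INT bit offset
+ of a pd_range.  */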
+static int
+pd_range_compare (splay_tree_key offset1p, splay_tree_key offset2p)
+{
+ HOST_WIDE_INT offset1 = *(HOST_WIDE_INT *)offset1p;
+ HOST_WIDE_INT offset2 = *(HOST_WIDE_INT *)offset2p;
+ if (offset1 < offset2)
+ return -1;
+ else if (offset1 > offset2)
+ return 1;
+ return 0;
+}
+
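+/* Allocate splay-tree nodes from the walk data's ranges_obstack.  */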
+static void *
+pd_tree_alloc (int size, void *data_)
+{
+ vn_walk_cb_data *data = (vn_walk_cb_data *)data_;
+ return obstack_alloc (&data->ranges_obstack, size);
+}
+
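+/* Nothing to do per node; the whole obstack is released in the
+ vn_walk_cb_data destructor.  */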
+static void
+pd_tree_dealloc (void *, void *)
+{
+}
+
+/* Push PD to the vector of partial definitions.  Return a value when we
+ are ready to combine things with VUSE, SET and MAXSIZEI, NULL when we
+ want to continue looking for partial defs, or (void *)-1 on failure.  */
+
+void *
+vn_walk_cb_data::push_partial_def (const pd_data &pd,
+ alias_set_type set, HOST_WIDE_INT maxsizei)
+{
+ const HOST_WIDE_INT bufsize = 64;
+ /* We're using a fixed buffer for encoding so fail early if the object
+ we want to interpret is bigger. */
+ if (maxsizei > bufsize * BITS_PER_UNIT
+ || CHAR_BIT != 8
+ || BITS_PER_UNIT != 8
+ /* Not prepared to handle PDP endian. */
+ || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
+ return (void *)-1;
+
+ bool pd_constant_p = (TREE_CODE (pd.rhs) == CONSTRUCTOR
+ || CONSTANT_CLASS_P (pd.rhs));
+ if (partial_defs.is_empty ())
+ {
+ /* If we get a clobber upfront, fail. */
+ if (TREE_CLOBBER_P (pd.rhs))
+ return (void *)-1;
+ if (!pd_constant_p)
+ return (void *)-1;
+ partial_defs.safe_push (pd);
+ first_range.offset = pd.offset;
+ first_range.size = pd.size;
+ first_set = set;
+ last_vuse_ptr = NULL;
+ /* Continue looking for partial defs. */
+ return NULL;
+ }
+
+ if (!known_ranges)
+ {
+ /* ??? Optimize the case where the 2nd partial def completes things. */
+ gcc_obstack_init (&ranges_obstack);
+ known_ranges = splay_tree_new_with_allocator (pd_range_compare, 0, 0,
+ pd_tree_alloc,
+ pd_tree_dealloc, this);
+ splay_tree_insert (known_ranges,
+ (splay_tree_key)&first_range.offset,
+ (splay_tree_value)&first_range);
+ }
+
+ pd_range newr = { pd.offset, pd.size };
+ splay_tree_node n;
+ pd_range *r;
+ /* Lookup the predecessor of offset + 1 and see if we need to merge. */
+ HOST_WIDE_INT loffset = newr.offset + 1;
+ if ((n = splay_tree_predecessor (known_ranges, (splay_tree_key)&loffset))
+ && ((r = (pd_range *)n->value), true)
+ && ranges_known_overlap_p (r->offset, r->size + 1,
+ newr.offset, newr.size))
+ {
+ /* Ignore partial defs already covered.  Here we also drop shadowed
+ clobbers arriving here on the floor.  */
+ if (known_subrange_p (newr.offset, newr.size, r->offset, r->size))
+ return NULL;
+ r->size = MAX (r->offset + r->size, newr.offset + newr.size) - r->offset;
+ }
+ else
+ {
+ /* newr.offset wasn't covered yet, insert the range. */
+ r = XOBNEW (&ranges_obstack, pd_range);
+ *r = newr;
+ splay_tree_insert (known_ranges, (splay_tree_key)&r->offset,
+ (splay_tree_value)r);
+ }
+ /* Merge r which now contains newr and is a member of the splay tree with
+ adjacent overlapping ranges. */
+ pd_range *rafter;
+ while ((n = splay_tree_successor (known_ranges, (splay_tree_key)&r->offset))
+ && ((rafter = (pd_range *)n->value), true)
+ && ranges_known_overlap_p (r->offset, r->size + 1,
+ rafter->offset, rafter->size))
+ {
+ r->size = MAX (r->offset + r->size,
+ rafter->offset + rafter->size) - r->offset;
+ splay_tree_remove (known_ranges, (splay_tree_key)&rafter->offset);
+ }
+ /* If we get a clobber, fail. */
+ if (TREE_CLOBBER_P (pd.rhs))
+ return (void *)-1;
+ /* Non-constants are OK as long as they are shadowed by a constant. */
+ if (!pd_constant_p)
+ return (void *)-1;
+ partial_defs.safe_push (pd);
+
+ /* Now we have merged newr into the range tree. When we have covered
+ [offseti, sizei] then the tree will contain exactly one node which has
+ the desired properties and it will be 'r'. */
+ if (!known_subrange_p (0, maxsizei, r->offset, r->size))
+ /* Continue looking for partial defs. */
+ return NULL;
+
+ /* Now simply native encode all partial defs in reverse order. */
+ unsigned ndefs = partial_defs.length ();
+ /* We support up to 512-bit values (for V8DFmode). */
+ unsigned char buffer[bufsize + 1];
+ unsigned char this_buffer[bufsize + 1];
+ int len;
+
+ memset (buffer, 0, bufsize + 1);
+ unsigned needed_len = ROUND_UP (maxsizei, BITS_PER_UNIT) / BITS_PER_UNIT;
+ while (!partial_defs.is_empty ())
+ {
+ pd_data pd = partial_defs.pop ();
+ unsigned int amnt;
+ if (TREE_CODE (pd.rhs) == CONSTRUCTOR)
+ {
+ /* Empty CONSTRUCTOR. */
+ if (pd.size >= needed_len * BITS_PER_UNIT)
+ len = needed_len;
+ else
+ len = ROUND_UP (pd.size, BITS_PER_UNIT) / BITS_PER_UNIT;
+ memset (this_buffer, 0, len);
+ }
+ else
+ {
+ len = native_encode_expr (pd.rhs, this_buffer, bufsize,
+ MAX (0, -pd.offset) / BITS_PER_UNIT);
+ if (len <= 0
+ || len < (ROUND_UP (pd.size, BITS_PER_UNIT) / BITS_PER_UNIT
+ - MAX (0, -pd.offset) / BITS_PER_UNIT))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Failed to encode %u "
+ "partial definitions\n", ndefs);
+ return (void *)-1;
+ }
+ }
+
+ unsigned char *p = buffer;
+ HOST_WIDE_INT size = pd.size;
+ if (pd.offset < 0)
+ size -= ROUND_DOWN (-pd.offset, BITS_PER_UNIT);
+ this_buffer[len] = 0;
+ if (BYTES_BIG_ENDIAN)
+ {
+ /* LSB of this_buffer[len - 1] byte should be at
+ pd.offset + pd.size - 1 bits in buffer. */
+ amnt = ((unsigned HOST_WIDE_INT) pd.offset
+ + pd.size) % BITS_PER_UNIT;
+ if (amnt)
+ shift_bytes_in_array_right (this_buffer, len + 1, amnt);
+ unsigned char *q = this_buffer;
+ unsigned int off = 0;
+ if (pd.offset >= 0)
+ {
+ unsigned int msk;
+ off = pd.offset / BITS_PER_UNIT;
+ gcc_assert (off < needed_len);
+ p = buffer + off;
+ if (size <= amnt)
+ {
+ msk = ((1 << size) - 1) << (BITS_PER_UNIT - amnt);
+ *p = (*p & ~msk) | (this_buffer[len] & msk);
+ size = 0;
+ }
+ else
+ {
+ if (TREE_CODE (pd.rhs) != CONSTRUCTOR)
+ q = (this_buffer + len
+ - (ROUND_UP (size - amnt, BITS_PER_UNIT)
+ / BITS_PER_UNIT));
+ if (pd.offset % BITS_PER_UNIT)
+ {
+ msk = -1U << (BITS_PER_UNIT
+ - (pd.offset % BITS_PER_UNIT));
+ *p = (*p & msk) | (*q & ~msk);
+ p++;
+ q++;
+ off++;
+ size -= BITS_PER_UNIT - (pd.offset % BITS_PER_UNIT);
+ gcc_assert (size >= 0);
+ }
+ }
+ }
+ else if (TREE_CODE (pd.rhs) != CONSTRUCTOR)
+ {
+ q = (this_buffer + len
+ - (ROUND_UP (size - amnt, BITS_PER_UNIT)
+ / BITS_PER_UNIT));
+ if (pd.offset % BITS_PER_UNIT)
+ {
+ q++;
+ size -= BITS_PER_UNIT - ((unsigned HOST_WIDE_INT) pd.offset
+ % BITS_PER_UNIT);
+ gcc_assert (size >= 0);
+ }
+ }
+ if ((unsigned HOST_WIDE_INT) size / BITS_PER_UNIT + off
+ > needed_len)
+ size = (needed_len - off) * BITS_PER_UNIT;
+ memcpy (p, q, size / BITS_PER_UNIT);
+ if (size % BITS_PER_UNIT)
+ {
+ unsigned int msk
+ = -1U << (BITS_PER_UNIT - (size % BITS_PER_UNIT));
+ p += size / BITS_PER_UNIT;
+ q += size / BITS_PER_UNIT;
+ *p = (*q & msk) | (*p & ~msk);
+ }
+ }
+ else
+ {
+ size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT);
+ if (pd.offset >= 0)
+ {
+ /* LSB of this_buffer[0] byte should be at pd.offset bits
+ in buffer. */
+ unsigned int msk;
+ amnt = pd.offset % BITS_PER_UNIT;
+ if (amnt)
+ shift_bytes_in_array_left (this_buffer, len + 1, amnt);
+ unsigned int off = pd.offset / BITS_PER_UNIT;
+ gcc_assert (off < needed_len);
+ p = buffer + off;
+ if (amnt + size < BITS_PER_UNIT)
+ {
+ /* Low amnt bits come from *p, then size bits
+ from this_buffer[0] and the remaining again from
+ *p. */
+ msk = ((1 << size) - 1) << amnt;
+ *p = (*p & ~msk) | (this_buffer[0] & msk);
+ size = 0;
+ }
+ else if (amnt)
+ {
+ msk = -1U << amnt;
+ *p = (*p & ~msk) | (this_buffer[0] & msk);
+ p++;
+ size -= (BITS_PER_UNIT - amnt);
+ }
+ }
+ else
+ {
+ amnt = (unsigned HOST_WIDE_INT) pd.offset % BITS_PER_UNIT;
+ if (amnt)
+ shift_bytes_in_array_left (this_buffer, len + 1, amnt);
+ }
+ memcpy (p, this_buffer + (amnt != 0), size / BITS_PER_UNIT);
+ p += size / BITS_PER_UNIT;
+ if (size % BITS_PER_UNIT)
+ {
+ unsigned int msk = -1U << (size % BITS_PER_UNIT);
+ *p = (this_buffer[(amnt != 0) + size / BITS_PER_UNIT]
+ & ~msk) | (*p & msk);
+ }
+ }
+ }
+
+ tree type = vr->type;
+ /* Make sure to interpret in a type that has a range covering the whole
+ access size. */
+ if (INTEGRAL_TYPE_P (vr->type) && maxsizei != TYPE_PRECISION (vr->type))
+ type = build_nonstandard_integer_type (maxsizei, TYPE_UNSIGNED (type));
+ tree val;
+ if (BYTES_BIG_ENDIAN)
+ {
+ unsigned sz = needed_len;
+ if (maxsizei % BITS_PER_UNIT)
+ shift_bytes_in_array_right (buffer, needed_len,
+ BITS_PER_UNIT
+ - (maxsizei % BITS_PER_UNIT));
+ if (INTEGRAL_TYPE_P (type))
+ sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type));
+ if (sz > needed_len)
+ {
+ memcpy (this_buffer + (sz - needed_len), buffer, needed_len);
+ val = native_interpret_expr (type, this_buffer, sz);
+ }
+ else
+ val = native_interpret_expr (type, buffer, needed_len);
+ }
+ else
+ val = native_interpret_expr (type, buffer, bufsize);
+ /* If we chop off bits because the type's precision doesn't match the
+ memory access size, this is ok when optimizing reads but not when
+ called from the DSE code during elimination.  */
+ if (val && type != vr->type)
+ {
+ if (! int_fits_type_p (val, vr->type))
+ val = NULL_TREE;
+ else
+ val = fold_convert (vr->type, val);
+ }
+
+ if (val)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Successfully combined %u partial definitions\n", ndefs);
+ /* We are using the alias-set of the first store we encounter which
+ should be appropriate here. */
+ return finish (first_set, val);
+ }
+ else
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Failed to interpret %u encoded partial definitions\n", ndefs);
+ return (void *)-1;
+ }
+}
+
/* Callback for walk_non_aliased_vuses. Adjusts the vn_reference_t VR_
with the current VUSE and performs the expression lookup. */
static void *
-vn_reference_lookup_2 (ao_ref *op ATTRIBUTE_UNUSED, tree vuse,
- unsigned int cnt, void *vr_)
+vn_reference_lookup_2 (ao_ref *op ATTRIBUTE_UNUSED, tree vuse, void *data_)
{
- vn_reference_t vr = (vn_reference_t)vr_;
+ vn_walk_cb_data *data = (vn_walk_cb_data *)data_;
+ vn_reference_t vr = data->vr;
vn_reference_s **slot;
hashval_t hash;
- /* This bounds the stmt walks we perform on reference lookups
- to O(1) instead of O(N) where N is the number of dominating
- stores. */
- if (cnt > (unsigned) PARAM_VALUE (PARAM_SCCVN_MAX_ALIAS_QUERIES_PER_ACCESS))
- return (void *)-1;
+ /* If we have partial definitions recorded we have to go through
+ vn_reference_lookup_3. */
+ if (!data->partial_defs.is_empty ())
+ return NULL;
- if (last_vuse_ptr)
- *last_vuse_ptr = vuse;
+ if (data->last_vuse_ptr)
+ {
+ *data->last_vuse_ptr = vuse;
+ data->last_vuse = vuse;
+ }
/* Fixup vuse and hash. */
if (vr->vuse)
hash = vr->hashcode;
slot = valid_info->references->find_slot_with_hash (vr, hash, NO_INSERT);
if (slot)
- return *slot;
+ {
+ if ((*slot)->result && data->saved_operands.exists ())
+ return data->finish (vr->set, (*slot)->result);
+ return *slot;
+ }
return NULL;
}
RCODE (OPS...).
So first simplify and lookup this expression to see if it
is already available. */
- mprts_hook = vn_lookup_simplify_result;
+ /* For simplification valueize. */
+ unsigned i;
+ for (i = 0; i < res_op->num_ops; ++i)
+ if (TREE_CODE (res_op->ops[i]) == SSA_NAME)
+ {
+ tree tem = vn_valueize (res_op->ops[i]);
+ if (!tem)
+ break;
+ res_op->ops[i] = tem;
+ }
+ /* If valueization of an operand fails (it is not available), skip
+ simplification. */
bool res = false;
- switch (TREE_CODE_LENGTH ((tree_code) res_op->code))
+ if (i == res_op->num_ops)
{
- case 1:
- res = gimple_resimplify1 (NULL, res_op, vn_valueize);
- break;
- case 2:
- res = gimple_resimplify2 (NULL, res_op, vn_valueize);
- break;
- case 3:
- res = gimple_resimplify3 (NULL, res_op, vn_valueize);
- break;
+ mprts_hook = vn_lookup_simplify_result;
+ res = res_op->resimplify (NULL, vn_valueize);
+ mprts_hook = NULL;
}
- mprts_hook = NULL;
gimple *new_stmt = NULL;
if (res
&& gimple_simplified_result_is_gimple_val (res_op))
- /* The expression is already available. */
- result = res_op->ops[0];
+ {
+ /* The expression is already available. */
+ result = res_op->ops[0];
+ /* Valueize it; simplification returns something in AVAIL only.  */
+ if (TREE_CODE (result) == SSA_NAME)
+ result = SSA_VAL (result);
+ }
else
{
tree val = vn_lookup_simplify_result (res_op);
/* The expression is not yet available, value-number lhs to
the new SSA_NAME we created. */
/* Initialize value-number information properly. */
- VN_INFO (result)->valnum = result;
- VN_INFO (result)->value_id = get_next_value_id ();
+ vn_ssa_aux_t result_info = VN_INFO (result);
+ result_info->valnum = result;
+ result_info->value_id = get_next_value_id ();
+ result_info->visited = 1;
gimple_seq_add_stmt_without_update (&VN_INFO (result)->expr,
new_stmt);
- VN_INFO (result)->needs_insertion = true;
+ result_info->needs_insertion = true;
/* ??? PRE phi-translation inserts NARYs without corresponding
SSA name result. Re-use those but set their result according
to the stmt we just built. */
unsigned int length = vn_nary_length_from_stmt (new_stmt);
vn_nary_op_t vno1
= alloc_vn_nary_op_noinit (length, &vn_tables_insert_obstack);
- vno1->value_id = VN_INFO (result)->value_id;
+ vno1->value_id = result_info->value_id;
vno1->length = length;
vno1->predicated_values = 0;
vno1->u.result = result;
return vn_nary_build_or_lookup_1 (&op, false);
}
+/* Elimination engine. */
+
+class eliminate_dom_walker : public dom_walker
+{
+public:
+ eliminate_dom_walker (cdi_direction, bitmap);
+ ~eliminate_dom_walker ();
+
+ virtual edge before_dom_children (basic_block);
+ virtual void after_dom_children (basic_block);
+
+ virtual tree eliminate_avail (basic_block, tree op);
+ virtual void eliminate_push_avail (basic_block, tree op);
+ tree eliminate_insert (basic_block, gimple_stmt_iterator *gsi, tree val);
+
+ void eliminate_stmt (basic_block, gimple_stmt_iterator *);
+
+ unsigned eliminate_cleanup (bool region_p = false);
+
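+ /* Whether we eliminate on behalf of PRE, accumulated TODO flags and
+ statistics about performed eliminations and insertions.  */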
+ bool do_pre;
+ unsigned int el_todo;
+ unsigned int eliminations;
+ unsigned int insertions;
+
+ /* SSA names that had their defs inserted by PRE if do_pre. */
+ bitmap inserted_exprs;
+
+ /* Blocks with statements that have had their EH properties changed. */
+ bitmap need_eh_cleanup;
+
+ /* Blocks with statements that have had their AB properties changed. */
+ bitmap need_ab_cleanup;
+
+ /* Local state for the eliminate domwalk. */
+ auto_vec<gimple *> to_remove;
+ auto_vec<gimple *> to_fixup;
+ auto_vec<tree> avail;
+ auto_vec<tree> avail_stack;
+};
+
+/* Adaptor to the elimination engine using RPO availability. */
+
+class rpo_elim : public eliminate_dom_walker
+{
+public:
+ rpo_elim(basic_block entry_)
+ : eliminate_dom_walker (CDI_DOMINATORS, NULL), entry (entry_),
+ m_avail_freelist (NULL) {}
+
+ virtual tree eliminate_avail (basic_block, tree op);
+
+ virtual void eliminate_push_avail (basic_block, tree);
+
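+ /* Entry block of the region we perform elimination on.  */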
+ basic_block entry;
+ /* Freelist of avail entries which are allocated from the vn_ssa_aux
+ obstack. */
+ vn_avail *m_avail_freelist;
+};
+
+/* Global RPO state for access from hooks. */
+static rpo_elim *rpo_avail;
basic_block vn_context_bb;
+/* Return true if BASE1 and BASE2 can be adjusted so they have the
+ same address and adjust *OFFSET1 and *OFFSET2 accordingly.
+ Otherwise return false. */
+
+static bool
+adjust_offsets_for_equal_base_address (tree base1, poly_int64 *offset1,
+ tree base2, poly_int64 *offset2)
+{
+ poly_int64 soff;
+ if (TREE_CODE (base1) == MEM_REF
+ && TREE_CODE (base2) == MEM_REF)
+ {
+ if (mem_ref_offset (base1).to_shwi (&soff))
+ {
+ base1 = TREE_OPERAND (base1, 0);
+ *offset1 += soff * BITS_PER_UNIT;
+ }
+ if (mem_ref_offset (base2).to_shwi (&soff))
+ {
+ base2 = TREE_OPERAND (base2, 0);
+ *offset2 += soff * BITS_PER_UNIT;
+ }
+ return operand_equal_p (base1, base2, 0);
+ }
+ return operand_equal_p (base1, base2, OEP_ADDRESS_OF);
+}
+
/* Callback for walk_non_aliased_vuses. Tries to perform a lookup
from the statement defining VUSE and if not successful tries to
translate *REFP and VR_ through an aggregate copy at the definition
*DISAMBIGUATE_ONLY is set to true. */
static void *
-vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
- bool *disambiguate_only)
+vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
+ translate_flags *disambiguate_only)
{
- vn_reference_t vr = (vn_reference_t)vr_;
+ vn_walk_cb_data *data = (vn_walk_cb_data *)data_;
+ vn_reference_t vr = data->vr;
gimple *def_stmt = SSA_NAME_DEF_STMT (vuse);
tree base = ao_ref_base (ref);
- HOST_WIDE_INT offseti, maxsizei;
+ HOST_WIDE_INT offseti = 0, maxsizei, sizei = 0;
static vec<vn_reference_op_s> lhs_ops;
ao_ref lhs_ref;
bool lhs_ref_ok = false;
poly_int64 copy_size;
- /* If the reference is based on a parameter that was determined as
- pointing to readonly memory it doesn't change. */
- if (TREE_CODE (base) == MEM_REF
- && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
- && SSA_NAME_IS_DEFAULT_DEF (TREE_OPERAND (base, 0))
- && bitmap_bit_p (const_parms,
- SSA_NAME_VERSION (TREE_OPERAND (base, 0))))
- {
- *disambiguate_only = true;
- return NULL;
- }
-
/* First try to disambiguate after value-replacing in the definitions LHS. */
if (is_gimple_assign (def_stmt))
{
lhs_ops.truncate (0);
basic_block saved_rpo_bb = vn_context_bb;
vn_context_bb = gimple_bb (def_stmt);
- copy_reference_ops_from_ref (lhs, &lhs_ops);
- lhs_ops = valueize_refs_1 (lhs_ops, &valueized_anything, true);
+ if (*disambiguate_only <= TR_VALUEIZE_AND_DISAMBIGUATE)
+ {
+ copy_reference_ops_from_ref (lhs, &lhs_ops);
+ lhs_ops = valueize_refs_1 (lhs_ops, &valueized_anything, true);
+ }
vn_context_bb = saved_rpo_bb;
if (valueized_anything)
{
get_alias_set (lhs),
TREE_TYPE (lhs), lhs_ops);
if (lhs_ref_ok
- && !refs_may_alias_p_1 (ref, &lhs_ref, true))
+ && !refs_may_alias_p_1 (ref, &lhs_ref, data->tbaa_p))
{
- *disambiguate_only = true;
+ *disambiguate_only = TR_VALUEIZE_AND_DISAMBIGUATE;
return NULL;
}
}
lhs_ref_ok = true;
}
+ /* Besides valueizing the LHS we can also use access-path based
+ disambiguation on the original non-valueized ref. */
+ if (!ref->ref
+ && lhs_ref_ok
+ && data->orig_ref.ref)
+ {
+ /* We want to use the non-valueized LHS for this, but avoid redundant
+ work. */
+ ao_ref *lref = &lhs_ref;
+ ao_ref lref_alt;
+ if (valueized_anything)
+ {
+ ao_ref_init (&lref_alt, lhs);
+ lref = &lref_alt;
+ }
+ if (!refs_may_alias_p_1 (&data->orig_ref, lref, data->tbaa_p))
+ {
+ *disambiguate_only = (valueized_anything
+ ? TR_VALUEIZE_AND_DISAMBIGUATE
+ : TR_DISAMBIGUATE);
+ return NULL;
+ }
+ }
+
/* If we reach a clobbering statement try to skip it and see if
we find a VN result with exactly the same value as the
possible clobber. In this case we can ignore the clobber
- and return the found value.
- Note that we don't need to worry about partial overlapping
- accesses as we then can use TBAA to disambiguate against the
- clobbering statement when looking up a load (thus the
- VN_WALKREWRITE guard). */
- if (vn_walk_kind == VN_WALKREWRITE
- && is_gimple_reg_type (TREE_TYPE (lhs))
- && types_compatible_p (TREE_TYPE (lhs), vr->type))
+ and return the found value. */
+ if (is_gimple_reg_type (TREE_TYPE (lhs))
+ && types_compatible_p (TREE_TYPE (lhs), vr->type)
+ && ref->ref)
{
- tree *saved_last_vuse_ptr = last_vuse_ptr;
+ tree *saved_last_vuse_ptr = data->last_vuse_ptr;
/* Do not update last_vuse_ptr in vn_reference_lookup_2. */
- last_vuse_ptr = NULL;
+ data->last_vuse_ptr = NULL;
tree saved_vuse = vr->vuse;
hashval_t saved_hashcode = vr->hashcode;
- void *res = vn_reference_lookup_2 (ref,
- gimple_vuse (def_stmt), 0, vr);
+ void *res = vn_reference_lookup_2 (ref, gimple_vuse (def_stmt), data);
/* Need to restore vr->vuse and vr->hashcode. */
vr->vuse = saved_vuse;
vr->hashcode = saved_hashcode;
- last_vuse_ptr = saved_last_vuse_ptr;
+ data->last_vuse_ptr = saved_last_vuse_ptr;
if (res && res != (void *)-1)
{
vn_reference_t vnresult = (vn_reference_t) res;
+ tree rhs = gimple_assign_rhs1 (def_stmt);
+ if (TREE_CODE (rhs) == SSA_NAME)
+ rhs = SSA_VAL (rhs);
if (vnresult->result
- && operand_equal_p (vnresult->result,
- gimple_assign_rhs1 (def_stmt), 0))
+ && operand_equal_p (vnresult->result, rhs, 0)
+ /* We have to honor our promise about union type punning
+ and also support arbitrary overlaps with
+ -fno-strict-aliasing. So simply resort to alignment to
+ rule out overlaps. Do this check last because it is
+ quite expensive compared to the hash-lookup above. */
+ && multiple_p (get_object_alignment (ref->ref), ref->size)
+ && multiple_p (get_object_alignment (lhs), ref->size))
return res;
}
}
}
- else if (gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL)
+ else if (*disambiguate_only <= TR_VALUEIZE_AND_DISAMBIGUATE
+ && gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL)
&& gimple_call_num_args (def_stmt) <= 4)
{
/* For builtin calls valueize its arguments and call the
gimple_call_set_arg (def_stmt, i, oldargs[i]);
if (!res)
{
- *disambiguate_only = true;
+ *disambiguate_only = TR_VALUEIZE_AND_DISAMBIGUATE;
return NULL;
}
}
}
- if (*disambiguate_only)
+ if (*disambiguate_only > TR_TRANSLATE)
return (void *)-1;
/* If we cannot constrain the size of the reference we cannot
poly_int64 offset = ref->offset;
poly_int64 maxsize = ref->max_size;
- /* We can't deduce anything useful from clobbers. */
- if (gimple_clobber_p (def_stmt))
- return (void *)-1;
-
/* def_stmt may-defs *ref. See if we can derive a value for *ref
from that definition.
1) Memset. */
if (is_gimple_reg_type (vr->type)
- && gimple_call_builtin_p (def_stmt, BUILT_IN_MEMSET)
+ && (gimple_call_builtin_p (def_stmt, BUILT_IN_MEMSET)
+ || gimple_call_builtin_p (def_stmt, BUILT_IN_MEMSET_CHK))
&& (integer_zerop (gimple_call_arg (def_stmt, 1))
|| ((TREE_CODE (gimple_call_arg (def_stmt, 1)) == INTEGER_CST
|| (INTEGRAL_TYPE_P (vr->type) && known_eq (ref->size, 8)))
- && CHAR_BIT == 8 && BITS_PER_UNIT == 8
+ && CHAR_BIT == 8
+ && BITS_PER_UNIT == 8
+ && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
&& offset.is_constant (&offseti)
- && offseti % BITS_PER_UNIT == 0))
- && poly_int_tree_p (gimple_call_arg (def_stmt, 2))
+ && ref->size.is_constant (&sizei)
+ && (offseti % BITS_PER_UNIT == 0
+ || TREE_CODE (gimple_call_arg (def_stmt, 1)) == INTEGER_CST)))
+ && (poly_int_tree_p (gimple_call_arg (def_stmt, 2))
+ || (TREE_CODE (gimple_call_arg (def_stmt, 2)) == SSA_NAME
+ && poly_int_tree_p (SSA_VAL (gimple_call_arg (def_stmt, 2)))))
&& (TREE_CODE (gimple_call_arg (def_stmt, 0)) == ADDR_EXPR
|| TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME))
{
else
return (void *)-1;
tree len = gimple_call_arg (def_stmt, 2);
- if (known_subrange_p (offset, maxsize, offset2,
- wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT))
+ HOST_WIDE_INT leni, offset2i;
+ if (TREE_CODE (len) == SSA_NAME)
+ len = SSA_VAL (len);
+ /* Sometimes the above trickery is smarter than alias analysis. Take
+ advantage of that. */
+ if (!ranges_maybe_overlap_p (offset, maxsize, offset2,
+ (wi::to_poly_offset (len)
+ << LOG2_BITS_PER_UNIT)))
+ return NULL;
+ if (data->partial_defs.is_empty ()
+ && known_subrange_p (offset, maxsize, offset2,
+ wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT))
{
tree val;
if (integer_zerop (gimple_call_arg (def_stmt, 1)))
val = build_zero_cst (vr->type);
else if (INTEGRAL_TYPE_P (vr->type)
- && known_eq (ref->size, 8))
+ && known_eq (ref->size, 8)
+ && offseti % BITS_PER_UNIT == 0)
{
gimple_match_op res_op (gimple_match_cond::UNCOND, NOP_EXPR,
vr->type, gimple_call_arg (def_stmt, 1));
}
else
{
- unsigned len = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type));
- unsigned char *buf = XALLOCAVEC (unsigned char, len);
+ unsigned buflen = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type)) + 1;
+ if (INTEGRAL_TYPE_P (vr->type))
+ buflen = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (vr->type)) + 1;
+ unsigned char *buf = XALLOCAVEC (unsigned char, buflen);
memset (buf, TREE_INT_CST_LOW (gimple_call_arg (def_stmt, 1)),
- len);
- val = native_interpret_expr (vr->type, buf, len);
+ buflen);
+ if (BYTES_BIG_ENDIAN)
+ {
+ unsigned int amnt
+ = (((unsigned HOST_WIDE_INT) offseti + sizei)
+ % BITS_PER_UNIT);
+ if (amnt)
+ {
+ shift_bytes_in_array_right (buf, buflen,
+ BITS_PER_UNIT - amnt);
+ buf++;
+ buflen--;
+ }
+ }
+ else if (offseti % BITS_PER_UNIT != 0)
+ {
+ unsigned int amnt
+ = BITS_PER_UNIT - ((unsigned HOST_WIDE_INT) offseti
+ % BITS_PER_UNIT);
+ shift_bytes_in_array_left (buf, buflen, amnt);
+ buf++;
+ buflen--;
+ }
+ val = native_interpret_expr (vr->type, buf, buflen);
if (!val)
return (void *)-1;
}
- return vn_reference_lookup_or_insert_for_pieces
- (vuse, vr->set, vr->type, vr->operands, val);
+ return data->finish (0, val);
+ }
+ /* For now handle clearing memory with partial defs. */
+ else if (known_eq (ref->size, maxsize)
+ && integer_zerop (gimple_call_arg (def_stmt, 1))
+ && tree_fits_poly_int64_p (len)
+ && tree_to_poly_int64 (len).is_constant (&leni)
+ && leni <= INTTYPE_MAXIMUM (HOST_WIDE_INT) / BITS_PER_UNIT
+ && offset.is_constant (&offseti)
+ && offset2.is_constant (&offset2i)
+ && maxsize.is_constant (&maxsizei)
+ && ranges_known_overlap_p (offseti, maxsizei, offset2i,
+ leni << LOG2_BITS_PER_UNIT))
+ {
+ pd_data pd;
+ pd.rhs = build_constructor (NULL_TREE, NULL);
+ pd.offset = offset2i - offseti;
+ pd.size = leni << LOG2_BITS_PER_UNIT;
+ return data->push_partial_def (pd, 0, maxsizei);
}
}
&& gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR
&& CONSTRUCTOR_NELTS (gimple_assign_rhs1 (def_stmt)) == 0)
{
+ tree lhs = gimple_assign_lhs (def_stmt);
tree base2;
poly_int64 offset2, size2, maxsize2;
+ HOST_WIDE_INT offset2i, size2i;
bool reverse;
- base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
- &offset2, &size2, &maxsize2, &reverse);
+ if (lhs_ref_ok)
+ {
+ base2 = ao_ref_base (&lhs_ref);
+ offset2 = lhs_ref.offset;
+ size2 = lhs_ref.size;
+ maxsize2 = lhs_ref.max_size;
+ reverse = reverse_storage_order_for_component_p (lhs);
+ }
+ else
+ base2 = get_ref_base_and_extent (lhs,
+ &offset2, &size2, &maxsize2, &reverse);
if (known_size_p (maxsize2)
- && operand_equal_p (base, base2, 0)
- && known_subrange_p (offset, maxsize, offset2, size2))
+ && known_eq (maxsize2, size2)
+ && adjust_offsets_for_equal_base_address (base, &offset,
+ base2, &offset2))
{
- tree val = build_zero_cst (vr->type);
- return vn_reference_lookup_or_insert_for_pieces
- (vuse, vr->set, vr->type, vr->operands, val);
+ if (data->partial_defs.is_empty ()
+ && known_subrange_p (offset, maxsize, offset2, size2))
+ {
+ /* While technically undefined behavior, do not optimize
+ a full read from a clobber.  */
+ if (gimple_clobber_p (def_stmt))
+ return (void *)-1;
+ tree val = build_zero_cst (vr->type);
+ return data->finish (get_alias_set (lhs), val);
+ }
+ else if (known_eq (ref->size, maxsize)
+ && maxsize.is_constant (&maxsizei)
+ && offset.is_constant (&offseti)
+ && offset2.is_constant (&offset2i)
+ && size2.is_constant (&size2i)
+ && ranges_known_overlap_p (offseti, maxsizei,
+ offset2i, size2i))
+ {
+ /* Let clobbers be consumed by the partial-def tracker
+ which can choose to ignore them if they are shadowed
+ by a later def. */
+ pd_data pd;
+ pd.rhs = gimple_assign_rhs1 (def_stmt);
+ pd.offset = offset2i - offseti;
+ pd.size = size2i;
+ return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
+ }
}
}
&& is_gimple_reg_type (vr->type)
&& !contains_storage_order_barrier_p (vr->operands)
&& gimple_assign_single_p (def_stmt)
- && CHAR_BIT == 8 && BITS_PER_UNIT == 8
+ && CHAR_BIT == 8
+ && BITS_PER_UNIT == 8
+ && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
/* native_encode and native_decode operate on arrays of bytes
and so fundamentally need a compile-time size and offset. */
&& maxsize.is_constant (&maxsizei)
- && maxsizei % BITS_PER_UNIT == 0
&& offset.is_constant (&offseti)
- && offseti % BITS_PER_UNIT == 0
&& (is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt))
|| (TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
&& is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt))))))
{
+ tree lhs = gimple_assign_lhs (def_stmt);
tree base2;
- HOST_WIDE_INT offset2, size2;
+ poly_int64 offset2, size2, maxsize2;
+ HOST_WIDE_INT offset2i, size2i;
bool reverse;
- base2 = get_ref_base_and_extent_hwi (gimple_assign_lhs (def_stmt),
- &offset2, &size2, &reverse);
+ if (lhs_ref_ok)
+ {
+ base2 = ao_ref_base (&lhs_ref);
+ offset2 = lhs_ref.offset;
+ size2 = lhs_ref.size;
+ maxsize2 = lhs_ref.max_size;
+ reverse = reverse_storage_order_for_component_p (lhs);
+ }
+ else
+ base2 = get_ref_base_and_extent (lhs,
+ &offset2, &size2, &maxsize2, &reverse);
if (base2
&& !reverse
- && size2 % BITS_PER_UNIT == 0
- && offset2 % BITS_PER_UNIT == 0
- && operand_equal_p (base, base2, 0)
- && known_subrange_p (offseti, maxsizei, offset2, size2))
+ && !storage_order_barrier_p (lhs)
+ && known_eq (maxsize2, size2)
+ && adjust_offsets_for_equal_base_address (base, &offset,
+ base2, &offset2)
+ && offset.is_constant (&offseti)
+ && offset2.is_constant (&offset2i)
+ && size2.is_constant (&size2i))
{
- /* We support up to 512-bit values (for V8DFmode). */
- unsigned char buffer[64];
- int len;
-
- tree rhs = gimple_assign_rhs1 (def_stmt);
- if (TREE_CODE (rhs) == SSA_NAME)
- rhs = SSA_VAL (rhs);
- len = native_encode_expr (gimple_assign_rhs1 (def_stmt),
- buffer, sizeof (buffer),
- (offseti - offset2) / BITS_PER_UNIT);
- if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
+ if (data->partial_defs.is_empty ()
+ && known_subrange_p (offseti, maxsizei, offset2, size2))
{
- tree type = vr->type;
- /* Make sure to interpret in a type that has a range
- covering the whole access size. */
- if (INTEGRAL_TYPE_P (vr->type)
- && maxsizei != TYPE_PRECISION (vr->type))
- type = build_nonstandard_integer_type (maxsizei,
- TYPE_UNSIGNED (type));
- tree val = native_interpret_expr (type, buffer,
- maxsizei / BITS_PER_UNIT);
- /* If we chop off bits because the types precision doesn't
- match the memory access size this is ok when optimizing
- reads but not when called from the DSE code during
- elimination. */
- if (val
- && type != vr->type)
+ /* We support up to 512-bit values (for V8DFmode). */
+ unsigned char buffer[65];
+ int len;
+
+ tree rhs = gimple_assign_rhs1 (def_stmt);
+ if (TREE_CODE (rhs) == SSA_NAME)
+ rhs = SSA_VAL (rhs);
+ len = native_encode_expr (rhs,
+ buffer, sizeof (buffer) - 1,
+ (offseti - offset2i) / BITS_PER_UNIT);
+ if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
{
- if (! int_fits_type_p (val, vr->type))
- val = NULL_TREE;
+ tree type = vr->type;
+ unsigned char *buf = buffer;
+ unsigned int amnt = 0;
+ /* Make sure to interpret in a type that has a range
+ covering the whole access size. */
+ if (INTEGRAL_TYPE_P (vr->type)
+ && maxsizei != TYPE_PRECISION (vr->type))
+ type = build_nonstandard_integer_type (maxsizei,
+ TYPE_UNSIGNED (type));
+ if (BYTES_BIG_ENDIAN)
+ {
+ /* For big-endian native_encode_expr stored the rhs
+ such that the LSB of it is the LSB of buffer[len - 1].
+ That bit is stored into memory at position
+ offset2 + size2 - 1, i.e. in byte
+ base + (offset2 + size2 - 1) / BITS_PER_UNIT.
+ E.g. for offset2 1 and size2 14, rhs -1 and memory
+ previously cleared that is:
+ 0 1
+ 01111111|11111110
+ Now, if we want to extract offset 2 and size 12 from
+ it using native_interpret_expr (which actually works
+ for integral bitfield types in terms of byte size of
+ the mode), the native_encode_expr stored the value
+ into buffer as
+ XX111111|11111111
+ and returned len 2 (the X bits are outside of
+ precision).
+ Let sz be maxsize / BITS_PER_UNIT if not extracting
+ a bitfield, and GET_MODE_SIZE otherwise.
+ We need to align the LSB of the value we want to
+ extract as the LSB of buf[sz - 1].
+ The LSB from memory we need to read is at position
+ offset + maxsize - 1. */
+ HOST_WIDE_INT sz = maxsizei / BITS_PER_UNIT;
+ if (INTEGRAL_TYPE_P (type))
+ sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type));
+ amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
+ - offseti - maxsizei) % BITS_PER_UNIT;
+ if (amnt)
+ shift_bytes_in_array_right (buffer, len, amnt);
+ amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
+ - offseti - maxsizei - amnt) / BITS_PER_UNIT;
+ if ((unsigned HOST_WIDE_INT) sz + amnt > (unsigned) len)
+ len = 0;
+ else
+ {
+ buf = buffer + len - sz - amnt;
+ len -= (buf - buffer);
+ }
+ }
else
- val = fold_convert (vr->type, val);
- }
+ {
+ amnt = ((unsigned HOST_WIDE_INT) offset2i
+ - offseti) % BITS_PER_UNIT;
+ if (amnt)
+ {
+ buffer[len] = 0;
+ shift_bytes_in_array_left (buffer, len + 1, amnt);
+ buf = buffer + 1;
+ }
+ }
+ tree val = native_interpret_expr (type, buf, len);
+ /* If we chop off bits because the type's precision doesn't
+ match the memory access size, this is ok when optimizing
+ reads but not when called from the DSE code during
+ elimination.  */
+ if (val
+ && type != vr->type)
+ {
+ if (! int_fits_type_p (val, vr->type))
+ val = NULL_TREE;
+ else
+ val = fold_convert (vr->type, val);
+ }
- if (val)
- return vn_reference_lookup_or_insert_for_pieces
- (vuse, vr->set, vr->type, vr->operands, val);
+ if (val)
+ return data->finish (get_alias_set (lhs), val);
+ }
+ }
+ else if (ranges_known_overlap_p (offseti, maxsizei, offset2i,
+ size2i))
+ {
+ pd_data pd;
+ tree rhs = gimple_assign_rhs1 (def_stmt);
+ if (TREE_CODE (rhs) == SSA_NAME)
+ rhs = SSA_VAL (rhs);
+ pd.rhs = rhs;
+ pd.offset = offset2i - offseti;
+ pd.size = size2i;
+ return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
}
}
}
/* 4) Assignment from an SSA name which definition we may be able
- to access pieces from. */
+ to access pieces from or we can combine to a larger entity. */
else if (known_eq (ref->size, maxsize)
&& is_gimple_reg_type (vr->type)
&& !contains_storage_order_barrier_p (vr->operands)
&& gimple_assign_single_p (def_stmt)
&& TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
{
+ tree lhs = gimple_assign_lhs (def_stmt);
tree base2;
poly_int64 offset2, size2, maxsize2;
+ HOST_WIDE_INT offset2i, size2i, offseti;
bool reverse;
- base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
- &offset2, &size2, &maxsize2,
- &reverse);
+ if (lhs_ref_ok)
+ {
+ base2 = ao_ref_base (&lhs_ref);
+ offset2 = lhs_ref.offset;
+ size2 = lhs_ref.size;
+ maxsize2 = lhs_ref.max_size;
+ reverse = reverse_storage_order_for_component_p (lhs);
+ }
+ else
+ base2 = get_ref_base_and_extent (lhs,
+ &offset2, &size2, &maxsize2, &reverse);
+ tree def_rhs = gimple_assign_rhs1 (def_stmt);
if (!reverse
+ && !storage_order_barrier_p (lhs)
&& known_size_p (maxsize2)
&& known_eq (maxsize2, size2)
- && operand_equal_p (base, base2, 0)
- && known_subrange_p (offset, maxsize, offset2, size2)
- /* ??? We can't handle bitfield precision extracts without
- either using an alternate type for the BIT_FIELD_REF and
- then doing a conversion or possibly adjusting the offset
- according to endianness. */
- && (! INTEGRAL_TYPE_P (vr->type)
- || known_eq (ref->size, TYPE_PRECISION (vr->type)))
- && multiple_p (ref->size, BITS_PER_UNIT))
+ && adjust_offsets_for_equal_base_address (base, &offset,
+ base2, &offset2))
{
- gimple_match_op op (gimple_match_cond::UNCOND,
- BIT_FIELD_REF, vr->type,
- vn_valueize (gimple_assign_rhs1 (def_stmt)),
- bitsize_int (ref->size),
- bitsize_int (offset - offset2));
- tree val = vn_nary_build_or_lookup (&op);
- if (val
- && (TREE_CODE (val) != SSA_NAME
- || ! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val)))
+ if (data->partial_defs.is_empty ()
+ && known_subrange_p (offset, maxsize, offset2, size2)
+ /* ??? We can't handle bitfield precision extracts without
+ either using an alternate type for the BIT_FIELD_REF and
+ then doing a conversion or possibly adjusting the offset
+ according to endianness. */
+ && (! INTEGRAL_TYPE_P (vr->type)
+ || known_eq (ref->size, TYPE_PRECISION (vr->type)))
+ && multiple_p (ref->size, BITS_PER_UNIT))
{
- vn_reference_t res = vn_reference_lookup_or_insert_for_pieces
- (vuse, vr->set, vr->type, vr->operands, val);
- return res;
+ tree val = NULL_TREE;
+ if (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs))
+ || type_has_mode_precision_p (TREE_TYPE (def_rhs)))
+ {
+ gimple_match_op op (gimple_match_cond::UNCOND,
+ BIT_FIELD_REF, vr->type,
+ SSA_VAL (def_rhs),
+ bitsize_int (ref->size),
+ bitsize_int (offset - offset2));
+ val = vn_nary_build_or_lookup (&op);
+ }
+ else if (known_eq (ref->size, size2))
+ {
+ gimple_match_op op (gimple_match_cond::UNCOND,
+ VIEW_CONVERT_EXPR, vr->type,
+ SSA_VAL (def_rhs));
+ val = vn_nary_build_or_lookup (&op);
+ }
+ if (val
+ && (TREE_CODE (val) != SSA_NAME
+ || ! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val)))
+ return data->finish (get_alias_set (lhs), val);
+ }
+ else if (maxsize.is_constant (&maxsizei)
+ && offset.is_constant (&offseti)
+ && offset2.is_constant (&offset2i)
+ && size2.is_constant (&size2i)
+ && ranges_known_overlap_p (offset, maxsize, offset2, size2))
+ {
+ pd_data pd;
+ pd.rhs = SSA_VAL (def_rhs);
+ pd.offset = offset2i - offseti;
+ pd.size = size2i;
+ return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
}
}
}
/* 5) For aggregate copies translate the reference through them if
the copy kills ref. */
- else if (vn_walk_kind == VN_WALKREWRITE
+ else if (data->vn_walk_kind == VN_WALKREWRITE
&& gimple_assign_single_p (def_stmt)
&& (DECL_P (gimple_assign_rhs1 (def_stmt))
|| TREE_CODE (gimple_assign_rhs1 (def_stmt)) == MEM_REF
|| handled_component_p (gimple_assign_rhs1 (def_stmt))))
{
+ tree lhs = gimple_assign_lhs (def_stmt);
tree base2;
int i, j, k;
auto_vec<vn_reference_op_s> rhs;
}
/* Now re-write REF to be based on the rhs of the assignment. */
- copy_reference_ops_from_ref (gimple_assign_rhs1 (def_stmt), &rhs);
+ tree rhs1 = gimple_assign_rhs1 (def_stmt);
+ copy_reference_ops_from_ref (rhs1, &rhs);
/* Apply an extra offset to the inner MEM_REF of the RHS. */
if (maybe_ne (extra_off, 0))
extra_off));
}
+ /* Save the operands since we need to use the original ones for
+ the hash entry we use. */
+ if (!data->saved_operands.exists ())
+ data->saved_operands = vr->operands.copy ();
+
/* We need to pre-pend vr->operands[0..i] to rhs. */
vec<vn_reference_op_s> old = vr->operands;
if (i + 1 + rhs.length () > vr->operands.length ())
/* Try folding the new reference to a constant. */
tree val = fully_constant_vn_reference_p (vr);
if (val)
- return vn_reference_lookup_or_insert_for_pieces
- (vuse, vr->set, vr->type, vr->operands, val);
+ {
+ if (data->partial_defs.is_empty ())
+ return data->finish (get_alias_set (lhs), val);
+ /* This is the only interesting case for partial-def handling
+ coming from targets that like to gimplify init-ctors as
+ aggregate copies from constant data like aarch64 for
+ PR83518. */
+ if (maxsize.is_constant (&maxsizei) && known_eq (ref->size, maxsize))
+ {
+ pd_data pd;
+ pd.rhs = val;
+ pd.offset = 0;
+ pd.size = maxsizei;
+ return data->push_partial_def (pd, get_alias_set (lhs),
+ maxsizei);
+ }
+ }
+
+ /* Continuing with partial defs isn't easily possible here; we
+ have to find a full def from further lookups from here.  Probably
+ not worth the special-casing everywhere.  */
+ if (!data->partial_defs.is_empty ())
+ return (void *)-1;
/* Adjust *ref from the new operands. */
- if (!ao_ref_init_from_vn_reference (&r, vr->set, vr->type, vr->operands))
+ if (!ao_ref_init_from_vn_reference (&r, get_alias_set (rhs1),
+ vr->type, vr->operands))
return (void *)-1;
/* This can happen with bitfields. */
if (maybe_ne (ref->size, r.size))
*ref = r;
/* Do not update last seen VUSE after translating. */
- last_vuse_ptr = NULL;
+ data->last_vuse_ptr = NULL;
+ /* Invalidate the original access path since it now contains
+ the wrong base. */
+ data->orig_ref.ref = NULL_TREE;
+ /* Use the alias-set of this LHS for recording an eventual result. */
+ if (data->first_set == -2)
+ data->first_set = get_alias_set (lhs);
/* Keep looking for the adjusted *REF / VR pair. */
return NULL;
/* 6) For memcpy copies translate the reference through them if
the copy kills ref. */
- else if (vn_walk_kind == VN_WALKREWRITE
+ else if (data->vn_walk_kind == VN_WALKREWRITE
&& is_gimple_reg_type (vr->type)
/* ??? Handle BCOPY as well. */
&& (gimple_call_builtin_p (def_stmt, BUILT_IN_MEMCPY)
+ || gimple_call_builtin_p (def_stmt, BUILT_IN_MEMCPY_CHK)
|| gimple_call_builtin_p (def_stmt, BUILT_IN_MEMPCPY)
- || gimple_call_builtin_p (def_stmt, BUILT_IN_MEMMOVE))
+ || gimple_call_builtin_p (def_stmt, BUILT_IN_MEMPCPY_CHK)
+ || gimple_call_builtin_p (def_stmt, BUILT_IN_MEMMOVE)
+ || gimple_call_builtin_p (def_stmt, BUILT_IN_MEMMOVE_CHK))
&& (TREE_CODE (gimple_call_arg (def_stmt, 0)) == ADDR_EXPR
|| TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME)
&& (TREE_CODE (gimple_call_arg (def_stmt, 1)) == ADDR_EXPR
|| TREE_CODE (gimple_call_arg (def_stmt, 1)) == SSA_NAME)
- && poly_int_tree_p (gimple_call_arg (def_stmt, 2), &copy_size))
+ && (poly_int_tree_p (gimple_call_arg (def_stmt, 2), &copy_size)
+ || (TREE_CODE (gimple_call_arg (def_stmt, 2)) == SSA_NAME
+ && poly_int_tree_p (SSA_VAL (gimple_call_arg (def_stmt, 2)),
+ &copy_size)))
+ /* Handling this is more complicated, give up for now. */
+ && data->partial_defs.is_empty ())
{
tree lhs, rhs;
ao_ref r;
else
return (void *)-1;
}
- if (TREE_CODE (rhs) != SSA_NAME
- && TREE_CODE (rhs) != ADDR_EXPR)
+ if (TREE_CODE (rhs) == SSA_NAME)
+ rhs = SSA_VAL (rhs);
+ else if (TREE_CODE (rhs) != ADDR_EXPR)
return (void *)-1;
/* The bases of the destination and the references have to agree. */
if (!known_subrange_p (at, byte_maxsize, lhs_offset, copy_size))
return (void *)-1;
+ /* Save the operands since we need to use the original ones for
+ the hash entry we use. */
+ if (!data->saved_operands.exists ())
+ data->saved_operands = vr->operands.copy ();
+
/* Make room for 2 operands in the new reference. */
if (vr->operands.length () < 2)
{
/* Try folding the new reference to a constant. */
tree val = fully_constant_vn_reference_p (vr);
if (val)
- return vn_reference_lookup_or_insert_for_pieces
- (vuse, vr->set, vr->type, vr->operands, val);
+ return data->finish (0, val);
/* Adjust *ref from the new operands. */
- if (!ao_ref_init_from_vn_reference (&r, vr->set, vr->type, vr->operands))
+ if (!ao_ref_init_from_vn_reference (&r, 0, vr->type, vr->operands))
return (void *)-1;
/* This can happen with bitfields. */
if (maybe_ne (ref->size, r.size))
*ref = r;
/* Do not update last seen VUSE after translating. */
- last_vuse_ptr = NULL;
+ data->last_vuse_ptr = NULL;
+ /* Invalidate the original access path since it now contains
+ the wrong base. */
+ data->orig_ref.ref = NULL_TREE;
+ /* Use the alias-set of this stmt for recording an eventual result. */
+ if (data->first_set == -2)
+ data->first_set = 0;
/* Keep looking for the adjusted *REF / VR pair. */
return NULL;
&& vr1.vuse)
{
ao_ref r;
- vn_walk_kind = kind;
+ unsigned limit = param_sccvn_max_alias_queries_per_access;
+ vn_walk_cb_data data (&vr1, NULL_TREE, NULL, kind, true);
if (ao_ref_init_from_vn_reference (&r, set, type, vr1.operands))
*vnresult =
- (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse,
+ (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, true,
vn_reference_lookup_2,
vn_reference_lookup_3,
- vuse_ssa_val, &vr1);
+ vuse_valueize, limit, &data);
gcc_checking_assert (vr1.operands == shared_lookup_references);
}
not exist in the hash table or if the result field of the structure
was NULL.  VNRESULT will be filled in with the vn_reference_t
stored in the hashtable if one exists. When TBAA_P is false assume
- we are looking up a store and treat it as having alias-set zero. */
+ we are looking up a store and treat it as having alias-set zero.
+ *LAST_VUSE_PTR will be updated with the VUSE at which the value lookup succeeded. */
tree
vn_reference_lookup (tree op, tree vuse, vn_lookup_kind kind,
- vn_reference_t *vnresult, bool tbaa_p)
+ vn_reference_t *vnresult, bool tbaa_p, tree *last_vuse_ptr)
{
vec<vn_reference_op_s> operands;
struct vn_reference_s vr1;
vr1.operands = operands
= valueize_shared_reference_ops_from_ref (op, &valuezied_anything);
vr1.type = TREE_TYPE (op);
- vr1.set = tbaa_p ? get_alias_set (op) : 0;
+ vr1.set = get_alias_set (op);
vr1.hashcode = vn_reference_compute_hash (&vr1);
if ((cst = fully_constant_vn_reference_p (&vr1)))
return cst;
{
vn_reference_t wvnresult;
ao_ref r;
+ unsigned limit = param_sccvn_max_alias_queries_per_access;
/* Make sure to use a valueized reference if we valueized anything.
Otherwise preserve the full reference for advanced TBAA. */
if (!valuezied_anything
|| !ao_ref_init_from_vn_reference (&r, vr1.set, vr1.type,
vr1.operands))
ao_ref_init (&r, op);
- if (! tbaa_p)
- r.ref_alias_set = r.base_alias_set = 0;
- vn_walk_kind = kind;
+ vn_walk_cb_data data (&vr1, r.ref ? NULL_TREE : op,
+ last_vuse_ptr, kind, tbaa_p);
wvnresult =
- (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse,
+ (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p,
vn_reference_lookup_2,
vn_reference_lookup_3,
- vuse_ssa_val, &vr1);
+ vuse_valueize, limit, &data);
gcc_checking_assert (vr1.operands == shared_lookup_references);
if (wvnresult)
{
return NULL_TREE;
}
+ if (last_vuse_ptr)
+ *last_vuse_ptr = vr1.vuse;
return vn_reference_lookup_1 (&vr1, vnresult);
}
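Illustration only (not part of the patch): a caller that wants to learn which
VUSE the lookup succeeded with passes the address of a local, mirroring what
the load handling later in this patch does; stmt and op are assumed to be in
scope here.

  /* Sketch of a caller using the new LAST_VUSE_PTR parameter.  */
  tree last_vuse = gimple_vuse (stmt);
  tree val = vn_reference_lookup (op, gimple_vuse (stmt), VN_WALKREWRITE,
				  NULL, true, &last_vuse);
  /* If the alias walk advanced past intermediate stores, last_vuse now
     names the VUSE the value was found at; otherwise it still holds the
     original gimple_vuse (stmt).  */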
but save a lookup if we deal with already inserted refs here. */
if (*slot)
{
- gcc_assert (operand_equal_p ((*slot)->result, vr1->result, 0));
+ /* We cannot assert that we have the same value either because
+ when disentangling an irreducible region we may end up visiting
+ a use before the corresponding def. That's a missed optimization
+ only though. See gcc.dg/tree-ssa/pr87126.c for example. */
+ if (dump_file && (dump_flags & TDF_DETAILS)
+ && !operand_equal_p ((*slot)->result, vr1->result, 0))
+ {
+ fprintf (dump_file, "Keeping old value ");
+ print_generic_expr (dump_file, (*slot)->result);
+ fprintf (dump_file, " because of collision\n");
+ }
free_reference (vr1);
obstack_free (&vn_tables_obstack, vr1);
return;
memcpy (&vno->op[0], ops, sizeof (tree) * length);
}
-/* Initialize VNO from OP. */
-
-static void
-init_vn_nary_op_from_op (vn_nary_op_t vno, tree op)
-{
- unsigned i;
-
- vno->opcode = TREE_CODE (op);
- vno->length = TREE_CODE_LENGTH (TREE_CODE (op));
- vno->type = TREE_TYPE (op);
- for (i = 0; i < vno->length; ++i)
- vno->op[i] = TREE_OPERAND (op, i);
-}
-
/* Return the number of operands for a vn_nary ops structure from STMT. */
static unsigned int
return vn_nary_op_lookup_1 (vno1, vnresult);
}
-/* Lookup OP in the current hash table, and return the resulting value
- number if it exists in the hash table. Return NULL_TREE if it does
- not exist in the hash table or if the result field of the operation
- is NULL. VNRESULT will contain the vn_nary_op_t from the hashtable
- if it exists. */
-
-tree
-vn_nary_op_lookup (tree op, vn_nary_op_t *vnresult)
-{
- vn_nary_op_t vno1
- = XALLOCAVAR (struct vn_nary_op_s,
- sizeof_vn_nary_op (TREE_CODE_LENGTH (TREE_CODE (op))));
- init_vn_nary_op_from_op (vno1, op);
- return vn_nary_op_lookup_1 (vno1, vnresult);
-}
-
/* Lookup the rhs of STMT in the current hash table, and return the resulting
value number if it exists in the hash table. Return NULL_TREE if
it does not exist in the hash table. VNRESULT will contain the
vno->hashcode = vn_nary_op_compute_hash (vno);
gcc_assert (! vno->predicated_values
|| (! vno->u.values->next
- && vno->u.values->valid_dominated_by_p[0] != EXIT_BLOCK
- && vno->u.values->valid_dominated_by_p[1] == EXIT_BLOCK));
+ && vno->u.values->n == 1));
}
slot = table->find_slot_with_hash (vno, vno->hashcode, INSERT);
vno1->u.values->result = result;
vno1->u.values->n = 1;
vno1->u.values->valid_dominated_by_p[0] = pred_e->dest->index;
- vno1->u.values->valid_dominated_by_p[1] = EXIT_BLOCK;
return vn_nary_op_insert_into (vno1, valid_info->nary, true);
}
return NULL_TREE;
}
-/* Insert OP into the current hash table with a value number of
- RESULT. Return the vn_nary_op_t structure we created and put in
- the hashtable. */
-
-vn_nary_op_t
-vn_nary_op_insert (tree op, tree result)
-{
- unsigned length = TREE_CODE_LENGTH (TREE_CODE (op));
- vn_nary_op_t vno1;
-
- vno1 = alloc_vn_nary_op (length, result, VN_INFO (result)->value_id);
- init_vn_nary_op_from_op (vno1, op);
- return vn_nary_op_insert_into (vno1, valid_info->nary, true);
-}
-
/* Insert the rhs of STMT into the current hash table with a value number of
RESULT. */
}
return false;
}
- else if (currval != VN_TOP
- && ! is_gimple_min_invariant (currval)
- && ! ssa_undefined_value_p (currval, false)
- && is_gimple_min_invariant (to))
+ bool curr_invariant = is_gimple_min_invariant (currval);
+ bool curr_undefined = (TREE_CODE (currval) == SSA_NAME
+ && ssa_undefined_value_p (currval, false));
+ if (currval != VN_TOP
+ && !curr_invariant
+ && !curr_undefined
+ && is_gimple_min_invariant (to))
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
}
to = from;
}
+ else if (currval != VN_TOP
+ && !curr_undefined
+ && TREE_CODE (to) == SSA_NAME
+ && ssa_undefined_value_p (to, false))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Forcing VARYING instead of changing "
+ "value number of ");
+ print_generic_expr (dump_file, from);
+ fprintf (dump_file, " from ");
+ print_generic_expr (dump_file, currval);
+ fprintf (dump_file, " (non-undefined) to ");
+ print_generic_expr (dump_file, to);
+ fprintf (dump_file, " (undefined)\n");
+ }
+ to = from;
+ }
else if (TREE_CODE (to) == SSA_NAME
&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (to))
to = from;
operation. */
if (INTEGRAL_TYPE_P (type)
&& TREE_CODE (rhs1) == SSA_NAME
- /* We only handle sign-changes or zero-extension -> & mask. */
- && ((TYPE_UNSIGNED (TREE_TYPE (rhs1))
+ /* We only handle sign-changes, zero-extension -> & mask or
+ sign-extension if we know the inner operation doesn't
+ overflow. */
+ && (((TYPE_UNSIGNED (TREE_TYPE (rhs1))
+ || (INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1))))
&& TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (rhs1)))
|| TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (rhs1))))
{
ops[0] = vn_nary_op_lookup_pieces
(2, gimple_assign_rhs_code (def), type, ops, NULL);
/* We have wider operation available. */
- if (ops[0])
+ if (ops[0]
+ /* If the leader is a wrapping operation we can
+ insert it for code hoisting w/o introducing
+ undefined overflow. If it is not it has to
+ be available. See PR86554. */
+ && (TYPE_OVERFLOW_WRAPS (TREE_TYPE (ops[0]))
+ || (rpo_avail && vn_context_bb
+ && rpo_avail->eliminate_avail (vn_context_bb,
+ ops[0]))))
{
unsigned lhs_prec = TYPE_PRECISION (type);
unsigned rhs_prec = TYPE_PRECISION (TREE_TYPE (rhs1));
- if (lhs_prec == rhs_prec)
+ if (lhs_prec == rhs_prec
+ || (INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1))))
{
gimple_match_op match_op (gimple_match_cond::UNCOND,
NOP_EXPR, type, ops[0]);
vr2->hashcode = vr1.hashcode;
vr2->result = lhs;
vr2->result_vdef = vdef_val;
+ vr2->value_id = 0;
slot = valid_info->references->find_slot_with_hash (vr2, vr2->hashcode,
INSERT);
gcc_assert (!*slot);
tree result;
last_vuse = gimple_vuse (stmt);
- last_vuse_ptr = &last_vuse;
result = vn_reference_lookup (op, gimple_vuse (stmt),
- default_vn_walk_kind, NULL, true);
- last_vuse_ptr = NULL;
+ default_vn_walk_kind, NULL, true, &last_vuse);
/* We handle type-punning through unions by value-numbering based
on offset and size of the access. Be prepared to handle a
visit_phi (gimple *phi, bool *inserted, bool backedges_varying_p)
{
tree result, sameval = VN_TOP, seen_undef = NULL_TREE;
+ tree backedge_val = NULL_TREE;
+ bool seen_non_backedge = false;
tree sameval_base = NULL_TREE;
poly_int64 soff, doff;
unsigned n_executable = 0;
- bool allsame = true;
edge_iterator ei;
edge e;
tree def = PHI_ARG_DEF_FROM_EDGE (phi, e);
++n_executable;
- if (TREE_CODE (def) == SSA_NAME
- && (!backedges_varying_p || !(e->flags & EDGE_DFS_BACK)))
- def = SSA_VAL (def);
+ if (TREE_CODE (def) == SSA_NAME)
+ {
+ if (!backedges_varying_p || !(e->flags & EDGE_DFS_BACK))
+ def = SSA_VAL (def);
+ if (e->flags & EDGE_DFS_BACK)
+ backedge_val = def;
+ }
+ if (!(e->flags & EDGE_DFS_BACK))
+ seen_non_backedge = true;
if (def == VN_TOP)
;
/* Ignore undefined defs for sameval but record one. */
&& known_eq (soff, doff))
continue;
}
- allsame = false;
+ sameval = NULL_TREE;
break;
}
}
-
+ /* If the value we want to use is flowing over the backedge and we
+ should take it as VARYING but it has a non-VARYING value, drop to
+ VARYING.
+ If we value-number a virtual operand, never value-number to the
+ value from the backedge, as that confuses the alias-walking code.
+ See gcc.dg/torture/pr87176.c. If the value is the same on a
+ non-backedge everything is OK though. */
+ bool visited_p;
+ if ((backedge_val
+ && !seen_non_backedge
+ && TREE_CODE (backedge_val) == SSA_NAME
+ && sameval == backedge_val
+ && (SSA_NAME_IS_VIRTUAL_OPERAND (backedge_val)
+ || SSA_VAL (backedge_val) != backedge_val))
+ /* Do not value-number a virtual operand to something not visited
+ though, since that would allow us to escape a region in alias walking. */
+ || (sameval
+ && TREE_CODE (sameval) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (sameval)
+ && SSA_NAME_IS_VIRTUAL_OPERAND (sameval)
+ && (SSA_VAL (sameval, &visited_p), !visited_p)))
+ /* Note this just drops to VARYING without inserting the PHI into
+ the hashes. */
+ result = PHI_RESULT (phi);
/* If none of the edges was executable keep the value-number at VN_TOP,
if only a single edge is executable use its value. */
- if (n_executable <= 1)
+ else if (n_executable <= 1)
result = seen_undef ? seen_undef : sameval;
/* If we saw only undefined values and VN_TOP use one of the
undefined values. */
/* If all values are the same use that, unless we've seen undefined
values as well and the value isn't constant.
CCP/copyprop have the same restriction to not remove uninit warnings. */
- else if (allsame
+ else if (sameval
&& (! seen_undef || is_gimple_min_invariant (sameval)))
result = sameval;
else
set_value_id_for_result (vr->result, &vr->value_id);
}
-
-/* Allocate and initialize CONST_PARAMS, a bitmap of parameter default defs
- we know point to readonly memory. */
-
-static void
-init_const_parms ()
-{
- /* Collect pointers we know point to readonly memory. */
- const_parms = BITMAP_ALLOC (NULL);
- tree fnspec = lookup_attribute ("fn spec",
- TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)));
- if (fnspec)
- {
- fnspec = TREE_VALUE (TREE_VALUE (fnspec));
- unsigned i = 1;
- for (tree arg = DECL_ARGUMENTS (cfun->decl);
- arg; arg = DECL_CHAIN (arg), ++i)
- {
- if (i >= (unsigned) TREE_STRING_LENGTH (fnspec))
- break;
- if (TREE_STRING_POINTER (fnspec)[i] == 'R'
- || TREE_STRING_POINTER (fnspec)[i] == 'r')
- {
- tree name = ssa_default_def (cfun, arg);
- if (name)
- bitmap_set_bit (const_parms, SSA_NAME_VERSION (name));
- }
- }
- }
-}
-
/* Return the maximum value id we have ever seen. */
unsigned int
honor_nans = flag_trapping_math && !flag_finite_math_only;
honor_snans = flag_signaling_nans != 0;
}
- else if (INTEGRAL_TYPE_P (type)
- && TYPE_OVERFLOW_TRAPS (type))
+ else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
honor_trapv = true;
}
if (nary->length >= 2)
rhs2 = nary->op[1];
ret = operation_could_trap_helper_p (nary->opcode, fp_operation,
- honor_trapv,
- honor_nans, honor_snans, rhs2,
- &handled);
- if (handled
- && ret)
+ honor_trapv, honor_nans, honor_snans,
+ rhs2, &handled);
+ if (handled && ret)
return true;
for (i = 0; i < nary->length; ++i)
return false;
}
+/* Return true if the reference operation REF may trap. */
-class eliminate_dom_walker : public dom_walker
+bool
+vn_reference_may_trap (vn_reference_t ref)
{
-public:
- eliminate_dom_walker (cdi_direction, bitmap);
- ~eliminate_dom_walker ();
-
- virtual edge before_dom_children (basic_block);
- virtual void after_dom_children (basic_block);
-
- virtual tree eliminate_avail (basic_block, tree op);
- virtual void eliminate_push_avail (basic_block, tree op);
- tree eliminate_insert (basic_block, gimple_stmt_iterator *gsi, tree val);
-
- void eliminate_stmt (basic_block, gimple_stmt_iterator *);
-
- unsigned eliminate_cleanup (bool region_p = false);
-
- bool do_pre;
- unsigned int el_todo;
- unsigned int eliminations;
- unsigned int insertions;
-
- /* SSA names that had their defs inserted by PRE if do_pre. */
- bitmap inserted_exprs;
-
- /* Blocks with statements that have had their EH properties changed. */
- bitmap need_eh_cleanup;
-
- /* Blocks with statements that have had their AB properties changed. */
- bitmap need_ab_cleanup;
-
- /* Local state for the eliminate domwalk. */
- auto_vec<gimple *> to_remove;
- auto_vec<gimple *> to_fixup;
- auto_vec<tree> avail;
- auto_vec<tree> avail_stack;
-};
+ switch (ref->operands[0].opcode)
+ {
+ case MODIFY_EXPR:
+ case CALL_EXPR:
+ /* We do not handle calls. */
+ case ADDR_EXPR:
+ /* And toplevel address computations never trap. */
+ return false;
+ default:;
+ }
+
+ vn_reference_op_t op;
+ unsigned i;
+ FOR_EACH_VEC_ELT (ref->operands, i, op)
+ {
+ switch (op->opcode)
+ {
+ case WITH_SIZE_EXPR:
+ case TARGET_MEM_REF:
+ /* Always variable. */
+ return true;
+ case COMPONENT_REF:
+ if (op->op1 && TREE_CODE (op->op1) == SSA_NAME)
+ return true;
+ break;
+ case ARRAY_RANGE_REF:
+ case ARRAY_REF:
+ if (TREE_CODE (op->op0) == SSA_NAME)
+ return true;
+ break;
+ case MEM_REF:
+ /* Nothing interesting in itself, the base is separate. */
+ break;
+ /* The following are the address bases. */
+ case SSA_NAME:
+ return true;
+ case ADDR_EXPR:
+ if (op->op0)
+ return tree_could_trap_p (TREE_OPERAND (op->op0, 0));
+ return false;
+ default:;
+ }
+ }
+ return false;
+}
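Illustration only (not part of the patch): a client that obtained a
vn_reference_t from a lookup could use the new predicate to avoid
speculatively moving a possibly-trapping reference; vnresult is a
hypothetical variable here.

  /* Hypothetical guard before hoisting/inserting a reference.  */
  if (vn_reference_may_trap (vnresult))
    return false;	/* A variable index or WITH_SIZE_EXPR may trap.  */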
eliminate_dom_walker::eliminate_dom_walker (cdi_direction direction,
bitmap inserted_exprs_)
/* If this is an assignment from our leader (which
happens in the case the value-number is a constant)
- then there is nothing to do. */
- if (gimple_assign_single_p (stmt)
+ then there is nothing to do. Likewise if we run into
+ inserted code that needed a conversion because of
+ our type-agnostic value-numbering of loads. */
+ if ((gimple_assign_single_p (stmt)
+ || (is_gimple_assign (stmt)
+ && (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
+ || gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR)))
&& sprime == gimple_assign_rhs1 (stmt))
return;
/* Else replace its RHS. */
- bool can_make_abnormal_goto
- = is_gimple_call (stmt)
- && stmt_can_make_abnormal_goto (stmt);
-
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Replaced ");
fprintf (dump_file, " in ");
print_gimple_stmt (dump_file, stmt, 0);
}
-
eliminations++;
+
+ bool can_make_abnormal_goto = (is_gimple_call (stmt)
+ && stmt_can_make_abnormal_goto (stmt));
gimple *orig_stmt = stmt;
if (!useless_type_conversion_p (TREE_TYPE (lhs),
TREE_TYPE (sprime)))
- sprime = fold_convert (TREE_TYPE (lhs), sprime);
+ {
+ /* We preserve conversions to but not from function or method
+ types. This asymmetry makes it necessary to re-instantiate
+ conversions here. */
+ if (POINTER_TYPE_P (TREE_TYPE (lhs))
+ && FUNC_OR_METHOD_TYPE_P (TREE_TYPE (TREE_TYPE (lhs))))
+ sprime = fold_convert (TREE_TYPE (lhs), sprime);
+ else
+ gcc_unreachable ();
+ }
tree vdef = gimple_vdef (stmt);
tree vuse = gimple_vuse (stmt);
propagate_tree_value_into_stmt (gsi, sprime);
stmt = gsi_stmt (*gsi);
update_stmt (stmt);
+ /* In case the VDEF on the original stmt was released, value-number
+ it to the VUSE. This is to make vuse_ssa_val able to skip
+ released virtual operands. */
if (vdef != gimple_vdef (stmt))
- VN_INFO (vdef)->valnum = vuse;
+ {
+ gcc_assert (SSA_NAME_IN_FREE_LIST (vdef));
+ VN_INFO (vdef)->valnum = vuse;
+ }
/* If we removed EH side-effects from the statement, clean
its EH information. */
&& (TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME
|| is_gimple_min_invariant (gimple_assign_rhs1 (stmt))))
{
- tree val;
tree rhs = gimple_assign_rhs1 (stmt);
vn_reference_t vnresult;
- val = vn_reference_lookup (lhs, gimple_vuse (stmt), VN_WALKREWRITE,
- &vnresult, false);
+ /* ??? gcc.dg/torture/pr91445.c shows that we look up a boolean
+ typed load of a byte known to be 0x11 as 1, so a store of
+ a boolean 1 is detected as redundant. Because of this we
+ have to make sure to look up with a ref whose size
+ matches the precision. */
+ tree lookup_lhs = lhs;
+ if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
+ && (TREE_CODE (lhs) != COMPONENT_REF
+ || !DECL_BIT_FIELD_TYPE (TREE_OPERAND (lhs, 1)))
+ && !type_has_mode_precision_p (TREE_TYPE (lhs)))
+ {
+ if (TREE_CODE (lhs) == COMPONENT_REF
+ || TREE_CODE (lhs) == MEM_REF)
+ {
+ tree ltype = build_nonstandard_integer_type
+ (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (lhs))),
+ TYPE_UNSIGNED (TREE_TYPE (lhs)));
+ if (TREE_CODE (lhs) == COMPONENT_REF)
+ {
+ tree foff = component_ref_field_offset (lhs);
+ tree f = TREE_OPERAND (lhs, 1);
+ if (!poly_int_tree_p (foff))
+ lookup_lhs = NULL_TREE;
+ else
+ lookup_lhs = build3 (BIT_FIELD_REF, ltype,
+ TREE_OPERAND (lhs, 0),
+ TYPE_SIZE (TREE_TYPE (lhs)),
+ bit_from_pos
+ (foff, DECL_FIELD_BIT_OFFSET (f)));
+ }
+ else
+ lookup_lhs = build2 (MEM_REF, ltype,
+ TREE_OPERAND (lhs, 0),
+ TREE_OPERAND (lhs, 1));
+ }
+ else
+ lookup_lhs = NULL_TREE;
+ }
+ tree val = NULL_TREE;
+ if (lookup_lhs)
+ val = vn_reference_lookup (lookup_lhs, gimple_vuse (stmt),
+ VN_WALKREWRITE, &vnresult, false);
if (TREE_CODE (rhs) == SSA_NAME)
rhs = VN_INFO (rhs)->valnum;
if (val
- && operand_equal_p (val, rhs, 0))
+ && (operand_equal_p (val, rhs, 0)
+ /* Due to the bitfield lookups above we can get bit
+ interpretations of the same RHS as values here. Those
+ are redundant as well. */
+ || (TREE_CODE (val) == SSA_NAME
+ && gimple_assign_single_p (SSA_NAME_DEF_STMT (val))
+ && (val = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val)))
+ && TREE_CODE (val) == VIEW_CONVERT_EXPR
+ && TREE_OPERAND (val, 0) == rhs)))
{
/* We can only remove the later store if the former aliases
at least all accesses the later one does or if the store
ipa_polymorphic_call_context context (current_function_decl,
fn, stmt, &instance);
context.get_dynamic_type (instance, OBJ_TYPE_REF_OBJECT (fn),
- otr_type, stmt);
+ otr_type, stmt, NULL);
bool final;
vec <cgraph_node *> targets
= possible_polymorphic_call_targets (obj_type_ref_class (fn),
fprintf (dump_file, " Removed AB side-effects.\n");
}
update_stmt (stmt);
- if (vdef != gimple_vdef (stmt))
+ /* In case the VDEF on the original stmt was released, value-number
+ it to the VUSE. This is to make vuse_ssa_val able to skip
+ released virtual operands. */
+ if (vdef && SSA_NAME_IN_FREE_LIST (vdef))
VN_INFO (vdef)->valnum = vuse;
}
do_release_defs = false;
}
}
- else
- {
- tree lhs = gimple_get_lhs (stmt);
- if (TREE_CODE (lhs) == SSA_NAME
- && !has_zero_uses (lhs))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Keeping eliminated stmt live "
- "as copy because of out-of-region uses\n");
- tree sprime = eliminate_avail (gimple_bb (stmt), lhs);
- gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
- if (is_gimple_assign (stmt))
- {
- gimple_assign_set_rhs_from_tree (&gsi, sprime);
- update_stmt (gsi_stmt (gsi));
- continue;
- }
- else
- {
- gimple *copy = gimple_build_assign (lhs, sprime);
- gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
- do_release_defs = false;
- }
- }
- }
+ else if (tree lhs = gimple_get_lhs (stmt))
+ if (TREE_CODE (lhs) == SSA_NAME
+ && !has_zero_uses (lhs))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Keeping eliminated stmt live "
+ "as copy because of out-of-region uses\n");
+ tree sprime = eliminate_avail (gimple_bb (stmt), lhs);
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ if (is_gimple_assign (stmt))
+ {
+ gimple_assign_set_rhs_from_tree (&gsi, sprime);
+ stmt = gsi_stmt (gsi);
+ update_stmt (stmt);
+ if (maybe_clean_or_replace_eh_stmt (stmt, stmt))
+ bitmap_set_bit (need_eh_cleanup, gimple_bb (stmt)->index);
+ continue;
+ }
+ else
+ {
+ gimple *copy = gimple_build_assign (lhs, sprime);
+ gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
+ do_release_defs = false;
+ }
+ }
}
if (dump_file && (dump_flags & TDF_DETAILS))
obstack_free (&vn_tables_obstack, NULL);
obstack_free (&vn_tables_insert_obstack, NULL);
- BITMAP_FREE (const_parms);
-
vn_ssa_aux_iterator_type it;
vn_ssa_aux_t info;
FOR_EACH_HASH_TABLE_ELEMENT (*vn_ssa_aux_hash, info, vn_ssa_aux_t, it)
BITMAP_FREE (constant_value_ids);
}
-/* Adaptor to the elimination engine using RPO availability. */
-
-class rpo_elim : public eliminate_dom_walker
-{
-public:
- rpo_elim(basic_block entry_)
- : eliminate_dom_walker (CDI_DOMINATORS, NULL), entry (entry_) {}
- ~rpo_elim();
-
- virtual tree eliminate_avail (basic_block, tree op);
-
- virtual void eliminate_push_avail (basic_block, tree);
-
- basic_block entry;
- /* Instead of having a local availability lattice for each
- basic-block and availability at X defined as union of
- the local availabilities at X and its dominators we're
- turning this upside down and track availability per
- value given values are usually made available at very
- few points (at least one).
- So we have a value -> vec<location, leader> map where
- LOCATION is specifying the basic-block LEADER is made
- available for VALUE. We push to this vector in RPO
- order thus for iteration we can simply pop the last
- entries.
- LOCATION is the basic-block index and LEADER is its
- SSA name version. */
- /* ??? We'd like to use auto_vec here with embedded storage
- but that doesn't play well until we can provide move
- constructors and use std::move on hash-table expansion.
- So for now this is a bit more expensive than necessary.
- We eventually want to switch to a chaining scheme like
- for hashtable entries for unwinding which would make
- making the vector part of the vn_ssa_aux structure possible. */
- typedef hash_map<tree, vec<std::pair<int, int> > > rpo_avail_t;
- rpo_avail_t m_rpo_avail;
-};
-
-/* Global RPO state for access from hooks. */
-static rpo_elim *rpo_avail;
-
/* Hook for maybe_push_res_to_seq, lookup the expression in the VN tables. */
static tree
res_op->type, ops, &vnresult);
/* If this is used from expression simplification make sure to
return an available expression. */
- if (res && mprts_hook && rpo_avail)
+ if (res && TREE_CODE (res) == SSA_NAME && mprts_hook && rpo_avail)
res = rpo_avail->eliminate_avail (vn_context_bb, res);
return res;
}
-rpo_elim::~rpo_elim ()
-{
- /* Release the avail vectors. */
- for (rpo_avail_t::iterator i = m_rpo_avail.begin ();
- i != m_rpo_avail.end (); ++i)
- (*i).second.release ();
-}
-
/* Return a leader for OPs value that is valid at BB. */
tree
rpo_elim::eliminate_avail (basic_block bb, tree op)
{
- tree valnum = SSA_VAL (op);
+ bool visited;
+ tree valnum = SSA_VAL (op, &visited);
+ /* If we didn't visit OP then it must be defined outside of the
+ region we process and also dominate it. So it is available. */
+ if (!visited)
+ return op;
if (TREE_CODE (valnum) == SSA_NAME)
{
if (SSA_NAME_IS_DEFAULT_DEF (valnum))
return valnum;
- vec<std::pair<int, int> > *av = m_rpo_avail.get (valnum);
- if (!av || av->is_empty ())
+ vn_avail *av = VN_INFO (valnum)->avail;
+ if (!av)
return NULL_TREE;
- int i = av->length () - 1;
- if ((*av)[i].first == bb->index)
+ if (av->location == bb->index)
/* On tramp3d 90% of the cases are here. */
- return ssa_name ((*av)[i].second);
+ return ssa_name (av->leader);
do
{
- basic_block abb = BASIC_BLOCK_FOR_FN (cfun, (*av)[i].first);
+ basic_block abb = BASIC_BLOCK_FOR_FN (cfun, av->location);
/* ??? During elimination we have to use availability at the
definition site of a use we try to replace. This
is required to not run into inconsistencies because
executable. */
if (dominated_by_p_w_unex (bb, abb))
{
- tree leader = ssa_name ((*av)[i].second);
+ tree leader = ssa_name (av->leader);
/* Prevent eliminations that break loop-closed SSA. */
if (loops_state_satisfies_p (LOOP_CLOSED_SSA)
&& ! SSA_NAME_IS_DEFAULT_DEF (leader)
/* ??? Can we somehow skip to the immediate dominator
RPO index (bb_to_rpo)?  Again, maybe not worth it; on
tramp3d the worst number of elements in the vector is 9. */
+ av = av->next;
}
- while (--i >= 0);
+ while (av);
}
else if (valnum != VN_TOP)
/* valnum is is_gimple_min_invariant. */
rpo_elim::eliminate_push_avail (basic_block bb, tree leader)
{
tree valnum = VN_INFO (leader)->valnum;
- if (valnum == VN_TOP)
+ if (valnum == VN_TOP
+ || is_gimple_min_invariant (valnum))
return;
if (dump_file && (dump_flags & TDF_DETAILS))
{
print_generic_expr (dump_file, valnum);
fprintf (dump_file, "\n");
}
- bool existed;
- vec<std::pair<int, int> > &av = m_rpo_avail.get_or_insert (valnum, &existed);
- if (!existed)
+ vn_ssa_aux_t value = VN_INFO (valnum);
+ vn_avail *av;
+ if (m_avail_freelist)
{
- new (&av) vec<std::pair<int, int> >;
- av.reserve_exact (2);
+ av = m_avail_freelist;
+ m_avail_freelist = m_avail_freelist->next;
}
- av.safe_push (std::make_pair (bb->index, SSA_NAME_VERSION (leader)));
+ else
+ av = XOBNEW (&vn_ssa_aux_obstack, vn_avail);
+ av->location = bb->index;
+ av->leader = SSA_NAME_VERSION (leader);
+ av->next = value->avail;
+ value->avail = av;
}
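For orientation (not part of the patch): the availability entries pushed above
hang off VN_INFO (value)->avail as a singly-linked chain; roughly, with the
exact declaration in the header elided:

  /* One entry per (value, block) pair, newest first.  */
  struct vn_avail
  {
    vn_avail *next;	/* Earlier entries, pushed in RPO order.  */
    int location;	/* Basic-block index the leader is available in.  */
    int leader;		/* SSA name version of the leader.  */
  };

Because entries are prepended in RPO order, eliminate_avail above can start at
the newest entry and walk towards dominating blocks, and do_unwind can pop
entries at or after the iteration point back onto m_avail_freelist.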
/* Valueization hook for RPO VN plus required state. */
if (TREE_CODE (name) == SSA_NAME)
{
vn_ssa_aux_t val = VN_INFO (name);
- /* For region-based VN this makes walk_non_aliased_vuses stop walking
- when we are about to look at a def outside of the region. */
- if (SSA_NAME_IS_VIRTUAL_OPERAND (name)
- && !val->visited)
- return NULL_TREE;
if (val)
{
tree tem = val->valnum;
static unsigned
process_bb (rpo_elim &avail, basic_block bb,
bool bb_visited, bool iterate_phis, bool iterate, bool eliminate,
- bool do_region, bitmap exit_bbs)
+ bool do_region, bitmap exit_bbs, bool skip_phis)
{
unsigned todo = 0;
edge_iterator ei;
/* If we are in loop-closed SSA preserve this state. This is
relevant when called on regions from outside of FRE/PRE. */
bool lc_phi_nodes = false;
- if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
+ if (!skip_phis
+ && loops_state_satisfies_p (LOOP_CLOSED_SSA))
FOR_EACH_EDGE (e, ei, bb->preds)
if (e->src->loop_father != e->dest->loop_father
&& flow_loop_nested_p (e->dest->loop_father,
break;
}
- /* Value-number all defs in the basic-block. */
- for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
- gsi_next (&gsi))
+ /* When we visit a loop header substitute into loop info. */
+ if (!iterate && eliminate && bb->loop_father->header == bb)
{
- gphi *phi = gsi.phi ();
- tree res = PHI_RESULT (phi);
- vn_ssa_aux_t res_info = VN_INFO (res);
- if (!bb_visited)
- {
- gcc_assert (!res_info->visited);
- res_info->valnum = VN_TOP;
- res_info->visited = true;
- }
+ /* Keep fields in sync with substitute_in_loop_info. */
+ if (bb->loop_father->nb_iterations)
+ bb->loop_father->nb_iterations
+ = simplify_replace_tree (bb->loop_father->nb_iterations,
+ NULL_TREE, NULL_TREE, &vn_valueize_wrapper);
+ }
- /* When not iterating force backedge values to varying. */
- visit_stmt (phi, !iterate_phis);
- if (virtual_operand_p (res))
- continue;
+ /* Value-number all defs in the basic-block. */
+ if (!skip_phis)
+ for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ gphi *phi = gsi.phi ();
+ tree res = PHI_RESULT (phi);
+ vn_ssa_aux_t res_info = VN_INFO (res);
+ if (!bb_visited)
+ {
+ gcc_assert (!res_info->visited);
+ res_info->valnum = VN_TOP;
+ res_info->visited = true;
+ }
- /* Eliminate */
- /* The interesting case is gcc.dg/tree-ssa/pr22230.c for correctness
- how we handle backedges and availability.
- And gcc.dg/tree-ssa/ssa-sccvn-2.c for optimization. */
- tree val = res_info->valnum;
- if (res != val && !iterate && eliminate)
- {
- if (tree leader = avail.eliminate_avail (bb, res))
- {
- if (leader != res
- /* Preserve loop-closed SSA form. */
- && (! lc_phi_nodes
- || is_gimple_min_invariant (leader)))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "Replaced redundant PHI node "
- "defining ");
- print_generic_expr (dump_file, res);
- fprintf (dump_file, " with ");
- print_generic_expr (dump_file, leader);
- fprintf (dump_file, "\n");
- }
- avail.eliminations++;
+ /* When not iterating force backedge values to varying. */
+ visit_stmt (phi, !iterate_phis);
+ if (virtual_operand_p (res))
+ continue;
- if (may_propagate_copy (res, leader))
- {
- /* Schedule for removal. */
- avail.to_remove.safe_push (phi);
- continue;
- }
- /* ??? Else generate a copy stmt. */
- }
- }
- }
- /* Only make defs available that not already are. But make
- sure loop-closed SSA PHI node defs are picked up for
- downstream uses. */
- if (lc_phi_nodes
- || res == val
- || ! avail.eliminate_avail (bb, res))
- avail.eliminate_push_avail (bb, res);
- }
+ /* Eliminate */
+ /* The interesting case is gcc.dg/tree-ssa/pr22230.c for correctness
+ how we handle backedges and availability.
+ And gcc.dg/tree-ssa/ssa-sccvn-2.c for optimization. */
+ tree val = res_info->valnum;
+ if (res != val && !iterate && eliminate)
+ {
+ if (tree leader = avail.eliminate_avail (bb, res))
+ {
+ if (leader != res
+ /* Preserve loop-closed SSA form. */
+ && (! lc_phi_nodes
+ || is_gimple_min_invariant (leader)))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Replaced redundant PHI node "
+ "defining ");
+ print_generic_expr (dump_file, res);
+ fprintf (dump_file, " with ");
+ print_generic_expr (dump_file, leader);
+ fprintf (dump_file, "\n");
+ }
+ avail.eliminations++;
+
+ if (may_propagate_copy (res, leader))
+ {
+ /* Schedule for removal. */
+ avail.to_remove.safe_push (phi);
+ continue;
+ }
+ /* ??? Else generate a copy stmt. */
+ }
+ }
+ }
+ /* Only make defs available that are not already. But make
+ sure loop-closed SSA PHI node defs are picked up for
+ downstream uses. */
+ if (lc_phi_nodes
+ || res == val
+ || ! avail.eliminate_avail (bb, res))
+ avail.eliminate_push_avail (bb, res);
+ }
/* For empty BBs mark outgoing edges executable. For non-empty BBs
we do this when processing the last stmt as we have to do this
{
FOR_EACH_EDGE (e, ei, bb->succs)
{
- if (e->flags & EDGE_EXECUTABLE)
- continue;
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file,
- "marking outgoing edge %d -> %d executable\n",
- e->src->index, e->dest->index);
- gcc_checking_assert (iterate || !(e->flags & EDGE_DFS_BACK));
- e->flags |= EDGE_EXECUTABLE;
- e->dest->flags |= BB_EXECUTABLE;
+ if (!(e->flags & EDGE_EXECUTABLE))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "marking outgoing edge %d -> %d executable\n",
+ e->src->index, e->dest->index);
+ e->flags |= EDGE_EXECUTABLE;
+ e->dest->flags |= BB_EXECUTABLE;
+ }
+ else if (!(e->dest->flags & BB_EXECUTABLE))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "marking destination block %d reachable\n",
+ e->dest->index);
+ e->dest->flags |= BB_EXECUTABLE;
+ }
}
}
for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
"marking known outgoing %sedge %d -> %d executable\n",
e->flags & EDGE_DFS_BACK ? "back-" : "",
e->src->index, e->dest->index);
- gcc_checking_assert (iterate || !(e->flags & EDGE_DFS_BACK));
e->flags |= EDGE_EXECUTABLE;
e->dest->flags |= BB_EXECUTABLE;
}
+ else if (!(e->dest->flags & BB_EXECUTABLE))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "marking destination block %d reachable\n",
+ e->dest->index);
+ e->dest->flags |= BB_EXECUTABLE;
+ }
}
else if (gsi_one_before_end_p (gsi))
{
FOR_EACH_EDGE (e, ei, bb->succs)
{
- if (e->flags & EDGE_EXECUTABLE)
- continue;
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file,
- "marking outgoing edge %d -> %d executable\n",
- e->src->index, e->dest->index);
- gcc_checking_assert (iterate || !(e->flags & EDGE_DFS_BACK));
- e->flags |= EDGE_EXECUTABLE;
- e->dest->flags |= BB_EXECUTABLE;
+ if (!(e->flags & EDGE_EXECUTABLE))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "marking outgoing edge %d -> %d executable\n",
+ e->src->index, e->dest->index);
+ e->flags |= EDGE_EXECUTABLE;
+ e->dest->flags |= BB_EXECUTABLE;
+ }
+ else if (!(e->dest->flags & BB_EXECUTABLE))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "marking destination block %d reachable\n",
+ e->dest->index);
+ e->dest->flags |= BB_EXECUTABLE;
+ }
}
}
/* Whether to handle this as iteration point or whether to treat
incoming backedge PHI values as varying. */
bool iterate;
+ /* Maximum RPO index this block is reachable from. */
+ int max_rpo;
+ /* Unwind state. */
void *ob_top;
vn_reference_t ref_top;
vn_phi_t phi_top;
/* Prune [rpo_idx, ] from avail. */
/* ??? This is O(number-of-values-in-region) which is
O(region-size) rather than O(iteration-piece). */
- for (rpo_elim::rpo_avail_t::iterator i
- = avail.m_rpo_avail.begin ();
- i != avail.m_rpo_avail.end (); ++i)
+ for (hash_table<vn_ssa_aux_hasher>::iterator i = vn_ssa_aux_hash->begin ();
+ i != vn_ssa_aux_hash->end (); ++i)
{
- while (! (*i).second.is_empty ())
+ while ((*i)->avail)
{
- if (bb_to_rpo[(*i).second.last ().first] < rpo_idx)
+ if (bb_to_rpo[(*i)->avail->location] < rpo_idx)
break;
- (*i).second.pop ();
+ vn_avail *av = (*i)->avail;
+ (*i)->avail = (*i)->avail->next;
+ av->next = avail.m_avail_freelist;
+ avail.m_avail_freelist = av;
}
}
}
bitmap_set_bit (exit_bbs, EXIT_BLOCK);
}
+ /* Clear EDGE_DFS_BACK on "all" entry edges, RPO order compute will
+ re-mark those that are contained in the region. */
+ edge_iterator ei;
+ edge e;
+ FOR_EACH_EDGE (e, ei, entry->dest->preds)
+ e->flags &= ~EDGE_DFS_BACK;
+
int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (fn) - NUM_FIXED_BLOCKS);
- int n = rev_post_order_and_mark_dfs_back_seme (fn, entry, exit_bbs,
- iterate, rpo);
+ int n = rev_post_order_and_mark_dfs_back_seme
+ (fn, entry, exit_bbs, !loops_state_satisfies_p (LOOPS_NEED_FIXUP), rpo);
/* rev_post_order_and_mark_dfs_back_seme fills RPO in reverse order. */
for (int i = 0; i < n / 2; ++i)
std::swap (rpo[i], rpo[n-i-1]);
if (!do_region)
BITMAP_FREE (exit_bbs);
+ /* If there are any non-DFS_BACK edges into entry->dest skip
+ processing PHI nodes for that block. This supports
+ value-numbering loop bodies without the actual loop. */
+ FOR_EACH_EDGE (e, ei, entry->dest->preds)
+ if (e != entry
+ && !(e->flags & EDGE_DFS_BACK))
+ break;
+ bool skip_entry_phis = e != NULL;
+ if (skip_entry_phis && dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Region does not contain all edges into "
+ "the entry block, skipping its PHIs.\n");
+
int *bb_to_rpo = XNEWVEC (int, last_basic_block_for_fn (fn));
for (int i = 0; i < n; ++i)
bb_to_rpo[rpo[i]] = i;
edge e;
edge_iterator ei;
FOR_EACH_EDGE (e, ei, bb->preds)
- gcc_assert (e == entry || (e->src->flags & bb_in_region));
+ gcc_assert (e == entry
+ || (skip_entry_phis && bb == entry->dest)
+ || (e->src->flags & bb_in_region));
}
for (int i = 0; i < n; ++i)
{
unsigned region_size = (((unsigned HOST_WIDE_INT)n * num_ssa_names)
/ (n_basic_blocks_for_fn (fn) - NUM_FIXED_BLOCKS));
VN_TOP = create_tmp_var_raw (void_type_node, "vn_top");
- init_const_parms ();
+ next_value_id = 1;
vn_ssa_aux_hash = new hash_table <vn_ssa_aux_hasher> (region_size * 2);
gcc_obstack_init (&vn_ssa_aux_obstack);
vn_valueize = rpo_vn_valueize;
/* Initialize the unwind state and edge/BB executable state. */
+ bool need_max_rpo_iterate = false;
for (int i = 0; i < n; ++i)
{
basic_block bb = BASIC_BLOCK_FOR_FN (fn, rpo[i]);
rpo_state[i].visited = 0;
+ rpo_state[i].max_rpo = i;
+ bb->flags &= ~BB_EXECUTABLE;
bool has_backedges = false;
edge e;
edge_iterator ei;
{
if (e->flags & EDGE_DFS_BACK)
has_backedges = true;
- if (! iterate && (e->flags & EDGE_DFS_BACK))
- e->flags |= EDGE_EXECUTABLE;
+ e->flags &= ~EDGE_EXECUTABLE;
+ if (iterate || e == entry || (skip_entry_phis && bb == entry->dest))
+ continue;
+ if (bb_to_rpo[e->src->index] > i)
+ {
+ rpo_state[i].max_rpo = MAX (rpo_state[i].max_rpo,
+ bb_to_rpo[e->src->index]);
+ need_max_rpo_iterate = true;
+ }
else
- e->flags &= ~EDGE_EXECUTABLE;
+ rpo_state[i].max_rpo
+ = MAX (rpo_state[i].max_rpo,
+ rpo_state[bb_to_rpo[e->src->index]].max_rpo);
}
rpo_state[i].iterate = iterate && has_backedges;
- bb->flags &= ~BB_EXECUTABLE;
}
entry->flags |= EDGE_EXECUTABLE;
entry->dest->flags |= BB_EXECUTABLE;
+ /* When there are irreducible regions the simplistic max_rpo computation
+ above for the case of backedges doesn't work and we need to iterate
+ until there are no more changes. */
+ unsigned nit = 0;
+ while (need_max_rpo_iterate)
+ {
+ nit++;
+ need_max_rpo_iterate = false;
+ for (int i = 0; i < n; ++i)
+ {
+ basic_block bb = BASIC_BLOCK_FOR_FN (fn, rpo[i]);
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ {
+ if (e == entry || (skip_entry_phis && bb == entry->dest))
+ continue;
+ int max_rpo = MAX (rpo_state[i].max_rpo,
+ rpo_state[bb_to_rpo[e->src->index]].max_rpo);
+ if (rpo_state[i].max_rpo != max_rpo)
+ {
+ rpo_state[i].max_rpo = max_rpo;
+ need_max_rpo_iterate = true;
+ }
+ }
+ }
+ }
+ statistics_histogram_event (cfun, "RPO max_rpo iterations", nit);
+
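A small worked example of the max_rpo bookkeeping (illustrative, not from the
patch):

  /* entry [0] -> header [1] -> latch [2] -> header (backedge)
			     \-> exit [3]
     The backedge latch->header sets rpo_state[1].max_rpo = 2, recording
     that the header is also reached from as far down as RPO index 2.
     In the non-iterating mode, when the worklist processes the header
     its predecessor edge from the still unvisited latch (whose max_rpo,
     2, is >= the header's index 1) cannot be trusted and is marked
     executable, so the header's PHIs are valued conservatively.  */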
/* As heuristic to improve compile-time we handle only the N innermost
loops and the outermost one optimistically. */
if (iterate)
{
loop_p loop;
- unsigned max_depth = PARAM_VALUE (PARAM_RPO_VN_MAX_LOOP_DEPTH);
+ unsigned max_depth = param_rpo_vn_max_loop_depth;
FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
if (loop_depth (loop) > max_depth)
for (unsigned i = 2;
i < loop_depth (loop) - max_depth; ++i)
{
basic_block header = superloop_at_depth (loop, i)->header;
- rpo_state[bb_to_rpo[header->index]].iterate = false;
+ bool non_latch_backedge = false;
edge e;
edge_iterator ei;
FOR_EACH_EDGE (e, ei, header->preds)
if (e->flags & EDGE_DFS_BACK)
- e->flags |= EDGE_EXECUTABLE;
+ {
+ /* There can be a non-latch backedge into the header
+ which is part of an outer irreducible region. We
+ cannot avoid iterating this block then. */
+ if (!dominated_by_p (CDI_DOMINATORS,
+ e->src, e->dest))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "non-latch backedge %d -> %d "
+ "forces iteration of loop %d\n",
+ e->src->index, e->dest->index, loop->num);
+ non_latch_backedge = true;
+ }
+ else
+ e->flags |= EDGE_EXECUTABLE;
+ }
+ rpo_state[bb_to_rpo[header->index]].iterate = non_latch_backedge;
}
}
- /* Go and process all blocks, iterating as necessary. */
- int idx = 0;
uint64_t nblk = 0;
- do
- {
- basic_block bb = BASIC_BLOCK_FOR_FN (fn, rpo[idx]);
+ int idx = 0;
+ if (iterate)
+ /* Go and process all blocks, iterating as necessary. */
+ do
+ {
+ basic_block bb = BASIC_BLOCK_FOR_FN (fn, rpo[idx]);
+
+ /* If the block has incoming backedges remember unwind state. This
+ is required even for non-executable blocks since in irreducible
+ regions we might reach them via the backedge and re-start iterating
+ from there.
+ Note we can individually mark blocks with incoming backedges to
+ not iterate where we then handle PHIs conservatively. We do that
+ heuristically to reduce compile-time for degenerate cases. */
+ if (rpo_state[idx].iterate)
+ {
+ rpo_state[idx].ob_top = obstack_alloc (&vn_tables_obstack, 0);
+ rpo_state[idx].ref_top = last_inserted_ref;
+ rpo_state[idx].phi_top = last_inserted_phi;
+ rpo_state[idx].nary_top = last_inserted_nary;
+ }
- /* If the block has incoming backedges remember unwind state. This
- is required even for non-executable blocks since in irreducible
- regions we might reach them via the backedge and re-start iterating
- from there.
- Note we can individually mark blocks with incoming backedges to
- not iterate where we then handle PHIs conservatively. We do that
- heuristically to reduce compile-time for degenerate cases. */
- if (rpo_state[idx].iterate)
- {
- rpo_state[idx].ob_top = obstack_alloc (&vn_tables_obstack, 0);
- rpo_state[idx].ref_top = last_inserted_ref;
- rpo_state[idx].phi_top = last_inserted_phi;
- rpo_state[idx].nary_top = last_inserted_nary;
- }
+ if (!(bb->flags & BB_EXECUTABLE))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Block %d: BB%d found not executable\n",
+ idx, bb->index);
+ idx++;
+ continue;
+ }
- if (!(bb->flags & BB_EXECUTABLE))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Block %d: BB%d found not executable\n",
- idx, bb->index);
- idx++;
- continue;
- }
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Processing block %d: BB%d\n", idx, bb->index);
+ nblk++;
+ todo |= process_bb (avail, bb,
+ rpo_state[idx].visited != 0,
+ rpo_state[idx].iterate,
+ iterate, eliminate, do_region, exit_bbs, false);
+ rpo_state[idx].visited++;
+
+ /* Verify if changed values flow over executable outgoing backedges
+ and those change destination PHI values (that's the thing we
+ can easily verify). Reduce over all such edges to the farthest
+ away PHI. */
+ int iterate_to = -1;
+ edge_iterator ei;
+ edge e;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if ((e->flags & (EDGE_DFS_BACK|EDGE_EXECUTABLE))
+ == (EDGE_DFS_BACK|EDGE_EXECUTABLE)
+ && rpo_state[bb_to_rpo[e->dest->index]].iterate)
+ {
+ int destidx = bb_to_rpo[e->dest->index];
+ if (!rpo_state[destidx].visited)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Unvisited destination %d\n",
+ e->dest->index);
+ if (iterate_to == -1 || destidx < iterate_to)
+ iterate_to = destidx;
+ continue;
+ }
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Looking for changed values of backedge"
+ " %d->%d destination PHIs\n",
+ e->src->index, e->dest->index);
+ vn_context_bb = e->dest;
+ gphi_iterator gsi;
+ for (gsi = gsi_start_phis (e->dest);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ bool inserted = false;
+ /* While we'd ideally just iterate on value changes
+ we CSE PHIs and do that even across basic-block
+ boundaries. So even hashtable state changes can
+ be important (which is roughly equivalent to
+ PHI argument value changes). To not excessively
+ iterate because of that we track whether a PHI
+ was CSEd to with GF_PLF_1. */
+ bool phival_changed;
+ if ((phival_changed = visit_phi (gsi.phi (),
+ &inserted, false))
+ || (inserted && gimple_plf (gsi.phi (), GF_PLF_1)))
+ {
+ if (!phival_changed
+ && dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "PHI was CSEd and hashtable "
+ "state (changed)\n");
+ if (iterate_to == -1 || destidx < iterate_to)
+ iterate_to = destidx;
+ break;
+ }
+ }
+ vn_context_bb = NULL;
+ }
+ if (iterate_to != -1)
+ {
+ do_unwind (&rpo_state[iterate_to], iterate_to, avail, bb_to_rpo);
+ idx = iterate_to;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Iterating to %d BB%d\n",
+ iterate_to, rpo[iterate_to]);
+ continue;
+ }
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Processing block %d: BB%d\n", idx, bb->index);
- nblk++;
- todo |= process_bb (avail, bb,
- rpo_state[idx].visited != 0,
- rpo_state[idx].iterate,
- iterate, eliminate, do_region, exit_bbs);
- rpo_state[idx].visited++;
+ idx++;
+ }
+ while (idx < n);
- if (iterate)
+ else /* !iterate */
+ {
+ /* Process all blocks greedily with a worklist that enforces RPO
+ processing of reachable blocks. */
+ auto_bitmap worklist;
+ bitmap_set_bit (worklist, 0);
+ while (!bitmap_empty_p (worklist))
{
- /* Verify if changed values flow over executable outgoing backedges
- and those change destination PHI values (that's the thing we
- can easily verify). Reduce over all such edges to the farthest
- away PHI. */
- int iterate_to = -1;
+ int idx = bitmap_first_set_bit (worklist);
+ bitmap_clear_bit (worklist, idx);
+ basic_block bb = BASIC_BLOCK_FOR_FN (fn, rpo[idx]);
+ gcc_assert ((bb->flags & BB_EXECUTABLE)
+ && !rpo_state[idx].visited);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Processing block %d: BB%d\n", idx, bb->index);
+
+ /* When we run into predecessor edges whose executable state we
+ cannot trust, mark them executable so PHI processing will
+ be conservative.
+ ??? Do we need to force arguments flowing over that edge
+ to be varying or will they even always be? */
edge_iterator ei;
edge e;
- FOR_EACH_EDGE (e, ei, bb->succs)
- if ((e->flags & (EDGE_DFS_BACK|EDGE_EXECUTABLE))
- == (EDGE_DFS_BACK|EDGE_EXECUTABLE)
- && rpo_state[bb_to_rpo[e->dest->index]].iterate)
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if (!(e->flags & EDGE_EXECUTABLE)
+ && (bb == entry->dest
+ || (!rpo_state[bb_to_rpo[e->src->index]].visited
+ && (rpo_state[bb_to_rpo[e->src->index]].max_rpo
+ >= (int)idx))))
{
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Looking for changed values of backedge "
- "%d->%d destination PHIs\n",
+ fprintf (dump_file, "Cannot trust state of predecessor "
+ "edge %d -> %d, marking executable\n",
e->src->index, e->dest->index);
- vn_context_bb = e->dest;
- gphi_iterator gsi;
- for (gsi = gsi_start_phis (e->dest);
- !gsi_end_p (gsi); gsi_next (&gsi))
- {
- bool inserted = false;
- /* While we'd ideally just iterate on value changes
- we CSE PHIs and do that even across basic-block
- boundaries. So even hashtable state changes can
- be important (which is roughly equivalent to
- PHI argument value changes). To not excessively
- iterate because of that we track whether a PHI
- was CSEd to with GF_PLF_1. */
- bool phival_changed;
- if ((phival_changed = visit_phi (gsi.phi (),
- &inserted, false))
- || (inserted && gimple_plf (gsi.phi (), GF_PLF_1)))
- {
- if (!phival_changed
- && dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "PHI was CSEd and hashtable "
- "state (changed)\n");
- int destidx = bb_to_rpo[e->dest->index];
- if (iterate_to == -1
- || destidx < iterate_to)
- iterate_to = destidx;
- break;
- }
- }
- vn_context_bb = NULL;
+ e->flags |= EDGE_EXECUTABLE;
}
- if (iterate_to != -1)
- {
- do_unwind (&rpo_state[iterate_to], iterate_to,
- avail, bb_to_rpo);
- idx = iterate_to;
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Iterating to %d BB%d\n",
- iterate_to, rpo[iterate_to]);
- continue;
- }
- }
- idx++;
+ nblk++;
+ todo |= process_bb (avail, bb, false, false, false, eliminate,
+ do_region, exit_bbs,
+ skip_entry_phis && bb == entry->dest);
+ rpo_state[idx].visited++;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if ((e->flags & EDGE_EXECUTABLE)
+ && e->dest->index != EXIT_BLOCK
+ && (!do_region || !bitmap_bit_p (exit_bbs, e->dest->index))
+ && !rpo_state[bb_to_rpo[e->dest->index]].visited)
+ bitmap_set_bit (worklist, bb_to_rpo[e->dest->index]);
+ }
}
- while (idx < n);
/* If statistics or dump file active. */
int nex = 0;
max_visited = rpo_state[i].visited;
}
unsigned nvalues = 0, navail = 0;
- for (rpo_elim::rpo_avail_t::iterator i = avail.m_rpo_avail.begin ();
- i != avail.m_rpo_avail.end (); ++i)
+ for (hash_table<vn_ssa_aux_hasher>::iterator i = vn_ssa_aux_hash->begin ();
+ i != vn_ssa_aux_hash->end (); ++i)
{
nvalues++;
- navail += (*i).second.length ();
+ vn_avail *av = (*i)->avail;
+ while (av)
+ {
+ navail++;
+ av = av->next;
+ }
}
statistics_counter_event (cfun, "RPO blocks", n);
statistics_counter_event (cfun, "RPO blocks visited", nblk);
XDELETEVEC (bb_to_rpo);
XDELETEVEC (rpo);
+ XDELETEVEC (rpo_state);
return todo;
}
/* Region-based entry for RPO VN. Performs value-numbering and elimination
- on the SEME region specified by ENTRY and EXIT_BBS. */
+ on the SEME region specified by ENTRY and EXIT_BBS. If ENTRY is not
+ the only edge into the region at ENTRY->dest, PHI nodes in ENTRY->dest
+ are not considered. */
unsigned
do_rpo_vn (function *fn, edge entry, bitmap exit_bbs)
{
public:
pass_fre (gcc::context *ctxt)
- : gimple_opt_pass (pass_data_fre, ctxt)
+ : gimple_opt_pass (pass_data_fre, ctxt), may_iterate (true)
{}
/* opt_pass methods: */
opt_pass * clone () { return new pass_fre (m_ctxt); }
- virtual bool gate (function *) { return flag_tree_fre != 0; }
+ void set_pass_param (unsigned int n, bool param)
+ {
+ gcc_assert (n == 0);
+ may_iterate = param;
+ }
+ virtual bool gate (function *)
+ {
+ return flag_tree_fre != 0 && (may_iterate || optimize > 1);
+ }
virtual unsigned int execute (function *);
+private:
+ bool may_iterate;
}; // class pass_fre
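For context (illustrative; the surrounding pipeline entries are elided): with
the new pass parameter the FRE instances in passes.def are expected to look
roughly like

  NEXT_PASS (pass_fre, true /* may_iterate */);
  ...
  /* Late FRE after IVOPTs and unrolling: cheap non-iterating mode,
     additionally requesting TODO_update_address_taken.  */
  NEXT_PASS (pass_fre, false /* may_iterate */);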
unsigned int
unsigned todo = 0;
/* At -O[1g] use the cheap non-iterating mode. */
+ bool iterate_p = may_iterate && (optimize > 1);
calculate_dominance_info (CDI_DOMINATORS);
- if (optimize > 1)
+ if (iterate_p)
loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
default_vn_walk_kind = VN_WALKREWRITE;
- todo = do_rpo_vn (fun, NULL, NULL, optimize > 1, true);
+ todo = do_rpo_vn (fun, NULL, NULL, iterate_p, true);
free_rpo_vn ();
- if (optimize > 1)
+ if (iterate_p)
loop_optimizer_finalize ();
+ /* For late FRE after IVOPTs and unrolling, see if we can
+ remove some TREE_ADDRESSABLE and rewrite stuff into SSA. */
+ if (!may_iterate)
+ todo |= TODO_update_address_taken;
+
return todo;
}