#include "config.h"
#include "system.h"
#include "coretypes.h"
+#include "splay-tree.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
static void init_vn_nary_op_from_pieces (vn_nary_op_t, unsigned int,
enum tree_code, tree, tree *);
static tree vn_lookup_simplify_result (gimple_match_op *);
+static vn_reference_t vn_reference_lookup_or_insert_for_pieces
+ (tree, alias_set_type, tree, vec<vn_reference_op_s, va_heap>, tree);
/* Return whether there is value numbering information for a given SSA name. */
return NULL_TREE;
}
+
+/* Partial definition tracking support. */
+
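+/* A merged range of partial defs; OFFSET and SIZE are in bytes,
+ relative to the start of the looked-up reference. */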
+struct pd_range
+{
+ HOST_WIDE_INT offset;
+ HOST_WIDE_INT size;
+};
+
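+/* One partial def: RHS covers SIZE bytes at OFFSET, again in bytes
+ relative to the start of the looked-up reference. OFFSET can be
+ negative when the def starts before the reference. */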
+struct pd_data
+{
+ tree rhs;
+ HOST_WIDE_INT offset;
+ HOST_WIDE_INT size;
+};
+
+/* Context for alias walking. */
+
struct vn_walk_cb_data
{
vn_walk_cb_data (vn_reference_t vr_, tree *last_vuse_ptr_,
- vn_lookup_kind vn_walk_kind_, bool tbaa_p_)
+ vn_lookup_kind vn_walk_kind_, bool tbaa_p_)
: vr (vr_), last_vuse_ptr (last_vuse_ptr_), vn_walk_kind (vn_walk_kind_),
- tbaa_p (tbaa_p_)
- {}
+ tbaa_p (tbaa_p_), known_ranges (NULL)
+ {}
+ ~vn_walk_cb_data ();
+ void *push_partial_def (const pd_data &pd, tree, HOST_WIDE_INT);
vn_reference_t vr;
tree *last_vuse_ptr;
vn_lookup_kind vn_walk_kind;
bool tbaa_p;
+
+ /* The partial defs we have encountered so far during the walk. */
+ auto_vec<pd_data, 2> partial_defs;
+ /* The first def's range, to avoid splay tree setup in most cases. */
+ pd_range first_range;
+ tree first_vuse;
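+ /* Merged byte ranges of the partial defs seen so far, lazily
+ created when the second partial def arrives; nodes are
+ allocated from RANGES_OBSTACK. */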
+ splay_tree known_ranges;
+ obstack ranges_obstack;
};
+
+vn_walk_cb_data::~vn_walk_cb_data ()
+{
+ if (known_ranges)
+ {
+ splay_tree_delete (known_ranges);
+ obstack_free (&ranges_obstack, NULL);
+ }
+}
+
+/* pd_range splay-tree helpers. */
+
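+/* Compare splay tree keys, which are pointers to the byte offsets
+ at which the recorded ranges start. */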
+static int
+pd_range_compare (splay_tree_key offset1p, splay_tree_key offset2p)
+{
+ HOST_WIDE_INT offset1 = *(HOST_WIDE_INT *)offset1p;
+ HOST_WIDE_INT offset2 = *(HOST_WIDE_INT *)offset2p;
+ if (offset1 < offset2)
+ return -1;
+ else if (offset1 > offset2)
+ return 1;
+ return 0;
+}
+
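+/* Allocate a splay tree node from the obstack in the walk data. */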
+static void *
+pd_tree_alloc (int size, void *data_)
+{
+ vn_walk_cb_data *data = (vn_walk_cb_data *)data_;
+ return obstack_alloc (&data->ranges_obstack, size);
+}
+
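+/* Release a splay tree node. A no-op: nodes live on the obstack
+ and are freed wholesale by ~vn_walk_cb_data. */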
+static void
+pd_tree_dealloc (void *, void *)
+{
+}
+
+/* Push PD to the vector of partial definitions. Returns a value
+ when we are ready to combine things with VUSE and MAXSIZEI,
+ NULL when we want to continue looking for partial defs, or
+ (void *)-1 on failure. */
+
+void *
+vn_walk_cb_data::push_partial_def (const pd_data &pd, tree vuse,
+ HOST_WIDE_INT maxsizei)
+{
+ if (partial_defs.is_empty ())
+ {
+ partial_defs.safe_push (pd);
+ first_range.offset = pd.offset;
+ first_range.size = pd.size;
+ first_vuse = vuse;
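+ /* Stop updating the last VUSE; the value we eventually
+ return now depends on more than one def. */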
+ last_vuse_ptr = NULL;
+ }
+ else
+ {
+ if (!known_ranges)
+ {
+ /* ??? Optimize the case where the second partial def
+ completes things. */
+ gcc_obstack_init (&ranges_obstack);
+ known_ranges
+ = splay_tree_new_with_allocator (pd_range_compare, 0, 0,
+ pd_tree_alloc,
+ pd_tree_dealloc, this);
+ splay_tree_insert (known_ranges,
+ (splay_tree_key)&first_range.offset,
+ (splay_tree_value)&first_range);
+ }
+ if (known_ranges)
+ {
+ pd_range newr = { pd.offset, pd.size };
+ splay_tree_node n;
+ pd_range *r;
+ /* Lookup the predecessor of offset + 1 and see if
+ we need to merge with it. */
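+ /* For example, with an existing range [0, 4) and a new
+ def covering [4, 8), the predecessor of offset 5 is
+ [0, 4); the SIZE + 1 below lets these abutting ranges
+ merge into [0, 8). */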
+ HOST_WIDE_INT loffset = newr.offset + 1;
+ if ((n = splay_tree_predecessor (known_ranges,
+ (splay_tree_key)&loffset))
+ && ((r = (pd_range *)n->value), true)
+ && ranges_known_overlap_p (r->offset, r->size + 1,
+ newr.offset, newr.size))
+ {
+ /* Ignore partial defs already covered. */
+ if (known_subrange_p (newr.offset, newr.size,
+ r->offset, r->size))
+ return NULL;
+ r->size = MAX (r->offset + r->size,
+ newr.offset + newr.size) - r->offset;
+ }
+ else
+ {
+ /* newr.offset wasn't covered yet; insert the
+ range. */
+ r = XOBNEW (&ranges_obstack, pd_range);
+ *r = newr;
+ splay_tree_insert (known_ranges,
+ (splay_tree_key)&r->offset,
+ (splay_tree_value)r);
+ }
+ /* Merge r, which now contains newr and is a member of
+ the splay tree, with adjacent overlapping ranges. */
+ pd_range *rafter;
+ while ((n = splay_tree_successor (known_ranges,
+ (splay_tree_key)&r->offset))
+ && ((rafter = (pd_range *)n->value), true)
+ && ranges_known_overlap_p (r->offset, r->size + 1,
+ rafter->offset, rafter->size))
+ {
+ r->size = MAX (r->offset + r->size,
+ rafter->offset + rafter->size) - r->offset;
+ splay_tree_remove (known_ranges,
+ (splay_tree_key)&rafter->offset);
+ }
+ partial_defs.safe_push (pd);
+
+ /* Now we have merged newr into the range tree. When the
+ whole access [0, maxsizei / BITS_PER_UNIT) is covered,
+ the tree contains exactly one node with the desired
+ properties, and that node is 'r'. */
+ if (known_subrange_p (0, maxsizei / BITS_PER_UNIT,
+ r->offset, r->size))
+ {
+ /* Now simply native encode all partial defs
+ in reverse order. */
+ unsigned ndefs = partial_defs.length ();
+ /* We support up to 512-bit values (for V8DFmode). */
+ unsigned char buffer[64];
+ int len;
+
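+ /* Popping returns the defs in reverse walk order, that
+ is, in program order, so later stores overwrite
+ earlier ones in BUFFER. A negative PD.OFFSET means
+ the def starts before the read; its first -PD.OFFSET
+ bytes are skipped. */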
+ while (!partial_defs.is_empty ())
+ {
+ pd_data pd = partial_defs.pop ();
+ if (TREE_CODE (pd.rhs) == CONSTRUCTOR)
+ /* Empty CONSTRUCTOR; zero only the bytes of the def
+ that fall inside BUFFER, since PD.OFFSET can be
+ negative and PD.SIZE can extend past the read. */
+ memset (buffer + MAX (0, pd.offset), 0,
+ MIN ((HOST_WIDE_INT)sizeof (buffer) - MAX (0, pd.offset),
+ pd.size + MIN (0, pd.offset)));
+ else
+ {
+ len = native_encode_expr (pd.rhs,
+ buffer + MAX (0, pd.offset),
+ sizeof (buffer) - MAX (0, pd.offset),
+ MAX (0, -pd.offset));
+ if (len <= 0
+ || len < (pd.size - MAX (0, -pd.offset)))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Failed to encode %u "
+ "partial definitions\n", ndefs);
+ return (void *)-1;
+ }
+ }
+ }
+
+ tree type = vr->type;
+ /* Make sure to interpret in a type that has a range
+ covering the whole access size. */
+ if (INTEGRAL_TYPE_P (vr->type)
+ && maxsizei != TYPE_PRECISION (vr->type))
+ type = build_nonstandard_integer_type (maxsizei,
+ TYPE_UNSIGNED (type));
+ tree val = native_interpret_expr (type, buffer,
+ maxsizei / BITS_PER_UNIT);
+ /* If we chop off bits because the type's precision doesn't
+ match the memory access size this is ok when optimizing
+ reads but not when called from the DSE code during
+ elimination. */
+ if (val
+ && type != vr->type)
+ {
+ if (! int_fits_type_p (val, vr->type))
+ val = NULL_TREE;
+ else
+ val = fold_convert (vr->type, val);
+ }
+
+ if (val)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Successfully combined %u "
+ "partial definitions\n", ndefs);
+ return vn_reference_lookup_or_insert_for_pieces
+ (first_vuse,
+ vr->set, vr->type, vr->operands, val);
+ }
+ else
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Failed to interpret %u "
+ "encoded partial definitions\n", ndefs);
+ return (void *)-1;
+ }
+ }
+ }
+ }
+ /* Continue looking for partial defs. */
+ return NULL;
+}
+
/* Callback for walk_non_aliased_vuses. Adjusts the vn_reference_t VR_
with the current VUSE and performs the expression lookup. */
vn_reference_s **slot;
hashval_t hash;
+ /* If we have partial definitions recorded we have to go through
+ vn_reference_lookup_3. */
+ if (!data->partial_defs.is_empty ())
+ return NULL;
+
if (data->last_vuse_ptr)
*data->last_vuse_ptr = vuse;
else
return (void *)-1;
tree len = gimple_call_arg (def_stmt, 2);
- if (known_subrange_p (offset, maxsize, offset2,
- wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT))
+ HOST_WIDE_INT leni, offset2i, offseti;
+ if (data->partial_defs.is_empty ()
+ && known_subrange_p (offset, maxsize, offset2,
+ wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT))
{
tree val;
if (integer_zerop (gimple_call_arg (def_stmt, 1)))
return vn_reference_lookup_or_insert_for_pieces
(vuse, vr->set, vr->type, vr->operands, val);
}
+ /* For now handle clearing memory with partial defs. */
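+ /* E.g. a memset (p, 0, 4) that covers only part of a wider
+ read; record the cleared bytes as a zeroing partial def. */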
+ else if (integer_zerop (gimple_call_arg (def_stmt, 1))
+ && tree_to_poly_int64 (len).is_constant (&leni)
+ && offset.is_constant (&offseti)
+ && offset2.is_constant (&offset2i)
+ && maxsize.is_constant (&maxsizei))
+ {
+ pd_data pd;
+ pd.rhs = build_constructor (NULL_TREE, NULL);
+ pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
+ pd.size = leni;
+ return data->push_partial_def (pd, vuse, maxsizei);
+ }
}
/* 2) Assignment from an empty CONSTRUCTOR. */
{
tree base2;
poly_int64 offset2, size2, maxsize2;
+ HOST_WIDE_INT offset2i, size2i;
bool reverse;
base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
&offset2, &size2, &maxsize2, &reverse);
if (known_size_p (maxsize2)
&& known_eq (maxsize2, size2)
&& adjust_offsets_for_equal_base_address (base, &offset,
- base2, &offset2)
- && known_subrange_p (offset, maxsize, offset2, size2))
+ base2, &offset2))
{
- tree val = build_zero_cst (vr->type);
- return vn_reference_lookup_or_insert_for_pieces
- (vuse, vr->set, vr->type, vr->operands, val);
+ if (data->partial_defs.is_empty ()
+ && known_subrange_p (offset, maxsize, offset2, size2))
+ {
+ tree val = build_zero_cst (vr->type);
+ return vn_reference_lookup_or_insert_for_pieces
+ (vuse, vr->set, vr->type, vr->operands, val);
+ }
+ else if (maxsize.is_constant (&maxsizei)
+ && maxsizei % BITS_PER_UNIT == 0
+ && offset.is_constant (&offseti)
+ && offseti % BITS_PER_UNIT == 0
+ && offset2.is_constant (&offset2i)
+ && offset2i % BITS_PER_UNIT == 0
+ && size2.is_constant (&size2i)
+ && size2i % BITS_PER_UNIT == 0)
+ {
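+ /* Everything is byte-aligned, so record the zeroing
+ store as a partial def in byte units. */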
+ pd_data pd;
+ pd.rhs = gimple_assign_rhs1 (def_stmt);
+ pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
+ pd.size = size2i / BITS_PER_UNIT;
+ return data->push_partial_def (pd, vuse, maxsizei);
+ }
}
}
{
tree base2;
poly_int64 offset2, size2, maxsize2;
- HOST_WIDE_INT offset2i;
+ HOST_WIDE_INT offset2i, size2i;
bool reverse;
base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
&offset2, &size2, &maxsize2, &reverse);
base2, &offset2)
&& offset.is_constant (&offseti)
&& offset2.is_constant (&offset2i)
- && known_subrange_p (offseti, maxsizei, offset2, size2))
+ && size2.is_constant (&size2i))
{
- /* We support up to 512-bit values (for V8DFmode). */
- unsigned char buffer[64];
- int len;
-
- tree rhs = gimple_assign_rhs1 (def_stmt);
- if (TREE_CODE (rhs) == SSA_NAME)
- rhs = SSA_VAL (rhs);
- len = native_encode_expr (rhs,
- buffer, sizeof (buffer),
- (offseti - offset2i) / BITS_PER_UNIT);
- if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
+ if (data->partial_defs.is_empty ()
+ && known_subrange_p (offseti, maxsizei, offset2, size2))
{
- tree type = vr->type;
- /* Make sure to interpret in a type that has a range
- covering the whole access size. */
- if (INTEGRAL_TYPE_P (vr->type)
- && maxsizei != TYPE_PRECISION (vr->type))
- type = build_nonstandard_integer_type (maxsizei,
- TYPE_UNSIGNED (type));
- tree val = native_interpret_expr (type, buffer,
- maxsizei / BITS_PER_UNIT);
- /* If we chop off bits because the types precision doesn't
- match the memory access size this is ok when optimizing
- reads but not when called from the DSE code during
- elimination. */
- if (val
- && type != vr->type)
+ /* We support up to 512-bit values (for V8DFmode). */
+ unsigned char buffer[64];
+ int len;
+
+ tree rhs = gimple_assign_rhs1 (def_stmt);
+ if (TREE_CODE (rhs) == SSA_NAME)
+ rhs = SSA_VAL (rhs);
+ len = native_encode_expr (rhs,
+ buffer, sizeof (buffer),
+ (offseti - offset2i) / BITS_PER_UNIT);
+ if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
{
- if (! int_fits_type_p (val, vr->type))
- val = NULL_TREE;
- else
- val = fold_convert (vr->type, val);
- }
+ tree type = vr->type;
+ /* Make sure to interpret in a type that has a range
+ covering the whole access size. */
+ if (INTEGRAL_TYPE_P (vr->type)
+ && maxsizei != TYPE_PRECISION (vr->type))
+ type = build_nonstandard_integer_type (maxsizei,
+ TYPE_UNSIGNED (type));
+ tree val = native_interpret_expr (type, buffer,
+ maxsizei / BITS_PER_UNIT);
+ /* If we chop off bits because the type's precision doesn't
+ match the memory access size this is ok when optimizing
+ reads but not when called from the DSE code during
+ elimination. */
+ if (val
+ && type != vr->type)
+ {
+ if (! int_fits_type_p (val, vr->type))
+ val = NULL_TREE;
+ else
+ val = fold_convert (vr->type, val);
+ }
- if (val)
- return vn_reference_lookup_or_insert_for_pieces
- (vuse, vr->set, vr->type, vr->operands, val);
+ if (val)
+ return vn_reference_lookup_or_insert_for_pieces
+ (vuse, vr->set, vr->type, vr->operands, val);
+ }
+ }
+ else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i))
+ {
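+ /* The constant store only partially covers the read (or
+ also covers bytes outside it); push_partial_def does
+ the clipping and merging. */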
+ pd_data pd;
+ tree rhs = gimple_assign_rhs1 (def_stmt);
+ if (TREE_CODE (rhs) == SSA_NAME)
+ rhs = SSA_VAL (rhs);
+ pd.rhs = rhs;
+ pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
+ pd.size = size2i / BITS_PER_UNIT;
+ return data->push_partial_def (pd, vuse, maxsizei);
}
}
}
&& is_gimple_reg_type (vr->type)
&& !contains_storage_order_barrier_p (vr->operands)
&& gimple_assign_single_p (def_stmt)
- && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
+ && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
+ /* A subset of partial defs from non-constants can be handled
+ by for example inserting a CONSTRUCTOR, a COMPLEX_EXPR or
+ even a (series of) BIT_INSERT_EXPR hoping for simplifications
+ downstream, not so much for actually doing the insertion. */
+ && data->partial_defs.is_empty ())
{
tree base2;
poly_int64 offset2, size2, maxsize2;
&& gimple_assign_single_p (def_stmt)
&& (DECL_P (gimple_assign_rhs1 (def_stmt))
|| TREE_CODE (gimple_assign_rhs1 (def_stmt)) == MEM_REF
- || handled_component_p (gimple_assign_rhs1 (def_stmt))))
+ || handled_component_p (gimple_assign_rhs1 (def_stmt)))
+ /* Handling this is more complicated, give up for now. */
+ && data->partial_defs.is_empty ())
{
tree base2;
int i, j, k;
|| TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME)
&& (TREE_CODE (gimple_call_arg (def_stmt, 1)) == ADDR_EXPR
|| TREE_CODE (gimple_call_arg (def_stmt, 1)) == SSA_NAME)
- && poly_int_tree_p (gimple_call_arg (def_stmt, 2), &copy_size))
+ && poly_int_tree_p (gimple_call_arg (def_stmt, 2), &copy_size)
+ /* Handling this is more complicated, give up for now. */
+ && data->partial_defs.is_empty ())
{
tree lhs, rhs;
ao_ref r;