X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=gcc%2Ftree-ssa-sccvn.c;h=e269f7885f4b7757f8176cbc4248fa19dfb51e8e;hb=0cd58a9f091b39c5e41b7954d6c4bd88f3567d49;hp=830d37b86bb0ecb8ba55e45597fcb80ddf743e3c;hpb=9505acd8501e6c79bc4fa9ed9f1ee174462601d1;p=gcc.git diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c index 830d37b86bb..e269f7885f4 100644 --- a/gcc/tree-ssa-sccvn.c +++ b/gcc/tree-ssa-sccvn.c @@ -1,5 +1,5 @@ /* SCC value numbering for trees - Copyright (C) 2006-2019 Free Software Foundation, Inc. + Copyright (C) 2006-2020 Free Software Foundation, Inc. Contributed by Daniel Berlin This file is part of GCC. @@ -53,7 +53,6 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa.h" #include "dumpfile.h" #include "cfgloop.h" -#include "params.h" #include "tree-ssa-propagate.h" #include "tree-cfg.h" #include "domwalk.h" @@ -309,6 +308,10 @@ static vn_tables_t valid_info; /* Valueization hook. Valueize NAME if it is an SSA name, otherwise just return it. */ tree (*vn_valueize) (tree); +tree vn_valueize_wrapper (tree t, void* context ATTRIBUTE_UNUSED) +{ + return vn_valueize (t); +} /* This represents the top of the VN lattice, which is the universal @@ -332,6 +335,7 @@ struct vn_ssa_aux_hasher : typed_noop_remove static inline hashval_t hash (const value_type &); static inline bool equal (const value_type &, const compare_type &); static inline void mark_deleted (value_type &) {} + static const bool empty_zero_p = true; static inline void mark_empty (value_type &e) { e = NULL; } static inline bool is_deleted (value_type &) { return false; } static inline bool is_empty (value_type &e) { return e == NULL; } @@ -363,7 +367,8 @@ static void init_vn_nary_op_from_pieces (vn_nary_op_t, unsigned int, enum tree_code, tree, tree *); static tree vn_lookup_simplify_result (gimple_match_op *); static vn_reference_t vn_reference_lookup_or_insert_for_pieces - (tree, alias_set_type, tree, vec, tree); + (tree, alias_set_type, alias_set_type, tree, + vec, tree); /* Return whether there is value numbering information for a given SSA name. */ @@ -924,6 +929,7 @@ copy_reference_ops_from_ref (tree ref, vec *result) break; case STRING_CST: case INTEGER_CST: + case POLY_INT_CST: case COMPLEX_CST: case VECTOR_CST: case REAL_CST: @@ -977,8 +983,8 @@ copy_reference_ops_from_ref (tree ref, vec *result) bool ao_ref_init_from_vn_reference (ao_ref *ref, - alias_set_type set, tree type, - vec ops) + alias_set_type set, alias_set_type base_set, + tree type, vec ops) { vn_reference_op_t op; unsigned i; @@ -988,7 +994,6 @@ ao_ref_init_from_vn_reference (ao_ref *ref, poly_offset_int max_size; poly_offset_int size = -1; tree size_tree = NULL_TREE; - alias_set_type base_alias_set = -1; /* First get the final access size from just the outermost expression. */ op = &ops[0]; @@ -1045,7 +1050,6 @@ ao_ref_init_from_vn_reference (ao_ref *ref, /* Record the base objects. */ case MEM_REF: - base_alias_set = get_deref_alias_set (op->op0); *op0_p = build2 (MEM_REF, op->type, NULL_TREE, op->op0); MR_DEPENDENCE_CLIQUE (*op0_p) = op->clique; @@ -1135,10 +1139,7 @@ ao_ref_init_from_vn_reference (ao_ref *ref, ref->ref = NULL_TREE; ref->base = base; ref->ref_alias_set = set; - if (base_alias_set != -1) - ref->base_alias_set = base_alias_set; - else - ref->base_alias_set = get_alias_set (base); + ref->base_alias_set = base_set; /* We discount volatiles from value-numbering elsewhere. 
*/ ref->volatile_p = false; @@ -1223,8 +1224,8 @@ vn_reference_fold_indirect (vec *ops, /* The only thing we have to do is from &OBJ.foo.bar add the offset from .foo.bar to the preceding MEM_REF offset and replace the address with &OBJ. */ - addr_base = get_addr_base_and_unit_offset (TREE_OPERAND (op->op0, 0), - &addr_offset); + addr_base = get_addr_base_and_unit_offset_1 (TREE_OPERAND (op->op0, 0), + &addr_offset, vn_valueize); gcc_checking_assert (addr_base && TREE_CODE (addr_base) != MEM_REF); if (addr_base != TREE_OPERAND (op->op0, 0)) { @@ -1249,113 +1250,124 @@ static bool vn_reference_maybe_forwprop_address (vec *ops, unsigned int *i_p) { - unsigned int i = *i_p; - vn_reference_op_t op = &(*ops)[i]; - vn_reference_op_t mem_op = &(*ops)[i - 1]; - gimple *def_stmt; - enum tree_code code; - poly_offset_int off; - - def_stmt = SSA_NAME_DEF_STMT (op->op0); - if (!is_gimple_assign (def_stmt)) - return false; - - code = gimple_assign_rhs_code (def_stmt); - if (code != ADDR_EXPR - && code != POINTER_PLUS_EXPR) - return false; - - off = poly_offset_int::from (wi::to_poly_wide (mem_op->op0), SIGNED); + bool changed = false; + vn_reference_op_t op; - /* The only thing we have to do is from &OBJ.foo.bar add the offset - from .foo.bar to the preceding MEM_REF offset and replace the - address with &OBJ. */ - if (code == ADDR_EXPR) - { - tree addr, addr_base; - poly_int64 addr_offset; - - addr = gimple_assign_rhs1 (def_stmt); - addr_base = get_addr_base_and_unit_offset (TREE_OPERAND (addr, 0), - &addr_offset); - /* If that didn't work because the address isn't invariant propagate - the reference tree from the address operation in case the current - dereference isn't offsetted. */ - if (!addr_base - && *i_p == ops->length () - 1 - && known_eq (off, 0) - /* This makes us disable this transform for PRE where the - reference ops might be also used for code insertion which - is invalid. */ - && default_vn_walk_kind == VN_WALKREWRITE) - { - auto_vec tem; - copy_reference_ops_from_ref (TREE_OPERAND (addr, 0), &tem); - /* Make sure to preserve TBAA info. The only objects not - wrapped in MEM_REFs that can have their address taken are - STRING_CSTs. */ - if (tem.length () >= 2 - && tem[tem.length () - 2].opcode == MEM_REF) + do + { + unsigned int i = *i_p; + op = &(*ops)[i]; + vn_reference_op_t mem_op = &(*ops)[i - 1]; + gimple *def_stmt; + enum tree_code code; + poly_offset_int off; + + def_stmt = SSA_NAME_DEF_STMT (op->op0); + if (!is_gimple_assign (def_stmt)) + return changed; + + code = gimple_assign_rhs_code (def_stmt); + if (code != ADDR_EXPR + && code != POINTER_PLUS_EXPR) + return changed; + + off = poly_offset_int::from (wi::to_poly_wide (mem_op->op0), SIGNED); + + /* The only thing we have to do is from &OBJ.foo.bar add the offset + from .foo.bar to the preceding MEM_REF offset and replace the + address with &OBJ. */ + if (code == ADDR_EXPR) + { + tree addr, addr_base; + poly_int64 addr_offset; + + addr = gimple_assign_rhs1 (def_stmt); + addr_base = get_addr_base_and_unit_offset_1 (TREE_OPERAND (addr, 0), + &addr_offset, + vn_valueize); + /* If that didn't work because the address isn't invariant propagate + the reference tree from the address operation in case the current + dereference isn't offsetted. */ + if (!addr_base + && *i_p == ops->length () - 1 + && known_eq (off, 0) + /* This makes us disable this transform for PRE where the + reference ops might be also used for code insertion which + is invalid. 
*/ + && default_vn_walk_kind == VN_WALKREWRITE) { - vn_reference_op_t new_mem_op = &tem[tem.length () - 2]; - new_mem_op->op0 - = wide_int_to_tree (TREE_TYPE (mem_op->op0), - wi::to_poly_wide (new_mem_op->op0)); + auto_vec tem; + copy_reference_ops_from_ref (TREE_OPERAND (addr, 0), &tem); + /* Make sure to preserve TBAA info. The only objects not + wrapped in MEM_REFs that can have their address taken are + STRING_CSTs. */ + if (tem.length () >= 2 + && tem[tem.length () - 2].opcode == MEM_REF) + { + vn_reference_op_t new_mem_op = &tem[tem.length () - 2]; + new_mem_op->op0 + = wide_int_to_tree (TREE_TYPE (mem_op->op0), + wi::to_poly_wide (new_mem_op->op0)); + } + else + gcc_assert (tem.last ().opcode == STRING_CST); + ops->pop (); + ops->pop (); + ops->safe_splice (tem); + --*i_p; + return true; } - else - gcc_assert (tem.last ().opcode == STRING_CST); - ops->pop (); - ops->pop (); - ops->safe_splice (tem); - --*i_p; - return true; + if (!addr_base + || TREE_CODE (addr_base) != MEM_REF + || (TREE_CODE (TREE_OPERAND (addr_base, 0)) == SSA_NAME + && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (TREE_OPERAND (addr_base, + 0)))) + return changed; + + off += addr_offset; + off += mem_ref_offset (addr_base); + op->op0 = TREE_OPERAND (addr_base, 0); + } + else + { + tree ptr, ptroff; + ptr = gimple_assign_rhs1 (def_stmt); + ptroff = gimple_assign_rhs2 (def_stmt); + if (TREE_CODE (ptr) != SSA_NAME + || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr) + /* Make sure to not endlessly recurse. + See gcc.dg/tree-ssa/20040408-1.c for an example. Can easily + happen when we value-number a PHI to its backedge value. */ + || SSA_VAL (ptr) == op->op0 + || !poly_int_tree_p (ptroff)) + return changed; + + off += wi::to_poly_offset (ptroff); + op->op0 = ptr; } - if (!addr_base - || TREE_CODE (addr_base) != MEM_REF - || (TREE_CODE (TREE_OPERAND (addr_base, 0)) == SSA_NAME - && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (TREE_OPERAND (addr_base, 0)))) - return false; - - off += addr_offset; - off += mem_ref_offset (addr_base); - op->op0 = TREE_OPERAND (addr_base, 0); - } - else - { - tree ptr, ptroff; - ptr = gimple_assign_rhs1 (def_stmt); - ptroff = gimple_assign_rhs2 (def_stmt); - if (TREE_CODE (ptr) != SSA_NAME - || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr) - /* Make sure to not endlessly recurse. - See gcc.dg/tree-ssa/20040408-1.c for an example. Can easily - happen when we value-number a PHI to its backedge value. */ - || SSA_VAL (ptr) == op->op0 - || !poly_int_tree_p (ptroff)) - return false; - off += wi::to_poly_offset (ptroff); - op->op0 = ptr; + mem_op->op0 = wide_int_to_tree (TREE_TYPE (mem_op->op0), off); + if (tree_fits_shwi_p (mem_op->op0)) + mem_op->off = tree_to_shwi (mem_op->op0); + else + mem_op->off = -1; + /* ??? Can end up with endless recursion here!? + gcc.c-torture/execute/strcmp-1.c */ + if (TREE_CODE (op->op0) == SSA_NAME) + op->op0 = SSA_VAL (op->op0); + if (TREE_CODE (op->op0) != SSA_NAME) + op->opcode = TREE_CODE (op->op0); + + changed = true; } + /* Tail-recurse. */ + while (TREE_CODE (op->op0) == SSA_NAME); - mem_op->op0 = wide_int_to_tree (TREE_TYPE (mem_op->op0), off); - if (tree_fits_shwi_p (mem_op->op0)) - mem_op->off = tree_to_shwi (mem_op->op0); - else - mem_op->off = -1; - /* ??? Can end up with endless recursion here!? - gcc.c-torture/execute/strcmp-1.c */ - if (TREE_CODE (op->op0) == SSA_NAME) - op->op0 = SSA_VAL (op->op0); - if (TREE_CODE (op->op0) != SSA_NAME) - op->opcode = TREE_CODE (op->op0); - - /* And recurse. 
*/ - if (TREE_CODE (op->op0) == SSA_NAME) - vn_reference_maybe_forwprop_address (ops, i_p); - else if (TREE_CODE (op->op0) == ADDR_EXPR) + /* Fold a remaining *&. */ + if (TREE_CODE (op->op0) == ADDR_EXPR) vn_reference_fold_indirect (ops, i_p); - return true; + + return changed; } /* Optimize the reference REF to a constant if possible or return @@ -1671,26 +1683,77 @@ struct pd_data struct vn_walk_cb_data { vn_walk_cb_data (vn_reference_t vr_, tree orig_ref_, tree *last_vuse_ptr_, - vn_lookup_kind vn_walk_kind_, bool tbaa_p_) - : vr (vr_), last_vuse_ptr (last_vuse_ptr_), - vn_walk_kind (vn_walk_kind_), tbaa_p (tbaa_p_), known_ranges (NULL) - { - ao_ref_init (&orig_ref, orig_ref_); - } + vn_lookup_kind vn_walk_kind_, bool tbaa_p_, tree mask_) + : vr (vr_), last_vuse_ptr (last_vuse_ptr_), last_vuse (NULL_TREE), + mask (mask_), masked_result (NULL_TREE), vn_walk_kind (vn_walk_kind_), + tbaa_p (tbaa_p_), saved_operands (vNULL), first_set (-2), + first_base_set (-2), known_ranges (NULL) + { + if (!last_vuse_ptr) + last_vuse_ptr = &last_vuse; + ao_ref_init (&orig_ref, orig_ref_); + if (mask) + { + wide_int w = wi::to_wide (mask); + unsigned int pos = 0, prec = w.get_precision (); + pd_data pd; + pd.rhs = build_constructor (NULL_TREE, NULL); + /* When bitwise and with a constant is done on a memory load, + we don't really need all the bits to be defined or defined + to constants, we don't really care what is in the position + corresponding to 0 bits in the mask. + So, push the ranges of those 0 bits in the mask as artificial + zero stores and let the partial def handling code do the + rest. */ + while (pos < prec) + { + int tz = wi::ctz (w); + if (pos + tz > prec) + tz = prec - pos; + if (tz) + { + if (BYTES_BIG_ENDIAN) + pd.offset = prec - pos - tz; + else + pd.offset = pos; + pd.size = tz; + void *r = push_partial_def (pd, 0, 0, 0, prec); + gcc_assert (r == NULL_TREE); + } + pos += tz; + if (pos == prec) + break; + w = wi::lrshift (w, tz); + tz = wi::ctz (wi::bit_not (w)); + if (pos + tz > prec) + tz = prec - pos; + pos += tz; + w = wi::lrshift (w, tz); + } + } + } ~vn_walk_cb_data (); - void *push_partial_def (const pd_data& pd, tree, HOST_WIDE_INT); + void *finish (alias_set_type, alias_set_type, tree); + void *push_partial_def (pd_data pd, + alias_set_type, alias_set_type, HOST_WIDE_INT, + HOST_WIDE_INT); vn_reference_t vr; ao_ref orig_ref; tree *last_vuse_ptr; + tree last_vuse; + tree mask; + tree masked_result; vn_lookup_kind vn_walk_kind; bool tbaa_p; + vec saved_operands; /* The VDEFs of partial defs we come along. */ auto_vec partial_defs; /* The first defs range to avoid splay tree setup in most cases. */ pd_range first_range; - tree first_vuse; + alias_set_type first_set; + alias_set_type first_base_set; splay_tree known_ranges; obstack ranges_obstack; }; @@ -1702,6 +1765,26 @@ vn_walk_cb_data::~vn_walk_cb_data () splay_tree_delete (known_ranges); obstack_free (&ranges_obstack, NULL); } + saved_operands.release (); +} + +void * +vn_walk_cb_data::finish (alias_set_type set, alias_set_type base_set, tree val) +{ + if (first_set != -2) + { + set = first_set; + base_set = first_base_set; + } + if (mask) + { + masked_result = val; + return (void *) -1; + } + vec &operands + = saved_operands.exists () ? saved_operands : vr->operands; + return vn_reference_lookup_or_insert_for_pieces (last_vuse, set, base_set, + vr->type, operands, val); } /* pd_range splay-tree helpers. 
*/ @@ -1731,175 +1814,340 @@ pd_tree_dealloc (void *, void *) } /* Push PD to the vector of partial definitions returning a - value when we are ready to combine things with VUSE and MAXSIZEI, + value when we are ready to combine things with VUSE, SET and MAXSIZEI, NULL when we want to continue looking for partial defs or -1 on failure. */ void * -vn_walk_cb_data::push_partial_def (const pd_data &pd, tree vuse, +vn_walk_cb_data::push_partial_def (pd_data pd, + alias_set_type set, alias_set_type base_set, + HOST_WIDE_INT offseti, HOST_WIDE_INT maxsizei) { + const HOST_WIDE_INT bufsize = 64; + /* We're using a fixed buffer for encoding so fail early if the object + we want to interpret is bigger. */ + if (maxsizei > bufsize * BITS_PER_UNIT + || CHAR_BIT != 8 + || BITS_PER_UNIT != 8 + /* Not prepared to handle PDP endian. */ + || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) + return (void *)-1; + + /* Turn too large constant stores into non-constant stores. */ + if (CONSTANT_CLASS_P (pd.rhs) && pd.size > bufsize * BITS_PER_UNIT) + pd.rhs = error_mark_node; + + /* And for non-constant or CONSTRUCTOR stores shrink them to only keep at + most a partial byte before and/or after the region. */ + if (!CONSTANT_CLASS_P (pd.rhs)) + { + if (pd.offset < offseti) + { + HOST_WIDE_INT o = ROUND_DOWN (offseti - pd.offset, BITS_PER_UNIT); + gcc_assert (pd.size > o); + pd.size -= o; + pd.offset += o; + } + if (pd.size > maxsizei) + pd.size = maxsizei + ((pd.size - maxsizei) % BITS_PER_UNIT); + } + + pd.offset -= offseti; + + bool pd_constant_p = (TREE_CODE (pd.rhs) == CONSTRUCTOR + || CONSTANT_CLASS_P (pd.rhs)); if (partial_defs.is_empty ()) { + /* If we get a clobber upfront, fail. */ + if (TREE_CLOBBER_P (pd.rhs)) + return (void *)-1; + if (!pd_constant_p) + return (void *)-1; partial_defs.safe_push (pd); first_range.offset = pd.offset; first_range.size = pd.size; - first_vuse = vuse; + first_set = set; + first_base_set = base_set; last_vuse_ptr = NULL; + /* Continue looking for partial defs. */ + return NULL; + } + + if (!known_ranges) + { + /* ??? Optimize the case where the 2nd partial def completes things. */ + gcc_obstack_init (&ranges_obstack); + known_ranges = splay_tree_new_with_allocator (pd_range_compare, 0, 0, + pd_tree_alloc, + pd_tree_dealloc, this); + splay_tree_insert (known_ranges, + (splay_tree_key)&first_range.offset, + (splay_tree_value)&first_range); + } + + pd_range newr = { pd.offset, pd.size }; + splay_tree_node n; + pd_range *r; + /* Lookup the predecessor of offset + 1 and see if we need to merge. */ + HOST_WIDE_INT loffset = newr.offset + 1; + if ((n = splay_tree_predecessor (known_ranges, (splay_tree_key)&loffset)) + && ((r = (pd_range *)n->value), true) + && ranges_known_overlap_p (r->offset, r->size + 1, + newr.offset, newr.size)) + { + /* Ignore partial defs already covered. Here we also drop shadowed + clobbers arriving here at the floor. */ + if (known_subrange_p (newr.offset, newr.size, r->offset, r->size)) + return NULL; + r->size = MAX (r->offset + r->size, newr.offset + newr.size) - r->offset; } else { - if (!known_ranges) - { - /* ??? Optimize the case where the second partial def - completes things. 
*/ - gcc_obstack_init (&ranges_obstack); - known_ranges - = splay_tree_new_with_allocator (pd_range_compare, 0, 0, - pd_tree_alloc, - pd_tree_dealloc, this); - splay_tree_insert (known_ranges, - (splay_tree_key)&first_range.offset, - (splay_tree_value)&first_range); - } - if (known_ranges) - { - pd_range newr = { pd.offset, pd.size }; - splay_tree_node n; - pd_range *r; - /* Lookup the predecessor of offset + 1 and see if - we need to merge with it. */ - HOST_WIDE_INT loffset = newr.offset + 1; - if ((n = splay_tree_predecessor (known_ranges, - (splay_tree_key)&loffset)) - && ((r = (pd_range *)n->value), true) - && ranges_known_overlap_p (r->offset, r->size + 1, - newr.offset, newr.size)) - { - /* Ignore partial defs already covered. */ - if (known_subrange_p (newr.offset, newr.size, - r->offset, r->size)) - return NULL; - r->size = MAX (r->offset + r->size, - newr.offset + newr.size) - r->offset; - } + /* newr.offset wasn't covered yet, insert the range. */ + r = XOBNEW (&ranges_obstack, pd_range); + *r = newr; + splay_tree_insert (known_ranges, (splay_tree_key)&r->offset, + (splay_tree_value)r); + } + /* Merge r which now contains newr and is a member of the splay tree with + adjacent overlapping ranges. */ + pd_range *rafter; + while ((n = splay_tree_successor (known_ranges, (splay_tree_key)&r->offset)) + && ((rafter = (pd_range *)n->value), true) + && ranges_known_overlap_p (r->offset, r->size + 1, + rafter->offset, rafter->size)) + { + r->size = MAX (r->offset + r->size, + rafter->offset + rafter->size) - r->offset; + splay_tree_remove (known_ranges, (splay_tree_key)&rafter->offset); + } + /* If we get a clobber, fail. */ + if (TREE_CLOBBER_P (pd.rhs)) + return (void *)-1; + /* Non-constants are OK as long as they are shadowed by a constant. */ + if (!pd_constant_p) + return (void *)-1; + partial_defs.safe_push (pd); + + /* Now we have merged newr into the range tree. When we have covered + [offseti, sizei] then the tree will contain exactly one node which has + the desired properties and it will be 'r'. */ + if (!known_subrange_p (0, maxsizei, r->offset, r->size)) + /* Continue looking for partial defs. */ + return NULL; + + /* Now simply native encode all partial defs in reverse order. */ + unsigned ndefs = partial_defs.length (); + /* We support up to 512-bit values (for V8DFmode). */ + unsigned char buffer[bufsize + 1]; + unsigned char this_buffer[bufsize + 1]; + int len; + + memset (buffer, 0, bufsize + 1); + unsigned needed_len = ROUND_UP (maxsizei, BITS_PER_UNIT) / BITS_PER_UNIT; + while (!partial_defs.is_empty ()) + { + pd_data pd = partial_defs.pop (); + unsigned int amnt; + if (TREE_CODE (pd.rhs) == CONSTRUCTOR) + { + /* Empty CONSTRUCTOR. */ + if (pd.size >= needed_len * BITS_PER_UNIT) + len = needed_len; else + len = ROUND_UP (pd.size, BITS_PER_UNIT) / BITS_PER_UNIT; + memset (this_buffer, 0, len); + } + else + { + len = native_encode_expr (pd.rhs, this_buffer, bufsize, + MAX (0, -pd.offset) / BITS_PER_UNIT); + if (len <= 0 + || len < (ROUND_UP (pd.size, BITS_PER_UNIT) / BITS_PER_UNIT + - MAX (0, -pd.offset) / BITS_PER_UNIT)) { - /* newr.offset wasn't covered yet, insert the - range. */ - r = XOBNEW (&ranges_obstack, pd_range); - *r = newr; - splay_tree_insert (known_ranges, - (splay_tree_key)&r->offset, - (splay_tree_value)r); - } - /* Merge r which now contains newr and is a member - of the splay tree with adjacent overlapping ranges. 
*/ - pd_range *rafter; - while ((n = splay_tree_successor (known_ranges, - (splay_tree_key)&r->offset)) - && ((rafter = (pd_range *)n->value), true) - && ranges_known_overlap_p (r->offset, r->size + 1, - rafter->offset, rafter->size)) - { - r->size = MAX (r->offset + r->size, - rafter->offset + rafter->size) - r->offset; - splay_tree_remove (known_ranges, - (splay_tree_key)&rafter->offset); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Failed to encode %u " + "partial definitions\n", ndefs); + return (void *)-1; } - partial_defs.safe_push (pd); - - /* Now we have merged newr into the range tree. - When we have covered [offseti, sizei] then the - tree will contain exactly one node which has - the desired properties and it will be 'r'. */ - if (known_subrange_p (0, maxsizei / BITS_PER_UNIT, - r->offset, r->size)) - { - /* Now simply native encode all partial defs - in reverse order. */ - unsigned ndefs = partial_defs.length (); - /* We support up to 512-bit values (for V8DFmode). */ - unsigned char buffer[64]; - int len; + } - while (!partial_defs.is_empty ()) + unsigned char *p = buffer; + HOST_WIDE_INT size = pd.size; + if (pd.offset < 0) + size -= ROUND_DOWN (-pd.offset, BITS_PER_UNIT); + this_buffer[len] = 0; + if (BYTES_BIG_ENDIAN) + { + /* LSB of this_buffer[len - 1] byte should be at + pd.offset + pd.size - 1 bits in buffer. */ + amnt = ((unsigned HOST_WIDE_INT) pd.offset + + pd.size) % BITS_PER_UNIT; + if (amnt) + shift_bytes_in_array_right (this_buffer, len + 1, amnt); + unsigned char *q = this_buffer; + unsigned int off = 0; + if (pd.offset >= 0) + { + unsigned int msk; + off = pd.offset / BITS_PER_UNIT; + gcc_assert (off < needed_len); + p = buffer + off; + if (size <= amnt) { - pd_data pd = partial_defs.pop (); - if (TREE_CODE (pd.rhs) == CONSTRUCTOR) - /* Empty CONSTRUCTOR. */ - memset (buffer + MAX (0, pd.offset), - 0, MIN ((HOST_WIDE_INT)sizeof (buffer), pd.size)); - else + msk = ((1 << size) - 1) << (BITS_PER_UNIT - amnt); + *p = (*p & ~msk) | (this_buffer[len] & msk); + size = 0; + } + else + { + if (TREE_CODE (pd.rhs) != CONSTRUCTOR) + q = (this_buffer + len + - (ROUND_UP (size - amnt, BITS_PER_UNIT) + / BITS_PER_UNIT)); + if (pd.offset % BITS_PER_UNIT) { - unsigned pad = 0; - if (BYTES_BIG_ENDIAN - && is_a (TYPE_MODE (TREE_TYPE (pd.rhs)))) - { - /* On big-endian the padding is at the 'front' so - just skip the initial bytes. */ - fixed_size_mode mode = as_a - (TYPE_MODE (TREE_TYPE (pd.rhs))); - pad = GET_MODE_SIZE (mode) - pd.size; - } - len = native_encode_expr (pd.rhs, - buffer + MAX (0, pd.offset), - sizeof (buffer - MAX (0, pd.offset)), - MAX (0, -pd.offset) + pad); - if (len <= 0 - || len < (pd.size - MAX (0, -pd.offset))) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Failed to encode %u " - "partial definitions\n", ndefs); - return (void *)-1; - } + msk = -1U << (BITS_PER_UNIT + - (pd.offset % BITS_PER_UNIT)); + *p = (*p & msk) | (*q & ~msk); + p++; + q++; + off++; + size -= BITS_PER_UNIT - (pd.offset % BITS_PER_UNIT); + gcc_assert (size >= 0); } } - - tree type = vr->type; - /* Make sure to interpret in a type that has a range - covering the whole access size. 
*/ - if (INTEGRAL_TYPE_P (vr->type) - && maxsizei != TYPE_PRECISION (vr->type)) - type = build_nonstandard_integer_type (maxsizei, - TYPE_UNSIGNED (type)); - tree val = native_interpret_expr (type, buffer, - maxsizei / BITS_PER_UNIT); - /* If we chop off bits because the types precision doesn't - match the memory access size this is ok when optimizing - reads but not when called from the DSE code during - elimination. */ - if (val - && type != vr->type) + } + else if (TREE_CODE (pd.rhs) != CONSTRUCTOR) + { + q = (this_buffer + len + - (ROUND_UP (size - amnt, BITS_PER_UNIT) + / BITS_PER_UNIT)); + if (pd.offset % BITS_PER_UNIT) { - if (! int_fits_type_p (val, vr->type)) - val = NULL_TREE; - else - val = fold_convert (vr->type, val); + q++; + size -= BITS_PER_UNIT - ((unsigned HOST_WIDE_INT) pd.offset + % BITS_PER_UNIT); + gcc_assert (size >= 0); } - - if (val) + } + if ((unsigned HOST_WIDE_INT) size / BITS_PER_UNIT + off + > needed_len) + size = (needed_len - off) * BITS_PER_UNIT; + memcpy (p, q, size / BITS_PER_UNIT); + if (size % BITS_PER_UNIT) + { + unsigned int msk + = -1U << (BITS_PER_UNIT - (size % BITS_PER_UNIT)); + p += size / BITS_PER_UNIT; + q += size / BITS_PER_UNIT; + *p = (*q & msk) | (*p & ~msk); + } + } + else + { + size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT); + if (pd.offset >= 0) + { + /* LSB of this_buffer[0] byte should be at pd.offset bits + in buffer. */ + unsigned int msk; + amnt = pd.offset % BITS_PER_UNIT; + if (amnt) + shift_bytes_in_array_left (this_buffer, len + 1, amnt); + unsigned int off = pd.offset / BITS_PER_UNIT; + gcc_assert (off < needed_len); + size = MIN (size, + (HOST_WIDE_INT) (needed_len - off) * BITS_PER_UNIT); + p = buffer + off; + if (amnt + size < BITS_PER_UNIT) { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Successfully combined %u " - "partial definitions\n", ndefs); - return vn_reference_lookup_or_insert_for_pieces - (first_vuse, - vr->set, vr->type, vr->operands, val); + /* Low amnt bits come from *p, then size bits + from this_buffer[0] and the remaining again from + *p. */ + msk = ((1 << size) - 1) << amnt; + *p = (*p & ~msk) | (this_buffer[0] & msk); + size = 0; } - else + else if (amnt) { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Failed to interpret %u " - "encoded partial definitions\n", ndefs); - return (void *)-1; + msk = -1U << amnt; + *p = (*p & ~msk) | (this_buffer[0] & msk); + p++; + size -= (BITS_PER_UNIT - amnt); } } + else + { + amnt = (unsigned HOST_WIDE_INT) pd.offset % BITS_PER_UNIT; + if (amnt) + shift_bytes_in_array_left (this_buffer, len + 1, amnt); + } + memcpy (p, this_buffer + (amnt != 0), size / BITS_PER_UNIT); + p += size / BITS_PER_UNIT; + if (size % BITS_PER_UNIT) + { + unsigned int msk = -1U << (size % BITS_PER_UNIT); + *p = (this_buffer[(amnt != 0) + size / BITS_PER_UNIT] + & ~msk) | (*p & msk); + } } } - /* Continue looking for partial defs. */ - return NULL; + + tree type = vr->type; + /* Make sure to interpret in a type that has a range covering the whole + access size. 
*/ + if (INTEGRAL_TYPE_P (vr->type) && maxsizei != TYPE_PRECISION (vr->type)) + type = build_nonstandard_integer_type (maxsizei, TYPE_UNSIGNED (type)); + tree val; + if (BYTES_BIG_ENDIAN) + { + unsigned sz = needed_len; + if (maxsizei % BITS_PER_UNIT) + shift_bytes_in_array_right (buffer, needed_len, + BITS_PER_UNIT + - (maxsizei % BITS_PER_UNIT)); + if (INTEGRAL_TYPE_P (type)) + sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type)); + if (sz > needed_len) + { + memcpy (this_buffer + (sz - needed_len), buffer, needed_len); + val = native_interpret_expr (type, this_buffer, sz); + } + else + val = native_interpret_expr (type, buffer, needed_len); + } + else + val = native_interpret_expr (type, buffer, bufsize); + /* If we chop off bits because the types precision doesn't match the memory + access size this is ok when optimizing reads but not when called from + the DSE code during elimination. */ + if (val && type != vr->type) + { + if (! int_fits_type_p (val, vr->type)) + val = NULL_TREE; + else + val = fold_convert (vr->type, val); + } + + if (val) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Successfully combined %u partial definitions\n", ndefs); + /* We are using the alias-set of the first store we encounter which + should be appropriate here. */ + return finish (first_set, first_base_set, val); + } + else + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Failed to interpret %u encoded partial definitions\n", ndefs); + return (void *)-1; + } } /* Callback for walk_non_aliased_vuses. Adjusts the vn_reference_t VR_ @@ -1919,7 +2167,10 @@ vn_reference_lookup_2 (ao_ref *op ATTRIBUTE_UNUSED, tree vuse, void *data_) return NULL; if (data->last_vuse_ptr) - *data->last_vuse_ptr = vuse; + { + *data->last_vuse_ptr = vuse; + data->last_vuse = vuse; + } /* Fixup vuse and hash. 
*/ if (vr->vuse) @@ -1931,7 +2182,11 @@ vn_reference_lookup_2 (ao_ref *op ATTRIBUTE_UNUSED, tree vuse, void *data_) hash = vr->hashcode; slot = valid_info->references->find_slot_with_hash (vr, hash, NO_INSERT); if (slot) - return *slot; + { + if ((*slot)->result && data->saved_operands.exists ()) + return data->finish (vr->set, vr->base_set, (*slot)->result); + return *slot; + } return NULL; } @@ -1943,6 +2198,7 @@ vn_reference_lookup_2 (ao_ref *op ATTRIBUTE_UNUSED, tree vuse, void *data_) static vn_reference_t vn_reference_lookup_or_insert_for_pieces (tree vuse, alias_set_type set, + alias_set_type base_set, tree type, vec operands, @@ -1955,6 +2211,7 @@ vn_reference_lookup_or_insert_for_pieces (tree vuse, vr1.operands = operands; vr1.type = type; vr1.set = set; + vr1.base_set = base_set; vr1.hashcode = vn_reference_compute_hash (&vr1); if (vn_reference_lookup_1 (&vr1, &result)) return result; @@ -1962,7 +2219,7 @@ vn_reference_lookup_or_insert_for_pieces (tree vuse, value_id = VN_INFO (value)->value_id; else value_id = get_or_alloc_constant_value_id (value); - return vn_reference_insert_pieces (vuse, set, type, + return vn_reference_insert_pieces (vuse, set, base_set, type, operands.copy (), value, value_id); } @@ -2149,40 +2406,21 @@ class rpo_elim : public eliminate_dom_walker { public: rpo_elim(basic_block entry_) - : eliminate_dom_walker (CDI_DOMINATORS, NULL), entry (entry_) {} - ~rpo_elim(); + : eliminate_dom_walker (CDI_DOMINATORS, NULL), entry (entry_), + m_avail_freelist (NULL) {} virtual tree eliminate_avail (basic_block, tree op); virtual void eliminate_push_avail (basic_block, tree); basic_block entry; - /* Instead of having a local availability lattice for each - basic-block and availability at X defined as union of - the local availabilities at X and its dominators we're - turning this upside down and track availability per - value given values are usually made available at very - few points (at least one). - So we have a value -> vec map where - LOCATION is specifying the basic-block LEADER is made - available for VALUE. We push to this vector in RPO - order thus for iteration we can simply pop the last - entries. - LOCATION is the basic-block index and LEADER is its - SSA name version. */ - /* ??? We'd like to use auto_vec here with embedded storage - but that doesn't play well until we can provide move - constructors and use std::move on hash-table expansion. - So for now this is a bit more expensive than necessary. - We eventually want to switch to a chaining scheme like - for hashtable entries for unwinding which would make - making the vector part of the vn_ssa_aux structure possible. */ - typedef hash_map > > rpo_avail_t; - rpo_avail_t m_rpo_avail; + /* Freelist of avail entries which are allocated from the vn_ssa_aux + obstack. */ + vn_avail *m_avail_freelist; }; /* Global RPO state for access from hooks. 
*/ -static rpo_elim *rpo_avail; +static eliminate_dom_walker *rpo_avail; basic_block vn_context_bb; /* Return true if BASE1 and BASE2 can be adjusted so they have the @@ -2221,13 +2459,13 @@ adjust_offsets_for_equal_base_address (tree base1, poly_int64 *offset1, static void * vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, - bool *disambiguate_only) + translate_flags *disambiguate_only) { vn_walk_cb_data *data = (vn_walk_cb_data *)data_; vn_reference_t vr = data->vr; gimple *def_stmt = SSA_NAME_DEF_STMT (vuse); tree base = ao_ref_base (ref); - HOST_WIDE_INT offseti, maxsizei; + HOST_WIDE_INT offseti = 0, maxsizei, sizei = 0; static vec lhs_ops; ao_ref lhs_ref; bool lhs_ref_ok = false; @@ -2242,25 +2480,22 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, lhs_ops.truncate (0); basic_block saved_rpo_bb = vn_context_bb; vn_context_bb = gimple_bb (def_stmt); - copy_reference_ops_from_ref (lhs, &lhs_ops); - lhs_ops = valueize_refs_1 (lhs_ops, &valueized_anything, true); - vn_context_bb = saved_rpo_bb; - if (valueized_anything) + if (*disambiguate_only <= TR_VALUEIZE_AND_DISAMBIGUATE) { - lhs_ref_ok = ao_ref_init_from_vn_reference (&lhs_ref, - get_alias_set (lhs), - TREE_TYPE (lhs), lhs_ops); - if (lhs_ref_ok - && !refs_may_alias_p_1 (ref, &lhs_ref, data->tbaa_p)) - { - *disambiguate_only = true; - return NULL; - } + copy_reference_ops_from_ref (lhs, &lhs_ops); + lhs_ops = valueize_refs_1 (lhs_ops, &valueized_anything, true); } - else + vn_context_bb = saved_rpo_bb; + ao_ref_init (&lhs_ref, lhs); + lhs_ref_ok = true; + if (valueized_anything + && ao_ref_init_from_vn_reference + (&lhs_ref, ao_ref_alias_set (&lhs_ref), + ao_ref_base_alias_set (&lhs_ref), TREE_TYPE (lhs), lhs_ops) + && !refs_may_alias_p_1 (ref, &lhs_ref, data->tbaa_p)) { - ao_ref_init (&lhs_ref, lhs); - lhs_ref_ok = true; + *disambiguate_only = TR_VALUEIZE_AND_DISAMBIGUATE; + return NULL; } /* Besides valueizing the LHS we can also use access-path based @@ -2280,7 +2515,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, } if (!refs_may_alias_p_1 (&data->orig_ref, lref, data->tbaa_p)) { - *disambiguate_only = true; + *disambiguate_only = (valueized_anything + ? TR_VALUEIZE_AND_DISAMBIGUATE + : TR_DISAMBIGUATE); return NULL; } } @@ -2291,7 +2528,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, and return the found value. */ if (is_gimple_reg_type (TREE_TYPE (lhs)) && types_compatible_p (TREE_TYPE (lhs), vr->type) - && ref->ref) + && (ref->ref || data->orig_ref.ref)) { tree *saved_last_vuse_ptr = data->last_vuse_ptr; /* Do not update last_vuse_ptr in vn_reference_lookup_2. */ @@ -2316,13 +2553,16 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, -fno-strict-aliasing. So simply resort to alignment to rule out overlaps. Do this check last because it is quite expensive compared to the hash-lookup above. */ - && multiple_p (get_object_alignment (ref->ref), ref->size) + && multiple_p (get_object_alignment + (ref->ref ? 
ref->ref : data->orig_ref.ref), + ref->size) && multiple_p (get_object_alignment (lhs), ref->size)) return res; } } } - else if (gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL) + else if (*disambiguate_only <= TR_VALUEIZE_AND_DISAMBIGUATE + && gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL) && gimple_call_num_args (def_stmt) <= 4) { /* For builtin calls valueize its arguments and call the @@ -2351,15 +2591,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, gimple_call_set_arg (def_stmt, i, oldargs[i]); if (!res) { - *disambiguate_only = true; + *disambiguate_only = TR_VALUEIZE_AND_DISAMBIGUATE; return NULL; } } } - /* If we are looking for redundant stores do not create new hashtable - entries from aliasing defs with made up alias-sets. */ - if (*disambiguate_only || !data->tbaa_p) + if (*disambiguate_only > TR_TRANSLATE) return (void *)-1; /* If we cannot constrain the size of the reference we cannot @@ -2370,22 +2608,25 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, poly_int64 offset = ref->offset; poly_int64 maxsize = ref->max_size; - /* We can't deduce anything useful from clobbers. */ - if (gimple_clobber_p (def_stmt)) - return (void *)-1; - /* def_stmt may-defs *ref. See if we can derive a value for *ref from that definition. 1) Memset. */ if (is_gimple_reg_type (vr->type) - && gimple_call_builtin_p (def_stmt, BUILT_IN_MEMSET) + && (gimple_call_builtin_p (def_stmt, BUILT_IN_MEMSET) + || gimple_call_builtin_p (def_stmt, BUILT_IN_MEMSET_CHK)) && (integer_zerop (gimple_call_arg (def_stmt, 1)) || ((TREE_CODE (gimple_call_arg (def_stmt, 1)) == INTEGER_CST || (INTEGRAL_TYPE_P (vr->type) && known_eq (ref->size, 8))) - && CHAR_BIT == 8 && BITS_PER_UNIT == 8 + && CHAR_BIT == 8 + && BITS_PER_UNIT == 8 + && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN && offset.is_constant (&offseti) - && offseti % BITS_PER_UNIT == 0)) - && poly_int_tree_p (gimple_call_arg (def_stmt, 2)) + && ref->size.is_constant (&sizei) + && (offseti % BITS_PER_UNIT == 0 + || TREE_CODE (gimple_call_arg (def_stmt, 1)) == INTEGER_CST))) + && (poly_int_tree_p (gimple_call_arg (def_stmt, 2)) + || (TREE_CODE (gimple_call_arg (def_stmt, 2)) == SSA_NAME + && poly_int_tree_p (SSA_VAL (gimple_call_arg (def_stmt, 2))))) && (TREE_CODE (gimple_call_arg (def_stmt, 0)) == ADDR_EXPR || TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME)) { @@ -2420,7 +2661,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, { poly_int64 soff; if (TREE_CODE (base) != MEM_REF - || !(mem_ref_offset (base) << LOG2_BITS_PER_UNIT).to_shwi (&soff)) + || !(mem_ref_offset (base) + << LOG2_BITS_PER_UNIT).to_shwi (&soff)) return (void *)-1; offset += soff; offset2 = 0; @@ -2430,10 +2672,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, if (is_gimple_assign (def) && gimple_assign_rhs_code (def) == POINTER_PLUS_EXPR && gimple_assign_rhs1 (def) == TREE_OPERAND (base, 0) - && poly_int_tree_p (gimple_assign_rhs2 (def)) - && (wi::to_poly_offset (gimple_assign_rhs2 (def)) - << LOG2_BITS_PER_UNIT).to_shwi (&offset2)) + && poly_int_tree_p (gimple_assign_rhs2 (def))) { + tree rhs2 = gimple_assign_rhs2 (def); + if (!(poly_offset_int::from (wi::to_poly_wide (rhs2), + SIGNED) + << LOG2_BITS_PER_UNIT).to_shwi (&offset2)) + return (void *)-1; ref2 = gimple_assign_rhs1 (def); if (TREE_CODE (ref2) == SSA_NAME) ref2 = SSA_VAL (ref2); @@ -2445,7 +2690,15 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, else return (void *)-1; tree len = gimple_call_arg (def_stmt, 2); - HOST_WIDE_INT leni, offset2i, offseti; + 
HOST_WIDE_INT leni, offset2i; + if (TREE_CODE (len) == SSA_NAME) + len = SSA_VAL (len); + /* Sometimes the above trickery is smarter than alias analysis. Take + advantage of that. */ + if (!ranges_maybe_overlap_p (offset, maxsize, offset2, + (wi::to_poly_offset (len) + << LOG2_BITS_PER_UNIT))) + return NULL; if (data->partial_defs.is_empty () && known_subrange_p (offset, maxsize, offset2, wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT)) @@ -2454,7 +2707,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, if (integer_zerop (gimple_call_arg (def_stmt, 1))) val = build_zero_cst (vr->type); else if (INTEGRAL_TYPE_P (vr->type) - && known_eq (ref->size, 8)) + && known_eq (ref->size, 8) + && offseti % BITS_PER_UNIT == 0) { gimple_match_op res_op (gimple_match_cond::UNCOND, NOP_EXPR, vr->type, gimple_call_arg (def_stmt, 1)); @@ -2466,29 +2720,57 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, } else { - unsigned len = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type)); - unsigned char *buf = XALLOCAVEC (unsigned char, len); + unsigned buflen = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type)) + 1; + if (INTEGRAL_TYPE_P (vr->type)) + buflen = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (vr->type)) + 1; + unsigned char *buf = XALLOCAVEC (unsigned char, buflen); memset (buf, TREE_INT_CST_LOW (gimple_call_arg (def_stmt, 1)), - len); - val = native_interpret_expr (vr->type, buf, len); + buflen); + if (BYTES_BIG_ENDIAN) + { + unsigned int amnt + = (((unsigned HOST_WIDE_INT) offseti + sizei) + % BITS_PER_UNIT); + if (amnt) + { + shift_bytes_in_array_right (buf, buflen, + BITS_PER_UNIT - amnt); + buf++; + buflen--; + } + } + else if (offseti % BITS_PER_UNIT != 0) + { + unsigned int amnt + = BITS_PER_UNIT - ((unsigned HOST_WIDE_INT) offseti + % BITS_PER_UNIT); + shift_bytes_in_array_left (buf, buflen, amnt); + buf++; + buflen--; + } + val = native_interpret_expr (vr->type, buf, buflen); if (!val) return (void *)-1; } - return vn_reference_lookup_or_insert_for_pieces - (vuse, vr->set, vr->type, vr->operands, val); + return data->finish (0, 0, val); } /* For now handle clearing memory with partial defs. 
*/ - else if (integer_zerop (gimple_call_arg (def_stmt, 1)) + else if (known_eq (ref->size, maxsize) + && integer_zerop (gimple_call_arg (def_stmt, 1)) + && tree_fits_poly_int64_p (len) && tree_to_poly_int64 (len).is_constant (&leni) + && leni <= INTTYPE_MAXIMUM (HOST_WIDE_INT) / BITS_PER_UNIT && offset.is_constant (&offseti) && offset2.is_constant (&offset2i) - && maxsize.is_constant (&maxsizei)) + && maxsize.is_constant (&maxsizei) + && ranges_known_overlap_p (offseti, maxsizei, offset2i, + leni << LOG2_BITS_PER_UNIT)) { pd_data pd; pd.rhs = build_constructor (NULL_TREE, NULL); - pd.offset = (offset2i - offseti) / BITS_PER_UNIT; - pd.size = leni; - return data->push_partial_def (pd, vuse, maxsizei); + pd.offset = offset2i; + pd.size = leni << LOG2_BITS_PER_UNIT; + return data->push_partial_def (pd, 0, 0, offseti, maxsizei); } } @@ -2498,22 +2780,14 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (def_stmt)) == 0) { - tree lhs = gimple_assign_lhs (def_stmt); tree base2; poly_int64 offset2, size2, maxsize2; HOST_WIDE_INT offset2i, size2i; - bool reverse; - if (lhs_ref_ok) - { - base2 = ao_ref_base (&lhs_ref); - offset2 = lhs_ref.offset; - size2 = lhs_ref.size; - maxsize2 = lhs_ref.max_size; - reverse = reverse_storage_order_for_component_p (lhs); - } - else - base2 = get_ref_base_and_extent (lhs, - &offset2, &size2, &maxsize2, &reverse); + gcc_assert (lhs_ref_ok); + base2 = ao_ref_base (&lhs_ref); + offset2 = lhs_ref.offset; + size2 = lhs_ref.size; + maxsize2 = lhs_ref.max_size; if (known_size_p (maxsize2) && known_eq (maxsize2, size2) && adjust_offsets_for_equal_base_address (base, &offset, @@ -2522,24 +2796,32 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, if (data->partial_defs.is_empty () && known_subrange_p (offset, maxsize, offset2, size2)) { + /* While technically undefined behavior do not optimize + a full read from a clobber. */ + if (gimple_clobber_p (def_stmt)) + return (void *)-1; tree val = build_zero_cst (vr->type); - return vn_reference_lookup_or_insert_for_pieces - (vuse, vr->set, vr->type, vr->operands, val); + return data->finish (ao_ref_alias_set (&lhs_ref), + ao_ref_base_alias_set (&lhs_ref), val); } - else if (maxsize.is_constant (&maxsizei) - && maxsizei % BITS_PER_UNIT == 0 + else if (known_eq (ref->size, maxsize) + && maxsize.is_constant (&maxsizei) && offset.is_constant (&offseti) - && offseti % BITS_PER_UNIT == 0 && offset2.is_constant (&offset2i) - && offset2i % BITS_PER_UNIT == 0 && size2.is_constant (&size2i) - && size2i % BITS_PER_UNIT == 0) + && ranges_known_overlap_p (offseti, maxsizei, + offset2i, size2i)) { + /* Let clobbers be consumed by the partial-def tracker + which can choose to ignore them if they are shadowed + by a later def. 
*/ pd_data pd; pd.rhs = gimple_assign_rhs1 (def_stmt); - pd.offset = (offset2i - offseti) / BITS_PER_UNIT; - pd.size = size2i / BITS_PER_UNIT; - return data->push_partial_def (pd, vuse, maxsizei); + pd.offset = offset2i; + pd.size = size2i; + return data->push_partial_def (pd, ao_ref_alias_set (&lhs_ref), + ao_ref_base_alias_set (&lhs_ref), + offseti, maxsizei); } } } @@ -2550,13 +2832,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, && is_gimple_reg_type (vr->type) && !contains_storage_order_barrier_p (vr->operands) && gimple_assign_single_p (def_stmt) - && CHAR_BIT == 8 && BITS_PER_UNIT == 8 + && CHAR_BIT == 8 + && BITS_PER_UNIT == 8 + && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN /* native_encode and native_decode operate on arrays of bytes and so fundamentally need a compile-time size and offset. */ && maxsize.is_constant (&maxsizei) - && maxsizei % BITS_PER_UNIT == 0 && offset.is_constant (&offseti) - && offseti % BITS_PER_UNIT == 0 && (is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)) || (TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME && is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt)))))) @@ -2566,22 +2848,16 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, poly_int64 offset2, size2, maxsize2; HOST_WIDE_INT offset2i, size2i; bool reverse; - if (lhs_ref_ok) - { - base2 = ao_ref_base (&lhs_ref); - offset2 = lhs_ref.offset; - size2 = lhs_ref.size; - maxsize2 = lhs_ref.max_size; - reverse = reverse_storage_order_for_component_p (lhs); - } - else - base2 = get_ref_base_and_extent (lhs, - &offset2, &size2, &maxsize2, &reverse); + gcc_assert (lhs_ref_ok); + base2 = ao_ref_base (&lhs_ref); + offset2 = lhs_ref.offset; + size2 = lhs_ref.size; + maxsize2 = lhs_ref.max_size; + reverse = reverse_storage_order_for_component_p (lhs); if (base2 && !reverse + && !storage_order_barrier_p (lhs) && known_eq (maxsize2, size2) - && multiple_p (size2, BITS_PER_UNIT) - && multiple_p (offset2, BITS_PER_UNIT) && adjust_offsets_for_equal_base_address (base, &offset, base2, &offset2) && offset.is_constant (&offseti) @@ -2592,37 +2868,80 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, && known_subrange_p (offseti, maxsizei, offset2, size2)) { /* We support up to 512-bit values (for V8DFmode). */ - unsigned char buffer[64]; + unsigned char buffer[65]; int len; tree rhs = gimple_assign_rhs1 (def_stmt); if (TREE_CODE (rhs) == SSA_NAME) rhs = SSA_VAL (rhs); - unsigned pad = 0; - if (BYTES_BIG_ENDIAN - && is_a (TYPE_MODE (TREE_TYPE (rhs)))) - { - /* On big-endian the padding is at the 'front' so - just skip the initial bytes. */ - fixed_size_mode mode - = as_a (TYPE_MODE (TREE_TYPE (rhs))); - pad = GET_MODE_SIZE (mode) - size2i / BITS_PER_UNIT; - } len = native_encode_expr (rhs, - buffer, sizeof (buffer), - ((offseti - offset2i) / BITS_PER_UNIT - + pad)); + buffer, sizeof (buffer) - 1, + (offseti - offset2i) / BITS_PER_UNIT); if (len > 0 && len * BITS_PER_UNIT >= maxsizei) { tree type = vr->type; + unsigned char *buf = buffer; + unsigned int amnt = 0; /* Make sure to interpret in a type that has a range covering the whole access size. */ if (INTEGRAL_TYPE_P (vr->type) && maxsizei != TYPE_PRECISION (vr->type)) type = build_nonstandard_integer_type (maxsizei, TYPE_UNSIGNED (type)); - tree val = native_interpret_expr (type, buffer, - maxsizei / BITS_PER_UNIT); + if (BYTES_BIG_ENDIAN) + { + /* For big-endian native_encode_expr stored the rhs + such that the LSB of it is the LSB of buffer[len - 1]. 
+ That bit is stored into memory at position + offset2 + size2 - 1, i.e. in byte + base + (offset2 + size2 - 1) / BITS_PER_UNIT. + E.g. for offset2 1 and size2 14, rhs -1 and memory + previously cleared that is: + 0 1 + 01111111|11111110 + Now, if we want to extract offset 2 and size 12 from + it using native_interpret_expr (which actually works + for integral bitfield types in terms of byte size of + the mode), the native_encode_expr stored the value + into buffer as + XX111111|11111111 + and returned len 2 (the X bits are outside of + precision). + Let sz be maxsize / BITS_PER_UNIT if not extracting + a bitfield, and GET_MODE_SIZE otherwise. + We need to align the LSB of the value we want to + extract as the LSB of buf[sz - 1]. + The LSB from memory we need to read is at position + offset + maxsize - 1. */ + HOST_WIDE_INT sz = maxsizei / BITS_PER_UNIT; + if (INTEGRAL_TYPE_P (type)) + sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type)); + amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i + - offseti - maxsizei) % BITS_PER_UNIT; + if (amnt) + shift_bytes_in_array_right (buffer, len, amnt); + amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i + - offseti - maxsizei - amnt) / BITS_PER_UNIT; + if ((unsigned HOST_WIDE_INT) sz + amnt > (unsigned) len) + len = 0; + else + { + buf = buffer + len - sz - amnt; + len -= (buf - buffer); + } + } + else + { + amnt = ((unsigned HOST_WIDE_INT) offset2i + - offseti) % BITS_PER_UNIT; + if (amnt) + { + buffer[len] = 0; + shift_bytes_in_array_left (buffer, len + 1, amnt); + buf = buffer + 1; + } + } + tree val = native_interpret_expr (type, buf, len); /* If we chop off bits because the types precision doesn't match the memory access size this is ok when optimizing reads but not when called from the DSE code during @@ -2637,82 +2956,101 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, } if (val) - return vn_reference_lookup_or_insert_for_pieces - (vuse, vr->set, vr->type, vr->operands, val); + return data->finish (ao_ref_alias_set (&lhs_ref), + ao_ref_base_alias_set (&lhs_ref), val); } } - else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i)) + else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, + size2i)) { pd_data pd; tree rhs = gimple_assign_rhs1 (def_stmt); if (TREE_CODE (rhs) == SSA_NAME) rhs = SSA_VAL (rhs); pd.rhs = rhs; - pd.offset = (offset2i - offseti) / BITS_PER_UNIT; - pd.size = size2i / BITS_PER_UNIT; - return data->push_partial_def (pd, vuse, maxsizei); + pd.offset = offset2i; + pd.size = size2i; + return data->push_partial_def (pd, ao_ref_alias_set (&lhs_ref), + ao_ref_base_alias_set (&lhs_ref), + offseti, maxsizei); } } } /* 4) Assignment from an SSA name which definition we may be able - to access pieces from. */ + to access pieces from or we can combine to a larger entity. */ else if (known_eq (ref->size, maxsize) && is_gimple_reg_type (vr->type) && !contains_storage_order_barrier_p (vr->operands) && gimple_assign_single_p (def_stmt) - && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME - /* A subset of partial defs from non-constants can be handled - by for example inserting a CONSTRUCTOR, a COMPLEX_EXPR or - even a (series of) BIT_INSERT_EXPR hoping for simplifications - downstream, not so much for actually doing the insertion. 
*/ - && data->partial_defs.is_empty ()) + && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) { tree lhs = gimple_assign_lhs (def_stmt); tree base2; poly_int64 offset2, size2, maxsize2; + HOST_WIDE_INT offset2i, size2i, offseti; bool reverse; - if (lhs_ref_ok) - { - base2 = ao_ref_base (&lhs_ref); - offset2 = lhs_ref.offset; - size2 = lhs_ref.size; - maxsize2 = lhs_ref.max_size; - reverse = reverse_storage_order_for_component_p (lhs); - } - else - base2 = get_ref_base_and_extent (lhs, - &offset2, &size2, &maxsize2, &reverse); + gcc_assert (lhs_ref_ok); + base2 = ao_ref_base (&lhs_ref); + offset2 = lhs_ref.offset; + size2 = lhs_ref.size; + maxsize2 = lhs_ref.max_size; + reverse = reverse_storage_order_for_component_p (lhs); tree def_rhs = gimple_assign_rhs1 (def_stmt); if (!reverse + && !storage_order_barrier_p (lhs) && known_size_p (maxsize2) && known_eq (maxsize2, size2) && adjust_offsets_for_equal_base_address (base, &offset, - base2, &offset2) - && known_subrange_p (offset, maxsize, offset2, size2) - /* ??? We can't handle bitfield precision extracts without - either using an alternate type for the BIT_FIELD_REF and - then doing a conversion or possibly adjusting the offset - according to endianness. */ - && (! INTEGRAL_TYPE_P (vr->type) - || known_eq (ref->size, TYPE_PRECISION (vr->type))) - && multiple_p (ref->size, BITS_PER_UNIT) - && (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs)) - || type_has_mode_precision_p (TREE_TYPE (def_rhs)))) - { - gimple_match_op op (gimple_match_cond::UNCOND, - BIT_FIELD_REF, vr->type, - vn_valueize (def_rhs), - bitsize_int (ref->size), - bitsize_int (offset - offset2)); - tree val = vn_nary_build_or_lookup (&op); - if (val - && (TREE_CODE (val) != SSA_NAME - || ! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val))) + base2, &offset2)) + { + if (data->partial_defs.is_empty () + && known_subrange_p (offset, maxsize, offset2, size2) + /* ??? We can't handle bitfield precision extracts without + either using an alternate type for the BIT_FIELD_REF and + then doing a conversion or possibly adjusting the offset + according to endianness. */ + && (! INTEGRAL_TYPE_P (vr->type) + || known_eq (ref->size, TYPE_PRECISION (vr->type))) + && multiple_p (ref->size, BITS_PER_UNIT)) + { + tree val = NULL_TREE; + if (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs)) + || type_has_mode_precision_p (TREE_TYPE (def_rhs))) + { + gimple_match_op op (gimple_match_cond::UNCOND, + BIT_FIELD_REF, vr->type, + SSA_VAL (def_rhs), + bitsize_int (ref->size), + bitsize_int (offset - offset2)); + val = vn_nary_build_or_lookup (&op); + } + else if (known_eq (ref->size, size2)) + { + gimple_match_op op (gimple_match_cond::UNCOND, + VIEW_CONVERT_EXPR, vr->type, + SSA_VAL (def_rhs)); + val = vn_nary_build_or_lookup (&op); + } + if (val + && (TREE_CODE (val) != SSA_NAME + || ! 
SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val))) + return data->finish (ao_ref_alias_set (&lhs_ref), + ao_ref_base_alias_set (&lhs_ref), val); + } + else if (maxsize.is_constant (&maxsizei) + && offset.is_constant (&offseti) + && offset2.is_constant (&offset2i) + && size2.is_constant (&size2i) + && ranges_known_overlap_p (offset, maxsize, offset2, size2)) { - vn_reference_t res = vn_reference_lookup_or_insert_for_pieces - (vuse, vr->set, vr->type, vr->operands, val); - return res; + pd_data pd; + pd.rhs = SSA_VAL (def_rhs); + pd.offset = offset2i; + pd.size = size2i; + return data->push_partial_def (pd, ao_ref_alias_set (&lhs_ref), + ao_ref_base_alias_set (&lhs_ref), + offseti, maxsizei); } } } @@ -2723,9 +3061,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, && gimple_assign_single_p (def_stmt) && (DECL_P (gimple_assign_rhs1 (def_stmt)) || TREE_CODE (gimple_assign_rhs1 (def_stmt)) == MEM_REF - || handled_component_p (gimple_assign_rhs1 (def_stmt))) - /* Handling this is more complicated, give up for now. */ - && data->partial_defs.is_empty ()) + || handled_component_p (gimple_assign_rhs1 (def_stmt)))) { tree base2; int i, j, k; @@ -2733,8 +3069,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, vn_reference_op_t vro; ao_ref r; - if (!lhs_ref_ok) - return (void *)-1; + gcc_assert (lhs_ref_ok); /* See if the assignment kills REF. */ base2 = ao_ref_base (&lhs_ref); @@ -2796,7 +3131,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, } /* Now re-write REF to be based on the rhs of the assignment. */ - copy_reference_ops_from_ref (gimple_assign_rhs1 (def_stmt), &rhs); + tree rhs1 = gimple_assign_rhs1 (def_stmt); + copy_reference_ops_from_ref (rhs1, &rhs); /* Apply an extra offset to the inner MEM_REF of the RHS. */ if (maybe_ne (extra_off, 0)) @@ -2813,6 +3149,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, extra_off)); } + /* Save the operands since we need to use the original ones for + the hash entry we use. */ + if (!data->saved_operands.exists ()) + data->saved_operands = vr->operands.copy (); + /* We need to pre-pend vr->operands[0..i] to rhs. */ vec old = vr->operands; if (i + 1 + rhs.length () > vr->operands.length ()) @@ -2829,11 +3170,38 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, /* Try folding the new reference to a constant. */ tree val = fully_constant_vn_reference_p (vr); if (val) - return vn_reference_lookup_or_insert_for_pieces - (vuse, vr->set, vr->type, vr->operands, val); + { + if (data->partial_defs.is_empty ()) + return data->finish (ao_ref_alias_set (&lhs_ref), + ao_ref_base_alias_set (&lhs_ref), val); + /* This is the only interesting case for partial-def handling + coming from targets that like to gimplify init-ctors as + aggregate copies from constant data like aarch64 for + PR83518. */ + if (maxsize.is_constant (&maxsizei) && known_eq (ref->size, maxsize)) + { + pd_data pd; + pd.rhs = val; + pd.offset = 0; + pd.size = maxsizei; + return data->push_partial_def (pd, ao_ref_alias_set (&lhs_ref), + ao_ref_base_alias_set (&lhs_ref), + 0, maxsizei); + } + } + + /* Continuing with partial defs isn't easily possible here, we + have to find a full def from further lookups from here. Probably + not worth the special-casing everywhere. */ + if (!data->partial_defs.is_empty ()) + return (void *)-1; /* Adjust *ref from the new operands. 
*/ - if (!ao_ref_init_from_vn_reference (&r, vr->set, vr->type, vr->operands)) + ao_ref rhs1_ref; + ao_ref_init (&rhs1_ref, rhs1); + if (!ao_ref_init_from_vn_reference (&r, ao_ref_alias_set (&rhs1_ref), + ao_ref_base_alias_set (&rhs1_ref), + vr->type, vr->operands)) return (void *)-1; /* This can happen with bitfields. */ if (maybe_ne (ref->size, r.size)) @@ -2845,24 +3213,38 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, /* Invalidate the original access path since it now contains the wrong base. */ data->orig_ref.ref = NULL_TREE; + /* Use the alias-set of this LHS for recording an eventual result. */ + if (data->first_set == -2) + { + data->first_set = ao_ref_alias_set (&lhs_ref); + data->first_base_set = ao_ref_base_alias_set (&lhs_ref); + } /* Keep looking for the adjusted *REF / VR pair. */ return NULL; } - /* 6) For memcpy copies translate the reference through them if - the copy kills ref. */ + /* 6) For memcpy copies translate the reference through them if the copy + kills ref. But we cannot (easily) do this translation if the memcpy is + a storage order barrier, i.e. is equivalent to a VIEW_CONVERT_EXPR that + can modify the storage order of objects (see storage_order_barrier_p). */ else if (data->vn_walk_kind == VN_WALKREWRITE && is_gimple_reg_type (vr->type) /* ??? Handle BCOPY as well. */ && (gimple_call_builtin_p (def_stmt, BUILT_IN_MEMCPY) + || gimple_call_builtin_p (def_stmt, BUILT_IN_MEMCPY_CHK) || gimple_call_builtin_p (def_stmt, BUILT_IN_MEMPCPY) - || gimple_call_builtin_p (def_stmt, BUILT_IN_MEMMOVE)) + || gimple_call_builtin_p (def_stmt, BUILT_IN_MEMPCPY_CHK) + || gimple_call_builtin_p (def_stmt, BUILT_IN_MEMMOVE) + || gimple_call_builtin_p (def_stmt, BUILT_IN_MEMMOVE_CHK)) && (TREE_CODE (gimple_call_arg (def_stmt, 0)) == ADDR_EXPR || TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME) && (TREE_CODE (gimple_call_arg (def_stmt, 1)) == ADDR_EXPR || TREE_CODE (gimple_call_arg (def_stmt, 1)) == SSA_NAME) - && poly_int_tree_p (gimple_call_arg (def_stmt, 2), ©_size) + && (poly_int_tree_p (gimple_call_arg (def_stmt, 2), ©_size) + || (TREE_CODE (gimple_call_arg (def_stmt, 2)) == SSA_NAME + && poly_int_tree_p (SSA_VAL (gimple_call_arg (def_stmt, 2)), + ©_size))) /* Handling this is more complicated, give up for now. */ && data->partial_defs.is_empty ()) { @@ -2895,6 +3277,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, } if (TREE_CODE (lhs) == ADDR_EXPR) { + if (AGGREGATE_TYPE_P (TREE_TYPE (TREE_TYPE (lhs))) + && TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_TYPE (lhs)))) + return (void *)-1; tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (lhs, 0), &lhs_offset); if (!tem) @@ -2923,6 +3308,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, rhs = vn_valueize (rhs); if (TREE_CODE (rhs) == ADDR_EXPR) { + if (AGGREGATE_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))) + && TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_TYPE (rhs)))) + return (void *)-1; tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs, 0), &rhs_offset); if (!tem) @@ -2939,8 +3327,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, else return (void *)-1; } - if (TREE_CODE (rhs) != SSA_NAME - && TREE_CODE (rhs) != ADDR_EXPR) + if (TREE_CODE (rhs) == SSA_NAME) + rhs = SSA_VAL (rhs); + else if (TREE_CODE (rhs) != ADDR_EXPR) return (void *)-1; /* The bases of the destination and the references have to agree. 
*/ @@ -2964,6 +3353,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, if (!known_subrange_p (at, byte_maxsize, lhs_offset, copy_size)) return (void *)-1; + /* Save the operands since we need to use the original ones for + the hash entry we use. */ + if (!data->saved_operands.exists ()) + data->saved_operands = vr->operands.copy (); + /* Make room for 2 operands in the new reference. */ if (vr->operands.length () < 2) { @@ -2992,11 +3386,10 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, /* Try folding the new reference to a constant. */ tree val = fully_constant_vn_reference_p (vr); if (val) - return vn_reference_lookup_or_insert_for_pieces - (vuse, vr->set, vr->type, vr->operands, val); + return data->finish (0, 0, val); /* Adjust *ref from the new operands. */ - if (!ao_ref_init_from_vn_reference (&r, vr->set, vr->type, vr->operands)) + if (!ao_ref_init_from_vn_reference (&r, 0, 0, vr->type, vr->operands)) return (void *)-1; /* This can happen with bitfields. */ if (maybe_ne (ref->size, r.size)) @@ -3008,6 +3401,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, /* Invalidate the original access path since it now contains the wrong base. */ data->orig_ref.ref = NULL_TREE; + /* Use the alias-set of this stmt for recording an eventual result. */ + if (data->first_set == -2) + { + data->first_set = 0; + data->first_base_set = 0; + } /* Keep looking for the adjusted *REF / VR pair. */ return NULL; @@ -3034,7 +3433,8 @@ vn_reference_operands_for_lookup (tree op) vn_reference_t stored in the hashtable if something is found. */ tree -vn_reference_lookup_pieces (tree vuse, alias_set_type set, tree type, +vn_reference_lookup_pieces (tree vuse, alias_set_type set, + alias_set_type base_set, tree type, vec operands, vn_reference_t *vnresult, vn_lookup_kind kind) { @@ -3057,6 +3457,7 @@ vn_reference_lookup_pieces (tree vuse, alias_set_type set, tree type, = valueize_refs (shared_lookup_references); vr1.type = type; vr1.set = set; + vr1.base_set = base_set; vr1.hashcode = vn_reference_compute_hash (&vr1); if ((cst = fully_constant_vn_reference_p (&vr1))) return cst; @@ -3067,14 +3468,15 @@ vn_reference_lookup_pieces (tree vuse, alias_set_type set, tree type, && vr1.vuse) { ao_ref r; - unsigned limit = PARAM_VALUE (PARAM_SCCVN_MAX_ALIAS_QUERIES_PER_ACCESS); - vn_walk_cb_data data (&vr1, NULL_TREE, NULL, kind, true); - if (ao_ref_init_from_vn_reference (&r, set, type, vr1.operands)) - *vnresult = - (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, true, - vn_reference_lookup_2, - vn_reference_lookup_3, - vuse_valueize, limit, &data); + unsigned limit = param_sccvn_max_alias_queries_per_access; + vn_walk_cb_data data (&vr1, NULL_TREE, NULL, kind, true, NULL_TREE); + if (ao_ref_init_from_vn_reference (&r, set, base_set, type, + vr1.operands)) + *vnresult + = ((vn_reference_t) + walk_non_aliased_vuses (&r, vr1.vuse, true, vn_reference_lookup_2, + vn_reference_lookup_3, vuse_valueize, + limit, &data)); gcc_checking_assert (vr1.operands == shared_lookup_references); } @@ -3090,15 +3492,19 @@ vn_reference_lookup_pieces (tree vuse, alias_set_type set, tree type, was NULL.. VNRESULT will be filled in with the vn_reference_t stored in the hashtable if one exists. When TBAA_P is false assume we are looking up a store and treat it as having alias-set zero. - *LAST_VUSE_PTR will be updated with the VUSE the value lookup succeeded. */ + *LAST_VUSE_PTR will be updated with the VUSE the value lookup succeeded. 
+ MASK is either NULL_TREE, or can be an INTEGER_CST if the result of the + load is bitwise anded with MASK and so we are only interested in a subset + of the bits and can ignore if the other bits are uninitialized or + not initialized with constants. */ tree vn_reference_lookup (tree op, tree vuse, vn_lookup_kind kind, - vn_reference_t *vnresult, bool tbaa_p, tree *last_vuse_ptr) + vn_reference_t *vnresult, bool tbaa_p, + tree *last_vuse_ptr, tree mask) { vec operands; struct vn_reference_s vr1; - tree cst; bool valuezied_anything; if (vnresult) @@ -3108,41 +3514,52 @@ vn_reference_lookup (tree op, tree vuse, vn_lookup_kind kind, vr1.operands = operands = valueize_shared_reference_ops_from_ref (op, &valuezied_anything); vr1.type = TREE_TYPE (op); - vr1.set = get_alias_set (op); + ao_ref op_ref; + ao_ref_init (&op_ref, op); + vr1.set = ao_ref_alias_set (&op_ref); + vr1.base_set = ao_ref_base_alias_set (&op_ref); vr1.hashcode = vn_reference_compute_hash (&vr1); - if ((cst = fully_constant_vn_reference_p (&vr1))) - return cst; + if (mask == NULL_TREE) + if (tree cst = fully_constant_vn_reference_p (&vr1)) + return cst; - if (kind != VN_NOWALK - && vr1.vuse) + if (kind != VN_NOWALK && vr1.vuse) { vn_reference_t wvnresult; ao_ref r; - unsigned limit = PARAM_VALUE (PARAM_SCCVN_MAX_ALIAS_QUERIES_PER_ACCESS); + unsigned limit = param_sccvn_max_alias_queries_per_access; /* Make sure to use a valueized reference if we valueized anything. Otherwise preserve the full reference for advanced TBAA. */ if (!valuezied_anything - || !ao_ref_init_from_vn_reference (&r, vr1.set, vr1.type, - vr1.operands)) + || !ao_ref_init_from_vn_reference (&r, vr1.set, vr1.base_set, + vr1.type, vr1.operands)) ao_ref_init (&r, op); vn_walk_cb_data data (&vr1, r.ref ? NULL_TREE : op, - last_vuse_ptr, kind, tbaa_p); - wvnresult = - (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p, - vn_reference_lookup_2, - vn_reference_lookup_3, - vuse_valueize, limit, &data); + last_vuse_ptr, kind, tbaa_p, mask); + + wvnresult + = ((vn_reference_t) + walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p, vn_reference_lookup_2, + vn_reference_lookup_3, vuse_valueize, limit, + &data)); gcc_checking_assert (vr1.operands == shared_lookup_references); if (wvnresult) { + gcc_assert (mask == NULL_TREE); if (vnresult) *vnresult = wvnresult; return wvnresult->result; } + else if (mask) + return data.masked_result; return NULL_TREE; } + if (last_vuse_ptr) + *last_vuse_ptr = vr1.vuse; + if (mask) + return NULL_TREE; return vn_reference_lookup_1 (&vr1, vnresult); } @@ -3162,6 +3579,7 @@ vn_reference_lookup_call (gcall *call, vn_reference_t *vnresult, vr->operands = valueize_shared_reference_ops_from_call (call); vr->type = gimple_expr_type (call); vr->set = 0; + vr->base_set = 0; vr->hashcode = vn_reference_compute_hash (vr); vn_reference_lookup_1 (vr, vnresult); } @@ -3183,7 +3601,10 @@ vn_reference_insert (tree op, tree result, tree vuse, tree vdef) vr1->vuse = vuse_ssa_val (vuse); vr1->operands = valueize_shared_reference_ops_from_ref (op, &tem).copy (); vr1->type = TREE_TYPE (op); - vr1->set = get_alias_set (op); + ao_ref op_ref; + ao_ref_init (&op_ref, op); + vr1->set = ao_ref_alias_set (&op_ref); + vr1->base_set = ao_ref_base_alias_set (&op_ref); vr1->hashcode = vn_reference_compute_hash (vr1); vr1->result = TREE_CODE (result) == SSA_NAME ? SSA_VAL (result) : result; vr1->result_vdef = vdef; @@ -3225,7 +3646,8 @@ vn_reference_insert (tree op, tree result, tree vuse, tree vdef) structure we created. 
*/ vn_reference_t -vn_reference_insert_pieces (tree vuse, alias_set_type set, tree type, +vn_reference_insert_pieces (tree vuse, alias_set_type set, + alias_set_type base_set, tree type, vec operands, tree result, unsigned int value_id) @@ -3239,6 +3661,7 @@ vn_reference_insert_pieces (tree vuse, alias_set_type set, tree type, vr1->operands = valueize_refs (operands); vr1->type = type; vr1->set = set; + vr1->base_set = base_set; vr1->hashcode = vn_reference_compute_hash (vr1); if (result && TREE_CODE (result) == SSA_NAME) result = SSA_VAL (result); @@ -3335,20 +3758,6 @@ init_vn_nary_op_from_pieces (vn_nary_op_t vno, unsigned int length, memcpy (&vno->op[0], ops, sizeof (tree) * length); } -/* Initialize VNO from OP. */ - -static void -init_vn_nary_op_from_op (vn_nary_op_t vno, tree op) -{ - unsigned i; - - vno->opcode = TREE_CODE (op); - vno->length = TREE_CODE_LENGTH (TREE_CODE (op)); - vno->type = TREE_TYPE (op); - for (i = 0; i < vno->length; ++i) - vno->op[i] = TREE_OPERAND (op, i); -} - /* Return the number of operands for a vn_nary ops structure from STMT. */ static unsigned int @@ -3450,22 +3859,6 @@ vn_nary_op_lookup_pieces (unsigned int length, enum tree_code code, return vn_nary_op_lookup_1 (vno1, vnresult); } -/* Lookup OP in the current hash table, and return the resulting value - number if it exists in the hash table. Return NULL_TREE if it does - not exist in the hash table or if the result field of the operation - is NULL. VNRESULT will contain the vn_nary_op_t from the hashtable - if it exists. */ - -tree -vn_nary_op_lookup (tree op, vn_nary_op_t *vnresult) -{ - vn_nary_op_t vno1 - = XALLOCAVAR (struct vn_nary_op_s, - sizeof_vn_nary_op (TREE_CODE_LENGTH (TREE_CODE (op)))); - init_vn_nary_op_from_op (vno1, op); - return vn_nary_op_lookup_1 (vno1, vnresult); -} - /* Lookup the rhs of STMT in the current hash table, and return the resulting value number if it exists in the hash table. Return NULL_TREE if it does not exist in the hash table. VNRESULT will contain the @@ -3718,21 +4111,6 @@ vn_nary_op_get_predicated_value (vn_nary_op_t vno, basic_block bb) return NULL_TREE; } -/* Insert OP into the current hash table with a value number of - RESULT. Return the vn_nary_op_t structure we created and put in - the hashtable. */ - -vn_nary_op_t -vn_nary_op_insert (tree op, tree result) -{ - unsigned length = TREE_CODE_LENGTH (TREE_CODE (op)); - vn_nary_op_t vno1; - - vno1 = alloc_vn_nary_op (length, result, VN_INFO (result)->value_id); - init_vn_nary_op_from_op (vno1, op); - return vn_nary_op_insert_into (vno1, valid_info->nary, true); -} - /* Insert the rhs of STMT into the current hash table with a value number of RESULT. */ @@ -4102,6 +4480,8 @@ set_ssa_val_to (tree from, tree to) vn_ssa_aux_t from_info = VN_INFO (from); tree currval = from_info->valnum; // SSA_VAL (from) poly_int64 toff, coff; + bool curr_undefined = false; + bool curr_invariant = false; /* The only thing we allow as value numbers are ssa_names and invariants. So assert that here. 
We don't allow VN_TOP @@ -4144,9 +4524,9 @@ set_ssa_val_to (tree from, tree to) } return false; } - bool curr_invariant = is_gimple_min_invariant (currval); - bool curr_undefined = (TREE_CODE (currval) == SSA_NAME - && ssa_undefined_value_p (currval, false)); + curr_invariant = is_gimple_min_invariant (currval); + curr_undefined = (TREE_CODE (currval) == SSA_NAME + && ssa_undefined_value_p (currval, false)); if (currval != VN_TOP && !curr_invariant && !curr_undefined @@ -4201,9 +4581,8 @@ set_and_exit: && !operand_equal_p (currval, to, 0) /* Different undefined SSA names are not actually different. See PR82320 for a testcase were we'd otherwise not terminate iteration. */ - && !(TREE_CODE (currval) == SSA_NAME + && !(curr_undefined && TREE_CODE (to) == SSA_NAME - && ssa_undefined_value_p (currval, false) && ssa_undefined_value_p (to, false)) /* ??? For addresses involving volatile objects or types operand_equal_p does not reliably detect ADDR_EXPRs as equal. We know we are only @@ -4215,6 +4594,22 @@ set_and_exit: == get_addr_base_and_unit_offset (TREE_OPERAND (to, 0), &toff)) && known_eq (coff, toff))) { + if (to != from + && currval != VN_TOP + && !curr_undefined + /* We do not want to allow lattice transitions from one value + to another since that may lead to not terminating iteration + (see PR95049). Since there's no convenient way to check + for the allowed transition of VAL -> PHI (loop entry value, + same on two PHIs, to same PHI result) we restrict the check + to invariants. */ + && curr_invariant + && is_gimple_min_invariant (to)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " forced VARYING"); + to = from; + } if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " (changed)\n"); from_info->valnum = to; @@ -4322,8 +4717,12 @@ visit_nary_op (tree lhs, gassign *stmt) operation. */ if (INTEGRAL_TYPE_P (type) && TREE_CODE (rhs1) == SSA_NAME - /* We only handle sign-changes or zero-extension -> & mask. */ - && ((TYPE_UNSIGNED (TREE_TYPE (rhs1)) + /* We only handle sign-changes, zero-extension -> & mask or + sign-extension if we know the inner operation doesn't + overflow. 
*/ + && (((TYPE_UNSIGNED (TREE_TYPE (rhs1)) + || (INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) + && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1)))) && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (rhs1))) || TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (rhs1)))) { @@ -4357,7 +4756,9 @@ visit_nary_op (tree lhs, gassign *stmt) { unsigned lhs_prec = TYPE_PRECISION (type); unsigned rhs_prec = TYPE_PRECISION (TREE_TYPE (rhs1)); - if (lhs_prec == rhs_prec) + if (lhs_prec == rhs_prec + || (INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) + && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1)))) { gimple_match_op match_op (gimple_match_cond::UNCOND, NOP_EXPR, type, ops[0]); @@ -4389,7 +4790,39 @@ visit_nary_op (tree lhs, gassign *stmt) } } } - default:; + break; + case BIT_AND_EXPR: + if (INTEGRAL_TYPE_P (type) + && TREE_CODE (rhs1) == SSA_NAME + && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1) + && default_vn_walk_kind != VN_NOWALK + && CHAR_BIT == 8 + && BITS_PER_UNIT == 8 + && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN + && !integer_all_onesp (gimple_assign_rhs2 (stmt)) + && !integer_zerop (gimple_assign_rhs2 (stmt))) + { + gassign *ass = dyn_cast (SSA_NAME_DEF_STMT (rhs1)); + if (ass + && !gimple_has_volatile_ops (ass) + && vn_get_stmt_kind (ass) == VN_REFERENCE) + { + tree last_vuse = gimple_vuse (ass); + tree op = gimple_assign_rhs1 (ass); + tree result = vn_reference_lookup (op, gimple_vuse (ass), + default_vn_walk_kind, + NULL, true, &last_vuse, + gimple_assign_rhs2 (stmt)); + if (result + && useless_type_conversion_p (TREE_TYPE (result), + TREE_TYPE (op))) + return set_ssa_val_to (lhs, result); + } + } + break; + default: + break; } bool changed = set_ssa_val_to (lhs, lhs); @@ -4460,6 +4893,7 @@ visit_reference_op_call (tree lhs, gcall *stmt) vr2->operands = vr1.operands.copy (); vr2->type = vr1.type; vr2->set = vr1.set; + vr2->base_set = vr1.base_set; vr2->hashcode = vr1.hashcode; vr2->result = lhs; vr2->result_vdef = vdef_val; @@ -4495,13 +4929,22 @@ visit_reference_op_load (tree lhs, tree op, gimple *stmt) if (result && !useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (op))) { - /* We will be setting the value number of lhs to the value number - of VIEW_CONVERT_EXPR (result). - So first simplify and lookup this expression to see if it - is already available. */ - gimple_match_op res_op (gimple_match_cond::UNCOND, - VIEW_CONVERT_EXPR, TREE_TYPE (op), result); - result = vn_nary_build_or_lookup (&res_op); + /* Avoid the type punning in case the result mode has padding where + the op we lookup has not. */ + if (maybe_lt (GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (result))), + GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))) + result = NULL_TREE; + else + { + /* We will be setting the value number of lhs to the value number + of VIEW_CONVERT_EXPR (result). + So first simplify and lookup this expression to see if it + is already available. */ + gimple_match_op res_op (gimple_match_cond::UNCOND, + VIEW_CONVERT_EXPR, TREE_TYPE (op), result); + result = vn_nary_build_or_lookup (&res_op); + } + /* When building the conversion fails avoid inserting the reference again. */ if (!result) @@ -4564,9 +5007,14 @@ visit_reference_op_store (tree lhs, tree op, gimple *stmt) { /* If the TBAA state isn't compatible for downstream reads we cannot value-number the VDEFs the same. */ - alias_set_type set = get_alias_set (lhs); - if (vnresult->set != set - && ! 
alias_set_subset_of (set, vnresult->set)) + ao_ref lhs_ref; + ao_ref_init (&lhs_ref, lhs); + alias_set_type set = ao_ref_alias_set (&lhs_ref); + alias_set_type base_set = ao_ref_base_alias_set (&lhs_ref); + if ((vnresult->set != set + && ! alias_set_subset_of (set, vnresult->set)) + || (vnresult->base_set != base_set + && ! alias_set_subset_of (base_set, vnresult->base_set))) resultsame = false; } } @@ -4900,14 +5348,14 @@ visit_stmt (gimple *stmt, bool backedges_varying_p = false) switch (vn_get_stmt_kind (ass)) { case VN_NARY: - changed = visit_nary_op (lhs, ass); - break; + changed = visit_nary_op (lhs, ass); + break; case VN_REFERENCE: - changed = visit_reference_op_load (lhs, rhs1, ass); - break; + changed = visit_reference_op_load (lhs, rhs1, ass); + break; default: - changed = defs_to_varying (ass); - break; + changed = defs_to_varying (ass); + break; } } } @@ -5142,18 +5590,15 @@ vn_nary_may_trap (vn_nary_op_t nary) honor_nans = flag_trapping_math && !flag_finite_math_only; honor_snans = flag_signaling_nans != 0; } - else if (INTEGRAL_TYPE_P (type) - && TYPE_OVERFLOW_TRAPS (type)) + else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)) honor_trapv = true; } if (nary->length >= 2) rhs2 = nary->op[1]; ret = operation_could_trap_helper_p (nary->opcode, fp_operation, - honor_trapv, - honor_nans, honor_snans, rhs2, - &handled); - if (handled - && ret) + honor_trapv, honor_nans, honor_snans, + rhs2, &handled); + if (handled && ret) return true; for (i = 0; i < nary->length; ++i) @@ -5499,8 +5944,13 @@ eliminate_dom_walker::eliminate_stmt (basic_block b, gimple_stmt_iterator *gsi) /* If this is an assignment from our leader (which happens in the case the value-number is a constant) - then there is nothing to do. */ - if (gimple_assign_single_p (stmt) + then there is nothing to do. Likewise if we run into + inserted code that needed a conversion because of + our type-agnostic value-numbering of loads. */ + if ((gimple_assign_single_p (stmt) + || (is_gimple_assign (stmt) + && (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) + || gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR))) && sprime == gimple_assign_rhs1 (stmt)) return; @@ -5578,23 +6028,75 @@ eliminate_dom_walker::eliminate_stmt (basic_block b, gimple_stmt_iterator *gsi) && (TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME || is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))) { - tree val; tree rhs = gimple_assign_rhs1 (stmt); vn_reference_t vnresult; - val = vn_reference_lookup (lhs, gimple_vuse (stmt), VN_WALKREWRITE, - &vnresult, false); + /* ??? gcc.dg/torture/pr91445.c shows that we lookup a boolean + typed load of a byte known to be 0x11 as 1 so a store of + a boolean 1 is detected as redundant. Because of this we + have to make sure to lookup with a ref where its size + matches the precision. 
*/ + tree lookup_lhs = lhs; + if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)) + && (TREE_CODE (lhs) != COMPONENT_REF + || !DECL_BIT_FIELD_TYPE (TREE_OPERAND (lhs, 1))) + && !type_has_mode_precision_p (TREE_TYPE (lhs))) + { + if (TREE_CODE (lhs) == COMPONENT_REF + || TREE_CODE (lhs) == MEM_REF) + { + tree ltype = build_nonstandard_integer_type + (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (lhs))), + TYPE_UNSIGNED (TREE_TYPE (lhs))); + if (TREE_CODE (lhs) == COMPONENT_REF) + { + tree foff = component_ref_field_offset (lhs); + tree f = TREE_OPERAND (lhs, 1); + if (!poly_int_tree_p (foff)) + lookup_lhs = NULL_TREE; + else + lookup_lhs = build3 (BIT_FIELD_REF, ltype, + TREE_OPERAND (lhs, 0), + TYPE_SIZE (TREE_TYPE (lhs)), + bit_from_pos + (foff, DECL_FIELD_BIT_OFFSET (f))); + } + else + lookup_lhs = build2 (MEM_REF, ltype, + TREE_OPERAND (lhs, 0), + TREE_OPERAND (lhs, 1)); + } + else + lookup_lhs = NULL_TREE; + } + tree val = NULL_TREE; + if (lookup_lhs) + val = vn_reference_lookup (lookup_lhs, gimple_vuse (stmt), + VN_WALKREWRITE, &vnresult, false); if (TREE_CODE (rhs) == SSA_NAME) rhs = VN_INFO (rhs)->valnum; if (val - && operand_equal_p (val, rhs, 0)) + && (operand_equal_p (val, rhs, 0) + /* Due to the bitfield lookups above we can get bit + interpretations of the same RHS as values here. Those + are redundant as well. */ + || (TREE_CODE (val) == SSA_NAME + && gimple_assign_single_p (SSA_NAME_DEF_STMT (val)) + && (val = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val))) + && TREE_CODE (val) == VIEW_CONVERT_EXPR + && TREE_OPERAND (val, 0) == rhs))) { /* We can only remove the later store if the former aliases at least all accesses the later one does or if the store was to readonly memory storing the same value. */ - alias_set_type set = get_alias_set (lhs); + ao_ref lhs_ref; + ao_ref_init (&lhs_ref, lhs); + alias_set_type set = ao_ref_alias_set (&lhs_ref); + alias_set_type base_set = ao_ref_base_alias_set (&lhs_ref); if (! vnresult - || vnresult->set == set - || alias_set_subset_of (set, vnresult->set)) + || ((vnresult->set == set + || alias_set_subset_of (set, vnresult->set)) + && (vnresult->base_set == base_set + || alias_set_subset_of (base_set, vnresult->base_set)))) { if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -6083,7 +6585,11 @@ eliminate_with_rpo_vn (bitmap inserted_exprs) { eliminate_dom_walker walker (CDI_DOMINATORS, inserted_exprs); + eliminate_dom_walker *saved_rpo_avail = rpo_avail; + rpo_avail = &walker; walker.walk (cfun->cfg->x_entry_block_ptr); + rpo_avail = saved_rpo_avail; + return walker.eliminate_cleanup (); } @@ -6198,14 +6704,6 @@ vn_lookup_simplify_result (gimple_match_op *res_op) return res; } -rpo_elim::~rpo_elim () -{ - /* Release the avail vectors. */ - for (rpo_avail_t::iterator i = m_rpo_avail.begin (); - i != m_rpo_avail.end (); ++i) - (*i).second.release (); -} - /* Return a leader for OPs value that is valid at BB. */ tree @@ -6221,16 +6719,15 @@ rpo_elim::eliminate_avail (basic_block bb, tree op) { if (SSA_NAME_IS_DEFAULT_DEF (valnum)) return valnum; - vec > *av = m_rpo_avail.get (valnum); - if (!av || av->is_empty ()) + vn_avail *av = VN_INFO (valnum)->avail; + if (!av) return NULL_TREE; - int i = av->length () - 1; - if ((*av)[i].first == bb->index) + if (av->location == bb->index) /* On tramp3d 90% of the cases are here. */ - return ssa_name ((*av)[i].second); + return ssa_name (av->leader); do { - basic_block abb = BASIC_BLOCK_FOR_FN (cfun, (*av)[i].first); + basic_block abb = BASIC_BLOCK_FOR_FN (cfun, av->location); /* ??? 
During elimination we have to use availability at the definition site of a use we try to replace. This is required to not run into inconsistencies because @@ -6244,7 +6741,7 @@ rpo_elim::eliminate_avail (basic_block bb, tree op) executable. */ if (dominated_by_p_w_unex (bb, abb)) { - tree leader = ssa_name ((*av)[i].second); + tree leader = ssa_name (av->leader); /* Prevent eliminations that break loop-closed SSA. */ if (loops_state_satisfies_p (LOOP_CLOSED_SSA) && ! SSA_NAME_IS_DEFAULT_DEF (leader) @@ -6266,8 +6763,9 @@ rpo_elim::eliminate_avail (basic_block bb, tree op) /* ??? Can we somehow skip to the immediate dominator RPO index (bb_to_rpo)? Again, maybe not worth, on tramp3d the worst number of elements in the vector is 9. */ + av = av->next; } - while (--i >= 0); + while (av); } else if (valnum != VN_TOP) /* valnum is is_gimple_min_invariant. */ @@ -6281,7 +6779,8 @@ void rpo_elim::eliminate_push_avail (basic_block bb, tree leader) { tree valnum = VN_INFO (leader)->valnum; - if (valnum == VN_TOP) + if (valnum == VN_TOP + || is_gimple_min_invariant (valnum)) return; if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -6291,15 +6790,19 @@ rpo_elim::eliminate_push_avail (basic_block bb, tree leader) print_generic_expr (dump_file, valnum); fprintf (dump_file, "\n"); } - bool existed; - vec > &av = m_rpo_avail.get_or_insert (valnum, &existed); - if (!existed) + vn_ssa_aux_t value = VN_INFO (valnum); + vn_avail *av; + if (m_avail_freelist) { - new (&av) vec >; - av = vNULL; - av.reserve_exact (2); + av = m_avail_freelist; + m_avail_freelist = m_avail_freelist->next; } - av.safe_push (std::make_pair (bb->index, SSA_NAME_VERSION (leader))); + else + av = XOBNEW (&vn_ssa_aux_obstack, vn_avail); + av->location = bb->index; + av->leader = SSA_NAME_VERSION (leader); + av->next = value->avail; + value->avail = av; } /* Valueization hook for RPO VN plus required state. */ @@ -6410,7 +6913,7 @@ process_bb (rpo_elim &avail, basic_block bb, if (bb->loop_father->nb_iterations) bb->loop_father->nb_iterations = simplify_replace_tree (bb->loop_father->nb_iterations, - NULL_TREE, NULL_TREE, vn_valueize); + NULL_TREE, NULL_TREE, &vn_valueize_wrapper); } /* Value-number all defs in the basic-block. */ @@ -6781,15 +7284,17 @@ do_unwind (unwind_state *to, int rpo_idx, rpo_elim &avail, int *bb_to_rpo) /* Prune [rpo_idx, ] from avail. */ /* ??? This is O(number-of-values-in-region) which is O(region-size) rather than O(iteration-piece). */ - for (rpo_elim::rpo_avail_t::iterator i - = avail.m_rpo_avail.begin (); - i != avail.m_rpo_avail.end (); ++i) + for (hash_table::iterator i = vn_ssa_aux_hash->begin (); + i != vn_ssa_aux_hash->end (); ++i) { - while (! 
(*i).second.is_empty ())
+	  while ((*i)->avail)
 	{
-	  if (bb_to_rpo[(*i).second.last ().first] < rpo_idx)
+	  if (bb_to_rpo[(*i)->avail->location] < rpo_idx)
 	    break;
-	  (*i).second.pop ();
+	  vn_avail *av = (*i)->avail;
+	  (*i)->avail = (*i)->avail->next;
+	  av->next = avail.m_avail_freelist;
+	  avail.m_avail_freelist = av;
 	}
     }
 }
@@ -6976,7 +7481,7 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs,
   if (iterate)
     {
       loop_p loop;
-      unsigned max_depth = PARAM_VALUE (PARAM_RPO_VN_MAX_LOOP_DEPTH);
+      unsigned max_depth = param_rpo_vn_max_loop_depth;
       FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
 	if (loop_depth (loop) > max_depth)
 	  for (unsigned i = 2;
@@ -7185,11 +7690,16 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs,
 	max_visited = rpo_state[i].visited;
     }
   unsigned nvalues = 0, navail = 0;
-  for (rpo_elim::rpo_avail_t::iterator i = avail.m_rpo_avail.begin ();
-       i != avail.m_rpo_avail.end (); ++i)
+  for (hash_table<vn_ssa_aux_hasher>::iterator i = vn_ssa_aux_hash->begin ();
+       i != vn_ssa_aux_hash->end (); ++i)
     {
       nvalues++;
-      navail += (*i).second.length ();
+      vn_avail *av = (*i)->avail;
+      while (av)
+	{
+	  navail++;
+	  av = av->next;
+	}
     }
   statistics_counter_event (cfun, "RPO blocks", n);
   statistics_counter_event (cfun, "RPO blocks visited", nblk);
@@ -7312,6 +7822,11 @@ pass_fre::execute (function *fun)
   if (iterate_p)
     loop_optimizer_finalize ();
 
+  /* For late FRE after IVOPTs and unrolling, see if we can
+     remove some TREE_ADDRESSABLE and rewrite stuff into SSA.  */
+  if (!may_iterate)
+    todo |= TODO_update_address_taken;
+
   return todo;
 }
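
The push_partial_def calls introduced above record constant fragments of the looked-up reference (the PR83518 situation, where an init-ctor gimplified as an aggregate copy from constant data supplies the bytes piecewise) and only yield a value once the whole access is covered. Below is a minimal self-contained sketch of that byte-accumulation idea, assuming a fixed 4-byte little-endian load; partial_def and fold_load_from_partial_defs are invented names, not GCC interfaces.

// Illustrative sketch, not GCC code: accumulate constant fragments of a
// reference and only fold the load once every byte is covered.  Assumes
// byte-granular offsets and a little-endian 4-byte load.
#include <cstdint>
#include <cstring>
#include <optional>
#include <vector>

struct partial_def
{
  int64_t offset;               // byte offset of the fragment
  std::vector<uint8_t> bytes;   // constant bytes stored there
};

// Return the value of the 4-byte load at LOAD_OFF if the recorded
// fragments cover all of it, otherwise nothing.
std::optional<uint32_t>
fold_load_from_partial_defs (const std::vector<partial_def> &defs,
			     int64_t load_off)
{
  uint8_t buf[4];
  bool covered[4] = { false, false, false, false };
  for (const partial_def &pd : defs)
    for (size_t i = 0; i < pd.bytes.size (); ++i)
      {
	int64_t pos = pd.offset + (int64_t) i - load_off;
	if (pos >= 0 && pos < 4)
	  {
	    buf[pos] = pd.bytes[i];
	    covered[pos] = true;
	  }
      }
  for (bool c : covered)
    if (!c)
      return std::nullopt;      // part of the access is still unknown
  uint32_t val;
  std::memcpy (&val, buf, sizeof val);
  return val;
}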
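
The new MASK argument threaded through vn_reference_lookup and used by the BIT_AND_EXPR case in visit_nary_op means the lookup only has to determine the bits the mask selects; bytes outside the mask may remain uninitialized or non-constant. A hedged sketch of why that is sound, again assuming a 4-byte little-endian value and using invented names (known_byte, fold_masked_load):

// Illustrative sketch, not GCC code: for "load & mask" only the bytes the
// mask selects need to be known constants.
#include <cstdint>
#include <optional>

struct known_byte
{
  bool known;      // do we know the byte's value?
  uint8_t value;   // its value if so
};

std::optional<uint32_t>
fold_masked_load (const known_byte (&bytes)[4], uint32_t mask)
{
  uint32_t result = 0;
  for (int i = 0; i < 4; ++i)
    {
      uint32_t byte_mask = (mask >> (8 * i)) & 0xff;
      if (byte_mask == 0)
	continue;               // masked away, value does not matter
      if (!bytes[i].known)
	return std::nullopt;    // a byte we actually need is unknown
      result |= (uint32_t) (bytes[i].value & byte_mask) << (8 * i);
    }
  return result;
}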
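
The forced-VARYING branch added to set_ssa_val_to (the PR95049 fix) refuses lattice transitions from one invariant directly to a different one, because such sideways moves can keep the iteration from terminating. A small sketch of that lattice discipline with hypothetical names (lat_kind, lat_value); the real lattice in this file is of course richer:

// Illustrative sketch, not GCC code: a value-number lattice that only
// steps downwards (TOP -> constant -> VARYING).  Allowing a transition
// from one constant to a different constant could make the iteration
// oscillate instead of terminating.
enum class lat_kind { TOP, CONSTANT, VARYING };

struct lat_value
{
  lat_kind kind = lat_kind::TOP;
  long cst = 0;

  // Merge a newly computed constant; return true if the value changed.
  // Each name can change at most twice, which bounds the iteration.
  bool meet_constant (long c)
  {
    switch (kind)
      {
      case lat_kind::TOP:
	kind = lat_kind::CONSTANT;
	cst = c;
	return true;
      case lat_kind::CONSTANT:
	if (cst == c)
	  return false;
	kind = lat_kind::VARYING;   // conflicting values: force VARYING
	return true;
      case lat_kind::VARYING:
	return false;
      }
    return false;
  }
};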
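
The relaxed conditions in visit_nary_op rest on the identity that zero-extending a narrow result is the same as masking the widened computation to the narrow precision (and that sign-extension is only safe when the inner operation cannot overflow). A short self-contained check of the zero-extension case with arbitrary example values:

// Illustrative sketch, not GCC code: zero-extension of a narrow result
// equals masking the widened computation to the narrow precision, which
// is what lets the conversion be value-numbered as "& mask".
#include <cassert>
#include <cstdint>

int main ()
{
  uint16_t a = 60000, b = 30000;	// the 16-bit sum wraps

  uint32_t widened = (uint32_t) (uint16_t) (a + b);
  uint32_t masked = ((uint32_t) a + (uint32_t) b) & 0xffff;

  assert (widened == masked);
  return 0;
}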
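
The lookup_lhs rewriting in eliminate_stmt exists because a load in a type whose precision is smaller than its mode (the gcc.dg/torture/pr91445.c case) can produce the same value even though the underlying bytes differ, so a store may only be treated as redundant when an access of matching size is compared. A tiny illustration of the failure mode, assuming an 8-bit byte viewed through a 1-bit boolean:

// Illustrative sketch, not GCC code: a byte holding 0x11 reads as "true"
// through a 1-bit boolean view, so comparing values in the narrow type
// wrongly suggests that storing "true" over it is redundant, while the
// full byte does change.
#include <cassert>
#include <cstdint>

int main ()
{
  uint8_t byte_in_memory = 0x11;

  bool loaded_flag = (byte_in_memory & 1) != 0;   // reads as 1
  bool value_to_store = true;                     // also 1

  // Narrow comparison: looks redundant.
  assert (loaded_flag == value_to_store);

  // Byte-sized comparison: it is not.
  assert (byte_in_memory != (uint8_t) value_to_store);
  return 0;
}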
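
The availability bookkeeping of rpo_elim now hangs a singly linked vn_avail chain off each value's vn_ssa_aux entry, newest block first, and recycles entries through m_avail_freelist when do_unwind prunes an iteration region. A rough sketch of that structure under invented names (avail_entry, avail_chain, avail_allocator); it is not the GCC implementation:

// Illustrative sketch, not GCC code: per-value availability kept as a
// singly linked chain of (block, leader) entries, newest first, with a
// freelist so unwinding a region is cheap.
#include <deque>
#include <vector>

struct avail_entry
{
  int location;        // basic-block index where the leader is available
  int leader;          // SSA name version of the leader
  avail_entry *next;   // older entries (earlier blocks in RPO)
};

struct avail_chain
{
  avail_entry *head = nullptr;
};

struct avail_allocator
{
  std::deque<avail_entry> storage;   // stable addresses, bulk-freed
  avail_entry *freelist = nullptr;

  avail_entry *get ()
  {
    if (freelist)
      {
	avail_entry *e = freelist;
	freelist = e->next;
	return e;
      }
    storage.push_back (avail_entry ());
    return &storage.back ();
  }
};

// Record LEADER for VALUE as available from block BB on.
void
push_avail (avail_chain &value, avail_allocator &alloc, int bb, int leader)
{
  avail_entry *e = alloc.get ();
  e->location = bb;
  e->leader = leader;
  e->next = value.head;
  value.head = e;
}

// Drop every entry recorded at RPO position LIMIT or later, returning it
// to the freelist; BB_TO_RPO maps block index to RPO position.
void
unwind_avail (avail_chain &value, avail_allocator &alloc,
	      const std::vector<int> &bb_to_rpo, int limit)
{
  while (value.head && bb_to_rpo[value.head->location] >= limit)
    {
      avail_entry *e = value.head;
      value.head = e->next;
      e->next = alloc.freelist;
      alloc.freelist = e;
    }
}

Compared with the previous per-value vectors, pushing is constant time without reallocation, unwinding just splices entries onto the freelist, and no separate pass over a hash map is needed to release storage at the end, which is why rpo_elim::~rpo_elim could be removed.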