re PR tree-optimization/83518 (Missing optimization: useless instructions should...
authorRichard Biener <rguenther@suse.de>
Mon, 8 Jul 2019 07:09:24 +0000 (07:09 +0000)
committerRichard Biener <rguenth@gcc.gnu.org>
Mon, 8 Jul 2019 07:09:24 +0000 (07:09 +0000)
2019-07-08  Richard Biener  <rguenther@suse.de>

PR tree-optimization/83518
* tree-ssa-sccvn.c: Include splay-tree.h.
(struct pd_range, struct pd_data): New.
(struct vn_walk_cb_data): Add data to track partial definitions.
(vn_walk_cb_data::~vn_walk_cb_data): New.
(vn_walk_cb_data::push_partial_def): New.
(pd_tree_alloc, pd_tree_dealloc, pd_range_compare): New.
(vn_reference_lookup_2): When partial defs are registered give up.
(vn_reference_lookup_3): Track partial defs for memset and
constructor zeroing and for defs from constants.

* gcc.dg/tree-ssa/ssa-fre-73.c: New testcase.
* gcc.dg/tree-ssa/ssa-fre-74.c: Likewise.
* gcc.dg/tree-ssa/ssa-fre-75.c: Likewise.
* gcc.dg/tree-ssa/ssa-fre-76.c: Likewise.
* g++.dg/tree-ssa/pr83518.C: Likewise.

From-SVN: r273194

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/g++.dg/tree-ssa/pr83518.C [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-75.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c [new file with mode: 0644]
gcc/tree-ssa-sccvn.c

index 145b7b206c688fa03d687ef2bff0d454bb4323cf..74a98908b9052156471ea59ac797da784d49698e 100644 (file)
@@ -1,3 +1,16 @@
+2019-07-08  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/83518
+       * tree-ssa-sccvn.c: Include splay-tree.h.
+       (struct pd_range, struct pd_data): New.
+       (struct vn_walk_cb_data): Add data to track partial definitions.
+       (vn_walk_cb_data::~vn_walk_cb_data): New.
+       (vn_walk_cb_data::push_partial_def): New.
+       (pd_tree_alloc, pd_tree_dealloc, pd_range_compare): New.
+       (vn_reference_lookup_2): When partial defs are registered give up.
+       (vn_reference_lookup_3): Track partial defs for memset and
+       constructor zeroing and for defs from constants.
+
 2019-07-08  Richard Sandiford  <richard.sandiford@arm.com>
 
        * doc/install.texi (bootstrap-Og): Document.
index 911dc2167359ec72b33155ebee1e8bc142fd7e56..db493b33cf54124fed3f14893b53ff9398f26819 100644 (file)
@@ -1,3 +1,12 @@
+2019-07-08  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/83518
+       * gcc.dg/tree-ssa/ssa-fre-73.c: New testcase.
+       * gcc.dg/tree-ssa/ssa-fre-74.c: Likewise.
+       * gcc.dg/tree-ssa/ssa-fre-75.c: Likewise.
+       * gcc.dg/tree-ssa/ssa-fre-76.c: Likewise.
+       * g++.dg/tree-ssa/pr83518.C: Likewise.
+
 2019-07-08  Richard Sandiford  <richard.sandiford@arm.com>
 
        * gcc.dg/guality/guality.h: Include <sys/prctl.h> on Linux targets.
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr83518.C b/gcc/testsuite/g++.dg/tree-ssa/pr83518.C
new file mode 100644 (file)
index 0000000..3e153c5
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+unsigned test()
+{
+  int arr[] = {5,4,3,2,1};
+  int sum = 0;
+
+  for(int i = 0;i < 5;++i)
+    {
+      for(int j = 0; j < 5; ++j)
+       {
+         int t = arr[i];
+         arr[i] = arr[j];
+         arr[j] = t;
+       }
+    }
+
+  for(int i = 0; i < 5; ++i)
+    {
+      sum += arr[i];
+    }
+
+  return sum;
+}
+
+/* { dg-final { scan-tree-dump "return 15;" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c
new file mode 100644 (file)
index 0000000..f138203
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-fre1" } */
+
+typedef int v2si __attribute__((vector_size(__SIZEOF_INT__ * 2)));
+int foo (int *a)
+{
+  a[0] = 1;
+  a[1] = 2;
+  v2si x = *(v2si *)a;
+  *(v2si *)&a[2] = x;
+  return a[3];
+}
+
+/* { dg-final { scan-tree-dump "return 2;" "fre1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c
new file mode 100644 (file)
index 0000000..4439d83
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-fre1" } */
+
+typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4)));
+int foo (int *a)
+{
+  a[2] = 2;
+  a[0] = 0;
+  a[1] = 1;
+  a[3] = 4;
+  v4si x = *(v4si *)a;
+  *(v4si *)&a[4] = x;
+  return a[4] + a[7];
+}
+
+/* { dg-final { scan-tree-dump "return 4;" "fre1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-75.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-75.c
new file mode 100644 (file)
index 0000000..54bfe73
--- /dev/null
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int32plus } */
+/* { dg-options "-O -fgimple -fdump-tree-fre1" } */
+
+typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4)));
+#if __SIZEOF_INT__ == 4
+__GIMPLE (ssa) int foo (int *a)
+{
+  v4si _2;
+  int _3;
+  int _4;
+  int _5;
+  int _6;
+  int _7;
+  int _8;
+  int _9;
+
+__BB(2):
+  __MEM <unsigned char[3 * __SIZEOF_INT__]> ((char *)a_1(D) + 4) = _Literal (unsigned char[3 * __SIZEOF_INT__]) {};
+  __MEM <int> (a_1(D) + 8) = 2;
+  __MEM <int> (a_1(D)) = 1;
+  _2 = __MEM <v4si> (a_1(D));
+  _3 = __BIT_FIELD_REF <int> (_2, 32, 0);
+  _4 = __BIT_FIELD_REF <int> (_2, 32, 32);
+  _5 = __BIT_FIELD_REF <int> (_2, 32, 64);
+  _6 = __BIT_FIELD_REF <int> (_2, 32, 96);
+  _7 = _3 + _4;
+  _8 = _7 + _5;
+  _9 = _8 + _6;
+  return _9;
+}
+#endif
+
+/* { dg-final { scan-tree-dump "return 3;" "fre1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c
new file mode 100644 (file)
index 0000000..7a1eede
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-fre1" } */
+
+typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4)));
+int foo (int *a)
+{
+  __builtin_memset (a, 0, 2 * __SIZEOF_INT__);
+  a[2] = 2;
+  a[0] = 1;
+  a[3] = 3;
+  v4si x = *(v4si *)a;
+  *(v4si *)&a[4] = x;
+  return a[4] + a[5] + a[7];
+}
+
+/* { dg-final { scan-tree-dump "return 4;" "fre1" } } */
index 67d51bfd5c9f94dcbffd810c7ebde6219a7f4488..3c45adcf3266a6f353f2cf42963a16f976491427 100644 (file)
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
+#include "splay-tree.h"
 #include "backend.h"
 #include "rtl.h"
 #include "tree.h"
@@ -360,6 +361,8 @@ static void init_vn_nary_op_from_stmt (vn_nary_op_t, gimple *);
 static void init_vn_nary_op_from_pieces (vn_nary_op_t, unsigned int,
                                         enum tree_code, tree, tree *);
 static tree vn_lookup_simplify_result (gimple_match_op *);
+static vn_reference_t vn_reference_lookup_or_insert_for_pieces
+         (tree, alias_set_type, tree, vec<vn_reference_op_s, va_heap>, tree);
 
 /* Return whether there is value numbering information for a given SSA name.  */
 
@@ -1646,20 +1649,245 @@ vn_reference_lookup_1 (vn_reference_t vr, vn_reference_t *vnresult)
   return NULL_TREE;
 }
 
+
+/* Partial definition tracking support.  */
+
+struct pd_range
+{
+  HOST_WIDE_INT offset;
+  HOST_WIDE_INT size;
+};
+
+struct pd_data
+{
+  tree rhs;
+  HOST_WIDE_INT offset;
+  HOST_WIDE_INT size;
+};
+
+/* Context for alias walking.  */
+
 struct vn_walk_cb_data
 {
   vn_walk_cb_data (vn_reference_t vr_, tree *last_vuse_ptr_,
-                   vn_lookup_kind vn_walk_kind_, bool tbaa_p_)
+                  vn_lookup_kind vn_walk_kind_, bool tbaa_p_)
     : vr (vr_), last_vuse_ptr (last_vuse_ptr_), vn_walk_kind (vn_walk_kind_),
-      tbaa_p (tbaa_p_)
-    {}
+      tbaa_p (tbaa_p_), known_ranges (NULL)
+   {}
+  ~vn_walk_cb_data ();
+  void *push_partial_def (const pd_data& pd, tree, HOST_WIDE_INT);
 
   vn_reference_t vr;
   tree *last_vuse_ptr;
   vn_lookup_kind vn_walk_kind;
   bool tbaa_p;
+
+  /* The VDEFs of partial defs we come along.  */
+  auto_vec<pd_data, 2> partial_defs;
+  /* The first defs range to avoid splay tree setup in most cases.  */
+  pd_range first_range;
+  tree first_vuse;
+  splay_tree known_ranges;
+  obstack ranges_obstack;
 };
 
+vn_walk_cb_data::~vn_walk_cb_data ()
+{
+  if (known_ranges)
+    {
+      splay_tree_delete (known_ranges);
+      obstack_free (&ranges_obstack, NULL);
+    }
+}
+
+/* pd_range splay-tree helpers.  */
+
+static int
+pd_range_compare (splay_tree_key offset1p, splay_tree_key offset2p)
+{
+  HOST_WIDE_INT offset1 = *(HOST_WIDE_INT *)offset1p;
+  HOST_WIDE_INT offset2 = *(HOST_WIDE_INT *)offset2p;
+  if (offset1 < offset2)
+    return -1;
+  else if (offset1 > offset2)
+    return 1;
+  return 0;
+}
+
+static void *
+pd_tree_alloc (int size, void *data_)
+{
+  vn_walk_cb_data *data = (vn_walk_cb_data *)data_;
+  return obstack_alloc (&data->ranges_obstack, size);
+}
+
+static void
+pd_tree_dealloc (void *, void *)
+{
+}
+
+/* Push PD to the vector of partial definitions returning a
+   value when we are ready to combine things with VUSE and MAXSIZEI,
+   NULL when we want to continue looking for partial defs or -1
+   on failure.  */
+
+void *
+vn_walk_cb_data::push_partial_def (const pd_data &pd, tree vuse,
+                                  HOST_WIDE_INT maxsizei)
+{
+  if (partial_defs.is_empty ())
+    {
+      partial_defs.safe_push (pd);
+      first_range.offset = pd.offset;
+      first_range.size = pd.size;
+      first_vuse = vuse;
+      last_vuse_ptr = NULL;
+    }
+  else
+    {
+      if (!known_ranges)
+       {
+         /* ???  Optimize the case where the second partial def
+            completes things.  */
+         gcc_obstack_init (&ranges_obstack);
+         known_ranges
+             = splay_tree_new_with_allocator (pd_range_compare, 0, 0,
+                                              pd_tree_alloc,
+                                              pd_tree_dealloc, this);
+         splay_tree_insert (known_ranges,
+                            (splay_tree_key)&first_range.offset,
+                            (splay_tree_value)&first_range);
+       }
+      if (known_ranges)
+       {
+         pd_range newr = { pd.offset, pd.size };
+         splay_tree_node n;
+         pd_range *r;
+         /* Lookup the predecessor of offset + 1 and see if
+            we need to merge with it.  */
+         HOST_WIDE_INT loffset = newr.offset + 1;
+         if ((n = splay_tree_predecessor (known_ranges,
+                                          (splay_tree_key)&loffset))
+             && ((r = (pd_range *)n->value), true)
+             && ranges_known_overlap_p (r->offset, r->size + 1,
+                                        newr.offset, newr.size))
+           {
+             /* Ignore partial defs already covered.  */
+             if (known_subrange_p (newr.offset, newr.size,
+                                   r->offset, r->size))
+               return NULL;
+             r->size = MAX (r->offset + r->size,
+                            newr.offset + newr.size) - r->offset;
+           }
+         else
+           {
+             /* newr.offset wasn't covered yet, insert the
+                range.  */
+             r = XOBNEW (&ranges_obstack, pd_range);
+             *r = newr;
+             splay_tree_insert (known_ranges,
+                                (splay_tree_key)&r->offset,
+                                (splay_tree_value)r);
+           }
+         /* Merge r which now contains newr and is a member
+            of the splay tree with adjacent overlapping ranges.  */
+         pd_range *rafter;
+         while ((n = splay_tree_successor (known_ranges,
+                                           (splay_tree_key)&r->offset))
+                && ((rafter = (pd_range *)n->value), true)
+                && ranges_known_overlap_p (r->offset, r->size + 1,
+                                           rafter->offset, rafter->size))
+           {
+             r->size = MAX (r->offset + r->size,
+                            rafter->offset + rafter->size) - r->offset;
+             splay_tree_remove (known_ranges,
+                                (splay_tree_key)&rafter->offset);
+           }
+         partial_defs.safe_push (pd);
+
+         /* Now we have merged newr into the range tree.
+            When we have covered [offseti, sizei] then the
+            tree will contain exactly one node which has
+            the desired properties and it will be 'r'.  */
+         if (known_subrange_p (0, maxsizei / BITS_PER_UNIT,
+                               r->offset, r->size))
+           {
+             /* Now simply native encode all partial defs
+                in reverse order.  */
+             unsigned ndefs = partial_defs.length ();
+             /* We support up to 512-bit values (for V8DFmode).  */
+             unsigned char buffer[64];
+             int len;
+
+             while (!partial_defs.is_empty ())
+               {
+                 pd_data pd = partial_defs.pop ();
+                 if (TREE_CODE (pd.rhs) == CONSTRUCTOR)
+                   /* Empty CONSTRUCTOR.  */
+                   memset (buffer + MAX (0, pd.offset),
+                           0, MIN ((HOST_WIDE_INT)sizeof (buffer), pd.size));
+                 else
+                   {
+                     len = native_encode_expr (pd.rhs,
+                                               buffer + MAX (0, pd.offset),
+                                               sizeof (buffer - MAX (0, pd.offset)),
+                                               MAX (0, -pd.offset));
+                     if (len <= 0
+                         || len < (pd.size - MAX (0, -pd.offset)))
+                       {
+                         if (dump_file && (dump_flags & TDF_DETAILS))
+                           fprintf (dump_file, "Failed to encode %u "
+                                    "partial definitions\n", ndefs);
+                         return (void *)-1;
+                       }
+                   }
+               }
+
+             tree type = vr->type;
+             /* Make sure to interpret in a type that has a range
+                covering the whole access size.  */
+             if (INTEGRAL_TYPE_P (vr->type)
+                 && maxsizei != TYPE_PRECISION (vr->type))
+               type = build_nonstandard_integer_type (maxsizei,
+                                                      TYPE_UNSIGNED (type));
+             tree val = native_interpret_expr (type, buffer,
+                                               maxsizei / BITS_PER_UNIT);
+             /* If we chop off bits because the types precision doesn't
+                match the memory access size this is ok when optimizing
+                reads but not when called from the DSE code during
+                elimination.  */
+             if (val
+                 && type != vr->type)
+               {
+                 if (! int_fits_type_p (val, vr->type))
+                   val = NULL_TREE;
+                 else
+                   val = fold_convert (vr->type, val);
+               }
+
+             if (val)
+               {
+                 if (dump_file && (dump_flags & TDF_DETAILS))
+                   fprintf (dump_file, "Successfully combined %u "
+                            "partial definitions\n", ndefs);
+                 return vn_reference_lookup_or_insert_for_pieces
+                     (first_vuse,
+                      vr->set, vr->type, vr->operands, val);
+               }
+             else
+               {
+                 if (dump_file && (dump_flags & TDF_DETAILS))
+                   fprintf (dump_file, "Failed to interpret %u "
+                            "encoded partial definitions\n", ndefs);
+                 return (void *)-1;
+               }
+           }
+       }
+    }
+  /* Continue looking for partial defs.  */
+  return NULL;
+}
+
 /* Callback for walk_non_aliased_vuses.  Adjusts the vn_reference_t VR_
    with the current VUSE and performs the expression lookup.  */
 
@@ -1671,6 +1899,11 @@ vn_reference_lookup_2 (ao_ref *op ATTRIBUTE_UNUSED, tree vuse, void *data_)
   vn_reference_s **slot;
   hashval_t hash;
 
+  /* If we have partial definitions recorded we have to go through
+     vn_reference_lookup_3.  */
+  if (!data->partial_defs.is_empty ())
+    return NULL;
+
   if (data->last_vuse_ptr)
     *data->last_vuse_ptr = vuse;
 
@@ -2179,8 +2412,10 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
       else
        return (void *)-1;
       tree len = gimple_call_arg (def_stmt, 2);
-      if (known_subrange_p (offset, maxsize, offset2,
-                           wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT))
+      HOST_WIDE_INT leni, offset2i, offseti;
+      if (data->partial_defs.is_empty ()
+         && known_subrange_p (offset, maxsize, offset2,
+                              wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT))
        {
          tree val;
          if (integer_zerop (gimple_call_arg (def_stmt, 1)))
@@ -2209,6 +2444,19 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
          return vn_reference_lookup_or_insert_for_pieces
                   (vuse, vr->set, vr->type, vr->operands, val);
        }
+      /* For now handle clearing memory with partial defs.  */
+      else if (integer_zerop (gimple_call_arg (def_stmt, 1))
+              && tree_to_poly_int64 (len).is_constant (&leni)
+              && offset.is_constant (&offseti)
+              && offset2.is_constant (&offset2i)
+              && maxsize.is_constant (&maxsizei))
+       {
+         pd_data pd;
+         pd.rhs = build_constructor (NULL_TREE, NULL);
+         pd.offset = offset2i - offseti;
+         pd.size = leni;
+         return data->push_partial_def (pd, vuse, maxsizei);
+       }
     }
 
   /* 2) Assignment from an empty CONSTRUCTOR.  */
@@ -2219,18 +2467,37 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
     {
       tree base2;
       poly_int64 offset2, size2, maxsize2;
+      HOST_WIDE_INT offset2i, size2i;
       bool reverse;
       base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
                                       &offset2, &size2, &maxsize2, &reverse);
       if (known_size_p (maxsize2)
          && known_eq (maxsize2, size2)
          && adjust_offsets_for_equal_base_address (base, &offset,
-                                                   base2, &offset2)
-         && known_subrange_p (offset, maxsize, offset2, size2))
+                                                   base2, &offset2))
        {
-         tree val = build_zero_cst (vr->type);
-         return vn_reference_lookup_or_insert_for_pieces
-                  (vuse, vr->set, vr->type, vr->operands, val);
+         if (data->partial_defs.is_empty ()
+             && known_subrange_p (offset, maxsize, offset2, size2))
+           {
+             tree val = build_zero_cst (vr->type);
+             return vn_reference_lookup_or_insert_for_pieces
+                 (vuse, vr->set, vr->type, vr->operands, val);
+           }
+         else if (maxsize.is_constant (&maxsizei)
+                  && maxsizei % BITS_PER_UNIT == 0
+                  && offset.is_constant (&offseti)
+                  && offseti % BITS_PER_UNIT == 0
+                  && offset2.is_constant (&offset2i)
+                  && offset2i % BITS_PER_UNIT == 0
+                  && size2.is_constant (&size2i)
+                  && size2i % BITS_PER_UNIT == 0)
+           {
+             pd_data pd;
+             pd.rhs = gimple_assign_rhs1 (def_stmt);
+             pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
+             pd.size = size2i / BITS_PER_UNIT;
+             return data->push_partial_def (pd, vuse, maxsizei);
+           }
        }
     }
 
@@ -2253,7 +2520,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
     {
       tree base2;
       poly_int64 offset2, size2, maxsize2;
-      HOST_WIDE_INT offset2i;
+      HOST_WIDE_INT offset2i, size2i;
       bool reverse;
       base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
                                       &offset2, &size2, &maxsize2, &reverse);
@@ -2266,45 +2533,60 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
                                                    base2, &offset2)
          && offset.is_constant (&offseti)
          && offset2.is_constant (&offset2i)
-         && known_subrange_p (offseti, maxsizei, offset2, size2))
+         && size2.is_constant (&size2i))
        {
-         /* We support up to 512-bit values (for V8DFmode).  */
-         unsigned char buffer[64];
-         int len;
-
-         tree rhs = gimple_assign_rhs1 (def_stmt);
-         if (TREE_CODE (rhs) == SSA_NAME)
-           rhs = SSA_VAL (rhs);
-         len = native_encode_expr (rhs,
-                                   buffer, sizeof (buffer),
-                                   (offseti - offset2i) / BITS_PER_UNIT);
-         if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
+         if (data->partial_defs.is_empty ()
+             && known_subrange_p (offseti, maxsizei, offset2, size2))
            {
-             tree type = vr->type;
-             /* Make sure to interpret in a type that has a range
-                covering the whole access size.  */
-             if (INTEGRAL_TYPE_P (vr->type)
-                 && maxsizei != TYPE_PRECISION (vr->type))
-               type = build_nonstandard_integer_type (maxsizei,
-                                                      TYPE_UNSIGNED (type));
-             tree val = native_interpret_expr (type, buffer,
-                                               maxsizei / BITS_PER_UNIT);
-             /* If we chop off bits because the types precision doesn't
-                match the memory access size this is ok when optimizing
-                reads but not when called from the DSE code during
-                elimination.  */
-             if (val
-                 && type != vr->type)
+             /* We support up to 512-bit values (for V8DFmode).  */
+             unsigned char buffer[64];
+             int len;
+
+             tree rhs = gimple_assign_rhs1 (def_stmt);
+             if (TREE_CODE (rhs) == SSA_NAME)
+               rhs = SSA_VAL (rhs);
+             len = native_encode_expr (rhs,
+                                       buffer, sizeof (buffer),
+                                       (offseti - offset2i) / BITS_PER_UNIT);
+             if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
                {
-                 if (! int_fits_type_p (val, vr->type))
-                   val = NULL_TREE;
-                 else
-                   val = fold_convert (vr->type, val);
-               }
+                 tree type = vr->type;
+                 /* Make sure to interpret in a type that has a range
+                    covering the whole access size.  */
+                 if (INTEGRAL_TYPE_P (vr->type)
+                     && maxsizei != TYPE_PRECISION (vr->type))
+                   type = build_nonstandard_integer_type (maxsizei,
+                                                          TYPE_UNSIGNED (type));
+                 tree val = native_interpret_expr (type, buffer,
+                                                   maxsizei / BITS_PER_UNIT);
+                 /* If we chop off bits because the types precision doesn't
+                    match the memory access size this is ok when optimizing
+                    reads but not when called from the DSE code during
+                    elimination.  */
+                 if (val
+                     && type != vr->type)
+                   {
+                     if (! int_fits_type_p (val, vr->type))
+                       val = NULL_TREE;
+                     else
+                       val = fold_convert (vr->type, val);
+                   }
 
-             if (val)
-               return vn_reference_lookup_or_insert_for_pieces
-                        (vuse, vr->set, vr->type, vr->operands, val);
+                 if (val)
+                   return vn_reference_lookup_or_insert_for_pieces
+                       (vuse, vr->set, vr->type, vr->operands, val);
+               }
+           }
+         else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i))
+           {
+             pd_data pd;
+             tree rhs = gimple_assign_rhs1 (def_stmt);
+             if (TREE_CODE (rhs) == SSA_NAME)
+               rhs = SSA_VAL (rhs);
+             pd.rhs = rhs;
+             pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
+             pd.size = size2i / BITS_PER_UNIT;
+             return data->push_partial_def (pd, vuse, maxsizei);
            }
        }
     }
@@ -2315,7 +2597,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
           && is_gimple_reg_type (vr->type)
           && !contains_storage_order_barrier_p (vr->operands)
           && gimple_assign_single_p (def_stmt)
-          && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
+          && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
+          /* A subset of partial defs from non-constants can be handled
+             by for example inserting a CONSTRUCTOR, a COMPLEX_EXPR or
+             even a (series of) BIT_INSERT_EXPR hoping for simplifications
+             downstream, not so much for actually doing the insertion.  */
+          && data->partial_defs.is_empty ())
     {
       tree base2;
       poly_int64 offset2, size2, maxsize2;
@@ -2363,7 +2650,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
           && gimple_assign_single_p (def_stmt)
           && (DECL_P (gimple_assign_rhs1 (def_stmt))
               || TREE_CODE (gimple_assign_rhs1 (def_stmt)) == MEM_REF
-              || handled_component_p (gimple_assign_rhs1 (def_stmt))))
+              || handled_component_p (gimple_assign_rhs1 (def_stmt)))
+          /* Handling this is more complicated, give up for now.  */
+          && data->partial_defs.is_empty ())
     {
       tree base2;
       int i, j, k;
@@ -2497,7 +2786,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
               || TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME)
           && (TREE_CODE (gimple_call_arg (def_stmt, 1)) == ADDR_EXPR
               || TREE_CODE (gimple_call_arg (def_stmt, 1)) == SSA_NAME)
-          && poly_int_tree_p (gimple_call_arg (def_stmt, 2), &copy_size))
+          && poly_int_tree_p (gimple_call_arg (def_stmt, 2), &copy_size)
+          /* Handling this is more complicated, give up for now.  */
+          && data->partial_defs.is_empty ())
     {
       tree lhs, rhs;
       ao_ref r;