builtins.c (expand_builtin_memory_copy_args): Add might_overlap parm.
authorAaron Sawdey <acsawdey@linux.ibm.com>
Wed, 2 Oct 2019 14:23:51 +0000 (14:23 +0000)
committerAaron Sawdey <acsawdey@gcc.gnu.org>
Wed, 2 Oct 2019 14:23:51 +0000 (09:23 -0500)
2019-10-02  Aaron Sawdey <acsawdey@linux.ibm.com>

* builtins.c (expand_builtin_memory_copy_args): Add might_overlap parm.
(expand_builtin_memcpy): Use might_overlap parm.
(expand_builtin_mempcpy_args): Use might_overlap parm.
(expand_builtin_memmove): Call expand_builtin_memory_copy_args.
(expand_builtin_memory_copy_args): Add might_overlap parm.
* expr.c (emit_block_move_via_cpymem): Rename to
emit_block_move_via_pattern, add might_overlap parm, use cpymem
or movmem optab as appropriate.
(emit_block_move_hints): Add might_overlap parm, do the right
thing for might_overlap==true.
* expr.h (emit_block_move_hints): Update prototype.

From-SVN: r276461

gcc/ChangeLog
gcc/builtins.c
gcc/expr.c
gcc/expr.h

index d66b03e61deac31955422086c5919ec90dda71d8..fe1457769db452d7c35d01fc2f60d48b1e76a9a6 100644 (file)
@@ -1,3 +1,17 @@
+2019-10-02  Aaron Sawdey <acsawdey@linux.ibm.com>
+
+       * builtins.c (expand_builtin_memory_copy_args): Add might_overlap parm.
+       (expand_builtin_memcpy): Use might_overlap parm.
+       (expand_builtin_mempcpy_args): Use might_overlap parm.
+       (expand_builtin_memmove): Call expand_builtin_memory_copy_args.
+       (expand_builtin_memory_copy_args): Add might_overlap parm.
+       * expr.c (emit_block_move_via_cpymem): Rename to
+       emit_block_move_via_pattern, add might_overlap parm, use cpymem
+       or movmem optab as appropriate.
+       (emit_block_move_hints): Add might_overlap parm, do the right
+       thing for might_overlap==true.
+       * expr.h (emit_block_move_hints): Update prototype.
+
 2019-10-02  Eric Botcazou  <ebotcazou@adacore.com>
 
        * tree-eh.h (unsplit_eh_edges): Declare.
index 1fd4b88bcacd511a3bccdcacb041ff67469a6f53..fa17afd2e8a974338adbbeb583e42124dd973f3a 100644 (file)
@@ -127,7 +127,8 @@ static rtx expand_builtin_memchr (tree, rtx);
 static rtx expand_builtin_memcpy (tree, rtx);
 static rtx expand_builtin_memory_copy_args (tree dest, tree src, tree len,
                                            rtx target, tree exp,
-                                           memop_ret retmode);
+                                           memop_ret retmode,
+                                           bool might_overlap);
 static rtx expand_builtin_memmove (tree, rtx);
 static rtx expand_builtin_mempcpy (tree, rtx);
 static rtx expand_builtin_mempcpy_args (tree, tree, tree, rtx, tree, memop_ret);
@@ -3790,14 +3791,14 @@ expand_builtin_memcpy (tree exp, rtx target)
   check_memop_access (exp, dest, src, len);
 
   return expand_builtin_memory_copy_args (dest, src, len, target, exp,
-                                         /*retmode=*/ RETURN_BEGIN);
+                                         /*retmode=*/ RETURN_BEGIN, false);
 }
 
 /* Check a call EXP to the memmove built-in for validity.
    Return NULL_RTX on both success and failure.  */
 
 static rtx
-expand_builtin_memmove (tree exp, rtx)
+expand_builtin_memmove (tree exp, rtx target)
 {
   if (!validate_arglist (exp,
                         POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
@@ -3809,7 +3810,8 @@ expand_builtin_memmove (tree exp, rtx)
 
   check_memop_access (exp, dest, src, len);
 
-  return NULL_RTX;
+  return expand_builtin_memory_copy_args (dest, src, len, target, exp,
+                                         /*retmode=*/ RETURN_BEGIN, true);
 }
 
 /* Expand a call EXP to the mempcpy builtin.
@@ -3858,7 +3860,8 @@ expand_builtin_mempcpy (tree exp, rtx target)
 
 static rtx
 expand_builtin_memory_copy_args (tree dest, tree src, tree len,
-                                rtx target, tree exp, memop_ret retmode)
+                                rtx target, tree exp, memop_ret retmode,
+                                bool might_overlap)
 {
   const char *src_str;
   unsigned int src_align = get_pointer_alignment (src);
@@ -3894,9 +3897,12 @@ expand_builtin_memory_copy_args (tree dest, tree src, tree len,
                        &probable_max_size);
   src_str = c_getstr (src);
 
-  /* If SRC is a string constant and block move would be done
-     by pieces, we can avoid loading the string from memory
-     and only stored the computed constants.  */
+  /* If SRC is a string constant and block move would be done by
+     pieces, we can avoid loading the string from memory and only
+     stored the computed constants.  This works in the overlap
+     (memmove) case as well because store_by_pieces just generates a
+     series of stores of constants from the string constant returned
+     by c_getstr().  */
   if (src_str
       && CONST_INT_P (len_rtx)
       && (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1
@@ -3923,13 +3929,14 @@ expand_builtin_memory_copy_args (tree dest, tree src, tree len,
     method = BLOCK_OP_TAILCALL;
   bool use_mempcpy_call = (targetm.libc_has_fast_function (BUILT_IN_MEMPCPY)
                           && retmode == RETURN_END
+                          && !might_overlap
                           && target != const0_rtx);
   if (use_mempcpy_call)
     method = BLOCK_OP_NO_LIBCALL_RET;
   dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx, method,
                                     expected_align, expected_size,
                                     min_size, max_size, probable_max_size,
-                                    use_mempcpy_call, &is_move_done);
+                                    use_mempcpy_call, &is_move_done, might_overlap);
 
   /* Bail out when a mempcpy call would be expanded as libcall and when
      we have a target that provides a fast implementation
@@ -3962,7 +3969,7 @@ expand_builtin_mempcpy_args (tree dest, tree src, tree len,
                             rtx target, tree orig_exp, memop_ret retmode)
 {
   return expand_builtin_memory_copy_args (dest, src, len, target, orig_exp,
-                                         retmode);
+                                         retmode, false);
 }
 
 /* Expand into a movstr instruction, if one is available.  Return NULL_RTX if
index 7a7070670ed86b55c9f6256bcfbabe54ea69109e..bd6a71a4e2fde97c511a6827d39c6229bfb1cb60 100644 (file)
@@ -73,9 +73,10 @@ along with GCC; see the file COPYING3.  If not see
 int cse_not_expected;
 
 static bool block_move_libcall_safe_for_call_parm (void);
-static bool emit_block_move_via_cpymem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT,
-                                       unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
-                                       unsigned HOST_WIDE_INT);
+static bool emit_block_move_via_pattern (rtx, rtx, rtx, unsigned, unsigned, 
+                                        HOST_WIDE_INT, unsigned HOST_WIDE_INT,
+                                        unsigned HOST_WIDE_INT,
+                                        unsigned HOST_WIDE_INT, bool);
 static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
 static void clear_by_pieces (rtx, unsigned HOST_WIDE_INT, unsigned int);
 static rtx_insn *compress_float_constant (rtx, rtx);
@@ -1562,7 +1563,8 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
                       unsigned HOST_WIDE_INT min_size,
                       unsigned HOST_WIDE_INT max_size,
                       unsigned HOST_WIDE_INT probable_max_size,
-                      bool bail_out_libcall, bool *is_move_done)
+                      bool bail_out_libcall, bool *is_move_done,
+                      bool might_overlap)
 {
   int may_use_call;
   rtx retval = 0;
@@ -1622,13 +1624,30 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
       set_mem_size (y, const_size);
     }
 
-  if (CONST_INT_P (size) && can_move_by_pieces (INTVAL (size), align))
-    move_by_pieces (x, y, INTVAL (size), align, RETURN_BEGIN);
-  else if (emit_block_move_via_cpymem (x, y, size, align,
-                                      expected_align, expected_size,
-                                      min_size, max_size, probable_max_size))
+  bool pieces_ok = can_move_by_pieces (INTVAL (size), align);
+  bool pattern_ok = false;
+
+  if (!CONST_INT_P (size) || !pieces_ok || might_overlap)
+    {
+      pattern_ok = 
+       emit_block_move_via_pattern (x, y, size, align,
+                                    expected_align, expected_size,
+                                    min_size, max_size, probable_max_size,
+                                    might_overlap);
+      if (!pattern_ok && might_overlap)
+       {
+         /* Do not try any of the other methods below as they are not safe
+            for overlapping moves.  */
+         *is_move_done = false;
+         return retval;
+       }
+    }
+
+  if (pattern_ok)
     ;
-  else if (may_use_call
+  else if (CONST_INT_P (size) && pieces_ok)
+    move_by_pieces (x, y, INTVAL (size), align, RETURN_BEGIN);
+  else if (may_use_call && !might_overlap
           && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))
           && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (y)))
     {
@@ -1645,7 +1664,8 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
       retval = emit_block_copy_via_libcall (x, y, size,
                                            method == BLOCK_OP_TAILCALL);
     }
-
+  else if (might_overlap)
+    *is_move_done = false;
   else
     emit_block_move_via_loop (x, y, size, align);
 
@@ -1721,15 +1741,26 @@ block_move_libcall_safe_for_call_parm (void)
   return true;
 }
 
-/* A subroutine of emit_block_move.  Expand a cpymem pattern;
-   return true if successful.  */
+/* A subroutine of emit_block_move.  Expand a cpymem or movmem pattern;
+   return true if successful.
+  
+   X is the destination of the copy or move.
+   Y is the source of the copy or move.
+   SIZE is the size of the block to be moved.
+
+   MIGHT_OVERLAP indicates this originated with expansion of a
+   builtin_memmove() and the source and destination blocks may
+   overlap.
+  */
 
 static bool
-emit_block_move_via_cpymem (rtx x, rtx y, rtx size, unsigned int align,
-                           unsigned int expected_align, HOST_WIDE_INT expected_size,
-                           unsigned HOST_WIDE_INT min_size,
-                           unsigned HOST_WIDE_INT max_size,
-                           unsigned HOST_WIDE_INT probable_max_size)
+emit_block_move_via_pattern (rtx x, rtx y, rtx size, unsigned int align,
+                            unsigned int expected_align,
+                            HOST_WIDE_INT expected_size,
+                            unsigned HOST_WIDE_INT min_size,
+                            unsigned HOST_WIDE_INT max_size,
+                            unsigned HOST_WIDE_INT probable_max_size,
+                            bool might_overlap)
 {
   if (expected_align < align)
     expected_align = align;
@@ -1752,7 +1783,11 @@ emit_block_move_via_cpymem (rtx x, rtx y, rtx size, unsigned int align,
   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
     {
       scalar_int_mode mode = mode_iter.require ();
-      enum insn_code code = direct_optab_handler (cpymem_optab, mode);
+      enum insn_code code;
+      if (might_overlap)
+       code = direct_optab_handler (movmem_optab, mode);
+      else
+       code = direct_optab_handler (cpymem_optab, mode);
 
       if (code != CODE_FOR_nothing
          /* We don't need MODE to be narrower than BITS_PER_HOST_WIDE_INT
index 6eb70bf12f17ebe1dd8eb36fe5cc0145c9ecc6d6..17c9163c351113e9e4badc3be08507b0d738b321 100644 (file)
@@ -116,7 +116,8 @@ extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods,
                                  unsigned HOST_WIDE_INT,
                                  unsigned HOST_WIDE_INT,
                                  bool bail_out_libcall = false,
-                                 bool *is_move_done = NULL);
+                                 bool *is_move_done = NULL,
+                                 bool might_overlap = false);
 extern rtx emit_block_cmp_hints (rtx, rtx, rtx, tree, rtx, bool,
                                 by_pieces_constfn, void *);
 extern bool emit_storent_insn (rtx to, rtx from);