modulo-sched: Carefully process loop counter initialization [PR97421]
authorRoman Zhuykov <zhroma@ispras.ru>
Sat, 5 Dec 2020 15:45:27 +0000 (18:45 +0300)
committerRoman Zhuykov <zhroma@ispras.ru>
Sat, 5 Dec 2020 15:45:27 +0000 (18:45 +0300)
Do not allow direct adjustment of the pre-header initialization instruction
for the count register if that register is read by some later instruction in
the same basic block.

gcc/ChangeLog:

PR rtl-optimization/97421
* modulo-sched.c (generate_prolog_epilog): Remove forward
declaration, adjust last argument name and type.
(const_iteration_count): Add bool pointer parameter to return
whether count register is read in pre-header after its
initialization.
(sms_schedule): Fix count register initialization adjustment
procedure according to what const_iteration_count said.

gcc/testsuite/ChangeLog:

PR rtl-optimization/97421
* gcc.c-torture/execute/pr97421-1.c: New test.
* gcc.c-torture/execute/pr97421-2.c: New test.
* gcc.c-torture/execute/pr97421-3.c: New test.

gcc/modulo-sched.c
gcc/testsuite/gcc.c-torture/execute/pr97421-1.c [new file with mode: 0644]
gcc/testsuite/gcc.c-torture/execute/pr97421-2.c [new file with mode: 0644]
gcc/testsuite/gcc.c-torture/execute/pr97421-3.c [new file with mode: 0644]

index 6f699a874e3c69349da9cfa1b80f225a95256606..4568674aa6c58f0edf71672120239159ee37c2b8 100644 (file)
@@ -210,8 +210,6 @@ static int sms_order_nodes (ddg_ptr, int, int *, int *);
 static void set_node_sched_params (ddg_ptr);
 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
 static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
-static void generate_prolog_epilog (partial_schedule_ptr, class loop *,
-                                    rtx, rtx);
 static int calculate_stage_count (partial_schedule_ptr, int);
 static void calculate_must_precede_follow (ddg_node_ptr, int, int,
                                           int, int, sbitmap, sbitmap, sbitmap);
@@ -391,30 +389,40 @@ doloop_register_get (rtx_insn *head, rtx_insn *tail)
    this constant.  Otherwise return 0.  */
 static rtx_insn *
 const_iteration_count (rtx count_reg, basic_block pre_header,
-                      int64_t * count)
+                      int64_t *count, bool* adjust_inplace)
 {
   rtx_insn *insn;
   rtx_insn *head, *tail;
 
+  *adjust_inplace = false;
+  bool read_after = false;
+
   if (! pre_header)
     return NULL;
 
   get_ebb_head_tail (pre_header, pre_header, &head, &tail);
 
   for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
-    if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
-       rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
+    if (single_set (insn) && rtx_equal_p (count_reg,
+                                         SET_DEST (single_set (insn))))
       {
        rtx pat = single_set (insn);
 
        if (CONST_INT_P (SET_SRC (pat)))
          {
            *count = INTVAL (SET_SRC (pat));
+           *adjust_inplace = !read_after;
            return insn;
          }
 
        return NULL;
       }
+    else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn))
+      {
+       read_after = true;
+       if (reg_set_p (count_reg, insn))
+          break;
+      }
 
   return NULL;
 }
@@ -1126,7 +1134,7 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
 /* Generate the instructions (including reg_moves) for prolog & epilog.  */
 static void
 generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
-                        rtx count_reg, rtx count_init)
+                       rtx count_reg, bool adjust_init)
 {
   int i;
   int last_stage = PS_STAGE_COUNT (ps) - 1;
@@ -1135,12 +1143,12 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
   /* Generate the prolog, inserting its insns on the loop-entry edge.  */
   start_sequence ();
 
-  if (!count_init)
+  if (adjust_init)
     {
       /* Generate instructions at the beginning of the prolog to
-         adjust the loop count by STAGE_COUNT.  If loop count is constant
-         (count_init), this constant is adjusted by STAGE_COUNT in
-         generate_prolog_epilog function.  */
+        adjust the loop count by STAGE_COUNT.  If loop count is constant
+        and it is not used anywhere in prologue, this constant is adjusted by
+        STAGE_COUNT outside of generate_prolog_epilog function.  */
       rtx sub_reg = NULL_RTX;
 
       sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
@@ -1528,7 +1536,8 @@ sms_schedule (void)
       rtx_insn *count_init;
       int mii, rec_mii, stage_count, min_cycle;
       int64_t loop_count = 0;
-      bool opt_sc_p;
+      bool opt_sc_p, adjust_inplace = false;
+      basic_block pre_header;
 
       if (! (g = g_arr[loop->num]))
         continue;
@@ -1569,19 +1578,13 @@ sms_schedule (void)
        }
 
 
-      /* In case of th loop have doloop register it gets special
-        handling.  */
-      count_init = NULL;
-      if ((count_reg = doloop_register_get (head, tail)))
-       {
-         basic_block pre_header;
-
-         pre_header = loop_preheader_edge (loop)->src;
-         count_init = const_iteration_count (count_reg, pre_header,
-                                             &loop_count);
-       }
+      count_reg = doloop_register_get (head, tail);
       gcc_assert (count_reg);
 
+      pre_header = loop_preheader_edge (loop)->src;
+      count_init = const_iteration_count (count_reg, pre_header, &loop_count,
+                                         &adjust_inplace);
+
       if (dump_file && count_init)
         {
           fprintf (dump_file, "SMS const-doloop ");
@@ -1701,9 +1704,20 @@ sms_schedule (void)
              print_partial_schedule (ps, dump_file);
            }
  
-          /* case the BCT count is not known , Do loop-versioning */
-         if (count_reg && ! count_init)
+         if (count_init)
+           {
+              if (adjust_inplace)
+               {
+                 /* When possible, set new iteration count of loop kernel in
+                    place.  Otherwise, generate_prolog_epilog creates an insn
+                    to adjust.  */
+                 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
+                                                           - stage_count + 1);
+               }
+           }
+         else
             {
+             /* In case the BCT count is not known, do loop-versioning.  */
              rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
                                         gen_int_mode (stage_count,
                                                       GET_MODE (count_reg)));
@@ -1713,12 +1727,7 @@ sms_schedule (void)
              loop_version (loop, comp_rtx, &condition_bb,
                            prob, prob.invert (),
                            prob, prob.invert (), true);
-            }
-
-         /* Set new iteration count of loop kernel.  */
-          if (count_reg && count_init)
-           SET_SRC (single_set (count_init)) = GEN_INT (loop_count
-                                                    - stage_count + 1);
+           }
 
          /* Now apply the scheduled kernel to the RTL of the loop.  */
          permute_partial_schedule (ps, g->closing_branch->first_note);
@@ -1735,7 +1744,7 @@ sms_schedule (void)
          if (dump_file)
            print_node_sched_params (dump_file, g->num_nodes, ps);
          /* Generate prolog and epilog.  */
-          generate_prolog_epilog (ps, loop, count_reg, count_init);
+         generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace);
          break;
        }
 
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c
new file mode 100644 (file)
index 0000000..e32fb12
--- /dev/null
@@ -0,0 +1,23 @@
+/* PR rtl-optimization/97421 */
+/* { dg-additional-options "-fmodulo-sched" } */
+
+int a, b, d, e;
+int *volatile c = &a;
+
+__attribute__((noinline))
+void f(void)
+{
+  for (int g = 2; g >= 0; g--) {
+    d = 0;
+    for (b = 0; b <= 2; b++)
+      ;
+    e = *c;
+  }
+}
+
+int main(void)
+{
+  f();
+  if (b != 3)
+    __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c
new file mode 100644 (file)
index 0000000..142bcbc
--- /dev/null
@@ -0,0 +1,18 @@
+/* PR rtl-optimization/97421 */
+/* { dg-additional-options "-fmodulo-sched -fno-dce -fno-strict-aliasing" } */
+
+static int a, b, c;
+int *d = &c;
+int **e = &d;
+int ***f = &e;
+int main()
+{
+  int h;
+  for (a = 2; a; a--)
+    for (h = 0; h <= 2; h++)
+      for (b = 0; b <= 2; b++)
+        ***f = 6;
+
+  if (b != 3)
+    __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c
new file mode 100644 (file)
index 0000000..3f1485a
--- /dev/null
@@ -0,0 +1,22 @@
+/* PR rtl-optimization/97421 */
+/* { dg-additional-options "-fmodulo-sched" } */
+
+int a, b, c;
+short d;
+void e(void) {
+  unsigned f = 0;
+  for (; f <= 2; f++) {
+    int g[1];
+    int h = (long)g;
+    c = 0;
+    for (; c < 10; c++)
+      g[0] = a = 0;
+    for (; a <= 2; a++)
+      b = d;
+  }
+}
+int main(void) {
+  e();
+  if (a != 3)
+    __builtin_abort();
+}