re PR fortran/90539 (481.wrf slowdown by 25% on Intel Kaby with -Ofast -march=native...
authorThomas Koenig <tkoenig@gcc.gnu.org>
Sun, 2 Jun 2019 15:18:22 +0000 (15:18 +0000)
committerThomas Koenig <tkoenig@gcc.gnu.org>
Sun, 2 Jun 2019 15:18:22 +0000 (15:18 +0000)
2019-06-02  Thomas Koenig  <tkoenig@gcc.gnu.org>

PR fortran/90539
* trans-expr.c (gfc_conv_subref_array_arg): If the size of the
expression can be determined to be one, treat it as contiguous.
Set likelihood of presence of an actual argument according to
PRED_FORTRAN_ABSENT_DUMMY and likelihood of being contiguous
according to PRED_FORTRAN_CONTIGUOUS.

2019-06-02  Thomas Koenig  <tkoenig@gcc.gnu.org>

PR fortran/90539
* predict.def (PRED_FORTRAN_CONTIGUOUS): New predictor.

2019-06-02  Thomas Koenig  <tkoenig@gcc.gnu.org>

PR fortran/90539
* gfortran.dg/internal_pack_24.f90: New test.

From-SVN: r271844

gcc/ChangeLog
gcc/fortran/ChangeLog
gcc/fortran/trans-expr.c
gcc/predict.def
gcc/testsuite/ChangeLog
gcc/testsuite/gfortran.dg/internal_pack_24.f90 [new file with mode: 0644]

index 20bdc2bec37aca9db029bd6bda022e31654f3484..0d4ba896ea4bdc5587656b7258ab13aade565a8a 100644 (file)
@@ -1,3 +1,8 @@
+2019-06-02  Thomas Koenig  <tkoenig@gcc.gnu.org>
+
+       PR fortran/90539
+       * predict.def (PRED_FORTRAN_CONTIGUOUS): New predictor.
+
 2019-06-01  Martin Sebor  <msebor@redhat.com>
 
        PR middle-end/90694
index 16cc7e9cfaf237b04a8539aff800c6b2bfc73534..1c3f8bc5948bb43f797fa8cb07748cd43bb03ffd 100644 (file)
@@ -1,6 +1,15 @@
+2019-06-02  Thomas Koenig  <tkoenig@gcc.gnu.org>
+
+       PR fortran/90539
+       * trans-expr.c (gfc_conv_subref_array_arg): If the size of the
+       expression can be determined to be one, treat it as contiguous.
+       Set likelihood of presence of an actual argument according to
+       PRED_FORTRAN_ABSENT_DUMMY and likelihood of being contiguous
+       according to PRED_FORTRAN_CONTIGUOUS.
+
 2019-05-30  Thomas Koenig  <tkoenig@gcc.gnu.org>
 
-    * gfc-internals.texi (Translating to GENERIC): New chapter.
+       * gfc-internals.texi (Translating to GENERIC): New chapter.
 
 2019-05-30  Marek Polacek  <polacek@redhat.com>
 
index 5183029a66685bebd70cbed55cdd41ce9fe1a311..d23520fdbaa4caf2888e3d07fd6b83eeedaad469 100644 (file)
@@ -4922,15 +4922,35 @@ class_array_fcn:
          gfc_se cont_se, array_se;
          stmtblock_t if_block, else_block;
          tree if_stmt, else_stmt;
+         mpz_t size;
+         bool size_set;
 
          cont_var = gfc_create_var (boolean_type_node, "contiguous");
 
-         /* cont_var = is_contiguous (expr); .  */
-         gfc_init_se (&cont_se, parmse);
-         gfc_conv_is_contiguous_expr (&cont_se, expr);
-         gfc_add_block_to_block (&se->pre, &(&cont_se)->pre);
-         gfc_add_modify (&se->pre, cont_var, cont_se.expr);
-         gfc_add_block_to_block (&se->pre, &(&cont_se)->post);
+         /* If the size is known to be one at compile-time, set
+            cont_var to true unconditionally.  This may look
+            inelegant, but we're only doing this during
+            optimization, so the statements will be optimized away,
+            and this saves complexity here.  */
+
+         size_set = gfc_array_size (expr, &size);
+         if (size_set && mpz_cmp_ui (size, 1) == 0)
+           {
+             gfc_add_modify (&se->pre, cont_var,
+                             build_one_cst (boolean_type_node));
+           }
+         else
+           {
+             /* cont_var = is_contiguous (expr); .  */
+             gfc_init_se (&cont_se, parmse);
+             gfc_conv_is_contiguous_expr (&cont_se, expr);
+             gfc_add_block_to_block (&se->pre, &(&cont_se)->pre);
+             gfc_add_modify (&se->pre, cont_var, cont_se.expr);
+             gfc_add_block_to_block (&se->pre, &(&cont_se)->post);
+           }
+
+         if (size_set)
+           mpz_clear (size);
 
          /* arrayse->expr = descriptor of a.  */
          gfc_init_se (&array_se, se);
@@ -4953,7 +4973,9 @@ class_array_fcn:
 
          /* And put the above into an if statement.  */
          pre_stmts = fold_build3_loc (input_location, COND_EXPR, void_type_node,
-                                     cont_var, if_stmt, else_stmt);
+                                      gfc_likely (cont_var,
+                                                  PRED_FORTRAN_CONTIGUOUS),
+                                      if_stmt, else_stmt);
        }
       else
        {
@@ -4976,11 +4998,11 @@ class_array_fcn:
          gfc_add_modify (&else_block, pointer, build_int_cst (type, 0));
          else_stmt = gfc_finish_block (&else_block);
 
-         tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present_var,
+         tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node,
+                                gfc_likely (present_var,
+                                            PRED_FORTRAN_ABSENT_DUMMY),
                                 pre_stmts, else_stmt);
          gfc_add_expr_to_block (&se->pre, tmp);
-
-
        }
       else
        gfc_add_expr_to_block (&se->pre, pre_stmts);
@@ -4995,9 +5017,16 @@ class_array_fcn:
          tmp = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node,
                                 cont_var,
                                 build_zero_cst (boolean_type_node));
+         tmp = gfc_unlikely (tmp, PRED_FORTRAN_CONTIGUOUS);
+
          if (pass_optional)
-           post_cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR,
-                                        boolean_type_node, present_var, tmp);
+           {
+             tree present_likely = gfc_likely (present_var,
+                                               PRED_FORTRAN_ABSENT_DUMMY);
+             post_cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR,
+                                          boolean_type_node, present_likely,
+                                          tmp);
+           }
          else
            post_cond = tmp;
        }
index 53b39ab0e3adf5c7968930e5a07c7614227e5ef7..24c1385943e269c98cd88b30e3d7051fdd3fc68e 100644 (file)
@@ -229,3 +229,10 @@ DEF_PREDICTOR (PRED_FORTRAN_ABSENT_DUMMY, "Fortran absent dummy", \
    to be very likely.  */
 DEF_PREDICTOR (PRED_FORTRAN_LOOP_PREHEADER, "Fortran loop preheader", \
               HITRATE (99), 0)
+
+/* Actual arguments passed to Fortran assumed-size dummy arrays can be
+   non-contiguous, in which case they need to be repacked.  */
+
+DEF_PREDICTOR (PRED_FORTRAN_CONTIGUOUS, "Fortran contiguous", \
+              HITRATE (75), 0)
+       
index bf406ae88f494964e2f6d590b3fc0542a493da48..6535ca09f32d230c0538a792d32275f9397e6fa6 100644 (file)
@@ -1,3 +1,8 @@
+2019-06-02  Thomas Koenig  <tkoenig@gcc.gnu.org>
+
+       PR fortran/90539
+       * gfortran.dg/internal_pack_24.f90: New test.
+
 2019-06-01  Iain Sandoe  <iain@sandoe.co.uk>
 
        PR target/90698
diff --git a/gcc/testsuite/gfortran.dg/internal_pack_24.f90 b/gcc/testsuite/gfortran.dg/internal_pack_24.f90
new file mode 100644 (file)
index 0000000..cc2443e
--- /dev/null
@@ -0,0 +1,39 @@
+! { dg-do run }
+! { dg-additional-options "-O -fdump-tree-optimized" }
+module y
+  implicit none
+contains
+  subroutine foo(a,b,c,d,e,f)
+    real, dimension(1), intent(inout) :: a, b, c, d, e, f
+    if (any([a,b,c,d,e,f] /= [1,2,3,4,5,6])) stop 1
+    a = -a
+    b = -b
+    c = -c
+    d = -d
+    e = -e
+    f = -f
+  end subroutine foo
+end module y
+module x
+  use y
+  implicit none
+contains
+  subroutine bar(a)
+    real, dimension(:) :: a
+    integer :: n1, n3, n5
+    n1 = 1
+    n3 = 3
+    n5 = 5
+    call foo(a(n1:n1), a(n1+1:n1+1), a(n3:n3), a(n3+1:n3+1), a(n5:n5), a(n5+1:n5+1))
+  end subroutine bar
+end module x
+
+program main
+  use x
+  real, dimension(6) :: a,b
+  b = [1,2,3,4,5,6]
+  a = b
+  call bar(a)
+  if (any(a /= -b)) stop 2
+end program main
+! { dg-final { scan-tree-dump-not "contiguous" "optimized" } }