re PR fortran/66094 (Handle transpose(A) in inline matmul)

author Thomas Koenig <tkoenig@gcc.gnu.org>

Wed, 24 May 2017 18:44:35 +0000 (18:44 +0000)

committer Thomas Koenig <tkoenig@gcc.gnu.org>

Wed, 24 May 2017 18:44:35 +0000 (18:44 +0000)
author Thomas Koenig <tkoenig@gcc.gnu.org>
Wed, 24 May 2017 18:44:35 +0000 (18:44 +0000)
committer Thomas Koenig <tkoenig@gcc.gnu.org>
Wed, 24 May 2017 18:44:35 +0000 (18:44 +0000)
diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog

index b3179e073c77dc471bc927b62be3544890cfba0b..760cbe2f4afe5ed6ff678411c732cf14c4a61fb3 100644 (file)
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,10 @@
+2017-05-24  Thomas Koenig  <tkoenig@gcc.gnu.org>
+
+       PR fortran/66094
+       * frontend-passes.c (matrix_case):  Add A2TB2.
+       (inline_limit_check):  Handle MATMUL(TRANSPOSE(A),B)
+       (inline_matmul_assign):  Likewise.
+
  2017-05-23  Thomas Schwinge  <thomas@codesourcery.com>
  
         * openmp.c (OACC_KERNELS_CLAUSES): Add "OMP_CLAUSE_NUM_GANGS",
diff --git a/gcc/fortran/frontend-passes.c b/gcc/fortran/frontend-passes.c

index 32b4e80d515fac9aa74f772de0ff7c338b2944e4..1ea5b5403ce0523b56a85e1dd54a11d826f42efb 100644 (file)
--- a/gcc/fortran/frontend-passes.c
+++ b/gcc/fortran/frontend-passes.c
@@ -112,7 +112,7 @@ static int var_num = 1;
  
  /* What sort of matrix we are dealing with when inlining MATMUL.  */
  
-enum matrix_case { none=0, A2B2, A2B1, A1B2, A2B2T };
+enum matrix_case { none=0, A2B2, A2B1, A1B2, A2B2T, A2TB2 };
  
  /* Keep track of the number of expressions we have inserted so far
     using create_var.  */
@@ -2252,7 +2252,7 @@ inline_limit_check (gfc_expr *a, gfc_expr *b, enum matrix_case m_case)
    gfc_typespec ts;
    gfc_expr *cond;
  
-  gcc_assert (m_case == A2B2 || m_case == A2B2T);
+  gcc_assert (m_case == A2B2 || m_case == A2B2T || m_case == A2TB2);
  
    /* Calculation is done in real to avoid integer overflow.  */
  
@@ -2425,6 +2425,20 @@ matmul_lhs_realloc (gfc_expr *c, gfc_expr *a, gfc_expr *b,
        cond = build_logical_expr (INTRINSIC_OR, ne1, ne2);
        break;
  
+    case A2TB2:
+
+      ar->start[0] = get_array_inq_function (GFC_ISYM_SIZE, a, 2);
+      ar->start[1] = get_array_inq_function (GFC_ISYM_SIZE, b, 2);
+
+      ne1 = build_logical_expr (INTRINSIC_NE,
+                               get_array_inq_function (GFC_ISYM_SIZE, c, 1),
+                               get_array_inq_function (GFC_ISYM_SIZE, a, 2));
+      ne2 = build_logical_expr (INTRINSIC_NE,
+                               get_array_inq_function (GFC_ISYM_SIZE, c, 2),
+                               get_array_inq_function (GFC_ISYM_SIZE, b, 2));
+      cond = build_logical_expr (INTRINSIC_OR, ne1, ne2);
+      break;
+
      case A2B1:
        ar->start[0] = get_array_inq_function (GFC_ISYM_SIZE, a, 1);
        cond = build_logical_expr (INTRINSIC_NE,
@@ -3009,7 +3023,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
  
    a = expr2->value.function.actual;
    matrix_a = check_conjg_transpose_variable (a->expr, &conjg_a, &transpose_a);
-  if (transpose_a || matrix_a == NULL)
+  if (matrix_a == NULL)
      return 0;
  
    b = a->next;
@@ -3026,27 +3040,36 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
        || gfc_check_dependency (expr1, matrix_b, true))
      return 0;
  
+  m_case = none;
    if (matrix_a->rank == 2)
      {
-      if (matrix_b->rank == 1)
-       m_case = A2B1;
+      if (transpose_a)
+       {
+         if (matrix_b->rank == 2 && !transpose_b)
+           m_case = A2TB2;
+       }
        else
         {
-         if (transpose_b)
-           m_case = A2B2T;
-         else
-           m_case = A2B2;
+         if (matrix_b->rank == 1)
+           m_case = A2B1;
+         else /* matrix_b->rank == 2 */
+           {
+             if (transpose_b)
+               m_case = A2B2T;
+             else
+               m_case = A2B2;
+           }
         }
      }
-  else
+  else /* matrix_a->rank == 1 */
      {
-      /* Vector * Transpose(B) not handled yet.  */
-      if (transpose_b)
-       m_case = none;
-      else
-       m_case = A1B2;
+      if (matrix_b->rank == 2)
+       {
+         if (!transpose_b)
+           m_case = A1B2;
+       }
      }
-
+    
    if (m_case == none)
      return 0;
  
@@ -3250,6 +3273,37 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
           next_code_point = &test->next;
  
         }
+
+      if (m_case == A2TB2)
+       {
+         c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
+         a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
+
+         test = runtime_error_ne (c1, a2, "Incorrect extent in return array in "
+                                  "MATMUL intrinsic for dimension 1: "
+                                  "is %ld, should be %ld");
+
+         *next_code_point = test;
+         next_code_point = &test->next;
+
+         c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2);
+         b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
+         test = runtime_error_ne (c2, b2, "Incorrect extent in return array in "
+                                  "MATMUL intrinsic for dimension 2: "
+                                  "is %ld, should be %ld");
+         *next_code_point = test;
+         next_code_point = &test->next;
+
+         a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
+         b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
+
+         test = runtime_error_ne (b1, a1, "Incorrect extent in argument B in "
+                                  "MATMUL intrnisic for dimension 2: "
+                                  "is %ld, should be %ld");
+         *next_code_point = test;
+         next_code_point = &test->next;
+
+       }
      }
  
    *next_code_point = assign_zero;
@@ -3331,6 +3385,39 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
  
        break;
  
+    case A2TB2:
+      inline_limit_check (matrix_a, matrix_b, m_case);
+
+      u1 = get_size_m1 (matrix_a, 2);
+      u2 = get_size_m1 (matrix_b, 2);
+      u3 = get_size_m1 (matrix_a, 1);
+
+      do_1 = create_do_loop (gfc_copy_expr (zero), u1, NULL, &co->loc, ns);
+      do_2 = create_do_loop (gfc_copy_expr (zero), u2, NULL, &co->loc, ns);
+      do_3 = create_do_loop (gfc_copy_expr (zero), u3, NULL, &co->loc, ns);
+
+      do_1->block->next = do_2;
+      do_2->block->next = do_3;
+      do_3->block->next = assign_matmul;
+
+      var_1 = do_1->ext.iterator->var;
+      var_2 = do_2->ext.iterator->var;
+      var_3 = do_3->ext.iterator->var;
+
+      list[0] = var_1;
+      list[1] = var_2;
+      cscalar = scalarized_expr (co->expr1, list, 2);
+
+      list[0] = var_3;
+      list[1] = var_1;
+      ascalar = scalarized_expr (matrix_a, list, 2);
+
+      list[0] = var_3;
+      list[1] = var_2;
+      bscalar = scalarized_expr (matrix_b, list, 2);
+
+      break;
+
      case A2B1:
        u1 = get_size_m1 (matrix_b, 1);
        u2 = get_size_m1 (matrix_a, 1);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index e9c316f6d03d282cdd26ab157dc848c2a00feaea..b6f824b21fafb0136514a6341f903d6c5c75e575 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2017-05-24  Thomas Koenig  <tkoenig@gcc.gnu.org>
+
+       PR fortran/66094
+       * gfortran.dg/inline_matmul_16.f90:  New test.
+
  2017-05-24  Tom de Vries  <tom@codesourcery.com>
  
         * gcc.dg/cpp/19940712-1.c: Reinstate absolute line number.
diff --git a/gcc/testsuite/gfortran.dg/inline_matmul_16.f90 b/gcc/testsuite/gfortran.dg/inline_matmul_16.f90

new file mode 100644 (file)

index 0000000..37dc5ef
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/inline_matmul_16.f90
@@ -0,0 +1,61 @@
+! { dg-do run }
+! { dg-options "-ffrontend-optimize -fdump-tree-optimized -Wrealloc-lhs -finline-matmul-limit=1000 -O" }
+! PR 66094: Check functionality for MATMUL(TRANSPOSE(A),B)) for two-dimensional arrays
+program main
+  implicit none
+  integer, parameter :: n = 3, m=4, cnt=2
+  real, dimension(cnt,n) :: a
+  real, dimension(cnt,m) :: b
+  real, dimension(n,m) :: c, cres
+  real, dimension(:,:), allocatable :: calloc
+  integer :: in, im, icnt
+
+  data a / 2., -3., 5., -7., 11., -13./
+  data b /17., -23., 29., -31., 37., -39., 41., -47./
+  data cres /103.,  246.,  486.,  151.,  362.,  722., &
+             191.,  458.,  914.,  223.,  534., 1062./
+
+  c = matmul(transpose(a),b)
+  if (sum(c-cres)>1e-4) call abort
+  if (sum(c-cres)>1e-4) call abort
+
+  ! Unallocated
+  calloc = matmul(transpose(a),b) ! { dg-warning "Code for reallocating the allocatable array" }
+  if (any(shape(c) /= shape(calloc))) call abort
+  if (sum(calloc-cres)>1e-4) call abort
+  deallocate(calloc)
+
+  ! Allocated to wrong shape
+  allocate (calloc(10,10))
+  calloc = matmul(transpose(a),b) ! { dg-warning "Code for reallocating the allocatable array" }
+  if (any(shape(c) /= shape(calloc))) call abort
+  if (sum(calloc-cres)>1e-4) call abort
+  deallocate(calloc)
+
+  ! cycle through a few test cases...
+  do in=2,10 
+     do im = 2,10
+        do icnt = 2,10
+           block
+             real, dimension(icnt,in) :: a2
+             real, dimension(icnt,im) :: b2
+             real, dimension(in,im) :: c2,cr
+             integer :: i,j,k
+             call random_number(a2)
+             call random_number(b2)
+             c2 = 0
+             do i=1,size(a2,2)
+                do j=1, size(b2,2)
+                   do k=1, size(a2,1)
+                      c2(i,j) = c2(i,j) + a2(k,i) * b2(k,j)
+                   end do
+                end do
+             end do
+             cr = matmul(transpose(a2), b2)
+             if (any(abs(c2-cr) > 1e-4)) call abort
+           end block
+        end do
+     end do
+  end do
+end program main
+! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
author	Thomas Koenig <tkoenig@gcc.gnu.org>
	Wed, 24 May 2017 18:44:35 +0000 (18:44 +0000)
committer	Thomas Koenig <tkoenig@gcc.gnu.org>
	Wed, 24 May 2017 18:44:35 +0000 (18:44 +0000)
gcc/fortran/ChangeLog		patch \| blob \| history
gcc/fortran/frontend-passes.c		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gfortran.dg/inline_matmul_16.f90	[new file with mode: 0644]	patch \| blob