From 610abc0212d728ea9f64309c549e6a37d5504517 Mon Sep 17 00:00:00 2001 From: Thomas Koenig Date: Wed, 24 May 2017 18:44:35 +0000 Subject: [PATCH] re PR fortran/66094 (Handle transpose(A) in inline matmul) 2017-05-24 Thomas Koenig PR fortran/66094 * frontend-passes.c (matrix_case): Add A2TB2. (inline_limit_check): Handle MATMUL(TRANSPOSE(A),B) (inline_matmul_assign): Likewise. 2017-05-24 Thomas Koenig PR fortran/66094 * gfortran.dg/inline_matmul_16.f90: New test. From-SVN: r248425 --- gcc/fortran/ChangeLog | 7 ++ gcc/fortran/frontend-passes.c | 119 +++++++++++++++--- gcc/testsuite/ChangeLog | 5 + .../gfortran.dg/inline_matmul_16.f90 | 61 +++++++++ 4 files changed, 176 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/inline_matmul_16.f90 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index b3179e073c7..760cbe2f4af 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,10 @@ +2017-05-24 Thomas Koenig + + PR fortran/66094 + * frontend-passes.c (matrix_case): Add A2TB2. + (inline_limit_check): Handle MATMUL(TRANSPOSE(A),B) + (inline_matmul_assign): Likewise. + 2017-05-23 Thomas Schwinge * openmp.c (OACC_KERNELS_CLAUSES): Add "OMP_CLAUSE_NUM_GANGS", diff --git a/gcc/fortran/frontend-passes.c b/gcc/fortran/frontend-passes.c index 32b4e80d515..1ea5b5403ce 100644 --- a/gcc/fortran/frontend-passes.c +++ b/gcc/fortran/frontend-passes.c @@ -112,7 +112,7 @@ static int var_num = 1; /* What sort of matrix we are dealing with when inlining MATMUL. */ -enum matrix_case { none=0, A2B2, A2B1, A1B2, A2B2T }; +enum matrix_case { none=0, A2B2, A2B1, A1B2, A2B2T, A2TB2 }; /* Keep track of the number of expressions we have inserted so far using create_var. */ @@ -2252,7 +2252,7 @@ inline_limit_check (gfc_expr *a, gfc_expr *b, enum matrix_case m_case) gfc_typespec ts; gfc_expr *cond; - gcc_assert (m_case == A2B2 || m_case == A2B2T); + gcc_assert (m_case == A2B2 || m_case == A2B2T || m_case == A2TB2); /* Calculation is done in real to avoid integer overflow. */ @@ -2425,6 +2425,20 @@ matmul_lhs_realloc (gfc_expr *c, gfc_expr *a, gfc_expr *b, cond = build_logical_expr (INTRINSIC_OR, ne1, ne2); break; + case A2TB2: + + ar->start[0] = get_array_inq_function (GFC_ISYM_SIZE, a, 2); + ar->start[1] = get_array_inq_function (GFC_ISYM_SIZE, b, 2); + + ne1 = build_logical_expr (INTRINSIC_NE, + get_array_inq_function (GFC_ISYM_SIZE, c, 1), + get_array_inq_function (GFC_ISYM_SIZE, a, 2)); + ne2 = build_logical_expr (INTRINSIC_NE, + get_array_inq_function (GFC_ISYM_SIZE, c, 2), + get_array_inq_function (GFC_ISYM_SIZE, b, 2)); + cond = build_logical_expr (INTRINSIC_OR, ne1, ne2); + break; + case A2B1: ar->start[0] = get_array_inq_function (GFC_ISYM_SIZE, a, 1); cond = build_logical_expr (INTRINSIC_NE, @@ -3009,7 +3023,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, a = expr2->value.function.actual; matrix_a = check_conjg_transpose_variable (a->expr, &conjg_a, &transpose_a); - if (transpose_a || matrix_a == NULL) + if (matrix_a == NULL) return 0; b = a->next; @@ -3026,27 +3040,36 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, || gfc_check_dependency (expr1, matrix_b, true)) return 0; + m_case = none; if (matrix_a->rank == 2) { - if (matrix_b->rank == 1) - m_case = A2B1; + if (transpose_a) + { + if (matrix_b->rank == 2 && !transpose_b) + m_case = A2TB2; + } else { - if (transpose_b) - m_case = A2B2T; - else - m_case = A2B2; + if (matrix_b->rank == 1) + m_case = A2B1; + else /* matrix_b->rank == 2 */ + { + if (transpose_b) + m_case = A2B2T; + else + m_case = A2B2; + } } } - else + else /* matrix_a->rank == 1 */ { - /* Vector * Transpose(B) not handled yet. */ - if (transpose_b) - m_case = none; - else - m_case = A1B2; + if (matrix_b->rank == 2) + { + if (!transpose_b) + m_case = A1B2; + } } - + if (m_case == none) return 0; @@ -3250,6 +3273,37 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, next_code_point = &test->next; } + + if (m_case == A2TB2) + { + c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1); + a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2); + + test = runtime_error_ne (c1, a2, "Incorrect extent in return array in " + "MATMUL intrinsic for dimension 1: " + "is %ld, should be %ld"); + + *next_code_point = test; + next_code_point = &test->next; + + c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2); + b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2); + test = runtime_error_ne (c2, b2, "Incorrect extent in return array in " + "MATMUL intrinsic for dimension 2: " + "is %ld, should be %ld"); + *next_code_point = test; + next_code_point = &test->next; + + a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1); + b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1); + + test = runtime_error_ne (b1, a1, "Incorrect extent in argument B in " + "MATMUL intrnisic for dimension 2: " + "is %ld, should be %ld"); + *next_code_point = test; + next_code_point = &test->next; + + } } *next_code_point = assign_zero; @@ -3331,6 +3385,39 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, break; + case A2TB2: + inline_limit_check (matrix_a, matrix_b, m_case); + + u1 = get_size_m1 (matrix_a, 2); + u2 = get_size_m1 (matrix_b, 2); + u3 = get_size_m1 (matrix_a, 1); + + do_1 = create_do_loop (gfc_copy_expr (zero), u1, NULL, &co->loc, ns); + do_2 = create_do_loop (gfc_copy_expr (zero), u2, NULL, &co->loc, ns); + do_3 = create_do_loop (gfc_copy_expr (zero), u3, NULL, &co->loc, ns); + + do_1->block->next = do_2; + do_2->block->next = do_3; + do_3->block->next = assign_matmul; + + var_1 = do_1->ext.iterator->var; + var_2 = do_2->ext.iterator->var; + var_3 = do_3->ext.iterator->var; + + list[0] = var_1; + list[1] = var_2; + cscalar = scalarized_expr (co->expr1, list, 2); + + list[0] = var_3; + list[1] = var_1; + ascalar = scalarized_expr (matrix_a, list, 2); + + list[0] = var_3; + list[1] = var_2; + bscalar = scalarized_expr (matrix_b, list, 2); + + break; + case A2B1: u1 = get_size_m1 (matrix_b, 1); u2 = get_size_m1 (matrix_a, 1); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e9c316f6d03..b6f824b21fa 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-05-24 Thomas Koenig + + PR fortran/66094 + * gfortran.dg/inline_matmul_16.f90: New test. + 2017-05-24 Tom de Vries * gcc.dg/cpp/19940712-1.c: Reinstate absolute line number. diff --git a/gcc/testsuite/gfortran.dg/inline_matmul_16.f90 b/gcc/testsuite/gfortran.dg/inline_matmul_16.f90 new file mode 100644 index 00000000000..37dc5ef0ba3 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/inline_matmul_16.f90 @@ -0,0 +1,61 @@ +! { dg-do run } +! { dg-options "-ffrontend-optimize -fdump-tree-optimized -Wrealloc-lhs -finline-matmul-limit=1000 -O" } +! PR 66094: Check functionality for MATMUL(TRANSPOSE(A),B)) for two-dimensional arrays +program main + implicit none + integer, parameter :: n = 3, m=4, cnt=2 + real, dimension(cnt,n) :: a + real, dimension(cnt,m) :: b + real, dimension(n,m) :: c, cres + real, dimension(:,:), allocatable :: calloc + integer :: in, im, icnt + + data a / 2., -3., 5., -7., 11., -13./ + data b /17., -23., 29., -31., 37., -39., 41., -47./ + data cres /103., 246., 486., 151., 362., 722., & + 191., 458., 914., 223., 534., 1062./ + + c = matmul(transpose(a),b) + if (sum(c-cres)>1e-4) call abort + if (sum(c-cres)>1e-4) call abort + + ! Unallocated + calloc = matmul(transpose(a),b) ! { dg-warning "Code for reallocating the allocatable array" } + if (any(shape(c) /= shape(calloc))) call abort + if (sum(calloc-cres)>1e-4) call abort + deallocate(calloc) + + ! Allocated to wrong shape + allocate (calloc(10,10)) + calloc = matmul(transpose(a),b) ! { dg-warning "Code for reallocating the allocatable array" } + if (any(shape(c) /= shape(calloc))) call abort + if (sum(calloc-cres)>1e-4) call abort + deallocate(calloc) + + ! cycle through a few test cases... + do in=2,10 + do im = 2,10 + do icnt = 2,10 + block + real, dimension(icnt,in) :: a2 + real, dimension(icnt,im) :: b2 + real, dimension(in,im) :: c2,cr + integer :: i,j,k + call random_number(a2) + call random_number(b2) + c2 = 0 + do i=1,size(a2,2) + do j=1, size(b2,2) + do k=1, size(a2,1) + c2(i,j) = c2(i,j) + a2(k,i) * b2(k,j) + end do + end do + end do + cr = matmul(transpose(a2), b2) + if (any(abs(c2-cr) > 1e-4)) call abort + end block + end do + end do + end do +end program main +! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } } -- 2.30.2