static int callback_reduction (gfc_expr **, int *, void *);
static void realloc_strings (gfc_namespace *);
static gfc_expr *create_var (gfc_expr *, const char *vname=NULL);
+static int matmul_to_var_expr (gfc_expr **, int *, void *);
+static int matmul_to_var_code (gfc_code **, int *, void *);
static int inline_matmul_assign (gfc_code **, int *, void *);
static gfc_code * create_do_loop (gfc_expr *, gfc_expr *, gfc_expr *,
locus *, gfc_namespace *,
gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL);
gfc_code_walker (&ns->code, optimize_code, optimize_expr, NULL);
if (flag_inline_matmul_limit != 0)
- gfc_code_walker (&ns->code, inline_matmul_assign, dummy_expr_callback,
- NULL);
-
+ {
+ bool found;
+ do
+ {
+ found = false;
+ gfc_code_walker (&ns->code, matmul_to_var_code, matmul_to_var_expr,
+ (void *) &found);
+ }
+ while (found);
+
+ gfc_code_walker (&ns->code, inline_matmul_assign, dummy_expr_callback,
+ NULL);
+ }
+
/* BLOCKs are handled in the expression walker below. */
for (ns = ns->contained; ns; ns = ns->sibling)
{
/* This selction deals with inlining calls to MATMUL. */
+/* Replace calls to matmul outside of straight assignments with a temporary
+ variable so that later inlining will work. */
+
+static int
+matmul_to_var_expr (gfc_expr **ep, int *walk_subtrees ATTRIBUTE_UNUSED,
+ void *data)
+{
+ gfc_expr *e, *n;
+ bool *found = (bool *) data;
+
+ e = *ep;
+
+ if (e->expr_type != EXPR_FUNCTION
+ || e->value.function.isym == NULL
+ || e->value.function.isym->id != GFC_ISYM_MATMUL)
+ return 0;
+
+ if (forall_level > 0 || iterator_level > 0 || in_omp_workshare
+ || in_where)
+ return 0;
+
+ /* Check if this is already in the form c = matmul(a,b). */
+
+ if ((*current_code)->expr2 == e)
+ return 0;
+
+ n = create_var (e, "matmul");
+
+ /* If create_var is unable to create a variable (for example if
+ -fno-realloc-lhs is in force with a variable that does not have bounds
+ known at compile-time), just return. */
+
+ if (n == NULL)
+ return 0;
+
+ *ep = n;
+ *found = true;
+ return 0;
+}
+
+/* Set current_code and associated variables so that matmul_to_var_expr can
+ work. */
+
+static int
+matmul_to_var_code (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
+ void *data ATTRIBUTE_UNUSED)
+{
+ if (current_code != c)
+ {
+ current_code = c;
+ inserted_block = NULL;
+ changed_statement = NULL;
+ }
+
+ return 0;
+}
+
+
/* Auxiliary function to build and simplify an array inquiry function.
dim is zero-based. */
--- /dev/null
+! { dg-do run }
+! { dg-options "-O -ffrontend-optimize -fdump-tree-optimized" }
+! PR 79930 - missed optimization by not inlining matmul in expressions.
+
+module foo
+ implicit none
+contains
+ subroutine test1
+ ! Test with fixed dimensions
+ real, dimension(3,2) :: a1
+ real, dimension(2,4) :: b1
+ real, dimension(3,4) :: cres1
+ real, dimension(3,3) :: a2
+ real, dimension(3) :: v1, v2
+ real :: r
+ character(len=9*18) :: r1, r2
+ real(kind=8), dimension(3,3) :: a3, b3, c3, d3, res3
+
+ data a1 / 2., -3., 5., -7., 11., -13./
+ data b1 /17., -23., 29., -31., 37., -39., 41., -47./
+ data cres1 /195., -304., 384., 275., -428., 548., 347., -540., 692., 411., -640., 816./
+
+ data a2 / 2., -3., 5., -7., 11., -13., 17., -23., 29./
+ data v1 /-31., 37., -41./
+ data v2 /43., -47., 53./
+
+ data a3/-2.d0, 3.d0, 5.d0, -7.d0, -11.d0, 13.d0, 17.d0, -19.d0, -23.d0/
+ data b3/29.d0, -31.d0, 37.d0, -41.d0, 43.d0, -47.d0, 53.d0, -59.d0, 61.d0/
+ data c3/-67.d0,71.d0, 73.d0, -79.d0, -83.d0, 89.d0, 97.d0, -101.d0, 103.d0/
+ data d3/107.d0, 109.d0, 113.d0, 127.d0, 131.d0, 137.d0, 139.d0, 149.d0, 151.d0/
+ data res3/48476106.d0, -12727087.d0, -68646789.d0, 58682206.d0, -15428737.d0, -83096539.d0,&
+ & 65359710.d0, -17176589.d0, -92551887.d0/
+
+ write (unit=r1, fmt='(12F12.5)') matmul(a1,b1)
+ write (unit=r2, fmt='(12F12.5)') cres1
+ if (r1 /= r2) call abort
+
+ r = dot_product(matmul(a2,v1),v2)
+ if (abs(r+208320) > 1) call abort
+
+ write (unit=r1,fmt='(1P,9E18.10)') matmul(matmul(a3,b3),matmul(c3,d3))
+ write (unit=r2,fmt='(1P,9E18.10)') res3
+ if (r1 /= r2) call abort
+
+ end subroutine test1
+
+ subroutine test2
+ ! Test with dimensions not known at compile-time
+ real, dimension(:,:), allocatable :: a1
+ real, dimension(:,:), allocatable :: b1
+ real, dimension(3,4) :: cres1
+ real, dimension(:,:), allocatable :: a2
+ real, dimension(:), allocatable :: v1, v2
+ real :: r
+ character(len=9*18) :: r1, r2
+ real(kind=8), dimension(3,3) :: a3, b3, c3, d3, res3
+ data cres1 /195., -304., 384., 275., -428., 548., 347., -540., 692., 411., -640., 816./
+ data res3/48476106.d0, -12727087.d0, -68646789.d0, 58682206.d0, -15428737.d0, -83096539.d0,&
+ & 65359710.d0, -17176589.d0, -92551887.d0/
+
+ a1 = reshape([ 2., -3., 5., -7., 11., -13.], [3,2])
+ b1 = reshape([17., -23., 29., -31., 37., -39., 41., -47.],[2,4])
+
+ a2 = reshape([2., -3., 5., -7., 11., -13., 17., -23., 29.],[3,3]);
+ v1 = [-31., 37., -41.]
+ v2 = [43., -47., 53.]
+
+ a3 = reshape([-2.d0, 3.d0, 5.d0, -7.d0, -11.d0, 13.d0, 17.d0, -19.d0, -23.d0], [3,3])
+ b3 = reshape([29.d0, -31.d0, 37.d0, -41.d0, 43.d0, -47.d0, 53.d0, -59.d0, 61.d0], [3,3])
+ c3 = reshape([-67.d0,71.d0, 73.d0, -79.d0, -83.d0, 89.d0, 97.d0, -101.d0, 103.d0], [3,3])
+ d3 = reshape([107.d0, 109.d0, 113.d0, 127.d0, 131.d0, 137.d0, 139.d0, 149.d0, 151.d0],[3,3])
+
+ write (unit=r1, fmt='(12F12.5)') matmul(a1,b1)
+ write (unit=r2, fmt='(12F12.5)') cres1
+ if (r1 /= r2) call abort
+
+ r = dot_product(matmul(a2,v1),v2)
+ if (abs(r+208320) > 1) call abort
+
+ write (unit=r1,fmt='(1P,9E18.10)') matmul(matmul(a3,b3),matmul(c3,d3))
+ write (unit=r2,fmt='(1P,9E18.10)') res3
+ if (r1 /= r2) call abort
+
+ end subroutine test2
+
+end module foo
+
+program main
+ use foo
+ implicit none
+ call test1
+ call test2
+! call test3
+end program main
+! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }