char *vname=NULL);
static gfc_expr* check_conjg_transpose_variable (gfc_expr *, bool *,
bool *);
+static int call_external_blas (gfc_code **, int *, void *);
static bool has_dimen_vector_ref (gfc_expr *);
static int matmul_temp_args (gfc_code **, int *,void *data);
static int index_interchange (gfc_code **, int*, void *);
/* What sort of matrix we are dealing with when inlining MATMUL. */
-enum matrix_case { none=0, A2B2, A2B1, A1B2, A2B2T, A2TB2 };
+enum matrix_case { none=0, A2B2, A2B1, A1B2, A2B2T, A2TB2, A2TB2T };
/* Keep track of the number of expressions we have inserted so far
using create_var. */
gfc_code_walker (&ns->code, convert_elseif, dummy_expr_callback, NULL);
gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL);
gfc_code_walker (&ns->code, optimize_code, optimize_expr, NULL);
- if (flag_inline_matmul_limit != 0)
+ if (flag_inline_matmul_limit != 0 || flag_external_blas)
{
bool found;
do
gfc_code_walker (&ns->code, matmul_temp_args, dummy_expr_callback,
NULL);
- gfc_code_walker (&ns->code, inline_matmul_assign, dummy_expr_callback,
- NULL);
}
+
+ if (flag_external_blas)
+ gfc_code_walker (&ns->code, call_external_blas, dummy_expr_callback,
+ NULL);
+
+ if (flag_inline_matmul_limit != 0)
+ gfc_code_walker (&ns->code, inline_matmul_assign, dummy_expr_callback,
+ NULL);
}
if (flag_frontend_loop_interchange)
dim is zero-based. */
static gfc_expr *
-get_array_inq_function (gfc_isym_id id, gfc_expr *e, int dim)
+get_array_inq_function (gfc_isym_id id, gfc_expr *e, int dim, int okind = 0)
{
gfc_expr *fcn;
gfc_expr *dim_arg, *kind;
}
dim_arg = gfc_get_int_expr (gfc_default_integer_kind, &e->where, dim);
- kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where,
- gfc_index_integer_kind);
+ if (okind != 0)
+ kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where,
+ okind);
+ else
+ kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where,
+ gfc_index_integer_kind);
ec = gfc_copy_expr (e);
removed by DCE. Only called for rank-two matrices A and B. */
static gfc_code *
-inline_limit_check (gfc_expr *a, gfc_expr *b, enum matrix_case m_case)
+inline_limit_check (gfc_expr *a, gfc_expr *b, int limit)
{
gfc_expr *inline_limit;
gfc_code *if_1, *if_2, *else_2;
gfc_typespec ts;
gfc_expr *cond;
- gcc_assert (m_case == A2B2 || m_case == A2B2T || m_case == A2TB2);
-
/* Calculation is done in real to avoid integer overflow. */
inline_limit = gfc_get_constant_expr (BT_REAL, gfc_default_real_kind,
&a->where);
- mpfr_set_si (inline_limit->value.real, flag_inline_matmul_limit,
- GFC_RND_MODE);
+ mpfr_set_si (inline_limit->value.real, limit, GFC_RND_MODE);
mpfr_pow_ui (inline_limit->value.real, inline_limit->value.real, 3,
GFC_RND_MODE);
get_array_inq_function (GFC_ISYM_SIZE, b, 2));
break;
+ case A2TB2T:
+ /* This can only happen for BLAS, we do not handle that case in
+ inline mamtul. */
+ ar->start[0] = get_array_inq_function (GFC_ISYM_SIZE, a, 2);
+ ar->start[1] = get_array_inq_function (GFC_ISYM_SIZE, b, 1);
+
+ ne1 = build_logical_expr (INTRINSIC_NE,
+ get_array_inq_function (GFC_ISYM_SIZE, c, 1),
+ get_array_inq_function (GFC_ISYM_SIZE, a, 2));
+ ne2 = build_logical_expr (INTRINSIC_NE,
+ get_array_inq_function (GFC_ISYM_SIZE, c, 2),
+ get_array_inq_function (GFC_ISYM_SIZE, b, 1));
+
+ cond = build_logical_expr (INTRINSIC_OR, ne1, ne2);
+ break;
+
default:
gcc_unreachable();
/* Take care of the inline flag. If the limit check evaluates to a
constant, dead code elimination will eliminate the unneeded branch. */
- if (m_case == A2B2 && flag_inline_matmul_limit > 0)
+ if (flag_inline_matmul_limit > 0 && matrix_a->rank == 2
+ && matrix_b->rank == 2)
{
- if_limit = inline_limit_check (matrix_a, matrix_b, m_case);
+ if_limit = inline_limit_check (matrix_a, matrix_b,
+ flag_inline_matmul_limit);
/* Insert the original statement into the else branch. */
if_limit->block->block->next = co;
switch (m_case)
{
case A2B2:
- inline_limit_check (matrix_a, matrix_b, m_case);
u1 = get_size_m1 (matrix_b, 2);
u2 = get_size_m1 (matrix_a, 2);
break;
case A2B2T:
- inline_limit_check (matrix_a, matrix_b, m_case);
u1 = get_size_m1 (matrix_b, 1);
u2 = get_size_m1 (matrix_a, 2);
break;
case A2TB2:
- inline_limit_check (matrix_a, matrix_b, m_case);
u1 = get_size_m1 (matrix_a, 2);
u2 = get_size_m1 (matrix_b, 2);
return 0;
}
+/* Change matmul function calls in the form of
+
+ c = matmul(a,b)
+
+ to the corresponding call to a BLAS routine, if applicable. */
+
+static int
+call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
+ void *data ATTRIBUTE_UNUSED)
+{
+ gfc_code *co, *co_next;
+ gfc_expr *expr1, *expr2;
+ gfc_expr *matrix_a, *matrix_b;
+ gfc_code *if_limit = NULL;
+ gfc_actual_arglist *a, *b;
+ bool conjg_a, conjg_b, transpose_a, transpose_b;
+ gfc_code *call;
+ const char *blas_name;
+ const char *transa, *transb;
+ gfc_expr *c1, *c2, *b1;
+ gfc_actual_arglist *actual, *next;
+ bt type;
+ int kind;
+ enum matrix_case m_case;
+ bool realloc_c;
+ gfc_code **next_code_point;
+
+ /* Many of the tests for inline matmul also apply here. */
+
+ co = *c;
+
+ if (co->op != EXEC_ASSIGN)
+ return 0;
+
+ if (in_where || in_assoc_list)
+ return 0;
+
+ /* The BLOCKS generated for the temporary variables and FORALL don't
+ mix. */
+ if (forall_level > 0)
+ return 0;
+
+ /* For now don't do anything in OpenMP workshare, it confuses
+ its translation, which expects only the allowed statements in there. */
+
+ if (in_omp_workshare)
+ return 0;
+
+ expr1 = co->expr1;
+ expr2 = co->expr2;
+ if (expr2->expr_type != EXPR_FUNCTION
+ || expr2->value.function.isym == NULL
+ || expr2->value.function.isym->id != GFC_ISYM_MATMUL)
+ return 0;
+
+ type = expr2->ts.type;
+ kind = expr2->ts.kind;
+
+ /* Guard against recursion. */
+
+ if (expr2->external_blas)
+ return 0;
+
+ if (type != expr1->ts.type || kind != expr1->ts.kind)
+ return 0;
+
+ if (type == BT_REAL)
+ {
+ if (kind == 4)
+ blas_name = "sgemm";
+ else if (kind == 8)
+ blas_name = "dgemm";
+ else
+ return 0;
+ }
+ else if (type == BT_COMPLEX)
+ {
+ if (kind == 4)
+ blas_name = "cgemm";
+ else if (kind == 8)
+ blas_name = "zgemm";
+ else
+ return 0;
+ }
+ else
+ return 0;
+
+ a = expr2->value.function.actual;
+ if (a->expr->rank != 2)
+ return 0;
+
+ b = a->next;
+ if (b->expr->rank != 2)
+ return 0;
+
+ matrix_a = check_conjg_transpose_variable (a->expr, &conjg_a, &transpose_a);
+ if (matrix_a == NULL)
+ return 0;
+
+ if (transpose_a)
+ {
+ if (conjg_a)
+ transa = "C";
+ else
+ transa = "T";
+ }
+ else
+ transa = "N";
+
+ matrix_b = check_conjg_transpose_variable (b->expr, &conjg_b, &transpose_b);
+ if (matrix_b == NULL)
+ return 0;
+
+ if (transpose_b)
+ {
+ if (conjg_b)
+ transb = "C";
+ else
+ transb = "T";
+ }
+ else
+ transb = "N";
+
+ if (transpose_a)
+ {
+ if (transpose_b)
+ m_case = A2TB2T;
+ else
+ m_case = A2TB2;
+ }
+ else
+ {
+ if (transpose_b)
+ m_case = A2B2T;
+ else
+ m_case = A2B2;
+ }
+
+ current_code = c;
+ inserted_block = NULL;
+ changed_statement = NULL;
+
+ expr2->external_blas = 1;
+
+ /* We do not handle data dependencies yet. */
+ if (gfc_check_dependency (expr1, matrix_a, true)
+ || gfc_check_dependency (expr1, matrix_b, true))
+ return 0;
+
+ /* Generate the if statement and hang it into the tree. */
+ if_limit = inline_limit_check (matrix_a, matrix_b, flag_blas_matmul_limit);
+ co_next = co->next;
+ (*current_code) = if_limit;
+ co->next = NULL;
+ if_limit->block->next = co;
+
+ call = XCNEW (gfc_code);
+ call->loc = co->loc;
+
+ /* Bounds checking - a bit simpler than for inlining since we only
+ have to take care of two-dimensional arrays here. */
+
+ realloc_c = flag_realloc_lhs && gfc_is_reallocatable_lhs (expr1);
+ next_code_point = &(if_limit->block->block->next);
+
+ if (gfc_option.rtcheck & GFC_RTCHECK_BOUNDS)
+ {
+ gfc_code *test;
+ // gfc_expr *a2, *b1, *c1, *c2, *a1, *b2;
+ gfc_expr *c1, *a1, *c2, *b2, *a2;
+ switch (m_case)
+ {
+ case A2B2:
+ b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
+ a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
+ test = runtime_error_ne (b1, a2, B_ERROR(1));
+ *next_code_point = test;
+ next_code_point = &test->next;
+
+ if (!realloc_c)
+ {
+ c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
+ a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
+ test = runtime_error_ne (c1, a1, C_ERROR(1));
+ *next_code_point = test;
+ next_code_point = &test->next;
+
+ c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2);
+ b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
+ test = runtime_error_ne (c2, b2, C_ERROR(2));
+ *next_code_point = test;
+ next_code_point = &test->next;
+ }
+ break;
+
+ case A2B2T:
+
+ b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
+ a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
+ /* matrix_b is transposed, hence dimension 1 for the error message. */
+ test = runtime_error_ne (b2, a2, B_ERROR(1));
+ *next_code_point = test;
+ next_code_point = &test->next;
+
+ if (!realloc_c)
+ {
+ c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
+ a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
+ test = runtime_error_ne (c1, a1, C_ERROR(1));
+ *next_code_point = test;
+ next_code_point = &test->next;
+
+ c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2);
+ b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
+ test = runtime_error_ne (c2, b1, C_ERROR(2));
+ *next_code_point = test;
+ next_code_point = &test->next;
+ }
+ break;
+
+ case A2TB2:
+
+ b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
+ a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
+ test = runtime_error_ne (b1, a1, B_ERROR(1));
+ *next_code_point = test;
+ next_code_point = &test->next;
+
+ if (!realloc_c)
+ {
+ c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
+ a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
+ test = runtime_error_ne (c1, a2, C_ERROR(1));
+ *next_code_point = test;
+ next_code_point = &test->next;
+
+ c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2);
+ b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
+ test = runtime_error_ne (c2, b2, C_ERROR(2));
+ *next_code_point = test;
+ next_code_point = &test->next;
+ }
+ break;
+
+ case A2TB2T:
+ b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
+ a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
+ test = runtime_error_ne (b2, a1, B_ERROR(1));
+ *next_code_point = test;
+ next_code_point = &test->next;
+
+ if (!realloc_c)
+ {
+ c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
+ a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
+ test = runtime_error_ne (c1, a2, C_ERROR(1));
+ *next_code_point = test;
+ next_code_point = &test->next;
+
+ c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2);
+ b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
+ test = runtime_error_ne (c2, b1, C_ERROR(2));
+ *next_code_point = test;
+ next_code_point = &test->next;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* Handle the reallocation, if needed. */
+
+ if (realloc_c)
+ {
+ gfc_code *lhs_alloc;
+
+ lhs_alloc = matmul_lhs_realloc (expr1, matrix_a, matrix_b, m_case);
+ *next_code_point = lhs_alloc;
+ next_code_point = &lhs_alloc->next;
+ }
+
+ *next_code_point = call;
+ if_limit->next = co_next;
+
+ /* Set up the BLAS call. */
+
+ call->op = EXEC_CALL;
+
+ gfc_get_sym_tree (blas_name, current_ns, &(call->symtree), true);
+ call->symtree->n.sym->attr.subroutine = 1;
+ call->symtree->n.sym->attr.procedure = 1;
+ call->symtree->n.sym->attr.flavor = FL_PROCEDURE;
+ call->resolved_sym = call->symtree->n.sym;
+
+ /* Argument TRANSA. */
+ next = gfc_get_actual_arglist ();
+ next->expr = gfc_get_character_expr (gfc_default_character_kind, &co->loc,
+ transa, 1);
+
+ call->ext.actual = next;
+
+ /* Argument TRANSB. */
+ actual = next;
+ next = gfc_get_actual_arglist ();
+ next->expr = gfc_get_character_expr (gfc_default_character_kind, &co->loc,
+ transb, 1);
+ actual->next = next;
+
+ c1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (a->expr), 1,
+ gfc_integer_4_kind);
+ c2 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 2,
+ gfc_integer_4_kind);
+
+ b1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 1,
+ gfc_integer_4_kind);
+
+ /* Argument M. */
+ actual = next;
+ next = gfc_get_actual_arglist ();
+ next->expr = c1;
+ actual->next = next;
+
+ /* Argument N. */
+ actual = next;
+ next = gfc_get_actual_arglist ();
+ next->expr = c2;
+ actual->next = next;
+
+ /* Argument K. */
+ actual = next;
+ next = gfc_get_actual_arglist ();
+ next->expr = b1;
+ actual->next = next;
+
+ /* Argument ALPHA - set to one. */
+ actual = next;
+ next = gfc_get_actual_arglist ();
+ next->expr = gfc_get_constant_expr (type, kind, &co->loc);
+ if (type == BT_REAL)
+ mpfr_set_ui (next->expr->value.real, 1, GFC_RND_MODE);
+ else
+ mpc_set_ui (next->expr->value.complex, 1, GFC_MPC_RND_MODE);
+ actual->next = next;
+
+ /* Argument A. */
+ actual = next;
+ next = gfc_get_actual_arglist ();
+ next->expr = gfc_copy_expr (matrix_a);
+ actual->next = next;
+
+ /* Argument LDA. */
+ actual = next;
+ next = gfc_get_actual_arglist ();
+ next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_a),
+ 1, gfc_integer_4_kind);
+ actual->next = next;
+
+ /* Argument B. */
+ actual = next;
+ next = gfc_get_actual_arglist ();
+ next->expr = gfc_copy_expr (matrix_b);
+ actual->next = next;
+
+ /* Argument LDB. */
+ actual = next;
+ next = gfc_get_actual_arglist ();
+ next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_b),
+ 1, gfc_integer_4_kind);
+ actual->next = next;
+
+ /* Argument BETA - set to zero. */
+ actual = next;
+ next = gfc_get_actual_arglist ();
+ next->expr = gfc_get_constant_expr (type, kind, &co->loc);
+ if (type == BT_REAL)
+ mpfr_set_ui (next->expr->value.real, 0, GFC_RND_MODE);
+ else
+ mpc_set_ui (next->expr->value.complex, 0, GFC_MPC_RND_MODE);
+ actual->next = next;
+
+ /* Argument C. */
+
+ actual = next;
+ next = gfc_get_actual_arglist ();
+ next->expr = gfc_copy_expr (expr1);
+ actual->next = next;
+
+ /* Argument LDC. */
+ actual = next;
+ next = gfc_get_actual_arglist ();
+ next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (expr1),
+ 1, gfc_integer_4_kind);
+ actual->next = next;
+
+ return 0;
+}
+
/* Code for index interchange for loops which are grouped together in DO
CONCURRENT or FORALL statements. This is currently only applied if the
--- /dev/null
+! { dg-do compile }
+! { dg-options "-std=legacy" }
+*> \brief \b CGEMM
+*
+* =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+* http://www.netlib.org/lapack/explore-html/
+*
+* Definition:
+* ===========
+*
+* SUBROUTINE CGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
+*
+* .. Scalar Arguments ..
+* COMPLEX ALPHA,BETA
+* INTEGER K,LDA,LDB,LDC,M,N
+* CHARACTER TRANSA,TRANSB
+* ..
+* .. Array Arguments ..
+* COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
+* ..
+*
+*
+*> \par Purpose:
+* =============
+*>
+*> \verbatim
+*>
+*> CGEMM performs one of the matrix-matrix operations
+*>
+*> C := alpha*op( A )*op( B ) + beta*C,
+*>
+*> where op( X ) is one of
+*>
+*> op( X ) = X or op( X ) = X**T or op( X ) = X**H,
+*>
+*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
+*> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
+*> \endverbatim
+*
+* Arguments:
+* ==========
+*
+*> \param[in] TRANSA
+*> \verbatim
+*> TRANSA is CHARACTER*1
+*> On entry, TRANSA specifies the form of op( A ) to be used in
+*> the matrix multiplication as follows:
+*>
+*> TRANSA = 'N' or 'n', op( A ) = A.
+*>
+*> TRANSA = 'T' or 't', op( A ) = A**T.
+*>
+*> TRANSA = 'C' or 'c', op( A ) = A**H.
+*> \endverbatim
+*>
+*> \param[in] TRANSB
+*> \verbatim
+*> TRANSB is CHARACTER*1
+*> On entry, TRANSB specifies the form of op( B ) to be used in
+*> the matrix multiplication as follows:
+*>
+*> TRANSB = 'N' or 'n', op( B ) = B.
+*>
+*> TRANSB = 'T' or 't', op( B ) = B**T.
+*>
+*> TRANSB = 'C' or 'c', op( B ) = B**H.
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*> M is INTEGER
+*> On entry, M specifies the number of rows of the matrix
+*> op( A ) and of the matrix C. M must be at least zero.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*> N is INTEGER
+*> On entry, N specifies the number of columns of the matrix
+*> op( B ) and the number of columns of the matrix C. N must be
+*> at least zero.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*> K is INTEGER
+*> On entry, K specifies the number of columns of the matrix
+*> op( A ) and the number of rows of the matrix op( B ). K must
+*> be at least zero.
+*> \endverbatim
+*>
+*> \param[in] ALPHA
+*> \verbatim
+*> ALPHA is COMPLEX
+*> On entry, ALPHA specifies the scalar alpha.
+*> \endverbatim
+*>
+*> \param[in] A
+*> \verbatim
+*> A is COMPLEX array, dimension ( LDA, ka ), where ka is
+*> k when TRANSA = 'N' or 'n', and is m otherwise.
+*> Before entry with TRANSA = 'N' or 'n', the leading m by k
+*> part of the array A must contain the matrix A, otherwise
+*> the leading k by m part of the array A must contain the
+*> matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*> LDA is INTEGER
+*> On entry, LDA specifies the first dimension of A as declared
+*> in the calling (sub) program. When TRANSA = 'N' or 'n' then
+*> LDA must be at least max( 1, m ), otherwise LDA must be at
+*> least max( 1, k ).
+*> \endverbatim
+*>
+*> \param[in] B
+*> \verbatim
+*> B is COMPLEX array, dimension ( LDB, kb ), where kb is
+*> n when TRANSB = 'N' or 'n', and is k otherwise.
+*> Before entry with TRANSB = 'N' or 'n', the leading k by n
+*> part of the array B must contain the matrix B, otherwise
+*> the leading n by k part of the array B must contain the
+*> matrix B.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*> LDB is INTEGER
+*> On entry, LDB specifies the first dimension of B as declared
+*> in the calling (sub) program. When TRANSB = 'N' or 'n' then
+*> LDB must be at least max( 1, k ), otherwise LDB must be at
+*> least max( 1, n ).
+*> \endverbatim
+*>
+*> \param[in] BETA
+*> \verbatim
+*> BETA is COMPLEX
+*> On entry, BETA specifies the scalar beta. When BETA is
+*> supplied as zero then C need not be set on input.
+*> \endverbatim
+*>
+*> \param[in,out] C
+*> \verbatim
+*> C is COMPLEX array, dimension ( LDC, N )
+*> Before entry, the leading m by n part of the array C must
+*> contain the matrix C, except when beta is zero, in which
+*> case C need not be set on entry.
+*> On exit, the array C is overwritten by the m by n matrix
+*> ( alpha*op( A )*op( B ) + beta*C ).
+*> \endverbatim
+*>
+*> \param[in] LDC
+*> \verbatim
+*> LDC is INTEGER
+*> On entry, LDC specifies the first dimension of C as declared
+*> in the calling (sub) program. LDC must be at least
+*> max( 1, m ).
+*> \endverbatim
+*
+* Authors:
+* ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup complex_blas_level3
+*
+*> \par Further Details:
+* =====================
+*>
+*> \verbatim
+*>
+*> Level 3 Blas routine.
+*>
+*> -- Written on 8-February-1989.
+*> Jack Dongarra, Argonne National Laboratory.
+*> Iain Duff, AERE Harwell.
+*> Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*> Sven Hammarling, Numerical Algorithms Group Ltd.
+*> \endverbatim
+*>
+* =====================================================================
+ SUBROUTINE CGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
+*
+* -- Reference BLAS level3 routine (version 3.7.0) --
+* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
+* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+* December 2016
+*
+* .. Scalar Arguments ..
+ COMPLEX ALPHA,BETA
+ INTEGER K,LDA,LDB,LDC,M,N
+ CHARACTER TRANSA,TRANSB
+* ..
+* .. Array Arguments ..
+ COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
+* ..
+*
+* =====================================================================
+*
+* .. External Functions ..
+ LOGICAL LSAME
+ EXTERNAL LSAME
+* ..
+* .. External Subroutines ..
+ EXTERNAL XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC CONJG,MAX
+* ..
+* .. Local Scalars ..
+ COMPLEX TEMP
+ INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB
+ LOGICAL CONJA,CONJB,NOTA,NOTB
+* ..
+* .. Parameters ..
+ COMPLEX ONE
+ PARAMETER (ONE= (1.0E+0,0.0E+0))
+ COMPLEX ZERO
+ PARAMETER (ZERO= (0.0E+0,0.0E+0))
+* ..
+*
+* Set NOTA and NOTB as true if A and B respectively are not
+* conjugated or transposed, set CONJA and CONJB as true if A and
+* B respectively are to be transposed but not conjugated and set
+* NROWA, NCOLA and NROWB as the number of rows and columns of A
+* and the number of rows of B respectively.
+*
+ NOTA = LSAME(TRANSA,'N')
+ NOTB = LSAME(TRANSB,'N')
+ CONJA = LSAME(TRANSA,'C')
+ CONJB = LSAME(TRANSB,'C')
+ IF (NOTA) THEN
+ NROWA = M
+ NCOLA = K
+ ELSE
+ NROWA = K
+ NCOLA = M
+ END IF
+ IF (NOTB) THEN
+ NROWB = K
+ ELSE
+ NROWB = N
+ END IF
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF ((.NOT.NOTA) .AND. (.NOT.CONJA) .AND.
+ + (.NOT.LSAME(TRANSA,'T'))) THEN
+ INFO = 1
+ ELSE IF ((.NOT.NOTB) .AND. (.NOT.CONJB) .AND.
+ + (.NOT.LSAME(TRANSB,'T'))) THEN
+ INFO = 2
+ ELSE IF (M.LT.0) THEN
+ INFO = 3
+ ELSE IF (N.LT.0) THEN
+ INFO = 4
+ ELSE IF (K.LT.0) THEN
+ INFO = 5
+ ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
+ INFO = 8
+ ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
+ INFO = 10
+ ELSE IF (LDC.LT.MAX(1,M)) THEN
+ INFO = 13
+ END IF
+ IF (INFO.NE.0) THEN
+ CALL XERBLA('CGEMM ',INFO)
+ RETURN
+ END IF
+*
+* Quick return if possible.
+*
+ IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
+ + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
+*
+* And when alpha.eq.zero.
+*
+ IF (ALPHA.EQ.ZERO) THEN
+ IF (BETA.EQ.ZERO) THEN
+ DO 20 J = 1,N
+ DO 10 I = 1,M
+ C(I,J) = ZERO
+ 10 CONTINUE
+ 20 CONTINUE
+ ELSE
+ DO 40 J = 1,N
+ DO 30 I = 1,M
+ C(I,J) = BETA*C(I,J)
+ 30 CONTINUE
+ 40 CONTINUE
+ END IF
+ RETURN
+ END IF
+*
+* Start the operations.
+*
+ IF (NOTB) THEN
+ IF (NOTA) THEN
+*
+* Form C := alpha*A*B + beta*C.
+*
+ DO 90 J = 1,N
+ IF (BETA.EQ.ZERO) THEN
+ DO 50 I = 1,M
+ C(I,J) = ZERO
+ 50 CONTINUE
+ ELSE IF (BETA.NE.ONE) THEN
+ DO 60 I = 1,M
+ C(I,J) = BETA*C(I,J)
+ 60 CONTINUE
+ END IF
+ DO 80 L = 1,K
+ TEMP = ALPHA*B(L,J)
+ DO 70 I = 1,M
+ C(I,J) = C(I,J) + TEMP*A(I,L)
+ 70 CONTINUE
+ 80 CONTINUE
+ 90 CONTINUE
+ ELSE IF (CONJA) THEN
+*
+* Form C := alpha*A**H*B + beta*C.
+*
+ DO 120 J = 1,N
+ DO 110 I = 1,M
+ TEMP = ZERO
+ DO 100 L = 1,K
+ TEMP = TEMP + CONJG(A(L,I))*B(L,J)
+ 100 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 110 CONTINUE
+ 120 CONTINUE
+ ELSE
+*
+* Form C := alpha*A**T*B + beta*C
+*
+ DO 150 J = 1,N
+ DO 140 I = 1,M
+ TEMP = ZERO
+ DO 130 L = 1,K
+ TEMP = TEMP + A(L,I)*B(L,J)
+ 130 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 140 CONTINUE
+ 150 CONTINUE
+ END IF
+ ELSE IF (NOTA) THEN
+ IF (CONJB) THEN
+*
+* Form C := alpha*A*B**H + beta*C.
+*
+ DO 200 J = 1,N
+ IF (BETA.EQ.ZERO) THEN
+ DO 160 I = 1,M
+ C(I,J) = ZERO
+ 160 CONTINUE
+ ELSE IF (BETA.NE.ONE) THEN
+ DO 170 I = 1,M
+ C(I,J) = BETA*C(I,J)
+ 170 CONTINUE
+ END IF
+ DO 190 L = 1,K
+ TEMP = ALPHA*CONJG(B(J,L))
+ DO 180 I = 1,M
+ C(I,J) = C(I,J) + TEMP*A(I,L)
+ 180 CONTINUE
+ 190 CONTINUE
+ 200 CONTINUE
+ ELSE
+*
+* Form C := alpha*A*B**T + beta*C
+*
+ DO 250 J = 1,N
+ IF (BETA.EQ.ZERO) THEN
+ DO 210 I = 1,M
+ C(I,J) = ZERO
+ 210 CONTINUE
+ ELSE IF (BETA.NE.ONE) THEN
+ DO 220 I = 1,M
+ C(I,J) = BETA*C(I,J)
+ 220 CONTINUE
+ END IF
+ DO 240 L = 1,K
+ TEMP = ALPHA*B(J,L)
+ DO 230 I = 1,M
+ C(I,J) = C(I,J) + TEMP*A(I,L)
+ 230 CONTINUE
+ 240 CONTINUE
+ 250 CONTINUE
+ END IF
+ ELSE IF (CONJA) THEN
+ IF (CONJB) THEN
+*
+* Form C := alpha*A**H*B**H + beta*C.
+*
+ DO 280 J = 1,N
+ DO 270 I = 1,M
+ TEMP = ZERO
+ DO 260 L = 1,K
+ TEMP = TEMP + CONJG(A(L,I))*CONJG(B(J,L))
+ 260 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 270 CONTINUE
+ 280 CONTINUE
+ ELSE
+*
+* Form C := alpha*A**H*B**T + beta*C
+*
+ DO 310 J = 1,N
+ DO 300 I = 1,M
+ TEMP = ZERO
+ DO 290 L = 1,K
+ TEMP = TEMP + CONJG(A(L,I))*B(J,L)
+ 290 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 300 CONTINUE
+ 310 CONTINUE
+ END IF
+ ELSE
+ IF (CONJB) THEN
+*
+* Form C := alpha*A**T*B**H + beta*C
+*
+ DO 340 J = 1,N
+ DO 330 I = 1,M
+ TEMP = ZERO
+ DO 320 L = 1,K
+ TEMP = TEMP + A(L,I)*CONJG(B(J,L))
+ 320 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 330 CONTINUE
+ 340 CONTINUE
+ ELSE
+*
+* Form C := alpha*A**T*B**T + beta*C
+*
+ DO 370 J = 1,N
+ DO 360 I = 1,M
+ TEMP = ZERO
+ DO 350 L = 1,K
+ TEMP = TEMP + A(L,I)*B(J,L)
+ 350 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 360 CONTINUE
+ 370 CONTINUE
+ END IF
+ END IF
+*
+ RETURN
+*
+* End of CGEMM .
+*
+ END
+
+*> \brief \b LSAME
+*
+* =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+* http://www.netlib.org/lapack/explore-html/
+*
+* Definition:
+* ===========
+*
+* LOGICAL FUNCTION LSAME(CA,CB)
+*
+* .. Scalar Arguments ..
+* CHARACTER CA,CB
+* ..
+*
+*
+*> \par Purpose:
+* =============
+*>
+*> \verbatim
+*>
+*> LSAME returns .TRUE. if CA is the same letter as CB regardless of
+*> case.
+*> \endverbatim
+*
+* Arguments:
+* ==========
+*
+*> \param[in] CA
+*> \verbatim
+*> CA is CHARACTER*1
+*> \endverbatim
+*>
+*> \param[in] CB
+*> \verbatim
+*> CB is CHARACTER*1
+*> CA and CB specify the single characters to be compared.
+*> \endverbatim
+*
+* Authors:
+* ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup aux_blas
+*
+* =====================================================================
+ LOGICAL FUNCTION LSAME(CA,CB)
+*
+* -- Reference BLAS level1 routine (version 3.1) --
+* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
+* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+* December 2016
+*
+* .. Scalar Arguments ..
+ CHARACTER CA,CB
+* ..
+*
+* =====================================================================
+*
+* .. Intrinsic Functions ..
+ INTRINSIC ICHAR
+* ..
+* .. Local Scalars ..
+ INTEGER INTA,INTB,ZCODE
+* ..
+*
+* Test if the characters are equal
+*
+ LSAME = CA .EQ. CB
+ IF (LSAME) RETURN
+*
+* Now test for equivalence if both characters are alphabetic.
+*
+ ZCODE = ICHAR('Z')
+*
+* Use 'Z' rather than 'A' so that ASCII can be detected on Prime
+* machines, on which ICHAR returns a value with bit 8 set.
+* ICHAR('A') on Prime machines returns 193 which is the same as
+* ICHAR('A') on an EBCDIC machine.
+*
+ INTA = ICHAR(CA)
+ INTB = ICHAR(CB)
+*
+ IF (ZCODE.EQ.90 .OR. ZCODE.EQ.122) THEN
+*
+* ASCII is assumed - ZCODE is the ASCII code of either lower or
+* upper case 'Z'.
+*
+ IF (INTA.GE.97 .AND. INTA.LE.122) INTA = INTA - 32
+ IF (INTB.GE.97 .AND. INTB.LE.122) INTB = INTB - 32
+*
+ ELSE IF (ZCODE.EQ.233 .OR. ZCODE.EQ.169) THEN
+*
+* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or
+* upper case 'Z'.
+*
+ IF (INTA.GE.129 .AND. INTA.LE.137 .OR.
+ + INTA.GE.145 .AND. INTA.LE.153 .OR.
+ + INTA.GE.162 .AND. INTA.LE.169) INTA = INTA + 64
+ IF (INTB.GE.129 .AND. INTB.LE.137 .OR.
+ + INTB.GE.145 .AND. INTB.LE.153 .OR.
+ + INTB.GE.162 .AND. INTB.LE.169) INTB = INTB + 64
+*
+ ELSE IF (ZCODE.EQ.218 .OR. ZCODE.EQ.250) THEN
+*
+* ASCII is assumed, on Prime machines - ZCODE is the ASCII code
+* plus 128 of either lower or upper case 'Z'.
+*
+ IF (INTA.GE.225 .AND. INTA.LE.250) INTA = INTA - 32
+ IF (INTB.GE.225 .AND. INTB.LE.250) INTB = INTB - 32
+ END IF
+ LSAME = INTA .EQ. INTB
+*
+* RETURN
+*
+* End of LSAME
+*
+ END
+
+*> \brief \b XERBLA
+*
+* =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+* http://www.netlib.org/lapack/explore-html/
+*
+* Definition:
+* ===========
+*
+* SUBROUTINE XERBLA( SRNAME, INFO )
+*
+* .. Scalar Arguments ..
+* CHARACTER*(*) SRNAME
+* INTEGER INFO
+* ..
+*
+*
+*> \par Purpose:
+* =============
+*>
+*> \verbatim
+*>
+*> XERBLA is an error handler for the LAPACK routines.
+*> It is called by an LAPACK routine if an input parameter has an
+*> invalid value. A message is printed and execution stops.
+*>
+*> Installers may consider modifying the STOP statement in order to
+*> call system-specific exception-handling facilities.
+*> \endverbatim
+*
+* Arguments:
+* ==========
+*
+*> \param[in] SRNAME
+*> \verbatim
+*> SRNAME is CHARACTER*(*)
+*> The name of the routine which called XERBLA.
+*> \endverbatim
+*>
+*> \param[in] INFO
+*> \verbatim
+*> INFO is INTEGER
+*> The position of the invalid parameter in the parameter list
+*> of the calling routine.
+*> \endverbatim
+*
+* Authors:
+* ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup aux_blas
+*
+* =====================================================================
+ SUBROUTINE XERBLA( SRNAME, INFO )
+*
+* -- Reference BLAS level1 routine (version 3.7.0) --
+* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
+* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+* December 2016
+*
+* .. Scalar Arguments ..
+ CHARACTER*(*) SRNAME
+ INTEGER INFO
+* ..
+*
+* =====================================================================
+*
+* .. Intrinsic Functions ..
+ INTRINSIC LEN_TRIM
+* ..
+* .. Executable Statements ..
+*
+ WRITE( *, FMT = 9999 )SRNAME( 1:LEN_TRIM( SRNAME ) ), INFO
+*
+ STOP
+*
+ 9999 FORMAT( ' ** On entry to ', A, ' parameter number ', I2, ' had ',
+ $ 'an illegal value' )
+*
+* End of XERBLA
+*
+ END
+
+*> \brief \b SGEMM
+*
+* =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+* http://www.netlib.org/lapack/explore-html/
+*
+* Definition:
+* ===========
+*
+* SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
+*
+* .. Scalar Arguments ..
+* REAL ALPHA,BETA
+* INTEGER K,LDA,LDB,LDC,M,N
+* CHARACTER TRANSA,TRANSB
+* ..
+* .. Array Arguments ..
+* REAL A(LDA,*),B(LDB,*),C(LDC,*)
+* ..
+*
+*
+*> \par Purpose:
+* =============
+*>
+*> \verbatim
+*>
+*> SGEMM performs one of the matrix-matrix operations
+*>
+*> C := alpha*op( A )*op( B ) + beta*C,
+*>
+*> where op( X ) is one of
+*>
+*> op( X ) = X or op( X ) = X**T,
+*>
+*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
+*> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
+*> \endverbatim
+*
+* Arguments:
+* ==========
+*
+*> \param[in] TRANSA
+*> \verbatim
+*> TRANSA is CHARACTER*1
+*> On entry, TRANSA specifies the form of op( A ) to be used in
+*> the matrix multiplication as follows:
+*>
+*> TRANSA = 'N' or 'n', op( A ) = A.
+*>
+*> TRANSA = 'T' or 't', op( A ) = A**T.
+*>
+*> TRANSA = 'C' or 'c', op( A ) = A**T.
+*> \endverbatim
+*>
+*> \param[in] TRANSB
+*> \verbatim
+*> TRANSB is CHARACTER*1
+*> On entry, TRANSB specifies the form of op( B ) to be used in
+*> the matrix multiplication as follows:
+*>
+*> TRANSB = 'N' or 'n', op( B ) = B.
+*>
+*> TRANSB = 'T' or 't', op( B ) = B**T.
+*>
+*> TRANSB = 'C' or 'c', op( B ) = B**T.
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*> M is INTEGER
+*> On entry, M specifies the number of rows of the matrix
+*> op( A ) and of the matrix C. M must be at least zero.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*> N is INTEGER
+*> On entry, N specifies the number of columns of the matrix
+*> op( B ) and the number of columns of the matrix C. N must be
+*> at least zero.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*> K is INTEGER
+*> On entry, K specifies the number of columns of the matrix
+*> op( A ) and the number of rows of the matrix op( B ). K must
+*> be at least zero.
+*> \endverbatim
+*>
+*> \param[in] ALPHA
+*> \verbatim
+*> ALPHA is REAL
+*> On entry, ALPHA specifies the scalar alpha.
+*> \endverbatim
+*>
+*> \param[in] A
+*> \verbatim
+*> A is REAL array, dimension ( LDA, ka ), where ka is
+*> k when TRANSA = 'N' or 'n', and is m otherwise.
+*> Before entry with TRANSA = 'N' or 'n', the leading m by k
+*> part of the array A must contain the matrix A, otherwise
+*> the leading k by m part of the array A must contain the
+*> matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*> LDA is INTEGER
+*> On entry, LDA specifies the first dimension of A as declared
+*> in the calling (sub) program. When TRANSA = 'N' or 'n' then
+*> LDA must be at least max( 1, m ), otherwise LDA must be at
+*> least max( 1, k ).
+*> \endverbatim
+*>
+*> \param[in] B
+*> \verbatim
+*> B is REAL array, dimension ( LDB, kb ), where kb is
+*> n when TRANSB = 'N' or 'n', and is k otherwise.
+*> Before entry with TRANSB = 'N' or 'n', the leading k by n
+*> part of the array B must contain the matrix B, otherwise
+*> the leading n by k part of the array B must contain the
+*> matrix B.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*> LDB is INTEGER
+*> On entry, LDB specifies the first dimension of B as declared
+*> in the calling (sub) program. When TRANSB = 'N' or 'n' then
+*> LDB must be at least max( 1, k ), otherwise LDB must be at
+*> least max( 1, n ).
+*> \endverbatim
+*>
+*> \param[in] BETA
+*> \verbatim
+*> BETA is REAL
+*> On entry, BETA specifies the scalar beta. When BETA is
+*> supplied as zero then C need not be set on input.
+*> \endverbatim
+*>
+*> \param[in,out] C
+*> \verbatim
+*> C is REAL array, dimension ( LDC, N )
+*> Before entry, the leading m by n part of the array C must
+*> contain the matrix C, except when beta is zero, in which
+*> case C need not be set on entry.
+*> On exit, the array C is overwritten by the m by n matrix
+*> ( alpha*op( A )*op( B ) + beta*C ).
+*> \endverbatim
+*>
+*> \param[in] LDC
+*> \verbatim
+*> LDC is INTEGER
+*> On entry, LDC specifies the first dimension of C as declared
+*> in the calling (sub) program. LDC must be at least
+*> max( 1, m ).
+*> \endverbatim
+*
+* Authors:
+* ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup single_blas_level3
+*
+*> \par Further Details:
+* =====================
+*>
+*> \verbatim
+*>
+*> Level 3 Blas routine.
+*>
+*> -- Written on 8-February-1989.
+*> Jack Dongarra, Argonne National Laboratory.
+*> Iain Duff, AERE Harwell.
+*> Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*> Sven Hammarling, Numerical Algorithms Group Ltd.
+*> \endverbatim
+*>
+* =====================================================================
+ SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
+*
+* -- Reference BLAS level3 routine (version 3.7.0) --
+* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
+* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+* December 2016
+*
+* .. Scalar Arguments ..
+ REAL ALPHA,BETA
+ INTEGER K,LDA,LDB,LDC,M,N
+ CHARACTER TRANSA,TRANSB
+* ..
+* .. Array Arguments ..
+ REAL A(LDA,*),B(LDB,*),C(LDC,*)
+* ..
+*
+* =====================================================================
+*
+* .. External Functions ..
+ LOGICAL LSAME
+ EXTERNAL LSAME
+* ..
+* .. External Subroutines ..
+ EXTERNAL XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX
+* ..
+* .. Local Scalars ..
+ REAL TEMP
+ INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB
+ LOGICAL NOTA,NOTB
+* ..
+* .. Parameters ..
+ REAL ONE,ZERO
+ PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
+* ..
+*
+* Set NOTA and NOTB as true if A and B respectively are not
+* transposed and set NROWA, NCOLA and NROWB as the number of rows
+* and columns of A and the number of rows of B respectively.
+*
+ NOTA = LSAME(TRANSA,'N')
+ NOTB = LSAME(TRANSB,'N')
+ IF (NOTA) THEN
+ NROWA = M
+ NCOLA = K
+ ELSE
+ NROWA = K
+ NCOLA = M
+ END IF
+ IF (NOTB) THEN
+ NROWB = K
+ ELSE
+ NROWB = N
+ END IF
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND.
+ + (.NOT.LSAME(TRANSA,'T'))) THEN
+ INFO = 1
+ ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND.
+ + (.NOT.LSAME(TRANSB,'T'))) THEN
+ INFO = 2
+ ELSE IF (M.LT.0) THEN
+ INFO = 3
+ ELSE IF (N.LT.0) THEN
+ INFO = 4
+ ELSE IF (K.LT.0) THEN
+ INFO = 5
+ ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
+ INFO = 8
+ ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
+ INFO = 10
+ ELSE IF (LDC.LT.MAX(1,M)) THEN
+ INFO = 13
+ END IF
+ IF (INFO.NE.0) THEN
+ CALL XERBLA('SGEMM ',INFO)
+ RETURN
+ END IF
+*
+* Quick return if possible.
+*
+ IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
+ + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
+*
+* And if alpha.eq.zero.
+*
+ IF (ALPHA.EQ.ZERO) THEN
+ IF (BETA.EQ.ZERO) THEN
+ DO 20 J = 1,N
+ DO 10 I = 1,M
+ C(I,J) = ZERO
+ 10 CONTINUE
+ 20 CONTINUE
+ ELSE
+ DO 40 J = 1,N
+ DO 30 I = 1,M
+ C(I,J) = BETA*C(I,J)
+ 30 CONTINUE
+ 40 CONTINUE
+ END IF
+ RETURN
+ END IF
+*
+* Start the operations.
+*
+ IF (NOTB) THEN
+ IF (NOTA) THEN
+*
+* Form C := alpha*A*B + beta*C.
+*
+ DO 90 J = 1,N
+ IF (BETA.EQ.ZERO) THEN
+ DO 50 I = 1,M
+ C(I,J) = ZERO
+ 50 CONTINUE
+ ELSE IF (BETA.NE.ONE) THEN
+ DO 60 I = 1,M
+ C(I,J) = BETA*C(I,J)
+ 60 CONTINUE
+ END IF
+ DO 80 L = 1,K
+ TEMP = ALPHA*B(L,J)
+ DO 70 I = 1,M
+ C(I,J) = C(I,J) + TEMP*A(I,L)
+ 70 CONTINUE
+ 80 CONTINUE
+ 90 CONTINUE
+ ELSE
+*
+* Form C := alpha*A**T*B + beta*C
+*
+ DO 120 J = 1,N
+ DO 110 I = 1,M
+ TEMP = ZERO
+ DO 100 L = 1,K
+ TEMP = TEMP + A(L,I)*B(L,J)
+ 100 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 110 CONTINUE
+ 120 CONTINUE
+ END IF
+ ELSE
+ IF (NOTA) THEN
+*
+* Form C := alpha*A*B**T + beta*C
+*
+ DO 170 J = 1,N
+ IF (BETA.EQ.ZERO) THEN
+ DO 130 I = 1,M
+ C(I,J) = ZERO
+ 130 CONTINUE
+ ELSE IF (BETA.NE.ONE) THEN
+ DO 140 I = 1,M
+ C(I,J) = BETA*C(I,J)
+ 140 CONTINUE
+ END IF
+ DO 160 L = 1,K
+ TEMP = ALPHA*B(J,L)
+ DO 150 I = 1,M
+ C(I,J) = C(I,J) + TEMP*A(I,L)
+ 150 CONTINUE
+ 160 CONTINUE
+ 170 CONTINUE
+ ELSE
+*
+* Form C := alpha*A**T*B**T + beta*C
+*
+ DO 200 J = 1,N
+ DO 190 I = 1,M
+ TEMP = ZERO
+ DO 180 L = 1,K
+ TEMP = TEMP + A(L,I)*B(J,L)
+ 180 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 190 CONTINUE
+ 200 CONTINUE
+ END IF
+ END IF
+*
+ RETURN
+*
+* End of SGEMM .
+*
+ END
+
+*> \brief \b DGEMM
+*
+* =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+* http://www.netlib.org/lapack/explore-html/
+*
+* Definition:
+* ===========
+*
+* SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
+*
+* .. Scalar Arguments ..
+* DOUBLE PRECISION ALPHA,BETA
+* INTEGER K,LDA,LDB,LDC,M,N
+* CHARACTER TRANSA,TRANSB
+* ..
+* .. Array Arguments ..
+* DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
+* ..
+*
+*
+*> \par Purpose:
+* =============
+*>
+*> \verbatim
+*>
+*> DGEMM performs one of the matrix-matrix operations
+*>
+*> C := alpha*op( A )*op( B ) + beta*C,
+*>
+*> where op( X ) is one of
+*>
+*> op( X ) = X or op( X ) = X**T,
+*>
+*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
+*> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
+*> \endverbatim
+*
+* Arguments:
+* ==========
+*
+*> \param[in] TRANSA
+*> \verbatim
+*> TRANSA is CHARACTER*1
+*> On entry, TRANSA specifies the form of op( A ) to be used in
+*> the matrix multiplication as follows:
+*>
+*> TRANSA = 'N' or 'n', op( A ) = A.
+*>
+*> TRANSA = 'T' or 't', op( A ) = A**T.
+*>
+*> TRANSA = 'C' or 'c', op( A ) = A**T.
+*> \endverbatim
+*>
+*> \param[in] TRANSB
+*> \verbatim
+*> TRANSB is CHARACTER*1
+*> On entry, TRANSB specifies the form of op( B ) to be used in
+*> the matrix multiplication as follows:
+*>
+*> TRANSB = 'N' or 'n', op( B ) = B.
+*>
+*> TRANSB = 'T' or 't', op( B ) = B**T.
+*>
+*> TRANSB = 'C' or 'c', op( B ) = B**T.
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*> M is INTEGER
+*> On entry, M specifies the number of rows of the matrix
+*> op( A ) and of the matrix C. M must be at least zero.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*> N is INTEGER
+*> On entry, N specifies the number of columns of the matrix
+*> op( B ) and the number of columns of the matrix C. N must be
+*> at least zero.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*> K is INTEGER
+*> On entry, K specifies the number of columns of the matrix
+*> op( A ) and the number of rows of the matrix op( B ). K must
+*> be at least zero.
+*> \endverbatim
+*>
+*> \param[in] ALPHA
+*> \verbatim
+*> ALPHA is DOUBLE PRECISION.
+*> On entry, ALPHA specifies the scalar alpha.
+*> \endverbatim
+*>
+*> \param[in] A
+*> \verbatim
+*> A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is
+*> k when TRANSA = 'N' or 'n', and is m otherwise.
+*> Before entry with TRANSA = 'N' or 'n', the leading m by k
+*> part of the array A must contain the matrix A, otherwise
+*> the leading k by m part of the array A must contain the
+*> matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*> LDA is INTEGER
+*> On entry, LDA specifies the first dimension of A as declared
+*> in the calling (sub) program. When TRANSA = 'N' or 'n' then
+*> LDA must be at least max( 1, m ), otherwise LDA must be at
+*> least max( 1, k ).
+*> \endverbatim
+*>
+*> \param[in] B
+*> \verbatim
+*> B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is
+*> n when TRANSB = 'N' or 'n', and is k otherwise.
+*> Before entry with TRANSB = 'N' or 'n', the leading k by n
+*> part of the array B must contain the matrix B, otherwise
+*> the leading n by k part of the array B must contain the
+*> matrix B.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*> LDB is INTEGER
+*> On entry, LDB specifies the first dimension of B as declared
+*> in the calling (sub) program. When TRANSB = 'N' or 'n' then
+*> LDB must be at least max( 1, k ), otherwise LDB must be at
+*> least max( 1, n ).
+*> \endverbatim
+*>
+*> \param[in] BETA
+*> \verbatim
+*> BETA is DOUBLE PRECISION.
+*> On entry, BETA specifies the scalar beta. When BETA is
+*> supplied as zero then C need not be set on input.
+*> \endverbatim
+*>
+*> \param[in,out] C
+*> \verbatim
+*> C is DOUBLE PRECISION array, dimension ( LDC, N )
+*> Before entry, the leading m by n part of the array C must
+*> contain the matrix C, except when beta is zero, in which
+*> case C need not be set on entry.
+*> On exit, the array C is overwritten by the m by n matrix
+*> ( alpha*op( A )*op( B ) + beta*C ).
+*> \endverbatim
+*>
+*> \param[in] LDC
+*> \verbatim
+*> LDC is INTEGER
+*> On entry, LDC specifies the first dimension of C as declared
+*> in the calling (sub) program. LDC must be at least
+*> max( 1, m ).
+*> \endverbatim
+*
+* Authors:
+* ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup double_blas_level3
+*
+*> \par Further Details:
+* =====================
+*>
+*> \verbatim
+*>
+*> Level 3 Blas routine.
+*>
+*> -- Written on 8-February-1989.
+*> Jack Dongarra, Argonne National Laboratory.
+*> Iain Duff, AERE Harwell.
+*> Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*> Sven Hammarling, Numerical Algorithms Group Ltd.
+*> \endverbatim
+*>
+* =====================================================================
+ SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
+*
+* -- Reference BLAS level3 routine (version 3.7.0) --
+* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
+* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+* December 2016
+*
+* .. Scalar Arguments ..
+ DOUBLE PRECISION ALPHA,BETA
+ INTEGER K,LDA,LDB,LDC,M,N
+ CHARACTER TRANSA,TRANSB
+* ..
+* .. Array Arguments ..
+ DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
+* ..
+*
+* =====================================================================
+*
+* .. External Functions ..
+ LOGICAL LSAME
+ EXTERNAL LSAME
+* ..
+* .. External Subroutines ..
+ EXTERNAL XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX
+* ..
+* .. Local Scalars ..
+ DOUBLE PRECISION TEMP
+ INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB
+ LOGICAL NOTA,NOTB
+* ..
+* .. Parameters ..
+ DOUBLE PRECISION ONE,ZERO
+ PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
+* ..
+*
+* Set NOTA and NOTB as true if A and B respectively are not
+* transposed and set NROWA, NCOLA and NROWB as the number of rows
+* and columns of A and the number of rows of B respectively.
+*
+ NOTA = LSAME(TRANSA,'N')
+ NOTB = LSAME(TRANSB,'N')
+ IF (NOTA) THEN
+ NROWA = M
+ NCOLA = K
+ ELSE
+ NROWA = K
+ NCOLA = M
+ END IF
+ IF (NOTB) THEN
+ NROWB = K
+ ELSE
+ NROWB = N
+ END IF
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND.
+ + (.NOT.LSAME(TRANSA,'T'))) THEN
+ INFO = 1
+ ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND.
+ + (.NOT.LSAME(TRANSB,'T'))) THEN
+ INFO = 2
+ ELSE IF (M.LT.0) THEN
+ INFO = 3
+ ELSE IF (N.LT.0) THEN
+ INFO = 4
+ ELSE IF (K.LT.0) THEN
+ INFO = 5
+ ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
+ INFO = 8
+ ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
+ INFO = 10
+ ELSE IF (LDC.LT.MAX(1,M)) THEN
+ INFO = 13
+ END IF
+ IF (INFO.NE.0) THEN
+ CALL XERBLA('DGEMM ',INFO)
+ RETURN
+ END IF
+*
+* Quick return if possible.
+*
+ IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
+ + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
+*
+* And if alpha.eq.zero.
+*
+ IF (ALPHA.EQ.ZERO) THEN
+ IF (BETA.EQ.ZERO) THEN
+ DO 20 J = 1,N
+ DO 10 I = 1,M
+ C(I,J) = ZERO
+ 10 CONTINUE
+ 20 CONTINUE
+ ELSE
+ DO 40 J = 1,N
+ DO 30 I = 1,M
+ C(I,J) = BETA*C(I,J)
+ 30 CONTINUE
+ 40 CONTINUE
+ END IF
+ RETURN
+ END IF
+*
+* Start the operations.
+*
+ IF (NOTB) THEN
+ IF (NOTA) THEN
+*
+* Form C := alpha*A*B + beta*C.
+*
+ DO 90 J = 1,N
+ IF (BETA.EQ.ZERO) THEN
+ DO 50 I = 1,M
+ C(I,J) = ZERO
+ 50 CONTINUE
+ ELSE IF (BETA.NE.ONE) THEN
+ DO 60 I = 1,M
+ C(I,J) = BETA*C(I,J)
+ 60 CONTINUE
+ END IF
+ DO 80 L = 1,K
+ TEMP = ALPHA*B(L,J)
+ DO 70 I = 1,M
+ C(I,J) = C(I,J) + TEMP*A(I,L)
+ 70 CONTINUE
+ 80 CONTINUE
+ 90 CONTINUE
+ ELSE
+*
+* Form C := alpha*A**T*B + beta*C
+*
+ DO 120 J = 1,N
+ DO 110 I = 1,M
+ TEMP = ZERO
+ DO 100 L = 1,K
+ TEMP = TEMP + A(L,I)*B(L,J)
+ 100 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 110 CONTINUE
+ 120 CONTINUE
+ END IF
+ ELSE
+ IF (NOTA) THEN
+*
+* Form C := alpha*A*B**T + beta*C
+*
+ DO 170 J = 1,N
+ IF (BETA.EQ.ZERO) THEN
+ DO 130 I = 1,M
+ C(I,J) = ZERO
+ 130 CONTINUE
+ ELSE IF (BETA.NE.ONE) THEN
+ DO 140 I = 1,M
+ C(I,J) = BETA*C(I,J)
+ 140 CONTINUE
+ END IF
+ DO 160 L = 1,K
+ TEMP = ALPHA*B(J,L)
+ DO 150 I = 1,M
+ C(I,J) = C(I,J) + TEMP*A(I,L)
+ 150 CONTINUE
+ 160 CONTINUE
+ 170 CONTINUE
+ ELSE
+*
+* Form C := alpha*A**T*B**T + beta*C
+*
+ DO 200 J = 1,N
+ DO 190 I = 1,M
+ TEMP = ZERO
+ DO 180 L = 1,K
+ TEMP = TEMP + A(L,I)*B(J,L)
+ 180 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 190 CONTINUE
+ 200 CONTINUE
+ END IF
+ END IF
+*
+ RETURN
+*
+* End of DGEMM .
+*
+ END
+
+*> \brief \b ZGEMM
+*
+* =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+* http://www.netlib.org/lapack/explore-html/
+*
+* Definition:
+* ===========
+*
+* SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
+*
+* .. Scalar Arguments ..
+* COMPLEX*16 ALPHA,BETA
+* INTEGER K,LDA,LDB,LDC,M,N
+* CHARACTER TRANSA,TRANSB
+* ..
+* .. Array Arguments ..
+* COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
+* ..
+*
+*
+*> \par Purpose:
+* =============
+*>
+*> \verbatim
+*>
+*> ZGEMM performs one of the matrix-matrix operations
+*>
+*> C := alpha*op( A )*op( B ) + beta*C,
+*>
+*> where op( X ) is one of
+*>
+*> op( X ) = X or op( X ) = X**T or op( X ) = X**H,
+*>
+*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
+*> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
+*> \endverbatim
+*
+* Arguments:
+* ==========
+*
+*> \param[in] TRANSA
+*> \verbatim
+*> TRANSA is CHARACTER*1
+*> On entry, TRANSA specifies the form of op( A ) to be used in
+*> the matrix multiplication as follows:
+*>
+*> TRANSA = 'N' or 'n', op( A ) = A.
+*>
+*> TRANSA = 'T' or 't', op( A ) = A**T.
+*>
+*> TRANSA = 'C' or 'c', op( A ) = A**H.
+*> \endverbatim
+*>
+*> \param[in] TRANSB
+*> \verbatim
+*> TRANSB is CHARACTER*1
+*> On entry, TRANSB specifies the form of op( B ) to be used in
+*> the matrix multiplication as follows:
+*>
+*> TRANSB = 'N' or 'n', op( B ) = B.
+*>
+*> TRANSB = 'T' or 't', op( B ) = B**T.
+*>
+*> TRANSB = 'C' or 'c', op( B ) = B**H.
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*> M is INTEGER
+*> On entry, M specifies the number of rows of the matrix
+*> op( A ) and of the matrix C. M must be at least zero.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*> N is INTEGER
+*> On entry, N specifies the number of columns of the matrix
+*> op( B ) and the number of columns of the matrix C. N must be
+*> at least zero.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*> K is INTEGER
+*> On entry, K specifies the number of columns of the matrix
+*> op( A ) and the number of rows of the matrix op( B ). K must
+*> be at least zero.
+*> \endverbatim
+*>
+*> \param[in] ALPHA
+*> \verbatim
+*> ALPHA is COMPLEX*16
+*> On entry, ALPHA specifies the scalar alpha.
+*> \endverbatim
+*>
+*> \param[in] A
+*> \verbatim
+*> A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is
+*> k when TRANSA = 'N' or 'n', and is m otherwise.
+*> Before entry with TRANSA = 'N' or 'n', the leading m by k
+*> part of the array A must contain the matrix A, otherwise
+*> the leading k by m part of the array A must contain the
+*> matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*> LDA is INTEGER
+*> On entry, LDA specifies the first dimension of A as declared
+*> in the calling (sub) program. When TRANSA = 'N' or 'n' then
+*> LDA must be at least max( 1, m ), otherwise LDA must be at
+*> least max( 1, k ).
+*> \endverbatim
+*>
+*> \param[in] B
+*> \verbatim
+*> B is COMPLEX*16 array, dimension ( LDB, kb ), where kb is
+*> n when TRANSB = 'N' or 'n', and is k otherwise.
+*> Before entry with TRANSB = 'N' or 'n', the leading k by n
+*> part of the array B must contain the matrix B, otherwise
+*> the leading n by k part of the array B must contain the
+*> matrix B.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*> LDB is INTEGER
+*> On entry, LDB specifies the first dimension of B as declared
+*> in the calling (sub) program. When TRANSB = 'N' or 'n' then
+*> LDB must be at least max( 1, k ), otherwise LDB must be at
+*> least max( 1, n ).
+*> \endverbatim
+*>
+*> \param[in] BETA
+*> \verbatim
+*> BETA is COMPLEX*16
+*> On entry, BETA specifies the scalar beta. When BETA is
+*> supplied as zero then C need not be set on input.
+*> \endverbatim
+*>
+*> \param[in,out] C
+*> \verbatim
+*> C is COMPLEX*16 array, dimension ( LDC, N )
+*> Before entry, the leading m by n part of the array C must
+*> contain the matrix C, except when beta is zero, in which
+*> case C need not be set on entry.
+*> On exit, the array C is overwritten by the m by n matrix
+*> ( alpha*op( A )*op( B ) + beta*C ).
+*> \endverbatim
+*>
+*> \param[in] LDC
+*> \verbatim
+*> LDC is INTEGER
+*> On entry, LDC specifies the first dimension of C as declared
+*> in the calling (sub) program. LDC must be at least
+*> max( 1, m ).
+*> \endverbatim
+*
+* Authors:
+* ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup complex16_blas_level3
+*
+*> \par Further Details:
+* =====================
+*>
+*> \verbatim
+*>
+*> Level 3 Blas routine.
+*>
+*> -- Written on 8-February-1989.
+*> Jack Dongarra, Argonne National Laboratory.
+*> Iain Duff, AERE Harwell.
+*> Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*> Sven Hammarling, Numerical Algorithms Group Ltd.
+*> \endverbatim
+*>
+* =====================================================================
+ SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
+*
+* -- Reference BLAS level3 routine (version 3.7.0) --
+* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
+* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+* December 2016
+*
+* .. Scalar Arguments ..
+ COMPLEX*16 ALPHA,BETA
+ INTEGER K,LDA,LDB,LDC,M,N
+ CHARACTER TRANSA,TRANSB
+* ..
+* .. Array Arguments ..
+ COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
+* ..
+*
+* =====================================================================
+*
+* .. External Functions ..
+ LOGICAL LSAME
+ EXTERNAL LSAME
+* ..
+* .. External Subroutines ..
+ EXTERNAL XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC DCONJG,MAX
+* ..
+* .. Local Scalars ..
+ COMPLEX*16 TEMP
+ INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB
+ LOGICAL CONJA,CONJB,NOTA,NOTB
+* ..
+* .. Parameters ..
+ COMPLEX*16 ONE
+ PARAMETER (ONE= (1.0D+0,0.0D+0))
+ COMPLEX*16 ZERO
+ PARAMETER (ZERO= (0.0D+0,0.0D+0))
+* ..
+*
+* Set NOTA and NOTB as true if A and B respectively are not
+* conjugated or transposed, set CONJA and CONJB as true if A and
+* B respectively are to be transposed but not conjugated and set
+* NROWA, NCOLA and NROWB as the number of rows and columns of A
+* and the number of rows of B respectively.
+*
+ NOTA = LSAME(TRANSA,'N')
+ NOTB = LSAME(TRANSB,'N')
+ CONJA = LSAME(TRANSA,'C')
+ CONJB = LSAME(TRANSB,'C')
+ IF (NOTA) THEN
+ NROWA = M
+ NCOLA = K
+ ELSE
+ NROWA = K
+ NCOLA = M
+ END IF
+ IF (NOTB) THEN
+ NROWB = K
+ ELSE
+ NROWB = N
+ END IF
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF ((.NOT.NOTA) .AND. (.NOT.CONJA) .AND.
+ + (.NOT.LSAME(TRANSA,'T'))) THEN
+ INFO = 1
+ ELSE IF ((.NOT.NOTB) .AND. (.NOT.CONJB) .AND.
+ + (.NOT.LSAME(TRANSB,'T'))) THEN
+ INFO = 2
+ ELSE IF (M.LT.0) THEN
+ INFO = 3
+ ELSE IF (N.LT.0) THEN
+ INFO = 4
+ ELSE IF (K.LT.0) THEN
+ INFO = 5
+ ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
+ INFO = 8
+ ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
+ INFO = 10
+ ELSE IF (LDC.LT.MAX(1,M)) THEN
+ INFO = 13
+ END IF
+ IF (INFO.NE.0) THEN
+ CALL XERBLA('ZGEMM ',INFO)
+ RETURN
+ END IF
+*
+* Quick return if possible.
+*
+ IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
+ + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
+*
+* And when alpha.eq.zero.
+*
+ IF (ALPHA.EQ.ZERO) THEN
+ IF (BETA.EQ.ZERO) THEN
+ DO 20 J = 1,N
+ DO 10 I = 1,M
+ C(I,J) = ZERO
+ 10 CONTINUE
+ 20 CONTINUE
+ ELSE
+ DO 40 J = 1,N
+ DO 30 I = 1,M
+ C(I,J) = BETA*C(I,J)
+ 30 CONTINUE
+ 40 CONTINUE
+ END IF
+ RETURN
+ END IF
+*
+* Start the operations.
+*
+ IF (NOTB) THEN
+ IF (NOTA) THEN
+*
+* Form C := alpha*A*B + beta*C.
+*
+ DO 90 J = 1,N
+ IF (BETA.EQ.ZERO) THEN
+ DO 50 I = 1,M
+ C(I,J) = ZERO
+ 50 CONTINUE
+ ELSE IF (BETA.NE.ONE) THEN
+ DO 60 I = 1,M
+ C(I,J) = BETA*C(I,J)
+ 60 CONTINUE
+ END IF
+ DO 80 L = 1,K
+ TEMP = ALPHA*B(L,J)
+ DO 70 I = 1,M
+ C(I,J) = C(I,J) + TEMP*A(I,L)
+ 70 CONTINUE
+ 80 CONTINUE
+ 90 CONTINUE
+ ELSE IF (CONJA) THEN
+*
+* Form C := alpha*A**H*B + beta*C.
+*
+ DO 120 J = 1,N
+ DO 110 I = 1,M
+ TEMP = ZERO
+ DO 100 L = 1,K
+ TEMP = TEMP + DCONJG(A(L,I))*B(L,J)
+ 100 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 110 CONTINUE
+ 120 CONTINUE
+ ELSE
+*
+* Form C := alpha*A**T*B + beta*C
+*
+ DO 150 J = 1,N
+ DO 140 I = 1,M
+ TEMP = ZERO
+ DO 130 L = 1,K
+ TEMP = TEMP + A(L,I)*B(L,J)
+ 130 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 140 CONTINUE
+ 150 CONTINUE
+ END IF
+ ELSE IF (NOTA) THEN
+ IF (CONJB) THEN
+*
+* Form C := alpha*A*B**H + beta*C.
+*
+ DO 200 J = 1,N
+ IF (BETA.EQ.ZERO) THEN
+ DO 160 I = 1,M
+ C(I,J) = ZERO
+ 160 CONTINUE
+ ELSE IF (BETA.NE.ONE) THEN
+ DO 170 I = 1,M
+ C(I,J) = BETA*C(I,J)
+ 170 CONTINUE
+ END IF
+ DO 190 L = 1,K
+ TEMP = ALPHA*DCONJG(B(J,L))
+ DO 180 I = 1,M
+ C(I,J) = C(I,J) + TEMP*A(I,L)
+ 180 CONTINUE
+ 190 CONTINUE
+ 200 CONTINUE
+ ELSE
+*
+* Form C := alpha*A*B**T + beta*C
+*
+ DO 250 J = 1,N
+ IF (BETA.EQ.ZERO) THEN
+ DO 210 I = 1,M
+ C(I,J) = ZERO
+ 210 CONTINUE
+ ELSE IF (BETA.NE.ONE) THEN
+ DO 220 I = 1,M
+ C(I,J) = BETA*C(I,J)
+ 220 CONTINUE
+ END IF
+ DO 240 L = 1,K
+ TEMP = ALPHA*B(J,L)
+ DO 230 I = 1,M
+ C(I,J) = C(I,J) + TEMP*A(I,L)
+ 230 CONTINUE
+ 240 CONTINUE
+ 250 CONTINUE
+ END IF
+ ELSE IF (CONJA) THEN
+ IF (CONJB) THEN
+*
+* Form C := alpha*A**H*B**H + beta*C.
+*
+ DO 280 J = 1,N
+ DO 270 I = 1,M
+ TEMP = ZERO
+ DO 260 L = 1,K
+ TEMP = TEMP + DCONJG(A(L,I))*DCONJG(B(J,L))
+ 260 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 270 CONTINUE
+ 280 CONTINUE
+ ELSE
+*
+* Form C := alpha*A**H*B**T + beta*C
+*
+ DO 310 J = 1,N
+ DO 300 I = 1,M
+ TEMP = ZERO
+ DO 290 L = 1,K
+ TEMP = TEMP + DCONJG(A(L,I))*B(J,L)
+ 290 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 300 CONTINUE
+ 310 CONTINUE
+ END IF
+ ELSE
+ IF (CONJB) THEN
+*
+* Form C := alpha*A**T*B**H + beta*C
+*
+ DO 340 J = 1,N
+ DO 330 I = 1,M
+ TEMP = ZERO
+ DO 320 L = 1,K
+ TEMP = TEMP + A(L,I)*DCONJG(B(J,L))
+ 320 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 330 CONTINUE
+ 340 CONTINUE
+ ELSE
+*
+* Form C := alpha*A**T*B**T + beta*C
+*
+ DO 370 J = 1,N
+ DO 360 I = 1,M
+ TEMP = ZERO
+ DO 350 L = 1,K
+ TEMP = TEMP + A(L,I)*B(J,L)
+ 350 CONTINUE
+ IF (BETA.EQ.ZERO) THEN
+ C(I,J) = ALPHA*TEMP
+ ELSE
+ C(I,J) = ALPHA*TEMP + BETA*C(I,J)
+ END IF
+ 360 CONTINUE
+ 370 CONTINUE
+ END IF
+ END IF
+*
+ RETURN
+*
+* End of ZGEMM .
+*
+ END
--- /dev/null
+C { dg-do run }
+C { dg-options "-fcheck=bounds -fdump-tree-optimized -fblas-matmul-limit=1 -O -fexternal-blas" }
+C { dg-additional-sources blas_gemm_routines.f }
+C Test calling of BLAS routines
+
+ program main
+ call sub_s
+ call sub_d
+ call sub_c
+ call sub_z
+ end
+
+ subroutine sub_d
+ implicit none
+ real(8), dimension(3,2) :: a
+ real(8), dimension(2,3) :: at
+ real(8), dimension(2,4) :: b
+ real(8), dimension(4,2) :: bt
+ real(8), dimension(3,4) :: c
+ real(8), dimension(3,4) :: cres
+ real(8), dimension(:,:), allocatable :: c_alloc
+ data a / 2., -3., 5., -7., 11., -13./
+ data b /17., -23., 29., -31., 37., -39., 41., -47./
+ data cres /195., -304., 384., 275., -428., 548., 347., -540.,
+ & 692., 411., -640., 816./
+
+ c = matmul(a,b)
+ if (any (c /= cres)) stop 31
+
+ at = transpose(a)
+ c = (1.2,-2.2)
+ c = matmul(transpose(at), b)
+ if (any (c /= cres)) stop 32
+
+ bt = transpose(b)
+ c = (1.2,-2.1)
+ c = matmul(a, transpose(bt))
+ if (any (c /= cres)) stop 33
+
+ c_alloc = matmul(a,b)
+ if (any (c /= cres)) stop 34
+
+ at = transpose(a)
+ deallocate (c_alloc)
+ c = matmul(transpose(at), b)
+ if (any (c /= cres)) stop 35
+
+ bt = transpose(b)
+ allocate (c_alloc(20,20))
+ c = (1.2,-2.1)
+ c = matmul(a, transpose(bt))
+ if (any (c /= cres)) stop 36
+
+ end
+
+ subroutine sub_s
+ implicit none
+ real, dimension(3,2) :: a
+ real, dimension(2,3) :: at
+ real, dimension(2,4) :: b
+ real, dimension(4,2) :: bt
+ real, dimension(3,4) :: c
+ real, dimension(3,4) :: cres
+ real, dimension(:,:), allocatable :: c_alloc
+ data a / 2., -3., 5., -7., 11., -13./
+ data b /17., -23., 29., -31., 37., -39., 41., -47./
+ data cres /195., -304., 384., 275., -428., 548., 347., -540.,
+ & 692., 411., -640., 816./
+
+ c = matmul(a,b)
+ if (any (c /= cres)) stop 21
+
+ at = transpose(a)
+ c = (1.2,-2.2)
+ c = matmul(transpose(at), b)
+ if (any (c /= cres)) stop 22
+
+ bt = transpose(b)
+ c = (1.2,-2.1)
+ c = matmul(a, transpose(bt))
+ if (any (c /= cres)) stop 23
+
+ c_alloc = matmul(a,b)
+ if (any (c /= cres)) stop 24
+
+ at = transpose(a)
+ deallocate (c_alloc)
+ c = matmul(transpose(at), b)
+ if (any (c /= cres)) stop 25
+
+ bt = transpose(b)
+ allocate (c_alloc(20,20))
+ c = (1.2,-2.1)
+ c = matmul(a, transpose(bt))
+ if (any (c /= cres)) stop 26
+
+ end
+
+ subroutine sub_c
+ implicit none
+ complex, dimension(3,2) :: a
+ complex, dimension(2,3) :: at, ah
+ complex, dimension(2,4) :: b
+ complex, dimension(4,2) :: bt, bh
+ complex, dimension(3,4) :: c
+ complex, dimension(3,4) :: cres
+ complex, dimension(:,:), allocatable :: c_alloc
+
+ data a / (2.,-3.), (-5.,7.), (11.,-13.), (17.,19), (-23., -29),
+ & (-31., 37.)/
+
+ data b / (-41., 43.), (-47., 53.), (-59.,-61.), (-67., 71),
+ & ( 73.,79. ), (83.,-89.), (97.,-101.), (-107.,-109.)/
+ data cres /(-1759.,217.), (2522.,-358.), (-396.,-2376.),
+ & (-2789.,-11.),
+ & (4322.,202.), (-1992.,-4584.), (3485.,3.), (-5408.,-244.),
+ & (2550.,5750.), (143.,-4379.), (-478.,6794.), (7104.,-2952.) /
+
+ c = matmul(a,b)
+ if (any (c /= cres)) stop 1
+
+ at = transpose(a)
+ c = (1.2,-2.2)
+ c = matmul(transpose(at), b)
+ if (any (c /= cres)) stop 2
+
+ bt = transpose(b)
+ c = (1.2,-2.1)
+ c = matmul(a, transpose(bt))
+ if (any (c /= cres)) stop 3
+
+ ah = transpose(conjg(a))
+ c = (1.2,-2.2)
+ c = matmul(conjg(transpose(ah)), b)
+ if (any (c /= cres)) stop 4
+
+ bh = transpose(conjg(b))
+ c = (1.2,-2.2)
+ c = matmul(a, transpose(conjg(bh)))
+ if (any (c /= cres)) stop 5
+
+ c_alloc = matmul(a,b)
+ if (any (c /= cres)) stop 6
+
+ at = transpose(a)
+ deallocate (c_alloc)
+ c = matmul(transpose(at), b)
+ if (any (c /= cres)) stop 7
+
+ bt = transpose(b)
+ allocate (c_alloc(20,20))
+ c = (1.2,-2.1)
+ c = matmul(a, transpose(bt))
+ if (any (c /= cres)) stop 8
+
+ ah = transpose(conjg(a))
+ c = (1.2,-2.2)
+ c = matmul(conjg(transpose(ah)), b)
+ if (any (c /= cres)) stop 9
+
+ deallocate (c_alloc)
+ allocate (c_alloc(0,0))
+ bh = transpose(conjg(b))
+ c = (1.2,-2.2)
+ c = matmul(a, transpose(conjg(bh)))
+ if (any (c /= cres)) stop 10
+
+ end
+
+ subroutine sub_z
+ implicit none
+ complex(8), dimension(3,2) :: a
+ complex(8), dimension(2,3) :: at, ah
+ complex(8), dimension(2,4) :: b
+ complex(8), dimension(4,2) :: bt, bh
+ complex(8), dimension(3,4) :: c
+ complex(8), dimension(3,4) :: cres
+ complex(8), dimension(:,:), allocatable :: c_alloc
+
+ data a / (2.,-3.), (-5._8,7.), (11.,-13.), (17.,19),
+ & (-23., -29), (-31., 37.)/
+
+ data b / (-41., 43.), (-47., 53.), (-59.,-61.), (-67., 71),
+ & ( 73.,79. ), (83.,-89.), (97.,-101.), (-107.,-109.)/
+ data cres /(-1759.,217.), (2522.,-358.), (-396.,-2376.),
+ & (-2789.,-11.),
+ & (4322.,202.), (-1992.,-4584.), (3485.,3.), (-5408.,-244.),
+ & (2550.,5750.), (143.,-4379.), (-478.,6794.), (7104.,-2952.) /
+
+ c = matmul(a,b)
+ if (any (c /= cres)) stop 11
+
+ at = transpose(a)
+ c = (1.2,-2.2)
+ c = matmul(transpose(at), b)
+ if (any (c /= cres)) stop 12
+
+ bt = transpose(b)
+ c = (1.2,-2.1)
+ c = matmul(a, transpose(bt))
+ if (any (c /= cres)) stop 13
+
+ ah = transpose(conjg(a))
+ c = (1.2,-2.2)
+ c = matmul(conjg(transpose(ah)), b)
+ if (any (c /= cres)) stop 14
+
+ bh = transpose(conjg(b))
+ c = (1.2,-2.2)
+ c = matmul(a, transpose(conjg(bh)))
+ if (any (c /= cres)) stop 15
+
+ c_alloc = matmul(a,b)
+ if (any (c /= cres)) stop 16
+
+ at = transpose(a)
+ deallocate (c_alloc)
+ c = matmul(transpose(at), b)
+ if (any (c /= cres)) stop 17
+
+ bt = transpose(b)
+ allocate (c_alloc(20,20))
+ c = (1.2,-2.1)
+ c = matmul(a, transpose(bt))
+ if (any (c /= cres)) stop 18
+
+ ah = transpose(conjg(a))
+ c = (1.2,-2.2)
+ c = matmul(conjg(transpose(ah)), b)
+ if (any (c /= cres)) stop 19
+
+ deallocate (c_alloc)
+ allocate (c_alloc(0,0))
+ bh = transpose(conjg(b))
+ c = (1.2,-2.2)
+ c = matmul(a, transpose(conjg(bh)))
+ if (any (c /= cres)) stop 20
+
+ end
+! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }