--- /dev/null
+! { dg-do compile }
+! { dg-options "-fcoarray=lib" }
+!
+! PR fortran/64771
+!
+! Contributed by Alessandro Fanfarill
+!
+! Reduced version of the full NAS CG benchmark
+!
+
+!-------------------------------------------------------------------------!
+! !
+! N A S P A R A L L E L B E N C H M A R K S 3.3 !
+! !
+! C G !
+! !
+!-------------------------------------------------------------------------!
+! !
+! This benchmark is part of the NAS Parallel Benchmark 3.3 suite. !
+! It is described in NAS Technical Reports 95-020 and 02-007 !
+! !
+! Permission to use, copy, distribute and modify this software !
+! for any purpose with or without fee is hereby granted. We !
+! request, however, that all derived work reference the NAS !
+! Parallel Benchmarks 3.3. This software is provided "as is" !
+! without express or implied warranty. !
+! !
+! Information on NPB 3.3, including the technical report, the !
+! original specifications, source code, results and information !
+! on how to submit new results, is available at: !
+! !
+! http://www.nas.nasa.gov/Software/NPB/ !
+! !
+! Send comments or suggestions to npb@nas.nasa.gov !
+! !
+! NAS Parallel Benchmarks Group !
+! NASA Ames Research Center !
+! Mail Stop: T27A-1 !
+! Moffett Field, CA 94035-1000 !
+! !
+! E-mail: npb@nas.nasa.gov !
+! Fax: (650) 604-3957 !
+! !
+!-------------------------------------------------------------------------!
+
+
+c---------------------------------------------------------------------
+c
+c Authors: M. Yarrow
+c C. Kuszmaul
+c R. F. Van der Wijngaart
+c H. Jin
+c
+c---------------------------------------------------------------------
+
+
+c---------------------------------------------------------------------
+c---------------------------------------------------------------------
+ program cg
+c---------------------------------------------------------------------
+c---------------------------------------------------------------------
+ implicit none
+
+ integer na, nonzer, niter
+ double precision shift, rcond
+ parameter( na=75000,
+ > nonzer=13,
+ > niter=75,
+ > shift=60.,
+ > rcond=1.0d-1 )
+
+
+
+ integer num_proc_rows, num_proc_cols
+ parameter( num_proc_rows = 2, num_proc_cols = 2)
+ integer num_procs
+ parameter( num_procs = num_proc_cols * num_proc_rows )
+
+ integer nz
+ parameter( nz = na*(nonzer+1)/num_procs*(nonzer+1)+nonzer
+ > + na*(nonzer+2+num_procs/256)/num_proc_cols )
+
+ common / partit_size / naa, nzz,
+ > npcols, nprows,
+ > proc_col, proc_row,
+ > firstrow,
+ > lastrow,
+ > firstcol,
+ > lastcol,
+ > exch_proc,
+ > exch_recv_length,
+ > send_start,
+ > send_len
+ integer naa, nzz,
+ > npcols, nprows,
+ > proc_col, proc_row,
+ > firstrow,
+ > lastrow,
+ > firstcol,
+ > lastcol,
+ > exch_proc,
+ > exch_recv_length,
+ > send_start,
+ > send_len
+
+
+ common / main_int_mem / colidx, rowstr,
+ > iv, arow, acol
+ integer colidx(nz), rowstr(na+1),
+ > iv(2*na+1), arow(nz), acol(nz)
+
+
+c---------------------------------
+c Coarray Decalarations
+c---------------------------------
+ double precision v(na+1)[0:*], aelt(nz)[0:*], a(nz)[0:*],
+ > x(na/num_proc_rows+2)[0:*],
+ > z(na/num_proc_rows+2)[0:*],
+ > p(na/num_proc_rows+2)[0:*],
+ > q(na/num_proc_rows+2)[0:*],
+ > r(na/num_proc_rows+2)[0:*],
+ > w(na/num_proc_rows+2)[0:*]
+
+
+ common /urando/ amult, tran
+ double precision amult, tran
+
+
+
+ integer l2npcols
+ integer reduce_exch_proc(num_proc_cols)
+ integer reduce_send_starts(num_proc_cols)
+ integer reduce_send_lengths(num_proc_cols)
+ integer reduce_recv_lengths(num_proc_cols)
+ integer reduce_rrecv_starts(num_proc_cols)
+c---------------------------------
+c Coarray Decalarations
+c---------------------------------
+ integer reduce_recv_starts(num_proc_cols)[0:*]
+
+ integer i, j, k, it, me, nprocs, root
+
+ double precision zeta, randlc
+ external randlc
+ double precision rnorm
+c---------------------------------
+c Coarray Decalarations
+c---------------------------------
+ double precision norm_temp1(2)[0:*], norm_temp2(2)[0:*]
+
+ double precision t, tmax, mflops
+ double precision u(1), umax(1)
+ external timer_read
+ double precision timer_read
+ character class
+ logical verified
+ double precision zeta_verify_value, epsilon, err
+
+c---------------------------------------------------------------------
+c Explicit interface for conj_grad, due to coarray args
+c---------------------------------------------------------------------
+ interface
+
+ subroutine conj_grad ( colidx,
+ > rowstr,
+ > x,
+ > z,
+ > a,
+ > p,
+ > q,
+ > r,
+ > w,
+ > rnorm,
+ > l2npcols,
+ > reduce_exch_proc,
+ > reduce_send_starts,
+ > reduce_send_lengths,
+ > reduce_recv_starts,
+ > reduce_recv_lengths,
+ > reduce_rrecv_starts )
+
+ common / partit_size / naa, nzz,
+ > npcols, nprows,
+ > proc_col, proc_row,
+ > firstrow,
+ > lastrow,
+ > firstcol,
+ > lastcol,
+ > exch_proc,
+ > exch_recv_length,
+ > send_start,
+ > send_len
+
+ integer naa, nzz,
+ > npcols, nprows,
+ > proc_col, proc_row,
+ > firstrow,
+ > lastrow,
+ > firstcol,
+ > lastcol,
+ > exch_proc,
+ > exch_recv_length,
+ > send_start,
+ > send_len
+
+ double precision x(*),
+ > z(*),
+ > a(nzz)
+ integer colidx(nzz), rowstr(naa+1)
+
+ double precision p(*),
+ > q(*)[0:*],
+ > r(*)[0:*],
+ > w(*)[0:*] ! used as work temporary
+
+ integer l2npcols
+ integer reduce_exch_proc(l2npcols)
+ integer reduce_send_starts(l2npcols)
+ integer reduce_send_lengths(l2npcols)
+ integer reduce_recv_starts(l2npcols)[0:*]
+ integer reduce_recv_lengths(l2npcols)
+ integer reduce_rrecv_starts(l2npcols)
+
+ double precision rnorm
+
+ end subroutine
+
+ end interface
+
+c---------------------------------------------------------------------
+c The call to the conjugate gradient routine:
+c---------------------------------------------------------------------
+ call conj_grad ( colidx,
+ > rowstr,
+ > x,
+ > z,
+ > a,
+ > p,
+ > q,
+ > r,
+ > w,
+ > rnorm,
+ > l2npcols,
+ > reduce_exch_proc,
+ > reduce_send_starts,
+ > reduce_send_lengths,
+ > reduce_recv_starts,
+ > reduce_recv_lengths,
+ > reduce_rrecv_starts )
+
+
+ sync all
+
+ end ! end main
+
+c---------------------------------------------------------------------
+c---------------------------------------------------------------------
+ subroutine conj_grad ( colidx,
+ > rowstr,
+ > x,
+ > z,
+ > a,
+ > p,
+ > q,
+ > r,
+ > w,
+ > rnorm,
+ > l2npcols,
+ > reduce_exch_proc,
+ > reduce_send_starts,
+ > reduce_send_lengths,
+ > reduce_recv_starts,
+ > reduce_recv_lengths,
+ > reduce_rrecv_starts )
+c---------------------------------------------------------------------
+c---------------------------------------------------------------------
+
+c---------------------------------------------------------------------
+c Floaging point arrays here are named as in NPB1 spec discussion of
+c CG algorithm
+c---------------------------------------------------------------------
+
+ implicit none
+
+c include 'cafnpb.h'
+
+ common / partit_size / naa, nzz,
+ > npcols, nprows,
+ > proc_col, proc_row,
+ > firstrow,
+ > lastrow,
+ > firstcol,
+ > lastcol,
+ > exch_proc,
+ > exch_recv_length,
+ > send_start,
+ > send_len
+ integer naa, nzz,
+ > npcols, nprows,
+ > proc_col, proc_row,
+ > firstrow,
+ > lastrow,
+ > firstcol,
+ > lastcol,
+ > exch_proc,
+ > exch_recv_length,
+ > send_start,
+ > send_len
+
+
+
+ double precision x(*),
+ > z(*),
+ > a(nzz)
+ integer colidx(nzz), rowstr(naa+1)
+
+ double precision p(*),
+ > q(*)[0:*],
+ > r(*)[0:*],
+ > w(*)[0:*] ! used as work temporary
+
+ integer l2npcols
+ integer reduce_exch_proc(l2npcols)
+ integer reduce_send_starts(l2npcols)
+ integer reduce_send_lengths(l2npcols)
+ integer reduce_recv_starts(l2npcols)[0:*]
+ integer reduce_recv_lengths(l2npcols)
+ integer reduce_rrecv_starts(l2npcols)
+
+ integer recv_start_idx, recv_end_idx, send_start_idx,
+ > send_end_idx, recv_length
+
+ integer i, j, k, ierr
+ integer cgit, cgitmax
+
+ double precision, save :: d[0:*], rho[0:*]
+ double precision sum, rho0, alpha, beta, rnorm
+
+ external timer_read
+ double precision timer_read
+
+ data cgitmax / 25 /
+
+
+ return
+ end ! end of routine conj_grad
+