C     PROGRAM MFLOPS(TAPE6=OUTPUT)
C                             LATEST FILE MODIFICATION DATE:  22 Dec 86
C****************************************************************************
C MEASURES CPU PERFORMANCE RANGE OF THE COMPUTER/COMPILER/COMPUTATION COMPLEX
C****************************************************************************
C                                                                           *
C     L. L. N. L.   F O R T R A N   K E R N E L S:   M F L O P S            *
C                                                                           *
C     These kernels measure  Fortran  numerical  computation rates for a    *
C     spectrum of  CPU-limited  computational  structures.  Mathematical    *
C     through-put is measured  in  units  of  millions of floating-point    *
C     operations executed per second, called Megaflops/sec.                 *
C                                                                           *
C     This program  measures  a realistic  CPU performance range for the    *
C     Fortran programming system  on  a  given day.  The CPU performance    *
C     rates depend  strongly  on  the maturity of the Fortran compiler's    *
C     ability to translate Fortran code into efficient machine code.        *
C                                                                           *
C     [ The CPU hardware  capability  apart  from  compiler maturity (or    *
C     availability), could be measured (or simulated) by programming the    *
C     kernels in assembly  or machine code directly.  These measurements    *
C     can also  serve  as a framework for tracking the maturation of the    *
C     Fortran compiler during system development.]                          *
C                                                                           *
C     While this test evaluates  the  performance of a broad sampling of    *
C     Fortran computations,  it  is not an application program and hence    *
C     it is not a benchmark  per  se.  The performance of benchmarks and    *
C     even workloads,  if  CPU  limited,  could  be roughly estimated by    *
C     choosing appropriate weights  and loop limits for each kernel (see    *
C     Block Data).                                                          *
C                                                                           *
C     Use of this program is granted with the request that a copy of the    *
C     results be sent to  the  author  at the address shown below, to be    *
C     added to  our studies of computer performance.  The timing results    *
C     will be held as  proprietary  data  if  so marked.  In return, you    *
C     will receive a copy of our latest report.                             *
C                                                                           *
C                                                                           *
C          F.H. McMahon    L-35                                             *
C          Lawrence Livermore National Laboratory                           *
C          P.O. Box 808                                                     *
C          Livermore, CA                                                    *
C          94550                                                            *
C                                                                           *
C                                                                           *
C                    (C) Copyright 1983 the Regents of the                  *
C                University of California. All Rights Reserved.             *
C                                                                           *
C               This work was produced under the sponsorship of             *
C                the U.S. Department of Energy. The Government              *
C                       retains certain rights therein.                     *
C                                                                           *
C****************************************************************************
C
C
C
C
C
C
C
C                             DIRECTIONS
C
C  1. We REQUIRE one run of the Fortran kernels as is, that is, with
C     no reprogramming.  Standard product compiler directives may be
C     used for optimization as these do not constitute reprogramming.
C
C     In addition, the vendor may, if so desired, reprogram the kernels to
C     demonstrate high performance hardware features.  Kernels 13,14,23
C     are partially vectorisable and kernels 15,16,24 are vectorisable if
C     re-written. Kernels 5,11,17,19,20 are implicit computations that
C     must not be explicitly vectorised using compiler directives to
C     ignore dependencies.  In any case, compiler listings of the codes
C     actually used should be returned along with the timing results.
C
C  2. For vector processors, we REQUIRE an ALL scalar-compilation run
C     to measure the basic scalar performance range of the processor.
C
C  3. On computers where default single precision is REAL*4 we REQUIRE
C     an additional run with all mantissas.ge.47  i.e. declare all real
C     variables REAL*8 using one of the following declarations in each routine:
C
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
C  4. Installation includes verifying or changing the following:
C
C      First :  the I/O output device number= IOU assignment in MAIN.
C      Second:  the definition of function SECOND for CPU time only, and
C               the value of TIC:= minimum cpu clock time(sec) in SIZES.
C      Third :  the definition of function MOD2N in KERNEL
C      Fourth:  the system names Komput, Kontrl, and Kompil in REPORT
C      Fifth :  after checkout set Nruns=7 in SIZES for Standard Benchmark Test
C
C  5. Each kernel's computation is check-summed for easy validation.
C     Verify correct processing using the checksums in subroutine REPORT
C     which were computed setting  Loop= 10*Loop  in subroutine SIZES.
C     Your checksums should compare to the precision used, within round-off.
C
C  6. Verify CPU Time measurements from function SECOND by comparing the clock
C     calibration printout of total CPU time with system or real-time measures.
C     The accuracy of SECOND is also tested using the test routine VERIFY.
C
C  7. On computers with Virtual Storage Systems assure a working-set space
C     larger than the entire program so that page faults are negligible,
C     because we must measure the CPU-limited computation rates.
C     IT IS ALSO NECESSARY to run this test stand-alone, i.e. NO timesharing.
C     In VS Systems a series of runs are needed to show stable CPU timings.
C
C  8. On parallel computer systems which compile vectors or Multi-tasking
C     at the Do-loop level (Micro-tasking) parallelisation of the first
C     DO (on L) in each kernel must be prevented by using a compiler directive
C     or by setting Loop= 1. This outermost DO Loop is merely repetition
C     used to increase timing accuracy and could distort the computation
C     sample if parallelisation is based on this artificial iteration level.
C
C  9. On computers with Cache memories and high resolution CPU clocks we
C     may ask for an additional run setting repetition Loop= 1 in SIZES,
C     to show un-initialized as well as encached execution rates.
C     Increase the size of array CACHE(in subr. VALUES) from 8192 to cache size
C****************************************************************************
C
C
C
c
C/      PARAMETER( kn= 47, kn2= 95, np= 3, ls= 3*47, krs= 24)
C/      PARAMETER( nk= 47, nl= 3, nr= 8 )
c
      COMMON /ALPHA/ mk,ik,ml,il,Nruns,jr, NPFS(8,3,47)
c
c     MAIN driver of the MFLOPS kernels test.  In order, it
c       - records the starting CPU time and opens the report file,
c       - calls VERIFY and SIZES(-1) before any kernel is timed,
c       - calls KERNEL once for every run number (jr) and loop-span
c         set (il), passing a negated unit number to TICK first,
c       - calls RESULT and REPORT for each span set, then REPORT once
c         more with 3*mk as its second argument,
c       - prints the elapsed CPU time as a clock calibration check.
c
c     mk, ml and Nruns are read from /ALPHA/ and are never assigned in
c     MAIN; presumably SIZES defines them -- confirm there.
c     Each work array below holds 141 = 3*47 entries.
c
      DIMENSION  FLOPS(141), TR(141), RATES(141), ID(141)
      DIMENSION  LSPAN(141), WG(141), OSUM (141), TERR(141)
      REAL       SECOND
c
c                        CPU time at start; differenced before the
c                        final WRITE.
              t= SECOND(0.0)
c
c                        iou is the one place to change the output unit
c                        (DIRECTIONS item 4); the OPEN follows it.
c                        STATUS='NEW' requires that FLOP.OUT does not
c                        exist yet.
            iou= 6
           OPEN  (UNIT=iou, FILE='FLOP.OUT', STATUS='NEW')
cLLNL      call  Q8EBM
cLLNL      call  PFM( 0, iou)
c
c                        Verify Sufficient Loop Size Versus Cpu Clock Accuracy
           CALL  VERIFY( iou)
c
c                        Define control limits:  Nruns(runs), Loop(time), tic,
           CALL  SIZES(-1)
c
c
c                        Run test Nruns times Cpu-limited; I/O is deferred:
c                        (each loop has its own terminal CONTINUE)
      DO 1    k= 1,Nruns
             jr= k
c                        Run test using one of 3 sets of DO-Loop spans:
c                        Set iou Negative to suppress all I/O during Cpu timing.
c                        The value returned in tock is not used in MAIN.
      DO 3    j= 1,ml
             il= j
           tock= TICK( -iou)
c
           CALL  KERNEL
    3 continue
    1 continue
c
c
c
c                        Report timing errors, Mflops statistics:
c                        one RESULT/REPORT pair per span set (il).
      DO 2    j= 1,ml
             il= j
           CALL  RESULT( iou,FLOPS,TR,RATES,LSPAN,WG,OSUM,TERR,ID)
c
           CALL  REPORT( iou,   mk,mk,FLOPS,TR,RATES,LSPAN,WG,OSUM,ID)
    2 continue
c
c                        Final REPORT over 3*mk entries (presumably all
c                        three span sets combined -- confirm in REPORT).
           CALL  REPORT( iou,3*mk,mk,FLOPS,TR,RATES,LSPAN,WG,OSUM,ID)
c
c
c                        Total CPU time since the first SECOND call.
              t= SECOND(0.0) - t
          WRITE( iou,9) t
    9    FORMAT( 1H1,//,26H CHECK CLOCK CALIBRATION: ,/,
     .                  18H Total cpu Time = ,e14.5, 5H Sec. )
      STOP
c
c
c
c
c
c
c
c
c
c
c          Subroutine timing of all-scalar execution on CRAY-1:
c
c          Subroutine     Time(%)
c
c          KERNEL         43.46%
c          SUPPLY         21.82%
c          VERIFY         13.12%
c          STATS           8.83%
c          SQRT            1.84%
c          SORDID          1.21%
c          VALUES           .74%
c          SUMO             .47%
c          SIGNAL           .34%
c          IQRANF           .26%
c          STATW            .17%
c
      END
c***********************************************
      BLOCK DATA
C***********************************************
C
C     Unnamed BLOCK DATA unit: supplies the initial values of the
C     control tables held in COMMON.  Variables initialised here:
C       /SPACES/  ibuf
C       /SPACE0/  WT, FR, SUMW, START, SKALE, BIAS, FLOPN, IQ, NPF
C       /SPACEI/  WTP, MUL, ISPAN, IPASS
C       /PROOF /  SUMS
C     Every 47-element table below lists 24 values followed by 23
C     zeros.
C
C     SUMS is declared DOUBLE PRECISION explicitly, so it keeps that
C     type whichever IMPLICIT line below is (or is not) activated.
C     All other variables take their type from the implicit rules.
C
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
      DOUBLE PRECISION  SUMS
C
C     l1 :=  param-dimension governs the size of most 1-d arrays
C     l2 :=  param-dimension governs the size of most 2-d arrays
C
C  ISPAN :=  Array of limits for DO loop control in the kernels
C  IPASS :=  Array of limits for multiple pass execution of each kernel
C  FLOPN :=  Array of floating-point operation counts for one pass thru kernel
C     WT :=  Array of weights to average kernel execution rates.
C  SKALE :=  Array of scale factors for SIGNAL data generator.
C   BIAS :=  Array of scale factors for SIGNAL data generator.
C
C    MUL :=  Array of multipliers * FLOPN  for each pass
C    WTP :=  Array of multipliers *    WT  for each pass
C     FR :=  Array of vectorisation fractions in REPORT
C   SUMW :=  Array of quartile weights in REPORT
C     IQ :=  Array of workload weights in REPORT
C   SUMS :=  Array of Verified Checksums of Kernels results: Nruns= 1 and 7.
C
C     The C/ lines below are inactive PARAMETER statements kept for
C     reference; the DATA statements use the literal values instead.
C
C/      PARAMETER( l1= 1001, l2=  101, l1d= 2*1001 )
C/      PARAMETER( l13=  64, l13h= l13/2, l213= l13+l13h, l813= 8*l13 )
C/      PARAMETER( l14=2048, l16=  75, l416= 4*l16 , l21= 25 )
C
C/      PARAMETER( l1=   27, l2=   15, l1d= 2*1001 )
C/      PARAMETER( l13=   8, l13h= 8/2, l213= 8+4, l813= 8*8 )
C/      PARAMETER( l14=  16, l16= 15, l416= 4*15 , l21= 15)
C
C
C/      PARAMETER( l1=   1001, l2=   101, l1d= 2*1001 )
C/      PARAMETER( l13= 64, l13h= 64/2, l213= 64+32, l813= 8*64 )
C/      PARAMETER( l14= 2048, l16= 75, l416= 4*75 , l21= 25)
C
C/      PARAMETER( kn= 47, kn2= 95, np= 3, ls= 3*47, krs= 24)
C/      PARAMETER( m1= 1001-1, m2= 101-1, m7= 1001-6 )
C
      COMMON /SPACES/ ion,j5,k2,k3,Loop,m,kr,it,n13h,ibuf,
     1                n,n1,n2,n13,n213,n813,n14,n16,n416,n21,nt1,nt2
C
      COMMON /SPACE0/ TIME(47), CSUM(47), WW(47), WT(47), ticks,
     1                FR(9), TERR1(47), SUMW(7), START,
     2              SKALE(47), BIAS(47), WS(95), TOTAL(47), FLOPN(47),
     3                IQ(7), NPF, NPFS1(47)
C
      COMMON /SPACEI/ WTP(3), MUL(3), ISPAN(47,3), IPASS(47,3)
C
      COMMON /PROOF/  SUMS(24,3,2)
C     ****************************************************************
C
C     ISPAN(i,1): DO-loop limits of kernel i for the first span set.
C
      DATA  ( ISPAN(i,1), i= 1,47) /
     : 1001, 101, 1001, 1001, 1001, 64, 995, 100,
     : 101, 101, 1001, 1000, 64, 1001, 101, 75,
     : 101, 100, 101, 1000, 101, 101, 100, 1001, 23*0/
C
C     Symbolic form of the list above, in terms of the PARAMETER names:
C*   : l1, l2, l1, l1, l1, l13, m7, m2,
C*   : l2, l2, l1, m1, l13, l1, l2, l16,
C*   : l2, m2, l2, m1, l21, l2, m2, l1, 23*0/
C     NOTE(review): entry 21 of the active list is 101, whereas the
C     symbolic list gives l21 (=25 in the PARAMETER comments) --
C     confirm which is intended.
C
C     ISPAN(i,2): DO-loop limits for the second span set.
      DATA  ( ISPAN(i,2), i= 1,47) /
     : 101, 101, 101, 101, 101,  32, 101, 100,
     : 101, 101, 101, 100,  32, 101, 101,  40,
     : 101, 100, 101, 100,  50, 101, 100, 101,  23*0/
C
C     ISPAN(i,3): DO-loop limits for the third span set.
      DATA  ( ISPAN(i,3), i= 1,47) /
     : 27, 15, 27, 27, 27,  8, 21, 14,
     : 15, 15, 27, 26,  8, 27, 15, 15,
     : 15, 14, 15, 26, 20, 15, 14, 27,  23*0/
C
C     IPASS(i,il): multiple-pass limits of kernel i for span set il.
      DATA  ( IPASS(i,1), i= 1,47) /
     :   7, 67,  9, 14, 10,  3,  4, 10, 36, 34, 11, 12,
     :  36,  2,  1, 25, 35,  2, 39,  1,  1, 11,  8,  5,  23*0/
C
      DATA  ( IPASS(i,2), i= 1,47) /
     :   40, 40, 53, 70, 55,  7, 22,  6, 21, 19, 64, 68,
     :   41, 10,  1, 27, 20,  1, 23,  8,  1,  7,  5, 31,  23*0/
C
      DATA  ( IPASS(i,3), i= 1,47) /
     :   28, 46, 37, 38, 40, 21, 20,  9, 26, 25, 46, 48,
     :   31,  8,  1, 14, 26,  2, 28,  7,  1,  8,  7, 23,  23*0/
C
C     Per-span-set multipliers (one entry for each of the 3 span sets).
      DATA  (  MUL(i), i= 1,3) / 1, 2, 8 /
      DATA  (  WTP(i), i= 1,3) / 1.0, 2.0, 1.0 /
c
c     The following flop-counts (FLOPN) are required for scalar or serial
c     execution.  The scalar version defines the NECESSARY computation
c     generally, in the absence of proof to the contrary.  The vector
c     or parallel executions are only credited with executing the same
c     necessary computation.  If the parallel methods do more computation
c     than is necessary then the extra flops are not counted as through-put.
c
      DATA  ( FLOPN(i), i= 1,47)
     :     /5., 4., 2., 2., 2., 2., 16., 36., 17., 9., 1., 1.,
     :     7., 11., 33., 7., 9., 44., 6., 26., 2., 17., 11., 1., 23*0.0/
C
C     Kernel weights: 1.0 for each of the first 24 entries.
      DATA  ( WT(i), i= 1,47) /
     : 1.0,  1.0,  1.0,  1.0,  1.0,  1.0,  1.0,  1.0,
     : 1.0,  1.0,  1.0,  1.0,  1.0,  1.0,  1.0,  1.0,
     : 1.0,  1.0,  1.0,  1.0,  1.0,  1.0,  1.0,  1.0, 23*0.0/
C
C
C     SKALE is given with double-precision (D0) constants; the stored
C     precision is that of SKALE itself, which is implicitly typed.
      DATA  ( SKALE(i), i= 1,47) /
     & 0.100D0, 0.100D0, 0.100D0, 0.100D0, 0.100D0, 0.100D0,
     & 0.100D0, 0.100D0, 0.100D0, 0.100D0, 0.100D0, 0.100D0,
     & 0.100D0, 0.100D0, 0.100D0, 0.100D0, 0.100D0, 0.100D0,
     & 0.100D0, 0.100D0, 0.100D0, 0.100D0, 0.100D0, 0.100D0,
     &       23*0.000D0 /
C
C     Inactive single-precision form of the SKALE values:
c    : 0.1,  0.1,  0.1,  0.1,  0.1,  0.1,  0.1,  0.1,
c    : 0.1,  0.1,  0.1,  0.1,  0.1,  0.1,  0.1,  0.1,
c    : 0.1,  0.1,  0.1,  0.1,  0.1,  0.1,  0.1,  0.1,  23*0.0/
C
      DATA  ( BIAS(i), i= 1,47) /
     : 0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,
     : 0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,
     : 0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  23*0.0/
C
      DATA  ( FR(i), i= 1,9) /
     :  0.0, 0.2, 0.4, 0.6, 0.7, 0.8, 0.9, 0.95, 1.0/
C
      DATA  ( SUMW(i), i= 1,7) /
     : 1.0, 0.95, 0.9, 0.8, 0.7, 0.6, 0.5/
C
      DATA  ( IQ(i), i= 1,7) /
     : 1, 2, 1, 2, 1, 2, 1/
C
C     Scalars: START and NPF are in /SPACE0/, ibuf is in /SPACES/.
      DATA  START /0.0/, NPF/0/, ibuf/0/
C
C     SUMS(i,il,1): verified checksum of kernel i (1..24), span set il.
      DATA  ( SUMS(i,1,1), i= 1,24 ) /
     &.5114652693224705D+05,.5150345372943066D+03,.1000742883066623D+02,
     &.5999250595474070D+00,.4548871642388544D+04,.5229095383954675D+13,
     &.6104251075163778D+05,.1501268005627157D+06,.1189443609975085D+06,
     &.7310369784325972D+05,.3342910984950433D+08,.2907141428639174D-04,
     &.4057110454105263D+10,.2982036205992255D+10,.3943816690352311D+05,
     &.2832600000000000D+05,.1114641772903091D+04,.5165625410757306D+05,
     &.5421816960150398D+03,.3040644339317275D+08,.8289464835786202D+07,
     &.2938604376567099D+03,.3549834542446150D+05,.5000000000000000D+03/
c
      DATA  ( SUMS(i,2,1), i= 1,24 ) /
     &.5253344778938000D+03,.5150345372943066D+03,.1009741436579188D+01,
     &.5999250595474070D+00,.4589031939602131D+02,.2693280957416549D+16,
     &.6345586315772524D+03,.1501268005627157D+06,.1189443609975085D+06,
     &.7310369784325972D+05,.3433560531581074D+05,.7127569144561925D-05,
     &.2325318944820836D+10,.3045676741897511D+08,.3943816690352311D+05,
     &.3244100000000000D+05,.1114641772903091D+04,.5165625410757306D+05,
     &.5421816960150398D+03,.3126205178811007D+05,.3986531136462291D+07,
     &.2938604376567099D+03,.3549894609776936D+05,.5000000000000000D+02/
c
      DATA  ( SUMS(i,3,1), i= 1,24 ) /
     &.3855104502494983D+02,.1199847611437483D+02,.2699309089321296D+00,
     &.5999250595474070D+00,.3182615248448271D+01,.8303480073326955D+12,
     &.2845720217638848D+02,.2960543667877649D+04,.2623968460874419D+04,
     &.1651291227698377D+04,.6551162198437113D+03,.1943435981776804D-05,
     &.4755211251524563D+09,.2547733008933910D+07,.1108997288135066D+04,
     &.2577600000000000D+05,.2947368618590713D+02,.9700646212341513D+03,
     &.1268230698051747D+02,.5987713249471801D+03,.2516870081042209D+07,
     &.6109968728264795D+01,.4850340602751675D+03,.1300000000000000D+02/
c
c     SUMS(i,il,2): only entries i= 1..7 are initialised; 8..24 are
c     left undefined by this unit.  The first value of each list equals
c     SUMS(14,il,1) above, so these are presumably the kernel-14
c     checksums for runs 1..7 -- confirm against RESULT.
c
      DATA  ( SUMS(i,1,2), i= 1,7 ) /
     &.2982036205992255D+10,.6118901630090488D+10,.9103526877478772D+10,
     &.1215176334476067D+11,.1519764492169999D+11,.1820312504465359D+11,
     &.2116750694993432D+11/
c
      DATA  ( SUMS(i,2,2), i= 1,7 ) /
     &.3045676741897511D+08,.5718526521576222D+08,.8885029941358330D+08,
     &.1174925822726987D+09,.1501582054695641D+09,.1819691693283694D+09,
     &.2130649341195080D+09/
c
      DATA  ( SUMS(i,3,2), i= 1,7 ) /
     &.2547733008933910D+07,.5131714230651644D+07,.7946120246231201D+07,
     &.1008019578807808D+08,.1269997234773526D+08,.1504905863862026D+08,
     &.1721399839433381D+08/
      END
C
C***********************************************
      SUBROUTINE INDEX
C***********************************************
C
C     INDEX performs no computation: its only executable statement is
C     the RETURN at the end.  The routine exists to carry the module
C     index, call tree, glossary and COMMON usage tables below.
C     It takes no arguments, and the call tree below does not show it
C     being called.  Note that its name coincides with the Fortran
C     intrinsic function INDEX.
C
C       MODULE     PURPOSE
C       ------     -----------------------------------------------
C       IQRANF     computes a vector of pseudo-random indices
C
C       KERNEL     executes 24 samples of Fortran computation
C
C       PFM        optional call to system hardware performance monitor
C
C       REPORT     prints timing results
C
C       RESULT     computes execution rates  into pushdown store
C
C       SECOND     cumulative CPU time for task in seconds (MKS units)
C
C       SENSIT     sensitivity analysis of harmonic mean to 49 workloads
C
C       SIGNAL     generates a set of floating-point numbers near 1.0
C
C       SIMD       sensitivity analysis of harmonic mean to SISD/SIMD model
C
C       SIZES      test and set the loop controls before each kernel test
C
C       SORDID     simple sort
C
C       SPACE      sets memory pointers for array variables.  optional.
C
C       STATS      calculates unweighted statistics
C
C       STATW      calculates   weighted statistics
C
C       SUMO       check-sum with ordinal dependency
C
C       SUPPLY     initializes common blocks containing type real arrays.
C
C       TALLY      computes average and minimum Cpu timings and variances.
C
C       TDIGIT     counts lead digits followed by trailing zeroes
C
C       TEST       times, tests, and initializes each kernel test
C
C       TICK       measures timing overhead of subroutine test
C
C       TILE       computes  m-tile value and corresponding index
C
C       VALID      compresses valid timing results
C
C       VALUES     initializes special values
C
C       VERIFY     verifies sufficient Loop size versus cpu clock accuracy
C       ------     -----------------------------------------------
C
C
C
C
c
c  Call tree: each column to the right lists routines called by the
c  entry to its left.
c
c  ------------ -------- -------- -------- -------- -------- --------
c  ENTRY LEVELS:   1        2        3        4        5        6
c  ------------ -------- -------- -------- -------- -------- --------
c               MAIN.    SECOND
c                        VERIFY   SECOND
c                                 SIZES
c                                 STATS    SQRT
c                                 TDIGIT   ALOG10
c                        SIZES
c
c                        TICK     TEST     SECOND
c                                          SIZES
c                                          SUMO
c                                          VALUES   SUPPLY   SIGNAL
c                                                   IQRANF   MOD
c                                 VALID
c                                 STATS    SQRT
c                                 IQRANF   MOD
c
c                        KERNEL   SPACE
c                                 SQRT
c                                 EXP
c                                 TEST     SECOND
c                                          SIZES
c                                          SUMO
c                                          VALUES   SUPPLY   SIGNAL
c                                                   IQRANF   MOD
c                        RESULT   TALLY    SIZES
c                                          PAGE
c                                          STATS    SQRT
c                                 ALOG10
c
c                        REPORT   VALID
c                                 MOD
c                                 STATW    SORDID
c                                          TILE
c                                          SQRT
c                                          ALOG10
c                                 PAGE
c                                 SENSIT   VALID
c                                          SORDID
c                                          PAGE
c                                          STATW    SORDID
c                                                   TILE
c                                 SIMD     VALID
c                                          STATW    SORDID
c                                                   TILE
C                        STOP
C
C
C
C
C
C
C
C
C
C
C
C
C
c  Glossary of COMMON variables.  BASE is the COMMON block name;
c  TYPE is R (real) or I (integer).
c
c    ------ ---- ------     -----   ------------------------------------
c    BASE   TYPE CLASS      NAME    GLOSSARY
c    ------ ---- ------     -----   ------------------------------------
c    SPACE0    R Array      BIAS  - scale factors for SIGNAL data generator
c    SPACE0    R Array      CSUM  - checksums of KERNEL result arrays
c    BETA      R Array      CSUMS - sets of CSUM for all test runs
c    BETA      R Array      DOS   - sets of TOTAL flops for all test runs
c    SPACE0    R Array      FLOPN - flop counts for one execution pass
c    BETA      R Array      FOPN  - sets of FLOPN for all test runs
c    SPACE0    R Array      FR    - vectorisation fractions; abscissa for REPORT
c    SPACES    I scalar     ibuf  - flag enables one call to SIGNAL
c    ALPHA     I scalar     ik    - current number of executing kernel
c    ALPHA     I scalar     il    - selects one of three sets of loop spans
c    SPACES    I scalar     ion   - logical I/O unit number for output
c    SPACEI    I Array      IPASS - Loop control limits for multiple-pass loops
c    SPACE0    I Array      IQ    - set of workload weights for REPORT
c    SPACEI    I Array      ISPAN - loop control limits for each kernel
c    SPACES    I scalar     it    - flags timing call to TEST from TICK
c    SPACES    I scalar     j5    - datum in kernel 16
c    ALPHA     I scalar     jr    - current test run number (1 thru 7)
c    SPACES    I scalar     k2    - counter in kernel 16
c    SPACES    I scalar     k3    - counter in kernel 16
c    SPACES    I scalar     kr    - a copy of mk
c    SPACES    I scalar     Loop  - current multiple-pass loop limit in KERNEL
c    SPACES    I scalar     m     - temp integer datum
c    ALPHA     I scalar     mk    - number of kernels to evaluate .LE.24
c    ALPHA     I scalar     ml    - maximum value of il=  3
c    SPACEI    I Array      MUL   - multipliers * IPASS defines Loop
c    SPACES    I scalar     n     - current DO loop limit in KERNEL
c    SPACES    I scalar     n1    - dimension of most 1-D arrays
c    SPACES    I scalar     n13   - dimension used in kernel 13
c    SPACES    I scalar     n13h  - dimension used in kernel 13
c    SPACES    I scalar     n14   - dimension used in kernel 14
c    SPACES    I scalar     n16   - dimension used in kernel 16
c    SPACES    I scalar     n2    - dimension of most 2-D arrays
c    SPACES    I scalar     n21   - dimension used in kernel 21
c    SPACES    I scalar     n213  - dimension used in kernel 21
c              NOTE(review): the PARAMETER comments elsewhere give
c              l213= l13+l13h, which suggests kernel 13 -- confirm.
c    SPACES    I scalar     n416  - dimension used in kernel 16
c    SPACES    I scalar     n813  - dimension used in kernel 13
c    SPACE0    I scalar     npf   - temp integer datum
c    ALPHA     I Array      NPFS  - sets of NPFS1 for all test runs
c    SPACE0    I Array      NPFS1 - number of page-faults for each kernel
c    ALPHA     I scalar     Nruns - number of complete test runs
c    SPACES    I scalar     nt1   - total size of common -SPACE1- words
c    SPACES    I scalar     nt2   - total size of common -SPACE2- words
c    BETA      R Array      SEE   - (i,1,jr,il) sets of TEST overhead times
c    BETA      R Array      SEE   - (i,2,jr,il) sets of csums of SPACE1
c    BETA      R Array      SEE   - (i,3,jr,il) sets of csums of SPACE2
c    SPACE0    R Array      SKALE - scale factors for SIGNAL data generator
c    SPACE0    R scalar     start - temp start time of each kernel
c    PROOF     R Array      SUMS  - sets of verified checksums for all test runs
c    SPACE0    R Array      SUMW  - set of quartile weights for REPORT
c    SPACE0    R Array      TERR1 - overhead-time errors for each kernel
c    BETA      R Array      TERRS - sets of TERR1 for all runs
c    BETA      R scalar     tic   - minimum cpu clock time= resolution
c    SPACE0    R scalar     ticks - average overhead time in TEST linkage
c    SPACE0    R Array      TIME  - net execution times for all kernels
c    BETA      R Array      TIMES - sets of TIME for all test runs
c    SPACE0    R Array      TOTAL - total flops computed by each kernel
c    SPACE0    R Array      WS    - unused
c    SPACE0    R Array      WT    - weights for each kernel sample
c    SPACEI    R Array      WTP   - weights for the 3 span-varying passes
c    SPACE0    R Array      WW    - unused
C
C
c  For each COMMON block, the program units that reference it:
c
c  --------- -----------------------------------------------------------------
c   COMMON   Usage
c  --------- -----------------------------------------------------------------
C
C   /ALPHA /
C            VERIFY    TICK      TALLY     SIZES     RESULT    REPORT    KERNEL
C            MAIN.
C   /BASE1 /
C            SUPPLY
C   /BASE2 /
C            SUPPLY
C   /BASER /
C            SUPPLY
C   /BETA  /
C            TICK      TALLY     SIZES     RESULT    REPORT    KERNEL
C   /PROOF /
C            RESULT    BLOCKDATA
C   /SPACE0/
C            VALUES    TICK      TEST      TALLY     SUPPLY    SIZES     RESULT
C            REPORT    KERNEL    BLOCKDATA
C   /SPACE1/
C            VERIFY    VALUES    TICK      TEST      SUPPLY    SPACE     KERNEL
C   /SPACE2/
C            VERIFY    VALUES    TICK      TEST      SUPPLY    SPACE     KERNEL
C   /SPACE3/
C            VALUES
C   /SPACEI/
C            VERIFY    VALUES    TICK      TEST      SIZES     RESULT    REPORT
C            KERNEL    BLOCKDATA
C   /SPACER/
C            VALUES    TICK      TEST      SUPPLY    SIZES     KERNEL
C   /SPACES/
C            VERIFY    VALUES    TICK      TEST      SUPPLY    SIZES     KERNEL
C            BLOCKDATA
c  --------- -----------------------------------------------------------------
      RETURN
      END
C
C
C***************************************
      SUBROUTINE IQRANF( M, Mmin,Mmax, n)
C***********************************************************************
C                                                                      *
C     IQRANF fills M(1..n) with pseudo-random integers taken from a    *
C     multiplicative congruential sequence                             *
C                                                                      *
C         seed <- 16807*seed  modulo  2147483647                       *
C                                                                      *
C     each seed being mapped onto the requested range by               *
C                                                                      *
C         M(j)= Mmin + INT( (Mmax-Mmin) * seed/2147483647 )            *
C                                                                      *
C     The seed starts at 256 (DATA) and is SAVEd, so successive calls  *
C     continue the same sequence.                                      *
C                                                                      *
C     M     - result array ,  n pseudo-random integers                 *
C     Mmin  - input integer,  lower bound for the results              *
C     Mmax  - input integer,  upper bound for the results              *
C     n     - input integer,  number of results wanted in M;           *
C                             nothing is done when n.LE.0              *
C                                                                      *
C     Reference values:  CALL IQRANF( IX, 1,1001, 30)  should          *
C     produce in IX:                                                   *
C           3  674  435  415  389   54   44  790  900  282             *
C         177  971  728  851  687  604  815  971  155  112             *
C         877  814  779  192  619  894  544  404  496  505  ...        *
C                                                                      *
C***********************************************************************
C
cANSI IMPLICIT  DOUBLE PRECISION (A-H,K,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,K,O-Z)
      DOUBLE PRECISION  dseed, dmult, dmodul, dprod, dspan, dscal
C
      DIMENSION  M(n)
      SAVE  k
      DATA  k /256/
C
      IF( n.GT.0 ) THEN
C
C                        generator constants
         dmult = 16807
         dmodul= 2147483647
C                        offset and width of the requested range
         lobnd = Mmin
         dspan = Mmax - Mmin
C                        spacing used for the out-of-range fallback
         rcount= n
         slop  = 1.0000100
         qstep = slop*(dspan/rcount)
C
C                        resume from the seed kept by the last call
         dseed= k
         DO 10 j= 1,n
C                        next seed = dprod modulo dmodul
            dprod= dseed*dmult
            dseed= dprod - INT( dprod/dmodul)*dmodul
C                        scale into the range; storing into the
C                        integer M(j) drops the fractional part
            dscal= dseed*dspan
            M(j) = lobnd + ( dscal/dmodul)
C                        result outside (Mmin,Mmax): substitute an
C                        evenly spaced value instead
            IF( M(j).LT.Mmin .OR. M(j).GT.Mmax ) THEN
               M(j)= lobnd + j*qstep
            ENDIF
   10    CONTINUE
C                        keep the seed for the next call
         k= dseed
      ENDIF
C
C
C     Inactive alternate formulation retained for reference (it uses
C     its own variable names):
C
ciC     double precision  k, ip, iq, id
ci         inset= Mmin
ci         ispan= Mmax - Mmin
ci         ispin= 16807
ci            id= 2147483647
ci             q= (REAL(ispan)/REAL(n))*1.00001
ciC
ci      DO  2  i= 1,n
ci            ip= k*ispin
ci             k=      MOD( ip, id)
ci            iq= k*ispan
ci          M(i)= inset + ( iq/ id)
ci            IF( M(i).LT.Mmin .OR. M(i).GT.Mmax )  M(i)= inset + i*q
ci    2 continue
C
      RETURN
      END
C***********************************************
      SUBROUTINE KERNEL
C***********************************************************************
C                                                                      *
C            KERNEL     executes 24 samples of Fortran computation     *
C                                                                      *
C***********************************************************************
C                                                                      *
C     L. L. N. L.   F O R T R A N   K E R N E L S:   M F L O P S       *
C                                                                      *
C     These kernels measure  Fortran  numerical computation            *
C     rates for  a  spectrum  of  cpu-limited computational            *
C     structures or benchmarks.   Mathematical  through-put            *
C     is measured  in  units  of millions of floating-point            *
C     operations executed per second, called Megaflops/sec.            *
C                                                                      *
C     Fonzi's Law: There is not now and there never will be a language *
C                  in which it is the least bit difficult to write     *
C                  bad programs.                                       *
C                                                    F.H.MCMAHON  1972 *
C***********************************************************************
C
C     l1 :=  param-dimension governs the size of most 1-d arrays
C     l2 :=  param-dimension governs the size of most 2-d arrays
C
C     Loop :=  multiple pass control to execute kernel long enough to time.
C     n  :=  DO loop control for each kernel.  Controls are set in subr. SIZES
C
C     ******************************************************************
C
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
C/      PARAMETER( l1= 1001, l2=  101, l1d= 2*1001 )
C/      PARAMETER( l13=  64, l13h= l13/2, l213= l13+l13h, l813= 8*l13 )
C/      PARAMETER( l14=2048, l16=  75, l416= 4*l16 , l21= 25 )
C/      PARAMETER( kn= 47, kn2= 95, np= 3, ls= 3*47, krs= 24)
C
C
C/      PARAMETER( nk= 47, nl= 3, nr= 8 )
C
      COMMON /ALPHA/ mk,ik,ml,il,Nruns,jr, NPFS(8,3,47)
      COMMON /BETA / tic, TIMES(8,3,47), SEE(5,3,8,3),
     1              TERRS(8,3,47), CSUMS(8,3,47),
     2              FOPN(8,3,47), DOS(8,3,47)
C
      COMMON /SPACES/ ion,j5,k2,k3,Loop,m,kr,it,n13h,ibuf,
     1                n,n1,n2,n13,n213,n813,n14,n16,n416,n21,nt1,nt2
C
      COMMON /SPACER/ A11,A12,A13,A21,A22,A23,A31,A32,A33,
     2                AR,BR,C0,CR,DI,DK,
     3  DM22,DM23,DM24,DM25,DM26,DM27,DM28,DN,E3,E6,EXPMAX,FLX,
     4  Q,QA,R,RI,S,SCALE,SIG,STB5,T,XNC,XNEI,XNM
C
      COMMON /SPACE0/ TIME(47), CSUM(47), WW(47), WT(47), ticks,
     1                FR(9), TERR1(47), SUMW(7), START,
     2              SKALE(47), BIAS(47), WS(95), TOTAL(47), FLOPN(47),
     3                IQ(7), NPF, NPFS1(47)
C
      COMMON /SPACEI/ WTP(3), MUL(3), ISPAN(47,3), IPASS(47,3)
C
C/      INTEGER    E,F,ZONE
C/      COMMON /ISPACE/ E(l213), F(l213),
C/     1  IX(l1), IR(l1), ZONE(l416)
C/C
C/      COMMON /SPACE1/ U(l1), V(l1), W(l1),
C/     1  X(l1), Y(l1), Z(l1), G(l1),
C/     2  DU1(l2), DU2(l2), DU3(l2), GRD(l1), DEX(l1),
C/     3  XI(l1), EX(l1), EX1(l1), DEX1(l1),
C/     4  VX(l14), XX(l14), RX(l14), RH(l14),
C/     5  VSP(l2), VSTP(l2), VXNE(l2), VXND(l2),
C/     6  VE3(l2), VLR(l2), VLIN(l2), B5(l2),
C/     7  PLAN(l416), D(l416), SA(l2), SB(l2)
C/C
C/      COMMON /SPACE2/ P(4,l813), PX(l21,l2), CX(l21,l2),
C/     1  VY(l2,l21), VH(l2,7), VF(l2,7), VG(l2,7), VS(l2,7),
C/     2  ZA(l2,7)  , ZP(l2,7), ZQ(l2,7), ZR(l2,7), ZM(l2,7),
C/     3  ZB(l2,7)  , ZU(l2,7), ZV(l2,7), ZZ(l2,7),
C/     4  B(l13,l13), C(l13,l13), H(l13,l13),
C/     5  U1(5,l2,2),  U2(5,l2,2),  U3(5,l2,2)
C
C     ******************************************************************
C
C
C/      PARAMETER( l1=   1001, l2=   101, l1d= 2*1001 )
C/      PARAMETER( l13= 64, l13h= 64/2, l213= 64+32, l813= 8*64 )
C/      PARAMETER( l14= 2048, l16= 75, l416= 4*75 , l21= 25)
C
C
care
C
      INTEGER    E,F,ZONE
      COMMON /ISPACE/ E(96), F(96),
     1  IX(1001), IR(1001), ZONE(300)
C
      COMMON /SPACE1/ U(1001), V(1001), W(1001),
     1  X(1001), Y(1001), Z(1001), G(1001),
     2  DU1(101), DU2(101), DU3(101), GRD(1001), DEX(1001),
     3  XI(1001), EX(1001), EX1(1001), DEX1(1001),
     4  VX(1001), XX(1001), RX(1001), RH(2048),
     5  VSP(101), VSTP(101), VXNE(101), VXND(101),
     6  VE3(101), VLR(101), VLIN(101), B5(101),
     7  PLAN(300), D(300), SA(101), SB(101)
C
      COMMON /SPACE2/ P(4,512), PX(25,101), CX(25,101),
     1  VY(101,25), VH(101,7), VF(101,7), VG(101,7), VS(101,7),
     2  ZA(101,7)  , ZP(101,7), ZQ(101,7), ZR(101,7), ZM(101,7),
     3  ZB(101,7)  , ZU(101,7), ZV(101,7), ZZ(101,7),
     4  B(64,64), C(64,64), H(64,64),
     5  U1(5,101,2),  U2(5,101,2),  U3(5,101,2)
C
C     ******************************************************************
C
      DIMENSION       XZ(2001), ZX(2001)
      EQUIVALENCE   ( XZ(1), X(1)), ( ZX(1), Z(1))
C
C
C//      DIMENSION       E(96), F(96), U(1001), V(1001), W(1001),
C//     1  X(1001), Y(1001), Z(1001), G(1001),
C//     2  DU1(101), DU2(101), DU3(101), GRD(1001), DEX(1001),
C//     3  IX(1001), XI(1001), EX(1001), EX1(1001), DEX1(1001),
C//     4  VX(1001), XX(1001), IR(1001), RX(1001), RH(2048),
C//     5  VSP(101), VSTP(101), VXNE(101), VXND(101),
C//     6  VE3(101), VLR(101), VLIN(101), B5(101),
C//     7  PLAN(300), ZONE(300), D(300), SA(101), SB(101)
C//C
C//      DIMENSION       P(4,512), PX(25,101), CX(25,101),
C//     1  VY(101,25), VH(101,7), VF(101,7), VG(101,7), VS(101,7),
C//     2  ZA(101,7)  , ZP(101,7), ZQ(101,7), ZR(101,7), ZM(101,7),
C//     3  ZB(101,7)  , ZU(101,7), ZV(101,7), ZZ(101,7),
C//     4  B(64,64), C(64,64), H(64,64),
C//     5  U1(5,101,2),  U2(5,101,2),  U3(5,101,2)
C//C
C//C     ******************************************************************
C//C
C//      COMMON /POINT/ ME,MF,MU,MV,MW,MX,MY,MZ,MG,MDU1,MDU2,MDU3,MGRD,
C//     1  MDEX,MIX,MXI,MEX,MEX1,MDEX1,MVX,MXX,MIR,MRX,MRH,MVSP,MVSTP,
C//     2  MVXNE,MVXND,MVE3,MVLR,MVLIN,MB5,MPLAN,MZONE,MD,MSA,MSB,
C//     3  MP,MPX,MCX,MVY,MVH,MVF,MVG,MVS,MZA,MZP,MZQ,MZR,MZM,MZB,MZU,
C//     4  MZV,MZZ,MB,MC,MH,MU1,MU2,MU3
C//C
C//      POINTER  (ME,E), (MF,F), (MU,U), (MV,V), (MW,W),
C//     1         (MX,X), (MY,Y), (MZ,Z), (MG,G),
C//     2         (MDU1,DU1),(MDU2,DU2),(MDU3,DU3),(MGRD,GRD),(MDEX,DEX),
C//     3         (MIX,IX), (MXI,XI), (MEX,EX), (MEX1,EX1), (MDEX1,DEX1),
C//     4         (MVX,VX), (MXX,XX), (MIR,IR), (MRX,RX), (MRH,RH),
C//     5         (MVSP,VSP), (MVSTP,VSTP), (MVXNE,VXNE), (MVXND,VXND),
C//     6         (MVE3,VE3), (MVLR,VLR), (MVLIN,VLIN), (MB5,B5),
C//     7         (MPLAN,PLAN), (MZONE,ZONE), (MD,D), (MSA,SA), (MSB,SB)
C//C
C//      POINTER  (MP,P), (MPX,PX), (MCX,CX),
C//     1         (MVY,VY), (MVH,VH), (MVF,VF), (MVG,VG), (MVS,VS),
C//     2         (MZA,ZA), (MZP,ZP), (MZQ,ZQ), (MZR,ZR), (MZM,ZM),
C//     3         (MZB,ZB), (MZU,ZU), (MZV,ZV), (MZZ,ZZ),
C//     4         (MB,B), (MC,C), (MH,H),
C//     5         (MU1,U1), (MU2,U2), (MU3,U3)
C..      COMMON DUMMY(2000)
C..      LOC(X)  =.LOC.X
C..      IQ8QDSP = 64*LOC(DUMMY)
C
C     ******************************************************************
C
C     STANDARD PRODUCT COMPILER DIRECTIVES MAY BE USED FOR OPTIMIZATION
C
CDIR$ VECTOR
CLLL. OPTIMIZE LEVEL i
CLLL. OPTION INTEGER (7)
CLLL. OPTION ASSERT (NO HAZARD)
CLLL. OPTION NODYNEQV
C
C     ******************************************************************
C       BINARY MACHINES MAY USE THE  AND(P,Q)  FUNCTION IF AVAILABLE
C       IN PLACE OF THE FOLLOWING CONGRUENCE FUNCTION (SEE KERNEL 13, 14)
C
CLLL. INTEGER     AND, OR, XOR
CLLL. AND(j,k)  = j.INT.k
      AND(j,k)  = j.AND.k
C     AND(j,k)  = IAND(j,k)
C     MOD2N(i,j)= MOD(i,j) +j/2 -ISIGN(j/2,i)
      MOD2N(i,j)= AND(i,j-1)
      MOD2P(i,j)= AND(i-1,j-1) + 1
C                             i  is Congruent to  MOD2N(i,j)   mod(j)
C     ******************************************************************
C
C     MAX(x,y)= AMAX1(x,y)  or  DMAX1(x,y)
C     MIN(x,y)= AMIN1(x,y)  or  DMIN1(x,y)
C
C
      CALL SPACE
C
cLLNL      call  PFM( 0, ion)
      CALL TEST(0)
C
C******************************************************************************
C***  KERNEL 1      HYDRO FRAGMENT
C******************************************************************************
C
         DO 1 L = 1,Loop
         DO 1 k = 1,n
    1       X(k)= Q + Y(k)*(R*ZX(k+10) + T*ZX(k+11))
C
C...................
      CALL TEST(1)
C
C******************************************************************************
C***  KERNEL 2      ICCG EXCERPT (INCOMPLETE CHOLESKY - CONJUGATE GRADIENT)
C******************************************************************************
C
      DO 200  L= 1,Loop
          II= n
       IPNTP= 0
  222   IPNT= IPNTP
       IPNTP= IPNTP+II
          II= II/2
           i= IPNTP
CDIR$ IVDEP
C
      DO 2 k= IPNT+2,IPNTP,2
           i= i+1
    2   X(i)= X(k) - V(k)*X(k-1) - V(k+1)*X(k+1)
          IF( II.GT.1) GO TO 222
  200 CONTINUE
C
C...................
      CALL TEST(2)
C
C******************************************************************************
C***  KERNEL 3      INNER PRODUCT
C******************************************************************************
C
      DO 3 L= 1,Loop
           Q= 0.000D0
      DO 3 k= 1,n
    3      Q= Q + Z(k)*X(k)
C
C...................
      CALL TEST(3)
C
C
C
C******************************************************************************
C***  KERNEL 4      BANDED LINEAR EQUATIONS
C******************************************************************************
C
              m= (1001-7)/2
      DO 444  L= 1,Loop
      DO 444  k= 7,1001,m
             lw= k-6
           temp= X(k-1)
CDIR$ IVDEP
      DO   4  j= 5,n,5
         temp  = temp   - XZ(lw)*Y(j)
    4        lw= lw+1
         X(k-1)= Y(5)*temp
 444  CONTINUE
C
C...................
      CALL TEST(4)
C
C******************************************************************************
C***  KERNEL 5      TRI-DIAGONAL ELIMINATION, BELOW DIAGONAL
C******************************************************************************
C
      DO 5 L = 1,Loop
CDIR$ NOVECTOR
      DO 5 i = 2,n
    5    X(i)= Z(i)*(Y(i) - X(i-1))
CDIR$ VECTOR
C
C...................
      CALL TEST(5)
C
C******************************************************************************
C***  KERNEL 6      GENERAL LINEAR RECURRENCE EQUATIONS
C******************************************************************************
C
      DO  6  L= 1,Loop
      DO  6  i= 2,n
C         W(i)= 0.0100D0              use only if overflow occurs
      DO  6  k= 1,i-1
          W(i)= W(i)  + B(i,k) * W(i-k)
    6 CONTINUE
C
C...................
      CALL TEST(6)
C
C******************************************************************************
C***  KERNEL 7      EQUATION OF STATE FRAGMENT
C******************************************************************************
C
      DO 7 L= 1,Loop
      DO 7 k= 1,n
        X(k)=     U(k  ) + R*( Z(k  ) + R*Y(k  )) +
     .        T*( U(k+3) + R*( U(k+2) + R*U(k+1)) +
     .        T*( U(k+6) + R*( U(k+5) + R*U(k+4))))
    7 CONTINUE
C
C...................
      CALL TEST(7)
C
C
C******************************************************************************
C***  KERNEL 8      A.D.I. INTEGRATION
C******************************************************************************
C
      DO  8      L = 1,Loop
               nl1 = 1
               nl2 = 2
                fw= 2.000D0
      DO  8     kx = 2,3
CDIR$ IVDEP
      DO  8     ky = 2,n
            DU1(ky)=U1(kx,ky+1,nl1)  -  U1(kx,ky-1,nl1)
            DU2(ky)=U2(kx,ky+1,nl1)  -  U2(kx,ky-1,nl1)
            DU3(ky)=U3(kx,ky+1,nl1)  -  U3(kx,ky-1,nl1)
      U1(kx,ky,nl2)=U1(kx,ky,nl1) +A11*DU1(ky) +A12*DU2(ky) +A13*DU3(ky)
     .       + SIG*(U1(kx+1,ky,nl1) -fw*U1(kx,ky,nl1) +U1(kx-1,ky,nl1))
      U2(kx,ky,nl2)=U2(kx,ky,nl1) +A21*DU1(ky) +A22*DU2(ky) +A23*DU3(ky)
     .       + SIG*(U2(kx+1,ky,nl1) -fw*U2(kx,ky,nl1) +U2(kx-1,ky,nl1))
      U3(kx,ky,nl2)=U3(kx,ky,nl1) +A31*DU1(ky) +A32*DU2(ky) +A33*DU3(ky)
     .       + SIG*(U3(kx+1,ky,nl1) -fw*U3(kx,ky,nl1) +U3(kx-1,ky,nl1))
    8 CONTINUE
C
C...................
      CALL TEST(8)
C
C******************************************************************************
C***  KERNEL 9      INTEGRATE PREDICTORS
C******************************************************************************
C
      DO 9  L = 1,Loop
      DO 9  i = 1,n
      PX( 1,i)= DM28*PX(13,i) + DM27*PX(12,i) + DM26*PX(11,i) +
     .          DM25*PX(10,i) + DM24*PX( 9,i) + DM23*PX( 8,i) +
     .          DM22*PX( 7,i) +  C0*(PX( 5,i) +      PX( 6,i))+ PX( 3,i)
    9 CONTINUE
C
C...................
      CALL TEST(9)
C
C******************************************************************************
C***  KERNEL 10     DIFFERENCE PREDICTORS
C******************************************************************************
C
      DO 10  L= 1,Loop
      DO 10  i= 1,n
      AR      =      CX(5,i)
      BR      = AR - PX(5,i)
      PX(5,i) = AR
      CR      = BR - PX(6,i)
      PX(6,i) = BR
      AR      = CR - PX(7,i)
      PX(7,i) = CR
      BR      = AR - PX(8,i)
      PX(8,i) = AR
      CR      = BR - PX(9,i)
      PX(9,i) = BR
      AR      = CR - PX(10,i)
      PX(10,i)= CR
      BR      = AR - PX(11,i)
      PX(11,i)= AR
      CR      = BR - PX(12,i)
      PX(12,i)= BR
      PX(14,i)= CR - PX(13,i)
      PX(13,i)= CR
   10 CONTINUE
C
C...................
      CALL TEST(10)
C
C******************************************************************************
C***  KERNEL 11     FIRST SUM.   PARTIAL SUMS.
C******************************************************************************
C
            fw= 1.000D-25
      DO 11 L = 1,Loop
C         Y(1)= Y(1) + L*fw   use only if optimization eliminates L-loop.
          X(1)= Y(1)
CDIR$ NOVECTOR
      DO 11 k = 2,n
   11     X(k)= X(k-1) + Y(k)
CDIR$ VECTOR
C
C...................
      CALL TEST(11)
C
C******************************************************************************
C***  KERNEL 12     FIRST DIFF.
C******************************************************************************
C
            fw= 1.000D-25
      DO 12 L = 1,Loop
C         Y(1)= Y(1) + L*fw   use only if optimization eliminates L-loop.
      DO 12 k = 1,n
   12     X(k)= Y(k+1) - Y(k)
C
C...................
      CALL TEST(12)
C
C******************************************************************************
C***  KERNEL 13      2-D PIC   Particle In Cell
C******************************************************************************
C
                fw= 1.000D0
      DO  13     L= 1,Loop
      DO  13    ip= 1,n
                i1= P(1,ip)
                j1= P(2,ip)
                i1=        1 + MOD2N(i1,64)
                j1=        1 + MOD2N(j1,64)
           P(3,ip)= P(3,ip)  + B(i1,j1)
           P(4,ip)= P(4,ip)  + C(i1,j1)
           P(1,ip)= P(1,ip)  + P(3,ip)
           P(2,ip)= P(2,ip)  + P(4,ip)
                i2= P(1,ip)
                j2= P(2,ip)
                i2=            MOD2N(i2,64)
                j2=            MOD2N(j2,64)
           P(1,ip)= P(1,ip)  + Y(i2+32)
           P(2,ip)= P(2,ip)  + Z(j2+32)
                i2= i2       + E(i2+32)
                j2= j2       + F(j2+32)
          H(i2,j2)= H(i2,j2) + fw
   13 CONTINUE
C
C...................
      CALL TEST(13)
C
C******************************************************************************
C***  KERNEL 14      1-D PIC   Particle In Cell
C******************************************************************************
C
C
               fw= 1.000D0
      DO   14   L= 1,Loop
      DO   141  k= 1,n
            VX(k)= 0.0
            XX(k)= 0.0
            IX(k)= INT(  GRD(k))
            XI(k)= REAL( IX(k))
           EX1(k)= EX   ( IX(k))
          DEX1(k)= DEX  ( IX(k))
 141  CONTINUE
C
      DO   142  k= 1,n
            VX(k)= VX(k) + EX1(k) + (XX(k) - XI(k))*DEX1(k)
            XX(k)= XX(k) + VX(k)  + FLX
            IR(k)= XX(k)
            RX(k)= XX(k) - IR(k)
            IR(k)= MOD2N(  IR(k),2048) + 1
            XX(k)= RX(k) + IR(k)
 142  CONTINUE
C
      DO  14    k= 1,n
      RH(IR(k)  )= RH(IR(k)  ) + fw - RX(k)
      RH(IR(k)+1)= RH(IR(k)+1) + RX(k)
  14  CONTINUE
C
C...................
      CALL TEST(14)
C
C
C
C
C
C
C
C
C
C
C
C
C
C
C
C
C
C
C
C******************************************************************************
C***  KERNEL 15     CASUAL FORTRAN.  DEVELOPMENT VERSION.
C******************************************************************************
C
C
C       CASUAL ORDERING OF SCALAR OPERATIONS IS TYPICAL PRACTICE.
C       THIS EXAMPLE DEMONSTRATES THE NON-TRIVIAL TRANSFORMATION
C       REQUIRED TO MAP INTO AN EFFICIENT MACHINE IMPLEMENTATION.
C
        DO 45  L = 1,Loop
               NG= 7
               NZ= n
               AR= 0.05300D0
               BR= 0.07300D0
   15   DO 45  j = 2,NG
        DO 45  k = 2,NZ
               IF( j-NG) 31,30,30
   30     VY(k,j)= 0.0
                   GO TO 45
   31          IF( VH(k,j+1) -VH(k,j)) 33,33,32
   32           T= AR
                   GO TO 34
   33           T= BR
   34          IF( VF(k,j) -VF(k-1,j)) 35,36,36
   35           R= MAX( VH(k-1,j), VH(k-1,j+1))
                S= VF(k-1,j)
                   GO TO 37
   36           R= MAX( VH(k,j),   VH(k,j+1))
                S= VF(k,j)
   37     VY(k,j)= SQRT( VG(k,j)**2 +R*R)*T/S
   38          IF( k-NZ) 40,39,39
   39     VS(k,j)= 0.0
                   GO TO 45
   40          IF( VF(k,j) -VF(k,j-1)) 41,42,42
   41           R= MAX( VG(k,j-1), VG(k+1,j-1))
                S= VF(k,j-1)
                T= BR
                   GO TO 43
   42           R= MAX( VG(k,j),   VG(k+1,j))
                S= VF(k,j)
                T= AR
   43     VS(k,j)= SQRT( VH(k,j)**2 +R*R)*T/S
   45    CONTINUE
C
C...................
      CALL TEST(15)
C
C
C
C
C
C
C
C
C
C
C
C
C
C
C******************************************************************************
C***  KERNEL 16     MONTE CARLO SEARCH LOOP
C******************************************************************************
C
            II= n/3
            LB= II+II
            k2= 0
            k3= 0
C
      DO 485 L= 1,Loop
             m= 1
  405       i1= m
  410       j2= (n+n)*(m-1)+1
      DO 470 k= 1,n
            k2= k2+1
            j4= j2+k+k
            j5= ZONE(j4)
            IF( j5-n      ) 420,475,450
  415       IF( j5-n+II   ) 430,425,425
  420       IF( j5-n+LB   ) 435,415,415
  425       IF( PLAN(j5)-R) 445,480,440
  430       IF( PLAN(j5)-S) 445,480,440
  435       IF( PLAN(j5)-T) 445,480,440
  440       IF( ZONE(j4-1)) 455,485,470
  445       IF( ZONE(j4-1)) 470,485,455
  450       k3= k3+1
            IF( D(j5)-(D(j5-1)*(T-D(j5-2))**2+(S-D(j5-3))**2
     .                        +(R-D(j5-4))**2)) 445,480,440
  455        m= m+1
            IF( m-ZONE(1) ) 465,465,460
  460        m= 1
  465       IF( i1-m) 410,480,410
  470 CONTINUE
  475 CONTINUE
  480 CONTINUE
  485 CONTINUE
C
C...................
      CALL TEST(16)
C
C******************************************************************************
C***  KERNEL 17     IMPLICIT, CONDITIONAL COMPUTATION
C******************************************************************************
C
C
C         RECURSIVE-DOUBLING VECTOR TECHNIQUES CAN NOT BE USED
C         BECAUSE CONDITIONAL OPERATIONS APPLY TO EACH ELEMENT.
C
                 dw= 5.0000D0/3.0000D0
                 fw= 1.0000D0/3.0000D0
                 tw= 1.0300D0/3.0700D0
            DO 62 L= 1,Loop
                  i= n
                  j= 1
                INK= -1
              SCALE= dw
                XNM= fw
                 E6= tw
                     GO TO 61
C                                            STEP MODEL
  60             E6= XNM*VSP(i)+VSTP(i)
            VXNE(i)= E6
                XNM= E6
             VE3(i)= E6
                  i= i+INK
                 IF( i.EQ.j) GO TO  62
  61             E3= XNM*VLR(i) +VLIN(i)
               XNEI= VXNE(i)
            VXND(i)= E6
                XNC= SCALE*E3
C                                            SELECT MODEL
                 IF( XNM .GT.XNC) GO TO  60
                 IF( XNEI.GT.XNC) GO TO  60
C                                            LINEAR MODEL
             VE3(i)= E3
                 E6= E3+E3-XNM
            VXNE(i)= E3+E3-XNEI
                XNM= E6
                  i= i+INK
                 IF( i.NE.j) GO TO 61
   62 CONTINUE
C
C...................
      CALL TEST(17)
C
C******************************************************************************
C***  KERNEL 18     2-D EXPLICIT HYDRODYNAMICS FRAGMENT
C******************************************************************************
C
         DO 75  L= 1,Loop
                T= 0.003700D0
                S= 0.004100D0
               KN= 6
               JN= n
         DO 70  k= 2,KN
         DO 70  j= 2,JN
          ZA(j,k)= (ZP(j-1,k+1)+ZQ(j-1,k+1)-ZP(j-1,k)-ZQ(j-1,k))
     .            *(ZR(j,k)+ZR(j-1,k))/(ZM(j-1,k)+ZM(j-1,k+1))
          ZB(j,k)= (ZP(j-1,k)+ZQ(j-1,k)-ZP(j,k)-ZQ(j,k))
     .            *(ZR(j,k)+ZR(j,k-1))/(ZM(j,k)+ZM(j-1,k))
   70    CONTINUE
C
         DO 72  k= 2,KN
         DO 72  j= 2,JN
          ZU(j,k)= ZU(j,k)+S*(ZA(j,k)*(ZZ(j,k)-ZZ(j+1,k))
     .                    -ZA(j-1,k) *(ZZ(j,k)-ZZ(j-1,k))
     .                    -ZB(j,k)   *(ZZ(j,k)-ZZ(j,k-1))
     .                    +ZB(j,k+1) *(ZZ(j,k)-ZZ(j,k+1)))
          ZV(j,k)= ZV(j,k)+S*(ZA(j,k)*(ZR(j,k)-ZR(j+1,k))
     .                    -ZA(j-1,k) *(ZR(j,k)-ZR(j-1,k))
     .                    -ZB(j,k)   *(ZR(j,k)-ZR(j,k-1))
     .                    +ZB(j,k+1) *(ZR(j,k)-ZR(j,k+1)))
   72    CONTINUE
C
         DO 75  k= 2,KN
         DO 75  j= 2,JN
          ZR(j,k)= ZR(j,k)+T*ZU(j,k)
          ZZ(j,k)= ZZ(j,k)+T*ZV(j,k)
   75    CONTINUE
C
C...................
      CALL TEST(18)
C
C******************************************************************************
C***  KERNEL 19      GENERAL LINEAR RECURRENCE EQUATIONS
C******************************************************************************
C
C                  IF( JR.GT.1 ) GO TO 192
C
                 KB5I= 0
             DO 194 L= 1,Loop
             DO 191 k= 1,n
           B5(k+KB5I)= SA(k) +STB5*SB(k)
                 STB5= B5(k+KB5I) -STB5
  191        CONTINUE
C     GO TO 194
C
  192        DO 193 i= 1,n
                    k= n-i+1
           B5(k+KB5I)= SA(k) +STB5*SB(k)
                 STB5= B5(k+KB5I) -STB5
  193        CONTINUE
  194 CONTINUE
C
C...................
      CALL TEST(19)
C
C******************************************************************************
C***  KERNEL 20     DISCRETE ORDINATES TRANSPORT: CONDITIONAL RECURRENCE ON XX.
C******************************************************************************
C
           dw= 0.200D0
      DO 20 L= 1,Loop
      DO 20 k= 1,n
           DI= Y(k)-G(k)/( XX(k)+DK)
           DN= dw
           IF( DI.NE.0.0) DN= MAX( S,MIN( Z(k)/DI, T))
         X(k)= ((W(k)+V(k)*DN)* XX(k)+U(k))/(VX(k)+V(k)*DN)
      XX(k+1)= (X(k)- XX(k))*DN+ XX(k)
   20 CONTINUE
C
C...................
      CALL TEST(20)
C
C******************************************************************************
C***  KERNEL 21     MATRIX*MATRIX PRODUCT
C******************************************************************************
C
      DO 21 L= 1,Loop
      DO 21 k= 1,25
      DO 21 i= 1,25
      DO 21 j= 1,n
      PX(i,j)= PX(i,j) +VY(i,k) * CX(k,j)
   21 CONTINUE
C
C...................
      CALL TEST(21)
C
C
C
C
C
C
C
C******************************************************************************
C***  KERNEL 22     PLANCKIAN DISTRIBUTION
C******************************************************************************
C
C
C      EXPMAX= 234.500D0
       EXPMAX= 20.0000D0
           fw= 1.00000D0
         U(n)= 0.99000D0*EXPMAX*V(n)
      DO 22 L= 1,Loop
      DO 22 k= 1,n
CARE       IF( U(k) .LT. EXPMAX*V(k))  THEN
                                            Y(k)= U(k)/V(k)
CARE                                   ELSE
CARE                                        Y(k)= EXPMAX
CARE    ENDIF
         W(k)= X(k)/( EXP( Y(k)) -fw)
   22 CONTINUE
C...................
      CALL TEST(22)
C
C******************************************************************************
C***  KERNEL 23     2-D IMPLICIT HYDRODYNAMICS FRAGMENT
C******************************************************************************
C
            fw= 0.17500D0
      DO 23  L= 1,Loop
      DO 23  j= 2,6
      DO 23  k= 2,n
            QA= ZA(k,j+1)*ZR(k,j) +ZA(k,j-1)*ZB(k,j) +
     .          ZA(k+1,j)*ZU(k,j) +ZA(k-1,j)*ZV(k,j) +ZZ(k,j)
   23  ZA(k,j)= ZA(k,j) +fw*(QA -ZA(k,j))
C
C...................
      CALL TEST(23)
C
C******************************************************************************
C***  KERNEL 24     FIND LOCATION OF FIRST MINIMUM IN ARRAY
C******************************************************************************
C
C      X( n/2)= -1.000D+50
       X( n/2)= -1.000D+10
      DO 24  L= 1,Loop
             m= 1
      DO 24  k= 2,n
            IF( X(k).LT.X(m))  m= k
   24 CONTINUE
C
C            m= imin1( n,x,1)        35 nanosec./element STACKLIBE/CRAY
C...................
      CALL TEST(24)
C
C******************************************************************************
C
C
      IF( jr .LT.  1)  jr= 1
      IF( jr .GT. 8)  jr= 8-1
      IF( il .LT.  1)  il= 1
      IF( il .GT. 3)  il= 3
C
      DO 999 k= 1,mk
      TIMES(jr,il,k)= TIME (k)
      TERRS(jr,il,k)= TERR1(k)
      NPFS (jr,il,k)= NPFS1(k)
      CSUMS(jr,il,k)= CSUM (k)
      DOS  (jr,il,k)= TOTAL(k)
      FOPN (jr,il,k)= FLOPN(k)
  999 continue
C
      RETURN
      END
C
C***********************************************
      SUBROUTINE  PAGE( iou)
C***********************************************************************
C                                                                      *
C     PAGE   -  Writes one record holding the single character '1'     *
C               to the given output unit.                              *
C                                                                      *
C     iou    -  Logical Output Device Number (read, never modified)    *
C                                                                      *
C     NOTE(review): the '1' in column one is presumably intended as a  *
C     carriage-control code (page eject) for the output device;        *
C     confirm for the target system.                                   *
C                                                                      *
C     The format is written with a character constant instead of the   *
C     Hollerith form 1H1, because the H edit descriptor was deleted    *
C     from the language standard in Fortran 95.  The record that is    *
C     written is unchanged.                                            *
C                                                                      *
C***********************************************************************
      INTEGER iou
C
      WRITE(iou,1)
    1 FORMAT('1')
C     Alternative format kept from the original source (disabled):
c   1 FORMAT(1H)
      RETURN
      END
C***********************************************
      SUBROUTINE REPORT( iou, ntk,nek,FLOPS,TR,RATES,LSPAN,WG,OSUM,ID)
C***********************************************************************
C                                                                      *
C     REPORT -  Prints Statistical Evaluation Of Fortran Kernel Timings*
C                                                                      *
C     iou    -  Logical Output Device Number                           *
C     ntk    -  Total number of Kernels to Edit in Report              *
C     nek    -  Number of Effective Kernels in each set to Edit        *
C     FLOPS  -  Array:  Number of Flops executed by each kernel        *
C     TR     -  Array:  Time of execution of each kernel(microsecs)    *
C     RATES  -  Array:  Rate of execution of each kernel(megaflops/sec)*
C     LSPAN  -  Array:  Span of inner DO loop in each kernel           *
C     WG     -  Array:  Weight assigned to each kernel for statistics  *
C     OSUM   -  Array:  Checksums of the results of each kernel        *
C                                                                      *
C***********************************************************************
C
C
C                                REFERENCE
C
C          F.H.McMahon, The Livermore Fortran Kernels:  A Computer
C          Test Of The Numerical Performance Range, Lawrence
C          Livermore National Laboratory, Livermore, California,
C          UCRL-53745, October 1986.
C
C
C
C
C                                NOTICE
C
C               "This report was  prepared  as an account
C               of work  sponsored  by  the United States
C               Government.  Neither  the  United  States
C               nor the   United   States  Department  of
C               Energy, nor any  of  their employees, nor
C               any of their contractors, subcontractors,
C               or their employees,  makes  any warranty,
C               express or  implied, or assumes any legal
C               liability or   responsibility   for   the
C               accuracy, completeness  or  usefulness of
C               any information,  apparatus,  product  or
C               process disclosed, or represents that its
C               use would  not  infringe  privately-owned
C               rights."
C
C               Reference to  a  company  or product name
C               does not imply approval or recommendation
C               of the   product  by  the  University  of
C               California or  the   U.S. Department   of
C               Energy to  the  exclusion  of others that
C               may be suitable.
C
C
C               Work performed under  the auspices of the
C               U.S. Department of Energy by the Lawrence
C               Livermore Laboratory    under    contract
C               number W-7405-ENG-48.
C
C***********************************************************************
C
C
C  I: FORTRAN CPU PERFORMANCE ANALYSIS                  F.H.McMahon
C
C
c     These kernels measure  Fortran numerical computation rates for a
c     spectrum of  CPU-limited computational structures or benchmarks.
c     The kernels benchmark  contains  extracts  or  kernels from more
c     than a score CPU-limited scientific application programs.  These
c     kernels are the most  important  CPU  time  components  from the
c     application programs.   This  benchmark  may  be easily extended
c     with important new kernels leaving performance statistics intact.
c
c     The time required  to  convert,  debug,  execute  and time many,
c     entire, large  programs  on  new  machines  each  having  a  new
c     implementation of  Fortran,   or   several   implementations  or
c     dialects rapidly  becomes  excessive.  Almost all the conversion
c     costs are in segments  of  the programs which are irrelevant for
c     evaluation of  the  CPU,  e.g.,  I/O, Fortran variations, memory
c     allocation, overlays,   job  control,   etc.    All   of   these
c     complexities are reduced to a single, small benchmark which uses
c     a minimum of I/O  and  a  single level of storage.  Further, the
c     computation in  the  kernels  is  the  most  stable  part of the
c     Fortran language.
c
c     The kernels benchmark  is sufficient to determine a range of CPU
c     performance for  many  different  computational  structures in a
c     single computer run.   Since the range in performance is usually
c     large the  mean  has  a secondary significance.  To estimate the
c     performance of  a particular,  CPU-limited  application  program
c     select the  case(s) which are most similar to the application as
c     most relevant to the  estimate.   The  performance  ratio  of  a
c     kernel on  two  different  machines or compiled by two different
c     compilers on the same  machine  will  approximate  the  ratio of
c     through-puts for   an  application  which  is  very  similar  in
c     structure.
c
c     This set of kernels was chosen to measure lower and upper bounds
c     for scalar Fortran computation rates.  The upper bound on scalar
c     rates serves as a base  to  evaluate the effectiveness of vector
c     computation.  The  kind  of  Fortran  which  has the highest MIP
c     rates is pure arithmetic  in  DO-loops where complete local code
c     optimization by a Fortran compiler is possible.  All other kinds
c     of Fortran  operations  execute  at  much  lower  MIP  rates  on
c     multiple register machines (these ops may not be necessary).
c
c     Through-put is  measured  in  units of floating-point operations
c     executed per micro-second;  called  results  per micro-second or
c     mega-flops.  The  Mflop is a measure of the NECESSARY results in
c     a scientific application  program  regardless  of  the number or
c     kind of  operations  or processing.  The ratio of Mflops for two
c     different machines will approximate  the  ratio  of through-puts
c     for the  majority  of compute-limited scientific applications on
c     the two  machines.    The   kernels  measure  performance  scale
c     factors.
c
C
C
c
c -----------------------------------------------------------------------------
c II:  A CPU Performance Metric For Computational Physics:    Mega-Flops/sec.
c -----------------------------------------------------------------------------
c                                                                 F.H. McMahon
c
c A:  Floating-Point Instructions:  The Necessary Mathematics
c
c Computational physics applies  systems  of  PDEs from Mathematical physics to
c simulate the  evolution of physical systems.  The mathematical methods depend
c on real  valued  functions   and   the   algorithms  are  programmed,  almost
c exclusively, in  Fortran  Floating-point  computer operations (Flops).  These
c floating-point operations  are,  unquestionably,   the   NECESSARY   computer
c operations on  ANY  computer  and  the  total  number  is  INVARIANT.  Thus a
c meaningful computation rate can  always  be  measured  by  counting the total
c number of Flops and dividing by the total execution time of a program.
c
c B:   Procedural Machine  Instructions:   Artifices Of An Architecture
c
c All of  the non-arithmetic instructions in a machine program are artifices of
c a particular hardware  architecture,  i.e.  machine dependent, as well as the
c result of  a  particular compiler's imperfect coding techniques.  How many of
c these procedural machine  instructions  are  strictly  necessary  can only be
c determined by further, tedious analysis which is ALWAYS machine dependent.  A
c famous example  of software  masking  hardware  capabilities  is  the  PASCAL
c compiler written  by N. Wirth  which  used  only  50%  of  the command set to
c generate machine programs for the CDC-7600.
c
c Unless the next generation computer design is constrained for some reason, to
c closely resemble  its  obsolete  predecessor,  the  instruction  mix  used in
c current machines is not necessarily  relevant.   Furthermore, the instruction
c mix is  not  a  definitive  characterization  of the intrinsic physics or the
c mathematical algorithms.
c
c  1.  Primary Memory Access Instructions
c
c The number of memory  instructions  that  are necessary for a given algorithm
c depends strongly  on  the  number  and  kind of CPU registers and is a highly
c machine dependent number.   Operating registers, scratch-pad memories, vector
c buffers, short-stop  and  feed-back paths in the cpu are examples of hardware
c artifices which reduce the  number  of  primary memory operations.  Compilers
c and other  coders must make intelligent use of these particular cpu resources
c to minimize memory operations  and this is generally not the case, as is well
c known.
c
c  2.  Branching Instructions
c
c Branching instructions   are   the  slowest  and  most  expensive  procedural
c instructions and are very often  unnecessary.  Here the source programmer has
c primary responsibility  to  minimize  branching in the program by avoiding IF
c statements whenever possible  by  using MAX, MIN, or merge functions like
c CSMG.  Careful  logical  reduction  and  placement of IF tests is required to
c minimize the execution of branching operations.  Compilers can do very little
c to change or optimize the branch graph specified in the source program.
c
c On vector  computers ALL IF tests over mesh or array (state) variables can be
c eliminated.  Conditional computation can be vectorized by direct construction
c using explicit  sub-set mappings.  Vector relationals replace the IF clauses.
c Then sparse,  one-to-one   mappings  called  vector  Compress/Decompress  and
c one-to-many mappings   called   vector   Gather/Scatter   are  necessary  and
c sufficient to compose sub-vector operands for simple vector operations.
c
c
C
CIII: FORTRAN PROGRAMMING SYSTEM MATURITY
c
c     Hardware performance   gains   depend   critically  on  compiler
c     maturity.  These  kernels   measure  the  joint  performance  of
c     hardware and  Fortran  compiler  software and may easily be used
c     for a comparative  analysis  of  all  the available compilers or
c     options on a given machine.  For a new or proposed machine where
c     no compiler is available  the  performance  may  be estimated by
c     simulating a  reasonable  compilation.  An example of simulation
c     rationale is given below.
c
c     Fortran compilers for new  types  of  machines require a lengthy
c     development cycle  to  achieve an  effective  level  of  machine
c     utilization.  A fully  mature  compiler  may not be completed in
c     the first  years  of  a  new machine.  Indeed, maturity is not a
c     stationary state   but  evolves   with   advances   in   program
c     optimization techniques.   Some  of  these  techniques depend on
c     special facilities in the  new  machines and serious development
c     and implementation cannot start much earlier than development of
c     the new machine.   Assumptions  on  the  maturity  of  available
c     Fortran compilers  are  crucial  to  the  evaluation  of Fortran
c     performance and  thus,   compiler   characteristics   should  be
c     explicit parameters of the performance analysis.
c
C
C  A: OPTIMIZATION CRITERIA FOR SIMULATING A MODERATELY MATURE COMPILER
c
c     The coding is deliberately simplified to the level of complexity
c     of a moderately  mature  compiler,  i.e.   a proper compiler for
c     processors with  segmented cpu design circa 1975.  The coding is
c     deliberately not lossless,  ideal  assembly  language since this
c     technique may  outperform  Fortran  by more than a factor of two
c     and may overestimate typical Fortran performance.
c
c       0. reduction of redundant operations in the source text.
c
c       1. all scalar values are defined in registers over the
c          scope of loops.
c
c       2. all subscripted input variables are loaded into registers
c          at the beginning of loops.
c
c       3. code optimization techniques include:
c
c            3.1 effective address computation for subscripted
c                variables which are a function of the loop variable
c                is optimized by:
c            3.1.1   reduction to recursive address functions.
c            3.1.2   indicial address and increment as well as
c                    current index are register values.
c            3.2 moderate scheduling of instructions.
c                not pert scheduling level, hence not best ordering.
c            3.3 loop controls are defined in registers.
c                specific, minimum loop branch tests.
c            3.4 pre-loading operands for the next loop iteration
c                was not allowed.  all operations in a loop have
c                the same phase, i.e. are the same iteration.
c
c       4. memory bank conflicts were ignored.
c
c       5. branch ratios on if conditionals are equal, then
c          averaged for two cases.
c
C
C  B: OPTIMIZATION CRITERIA FOR A MATURE COMPILER INCLUDE:
c
c       0. minimization of effective address computation
c          especially in loops.
c
c       1. optimal register allocation; ranked by usage.
c
c       2. critical path(pert) time schedule for each block of
c          instructions on machines with segmented function units.
c
c       3. global program optimization, viz IBM Fortran-H. implies
c          reduction of redundant operations in object code string.
c          important reductions for programs using dynamic addressing
c          and dynamically dimensioned arrays.
c
c       4. decomposition of iterative scalar Fortran into parallel
c          vector or array operations where these exist as parallel
c          machine operations.
c
c       5. recomposition of scalar Fortran loops that cannot be
c          decomposed as in (4.) into multi-phase loops.  the goal
c          is to eliminate serial fetch delays by forward-fetching
c          of scalar operands, i.e. memory references in a loop are
c          for operands in the next iteration.
c
c
c
C  C: EXTENSIONS TO THE SET OF KERNELS
C
c     The kernels  benchmarks  were  extended from 14 to 20 kernels in
c     1976, largely  as a result of our experiences in vectorizing old
c     applications.  They   are   intended   to   compensate  for  the
c     oversimplicity of some of  the original kernels which are mostly
c     elementary matrix  operations  and  thus,  to help represent the
c     Fortran workload  more  accurately.    The   new   kernels  have
c     significant improvements:
c
c          1. more complete excerpts of real  applications.
c
c          2. high complexity computation to test the claims
c             made for Fortran compiler code optimization.
c
c          3. original, amateur Fortran coding to test the claims
c             made for automatic vectorizers of olde Fortran.
c
c     As a  matter  of policy kernels should be updated, without prior
c     notice or further  justification,  whenever  new  algorithms are
c     perceived in  important  applications.   The  primitive  kernels
c     should be  retained   for  comparisons  and  scaling  over  long
c     periods.
c
C
C
c
c
c
c
c
c
C
C IV: PERFORMANCE MEASUREMENTS
c
c
c
c
c     Through-put is measured in units of millions of floating-point
c     operations executed per second, called mflops.
c
c
c     Artificially long computer  runs do not have to be contrived for
c     timing on  machines  where  a cpu clock may be read in job mode.
c     Statistics on  the accuracy  of  the  timing  method  should  be
c     measured.
c
c     Net mflops is meaningful only if real run time of each kernel
c     is adjusted such that it weights the total time in proportion
c     to the actual usage of that category of computation in the
c     total workload.
c
C
C
c
c
c   1. Assignment Of Weights To Floating-Point Operations
c
c     Weights are assigned to different kinds of floating-point
c     operations to normalize their hardware execution time to
c     addition time so that the flop rates computed for various
c     Fortran Kernels will be commensurable.
c
C                           +,-,*   1
C                          /,SQRT   4
C                     EXP,SIN,ETC.  8
C                     IF(X.REL.Y)   1
c
c
c     Each Kernel flop-count is the weighted number of flops required for
c     serial execution.  The scalar version defines the NECESSARY computation
c     generally, in the absence of proof to the contrary.  The vector
c     or parallel executions are only credited with executing the same
c     necessary computation.  If the parallel methods do more computation
c     than is necessary then the extra flops are not counted as through-put.
c
c
c
c
c
C
C
c
c
c
c
c
c
c
c
C
C
C
C       2. SAMPLE OUTPUT:               CDC-7600/FTN-4.4
C
C
C                 KERNEL  FLOPS   TIME   MFLOPS
C                      1    500    94.4    5.30
C                      2    300    45.3    6.62
C                      3    100    21.9    4.57
C                      4    300   109.3    2.75
C                      5    100    25.6    3.91
C                      6    100    27.8    3.60
C                      7    640    88.2    7.25
C                      8   1440   249.0    5.78
C                      9    680   123.2    5.52
C                     10    360   102.8    3.50
C                     11     49    34.8    1.41
C                     12     49    18.3    2.68
C                     13    224   107.7    2.08
C                     14   3300   809.3    4.08
C                     15   3960  1769.5    2.24
C                     16    530   320.3    1.65
C                     17    405    92.2    4.39
C                     18   6600  1121.5    5.88
C                     19    540   105.8    5.11
C                     20   1300   266.0    4.89
C                     21   1250   370.9    3.37
c                     22   1700   601.9    2.82
c                     23   1650   362.4    4.55
c                     24    200   171.7    1.16
C
C
C                      AVERAGE  RATE =     3.96 MEGA-FLOPS/SEC.
C
C                      MEDIAN   RATE =     4.08 MEGA-FLOPS/SEC.
C
C                      HARMONIC MEAN =     3.15 MEGA-FLOPS/SEC.
C
C                      STANDARD DEV. =     1.61 MEGA-FLOPS/SEC.
C
C
C                                                    F.H.MCMAHON  1972
c
c
c
c
c
c
c
c
c
c
c
c
c
c
C   SAMPLE OUTPUT FROM SUBROUTINE REPORT:   (CRAY-XMP4/CFT Compiler)
C
C
c                                                                       aus
c
c
c
c
c
c
C
C         The following output was produced on CRAY-XMP4 in a
C         fully loaded, multi-processing, multi-programming system:
C
c
c
c
c
C
C         VERIFY ADEQUATE Loop SIZE VERSUS CPU CLOCK ACCURACY
C         -----     -------     -------    -------   --------
C         EXTRA     MAXIMUM     DIGITAL    DYNAMIC   RELATIVE
C         Loop      CPUTIME     CLOCK      CLOCK     TIMING
C         SIZE      SECONDS     ERROR      ERROR     ERROR
C         -----     -------     -------    -------   --------
C             1  5.0000e-06      10.00%     17.63%     14.26%
C             2  7.0000e-06       7.14%      6.93%      4.79%
C             4  1.6000e-05       3.12%      6.56%      7.59%
C             8  2.8000e-05       1.79%      2.90%      2.35%
C            16  6.1000e-05       0.82%      6.72%      4.50%
C            32  1.1700e-04       0.43%      4.21%      4.62%
C            64  2.2700e-04       0.22%      3.13%      2.41%
C           128  4.4900e-04       0.11%      3.14%      0.96%
C           256  8.8900e-04       0.06%      2.06%      2.50%
C           512  1.7740e-03       0.03%      1.92%      1.59%
C          1024  3.4780e-03       0.01%      0.70%      1.63%
C          1360              Current Run:    Loop=   10.000*Loop
C          2048  7.0050e-03       0.01%      0.74%      1.28%
C          4096  1.3823e-02       0.00%      1.35%      0.78%
C         -----     -------     -------    -------   --------
C
C          Approximate Serial Job Time=   2.5e+01 Sec.    ( Nruns= 7 RUNS)
C
c
c
c
c
c
c
c
c
c
c
c
c
c
c
c
c
c
c  1
c
c
c
c
c   CPU CLOCK OVERHEAD (t err):
c
c        RUN        AVERAGE        STANDEV        MINIMUM        MAXIMUM
c   TICK   1   4.666667e-06   4.714045e-07   4.000000e-06   5.000000e-06
c   TICK   2   4.733333e-06   4.422166e-07   4.000000e-06   5.000000e-06
c   TICK   3   5.000000e-06   1.932305e-14   5.000000e-06   5.000000e-06
c   TICK   4   4.800000e-06   4.000000e-07   4.000000e-06   5.000000e-06
c   TICK   5   4.733333e-06   4.422166e-07   4.000000e-06   5.000000e-06
c   TICK   6   4.866667e-06   3.399346e-07   4.000000e-06   5.000000e-06
c   TICK   7   5.066667e-06   5.734884e-07   4.000000e-06   7.000000e-06
c   DATA   7   9.998664e-02   5.433188e-07   9.998564e-02   9.998765e-02
c   DATA   7   9.998599e-02   9.174211e-07   9.998434e-02   9.998765e-02
c   TICK   7   4.838095e-06   1.373543e-07   4.666667e-06   5.066667e-06
c
c
c
c
c   THE EXPERIMENTAL TIMING ERRORS FOR ALL  7 RUNS
c   --  ---------  ---------  --------- -----  -----   ---
c    k   T min      T avg      T max    T err  t err   P-F
c   --  ---------  ---------  --------- -----  -----   ---
c    1 2.3125e-03 2.3215e-03 2.3375e-03  0.34%  0.19%     0
c    2 1.0258e-02 1.0344e-02 1.0427e-02  0.58%  0.04%     0
c    3 1.4115e-03 1.4585e-03 1.4987e-03  2.24%  0.30%     0
c    4 4.0003e-03 4.0989e-03 4.1958e-03  1.70%  0.11%     0
c    5 3.3954e-02 3.4047e-02 3.4153e-02  0.25%  0.01%     0
c    6 1.0215e-02 1.0350e-02 1.0475e-02  0.85%  0.04%     0
c    7 3.7788e-03 3.7967e-03 3.8267e-03  0.44%  0.12%     0
c    8 6.8435e-03 6.9755e-03 7.1138e-03  1.07%  0.06%     0
c    9 4.4287e-03 4.5215e-03 4.5795e-03  0.99%  0.10%     0
c   10 5.2315e-03 5.3632e-03 5.4845e-03  1.49%  0.08%     0
c   11 1.3358e-02 1.3369e-02 1.3376e-02  0.04%  0.03%     0
c   12 1.9138e-03 2.0810e-03 2.2345e-03  4.38%  0.21%     0
c   13 3.7553e-02 3.7656e-02 3.7814e-02  0.25%  0.01%     0
c   14 1.8911e-02 1.9014e-02 1.9120e-02  0.37%  0.02%     0
c   15 3.5405e-02 3.5446e-02 3.5505e-02  0.09%  0.01%     0
c   16 3.9233e-02 3.9362e-02 3.9782e-02  0.45%  0.01%     0
c   17 3.4725e-02 3.4837e-02 3.4940e-02  0.22%  0.01%     0
c   18 4.0667e-03 4.0942e-03 4.1595e-03  0.72%  0.11%     0
c   19 3.3786e-02 3.4032e-02 3.4180e-02  0.35%  0.01%     0
c   20 2.2068e-02 2.2096e-02 2.2120e-02  0.08%  0.02%     0
c   21 1.9691e-02 2.0053e-02 2.0386e-02  1.27%  0.02%     0
c   22 2.8807e-03 2.8906e-03 2.8965e-03  0.22%  0.15%     0
c   23 3.3050e-02 3.3136e-02 3.3235e-02  0.19%  0.01%     0
c   24 3.1530e-02 3.1588e-02 3.1646e-02  0.13%  0.01%     0
c   --  ---------  ---------  --------- -----  -----   ---
c
c   NET CPU TIMING VARIANCE (T err);  A few % is ok:
c
c                AVERAGE        STANDEV        MINIMUM        MAXIMUM
c    Terr          0.78%          0.94%          0.04%          4.38%
c
c
c
c  1
c   ********************************************
c   THE LIVERMORE  FORTRAN KERNELS:  M F L O P S
c   ********************************************
c
c                Computer : CRAY-XMP-1 of 4 procs
c                System   : CTSS 10e    fully loaded
c                Compiler : CFT 1.14e   Auto  vector
c
c           Mean  DO Span =   471
c
c           When the computer performance range is very large
c           the net Mflops rate of many Fortran programs and
c           workloads will be in the sub-range between the equi-
c           weighted harmonic and arithmetic means depending
c           on the degree of code parallelism and optimization.
c           More accurate estimates of cpu workload rates depend
c           on assigning appropriate weights for each kernel.
c
c
c
c   KERNEL  FLOPS   MICROSEC   MFLOP/SEC SPAN WEIGHT  CHECK-SUMS         OK
c   ------  -----   --------   --------- ---- ------  ----------         --
c    1 3.5035e+05 2.3215e+03    150.9181 1001   1.00  3.580256885257e+05 15
c    2 2.5996e+05 1.0344e+04     25.1321  101   1.00  3.605241761060e+03 14
c    3 1.8018e+05 1.4585e+03    123.5415 1001   1.00  7.005200181467e+01 14
c    4 1.6800e+05 4.0989e+03     40.9867 1001   1.00  4.199475416832e+00 15
c    5 2.0000e+05 3.4047e+04      5.8743 1001   1.00  3.184210149672e+04 14
c    6 1.1904e+05 1.0350e+04     11.5011   64   1.00  3.660366768768e+13 13
c    7 6.3680e+05 3.7967e+03    167.7227  995   1.00  4.272975752615e+05 14
c    8 7.1280e+05 6.9755e+03    102.1869  100   1.00  1.050887603939e+06 14
c    9 6.1812e+05 4.5215e+03    136.7081  101   1.00  8.326105269826e+05 15
c   10 3.0906e+05 5.3632e+03     57.6264  101   1.00  5.117258849028e+05 15
c   11 1.1000e+05 1.3369e+04      8.2278 1001   1.00  2.340037689466e+08  9
c   12 1.1988e+05 2.0810e+03     57.6061 1000   1.00  2.034999000047e-04  9
c   13 1.6128e+05 3.7656e+04      4.2830   64   1.00  2.839977317873e+10 12
c   14 2.2022e+05 1.9014e+04     11.5819 1001   1.00  2.116750694993e+10 14
c   15 1.6500e+05 3.5446e+04      4.6550  101   1.00  2.760671683247e+05 15
c   16 1.3250e+05 3.9362e+04      3.3662   75   1.00  1.982820000000e+05 16
c   17 3.1815e+05 3.4837e+04      9.1325  101   1.00  7.802492410322e+03 14
c   18 4.3560e+05 4.0942e+03    106.3952  100   1.00  3.615937787530e+05 14
c   19 2.3634e+05 3.4032e+04      6.9447  101   1.00  3.795271872105e+03 14
c   20 2.6000e+05 2.2096e+04     11.7666 1000   1.00  2.128451037522e+08 12
c   21 1.2625e+06 2.0053e+04     62.9576  101   1.00  5.802625385051e+07 13
c   22 1.8887e+05 2.8906e+03     65.3394  101   1.00  2.057023063597e+03 15
c   23 4.3560e+05 3.3136e+04     13.1456  100   1.00  2.484884179713e+05 14
c   24 5.0000e+04 3.1588e+04      1.5829 1001   1.00  3.500000000000e+03 16
c   ------  -----   --------   --------- ---- ------  ----------         --
c           MFLOPS   Range :                                            332
c
c           Maximum   Rate =    167.7227 Mega-Flops/Sec.
c           Average   Rate =     49.5493 Mega-Flops/Sec.
c           Geometric Mean =     22.9138 Mega-Flops/Sec.
c           Median    Rate =     19.1321 Mega-Flops/Sec.
c           Harmonic  Mean =      9.8602 Mega-Flops/Sec.
c           Minimum   Rate =      1.5829 Mega-Flops/Sec.
c
c           Standard  Dev. =     52.3398 Mega-Flops/Sec.
c           Median    Dev. =     37.8255 Mega-Flops/Sec.
c
c
c
c
c
c
c                      SENSITIVITY ANALYSIS
c
c
c           The sensitivity of the harmonic mean rate (Mflops)
c           to various weightings is shown in the table below.
c           Seven work distributions are generated by assigning
c           two distinct weights to ranked kernels by quartiles.
c           Forty nine possible cpu workloads are then evaluated
c           using seven sets of values for the total weights:
c
c
c               ------ ------ ------ ------ ------ ------ ------
c     1st QT:      O      O      O      O      O      X      X
c     2nd QT:      O      O      O      X      X      X      O
c     3rd QT:      O      X      X      X      O      O      O
c     4th QT:      X      X      O      O      O      O      O
c               ------ ------ ------ ------ ------ ------ ------
c     Total
c     Weights                    Net Mflops:
c      X    O
c    ---- ----
c
c    1.00 0.00    3.55   5.32  10.61  17.25  46.12  67.69 127.17
c
c    0.95 0.05    3.71   5.57  10.56  16.05  37.04  42.67  70.92
c
c    0.90 0.10    3.88   5.86  10.51  15.00  30.95  31.15  49.17
c
c    0.80 0.20    4.28   6.52  10.40  13.27  23.29  20.23  30.48
c
c    0.70 0.30    4.77   7.35  10.30  11.90  18.67  14.98  22.08
c
c    0.60 0.40    5.39   8.42  10.20  10.78  15.57  11.89  17.31
c
c    0.50 0.50    6.19   9.86  10.10   9.86  13.36   9.86  14.24
c    ---- ----
c               ------ ------ ------ ------ ------ ------ ------
c
c
c
c
c
c
c SENSITIVITY OF NET MFLOPS RATE TO USE OF OPTIMAL FORTRAN CODE(SISD/SIMD MODEL
c
c      5.31   6.57   8.61  12.50  16.13   22.76   38.60   59.23  127.17
c
c      0.00   0.20   0.40   0.60   0.70    0.80    0.90    0.95    1.00
c      Fraction Of Operations Run At Optimal Fortran Rates
c
c
c
c
c
c
c
c
c
c
c   CPU CLOCK OVERHEAD (t err):
c
c        RUN        AVERAGE        STANDEV        MINIMUM        MAXIMUM
c   TICK   1   4.733333e-06   4.422166e-07   4.000000e-06   5.000000e-06
c   TICK   2   4.866667e-06   3.399346e-07   4.000000e-06   5.000000e-06
c   TICK   3   4.800000e-06   4.000000e-07   4.000000e-06   5.000000e-06
c   TICK   4   4.733333e-06   4.422166e-07   4.000000e-06   5.000000e-06
c   TICK   5   4.800000e-06   4.000000e-07   4.000000e-06   5.000000e-06
c   TICK   6   4.866667e-06   3.399346e-07   4.000000e-06   5.000000e-06
c   TICK   7   4.866667e-06   4.988876e-07   4.000000e-06   6.000000e-06
c   DATA   7   9.998664e-02   5.433188e-07   9.998564e-02   9.998765e-02
c   DATA   7   9.998599e-02   9.174211e-07   9.998434e-02   9.998765e-02
c   TICK   7   4.809524e-06   5.553288e-08   4.733333e-06   4.866667e-06
c
c
c
c
c   THE EXPERIMENTAL TIMING ERRORS FOR ALL  7 RUNS
c   --  ---------  ---------  --------- -----  -----   ---
c    k   T min      T avg      T max    T err  t err   P-F
c   --  ---------  ---------  --------- -----  -----   ---
c    1 3.6396e-03 3.7041e-03 3.7478e-03  0.86%  0.12%     0
c    2 1.2313e-02 1.2445e-02 1.2518e-02  0.58%  0.03%     0
c    3 6.0748e-03 6.1361e-03 6.1847e-03  0.82%  0.07%     0
c    4 2.0149e-02 2.0526e-02 2.0930e-02  1.12%  0.02%     0
c    5 3.7970e-02 3.8241e-02 3.8436e-02  0.34%  0.01%     0
c    6 1.9979e-02 2.0156e-02 2.0372e-02  0.64%  0.02%     0
c    7 4.8317e-03 4.9201e-03 5.0215e-03  1.21%  0.09%     0
c    8 8.2217e-03 8.4281e-03 8.5418e-03  1.15%  0.05%     0
c    9 5.2438e-03 5.3005e-03 5.3525e-03  0.87%  0.08%     0
c   10 5.7437e-03 6.0877e-03 6.4375e-03  3.70%  0.07%     0
c   11 2.0656e-02 2.0681e-02 2.0727e-02  0.11%  0.02%     0
c   12 3.4358e-03 3.5171e-03 3.7247e-03  2.81%  0.12%     0
c   13 4.2846e-02 4.3043e-02 4.3223e-02  0.26%  0.01%     0
c   14 1.9670e-02 1.9733e-02 1.9833e-02  0.28%  0.02%     0
c   15 7.0843e-02 7.0946e-02 7.1094e-02  0.11%  0.01%     0
c   16 4.5160e-02 4.5291e-02 4.5388e-02  0.17%  0.01%     0
c   17 3.9607e-02 3.9716e-02 3.9859e-02  0.25%  0.01%     0
c   18 4.0446e-03 4.0740e-03 4.1098e-03  0.48%  0.11%     0
c   19 4.0040e-02 4.0279e-02 4.0477e-02  0.30%  0.01%     0
c   20 3.5444e-02 3.5467e-02 3.5490e-02  0.04%  0.01%     0
c   21 2.7746e-02 2.8205e-02 2.8780e-02  1.34%  0.02%     0
c   22 3.6666e-03 3.6830e-03 3.6978e-03  0.27%  0.12%     0
c   23 4.1323e-02 4.1499e-02 4.1603e-02  0.22%  0.01%     0
c   24 3.9596e-02 3.9644e-02 3.9715e-02  0.11%  0.01%     0
c   --  ---------  ---------  --------- -----  -----   ---
c
c   NET CPU TIMING VARIANCE (T err);  A few % is ok:
c
c                AVERAGE        STANDEV        MINIMUM        MAXIMUM
c    Terr          0.75%          0.86%          0.04%          3.70%
c
c
c
c
c
c  1
c   ********************************************
c   THE LIVERMORE  FORTRAN KERNELS:  M F L O P S
c   ********************************************
c
c                Computer : CRAY-XMP-1 of 4 procs
c                System   : CTSS 10e    fully loaded
c                Compiler : CFT 1.14e   Auto  vector
c
c           Mean  DO Span =    90
c
c           When the computer performance range is very large
c           the net Mflops rate of many Fortran programs and
c           workloads will be in the sub-range between the equi-
c           weighted harmonic and arithmetic means depending
c           on the degree of code parallelism and optimization.
c           More accurate estimates of cpu workload rates depend
c           on assigning appropriate weights for each kernel.
c
c
c
c   KERNEL  FLOPS   MICROSEC   MFLOP/SEC SPAN WEIGHT  CHECK-SUMS         OK
c   ------  -----   --------   --------- ---- ------  ----------         --
c    1 4.0400e+05 3.7041e+03    109.0679  101   2.00  3.677341345257e+03 14
c    2 3.1040e+05 1.2445e+04     24.9427  101   2.00  3.605241761060e+03 14
c    3 2.1412e+05 6.1361e+03     34.8950  101   2.00  7.068190056054e+00 14
c    4 1.6800e+05 2.0526e+04      8.1848  101   2.00  4.199475416832e+00 15
c    5 2.2000e+05 3.8241e+04      5.7530  101   2.00  3.212322357721e+02 14
c    6 1.3440e+05 2.0156e+04      6.6679   32   2.00  1.885296670192e+16 12
c    7 7.1104e+05 4.9201e+03    144.5170  101   2.00  4.441910421041e+03 14
c    8 8.5536e+05 8.4281e+03    101.4889  100   2.00  1.050887603939e+06 14
c    9 7.2114e+05 5.3005e+03    136.0502  101   2.00  8.326105269825e+05 15
c   10 3.4542e+05 6.0877e+03     56.7408  101   2.00  5.117258849028e+05 14
c   11 1.2800e+05 2.0681e+04      6.1893  101   2.00  2.403492372107e+05  8
c   12 1.3464e+05 3.5171e+03     38.2814  100   2.00  4.989298401193e-05  9
c   13 1.8368e+05 4.3043e+04      4.2673   32   2.00  1.627723261374e+10 12
c   14 2.2220e+05 1.9733e+04     11.2604  101   2.00  2.130649341195e+08 14
c   15 3.3000e+05 7.0946e+04      4.6514  101   2.00  2.760671683247e+05 15
c   16 1.5120e+05 4.5291e+04      3.3384   40   2.00  2.270870000000e+05 16
c   17 3.6360e+05 3.9716e+04      9.1550  101   2.00  7.802492410321e+03 14
c   18 4.3560e+05 4.0740e+03    106.9227  100   2.00  3.615937787530e+05 14
c   19 2.7876e+05 4.0279e+04      6.9208  101   2.00  3.795271872105e+03 14
c   20 4.1600e+05 3.5467e+04     11.7292  100   2.00  2.188343625168e+05 14
c   21 1.2500e+06 2.8205e+04     44.3180   50   2.00  2.790571795524e+07 13
c   22 2.4038e+05 3.6830e+03     65.2680  101   2.00  2.057023063597e+03 15
c   23 5.4450e+05 4.1499e+04     13.1207  100   2.00  2.484926226843e+05 14
c   24 6.2000e+04 3.9644e+04      1.5639  101   2.00  3.500000000000e+02 16
c   ------  -----   --------   --------- ---- ------  ----------         --
c           MFLOPS   Range :                                            331
c
c           Maximum   Rate =    144.5170 Mega-Flops/Sec.
c           Average   Rate =     39.8039 Mega-Flops/Sec.
c           Geometric Mean =     18.5628 Mega-Flops/Sec.
c           Median    Rate =     12.4307 Mega-Flops/Sec.
c           Harmonic  Mean =      8.9125 Mega-Flops/Sec.
c           Minimum   Rate =      1.5639 Mega-Flops/Sec.
c
c           Standard  Dev. =     44.9777 Mega-Flops/Sec.
c           Median    Dev. =     31.1973 Mega-Flops/Sec.
c
c
c
c
c
c
c                      SENSITIVITY ANALYSIS
c
c
c           The sensitivity of the harmonic mean rate (Mflops)
c           to various weightings is shown in the table below.
c           Seven work distributions are generated by assigning
c           two distinct weights to ranked kernels by quartiles.
c           Forty nine possible cpu workloads are then evaluated
c           using seven sets of values for the total weights:
c
c
c               ------ ------ ------ ------ ------ ------ ------
c     1st QT:      O      O      O      O      O      X      X
c     2nd QT:      O      O      O      X      X      X      O
c     3rd QT:      O      X      X      X      O      O      O
c     4th QT:      X      X      O      O      O      O      O
c               ------ ------ ------ ------ ------ ------ ------
c     Total
c     Weights                    Net Mflops:
c      X    O
c    ---- ----
c
c    1.00 0.00    3.48   4.95   8.57  13.17  28.40  44.57 103.51
c
c    0.95 0.05    3.63   5.18   8.59  12.57  24.79  31.83  60.62
c
c    0.90 0.10    3.79   5.43   8.62  12.02  21.99  24.76  42.86
c
c    0.80 0.20    4.16   6.02   8.66  11.06  17.94  17.14  27.02
c
c    0.70 0.30    4.60   6.75   8.71  10.24  15.15  13.11  19.73
c
c    0.60 0.40    5.16   7.68   8.75   9.53  13.11  10.61  15.54
c
c    0.50 0.50    5.86   8.91   8.80   8.91  11.56   8.91  12.82
c    ---- ----
c               ------ ------ ------ ------ ------ ------ ------
c
c
c
c
c
c
c SENSITIVITY OF NET MFLOPS RATE TO USE OF OPTIMAL FORTRAN CODE(SISD/SIMD MODEL
c
c      4.95   6.11   7.99  11.54  14.84   20.77   34.60   51.86  103.51
c
c      0.00   0.20   0.40   0.60   0.70    0.80    0.90    0.95    1.00
c      Fraction Of Operations Run At Optimal Fortran Rates
c
c
c
c
c
c
c
c
c
c
c
c   CPU CLOCK OVERHEAD (t err):
c
c        RUN        AVERAGE        STANDEV        MINIMUM        MAXIMUM
c   TICK   1   4.933333e-06   2.494438e-07   4.000000e-06   5.000000e-06
c   TICK   2   4.933333e-06   4.422166e-07   4.000000e-06   6.000000e-06
c   TICK   3   4.800000e-06   4.000000e-07   4.000000e-06   5.000000e-06
c   TICK   4   4.800000e-06   5.416026e-07   4.000000e-06   6.000000e-06
c   TICK   5   4.800000e-06   4.000000e-07   4.000000e-06   5.000000e-06
c   TICK   6   4.933333e-06   2.494438e-07   4.000000e-06   5.000000e-06
c   TICK   7   4.800000e-06   4.000000e-07   4.000000e-06   5.000000e-06
c   DATA   7   9.998664e-02   5.433188e-07   9.998564e-02   9.998765e-02
c   DATA   7   9.998599e-02   9.174211e-07   9.998434e-02   9.998765e-02
c   TICK   7   4.857143e-06   6.598289e-08   4.800000e-06   4.933333e-06
c
c
c
c
c   THE EXPERIMENTAL TIMING ERRORS FOR ALL  7 RUNS
c   --  ---------  ---------  --------- -----  -----   ---
c    k   T min      T avg      T max    T err  t err   P-F
c   --  ---------  ---------  --------- -----  -----   ---
c    1 4.9255e-03 4.9673e-03 5.0215e-03  0.62%  0.09%     0
c    2 2.5391e-02 2.5497e-02 2.5668e-02  0.43%  0.02%     0
c    3 1.2076e-02 1.2181e-02 1.2255e-02  0.44%  0.04%     0
c    4 4.1387e-02 4.1650e-02 4.1767e-02  0.30%  0.01%     0
c    5 3.0290e-02 3.0553e-02 3.0731e-02  0.42%  0.01%     0
c    6 4.8437e-02 4.8809e-02 4.9144e-02  0.42%  0.01%     0
c    7 5.5685e-03 5.5897e-03 5.6178e-03  0.32%  0.08%     0
c    8 1.0261e-02 1.0418e-02 1.0573e-02  0.97%  0.04%     0
c    9 7.0038e-03 7.0966e-03 7.1828e-03  0.98%  0.06%     0
c   10 7.7975e-03 7.9491e-03 8.0408e-03  0.94%  0.05%     0
c   11 2.5637e-02 2.5858e-02 2.6006e-02  0.41%  0.02%     0
c   12 5.5238e-03 5.5703e-03 5.6385e-03  0.61%  0.08%     0
c   13 3.3584e-02 3.3680e-02 3.3751e-02  0.14%  0.01%     0
c   14 1.8261e-02 1.8469e-02 1.8586e-02  0.52%  0.02%     0
c   15 3.9167e-02 3.9268e-02 3.9397e-02  0.20%  0.01%     0
c   16 3.4676e-02 3.4778e-02 3.4840e-02  0.14%  0.01%     0
c   17 3.1477e-02 3.1622e-02 3.1756e-02  0.26%  0.01%     0
c   18 8.7125e-03 8.7759e-03 8.8358e-03  0.46%  0.05%     0
c   19 3.2181e-02 3.2335e-02 3.2427e-02  0.27%  0.01%     0
c   20 3.2594e-02 3.2634e-02 3.2668e-02  0.06%  0.01%     0
c   21 8.8811e-02 8.9406e-02 8.9884e-02  0.43%  0.00%     0
c   22 3.9238e-03 3.9380e-03 3.9808e-03  0.46%  0.11%     0
c   23 3.3196e-02 3.3461e-02 3.3900e-02  0.62%  0.01%     0
c   24 3.1468e-02 3.1525e-02 3.1665e-02  0.21%  0.01%     0
c   --  ---------  ---------  --------- -----  -----   ---
c
c   NET CPU TIMING VARIANCE (T err);  A few % is ok:
c
c                AVERAGE        STANDEV        MINIMUM        MAXIMUM
c    Terr          0.44%          0.25%          0.06%          0.98%
c
c
c
c
c
c  1
c   ********************************************
c   THE LIVERMORE  FORTRAN KERNELS:  M F L O P S
c   ********************************************
c
c                Computer : CRAY-XMP-1 of 4 procs
c                System   : CTSS 10e    fully loaded
c                Compiler : CFT 1.14e   Auto  vector
c
c           Mean  DO Span =    19
c
c           When the computer performance range is very large
c           the net Mflops rate of many Fortran programs and
c           workloads will be in the sub-range between the equi-
c           weighted harmonic and arithmetic means depending
c           on the degree of code parallelism and optimization.
c           More accurate estimates of cpu workload rates depend
c           on assigning appropriate weights for each kernel.
c
c
c
c   KERNEL  FLOPS   MICROSEC   MFLOP/SEC SPAN WEIGHT  CHECK-SUMS         OK
c   ------  -----   --------   --------- ---- ------  ----------         --
c    1 3.0240e+05 4.9673e+03     60.8783   27   1.00  2.698573151746e+02 14
c    2 1.6192e+05 2.5497e+04      6.3506   15   1.00  8.398933280062e+01 15
c    3 1.5984e+05 1.2181e+04     13.1216   27   1.00  1.889516362525e+00 14
c    4 9.1200e+04 4.1650e+04      2.1897   27   1.00  4.199475416832e+00 15
c    5 1.6640e+05 3.0553e+04      5.4464   27   1.00  2.227830673914e+01 15
c    6 8.0640e+04 4.8809e+04      1.6522    8   1.00  5.812436051329e+12 11
c    7 5.3760e+05 5.5897e+03     96.1767   21   1.00  1.992004152347e+02 14
c    8 6.7392e+05 1.0418e+04     64.6898   14   1.00  2.072380567514e+04 15
c    9 5.3040e+05 7.0966e+03     74.7403   15   1.00  1.836777922612e+04 15
c   10 2.7000e+05 7.9491e+03     33.9659   15   1.00  1.155903859389e+04 15
c   11 9.5680e+04 2.5858e+04      3.7002   27   1.00  4.585813538906e+03  7
c   12 9.6000e+04 5.5703e+03     17.2343   26   1.00  1.360405187244e-05 10
c   13 1.3888e+05 3.3680e+04      4.1235    8   1.00  3.328647876067e+09 12
c   14 1.9008e+05 1.8469e+04     10.2921   27   1.00  1.721399839433e+07 14
c   15 1.8480e+05 3.9268e+04      4.7061   15   1.00  7.762981016945e+03 14
c   16 1.2320e+05 3.4778e+04      3.5425   15   1.00  1.804320000000e+05 16
c   17 2.8080e+05 3.1622e+04      8.8799   15   1.00  2.063158033013e+02 14
c   18 4.5760e+05 8.7759e+03     52.1431   14   1.00  6.790452348639e+03 14
c   19 2.0160e+05 3.2335e+04      6.2347   15   1.00  8.877614886362e+01 14
c   20 3.7856e+05 3.2634e+04     11.6002   26   1.00  4.191399274630e+03 14
c   21 2.0000e+06 8.9406e+04     22.3698   20   1.00  1.761809056730e+07 12
c   22 1.6320e+05 3.9380e+03     41.4424   15   1.00  4.276978109785e+01 15
c   23 4.0040e+05 3.3461e+04     11.9660   14   1.00  3.395238421926e+03 15
c   24 4.7840e+04 3.1525e+04      1.5175   27   1.00  9.100000000000e+01 16
c   ------  -----   --------   --------- ---- ------  ----------         --
c           MFLOPS   Range :                                            330
c
c           Maximum   Rate =     96.1767 Mega-Flops/Sec.
c           Average   Rate =     23.2902 Mega-Flops/Sec.
c           Geometric Mean =     11.6641 Mega-Flops/Sec.
c           Median    Rate =     10.9402 Mega-Flops/Sec.
c           Harmonic  Mean =      6.1274 Mega-Flops/Sec.
c           Minimum   Rate =      1.5786 Mega-Flops/Sec.
c
c           Standard  Dev. =     26.5479 Mega-Flops/Sec.
c           Median    Dev. =     10.7695 Mega-Flops/Sec.
c
c
c
c
c
c
c                      SENSITIVITY ANALYSIS
c
c
c           The sensitivity of the harmonic mean rate (Mflops)
c           to various weightings is shown in the table below.
c           Seven work distributions are generated by assigning
c           two distinct weights to ranked kernels by quartiles.
c           Forty nine possible cpu workloads are then evaluated
c           using seven sets of values for the total weights:
c
c
c               ------ ------ ------ ------ ------ ------ ------
c     1st QT:      O      O      O      O      O      X      X
c     2nd QT:      O      O      O      X      X      X      O
c     3rd QT:      O      X      X      X      O      O      O
c     4th QT:      X      X      O      O      O      O      O
c               ------ ------ ------ ------ ------ ------ ------
c     Total
c     Weights                    Net Mflops:
c      X    O
c    ---- ----
c
c    1.00 0.00    2.38   3.49   6.50   9.22  15.87  25.15  60.62
c
c    0.95 0.05    2.49   3.65   6.47   8.77  14.35  19.19  38.06
c
c    0.90 0.10    2.60   3.82   6.44   8.37  13.09  15.52  27.73
c
c    0.80 0.20    2.85   4.21   6.39   7.67  11.14  11.22  17.98
c
c    0.70 0.30    3.16   4.70   6.34   7.08   9.70   8.79  13.30
c
c    0.60 0.40    3.54   5.32   6.29   6.57   8.59   7.22  10.56
c
c    0.50 0.50    4.02   6.13   6.25   6.13   7.70   6.13   8.75
c    ---- ----
c               ------ ------ ------ ------ ------ ------ ------
c
c
c
c
c
c
c SENSITIVITY OF NET MFLOPS RATE TO USE OF OPTIMAL FORTRAN CODE(SISD/SIMD MODEL)
c
c      3.49   4.30   5.60   8.03  10.25   14.18   22.98   33.33   60.62
c
c      0.00   0.20   0.40   0.60   0.70    0.80    0.90    0.95    1.00
c      Fraction Of Operations Run At Optimal Fortran Rates
c
c
c
c
c
c
c  1
c   ********************************************
c   THE LIVERMORE  FORTRAN KERNELS:  M F L O P S
c   ********************************************
c
c                Computer : CRAY-XMP-1 of 4 procs
c                System   : CTSS 10e    fully loaded
c                Compiler : CFT 1.14e   Auto  vector
c
c           Mean  DO Span =   167
c
c           When the computer performance range is very large
c           the net Mflops rate of many Fortran programs and
c           workloads will be in the sub-range between the equi-
c           weighted harmonic and arithmetic means depending
c           on the degree of code parallelism and optimization.
c           More accurate estimates of cpu workload rates depend
c           on assigning appropriate weights for each kernel.
c
c
c
c   KERNEL  FLOPS   MICROSEC   MFLOP/SEC SPAN WEIGHT  CHECK-SUMS         OK
c   ------  -----   --------   --------- ---- ------  ----------         --
c    1 3.0240e+05 4.9673e+03     60.8783   27   1.00  2.698573151746e+02 14
c    2 1.6192e+05 2.5497e+04      6.3506   15   1.00  8.398933280062e+01 15
c    3 1.5984e+05 1.2181e+04     13.1216   27   1.00  1.889516362525e+00 14
c    4 9.1200e+04 4.1650e+04      2.1897   27   1.00  4.199475416832e+00 15
c    5 1.6640e+05 3.0553e+04      5.4464   27   1.00  2.227830673914e+01 15
c    6 8.0640e+04 4.8809e+04      1.6522    8   1.00  5.812436051329e+12 11
c    7 5.3760e+05 5.5897e+03     96.1767   21   1.00  1.992004152347e+02 14
c    8 6.7392e+05 1.0418e+04     64.6898   14   1.00  2.072380567514e+04 15
c    9 5.3040e+05 7.0966e+03     74.7403   15   1.00  1.836777922612e+04 15
c   10 2.7000e+05 7.9491e+03     33.9659   15   1.00  1.155903859389e+04 15
c   11 9.5680e+04 2.5858e+04      3.7002   27   1.00  4.585813538906e+03  7
c   12 9.6000e+04 5.5703e+03     17.2343   26   1.00  1.360405187244e-05 10
c   13 1.3888e+05 3.3680e+04      4.1235    8   1.00  3.328647876067e+09 12
c   14 1.9008e+05 1.8469e+04     10.2921   27   1.00  1.721399839433e+07 14
c   15 1.8480e+05 3.9268e+04      4.7061   15   1.00  7.762981016945e+03 14
c   16 1.2320e+05 3.4778e+04      3.5425   15   1.00  1.804320000000e+05 16
c   17 2.8080e+05 3.1622e+04      8.8799   15   1.00  2.063158033013e+02 14
c   18 4.5760e+05 8.7759e+03     52.1431   14   1.00  6.790452348639e+03 14
c   19 2.0160e+05 3.2335e+04      6.2347   15   1.00  8.877614886362e+01 14
c   20 3.7856e+05 3.2634e+04     11.6002   26   1.00  4.191399274630e+03 14
c   21 2.0000e+06 8.9406e+04     22.3698   20   1.00  1.761809056730e+07 12
c   22 1.6320e+05 3.9380e+03     41.4424   15   1.00  4.276978109785e+01 15
c   23 4.0040e+05 3.3461e+04     11.9660   14   1.00  3.395238421926e+03 15
c   24 4.7840e+04 3.1525e+04      1.5175   27   1.00  9.100000000000e+01 16
c    1 4.0400e+05 3.7041e+03    109.0679  101   2.00  3.677341345257e+03 14
c    2 3.1040e+05 1.2445e+04     24.9427  101   2.00  3.605241761060e+03 14
c    3 2.1412e+05 6.1361e+03     34.8950  101   2.00  7.068190056054e+00 14
c    4 1.6800e+05 2.0526e+04      8.1848  101   2.00  4.199475416832e+00 15
c    5 2.2000e+05 3.8241e+04      5.7530  101   2.00  3.212322357721e+02 14
c    6 1.3440e+05 2.0156e+04      6.6679   32   2.00  1.885296670192e+16 12
c    7 7.1104e+05 4.9201e+03    144.5170  101   2.00  4.441910421041e+03 14
c    8 8.5536e+05 8.4281e+03    101.4889  100   2.00  1.050887603939e+06 14
c    9 7.2114e+05 5.3005e+03    136.0502  101   2.00  8.326105269825e+05 15
c   10 3.4542e+05 6.0877e+03     56.7408  101   2.00  5.117258849028e+05 14
c   11 1.2800e+05 2.0681e+04      6.1893  101   2.00  2.403492372107e+05  8
c   12 1.3464e+05 3.5171e+03     38.2814  100   2.00  4.989298401193e-05  9
c   13 1.8368e+05 4.3043e+04      4.2673   32   2.00  1.627723261374e+10 12
c   14 2.2220e+05 1.9733e+04     11.2604  101   2.00  2.130649341195e+08 14
c   15 3.3000e+05 7.0946e+04      4.6514  101   2.00  2.760671683247e+05 15
c   16 1.5120e+05 4.5291e+04      3.3384   40   2.00  2.270870000000e+05 16
c   17 3.6360e+05 3.9716e+04      9.1550  101   2.00  7.802492410321e+03 14
c   18 4.3560e+05 4.0740e+03    106.9227  100   2.00  3.615937787530e+05 14
c   19 2.7876e+05 4.0279e+04      6.9208  101   2.00  3.795271872105e+03 14
c   20 4.1600e+05 3.5467e+04     11.7292  100   2.00  2.188343625168e+05 14
c   21 1.2500e+06 2.8205e+04     44.3180   50   2.00  2.790571795524e+07 13
c   22 2.4038e+05 3.6830e+03     65.2680  101   2.00  2.057023063597e+03 15
c   23 5.4450e+05 4.1499e+04     13.1207  100   2.00  2.484926226843e+05 14
c   24 6.2000e+04 3.9644e+04      1.5639  101   2.00  3.500000000000e+02 16
c    1 3.5035e+05 2.3215e+03    150.9181 1001   1.00  3.580256885257e+05 15
c    2 2.5996e+05 1.0344e+04     25.1321  101   1.00  3.605241761060e+03 14
c    3 1.8018e+05 1.4585e+03    123.5415 1001   1.00  7.005200181467e+01 14
c    4 1.6800e+05 4.0989e+03     40.9867 1001   1.00  4.199475416832e+00 15
c    5 2.0000e+05 3.4047e+04      5.8743 1001   1.00  3.184210149672e+04 14
c    6 1.1904e+05 1.0350e+04     11.5011   64   1.00  3.660366768768e+13 13
c    7 6.3680e+05 3.7967e+03    167.7227  995   1.00  4.272975752615e+05 14
c    8 7.1280e+05 6.9755e+03    102.1869  100   1.00  1.050887603939e+06 14
c    9 6.1812e+05 4.5215e+03    136.7081  101   1.00  8.326105269826e+05 15
c   10 3.0906e+05 5.3632e+03     57.6264  101   1.00  5.117258849028e+05 15
c   11 1.1000e+05 1.3369e+04      8.2278 1001   1.00  2.340037689466e+08  9
c   12 1.1988e+05 2.0810e+03     57.6061 1000   1.00  2.034999000047e-04  9
c   13 1.6128e+05 3.7656e+04      4.2830   64   1.00  2.839977317873e+10 12
c   14 2.2022e+05 1.9014e+04     11.5819 1001   1.00  2.116750694993e+10 14
c   15 1.6500e+05 3.5446e+04      4.6550  101   1.00  2.760671683247e+05 15
c   16 1.3250e+05 3.9362e+04      3.3662   75   1.00  1.982820000000e+05 16
c   17 3.1815e+05 3.4837e+04      9.1325  101   1.00  7.802492410322e+03 14
c   18 4.3560e+05 4.0942e+03    106.3952  100   1.00  3.615937787530e+05 14
c   19 2.3634e+05 3.4032e+04      6.9447  101   1.00  3.795271872105e+03 14
c   20 2.6000e+05 2.2096e+04     11.7666 1000   1.00  2.128451037522e+08 12
c   21 1.2625e+06 2.0053e+04     62.9576  101   1.00  5.802625385051e+07 13
c   22 1.8887e+05 2.8906e+03     65.3394  101   1.00  2.057023063597e+03 15
c   23 4.3560e+05 3.3136e+04     13.1456  100   1.00  2.484884179713e+05 14
c   24 5.0000e+04 3.1588e+04      1.5829 1001   1.00  3.500000000000e+03 16
c   ------  -----   --------   --------- ---- ------  ----------         --
c           MFLOPS   Range :                                            993
c
c           Maximum   Rate =    167.7227 Mega-Flops/Sec.
c           Average   Rate =     38.1118 Mega-Flops/Sec.
c           Geometric Mean =     17.0512 Mega-Flops/Sec.
c           Median    Rate =     11.7660 Mega-Flops/Sec.
c           Harmonic  Mean =      8.1796 Mega-Flops/Sec.
c           Minimum   Rate =      1.5175 Mega-Flops/Sec.
c
c           Standard  Dev. =     44.2901 Mega-Flops/Sec.
c           Median    Dev. =     40.1770 Mega-Flops/Sec.
c
c
c
c
c
c
c
c
c
c
c
c
c
c
c
c
c   TOP QUARTILE: BEST ARCHITECTURE/APPLICATION MATCH
c
c
c
c   KERNEL  FLOPS   MICROSEC   MFLOP/SEC SPAN WEIGHT SUM
c   ------  -----   --------   --------- ---- ------ ---
c    7 6.3680e+05 3.7967e+03    167.7227  995   1.00
c    1 3.5035e+05 2.3215e+03    150.9181 1001   1.00
c    7 7.1104e+05 4.9201e+03    144.5170  101   2.00
c    9 6.1812e+05 4.5215e+03    136.7081  101   1.00
c    9 7.2114e+05 5.3005e+03    136.0502  101   2.00
c    3 1.8018e+05 1.4585e+03    123.5415 1001   1.00
c    1 4.0400e+05 3.7041e+03    109.0679  101   2.00
c   18 4.3560e+05 4.0740e+03    106.9227  100   2.00
c   18 4.3560e+05 4.0942e+03    106.3952  100   1.00
c    8 7.1280e+05 6.9755e+03    102.1869  100   1.00
c    8 8.5536e+05 8.4281e+03    101.4889  100   2.00
c    7 5.3760e+05 5.5897e+03     96.1767   21   1.00
c    9 5.3040e+05 7.0966e+03     74.7403   15   1.00
c   22 1.8887e+05 2.8906e+03     65.3394  101   1.00
c   22 2.4038e+05 3.6830e+03     65.2680  101   2.00
c    8 6.7392e+05 1.0418e+04     64.6898   14   1.00
c   21 1.2625e+06 2.0053e+04     62.9576  101   1.00
c    1 3.0240e+05 4.9673e+03     60.8783   27   1.00
c   ------  -----   --------   --------- ---- ------ ---
c           FRAC. WEIGHTS =       0.2500
c           AVERAGE  RATE =     105.7868 Mega-Flops/Sec.
c           HARMONIC MEAN =      96.0137 Mega-Flops/Sec.
c           STANDARD DEV. =      31.3357 Mega-Flops/Sec.
c
c
c
c   KERNEL  FLOPS   MICROSEC   MFLOP/SEC SPAN WEIGHT SUM
c   ------  -----   --------   --------- ---- ------ ---
c   10 3.0906e+05 5.3632e+03     57.6264  101   1.00
c   12 1.1988e+05 2.0810e+03     57.6061 1000   1.00
c   10 3.4542e+05 6.0877e+03     56.7408  101   2.00
c   18 4.5760e+05 8.7759e+03     52.1431   14   1.00
c   21 1.2500e+06 2.8205e+04     44.3180   50   2.00
c   22 1.6320e+05 3.9380e+03     41.4424   15   1.00
c    4 1.6800e+05 4.0989e+03     40.9867 1001   1.00
c   12 1.3464e+05 3.5171e+03     38.2814  100   2.00
c    3 2.1412e+05 6.1361e+03     34.8950  101   2.00
c   10 2.7000e+05 7.9491e+03     33.9659   15   1.00
c    2 2.5996e+05 1.0344e+04     25.1321  101   1.00
c    2 3.1040e+05 1.2445e+04     24.9427  101   2.00
c   21 2.0000e+06 8.9406e+04     22.3698   20   1.00
c   12 9.6000e+04 5.5703e+03     17.2343   26   1.00
c   23 4.3560e+05 3.3136e+04     13.1456  100   1.00
c    3 1.5984e+05 1.2181e+04     13.1216   27   1.00
c   23 5.4450e+05 4.1499e+04     13.1207  100   2.00
c   23 4.0040e+05 3.3461e+04     11.9660   14   1.00
c   20 2.6000e+05 2.2096e+04     11.7666 1000   1.00
c   20 4.1600e+05 3.5467e+04     11.7292  100   2.00
c   20 3.7856e+05 3.2634e+04     11.6002   26   1.00
c   14 2.2022e+05 1.9014e+04     11.5819 1001   1.00
c    6 1.1904e+05 1.0350e+04     11.5011   64   1.00
c   14 2.2220e+05 1.9733e+04     11.2604  101   2.00
c   14 1.9008e+05 1.8469e+04     10.2921   27   1.00
c   17 3.6360e+05 3.9716e+04      9.1550  101   2.00
c   17 3.1815e+05 3.4837e+04      9.1325  101   1.00
c   17 2.8080e+05 3.1622e+04      8.8799   15   1.00
c   11 1.1000e+05 1.3369e+04      8.2278 1001   1.00
c    4 1.6800e+05 2.0526e+04      8.1848  101   2.00
c   19 2.3634e+05 3.4032e+04      6.9447  101   1.00
c   19 2.7876e+05 4.0279e+04      6.9208  101   2.00
c    6 1.3440e+05 2.0156e+04      6.6679   32   2.00
c    2 1.6192e+05 2.5497e+04      6.3506   15   1.00
c   19 2.0160e+05 3.2335e+04      6.2347   15   1.00
c   11 1.2800e+05 2.0681e+04      6.1893  101   2.00
c   ------  -----   --------   --------- ---- ------ ---
c           FRAC. WEIGHTS =       0.5104
c           AVERAGE  RATE =      21.1033 Mega-Flops/Sec.
c           HARMONIC MEAN =      12.5546 Mega-Flops/Sec.
c           STANDARD DEV. =      16.5497 Mega-Flops/Sec.
c
c
c
c   KERNEL  FLOPS   MICROSEC   MFLOP/SEC SPAN WEIGHT SUM
c   ------  -----   --------   --------- ---- ------ ---
c    5 2.0000e+05 3.4047e+04      5.8743 1001   1.00
c    5 2.2000e+05 3.8241e+04      5.7530  101   2.00
c    5 1.6640e+05 3.0553e+04      5.4464   27   1.00
c   15 1.8480e+05 3.9268e+04      4.7061   15   1.00
c   15 1.6500e+05 3.5446e+04      4.6550  101   1.00
c   15 3.3000e+05 7.0946e+04      4.6514  101   2.00
c   13 1.6128e+05 3.7656e+04      4.2830   64   1.00
c   13 1.8368e+05 4.3043e+04      4.2673   32   2.00
c   13 1.3888e+05 3.3680e+04      4.1235    8   1.00
c   11 9.5680e+04 2.5858e+04      3.7002   27   1.00
c   16 1.2320e+05 3.4778e+04      3.5425   15   1.00
c   16 1.3250e+05 3.9362e+04      3.3662   75   1.00
c   16 1.5120e+05 4.5291e+04      3.3384   40   2.00
c    4 9.1200e+04 4.1650e+04      2.1897   27   1.00
c    6 8.0640e+04 4.8809e+04      1.6522    8   1.00
c   24 5.0000e+04 3.1588e+04      1.5829 1001   1.00
c   24 6.2000e+04 3.9644e+04      1.5639  101   2.00
c   24 4.7840e+04 3.1525e+04      1.5175   27   1.00
c   ------  -----   --------   --------- ---- ------ ---
c           FRAC. WEIGHTS =       0.2396
c           AVERAGE  RATE =       3.7299 Mega-Flops/Sec.
c           HARMONIC MEAN =       3.0328 Mega-Flops/Sec.
c           STANDARD DEV. =       1.4192 Mega-Flops/Sec.
c
c
c
c
c
c
c
c
c
c
c
c
c
c
c
c  1
c
c                      SENSITIVITY ANALYSIS
c
c
c           The sensitivity of the harmonic mean rate (Mflops)
c           to various weightings is shown in the table below.
c           Seven work distributions are generated by assigning
c           two distinct weights to ranked kernels by quartiles.
c           Forty nine possible cpu workloads are then evaluated
c           using seven sets of values for the total weights:
c
c
c               ------ ------ ------ ------ ------ ------ ------
c     1st QT:      O      O      O      O      O      X      X
c     2nd QT:      O      O      O      X      X      X      O
c     3rd QT:      O      X      X      X      O      O      O
c     4th QT:      X      X      O      O      O      O      O
c               ------ ------ ------ ------ ------ ------ ------
c     Total
c     Weights                    Net Mflops:
c      X    O
c    ---- ----
c
c    1.00 0.00    2.99   4.43   8.54  12.72  24.95  39.41  93.75
c
c    0.95 0.05    3.12   4.63   8.50  12.00  21.84  28.25  54.56
c
c    0.90 0.10    3.26   4.86   8.45  11.36  19.42  22.02  38.47
c
c    0.80 0.20    3.59   5.38   8.37  10.27  15.90  15.27  24.20
c
c    0.70 0.30    3.98   6.04   8.30   9.36  13.46  11.69  17.65
c
c    0.60 0.40    4.48   6.87   8.22   8.61  11.67   9.47  13.90
c
c    0.50 0.50    5.12   7.96   8.14   7.96  10.30   7.96  11.46
c    ---- ----
c               ------ ------ ------ ------ ------ ------ ------
c
c
c
c
c SENSITIVITY OF NET MFLOPS RATE TO USE OF OPTIMAL FORTRAN CODE(SISD/SIMD MODEL)
c
c      4.55   5.62   7.34  10.61  13.64   19.11   31.87   47.86   96.01
c
c      0.00   0.20   0.40   0.60   0.70    0.80    0.90    0.95    1.00
c      Fraction Of Operations Run At Optimal Fortran Rates
c
c
c
c
c
c
c
c
c
c
c
c  Quadruple precision checksums computed by Dr. D.S. Lindsey, NAS:
c
c  CUMULATIVE CHECKSUMS:  RUN=    1
c
c  K             VL=  471                 90                 19
c  1 .5114652693224705102Q+05 .5253344778938000681Q+03 .3855104502494983491Q+02
c  2 .5150345372943066022Q+03 .5150345372943066022Q+03 .1199847611437483513Q+02
c  3 .1000742883066623145Q+02 .1009741436579188086Q+01 .2699309089321296439Q+00
c  4 .5999250595474070357Q+00 .5999250595474070357Q+00 .5999250595474070357Q+00
c  5 .4548871642388544199Q+04 .4589031939602131581Q+02 .3182615248448271678Q+01
c  6 .5229095383954675635Q+13 .2693280957416549457Q+16 .8303480073326955433Q+12
c  7 .6104251075163778121Q+05 .6345586315772524401Q+03 .2845720217638848365Q+02
c  8 .1501268005627157186Q+06 .1501268005627157186Q+06 .2960543667877649943Q+04
c  9 .1189443609975085966Q+06 .1189443609975085966Q+06 .2623968460874419268Q+04
c 10 .7310369784325972183Q+05 .7310369784325972183Q+05 .1651291227698377392Q+04
c 11 .3342910984950433340Q+08 .3433560531581074341Q+05 .6551162198437113656Q+03
c 12 .2907141428639174056Q-04 .7127569144561925151Q-05 .1943435981776804808Q-05
c 13 .4057110454105263471Q+10 .2325318944820836005Q+10 .4755211251524563699Q+09
c 14 .2982036205992255154Q+10 .3045676741897511424Q+08 .2547733008933910800Q+07
c 15 .3943816690352311804Q+05 .3943816690352311804Q+05 .1108997288135066584Q+04
c 16 .2832600000000000000Q+05 .3244100000000000000Q+05 .2577600000000000000Q+05
c 17 .1114641772903091760Q+04 .1114641772903091760Q+04 .2947368618590713935Q+02
c 18 .5165625410757306606Q+05 .5165625410757306606Q+05 .9700646212341513210Q+03
c 19 .5421816960150398899Q+03 .5421816960150398899Q+03 .1268230698051747067Q+02
c 20 .3040644339317275409Q+08 .3126205178811007613Q+05 .5987713249471801461Q+03
c 21 .8289464835786202431Q+07 .3986531136462291709Q+07 .2516870081042209239Q+07
c 22 .2938604376567099667Q+03 .2938604376567099667Q+03 .6109968728264795136Q+01
c 23 .3549834542446150511Q+05 .3549894609776936556Q+05 .4850340602751675804Q+03
c 24 .5000000000000000000Q+03 .5000000000000000000Q+02 .1300000000000000000Q+02
c
c
c  CUMULATIVE CHECKSUMS:  RUN=    2
c
c  K             VL=  471                 90                 19
c  1 .1022930538644941020Q+06 .1050668955787600136Q+04 .7710209004989966983Q+02
c  2 .1030069074588613204Q+04 .1030069074588613204Q+04 .2399695222874967026Q+02
c  3 .2001485766133246290Q+02 .2019482873158376173Q+01 .5398618178642592878Q+00
c  4 .1199850119094814071Q+01 .1199850119094814071Q+01 .1199850119094814071Q+01
c  5 .9097743284777088398Q+04 .9178063879204263162Q+02 .6365230496896543357Q+01
c  6 .1045819076790935127Q+14 .5386561914833098914Q+16 .1660696014665391086Q+13
c  7 .1220850215032755624Q+06 .1269117263154504880Q+04 .5691440435277696731Q+02
c  8 .3002536011254314373Q+06 .3002536011254314373Q+06 .5921087335755299887Q+04
c  9 .2378887219950171932Q+06 .2378887219950171932Q+06 .5247936921748838536Q+04
c 10 .1462073956865194436Q+06 .1462073956865194436Q+06 .3302582455396754785Q+04
c 11 .6685821969900866681Q+08 .6867121063162148682Q+05 .1310232439687422731Q+04
c 12 .5814282857278348113Q-04 .1425513828912385030Q-04 .3886871963553609616Q-05
c 13 .8114220908210526943Q+10 .4650637889641672010Q+10 .9510422503049127399Q+09
c 14 .6118901630090488734Q+10 .5718526521576222224Q+08 .5131714230651644153Q+07
c 15 .7887633380704623608Q+05 .7887633380704623608Q+05 .2217994576270133168Q+04
c 16 .5665200000000000000Q+05 .6488200000000000000Q+05 .5155200000000000000Q+05
c 17 .2229283545806183520Q+04 .2229283545806183520Q+04 .5894737237181427870Q+02
c 18 .1033125082151461321Q+06 .1033125082151461321Q+06 .1940129242468302642Q+04
c 19 .1084363392030079779Q+04 .1084363392030079779Q+04 .2536461396103494135Q+02
c 20 .6081288678634550819Q+08 .6252410357622015226Q+05 .1197542649894360292Q+04
c 21 .1657892967157240486Q+08 .7973062272924583418Q+07 .5033740162084418479Q+07
c 22 .5877208753134199335Q+03 .5877208753134199335Q+03 .1221993745652959027Q+02
c 23 .7099669084892301023Q+05 .7099789219553873113Q+05 .9700681205503351609Q+03
c 24 .1000000000000000000Q+04 .1000000000000000000Q+03 .2600000000000000000Q+02
c
c
c  CUMULATIVE CHECKSUMS:  RUN=    3
c
c  K             VL=  471                 90                 19
c  1 .1534395807967411530Q+06 .1576003433681400204Q+04 .1156531350748495047Q+03
c  2 .1545103611882919806Q+04 .1545103611882919806Q+04 .3599542834312450539Q+02
c  3 .3002228649199869435Q+02 .3029224309737564260Q+01 .8097927267963889317Q+00
c  4 .1799775178642221107Q+01 .1799775178642221107Q+01 .1799775178642221107Q+01
c  5 .1364661492716563259Q+05 .1376709581880639474Q+03 .9547845745344815036Q+01
c  6 .1568728615186402690Q+14 .8079842872249648371Q+16 .2491044021998086629Q+13
c  7 .1831275322549133436Q+06 .1903675894731757320Q+04 .8537160652916545097Q+02
c  8 .4503804016881471560Q+06 .4503804016881471560Q+06 .8881631003632949831Q+04
c  9 .3568330829925257898Q+06 .3568330829925257898Q+06 .7871905382623257805Q+04
c 10 .2193110935297791654Q+06 .2193110935297791654Q+06 .4953873683095132178Q+04
c 11 .1002873295485130002Q+09 .1030068159474322302Q+06 .1965348659531134097Q+04
c 12 .8721424285917522169Q-04 .2138270743368577545Q-04 .5830307945330414425Q-05
c 13 .1217133136231579041Q+11 .6975956834462508016Q+10 .1426563375457369109Q+10
c 14 .9103526877478772509Q+10 .8885029941358330359Q+08 .7946120246231201651Q+07
c 15 .1183145007105693541Q+06 .1183145007105693541Q+06 .3326991864405199752Q+04
c 16 .8497800000000000000Q+05 .9732300000000000000Q+05 .7732800000000000000Q+05
c 17 .3343925318709275281Q+04 .3343925318709275281Q+04 .8842105855772141805Q+02
c 18 .1549687623227191981Q+06 .1549687623227191981Q+06 .2910193863702453963Q+04
c 19 .1626545088045119669Q+04 .1626545088045119669Q+04 .3804692094155241203Q+02
c 20 .9121933017951826228Q+08 .9378615536433022839Q+05 .1796313974841540438Q+04
c 21 .2486839450735860729Q+08 .1195959340938687512Q+08 .7550610243126627718Q+07
c 22 .8815813129701299003Q+03 .8815813129701299003Q+03 .1832990618479438540Q+02
c 23 .1064950362733845153Q+06 .1064968382933080966Q+06 .1455102180825502741Q+04
c 24 .1500000000000000000Q+04 .1500000000000000000Q+03 .3900000000000000000Q+02
c
c
c  CUMULATIVE CHECKSUMS:  RUN=    4
c
c  K             VL=  471                 90                 19
c  1 .2045861077289882040Q+06 .2101337911575200272Q+04 .1542041800997993396Q+03
c  2 .2060138149177226409Q+04 .2060138149177226409Q+04 .4799390445749934052Q+02
c  3 .4002971532266492580Q+02 .4038965746316752347Q+01 .1079723635728518575Q+01
c  4 .2399700238189628143Q+01 .2399700238189628143Q+01 .2399700238189628143Q+01
c  5 .1819548656955417679Q+05 .1835612775840852632Q+03 .1273046099379308671Q+02
c  6 .2091638153581870254Q+14 .1077312382966619782Q+17 .3321392029330782173Q+13
c  7 .2441700430065511248Q+06 .2538234526309009760Q+04 .1138288087055539346Q+03
c  8 .6005072022508628747Q+06 .6005072022508628747Q+06 .1184217467151059977Q+05
c  9 .4757774439900343865Q+06 .4757774439900343865Q+06 .1049587384349767707Q+05
c 10 .2924147913730388873Q+06 .2924147913730388873Q+06 .6605164910793509571Q+04
c 11 .1337164393980173336Q+09 .1373424212632429736Q+06 .2620464879374845462Q+04
c 12 .1162856571455669622Q-03 .2851027657824770060Q-04 .7773743927107219233Q-05
c 13 .1622844181642105388Q+11 .9301275779283344021Q+10 .1902084500609825479Q+10
c 14 .1215176334476067437Q+11 .1174925822726987451Q+09 .1008019578807808401Q+08
c 15 .1577526676140924721Q+06 .1577526676140924721Q+06 .4435989152540266336Q+04
c 16 .1133040000000000000Q+06 .1297640000000000000Q+06 .1031040000000000000Q+06
c 17 .4458567091612367041Q+04 .4458567091612367041Q+04 .1178947447436285574Q+03
c 18 .2066250164302922642Q+06 .2066250164302922642Q+06 .3880258484936605284Q+04
c 19 .2168726784060159559Q+04 .2168726784060159559Q+04 .5072922792206988271Q+02
c 20 .1216257735726910163Q+09 .1250482071524403045Q+06 .2395085299788720584Q+04
c 21 .3315785934314480972Q+08 .1594612454584916683Q+08 .1006748032416883695Q+08
c 22 .1175441750626839867Q+04 .1175441750626839867Q+04 .2443987491305918054Q+02
c 23 .1419933816978460204Q+06 .1419957843910774622Q+06 .1940136241100670321Q+04
c 24 .2000000000000000000Q+04 .2000000000000000000Q+03 .5200000000000000000Q+02
c
c
c
c
c  CUMULATIVE CHECKSUMS:  RUN=    5
c
c  K             VL=  471                 90                 19
c  1 .2557326346612352551Q+06 .2626672389469000340Q+04 .1927552251247491745Q+03
c  2 .2575172686471533011Q+04 .2575172686471533011Q+04 .5999238057187417565Q+02
c  3 .5003714415333115725Q+02 .5048707182895940434Q+01 .1349654544660648219Q+01
c  4 .2999625297737035178Q+01 .2999625297737035178Q+01 .2999625297737035178Q+01
c  5 .2274435821194272099Q+05 .2294515969801065790Q+03 .1591307624224135839Q+02
c  6 .2614547691977337817Q+14 .1346640478708274728Q+17 .4151740036663477716Q+13
c  7 .3052125537581889060Q+06 .3172793157886262200Q+04 .1422860108819424182Q+03
c  8 .7506340028135785934Q+06 .7506340028135785934Q+06 .1480271833938824971Q+05
c  9 .5947218049875429831Q+06 .5947218049875429831Q+06 .1311984230437209634Q+05
c 10 .3655184892162986091Q+06 .3655184892162986091Q+06 .8256456138491886963Q+04
c 11 .1671455492475216670Q+09 .1716780265790537170Q+06 .3275581099218556828Q+04
c 12 .1453570714319587028Q-03 .3563784572280962575Q-04 .9717179908884024042Q-05
c 13 .2028555227052631735Q+11 .1162659472410418002Q+11 .2377605625762281849Q+10
c 14 .1519764492169999253Q+11 .1501582054695641385Q+09 .1269997234773526700Q+08
c 15 .1971908345176155902Q+06 .1971908345176155902Q+06 .5544986440675332920Q+04
c 16 .1416300000000000000Q+06 .1622050000000000000Q+06 .1288800000000000000Q+06
c 17 .5573208864515458802Q+04 .5573208864515458802Q+04 .1473684309295356967Q+03
c 18 .2582812705378653303Q+06 .2582812705378653303Q+06 .4850323106170756605Q+04
c 19 .2710908480075199449Q+04 .2710908480075199449Q+04 .6341153490258735339Q+02
c 20 .1520322169658637704Q+09 .1563102589405503806Q+06 .2993856624735900730Q+04
c 21 .4144732417893101215Q+08 .1993265568231145854Q+08 .1258435040521104619Q+08
c 22 .1469302188283549833Q+04 .1469302188283549833Q+04 .3054984364132397568Q+02
c 23 .1774917271223075255Q+06 .1774947304888468278Q+06 .2425170301375837902Q+04
c 24 .2500000000000000000Q+04 .2500000000000000000Q+03 .6500000000000000000Q+02
c
c
c  CUMULATIVE CHECKSUMS:  RUN=    6
c
c  K             VL=  471                 90                 19
c  1 .3068791615934823061Q+06 .3152006867362800409Q+04 .2313062701496990095Q+03
c  2 .3090207223765839613Q+04 .3090207223765839613Q+04 .7199085668624901078Q+02
c  3 .6004457298399738870Q+02 .6058448619475128521Q+01 .1619585453592777863Q+01
c  4 .3599550357284442214Q+01 .3599550357284442214Q+01 .3599550357284442214Q+01
c  5 .2729322985433126519Q+05 .2753419163761278948Q+03 .1909569149068963007Q+02
c  6 .3137457230372805381Q+14 .1615968574449929674Q+17 .4982088043996173259Q+13
c  7 .3662550645098266873Q+06 .3807351789463514640Q+04 .1707432130583309019Q+03
c  8 .9007608033762943120Q+06 .9007608033762943120Q+06 .1776326200726589966Q+05
c  9 .7136661659850515797Q+06 .7136661659850515797Q+06 .1574381076524651561Q+05
c 10 .4386221870595583309Q+06 .4386221870595583309Q+06 .9907747366190264356Q+04
c 11 .2005746590970260004Q+09 .2060136318948644604Q+06 .3930697319062268194Q+04
c 12 .1744284857183504433Q-03 .4276541486737155090Q-04 .1166061589066082885Q-04
c 13 .2434266272463158082Q+11 .1395191366892501603Q+11 .2853126750914738219Q+10
c 14 .1820312504465359131Q+11 .1819691693283694618Q+09 .1504905863862026021Q+08
c 15 .2366290014211387082Q+06 .2366290014211387082Q+06 .6653983728810399504Q+04
c 16 .1699560000000000000Q+06 .1946460000000000000Q+06 .1546560000000000000Q+06
c 17 .6687850637418550562Q+04 .6687850637418550562Q+04 .1768421171154428361Q+03
c 18 .3099375246454383963Q+06 .3099375246454383963Q+06 .5820387727404907926Q+04
c 19 .3253090176090239339Q+04 .3253090176090239339Q+04 .7609384188310482407Q+02
c 20 .1824386603590365245Q+09 .1875723107286604567Q+06 .3592627949683080876Q+04
c 21 .4973678901471721458Q+08 .2391918681877375025Q+08 .1510122048625325543Q+08
c 22 .1763162625940259800Q+04 .1763162625940259800Q+04 .3665981236958877081Q+02
c 23 .2129900725467690306Q+06 .2129936765866161933Q+06 .2910204361651005482Q+04
c 24 .3000000000000000000Q+04 .3000000000000000000Q+03 .7800000000000000000Q+02
c
c
c
c  CUMULATIVE CHECKSUMS:  RUN=    7
c
c  K             VL=  471                 90                 19
c  1 .3580256885257293571Q+06 .3677341345256600477Q+04 .2698573151746488444Q+03
c  2 .3605241761060146215Q+04 .3605241761060146215Q+04 .8398933280062384591Q+02
c  3 .7005200181466362015Q+02 .7068190056054316608Q+01 .1889516362524907507Q+01
c  4 .4199475416831849250Q+01 .4199475416831849250Q+01 .4199475416831849250Q+01
c  5 .3184210149671980939Q+05 .3212322357721492106Q+03 .2227830673913790175Q+02
c  6 .3660366768768272944Q+14 .1885296670191584620Q+17 .5812436051328868803Q+13
c  7 .4272975752614644685Q+06 .4441910421040767080Q+04 .1992004152347193856Q+03
c  8 .1050887603939010030Q+07 .1050887603939010030Q+07 .2072380567514354960Q+05
c  9 .8326105269825601763Q+06 .8326105269825601763Q+06 .1836777922612093487Q+05
c 10 .5117258849028180528Q+06 .5117258849028180528Q+06 .1155903859388864174Q+05
c 11 .2340037689465303338Q+09 .2403492372106752038Q+06 .4585813538905979559Q+04
c 12 .2034999000047421839Q-03 .4989298401193347605Q-04 .1360405187243763365Q-04
c 13 .2839977317873684430Q+11 .1627723261374585203Q+11 .3328647876067194589Q+10
c 14 .2116750694993432503Q+11 .2130649341195080691Q+09 .1721399839433381833Q+08
c 15 .2760671683246618263Q+06 .2760671683246618263Q+06 .7762981016945466088Q+04
c 16 .1982820000000000000Q+06 .2270870000000000000Q+06 .1804320000000000000Q+06
c 17 .7802492410321642323Q+04 .7802492410321642323Q+04 .2063158033013499754Q+03
c 18 .3615937787530114624Q+06 .3615937787530114624Q+06 .6790452348639059247Q+04
c 19 .3795271872105279229Q+04 .3795271872105279229Q+04 .8877614886362229475Q+02
c 20 .2128451037522092786Q+09 .2188343625167705329Q+06 .4191399274630261022Q+04
c 21 .5802625385050341702Q+08 .2790571795523604196Q+08 .1761809056729546467Q+08
c 22 .2057023063596969767Q+04 .2057023063596969767Q+04 .4276978109785356595Q+02
c 23 .2484884179712305358Q+06 .2484926226843855589Q+06 .3395238421926173063Q+04
c 24 .3500000000000000000Q+04 .3500000000000000000Q+03 .9100000000000000000Q+02
c
c
c
c
c
c
c   CHECK CLOCK CALIBRATION:
c   Total cpu Time =    1.62376e+01 Sec.
c
C                                                    F.H.MCMAHON  1986
C**********************************************************************
c
c
C
C
C
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
      DOUBLE PRECISION  sum
C
C/      PARAMETER( kn= 47, kn2= 95, np= 3, ls= 3*47, krs= 24)
C/      PARAMETER( nk= 47, nl= 3, nr= 8 )
C
      COMMON /ALPHA/ mk,ik,ml,il,Nruns,jr, NPFS(8,3,47)
      COMMON /BETA / tic, TIMES(8,3,47), SEE(5,3,8,3),
     1              TERRS(8,3,47), CSUMS(8,3,47),
     2              FOPN(8,3,47), DOS(8,3,47)
C
      COMMON /SPACE0/ TIME(47), CSUM(47), WW(47), WT(47), ticks,
     1                FR(9), TERR1(47), SUMW(7), START,
     2              SKALE(47), BIAS(47), WS(95), TOTAL(47), FLOPN(47),
     3                IQ(7), NPF, NPFS1(47)
C
      COMMON /SPACEI/ WTP(3), MUL(3), ISPAN(47,3), IPASS(47,3)
C
      DIMENSION  FLOPS(141), TR(141), RATES(141)
      DIMENSION  LSPAN(141), WG(141), OSUM (141), ID(141)
      DIMENSION  HM(12), N1(10), N2(10), LVL(10)
      DIMENSION  LQ(5), STAT1(20), STAT2(20)
      DIMENSION  IN(141), CSUM1(141)
      DIMENSION  MAP1(141), MAP2(141), IN2(141), VL1(141)
      DIMENSION  MAP(141), VL(141), WL(141), TV(141), TV1(141), TV2(141)
      DIMENSION  FLOPS1(141), RT1(141), ISPAN1(141), WT1(141)
      DIMENSION  FLOPS2(141), RT2(141), ISPAN2(141), WT2(141)
      SAVE       Komput, Kontrl, Kompil
C
C     The Fortran-77 Character variables Komput, Kontrl, Kompil, Tag  are
C     used in Format statements  7007, 7057, 7008, 7341, 7342, 7343, 7344
C
      CHARACTER  Komput*24, Kontrl*24, Kompil*24                         f77
C
      MODI(i,M)=  (MOD(i,M) + M*( 1 - MIN(1,MOD(i,M))))
C
      DATA       Komput   /'VAX-11/780  w/FPA  '/
c     DATA       Komput   /'VAX-11/780 wo/FPA  '/
c     DATA       Komput   /'CRAY-1                  '/                   f77
c      DATA       Komput   /'CRAY-XMP                '/                   f77
c     DATA       Komput   / 8HCRAY-XMP/                                  f66
c     DATA       Komput   / 8HCRAY-1  /                                  f66
C
      DATA       Kontrl   /'VMS  4.5 '/
c     DATA       Kontrl   /'UNIX 4.2 '/
c      DATA       Kontrl   /'CTSS 10e  fully loaded  '/                   f77
c     DATA       Kontrl   / 8HCTSS 10e/                                  f66
C
      DATA       Kompil   /'VAX/Fortran-77 V4.0     '/
c     DATA       Kompil   /' 4.2bsd UNIX F77        '/
c     DATA       Kompil   /'LCC CC 1.05             '/                   f77
c      DATA       Kompil   /'CFT 1.14                '/                   f77
c     DATA       Kompil   /'CIVIC 30i               '/                   f77
c     DATA       Kompil   / 8HCFT-1.14/                                  f66
c     DATA       Kompil   / 8HCIVIC30i/                                  f66
C
C
          IF( iou.LT.0) RETURN
C
            fuzz= 1.0e-9
       DO 1000 k= 1,ntk
           VL(k)= LSPAN(k)
 1000  CONTINUE
C
              bl= 1.0D-5
              bu= 1.0D+4
            CALL  VALID( TV,MAP,neff,  bl, RATES, bu, ntk)
C
C      Compress valid data sets mapping on MAP.
C
              nd= 0
        DO  1  k= 1,neff
         MAP1(k)=  MODI( MAP(k),nek)
       FLOPS1(k)= FLOPS( MAP(k))
          RT1(k)=    TR( MAP(k))
          VL1(k)=    VL( MAP(k))
       ISPAN1(k)= LSPAN( MAP(k))
          WT1(k)=    WG( MAP(k))
          TV1(k)= RATES( MAP(k))
        CSUM1(k)=  OSUM( MAP(k))
              nd=    ID( MAP(k)) + nd
    1  continue
C
            CALL  STATW( STAT1,TV,IN, VL1,WT1,neff)
              LV= STAT1(1)
C
            CALL  STATW( STAT1,TV,IN, TV1,WT1,neff)
             twt= STAT1(6)
C
          WRITE ( iou,7001)
          WRITE ( iou,7001)
          WRITE ( iou,7001)
          WRITE ( iou,7001)
          WRITE ( iou,7001)
          WRITE ( iou,7001)
       CALL PAGE( iou)
          WRITE ( iou,7002)
          WRITE ( iou,7003)
          WRITE ( iou,7002)
          WRITE ( iou,7007)  Komput
          WRITE ( iou,7057)  Kontrl
          WRITE ( iou,7008)  Kompil
          WRITE ( iou,7009)  LV
          WRITE ( iou,7061)
          WRITE ( iou,7062)
          WRITE ( iou,7063)
          WRITE ( iou,7064)
          WRITE ( iou,7065)
          WRITE ( iou,7066)
          WRITE ( iou,7067)
          WRITE ( iou,7001)
          WRITE ( iou,7004)
          WRITE ( iou,7005)
          WRITE ( iou,7011) (MAP1(k),  FLOPS1(k), RT1(k), TV1(k),
     .                    ISPAN1(k), WT1(k), CSUM1(k), ID(k), k=1,neff)
          WRITE ( iou,7005)
C
          WRITE ( iou,7022)  nd
          WRITE ( iou,7041)  STAT1( 4)
          WRITE ( iou,7033)  STAT1( 1)
          WRITE ( iou,7043)  STAT1(10)
          WRITE ( iou,7030)  STAT1( 7)
          WRITE ( iou,7055)  STAT1( 5)
          WRITE ( iou,7042)  STAT1( 3)
          WRITE ( iou,7001)
          WRITE ( iou,7044)  STAT1( 2)
          WRITE ( iou,7031)  STAT1( 9)
C
 7001 FORMAT(/)
 7002 FORMAT(  45H ******************************************** )
 7003 FORMAT(  45H THE LIVERMORE  FORTRAN KERNELS:  M F L O P S  )
 7004 FORMAT(/,53H KERNEL  FLOPS   MICROSEC   MFLOP/SEC SPAN WEIGHT  CH,
     X19HECK-SUMS         OK )
 7005 FORMAT(  53H ------  -----   --------   --------- ---- ------  --,
     X19H---------------- -- )
 7007 FORMAT(/,9X,16H     Computer :  ,A )                               f77
 7057 FORMAT(  9X,16H     System   :  ,A )                               f77
 7008 FORMAT(  9X,16H     Compiler :  ,A )                               f77
c7007 FORMAT(/,9X,16H     Computer :  ,A8)                               f66
c7057 FORMAT(  9X,16H     System   :  ,A8)                               f66
c7008 FORMAT(  9X,16H     Compiler :  ,A8)                               f66
 7009 FORMAT(/,9X,16HMean  DO Span =  ,I5,/)
 7011 FORMAT(1X,i2,E11.4,E11.4,F12.4,1X,I4,1X,F6.2,E20.12,1X,I2)
 7012 FORMAT(1X,i2,E11.4,E11.4,F12.4,1X,I4,1X,F6.2)
 7022 FORMAT(  9X,16HMFLOPS   Range :  ,43X,I4)
 7041 FORMAT(/,9X,16HMaximum   Rate =  ,F12.4,16H Mega-Flops/Sec. )
 7033 FORMAT(  9X,16HAverage   Rate =  ,F12.4,16H Mega-Flops/Sec. )
 7043 FORMAT(  9X,16HGeometric Mean =  ,F12.4,16H Mega-Flops/Sec. )
 7030 FORMAT(  9X,16HMedian    Rate =  ,F12.4,16H Mega-Flops/Sec. )
 7055 FORMAT(  9X,16HHarmonic  Mean =  ,F12.4,16H Mega-Flops/Sec. )
 7042 FORMAT(  9X,16HMinimum   Rate =  ,F12.4,16H Mega-Flops/Sec. )
 7044 FORMAT(  9X,16HStandard  Dev. =  ,F12.4,16H Mega-Flops/Sec. )
 7031 FORMAT(  9X,16HMedian    Dev. =  ,F12.4,16H Mega-Flops/Sec. )
 7053 FORMAT(/,9X,16HFrac.  Weights =  ,F12.4)
 7104 FORMAT(/,50H KERNEL  FLOPS   MICROSEC   MFLOP/SEC SPAN WEIGHT   )
 7105 FORMAT(  50H ------  -----   --------   --------- ---- ------   )
C
 7061 FORMAT(9X,52HWhen the computer performance range is very large   )
 7062 FORMAT(9X,52Hthe net Mflops rate of many Fortran programs and    )
 7063 FORMAT(9X,52Hworkloads will be in the sub-range between the equi-)
 7064 FORMAT(9X,52Hweighted harmonic and arithmetic means depending    )
 7065 FORMAT(9X,52Hon the degree of code parallelism and optimization. )
 7066 FORMAT(9X,52HMore accurate estimates of cpu workload rates depend)
 7067 FORMAT(9X,52Hon assigning appropriate weights for each kernel.   )
C
      IF( ntk .NE. nek )  THEN
C
       CALL PAGE( iou)
          WRITE ( iou,7070)
 7070 FORMAT(//,50H TOP QUARTILE: BEST ARCHITECTURE/APPLICATION MATCH )
C
C      Compute compression index-list MAP1:  Non-zero weights.
C
              bl= 1.0D-6
              bu= 1.0D+6
            CALL  VALID( TV,MAP1,meff,  bl, WT1, bu, neff)
C
C      Re-order data sets mapping on IN (descending order of MFlops).
C
        DO  2  k= 1,meff
               i=     IN( MAP1(k))
        FLOPS2(k)=  FLOPS1(i)
          RT2(k)=    RT1(i)
       ISPAN2(k)= ISPAN1(i)
          WT2(k)=    WT1(i)
          TV2(k)=    TV1(i)
         MAP2(k)=   MODI( MAP(i),nek)
    2  continue
C
              nq= meff/4
              lo= meff -4*nq
           LQ(1)= nq
           LQ(2)= nq + nq + lo
           LQ(3)= nq
              i2= 0
C
         DO 5  j= 1,3
              i1= i2 + 1
              i2= i2 + LQ(j)
              ll= i2 - i1 + 1
            CALL  STATW( STAT2,TV,IN2, TV2(i1),WT2(i1),ll)
            frac= STAT2(6)/( twt +fuzz)
           WL(j)= STAT2(5)
C
          WRITE ( iou,7001)
          WRITE ( iou,7104)
          WRITE ( iou,7105)
          WRITE ( iou,7012) ( MAP2(k),  FLOPS2(k), RT2(k), TV2(k),
     .                         ISPAN2(k), WT2(k),  k=i1,i2 )
          WRITE ( iou,7105)
C
          WRITE ( iou,7053)  frac
          WRITE ( iou,7033)  STAT2(1)
          WRITE ( iou,7055)  STAT2(5)
          WRITE ( iou,7044)  STAT2(2)
    5 continue
      ENDIF
C
C
C           Sensitivity analysis of harmonic mean rate to 49 workloads
C
      CALL  SENSIT(   iou,RATES,WG,IQ,SUMW, MAP,TV,TV1,TV2,RT1, ntk)
C
C
C           Sensitivity analysis of harmonic mean rate to SISD/SIMD model
C
      CALL  SIMD( HM, iou,RATES,WG,FR,9,    MAP,TV,TV1,TV2, ntk)
C
C
      IF( ntk .NE. nek )  THEN
               CALL  PAGE( iou)
               mrl= Nruns
                IF( Nruns.gt.8) mrl= 8
C
      DO  8      k= 1,mk
      DO  8      j= 1,ml
               sum= 0.
      DO  8      i= 1,mrl
               sum= sum + CSUMS(i,j,k)
      CSUMS(i,j,k)= sum
    8 continue
C
      DO  10     i= 1,mrl
                IF( (i.NE.1).AND.(i.NE.mrl))  GO TO 10
             WRITE( iou,76) i
             WRITE( iou,77)  ( LVL(j), j= 1,3 )
   76       FORMAT( //,29h  Cumulative Checksums:  RUN=,i5)
   77       FORMAT( /,10h  k    VL=,i5,3i19)
C
      DO  9      k= 1,mk
             WRITE( iou,78)  k, ( CSUMS(i,j,k), j= 1,3)
   78       FORMAT( 1x,i2,4e19.12)
    9 continue
   10 continue
C
      ENDIF
          LVL(il)= LV
      RETURN
C
      END
C**********************************************
      SUBROUTINE RESULT( iou,FLOPS,TR,RATES,LSPAN,WG,OSUM,TERR,ID)
C***********************************************************************
C                                                                      *
C     RESULT -  Computes timing Results into pushdown store.           *
C                                                                      *
C      iou   -  Input   IO unit number for print output                *
C     FLOPS  - Out.Ary  Number of Flops executed by each kernel        *
C     TR     - Out.Ary  Time of execution of each kernel(microsecs)    *
C     RATES  - Out.Ary  Rate of execution of each kernel(megaflops/sec)*
C     LSPAN  - Out.Ary  Span of inner DO loop in each kernel           *
C     WG     - Out.Ary  Weight assigned to each kernel for statistics  *
C     OSUM   - Out.Ary  Checksums of the results of each kernel        *
C     TERR   - Out.Ary  Copy of TERR1 (from COMMON /SPACE0/)           *
C     ID     - Out.Ary  Digits of precision of each checksum           *
C                                                                      *
C***********************************************************************
C
C     Every result array holds 141 entries.  Entries 1..141-mk are
C     first moved up by mk places (the last mk entries are lost), and
C     the results of the current run are then stored in entries 1..mk.
C     mk, il and Nruns are read from COMMON /ALPHA/.
C
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
      DOUBLE PRECISION  SUMS, cs
C
C/      PARAMETER( kn= 47, kn2= 95, np= 3, ls= 3*47, krs= 24)
C/      PARAMETER( nk= 47, nl= 3, nr= 8 )
C
      COMMON /ALPHA/ mk,ik,ml,il,Nruns,jr, NPFS(8,3,47)
      COMMON /BETA / tic, TIMES(8,3,47), SEE(5,3,8,3),
     1              TERRS(8,3,47), CSUMS(8,3,47),
     2              FOPN(8,3,47), DOS(8,3,47)
C
      DIMENSION  FLOPS(141), TR(141), RATES(141), ID(141)
      DIMENSION  LSPAN(141), WG(141), OSUM (141), TERR(141)
C
      COMMON /SPACE0/ TIME(47), CSUM(47), WW(47), WT(47), ticks,
     1                FR(9), TERR1(47), SUMW(7), START,
     2              SKALE(47), BIAS(47), WS(95), TOTAL(47), FLOPN(47),
     3                IQ(7), NPF, NPFS1(47)
C
      COMMON /SPACEI/ WTP(3), MUL(3), ISPAN(47,3), IPASS(47,3)
C
      COMMON /PROOF/  SUMS(24,3,2)
C
C
C     TALLY is defined elsewhere in this file; it is called before any
C     of the result arrays is modified.
C
           CALL  TALLY( iou, 1 )
C
C                             Push Result Arrays Down before entering
C                             the new results.  The copy runs from the
C                             top down so that no entry is overwritten
C                             before it has been moved.
           limit= 141 - mk
              j = 141
      DO 1001 k = limit,1,-1
        FLOPS(j)= FLOPS(k)
           TR(j)=    TR(k)
        RATES(j)= RATES(k)
        LSPAN(j)= LSPAN(k)
           WG(j)=    WG(k)
         OSUM(j)=  OSUM(k)
         TERR(j)=  TERR(k)
           ID(j)=    ID(k)
              j = j - 1
 1001 CONTINUE
C
C                             CALCULATE MFLOPS FOR EACH KERNEL
C
C     The rate is reported as zero when the measured time is zero,
C     when the kernel weight WT(k) is not positive, or when the run
C     time TIME(k) does not exceed tmin = 5.0*tic.
C
            tmin= 5.0*tic
      DO 1010 k = 1,mk
        FLOPS(k)= FLOPN(k)*TOTAL(k)
           TR(k)=  TIME(k) * 1.0e+6
        RATES(k)= 0.0
              IF( TR(k).NE. 0.0)   RATES(k)= FLOPS(k)/TR(k)
              IF( WT(k).LE. 0.0)   RATES(k)= 0.0
              IF( TIME(k).LE.tmin) RATES(k)= 0.0
        LSPAN(k)= ISPAN(k,il)
           WG(k)= WT(k)*WTP(il)
         OSUM(k)= CSUM(k)
         TERR(k)= TERR1(k)
c
c                 compute relative error and digits of precision in CSUM
c
c     cs is the reference checksum:  Nruns times SUMS(k,il,1), except
c     for kernel 14 where SUMS(Nruns,il,2) is used as it stands.
c     NOTE(review): SUMS has only 24 rows; this indexing assumes that
c     mk (and Nruns, for kernel 14) never exceeds 24 - confirm against
c     the callers.
c
              cs= REAL( Nruns) * SUMS(k,il,1)
              IF( k .EQ. 14  )  cs= SUMS( Nruns,il,2)
c
c     re is the relative error of CSUM(k);  when the reference is zero
c     the absolute value of CSUM(k) is used instead.
c
                                re= ABS(  CSUM(k))
              IF( cs.NE. 0.0 )  re= ABS( (CSUM(k) - cs)/cs)
c
c     ID(k) is ABS(LOG10(re)) rounded up, limited to the range 0..16:
c     16 when re is exactly zero,  0 when re is 1.0 or larger.
c     The generic LOG10 is used (not the REAL-only ALOG10) so that the
c     statement still compiles when the cANSI/cIBM IMPLICIT DOUBLE
c     PRECISION line above is activated.
c
              IF((re.GT. 0.0 ) .AND. (re.LT. 1.0))  THEN
                  ID(k)= INT( ABS( LOG10( re)) + 0.9999999 )
              ELSE
                  ID(k)= 16
              ENDIF
              IF(    re.GE. 1.0 )  ID(k)= 0
              IF( ID(k).GT. 16  )  ID(k)= 16
 1010 CONTINUE
C
      RETURN
C
      END
C
C
C**********************************************
      function   SECOND( OLDSEC)
C***********************************************
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
      REAL SECOND
C
C     SECOND= Cumulative CPU time for job in seconds.  MKS unit is seconds.
C             Clock resolution should be less than 2% of Kernel 12 run-time.
C             ONLY CPU time should be measured, NO system or I/O time included.
C             In VM systems, page-fault time must be avoided (Direction 4).
C             SECOND accuracy may be tested by calling the test program VERIFY.
C
C     OLDSEC - input scalar, a clock reading in seconds that is
C              subtracted from the current clock reading.
C
C     If your system provides a timing routine that satisfies
C     the definition above; then simply delete this function.
C
C     Else this function must be programmed using some
C     timing routine available in your system.
C     Timing routines with CPU clock resolution are always sufficient.
C     Timing routines with microsec. resolution are usually sufficient.
C
C     Timing routines with much less resolution may require the use
C     of multiple-pass loops around each kernel to make the run time
C     at least 50 times the tick-period of the timing routine.
C     To reduce timing errors you can increase the value of Loop
C     in subroutine SIZES.
C
C     If no CPU timer is available, then you can time each kernel by
C     the wall clock using the PAUSE statement at the end of subr. VALUES.
C
C     Function TICK measures the overhead time for a call to SECOND.
C     An independent calibration of the running time of at least
C     one kernel may be wise.  See Subroutine VERIFY.
C
C
C     The following statement is deliberately incomplete:
C
c      SECOND=                                                            sdef
C
C******************************************************************************
C
C     The following statements were used on the DEC  VAX/780  VMS 3.0 .
C     Enable page-fault tallys in TEST by un-commenting LIB$STAT_TIMER calls.
C     Also set:   Loop= 10*Loop in Subroutine SIZES to run kernels long enough.
C
C     INITIA is the first-call flag.  It is changed after the DATA
C     initialization, so it is SAVEd to keep its value between calls.
C
C     The first call only starts the timer.  No time has accumulated
C     at that point, so the reading is taken as zero and the function
C     returns  0.0 - OLDSEC  (the result used to be left undefined).
C     Later calls read the timer into ISEC and scale it by 0.01.
C
        SAVE  INITIA
        DATA  INITIA   /123/
        IF(   INITIA.EQ.123 )  THEN
              INITIA= 1
              NSTAT = LIB$INIT_TIMER()
              SECOND= 0.0 - OLDSEC
        ELSE
              NSTAT = LIB$STAT_TIMER(2,ISEC)
              SECOND= REAL(ISEC)*0.01 - OLDSEC
        ENDIF
C
C* OR less accurately:
C*        REAL    SECNDS
C*        SECOND= SECNDS( OLDSEC)
C
C******************************************************************************
C
C     The following statements were used on  UNIX 4.2bsd systems, e.g.  SUN
C     Also set:   Loop= 10*Loop in Subroutine SIZES to run kernels long enough.
C
C*       DIMENSION XTIME(2)                                               unix
C*       REAL*4 XTIME                                                     unix
C*       XT= ETIME( XTIME)                                                unix
C*       SECOND=  XTIME(1) - OLDSEC                                       unix
C
C******************************************************************************
C
C     The following statements were used on the DEC PDP-11/23 RT-11 system.
C     Also set:   Loop=100*Loop in Subroutine SIZES to run kernels long enough.
C
C*       DIMENSION JT(2)
C*       CALL GTIM(JT)
C*       TIME1 = JT(1)
C*       TIME2 = JT(2)
C*       TIME = TIME1 * 65768. + TIME2
C*       SECOND=TIME/60. - OLDSEC
C
C******************************************************************************
C
C     The following statements were used on the Hewlett-Packard HP 9000
C     Also set:   Loop= 10*Loop in Subroutine SIZES to run kernels long enough.
C
C*       INTEGER*4 ITIME(4)
C*       CALL TIMES( ITIME(4))
C*       TIMEX= ITIME(1) + ITIME(2) + ITIME(3) + ITIME(4)
C*       SECOND= TIMEX/60. - OLDSEC
C
C******************************************************************************
C
C     The following statement was used on the IBM 3090  MVS
C
c     CALL TOD( itime)
c     xtime = ( DFLOAT( itime)/64.0) * 1.0D-6
c     SECOND= xtime - oldsec
C
C
C******************************************************************************
C
C     FOR THE GOULD 32/87 WITH MPX 3.2  (et seq. gratis D.Lindsey)
C
C     INTEGER*4 NSEC, NCLICK
C     REAL*8 CPUTIM
C
C      CALL M:CLOCK (NSEC, NCLICK)
C      CPUTIM = FLOAT(NSEC)
C      SECOND = CPUTIM + FLOAT(NCLICK)/60.
C
C******************************************************************************
C
C  FOR THE HP 1000 RUNNING FORTRAN 77.
C  note that since the hp operating system has no facility for
C  returning cpu time, this routine only measures elapsed time.
C  therefore, the tests must be run stand-alone.
C
C     REAL*8 TOTIME
C     INTEGER*2 TIMEA(5)
C
C     CALL EXEC (11, TIMEA)
C     TOTIME = DBLE (TIMEA(1))/100.
C     TOTIME = TOTIME + DBLE (TIMEA(2))
C     TOTIME = TOTIME + DBLE (TIMEA(3)) * 60.
C     SECOND = TOTIME + DBLE (TIMEA(4)) * 3600.
C
C******************************************************************************
C
C  FOR THE IBM PC.
C  note that the pc's operating system has no facility for
C  returning cpu time; this routine only measures elapsed time.
C  also, the pc does not have real*8.  Remove all references to real*8
C
C      IMPLICIT INTEGER*4 (I-N)
C      LOGICAL FIRST
C      DATA FIRST /.TRUE./
C
C      CALL GETTIM (IYEAR, IMONTH, IDAY, IHOUR, IMIN, ISEC, IFRACT)
C
C  ifract is integer fractions of a second
C  in units of 1/32,768 seconds
C
C      IF (.NOT. FIRST) GO TO 10
C        FIRST = .FALSE.
C
C        LASTHR = IHOUR
C        BASETM = 0.
C10    CONTINUE
C
C  because of limited precision, do not include the time of day
C  in hours in the total time.  but correct for an hour change.
C
C      IF (LASTHR .EQ. IHOUR) GO TO 20
C        BASETM = BASETM + 3600.
C        LASTHR = IHOUR
C
C20    TOTIME = FLOAT(IMIN) * 60
C    . + FLOAT(ISEC)
C    . + FLOAT(IFRACT)/32768.
C      SECOND = TOTIM + BASETM
C
C******************************************************************************
C
C     FOR THE PR1ME SYSTEM UNDER PRIMOS
C
C     REAL*8 CPUTIM
C     INTEGER*2 TIMERS (28)
C
C     CALL TMDAT (TIMERS)
C     SECOND = DBLE (TIMERS(7))
C    .+ DBLE(TIMERS(8)) / DBLE(TIMERS(11))
C
C
C
C******************************************************************************
C
C     FOR IBM M V S    (Calls following MVSTIM assembly code)
C
C   the microsecond clock accessed by the routine MVSTIM counts down
C   from about 2 billion (2**31).  so when it gets below 1 minute,
C   reset it to prevent underflow.
C
C   a call to MVSTIM with an argument of 0 resets the clock;
C   any other argument causes the counted-down clock to be returned.
C
C     IMPLICIT REAL*8 (T)
C     LOGICAL FIRST
C     DATA FIRST /.TRUE./
C
C     IF (.NOT. FIRST) GO TO 5
C       L = 0
C       CALL MVSTIM (L)
C       FIRST = .FALSE.
C       TBASE = L
C       TBASE = TBASE/1.D6
C5     CONTINUE
C
C   always get current time (even if first entry)
C
C      CALL MVSTIM (L)
C      TEMP = L
C      TEMP = TEMP/1.D6
C      SECOND = TBASE - TEMP
C      IF (L .GT. 60000000) RETURN
C
C   here we reset the counter if less than one minute left.
C
C      L = 0
C      CALL MVSTIM (L)
C      TNEW = L
C     TNEW = TNEW/1.D6
C     TBASE = TBASE + TNEW - TEMP
C     RETURN
C     END
C
C for MVS systems, remove these 2 cards, move all the following
C assembler code 1 space left (to remove column 1 stuff), and assembl
CMVSTIM   CSECT
C         PRINT GEN
C         USING *,12
C********************************************************************
C* STANDARD SUBROUTINE LINKAGE FROM FORTRAN PROGRAM
C* GPR  1: ARGUMENT LIST ADDRESS FROM CALLER
C* GPR 13: CALLER'S SAVEAREA ADDRESS
C* GPR 14: ADDRESS IN CALLER FOR RETURN
C* GPR 15: ENTRY ADDRESS OF THIS SUBPROGRAM
C********************************************************************
C* REGISTER USAGE IN THIS PROGRAM...
C* GPR  0: RETURN OF ELAPSED MICROSECONDS TO FORTRAN
C* GPR  1: ARGUMENT LIST ADDRESS FROM CALLER
C/C* GPR  2: ADDRESS OF PARAMETER PASSED FROM FORTRAN
C* GPR  3: WORK REGISTER
C* GPR  4: WORK REGISTER
C* GPR  5: WORK REGISTER
C* GPR  6: NOT USED
C* GPR  7: NOT USED
C* GPR  8: NOT USED
C* GPR  9: NOT USED
C* GPR 10: NOT USED
C* GPR 11: NOT USED
C* GPR 12: BASE REGISTER FOR THIS PROGRAM
C* GPR 13: NOT USED
C* GPR 14: NOT USED
C* GPR 15: RETURN CODE TO FORTRAN IN CASE OF ERROR IN PROCESSING
C********************************************************************
C********************************************************************
C*                      REGISTER EQUATES
C********************************************************************
CR0       EQU   0
CR1       EQU   1
CR2       EQU   2
CR3       EQU   3
CR4       EQU   4
CR5       EQU   5
CR6       EQU   6
CR7       EQU   7
CR8       EQU   8
CR9       EQU   9
CR10      EQU   10
CR11      EQU   11
CR12      EQU   12
CR13      EQU   13
CR14      EQU   14
CR15      EQU   15
C***********************************************************************
C*             END REGISTER EQUATES                                    *
C***********************************************************************
C*             BEGIN BODY OF PROGRAM CODE                              *
C***********************************************************************
C         STM   R14,R12,12(R13)         STORE CALLING PGM'S GPRS
C         LR    R12,R15                 OUR ENTRY POINT INTO BASE REG
C         LA    R15,SAVEAREA            THIS PGM'S SAVEAREA ADDR IN 15
C         ST    R13,4(R15)              SAVE CALLER'S SAVEAREA ADDR
C         ST    R15,8(R13)              SAVE OUR SAVEAREA ADDR IN CALLER
C         LR    R13,R15                 CURRENT SAVEAREA ADDR IN 13
C         L     R2,0(R1)                ADDRESS OF PASSED VARIABLE
C         SR    R3,R3                   ZERO OUT R3
C         C     R3,0(R2)                IS SUBROUTINE INVOKED WITH 0?
C         BE    SETIT                   YES, SET INTERVAL TIMER
CGETIT    DS    0H                      NO, GET INTERVAL TIMER VALUE
C         TTIMER ,MIC,TVAL2             GET TIME INTO TVAL2
C         L     R4,TVAL2                LOAD, GET READY FOR SHIFT
C         L     R5,TVAL2+4              LOAD, GET READY FOR SHIFT
C         LA    R3,X'0C'                LOAD 12 TO REG 3
C         SRDL  R4,0(R3)                SHIFT RIGHT 12 BITS
C         ST    R5,0(R2)                SAVE ELAPSED TIME FOR CALLER
C         B     GETOUT                  OUR WORK HERE IS DONE KEMOSABE
CSETIT    DS    0H                      HERE IS WHERE WE SET THE I.T.
C         L     R4,HOURS1               PART 1 OF TIME VAL TO R3
C         ST    R4,TVAL                 SET TVAL PART 1
C         L     R5,HOURS2               PART 2 OF TIME VAL TO R3
C         ST    R5,TVAL+4               SET TVAL PART 2
C         LA    R3,X'0C'                LOAD 12 TO REG 3
C         SRDL  R4,0(R3)                SHIFT RIGHT 12 BITS
C         ST    R5,0(R2)                SAVE START TIME FOR CALLER
C         STIMER TASK,MICVL=TVAL        SET TIMER BASED IN MICROSECS
C         SR    R0,R0                   RETURN DUMMY VALUE ON SET
C***********************************************************************
C*                       END OF PROGRAM                                *
C***********************************************************************
CGETOUT   DS    0H
C         L     R13,4(R13)             LOAD CALLER'S SAVEAREA ADDR TO 13
C         L     R14,12(R13)            LOAD RETURN ADDR OF CALLER TO 14
C         LM    R1,R12,24(R13)         RESTORE CALLER'S REGISTERS
C*                                      (BUT DON'T RELOAD R0)
C         SR    R15,R15                SET RETURN CODE TO FORTRAN
C         BR    R14                    RETURN TO CALLER
C***********************************************************************
C*              DATA AREAS REQUIRED FOR OPERATION                      *
C***********************************************************************
CALGN0    DS    0D            DOUBLEWORD ALIGNMENT REQUIRED
CTVAL     DC    D'0'          TIMER VALUE
CTVAL2    DC    D'0'          TIMER VALUE 2 - ("NEW" VALUE)
CHOURS1   DC    X'000007FF'
CHOURS2   DC    X'FFFFF000'
CSAVEAREA DS    18F           18 WORD REGISTER SAVE AREA
C         END
C
C
C
C
C******************************************************************************
C
C     FOR IBM VM SYSTEMS    (Calls following VMTIME assembly code)
C
C     IMPLICIT INTEGER*4 (I-N)
C     REAL*8 CPUTIM
C     INTEGER*4 VSEC, VUSEC
C
C     CALL VMTIME (VSEC, VUSEC)
C
C     SECOND = VSEC + VUSEC/1.D6
C
C     RETURN
C     END
C
C**********************************************************************
C*                                                                    *
C*   this program is a fortran callable routine that returns a        *
C*   result of the diagnose x'c' instruction.                         *
C*                                                                    *
C*   FORTRAN CALL:  CALL VMTIME(VSEC, VUSEC)                          *
C*                                                                    *
C*   where:                                                           *
C*           vsec   the virtual cpu time, the seconds portion (i*4)   *
C*           vusec  the virtual cpu time, the micro-second portion    *
C*                                                                    *
C**********************************************************************
C
CVMTIME   START 0
CBEGIN    SAVE  (14,12)
C         BALR  3,0
C         USING *,3
C         ST    13,SAVE+4
C         LA    13,SAVE
C
C         LM    4,5,0(1)           LOAD THE FORTRAN PLIST INTO REGS
C
C         LA    2,PTIMER
C
C         DIAG  2,0,X'C'
C
C         L     6,VCPU
C         L     7,VCPU+4
C         D     6,=F'1000000'      SEPARATE SEC AND USEC
C         ST    7,0(4)             STORE THE SECOND PORTION
C         ST    6,0(5)             STORE THE MICROSECOND PORTION
C
C
C         L     13,SAVE+4
C         RETURN (14,12),RC=0
C
C
CPTIMER   DS    0D
CDATE     DS    D                  CURRENT DATE
CTIME     DS    D                  CURRENT TIME
CVCPU     DS    D                  VIRTUAL CPU TIME
CTCPU     DS    D                  TOTAL CPUTIME
CSAVE     DS    18F
C
C         LTORG
C         END
      RETURN
      END
c
c
c
C
C***********************************************************************
      SUBROUTINE  SENSIT( iou, RATES,WG,IQ,SUMW,  MAP,TV,TV1,TV2,TV3,n)
C***********************************************************************
C                                                                      *
C     SENSIT  - Sensitivity Of Harmonic Mean Rate(Mflops) 49 Workloads *
C                                                                      *
C     iou     - input scalar,  i/o unit number                         *
C     RATES   - input array ,  execution rates (Mflops)                *
C     WG      - input array ,  weights paired with RATES               *
C     IQ      - input array ,  1 or 2 quartiles specifier              *
C     SUMW    - input array ,  workload fractions.                     *
C                                                                      *
C     MAP,TV,TV1,TV2,TV3    -  output temporary arrays                 *
C     n       - input scalar,  number of rates, etc.                   *
C                                                                      *
C***********************************************************************
C
C     Outline:
C       1. Keep the rates inside (bl,bu) and, of those, the ones whose
C          weight is inside (bl,bu);  meff rates remain.
C       2. Sort the remaining rates into descending order (SORDID).
C       3. For each of the 7 total weights SUMW(j) and each of the 7
C          quartile patterns i, build a weight vector TV3 and print
C          STAT2(5) as returned by STATW for those weights.
C     VALID, SORDID, STATW and PAGE are defined elsewhere in the file.
C
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
c In
      DIMENSION  RATES(n), WG(n), IQ(7), SUMW(7)
c Temp
      DIMENSION  MAP(n), TV(n), TV1(n), TV2(n), TV3(n)
      DIMENSION  NR1(10), NR2(10), STAT2(20)
C
c     DIMENSION  TAG(4)                                                  f66
      CHARACTER*8  TAG(4)                                                f77
C
      DATA  ( TAG(i), i= 1,4)
     . /'1st QT: ',  '2nd QT: ',  '3rd QT: ',  '4th QT: '/               f77
c    ./8H1st QT:  , 8H2nd QT:  , 8H3rd QT:  , 8H4th QT:  /               f66
C
C                 Compress valid data sets RATES,  mapping on MAP.
C
              bl= 1.0D-5
              bu= 1.0D+5
            CALL  VALID( TV1,MAP,neff,  bl, RATES, bu, n)
C
C                 Gather the weights that belong to the valid rates.
C
        DO  1  k= 1,neff
          TV3(k)=    WG( MAP(k))
    1  continue
C
C
C                 Compress valid data sets WG,  mapping on MAP.
C
C     Only MAP and meff are used from this call.  The compressed
C     values are sent to the scratch array TV2 (overwritten by SORDID
C     below) so that TV3 is not passed as result and input at once.
C     NOTE(review): VALID is not visible from here; this assumes its
C     first argument is a result only, as in the call above - confirm.
C
            CALL  VALID( TV2,MAP,meff,  bl, TV3, bu, neff)
C
        DO  3  k= 1,meff
           TV(k)=TV1( MAP(k))
    3  continue
C
C                 Sort selected rates into descending order
C
            CALL  SORDID( MAP,TV2,   TV,meff,2)
C
C
C
       CALL PAGE( iou)
          WRITE ( iou,7001)
C
 7001 FORMAT(/)
 7301 FORMAT(9X,31H           SENSITIVITY ANALYSIS )
 7302 FORMAT(9X,52HThe sensitivity of the harmonic mean rate (Mflops)  )
 7303 FORMAT(9X,52Hto various weightings is shown in the table below.  )
 7304 FORMAT(9X,52HSeven work distributions are generated by assigning )
 7305 FORMAT(9X,52Htwo distinct weights to ranked kernels by quartiles.)
 7306 FORMAT(9X,52HForty nine possible cpu workloads are then evaluated)
 7307 FORMAT(9X,52Husing seven sets of values for the total weights:   )
 7341 FORMAT(3X,A ,6X,43HO      O      O      O      O      X      X)    f77
 7342 FORMAT(3X,A ,6X,43HO      O      O      X      X      X      O)    f77
 7343 FORMAT(3X,A ,6X,43HO      X      X      X      O      O      O)    f77
 7344 FORMAT(3X,A ,6X,43HX      X      O      O      O      O      O)    f77
c7341 FORMAT(3X,A7,6X,43HO      O      O      O      O      X      X)    f66
c7342 FORMAT(3X,A7,6X,43HO      O      O      X      X      X      O)    f66
c7343 FORMAT(3X,A7,6X,43HO      X      X      X      O      O      O)    f66
c7344 FORMAT(3X,A7,6X,43HX      X      O      O      O      O      O)    f66
 7346 FORMAT(13X,  48H------ ------ ------ ------ ------ ------ ------)
 7348 FORMAT(3X,5HTotal,/,3X,7HWeights,20X,11HNet Mflops:,/,4X,6HX    O)
 7349 FORMAT(2X,9H---- ---- )
 7220 FORMAT(/,1X,2F5.2,1X,7F7.2)
C
          WRITE ( iou,7001)
          WRITE ( iou,7001)
          WRITE ( iou,7301)
          WRITE ( iou,7001)
          WRITE ( iou,7302)
          WRITE ( iou,7303)
          WRITE ( iou,7304)
          WRITE ( iou,7305)
          WRITE ( iou,7306)
          WRITE ( iou,7307)
          WRITE ( iou,7001)
          WRITE ( iou,7346)
          WRITE ( iou,7341)   TAG(1)
          WRITE ( iou,7342)   TAG(2)
          WRITE ( iou,7343)   TAG(3)
          WRITE ( iou,7344)   TAG(4)
          WRITE ( iou,7346)
          WRITE ( iou,7348)
          WRITE ( iou,7349)
C
C     Nothing to tabulate when no rate survived the two filters.
C
            IF( meff .LE. 0 )  RETURN
          fuzz= 1.0e-9
             r= meff
            mq= (meff+3)/4
             q= mq
C
C     mq is the quartile size, rounded up.  NR1(i+2) and NR2(i+2) are
C     the first and last ranks that receive the X weight in column i:
C        i=7: 1st QT         i=6: 1st+2nd QT     i=5: 2nd QT
C        i=4: 2nd+3rd QT     i=3: 3rd QT         i=2: 3rd+4th QT
C        i=1: 4th QT
C     NR1(2), NR2(2) are set by the loop but never referenced.
C
             j= 1
      DO 21  i= 8,2,-2
      NR1(i  )= j
      NR1(i+1)= j
      NR2(i  )= j + mq + mq - 1
      NR2(i+1)= j + mq - 1
             j= j + mq
   21  continue
C
       DO 29 j= 1,7
          sumo= 1.0 - SUMW(j)
       DO 27 i= 1,7
C
C     xt is the weight of each X kernel and ot the weight of each O
C     kernel;  fuzz keeps both divisors away from zero.
C
             p= IQ(i)*q
            xt= SUMW(j)/(p + fuzz)
            ot= sumo   /(r - p + fuzz)
       DO 23 k= 1,meff
        TV3(k)= ot
   23  continue
C
C     Because mq is rounded up, NR2 can reach 4*mq, which may exceed
C     meff by up to 3.  The upper limit is clamped to meff so that
C     nothing is stored beyond the meff entries handed to STATW (nor
C     beyond the declared bound n of TV3 when meff is close to n).
C
            k1= NR1(i+2)
            k2= MIN( NR2(i+2), meff)
       DO 25 k= k1,k2
        TV3(k)= xt
   25  continue
          CALL  STATW( STAT2,TV,MAP, TV2,TV3,meff)
        TV1(i)= STAT2(5)
   27  continue
        WRITE ( iou,7220) SUMW(j), sumo, ( TV1(k), k=1,7)
   29  continue
C
           WRITE ( iou,7349)
           WRITE ( iou,7346)
C
C
      RETURN
C
      END
C***********************************************
      SUBROUTINE  SIGNAL( V, SCALE,BIAS, n)
C***********************************************
C
C    SIGNAL FILLS V WITH A SMOOTH, REPRODUCIBLE SEQUENCE OF
C           FLOATING-POINT TEST VALUES, ALL CLOSE TO 0.1.
C
C     V      - result array,  receives the n generated values
C     SCALE  - input scalar,  copied on entry but then overridden:
C                             the factor applied is always 1/10
C     BIAS   - input scalar,  copied on entry but then overridden
C                             by zero; the active formula ignores it
C     n      - input integer, number of results stored in V.
C
C     The recurrence runs in DOUBLE PRECISION whatever the type of V;
C     each value is converted when it is stored.
C
C***********************************************
cANSI DOUBLE PRECISION  V, SCALE, BIAS
cIBM  REAL*8            V, SCALE, BIAS
c
      DOUBLE PRECISION  GAIN,OFFSET,DELTA,LEVEL,DROP,UNITY
cIBM  REAL*16           GAIN,OFFSET,DELTA,LEVEL,DROP,UNITY
C
      DIMENSION  V(n)
C
C                 Pick up the caller's settings ...
      GAIN  = SCALE
      OFFSET= BIAS
C                 ... then force the fixed benchmark settings.
      GAIN  = 10.00D0
      GAIN  =  1.00D0/GAIN
      OFFSET=  0.00D0
C
C                 Starting values of the recurrence.
      UNITY= 1.000D0
      DELTA= 1.234500D-3
      LEVEL= 1.000D0  + DELTA
      DROP = 1.100D0  * DELTA
C
C                 DELTA changes sign on every pass, so LEVEL is pulled
C                 alternately from either side.  OFFSET is not applied.
      DO 10 j= 1,n
          LEVEL= (UNITY - DELTA)*LEVEL +DELTA
          DELTA= -DELTA
           V(j)= (LEVEL- DROP)*GAIN
   10 CONTINUE
C
      RETURN
      END
C
C
C***********************************************************************
      SUBROUTINE SIMD( HM,  iou,RATES,WG,FR,m,  MAP,TV1,TV2,TV3,n)
C***********************************************************************
C                                                                      *
C     SIMD  - Sensitivity Of Harmonic Mean Rate(Mflops) SISD/SIMD Model*
C                                                                      *
C     HM      - result array,  net rates (Mflops), one per fraction    *
C     iou     - input scalar,  i/o unit number                         *
C     RATES   - input array ,  execution rates (Mflops)                *
C     WG      - input array ,  weights paired with RATES               *
C     FR      - input array ,  fractions of flops executed SIMD        *
C     m       - input scalar,  number of fractions                     *
C                                                                      *
C     MAP,TV1,TV2,TV3       -  output temporary arrays                 *
C     n       - input scalar,  number of rates, etc.                   *
C                                                                      *
C***********************************************************************
C
C     Method:
C       1. RATES and then the paired weights are passed through VALID
C          with the bounds bl= 1.0D-5 and bu= 1.0D+5.
C       2. STATW orders the surviving rates; STAT2(8) (the median index)
C          fixes med, the first element of the low half, and lh, its
C          length.
C       3. vmf = STAT2(5) (weighted harmonic mean) of the first
C          nq= meff/4 ordered rates; smf = STAT2(5) of elements
C          med..meff.
C       4. HM(1)= smf and, for k= 2..m,
C             HM(k)= smf/( 1 - FR(k)*g + fuzz),  g= 1 - smf/(vmf+fuzz).
C          fuzz only keeps the denominators away from zero.
C       5. A report is written only when iou is positive.  At most nine
C          values per line are reported (the capacity of FORMAT 7102)
C          and never more than the m elements HM and FR actually have.
C
C
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
c SENSITIVITY OF NET MFLOPS RATE TO USE OF OPTIMAL FORTRAN CODE(SISD/SIMD MODEL
c Out
      DIMENSION  HM(m)
c In
      DIMENSION  FR(m), RATES(n), WG(n)
c Temp
      DIMENSION  MAP(n), TV1(n), TV2(n), TV3(n), STAT2(20)
 
C                 Compress valid data sets RATES,  mapping on MAP.
 
              bl= 1.0D-5
              bu= 1.0D+5
            CALL  VALID( TV1,MAP,neff,  bl, RATES, bu, n)
 
C                 Gather the weights that belong to the retained rates.
        DO  1  k= 1,neff
          TV3(k)=    WG( MAP(k))
    1  continue
 
 
C                 Compress valid data sets WG,  mapping on MAP.
 
            CALL  VALID( TV3,MAP,meff,  bl, TV3, bu, neff)
 
C                 Keep only the rates whose weights were retained.
        DO  3  k= 1,meff
          TV2(k)= TV1( MAP(k))
    3  continue
 
C                 Sort RATES,WT into descending order.
 
            CALL  STATW( STAT2,TV1,MAP, TV2, TV3, meff)
C                 STAT2(8) is the ascending median index; convert it to
C                 a position in the descending list.
             med= meff + 1 - INT(STAT2(8))
              lh= meff + 1 - med
 
C                 Carry the weights into the same (sorted) order.
        DO  5  k= 1,meff
          TV2(k)= TV3( MAP(k))
    5  continue
 
 
C                 Estimate vector rate= HMean of top LFK quartile.
 
              nq= meff/4
            CALL  STATW( STAT2,TV3,MAP, TV1,TV2,nq)
             vmf= STAT2(5)
 
C                 Estimate scalar rate= HMean of lowest two LFK quartiles.
 
            CALL  STATW( STAT2,TV3,MAP, TV1(med),TV2(med),lh)
             smf= STAT2(5)
            fuzz= 1.0e-9
 
               g= 1.0 -   smf/( vmf + fuzz)
           HM(1)= smf
 
          DO 7 k= 2,m
           HM(k)=   smf/( 1.0 - FR(k)*g + fuzz)
   7      continue
C
      IF( iou .GT. 0)  THEN
C
C                 FORMAT 7102 holds nine values; do not read beyond the
C                 m elements that HM and FR are declared to have.
            nout= MIN( m, 9)
C
          WRITE ( iou,7001)
          WRITE ( iou,7001)
          WRITE ( iou,7001)
          WRITE ( iou,7101)
          WRITE ( iou,7102) ( HM(k), k= 1,nout)
          WRITE ( iou,7102) ( FR(k), k= 1,nout)
          WRITE ( iou,7103)
          WRITE ( iou,7001)
 7001 FORMAT(/)
 7101 FORMAT(' SENSITIVITY OF NET MFLOPS RATE TO USE OF OPTIMAL FORTRAN
     1CODE(SISD/SIMD MODEL)' )
 7102 FORMAT(/,1X,5F7.2,4F8.2)
 7103 FORMAT(3x,52H Fraction Of Operations Run At Optimal Fortran Rates)
C
      ENDIF
C
      RETURN
C
      END
C
C
C***********************************************
      SUBROUTINE SIZES(i)
C***********************************************
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
C            SIZES      test and set the loop controls before each kernel test
C
C     Every call first resets mk, ml, Nruns and tic to the values coded
C     below.  A call with i= -1 returns right after that.  Any other
C     call checks i, n, Loop and il against fixed limits, loads n and
C     Loop for kernel i from ISPAN/IPASS/MUL, checks them again, and
C     stores the fixed array extents n1 ... n21, nt1 and nt2.
C     A failed check prints a fatal message and STOPs the program.
C
C     i    :=  kernel number
C
C     mk   :=  number of kernels to test
C     Nruns:=  number of timed runs of complete test.
C     tic  :=  cpu clock resolution or minimum time in seconds.
C     Loop :=  multiple pass control to execute kernel long enough to time.
C     n    :=  DO loop control for each kernel.
C     nif  :=  local stage counter (0,1,2,3); printed by the fatal
C              message to show which group of checks failed.
C     ******************************************************************
C
C
C/      PARAMETER( l1= 1001, l2=  101, l1d= 2*1001 )
C/      PARAMETER( l13=  64, l13h= l13/2, l213= l13+l13h, l813= 8*l13 )
C/      PARAMETER( l14=2048, l16=  75, l416= 4*l16 , l21= 25 )
C
C/      PARAMETER( l1=   27, l2=   15, l1d= 2*1001 )
C/      PARAMETER( l13= 8, l13h= 8/2, l213= 8+4, l813= 8*8 )
C/      PARAMETER( l14=  16, l16= 15, l416= 4*15 , l21= 15)
C
C/      PARAMETER( l1=   1001, l2=   101, l1d= 2*1001 )
C/      PARAMETER( l13= 64, l13h= 64/2, l213= 64+32, l813= 8*64 )
C/      PARAMETER( l14= 2048, l16= 75, l416= 4*75 , l21= 25)
C
C/      PARAMETER( kn= 47, kn2= 95, np= 3, ls= 3*47, krs= 24)
C
C/      PARAMETER( NNI=  2*l1 +2*l213 +l416 )
C/      PARAMETER( NN1= 16*l1 +13*l2 +2*l416 + l14 )
C/      PARAMETER( NN2= 4*l813 + 3*l21*l2 +121*l2 +3*l13*l13 )
C/      PARAMETER( Nl1= 19*l1, Nl2= 131*l2 +3*l21*l2 )
C/      PARAMETER( Nl13= 3*l13*l13 +34*l13 +32)
C
C/      PARAMETER( nk= 47, nl= 3, nr= 8 )
C
      COMMON /ALPHA/ mk,ik,ml,il,Nruns,jr, NPFS(8,3,47)
      COMMON /BETA / tic, TIMES(8,3,47), SEE(5,3,8,3),
     1              TERRS(8,3,47), CSUMS(8,3,47),
     2              FOPN(8,3,47), DOS(8,3,47)
C
C
      COMMON /SPACES/ ion,j5,k2,k3,Loop,m,kr,it,n13h,ibuf,
     1                n,n1,n2,n13,n213,n813,n14,n16,n416,n21,nt1,nt2
C
      COMMON /SPACER/ A11,A12,A13,A21,A22,A23,A31,A32,A33,
     2                AR,BR,C0,CR,DI,DK,
     3  DM22,DM23,DM24,DM25,DM26,DM27,DM28,DN,E3,E6,EXPMAX,FLX,
     4  Q,QA,R,RI,S,SCALE,SIG,STB5,T,XNC,XNEI,XNM
C
      COMMON /SPACE0/ TIME(47), CSUM(47), WW(47), WT(47), ticks,
     1                FR(9), TERR1(47), SUMW(7), START,
     2              SKALE(47), BIAS(47), WS(95), TOTAL(47), FLOPN(47),
     3                IQ(7), NPF, NPFS1(47)
C
      COMMON /SPACEI/ WTP(3), MUL(3), ISPAN(47,3), IPASS(47,3)
C
C     ******************************************************************
C
C
C
      nif= 0
C
c                        Set  mk .LE. 47  number of kernels to test.
c            mk= 14
             mk= 24
             ml= 3
c                        Set  Nruns .LT. 8  number of timed runs of KERNEL test
c                        Set  Nruns= 1   to REDUCE RUN TIME for debug runs.
          Nruns= 1
c                        Set  Nruns= 7   for Standard BENCHMARK Test.
c          Nruns= 7
             IF( Nruns.GT. 7) Nruns= 7
c                        Set tic to the minimum Cpu clock resolution time(Sec.)
c           tic= 1./60.
            tic= 1.0e-6
C
C     i= -1 asks only for the run parameters set above.
      IF( i.EQ.-1) RETURN
C
C
C     Domain tests follow to detect overstoring of controls for array opns.
C
C     Stage 1: the values currently held in COMMON must still be sane.
C     iup is the largest Loop count accepted; with iup= 999000 the
C     65000 floor on the next line never takes effect.
C     NOTE(review): i= 0 passes the first test, yet ISPAN(0,il) below
C     would lie outside the declared bounds; callers presumably never
C     pass 0 - confirm.
      nif= 1
      iup= 999000
      IF( iup.LT.65000 ) iup= 65000
      IF( i.LT.0 .OR.  (i-1).GT.  24)      GO TO 911
      IF( n.LT.0 .OR.  n.GT.   1001)          GO TO 911
      IF(Loop.LT.0 .OR. Loop.GT.iup)        GO TO 911
C
C     Stage 2: il selects the column of ISPAN/IPASS and the element of
C     MUL that supply the span n and the pass count Loop for kernel i.
      nif= 2
      IF(  il.LT.1 .OR. il.GT.3 )  GO TO 911
                 n= ISPAN(i,il)
      Loop        = IPASS(i,il) * MUL(il)
C
c
C     If clock resolution test fails, you must increase Loop= 40*Loop etc.
C     Standard Checksums in REPORT were obtained with   Loop= 10*Loop.
C     Verify correct execution using these checksums then, if timing
C     errors are too large, increase the value of Loop:
c
C     Loop= 100*Loop
      Loop=  10*Loop
c
C     Computers with high resolution clocks tic= O(microsec.) should use Loop= 1
C
C     Loop= 1
C
C     Stage 3: never run fewer than one pass, then re-check the freshly
C     loaded n and Loop against the same limits as in stage 1.
      IF( Loop.LT. 1)   Loop= 1
      nif= 3
      IF( n.LT.0 .OR.  n.GT.   1001)  GO TO 911
      IF(Loop.LT.0 .OR. Loop.GT.iup)  GO TO 911
C
C     Fixed extents; the values match the l1, l2, l13, ... settings of
C     the third (commented) PARAMETER group above.
      n1  = 1001
      n2  = 101
      n13 = 64
      n13h= 32
      n213= 96
      n813= 512
      n14 = 2048
      n16 = 75
      n416= 300
      n21 = 25
C
C     Element totals, written out as the NN1 and NN2 expressions of the
C     commented PARAMETER lines with the numbers substituted.
      nt1= 16*1001 +13*101 +2*300 + 2048
      nt2= 4*512 + 3*25*101 +121*101 +3*64*64
C
      RETURN
C
C
C     Fatal exit: report on unit ABS(ion), or on unit 6 when that value
C     is not in 1..100, then stop the program.
  911 io= ABS( ion)
      IF( io.LE.0 .OR. io.GT.100 ) io=6
      WRITE( io,913) i, nif, n, Loop, il
  913 FORMAT(1H1,///,37H FATAL OVERSTORE/ DATA LOSS.  TEST=  ,6I6)
      STOP
C
      END
C***********************************************
      SUBROUTINE SORDID( i,W, V,n,KIND)
C***********************************************
C                    SIMPLE PORTABLE SELECTION SORT.
C
C                i - RESULT INDEX-LIST: W(k) IS THE ELEMENT THAT WAS
C                    V(i(k)) ON ENTRY.
C                W - RESULT ARRAY, THE ELEMENTS OF V IN SORTED ORDER.
C
C                V - INPUT  ARRAY; IT IS COPIED TO W AND THE COPY IS
C                    SORTED, V ITSELF IS ONLY READ.
C                n - INPUT  NUMBER OF ELEMENTS IN V
C             KIND - SORT ORDER:   = 2  DESCENDING VALUE
C                                  ANY OTHER VALUE  ASCENDING VALUE
C                    (ELEMENTS ARE COMPARED AS SIGNED VALUES.)
C
C***********************************************
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
      DIMENSION  i(n), W(n), V(n)
C
C                    WORK ON A COPY, STARTING FROM THE IDENTITY MAP.
      DO 10 j= 1,n
          W(j)= V(j)
          i(j)= j
   10 CONTINUE
C
C                    FOR EACH POSITION j PICK THE EXTREME ELEMENT OF
C                    W(j..n) AND EXCHANGE IT (AND ITS INDEX) INTO j.
      DO 30 j= 1,n-1
          ipick= j
          DO 20 k= j+1,n
              IF( KIND.EQ.2 ) THEN
                  IF( W(k).GT.W(ipick)) ipick= k
              ELSE
                  IF( W(k).LT.W(ipick)) ipick= k
              ENDIF
   20     CONTINUE
          wkeep   = W(j)
          ikeep   = i(j)
          W(j)    = W(ipick)
          i(j)    = i(ipick)
          W(ipick)= wkeep
          i(ipick)= ikeep
   30 CONTINUE
C
      RETURN
      END
C
C***********************************************
      SUBROUTINE  SPACE
C***********************************************
C
C            SPACE      sets memory pointers for array variables.  optional.
C
C     Subroutine Space dynamically allocates physical memory space
C     for the array variables in KERNEL by setting pointer values.
C     The POINTER declaration has been defined in the IBM PL1 language
C     and defined as a Fortran extension in Livermore and CRAY compilers.
C
C     In general, large FORTRAN simulation programs use a memory
C     manager to dynamically allocate arrays to conserve high speed
C     physical memory and thus avoid slow disk references (page faults).
C
C     It is sufficient for our purposes to trivially set the values
C     of pointers to the location of static arrays used in common.
C     The efficiency of pointered (indirect) computation should be measured
C     if available.
C
C     As the code stands, the /POINT/ common block and every LOC()
C     assignment below are commented out (the C// lines), so a call
C     executes nothing but RETURN.  Only the COMMON declarations of the
C     arrays the pointers would address are active.
C
C***********************************************
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
C
C/      PARAMETER( l1=   1001, l2=   101, l1d= 2*1001 )
C/      PARAMETER( l13= 64, l13h= 64/2, l213= 64+32, l813= 8*64 )
C/      PARAMETER( l14= 2048, l16= 75, l416= 4*75 , l21= 25)
C
C     Integer work arrays.
      INTEGER    E,F,ZONE
      COMMON /ISPACE/ E(96), F(96),
     1  IX(1001), IR(1001), ZONE(300)
C
C     One-dimensional floating-point work arrays.
      COMMON /SPACE1/ U(1001), V(1001), W(1001),
     1  X(1001), Y(1001), Z(1001), G(1001),
     2  DU1(101), DU2(101), DU3(101), GRD(1001), DEX(1001),
     3  XI(1001), EX(1001), EX1(1001), DEX1(1001),
     4  VX(1001), XX(1001), RX(1001), RH(2048),
     5  VSP(101), VSTP(101), VXNE(101), VXND(101),
     6  VE3(101), VLR(101), VLIN(101), B5(101),
     7  PLAN(300), D(300), SA(101), SB(101)
C
C     Two- and three-dimensional floating-point work arrays.
      COMMON /SPACE2/ P(4,512), PX(25,101), CX(25,101),
     1  VY(101,25), VH(101,7), VF(101,7), VG(101,7), VS(101,7),
     2  ZA(101,7)  , ZP(101,7), ZQ(101,7), ZR(101,7), ZM(101,7),
     3  ZB(101,7)  , ZU(101,7), ZV(101,7), ZZ(101,7),
     4  B(64,64), C(64,64), H(64,64),
     5  U1(5,101,2),  U2(5,101,2),  U3(5,101,2)
C
C     ******************************************************************
C
C     Disabled pointer version: one pointer per array declared above.
C
C//      COMMON /POINT/ ME,MF,MU,MV,MW,MX,MY,MZ,MG,MDU1,MDU2,MDU3,MGRD,
C//     1  MDEX,MIX,MXI,MEX,MEX1,MDEX1,MVX,MXX,MIR,MRX,MRH,MVSP,MVSTP,
C//     2  MVXNE,MVXND,MVE3,MVLR,MVLIN,MB5,MPLAN,MZONE,MD,MSA,MSB,
C//     3  MP,MPX,MCX,MVY,MVH,MVF,MVG,MVS,MZA,MZP,MZQ,MZR,MZM,MZB,MZU,
C//     4  MZV,MZZ,MB,MC,MH,MU1,MU2,MU3
C//C
C//CLLL. LOC(X) =.LOC.X
C//C
C//      ME     = LOC( E )
C//      MF     = LOC( F )
C//      MU     = LOC( U )
C//      MV     = LOC( V )
C//      MW     = LOC( W )
C//      MX     = LOC( X )
C//      MY     = LOC( Y )
C//      MZ     = LOC( Z )
C//      MG     = LOC( G )
C//      MDU1   = LOC( DU1 )
C//      MDU2   = LOC( DU2 )
C//      MDU3   = LOC( DU3 )
C//      MGRD   = LOC( GRD )
C//      MDEX   = LOC( DEX )
C//      MIX    = LOC( IX )
C//      MXI    = LOC( XI )
C//      MEX    = LOC( EX )
C//      MEX1   = LOC( EX1 )
C//      MDEX1  = LOC( DEX1 )
C//      MVX    = LOC( VX )
C//      MXX    = LOC( XX )
C//      MIR    = LOC( IR )
C//      MRX    = LOC( RX )
C//      MRH    = LOC( RH )
C//      MVSP   = LOC( VSP )
C//      MVSTP  = LOC( VSTP )
C//      MVXNE  = LOC( VXNE )
C//      MVXND  = LOC( VXND )
C//      MVE3   = LOC( VE3 )
C//      MVLR   = LOC( VLR )
C//      MVLIN  = LOC( VLIN )
C//      MB5    = LOC( B5 )
C//      MPLAN  = LOC( PLAN )
C//      MZONE  = LOC( ZONE )
C//      MD     = LOC( D )
C//      MSA    = LOC( SA )
C//      MSB    = LOC( SB )
C//      MP     = LOC( P )
C//      MPX    = LOC( PX )
C//      MCX    = LOC( CX )
C//      MVY    = LOC( VY )
C//      MVH    = LOC( VH )
C//      MVF    = LOC( VF )
C//      MVG    = LOC( VG )
C//      MVS    = LOC( VS )
C//      MZA    = LOC( ZA )
C//      MZP    = LOC( ZP )
C//      MZQ    = LOC( ZQ )
C//      MZR    = LOC( ZR )
C//      MZM    = LOC( ZM )
C//      MZB    = LOC( ZB )
C//      MZU    = LOC( ZU )
C//      MZV    = LOC( ZV )
C//      MZZ    = LOC( ZZ )
C//      MB     = LOC( B )
C//      MC     = LOC( C )
C//      MH     = LOC( H )
C//      MU1    = LOC( U1 )
C//      MU2    = LOC( U2 )
C//      MU3    = LOC( U3 )
C
      RETURN
      END
C***********************************************
      SUBROUTINE STATS( STAT, X,n)
C***********************************************
C
C     UNWEIGHTED STATISTICS OF THE n VALUES IN X.
C
C     STAT(1)= ARITHMETIC MEAN OF X.
C     STAT(2)= STANDARD DEVIATION OF X ABOUT THAT MEAN (DIVISOR n).
C     STAT(3)= SMALLEST X.
C     STAT(4)= LARGEST X.
C     STAT(5)= HARMONIC MEAN: n OVER THE SUM OF 1/X, ZERO X SKIPPED.
C     STAT(6) THROUGH STAT(9) ARE RETURNED AS ZERO.
C     X       IS THE ARRAY  OF INPUT VALUES.
C     n       IS THE NUMBER OF INPUT VALUES IN X; IF n IS NOT
C             POSITIVE ALL NINE ENTRIES OF STAT ARE RETURNED AS ZERO.
C
C***********************************************
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
      DIMENSION X(n), STAT(9)
cLLL. OPTIMIZE LEVEL G
C
C
      DO 10 j= 1,9
          STAT(j)= 0.0
   10 CONTINUE
C
      IF( n.GT.0 ) THEN
C                             SUM AND RECIPROCAL SUM IN ONE PASS.
          total= 0.0
          recip= 0.0
          DO 20 j= 1,n
              total= total + X(j)
              IF( X(j).NE.0.0) recip= recip + 1.0/X(j)
   20     CONTINUE
          avg= total/n
C                             SQUARED SPREAD ABOUT THE MEAN.
          spread= 0.0
          DO 30 j= 1,n
              spread= spread + (X(j)-avg)**2
   30     CONTINUE
          spread= spread/n
C                             EXTREMES.
          xlow = X(1)
          xhigh= X(1)
          DO 40 j= 2,n
              xlow = MIN(xlow ,X(j))
              xhigh= MAX(xhigh,X(j))
   40     CONTINUE
C                             A ZERO RECIPROCAL SUM IS LEFT AS ZERO.
          IF( recip.NE.0.0) recip= REAL(n)/recip
C
          STAT(1)= avg
          STAT(2)= SQRT(spread)
          STAT(3)= xlow
          STAT(4)= xhigh
          STAT(5)= recip
      ENDIF
C
      RETURN
      END
C***********************************************
      SUBROUTINE STATW( STAT,OX,IX, X,W,n)
C***********************************************
C
C     WEIGHTED STATISTICS: MEAN, STADEV, MIN, MAX, HARMONIC MEAN, MEDIAN.
C
C     STAT( 1)=  THE WEIGHTED MEAN OF X.
C     STAT( 2)=  THE WEIGHTED STANDARD DEVIATION OF X ABOUT THAT MEAN.
C     STAT( 3)=  THE MINIMUM OF X.
C     STAT( 4)=  THE MAXIMUM OF X.
C     STAT( 5)=  THE WEIGHTED HARMONIC MEAN (ZERO X SKIPPED)
C     STAT( 6)=  THE TOTAL WEIGHT.
C     STAT( 7)=  THE MEDIAN.
C     STAT( 8)=  THE MEDIAN INDEX, ASCENDING.
C     STAT( 9)=  THE ROBUST MEDIAN ABSOLUTE DEVIATION.
C     STAT(10)=  THE GEOMETRIC MEAN
C     STAT(11)=  THE MOMENTAL SKEWNESS
C     STAT(12)=  THE KURTOSIS
C     STAT(13)=  THE LOWER QUARTILE BOUND Q1/Q2 VALUE
C     STAT(14)=  THE UPPER QUARTILE BOUND Q3/Q4 VALUE
C
C     OX      IS THE ARRAY  OF ORDERED (DECENDING) Xs.
C     IX      IS THE ARRAY  OF INDEX LIST MAPS X TO OX.
C
C     X       IS THE ARRAY  OF INPUT VALUES.
C     W       IS THE ARRAY  OF INPUT WEIGHTS.
C     n       IS THE NUMBER OF INPUT VALUES IN X.
C
C     SPECIAL CASES:
C       n .LE. 0  -  ALL 14 ENTRIES OF STAT ARE RETURNED AS ZERO.
C       n .EQ. 1  -  ONLY STAT(1,3,4,5,6,7,8,10) ARE SET; OX AND IX ARE
C                    NOT WRITTEN.
C
C     OX AND IX ARE ALSO USED AS SCRATCH DURING THE CALL; ONLY THEIR
C     FINAL (DESCENDING) CONTENTS ARE MEANINGFUL TO THE CALLER.
C
C     THE LOGARITHM IS TAKEN WITH THE GENERIC INTRINSIC LOG10 SO THAT
C     THE ROUTINE ALSO COMPILES WHEN THE cANSI DOUBLE PRECISION LINE
C     BELOW IS ACTIVATED.
C
C***********************************************
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
C
      DIMENSION STAT(14), OX(n), IX(n), X(n), W(n)
cLLL. OPTIMIZE LEVEL G
C
C
      DO 50   k= 1,14
   50 STAT(k)= 0.0
C
      IF( n.LE.0 ) RETURN
C
C                             A SINGLE VALUE IS ITS OWN MEAN, EXTREME,
C                             MEDIAN, HARMONIC AND GEOMETRIC MEAN.
      IF( n.EQ.1 )  THEN
          STAT( 1)= X(1)
          STAT( 3)= X(1)
          STAT( 4)= X(1)
          STAT( 5)= X(1)
          STAT( 6)= W(1)
          STAT( 7)= X(1)
          STAT( 8)= 1.0
          STAT(10)= X(1)
          RETURN
      ENDIF
C
C
C                             CALCULATE MEAN OF X.
      A= 0.0
      S= 0.0
      T= 0.0
C
      DO 1 k= 1,n
      S= S + W(k)*X(k)
    1 T= T + W(k)
          IF( T.NE.0.0) A= S/T
      STAT(1)= A
C                             CALCULATE STANDARD DEVIATION OF X.
C                             D, E, F ACCUMULATE THE WEIGHTED 2ND, 3RD
C                             AND 4TH CENTRAL MOMENTS; Q= 1/T (OR ZERO).
      D= 0.0
      E= 0.0
      F= 0.0
      Q= 0.0
      U= 0.0
C
      DO 2 k= 1,n
      B= W(k) *( X(k) -A)**2
      D= D + B
      E= E + B*( X(k) -A)
    2 F= F + B*( X(k) -A)**2
          IF( T.NE.0.0) Q= 1.0/T
                        D= D*Q
                        E= E*Q
                        F= F*Q
          IF( D.GE.0.0) U= SQRT(D)
      STAT(2)= U
C                             CALCULATE MINIMUM OF X.
      U= X(1)
      DO 3 k= 2,n
    3 U= MIN(U,X(k))
      STAT(3)= U
C                             CALCULATE MAXIMUM OF X.
      V= X(1)
      DO 4 k= 2,n
    4 V= MAX(V,X(k))
      STAT(4)= V
C                             CALCULATE HARMONIC MEAN OF X.
      H= 0.0
      DO 5 k= 1,n
          IF( X(k).NE.0.0) H= H + W(k)/X(k)
    5 CONTINUE
          IF( H.NE.0.0) H= T/H
      STAT(5)= H
      STAT(6)= T
C                             CALCULATE WEIGHTED MEDIAN
      CALL SORDID( IX, OX, X, n, 1)
C
C                             ew= 1.0 ONLY WHEN ALL WEIGHTS ARE EQUAL.
           ew= 0.0
      DO 7  k= 2,n
           IF( W(1) .NE. W(k))  GO TO 75
    7 continue
           ew= 1.0
   75 continue
C
C                             TILE IS CALLED WITH THE FRACTIONS 0.50,
C                             0.25 AND 0.75 OF THE ASCENDING LIST.
        qt= 0.500D0
      CALL  TILE( STAT( 7), STAT(8), OX,IX,W,ew,T, qt,n)
C
        qt= 0.250D0
      CALL  TILE( STAT(13),  stin13, OX,IX,W,ew,T, qt,n)
C
        qt= 0.750D0
      CALL  TILE( STAT(14),  stin14, OX,IX,W,ew,T, qt,n)
C
C
C                           CALCULATE ROBUST MEDIAN ABSOLUTE DEVIATION (MAD)
      DO 90 k= 1,n
   90   OX(k)= ABS( X(k) - STAT(7))
C
C                           OX IS PASSED AS BOTH SOURCE AND RESULT HERE;
C                           SORDID COPIES ELEMENT BY ELEMENT FIRST.
      CALL SORDID( IX, OX, OX, n, 1)
C
C                           NOTE(review): THE FRACTION USED IS 0.70, NOT
C                           0.50 - CONFIRM THIS IS INTENDED.
        qt= 0.700D0
      CALL  TILE( STAT( 9),  stin09, OX,IX,W,ew,T, qt,n)
C
C                             CALCULATE GEOMETRIC MEAN
C                             NON-POSITIVE X ARE SKIPPED; THE EXPONENT
C                             IS CLIPPED TO +/-50 DECADES.
            R= 0.0
      DO 10 k= 1,n
           IF( X(k).LE. 0.)  GO TO 10
            R= R + W(k) *LOG10( X(k))
   10 CONTINUE
             U= R*Q
             G= 10.0
            IF( U.LT. 0.)  G= 0.1
        POWTEN= 50.0
            IF( ABS(U) .GT. POWTEN)  U= SIGN( POWTEN, U)
      STAT(10)=  G** ABS(U)
C
C                             CALCULATE MOMENTAL SKEWNESS
             G= 0.0
           DXD= D*D
            IF( DXD.NE.0.0) G= 1.0/(DXD)
      STAT(11)= 0.5*E*G*STAT(2)
C
C                             CALCULATE KURTOSIS
      STAT(12)= 0.5*( F*G -3.0)
C
C                             CALCULATE DESCENDING ORDERED X.
      CALL SORDID( IX, OX, X, n, 2)
C
      RETURN
      END
C
C***********************************************
      FUNCTION SUMO( V,n)
C***********************************************
C
C     CHECK-SUM WITH ORDINAL DEPENDENCY.
C
C     Returns the sum over k= 1..n of  k*V(k), accumulated in
C     DOUBLE PRECISION and converted to the function type on return.
C     Because each element is multiplied by its position, exchanging
C     two unequal elements changes the result.  n .LE. 0 returns zero.
C
C     V   - input array,   floating-point numbers
C     n   - input integer, number of elements in V.
C
C     V is declared assumed-size, V(*), because n elements are read;
C     a declared extent of 1 would make every access beyond V(1) an
C     out-of-bounds reference.
C
C***********************************************
cANSI DOUBLE PRECISION  SUMO, V
cIBM  REAL*8            SUMO, V
c
      DOUBLE PRECISION  S
cIBM  REAL*16           S
C
      DIMENSION  V(*)
           S= 0.00D0
C
      DO 1 k= 1,n
           S= S + REAL(k)*V(k)
    1 CONTINUE
       SUMO = S
      RETURN
      END
C
C***********************************************
      SUBROUTINE  SUPPLY(i)
C***********************************************
C
C            SUPPLY     initializes common blocks containing type real arrays.
C
C     i    :=  kernel number; i+1 selects the SKALE/BIAS pair handed to
C              SIGNAL on the first call.
C
C     The first call (ibuf becomes 1) has SIGNAL fill the three BASE
C     buffers.  Every call, the first included, then copies the buffers
C     over the working storage in /SPACE1/, /SPACE2/ and /SPACER/.
C     The copy lengths nt1 and nt2 are taken from /SPACES/.
C
C****************************************************************************
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
C/      PARAMETER( l1=   1001, l2=   101, l1d= 2*1001 )
C/      PARAMETER( l13= 64, l13h= 64/2, l213= 64+32, l813= 8*64 )
C/      PARAMETER( l14= 2048, l16= 75, l416= 4*75 , l21= 25)
C
C/      PARAMETER( kn= 47, kn2= 95, np= 3, ls= 3*47, krs= 24)
C/C
C/C/      PARAMETER( NN0= 39 )
C/C/      PARAMETER( NNI=  2*l1 +2*l213 +l416 )
C/C/      PARAMETER( NN1= 16*l1 +13*l2 +2*l416 + l14 )
C/C/      PARAMETER( NN2= 4*512 + 3*25*101 +121*101 +3*64*64 )
C
      COMMON /SPACES/ ion,j5,k2,k3,Loop,m,kr,it,n13h,ibuf,
     1                n,n1,n2,n13,n213,n813,n14,n16,n416,n21,nt1,nt2
C
      COMMON /SPACE0/ TIME(47), CSUM(47), WW(47), WT(47), ticks,
     1                FR(9), TERR1(47), SUMW(7), START,
     2              SKALE(47), BIAS(47), WS(95), TOTAL(47), FLOPN(47),
     3                IQ(7), NPF, NPFS1(47)
C
C/      COMMON /SPACE1/ U(NN1)
C/      COMMON /SPACE2/ P(NN2)
C/      COMMON /SPACER/ A11(NN0)
C/C
C               Working storage, each block seen as one flat array.
        COMMON /SPACE1/ U(19977)
        COMMON /SPACE2/ P(34132)
        COMMON /SPACER/ A11(39)
C
C
C***********************************************************************
C           Method 1:  Least space and most cpu time (D.P. SIGNAL arith)
C***********************************************************************
C
Csmall      IP1= i+1
Csmall      nt0= 39
CsmallC
Csmall      CALL SIGNAL(  U, SKALE(IP1), BIAS(IP1), nt1)
Csmall      CALL SIGNAL(  P, SKALE(IP1), BIAS(IP1), nt2)
Csmall      CALL SIGNAL(A11, SKALE(IP1), BIAS(IP1), nt0)
Csmall      RETURN
C
C***********************************************************************
C           Method 2:  Double space and least cpu time
C***********************************************************************
C
C               Pristine copies of the generated data, one per block.
        COMMON /BASE1/ BUFU(19977)
        COMMON /BASE2/ BUFP(34132)
        COMMON /BASER/ BUFA(39)
C
      nscal= 39
      isel = i+1
C
C               Generate the data on the first call only.
      ibuf= ibuf+1
      IF( ibuf.EQ.1 ) THEN
          CALL SIGNAL(  BUFU, SKALE(isel), BIAS(isel), nt1)
          CALL SIGNAL(  BUFP, SKALE(isel), BIAS(isel), nt2)
          CALL SIGNAL(  BUFA, SKALE(isel), BIAS(isel), nscal)
      ENDIF
C
C               Refresh the working storage from the saved copies.
      DO 10 j= 1,nt1
          U(j)= BUFU(j)
   10 CONTINUE
      DO 20 j= 1,nt2
          P(j)= BUFP(j)
   20 CONTINUE
      DO 30 j= 1,nscal
          A11(j)= BUFA(j)
   30 CONTINUE
C
      RETURN
      END
C
C***********************************************************************
      SUBROUTINE  TALLY( iou, mode )
C***********************************************************************
C                                                                      *
C    TALLY      computes average and minimum Cpu timings and variances.*
C                                                                      *
C               iou -  i/o unit number                                 *
C                                                                      *
C              mode -  = 1 selects average run time: Preferred mode.   *
C                      = 2 selects minimum run time: Less accurate mode*
C                                                                      *
C***********************************************************************
C
C     Results are stored in /SPACE0/ for each kernel k= 1..mk:
C        TIME(k)   mean (mode other than 2) or minimum (mode= 2) of
C                  TIMES(1..Nruns,il,k)
C        CSUM(k)   sum of CSUMS(1..Nruns,il,k)
C        TERR1(k)  larger of 100*stadev/mean of the run times and the
C                  mean of TERRS(1..Nruns,il,k)
C        TOTAL(k)  mean of DOS(1..Nruns,il,k)
C        FLOPN(k)  mean of FOPN(1..Nruns,il,k)
C     Two tables are written to unit iou along the way.
C
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
      DOUBLE PRECISION  cs
C
C/      PARAMETER( nk= 47, nl= 3, nr= 8 )
C
      COMMON /ALPHA/ mk,ik,ml,il,Nruns,jr, NPFS(8,3,47)
      COMMON /BETA / tic, TIMES(8,3,47), SEE(5,3,8,3),
     1              TERRS(8,3,47), CSUMS(8,3,47),
     2              FOPN(8,3,47), DOS(8,3,47)
C
      COMMON /SPACE0/ TIME(47), CSUM(47), WW(47), WT(47), ticks,
     1                FR(9), TERR1(47), SUMW(7), START,
     2              SKALE(47), BIAS(47), WS(95), TOTAL(47), FLOPN(47),
     3                IQ(7), NPF, NPFS1(47)
C
C     S1..S4 receive STATS results; T1 holds per-run tick averages and
C     T4 the per-kernel timing errors.
      DIMENSION  S1(12), S2(12), S3(12), S4(12)
      DIMENSION  T1(47), T4(47)
C
C     SIZES(-1) reloads mk, ml, Nruns and tic and returns at once.
           CALL  SIZES(-1)
C
C     m indexes the STATS result used as the kernel time:
C     1 = mean, 3 = minimum.
      m= 1
      IF( mode .EQ. 2 )  m= 3
C
C     Force the run and kernel controls into the declared array bounds.
      IF(    jr .LT.  1)     jr= 1
      IF(    jr .GE.  8)     jr= 8-1
      IF( Nruns .LT.  1)  Nruns= 1
      IF( Nruns .GE.  8)  Nruns= 8-1
      IF(    il .LT.  1)     il= 1
      IF(    il .GT.  3)     il= 3
      IF(    mk .LT.  1)     mk= 1
      IF(    mk .GT. 24)     mk= 24
C
      CALL  PAGE(iou)
      WRITE( iou, 99)
      WRITE( iou,100)
C
C     One TICK line per run from SEE(1..4,1,j,il).  A run is flagged
C     when SEE(3,.,j,il) is below 0.01 or SEE(4,.,j,il) exceeds 1.0 for
C     second index 2 or 3; the DATA lines are printed for flagged runs
C     and always for the last run.
      DO 2 j= 1,Nruns
      WRITE( iou,102)  j, ( SEE(k,1,j,il), k= 1,4)
      T1(j)= SEE(1,1,j,il)
      i= 0
      IF( (SEE(3,2,j,il).LT. 0.01) .OR. (SEE(4,2,j,il).GT. 1.0))  i= i+1
      IF( (SEE(3,3,j,il).LT. 0.01) .OR. (SEE(4,3,j,il).GT. 1.0))  i= i+1
      IF( i.GT.0 )  THEN
      WRITE( iou,131)  j, il
      ENDIF
      IF( ( j.EQ.Nruns ) .OR. ( i.GT.0 ))  THEN
      WRITE( iou,104)  j, ( SEE(k,2,j,il), k= 1,4)
      WRITE( iou,104)  j, ( SEE(k,3,j,il), k= 1,4)
      ENDIF
    2 continue
C
C     Summary line: statistics of the per-run tick averages.
      CALL STATS( S1, T1, Nruns)
      WRITE( iou,102)  Nruns, ( S1(k), k= 1,4)
C
C
C
      WRITE( iou,120) Nruns
      WRITE( iou,122)
      WRITE( iou,121)
      WRITE( iou,122)
C
C     Per-kernel pass.
      DO 8 k= 1,mk
        npft= 0
          cs= 0.0
C
C     Totals over the runs: NPFS into npft, CSUMS into cs.
      DO 4 j= 1,Nruns
        npft= npft +  NPFS(j,il,k)
          cs= cs   + CSUMS(j,il,k)
    4 continue
C
C     Run-time statistics; the 1.0e-9 guards the division when the mean
C     time is zero.
      CALL  STATS( S2, TIMES(1,il,k), Nruns)
      TIME(k)= S2(m)
      CSUM(k)= cs
      TERR1(k)= 100.0*( S2(2)/( S2(1) + 1.0e-9))
      T4(k)= TERR1(k)
C
C
C     If this clock resolution test fails, you must increase Loop (Subr. SIZES)
C
      CALL  STATS( S3, TERRS(1,il,k), Nruns)
         IF( S3(1) .GT. 15.0)  THEN
            WRITE( iou,113) k
         ENDIF
C
      WRITE( iou,123) k, S2(3), S2(1), S2(4), TERR1(k), S3(1), npft
      TERR1(k)= MAX( TERR1(k), S3(1))
C
C     DOS and FOPN are reported as invalid when their mean is not
C     positive or their minimum and maximum over the runs differ by
C     more than 1.0E-5.
      CALL  STATS( S1, DOS(1,il,k), Nruns)
      TOTAL(k)= S1(1)
           IF( (S1(1).LE.0.) .OR. (ABS(S1(3)-S1(4)).GT.1.0E-5)) THEN
           WRITE( iou,131) il, k, ( S1(k4), k4= 1,4)
           ENDIF
      CALL  STATS( S4, FOPN(1,il,k), Nruns)
      FLOPN(k)= S4(1)
           IF( (S4(1).LE.0.) .OR. (ABS(S4(3)-S4(4)).GT.1.0E-5)) THEN
           WRITE( iou,131) il, k, ( S4(k4), k4= 1,4)
           ENDIF
    8 continue
C
C     Closing summary: statistics of the timing errors over all kernels.
      WRITE( iou,122)
      CALL  STATS( S4, T4, mk)
      WRITE( iou,124)
      WRITE( iou,133)
      WRITE( iou,125)  ( S4(k), k= 1,4)
C
      RETURN
C
   99 FORMAT(//,29H CPU CLOCK OVERHEAD (t err):  )
  100 FORMAT(/,6X,3HRUN,8X,7HAVERAGE,8X,7HSTANDEV,8X,7HMINIMUM,8X,
     1 7HMAXIMUM )
  102 FORMAT(1X,5HTICK ,I3,4E15.6)
  104 FORMAT(1X,5HDATA ,I3,4E15.6)
  113 FORMAT(/,1X,I2,45H POOR CPU CLOCK RESOLUTION; NEED LONGER RUN. )
  120 FORMAT(//,39H THE EXPERIMENTAL TIMING ERRORS FOR ALL,I3,5H RUNS)
  121 FORMAT(55H  k   T min      T avg      T max    T err   tick   P-F)
  122 FORMAT(55H --  ---------  ---------  --------- -----  -----   ---)
  123 FORMAT(1X,I2,3E11.4,F6.2,1H%,F6.2,1H%,1X,I5)
  124 FORMAT(//,50H NET CPU TIMING VARIANCE (T err);  A few % is ok: )
  125 FORMAT(4X,5H Terr,4(F14.2,1H%))
  131 FORMAT(1X,25H** ERROR: INVALID DATA** ,2I6,4E14.6)
  133 FORMAT(/,17X,7HAVERAGE,8X,7HSTANDEV,8X,7HMINIMUM,8X,7HMAXIMUM )
      END
C
C***********************************************
      SUBROUTINE TDIGIT( derr, nzd, s )
C***********************************************************************
C                                                                      *
C     TDIGIT  -  Count Lead Digits Followed By Trailing Zeroes.        *
C                                                                      *
C       derr  -  Result,  Digital Error in percent.                    *
C        nzd  -  Result,  Number Of Lead Digits                        *
C          s  -  Input ,  A Floated Integer                            *
C                                                                      *
C***********************************************************************
C
C     Method: ABS(s) is scaled by a power of ten into the range 1..10
C     and then multiplied by ten (at most n= 14 times) until its
C     fractional part is within fuzz of 0 or 1.  With x the scaled
C     value reached, derr= 50/x and nzd= INT(LOG10(9.99999*x)).
C     For s= 0 the routine returns derr= 100. and nzd= 0.
C
C     The scaling power of ten is formed in floating point: the integer
C     expression 10**k exceeds a 32-bit integer once k reaches 10,
C     i.e. for ABS(s) of 1.0E9 and above.  The fractional part uses the
C     generic AINT for the same reason, and LOG10 is used in its
C     generic form so the double-precision variant also compiles.
C
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
C                                  Fractional part of ABS(z)
      frac(z)= (ABS(z) - AINT(ABS(z)))
C
            n= 14
            x= ABS(s)
         fuzz= 1.0e-6
         derr= 100.
          nzd= 0
           IF( x.EQ. 0.0)  RETURN
C                                  Normalize x
            y= LOG10(x)
          ten= 10.0
            v= ten**( ABS( INT(y)) + 1 )
C                                  x .GE. 1 is scaled down, x .LT. 1 up
           IF( (y.GE. 0.0) .AND. (v.NE. 0.0))  v= 10./v
            x= x*v
C                                  Multiply x Until Trailing Digits= Fuzz
       DO 1 k= 1,n
           IF( ((1.-frac(x)).LE.fuzz) .OR. (frac(x).LE.fuzz)) GO TO 2
            x= 10.*x
    1 continue
C
C                                  Half a unit of the last lead digit,
C                                  expressed in percent of x
    2      IF( x.NE. 0.0)  derr= 50./x
           IF( x.NE. 0.0)   nzd= INT( LOG10( ABS( 9.99999*x )))
C
      RETURN
      END
C
C***********************************************
      SUBROUTINE  TEST(i)
C***********************************************
C
C     TIME, TEST, AND INITIALIZE THE EXECUTION OF KERNEL i.
C
C     i      -  INPUT  number of the kernel that has just been run.
C                      i= 0 only prepares the first kernel;  a value
C                      outside 0..kr only restarts the clock.
C
C     On entry the time elapsed since the previous call returned,
C     SECOND(0.0) - START, is taken as the raw timing TEMPUS.
C     For 1 <= i <= kr  TEST then
C       -  calls SIZES(i) and stores the net time
C          TIME(i)= TEMPUS - ticks,
C       -  stores a checksum CSUM(i) of the kernel results and the
C          count TOTAL(i) built from the saved Loop (NP) and n (NN),
C       -  stores the timing error TERR1(i)= 100*ticks/TEMPUS (%)
C          and NPFS1(i)= NPF, and reports them on unit ion if ion>0,
C       -  calls VALUES(i) and SIZES(i+1) for the next kernel.
C     While it= 23456 (the value TICK sets during its overhead
C     calibration) only TIME(i) is stored.
C     On exit the clock is restarted:  START= SECOND(0.0).
C
C******************************************************************************
C
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
C            SIZES      test and set the loop controls before each kernel test
C
C
C/      PARAMETER( l1= 1001, l2=  101, l1d= 2*1001 )
C/      PARAMETER( l13=  64, l13h= l13/2, l213= l13+l13h, l813= 8*l13 )
C/      PARAMETER( l14=2048, l16=  75, l416= 4*l16 , l21= 25 )
C
C/      PARAMETER( kn= 47, kn2= 95, np= 3, ls= 3*47, krs= 24)
C
C/      PARAMETER( nk= 47, nl= 3, nr= 8 )
C
C     /ALPHA/ holds the run controls mk, ik, il, Nruns and jr that
C     this routine sets and clamps below.  It must be declared here:
C     without it those names are undefined local variables, the
C     clamps have no effect, and TICK indexes SEE(1,1,jr,il) with
C     unchecked values.
C
      COMMON /ALPHA/ mk,ik,ml,il,Nruns,jr, NPFS(8,3,47)
C
      COMMON /SPACES/ ion,j5,k2,k3,Loop,m,kr,it,n13h,ibuf,
     1                n,n1,n2,n13,n213,n813,n14,n16,n416,n21,nt1,nt2
C
      COMMON /SPACER/ A11,A12,A13,A21,A22,A23,A31,A32,A33,
     2                AR,BR,C0,CR,DI,DK,
     3  DM22,DM23,DM24,DM25,DM26,DM27,DM28,DN,E3,E6,EXPMAX,FLX,
     4  Q,QA,R,RI,S,SCALE,SIG,STB5,T,XNC,XNEI,XNM
C
      COMMON /SPACE0/ TIME(47), CSUM(47), WW(47), WT(47), ticks,
     1                FR(9), TERR1(47), SUMW(7), START,
     2              SKALE(47), BIAS(47), WS(95), TOTAL(47), FLOPN(47),
     3                IQ(7), NPF, NPFS1(47)
C
      COMMON /SPACEI/ WTP(3), MUL(3), ISPAN(47,3), IPASS(47,3)
C
      INTEGER    E,F,ZONE
      COMMON /ISPACE/ E(96), F(96),
     1  IX(1001), IR(1001), ZONE(300)
C
      COMMON /SPACE1/ U(1001), V(1001), W(1001),
     1  X(1001), Y(1001), Z(1001), G(1001),
     2  DU1(101), DU2(101), DU3(101), GRD(1001), DEX(1001),
     3  XI(1001), EX(1001), EX1(1001), DEX1(1001),
     4  VX(1001), XX(1001), RX(1001), RH(2048),
     5  VSP(101), VSTP(101), VXNE(101), VXND(101),
     6  VE3(101), VLR(101), VLIN(101), B5(101),
     7  PLAN(300), D(300), SA(101), SB(101)
C
C
      COMMON /SPACE2/ P(4,512), PX(25,101), CX(25,101),
     1  VY(101,25), VH(101,7), VF(101,7), VG(101,7), VS(101,7),
     2  ZA(101,7)  , ZP(101,7), ZQ(101,7), ZR(101,7), ZM(101,7),
     3  ZB(101,7)  , ZU(101,7), ZV(101,7), ZZ(101,7),
     4  B(64,64), C(64,64), H(64,64),
     5  U1(5,101,2),  U2(5,101,2),  U3(5,101,2)
C
C
      REAL SECOND
C
C******************************************************************************
C           t= second(0)  := cumulative cpu time for task in seconds.
C
C     Read the clock before anything else so that the bookkeeping
C     below is not charged to the kernel just executed.
C
       TEMPUS= SECOND(0.0) - START
C
cLLNL     call ENDPFM
C$C                           5 get number of page faults (optional)
C$      KSTAT= LIB$STAT_TIMER(5,KPF)
C$      NPF  = KPF - IPF
C
C     Save the loop controls used by kernel i (the SIZES calls below
C     may change them) and clear Loop.
C
           NN= n
           NP= Loop
         Loop= 0
           ik= i
           IF( i.LT.0 .OR. i.GT.kr )  GO TO 200
           IF( i.EQ.0 )  GO TO 100
         CALL  SIZES(i)
C     Net Time=  Timing - Overhead Time
      TIME(i)= TEMPUS - ticks
C     Overhead calibration pass: no checksum, report or set-up.
           IF( it.EQ.23456) GO TO 200
C
C     Branch to the checksum section of kernel i ( 1 <= i <= kr ).
C
        GO TO(  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,
     .         11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
     .         21, 22, 23, 24, 25                      ), i
C
C
C
C******************************************************************************
C
    1 CSUM (1) =  SUMO ( X, n)
      TOTAL(1) =  NP*NN
      GO TO 100
C
C******************************************************************************
C
    2 CSUM (2) =  SUMO ( X, n)
      TOTAL(2) =  NP*(NN-4)
      GO TO 100
C
C******************************************************************************
C
    3 CSUM (3) =  Q
      TOTAL(3) =  NP*NN
      GO TO 100
C
C******************************************************************************
C
C     Copies X(7), X(504), X(1001) into V before summing V(1..3).
C
    4        MM= (1001-7)/2
      DO 400 k = 7,1001,MM
  400      V(k)= X(k)
      CSUM (4) = SUMO ( V, 3)
      TOTAL(4) =  NP*(((NN-5)/5)+1)*3
      GO TO 100
C
C******************************************************************************
C
    5 CSUM (5) =  SUMO ( X(2), n-1)
      TOTAL(5) =  NP*(NN-1)
      GO TO 100
C
C******************************************************************************
C
    6 CSUM (6) =  SUMO ( W, n)
      TOTAL(6) =  NP*NN*((NN-1)/2)
      GO TO 100
C
C******************************************************************************
C
    7 CSUM (7) =  SUMO ( X, n)
      TOTAL(7) =  NP*NN
      GO TO 100
C
C******************************************************************************
C
    8 CSUM (8) = SUMO ( U1,5*n*2) + SUMO ( U2,5*n*2) + SUMO ( U3,5*n*2)
      TOTAL(8) =  NP*(NN-1)*2
      GO TO 100
C
C******************************************************************************
C
    9 CSUM (9) =  SUMO ( PX, 15*n)
      TOTAL(9) =  NP*NN
      GO TO 100
C
C******************************************************************************
C
   10 CSUM (10) =  SUMO ( PX, 15*n)
      TOTAL(10) =  NP*NN
      GO TO 100
C
C******************************************************************************
C
   11 CSUM (11) =  SUMO ( X(2), n-1)
      TOTAL(11) =  NP*(NN-1)
      GO TO 100
C
C******************************************************************************
C
   12 CSUM (12) =  SUMO ( X, n-1)
      TOTAL(12) =  NP*NN
      GO TO 100
C
C******************************************************************************
C
   13 CSUM (13) =  SUMO ( P, 8*n) + SUMO ( H, 8*n)
      TOTAL(13) =  NP*NN
      GO TO 100
C
C******************************************************************************
C
   14 CSUM (14) =  SUMO ( VX,n) + SUMO ( XX,n) + SUMO ( RH,67)
      TOTAL(14) =  NP*NN
      GO TO 100
C
C******************************************************************************
C
   15 CSUM (15) =  SUMO ( VY, n*7) + SUMO ( VS, n*7)
      TOTAL(15) =  NP*(NN-1)*5
      GO TO 100
C
C******************************************************************************
C
C     Kernel 16 is checked through its integer results; its flop
C     count FLOPN(16) is formed here from k2 and k3.
C
   16 CSUM (16) =  REAL( k3+k2+j5+m)
      FLOPN(16) =  k2+k2+10*k3
      TOTAL(16) =  1.
      GO TO 100
C
C******************************************************************************
C
   17 CSUM (17) =  SUMO ( VXNE, n) + SUMO ( VXND, n) + XNM
      TOTAL(17) =  NP*NN
      GO TO 100
C
C******************************************************************************
C
   18 CSUM (18) =  SUMO ( ZR, n*7) + SUMO ( ZZ, n*7)
      TOTAL(18) =  NP*(NN-1)*5
      GO TO 100
C
C******************************************************************************
C
   19 CSUM (19) =  SUMO ( B5, n) + STB5
      TOTAL(19) =  NP*NN
      GO TO 100
C
C******************************************************************************
C
   20 CSUM (20) =  SUMO ( XX(2), n)
      TOTAL(20) =  NP*NN
      GO TO 100
C
C******************************************************************************
C
   21 CSUM (21) =  SUMO ( PX, 25*n)
      TOTAL(21) =  NP*25*25*NN
      GO TO 100
C
C******************************************************************************
C
   22 CSUM (22) =  SUMO ( W, n)
      TOTAL(22) =  NP*NN
      GO TO 100
C
C******************************************************************************
C
   23 CSUM (23) =  SUMO ( ZA, n*7)
      TOTAL(23) =  NP*(NN-1)*5
      GO TO 100
C
C******************************************************************************
C
   24 CSUM (24) =  REAL(m)
      TOTAL(24) =  NP*(NN-1)
      GO TO 100
C
C******************************************************************************
C
   25 CONTINUE
      GO TO 100
C
C******************************************************************************
C
  100 CONTINUE
C
      CALL SIZES (i+1)
C             it = 23456
               IF( it.EQ.23456) GO TO 120
               IF( i.EQ.0     ) GO TO 120
C
C     Timing error estimate: overhead ticks as a percentage of the
C     raw timing;  100% when the raw timing is exactly zero.
C
            TERR0= 100.0
               IF( TEMPUS.NE. 0.0)  TERR0= TERR0*(ticks/TEMPUS)
         TERR1(i)= TERR0
         NPFS1(i)= NPF
               IF( ion .LE. 0 )  GO TO 120
C
C     If this clock resolution test fails, you must increase Loop (Subr. SIZES)
C
               IF( TERR0 .LT. 15.0)  GO TO 114
            WRITE( ion,113) I
  113 FORMAT(/,1X,I2,45H INACCURATE TIMING OR ERROR. NEED LONGER RUN )
C
  114      WRITE ( ion,115) i, TEMPUS, TERR0, NPF
  115      FORMAT( 2X,i2,9H Done  T= ,E11.4,8H  T err= ,F8.2,1H% ,
     1             I8,13H  Page-Faults )
C
C     Load the data and loop controls for the next kernel.
C
  120 CALL VALUES(i)
      CALL SIZES (i+1)
C
C      The following pause can be used for stop-watch timing of each kernel.
C      You may have to increase the iteration count Loop in Subr. SIZES.
C
C/           pause
  200 CONTINUE
C
C     Keep the shared run controls of /ALPHA/ inside fixed limits
C     ( jr and Nruns in 1..7,  il in 1..3,  mk in 1..24 ).
C
      IF(    jr .LT.  1)     jr= 1
      IF(    jr .GE.  8)     jr= 8-1
      IF( Nruns .LT.  1)  Nruns= 1
      IF( Nruns .GE.  8)  Nruns= 8-1
      IF(    il .LT.  1)     il= 1
      IF(    il .GT.  3)     il= 3
      IF(    mk .LT.  1)     mk= 1
      IF(    mk .GT. 24)     mk= 24
C
C
C$C                           5 get number of page faults (optional)
C$      NSTAT= LIB$STAT_TIMER(5,IPF)
cLLNL     call PFM( 0, ion)
C
C     Restart the clock as the very last action so that the next
C     TEMPUS covers only what runs after this routine returns.
C
         ik= i+1
      START= SECOND(0.0)
      RETURN
C
C$      DATA  IPF/0/, KPF/0/
      END
C
C***********************************************
      FUNCTION TICK( iou )
C***********************************************
C
C     TICK   -  Measure the timing overhead of one call of Subr. TEST.
C
C     iou    -  Logical Output Device Number;  nothing is printed
C               when iou <= 0.
C
C     RESULT -  the first statistic STATS returns in SEE(1,1,jr,il)
C               for the accepted overhead timings (printed below
C               under the heading AVERAGE).  The same value is left
C               in ticks of /SPACE0/.
C
C     Side effects visible here:  ion, kr, n, Loop, k2, k3, m and it
C     are reset;  TIME and CSUM are zeroed;  SEE(.,2,jr,il) and
C     SEE(.,3,jr,il) receive STATS of U(1..nt1) and P(1..nt2);
C     IX is refilled by IQRANF.
C
C***********************************************
C
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
C
C/      PARAMETER( l1= 1001, l2=  101, l1d= 2*1001 )
C/      PARAMETER( l13=  64, l13h= l13/2, l213= l13+l13h, l813= 8*l13 )
C/      PARAMETER( l14=2048, l16=  75, l416= 4*l16 , l21= 25 )
C
C/      PARAMETER( kn= 47, kn2= 95, np= 3, ls= 3*47, krs= 24)
C
C/      PARAMETER( nk= 47, nl= 3, nr= 8 )
C
      COMMON /ALPHA/ mk,ik,ml,il,Nruns,jr, NPFS(8,3,47)
      COMMON /BETA / tic, TIMES(8,3,47), SEE(5,3,8,3),
     1              TERRS(8,3,47), CSUMS(8,3,47),
     2              FOPN(8,3,47), DOS(8,3,47)
C
C
      COMMON /SPACES/ ion,j5,k2,k3,Loop,m,kr,it,n13h,ibuf,
     1                n,n1,n2,n13,n213,n813,n14,n16,n416,n21,nt1,nt2
C
      COMMON /SPACER/ A11,A12,A13,A21,A22,A23,A31,A32,A33,
     2                AR,BR,C0,CR,DI,DK,
     3  DM22,DM23,DM24,DM25,DM26,DM27,DM28,DN,E3,E6,EXPMAX,FLX,
     4  Q,QA,R,RI,S,SCALE,SIG,STB5,T,XNC,XNEI,XNM
C
      COMMON /SPACE0/ TIME(47), CSUM(47), WW(47), WT(47), ticks,
     1                FR(9), TERR1(47), SUMW(7), START,
     2              SKALE(47), BIAS(47), WS(95), TOTAL(47), FLOPN(47),
     3                IQ(7), NPF, NPFS1(47)
C
      COMMON /SPACEI/ WTP(3), MUL(3), ISPAN(47,3), IPASS(47,3)
C
      INTEGER    E,F,ZONE
      COMMON /ISPACE/ E(96), F(96),
     1  IX(1001), IR(1001), ZONE(300)
C
      COMMON /SPACE1/ U(1001), V(1001), W(1001),
     1  X(1001), Y(1001), Z(1001), G(1001),
     2  DU1(101), DU2(101), DU3(101), GRD(1001), DEX(1001),
     3  XI(1001), EX(1001), EX1(1001), DEX1(1001),
     4  VX(1001), XX(1001), RX(1001), RH(2048),
     5  VSP(101), VSTP(101), VXNE(101), VXND(101),
     6  VE3(101), VLR(101), VLIN(101), B5(101),
     7  PLAN(300), D(300), SA(101), SB(101)
C
      COMMON /SPACE2/ P(4,512), PX(25,101), CX(25,101),
     1  VY(101,25), VH(101,7), VF(101,7), VG(101,7), VS(101,7),
     2  ZA(101,7)  , ZP(101,7), ZQ(101,7), ZR(101,7), ZM(101,7),
     3  ZB(101,7)  , ZU(101,7), ZV(101,7), ZZ(101,7),
     4  B(64,64), C(64,64), H(64,64),
     5  U1(5,101,2),  U2(5,101,2),  U3(5,101,2)
C
C     TRAW  -  raw copies of the eight overhead timings
C     KEEP  -  index list returned by VALID (not used afterwards)
C
      DIMENSION  TRAW(16), KEEP(47)
C
C     Put TEST into calibration mode:  it= 23456 makes TEST store
C     only TIME(i);  ticks= 0 leaves those timings uncorrected.
C
      ticks= 0.0
      it   = 23456
      ion  = iou
      kr   = mk
      Loop = 0
      n    = 0
      m    = 0
      k2   = 0
      k3   = 0
C
C     The calls stay written out one by one:  each timing is the
C     interval between the return of one call and the entry of the
C     next, so the call sequence itself is what is being measured.
C
      CALL TEST(0)
      CALL TEST(1)
      CALL TEST(2)
      CALL TEST(3)
      CALL TEST(4)
      CALL TEST(5)
      CALL TEST(6)
      CALL TEST(7)
      CALL TEST(8)
      it = 0
C
      DO 30 k= 1,8
         TRAW(k)= TIME(k)
   30 CONTINUE
C
C     Keep only timings strictly between the two bounds;  they are
C     packed into the front of TIME and counted in nok.
C
      tlo=  0.00D0
      thi= 10.00D0
      CALL  VALID( TIME,KEEP,nok, tlo, TRAW, thi, 8)
      CALL  STATS( SEE(1,1,jr,il), TIME, nok)
c     ticks= MAX( tic, SEE(1,1,jr,il))
      ticks= SEE(1,1,jr,il)
      TICK = ticks
C
C     Wipe the scratch use of TIME (and CSUM) before the real runs.
C
      DO 40 k= 1,47
         CSUM(k)= 0.0
         TIME(k)= 0.0
   40 CONTINUE
C
      CALL  STATS( SEE(1,2,jr,il), U, nt1)
      CALL  STATS( SEE(1,3,jr,il), P, nt2)
C
      ilow = 1
      ihigh= 1001
      CALL IQRANF( IX, ilow, ihigh, 1001)
C
C     Optional report of the three sets of statistics.
C
      IF( iou.GT.0 )  THEN
         WRITE( iou, 99)
         WRITE( iou,100)
         WRITE( iou,102)  ( SEE(k,1,jr,il), k= 1,4)
         WRITE( iou,104)  ( SEE(k,2,jr,il), k= 1,4)
         WRITE( iou,104)  ( SEE(k,3,jr,il), k= 1,4)
      ENDIF
C
      RETURN
C
   99 FORMAT(//,17H CLOCK OVERHEAD:  )
  100 FORMAT(/,14X,7HAVERAGE,8X,7HSTANDEV,8X,7HMINIMUM,8X,7HMAXIMUM )
  102 FORMAT(/,1X,5H TICK,4E15.6)
  104 FORMAT(/,1X,5H DATA,4E15.6)
      END
C
C***********************************************
      SUBROUTINE TILE( sm, si, OX,IX,W,ew,T,tiles,n)
C***********************************************
C
C     TILE       computes  m-tile value and corresponding index
C
C     sm      -  RESULT VALUE  IS m-TILE VALUE
C     si      -  RESULT VALUE  IS CORRESPONDING INDEX.r IN W
C
C     OX      -  INPUT  ARRAY  OF ORDERED (DESCENDING) Xs.
C     IX      -  INPUT  ARRAY  OF INDEX LIST MAPS X TO OX.
C     W       -  INPUT  ARRAY  OF INPUT  WEIGHTS.
C     ew      -  INPUT  VALUE  FLAGS EQUAL WEIGHTS= 1.0; ELSE 0.0
C     T       -  INPUT  VALUE  IS SUM OF WEIGHTS
C     tiles   -  INPUT  VALUE  IS FRACTION OF RANGE, E.G. 0.25
C     n       -  INPUT  NUMBER OF INPUT  VALUES IN X.
C
C     Method:  the weights W(IX(k)) are accumulated in the order
C     k= 1,2,..  until the running sum first exceeds the threshold
C     tiles*T + 0.5*ew*W(1).  The result is interpolated linearly
C     between OX(k-1) and OX(k) at the fraction z of that last
C     weight needed to reach the threshold ( 0.0 is used in place
C     of OX(0) when k= 1 ).  If the threshold is never exceeded
C     k= n is used.
C
C     For n < 1 there is nothing to interpolate:  sm= si= 0.0 is
C     returned and none of the arrays is referenced.
C
C***********************************************
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
      DIMENSION  OX(n), IX(n), W(n)
C
C     Empty input:  without this guard S is read undefined and
C     W(1), OX(0) are referenced outside the arrays.
C
      IF( n.LT.1 )  THEN
           sm= 0.0
           si= 0.0
         RETURN
      ENDIF
C
       thresh= tiles*T + 0.50*ew*W(1)
            R= 0.0
            S= 0.0
C
C     S is the running sum before, R after, adding weight k.
C
      DO 70 k= 1,n
            S= R
            R= R + W( IX(k))
           IF( R .GT. thresh )  GO TO 7
   70 CONTINUE
C     Threshold never exceeded:  use the last element.
            k= n
    7       z= 0.0
            y= 0.0
           IF( k.GT.1 )    y =   OX(k-1)
C     z stays 0.0 when weight k is zero ( R= S ).
           IF( R.NE.S )    z = ( thresh - S)/( R - S)
           sm= y         + z * ( OX(k)  - y)
           si= REAL(k-1) + z
C
      RETURN
      END
C
C***********************************************
      SUBROUTINE VALID( VX,MAP,L,  BL,X,BU,n )
C***********************************************
C
C     VALID  -  Pack the in-range members of X into the front of VX
C               and record the position each one came from.
C
C     A value X(j) is kept unless  X(j) <= BL  or  X(j) >= BU.
C
C     VX    - ARRAY  OF RESULT: THE KEPT VALUES, IN INPUT ORDER.
C     MAP   - ARRAY  OF RESULT: MAP(k) IS THE INDEX IN X OF VX(k).
C     L     -           RESULT: NUMBER OF VALUES KEPT.
C           -
C     BL    -           INPUT LOWER BOUND (EXCLUDED).
C     X     - ARRAY  OF INPUT VALUES.
C     BU    -           INPUT UPPER BOUND (EXCLUDED).
C     n     - NUMBER OF INPUT VALUES IN X.
C
C     Entries of VX and MAP beyond L are left untouched.
C
C***********************************************
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
      DIMENSION  VX(n), MAP(n), X(n)
CLLL. OPTIMIZE LEVEL G
C
      nkeep= 0
      DO 10 j= 1,n
         xj= X(j)
         IF( .NOT.( xj.LE.BL .OR. xj.GE.BU ))  THEN
            nkeep     = nkeep + 1
            MAP(nkeep)= j
            VX(nkeep) = xj
         ENDIF
   10 CONTINUE
C
      L= nkeep
      RETURN
      END
C
C***********************************************
      SUBROUTINE VALUES(i)
C***********************************************
C
C     VALUES  -  Load the special data values needed before the
C                next kernel is run.
C
C     i    :=  kernel number;  the extra set-up performed here is
C              selected by i+1.
C
C     SUPPLY(i) is called first.  Additional values are then set
C     when i+1 is 13, 14 or 16, and finally every fourth word of
C     the array CACHE is rewritten.
C
C****************************************************************************
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
      DOUBLE PRECISION  dval, dstep, dscal
C
C/      PARAMETER( l1=   1001, l2=   101, l1d= 2*1001 )
C/      PARAMETER( l13= 64, l13h= 64/2, l213= 64+32, l813= 8*64 )
C/      PARAMETER( l14= 2048, l16= 75, l416= 4*75 , l21= 25)
C
C/      PARAMETER( kn= 47, kn2= 95, np= 3, ls= 3*47, krs= 24)
C
      COMMON /SPACES/ ion,j5,k2,k3,Loop,m,kr,it,n13h,ibuf,
     1                n,n1,n2,n13,n213,n813,n14,n16,n416,n21,nt1,nt2
C
      COMMON /SPACER/ A11,A12,A13,A21,A22,A23,A31,A32,A33,
     2                AR,BR,C0,CR,DI,DK,
     3  DM22,DM23,DM24,DM25,DM26,DM27,DM28,DN,E3,E6,EXPMAX,FLX,
     4  Q,QA,R,RI,S,SCALE,SIG,STB5,T,XNC,XNEI,XNM
C
      COMMON /SPACE0/ TIME(47), CSUM(47), WW(47), WT(47), ticks,
     1                FR(9), TERR1(47), SUMW(7), START,
     2              SKALE(47), BIAS(47), WS(95), TOTAL(47), FLOPN(47),
     3                IQ(7), NPF, NPFS1(47)
C
      COMMON /SPACEI/ WTP(3), MUL(3), ISPAN(47,3), IPASS(47,3)
C
      INTEGER    E,F,ZONE
      COMMON /ISPACE/ E(96), F(96),
     1  IX(1001), IR(1001), ZONE(300)
C
      COMMON /SPACE1/ U(1001), V(1001), W(1001),
     1  X(1001), Y(1001), Z(1001), G(1001),
     2  DU1(101), DU2(101), DU3(101), GRD(1001), DEX(1001),
     3  XI(1001), EX(1001), EX1(1001), DEX1(1001),
     4  VX(1001), XX(1001), RX(1001), RH(2048),
     5  VSP(101), VSTP(101), VXNE(101), VXND(101),
     6  VE3(101), VLR(101), VLIN(101), B5(101),
     7  PLAN(300), D(300), SA(101), SB(101)
C
      COMMON /SPACE2/ P(4,512), PX(25,101), CX(25,101),
     1  VY(101,25), VH(101,7), VF(101,7), VG(101,7), VS(101,7),
     2  ZA(101,7)  , ZP(101,7), ZQ(101,7), ZR(101,7), ZM(101,7),
     3  ZB(101,7)  , ZU(101,7), ZV(101,7), ZZ(101,7),
     4  B(64,64), C(64,64), H(64,64),
     5  U1(5,101,2),  U2(5,101,2),  U3(5,101,2)
C
      COMMON /SPACE3/ CACHE(8192)
C
C     ******************************************************************
C
      knext= i+1
C
      CALL  SUPPLY( i)
C
C     ---- i+1 = 13 ----------------------------------------------------
C     P receives 1.0, 1.5, 2.0, ...  in the order P(1,1), P(1,2),
C     ..., P(1,512), P(2,1), ...;  E and F are set to all ones.
C
      IF( knext.EQ.13 )  THEN
         dval = 1.000D0
         dstep= 0.500D0
         DO 220 j= 1,4
            DO 210 k= 1,512
               P(j,k)= dval
               dval  = dval + dstep
  210       CONTINUE
  220    CONTINUE
C
         DO 230 j= 1,96
            E(j)= 1
            F(j)= 1
  230    CONTINUE
      ENDIF
C
C     ---- i+1 = 14 ----------------------------------------------------
C     IX is refilled by IQRANF, DEX is scaled by -100 in place,
C     and GRD receives the values of IX.
C
      IF( knext.EQ.14 )  THEN
         ilow = 1
         ihigh= 1001
         CALL IQRANF( IX, ilow, ihigh, 1001)
C
         dscal= -100.000D0
         DO 240 j= 1,1001
            DEX(j)= dscal*DEX(j)
            GRD(j)= IX(j)
  240    CONTINUE
         FLX= 0.00100D0
      ENDIF
C
C     ---- i+1 = 16 ----------------------------------------------------
C     D is built by the recurrence  D(k)= D(k-1) + 1.0E-4/D(k-1);
C     PLAN and ZONE are filled in two passes of 2*n entries each.
C     The shared scalars m, R, S and T are left holding
C     4*n, D(n), D(n-1) and D(n-2).
C
      IF( knext.EQ.16 )  THEN
         nzone= n
         grow = 1.000D-4
         D(1) = 1.0198048642876400D0
         DO 410 k= 2,300
            D(k)= D(k-1) + grow/D(k-1)
  410    CONTINUE
         R  = D(nzone)
         one= 1.000D0
         DO 440 lpass= 1,2
            m= (nzone+nzone)*(lpass-1)
            DO 430 j= 1,2
               DO 420 k= 1,nzone
                  m      = m+1
                  S      = REAL(k)
                  PLAN(m)= R*((S + one)/S)
                  ZONE(m)= k+k
  420          CONTINUE
  430       CONTINUE
  440    CONTINUE
         ZONE(nzone+nzone+1)= nzone
         S= D(nzone-1)
         T= D(nzone-2)
      ENDIF
C
c               Clear the scalar Cache-memory with never used data-set.
c     fw= 1.000D0
c     CALL SIGNAL( CACHE, fw, 0.0, 8192)
c
C     Words 1, 5, 9, 13, ... of CACHE are set to 0.0, 0.0, 0.1,
C     0.2, ... ;  each value is the one four words earlier + 0.1.
C
      CACHE(1)= 0.0
      CACHE(5)= 0.0
      DO 500 k= 9,8192,8
         CACHE(k  )= CACHE(k-4) + 0.1
         CACHE(k+4)= CACHE(k  ) + 0.1
  500 CONTINUE
C
      RETURN
      END
C
C***********************************************
      SUBROUTINE VERIFY( iou)
C***********************************************************************
C                                                                      *
C      VERIFY     auxiliary test routine to check-out function SECOND  *
C                 and to verify that sufficiently long Loop sizes are  *
C                 defined in Subr. SIZES for accurate CPU timing.      *
C                                                                      *
C       iou    -  Logical Output Device Number                         *
C                                                                      *
C      Outline of what is done below:                                  *
C       1. The inner loop of Kernel 12 is timed Nruns+1 times for      *
C          each of up to 18 repetition counts lo= 1,2,4,...  The       *
C          statistics of each set come from STATS, the digital clock   *
C          error from TDIGIT.  After the 13th size the scan stops      *
C          as soon as the dynamic error is below 4%.                   *
C       2. One table row is printed per size, with the Loop value      *
C          currently set by SIZES(12) marked in its place.             *
C       3. A recurrence loop is timed once and scaled to give an       *
C          approximate job time.                                       *
C                                                                      *
C      Side effects:  il, n and m in COMMON are overwritten, and       *
C      X, CX(1,.) and PX(1,.) are used as work space.                  *
C                                                                      *
C***********************************************************************
cANSI IMPLICIT  DOUBLE PRECISION (A-H,O-Z)
cIBM  IMPLICIT  REAL*8           (A-H,O-Z)
C
C
C/      PARAMETER( l1=   1001, l2=   101, l1d= 2*1001 )
C/      PARAMETER( l13= 64, l13h= 64/2, l213= 64+32, l813= 8*64 )
C/      PARAMETER( l14= 2048, l16= 75, l416= 4*75 , l21= 25)
C/      PARAMETER( kn= 47, kn2= 95, np= 3, ls= 3*47, krs= 24)
C
C
      COMMON /SPACE1/ U(1001), V(1001), W(1001),
     1  X(1001), Y(1001), Z(1001), G(1001),
     2  DU1(101), DU2(101), DU3(101), GRD(1001), DEX(1001),
     3  XI(1001), EX(1001), EX1(1001), DEX1(1001),
     4  VX(1001), XX(1001), RX(1001), RH(2048),
     5  VSP(101), VSTP(101), VXNE(101), VXND(101),
     6  VE3(101), VLR(101), VLIN(101), B5(101),
     7  PLAN(300), D(300), SA(101), SB(101)
C
      COMMON /SPACE2/ P(4,512), PX(25,101), CX(25,101),
     1  VY(101,25), VH(101,7), VF(101,7), VG(101,7), VS(101,7),
     2  ZA(101,7)  , ZP(101,7), ZQ(101,7), ZR(101,7), ZM(101,7),
     3  ZB(101,7)  , ZU(101,7), ZV(101,7), ZZ(101,7),
     4  B(64,64), C(64,64), H(64,64),
     5  U1(5,101,2),  U2(5,101,2),  U3(5,101,2)
C
      COMMON /ALPHA/ mk,ik,ml,il,Nruns,jr, NPFS(8,3,47)
C
      COMMON /SPACES/ ion,j5,k2,k3,Loop,m,kr,it,n13h,ibuf,
     1                n,n1,n2,n13,n213,n813,n14,n16,n416,n21,nt1,nt2
C
      COMMON /SPACEI/ WTP(3), MUL(3), ISPAN(47,3), IPASS(47,3)
C
C     TIM  -  timings of the repeated runs at one loop size
C     TUM  -  statistics of TIM returned by STATS; TUM(1), TUM(2)
C             and TUM(4) are used below
C     TAV, TMX, TER, SIG, LEN  -  per loop size: TUM(1), TUM(4),
C             dynamic error (%), digital error from TDIGIT, and the
C             repetition count lo
C
      DIMENSION  TIM(20), TUM(20), TAV(20), TER(20), TMX(20)
      DIMENSION  LEN(20), SIG(20)
C
C
C****************************************************************************
C         VERIFY ADEQUATE Loop SIZE VERSUS CPU CLOCK ACCURACY
C****************************************************************************
C
C         VERIFY produced the following output on CRAY-XMP4 in a
C         fully loaded, multi-processing, multi-programming system:
C
C
C         VERIFY ADEQUATE Loop SIZE VERSUS CPU CLOCK ACCURACY
C         -----     -------     -------    -------   --------
C         EXTRA     MAXIMUM     DIGITAL    DYNAMIC   RELATIVE
C         Loop      CPUTIME     CLOCK      CLOCK     TIMING
C         SIZE      SECONDS     ERROR      ERROR     ERROR
C         -----     -------     -------    -------   --------
C             1  5.0000e-06      10.00%     17.63%     14.26%
C             2  7.0000e-06       7.14%      6.93%      4.79%
C             4  1.6000e-05       3.12%      6.56%      7.59%
C             8  2.8000e-05       1.79%      2.90%      2.35%
C            16  6.1000e-05       0.82%      6.72%      4.50%
C            32  1.1700e-04       0.43%      4.21%      4.62%
C            64  2.2700e-04       0.22%      3.13%      2.41%
C           128  4.4900e-04       0.11%      3.14%      0.96%
C           256  8.8900e-04       0.06%      2.06%      2.50%
C           512  1.7740e-03       0.03%      1.92%      1.59%
C          1024  3.4780e-03       0.01%      0.70%      1.63%
C          1360              Current Run:    Loop=   10.000*Loop
C          2048  7.0050e-03       0.01%      0.74%      1.28%
C          4096  1.3823e-02       0.00%      1.35%      0.78%
C         -----     -------     -------    -------   --------
C
C          Approximate Serial Job Time=   2.5e+01 Sec.    ( Nruns= 7 RUNS)
C
C****************************************************************************
c
c
      DO  1 k = 1,101
          X(k)= 0.0
    1  CX(1,k)= 0.0
C
C     to = cost of reading the clock twice in a row; it is taken
C     off every timing below.
C
            t0= SECOND(0.0)
            to= SECOND(0.0) - t0
C
            il= 2
           CALL SIZES(12)
C
C     scale = Loop set by SIZES(12) relative to IPASS(12,il)*MUL(il).
C
         loops= IPASS(12,il)*MUL(il)
         scale=  REAL(Loop)/( REAL(loops) - 1.0e-6)
                WRITE( iou,45)
                WRITE( iou,49)
                WRITE( iou,46)
                WRITE( iou,47)
                WRITE( iou,48)
                WRITE( iou,49)
   45 FORMAT(8X,51HVERIFY ADEQUATE Loop SIZE VERSUS CPU CLOCK ACCURACY)
   46 FORMAT(8X,'EXTRA     MAXIMUM     DIGITAL    DYNAMIC   RELATIVE')
   47 FORMAT(8X,'Loop      CPUTIME     CLOCK      CLOCK     TIMING  ')
   48 FORMAT(8X,'SIZE      SECONDS     ERROR      ERROR     ERROR   ')
   49 FORMAT(8X,'-----     -------     -------    -------   --------')
C
C
C****************************************************************************
C     Test Cpu Clock Timing Errors As A Function Of Loop Size(lo)
C****************************************************************************
C
             m= 0
            lo= 1
      DO 59  i= 1,18
C     Number of repeated timings, kept inside the bounds of TIM.
            mj= MAX( 1, MIN( Nruns+1, 20))
      DO 53  j= 1,mj
             n= 101-1
            t0= SECOND(0.0)
c                                    Time Kernel 12
      DO 12 L = 1,lo
      DO 12 k = 1,n
   12     X(k)= Y(k+1) - Y(k)
c
        TIM(j)= SECOND(0.0) - t0 - to
   53 continue
c                                    Compute Dynamic Clock Error
c
          CALL  STATS( TUM, TIM, mj)
         rterr= 100.0*( TUM(2)/( TUM(1) +1.0E-9))
            IF( TUM(1).LE. 0.0)  rterr= 100.0
c
c                                    Compute Digital Clock Error
c
          CALL  TDIGIT( SIG(i), nzd, TUM(4))
C
        TAV(i)= TUM(1)
        TMX(i)= TUM(4)
        TER(i)= rterr
        LEN(i)= lo
            lo= lo + lo
C     On this exit nn is still i-1, so entry nn+1 is the size that
C     met the test and serves as the reference run below.
            IF( (i.GT.13) .AND. (rterr.LT. 4.0))  GO TO 60
            nn= i
   59 continue
C
C     All 18 sizes were timed without meeting the exit test.  Make
C     the last size timed the reference entry nn+1;  otherwise
C     TAV(19) and LEN(19), which were never set, would be used.
C
            nn= 18-1
C
C
C****************************************************************************
C     Test Timing Error By Comparing Time Of Each Run With Longest Run
C****************************************************************************
C
C     tnn = reference time for size LEN(nn+1):  the mean of the
C     time measured there and twice the time measured at LEN(nn).
C
   60        m= 0
           tnn= ( TAV(nn+1) + 2.0* TAV(nn))* 0.5
          fuzz= 1.0e-9
            IF( tnn.LT.fuzz)  tnn= fuzz
      DO 69  i= 1,nn
         rterr= TER(i)
            lo= LEN(i)
c                                    Compute Relative Clock Error
c
C     rt scales the time at size LEN(i) up to size LEN(nn+1);
C     the test must exclude zero because LEN(i) is the divisor.
            rt= 0.
            IF( LEN(i).GT. 0)     rt= LEN(nn+1)/LEN(i)
         rperr= 100.
            IF( tnn.GT. fuzz)  rperr= 100.*(ABS( tnn - rt* TAV(i)) /tnn)
         WRITE( iou,64) lo, TMX(i), SIG(i),rterr, rperr
   64   FORMAT(6X,I7,E12.4,F11.2,1H%,F10.2,1H%,F10.2,1H%)
c
c                                    Find Loop Size Used in Subr. SIZES
c
C     Half-open interval lo <= Loop < 2*lo, so that a Loop equal
C     to a tabulated size is marked once, after its own row.
            IF( (Loop.GE.lo) .AND. (Loop.LT.2*lo))  THEN
                     m= lo
                WRITE( iou,66)  Loop, scale
                IF( rterr .GT. 10.0)  THEN
                  WRITE( iou, 67)
                  WRITE( iou, 68)
                ENDIF
   66 FORMAT(7X,i6,14X,21HCurrent Run:    Loop=, f9.3,5H*Loop )
   67 FORMAT(34X,44HINACCURATE TIMING OR ERROR. NEED LONGER RUN )
   68 FORMAT(34X,44HINCREASE:  Loop  OR  Nruns  IN SUBR. SIZES  )
            ENDIF
C
   69 continue
C     Loop lies outside the printed sizes:  mark it after the table.
            IF( m.LE.0 )  THEN
                WRITE( iou,66)  Loop, scale
            ENDIF
                WRITE( iou,49)
C
C
C****************************************************************************
C     Estimate Approximate Job Time By Scaling Time For Kernel 11
C****************************************************************************
C
c                 Time Kernel 11.  Calibrate with static timing of machine code
            lo= LEN(nn)
            t0= SECOND(0.0)
      DO 51 L = 1,lo
       PX(1,1)= CX(1,1) + REAL(L)*1.0e-6
       PX(1,2)= CX(1,2)
CDIR$ NOVECTOR
      DO 51 k = 3,101
   51  PX(1,k)= PX(1,k-1) + PX(1,k-2)
c
C     tx = time per inner pass;  su = tx relative to 3.2E-7 sec.
       TIM(nn)= SECOND(0.0) - t0 - to
      tx=(TIM(nn)/( 101*LEN(nn) +1.0e-9))
      su= tx/3.2E-7
      estime= su*( 2.387 + Nruns*( 0.1616*scale + 1.534))
      WRITE( iou,70) estime, Nruns
   70 FORMAT(/,9X,28HApproximate Serial Job Time= ,E10.1,5H Sec.,
     1 4X,8H( Nruns=,i2,6H RUNS),/)
C
      RETURN
C
C****************************************************************************
C
C
C                   EXTRA CLOCK CALIBRATION TEST
C
C           Timing For A Very Long Run Versus External Clock Time.
C
C
C     VERIFY is an auxiliary test that is not needed to execute MFLOPS.
C     This optional routine may be called to verify the accuracy of
C     the function SECOND programmed above to time CPU execution.
C
C     In most cases, the calibration timing in the main program MFLOPS
C     will be sufficient, if the total CPU time measured by SECOND agrees
C     well with the CPU time charged by the operating system for several,
C     stand-alone computer runs. Stable timings should be demonstrated.
C
C     The method used below times Kernel 11 using SECOND and then
C     scales this timing up to predict the time for a very long run
C     of Kernel 11.  This long run is also timed using SECOND and
C     should be long enough to time independently using external, wall-clock
C     timings and/or CPU charge time measured by the operating system.
C     Function SECOND is verified by a close agreement of the
C     predicted total CPU time, the total CPU time measured by SECOND,
C     and the CPU time measured by the operating system.
C     To increase CPU run time simply increase the value of MORE.  The
C     total CPU time is then nearly equal to the CPU time for loop DO 99.
C
C
C     Print output from VERIFY on processor:  CRAY-1S/CFT 1.11
C
C          SECOND  time for          1 loops=   4.524000e-07 Sec.
C
C          Predict time for   10000000 loops=   4.524000e+00 Sec.
C
C          SECOND  time for   10000000 loops=   4.527072e+00 Sec.
C
C
C     Print output from VERIFY on processor:  CRAY-1S/CIVIC 130k
C
C          SECOND  time for          1 loops=   3.620000e-07 Sec.
C
C          Predict time for   10000000 loops=   3.620000e+00 Sec.
C
C          SECOND  time for   10000000 loops=   3.622626e+00 Sec.
C
C     The results of this test on both processors demonstrate a
C     consistent error of three in the fourth significant digit.
C     More important these SECOND measurements agree with external timings.
C     Consistent timing accuracy of two significant digits may be adequate.
C
C     The time for one execution pass can also be compared with a
C     timing analysis of the machine code for Kernel 11 for additional
C     verification of the accuracy of function SECOND.
C     Verification of CPU time measurements in Virtual Memory systems may
C     require this degree of circumspection.
C
C****************************************************************************
C
cd          MORE= 1000
cd          loop= 100
cd             m= loop*MORE
cd             n= 101
cd         nloop= loop*(n-1)
cd        mnloop= m*(n-1)
cd             r= 1.0/float(nloop)
cd            t0= SECOND(0.0)
C
cd            t0= SECOND(0.0)
cd      DO 11 L = 1,loop
C      PX(1,1)= CX(1,1) + L*1.0e-25  use only if optimization distributes L-loop
cd       PX(1,1)= CX(1,1)
cd      DO 11 k = 2,n
cd   11  PX(1,k)= PX(1,k-1) + CX(1,k)
cd            tl= SECOND(0.0) - t0 - to
C
cd             i= 1
cd            t1= tl*r
cd            tp= tl*float(MORE)
cd         WRITE( iou,15)       i,  t1
cd   15   FORMAT(/,18H SECOND  time for ,I10,8H loops= ,E14.6,5H Sec.)
cd         WRITE( iou,16)  mnloop,  tp
cd   16   FORMAT(/,18H Predict time for ,I10,8H loops= ,E14.6,5H Sec.)
C
C****************************************************************************
C
C     The following loop should be set-up to run long enough for
C     an accurate, independent CPU job timing which should be nearly
C     equal to the total time predicted above (in WRITE-16) if the
C     timing measured by SECOND is valid.
C****************************************************************************
C
cd            t0= SECOND(0.0)
cd      DO 99 L = 1,m
C      PX(1,1)= CX(1,1) + L*1.0e-25  use only if optimization distributes L-loop
cd       PX(1,1)= CX(1,1)
cd      DO 99 k = 2,n
cd   99  PX(1,k)= PX(1,k-1) + CX(1,k)
cd            tm= SECOND(0.0) - t0 - to
C
cd         WRITE( iou,15)  mnloop,  tm
cd      STOP
      END
