include ../Makefile.inc

######################################################
# This makefile may be used (with gmake) to build a set of
# executables which will run the comparative Benchmark
# program supplied in this software bundle and described
# in some detail in the accompanying user manual.
#
# The executables currently available provide comparisons
# with a range of implementations of the BLAS Level 1
# routines for computing the l2 (Euclidean) norm of
# real and complex vectors in both single and double precision.
# These routines were originally published as part of the
# software package accompanying
# (Algorithm 539: Basic Linear Algebra Subprograms for
# FORTRAN Usage. J. J. Dongarra and J. Du Croz and S. Hammarling
# and R. J. Hanson. ACM Transactions on Mathematical Software,
# 5(3), pp. 324--325, September, 1979).
#
# The following executables may be built using this makefile:
# a) sdBenchOnePassKahan: Kahan's algorithm (one pass version)
#       on its own
# b) sdBenchOrigKahan: version of a) above using the original
#       Kahan implementation which requires a second pass
#       through the data when scaling is necessary.
# c) sdBenchOrigBlas: original BLAS routines (Algorithm 539)
#       with the two parameters cutlo and cuthi set to their
#       defined values for IEEE arithmetic.
# d) sdBenchNewOrig: more readable/maintainable version of c)
# e) sdBenchBlue: Blue's algorithm as described in A Portable 
#       Fortran Program to Find the Euclidean Norm of a Vector 
#       by James L. Blue, ACM Transactions on Mathematical
#       Software, 4(1), pp. 15--23, March, 1978.
# f) sdBenchLa: version now available (June 2015) via netlib and 
#       distributed with Lapack software (v3.5.0, November, 2013).
# g) sdBenchNorm2: version using the new Fortran (2008+) intrinsic
#       function, norm2.
# h) sdBenchNewOrigCsum: version of d) above with compensated 
#       summation implemented. 
# i) sdBenchBlueCsum: version of e) above with compensated 
#       summation implemented. 
# j) sdBenchLaCsum: version of f) above with compensated 
#       summation implemented. 
# k) sdBenchLaCsSc: version of f) above with compensated 
#       summation and intermediate scaling factors forced to be
#       powers of two to try and reduce rounding errors.
# l) sdBenchNewBlue: version of Blue's algorithm which only uses
#       two accumulators rather than three.
# m) testOracle: an executable to check that the multiple
#       precision package is generating plausible results for
#       double precision data.
# n) sdBenchBLASLib: benchmark an existing platform dependent
#       library; for example, a hardware dependent version of the Level-1
#       BLAS specially tuned for a particular processor.
#
#       This is a template and may require editing to successfully
#       link to the library.
# o) sdBenchBLASSubs: benchmark an existing implementation of the
#       BLAS Level 1 nrm2 routines supplied as either a standalone
#       set of source routines (not wrapped in a module) in one
#       or more files or as one or more pre-compiled (.o) files.
#
#       This is a template and may require editing to successfully
#       link to the library.
#
# For details of how to build n) and o) see comments later
# in this makefile

# Benchmark executables built by the default `all' goal.
executables = sdBenchLa  sdBenchLaCsum sdBenchNorm2 \
              sdBenchOrigKahan \
              sdBenchBlue sdBenchNewOrig sdBenchNewOrigCsum \
              sdBenchOrigBlas sdBenchBlueCsum sdBenchLaCsSc \
              sdBenchNewBlue sdBenchOnePassKahan

# Every benchmark recipe in this makefile compiles the driver into the
# same intermediate file, sdBenchmark.o, before linking.  Running two of
# those recipes concurrently (make -j) could therefore link one
# executable against the driver object compiled for another, so the
# build is forced to run serially.
.NOTPARALLEL:

# `all' names a goal rather than a file; declaring it phony keeps a
# stray file called `all' from suppressing the build.
.PHONY: all
all: $(executables)

# Directories handed to the compiler with -I by the explicit compile
# rules further down in this makefile.
SrcPath := ../Hybrid
OtherPath := ../OtherVersions

# Tell make where to look for prerequisites (Fortran sources and
# include files) that are not in the current directory.
vpath %.f90 ../Hybrid:../OtherVersions:../MpPackage
vpath %.f ../OtherVersions
vpath %.inc ../Hybrid:../OtherVersions

# Hybrid module objects.
SRCMOD := nrm2HybridMod.o
ONEPASSSRCMOD := nrm2HybridOnePassMod.o

# Objects that appear on the link line of every benchmark executable
# and of testOracle.
BENCHOBJS := set_precision.o $(SRCMOD) quicksort.o oracle.o randMod.o \
             mpfun90.o mpmod90.o GenerateTestVectors.o

# One object per alternative nrm2 implementation; each benchmark
# executable links exactly one of these in addition to $(BENCHOBJS).
OrigBLAS := BlasOrig.o
NewOrigBLAS := newOrig.o
NewOrigBLASCsum := newOrigCsum.o
LaSubs := lapack.o
LaSubsCsum := lapackCsum.o
LaSubsCsSc := lapackCsSc.o
Norm2Subs := norm2Intrinsic.o
BlueSubs := Blue.o
NewBlueSubs := NewBlue.o
BlueSubsCsum := BlueCsum.o
OrigKahanSub := origKahan.o
OnePassKahanSub := onePassKahan.o

# Wrapper object linked (together with $(ONEPASSSRCMOD)) into
# sdBenchCompKahan.
CompMod := onePassWrap.o

# Include files, listed as prerequisites of the compile rules so that
# editing one of them triggers recompilation of the dependent object.
HYBRIDINCS := realHybrid.inc complexHybrid.inc
ORIGKAHANINCS := realKahan.inc complexKahan.inc
ONEPASSKAHANINCS := onePassRealKahan.inc onePassComplexKahan.inc
BLUEINCS := blueReal.inc blueComplex.inc
NEWBLUEINCS := newBlueReal.inc newBlueComplex.inc
BLUECSUMINCS := blueRealCsum.inc blueComplexCsum.inc
NEWORIGINCS := newOrigReal.inc newOrigComplex.inc
NEWORIGCSUMINCS := newOrigRealCsum.inc newOrigComplexCsum.inc

# Kahan-based benchmarks.  Each recipe recompiles the benchmark driver
# into sdBenchmark.o and then links it with the object prerequisites;
# $(filter %.o,$^) is the prerequisite list without sdBenchmark.f90.
sdBenchOrigKahan: $(BENCHOBJS) $(OrigKahanSub) sdBenchmark.f90
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o

sdBenchOnePassKahan: $(BENCHOBJS) $(OnePassKahanSub) sdBenchmark.f90
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o

# Benchmark built around the HybridWrap wrapper.
# The wrapper and the driver are both recompiled here and then linked
# with $(BENCHOBJS).  The link line names HybridWrap.o explicitly: the
# HybridWrap variable previously used there is not defined in this
# file, which left the freshly compiled wrapper object off the link
# line.
# $(filter %HybridWrap.f90,$^) is the wrapper source exactly as make
# located it, so the compile also works if the file is found through
# vpath rather than in the current directory.
sdBenchHybridKahan: $(BENCHOBJS) HybridWrap.f90 sdBenchmark.f90
	$(F95) $(F95FLAGS) -c -o HybridWrap.o $(filter %HybridWrap.f90,$^)
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(BENCHOBJS) HybridWrap.o sdBenchmark.o

# Benchmarks of Blue's algorithm and its variants.  The driver is
# recompiled into sdBenchmark.o, then linked with every object
# prerequisite ($(filter %.o,$^) leaves out the sdBenchmark.f90 name).
sdBenchBlue: $(BENCHOBJS) $(BlueSubs) sdBenchmark.f90
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o

sdBenchNewBlue: $(BENCHOBJS) $(NewBlueSubs) sdBenchmark.f90
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o

sdBenchBlueCsum: $(BENCHOBJS) $(BlueSubsCsum) sdBenchmark.f90
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o

# Benchmarks of the Lapack-distributed routine and its variants.  The
# driver is recompiled into sdBenchmark.o and linked with the object
# prerequisites; $(filter %.o,$^) discards the .f90 prerequisites.
sdBenchLa: $(BENCHOBJS) $(LaSubs) sdBenchmark.f90 lapack.f90
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o

sdBenchLaCsum: $(BENCHOBJS) $(LaSubsCsum) sdBenchmark.f90
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o

sdBenchLaCsSc: $(BENCHOBJS) $(LaSubsCsSc) sdBenchmark.f90
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o

# Benchmarks of the original BLAS routine and its rewritten variants.
# The driver is recompiled into sdBenchmark.o and linked with the
# object prerequisites ($(filter %.o,$^) omits sdBenchmark.f90).
sdBenchNewOrig: $(BENCHOBJS) $(NewOrigBLAS) sdBenchmark.f90
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o

sdBenchNewOrigCsum: $(BENCHOBJS) $(NewOrigBLASCsum) sdBenchmark.f90
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o

sdBenchOrigBlas: $(BENCHOBJS) $(OrigBLAS) sdBenchmark.f90
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o

# Benchmark linked against $(Norm2Subs).  The driver is recompiled into
# sdBenchmark.o and linked with the object prerequisites
# ($(filter %.o,$^) omits sdBenchmark.f90).
sdBenchNorm2: $(BENCHOBJS) $(Norm2Subs) sdBenchmark.f90
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o

# Benchmark linked against the one-pass hybrid module and its wrapper
# ($(ONEPASSSRCMOD) and $(CompMod)).  This target is not part of
# $(executables), so it is only built when requested by name.
sdBenchCompKahan: $(BENCHOBJS) $(ONEPASSSRCMOD) $(CompMod) sdBenchmark.f90
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o

# Link the oracle test program from the common benchmark objects plus
# its own object; $^ is the full prerequisite list in the order given.
testOracle: $(BENCHOBJS) testOracle.o
	$(F95) $(F95LINKFLAGS) -o $@ $^

# Link checkVectors from its own object and the support objects it
# needs.  Unlike the benchmarks, $(SRCMOD) is not on this link line.
checkVectors: checkVectors.o set_precision.o randMod.o oracle.o \
              quicksort.o mpfun90.o mpmod90.o GenerateTestVectors.o
	$(F95) $(F95LINKFLAGS) -o $@ $^

########################################################
# The following shows how to build a Benchmark executable
# to be used to compare the new Hybrid version against an
# existing external library (e.g., one available only
# in library form from a compiler vendor). This can
# also be used to link against a set of routines that are
# not presented in a module by replacing $(LIBS) with the
# required source files. 
#
# A separate compilation line for the sdBenchmark program
# file ensures that no `old' module and .o files are 
# mistakenly used at the link stage.
########################################################
# Template: benchmark against an external, pre-built library.
# LIBS is not set in this file; supply the options needed to link the
# library (for example in ../Makefile.inc or on the make command line).
#
# The driver is compiled separately with $(F95FLAGS) and the link line
# uses the resulting sdBenchmark.o.  $(filter %.o,$^) keeps only the
# object prerequisites, so sdBenchmark.f90 is no longer passed to the
# link command (where it used to be compiled a second time with the
# link flags, leaving the separately built sdBenchmark.o unused).
sdBenchBLASLib: $(BENCHOBJS) sdBenchmark.f90
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o $(LIBS)

# Template: benchmark against stand-alone nrm2 routines supplied as
# blasSubs.o.  Same compile-then-link scheme as sdBenchBLASLib.
sdBenchBLASSubs: $(BENCHOBJS) sdBenchmark.f90 blasSubs.o
	$(F95) $(F95FLAGS) -c -o sdBenchmark.o sdBenchmark.f90
	$(F95) $(F95LINKFLAGS) -o $@ $(filter %.o,$^) sdBenchmark.o

# Placeholder with no prerequisites and no recipe: edit it to say how
# blasSubs.o is produced from the routines being benchmarked.
blasSubs.o:
########################################################
	
# Prerequisite-only rules: they add build-order dependencies between
# objects and carry no recipes of their own.

# Objects that must be rebuilt after (and built later than)
# set_precision.o.
# NOTE(review): lapackCsSc.o has been added by analogy with lapack.o
# and lapackCsum.o; it was the only lapack-family object without this
# prerequisite.  Confirm that lapackCsSc.f90 depends on set_precision.
lapack.o lapackCsum.o lapackCsSc.o: set_precision.o

norm2Intrinsic.o: set_precision.o

quicksort.o: set_precision.o

randMod.o: set_precision.o

oracle.o: set_precision.o randMod.o mpfun90.o mpmod90.o

GenerateTestVectors.o: set_precision.o randMod.o oracle.o quicksort.o

# set_precision.o itself has no prerequisites declared here.
set_precision.o:

onePassWrap.o: nrm2HybridOnePassMod.o

checkVectors.o: GenerateTestVectors.o set_precision.o randMod.o \
                oracle.o quicksort.o

# Compile the hybrid module object.  The first prerequisite ($<) is the
# .f90 source with the same base name as the object; the include files
# are prerequisites so that changing one forces a recompile.
# NOTE(review): this compile step passes $(F95LINKFLAGS), whereas the
# sdBenchmark.f90 compiles in this makefile pass $(F95FLAGS) -- confirm
# that this is intended.
$(SRCMOD): $(SRCMOD:.o=.f90) set_precision.o \
           $(HYBRIDINCS) $(ONEPASSKAHANINCS)
	$(F95) $(F95LINKFLAGS) -I $(SrcPath) -c $<

# NOTE(review): ORIGSRCMOD is not assigned anywhere in this file.
# Unless ../Makefile.inc or the command line defines it, the target
# list below expands to nothing and the rule appears to be inert.
$(ORIGSRCMOD): $(ORIGSRCMOD:.o=.f90) set_precision.o \
               $(HYBRIDINCS) $(ORIGKAHANINCS)
	$(F95) $(F95LINKFLAGS) -I $(SrcPath) -c $<

# Compile rules for the alternative implementations.  In every rule the
# first prerequisite ($<) is the .f90 source named after the object,
# and $(OtherPath) is added to the compiler's include search path.

$(BlueSubs): $(BlueSubs:.o=.f90) set_precision.o $(BLUEINCS)
	$(F95) $(F95LINKFLAGS) -I $(OtherPath) -c $<

$(LaSubs): $(LaSubs:.o=.f90) set_precision.o
	$(F95) $(F95LINKFLAGS) -I $(OtherPath) -c $<

$(LaSubsCsum): $(LaSubsCsum:.o=.f90) set_precision.o
	$(F95) $(F95LINKFLAGS) -I $(OtherPath) -c $<

$(Norm2Subs): $(Norm2Subs:.o=.f90) set_precision.o
	$(F95) $(F95LINKFLAGS) -I $(OtherPath) -c $<

$(NewOrigBLAS): $(NewOrigBLAS:.o=.f90) set_precision.o $(NEWORIGINCS)
	$(F95) $(F95LINKFLAGS) -I $(OtherPath) -c $<

$(NewOrigBLASCsum): $(NewOrigBLASCsum:.o=.f90) set_precision.o \
                    $(NEWORIGCSUMINCS)
	$(F95) $(F95LINKFLAGS) -I $(OtherPath) -c $<

$(NewBlueSubs): $(NewBlueSubs:.o=.f90) set_precision.o $(NEWBLUEINCS)
	$(F95) $(F95LINKFLAGS) -I $(OtherPath) -c $<

$(BlueSubsCsum): $(BlueSubsCsum:.o=.f90) set_precision.o $(BLUECSUMINCS)
	$(F95) $(F95LINKFLAGS) -I $(OtherPath) -c $<

# Compile rules for the Kahan variants and the one-pass wrapper.  The
# first prerequisite ($<) is the .f90 source named after the object,
# and $(SrcPath) is added to the compiler's include search path.

$(OrigKahanSub): $(OrigKahanSub:.o=.f90) set_precision.o $(ORIGKAHANINCS)
	$(F95) $(F95LINKFLAGS) -I $(SrcPath) -c $<

$(OnePassKahanSub): $(OnePassKahanSub:.o=.f90) set_precision.o \
                    $(ONEPASSKAHANINCS)
	$(F95) $(F95LINKFLAGS) -I $(SrcPath) -c $<

$(CompMod): $(CompMod:.o=.f90) $(ONEPASSKAHANINCS)
	$(F95) $(F95LINKFLAGS) -I $(SrcPath) -c $<

# NOTE(review): neither SRCSUBS nor KAHANINCS is assigned in this file.
# Unless ../Makefile.inc or the command line defines SRCSUBS, the
# target list below expands to nothing and the rule appears to be inert.
$(SRCSUBS): $(SRCSUBS:.o=.f90) set_precision.o \
            $(HYBRIDINCS) $(KAHANINCS)
	$(F95) $(F95LINKFLAGS) -I $(SrcPath) -c $<

# clean: delete everything this makefile can generate.
#   - compiler by-products; which of these patterns actually match is
#     compiler dependent
#   - the default executables plus the optional ones that are not part
#     of $(executables) and were previously left behind
# The explanatory text lives here rather than on tab-indented lines in
# the recipe, where make would pass it to the shell and echo it.
.PHONY: clean
clean:
	$(REMOVE) *.o *.d *.g90 *.mod
	$(REMOVE) $(executables) testOracle checkVectors \
	    sdBenchHybridKahan sdBenchCompKahan \
	    sdBenchBLASLib sdBenchBLASSubs
