Skip to content

Commit

Permalink
update papi documentation and initialization
Browse files Browse the repository at this point in the history
  • Loading branch information
klausspanderen committed Nov 16, 2023
1 parent 2de91c2 commit b0be193
Showing 1 changed file with 21 additions and 84 deletions.
105 changes: 21 additions & 84 deletions test-suite/quantlibbenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,70 +35,11 @@
./quantlib-benchmark --mp
The number of floating point operations of a given test case was measured
using the perfex library, http://user.it.uu.se/~mikpe/linux/perfctr
and PAPI, http://icl.cs.utk.edu/papi
Example results: 1. i7 [email protected] :24192.2 mflops
2. i7 [email protected] : 6524.9 mflops
3. i7 [email protected] : 4759.2 mflops
4. Core2 [email protected] : 2272.6 mflops
5. Core2 [email protected] : 1984.0 mflops
6. i3 [email protected] : 1755.3 mflops
7. Raspberry [email protected] : 1704.2 mflops
8. Core2 [email protected] : 835.9 mflops
9. Athlon 64 X2 4400+ : 824.2 mflops
10. [email protected] : 821.7 mflops
11. Core2 [email protected] : 754.1 mflops
12. Pentium4 [email protected] : 423.8 mflops
13. Raspberry [email protected] : 309.2 mflops
14. [email protected] : 266.3 mflops
15. [email protected] : 146.2 mflops
16. Alpha 2xEV68@833Mhz : 184.6 mflops
17. Wii PowerPC 750@729MHz : 46.1 mflops
18. Raspberry Pi ARM@700Mhz: 28.3 mflops
19. MIPS R5000@150MHz : 12.6 mflops
20. RISC-V on FPGA@25Mhz : 2.4 mflops
21. Strong ARM@206Mhz : 1.4 mflops
22. SPARC v7@25MHz : 0.78 mflops
Remarks: OS: Linux, static libs
1. g++-6.3.0 -O3 -ffast-math -march=core-avx2
Remark: 16 processes
2. g++-4.8.1 -O3 -ffast-math -march=core-avx2
Remark: eight processes
3. gcc-4.6.3, -O3 -ffast-math -mfpmath=sse,387 -march=corei7
Remark: eight processes
4. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2
Remark: four processes
5. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2
Remark: four processes
6. gcc-4.4.5, -O3 -ffast-math -mfpmath=sse,387 -msse4.2 -march=core2
Remark: four processes
7. gcc-8.3.0, -O3 -ffast-math -mcpu=cortex-a8 -mfpu=neon-fp-armv8
Remark: four processes
8. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2
Remark: two processes
9. icc-11.0, -gcc-version=420 -xSSSE3 -O3 -ipo -no-prec-div -static
-fp-model fast=2 -ipo-jobs2, Remark: two processes
10. clang++-6.0.1 -O2, Remark: four processes
11. gcc-4.2.1, -O3 -ffast-math -mfpmath=sse,387 -msse3 -funroll-all-loops
Remark: two processes
12. gcc-4.0.1, -O3 -march=pentium4 -ffast-math
-mfpmath=sse,387 -msse2 -funroll-all-loops, Remark: two processes
13. gcc-4.9.2 -O2, Remark: four processes
14. gcc-4.0.1, -O3 -march=pentium4 -ffast-math
-mfpmath=sse,387 -msse2 -funroll-all-loops
15. gcc-4.1.1, -O3 -march=pentium3 -ffast-math
-mfpmath=sse,387 -msse -funroll-all-loops
16. gcc-3.3.5, -O3 -mcpu=e67 -funroll-all-loops, Remark: two processes
17. gcc-4.9.2, -O2 -g on a Nintendo Wii
18. gcc-4.6.3, -O3
19. gcc-4.7.4, -O2 on an SGI Indy
20. gcc-9.2, -O2 on RISC-V softcore on an Artix7 100T FPGA
21. gcc-3.4.3, -O2 -g on a Zaurus PDA
22. gcc-7.5.0, -O2 on a Sun SPARCstation IPC, FPU: Weitek 3170
This benchmark is derived from quantlibtestsuite.cpp. Please see the
using PAPI, http://icl.cs.utk.edu/papi
Example results can be found at https://openbenchmarking.org/test/pts/quantlib
This benchmark is derived from quantlibtestsuite.cpp. Please see the
copyrights therein.
*/

Expand All @@ -124,10 +65,15 @@
#include <chrono>
#include <thread>

/* PAPI code
#include <stdio.h>
#include <papi.h>

/* initialize PAPI on Linux
sudo sysctl -w kernel.perf_event_paranoid=0
export PAPI_EVENTS="PAPI_TOT_INS,PAPI_FP_OPS,PAPI_FP_INS"
export PAPI_REPORT=1
*/
//#include <papi.h>



/* Use BOOST_MSVC instead of _MSC_VER since some other vendors (Metrowerks,
for example) also #define _MSC_VER
Expand Down Expand Up @@ -204,7 +150,7 @@ namespace {
Benchmark("EuropeanOption::FdEngines", &EuropeanOptionTest::testFdEngines, 148.43),
Benchmark("FdHestonTest::testFdmHestonAmerican", &FdHestonTest::testFdmHestonAmerican, 234.21),
Benchmark("HestonModel::DAXCalibration", &HestonModelTest::testDAXCalibration, 555.19),
Benchmark("InterpolationTest::testSabrInterpolation", &InterpolationTest::testSabrInterpolation, 2266.06),
Benchmark("InterpolationTest::testSabrInterpolation", &InterpolationTest::testSabrInterpolation, 295.63),
Benchmark("JumpDiffusion::Greeks", &JumpDiffusionTest::testGreeks, 433.77),
Benchmark("MarketModelCmsTest::testCmSwapsSwaptions", &MarketModelCmsTest::testMultiStepCmSwapsAndSwaptions, 11497.73),
Benchmark("MarketModelSmmTest::testMultiSmmSwaptions", &MarketModelSmmTest::testMultiStepCoterminalSwapsAndSwaptions, 11244.95),
Expand All @@ -214,29 +160,19 @@ namespace {
Benchmark("ShortRateModel::Swaps", &ShortRateModelTest::testSwaps, 454.73)
};

/* PAPI code
float real_time, proc_time, mflops;
long_long lflop, flop=0;
*/

class TimedBenchmark {
public:
typedef void (*fct_ptr)();
explicit TimedBenchmark(fct_ptr f) : f_(f) {}
explicit TimedBenchmark(fct_ptr f, const std::string name) : f_(f), name_(name) {}

void startMeasurement() const {
/* PAPI code
lflop = flop;
PAPI_flops(&real_time, &proc_time, &flop, &mflops);
*/
//QL_REQUIRE(PAPI_hl_region_begin(name_.c_str()) == PAPI_OK,
// "could not initialize PAPI");
}

void stopMeasurement() const {
/* PAPI code
PAPI_flops(&real_time, &proc_time, &flop, &mflops);
printf("Real_time: %f Proc_time: %f Total mflop: %f\n",
real_time, proc_time, (flop-lflop)/1e6);
*/
//QL_REQUIRE(PAPI_hl_region_end(name_.c_str()) == PAPI_OK,
// "could not stop PAPI");
}

double operator()() const {
Expand All @@ -251,6 +187,7 @@ namespace {
}
private:
fct_ptr f_;
const std::string name_;
};

void printResults(
Expand Down Expand Up @@ -358,7 +295,7 @@ int main(int argc, char* argv[] ) {
std::for_each(bm.begin(), bm.end(),
[&runTimes](const Benchmark& iter) {
runTimes.emplace_back(
iter, TimedBenchmark(iter.getTestCase())());
iter, TimedBenchmark(iter.getTestCase(), iter.getName())());
});
printResults(nProc, runTimes);
}
Expand Down

0 comments on commit b0be193

Please sign in to comment.