diff --git a/test-suite/quantlibbenchmark.cpp b/test-suite/quantlibbenchmark.cpp index 72734b5b6a9..bf0aaccb7c7 100644 --- a/test-suite/quantlibbenchmark.cpp +++ b/test-suite/quantlibbenchmark.cpp @@ -35,70 +35,11 @@ ./quantlib-benchmark --mp The number of floating point operations of a given test case was measured - using the perfex library, http://user.it.uu.se/~mikpe/linux/perfctr - and PAPI, http://icl.cs.utk.edu/papi - - Example results: 1. i7 7820X@3.6GHz :24192.2 mflops - 2. i7 4702HQ@2.2GHz : 6524.9 mflops - 3. i7 870@2.93GHz : 4759.2 mflops - 4. Core2 Q9300@2.5Ghz : 2272.6 mflops - 5. Core2 Q6600@2.4Ghz : 1984.0 mflops - 6. i3 540@3.1Ghz : 1755.3 mflops - 7. Raspberry Pi4@1.5GHz : 1704.2 mflops - 8. Core2 Dual@2.0Ghz : 835.9 mflops - 9. Athlon 64 X2 4400+ : 824.2 mflops - 10. Cortex-A57@2.0GHz : 821.7 mflops - 11. Core2 Dual@2.0Ghz : 754.1 mflops - 12. Pentium4 Dual@2.8Ghz : 423.8 mflops - 13. Raspberry Pi3@1.2GHz : 309.2 mflops - 14. Pentium4@3.0Ghz : 266.3 mflops - 15. PentiumIII@1.1Ghz : 146.2 mflops - 16. Alpha 2xEV68@833Mhz : 184.6 mflops - 17. Wii PowerPC 750@729MHz : 46.1 mflops - 18. Raspberry Pi ARM@700Mhz: 28.3 mflops - 19. MIPS R5000@150MHz : 12.6 mflops - 20. RISC-V on FPGA@25Mhz : 2.4 mflops - 21. Strong ARM@206Mhz : 1.4 mflops - 22. SPARC v7@25MHz : 0.78mflops - - Remarks: OS: Linux, static libs - 1. g++-6.3.0 -O3 -ffast-math -march=core-avx2 - Remark: 16 processes - 2. g++-4.8.1 -O3 -ffast-math -march=core-avx2 - Remark: eight processes - 3. gcc-4.6.3, -O3 -ffast-math -mfpmath=sse,387 -march=corei7 - Remark: eight processes - 4. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2 - Remark: four processes - 5. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2 - Remark: four processes - 6. gcc-4.4.5, -O3 -ffast-math -mfpmath=sse,387 -msse4.2 -march=core2 - Remark: four processes - 7. gcc-8.3.0, -O3 -ffast-math -mcpu=cortx-a8 -mfpu=neon-fp-armv8 - Remark: four processes - 8. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2 - Remark: two processes - 9. icc-11.0, -gcc-version=420 -xSSSE3 -O3 -ipo -no-prec-div -static - -fp-model fast=2 -ipo-jobs2, Remark: two processes - 10. clang++-6.0.1 -O2, Remark: four processes - 11. gcc-4.2.1, -O3 -ffast-math -mfpmath=sse,387 -msse3 -funroll-all-loops - Remark: two processes - 12. gcc-4.0.1, -O3 -march=pentium4 -ffast-math - -mfpmath=sse,387 -msse2 -funroll-all-loops, Remark: two processes - 13. gcc-4.9.2 -O2, Remark: four processes - 14. gcc-4.0.1, -O3 -march=pentium4 -ffast-math - -mfpmath=sse,387 -msse2 -funroll-all-loops - 15. gcc-4.1.1, -O3 -march=pentium3 -ffast-math - -mfpmath=sse,387 -msse -funroll-all-loops - 16. gcc-3.3.5, -O3 -mcpu=e67 -funroll-all-loops, Remark: two processes - 17. gcc-4.9.2, -O2 -g on a Nintendo Wii - 18. gcc-4.6.3, -O3 - 19. gcc-4-7-4, -O2 on a SGI Indy - 20. gcc-9.2, -O2 on RISC-V softcore on an Artix7 100T FPGA - 21. gcc-3.4.3, -O2 -g on a Zaurus PDA - 22. gcc-7.5.0, -O2 on a Sun SPARCstation IPC, FPU: Weitek 3170 - - This benchmark is derived from quantlibtestsuite.cpp. Please see the + using PAPI, http://icl.cs.utk.edu/papi + + Example results can be found at https://openbenchmarking.org/test/pts/quantlib + + This benchmark is derived from quantlibtestsuite.cpp. Please see the copyrights therein. */ @@ -124,10 +65,15 @@ #include #include -/* PAPI code -#include + +/* initialize PAPI on Linux + sudo sysctl -w kernel.perf_event_paranoid=0 + export PAPI_EVENTS="PAPI_TOT_INS,PAPI_FP_OPS,PAPI_FP_INS" + export PAPI_REPORT=1 */ +//#include + + /* Use BOOST_MSVC instead of _MSC_VER since some other vendors (Metrowerks, for example) also #define _MSC_VER @@ -204,7 +150,7 @@ namespace { Benchmark("EuropeanOption::FdEngines", &EuropeanOptionTest::testFdEngines, 148.43), Benchmark("FdHestonTest::testFdmHestonAmerican", &FdHestonTest::testFdmHestonAmerican, 234.21), Benchmark("HestonModel::DAXCalibration", &HestonModelTest::testDAXCalibration, 555.19), - Benchmark("InterpolationTest::testSabrInterpolation", &InterpolationTest::testSabrInterpolation, 2266.06), + Benchmark("InterpolationTest::testSabrInterpolation", &InterpolationTest::testSabrInterpolation, 295.63), Benchmark("JumpDiffusion::Greeks", &JumpDiffusionTest::testGreeks, 433.77), Benchmark("MarketModelCmsTest::testCmSwapsSwaptions", &MarketModelCmsTest::testMultiStepCmSwapsAndSwaptions, 11497.73), Benchmark("MarketModelSmmTest::testMultiSmmSwaptions", &MarketModelSmmTest::testMultiStepCoterminalSwapsAndSwaptions, 11244.95), @@ -214,29 +160,19 @@ namespace { Benchmark("ShortRateModel::Swaps", &ShortRateModelTest::testSwaps, 454.73) }; - /* PAPI code - float real_time, proc_time, mflops; - long_long lflop, flop=0; - */ - class TimedBenchmark { public: typedef void (*fct_ptr)(); - explicit TimedBenchmark(fct_ptr f) : f_(f) {} + explicit TimedBenchmark(fct_ptr f, const std::string name) : f_(f), name_(name) {} void startMeasurement() const { - /* PAPI code - lflop = flop; - PAPI_flops(&real_time, &proc_time, &flop, &mflops); - */ + //QL_REQUIRE(PAPI_hl_region_begin(name_.c_str()) == PAPI_OK, + // "could not initialize PAPI"); } void stopMeasurement() const { - /* PAPI code - PAPI_flops(&real_time, &proc_time, &flop, &mflops); - printf("Real_time: %f Proc_time: %f Total mflop: %f\n", - real_time, proc_time, (flop-lflop)/1e6); - */ + //QL_REQUIRE(PAPI_hl_region_end(name_.c_str()) == PAPI_OK, + // "could not stop PAPI"); } double operator()() const { @@ -251,6 +187,7 @@ namespace { } private: fct_ptr f_; + const std::string name_; }; void printResults( @@ -358,7 +295,7 @@ int main(int argc, char* argv[] ) { std::for_each(bm.begin(), bm.end(), [&runTimes](const Benchmark& iter) { runTimes.emplace_back( - iter, TimedBenchmark(iter.getTestCase())()); + iter, TimedBenchmark(iter.getTestCase(), iter.getName())()); }); printResults(nProc, runTimes); }