-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
update papi documentation and initialization
- Loading branch information
1 parent
2de91c2
commit b0be193
Showing
1 changed file
with
21 additions
and
84 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,70 +35,11 @@ | |
./quantlib-benchmark --mp | ||
The number of floating point operations of a given test case was measured | ||
using the perfex library, http://user.it.uu.se/~mikpe/linux/perfctr | ||
and PAPI, http://icl.cs.utk.edu/papi | ||
Example results: 1. i7 [email protected] :24192.2 mflops | ||
2. i7 [email protected] : 6524.9 mflops | ||
3. i7 [email protected] : 4759.2 mflops | ||
4. Core2 [email protected] : 2272.6 mflops | ||
5. Core2 [email protected] : 1984.0 mflops | ||
6. i3 [email protected] : 1755.3 mflops | ||
7. Raspberry [email protected] : 1704.2 mflops | ||
8. Core2 [email protected] : 835.9 mflops | ||
9. Athlon 64 X2 4400+ : 824.2 mflops | ||
10. [email protected] : 821.7 mflops | ||
11. Core2 [email protected] : 754.1 mflops | ||
12. Pentium4 [email protected] : 423.8 mflops | ||
13. Raspberry [email protected] : 309.2 mflops | ||
14. [email protected] : 266.3 mflops | ||
15. [email protected] : 146.2 mflops | ||
16. Alpha 2xEV68@833Mhz : 184.6 mflops | ||
17. Wii PowerPC 750@729MHz : 46.1 mflops | ||
18. Raspberry Pi ARM@700Mhz: 28.3 mflops | ||
19. MIPS R5000@150MHz : 12.6 mflops | ||
20. RISC-V on FPGA@25Mhz : 2.4 mflops | ||
21. Strong ARM@206Mhz : 1.4 mflops | ||
22. SPARC v7@25MHz : 0.78mflops | ||
Remarks: OS: Linux, static libs | ||
1. g++-6.3.0 -O3 -ffast-math -march=core-avx2 | ||
Remark: 16 processes | ||
2. g++-4.8.1 -O3 -ffast-math -march=core-avx2 | ||
Remark: eight processes | ||
3. gcc-4.6.3, -O3 -ffast-math -mfpmath=sse,387 -march=corei7 | ||
Remark: eight processes | ||
4. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2 | ||
Remark: four processes | ||
5. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2 | ||
Remark: four processes | ||
6. gcc-4.4.5, -O3 -ffast-math -mfpmath=sse,387 -msse4.2 -march=core2 | ||
Remark: four processes | ||
7. gcc-8.3.0, -O3 -ffast-math -mcpu=cortex-a8 -mfpu=neon-fp-armv8 | ||
Remark: four processes | ||
8. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2 | ||
Remark: two processes | ||
9. icc-11.0, -gcc-version=420 -xSSSE3 -O3 -ipo -no-prec-div -static | ||
-fp-model fast=2 -ipo-jobs2, Remark: two processes | ||
10. clang++-6.0.1 -O2, Remark: four processes | ||
11. gcc-4.2.1, -O3 -ffast-math -mfpmath=sse,387 -msse3 -funroll-all-loops | ||
Remark: two processes | ||
12. gcc-4.0.1, -O3 -march=pentium4 -ffast-math | ||
-mfpmath=sse,387 -msse2 -funroll-all-loops, Remark: two processes | ||
13. gcc-4.9.2 -O2, Remark: four processes | ||
14. gcc-4.0.1, -O3 -march=pentium4 -ffast-math | ||
-mfpmath=sse,387 -msse2 -funroll-all-loops | ||
15. gcc-4.1.1, -O3 -march=pentium3 -ffast-math | ||
-mfpmath=sse,387 -msse -funroll-all-loops | ||
16. gcc-3.3.5, -O3 -mcpu=e67 -funroll-all-loops, Remark: two processes | ||
17. gcc-4.9.2, -O2 -g on a Nintendo Wii | ||
18. gcc-4.6.3, -O3 | ||
19. gcc-4.7.4, -O2 on an SGI Indy | ||
20. gcc-9.2, -O2 on RISC-V softcore on an Artix7 100T FPGA | ||
21. gcc-3.4.3, -O2 -g on a Zaurus PDA | ||
22. gcc-7.5.0, -O2 on a Sun SPARCstation IPC, FPU: Weitek 3170 | ||
This benchmark is derived from quantlibtestsuite.cpp. Please see the | ||
using PAPI, http://icl.cs.utk.edu/papi | ||
Example results can be found at https://openbenchmarking.org/test/pts/quantlib | ||
This benchmark is derived from quantlibtestsuite.cpp. Please see the | ||
copyrights therein. | ||
*/ | ||
|
||
|
@@ -124,10 +65,15 @@ | |
#include <chrono> | ||
#include <thread> | ||
|
||
/* PAPI code | ||
#include <stdio.h> | ||
#include <papi.h> | ||
|
||
/* initialize PAPI on Linux | ||
sudo sysctl -w kernel.perf_event_paranoid=0 | ||
export PAPI_EVENTS="PAPI_TOT_INS,PAPI_FP_OPS,PAPI_FP_INS" | ||
export PAPI_REPORT=1 | ||
*/ | ||
//#include <papi.h> | ||
|
||
|
||
|
||
/* Use BOOST_MSVC instead of _MSC_VER since some other vendors (Metrowerks, | ||
for example) also #define _MSC_VER | ||
|
@@ -204,7 +150,7 @@ namespace { | |
Benchmark("EuropeanOption::FdEngines", &EuropeanOptionTest::testFdEngines, 148.43), | ||
Benchmark("FdHestonTest::testFdmHestonAmerican", &FdHestonTest::testFdmHestonAmerican, 234.21), | ||
Benchmark("HestonModel::DAXCalibration", &HestonModelTest::testDAXCalibration, 555.19), | ||
Benchmark("InterpolationTest::testSabrInterpolation", &InterpolationTest::testSabrInterpolation, 2266.06), | ||
Benchmark("InterpolationTest::testSabrInterpolation", &InterpolationTest::testSabrInterpolation, 295.63), | ||
Benchmark("JumpDiffusion::Greeks", &JumpDiffusionTest::testGreeks, 433.77), | ||
Benchmark("MarketModelCmsTest::testCmSwapsSwaptions", &MarketModelCmsTest::testMultiStepCmSwapsAndSwaptions, 11497.73), | ||
Benchmark("MarketModelSmmTest::testMultiSmmSwaptions", &MarketModelSmmTest::testMultiStepCoterminalSwapsAndSwaptions, 11244.95), | ||
|
@@ -214,29 +160,19 @@ namespace { | |
Benchmark("ShortRateModel::Swaps", &ShortRateModelTest::testSwaps, 454.73) | ||
}; | ||
|
||
/* PAPI code | ||
float real_time, proc_time, mflops; | ||
long_long lflop, flop=0; | ||
*/ | ||
|
||
class TimedBenchmark { | ||
public: | ||
typedef void (*fct_ptr)(); | ||
explicit TimedBenchmark(fct_ptr f) : f_(f) {} | ||
explicit TimedBenchmark(fct_ptr f, const std::string name) : f_(f), name_(name) {} | ||
|
||
void startMeasurement() const { | ||
/* PAPI code | ||
lflop = flop; | ||
PAPI_flops(&real_time, &proc_time, &flop, &mflops); | ||
*/ | ||
//QL_REQUIRE(PAPI_hl_region_begin(name_.c_str()) == PAPI_OK, | ||
// "could not initialize PAPI"); | ||
} | ||
|
||
void stopMeasurement() const { | ||
/* PAPI code | ||
PAPI_flops(&real_time, &proc_time, &flop, &mflops); | ||
printf("Real_time: %f Proc_time: %f Total mflop: %f\n", | ||
real_time, proc_time, (flop-lflop)/1e6); | ||
*/ | ||
//QL_REQUIRE(PAPI_hl_region_end(name_.c_str()) == PAPI_OK, | ||
// "could not stop PAPI"); | ||
} | ||
|
||
double operator()() const { | ||
|
@@ -251,6 +187,7 @@ namespace { | |
} | ||
private: | ||
fct_ptr f_; | ||
const std::string name_; | ||
}; | ||
|
||
void printResults( | ||
|
@@ -358,7 +295,7 @@ int main(int argc, char* argv[] ) { | |
std::for_each(bm.begin(), bm.end(), | ||
[&runTimes](const Benchmark& iter) { | ||
runTimes.emplace_back( | ||
iter, TimedBenchmark(iter.getTestCase())()); | ||
iter, TimedBenchmark(iter.getTestCase(), iter.getName())()); | ||
}); | ||
printResults(nProc, runTimes); | ||
} | ||
|