diff --git a/.github/workflows/ci-test.yml b/.github/workflows/ci-test.yml
new file mode 100644
index 00000000..42038835
--- /dev/null
+++ b/.github/workflows/ci-test.yml
@@ -0,0 +1,22 @@
+name: tests
+
+on: # triggers: pushes and pull requests, each filtered with the '**' branch pattern
+ push:
+ branches: [ '**' ]
+ pull_request:
+ branches: [ '**' ]
+
+jobs:
+ build:
+
+ runs-on: ci-test # not a GitHub-hosted image label, so presumably a self-hosted runner label — confirm
+ if: ${{ github.repository != 'opcm/pcm' }} # the job is skipped when the workflow runs in the opcm/pcm repository
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: make # prints the g++ version, clones simdjson into the workspace, builds with make -j, then runs test.sh
+ run: |
+ g++ --version
+ git clone https://github.com/simdjson/simdjson.git
+ make -j
+ sh test.sh
diff --git a/PCM-Lib_Win/pcm-lib.vcxproj b/PCM-Lib_Win/pcm-lib.vcxproj
index ad212ce1..2b906394 100644
--- a/PCM-Lib_Win/pcm-lib.vcxproj
+++ b/PCM-Lib_Win/pcm-lib.vcxproj
@@ -191,7 +191,8 @@
-
+
+
@@ -206,7 +207,8 @@
-
+
+
diff --git a/PCM-Raw_Win/pcm-raw-win.vcxproj b/PCM-Raw_Win/pcm-raw-win.vcxproj
index e967ff34..dc536f2c 100644
--- a/PCM-Raw_Win/pcm-raw-win.vcxproj
+++ b/PCM-Raw_Win/pcm-raw-win.vcxproj
@@ -193,4 +193,4 @@
-
\ No newline at end of file
+
diff --git a/PCM-Service_Win/AssemblyInfo.cpp b/PCM-Service_Win/AssemblyInfo.cpp
index 51bd41e0..a570996c 100644
--- a/PCM-Service_Win/AssemblyInfo.cpp
+++ b/PCM-Service_Win/AssemblyInfo.cpp
@@ -29,9 +29,9 @@ using namespace System::Security::Permissions;
[assembly:AssemblyTitleAttribute("PCMService")];
[assembly:AssemblyDescriptionAttribute("")];
[assembly:AssemblyConfigurationAttribute("")];
-[assembly:AssemblyCompanyAttribute("Intel GmbH")];
+[assembly:AssemblyCompanyAttribute("Intel Corp")];
[assembly:AssemblyProductAttribute("PCMService")];
-[assembly:AssemblyCopyrightAttribute("Copyright (c) Intel GmbH 2010")];
+[assembly:AssemblyCopyrightAttribute("Copyright (c) Intel Corp 2010-2021")];
[assembly:AssemblyTrademarkAttribute("")];
[assembly:AssemblyCultureAttribute("")];
@@ -50,7 +50,4 @@ using namespace System::Security::Permissions;
[assembly:ComVisible(false)];
-[assembly:CLSCompliantAttribute(true)];
-
-[assembly:SecurityPermission(SecurityAction::RequestMinimum, UnmanagedCode = true)];
-
+[assembly:CLSCompliantAttribute(true)];
\ No newline at end of file
diff --git a/PCM-Service_Win/PCMService.h b/PCM-Service_Win/PCMService.h
index e330fee8..2d7dc9aa 100644
--- a/PCM-Service_Win/PCMService.h
+++ b/PCM-Service_Win/PCMService.h
@@ -124,6 +124,8 @@ namespace PCMServiceNS {
counterCollection->Add( counter );
counter = gcnew CounterCreationData(MetricCoreResC0, "Displays the residency of core or socket in core C0-state in percent.", PerformanceCounterType::NumberOfItems64);
counterCollection->Add( counter );
+ counter = gcnew CounterCreationData(MetricCoreResC1, "Displays the residency of core or socket in core C1-state in percent.", PerformanceCounterType::NumberOfItems64);
+ counterCollection->Add( counter );
counter = gcnew CounterCreationData(MetricCoreResC3, "Displays the residency of core or socket in core C3-state in percent.", PerformanceCounterType::NumberOfItems64);
counterCollection->Add( counter );
counter = gcnew CounterCreationData(MetricCoreResC6, "Displays the residency of core or socket in core C6-state in percent.", PerformanceCounterType::NumberOfItems64);
@@ -144,6 +146,8 @@ namespace PCMServiceNS {
counterCollection->Add( counter );
counter = gcnew CounterCreationData(MetricSocketEnergyDram, "Displays the energy in Joules consumed by DRAM memory attached to the memory controller of this socket.", PerformanceCounterType::NumberOfItems64);
counterCollection->Add( counter );
+ counter = gcnew CounterCreationData(MetricSocketResC0, "Displays the residency of socket in package C0-state in percent.", PerformanceCounterType::NumberOfItems64);
+ counterCollection->Add( counter );
counter = gcnew CounterCreationData(MetricSocketResC2, "Displays the residency of socket in package C2-state in percent.", PerformanceCounterType::NumberOfItems64);
counterCollection->Add( counter );
counter = gcnew CounterCreationData(MetricSocketResC3, "Displays the residency of socket in package C3-state in percent.", PerformanceCounterType::NumberOfItems64);
@@ -152,6 +156,12 @@ namespace PCMServiceNS {
counterCollection->Add( counter );
counter = gcnew CounterCreationData(MetricSocketResC7, "Displays the residency of socket in package C7-state in percent.", PerformanceCounterType::NumberOfItems64);
counterCollection->Add( counter );
+ counter = gcnew CounterCreationData(MetricSocketResC8, "Displays the residency of socket in package C8-state in percent.", PerformanceCounterType::NumberOfItems64);
+ counterCollection->Add( counter );
+ counter = gcnew CounterCreationData(MetricSocketResC9, "Displays the residency of socket in package C9-state in percent.", PerformanceCounterType::NumberOfItems64);
+ counterCollection->Add( counter );
+ counter = gcnew CounterCreationData(MetricSocketResC10, "Displays the residency of socket in package C10-state in percent.", PerformanceCounterType::NumberOfItems64);
+ counterCollection->Add( counter );
PerformanceCounterCategory::Create(CountersSocket, "Processor Counter Monitor", PerformanceCounterCategoryType::MultiInstance, counterCollection);
}
@@ -185,6 +195,7 @@ namespace PCMServiceNS {
baseTicksForRelFreqHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreFreqNom, s, false));
thermalHeadroomHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreHeadroom, s, false));
CoreC0StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC0, s, false));
+ CoreC1StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC1, s, false));
CoreC3StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC3, s, false));
CoreC6StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC6, s, false));
CoreC7StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC7, s, false));
@@ -207,6 +218,7 @@ namespace PCMServiceNS {
baseTicksForRelFreqHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreFreqNom, s, false));
thermalHeadroomHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreHeadroom, s, false));
CoreC0StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC0, s, false));
+ CoreC1StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC1, s, false));
CoreC3StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC3, s, false));
CoreC6StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC6, s, false));
CoreC7StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC7, s, false));
@@ -218,10 +230,14 @@ namespace PCMServiceNS {
mwbHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketBandWrite, s, false));
packageEnergyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketEnergyPack, s, false));
DRAMEnergyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketEnergyDram, s, false));
+ PackageC0StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC0, s, false));
PackageC2StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC2, s, false));
PackageC3StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC3, s, false));
PackageC6StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC6, s, false));
PackageC7StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC7, s, false));
+ PackageC8StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC8, s, false));
+ PackageC9StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC9, s, false));
+ PackageC10StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC10, s, false));
}
if (collectionInformation_->qpi)
@@ -250,6 +266,7 @@ namespace PCMServiceNS {
baseTicksForRelFreqHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreFreqNom, s, false));
thermalHeadroomHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreHeadroom, s, false));
CoreC0StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC0, s, false));
+ CoreC1StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC1, s, false));
CoreC3StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC3, s, false));
CoreC6StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC6, s, false));
CoreC7StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC7, s, false));
@@ -261,10 +278,14 @@ namespace PCMServiceNS {
mwbHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketBandWrite, s, false));
packageEnergyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketEnergyPack, s, false));
DRAMEnergyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketEnergyDram, s, false));
+ PackageC0StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC0, s, false));
PackageC2StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC2, s, false));
PackageC3StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC3, s, false));
PackageC6StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC6, s, false));
PackageC7StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC7, s, false));
+ PackageC8StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC8, s, false));
+ PackageC9StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC9, s, false));
+ PackageC10StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC10, s, false));
}
if (collectionInformation_->qpi)
@@ -325,6 +346,7 @@ namespace PCMServiceNS {
((PerformanceCounter^)baseTicksForRelFreqHash_[s])->IncrementBy(totalRefTicks >> 17);
((PerformanceCounter^)thermalHeadroomHash_[s])->RawValue = systemState.getThermalHeadroom();
((PerformanceCounter^)CoreC0StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(0,oldSystemState, systemState));
+ ((PerformanceCounter^)CoreC1StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(1,oldSystemState, systemState));
((PerformanceCounter^)CoreC3StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(3,oldSystemState, systemState));
((PerformanceCounter^)CoreC6StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(6,oldSystemState, systemState));
((PerformanceCounter^)CoreC7StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(7,oldSystemState, systemState));
@@ -338,10 +360,14 @@ namespace PCMServiceNS {
((PerformanceCounter^)mwbHash_[s])->RawValue = toBW(getBytesWrittenToMC(oldSystemState, systemState));
((PerformanceCounter^)packageEnergyHash_[s])->RawValue = (__int64)getConsumedJoules(oldSystemState, systemState);
((PerformanceCounter^)DRAMEnergyHash_[s])->RawValue = (__int64)getDRAMConsumedJoules(oldSystemState, systemState);
+ ((PerformanceCounter^)PackageC0StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(0, oldSystemState, systemState));
((PerformanceCounter^)PackageC2StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(2, oldSystemState, systemState));
((PerformanceCounter^)PackageC3StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(3, oldSystemState, systemState));
((PerformanceCounter^)PackageC6StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(6, oldSystemState, systemState));
((PerformanceCounter^)PackageC7StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(7, oldSystemState, systemState));
+ ((PerformanceCounter^)PackageC8StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(8, oldSystemState, systemState));
+ ((PerformanceCounter^)PackageC9StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(9, oldSystemState, systemState));
+ ((PerformanceCounter^)PackageC10StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(10, oldSystemState, systemState));
}
if (collectionInformation_->qpi)
@@ -369,6 +395,7 @@ namespace PCMServiceNS {
((PerformanceCounter^)baseTicksForRelFreqHash_[s])->IncrementBy(socketRefTicks >> 17);
((PerformanceCounter^)thermalHeadroomHash_[s])->RawValue = socketState.getThermalHeadroom();
((PerformanceCounter^)CoreC0StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(0, oldSocketStates[i], socketState));
+ ((PerformanceCounter^)CoreC1StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(1, oldSocketStates[i], socketState));
((PerformanceCounter^)CoreC3StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(3, oldSocketStates[i], socketState));
((PerformanceCounter^)CoreC6StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(6, oldSocketStates[i], socketState));
((PerformanceCounter^)CoreC7StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(7, oldSocketStates[i], socketState));
@@ -380,10 +407,14 @@ namespace PCMServiceNS {
((PerformanceCounter^)mwbHash_[s])->RawValue = toBW(getBytesWrittenToMC(oldSocketStates[i], socketState));
((PerformanceCounter^)packageEnergyHash_[s])->RawValue = (__int64)getConsumedJoules(oldSocketStates[i], socketState);
((PerformanceCounter^)DRAMEnergyHash_[s])->RawValue = (__int64)getDRAMConsumedJoules(oldSocketStates[i], socketState);
+ ((PerformanceCounter^)PackageC0StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(0,oldSocketStates[i], socketState));
((PerformanceCounter^)PackageC2StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(2,oldSocketStates[i], socketState));
((PerformanceCounter^)PackageC3StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(3,oldSocketStates[i], socketState));
((PerformanceCounter^)PackageC6StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(6,oldSocketStates[i], socketState));
((PerformanceCounter^)PackageC7StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(7,oldSocketStates[i], socketState));
+ ((PerformanceCounter^)PackageC8StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(8,oldSocketStates[i], socketState));
+ ((PerformanceCounter^)PackageC9StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(9,oldSocketStates[i], socketState));
+ ((PerformanceCounter^)PackageC10StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(10,oldSocketStates[i], socketState));
}
if (collectionInformation_->qpi)
@@ -419,6 +450,7 @@ namespace PCMServiceNS {
((PerformanceCounter^)baseTicksForRelFreqHash_[s])->IncrementBy(refTicks >> 17);
((PerformanceCounter^)thermalHeadroomHash_[s])->RawValue = coreState.getThermalHeadroom();
((PerformanceCounter^)CoreC0StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(0,oldCoreStates[i], coreState));
+ ((PerformanceCounter^)CoreC1StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(1,oldCoreStates[i], coreState));
((PerformanceCounter^)CoreC3StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(3,oldCoreStates[i], coreState));
((PerformanceCounter^)CoreC6StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(6,oldCoreStates[i], coreState));
((PerformanceCounter^)CoreC7StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(7,oldCoreStates[i], coreState));
@@ -464,13 +496,18 @@ namespace PCMServiceNS {
System::Collections::Hashtable thermalHeadroomHash_;
// C-state Residencies
System::Collections::Hashtable CoreC0StateResidencyHash_;
+ System::Collections::Hashtable CoreC1StateResidencyHash_;
System::Collections::Hashtable CoreC3StateResidencyHash_;
System::Collections::Hashtable CoreC6StateResidencyHash_;
System::Collections::Hashtable CoreC7StateResidencyHash_;
+ System::Collections::Hashtable PackageC0StateResidencyHash_;
System::Collections::Hashtable PackageC2StateResidencyHash_;
System::Collections::Hashtable PackageC3StateResidencyHash_;
System::Collections::Hashtable PackageC6StateResidencyHash_;
System::Collections::Hashtable PackageC7StateResidencyHash_;
+ System::Collections::Hashtable PackageC8StateResidencyHash_;
+ System::Collections::Hashtable PackageC9StateResidencyHash_;
+ System::Collections::Hashtable PackageC10StateResidencyHash_;
System::Diagnostics::EventLog^ log_;
@@ -491,6 +528,7 @@ namespace PCMServiceNS {
initonly String^ MetricCoreFreqNom = gcnew String(L"Nominal Frequency");
initonly String^ MetricCoreHeadroom = gcnew String(L"Thermal Headroom below TjMax");
initonly String^ MetricCoreResC0 = gcnew String(L"core C0-state residency (%)");
+ initonly String^ MetricCoreResC1 = gcnew String(L"core C1-state residency (%)");
initonly String^ MetricCoreResC3 = gcnew String(L"core C3-state residency (%)");
initonly String^ MetricCoreResC6 = gcnew String(L"core C6-state residency (%)");
initonly String^ MetricCoreResC7 = gcnew String(L"core C7-state residency (%)");
@@ -499,10 +537,14 @@ namespace PCMServiceNS {
initonly String^ MetricSocketBandWrite = gcnew String(L"Memory Write Bandwidth");
initonly String^ MetricSocketEnergyPack = gcnew String(L"Package/Socket Consumed Energy");
initonly String^ MetricSocketEnergyDram = gcnew String(L"DRAM/Memory Consumed Energy");
+ initonly String^ MetricSocketResC0 = gcnew String(L"package C0-state residency (%)");
initonly String^ MetricSocketResC2 = gcnew String(L"package C2-state residency (%)");
initonly String^ MetricSocketResC3 = gcnew String(L"package C3-state residency (%)");
initonly String^ MetricSocketResC6 = gcnew String(L"package C6-state residency (%)");
initonly String^ MetricSocketResC7 = gcnew String(L"package C7-state residency (%)");
+ initonly String^ MetricSocketResC8 = gcnew String(L"package C8-state residency (%)");
+ initonly String^ MetricSocketResC9 = gcnew String(L"package C9-state residency (%)");
+ initonly String^ MetricSocketResC10 = gcnew String(L"package C10-state residency (%)");
initonly String^ MetricQpiBand = gcnew String(L"QPI Link Bandwidth");
diff --git a/PCM_Win/pcm.vcxproj b/PCM_Win/pcm.vcxproj
index 11f29ef6..990cc7d9 100644
--- a/PCM_Win/pcm.vcxproj
+++ b/PCM_Win/pcm.vcxproj
@@ -156,7 +156,8 @@
-
+
+
@@ -172,7 +173,8 @@
-
+
+
diff --git a/PMURegisterDeclarations/GenuineIntel-6-55-4.json b/PMURegisterDeclarations/GenuineIntel-6-55-4.json
index 2729d142..eb796497 100644
--- a/PMURegisterDeclarations/GenuineIntel-6-55-4.json
+++ b/PMURegisterDeclarations/GenuineIntel-6-55-4.json
@@ -98,6 +98,14 @@
"Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}
}
},
+ "irp" : {
+ "programmable" : {
+ "EventCode": {"Config": 0, "Position": 0, "Width": 8},
+ "UMask": {"Config": 0, "Position": 8, "Width": 8},
+ "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0},
+ "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}
+ }
+ },
"iio" : {
"programmable" : {
"EventCode": {"Config": 0, "Position": 0, "Width": 8},
diff --git a/PMURegisterDeclarations/GenuineIntel-6-55-7.json b/PMURegisterDeclarations/GenuineIntel-6-55-7.json
index 2729d142..eb796497 100644
--- a/PMURegisterDeclarations/GenuineIntel-6-55-7.json
+++ b/PMURegisterDeclarations/GenuineIntel-6-55-7.json
@@ -98,6 +98,14 @@
"Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}
}
},
+ "irp" : {
+ "programmable" : {
+ "EventCode": {"Config": 0, "Position": 0, "Width": 8},
+ "UMask": {"Config": 0, "Position": 8, "Width": 8},
+ "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0},
+ "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}
+ }
+ },
"iio" : {
"programmable" : {
"EventCode": {"Config": 0, "Position": 0, "Width": 8},
diff --git a/PMURegisterDeclarations/GenuineIntel-6-6A-6.json b/PMURegisterDeclarations/GenuineIntel-6-6A-6.json
index 56bb9368..afb293fd 100644
--- a/PMURegisterDeclarations/GenuineIntel-6-6A-6.json
+++ b/PMURegisterDeclarations/GenuineIntel-6-6A-6.json
@@ -106,6 +106,14 @@
"Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}
}
},
+ "irp" : {
+ "programmable" : {
+ "EventCode": {"Config": 0, "Position": 0, "Width": 8},
+ "UMask": {"Config": 0, "Position": 8, "Width": 8},
+ "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0},
+ "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}
+ }
+ },
"pcu" : {
"programmable" : {
"EventCode": {"Config": 0, "Position": 0, "Width": 8},
diff --git a/build_solution.bat b/build_solution.bat
index e81e2b9a..77547709 100644
--- a/build_solution.bat
+++ b/build_solution.bat
@@ -1,9 +1,9 @@
REM change path to your VCVARS.BAT
-CALL "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\x86_amd64\vcvarsx86_amd64.bat"
-SET "PATH=C:\Program Files (x86)\MSBuild\14.0\Bin\amd64;%PATH%"
-REM CALL "c:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\VC\Auxiliary\Build\vcvarsamd64_x86.bat"
-REM SET "PATH=C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\MSBuild\15.0\Bin\amd64;%PATH%"
+REM CALL "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\x86_amd64\vcvarsx86_amd64.bat"
+REM SET "PATH=C:\Program Files (x86)\MSBuild\14.0\Bin\amd64;%PATH%"
+CALL "c:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\VC\Auxiliary\Build\vcvarsamd64_x86.bat"
+SET "PATH=C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\MSBuild\15.0\Bin\amd64;%PATH%"
msbuild pcm-all.sln /p:Configuration=Release;Platform=x64 /t:Clean,Build /m
diff --git a/bw.cpp b/bw.cpp
index d96b1abf..4d235497 100644
--- a/bw.cpp
+++ b/bw.cpp
@@ -74,7 +74,9 @@ namespace pcm {
return result;
}
-#define PCM_CLIENT_IMC_DRAM_IO_REQESTS (0x5048)
+#define PCM_CLIENT_IMC_DRAM_GT_REQUESTS (0x5040)
+#define PCM_CLIENT_IMC_DRAM_IA_REQUESTS (0x5044)
+#define PCM_CLIENT_IMC_DRAM_IO_REQUESTS (0x5048)
#define PCM_CLIENT_IMC_DRAM_DATA_READS (0x5050)
#define PCM_CLIENT_IMC_DRAM_DATA_WRITES (0x5054)
#define PCM_CLIENT_IMC_MMAP_SIZE (0x6000)
@@ -95,9 +97,19 @@ uint64 ClientBW::getImcWrites()
return mmioRange->read32(PCM_CLIENT_IMC_DRAM_DATA_WRITES - PCM_CLIENT_IMC_EVENT_BASE);
}
+uint64 ClientBW::getGtRequests() // returns what mmioRange->read32 yields for the GT-requests register (0x5040), addressed relative to PCM_CLIENT_IMC_EVENT_BASE
+{
+ return mmioRange->read32(PCM_CLIENT_IMC_DRAM_GT_REQUESTS - PCM_CLIENT_IMC_EVENT_BASE);
+}
+
+uint64 ClientBW::getIaRequests() // returns what mmioRange->read32 yields for the IA-requests register (0x5044), addressed relative to PCM_CLIENT_IMC_EVENT_BASE
+{
+ return mmioRange->read32(PCM_CLIENT_IMC_DRAM_IA_REQUESTS - PCM_CLIENT_IMC_EVENT_BASE);
+}
+
uint64 ClientBW::getIoRequests()
{
- return mmioRange->read32(PCM_CLIENT_IMC_DRAM_IO_REQESTS - PCM_CLIENT_IMC_EVENT_BASE);
+ return mmioRange->read32(PCM_CLIENT_IMC_DRAM_IO_REQUESTS - PCM_CLIENT_IMC_EVENT_BASE);
}
#define PCM_SERVER_IMC_DRAM_DATA_READS (0x2290)
diff --git a/bw.h b/bw.h
index e4e1d173..3ae83d34 100644
--- a/bw.h
+++ b/bw.h
@@ -32,6 +32,8 @@ namespace pcm {
public:
virtual uint64 getImcReads() { return 0; }
virtual uint64 getImcWrites() { return 0; }
+ virtual uint64 getGtRequests() { return 0; }
+ virtual uint64 getIaRequests() { return 0; }
virtual uint64 getIoRequests() { return 0; }
virtual uint64 getPMMReads() { return 0; }
virtual uint64 getPMMWrites() { return 0; }
@@ -57,6 +59,8 @@ namespace pcm {
uint64 getImcReads() override;
uint64 getImcWrites() override;
+ uint64 getGtRequests() override;
+ uint64 getIaRequests() override;
uint64 getIoRequests() override;
};
diff --git a/cpucounters.cpp b/cpucounters.cpp
index 8ca15670..851d80bf 100644
--- a/cpucounters.cpp
+++ b/cpucounters.cpp
@@ -528,6 +528,18 @@ int32 PCM::getMaxCustomCoreEvents()
return core_gen_counter_num_max;
}
+int PCM::getCPUModelFromCPUID()
+{
+    static int cachedModel = -1; // computed on the first call, then reused; negative means "not computed yet"
+    if (cachedModel >= 0) return cachedModel;
+    PCM_CPUID_INFO leaf1;
+    pcm_cpuid(1, leaf1);
+    const auto word0 = leaf1.array[0]; // presumably EAX of CPUID leaf 1 — confirm against PCM_CPUID_INFO
+    // bits 7:4 become the low nibble and bits 19:16 the high nibble of the returned model number
+    cachedModel = ((word0 & 0xf0) >> 4) | ((word0 & 0xf0000) >> 12);
+    return cachedModel;
+}
+
bool PCM::detectModel()
{
char buffer[1024];
@@ -1542,8 +1554,7 @@ bool PCM::initMSR()
#ifdef _MSC_VER
std::cerr << "You must have signed msr.sys driver in your current directory and have administrator rights to run this program.\n";
#elif defined(__linux__)
- std::cerr << "Try to execute 'modprobe msr' as root user and then\n";
- std::cerr << "you also must have read and write permissions for /dev/cpu/*/msr devices (/dev/msr* for Android). The 'chown' command can help.\n";
+ std::cerr << "execute 'modprobe msr' as root user, then execute pcm as root user.\n";
#elif defined(__FreeBSD__) || defined(__DragonFly__)
std::cerr << "Ensure cpuctl module is loaded and that you have read and write\n";
std::cerr << "permissions for /dev/cpuctl* devices (the 'chown' command can help).\n";
@@ -1711,6 +1722,10 @@ void PCM::initUncoreObjects()
new CounterWidthExtender::ClientImcReadsCounter(clientBW), 32, 10000);
clientImcWrites = std::make_shared(
new CounterWidthExtender::ClientImcWritesCounter(clientBW), 32, 10000);
+ clientGtRequests = std::make_shared(
+ new CounterWidthExtender::ClientGtRequestsCounter(clientBW), 32, 10000);
+ clientIaRequests = std::make_shared(
+ new CounterWidthExtender::ClientIaRequestsCounter(clientBW), 32, 10000);
clientIoRequests = std::make_shared(
new CounterWidthExtender::ClientIoRequestsCounter(clientBW), 32, 10000);
@@ -1936,6 +1951,56 @@ void PCM::initUncorePMUsDirect()
}
}
+ // init IRP PMU: pick the per-model stack count and register layout, then build one UncorePMU per stack on every socket
+ int irpStacks = 0;
+ size_t IRP_CTL_REG_OFFSET = 0;
+ size_t IRP_CTR_REG_OFFSET = 0;
+ const uint32* IRP_UNIT_CTL = nullptr;
+
+ switch (getCPUModel()) // models without a case leave IRP_UNIT_CTL == nullptr, so no IRP PMUs are created below
+ {
+ case SKX:
+ irpStacks = SKX_IIO_STACK_COUNT;
+ IRP_CTL_REG_OFFSET = SKX_IRP_CTL_REG_OFFSET;
+ IRP_CTR_REG_OFFSET = SKX_IRP_CTR_REG_OFFSET;
+ IRP_UNIT_CTL = SKX_IRP_UNIT_CTL;
+ break;
+ case ICX:
+ irpStacks = ICX_IIO_STACK_COUNT;
+ IRP_CTL_REG_OFFSET = ICX_IRP_CTL_REG_OFFSET;
+ IRP_CTR_REG_OFFSET = ICX_IRP_CTR_REG_OFFSET;
+ IRP_UNIT_CTL = ICX_IRP_UNIT_CTL;
+ break;
+ case SNOWRIDGE:
+ irpStacks = SNR_IIO_STACK_COUNT;
+ IRP_CTL_REG_OFFSET = SNR_IRP_CTL_REG_OFFSET;
+ IRP_CTR_REG_OFFSET = SNR_IRP_CTR_REG_OFFSET;
+ IRP_UNIT_CTL = SNR_IRP_UNIT_CTL;
+ break;
+ }
+ if (IRP_UNIT_CTL) // only set by one of the cases above
+ {
+ irpPMUs.resize(num_sockets);
+ for (uint32 s = 0; s < (uint32)num_sockets; ++s)
+ {
+ auto& handle = MSR[socketRefCore[s]]; // every register of socket s is reached through its reference core's MSR handle
+ for (int unit = 0; unit < irpStacks; ++unit) // NOTE(review): indexes IRP_UNIT_CTL with the IIO stack count — confirm each *_IRP_UNIT_CTL table has that many entries
+ {
+ irpPMUs[s][unit] = UncorePMU(
+ std::make_shared(handle, IRP_UNIT_CTL[unit]),
+ std::make_shared(handle, IRP_UNIT_CTL[unit] + IRP_CTL_REG_OFFSET + 0), // two registers at the CTL offset; presumably counter controls 0 and 1 — confirm against the UncorePMU constructor
+ std::make_shared(handle, IRP_UNIT_CTL[unit] + IRP_CTL_REG_OFFSET + 1),
+ std::shared_ptr(), // remaining two slots of this group are left empty
+ std::shared_ptr(),
+ std::make_shared(handle, IRP_UNIT_CTL[unit] + IRP_CTR_REG_OFFSET + 0), // two registers at the CTR offset; presumably counter values 0 and 1 — confirm
+ std::make_shared(handle, IRP_UNIT_CTL[unit] + IRP_CTR_REG_OFFSET + 1),
+ std::shared_ptr(), // remaining two slots of this group are left empty
+ std::shared_ptr()
+ );
+ }
+ }
+ }
+
if (hasPCICFGUncore() && MSR.size())
{
cboPMUs.resize(num_sockets);
@@ -1981,6 +2046,7 @@ void PCM::initUncorePMUsPerf()
{
#ifdef PCM_USE_PERF
iioPMUs.resize(num_sockets);
+ irpPMUs.resize(num_sockets);
cboPMUs.resize(num_sockets);
for (uint32 s = 0; s < (uint32)num_sockets; ++s)
{
@@ -1988,12 +2054,17 @@ void PCM::initUncorePMUsPerf()
populatePerfPMUs(s, enumeratePerfPMUs("ubox", 100), uboxPMUs, true);
populatePerfPMUs(s, enumeratePerfPMUs("cbox", 100), cboPMUs[s], false, true, true);
populatePerfPMUs(s, enumeratePerfPMUs("cha", 200), cboPMUs[s], false, true, true);
- std::vector iioPMUVector;
- populatePerfPMUs(s, enumeratePerfPMUs("iio", 100), iioPMUVector, false);
- for (size_t i = 0; i < iioPMUVector.size(); ++i)
+ auto populateMapPMUs = [&s](const std::string& type, std::vector > & out)
{
- iioPMUs[s][i] = iioPMUVector[i];
- }
+ std::vector PMUVector;
+ populatePerfPMUs(s, enumeratePerfPMUs(type, 100), PMUVector, false);
+ for (size_t i = 0; i < PMUVector.size(); ++i)
+ {
+ out[s][i] = PMUVector[i];
+ }
+ };
+ populateMapPMUs("iio", iioPMUs);
+ populateMapPMUs("irp", irpPMUs);
}
#endif
}
@@ -2002,9 +2073,9 @@ void PCM::initUncorePMUsPerf()
#define PCM_NMI_WATCHDOG_PATH "/proc/sys/kernel/nmi_watchdog"
-bool isNMIWatchdogEnabled()
+bool isNMIWatchdogEnabled(const bool silent)
{
- const auto watchdog = readSysFS(PCM_NMI_WATCHDOG_PATH);
+ const auto watchdog = readSysFS(PCM_NMI_WATCHDOG_PATH, silent);
if (watchdog.length() == 0)
{
return false;
@@ -2343,7 +2414,7 @@ perf_event_attr PCM_init_perf_event_attr(bool group = true)
PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter_, const bool silent)
{
#ifdef __linux__
- if (isNMIWatchdogEnabled())
+ if (isNMIWatchdogEnabled(silent))
{
disableNMIWatchdog(silent);
needToRestoreNMIWatchdog = true;
@@ -2975,9 +3046,9 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
perf_event_attr e = PCM_init_perf_event_attr();
e.type = PERF_TYPE_RAW;
e.config = (1ULL << 63ULL) + event_select_reg.value;
- if (event_select_reg.fields.event_select == OFFCORE_RESPONSE_0_EVTNR && event_select_reg.fields.umask == OFFCORE_RESPONSE_0_UMASK)
+ if (event_select_reg.fields.event_select == getOCREventNr(0, i).first && event_select_reg.fields.umask == getOCREventNr(0, i).second)
e.config1 = pExtDesc->OffcoreResponseMsrValue[0];
- if (event_select_reg.fields.event_select == OFFCORE_RESPONSE_1_EVTNR && event_select_reg.fields.umask == OFFCORE_RESPONSE_1_UMASK)
+ if (event_select_reg.fields.event_select == getOCREventNr(1, i).first && event_select_reg.fields.umask == getOCREventNr(1, i).second)
e.config1 = pExtDesc->OffcoreResponseMsrValue[1];
if (event_select_reg.fields.event_select == LOAD_LATENCY_EVTNR && event_select_reg.fields.umask == LOAD_LATENCY_UMASK)
@@ -3751,6 +3822,13 @@ void PCM::cleanupUncorePMUs(const bool silent)
pmu.second.cleanup();
}
}
+ for (auto& sPMUs : irpPMUs)
+ {
+ for (auto& pmu : sPMUs)
+ {
+ pmu.second.cleanup();
+ }
+ }
for (auto & sCBOPMUs : cboPMUs)
{
for (auto & pmu : sCBOPMUs)
@@ -4218,10 +4296,13 @@ void BasicCounterState::readAndAggregate(std::shared_ptr msr)
cBackendBoundSlots = extract_bits(perfMetrics, 24, 31);
cRetiringSlots = extract_bits(perfMetrics, 0, 7);
const double total = double(cFrontendBoundSlots + cBadSpeculationSlots + cBackendBoundSlots + cRetiringSlots);
- cFrontendBoundSlots = m->FrontendBoundSlots[core_id] += uint64((double(cFrontendBoundSlots) / total) * double(slots));
- cBadSpeculationSlots = m->BadSpeculationSlots[core_id] += uint64((double(cBadSpeculationSlots) / total) * double(slots));
- cBackendBoundSlots = m->BackendBoundSlots[core_id] += uint64((double(cBackendBoundSlots) / total) * double(slots));
- cRetiringSlots = m->RetiringSlots[core_id] += uint64((double(cRetiringSlots) / total) * double(slots));
+ if (total != 0)
+ {
+ cFrontendBoundSlots = m->FrontendBoundSlots[core_id] += uint64((double(cFrontendBoundSlots) / total) * double(slots));
+ cBadSpeculationSlots = m->BadSpeculationSlots[core_id] += uint64((double(cBadSpeculationSlots) / total) * double(slots));
+ cBackendBoundSlots = m->BackendBoundSlots[core_id] += uint64((double(cBackendBoundSlots) / total) * double(slots));
+ cRetiringSlots = m->RetiringSlots[core_id] += uint64((double(cRetiringSlots) / total) * double(slots));
+ }
cAllSlotsRaw = m->AllSlotsRaw[core_id] += slots;
// std::cout << "DEBUG: "<< slots << " " << cFrontendBoundSlots << " " << cBadSpeculationSlots << " " << cBackendBoundSlots << " " << cRetiringSlots << std::endl;
msr->unlock();
@@ -4609,6 +4690,10 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool sile
}
programCboRaw(events64, filter0, filter1);
}
+ else if (type == "irp")
+ {
+ programIRPCounters(events64);
+ }
else if (type == "iio")
{
programIIOCounters(events64);
@@ -4627,6 +4712,10 @@ void PCM::freezeServerUncoreCounters()
for (int i = 0; (i < (int)server_pcicfg_uncore.size()) && MSR.size(); ++i)
{
server_pcicfg_uncore[i]->freezeCounters();
+
+ const auto refCore = socketRefCore[i];
+ TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
+
pcuPMUs[i].freeze(UNC_PMON_UNIT_CTL_FRZ_EN);
if (IIOEventsAvailable())
@@ -4637,11 +4726,20 @@ void PCM::freezeServerUncoreCounters()
}
}
- const auto refCore = socketRefCore[i];
- TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
- for (auto & pmu : cboPMUs[i])
+ if (size_t(i) < irpPMUs.size())
{
- pmu.freeze(UNC_PMON_UNIT_CTL_FRZ_EN);
+ for (auto& pmu : irpPMUs[i])
+ {
+ pmu.second.freeze(UNC_PMON_UNIT_CTL_RSV);
+ }
+ }
+
+ if (size_t(i) < cboPMUs.size())
+ {
+ for (auto& pmu : cboPMUs[i])
+ {
+ pmu.freeze(UNC_PMON_UNIT_CTL_FRZ_EN);
+ }
}
}
}
@@ -4650,6 +4748,10 @@ void PCM::unfreezeServerUncoreCounters()
for (int i = 0; (i < (int)server_pcicfg_uncore.size()) && MSR.size(); ++i)
{
server_pcicfg_uncore[i]->unfreezeCounters();
+
+ const auto refCore = socketRefCore[i];
+ TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
+
pcuPMUs[i].unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN);
if (IIOEventsAvailable())
@@ -4660,11 +4762,20 @@ void PCM::unfreezeServerUncoreCounters()
}
}
- const auto refCore = socketRefCore[i];
- TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
- for (auto & pmu : cboPMUs[i])
+ if (size_t(i) < irpPMUs.size())
+ {
+ for (auto& pmu : irpPMUs[i])
+ {
+ pmu.second.unfreeze(UNC_PMON_UNIT_CTL_RSV);
+ }
+ }
+
+ if (size_t(i) < cboPMUs.size())
{
- pmu.unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN);
+ for (auto& pmu : cboPMUs[i])
+ {
+ pmu.unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN);
+ }
}
}
}
@@ -4811,6 +4922,8 @@ void PCM::readAndAggregateUncoreMCCounters(const uint32 socket, CounterStateType
{
result.UncMCNormalReads += clientImcReads->read();
result.UncMCFullWrites += clientImcWrites->read();
+ result.UncMCGTRequests += clientGtRequests->read();
+ result.UncMCIARequests += clientIaRequests->read();
result.UncMCIORequests += clientIoRequests->read();
}
else
@@ -5265,6 +5378,16 @@ ServerUncoreCounterState PCM::getServerUncoreCounterState(uint32 socket)
result.IIOCounter[stack][i] = *(iioPMUs[socket][stack].counterValue[i]);
}
}
+ for (uint32 stack = 0; socket < irpPMUs.size() && stack < irpPMUs[socket].size() && stack < ServerUncoreCounterState::maxIIOStacks; ++stack)
+ {
+ for (int i = 0; i < ServerUncoreCounterState::maxCounters; ++i)
+ {
+ if (irpPMUs[socket][stack].counterValue[i].get())
+ {
+ result.IRPCounter[stack][i] = *(irpPMUs[socket][stack].counterValue[i]);
+ }
+ }
+ }
for (int i = 0; i < 2 && socket < uboxPMUs.size(); ++i)
{
result.UBOXCounter[i] = *(uboxPMUs[socket].counterValue[i]);
@@ -5937,6 +6060,10 @@ void ServerPCICFGUncore::initDirect(uint32 socket_, const PCM * pcm)
if (cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::ICX)
{
numChannels = 2;
+ if (PCM::getCPUModelFromCPUID() == PCM::ICX_D)
+ {
+ numChannels = 3;
+ }
}
if (numChannels > 0)
@@ -7386,6 +7513,7 @@ uint32 PCM::getMaxNumOfIIOStacks() const
{
if (iioPMUs.size() > 0)
{
+ assert(irpPMUs.size() > 0 && iioPMUs[0].size() == irpPMUs[0].size()); // irpPMUs may be empty: check that before reading irpPMUs[0]
return (uint32)iioPMUs[0].size();
}
return 0;
@@ -7464,6 +7592,41 @@ void PCM::programIIOCounters(uint64 rawEvents[4], int IIOStack)
}
}
+void PCM::programIRPCounters(uint64 rawEvents[4], int IIOStack)
+{
+    // Stacks to program: every stack when IIOStack == -1, otherwise only the requested one.
+    std::vector<int32> IIO_units;
+    if (IIOStack == -1)
+    {
+        for (uint32 stack = 0; stack < getMaxNumOfIIOStacks(); ++stack)
+            IIO_units.push_back(stack);
+    }
+    else
+    {
+        IIO_units.push_back(IIOStack);
+    }
+
+    // irpPMUs[i] is indexed below, so i has to stay below irpPMUs.size() (the bound that
+    // freezeServerUncoreCounters() uses as well); a non-empty irpPMUs alone does not guarantee that.
+    for (int32 i = 0; (i < num_sockets) && MSR.size() && (size_t(i) < irpPMUs.size()); ++i)
+    {
+        uint32 refCore = socketRefCore[i];
+        TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
+
+        for (const auto& unit : IIO_units)
+        {
+            if (irpPMUs[i].count(unit) == 0)
+            {
+                std::cerr << "IRP PMU unit (stack) " << unit << " is not found \n";
+                continue;
+            }
+            auto& pmu = irpPMUs[i][unit];
+            pmu.initFreeze(UNC_PMON_UNIT_CTL_RSV);
+            program(pmu, &rawEvents[0], &rawEvents[2], UNC_PMON_UNIT_CTL_RSV);
+        }
+    }
+}
+
void PCM::programPCIeEventGroup(eventGroup_t &eventGroup)
{
assert(eventGroup.size() > 0);
diff --git a/cpucounters.h b/cpucounters.h
index 33bea7cf..5f4f8a72 100644
--- a/cpucounters.h
+++ b/cpucounters.h
@@ -605,6 +605,7 @@ class PCM_API PCM
std::vector > server_pcicfg_uncore;
std::vector pcuPMUs;
std::vector > iioPMUs;
+ std::vector > irpPMUs;
std::vector uboxPMUs;
double joulesPerEnergyUnit;
std::vector > energy_status;
@@ -621,6 +622,8 @@ class PCM_API PCM
std::shared_ptr clientBW;
std::shared_ptr clientImcReads;
std::shared_ptr clientImcWrites;
+ std::shared_ptr clientGtRequests;
+ std::shared_ptr clientIaRequests;
std::shared_ptr clientIoRequests;
std::vector > serverBW;
@@ -1183,6 +1186,39 @@ class PCM_API PCM
typedef std::map RawPMUConfigs;
ErrorCode program(const RawPMUConfigs& curPMUConfigs, const bool silent = false);
+    //! \brief Selects the event number / umask pair used to program offcore response (OCR) event 0 or 1
+    //! \param event OCR event index, 0 or 1
+    //! \param coreID index into topology (asserted to be in range)
+    //! \return (event number, umask) pair; outside the Atom path an event other than 0 or 1 fails an
+    //!         assert, and (0, 0) is returned when asserts are compiled out
+    std::pair getOCREventNr(const int event, const unsigned coreID) const
+    {
+        assert (coreID < topology.size());
+        const bool isADL = (cpu_model == ADL);
+
+        // Atom core of a hybrid ADL: OFFCORE_RESPONSE_0 event number with umask event + 1
+        // (event is not range-checked on this path)
+        if (hybrid && isADL && topology[coreID].core_type == TopologyEntry::Atom)
+        {
+            return std::make_pair(OFFCORE_RESPONSE_0_EVTNR, event + 1);
+        }
+
+        // ADL big core (GLC) has its own event numbers; every other model uses the generic ones
+        if (event == 0)
+        {
+            return std::make_pair(isADL ? GLC_OFFCORE_RESPONSE_0_EVTNR : OFFCORE_RESPONSE_0_EVTNR, OFFCORE_RESPONSE_0_UMASK);
+        }
+
+        if (event == 1)
+        {
+            return std::make_pair(isADL ? GLC_OFFCORE_RESPONSE_1_EVTNR : OFFCORE_RESPONSE_1_EVTNR, OFFCORE_RESPONSE_1_UMASK);
+        }
+
+        // event is neither 0 nor 1
+        assert (false && "wrong event nr in getOCREventNr");
+        return std::make_pair(0U, 0U);
+    }
+
//! \brief Freezes uncore event counting (works only on microarchitecture codename SandyBridge-EP and IvyTown)
void freezeServerUncoreCounters();
@@ -1317,6 +1353,10 @@ class PCM_API PCM
*/
int32 getMaxCustomCoreEvents();
+ /*! \brief Returns cpu model id number from cpuid instruction
+ */
+ static int getCPUModelFromCPUID();
+
//! \brief Identifiers of supported CPU models
enum SupportedCPUModels
{
@@ -1741,6 +1781,11 @@ class PCM_API PCM
//! \param IIOStack id of the IIO stack to program (-1 for all, if parameter omitted)
void programIIOCounters(uint64 rawEvents[4], int IIOStack = -1);
+ //! \brief Program uncore IRP events
+ //! \param rawEvents events to program (raw format)
+ //! \param IIOStack id of the IIO stack to program (-1 for all, if parameter omitted)
+ void programIRPCounters(uint64 rawEvents[4], int IIOStack = -1);
+
//! \brief Get the state of IIO counter
//! \param socket socket of the IIO stack
//! \param IIOStack id of the IIO stack
@@ -2440,7 +2485,6 @@ uint64 getCBOCounter(uint32 cbo, uint32 counter, const CounterStateType& before,
/*! \brief Direct read of UBOX PMU counter (counter meaning depends on the programming: power/performance/etc)
\param counter counter number
- \param cbo cbo or cha number
\param before CPU counter state before the experiment
\param after CPU counter state after the experiment
*/
@@ -2452,7 +2496,7 @@ uint64 getUBOXCounter(uint32 counter, const CounterStateType& before, const Coun
/*! \brief Direct read of IIO PMU counter (counter meaning depends on the programming: power/performance/etc)
\param counter counter number
- \param cbo IIO stack number
+ \param stack IIO stack number
\param before CPU counter state before the experiment
\param after CPU counter state after the experiment
*/
@@ -2462,6 +2506,18 @@ uint64 getIIOCounter(uint32 stack, uint32 counter, const CounterStateType& befor
return after.IIOCounter[stack][counter] - before.IIOCounter[stack][counter];
}
+/*! \brief Direct read of IRP PMU counter (counter meaning depends on the programming: power/performance/etc); returns the after value minus the before value
+ \param stack IIO stack number (first index into IRPCounter, not range-checked here)
+ \param counter counter number (second index into IRPCounter, not range-checked here)
+ \param before CPU counter state before the experiment
+ \param after CPU counter state after the experiment
+*/
+template
+uint64 getIRPCounter(uint32 stack, uint32 counter, const CounterStateType& before, const CounterStateType& after)
+{
+ return after.IRPCounter[stack][counter] - before.IRPCounter[stack][counter];
+}
+
/*! \brief Direct read of UPI or QPI PMU counter (counter meaning depends on the programming: power/performance/etc)
\param counter counter number
\param port UPI/QPI port number
@@ -2639,6 +2695,10 @@ class UncoreCounterState
template
friend uint64 getBytesWrittenToEDC(const CounterStateType & before, const CounterStateType & after);
template
+ friend uint64 getGTRequestBytesFromMC(const CounterStateType & before, const CounterStateType & after);
+ template
+ friend uint64 getIARequestBytesFromMC(const CounterStateType & before, const CounterStateType & after);
+ template
friend uint64 getIORequestBytesFromMC(const CounterStateType & before, const CounterStateType & after);
template
friend uint64 getConsumedEnergy(const CounterStateType & before, const CounterStateType & after);
@@ -2668,6 +2728,8 @@ class UncoreCounterState
uint64 UncPMMReads;
uint64 UncEDCFullWrites;
uint64 UncEDCNormalReads;
+ uint64 UncMCGTRequests;
+ uint64 UncMCIARequests;
uint64 UncMCIORequests;
uint64 PackageEnergyStatus;
uint64 DRAMEnergyStatus;
@@ -2687,6 +2749,8 @@ class UncoreCounterState
UncPMMReads(0),
UncEDCFullWrites(0),
UncEDCNormalReads(0),
+ UncMCGTRequests(0),
+ UncMCIARequests(0),
UncMCIORequests(0),
PackageEnergyStatus(0),
DRAMEnergyStatus(0),
@@ -2712,6 +2776,8 @@ class UncoreCounterState
UncPMMWrites += o.UncPMMWrites;
UncEDCFullWrites += o.UncEDCFullWrites;
UncEDCNormalReads += o.UncEDCNormalReads;
+ UncMCGTRequests += o.UncMCGTRequests;
+ UncMCIARequests += o.UncMCIARequests;
UncMCIORequests += o.UncMCIORequests;
PackageEnergyStatus += o.PackageEnergyStatus;
DRAMEnergyStatus += o.DRAMEnergyStatus;
@@ -2756,6 +2822,7 @@ class ServerUncoreCounterState : public UncoreCounterState
std::array, maxXPILinks> M3UPICounter;
std::array, maxCBOs> CBOCounter;
std::array, maxIIOStacks> IIOCounter;
+ std::array, maxIIOStacks> IRPCounter;
std::array UBOXCounter;
std::array DRAMClocks;
std::array MCDRAMClocks;
@@ -2782,6 +2849,8 @@ class ServerUncoreCounterState : public UncoreCounterState
template
friend uint64 getIIOCounter(uint32 stack, uint32 counter, const CounterStateType& before, const CounterStateType& after);
template
+ friend uint64 getIRPCounter(uint32 stack, uint32 counter, const CounterStateType& before, const CounterStateType& after);
+ template
friend uint64 getXPICounter(uint32 port, uint32 counter, const CounterStateType& before, const CounterStateType& after);
template
friend uint64 getM2MCounter(uint32 controller, uint32 counter, const CounterStateType & before, const CounterStateType & after);
@@ -2808,6 +2877,7 @@ class ServerUncoreCounterState : public UncoreCounterState
M3UPICounter{{}},
CBOCounter{{}},
IIOCounter{{}},
+ IRPCounter{{}},
UBOXCounter{{}},
DRAMClocks{{}},
MCDRAMClocks{{}},
@@ -3581,6 +3651,33 @@ uint64 getBytesWrittenToEDC(const CounterStateType & before, const CounterStateT
return 0ULL;
}
+/*! \brief Computes number of bytes of read/write requests from GT engine
+    (difference of the UncMCGTRequests values of the two states, multiplied by 64)
+    \param before CPU counter state before the experiment
+    \param after CPU counter state after the experiment
+    \return Number of bytes, or 0 when memoryIOTrafficMetricAvailable() returns false
+*/
+template
+uint64 getGTRequestBytesFromMC(const CounterStateType & before, const CounterStateType & after)
+{
+    const bool metricAvailable = PCM::getInstance()->memoryIOTrafficMetricAvailable();
+    const auto requests = after.UncMCGTRequests - before.UncMCGTRequests;
+    return metricAvailable ? (requests * 64) : 0ULL;
+}
+
+/*! \brief Computes number of bytes of read/write requests from all IA
+    (difference of the UncMCIARequests values of the two states, multiplied by 64)
+    \param before CPU counter state before the experiment
+    \param after CPU counter state after the experiment
+    \return Number of bytes, or 0 when memoryIOTrafficMetricAvailable() returns false
+*/
+template
+uint64 getIARequestBytesFromMC(const CounterStateType & before, const CounterStateType & after)
+{
+    const bool metricAvailable = PCM::getInstance()->memoryIOTrafficMetricAvailable();
+    const auto requests = after.UncMCIARequests - before.UncMCIARequests;
+    return metricAvailable ? (requests * 64) : 0ULL;
+}
/*! \brief Computes number of bytes of read/write requests from all IO sources
diff --git a/pcm-all.sln b/pcm-all.sln
index 00a3b8c5..0676b559 100644
--- a/pcm-all.sln
+++ b/pcm-all.sln
@@ -3,7 +3,7 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.25420.1
MinimumVisualStudioVersion = 10.0.40219.1
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PCM-Lib", "PCM-Lib_Win\PCM-Lib.vcxproj", "{A4206CE7-A913-42ED-B3B9-F7CF5076633B}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PCM-Lib", "PCM-Lib_Win\pcm-lib.vcxproj", "{A4206CE7-A913-42ED-B3B9-F7CF5076633B}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pcm", "PCM_Win\pcm.vcxproj", "{D919CF99-5D9F-46C9-B6F0-626700E63592}"
EndProject
diff --git a/pcm-iio.cpp b/pcm-iio.cpp
index 661ccf11..591fb9c5 100644
--- a/pcm-iio.cpp
+++ b/pcm-iio.cpp
@@ -423,7 +423,7 @@ void PurleyPlatformMapping::getUboxBusNumbers(std::vector& ubox)
for (uint8_t device = 0; device < 32; device++) {
for (uint8_t function = 0; function < 8; function++) {
struct pci pci_dev;
- pci_dev.bdf.busno = bus;
+ pci_dev.bdf.busno = (uint8_t)bus;
pci_dev.bdf.devno = device;
pci_dev.bdf.funcno = function;
if (probe_pci(&pci_dev)) {
@@ -513,7 +513,7 @@ bool IPlatformMapping10Nm::getSadIdRootBusMap(uint32_t socket_id, std::map pccr(get_ccr(m, ctr.ccr));
rawEvents[ctr.idx] = pccr->get_ccr_value();
- int stacks_count = iios[0].stacks.size();
+ int stacks_count = (int)iios[0].stacks.size();
before = new IIOCounterState[iios.size() * stacks_count];
after = new IIOCounterState[iios.size() * stacks_count];
diff --git a/pcm-memory.cpp b/pcm-memory.cpp
index 4d398d0f..5d89f76b 100644
--- a/pcm-memory.cpp
+++ b/pcm-memory.cpp
@@ -161,7 +161,36 @@ void printSocketRankBWHeader(uint32 no_columns, uint32 skt)
cout << "\n";
}
-void printSocketChannelBW(PCM */*m*/, memdata_t *md, uint32 no_columns, uint32 skt)
+// Prints the two CSV header rows of the per-rank output, each after its printDateForCSV() prefix: the first row
+// repeats the socket name for every column, the second names the channel/rank read and write columns. A negative rank is skipped.
+void printSocketRankBWHeader_cvt(const uint32 numSockets, const uint32 num_imc_channels, const int rankA, const int rankB)
+{
+    const int ranks[] = { rankA, rankB }; // rank A columns come before rank B columns
+    printDateForCSV(Header1);
+    for (uint32 s = 0; s < numSockets; ++s) {
+        for (uint32 ch = 0; ch < num_imc_channels; ++ch) {
+            for (const int rank : ranks) {
+                if (rank < 0) continue;
+                cout << "SKT" << s << "," << "SKT" << s << ",";
+            }
+        }
+    }
+    cout << endl;
+
+    printDateForCSV(Header2);
+    for (uint32 s = 0; s < numSockets; ++s) {
+        for (uint32 ch = 0; ch < num_imc_channels; ++ch) {
+            for (const int rank : ranks) {
+                if (rank < 0) continue;
+                cout << "Mem_Ch" << ch << "_R" << rank << "_reads,"
+                     << "Mem_Ch" << ch << "_R" << setw(1) << rank << "_writes,";
+            }
+        }
+    }
+    cout << endl;
+}
+
+void printSocketChannelBW(PCM * /*m*/, memdata_t *md, uint32 no_columns, uint32 skt)
{
for (uint32 channel = 0; channel < max_imc_channels; ++channel) {
// check all the sockets for bad channel "channel"
@@ -221,6 +250,25 @@ void printSocketChannelBW(uint32 no_columns, uint32 skt, uint32 num_imc_channels
}
}
+// Prints one CSV data row: read and write rate of each selected (non-negative) rank, for every socket and channel.
+void printSocketChannelBW_cvt(const uint32 numSockets, const uint32 num_imc_channels, const ServerUncoreCounterState * uncState1,
+    const ServerUncoreCounterState * uncState2, const uint64 elapsedTime, const int rankA, const int rankB)
+{
+    // counter delta * 64 / 10^6 per (elapsedTime / 1000.0); MB/s if elapsedTime is in milliseconds - TODO confirm
+    const auto rate = [&](const uint32 skt, const uint32 channel, const uint32 counter) {
+        return (float) (getMCCounter(channel, counter, uncState1[skt], uncState2[skt]) * 64 / 1000000.0 / (elapsedTime/1000.0));
+    };
+    printDateForCSV(Data);
+    for (uint32 skt = 0; skt < numSockets; ++skt)
+        for (uint32 channel = 0; channel < num_imc_channels; ++channel) {
+            if (rankA >= 0)
+                cout << rate(skt, channel, ServerPCICFGUncore::EventPosition::READ_RANK_A) << "," << rate(skt, channel, ServerPCICFGUncore::EventPosition::WRITE_RANK_A) << ",";
+            if (rankB >= 0)
+                cout << rate(skt, channel, ServerPCICFGUncore::EventPosition::READ_RANK_B) << "," << rate(skt, channel, ServerPCICFGUncore::EventPosition::WRITE_RANK_B) << ",";
+        }
+    cout << endl;
+}
+
float AD_BW(const memdata_t *md, const uint32 skt)
{
const auto totalPMM = md->iMC_PMM_Rd_socket[skt] + md->iMC_PMM_Wr_socket[skt];
@@ -812,8 +860,8 @@ void calculate_bandwidth(PCM *m,
}
else if (metrics == PmemMemoryMode)
{
- md.iMC_PMM_MemoryMode_Miss_socket[skt] += (pmmMemoryModeCleanMisses + pmmMemoryModeDirtyMisses) / (elapsedTime / 1000.0);
- md.iMC_PMM_MemoryMode_Hit_socket[skt] += (pmmMemoryModeHits) / (elapsedTime / 1000.0);
+ md.iMC_PMM_MemoryMode_Miss_socket[skt] += (float)((pmmMemoryModeCleanMisses + pmmMemoryModeDirtyMisses) / (elapsedTime / 1000.0));
+ md.iMC_PMM_MemoryMode_Hit_socket[skt] += (float)((pmmMemoryModeHits) / (elapsedTime / 1000.0));
}
else
{
@@ -881,33 +929,35 @@ void calculate_bandwidth(PCM *m,
}
}
-void calculate_bandwidth_rank(PCM *m, const ServerUncoreCounterState uncState1[], const ServerUncoreCounterState uncState2[], const uint64 elapsedTime, const bool /*csv*/, bool & /*csvheader*/, const uint32 no_columns, const int rankA, const int rankB)
+void calculate_bandwidth_rank(PCM *m, const ServerUncoreCounterState uncState1[], const ServerUncoreCounterState uncState2[],
+ const uint64 elapsedTime, const bool csv, bool &csvheader, const uint32 no_columns, const int rankA, const int rankB)
{
uint32 skt = 0;
cout.setf(ios::fixed);
cout.precision(2);
uint32 numSockets = m->getNumSockets();
- while(skt < numSockets)
- {
- auto printRow = [&skt, &uncState1, &uncState2, &elapsedTime, &rankA, &rankB](const uint32 no_columns) {
- printSocketRankBWHeader(no_columns, skt);
- printSocketChannelBW(no_columns, skt, max_imc_channels, uncState1, uncState2, elapsedTime, rankA, rankB);
- for (uint32 i = skt; i < (no_columns + skt); ++i)
- {
- cout << "|-------------------------------------------|";
- }
- cout << "\n";
- skt += no_columns;
- };
- // Full row
- if ((skt + no_columns) <= numSockets)
- {
- printRow(no_columns);
+ if (csv) {
+ if (csvheader) {
+ printSocketRankBWHeader_cvt(numSockets, max_imc_channels, rankA, rankB);
+ csvheader = false;
}
- else //Display the remaining sockets in this row
- {
- printRow(numSockets - skt);
+ printSocketChannelBW_cvt(numSockets, max_imc_channels, uncState1, uncState2, elapsedTime, rankA, rankB);
+ } else {
+ while(skt < numSockets) {
+ auto printRow = [&skt, &uncState1, &uncState2, &elapsedTime, &rankA, &rankB](const uint32 no_columns) {
+ printSocketRankBWHeader(no_columns, skt);
+ printSocketChannelBW(no_columns, skt, max_imc_channels, uncState1, uncState2, elapsedTime, rankA, rankB);
+ for (uint32 i = skt; i < (no_columns + skt); ++i)
+ cout << "|-------------------------------------------|";
+ cout << "\n";
+ skt += no_columns;
+ };
+ // Full row
+ if ((skt + no_columns) <= numSockets)
+ printRow(no_columns);
+ else //Display the remaining sockets in this row
+ printRow(numSockets - skt);
}
}
}
@@ -1010,6 +1060,7 @@ int main(int argc, char * argv[])
}
if(rankA < 0) rankA = rank;
else if(rankB < 0) rankB = rank;
+ metrics = PartialWrites;
}
}
continue;
@@ -1140,7 +1191,7 @@ int main(int argc, char * argv[])
exit(EXIT_FAILURE);
}
- max_imc_channels = m->getMCChannelsPerSocket();
+ max_imc_channels = (pcm::uint32)m->getMCChannelsPerSocket();
ServerUncoreCounterState * BeforeState = new ServerUncoreCounterState[m->getNumSockets()];
ServerUncoreCounterState * AfterState = new ServerUncoreCounterState[m->getNumSockets()];
@@ -1165,6 +1216,9 @@ int main(int argc, char * argv[])
cerr << "Update every " << delay << " seconds\n";
+ if (csv)
+ cerr << "Read/Write values expressed in (MB/s)" << endl;
+
for(uint32 i=0; igetNumSockets(); ++i)
BeforeState[i] = m->getServerUncoreCounterState(i);
diff --git a/pcm-numa.cpp b/pcm-numa.cpp
index 811fd740..69538e5f 100644
--- a/pcm-numa.cpp
+++ b/pcm-numa.cpp
@@ -193,10 +193,10 @@ int main(int argc, char * argv[])
for (int i = 0; i < 4; ++i)
regs[i] = def_event_select_reg;
- regs[0].fields.event_select = OFFCORE_RESPONSE_0_EVTNR; // OFFCORE_RESPONSE 0 event
- regs[0].fields.umask = OFFCORE_RESPONSE_0_UMASK;
- regs[1].fields.event_select = OFFCORE_RESPONSE_1_EVTNR; // OFFCORE_RESPONSE 1 event
- regs[1].fields.umask = OFFCORE_RESPONSE_1_UMASK;
+ regs[0].fields.event_select = m->getOCREventNr(0, 0).first; // OFFCORE_RESPONSE 0 event
+ regs[0].fields.umask = m->getOCREventNr(0, 0).second;
+ regs[1].fields.event_select = m->getOCREventNr(1, 0).first; // OFFCORE_RESPONSE 1 event
+ regs[1].fields.umask = m->getOCREventNr(1, 0).second;
PCM::ErrorCode status = m->program(PCM::EXT_CUSTOM_CORE_EVENTS, &conf);
m->checkError(status);
diff --git a/pcm-pcicfg.cpp b/pcm-pcicfg.cpp
index 5a8c0916..ff8f5703 100644
--- a/pcm-pcicfg.cpp
+++ b/pcm-pcicfg.cpp
@@ -61,7 +61,7 @@ int main(int argc, char * argv[])
{
case 'w':
write = true;
- value = read_number(optarg);
+ value = (pcm::uint32)read_number(optarg);
break;
case 'd':
dec = true;
diff --git a/pcm-pcie.h b/pcm-pcie.h
index 00da6923..a70c9d9e 100644
--- a/pcm-pcie.h
+++ b/pcm-pcie.h
@@ -46,7 +46,7 @@ static uint getIdent (const string &s)
* We are adding "| " before and " " after the event name hence +5 to
* strlen(eventNames). Rest of the logic is to center the event name.
*/
- uint ident = 5 + s.size();
+ uint ident = 5 + (uint)s.size();
return (3 + ident / 2);
}
@@ -140,7 +140,7 @@ class LegacyPlatform: public IPlatform
eventNames(events), eventGroups(eventCodes)
{
int eventsCount = 0;
- for (auto &group : eventGroups) eventsCount += group.size();
+ for (auto &group : eventGroups) eventsCount += (int)group.size();
m_delay = uint32(delay * 1000 / (eventGroups.size()) / NUM_SAMPLES);
if (m_delay * eventsCount * NUM_SAMPLES < delay * 1000) ++m_delay;
@@ -180,10 +180,10 @@ inline uint64 LegacyPlatform::getEventCount (uint skt, uint idx)
uint LegacyPlatform::eventGroupOffset(eventGroup_t &eventGroup)
{
uint offset = 0;
- uint grpIdx = &eventGroup - eventGroups.data();
+ uint grpIdx = (uint)(&eventGroup - eventGroups.data());
for (auto iter = eventGroups.begin(); iter < eventGroups.begin() + grpIdx; iter++)
- offset += iter->size();
+ offset += (uint)iter->size();
return offset;
}
@@ -264,7 +264,7 @@ void LegacyPlatform::printSocketScopeEvent(uint skt, eventFilter filter, uint id
void LegacyPlatform::printSocketScopeEvents(uint skt, eventFilter filter)
{
if (!m_csv) {
- int ident = strlen("Skt |") / 2;
+ int ident = (int)strlen("Skt |") / 2;
cout << setw(ident) << skt << setw(ident) << ' ';
} else
cout << skt;
@@ -324,20 +324,20 @@ void LegacyPlatform::printAggregatedEvents()
{
if (!m_csv)
{
- uint len = strlen("Skt ");
+ uint len = (uint)strlen("Skt ");
for(auto& evt : eventNames)
- len += (5 + evt.size());
+ len += (5 + (uint)evt.size());
if (m_bandwidth)
for(auto& bw : bwNames)
- len += (5 + bw.size());
+ len += (5 + (uint)bw.size());
while (len--)
cout << '-';
cout << "\n";
- int ident = strlen("Skt |") /2 ;
+ int ident = (int)strlen("Skt |") /2 ;
cout << setw(ident) << "*" << setw(ident) << ' ';
for (uint idx = 0; idx < eventNames.size(); ++idx)
diff --git a/pcm-power.cpp b/pcm-power.cpp
index 62ba0d3b..6b617876 100644
--- a/pcm-power.cpp
+++ b/pcm-power.cpp
@@ -333,7 +333,7 @@ int main(int argc, char * argv[])
m->setBlocked(false);
}
- if (((delay < 1.0) && (delay > 0.0)) || (delay <= 0.0)) delay = PCM_DELAY_DEFAULT;
+ if (delay <= 0.0) delay = PCM_DELAY_DEFAULT;
uint32 i = 0;
diff --git a/pcm-raw.cpp b/pcm-raw.cpp
index 1a1b51f2..d24b0ccc 100644
--- a/pcm-raw.cpp
+++ b/pcm-raw.cpp
@@ -72,6 +72,8 @@ void print_usage(const string progname)
cerr << " -f | /f => enforce flushing each line for interactive output\n";
cerr << " -i[=number] | /i[=number] => allow to determine number of iterations\n";
cerr << " -tr | /tr => transpose output (print single event data in a row)\n";
+ cerr << " -l => use locale for printing values, calls -tab for readability\n";
+ cerr << " -tab => replace default comma separator with tab\n";
cerr << " -el event_list.txt | /el event_list.txt => read event list from event_list.txt file, \n";
cerr << " each line represents an event,\n";
cerr << " event groups are separated by a semicolon\n";
@@ -174,7 +176,7 @@ bool initPMUEventMap()
assert(EventTypetPos >= 0);
const std::string ourFMS = PCM::getInstance()->getCPUFamilyModelString();
// cout << "Our FMS: " << ourFMS << "\n";
- std::map eventFiles;
+ std::multimap eventFiles;
cout << "Matched event files:\n";
while (std::getline(in, line))
{
@@ -187,14 +189,14 @@ bool initPMUEventMap()
if (std::regex_search(ourFMS.c_str(), FMSMatch, FMSRegex))
{
cout << tokens[FMSPos] << " " << tokens[EventTypetPos] << " " << tokens[FilenamePos] << "\n";
- eventFiles[tokens[EventTypetPos]] = tokens[FilenamePos];
+ eventFiles.insert(std::make_pair(tokens[EventTypetPos], tokens[FilenamePos]));
}
}
in.close();
if (eventFiles.empty())
{
- cerr << "ERROR: CPU " << ourFMS << "not found in " << mapfile << "\n";
+ cerr << "ERROR: CPU " << ourFMS << " not found in " << mapfile << "\n";
return false;
}
@@ -734,11 +736,12 @@ bool show_partial_core_output = false;
bitset ycores;
bool flushLine = false;
bool transpose = false;
+std::string sep = ",";
void printRowBegin(const std::string & EventName, const CoreCounterState & BeforeState, const CoreCounterState & AfterState, PCM* m)
{
printDateForCSV(CsvOutputType::Data);
- cout << EventName << "," << (1000ULL * getInvariantTSC(BeforeState, AfterState)) / m->getNominalFrequency() << "," << getInvariantTSC(BeforeState, AfterState);
+ cout << EventName << sep << (1000ULL * getInvariantTSC(BeforeState, AfterState)) / m->getNominalFrequency() << sep << getInvariantTSC(BeforeState, AfterState);
}
@@ -750,7 +753,7 @@ void printRow(const std::string & EventName, MetricFunc metricFunc, const std::v
{
if (!(m->isCoreOnline(core) == false || (show_partial_core_output && ycores.test(core) == false)))
{
- cout << "," << metricFunc(BeforeState[core], AfterState[core]);
+ cout << sep << metricFunc(BeforeState[core], AfterState[core]);
}
}
cout << "\n";
@@ -788,7 +791,7 @@ void printTransposed(const PCM::RawPMUConfigs& curPMUConfigs, PCM* m, vectorgetMaxNumOfCBoxes());
}
+ else if (type == "irp")
+ {
+ printUncoreRows([](const uint32 u, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getIRPCounter(u, i, before, after); }, (uint32)m->getMaxNumOfIIOStacks());
+ }
else if (type == "iio")
{
printUncoreRows([](const uint32 u, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getIIOCounter(u, i, before, after); }, (uint32)m->getMaxNumOfIIOStacks());
@@ -904,9 +911,9 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, PCM* m, vector 0 && AfterState.size() > 0)
{
choose(outputType,
- []() { cout << ","; },
+ []() { cout << sep; },
[]() { cout << "ms,"; },
- [&]() { cout << (1000ULL * getInvariantTSC(BeforeState[0], AfterState[0])) / m->getNominalFrequency() << ","; });
+ [&]() { cout << (1000ULL * getInvariantTSC(BeforeState[0], AfterState[0])) / m->getNominalFrequency() << sep; });
}
for (auto typeEvents : curPMUConfigs)
{
@@ -937,9 +944,9 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, PCM* m, vectorgetSocketId(core) << "CORE" << core << ","; },
- [&metric]() { cout << metric << ","; },
- [&value]() { cout << value << ","; });
+ [m, core]() { cout << "SKT" << m->getSocketId(core) << "CORE" << core << sep; },
+ [&metric]() { cout << metric << sep; },
+ [&value]() { cout << value << sep; });
};
for (uint32 cnt = 0; cnt < 4; ++cnt)
{
@@ -960,9 +967,9 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, PCM* m, vectorgetSocketId(core) << "CORE" << core << ","; },
- [&event, &i]() { if (event.second.empty()) cout << "COREEvent" << i << ","; else cout << event.second << ","; },
- [&]() { cout << getNumberOfCustomEvents(i, BeforeState[core], AfterState[core]) << ","; });
+ [m, core]() { cout << "SKT" << m->getSocketId(core) << "CORE" << core << sep; },
+ [&event, &i]() { if (event.second.empty()) cout << "COREEvent" << i << sep; else cout << event.second << sep; },
+ [&]() { cout << getNumberOfCustomEvents(i, BeforeState[core], AfterState[core]) << sep; });
++i;
}
}
@@ -977,9 +984,9 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, PCM* m, vectorgetNumSockets(); ++s)
+ {
+ for (uint32 stack = 0; stack < m->getMaxNumOfIIOStacks(); ++stack)
+ {
+ int i = 0;
+ for (auto event : events)
+ {
+ choose(outputType,
+ [s, stack]() { cout << "SKT" << s << "IRP" << stack << sep; },
+ [&event, &i]() { if (event.second.empty()) cout << "IRPEvent" << i << sep; else cout << event.second << sep; },
+ [&]() { cout << getIRPCounter(stack, i, BeforeUncoreState[s], AfterUncoreState[s]) << sep; });
++i;
}
}
@@ -1111,9 +1136,9 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, PCM* m, vectorPMMTrafficMetricsAvailable()) cout << " PMM WR : bytes written to PMM memory (in GBytes)\n";
if (m->MCDRAMmemoryTrafficMetricsAvailable()) cout << " MCDRAM READ : bytes read from MCDRAM controller (in GBytes)\n";
if (m->MCDRAMmemoryTrafficMetricsAvailable()) cout << " MCDRAM WRITE : bytes written to MCDRAM controller (in GBytes)\n";
- if (m->memoryIOTrafficMetricAvailable()) cout << " IO : bytes read/written due to IO requests to memory controller (in GBytes); this may be an over estimate due to same-cache-line partial requests\n";
+ if (m->memoryIOTrafficMetricAvailable()) {
+ cout << " IO : bytes read/written due to IO requests to memory controller (in GBytes); this may be an over estimate due to same-cache-line partial requests\n";
+ cout << " IA : bytes read/written due to IA requests to memory controller (in GBytes); this may be an over estimate due to same-cache-line partial requests\n";
+ cout << " GT : bytes read/written due to GT requests to memory controller (in GBytes); this may be an over estimate due to same-cache-line partial requests\n";
+ }
if (m->L3CacheOccupancyMetricAvailable()) cout << " L3OCC : L3 occupancy (in KBytes)\n";
if (m->CoreLocalMemoryBWMetricAvailable()) cout << " LMB : L3 cache external bandwidth satisfied by local memory (in MBytes)\n";
if (m->CoreRemoteMemoryBWMetricAvailable()) cout << " RMB : L3 cache external bandwidth satisfied by remote memory (in MBytes)\n";
@@ -433,6 +437,10 @@ void print_output(PCM * m,
cout << " MCDRAM READ | MCDRAM WRITE |";
if (m->memoryIOTrafficMetricAvailable())
cout << " IO |";
+ if (m->memoryIOTrafficMetricAvailable())
+ cout << " IA |";
+ if (m->memoryIOTrafficMetricAvailable())
+ cout << " GT |";
if (m->packageEnergyMetricsAvailable())
cout << " CPU energy |";
if (m->dramEnergyMetricsAvailable())
@@ -459,6 +467,10 @@ void print_output(PCM * m,
" " << setw(11) << getBytesWrittenToEDC(sktstate1[i], sktstate2[i]) / double(1e9);
if (m->memoryIOTrafficMetricAvailable())
cout << " " << setw(5) << getIORequestBytesFromMC(sktstate1[i], sktstate2[i]) / double(1e9);
+ if (m->memoryIOTrafficMetricAvailable())
+ cout << " " << setw(5) << getIARequestBytesFromMC(sktstate1[i], sktstate2[i]) / double(1e9);
+ if (m->memoryIOTrafficMetricAvailable())
+ cout << " " << setw(5) << getGTRequestBytesFromMC(sktstate1[i], sktstate2[i]) / double(1e9);
cout << " ";
if(m->packageEnergyMetricsAvailable()) {
cout << setw(6) << getConsumedJoules(sktstate1[i], sktstate2[i]);
diff --git a/simdjson_wrapper.h b/simdjson_wrapper.h
index b6e76880..03a131e2 100644
--- a/simdjson_wrapper.h
+++ b/simdjson_wrapper.h
@@ -17,5 +17,9 @@
#pragma message("parsing events from 01.org/perfmon won't be supported because simdjson library is not found in simdjson/singleheader/simdjson.h")
#pragma message("run 'git clone https://github.com/simdjson/simdjson.git' to get simdjson library")
#endif
+ #else
+ #pragma message("The compiler is too old, it does not support '__has_include' directive and other c++ features required for simdjson library. Parsing events from 01.org/perfmon won't be supported.")
#endif
+#else
+ #pragma message("The compiler is too old (g++ 6 or below). Parsing events from 01.org/perfmon won't be supported.")
#endif
diff --git a/test.sh b/test.sh
new file mode 100644
index 00000000..70657b5d
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,100 @@
+modprobe msr
+
+./pcm.x -r -- sleep 1
+if [ "$?" -ne "0" ]; then
+ echo "Error in pcm.x"
+ exit 1
+fi
+
+./pcm-memory.x -- sleep 1
+if [ "$?" -ne "0" ]; then
+ echo "Error in pcm-memory.x"
+ exit 1
+fi
+
+./pcm-memory.x -rank=1 -- sleep 1
+if [ "$?" -ne "0" ]; then
+ echo "Error in pcm-memory.x"
+ exit 1
+fi
+
+./pcm-memory.x -rank=1 -csv -- sleep 1
+if [ "$?" -ne "0" ]; then
+ echo "Error in pcm-memory.x"
+ exit 1
+fi
+
+./pcm-raw.x -e core/config=0x30203,name=LD_BLOCKS.STORE_FORWARD/ -e cha/config=0,name=UNC_CHA_CLOCKTICKS/ -e imc/fixed,name=DRAM_CLOCKS -- sleep 1
+if [ "$?" -ne "0" ]; then
+ echo "Error in pcm-raw.x"
+ exit 1
+fi
+
+./pcm-mmio.x 0x0
+if [ "$?" -ne "0" ]; then
+ echo "Error in pcm-mmio.x"
+ exit 1
+fi
+
+./pcm-pcicfg.x 0 0 0 0 0
+if [ "$?" -ne "0" ]; then
+ echo "Error in pcm-pcicfg.x"
+ exit 1
+fi
+
+./pcm-numa.x -- sleep 1
+if [ "$?" -ne "0" ]; then
+ echo "Error in pcm-numa.x"
+ exit 1
+fi
+
+./pcm-core.x -e cpu/umask=0x01,event=0x0e,name=UOPS_ISSUED.STALL_CYCLES/ -- sleep 1
+if [ "$?" -ne "0" ]; then
+ echo "Error in pcm-core.x"
+ exit 1
+fi
+
+./c_example.x
+if [ "$?" -ne "0" ]; then
+ echo "Error in c_example.x"
+ exit 1
+fi
+
+./c_example_shlib.x
+if [ "$?" -ne "0" ]; then
+ echo "Error in c_example_shlib.x"
+ exit 1
+fi
+
+./pcm-msr.x -a 0x30A
+if [ "$?" -ne "0" ]; then
+ echo "Error in pcm-msr.x"
+ exit 1
+fi
+
+./pcm-power.x -- sleep 1
+if [ "$?" -ne "0" ]; then
+ echo "Error in pcm-power.x"
+ exit 1
+fi
+
+./pcm-pcie.x -- sleep 1
+if [ "$?" -ne "0" ]; then
+ echo "Error in pcm-pcie.x"
+ exit 1
+fi
+
+./pcm-latency.x -i=1
+if [ "$?" -ne "0" ]; then
+ echo "Error in pcm-latency.x"
+ exit 1
+fi
+
+./pcm-tsx.x -- sleep 1
+if [ "$?" -ne "0" ]; then
+ echo "Error in pcm-tsx.x"
+ exit 1
+fi
+
+# TODO add more tests
+# e.g. for ./pcm-sensor-server.x, ./pcm-iio.x, ./pcm-sensor.x, ...
diff --git a/types.h b/types.h
index 05b8612a..cf33b41c 100644
--- a/types.h
+++ b/types.h
@@ -162,6 +162,8 @@ constexpr auto MSR_FRONTEND = 0x3F7;
// Offcore response events
#define OFFCORE_RESPONSE_0_EVTNR (0xB7)
#define OFFCORE_RESPONSE_1_EVTNR (0xBB)
+#define GLC_OFFCORE_RESPONSE_0_EVTNR (0x2A)
+#define GLC_OFFCORE_RESPONSE_1_EVTNR (0x2B)
#define OFFCORE_RESPONSE_0_UMASK (1)
#define OFFCORE_RESPONSE_1_UMASK (1)
@@ -1080,6 +1082,42 @@ static const uint32 ICX_IIO_UNIT_CTL[] = {
0x0A50, 0x0A70, 0x0A90, 0x0AE0, 0x0B00, 0x0B20
};
+static const uint32 ICX_IRP_UNIT_CTL[] = {
+ 0x0A4A,
+ 0x0A6A,
+ 0x0A8A,
+ 0x0ADA,
+ 0x0AFA,
+ 0x0B1A
+};
+
+#define ICX_IRP_CTL_REG_OFFSET (0x0003)
+#define ICX_IRP_CTR_REG_OFFSET (0x0001)
+
+
+static const uint32 SNR_IRP_UNIT_CTL[] = {
+ 0x1EA0,
+ 0x1EB0,
+ 0x1EC0,
+ 0x1ED0,
+ 0x1EE0
+};
+
+#define SNR_IRP_CTL_REG_OFFSET (0x0008)
+#define SNR_IRP_CTR_REG_OFFSET (0x0001)
+
+static const uint32 SKX_IRP_UNIT_CTL[] = {
+ 0x0A58,
+ 0x0A78,
+ 0x0A98,
+ 0x0AB8,
+ 0x0AD8,
+ 0x0AF8
+};
+
+#define SKX_IRP_CTL_REG_OFFSET (0x0003)
+#define SKX_IRP_CTR_REG_OFFSET (0x0001)
+
#define SNR_IIO_CBDMA_UNIT_STATUS (0x1E07)
#define SNR_IIO_CBDMA_UNIT_CTL (0x1E00)
#define SNR_IIO_CBDMA_CTR0 (0x1E01)
diff --git a/width_extender.h b/width_extender.h
index dc0f0ffb..93187c8e 100644
--- a/width_extender.h
+++ b/width_extender.h
@@ -69,6 +69,8 @@ class CounterWidthExtender
typedef ClientImcCounter<&FreeRunningBWCounters::getImcReads> ClientImcReadsCounter;
typedef ClientImcCounter<&FreeRunningBWCounters::getImcWrites> ClientImcWritesCounter;
+ typedef ClientImcCounter<&FreeRunningBWCounters::getGtRequests> ClientGtRequestsCounter;
+ typedef ClientImcCounter<&FreeRunningBWCounters::getIaRequests> ClientIaRequestsCounter;
typedef ClientImcCounter<&FreeRunningBWCounters::getIoRequests> ClientIoRequestsCounter;
struct MBLCounter : public AbstractRawCounter