diff --git a/.github/workflows/ci-test.yml b/.github/workflows/ci-test.yml new file mode 100644 index 00000000..42038835 --- /dev/null +++ b/.github/workflows/ci-test.yml @@ -0,0 +1,22 @@ +name: tests + +on: + push: + branches: [ '**' ] + pull_request: + branches: [ '**' ] + +jobs: + build: + + runs-on: ci-test + if: ${{ github.repository != 'opcm/pcm' }} + + steps: + - uses: actions/checkout@v2 + - name: make + run: | + g++ --version + git clone https://github.com/simdjson/simdjson.git + make -j + sh test.sh diff --git a/PCM-Lib_Win/pcm-lib.vcxproj b/PCM-Lib_Win/pcm-lib.vcxproj index ad212ce1..2b906394 100644 --- a/PCM-Lib_Win/pcm-lib.vcxproj +++ b/PCM-Lib_Win/pcm-lib.vcxproj @@ -191,7 +191,8 @@ - + + @@ -206,7 +207,8 @@ - + + diff --git a/PCM-Raw_Win/pcm-raw-win.vcxproj b/PCM-Raw_Win/pcm-raw-win.vcxproj index e967ff34..dc536f2c 100644 --- a/PCM-Raw_Win/pcm-raw-win.vcxproj +++ b/PCM-Raw_Win/pcm-raw-win.vcxproj @@ -193,4 +193,4 @@ - \ No newline at end of file + diff --git a/PCM-Service_Win/AssemblyInfo.cpp b/PCM-Service_Win/AssemblyInfo.cpp index 51bd41e0..a570996c 100644 --- a/PCM-Service_Win/AssemblyInfo.cpp +++ b/PCM-Service_Win/AssemblyInfo.cpp @@ -29,9 +29,9 @@ using namespace System::Security::Permissions; [assembly:AssemblyTitleAttribute("PCMService")]; [assembly:AssemblyDescriptionAttribute("")]; [assembly:AssemblyConfigurationAttribute("")]; -[assembly:AssemblyCompanyAttribute("Intel GmbH")]; +[assembly:AssemblyCompanyAttribute("Intel Corp")]; [assembly:AssemblyProductAttribute("PCMService")]; -[assembly:AssemblyCopyrightAttribute("Copyright (c) Intel GmbH 2010")]; +[assembly:AssemblyCopyrightAttribute("Copyright (c) Intel Corp 2010-2021")]; [assembly:AssemblyTrademarkAttribute("")]; [assembly:AssemblyCultureAttribute("")]; @@ -50,7 +50,4 @@ using namespace System::Security::Permissions; [assembly:ComVisible(false)]; -[assembly:CLSCompliantAttribute(true)]; - -[assembly:SecurityPermission(SecurityAction::RequestMinimum, UnmanagedCode = true)]; - +[assembly:CLSCompliantAttribute(true)]; \ No newline at end of file diff --git a/PCM-Service_Win/PCMService.h b/PCM-Service_Win/PCMService.h index e330fee8..2d7dc9aa 100644 --- a/PCM-Service_Win/PCMService.h +++ b/PCM-Service_Win/PCMService.h @@ -124,6 +124,8 @@ namespace PCMServiceNS { counterCollection->Add( counter ); counter = gcnew CounterCreationData(MetricCoreResC0, "Displays the residency of core or socket in core C0-state in percent.", PerformanceCounterType::NumberOfItems64); counterCollection->Add( counter ); + counter = gcnew CounterCreationData(MetricCoreResC1, "Displays the residency of core or socket in core C1-state in percent.", PerformanceCounterType::NumberOfItems64); + counterCollection->Add( counter ); counter = gcnew CounterCreationData(MetricCoreResC3, "Displays the residency of core or socket in core C3-state in percent.", PerformanceCounterType::NumberOfItems64); counterCollection->Add( counter ); counter = gcnew CounterCreationData(MetricCoreResC6, "Displays the residency of core or socket in core C6-state in percent.", PerformanceCounterType::NumberOfItems64); @@ -144,6 +146,8 @@ namespace PCMServiceNS { counterCollection->Add( counter ); counter = gcnew CounterCreationData(MetricSocketEnergyDram, "Displays the energy in Joules consumed by DRAM memory attached to the memory controller of this socket.", PerformanceCounterType::NumberOfItems64); counterCollection->Add( counter ); + counter = gcnew CounterCreationData(MetricSocketResC0, "Displays the residency of socket in package C0-state in percent.", PerformanceCounterType::NumberOfItems64); + counterCollection->Add( counter ); counter = gcnew CounterCreationData(MetricSocketResC2, "Displays the residency of socket in package C2-state in percent.", PerformanceCounterType::NumberOfItems64); counterCollection->Add( counter ); counter = gcnew CounterCreationData(MetricSocketResC3, "Displays the residency of socket in package C3-state in percent.", PerformanceCounterType::NumberOfItems64); @@ -152,6 +156,12 @@ namespace PCMServiceNS { counterCollection->Add( counter ); counter = gcnew CounterCreationData(MetricSocketResC7, "Displays the residency of socket in package C7-state in percent.", PerformanceCounterType::NumberOfItems64); counterCollection->Add( counter ); + counter = gcnew CounterCreationData(MetricSocketResC8, "Displays the residency of socket in package C8-state in percent.", PerformanceCounterType::NumberOfItems64); + counterCollection->Add( counter ); + counter = gcnew CounterCreationData(MetricSocketResC9, "Displays the residency of socket in package C9-state in percent.", PerformanceCounterType::NumberOfItems64); + counterCollection->Add( counter ); + counter = gcnew CounterCreationData(MetricSocketResC10, "Displays the residency of socket in package C10-state in percent.", PerformanceCounterType::NumberOfItems64); + counterCollection->Add( counter ); PerformanceCounterCategory::Create(CountersSocket, "Processor Counter Monitor", PerformanceCounterCategoryType::MultiInstance, counterCollection); } @@ -185,6 +195,7 @@ namespace PCMServiceNS { baseTicksForRelFreqHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreFreqNom, s, false)); thermalHeadroomHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreHeadroom, s, false)); CoreC0StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC0, s, false)); + CoreC1StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC1, s, false)); CoreC3StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC3, s, false)); CoreC6StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC6, s, false)); CoreC7StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC7, s, false)); @@ -207,6 +218,7 @@ namespace PCMServiceNS { baseTicksForRelFreqHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreFreqNom, s, false)); thermalHeadroomHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreHeadroom, s, false)); CoreC0StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC0, s, false)); + CoreC1StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC1, s, false)); CoreC3StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC3, s, false)); CoreC6StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC6, s, false)); CoreC7StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC7, s, false)); @@ -218,10 +230,14 @@ namespace PCMServiceNS { mwbHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketBandWrite, s, false)); packageEnergyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketEnergyPack, s, false)); DRAMEnergyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketEnergyDram, s, false)); + PackageC0StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC0, s, false)); PackageC2StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC2, s, false)); PackageC3StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC3, s, false)); PackageC6StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC6, s, false)); PackageC7StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC7, s, false)); + PackageC8StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC8, s, false)); + PackageC9StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC9, s, false)); + PackageC10StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC10, s, false)); } if (collectionInformation_->qpi) @@ -250,6 +266,7 @@ namespace PCMServiceNS { baseTicksForRelFreqHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreFreqNom, s, false)); thermalHeadroomHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreHeadroom, s, false)); CoreC0StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC0, s, false)); + CoreC1StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC1, s, false)); CoreC3StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC3, s, false)); CoreC6StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC6, s, false)); CoreC7StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersCore, MetricCoreResC7, s, false)); @@ -261,10 +278,14 @@ namespace PCMServiceNS { mwbHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketBandWrite, s, false)); packageEnergyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketEnergyPack, s, false)); DRAMEnergyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketEnergyDram, s, false)); + PackageC0StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC0, s, false)); PackageC2StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC2, s, false)); PackageC3StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC3, s, false)); PackageC6StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC6, s, false)); PackageC7StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC7, s, false)); + PackageC8StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC8, s, false)); + PackageC9StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC9, s, false)); + PackageC10StateResidencyHash_.Add(s, gcnew PerformanceCounter(CountersSocket, MetricSocketResC10, s, false)); } if (collectionInformation_->qpi) @@ -325,6 +346,7 @@ namespace PCMServiceNS { ((PerformanceCounter^)baseTicksForRelFreqHash_[s])->IncrementBy(totalRefTicks >> 17); ((PerformanceCounter^)thermalHeadroomHash_[s])->RawValue = systemState.getThermalHeadroom(); ((PerformanceCounter^)CoreC0StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(0,oldSystemState, systemState)); + ((PerformanceCounter^)CoreC1StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(1,oldSystemState, systemState)); ((PerformanceCounter^)CoreC3StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(3,oldSystemState, systemState)); ((PerformanceCounter^)CoreC6StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(6,oldSystemState, systemState)); ((PerformanceCounter^)CoreC7StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(7,oldSystemState, systemState)); @@ -338,10 +360,14 @@ namespace PCMServiceNS { ((PerformanceCounter^)mwbHash_[s])->RawValue = toBW(getBytesWrittenToMC(oldSystemState, systemState)); ((PerformanceCounter^)packageEnergyHash_[s])->RawValue = (__int64)getConsumedJoules(oldSystemState, systemState); ((PerformanceCounter^)DRAMEnergyHash_[s])->RawValue = (__int64)getDRAMConsumedJoules(oldSystemState, systemState); + ((PerformanceCounter^)PackageC0StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(0, oldSystemState, systemState)); ((PerformanceCounter^)PackageC2StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(2, oldSystemState, systemState)); ((PerformanceCounter^)PackageC3StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(3, oldSystemState, systemState)); ((PerformanceCounter^)PackageC6StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(6, oldSystemState, systemState)); ((PerformanceCounter^)PackageC7StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(7, oldSystemState, systemState)); + ((PerformanceCounter^)PackageC8StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(8, oldSystemState, systemState)); + ((PerformanceCounter^)PackageC9StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(9, oldSystemState, systemState)); + ((PerformanceCounter^)PackageC10StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(10, oldSystemState, systemState)); } if (collectionInformation_->qpi) @@ -369,6 +395,7 @@ namespace PCMServiceNS { ((PerformanceCounter^)baseTicksForRelFreqHash_[s])->IncrementBy(socketRefTicks >> 17); ((PerformanceCounter^)thermalHeadroomHash_[s])->RawValue = socketState.getThermalHeadroom(); ((PerformanceCounter^)CoreC0StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(0, oldSocketStates[i], socketState)); + ((PerformanceCounter^)CoreC1StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(1, oldSocketStates[i], socketState)); ((PerformanceCounter^)CoreC3StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(3, oldSocketStates[i], socketState)); ((PerformanceCounter^)CoreC6StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(6, oldSocketStates[i], socketState)); ((PerformanceCounter^)CoreC7StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(7, oldSocketStates[i], socketState)); @@ -380,10 +407,14 @@ namespace PCMServiceNS { ((PerformanceCounter^)mwbHash_[s])->RawValue = toBW(getBytesWrittenToMC(oldSocketStates[i], socketState)); ((PerformanceCounter^)packageEnergyHash_[s])->RawValue = (__int64)getConsumedJoules(oldSocketStates[i], socketState); ((PerformanceCounter^)DRAMEnergyHash_[s])->RawValue = (__int64)getDRAMConsumedJoules(oldSocketStates[i], socketState); + ((PerformanceCounter^)PackageC0StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(0,oldSocketStates[i], socketState)); ((PerformanceCounter^)PackageC2StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(2,oldSocketStates[i], socketState)); ((PerformanceCounter^)PackageC3StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(3,oldSocketStates[i], socketState)); ((PerformanceCounter^)PackageC6StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(6,oldSocketStates[i], socketState)); ((PerformanceCounter^)PackageC7StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(7,oldSocketStates[i], socketState)); + ((PerformanceCounter^)PackageC8StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(8,oldSocketStates[i], socketState)); + ((PerformanceCounter^)PackageC9StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(9,oldSocketStates[i], socketState)); + ((PerformanceCounter^)PackageC10StateResidencyHash_[s])->RawValue = __int64(100.*getPackageCStateResidency(10,oldSocketStates[i], socketState)); } if (collectionInformation_->qpi) @@ -419,6 +450,7 @@ namespace PCMServiceNS { ((PerformanceCounter^)baseTicksForRelFreqHash_[s])->IncrementBy(refTicks >> 17); ((PerformanceCounter^)thermalHeadroomHash_[s])->RawValue = coreState.getThermalHeadroom(); ((PerformanceCounter^)CoreC0StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(0,oldCoreStates[i], coreState)); + ((PerformanceCounter^)CoreC1StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(1,oldCoreStates[i], coreState)); ((PerformanceCounter^)CoreC3StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(3,oldCoreStates[i], coreState)); ((PerformanceCounter^)CoreC6StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(6,oldCoreStates[i], coreState)); ((PerformanceCounter^)CoreC7StateResidencyHash_[s])->RawValue = __int64(100.*getCoreCStateResidency(7,oldCoreStates[i], coreState)); @@ -464,13 +496,18 @@ namespace PCMServiceNS { System::Collections::Hashtable thermalHeadroomHash_; // C-state Residencies System::Collections::Hashtable CoreC0StateResidencyHash_; + System::Collections::Hashtable CoreC1StateResidencyHash_; System::Collections::Hashtable CoreC3StateResidencyHash_; System::Collections::Hashtable CoreC6StateResidencyHash_; System::Collections::Hashtable CoreC7StateResidencyHash_; + System::Collections::Hashtable PackageC0StateResidencyHash_; System::Collections::Hashtable PackageC2StateResidencyHash_; System::Collections::Hashtable PackageC3StateResidencyHash_; System::Collections::Hashtable PackageC6StateResidencyHash_; System::Collections::Hashtable PackageC7StateResidencyHash_; + System::Collections::Hashtable PackageC8StateResidencyHash_; + System::Collections::Hashtable PackageC9StateResidencyHash_; + System::Collections::Hashtable PackageC10StateResidencyHash_; System::Diagnostics::EventLog^ log_; @@ -491,6 +528,7 @@ namespace PCMServiceNS { initonly String^ MetricCoreFreqNom = gcnew String(L"Nominal Frequency"); initonly String^ MetricCoreHeadroom = gcnew String(L"Thermal Headroom below TjMax"); initonly String^ MetricCoreResC0 = gcnew String(L"core C0-state residency (%)"); + initonly String^ MetricCoreResC1 = gcnew String(L"core C1-state residency (%)"); initonly String^ MetricCoreResC3 = gcnew String(L"core C3-state residency (%)"); initonly String^ MetricCoreResC6 = gcnew String(L"core C6-state residency (%)"); initonly String^ MetricCoreResC7 = gcnew String(L"core C7-state residency (%)"); @@ -499,10 +537,14 @@ namespace PCMServiceNS { initonly String^ MetricSocketBandWrite = gcnew String(L"Memory Write Bandwidth"); initonly String^ MetricSocketEnergyPack = gcnew String(L"Package/Socket Consumed Energy"); initonly String^ MetricSocketEnergyDram = gcnew String(L"DRAM/Memory Consumed Energy"); + initonly String^ MetricSocketResC0 = gcnew String(L"package C0-state residency (%)"); initonly String^ MetricSocketResC2 = gcnew String(L"package C2-state residency (%)"); initonly String^ MetricSocketResC3 = gcnew String(L"package C3-state residency (%)"); initonly String^ MetricSocketResC6 = gcnew String(L"package C6-state residency (%)"); initonly String^ MetricSocketResC7 = gcnew String(L"package C7-state residency (%)"); + initonly String^ MetricSocketResC8 = gcnew String(L"package C8-state residency (%)"); + initonly String^ MetricSocketResC9 = gcnew String(L"package C9-state residency (%)"); + initonly String^ MetricSocketResC10 = gcnew String(L"package C10-state residency (%)"); initonly String^ MetricQpiBand = gcnew String(L"QPI Link Bandwidth"); diff --git a/PCM_Win/pcm.vcxproj b/PCM_Win/pcm.vcxproj index 11f29ef6..990cc7d9 100644 --- a/PCM_Win/pcm.vcxproj +++ b/PCM_Win/pcm.vcxproj @@ -156,7 +156,8 @@ - + + @@ -172,7 +173,8 @@ - + + diff --git a/PMURegisterDeclarations/GenuineIntel-6-55-4.json b/PMURegisterDeclarations/GenuineIntel-6-55-4.json index 2729d142..eb796497 100644 --- a/PMURegisterDeclarations/GenuineIntel-6-55-4.json +++ b/PMURegisterDeclarations/GenuineIntel-6-55-4.json @@ -98,6 +98,14 @@ "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} } }, + "irp" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, "iio" : { "programmable" : { "EventCode": {"Config": 0, "Position": 0, "Width": 8}, diff --git a/PMURegisterDeclarations/GenuineIntel-6-55-7.json b/PMURegisterDeclarations/GenuineIntel-6-55-7.json index 2729d142..eb796497 100644 --- a/PMURegisterDeclarations/GenuineIntel-6-55-7.json +++ b/PMURegisterDeclarations/GenuineIntel-6-55-7.json @@ -98,6 +98,14 @@ "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} } }, + "irp" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, "iio" : { "programmable" : { "EventCode": {"Config": 0, "Position": 0, "Width": 8}, diff --git a/PMURegisterDeclarations/GenuineIntel-6-6A-6.json b/PMURegisterDeclarations/GenuineIntel-6-6A-6.json index 56bb9368..afb293fd 100644 --- a/PMURegisterDeclarations/GenuineIntel-6-6A-6.json +++ b/PMURegisterDeclarations/GenuineIntel-6-6A-6.json @@ -106,6 +106,14 @@ "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} } }, + "irp" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, "pcu" : { "programmable" : { "EventCode": {"Config": 0, "Position": 0, "Width": 8}, diff --git a/build_solution.bat b/build_solution.bat index e81e2b9a..77547709 100644 --- a/build_solution.bat +++ b/build_solution.bat @@ -1,9 +1,9 @@ REM change path to your VCVARS.BAT -CALL "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\x86_amd64\vcvarsx86_amd64.bat" -SET "PATH=C:\Program Files (x86)\MSBuild\14.0\Bin\amd64;%PATH%" -REM CALL "c:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\VC\Auxiliary\Build\vcvarsamd64_x86.bat" -REM SET "PATH=C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\MSBuild\15.0\Bin\amd64;%PATH%" +REM CALL "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\x86_amd64\vcvarsx86_amd64.bat" +REM SET "PATH=C:\Program Files (x86)\MSBuild\14.0\Bin\amd64;%PATH%" +CALL "c:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\VC\Auxiliary\Build\vcvarsamd64_x86.bat" +SET "PATH=C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\MSBuild\15.0\Bin\amd64;%PATH%" msbuild pcm-all.sln /p:Configuration=Release;Platform=x64 /t:Clean,Build /m diff --git a/bw.cpp b/bw.cpp index d96b1abf..4d235497 100644 --- a/bw.cpp +++ b/bw.cpp @@ -74,7 +74,9 @@ namespace pcm { return result; } -#define PCM_CLIENT_IMC_DRAM_IO_REQESTS (0x5048) +#define PCM_CLIENT_IMC_DRAM_GT_REQUESTS (0x5040) +#define PCM_CLIENT_IMC_DRAM_IA_REQUESTS (0x5044) +#define PCM_CLIENT_IMC_DRAM_IO_REQUESTS (0x5048) #define PCM_CLIENT_IMC_DRAM_DATA_READS (0x5050) #define PCM_CLIENT_IMC_DRAM_DATA_WRITES (0x5054) #define PCM_CLIENT_IMC_MMAP_SIZE (0x6000) @@ -95,9 +97,19 @@ uint64 ClientBW::getImcWrites() return mmioRange->read32(PCM_CLIENT_IMC_DRAM_DATA_WRITES - PCM_CLIENT_IMC_EVENT_BASE); } +uint64 ClientBW::getGtRequests() +{ + return mmioRange->read32(PCM_CLIENT_IMC_DRAM_GT_REQUESTS - PCM_CLIENT_IMC_EVENT_BASE); +} + +uint64 ClientBW::getIaRequests() +{ + return mmioRange->read32(PCM_CLIENT_IMC_DRAM_IA_REQUESTS - PCM_CLIENT_IMC_EVENT_BASE); +} + uint64 ClientBW::getIoRequests() { - return mmioRange->read32(PCM_CLIENT_IMC_DRAM_IO_REQESTS - PCM_CLIENT_IMC_EVENT_BASE); + return mmioRange->read32(PCM_CLIENT_IMC_DRAM_IO_REQUESTS - PCM_CLIENT_IMC_EVENT_BASE); } #define PCM_SERVER_IMC_DRAM_DATA_READS (0x2290) diff --git a/bw.h b/bw.h index e4e1d173..3ae83d34 100644 --- a/bw.h +++ b/bw.h @@ -32,6 +32,8 @@ namespace pcm { public: virtual uint64 getImcReads() { return 0; } virtual uint64 getImcWrites() { return 0; } + virtual uint64 getGtRequests() { return 0; } + virtual uint64 getIaRequests() { return 0; } virtual uint64 getIoRequests() { return 0; } virtual uint64 getPMMReads() { return 0; } virtual uint64 getPMMWrites() { return 0; } @@ -57,6 +59,8 @@ namespace pcm { uint64 getImcReads() override; uint64 getImcWrites() override; + uint64 getGtRequests() override; + uint64 getIaRequests() override; uint64 getIoRequests() override; }; diff --git a/cpucounters.cpp b/cpucounters.cpp index 8ca15670..851d80bf 100644 --- a/cpucounters.cpp +++ b/cpucounters.cpp @@ -528,6 +528,18 @@ int32 PCM::getMaxCustomCoreEvents() return core_gen_counter_num_max; } +int PCM::getCPUModelFromCPUID() +{ + static int result = -1; + if (result < 0) + { + PCM_CPUID_INFO cpuinfo; + pcm_cpuid(1, cpuinfo); + result = (((cpuinfo.array[0]) & 0xf0) >> 4) | ((cpuinfo.array[0] & 0xf0000) >> 12); + } + return result; +} + bool PCM::detectModel() { char buffer[1024]; @@ -1542,8 +1554,7 @@ bool PCM::initMSR() #ifdef _MSC_VER std::cerr << "You must have signed msr.sys driver in your current directory and have administrator rights to run this program.\n"; #elif defined(__linux__) - std::cerr << "Try to execute 'modprobe msr' as root user and then\n"; - std::cerr << "you also must have read and write permissions for /dev/cpu/*/msr devices (/dev/msr* for Android). The 'chown' command can help.\n"; + std::cerr << "execute 'modprobe msr' as root user, then execute pcm as root user.\n"; #elif defined(__FreeBSD__) || defined(__DragonFly__) std::cerr << "Ensure cpuctl module is loaded and that you have read and write\n"; std::cerr << "permissions for /dev/cpuctl* devices (the 'chown' command can help).\n"; @@ -1711,6 +1722,10 @@ void PCM::initUncoreObjects() new CounterWidthExtender::ClientImcReadsCounter(clientBW), 32, 10000); clientImcWrites = std::make_shared( new CounterWidthExtender::ClientImcWritesCounter(clientBW), 32, 10000); + clientGtRequests = std::make_shared( + new CounterWidthExtender::ClientGtRequestsCounter(clientBW), 32, 10000); + clientIaRequests = std::make_shared( + new CounterWidthExtender::ClientIaRequestsCounter(clientBW), 32, 10000); clientIoRequests = std::make_shared( new CounterWidthExtender::ClientIoRequestsCounter(clientBW), 32, 10000); @@ -1936,6 +1951,56 @@ void PCM::initUncorePMUsDirect() } } + // init IRP PMU + int irpStacks = 0; + size_t IRP_CTL_REG_OFFSET = 0; + size_t IRP_CTR_REG_OFFSET = 0; + const uint32* IRP_UNIT_CTL = nullptr; + + switch (getCPUModel()) + { + case SKX: + irpStacks = SKX_IIO_STACK_COUNT; + IRP_CTL_REG_OFFSET = SKX_IRP_CTL_REG_OFFSET; + IRP_CTR_REG_OFFSET = SKX_IRP_CTR_REG_OFFSET; + IRP_UNIT_CTL = SKX_IRP_UNIT_CTL; + break; + case ICX: + irpStacks = ICX_IIO_STACK_COUNT; + IRP_CTL_REG_OFFSET = ICX_IRP_CTL_REG_OFFSET; + IRP_CTR_REG_OFFSET = ICX_IRP_CTR_REG_OFFSET; + IRP_UNIT_CTL = ICX_IRP_UNIT_CTL; + break; + case SNOWRIDGE: + irpStacks = SNR_IIO_STACK_COUNT; + IRP_CTL_REG_OFFSET = SNR_IRP_CTL_REG_OFFSET; + IRP_CTR_REG_OFFSET = SNR_IRP_CTR_REG_OFFSET; + IRP_UNIT_CTL = SNR_IRP_UNIT_CTL; + break; + } + if (IRP_UNIT_CTL) + { + irpPMUs.resize(num_sockets); + for (uint32 s = 0; s < (uint32)num_sockets; ++s) + { + auto& handle = MSR[socketRefCore[s]]; + for (int unit = 0; unit < irpStacks; ++unit) + { + irpPMUs[s][unit] = UncorePMU( + std::make_shared(handle, IRP_UNIT_CTL[unit]), + std::make_shared(handle, IRP_UNIT_CTL[unit] + IRP_CTL_REG_OFFSET + 0), + std::make_shared(handle, IRP_UNIT_CTL[unit] + IRP_CTL_REG_OFFSET + 1), + std::shared_ptr(), + std::shared_ptr(), + std::make_shared(handle, IRP_UNIT_CTL[unit] + IRP_CTR_REG_OFFSET + 0), + std::make_shared(handle, IRP_UNIT_CTL[unit] + IRP_CTR_REG_OFFSET + 1), + std::shared_ptr(), + std::shared_ptr() + ); + } + } + } + if (hasPCICFGUncore() && MSR.size()) { cboPMUs.resize(num_sockets); @@ -1981,6 +2046,7 @@ void PCM::initUncorePMUsPerf() { #ifdef PCM_USE_PERF iioPMUs.resize(num_sockets); + irpPMUs.resize(num_sockets); cboPMUs.resize(num_sockets); for (uint32 s = 0; s < (uint32)num_sockets; ++s) { @@ -1988,12 +2054,17 @@ void PCM::initUncorePMUsPerf() populatePerfPMUs(s, enumeratePerfPMUs("ubox", 100), uboxPMUs, true); populatePerfPMUs(s, enumeratePerfPMUs("cbox", 100), cboPMUs[s], false, true, true); populatePerfPMUs(s, enumeratePerfPMUs("cha", 200), cboPMUs[s], false, true, true); - std::vector iioPMUVector; - populatePerfPMUs(s, enumeratePerfPMUs("iio", 100), iioPMUVector, false); - for (size_t i = 0; i < iioPMUVector.size(); ++i) + auto populateMapPMUs = [&s](const std::string& type, std::vector > & out) { - iioPMUs[s][i] = iioPMUVector[i]; - } + std::vector PMUVector; + populatePerfPMUs(s, enumeratePerfPMUs(type, 100), PMUVector, false); + for (size_t i = 0; i < PMUVector.size(); ++i) + { + out[s][i] = PMUVector[i]; + } + }; + populateMapPMUs("iio", iioPMUs); + populateMapPMUs("irp", irpPMUs); } #endif } @@ -2002,9 +2073,9 @@ void PCM::initUncorePMUsPerf() #define PCM_NMI_WATCHDOG_PATH "/proc/sys/kernel/nmi_watchdog" -bool isNMIWatchdogEnabled() +bool isNMIWatchdogEnabled(const bool silent) { - const auto watchdog = readSysFS(PCM_NMI_WATCHDOG_PATH); + const auto watchdog = readSysFS(PCM_NMI_WATCHDOG_PATH, silent); if (watchdog.length() == 0) { return false; @@ -2343,7 +2414,7 @@ perf_event_attr PCM_init_perf_event_attr(bool group = true) PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter_, const bool silent) { #ifdef __linux__ - if (isNMIWatchdogEnabled()) + if (isNMIWatchdogEnabled(silent)) { disableNMIWatchdog(silent); needToRestoreNMIWatchdog = true; @@ -2975,9 +3046,9 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */, perf_event_attr e = PCM_init_perf_event_attr(); e.type = PERF_TYPE_RAW; e.config = (1ULL << 63ULL) + event_select_reg.value; - if (event_select_reg.fields.event_select == OFFCORE_RESPONSE_0_EVTNR && event_select_reg.fields.umask == OFFCORE_RESPONSE_0_UMASK) + if (event_select_reg.fields.event_select == getOCREventNr(0, i).first && event_select_reg.fields.umask == getOCREventNr(0, i).second) e.config1 = pExtDesc->OffcoreResponseMsrValue[0]; - if (event_select_reg.fields.event_select == OFFCORE_RESPONSE_1_EVTNR && event_select_reg.fields.umask == OFFCORE_RESPONSE_1_UMASK) + if (event_select_reg.fields.event_select == getOCREventNr(1, i).first && event_select_reg.fields.umask == getOCREventNr(1, i).second) e.config1 = pExtDesc->OffcoreResponseMsrValue[1]; if (event_select_reg.fields.event_select == LOAD_LATENCY_EVTNR && event_select_reg.fields.umask == LOAD_LATENCY_UMASK) @@ -3751,6 +3822,13 @@ void PCM::cleanupUncorePMUs(const bool silent) pmu.second.cleanup(); } } + for (auto& sPMUs : irpPMUs) + { + for (auto& pmu : sPMUs) + { + pmu.second.cleanup(); + } + } for (auto & sCBOPMUs : cboPMUs) { for (auto & pmu : sCBOPMUs) @@ -4218,10 +4296,13 @@ void BasicCounterState::readAndAggregate(std::shared_ptr msr) cBackendBoundSlots = extract_bits(perfMetrics, 24, 31); cRetiringSlots = extract_bits(perfMetrics, 0, 7); const double total = double(cFrontendBoundSlots + cBadSpeculationSlots + cBackendBoundSlots + cRetiringSlots); - cFrontendBoundSlots = m->FrontendBoundSlots[core_id] += uint64((double(cFrontendBoundSlots) / total) * double(slots)); - cBadSpeculationSlots = m->BadSpeculationSlots[core_id] += uint64((double(cBadSpeculationSlots) / total) * double(slots)); - cBackendBoundSlots = m->BackendBoundSlots[core_id] += uint64((double(cBackendBoundSlots) / total) * double(slots)); - cRetiringSlots = m->RetiringSlots[core_id] += uint64((double(cRetiringSlots) / total) * double(slots)); + if (total != 0) + { + cFrontendBoundSlots = m->FrontendBoundSlots[core_id] += uint64((double(cFrontendBoundSlots) / total) * double(slots)); + cBadSpeculationSlots = m->BadSpeculationSlots[core_id] += uint64((double(cBadSpeculationSlots) / total) * double(slots)); + cBackendBoundSlots = m->BackendBoundSlots[core_id] += uint64((double(cBackendBoundSlots) / total) * double(slots)); + cRetiringSlots = m->RetiringSlots[core_id] += uint64((double(cRetiringSlots) / total) * double(slots)); + } cAllSlotsRaw = m->AllSlotsRaw[core_id] += slots; // std::cout << "DEBUG: "<< slots << " " << cFrontendBoundSlots << " " << cBadSpeculationSlots << " " << cBackendBoundSlots << " " << cRetiringSlots << std::endl; msr->unlock(); @@ -4609,6 +4690,10 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool sile } programCboRaw(events64, filter0, filter1); } + else if (type == "irp") + { + programIRPCounters(events64); + } else if (type == "iio") { programIIOCounters(events64); @@ -4627,6 +4712,10 @@ void PCM::freezeServerUncoreCounters() for (int i = 0; (i < (int)server_pcicfg_uncore.size()) && MSR.size(); ++i) { server_pcicfg_uncore[i]->freezeCounters(); + + const auto refCore = socketRefCore[i]; + TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux + pcuPMUs[i].freeze(UNC_PMON_UNIT_CTL_FRZ_EN); if (IIOEventsAvailable()) @@ -4637,11 +4726,20 @@ void PCM::freezeServerUncoreCounters() } } - const auto refCore = socketRefCore[i]; - TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux - for (auto & pmu : cboPMUs[i]) + if (size_t(i) < irpPMUs.size()) { - pmu.freeze(UNC_PMON_UNIT_CTL_FRZ_EN); + for (auto& pmu : irpPMUs[i]) + { + pmu.second.freeze(UNC_PMON_UNIT_CTL_RSV); + } + } + + if (size_t(i) < cboPMUs.size()) + { + for (auto& pmu : cboPMUs[i]) + { + pmu.freeze(UNC_PMON_UNIT_CTL_FRZ_EN); + } } } } @@ -4650,6 +4748,10 @@ void PCM::unfreezeServerUncoreCounters() for (int i = 0; (i < (int)server_pcicfg_uncore.size()) && MSR.size(); ++i) { server_pcicfg_uncore[i]->unfreezeCounters(); + + const auto refCore = socketRefCore[i]; + TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux + pcuPMUs[i].unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN); if (IIOEventsAvailable()) @@ -4660,11 +4762,20 @@ void PCM::unfreezeServerUncoreCounters() } } - const auto refCore = socketRefCore[i]; - TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux - for (auto & pmu : cboPMUs[i]) + if (size_t(i) < irpPMUs.size()) + { + for (auto& pmu : irpPMUs[i]) + { + pmu.second.unfreeze(UNC_PMON_UNIT_CTL_RSV); + } + } + + if (size_t(i) < cboPMUs.size()) { - pmu.unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN); + for (auto& pmu : cboPMUs[i]) + { + pmu.unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN); + } } } } @@ -4811,6 +4922,8 @@ void PCM::readAndAggregateUncoreMCCounters(const uint32 socket, CounterStateType { result.UncMCNormalReads += clientImcReads->read(); result.UncMCFullWrites += clientImcWrites->read(); + result.UncMCGTRequests += clientGtRequests->read(); + result.UncMCIARequests += clientIaRequests->read(); result.UncMCIORequests += clientIoRequests->read(); } else @@ -5265,6 +5378,16 @@ ServerUncoreCounterState PCM::getServerUncoreCounterState(uint32 socket) result.IIOCounter[stack][i] = *(iioPMUs[socket][stack].counterValue[i]); } } + for (uint32 stack = 0; socket < irpPMUs.size() && stack < irpPMUs[socket].size() && stack < ServerUncoreCounterState::maxIIOStacks; ++stack) + { + for (int i = 0; i < ServerUncoreCounterState::maxCounters; ++i) + { + if (irpPMUs[socket][stack].counterValue[i].get()) + { + result.IRPCounter[stack][i] = *(irpPMUs[socket][stack].counterValue[i]); + } + } + } for (int i = 0; i < 2 && socket < uboxPMUs.size(); ++i) { result.UBOXCounter[i] = *(uboxPMUs[socket].counterValue[i]); @@ -5937,6 +6060,10 @@ void ServerPCICFGUncore::initDirect(uint32 socket_, const PCM * pcm) if (cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::ICX) { numChannels = 2; + if (PCM::getCPUModelFromCPUID() == PCM::ICX_D) + { + numChannels = 3; + } } if (numChannels > 0) @@ -7386,6 +7513,7 @@ uint32 PCM::getMaxNumOfIIOStacks() const { if (iioPMUs.size() > 0) { + assert(iioPMUs[0].size() == irpPMUs[0].size()); return (uint32)iioPMUs[0].size(); } return 0; @@ -7464,6 +7592,41 @@ void PCM::programIIOCounters(uint64 rawEvents[4], int IIOStack) } } +void PCM::programIRPCounters(uint64 rawEvents[4], int IIOStack) +{ + std::vector IIO_units; + if (IIOStack == -1) + { + for (uint32 stack = 0; stack < getMaxNumOfIIOStacks(); ++stack) + { + IIO_units.push_back(stack); + } + } + else + { + IIO_units.push_back(IIOStack); + } + + for (int32 i = 0; (i < num_sockets) && MSR.size() && irpPMUs.size(); ++i) + { + uint32 refCore = socketRefCore[i]; + TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux + + for (const auto& unit : IIO_units) + { + if (irpPMUs[i].count(unit) == 0) + { + std::cerr << "IRP PMU unit (stack) " << unit << " is not found \n"; + continue; + } + auto& pmu = irpPMUs[i][unit]; + pmu.initFreeze(UNC_PMON_UNIT_CTL_RSV); + + program(pmu, &rawEvents[0], &rawEvents[2], UNC_PMON_UNIT_CTL_RSV); + } + } +} + void PCM::programPCIeEventGroup(eventGroup_t &eventGroup) { assert(eventGroup.size() > 0); diff --git a/cpucounters.h b/cpucounters.h index 33bea7cf..5f4f8a72 100644 --- a/cpucounters.h +++ b/cpucounters.h @@ -605,6 +605,7 @@ class PCM_API PCM std::vector > server_pcicfg_uncore; std::vector pcuPMUs; std::vector > iioPMUs; + std::vector > irpPMUs; std::vector uboxPMUs; double joulesPerEnergyUnit; std::vector > energy_status; @@ -621,6 +622,8 @@ class PCM_API PCM std::shared_ptr clientBW; std::shared_ptr clientImcReads; std::shared_ptr clientImcWrites; + std::shared_ptr clientGtRequests; + std::shared_ptr clientIaRequests; std::shared_ptr clientIoRequests; std::vector > serverBW; @@ -1183,6 +1186,39 @@ class PCM_API PCM typedef std::map RawPMUConfigs; ErrorCode program(const RawPMUConfigs& curPMUConfigs, const bool silent = false); + std::pair getOCREventNr(const int event, const unsigned coreID) const + { + assert (coreID < topology.size()); + if (hybrid) + { + switch (cpu_model) + { + case ADL: + if (topology[coreID].core_type == TopologyEntry::Atom) + { + return std::make_pair(OFFCORE_RESPONSE_0_EVTNR, event + 1); + } + break; + } + } + bool useGLCOCREvent = false; + switch (cpu_model) + { + case ADL: // ADL big core (GLC) + useGLCOCREvent = true; + break; + } + switch (event) + { + case 0: + return std::make_pair(useGLCOCREvent ? GLC_OFFCORE_RESPONSE_0_EVTNR : OFFCORE_RESPONSE_0_EVTNR, OFFCORE_RESPONSE_0_UMASK); + case 1: + return std::make_pair(useGLCOCREvent ? GLC_OFFCORE_RESPONSE_1_EVTNR : OFFCORE_RESPONSE_1_EVTNR, OFFCORE_RESPONSE_1_UMASK); + } + assert (false && "wrong event nr in getOCREventNr"); + return std::make_pair(0U, 0U); + } + //! \brief Freezes uncore event counting (works only on microarchitecture codename SandyBridge-EP and IvyTown) void freezeServerUncoreCounters(); @@ -1317,6 +1353,10 @@ class PCM_API PCM */ int32 getMaxCustomCoreEvents(); + /*! \brief Returns cpu model id number from cpuid instruction + */ + static int getCPUModelFromCPUID(); + //! \brief Identifiers of supported CPU models enum SupportedCPUModels { @@ -1741,6 +1781,11 @@ class PCM_API PCM //! \param IIOStack id of the IIO stack to program (-1 for all, if parameter omitted) void programIIOCounters(uint64 rawEvents[4], int IIOStack = -1); + //! \brief Program uncore IRP events + //! \param rawEvents events to program (raw format) + //! \param IIOStack id of the IIO stack to program (-1 for all, if parameter omitted) + void programIRPCounters(uint64 rawEvents[4], int IIOStack = -1); + //! \brief Get the state of IIO counter //! \param socket socket of the IIO stack //! \param IIOStack id of the IIO stack @@ -2440,7 +2485,6 @@ uint64 getCBOCounter(uint32 cbo, uint32 counter, const CounterStateType& before, /*! \brief Direct read of UBOX PMU counter (counter meaning depends on the programming: power/performance/etc) \param counter counter number - \param cbo cbo or cha number \param before CPU counter state before the experiment \param after CPU counter state after the experiment */ @@ -2452,7 +2496,7 @@ uint64 getUBOXCounter(uint32 counter, const CounterStateType& before, const Coun /*! \brief Direct read of IIO PMU counter (counter meaning depends on the programming: power/performance/etc) \param counter counter number - \param cbo IIO stack number + \param stack IIO stack number \param before CPU counter state before the experiment \param after CPU counter state after the experiment */ @@ -2462,6 +2506,18 @@ uint64 getIIOCounter(uint32 stack, uint32 counter, const CounterStateType& befor return after.IIOCounter[stack][counter] - before.IIOCounter[stack][counter]; } +/*! \brief Direct read of IRP PMU counter (counter meaning depends on the programming: power/performance/etc) + \param counter counter number + \param stack IIO stack number + \param before CPU counter state before the experiment + \param after CPU counter state after the experiment +*/ +template +uint64 getIRPCounter(uint32 stack, uint32 counter, const CounterStateType& before, const CounterStateType& after) +{ + return after.IRPCounter[stack][counter] - before.IRPCounter[stack][counter]; +} + /*! \brief Direct read of UPI or QPI PMU counter (counter meaning depends on the programming: power/performance/etc) \param counter counter number \param port UPI/QPI port number @@ -2639,6 +2695,10 @@ class UncoreCounterState template friend uint64 getBytesWrittenToEDC(const CounterStateType & before, const CounterStateType & after); template + friend uint64 getGTRequestBytesFromMC(const CounterStateType & before, const CounterStateType & after); + template + friend uint64 getIARequestBytesFromMC(const CounterStateType & before, const CounterStateType & after); + template friend uint64 getIORequestBytesFromMC(const CounterStateType & before, const CounterStateType & after); template friend uint64 getConsumedEnergy(const CounterStateType & before, const CounterStateType & after); @@ -2668,6 +2728,8 @@ class UncoreCounterState uint64 UncPMMReads; uint64 UncEDCFullWrites; uint64 UncEDCNormalReads; + uint64 UncMCGTRequests; + uint64 UncMCIARequests; uint64 UncMCIORequests; uint64 PackageEnergyStatus; uint64 DRAMEnergyStatus; @@ -2687,6 +2749,8 @@ class UncoreCounterState UncPMMReads(0), UncEDCFullWrites(0), UncEDCNormalReads(0), + UncMCGTRequests(0), + UncMCIARequests(0), UncMCIORequests(0), PackageEnergyStatus(0), DRAMEnergyStatus(0), @@ -2712,6 +2776,8 @@ class UncoreCounterState UncPMMWrites += o.UncPMMWrites; UncEDCFullWrites += o.UncEDCFullWrites; UncEDCNormalReads += o.UncEDCNormalReads; + UncMCGTRequests += o.UncMCGTRequests; + UncMCIARequests += o.UncMCIARequests; UncMCIORequests += o.UncMCIORequests; PackageEnergyStatus += o.PackageEnergyStatus; DRAMEnergyStatus += o.DRAMEnergyStatus; @@ -2756,6 +2822,7 @@ class ServerUncoreCounterState : public UncoreCounterState std::array, maxXPILinks> M3UPICounter; std::array, maxCBOs> CBOCounter; std::array, maxIIOStacks> IIOCounter; + std::array, maxIIOStacks> IRPCounter; std::array UBOXCounter; std::array DRAMClocks; std::array MCDRAMClocks; @@ -2782,6 +2849,8 @@ class ServerUncoreCounterState : public UncoreCounterState template friend uint64 getIIOCounter(uint32 stack, uint32 counter, const CounterStateType& before, const CounterStateType& after); template + friend uint64 getIRPCounter(uint32 stack, uint32 counter, const CounterStateType& before, const CounterStateType& after); + template friend uint64 getXPICounter(uint32 port, uint32 counter, const CounterStateType& before, const CounterStateType& after); template friend uint64 getM2MCounter(uint32 controller, uint32 counter, const CounterStateType & before, const CounterStateType & after); @@ -2808,6 +2877,7 @@ class ServerUncoreCounterState : public UncoreCounterState M3UPICounter{{}}, CBOCounter{{}}, IIOCounter{{}}, + IRPCounter{{}}, UBOXCounter{{}}, DRAMClocks{{}}, MCDRAMClocks{{}}, @@ -3581,6 +3651,33 @@ uint64 getBytesWrittenToEDC(const CounterStateType & before, const CounterStateT return 0ULL; } +/*! \brief Computes number of bytes of read/write requests from GT engine + + \param before CPU counter state before the experiment + \param after CPU counter state after the experiment + \return Number of bytes +*/ +template +uint64 getGTRequestBytesFromMC(const CounterStateType & before, const CounterStateType & after) +{ + if (PCM::getInstance()->memoryIOTrafficMetricAvailable()) + return (after.UncMCGTRequests - before.UncMCGTRequests) * 64; + return 0ULL; +} + +/*! \brief Computes number of bytes of read/write requests from all IA + + \param before CPU counter state before the experiment + \param after CPU counter state after the experiment + \return Number of bytes +*/ +template +uint64 getIARequestBytesFromMC(const CounterStateType & before, const CounterStateType & after) +{ + if (PCM::getInstance()->memoryIOTrafficMetricAvailable()) + return (after.UncMCIARequests - before.UncMCIARequests) * 64; + return 0ULL; +} /*! \brief Computes number of bytes of read/write requests from all IO sources diff --git a/pcm-all.sln b/pcm-all.sln index 00a3b8c5..0676b559 100644 --- a/pcm-all.sln +++ b/pcm-all.sln @@ -3,7 +3,7 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 14 VisualStudioVersion = 14.0.25420.1 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PCM-Lib", "PCM-Lib_Win\PCM-Lib.vcxproj", "{A4206CE7-A913-42ED-B3B9-F7CF5076633B}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PCM-Lib", "PCM-Lib_Win\pcm-lib.vcxproj", "{A4206CE7-A913-42ED-B3B9-F7CF5076633B}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pcm", "PCM_Win\pcm.vcxproj", "{D919CF99-5D9F-46C9-B6F0-626700E63592}" EndProject diff --git a/pcm-iio.cpp b/pcm-iio.cpp index 661ccf11..591fb9c5 100644 --- a/pcm-iio.cpp +++ b/pcm-iio.cpp @@ -423,7 +423,7 @@ void PurleyPlatformMapping::getUboxBusNumbers(std::vector& ubox) for (uint8_t device = 0; device < 32; device++) { for (uint8_t function = 0; function < 8; function++) { struct pci pci_dev; - pci_dev.bdf.busno = bus; + pci_dev.bdf.busno = (uint8_t)bus; pci_dev.bdf.devno = device; pci_dev.bdf.funcno = function; if (probe_pci(&pci_dev)) { @@ -513,7 +513,7 @@ bool IPlatformMapping10Nm::getSadIdRootBusMap(uint32_t socket_id, std::map pccr(get_ccr(m, ctr.ccr)); rawEvents[ctr.idx] = pccr->get_ccr_value(); - int stacks_count = iios[0].stacks.size(); + int stacks_count = (int)iios[0].stacks.size(); before = new IIOCounterState[iios.size() * stacks_count]; after = new IIOCounterState[iios.size() * stacks_count]; diff --git a/pcm-memory.cpp b/pcm-memory.cpp index 4d398d0f..5d89f76b 100644 --- a/pcm-memory.cpp +++ b/pcm-memory.cpp @@ -161,7 +161,36 @@ void printSocketRankBWHeader(uint32 no_columns, uint32 skt) cout << "\n"; } -void printSocketChannelBW(PCM */*m*/, memdata_t *md, uint32 no_columns, uint32 skt) +void printSocketRankBWHeader_cvt(const uint32 numSockets, const uint32 num_imc_channels, const int rankA, const int rankB) +{ + printDateForCSV(Header1); + for (uint32 skt = 0 ; skt < (numSockets) ; ++skt) { + for (uint32 channel = 0; channel < num_imc_channels; ++channel) { + if (rankA >= 0) + cout << "SKT" << skt << "," << "SKT" << skt << ","; + if (rankB >= 0) + cout << "SKT" << skt << "," << "SKT" << skt << ","; + } + } + cout << endl; + + printDateForCSV(Header2); + for (uint32 skt = 0 ; skt < (numSockets) ; ++skt) { + for (uint32 channel = 0; channel < num_imc_channels; ++channel) { + if (rankA >= 0) { + cout << "Mem_Ch" << channel << "_R" << rankA << "_reads," + << "Mem_Ch" << channel << "_R" << setw(1) << rankA << "_writes,"; + } + if (rankB >= 0) { + cout << "Mem_Ch" << channel << "_R" << rankB << "_reads," + << "Mem_Ch" << channel << "_R" << setw(1) << rankB << "_writes,"; + } + } + } + cout << endl; +} + +void printSocketChannelBW(PCM * /*m*/, memdata_t *md, uint32 no_columns, uint32 skt) { for (uint32 channel = 0; channel < max_imc_channels; ++channel) { // check all the sockets for bad channel "channel" @@ -221,6 +250,25 @@ void printSocketChannelBW(uint32 no_columns, uint32 skt, uint32 num_imc_channels } } +void printSocketChannelBW_cvt(const uint32 numSockets, const uint32 num_imc_channels, const ServerUncoreCounterState * uncState1, + const ServerUncoreCounterState * uncState2, const uint64 elapsedTime, const int rankA, const int rankB) +{ + printDateForCSV(Data); + for (uint32 skt = 0 ; skt < numSockets; ++skt) { + for (uint32 channel = 0 ; channel < num_imc_channels ; ++channel) { + if(rankA >= 0) { + cout << (float) (getMCCounter(channel,ServerPCICFGUncore::EventPosition::READ_RANK_A,uncState1[skt],uncState2[skt]) * 64 / 1000000.0 / (elapsedTime/1000.0)) + << "," << (float) (getMCCounter(channel,ServerPCICFGUncore::EventPosition::WRITE_RANK_A,uncState1[skt],uncState2[skt]) * 64 / 1000000.0 / (elapsedTime/1000.0)) << ","; + } + if(rankB >= 0) { + cout << (float) (getMCCounter(channel,ServerPCICFGUncore::EventPosition::READ_RANK_B,uncState1[skt],uncState2[skt]) * 64 / 1000000.0 / (elapsedTime/1000.0)) + << "," << (float) (getMCCounter(channel,ServerPCICFGUncore::EventPosition::WRITE_RANK_B,uncState1[skt],uncState2[skt]) * 64 / 1000000.0 / (elapsedTime/1000.0)) << ","; + } + } + } + cout << endl; +} + float AD_BW(const memdata_t *md, const uint32 skt) { const auto totalPMM = md->iMC_PMM_Rd_socket[skt] + md->iMC_PMM_Wr_socket[skt]; @@ -812,8 +860,8 @@ void calculate_bandwidth(PCM *m, } else if (metrics == PmemMemoryMode) { - md.iMC_PMM_MemoryMode_Miss_socket[skt] += (pmmMemoryModeCleanMisses + pmmMemoryModeDirtyMisses) / (elapsedTime / 1000.0); - md.iMC_PMM_MemoryMode_Hit_socket[skt] += (pmmMemoryModeHits) / (elapsedTime / 1000.0); + md.iMC_PMM_MemoryMode_Miss_socket[skt] += (float)((pmmMemoryModeCleanMisses + pmmMemoryModeDirtyMisses) / (elapsedTime / 1000.0)); + md.iMC_PMM_MemoryMode_Hit_socket[skt] += (float)((pmmMemoryModeHits) / (elapsedTime / 1000.0)); } else { @@ -881,33 +929,35 @@ void calculate_bandwidth(PCM *m, } } -void calculate_bandwidth_rank(PCM *m, const ServerUncoreCounterState uncState1[], const ServerUncoreCounterState uncState2[], const uint64 elapsedTime, const bool /*csv*/, bool & /*csvheader*/, const uint32 no_columns, const int rankA, const int rankB) +void calculate_bandwidth_rank(PCM *m, const ServerUncoreCounterState uncState1[], const ServerUncoreCounterState uncState2[], + const uint64 elapsedTime, const bool csv, bool &csvheader, const uint32 no_columns, const int rankA, const int rankB) { uint32 skt = 0; cout.setf(ios::fixed); cout.precision(2); uint32 numSockets = m->getNumSockets(); - while(skt < numSockets) - { - auto printRow = [&skt, &uncState1, &uncState2, &elapsedTime, &rankA, &rankB](const uint32 no_columns) { - printSocketRankBWHeader(no_columns, skt); - printSocketChannelBW(no_columns, skt, max_imc_channels, uncState1, uncState2, elapsedTime, rankA, rankB); - for (uint32 i = skt; i < (no_columns + skt); ++i) - { - cout << "|-------------------------------------------|"; - } - cout << "\n"; - skt += no_columns; - }; - // Full row - if ((skt + no_columns) <= numSockets) - { - printRow(no_columns); + if (csv) { + if (csvheader) { + printSocketRankBWHeader_cvt(numSockets, max_imc_channels, rankA, rankB); + csvheader = false; } - else //Display the remaining sockets in this row - { - printRow(numSockets - skt); + printSocketChannelBW_cvt(numSockets, max_imc_channels, uncState1, uncState2, elapsedTime, rankA, rankB); + } else { + while(skt < numSockets) { + auto printRow = [&skt, &uncState1, &uncState2, &elapsedTime, &rankA, &rankB](const uint32 no_columns) { + printSocketRankBWHeader(no_columns, skt); + printSocketChannelBW(no_columns, skt, max_imc_channels, uncState1, uncState2, elapsedTime, rankA, rankB); + for (uint32 i = skt; i < (no_columns + skt); ++i) + cout << "|-------------------------------------------|"; + cout << "\n"; + skt += no_columns; + }; + // Full row + if ((skt + no_columns) <= numSockets) + printRow(no_columns); + else //Display the remaining sockets in this row + printRow(numSockets - skt); } } } @@ -1010,6 +1060,7 @@ int main(int argc, char * argv[]) } if(rankA < 0) rankA = rank; else if(rankB < 0) rankB = rank; + metrics = PartialWrites; } } continue; @@ -1140,7 +1191,7 @@ int main(int argc, char * argv[]) exit(EXIT_FAILURE); } - max_imc_channels = m->getMCChannelsPerSocket(); + max_imc_channels = (pcm::uint32)m->getMCChannelsPerSocket(); ServerUncoreCounterState * BeforeState = new ServerUncoreCounterState[m->getNumSockets()]; ServerUncoreCounterState * AfterState = new ServerUncoreCounterState[m->getNumSockets()]; @@ -1165,6 +1216,9 @@ int main(int argc, char * argv[]) cerr << "Update every " << delay << " seconds\n"; + if (csv) + cerr << "Read/Write values expressed in (MB/s)" << endl; + for(uint32 i=0; igetNumSockets(); ++i) BeforeState[i] = m->getServerUncoreCounterState(i); diff --git a/pcm-numa.cpp b/pcm-numa.cpp index 811fd740..69538e5f 100644 --- a/pcm-numa.cpp +++ b/pcm-numa.cpp @@ -193,10 +193,10 @@ int main(int argc, char * argv[]) for (int i = 0; i < 4; ++i) regs[i] = def_event_select_reg; - regs[0].fields.event_select = OFFCORE_RESPONSE_0_EVTNR; // OFFCORE_RESPONSE 0 event - regs[0].fields.umask = OFFCORE_RESPONSE_0_UMASK; - regs[1].fields.event_select = OFFCORE_RESPONSE_1_EVTNR; // OFFCORE_RESPONSE 1 event - regs[1].fields.umask = OFFCORE_RESPONSE_1_UMASK; + regs[0].fields.event_select = m->getOCREventNr(0, 0).first; // OFFCORE_RESPONSE 0 event + regs[0].fields.umask = m->getOCREventNr(0, 0).second; + regs[1].fields.event_select = m->getOCREventNr(1, 0).first; // OFFCORE_RESPONSE 1 event + regs[1].fields.umask = m->getOCREventNr(1, 0).second; PCM::ErrorCode status = m->program(PCM::EXT_CUSTOM_CORE_EVENTS, &conf); m->checkError(status); diff --git a/pcm-pcicfg.cpp b/pcm-pcicfg.cpp index 5a8c0916..ff8f5703 100644 --- a/pcm-pcicfg.cpp +++ b/pcm-pcicfg.cpp @@ -61,7 +61,7 @@ int main(int argc, char * argv[]) { case 'w': write = true; - value = read_number(optarg); + value = (pcm::uint32)read_number(optarg); break; case 'd': dec = true; diff --git a/pcm-pcie.h b/pcm-pcie.h index 00da6923..a70c9d9e 100644 --- a/pcm-pcie.h +++ b/pcm-pcie.h @@ -46,7 +46,7 @@ static uint getIdent (const string &s) * We are adding "| " before and " " after the event name hence +5 to * strlen(eventNames). Rest of the logic is to center the event name. */ - uint ident = 5 + s.size(); + uint ident = 5 + (uint)s.size(); return (3 + ident / 2); } @@ -140,7 +140,7 @@ class LegacyPlatform: public IPlatform eventNames(events), eventGroups(eventCodes) { int eventsCount = 0; - for (auto &group : eventGroups) eventsCount += group.size(); + for (auto &group : eventGroups) eventsCount += (int)group.size(); m_delay = uint32(delay * 1000 / (eventGroups.size()) / NUM_SAMPLES); if (m_delay * eventsCount * NUM_SAMPLES < delay * 1000) ++m_delay; @@ -180,10 +180,10 @@ inline uint64 LegacyPlatform::getEventCount (uint skt, uint idx) uint LegacyPlatform::eventGroupOffset(eventGroup_t &eventGroup) { uint offset = 0; - uint grpIdx = &eventGroup - eventGroups.data(); + uint grpIdx = (uint)(&eventGroup - eventGroups.data()); for (auto iter = eventGroups.begin(); iter < eventGroups.begin() + grpIdx; iter++) - offset += iter->size(); + offset += (uint)iter->size(); return offset; } @@ -264,7 +264,7 @@ void LegacyPlatform::printSocketScopeEvent(uint skt, eventFilter filter, uint id void LegacyPlatform::printSocketScopeEvents(uint skt, eventFilter filter) { if (!m_csv) { - int ident = strlen("Skt |") / 2; + int ident = (int)strlen("Skt |") / 2; cout << setw(ident) << skt << setw(ident) << ' '; } else cout << skt; @@ -324,20 +324,20 @@ void LegacyPlatform::printAggregatedEvents() { if (!m_csv) { - uint len = strlen("Skt "); + uint len = (uint)strlen("Skt "); for(auto& evt : eventNames) - len += (5 + evt.size()); + len += (5 + (uint)evt.size()); if (m_bandwidth) for(auto& bw : bwNames) - len += (5 + bw.size()); + len += (5 + (uint)bw.size()); while (len--) cout << '-'; cout << "\n"; - int ident = strlen("Skt |") /2 ; + int ident = (int)strlen("Skt |") /2 ; cout << setw(ident) << "*" << setw(ident) << ' '; for (uint idx = 0; idx < eventNames.size(); ++idx) diff --git a/pcm-power.cpp b/pcm-power.cpp index 62ba0d3b..6b617876 100644 --- a/pcm-power.cpp +++ b/pcm-power.cpp @@ -333,7 +333,7 @@ int main(int argc, char * argv[]) m->setBlocked(false); } - if (((delay < 1.0) && (delay > 0.0)) || (delay <= 0.0)) delay = PCM_DELAY_DEFAULT; + if (delay <= 0.0) delay = PCM_DELAY_DEFAULT; uint32 i = 0; diff --git a/pcm-raw.cpp b/pcm-raw.cpp index 1a1b51f2..d24b0ccc 100644 --- a/pcm-raw.cpp +++ b/pcm-raw.cpp @@ -72,6 +72,8 @@ void print_usage(const string progname) cerr << " -f | /f => enforce flushing each line for interactive output\n"; cerr << " -i[=number] | /i[=number] => allow to determine number of iterations\n"; cerr << " -tr | /tr => transpose output (print single event data in a row)\n"; + cerr << " -l => use locale for printing values, calls -tab for readability\n"; + cerr << " -tab => replace default comma separator with tab\n"; cerr << " -el event_list.txt | /el event_list.txt => read event list from event_list.txt file, \n"; cerr << " each line represents an event,\n"; cerr << " event groups are separated by a semicolon\n"; @@ -174,7 +176,7 @@ bool initPMUEventMap() assert(EventTypetPos >= 0); const std::string ourFMS = PCM::getInstance()->getCPUFamilyModelString(); // cout << "Our FMS: " << ourFMS << "\n"; - std::map eventFiles; + std::multimap eventFiles; cout << "Matched event files:\n"; while (std::getline(in, line)) { @@ -187,14 +189,14 @@ bool initPMUEventMap() if (std::regex_search(ourFMS.c_str(), FMSMatch, FMSRegex)) { cout << tokens[FMSPos] << " " << tokens[EventTypetPos] << " " << tokens[FilenamePos] << "\n"; - eventFiles[tokens[EventTypetPos]] = tokens[FilenamePos]; + eventFiles.insert(std::make_pair(tokens[EventTypetPos], tokens[FilenamePos])); } } in.close(); if (eventFiles.empty()) { - cerr << "ERROR: CPU " << ourFMS << "not found in " << mapfile << "\n"; + cerr << "ERROR: CPU " << ourFMS << " not found in " << mapfile << "\n"; return false; } @@ -734,11 +736,12 @@ bool show_partial_core_output = false; bitset ycores; bool flushLine = false; bool transpose = false; +std::string sep = ","; void printRowBegin(const std::string & EventName, const CoreCounterState & BeforeState, const CoreCounterState & AfterState, PCM* m) { printDateForCSV(CsvOutputType::Data); - cout << EventName << "," << (1000ULL * getInvariantTSC(BeforeState, AfterState)) / m->getNominalFrequency() << "," << getInvariantTSC(BeforeState, AfterState); + cout << EventName << sep << (1000ULL * getInvariantTSC(BeforeState, AfterState)) / m->getNominalFrequency() << sep << getInvariantTSC(BeforeState, AfterState); } @@ -750,7 +753,7 @@ void printRow(const std::string & EventName, MetricFunc metricFunc, const std::v { if (!(m->isCoreOnline(core) == false || (show_partial_core_output && ycores.test(core) == false))) { - cout << "," << metricFunc(BeforeState[core], AfterState[core]); + cout << sep << metricFunc(BeforeState[core], AfterState[core]); } } cout << "\n"; @@ -788,7 +791,7 @@ void printTransposed(const PCM::RawPMUConfigs& curPMUConfigs, PCM* m, vectorgetMaxNumOfCBoxes()); } + else if (type == "irp") + { + printUncoreRows([](const uint32 u, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getIRPCounter(u, i, before, after); }, (uint32)m->getMaxNumOfIIOStacks()); + } else if (type == "iio") { printUncoreRows([](const uint32 u, const uint32 i, const ServerUncoreCounterState& before, const ServerUncoreCounterState& after) { return getIIOCounter(u, i, before, after); }, (uint32)m->getMaxNumOfIIOStacks()); @@ -904,9 +911,9 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, PCM* m, vector 0 && AfterState.size() > 0) { choose(outputType, - []() { cout << ","; }, + []() { cout << sep; }, []() { cout << "ms,"; }, - [&]() { cout << (1000ULL * getInvariantTSC(BeforeState[0], AfterState[0])) / m->getNominalFrequency() << ","; }); + [&]() { cout << (1000ULL * getInvariantTSC(BeforeState[0], AfterState[0])) / m->getNominalFrequency() << sep; }); } for (auto typeEvents : curPMUConfigs) { @@ -937,9 +944,9 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, PCM* m, vectorgetSocketId(core) << "CORE" << core << ","; }, - [&metric]() { cout << metric << ","; }, - [&value]() { cout << value << ","; }); + [m, core]() { cout << "SKT" << m->getSocketId(core) << "CORE" << core << sep; }, + [&metric]() { cout << metric << sep; }, + [&value]() { cout << value << sep; }); }; for (uint32 cnt = 0; cnt < 4; ++cnt) { @@ -960,9 +967,9 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, PCM* m, vectorgetSocketId(core) << "CORE" << core << ","; }, - [&event, &i]() { if (event.second.empty()) cout << "COREEvent" << i << ","; else cout << event.second << ","; }, - [&]() { cout << getNumberOfCustomEvents(i, BeforeState[core], AfterState[core]) << ","; }); + [m, core]() { cout << "SKT" << m->getSocketId(core) << "CORE" << core << sep; }, + [&event, &i]() { if (event.second.empty()) cout << "COREEvent" << i << sep; else cout << event.second << sep; }, + [&]() { cout << getNumberOfCustomEvents(i, BeforeState[core], AfterState[core]) << sep; }); ++i; } } @@ -977,9 +984,9 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, PCM* m, vectorgetNumSockets(); ++s) + { + for (uint32 stack = 0; stack < m->getMaxNumOfIIOStacks(); ++stack) + { + int i = 0; + for (auto event : events) + { + choose(outputType, + [s, stack]() { cout << "SKT" << s << "IRP" << stack << sep; }, + [&event, &i]() { if (event.second.empty()) cout << "IRPEvent" << i << sep; else cout << event.second << sep; }, + [&]() { cout << getIRPCounter(stack, i, BeforeUncoreState[s], AfterUncoreState[s]) << sep; }); ++i; } } @@ -1111,9 +1136,9 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, PCM* m, vectorPMMTrafficMetricsAvailable()) cout << " PMM WR : bytes written to PMM memory (in GBytes)\n"; if (m->MCDRAMmemoryTrafficMetricsAvailable()) cout << " MCDRAM READ : bytes read from MCDRAM controller (in GBytes)\n"; if (m->MCDRAMmemoryTrafficMetricsAvailable()) cout << " MCDRAM WRITE : bytes written to MCDRAM controller (in GBytes)\n"; - if (m->memoryIOTrafficMetricAvailable()) cout << " IO : bytes read/written due to IO requests to memory controller (in GBytes); this may be an over estimate due to same-cache-line partial requests\n"; + if (m->memoryIOTrafficMetricAvailable()) { + cout << " IO : bytes read/written due to IO requests to memory controller (in GBytes); this may be an over estimate due to same-cache-line partial requests\n"; + cout << " IA : bytes read/written due to IA requests to memory controller (in GBytes); this may be an over estimate due to same-cache-line partial requests\n"; + cout << " GT : bytes read/written due to GT requests to memory controller (in GBytes); this may be an over estimate due to same-cache-line partial requests\n"; + } if (m->L3CacheOccupancyMetricAvailable()) cout << " L3OCC : L3 occupancy (in KBytes)\n"; if (m->CoreLocalMemoryBWMetricAvailable()) cout << " LMB : L3 cache external bandwidth satisfied by local memory (in MBytes)\n"; if (m->CoreRemoteMemoryBWMetricAvailable()) cout << " RMB : L3 cache external bandwidth satisfied by remote memory (in MBytes)\n"; @@ -433,6 +437,10 @@ void print_output(PCM * m, cout << " MCDRAM READ | MCDRAM WRITE |"; if (m->memoryIOTrafficMetricAvailable()) cout << " IO |"; + if (m->memoryIOTrafficMetricAvailable()) + cout << " IA |"; + if (m->memoryIOTrafficMetricAvailable()) + cout << " GT |"; if (m->packageEnergyMetricsAvailable()) cout << " CPU energy |"; if (m->dramEnergyMetricsAvailable()) @@ -459,6 +467,10 @@ void print_output(PCM * m, " " << setw(11) << getBytesWrittenToEDC(sktstate1[i], sktstate2[i]) / double(1e9); if (m->memoryIOTrafficMetricAvailable()) cout << " " << setw(5) << getIORequestBytesFromMC(sktstate1[i], sktstate2[i]) / double(1e9); + if (m->memoryIOTrafficMetricAvailable()) + cout << " " << setw(5) << getIARequestBytesFromMC(sktstate1[i], sktstate2[i]) / double(1e9); + if (m->memoryIOTrafficMetricAvailable()) + cout << " " << setw(5) << getGTRequestBytesFromMC(sktstate1[i], sktstate2[i]) / double(1e9); cout << " "; if(m->packageEnergyMetricsAvailable()) { cout << setw(6) << getConsumedJoules(sktstate1[i], sktstate2[i]); diff --git a/simdjson_wrapper.h b/simdjson_wrapper.h index b6e76880..03a131e2 100644 --- a/simdjson_wrapper.h +++ b/simdjson_wrapper.h @@ -17,5 +17,9 @@ #pragma message("parsing events from 01.org/perfmon won't be supported because simdjson library is not found in simdjson/singleheader/simdjson.h") #pragma message("run 'git clone https://github.com/simdjson/simdjson.git' to get simdjson library") #endif + #else + #pragma message("The compiler is too old, it does not support '__has_include' directive and other c++ features required for simdjson library. Parsing events from 01.org/perfmon won't be supported.") #endif +#else + #pragma message("The compiler is too old (g++ 6 or below). Parsing events from 01.org/perfmon won't be supported.") #endif diff --git a/test.sh b/test.sh new file mode 100644 index 00000000..70657b5d --- /dev/null +++ b/test.sh @@ -0,0 +1,100 @@ +modprobe msr + +./pcm.x -r -- sleep 1 +if [ "$?" -ne "0" ]; then + echo "Error in pcm.x" + exit 1 +fi + +./pcm-memory.x -- sleep 1 +if [ "$?" -ne "0" ]; then + echo "Error in pcm-memory.x" + exit 1 +fi + +./pcm-memory.x -rank=1 -- sleep 1 +if [ "$?" -ne "0" ]; then + echo "Error in pcm-memory.x" + exit 1 +fi + +./pcm-memory.x -rank=1 -csv -- sleep 1 +if [ "$?" -ne "0" ]; then + echo "Error in pcm-memory.x" + exit 1 +fi + +./pcm-raw.x -e core/config=0x30203,name=LD_BLOCKS.STORE_FORWARD/ -e cha/config=0,name=UNC_CHA_CLOCKTICKS/ -e imc/fixed,name=DRAM_CLOCKS -- sleep 1 +if [ "$?" -ne "0" ]; then + echo "Error in pcm-raw.x" + exit 1 +fi + +./pcm-mmio.x 0x0 +if [ "$?" -ne "0" ]; then + echo "Error in pcm-mmio.x" + exit 1 +fi + +./pcm-pcicfg.x 0 0 0 0 0 +if [ "$?" -ne "0" ]; then + echo "Error in pcm-pcicfg.x" + exit 1 +fi + +./pcm-numa.x -- sleep 1 +if [ "$?" -ne "0" ]; then + echo "Error in pcm-numa.x" + exit 1 +fi + +./pcm-core.x -e cpu/umask=0x01,event=0x0e,name=UOPS_ISSUED.STALL_CYCLES/ -- sleep 1 +if [ "$?" -ne "0" ]; then + echo "Error in pcm-core.x" + exit 1 +fi + +./c_example.x +if [ "$?" -ne "0" ]; then + echo "Error in c_example.x" + exit 1 +fi + +./c_example_shlib.x +if [ "$?" -ne "0" ]; then + echo "Error in c_example_shlib.x" + exit 1 +fi + +./pcm-msr.x -a 0x30A +if [ "$?" -ne "0" ]; then + echo "Error in pcm-msr.x" + exit 1 +fi + +./pcm-power.x -- sleep 1 +if [ "$?" -ne "0" ]; then + echo "Error in pcm-power.x" + exit 1 +fi + +./pcm-pcie.x -- sleep 1 +if [ "$?" -ne "0" ]; then + echo "Error in pcm-pcie.x" + exit 1 +fi + +./pcm-latency.x -i=1 +if [ "$?" -ne "0" ]; then + echo "Error in pcm-latency.x" + exit 1 +fi + +./pcm-tsx.x -- sleep 1 +if [ "$?" -ne "0" ]; then + echo "Error in pcm-tsx.x" + exit 1 +fi + +# TODO add more tests +# e.g for ./pcm-sensor-server.x, ./pcm-iio.x, ./pcm-sensor.x, ... diff --git a/types.h b/types.h index 05b8612a..cf33b41c 100644 --- a/types.h +++ b/types.h @@ -162,6 +162,8 @@ constexpr auto MSR_FRONTEND = 0x3F7; // Offcore response events #define OFFCORE_RESPONSE_0_EVTNR (0xB7) #define OFFCORE_RESPONSE_1_EVTNR (0xBB) +#define GLC_OFFCORE_RESPONSE_0_EVTNR (0x2A) +#define GLC_OFFCORE_RESPONSE_1_EVTNR (0x2B) #define OFFCORE_RESPONSE_0_UMASK (1) #define OFFCORE_RESPONSE_1_UMASK (1) @@ -1080,6 +1082,42 @@ static const uint32 ICX_IIO_UNIT_CTL[] = { 0x0A50, 0x0A70, 0x0A90, 0x0AE0, 0x0B00, 0x0B20 }; +static const uint32 ICX_IRP_UNIT_CTL[] = { + 0x0A4A, + 0x0A6A, + 0x0A8A, + 0x0ADA, + 0x0AFA, + 0x0B1A +}; + +#define ICX_IRP_CTL_REG_OFFSET (0x0003) +#define ICX_IRP_CTR_REG_OFFSET (0x0001) + + +static const uint32 SNR_IRP_UNIT_CTL[] = { + 0x1EA0, + 0x1EB0, + 0x1EC0, + 0x1ED0, + 0x1EE0 +}; + +#define SNR_IRP_CTL_REG_OFFSET (0x0008) +#define SNR_IRP_CTR_REG_OFFSET (0x0001) + +static const uint32 SKX_IRP_UNIT_CTL[] = { + 0x0A58, + 0x0A78, + 0x0A98, + 0x0AB8, + 0x0AD8, + 0x0AF8 +}; + +#define SKX_IRP_CTL_REG_OFFSET (0x0003) +#define SKX_IRP_CTR_REG_OFFSET (0x0001) + #define SNR_IIO_CBDMA_UNIT_STATUS (0x1E07) #define SNR_IIO_CBDMA_UNIT_CTL (0x1E00) #define SNR_IIO_CBDMA_CTR0 (0x1E01) diff --git a/width_extender.h b/width_extender.h index dc0f0ffb..93187c8e 100644 --- a/width_extender.h +++ b/width_extender.h @@ -69,6 +69,8 @@ class CounterWidthExtender typedef ClientImcCounter<&FreeRunningBWCounters::getImcReads> ClientImcReadsCounter; typedef ClientImcCounter<&FreeRunningBWCounters::getImcWrites> ClientImcWritesCounter; + typedef ClientImcCounter<&FreeRunningBWCounters::getGtRequests> ClientGtRequestsCounter; + typedef ClientImcCounter<&FreeRunningBWCounters::getIaRequests> ClientIaRequestsCounter; typedef ClientImcCounter<&FreeRunningBWCounters::getIoRequests> ClientIoRequestsCounter; struct MBLCounter : public AbstractRawCounter