From a3e671a84e7fb91ac4396da2d0726fec0fcc5849 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Mon, 12 Aug 2024 12:00:52 +0200 Subject: [PATCH 01/23] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7fdb4c83..1a7ebf5b 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ Building PCM Tools Clone PCM repository with submodules: ``` -git clone --recursive https://github.com/intel/pcm.git +git clone --recursive https://github.com/intel/pcm ``` or clone the repository first, and then update submodules with: From 68f3d928f1bf49be498e6303c453dc9b5ab28c68 Mon Sep 17 00:00:00 2001 From: "Bruggeman, Otto G" Date: Tue, 13 Aug 2024 11:42:48 +0200 Subject: [PATCH 02/23] Fix public github #810 --- src/lspci.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/lspci.h b/src/lspci.h index 0cf09134..af81886b 100644 --- a/src/lspci.h +++ b/src/lspci.h @@ -17,7 +17,9 @@ " https://raw.githubusercontent.com/pciutils/pciids/master/pci.ids and" \ " copy it to the current directory." #else +// different distributions put it in different places #define PCI_IDS_PATH "/usr/share/hwdata/pci.ids" +#define PCI_IDS_PATH2 "/usr/share/misc/pci.ids" #define PCI_IDS_NOT_FOUND "/usr/share/hwdata/pci.ids file is not available." 
\ " Ensure that the \"hwdata\" package is properly installed or download" \ " https://raw.githubusercontent.com/pciutils/pciids/master/pci.ids and" \ @@ -448,6 +450,12 @@ void load_PCIDB(PCIDB & pciDB) if (!in.is_open()) { #ifndef _MSC_VER + // On Unix, try PCI_IDS_PATH2 + in.open(PCI_IDS_PATH2); + } + + if (!in.is_open()) + { // On Unix, try the current directory if the default path failed in.open("pci.ids"); } From e3960c01f045cd5ad331bf70173bf881eaeca657 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Mon, 1 Jul 2024 19:16:10 +0200 Subject: [PATCH 03/23] pcm-power for SRF --- src/cpucounters.cpp | 59 ++++++++++++++++++++++++++++++++++++--------- src/pcm-power.cpp | 18 ++++++++++++-- 2 files changed, 64 insertions(+), 13 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 68abab4b..50399d51 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -5598,6 +5598,7 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof { case SPR: case EMR: + case SRF: PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(1); // clock ticks break; default: @@ -5616,6 +5617,7 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof { case SPR: case EMR: + case SRF: PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x35); // POWER_STATE_OCCUPANCY.C0 PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x36); // POWER_STATE_OCCUPANCY.C3 PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x37); // POWER_STATE_OCCUPANCY.C6 @@ -5634,12 +5636,12 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof case 3: PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x04); // Thermal frequency limit cycles: FREQ_MAX_LIMIT_THERMAL_CYCLES PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES - PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR) + PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped 
frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) break; case 4: // not supported on SKX, ICX, SNOWRIDGE, SPR, EMR PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x06); // OS frequency limit cycles: FREQ_MAX_OS_CYCLES PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES - PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX and ICX and SNOWRIDGE) + PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) break; case 5: if(JAKETOWN == cpu_model) @@ -5650,8 +5652,17 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x60) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x60) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES - } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model - || ICX == cpu_model || SNOWRIDGE == cpu_model || SPR == cpu_model || EMR == cpu_model) + } else if ( + HASWELLX == cpu_model + || BDX_DE == cpu_model + || BDX == cpu_model + || SKX == cpu_model + || ICX == cpu_model + || SNOWRIDGE == cpu_model + || SPR == cpu_model + || EMR == cpu_model + || SRF == cpu_model + ) { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x74) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x74) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES @@ -5670,11 +5681,21 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof { PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2 transitions PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions - } else if (HASWELLX == cpu_model || BDX_DE == 
cpu_model || BDX == cpu_model || SKX == cpu_model || ICX == cpu_model || SNOWRIDGE == cpu_model || SPR == cpu_model || EMR == cpu_model) + } else if ( + HASWELLX == cpu_model + || BDX_DE == cpu_model + || BDX == cpu_model + || SKX == cpu_model + || ICX == cpu_model + || SNOWRIDGE == cpu_model + || SPR == cpu_model + || EMR == cpu_model + || SRF == cpu_model + ) { - PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR) - PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR) - PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2 transitions + PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) + PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) + PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2e transitions PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions } else { @@ -9073,9 +9094,25 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) case PCM::SNOWRIDGE: case PCM::SPR: case PCM::EMR: + case PCM::SRF: UNC_M_POWER_CKE_CYCLES = 0x47; break; } + unsigned int UNC_M_POWER_CHANNEL_PPD_CYCLES = 0x85; + switch (cpu_model) + { + case PCM::SRF: + UNC_M_POWER_CHANNEL_PPD_CYCLES = 0x88; + break; + } + unsigned int UNC_M_SELF_REFRESH_ENTER_SUCCESS_CYCLES_UMASK = 0; + switch (cpu_model) + { + case PCM::SRF: + UNC_M_SELF_REFRESH_ENTER_SUCCESS_CYCLES_UMASK = 0x01; + break; + } + switch(mc_profile) { case 0: // POWER_CKE_CYCLES.RANK0 and POWER_CKE_CYCLES.RANK1 @@ -9103,9 +9140,9 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) MCCntConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x80) + MC_CH_PCI_PMON_CTL_THRESH(1) + 
MC_CH_PCI_PMON_CTL_EDGE_DET; break; case 4: // POWER_SELF_REFRESH - MCCntConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0x43); - MCCntConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0x43) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET; - MCCntConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(0x85); + MCCntConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0x43) + MC_CH_PCI_PMON_CTL_UMASK(UNC_M_SELF_REFRESH_ENTER_SUCCESS_CYCLES_UMASK); + MCCntConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0x43) + MC_CH_PCI_PMON_CTL_UMASK(UNC_M_SELF_REFRESH_ENTER_SUCCESS_CYCLES_UMASK) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET; + MCCntConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CHANNEL_PPD_CYCLES); break; } diff --git a/src/pcm-power.cpp b/src/pcm-power.cpp index 08a015d8..86923c26 100644 --- a/src/pcm-power.cpp +++ b/src/pcm-power.cpp @@ -474,13 +474,26 @@ int mainThrows(int argc, char * argv[]) printHeader(true); cout << "; Thermal freq limit cycles: " << getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) * 100. << " %" << "; Power freq limit cycles:" << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket]) * 100. << " %"; - if(cpu_model != PCM::SKX && cpu_model != PCM::ICX && cpu_model != PCM::SNOWRIDGE && cpu_model != PCM::SPR && cpu_model != PCM::EMR) + if( + cpu_model != PCM::SKX + && cpu_model != PCM::ICX + && cpu_model != PCM::SNOWRIDGE + && cpu_model != PCM::SPR + && cpu_model != PCM::EMR + && cpu_model != PCM::SRF + ) cout << "; Clipped freq limit cycles:" << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) * 100. 
<< " %"; cout << "\n"; break; case 4: - if (cpu_model == PCM::SKX || cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::SPR || cpu_model == PCM::EMR) + if ( cpu_model == PCM::SKX + || cpu_model == PCM::ICX + || cpu_model == PCM::SNOWRIDGE + || cpu_model == PCM::SPR + || cpu_model == PCM::EMR + || cpu_model == PCM::SRF + ) { cout << "This PCU profile is not supported on your processor\n"; break; @@ -517,6 +530,7 @@ int mainThrows(int argc, char * argv[]) case PCM::SNOWRIDGE: case PCM::SPR: case PCM::EMR: + case PCM::SRF: cout << "; PC2 residency: " << getPackageCStateResidency(2, BeforeState[socket], AfterState[socket]) * 100. << " %"; cout << "; PC2 transitions: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 2, BeforeState[socket], AfterState[socket]) << " "; cout << "; PC3 residency: " << getPackageCStateResidency(3, BeforeState[socket], AfterState[socket]) * 100. << " %"; From 9be2dafba2080a759fff4f8303c63bdc70f6aa76 Mon Sep 17 00:00:00 2001 From: Thomas Willhalm Date: Thu, 22 Aug 2024 18:59:53 +0200 Subject: [PATCH 04/23] Added license to Dockerfile --- Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile b/Dockerfile index 076eafda..906d37a6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,7 @@ FROM fedora:40@sha256:5ce8497aeea599bf6b54ab3979133923d82aaa4f6ca5ced1812611b197c79eb0 as builder +# Dockerfile for Intel PCM sensor server +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2020-2024 Intel Corporation RUN dnf -y install gcc-c++ git findutils make cmake openssl openssl-devel libasan libasan-static COPY . 
/tmp/pcm From 4f1eaf7f10bc9fe85931c568860437b6c9482d7d Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 10 Sep 2024 10:59:41 +0200 Subject: [PATCH 05/23] pcm-memory: replace NODE with SKT to avoid confusion with NUMA NODEs Change-Id: Ib474082f54f84a62b0d28b8cb07229502ca9fbf7 --- src/pcm-memory.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/pcm-memory.cpp b/src/pcm-memory.cpp index ba08e513..a283f5d2 100644 --- a/src/pcm-memory.cpp +++ b/src/pcm-memory.cpp @@ -355,21 +355,21 @@ float PMM_MM_Ratio(const memdata_t *md, const uint32 skt) void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t *md) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " Mem Read (MB/s) :" << setw(9) << md->iMC_Rd_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " Mem Read (MB/s) :" << setw(9) << md->iMC_Rd_socket[i] << " --|"; } cout << "\n"; for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " Mem Write(MB/s) :" << setw(9) << md->iMC_Wr_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " Mem Write(MB/s) :" << setw(9) << md->iMC_Wr_socket[i] << " --|"; } cout << "\n"; if (anyPmem(md->metrics)) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " PMM Read (MB/s): " << setw(8) << md->iMC_PMM_Rd_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM Read (MB/s): " << setw(8) << md->iMC_PMM_Rd_socket[i] << " --|"; } cout << "\n"; for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " PMM Write(MB/s): " << setw(8) << md->iMC_PMM_Wr_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM Write(MB/s): " << setw(8) << md->iMC_PMM_Wr_socket[i] << " --|"; } cout << "\n"; } @@ -377,17 +377,17 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t { for (uint32 i = skt; i < (skt + no_columns); 
++i) { - cout << "|-- NODE" << setw(2) << i << " PMM AD Bw(MB/s): " << setw(8) << AD_BW(md, i) << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM AD Bw(MB/s): " << setw(8) << AD_BW(md, i) << " --|"; } cout << "\n"; for (uint32 i = skt; i < (skt + no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " PMM MM Bw(MB/s): " << setw(8) << md->MemoryMode_Miss_socket[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM MM Bw(MB/s): " << setw(8) << md->MemoryMode_Miss_socket[i] << " --|"; } cout << "\n"; for (uint32 i = skt; i < (skt + no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " PMM MM Bw/DRAM Bw:" << setw(8) << PMM_MM_Ratio(md, i) << " --|"; + cout << "|-- SKT " << setw(2) << i << " PMM MM Bw/DRAM Bw:" << setw(8) << PMM_MM_Ratio(md, i) << " --|"; } cout << "\n"; } @@ -396,7 +396,7 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t for (uint32 ctrl = 0; ctrl < max_imc_controllers; ++ctrl) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << "." << ctrl << " NM read hit rate :" << setw(6) << md->M2M_NM_read_hit_rate[i][ctrl] << " --|"; + cout << "|-- SKT " << setw(2) << i << "." << ctrl << " NM read hit rate :" << setw(6) << md->M2M_NM_read_hit_rate[i][ctrl] << " --|"; } cout << "\n"; } @@ -404,22 +404,22 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t if ((md->metrics == PmemMemoryMode && md->NM_hit_rate_supported) || md->BHS_NM == true) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM hit rate: " << setw(6) << md->NM_hit_rate[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " NM hit rate: " << setw(6) << md->NM_hit_rate[i] << " --|"; } cout << "\n"; for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM hits (M/s): " << setw(7) << (md->MemoryMode_Hit_socket[i])/1000000. 
<< " --|"; + cout << "|-- SKT " << setw(2) << i << " NM hits (M/s): " << setw(7) << (md->MemoryMode_Hit_socket[i])/1000000. << " --|"; } cout << "\n"; for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM misses (M/s): " << setw(7) << (md->MemoryMode_Miss_socket[i])/1000000. << " --|"; + cout << "|-- SKT " << setw(2) << i << " NM misses (M/s): " << setw(7) << (md->MemoryMode_Miss_socket[i])/1000000. << " --|"; } cout << "\n"; } if (md->BHS_NM == true) { for (uint32 i = skt; i < (skt + no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM miss Bw(MB/s):" << setw(9) << (md->MemoryMode_Miss_socket[i] * 64. * 2.) / 1000000. << " --|"; + cout << "|-- SKT " << setw(2) << i << " NM miss Bw(MB/s):" << setw(9) << (md->MemoryMode_Miss_socket[i] * 64. * 2.) / 1000000. << " --|"; } cout << "\n"; } @@ -428,12 +428,12 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t ) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " P. Write (T/s): " << dec << setw(10) << md->partial_write[i] << " --|"; + cout << "|-- SKT " << setw(2) << i << " P. 
Write (T/s): " << dec << setw(10) << md->partial_write[i] << " --|"; } cout << "\n"; } for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " Memory (MB/s): " << setw(11) << right << (md->iMC_Rd_socket[i]+md->iMC_Wr_socket[i]+ + cout << "|-- SKT " << setw(2) << i << " Memory (MB/s): " << setw(11) << right << (md->iMC_Rd_socket[i]+md->iMC_Wr_socket[i]+ md->iMC_PMM_Rd_socket[i]+md->iMC_PMM_Wr_socket[i]) << " --|"; } cout << "\n"; From 992bd904c8affd80d3619141a352a80ab78a699e Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Thu, 8 Aug 2024 10:25:39 +0200 Subject: [PATCH 06/23] add perfmon event repository as submodule for convenience Change-Id: Icaa4159fb57da0597a8ff1692850645e54e46a88 --- .gitmodules | 3 +++ perfmon | 1 + 2 files changed, 4 insertions(+) create mode 160000 perfmon diff --git a/.gitmodules b/.gitmodules index f1908446..53b7abf1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "src/simdjson"] path = src/simdjson url = https://github.com/simdjson/simdjson.git +[submodule "perfmon"] + path = perfmon + url = https://github.com/intel/perfmon diff --git a/perfmon b/perfmon new file mode 160000 index 00000000..f8c51ca9 --- /dev/null +++ b/perfmon @@ -0,0 +1 @@ +Subproject commit f8c51ca9f611356a3deb0e1ab6c1404d7393d126 From 49c76a86548f61ad21dcd45713f355d062d6c28c Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Thu, 8 Aug 2024 11:10:44 +0200 Subject: [PATCH 07/23] pcm-raw: update instructions for perfmon event map/files Change-Id: Ib40edae09a6399cc764687afab476da6ce88c78d --- src/pcm-raw.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 593b35eb..400e5139 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -222,7 +222,9 @@ bool initPMUEventMap() if (!in.is_open()) { cerr << "ERROR: File " << mapfilePath << " can't be open. 
\n"; - cerr << " Download it from https://raw.githubusercontent.com/intel/perfmon/main/" << mapfile << " \n"; + cerr << " Use -ep /perfmon option if you cloned PCM source repository recursively with submodules,\n"; + cerr << " or run 'git clone https://github.com/intel/perfmon' to download the perfmon event repository and use -ep option\n"; + cerr << " or download the file from https://raw.githubusercontent.com/intel/perfmon/main/" << mapfile << " \n"; return false; } int32 FMSPos = -1; From 471de17e0b8aaf800fff5c5a75d4d579856b20fe Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 11 Sep 2024 14:50:28 +0200 Subject: [PATCH 08/23] add CPM.cmake Change-Id: Ie48ae106573b46482f91238bafc3c23f30f2373f --- cmake/CPM.cmake | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 cmake/CPM.cmake diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake new file mode 100644 index 00000000..baf2d8c3 --- /dev/null +++ b/cmake/CPM.cmake @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: MIT +# +# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors + +set(CPM_DOWNLOAD_VERSION 0.40.2) +set(CPM_HASH_SUM "c8cdc32c03816538ce22781ed72964dc864b2a34a310d3b7104812a5ca2d835d") + +if(CPM_SOURCE_CACHE) + set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +elseif(DEFINED ENV{CPM_SOURCE_CACHE}) + set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +else() + set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +endif() + +# Expand relative path. 
This is important if the provided path contains a tilde (~) +get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE) + +file(DOWNLOAD + https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake + ${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM} +) + +include(${CPM_DOWNLOAD_LOCATION}) From 475f22e2d39daf7479009f70e29eda0d35965819 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 11 Sep 2024 17:43:25 +0200 Subject: [PATCH 09/23] specify a stable URL for WinPMem Change-Id: I8a5e543487673228bed6fcb9486022d8828e693e --- src/mmio.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mmio.cpp b/src/mmio.cpp index 2d4973c6..9e4c406c 100644 --- a/src/mmio.cpp +++ b/src/mmio.cpp @@ -47,7 +47,7 @@ class PCMPmem : public WinPmem { _tcscat_s(driver_filename, MAX_PATH, TEXT("\\winpmem_x64.sys")); if (GetFileAttributes(driver_filename) == INVALID_FILE_ATTRIBUTES) { - std::cerr << "ERROR: winpmem_x64.sys not found in current directory. Download it from https://github.com/Velocidex/WinPmem/blob/master/kernel/binaries/winpmem_x64.sys .\n"; + std::cerr << "ERROR: winpmem_x64.sys not found in current directory. Download it from https://github.com/Velocidex/WinPmem/blob/f044f340dd05658d026b0f293cdfa92876159872/kernel/binaries/winpmem_x64.sys .\n"; std::cerr << "ERROR: Memory bandwidth statistics will not be available.\n"; } break; @@ -55,7 +55,7 @@ class PCMPmem : public WinPmem { _tcscat_s(driver_filename, MAX_PATH, TEXT("\\winpmem_x86.sys")); if (GetFileAttributes(driver_filename) == INVALID_FILE_ATTRIBUTES) { - std::cerr << "ERROR: winpmem_x86.sys not found in current directory. Download it from https://github.com/Velocidex/WinPmem/blob/master/kernel/binaries/winpmem_x86.sys .\n"; + std::cerr << "ERROR: winpmem_x86.sys not found in current directory. 
Download it from https://github.com/Velocidex/WinPmem/blob/f044f340dd05658d026b0f293cdfa92876159872/kernel/binaries/winpmem_x86.sys .\n"; std::cerr << "ERROR: Memory bandwidth statistics will not be available.\n"; } break; From f00fc7b4db9b34b5f5a629993b117c79a27b993f Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 11 Sep 2024 14:28:29 +0200 Subject: [PATCH 10/23] add initial support for LNL (Lunar Lake) Change-Id: I86f53f2e8fde44dd3431ad5c9df5bd91be534420 --- src/cpucounters.cpp | 9 ++++++++- src/cpucounters.h | 8 ++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 50399d51..b2f0bf35 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -711,6 +711,7 @@ void PCM::initCStateSupportTables() case ADL: case RPL: case MTL: + case LNL: case SNOWRIDGE: PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x3F8, 0, 0x3F9, 0, 0x3FA, 0, 0, 0, 0 }) ); case NEHALEM_EP: @@ -785,6 +786,7 @@ void PCM::initCStateSupportTables() case ADL: case RPL: case MTL: + case LNL: case SNOWRIDGE: case ICX: case SPR: @@ -1637,6 +1639,7 @@ bool PCM::detectNominalFrequency() || cpu_model == ADL || cpu_model == RPL || cpu_model == MTL + || cpu_model == LNL || cpu_model == SKX || cpu_model == ICX || cpu_model == SPR @@ -3237,6 +3240,7 @@ bool PCM::isCPUModelSupported(const int model_) || model_ == ADL || model_ == RPL || model_ == MTL + || model_ == LNL || model_ == SKX || model_ == ICX || model_ == SPR @@ -3408,7 +3412,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter canUsePerf = false; if (!silent) std::cerr << "Installed Linux kernel perf does not support hardware top-down level-1 counters. 
Using direct PMU programming instead.\n"; } - if (canUsePerf && (cpu_model == ADL || cpu_model == RPL || cpu_model == MTL)) + if (canUsePerf && (cpu_model == ADL || cpu_model == RPL || cpu_model == MTL || cpu_model == LNL)) { canUsePerf = false; if (!silent) std::cerr << "Linux kernel perf rejects an architectural event on your platform. Using direct PMU programming instead.\n"; @@ -3495,6 +3499,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter case ADL: case RPL: case MTL: + case LNL: LLCArchEventInit(hybridAtomEventDesc); hybridAtomEventDesc[2].event_number = SKL_MEM_LOAD_RETIRED_L2_MISS_EVTNR; hybridAtomEventDesc[2].umask_value = SKL_MEM_LOAD_RETIRED_L2_MISS_UMASK; @@ -4844,6 +4849,8 @@ const char * PCM::getUArchCodename(const int32 cpu_model_param) const return "Raptor Lake"; case MTL: return "Meteor Lake"; + case LNL: + return "Lunar Lake"; case SKX: if (cpu_model_param >= 0) { diff --git a/src/cpucounters.h b/src/cpucounters.h index 876b2e5f..2f3aef3c 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1633,6 +1633,7 @@ class PCM_API PCM case ADL: case RPL: case MTL: + case LNL: if (topology[coreID].core_type == TopologyEntry::Atom) { return std::make_pair(OFFCORE_RESPONSE_0_EVTNR, event + 1); @@ -1648,6 +1649,7 @@ class PCM_API PCM case ADL: // ADL big core (GLC) case RPL: case MTL: + case LNL: useGLCOCREvent = true; break; } @@ -1873,6 +1875,7 @@ class PCM_API PCM RPL_2 = 0xbf, RPL_3 = 0xbe, MTL = 0xAA, + LNL = 0xBD, BDX = 79, KNL = 87, SKL = 94, @@ -2093,6 +2096,8 @@ class PCM_API PCM case RPL: case MTL: return 6; + case LNL: + return 12; case SNOWRIDGE: return 4; case DENVERTON: @@ -2432,6 +2437,7 @@ class PCM_API PCM || cpu_model == PCM::ADL || cpu_model == PCM::RPL || cpu_model == PCM::MTL + || cpu_model == PCM::LNL || cpu_model == PCM::SPR || cpu_model == PCM::EMR || cpu_model == PCM::SRF @@ -4229,6 +4235,7 @@ uint64 getL2CacheMisses(const CounterStateType & before, const CounterStateType || cpu_model == 
PCM::ADL || cpu_model == PCM::RPL || cpu_model == PCM::MTL + || cpu_model == PCM::LNL ) { return after.Event[BasicCounterState::SKLL2MissPos] - before.Event[BasicCounterState::SKLL2MissPos]; } @@ -4340,6 +4347,7 @@ uint64 getL3CacheHitsSnoop(const CounterStateType & before, const CounterStateTy || cpu_model == PCM::ADL || cpu_model == PCM::RPL || cpu_model == PCM::MTL + || cpu_model == PCM::LNL ) { const int64 misses = getL3CacheMisses(before, after); From 1cbf5580db25392874f43cbf24ecaca618a73761 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 11 Sep 2024 15:14:39 +0200 Subject: [PATCH 11/23] support free-running BW counters on LNL Change-Id: Ieef26942c585e61ba16048dfe1c7b0752dcc6c26 --- src/cpucounters.cpp | 1 + src/cpucounters.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index b2f0bf35..dbf902cf 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1901,6 +1901,7 @@ void PCM::initUncoreObjects() case ADL: // TGLClientBW works fine for ADL case RPL: // TGLClientBW works fine for RPL case MTL: // TGLClientBW works fine for MTL + case LNL: // TGLClientBW works fine for LNL clientBW = std::make_shared<TGLClientBW>(); break; /* Disabled since ADLClientBW requires 2x multiplier for BW on top diff --git a/src/cpucounters.h b/src/cpucounters.h index 2f3aef3c..e3d3e82d 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -2732,6 +2732,7 @@ class PCM_API PCM || cpu_model == ADL || cpu_model == RPL || cpu_model == MTL + || cpu_model == LNL || useSKLPath() ; } From da08f11b1b8e3f5971ad234edb9786d5a60140c9 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Thu, 12 Sep 2024 14:18:37 +0200 Subject: [PATCH 12/23] add GNR support Co-authored-by: Alexander Antonov Co-authored-by: Gang Deng Co-authored-by: White Hu Co-authored-by: Pavithran Pandiyan Co-authored-by: Roman Dementiev Change-Id: I4aabb179568ec5652e1814f58cc98a554430407f --- .../GenuineIntel-6-AD-0.json | 158 ++++++++++++++++++ src/cpucounters.cpp | 
65 ++++++- src/cpucounters.h | 26 ++- src/opCode-173.txt | 45 +++++ src/pcm-iio.cpp | 2 + src/pcm-memory.cpp | 6 +- src/pcm-pcie.cpp | 1 + src/pcm-power.cpp | 6 + 8 files changed, 303 insertions(+), 6 deletions(-) create mode 100644 src/PMURegisterDeclarations/GenuineIntel-6-AD-0.json create mode 100644 src/opCode-173.txt diff --git a/src/PMURegisterDeclarations/GenuineIntel-6-AD-0.json b/src/PMURegisterDeclarations/GenuineIntel-6-AD-0.json new file mode 100644 index 00000000..d68375b7 --- /dev/null +++ b/src/PMURegisterDeclarations/GenuineIntel-6-AD-0.json @@ -0,0 +1,158 @@ +{ + "core" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "User": {"Config": 0, "Position": 16, "Width": 1, "DefaultValue": 1}, + "OS": {"Config": 0, "Position": 17, "Width": 1, "DefaultValue": 1}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1}, + "PinControl": {"Config": 0, "Position": 19, "Width": 1, "DefaultValue": 0}, + "APICInt": {"Config": 0, "Position": 20, "Width": 1, "DefaultValue": 0}, + "Enable": {"Config": 0, "Position": 22, "Width": 1, "DefaultValue": 1}, + "Invert": {"Config": 0, "Position": 23, "Width": 1}, + "CounterMask": {"Config": 0, "Position": 24, "Width": 8}, + "InTX": {"Config": 0, "Position": 32, "Width": 1, "DefaultValue": 0}, + "InTXCheckpointed": {"Config": 0, "Position": 33, "Width": 1, "DefaultValue": 0}, + "MSRIndex": { + "0x1a6" : {"Config": 1, "Position": 0, "Width": 64}, + "0x1a7" : {"Config": 2, "Position": 0, "Width": 64}, + "0x3f6" : {"Config": 3, "Position": 0, "Width": 64}, + "0x3f7" : {"Config": 4, "Position": 0, "Width": 64} + } + }, + "fixed0" : { + "OS": {"Config": 0, "Position": 0, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 1, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 3, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + 
"UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed1" : { + "OS": {"Config": 0, "Position": 4, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 5, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 7, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed2" : { + "OS": {"Config": 0, "Position": 8, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 9, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 11, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed3" : { + "OS": {"Config": 0, "Position": 12, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 13, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 15, "Width": 1, 
"DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "PerfMetrics": {"Config": 2, "Position": 0, "Width": 1, "DefaultValue": 0, "__comment": "fake field to tell the collector to also print the L1 top-down metrics, not just raw slots count"} + } + }, + "cha" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "TIDEnable": {"Config": 0, "Position": 16, "Width": 1, "DefaultValue": 0}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 26}, + "TID": {"Config": 1, "Position": 0, "Width": 10, "DefaultValue": 0} + } + }, + "imc" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "xpi" : { + "__comment" : "this is for UPI LL and QPI LL uncore PMUs", + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 24} + } + }, + "m2m" : { + "programmable" : { + "EventCode": {"Config": 0, 
"Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 8} + } + }, + "m3upi" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "mdf" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "irp" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "pcu" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "pciex8" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "pciex16" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "iio" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, 
"Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 12, "DefaultValue": 0}, + "PortMask": {"Config": 0, "Position": 36, "Width": 12}, + "FCMask": {"Config": 0, "Position": 48, "Width": 3} + } + } +} diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index dbf902cf..7f3c16e4 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -737,6 +737,7 @@ void PCM::initCStateSupportTables() case ICX: case SPR: case EMR: + case GNR: case SRF: PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0, 0, 0, 0x3F9, 0, 0, 0, 0}) ); case HASWELL_ULT: @@ -791,6 +792,7 @@ void PCM::initCStateSupportTables() case ICX: case SPR: case EMR: + case GNR: case SRF: PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0x3FC, 0, 0, 0x3FD, 0x3FE, 0, 0, 0}) ); case KNL: @@ -1644,6 +1646,7 @@ bool PCM::detectNominalFrequency() || cpu_model == ICX || cpu_model == SPR || cpu_model == EMR + || cpu_model == GNR || cpu_model == SRF ) ? 
(100000000ULL) : (133333333ULL); @@ -1941,6 +1944,7 @@ void PCM::initUncoreObjects() case SNOWRIDGE: case SPR: case EMR: + case GNR: case SRF: { bool failed = false; @@ -2126,6 +2130,7 @@ void PCM::initUncorePMUsDirect() } break; case SRF: + case GNR: uncorePMUs[s].resize(1); { std::vector > CounterControlRegs{ @@ -2251,6 +2256,7 @@ void PCM::initUncorePMUsDirect() break; case SPR: case EMR: + case GNR: case SRF: uncorePMUs[s].resize(1); addPMUsFromDiscoveryRef(uncorePMUs[s][0][PCU_PMU_ID], SPR_PCU_BOX_TYPE, 0xE); @@ -2277,6 +2283,7 @@ void PCM::initUncorePMUsDirect() case EMR: addMDFPMUs(SPR_MDF_BOX_TYPE); break; + case GNR: case SRF: addMDFPMUs(BHS_MDF_BOX_TYPE); break; @@ -2323,6 +2330,7 @@ void PCM::initUncorePMUsDirect() switch (cpu_model) { + case GNR: case SRF: uncorePMUs[s].resize(1); if (safe_getenv("PCM_NO_PCIE_GEN5_DISCOVERY") == std::string("1")) @@ -2427,6 +2435,7 @@ void PCM::initUncorePMUsDirect() } } break; + case PCM::GNR: case PCM::SRF: for (uint32 s = 0; s < (uint32)num_sockets; ++s) { @@ -2636,6 +2645,7 @@ void PCM::initUncorePMUsDirect() IRP_CTR_REG_OFFSET = SPR_IRP_CTR_REG_OFFSET; IRP_UNIT_CTL = SPR_IRP_UNIT_CTL; break; + case GNR: case SRF: irpStacks = BHS_M2IOSF_NUM; IRP_CTL_REG_OFFSET = BHS_IRP_CTL_REG_OFFSET; @@ -2770,6 +2780,7 @@ void PCM::initUncorePMUsDirect() { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: { const auto n_units = (std::min)(uncorePMUDiscovery->getNumBoxes(SPR_CXLCM_BOX_TYPE, s), @@ -3246,6 +3257,7 @@ bool PCM::isCPUModelSupported(const int model_) || model_ == ICX || model_ == SPR || model_ == EMR + || model_ == GNR || model_ == SRF ); } @@ -3287,6 +3299,9 @@ bool PCM::checkModel() case RPL_3: cpu_model = RPL; break; + case GNR_D: + cpu_model = GNR; + break; } if(!isCPUModelSupported((int)cpu_model)) @@ -3555,6 +3570,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter case ICX: case SPR: case EMR: + case GNR: assert(useSkylakeEvents()); coreEventDesc[0].event_number = 
SKL_MEM_LOAD_RETIRED_L3_MISS_EVTNR; coreEventDesc[0].umask_value = SKL_MEM_LOAD_RETIRED_L3_MISS_UMASK; @@ -4873,6 +4889,8 @@ const char * PCM::getUArchCodename(const int32 cpu_model_param) const return "Sapphire Rapids-SP"; case EMR: return "Emerald Rapids-SP"; + case GNR: + return "Granite Rapids-SP"; case SRF: return "Sierra Forest"; } @@ -5607,6 +5625,8 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof case SPR: case EMR: case SRF: + case GNR: + case GNR_D: PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(1); // clock ticks break; default: @@ -5626,6 +5646,8 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof case SPR: case EMR: case SRF: + case GNR: + case GNR_D: PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x35); // POWER_STATE_OCCUPANCY.C0 PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x36); // POWER_STATE_OCCUPANCY.C3 PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x37); // POWER_STATE_OCCUPANCY.C6 @@ -5644,12 +5666,12 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof case 3: PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x04); // Thermal frequency limit cycles: FREQ_MAX_LIMIT_THERMAL_CYCLES PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES - PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) + PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) break; case 4: // not supported on SKX, ICX, SNOWRIDGE, SPR, EMR PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x06); // OS frequency limit cycles: FREQ_MAX_OS_CYCLES PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES - PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on 
SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) + PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) break; case 5: if(JAKETOWN == cpu_model) @@ -5670,6 +5692,8 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof || SPR == cpu_model || EMR == cpu_model || SRF == cpu_model + || GNR == cpu_model + || GNR_D == cpu_model ) { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x74) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions @@ -5699,10 +5723,12 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof || SPR == cpu_model || EMR == cpu_model || SRF == cpu_model + || GNR == cpu_model + || GNR_D == cpu_model ) { - PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) - PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF) + PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) + PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2e transitions PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions } else @@ -7543,6 +7569,7 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) } else if ( cpu_model == PCM::SRF + || cpu_model == PCM::GNR ) { PCM_PCICFG_QPI_INIT(0, BHS); @@ -7770,6 +7797,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) case PCM::SNOWRIDGE: case PCM::SPR: case PCM::EMR: + case PCM::GNR: // B2CMI PMUs case PCM::SRF: m2mPMUs.push_back( UncorePMU( @@ -7954,6 +7982,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) switch 
(cpu_model) { + case PCM::GNR: case PCM::SRF: initBHSiMCPMUs(12); break; @@ -8040,6 +8069,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) ) ); break; + case PCM::GNR: case PCM::SRF: m3upiPMUs.push_back( UncorePMU( @@ -8205,6 +8235,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) break; case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: xpiPMUs.push_back( UncorePMU( @@ -8739,6 +8770,7 @@ void ServerUncorePMUs::programServerUncoreMemoryMetrics(const ServerUncoreMemory return; } break; + case PCM::GNR: case PCM::SRF: if (metrics == PmemMemoryMode) { @@ -8833,6 +8865,7 @@ void ServerUncorePMUs::program() EDCCntConfig[EventPosition::READ] = MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xcf); // monitor reads on counter 0: CAS_COUNT.RD EDCCntConfig[EventPosition::WRITE] = MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 1: CAS_COUNT.WR break; + case PCM::GNR: case PCM::SRF: MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xcf); // monitor reads on counter 0: CAS_COUNT_SCH0.RD MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 1: CAS_COUNT_SCH0.WR @@ -8964,6 +8997,7 @@ uint64 ServerUncorePMUs::getImcReadsForChannels(uint32 beginChannel, uint32 endC result += getMCCounter(i, EventPosition::READ); switch (cpu_model) { + case PCM::GNR: case PCM::SRF: result += getMCCounter(i, EventPosition::READ2); break; @@ -8980,6 +9014,7 @@ uint64 ServerUncorePMUs::getImcWrites() result += getMCCounter(i, EventPosition::WRITE); switch (cpu_model) { + case PCM::GNR: case PCM::SRF: result += getMCCounter(i, EventPosition::WRITE2); break; @@ -9103,6 +9138,8 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) case PCM::SPR: case PCM::EMR: case PCM::SRF: + case PCM::GNR: + 
case PCM::GNR_D: UNC_M_POWER_CKE_CYCLES = 0x47; break; } @@ -9110,6 +9147,8 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) switch (cpu_model) { case PCM::SRF: + case PCM::GNR: + case PCM::GNR_D: UNC_M_POWER_CHANNEL_PPD_CYCLES = 0x88; break; } @@ -9117,6 +9156,8 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) switch (cpu_model) { case PCM::SRF: + case PCM::GNR: + case PCM::GNR_D: UNC_M_SELF_REFRESH_ENTER_SUCCESS_CYCLES_UMASK = 0x01; break; } @@ -9218,6 +9259,7 @@ void ServerUncorePMUs::programM2M() cfg[EventPosition::PMM_READ] = M2M_PCI_PMON_CTL_EVENT(0x37) + M2M_PCI_PMON_CTL_UMASK(0x20) + UNC_PMON_CTL_UMASK_EXT(0x07); // UNC_M2M_IMC_READS.TO_PMM cfg[EventPosition::PMM_WRITE] = M2M_PCI_PMON_CTL_EVENT(0x38) + M2M_PCI_PMON_CTL_UMASK(0x80) + UNC_PMON_CTL_UMASK_EXT(0x1C); // UNC_M2M_IMC_WRITES.TO_PMM break; + case PCM::GNR: case PCM::SRF: cfg[EventPosition::NM_HIT] = M2M_PCI_PMON_CTL_EVENT(0x1F) + M2M_PCI_PMON_CTL_UMASK(0x0F); // UNC_B2CMI_TAG_HIT.ALL cfg[EventPosition::M2M_CLOCKTICKS] = 0; // CLOCKTICKS @@ -9569,6 +9611,7 @@ uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumode std::pair regBits{}; switch (cpumodel) { + case PCM::GNR: case PCM::SRF: UPISpeedMap = { { 0, 2500}, @@ -9692,6 +9735,7 @@ uint64 PCM::CX_MSR_PMON_CTRY(uint32 Cbo, uint32 Ctr) const case SPR: case EMR: + case GNR: case SRF: return SPR_CHA0_MSR_PMON_CTR0 + SPR_CHA_MSR_STEP * Cbo + Ctr; @@ -9721,6 +9765,7 @@ uint64 PCM::CX_MSR_PMON_BOX_FILTER(uint32 Cbo) const case SPR: case EMR: + case GNR: case SRF: return SPR_CHA0_MSR_PMON_BOX_FILTER + SPR_CHA_MSR_STEP * Cbo; @@ -9764,6 +9809,7 @@ uint64 PCM::CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl) const case SPR: case EMR: + case GNR: case SRF: return SPR_CHA0_MSR_PMON_CTL0 + SPR_CHA_MSR_STEP * Cbo + Ctl; @@ -9793,6 +9839,7 @@ uint64 PCM::CX_MSR_PMON_BOX_CTL(uint32 Cbo) const case SPR: case EMR: + case GNR: case SRF: return SPR_CHA0_MSR_PMON_BOX_CTRL + SPR_CHA_MSR_STEP * Cbo; @@ -9868,6 +9915,7 @@ 
uint32 PCM::getMaxNumOfCBoxesInternal() const uint64 val = 0; switch (cpu_model) { + case GNR: case SRF: { const auto MSR_PMON_NUMBER_CBOS = 0x3fed; @@ -9982,6 +10030,7 @@ void PCM::programIIOCounters(uint64 rawEvents[4], int IIOStack) int stacks_count; switch (getCPUModel()) { + case PCM::GNR: case PCM::SRF: stacks_count = BHS_M2IOSF_NUM; break; @@ -10076,6 +10125,7 @@ void PCM::programPCIeEventGroup(eventGroup_t &eventGroup) switch (cpu_model) { + case PCM::GNR: case PCM::SRF: case PCM::SPR: case PCM::EMR: @@ -10127,6 +10177,7 @@ void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc && SNOWRIDGE != cpu_model && SPR != cpu_model && EMR != cpu_model + && GNR != cpu_model && SRF != cpu_model ) { @@ -10390,7 +10441,9 @@ bool PCM::supportIDXAccelDev() const { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: + case PCM::GNR_D: retval = true; break; @@ -10625,6 +10678,7 @@ void UncorePMU::freeze(const uint32 extra) { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ; break; @@ -10639,6 +10693,7 @@ void UncorePMU::unfreeze(const uint32 extra) { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: *unitControl = 0; break; @@ -10658,6 +10713,7 @@ bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg) { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ; // freeze *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ + SPR_UNC_PMON_UNIT_CTL_RST_CONTROL; // freeze and reset control registers @@ -10696,6 +10752,7 @@ void UncorePMU::resetUnfreeze(const uint32 extra) { case PCM::SPR: case PCM::EMR: + case PCM::GNR: case PCM::SRF: *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ + SPR_UNC_PMON_UNIT_CTL_RST_COUNTERS; // freeze and reset counter registers *unitControl = 0; // unfreeze diff --git a/src/cpucounters.h b/src/cpucounters.h index e3d3e82d..8a02e69f 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1248,6 +1248,7 
@@ class PCM_API PCM { case SPR: case EMR: + case GNR: case SRF: *ctrl = *curEvent; break; @@ -1304,6 +1305,8 @@ class PCM_API PCM ( SPR == cpu_model || EMR == cpu_model + || GNR == cpu_model + || GNR_D == cpu_model ); } @@ -1884,7 +1887,9 @@ class PCM_API PCM ICX = 106, SPR = 143, EMR = 207, + GNR = 173, SRF = 175, + GNR_D = 174, END_OF_MODEL_LIST = 0x0ffff }; @@ -1978,6 +1983,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case GNR: case SRF: return (serverUncorePMUs.size() && serverUncorePMUs[0].get()) ? (serverUncorePMUs[0]->getNumQPIPorts()) : 0; } @@ -2004,6 +2010,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case GNR: case SRF: case BDX: case KNL: @@ -2032,6 +2039,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case GNR: case SRF: case BDX: case KNL: @@ -2063,6 +2071,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case GNR: case SRF: case BDX: case KNL: @@ -2125,6 +2134,7 @@ class PCM_API PCM return 5; case SPR: case EMR: + case GNR: case SRF: return 6; } @@ -2177,6 +2187,7 @@ class PCM_API PCM case SNOWRIDGE: case SPR: case EMR: + case GNR: case SRF: case KNL: return true; @@ -2440,6 +2451,7 @@ class PCM_API PCM || cpu_model == PCM::LNL || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2457,6 +2469,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2480,6 +2493,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2496,6 +2510,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2509,6 +2524,7 @@ class PCM_API PCM || cpu_model == PCM::SPR || cpu_model == PCM::EMR || cpu_model == PCM::SRF + || cpu_model == PCM::GNR ; } @@ -2521,6 +2537,7 @@ class PCM_API 
PCM { return ( cpu_model == PCM::SRF + || cpu_model == PCM::GNR ); } @@ -2557,10 +2574,11 @@ class PCM_API PCM return ( cpu_model == PCM::SKX || cpu_model == PCM::ICX - || cpu_model == PCM::SNOWRIDGE + || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::SPR || cpu_model == PCM::EMR || cpu_model == PCM::SRF + || cpu_model == PCM::GNR ); } @@ -2569,6 +2587,7 @@ class PCM_API PCM return MSR.empty() == false && getMaxNumOfUncorePMUs(UBOX_PMU_ID) > 0ULL && getNumCores() == getNumOnlineCores() + && PCM::GNR != cpu_model && PCM::SRF != cpu_model ; } @@ -2661,6 +2680,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF || cpu_model == PCM::BDX || cpu_model == PCM::KNL @@ -2681,6 +2701,7 @@ class PCM_API PCM || cpu_model_ == PCM::ICX || cpu_model_ == PCM::SPR || cpu_model_ == PCM::EMR + || cpu_model_ == PCM::GNR || cpu_model_ == PCM::SRF ); } @@ -2705,6 +2726,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::GNR || cpu_model == PCM::SRF ); } @@ -2720,6 +2742,7 @@ class PCM_API PCM || PCM::ICX == cpu_model || PCM::SPR == cpu_model || PCM::EMR == cpu_model + || PCM::GNR == cpu_model ; } @@ -3389,6 +3412,7 @@ double getDRAMConsumedJoules(const CounterStateType & before, const CounterState || PCM::BDX == cpu_model || PCM::SKX == cpu_model || PCM::ICX == cpu_model + || PCM::GNR == cpu_model || PCM::SRF == cpu_model || PCM::KNL == cpu_model ) { diff --git a/src/opCode-173.txt b/src/opCode-173.txt new file mode 100644 index 00000000..c3ccfbc9 --- /dev/null +++ b/src/opCode-173.txt @@ -0,0 +1,45 @@ +#Clockticks +#ctr=0,ev_sel=0x1,umask=0x0,en=1,ch_mask=0,fc_mask=0x0,multiplier=1,divider=1,hname=Clockticks,vname=Total +# Inbound (PCIe device DMA into system) payload events +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part0 
+ctr=1,ev_sel=0x83,umask=0x1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part1 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part2 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part3 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part4 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part5 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part6 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part7 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part0 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part1 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part2 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part3 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part4 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part5 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part6 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part7 +# Outbound (CPU MMIO to the PCIe device) payload events +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part0 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part1 +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part2 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part3 
+ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part4 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part5 +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part6 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part7 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part0 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part1 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part2 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part3 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part4 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part5 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part6 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part7 +# IOMMU events +ctr=0,ev_sel=0x40,umask=0x01,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Lookup,vname=Total +ctr=1,ev_sel=0x40,umask=0x20,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Miss,vname=Total +ctr=2,ev_sel=0x40,umask=0x80,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=Ctxt Cache Hit,vname=Total +ctr=3,ev_sel=0x41,umask=0x10,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=256T Cache Hit,vname=Total +ctr=0,ev_sel=0x41,umask=0x08,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=512G Cache Hit,vname=Total +ctr=1,ev_sel=0x41,umask=0x04,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=1G Cache Hit,vname=Total +ctr=2,ev_sel=0x41,umask=0x02,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=2M Cache Hit,vname=Total 
+ctr=3,ev_sel=0x41,umask=0xc0,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOMMU Mem Access,vname=Total diff --git a/src/pcm-iio.cpp b/src/pcm-iio.cpp index ee97015c..d36f5f17 100644 --- a/src/pcm-iio.cpp +++ b/src/pcm-iio.cpp @@ -1700,6 +1700,7 @@ std::unique_ptr IPlatformMapping::getPlatformMapping(int cpu_m case PCM::EMR: return std::unique_ptr{new EagleStreamPlatformMapping(cpu_model, sockets_count)}; case PCM::SRF: + case PCM::GNR: return std::unique_ptr{new BirchStreamPlatform(cpu_model, sockets_count)}; default: return nullptr; @@ -1717,6 +1718,7 @@ ccr* get_ccr(PCM* m, uint64_t& ccr) case PCM::SPR: case PCM::EMR: case PCM::SRF: + case PCM::GNR: return new icx_ccr(ccr); default: cerr << m->getCPUFamilyModelString() << " is not supported! Program aborted" << endl; diff --git a/src/pcm-memory.cpp b/src/pcm-memory.cpp index a283f5d2..c2e9418f 100644 --- a/src/pcm-memory.cpp +++ b/src/pcm-memory.cpp @@ -425,6 +425,7 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t } if ( md->metrics == PartialWrites && m->getCPUModel() != PCM::SRF + && m->getCPUModel() != PCM::GNR ) { for (uint32 i=skt; i<(skt+no_columns); ++i) { @@ -732,6 +733,7 @@ void display_bandwidth_csv(PCM *m, memdata_t *md, uint64 /*elapsedTime*/, const if (m->HBMmemoryTrafficMetricsAvailable() == false) { if ( md->metrics == PartialWrites + && m->getCPUModel() != PCM::GNR && m->getCPUModel() != PCM::SRF ) { @@ -991,6 +993,7 @@ void calculate_bandwidth(PCM *m, writes = getMCCounter(channel, ServerUncorePMUs::EventPosition::WRITE, uncState1[skt], uncState2[skt]); switch (cpu_model) { + case PCM::GNR: case PCM::SRF: reads += getMCCounter(channel, ServerUncorePMUs::EventPosition::READ2, uncState1[skt], uncState2[skt]); writes += getMCCounter(channel, ServerUncorePMUs::EventPosition::WRITE2, uncState1[skt], uncState2[skt]); @@ -1054,7 +1057,8 @@ void calculate_bandwidth(PCM *m, md.MemoryMode_Hit_socket[skt] += toRate(memoryModeHits); } else if ( - cpu_model != 
PCM::SRF + cpu_model != PCM::GNR + && cpu_model != PCM::SRF ) { md.partial_write[skt] += (uint64)(getMCCounter(channel, ServerUncorePMUs::EventPosition::PARTIAL, uncState1[skt], uncState2[skt]) / (elapsedTime / 1000.0)); diff --git a/src/pcm-pcie.cpp b/src/pcm-pcie.cpp index 37ab8557..da55b93e 100644 --- a/src/pcm-pcie.cpp +++ b/src/pcm-pcie.cpp @@ -96,6 +96,7 @@ void print_usage(const string & progname) IPlatform *IPlatform::getPlatform(PCM *m, bool csv, bool print_bandwidth, bool print_additional_info, uint32 delay) { switch (m->getCPUModel()) { + case PCM::GNR: case PCM::SRF: return new BirchStreamPlatform(m, csv, print_bandwidth, print_additional_info, delay); case PCM::SPR: diff --git a/src/pcm-power.cpp b/src/pcm-power.cpp index 86923c26..7982375a 100644 --- a/src/pcm-power.cpp +++ b/src/pcm-power.cpp @@ -481,6 +481,8 @@ int mainThrows(int argc, char * argv[]) && cpu_model != PCM::SPR && cpu_model != PCM::EMR && cpu_model != PCM::SRF + && cpu_model != PCM::GNR + && cpu_model != PCM::GNR_D ) cout << "; Clipped freq limit cycles:" << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) * 100. << " %"; cout << "\n"; @@ -493,6 +495,8 @@ int mainThrows(int argc, char * argv[]) || cpu_model == PCM::SPR || cpu_model == PCM::EMR || cpu_model == PCM::SRF + || cpu_model == PCM::GNR + || cpu_model == PCM::GNR_D ) { cout << "This PCU profile is not supported on your processor\n"; @@ -531,6 +535,8 @@ int mainThrows(int argc, char * argv[]) case PCM::SPR: case PCM::EMR: case PCM::SRF: + case PCM::GNR: + case PCM::GNR_D: cout << "; PC2 residency: " << getPackageCStateResidency(2, BeforeState[socket], AfterState[socket]) * 100. << " %"; cout << "; PC2 transitions: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 2, BeforeState[socket], AfterState[socket]) << " "; cout << "; PC3 residency: " << getPackageCStateResidency(3, BeforeState[socket], AfterState[socket]) * 100. 
<< " %"; From ad5813ef3e00714a8b47ac1c045e8a59fdeb5ca7 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Mon, 23 Sep 2024 08:27:44 +0200 Subject: [PATCH 13/23] pcm-tsx: support GNR Change-Id: I5d1c9569895bfa4e05bcb70894e0042d76fb4c0a --- src/pcm-tsx.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pcm-tsx.cpp b/src/pcm-tsx.cpp index 01a18c0f..003f10a7 100644 --- a/src/pcm-tsx.cpp +++ b/src/pcm-tsx.cpp @@ -321,6 +321,8 @@ int mainThrows(int argc, char * argv[]) break; case PCM::SPR: case PCM::EMR: + case PCM::GNR: + case PCM::GNR_D: eventDefinition = sprEventDefinition; break; } From cbd39df33c7d2f5a6c8bff5c553e9b90918a0cc1 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 24 Sep 2024 10:41:03 +0200 Subject: [PATCH 14/23] introduce PCM_CPU_FAMILY_MODEL Change-Id: I9ff627a873c0472a9b2fe9f0b9d7ce09e77840c8 --- src/cpucounters.h | 114 +++++++++++++++++++++++----------------------- 1 file changed, 58 insertions(+), 56 deletions(-) diff --git a/src/cpucounters.h b/src/cpucounters.h index 8a02e69f..d49dbbc1 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1831,65 +1831,67 @@ class PCM_API PCM */ static int getCPUModelFromCPUID(); + #define PCM_CPU_FAMILY_MODEL(family_, model_) (((family_) << 8) + (model_)) + //! 
\brief Identifiers of supported CPU models enum SupportedCPUModels { - NEHALEM_EP = 26, - NEHALEM = 30, - ATOM = 28, - ATOM_2 = 53, - CENTERTON = 54, - BAYTRAIL = 55, - AVOTON = 77, - CHERRYTRAIL = 76, - APOLLO_LAKE = 92, - GEMINI_LAKE = 122, - DENVERTON = 95, - SNOWRIDGE = 134, - CLARKDALE = 37, - WESTMERE_EP = 44, - NEHALEM_EX = 46, - WESTMERE_EX = 47, - SANDY_BRIDGE = 42, - JAKETOWN = 45, - IVY_BRIDGE = 58, - HASWELL = 60, - HASWELL_ULT = 69, - HASWELL_2 = 70, - IVYTOWN = 62, - HASWELLX = 63, - BROADWELL = 61, - BROADWELL_XEON_E3 = 71, - BDX_DE = 86, - SKL_UY = 78, - KBL = 158, - KBL_1 = 142, - CML = 166, - CML_1 = 165, - ICL = 126, - ICL_1 = 125, - RKL = 167, - TGL = 140, - TGL_1 = 141, - ADL = 151, - ADL_1 = 154, - RPL = 0xb7, - RPL_1 = 0xba, - RPL_2 = 0xbf, - RPL_3 = 0xbe, - MTL = 0xAA, - LNL = 0xBD, - BDX = 79, - KNL = 87, - SKL = 94, - SKX = 85, - ICX_D = 108, - ICX = 106, - SPR = 143, - EMR = 207, - GNR = 173, - SRF = 175, - GNR_D = 174, + NEHALEM_EP = PCM_CPU_FAMILY_MODEL(6, 26), + NEHALEM = PCM_CPU_FAMILY_MODEL(6, 30), + ATOM = PCM_CPU_FAMILY_MODEL(6, 28), + ATOM_2 = PCM_CPU_FAMILY_MODEL(6, 53), + CENTERTON = PCM_CPU_FAMILY_MODEL(6, 54), + BAYTRAIL = PCM_CPU_FAMILY_MODEL(6, 55), + AVOTON = PCM_CPU_FAMILY_MODEL(6, 77), + CHERRYTRAIL = PCM_CPU_FAMILY_MODEL(6, 76), + APOLLO_LAKE = PCM_CPU_FAMILY_MODEL(6, 92), + GEMINI_LAKE = PCM_CPU_FAMILY_MODEL(6, 122), + DENVERTON = PCM_CPU_FAMILY_MODEL(6, 95), + SNOWRIDGE = PCM_CPU_FAMILY_MODEL(6, 134), + CLARKDALE = PCM_CPU_FAMILY_MODEL(6, 37), + WESTMERE_EP = PCM_CPU_FAMILY_MODEL(6, 44), + NEHALEM_EX = PCM_CPU_FAMILY_MODEL(6, 46), + WESTMERE_EX = PCM_CPU_FAMILY_MODEL(6, 47), + SANDY_BRIDGE = PCM_CPU_FAMILY_MODEL(6, 42), + JAKETOWN = PCM_CPU_FAMILY_MODEL(6, 45), + IVY_BRIDGE = PCM_CPU_FAMILY_MODEL(6, 58), + HASWELL = PCM_CPU_FAMILY_MODEL(6, 60), + HASWELL_ULT = PCM_CPU_FAMILY_MODEL(6, 69), + HASWELL_2 = PCM_CPU_FAMILY_MODEL(6, 70), + IVYTOWN = PCM_CPU_FAMILY_MODEL(6, 62), + HASWELLX = PCM_CPU_FAMILY_MODEL(6, 63), + 
BROADWELL = PCM_CPU_FAMILY_MODEL(6, 61), + BROADWELL_XEON_E3 = PCM_CPU_FAMILY_MODEL(6, 71), + BDX_DE = PCM_CPU_FAMILY_MODEL(6, 86), + SKL_UY = PCM_CPU_FAMILY_MODEL(6, 78), + KBL = PCM_CPU_FAMILY_MODEL(6, 158), + KBL_1 = PCM_CPU_FAMILY_MODEL(6, 142), + CML = PCM_CPU_FAMILY_MODEL(6, 166), + CML_1 = PCM_CPU_FAMILY_MODEL(6, 165), + ICL = PCM_CPU_FAMILY_MODEL(6, 126), + ICL_1 = PCM_CPU_FAMILY_MODEL(6, 125), + RKL = PCM_CPU_FAMILY_MODEL(6, 167), + TGL = PCM_CPU_FAMILY_MODEL(6, 140), + TGL_1 = PCM_CPU_FAMILY_MODEL(6, 141), + ADL = PCM_CPU_FAMILY_MODEL(6, 151), + ADL_1 = PCM_CPU_FAMILY_MODEL(6, 154), + RPL = PCM_CPU_FAMILY_MODEL(6, 0xb7), + RPL_1 = PCM_CPU_FAMILY_MODEL(6, 0xba), + RPL_2 = PCM_CPU_FAMILY_MODEL(6, 0xbf), + RPL_3 = PCM_CPU_FAMILY_MODEL(6, 0xbe), + MTL = PCM_CPU_FAMILY_MODEL(6, 0xAA), + LNL = PCM_CPU_FAMILY_MODEL(6, 0xBD), + BDX = PCM_CPU_FAMILY_MODEL(6, 79), + KNL = PCM_CPU_FAMILY_MODEL(6, 87), + SKL = PCM_CPU_FAMILY_MODEL(6, 94), + SKX = PCM_CPU_FAMILY_MODEL(6, 85), + ICX_D = PCM_CPU_FAMILY_MODEL(6, 108), + ICX = PCM_CPU_FAMILY_MODEL(6, 106), + SPR = PCM_CPU_FAMILY_MODEL(6, 143), + EMR = PCM_CPU_FAMILY_MODEL(6, 207), + GNR = PCM_CPU_FAMILY_MODEL(6, 173), + SRF = PCM_CPU_FAMILY_MODEL(6, 175), + GNR_D = PCM_CPU_FAMILY_MODEL(6, 174), END_OF_MODEL_LIST = 0x0ffff }; From 8686929d8006512afc56ecad315f18da51a82c0d Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 24 Sep 2024 10:47:51 +0200 Subject: [PATCH 15/23] don't check for family 6 Change-Id: Ifd23231db93d632626f079f2db2d24a54b195701 --- src/cpucounters.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 7f3c16e4..bca8f6ef 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -427,12 +427,6 @@ bool PCM::detectModel() readCoreCounterConfig(); - if (cpu_family != 6) - { - std::cerr << getUnsupportedMessage() << " CPU Family: " << cpu_family << "\n"; - return false; - } - pcm_cpuid(7, 0, cpuinfo); std::cerr << "\n===== Processor information 
=====\n"; From e1fa9aeb0e1ce2fc41a1ef55dcfc25163a2205a5 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 24 Sep 2024 10:53:49 +0200 Subject: [PATCH 16/23] disable getCPUModel APIs Change-Id: I3ad04c681edb5f458066cba2906c744d8b3351cc --- src/cpucounters.cpp | 2 ++ src/cpucounters.h | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index bca8f6ef..d7f2eb2a 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -382,6 +382,7 @@ int32 PCM::getMaxCustomCoreEvents() return core_gen_counter_num_max; } +/* int PCM::getCPUModelFromCPUID() { static int result = -1; @@ -393,6 +394,7 @@ int PCM::getCPUModelFromCPUID() } return result; } +*/ bool PCM::detectModel() { diff --git a/src/cpucounters.h b/src/cpucounters.h index d49dbbc1..48af0df3 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1829,7 +1829,9 @@ class PCM_API PCM /*! \brief Returns cpu model id number from cpuid instruction */ + /* static int getCPUModelFromCPUID(); + */ #define PCM_CPU_FAMILY_MODEL(family_, model_) (((family_) << 8) + (model_)) @@ -1924,7 +1926,9 @@ class PCM_API PCM //! \brief Reads CPU model id //! \return CPU model ID + /* uint32 getCPUModel() const { return (uint32)cpu_model; } + */ //! \brief Reads CPU stepping id //! 
\return CPU stepping ID From 882126f14e512312b572ed62c7d833bf524fca45 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 24 Sep 2024 11:50:47 +0200 Subject: [PATCH 17/23] transition to family-model (part 1) Change-Id: I83b1f3c6e94266fe1ee4a9856af482f269b554d9 --- src/cpucounters.cpp | 171 +++++++++++++++++++++++++------------------- src/cpucounters.h | 93 +++++++++++++----------- src/pcm.cpp | 22 +++--- 3 files changed, 159 insertions(+), 127 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index d7f2eb2a..07a6dc1a 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -396,6 +396,20 @@ int PCM::getCPUModelFromCPUID() } */ +int PCM::getCPUFamilyModelFromCPUID() +{ + static int result = -1; + if (result < 0) + { + PCM_CPUID_INFO cpuinfo; + pcm_cpuid(1, cpuinfo); + const auto cpu_family_ = (((cpuinfo.array[0]) >> 8) & 0xf) | ((cpuinfo.array[0] & 0xf00000) >> 16); + const auto cpu_model_ = (((cpuinfo.array[0]) & 0xf0) >> 4) | ((cpuinfo.array[0] & 0xf0000) >> 12); + result = PCM_CPU_FAMILY_MODEL(cpu_family_, cpu_model_); + } + return result; +} + bool PCM::detectModel() { char buffer[1024]; @@ -2614,7 +2628,7 @@ void PCM::initUncorePMUsDirect() size_t IRP_CTR_REG_OFFSET = 0; const uint32* IRP_UNIT_CTL = nullptr; - switch (getCPUModel()) + switch (getCPUFamilyModel()) { case SKX: irpStacks = SKX_IIO_STACK_COUNT; @@ -2772,7 +2786,7 @@ void PCM::initUncorePMUsDirect() return UncorePMU(std::make_shared(handle, unitControlAddr - unitControlAddrAligned), CounterControlRegs, CounterValueRegs); }; - switch (getCPUModel()) + switch (getCPUFamilyModel()) { case PCM::SPR: case PCM::EMR: @@ -5340,8 +5354,8 @@ void BasicCounterState::readAndAggregateTSC(std::shared_ptr msr) { uint64 cInvariantTSC = 0; PCM * m = PCM::getInstance(); - const auto cpu_model = m->getCPUModel(); - if (m->isAtom() == false || cpu_model == PCM::AVOTON) + const auto cpu_family_model = m->getCPUFamilyModel(); + if (m->isAtom() == false || cpu_family_model == PCM::AVOTON) { 
cInvariantTSC = m->getInvariantTSC_Fast(msr->getCoreId()); MSRValues[IA32_TIME_STAMP_COUNTER] = cInvariantTSC; @@ -7360,7 +7374,7 @@ ServerUncorePMUs::ServerUncorePMUs(uint32 socket_, const PCM * pcm) : , UPIbus(-1) , M2Mbus(-1) , groupnr(0) - , cpu_model(pcm->getCPUModel()) + , cpu_family_model(pcm->getCPUFamilyModel()) , qpi_speed(0) { if (pcm->useLinuxPerfForUncore()) @@ -7417,7 +7431,10 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) HARegisterLocation.resize(x + 1); \ HARegisterLocation[x] = std::make_pair(arch##_HA##x##_REGISTER_DEV_ADDR, arch##_HA##x##_REGISTER_FUNC_ADDR); - if(cpu_model == PCM::JAKETOWN || cpu_model == PCM::IVYTOWN) + switch (cpu_family_model) + { + case PCM::JAKETOWN: + case PCM::IVYTOWN: { PCM_PCICFG_MC_INIT(0, 0, JKTIVT) PCM_PCICFG_MC_INIT(0, 1, JKTIVT) @@ -7432,7 +7449,10 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_QPI_INIT(1, JKTIVT); PCM_PCICFG_QPI_INIT(2, JKTIVT); } - else if(cpu_model == PCM::HASWELLX || cpu_model == PCM::BDX_DE || cpu_model == PCM::BDX) + break; + case PCM::HASWELLX: + case PCM::BDX_DE: + case PCM::BDX: { PCM_PCICFG_MC_INIT(0, 0, HSX) PCM_PCICFG_MC_INIT(0, 1, HSX) @@ -7450,7 +7470,8 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_HA_INIT(0, HSX); PCM_PCICFG_HA_INIT(1, HSX); } - else if(cpu_model == PCM::SKX) + break; + case PCM::SKX: { PCM_PCICFG_MC_INIT(0, 0, SKX) PCM_PCICFG_MC_INIT(0, 1, SKX) @@ -7494,7 +7515,8 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_M3UPI_INIT(2, SKX); } } - else if (cpu_model == PCM::ICX) + break; + case PCM::ICX: { PCM_PCICFG_QPI_INIT(0, ICX); PCM_PCICFG_QPI_INIT(1, ICX); @@ -7509,7 +7531,9 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_M2M_INIT(2, SERVER) PCM_PCICFG_M2M_INIT(3, SERVER) } - else if (cpu_model == PCM::SPR || cpu_model == PCM::EMR) + break; + case PCM::SPR: + case PCM::EMR: { PCM_PCICFG_QPI_INIT(0, SPR); PCM_PCICFG_QPI_INIT(1, SPR); @@ 
-7543,7 +7567,8 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_HBM_M2M_INIT(14, SERVER) PCM_PCICFG_HBM_M2M_INIT(15, SERVER) } - else if(cpu_model == PCM::KNL) + break; + case PCM::KNL: { // 2 DDR4 Memory Controllers with 3 channels each PCM_PCICFG_MC_INIT(0, 0, KNL) @@ -7563,10 +7588,9 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_EDC_INIT(6, ECLK, KNL) PCM_PCICFG_EDC_INIT(7, ECLK, KNL) } - else if ( - cpu_model == PCM::SRF - || cpu_model == PCM::GNR - ) + break; + case PCM::SRF: + case PCM::GNR: { PCM_PCICFG_QPI_INIT(0, BHS); PCM_PCICFG_QPI_INIT(1, BHS); @@ -7597,16 +7621,17 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_M3UPI_INIT(4, BHS); PCM_PCICFG_M3UPI_INIT(5, BHS); } - else if (cpu_model == PCM::SNOWRIDGE) + break; + case PCM::SNOWRIDGE: { PCM_PCICFG_M2M_INIT(0, SERVER) PCM_PCICFG_M2M_INIT(1, SERVER) PCM_PCICFG_M2M_INIT(2, SERVER) PCM_PCICFG_M2M_INIT(3, SERVER) } - else - { - std::cerr << "Error: Uncore PMU for processor with model id " << cpu_model << " is not supported.\n"; + break; + default: + std::cerr << "Error: Uncore PMU for processor with id 0x" << std::hex << cpu_family_model << std::dec << " is not supported.\n"; throw std::exception(); } @@ -7691,7 +7716,7 @@ void ServerUncorePMUs::initBuses(uint32 socket_, const PCM * pcm) return; #endif - if (PCM::hasUPI(cpu_model) && XPIRegisterLocation.size() > 0) + if (PCM::hasUPI(cpu_family_model) && XPIRegisterLocation.size() > 0) { initSocket2Bus(socket2UPIbus, XPIRegisterLocation[0].first, XPIRegisterLocation[0].second, UPI_DEV_IDS, (uint32)sizeof(UPI_DEV_IDS) / sizeof(UPI_DEV_IDS[0])); if(total_sockets_ == socket2UPIbus.size()) @@ -7737,7 +7762,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) for (auto & handle : imcHandles) { - if (cpu_model == PCM::KNL) { + if (cpu_family_model == PCM::KNL) { imcPMUs.push_back( UncorePMU( std::make_shared(handle, KNX_MC_CH_PCI_PMON_BOX_CTL_ADDR), @@ -7772,7 
+7797,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) } } - auto populateM2MPMUs = [](uint32 groupnr, int32 M2Mbus, int32 cpu_model, const std::vector > & M2MRegisterLocation, UncorePMUVector & m2mPMUs) + auto populateM2MPMUs = [](uint32 groupnr, int32 M2Mbus, int32 cpu_family_model, const std::vector > & M2MRegisterLocation, UncorePMUVector & m2mPMUs) { std::vector > m2mHandles; @@ -7787,7 +7812,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) for (auto & handle : m2mHandles) { - switch (cpu_model) + switch (cpu_family_model) { case PCM::ICX: case PCM::SNOWRIDGE: @@ -7826,21 +7851,21 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) } } }; - populateM2MPMUs(groupnr, M2Mbus, cpu_model, M2MRegisterLocation, m2mPMUs); - populateM2MPMUs(groupnr, M2Mbus, cpu_model, HBM_M2MRegisterLocation, hbm_m2mPMUs); + populateM2MPMUs(groupnr, M2Mbus, cpu_family_model, M2MRegisterLocation, m2mPMUs); + populateM2MPMUs(groupnr, M2Mbus, cpu_family_model, HBM_M2MRegisterLocation, hbm_m2mPMUs); int numChannels = 0; if (safe_getenv("PCM_NO_IMC_DISCOVERY") == std::string("1")) { - if (cpu_model == PCM::SPR || cpu_model == PCM::EMR) + if (cpu_family_model == PCM::SPR || cpu_family_model == PCM::EMR) { numChannels = 3; } } - if (cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::ICX) + if (cpu_family_model == PCM::SNOWRIDGE || cpu_family_model == PCM::ICX) { numChannels = 2; - if (PCM::getCPUModelFromCPUID() == PCM::ICX_D) + if (PCM::getCPUFamilyModelFromCPUID() == PCM::ICX_D) { numChannels = 3; } @@ -7894,7 +7919,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) } else { - switch (cpu_model) + switch (cpu_family_model) { case PCM::SPR: case PCM::EMR: @@ -7976,7 +8001,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) } }; - switch (cpu_model) + switch (cpu_family_model) { case PCM::GNR: case PCM::SRF: @@ -7990,7 +8015,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * 
pcm) throw std::exception(); } - if (cpu_model == PCM::KNL) + if (cpu_family_model == PCM::KNL) { std::vector > edcHandles; @@ -8046,7 +8071,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) } for (auto& handle : m3upiHandles) { - switch (cpu_model) + switch (cpu_family_model) { case PCM::ICX: case PCM::SPR: @@ -8149,7 +8174,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) if (pcm->getNumSockets() <= 4 && safe_getenv("PCM_NO_UPILL_DISCOVERY") != std::string("1")) { - switch (cpu_model) + switch (cpu_family_model) { // don't use the discovery on SPR to work-around the issue // mentioned in https://lore.kernel.org/lkml/20221129191023.936738-1-kan.liang@linux.intel.com/T/ @@ -8197,7 +8222,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) if (xpiPMUs.empty()) for (auto & handle : qpiLLHandles) { - switch (cpu_model) + switch (cpu_family_model) { case PCM::SKX: xpiPMUs.push_back( @@ -8731,7 +8756,7 @@ void ServerUncorePMUs::programServerUncoreMemoryMetrics(const ServerUncoreMemory } return true; }; - switch(cpu_model) + switch(cpu_family_model) { case PCM::KNL: MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 0: CAS.RD @@ -8795,7 +8820,7 @@ void ServerUncorePMUs::programServerUncoreMemoryMetrics(const ServerUncoreMemory std::cerr << "PCM Error: invalid rankA value: " << rankA << "\n"; return; } - switch(cpu_model) + switch(cpu_family_model) { case PCM::IVYTOWN: MCCntConfig[EventPosition::READ_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(0xff); // RD_CAS_RANK(rankA) all banks @@ -8825,7 +8850,7 @@ void ServerUncorePMUs::programServerUncoreMemoryMetrics(const ServerUncoreMemory EDCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x02) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 1: WPQ break; default: - std::cerr << "PCM Error: your processor " << pcm->getCPUBrandString() << 
" model " << cpu_model << " does not support the required performance events \n"; + std::cerr << "PCM Error: your processor " << pcm->getCPUBrandString() << " ID 0x" << std::hex << cpu_family_model << std::dec << " does not support the required performance events \n"; return; } } @@ -8843,7 +8868,7 @@ void ServerUncorePMUs::program() PCM * pcm = PCM::getInstance(); uint32 MCCntConfig[4] = {0, 0, 0, 0}; uint32 EDCCntConfig[4] = {0, 0, 0, 0}; - switch(cpu_model) + switch(cpu_family_model) { case PCM::KNL: MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 0: CAS_COUNT.RD @@ -8885,7 +8910,7 @@ void ServerUncorePMUs::program() programM2M(); uint32 event[4]; - if (PCM::hasUPI(cpu_model)) + if (PCM::hasUPI(cpu_family_model)) { // monitor TxL0_POWER_CYCLES event[0] = Q_P_PCI_PMON_CTL_EVENT(0x26); @@ -8913,7 +8938,7 @@ void ServerUncorePMUs::program() void ServerUncorePMUs::programXPI(const uint32 * event) { - const uint32 extra = PCM::hasUPI(cpu_model) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN; + const uint32 extra = PCM::hasUPI(cpu_family_model) ? 
UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN; for (uint32 i = 0; i < (uint32)xpiPMUs.size(); ++i) { // QPI LL PMU @@ -8991,7 +9016,7 @@ uint64 ServerUncorePMUs::getImcReadsForChannels(uint32 beginChannel, uint32 endC for (uint32 i = beginChannel; i < endChannel && i < imcPMUs.size(); ++i) { result += getMCCounter(i, EventPosition::READ); - switch (cpu_model) + switch (cpu_family_model) { case PCM::GNR: case PCM::SRF: @@ -9008,7 +9033,7 @@ uint64 ServerUncorePMUs::getImcWrites() for (uint32 i = 0; i < (uint32)imcPMUs.size(); ++i) { result += getMCCounter(i, EventPosition::WRITE); - switch (cpu_model) + switch (cpu_family_model) { case PCM::GNR: case PCM::SRF: @@ -9095,7 +9120,7 @@ uint64 ServerUncorePMUs::getIncomingDataFlits(uint32 port) if (port >= (uint32)xpiPMUs.size()) return 0; - if (PCM::hasUPI(cpu_model) == false) + if (PCM::hasUPI(cpu_family_model) == false) { drs = *xpiPMUs[port].counterValue[0]; } @@ -9111,7 +9136,7 @@ uint64 ServerUncorePMUs::getOutgoingFlits(uint32 port) uint64 ServerUncorePMUs::getUPIL0TxCycles(uint32 port) { - if (PCM::hasUPI(cpu_model)) + if (PCM::hasUPI(cpu_family_model)) return getQPILLCounter(port,0); return 0; } @@ -9119,15 +9144,15 @@ uint64 ServerUncorePMUs::getUPIL0TxCycles(uint32 port) void ServerUncorePMUs::program_power_metrics(int mc_profile) { uint32 xPIEvents[4] = { 0,0,0,0 }; - xPIEvents[ServerUncoreCounterState::EventPosition::xPI_TxL0P_POWER_CYCLES] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_model) ? 0x27 : 0x0D)); // L0p Tx Cycles (TxL0P_POWER_CYCLES) - xPIEvents[ServerUncoreCounterState::EventPosition::xPI_L1_POWER_CYCLES] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_model) ? 0x21 : 0x12)); // L1 Cycles (L1_POWER_CYCLES) - xPIEvents[ServerUncoreCounterState::EventPosition::xPI_CLOCKTICKS] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_model) ? 
0x01 : 0x14)); // QPI/UPI clocks (CLOCKTICKS) + xPIEvents[ServerUncoreCounterState::EventPosition::xPI_TxL0P_POWER_CYCLES] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_family_model) ? 0x27 : 0x0D)); // L0p Tx Cycles (TxL0P_POWER_CYCLES) + xPIEvents[ServerUncoreCounterState::EventPosition::xPI_L1_POWER_CYCLES] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_family_model) ? 0x21 : 0x12)); // L1 Cycles (L1_POWER_CYCLES) + xPIEvents[ServerUncoreCounterState::EventPosition::xPI_CLOCKTICKS] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_family_model) ? 0x01 : 0x14)); // QPI/UPI clocks (CLOCKTICKS) programXPI(xPIEvents); uint32 MCCntConfig[4] = {0,0,0,0}; unsigned int UNC_M_POWER_CKE_CYCLES = 0x83; - switch (cpu_model) + switch (cpu_family_model) { case PCM::ICX: case PCM::SNOWRIDGE: @@ -9140,7 +9165,7 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) break; } unsigned int UNC_M_POWER_CHANNEL_PPD_CYCLES = 0x85; - switch (cpu_model) + switch (cpu_family_model) { case PCM::SRF: case PCM::GNR: @@ -9149,7 +9174,7 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) break; } unsigned int UNC_M_SELF_REFRESH_ENTER_SUCCESS_CYCLES_UMASK = 0; - switch (cpu_model) + switch (cpu_family_model) { case PCM::SRF: case PCM::GNR: @@ -9205,7 +9230,7 @@ void enableAndResetMCFixedCounter(UncorePMU& pmu) void ServerUncorePMUs::programIMC(const uint32 * MCCntConfig) { - const uint32 extraIMC = (cpu_model == PCM::SKX)?UNC_PMON_UNIT_CTL_RSV:UNC_PMON_UNIT_CTL_FRZ_EN; + const uint32 extraIMC = (cpu_family_model == PCM::SKX)?UNC_PMON_UNIT_CTL_RSV:UNC_PMON_UNIT_CTL_FRZ_EN; for (uint32 i = 0; i < (uint32)imcPMUs.size(); ++i) { @@ -9225,7 +9250,7 @@ void ServerUncorePMUs::programEDC(const uint32 * EDCCntConfig) edcPMUs[i].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); // HBM clocks enabled by default - if (cpu_model == PCM::KNL) + if (cpu_family_model == PCM::KNL) { *edcPMUs[i].fixedCounterControl = EDC_CH_PCI_PMON_FIXED_CTL_EN; } @@ -9241,7 +9266,7 @@ void 
ServerUncorePMUs::programEDC(const uint32 * EDCCntConfig) void ServerUncorePMUs::programM2M() { uint64 cfg[4] = {0, 0, 0, 0}; - switch (cpu_model) + switch (cpu_family_model) { case PCM::SPR: case PCM::EMR: @@ -9348,7 +9373,7 @@ void ServerUncorePMUs::freezeCounters() { for (auto& pmu : *pmuVector) { - pmu.freeze((cpu_model == PCM::SKX) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN); + pmu.freeze((cpu_family_model == PCM::SKX) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN); } } } @@ -9359,7 +9384,7 @@ void ServerUncorePMUs::unfreezeCounters() { for (auto& pmu : *pmuVector) { - pmu.unfreeze((cpu_model == PCM::SKX) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN); + pmu.unfreeze((cpu_family_model == PCM::SKX) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN); } } } @@ -10024,7 +10049,7 @@ void PCM::programIIOCounters(uint64 rawEvents[4], int IIOStack) if (IIOStack == -1) { int stacks_count; - switch (getCPUModel()) + switch (getCPUFamilyModel()) { case PCM::GNR: case PCM::SRF: @@ -10433,7 +10458,7 @@ bool PCM::supportIDXAccelDev() const { bool retval = false; - switch (this->getCPUModel()) + switch (this->getCPUFamilyModel()) { case PCM::SPR: case PCM::EMR: @@ -10619,7 +10644,7 @@ UncorePMU::UncorePMU(const HWRegisterPtr& unitControl_, const HWRegisterPtr& filter0, const HWRegisterPtr& filter1 ) : - cpu_model_(0), + cpu_family_model_(0), unitControl(unitControl_), counterControl{ counterControl0, counterControl1, counterControl2, counterControl3 }, counterValue{ counterValue0, counterValue1, counterValue2, counterValue3 }, @@ -10638,7 +10663,7 @@ UncorePMU::UncorePMU(const HWRegisterPtr& unitControl_, const HWRegisterPtr& filter0, const HWRegisterPtr& filter1 ): - cpu_model_(0), + cpu_family_model_(0), unitControl(unitControl_), counterControl{counterControl_}, counterValue{counterValue_}, @@ -10649,13 +10674,13 @@ UncorePMU::UncorePMU(const HWRegisterPtr& unitControl_, assert(counterControl.size() == counterValue.size()); } -uint32 
UncorePMU::getCPUModel() +uint32 UncorePMU::getCPUFamilyModel() { - if (cpu_model_ == 0) + if (cpu_family_model_ == 0) { - cpu_model_ = PCM::getInstance()->getCPUModel(); + cpu_family_model_ = PCM::getInstance()->getCPUFamilyModel(); } - return cpu_model_; + return cpu_family_model_; } void UncorePMU::cleanup() @@ -10670,7 +10695,7 @@ void UncorePMU::cleanup() void UncorePMU::freeze(const uint32 extra) { - switch (getCPUModel()) + switch (getCPUFamilyModel()) { case PCM::SPR: case PCM::EMR: @@ -10685,7 +10710,7 @@ void UncorePMU::freeze(const uint32 extra) void UncorePMU::unfreeze(const uint32 extra) { - switch (getCPUModel()) + switch (getCPUFamilyModel()) { case PCM::SPR: case PCM::EMR: @@ -10705,7 +10730,7 @@ bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg) return true; // this PMU does not have unit control register => no op } - switch (getCPUModel()) + switch (getCPUFamilyModel()) { case PCM::SPR: case PCM::EMR: @@ -10744,7 +10769,7 @@ bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg) void UncorePMU::resetUnfreeze(const uint32 extra) { - switch (getCPUModel()) + switch (getCPUFamilyModel()) { case PCM::SPR: case PCM::EMR: @@ -10775,7 +10800,7 @@ IDX_PMU::IDX_PMU(const bool perfMode_, const std::vector & counterFilterPGSZ, const std::vector & counterFilterXFERSZ ) : - cpu_model_(0), + cpu_family_model_(0), perf_mode_(perfMode_), numa_node_(numaNode_), socket_id_(socketId_), @@ -10793,14 +10818,14 @@ IDX_PMU::IDX_PMU(const bool perfMode_, assert(counterControl.size() == counterValue.size()); } -uint32 IDX_PMU::getCPUModel() +uint32 IDX_PMU::getCPUFamilyModel() { - if (cpu_model_ == 0) + if (cpu_family_model_ == 0) { - cpu_model_ = PCM::getInstance()->getCPUModel(); + cpu_family_model_ = PCM::getInstance()->getCPUFamilyModel(); } - return cpu_model_; + return cpu_family_model_; } void IDX_PMU::cleanup() @@ -10892,7 +10917,7 @@ void PCM::getIIOCounterStates(int socket, int IIOStack, IIOCounterState * result void 
PCM::setupCustomCoreEventsForNuma(PCM::ExtendedCustomCoreEventDescription& conf) const { - switch (this->getCPUModel()) + switch (this->getCPUFamilyModel()) { case PCM::WESTMERE_EX: // OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_DRAM: Offcore requests satisfied by the local DRAM diff --git a/src/cpucounters.h b/src/cpucounters.h index 48af0df3..eadbeb1a 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -244,8 +244,8 @@ class CounterWidthExtenderRegister : public HWRegister class UncorePMU { typedef std::shared_ptr HWRegisterPtr; - uint32 cpu_model_; - uint32 getCPUModel(); + uint32 cpu_family_model_; + uint32 getCPUFamilyModel(); HWRegisterPtr unitControl; public: std::vector counterControl; @@ -279,7 +279,7 @@ class UncorePMU const HWRegisterPtr& filter0 = HWRegisterPtr(), const HWRegisterPtr& filter1 = HWRegisterPtr() ); - UncorePMU() : cpu_model_(0U) {} + UncorePMU() : cpu_family_model_(0U) {} size_t size() const { return counterControl.size(); } virtual ~UncorePMU() {} bool valid() const @@ -298,8 +298,8 @@ typedef std::shared_ptr UncorePMURef; class IDX_PMU { typedef std::shared_ptr HWRegisterPtr; - uint32 cpu_model_; - uint32 getCPUModel(); + uint32 cpu_family_model_; + uint32 getCPUFamilyModel(); bool perf_mode_; uint32 numa_node_; uint32 socket_id_; @@ -330,7 +330,7 @@ class IDX_PMU const std::vector & counterFilterXFERSZ ); - IDX_PMU() : cpu_model_(0U), perf_mode_(false), numa_node_(0), socket_id_(0) {} + IDX_PMU() : cpu_family_model_(0U), perf_mode_(false), numa_node_(0), socket_id_(0) {} size_t size() const { return counterControl.size(); } virtual ~IDX_PMU() {} bool valid() const @@ -361,7 +361,7 @@ class ServerUncorePMUs friend class PCM; int32 iMCbus,UPIbus,M2Mbus; uint32 groupnr; - int32 cpu_model; + int32 cpu_family_model; typedef std::vector UncorePMUVector; UncorePMUVector imcPMUs; UncorePMUVector edcPMUs; @@ -1238,13 +1238,13 @@ class PCM_API PCM { if (!eventsBegin) return; Iterator curEvent = eventsBegin; - const auto cpu_model = 
PCM::getInstance()->getCPUModel(); + const auto cpu_family_model = PCM::getInstance()->getCPUFamilyModel(); for (int c = 0; curEvent != eventsEnd && size_t(c) < pmu.size(); ++c, ++curEvent) { auto ctrl = pmu.counterControl[c]; if (ctrl.get() != nullptr) { - switch (cpu_model) + switch (cpu_family_model) { case SPR: case EMR: @@ -1833,6 +1833,11 @@ class PCM_API PCM static int getCPUModelFromCPUID(); */ + /*! \brief Returns cpu family and model id number from cpuid instruction + * \return cpu family and model id number (model id is in the lower 8 bits, family id is in the next 8 bits) + */ + static int getCPUFamilyModelFromCPUID(); + #define PCM_CPU_FAMILY_MODEL(family_, model_) (((family_) << 8) + (model_)) //! \brief Identifiers of supported CPU models @@ -1930,6 +1935,10 @@ class PCM_API PCM uint32 getCPUModel() const { return (uint32)cpu_model; } */ + //! \brief Reads CPU family and model id + //! \return CPU family and model ID (lowest 8 bits is the model, next 8 bits is the family) + uint32 getCPUFamilyModel() const { return PCM_CPU_FAMILY_MODEL((uint32)cpu_family, (uint32)cpu_model); } + //! \brief Reads CPU stepping id //! 
\return CPU stepping ID uint32 getCPUStepping() const { return (uint32)cpu_stepping; } @@ -2700,15 +2709,15 @@ class PCM_API PCM ); } - static bool hasUPI(const int32 cpu_model_) // Intel(r) Ultra Path Interconnect + static bool hasUPI(const int32 cpu_family_model_) // Intel(r) Ultra Path Interconnect { return ( - cpu_model_ == PCM::SKX - || cpu_model_ == PCM::ICX - || cpu_model_ == PCM::SPR - || cpu_model_ == PCM::EMR - || cpu_model_ == PCM::GNR - || cpu_model_ == PCM::SRF + cpu_family_model_ == PCM::SKX + || cpu_family_model_ == PCM::ICX + || cpu_family_model_ == PCM::SPR + || cpu_family_model_ == PCM::EMR + || cpu_family_model_ == PCM::GNR + || cpu_family_model_ == PCM::SRF ); } @@ -3411,16 +3420,16 @@ double getDRAMConsumedJoules(const CounterStateType & before, const CounterState PCM * m = PCM::getInstance(); if (!m) return -1.; double dram_joules_per_energy_unit = 0.; - const auto cpu_model = m->getCPUModel(); - - if (PCM::HASWELLX == cpu_model - || PCM::BDX_DE == cpu_model - || PCM::BDX == cpu_model - || PCM::SKX == cpu_model - || PCM::ICX == cpu_model - || PCM::GNR == cpu_model - || PCM::SRF == cpu_model - || PCM::KNL == cpu_model + const auto cpu_family_model = m->getCPUFamilyModel(); + + if (PCM::HASWELLX == cpu_family_model + || PCM::BDX_DE == cpu_family_model + || PCM::BDX == cpu_family_model + || PCM::SKX == cpu_family_model + || PCM::ICX == cpu_family_model + || PCM::GNR == cpu_family_model + || PCM::SRF == cpu_family_model + || PCM::KNL == cpu_family_model ) { /* as described in sections 5.3.2 (DRAM_POWER_INFO) and 5.3.3 (DRAM_ENERGY_STATUS) of * Volume 2 (Registers) of @@ -4259,18 +4268,18 @@ uint64 getL2CacheMisses(const CounterStateType & before, const CounterStateType { auto pcm = PCM::getInstance(); if (pcm->isL2CacheMissesAvailable() == false) return 0ULL; - const auto cpu_model = pcm->getCPUModel(); + const auto cpu_family_model = pcm->getCPUFamilyModel(); if (pcm->useSkylakeEvents() - || cpu_model == PCM::SNOWRIDGE - || cpu_model == PCM::SRF 
- || cpu_model == PCM::ADL - || cpu_model == PCM::RPL - || cpu_model == PCM::MTL - || cpu_model == PCM::LNL + || cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::ADL + || cpu_family_model == PCM::RPL + || cpu_family_model == PCM::MTL + || cpu_family_model == PCM::LNL ) { return after.Event[BasicCounterState::SKLL2MissPos] - before.Event[BasicCounterState::SKLL2MissPos]; } - else if (pcm->isAtom() || cpu_model == PCM::KNL) + else if (pcm->isAtom() || cpu_family_model == PCM::KNL) { return after.Event[BasicCounterState::ArchLLCMissPos] - before.Event[BasicCounterState::ArchLLCMissPos]; } @@ -4296,7 +4305,7 @@ uint64 getL2CacheHits(const CounterStateType & before, const CounterStateType & { auto pcm = PCM::getInstance(); if (pcm->isL2CacheHitsAvailable() == false) return 0ULL; - if (pcm->isAtom() || pcm->getCPUModel() == PCM::KNL) + if (pcm->isAtom() || pcm->getCPUFamilyModel() == PCM::KNL) { uint64 L2Miss = after.Event[BasicCounterState::ArchLLCMissPos] - before.Event[BasicCounterState::ArchLLCMissPos]; uint64 L2Ref = after.Event[BasicCounterState::ArchLLCRefPos] - before.Event[BasicCounterState::ArchLLCRefPos]; @@ -4372,13 +4381,13 @@ uint64 getL3CacheHitsSnoop(const CounterStateType & before, const CounterStateTy { auto pcm = PCM::getInstance(); if (!pcm->isL3CacheHitsSnoopAvailable()) return 0; - const auto cpu_model = pcm->getCPUModel(); - if (cpu_model == PCM::SNOWRIDGE - || cpu_model == PCM::SRF - || cpu_model == PCM::ADL - || cpu_model == PCM::RPL - || cpu_model == PCM::MTL - || cpu_model == PCM::LNL + const auto cpu_family_model = pcm->getCPUFamilyModel(); + if (cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::ADL + || cpu_family_model == PCM::RPL + || cpu_family_model == PCM::MTL + || cpu_family_model == PCM::LNL ) { const int64 misses = getL3CacheMisses(before, after); diff --git a/src/pcm.cpp b/src/pcm.cpp index 60ed261a..ba14b620 100644 --- a/src/pcm.cpp 
+++ b/src/pcm.cpp @@ -165,7 +165,7 @@ void print_output(PCM * m, const std::bitset & ycores, const SystemCounterState& sstate1, const SystemCounterState& sstate2, - const int cpu_model, + const int cpu_family_model, const bool show_core_output, const bool show_partial_core_output, const bool show_socket_output, @@ -199,7 +199,7 @@ void print_output(PCM * m, cout << " L3MISS: L3 (read) cache misses \n"; if (m->isL2CacheHitsAvailable()) { - if (m->isAtom() || cpu_model == PCM::KNL) + if (m->isAtom() || cpu_family_model == PCM::KNL) cout << " L2MISS: L2 (read) cache misses \n"; else cout << " L2MISS: L2 (read) cache misses (including other core's L2 cache *hits*) \n"; @@ -235,7 +235,7 @@ void print_output(PCM * m, const char * longDiv = "---------------------------------------------------------------------------------------------------------------\n"; cout.precision(2); cout << std::fixed; - if (cpu_model == PCM::KNL) + if (cpu_family_model == PCM::KNL) cout << " Proc Tile Core Thread |"; else cout << " Core (SKT) |"; @@ -288,7 +288,7 @@ void print_output(PCM * m, if (m->isCoreOnline(i) == false || (show_partial_core_output && ycores.test(i) == false)) continue; - if (cpu_model == PCM::KNL) + if (cpu_family_model == PCM::KNL) cout << setfill(' ') << internal << setw(5) << i << setw(5) << m->getTileId(i) << setw(5) << m->getCoreId(i) << setw(7) << m->getThreadId(i); @@ -302,7 +302,7 @@ void print_output(PCM * m, } if (show_socket_output) { - if (!(m->getNumSockets() == 1 && (m->isAtom() || cpu_model == PCM::KNL))) + if (!(m->getNumSockets() == 1 && (m->isAtom() || cpu_family_model == PCM::KNL))) { cout << longDiv; for (uint32 i = 0; i < m->getNumSockets(); ++i) @@ -318,7 +318,7 @@ void print_output(PCM * m, if (show_system_output) { - if (cpu_model == PCM::KNL) + if (cpu_family_model == PCM::KNL) cout << setw(22) << left << " TOTAL" << internal << setw(7-5); else cout << " TOTAL *"; @@ -714,7 +714,6 @@ void print_basic_metrics_csv_semicolons(const PCM * m, const string 
& header) void print_csv_header(PCM * m, const std::bitset & ycores, - const int /*cpu_model*/, const bool show_core_output, const bool show_partial_core_output, const bool show_socket_output, @@ -1137,7 +1136,6 @@ void print_csv(PCM * m, const std::bitset & ycores, const SystemCounterState& sstate1, const SystemCounterState& sstate2, - const int /*cpu_model*/, const bool show_core_output, const bool show_partial_core_output, const bool show_socket_output, @@ -1579,7 +1577,7 @@ int mainThrows(int argc, char * argv[]) std::vector cstates1, cstates2; std::vector sktstate1, sktstate2; SystemCounterState sstate1, sstate2; - const auto cpu_model = m->getCPUModel(); + const auto cpu_family_model = m->getCPUFamilyModel(); print_pid_collection_message(pid); @@ -1596,7 +1594,7 @@ int mainThrows(int argc, char * argv[]) // cerr << "DEBUG: Delay: " << delay << " seconds. Blocked: " << m->isBlocked() << "\n"; if (csv_output) { - print_csv_header(m, ycores, cpu_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output); + print_csv_header(m, ycores, show_core_output, show_partial_core_output, show_socket_output, show_system_output); } m->getAllCounterStates(sstate1, sktstate1, cstates1); @@ -1615,10 +1613,10 @@ int mainThrows(int argc, char * argv[]) if (csv_output) print_csv(m, cstates1, cstates2, sktstate1, sktstate2, ycores, sstate1, sstate2, - cpu_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output); + show_core_output, show_partial_core_output, show_socket_output, show_system_output); else print_output(m, cstates1, cstates2, sktstate1, sktstate2, ycores, sstate1, sstate2, - cpu_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output, + cpu_family_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output, metricVersion); std::swap(sstate1, sstate2); From c358b68c8c2c3ed10de916db59cfb943d1f30ae4 Mon Sep 17 00:00:00 2001 From: "Dementiev, 
Roman" Date: Tue, 24 Sep 2024 13:15:03 +0200 Subject: [PATCH 18/23] transition to family-model (part 2) Change-Id: I0f46903af007c99b16225901c29ea5641378ede6 --- src/cpucounters.cpp | 297 ++++++++++++++++++---------------- src/cpucounters.h | 387 ++++++++++++++++++++++---------------------- 2 files changed, 348 insertions(+), 336 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 07a6dc1a..dec9bb56 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -433,7 +433,8 @@ bool PCM::detectModel() pcm_cpuid(1, cpuinfo); cpu_family = (((cpuinfo.array[0]) >> 8) & 0xf) | ((cpuinfo.array[0] & 0xf00000) >> 16); - cpu_model = (((cpuinfo.array[0]) & 0xf0) >> 4) | ((cpuinfo.array[0] & 0xf0000) >> 12); + cpu_model_private = (((cpuinfo.array[0]) & 0xf0) >> 4) | ((cpuinfo.array[0] & 0xf0000) >> 12); + cpu_family_model = PCM_CPU_FAMILY_MODEL(cpu_family, cpu_model_private); cpu_stepping = cpuinfo.array[0] & 0x0f; if (cpuinfo.reg.ecx & (1UL << 31UL)) { @@ -496,7 +497,8 @@ bool PCM::detectModel() std::cerr << "STIBP supported : " << ((cpuinfo.reg.edx & (1 << 27)) ? "yes" : "no") << "\n"; std::cerr << "Spec arch caps supported : " << ((cpuinfo.reg.edx & (1 << 29)) ? 
"yes" : "no") << "\n"; std::cerr << "Max CPUID level : " << max_cpuid << "\n"; - std::cerr << "CPU model number : " << cpu_model << "\n"; + std::cerr << "CPU family : " << cpu_family << "\n"; + std::cerr << "CPU model number : " << cpu_model_private << "\n"; return true; } @@ -574,7 +576,7 @@ bool isMBMEnforced() bool PCM::CoreLocalMemoryBWMetricAvailable() const { - if (isMBMEnforced() == false && cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata + if (isMBMEnforced() == false && cpu_family_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata PCM_CPUID_INFO cpuinfo; if (!(QOSMetricAvailable() && L3QOSMetricAvailable())) return false; @@ -584,7 +586,7 @@ bool PCM::CoreLocalMemoryBWMetricAvailable() const bool PCM::CoreRemoteMemoryBWMetricAvailable() const { - if (isMBMEnforced() == false && cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata + if (isMBMEnforced() == false && cpu_family_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata PCM_CPUID_INFO cpuinfo; if (!(QOSMetricAvailable() && L3QOSMetricAvailable())) return false; @@ -707,7 +709,7 @@ void PCM::initCStateSupportTables() } // fill package C state array - switch(cpu_model) + switch(cpu_family_model) { case ATOM: case ATOM_2: @@ -762,7 +764,7 @@ void PCM::initCStateSupportTables() }; // fill core C state array - switch(cpu_model) + switch(cpu_family_model) { case ATOM: case ATOM_2: @@ -1632,32 +1634,32 @@ bool PCM::detectNominalFrequency() uint64 freq = 0; MSR[socketRefCore[0]]->read(PLATFORM_INFO_ADDR, &freq); const uint64 bus_freq = ( - cpu_model == SANDY_BRIDGE - || cpu_model == JAKETOWN - || cpu_model == IVYTOWN - || cpu_model == HASWELLX - || cpu_model == BDX_DE - || cpu_model == BDX - || cpu_model == IVY_BRIDGE - || cpu_model == HASWELL - || cpu_model == BROADWELL - || cpu_model == AVOTON - || cpu_model == APOLLO_LAKE - || cpu_model == GEMINI_LAKE - || cpu_model == DENVERTON + cpu_family_model == SANDY_BRIDGE + || cpu_family_model == JAKETOWN + || 
cpu_family_model == IVYTOWN + || cpu_family_model == HASWELLX + || cpu_family_model == BDX_DE + || cpu_family_model == BDX + || cpu_family_model == IVY_BRIDGE + || cpu_family_model == HASWELL + || cpu_family_model == BROADWELL + || cpu_family_model == AVOTON + || cpu_family_model == APOLLO_LAKE + || cpu_family_model == GEMINI_LAKE + || cpu_family_model == DENVERTON || useSKLPath() - || cpu_model == SNOWRIDGE - || cpu_model == KNL - || cpu_model == ADL - || cpu_model == RPL - || cpu_model == MTL - || cpu_model == LNL - || cpu_model == SKX - || cpu_model == ICX - || cpu_model == SPR - || cpu_model == EMR - || cpu_model == GNR - || cpu_model == SRF + || cpu_family_model == SNOWRIDGE + || cpu_family_model == KNL + || cpu_family_model == ADL + || cpu_family_model == RPL + || cpu_family_model == MTL + || cpu_family_model == LNL + || cpu_family_model == SKX + || cpu_family_model == ICX + || cpu_family_model == SPR + || cpu_family_model == EMR + || cpu_family_model == GNR + || cpu_family_model == SRF ) ? 
(100000000ULL) : (133333333ULL); nominal_frequency = ((freq >> 8) & 255) * bus_freq; @@ -1693,7 +1695,7 @@ void PCM::initEnergyMonitoring() uint64 rapl_power_unit = 0; MSR[socketRefCore[0]]->read(MSR_RAPL_POWER_UNIT,&rapl_power_unit); uint64 energy_status_unit = extract_bits(rapl_power_unit,8,12); - if (cpu_model == PCM::CHERRYTRAIL || cpu_model == PCM::BAYTRAIL) + if (cpu_family_model == PCM::CHERRYTRAIL || cpu_family_model == PCM::BAYTRAIL) joulesPerEnergyUnit = double(1ULL << energy_status_unit)/1000000.; // (2)^energy_status_unit microJoules else joulesPerEnergyUnit = 1./double(1ULL<nGPCounters; } - if(cpu_model == JAKETOWN) + if(cpu_family_model == JAKETOWN) { bool enableWA = false; for(uint32 i = 0; i< core_gen_counter_num_used; ++i) @@ -3833,7 +3836,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter { serverUncorePMUs[i]->program(); qpi_speeds.push_back(std::async(std::launch::async, - &ServerUncorePMUs::computeQPISpeed, serverUncorePMUs[i].get(), socketRefCore[i], cpu_model)); + &ServerUncorePMUs::computeQPISpeed, serverUncorePMUs[i].get(), socketRefCore[i], cpu_family_model)); } for (size_t i = 0; i < (size_t)serverUncorePMUs.size(); ++i) { @@ -3843,7 +3846,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter programCbo(); } // program uncore counters on old CPU arch - else if (cpu_model == NEHALEM_EP || cpu_model == WESTMERE_EP || cpu_model == CLARKDALE) + else if (cpu_family_model == NEHALEM_EP || cpu_family_model == WESTMERE_EP || cpu_family_model == CLARKDALE) { for (int i = 0; i < (int)num_cores; ++i) { @@ -4178,7 +4181,7 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */, MSR[i]->write(PERF_METRICS_ADDR, 0); } - if (isAtom() || cpu_model == KNL) // KNL and Atom have 3 fixed + only 2 programmable counters + if (isAtom() || cpu_family_model == KNL) // KNL and Atom have 3 fixed + only 2 programmable counters value = (1ULL << 0) + (1ULL << 1) + (1ULL << 32) + (1ULL << 33) 
+ (1ULL << 34); for (uint32 j = 0; j < core_gen_counter_num_used; ++j) @@ -4379,7 +4382,7 @@ void PCM::programBecktonUncore(int32 core) BecktonUncorePMUZDPCTLFVCRegister FVCreg; FVCreg.value = 0; - if (cpu_model == NEHALEM_EX) + if (cpu_family_model == NEHALEM_EX) { FVCreg.fields.bcmd = 0; // rd_bcmd FVCreg.fields.resp = 0; // ack_resp @@ -4500,14 +4503,14 @@ std::string PCM::getCPUBrandString() std::string PCM::getCPUFamilyModelString() { - return getCPUFamilyModelString(cpu_family, cpu_model, cpu_stepping); + return getCPUFamilyModelString(cpu_family, cpu_family_model, cpu_stepping); } -std::string PCM::getCPUFamilyModelString(const uint32 cpu_family_, const uint32 cpu_model_, const uint32 cpu_stepping_) +std::string PCM::getCPUFamilyModelString(const uint32 cpu_family_, const uint32 cpu_family_model_, const uint32 cpu_stepping_) { char buffer[sizeof(int)*4*3+6]; std::fill(buffer, buffer + sizeof(buffer), 0); - std::snprintf(buffer,sizeof(buffer),"GenuineIntel-%d-%2X-%X", cpu_family_, cpu_model_, cpu_stepping_); + std::snprintf(buffer,sizeof(buffer),"GenuineIntel-%d-%2X-%X", cpu_family_, cpu_family_model_, cpu_stepping_); std::string result(buffer); return result; } @@ -4797,13 +4800,13 @@ bool PCM::PMUinUse() return false; } -const char * PCM::getUArchCodename(const int32 cpu_model_param) const +const char * PCM::getUArchCodename(const int32 cpu_family_model_param) const { - auto cpu_model_ = cpu_model_param; - if(cpu_model_ < 0) - cpu_model_ = this->cpu_model ; + auto cpu_family_model_ = cpu_family_model_param; + if(cpu_family_model_ < 0) + cpu_family_model_ = this->cpu_family_model; - switch(cpu_model_) + switch(cpu_family_model_) { case CENTERTON: return "Centerton"; @@ -4879,7 +4882,7 @@ const char * PCM::getUArchCodename(const int32 cpu_model_param) const case LNL: return "Lunar Lake"; case SKX: - if (cpu_model_param >= 0) + if (cpu_family_model_param >= 0) { // query for specified cpu_model_param, stepping not provided return "Skylake-SP, Cascade Lake-SP"; 
@@ -4964,7 +4967,7 @@ void PCM::cleanupPMU(const bool silent) } cleanupPEBS = false; - if(cpu_model == JAKETOWN) + if(cpu_family_model == JAKETOWN) enableJKTWorkaround(false); #ifndef PCM_SILENT @@ -5579,7 +5582,7 @@ PCM::ErrorCode PCM::programServerUncoreLatencyMetrics(bool enable_pmm) if (enable_pmm == false) { //DDR is false - if (ICX == cpu_model || SPR == cpu_model || EMR == cpu_model) + if (ICX == cpu_family_model || SPR == cpu_family_model || EMR == cpu_family_model) { DDRConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) + MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM RPQ occupancy DDRConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM RPQ Insert @@ -5630,7 +5633,13 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof uint32 PCUCntConf[4] = {0,0,0,0}; - switch (cpu_model) + auto printError = [this](const char * eventCategory) + { + assert(eventCategory); + std::cerr << "ERROR: no " << eventCategory << " events defined for CPU family " << cpu_family << " model " << cpu_model_private << "\n"; + }; + + switch (cpu_family_model) { case SPR: case EMR: @@ -5651,7 +5660,7 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0xD); // FREQ_BAND2_CYCLES break; case 1: - switch (cpu_model) + switch (cpu_family_model) { case SPR: case EMR: @@ -5684,57 +5693,57 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) break; case 5: - if(JAKETOWN == cpu_model) + if (JAKETOWN == cpu_family_model) { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0) + PCU_MSR_PMON_CTL_EXTRA_SEL + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0) + PCU_MSR_PMON_CTL_EXTRA_SEL ; // cycles spent changing frequency - } else if (IVYTOWN == cpu_model ) + } 
else if (IVYTOWN == cpu_family_model) { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x60) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x60) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES } else if ( - HASWELLX == cpu_model - || BDX_DE == cpu_model - || BDX == cpu_model - || SKX == cpu_model - || ICX == cpu_model - || SNOWRIDGE == cpu_model - || SPR == cpu_model - || EMR == cpu_model - || SRF == cpu_model - || GNR == cpu_model - || GNR_D == cpu_model + HASWELLX == cpu_family_model + || BDX_DE == cpu_family_model + || BDX == cpu_family_model + || SKX == cpu_family_model + || ICX == cpu_family_model + || SNOWRIDGE == cpu_family_model + || SPR == cpu_family_model + || EMR == cpu_family_model + || SRF == cpu_family_model + || GNR == cpu_family_model + || GNR_D == cpu_family_model ) { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x74) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x74) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES - if(HASWELLX == cpu_model) + if(HASWELLX == cpu_family_model) { PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x79) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of UFS transitions PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x79) ; // UFS transition cycles } } else { - std::cerr << "ERROR: no frequency transition events defined for CPU model " << cpu_model << "\n"; + printError("frequency transition"); } break; case 6: - if (IVYTOWN == cpu_model ) + if (IVYTOWN == cpu_family_model) { PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2 transitions PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions } else if ( - HASWELLX == cpu_model - || BDX_DE == cpu_model - || BDX == cpu_model - || SKX == cpu_model - || ICX == cpu_model - || SNOWRIDGE == cpu_model - || SPR == cpu_model - || EMR == cpu_model - || SRF == cpu_model - || GNR == cpu_model - || GNR_D == cpu_model + 
HASWELLX == cpu_family_model + || BDX_DE == cpu_family_model + || BDX == cpu_family_model + || SKX == cpu_family_model + || ICX == cpu_family_model + || SNOWRIDGE == cpu_family_model + || SPR == cpu_family_model + || EMR == cpu_family_model + || SRF == cpu_family_model + || GNR == cpu_family_model + || GNR_D == cpu_family_model ) { PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) @@ -5743,11 +5752,11 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions } else { - std::cerr << "ERROR: no package C-state transition events defined for CPU model " << cpu_model << "\n"; + printError("package C-state transition"); } break; case 7: - if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model) + if (HASWELLX == cpu_family_model || BDX_DE == cpu_family_model || BDX == cpu_family_model) { PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x7E) ; // UFS_TRANSITIONS_PERF_P_LIMIT PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x7D) ; // UFS_TRANSITIONS_IO_P_LIMIT @@ -5755,16 +5764,16 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x7B) ; // UFS_TRANSITIONS_UP_STALL_CYCLES } else { - std::cerr << "ERROR: no UFS transition events defined for CPU model " << cpu_model << "\n"; + printError("UFS transition"); } break; case 8: - if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model) + if (HASWELLX == cpu_family_model || BDX_DE == cpu_family_model || BDX == cpu_family_model) { PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x7C) ; // UFS_TRANSITIONS_DOWN } else { - std::cerr << "ERROR: no UFS transition events defined for CPU model " << cpu_model << "\n"; + printError("UFS transition"); } break; default: @@ -6418,7 +6427,7 @@ void PCM::readAndAggregateUncoreMCCounters(const uint32 socket, CounterStateType { 
std::shared_ptr msr = MSR[socketRefCore[socket]]; TemporalThreadAffinity tempThreadAffinity(socketRefCore[socket]); // speedup trick for Linux - switch (cpu_model) + switch (cpu_family_model) { case PCM::WESTMERE_EP: case PCM::NEHALEM_EP: @@ -6640,7 +6649,7 @@ void PCM::readQPICounters(SystemCounterState & result) { // read QPI counters std::vector SocketProcessed(num_sockets, false); - if (cpu_model == PCM::NEHALEM_EX || cpu_model == PCM::WESTMERE_EX) + if (cpu_family_model == PCM::NEHALEM_EX || cpu_family_model == PCM::WESTMERE_EX) { for (int32 core = 0; core < num_cores; ++core) { @@ -6676,7 +6685,7 @@ void PCM::readQPICounters(SystemCounterState & result) } } } - else if ((cpu_model == PCM::NEHALEM_EP || cpu_model == PCM::WESTMERE_EP)) + else if ((cpu_family_model == PCM::NEHALEM_EP || cpu_family_model == PCM::WESTMERE_EP)) { if (num_sockets == 2) { @@ -9609,7 +9618,7 @@ void ServerUncorePMUs::cleanupMemTest(const ServerUncorePMUs::MemTestParam & par } } -uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumodel) +uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpufamilymodel) { if(qpi_speed.empty()) { @@ -9618,9 +9627,9 @@ uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumode qpi_speed.resize(getNumQPIPorts()); auto getSpeed = [&] (size_t i) { - if (PCM::hasUPI(cpumodel) == false && i == 1) return 0ULL; // link 1 should have the same speed as link 0, skip it + if (PCM::hasUPI(cpufamilymodel) == false && i == 1) return 0ULL; // link 1 should have the same speed as link 0, skip it uint64 result = 0; - if (PCM::hasUPI(cpumodel) == false && i < XPIRegisterLocation.size()) + if (PCM::hasUPI(cpufamilymodel) == false && i < XPIRegisterLocation.size()) { PciHandleType reg(groupnr,UPIbus, XPIRegisterLocation[i].first, QPI_PORT0_MISC_REGISTER_FUNC_ADDR); uint32 value = 0; @@ -9630,7 +9639,7 @@ uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumode } std::unordered_map 
UPISpeedMap{}; std::pair regBits{}; - switch (cpumodel) + switch (cpufamilymodel) { case PCM::GNR: case PCM::SRF: @@ -9673,7 +9682,7 @@ uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumode } if(result == 0ULL) { - if (PCM::hasUPI(cpumodel) == false) + if (PCM::hasUPI(cpufamilymodel) == false) std::cerr << "Warning: QPI_RATE_STATUS register is not available on port " << i << ". Computing QPI speed using a measurement loop.\n"; // compute qpi speed @@ -9693,8 +9702,8 @@ uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumode uint64 endClocks = getQPIClocks((uint32)i); cleanupMemTest(param); - result = (uint64(double(endClocks - startClocks) * PCM::getBytesPerLinkCycle(cpumodel) * double(timerGranularity) / double(endTSC - startTSC))); - if(cpumodel == PCM::HASWELLX || cpumodel == PCM::BDX) /* BDX_DE does not have QPI. */{ + result = (uint64(double(endClocks - startClocks) * PCM::getBytesPerLinkCycle(cpufamilymodel) * double(timerGranularity) / double(endTSC - startTSC))); + if(cpufamilymodel == PCM::HASWELLX || cpufamilymodel == PCM::BDX) /* BDX_DE does not have QPI. */{ result /=2; // HSX runs QPI clocks with doubled speed } } @@ -9705,9 +9714,9 @@ uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumode getSpeedsAsync.push_back(std::async(std::launch::async, getSpeed, i)); } for (size_t i = 0; i < getNumQPIPorts(); ++i) { - qpi_speed[i] = (PCM::hasUPI(cpumodel) == false && i==1)? qpi_speed[0] : getSpeedsAsync[i].get(); // link 1 does not have own speed register, it runs with the speed of link 0 + qpi_speed[i] = (PCM::hasUPI(cpufamilymodel) == false && i==1)? 
qpi_speed[0] : getSpeedsAsync[i].get(); // link 1 does not have own speed register, it runs with the speed of link 0 } - if (PCM::hasUPI(cpumodel)) + if (PCM::hasUPI(cpufamilymodel)) { // check the speed of link 3 if(qpi_speed.size() == 3 && qpi_speed[2] == 0) @@ -9739,7 +9748,8 @@ void ServerUncorePMUs::reportQPISpeed() const uint64 PCM::CX_MSR_PMON_CTRY(uint32 Cbo, uint32 Ctr) const { - switch (cpu_model) { + switch (cpu_family_model) + { case JAKETOWN: case IVYTOWN: return JKT_C0_MSR_PMON_CTR0 + (JKTIVT_CBO_MSR_STEP * Cbo) + Ctr; @@ -9767,7 +9777,8 @@ uint64 PCM::CX_MSR_PMON_CTRY(uint32 Cbo, uint32 Ctr) const uint64 PCM::CX_MSR_PMON_BOX_FILTER(uint32 Cbo) const { - switch (cpu_model) { + switch (cpu_family_model) + { case JAKETOWN: case IVYTOWN: return JKT_C0_MSR_PMON_BOX_FILTER + (JKTIVT_CBO_MSR_STEP * Cbo); @@ -9797,7 +9808,7 @@ uint64 PCM::CX_MSR_PMON_BOX_FILTER(uint32 Cbo) const uint64 PCM::CX_MSR_PMON_BOX_FILTER1(uint32 Cbo) const { - switch (cpu_model) { + switch (cpu_family_model) { case IVYTOWN: return IVT_C0_MSR_PMON_BOX_FILTER1 + (JKTIVT_CBO_MSR_STEP * Cbo); @@ -9813,7 +9824,7 @@ uint64 PCM::CX_MSR_PMON_BOX_FILTER1(uint32 Cbo) const } uint64 PCM::CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl) const { - switch (cpu_model) { + switch (cpu_family_model) { case JAKETOWN: case IVYTOWN: return JKT_C0_MSR_PMON_CTL0 + (JKTIVT_CBO_MSR_STEP * Cbo) + Ctl; @@ -9841,7 +9852,7 @@ uint64 PCM::CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl) const uint64 PCM::CX_MSR_PMON_BOX_CTL(uint32 Cbo) const { - switch (cpu_model) { + switch (cpu_family_model) { case JAKETOWN: case IVYTOWN: return JKT_C0_MSR_PMON_BOX_CTL + (JKTIVT_CBO_MSR_STEP * Cbo); @@ -9934,7 +9945,7 @@ uint32 PCM::getMaxNumOfCBoxesInternal() const } const auto refCore = socketRefCore[0]; uint64 val = 0; - switch (cpu_model) + switch (cpu_family_model) { case GNR: case SRF: @@ -10019,14 +10030,14 @@ uint32 PCM::getMaxNumOfIIOStacks() const void PCM::programCboOpcodeFilter(const uint32 opc0, UncorePMU & pmu, const uint32 
nc_, const uint32 opc1, const uint32 loc, const uint32 rem) { - if(JAKETOWN == cpu_model) + if (JAKETOWN == cpu_family_model) { *pmu.filter[0] = JKT_CBO_MSR_PMON_BOX_FILTER_OPC(opc0); - } else if(IVYTOWN == cpu_model || HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model) + } else if (IVYTOWN == cpu_family_model || HASWELLX == cpu_family_model || BDX_DE == cpu_family_model || BDX == cpu_family_model) { *pmu.filter[1] = IVTHSX_CBO_MSR_PMON_BOX_FILTER1_OPC(opc0); - } else if(SKX == cpu_model) + } else if (SKX == cpu_family_model) { *pmu.filter[1] = SKX_CHA_MSR_PMON_BOX_FILTER1_OPC0(opc0) + SKX_CHA_MSR_PMON_BOX_FILTER1_OPC1(opc1) + @@ -10038,7 +10049,7 @@ void PCM::programCboOpcodeFilter(const uint32 opc0, UncorePMU & pmu, const uint3 } else { - std::cerr << "ERROR: programCboOpcodeFilter function is not implemented for cpu model " << cpu_model << std::endl; + std::cerr << "ERROR: programCboOpcodeFilter function is not implemented for cpu family " << cpu_family << " model " << cpu_model_private << std::endl; throw std::exception(); } } @@ -10144,7 +10155,7 @@ void PCM::programPCIeEventGroup(eventGroup_t &eventGroup) uint64 events[4] = {0}; uint64 umask[4] = {0}; - switch (cpu_model) + switch (cpu_family_model) { case PCM::GNR: case PCM::SRF: @@ -10194,18 +10205,18 @@ void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc { pmu.initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - if ( ICX != cpu_model - && SNOWRIDGE != cpu_model - && SPR != cpu_model - && EMR != cpu_model - && GNR != cpu_model - && SRF != cpu_model + if ( ICX != cpu_family_model + && SNOWRIDGE != cpu_family_model + && SPR != cpu_family_model + && EMR != cpu_family_model + && GNR != cpu_family_model + && SRF != cpu_family_model ) { programCboOpcodeFilter(opCode, pmu, nc_, 0, loc, rem); } - if ((HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) && llc_lookup_tid_filter != 0) + if ((HASWELLX == cpu_family_model || BDX_DE == cpu_family_model || 
BDX == cpu_family_model || SKX == cpu_family_model) && llc_lookup_tid_filter != 0) *pmu.filter[0] = llc_lookup_tid_filter; PCM::program(pmu, events, events + ServerUncoreCounterState::maxCounters, UNC_PMON_UNIT_CTL_FRZ_EN); @@ -10543,7 +10554,7 @@ void PCM::initLLCReadMissLatencyEvents(uint64 * events, uint32 & opCode) return; } uint64 umask = 3ULL; // MISS_OPCODE - switch (cpu_model) + switch (cpu_family_model) { case ICX: case SPR: @@ -10556,7 +10567,7 @@ void PCM::initLLCReadMissLatencyEvents(uint64 * events, uint32 & opCode) } uint64 umask_ext = 0; - switch (cpu_model) + switch (cpu_family_model) { case ICX: umask_ext = 0xC817FE; @@ -10573,7 +10584,7 @@ void PCM::initLLCReadMissLatencyEvents(uint64 * events, uint32 & opCode) events[EventPosition::TOR_OCCUPANCY] = CBO_MSR_PMON_CTL_EVENT(0x36) + all_umasks; // TOR_OCCUPANCY (must be on counter 0) events[EventPosition::TOR_INSERTS] = CBO_MSR_PMON_CTL_EVENT(0x35) + all_umasks; // TOR_INSERTS - opCode = (SKX == cpu_model) ? 0x202 : 0x182; + opCode = (SKX == cpu_family_model) ? 
0x202 : 0x182; } void PCM::programCbo() diff --git a/src/cpucounters.h b/src/cpucounters.h index eadbeb1a..ffbdff4d 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -607,7 +607,8 @@ class PCM_API PCM PCM & operator = (const PCM &) = delete; int32 cpu_family; - int32 cpu_model; + int32 cpu_model_private; + int32 cpu_family_model; bool hybrid = false; int32 cpu_stepping; int64 cpu_microcode_level; @@ -1271,17 +1272,17 @@ class PCM_API PCM bool isCLX() const // Cascade Lake-SP { - return (PCM::SKX == cpu_model) && (cpu_stepping > 4 && cpu_stepping < 8); + return (PCM::SKX == cpu_family_model) && (cpu_stepping > 4 && cpu_stepping < 8); } - static bool isCPX(int cpu_model_, int cpu_stepping_) // Cooper Lake + static bool isCPX(int cpu_family_model_, int cpu_stepping_) // Cooper Lake { - return (PCM::SKX == cpu_model_) && (cpu_stepping_ >= 10); + return (PCM::SKX == cpu_family_model_) && (cpu_stepping_ >= 10); } bool isCPX() const { - return isCPX(cpu_model, cpu_stepping); + return isCPX(cpu_family_model, cpu_stepping); } void initUncorePMUsDirect(); @@ -1303,10 +1304,10 @@ class PCM_API PCM { return isHWTMAL1Supported() && ( - SPR == cpu_model - || EMR == cpu_model - || GNR == cpu_model - || GNR_D == cpu_model + SPR == cpu_family_model + || EMR == cpu_family_model + || GNR == cpu_family_model + || GNR_D == cpu_family_model ); } @@ -1631,7 +1632,7 @@ class PCM_API PCM assert (coreID < topology.size()); if (hybrid) { - switch (cpu_model) + switch (cpu_family_model) { case ADL: case RPL: @@ -1645,7 +1646,7 @@ class PCM_API PCM } } bool useGLCOCREvent = false; - switch (cpu_model) + switch (cpu_family_model) { case SPR: case EMR: @@ -1915,7 +1916,7 @@ class PCM_API PCM private: bool useSKLPath() const { - switch (cpu_model) + switch (cpu_family_model) { PCM_SKL_PATH_CASES return true; @@ -1937,7 +1938,7 @@ class PCM_API PCM //! \brief Reads CPU family and model id //! 
\return CPU family and model ID (lowest 8 bits is the model, next 8 bits is the family) - uint32 getCPUFamilyModel() const { return PCM_CPU_FAMILY_MODEL((uint32)cpu_family, (uint32)cpu_model); } + uint32 getCPUFamilyModel() const { return cpu_family_model; } //! \brief Reads CPU stepping id //! \return CPU stepping ID @@ -1977,7 +1978,7 @@ class PCM_API PCM //! \return number of QPI links per socket uint64 getQPILinksPerSocket() const { - switch (cpu_model) + switch (cpu_family_model) { case NEHALEM_EP: case WESTMERE_EP: @@ -2008,7 +2009,7 @@ class PCM_API PCM //! \brief Returns the number of detected integrated memory controllers per socket uint32 getMCPerSocket() const { - switch (cpu_model) + switch (cpu_family_model) { case NEHALEM_EP: case WESTMERE_EP: @@ -2037,7 +2038,7 @@ class PCM_API PCM //! \brief Returns the total number of detected memory channels on all integrated memory controllers per socket size_t getMCChannelsPerSocket() const { - switch (cpu_model) + switch (cpu_family_model) { case NEHALEM_EP: case WESTMERE_EP: @@ -2069,7 +2070,7 @@ class PCM_API PCM //! \param controller controller size_t getMCChannels(uint32 socket, uint32 controller) const { - switch (cpu_model) + switch (cpu_family_model) { case NEHALEM_EP: case WESTMERE_EP: @@ -2100,7 +2101,7 @@ class PCM_API PCM //! \brief Returns the total number of detected memory channels on all integrated memory controllers per socket size_t getEDCChannelsPerSocket() const { - switch (cpu_model) + switch (cpu_family_model) { case KNL: return (serverUncorePMUs.size() && serverUncorePMUs[0].get()) ? (serverUncorePMUs[0]->getNumEDCChannels()) : 0; @@ -2113,8 +2114,8 @@ class PCM_API PCM //! 
\return max number of instructions per cycle uint32 getMaxIPC() const { - if (ICL == cpu_model || TGL == cpu_model || RKL == cpu_model) return 5; - switch (cpu_model) + if (ICL == cpu_family_model || TGL == cpu_family_model || RKL == cpu_family_model) return 5; + switch (cpu_family_model) { case ADL: case RPL: @@ -2157,7 +2158,7 @@ class PCM_API PCM { return 2; } - std::cerr << "MaxIPC is not defined for your cpu model " << cpu_model << '\n'; + std::cerr << "MaxIPC is not defined for your cpu family " << cpu_family << " model " << cpu_model_private << '\n'; assert (0); return 0; } @@ -2165,7 +2166,7 @@ class PCM_API PCM //! \brief Returns the frequency of Power Control Unit uint64 getPCUFrequency() const { - switch (cpu_model) + switch (cpu_family_model) { case JAKETOWN: case IVYTOWN: @@ -2186,7 +2187,7 @@ class PCM_API PCM //! \brief Returns whether it is a server part bool isServerCPU() const { - switch (cpu_model) + switch (cpu_family_model) { case NEHALEM_EP: case NEHALEM_EX: @@ -2400,31 +2401,31 @@ class PCM_API PCM int64 getCPUMicrocodeLevel() const { return cpu_microcode_level; } //! \brief returns true if CPU model is Atom-based - static bool isAtom(const int32 cpu_model_) - { - return cpu_model_ == ATOM - || cpu_model_ == ATOM_2 - || cpu_model_ == CENTERTON - || cpu_model_ == BAYTRAIL - || cpu_model_ == AVOTON - || cpu_model_ == CHERRYTRAIL - || cpu_model_ == APOLLO_LAKE - || cpu_model_ == GEMINI_LAKE - || cpu_model_ == DENVERTON - // || cpu_model_ == SNOWRIDGE do not use Atom code for SNOWRIDGE + static bool isAtom(const int32 cpu_family_model_) + { + return cpu_family_model_ == ATOM + || cpu_family_model_ == ATOM_2 + || cpu_family_model_ == CENTERTON + || cpu_family_model_ == BAYTRAIL + || cpu_family_model_ == AVOTON + || cpu_family_model_ == CHERRYTRAIL + || cpu_family_model_ == APOLLO_LAKE + || cpu_family_model_ == GEMINI_LAKE + || cpu_family_model_ == DENVERTON + // || cpu_family_model_ == SNOWRIDGE do not use Atom code for SNOWRIDGE ; } //! 
\brief returns true if CPU is Atom-based bool isAtom() const { - return isAtom(cpu_model); + return isAtom(cpu_family_model); } // From commit message: https://github.com/torvalds/linux/commit/e979121b1b1556e184492e6fc149bbe188fc83e6 bool memoryEventErrata() const { - switch (cpu_model) + switch (cpu_family_model) { case SANDY_BRIDGE: case JAKETOWN: @@ -2440,52 +2441,52 @@ class PCM_API PCM bool packageEnergyMetricsAvailable() const { return ( - cpu_model == PCM::JAKETOWN - || cpu_model == PCM::IVYTOWN - || cpu_model == PCM::SANDY_BRIDGE - || cpu_model == PCM::IVY_BRIDGE - || cpu_model == PCM::HASWELL - || cpu_model == PCM::AVOTON - || cpu_model == PCM::CHERRYTRAIL - || cpu_model == PCM::BAYTRAIL - || cpu_model == PCM::APOLLO_LAKE - || cpu_model == PCM::GEMINI_LAKE - || cpu_model == PCM::DENVERTON - || cpu_model == PCM::SNOWRIDGE - || cpu_model == PCM::HASWELLX - || cpu_model == PCM::BROADWELL - || cpu_model == PCM::BDX_DE - || cpu_model == PCM::BDX - || cpu_model == PCM::KNL + cpu_family_model == PCM::JAKETOWN + || cpu_family_model == PCM::IVYTOWN + || cpu_family_model == PCM::SANDY_BRIDGE + || cpu_family_model == PCM::IVY_BRIDGE + || cpu_family_model == PCM::HASWELL + || cpu_family_model == PCM::AVOTON + || cpu_family_model == PCM::CHERRYTRAIL + || cpu_family_model == PCM::BAYTRAIL + || cpu_family_model == PCM::APOLLO_LAKE + || cpu_family_model == PCM::GEMINI_LAKE + || cpu_family_model == PCM::DENVERTON + || cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::HASWELLX + || cpu_family_model == PCM::BROADWELL + || cpu_family_model == PCM::BDX_DE + || cpu_family_model == PCM::BDX + || cpu_family_model == PCM::KNL || useSKLPath() - || cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::ADL - || cpu_model == PCM::RPL - || cpu_model == PCM::MTL - || cpu_model == PCM::LNL - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::GNR - || cpu_model == PCM::SRF + || cpu_family_model == PCM::SKX + || cpu_family_model == 
PCM::ICX + || cpu_family_model == PCM::ADL + || cpu_family_model == PCM::RPL + || cpu_family_model == PCM::MTL + || cpu_family_model == PCM::LNL + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::SRF ); } bool dramEnergyMetricsAvailable() const { return ( - cpu_model == PCM::JAKETOWN - || cpu_model == PCM::IVYTOWN - || cpu_model == PCM::HASWELLX - || cpu_model == PCM::BDX_DE - || cpu_model == PCM::BDX - || cpu_model == PCM::KNL - || cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::GNR - || cpu_model == PCM::SRF + cpu_family_model == PCM::JAKETOWN + || cpu_family_model == PCM::IVYTOWN + || cpu_family_model == PCM::HASWELLX + || cpu_family_model == PCM::BDX_DE + || cpu_family_model == PCM::BDX + || cpu_family_model == PCM::KNL + || cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::SRF ); } @@ -2498,18 +2499,18 @@ class PCM_API PCM { return getQPILinksPerSocket() > 0 && ( - cpu_model == PCM::NEHALEM_EX - || cpu_model == PCM::WESTMERE_EX - || cpu_model == PCM::JAKETOWN - || cpu_model == PCM::IVYTOWN - || cpu_model == PCM::HASWELLX - || cpu_model == PCM::BDX - || cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::GNR - || cpu_model == PCM::SRF + cpu_family_model == PCM::NEHALEM_EX + || cpu_family_model == PCM::WESTMERE_EX + || cpu_family_model == PCM::JAKETOWN + || cpu_family_model == PCM::IVYTOWN + || cpu_family_model == PCM::HASWELLX + || cpu_family_model == PCM::BDX + || cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::SRF ); } @@ -2517,29 +2518,29 @@ 
class PCM_API PCM { return getQPILinksPerSocket() > 0 && ( - cpu_model == PCM::NEHALEM_EX - || cpu_model == PCM::WESTMERE_EX - || cpu_model == PCM::JAKETOWN - || cpu_model == PCM::IVYTOWN - || (cpu_model == PCM::SKX && cpu_stepping > 1) - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::GNR - || cpu_model == PCM::SRF + cpu_family_model == PCM::NEHALEM_EX + || cpu_family_model == PCM::WESTMERE_EX + || cpu_family_model == PCM::JAKETOWN + || cpu_family_model == PCM::IVYTOWN + || (cpu_family_model == PCM::SKX && cpu_stepping > 1) + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::SRF ); } bool localMemoryRequestRatioMetricAvailable() const { - return cpu_model == PCM::HASWELLX - || cpu_model == PCM::BDX - || cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::SRF - || cpu_model == PCM::GNR + return cpu_family_model == PCM::HASWELLX + || cpu_family_model == PCM::BDX + || cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::GNR ; } @@ -2551,14 +2552,14 @@ class PCM_API PCM bool nearMemoryMetricsAvailable() const { return ( - cpu_model == PCM::SRF - || cpu_model == PCM::GNR + cpu_family_model == PCM::SRF + || cpu_family_model == PCM::GNR ); } bool memoryTrafficMetricsAvailable() const { - return (!(isAtom() || cpu_model == PCM::CLARKDALE)) + return (!(isAtom() || cpu_family_model == PCM::CLARKDALE)) ; } @@ -2569,17 +2570,17 @@ class PCM_API PCM size_t getHBMCASTransferSize() const { - return (SPR == cpu_model) ? 32ULL : 64ULL; + return (SPR == cpu_family_model) ? 
32ULL : 64ULL; } bool memoryIOTrafficMetricAvailable() const { - if (cpu_model == TGL) return false; + if (cpu_family_model == TGL) return false; return ( - cpu_model == PCM::SANDY_BRIDGE - || cpu_model == PCM::IVY_BRIDGE - || cpu_model == PCM::HASWELL - || cpu_model == PCM::BROADWELL + cpu_family_model == PCM::SANDY_BRIDGE + || cpu_family_model == PCM::IVY_BRIDGE + || cpu_family_model == PCM::HASWELL + || cpu_family_model == PCM::BROADWELL || useSKLPath() ); } @@ -2587,13 +2588,13 @@ class PCM_API PCM bool IIOEventsAvailable() const { return ( - cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SNOWRIDGE - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::SRF - || cpu_model == PCM::GNR + cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::GNR ); } @@ -2602,20 +2603,20 @@ class PCM_API PCM return MSR.empty() == false && getMaxNumOfUncorePMUs(UBOX_PMU_ID) > 0ULL && getNumCores() == getNumOnlineCores() - && PCM::GNR != cpu_model - && PCM::SRF != cpu_model + && PCM::GNR != cpu_family_model + && PCM::SRF != cpu_family_model ; } bool LatencyMetricsAvailable() const { return ( - cpu_model == PCM::HASWELLX - || cpu_model == PCM::BDX - || cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR + cpu_family_model == PCM::HASWELLX + || cpu_family_model == PCM::BDX + || cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR || useSKLPath() ); } @@ -2623,10 +2624,10 @@ class PCM_API PCM bool DDRLatencyMetricsAvailable() const { return ( - cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR + cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == 
PCM::SPR + || cpu_family_model == PCM::EMR ); } @@ -2635,10 +2636,10 @@ class PCM_API PCM return ( isCLX() || isCPX() - || cpu_model == PCM::ICX - || cpu_model == PCM::SNOWRIDGE - || cpu_model == SPR - || cpu_model == EMR + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == SPR + || cpu_family_model == EMR ); } @@ -2647,8 +2648,8 @@ class PCM_API PCM return ( isCLX() || isCPX() - || cpu_model == PCM::ICX - || cpu_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SNOWRIDGE ); } @@ -2660,52 +2661,52 @@ class PCM_API PCM bool LLCReadMissLatencyMetricsAvailable() const { return ( - HASWELLX == cpu_model - || BDX_DE == cpu_model - || BDX == cpu_model + HASWELLX == cpu_family_model + || BDX_DE == cpu_family_model + || BDX == cpu_family_model || isCLX() || isCPX() #ifdef PCM_ENABLE_LLCRDLAT_SKX_MP - || SKX == cpu_model + || SKX == cpu_family_model #else - || ((SKX == cpu_model) && (num_sockets == 1)) + || ((SKX == cpu_family_model) && (num_sockets == 1)) #endif - || ICX == cpu_model - || SPR == cpu_model - || SNOWRIDGE == cpu_model + || ICX == cpu_family_model + || SPR == cpu_family_model + || SNOWRIDGE == cpu_family_model ); } bool hasBecktonUncore() const { return ( - cpu_model == PCM::NEHALEM_EX - || cpu_model == PCM::WESTMERE_EX + cpu_family_model == PCM::NEHALEM_EX + || cpu_family_model == PCM::WESTMERE_EX ); } bool hasPCICFGUncore() const // has PCICFG uncore PMON { return ( - cpu_model == PCM::JAKETOWN - || cpu_model == PCM::SNOWRIDGE - || cpu_model == PCM::IVYTOWN - || cpu_model == PCM::HASWELLX - || cpu_model == PCM::BDX_DE - || cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::GNR - || cpu_model == PCM::SRF - || cpu_model == PCM::BDX - || cpu_model == PCM::KNL + cpu_family_model == PCM::JAKETOWN + || cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::IVYTOWN + || cpu_family_model == 
PCM::HASWELLX + || cpu_family_model == PCM::BDX_DE + || cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::BDX + || cpu_family_model == PCM::KNL ); } bool isSkxCompatible() const { return ( - cpu_model == PCM::SKX + cpu_family_model == PCM::SKX ); } @@ -2723,7 +2724,7 @@ class PCM_API PCM bool hasUPI() const { - return hasUPI(cpu_model); + return hasUPI(cpu_family_model); } const char * xPI() const @@ -2737,12 +2738,12 @@ class PCM_API PCM bool hasCHA() const { return ( - cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::GNR - || cpu_model == PCM::SRF + cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::SRF ); } @@ -2753,24 +2754,24 @@ class PCM_API PCM bool useSkylakeEvents() const { return useSKLPath() - || PCM::SKX == cpu_model - || PCM::ICX == cpu_model - || PCM::SPR == cpu_model - || PCM::EMR == cpu_model - || PCM::GNR == cpu_model + || PCM::SKX == cpu_family_model + || PCM::ICX == cpu_family_model + || PCM::SPR == cpu_family_model + || PCM::EMR == cpu_family_model + || PCM::GNR == cpu_family_model ; } bool hasClientMCCounters() const { - return cpu_model == SANDY_BRIDGE - || cpu_model == IVY_BRIDGE - || cpu_model == HASWELL - || cpu_model == BROADWELL - || cpu_model == ADL - || cpu_model == RPL - || cpu_model == MTL - || cpu_model == LNL + return cpu_family_model == SANDY_BRIDGE + || cpu_family_model == IVY_BRIDGE + || cpu_family_model == HASWELL + || cpu_family_model == BROADWELL + || cpu_family_model == ADL + || cpu_family_model == RPL + || cpu_family_model == MTL + || cpu_family_model == LNL || useSKLPath() ; } @@ -2780,9 +2781,9 @@ class PCM_API PCM return 
packageEnergyMetricsAvailable() && hasClientMCCounters() && num_sockets == 1; } - static double getBytesPerFlit(int32 cpu_model_) + static double getBytesPerFlit(int32 cpu_family_model_) { - if (hasUPI(cpu_model_)) + if (hasUPI(cpu_family_model_)) { // 172 bits per UPI flit return 172./8.; @@ -2793,12 +2794,12 @@ class PCM_API PCM double getBytesPerFlit() const { - return getBytesPerFlit(cpu_model); + return getBytesPerFlit(cpu_family_model); } - static double getDataBytesPerFlit(int32 cpu_model_) + static double getDataBytesPerFlit(const int32 cpu_family_model_) { - if (hasUPI(cpu_model_)) + if (hasUPI(cpu_family_model_)) { // 9 UPI flits to transfer 64 bytes return 64./9.; @@ -2809,12 +2810,12 @@ class PCM_API PCM double getDataBytesPerFlit() const { - return getDataBytesPerFlit(cpu_model); + return getDataBytesPerFlit(cpu_family_model); } - static double getFlitsPerLinkCycle(int32 cpu_model_) + static double getFlitsPerLinkCycle(const int32 cpu_family_model_) { - if (hasUPI(cpu_model_)) + if (hasUPI(cpu_family_model_)) { // 5 UPI flits sent every 6 link cycles return 5./6.; @@ -2829,7 +2830,7 @@ class PCM_API PCM double getBytesPerLinkCycle() const { - return getBytesPerLinkCycle(cpu_model); + return getBytesPerLinkCycle(cpu_family_model); } double getLinkTransfersPerLinkCycle() const From f999ac1797ab574aa8ad20e1382e184865e860b9 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 24 Sep 2024 13:43:02 +0200 Subject: [PATCH 19/23] transition to family-model (part 3) Change-Id: I4c75146f683883c07208c238537d60848fc8e987 --- src/cpucounters.h | 12 +++++------ src/pcm-iio.cpp | 20 ++++++++--------- src/pcm-lspci.cpp | 2 +- src/pcm-memory.cpp | 28 ++++++++++++------------ src/pcm-pcie.cpp | 2 +- src/pcm-power.cpp | 54 +++++++++++++++++++++++----------------------- src/pcm-raw.cpp | 2 +- src/pcm-tsx.cpp | 2 +- 8 files changed, 60 insertions(+), 62 deletions(-) diff --git a/src/cpucounters.h b/src/cpucounters.h index ffbdff4d..8298bd82 100644 --- 
a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1930,11 +1930,9 @@ class PCM_API PCM //! \return CPU family uint32 getCPUFamily() const { return (uint32)cpu_family; } - //! \brief Reads CPU model id - //! \return CPU model ID - /* - uint32 getCPUModel() const { return (uint32)cpu_model; } - */ + //! \brief Reads CPU model id (use only with the family API together, don't always assume family 6) + //! \return Internal CPU model ID + uint32 getInternalCPUModel() const { return (uint32)cpu_model_private; } //! \brief Reads CPU family and model id //! \return CPU family and model ID (lowest 8 bits is the model, next 8 bits is the family) @@ -3142,8 +3140,8 @@ template uint64 getDRAMClocks(uint32 channel, const CounterStateType & before, const CounterStateType & after) { const auto clk = after.DRAMClocks[channel] - before.DRAMClocks[channel]; - const auto cpu_model = PCM::getInstance()->getCPUModel(); - if (cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE) + const auto cpu_family_model = PCM::getInstance()->getCPUFamilyModel(); + if (cpu_family_model == PCM::ICX || cpu_family_model == PCM::SNOWRIDGE) { return 2 * clk; } diff --git a/src/pcm-iio.cpp b/src/pcm-iio.cpp index d36f5f17..201862ce 100644 --- a/src/pcm-iio.cpp +++ b/src/pcm-iio.cpp @@ -855,7 +855,7 @@ class WhitleyPlatformMapping: public IPlatformMapping10Nm { const std::string * iio_stack_names; public: WhitleyPlatformMapping(int cpu_model, uint32_t sockets_count) : IPlatformMapping10Nm(cpu_model, sockets_count), - icx_d(PCM::getInstance()->getCPUModelFromCPUID() == PCM::ICX_D), + icx_d(PCM::getInstance()->getCPUFamilyModelFromCPUID() == PCM::ICX_D), sad_to_pmu_id_mapping(icx_d ? icx_d_sad_to_pmu_id_mapping : icx_sad_to_pmu_id_mapping), iio_stack_names(icx_d ? 
icx_d_iio_stack_names : icx_iio_stack_names) { @@ -1687,21 +1687,21 @@ bool BirchStreamPlatform::pciTreeDiscover(std::vector IPlatformMapping::getPlatformMapping(int cpu_model, uint32_t sockets_count) +std::unique_ptr IPlatformMapping::getPlatformMapping(int cpu_family_model, uint32_t sockets_count) { - switch (cpu_model) { + switch (cpu_family_model) { case PCM::SKX: - return std::unique_ptr{new PurleyPlatformMapping(cpu_model, sockets_count)}; + return std::unique_ptr{new PurleyPlatformMapping(cpu_family_model, sockets_count)}; case PCM::ICX: - return std::unique_ptr{new WhitleyPlatformMapping(cpu_model, sockets_count)}; + return std::unique_ptr{new WhitleyPlatformMapping(cpu_family_model, sockets_count)}; case PCM::SNOWRIDGE: - return std::unique_ptr{new JacobsvillePlatformMapping(cpu_model, sockets_count)}; + return std::unique_ptr{new JacobsvillePlatformMapping(cpu_family_model, sockets_count)}; case PCM::SPR: case PCM::EMR: - return std::unique_ptr{new EagleStreamPlatformMapping(cpu_model, sockets_count)}; + return std::unique_ptr{new EagleStreamPlatformMapping(cpu_family_model, sockets_count)}; case PCM::SRF: case PCM::GNR: - return std::unique_ptr{new BirchStreamPlatform(cpu_model, sockets_count)}; + return std::unique_ptr{new BirchStreamPlatform(cpu_family_model, sockets_count)}; default: return nullptr; } @@ -1709,7 +1709,7 @@ std::unique_ptr IPlatformMapping::getPlatformMapping(int cpu_m ccr* get_ccr(PCM* m, uint64_t& ccr) { - switch (m->getCPUModel()) + switch (m->getCPUFamilyModel()) { case PCM::SKX: return new skx_ccr(ccr); @@ -1976,7 +1976,7 @@ int mainThrows(int argc, char * argv[]) PCIDB pciDB; load_PCIDB(pciDB); - auto mapping = IPlatformMapping::getPlatformMapping(m->getCPUModel(), m->getNumSockets()); + auto mapping = IPlatformMapping::getPlatformMapping(m->getCPUFamilyModel(), m->getNumSockets()); if (!mapping) { cerr << "Failed to discover pci tree: unknown platform" << endl; exit(EXIT_FAILURE); diff --git a/src/pcm-lspci.cpp 
b/src/pcm-lspci.cpp index 6dc716e9..ae026a80 100644 --- a/src/pcm-lspci.cpp +++ b/src/pcm-lspci.cpp @@ -103,7 +103,7 @@ int mainThrows(int /*argc*/, char * /*argv*/[]) if (!m->isSkxCompatible()) { - cerr << "PCI tree display is currently not supported for processor model " << m->getCPUModel() << "\n"; + cerr << "PCI tree display is currently not supported for processor family/model 0x" << std::hex << m->getCPUFamilyModel() << std::dec << "\n"; } else { diff --git a/src/pcm-memory.cpp b/src/pcm-memory.cpp index c2e9418f..5cd5d2a7 100644 --- a/src/pcm-memory.cpp +++ b/src/pcm-memory.cpp @@ -424,8 +424,8 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t cout << "\n"; } if ( md->metrics == PartialWrites - && m->getCPUModel() != PCM::SRF - && m->getCPUModel() != PCM::GNR + && m->getCPUFamilyModel() != PCM::SRF + && m->getCPUFamilyModel() != PCM::GNR ) { for (uint32 i=skt; i<(skt+no_columns); ++i) { @@ -733,8 +733,8 @@ void display_bandwidth_csv(PCM *m, memdata_t *md, uint64 /*elapsedTime*/, const if (m->HBMmemoryTrafficMetricsAvailable() == false) { if ( md->metrics == PartialWrites - && m->getCPUModel() != PCM::GNR - && m->getCPUModel() != PCM::SRF + && m->getCPUFamilyModel() != PCM::GNR + && m->getCPUFamilyModel() != PCM::SRF ) { choose(outputType, @@ -905,9 +905,9 @@ void calculate_bandwidth(PCM *m, //const uint32 num_edc_channels = m->getEDCChannelsPerSocket(); memdata_t md; md.metrics = metrics; - const auto cpu_model = m->getCPUModel(); - md.M2M_NM_read_hit_rate_supported = (cpu_model == PCM::SKX); - md.NM_hit_rate_supported = (cpu_model == PCM::ICX); + const auto cpu_family_model = m->getCPUFamilyModel(); + md.M2M_NM_read_hit_rate_supported = (cpu_family_model == PCM::SKX); + md.NM_hit_rate_supported = (cpu_family_model == PCM::ICX); md.BHS_NM = m->nearMemoryMetricsAvailable(); md.BHS = md.BHS_NM; static bool mm_once = true; @@ -991,7 +991,7 @@ void calculate_bandwidth(PCM *m, uint64 memoryModeHits = 0; reads = 
getMCCounter(channel, ServerUncorePMUs::EventPosition::READ, uncState1[skt], uncState2[skt]); writes = getMCCounter(channel, ServerUncorePMUs::EventPosition::WRITE, uncState1[skt], uncState2[skt]); - switch (cpu_model) + switch (cpu_family_model) { case PCM::GNR: case PCM::SRF: @@ -1057,8 +1057,8 @@ void calculate_bandwidth(PCM *m, md.MemoryMode_Hit_socket[skt] += toRate(memoryModeHits); } else if ( - cpu_model != PCM::GNR - && cpu_model != PCM::SRF + cpu_family_model != PCM::GNR + && cpu_family_model != PCM::SRF ) { md.partial_write[skt] += (uint64)(getMCCounter(channel, ServerUncorePMUs::EventPosition::PARTIAL, uncState1[skt], uncState2[skt]) / (elapsedTime / 1000.0)); @@ -1248,7 +1248,7 @@ class CHAEventCollector pcm(m) { assert(pcm); - switch (pcm->getCPUModel()) + switch (pcm->getCPUFamilyModel()) { case PCM::SPR: eventGroups = { @@ -1516,10 +1516,10 @@ int mainThrows(int argc, char * argv[]) m->disableJKTWorkaround(); print_cpu_details(); - const auto cpu_model = m->getCPUModel(); + const auto cpu_family_model = m->getCPUFamilyModel(); if (!m->hasPCICFGUncore()) { - cerr << "Unsupported processor model (" << cpu_model << ").\n"; + cerr << "Unsupported processor model (0x" << std::hex << cpu_family_model << std::dec << ").\n"; if (m->memoryTrafficMetricsAvailable()) cerr << "For processor-level memory bandwidth statistics please use 'pcm' utility\n"; exit(EXIT_FAILURE); @@ -1577,7 +1577,7 @@ int mainThrows(int argc, char * argv[]) shared_ptr chaEventCollector; - SPR_CXL = (PCM::SPR == cpu_model || PCM::EMR == cpu_model) && (getNumCXLPorts(m) > 0); + SPR_CXL = (PCM::SPR == cpu_family_model || PCM::EMR == cpu_family_model) && (getNumCXLPorts(m) > 0); if (SPR_CXL) { chaEventCollector = std::make_shared(delay, sysCmd, mainLoop, m); diff --git a/src/pcm-pcie.cpp b/src/pcm-pcie.cpp index da55b93e..69d1c380 100644 --- a/src/pcm-pcie.cpp +++ b/src/pcm-pcie.cpp @@ -95,7 +95,7 @@ void print_usage(const string & progname) IPlatform *IPlatform::getPlatform(PCM *m, bool 
csv, bool print_bandwidth, bool print_additional_info, uint32 delay) { - switch (m->getCPUModel()) { + switch (m->getCPUFamilyModel()) { case PCM::GNR: case PCM::SRF: return new BirchStreamPlatform(m, csv, print_bandwidth, print_additional_info, delay); diff --git a/src/pcm-power.cpp b/src/pcm-power.cpp index 7982375a..d5ec90f5 100644 --- a/src/pcm-power.cpp +++ b/src/pcm-power.cpp @@ -251,10 +251,10 @@ int mainThrows(int argc, char * argv[]) m->disableJKTWorkaround(); - const int cpu_model = m->getCPUModel(); + const int cpu_family_model = m->getCPUFamilyModel(); if (!(m->hasPCICFGUncore())) { - cerr << "Unsupported processor model (" << cpu_model << ").\n"; + cerr << "Unsupported processor model (0x" << std::hex << cpu_family_model << std::dec << ").\n"; exit(EXIT_FAILURE); } @@ -262,7 +262,7 @@ int mainThrows(int argc, char * argv[]) PCM::ExtendedCustomCoreEventDescription conf; int32 nCorePowerLicenses = 0; std::vector licenseStr; - switch (cpu_model) + switch (cpu_family_model) { case PCM::SKX: case PCM::ICX: @@ -313,7 +313,7 @@ int mainThrows(int argc, char * argv[]) cerr << "\nMC counter group: " << imc_profile << "\n"; cerr << "PCU counter group: " << pcu_profile << "\n"; if (pcu_profile == 0) { - if (cpu_model == PCM::HASWELLX || cpu_model == PCM::BDX_DE || cpu_model == PCM::SKX) + if (cpu_family_model == PCM::HASWELLX || cpu_family_model == PCM::BDX_DE || cpu_family_model == PCM::SKX) cerr << "Your processor does not support frequency band statistics\n"; else cerr << "Freq bands [0/1/2]: " << freq_band[0] * 100 << " MHz; " << freq_band[1] * 100 << " MHz; " << freq_band[2] * 100 << " MHz; \n"; @@ -444,7 +444,7 @@ int mainThrows(int argc, char * argv[]) switch (pcu_profile) { case 0: - if (cpu_model == PCM::HASWELLX || cpu_model == PCM::BDX_DE || cpu_model == PCM::SKX) + if (cpu_family_model == PCM::HASWELLX || cpu_family_model == PCM::BDX_DE || cpu_family_model == PCM::SKX) break; printHeader(true); cout << "; Freq band 0/1/2 cycles: " << 100. 
* getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) << "%" @@ -455,7 +455,7 @@ int mainThrows(int argc, char * argv[]) case 1: printHeader(true); - cout << ((cpu_model == PCM::SKX) ? "; core C0_1/C3/C6_7-state residency: " : "; core C0/C3/C6-state residency: ") + cout << ((cpu_family_model == PCM::SKX) ? "; core C0_1/C3/C6_7-state residency: " : "; core C0/C3/C6-state residency: ") << getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) << "; " << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket]) << "; " << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) @@ -475,28 +475,28 @@ int mainThrows(int argc, char * argv[]) cout << "; Thermal freq limit cycles: " << getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) * 100. << " %" << "; Power freq limit cycles:" << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket]) * 100. << " %"; if( - cpu_model != PCM::SKX - && cpu_model != PCM::ICX - && cpu_model != PCM::SNOWRIDGE - && cpu_model != PCM::SPR - && cpu_model != PCM::EMR - && cpu_model != PCM::SRF - && cpu_model != PCM::GNR - && cpu_model != PCM::GNR_D + cpu_family_model != PCM::SKX + && cpu_family_model != PCM::ICX + && cpu_family_model != PCM::SNOWRIDGE + && cpu_family_model != PCM::SPR + && cpu_family_model != PCM::EMR + && cpu_family_model != PCM::SRF + && cpu_family_model != PCM::GNR + && cpu_family_model != PCM::GNR_D ) cout << "; Clipped freq limit cycles:" << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) * 100. 
<< " %"; cout << "\n"; break; case 4: - if ( cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SNOWRIDGE - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::SRF - || cpu_model == PCM::GNR - || cpu_model == PCM::GNR_D + if ( cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::GNR_D ) { cout << "This PCU profile is not supported on your processor\n"; @@ -512,7 +512,7 @@ int mainThrows(int argc, char * argv[]) printHeader(true); cout << "; Frequency transition count: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 1, BeforeState[socket], AfterState[socket]) << " " << "; Cycles spent changing frequency: " << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket], m) * 100. << " %"; - if (PCM::HASWELLX == cpu_model) { + if (PCM::HASWELLX == cpu_family_model) { cout << "; UFS transition count: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 3, BeforeState[socket], AfterState[socket]) << " "; cout << "; UFS transition cycles: " << getNormalizedPCUCounter(u, 0, BeforeState[socket], AfterState[socket], m) * 100. << " %"; } @@ -520,11 +520,11 @@ int mainThrows(int argc, char * argv[]) break; case 6: printHeader(false); - if (cpu_model == PCM::HASWELLX || PCM::BDX_DE == cpu_model) + if (cpu_family_model == PCM::HASWELLX || PCM::BDX_DE == cpu_family_model) cout << "; PC1e+ residency: " << getNormalizedPCUCounter(u, 0, BeforeState[socket], AfterState[socket], m) * 100. 
<< " %" "; PC1e+ transition count: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 1, BeforeState[socket], AfterState[socket]) << " "; - switch (cpu_model) + switch (cpu_family_model) { case PCM::IVYTOWN: case PCM::HASWELLX: @@ -548,7 +548,7 @@ int mainThrows(int argc, char * argv[]) cout << "\n"; break; case 7: - if (PCM::HASWELLX == cpu_model || PCM::BDX_DE == cpu_model || PCM::BDX == cpu_model) { + if (PCM::HASWELLX == cpu_family_model || PCM::BDX_DE == cpu_family_model || PCM::BDX == cpu_family_model) { printHeader(false); cout << "; UFS_TRANSITIONS_PERF_P_LIMIT: " << getNormalizedPCUCounter(u, 0, BeforeState[socket], AfterState[socket], m) * 100. << " %" << "; UFS_TRANSITIONS_IO_P_LIMIT: " << getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket], m) * 100. << " %" @@ -558,7 +558,7 @@ int mainThrows(int argc, char * argv[]) } break; case 8: - if (PCM::HASWELLX == cpu_model || PCM::BDX_DE == cpu_model || PCM::BDX == cpu_model) { + if (PCM::HASWELLX == cpu_family_model || PCM::BDX_DE == cpu_family_model || PCM::BDX == cpu_family_model) { printHeader(false); cout << "; UFS_TRANSITIONS_DOWN: " << getNormalizedPCUCounter(u, 0, BeforeState[socket], AfterState[socket], m) * 100. 
<< " %" << "\n"; diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 400e5139..a6d4f1f7 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -558,7 +558,7 @@ AddEventStatus addEventFromDB(PCM::RawPMUConfigs& curPMUConfigs, string fullEven { try { - path = std::string("PMURegisterDeclarations/") + pcm->getCPUFamilyModelString(pcm->getCPUFamily(), pcm->getCPUModel(), (uint32)stepping) + ".json"; + path = std::string("PMURegisterDeclarations/") + pcm->getCPUFamilyModelString(pcm->getCPUFamily(), pcm->getInternalCPUModel(), (uint32)stepping) + ".json"; std::ifstream in(path); if (!in.is_open()) diff --git a/src/pcm-tsx.cpp b/src/pcm-tsx.cpp index 003f10a7..692c4cdc 100644 --- a/src/pcm-tsx.cpp +++ b/src/pcm-tsx.cpp @@ -307,7 +307,7 @@ int mainThrows(int argc, char * argv[]) PCM * m = PCM::getInstance(); const size_t numCtrSupported = m->getMaxCustomCoreEvents(); - switch (m->getCPUModel()) + switch (m->getCPUFamilyModel()) { case PCM::SKL: case PCM::SKX: From 3ab9e394ac36473e7b8f07aa53fa71daff48b5f5 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 24 Sep 2024 19:18:30 +0200 Subject: [PATCH 20/23] transition to family-model (part 4) Change-Id: I56711c8068c214d687240bf816a97a35e96ed848 --- src/cpucounters.cpp | 8 ++++---- src/cpucounters.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index dec9bb56..d8195683 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -2721,7 +2721,7 @@ void PCM::initUncorePMUsDirect() }; for (uint32 s = 0; s < (uint32)num_sockets; ++s) { - switch (cpu_model) + switch (cpu_family_model) { case BDX: irpPMUs[s][0] = findPCICFGPMU(0x6f39, s, 0xF4, {0xD8, 0xDC, 0xE0, 0xE4}, {0xA0, 0xB0, 0xB8, 0xC0}); @@ -3323,7 +3323,7 @@ bool PCM::checkModel() /* FOR TESTING PURPOSES ONLY */ #ifdef PCM_TEST_FALLBACK_TO_ATOM std::cerr << "Fall back to ATOM functionality.\n"; - cpu_model = ATOM; + cpu_family_model = ATOM; return true; #endif return false; @@ -3441,7 +3441,7 
@@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter canUsePerf = false; if (!silent) std::cerr << "Installed Linux kernel perf does not support hardware top-down level-1 counters. Using direct PMU programming instead.\n"; } - if (canUsePerf && (cpu_model == ADL || cpu_model == RPL || cpu_model == MTL || cpu_model == LNL)) + if (canUsePerf && (cpu_family_model == ADL || cpu_family_model == RPL || cpu_family_model == MTL || cpu_family_model == LNL)) { canUsePerf = false; if (!silent) std::cerr << "Linux kernel perf rejects an architectural event on your platform. Using direct PMU programming instead.\n"; @@ -4884,7 +4884,7 @@ const char * PCM::getUArchCodename(const int32 cpu_family_model_param) const case SKX: if (cpu_family_model_param >= 0) { - // query for specified cpu_model_param, stepping not provided + // query for specified cpu_family_model_param, stepping not provided return "Skylake-SP, Cascade Lake-SP"; } if (isCLX()) diff --git a/src/cpucounters.h b/src/cpucounters.h index 8298bd82..46d9fb44 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -2375,8 +2375,8 @@ class PCM_API PCM uint64 extractQOSMonitoring(uint64 val); //! \brief Get a string describing the codename of the processor microarchitecture - //! \param cpu_model_ cpu model (if no parameter provided the codename of the detected CPU is returned) - const char * getUArchCodename(const int32 cpu_model_ = -1) const; + //! \param cpu_family_model_ cpu model (if no parameter provided the codename of the detected CPU is returned) + const char * getUArchCodename(const int32 cpu_family_model_ = -1) const; //! 
\brief Get Brand string of processor static std::string getCPUBrandString(); @@ -2821,9 +2821,9 @@ class PCM_API PCM return 2.; } - static double getBytesPerLinkCycle(int32 cpu_model_) + static double getBytesPerLinkCycle(const int32 cpu_family_model_) { - return getBytesPerFlit(cpu_model_) * getFlitsPerLinkCycle(cpu_model_); + return getBytesPerFlit(cpu_family_model_) * getFlitsPerLinkCycle(cpu_family_model_); } double getBytesPerLinkCycle() const From 8e1c2e8b1a3327c0f6ca6fa04932372b8b4a5bbf Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 24 Sep 2024 19:46:10 +0200 Subject: [PATCH 21/23] transition to family-model (part 5) Change-Id: I2b8f46f60249c6258e8f7b8dfb2094324ff1110c --- doc/PCM_ACCEL_README.md | 8 ++++---- src/{opCode-106.txt => opCode-6-106.txt} | 0 src/{opCode-134.txt => opCode-6-134.txt} | 0 src/{opCode-143-accel.txt => opCode-6-143-accel.txt} | 0 src/{opCode-143.txt => opCode-6-143.txt} | 0 src/{opCode-173.txt => opCode-6-173.txt} | 0 src/{opCode-175.txt => opCode-6-175.txt} | 0 src/{opCode-207.txt => opCode-6-207.txt} | 0 src/{opCode-85.txt => opCode-6-85.txt} | 0 src/pcm-accel-common.cpp | 2 +- src/pcm-iio.cpp | 2 +- tests/pcm-fuzz.cpp | 6 +++--- tests/pcm-memory-fuzz.cpp | 4 ++-- 13 files changed, 11 insertions(+), 11 deletions(-) rename src/{opCode-106.txt => opCode-6-106.txt} (100%) rename src/{opCode-134.txt => opCode-6-134.txt} (100%) rename src/{opCode-143-accel.txt => opCode-6-143-accel.txt} (100%) rename src/{opCode-143.txt => opCode-6-143.txt} (100%) rename src/{opCode-173.txt => opCode-6-173.txt} (100%) rename src/{opCode-175.txt => opCode-6-175.txt} (100%) rename src/{opCode-207.txt => opCode-6-207.txt} (100%) rename src/{opCode-85.txt => opCode-6-85.txt} (100%) diff --git a/doc/PCM_ACCEL_README.md b/doc/PCM_ACCEL_README.md index f1e23868..13c212c9 100644 --- a/doc/PCM_ACCEL_README.md +++ b/doc/PCM_ACCEL_README.md @@ -29,13 +29,13 @@ Notes: multiple options is allowed. 
| options | Default | Description | | ---------------------- | -------------------- | -------------------------------------------------------------------------------------------------------------------------- | | -numa | no | Print NUMA node mapping instead of CPU socket location. | -| -evt=[cfg.txt] | opCode-xxx-accel.txt | Specify the event config file name as cfg.txt.
- xxx is the cpu model id, for example 143 for sapphire rapid. | +| -evt=[cfg.txt] | opCode-x-y-accel.txt | Specify the event config file name as cfg.txt.
- x/y is the cpu family/model id, for example 6/143 for Sapphire Rapids. | | -silent | no | Silence information output and print only measurements | | -csv[=file.csv] | no | Output compact CSV format to screen or a file in case filename is provided | | -csv-delimiter=[value] | no | Set custom csv delimiter | | -human-readable | no | Use human readable format for output (for csv only) | -| -i=[value] | 0 | Allow to determine number of iterations, default is 0(infinite loop) if not specified. | -| [interval] | 3 | Time interval in seconds (floating point number is accepted) to sample performance counters, default is 3s if not specified. | +| -i=[value] | 0 | Allow to determine number of iterations, default is 0(infinite loop) if not specified. | +| [interval] | 3 | Time interval in seconds (floating point number is accepted) to sample performance counters, default is 3s if not specified| #### Examples: @@ -117,7 +117,7 @@ Please refer to the spec or code to learn more about the event mapping if you wa - QAT: please refer to the [mapping table in source code](https://github.com/intel/pcm/blob/f20013f7563714cf592d7a59f169c1ddee3cf8ba/src/cpucounters.cpp#L915) -Here is the content of the event cfg file(opCode-143-accel.txt as example) +Here is the content of the event cfg file(opCode-6-143-accel.txt as example) ![image](https://user-images.githubusercontent.com/25432609/224027717-1dcdae9e-6701-4b6f-90a0-8108c4ea4550.png) diff --git a/src/opCode-106.txt b/src/opCode-6-106.txt similarity index 100% rename from src/opCode-106.txt rename to src/opCode-6-106.txt diff --git a/src/opCode-134.txt b/src/opCode-6-134.txt similarity index 100% rename from src/opCode-134.txt rename to src/opCode-6-134.txt diff --git a/src/opCode-143-accel.txt b/src/opCode-6-143-accel.txt similarity index 100% rename from src/opCode-143-accel.txt rename to src/opCode-6-143-accel.txt diff --git a/src/opCode-143.txt b/src/opCode-6-143.txt similarity index 100% rename from src/opCode-143.txt rename to
src/opCode-6-143.txt diff --git a/src/opCode-173.txt b/src/opCode-6-173.txt similarity index 100% rename from src/opCode-173.txt rename to src/opCode-6-173.txt diff --git a/src/opCode-175.txt b/src/opCode-6-175.txt similarity index 100% rename from src/opCode-175.txt rename to src/opCode-6-175.txt diff --git a/src/opCode-207.txt b/src/opCode-6-207.txt similarity index 100% rename from src/opCode-207.txt rename to src/opCode-6-207.txt diff --git a/src/opCode-85.txt b/src/opCode-6-85.txt similarity index 100% rename from src/opCode-85.txt rename to src/opCode-6-85.txt diff --git a/src/pcm-accel-common.cpp b/src/pcm-accel-common.cpp index dfd3f4ab..4b95798c 100644 --- a/src/pcm-accel-common.cpp +++ b/src/pcm-accel-common.cpp @@ -390,7 +390,7 @@ void AcceleratorCounterState::setEvents(PCM *m,ACCEL_IP accel, std::string spec { if (evtfile==false) //All platform use the spr config file by default. { - ev_file_name = "opCode-143-accel.txt"; + ev_file_name = "opCode-6-143-accel.txt"; } else { diff --git a/src/pcm-iio.cpp b/src/pcm-iio.cpp index 201862ce..07a01311 100644 --- a/src/pcm-iio.cpp +++ b/src/pcm-iio.cpp @@ -2002,7 +2002,7 @@ int mainThrows(int argc, char * argv[]) string ev_file_name; if (m->IIOEventsAvailable()) { - ev_file_name = "opCode-" + std::to_string(m->getCPUModel()) + ".txt"; + ev_file_name = "opCode-" + std::to_string(m->getCPUFamily()) + "-" + std::to_string(m->getInternalCPUModel()) + ".txt"; } else { diff --git a/tests/pcm-fuzz.cpp b/tests/pcm-fuzz.cpp index 047d0a8a..941bbbed 100644 --- a/tests/pcm-fuzz.cpp +++ b/tests/pcm-fuzz.cpp @@ -56,7 +56,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) std::vector sktstate1, sktstate2; SystemCounterState sstate1, sstate2; bitset ycores; - const auto cpu_model = m->getCPUModel(); + const auto cpu_family_model = m->getCPUFamilyModel(); print_pid_collection_message(pid); bool show_partial_core_output = false; // TODO: add support for partial core output @@ -71,10 +71,10 @@ extern "C" 
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) m->getAllCounterStates(sstate2, sktstate2, cstates2); if (csv_output) print_csv(m, cstates1, cstates2, sktstate1, sktstate2, ycores, sstate1, sstate2, - cpu_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output); + show_core_output, show_partial_core_output, show_socket_output, show_system_output); else print_output(m, cstates1, cstates2, sktstate1, sktstate2, ycores, sstate1, sstate2, - cpu_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output, + cpu_family_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output, metricVersion); return 0; diff --git a/tests/pcm-memory-fuzz.cpp b/tests/pcm-memory-fuzz.cpp index 66ddb6f9..09883675 100644 --- a/tests/pcm-memory-fuzz.cpp +++ b/tests/pcm-memory-fuzz.cpp @@ -57,10 +57,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) m->resetPMU(); m->disableJKTWorkaround(); - const auto cpu_model = m->getCPUModel(); + const auto cpu_family_model = m->getCPUFamilyModel(); if (!m->hasPCICFGUncore()) { - cerr << "Unsupported processor model (" << cpu_model << ").\n"; + cerr << "Unsupported processor model (0x" << std::hex << cpu_family_model << std::dec << ").\n"; if (m->memoryTrafficMetricsAvailable()) cerr << "For processor-level memory bandwidth statistics please use 'pcm' utility\n"; return 0; From 65b07c83be4757a6b75838757660896420c647b1 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 24 Sep 2024 20:12:52 +0200 Subject: [PATCH 22/23] transition to family-model (part 6) Change-Id: I9cfc811c2dc8ae9b87b2fdaa9f13f642ce5f7f68 --- src/cpucounters.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index d8195683..097103ad 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -4503,14 +4503,14 @@ std::string PCM::getCPUBrandString() std::string 
PCM::getCPUFamilyModelString() { - return getCPUFamilyModelString(cpu_family, cpu_family_model, cpu_stepping); + return getCPUFamilyModelString(cpu_family, cpu_model_private, cpu_stepping); } -std::string PCM::getCPUFamilyModelString(const uint32 cpu_family_, const uint32 cpu_family_model_, const uint32 cpu_stepping_) +std::string PCM::getCPUFamilyModelString(const uint32 cpu_family_, const uint32 internal_cpu_model_, const uint32 cpu_stepping_) { char buffer[sizeof(int)*4*3+6]; std::fill(buffer, buffer + sizeof(buffer), 0); - std::snprintf(buffer,sizeof(buffer),"GenuineIntel-%d-%2X-%X", cpu_family_, cpu_family_model_, cpu_stepping_); + std::snprintf(buffer,sizeof(buffer),"GenuineIntel-%d-%2X-%X", cpu_family_, internal_cpu_model_, cpu_stepping_); std::string result(buffer); return result; } From 53aab34d11b5ee26f8c9b231779f58ee93239a7a Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Thu, 26 Sep 2024 13:20:25 +0200 Subject: [PATCH 23/23] add basic support for Elkhart Lake and Jasper Lake addresses issue https://github.com/intel/pcm/issues/387 addresses issue https://github.com/intel/pcm/issues/830 Change-Id: Id0f89c8a80e6c8513d156a0f930eaf5d750c3354 --- src/cpucounters.cpp | 14 ++++++++++++++ src/cpucounters.h | 10 ++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 097103ad..55953671 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -725,6 +725,8 @@ void PCM::initCStateSupportTables() case MTL: case LNL: case SNOWRIDGE: + case ELKHART_LAKE: + case JASPER_LAKE: PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x3F8, 0, 0x3F9, 0, 0x3FA, 0, 0, 0, 0 }) ); case NEHALEM_EP: case NEHALEM: @@ -801,6 +803,8 @@ void PCM::initCStateSupportTables() case MTL: case LNL: case SNOWRIDGE: + case ELKHART_LAKE: + case JASPER_LAKE: case ICX: case SPR: case EMR: @@ -1649,6 +1653,8 @@ bool PCM::detectNominalFrequency() || cpu_family_model == DENVERTON || useSKLPath() || cpu_family_model == SNOWRIDGE + || 
cpu_family_model == ELKHART_LAKE + || cpu_family_model == JASPER_LAKE || cpu_family_model == KNL || cpu_family_model == ADL || cpu_family_model == RPL @@ -3243,6 +3249,8 @@ bool PCM::isCPUModelSupported(const int model_) || model_ == WESTMERE_EX || isAtom(model_) || model_ == SNOWRIDGE + || model_ == ELKHART_LAKE + || model_ == JASPER_LAKE || model_ == CLARKDALE || model_ == SANDY_BRIDGE || model_ == JAKETOWN @@ -3549,6 +3557,8 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter core_gen_counter_num_used = 4; break; case SNOWRIDGE: + case ELKHART_LAKE: + case JASPER_LAKE: LLCArchEventInit(coreEventDesc); coreEventDesc[2].event_number = SKL_MEM_LOAD_RETIRED_L2_MISS_EVTNR; coreEventDesc[2].umask_value = SKL_MEM_LOAD_RETIRED_L2_MISS_UMASK; @@ -4824,6 +4834,10 @@ const char * PCM::getUArchCodename(const int32 cpu_family_model_param) const return "Denverton"; case SNOWRIDGE: return "Snowridge"; + case ELKHART_LAKE: + return "Elkhart Lake"; + case JASPER_LAKE: + return "Jasper Lake"; case NEHALEM_EP: case NEHALEM: return "Nehalem/Nehalem-EP"; diff --git a/src/cpucounters.h b/src/cpucounters.h index 46d9fb44..6bb7e6db 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1856,6 +1856,8 @@ class PCM_API PCM GEMINI_LAKE = PCM_CPU_FAMILY_MODEL(6, 122), DENVERTON = PCM_CPU_FAMILY_MODEL(6, 95), SNOWRIDGE = PCM_CPU_FAMILY_MODEL(6, 134), + ELKHART_LAKE = PCM_CPU_FAMILY_MODEL(6, 150), + JASPER_LAKE = PCM_CPU_FAMILY_MODEL(6, 156), CLARKDALE = PCM_CPU_FAMILY_MODEL(6, 37), WESTMERE_EP = PCM_CPU_FAMILY_MODEL(6, 44), NEHALEM_EX = PCM_CPU_FAMILY_MODEL(6, 46), @@ -2122,6 +2124,8 @@ class PCM_API PCM case LNL: return 12; case SNOWRIDGE: + case ELKHART_LAKE: + case JASPER_LAKE: return 4; case DENVERTON: return 3; @@ -2451,6 +2455,8 @@ class PCM_API PCM || cpu_family_model == PCM::GEMINI_LAKE || cpu_family_model == PCM::DENVERTON || cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::ELKHART_LAKE + || cpu_family_model == PCM::JASPER_LAKE || 
cpu_family_model == PCM::HASWELLX || cpu_family_model == PCM::BROADWELL || cpu_family_model == PCM::BDX_DE @@ -4270,6 +4276,8 @@ uint64 getL2CacheMisses(const CounterStateType & before, const CounterStateType const auto cpu_family_model = pcm->getCPUFamilyModel(); if (pcm->useSkylakeEvents() || cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::ELKHART_LAKE + || cpu_family_model == PCM::JASPER_LAKE || cpu_family_model == PCM::SRF || cpu_family_model == PCM::ADL || cpu_family_model == PCM::RPL @@ -4382,6 +4390,8 @@ uint64 getL3CacheHitsSnoop(const CounterStateType & before, const CounterStateTy if (!pcm->isL3CacheHitsSnoopAvailable()) return 0; const auto cpu_family_model = pcm->getCPUFamilyModel(); if (cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::ELKHART_LAKE + || cpu_family_model == PCM::JASPER_LAKE || cpu_family_model == PCM::SRF || cpu_family_model == PCM::ADL || cpu_family_model == PCM::RPL