diff --git a/doc/PCM_ACCEL_README.md b/doc/PCM_ACCEL_README.md index f1e23868..13c212c9 100644 --- a/doc/PCM_ACCEL_README.md +++ b/doc/PCM_ACCEL_README.md @@ -29,13 +29,13 @@ Notes: multiple options is allowed. | options | Default | Description | | ---------------------- | -------------------- | -------------------------------------------------------------------------------------------------------------------------- | | -numa | no | Print NUMA node mapping instead of CPU socket location. | -| -evt=[cfg.txt] | opCode-xxx-accel.txt | Specify the event config file name as cfg.txt.
- xxx is the cpu model id, for example 143 for sapphire rapid. | +| -evt=[cfg.txt] | opCode-x-y-accel.txt | Specify the event config file name as cfg.txt.
- x/y are the cpu family and model id, for example 6/143 for Sapphire Rapids. | | -silent | no | Silence information output and print only measurements | | -csv[=file.csv] | no | Output compact CSV format to screen or a file in case filename is provided | | -csv-delimiter=[value] | no | Set custom csv delimiter | | -human-readable | no | Use human readable format for output (for csv only) | -| -i=[value] | 0 | Allow to determine number of iterations, default is 0(infinite loop) if not specified. | -| [interval] | 3 | Time interval in seconds (floating point number is accepted) to sample performance counters, default is 3s if not specified. | +| -i=[value] | 0 | Allow to determine number of iterations, default is 0(infinite loop) if not specified. | +| [interval] | 3 | Time interval in seconds (floating point number is accepted) to sample performance counters, default is 3s if not specified. | #### Examples: @@ -117,7 +117,7 @@ Please refer to the spec or code to learn more about the event mapping if you wa - QAT: please refer to the [mapping table in source code](https://github.com/intel/pcm/blob/f20013f7563714cf592d7a59f169c1ddee3cf8ba/src/cpucounters.cpp#L915) -Here is the content of the event cfg file(opCode-143-accel.txt as example) +Here is the content of the event cfg file(opCode-6-143-accel.txt as example) ![image](https://user-images.githubusercontent.com/25432609/224027717-1dcdae9e-6701-4b6f-90a0-8108c4ea4550.png) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 7f3c16e4..55953671 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -382,6 +382,7 @@ int32 PCM::getMaxCustomCoreEvents() return core_gen_counter_num_max; } +/* int PCM::getCPUModelFromCPUID() { static int result = -1; @@ -393,6 +394,21 @@ int PCM::getCPUModelFromCPUID() } return result; } +*/ + +int PCM::getCPUFamilyModelFromCPUID() +{ + static int result = -1; + if (result < 0) + { + PCM_CPUID_INFO cpuinfo; + pcm_cpuid(1, cpuinfo); + const auto cpu_family_ = (((cpuinfo.array[0]) 
>> 8) & 0xf) | ((cpuinfo.array[0] & 0xf00000) >> 16); + const auto cpu_model_ = (((cpuinfo.array[0]) & 0xf0) >> 4) | ((cpuinfo.array[0] & 0xf0000) >> 12); + result = PCM_CPU_FAMILY_MODEL(cpu_family_, cpu_model_); + } + return result; +} bool PCM::detectModel() { @@ -417,7 +433,8 @@ bool PCM::detectModel() pcm_cpuid(1, cpuinfo); cpu_family = (((cpuinfo.array[0]) >> 8) & 0xf) | ((cpuinfo.array[0] & 0xf00000) >> 16); - cpu_model = (((cpuinfo.array[0]) & 0xf0) >> 4) | ((cpuinfo.array[0] & 0xf0000) >> 12); + cpu_model_private = (((cpuinfo.array[0]) & 0xf0) >> 4) | ((cpuinfo.array[0] & 0xf0000) >> 12); + cpu_family_model = PCM_CPU_FAMILY_MODEL(cpu_family, cpu_model_private); cpu_stepping = cpuinfo.array[0] & 0x0f; if (cpuinfo.reg.ecx & (1UL << 31UL)) { @@ -427,12 +444,6 @@ bool PCM::detectModel() readCoreCounterConfig(); - if (cpu_family != 6) - { - std::cerr << getUnsupportedMessage() << " CPU Family: " << cpu_family << "\n"; - return false; - } - pcm_cpuid(7, 0, cpuinfo); std::cerr << "\n===== Processor information =====\n"; @@ -486,7 +497,8 @@ bool PCM::detectModel() std::cerr << "STIBP supported : " << ((cpuinfo.reg.edx & (1 << 27)) ? "yes" : "no") << "\n"; std::cerr << "Spec arch caps supported : " << ((cpuinfo.reg.edx & (1 << 29)) ? 
"yes" : "no") << "\n"; std::cerr << "Max CPUID level : " << max_cpuid << "\n"; - std::cerr << "CPU model number : " << cpu_model << "\n"; + std::cerr << "CPU family : " << cpu_family << "\n"; + std::cerr << "CPU model number : " << cpu_model_private << "\n"; return true; } @@ -564,7 +576,7 @@ bool isMBMEnforced() bool PCM::CoreLocalMemoryBWMetricAvailable() const { - if (isMBMEnforced() == false && cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata + if (isMBMEnforced() == false && cpu_family_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata PCM_CPUID_INFO cpuinfo; if (!(QOSMetricAvailable() && L3QOSMetricAvailable())) return false; @@ -574,7 +586,7 @@ bool PCM::CoreLocalMemoryBWMetricAvailable() const bool PCM::CoreRemoteMemoryBWMetricAvailable() const { - if (isMBMEnforced() == false && cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata + if (isMBMEnforced() == false && cpu_family_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata PCM_CPUID_INFO cpuinfo; if (!(QOSMetricAvailable() && L3QOSMetricAvailable())) return false; @@ -697,7 +709,7 @@ void PCM::initCStateSupportTables() } // fill package C state array - switch(cpu_model) + switch(cpu_family_model) { case ATOM: case ATOM_2: @@ -713,6 +725,8 @@ void PCM::initCStateSupportTables() case MTL: case LNL: case SNOWRIDGE: + case ELKHART_LAKE: + case JASPER_LAKE: PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x3F8, 0, 0x3F9, 0, 0x3FA, 0, 0, 0, 0 }) ); case NEHALEM_EP: case NEHALEM: @@ -752,7 +766,7 @@ void PCM::initCStateSupportTables() }; // fill core C state array - switch(cpu_model) + switch(cpu_family_model) { case ATOM: case ATOM_2: @@ -789,6 +803,8 @@ void PCM::initCStateSupportTables() case MTL: case LNL: case SNOWRIDGE: + case ELKHART_LAKE: + case JASPER_LAKE: case ICX: case SPR: case EMR: @@ -1622,32 +1638,34 @@ bool PCM::detectNominalFrequency() uint64 freq = 0; MSR[socketRefCore[0]]->read(PLATFORM_INFO_ADDR, &freq); const uint64 bus_freq = 
( - cpu_model == SANDY_BRIDGE - || cpu_model == JAKETOWN - || cpu_model == IVYTOWN - || cpu_model == HASWELLX - || cpu_model == BDX_DE - || cpu_model == BDX - || cpu_model == IVY_BRIDGE - || cpu_model == HASWELL - || cpu_model == BROADWELL - || cpu_model == AVOTON - || cpu_model == APOLLO_LAKE - || cpu_model == GEMINI_LAKE - || cpu_model == DENVERTON + cpu_family_model == SANDY_BRIDGE + || cpu_family_model == JAKETOWN + || cpu_family_model == IVYTOWN + || cpu_family_model == HASWELLX + || cpu_family_model == BDX_DE + || cpu_family_model == BDX + || cpu_family_model == IVY_BRIDGE + || cpu_family_model == HASWELL + || cpu_family_model == BROADWELL + || cpu_family_model == AVOTON + || cpu_family_model == APOLLO_LAKE + || cpu_family_model == GEMINI_LAKE + || cpu_family_model == DENVERTON || useSKLPath() - || cpu_model == SNOWRIDGE - || cpu_model == KNL - || cpu_model == ADL - || cpu_model == RPL - || cpu_model == MTL - || cpu_model == LNL - || cpu_model == SKX - || cpu_model == ICX - || cpu_model == SPR - || cpu_model == EMR - || cpu_model == GNR - || cpu_model == SRF + || cpu_family_model == SNOWRIDGE + || cpu_family_model == ELKHART_LAKE + || cpu_family_model == JASPER_LAKE + || cpu_family_model == KNL + || cpu_family_model == ADL + || cpu_family_model == RPL + || cpu_family_model == MTL + || cpu_family_model == LNL + || cpu_family_model == SKX + || cpu_family_model == ICX + || cpu_family_model == SPR + || cpu_family_model == EMR + || cpu_family_model == GNR + || cpu_family_model == SRF ) ? 
(100000000ULL) : (133333333ULL); nominal_frequency = ((freq >> 8) & 255) * bus_freq; @@ -1683,7 +1701,7 @@ void PCM::initEnergyMonitoring() uint64 rapl_power_unit = 0; MSR[socketRefCore[0]]->read(MSR_RAPL_POWER_UNIT,&rapl_power_unit); uint64 energy_status_unit = extract_bits(rapl_power_unit,8,12); - if (cpu_model == PCM::CHERRYTRAIL || cpu_model == PCM::BAYTRAIL) + if (cpu_family_model == PCM::CHERRYTRAIL || cpu_family_model == PCM::BAYTRAIL) joulesPerEnergyUnit = double(1ULL << energy_status_unit)/1000000.; // (2)^energy_status_unit microJoules else joulesPerEnergyUnit = 1./double(1ULL<(handle, unitControlAddr - unitControlAddrAligned), CounterControlRegs, CounterValueRegs); }; - switch (getCPUModel()) + switch (getCPUFamilyModel()) { case PCM::SPR: case PCM::EMR: @@ -2963,7 +2981,8 @@ void increaseULimit() PCM::PCM() : cpu_family(-1), - cpu_model(-1), + cpu_model_private(-1), + cpu_family_model(-1), cpu_stepping(-1), cpu_microcode_level(-1), max_cpuid(0), @@ -3230,6 +3249,8 @@ bool PCM::isCPUModelSupported(const int model_) || model_ == WESTMERE_EX || isAtom(model_) || model_ == SNOWRIDGE + || model_ == ELKHART_LAKE + || model_ == JASPER_LAKE || model_ == CLARKDALE || model_ == SANDY_BRIDGE || model_ == JAKETOWN @@ -3264,53 +3285,53 @@ bool PCM::isCPUModelSupported(const int model_) bool PCM::checkModel() { - switch (cpu_model) + switch (cpu_family_model) { case NEHALEM: - cpu_model = NEHALEM_EP; + cpu_family_model = NEHALEM_EP; break; case ATOM_2: - cpu_model = ATOM; + cpu_family_model = ATOM; break; case HASWELL_ULT: case HASWELL_2: - cpu_model = HASWELL; + cpu_family_model = HASWELL; break; case BROADWELL_XEON_E3: - cpu_model = BROADWELL; + cpu_family_model = BROADWELL; break; case ICX_D: - cpu_model = ICX; + cpu_family_model = ICX; break; case CML_1: - cpu_model = CML; + cpu_family_model = CML; break; case ICL_1: - cpu_model = ICL; + cpu_family_model = ICL; break; case TGL_1: - cpu_model = TGL; + cpu_family_model = TGL; break; case ADL_1: - cpu_model = ADL; + 
cpu_family_model = ADL; break; case RPL_1: case RPL_2: case RPL_3: - cpu_model = RPL; + cpu_family_model = RPL; break; case GNR_D: - cpu_model = GNR; + cpu_family_model = GNR; break; } - if(!isCPUModelSupported((int)cpu_model)) + if(!isCPUModelSupported((int)cpu_family_model)) { - std::cerr << getUnsupportedMessage() << " CPU model number: " << cpu_model << " Brand: \"" << getCPUBrandString().c_str() << "\"\n"; + std::cerr << getUnsupportedMessage() << " CPU family " << cpu_family << " model number " << cpu_model_private << " Brand: \"" << getCPUBrandString().c_str() << "\"\n"; /* FOR TESTING PURPOSES ONLY */ #ifdef PCM_TEST_FALLBACK_TO_ATOM std::cerr << "Fall back to ATOM functionality.\n"; - cpu_model = ATOM; + cpu_family_model = ATOM; return true; #endif return false; @@ -3428,7 +3449,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter canUsePerf = false; if (!silent) std::cerr << "Installed Linux kernel perf does not support hardware top-down level-1 counters. Using direct PMU programming instead.\n"; } - if (canUsePerf && (cpu_model == ADL || cpu_model == RPL || cpu_model == MTL || cpu_model == LNL)) + if (canUsePerf && (cpu_family_model == ADL || cpu_family_model == RPL || cpu_family_model == MTL || cpu_family_model == LNL)) { canUsePerf = false; if (!silent) std::cerr << "Linux kernel perf rejects an architectural event on your platform. 
Using direct PMU programming instead.\n"; @@ -3462,7 +3483,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter CustomCoreEventDescription * pDesc = (CustomCoreEventDescription *)parameter_; coreEventDesc[0] = pDesc[0]; coreEventDesc[1] = pDesc[1]; - if (isAtom() == false && cpu_model != KNL) + if (isAtom() == false && cpu_family_model != KNL) { coreEventDesc[2] = pDesc[2]; core_gen_counter_num_used = 3; @@ -3483,7 +3504,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter evt[1].event_number = ARCH_LLC_REFERENCE_EVTNR; evt[1].umask_value = ARCH_LLC_REFERENCE_UMASK; }; - if (isAtom() || cpu_model == KNL) + if (isAtom() || cpu_family_model == KNL) { LLCArchEventInit(coreEventDesc); L2CacheHitRatioAvailable = true; @@ -3499,7 +3520,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter L2CacheMissesAvailable = true; L3CacheHitsAvailable = true; core_gen_counter_num_used = 2; - if (HASWELLX == cpu_model || HASWELL == cpu_model) + if (HASWELLX == cpu_family_model || HASWELL == cpu_family_model) { coreEventDesc[BasicCounterState::HSXL2MissPos].event_number = HSX_L2_RQSTS_MISS_EVTNR; coreEventDesc[BasicCounterState::HSXL2MissPos].umask_value = HSX_L2_RQSTS_MISS_UMASK; @@ -3511,7 +3532,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter } } else - switch ( cpu_model ) { + switch (cpu_family_model) { case ADL: case RPL: case MTL: @@ -3536,6 +3557,8 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter core_gen_counter_num_used = 4; break; case SNOWRIDGE: + case ELKHART_LAKE: + case JASPER_LAKE: LLCArchEventInit(coreEventDesc); coreEventDesc[2].event_number = SKL_MEM_LOAD_RETIRED_L2_MISS_EVTNR; coreEventDesc[2].umask_value = SKL_MEM_LOAD_RETIRED_L2_MISS_UMASK; @@ -3696,7 +3719,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter core_gen_counter_num_used = pExtDesc->nGPCounters; } - 
if(cpu_model == JAKETOWN) + if(cpu_family_model == JAKETOWN) { bool enableWA = false; for(uint32 i = 0; i< core_gen_counter_num_used; ++i) @@ -3823,7 +3846,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter { serverUncorePMUs[i]->program(); qpi_speeds.push_back(std::async(std::launch::async, - &ServerUncorePMUs::computeQPISpeed, serverUncorePMUs[i].get(), socketRefCore[i], cpu_model)); + &ServerUncorePMUs::computeQPISpeed, serverUncorePMUs[i].get(), socketRefCore[i], cpu_family_model)); } for (size_t i = 0; i < (size_t)serverUncorePMUs.size(); ++i) { @@ -3833,7 +3856,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter programCbo(); } // program uncore counters on old CPU arch - else if (cpu_model == NEHALEM_EP || cpu_model == WESTMERE_EP || cpu_model == CLARKDALE) + else if (cpu_family_model == NEHALEM_EP || cpu_family_model == WESTMERE_EP || cpu_family_model == CLARKDALE) { for (int i = 0; i < (int)num_cores; ++i) { @@ -4168,7 +4191,7 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */, MSR[i]->write(PERF_METRICS_ADDR, 0); } - if (isAtom() || cpu_model == KNL) // KNL and Atom have 3 fixed + only 2 programmable counters + if (isAtom() || cpu_family_model == KNL) // KNL and Atom have 3 fixed + only 2 programmable counters value = (1ULL << 0) + (1ULL << 1) + (1ULL << 32) + (1ULL << 33) + (1ULL << 34); for (uint32 j = 0; j < core_gen_counter_num_used; ++j) @@ -4369,7 +4392,7 @@ void PCM::programBecktonUncore(int32 core) BecktonUncorePMUZDPCTLFVCRegister FVCreg; FVCreg.value = 0; - if (cpu_model == NEHALEM_EX) + if (cpu_family_model == NEHALEM_EX) { FVCreg.fields.bcmd = 0; // rd_bcmd FVCreg.fields.resp = 0; // ack_resp @@ -4490,14 +4513,14 @@ std::string PCM::getCPUBrandString() std::string PCM::getCPUFamilyModelString() { - return getCPUFamilyModelString(cpu_family, cpu_model, cpu_stepping); + return getCPUFamilyModelString(cpu_family, cpu_model_private, cpu_stepping); } -std::string 
PCM::getCPUFamilyModelString(const uint32 cpu_family_, const uint32 cpu_model_, const uint32 cpu_stepping_) +std::string PCM::getCPUFamilyModelString(const uint32 cpu_family_, const uint32 internal_cpu_model_, const uint32 cpu_stepping_) { char buffer[sizeof(int)*4*3+6]; std::fill(buffer, buffer + sizeof(buffer), 0); - std::snprintf(buffer,sizeof(buffer),"GenuineIntel-%d-%2X-%X", cpu_family_, cpu_model_, cpu_stepping_); + std::snprintf(buffer,sizeof(buffer),"GenuineIntel-%d-%2X-%X", cpu_family_, internal_cpu_model_, cpu_stepping_); std::string result(buffer); return result; } @@ -4787,13 +4810,13 @@ bool PCM::PMUinUse() return false; } -const char * PCM::getUArchCodename(const int32 cpu_model_param) const +const char * PCM::getUArchCodename(const int32 cpu_family_model_param) const { - auto cpu_model_ = cpu_model_param; - if(cpu_model_ < 0) - cpu_model_ = this->cpu_model ; + auto cpu_family_model_ = cpu_family_model_param; + if(cpu_family_model_ < 0) + cpu_family_model_ = this->cpu_family_model; - switch(cpu_model_) + switch(cpu_family_model_) { case CENTERTON: return "Centerton"; @@ -4811,6 +4834,10 @@ const char * PCM::getUArchCodename(const int32 cpu_model_param) const return "Denverton"; case SNOWRIDGE: return "Snowridge"; + case ELKHART_LAKE: + return "Elkhart Lake"; + case JASPER_LAKE: + return "Jasper Lake"; case NEHALEM_EP: case NEHALEM: return "Nehalem/Nehalem-EP"; @@ -4869,9 +4896,9 @@ const char * PCM::getUArchCodename(const int32 cpu_model_param) const case LNL: return "Lunar Lake"; case SKX: - if (cpu_model_param >= 0) + if (cpu_family_model_param >= 0) { - // query for specified cpu_model_param, stepping not provided + // query for specified cpu_family_model_param, stepping not provided return "Skylake-SP, Cascade Lake-SP"; } if (isCLX()) @@ -4954,7 +4981,7 @@ void PCM::cleanupPMU(const bool silent) } cleanupPEBS = false; - if(cpu_model == JAKETOWN) + if(cpu_family_model == JAKETOWN) enableJKTWorkaround(false); #ifndef PCM_SILENT @@ -5344,8 +5371,8 @@ 
void BasicCounterState::readAndAggregateTSC(std::shared_ptr msr) { uint64 cInvariantTSC = 0; PCM * m = PCM::getInstance(); - const auto cpu_model = m->getCPUModel(); - if (m->isAtom() == false || cpu_model == PCM::AVOTON) + const auto cpu_family_model = m->getCPUFamilyModel(); + if (m->isAtom() == false || cpu_family_model == PCM::AVOTON) { cInvariantTSC = m->getInvariantTSC_Fast(msr->getCoreId()); MSRValues[IA32_TIME_STAMP_COUNTER] = cInvariantTSC; @@ -5569,7 +5596,7 @@ PCM::ErrorCode PCM::programServerUncoreLatencyMetrics(bool enable_pmm) if (enable_pmm == false) { //DDR is false - if (ICX == cpu_model || SPR == cpu_model || EMR == cpu_model) + if (ICX == cpu_family_model || SPR == cpu_family_model || EMR == cpu_family_model) { DDRConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) + MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM RPQ occupancy DDRConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM RPQ Insert @@ -5620,7 +5647,13 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof uint32 PCUCntConf[4] = {0,0,0,0}; - switch (cpu_model) + auto printError = [this](const char * eventCategory) + { + assert(eventCategory); + std::cerr << "ERROR: no " << eventCategory << " events defined for CPU family " << cpu_family << " model " << cpu_model_private << "\n"; + }; + + switch (cpu_family_model) { case SPR: case EMR: @@ -5641,7 +5674,7 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0xD); // FREQ_BAND2_CYCLES break; case 1: - switch (cpu_model) + switch (cpu_family_model) { case SPR: case EMR: @@ -5674,57 +5707,57 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) break; case 5: - if(JAKETOWN == cpu_model) + if (JAKETOWN == cpu_family_model) { PCUCntConf[1] = 
PCU_MSR_PMON_CTL_EVENT(0) + PCU_MSR_PMON_CTL_EXTRA_SEL + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0) + PCU_MSR_PMON_CTL_EXTRA_SEL ; // cycles spent changing frequency - } else if (IVYTOWN == cpu_model ) + } else if (IVYTOWN == cpu_family_model) { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x60) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x60) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES } else if ( - HASWELLX == cpu_model - || BDX_DE == cpu_model - || BDX == cpu_model - || SKX == cpu_model - || ICX == cpu_model - || SNOWRIDGE == cpu_model - || SPR == cpu_model - || EMR == cpu_model - || SRF == cpu_model - || GNR == cpu_model - || GNR_D == cpu_model + HASWELLX == cpu_family_model + || BDX_DE == cpu_family_model + || BDX == cpu_family_model + || SKX == cpu_family_model + || ICX == cpu_family_model + || SNOWRIDGE == cpu_family_model + || SPR == cpu_family_model + || EMR == cpu_family_model + || SRF == cpu_family_model + || GNR == cpu_family_model + || GNR_D == cpu_family_model ) { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x74) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x74) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES - if(HASWELLX == cpu_model) + if(HASWELLX == cpu_family_model) { PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x79) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of UFS transitions PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x79) ; // UFS transition cycles } } else { - std::cerr << "ERROR: no frequency transition events defined for CPU model " << cpu_model << "\n"; + printError("frequency transition"); } break; case 6: - if (IVYTOWN == cpu_model ) + if (IVYTOWN == cpu_family_model) { PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2 transitions PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions } 
else if ( - HASWELLX == cpu_model - || BDX_DE == cpu_model - || BDX == cpu_model - || SKX == cpu_model - || ICX == cpu_model - || SNOWRIDGE == cpu_model - || SPR == cpu_model - || EMR == cpu_model - || SRF == cpu_model - || GNR == cpu_model - || GNR_D == cpu_model + HASWELLX == cpu_family_model + || BDX_DE == cpu_family_model + || BDX == cpu_family_model + || SKX == cpu_family_model + || ICX == cpu_family_model + || SNOWRIDGE == cpu_family_model + || SPR == cpu_family_model + || EMR == cpu_family_model + || SRF == cpu_family_model + || GNR == cpu_family_model + || GNR_D == cpu_family_model ) { PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX,ICX,SNOWRIDGE,SPR,EMR,SRF,GNR) @@ -5733,11 +5766,11 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions } else { - std::cerr << "ERROR: no package C-state transition events defined for CPU model " << cpu_model << "\n"; + printError("package C-state transition"); } break; case 7: - if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model) + if (HASWELLX == cpu_family_model || BDX_DE == cpu_family_model || BDX == cpu_family_model) { PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x7E) ; // UFS_TRANSITIONS_PERF_P_LIMIT PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x7D) ; // UFS_TRANSITIONS_IO_P_LIMIT @@ -5745,16 +5778,16 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x7B) ; // UFS_TRANSITIONS_UP_STALL_CYCLES } else { - std::cerr << "ERROR: no UFS transition events defined for CPU model " << cpu_model << "\n"; + printError("UFS transition"); } break; case 8: - if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model) + if (HASWELLX == cpu_family_model || BDX_DE == cpu_family_model || BDX == cpu_family_model) { PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x7C) ; // UFS_TRANSITIONS_DOWN } 
else { - std::cerr << "ERROR: no UFS transition events defined for CPU model " << cpu_model << "\n"; + printError("UFS transition"); } break; default: @@ -6408,7 +6441,7 @@ void PCM::readAndAggregateUncoreMCCounters(const uint32 socket, CounterStateType { std::shared_ptr msr = MSR[socketRefCore[socket]]; TemporalThreadAffinity tempThreadAffinity(socketRefCore[socket]); // speedup trick for Linux - switch (cpu_model) + switch (cpu_family_model) { case PCM::WESTMERE_EP: case PCM::NEHALEM_EP: @@ -6630,7 +6663,7 @@ void PCM::readQPICounters(SystemCounterState & result) { // read QPI counters std::vector SocketProcessed(num_sockets, false); - if (cpu_model == PCM::NEHALEM_EX || cpu_model == PCM::WESTMERE_EX) + if (cpu_family_model == PCM::NEHALEM_EX || cpu_family_model == PCM::WESTMERE_EX) { for (int32 core = 0; core < num_cores; ++core) { @@ -6666,7 +6699,7 @@ void PCM::readQPICounters(SystemCounterState & result) } } } - else if ((cpu_model == PCM::NEHALEM_EP || cpu_model == PCM::WESTMERE_EP)) + else if ((cpu_family_model == PCM::NEHALEM_EP || cpu_family_model == PCM::WESTMERE_EP)) { if (num_sockets == 2) { @@ -7364,7 +7397,7 @@ ServerUncorePMUs::ServerUncorePMUs(uint32 socket_, const PCM * pcm) : , UPIbus(-1) , M2Mbus(-1) , groupnr(0) - , cpu_model(pcm->getCPUModel()) + , cpu_family_model(pcm->getCPUFamilyModel()) , qpi_speed(0) { if (pcm->useLinuxPerfForUncore()) @@ -7421,7 +7454,10 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) HARegisterLocation.resize(x + 1); \ HARegisterLocation[x] = std::make_pair(arch##_HA##x##_REGISTER_DEV_ADDR, arch##_HA##x##_REGISTER_FUNC_ADDR); - if(cpu_model == PCM::JAKETOWN || cpu_model == PCM::IVYTOWN) + switch (cpu_family_model) + { + case PCM::JAKETOWN: + case PCM::IVYTOWN: { PCM_PCICFG_MC_INIT(0, 0, JKTIVT) PCM_PCICFG_MC_INIT(0, 1, JKTIVT) @@ -7436,7 +7472,10 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_QPI_INIT(1, JKTIVT); PCM_PCICFG_QPI_INIT(2, JKTIVT); } - else if(cpu_model == 
PCM::HASWELLX || cpu_model == PCM::BDX_DE || cpu_model == PCM::BDX) + break; + case PCM::HASWELLX: + case PCM::BDX_DE: + case PCM::BDX: { PCM_PCICFG_MC_INIT(0, 0, HSX) PCM_PCICFG_MC_INIT(0, 1, HSX) @@ -7454,7 +7493,8 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_HA_INIT(0, HSX); PCM_PCICFG_HA_INIT(1, HSX); } - else if(cpu_model == PCM::SKX) + break; + case PCM::SKX: { PCM_PCICFG_MC_INIT(0, 0, SKX) PCM_PCICFG_MC_INIT(0, 1, SKX) @@ -7498,7 +7538,8 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_M3UPI_INIT(2, SKX); } } - else if (cpu_model == PCM::ICX) + break; + case PCM::ICX: { PCM_PCICFG_QPI_INIT(0, ICX); PCM_PCICFG_QPI_INIT(1, ICX); @@ -7513,7 +7554,9 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_M2M_INIT(2, SERVER) PCM_PCICFG_M2M_INIT(3, SERVER) } - else if (cpu_model == PCM::SPR || cpu_model == PCM::EMR) + break; + case PCM::SPR: + case PCM::EMR: { PCM_PCICFG_QPI_INIT(0, SPR); PCM_PCICFG_QPI_INIT(1, SPR); @@ -7547,7 +7590,8 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_HBM_M2M_INIT(14, SERVER) PCM_PCICFG_HBM_M2M_INIT(15, SERVER) } - else if(cpu_model == PCM::KNL) + break; + case PCM::KNL: { // 2 DDR4 Memory Controllers with 3 channels each PCM_PCICFG_MC_INIT(0, 0, KNL) @@ -7567,10 +7611,9 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_EDC_INIT(6, ECLK, KNL) PCM_PCICFG_EDC_INIT(7, ECLK, KNL) } - else if ( - cpu_model == PCM::SRF - || cpu_model == PCM::GNR - ) + break; + case PCM::SRF: + case PCM::GNR: { PCM_PCICFG_QPI_INIT(0, BHS); PCM_PCICFG_QPI_INIT(1, BHS); @@ -7601,16 +7644,17 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_M3UPI_INIT(4, BHS); PCM_PCICFG_M3UPI_INIT(5, BHS); } - else if (cpu_model == PCM::SNOWRIDGE) + break; + case PCM::SNOWRIDGE: { PCM_PCICFG_M2M_INIT(0, SERVER) PCM_PCICFG_M2M_INIT(1, SERVER) PCM_PCICFG_M2M_INIT(2, SERVER) PCM_PCICFG_M2M_INIT(3, SERVER) } - else - { - 
std::cerr << "Error: Uncore PMU for processor with model id " << cpu_model << " is not supported.\n"; + break; + default: + std::cerr << "Error: Uncore PMU for processor with id 0x" << std::hex << cpu_family_model << std::dec << " is not supported.\n"; throw std::exception(); } @@ -7695,7 +7739,7 @@ void ServerUncorePMUs::initBuses(uint32 socket_, const PCM * pcm) return; #endif - if (PCM::hasUPI(cpu_model) && XPIRegisterLocation.size() > 0) + if (PCM::hasUPI(cpu_family_model) && XPIRegisterLocation.size() > 0) { initSocket2Bus(socket2UPIbus, XPIRegisterLocation[0].first, XPIRegisterLocation[0].second, UPI_DEV_IDS, (uint32)sizeof(UPI_DEV_IDS) / sizeof(UPI_DEV_IDS[0])); if(total_sockets_ == socket2UPIbus.size()) @@ -7741,7 +7785,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) for (auto & handle : imcHandles) { - if (cpu_model == PCM::KNL) { + if (cpu_family_model == PCM::KNL) { imcPMUs.push_back( UncorePMU( std::make_shared(handle, KNX_MC_CH_PCI_PMON_BOX_CTL_ADDR), @@ -7776,7 +7820,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) } } - auto populateM2MPMUs = [](uint32 groupnr, int32 M2Mbus, int32 cpu_model, const std::vector > & M2MRegisterLocation, UncorePMUVector & m2mPMUs) + auto populateM2MPMUs = [](uint32 groupnr, int32 M2Mbus, int32 cpu_family_model, const std::vector > & M2MRegisterLocation, UncorePMUVector & m2mPMUs) { std::vector > m2mHandles; @@ -7791,7 +7835,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) for (auto & handle : m2mHandles) { - switch (cpu_model) + switch (cpu_family_model) { case PCM::ICX: case PCM::SNOWRIDGE: @@ -7830,21 +7874,21 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) } } }; - populateM2MPMUs(groupnr, M2Mbus, cpu_model, M2MRegisterLocation, m2mPMUs); - populateM2MPMUs(groupnr, M2Mbus, cpu_model, HBM_M2MRegisterLocation, hbm_m2mPMUs); + populateM2MPMUs(groupnr, M2Mbus, cpu_family_model, M2MRegisterLocation, m2mPMUs); + 
populateM2MPMUs(groupnr, M2Mbus, cpu_family_model, HBM_M2MRegisterLocation, hbm_m2mPMUs); int numChannels = 0; if (safe_getenv("PCM_NO_IMC_DISCOVERY") == std::string("1")) { - if (cpu_model == PCM::SPR || cpu_model == PCM::EMR) + if (cpu_family_model == PCM::SPR || cpu_family_model == PCM::EMR) { numChannels = 3; } } - if (cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::ICX) + if (cpu_family_model == PCM::SNOWRIDGE || cpu_family_model == PCM::ICX) { numChannels = 2; - if (PCM::getCPUModelFromCPUID() == PCM::ICX_D) + if (PCM::getCPUFamilyModelFromCPUID() == PCM::ICX_D) { numChannels = 3; } @@ -7898,7 +7942,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) } else { - switch (cpu_model) + switch (cpu_family_model) { case PCM::SPR: case PCM::EMR: @@ -7980,7 +8024,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) } }; - switch (cpu_model) + switch (cpu_family_model) { case PCM::GNR: case PCM::SRF: @@ -7994,7 +8038,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) throw std::exception(); } - if (cpu_model == PCM::KNL) + if (cpu_family_model == PCM::KNL) { std::vector > edcHandles; @@ -8050,7 +8094,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) } for (auto& handle : m3upiHandles) { - switch (cpu_model) + switch (cpu_family_model) { case PCM::ICX: case PCM::SPR: @@ -8153,7 +8197,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) if (pcm->getNumSockets() <= 4 && safe_getenv("PCM_NO_UPILL_DISCOVERY") != std::string("1")) { - switch (cpu_model) + switch (cpu_family_model) { // don't use the discovery on SPR to work-around the issue // mentioned in https://lore.kernel.org/lkml/20221129191023.936738-1-kan.liang@linux.intel.com/T/ @@ -8201,7 +8245,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) if (xpiPMUs.empty()) for (auto & handle : qpiLLHandles) { - switch (cpu_model) + switch (cpu_family_model) { case PCM::SKX: xpiPMUs.push_back( @@ 
-8735,7 +8779,7 @@ void ServerUncorePMUs::programServerUncoreMemoryMetrics(const ServerUncoreMemory } return true; }; - switch(cpu_model) + switch(cpu_family_model) { case PCM::KNL: MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 0: CAS.RD @@ -8799,7 +8843,7 @@ void ServerUncorePMUs::programServerUncoreMemoryMetrics(const ServerUncoreMemory std::cerr << "PCM Error: invalid rankA value: " << rankA << "\n"; return; } - switch(cpu_model) + switch(cpu_family_model) { case PCM::IVYTOWN: MCCntConfig[EventPosition::READ_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(0xff); // RD_CAS_RANK(rankA) all banks @@ -8829,7 +8873,7 @@ void ServerUncorePMUs::programServerUncoreMemoryMetrics(const ServerUncoreMemory EDCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x02) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 1: WPQ break; default: - std::cerr << "PCM Error: your processor " << pcm->getCPUBrandString() << " model " << cpu_model << " does not support the required performance events \n"; + std::cerr << "PCM Error: your processor " << pcm->getCPUBrandString() << " ID 0x" << std::hex << cpu_family_model << std::dec << " does not support the required performance events \n"; return; } } @@ -8847,7 +8891,7 @@ void ServerUncorePMUs::program() PCM * pcm = PCM::getInstance(); uint32 MCCntConfig[4] = {0, 0, 0, 0}; uint32 EDCCntConfig[4] = {0, 0, 0, 0}; - switch(cpu_model) + switch(cpu_family_model) { case PCM::KNL: MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 0: CAS_COUNT.RD @@ -8889,7 +8933,7 @@ void ServerUncorePMUs::program() programM2M(); uint32 event[4]; - if (PCM::hasUPI(cpu_model)) + if (PCM::hasUPI(cpu_family_model)) { // monitor TxL0_POWER_CYCLES event[0] = Q_P_PCI_PMON_CTL_EVENT(0x26); @@ -8917,7 +8961,7 @@ void ServerUncorePMUs::program() void 
ServerUncorePMUs::programXPI(const uint32 * event) { - const uint32 extra = PCM::hasUPI(cpu_model) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN; + const uint32 extra = PCM::hasUPI(cpu_family_model) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN; for (uint32 i = 0; i < (uint32)xpiPMUs.size(); ++i) { // QPI LL PMU @@ -8995,7 +9039,7 @@ uint64 ServerUncorePMUs::getImcReadsForChannels(uint32 beginChannel, uint32 endC for (uint32 i = beginChannel; i < endChannel && i < imcPMUs.size(); ++i) { result += getMCCounter(i, EventPosition::READ); - switch (cpu_model) + switch (cpu_family_model) { case PCM::GNR: case PCM::SRF: @@ -9012,7 +9056,7 @@ uint64 ServerUncorePMUs::getImcWrites() for (uint32 i = 0; i < (uint32)imcPMUs.size(); ++i) { result += getMCCounter(i, EventPosition::WRITE); - switch (cpu_model) + switch (cpu_family_model) { case PCM::GNR: case PCM::SRF: @@ -9099,7 +9143,7 @@ uint64 ServerUncorePMUs::getIncomingDataFlits(uint32 port) if (port >= (uint32)xpiPMUs.size()) return 0; - if (PCM::hasUPI(cpu_model) == false) + if (PCM::hasUPI(cpu_family_model) == false) { drs = *xpiPMUs[port].counterValue[0]; } @@ -9115,7 +9159,7 @@ uint64 ServerUncorePMUs::getOutgoingFlits(uint32 port) uint64 ServerUncorePMUs::getUPIL0TxCycles(uint32 port) { - if (PCM::hasUPI(cpu_model)) + if (PCM::hasUPI(cpu_family_model)) return getQPILLCounter(port,0); return 0; } @@ -9123,15 +9167,15 @@ uint64 ServerUncorePMUs::getUPIL0TxCycles(uint32 port) void ServerUncorePMUs::program_power_metrics(int mc_profile) { uint32 xPIEvents[4] = { 0,0,0,0 }; - xPIEvents[ServerUncoreCounterState::EventPosition::xPI_TxL0P_POWER_CYCLES] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_model) ? 0x27 : 0x0D)); // L0p Tx Cycles (TxL0P_POWER_CYCLES) - xPIEvents[ServerUncoreCounterState::EventPosition::xPI_L1_POWER_CYCLES] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_model) ? 
0x21 : 0x12)); // L1 Cycles (L1_POWER_CYCLES) - xPIEvents[ServerUncoreCounterState::EventPosition::xPI_CLOCKTICKS] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_model) ? 0x01 : 0x14)); // QPI/UPI clocks (CLOCKTICKS) + xPIEvents[ServerUncoreCounterState::EventPosition::xPI_TxL0P_POWER_CYCLES] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_family_model) ? 0x27 : 0x0D)); // L0p Tx Cycles (TxL0P_POWER_CYCLES) + xPIEvents[ServerUncoreCounterState::EventPosition::xPI_L1_POWER_CYCLES] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_family_model) ? 0x21 : 0x12)); // L1 Cycles (L1_POWER_CYCLES) + xPIEvents[ServerUncoreCounterState::EventPosition::xPI_CLOCKTICKS] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_family_model) ? 0x01 : 0x14)); // QPI/UPI clocks (CLOCKTICKS) programXPI(xPIEvents); uint32 MCCntConfig[4] = {0,0,0,0}; unsigned int UNC_M_POWER_CKE_CYCLES = 0x83; - switch (cpu_model) + switch (cpu_family_model) { case PCM::ICX: case PCM::SNOWRIDGE: @@ -9144,7 +9188,7 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) break; } unsigned int UNC_M_POWER_CHANNEL_PPD_CYCLES = 0x85; - switch (cpu_model) + switch (cpu_family_model) { case PCM::SRF: case PCM::GNR: @@ -9153,7 +9197,7 @@ void ServerUncorePMUs::program_power_metrics(int mc_profile) break; } unsigned int UNC_M_SELF_REFRESH_ENTER_SUCCESS_CYCLES_UMASK = 0; - switch (cpu_model) + switch (cpu_family_model) { case PCM::SRF: case PCM::GNR: @@ -9209,7 +9253,7 @@ void enableAndResetMCFixedCounter(UncorePMU& pmu) void ServerUncorePMUs::programIMC(const uint32 * MCCntConfig) { - const uint32 extraIMC = (cpu_model == PCM::SKX)?UNC_PMON_UNIT_CTL_RSV:UNC_PMON_UNIT_CTL_FRZ_EN; + const uint32 extraIMC = (cpu_family_model == PCM::SKX)?UNC_PMON_UNIT_CTL_RSV:UNC_PMON_UNIT_CTL_FRZ_EN; for (uint32 i = 0; i < (uint32)imcPMUs.size(); ++i) { @@ -9229,7 +9273,7 @@ void ServerUncorePMUs::programEDC(const uint32 * EDCCntConfig) edcPMUs[i].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); // HBM clocks enabled by default - if 
(cpu_model == PCM::KNL) + if (cpu_family_model == PCM::KNL) { *edcPMUs[i].fixedCounterControl = EDC_CH_PCI_PMON_FIXED_CTL_EN; } @@ -9245,7 +9289,7 @@ void ServerUncorePMUs::programEDC(const uint32 * EDCCntConfig) void ServerUncorePMUs::programM2M() { uint64 cfg[4] = {0, 0, 0, 0}; - switch (cpu_model) + switch (cpu_family_model) { case PCM::SPR: case PCM::EMR: @@ -9352,7 +9396,7 @@ void ServerUncorePMUs::freezeCounters() { for (auto& pmu : *pmuVector) { - pmu.freeze((cpu_model == PCM::SKX) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN); + pmu.freeze((cpu_family_model == PCM::SKX) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN); } } } @@ -9363,7 +9407,7 @@ void ServerUncorePMUs::unfreezeCounters() { for (auto& pmu : *pmuVector) { - pmu.unfreeze((cpu_model == PCM::SKX) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN); + pmu.unfreeze((cpu_family_model == PCM::SKX) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN); } } } @@ -9588,7 +9632,7 @@ void ServerUncorePMUs::cleanupMemTest(const ServerUncorePMUs::MemTestParam & par } } -uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumodel) +uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpufamilymodel) { if(qpi_speed.empty()) { @@ -9597,9 +9641,9 @@ uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumode qpi_speed.resize(getNumQPIPorts()); auto getSpeed = [&] (size_t i) { - if (PCM::hasUPI(cpumodel) == false && i == 1) return 0ULL; // link 1 should have the same speed as link 0, skip it + if (PCM::hasUPI(cpufamilymodel) == false && i == 1) return 0ULL; // link 1 should have the same speed as link 0, skip it uint64 result = 0; - if (PCM::hasUPI(cpumodel) == false && i < XPIRegisterLocation.size()) + if (PCM::hasUPI(cpufamilymodel) == false && i < XPIRegisterLocation.size()) { PciHandleType reg(groupnr,UPIbus, XPIRegisterLocation[i].first, QPI_PORT0_MISC_REGISTER_FUNC_ADDR); uint32 value = 0; @@ -9609,7 +9653,7 @@ uint64 
ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumode } std::unordered_map UPISpeedMap{}; std::pair regBits{}; - switch (cpumodel) + switch (cpufamilymodel) { case PCM::GNR: case PCM::SRF: @@ -9652,7 +9696,7 @@ uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumode } if(result == 0ULL) { - if (PCM::hasUPI(cpumodel) == false) + if (PCM::hasUPI(cpufamilymodel) == false) std::cerr << "Warning: QPI_RATE_STATUS register is not available on port " << i << ". Computing QPI speed using a measurement loop.\n"; // compute qpi speed @@ -9672,8 +9716,8 @@ uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumode uint64 endClocks = getQPIClocks((uint32)i); cleanupMemTest(param); - result = (uint64(double(endClocks - startClocks) * PCM::getBytesPerLinkCycle(cpumodel) * double(timerGranularity) / double(endTSC - startTSC))); - if(cpumodel == PCM::HASWELLX || cpumodel == PCM::BDX) /* BDX_DE does not have QPI. */{ + result = (uint64(double(endClocks - startClocks) * PCM::getBytesPerLinkCycle(cpufamilymodel) * double(timerGranularity) / double(endTSC - startTSC))); + if(cpufamilymodel == PCM::HASWELLX || cpufamilymodel == PCM::BDX) /* BDX_DE does not have QPI. */{ result /=2; // HSX runs QPI clocks with doubled speed } } @@ -9684,9 +9728,9 @@ uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumode getSpeedsAsync.push_back(std::async(std::launch::async, getSpeed, i)); } for (size_t i = 0; i < getNumQPIPorts(); ++i) { - qpi_speed[i] = (PCM::hasUPI(cpumodel) == false && i==1)? qpi_speed[0] : getSpeedsAsync[i].get(); // link 1 does not have own speed register, it runs with the speed of link 0 + qpi_speed[i] = (PCM::hasUPI(cpufamilymodel) == false && i==1)? 
qpi_speed[0] : getSpeedsAsync[i].get(); // link 1 does not have own speed register, it runs with the speed of link 0 } - if (PCM::hasUPI(cpumodel)) + if (PCM::hasUPI(cpufamilymodel)) { // check the speed of link 3 if(qpi_speed.size() == 3 && qpi_speed[2] == 0) @@ -9718,7 +9762,8 @@ void ServerUncorePMUs::reportQPISpeed() const uint64 PCM::CX_MSR_PMON_CTRY(uint32 Cbo, uint32 Ctr) const { - switch (cpu_model) { + switch (cpu_family_model) + { case JAKETOWN: case IVYTOWN: return JKT_C0_MSR_PMON_CTR0 + (JKTIVT_CBO_MSR_STEP * Cbo) + Ctr; @@ -9746,7 +9791,8 @@ uint64 PCM::CX_MSR_PMON_CTRY(uint32 Cbo, uint32 Ctr) const uint64 PCM::CX_MSR_PMON_BOX_FILTER(uint32 Cbo) const { - switch (cpu_model) { + switch (cpu_family_model) + { case JAKETOWN: case IVYTOWN: return JKT_C0_MSR_PMON_BOX_FILTER + (JKTIVT_CBO_MSR_STEP * Cbo); @@ -9776,7 +9822,7 @@ uint64 PCM::CX_MSR_PMON_BOX_FILTER(uint32 Cbo) const uint64 PCM::CX_MSR_PMON_BOX_FILTER1(uint32 Cbo) const { - switch (cpu_model) { + switch (cpu_family_model) { case IVYTOWN: return IVT_C0_MSR_PMON_BOX_FILTER1 + (JKTIVT_CBO_MSR_STEP * Cbo); @@ -9792,7 +9838,7 @@ uint64 PCM::CX_MSR_PMON_BOX_FILTER1(uint32 Cbo) const } uint64 PCM::CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl) const { - switch (cpu_model) { + switch (cpu_family_model) { case JAKETOWN: case IVYTOWN: return JKT_C0_MSR_PMON_CTL0 + (JKTIVT_CBO_MSR_STEP * Cbo) + Ctl; @@ -9820,7 +9866,7 @@ uint64 PCM::CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl) const uint64 PCM::CX_MSR_PMON_BOX_CTL(uint32 Cbo) const { - switch (cpu_model) { + switch (cpu_family_model) { case JAKETOWN: case IVYTOWN: return JKT_C0_MSR_PMON_BOX_CTL + (JKTIVT_CBO_MSR_STEP * Cbo); @@ -9913,7 +9959,7 @@ uint32 PCM::getMaxNumOfCBoxesInternal() const } const auto refCore = socketRefCore[0]; uint64 val = 0; - switch (cpu_model) + switch (cpu_family_model) { case GNR: case SRF: @@ -9998,14 +10044,14 @@ uint32 PCM::getMaxNumOfIIOStacks() const void PCM::programCboOpcodeFilter(const uint32 opc0, UncorePMU & pmu, const uint32 nc_, 
const uint32 opc1, const uint32 loc, const uint32 rem) { - if(JAKETOWN == cpu_model) + if (JAKETOWN == cpu_family_model) { *pmu.filter[0] = JKT_CBO_MSR_PMON_BOX_FILTER_OPC(opc0); - } else if(IVYTOWN == cpu_model || HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model) + } else if (IVYTOWN == cpu_family_model || HASWELLX == cpu_family_model || BDX_DE == cpu_family_model || BDX == cpu_family_model) { *pmu.filter[1] = IVTHSX_CBO_MSR_PMON_BOX_FILTER1_OPC(opc0); - } else if(SKX == cpu_model) + } else if (SKX == cpu_family_model) { *pmu.filter[1] = SKX_CHA_MSR_PMON_BOX_FILTER1_OPC0(opc0) + SKX_CHA_MSR_PMON_BOX_FILTER1_OPC1(opc1) + @@ -10017,7 +10063,7 @@ void PCM::programCboOpcodeFilter(const uint32 opc0, UncorePMU & pmu, const uint3 } else { - std::cerr << "ERROR: programCboOpcodeFilter function is not implemented for cpu model " << cpu_model << std::endl; + std::cerr << "ERROR: programCboOpcodeFilter function is not implemented for cpu family " << cpu_family << " model " << cpu_model_private << std::endl; throw std::exception(); } } @@ -10028,7 +10074,7 @@ void PCM::programIIOCounters(uint64 rawEvents[4], int IIOStack) if (IIOStack == -1) { int stacks_count; - switch (getCPUModel()) + switch (getCPUFamilyModel()) { case PCM::GNR: case PCM::SRF: @@ -10123,7 +10169,7 @@ void PCM::programPCIeEventGroup(eventGroup_t &eventGroup) uint64 events[4] = {0}; uint64 umask[4] = {0}; - switch (cpu_model) + switch (cpu_family_model) { case PCM::GNR: case PCM::SRF: @@ -10173,18 +10219,18 @@ void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc { pmu.initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - if ( ICX != cpu_model - && SNOWRIDGE != cpu_model - && SPR != cpu_model - && EMR != cpu_model - && GNR != cpu_model - && SRF != cpu_model + if ( ICX != cpu_family_model + && SNOWRIDGE != cpu_family_model + && SPR != cpu_family_model + && EMR != cpu_family_model + && GNR != cpu_family_model + && SRF != cpu_family_model ) { programCboOpcodeFilter(opCode, pmu, 
nc_, 0, loc, rem); } - if ((HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) && llc_lookup_tid_filter != 0) + if ((HASWELLX == cpu_family_model || BDX_DE == cpu_family_model || BDX == cpu_family_model || SKX == cpu_family_model) && llc_lookup_tid_filter != 0) *pmu.filter[0] = llc_lookup_tid_filter; PCM::program(pmu, events, events + ServerUncoreCounterState::maxCounters, UNC_PMON_UNIT_CTL_FRZ_EN); @@ -10437,7 +10483,7 @@ bool PCM::supportIDXAccelDev() const { bool retval = false; - switch (this->getCPUModel()) + switch (this->getCPUFamilyModel()) { case PCM::SPR: case PCM::EMR: @@ -10522,7 +10568,7 @@ void PCM::initLLCReadMissLatencyEvents(uint64 * events, uint32 & opCode) return; } uint64 umask = 3ULL; // MISS_OPCODE - switch (cpu_model) + switch (cpu_family_model) { case ICX: case SPR: @@ -10535,7 +10581,7 @@ void PCM::initLLCReadMissLatencyEvents(uint64 * events, uint32 & opCode) } uint64 umask_ext = 0; - switch (cpu_model) + switch (cpu_family_model) { case ICX: umask_ext = 0xC817FE; @@ -10552,7 +10598,7 @@ void PCM::initLLCReadMissLatencyEvents(uint64 * events, uint32 & opCode) events[EventPosition::TOR_OCCUPANCY] = CBO_MSR_PMON_CTL_EVENT(0x36) + all_umasks; // TOR_OCCUPANCY (must be on counter 0) events[EventPosition::TOR_INSERTS] = CBO_MSR_PMON_CTL_EVENT(0x35) + all_umasks; // TOR_INSERTS - opCode = (SKX == cpu_model) ? 0x202 : 0x182; + opCode = (SKX == cpu_family_model) ? 
0x202 : 0x182; } void PCM::programCbo() @@ -10623,7 +10669,7 @@ UncorePMU::UncorePMU(const HWRegisterPtr& unitControl_, const HWRegisterPtr& filter0, const HWRegisterPtr& filter1 ) : - cpu_model_(0), + cpu_family_model_(0), unitControl(unitControl_), counterControl{ counterControl0, counterControl1, counterControl2, counterControl3 }, counterValue{ counterValue0, counterValue1, counterValue2, counterValue3 }, @@ -10642,7 +10688,7 @@ UncorePMU::UncorePMU(const HWRegisterPtr& unitControl_, const HWRegisterPtr& filter0, const HWRegisterPtr& filter1 ): - cpu_model_(0), + cpu_family_model_(0), unitControl(unitControl_), counterControl{counterControl_}, counterValue{counterValue_}, @@ -10653,13 +10699,13 @@ UncorePMU::UncorePMU(const HWRegisterPtr& unitControl_, assert(counterControl.size() == counterValue.size()); } -uint32 UncorePMU::getCPUModel() +uint32 UncorePMU::getCPUFamilyModel() { - if (cpu_model_ == 0) + if (cpu_family_model_ == 0) { - cpu_model_ = PCM::getInstance()->getCPUModel(); + cpu_family_model_ = PCM::getInstance()->getCPUFamilyModel(); } - return cpu_model_; + return cpu_family_model_; } void UncorePMU::cleanup() @@ -10674,7 +10720,7 @@ void UncorePMU::cleanup() void UncorePMU::freeze(const uint32 extra) { - switch (getCPUModel()) + switch (getCPUFamilyModel()) { case PCM::SPR: case PCM::EMR: @@ -10689,7 +10735,7 @@ void UncorePMU::freeze(const uint32 extra) void UncorePMU::unfreeze(const uint32 extra) { - switch (getCPUModel()) + switch (getCPUFamilyModel()) { case PCM::SPR: case PCM::EMR: @@ -10709,7 +10755,7 @@ bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg) return true; // this PMU does not have unit control register => no op } - switch (getCPUModel()) + switch (getCPUFamilyModel()) { case PCM::SPR: case PCM::EMR: @@ -10748,7 +10794,7 @@ bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg) void UncorePMU::resetUnfreeze(const uint32 extra) { - switch (getCPUModel()) + switch (getCPUFamilyModel()) { case 
PCM::SPR: case PCM::EMR: @@ -10779,7 +10825,7 @@ IDX_PMU::IDX_PMU(const bool perfMode_, const std::vector & counterFilterPGSZ, const std::vector & counterFilterXFERSZ ) : - cpu_model_(0), + cpu_family_model_(0), perf_mode_(perfMode_), numa_node_(numaNode_), socket_id_(socketId_), @@ -10797,14 +10843,14 @@ IDX_PMU::IDX_PMU(const bool perfMode_, assert(counterControl.size() == counterValue.size()); } -uint32 IDX_PMU::getCPUModel() +uint32 IDX_PMU::getCPUFamilyModel() { - if (cpu_model_ == 0) + if (cpu_family_model_ == 0) { - cpu_model_ = PCM::getInstance()->getCPUModel(); + cpu_family_model_ = PCM::getInstance()->getCPUFamilyModel(); } - return cpu_model_; + return cpu_family_model_; } void IDX_PMU::cleanup() @@ -10896,7 +10942,7 @@ void PCM::getIIOCounterStates(int socket, int IIOStack, IIOCounterState * result void PCM::setupCustomCoreEventsForNuma(PCM::ExtendedCustomCoreEventDescription& conf) const { - switch (this->getCPUModel()) + switch (this->getCPUFamilyModel()) { case PCM::WESTMERE_EX: // OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_DRAM: Offcore requests satisfied by the local DRAM diff --git a/src/cpucounters.h b/src/cpucounters.h index 8a02e69f..6bb7e6db 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -244,8 +244,8 @@ class CounterWidthExtenderRegister : public HWRegister class UncorePMU { typedef std::shared_ptr HWRegisterPtr; - uint32 cpu_model_; - uint32 getCPUModel(); + uint32 cpu_family_model_; + uint32 getCPUFamilyModel(); HWRegisterPtr unitControl; public: std::vector counterControl; @@ -279,7 +279,7 @@ class UncorePMU const HWRegisterPtr& filter0 = HWRegisterPtr(), const HWRegisterPtr& filter1 = HWRegisterPtr() ); - UncorePMU() : cpu_model_(0U) {} + UncorePMU() : cpu_family_model_(0U) {} size_t size() const { return counterControl.size(); } virtual ~UncorePMU() {} bool valid() const @@ -298,8 +298,8 @@ typedef std::shared_ptr UncorePMURef; class IDX_PMU { typedef std::shared_ptr HWRegisterPtr; - uint32 cpu_model_; - uint32 getCPUModel(); + uint32 
cpu_family_model_; + uint32 getCPUFamilyModel(); bool perf_mode_; uint32 numa_node_; uint32 socket_id_; @@ -330,7 +330,7 @@ class IDX_PMU const std::vector & counterFilterXFERSZ ); - IDX_PMU() : cpu_model_(0U), perf_mode_(false), numa_node_(0), socket_id_(0) {} + IDX_PMU() : cpu_family_model_(0U), perf_mode_(false), numa_node_(0), socket_id_(0) {} size_t size() const { return counterControl.size(); } virtual ~IDX_PMU() {} bool valid() const @@ -361,7 +361,7 @@ class ServerUncorePMUs friend class PCM; int32 iMCbus,UPIbus,M2Mbus; uint32 groupnr; - int32 cpu_model; + int32 cpu_family_model; typedef std::vector UncorePMUVector; UncorePMUVector imcPMUs; UncorePMUVector edcPMUs; @@ -607,7 +607,8 @@ class PCM_API PCM PCM & operator = (const PCM &) = delete; int32 cpu_family; - int32 cpu_model; + int32 cpu_model_private; + int32 cpu_family_model; bool hybrid = false; int32 cpu_stepping; int64 cpu_microcode_level; @@ -1238,13 +1239,13 @@ class PCM_API PCM { if (!eventsBegin) return; Iterator curEvent = eventsBegin; - const auto cpu_model = PCM::getInstance()->getCPUModel(); + const auto cpu_family_model = PCM::getInstance()->getCPUFamilyModel(); for (int c = 0; curEvent != eventsEnd && size_t(c) < pmu.size(); ++c, ++curEvent) { auto ctrl = pmu.counterControl[c]; if (ctrl.get() != nullptr) { - switch (cpu_model) + switch (cpu_family_model) { case SPR: case EMR: @@ -1271,17 +1272,17 @@ class PCM_API PCM bool isCLX() const // Cascade Lake-SP { - return (PCM::SKX == cpu_model) && (cpu_stepping > 4 && cpu_stepping < 8); + return (PCM::SKX == cpu_family_model) && (cpu_stepping > 4 && cpu_stepping < 8); } - static bool isCPX(int cpu_model_, int cpu_stepping_) // Cooper Lake + static bool isCPX(int cpu_family_model_, int cpu_stepping_) // Cooper Lake { - return (PCM::SKX == cpu_model_) && (cpu_stepping_ >= 10); + return (PCM::SKX == cpu_family_model_) && (cpu_stepping_ >= 10); } bool isCPX() const { - return isCPX(cpu_model, cpu_stepping); + return isCPX(cpu_family_model, 
cpu_stepping); } void initUncorePMUsDirect(); @@ -1303,10 +1304,10 @@ class PCM_API PCM { return isHWTMAL1Supported() && ( - SPR == cpu_model - || EMR == cpu_model - || GNR == cpu_model - || GNR_D == cpu_model + SPR == cpu_family_model + || EMR == cpu_family_model + || GNR == cpu_family_model + || GNR_D == cpu_family_model ); } @@ -1631,7 +1632,7 @@ class PCM_API PCM assert (coreID < topology.size()); if (hybrid) { - switch (cpu_model) + switch (cpu_family_model) { case ADL: case RPL: @@ -1645,7 +1646,7 @@ class PCM_API PCM } } bool useGLCOCREvent = false; - switch (cpu_model) + switch (cpu_family_model) { case SPR: case EMR: @@ -1829,67 +1830,78 @@ class PCM_API PCM /*! \brief Returns cpu model id number from cpuid instruction */ + /* static int getCPUModelFromCPUID(); + */ + + /*! \brief Returns cpu family and model id number from cpuid instruction + * \return cpu family and model id number (model id is in the lower 8 bits, family id is in the next 8 bits) + */ + static int getCPUFamilyModelFromCPUID(); + + #define PCM_CPU_FAMILY_MODEL(family_, model_) (((family_) << 8) + (model_)) //! 
\brief Identifiers of supported CPU models enum SupportedCPUModels { - NEHALEM_EP = 26, - NEHALEM = 30, - ATOM = 28, - ATOM_2 = 53, - CENTERTON = 54, - BAYTRAIL = 55, - AVOTON = 77, - CHERRYTRAIL = 76, - APOLLO_LAKE = 92, - GEMINI_LAKE = 122, - DENVERTON = 95, - SNOWRIDGE = 134, - CLARKDALE = 37, - WESTMERE_EP = 44, - NEHALEM_EX = 46, - WESTMERE_EX = 47, - SANDY_BRIDGE = 42, - JAKETOWN = 45, - IVY_BRIDGE = 58, - HASWELL = 60, - HASWELL_ULT = 69, - HASWELL_2 = 70, - IVYTOWN = 62, - HASWELLX = 63, - BROADWELL = 61, - BROADWELL_XEON_E3 = 71, - BDX_DE = 86, - SKL_UY = 78, - KBL = 158, - KBL_1 = 142, - CML = 166, - CML_1 = 165, - ICL = 126, - ICL_1 = 125, - RKL = 167, - TGL = 140, - TGL_1 = 141, - ADL = 151, - ADL_1 = 154, - RPL = 0xb7, - RPL_1 = 0xba, - RPL_2 = 0xbf, - RPL_3 = 0xbe, - MTL = 0xAA, - LNL = 0xBD, - BDX = 79, - KNL = 87, - SKL = 94, - SKX = 85, - ICX_D = 108, - ICX = 106, - SPR = 143, - EMR = 207, - GNR = 173, - SRF = 175, - GNR_D = 174, + NEHALEM_EP = PCM_CPU_FAMILY_MODEL(6, 26), + NEHALEM = PCM_CPU_FAMILY_MODEL(6, 30), + ATOM = PCM_CPU_FAMILY_MODEL(6, 28), + ATOM_2 = PCM_CPU_FAMILY_MODEL(6, 53), + CENTERTON = PCM_CPU_FAMILY_MODEL(6, 54), + BAYTRAIL = PCM_CPU_FAMILY_MODEL(6, 55), + AVOTON = PCM_CPU_FAMILY_MODEL(6, 77), + CHERRYTRAIL = PCM_CPU_FAMILY_MODEL(6, 76), + APOLLO_LAKE = PCM_CPU_FAMILY_MODEL(6, 92), + GEMINI_LAKE = PCM_CPU_FAMILY_MODEL(6, 122), + DENVERTON = PCM_CPU_FAMILY_MODEL(6, 95), + SNOWRIDGE = PCM_CPU_FAMILY_MODEL(6, 134), + ELKHART_LAKE = PCM_CPU_FAMILY_MODEL(6, 150), + JASPER_LAKE = PCM_CPU_FAMILY_MODEL(6, 156), + CLARKDALE = PCM_CPU_FAMILY_MODEL(6, 37), + WESTMERE_EP = PCM_CPU_FAMILY_MODEL(6, 44), + NEHALEM_EX = PCM_CPU_FAMILY_MODEL(6, 46), + WESTMERE_EX = PCM_CPU_FAMILY_MODEL(6, 47), + SANDY_BRIDGE = PCM_CPU_FAMILY_MODEL(6, 42), + JAKETOWN = PCM_CPU_FAMILY_MODEL(6, 45), + IVY_BRIDGE = PCM_CPU_FAMILY_MODEL(6, 58), + HASWELL = PCM_CPU_FAMILY_MODEL(6, 60), + HASWELL_ULT = PCM_CPU_FAMILY_MODEL(6, 69), + HASWELL_2 = PCM_CPU_FAMILY_MODEL(6, 
70), + IVYTOWN = PCM_CPU_FAMILY_MODEL(6, 62), + HASWELLX = PCM_CPU_FAMILY_MODEL(6, 63), + BROADWELL = PCM_CPU_FAMILY_MODEL(6, 61), + BROADWELL_XEON_E3 = PCM_CPU_FAMILY_MODEL(6, 71), + BDX_DE = PCM_CPU_FAMILY_MODEL(6, 86), + SKL_UY = PCM_CPU_FAMILY_MODEL(6, 78), + KBL = PCM_CPU_FAMILY_MODEL(6, 158), + KBL_1 = PCM_CPU_FAMILY_MODEL(6, 142), + CML = PCM_CPU_FAMILY_MODEL(6, 166), + CML_1 = PCM_CPU_FAMILY_MODEL(6, 165), + ICL = PCM_CPU_FAMILY_MODEL(6, 126), + ICL_1 = PCM_CPU_FAMILY_MODEL(6, 125), + RKL = PCM_CPU_FAMILY_MODEL(6, 167), + TGL = PCM_CPU_FAMILY_MODEL(6, 140), + TGL_1 = PCM_CPU_FAMILY_MODEL(6, 141), + ADL = PCM_CPU_FAMILY_MODEL(6, 151), + ADL_1 = PCM_CPU_FAMILY_MODEL(6, 154), + RPL = PCM_CPU_FAMILY_MODEL(6, 0xb7), + RPL_1 = PCM_CPU_FAMILY_MODEL(6, 0xba), + RPL_2 = PCM_CPU_FAMILY_MODEL(6, 0xbf), + RPL_3 = PCM_CPU_FAMILY_MODEL(6, 0xbe), + MTL = PCM_CPU_FAMILY_MODEL(6, 0xAA), + LNL = PCM_CPU_FAMILY_MODEL(6, 0xBD), + BDX = PCM_CPU_FAMILY_MODEL(6, 79), + KNL = PCM_CPU_FAMILY_MODEL(6, 87), + SKL = PCM_CPU_FAMILY_MODEL(6, 94), + SKX = PCM_CPU_FAMILY_MODEL(6, 85), + ICX_D = PCM_CPU_FAMILY_MODEL(6, 108), + ICX = PCM_CPU_FAMILY_MODEL(6, 106), + SPR = PCM_CPU_FAMILY_MODEL(6, 143), + EMR = PCM_CPU_FAMILY_MODEL(6, 207), + GNR = PCM_CPU_FAMILY_MODEL(6, 173), + SRF = PCM_CPU_FAMILY_MODEL(6, 175), + GNR_D = PCM_CPU_FAMILY_MODEL(6, 174), END_OF_MODEL_LIST = 0x0ffff }; @@ -1906,7 +1918,7 @@ class PCM_API PCM private: bool useSKLPath() const { - switch (cpu_model) + switch (cpu_family_model) { PCM_SKL_PATH_CASES return true; @@ -1920,9 +1932,13 @@ class PCM_API PCM //! \return CPU family uint32 getCPUFamily() const { return (uint32)cpu_family; } - //! \brief Reads CPU model id - //! \return CPU model ID - uint32 getCPUModel() const { return (uint32)cpu_model; } + //! \brief Reads CPU model id (use only with the family API together, don't always assume family 6) + //! \return Internal CPU model ID + uint32 getInternalCPUModel() const { return (uint32)cpu_model_private; } + + //! 
\brief Reads CPU family and model id + //! \return CPU family and model ID (lowest 8 bits is the model, next 8 bits is the family) + uint32 getCPUFamilyModel() const { return cpu_family_model; } //! \brief Reads CPU stepping id //! \return CPU stepping ID @@ -1962,7 +1978,7 @@ class PCM_API PCM //! \return number of QPI links per socket uint64 getQPILinksPerSocket() const { - switch (cpu_model) + switch (cpu_family_model) { case NEHALEM_EP: case WESTMERE_EP: @@ -1993,7 +2009,7 @@ class PCM_API PCM //! \brief Returns the number of detected integrated memory controllers per socket uint32 getMCPerSocket() const { - switch (cpu_model) + switch (cpu_family_model) { case NEHALEM_EP: case WESTMERE_EP: @@ -2022,7 +2038,7 @@ class PCM_API PCM //! \brief Returns the total number of detected memory channels on all integrated memory controllers per socket size_t getMCChannelsPerSocket() const { - switch (cpu_model) + switch (cpu_family_model) { case NEHALEM_EP: case WESTMERE_EP: @@ -2054,7 +2070,7 @@ class PCM_API PCM //! \param controller controller size_t getMCChannels(uint32 socket, uint32 controller) const { - switch (cpu_model) + switch (cpu_family_model) { case NEHALEM_EP: case WESTMERE_EP: @@ -2085,7 +2101,7 @@ class PCM_API PCM //! \brief Returns the total number of detected memory channels on all integrated memory controllers per socket size_t getEDCChannelsPerSocket() const { - switch (cpu_model) + switch (cpu_family_model) { case KNL: return (serverUncorePMUs.size() && serverUncorePMUs[0].get()) ? (serverUncorePMUs[0]->getNumEDCChannels()) : 0; @@ -2098,8 +2114,8 @@ class PCM_API PCM //! 
\return max number of instructions per cycle uint32 getMaxIPC() const { - if (ICL == cpu_model || TGL == cpu_model || RKL == cpu_model) return 5; - switch (cpu_model) + if (ICL == cpu_family_model || TGL == cpu_family_model || RKL == cpu_family_model) return 5; + switch (cpu_family_model) { case ADL: case RPL: @@ -2108,6 +2124,8 @@ class PCM_API PCM case LNL: return 12; case SNOWRIDGE: + case ELKHART_LAKE: + case JASPER_LAKE: return 4; case DENVERTON: return 3; @@ -2142,7 +2160,7 @@ class PCM_API PCM { return 2; } - std::cerr << "MaxIPC is not defined for your cpu model " << cpu_model << '\n'; + std::cerr << "MaxIPC is not defined for your cpu family " << cpu_family << " model " << cpu_model_private << '\n'; assert (0); return 0; } @@ -2150,7 +2168,7 @@ class PCM_API PCM //! \brief Returns the frequency of Power Control Unit uint64 getPCUFrequency() const { - switch (cpu_model) + switch (cpu_family_model) { case JAKETOWN: case IVYTOWN: @@ -2171,7 +2189,7 @@ class PCM_API PCM //! \brief Returns whether it is a server part bool isServerCPU() const { - switch (cpu_model) + switch (cpu_family_model) { case NEHALEM_EP: case NEHALEM_EX: @@ -2361,8 +2379,8 @@ class PCM_API PCM uint64 extractQOSMonitoring(uint64 val); //! \brief Get a string describing the codename of the processor microarchitecture - //! \param cpu_model_ cpu model (if no parameter provided the codename of the detected CPU is returned) - const char * getUArchCodename(const int32 cpu_model_ = -1) const; + //! \param cpu_family_model_ cpu model (if no parameter provided the codename of the detected CPU is returned) + const char * getUArchCodename(const int32 cpu_family_model_ = -1) const; //! \brief Get Brand string of processor static std::string getCPUBrandString(); @@ -2385,31 +2403,31 @@ class PCM_API PCM int64 getCPUMicrocodeLevel() const { return cpu_microcode_level; } //! 
\brief returns true if CPU model is Atom-based - static bool isAtom(const int32 cpu_model_) - { - return cpu_model_ == ATOM - || cpu_model_ == ATOM_2 - || cpu_model_ == CENTERTON - || cpu_model_ == BAYTRAIL - || cpu_model_ == AVOTON - || cpu_model_ == CHERRYTRAIL - || cpu_model_ == APOLLO_LAKE - || cpu_model_ == GEMINI_LAKE - || cpu_model_ == DENVERTON - // || cpu_model_ == SNOWRIDGE do not use Atom code for SNOWRIDGE + static bool isAtom(const int32 cpu_family_model_) + { + return cpu_family_model_ == ATOM + || cpu_family_model_ == ATOM_2 + || cpu_family_model_ == CENTERTON + || cpu_family_model_ == BAYTRAIL + || cpu_family_model_ == AVOTON + || cpu_family_model_ == CHERRYTRAIL + || cpu_family_model_ == APOLLO_LAKE + || cpu_family_model_ == GEMINI_LAKE + || cpu_family_model_ == DENVERTON + // || cpu_family_model_ == SNOWRIDGE do not use Atom code for SNOWRIDGE ; } //! \brief returns true if CPU is Atom-based bool isAtom() const { - return isAtom(cpu_model); + return isAtom(cpu_family_model); } // From commit message: https://github.com/torvalds/linux/commit/e979121b1b1556e184492e6fc149bbe188fc83e6 bool memoryEventErrata() const { - switch (cpu_model) + switch (cpu_family_model) { case SANDY_BRIDGE: case JAKETOWN: @@ -2425,52 +2443,54 @@ class PCM_API PCM bool packageEnergyMetricsAvailable() const { return ( - cpu_model == PCM::JAKETOWN - || cpu_model == PCM::IVYTOWN - || cpu_model == PCM::SANDY_BRIDGE - || cpu_model == PCM::IVY_BRIDGE - || cpu_model == PCM::HASWELL - || cpu_model == PCM::AVOTON - || cpu_model == PCM::CHERRYTRAIL - || cpu_model == PCM::BAYTRAIL - || cpu_model == PCM::APOLLO_LAKE - || cpu_model == PCM::GEMINI_LAKE - || cpu_model == PCM::DENVERTON - || cpu_model == PCM::SNOWRIDGE - || cpu_model == PCM::HASWELLX - || cpu_model == PCM::BROADWELL - || cpu_model == PCM::BDX_DE - || cpu_model == PCM::BDX - || cpu_model == PCM::KNL + cpu_family_model == PCM::JAKETOWN + || cpu_family_model == PCM::IVYTOWN + || cpu_family_model == PCM::SANDY_BRIDGE + || 
cpu_family_model == PCM::IVY_BRIDGE + || cpu_family_model == PCM::HASWELL + || cpu_family_model == PCM::AVOTON + || cpu_family_model == PCM::CHERRYTRAIL + || cpu_family_model == PCM::BAYTRAIL + || cpu_family_model == PCM::APOLLO_LAKE + || cpu_family_model == PCM::GEMINI_LAKE + || cpu_family_model == PCM::DENVERTON + || cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::ELKHART_LAKE + || cpu_family_model == PCM::JASPER_LAKE + || cpu_family_model == PCM::HASWELLX + || cpu_family_model == PCM::BROADWELL + || cpu_family_model == PCM::BDX_DE + || cpu_family_model == PCM::BDX + || cpu_family_model == PCM::KNL || useSKLPath() - || cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::ADL - || cpu_model == PCM::RPL - || cpu_model == PCM::MTL - || cpu_model == PCM::LNL - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::GNR - || cpu_model == PCM::SRF + || cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::ADL + || cpu_family_model == PCM::RPL + || cpu_family_model == PCM::MTL + || cpu_family_model == PCM::LNL + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::SRF ); } bool dramEnergyMetricsAvailable() const { return ( - cpu_model == PCM::JAKETOWN - || cpu_model == PCM::IVYTOWN - || cpu_model == PCM::HASWELLX - || cpu_model == PCM::BDX_DE - || cpu_model == PCM::BDX - || cpu_model == PCM::KNL - || cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::GNR - || cpu_model == PCM::SRF + cpu_family_model == PCM::JAKETOWN + || cpu_family_model == PCM::IVYTOWN + || cpu_family_model == PCM::HASWELLX + || cpu_family_model == PCM::BDX_DE + || cpu_family_model == PCM::BDX + || cpu_family_model == PCM::KNL + || cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR 
+ || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::SRF ); } @@ -2483,18 +2503,18 @@ class PCM_API PCM { return getQPILinksPerSocket() > 0 && ( - cpu_model == PCM::NEHALEM_EX - || cpu_model == PCM::WESTMERE_EX - || cpu_model == PCM::JAKETOWN - || cpu_model == PCM::IVYTOWN - || cpu_model == PCM::HASWELLX - || cpu_model == PCM::BDX - || cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::GNR - || cpu_model == PCM::SRF + cpu_family_model == PCM::NEHALEM_EX + || cpu_family_model == PCM::WESTMERE_EX + || cpu_family_model == PCM::JAKETOWN + || cpu_family_model == PCM::IVYTOWN + || cpu_family_model == PCM::HASWELLX + || cpu_family_model == PCM::BDX + || cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::SRF ); } @@ -2502,29 +2522,29 @@ class PCM_API PCM { return getQPILinksPerSocket() > 0 && ( - cpu_model == PCM::NEHALEM_EX - || cpu_model == PCM::WESTMERE_EX - || cpu_model == PCM::JAKETOWN - || cpu_model == PCM::IVYTOWN - || (cpu_model == PCM::SKX && cpu_stepping > 1) - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::GNR - || cpu_model == PCM::SRF + cpu_family_model == PCM::NEHALEM_EX + || cpu_family_model == PCM::WESTMERE_EX + || cpu_family_model == PCM::JAKETOWN + || cpu_family_model == PCM::IVYTOWN + || (cpu_family_model == PCM::SKX && cpu_stepping > 1) + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::SRF ); } bool localMemoryRequestRatioMetricAvailable() const { - return cpu_model == PCM::HASWELLX - || cpu_model == PCM::BDX - || cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::SRF - || cpu_model == 
PCM::GNR + return cpu_family_model == PCM::HASWELLX + || cpu_family_model == PCM::BDX + || cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::GNR ; } @@ -2536,14 +2556,14 @@ class PCM_API PCM bool nearMemoryMetricsAvailable() const { return ( - cpu_model == PCM::SRF - || cpu_model == PCM::GNR + cpu_family_model == PCM::SRF + || cpu_family_model == PCM::GNR ); } bool memoryTrafficMetricsAvailable() const { - return (!(isAtom() || cpu_model == PCM::CLARKDALE)) + return (!(isAtom() || cpu_family_model == PCM::CLARKDALE)) ; } @@ -2554,17 +2574,17 @@ class PCM_API PCM size_t getHBMCASTransferSize() const { - return (SPR == cpu_model) ? 32ULL : 64ULL; + return (SPR == cpu_family_model) ? 32ULL : 64ULL; } bool memoryIOTrafficMetricAvailable() const { - if (cpu_model == TGL) return false; + if (cpu_family_model == TGL) return false; return ( - cpu_model == PCM::SANDY_BRIDGE - || cpu_model == PCM::IVY_BRIDGE - || cpu_model == PCM::HASWELL - || cpu_model == PCM::BROADWELL + cpu_family_model == PCM::SANDY_BRIDGE + || cpu_family_model == PCM::IVY_BRIDGE + || cpu_family_model == PCM::HASWELL + || cpu_family_model == PCM::BROADWELL || useSKLPath() ); } @@ -2572,13 +2592,13 @@ class PCM_API PCM bool IIOEventsAvailable() const { return ( - cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SNOWRIDGE - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::SRF - || cpu_model == PCM::GNR + cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::GNR ); } @@ -2587,20 +2607,20 @@ class PCM_API PCM return MSR.empty() == false && getMaxNumOfUncorePMUs(UBOX_PMU_ID) > 0ULL && getNumCores() == getNumOnlineCores() - && PCM::GNR != 
cpu_model - && PCM::SRF != cpu_model + && PCM::GNR != cpu_family_model + && PCM::SRF != cpu_family_model ; } bool LatencyMetricsAvailable() const { return ( - cpu_model == PCM::HASWELLX - || cpu_model == PCM::BDX - || cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR + cpu_family_model == PCM::HASWELLX + || cpu_family_model == PCM::BDX + || cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR || useSKLPath() ); } @@ -2608,10 +2628,10 @@ class PCM_API PCM bool DDRLatencyMetricsAvailable() const { return ( - cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR + cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR ); } @@ -2620,10 +2640,10 @@ class PCM_API PCM return ( isCLX() || isCPX() - || cpu_model == PCM::ICX - || cpu_model == PCM::SNOWRIDGE - || cpu_model == SPR - || cpu_model == EMR + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == SPR + || cpu_family_model == EMR ); } @@ -2632,8 +2652,8 @@ class PCM_API PCM return ( isCLX() || isCPX() - || cpu_model == PCM::ICX - || cpu_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SNOWRIDGE ); } @@ -2645,70 +2665,70 @@ class PCM_API PCM bool LLCReadMissLatencyMetricsAvailable() const { return ( - HASWELLX == cpu_model - || BDX_DE == cpu_model - || BDX == cpu_model + HASWELLX == cpu_family_model + || BDX_DE == cpu_family_model + || BDX == cpu_family_model || isCLX() || isCPX() #ifdef PCM_ENABLE_LLCRDLAT_SKX_MP - || SKX == cpu_model + || SKX == cpu_family_model #else - || ((SKX == cpu_model) && (num_sockets == 1)) + || ((SKX == cpu_family_model) && (num_sockets == 1)) #endif - || ICX == cpu_model - || SPR == cpu_model - || SNOWRIDGE == cpu_model + || ICX == cpu_family_model + || SPR 
== cpu_family_model + || SNOWRIDGE == cpu_family_model ); } bool hasBecktonUncore() const { return ( - cpu_model == PCM::NEHALEM_EX - || cpu_model == PCM::WESTMERE_EX + cpu_family_model == PCM::NEHALEM_EX + || cpu_family_model == PCM::WESTMERE_EX ); } bool hasPCICFGUncore() const // has PCICFG uncore PMON { return ( - cpu_model == PCM::JAKETOWN - || cpu_model == PCM::SNOWRIDGE - || cpu_model == PCM::IVYTOWN - || cpu_model == PCM::HASWELLX - || cpu_model == PCM::BDX_DE - || cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::GNR - || cpu_model == PCM::SRF - || cpu_model == PCM::BDX - || cpu_model == PCM::KNL + cpu_family_model == PCM::JAKETOWN + || cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::IVYTOWN + || cpu_family_model == PCM::HASWELLX + || cpu_family_model == PCM::BDX_DE + || cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::BDX + || cpu_family_model == PCM::KNL ); } bool isSkxCompatible() const { return ( - cpu_model == PCM::SKX + cpu_family_model == PCM::SKX ); } - static bool hasUPI(const int32 cpu_model_) // Intel(r) Ultra Path Interconnect + static bool hasUPI(const int32 cpu_family_model_) // Intel(r) Ultra Path Interconnect { return ( - cpu_model_ == PCM::SKX - || cpu_model_ == PCM::ICX - || cpu_model_ == PCM::SPR - || cpu_model_ == PCM::EMR - || cpu_model_ == PCM::GNR - || cpu_model_ == PCM::SRF + cpu_family_model_ == PCM::SKX + || cpu_family_model_ == PCM::ICX + || cpu_family_model_ == PCM::SPR + || cpu_family_model_ == PCM::EMR + || cpu_family_model_ == PCM::GNR + || cpu_family_model_ == PCM::SRF ); } bool hasUPI() const { - return hasUPI(cpu_model); + return hasUPI(cpu_family_model); } const char * xPI() const @@ -2722,12 +2742,12 @@ class PCM_API PCM bool hasCHA() const { return ( - 
cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::GNR - || cpu_model == PCM::SRF + cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::SRF ); } @@ -2738,24 +2758,24 @@ class PCM_API PCM bool useSkylakeEvents() const { return useSKLPath() - || PCM::SKX == cpu_model - || PCM::ICX == cpu_model - || PCM::SPR == cpu_model - || PCM::EMR == cpu_model - || PCM::GNR == cpu_model + || PCM::SKX == cpu_family_model + || PCM::ICX == cpu_family_model + || PCM::SPR == cpu_family_model + || PCM::EMR == cpu_family_model + || PCM::GNR == cpu_family_model ; } bool hasClientMCCounters() const { - return cpu_model == SANDY_BRIDGE - || cpu_model == IVY_BRIDGE - || cpu_model == HASWELL - || cpu_model == BROADWELL - || cpu_model == ADL - || cpu_model == RPL - || cpu_model == MTL - || cpu_model == LNL + return cpu_family_model == SANDY_BRIDGE + || cpu_family_model == IVY_BRIDGE + || cpu_family_model == HASWELL + || cpu_family_model == BROADWELL + || cpu_family_model == ADL + || cpu_family_model == RPL + || cpu_family_model == MTL + || cpu_family_model == LNL || useSKLPath() ; } @@ -2765,9 +2785,9 @@ class PCM_API PCM return packageEnergyMetricsAvailable() && hasClientMCCounters() && num_sockets == 1; } - static double getBytesPerFlit(int32 cpu_model_) + static double getBytesPerFlit(int32 cpu_family_model_) { - if (hasUPI(cpu_model_)) + if (hasUPI(cpu_family_model_)) { // 172 bits per UPI flit return 172./8.; @@ -2778,12 +2798,12 @@ class PCM_API PCM double getBytesPerFlit() const { - return getBytesPerFlit(cpu_model); + return getBytesPerFlit(cpu_family_model); } - static double getDataBytesPerFlit(int32 cpu_model_) + static double getDataBytesPerFlit(const int32 cpu_family_model_) { - if (hasUPI(cpu_model_)) + if (hasUPI(cpu_family_model_)) { // 9 UPI flits to transfer 64 
bytes return 64./9.; @@ -2794,12 +2814,12 @@ class PCM_API PCM double getDataBytesPerFlit() const { - return getDataBytesPerFlit(cpu_model); + return getDataBytesPerFlit(cpu_family_model); } - static double getFlitsPerLinkCycle(int32 cpu_model_) + static double getFlitsPerLinkCycle(const int32 cpu_family_model_) { - if (hasUPI(cpu_model_)) + if (hasUPI(cpu_family_model_)) { // 5 UPI flits sent every 6 link cycles return 5./6.; @@ -2807,14 +2827,14 @@ class PCM_API PCM return 2.; } - static double getBytesPerLinkCycle(int32 cpu_model_) + static double getBytesPerLinkCycle(const int32 cpu_family_model_) { - return getBytesPerFlit(cpu_model_) * getFlitsPerLinkCycle(cpu_model_); + return getBytesPerFlit(cpu_family_model_) * getFlitsPerLinkCycle(cpu_family_model_); } double getBytesPerLinkCycle() const { - return getBytesPerLinkCycle(cpu_model); + return getBytesPerLinkCycle(cpu_family_model); } double getLinkTransfersPerLinkCycle() const @@ -3126,8 +3146,8 @@ template uint64 getDRAMClocks(uint32 channel, const CounterStateType & before, const CounterStateType & after) { const auto clk = after.DRAMClocks[channel] - before.DRAMClocks[channel]; - const auto cpu_model = PCM::getInstance()->getCPUModel(); - if (cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE) + const auto cpu_family_model = PCM::getInstance()->getCPUFamilyModel(); + if (cpu_family_model == PCM::ICX || cpu_family_model == PCM::SNOWRIDGE) { return 2 * clk; } @@ -3405,16 +3425,16 @@ double getDRAMConsumedJoules(const CounterStateType & before, const CounterState PCM * m = PCM::getInstance(); if (!m) return -1.; double dram_joules_per_energy_unit = 0.; - const auto cpu_model = m->getCPUModel(); - - if (PCM::HASWELLX == cpu_model - || PCM::BDX_DE == cpu_model - || PCM::BDX == cpu_model - || PCM::SKX == cpu_model - || PCM::ICX == cpu_model - || PCM::GNR == cpu_model - || PCM::SRF == cpu_model - || PCM::KNL == cpu_model + const auto cpu_family_model = m->getCPUFamilyModel(); + + if (PCM::HASWELLX == 
cpu_family_model + || PCM::BDX_DE == cpu_family_model + || PCM::BDX == cpu_family_model + || PCM::SKX == cpu_family_model + || PCM::ICX == cpu_family_model + || PCM::GNR == cpu_family_model + || PCM::SRF == cpu_family_model + || PCM::KNL == cpu_family_model ) { /* as described in sections 5.3.2 (DRAM_POWER_INFO) and 5.3.3 (DRAM_ENERGY_STATUS) of * Volume 2 (Registers) of @@ -4253,18 +4273,20 @@ uint64 getL2CacheMisses(const CounterStateType & before, const CounterStateType { auto pcm = PCM::getInstance(); if (pcm->isL2CacheMissesAvailable() == false) return 0ULL; - const auto cpu_model = pcm->getCPUModel(); + const auto cpu_family_model = pcm->getCPUFamilyModel(); if (pcm->useSkylakeEvents() - || cpu_model == PCM::SNOWRIDGE - || cpu_model == PCM::SRF - || cpu_model == PCM::ADL - || cpu_model == PCM::RPL - || cpu_model == PCM::MTL - || cpu_model == PCM::LNL + || cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::ELKHART_LAKE + || cpu_family_model == PCM::JASPER_LAKE + || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::ADL + || cpu_family_model == PCM::RPL + || cpu_family_model == PCM::MTL + || cpu_family_model == PCM::LNL ) { return after.Event[BasicCounterState::SKLL2MissPos] - before.Event[BasicCounterState::SKLL2MissPos]; } - else if (pcm->isAtom() || cpu_model == PCM::KNL) + else if (pcm->isAtom() || cpu_family_model == PCM::KNL) { return after.Event[BasicCounterState::ArchLLCMissPos] - before.Event[BasicCounterState::ArchLLCMissPos]; } @@ -4290,7 +4312,7 @@ uint64 getL2CacheHits(const CounterStateType & before, const CounterStateType & { auto pcm = PCM::getInstance(); if (pcm->isL2CacheHitsAvailable() == false) return 0ULL; - if (pcm->isAtom() || pcm->getCPUModel() == PCM::KNL) + if (pcm->isAtom() || pcm->getCPUFamilyModel() == PCM::KNL) { uint64 L2Miss = after.Event[BasicCounterState::ArchLLCMissPos] - before.Event[BasicCounterState::ArchLLCMissPos]; uint64 L2Ref = after.Event[BasicCounterState::ArchLLCRefPos] - 
before.Event[BasicCounterState::ArchLLCRefPos]; @@ -4366,13 +4388,15 @@ uint64 getL3CacheHitsSnoop(const CounterStateType & before, const CounterStateTy { auto pcm = PCM::getInstance(); if (!pcm->isL3CacheHitsSnoopAvailable()) return 0; - const auto cpu_model = pcm->getCPUModel(); - if (cpu_model == PCM::SNOWRIDGE - || cpu_model == PCM::SRF - || cpu_model == PCM::ADL - || cpu_model == PCM::RPL - || cpu_model == PCM::MTL - || cpu_model == PCM::LNL + const auto cpu_family_model = pcm->getCPUFamilyModel(); + if (cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::ELKHART_LAKE + || cpu_family_model == PCM::JASPER_LAKE + || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::ADL + || cpu_family_model == PCM::RPL + || cpu_family_model == PCM::MTL + || cpu_family_model == PCM::LNL ) { const int64 misses = getL3CacheMisses(before, after); diff --git a/src/opCode-106.txt b/src/opCode-6-106.txt similarity index 100% rename from src/opCode-106.txt rename to src/opCode-6-106.txt diff --git a/src/opCode-134.txt b/src/opCode-6-134.txt similarity index 100% rename from src/opCode-134.txt rename to src/opCode-6-134.txt diff --git a/src/opCode-143-accel.txt b/src/opCode-6-143-accel.txt similarity index 100% rename from src/opCode-143-accel.txt rename to src/opCode-6-143-accel.txt diff --git a/src/opCode-143.txt b/src/opCode-6-143.txt similarity index 100% rename from src/opCode-143.txt rename to src/opCode-6-143.txt diff --git a/src/opCode-173.txt b/src/opCode-6-173.txt similarity index 100% rename from src/opCode-173.txt rename to src/opCode-6-173.txt diff --git a/src/opCode-175.txt b/src/opCode-6-175.txt similarity index 100% rename from src/opCode-175.txt rename to src/opCode-6-175.txt diff --git a/src/opCode-207.txt b/src/opCode-6-207.txt similarity index 100% rename from src/opCode-207.txt rename to src/opCode-6-207.txt diff --git a/src/opCode-85.txt b/src/opCode-6-85.txt similarity index 100% rename from src/opCode-85.txt rename to src/opCode-6-85.txt diff 
--git a/src/pcm-accel-common.cpp b/src/pcm-accel-common.cpp index dfd3f4ab..4b95798c 100644 --- a/src/pcm-accel-common.cpp +++ b/src/pcm-accel-common.cpp @@ -390,7 +390,7 @@ void AcceleratorCounterState::setEvents(PCM *m,ACCEL_IP accel, std::string spec { if (evtfile==false) //All platform use the spr config file by default. { - ev_file_name = "opCode-143-accel.txt"; + ev_file_name = "opCode-6-143-accel.txt"; } else { diff --git a/src/pcm-iio.cpp b/src/pcm-iio.cpp index d36f5f17..07a01311 100644 --- a/src/pcm-iio.cpp +++ b/src/pcm-iio.cpp @@ -855,7 +855,7 @@ class WhitleyPlatformMapping: public IPlatformMapping10Nm { const std::string * iio_stack_names; public: WhitleyPlatformMapping(int cpu_model, uint32_t sockets_count) : IPlatformMapping10Nm(cpu_model, sockets_count), - icx_d(PCM::getInstance()->getCPUModelFromCPUID() == PCM::ICX_D), + icx_d(PCM::getInstance()->getCPUFamilyModelFromCPUID() == PCM::ICX_D), sad_to_pmu_id_mapping(icx_d ? icx_d_sad_to_pmu_id_mapping : icx_sad_to_pmu_id_mapping), iio_stack_names(icx_d ? 
icx_d_iio_stack_names : icx_iio_stack_names) { @@ -1687,21 +1687,21 @@ bool BirchStreamPlatform::pciTreeDiscover(std::vector IPlatformMapping::getPlatformMapping(int cpu_model, uint32_t sockets_count) +std::unique_ptr IPlatformMapping::getPlatformMapping(int cpu_family_model, uint32_t sockets_count) { - switch (cpu_model) { + switch (cpu_family_model) { case PCM::SKX: - return std::unique_ptr{new PurleyPlatformMapping(cpu_model, sockets_count)}; + return std::unique_ptr{new PurleyPlatformMapping(cpu_family_model, sockets_count)}; case PCM::ICX: - return std::unique_ptr{new WhitleyPlatformMapping(cpu_model, sockets_count)}; + return std::unique_ptr{new WhitleyPlatformMapping(cpu_family_model, sockets_count)}; case PCM::SNOWRIDGE: - return std::unique_ptr{new JacobsvillePlatformMapping(cpu_model, sockets_count)}; + return std::unique_ptr{new JacobsvillePlatformMapping(cpu_family_model, sockets_count)}; case PCM::SPR: case PCM::EMR: - return std::unique_ptr{new EagleStreamPlatformMapping(cpu_model, sockets_count)}; + return std::unique_ptr{new EagleStreamPlatformMapping(cpu_family_model, sockets_count)}; case PCM::SRF: case PCM::GNR: - return std::unique_ptr{new BirchStreamPlatform(cpu_model, sockets_count)}; + return std::unique_ptr{new BirchStreamPlatform(cpu_family_model, sockets_count)}; default: return nullptr; } @@ -1709,7 +1709,7 @@ std::unique_ptr IPlatformMapping::getPlatformMapping(int cpu_m ccr* get_ccr(PCM* m, uint64_t& ccr) { - switch (m->getCPUModel()) + switch (m->getCPUFamilyModel()) { case PCM::SKX: return new skx_ccr(ccr); @@ -1976,7 +1976,7 @@ int mainThrows(int argc, char * argv[]) PCIDB pciDB; load_PCIDB(pciDB); - auto mapping = IPlatformMapping::getPlatformMapping(m->getCPUModel(), m->getNumSockets()); + auto mapping = IPlatformMapping::getPlatformMapping(m->getCPUFamilyModel(), m->getNumSockets()); if (!mapping) { cerr << "Failed to discover pci tree: unknown platform" << endl; exit(EXIT_FAILURE); @@ -2002,7 +2002,7 @@ int mainThrows(int argc, 
char * argv[]) string ev_file_name; if (m->IIOEventsAvailable()) { - ev_file_name = "opCode-" + std::to_string(m->getCPUModel()) + ".txt"; + ev_file_name = "opCode-" + std::to_string(m->getCPUFamily()) + "-" + std::to_string(m->getInternalCPUModel()) + ".txt"; } else { diff --git a/src/pcm-lspci.cpp b/src/pcm-lspci.cpp index 6dc716e9..ae026a80 100644 --- a/src/pcm-lspci.cpp +++ b/src/pcm-lspci.cpp @@ -103,7 +103,7 @@ int mainThrows(int /*argc*/, char * /*argv*/[]) if (!m->isSkxCompatible()) { - cerr << "PCI tree display is currently not supported for processor model " << m->getCPUModel() << "\n"; + cerr << "PCI tree display is currently not supported for processor family/model 0x" << std::hex << m->getCPUFamilyModel() << std::dec << "\n"; } else { diff --git a/src/pcm-memory.cpp b/src/pcm-memory.cpp index c2e9418f..5cd5d2a7 100644 --- a/src/pcm-memory.cpp +++ b/src/pcm-memory.cpp @@ -424,8 +424,8 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t cout << "\n"; } if ( md->metrics == PartialWrites - && m->getCPUModel() != PCM::SRF - && m->getCPUModel() != PCM::GNR + && m->getCPUFamilyModel() != PCM::SRF + && m->getCPUFamilyModel() != PCM::GNR ) { for (uint32 i=skt; i<(skt+no_columns); ++i) { @@ -733,8 +733,8 @@ void display_bandwidth_csv(PCM *m, memdata_t *md, uint64 /*elapsedTime*/, const if (m->HBMmemoryTrafficMetricsAvailable() == false) { if ( md->metrics == PartialWrites - && m->getCPUModel() != PCM::GNR - && m->getCPUModel() != PCM::SRF + && m->getCPUFamilyModel() != PCM::GNR + && m->getCPUFamilyModel() != PCM::SRF ) { choose(outputType, @@ -905,9 +905,9 @@ void calculate_bandwidth(PCM *m, //const uint32 num_edc_channels = m->getEDCChannelsPerSocket(); memdata_t md; md.metrics = metrics; - const auto cpu_model = m->getCPUModel(); - md.M2M_NM_read_hit_rate_supported = (cpu_model == PCM::SKX); - md.NM_hit_rate_supported = (cpu_model == PCM::ICX); + const auto cpu_family_model = m->getCPUFamilyModel(); + 
md.M2M_NM_read_hit_rate_supported = (cpu_family_model == PCM::SKX); + md.NM_hit_rate_supported = (cpu_family_model == PCM::ICX); md.BHS_NM = m->nearMemoryMetricsAvailable(); md.BHS = md.BHS_NM; static bool mm_once = true; @@ -991,7 +991,7 @@ void calculate_bandwidth(PCM *m, uint64 memoryModeHits = 0; reads = getMCCounter(channel, ServerUncorePMUs::EventPosition::READ, uncState1[skt], uncState2[skt]); writes = getMCCounter(channel, ServerUncorePMUs::EventPosition::WRITE, uncState1[skt], uncState2[skt]); - switch (cpu_model) + switch (cpu_family_model) { case PCM::GNR: case PCM::SRF: @@ -1057,8 +1057,8 @@ void calculate_bandwidth(PCM *m, md.MemoryMode_Hit_socket[skt] += toRate(memoryModeHits); } else if ( - cpu_model != PCM::GNR - && cpu_model != PCM::SRF + cpu_family_model != PCM::GNR + && cpu_family_model != PCM::SRF ) { md.partial_write[skt] += (uint64)(getMCCounter(channel, ServerUncorePMUs::EventPosition::PARTIAL, uncState1[skt], uncState2[skt]) / (elapsedTime / 1000.0)); @@ -1248,7 +1248,7 @@ class CHAEventCollector pcm(m) { assert(pcm); - switch (pcm->getCPUModel()) + switch (pcm->getCPUFamilyModel()) { case PCM::SPR: eventGroups = { @@ -1516,10 +1516,10 @@ int mainThrows(int argc, char * argv[]) m->disableJKTWorkaround(); print_cpu_details(); - const auto cpu_model = m->getCPUModel(); + const auto cpu_family_model = m->getCPUFamilyModel(); if (!m->hasPCICFGUncore()) { - cerr << "Unsupported processor model (" << cpu_model << ").\n"; + cerr << "Unsupported processor model (0x" << std::hex << cpu_family_model << std::dec << ").\n"; if (m->memoryTrafficMetricsAvailable()) cerr << "For processor-level memory bandwidth statistics please use 'pcm' utility\n"; exit(EXIT_FAILURE); @@ -1577,7 +1577,7 @@ int mainThrows(int argc, char * argv[]) shared_ptr chaEventCollector; - SPR_CXL = (PCM::SPR == cpu_model || PCM::EMR == cpu_model) && (getNumCXLPorts(m) > 0); + SPR_CXL = (PCM::SPR == cpu_family_model || PCM::EMR == cpu_family_model) && (getNumCXLPorts(m) > 0); if 
(SPR_CXL) { chaEventCollector = std::make_shared(delay, sysCmd, mainLoop, m); diff --git a/src/pcm-pcie.cpp b/src/pcm-pcie.cpp index da55b93e..69d1c380 100644 --- a/src/pcm-pcie.cpp +++ b/src/pcm-pcie.cpp @@ -95,7 +95,7 @@ void print_usage(const string & progname) IPlatform *IPlatform::getPlatform(PCM *m, bool csv, bool print_bandwidth, bool print_additional_info, uint32 delay) { - switch (m->getCPUModel()) { + switch (m->getCPUFamilyModel()) { case PCM::GNR: case PCM::SRF: return new BirchStreamPlatform(m, csv, print_bandwidth, print_additional_info, delay); diff --git a/src/pcm-power.cpp b/src/pcm-power.cpp index 7982375a..d5ec90f5 100644 --- a/src/pcm-power.cpp +++ b/src/pcm-power.cpp @@ -251,10 +251,10 @@ int mainThrows(int argc, char * argv[]) m->disableJKTWorkaround(); - const int cpu_model = m->getCPUModel(); + const int cpu_family_model = m->getCPUFamilyModel(); if (!(m->hasPCICFGUncore())) { - cerr << "Unsupported processor model (" << cpu_model << ").\n"; + cerr << "Unsupported processor model (0x" << std::hex << cpu_family_model << std::dec << ").\n"; exit(EXIT_FAILURE); } @@ -262,7 +262,7 @@ int mainThrows(int argc, char * argv[]) PCM::ExtendedCustomCoreEventDescription conf; int32 nCorePowerLicenses = 0; std::vector licenseStr; - switch (cpu_model) + switch (cpu_family_model) { case PCM::SKX: case PCM::ICX: @@ -313,7 +313,7 @@ int mainThrows(int argc, char * argv[]) cerr << "\nMC counter group: " << imc_profile << "\n"; cerr << "PCU counter group: " << pcu_profile << "\n"; if (pcu_profile == 0) { - if (cpu_model == PCM::HASWELLX || cpu_model == PCM::BDX_DE || cpu_model == PCM::SKX) + if (cpu_family_model == PCM::HASWELLX || cpu_family_model == PCM::BDX_DE || cpu_family_model == PCM::SKX) cerr << "Your processor does not support frequency band statistics\n"; else cerr << "Freq bands [0/1/2]: " << freq_band[0] * 100 << " MHz; " << freq_band[1] * 100 << " MHz; " << freq_band[2] * 100 << " MHz; \n"; @@ -444,7 +444,7 @@ int mainThrows(int argc, char * 
argv[]) switch (pcu_profile) { case 0: - if (cpu_model == PCM::HASWELLX || cpu_model == PCM::BDX_DE || cpu_model == PCM::SKX) + if (cpu_family_model == PCM::HASWELLX || cpu_family_model == PCM::BDX_DE || cpu_family_model == PCM::SKX) break; printHeader(true); cout << "; Freq band 0/1/2 cycles: " << 100. * getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) << "%" @@ -455,7 +455,7 @@ int mainThrows(int argc, char * argv[]) case 1: printHeader(true); - cout << ((cpu_model == PCM::SKX) ? "; core C0_1/C3/C6_7-state residency: " : "; core C0/C3/C6-state residency: ") + cout << ((cpu_family_model == PCM::SKX) ? "; core C0_1/C3/C6_7-state residency: " : "; core C0/C3/C6-state residency: ") << getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) << "; " << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket]) << "; " << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) @@ -475,28 +475,28 @@ int mainThrows(int argc, char * argv[]) cout << "; Thermal freq limit cycles: " << getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket]) * 100. << " %" << "; Power freq limit cycles:" << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket]) * 100. << " %"; if( - cpu_model != PCM::SKX - && cpu_model != PCM::ICX - && cpu_model != PCM::SNOWRIDGE - && cpu_model != PCM::SPR - && cpu_model != PCM::EMR - && cpu_model != PCM::SRF - && cpu_model != PCM::GNR - && cpu_model != PCM::GNR_D + cpu_family_model != PCM::SKX + && cpu_family_model != PCM::ICX + && cpu_family_model != PCM::SNOWRIDGE + && cpu_family_model != PCM::SPR + && cpu_family_model != PCM::EMR + && cpu_family_model != PCM::SRF + && cpu_family_model != PCM::GNR + && cpu_family_model != PCM::GNR_D ) cout << "; Clipped freq limit cycles:" << getNormalizedPCUCounter(u, 3, BeforeState[socket], AfterState[socket]) * 100. 
<< " %"; cout << "\n"; break; case 4: - if ( cpu_model == PCM::SKX - || cpu_model == PCM::ICX - || cpu_model == PCM::SNOWRIDGE - || cpu_model == PCM::SPR - || cpu_model == PCM::EMR - || cpu_model == PCM::SRF - || cpu_model == PCM::GNR - || cpu_model == PCM::GNR_D + if ( cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::GNR_D ) { cout << "This PCU profile is not supported on your processor\n"; @@ -512,7 +512,7 @@ int mainThrows(int argc, char * argv[]) printHeader(true); cout << "; Frequency transition count: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 1, BeforeState[socket], AfterState[socket]) << " " << "; Cycles spent changing frequency: " << getNormalizedPCUCounter(u, 2, BeforeState[socket], AfterState[socket], m) * 100. << " %"; - if (PCM::HASWELLX == cpu_model) { + if (PCM::HASWELLX == cpu_family_model) { cout << "; UFS transition count: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 3, BeforeState[socket], AfterState[socket]) << " "; cout << "; UFS transition cycles: " << getNormalizedPCUCounter(u, 0, BeforeState[socket], AfterState[socket], m) * 100. << " %"; } @@ -520,11 +520,11 @@ int mainThrows(int argc, char * argv[]) break; case 6: printHeader(false); - if (cpu_model == PCM::HASWELLX || PCM::BDX_DE == cpu_model) + if (cpu_family_model == PCM::HASWELLX || PCM::BDX_DE == cpu_family_model) cout << "; PC1e+ residency: " << getNormalizedPCUCounter(u, 0, BeforeState[socket], AfterState[socket], m) * 100. 
<< " %" "; PC1e+ transition count: " << getUncoreCounter(PCM::PCU_PMU_ID, u, 1, BeforeState[socket], AfterState[socket]) << " "; - switch (cpu_model) + switch (cpu_family_model) { case PCM::IVYTOWN: case PCM::HASWELLX: @@ -548,7 +548,7 @@ int mainThrows(int argc, char * argv[]) cout << "\n"; break; case 7: - if (PCM::HASWELLX == cpu_model || PCM::BDX_DE == cpu_model || PCM::BDX == cpu_model) { + if (PCM::HASWELLX == cpu_family_model || PCM::BDX_DE == cpu_family_model || PCM::BDX == cpu_family_model) { printHeader(false); cout << "; UFS_TRANSITIONS_PERF_P_LIMIT: " << getNormalizedPCUCounter(u, 0, BeforeState[socket], AfterState[socket], m) * 100. << " %" << "; UFS_TRANSITIONS_IO_P_LIMIT: " << getNormalizedPCUCounter(u, 1, BeforeState[socket], AfterState[socket], m) * 100. << " %" @@ -558,7 +558,7 @@ int mainThrows(int argc, char * argv[]) } break; case 8: - if (PCM::HASWELLX == cpu_model || PCM::BDX_DE == cpu_model || PCM::BDX == cpu_model) { + if (PCM::HASWELLX == cpu_family_model || PCM::BDX_DE == cpu_family_model || PCM::BDX == cpu_family_model) { printHeader(false); cout << "; UFS_TRANSITIONS_DOWN: " << getNormalizedPCUCounter(u, 0, BeforeState[socket], AfterState[socket], m) * 100. 
<< " %" << "\n"; diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 400e5139..a6d4f1f7 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -558,7 +558,7 @@ AddEventStatus addEventFromDB(PCM::RawPMUConfigs& curPMUConfigs, string fullEven { try { - path = std::string("PMURegisterDeclarations/") + pcm->getCPUFamilyModelString(pcm->getCPUFamily(), pcm->getCPUModel(), (uint32)stepping) + ".json"; + path = std::string("PMURegisterDeclarations/") + pcm->getCPUFamilyModelString(pcm->getCPUFamily(), pcm->getInternalCPUModel(), (uint32)stepping) + ".json"; std::ifstream in(path); if (!in.is_open()) diff --git a/src/pcm-tsx.cpp b/src/pcm-tsx.cpp index 01a18c0f..692c4cdc 100644 --- a/src/pcm-tsx.cpp +++ b/src/pcm-tsx.cpp @@ -307,7 +307,7 @@ int mainThrows(int argc, char * argv[]) PCM * m = PCM::getInstance(); const size_t numCtrSupported = m->getMaxCustomCoreEvents(); - switch (m->getCPUModel()) + switch (m->getCPUFamilyModel()) { case PCM::SKL: case PCM::SKX: @@ -321,6 +321,8 @@ int mainThrows(int argc, char * argv[]) break; case PCM::SPR: case PCM::EMR: + case PCM::GNR: + case PCM::GNR_D: eventDefinition = sprEventDefinition; break; } diff --git a/src/pcm.cpp b/src/pcm.cpp index 60ed261a..ba14b620 100644 --- a/src/pcm.cpp +++ b/src/pcm.cpp @@ -165,7 +165,7 @@ void print_output(PCM * m, const std::bitset & ycores, const SystemCounterState& sstate1, const SystemCounterState& sstate2, - const int cpu_model, + const int cpu_family_model, const bool show_core_output, const bool show_partial_core_output, const bool show_socket_output, @@ -199,7 +199,7 @@ void print_output(PCM * m, cout << " L3MISS: L3 (read) cache misses \n"; if (m->isL2CacheHitsAvailable()) { - if (m->isAtom() || cpu_model == PCM::KNL) + if (m->isAtom() || cpu_family_model == PCM::KNL) cout << " L2MISS: L2 (read) cache misses \n"; else cout << " L2MISS: L2 (read) cache misses (including other core's L2 cache *hits*) \n"; @@ -235,7 +235,7 @@ void print_output(PCM * m, const char * longDiv = 
"---------------------------------------------------------------------------------------------------------------\n"; cout.precision(2); cout << std::fixed; - if (cpu_model == PCM::KNL) + if (cpu_family_model == PCM::KNL) cout << " Proc Tile Core Thread |"; else cout << " Core (SKT) |"; @@ -288,7 +288,7 @@ void print_output(PCM * m, if (m->isCoreOnline(i) == false || (show_partial_core_output && ycores.test(i) == false)) continue; - if (cpu_model == PCM::KNL) + if (cpu_family_model == PCM::KNL) cout << setfill(' ') << internal << setw(5) << i << setw(5) << m->getTileId(i) << setw(5) << m->getCoreId(i) << setw(7) << m->getThreadId(i); @@ -302,7 +302,7 @@ void print_output(PCM * m, } if (show_socket_output) { - if (!(m->getNumSockets() == 1 && (m->isAtom() || cpu_model == PCM::KNL))) + if (!(m->getNumSockets() == 1 && (m->isAtom() || cpu_family_model == PCM::KNL))) { cout << longDiv; for (uint32 i = 0; i < m->getNumSockets(); ++i) @@ -318,7 +318,7 @@ void print_output(PCM * m, if (show_system_output) { - if (cpu_model == PCM::KNL) + if (cpu_family_model == PCM::KNL) cout << setw(22) << left << " TOTAL" << internal << setw(7-5); else cout << " TOTAL *"; @@ -714,7 +714,6 @@ void print_basic_metrics_csv_semicolons(const PCM * m, const string & header) void print_csv_header(PCM * m, const std::bitset & ycores, - const int /*cpu_model*/, const bool show_core_output, const bool show_partial_core_output, const bool show_socket_output, @@ -1137,7 +1136,6 @@ void print_csv(PCM * m, const std::bitset & ycores, const SystemCounterState& sstate1, const SystemCounterState& sstate2, - const int /*cpu_model*/, const bool show_core_output, const bool show_partial_core_output, const bool show_socket_output, @@ -1579,7 +1577,7 @@ int mainThrows(int argc, char * argv[]) std::vector cstates1, cstates2; std::vector sktstate1, sktstate2; SystemCounterState sstate1, sstate2; - const auto cpu_model = m->getCPUModel(); + const auto cpu_family_model = m->getCPUFamilyModel(); 
print_pid_collection_message(pid); @@ -1596,7 +1594,7 @@ int mainThrows(int argc, char * argv[]) // cerr << "DEBUG: Delay: " << delay << " seconds. Blocked: " << m->isBlocked() << "\n"; if (csv_output) { - print_csv_header(m, ycores, cpu_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output); + print_csv_header(m, ycores, show_core_output, show_partial_core_output, show_socket_output, show_system_output); } m->getAllCounterStates(sstate1, sktstate1, cstates1); @@ -1615,10 +1613,10 @@ int mainThrows(int argc, char * argv[]) if (csv_output) print_csv(m, cstates1, cstates2, sktstate1, sktstate2, ycores, sstate1, sstate2, - cpu_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output); + show_core_output, show_partial_core_output, show_socket_output, show_system_output); else print_output(m, cstates1, cstates2, sktstate1, sktstate2, ycores, sstate1, sstate2, - cpu_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output, + cpu_family_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output, metricVersion); std::swap(sstate1, sstate2); diff --git a/tests/pcm-fuzz.cpp b/tests/pcm-fuzz.cpp index 047d0a8a..941bbbed 100644 --- a/tests/pcm-fuzz.cpp +++ b/tests/pcm-fuzz.cpp @@ -56,7 +56,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) std::vector sktstate1, sktstate2; SystemCounterState sstate1, sstate2; bitset ycores; - const auto cpu_model = m->getCPUModel(); + const auto cpu_family_model = m->getCPUFamilyModel(); print_pid_collection_message(pid); bool show_partial_core_output = false; // TODO: add support for partial core output @@ -71,10 +71,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) m->getAllCounterStates(sstate2, sktstate2, cstates2); if (csv_output) print_csv(m, cstates1, cstates2, sktstate1, sktstate2, ycores, sstate1, sstate2, - cpu_model, show_core_output, 
show_partial_core_output, show_socket_output, show_system_output); + show_core_output, show_partial_core_output, show_socket_output, show_system_output); else print_output(m, cstates1, cstates2, sktstate1, sktstate2, ycores, sstate1, sstate2, - cpu_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output, + cpu_family_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output, metricVersion); return 0; diff --git a/tests/pcm-memory-fuzz.cpp b/tests/pcm-memory-fuzz.cpp index 66ddb6f9..09883675 100644 --- a/tests/pcm-memory-fuzz.cpp +++ b/tests/pcm-memory-fuzz.cpp @@ -57,10 +57,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) m->resetPMU(); m->disableJKTWorkaround(); - const auto cpu_model = m->getCPUModel(); + const auto cpu_family_model = m->getCPUFamilyModel(); if (!m->hasPCICFGUncore()) { - cerr << "Unsupported processor model (" << cpu_model << ").\n"; + cerr << "Unsupported processor model (0x" << std::hex << cpu_family_model << std::dec << ").\n"; if (m->memoryTrafficMetricsAvailable()) cerr << "For processor-level memory bandwidth statistics please use 'pcm' utility\n"; return 0;