Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Push 2023 11 16 #597

Merged
merged 10 commits into from
Nov 18, 2023
51 changes: 45 additions & 6 deletions src/WinMSRDriver/msrmain.c
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,16 @@ NTSTATUS deviceControl(PDEVICE_OBJECT DeviceObject, PIRP Irp)
new_affinity.Group = ProcNumber.Group;
new_affinity.Mask = 1ULL << (ProcNumber.Number);
KeSetSystemGroupAffinityThread(&new_affinity, &old_affinity);
__writemsr(input_msr_req->msr_address, input_msr_req->write_value);
__try
{
__writemsr(input_msr_req->msr_address, input_msr_req->write_value);
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
status = GetExceptionCode();
DbgPrint("Error: exception with code 0x%X in IO_CTL_MSR_WRITE core 0x%X msr 0x%llX value 0x%llX\n",
status, input_msr_req->core_id, input_msr_req->msr_address, input_msr_req->write_value);
}
KeRevertToUserGroupAffinityThread(&old_affinity);
Irp->IoStatus.Information = 0; // result size
break;
Expand All @@ -198,7 +207,16 @@ NTSTATUS deviceControl(PDEVICE_OBJECT DeviceObject, PIRP Irp)
new_affinity.Group = ProcNumber.Group;
new_affinity.Mask = 1ULL << (ProcNumber.Number);
KeSetSystemGroupAffinityThread(&new_affinity, &old_affinity);
*output = __readmsr(input_msr_req->msr_address);
__try
{
*output = __readmsr(input_msr_req->msr_address);
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
status = GetExceptionCode();
DbgPrint("Error: exception with code 0x%X in IO_CTL_MSR_READ core 0x%X msr 0x%llX\n",
status, input_msr_req->core_id, input_msr_req->msr_address);
}
KeRevertToUserGroupAffinityThread(&old_affinity);
Irp->IoStatus.Information = sizeof(ULONG64); // result size
break;
Expand Down Expand Up @@ -258,8 +276,19 @@ NTSTATUS deviceControl(PDEVICE_OBJECT DeviceObject, PIRP Irp)
slot.u.bits.FunctionNumber = input_pcicfg_req->func;
#pragma warning(push)
#pragma warning(disable: 4996)
size = HalSetBusDataByOffset(PCIConfiguration, input_pcicfg_req->bus, slot.u.AsULONG,
&(input_pcicfg_req->write_value), input_pcicfg_req->reg, input_pcicfg_req->bytes);
__try
{
size = HalSetBusDataByOffset(PCIConfiguration, input_pcicfg_req->bus, slot.u.AsULONG,
&(input_pcicfg_req->write_value), input_pcicfg_req->reg, input_pcicfg_req->bytes);
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
status = GetExceptionCode();
size = 0;
DbgPrint("Error: exception with code 0x%X in IO_CTL_PCICFG_WRITE b 0x%X d 0x%X f 0x%X reg 0x%X bytes 0x%X value 0x%llX\n",
status, input_pcicfg_req->bus, input_pcicfg_req->dev, input_pcicfg_req->func, input_pcicfg_req->reg, input_pcicfg_req->bytes,
input_pcicfg_req->write_value);
}
#pragma warning(pop)
if (size != input_pcicfg_req->bytes)
{
Expand All @@ -279,8 +308,18 @@ NTSTATUS deviceControl(PDEVICE_OBJECT DeviceObject, PIRP Irp)
slot.u.bits.FunctionNumber = input_pcicfg_req->func;
#pragma warning(push)
#pragma warning(disable: 4996)
size = HalGetBusDataByOffset(PCIConfiguration, input_pcicfg_req->bus, slot.u.AsULONG,
output, input_pcicfg_req->reg, input_pcicfg_req->bytes);
__try
{
size = HalGetBusDataByOffset(PCIConfiguration, input_pcicfg_req->bus, slot.u.AsULONG,
output, input_pcicfg_req->reg, input_pcicfg_req->bytes);
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
status = GetExceptionCode();
size = 0;
DbgPrint("Error: exception with code 0x%X in IO_CTL_PCICFG_READ b 0x%X d 0x%X f 0x%X reg 0x%X bytes 0x%X\n",
status, input_pcicfg_req->bus, input_pcicfg_req->dev, input_pcicfg_req->func, input_pcicfg_req->reg, input_pcicfg_req->bytes);
}
#pragma warning(pop)
if (size != input_pcicfg_req->bytes)
{
Expand Down
52 changes: 52 additions & 0 deletions src/cpucounters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,41 @@ bool PCM::discoverSystemTopology()
}
subleaf++;
} while (1);

struct domain
{
unsigned type, levelShift, nextLevelShift, width;
};
std::vector<domain> topologyDomains;
if (max_cpuid >= 0x1F)
{
subleaf = 0;
do
{
pcm_cpuid(0x1F, subleaf, cpuid_args);
domain d;
d.type = extract_bits_ui(cpuid_args.reg.ecx, 8, 15);
if (d.type == TopologyEntry::DomainTypeID::InvalidDomainTypeID)
{
break;
}
d.nextLevelShift = extract_bits_ui(cpuid_args.reg.eax, 0, 4);
d.levelShift = topologyDomains.empty() ? 0 : topologyDomains.back().nextLevelShift;
d.width = d.nextLevelShift - d.levelShift;
topologyDomains.push_back(d);
++subleaf;
} while (true);
#if 0
for (size_t l = 0; l < topologyDomains.size(); ++l)
{
std::cerr << "Topology level " << l <<
" type " << topologyDomains[l].type <<
" width " << topologyDomains[l].width <<
" levelShift " << topologyDomains[l].levelShift <<
" nextLevelShift " << topologyDomains[l].nextLevelShift << "\n";
}
#endif
}
}

if (wasThreadReported && wasCoreReported)
Expand Down Expand Up @@ -1431,6 +1466,7 @@ bool PCM::discoverSystemTopology()
MSR.push_back(std::make_shared<SafeMsrHandle>(i));
}

assert(num_cores > 0);
TopologyEntry entries[num_cores];
if (MSR[0]->buildTopology(num_cores, entries) != 0) {
std::cerr << "Unable to build CPU topology" << std::endl;
Expand Down Expand Up @@ -1736,6 +1772,14 @@ void PCM::initEnergyMonitoring()
std::make_shared<CounterWidthExtender>(
new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[i]], MSR_DRAM_ENERGY_STATUS), 32, 10000));
}

if (ppEnergyMetricsAvailable() && MSR.size() && num_sockets == 1 && pp_energy_status.empty())
{
pp_energy_status.push_back(std::make_shared<CounterWidthExtender>(
new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[0]], MSR_PP0_ENERGY_STATUS), 32, 10000));
pp_energy_status.push_back(std::make_shared<CounterWidthExtender>(
new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[0]], MSR_PP1_ENERGY_STATUS), 32, 10000));
}
}

static const uint32 UBOX0_DEV_IDS[] = {
Expand Down Expand Up @@ -6071,6 +6115,14 @@ void PCM::readAndAggregateEnergyCounters(const uint32 socket, CounterStateType &

if (socket < (uint32)dram_energy_status.size())
result.DRAMEnergyStatus += dram_energy_status[socket]->read();

if (socket == 0)
{
for (size_t pp = 0; pp < pp_energy_status.size(); ++pp)
{
result.PPEnergyStatus[pp] += pp_energy_status[pp]->read();
}
}
}

template <class CounterStateType>
Expand Down
37 changes: 37 additions & 0 deletions src/cpucounters.h
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,7 @@ class PCM_API PCM
double joulesPerEnergyUnit;
std::vector<std::shared_ptr<CounterWidthExtender> > energy_status;
std::vector<std::shared_ptr<CounterWidthExtender> > dram_energy_status;
std::vector<std::shared_ptr<CounterWidthExtender> > pp_energy_status;
std::vector<std::vector<UncorePMU> > cboPMUs;
std::vector<std::vector<UncorePMU> > mdfPMUs;
std::vector<std::vector<std::pair<UncorePMU, UncorePMU>>> cxlPMUs; // socket X CXL ports X UNIT {0,1}
Expand Down Expand Up @@ -691,6 +692,7 @@ class PCM_API PCM
bool linux_arch_perfmon = false;

public:
enum { MAX_PP = 1 }; // max power plane number on Intel architecture (client)
enum { MAX_C_STATE = 10 }; // max C-state on Intel architecture

//! \brief Returns true if the specified core C-state residency metric is supported
Expand Down Expand Up @@ -2426,6 +2428,11 @@ class PCM_API PCM
;
}

//! \brief Returns true if the power plane (PP0/PP1) energy metrics are available
//! \return true only when package energy metrics are available, the client
//!         memory controller counters are present and the system has exactly one socket
bool ppEnergyMetricsAvailable() const
{
    if (!packageEnergyMetricsAvailable())
    {
        return false;
    }
    if (!hasClientMCCounters())
    {
        return false;
    }
    return num_sockets == 1;
}

static double getBytesPerFlit(int32 cpu_model_)
{
if (hasUPI(cpu_model_))
Expand Down Expand Up @@ -2966,6 +2973,18 @@ uint64 getConsumedEnergy(const CounterStateType & before, const CounterStateType
return after.PackageEnergyStatus - before.PackageEnergyStatus;
}

/*! \brief Returns energy consumed on the specified power plane (measured in internal units)
\param powerPlane power plane ID, must be in the range 0..PCM::MAX_PP
\param before CPU counter state before the experiment
\param after CPU counter state after the experiment
*/
template <class CounterStateType>
uint64 getConsumedEnergy(const int powerPlane, const CounterStateType& before, const CounterStateType& after)
{
    // powerPlane is a signed index into PPEnergyStatus: reject negative values
    // as well as values above the highest supported power plane.
    assert(powerPlane >= 0 && powerPlane <= PCM::MAX_PP);
    return after.PPEnergyStatus[powerPlane] - before.PPEnergyStatus[powerPlane];
}

/*! \brief Returns energy consumed by DRAM (measured in internal units)
\param before CPU counter state before the experiment
\param after CPU counter state after the experiment
Expand Down Expand Up @@ -3019,6 +3038,20 @@ double getConsumedJoules(const CounterStateType & before, const CounterStateType
return double(getConsumedEnergy(before, after)) * m->getJoulesPerEnergyUnit();
}

/*! \brief Returns Joules consumed by processor (excluding DRAM)
\param powePlane power plane
\param before CPU counter state before the experiment
\param after CPU counter state after the experiment
*/
template <class CounterStateType>
double getConsumedJoules(const int powerPlane, const CounterStateType& before, const CounterStateType& after)
{
PCM* m = PCM::getInstance();
if (!m) return -1.;

return double(getConsumedEnergy(powerPlane, before, after)) * m->getJoulesPerEnergyUnit();
}

/*! \brief Returns Joules consumed by DRAM
\param before CPU counter state before the experiment
\param after CPU counter state after the experiment
Expand Down Expand Up @@ -3081,6 +3114,8 @@ class UncoreCounterState
template <class CounterStateType>
friend uint64 getConsumedEnergy(const CounterStateType & before, const CounterStateType & after);
template <class CounterStateType>
friend uint64 getConsumedEnergy(const int pp, const CounterStateType& before, const CounterStateType& after);
template <class CounterStateType>
friend uint64 getDRAMConsumedEnergy(const CounterStateType & before, const CounterStateType & after);
template <class CounterStateType>
friend uint64 getUncoreClocks(const CounterStateType& before, const CounterStateType& after);
Expand Down Expand Up @@ -3110,6 +3145,7 @@ class UncoreCounterState
uint64 UncMCIARequests;
uint64 UncMCIORequests;
uint64 PackageEnergyStatus;
uint64 PPEnergyStatus[PCM::MAX_PP + 1];
uint64 DRAMEnergyStatus;
uint64 TOROccupancyIAMiss;
uint64 TORInsertsIAMiss;
Expand Down Expand Up @@ -3137,6 +3173,7 @@ class UncoreCounterState
UncClocks(0)
{
std::fill(CStateResidency, CStateResidency + PCM::MAX_C_STATE + 1, 0);
std::fill(PPEnergyStatus, PPEnergyStatus + PCM::MAX_PP + 1, 0);
}
virtual ~UncoreCounterState() { }

Expand Down
43 changes: 43 additions & 0 deletions src/pcm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,11 @@ void print_output(PCM * m,
cout << " GT |";
if (m->packageEnergyMetricsAvailable())
cout << " CPU energy |";
if (m->ppEnergyMetricsAvailable())
{
cout << " PP0 energy |";
cout << " PP1 energy |";
}
if (m->dramEnergyMetricsAvailable())
cout << " DIMM energy |";
if (m->LLCReadMissLatencyMetricsAvailable())
Expand Down Expand Up @@ -469,6 +474,12 @@ void print_output(PCM * m,
cout << " ";
cout << setw(6) << getConsumedJoules(sktstate1[i], sktstate2[i]);
}
if (m->ppEnergyMetricsAvailable()) {
cout << " ";
cout << setw(6) << getConsumedJoules(0, sktstate1[i], sktstate2[i]);
cout << " ";
cout << setw(6) << getConsumedJoules(1, sktstate1[i], sktstate2[i]);
}
if(m->dramEnergyMetricsAvailable()) {
cout << " ";
cout << setw(6) << getDRAMConsumedJoules(sktstate1[i], sktstate2[i]);
Expand Down Expand Up @@ -500,6 +511,12 @@ void print_output(PCM * m,
cout << " ";
cout << setw(6) << getConsumedJoules(sstate1, sstate2);
}
if (m->ppEnergyMetricsAvailable()) {
cout << " ";
cout << setw(6) << getConsumedJoules(0, sstate1, sstate2);
cout << " ";
cout << setw(6) << getConsumedJoules(1, sstate1, sstate2);
}
if (m->dramEnergyMetricsAvailable()) {
cout << " ";
cout << setw(6) << getDRAMConsumedJoules(sstate1, sstate2);
Expand Down Expand Up @@ -612,6 +629,8 @@ void print_csv_header(PCM * m,
print_csv_header_helper("System Pack C-States");
if (m->packageEnergyMetricsAvailable())
print_csv_header_helper(header);
if (m->ppEnergyMetricsAvailable())
print_csv_header_helper(header, 2);
if (m->dramEnergyMetricsAvailable())
print_csv_header_helper(header);
if (m->LLCReadMissLatencyMetricsAvailable())
Expand Down Expand Up @@ -692,6 +711,13 @@ void print_csv_header(PCM * m,
header = "Proc Energy (Joules)";
print_csv_header_helper(header,m->getNumSockets());
}
if (m->ppEnergyMetricsAvailable())
{
header = "Power Plane 0 Energy (Joules)";
print_csv_header_helper(header, m->getNumSockets());
header = "Power Plane 1 Energy (Joules)";
print_csv_header_helper(header, m->getNumSockets());
}
if (m->dramEnergyMetricsAvailable())
{
header = "DRAM Energy (Joules)";
Expand Down Expand Up @@ -772,6 +798,11 @@ void print_csv_header(PCM * m,

if (m->packageEnergyMetricsAvailable())
cout << "Proc Energy (Joules),";
if (m->ppEnergyMetricsAvailable())
{
cout << "Power Plane 0 Energy (Joules),";
cout << "Power Plane 1 Energy (Joules),";
}
if (m->dramEnergyMetricsAvailable())
cout << "DRAM Energy (Joules),";
if (m->LLCReadMissLatencyMetricsAvailable())
Expand Down Expand Up @@ -848,6 +879,11 @@ void print_csv_header(PCM * m,
for (uint32 i = 0; i < m->getNumSockets(); ++i)
cout << "SKT" << i << ",";
}
if (m->ppEnergyMetricsAvailable())
{
for (uint32 i = 0; i < m->getNumSockets(); ++i)
cout << "SKT" << i << "," << "SKT" << i << ",";
}
if (m->dramEnergyMetricsAvailable())
{
for (uint32 i = 0; i < m->getNumSockets(); ++i)
Expand Down Expand Up @@ -998,6 +1034,8 @@ void print_csv(PCM * m,

if (m->packageEnergyMetricsAvailable())
cout << getConsumedJoules(sstate1, sstate2) << ",";
if (m->ppEnergyMetricsAvailable())
cout << getConsumedJoules(0, sstate1, sstate2) << "," << getConsumedJoules(1, sstate1, sstate2) << ",";
if (m->dramEnergyMetricsAvailable())
cout << getDRAMConsumedJoules(sstate1, sstate2) << ",";
if (m->LLCReadMissLatencyMetricsAvailable())
Expand Down Expand Up @@ -1085,6 +1123,11 @@ void print_csv(PCM * m,
for (uint32 i = 0; i < m->getNumSockets(); ++i)
cout << getConsumedJoules(sktstate1[i], sktstate2[i]) << ",";
}
if (m->ppEnergyMetricsAvailable())
{
for (uint32 i = 0; i < m->getNumSockets(); ++i)
cout << getConsumedJoules(0, sktstate1[i], sktstate2[i]) << "," << getConsumedJoules(1, sktstate1[i], sktstate2[i]) << ",";
}
if (m->dramEnergyMetricsAvailable())
{
for (uint32 i = 0; i < m->getNumSockets(); ++i)
Expand Down
17 changes: 15 additions & 2 deletions src/topologyentry.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,22 @@ struct PCM_API TopologyEntry // describes a core
int32 os_id;
int32 thread_id;
int32 core_id;
int32 tile_id; // tile is a constalation of 1 or more cores sharing salem L2 cache. Unique for entire system
int32 module_id;
int32 tile_id; // tile is a constellation of 1 or more cores sharing the same L2 cache. Unique for entire system
int32 die_id;
int32 die_grp_id;
int32 socket;
int32 native_cpu_model = -1;
// Topology domain type identifiers. The topology discovery code compares the
// domain type field read from CPUID leaf 0x1F (ECX bits 15:8) against these values.
enum DomainTypeID
{
// marks the end of the sub-leaf enumeration: discovery stops when this type is returned
InvalidDomainTypeID = 0,
LogicalProcessorDomain = 1,
CoreDomain = 2,
ModuleDomain = 3,
TileDomain = 4,
DieDomain = 5,
DieGrpDomain = 6
};
enum CoreType
{
Atom = 0x20,
Expand All @@ -24,7 +37,7 @@ struct PCM_API TopologyEntry // describes a core
};
CoreType core_type = Invalid;

TopologyEntry() : os_id(-1), thread_id (-1), core_id(-1), tile_id(-1), socket(-1) { }
TopologyEntry() : os_id(-1), thread_id (-1), core_id(-1), module_id(-1), tile_id(-1), die_id(-1), die_grp_id(-1), socket(-1) { }
const char* getCoreTypeStr()
{
switch (core_type)
Expand Down
2 changes: 2 additions & 0 deletions src/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -1353,6 +1353,8 @@ struct ICX_IIOPMUCNTCTLRegister
#define MSR_IA32_BIOS_SIGN_ID (0x8B)

#define MSR_DRAM_ENERGY_STATUS (0x0619)
constexpr auto MSR_PP0_ENERGY_STATUS = 0x639;
constexpr auto MSR_PP1_ENERGY_STATUS = 0x641;

#define MSR_PKG_C2_RESIDENCY (0x60D)
#define MSR_PKG_C3_RESIDENCY (0x3F8)
Expand Down