Skip to content

Commit

Permalink
Merge pull request #356 from opcm/push-2021-12-16
Browse files Browse the repository at this point in the history
Push 2021 12 16
  • Loading branch information
opcm authored Dec 16, 2021
2 parents 1ecb288 + 1c7b5e2 commit 315ae84
Show file tree
Hide file tree
Showing 24 changed files with 585 additions and 217 deletions.
2 changes: 1 addition & 1 deletion c_example.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ int main(int argc, const char *argv[])

if(PCM.pcm_c_init == NULL || PCM.pcm_c_start == NULL || PCM.pcm_c_stop == NULL ||
PCM.pcm_c_get_cycles == NULL || PCM.pcm_c_get_instr == NULL ||
PCM.pcm_c_build_core_event == NULL)
PCM.pcm_c_build_core_event == NULL || PCM.pcm_c_get_core_event == NULL)
return -1;
switch(argc-1)
{
Expand Down
6 changes: 3 additions & 3 deletions cpuasynchcounter.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class AsynchronCounterState {
~AsynchronCounterState()
{
pthread_cancel(UpdateThread);
pthread_mutex_destroy(&CounterMutex);
if (pthread_mutex_destroy(&CounterMutex) != 0) std::cerr << "pthread_mutex_destroy failed\n";
m->cleanup();
delete[] cstates1;
delete[] cstates2;
Expand Down Expand Up @@ -190,7 +190,7 @@ void * UpdateCounters(void * state)
AsynchronCounterState * s = (AsynchronCounterState *)state;

while (true) {
pthread_mutex_lock(&(s->CounterMutex));
if (pthread_mutex_lock(&(s->CounterMutex)) != 0) std::cerr << "pthread_mutex_lock failed\n";
for (uint32 core = 0; core < s->m->getNumCores(); ++core) {
s->cstates1[core] = std::move(s->cstates2[core]);
s->cstates2[core] = s->m->getCoreCounterState(core);
Expand All @@ -204,7 +204,7 @@ void * UpdateCounters(void * state)
s->sstate1 = std::move(s->sstate2);
s->sstate2 = s->m->getSystemCounterState();

pthread_mutex_unlock(&(s->CounterMutex));
if (pthread_mutex_unlock(&(s->CounterMutex)) != 0) std::cerr << "pthread_mutex_unlock failed\n";
sleep(1);
}
return NULL;
Expand Down
75 changes: 45 additions & 30 deletions cpucounters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ bool PCM::initWinRing0Lib()

if (result == FALSE)
{
CloseHandle(hOpenLibSys);
DeinitOpenLibSys(&hOpenLibSys);
hOpenLibSys = NULL;
return false;
}
Expand Down Expand Up @@ -161,9 +161,9 @@ class InstanceLock
public:
InstanceLock(const bool global_) : globalSemaphoreName(PCM_INSTANCE_LOCK_SEMAPHORE_NAME), globalSemaphore(NULL), global(global_)
{
if(!global)
if (!global)
{
pthread_mutex_lock(&processIntanceMutex);
if (pthread_mutex_lock(&processIntanceMutex) != 0) std::cerr << "pthread_mutex_lock failed\n";
return;
}
umask(0);
Expand Down Expand Up @@ -195,9 +195,9 @@ class InstanceLock
}
~InstanceLock()
{
if(!global)
if (!global)
{
pthread_mutex_unlock(&processIntanceMutex);
if (pthread_mutex_unlock(&processIntanceMutex) != 0) std::cerr << "pthread_mutex_unlock failed\n";
return;
}
if (sem_post(globalSemaphore)) {
Expand All @@ -220,6 +220,7 @@ class TemporalThreadAffinity // speedup trick for Linux, FreeBSD, DragonFlyBSD,
public:
TemporalThreadAffinity(uint32 core_id, bool checkStatus = true)
{
assert(core_id < 1024);
pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &old_affinity);

cpu_set_t new_affinity;
Expand Down Expand Up @@ -247,6 +248,7 @@ class TemporalThreadAffinity // speedup trick for Linux, FreeBSD, DragonFlyBSD,
TemporalThreadAffinity(const uint32 core_id, bool checkStatus = true)
: set_size(CPU_ALLOC_SIZE(maxCPUs))
{
assert(core_id < maxCPUs);
old_affinity = CPU_ALLOC(maxCPUs);
assert(old_affinity);
pthread_getaffinity_np(pthread_self(), set_size, old_affinity);
Expand Down Expand Up @@ -593,7 +595,7 @@ bool PCM::detectModel()
auto tokens = split(line, ':');
if (tokens.size() >= 2 && tokens[0].find("flags") == 0)
{
for (auto curFlag : split(tokens[1], ' '))
for (const auto & curFlag : split(tokens[1], ' '))
{
if (flag == curFlag)
{
Expand Down Expand Up @@ -730,27 +732,27 @@ void PCM::initRDT()
auto env = std::getenv("PCM_USE_RESCTRL");
if (env != nullptr && std::string(env) == std::string("1"))
{
std::cout << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because environment variable PCM_USE_RESCTRL=1\n";
std::cerr << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because environment variable PCM_USE_RESCTRL=1\n";
resctrl.init();
useResctrl = true;
return;
}
if (resctrl.isMounted())
{
std::cout << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because resctrl driver is mounted.\n";
std::cerr << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because resctrl driver is mounted.\n";
resctrl.init();
useResctrl = true;
return;
}
if (isSecureBoot())
{
std::cout << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because Secure Boot mode is enabled.\n";
std::cerr << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because Secure Boot mode is enabled.\n";
resctrl.init();
useResctrl = true;
return;
}
#endif
std::cout << "Initializing RMIDs" << std::endl;
std::cerr << "Initializing RMIDs" << std::endl;
unsigned maxRMID;
/* Calculate maximum number of RMID supported by socket */
maxRMID = getMaxRMID();
Expand Down Expand Up @@ -2105,6 +2107,7 @@ class CoreTaskQueue
std::thread worker;
CoreTaskQueue() = delete;
CoreTaskQueue(CoreTaskQueue &) = delete;
CoreTaskQueue & operator = (CoreTaskQueue &) = delete;
public:
CoreTaskQueue(int32 core) :
worker([=]() {
Expand Down Expand Up @@ -3046,18 +3049,21 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
perf_event_attr e = PCM_init_perf_event_attr();
e.type = PERF_TYPE_RAW;
e.config = (1ULL << 63ULL) + event_select_reg.value;
if (event_select_reg.fields.event_select == getOCREventNr(0, i).first && event_select_reg.fields.umask == getOCREventNr(0, i).second)
e.config1 = pExtDesc->OffcoreResponseMsrValue[0];
if (event_select_reg.fields.event_select == getOCREventNr(1, i).first && event_select_reg.fields.umask == getOCREventNr(1, i).second)
e.config1 = pExtDesc->OffcoreResponseMsrValue[1];

if (event_select_reg.fields.event_select == LOAD_LATENCY_EVTNR && event_select_reg.fields.umask == LOAD_LATENCY_UMASK)
{
e.config1 = pExtDesc->LoadLatencyMsrValue;
}
if (event_select_reg.fields.event_select == FRONTEND_EVTNR && event_select_reg.fields.umask == FRONTEND_UMASK)
if (pExtDesc != nullptr)
{
e.config1 = pExtDesc->FrontendMsrValue;
if (event_select_reg.fields.event_select == getOCREventNr(0, i).first && event_select_reg.fields.umask == getOCREventNr(0, i).second)
e.config1 = pExtDesc->OffcoreResponseMsrValue[0];
if (event_select_reg.fields.event_select == getOCREventNr(1, i).first && event_select_reg.fields.umask == getOCREventNr(1, i).second)
e.config1 = pExtDesc->OffcoreResponseMsrValue[1];

if (event_select_reg.fields.event_select == LOAD_LATENCY_EVTNR && event_select_reg.fields.umask == LOAD_LATENCY_UMASK)
{
e.config1 = pExtDesc->LoadLatencyMsrValue;
}
if (event_select_reg.fields.event_select == FRONTEND_EVTNR && event_select_reg.fields.umask == FRONTEND_UMASK)
{
e.config1 = pExtDesc->FrontendMsrValue;
}
}

if (programPerfEvent(e, PERF_GEN_EVENT_0_POS + j, std::string("generic event #") + std::to_string(i)) == false)
Expand Down Expand Up @@ -3113,12 +3119,12 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
std::make_pair(perfRetiringPath, PERF_TOPDOWN_RETIRING_POS)};
int readPos = core_fixed_counter_num_used + core_gen_counter_num_used;
leader_counter = -1;
for (auto event : topDownEvents)
for (const auto & event : topDownEvents)
{
uint64 eventSel = 0, umask = 0;
const auto eventDesc = readSysFS(event.first);
const auto tokens = split(eventDesc, ',');
for (auto token : tokens)
for (const auto & token : tokens)
{
if (match(token, "event=", &eventSel))
{
Expand Down Expand Up @@ -4605,9 +4611,9 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool sile
else
{
fixedReg.value = 0;
for (auto cfg : corePMUConfig.fixed)
for (const auto & cfg : corePMUConfig.fixed)
{
fixedReg.value |= cfg.first[0];
fixedReg.value |= uint64(cfg.first[0]);
}
conf.fixedCfg = &fixedReg;
}
Expand Down Expand Up @@ -5428,6 +5434,7 @@ void print_mcfg(const char * path)
if(read_bytes == 0)
{
std::cerr << "PCM Error: Cannot read " << path << "\n";
::close(mcfg_handle);
throw std::exception();
}

Expand All @@ -5442,6 +5449,7 @@ void print_mcfg(const char * path)
if(read_bytes == 0)
{
std::cerr << "PCM Error: Cannot read " << path << " (2)\n";
::close(mcfg_handle);
throw std::exception();
}
std::cout << "Segment " << std::dec << i << " ";
Expand Down Expand Up @@ -5640,16 +5648,21 @@ bool PCM::useLinuxPerfForUncore() const
bool secureBoot = isSecureBoot();
#ifdef PCM_USE_PERF
const auto imcIDs = enumeratePerfPMUs("imc", 100);
std::cout << "INFO: Linux perf interface to program uncore PMUs is " << (imcIDs.empty()?"NOT ":"") << "present\n";
std::cerr << "INFO: Linux perf interface to program uncore PMUs is " << (imcIDs.empty()?"NOT ":"") << "present\n";
if (imcIDs.empty())
{
use = 0;
return 1 == use;
}
const char * perf_env = std::getenv("PCM_USE_UNCORE_PERF");
if (perf_env != NULL && std::string(perf_env) == std::string("1"))
{
std::cout << "INFO: using Linux perf interface to program uncore PMUs because env variable PCM_USE_UNCORE_PERF=1\n";
std::cerr << "INFO: using Linux perf interface to program uncore PMUs because env variable PCM_USE_UNCORE_PERF=1\n";
use = 1;
}
if (secureBoot)
{
std::cout << "INFO: Secure Boot detected. Using Linux perf for uncore PMU programming.\n";
std::cerr << "INFO: Secure Boot detected. Using Linux perf for uncore PMU programming.\n";
use = 1;
}
else
Expand Down Expand Up @@ -7209,7 +7222,8 @@ void ServerPCICFGUncore::initMemTest(ServerPCICFGUncore::MemTestParam & param)
std::cerr << "ERROR: mmap failed\n";
return;
}
unsigned long long maxNode = (unsigned long long)(readMaxFromSysFS("/sys/devices/system/node/online") + 1);
const int64 onlineNodes = (int64)readMaxFromSysFS("/sys/devices/system/node/online");
unsigned long long maxNode = (unsigned long long)(onlineNodes + 1);
if (maxNode == 0)
{
std::cerr << "ERROR: max node is 0 \n";
Expand Down Expand Up @@ -7682,6 +7696,7 @@ void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc

for(uint32 cbo = 0; cbo < getMaxNumOfCBoxes(); ++cbo)
{
assert(cbo < cboPMUs[i].size());
cboPMUs[i][cbo].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN);

if (ICX != cpu_model && SNOWRIDGE != cpu_model)
Expand Down Expand Up @@ -7984,7 +7999,7 @@ void PCM::setupCustomCoreEventsForNuma(PCM::ExtendedCustomCoreEventDescription&
conf.OffcoreResponseMsrValue[1] = 0x3FC0008FFF | (1 << 27) | (1 << 28) | (1 << 29);
break;
case PCM::ICX:
std::cout << "INFO: Monitored accesses include demand + L2 cache prefetcher, code read and RFO.\n";
std::cerr << "INFO: Monitored accesses include demand + L2 cache prefetcher, code read and RFO.\n";
// OCR.READS_TO_CORE.LOCAL_DRAM
conf.OffcoreResponseMsrValue[0] = 0x0104000477;
// OCR.READS_TO_CORE.REMOTE_DRAM
Expand Down
8 changes: 6 additions & 2 deletions cpucounters.h
Original file line number Diff line number Diff line change
Expand Up @@ -2934,6 +2934,7 @@ class CoreCounterState : public BasicCounterState
CoreCounterState( const CoreCounterState& ) = default;
CoreCounterState( CoreCounterState&& ) = default;
CoreCounterState & operator= ( CoreCounterState&& ) = default;
virtual ~ CoreCounterState() {}
};

//! \brief Socket-wide counter state
Expand Down Expand Up @@ -2972,6 +2973,8 @@ class SocketCounterState : public BasicCounterState, public UncoreCounterState
UncoreCounterState::operator = ( std::move(ucs) );
return *this;
}

virtual ~ SocketCounterState() {}
};

//! \brief System-wide counter state
Expand Down Expand Up @@ -3028,6 +3031,7 @@ class SystemCounterState : public SocketCounterState

return *this;
}
virtual ~ SystemCounterState() {}
};

/*! \brief Reads the counter state of the system
Expand Down Expand Up @@ -3977,11 +3981,11 @@ inline uint64 getNumberOfEvents(const CounterType & before, const CounterType &
/*! \brief Computes the LLC read miss latency over the interval between two counter states

    The PCM instance is fetched once and reused for both the availability
    check and the time-base computation.

    The result is derived from the deltas of TOROccupancyIAMiss,
    TORInsertsIAMiss and UncClocks between the two states, scaled by the
    elapsed time obtained from the invariant TSC, the number of online cores
    per socket and the nominal frequency.

    \param before counter state captured at the start of the interval
    \param after counter state captured at the end of the interval
    \return -1 if PCM reports that the LLC read miss latency metrics are not
            available; otherwise 1e9 * seconds * (occupancy / inserts) / unc_clocks
            (presumably nanoseconds, given the 1e9 scaling of seconds - TODO confirm).
            No guard is applied when the inserts or uncore clock delta is zero.
*/
template <class CounterStateType>
inline double getLLCReadMissLatency(const CounterStateType & before, const CounterStateType & after)
{
    auto * m = PCM::getInstance();
    if (m->LLCReadMissLatencyMetricsAvailable() == false) return -1.;

    // Deltas are taken in double precision, so a counter that went backwards
    // yields a negative value instead of wrapping around.
    const double occupancy = double(after.TOROccupancyIAMiss) - double(before.TOROccupancyIAMiss);
    const double inserts = double(after.TORInsertsIAMiss) - double(before.TORInsertsIAMiss);
    const double unc_clocks = double(after.UncClocks) - double(before.UncClocks);

    // Elapsed time: invariant TSC delta divided by cores-per-socket (integer
    // division, as in the original) and by the nominal frequency.
    const double seconds = double(getInvariantTSC(before, after)) / double(m->getNumOnlineCores()/m->getNumSockets()) / double(m->getNominalFrequency());
    return 1e9*seconds*(occupancy/inserts)/unc_clocks;
}
Expand Down
10 changes: 6 additions & 4 deletions daemon/daemon/daemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ namespace PCMDaemon {
//Put the poll interval in shared memory so that the client knows
sharedPCMState_->pollMs = pollIntervalMs_;

updatePCMState(&systemStatesBefore_, &socketStatesBefore_, &coreStatesBefore_);
collectionTimeAfter_ = 0;

updatePCMState(&systemStatesBefore_, &socketStatesBefore_, &coreStatesBefore_, collectionTimeBefore_);
systemStatesForQPIBefore_ = SystemCounterState(systemStatesBefore_);

serverUncoreCounterStatesBefore_ = new ServerUncoreCounterState[pcmInstance_->getNumSockets()];
Expand Down Expand Up @@ -384,7 +386,7 @@ namespace PCMDaemon {

sharedPCMState_->lastUpdateTscBegin = RDTSC();

updatePCMState(&systemStatesAfter_, &socketStatesAfter_, &coreStatesAfter_);
updatePCMState(&systemStatesAfter_, &socketStatesAfter_, &coreStatesAfter_, collectionTimeAfter_);

getPCMSystem();

Expand Down Expand Up @@ -421,7 +423,7 @@ namespace PCMDaemon {
std::swap(collectionTimeBefore_, collectionTimeAfter_);
}

void Daemon::updatePCMState(SystemCounterState* systemStates, std::vector<SocketCounterState>* socketStates, std::vector<CoreCounterState>* coreStates)
void Daemon::updatePCMState(SystemCounterState* systemStates, std::vector<SocketCounterState>* socketStates, std::vector<CoreCounterState>* coreStates, uint64 & t)
{
if(subscribers_.find("core") != subscribers_.end())
{
Expand All @@ -434,7 +436,7 @@ namespace PCMDaemon {
pcmInstance_->getUncoreCounterStates(*systemStates, *socketStates);
}
}
collectionTimeAfter_ = pcmInstance_->getTickCount();
t = pcmInstance_->getTickCount();
}

void Daemon::swapPCMBeforeAfterState()
Expand Down
4 changes: 2 additions & 2 deletions daemon/daemon/daemon.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ namespace PCMDaemon {
void setupSharedMemory();
gid_t resolveGroupName(const std::string& groupName);
void getPCMCounters();
void updatePCMState(SystemCounterState* systemStates, std::vector<SocketCounterState>* socketStates, std::vector<CoreCounterState>* coreStates);
void updatePCMState(SystemCounterState* systemStates, std::vector<SocketCounterState>* socketStates, std::vector<CoreCounterState>* coreStates, uint64 & t);
void swapPCMBeforeAfterState();
void getPCMSystem();
void getPCMCore();
Expand All @@ -64,7 +64,7 @@ namespace PCMDaemon {
std::vector<std::string> allowedSubscribers_;

//Data for core, socket and system state
uint64 collectionTimeBefore_, collectionTimeAfter_;
uint64 collectionTimeBefore_{0ULL}, collectionTimeAfter_{0ULL};
std::vector<CoreCounterState> coreStatesBefore_, coreStatesAfter_;
std::vector<SocketCounterState> socketStatesBefore_, socketStatesAfter_;
SystemCounterState systemStatesBefore_, systemStatesForQPIBefore_, systemStatesAfter_;
Expand Down
Loading

0 comments on commit 315ae84

Please sign in to comment.