profile #4 (Closed)
wants to merge 1 commit
10 changes: 9 additions & 1 deletion src/plugins/intel_cpu/src/graph.cpp
@@ -15,6 +15,8 @@
#include <utility>
#include <vector>

#include "utils/my_profiler.hpp"

#include "edge.h"
#include "graph_dumper.h"
#include "graph_optimizer.h"
@@ -1124,12 +1126,17 @@ void Graph::PullOutputData(std::unordered_map<std::size_t, ov::SoPtr<ITensor>>&
}

void Graph::InferStatic(SyncInferRequest* request) {
auto _prof0 = MY_PROFILE("::InferStatic#" + std::to_string(infer_count));
for (const auto& node : m_executableGraphNodes) {
VERBOSE(node, getConfig().debugCaps.verbose);
PERF(node, getConfig().collectPerfCounters);

if (request)
request->throw_if_canceled();

auto _prof = MY_PROFILE_ARGS(node->getTypeStr(),
{{"Name", node->getName()}, {"Impl", node->getPrimitiveDescriptorType()}});

ExecuteNode(node, m_stream);
}
}
@@ -1458,7 +1465,8 @@ void Graph::Infer(SyncInferRequest* request) {
OPENVINO_ASSERT(IsReady(), "Wrong state of the ov::intel_cpu::Graph. Topology is not ready: ", static_cast<int>(status));
}

if (infer_count != -1) infer_count++;
// if (infer_count != -1) infer_count++;
infer_count++;
}

void Graph::SortTopologically() {
9 changes: 9 additions & 0 deletions src/plugins/intel_cpu/src/nodes/subgraph.cpp
@@ -346,6 +346,15 @@ Subgraph::Subgraph(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr
OPENVINO_ASSERT(tmp_snippet, "Attempt to create Subgraph node from an invalid op type");
subgraph_attrs->snippet = tmp_snippet->clone();
subgraph_attrs->bodyHash = getBodyHash(tmp_snippet);
// Debug: dump the tokenized snippet body to IR, named after the body's friendly name.
ov::pass::Manager mgr;
auto body = tmp_snippet->body_ptr();
auto name = body->get_friendly_name();
std::string xml = name + ".xml";
std::string bin = name + ".bin";
mgr.register_pass<ov::pass::Serialize>(xml, bin);
mgr.run_passes(body);
//

#if defined(OPENVINO_ARCH_ARM64)
subgraph_attrs->snippet->set_generator(std::make_shared<aarch64::CPUGenerator>(host_isa));
@@ -929,6 +929,7 @@ void Transformations::MainSnippets(void) {

#if defined(OPENVINO_ARCH_X86_64)
auto is_supported_matmul = [this](const std::shared_ptr<const ov::Node>& n) {
// return true;
const auto matmul = ov::as_type_ptr<const ov::op::v0::MatMul>(n);
if (!matmul || matmul->is_dynamic())
return false;
@@ -1113,9 +1114,21 @@ void Transformations::Snippets(void) {
if (!useSnippets)
return;

ov::pass::Manager mgr0;
std::string xml0 = "before_tokenization.xml";
std::string bin0 = "before_tokenization.bin";
mgr0.register_pass<ov::pass::Serialize>(xml0, bin0);
mgr0.run_passes(model);

CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Snippets);
MainSnippets();
PostSnippets();

ov::pass::Manager mgr1;
std::string xml1 = "after_tokenization.xml";
std::string bin1 = "after_tokenization.bin";
mgr1.register_pass<ov::pass::Serialize>(xml1, bin1);
mgr1.run_passes(model);
}

} // namespace intel_cpu
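Both serialization hunks above rely on the same pattern: register an ov::pass::Serialize pass on a pass manager and run it over the model to dump OpenVINO IR for offline inspection. A minimal standalone sketch of that pattern follows; the helper name dump_model_to_ir and the tag argument are illustrative, and it assumes openvino/pass/manager.hpp and openvino/pass/serialize.hpp are included in the translation unit.

// Illustrative debug helper (name and signature are hypothetical, not part of this patch).
// Writes <tag>.xml / <tag>.bin to the current working directory.
static void dump_model_to_ir(const std::shared_ptr<ov::Model>& model, const std::string& tag) {
    ov::pass::Manager mgr;
    mgr.register_pass<ov::pass::Serialize>(tag + ".xml", tag + ".bin");
    mgr.run_passes(model);
}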
141 changes: 141 additions & 0 deletions src/plugins/intel_cpu/src/utils/my_profiler.cpp
@@ -0,0 +1,141 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <mutex>
#include <sstream>
#include <thread>
#include <utility>
#include <vector>

#include "my_profiler.hpp"

#ifdef _WIN32
#include <intrin.h>
#pragma intrinsic(__rdtsc)
#else
#include <x86intrin.h>
#endif

struct dump_items {
std::string name; // The name of the event, as displayed in Trace Viewer
std::string cat; // The event categories
std::string ph = "X"; // The event type, 'B'/'E' OR 'X'
std::string pid = "0"; // The process ID for the process
std::string tid; // The thread ID for the thread that output this event.
uint64_t ts1; // The tracing clock timestamp of the event, [microsecond]
uint64_t ts2; // Duration = ts2 - ts1.
std::string tts; // Optional. The thread clock timestamp of the event
std::vector<std::pair<std::string, std::string>> vecArgs;
};

static inline std::string get_thread_id() {
std::stringstream ss;
ss << std::this_thread::get_id();
return ss.str();
}

static uint64_t rdtsc_calibrate(int seconds = 1) {
uint64_t start_ticks = __rdtsc();
std::this_thread::sleep_for(std::chrono::seconds(seconds));
return (__rdtsc() - start_ticks) / seconds;
}

class ProfilerManager {
protected:
std::vector<dump_items> _vecItems;
std::atomic<uint64_t> tsc_ticks_per_second{0};
std::atomic<uint64_t> tsc_ticks_base{0};
std::mutex _mutex;

public:
ProfilerManager() {
if (tsc_ticks_per_second == 0) {
uint64_t expected = 0;
auto tps = rdtsc_calibrate();
tsc_ticks_per_second.compare_exchange_strong(expected, tps);
std::cout << "=== ProfilerManager: tsc_ticks_per_second = " << tsc_ticks_per_second << std::endl;
tsc_ticks_base.compare_exchange_strong(expected, __rdtsc());
std::cout << "=== ProfilerManager: tsc_ticks_base = " << tsc_ticks_base << std::endl;
}
}

ProfilerManager(ProfilerManager& other) = delete;
void operator=(const ProfilerManager&) = delete;
~ProfilerManager() {
// Save tracing log to json file.
printf("ProfilerManager destructing...............\n");
save_to_json();
}

void add(const dump_items& val) {
std::lock_guard<std::mutex> lk(_mutex);
_vecItems.emplace_back(val);
}

private:
// Convert TSC ticks to microseconds, the unit Chrome's trace viewer expects
// in the "ts" and "dur" fields.
std::string tsc_to_usec(uint64_t tsc_ticks) {
double val = (tsc_ticks - tsc_ticks_base) * 1000000.0 / tsc_ticks_per_second;
return std::to_string(val);
}

std::string tsc_to_usec(uint64_t start, uint64_t end) {
double val = (end - start) * 1000000.0 / tsc_ticks_per_second;
return std::to_string(val);
}

void save_to_json() {
std::string out_fn = std::string("profile_cpu_") + std::to_string(tsc_ticks_base) + ".json";
FILE* pf = fopen(out_fn.c_str(), "wb");
if (nullptr == pf) {
printf("Can't fopen: %s\n", out_fn.c_str());
return;
}
printf("Save profile log: %s\n", out_fn.c_str());

// Headers
fprintf(pf, "{\n\"schemaVersion\": 1,\n\"traceEvents\":[\n");

for (size_t i = 0; i < _vecItems.size(); i++) {
auto& itm = _vecItems[i];
// Write 1 event
fprintf(pf, "{");
fprintf(pf, "\"name\":\"%s\",", itm.name.c_str());
fprintf(pf, "\"cat\":\"%s\",", itm.cat.c_str());
fprintf(pf, "\"ph\":\"%s\",", itm.ph.c_str());
fprintf(pf, "\"pid\":\"%s\",", itm.pid.c_str());
fprintf(pf, "\"tid\":\"%s\",", itm.tid.c_str());
fprintf(pf, "\"ts\":\"%s\",", tsc_to_usec(itm.ts1).c_str());
fprintf(pf, "\"dur\":\"%s\",", tsc_to_usec(itm.ts1, itm.ts2).c_str());
fprintf(pf, "\"args\":{");
for (size_t j = 0; j < itm.vecArgs.size(); j++) {
fprintf(pf,
"\"%s\":\"%s\"%s",
itm.vecArgs[j].first.c_str(),
itm.vecArgs[j].second.c_str(),
j + 1 == itm.vecArgs.size() ? "" : ",");
}
fprintf(pf, "}}%s\n", i == _vecItems.size() - 1 ? "" : ",");
}

fprintf(pf, "]\n}\n");
fclose(pf);
}
};
static ProfilerManager g_profileManage;

MyProfile::MyProfile(const std::string& name, const std::vector<std::pair<std::string, std::string>>& args) {
_name = name;
_args = args;
_ts1 = __rdtsc();
}

MyProfile::~MyProfile() {
dump_items itm;
itm.ts2 = __rdtsc();
itm.ts1 = _ts1;
itm.name = _name;
itm.tid = get_thread_id();
itm.cat = "PERF";
itm.vecArgs = _args;
g_profileManage.add(itm);
}
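For reference, save_to_json() above writes Chrome Trace Event Format, so the generated profile_cpu_<base>.json can be loaded in a trace viewer such as chrome://tracing. One complete-duration ('X') event emitted by the loop looks roughly like the excerpt below; all field values are illustrative, only the field names and layout are taken from the fprintf calls above.

{
"schemaVersion": 1,
"traceEvents":[
{"name":"MatMul:1137","cat":"PERF","ph":"X","pid":"0","tid":"139874623870720","ts":"1523.41","dur":"87.02","args":{"Name":"node_0","Impl":"brgemm_avx512"}}
]
}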
38 changes: 38 additions & 0 deletions src/plugins/intel_cpu/src/utils/my_profiler.hpp
@@ -0,0 +1,38 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <vector>

class MyProfile {
public:
MyProfile() = delete;
// MyProfile(MyProfile&)=delete;
MyProfile(const std::string& name,
const std::vector<std::pair<std::string, std::string>>& args =
std::vector<std::pair<std::string, std::string>>());
~MyProfile();

private:
std::string _name;
uint64_t _ts1;
std::vector<std::pair<std::string, std::string>> _args;
};

#define MY_PROFILE(NAME) MyProfile(NAME + std::string(":") + std::to_string(__LINE__))
#define MY_PROFILE_ARGS(NAME, ...) MyProfile(NAME + std::string(":") + std::to_string(__LINE__), __VA_ARGS__)

// Example:
// ==========================================
// auto p = MyProfile("fun_name");
// Or, scoped to a block:
// {
//     auto p = MyProfile("fun_name");
//     func();
// }
// Or, with extra args recorded in the trace:
// {
//     auto p2 = MyProfile("fun_name", {{"arg1", "sleep 30 ms"}});
//     func();
// }
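A hypothetical call site, sketching how the MY_PROFILE / MY_PROFILE_ARGS macros from this header are meant to be used; the function and argument names below are made up for illustration, and each span is recorded when its MyProfile object goes out of scope.

// Illustrative usage only; run_inference_step and the "batch" arg are hypothetical.
void run_inference_step() {
    auto prof_total = MY_PROFILE("run_inference_step");   // spans the whole function
    {
        auto prof_prep = MY_PROFILE_ARGS("prepare_inputs", {{"batch", "1"}});
        // ... prepare inputs ...
    }   // "prepare_inputs" span recorded here
    // ... execute ...
}       // "run_inference_step" span recorded here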