From 8817f257a00e7e52085a4c0bfe4c50a1998c867f Mon Sep 17 00:00:00 2001 From: devuser Date: Thu, 1 Aug 2024 04:07:36 +0200 Subject: [PATCH] profile --- src/plugins/intel_cpu/src/graph.cpp | 10 +- src/plugins/intel_cpu/src/nodes/subgraph.cpp | 9 ++ .../transformation_pipeline.cpp | 13 ++ .../intel_cpu/src/utils/my_profiler.cpp | 141 ++++++++++++++++++ .../intel_cpu/src/utils/my_profiler.hpp | 38 +++++ 5 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 src/plugins/intel_cpu/src/utils/my_profiler.cpp create mode 100644 src/plugins/intel_cpu/src/utils/my_profiler.hpp diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 490c15fceb2ec4..8a788aa1b8d825 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -15,6 +15,8 @@ #include #include +#include "utils/my_profiler.hpp" + #include "edge.h" #include "graph_dumper.h" #include "graph_optimizer.h" @@ -1124,12 +1126,17 @@ void Graph::PullOutputData(std::unordered_map>& } void Graph::InferStatic(SyncInferRequest* request) { + auto _prof0 = MY_PROFILE("::InferStatic#" + std::to_string(infer_count)); for (const auto& node : m_executableGraphNodes) { VERBOSE(node, getConfig().debugCaps.verbose); PERF(node, getConfig().collectPerfCounters); if (request) request->throw_if_canceled(); + + auto _prof = MY_PROFILE_ARGS(node->getTypeStr(), + {{"Name", node->getName()}, {"Impl", node->getPrimitiveDescriptorType()}}); + ExecuteNode(node, m_stream); } } @@ -1458,7 +1465,8 @@ void Graph::Infer(SyncInferRequest* request) { OPENVINO_ASSERT(IsReady(), "Wrong state of the ov::intel_cpu::Graph. Topology is not ready: ", static_cast(status)); } - if (infer_count != -1) infer_count++; + // if (infer_count != -1) infer_count++; + infer_count++; } void Graph::SortTopologically() { diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index eac50bf04dbd82..f3bec23f1e483d 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -346,6 +346,15 @@ Subgraph::Subgraph(const std::shared_ptr& op, const GraphContext::CPtr OPENVINO_ASSERT(tmp_snippet, "Attempt to create Subgraph node from an invalid op type"); subgraph_attrs->snippet = tmp_snippet->clone(); subgraph_attrs->bodyHash = getBodyHash(tmp_snippet); + // + ov::pass::Manager mgr; + auto body = tmp_snippet->body_ptr(); + auto name = body->get_friendly_name(); + std::string xml = name + ".xml"; + std::string bin = name + ".bin"; + mgr.register_pass(xml, bin); + mgr.run_passes(body); + // #if defined(OPENVINO_ARCH_ARM64) subgraph_attrs->snippet->set_generator(std::make_shared(host_isa)); diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index a77498cb5f4a4d..b2b490a82c44c9 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -929,6 +929,7 @@ void Transformations::MainSnippets(void) { #if defined(OPENVINO_ARCH_X86_64) auto is_supported_matmul = [this](const std::shared_ptr& n) { + // return true; const auto matmul = ov::as_type_ptr(n); if (!matmul || matmul->is_dynamic()) return false; @@ -1113,9 +1114,21 @@ void Transformations::Snippets(void) { if (!useSnippets) return; + ov::pass::Manager mgr0; + std::string xml0 = "before_tokenization.xml"; + std::string bin0 = "before_tokenization.bin"; + mgr0.register_pass(xml0, bin0); + mgr0.run_passes(model); + CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Snippets); MainSnippets(); PostSnippets(); + + ov::pass::Manager mgr1; + std::string xml1 = "after_tokenization.xml"; + std::string bin1 = "after_tokenization.bin"; + mgr1.register_pass(xml1, bin1); + mgr1.run_passes(model); } } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/utils/my_profiler.cpp b/src/plugins/intel_cpu/src/utils/my_profiler.cpp new file mode 100644 index 00000000000000..174cc438f9c052 --- /dev/null +++ b/src/plugins/intel_cpu/src/utils/my_profiler.cpp @@ -0,0 +1,141 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include +#include +#include +#include +#include +#include + +#include "my_profiler.hpp" + +#ifdef WIN32 +#include +#pragma intrinsic(__rdtsc) +#else +#include +#endif + +struct dump_items { + std::string name; // The name of the event, as displayed in Trace Viewer + std::string cat; // The event categories + std::string ph = "X"; // The event type, 'B'/'E' OR 'X' + std::string pid = "0"; // The process ID for the process + std::string tid; // The thread ID for the thread that output this event. + uint64_t ts1; // The tracing clock timestamp of the event, [microsecond] + uint64_t ts2; // Duration = ts2 - ts1. + std::string tts; // Optional. The thread clock timestamp of the event + std::vector> vecArgs; +}; + +static inline std::string get_thread_id() { + std::stringstream ss; + ss << std::this_thread::get_id(); + return ss.str(); +} + +static uint64_t rdtsc_calibrate(int seconds = 1) { + uint64_t start_ticks = __rdtsc(); + std::this_thread::sleep_for(std::chrono::seconds(seconds)); + return (__rdtsc() - start_ticks) / seconds; +} + +class ProfilerManager { +protected: + std::vector _vecItems; + std::atomic tsc_ticks_per_second{0}; + std::atomic tsc_ticks_base{0}; + std::mutex _mutex; + +public: + ProfilerManager() { + if (tsc_ticks_per_second == 0) { + uint64_t expected = 0; + auto tps = rdtsc_calibrate(); + tsc_ticks_per_second.compare_exchange_strong(expected, tps); + std::cout << "=== ProfilerManager: tsc_ticks_per_second = " << tsc_ticks_per_second << std::endl; + tsc_ticks_base.compare_exchange_strong(expected, __rdtsc()); + std::cout << "=== ProfilerManager: tsc_ticks_base = " << tsc_ticks_base << std::endl; + } + } + + ProfilerManager(ProfilerManager& other) = delete; + void operator=(const ProfilerManager&) = delete; + ~ProfilerManager() { + // Save tracing log to json file. + printf("ProfilerManager unconstruct...............\n"); + save_to_json(); + } + + void add(const dump_items& val) { + std::lock_guard lk(_mutex); + _vecItems.emplace_back(val); + } + +private: + std::string tsc_to_nsec(uint64_t tsc_ticks) { + double val = (tsc_ticks - tsc_ticks_base) * 1000000.0 / tsc_ticks_per_second; + return std::to_string(val); + } + + std::string tsc_to_nsec(uint64_t start, uint64_t end) { + double val = (end - start) * 1000000.0 / tsc_ticks_per_second; + return std::to_string(val); + } + + void save_to_json() { + std::string out_fn = std::string("profile_cpu_") + std::to_string(tsc_ticks_base) + ".json"; + FILE* pf = fopen(out_fn.c_str(), "wb"); + if (nullptr == pf) { + printf("Can't fopen: %s\n", out_fn.c_str()); + return; + } + printf("Save profile log: %s\n", out_fn.c_str()); + + // Headers + fprintf(pf, "{\n\"schemaVersion\": 1,\n\"traceEvents\":[\n"); + + for (size_t i = 0; i < _vecItems.size(); i++) { + auto& itm = _vecItems[i]; + // Write 1 event + fprintf(pf, "{"); + fprintf(pf, "\"name\":\"%s\",", itm.name.c_str()); + fprintf(pf, "\"cat\":\"%s\",", itm.cat.c_str()); + fprintf(pf, "\"ph\":\"%s\",", itm.ph.c_str()); + fprintf(pf, "\"pid\":\"%s\",", itm.pid.c_str()); + fprintf(pf, "\"tid\":\"%s\",", itm.tid.c_str()); + fprintf(pf, "\"ts\":\"%s\",", tsc_to_nsec(itm.ts1).c_str()); + fprintf(pf, "\"dur\":\"%s\",", tsc_to_nsec(itm.ts1, itm.ts2).c_str()); + fprintf(pf, "\"args\":{"); + for (size_t j = 0; j < itm.vecArgs.size(); j++) { + fprintf(pf, + "\"%s\":\"%s\"%s", + itm.vecArgs[j].first.c_str(), + itm.vecArgs[j].second.c_str(), + j + 1 == itm.vecArgs.size() ? "" : ","); + } + fprintf(pf, "}}%s\n", i == _vecItems.size() - 1 ? "" : ","); + } + + fprintf(pf, "]\n}\n"); + fclose(pf); + } +}; +static ProfilerManager g_profileManage; +MyProfile::MyProfile(const std::string& name, const std::vector>& args) { + _name = name; + _args = args; + _ts1 = __rdtsc(); +} + +MyProfile::~MyProfile() { + dump_items itm; + itm.ts2 = __rdtsc(); + itm.ts1 = _ts1; + itm.name = _name; + itm.tid = get_thread_id(); + itm.cat = "PERF"; + itm.vecArgs = _args; + g_profileManage.add(itm); +} \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/utils/my_profiler.hpp b/src/plugins/intel_cpu/src/utils/my_profiler.hpp new file mode 100644 index 00000000000000..6d311892f07210 --- /dev/null +++ b/src/plugins/intel_cpu/src/utils/my_profiler.hpp @@ -0,0 +1,38 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once +#include +#include + +class MyProfile { +public: + MyProfile() = delete; + // MyProfile(MyProfile&)=delete; + MyProfile(const std::string& name, + const std::vector>& args = + std::vector>()); + ~MyProfile(); + +private: + std::string _name; + uint64_t _ts1; + std::vector> _args; +}; + +#define MY_PROFILE(NAME) MyProfile(NAME + std::string(":") + std::to_string(__LINE__)) +#define MY_PROFILE_ARGS(NAME, ...) MyProfile(NAME + std::string(":") + std::to_string(__LINE__), __VA_ARGS__) + +// Example: +// ========================================== +// auto p = MyProfile("fun_name") +// Or +// { +// auto p = MyProfile("fun_name") +// func() +// } +// Or +// { +// auto p2 = MyProfile("fun_name", {{"arg1", "sleep 30 ms"}}); +// func() +// } \ No newline at end of file