From e96ee8491f41057ae0b2aa0cd6bbbd8675c3cdf4 Mon Sep 17 00:00:00 2001 From: mei-feiyao <1332490378@qq.com> Date: Thu, 14 Aug 2025 15:21:09 +0800 Subject: [PATCH] test --- msmonitor/dynolog_npu/CMakeLists.txt | 3 - msmonitor/dynolog_npu/dynolog/src/Main.cpp | 12 +- .../dynolog/src/PrometheusLogger.cpp | 102 ++++++++++++ .../dynolog/src/PrometheusLogger.h | 92 +++++++++++ .../dynolog/src/tracing/IPCMonitor.cpp | 38 +++-- .../dynolog/src/tracing/IPCMonitor.h | 151 +++++++++++++++++- msmonitor/dynolog_npu/scripts/build.sh | 53 ++++++ .../plugin/ipc_monitor/DynoLogNpuMonitor.cpp | 1 + .../ipc_monitor/metric/MetricMarkProcess.cpp | 75 ++++----- .../ipc_monitor/metric/MetricMarkProcess.h | 31 ++-- .../plugin/ipc_monitor/mspti_monitor/mspti.h | 2 + msmonitor/plugin/stub/mspti.cpp | 10 ++ msmonitor/scripts/build.sh | 2 +- 13 files changed, 500 insertions(+), 72 deletions(-) create mode 100644 msmonitor/dynolog_npu/dynolog/src/PrometheusLogger.cpp create mode 100644 msmonitor/dynolog_npu/dynolog/src/PrometheusLogger.h create mode 100644 msmonitor/dynolog_npu/scripts/build.sh diff --git a/msmonitor/dynolog_npu/CMakeLists.txt b/msmonitor/dynolog_npu/CMakeLists.txt index 2aeddf3d66..1d43c3fed0 100644 --- a/msmonitor/dynolog_npu/CMakeLists.txt +++ b/msmonitor/dynolog_npu/CMakeLists.txt @@ -11,9 +11,6 @@ OFF) option(USE_PROMETHEUS "Enable logging to prometheus, this requires prometheus-cpp to be installed on the system" OFF) -option(USE_TENSORBOARD "Enable logging to tensorboard, this requires -protobuf to be installed on the system" -ON) if(USE_PROMETHEUS) find_package(prometheus-cpp CONFIG REQUIRED) diff --git a/msmonitor/dynolog_npu/dynolog/src/Main.cpp b/msmonitor/dynolog_npu/dynolog/src/Main.cpp index a24bb802ab..048e6a1fe3 100644 --- a/msmonitor/dynolog_npu/dynolog/src/Main.cpp +++ b/msmonitor/dynolog_npu/dynolog/src/Main.cpp @@ -33,6 +33,11 @@ #include "dynolog/src/DynologTensorBoardLogger.h" #endif +DEFINE_string( + export_path, + "", + "Path to export metrics. 
If empty, metrics will not be exported to file."); + using namespace dynolog; using json = nlohmann::json; namespace hbt = facebook::hbt; @@ -66,6 +71,11 @@ DEFINE_bool( false, "Enabled GPU monitorng, currently supports NVIDIA GPUs."); DEFINE_bool(enable_perf_monitor, false, "Enable heartbeat perf monitoring."); +DEFINE_int32( + export_cnt, + 20, + "Number of log entries to export in each log file." +); std::unique_ptr getLogger(const std::string& scribe_category = "") { @@ -190,7 +200,7 @@ int main(int argc, char** argv) if (FLAGS_enable_ipc_monitor) { LOG(INFO) << "Starting IPC Monitor"; - ipcmon = std::make_unique(); + ipcmon = std::make_unique(FLAGS_export_path, FLAGS_export_cnt); ipcmon->setLogger(std::move(getLogger())); ipcmon_thread = std::make_unique([&ipcmon]() { ipcmon->loop(); }); diff --git a/msmonitor/dynolog_npu/dynolog/src/PrometheusLogger.cpp b/msmonitor/dynolog_npu/dynolog/src/PrometheusLogger.cpp new file mode 100644 index 0000000000..2a8c3f9402 --- /dev/null +++ b/msmonitor/dynolog_npu/dynolog/src/PrometheusLogger.cpp @@ -0,0 +1,102 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +#include "dynolog/src/PrometheusLogger.h" +#include "dynolog/src/Metrics.h" +#include "hbt/src/common/System.h" + +#include +#include + +#ifdef USE_PROMETHEUS +using namespace prometheus; + +DEFINE_int32( + prometheus_port, + 8080, + "Port to setup HTTP server for Prometheus to scrape."); + +DEFINE_string( + prometheus_path, + "metrics", + "Path to setup HTTP server for Prometheus to scrape." 
+ ); + +namespace dynolog { + +const std::string DATA_NAME = "mspti_marker_range_const_time"; + +inline auto& buildGaugeFromMetric(const std::string& name, Registry& registry) { + return BuildGauge().Name(name).Register(registry); +} + +PrometheusManager::PrometheusManager() + : exposer_(fmt::format("127.0.0.1:{}", FLAGS_prometheus_port)) { + LOG(INFO) << "Initialized prometheus HTTP server on port = " + << FLAGS_prometheus_port << ", path = " << FLAGS_prometheus_path; + + // setup registry + registry_ = std::make_shared(); + + // setup registry + exposer_.RegisterCollectable(registry_, "/" + FLAGS_prometheus_path); +} + +void PrometheusManager::log(const struct MarkMetric& metric) { + if (metric.sourceKind == "host") { + auto& g = + buildGaugeFromMetric(DATA_NAME, *registry_).Add({{"name", metric.name}, + {"source_kind", "host"}, + {"process_id", std::to_string(metric.id1)}, + {"thread_id", std::to_string(metric.id2)}, + {"timestamp", std::to_string(metric.timestamp)}}); + g.Set(metric.duration); + } else { + auto& g = + buildGaugeFromMetric(DATA_NAME, *registry_).Add({{"name", metric.name}, + {"source_kind", "device"}, + {"device_id", std::to_string(metric.id1)}, + {"stream_id", std::to_string(metric.id2)}, + {"timestamp", std::to_string(metric.timestamp)}}); + g.Set(metric.duration); + } + +} + +static std::shared_ptr singleton_() { + static std::shared_ptr manager_ = + std::make_shared(); + return manager_; +} + +// static +PrometheusManager::LoggingGuard PrometheusManager::singleton() { + auto s = singleton_(); + return LoggingGuard{.manager = s, .lock_guard = s->lock()}; +} + +void PrometheusLogger::logImpl(const std::string& key, uint64_t val) { + kvs_[key] = val; +} + +void PrometheusLogger::logImpl(const std::string& key, const std::string& val) { + strKvs_[key] = val; +} + +void PrometheusLogger::finalize() { + auto logging_guard = PrometheusManager::singleton(); + auto prom = logging_guard.manager; + struct MarkMetric metric; + metric.name = 
strKvs_["name"]; + metric.sourceKind = strKvs_["sourceKind"]; + metric.duration = kvs_["duration"]; + metric.timestamp = kvs_["timestamp"]; + metric.id1 = kvs_["id1"]; + metric.id2 = kvs_["id2"]; + prom->log(metric); +} + +} // namespace dynolog +#endif // USE_PROMETHEUS diff --git a/msmonitor/dynolog_npu/dynolog/src/PrometheusLogger.h b/msmonitor/dynolog_npu/dynolog/src/PrometheusLogger.h new file mode 100644 index 0000000000..40abacd360 --- /dev/null +++ b/msmonitor/dynolog_npu/dynolog/src/PrometheusLogger.h @@ -0,0 +1,92 @@ +#pragma once +#include "dynolog/src/Logger.h" + +#include "prometheus/exposer.h" +#include "prometheus/registry.h" +#include "prometheus/gauge.h" +#include "prometheus/counter.h" + +#include +#include +#include + +DECLARE_int32(prometheus_port); +DECLARE_string(prometheus_path); + +namespace dynolog { + +struct MarkMetric { + std::string name; + std::string sourceKind; + uint64_t duration; + uint64_t timestamp; + uint64_t id1; + uint64_t id2; +}; + +class PrometheusManager { + public: + struct LoggingGuard { + std::shared_ptr manager; + std::lock_guard lock_guard; + }; + + PrometheusManager(); + + void log(const std::string& key, double val) {} + + void log(const struct MarkMetric& metric); + + static LoggingGuard singleton(); + + private: + using GaugeFamily = prometheus::Family; + std::lock_guard lock() { + return std::lock_guard(mutex_); + } + + std::mutex mutex_; + prometheus::Exposer exposer_; + std::shared_ptr registry_; + + std::unordered_map gauges_; + + std::unordered_map dynamic_metrics_; + + friend class PrometheusLoggerTest_ExporterTest_Test; + +}; + +class PrometheusLogger : public Logger { + public: + void setTimestamp(Timestamp /*ts*/) override {} + void logInt(const std::string& key, int64_t val) override { + logImpl(key, static_cast(val)); + } + + void logUint(const std::string& key, uint64_t val) override { + logImpl(key, val); + } + + void logFloat(const std::string& key, float val) override { + logImpl(key, 
static_cast(val)); + } + + void logStr(const std::string& key, const std::string& val) override { + logImpl(key, val); + } + + void finalize() override; + + private: + void logImpl(const std::string& key, uint64_t val); + void logImpl(const std::string& key, const std::string& val); + std::unordered_map kvs_; + std::unordered_map strKvs_; + + friend class PrometheusLoggerTest_BasicTest_Test; + friend class PrometheusLoggerTest_ExporterTest_Test; +}; + + +} // namespace dynolog diff --git a/msmonitor/dynolog_npu/dynolog/src/tracing/IPCMonitor.cpp b/msmonitor/dynolog_npu/dynolog/src/tracing/IPCMonitor.cpp index 85a71d957f..b1f6e91816 100644 --- a/msmonitor/dynolog_npu/dynolog/src/tracing/IPCMonitor.cpp +++ b/msmonitor/dynolog_npu/dynolog/src/tracing/IPCMonitor.cpp @@ -26,13 +26,30 @@ const std::string kLibkinetoRequest = "req"; const std::string kLibkinetoContext = "ctxt"; const std::string kLibkinetoData = "data"; -IPCMonitor::IPCMonitor(const std::string& ipc_fabric_name) +IPCMonitor::IPCMonitor(const std::string &exportPath, int32_t exportCnt, const std::string& ipc_fabric_name) { ipc_manager_ = FabricManager::factory(ipc_fabric_name); data_ipc_manager_ = FabricManager::factory(ipc_fabric_name + "_data"); // below ensures singleton exists LOG(INFO) << "Kineto config manager : active processes = " << LibkinetoConfigManager::getInstance()->processCount("0"); + if (!exportPath.empty()) { + exportPath_ = exportPath; + if (exportCnt <= 0) { + LOG(WARNING) << "Invalid export count, using default value of 10"; + exportCnt = 20; + } + LOG(INFO) << "Exporting metrics to file : " << exportPath << ", export count: " << exportCnt; + loggerGenerator_ = std::make_unique(exportPath, exportCnt); + loggerGenerator_->start(); + } +} + +IPCMonitor::~IPCMonitor() +{ + if (loggerGenerator_) { + loggerGenerator_->stop(); + } } void IPCMonitor::loop() @@ -90,18 +107,21 @@ void tracing::IPCMonitor::setLogger(std::unique_ptr logger) void IPCMonitor::LogData(const nlohmann::json& result) { 
+ if (!exportPath_.empty()) { + return; // No logging if exportPath is set + } auto timestamp = result["timestamp"].get(); logger_->logUint("timestamp", timestamp); auto duration = result["duration"].get(); logger_->logUint("duration", duration); - auto deviceId = result["deviceId"].get(); - logger_->logStr("deviceId", std::to_string(deviceId)); - auto kind = result["kind"].get(); - logger_->logStr("kind", kind); - if (result.contains("domain") && result["domain"].is_string()) { - auto domain = result["domain"].get(); - logger_->logStr("domain", domain); - } + auto id1 = result["id1"].get(); + logger_->logUint("id1", id1); + auto id2 = result["id2"].get(); + logger_->logUint("id2", id2); + auto name = result["name"].get(); + logger_->logStr("name", name); + auto sourceKind = result["sourceKind"].get(); + logger_->logStr("sourceKind", sourceKind); logger_->finalize(); } diff --git a/msmonitor/dynolog_npu/dynolog/src/tracing/IPCMonitor.h b/msmonitor/dynolog_npu/dynolog/src/tracing/IPCMonitor.h index cbc59fd2bb..5a7accad05 100644 --- a/msmonitor/dynolog_npu/dynolog/src/tracing/IPCMonitor.h +++ b/msmonitor/dynolog_npu/dynolog/src/tracing/IPCMonitor.h @@ -1,11 +1,18 @@ // Copyright (c) Meta Platforms, Inc. and affiliates. // -// This source code is licensed under the MIT license found in the -// LICENSE file in the root directory of this source tree. +// This src code is licensed under the MIT license found in the +// LICENSE file in the root directory of this src tree. 
#include <array>
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>

/// Background writer that rewrites a text file with generated marker samples
/// once per interval.
///
/// Every batch truncates the file and writes `logcnt` consecutive entries;
/// each entry is eight lines of the form
/// `mct{name="<i>", src="ht"|"de", pid|did="<id>"} <duration> <timestamp>`.
///
/// The worker thread is owned by the object: stop() (also run by the
/// destructor) wakes it and joins it, so the thread never touches the object
/// after it is gone. The previous detached thread could still be sleeping
/// when the owner was destroyed.
class LoggerGenerator {
public:
    /// @param exportFilePath file that is truncated and rewritten each batch
    /// @param logcnt         number of entries per batch
    LoggerGenerator(const std::string& exportFilePath,
                    const int &logcnt) : logFilePath_(exportFilePath), logCnt_(logcnt) {}

    ~LoggerGenerator() { stop(); }

    LoggerGenerator(const LoggerGenerator&) = delete;
    LoggerGenerator& operator=(const LoggerGenerator&) = delete;

    /// Starts the worker thread; calling it again while running is a no-op.
    void start() {
        std::lock_guard<std::mutex> lock(mutex_);
        if (worker_.joinable()) {
            return;
        }
        running_ = true;
        worker_ = std::thread([this]() { run(); });
    }

    /// Stops the worker and waits for it to finish. Safe to call repeatedly
    /// and without a prior start().
    void stop() {
        std::thread finished;
        {
            std::lock_guard<std::mutex> lock(mutex_);
            running_ = false;
            finished.swap(worker_);
        }
        // Interrupt the interval wait instead of letting it run out.
        wakeup_.notify_all();
        if (finished.joinable()) {
            finished.join();
        }
    }
private:
    std::atomic<bool> running_{false};
    std::mutex mutex_;
    std::condition_variable wakeup_;
    std::thread worker_;
    std::string logFilePath_;
    int logCnt_;
    const int intervalMs_ = 60000; // 1 minute

    /// Worker loop: write one batch, then wait for the rest of the interval
    /// (measured from the start of the batch) or until stop() is called.
    void run() {
        int cnt = 0;
        std::unique_lock<std::mutex> lock(mutex_);
        while (running_) {
            const auto deadline =
                std::chrono::steady_clock::now() + std::chrono::milliseconds(intervalMs_);
            {
                std::ofstream logFile(logFilePath_, std::ios::trunc);
                if (logFile) {
                    generateLog(logFile, cnt);
                    // The batch index only advances after a successful write.
                    ++cnt;
                } else {
                    // Fall through to the wait: retrying immediately would spin.
                    std::cerr << "Failed to open log file: " << logFilePath_ << '\n';
                }
            }
            wakeup_.wait_until(lock, deadline, [this]() { return !running_.load(); });
        }
    }

    /// Duration value for entry `step`; the values repeat every 20 steps
    /// (1000 for phases 1-5, rising to 4000 for 11-15, falling back to 1500
    /// at phase 0). Negative steps yield 0.
    inline int GetDuration(int step) {
        static constexpr std::array<int, 20> kDurationByPhase = {
            1500, 1000, 1000, 1000, 1000, 1000, 1500, 2000, 2500, 3000,
            3500, 4000, 4000, 4000, 4000, 4000, 3500, 3000, 2500, 2000};
        const int phase = step % 20;
        return phase < 0 ? 0 : kDurationByPhase[static_cast<std::size_t>(phase)];
    }

    /// Writes batch number `cnt`: entries cnt*logCnt_+1 .. (cnt+1)*logCnt_.
    /// Each entry emits host lines for pid 100..103 interleaved with device
    /// lines for did 0..3, with timestamps base+0 .. base+7; the base advances
    /// by 20 per entry. The base is the wall clock in milliseconds.
    void generateLog(std::ofstream& logFile, const int cnt) {
        const int first = 1 + cnt * logCnt_;
        const int last = logCnt_ + cnt * logCnt_;
        int64_t timestamp = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::system_clock::now().time_since_epoch()).count();
        for (int i = first; i <= last; ++i) {
            const int duration = GetDuration(i);
            for (int k = 0; k < 4; ++k) {
                logFile << "mct{name=\"" << i
                        << "\", src=\"ht\", pid=\"" << (100 + k) << "\"} "
                        << duration << " " << (timestamp + 2 * k) << "\n";
                logFile << "mct{name=\"" << i
                        << "\", src=\"de\", did=\"" << k << "\"} "
                        << duration << " " << (timestamp + 2 * k + 1) << "\n";
            }
            timestamp += 20;
        }
    }
};
b/msmonitor/dynolog_npu/scripts/build.sh new file mode 100644 index 0000000000..fbf2175e5e --- /dev/null +++ b/msmonitor/dynolog_npu/scripts/build.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Please run this script from the root level of the project. +# ./scripts/build.sh + +set -eux -o pipefail +BUILD_PROMETHEUS="${BUILD_PROMETHEUS:-0}" +USE_PROMETHEUS="OFF" + +# Check dependencies +cmake --version || echo "Please install cmake for your platform using dnf/apt-get etc." +ninja --version || echo "Please install ninja for your platform using dnf/apt-get etc." +rustc --version || echo "Please install Rust and Cargo - see https://www.rust-lang.org/tools/install" +cargo --version || echo "Please install Rust and Cargo - see https://www.rust-lang.org/tools/install" + +## Build Prometheus if enabled +if [ "${BUILD_PROMETHEUS}" -eq 1 ] +then + mkdir -p ./third_party/prometheus-cpp/_build + pushd ./third_party/prometheus-cpp/ + + git submodule init + git submodule update + + cd ./_build + cmake .. -DBUILD_SHARED_LIBS=OFF -DENABLE_PUSH=OFF -DENABLE_COMPRESSION=OFF \ + -DENABLE_TESTING=OFF + cmake --build . --parallel 4 + cmake --install . + + USE_PROMETHEUS="ON" + popd +fi + +## Build dynolog +echo "Running cmake" +mkdir -p build; cd build; + +# note we can build without ninja if not available on this system +cmake "-DUSE_PROMETHEUS=${USE_PROMETHEUS}" \ + -DCMAKE_BUILD_TYPE=Release -G Ninja "$@" .. +cmake --build . 
+ +mkdir -p bin +ln -sf "$PWD/dynolog/src/dynolog" bin/dynolog +ln -sf "$PWD/release/dyno" bin/dyno + +echo "Binary files =" +echo "$PWD/dynolog/src/dynolog" "$PWD/release/dyno" \ No newline at end of file diff --git a/msmonitor/plugin/ipc_monitor/DynoLogNpuMonitor.cpp b/msmonitor/plugin/ipc_monitor/DynoLogNpuMonitor.cpp index 7774358177..045732f9fa 100644 --- a/msmonitor/plugin/ipc_monitor/DynoLogNpuMonitor.cpp +++ b/msmonitor/plugin/ipc_monitor/DynoLogNpuMonitor.cpp @@ -36,6 +36,7 @@ DynoLogNpuMonitor::DynoLogNpuMonitor() fprintf(stderr, "Failed to create log path, log will not record\n"); } } + msptiActivityDisableMarkerDomain("communication"); // 过滤内置通信打点数据 } bool DynoLogNpuMonitor::Init() diff --git a/msmonitor/plugin/ipc_monitor/metric/MetricMarkProcess.cpp b/msmonitor/plugin/ipc_monitor/metric/MetricMarkProcess.cpp index 2e92ee736b..c25eeed172 100644 --- a/msmonitor/plugin/ipc_monitor/metric/MetricMarkProcess.cpp +++ b/msmonitor/plugin/ipc_monitor/metric/MetricMarkProcess.cpp @@ -31,47 +31,51 @@ constexpr size_t COMPLETE_RANGE_DATA_SIZE = 4; std::string MarkMetric::seriesToJson() { nlohmann::json jsonMsg; - jsonMsg["kind"] = "Marker"; - jsonMsg["deviceId"] = deviceId; - jsonMsg["domain"] = domain; + jsonMsg["name"] = name; + jsonMsg["sourceKind"] = sourceKind; jsonMsg["duration"] = duration; jsonMsg["timestamp"] = timestamp; + jsonMsg["id1"] = id1; + jsonMsg["id2"] = id2; return jsonMsg.dump(); } bool MetricMarkProcess::TransMarkData2Range(const std::vector>& markDatas, - RangeMarkData& rangemarkData) + std::vector& rangemarkData) { if (markDatas.size() != COMPLETE_RANGE_DATA_SIZE) { return false; } - + RangeMarkData hostMarkData; + hostMarkData.sourceKind = "host"; + RangeMarkData deviceMarkData; + deviceMarkData.sourceKind = "device"; for (auto& activityMarker: markDatas) { if (activityMarker->flag == MSPTI_ACTIVITY_FLAG_MARKER_START_WITH_DEVICE) { if (activityMarker->sourceKind == MSPTI_ACTIVITY_SOURCE_KIND_DEVICE) { - rangemarkData.deviceId = 
activityMarker->objectId.ds.deviceId; - rangemarkData.deviceStart = activityMarker->timestamp; + deviceMarkData.id.ds.deviceId = activityMarker->objectId.ds.deviceId; + deviceMarkData.id.ds.streamId = activityMarker->objectId.ds.streamId; + deviceMarkData.start = activityMarker->timestamp; } else { - rangemarkData.start = activityMarker->timestamp; + deviceMarkData.name = activityMarker->name; + hostMarkData.name = activityMarker->name; + hostMarkData.start = activityMarker->timestamp; + hostMarkData.id.pt.processId = activityMarker->objectId.pt.processId; + hostMarkData.id.pt.threadId = activityMarker->objectId.pt.threadId; } } if (activityMarker->flag == MSPTI_ACTIVITY_FLAG_MARKER_END_WITH_DEVICE) { if (activityMarker->sourceKind == MSPTI_ACTIVITY_SOURCE_KIND_DEVICE) { - rangemarkData.deviceEnd = activityMarker->timestamp; + deviceMarkData.end = activityMarker->timestamp; } else { - rangemarkData.end = activityMarker->timestamp; + hostMarkData.end = activityMarker->timestamp; } } } + rangemarkData.emplace_back(std::move(hostMarkData)); + rangemarkData.emplace_back(std::move(deviceMarkData)); auto markId = markDatas[0]->id; - std::string domainName = "default"; - auto it = domainMsg.find(markId); - if (it != domainMsg.end()) { - domainName = *it->second; - } - rangemarkData.domain = domainName; id2Marker.erase(markId); - domainMsg.erase(markId); return true; } @@ -87,12 +91,6 @@ void MetricMarkProcess::ConsumeMsptiData(msptiActivity *record) { std::unique_lock lock(dataMutex); records.emplace_back(std::move(tmp)); - if (markerData->flag == MSPTI_ACTIVITY_FLAG_MARKER_START_WITH_DEVICE && - markerData->sourceKind == MSPTI_ACTIVITY_SOURCE_KIND_HOST) { - std::string domainStr = markerData->domain; - auto markId = markerData->id; - domainMsg.emplace(markId, std::make_shared(domainStr)); - } } } @@ -109,32 +107,23 @@ std::vector MetricMarkProcess::AggregatedData() } std::vector rangeDatas; for (auto pair = id2Marker.rbegin(); pair != id2Marker.rend(); ++pair) { - auto 
// One marker sample as sent to the monitor daemon.
// id1/id2 carry the two identifiers of the source the sample came from; the
// numeric members default to 0 so a plain `MarkMetric m;` is fully defined.
struct MarkMetric {
    std::string name;
    std::string sourceKind;
    uint64_t duration{0};
    uint64_t timestamp{0};
    uint32_t id1{0};
    uint32_t id2{0};
public:
    std::string seriesToJson();
};

// Identifier pair of a marker source: device/stream for device records,
// process/thread for host records. Both variants are two uint32_t in the same
// order, so the pair can be read through either view.
union objectId {
    struct {
        uint32_t deviceId;
        uint32_t streamId;
    } ds;
    struct {
        uint32_t processId;
        uint32_t threadId;
    } pt;
};

// Start/end timestamps of one marker range plus the source it belongs to.
// start, end and id are zero-initialized: a default-constructed value used to
// leave them indeterminate, and a range whose start or end record never
// arrived would then be read uninitialized.
struct RangeMarkData {
    std::string name;
    std::string sourceKind;
    uint64_t start{0};
    uint64_t end{0};
    union objectId id{};
};
d51d219133..c2d4aab86f 100644 --- a/msmonitor/scripts/build.sh +++ b/msmonitor/scripts/build.sh @@ -1,6 +1,6 @@ #!/bin/bash set -e -export BUILD_PROMETHEUS=0 +export BUILD_PROMETHEUS=1 export BUILD_TENSORBOARD=1 export USE_TENSORBOARD="OFF" -- Gitee