diff --git a/msmonitor/dynolog_npu/dynolog/src/tracing/IPCMonitor.cpp b/msmonitor/dynolog_npu/dynolog/src/tracing/IPCMonitor.cpp index 85a71d957f9054fccff233843cbeb79cab2f6362..b8ccd7aa023c146f12187473179838875287a05d 100644 --- a/msmonitor/dynolog_npu/dynolog/src/tracing/IPCMonitor.cpp +++ b/msmonitor/dynolog_npu/dynolog/src/tracing/IPCMonitor.cpp @@ -102,6 +102,10 @@ void IPCMonitor::LogData(const nlohmann::json& result) auto domain = result["domain"].get(); logger_->logStr("domain", domain); } + if (result.contains("name") && result["name"].is_string()) { + auto name = result["name"].get(); + logger_->logStr("name", name); + } logger_->finalize(); } diff --git a/msmonitor/plugin/ipc_monitor/metric/MetricApiProcess.cpp b/msmonitor/plugin/ipc_monitor/metric/MetricApiProcess.cpp index e39d119a238352a27d1bcffb751f7216014139f1..fe20ab8632fab2dae74ead6fa34ce46eefe1bc50 100644 --- a/msmonitor/plugin/ipc_monitor/metric/MetricApiProcess.cpp +++ b/msmonitor/plugin/ipc_monitor/metric/MetricApiProcess.cpp @@ -28,6 +28,7 @@ std::string ApiMetric::seriesToJson() { nlohmann::json jsonMsg; jsonMsg["kind"] = "API"; + jsonMsg["name"] = apiName; jsonMsg["deviceId"] = -1; jsonMsg["duration"] = duration; jsonMsg["timestamp"] = timestamp; @@ -57,15 +58,19 @@ std::vector MetricApiProcess::AggregatedData() copyRecords = std::move(records); records.clear(); } + if (copyRecords.empty()) { + return {}; + } + std::vector ans; ApiMetric apiMetric{}; - auto ans = std::accumulate(copyRecords.begin(), copyRecords.end(), 0ULL, - [](uint64_t acc, std::shared_ptr api) { - return acc + api->end - api->start; - }); - apiMetric.duration = ans; - apiMetric.deviceId = -1; - apiMetric.timestamp = getCurrentTimestamp64(); - return {apiMetric}; + for (const auto& record : copyRecords) { + apiMetric.apiName = record->name ? record->name : "Unknown"; + apiMetric.duration = record->end - record->start; + apiMetric.timestamp = record->start; + apiMetric.deviceId = -1; + ans.emplace_back(apiMetric); + } + return ans; } void MetricApiProcess::SendProcessMessage() diff --git a/msmonitor/plugin/ipc_monitor/metric/MetricApiProcess.h b/msmonitor/plugin/ipc_monitor/metric/MetricApiProcess.h index c9357e58eec78ebf4b67941c14c16c3747daa46f..d065fea654662c216a11bc22b3100b7268cc9a7a 100644 --- a/msmonitor/plugin/ipc_monitor/metric/MetricApiProcess.h +++ b/msmonitor/plugin/ipc_monitor/metric/MetricApiProcess.h @@ -26,6 +26,7 @@ namespace ipc_monitor { namespace metric { struct ApiMetric { + std::string apiName; uint64_t duration; uint64_t timestamp; uint32_t deviceId; diff --git a/msmonitor/plugin/ipc_monitor/metric/MetricCommunicationProcess.cpp b/msmonitor/plugin/ipc_monitor/metric/MetricCommunicationProcess.cpp index 9126b1dfd13721e4f679c6cebe887dcfa47e125f..b234bddd5e56f9bce6281a9e0bac660786c830c9 100644 --- a/msmonitor/plugin/ipc_monitor/metric/MetricCommunicationProcess.cpp +++ b/msmonitor/plugin/ipc_monitor/metric/MetricCommunicationProcess.cpp @@ -26,6 +26,7 @@ std::string CommunicationMetric::seriesToJson() { nlohmann::json jsonMsg; jsonMsg["kind"] = "Communication"; + jsonMsg["name"] = commOpName; jsonMsg["deviceId"] = deviceId; jsonMsg["duration"] = duration; jsonMsg["timestamp"] = timestamp; @@ -58,21 +59,13 @@ std::vector MetricCommunicationProcess::AggregatedData() if (copyRecords.empty()) { return {}; } - std::unordered_map>> deviceId2CommunicationData = - groupby(copyRecords, [](const std::shared_ptr& data) -> std::uint32_t { - return data->ds.deviceId; - }); std::vector ans; - auto curTimestamp = getCurrentTimestamp64(); - for (auto& pair: deviceId2CommunicationData) { - CommunicationMetric communicationMetric{}; - auto& communicationDatas = pair.second; - communicationMetric.duration = std::accumulate(communicationDatas.begin(), communicationDatas.end(), 0ULL, - [](uint64_t acc, std::shared_ptr communication) { - return acc + communication->end - communication->start; - }); - communicationMetric.deviceId = pair.first; - communicationMetric.timestamp = curTimestamp; + CommunicationMetric communicationMetric{}; + for (const auto& record : copyRecords) { + communicationMetric.commOpName = record->commName ? record->commName : "Unknown"; + communicationMetric.duration = record->end - record->start; + communicationMetric.deviceId = record->ds.deviceId; + communicationMetric.timestamp = record->start; ans.emplace_back(communicationMetric); } return ans; diff --git a/msmonitor/plugin/ipc_monitor/metric/MetricCommunicationProcess.h b/msmonitor/plugin/ipc_monitor/metric/MetricCommunicationProcess.h index 745652f8ef57082c16de2980658253f9a2ad5364..9bf0b11d30613795dd466b58ea38546ebc26d89b 100644 --- a/msmonitor/plugin/ipc_monitor/metric/MetricCommunicationProcess.h +++ b/msmonitor/plugin/ipc_monitor/metric/MetricCommunicationProcess.h @@ -26,7 +26,7 @@ namespace ipc_monitor { namespace metric { struct CommunicationMetric { - std::string kindName; + std::string commOpName; uint64_t duration; uint64_t timestamp; uint32_t deviceId; diff --git a/msmonitor/plugin/ipc_monitor/metric/MetricHcclProcess.cpp b/msmonitor/plugin/ipc_monitor/metric/MetricHcclProcess.cpp index 4ebf7d7b6faea5a457466bc2aab369e089c58a27..14cc27faee318c3fc48115fde97bedc530121817 100644 --- a/msmonitor/plugin/ipc_monitor/metric/MetricHcclProcess.cpp +++ b/msmonitor/plugin/ipc_monitor/metric/MetricHcclProcess.cpp @@ -26,6 +26,7 @@ std::string HcclMetric::seriesToJson() { nlohmann::json jsonMsg; jsonMsg["kind"] = "Hccl"; + jsonMsg["name"] = hcclOpName; jsonMsg["deviceId"] = deviceId; jsonMsg["duration"] = duration; jsonMsg["timestamp"] = timestamp; @@ -58,21 +59,13 @@ std::vector MetricHcclProcess::AggregatedData() if (copyRecords.empty()) { return {}; } - std::unordered_map>> deviceId2HcclData = - groupby(copyRecords, [](const std::shared_ptr& data) -> std::uint32_t { - return data->ds.deviceId; - }); std::vector ans; - auto curTimestamp = getCurrentTimestamp64(); - for (auto& pair: deviceId2HcclData) { - HcclMetric hcclMetric{}; - auto& hcclDatas = pair.second; - hcclMetric.duration = std::accumulate(hcclDatas.begin(), hcclDatas.end(), 0ULL, - [](uint64_t acc, std::shared_ptr hccl) { - return acc + hccl->end - hccl->start; - }); - hcclMetric.deviceId = pair.first; - hcclMetric.timestamp = curTimestamp; + HcclMetric hcclMetric{}; + for (const auto& record : copyRecords) { + hcclMetric.hcclOpName = record->name ? record->name : "Unknown"; + hcclMetric.duration = record->end - record->start; + hcclMetric.deviceId = record->ds.deviceId; + hcclMetric.timestamp = record->start; ans.emplace_back(hcclMetric); } return ans; diff --git a/msmonitor/plugin/ipc_monitor/metric/MetricHcclProcess.h b/msmonitor/plugin/ipc_monitor/metric/MetricHcclProcess.h index 2c846949d35f1dc3b0c5d359e15dc8d2818db6b5..25bc40fb1870593ea72281c35975c07a4744b186 100644 --- a/msmonitor/plugin/ipc_monitor/metric/MetricHcclProcess.h +++ b/msmonitor/plugin/ipc_monitor/metric/MetricHcclProcess.h @@ -26,7 +26,7 @@ namespace ipc_monitor { namespace metric { struct HcclMetric { - std::string kindName; + std::string hcclOpName; uint64_t duration; uint64_t timestamp; uint32_t deviceId; diff --git a/msmonitor/plugin/ipc_monitor/metric/MetricKernelProcess.cpp b/msmonitor/plugin/ipc_monitor/metric/MetricKernelProcess.cpp index 4013d841b4cff860d4934a618b3c64a9b32ee718..f28fa20b9af12ec55180bbf37efa52adc45c4c79 100644 --- a/msmonitor/plugin/ipc_monitor/metric/MetricKernelProcess.cpp +++ b/msmonitor/plugin/ipc_monitor/metric/MetricKernelProcess.cpp @@ -25,6 +25,7 @@ std::string KernelMetric::seriesToJson() { nlohmann::json jsonMsg; jsonMsg["kind"] = "Kernel"; + jsonMsg["name"] = kernelName; jsonMsg["deviceId"] = deviceId; jsonMsg["duration"] = duration; jsonMsg["timestamp"] = timestamp; @@ -57,25 +58,15 @@ std::vector MetricKernelProcess::AggregatedData() if (copyRecords.empty()) { return {}; } - std::unordered_map>> deviceId2KernelData = - groupby(copyRecords, [](const std::shared_ptr& data) -> std::uint32_t { - return data->ds.deviceId; - }); std::vector ans; - auto curTimestamp = getCurrentTimestamp64(); - for (auto& pair: deviceId2KernelData) { - auto deviceId = pair.first; - auto& kernelDatas = pair.second; - KernelMetric kernelMetric{}; - kernelMetric.duration = std::accumulate(kernelDatas.begin(), kernelDatas.end(), 0ULL, - [](uint64_t acc, std::shared_ptr kernel) { - return acc + kernel->end - kernel->start; - }); - kernelMetric.deviceId = deviceId; - kernelMetric.timestamp = curTimestamp; + KernelMetric kernelMetric{}; + for (const auto& record : copyRecords) { + kernelMetric.kernelName = record->name ? record->name : "Unknown"; + kernelMetric.duration = record->end - record->start; + kernelMetric.deviceId = record->ds.deviceId; + kernelMetric.timestamp = record->start; ans.emplace_back(kernelMetric); } - return ans; } diff --git a/msmonitor/plugin/ipc_monitor/metric/MetricKernelProcess.h b/msmonitor/plugin/ipc_monitor/metric/MetricKernelProcess.h index 9bd034283ece0ba3cd5cfc5f5215b104ef37334c..b2804c2afc6d4a249d4e26bbd7f83e836b10ed68 100644 --- a/msmonitor/plugin/ipc_monitor/metric/MetricKernelProcess.h +++ b/msmonitor/plugin/ipc_monitor/metric/MetricKernelProcess.h @@ -25,6 +25,7 @@ namespace ipc_monitor { namespace metric { struct KernelMetric { + std::string kernelName; uint64_t duration; uint64_t timestamp; uint32_t deviceId; diff --git a/msmonitor/plugin/ipc_monitor/metric/MetricMarkProcess.cpp b/msmonitor/plugin/ipc_monitor/metric/MetricMarkProcess.cpp index 2e92ee736b5d3e4cde19e369a73ee7d1a019169b..75dc9ee60e311bddcced5a26b24f8d0465ef116c 100644 --- a/msmonitor/plugin/ipc_monitor/metric/MetricMarkProcess.cpp +++ b/msmonitor/plugin/ipc_monitor/metric/MetricMarkProcess.cpp @@ -34,6 +34,7 @@ std::string MarkMetric::seriesToJson() jsonMsg["kind"] = "Marker"; jsonMsg["deviceId"] = deviceId; jsonMsg["domain"] = domain; + jsonMsg["name"] = name; jsonMsg["duration"] = duration; jsonMsg["timestamp"] = timestamp; return jsonMsg.dump(); @@ -53,6 +54,7 @@ bool MetricMarkProcess::TransMarkData2Range(const std::vectortimestamp; } else { rangemarkData.start = activityMarker->timestamp; + rangemarkData.name = activityMarker->name; } } if (activityMarker->flag == MSPTI_ACTIVITY_FLAG_MARKER_END_WITH_DEVICE) { @@ -116,24 +118,23 @@ std::vector MetricMarkProcess::AggregatedData() rangeDatas.emplace_back(rangeMark); } } - - std::unordered_map> domain2RangeData = - groupby(rangeDatas, [](const RangeMarkData& data) -> std::string { - return data.domain + std::to_string(data.deviceId); - }); std::vector ans; - for (auto& pair: domain2RangeData) { - MarkMetric markMetric{}; - auto domainName = pair.first; - auto rangeDatas = pair.second; - markMetric.deviceId = rangeDatas[0].deviceId; - markMetric.domain = domainName; - markMetric.timestamp = getCurrentTimestamp64(); - markMetric.duration = std::accumulate(rangeDatas.begin(), rangeDatas.end(), 0ULL, - [](uint64_t acc, const RangeMarkData& rangeData) { - return acc + rangeData.deviceEnd - rangeData.deviceStart; - }); - ans.emplace_back(markMetric); + MarkMetric hostMarkMetric{}; + MarkMetric devMarkMetric{}; + for (const auto& data : rangeDatas) { + hostMarkMetric.name = data.name; + hostMarkMetric.domain = data.domain; + hostMarkMetric.deviceId = -1; + hostMarkMetric.duration = data.end - data.start; + hostMarkMetric.timestamp = data.start; + ans.emplace_back(hostMarkMetric); + + devMarkMetric.name = data.name; + devMarkMetric.domain = data.domain; + devMarkMetric.deviceId = data.deviceId; + devMarkMetric.duration = data.deviceEnd - data.deviceStart; + devMarkMetric.timestamp = data.deviceStart; + ans.emplace_back(devMarkMetric); } return ans; } diff --git a/msmonitor/plugin/ipc_monitor/metric/MetricMarkProcess.h b/msmonitor/plugin/ipc_monitor/metric/MetricMarkProcess.h index 3835bda859b9e4f7a530f1165684146a122f4b4e..35c9ddf700e349283e6b1b1880dffe2fed5a4090 100644 --- a/msmonitor/plugin/ipc_monitor/metric/MetricMarkProcess.h +++ b/msmonitor/plugin/ipc_monitor/metric/MetricMarkProcess.h @@ -36,6 +36,7 @@ public: }; struct RangeMarkData { + std::string name; std::string domain; uint64_t duration; uint64_t start{0}; diff --git a/msmonitor/plugin/ipc_monitor/metric/MetricMemCpyProcess.cpp b/msmonitor/plugin/ipc_monitor/metric/MetricMemCpyProcess.cpp index aaa4fd62682a07442b3bd2dcb21462affa5244e8..0eeabf82afa376e291426debacd00c7b11ed3308 100644 --- a/msmonitor/plugin/ipc_monitor/metric/MetricMemCpyProcess.cpp +++ b/msmonitor/plugin/ipc_monitor/metric/MetricMemCpyProcess.cpp @@ -25,6 +25,7 @@ std::string MemCpyMetric::seriesToJson() { nlohmann::json jsonMsg; jsonMsg["kind"] = "MemCpy"; + jsonMsg["name"] = "MemCpy"; jsonMsg["deviceId"] = deviceId; jsonMsg["duration"] = duration; jsonMsg["timestamp"] = timestamp; @@ -57,22 +58,12 @@ std::vector MetricMemCpyProcess::AggregatedData() if (copyRecords.empty()) { return {}; } - std::unordered_map>> deviceId2Memcpy = - groupby(copyRecords, [](const std::shared_ptr& data) -> std::uint32_t { - return data->deviceId; - }); std::vector ans; - auto curTimestamp = getCurrentTimestamp64(); - for (auto& pair: deviceId2Memcpy) { - auto deviceId = pair.first; + for (const auto& record : copyRecords) { MemCpyMetric memCpyMetric{}; - auto& memCpyDatas = pair.second; - memCpyMetric.duration = std::accumulate(memCpyDatas.begin(), memCpyDatas.end(), 0ULL, - [](uint64_t acc, std::shared_ptr memcpy) { - return acc + memcpy->end - memcpy->start; - }); - memCpyMetric.deviceId = deviceId; - memCpyMetric.timestamp = curTimestamp; + memCpyMetric.duration = record->end - record->start; + memCpyMetric.deviceId = record->deviceId; + memCpyMetric.timestamp = record->start; ans.emplace_back(memCpyMetric); } return ans; diff --git a/msmonitor/plugin/ipc_monitor/metric/MetricMemProcess.cpp b/msmonitor/plugin/ipc_monitor/metric/MetricMemProcess.cpp index 3f51476595a4fcfe29e4938c1e64d94a35815913..4a77325842ee072c940fd9a3c696d759672c2d79 100644 --- a/msmonitor/plugin/ipc_monitor/metric/MetricMemProcess.cpp +++ b/msmonitor/plugin/ipc_monitor/metric/MetricMemProcess.cpp @@ -25,6 +25,7 @@ std::string MemMetric::seriesToJson() { nlohmann::json jsonMsg; jsonMsg["kind"] = "Memory"; + jsonMsg["name"] = "Memory"; jsonMsg["deviceId"] = deviceId; jsonMsg["duration"] = duration; jsonMsg["timestamp"] = timestamp; @@ -57,22 +58,13 @@ std::vector MetricMemProcess::AggregatedData() if (copyRecords.empty()) { return {}; } - std::unordered_map>> deviceId2MemData = - groupby(copyRecords, [](const std::shared_ptr& data) -> std::uint32_t { - return data->deviceId; - }); std::vector ans; - auto curTimestamp = getCurrentTimestamp64(); - for (auto& pair: deviceId2MemData) { - auto deviceId = pair.first; - auto& memDatas = pair.second; + for (const auto& record : copyRecords) { MemMetric memMetric{}; - memMetric.duration = std::accumulate(memDatas.begin(), memDatas.end(), 0ULL, - [](uint64_t acc, std::shared_ptr mem) { - return acc + mem->end - mem->start; - }); - memMetric.deviceId = deviceId; - memMetric.timestamp = curTimestamp; + memMetric.name = "Memory"; + memMetric.duration = record->end - record->start; + memMetric.timestamp = record->start; + memMetric.deviceId = record->deviceId; ans.emplace_back(memMetric); } return ans; diff --git a/msmonitor/plugin/ipc_monitor/metric/MetricMemSetProcess.cpp b/msmonitor/plugin/ipc_monitor/metric/MetricMemSetProcess.cpp index f165ae2d7bdf8b82d0028a604c80fe617168c55c..17f85d306b1c2d11032a46e78a67c1657cbf4562 100644 --- a/msmonitor/plugin/ipc_monitor/metric/MetricMemSetProcess.cpp +++ b/msmonitor/plugin/ipc_monitor/metric/MetricMemSetProcess.cpp @@ -25,6 +25,7 @@ std::string MemSetMetric::seriesToJson() { nlohmann::json jsonMsg; jsonMsg["kind"] = "MemSet"; + jsonMsg["name"] = "MemSet"; jsonMsg["deviceId"] = deviceId; jsonMsg["duration"] = duration; jsonMsg["timestamp"] = timestamp; @@ -57,22 +58,13 @@ std::vector MetricMemSetProcess::AggregatedData() if (copyRecords.empty()) { return {}; } - std::unordered_map>> deviceId2MemsetData = - groupby(copyRecords, [](const std::shared_ptr& data) -> std::uint32_t { - return data->deviceId; - }); std::vector ans; - auto curTimestamp = getCurrentTimestamp64(); - for (auto& pair: deviceId2MemsetData) { + for (const auto& record : copyRecords) { MemSetMetric memSetMetric{}; - auto deviceId = pair.first; - auto& memSetDatas = pair.second; - memSetMetric.duration = std::accumulate(memSetDatas.begin(), memSetDatas.end(), 0ULL, - [](uint64_t acc, std::shared_ptr memSet) { - return acc + memSet->end - memSet->start; - }); - memSetMetric.deviceId = deviceId; - memSetMetric.timestamp = curTimestamp; + memSetMetric.name = "MemSet"; + memSetMetric.duration = record->end - record->start; + memSetMetric.timestamp = record->start; + memSetMetric.deviceId = record->deviceId; ans.emplace_back(memSetMetric); } return ans;