From 3c4203747d83347b63470c50f4b654ed597958a5 Mon Sep 17 00:00:00 2001 From: fanglanyue Date: Thu, 13 Feb 2025 17:04:30 +0800 Subject: [PATCH 1/3] add IPCMonitor as dynolog npu plugin --- dynolog_npu/plugin/Readme.md | 17 ++ dynolog_npu/plugin/bindings.cpp | 12 ++ .../plugin/ipc_monitor/DynoLogNpuMonitor.cpp | 37 ++++ .../plugin/ipc_monitor/DynoLogNpuMonitor.h | 28 +++ dynolog_npu/plugin/ipc_monitor/MonitorBase.h | 15 ++ .../plugin/ipc_monitor/NpuIpcClient.cpp | 137 ++++++++++++ dynolog_npu/plugin/ipc_monitor/NpuIpcClient.h | 100 +++++++++ .../plugin/ipc_monitor/NpuIpcEndPoint.h | 201 ++++++++++++++++++ .../ipc_monitor/PyDynamicMonitorProxy.h | 36 ++++ dynolog_npu/plugin/ipc_monitor/singleton.h | 31 +++ dynolog_npu/plugin/ipc_monitor/utils.cpp | 142 +++++++++++++ dynolog_npu/plugin/ipc_monitor/utils.h | 61 ++++++ dynolog_npu/plugin/setup.py | 31 +++ 13 files changed, 848 insertions(+) create mode 100644 dynolog_npu/plugin/Readme.md create mode 100644 dynolog_npu/plugin/bindings.cpp create mode 100644 dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.cpp create mode 100644 dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.h create mode 100644 dynolog_npu/plugin/ipc_monitor/MonitorBase.h create mode 100644 dynolog_npu/plugin/ipc_monitor/NpuIpcClient.cpp create mode 100644 dynolog_npu/plugin/ipc_monitor/NpuIpcClient.h create mode 100644 dynolog_npu/plugin/ipc_monitor/NpuIpcEndPoint.h create mode 100644 dynolog_npu/plugin/ipc_monitor/PyDynamicMonitorProxy.h create mode 100644 dynolog_npu/plugin/ipc_monitor/singleton.h create mode 100644 dynolog_npu/plugin/ipc_monitor/utils.cpp create mode 100644 dynolog_npu/plugin/ipc_monitor/utils.h create mode 100644 dynolog_npu/plugin/setup.py diff --git a/dynolog_npu/plugin/Readme.md b/dynolog_npu/plugin/Readme.md new file mode 100644 index 00000000000..c59bfffad5a --- /dev/null +++ b/dynolog_npu/plugin/Readme.md @@ -0,0 +1,17 @@ + + +# Build and Install npu-dynolog-plugin +``` +# install pybind11 +pip install pybind11 + +# build 
dynolog_npu_plugin wheel +python3 setup.py bdist_wheel +# install +pip install dist/{dynolog_npu_plugin-xxx.whl} + +# example +import IPCMonitor +dyno_worker = IPCMonitor.PyDynamicMonitorProxy() +dyno_worker.init_dyno(0) +``` diff --git a/dynolog_npu/plugin/bindings.cpp b/dynolog_npu/plugin/bindings.cpp new file mode 100644 index 00000000000..8663ce3ff06 --- /dev/null +++ b/dynolog_npu/plugin/bindings.cpp @@ -0,0 +1,12 @@ +// bindings.cpp +#include +#include "ipc_monitor/PyDynamicMonitorProxy.h" + +namespace py = pybind11; + +PYBIND11_MODULE(IPCMonitor, m) { + py::class_(m, "PyDynamicMonitorProxy") + .def(py::init<>()) + .def("init_dyno", &torch_npu::profiler::PyDynamicMonitorProxy::InitDyno, py::arg("npuId")) + .def("poll_dyno", &torch_npu::profiler::PyDynamicMonitorProxy::PollDyno); +} \ No newline at end of file diff --git a/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.cpp b/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.cpp new file mode 100644 index 00000000000..2160a40b98d --- /dev/null +++ b/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.cpp @@ -0,0 +1,37 @@ +#pragma once +#include "DynoLogNpuMonitor.h" + +#include + +#include "utils.h" + +namespace torch_npu { +namespace profiler { + +bool DynoLogNpuMonitor::Init() +{ + if (isInitialized_) { + std::cout << "[WRARNING] DynoLog npu monitor already initialized" << std::endl; + return true; + } + bool res = ipcClient_.RegisterInstance(npuId_); + if (res) { + isInitialized_ = true; + std::cout << "[INFO] DynoLog npu monitor initialized success !" << std::endl; + } + return res; +} + +std::string DynoLogNpuMonitor::Poll() +{ + std::string res = ipcClient_.IpcClientNpuConfig(); + if (res.empty()) { + std::cout << "[INFO] Request for dynolog server is empty !" 
<< std::endl; + return ""; + } + std::cout << "[INFO] Received NPU configuration successfully" << std::endl; + return res; +} + +} // namespace profiler +} // namespace torch_npu \ No newline at end of file diff --git a/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.h b/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.h new file mode 100644 index 00000000000..0125cda078d --- /dev/null +++ b/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.h @@ -0,0 +1,28 @@ +#pragma once +#include "MonitorBase.h" +#include "NpuIpcClient.h" +#include "singleton.h" + +namespace torch_npu { +namespace profiler { + +class DynoLogNpuMonitor : public MonitorBase, public torch_npu::toolkit::profiler::Singleton { + friend class torch_npu::toolkit::profiler::Singleton; + +public: + DynoLogNpuMonitor() = default; + bool Init() override; + std::string Poll() override; + void SetNpuId(int id) override + { + npuId_ = id; + } + +private: + bool isInitialized_ = false; + int32_t npuId_ = 0; + IpcClient ipcClient_; +}; + +} // namespace profiler +} // namespace torch_npu diff --git a/dynolog_npu/plugin/ipc_monitor/MonitorBase.h b/dynolog_npu/plugin/ipc_monitor/MonitorBase.h new file mode 100644 index 00000000000..1c7885b3e59 --- /dev/null +++ b/dynolog_npu/plugin/ipc_monitor/MonitorBase.h @@ -0,0 +1,15 @@ +#pragma once +#include + +namespace torch_npu { +namespace profiler { + +class MonitorBase { +public: + virtual bool Init() = 0; + virtual std::string Poll() = 0; + virtual void SetNpuId(int id) = 0; +}; + +} // namespace profiler +} // namespace torch_npu diff --git a/dynolog_npu/plugin/ipc_monitor/NpuIpcClient.cpp b/dynolog_npu/plugin/ipc_monitor/NpuIpcClient.cpp new file mode 100644 index 00000000000..69a7548787e --- /dev/null +++ b/dynolog_npu/plugin/ipc_monitor/NpuIpcClient.cpp @@ -0,0 +1,137 @@ +#include "NpuIpcClient.h" + +#include + +namespace torch_npu { +namespace profiler { + +bool torch_npu::profiler::IpcClient::RegisterInstance(int32_t id) +{ + NpuContext context{ + .npu = id, + 
.pid = getpid(), + .jobId = JOB_ID, + }; + std::unique_ptr message = Message::ConstructMessage(context, "ctxt"); + try { + if (!SyncSendMessage(*message, std::string(DYNO_IPC_NAME))) { + std::cout << "[WARNING]Failed to send register ctxt for pid " << context.pid << " with dyno" << std::endl; + return false; + } + } catch (const std::exception &e) { + std::cout << "[WARNING] Error when SyncSendMessage: " << e.what() << std::endl; + return false; + } + std::cout << "[INFO] Resigter pid " << context.pid << " for dynolog success !" << std::endl; + return true; +} +std::string IpcClient::IpcClientNpuConfig() +{ + auto size = pids_.size(); + auto *req = (NpuRequest *)malloc(sizeof(NpuRequest) + sizeof(int32_t) * size); + req->type = DYNO_IPC_TYPE; + req->pidSize = size; + req->jobId = JOB_ID; + for (int i = 0; i < size; i++) { + req->pids[i] = pids_[i]; + } + std::unique_ptr message = Message::ConstructMessage(*req, "req", size); + if (!SyncSendMessage(*message, std::string(DYNO_IPC_NAME))) { + std::cout << "[WARNING] Failed to send config to dyno server fail !" << std::endl; + free(req); + req = nullptr; + return ""; + } + free(req); + message = PollRecvMessage(MAX_IPC_RETRIES, MAX_SLEEP_US); + if (!message) { + std::cout << "[WARNING] Failed to receive on-demand config !" << std::endl; + return ""; + } + std::string res = std::string((char *)message->buf.get(), message->metadata.size); + return res; +} +std::unique_ptr IpcClient::ReceiveMessage() +{ + std::lock_guard wguard(dequeLock_); + if (msgDynoDeque_.empty()) { + return nullptr; + } + std::unique_ptr message = std::move(msgDynoDeque_.front()); + msgDynoDeque_.pop_front(); + return message; +} +bool IpcClient::SyncSendMessage(const Message &message, const std::string &destName, int numRetry, int seepTimeUs) +{ + if (destName.empty()) { + std::cout << "[WARNING] Can not send to empty socket name !" 
<< std::endl; + return false; + } + int i = 0; + std::vector npuPayLoad{ NpuPayLoad(sizeof(struct Metadata), (void *)&message.metadata), + NpuPayLoad(message.metadata.size, message.buf.get()) }; + try { + auto ctxt = ep_.BuildSendNpuCtxt(destName, npuPayLoad, std::vector()); + while (!ep_.TrySendMessage(*ctxt) && i < numRetry) { + i++; + usleep(seepTimeUs); + seepTimeUs *= 2; + } + } catch (const std::exception &e) { + std::cout << "[ERROR] Error when SyncSendMessage: " << e.what() << std::endl; + return false; + } + return i < numRetry; +} +bool IpcClient::Recv() +{ + try { + Metadata recvMetadata; + std::vector PeekNpuPayLoad{ NpuPayLoad(sizeof(struct Metadata), &recvMetadata) }; + auto peekCtxt = ep_.BuildNpuRcvCtxt(PeekNpuPayLoad); + bool successFlag = false; + try { + successFlag = ep_.TryPeekMessage(*peekCtxt); + } catch (std::exception &e) { + std::cout << "[ERROR] Error when TryPeekMessage: " << e.what() << std::endl; + return false; + } + if (successFlag) { + std::unique_ptr npuMessage = std::make_unique(Message()); + npuMessage->metadata = recvMetadata; + npuMessage->buf = std::unique_ptr(new unsigned char[recvMetadata.size]); + npuMessage->src = std::string(ep_.GetName(*peekCtxt)); + std::vector npuPayLoad{ NpuPayLoad(sizeof(struct Metadata), (void *)&npuMessage->metadata), + NpuPayLoad(recvMetadata.size, npuMessage->buf.get()) }; + auto recvCtxt = ep_.BuildNpuRcvCtxt(npuPayLoad); + try { + successFlag = ep_.TryRcvMessage(*recvCtxt); + } catch (std::exception &e) { + std::cout << "[ERROR] Error when TryRecvMsg: " << e.what() << std::endl; + return false; + } + if (successFlag) { + std::lock_guard wguard(dequeLock_); + msgDynoDeque_.push_back(std::move(npuMessage)); + return true; + } + } + } catch (std::exception &e) { + std::cout << "[ERROR] Error in Recv(): " << e.what() << std::endl; + return false; + } + return false; +} +std::unique_ptr IpcClient::PollRecvMessage(int maxRetry, int sleeTimeUs) +{ + for (int i = 0; i < maxRetry; i++) { + if (Recv()) { 
+ return ReceiveMessage(); + } + usleep(sleeTimeUs); + } + return nullptr; +} + +} // namespace profiler +} // namespace torch_npu \ No newline at end of file diff --git a/dynolog_npu/plugin/ipc_monitor/NpuIpcClient.h b/dynolog_npu/plugin/ipc_monitor/NpuIpcClient.h new file mode 100644 index 00000000000..b152517b6dd --- /dev/null +++ b/dynolog_npu/plugin/ipc_monitor/NpuIpcClient.h @@ -0,0 +1,100 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include "NpuIpcEndPoint.h" +#include "utils.h" + +namespace torch_npu { +namespace profiler { + +constexpr int TYPE_SIZE = 32; +constexpr int JOB_ID = 0; +constexpr const char *DYNO_IPC_NAME = "dynolog"; +constexpr const int DYNO_IPC_TYPE = 3; +constexpr const int MAX_IPC_RETRIES = 5; +constexpr const int MAX_SLEEP_US = 10000; +struct NpuRequest { + int type; + int pidSize; + int64_t jobId; + int32_t pids[0]; +}; +struct NpuContext { + int32_t npu; + pid_t pid; + int64_t jobId; +}; +struct Metadata { + size_t size = 0; + char type[TYPE_SIZE] = ""; +}; +struct Message { + Metadata metadata; + std::unique_ptr buf; + std::string src; + template static std::unique_ptr ConstructMessage(const T &data, const std::string &type) + { + std::unique_ptr ipcNpuMessage = std::make_unique(Message()); + if (type.size() + 1 > sizeof(ipcNpuMessage->metadata.type)) { + throw std::runtime_error("Type string is too long to fit in metadata.type" + PROF_ERROR(ErrCode::PARAM)); + } + memcpy(ipcNpuMessage->metadata.type, type.c_str(), type.size() + 1); +#if __cplusplus >= 201703L + if constexpr (std::is_same::value == true) { + ipcNpuMessage->metadata.size = data.size(); + ipcNpuMessage->buf = std::make_unique(ipcNpuMessage->metadata.size); + memcpy(ipcNpuMessage->buf.get(), data.c_str(), sizeof(data)); + return ipcNpuMessage; + } +#endif + static_assert(std::is_trivially_copyable::value); + ipcNpuMessage->metadata.size = sizeof(data); + ipcNpuMessage->buf = 
std::make_unique(ipcNpuMessage->metadata.size); + memcpy(ipcNpuMessage->buf.get(), &data, sizeof(data)); + return ipcNpuMessage; + } + + template + static std::unique_ptr ConstructMessage(const T &data, const std::string &type, int n) + { + std::unique_ptr ipcNpuMessage = std::make_unique(Message()); + if (type.size() + 1 > sizeof(ipcNpuMessage->metadata.type)) { + throw std::runtime_error("Type string is too long to fit in metadata.type" + PROF_ERROR(ErrCode::PARAM)); + } + memcpy(ipcNpuMessage->metadata.type, type.c_str(), type.size() + 1); + static_assert(std::is_trivially_copyable::value); + static_assert(std::is_trivially_copyable::value); + ipcNpuMessage->metadata.size = sizeof(data) + sizeof(U) * n; + ipcNpuMessage->buf = std::make_unique(ipcNpuMessage->metadata.size); + memcpy(ipcNpuMessage->buf.get(), &data, ipcNpuMessage->metadata.size); + return ipcNpuMessage; + } +}; +class IpcClient { +public: + IpcClient(const IpcClient &) = delete; + IpcClient &operator = (const IpcClient &) = delete; + IpcClient() = default; + bool RegisterInstance(int32_t npu); + std::string IpcClientNpuConfig(); + +private: + std::vector pids_ = GetPids(); + NpuIpcEndPoint<0> ep_{ "dynoconfigclient" + GenerateUuidV4() }; + std::mutex dequeLock_; + std::deque> msgDynoDeque_; + std::unique_ptr ReceiveMessage(); + bool SyncSendMessage(const Message &message, const std::string &destName, int numRetry = 10, + int seepTimeUs = 10000); + bool Recv(); + std::unique_ptr PollRecvMessage(int maxRetry, int sleeTimeUs); +}; + +} // namespace profiler +} // namespace torch_npu diff --git a/dynolog_npu/plugin/ipc_monitor/NpuIpcEndPoint.h b/dynolog_npu/plugin/ipc_monitor/NpuIpcEndPoint.h new file mode 100644 index 00000000000..4f3b35ffd1d --- /dev/null +++ b/dynolog_npu/plugin/ipc_monitor/NpuIpcEndPoint.h @@ -0,0 +1,201 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" + +namespace torch_npu { +namespace 
profiler { + +using fileDesT = int; +constexpr const char STR_END_CHAR = '\0'; +constexpr int SOCKET_FD_CHMOD = 0666; + +struct NpuPayLoad { + size_t size; + void *data; + NpuPayLoad(size_t size, void *data) : size(size), data(data) {} +}; + +template struct NpuIpcEndPointCtxt { + struct sockaddr_un messageName; + size_t messageLen; + fileDesT *fileDesPtr; + struct msghdr msghdr; + std::vector iov; + char ancillaryBuf[CMSG_SPACE(MaxNumFileDes * sizeof(fileDesT))]; + explicit NpuIpcEndPointCtxt(size_t num) : iov(std::vector(num)){}; +}; + +template class NpuIpcEndPoint final { + using Ctxt = NpuIpcEndPointCtxt; + +public: + constexpr static size_t addressMaxLen = 108 - 2; // Max unix socket path length + explicit NpuIpcEndPoint(const std::string &addressName) + { + socketFd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (socketFd == -1) { + throw std::runtime_error(std::strerror(errno) + PROF_ERROR(ErrCode::PARAM)); + } + struct sockaddr_un address; + size_t addressLen = SetSocketAdress(addressName, address); + if (address.sun_path[0] != STR_END_CHAR) { + unlink(address.sun_path); + } + int res = bind(socketFd, (const struct sockaddr *)&address, addressLen); + if (res == -1) { + throw std::runtime_error("Bind socket failed." 
+ PROF_ERROR(ErrCode::PARAM)); + } + if (address.sun_path[0] != STR_END_CHAR) { + chmod(address.sun_path, SOCKET_FD_CHMOD); + } + } + ~NpuIpcEndPoint() + { + close(socketFd); + } + [[nodiscard]] auto BuildSendNpuCtxt(const std::string &desAddrName, const std::vector &npuPayLoad, + const std::vector &fileDes) + { + if (fileDes.size() > MaxNumFileDes) { + throw std::runtime_error("Request to fill more than max connections " + PROF_ERROR(ErrCode::PARAM)); + } + if (desAddrName.empty()) { + throw std::runtime_error("Can not send to dest point, because dest socket name is empty " + + PROF_ERROR(ErrCode::PARAM)); + } + auto ctxt = BuildNpuCtxt_(npuPayLoad, fileDes.size()); + ctxt->msghdr.msg_namelen = SetSocketAdress(desAddrName, ctxt->messageName); + if (!fileDes.empty()) { + if (sizeof(ctxt->fileDesPtr) < fileDes.size() * sizeof(fileDesT)) { + throw std::runtime_error("Memcpy failed when fileDes size large than ctxt fileDesPtr " + + PROF_ERROR(ErrCode::PARAM)); + } + memcpy(ctxt->fileDesPtr, fileDes.data(), fileDes.size() * sizeof(fileDesT)); + } + return ctxt; + } + + [[nodiscard]] bool TrySendMessage(Ctxt const & ctxt, bool retryOnConnRefused = true) + { + ssize_t retCode = sendmsg(socketFd, &ctxt.msghdr, MSG_DONTWAIT); + if (retCode > 0) { + return true; + } + if ((errno == EAGAIN || errno == EWOULDBLOCK) && retCode == -1) { + return false; + } + if (retryOnConnRefused && errno == ECONNREFUSED && retCode == -1) { + return false; + } + throw std::runtime_error("TrySendMessage occur " + std::string(std::strerror(errno)) + " " + + PROF_ERROR(ErrCode::PARAM)); + } + + [[nodiscard]] auto BuildNpuRcvCtxt(const std::vector &npuPayLoad) + { + return BuildNpuCtxt_(npuPayLoad, MaxNumFileDes); + } + + [[nodiscard]] bool TryRcvMessage(Ctxt &ctxt) noexcept + { + auto retCode = recvmsg(socketFd, &ctxt.msghdr, MSG_DONTWAIT); + if (retCode > 0) { + return true; + } + if (retCode == 0) { + return false; + } + if (errno == EWOULDBLOCK || errno == EAGAIN) { + return false; + } + throw 
std::runtime_error("TryRcvMessage occur " + std::string(std::strerror(errno)) + " " + + PROF_ERROR(ErrCode::PARAM)); + } + + [[nodiscard]] bool TryPeekMessage(Ctxt &ctxt) + { + ssize_t ret = recvmsg(socketFd, &ctxt.msghdr, MSG_DONTWAIT | MSG_PEEK); + if (ret > 0) { + return true; + } + if (ret == 0) { + return false; + } + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return false; + } + throw std::runtime_error("TryPeekMessage occur " + std::string(std::strerror(errno))); + } + + const char *GetName(Ctxt const & ctxt) const noexcept + { + if (ctxt.messageName.sun_path[0] != STR_END_CHAR) { + throw std::runtime_error("GetName() want to got abstract socket, but got " + + std::string(ctxt.messageName.sun_path)); + } + return ctxt.messageName.sun_path + 1; + } + + std::vector GetFileDes(const Ctxt &ctxt) const + { + struct cmsghdr *cmg = CMSG_FIRSTHDR(&ctxt.msghdl); + unsigned numFileDes = (cmg->cmsg_len - sizeof(struct cmsghdr)) / sizeof(fileDesT); + return { ctxt.fileDesPtr, ctxt.fileDesPtr + numFileDes }; + } + +protected: + fileDesT socketFd; + size_t SetSocketAdress(const std::string &srcSocket, struct sockaddr_un &destSocket) + { + if (srcSocket.size() > addressMaxLen) { + throw std::runtime_error("Abstract UNIX Socket path cannot be larger than addressMaxLen"); + } + destSocket.sun_family = AF_UNIX; + destSocket.sun_path[0] = STR_END_CHAR; + if (srcSocket.empty()) { + return sizeof(sa_family_t); + } + srcSocket.copy(destSocket.sun_path + 1, srcSocket.size()); + destSocket.sun_path[srcSocket.size() + 1] = STR_END_CHAR; + return sizeof(sa_family_t) + srcSocket.size() + 2; + } + + auto BuildNpuCtxt_(const std::vector &npuPayLoad, unsigned numFileDes) + { + auto ctxt = std::make_unique(npuPayLoad.size()); + std::memset(&ctxt->msghdr, 0, sizeof(ctxt->msghdr)); + for (auto i = 0; i < npuPayLoad.size(); i++) { + ctxt->iov[i] = {npuPayLoad[i].data, npuPayLoad[i].size}; + } + ctxt->msghdr.msg_name = &ctxt->messageName; + ctxt->msghdr.msg_namelen = 
sizeof(decltype(ctxt->messageName)); + ctxt->msghdr.msg_iov = ctxt->iov.data(); + ctxt->msghdr.msg_iovlen = npuPayLoad.size(); + ctxt->fileDesPtr = nullptr; + if (numFileDes == 0) { + return ctxt; + } + const size_t fileDesSize = sizeof(fileDesT) * numFileDes; + ctxt->msghdr.msg_control = ctxt->ancillaryBuf; + ctxt->msghdr.msg_controllen = CMSG_SPACE(fileDesSize); + + struct cmsghdr *cmsg = CMSG_FIRSTHDR(&ctxt->msghdr); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(fileDesSize); + ctxt->fileDesPtr = (fileDesT *)CMSG_DATA(cmsg); + return ctxt; + } +}; + +} // namespace profiler +} // namespace torch_npu diff --git a/dynolog_npu/plugin/ipc_monitor/PyDynamicMonitorProxy.h b/dynolog_npu/plugin/ipc_monitor/PyDynamicMonitorProxy.h new file mode 100644 index 00000000000..101fde766ef --- /dev/null +++ b/dynolog_npu/plugin/ipc_monitor/PyDynamicMonitorProxy.h @@ -0,0 +1,36 @@ +#pragma once +#include "MonitorBase.h" +#include "DynoLogNpuMonitor.h" +#include +#include + +namespace torch_npu { +namespace profiler { + +class PyDynamicMonitorProxy { +public: + PyDynamicMonitorProxy() = default; + bool InitDyno(int npuId) + { + try { + monitor_ = DynoLogNpuMonitor::GetInstance(); + monitor_->SetNpuId(npuId); + bool res = monitor_->Init(); + return res; + } catch (const std::exception &e) { + std::cout << "[ERROR] Error when init dyno " << e.what() << std::endl; + return false; + } + } + + std::string PollDyno() + { + return monitor_->Poll(); + }; + +private: + MonitorBase *monitor_ = nullptr; +}; + +} // namespace profiler +} // namespace torch_npu diff --git a/dynolog_npu/plugin/ipc_monitor/singleton.h b/dynolog_npu/plugin/ipc_monitor/singleton.h new file mode 100644 index 00000000000..1f18347aedf --- /dev/null +++ b/dynolog_npu/plugin/ipc_monitor/singleton.h @@ -0,0 +1,31 @@ +#pragma once + +#include + +namespace torch_npu { +namespace toolkit { +namespace profiler { + +template +class Singleton { +public: + static T *GetInstance() 
noexcept(std::is_nothrow_constructible::value) { + static T instance; + return &instance; + } + + virtual ~Singleton() = default; + +protected: + explicit Singleton() = default; + +private: + explicit Singleton(const Singleton &obj) = delete; + Singleton& operator=(const Singleton &obj) = delete; + explicit Singleton(Singleton &&obj) = delete; + Singleton& operator=(Singleton &&obj) = delete; +}; + +} // profiler +} // toolkit +} // torch_npu diff --git a/dynolog_npu/plugin/ipc_monitor/utils.cpp b/dynolog_npu/plugin/ipc_monitor/utils.cpp new file mode 100644 index 00000000000..8b4efbba550 --- /dev/null +++ b/dynolog_npu/plugin/ipc_monitor/utils.cpp @@ -0,0 +1,142 @@ +#include "utils.h" + +namespace torch_npu { +namespace profiler { +std::unordered_map submoduleMap = { + {SubModule::PTA, "PTA"}, + {SubModule::OPS, "OPS"}, + {SubModule::DIST, "DIST"}, + {SubModule::GRAPH, "GRAPH"}, + {SubModule::PROF, "PROF"} +}; + +std::unordered_map errCodeMap = { + {ErrCode::SUC, "success"}, + {ErrCode::PARAM, "invalid parameter"}, + {ErrCode::TYPE, "invalid type"}, + {ErrCode::VALUE, "invalid value"}, + {ErrCode::PTR, "invalid pointer"}, + {ErrCode::INTERNAL, "internal error"}, + {ErrCode::MEMORY, "memory error"}, + {ErrCode::NOT_SUPPORT, "feature not supported"}, + {ErrCode::NOT_FOUND, "resource not found"}, + {ErrCode::UNAVAIL, "resource unavailable"}, + {ErrCode::SYSCALL, "system call failed"}, + {ErrCode::TIMEOUT, "timeout error"}, + {ErrCode::PERMISSION, "permission error"}, + {ErrCode::ACL, "call acl api failed"}, + {ErrCode::HCCL, "call hccl api failed"}, + {ErrCode::GE, "call ge api failed"} +}; + +static std::string getCurrentTimestamp() +{ + auto now = std::chrono::system_clock::now(); + auto micros = std::chrono::duration_cast(now.time_since_epoch()); + + std::time_t currentTime = std::chrono::system_clock::to_time_t(now); + std::tm* timeInfo = std::localtime(¤tTime); + + auto milli_time = std::chrono::duration_cast(micros).count() % 1000; + auto micro_time = 
micros.count() % 1000; + + std::ostringstream oss; + oss << std::put_time(timeInfo, "%Y-%m-%d-%H:%M:%S"); + return oss.str(); +} + +std::string formatErrorCode(SubModule submodule, ErrCode errorCode) +{ + std::ostringstream oss; + oss << "\n[ERROR] " << getCurrentTimestamp() << " (PID:" << getpid() << ")"; + oss << "ERR" << std::setw(2) << std::setfill('0') << static_cast(submodule); + oss << std::setw(3) << std::setfill('0') << static_cast(errorCode); + oss << " " << submoduleMap[submodule] << " " << errCodeMap[errorCode]; + + return oss.str(); +}; + + +int32_t GetProcessId() +{ + return static_cast(getpid()); +} + +std::pair GetParentPidAndCommand(int32_t pid) +{ + std::string fileName = "/proc/" + std::to_string(pid) + "/stat"; + std::ifstream statFile(fileName); + if (!statFile) { + return std::make_pair(0, ""); + } + int32_t parentPid = 0; + std::string command; + std::string line; + if (std::getline(statFile, line)) { + int ret = sscanf(line.c_str(), "%*d (%[^)]) %*c %d", command.data(), &parentPid); + if (ret == 2) { +// ASCEND_LOGI("Success to get parent pid %d", parentPid); + return std::make_pair(parentPid, command); + } + } +// ASCEND_LOGW("Failed to parse /proc/%d/stat", pid); + return std::make_pair(0, ""); +} + +std::vector> GetPidCommandPairsofAncestors() +{ + std::vector> process_pids_and_cmds; + process_pids_and_cmds.reserve(MaxParentPids + 1); + int32_t current_pid = GetProcessId(); + for (int i = 0; i <= MaxParentPids && (i == 0 || current_pid > 1); i++) { + std::pair parent_pid_and_cmd = GetParentPidAndCommand(current_pid); + process_pids_and_cmds.push_back(std::make_pair(current_pid, parent_pid_and_cmd.second)); + current_pid = parent_pid_and_cmd.first; + } + return process_pids_and_cmds; +} + +std::vector GetPids() +{ + const auto &pids = GetPidCommandPairsofAncestors(); + std::vector res; + res.reserve(pids.size()); + for (const auto &pidPair : pids) { + res.push_back(pidPair.first); + } + return res; +} +std::string GenerateUuidV4() +{ + 
static std::random_device randomDevice; + static std::mt19937 gen(randomDevice()); + static std::uniform_int_distribution<> dis(0, 15); + static std::uniform_int_distribution<> dis2(8, 11); + + std::stringstream stringStream; + stringStream << std::hex; + for (int i = 0; i < 8; i++) { + stringStream << dis(gen); + } + stringStream << "-"; + for (int j = 0; j < 4; j++) { + stringStream << dis(gen); + } + stringStream << "-4"; + for (int k = 0; k < 3; k++) { + stringStream << dis(gen); + } + stringStream << "-"; + stringStream << dis2(gen); + for (int m = 0; m < 3; m++) { + stringStream << dis(gen); + } + stringStream << "-"; + for (int n = 0; n < 12; n++) { + stringStream << dis(gen); + } + return stringStream.str(); +} + +} // namespace profiler +} // namespace torch_npu diff --git a/dynolog_npu/plugin/ipc_monitor/utils.h b/dynolog_npu/plugin/ipc_monitor/utils.h new file mode 100644 index 00000000000..678f04cbdb4 --- /dev/null +++ b/dynolog_npu/plugin/ipc_monitor/utils.h @@ -0,0 +1,61 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace torch_npu { +namespace profiler { + +constexpr int MaxParentPids = 5; +int32_t GetProcessId(); +std::string GenerateUuidV4(); +std::vector GetPids(); +std::pair GetParentPidAndCommand(int32_t pid); +std::vector> GetPidCommandPairsofAncestors(); +static std::string getCurrentTimestamp(); + +enum class SubModule { + PTA = 0, + OPS = 1, + DIST = 2, + GRAPH = 3, + PROF = 4 +}; + +enum class ErrCode { + SUC = 0, + PARAM = 1, + TYPE = 2, + VALUE = 3, + PTR = 4, + INTERNAL = 5, + MEMORY = 6, + NOT_SUPPORT = 7, + NOT_FOUND = 8, + UNAVAIL = 9, + SYSCALL = 10, + TIMEOUT = 11, + PERMISSION = 12, + ACL = 100, + HCCL = 200, + GE = 300 +}; + + +std::string formatErrorCode(SubModule submodule, ErrCode errorCode); + +#define PROF_ERROR(error) formatErrorCode(SubModule::PROF, error) + + +} // namespace profiler +} // namespace torch_npu + diff --git 
a/dynolog_npu/plugin/setup.py b/dynolog_npu/plugin/setup.py new file mode 100644 index 00000000000..9216e01f555 --- /dev/null +++ b/dynolog_npu/plugin/setup.py @@ -0,0 +1,31 @@ +# setup.py +import os +from setuptools import setup, Extension +import pybind11 + +BASE_DIR = os.path.dirname(os.path.realpath(__file__)) + + +# Define the extension module +ext_modules = [ + Extension( + "IPCMonitor", # Name of the Python module + sources=["bindings.cpp", + "ipc_monitor/utils.cpp", + "ipc_monitor/DynoLogNpuMonitor.cpp", + "ipc_monitor/NpuIpcClient.cpp", + ], # Source files + include_dirs=[pybind11.get_include(), + os.path.join(BASE_DIR, "ipc_monitor")], # Include Pybind11 headers + language="c++", # Specify the language + ), +] + +# Set up the package +setup( + name="dynolog_npu_plugin", + version="0.1", + description="dynolog npu plugins", + ext_modules=ext_modules, + install_requires=["pybind11"], +) \ No newline at end of file -- Gitee From dc83dc5cfeb2f89a549b52b05a6884ccaf75d249 Mon Sep 17 00:00:00 2001 From: Gallium Date: Tue, 18 Feb 2025 21:40:28 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E5=A4=84=E7=90=86mspti=5Fmonitor=E6=8B=89?= =?UTF-8?q?=E8=B5=B7=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../plugin/ipc_monitor/DynoLogNpuMonitor.cpp | 30 ++++++++++++++ .../plugin/ipc_monitor/DynoLogNpuMonitor.h | 1 + .../plugin/ipc_monitor/InputParser.cpp | 17 ++++++++ dynolog_npu/plugin/ipc_monitor/InputParser.h | 19 +++++++++ dynolog_npu/plugin/ipc_monitor/log.cpp | 10 +++++ dynolog_npu/plugin/ipc_monitor/log.h | 23 +++++++++++ dynolog_npu/plugin/ipc_monitor/utils.cpp | 39 +++++++++++++++++++ dynolog_npu/plugin/ipc_monitor/utils.h | 3 ++ 8 files changed, 142 insertions(+) create mode 100644 dynolog_npu/plugin/ipc_monitor/InputParser.cpp create mode 100644 dynolog_npu/plugin/ipc_monitor/InputParser.h create mode 100644 dynolog_npu/plugin/ipc_monitor/log.cpp create mode 100644 dynolog_npu/plugin/ipc_monitor/log.h 
diff --git a/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.cpp b/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.cpp index 2160a40b98d..f7b813e85c2 100644 --- a/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.cpp +++ b/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.cpp @@ -22,9 +22,39 @@ bool DynoLogNpuMonitor::Init() return res; } +std::bool isMonitor(const std::unordered_map& res) +{ + return true; +} + +bool DynoLogNpuMonitor::DealMonitorReq(const std::unordered_map& cmd) +{ + std::string enableActivity = cmd["MSPTI_ACTIVITY_KIND"]; + uint32_t reportTimes = 0; + Str2Uint32(reportTimes, cmd["REPORT_INTERVAL_S"]); + bool startSwitch = false; + Str2Bool(startSwitch, cmd["NPU_MONITOR_START"]); + bool endSwitch = false; + Str2Bool(endSwitch, cmd["NPU_MONITOR_START"]); + if (startSwitch) { + PRINT_INFO("Start Mspti Monitor thread to collect, reportTimes: %s, enableActivity: %s", reportTimes, enableActivity); + } + + if (endSwitch) { + PRINF_INFO("End Mpsit Monitor thread"); + } + return true; +} + std::string DynoLogNpuMonitor::Poll() { std::string res = ipcClient_.IpcClientNpuConfig(); + auto cmd = InputParser.GetInstance()->DynoLogGetOpts(res); + // 是个monitor的指令 + if (isMonitor(cmd) && msptiMonitorThread_) { + DealMonitorReq(cmd); + return + } if (res.empty()) { std::cout << "[INFO] Request for dynolog server is empty !" 
<< std::endl; return ""; diff --git a/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.h b/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.h index 0125cda078d..3737ee88b94 100644 --- a/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.h +++ b/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.h @@ -13,6 +13,7 @@ public: DynoLogNpuMonitor() = default; bool Init() override; std::string Poll() override; + bool DealMonitorReq(const std::unordered_map& cmd); void SetNpuId(int id) override { npuId_ = id; diff --git a/dynolog_npu/plugin/ipc_monitor/InputParser.cpp b/dynolog_npu/plugin/ipc_monitor/InputParser.cpp new file mode 100644 index 00000000000..4be7228e5e4 --- /dev/null +++ b/dynolog_npu/plugin/ipc_monitor/InputParser.cpp @@ -0,0 +1,17 @@ +#include "log.h" +#include + +std::unordered_map InputParser::DynoLogGetOpts(const std::string& msg) +{ + std::unordered_map res; + pairs = msg.split("\n"); + for (auto pair : pairs) { + cfgSplit = pair.split("="); + if (cfgSplit.size() == 2) { + res[cfgSplit[0]] = cfgSplit[1]; + } else { + PRINT_INFO("Msg from dynolog is invalid"); + } + } + return res; +} \ No newline at end of file diff --git a/dynolog_npu/plugin/ipc_monitor/InputParser.h b/dynolog_npu/plugin/ipc_monitor/InputParser.h new file mode 100644 index 00000000000..0cbb2c4fcfe --- /dev/null +++ b/dynolog_npu/plugin/ipc_monitor/InputParser.h @@ -0,0 +1,19 @@ +#pragma once +#include +#include +#include +#include + +namespace torch_npu { +namespace profiler { + +class InputParser: public torch_npu::toolkit::profiler::Singleton { +public: + InputParser(); + virtual InputParser(); + + std::unordered_map DynoLogGetOpts(const std::string& msg); +} + +} // namespace profiler +} // namespace torch_npu diff --git a/dynolog_npu/plugin/ipc_monitor/log.cpp b/dynolog_npu/plugin/ipc_monitor/log.cpp new file mode 100644 index 00000000000..f602c556a28 --- /dev/null +++ b/dynolog_npu/plugin/ipc_monitor/log.cpp @@ -0,0 +1,10 @@ +#include "log.h" + +void Log::PrintMsg(const std::string& 
msg, const std::string& level) const +{ + std::ostringstream oss; + oss << "\n" << level << getCurrentTimestamp() << " (PID:" << getpid() << ")"; + oss << msg << "\n" + + std::cout << oss.str(); +} diff --git a/dynolog_npu/plugin/ipc_monitor/log.h b/dynolog_npu/plugin/ipc_monitor/log.h new file mode 100644 index 00000000000..d673b50b78e --- /dev/null +++ b/dynolog_npu/plugin/ipc_monitor/log.h @@ -0,0 +1,23 @@ +#include "singleton.h" + +#define PRINT_INFO(format, ...) + do { + Log::GetInstance()->PrintMsg(FORMAT(format, ##__VA_ARGS__), "[INFO]") + } while(0) + +#define PRINT_ERROR(format, ...) + do { + Log::GetInstance()->PrintMsg(FORMAT(format, ##__VA_ARGS__), "[ERROR]") + } while(0) + +#define PRINT_ERROR(format, ...) + do { + Log::GetInstance()->PrintMsg(FORMAT(format, ##__VA_ARGS__), "[WARNING]") + } while(0) + +class Log : public torch_npu::toolkit::profiler::Singleton +{ +public: + void PrintMsg(const std::string& msg, const std::string& level) const; +}; + diff --git a/dynolog_npu/plugin/ipc_monitor/utils.cpp b/dynolog_npu/plugin/ipc_monitor/utils.cpp index 8b4efbba550..48e9cfda411 100644 --- a/dynolog_npu/plugin/ipc_monitor/utils.cpp +++ b/dynolog_npu/plugin/ipc_monitor/utils.cpp @@ -1,4 +1,6 @@ #include "utils.h" +#include +#include namespace torch_npu { namespace profiler { @@ -138,5 +140,42 @@ std::string GenerateUuidV4() return stringStream.str(); } +bool std::uint32_t Str2Uint32(uint32_t& dest, const std::string& str) +{ + if (numStr.empty()) { + PRINT_INFO("Str to uint32 fail, input string is null"); + return false; + } + size_t pos = 0; + try { + dest = static_cast(std::stoul(str, &ops)); + } catch(...) 
{ + PRINT_INFO("Str to uint32 fail, input string is %s", numStr.c_str()); + return false; + } + if (pos != str.size()) { + PRINT_INFO("Str to uint32 fail, input string is %s", numStr.c_str()); + return false; + } + return true; +} + +bool Str2Bool(bool& dest, const std::string& str) { + std::string lower_str = str; + std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), ::tolower); + + if (lower_str == "true" || lower_str == "1") { + dest = true; + return true; + } + + if (lower_str == "false" || lower_str == "0") { + dest = false; + return true; + } + PRINT_ERROR("Invalid boolean string: %s", std.c_str()); + return false; +} + } // namespace profiler } // namespace torch_npu diff --git a/dynolog_npu/plugin/ipc_monitor/utils.h b/dynolog_npu/plugin/ipc_monitor/utils.h index 678f04cbdb4..e2dee69aa2d 100644 --- a/dynolog_npu/plugin/ipc_monitor/utils.h +++ b/dynolog_npu/plugin/ipc_monitor/utils.h @@ -23,6 +23,9 @@ std::pair GetParentPidAndCommand(int32_t pid); std::vector> GetPidCommandPairsofAncestors(); static std::string getCurrentTimestamp(); +static std::uint32_t Str2Uint32(uint32_t& dest, const std::string& str); +static std::uint32_t Str2Bool(bool& dest, const std::string& str); + enum class SubModule { PTA = 0, OPS = 1, -- Gitee From 323657fb98d49853a65b08090da4836367154f9c Mon Sep 17 00:00:00 2001 From: Gallium Date: Tue, 18 Feb 2025 23:03:39 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E5=A4=84=E7=90=86mspti=5Fmonitor=E6=8B=89?= =?UTF-8?q?=E8=B5=B7=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 处理mspti_monitor拉起逻辑 处理mspti_monitor拉起逻辑 dynolog_npu --- .../plugin/ipc_monitor/DynoLogNpuMonitor.cpp | 14 +++++----- .../plugin/ipc_monitor/DynoLogNpuMonitor.h | 2 +- dynolog_npu/plugin/ipc_monitor/InputParser.h | 7 ++--- dynolog_npu/plugin/ipc_monitor/log.h | 26 +++++++++---------- dynolog_npu/plugin/ipc_monitor/utils.cpp | 9 ++++--- dynolog_npu/plugin/ipc_monitor/utils.h | 7 +++-- 6 files 
changed, 33 insertions(+), 32 deletions(-) diff --git a/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.cpp b/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.cpp index 80b488d4911..e32e332eb36 100644 --- a/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.cpp +++ b/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.cpp @@ -3,6 +3,8 @@ #include #include "utils.h" +#include "InputParser.h" +#include "log.h" namespace dynolog_npu { namespace ipc_monitor { @@ -21,12 +23,12 @@ bool DynoLogNpuMonitor::Init() return res; } -std::bool isMonitor(const std::unordered_map& res) +bool isMonitor(const std::unordered_map& res) { return true; } -bool DynoLogNpuMonitor::DealMonitorReq(const std::unordered_map& cmd) +bool DynoLogNpuMonitor::DealMonitorReq(std::unordered_map& cmd) { std::string enableActivity = cmd["MSPTI_ACTIVITY_KIND"]; uint32_t reportTimes = 0; @@ -40,7 +42,7 @@ bool DynoLogNpuMonitor::DealMonitorReq(const std::unordered_mapDynoLogGetOpts(res); + auto cmd = dynolog_npu::profiler::InputParser::GetInstance()->DynoLogGetOpts(res); // 是个monitor的指令 - if (isMonitor(cmd) && msptiMonitorThread_) { + if (isMonitor(cmd)) { DealMonitorReq(cmd); - return + return ""; } if (res.empty()) { std::cout << "[INFO] Request for dynolog server is empty !" 
<< std::endl; diff --git a/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.h b/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.h index 0e98091e9eb..3a674b7a46c 100644 --- a/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.h +++ b/dynolog_npu/plugin/ipc_monitor/DynoLogNpuMonitor.h @@ -15,7 +15,7 @@ public: DynoLogNpuMonitor() = default; bool Init() override; std::string Poll() override; - bool DealMonitorReq(const std::unordered_map& cmd); + bool DealMonitorReq(std::unordered_map& cmd); void SetNpuId(int id) override { npuId_ = id; diff --git a/dynolog_npu/plugin/ipc_monitor/InputParser.h b/dynolog_npu/plugin/ipc_monitor/InputParser.h index 36b26495719..4f3f8d94e4d 100644 --- a/dynolog_npu/plugin/ipc_monitor/InputParser.h +++ b/dynolog_npu/plugin/ipc_monitor/InputParser.h @@ -7,13 +7,10 @@ namespace dynolog_npu { namespace profiler { -class InputParser: public dynolog_npu::toolkit::profiler::Singleton { +class InputParser: public dynolog_npu::ipc_monitor::Singleton { public: - InputParser(); - virtual InputParser(); - std::unordered_map DynoLogGetOpts(const std::string& msg); -} +}; } // namespace profiler } // namespace torch_npu diff --git a/dynolog_npu/plugin/ipc_monitor/log.h b/dynolog_npu/plugin/ipc_monitor/log.h index d673b50b78e..287b5975ee5 100644 --- a/dynolog_npu/plugin/ipc_monitor/log.h +++ b/dynolog_npu/plugin/ipc_monitor/log.h @@ -1,21 +1,21 @@ #include "singleton.h" -#define PRINT_INFO(format, ...) - do { - Log::GetInstance()->PrintMsg(FORMAT(format, ##__VA_ARGS__), "[INFO]") - } while(0) +#define PRINT_INFO(format, ...) \ + do { \ + Log::GetInstance()->PrintMsg(format, "[INFO]"); \ + } while(0) \ -#define PRINT_ERROR(format, ...) - do { - Log::GetInstance()->PrintMsg(FORMAT(format, ##__VA_ARGS__), "[ERROR]") - } while(0) +#define PRINT_WARNING(format, ...) \ + do { \ + Log::GetInstance()->PrintMsg(format, "[WARNING]"); \ + } while(0) \ -#define PRINT_ERROR(format, ...) 
- do { - Log::GetInstance()->PrintMsg(FORMAT(format, ##__VA_ARGS__), "[WARNING]") - } while(0) +#define PRINT_ERROR(format, ...) \ + do { \ + Log::GetInstance()->PrintMsg(format, "[ERROR]"); \ + } while(0) \ -class Log : public torch_npu::toolkit::profiler::Singleton +class Log : public dynolog_npu::ipc_monitor::Singleton { public: void PrintMsg(const std::string& msg, const std::string& level) const; diff --git a/dynolog_npu/plugin/ipc_monitor/utils.cpp b/dynolog_npu/plugin/ipc_monitor/utils.cpp index c916856f665..93fba59eaaf 100644 --- a/dynolog_npu/plugin/ipc_monitor/utils.cpp +++ b/dynolog_npu/plugin/ipc_monitor/utils.cpp @@ -1,4 +1,7 @@ #include "utils.h" +#include +#include +#include "log.h" namespace dynolog_npu { namespace ipc_monitor { @@ -131,15 +134,15 @@ std::string GenerateUuidV4() return stringStream.str(); } -bool std::uint32_t Str2Uint32(uint32_t& dest, const std::string& str) +bool Str2Uint32(uint32_t& dest, const std::string& str) { - if (numStr.empty()) { + if (str.empty()) { PRINT_INFO("Str to uint32 fail, input string is null"); return false; } size_t pos = 0; try { - dest = static_cast(std::stoul(str, &ops)); + dest = static_cast(std::stoul(str, &pos)); } catch(...) 
{ PRINT_INFO("Str to uint32 fail, input string is %s", numStr.c_str()); return false; diff --git a/dynolog_npu/plugin/ipc_monitor/utils.h b/dynolog_npu/plugin/ipc_monitor/utils.h index bf15a44f3b7..7c292240c27 100644 --- a/dynolog_npu/plugin/ipc_monitor/utils.h +++ b/dynolog_npu/plugin/ipc_monitor/utils.h @@ -23,10 +23,9 @@ std::string GenerateUuidV4(); std::vector GetPids(); std::pair GetParentPidAndCommand(int32_t pid); std::vector> GetPidCommandPairsofAncestors(); -static std::string getCurrentTimestamp(); - -static std::uint32_t Str2Uint32(uint32_t& dest, const std::string& str); -static std::uint32_t Str2Bool(bool& dest, const std::string& str); +std::string getCurrentTimestamp(); +bool Str2Uint32(uint32_t& dest, const std::string& str); +bool Str2Bool(bool& dest, const std::string& str); enum class SubModule { IPC = 0 -- Gitee