diff --git a/msmonitor/dynolog_npu/CMakeLists.txt b/msmonitor/dynolog_npu/CMakeLists.txt index d0da4c68f37a0b22bad7c873e8c17c66c0c03d5b..c0c43e8272f9879abaf4109a351ab0b095b967ea 100644 --- a/msmonitor/dynolog_npu/CMakeLists.txt +++ b/msmonitor/dynolog_npu/CMakeLists.txt @@ -78,9 +78,11 @@ target_link_libraries(dynolog_lib PUBLIC pfs) add_subdirectory(third_party/fmt) target_link_libraries(dynolog_lib PUBLIC fmt::fmt) -add_subdirectory(third_party/tensorboard_logger) -target_include_directories(dynolog_lib PUBLIC third_party/tensorboard_logger/include) -target_link_libraries(dynolog_lib PUBLIC tensorboard_logger) +if(USE_TENSORBOARD) + add_subdirectory(third_party/tensorboard_logger) + target_include_directories(dynolog_lib PUBLIC third_party/tensorboard_logger/include) + target_link_libraries(dynolog_lib PUBLIC tensorboard_logger) +endif() if(USE_ODS_GRAPH_API) add_subdirectory(third_party/cpr) diff --git a/msmonitor/dynolog_npu/dynolog/src/CMakeLists.txt b/msmonitor/dynolog_npu/dynolog/src/CMakeLists.txt index dfa337ec532df3eeca520e2754b9deb1fa7dea88..7d05a447091e90645c9a7fa5830a42337025a637 100644 --- a/msmonitor/dynolog_npu/dynolog/src/CMakeLists.txt +++ b/msmonitor/dynolog_npu/dynolog/src/CMakeLists.txt @@ -7,6 +7,7 @@ add_definitions(-DDYNOLOG_VERSION=${DYNOLOG_VERSION} -DDYNOLOG_GIT_REV=${DYNOLOG message("Use Prometheus = ${USE_PROMETHEUS}") message("Use ODS Graph API = ${USE_ODS_GRAPH_API}") +message("Use Tensorboard = ${USE_TENSORBOARD}") # our build script will first create a src/ dir where all source code will exist file (GLOB dynolog_src "*.h" "*.cpp") diff --git a/msmonitor/dynolog_npu/dynolog/src/DynologTensorBoardLogger.cpp b/msmonitor/dynolog_npu/dynolog/src/DynologTensorBoardLogger.cpp index 9b15a2a8a12f46aba85807d8a6ed740a52403d9f..936461b494e6ec3f6f029328c11436ab3ae36131 100644 --- a/msmonitor/dynolog_npu/dynolog/src/DynologTensorBoardLogger.cpp +++ b/msmonitor/dynolog_npu/dynolog/src/DynologTensorBoardLogger.cpp @@ -11,6 +11,7 @@ #include 
#include +#ifdef USE_TENSORBOARD DEFINE_string(metric_log_dir, "", "The Path to store tensorboard logs"); namespace dynolog { @@ -136,4 +137,5 @@ void TensorBoardLoggerImpl::log(const std::string &key, double val, uint64_t ste } logger_->add_scalar(key, step, val); } -} \ No newline at end of file +} +#endif \ No newline at end of file diff --git a/msmonitor/dynolog_npu/dynolog/src/DynologTensorBoardLogger.h b/msmonitor/dynolog_npu/dynolog/src/DynologTensorBoardLogger.h index 4b174dbaf98b2e75eed5555ac3fa62500134a471..586215065aeead409daaf2d4845050757599c759 100644 --- a/msmonitor/dynolog_npu/dynolog/src/DynologTensorBoardLogger.h +++ b/msmonitor/dynolog_npu/dynolog/src/DynologTensorBoardLogger.h @@ -9,6 +9,8 @@ #include "MsMonitorMetrics.h" +#ifdef USE_TENSORBOARD + #include "tensorboard_logger.h" DECLARE_string(metric_log_dir); @@ -94,4 +96,5 @@ private: std::string hostName_; }; -} // namespace dynolog \ No newline at end of file +} // namespace dynolog +#endif \ No newline at end of file diff --git a/msmonitor/dynolog_npu/dynolog/src/Main.cpp b/msmonitor/dynolog_npu/dynolog/src/Main.cpp index 693b729bbd20e3aa23436ac6fbffdf6f85c88726..c6d9645c0faf7f899208b3a06bac86cdf568380d 100644 --- a/msmonitor/dynolog_npu/dynolog/src/Main.cpp +++ b/msmonitor/dynolog_npu/dynolog/src/Main.cpp @@ -15,7 +15,7 @@ #include "dynolog/src/KernelCollector.h" #include "dynolog/src/Logger.h" #include "dynolog/src/ODSJsonLogger.h" -#include "dynolog/src/DynologTensorBoardLogger.h" + #include "dynolog/src/PerfMonitor.h" #include "dynolog/src/ScubaLogger.h" #include "dynolog/src/ServiceHandler.h" @@ -29,6 +29,10 @@ #include "dynolog/src/PrometheusLogger.h" #endif +#ifdef USE_TENSORBOARD +#include "dynolog/src/DynologTensorBoardLogger.h" +#endif + using namespace dynolog; using json = nlohmann::json; namespace hbt = facebook::hbt; @@ -69,6 +73,11 @@ std::unique_ptr getLogger(const std::string& scribe_category = "") { if (FLAGS_use_prometheus) { loggers.push_back(std::make_unique()); } 
+#endif +#ifdef USE_TENSORBOARD + if (!FLAGS_metric_log_dir.empty()) { + loggers.push_back(std::make_unique(FLAGS_metric_log_dir)); + } #endif if (FLAGS_use_fbrelay) { loggers.push_back(std::make_unique()); @@ -82,9 +91,6 @@ std::unique_ptr getLogger(const std::string& scribe_category = "") { if (FLAGS_use_scuba && !scribe_category.empty()) { loggers.push_back(std::make_unique(scribe_category)); } - if (!FLAGS_metric_log_dir.empty()) { - loggers.push_back(std::make_unique(FLAGS_metric_log_dir)); - } return std::make_unique(std::move(loggers)); } diff --git a/msmonitor/plugin/bindings.cpp b/msmonitor/plugin/bindings.cpp index b08f7e3e3df0c9fb0d2905cd7463480bf1b17b7d..79f75ac3e7241e01ff4acb7de152f47f5ae898d3 100644 --- a/msmonitor/plugin/bindings.cpp +++ b/msmonitor/plugin/bindings.cpp @@ -4,27 +4,11 @@ namespace py = pybind11; -void init_IPCMonitor(PyObject *module) { - py::class_(module, "PyDynamicMonitorProxy") +PYBIND11_MODULE(IPCMonitor, m) { + py::class_(m, "PyDynamicMonitorProxy") .def(py::init<>()) .def("init_dyno", &dynolog_npu::ipc_monitor::PyDynamicMonitorProxy::InitDyno, py::arg("npuId")) .def("poll_dyno", &dynolog_npu::ipc_monitor::PyDynamicMonitorProxy::PollDyno) .def("enable_dyno_npu_monitor", &dynolog_npu::ipc_monitor::PyDynamicMonitorProxy::EnableMsptiMonitor, py::arg("cfg_map")) .def("finalize_dyno", &dynolog_npu::ipc_monitor::PyDynamicMonitorProxy::FinalizeDyno); -} - -static PyMethodDef g_moduleMethods[] = {}; - -static struct PyModuleDef ipcMonitor_module = { - PyModuleDef_HEAD_INIT, - "IPCMonitor", - nullptr, - -1, - g_moduleMethods -}; - -PyMODINIT_FUNC PyInit_IPCMonitor(void) { - PyObject* m = PyModule_Create(&ipcMonitor_module); - init_IPCMonitor(m); - return m; } \ No newline at end of file diff --git a/msmonitor/scripts/build.sh b/msmonitor/scripts/build.sh index f2203d1734537b55bab7c61f1240424afdd550f4..042fec9b3a23a9acd7708a325d476646713c9e09 100644 --- a/msmonitor/scripts/build.sh +++ b/msmonitor/scripts/build.sh @@ -1,6 +1,8 @@ 
#!/bin/bash set -e -export BUILD_PROMETHEUS=1 +export BUILD_PROMETHEUS=0 +export BUILD_TENSORBOARD=1 +export USE_TENSORBOARD="OFF" check_gcc_version() { if ! command -v gcc >/dev/null 2>&1; then @@ -52,7 +54,10 @@ update_and_checkout_submodule() { cd ./third_party/dynolog git checkout ${DYNLOG_COMMIT_ID} - git submodule add https://github.com/RustingSword/tensorboard_logger.git ./third_party/tensorboard_logger + if [ ${BUILD_TENSORBOARD} -ne 0 ]; then + git submodule add https://github.com/RustingSword/tensorboard_logger.git ./third_party/tensorboard_logger + USE_TENSORBOARD="ON" + fi git submodule update --init --recursive git commit -am "Add tensorboard_logger as submodule"