diff --git a/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/FileParser.cpp b/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/FileParser.cpp new file mode 100644 index 0000000000000000000000000000000000000000..77b348f649a7196e33373c2a39264ac83a87db2e --- /dev/null +++ b/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/FileParser.cpp @@ -0,0 +1,92 @@ +/* +* Copyright (c), Huawei Technologies Co., Ltd. 2024-2024.All rights reserved. + */ +#include "FileParser.h" +#include "Logger.h" +#include "defs/HistoConceptDefs.h" + +using namespace Insight::Histogram::Parser; +using namespace Insight::Histogram; +using namespace Insight; + +bool FileParser::CheckFilePathVaild(const std::string &filePath) { + if (filePath.empty()) { + LOG(LogRank::Error) << "File path is empty"; + return false; + } + if (!fs::exists(filePath)) { + LOG(LogRank::Error) << "File path does not exist"; + return false; + } + auto readPermission = fs::status(filePath).permissions() & fs::perms::owner_read; + if (readPermission == fs::perms::none) { + LOG(LogRank::Error) << "File not permit to read"; + return false; + } + return true; +} + +std::ifstream FileParser::OpenFileSafe(const std::string &filePath) { + std::ifstream file; + file.setstate(std::ios::badbit); + if (!CheckFilePathVaild(filePath)) { + LOG(LogRank::Error) << "Open file failed"; + return file; + } + file = std::ifstream(filePath, std::ios::in | std::ios::binary); + return file; +} + +bool FileParser::ParseData(const std::string &filePath, uint64_t &offset) { + std::ifstream file = OpenFileSafe(filePath); + if (!file.is_open()) { + LOG(LogRank::Warning) << "Parse data faild, open file error"; + return false; + } + file.seekg(static_cast(offset), std::ios::beg); + std::string recordStr; + while (file && ReadRecord(recordStr, file)) { + if (!ParseRecord(std::move(recordStr))) { + break; + } + std::streampos index = file.tellg(); + if (index != -1) { + offset = index; + } + } + // 
读到二进制数据之后转换成tag - graph 的映射方便前端画图 + return true; +} + +bool FileParser::ReadCheckSumRecord(std::ifstream &input, std::vector &buffer, size_t size) { + if (!input) { + return false; + } + if (size > std::numeric_limits::max()) { + LOG(LogRank::Error) << "Read data exceed limit"; + return false; + } + + buffer.clear(); + buffer.resize(size + 1); + input.read(buffer.data(), static_cast(size)); + if (input.gcount() != size) { + return false; + } + uint32_t ccrc = 0; + input.read(reinterpret_cast(&ccrc), sizeof(uint32_t)); + if (input.gcount() != sizeof(uint32_t)) { + return false; + } + return true; +} + +std::set FileParser::GetTags() +{ + return tags; +} + +std::map FileParser::GetTagToHistoGraph() +{ + return tagTohistoGraph; +} diff --git a/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/FileParser.h b/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/FileParser.h new file mode 100644 index 0000000000000000000000000000000000000000..2042bbcbeaf4518add830d9886379f38c000281f --- /dev/null +++ b/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/FileParser.h @@ -0,0 +1,56 @@ +/* +* Copyright (c), Huawei Technologies Co., Ltd. 2024-2024.All rights reserved. 
+ */ +#ifndef FILEPARSER_H +#define FILEPARSER_H + +#include +#include +#include +#include +#include +#include "defs/HistoConceptDefs.h" + +namespace Insight::Histogram::Parser { +/** + * @brief This base class of data file parser + */ +class FileParser { +public: + FileParser() : type_(ParseDataType::Unknown) {} + + /** + * @brief open file stream safely + * @param filePath + * @return + */ + std::ifstream OpenFileSafe(const std::string &filePath); + /** + * + * @param filePath + * @param[in/out] offset file read offset, update after parsed + * @return + */ + + bool ParseData(const std::string &filePath, uint64_t &offset); + std::set GetTags(); + std::map GetTagToHistoGraph(); + + virtual bool ReadRecord(std::string &recordStr, std::ifstream &input) = 0; + static bool ReadCheckSumRecord(std::ifstream &input, std::vector &buffer, size_t size); + virtual bool ParseRecord(std::string &&record) = 0; + virtual ~FileParser() = default; + +protected: + std::set tags; + std::map tagTohistoGraph; + +private: + bool CheckFilePathVaild(const std::string &filePath); + +public: + ParseDataType type_; +}; +} + +#endif //FILEPARSER_H diff --git a/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/MindsporeParser.cpp b/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/MindsporeParser.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3e25b68a555ea488e68ecfbb34daf5320eb7ccb1 --- /dev/null +++ b/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/MindsporeParser.cpp @@ -0,0 +1,59 @@ +/* +* Copyright (c), Huawei Technologies Co., Ltd. 2024-2024.All rights reserved. 
+ */ +#include "proto/mindspore_summary.pb.h" +#include "Logger.h" +#include "MindsporeParser.h" + +using namespace Insight::Histogram::Parser; +using namespace Insight::Histogram; +using namespace Insight; + +bool MindsporeParser::ReadRecord(std::string &eventStr, std::ifstream &input) { + /* + * The structure of tf event file: + * | data length | CRC sum | pb data | CRC sum | + * | uint64_t | uint32_t | .... | uint32_t | + */ + std::vector record; + if (!ReadCheckSumRecord(input, record, sizeof(uint64_t))) { + return false; + } + uint64_t length = 0; + memcpy(&length, record.data(), sizeof(uint64_t)); + if (!ReadCheckSumRecord(input, record, length)) { + return false; + } + eventStr = std::string(record.data(), length); + return true; +} + +bool MindsporeParser::ParseRecord(std::string &&record) { + mindspore::irpb::Event event; + if (!event.ParseFromString(record)) { + LOG(LogRank::Error) << "Can't convert str to tensorflow event"; + return false; + } + uint64_t step = event.step(); + if (!event.has_summary()) { + return true; + } + for (const auto& value : event.summary().value()) { + if (!value.has_histogram()) { + continue; + } + // 存储这个文件的相关tag + tags.insert(value.tag()); + // 获取原始的histo 并存储这些原始数据到tagTohistolines里 + const auto& histogram = value.histogram(); + // 由tag 到直方图的每一条线的对应关系存储数据 + // 如果这个tag到图的对应关系没有,就新建一个 + if (tagTohistoGraph.find(value.tag()) == tagTohistoGraph.end()) { + tagTohistoGraph[value.tag()] = HistogramGraph(); + } + HistogramLine line(step, histogram); + tagTohistoGraph[value.tag()].AddValue(line); + } + return true; +} + diff --git a/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/MindsporeParser.h b/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/MindsporeParser.h new file mode 100644 index 0000000000000000000000000000000000000000..6f5209ce42ec8b64de1659dc0863f5b4ddd54392 --- /dev/null +++ b/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/MindsporeParser.h @@ -0,0 +1,30 @@ +/* 
+* Copyright (c), Huawei Technologies Co., Ltd. 2024-2024.All rights reserved. + */ +#ifndef MINDSPOREPARSER_H +#define MINDSPOREPARSER_H + +#include "FileParser.h" +#include "defs/HistoConceptDefs.h" + +namespace Insight::Histogram::Parser { + class MindsporeParser final : public FileParser { + public: + MindsporeParser() { + type_ = ParseDataType::MindSpore_Summary; + } + + ~MindsporeParser() override = default; + + bool ReadRecord(std::string &eventStr, std::ifstream &input) override; + private: + /** + * @brief decode one serialized MindSpore event and collect the histogram values it contains + * @param record serialized event as produced by ReadRecord() + * @return false when the record cannot be decoded, true otherwise + */ + bool ParseRecord(std::string &&record) override; + }; +} + +#endif //MINDSPOREPARSER_H \ No newline at end of file diff --git a/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/ParserFactory.h b/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/ParserFactory.h new file mode 100644 index 0000000000000000000000000000000000000000..de53be94d92e76ebe4143ee1cc878631ed085b61 --- /dev/null +++ b/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/ParserFactory.h @@ -0,0 +1,60 @@ +/* +* Copyright (c), Huawei Technologies Co., Ltd. 2024-2024.All rights reserved. 
+ */ +#ifndef PARSERFACTORY_H +#define PARSERFACTORY_H + +#include +#include +#include "defs/HistoConceptDefs.h" +#ifdef _WIN32 +#include +namespace fs = std::filesystem; +#else + +#include + +namespace fs = std::experimental::filesystem; +#endif +#include "TFEventParser.h" +#include "MindsporeParser.h" + +namespace Insight::Histogram::Parser { +using namespace Insight::Histogram; + +class ParserFactory { +public: + std::shared_ptr CreateFileParse(std::string_view filePath) { + ParseDataType type = GetFileType(filePath); + if (parsers_.find(type) == parsers_.end()) { + return nullptr; + } + return parsers_[type]; + } +private: + inline ParseDataType GetFileType(std::string_view filePath) { + for (const auto &[k, v]: fileTypeMap_) { + std::regex regex(k); + std::smatch match; + std::string fileName = fs::path(filePath).filename().string(); + if (std::regex_search(fileName, match, regex)) { + return v; + } + } + return ParseDataType::Unknown; + } + + std::map > parsers_ = { + {ParseDataType::TF_EVENT, std::make_shared()}, + {ParseDataType::MindSpore_Summary, std::make_shared()}, + {ParseDataType::Unknown, nullptr} + }; + + std::map fileTypeMap_ = { + {R"(out.tfevent)", ParseDataType::TF_EVENT}, + {R"(out.events.summary)", ParseDataType::MindSpore_Summary} + }; +}; +} + +#endif //PARSERFACTORY_H diff --git a/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/TFEventParser.cpp b/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/TFEventParser.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c5b8e2a3dc639c5d6653d651404e7b50db1c4d60 --- /dev/null +++ b/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/TFEventParser.cpp @@ -0,0 +1,59 @@ +/* +* Copyright (c), Huawei Technologies Co., Ltd. 2024-2024.All rights reserved. 
+ */ +#include + +#include "TFEventParser.h" +#include "proto/event.pb.h" +#include "Logger.h" + +using namespace Insight::Histogram::Parser; +using namespace Insight::Histogram; +using namespace Insight; + +bool TFEventParser::ReadRecord(std::string &eventStr, std::ifstream &input) { + /* + * The structure of tf event file: + * | data length | CRC sum | pb data | CRC sum | + * | uint64_t | uint32_t | .... | uint32_t | + */ + std::vector record; + if (!ReadCheckSumRecord(input, record, sizeof(uint64_t))) { + return false; + } + uint64_t length = 0; + memcpy(&length, record.data(), sizeof(uint64_t)); + if (!ReadCheckSumRecord(input, record, length)) { + return false; + } + eventStr = std::string(record.data(), length); + return true; +} + +bool TFEventParser::ParseRecord(std::string &&record) { + tensorboard::Event event; + if (!event.ParseFromString(record)) { + LOG(LogRank::Error) << "Can't convert str to tensorflow event"; + return false; + } + uint64_t step = event.step(); + if (!event.has_summary()) { + return true; + } + for (const auto& value : event.summary().value()) { + if (!value.has_histo()) { + continue; + } + // 存储这个文件的相关tag + tags.insert(value.tag()); + // 获取原始的histo 并存储这些原始数据到tagTohistolines里 + const auto& histogram = value.histo(); + // 由tag 到直方图的每一条线的对应关系存储数据 + if (tagTohistoGraph.find(value.tag()) == tagTohistoGraph.end()) { + tagTohistoGraph[value.tag()] = HistogramGraph(); + } + HistogramLine line(step, histogram); + tagTohistoGraph[value.tag()].AddValue(line); + } + return true; +} diff --git a/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/TFEventParser.h b/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/TFEventParser.h new file mode 100644 index 0000000000000000000000000000000000000000..de0dab93bab730d08b4e8165e72e2a264de88f80 --- /dev/null +++ b/plugins/mindstudio-insight-plugins/Histogram/server/src/histoParser/TFEventParser.h @@ -0,0 +1,30 @@ +/* +* Copyright (c), Huawei Technologies Co., Ltd. 
2024-2024.All rights reserved. + */ +#ifndef TFEVENTPARSER_H +#define TFEVENTPARSER_H + +#include "FileParser.h" +#include "defs/HistoConceptDefs.h" + +namespace Insight::Histogram::Parser { +class TFEventParser final : public FileParser { +public: + TFEventParser() { + type_ = ParseDataType::TF_EVENT; + } + + ~TFEventParser() override = default; + + bool ReadRecord(std::string &eventStr, std::ifstream &input) override; +private: + /** + * @brief decode one serialized tensorboard event and collect the histogram values it contains + * @param record serialized event as produced by ReadRecord() + * @return false when the record cannot be decoded, true otherwise + */ + bool ParseRecord(std::string &&record) override; +}; +} + +#endif //TFEVENTPARSER_H