diff --git a/accuracy_tools/msprobe/csrc/atb_probe/Override.cpp b/accuracy_tools/msprobe/csrc/atb_probe/Override.cpp new file mode 100644 index 0000000000000000000000000000000000000000..47f300fb404128fa4daf6933cc84a3eb0389b7d5 --- /dev/null +++ b/accuracy_tools/msprobe/csrc/atb_probe/Override.cpp @@ -0,0 +1,247 @@ +/* + * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "atb_probe/Override.h" + +#include +#include + +#include "atb_probe/include/Helper.h" +#include "atb_probe/include/SaveExtra.h" +#include "atb_probe/include/SaveGraph.h" +#include "atb_probe/include/SaveTensor.h" +#include "atb_probe/include/Stat.h" +#include "common/Toolkit.h" +#include "utils/Constant.h" +#include "utils/DataType.h" +#include "utils/IO.h" +#include "utils/Log.h" +#include "utils/Path.h" +#include "utils/Str.h" + +namespace atb { + // overflow + bool atb::Probe::IsOverflowCheck() { + // ⬇ Priority handling logic. + Kit::SetLogLevel(); + // ⬆ The first function to be called, sets the global log level. 
+ const std::string taskEnvVar = StrSpace::GetEnvVar(Cst::LINK_DUMP_TASK); + return taskEnvVar == Cst::TASK_OVERFLOW; + } + + bool atb::Probe::ReportOperationGraphEnable() { + return true; + } + + void atb::Probe::ReportOperationGraph(const std::string &opName, const std::string &graph) { + Utility::ordered_json graphJson = StrSpace::Str2Json(graph); + GraphSpace::CheckInputValid(opName, graphJson); + Utility::ordered_json constructedGraph = GraphSpace::Build(graphJson); + Utility::fs::path graphPath = Utility::GetMsprobeDir() / "model" / (opName + ".json"); + Utility::SafePath::MakeParentDir(graphPath); + Utility::SaveJson(constructedGraph, graphPath.string(), std::ios::out); + LOG_INFO << "Graph structure of " << opName << " is already built. Path: " << graphPath.c_str(); + } + + bool atb::Probe::IsTensorNeedSave(const std::vector &ids, const std::string &optype) { + const std::string opidEnvVar = StrSpace::GetEnvVar(Cst::LINK_SAVE_TENSOR_IDS); // 2_1_3,1,5_2 + const std::string opNameEnvVar = StrSpace::GetEnvVar(Cst::LINK_SAVE_TENSOR_RUNNER); // Lin,SelfAttention + if (opidEnvVar.empty() && opNameEnvVar.empty()) { + return true; + } + if (!opidEnvVar.empty() && TensorSpace::IsOpidMatch(ids, opidEnvVar)) { + return true; + } + if (!opNameEnvVar.empty() && TensorSpace::IsOpNameMatch(optype, opNameEnvVar)) { + return true; + } + return false; + } + + bool atb::Probe::IsSaveTensorDesc() { + return true; + } + + bool atb::Probe::IsExecuteCountInRange(const uint64_t executeCount) { + return StrSpace::IsValueInGoal(Cst::LINK_STEP, executeCount); + } + + bool atb::Probe::ReportOperationStatisticEnable() { + const std::string cpuProfFlag = StrSpace::GetEnvVar(Cst::LINK_SAVE_CPU_PROFILING); + if (cpuProfFlag.empty()) { + return false; + } + return StrSpace::Str2Int(cpuProfFlag.c_str(), 0, Cst::LINK_SAVE_CPU_PROFILING) != 0; + } + + void atb::Probe::ReportOperationSetupStatistic(const uint64_t executeCount, + const std::string &opname, + const std::string &st) { + const 
uint64_t realStep = executeCount - 1; + bool flag = atb::Probe::IsExecuteCountInRange(realStep); + if (!flag) { + return; + } + ExtraSpace::SaveCpuProf(realStep, opname, st); + } + + bool atb::Probe::ReportKernelIOTensorEnable() { + const std::string kernelFlag = StrSpace::GetEnvVar(Cst::LINK_SAVE_KERNEL_INFO); + if (kernelFlag.empty()) { + return false; + } + return StrSpace::Str2Int(kernelFlag.c_str(), 0, Cst::LINK_SAVE_KERNEL_INFO) != 0; + } + + void atb::Probe::ReportKernelIOTensor(const size_t executeCount, + const std::string &opName, + const std::string &opParam, + const std::vector &inTensors, + const std::vector &outTensors) { + bool flag = atb::Probe::IsExecuteCountInRange(executeCount); + if (!flag) { + return; + } + atb::Probe::OpInfo opInfo{opName, opParam, inTensors, outTensors}; + ExtraSpace::SaveInfo(executeCount, opInfo, "kernel_io_info.txt"); + } + + bool atb::Probe::ReportOperationIOTensorEnable() { + const std::string opFlag = StrSpace::GetEnvVar(Cst::LINK_SAVE_OP_INFO); + if (opFlag.empty()) { + return false; + } + return StrSpace::Str2Int(opFlag.c_str(), 0, Cst::LINK_SAVE_OP_INFO) != 0; + } + + void atb::Probe::ReportOperationIOTensor(const size_t executeCount, + const std::string &opName, + const std::string &opParam, + const std::vector &inTensors, + const std::vector &outTensors) { + bool flag = atb::Probe::IsExecuteCountInRange(executeCount); + if (!flag) { + return; + } + atb::Probe::OpInfo opInfo{opName, opParam, inTensors, outTensors}; + ExtraSpace::SaveInfo(executeCount, opInfo, "operation_io_info.txt"); + } + + bool atb::Probe::IsSaveTiling() { + return true; + } + + void atb::Probe::SaveTiling(const uint8_t *data, uint64_t dataSize, const std::string &filePath) { + // ⬇ Mandatory steps for providing dump information: 1. PID and rank 2. PID and dump.json. 
+ Kit::PidTieRank::Add(filePath); + bool hasDumpJson = Kit::DumpJsonManager::Instance().IsHas(filePath); + if (!hasDumpJson) { + Types::ArgsDumpJsonInit args; + std::string bufferSize = StrSpace::GetEnvVar(Cst::LINK_BUFFER_SIZE); + args.bufferSize = StrSpace::Str2Int(bufferSize.c_str(), Cst::BUFFER_SIZE, Cst::LINK_BUFFER_SIZE); + args.task = StrSpace::GetEnvVar(Cst::LINK_DUMP_TASK); + args.level = StrSpace::GetEnvVar(Cst::LINK_DUMP_LEVEL); + args.framework = Cst::FRAMEWORK_MINDIELLM; + args.outputDir = Kit::GetRankDir(filePath); + Kit::DumpJsonManager::Instance().Create(filePath, args); + } + // ⬆ The above parts are unrelated to SaveTiling. + bool saveFlag = ExtraSpace::IsSaveTiling(); + if (!saveFlag) { + return; + } + bool validFlag = ExtraSpace::IsValidParam(data, dataSize, filePath); + if (!validFlag) { + return; + } + ExtraSpace::SaveTiling(data, dataSize, filePath); + } + + bool atb::Probe::IsSaveTensorBefore() { + return true; + } + + bool atb::Probe::IsSaveTensorData() { + return true; + } + + void atb::Probe::SaveTensor(const std::string &format, + const std::string &dtype, + const std::string &dims, + const void *hostData, + uint64_t dataSize, + const std::string &filePath) { + Types::TensorInfo inputFile{format, dtype, dims, hostData, dataSize, filePath}; + if (!TensorSpace::IsExpectedTensor(inputFile)) { + return; + } + const std::string taskFlag = StrSpace::GetEnvVar(Cst::LINK_DUMP_TASK); + if (taskFlag.empty()) { + return; + } + Types::TensorStats stat = Stat::Compute(inputFile); + Types::PathInfo pInfo = Kit::GetPathInfo(filePath); + Kit::DumpJson *dumpJson = Kit::DumpJsonManager::Instance().Get(filePath); + if (dumpJson != nullptr) { + dumpJson->UpdateStat(pInfo.nodeName, pInfo.inOut, pInfo.argsName, stat); + } else { + LOG_ERROR << "DumpJson is nullptr, can not update stat."; + } + if (taskFlag == Cst::TASK_TENSOR) { + dumpJson->AddTensorDir(inputFile.filePath); + TensorSpace::Save(inputFile); + } + } + + bool atb::Probe::IsSaveTensorAfter() { + 
return true; + } + + bool atb::Probe::IsSaveParam() { + return true; + } + + void atb::Probe::SaveParam(const std::string ¶m, const std::string &filePath) { + // ⬇ Temporary solution: Wait until everything finishes running before checking the cache. + Kit::DumpJson *dumpJson = Kit::DumpJsonManager::Instance().Get(filePath); + if (dumpJson != nullptr) { + dumpJson->Flush(); + } else { + LOG_ERROR << "DumpJson is nullptr, can not flush cache."; + } + // ⬆ + bool flag = ExtraSpace::IsSaveParam() && Kit::IsPathInGoal(filePath); + if (!flag) { + return; + } + std::string newFilePath = Kit::GetNewPathForSave(filePath, Cst::SUBDIRNAME_DUMP_TENSOR); + Utility::SafePath::MakeParentDir(newFilePath); + Utility::SaveJson(StrSpace::Str2Json(param), newFilePath, std::ios::out); + } + + bool atb::Probe::IsOverflowStop() { + const std::string isExit = StrSpace::GetEnvVar(Cst::LINK_STOP); + if (isExit.empty()) { + return false; + } + return StrSpace::Str2Int(isExit.c_str(), 0, Cst::LINK_STOP) != 0; + } + + void atb::Probe::ReportOverflowKernel(const std::string &kernelPath) { + return; + } + +} // namespace atb diff --git a/accuracy_tools/msprobe/csrc/atb_probe/Override.h b/accuracy_tools/msprobe/csrc/atb_probe/Override.h new file mode 100644 index 0000000000000000000000000000000000000000..312258bfb859de2a2dcb543808c715da2feea583 --- /dev/null +++ b/accuracy_tools/msprobe/csrc/atb_probe/Override.h @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ATB_PROBE_OVERRIDE_H +#define ATB_PROBE_OVERRIDE_H + +#include +#include + +#define EXPORT_LLM __attribute__((visibility("default"))) + +namespace atb { + class Probe { + public: + struct Tensor { + std::string dtype; + std::string format; + std::string shape; + std::string filePath; + }; + + struct OpInfo { + const std::string &opName; + const std::string &opParam; + const std::vector &inTensors; + const std::vector &outTensors; + }; + + public: + EXPORT_LLM static bool IsOverflowCheck(); + EXPORT_LLM static bool ReportOperationGraphEnable(); + EXPORT_LLM static void ReportOperationGraph(const std::string &opName, const std::string &graph); + EXPORT_LLM static bool IsTensorNeedSave(const std::vector &ids, const std::string &optype); + EXPORT_LLM static bool IsSaveTensorDesc(); + EXPORT_LLM static bool IsExecuteCountInRange(const uint64_t executeCount); + EXPORT_LLM static bool IsSaveTiling(); + EXPORT_LLM static void SaveTiling(const uint8_t *data, uint64_t dataSize, const std::string &filePath); + EXPORT_LLM static bool ReportOperationStatisticEnable(); + EXPORT_LLM static void + ReportOperationSetupStatistic(const uint64_t executeCount, const std::string &opname, const std::string &st); + EXPORT_LLM static bool IsSaveTensorBefore(); + EXPORT_LLM static bool IsSaveTensorData(); + EXPORT_LLM static void SaveTensor(const std::string &format, + const std::string &dtype, + const std::string &dims, + const void *hostData, + uint64_t dataSize, + const std::string &filePath); + EXPORT_LLM static bool IsSaveTensorAfter(); + EXPORT_LLM static bool ReportOperationIOTensorEnable(); + EXPORT_LLM static void ReportOperationIOTensor(const size_t executeCount, + const std::string &opName, + const std::string &opParam, + const std::vector &inTensors, + const std::vector &outTensors); + EXPORT_LLM static bool ReportKernelIOTensorEnable(); + EXPORT_LLM static 
void ReportKernelIOTensor(const size_t executeCount, + const std::string &opName, + const std::string &opParam, + const std::vector &inTensors, + const std::vector &outTensors); + + EXPORT_LLM static bool IsSaveParam(); + EXPORT_LLM static void SaveParam(const std::string ¶m, const std::string &filePath); + + EXPORT_LLM static bool IsOverflowStop(); + EXPORT_LLM static void ReportOverflowKernel(const std::string &kernelPath); + }; +} // namespace atb + +#endif diff --git a/accuracy_tools/msprobe/csrc/atb_probe/core/Helper.cpp b/accuracy_tools/msprobe/csrc/atb_probe/core/Helper.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0f858079e48f070413f9d9206ffcde95cc18cce1 --- /dev/null +++ b/accuracy_tools/msprobe/csrc/atb_probe/core/Helper.cpp @@ -0,0 +1,287 @@ +/* + * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "atb_probe/include/Helper.h" + +#include +#include +#include +#include + +#include "utils/Constant.h" +#include "utils/Exception.h" +#include "utils/IO.h" +#include "utils/Log.h" +#include "utils/Path.h" +#include "utils/Str.h" + +namespace Kit { + inline const uint8_t STEP_LOCATION = 2; + + static std::string ExtractStepFromFilePath(const std::string &filePath) { + // filePath: 3_2035814/5/2_PositionalEmbeddingGather/xxxxxxxx + std::vector dirVec = StrSpace::Split(filePath, "/"); + if (dirVec.size() >= STEP_LOCATION) { + return dirVec[1]; + } else { + LOG_ERROR << "Invalid file path: " << filePath; + return ""; + } + } + + static bool IsValidStep(const std::string &filePath) { + std::string step = ExtractStepFromFilePath(filePath); + return StrSpace::IsValueInGoal(Cst::LINK_STEP, step); + } + + static std::string ExtractRankFromFilePath(const std::string &filePath) { + // filePath: 3_2035814/5/2_PositionalEmbeddingGather/xxxxxxxx + size_t pos = filePath.find('_'); + if (pos != std::string::npos) { + return filePath.substr(0, pos); + } else { + LOG_ERROR << "Invalid file path: " << filePath; + return ""; + } + } + + static std::string GetNodeName(const std::vector &dirVec) { + std::vector nameVec; + for (size_t i = STEP_LOCATION; i < dirVec.size(); ++i) { + if (dirVec[i] == Cst::BEFORE || dirVec[i] == Cst::AFTER) + break; + nameVec.push_back(dirVec[i]); + } + return StrSpace::Join(nameVec, "/"); + } + + bool IsValidRank(const std::string &filePath) { + std::string rank = ExtractRankFromFilePath(filePath); + return StrSpace::IsValueInGoal(Cst::LINK_RANK, rank); + } + + bool IsPathInGoal(const std::string &filePath) { + return IsValidStep(filePath) && IsValidRank(filePath); + } + + Utility::fs::path GetRankDir(const std::string &filePath) { + // filePath: 3_2035814/5/2_PositionalEmbeddingGather/before/intensor0.bin + Utility::fs::path newPath; + newPath /= "step" + ExtractStepFromFilePath(filePath); + newPath /= "rank" + 
ExtractRankFromFilePath(filePath); + return Utility::GetMsprobeDir() / newPath; + } + + std::unordered_map PidTieRank::g_pidWithRankMap; + + void PidTieRank::Add(const std::string &filePath) { + // filePath: 3_2035814/5/2_PositionalEmbeddingGather/xxxxxxxx + std::string rank = ExtractRankFromFilePath(filePath); + pid_t pid = getpid(); + auto it = g_pidWithRankMap.find(pid); + if (it == g_pidWithRankMap.end()) { + g_pidWithRankMap[pid] = rank; + } else { + if (it->second != rank) { + LOG_WARNING << "The PID: " << pid << " is already associated with a different rank. " + << "Existing rank: " << it->second << ", New rank: " << rank; + } + } + } + + std::string PidTieRank::Get(const pid_t &pid) { + if (g_pidWithRankMap.find(pid) != g_pidWithRankMap.end()) { + return g_pidWithRankMap[pid]; + } else { + LOG_ERROR << "No association between PID and Rank has been established."; + return ""; + } + } + + Types::PathInfo GetPathInfo(const std::string &filePath) { + Types::PathInfo res; + std::vector dirVec = StrSpace::Split(filePath, "/"); + + for (size_t i = 0; i < dirVec.size(); ++i) { + const std::string &part = dirVec[i]; + if (part != Cst::BEFORE && part != Cst::AFTER) { + continue; + } + res.nodeName = (i > 0) ? GetNodeName(dirVec) : ""; + if (i + 1 >= dirVec.size()) { + LOG_ERROR << "Missing file after '" << part << "' in path: " << filePath; + break; + } + std::string fileName = dirVec[i + 1]; + std::vector fileVec = StrSpace::Split(fileName, "."); + const std::string argName = (fileVec.size() == Cst::ARGS_LEN_2) ? 
fileVec[0] : ""; + if (argName.find(Cst::INTENSOR) != std::string::npos) { + res.inOut = "input_args"; + } else if (argName.find(Cst::OUTTENSOR) != std::string::npos) { + res.inOut = "output_args"; + } else { + LOG_ERROR << "Unknown tensor direction in file name: " << filePath; + } + res.argsName = argName; + return res; + } + LOG_ERROR << "Invalid file path: " << filePath; + return Types::PathInfo{"", "", ""}; + } + + Utility::fs::path GetNewPathForSave(const Utility::fs::path &originalPath, const std::string &subDirName) { + // originalPath: 3_2035814/5/2_PositionalEmbeddingGather/xxxxxxxx + auto iter = originalPath.begin(); + std::string rankStr = iter->string(); // 3_2035814 + std::string rank = rankStr.substr(0, rankStr.find('_')); // extract 3 as rank + ++iter; + std::string step = iter->string(); // 5 + ++iter; + Utility::fs::path newPath; + newPath /= "step" + step; + newPath /= "rank" + rank; + newPath /= subDirName; + while (iter != originalPath.end()) { + newPath /= *iter; + ++iter; + } + return Utility::GetMsprobeDir() / newPath; + } + + std::vector GetColumns(const std::unordered_map &kvMap) { + std::vector columns; + for (const auto &pair : kvMap) { + columns.push_back(pair.first); + } + return columns; + } + + std::vector GetElement(const std::unordered_map &kvMap, + const std::vector &columns) { + std::vector formattedLine; + for (const auto &column : columns) { + auto it = kvMap.find(column); + if (it != kvMap.end()) { + formattedLine.push_back(it->second); + } else { + formattedLine.push_back(""); + } + } + return formattedLine; + } + + std::mutex DumpJson::mutex_; + + DumpJson::DumpJson(const size_t &bufferSize, + const std::string &task, + const std::string &level, + const std::string &framework, + const std::string &outputDir) + : formalBufferSize(bufferSize), formalCurrentSize(0), formalOutputDir(outputDir) { + formalCache["task"] = task; + formalCache["level"] = level; + formalCache["framework"] = framework; + formalCache["dump_data_dir"] = 
""; + formalCache["data"] = nlohmann::json::object(); + } + + void DumpJson::AddTensorDir(const std::string &filePath) { + auto it = formalCache.find("dump_data_dir"); + if (it == formalCache.end() || !it.value().is_string() || it.value().get().empty()) { + std::string tensorDir = Kit::GetRankDir(filePath) / Cst::SUBDIRNAME_DUMP_TENSOR; + formalCache["dump_data_dir"] = tensorDir; + } + } + + void DumpJson::UpdateStat(const std::string &nodeName, + const std::string &inOut, + const std::string &dataName, + const Types::TensorStats &stats) { + Utility::ordered_json statJson = { + {"data_name", dataName}, + {"type", stats.type}, + {"dtype", stats.dtype}, + {"shape", stats.shape}, + {"Max", stats.max}, + {"Min", stats.min}, + {"Mean", stats.mean}, + {"Norm", stats.norm}, + }; + if (!stats.crc32.empty()) { + statJson["crc32"] = stats.crc32; + } + + std::lock_guard lock(mutex_); + if (!formalCache["data"].contains(nodeName)) { + formalCache["data"][nodeName] = Utility::json::object(); + } + if (!formalCache["data"][nodeName].contains(inOut)) { + formalCache["data"][nodeName][inOut] = Utility::json::array(); + } + formalCurrentSize += statJson.dump().size(); + formalCache["data"][nodeName][inOut].emplace_back(std::move(statJson)); + Flush(formalBufferSize); + } + + void DumpJson::Flush(size_t flushThreshold) { + if (formalCurrentSize > flushThreshold) { + std::string dumpJsonPath = formalOutputDir + "/dump.json"; + Utility::SafePath::MakeParentDir(dumpJsonPath); + Utility::SaveJson(formalCache, dumpJsonPath, std::ios_base::out); + formalCurrentSize = 0; + } + } + + DumpJsonManager &DumpJsonManager::Instance() { + static DumpJsonManager instance; + return instance; + } + + std::unordered_map> DumpJsonManager::g_pidWithDumpJsonMap; + std::mutex DumpJsonManager::mutex_; + + bool DumpJsonManager::IsHas(const std::string &filePath) { + std::lock_guard lock(mutex_); + std::string key; + key += "step" + ExtractStepFromFilePath(filePath); + key += "rank" + 
ExtractStepFromFilePath(filePath); + LOG_DEBUG << "DumpJson for StepRank " << key; + return g_pidWithDumpJsonMap.find(key) != g_pidWithDumpJsonMap.end(); + } + + void DumpJsonManager::Create(const std::string &filePath, const Types::ArgsDumpJsonInit &args) { + std::lock_guard lock(mutex_); + std::string key; + key += "step" + ExtractStepFromFilePath(filePath); + key += "rank" + ExtractStepFromFilePath(filePath); + if (g_pidWithDumpJsonMap.find(key) == g_pidWithDumpJsonMap.end()) { + g_pidWithDumpJsonMap[key] = + std::make_unique(args.bufferSize, args.task, args.level, args.framework, args.outputDir); + } else { + LOG_WARNING << "DumpJson for StepRank " << key << " already exists."; + } + } + + DumpJson *DumpJsonManager::Get(const std::string &filePath) { + std::lock_guard lock(mutex_); + std::string key; + key += "step" + ExtractStepFromFilePath(filePath); + key += "rank" + ExtractStepFromFilePath(filePath); + auto it = g_pidWithDumpJsonMap.find(key); + return (it != g_pidWithDumpJsonMap.end()) ? it->second.get() : nullptr; + } + +} // namespace Kit diff --git a/accuracy_tools/msprobe/csrc/atb_probe/include/Helper.h b/accuracy_tools/msprobe/csrc/atb_probe/include/Helper.h new file mode 100644 index 0000000000000000000000000000000000000000..28f953f8bb769ed8abe6a3bbbd951fb7ca14297c --- /dev/null +++ b/accuracy_tools/msprobe/csrc/atb_probe/include/Helper.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KIT_HELPER_H
+#define KIT_HELPER_H
+
+#include
+#include
+#include
+
+#include "utils/IO.h"
+#include "utils/Path.h"
+
+namespace Kit {
+    bool IsValidRank(const std::string &filePath);  // rank prefix of filePath checked against the Cst::LINK_RANK goal
+    bool IsPathInGoal(const std::string &filePath);  // true when both the step and the rank of filePath are selected
+    Utility::fs::path GetRankDir(const std::string &filePath);  // <msprobe dir>/step<step>/rank<rank>
+
+    class PidTieRank {  // process-wide table associating a PID with a rank string
+    public:
+        static void Add(const std::string &filePath);  // records the rank for getpid(); warns on a conflicting rank
+        static std::string Get(const pid_t &pid);  // returns "" and logs an error when the PID is unknown
+
+    private:
+        static std::unordered_map g_pidWithRankMap;
+    };
+
+    Types::PathInfo GetPathInfo(const std::string &filePath);  // node name, input/output direction and argument name
+    Utility::fs::path GetNewPathForSave(const Utility::fs::path &original_path, const std::string &subDirName);
+    std::vector GetColumns(const std::unordered_map &kvMap);  // keys of kvMap in its iteration order
+    std::vector GetElement(const std::unordered_map &kvMap,
+                           const std::vector &columns);  // value per column, "" for a missing key
+
+    class DumpJson {  // in-memory JSON cache that is written to <outputDir>/dump.json
+    public:
+        DumpJson(const size_t &bufferSize,
+                 const std::string &task,
+                 const std::string &level,
+                 const std::string &framework,
+                 const std::string &outputDir);
+        void AddTensorDir(const std::string &filePath);  // fills "dump_data_dir" only while it is still empty
+        void UpdateStat(const std::string &nodeName,
+                        const std::string &inOut,
+                        const std::string &dataName,
+                        const Types::TensorStats &stats);  // appends one entry under mutex_, then calls Flush
+        void Flush(size_t flushThreshold = 0);  // writes the cache when the accumulated size exceeds the threshold
+
+    private:
+        size_t formalBufferSize;  // threshold UpdateStat passes to Flush
+        size_t formalCurrentSize;  // serialized size added since the last write
+        std::string formalOutputDir;
+        Utility::ordered_json formalCache;
+        static std::mutex mutex_;  // static: one mutex shared by every DumpJson instance
+    };
+
+    class DumpJsonManager {  // singleton owning the DumpJson objects, keyed by a string derived from filePath
+    public:
+        static DumpJsonManager &Instance();
+        bool IsHas(const std::string &filePath);  // true when a DumpJson exists for the key derived from filePath
+        void Create(const std::string &filePath, const Types::ArgsDumpJsonInit &args);  // warns if the key exists
+        DumpJson *Get(const std::string &filePath);  // non-owning pointer, nullptr when no entry exists
+
+    private:
+        DumpJsonManager() = default;
+        ~DumpJsonManager() = default;
+        DumpJsonManager(const DumpJsonManager &) = delete;
+        DumpJsonManager &operator=(const DumpJsonManager &) = delete;
+        static std::unordered_map> g_pidWithDumpJsonMap;
+        static std::mutex mutex_;  // guards g_pidWithDumpJsonMap in IsHas, Create and Get
+    };
+} // namespace Kit
+
+#endif
diff --git a/accuracy_tools/msprobe/csrc/atb_probe/include/SaveExtra.h b/accuracy_tools/msprobe/csrc/atb_probe/include/SaveExtra.h
new file mode 100644
index 0000000000000000000000000000000000000000..28e5eb37cd65f6c964a7301203ba5abd16e2af6b
--- /dev/null
+++ b/accuracy_tools/msprobe/csrc/atb_probe/include/SaveExtra.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef SAVE_TILING_H +#define SAVE_TILING_H + +#include +#include + +#include "atb_probe/Override.h" + +namespace atb { + namespace ExtraSpace { + bool IsSaveTiling(); + bool IsValidParam(const uint8_t *data, const uint64_t &dataSize, const std::string &filePath); + void SaveTiling(const uint8_t *data, const uint64_t &dataSize, const std::string &filePath); + void SaveCpuProf(const uint64_t &executeCount, const std::string &opName, const std::string &st); + void SaveInfo(const uint64_t &executeCount, const atb::Probe::OpInfo &opInfo, const std::string &fileName); + bool IsSaveParam(); + } // namespace ExtraSpace +} // namespace atb + +#endif diff --git a/accuracy_tools/msprobe/csrc/atb_probe/include/SaveGraph.h b/accuracy_tools/msprobe/csrc/atb_probe/include/SaveGraph.h new file mode 100644 index 0000000000000000000000000000000000000000..5557d3abb62ab462bbb7e630f552cbf43a7975e2 --- /dev/null +++ b/accuracy_tools/msprobe/csrc/atb_probe/include/SaveGraph.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef SAVE_GRAPH_H +#define SAVE_GRAPH_H + +#include + +#include "utils/IO.h" + +namespace GraphSpace { + void CheckInputValid(const std::string &opName, const Utility::ordered_json &graphJson); + void RegisterLayer(const std::string &opName, const std::string &graph); + Utility::ordered_json Build(const Utility::ordered_json &graphJson); +} // namespace GraphSpace + +#endif diff --git a/accuracy_tools/msprobe/csrc/atb_probe/include/SaveTensor.h b/accuracy_tools/msprobe/csrc/atb_probe/include/SaveTensor.h new file mode 100644 index 0000000000000000000000000000000000000000..0d92ba497b486a386258811eb9c9f6527eb0473a --- /dev/null +++ b/accuracy_tools/msprobe/csrc/atb_probe/include/SaveTensor.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef SAVE_TENSOR_H +#define SAVE_TENSOR_H + +#include +#include +#include + +#include "utils/DataType.h" + +namespace TensorSpace { + bool IsOpidMatch(const std::vector &ids, const std::string &opidStr); + bool IsOpNameMatch(const std::string &optype, const std::string &opNameStr); + bool IsExpectedTensor(const Types::TensorInfo &inputFile); + void Save(const Types::TensorInfo &inputFile); +} // namespace TensorSpace + +#endif diff --git a/accuracy_tools/msprobe/csrc/atb_probe/include/Stat.h b/accuracy_tools/msprobe/csrc/atb_probe/include/Stat.h new file mode 100644 index 0000000000000000000000000000000000000000..0da704227d005f5091382280743f2ef55e9980c4 --- /dev/null +++ b/accuracy_tools/msprobe/csrc/atb_probe/include/Stat.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef STATS_H +#define STATS_H + +#include + +#include "utils/DataType.h" +#include "utils/IO.h" + +namespace Stat { + Types::TensorStats Compute(const Types::TensorInfo &info); +} // namespace Stat + +#endif