From 802290cea5cb1d98996e01f61014bcd8d8c054d9 Mon Sep 17 00:00:00 2001 From: lining23666 Date: Wed, 1 Feb 2023 16:50:12 +0800 Subject: [PATCH] support graph cache for tf_adapter --- tf_adapter/kernels/geop_npu.cc | 5 ++++ .../tests/st/util/testcase/npu_attrs_test.cc | 3 +++ .../tests/ut/util/testcase/npu_attrs_test.cc | 27 +++++++++++++++++++ tf_adapter/util/npu_attrs.cc | 22 +++++++++++++-- tf_adapter_2.x/npu_device/core/npu_device.cpp | 14 +++++++--- .../npu_device/core/npu_wrapper.cpp | 3 ++- .../python/npu_device/configs/npu_config.py | 1 + tf_adapter_2.x/tests/st/adapter2_options.py | 7 +++++ tf_adapter_2.x/tests/st/adapter2_st.py | 1 + .../tests/ut/testcases/test_npu_device.cpp | 1 + 10 files changed, 77 insertions(+), 7 deletions(-) diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index a53d478eb..6d3698aca 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -853,6 +853,11 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { ge_graph.SetNeedIteration(this->need_iteration_); } + const auto cache_option_iter = sess_options_.find("ge.graph_compiler_cache_dir"); + if (cache_option_iter != sess_options_.cend() && !cache_option_iter->second.empty()) { + graph_options_["ge.graph_key"] = geop_name; + } + if (is_host_graph_) { ADP_LOG(INFO) << "[GEOP] set graph option."; graph_options_["ge.exec.placement"] = "HOST"; diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc index aff1db6be..c120ffe36 100644 --- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc @@ -130,6 +130,9 @@ TEST_F(NpuAttrTest, SetNpuOptimizerAttrInvalidEnableOnlineInference) { AttrValue optypelist_for_implmode = AttrValue(); optypelist_for_implmode.set_s("Pooling,SoftmaxV2"); (*custom_config->mutable_parameter_map())["optypelist_for_implmode"] = optypelist_for_implmode; + AttrValue 
graph_compiler_cache_dir = AttrValue(); + graph_compiler_cache_dir.set_s("./cache_dir"); + (*custom_config->mutable_parameter_map())["graph_compiler_cache_dir"] = graph_compiler_cache_dir; s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); EXPECT_EQ(s.ok(), false); } diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc index cf6b30c70..f6b1de891 100644 --- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc @@ -207,5 +207,32 @@ TEST_F(NpuAttrTest, SetNpuOptimizerAttrInvalidEnableOnlineInference) { s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); EXPECT_EQ(s.ok(), false); } + +TEST_F(NpuAttrTest, CheckGraphCompilerCacheDir) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue graph_compiler_cache_dir = AttrValue(); + graph_compiler_cache_dir.set_s("./cache_dir"); + (*custom_config->mutable_parameter_map())["graph_compiler_cache_dir"] = graph_compiler_cache_dir; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); + EXPECT_FALSE(s.ok()); + + AttrValueMap attr_map; + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + attr_map["_graph_compiler_cache_dir"] = graph_compiler_cache_dir; + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + auto find_ret = all_options.find("graph_compiler_cache_dir"); + ASSERT_TRUE(find_ret != all_options.cend()); + EXPECT_EQ(find_ret->second, "./cache_dir"); +} } } // end tensorflow diff --git a/tf_adapter/util/npu_attrs.cc 
b/tf_adapter/util/npu_attrs.cc index ad611a61b..dbd01e3b2 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -400,6 +400,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr std::string external_weight = "0"; std::string graph_parallel_option_path; std::string enable_graph_parallel; + std::string graph_compiler_cache_dir; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { (void) ctx->GetAttr("_variable_format_optimize", &variable_format_optimize); @@ -464,6 +465,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr (void) ctx->GetAttr("_external_weight", &external_weight); (void) ctx->GetAttr("_graph_parallel_option_path", &graph_parallel_option_path); (void) ctx->GetAttr("_enable_graph_parallel", &enable_graph_parallel); + (void) ctx->GetAttr("_graph_compiler_cache_dir", &graph_compiler_cache_dir); } // session options @@ -516,6 +518,9 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr sess_options["external_weight"] = external_weight; sess_options["ge.graphParallelOptionPath"] = graph_parallel_option_path; sess_options["ge.enableGraphParallel"] = enable_graph_parallel; + if (!graph_compiler_cache_dir.empty()) { + sess_options["ge.graph_compiler_cache_dir"] = graph_compiler_cache_dir; + } return sess_options; } @@ -666,7 +671,6 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr init_options_["stream_sync_timeout"] = stream_sync_timeout; init_options_["event_sync_timeout"] = event_sync_timeout; init_options_["ge.esClusterConfig"] = es_cluster_config; - return init_options_; } @@ -1039,6 +1043,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string es_cluster_config; std::string graph_parallel_option_path; std::string enable_graph_parallel; + std::string graph_compiler_cache_dir; auto NpuOptimizer_value = attrs.Find("_NpuOptimizer"); auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc"); @@ -1122,6 +1127,7 @@ std::map 
NpuAttrs::GetAllAttrOptions(const AttrSlice & auto external_weight_value = attrs.Find("_external_weight"); auto graph_parallel_option_path_val = attrs.Find("_graph_parallel_option_path"); auto enable_graph_parallel_val = attrs.Find("_enable_graph_parallel"); + auto graph_compiler_cache_dir_val = attrs.Find("_graph_compiler_cache_dir"); if (NpuOptimizer_value != nullptr) { do_npu_optimizer = "1"; @@ -1392,6 +1398,9 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (es_cluster_config_value != nullptr) { es_cluster_config = es_cluster_config_value->s(); } + if (graph_compiler_cache_dir_val != nullptr) { + graph_compiler_cache_dir = graph_compiler_cache_dir_val->s(); + } } all_options["variable_format_optimize"] = variable_format_optimize; @@ -1403,6 +1412,9 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (!variable_memory_max_size.empty()) { all_options["variable_memory_max_size"] = variable_memory_max_size; } + if (!graph_compiler_cache_dir.empty()) { + all_options["graph_compiler_cache_dir"] = graph_compiler_cache_dir; + } all_options["enable_dump"] = enable_dump; all_options["dump_path"] = dump_path; @@ -1588,6 +1600,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options std::string model_deploy_mode; std::string model_deploy_devicelist; std::string aoe_config_file; + std::string graph_compiler_cache_dir; int32_t stream_sync_timeout = -1; int32_t event_sync_timeout = -1; bool external_weight = false; @@ -2013,6 +2026,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options init_options_["es_cluster_config"] = es_cluster_config; init_options_["ge.esClusterConfig"] = es_cluster_config; } + if (params.count("graph_compiler_cache_dir") > 0) { + graph_compiler_cache_dir = params.at("graph_compiler_cache_dir").s(); + } } } @@ -2025,7 +2041,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (!variable_memory_max_size.empty()) { 
sess_options["variable_memory_max_size"] = variable_memory_max_size; } - + if (!graph_compiler_cache_dir.empty()) { + sess_options["graph_compiler_cache_dir"] = graph_compiler_cache_dir; + } sess_options["enable_dump"] = std::to_string(static_cast(enable_dump)); sess_options["dump_path"] = dump_path; sess_options["dump_step"] = dump_step; diff --git a/tf_adapter_2.x/npu_device/core/npu_device.cpp b/tf_adapter_2.x/npu_device/core/npu_device.cpp index f1d6f0f8e..709fa65be 100644 --- a/tf_adapter_2.x/npu_device/core/npu_device.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_device.cpp @@ -925,18 +925,24 @@ uint64_t NpuDevice::AddGeGraphInner(TFE_Context *context, uint64_t graph_id, con if (def.node_size() == 0) { return kEmptyGeGraphId; } + std::map graph_options(options); + const auto cache_dir_option = device_options.find("ge.graph_compiler_cache_dir"); + if ((cache_dir_option != device_options.cend() && !cache_dir_option->second.empty())) { + LOG(INFO) << "ge.graph_compiler_cache_dir exists, add option ge.graph_key=" << name; + graph_options["ge.graph_key"] = name; + } ge::Graph ge_graph; NPU_CTX_REQUIRES_OK_RETURN(status, TransTfGraph2GeGraph(context, name, def, ge_graph), graph_id); ge_graph.SetNeedIteration(loop); - if (kDumpExecutionDetail && !options.empty()) { + if (kDumpExecutionDetail && !graph_options.empty()) { LOG(INFO) << "Add ge graph " << graph_id << " with options:"; - for (auto &option : options) { + for (auto &option : graph_options) { LOG(INFO) << " " << option.first << ":" << option.second; } } - NPU_CTX_REQUIRES_GE_OK_RETURN(status, "Graph engine Add graph", GeSession()->AddGraph(graph_id, ge_graph, options), - graph_id); + NPU_CTX_REQUIRES_GE_OK_RETURN(status, "Graph engine Add graph", + GeSession()->AddGraph(graph_id, ge_graph, graph_options), graph_id); return graph_id; } diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 7ffadcdd7..3e82e2265 100644 --- 
a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -117,7 +117,8 @@ const std::map kConfigurableOptions = { {"graph_parallel_option_path", "ge.graphParallelOptionPath"}, {"enable_graph_parallel", "ge.enableGraphParallel"}, {"atomic_clean_policy", "ge.exec.atomicCleanPolicy"}, - {"static_memory_policy", "ge.exec.staticMemoryPolicy"}}; + {"static_memory_policy", "ge.exec.staticMemoryPolicy"}, + {"graph_compiler_cache_dir", "ge.graph_compiler_cache_dir"}}; } // namespace #undef PYBIND11_CHECK_PYTHON_VERSION diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index da5492e08..8c0931b88 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -66,6 +66,7 @@ class NpuConfig(NpuBaseConfig): self.event_sync_timeout = OptionValue(-1, None) self.external_weight = OptionValue(False, [True, False]) self.memory_config = MemoryConfig() + self.graph_compiler_cache_dir = OptionValue(None, None) # Configuration for experiment self.experimental = NpuExperimentalConfig() diff --git a/tf_adapter_2.x/tests/st/adapter2_options.py b/tf_adapter_2.x/tests/st/adapter2_options.py index 89c86982d..26097469f 100644 --- a/tf_adapter_2.x/tests/st/adapter2_options.py +++ b/tf_adapter_2.x/tests/st/adapter2_options.py @@ -122,6 +122,13 @@ class Adapter2Options(unittest.TestCase): options = config.as_dict() self.assertTrue(options['hcom_parallel'], False) + def test_9_set_option_graph_compiler_cache_dir(self): + config = NpuConfig() + options = config.as_dict() + self.assertTrue('graph_compiler_cache_dir' not in options, True) + config.graph_compiler_cache_dir = "./st_graph_cache_dir" + options = config.as_dict() + self.assertEqual(options['graph_compiler_cache_dir'], "./st_graph_cache_dir") if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git 
a/tf_adapter_2.x/tests/st/adapter2_st.py b/tf_adapter_2.x/tests/st/adapter2_st.py index dda926e5b..50401668d 100644 --- a/tf_adapter_2.x/tests/st/adapter2_st.py +++ b/tf_adapter_2.x/tests/st/adapter2_st.py @@ -35,6 +35,7 @@ npu_device.global_options().experimental.multi_branches_config.dynamic_node_type npu_device.global_options().experimental.multi_branches_config.dynamic_dims = "1;2" npu_device.global_options().aoe_config.work_path = "./" npu_device.global_options().graph_run_mode = 0 +npu_device.global_options().graph_compiler_cache_dir = "./st_graph_cache_dir" os.environ['RANK_TABLE_FILE'] = "rankTable" os.environ['RANK_SIZE'] = "2" os.environ['RANK_ID'] = "1" diff --git a/tf_adapter_2.x/tests/ut/testcases/test_npu_device.cpp b/tf_adapter_2.x/tests/ut/testcases/test_npu_device.cpp index d3ce9f4c3..7fb87a85e 100644 --- a/tf_adapter_2.x/tests/ut/testcases/test_npu_device.cpp +++ b/tf_adapter_2.x/tests/ut/testcases/test_npu_device.cpp @@ -139,6 +139,7 @@ class ST_NpuDevice : public ::testing::Test { std::map device_options; device_options["ge.jobType"] = "1"; device_options["ge.tuningPath"] = "./"; + device_options["ge.graph_compiler_cache_dir"] = "./"; npu::CreateDevice(context, kNpuDeviceName, kNpuDeviceIndex, device_options); for (const auto &function_def : FunctionStrLibrary::Instance().Get()) { -- Gitee