diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index a53d478eb431d786a45ae1f4f0546f791790a97e..6d3698aca3478eeae8d6ffcc7eaa93f59a9f0715 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -853,6 +853,11 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { ge_graph.SetNeedIteration(this->need_iteration_); } + const auto cache_option_iter = sess_options_.find("ge.graph_compiler_cache_dir"); + if (cache_option_iter != sess_options_.cend() && !cache_option_iter->second.empty()) { + graph_options_["ge.graph_key"] = geop_name; + } + if (is_host_graph_) { ADP_LOG(INFO) << "[GEOP] set graph option."; graph_options_["ge.exec.placement"] = "HOST"; diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc index aff1db6be2906e962335ca2b6db7f871898b51d2..c120ffe3680c7c07fefe94510d08f2bb13e1204d 100644 --- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc @@ -130,6 +130,9 @@ TEST_F(NpuAttrTest, SetNpuOptimizerAttrInvalidEnableOnlineInference) { AttrValue optypelist_for_implmode = AttrValue(); optypelist_for_implmode.set_s("Pooling,SoftmaxV2"); (*custom_config->mutable_parameter_map())["optypelist_for_implmode"] = optypelist_for_implmode; + AttrValue graph_compiler_cache_dir = AttrValue(); + graph_compiler_cache_dir.set_s("./cache_dir"); + (*custom_config->mutable_parameter_map())["graph_compiler_cache_dir"] = graph_compiler_cache_dir; s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); EXPECT_EQ(s.ok(), false); } diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc index cf6b30c7005601f0c9fbaad7b0aa12863223ab14..f6b1de891fc2933c2a648f3be705728a5921804b 100644 --- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc @@ -207,5 +207,32 @@ TEST_F(NpuAttrTest,
SetNpuOptimizerAttrInvalidEnableOnlineInference) { s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); EXPECT_EQ(s.ok(), false); } + +TEST_F(NpuAttrTest, CheckGraphCompilerCacheDir) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue graph_compiler_cache_dir = AttrValue(); + graph_compiler_cache_dir.set_s("./cache_dir"); + (*custom_config->mutable_parameter_map())["graph_compiler_cache_dir"] = graph_compiler_cache_dir; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); + EXPECT_FALSE(s.ok()); + + AttrValueMap attr_map; + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + attr_map["_graph_compiler_cache_dir"] = graph_compiler_cache_dir; + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + auto find_ret = all_options.find("graph_compiler_cache_dir"); + ASSERT_TRUE(find_ret != all_options.cend()); + EXPECT_EQ(find_ret->second, "./cache_dir"); +} } } // end tensorflow diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index ad611a61b7faefe52727a2404ae4e105e1cd9863..dbd01e3b29655354de05e311a9258d299228a789 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -400,6 +400,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr std::string external_weight = "0"; std::string graph_parallel_option_path; std::string enable_graph_parallel; + std::string graph_compiler_cache_dir; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { (void) ctx->GetAttr("_variable_format_optimize", &variable_format_optimize); @@ 
-464,6 +465,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr (void) ctx->GetAttr("_external_weight", &external_weight); (void) ctx->GetAttr("_graph_parallel_option_path", &graph_parallel_option_path); (void) ctx->GetAttr("_enable_graph_parallel", &enable_graph_parallel); + (void) ctx->GetAttr("_graph_compiler_cache_dir", &graph_compiler_cache_dir); } // session options @@ -516,6 +518,9 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr sess_options["external_weight"] = external_weight; sess_options["ge.graphParallelOptionPath"] = graph_parallel_option_path; sess_options["ge.enableGraphParallel"] = enable_graph_parallel; + if (!graph_compiler_cache_dir.empty()) { + sess_options["ge.graph_compiler_cache_dir"] = graph_compiler_cache_dir; + } return sess_options; } @@ -666,7 +671,6 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr init_options_["stream_sync_timeout"] = stream_sync_timeout; init_options_["event_sync_timeout"] = event_sync_timeout; init_options_["ge.esClusterConfig"] = es_cluster_config; - return init_options_; } @@ -1039,6 +1043,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string es_cluster_config; std::string graph_parallel_option_path; std::string enable_graph_parallel; + std::string graph_compiler_cache_dir; auto NpuOptimizer_value = attrs.Find("_NpuOptimizer"); auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc"); @@ -1122,6 +1127,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto external_weight_value = attrs.Find("_external_weight"); auto graph_parallel_option_path_val = attrs.Find("_graph_parallel_option_path"); auto enable_graph_parallel_val = attrs.Find("_enable_graph_parallel"); + auto graph_compiler_cache_dir_val = attrs.Find("_graph_compiler_cache_dir"); if (NpuOptimizer_value != nullptr) { do_npu_optimizer = "1"; @@ -1392,6 +1398,9 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (es_cluster_config_value != nullptr) { es_cluster_config = 
es_cluster_config_value->s(); } + if (graph_compiler_cache_dir_val != nullptr) { + graph_compiler_cache_dir = graph_compiler_cache_dir_val->s(); + } } all_options["variable_format_optimize"] = variable_format_optimize; @@ -1403,6 +1412,9 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (!variable_memory_max_size.empty()) { all_options["variable_memory_max_size"] = variable_memory_max_size; } + if (!graph_compiler_cache_dir.empty()) { + all_options["graph_compiler_cache_dir"] = graph_compiler_cache_dir; + } all_options["enable_dump"] = enable_dump; all_options["dump_path"] = dump_path; @@ -1588,6 +1600,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options std::string model_deploy_mode; std::string model_deploy_devicelist; std::string aoe_config_file; + std::string graph_compiler_cache_dir; int32_t stream_sync_timeout = -1; int32_t event_sync_timeout = -1; bool external_weight = false; @@ -2013,6 +2026,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options init_options_["es_cluster_config"] = es_cluster_config; init_options_["ge.esClusterConfig"] = es_cluster_config; } + if (params.count("graph_compiler_cache_dir") > 0) { + graph_compiler_cache_dir = params.at("graph_compiler_cache_dir").s(); + } } } @@ -2025,7 +2041,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (!variable_memory_max_size.empty()) { sess_options["variable_memory_max_size"] = variable_memory_max_size; } - + if (!graph_compiler_cache_dir.empty()) { + sess_options["graph_compiler_cache_dir"] = graph_compiler_cache_dir; + } sess_options["enable_dump"] = std::to_string(static_cast(enable_dump)); sess_options["dump_path"] = dump_path; sess_options["dump_step"] = dump_step; diff --git a/tf_adapter_2.x/npu_device/core/npu_device.cpp b/tf_adapter_2.x/npu_device/core/npu_device.cpp index f1d6f0f8e7dad5c623bb148175e9078f8eac7540..709fa65be5633d537690ecceab9085a6e0dcb9e8 100644 --- 
a/tf_adapter_2.x/npu_device/core/npu_device.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_device.cpp @@ -925,18 +925,24 @@ uint64_t NpuDevice::AddGeGraphInner(TFE_Context *context, uint64_t graph_id, con if (def.node_size() == 0) { return kEmptyGeGraphId; } + std::map graph_options(options); + const auto cache_dir_option = device_options.find("ge.graph_compiler_cache_dir"); + if (cache_dir_option != device_options.cend() && !cache_dir_option->second.empty()) { + LOG(INFO) << "ge.graph_compiler_cache_dir exists, add option ge.graph_key=" << name; + graph_options["ge.graph_key"] = name; + } ge::Graph ge_graph; NPU_CTX_REQUIRES_OK_RETURN(status, TransTfGraph2GeGraph(context, name, def, ge_graph), graph_id); ge_graph.SetNeedIteration(loop); - if (kDumpExecutionDetail && !options.empty()) { + if (kDumpExecutionDetail && !graph_options.empty()) { LOG(INFO) << "Add ge graph " << graph_id << " with options:"; - for (auto &option : options) { + for (auto &option : graph_options) { LOG(INFO) << " " << option.first << ":" << option.second; } } - NPU_CTX_REQUIRES_GE_OK_RETURN(status, "Graph engine Add graph", GeSession()->AddGraph(graph_id, ge_graph, options), - graph_id); + NPU_CTX_REQUIRES_GE_OK_RETURN(status, "Graph engine Add graph", + GeSession()->AddGraph(graph_id, ge_graph, graph_options), graph_id); return graph_id; } diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 7ffadcdd7b41d55992a34c5fab50dac8e6caabe0..3e82e22654854cc130ec9ca26a159ace6d939fb1 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -117,7 +117,8 @@ const std::map kConfigurableOptions = { {"graph_parallel_option_path", "ge.graphParallelOptionPath"}, {"enable_graph_parallel", "ge.enableGraphParallel"}, {"atomic_clean_policy", "ge.exec.atomicCleanPolicy"}, - {"static_memory_policy", "ge.exec.staticMemoryPolicy"}}; + {"static_memory_policy", "ge.exec.staticMemoryPolicy"}, + 
{"graph_compiler_cache_dir", "ge.graph_compiler_cache_dir"}}; } // namespace #undef PYBIND11_CHECK_PYTHON_VERSION diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index da5492e08447515c8cb7da51ddf5105299f6567e..8c0931b881fe287b9f8886c53df8d1551007675f 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -66,6 +66,7 @@ class NpuConfig(NpuBaseConfig): self.event_sync_timeout = OptionValue(-1, None) self.external_weight = OptionValue(False, [True, False]) self.memory_config = MemoryConfig() + self.graph_compiler_cache_dir = OptionValue(None, None) # Configuration for experiment self.experimental = NpuExperimentalConfig() diff --git a/tf_adapter_2.x/tests/st/adapter2_options.py b/tf_adapter_2.x/tests/st/adapter2_options.py index 89c86982d699be685ec34435473adb2f88708ad8..26097469f9fdfcfd6bcca8131f91417810f5e3f2 100644 --- a/tf_adapter_2.x/tests/st/adapter2_options.py +++ b/tf_adapter_2.x/tests/st/adapter2_options.py @@ -122,6 +122,13 @@ class Adapter2Options(unittest.TestCase): options = config.as_dict() self.assertTrue(options['hcom_parallel'], False) + def test_9_set_option_graph_compiler_cache_dir(self): + config = NpuConfig() + options = config.as_dict() + self.assertTrue('graph_compiler_cache_dir' not in options, True) + config.graph_compiler_cache_dir = "./st_graph_cache_dir" + options = config.as_dict() + self.assertEqual(options['graph_compiler_cache_dir'], "./st_graph_cache_dir") if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/tf_adapter_2.x/tests/st/adapter2_st.py b/tf_adapter_2.x/tests/st/adapter2_st.py index dda926e5b5871f6c9ae75e05958b632fca1389a6..50401668de18ff194dda0efd2c1bf85941b9de60 100644 --- a/tf_adapter_2.x/tests/st/adapter2_st.py +++ b/tf_adapter_2.x/tests/st/adapter2_st.py @@ -35,6 +35,7 @@ 
npu_device.global_options().experimental.multi_branches_config.dynamic_node_type npu_device.global_options().experimental.multi_branches_config.dynamic_dims = "1;2" npu_device.global_options().aoe_config.work_path = "./" npu_device.global_options().graph_run_mode = 0 +npu_device.global_options().graph_compiler_cache_dir = "./st_graph_cache_dir"; os.environ['RANK_TABLE_FILE'] = "rankTable" os.environ['RANK_SIZE'] = "2" os.environ['RANK_ID'] = "1" diff --git a/tf_adapter_2.x/tests/ut/testcases/test_npu_device.cpp b/tf_adapter_2.x/tests/ut/testcases/test_npu_device.cpp index d3ce9f4c387f3e2a5574f61b96de8bb09e9e04d2..7fb87a85e4d25e61d218f9b7b684d01e8161b926 100644 --- a/tf_adapter_2.x/tests/ut/testcases/test_npu_device.cpp +++ b/tf_adapter_2.x/tests/ut/testcases/test_npu_device.cpp @@ -139,6 +139,7 @@ class ST_NpuDevice : public ::testing::Test { std::map device_options; device_options["ge.jobType"] = "1"; device_options["ge.tuningPath"] = "./"; + device_options["ge.graph_compiler_cache_dir"] = "./"; npu::CreateDevice(context, kNpuDeviceName, kNpuDeviceIndex, device_options); for (const auto &function_def : FunctionStrLibrary::Instance().Get()) {