diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index a53d478eb431d786a45ae1f4f0546f791790a97e..6d3698aca3478eeae8d6ffcc7eaa93f59a9f0715 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -853,6 +853,11 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { ge_graph.SetNeedIteration(this->need_iteration_); } + const auto cache_option_iter = sess_options_.find("ge.graph_compiler_cache_dir"); + if (cache_option_iter != sess_options_.cend() && !cache_option_iter->second.empty()) { + graph_options_["ge.graph_key"] = geop_name; + } + if (is_host_graph_) { ADP_LOG(INFO) << "[GEOP] set graph option."; graph_options_["ge.exec.placement"] = "HOST"; diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc index aff1db6be2906e962335ca2b6db7f871898b51d2..c120ffe3680c7c07fefe94510d08f2bb13e1204d 100644 --- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc @@ -130,6 +130,9 @@ TEST_F(NpuAttrTest, SetNpuOptimizerAttrInvalidEnableOnlineInference) { AttrValue optypelist_for_implmode = AttrValue(); optypelist_for_implmode.set_s("Pooling,SoftmaxV2"); (*custom_config->mutable_parameter_map())["optypelist_for_implmode"] = optypelist_for_implmode; + AttrValue graph_compiler_cache_dir = AttrValue(); + graph_compiler_cache_dir.set_s("./cache_dir"); + (*custom_config->mutable_parameter_map())["graph_compiler_cache_dir"] = graph_compiler_cache_dir; s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); EXPECT_EQ(s.ok(), false); } diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc index cf6b30c7005601f0c9fbaad7b0aa12863223ab14..f6b1de891fc2933c2a648f3be705728a5921804b 100644 --- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc @@ -207,5 +207,32 @@ TEST_F(NpuAttrTest,
SetNpuOptimizerAttrInvalidEnableOnlineInference) { s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); EXPECT_EQ(s.ok(), false); } + +TEST_F(NpuAttrTest, CheckGraphCompilerCacheDir) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue graph_compiler_cache_dir = AttrValue(); + graph_compiler_cache_dir.set_s("./cache_dir"); + (*custom_config->mutable_parameter_map())["graph_compiler_cache_dir"] = graph_compiler_cache_dir; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); + EXPECT_FALSE(s.ok()); + + AttrValueMap attr_map; + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + attr_map["_graph_compiler_cache_dir"] = graph_compiler_cache_dir; + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + auto find_ret = all_options.find("graph_compiler_cache_dir"); + ASSERT_TRUE(find_ret != all_options.cend()); + EXPECT_EQ(find_ret->second, "./cache_dir"); +} } } // end tensorflow diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index ad611a61b7faefe52727a2404ae4e105e1cd9863..dbd01e3b29655354de05e311a9258d299228a789 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -400,6 +400,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr std::string external_weight = "0"; std::string graph_parallel_option_path; std::string enable_graph_parallel; + std::string graph_compiler_cache_dir; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { (void) ctx->GetAttr("_variable_format_optimize", &variable_format_optimize); @@ 
-464,6 +465,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr (void) ctx->GetAttr("_external_weight", &external_weight); (void) ctx->GetAttr("_graph_parallel_option_path", &graph_parallel_option_path); (void) ctx->GetAttr("_enable_graph_parallel", &enable_graph_parallel); + (void) ctx->GetAttr("_graph_compiler_cache_dir", &graph_compiler_cache_dir); } // session options @@ -516,6 +518,9 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr sess_options["external_weight"] = external_weight; sess_options["ge.graphParallelOptionPath"] = graph_parallel_option_path; sess_options["ge.enableGraphParallel"] = enable_graph_parallel; + if (!graph_compiler_cache_dir.empty()) { + sess_options["ge.graph_compiler_cache_dir"] = graph_compiler_cache_dir; + } return sess_options; } @@ -666,7 +671,6 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr init_options_["stream_sync_timeout"] = stream_sync_timeout; init_options_["event_sync_timeout"] = event_sync_timeout; init_options_["ge.esClusterConfig"] = es_cluster_config; - return init_options_; } @@ -1039,6 +1043,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string es_cluster_config; std::string graph_parallel_option_path; std::string enable_graph_parallel; + std::string graph_compiler_cache_dir; auto NpuOptimizer_value = attrs.Find("_NpuOptimizer"); auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc"); @@ -1122,6 +1127,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto external_weight_value = attrs.Find("_external_weight"); auto graph_parallel_option_path_val = attrs.Find("_graph_parallel_option_path"); auto enable_graph_parallel_val = attrs.Find("_enable_graph_parallel"); + auto graph_compiler_cache_dir_val = attrs.Find("_graph_compiler_cache_dir"); if (NpuOptimizer_value != nullptr) { do_npu_optimizer = "1"; @@ -1392,6 +1398,9 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (es_cluster_config_value != nullptr) { es_cluster_config = 
es_cluster_config_value->s(); } + if (graph_compiler_cache_dir_val != nullptr) { + graph_compiler_cache_dir = graph_compiler_cache_dir_val->s(); + } } all_options["variable_format_optimize"] = variable_format_optimize; @@ -1403,6 +1412,9 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (!variable_memory_max_size.empty()) { all_options["variable_memory_max_size"] = variable_memory_max_size; } + if (!graph_compiler_cache_dir.empty()) { + all_options["graph_compiler_cache_dir"] = graph_compiler_cache_dir; + } all_options["enable_dump"] = enable_dump; all_options["dump_path"] = dump_path; @@ -1588,6 +1600,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options std::string model_deploy_mode; std::string model_deploy_devicelist; std::string aoe_config_file; + std::string graph_compiler_cache_dir; int32_t stream_sync_timeout = -1; int32_t event_sync_timeout = -1; bool external_weight = false; @@ -2013,6 +2026,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options init_options_["es_cluster_config"] = es_cluster_config; init_options_["ge.esClusterConfig"] = es_cluster_config; } + if (params.count("graph_compiler_cache_dir") > 0) { + graph_compiler_cache_dir = params.at("graph_compiler_cache_dir").s(); + } } } @@ -2025,7 +2041,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (!variable_memory_max_size.empty()) { sess_options["variable_memory_max_size"] = variable_memory_max_size; } - + if (!graph_compiler_cache_dir.empty()) { + sess_options["graph_compiler_cache_dir"] = graph_compiler_cache_dir; + } sess_options["enable_dump"] = std::to_string(static_cast(enable_dump)); sess_options["dump_path"] = dump_path; sess_options["dump_step"] = dump_step; diff --git a/tf_adapter_2.x/npu_device/core/npu_device.cpp b/tf_adapter_2.x/npu_device/core/npu_device.cpp index f1d6f0f8e7dad5c623bb148175e9078f8eac7540..709fa65be5633d537690ecceab9085a6e0dcb9e8 100644 --- 
a/tf_adapter_2.x/npu_device/core/npu_device.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_device.cpp @@ -925,18 +925,24 @@ uint64_t NpuDevice::AddGeGraphInner(TFE_Context *context, uint64_t graph_id, con if (def.node_size() == 0) { return kEmptyGeGraphId; } + std::map graph_options(options); + const auto cache_dir_option = device_options.find("ge.graph_compiler_cache_dir"); + if (cache_dir_option != device_options.cend() && !cache_dir_option->second.empty()) { + LOG(INFO) << "ge.graph_compiler_cache_dir exists, add option ge.graph_key=" << name; + graph_options["ge.graph_key"] = name; + } ge::Graph ge_graph; NPU_CTX_REQUIRES_OK_RETURN(status, TransTfGraph2GeGraph(context, name, def, ge_graph), graph_id); ge_graph.SetNeedIteration(loop); - if (kDumpExecutionDetail && !options.empty()) { + if (kDumpExecutionDetail && !graph_options.empty()) { LOG(INFO) << "Add ge graph " << graph_id << " with options:"; - for (auto &option : options) { + for (auto &option : graph_options) { LOG(INFO) << " " << option.first << ":" << option.second; } } - NPU_CTX_REQUIRES_GE_OK_RETURN(status, "Graph engine Add graph", GeSession()->AddGraph(graph_id, ge_graph, options), - graph_id); + NPU_CTX_REQUIRES_GE_OK_RETURN(status, "Graph engine Add graph", + GeSession()->AddGraph(graph_id, ge_graph, graph_options), graph_id); return graph_id; } diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 7ffadcdd7b41d55992a34c5fab50dac8e6caabe0..3e82e22654854cc130ec9ca26a159ace6d939fb1 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -117,7 +117,8 @@ const std::map kConfigurableOptions = { {"graph_parallel_option_path", "ge.graphParallelOptionPath"}, {"enable_graph_parallel", "ge.enableGraphParallel"}, {"atomic_clean_policy", "ge.exec.atomicCleanPolicy"}, - {"static_memory_policy", "ge.exec.staticMemoryPolicy"}}; + {"static_memory_policy", "ge.exec.staticMemoryPolicy"}, + 
{"graph_compiler_cache_dir", "ge.graph_compiler_cache_dir"}}; } // namespace #undef PYBIND11_CHECK_PYTHON_VERSION diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index da5492e08447515c8cb7da51ddf5105299f6567e..8c0931b881fe287b9f8886c53df8d1551007675f 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -66,6 +66,7 @@ class NpuConfig(NpuBaseConfig): self.event_sync_timeout = OptionValue(-1, None) self.external_weight = OptionValue(False, [True, False]) self.memory_config = MemoryConfig() + self.graph_compiler_cache_dir = OptionValue(None, None) # Configuration for experiment self.experimental = NpuExperimentalConfig() diff --git a/tf_adapter_2.x/tests/st/adapter2_options.py b/tf_adapter_2.x/tests/st/adapter2_options.py index 89c86982d699be685ec34435473adb2f88708ad8..26097469f9fdfcfd6bcca8131f91417810f5e3f2 100644 --- a/tf_adapter_2.x/tests/st/adapter2_options.py +++ b/tf_adapter_2.x/tests/st/adapter2_options.py @@ -122,6 +122,13 @@ class Adapter2Options(unittest.TestCase): options = config.as_dict() self.assertTrue(options['hcom_parallel'], False) + def test_9_set_option_graph_compiler_cache_dir(self): + config = NpuConfig() + options = config.as_dict() + self.assertTrue('graph_compiler_cache_dir' not in options, True) + config.graph_compiler_cache_dir = "./st_graph_cache_dir" + options = config.as_dict() + self.assertEqual(options['graph_compiler_cache_dir'], "./st_graph_cache_dir") if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/tf_adapter_2.x/tests/st/adapter2_st.py b/tf_adapter_2.x/tests/st/adapter2_st.py index dda926e5b5871f6c9ae75e05958b632fca1389a6..50401668de18ff194dda0efd2c1bf85941b9de60 100644 --- a/tf_adapter_2.x/tests/st/adapter2_st.py +++ b/tf_adapter_2.x/tests/st/adapter2_st.py @@ -35,6 +35,7 @@ 
npu_device.global_options().experimental.multi_branches_config.dynamic_node_type npu_device.global_options().experimental.multi_branches_config.dynamic_dims = "1;2" npu_device.global_options().aoe_config.work_path = "./" npu_device.global_options().graph_run_mode = 0 +npu_device.global_options().graph_compiler_cache_dir = "./st_graph_cache_dir"; os.environ['RANK_TABLE_FILE'] = "rankTable" os.environ['RANK_SIZE'] = "2" os.environ['RANK_ID'] = "1" diff --git a/tf_adapter_2.x/tests/ut/testcases/test_npu_device.cpp b/tf_adapter_2.x/tests/ut/testcases/test_npu_device.cpp index d3ce9f4c387f3e2a5574f61b96de8bb09e9e04d2..7fb87a85e4d25e61d218f9b7b684d01e8161b926 100644 --- a/tf_adapter_2.x/tests/ut/testcases/test_npu_device.cpp +++ b/tf_adapter_2.x/tests/ut/testcases/test_npu_device.cpp @@ -139,6 +139,7 @@ class ST_NpuDevice : public ::testing::Test { std::map device_options; device_options["ge.jobType"] = "1"; device_options["ge.tuningPath"] = "./"; + device_options["ge.graph_compiler_cache_dir"] = "./"; npu::CreateDevice(context, kNpuDeviceName, kNpuDeviceIndex, device_options); for (const auto &function_def : FunctionStrLibrary::Instance().Get()) {