diff --git a/tf_adapter/common/adp_logger.h b/tf_adapter/common/adp_logger.h
index 508f919caba71a69f535a17655659e2f6407d136..d351f4d9e4159fc8726dbaa6393fcd876c424a00 100644
--- a/tf_adapter/common/adp_logger.h
+++ b/tf_adapter/common/adp_logger.h
@@ -21,6 +21,17 @@
 
 #define FMK_MODULE_NAME static_cast(FMK)
 
+#define LOG_DEPRECATED_WITH_REPLACEMENT(old, replacement) \
+  do { \
+    ADP_LOG(WARNING) << "The " #old " option IS DEPRECATED. It will be removed in a future version. Please " \
+                        "use " #replacement " instead"; \
+  } while (false)
+
+#define LOG_DEPRECATED(old) \
+  do { \
+    ADP_LOG(WARNING) << "The " #old " option IS DEPRECATED. It will be removed in a future version."; \
+  } while (false)
+
 namespace npu {
 constexpr const char *ADP_MODULE_NAME = "TF_ADAPTER";
 const int ADP_DEBUG = 0;
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
index 8001f13eea2f5e0337e59d163b89e1e4543358e5..3548d2774376b3a022ef899626feb41590678dfd 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
@@ -53,7 +53,7 @@
                enable_reduce_precision=False,
                variable_format_optimize=True,
                mix_compile_mode=False,
-               hcom_parallel=False,
+               hcom_parallel=True,
                graph_memory_max_size=None,
                variable_memory_max_size=None,
                auto_tune_mode=None,
diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index ed327226aeea4ca16db829a3c92b49e6609f76e5..17d639d3429fd4051648af75820bfe880c4cacd6 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -850,6 +850,7 @@ std::map<std::string, std::string> NpuAttrs::GetPassOptions(const AttrSlice &att
   if (NpuOptimizer_value != nullptr) {
     do_npu_optimizer = "1";
     if (enable_data_pre_proc_value != nullptr) {
+      LOG_DEPRECATED(enable_data_pre_proc);
       enable_dp = enable_data_pre_proc_value->s();
     }
     if (use_off_line_value != nullptr) {
@@ -1060,6 +1061,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   if (NpuOptimizer_value != nullptr) {
     do_npu_optimizer = "1";
     if (enable_data_pre_proc_value != nullptr) {
+      LOG_DEPRECATED(enable_data_pre_proc);
       enable_dp = enable_data_pre_proc_value->s();
     }
     if (use_off_line_value != nullptr) {
@@ -1189,9 +1191,11 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
     enable_exception_dump = enable_exception_dump_value->s();
   }
   if (op_select_implmode_value != nullptr) {
+    LOG_DEPRECATED_WITH_REPLACEMENT(op_select_implmode, op_precision_mode);
     op_select_implmode = op_select_implmode_value->s();
   }
   if (optypelist_for_implmode_value != nullptr) {
+    LOG_DEPRECATED_WITH_REPLACEMENT(optypelist_for_implmode, op_precision_mode);
     optypelist_for_implmode = optypelist_for_implmode_value->s();
   }
   if (input_shape_value != nullptr) {
@@ -1385,7 +1389,7 @@ std::map<std::string, std::string> NpuAttrs::GetDefaultPassOptions() {
 Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options, Node *node) {
   std::map<std::string, std::string> sess_options;
   bool variable_format_optimize = true;
-  bool hcom_parallel = false;
+  bool hcom_parallel = true;
   std::string graph_memory_max_size;
   std::string variable_memory_max_size;
   bool enable_dump = false;
diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
index 09d1572272d93cb93233f43b3b674c3ede27dfd6..a7a6c67b85f43a18ecb125d4974ee48ab98879aa 100644
--- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py
+++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
@@ -42,7 +42,7 @@ class NpuConfig(NpuBaseConfig):
         self.op_compiler_cache_mode = OptionValue('disable', ['enable', 'disable', 'force'])
         self.op_compiler_cache_dir = OptionValue(None, None)
         self.stream_max_parallel_num = OptionValue(None, None)
-        self.hcom_parallel = OptionValue(False, [True, False])
+        self.hcom_parallel = OptionValue(True, [True, False])
         self.hcom_multi_mode = OptionValue(None, None)
         self.is_tailing_optimization = OptionValue(False, [True, False])
         self.op_debug_level = OptionValue(0, [0, 1, 2, 3, 4])
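
For reference, below is a minimal standalone sketch of how the two macros added to adp_logger.h expand and are called. The ADP_LOG stub is an assumption made only so the sketch compiles on its own; the real logger macro lives in tf_adapter/common/adp_logger.h and is not reproduced here. The do { ... } while (false) wrapper keeps each macro usable as a single statement (safe inside an unbraced if/else), and the # operator stringifies the option names, which is why the call sites in npu_attrs.cc pass bare identifiers rather than quoted strings.

// Standalone sketch, not part of the patch. ADP_LOG is stubbed with std::cerr
// purely for illustration; the real macro is defined in adp_logger.h.
#include <iostream>

#define ADP_LOG(level) std::cerr << "\n[" #level "] "  // illustrative stub only

#define LOG_DEPRECATED_WITH_REPLACEMENT(old, replacement) \
  do { \
    ADP_LOG(WARNING) << "The " #old " option IS DEPRECATED. It will be removed in a future version. Please " \
                        "use " #replacement " instead"; \
  } while (false)

#define LOG_DEPRECATED(old) \
  do { \
    ADP_LOG(WARNING) << "The " #old " option IS DEPRECATED. It will be removed in a future version."; \
  } while (false)

int main() {
  // #old / #replacement stringify the arguments, so call sites pass bare
  // identifiers, exactly as the new calls in npu_attrs.cc do.
  LOG_DEPRECATED(enable_data_pre_proc);
  LOG_DEPRECATED_WITH_REPLACEMENT(op_select_implmode, op_precision_mode);
  return 0;
}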