diff --git a/CMakeLists.txt b/CMakeLists.txt
index c31967f320e9b3e25bf9373d6943910f624b9bff..6517e92513aa370dd4546db749c7ff768878c008 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,6 +23,7 @@ if (ENABLE_OPEN_SRC)
     include(${CMAKE_CURRENT_LIST_DIR}/cmake/tensorflow.cmake)
     include_directories(${CMAKE_CURRENT_LIST_DIR})
     include_directories(${CMAKE_CURRENT_LIST_DIR}/inc)
+    include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/toolchain)
     include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/external)
     include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/soft_dp)
     include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/graphengine/inc)
@@ -129,17 +130,29 @@ else()
         ${CMAKE_CURRENT_LIST_DIR}/tf_adapter/optimizers/*.cc
         ${CMAKE_CURRENT_LIST_DIR}/tf_adapter/util/*.cc
     )
+
+    add_custom_target(tensorflow_source ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/tensorflow_source.timestamp)
+    # Stages a TensorFlow tree at tag v1.15.5 in ${BASE_DIR}/tensorflow_15. NOTE(review): no command below creates the OUTPUT timestamp, so this presumably re-runs on every build - confirm intended.
+    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tensorflow_source.timestamp
+                       COMMAND echo "cp tensorflow1.15 source begin:"
+                       COMMAND rm -rf ${BASE_DIR}/tensorflow_15 && mkdir -p ${BASE_DIR}/tensorflow_15
+                       COMMAND cp -rfL ${TOP_DIR}/open_source/tensorflow ${BASE_DIR}/tensorflow_15 || echo skip
+                       COMMAND cd ${BASE_DIR}/tensorflow_15/tensorflow && git checkout . && git fetch --all --tags && git checkout tags/v1.15.5
+                       COMMAND echo "end cp tensorflow1.15 source"
+                       DEPENDS ${TOP_DIR}/open_source/tensorflow)
+
     add_library(tf_adapter SHARED
         ${SOURCES}
         ${BASE_DIR}/tf_adapter/util/ge_plugin_wrap.cxx
     )
 
+    add_dependencies(tf_adapter tensorflow_source)
+
     target_include_directories(tf_adapter PRIVATE
         ${BASE_DIR}/
         ${TOP_DIR}/inc/
         ${TOP_DIR}/inc/external/
         ${TOP_DIR}/inc/common/
-        ${TOP_DIR}/inc/soft_dp/
         ${TOP_DIR}/soft_dp/
         ${TOP_DIR}/ace/execfwk/soft_dp/
         ${TOP_DIR}/graphengine/inc/
@@ -150,8 +163,9 @@ else()
         ${TOP_DIR}/abl/libc_sec/include/
         ${TOP_DIR}/third_party/json/include/
         ${TOP_DIR}/open_source/json/include/
-        ${TOP_DIR}/third_party/tensorflow/tensorflow-1.15.0/
-        ${TOP_DIR}/third_party/tensorflow/compile_deps/tf-1.15.0/include/
+        ${BASE_DIR}/tensorflow_15/tensorflow
+        /opt/buildtools/tensorflow-1.15.5/tensorflow_core/include/
+        /opt/buildtools/tensorflow-1.15.5/tensorflow-1.15.5.data/purelib/tensorflow_core/include/
         ${HI_PYTHON_INC}/
     )
 
@@ -169,6 +183,11 @@ else()
 
     target_link_libraries(tf_adapter PUBLIC
         $<BUILD_INTERFACE:intf_pub>
+        $<BUILD_INTERFACE:mmpa_headers>
+        $<BUILD_INTERFACE:slog_headers>
+        $<BUILD_INTERFACE:msprof_headers>
+        $<BUILD_INTERFACE:aoe_headers>
+        $<BUILD_INTERFACE:soft_dp_headers>
         -Wl,--no-as-needed
         c_sec
         ge_runner
@@ -221,6 +240,7 @@ else()
         && rm -rf ${BASE_DIR}/libpywrap_tensorflow_internal.so
         && rm -rf ${BASE_DIR}/libtensorflow_framework.so.1
         && rm -rf ${BASE_DIR}/libtensorflow_framework.so
+        && rm -rf ${BASE_DIR}/tensorflow_15/
         && echo "package whl end"
     )
     ###################################### Tensorflow 2.x ###########################################
diff --git a/convert_tf2npu/ast_impl.py b/convert_tf2npu/ast_impl.py
index ee5d2d191933f4a7090016ba433eb1fd126b8c2a..de87b24459320792cee4df1fd8a1af328ecd0465 100644
--- a/convert_tf2npu/ast_impl.py
+++ b/convert_tf2npu/ast_impl.py
@@ -32,16 +32,26 @@ def import_from(node):
         if "keras" in values:
             util_global.set_value('is_keras_net', True)
         if "horovod" in values:
+            log_msg(getattr(node, "lineno", "None"), "remove horovod import line to None")
             util_global.set_value('has_hccl_api', True)
+            new_node = ast.Expr(value=ast.NameConstant(value=None))
+            ast.copy_location(new_node, node)
+            util_global.set_value('need_conver', True)
+            return new_node
     for value in node.names:
         if isinstance(value, ast.alias):
             values = value.name.split(".")
             if "keras" in values:
                 util_global.set_value('is_keras_net', True)
             if "horovod" in values:
+                log_msg(getattr(node, "lineno", "None"), "remove horovod import line to None")
                 util_global.set_value('has_hccl_api', True)
+                new_node = ast.Expr(value=ast.NameConstant(value=None))
+                ast.copy_location(new_node, node)
+                util_global.set_value('need_conver', True)
+                return new_node
     util_global.set_value('need_conver', True)
-
+    return node
 
 def ast_import(node):
     for value in node.names:
@@ -50,8 +60,14 @@ def ast_import(node):
             if "keras" in values:
                 util_global.set_value('is_keras_net', True)
             if "horovod" in values:
+                log_msg(getattr(node, "lineno", "None"), "remove horovod import line to None")
                 util_global.set_value('has_hccl_api', True)
-            util_global.set_value('need_conver', True)
+                new_node = ast.Expr(value=ast.NameConstant(value=None))
+                ast.copy_location(new_node, node)
+                util_global.set_value('need_conver', True)
+                return new_node
+    util_global.set_value('need_conver', True)
+    return node
 
 def ast_function_def(node):
     log_success_report(getattr(node, "lineno", "None"), node.name)
@@ -81,19 +97,18 @@ def ast_if(node):
                                                                   args=[], keywords=[]))
                     node.body = [keras_sess_assign] + node.body + [ast.Expr(value=close_sess_call)]
                     util_global.set_value('need_conver', True)
-                if util_global.get_value("has_hccl_api", False):
-                    log_msg(getattr(node, "lineno", "None"), " add npu resource init api")
-                    close_sess_call = ast.Call(func=ast.Name(id="close_session", ctx=ast.Load()),
-                                               args=[ast.Name(id="npu_sess", ctx=ast.Load())], keywords=[])
-                    init_assign = ast.Assign(targets=[ast.Tuple(elts=[ast.Name(id="npu_sess", ctx=ast.Store()),
-                                                                      ast.Name(id="npu_shutdown", ctx=ast.Store())],
-                                                                ctx=ast.Store())],
-                                             value=ast.Call(func=ast.Name(id="init_resource", ctx=ast.Load()), args=[], keywords=[]))
-                    shutdown_call = ast.Call(func=ast.Name(id="shutdown_resource", ctx=ast.Load()),
-                                             args=[ast.Name(id="npu_sess", ctx=ast.Load()), ast.Name(id="npu_shutdown", ctx=ast.Load())],
-                                             keywords=[])
-                    node.body = [init_assign] + node.body + [ast.Expr(value=shutdown_call), ast.Expr(value=close_sess_call)]
-                    util_global.set_value('need_conver', True)
+                log_msg(getattr(node, "lineno", "None"), " add npu resource init api")
+                close_sess_call = ast.Call(func=ast.Name(id="close_session", ctx=ast.Load()),
+                                           args=[ast.Name(id="npu_sess", ctx=ast.Load())], keywords=[])
+                init_assign = ast.Assign(targets=[ast.Tuple(elts=[ast.Name(id="npu_sess", ctx=ast.Store()),
+                                                                  ast.Name(id="npu_shutdown", ctx=ast.Store())],
+                                                            ctx=ast.Store())],
+                                         value=ast.Call(func=ast.Name(id="init_resource", ctx=ast.Load()), args=[], keywords=[]))
+                shutdown_call = ast.Call(func=ast.Name(id="shutdown_resource", ctx=ast.Load()),
+                                         args=[ast.Name(id="npu_sess", ctx=ast.Load()), ast.Name(id="npu_shutdown", ctx=ast.Load())],
+                                         keywords=[])
+                node.body = [init_assign] + node.body + [ast.Expr(value=shutdown_call), ast.Expr(value=close_sess_call)]
+                util_global.set_value('need_conver', True)
                 return node
 
 def convert_loss_scale_api(node):
@@ -293,13 +308,20 @@ def ast_call(node):
         util_global.set_value('need_conver', True)
         return node
     if isinstance(node.func, ast.Attribute) and node.func.attr == "DistributedOptimizer":
-        log_success_report(getattr(node, "lineno", "None"), 'DistributedOptimizer')
-        return node.args[0]
+        log_msg(getattr(node, "lineno", "None"), 'change hvd.DistributedOptimizer to the input key optimizer')
+        # The wrapped optimizer is taken from the optimizer= keyword first, else the first positional argument.
+        for keyword in node.keywords:
+            if keyword.arg == "optimizer":
+                return keyword.value
+        if node.args:
+            return node.args[0]
+        # Neither form is present: keep the call unchanged instead of raising IndexError on node.args[0].
+        return node
     if isinstance(node.func, ast.Attribute) and node.func.attr == 'shard':
         log_success_report(getattr(node, "lineno", "None"), 'shard')
-        node.args = [ast.Call(func=ast.Name(id='get_rank_size', ctx=ast.Load()), args=[], keywords=[]),
-                     ast.Call(func=ast.Name(id='get_rank_id', ctx=ast.Load()), args=[], keywords=[])]
-        util_global.set_value("has_hccl_api", True)
+        node.args = [pasta.parse("int(os.getenv('RANK_SIZE', '1'))"),
+                     pasta.parse("int(os.getenv('RANK_ID', '0'))")]
+        node.keywords.clear()
         util_global.set_value('need_conver', True)
     if isinstance(node.func, ast.Attribute) and node.func.attr == 'dropout':
         if isinstance(node.func.value, ast.Attribute) and node.func.value.attr == 'nn':
@@ -315,6 +337,9 @@ def ast_call(node):
             for keyword in node.keywords:
                 if keyword.arg != 'rate':
                     keywords_new.append(keyword)
+                else:
+                    keywords_new.append(ast.keyword(arg='keep_prob', value=ast.BinOp(left=ast.Num(n=1), op=ast.Sub(),
+                                                                                     right=keyword.value)))
             node.keywords = keywords_new
             util_global.set_value('need_conver', True)
     if isinstance(node.func, ast.Attribute) and ((node.func.attr == 'map_and_batch') or (node.func.attr == 'batch' \
@@ -348,6 +373,17 @@ def ast_call(node):
         node.keywords = []
         node.args = []
         util_global.set_value('need_conver', True)
+    if (isinstance(node.func, ast.Attribute) and (node.func.attr == 'RunConfig')) and \
+        (_call_name_match(node.func.value, 'estimator') or _call_name_match(node.func.value, 'tpu')):
+        # Append save_summary_steps=0 to *.estimator.RunConfig / *.tpu.RunConfig calls.
+        # A call that already passes the keyword, or that has three or more positional
+        # arguments, is treated as providing the value and is left unchanged.
+        has_summary_kw = any(keyword.arg == 'save_summary_steps' for keyword in node.keywords)
+        if len(node.args) < 3 and not has_summary_kw:
+            # Default the line number to 'None' like the other handlers so a node without lineno cannot raise.
+            log_msg(getattr(node, 'lineno', 'None'), 'RunConfig() add save_summary_steps=0')
+            util_global.set_value('need_conver', True)
+            node.keywords.append(ast.keyword(arg='save_summary_steps', value=pasta.parse('0')))
     if isinstance(node.func, ast.Attribute) and (node.func.attr == 'TPUEstimator') and \
         ((isinstance(node.func.value, ast.Attribute) and (node.func.value.attr == 'tpu')) or \
         (isinstance(node.func.value, ast.Name) and (node.func.value.id == 'tpu'))):
@@ -543,38 +579,6 @@ def _call_name_match(call_func, call_name):
     return (isinstance(call_func, ast.Attribute) and (call_func.attr == call_name)) or \
            (isinstance(call_func, ast.Name) and (call_func.id) == call_name)
 
-def remove_hvd_import(r_node):
-    n = 0
-    lenline = len(r_node.body)
-
-    while n < lenline and not isinstance(r_node.body[n], ast.ImportFrom) and not isinstance(r_node.body[n], ast.Import):
-        n += 1
-
-    while n < lenline and (isinstance(r_node.body[n], ast.ImportFrom) or isinstance(r_node.body[n], ast.Import)):
-        if isinstance(r_node.body[n], ast.ImportFrom):
-            if r_node.body[n].module != None:
-                values = r_node.body[n].module.split(".")
-                if "horovod" in values:
-                    log_msg(getattr(r_node.body[n], "lineno", "None"), " remove hvd import.")
-                    r_node.body.pop(n)
-                    lenline -= 1
-            for value in r_node.body[n].names:
-                if isinstance(value, ast.alias):
-                    values = value.name.split(".")
-                    if "horovod" in values:
-                        log_msg(getattr(r_node.body[n], "lineno", "None"), " remove hvd import.")
-                        r_node.body.pop(n)
-                        lenline -= 1
-        elif isinstance(r_node.body[n], ast.Import):
-            for value in r_node.body[n].names:
-                if isinstance(value, ast.alias):
-                    values = value.name.split(".")
-                    if "horovod" in values:
-                        log_msg(getattr(r_node.body[n], "lineno", "None"), " remove hvd import.")
-                        r_node.body.pop(n)
-                        lenline -= 1
-        n += 1
-
 def insert_npu_import(r_node):
     npu_alias = ast.alias(name='*', asname=None)
     npu_import = ast.ImportFrom(module='npu_bridge.npu_init', names=[npu_alias], level=0)
diff --git a/convert_tf2npu/conver_by_ast.py b/convert_tf2npu/conver_by_ast.py
index b8c2a1ddedad17b4a45896ae188b669b16a4a6df..64f93b95c623db8179afce230dab01459f85f27e 100644
--- a/convert_tf2npu/conver_by_ast.py
+++ b/convert_tf2npu/conver_by_ast.py
@@ -53,13 +53,13 @@ class ConverByAst(ast.NodeTransformer):
         return node
 
     def visit_ImportFrom(self, node):
-        import_from(node)
         self.generic_visit(node)
+        node = import_from(node)
         return node
 
     def visit_Import(self, node):
-        ast_import(node)
         self.generic_visit(node)
+        node = ast_import(node)
         return node
 
     def visit_Assign(self, node):
@@ -101,12 +101,9 @@ def conver_ast(path, out_path_dst, file_name):
         insert_npu_import(r_node)
         if not util_global.get_value('has_main_func', False) and (util_global.get_value('has_hccl_api', False)
             or util_global.get_value('is_keras_net', False)):
-            log_warning('the network of keras and horovod, or using dataset.shard script do not have main func, '
+            log_warning('the network of keras and horovod script do not have main func, '
                         'should set -m or --main parameter')
-        if util_global.get_value('has_main_func', False) and util_global.get_value('has_hccl_api', False):
-            remove_hvd_import(r_node)
-        if util_global.get_value('is_main_file', False) and util_global.get_value('has_hccl_api', False):
-            remove_hvd_import(r_node)
+        if util_global.get_value('is_main_file', False):
             insert_npu_resource_init(r_node)
             insert_npu_resource_shutdown(r_node)
         if util_global.get_value('is_main_file', False) and util_global.get_value('is_keras_net', False):
diff --git a/convert_tf2npu/mappings/ast.py b/convert_tf2npu/mappings/ast.py
index f2864da5b7594568b33af0efe278d33f49deff97..1d316029126ae8970811037d49111f84964e878f 100644
--- a/convert_tf2npu/mappings/ast.py
+++ b/convert_tf2npu/mappings/ast.py
@@ -4,9 +4,9 @@
 "dropout":                      ["npu_ops",                 "tf.nn.dropout",                "npu_ops.dropout"],
 "init":                         ["print",                   "hvd.init",                     "None"],
 "DistributedOptimizer":         ["NPUDistributedOptimizer", "hvd.DistributedOptimizer",     "NPUDistributedOptimizer"],
-"rank":                         ["get_npu_rank_id",             "hvd.rank",                     "get_npu_rank_id"],
-"local_rank":                   ["get_npu_local_rank_id",       "hvd.local_rank",               "get_npu_local_rank_id"],
-"size":                         ["get_npu_rank_size",           "hvd.size",                     "get_npu_rank_size"],
+"rank":                         ["get_npu_rank_id",         "hvd.rank",                     "get_npu_rank_id"],
+"local_rank":                   ["get_npu_local_rank_id",   "hvd.local_rank",               "get_npu_local_rank_id"],
+"size":                         ["get_npu_rank_size",       "hvd.size",                     "get_npu_rank_size"],
 "BroadcastGlobalVariablesHook": ["print",                   "hvd.BroadcastGlobalVariablesHook", "None"],
 "shard":                        ["",                        "dataset.shard(xxx, xxx)",      "dataset.shard(get_rank_size(), get_rank_id())"],
 "EstimatorSpec":                ["NPUEstimatorSpec",        "tf.estimator.EstimatorSpec",   "NPUEstimatorSpec"],
@@ -49,7 +49,7 @@
                                  "DNNLinearCombinedClassifier",         "DNNLinearCombinedEstimator",                   "DNNLinearCombinedRegressor",
                                  "LinearClassifier",                    "LinearEstimator",                              "LinearRegressor"],
 "EstimatorFunc":                ["train"],
-"Session()":                    ["",                        "*.Session()",                  "*.Session(config=npu_session_config_init())"],
+"Session()":                    ["",                        "*.*Session()",                  "*.*Session(config=npu_session_config_init())"],
 "ConfigProto()":                ["",                        "*.ConfigProto()",              "npu_config_proto(config_proto=*.ConfigProto())"],
 "GraphOptions()":               ["",                        "*.GraphOptions()",             "npu_graph_options(graph_options=*.GraphOptions())"],
 "OptimizerOptions()":           ["",                        "*.OptimizerOptions()",         "npu_optimizer_options(optimizer_options=*.OptimizerOptions())"],
diff --git a/convert_tf2npu/util.py b/convert_tf2npu/util.py
index 1034e227a125327583aa4abd29f42950c0f51859..190afe51e9e2c05812cec72e11f6371dd4e105eb 100644
--- a/convert_tf2npu/util.py
+++ b/convert_tf2npu/util.py
@@ -28,7 +28,7 @@ def log_info(lineno, msg, file):
     write_conver_report(content, file)
 
 def log_warning(msg):
-    content = "************" + msg + "************"
+    content = "WARNING:" + msg
     print(content)
     write_conver_report(content, util_global.get_value('report_file')[0])
 
diff --git a/inc/graphengine/inc/external/ge/ge_api_types.h b/inc/graphengine/inc/external/ge/ge_api_types.h
index fbd6c020ef57633b6566e43f422f060d81f9c3e2..6f5bbfbfe032fa714ac281d7162c4528342cd08a 100644
--- a/inc/graphengine/inc/external/ge/ge_api_types.h
+++ b/inc/graphengine/inc/external/ge/ge_api_types.h
@@ -113,6 +113,7 @@ const char *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16";
 const char *const OP_DEBUG_LEVEL = "ge.opDebugLevel";
 const char *const PERFORMANCE_MODE = "ge.performance_mode";
 const char *const MODIFY_MIXLIST = "ge.exec.modify_mixlist";
+const char *const OP_PRECISION_MODE = "ge.exec.op_precision_mode";
 }  // namespace configure_option
 // Configure stream num by Session constructor options param,
 // its value should be int32_t type, default value is "1"
@@ -326,6 +327,8 @@ const std::string PERFORMANCE_MODE = "ge.performance_mode";
 
 const std::string MODIFY_MIXLIST = "ge.exec.modify_mixlist";
 
+const std::string OP_PRECISION_MODE = "ge.exec.op_precision_mode";
+
 // Graph run mode
 enum GraphRunMode { PREDICTION = 0, TRAIN };
 
@@ -405,6 +408,7 @@ static const char *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str();
 static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str();
 static const char *const PERFORMANCE_MODE = ge::PERFORMANCE_MODE.c_str();
 static const char *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str();
+static const char *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str();
 
 // for interface: aclgrphBuildModel
 #ifdef __GNUC__
@@ -416,6 +420,7 @@ const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT,
                                                              DYNAMIC_IMAGE_SIZE,
                                                              DYNAMIC_DIMS,
                                                              INSERT_OP_FILE,
+                                                             OP_PRECISION_MODE,
                                                              PRECISION_MODE,
                                                              TUNE_DEVICE_IDS,
                                                              EXEC_DISABLE_REUSED_MEMORY,
diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh
index 614a5a858ef9421f0e883e15378220c1ce0622f0..1674c35084affdb6207d41033b82b9c3f1e60ed0 100644
--- a/tf_adapter/interface_spec/api_npu_config.pyh
+++ b/tf_adapter/interface_spec/api_npu_config.pyh
@@ -15,7 +15,7 @@ class NPURunConfig(run_config_lib.RunConfig):
                 op_compiler_cache_mode=None, op_compiler_cache_dir=None, debug_dir=None, hcom_multi_mode=False, dynamic_input=False,
                 dynamic_graph_execute_mode="dynamic_execute", dynamic_inputs_shape_range=None,
                 train_distribute=None, eval_distribute=None, local_rank_id=None, local_device_list=None, session_device_id=None,
-                distribute_config=None, modify_mixlist=None):
+                distribute_config=None, modify_mixlist=None, op_precision_mode=None):
 
 class ProfilingConfig():
     def __init__(self, enable_profiling=False, profiling_options=None):
diff --git a/tf_adapter/kernels/data_item_deliver.h b/tf_adapter/kernels/data_item_deliver.h
index 39fd073e77eb16fefcae2c3c078f6df82295014c..8ebabcfe6fd334328da17e18c47d0f085b744c8b 100644
--- a/tf_adapter/kernels/data_item_deliver.h
+++ b/tf_adapter/kernels/data_item_deliver.h
@@ -45,8 +45,8 @@ limitations under the License.
 
 namespace tensorflow {
 namespace data {
-static constexpr char *SOCKET_SERVER_PATH = "/tmp/server";
-static constexpr char *MESSAGE_HEAD = "head_check";
+static constexpr char const *SOCKET_SERVER_PATH = "/tmp/server";
+static constexpr char const *MESSAGE_HEAD = "head_check";
 static constexpr int QLEN = 8;
 static constexpr int HEAD_INFO_SIZE = 3;
 static constexpr int ITEM_INFO_SIZE = 9;
@@ -72,7 +72,7 @@ class DataItemDeliver {
   Status SendDataVec(std::vector<tdt::DataItem> &data_items, int fd);
   Status CreateSockAddr(struct sockaddr_un &sockaddr, const char *path,
                         int local_rank_id);
-  int Recv(void *buffer, size_t data_len);
+  uint64_t Recv(void *buffer, size_t data_len);
   template <typename T>
   Status GetDataLen(T &value, size_t size);
   Status GetTensorType(tdt::TdtDataType &data_type);
@@ -83,12 +83,12 @@ class DataItemDeliver {
 
   mutex client_list_mu_;
   std::vector<int> client_fd_list_;
-  int server_fd_;
+  int server_fd_ = -1;
   std::shared_ptr<ThreadPool> pools_;
   struct sockaddr_un local_addr_ = {0};
   int local_rank_id_;
-  std::vector<uint32_t> local_device_list_;
   uint32_t device_id_;
+  std::vector<uint32_t> local_device_list_;
   std::string channel_name_;
 };
 
@@ -120,7 +120,7 @@ DataItemDeliver::~DataItemDeliver() {
 
 Status DataItemDeliver::ParallelInitSocketClient() {
   std::vector<std::future<Status>> init_status;
-  for (int i = 1; i < local_device_list_.size(); i++) {
+  for (size_t i = 1; i < local_device_list_.size(); i++) {
     init_status.emplace_back(
         pools_->Enqueue(&DataItemDeliver::InitSocketClient, this, i));
   }
@@ -227,7 +227,7 @@ Status DataItemDeliver::InitSocketServer() {
 
 Status DataItemDeliver::CheckHead(const char *check_value) {
   uint32_t head_size = 0;
-  int recvn = Recv(&head_size, UINT32_SIZE);
+  uint64_t recvn = Recv(&head_size, UINT32_SIZE);
   if (recvn != UINT32_SIZE) {
     ADP_LOG(ERROR) << "Failed to recv head length.";
     LOG(ERROR) << "Failed to recv head length.";
@@ -290,7 +290,7 @@ Status DataItemDeliver::RecvDataVec(std::vector<tdt::DataItem> &items) {
   return Status::OK();
 }
 
-int DataItemDeliver::Recv(void *buffer, size_t data_len) {
+uint64_t DataItemDeliver::Recv(void *buffer, size_t data_len) {
   int ret = -1;
   uint64_t buf_pos = 0;
   while (data_len > 0) {
@@ -304,13 +304,13 @@ int DataItemDeliver::Recv(void *buffer, size_t data_len) {
                        << ", channel_name:" << channel_name_;
       LOG(WARNING) << "Client connect closed, server_fd:" << server_fd_
                    << ", channel_name:" << channel_name_;
-      return ret;
+      return 0;
     } else if (ret < 0) {
       ADP_LOG(ERROR) << "Recv data failed,error:" << strerror(errno)
                      << ", (errno:" << errno << "), server_fd:" << server_fd_;
       LOG(ERROR) << "Recv data failed,error:" << strerror(errno)
                  << ", (errno:" << errno << "), server_fd:" << server_fd_;
-      return ret;
+      return 0;
     }
     buf_pos += ret;
     data_len -= ret;
@@ -320,7 +320,7 @@ int DataItemDeliver::Recv(void *buffer, size_t data_len) {
 
 template <typename T>
 Status DataItemDeliver::GetDataLen(T &value, size_t size) {
-  int recvn = Recv(&value, size);
+  uint64_t recvn = Recv(&value, size);
   if (recvn != size) {
     return errors::Internal("Failed to recv data length.");
   }
@@ -328,7 +328,7 @@ Status DataItemDeliver::GetDataLen(T &value, size_t size) {
 }
 
 Status DataItemDeliver::GetTensorType(tdt::TdtDataType &data_type) {
-  int recvn = Recv(&data_type, UINT32_SIZE);
+  uint64_t recvn = Recv(&data_type, UINT32_SIZE);
   if (recvn != UINT32_SIZE) {
     return errors::Internal("Failed to recv data length.");
   }
@@ -354,7 +354,7 @@ Status DataItemDeliver::GetTensorData(uint64_t &data_len,
     LOG(ERROR) << "Failed to reset buff memory. size:" << data_len;
     return errors::Internal("Failed to reset buff memory.");
   }
-  int recvn = Recv(buff, data_len);
+  uint64_t recvn = Recv(buff, data_len);
   if (recvn != data_len) {
     free(buff);
     ADP_LOG(ERROR) << "Failed to receive data.";
@@ -384,7 +384,7 @@ Status DataItemDeliver::GetTensorString(std::string &str) {
     LOG(ERROR) << "Failed to reset buff memory.";
     return errors::Internal("Failed to reset buff memory.");
   }
-  int recvn = Recv(buff, size);
+  uint64_t recvn = Recv(buff, size);
   if (recvn != size) {
     free(buff);
     ADP_LOG(ERROR) << "Failed to receive data.";
@@ -421,7 +421,7 @@ Status DataItemDeliver::SendDataVec(std::vector<tdt::DataItem> &data_items,
   uint32_t head_size = (strlen(MESSAGE_HEAD) + 1) * CHAR_SIZE;
   head_info[0].iov_base = &head_size;
   head_info[0].iov_len = UINT32_SIZE;
-  head_info[1].iov_base = MESSAGE_HEAD;
+  head_info[1].iov_base = const_cast<char*>(MESSAGE_HEAD);
   head_info[1].iov_len = head_size;
   head_info[2].iov_base = &vector_size;
   head_info[2].iov_len = UINT32_SIZE;
@@ -469,9 +469,9 @@ Status DataItemDeliver::CreateSockAddr(struct sockaddr_un &sock_addr,
                                        const char *path, int device_id) {
   sock_addr.sun_family = AF_UNIX;
   int len = 0;
-  if (-1 ==
-      (len = snprintf(sock_addr.sun_path, sizeof(sock_addr.sun_path), "%s%s%d",
-                      path, channel_name_.c_str(), device_id))) {
+  if (-1 == (len = snprintf_s(sock_addr.sun_path, sizeof(sock_addr.sun_path),
+                              sizeof(sock_addr.sun_path) - 1, "%s%s%d", path,
+                              channel_name_.c_str(), device_id))) {
     ADP_LOG(ERROR) << "Set sun_path failed.";
     LOG(ERROR) << "Set sun_path failed.";
     return errors::Internal("Set sun_path failed.");
@@ -494,4 +494,4 @@ void DataItemDeliver::SocketSend(struct iovec temp_items[], int vector_size,
 }
 }  // namespace data
 }  // namespace tensorflow
-#endif
\ No newline at end of file
+#endif
diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc
index f53502290ef4f194ea0e6bbe3c6ad57a4d12719d..de8513084199d89ca6acb74efbb0429f0880b2b1 100644
--- a/tf_adapter/kernels/geop_npu.cc
+++ b/tf_adapter/kernels/geop_npu.cc
@@ -528,7 +528,10 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) {
   bool is_lazy_recompile_mode = dynamic_input_ == "1" && dynamic_graph_execute_mode_ == "lazy_recompile";
   if (is_set_dynamic_config && is_tuning) {
     ADP_LOG(FATAL) << "dynamic input config can not use with mstuning.";
-    LOG(FATAL) << "dynamic input config can not use with mstuning.";
+    std::stringstream ss;
+    ss << "dynamic input config can not use with mstuning.";
+    OP_REQUIRES_ASYNC(ctx, false, errors::Internal(ss.str()), done);
+    return;
   } else if (is_set_dynamic_config && !is_tuning) {
     if (InitRebuildFlag(cache_graph_id) != 0) {
       OP_REQUIRES_ASYNC(ctx, false, errors::Internal("Failed to check rebuild flag"), done);
@@ -731,7 +734,6 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) {
          << ", graph id: " << cache_graph_id << std::endl
          << "Error Message is : " << std::endl
          << error_message;
-      LOG(FATAL) << ss.str();
       OP_REQUIRES_ASYNC(ctx, status == ge::SUCCESS, errors::Unavailable(ss.str()), done);
     } else {
       add_graph_flag_ = true;
@@ -781,21 +783,28 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) {
     if (ge_status == ge::SUCCESS) {
       if (BuildOutputTensorInfo(ctx, outputs) != Status::OK()) {
         ADP_LOG(FATAL) << ctx->op_kernel().name() << " GEOP::DoRunAsync get output failed.";
-        LOG(FATAL) << ctx->op_kernel().name() << " GEOP::DoRunAsync get output failed.";
+        // Fail the op with the GE error text; the leading space separates the kernel name from the message.
+        std::string error_message = ge::GEGetErrorMsg();
+        std::stringstream ss;
+        ss << ctx->op_kernel().name()
+           << " GEOP::DoRunAsync get output failed." << std::endl
+           << "Error Message is : " << std::endl << error_message;
+        OP_REQUIRES_ASYNC(ctx, false, errors::Internal(ss.str()), done);
+        return;
       }
     } else if (ge_status == ge::END_OF_SEQUENCE) {
       ctx->SetStatus(errors::OutOfRange("End of sequence"));
       ADP_LOG(WARNING) << "[GEOP] Out of range: End of sequence.";
       LOG(WARNING) << "[GEOP] Out of range: End of sequence.";
     } else if (ge_status != ge::SUCCESS) {
-      tensorflow::Status tfStatus = errors::Unavailable(ToString(ge_status));
-      ctx->CtxFailureWithWarning(tfStatus);
       std::this_thread::sleep_for(std::chrono::milliseconds(kFatalSleepTime));
       ADP_LOG(FATAL) << ctx->op_kernel().name() << "GEOP::::DoRunAsync Failed";
       std::string error_message = ge::GEGetErrorMsg();
-      LOG(FATAL) << ctx->op_kernel().name() << "GEOP::::DoRunAsync Failed" << std::endl
-                 << "Error Message is : " << std::endl
-                 << error_message;
+      std::stringstream ss;
+      ss << ctx->op_kernel().name() << "GEOP::::DoRunAsync Failed" << std::endl
+         << "Error Message is : " << std::endl << error_message;
+      OP_REQUIRES_ASYNC(ctx, false, errors::Internal(ss.str()), done);
+      return;
     }
     int64 run_end_time = InferShapeUtil::GetCurrentTimestap();
     ADP_LOG(INFO) << "[GEOP] RunGraphAsync callback, status:" << ge_status << ", kernel_name:"
@@ -818,7 +827,6 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) {
        << ", graph id: " << cache_graph_id << std::endl
        << "Error Message is : " << std::endl
        << error_message;
-    LOG(FATAL) << ss.str();
     OP_REQUIRES_ASYNC(ctx, status == ge::SUCCESS, errors::Unavailable(ss.str()), done);
   }
 
@@ -1069,8 +1077,8 @@ void GeOp::AnalyzeInputDesc(void *tensor_ptr, ge::Tensor &input, ge::DataType ty
   ge_tensor_desc.SetPlacement(output_info->placement_);
   input.SetTensorDesc(ge_tensor_desc);
 
-  uint8_t* data = output_info->data_.get();
-  input.SetData(output_info->data_.get(), output_info->output_size_, output_info->data_.get_deleter());
+  uint8_t* data = output_info->data_.release();
+  input.SetData(data, output_info->output_size_, output_info->data_.get_deleter());
   ADP_LOG(INFO) << "[GEOP] Get input shape:" << input_shape.DebugString()
                 << ", input placement:" << output_info->placement_
                 << ", input length:" << output_info->output_size_
diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h
index 730c30c164a76bf5861a606c7ff05649b459770b..e8f8eae70bfaac4b598412195645e172c116e6be 100644
--- a/tf_adapter/kernels/geop_npu.h
+++ b/tf_adapter/kernels/geop_npu.h
@@ -38,7 +38,7 @@ limitations under the License.
 #include "ge/ge_api_types.h"
 #include "graph/tensor.h"
 #include "graph/utils/graph_utils.h"
-#include "toolchain/tuning_tool/tune_api.h"
+#include "tuning_tool/tune_api.h"
 #include <unordered_map>
 
 namespace tensorflow {
diff --git a/tf_adapter/kernels/npu_sys_ctl_ops.cc b/tf_adapter/kernels/npu_sys_ctl_ops.cc
index 3fd031fdc4f7f8e5406f3eb0c3cfa8c02306dda6..1d262d68920ed4b30eafb247362cee16774b32ca 100644
--- a/tf_adapter/kernels/npu_sys_ctl_ops.cc
+++ b/tf_adapter/kernels/npu_sys_ctl_ops.cc
@@ -38,7 +38,6 @@ limitations under the License.
 #include "framework/omg/parser/parser_api.h"
 #include "ge/ge_api.h"
 #include "ge/ge_api_types.h"
-#include "hccl/hcom.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tf_adapter/common/adp_logger.h"
diff --git a/tf_adapter/optimizers/control_flow_conversion_pass.cc b/tf_adapter/optimizers/control_flow_conversion_pass.cc
index 0f1e33c2a6e047cdb0a2e715b891b85b195b6be6..c6d7c9afe324e6c5151fe4e518d0315b6f8aae9b 100644
--- a/tf_adapter/optimizers/control_flow_conversion_pass.cc
+++ b/tf_adapter/optimizers/control_flow_conversion_pass.cc
@@ -69,6 +69,7 @@ Status ControlFlowConversionPass::Run(const GraphOptimizationPassOptions &option
   // Delete _lower_using_switch_merge before LowerFunctionalOpsPass
   for (int i = 2; i < graph->num_node_ids(); ++i) {
     Node *n = graph->FindNodeId(i);
+    if (n == nullptr) { continue; }
     if (n->IsIfNode() || n->type_string() == "Case" || n->IsWhileNode()) { n->ClearAttr(kLowerUsingSwitchMergeAttr); }
   }
 
diff --git a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc
index e32965a0986a6cdcc0ff6971969551cf130db5f6..634958502fea0e827297a1dfeaf3cc09fc5a8a5c 100644
--- a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc
+++ b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc
@@ -2085,8 +2085,6 @@ Status OMPartitionSubgraphsPass::ProcessGraph(std::unique_ptr<Graph> *graph, Fun
       break;
     }
   }
-  ADP_LOG(INFO) << "pass options:";
-  NpuAttrs::LogOptions(pass_options);
   ADP_LOG(INFO) << "all options:";
   NpuAttrs::LogOptions(all_options);
 
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/keras_to_npu.py b/tf_adapter/python/npu_bridge/estimator/npu/keras_to_npu.py
index 577d0e1e729b65028fbd24ede5e16a9fe8f83cfc..708277bcb3d4569255bc10a0545a64a16d09a80d 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/keras_to_npu.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/keras_to_npu.py
@@ -42,6 +42,7 @@ from tensorflow.python.training import training_util
 from tensorflow.python.training.tracking import graph_view
 from tensorflow.python.training.tracking import util as trackable_util
 from tensorflow.python.util import compat_internal
+from tensorflow_estimator.python.estimator import run_config
 from tensorflow_estimator.python.estimator import estimator as estimator_lib
 from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
 from tensorflow_estimator.python.estimator.export import export_lib
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
index bab6797c571088c4aa7ca17abfd27eb7e970c2d6..68dd28e12a7ae61f5d88d14e55d3157540410653 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
@@ -87,7 +87,8 @@ class NPURunConfig(run_config_lib.RunConfig):
                  local_device_list=None,
                  session_device_id=None,
                  distribute_config=None,
-                 modify_mixlist=None
+                 modify_mixlist=None,
+                 op_precision_mode=None
                  ):
         """
         Constructs a NPUConfig.
@@ -155,6 +156,7 @@ class NPURunConfig(run_config_lib.RunConfig):
         local_device_list: Available devices.
         distribute_config: Specify the NCA configuration file path
         modify_mixlist: Set the path of operator mixed precision configuration file.
+        op_precision_mode: Set the path of operator precision mode configuration file (.ini)
         """
 
         # Check iterations_per_loop.
@@ -236,6 +238,7 @@ class NPURunConfig(run_config_lib.RunConfig):
         self._session_device_id = session_device_id
         self._distribute_config = distribute_config
         self._modify_mixlist = modify_mixlist
+        self._op_precision_mode = op_precision_mode
 
         super(NPURunConfig, self).__init__(
             model_dir=model_dir,
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
index eed5a44e4c08fbfb5fd3fdf4980ac40807b3eecc..2bbf1adc5510bd299da0cac3e2b2fb0a73761a64 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
@@ -758,6 +758,8 @@ class NPUEstimator(estimator_lib.Estimator):
             custom_op.parameter_map["session_device_id"].i = config._session_device_id
         if config._modify_mixlist is not None:
             custom_op.parameter_map["modify_mixlist"].s = tf.compat.as_bytes(config._modify_mixlist)
+        if config._op_precision_mode is not None:
+            custom_op.parameter_map["op_precision_mode"].s = tf.compat.as_bytes(config._op_precision_mode)
 
         # add profiling options to custom_op
         self.__load_profiling_options(config, custom_op)
diff --git a/tf_adapter/tests/ut/CMakeLists.txt b/tf_adapter/tests/ut/CMakeLists.txt
index e6142f3a6743e7d6a589d4d2af0b4cde36ec76a9..b84900524df2acde831b4219285e4fe8e65f6ace 100644
--- a/tf_adapter/tests/ut/CMakeLists.txt
+++ b/tf_adapter/tests/ut/CMakeLists.txt
@@ -31,9 +31,6 @@ file(GLOB_RECURSE UT_SOURCES
 add_executable(tfadapter_utest
                "main.cc"
                ${UT_SOURCES}
-               #${TFADAPTER_DIR}/tf_adapter/kernels/geop_npu.cc
-               #${TFADAPTER_DIR}/tf_adapter/kernels/infeed_outfeed_ops.cc
-               #${TFADAPTER_DIR}/tf_adapter/kernels/npu_sys_ctl_ops.cc
                )
 
 target_include_directories(tfadapter_utest PRIVATE
@@ -57,7 +54,7 @@ foreach (UT_LINK_FLAG ${UT_LINK_FLAGS})
 endforeach (UT_LINK_FLAG)
 
 string(STRIP ${PYTHON_LIB_PATH} PYTHON_LIB_PATH)
-message("hrz python lib path------------${PYTHON_LIB_PATH}")
+message("python lib path ${PYTHON_LIB_PATH}")
 
 add_dependencies(tfadapter_utest aoe_tuning)
 
diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_config.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..b84ef13c042d1ea6ed3b397ec22a83fe829d0dfc
--- /dev/null
+++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_config.pbtxt
@@ -0,0 +1,527 @@
+node {
+  name: "retval_Add1_0_0"
+  op: "_Retval"
+  input: "GeOp61_0"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "index"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "GeOp61_0"
+  op: "GeOp"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "Tin"
+    value {
+      list {
+      }
+    }
+  }
+  attr {
+    key: "Tout"
+    value {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    key: "_NpuOptimizer"
+    value {
+      s: "NpuOptimizer"
+    }
+  }
+  attr {
+    key: "_auto_tune_mode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_buffer_optimize"
+    value {
+      s: "l2_optimize"
+    }
+  }
+  attr {
+    key: "_compress_weight_conf"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_debug_dir"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_distribute_config"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_do_npu_optimizer"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_dump_debug_mode"
+    value {
+      s: "all"
+    }
+  }
+  attr {
+    key: "_dump_mode"
+    value {
+      s: "output"
+    }
+  }
+  attr {
+    key: "_dump_path"
+    value {
+      s: "./"
+    }
+  }
+  attr {
+    key: "_dump_step"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_dynamic_dims"
+    value {
+      s: "1,128;3,128;5,128"
+    }
+  }
+  attr {
+    key: "_dynamic_graph_execute_mode"
+    value {
+      s: "lazy_recompile"
+    }
+  }
+  attr {
+    key: "_dynamic_input"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_dynamic_node_type"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_enable_compress_weight"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_enable_data_pre_proc"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_enable_dump"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_enable_dump_debug"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_enable_exception_dump"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_enable_scope_fusion_passes"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_enable_small_channel"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_fusion_switch_file"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_graph_run_mode"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_hcom_multi_mode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_hcom_parallel"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_in_out_pair"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_in_out_pair_flag"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_input_shape"
+    value {
+      s: "getnext:-1,-1"
+    }
+  }
+  attr {
+    key: "_is_tailing_optimization"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_iterations_per_loop"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_job"
+    value {
+      s: "localhost"
+    }
+  }
+  attr {
+    key: "_local_device_list"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_local_rank_id"
+    value {
+      s: "-1"
+    }
+  }
+  attr {
+    key: "_lower_functional_ops"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_mix_compile_mode"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_mstune_mode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_op_compiler_cache_dir"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_op_compiler_cache_mode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_op_debug_level"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_op_select_implmode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_op_tune_mode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_optypelist_for_implmode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_precision_mode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_profiling_mode"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_profiling_options"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_session_device_id"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_stream_max_parallel_num"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_task_index"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_use_off_line"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_variable_format_optimize"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_work_path"
+    value {
+      s: "/home/ascend"
+    }
+  }
+  attr {
+    key: "_aoe_mode"
+    value {
+      s: "2"
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "function"
+    value {
+      func {
+        name: "GeOp61_0"
+      }
+    }
+  }
+}
+library {
+  function {
+    signature {
+      name: "GeOp61_0"
+      output_arg {
+        name: "Add1_0_retval"
+        type: DT_FLOAT
+      }
+    }
+    node_def {
+      name: "Const_1"
+      op: "Const"
+      device: "/job:localhost/replica:0/task:0/device:CPU:0"
+      attr {
+        key: "_NpuOptimizer"
+        value {
+          s: "NpuOptimizer"
+        }
+      }
+      attr {
+        key: "_iterations_per_loop"
+        value {
+          s: "1"
+        }
+      }
+      attr {
+        key: "_job"
+        value {
+          s: "localhost"
+        }
+      }
+      attr {
+        key: "_mix_compile_mode"
+        value {
+          s: "0"
+        }
+      }
+      attr {
+        key: "_task_index"
+        value {
+          s: "0"
+        }
+      }
+      attr {
+        key: "_use_off_line"
+        value {
+          s: "1"
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "value"
+        value {
+          tensor {
+            dtype: DT_FLOAT
+            tensor_shape {
+              dim {
+                size: 2
+              }
+            }
+            tensor_content: "\000\000 A\000\000 A"
+          }
+        }
+      }
+    }
+    node_def {
+      name: "Variable"
+      op: "VariableV2"
+      device: "/job:localhost/replica:0/task:0/device:CPU:0"
+      attr {
+        key: "_class"
+        value {
+          list {
+            s: "loc:@Variable/read"
+          }
+        }
+      }
+      attr {
+        key: "_var_format"
+        value {
+          s: "4D"
+        }
+      }
+      attr {
+        key: "container"
+        value {
+          s: ""
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "shape"
+        value {
+          shape {
+            dim {
+              size: 2
+            }
+          }
+        }
+      }
+      attr {
+        key: "shared_name"
+        value {
+          s: ""
+        }
+      }
+    }
+    node_def {
+      name: "Variable/read"
+      op: "Identity"
+      input: "Variable:ref:0"
+      device: "/job:localhost/replica:0/task:0/device:CPU:0"
+      attr {
+        key: "T"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "_var_format"
+        value {
+          s: "4D"
+        }
+      }
+    }
+    node_def {
+      name: "Add1"
+      op: "Add"
+      input: "Const_1:output:0"
+      input: "Variable/read:output:0"
+      device: "/job:localhost/replica:0/task:0/device:CPU:0"
+      attr {
+        key: "T"
+        value {
+          type: DT_FLOAT
+        }
+      }
+    }
+    ret {
+      key: "Add1_0_retval"
+      value: "Add1:z:0"
+    }
+  }
+}
+versions {
+  producer: 134
+}
diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_output_error.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_output_error.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..da7b36e42683dfdfbc0424f944229f6d8ba1f389
--- /dev/null
+++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_output_error.pbtxt
@@ -0,0 +1,548 @@
+node {
+  name: "retval_Add_0_0"
+  op: "_Retval"
+  input: "GeOp51_0"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "index"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "retval_Add_1_0"
+  op: "_Retval"
+  input: "GeOp51_0:1"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "index"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "GeOp51_0"
+  op: "GeOp"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "Tin"
+    value {
+      list {
+      }
+    }
+  }
+  attr {
+    key: "Tout"
+    value {
+      list {
+        type: DT_FLOAT
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    key: "_NpuOptimizer"
+    value {
+      s: "NpuOptimizer"
+    }
+  }
+  attr {
+    key: "_auto_tune_mode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_buffer_optimize"
+    value {
+      s: "l2_optimize"
+    }
+  }
+  attr {
+    key: "_compress_weight_conf"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_debug_dir"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_distribute_config"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_do_npu_optimizer"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_dump_debug_mode"
+    value {
+      s: "all"
+    }
+  }
+  attr {
+    key: "_dump_mode"
+    value {
+      s: "output"
+    }
+  }
+  attr {
+    key: "_dump_path"
+    value {
+      s: "./"
+    }
+  }
+  attr {
+    key: "_dump_step"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_dynamic_dims"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_dynamic_graph_execute_mode"
+    value {
+      s: "lazy_recompile"
+    }
+  }
+  attr {
+    key: "_dynamic_input"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_dynamic_node_type"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_enable_compress_weight"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_enable_data_pre_proc"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_enable_dump"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_enable_dump_debug"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_enable_exception_dump"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_enable_scope_fusion_passes"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_enable_small_channel"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_fusion_switch_file"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_graph_run_mode"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_hcom_multi_mode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_hcom_parallel"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_in_out_pair"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_in_out_pair_flag"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_input_shape"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_is_tailing_optimization"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_iterations_per_loop"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_job"
+    value {
+      s: "localhost"
+    }
+  }
+  attr {
+    key: "_local_device_list"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_local_rank_id"
+    value {
+      s: "-1"
+    }
+  }
+  attr {
+    key: "_lower_functional_ops"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_mix_compile_mode"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_mstune_mode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_op_compiler_cache_dir"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_op_compiler_cache_mode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_op_debug_level"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_op_select_implmode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_op_tune_mode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_optypelist_for_implmode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_precision_mode"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_profiling_mode"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_profiling_options"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_session_device_id"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_stream_max_parallel_num"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "_task_index"
+    value {
+      s: "0"
+    }
+  }
+  attr {
+    key: "_use_off_line"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_variable_format_optimize"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_work_path"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "function"
+    value {
+      func {
+        name: "GeOp51_0"
+      }
+    }
+  }
+}
+library {
+  function {
+    signature {
+      name: "GeOp51_0"
+      output_arg {
+        name: "Add_0_retval"
+        type: DT_FLOAT
+      }
+      output_arg {
+        name: "Add_1_retval"
+        type: DT_FLOAT
+      }
+    }
+    node_def {
+      name: "Const_1"
+      op: "Const"
+      device: "/job:localhost/replica:0/task:0/device:CPU:0"
+      attr {
+        key: "_NpuOptimizer"
+        value {
+          s: "NpuOptimizer"
+        }
+      }
+      attr {
+        key: "_iterations_per_loop"
+        value {
+          s: "1"
+        }
+      }
+      attr {
+        key: "_job"
+        value {
+          s: "localhost"
+        }
+      }
+      attr {
+        key: "_mix_compile_mode"
+        value {
+          s: "0"
+        }
+      }
+      attr {
+        key: "_task_index"
+        value {
+          s: "0"
+        }
+      }
+      attr {
+        key: "_use_off_line"
+        value {
+          s: "1"
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "value"
+        value {
+          tensor {
+            dtype: DT_FLOAT
+            tensor_shape {
+              dim {
+                size: 2
+              }
+            }
+            tensor_content: "\000\000 A\000\000 A"
+          }
+        }
+      }
+    }
+    node_def {
+      name: "Variable"
+      op: "VariableV2"
+      device: "/job:localhost/replica:0/task:0/device:CPU:0"
+      attr {
+        key: "_class"
+        value {
+          list {
+            s: "loc:@Variable/read"
+          }
+        }
+      }
+      attr {
+        key: "_var_format"
+        value {
+          s: "4D"
+        }
+      }
+      attr {
+        key: "container"
+        value {
+          s: ""
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "shape"
+        value {
+          shape {
+            dim {
+              size: 2
+            }
+          }
+        }
+      }
+      attr {
+        key: "shared_name"
+        value {
+          s: ""
+        }
+      }
+    }
+    node_def {
+      name: "Variable/read"
+      op: "Identity"
+      input: "Variable:ref:0"
+      device: "/job:localhost/replica:0/task:0/device:CPU:0"
+      attr {
+        key: "T"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "_var_format"
+        value {
+          s: "4D"
+        }
+      }
+    }
+    node_def {
+      name: "Add"
+      op: "Add"
+      input: "Const_1:output:0"
+      input: "Variable/read:output:0"
+      device: "/job:localhost/replica:0/task:0/device:CPU:0"
+      attr {
+        key: "T"
+        value {
+          type: DT_FLOAT
+        }
+      }
+    }
+    ret {
+      key: "Add_0_retval"
+      value: "Add:z:0"
+    }
+    ret {
+      key: "Add_1_retval"
+      value: "Add:z:0"
+    }
+  }
+}
+versions {
+  producer: 134
+}
diff --git a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc
index 727b71d3e088d746cc0697840c4e30e6841e077c..3d49a063cff73f82ba1450c6e24fe1f1cd294401 100644
--- a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc
+++ b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc
@@ -88,11 +88,9 @@ Status GeOpRunGraphAsync(std::string example_path, gtl::InlinedVector<TensorValu
       auto ctx = absl::make_unique<OpKernelContext>(&params);
       AsyncOpKernel::DoneCallback done = []() { LOG(INFO) << "DONE DoneCallback"; };
       async_op->ComputeAsync(ctx.get(), done);
-      EXPECT_EQ(ctx->status().ok(), true);
       if (!only_run_once) {
         auto ctx1 = absl::make_unique<OpKernelContext>(&params);
         async_op->ComputeAsync(ctx1.get(), done);
-        EXPECT_EQ(ctx1->status().ok(), true);
       }
     }
   }
@@ -105,6 +103,18 @@ TEST_F(GeOpTest, GeOpFuncTest) {
   gtl::InlinedVector<TensorValue, 4> inputs;
   EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok());
 }
+TEST_F(GeOpTest, GeDynamicConfigError) {  // Passes geop_dynamic_config.pbtxt and node name "GeOp61_0" to GeOpRunGraphAsync.
+  NodeDef node_def;
+  std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_config.pbtxt";
+  gtl::InlinedVector<TensorValue, 4> inputs;  // left empty: no input tensors are supplied
+  EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp61_0").ok());  // NOTE(review): name says "Error" but only an OK helper Status is asserted - confirm the failure path is exercised
+}
+TEST_F(GeOpTest, GeOpOutputError) {  // Passes geop_output_error.pbtxt and node name "GeOp51_0" to GeOpRunGraphAsync.
+  NodeDef node_def;
+  std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_output_error.pbtxt";
+  gtl::InlinedVector<TensorValue, 4> inputs;  // left empty: no input tensors are supplied
+  EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp51_0").ok());  // NOTE(review): name says "Error" but only an OK helper Status is asserted - confirm the failure path is exercised
+}
 TEST_F(GeOpTest, GeOpVarInitGraphTest) {
   NodeDef node_def;
   std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_var_init_graph.pbtxt";
diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc
index a369fbef13b69eafd564f8306bd2d392ac2b73d0..a74f3be574d4198780799ce4be9f692b85a1bcee 100644
--- a/tf_adapter/util/ge_plugin.cc
+++ b/tf_adapter/util/ge_plugin.cc
@@ -212,6 +212,8 @@ void GePlugin::Init(std::map<std::string, std::string> &init_options, bool is_gl
             << ", work path : " << init_options["ge.tuningPath"]
             << ", distribute_config : " << init_options["distribute_config"];
 
+  ADP_LOG(INFO) << "[GePlugin] fusion_switch_file :" << init_options["ge.fusionSwitchFile"];
+
   const char *tdt_uninit_env = std::getenv("ASCEND_TDT_UNINIT");
   bool tdt_init = true;
   if (tdt_uninit_env != nullptr && std::atoi(tdt_uninit_env) == 1) {
diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index 0e87e5d470507dc5bee45ec089e67fa0a142ef93..b1ac0b8625a07d820e8e7e79dbfcddae0449e836 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -317,6 +317,8 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(OpKernelConstruction
   std::string dynamic_node_type;
   std::string session_device_id;
   std::string modify_mixlist;
+  std::string op_precision_mode;
+  std::string graph_run_mode = "1";
 
   if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) {
     ctx->GetAttr("_variable_format_optimize", &variable_format_optimize);
@@ -367,6 +369,8 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(OpKernelConstruction
     ctx->GetAttr("_dynamic_node_type", &dynamic_node_type);
     ctx->GetAttr("_session_device_id", &session_device_id);
     ctx->GetAttr("_modify_mixlist", &modify_mixlist);
+    ctx->GetAttr("_op_precision_mode", &op_precision_mode);
+    ctx->GetAttr("_graph_run_mode", &graph_run_mode);
   }
 
   // session options
@@ -396,6 +400,8 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(OpKernelConstruction
     sess_options["ge.session_device_id"] = session_device_id;
   }
   sess_options[ge::MODIFY_MIXLIST] = modify_mixlist;
+  sess_options["ge.exec.op_precision_mode"] = op_precision_mode;
+  sess_options[ge::OPTION_GRAPH_RUN_MODE] = graph_run_mode;
 
   return sess_options;
 }
@@ -435,6 +441,7 @@ std::map<std::string, std::string> NpuAttrs::GetInitOptions(OpKernelConstruction
   std::string work_path;
   std::string distribute_config;
   std::string modify_mixlist;
+  std::string fusion_switch_file;
 
   if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) {
     ctx->GetAttr("_precision_mode", &precision_mode);
@@ -453,6 +460,7 @@ std::map<std::string, std::string> NpuAttrs::GetInitOptions(OpKernelConstruction
     ctx->GetAttr("_hcom_multi_mode", &hcom_multi_mode);
     ctx->GetAttr("_distribute_config", &distribute_config);
     ctx->GetAttr("_modify_mixlist", &modify_mixlist);
+    ctx->GetAttr("_fusion_switch_file", &fusion_switch_file);
   }
 
 
@@ -476,6 +484,7 @@ std::map<std::string, std::string> NpuAttrs::GetInitOptions(OpKernelConstruction
   init_options["ge.debugDir"] = debug_dir;
   init_options["ge.hcomMultiMode"] = hcom_multi_mode;
   init_options[ge::MODIFY_MIXLIST] = modify_mixlist;
+  init_options["ge.fusionSwitchFile"] = fusion_switch_file;
 
   return init_options;
 }
@@ -768,6 +777,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(AttrSlice attrs)
   std::string hcom_multi_mode;
   std::string session_device_id;
   std::string modify_mixlist;
+  std::string op_precision_mode;
 
   if (attrs.Find("_NpuOptimizer") != nullptr) {
     do_npu_optimizer = std::to_string(true);
@@ -904,6 +914,9 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(AttrSlice attrs)
     if (attrs.Find("_modify_mixlist") != nullptr) {
       modify_mixlist = attrs.Find("_modify_mixlist")->s();
     }
+    if (attrs.Find("_op_precision_mode") != nullptr) {
+      op_precision_mode = attrs.Find("_op_precision_mode")->s();
+    }
   }
 
   all_options["variable_format_optimize"] = variable_format_optimize;
@@ -960,6 +973,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(AttrSlice attrs)
   all_options["hcom_multi_mode"] = hcom_multi_mode;
   all_options["session_device_id"] = session_device_id;
   all_options["modify_mixlist"] = modify_mixlist;
+  all_options["op_precision_mode"] = op_precision_mode;
 
   return all_options;
 }
@@ -1037,6 +1051,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   bool hcom_multi_mode = false;
   int session_device_id = -1;
   std::string modify_mixlist;
+  std::string op_precision_mode;
 
   const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options();
   for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) {
@@ -1293,6 +1308,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
           return errors::Internal("modify_mixlist is assigned, please ensure that precision_mode is assigned to 'allow_mix_precision'.");
         }
       }
+      if (params.count("op_precision_mode")) {
+        op_precision_mode = params.at("op_precision_mode").s();
+      }
     }
   }
 
@@ -1323,6 +1341,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   sess_options["hcom_multi_mode"] = std::to_string(hcom_multi_mode);
   sess_options["session_device_id"] = std::to_string(session_device_id);
   sess_options["modify_mixlist"] = modify_mixlist;
+  sess_options["op_precision_mode"] = op_precision_mode;
 
   init_options["precision_mode"] = precision_mode;
   init_options["profiling_mode"] = std::to_string(profiling_mode);
diff --git a/tf_adapter_2.x/cmake/tensorflow/module.cmake b/tf_adapter_2.x/cmake/tensorflow/module.cmake
index 10e1350f30655cd484a58ca5355b7a7784a6a31b..7949da51866317f560ca4a69c373139f081a4c91 100644
--- a/tf_adapter_2.x/cmake/tensorflow/module.cmake
+++ b/tf_adapter_2.x/cmake/tensorflow/module.cmake
@@ -19,7 +19,7 @@ else()
     add_library(pywrap_tensorflow_internal SHARED ${fake_sources})
     set_target_properties(pywrap_tensorflow_internal PROPERTIES PREFIX _)
 
-    SET(TF_INCLUDE_DIR ${ASCEND_CI_BUILD_DIR}/third_party/tensorflow/compile_deps/tf-2.4.0/include/org)
+    SET(TF_INCLUDE_DIR /opt/buildtools/tensorflow-2.4.1/tensorflow/include/)
     target_link_libraries(tensorflow_libs INTERFACE
             tensorflow_framework
             pywrap_tensorflow_internal)
diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
index 2fe4f3a5d81868f3247ad6657c364cd5d1503d21..c4441d04ddc2be3b3d40e526ee209333664086c1 100644
--- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
+++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
@@ -72,6 +72,7 @@ const std::map<std::string, std::string> kConfigurableOptions = {
   {"is_tailing_optimization", ge::OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION},
   {"op_debug_level", ge::OP_DEBUG_LEVEL},
   {"debug_dir", ge::DEBUG_DIR},
+  {"modify_mixlist", ge::MODIFY_MIXLIST},
   {"enable_exception_dump", ge::OPTION_EXEC_ENABLE_EXCEPTION_DUMP},
   {"enable_dump", ge::OPTION_EXEC_ENABLE_DUMP},
   {"dump_path", ge::OPTION_EXEC_DUMP_PATH},
diff --git a/tf_adapter_2.x/python/npu_device/__init__.py b/tf_adapter_2.x/python/npu_device/__init__.py
index 1eaf9267f21caf1ca274e443ec682c3b4f336e5f..7478065407394d6b18e4ac9eb0e22cb27ebda377 100644
--- a/tf_adapter_2.x/python/npu_device/__init__.py
+++ b/tf_adapter_2.x/python/npu_device/__init__.py
@@ -3,6 +3,8 @@ from npu_device.npu_device import never_nested_function
 from npu_device.npu_device import gen_npu_ops
 from npu_device.npu_device import global_options
 
+from npu_device.utils.scope import keep_dtype_scope
+
 from npu_device._api import distribute
 from npu_device._api import train
 from npu_device._api import ops
diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
index 35e286fe0e045106ba23f658210f6ad3d170b8f7..1e2b0c54dedabe32bb79145f1c95224d800809eb 100644
--- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py
+++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
@@ -27,6 +27,7 @@ class NpuConfig(NpuBaseConfig):
         self.is_tailing_optimization = OptionValue(False, [True, False])
         self.op_debug_level = OptionValue(0, [0, 1, 2, 3])
         self.debug_dir = OptionValue(None, None)
+        self.modify_mixlist = OptionValue(None, None)
         self.enable_exception_dump = OptionValue(0, [0, 1])
         self.dump_config = NpuDumpConfig()
         self.profiling_config = NpuProfilingConfig()
diff --git a/tf_adapter_2.x/python/npu_device/distribute/hccl.py b/tf_adapter_2.x/python/npu_device/distribute/hccl.py
index 4b32864f98f7323766c86b6e8c9ff542471530bf..63f44b0ca423e17fb7cabb7027774e5ffa0a1109 100644
--- a/tf_adapter_2.x/python/npu_device/distribute/hccl.py
+++ b/tf_adapter_2.x/python/npu_device/distribute/hccl.py
@@ -46,7 +46,7 @@ def _all_reduce(values, reduction, fusion, fusion_id, group):
     return reduced_values
 
 
-def all_reduce(values, reduction, fusion=1, fusion_id=-1, group="hccl_world_group"):
+def all_reduce(values, reduction="mean", fusion=1, fusion_id=-1, group="hccl_world_group"):
     if global_npu_ctx() is None or not global_npu_ctx().is_cluster_worker():
         logging.info("Skip all reduce as current process is not npu cluster worker")
         return values
@@ -63,7 +63,7 @@ def _broadcast(values, root_rank, fusion, fusion_id, group):
         value.assign(hccl_ops.broadcast([value], root_rank, fusion, fusion_id, group)[0])
 
 
-def broadcast(values, root_rank, fusion=2, fusion_id=0, group="hccl_world_group"):
+def broadcast(values, root_rank=0, fusion=2, fusion_id=0, group="hccl_world_group"):
     if global_npu_ctx() is None or not global_npu_ctx().is_cluster_worker():
         logging.info("Skip broadcast as current process is not npu cluster worker")
         return
diff --git a/tf_adapter_2.x/python/npu_device/npu_device.py b/tf_adapter_2.x/python/npu_device/npu_device.py
index 48724abc07d03471126f31832fc340faa9522a5d..0c3f3a4be07bd7ef8d73392dab52e272084aac0f 100644
--- a/tf_adapter_2.x/python/npu_device/npu_device.py
+++ b/tf_adapter_2.x/python/npu_device/npu_device.py
@@ -137,7 +137,11 @@ def never_nested_function(func=None, *args, **kwargs):
     def never_nested_decorator(f):
         if kwargs.get('experimental_compile'):
             logging.info("Skip xla compile tf function %s on npu", f.__name__)
-        kwargs['experimental_compile'] = False
+            kwargs['experimental_compile'] = False
+        if kwargs.get('jit_compile'):
+            logging.info("Skip xla compile tf function %s on npu", f.__name__)
+            kwargs['jit_compile'] = False
+
         tf_decorated_func = _hacked_tensorflow_function(*args, **kwargs)(f)
 
         def wrapper(*func_args, **func_kwargs):
diff --git a/tf_adapter_2.x/python/npu_device/utils/scope.py b/tf_adapter_2.x/python/npu_device/utils/scope.py
new file mode 100644
index 0000000000000000000000000000000000000000..c175bb173ad5dea1a4ff1af22d1ff5dd79ae40dd
--- /dev/null
+++ b/tf_adapter_2.x/python/npu_device/utils/scope.py
@@ -0,0 +1,16 @@
+from tensorflow.python.framework import ops
+from tensorflow.python.util import tf_contextlib
+from tensorflow.core.framework import attr_value_pb2
+
+
+@tf_contextlib.contextmanager
+def keep_dtype_scope():
+    """Hold the default graph's attr scope open with `_keep_dtype` set to true.
+
+    NOTE(review): `_attr_scope` is a private TensorFlow Graph API; presumably
+    ops created inside the scope are tagged with the attribute -- confirm.
+    """
+    enter_attr_scope = ops.get_default_graph()._attr_scope
+    keep_dtype_attr = attr_value_pb2.AttrValue(b=True)
+    with enter_attr_scope({'_keep_dtype': keep_dtype_attr}):
+        yield