From 4df6a393dc025c74050b33a87e598c331e5d8fee Mon Sep 17 00:00:00 2001
From: l00657005
Date: Sat, 20 May 2023 17:32:05 +0800
Subject: [PATCH] modify jit_compile option from bool to string with "Auto" default

---
 tf_adapter/interface_spec/api_npu_config.pyh  |  2 +-
 tf_adapter/kernels/geop_npu.cc                | 17 ++++---------
 .../npu_bridge/estimator/npu/npu_config.py    |  2 +-
 .../npu_bridge/estimator/npu/npu_estimator.py |  3 +--
 .../testcase/get_attr_optimize_pass_test.cc   |  2 +-
 .../tests/st/util/testcase/npu_attrs_test.cc  |  6 ++++-
 .../testcase/get_attr_optimize_pass_test.cc   |  2 +-
 tf_adapter/util/npu_attrs.cc                  | 24 ++++++++++++++++---
 .../npu_device/core/npu_wrapper.cpp           |  1 +
 .../npu_device/kernels/npu_call.cpp           | 10 +++-----
 .../python/npu_device/configs/npu_config.py   |  2 +-
 .../python/npu_device/configs/option_base.py  |  2 ++
 .../tests/st/adapter2_jit_compile_st.py       |  4 ++--
 tf_adapter_2.x/tests/st/adapter2_options.py   | 18 ++++++++++++++
 tf_adapter_2.x/tests/st/adapter2_st.py        |  1 +
 15 files changed, 64 insertions(+), 32 deletions(-)

diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh
index c48ed89df..97388f957 100644
--- a/tf_adapter/interface_spec/api_npu_config.pyh
+++ b/tf_adapter/interface_spec/api_npu_config.pyh
@@ -20,7 +20,7 @@ class NPURunConfig(run_config_lib.RunConfig):
                  customize_dtypes=None, op_debug_config=None, memory_config=None, experimental_config=None,
                  topo_sorting_mode=None, aoe_config_file=None, insert_op_file=None, stream_sync_timeout=-1,
                  event_sync_timeout=-1, external_weight=False, es_cluster_config=None, deterministic=0,
-                 frozen_variable=False, variable_placement="Device", jit_compile=None, graph_compiler_cache_dir=None,
+                 frozen_variable=False, variable_placement="Device", jit_compile="Auto", graph_compiler_cache_dir=None,
                  precision_mode_v2=None):
 
 class ProfilingConfig():

diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc
index 034dbd012..6f702bfaa 100644
--- a/tf_adapter/kernels/geop_npu.cc
+++ b/tf_adapter/kernels/geop_npu.cc
@@ -326,7 +326,7 @@ GeOp::GeOp(OpKernelConstruction *ctx)
       compute_graph_empty_(false), is_input_convert_(false), data_format_(""), graph_id_(0),
       is_initialized_graph_(false), need_iteration_(false), tf_session_(""), ge_session_(nullptr),
       job_type_(""), is_host_graph_(false), handle_(nullptr), need_compile_graph_first_(false), tuned_flag_(ATOMIC_FLAG_INIT),
-      jit_compile_(""), is_dynamic_input_(false), session_id_(0), aoe_initialize_(nullptr),
+      jit_compile_("2"), is_dynamic_input_(false), session_id_(0), aoe_initialize_(nullptr),
       aoe_finalize_(nullptr), aoe_create_session_(nullptr), aoe_destroy_session_(nullptr),
       aoe_set_gesession_(nullptr), aoe_set_dependgraphs_(nullptr), aoe_set_tuninggraph_(nullptr),
       aoe_tuning_graph_(nullptr), aoe_set_depend_graphs_inputs_(nullptr), aoe_set_tuning_graph_input_(nullptr) {
@@ -689,7 +689,7 @@ bool GeOp::MaybeUpdateShape(OpKernelContext *const ctx) {
                       << value_shape.DebugString();
       if (jit_compile_ == "1") {
         shape = value_shape;
-        ADP_LOG(WARNING) << "Dynamic shape, recommended to configure jit_compile value to false";
+        ADP_LOG(WARNING) << "Dynamic shape detected; it is recommended to set jit_compile to False or Auto";
       } else {
         shape = MakeCompatShape(shape.value(), value_shape);
       }
@@ -775,9 +775,6 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) {
   if (dynamic_input_ != "1" && !is_set_dynamic_config) {
     bool shape_changed = MaybeUpdateShape(ctx);
     if (build_flag_ && shape_changed) {
-      if (jit_compile_.empty()) {
-        jit_compile_ = "0";
-      }
      ge::Status status = ge_session_->RemoveGraph(graph_id_);
      if (status != ge::SUCCESS) {
        ADP_LOG(WARNING) << "[GEOP] GE remove graph failed, ret : " << ToString(status) << ", graph_id: " << graph_id_;
@@ -944,9 +941,7 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) {
   }
   SetDynamicInput();
   graph_options_["ge.exec.isVarInitGraph"] = is_var_init_graph_;
-  if (!jit_compile_.empty()) {
-    graph_options_["ge.jit_compile"] = jit_compile_;
-  }
+  graph_options_["ge.jit_compile"] = jit_compile_;
   graph_options_["ge.exec.overflow"] = "1";
   graph_options_["ge.graphLevelSat"] = (mix_compile_mode_ == "0") ? "1" : "0";
 
@@ -1331,7 +1326,6 @@ void GeOp::ProcessGetNextNode(const Node *node) {
       tensorflow::PartialTensorShape shape(shape_proto);
       if (!shape.IsFullyDefined()) {
         is_dynamic_shape = true;
-        if (jit_compile_.empty()) { jit_compile_ = "0"; }
         ADP_LOG(INFO) << "[GEOP]node: " + node->name() + " is_dynamic_shape come true.";
       }
     }
@@ -1340,7 +1334,6 @@ void GeOp::ProcessGetNextNode(const Node *node) {
     for (auto i = 0; i < node->num_outputs(); i++) {
       if (type_attrs[i] == DT_STRING) {
         is_dynamic_shape = true;
-        if (jit_compile_.empty()) { jit_compile_ = "0"; }
         ADP_LOG(INFO) << "[GEOP]node: " + node->name() + "'s output_types include DT_STRING.";
       }
     }
@@ -1414,8 +1407,8 @@ Status GeOp::BuildGraphDef(FunctionLibraryDefinition &flib_def, const std::vecto
   }
   HandleDpOpAndGetNextNodes(graph);
 
-  // Binary scenario: if the input shapes changed, update the input shape descriptions
-  if (jit_compile_ == "0") { UpdateInputsShapeDesc(graph); }
+  // Binary scenario (jit=0 or jit=2): if the input shapes changed, update the input shape descriptions
+  if (jit_compile_ != "1") { UpdateInputsShapeDesc(graph); }
   graph.ToGraphDef(&graph_def);
 
   std::string enable_force_v2_control;

diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
index 4d0c58bf0..f8d86b533 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
@@ -109,7 +109,7 @@ class NPURunConfig(run_config_lib.RunConfig):
                  deterministic=0,
                  frozen_variable=False,
                  variable_placement="Device",
-                 jit_compile=None,
+                 jit_compile="Auto",
                  graph_compiler_cache_dir=None,
                  precision_mode_v2=None
                  ):

diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
index 7efcd0824..b4a144d28 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
@@ -775,10 +775,9 @@ class NPUEstimator(estimator_lib.Estimator):
             custom_op.parameter_map["insert_op_file"].s = config.insert_op_file
         if config.es_cluster_config is not None:
             custom_op.parameter_map["es_cluster_config"].s = tf.compat.as_bytes(config.es_cluster_config)
-        if config._jit_compile is not None:
-            custom_op.parameter_map["jit_compile"].b = config._jit_compile
         if config._graph_compiler_cache_dir is not None:
             custom_op.parameter_map["graph_compiler_cache_dir"].s = tf.compat.as_bytes(config._graph_compiler_cache_dir)
+        custom_op.parameter_map["jit_compile"].s = tf.compat.as_bytes(config._jit_compile)
         custom_op.parameter_map["stream_sync_timeout"].i = config.stream_sync_timeout
         custom_op.parameter_map["event_sync_timeout"].i = config.event_sync_timeout
         custom_op.parameter_map["external_weight"].b = config._external_weight

diff --git a/tf_adapter/tests/st/optimizers/testcase/get_attr_optimize_pass_test.cc b/tf_adapter/tests/st/optimizers/testcase/get_attr_optimize_pass_test.cc
index 308d86526..bcc499b51 100644
--- a/tf_adapter/tests/st/optimizers/testcase/get_attr_optimize_pass_test.cc
+++ b/tf_adapter/tests/st/optimizers/testcase/get_attr_optimize_pass_test.cc
@@ -220,7 +220,7 @@ TEST_F(GetAttrOptimizationPassTest, SetAttrTest) {
   op_debug_level.set_i(0);
   (*custom_config->mutable_parameter_map())["op_debug_level"] = op_debug_level;
   AttrValue jit_compile = AttrValue();
-  jit_compile.set_b(true);
+  jit_compile.set_s("True");
   (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile;
   EXPECT_EQ(DoRunGetAttrOptimizationPassTest(session_options), target_graph);
 }

diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc
index 03db2503e..11324fba4 100644
--- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc
+++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc
@@ -146,10 +146,14 @@ TEST_F(NpuAttrTest, CheckJitCompile) {
   options.session_options = &session_options;
   AttrValue jit_compile = AttrValue();
-  jit_compile.set_b(true);
+  jit_compile.set_s("True");
   (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile;
   Status s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr);
   EXPECT_EQ(s.ok(), false);
+  jit_compile.set_s("False");
+  (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile;
+  s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr);
+  EXPECT_EQ(s.ok(), false);
 }
 
 TEST_F(NpuAttrTest, CheckVariablePlacement) {

diff --git a/tf_adapter/tests/ut/optimizers/testcase/get_attr_optimize_pass_test.cc b/tf_adapter/tests/ut/optimizers/testcase/get_attr_optimize_pass_test.cc
index 17820010f..fb26eae85 100644
--- a/tf_adapter/tests/ut/optimizers/testcase/get_attr_optimize_pass_test.cc
+++ b/tf_adapter/tests/ut/optimizers/testcase/get_attr_optimize_pass_test.cc
@@ -220,7 +220,7 @@ TEST_F(GetAttrOptimizationPassTest, SetAttrTest) {
   external_weight.set_b(true);
   (*custom_config->mutable_parameter_map())["external_weight"] = external_weight;
   AttrValue jit_compile = AttrValue();
-  jit_compile.set_b(true);
+  jit_compile.set_s("True");
   (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile;
   EXPECT_EQ(DoRunGetAttrOptimizationPassTest(session_options), target_graph);
 }

diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index 7fe14c5e3..1cb0041ef 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -370,6 +370,16 @@ void NpuAttrs::SetDatasetExecuteInDeviceStatus(const std::string &iterator_name,
                 << " dataset_execute_info_: " << dataset_execute_info_[iterator_name];
 }
 
+std::string ConvertToGeJitValue(const std::string &jit_compile) {
+  std::string ge_jit_compile = "2";
+  if (jit_compile == "False") {
+    ge_jit_compile = "0";
+  } else if (jit_compile == "True") {
+    ge_jit_compile = "1";
+  }
+  return ge_jit_compile;
+}
+
 std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstruction *ctx) {
   std::map<std::string, std::string> sess_options;
   std::string variable_format_optimize;
@@ -1702,6 +1712,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   std::string variable_location = "Device";
   std::string es_cluster_config;
   std::string graph_slice_mode;
+  std::string jit_compile;
 
   const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options();
   for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) {
@@ -2151,9 +2162,13 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
       variable_location = params.at("variable_placement").s();
     }
     if (params.count("jit_compile") > 0) {
jit_compile = params.at("jit_compile").b(); - sess_options["jit_compile"] = std::to_string(static_cast(jit_compile)); - sess_options["ge.jit_compile"] = std::to_string(static_cast(jit_compile)); + const static std::vector kJitCompileList = {"True", + "False", + "Auto"}; + NPU_REQUIRES_OK(CheckValueAllowed(params.at("jit_compile").s(), kJitCompileList)); + jit_compile = ConvertToGeJitValue(params.at("jit_compile").s()); + } else { + jit_compile = "2"; // 2 means Auto } if (params.count("graph_compiler_cache_dir") > 0) { graph_compiler_cache_dir = params.at("graph_compiler_cache_dir").s(); @@ -2214,6 +2229,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options sess_options["ge.exec.memoryOptimizationPolicy"] = memory_optimization_policy; sess_options["external_weight"] = std::to_string(static_cast(external_weight)); sess_options["ge.externalWeight"] = std::to_string(static_cast(external_weight)); + sess_options["jit_compile"] = jit_compile; + sess_options["ge.jit_compile"] = jit_compile; + init_options_["profiling_mode"] = std::to_string(static_cast(profiling_mode)); init_options_[ge::OPTION_EXEC_PROFILING_MODE] = std::to_string(static_cast(profiling_mode)); init_options_["profiling_options"] = profiling_options; diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index cdcce576e..41662b9db 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -157,6 +157,7 @@ bool CheckIsDistribute(std::map &global_options) { global_options.find(ge::OPTION_EXEC_CM_CHIEF_PORT) != global_options.end() && global_options.find(ge::OPTION_EXEC_CM_CHIEF_DEVICE) != global_options.end())); } + void ParseGlobalOptions(int device_index, const std::map &user_options, std::map &global_options) { for (const auto &option : user_options) { diff --git a/tf_adapter_2.x/npu_device/kernels/npu_call.cpp b/tf_adapter_2.x/npu_device/kernels/npu_call.cpp index 730e6a0cd..1bd6c29d2 100644 --- a/tf_adapter_2.x/npu_device/kernels/npu_call.cpp +++ b/tf_adapter_2.x/npu_device/kernels/npu_call.cpp @@ -47,6 +47,7 @@ class NpuCallOp : public OpKernel { auto status = std::unique_ptr(TF_NewStatus(), TF_DeleteStatus); OP_REQUIRES_OK(ctx, global::NpuCtx::GetDeviceCtx(device_id_, &context, &device)); if (device->device_options.find("ge.jit_compile") != device->device_options.end()) { + DLOG() << "device_options ge.jit_compile : " << device->device_options["ge.jit_compile"]; jit_compile_ = device->device_options["ge.jit_compile"]; } @@ -142,10 +143,8 @@ class NpuCallOp : public OpKernel { << value_shape.DebugString(); if (jit_compile_ == "1") { shape = value_shape; - DLOG() << "Dynamic shape, recommended to configure jit_compile value to false"; + DLOG() << "Dynamic shape, recommended to configure jit_compile value to False or Auto"; } else { - // 如果用户没有设置jit_compile,但是shape发生变化了,需要把jit设置成0 - jit_compile_ = "0"; shape = MakeCompatShape(shape.value(), value_shape); } DLOG() << "Refresh input " << i << " shape to " << shape.value().DebugString(); @@ -198,9 +197,6 @@ class NpuCallOp : public OpKernel { }(); NPU_REQUIRES_OK(status->status); static std::map kOptions; - if (!jit_compile_.empty()) { - kOptions.emplace("ge.jit_compile", jit_compile_); - } (void)device->AddGeGraph(context, graph_id_, attr_.name(), *graph_def_, status.get(), kOptions); NPU_REQUIRES_OK(status->status); loaded = true; @@ -238,7 +234,7 @@ class NpuCallOp : public OpKernel { std::unique_ptr graph_def_; std::vector args_; 
   std::vector<absl::optional<tensorflow::TensorShape>> input_shapes_;
-  std::string jit_compile_;
+  std::string jit_compile_{"2"};
 };
 
 REGISTER_KERNEL_BUILDER(Name("NpuCall").Device(DEVICE_CPU), NpuCallOp);

diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
index 804544815..1dda0adb1 100644
--- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py
+++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
@@ -67,7 +67,7 @@ class NpuConfig(NpuBaseConfig):
         self.event_sync_timeout = OptionValue(-1, None)
         self.external_weight = OptionValue(False, [True, False])
         self.memory_config = MemoryConfig()
-        self.jit_compile = OptionValue(None, [True, False])
+        self.jit_compile = OptionValue('Auto', ['True', 'False', 'Auto'])
         self.graph_compiler_cache_dir = OptionValue(None, None)
 
         # Configuration for experiment

diff --git a/tf_adapter_2.x/python/npu_device/configs/option_base.py b/tf_adapter_2.x/python/npu_device/configs/option_base.py
index 66b62b6d6..b95c71afd 100644
--- a/tf_adapter_2.x/python/npu_device/configs/option_base.py
+++ b/tf_adapter_2.x/python/npu_device/configs/option_base.py
@@ -43,6 +43,8 @@ class OptionValue:
             return "1"
         if str(self.__value) == str(False):
             return "0"
+        if str(self.__value) == "Auto":
+            return "2"
         return str(self.__value)
 
     @value.setter

diff --git a/tf_adapter_2.x/tests/st/adapter2_jit_compile_st.py b/tf_adapter_2.x/tests/st/adapter2_jit_compile_st.py
index 56ba29f00..6e9314d68 100644
--- a/tf_adapter_2.x/tests/st/adapter2_jit_compile_st.py
+++ b/tf_adapter_2.x/tests/st/adapter2_jit_compile_st.py
@@ -26,7 +26,7 @@
 import npu_device
 from npu_device.npu_device import stupid_repeat
 from tensorflow.python.eager import context
 
-npu_device.global_options().jit_compile = True
+npu_device.global_options().jit_compile = "True"
 npu = npu_device.open().as_default()
 
@@ -35,7 +35,7 @@ def tensor_equal(t1, t2):
 
 
 class Adapter2JitCompileSt(unittest.TestCase):
-    def test_mix_jit_compile_fuzz_compile(self):
+    def test_jit_compile_true(self):
         def gen():
             v = [['1'], ['2', '3'], ['4', '5', '6']]
             while len(v):

diff --git a/tf_adapter_2.x/tests/st/adapter2_options.py b/tf_adapter_2.x/tests/st/adapter2_options.py
index 9e41787ad..e3d8ed9a3 100644
--- a/tf_adapter_2.x/tests/st/adapter2_options.py
+++ b/tf_adapter_2.x/tests/st/adapter2_options.py
@@ -136,5 +136,23 @@ class Adapter2Options(unittest.TestCase):
         options = config.as_dict()
         self.assertEqual(options['graph_slice'], "auto")
 
+    def test_10_set_jit_compile_option_error(self):
+        config = NpuConfig()
+        try:
+            config.jit_compile = "true"
+        except ValueError as e:
+            err = "'true' not in optional list ['True', 'False', 'Auto']"
+            self.assertEqual(err, str(e))
+        try:
+            config.jit_compile = "false"
+        except ValueError as e:
+            err = "'false' not in optional list ['True', 'False', 'Auto']"
+            self.assertEqual(err, str(e))
+        try:
+            config.jit_compile = "auto"
+        except ValueError as e:
+            err = "'auto' not in optional list ['True', 'False', 'Auto']"
+            self.assertEqual(err, str(e))
+
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file

diff --git a/tf_adapter_2.x/tests/st/adapter2_st.py b/tf_adapter_2.x/tests/st/adapter2_st.py
index 32872065d..43eb48b0c 100644
--- a/tf_adapter_2.x/tests/st/adapter2_st.py
+++ b/tf_adapter_2.x/tests/st/adapter2_st.py
@@ -29,6 +29,7 @@
 import unittest
 import tensorflow as tf
 from tensorflow.python.eager import context
+npu_device.global_options().jit_compile = "False"
 npu_device.global_options().is_tailing_optimization = True
 npu_device.global_options().experimental.multi_branches_config.input_shape = "data_0:-1"
 npu_device.global_options().experimental.multi_branches_config.dynamic_node_type = "0"
-- 
Gitee
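
Usage sketch (assumes a TF2 environment with an npu_device build that includes
this patch): jit_compile is now a string option taking "True", "False" or
"Auto" (the new default). Per ConvertToGeJitValue above, "True" maps to "1",
"False" to "0" and "Auto" to "2" on GE's "ge.jit_compile"; any other spelling
raises ValueError, e.g. "'true' not in optional list ['True', 'False', 'Auto']".

    import npu_device

    # Set options before npu_device.open(); "Auto" maps to "2"
    # ("2 means Auto" per npu_attrs.cc above).
    npu_device.global_options().jit_compile = "Auto"
    npu = npu_device.open().as_default()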