diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index f8dbb9460a8ee5701dc3b663c900b5b4c64a6144..a26565778126a3e8cd8f361857872e8d2ba3ffd7 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -23,7 +23,7 @@ class NPURunConfig(run_config_lib.RunConfig): frozen_variable=False, variable_placement="Device", jit_compile="auto", precision_mode_v2=None, ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None, execute_times=-1, graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None, - oo_constant_folding=True, input_batch_cpy=False): + oo_constant_folding=True, input_batch_cpy=False, shape_generalization_mode="STRICT"): class ProfilingConfig(): def __init__(self, enable_profiling=False, profiling_options=None): diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index 45008c9a8136d0a74c272039ba1467faac6b3b6b..8ad0494acd69dd2437df1612cf975d6a4ec72184 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -347,6 +347,7 @@ class ExitCallbackGuarder { private: std::function<void()> done_; }; + } // namespace std::string CurrentTimeInStr() { @@ -427,6 +428,7 @@ void GeOp::Initialize(OpKernelConstruction *ctx) { } (void) ctx->GetAttr("_train_graph", &is_train_graph_); (void) ctx->GetAttr("_is_var_init_graph", &is_var_init_graph_); + (void) ctx->GetAttr("_shape_generalization_mode", &shape_generalization_mode_); ADP_LOG(INFO) << "[GEOP] dynamic_input: " << dynamic_input_ << ", dynamic_graph_execute_mode: " << dynamic_graph_execute_mode_ << ", jit_compile: " << jit_compile_ @@ -437,8 +439,21 @@ void GeOp::Initialize(OpKernelConstruction *ctx) { << ", is_var_init_graph: " << is_var_init_graph_ << ", use_counter_filter: " << use_counter_filter_ << ", max_key_num: " << max_key_num_ << ", embedding_dim: " << embedding_dim_ << ", padding_key: " << padding_key_ << ", embedding_flags: " << embedding_flags_ - << ", compile_dynamic_mode: " << compile_dynamic_mode_; + << ", compile_dynamic_mode: " << compile_dynamic_mode_ + << ", shape_generalization_mode: " << shape_generalization_mode_; + if (compile_dynamic_mode_ == "1" && shape_generalization_mode_ != "STRICT") { + ADP_LOG(WARNING) << "compile_dynamic_mode is 1, so shape_generalization_mode[" + << shape_generalization_mode_ << "] will be ignored, please set compile_dynamic_mode=0."; + } + if (jit_compile_ != "1" && shape_generalization_mode_ != "STRICT") { + LOG(WARNING) << "jit_compile[" << jit_compile_ << "] is not 1, so shape_generalization_mode[" + << shape_generalization_mode_ << "] will be ignored, please set jit_compile=1 " + << "and shape_generalization_mode=" << shape_generalization_mode_ << "."; + ADP_LOG(WARNING) << "jit_compile[" << jit_compile_ << "] is not 1, so shape_generalization_mode[" + << shape_generalization_mode_ << "] will be ignored, please set jit_compile=1 " + << "and shape_generalization_mode=" << shape_generalization_mode_ << "."; + } // global environment Initialize, invoke once for each process std::string sess_config = ""; OP_REQUIRES_OK(ctx, ctx->GetAttr("_NpuOptimizer", &sess_config)); @@ -938,6 +953,23 @@ PartialTensorShape GeOp::MakeCompatShape(const PartialTensorShape &a, const Part return MakeUnknownShape(b.dims()); } +PartialTensorShape GeOp::MakeAdaptiveShape(const PartialTensorShape &a, const PartialTensorShape &b) const { + const static auto kUnknownRankShape = PartialTensorShape(); + if (a.dims() != b.dims()) 
{ + return kUnknownRankShape; + } + static constexpr int64 kUnknownDim = -1; + std::vector<int64> dims(a.dims(), kUnknownDim); + for (int32_t i = 0; i < a.dims(); i++) { + if (a.dim_size(i) == b.dim_size(i)) { + dims[i] = a.dim_size(i); + } + } + PartialTensorShape out_shape; + auto status = PartialTensorShape::MakePartialShape(dims.data(), static_cast<int32_t>(dims.size()), &out_shape); + return status.ok() ? out_shape : kUnknownRankShape; +} + void GeOp::InitGraphShape(OpKernelContext *const ctx) { mutex_lock lock{graph_handler_.graph_mu}; for (size_t i = 0UL; i < static_cast<size_t>(ctx->num_inputs()); i++) { @@ -956,6 +988,8 @@ void GeOp::InitGraphShape(OpKernelContext *const ctx) { } bool GeOp::MaybeUpdateShape(OpKernelContext *const ctx) { + ADP_LOG(INFO) << "MaybeUpdateShape, compile_dynamic_mode: " << compile_dynamic_mode_ << ", jit_compile: " + << jit_compile_ << ", shape_generalization_mode: " << shape_generalization_mode_; bool updated = false; for (size_t i = 0UL; i < static_cast<size_t>(ctx->num_inputs()); i++) { auto &shape = input_shapes_vec_[i]; @@ -964,9 +998,11 @@ ADP_LOG(INFO) << "Compat input " << i << " shape " << shape.value().DebugString() << " vs. " << value_shape.DebugString(); updated = true; - if ((jit_compile_ == "1") && (compile_dynamic_mode_ != "1")) { + if (compile_dynamic_mode_ != "1" && jit_compile_ == "1" && shape_generalization_mode_ == "STRICT") { shape = value_shape; ADP_LOG(WARNING) << "Dynamic shape, recommended to configure jit_compile value to false or auto"; + } else if (compile_dynamic_mode_ != "1" && jit_compile_ == "1" && shape_generalization_mode_ == "ADAPTIVE") { + shape = MakeAdaptiveShape(shape.value(), value_shape); } else { shape = MakeCompatShape(shape.value(), value_shape); } @@ -1818,8 +1854,12 @@ Status GeOp::BuildGraphDef(FunctionLibraryDefinition &flib_def, const std::vecto } HandleDpOpAndGetNextNodes(graph); - // Binary scenario (jit=0 or jit=2): update the input shapes if they change - if ((jit_compile_ != "1") || (compile_dynamic_mode_ == "1")) { UpdateInputsShapeDesc(graph); } + // In dynamic scenarios the dynamic axes need to be updated to -1 to avoid triggering compilation frequently + if ((jit_compile_ != "1") || (compile_dynamic_mode_ == "1") || + (jit_compile_ == "1" && shape_generalization_mode_ != "STRICT")) { + ADP_LOG(INFO) << "[GEOP] UpdateInputsShapeDesc start."; + UpdateInputsShapeDesc(graph); + } graph.ToGraphDef(&graph_def); std::string enable_force_v2_control; diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h index fc44c4022d27a9b879fd2a2c082cc52fdb0a8b0b..526847e3d54c81ffc2b8eb0463819af7ea44043b 100644 --- a/tf_adapter/kernels/geop_npu.h +++ b/tf_adapter/kernels/geop_npu.h @@ -189,6 +189,7 @@ public: bool IsDynamicConfig(); PartialTensorShape MakeCompatShape(const PartialTensorShape &a, const PartialTensorShape &b) const; + PartialTensorShape MakeAdaptiveShape(const PartialTensorShape &a, const PartialTensorShape &b) const; bool MaybeUpdateShape(OpKernelContext *const ctx); PartialTensorShape MakeUnknownShape(const int32_t &size) const; @@ -243,6 +244,7 @@ public: std::vector<Node *> dynamic_shape_nodes_; std::string dynamic_input_; std::string compile_dynamic_mode_; + std::string shape_generalization_mode_{"STRICT"}; uint32_t graph_max_parallel_model_num_{1U}; std::string dynamic_graph_execute_mode_; std::string data_inputs_shape_range_; diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index 6068bc4d5321e7fdd42db9a53b9e05fc434e3fb2..e0e32ff6ce31fef0576fdb82316780bd1d76b459 100644 --- 
a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -120,7 +120,8 @@ class NPURunConfig(run_config_lib.RunConfig): export_compile_stat=1, aicore_num=None, oo_constant_folding=True, - input_batch_cpy=False + input_batch_cpy=False, + shape_generalization_mode="STRICT" ): """ Constructs a NPUConfig. @@ -193,6 +194,10 @@ class NPURunConfig(run_config_lib.RunConfig): exits (default); 2: Generated when graph compilation complete. oo_constant_folding: The switch of constant folding, False: disable; True(default): enable. input_batch_cpy: The switch of batch mem copy, False(default): disable; True: enable. + shape_generalization_mode: Only takes effect when jit_compile=1; jit_compile=0/2 ignores this config. + STRICT: default, use the input shape as-is; + FULL: full generalization of all axes; + ADAPTIVE: generalize only the axes that vary. """ # Check iterations_per_loop. @@ -295,6 +300,7 @@ class NPURunConfig(run_config_lib.RunConfig): self._aicore_num = aicore_num self._oo_constant_folding = oo_constant_folding self._input_batch_cpy = input_batch_cpy + self._shape_generalization_mode = shape_generalization_mode super(NPURunConfig, self).__init__( model_dir=model_dir, tf_random_seed=tf_random_seed, diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 03031bde9c88bd9843a3062feecfa1ae1f5933ac..965965680206db50f4fe6fd64e12cf888caa6a98 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -851,6 +851,9 @@ class NPUEstimator(estimator_lib.Estimator): custom_op.parameter_map["frozen_variable"].b = config._frozen_variable custom_op.parameter_map["variable_placement"].s = tf.compat.as_bytes(config._variable_placement) custom_op.parameter_map["execute_times"].i = config.execute_times + if config._shape_generalization_mode is not None: + custom_op.parameter_map["shape_generalization_mode"].s = tf.compat.as_bytes( + config._shape_generalization_mode) self.__load_session_device_id(config, custom_op) self.__load_modify_mixlist(config, custom_op) diff --git a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc index 330e3a61fba1267687ded6e7d7ef5c9bd50c94c4..28f90472d7113e3d2fbc930d0a8e549f4c80ce23 100644 --- a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc +++ b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc @@ -243,6 +243,28 @@ TEST_F(GeOpTest, GeOpCompileDynamicTest) { EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok()); } +TEST_F(GeOpTest, GeOpShapeGeneralizationModeTest) { + NodeDef node_def; + std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode.pbtxt"; + Tensor a(DT_INT32, TensorShape({2, 2})); + gtl::InlinedVector<TensorValue, 4> inputs{TensorValue(&a)}; + EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok()); + Tensor b(DT_INT32, TensorShape({1, 2})); + gtl::InlinedVector<TensorValue, 4> inputs2{TensorValue(&b)}; + EXPECT_TRUE(GeOpRunGraphAsyncMultiStep(graph_def_path, {inputs2, inputs}, node_def, "GeOp1_0").ok()); +} + +TEST_F(GeOpTest, GeOpShapeGeneralizationModeIgnoreTest) { + NodeDef node_def; + std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode_ignore.pbtxt"; + Tensor a(DT_INT32, TensorShape({2, 2})); + gtl::InlinedVector<TensorValue, 4> inputs{TensorValue(&a)}; + EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, 
node_def, "GeOp1_0").ok()); + Tensor b(DT_INT32, TensorShape({1, 2})); + gtl::InlinedVector inputs2{TensorValue(&b)}; + EXPECT_TRUE(GeOpRunGraphAsyncMultiStep(graph_def_path, {inputs2, inputs}, node_def, "GeOp1_0").ok()); +} + TEST_F(GeOpTest, GeDynamicConfigError) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_config.pbtxt"; diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..887a72f15c95d43a873e4f69a82d98b8d95c1468 --- /dev/null +++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode.pbtxt @@ -0,0 +1,551 @@ +node { + name: "retval_Add_0_0" + op: "_Retval" + input: "GeOp1_0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "index" + value { + i: 0 + } + } +} +node { + name: "GeOp1_0" + op: "GeOp" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_auto_tune_mode" + value { + s: "" + } + } + attr { + key: "_buffer_optimize" + value { + s: "l2_optimize" + } + } + attr { + key: "_compress_weight_conf" + value { + s: "" + } + } + attr { + key: "_debug_dir" + value { + s: "" + } + } + attr { + key: "_distribute_config" + value { + s: "" + } + } + attr { + key: "_do_npu_optimizer" + value { + s: "1" + } + } + attr { + key: "_dump_debug_mode" + value { + s: "all" + } + } + attr { + key: "_dump_mode" + value { + s: "output" + } + } + attr { + key: "_dump_path" + value { + s: "./" + } + } + attr { + key: "_dump_step" + value { + s: "1" + } + } + attr { + key: "_dynamic_dims" + value { + s: "" + } + } + attr { + key: "_dynamic_graph_execute_mode" + value { + s: "dynamic_execute" + } + } + attr { + key: "_max_key_num" + value { + s: "1" + } + } + attr { + key: "_embedding_dim" + value { + s: "1" + } + } + attr { + key: "_use_counter_filter" + value { + s: "1" + } + } + attr { + key: "_dynamic_input" + value { + s: "0" + } + } + attr { + key: "_jit_compile" + value { + s: "1" + } + } + attr { + key: "_shape_generalization_mode" + value { + s: "ADAPTIVE" + } + } + attr { + key: "_dynamic_node_type" + value { + s: "" + } + } + attr { + key: "_enable_compress_weight" + value { + s: "0" + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "0" + } + } + attr { + key: "_enable_dump" + value { + s: "1" + } + } + attr { + key: "_enable_dump_debug" + value { + s: "1" + } + } + attr { + key: "_enable_exception_dump" + value { + s: "" + } + } + attr { + key: "_enable_scope_fusion_passes" + value { + s: "" + } + } + attr { + key: "_enable_small_channel" + value { + s: "0" + } + } + attr { + key: "_fusion_switch_file" + value { + s: "" + } + } + attr { + key: "_graph_run_mode" + value { + s: "1" + } + } + attr { + key: "_hcom_multi_mode" + value { + s: "" + } + } + attr { + key: "_hcom_parallel" + value { + s: "0" + } + } + attr { + key: "_in_out_pair" + value { + s: "" + } + } + attr { + key: "_in_out_pair_flag" + value { + s: "1" + } + } + attr { + key: "_input_shape" + value { + s: "" + } + } + attr { + key: "_is_tailing_optimization" + value { + s: "0" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" 
+ } + } + attr { + key: "_local_device_list" + value { + s: "" + } + } + attr { + key: "_local_rank_id" + value { + s: "-1" + } + } + attr { + key: "_lower_functional_ops" + value { + s: "0" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_mstune_mode" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_dir" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_mode" + value { + s: "" + } + } + attr { + key: "_op_debug_level" + value { + s: "0" + } + } + attr { + key: "_op_select_implmode" + value { + s: "" + } + } + attr { + key: "_op_tune_mode" + value { + s: "" + } + } + attr { + key: "_optypelist_for_implmode" + value { + s: "" + } + } + attr { + key: "_precision_mode" + value { + s: "" + } + } + attr { + key: "_profiling_mode" + value { + s: "0" + } + } + attr { + key: "_profiling_options" + value { + s: "" + } + } + attr { + key: "_session_device_id" + value { + s: "" + } + } + attr { + key: "_stream_max_parallel_num" + value { + s: "" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "_variable_format_optimize" + value { + s: "1" + } + } + attr { + key: "_work_path" + value { + s: "" + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "function" + value { + func { + name: "GeOp1_0" + } + } + } +} +library { + function { + signature { + name: "GeOp1_0" + output_arg { + name: "Add_0_retval" + type: DT_FLOAT + } + } + node_def { + name: "Const_1" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000 A\000\000 A" + } + } + } + } + node_def { + name: "Variable" + op: "VariableV2" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_class" + value { + list { + s: "loc:@Variable/read" + } + } + } + attr { + key: "_var_format" + value { + s: "4D" + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } + } + node_def { + name: "Variable/read" + op: "Identity" + input: "Variable:ref:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_var_format" + value { + s: "4D" + } + } + } + node_def { + name: "Add" + op: "Add" + input: "Const_1:output:0" + input: "Variable/read:output:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + ret { + key: "Add_0_retval" + value: "Add:z:0" + } + } +} +versions { + producer: 134 +} diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode_ignore.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode_ignore.pbtxt new file mode 100644 index 
0000000000000000000000000000000000000000..ae675689d5eeb7b5deaddca91c2dcd40c081d163 --- /dev/null +++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode_ignore.pbtxt @@ -0,0 +1,557 @@ +node { + name: "retval_Add_0_0" + op: "_Retval" + input: "GeOp1_0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "index" + value { + i: 0 + } + } +} +node { + name: "GeOp1_0" + op: "GeOp" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_auto_tune_mode" + value { + s: "" + } + } + attr { + key: "_buffer_optimize" + value { + s: "l2_optimize" + } + } + attr { + key: "_compress_weight_conf" + value { + s: "" + } + } + attr { + key: "_debug_dir" + value { + s: "" + } + } + attr { + key: "_distribute_config" + value { + s: "" + } + } + attr { + key: "_do_npu_optimizer" + value { + s: "1" + } + } + attr { + key: "_dump_debug_mode" + value { + s: "all" + } + } + attr { + key: "_dump_mode" + value { + s: "output" + } + } + attr { + key: "_dump_path" + value { + s: "./" + } + } + attr { + key: "_dump_step" + value { + s: "1" + } + } + attr { + key: "_dynamic_dims" + value { + s: "" + } + } + attr { + key: "_dynamic_graph_execute_mode" + value { + s: "dynamic_execute" + } + } + attr { + key: "_max_key_num" + value { + s: "1" + } + } + attr { + key: "_embedding_dim" + value { + s: "1" + } + } + attr { + key: "_use_counter_filter" + value { + s: "1" + } + } + attr { + key: "_dynamic_input" + value { + s: "0" + } + } + attr { + key: "_compile_dynamic_mode" + value { + s: "1" + } + } + attr { + key: "_jit_compile" + value { + s: "0" + } + } + attr { + key: "_shape_generalization_mode" + value { + s: "FULL" + } + } + attr { + key: "_dynamic_node_type" + value { + s: "" + } + } + attr { + key: "_enable_compress_weight" + value { + s: "0" + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "0" + } + } + attr { + key: "_enable_dump" + value { + s: "1" + } + } + attr { + key: "_enable_dump_debug" + value { + s: "1" + } + } + attr { + key: "_enable_exception_dump" + value { + s: "" + } + } + attr { + key: "_enable_scope_fusion_passes" + value { + s: "" + } + } + attr { + key: "_enable_small_channel" + value { + s: "0" + } + } + attr { + key: "_fusion_switch_file" + value { + s: "" + } + } + attr { + key: "_graph_run_mode" + value { + s: "1" + } + } + attr { + key: "_hcom_multi_mode" + value { + s: "" + } + } + attr { + key: "_hcom_parallel" + value { + s: "0" + } + } + attr { + key: "_in_out_pair" + value { + s: "" + } + } + attr { + key: "_in_out_pair_flag" + value { + s: "1" + } + } + attr { + key: "_input_shape" + value { + s: "" + } + } + attr { + key: "_is_tailing_optimization" + value { + s: "0" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_local_device_list" + value { + s: "" + } + } + attr { + key: "_local_rank_id" + value { + s: "-1" + } + } + attr { + key: "_lower_functional_ops" + value { + s: "0" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_mstune_mode" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_dir" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_mode" + value { + s: "" + } + } + attr { + key: "_op_debug_level" + 
value { + s: "0" + } + } + attr { + key: "_op_select_implmode" + value { + s: "" + } + } + attr { + key: "_op_tune_mode" + value { + s: "" + } + } + attr { + key: "_optypelist_for_implmode" + value { + s: "" + } + } + attr { + key: "_precision_mode" + value { + s: "" + } + } + attr { + key: "_profiling_mode" + value { + s: "0" + } + } + attr { + key: "_profiling_options" + value { + s: "" + } + } + attr { + key: "_session_device_id" + value { + s: "" + } + } + attr { + key: "_stream_max_parallel_num" + value { + s: "" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "_variable_format_optimize" + value { + s: "1" + } + } + attr { + key: "_work_path" + value { + s: "" + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "function" + value { + func { + name: "GeOp1_0" + } + } + } +} +library { + function { + signature { + name: "GeOp1_0" + output_arg { + name: "Add_0_retval" + type: DT_FLOAT + } + } + node_def { + name: "Const_1" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000 A\000\000 A" + } + } + } + } + node_def { + name: "Variable" + op: "VariableV2" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_class" + value { + list { + s: "loc:@Variable/read" + } + } + } + attr { + key: "_var_format" + value { + s: "4D" + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } + } + node_def { + name: "Variable/read" + op: "Identity" + input: "Variable:ref:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_var_format" + value { + s: "4D" + } + } + } + node_def { + name: "Add" + op: "Add" + input: "Const_1:output:0" + input: "Variable/read:output:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + ret { + key: "Add_0_retval" + value: "Add:z:0" + } + } +} +versions { + producer: 134 +} diff --git a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc index 425d3685e1290d1711d48ba23ef7358c9279a86e..36db97eecca2836256e3f26c550e668920d17503 100644 --- a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc +++ b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc @@ -773,6 +773,28 @@ TEST_F(GeOpTest, GeOpDynamicTest) { EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs2, node_def, "GeOp1_0").ok()); } +TEST_F(GeOpTest, GeOpShapeGeneralizationModeTest) { + NodeDef node_def; + std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode.pbtxt"; + Tensor a(DT_INT32, TensorShape({2, 2})); + gtl::InlinedVector inputs{TensorValue(&a)}; + 
EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok()); + Tensor b(DT_INT32, TensorShape({1, 2})); + gtl::InlinedVector<TensorValue, 4> inputs2{TensorValue(&b)}; + EXPECT_TRUE(GeOpRunGraphAsyncMultiStep(graph_def_path, {inputs2, inputs}, node_def, "GeOp1_0").ok()); +} + +TEST_F(GeOpTest, GeOpShapeGeneralizationModeIgnoreTest) { + NodeDef node_def; + std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode_ignore.pbtxt"; + Tensor a(DT_INT32, TensorShape({2, 2})); + gtl::InlinedVector<TensorValue, 4> inputs{TensorValue(&a)}; + EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok()); + Tensor b(DT_INT32, TensorShape({1, 2})); + gtl::InlinedVector<TensorValue, 4> inputs2{TensorValue(&b)}; + EXPECT_TRUE(GeOpRunGraphAsyncMultiStep(graph_def_path, {inputs2, inputs}, node_def, "GeOp1_0").ok()); +} + TEST_F(GeOpTest, GeOpTestEndOfSequence) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop.pbtxt"; diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index e91b5446c75a4ceca10128d220e59d6d3cfdf8b8..cc0081830169aa879bee599d8f360a7fcf07cf25 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -491,8 +491,10 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr std::string graph_slice_mode; std::string input_fusion_size = "131072"; std::string compile_dynamic_mode; + std::string shape_generalization_mode = "STRICT"; std::string graph_max_parallel_model_num = "1"; std::string input_batch_cpy; + std::string jit_compile; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { (void) ctx->GetAttr("_variable_format_optimize", &variable_format_optimize); (void) ctx->GetAttr("_hcom_parallel", &hcom_parallel); @@ -562,8 +564,8 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr (void) ctx->GetAttr("_enable_graph_parallel", &enable_graph_parallel); (void) ctx->GetAttr("_graph_slice", &graph_slice_mode); (void) ctx->GetAttr("_compile_dynamic_mode", &compile_dynamic_mode); + (void) ctx->GetAttr("_shape_generalization_mode", &shape_generalization_mode); (void) ctx->GetAttr("_graph_max_parallel_model_num", &graph_max_parallel_model_num); - std::string jit_compile; if (ctx->GetAttr("_jit_compile", &jit_compile).ok()) { sess_options["jit_compile"] = jit_compile; sess_options["ge.jit_compile"] = jit_compile; @@ -600,7 +602,13 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr sess_options[ge::GRAPH_MAX_PARALLEL_MODEL_NUM] = graph_max_parallel_model_num; sess_options["ge.inputShape"] = input_shape; sess_options["ge.dynamicDims"] = dynamic_dims; - sess_options["ge.compile_dynamic_mode"] = compile_dynamic_mode; + // If compile_dynamic_mode is 0 and either jit_compile != 1, or jit_compile = 1 with shape_generalization_mode != STRICT, ge.compile_dynamic_mode needs to be set to 1 + if ((compile_dynamic_mode == "0" && jit_compile != "1") || + (compile_dynamic_mode == "0" && jit_compile == "1" && shape_generalization_mode != "STRICT")) { + sess_options["ge.compile_dynamic_mode"] = "1"; + } else { + sess_options["ge.compile_dynamic_mode"] = compile_dynamic_mode; + } sess_options["ge.bufferOptimize"] = buffer_optimize; sess_options["ge.enableSmallChannel"] = enable_small_channel; sess_options["ge.fusionSwitchFile"] = fusion_switch_file; @@ -846,6 +854,7 @@ std::map<std::string, std::string> NpuAttrs::GetPassOptions(const GraphOptimizat std::string const_input; bool frozen_variable = false; std::string variable_location = "Device"; + std::string shape_generalization_mode = "STRICT"; for (const auto &custom_optimizer : 
rewrite_options.custom_optimizers()) { if (custom_optimizer.name() == "NpuOptimizer") { @@ -925,6 +934,9 @@ std::map<std::string, std::string> NpuAttrs::GetPassOptions(const GraphOptimizat LOG(FATAL) << s.error_message(); } } + if (params.count("shape_generalization_mode")) { + shape_generalization_mode = params.at("shape_generalization_mode").s(); + } } } if (!do_npu_optimizer) { @@ -957,6 +969,7 @@ std::map<std::string, std::string> NpuAttrs::GetPassOptions(const GraphOptimizat pass_options["const_input"] = const_input; pass_options["frozen_variable"] = std::to_string(static_cast<int32_t>(frozen_variable)); pass_options["variable_location"] = variable_location; + pass_options["shape_generalization_mode"] = shape_generalization_mode; return pass_options; } @@ -983,6 +996,7 @@ std::map<std::string, std::string> NpuAttrs::GetPassOptions(const OpKernelConstr std::string variable_location = "Device"; std::string frozen_variable = "0"; std::string accelerate_train_mode; + std::string shape_generalization_mode = "STRICT"; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { do_npu_optimizer = "1"; @@ -1007,6 +1021,7 @@ std::map<std::string, std::string> NpuAttrs::GetPassOptions(const OpKernelConstr (void) ctx->GetAttr("_variable_location", &variable_location); (void) ctx->GetAttr("_accelerate_train_mode", &accelerate_train_mode); (void) ctx->GetAttr("_graph_max_parallel_model_num", &graph_max_parallel_model_num); + (void) ctx->GetAttr("_shape_generalization_mode", &shape_generalization_mode); } // pass options pass_options["do_npu_optimizer"] = do_npu_optimizer; @@ -1028,6 +1043,7 @@ std::map<std::string, std::string> NpuAttrs::GetPassOptions(const OpKernelConstr pass_options["frozen_variable"] = frozen_variable; pass_options["variable_location"] = variable_location; pass_options["accelerate_train_mode"] = accelerate_train_mode; + pass_options["shape_generalization_mode"] = shape_generalization_mode; return pass_options; } @@ -1245,6 +1261,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string aicore_num; std::string oo_constant_folding; std::string input_batch_cpy; + std::string shape_generalization_mode = "STRICT"; auto NpuOptimizer_value = attrs.Find("_NpuOptimizer"); auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc"); @@ -1346,6 +1363,7 @@ auto aicore_num_value = attrs.Find("_aicore_num"); auto oo_constant_folding_value = attrs.Find("_oo_constant_folding"); auto input_batch_cpy_value = attrs.Find("_input_batch_cpy"); + auto shape_generalization_mode_value = attrs.Find("_shape_generalization_mode"); if (NpuOptimizer_value != nullptr) { do_npu_optimizer = "1"; if (enable_data_pre_proc_value != nullptr) { @@ -1374,7 +1392,6 @@ if (compile_dynamic_mode_value != nullptr) { compile_dynamic_mode = compile_dynamic_mode_value->s(); } - if (task_index_value != nullptr) { task_index = task_index_value->s(); } @@ -1670,6 +1687,9 @@ if (input_batch_cpy_value != nullptr) { input_batch_cpy = input_batch_cpy_value->s(); } + if (shape_generalization_mode_value != nullptr) { + shape_generalization_mode = shape_generalization_mode_value->s(); + } } all_options["variable_format_optimize"] = variable_format_optimize; @@ -1796,6 +1816,7 @@ // input_batch_cpy all_options["input_batch_cpy"] = input_batch_cpy; all_options["ge.inputBatchCpy"] = input_batch_cpy; + all_options["shape_generalization_mode"] = shape_generalization_mode; return all_options; } @@ -1924,6 
+1945,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options std::string aicore_num; bool oo_constant_folding = true; bool input_batch_cpy = false; + std::string shape_generalization_mode = "STRICT"; const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options(); for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) { if (custom_optimizer.name() == "NpuOptimizer") { @@ -2516,6 +2538,17 @@ } else { jit_compile = "2"; // 2 means auto } + if (params.count("shape_generalization_mode") > 0) { + const static std::vector<std::string> kShapeGeneralizationModeList = {"STRICT", "FULL", "ADAPTIVE"}; + NPU_REQUIRES(params.at("shape_generalization_mode").value_case() == params.at("shape_generalization_mode").kS, + errors::InvalidArgument( + "The data type of shape_generalization_mode is invalid. Expected string type.")); + NPU_REQUIRES_OK(CheckValueAllowed( + "shape_generalization_mode", params.at("shape_generalization_mode").s(), kShapeGeneralizationModeList)); + shape_generalization_mode = params.at("shape_generalization_mode").s(); + } else { + shape_generalization_mode = "STRICT"; + } if (params.count("graph_compiler_cache_dir") > 0) { graph_compiler_cache_dir = params.at("graph_compiler_cache_dir").s(); } @@ -2683,6 +2716,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options pass_options["frozen_variable"] = std::to_string(static_cast<int32_t>(frozen_variable)); pass_options["variable_location"] = variable_location; pass_options["accelerate_train_mode"] = accelerate_train_mode; + pass_options["shape_generalization_mode"] = shape_generalization_mode; for (const auto &option : sess_options) { std::string attr_name = std::string("_") + option.first; diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 18fb0e09b9d7e5541743141ce6cfc78b1b011904..f22c6db0480769b2292be9f911af9fe002949801 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -97,6 +97,7 @@ const std::map<std::string, std::string> kGlobalConfigOptions = { {"aicore_num", "ge.aicoreNum"}, {"oo_constant_folding", "ge.oo.constantFolding"}, {"input_batch_cpy", "ge.inputBatchCpy"}, + {"shape_generalization_mode", "shape_generalization_mode"}, // private options {"_distribute.rank_id", ge::OPTION_EXEC_RANK_ID}, {"_distribute.rank_table", ge::OPTION_EXEC_RANK_TABLE_FILE}, diff --git a/tf_adapter_2.x/npu_device/kernels/npu_call.cpp b/tf_adapter_2.x/npu_device/kernels/npu_call.cpp index 30b86e5aa6dddd32dc456cc62be71a7a29babcba..1c0f9ff0136900a96d634c21ca1ed809d4d98757 100644 --- a/tf_adapter_2.x/npu_device/kernels/npu_call.cpp +++ b/tf_adapter_2.x/npu_device/kernels/npu_call.cpp @@ -55,6 +55,23 @@ class NpuCallOp : public OpKernel { device->device_options["ge.compile_dynamic_mode"]; compile_dynamic_mode_ = device->device_options["ge.compile_dynamic_mode"]; } + if (device->device_options.find("shape_generalization_mode") != device->device_options.end()) { + DLOG() << "device_options shape_generalization_mode : " << + device->device_options["shape_generalization_mode"]; + shape_generalization_mode_ = device->device_options["shape_generalization_mode"]; + } + if (compile_dynamic_mode_ == "1" && shape_generalization_mode_ != "STRICT") { + DLOG() << "compile_dynamic_mode is 1, shape_generalization_mode[" + << shape_generalization_mode_ << "] will be ignored, please set 
compile_dynamic_mode=0."; + } + if (jit_compile_ != "1" && shape_generalization_mode_ != "STRICT") { + LOG(WARNING) << "jit_compile[" << jit_compile_ << "] is not 1, so shape_generalization_mode[" + << shape_generalization_mode_ << "] will be ignored, please set jit_compile=1 " + << "and shape_generalization_mode=" << shape_generalization_mode_ << "."; + DLOG() << "jit_compile[" << jit_compile_ << "] is not 1, shape_generalization_mode[" + << shape_generalization_mode_ << "] will be ignored, please set jit_compile=1 " + << "and shape_generalization_mode=" << shape_generalization_mode_ << "."; + } bool loaded = false; OP_REQUIRES_OK(ctx, Build(ctx, loaded)); @@ -128,6 +145,8 @@ } bool MaybeUpdateShape(const OpKernelContext *const ctx) { + DLOG() << "MaybeUpdateShape, compile_dynamic_mode: " << compile_dynamic_mode_ << ", jit_compile: " + << jit_compile_ << ", shape_generalization_mode: " << shape_generalization_mode_; bool updated = false; for (size_t i = 0UL; i < static_cast<size_t>(ctx->num_inputs()); i++) { auto &shape = input_shapes_[i]; @@ -149,9 +168,11 @@ updated = true; DLOG() << "Compat input " << i << " shape " << shape.value().DebugString() << " vs. " << value_shape.DebugString(); - if ((jit_compile_ == "1") && (compile_dynamic_mode_ != "1")) { + if (compile_dynamic_mode_ != "1" && jit_compile_ == "1" && shape_generalization_mode_ == "STRICT") { shape = value_shape; DLOG() << "Dynamic shape, recommended to configure jit_compile value to false or auto"; + } else if (compile_dynamic_mode_ != "1" && jit_compile_ == "1" && shape_generalization_mode_ == "ADAPTIVE") { + shape = MakeAdaptiveShape(shape.value(), value_shape); } else { shape = MakeCompatShape(shape.value(), value_shape); } @@ -220,7 +241,22 @@ } return MakeUnknownShape(b.dims()); } - + static PartialTensorShape MakeAdaptiveShape(const PartialTensorShape &a, const PartialTensorShape &b) { + const static auto kUnknownRankShape = PartialTensorShape(); + if (a.dims() != b.dims()) { + return kUnknownRankShape; + } + static constexpr int64 kUnknownDim = -1; + std::vector<int64> dims(a.dims(), kUnknownDim); + for (int32_t i = 0; i < a.dims(); i++) { + if (a.dim_size(i) == b.dim_size(i)) { + dims[i] = a.dim_size(i); + } + } + PartialTensorShape out_shape; + auto status = PartialTensorShape::MakePartialShape(dims.data(), static_cast<int32_t>(dims.size()), &out_shape); + return status.ok() ? 
out_shape : kUnknownRankShape; + } static PartialTensorShape MakeUnknownShape(const int32_t &size) { const static auto kUnknownRankShape = PartialTensorShape(); static constexpr int64 kUnknownDim = -1; @@ -245,6 +281,7 @@ std::vector<absl::optional<PartialTensorShape>> input_shapes_; std::string jit_compile_{"2"}; std::string compile_dynamic_mode_{"0"}; + std::string shape_generalization_mode_{"STRICT"}; }; REGISTER_KERNEL_BUILDER(Name("NpuCall").Device(DEVICE_CPU), NpuCallOp); diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index 9e448c78a2d7a94622be247f6fb4c29e223ab0c8..72a6c9f0ef0438095d6837888118bfeda4f85f83 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -83,5 +83,6 @@ class NpuConfig(NpuBaseConfig): self.aicore_num = OptionValue(None, None) self.oo_constant_folding = OptionValue(True, [True, False]) self.input_batch_cpy = OptionValue(False, [True, False]) + self.shape_generalization_mode = OptionValue("STRICT", ["STRICT", "FULL", "ADAPTIVE"]) super(NpuConfig, self).__init__() diff --git a/tf_adapter_2.x/tests/st/CMakeLists.txt b/tf_adapter_2.x/tests/st/CMakeLists.txt index 7325a8b9f9d658024ac14ed715a7ca34d38b10cb..3e30fb5c2ef01e1aadb5ed04a194999afc6c8835 100644 --- a/tf_adapter_2.x/tests/st/CMakeLists.txt +++ b/tf_adapter_2.x/tests/st/CMakeLists.txt @@ -3,6 +3,8 @@ file(COPY ${CMAKE_CURRENT_LIST_DIR}/adapter2_st.py DESTINATION ${CMAKE_BINARY_DIR}/dist/python) file(COPY ${CMAKE_CURRENT_LIST_DIR}/adapter2_aoe_st.py DESTINATION ${CMAKE_BINARY_DIR}/dist/python) file(COPY ${CMAKE_CURRENT_LIST_DIR}/adapter2_jit_compile_st.py DESTINATION ${CMAKE_BINARY_DIR}/dist/python) file(COPY ${CMAKE_CURRENT_LIST_DIR}/adapter2_compile_dynamic_mode_st.py DESTINATION ${CMAKE_BINARY_DIR}/dist/python) +file(COPY ${CMAKE_CURRENT_LIST_DIR}/adapter2_shape_generalization_mode_st.py DESTINATION ${CMAKE_BINARY_DIR}/dist/python) +file(COPY ${CMAKE_CURRENT_LIST_DIR}/adapter2_shape_generalization_mode_ignore_st.py DESTINATION ${CMAKE_BINARY_DIR}/dist/python) file(COPY ${CMAKE_CURRENT_LIST_DIR}/../stub/hccl DESTINATION ${CMAKE_BINARY_DIR}/dist/python) file(COPY ${CMAKE_CURRENT_LIST_DIR}/adapter2_options.py DESTINATION ${CMAKE_BINARY_DIR}/dist/python) @@ -24,6 +26,10 @@ add_custom_target(adapter2_st ALL COMMAND lcov -c -q -d . -o st_p5.coverage && lcov -q -e st_p5.coverage "*npu_device*" -o st_p5.coverage COMMAND export NPU_DEBUG=true && export NPU_DUMP_GRAPH=true && cd ${CMAKE_BINARY_DIR}/dist/python/ && ${PYTHON_BIN_PATH} -m unittest adapter2_compile_dynamic_mode_st COMMAND lcov -c -q -d . -o st_p6.coverage && lcov -q -e st_p6.coverage "*npu_device*" -o st_p6.coverage - COMMAND lcov -o st.coverage -a st_p1.coverage -a st_p2.coverage -a st_p3.coverage -a st_p4.coverage -a st_p5.coverage -a st_p6.coverage + COMMAND export NPU_DEBUG=true && export NPU_DUMP_GRAPH=true && cd ${CMAKE_BINARY_DIR}/dist/python/ && ${PYTHON_BIN_PATH} -m unittest adapter2_shape_generalization_mode_st + COMMAND lcov -c -q -d . -o st_p7.coverage && lcov -q -e st_p7.coverage "*npu_device*" -o st_p7.coverage + COMMAND export NPU_DEBUG=true && export NPU_DUMP_GRAPH=true && cd ${CMAKE_BINARY_DIR}/dist/python/ && ${PYTHON_BIN_PATH} -m unittest adapter2_shape_generalization_mode_ignore_st + COMMAND lcov -c -q -d . 
-o st_p8.coverage && lcov -q -e st_p8.coverage "*npu_device*" -o st_p8.coverage + COMMAND lcov -o st.coverage -a st_p1.coverage -a st_p2.coverage -a st_p3.coverage -a st_p4.coverage -a st_p5.coverage -a st_p6.coverage -a st_p7.coverage -a st_p8.coverage DEPENDS _npu_ops _npu_device_backends aoe_tuning VERBATIM) diff --git a/tf_adapter_2.x/tests/st/adapter2_shape_generalization_mode_ignore_st.py b/tf_adapter_2.x/tests/st/adapter2_shape_generalization_mode_ignore_st.py new file mode 100644 index 0000000000000000000000000000000000000000..fab613002f2703645dbd0c3f150c571953eb30f8 --- /dev/null +++ b/tf_adapter_2.x/tests/st/adapter2_shape_generalization_mode_ignore_st.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import time + +import unittest +import tensorflow as tf +import npu_device + +from npu_device.npu_device import stupid_repeat +from tensorflow.python.eager import context + +npu_device.global_options().compile_dynamic_mode = True +npu_device.global_options().jit_compile = "false" +npu_device.global_options().shape_generalization_mode = "FULL" +npu = npu_device.open().as_default() + + +def tensor_equal(t1, t2): + return True + + +class Adapter2ShapeGeneralizationModeIgnoreSt(unittest.TestCase): + def test_shape_generalization_mode_true(self): + def gen(): + v = [['1'], ['2', '3'], ['4', '5', '6']] + while len(v): + yield v.pop(0) + + ds = tf.data.Dataset.from_generator(gen, output_types=tf.string) + iterator = iter(ds) + + @tf.function + def f(it): + v = next(it) + v = tf.strings.to_number(v) + return v + v + + self.assertTrue(tensor_equal(f(iterator), tf.constant([2.0]))) + self.assertTrue(tensor_equal(f(iterator), tf.constant([4.0, 6.0]))) + self.assertTrue(tensor_equal(f(iterator), tf.constant([8.0, 10.0, 12.0]))) + +if __name__ == '__main__': + unittest.main() diff --git a/tf_adapter_2.x/tests/st/adapter2_shape_generalization_mode_st.py b/tf_adapter_2.x/tests/st/adapter2_shape_generalization_mode_st.py new file mode 100644 index 0000000000000000000000000000000000000000..057844703ab68f6bac62d6bc8fb064ccc76ad6da --- /dev/null +++ b/tf_adapter_2.x/tests/st/adapter2_shape_generalization_mode_st.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import time + +import unittest +import tensorflow as tf +import npu_device + +from npu_device.npu_device import stupid_repeat +from tensorflow.python.eager import context + +npu_device.global_options().jit_compile = "true" +npu_device.global_options().shape_generalization_mode = "ADAPTIVE" +npu = npu_device.open().as_default() + + +def tensor_equal(t1, t2): + return True + + +class Adapter2ShapeGeneralizationModeSt(unittest.TestCase): + def test_shape_generalization_mode_true(self): + def gen(): + v = [['1'], ['2', '3'], ['4', '5', '6']] + while len(v): + yield v.pop(0) + + ds = tf.data.Dataset.from_generator(gen, output_types=tf.string) + iterator = iter(ds) + + @tf.function + def f(it): + v = next(it) + v = tf.strings.to_number(v) + return v + v + + self.assertTrue(tensor_equal(f(iterator), tf.constant([2.0]))) + self.assertTrue(tensor_equal(f(iterator), tf.constant([4.0, 6.0]))) + self.assertTrue(tensor_equal(f(iterator), tf.constant([8.0, 10.0, 12.0]))) + +if __name__ == '__main__': + unittest.main()
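Usage note (a minimal sketch, not part of the diff): shape_generalization_mode only takes effect when jit_compile=1 and compile_dynamic_mode is off, as enforced by the warnings in GeOp::Initialize and NpuCallOp above. With STRICT the cached input shape is simply replaced, so every new shape triggers a recompile; ADAPTIVE merges the cached and incoming shapes via MakeAdaptiveShape, generalizing only the axes that actually vary; FULL (and any ignored configuration) falls through to MakeCompatShape. The Python below mirrors that merge for illustration; make_adaptive_shape is a hypothetical helper, not adapter API, while the configuration calls are exactly the ones added by this change.

import npu_device

# ADAPTIVE requires jit_compile on and compile_dynamic_mode off (see the checks above).
npu_device.global_options().jit_compile = "true"
npu_device.global_options().shape_generalization_mode = "ADAPTIVE"
npu = npu_device.open().as_default()

def make_adaptive_shape(a, b):
    # Hypothetical Python mirror of the C++ MakeAdaptiveShape: keep axes that
    # agree, mark axes that differ as -1 (unknown dim); a rank change degrades
    # the cached shape to unknown rank.
    if len(a) != len(b):
        return None  # unknown rank
    return [x if x == y else -1 for x, y in zip(a, b)]

assert make_adaptive_shape([2, 2], [1, 2]) == [-1, 2]   # only the varying axis generalizes
assert make_adaptive_shape([2, 2], [2, 2, 1]) is None   # rank mismatch -> unknown rank

In the 1.x estimator path the same option is passed as NPURunConfig(shape_generalization_mode="ADAPTIVE"), which NPUEstimator forwards into custom_op.parameter_map["shape_generalization_mode"] as shown in the npu_estimator.py hunk.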