diff --git a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc
index 3eced9e721fa508258f9bf8fa9f5e69a9887588c..65845499a7119316e5bd681bc6722282a1e601d1 100644
--- a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc
+++ b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc
@@ -917,6 +917,69 @@ std::vector<string> string_split(const string &str, const string &pattern) {
   return resultVec;
 }
 
+// Funnels the tensor inputs of every SaveV2 node through a single IdentityN node (used in mix compile mode).
+// For each SaveV2 node, the data inputs whose dtype is neither DT_STRING nor DT_RESOURCE are rewired as
+// src -> IdentityN -> SaveV2, and the control inputs of SaveV2 are moved onto the IdentityN node.
+// SaveV2 nodes with at most one such data input are left untouched.
+// On failure to build the IdentityN node the builder's error Status is returned.
+Status OptimizeSaveV2InMixeMode(Graph &graph) {
+  std::vector<Node *> save_v2_nodes;
+  for (Node *node : graph.op_nodes()) {
+    if (node->type_string() == "SaveV2") {
+      save_v2_nodes.emplace_back(node);
+    }
+  }
+  if (save_v2_nodes.empty()) {
+    return Status::OK();
+  }
+  ADP_LOG(INFO) << "The size of SaveV2 is " << save_v2_nodes.size();
+
+  for (Node *node : save_v2_nodes) {
+    std::vector<NodeBuilder::NodeOut> data_nodes;
+    std::vector<Node *> ctrl_nodes;
+    // IdentityN output index -> SaveV2 input edge that this output replaces.
+    std::map<int, const Edge *> del_data_edges;
+    std::vector<const Edge *> del_ctrl_edges;
+    int index = 0;
+    for (const Edge *input_edge : node->in_edges()) {
+      if (input_edge->IsControlEdge()) {
+        ctrl_nodes.emplace_back(input_edge->src());
+        del_ctrl_edges.emplace_back(input_edge);
+      } else {
+        auto dtype = node->input_type(input_edge->dst_input());
+        if ((dtype != DT_STRING) && (dtype != DT_RESOURCE)) {
+          ADP_LOG(INFO) << "Get IdentityN output " << index << " to SaveV2 input " << input_edge->dst_input();
+          data_nodes.emplace_back(tensorflow::NodeBuilder::NodeOut{input_edge->src(), input_edge->src_output()});
+          del_data_edges[index++] = input_edge;
+        }
+      }
+    }
+
+    if (data_nodes.size() <= 1UL) {
+      continue;
+    }
+    Node *identity_n_node = nullptr;
+    // Report a build failure to the caller instead of aborting the process.
+    TF_RETURN_IF_ERROR(NodeBuilder(strings::StrCat(node->name(), "dummyIdentityN"), "IdentityN")
+                           .Input(data_nodes)
+                           .ControlInputs(ctrl_nodes)
+                           .Device(node->def().device())
+                           .Finalize(&graph, &identity_n_node));
+    REQUIRES_NOT_NULL(identity_n_node);
+
+    for (const auto &item : del_data_edges) {
+      // Read the destination slot before removal; the Edge must not be dereferenced after RemoveEdge.
+      const int dst_input = item.second->dst_input();
+      graph.RemoveEdge(item.second);
+      (void) graph.AddEdge(identity_n_node, item.first, node, dst_input);
+    }
+    for (const Edge *ctrl_edge : del_ctrl_edges) {
+      graph.RemoveEdge(ctrl_edge);
+    }
+  }
+  return Status::OK();
+}
+
 Status MarkForPartition(const std::unique_ptr<Graph> *graph_in, int &clusterNum, int graph_num,
                         const FunctionLibraryDefinition *func_lib, std::map<std::string, std::string> pass_options,
                         std::map<std::string, std::string> &graph_options) {
@@ -925,6 +988,9 @@ Status MarkForPartition(const std::unique_ptr<Graph> *graph_in, int &clusterNum,
   bool mix_compile_mode = pass_options["mix_compile_mode"] == "1";
   bool is_set_lazy_recompile = graph_options["dynamic_input"] == "1" &&
                                graph_options["dynamic_graph_execute_mode"] == "lazy_recompile";
+  if (mix_compile_mode) {
+    TF_RETURN_IF_ERROR(OptimizeSaveV2InMixeMode(*graph));
+  }
   OrderedNodeSet npuSupportCandidates;
   if (!pass_options["in_out_pair"].empty()) {
     if (!mix_compile_mode) {
@@ -1082,6 +1148,8 @@ Status MarkForPartition(const std::unique_ptr<Graph> *graph_in, int &clusterNum,
     }
     (void) seen.insert(cluster);
   }
+
+  ADP_LOG(INFO) << "The size of clusterSet is " << clusterSet.size();
   // Generate Merge possibility between clusters
   if (clusterSet.size() > 1) {
     for (int src : clusterSet) {
diff --git a/tf_adapter/tests/st/optimizers/testcase/om_partition_subgraphs_pass_test.cc b/tf_adapter/tests/st/optimizers/testcase/om_partition_subgraphs_pass_test.cc
index d3dce45b64323613b1397d1866451887669db598..a3521c6013ea072962b805d4e519986b790e53e0 100644
--- a/tf_adapter/tests/st/optimizers/testcase/om_partition_subgraphs_pass_test.cc
+++ b/tf_adapter/tests/st/optimizers/testcase/om_partition_subgraphs_pass_test.cc
@@ -210,6 +210,12 @@ TEST_F(OmOptimizationPassTest, StringInputMaxSizeTest) {
   std::string target_graph = DoRunOmOptimizationPassTest();
   EXPECT_EQ(target_graph, "arg_input_0_0->DecodeJpeg;DecodeJpeg->retval_DecodeJpeg_0_0");
 }
+TEST_F(OmOptimizationPassTest, MixCompileSaveV2Test) {
+  std::string org_graph_def_path = "tf_adapter/tests/ut/optimizers/pbtxt/om_test_mix_compile_savev2.pbtxt";
+  InitGraph(org_graph_def_path);
+  auto ret = DoRunOmOptimizationPassTest();
+  EXPECT_NE(ret.find("IdentityN"), std::string::npos);
+}
 TEST_F(OmOptimizationPassTest, GetOppPluginVendorsTest01) {
   SetLogLevelForC(0);
   std::string opp_path = __FILE__;
diff --git a/tf_adapter/tests/ut/optimizers/pbtxt/om_test_mix_compile_savev2.pbtxt b/tf_adapter/tests/ut/optimizers/pbtxt/om_test_mix_compile_savev2.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..1c34a263b7e23e8d8b80032b63e1d166ada9a2ee
--- /dev/null
+++ b/tf_adapter/tests/ut/optimizers/pbtxt/om_test_mix_compile_savev2.pbtxt
@@ -0,0 +1,183 @@
+node {
+  name: "save/const0"
+  op: "Const"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "_NpuOptimizer"
+    value {
+      s: "NpuOptimizer"
+    }
+  }
+  attr {
+    key: "_mix_compile_mode"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_use_off_line"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "_iterations_per_loop"
+    value {
+      s: "1"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 5
+      }
+    }
+  }
+}
+node {
+  name: "save/SaveV2/tensor_names"
+  op: "Const"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        int64_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "save/SaveV2/shape_and_slices"
+  op: "Const"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        int64_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "biases1"
+  op: "VariableV2"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "container"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "weights1"
+  op: "VariableV2"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "container"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "save/SaveV2"
+  op: "SaveV2"
+  input: "save/const0"
+  input: "save/SaveV2/tensor_names"
+  input: "save/SaveV2/shape_and_slices"
+  input: "biases1"
+  input: "weights1"
+  device: "/job:localhost/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "dtypes"
+    value {
+      list {
+        type: DT_FLOAT
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+library {
+}
+versions {
+  producer: 134
+}
diff --git a/tf_adapter/tests/ut/optimizers/testcase/om_partition_subgraphs_pass_test.cc b/tf_adapter/tests/ut/optimizers/testcase/om_partition_subgraphs_pass_test.cc
index 974d9fa8817277f5fb97aa5444779c01b498cae0..1bcafeea7ecf36456fc2cf190fd6c1a30f27940d 100644
--- a/tf_adapter/tests/ut/optimizers/testcase/om_partition_subgraphs_pass_test.cc
+++ b/tf_adapter/tests/ut/optimizers/testcase/om_partition_subgraphs_pass_test.cc
@@ -216,6 +216,12 @@ TEST_F(OmOptimizationPassTest, StringInputMaxSizeTest) {
   std::string target_graph = DoRunOmOptimizationPassTest();
   EXPECT_EQ(target_graph, "arg_input_0_0->DecodeJpeg;DecodeJpeg->retval_DecodeJpeg_0_0");
 }
+TEST_F(OmOptimizationPassTest, MixCompileSaveV2Test) {
+  std::string org_graph_def_path = "tf_adapter/tests/ut/optimizers/pbtxt/om_test_mix_compile_savev2.pbtxt";
+  InitGraph(org_graph_def_path);
+  auto ret = DoRunOmOptimizationPassTest();
+  EXPECT_NE(ret.find("IdentityN"), std::string::npos);
+}
 TEST_F(OmOptimizationPassTest, GetOppPluginVendorsTest01) {
   SetLogLevelForC(0);
   std::string opp_path = __FILE__;