From 62b607fa61762a8b96733458dcf297973352bcaa Mon Sep 17 00:00:00 2001
From: zhao-lupeng
Date: Fri, 4 Nov 2022 14:39:41 +0800
Subject: [PATCH 1/2] function test

---
 .../core/op_executors/npu_concrete_graph.cpp | 34 +++++++++++++++----
 .../core/op_executors/npu_concrete_graph.h   |  2 ++
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.cpp b/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.cpp
index e8e496b04..9546b77fc 100644
--- a/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.cpp
+++ b/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.cpp
@@ -103,6 +103,11 @@ void NpuConcreteGraph::RunImpl(TFE_Context *context, NpuDevice *device, int tf_n
                        static_cast<int>(output_handles_.size()), output_handles_.data(), status);
     timer.Stop();
   } else {
+    bool jit_compile = device->device_options["ge.jit_compile"] == "1";
+    DLOG() << "jit_compile value is " << jit_compile;
+    if (!jit_compile) {
+      FuzzAllShape();
+    }
     bool loaded = false;
     Load(context, device, loaded, status);
     NPU_REQUIRES_TFE_OK(status);
@@ -133,6 +138,28 @@ void NpuConcreteGraph::RunImpl(TFE_Context *context, NpuDevice *device, int tf_n
   }
 }
 
+void NpuConcreteGraph::FuzzAllShape() const {
+  for (auto node : graph_->op_nodes()) {
+    if (!node->IsArg()) {
+      continue;
+    }
+    const tensorflow::AttrValue *shape_attr = node->attrs().Find("_output_shapes");
+    tensorflow::PartialTensorShape value_shape(shape_attr->list().shape(0));
+    DLOG() << "Refresh value_shape for " << node->name() << " shape to " << value_shape.DebugString()
+           << " value_shape dims " << value_shape.dims();
+    tensorflow::PartialTensorShape shape;
+    std::vector<int> dims;
+    for (int j = 0; j < value_shape.dims(); j++) {
+      dims.push_back(-1);
+    }
+    auto status =
+        tensorflow::PartialTensorShape::MakePartialShape(dims.data(), static_cast<int>(dims.size()), &shape);
+    DLOG() << "Refresh _output_shapes for " << node->name() << " shape to " << shape.DebugString();
+    node->ClearAttr("_output_shapes");
+    node->AddAttr("_output_shapes", std::vector<tensorflow::PartialTensorShape>{shape});
+  }
+}
+
 void NpuConcreteGraph::RunAoeTuning(TFE_Context *context, NpuDevice *device, std::vector<TFE_TensorHandle *> inputs,
                                     bool loaded, TF_Status *status) const {
   if (function_op_) {
@@ -199,13 +226,8 @@ void NpuConcreteGraph::Load(TFE_Context *context, NpuDevice *device, bool &loade
       {"ge.recompute", npu::GetRunContextOptions().memory_optimize_options.recompute},
       {"ge.graphParallelOptionPath", npu::GetRunContextOptions().graph_parallel_configs.config_path},
       {"ge.enableGraphParallel", npu::GetRunContextOptions().graph_parallel_configs.enable_graph_parallel}};
-  const static std::map<std::string, std::string> kFuzzCompileOptions{
-      {ge::OPTION_EXEC_DYNAMIC_INPUT, "1"},
-      {ge::OPTION_EXEC_DYNAMIC_EXECUTE_MODE, "dynamic_execute"},
-      {ge::SHAPE_GENERALIZED_BUILD_MODE, "shape_generalized"}};
-  const auto need_fuzz_compile = NeedFuzzCompile();
   if (device->AddGeGraphInner(context, GeGraphId(), Op(), GraphDef(), (loop_type_ == LoopType::NPU_LOOP), status,
-                              (need_fuzz_compile ? kFuzzCompileOptions : kOptions)) == kEmptyGeGraphId) {
+                              kOptions) == kEmptyGeGraphId) {
     empty_ge_graph_ = true;
   }
   NPU_REQUIRES_TFE_OK(status);
diff --git a/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.h b/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.h
index c137cd8bc..6e1da540e 100644
--- a/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.h
+++ b/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.h
@@ -51,6 +51,8 @@ class NpuConcreteGraph : public OpExecutor {
 
   uint64_t GeGraphId() const { return ge_graph_id_; }
 
+  void FuzzAllShape() const;
+
   const std::map<int, std::shared_ptr<IteratorResourceProvider>> &ConsumedIteratos() const { return consumed_iterators_; }
 
--
Gitee

From 892b612f053d1279e646c42b651100d227bc1c14 Mon Sep 17 00:00:00 2001
From: zhao-lupeng
Date: Sat, 5 Nov 2022 11:29:39 +0800
Subject: [PATCH 2/2] add test

---
 .../core/op_executors/npu_concrete_graph.cpp |  1 +
 .../tests/st/adapter2_jit_compile_st.py      | 21 +++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.cpp b/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.cpp
index 9546b77fc..e1d28fdc2 100644
--- a/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.cpp
+++ b/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.cpp
@@ -140,6 +140,7 @@ void NpuConcreteGraph::RunImpl(TFE_Context *context, NpuDevice *device, int tf_n
 
 void NpuConcreteGraph::FuzzAllShape() const {
   for (auto node : graph_->op_nodes()) {
+    DLOG() << "node IsArg " << node->name();
     if (!node->IsArg()) {
       continue;
     }
diff --git a/tf_adapter_2.x/tests/st/adapter2_jit_compile_st.py b/tf_adapter_2.x/tests/st/adapter2_jit_compile_st.py
index c0aeb1d34..97e1e38d5 100644
--- a/tf_adapter_2.x/tests/st/adapter2_jit_compile_st.py
+++ b/tf_adapter_2.x/tests/st/adapter2_jit_compile_st.py
@@ -19,6 +19,8 @@
 import os
 import time
 
+os.environ['ASCEND_OPP_PATH'] = 'non-existed-path'
+
 import unittest
 import tensorflow as tf
 import npu_device
@@ -27,8 +29,20 @@ from npu_device.npu_device import stupid_repeat
 from tensorflow.python.eager import context
 
 npu_device.global_options().jit_compile = False
+os.environ['RANK_TABLE_FILE'] = "rankTable"
+os.environ['RANK_SIZE'] = "2"
+os.environ['RANK_ID'] = "1"
 npu = npu_device.open().as_default()
+npu.workers_num = 2  # mock run in 2P env
+
+@tf.function
+def foo_add_(v):
+    return v.assign_add(1)
+@tf.function
+def foo_cpu_add_(v):
+    with context.device("/job:localhost/replica:0/task:0/device:CPU:0"):
+        return v.assign_add(1)
 
 
 def tensor_equal(t1, t2):
     return True
@@ -54,5 +68,12 @@ class Adapter2JitCompileSt(unittest.TestCase):
         self.assertTrue(tensor_equal(f(iterator), tf.constant([4.0, 6.0])))
         self.assertTrue(tensor_equal(f(iterator), tf.constant([8.0, 10.0, 12.0])))
 
+    def test_fully_jit_compile_fuzz_compile(self):
+        @tf.function(input_signature=[tf.TensorSpec(shape=(2))])
+        def f(v):
+            return v + v
+
+        self.assertTrue(tensor_equal(f([1.0, 2.0]), tf.constant([2.0, 4.0])))
+
 if __name__ == '__main__':
     unittest.main()
--
Gitee
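
Note on the approach: the core of FuzzAllShape() is shape generalization. For every _Arg node it rewrites the "_output_shapes" attribute so that each known dimension becomes -1 (unknown), letting GE compile one dynamic-shape graph when ge.jit_compile is "0" instead of recompiling per concrete input shape. A minimal Python sketch of the same transformation, using only standard TensorFlow (fuzz_shape is an illustrative name, not part of this patch):

    import tensorflow as tf

    def fuzz_shape(shape: tf.TensorShape) -> tf.TensorShape:
        # Replace every known dimension with unknown (None here, -1 in
        # PartialTensorShape), mirroring what FuzzAllShape() does per _Arg node.
        return tf.TensorShape([None] * shape.rank)

    print(fuzz_shape(tf.TensorShape([2, 3])))  # (None, None)
    print(fuzz_shape(tf.TensorShape([8])))     # (None,)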
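Note on the Load() change: the per-graph kFuzzCompileOptions (OPTION_EXEC_DYNAMIC_INPUT, dynamic_execute, shape_generalized) and the NeedFuzzCompile() check are deleted, so AddGeGraphInner() now always receives kOptions. Shape generalization happens on the TensorFlow graph itself via FuzzAllShape() before Load() whenever jit_compile is off, rather than through GE build options at graph-add time.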