diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc index aff1db6be2906e962335ca2b6db7f871898b51d2..1b9b91e134e14ab831987269ef67915da5515479 100644 --- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc @@ -84,6 +84,22 @@ TEST_F(NpuAttrTest, CheckAoeMode) { EXPECT_EQ(s.ok(), false); } +TEST_F(NpuAttrTest, CheckPrecisionMode) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue precision_mode = AttrValue(); + precision_mode.set_s("force_Dp32"); + (*custom_config->mutable_parameter_map())["precision_mode"] = precision_mode; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); + EXPECT_EQ(s.ok(), false); +} + TEST_F(NpuAttrTest, GetDumpPath) { setenv("DUMP_GRAPH_PATH", "./", 1); string path = GetDumpPath(); diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc index cf6b30c7005601f0c9fbaad7b0aa12863223ab14..dc796242b1b1124ae18f82b45bb9b545786cb5c3 100644 --- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc @@ -85,6 +85,22 @@ TEST_F(NpuAttrTest, CheckAoeMode) { EXPECT_EQ(s.ok(), false); } +TEST_F(NpuAttrTest, CheckPrecisionMode) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + 
custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue precision_mode = AttrValue(); + precision_mode.set_s("force_Dp32"); + (*custom_config->mutable_parameter_map())["precision_mode"] = precision_mode; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); + EXPECT_EQ(s.ok(), false); +} + TEST_F(NpuAttrTest, GetDumpPath) { setenv("DUMP_GRAPH_PATH", "./", 1); string path = GetDumpPath(); diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 630da0290fb6720f052744780406a2d1ca8eb19d..cbf1af0adf217e6552a2f15aa6e1b81bb0b1dd34 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -1732,6 +1732,15 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options } if (params.count("precision_mode") > 0) { precision_mode = params.at("precision_mode").s(); + std::vector<std::string> precision_mode_list = {"force_fp32", "allow_fp32_to_fp16", + "force_fp16", "must_keep_origin_dtype", + "allow_mix_precision", "cube_fp16in_fp32out"}; + Status s = CheckValueAllowed(precision_mode, precision_mode_list); + if (!s.ok()) { + ADP_LOG(ERROR) << s.error_message(); + LOG(ERROR) << s.error_message(); + return errors::Internal(s.error_message()); + } } else { if (static_cast<bool>(graph_run_mode)) { precision_mode = "allow_fp32_to_fp16"; diff --git a/tf_adapter/util/npu_attrs.h b/tf_adapter/util/npu_attrs.h index 048bbeb3f1446df909f172cbe14c3af2cd00af70..7e73c693a2c100c8c9b0b5836d79e9a8ca823301 100644 --- a/tf_adapter/util/npu_attrs.h +++ b/tf_adapter/util/npu_attrs.h @@ -17,6 +17,9 @@ #ifndef TENSORFLOW_NPU_ATTRS_H_ #define TENSORFLOW_NPU_ATTRS_H_ +#include <algorithm> +#include <sstream> +#include <vector> #include <map> #include <string> #include "ge/ge_api_types.h" @@ -58,6 +61,32 @@ class NpuAttrs { static bool GetNewDataTransferFlag(); // only use for ut/st static void SetNewDataTransferFlag(bool flag); + template <typename T> + static std::string VectorToString(const std::vector<T> &values) { + std::stringstream ss; + ss << '['; + const 
auto size = values.size(); + for (size_t i = 0U; i < size; ++i) { + ss << values[i]; + if (i != (size - 1U)) { + ss << ", "; + } + } + ss << ']'; + return ss.str(); + } + template <typename T> + static Status CheckValueAllowed(const T &v, const std::vector<T> &allowed_values) { + if (find(allowed_values.begin(), allowed_values.end(), v) != allowed_values.cend()) { + return Status::OK(); + } else { + std::stringstream ss; + ss << v << " is invalid, it should be one of the list:"; + ss << VectorToString(allowed_values); + return errors::InvalidArgument(ss.str()); + } + } + private: static bool CheckIsNewDataTransfer(); static std::map<std::string, std::string> turn_on_tdt_info_; diff --git a/tf_adapter_2.x/npu_device/core/npu_device.cpp b/tf_adapter_2.x/npu_device/core/npu_device.cpp index f1d6f0f8e7dad5c623bb148175e9078f8eac7540..1849e34ddaaca2606467bf894a82bf41bcc1b547 100644 --- a/tf_adapter_2.x/npu_device/core/npu_device.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_device.cpp @@ -808,7 +808,7 @@ void NpuDevice::RunGeGraphAsync(TFE_Context *context, uint64_t graph_id, int num if (err_msg.empty()) { err_msg = " code:" + std::to_string(s); } - done(tensorflow::errors::Internal("Graph engine process graph failed: ", err_msg)); + done(tensorflow::errors::Internal("Graph engine process graph failed:\n", err_msg)); return; } else if (ge_outputs.size() != static_cast<std::size_t>(num_outputs)) { done(tensorflow::errors::Internal("Graph engine process graph succeed but output num ", ge_outputs.size(), diff --git a/tf_adapter_2.x/npu_device/core/npu_micros.h b/tf_adapter_2.x/npu_device/core/npu_micros.h index cfeeb4ee06241189296b84df1d92f48809a685ce..933ef44a8c51a399b35660ede228f36eb2bc4d20 100644 --- a/tf_adapter_2.x/npu_device/core/npu_micros.h +++ b/tf_adapter_2.x/npu_device/core/npu_micros.h @@ -79,7 +79,7 @@ if (err_msg.empty()) { \ err_msg = " code:" + std::to_string(_status); \ } \ - CTX->status = tensorflow::errors::Internal(PREFIX, ":", err_msg); \ + CTX->status = tensorflow::errors::Internal(PREFIX, ":\n", err_msg); 
\ LOG(ERROR) << CTX->status.ToString(); \ return; \ } \ @@ -93,7 +93,7 @@ if (err_msg.empty()) { \ err_msg = " code:" + std::to_string(_status); \ } \ - (CTX)->status = tensorflow::errors::Internal(PREFIX, ":", err_msg); \ + (CTX)->status = tensorflow::errors::Internal(PREFIX, ":\n", err_msg); \ LOG(ERROR) << (CTX)->status.ToString(); \ return RET; \ } \ diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index da5492e08447515c8cb7da51ddf5105299f6567e..30791404b0fea48455cc468c937153b4b4c34817 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -37,7 +37,7 @@ class NpuConfig(NpuBaseConfig): self.fusion_switch_file = OptionValue(None, None) self.precision_mode = OptionValue('allow_fp32_to_fp16', ['force_fp32', 'allow_fp32_to_fp16', 'force_fp16', 'must_keep_origin_dtype', - 'allow_mix_precision']) + 'allow_mix_precision', 'cube_fp16in_fp32out']) self.op_select_implmode = DeprecatedValue(['high_performance', 'high_precision'], replacement='op_precision_mode') self.optypelist_for_implmode = DeprecatedValue(None, replacement='op_precision_mode')