diff --git a/tf_adapter/kernels/aicpu/npu_cpu_ops.cc b/tf_adapter/kernels/aicpu/npu_cpu_ops.cc
index ff9dc6d2ffe9fbae28d9febe6bef0b50b4a5d5df..58299c2f628114fc78dc7f4d86bf048e5d58fa28 100644
--- a/tf_adapter/kernels/aicpu/npu_cpu_ops.cc
+++ b/tf_adapter/kernels/aicpu/npu_cpu_ops.cc
@@ -221,6 +221,15 @@ class ScatterElementsV2Op : public OpKernel {
   }
 };
 
+// CPU kernel registered for the PrecisionCompare op. Compute only emits an
+// INFO log; no comparison is performed by this kernel.
+class PrecisionCompareOp : public OpKernel {
+ public:
+  explicit PrecisionCompareOp(OpKernelConstruction *context) : OpKernel(context) {}
+  ~PrecisionCompareOp() override {}
+  void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "PrecisionCompareOp Compute"; }
+};
+
 REGISTER_KERNEL_BUILDER(Name("ScatterElementsV2").Device(DEVICE_CPU), ScatterElementsV2Op);
 REGISTER_KERNEL_BUILDER(Name("EmbeddingRankId").Device(DEVICE_CPU), EmbeddingRankIdOpKernel);
 REGISTER_KERNEL_BUILDER(Name("EmbeddingLocalIndex").Device(DEVICE_CPU), EmbeddingLocalIndexOpKernel);
@@ -242,6 +251,7 @@ REGISTER_KERNEL_BUILDER(Name("NonZeroWithValueShape").Device(DEVICE_CPU), NonZer
 REGISTER_KERNEL_BUILDER(Name("NonZeroWithValueShapeV2").Device(DEVICE_CPU), NonZeroWithValueShapeV2Op);
 REGISTER_KERNEL_BUILDER(Name("WarpAffineV2").Device(DEVICE_CPU), WarpAffineV2Op);
 REGISTER_KERNEL_BUILDER(Name("ResizeV2").Device(DEVICE_CPU), ResizeV2Op);
+REGISTER_KERNEL_BUILDER(Name("PrecisionCompare").Device(DEVICE_CPU), PrecisionCompareOp);
 
 class DecodeImageV3Op : public OpKernel {
  public:
diff --git a/tf_adapter/ops/aicpu/npu_cpu_ops.cc b/tf_adapter/ops/aicpu/npu_cpu_ops.cc
index 40857b44989f707098f76c2f6a00ca1930d70969..d710815006283b1c31984a0de85c539cbf90dfd4 100644
--- a/tf_adapter/ops/aicpu/npu_cpu_ops.cc
+++ b/tf_adapter/ops/aicpu/npu_cpu_ops.cc
@@ -627,4 +627,15 @@ REGISTER_OP("ResizeV2")
     .Output("image: dtype")
     .Attr("expand_animations: bool = true")
     .SetShapeFn(DecodeImageV3ShapeFn);
+
+// Precision-compare op taking two same-typed inputs (x1, x2) and a detect_type attr.
+// NOTE(review): the scalar uint32 output is inferred from ScalarShape and the Python
+// wrapper's documented return type; the name "y" is assumed - confirm with the NPU op proto.
+REGISTER_OP("PrecisionCompare")
+    .Input("x1: T")
+    .Input("x2: T")
+    .Output("y: uint32")
+    .Attr("detect_type: int")
+    .Attr("T: {float32, float16, bfloat16}")
+    .SetShapeFn(tensorflow::shape_inference::ScalarShape);
 }  // namespace tensorflow
diff --git a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py
index 417119584f09363575c133283ad275aab85380dd..4d6d77b19ede0a15a6ab65a7d7cbf4b0afb75047 100644
--- a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py
+++ b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py
@@ -558,3 +558,14 @@ def embedding_hashmap_import_v2(file_path, table_ids, table_sizes, table_names,
         file_path=file_path, table_ids=table_ids, table_sizes=table_sizes, table_names=table_names,
         global_step=global_step, embedding_dims=embedding_dims, num=num)
     return result
+
+## Provides a hardware precision check.
+# @param golden supports float/float16/bfloat16 types
+# @param realdata supports float/float16/bfloat16 types
+# @param detect_type int32 type, valid values: 0/1/2
+# @return uint32
+def precision_compare(golden, realdata, detect_type):
+    """ precision compare. """
+    result = gen_npu_cpu_ops.precision_compare(
+        x1=golden, x2=realdata, detect_type=detect_type)
+    return result