From 814bc67e69ab088b7d8625a7d9b61bc90f3aba9c Mon Sep 17 00:00:00 2001 From: shenpengcheng Date: Sun, 20 Feb 2022 15:56:01 +0800 Subject: [PATCH] =?UTF-8?q?Expm1=E7=AE=97=E5=AD=90=E8=BF=81=E7=A7=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.8_expm1 1.8_expm1 code fix --- test/test_network_ops/test_expm1.py | 155 +++++++++++++++++++++ torch_npu/csrc/aten/ops/Expm1KernelNpu.cpp | 51 +++++++ 2 files changed, 206 insertions(+) create mode 100644 test/test_network_ops/test_expm1.py create mode 100644 torch_npu/csrc/aten/ops/Expm1KernelNpu.cpp diff --git a/test/test_network_ops/test_expm1.py b/test/test_network_ops/test_expm1.py new file mode 100644 index 00000000000..2f32061883a --- /dev/null +++ b/test/test_network_ops/test_expm1.py @@ -0,0 +1,155 @@ +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import torch
import torch_npu
import numpy as np

from torch_npu.testing.common_utils import TestCase, run_tests
from torch_npu.testing.common_device_type import instantiate_device_type_tests
from torch_npu.testing.util_test import create_common_tensor


class TestExpm1(TestCase):
    """Check torch.expm1 on NPU against the CPU result.

    Three call styles are covered: functional (``torch.expm1(x)``),
    in-place (``torch.expm1_(x)``) and ``out=``. Each test case is a list
    ``[numpy dtype, format id, shape]`` that is handed unchanged to
    ``create_common_tensor``.
    NOTE(review): the middle entry is presumably the NPU storage format id;
    confirm against create_common_tensor.
    """

    def get_shapeFormat1(self):
        """Return the float32 cases used by the functional and in-place tests."""
        shape_format = [
            [np.float32, -1, (4, 3)],
            [np.float32, -1, (2, 4, 3)],
            [np.float32, 3, (20, 13)],
            [np.float32, 4, (20, 13)],
            [np.float32, 29, (20, 13)],
        ]
        return shape_format

    def get_shapeFormat2(self):
        """Return the float32 cases used by the ``out=`` test."""
        shape_format = [
            [np.float32, -1, (4, 3)],
            [np.float32, 0, (4, 3)],
            [np.float32, -1, (2, 4, 3)],
            [np.float32, 3, (20, 13)],
            [np.float32, 4, (20, 13)],
            [np.float32, 29, (20, 13)],
        ]
        return shape_format

    def get_shapeFormat3(self):
        """Return the float16 cases shared by all float16 tests."""
        shape_format = [
            [np.float16, -1, (4, 3)],
            [np.float16, 0, (4, 3)],
            [np.float16, -1, (2, 4, 3)],
            [np.float16, -1, (100, 20, 10)],
            [np.float16, 3, (20, 13)],
            [np.float16, 4, (20, 13)],
            [np.float16, 29, (20, 13)],
        ]
        return shape_format

    def cpu_op_exec(self, input1):
        """Run functional expm1 on a CPU tensor and return a numpy array."""
        output = torch.expm1(input1)
        output = output.numpy()
        return output

    def npu_op_exec(self, input1):
        """Run functional expm1 on an NPU tensor and return a numpy array."""
        output = torch.expm1(input1)
        output = output.to("cpu")
        output = output.numpy()
        return output

    def cpu_op_exec_(self, input1):
        """Run in-place expm1_ on a CPU tensor and return it as a numpy array."""
        torch.expm1_(input1)
        output = input1.numpy()
        return output

    def npu_op_exec_(self, input1):
        """Run in-place expm1_ on an NPU tensor and return it as a numpy array."""
        torch.expm1_(input1)
        output = input1.to("cpu")
        output = output.numpy()
        return output

    def cpu_op_exec_out(self, input1, out):
        """Run expm1 with ``out=`` on CPU and return ``out`` as a numpy array."""
        torch.expm1(input1, out=out)
        output = out.numpy()
        return output

    def npu_op_exec_out(self, input1, out):
        """Run expm1 with ``out=`` on NPU and return ``out`` as a numpy array."""
        torch.expm1(input1, out=out)
        output = out.to("cpu")
        output = output.numpy()
        return output

    def test_expm1_float32_common_shape_format(self, device):
        """Functional expm1, float32."""
        shape_format = self.get_shapeFormat1()
        for item in shape_format:
            cpu_input1, npu_input1 = create_common_tensor(item, 1, 10)
            cpu_output = self.cpu_op_exec(cpu_input1)
            npu_output = self.npu_op_exec(npu_input1)
            self.assertRtolEqual(cpu_output, npu_output)

    def test_expm1_float321_common_shape_format(self, device):
        """In-place expm1_, float32."""
        shape_format = self.get_shapeFormat1()
        for item in shape_format:
            cpu_input1, npu_input1 = create_common_tensor(item, 1, 10)
            cpu_output = self.cpu_op_exec_(cpu_input1)
            npu_output = self.npu_op_exec_(npu_input1)
            self.assertRtolEqual(cpu_output, npu_output)

    def test_expm1_out_float32_common_shape_format(self, device):
        """expm1 with ``out=``, float32."""
        shape_format = self.get_shapeFormat2()
        for item in shape_format:
            cpu_input1, npu_input1 = create_common_tensor(item, 1, 10)
            cpu_out, npu_out = create_common_tensor(item, 1, 10)
            cpu_output = self.cpu_op_exec_out(cpu_input1, cpu_out)
            npu_output = self.npu_op_exec_out(npu_input1, npu_out)
            self.assertRtolEqual(cpu_output, npu_output)

    def test_expm1_float16_common_shape_format(self, device):
        """Functional expm1, float16.

        The CPU reference is computed in float32 and cast back to float16
        before comparison.
        """
        # Fix: this test previously iterated get_shapeFormat2(), which holds
        # only float32 cases, so the float16 path was never exercised.
        shape_format = self.get_shapeFormat3()
        for item in shape_format:
            cpu_input1, npu_input1 = create_common_tensor(item, 1, 10)
            if item[0] == np.float16:
                cpu_input1 = cpu_input1.to(torch.float32)
            cpu_output = self.cpu_op_exec(cpu_input1)
            npu_output = self.npu_op_exec(npu_input1)
            if item[0] == np.float16:
                cpu_output = cpu_output.astype(np.float16)
            self.assertRtolEqual(cpu_output, npu_output)

    def test_expm1_float16__common_shape_format(self, device):
        """In-place expm1_, float16 (CPU reference computed in float32)."""
        shape_format = self.get_shapeFormat3()
        for item in shape_format:
            cpu_input1, npu_input1 = create_common_tensor(item, 1, 10)
            if item[0] == np.float16:
                cpu_input1 = cpu_input1.to(torch.float32)
            cpu_output = self.cpu_op_exec_(cpu_input1)
            npu_output = self.npu_op_exec_(npu_input1)
            if item[0] == np.float16:
                cpu_output = cpu_output.astype(np.float16)
            self.assertRtolEqual(cpu_output, npu_output)

    def test_expm1_out_float16_common_shape_format(self, device):
        """expm1 with ``out=``, float16 (CPU reference computed in float32)."""
        shape_format = self.get_shapeFormat3()
        for item in shape_format:
            cpu_input1, npu_input1 = create_common_tensor(item, 1, 10)
            cpu_out, npu_out = create_common_tensor(item, 1, 10)
            if item[0] == np.float16:
                # Input and out buffer are both upcast so their dtypes match.
                cpu_input1 = cpu_input1.to(torch.float32)
                cpu_out = cpu_out.to(torch.float32)
            cpu_output = self.cpu_op_exec_out(cpu_input1, cpu_out)
            npu_output = self.npu_op_exec_out(npu_input1, npu_out)
            if item[0] == np.float16:
                cpu_output = cpu_output.astype(np.float16)
            self.assertRtolEqual(cpu_output, npu_output)


instantiate_device_type_tests(TestExpm1, globals(), except_for="cpu")

if __name__ == "__main__":
    run_tests()
+ +#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" +#include "torch_npu/csrc/framework/utils/KernelNpuOutputSize.h" +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/framework/utils/NpuUtils.h" +#include "torch_npu/csrc/aten/NPUNativeFunctions.h" + +namespace at_npu { +namespace native { + +at::Tensor& NPUNativeFunctions::expm1_out(const at::Tensor& self, at::Tensor& out) { + OpCommand cmd; + cmd.Name("Expm1") + .Input(self) + .Output(out) + .Run(); + return out; +} + +at::Tensor NPUNativeFunctions::expm1(const at::Tensor& self) { + auto outputSize = input_same_output_size(self); + at::Tensor result = OpPreparation::ApplyTensor(self, outputSize); + NPUNativeFunctions::expm1_out(self, result); + return result; +} + +at::Tensor& NPUNativeFunctions::expm1_(at::Tensor& self) { + if (!NpuUtils::check_match(&self)) { + at::Tensor contiguousSelf = NpuUtils::format_contiguous(self); + at::Tensor result = NPUNativeFunctions::expm1_out(contiguousSelf, contiguousSelf); + NpuUtils::format_fresh_view(self, result); + } else { + NPUNativeFunctions::expm1_out(self, self); + } + return self; +} +} // namespace native +} // namespace at_npu -- Gitee