From 814bc67e69ab088b7d8625a7d9b61bc90f3aba9c Mon Sep 17 00:00:00 2001
From: shenpengcheng <shenpengcheng5@huawei.com>
Date: Sun, 20 Feb 2022 15:56:01 +0800
Subject: [PATCH] =?UTF-8?q?Expm1=E7=AE=97=E5=AD=90=E8=BF=81=E7=A7=BB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1.8_expm1

1.8_expm1

code fix
---
 test/test_network_ops/test_expm1.py        | 155 +++++++++++++++++++++
 torch_npu/csrc/aten/ops/Expm1KernelNpu.cpp |  51 +++++++
 2 files changed, 206 insertions(+)
 create mode 100644 test/test_network_ops/test_expm1.py
 create mode 100644 torch_npu/csrc/aten/ops/Expm1KernelNpu.cpp

diff --git a/test/test_network_ops/test_expm1.py b/test/test_network_ops/test_expm1.py
new file mode 100644
index 00000000000..2f32061883a
--- /dev/null
+++ b/test/test_network_ops/test_expm1.py
@@ -0,0 +1,155 @@
+# Copyright (c) 2020, Huawei Technologies.All rights reserved.
+#
+# Licensed under the BSD 3-Clause License  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+import torch_npu
+import numpy as np
+
+from torch_npu.testing.common_utils import TestCase, run_tests
+from torch_npu.testing.common_device_type import instantiate_device_type_tests
+from torch_npu.testing.util_test import create_common_tensor
+
+class TestExpm1(TestCase):
+    def get_shapeFormat1(self):
+        shape_format = [
+                [np.float32, -1 , (4, 3)],
+                [np.float32, -1, (2, 4, 3)],
+                [np.float32, 3, (20, 13)],
+                [np.float32, 4, (20, 13)],
+                [np.float32, 29, (20, 13)]
+        ]
+        return shape_format
+
+    def get_shapeFormat2(self):
+        shape_format = [
+                [np.float32, -1, (4, 3)],
+                [np.float32, 0 , (4, 3)],
+                [np.float32, -1, (2, 4, 3)],
+                [np.float32, 3, (20, 13)],
+                [np.float32, 4, (20, 13)],
+                [np.float32, 29, (20, 13)]        
+        ]
+        return shape_format
+
+    def get_shapeFormat3(self):
+        shape_format = [
+                [np.float16, -1, (4, 3)],
+                [np.float16, 0 , (4, 3)],
+                [np.float16, -1, (2, 4, 3)],
+                [np.float16, -1, (100, 20, 10)],
+                [np.float16, 3, (20, 13)],
+                [np.float16, 4, (20, 13)],
+                [np.float16, 29, (20, 13)]
+        ]
+        return shape_format
+
+    def cpu_op_exec(self, input1):
+        output = torch.expm1(input1)
+        output = output.numpy()
+        return output
+
+    def npu_op_exec(self, input1):
+        output = torch.expm1(input1)
+        output = output.to("cpu")
+        output = output.numpy()
+        return output
+
+    def cpu_op_exec_(self, input1):
+        torch.expm1_(input1)
+        output = input1.numpy()
+        return output
+
+    def npu_op_exec_(self, input1):
+        torch.expm1_(input1)
+        output = input1.to("cpu")
+        output = output.numpy()
+        return output
+
+    def cpu_op_exec_out(self, input1, out):
+        torch.expm1(input1, out=out)
+        output = out.numpy()
+        return output
+
+    def npu_op_exec_out(self, input1, out):
+        torch.expm1(input1, out=out)
+        output = out.to("cpu")
+        output = output.numpy()
+        return output
+
+    def test_expm1_float32_common_shape_format(self, device):
+        shape_format = self.get_shapeFormat1()
+        for item in shape_format:            
+            cpu_input1, npu_input1 = create_common_tensor(item, 1, 10)
+            cpu_output = self.cpu_op_exec(cpu_input1)
+            npu_output = self.npu_op_exec(npu_input1)
+            self.assertRtolEqual(cpu_output, npu_output)
+    		
+    def test_expm1_float321_common_shape_format(self, device):
+        shape_format = self.get_shapeFormat1()
+        for item in shape_format:        
+            cpu_input1, npu_input1 = create_common_tensor(item, 1, 10)
+            cpu_output = self.cpu_op_exec_(cpu_input1)
+            npu_output = self.npu_op_exec_(npu_input1)
+            self.assertRtolEqual(cpu_output, npu_output)
+	
+    def test_expm1_out_float32_common_shape_format(self, device):
+        shape_format = self.get_shapeFormat2()
+        for item in shape_format:          
+            cpu_input1, npu_input1 = create_common_tensor(item, 1, 10)
+            cpu_out, npu_out = create_common_tensor(item, 1, 10)
+            cpu_output = self.cpu_op_exec_out(cpu_input1,cpu_out)
+            npu_output = self.npu_op_exec_out(npu_input1,npu_out)
+            self.assertRtolEqual(cpu_output, npu_output)
+    
+    def test_expm1_float16_common_shape_format(self, device):
+        shape_format = self.get_shapeFormat2()
+        for item in shape_format:            
+            cpu_input1, npu_input1 = create_common_tensor(item, 1, 10)
+            if item[0] == np.float16:
+                cpu_input1 = cpu_input1.to(torch.float32)
+            cpu_output = self.cpu_op_exec(cpu_input1)
+            npu_output = self.npu_op_exec(npu_input1)
+            if item[0] == np.float16:
+                cpu_output = cpu_output.astype(np.float16)
+            self.assertRtolEqual(cpu_output, npu_output)
+    		
+    def test_expm1_float16__common_shape_format(self, device):
+        shape_format = self.get_shapeFormat3()
+        for item in shape_format:        
+            cpu_input1, npu_input1 = create_common_tensor(item, 1, 10)
+            if item[0] == np.float16:
+                cpu_input1 = cpu_input1.to(torch.float32)
+            cpu_output = self.cpu_op_exec_(cpu_input1)
+            npu_output = self.npu_op_exec_(npu_input1)
+            if item[0] == np.float16:
+                cpu_output = cpu_output.astype(np.float16)
+            self.assertRtolEqual(cpu_output, npu_output)
+	
+    def test_expm1_out_float16_common_shape_format(self, device):
+        shape_format = self.get_shapeFormat3()
+        for item in shape_format:          
+            cpu_input1, npu_input1 = create_common_tensor(item, 1, 10)
+            cpu_out, npu_out = create_common_tensor(item, 1, 10)
+            if item[0] == np.float16:
+                cpu_input1 = cpu_input1.to(torch.float32)
+                cpu_out = cpu_out.to(torch.float32)
+            cpu_output = self.cpu_op_exec_out(cpu_input1,cpu_out)
+            npu_output = self.npu_op_exec_out(npu_input1,npu_out)
+            if item[0] == np.float16:
+                cpu_output = cpu_output.astype(np.float16)
+            self.assertRtolEqual(cpu_output, npu_output)
+
+instantiate_device_type_tests(TestExpm1, globals(), except_for="cpu")  # NOTE(review): presumably skips CPU variants - confirm
+
+if __name__ == "__main__":
+    run_tests()  # only when this file is executed as a script
\ No newline at end of file
diff --git a/torch_npu/csrc/aten/ops/Expm1KernelNpu.cpp b/torch_npu/csrc/aten/ops/Expm1KernelNpu.cpp
new file mode 100644
index 00000000000..a0c8b106afd
--- /dev/null
+++ b/torch_npu/csrc/aten/ops/Expm1KernelNpu.cpp
@@ -0,0 +1,51 @@
+// Copyright (c) 2020, Huawei Technologies.All rights reserved.
+//
+// Licensed under the BSD 3-Clause License  (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h"
+#include "torch_npu/csrc/framework/utils/KernelNpuOutputSize.h"
+#include "torch_npu/csrc/framework/utils/OpAdapter.h"
+#include "torch_npu/csrc/framework/utils/NpuUtils.h"
+#include "torch_npu/csrc/aten/NPUNativeFunctions.h"
+
+namespace at_npu {
+namespace native {
+
+at::Tensor& NPUNativeFunctions::expm1_out(const at::Tensor& self, at::Tensor& out) {
+  // Build and run the "Expm1" operator: `self` is the input and the
+  // result is written into the caller-supplied `out`.
+  OpCommand expm1Cmd;
+  expm1Cmd.Name("Expm1").Input(self).Output(out).Run();
+  // Hand back the same reference that was passed in.
+  return out;
+}
+
+at::Tensor NPUNativeFunctions::expm1(const at::Tensor& self) {
+  // Allocate the result via ApplyTensor with the size from input_same_output_size(self).
+  at::Tensor result = OpPreparation::ApplyTensor(self, input_same_output_size(self));
+  NPUNativeFunctions::expm1_out(self, result);
+  return result;
+}
+
+at::Tensor& NPUNativeFunctions::expm1_(at::Tensor& self) {
+  if (NpuUtils::check_match(&self)) {
+    // check_match passed: run the operator with `self` as both input and output.
+    return NPUNativeFunctions::expm1_out(self, self);
+  }
+  // Otherwise run on the format_contiguous tensor, then refresh `self` from it.
+  at::Tensor contiguous = NpuUtils::format_contiguous(self);
+  NpuUtils::format_fresh_view(self, NPUNativeFunctions::expm1_out(contiguous, contiguous));
+  return self;
+}
+}  // namespace native
+}  // namespace at_npu
-- 
Gitee