From 30c182449e6842a0f6df0cf8f5f6d428a275972e Mon Sep 17 00:00:00 2001
From: hxf12345677 <houxiaofang@h-partners.com>
Date: Mon, 21 Feb 2022 13:51:37 +0800
Subject: [PATCH] =?UTF-8?q?reflection=5Fpad1c1.8.1=E7=AE=97=E5=AD=90?=
 =?UTF-8?q?=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../test_network_ops/test_reflection_pad1d.py | 117 ++++++++++++++++++
 .../aten/ops/ReflectionPad1dKernelNpu.cpp     | 109 ++++++++++++++++
 2 files changed, 226 insertions(+)
 create mode 100644 test/test_network_ops/test_reflection_pad1d.py
 create mode 100644 torch_npu/csrc/aten/ops/ReflectionPad1dKernelNpu.cpp

diff --git a/test/test_network_ops/test_reflection_pad1d.py b/test/test_network_ops/test_reflection_pad1d.py
new file mode 100644
index 0000000000..13a6ddceb6
--- /dev/null
+++ b/test/test_network_ops/test_reflection_pad1d.py
@@ -0,0 +1,117 @@
+# Copyright (c) 2020, Huawei Technologies.All rights reserved.
+#
+# Licensed under the BSD 3-Clause License  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torch_npu
+import numpy as np
+
+from torch_npu.testing.common_utils import TestCase, run_tests
+from torch_npu.testing.common_device_type import instantiate_device_type_tests
+from torch_npu.testing.util_test import create_common_tensor
+
+class TestReflectionPad1d(TestCase):
+    """Checks reflection_pad1d results on NPU against the CPU results."""
+
+    def cpu_op_out_exec(self, input1, pad, output):
+        """Call reflection_pad1d with ``out=output``; return the result as numpy."""
+        return torch._C._nn.reflection_pad1d(input1, pad, out=output).numpy()
+
+    def npu_op_out_exec(self, input1, pad, output):
+        """Like cpu_op_out_exec, but copies the result to the CPU before numpy()."""
+        m_n = torch._C._nn.reflection_pad1d(input1, pad, out=output)
+        return m_n.to("cpu").numpy()
+
+    def cpu_op_exec(self, input1, pad):
+        """Apply torch.nn.ReflectionPad1d(pad) to input1; return the result as numpy."""
+        return torch.nn.ReflectionPad1d(pad)(input1).numpy()
+
+    def npu_op_exec(self, input1, pad):
+        """Like cpu_op_exec, but copies the result to the CPU before numpy()."""
+        output = torch.nn.ReflectionPad1d(pad)(input1)
+        return output.to("cpu").numpy()
+
+    def test_reflection_pad1d_out_shape_format_fp16(self, device):
+        """out= variant with float16 inputs; the CPU side computes in float32."""
+        # Each entry is [create_common_tensor spec, padding]; the spec looks like
+        # [dtype, NPU format id, shape] -- TODO confirm against create_common_tensor.
+        shape_format = [
+            [[np.float16, 2, (1, 2, 4)], [3, 1]],
+            [[np.float16, 3, (1, 2, 4)], [3, 1]]
+        ]
+
+        def cpu_op_out_exec_fp16(input1, pad, output):
+            # Upcast to float32 for the CPU call, then cast the result back to float16.
+            input1 = input1.to(torch.float32)
+            m = torch._C._nn.reflection_pad1d(input1, pad, out=output)
+            return m.numpy().astype(np.float16)
+
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100)
+            # Note: the out tensor's initial shape differs from the padded shape.
+            cpuout = torch.randn(1, 3, 3)
+            npuout = cpuout.to(npu_input1.dtype).npu()
+            cpu_output = cpu_op_out_exec_fp16(cpu_input1, item[1], cpuout)
+            npu_output = self.npu_op_out_exec(npu_input1, item[1], npuout)
+            self.assertRtolEqual(cpu_output, npu_output)
+
+    def test_reflection_pad1d_out_shape_format_fp32(self, device):
+        """out= variant with float32 inputs."""
+        shape_format = [
+            [[np.float32, 0, (1, 2, 4)], [3, 1]],
+            [[np.float32, 2, (1, 2, 4)], [3, 1]]
+        ]
+
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100)
+            cpuout = torch.randn(1, 3, 3)
+            npuout = cpuout.to(npu_input1.dtype).npu()
+            cpu_output = self.cpu_op_out_exec(cpu_input1, item[1], cpuout)
+            npu_output = self.npu_op_out_exec(npu_input1, item[1], npuout)
+            self.assertRtolEqual(cpu_output, npu_output)
+
+    def test_reflection_pad1d_shape_format_fp16(self, device):
+        """Module variant with float16 inputs; the CPU side computes in float32."""
+        shape_format = [
+            [[np.float16, 0, (2, 10, 12)], [4, 3]],
+            [[np.float16, 3, (2, 10, 12)], [4, 3]]
+        ]
+
+        def cpu_op_exec_fp16(input1, pad):
+            # Upcast to float32 for the CPU call, then cast the result back to float16.
+            input1 = input1.to(torch.float32)
+            output = torch.nn.ReflectionPad1d(pad)(input1)
+            return output.numpy().astype(np.float16)
+
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100)
+            cpu_output = cpu_op_exec_fp16(cpu_input1, item[1])
+            npu_output = self.npu_op_exec(npu_input1, item[1])
+            self.assertRtolEqual(cpu_output, npu_output)
+
+    def test_reflection_pad1d_shape_format_fp32(self, device):
+        """Module variant with float32 inputs, run over two distinct tensor specs."""
+        shape_format = [
+            [[np.float32, 0, (2, 10, 12)], [4, 3]],
+            [[np.float32, 2, (2, 10, 12)], [4, 3]]
+        ]
+
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100)
+            cpu_output = self.cpu_op_exec(cpu_input1, item[1])
+            npu_output = self.npu_op_exec(npu_input1, item[1])
+            self.assertRtolEqual(cpu_output, npu_output)
+
+instantiate_device_type_tests(TestReflectionPad1d, globals(), except_for="cpu")  # NOTE(review): presumably skips CPU -- confirm
+if __name__ == "__main__":
+    run_tests()  # executed only when this file is run as a script
\ No newline at end of file
diff --git a/torch_npu/csrc/aten/ops/ReflectionPad1dKernelNpu.cpp b/torch_npu/csrc/aten/ops/ReflectionPad1dKernelNpu.cpp
new file mode 100644
index 0000000000..0e33c3b20e
--- /dev/null
+++ b/torch_npu/csrc/aten/ops/ReflectionPad1dKernelNpu.cpp
@@ -0,0 +1,109 @@
+// Copyright (c) 2020 Huawei Technologies Co., Ltd
+// Copyright (c) 2019, Facebook CORPORATION.
+// All rights reserved.
+//
+// Licensed under the BSD 3-Clause License  (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "torch_npu/csrc/framework/utils/OpAdapter.h"
+#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h"
+#include "torch_npu/csrc/aten/NPUNativeFunctions.h"
+
+namespace at_npu {
+namespace native {
+
+// Computes the output shape for a reflection pad of the last dimension.
+//
+// Parameters:
+//   self:    input tensor; sizes 0..3 are read (as N, C, H, W), so it needs
+//            at least four dimensions.
+//   padding: pad widths; only padding[0] (left) and padding[1] (right) are
+//            read. Raises if fewer than two entries are supplied.
+//
+// Returns {N, C, H, W + padding[0] + padding[1]}.
+c10::SmallVector<int64_t, SIZE> reflection_pad1d_npu_output_size(const at::Tensor& self, at::IntArrayRef padding) {
+  TORCH_CHECK(padding.size() >= 2, "padding size is expected to be at least 2");
+  const int64_t batch = self.size(0);
+  const int64_t channels = self.size(1);
+  const int64_t height = self.size(2);
+  const int64_t width = self.size(3);
+  // Top/bottom entries (padding[2], padding[3]) never affect the result, so
+  // they are intentionally not read here.
+  const int64_t padded_width = width + padding[0] + padding[1];
+  return {batch, channels, height, padded_width};
+}
+
+// Launches the NPU pad kernel that writes the reflection-padded `self` into `out`.
+//
+// `padding` must hold exactly four values. The list is resized to two entries per
+// dimension of `self` (new entries are 0), and the (begin, end) pairs are then
+// emitted in reverse pair order before being handed to the operator as an int input.
+// Half inputs are dispatched to "PadV3"; every other dtype goes to "MirrorPad".
+// No shape or dtype validation of `out` happens here. Returns `out`.
+at::Tensor& reflection_pad1d_out_npu_nocheck(at::Tensor& out, const at::Tensor& self, at::IntArrayRef padding) {
+  TORCH_CHECK(padding.size() == 4, "padding size is expected to be 4");
+  c10::SmallVector<int64_t, N> padPairs = array_to_small_vector(padding);
+  padPairs.resize(2 * self.dim(), 0);
+  c10::SmallVector<int64_t, N> reversedPairs;
+  int64_t pairEnd = padPairs.size();
+  while (pairEnd > 1) {
+    reversedPairs.emplace_back(padPairs[pairEnd - 2]);
+    reversedPairs.emplace_back(padPairs[pairEnd - 1]);
+    pairEnd -= 2;
+  }
+
+  c10::SmallVector<int64_t, N> fillValue = {static_cast<int64_t>(0)};
+  const bool isHalf = (self.dtype() == at::kHalf);
+  OpCommand cmd;
+  if (isHalf) {
+    cmd.Name("PadV3").Input(self).Input(reversedPairs, at::kInt).Input(fillValue, self.scalar_type())
+        .Output(out).Attr("mode", string("reflect")).Attr("paddings_contiguous", true).Run();
+  } else {
+    cmd.Name("MirrorPad").Input(self).Input(reversedPairs, at::kInt)
+        .Output(out).Attr("mode", string("REFLECT")).Run();
+  }
+  return out;
+}
+
+// Out-variant of reflection_pad1d: pads the last dimension of `self` by
+// padding[0] (left) and padding[1] (right) and stores the result in `result`.
+// A leading dimension is added before the kernel runs and removed afterwards
+// because the shape helper reads four sizes. Raises if padding.size() != 2.
+// NOTE(review): appears to support only 3-D `self` -- confirm for 2-D input.
+at::Tensor& NPUNativeFunctions::reflection_pad1d_out(
+    const at::Tensor& self,
+    at::IntArrayRef padding,
+    at::Tensor& result){
+  TORCH_CHECK(padding.size() == 2, "padding size is expected to be 2");
+  c10::SmallVector<int64_t, N> paddings = {padding[0], padding[1], 0, 0};
+  at::Tensor selfCopy = self.unsqueeze(0);
+  auto outputSize = reflection_pad1d_npu_output_size(selfCopy, paddings);
+  OpPreparation::CheckOut({selfCopy}, result, selfCopy, outputSize);
+  reflection_pad1d_out_npu_nocheck(result, selfCopy, paddings);
+  result = result.squeeze(0);
+  return result;
+}
+
+// Pads the last dimension of `self` by padding[0] (left) / padding[1] (right) and
+// returns the result. NOTE(review): seems to assume 3-D `self` -- confirm for 2-D.
+at::Tensor NPUNativeFunctions::reflection_pad1d(const at::Tensor& self, at::IntArrayRef padding) {
+  TORCH_CHECK(padding.size() == 2, "padding size is expected to be 2");
+  c10::SmallVector<int64_t, N> paddings = {padding[0], padding[1], 0, 0};
+  at::Tensor selfCopy = self.unsqueeze(0);  // the helpers below read four sizes
+  auto outputSize = reflection_pad1d_npu_output_size(selfCopy, paddings);
+  at::Tensor out = OpPreparation::ApplyTensor(selfCopy, outputSize);
+  reflection_pad1d_out_npu_nocheck(out, selfCopy, paddings);
+  return out.squeeze(0);
+}
+
+} // namespace native
+} // namespace at_npu
\ No newline at end of file
-- 
Gitee