From 30c182449e6842a0f6df0cf8f5f6d428a275972e Mon Sep 17 00:00:00 2001
From: hxf12345677
Date: Mon, 21 Feb 2022 13:51:37 +0800
Subject: [PATCH] =?UTF-8?q?reflection=5Fpad1c1.8.1=E7=AE=97=E5=AD=90?=
 =?UTF-8?q?=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../test_network_ops/test_reflection_pad1d.py | 133 ++++++++++++++++++
 .../aten/ops/ReflectionPad1dKernelNpu.cpp     | 117 ++++++++++++++++
 2 files changed, 250 insertions(+)
 create mode 100644 test/test_network_ops/test_reflection_pad1d.py
 create mode 100644 torch_npu/csrc/aten/ops/ReflectionPad1dKernelNpu.cpp

diff --git a/test/test_network_ops/test_reflection_pad1d.py b/test/test_network_ops/test_reflection_pad1d.py
new file mode 100644
index 0000000000..13a6ddceb6
--- /dev/null
+++ b/test/test_network_ops/test_reflection_pad1d.py
@@ -0,0 +1,133 @@
+# Copyright (c) 2020, Huawei Technologies.All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torch_npu
+import numpy as np
+
+from torch_npu.testing.common_utils import TestCase, run_tests
+from torch_npu.testing.common_device_type import instantiate_device_type_tests
+from torch_npu.testing.util_test import create_common_tensor
+
+
+class TestReflectionPad1d(TestCase):
+    """Check reflection_pad1d results on NPU against the CPU results."""
+
+    def cpu_op_out_exec(self, input1, pad, output):
+        """Run the out= variant on CPU tensors; return a numpy array."""
+        m = torch._C._nn.reflection_pad1d(input1, pad, out=output)
+        m = m.numpy()
+        return m
+
+    def npu_op_out_exec(self, input1, pad, output):
+        """Run the out= variant, copy the result to CPU; return a numpy array."""
+        m_n = torch._C._nn.reflection_pad1d(input1, pad, out=output)
+        m_n = m_n.to("cpu")
+        m_n = m_n.numpy()
+        return m_n
+
+    def cpu_op_exec(self, input1, pad):
+        """Run torch.nn.ReflectionPad1d on a CPU tensor; return a numpy array."""
+        m = torch.nn.ReflectionPad1d(pad)
+        output = m(input1)
+        output = output.numpy()
+        return output
+
+    def npu_op_exec(self, input1, pad):
+        """Run torch.nn.ReflectionPad1d, copy to CPU; return a numpy array."""
+        m = torch.nn.ReflectionPad1d(pad)
+        output = m(input1)
+        output = output.to("cpu")
+        output = output.numpy()
+        return output
+
+    def test_reflection_pad1d_out_shape_format_fp16(self, device):
+        """out= variant with float16 inputs."""
+        shape_format = [
+            [[np.float16, 2, (1, 2, 4)], [3, 1]],
+            [[np.float16, 3, (1, 2, 4)], [3, 1]]
+        ]
+
+        def cpu_op_out_exec_fp16(input1, pad, output):
+            # The CPU reference is computed in float32 and cast back to float16.
+            input1 = input1.to(torch.float32)
+            m = torch._C._nn.reflection_pad1d(input1, pad, out=output)
+            m = m.numpy()
+            m = m.astype(np.float16)
+            return m
+
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100)
+            # NOTE(review): out starts as (1, 3, 3), not the padded shape, so
+            # this also relies on the op resizing out - confirm intended.
+            cpuout = torch.randn(1, 3, 3)
+            npuout = cpuout.to(npu_input1.dtype).npu()
+            cpu_output = cpu_op_out_exec_fp16(cpu_input1, item[1], cpuout)
+            npu_output = self.npu_op_out_exec(npu_input1, item[1], npuout)
+            self.assertRtolEqual(cpu_output, npu_output)
+
+    def test_reflection_pad1d_out_shape_format_fp32(self, device):
+        """out= variant with float32 inputs."""
+        shape_format = [
+            [[np.float32, 0, (1, 2, 4)], [3, 1]],
+            [[np.float32, 2, (1, 2, 4)], [3, 1]]
+        ]
+
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100)
+            cpuout = torch.randn(1, 3, 3)
+            npuout = cpuout.to(npu_input1.dtype).npu()
+            cpu_output = self.cpu_op_out_exec(cpu_input1, item[1], cpuout)
+            npu_output = self.npu_op_out_exec(npu_input1, item[1], npuout)
+            self.assertRtolEqual(cpu_output, npu_output)
+
+    def test_reflection_pad1d_shape_format_fp16(self, device):
+        """Module variant with float16 inputs."""
+        shape_format = [
+            [[np.float16, 0, (2, 10, 12)], [4, 3]],
+            [[np.float16, 3, (2, 10, 12)], [4, 3]]
+        ]
+
+        def cpu_op_exec_fp16(input1, pad):
+            # The CPU reference is computed in float32 and cast back to float16.
+            input1 = input1.to(torch.float32)
+            m = torch.nn.ReflectionPad1d(pad)
+            output = m(input1)
+            output = output.numpy()
+            output = output.astype(np.float16)
+            return output
+
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100)
+            cpu_output = cpu_op_exec_fp16(cpu_input1, item[1])
+            npu_output = self.npu_op_exec(npu_input1, item[1])
+            self.assertRtolEqual(cpu_output, npu_output)
+
+    def test_reflection_pad1d_shape_format_fp32(self, device):
+        """Module variant with float32 inputs."""
+        shape_format = [
+            [[np.float32, 0, (2, 10, 12)], [4, 3]],
+            [[np.float32, 2, (2, 10, 12)], [4, 3]]
+        ]
+
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100)
+            cpu_output = self.cpu_op_exec(cpu_input1, item[1])
+            npu_output = self.npu_op_exec(npu_input1, item[1])
+            self.assertRtolEqual(cpu_output, npu_output)
+
+
+instantiate_device_type_tests(TestReflectionPad1d, globals(), except_for="cpu")
+if __name__ == "__main__":
+    run_tests()
diff --git a/torch_npu/csrc/aten/ops/ReflectionPad1dKernelNpu.cpp b/torch_npu/csrc/aten/ops/ReflectionPad1dKernelNpu.cpp
new file mode 100644
index 0000000000..0e33c3b20e
--- /dev/null
+++ b/torch_npu/csrc/aten/ops/ReflectionPad1dKernelNpu.cpp
@@ -0,0 +1,117 @@
+// Copyright (c) 2020 Huawei Technologies Co., Ltd
+// Copyright (c) 2019, Facebook CORPORATION.
+// All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "torch_npu/csrc/framework/utils/OpAdapter.h"
+#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h"
+#include "torch_npu/csrc/aten/NPUNativeFunctions.h"
+
+namespace at_npu {
+namespace native {
+
+// Returns the padded shape {N, C, H, W + padding[0] + padding[1]} of a 4-D
+// input. Only the first two padding values are used.
+c10::SmallVector<int64_t, SIZE> reflection_pad1d_npu_output_size(const at::Tensor& self, at::IntArrayRef padding) {
+  int64_t batch = self.size(0);
+  int64_t channels = self.size(1);
+  int64_t height = self.size(2);
+  int64_t width = self.size(3);
+  int64_t padding_l = padding[0];
+  int64_t padding_r = padding[1];
+
+  c10::SmallVector<int64_t, SIZE> outputSize = {batch, channels, height, width + padding_l + padding_r};
+  return outputSize;
+}
+
+// Launches the NPU pad operator on `self` and writes the result into `out`.
+// `padding` must hold exactly 4 values. It is zero-extended to 2 * self.dim()
+// entries and handed to the operator pair by pair, last pair first.
+// Half inputs use PadV3 (mode "reflect"); all other dtypes use MirrorPad.
+at::Tensor& reflection_pad1d_out_npu_nocheck(at::Tensor& out, const at::Tensor& self, at::IntArrayRef padding) {
+  TORCH_CHECK(padding.size() == 4, "padding size is expected to be 4");
+  c10::SmallVector<int64_t, N> vectorInt;
+  c10::SmallVector<int64_t, N> paddingsVector = array_to_small_vector(padding);
+  paddingsVector.resize(2 * self.dim(), 0);
+  for (int64_t i = paddingsVector.size(); i > 1; i -= 2) {
+    vectorInt.emplace_back(paddingsVector[i - 2]);
+    vectorInt.emplace_back(paddingsVector[i - 1]);
+  }
+
+  c10::SmallVector<int64_t, N> value_tensor = {(int64_t)0};
+  OpCommand cmd;
+  if (self.dtype() == at::kHalf) {
+    cmd.Name("PadV3")
+        .Input(self)
+        .Input(vectorInt, at::kInt)
+        .Input(value_tensor, self.scalar_type())
+        .Output(out)
+        .Attr("mode", (string)"reflect")
+        .Attr("paddings_contiguous", true)
+        .Run();
+  } else {
+    cmd.Name("MirrorPad")
+        .Input(self)
+        .Input(vectorInt, at::kInt)
+        .Output(out)
+        .Attr("mode", (string)"REFLECT")
+        .Run();
+  }
+  return out;
+}
+
+// out= variant of reflection_pad1d for a 3-D input and a 2-value padding.
+// The input is unsqueezed to 4-D for the NPU operator and the result is
+// squeezed back to 3-D afterwards.
+at::Tensor& NPUNativeFunctions::reflection_pad1d_out(
+    const at::Tensor& self,
+    at::IntArrayRef padding,
+    at::Tensor& result) {
+  // Validate before indexing padding and before reading size(3) of the 4-D view.
+  TORCH_CHECK(padding.size() == 2, "padding size is expected to be 2");
+  TORCH_CHECK(self.dim() == 3, "3D tensor expected for input, but got: ", self.dim(), "D");
+  c10::SmallVector<int64_t, N> paddings = {padding[0], padding[1], 0, 0};
+  at::Tensor selfCopy = self.unsqueeze(0);
+
+  auto outputSize = reflection_pad1d_npu_output_size(selfCopy, paddings);
+  OpPreparation::CheckOut(
+      {selfCopy},
+      result,
+      selfCopy,
+      outputSize);
+  reflection_pad1d_out_npu_nocheck(result, selfCopy, paddings);
+  // NOTE(review): this rebinds `result` to a squeezed view of the 4-D output;
+  // confirm the tensor the caller passed as out ends up 3-D as well.
+  result = result.squeeze(0);
+  return result;
+}
+
+// Functional variant of reflection_pad1d for a 3-D input and a 2-value padding.
+// Allocates the 4-D output itself and returns it squeezed back to 3-D.
+at::Tensor NPUNativeFunctions::reflection_pad1d(const at::Tensor& self, at::IntArrayRef padding) {
+  // Validate before indexing padding and before reading size(3) of the 4-D view.
+  TORCH_CHECK(padding.size() == 2, "padding size is expected to be 2");
+  TORCH_CHECK(self.dim() == 3, "3D tensor expected for input, but got: ", self.dim(), "D");
+  c10::SmallVector<int64_t, N> paddings = {padding[0], padding[1], 0, 0};
+  at::Tensor selfCopy = self.unsqueeze(0);
+
+  auto outputSize = reflection_pad1d_npu_output_size(selfCopy, paddings);
+  at::Tensor out = OpPreparation::ApplyTensor(selfCopy, outputSize);
+  reflection_pad1d_out_npu_nocheck(out, selfCopy, paddings);
+  out = out.squeeze(0);
+  return out;
+}
+
+} // namespace native
+} // namespace at_npu
-- 
Gitee