From 401877015fe33cb0f99395bf836d5cc2afc01ad6 Mon Sep 17 00:00:00 2001 From: XXX Date: Tue, 8 Feb 2022 17:27:46 +0800 Subject: [PATCH 01/12] =?UTF-8?q?maskedSelect=E7=AE=97=E5=AD=90=E8=BF=81?= =?UTF-8?q?=E7=A7=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test_network_ops/test_masked_select.py | 138 ++++++++++++++++++ .../csrc/aten/ops/MaskedSelectKernelNpu.cpp | 107 ++++++++++++++ 2 files changed, 245 insertions(+) create mode 100644 test/test_network_ops/test_masked_select.py create mode 100644 torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp diff --git a/test/test_network_ops/test_masked_select.py b/test/test_network_ops/test_masked_select.py new file mode 100644 index 0000000000..f5aeaf16f5 --- /dev/null +++ b/test/test_network_ops/test_masked_select.py @@ -0,0 +1,138 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import torch +import torch_npu +import numpy as np + +from torch_npu.testing.common_utils import TestCase, run_tests +from torch_npu.testing.common_device_type import instantiate_device_type_tests +from torch_npu.testing.util_test import create_common_tensor + +class TestMaskedSelect(TestCase): + def cpu_op_exec(self, input, mask): + output = torch.masked_select(input, mask) + output = output.numpy() + return output + + def npu_op_exec(self, input, mask): + mask = mask.to("npu") + output = torch.masked_select(input, mask) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input, mask, output): + output = torch.masked_select(input, mask, out=output) + return output.detach().to("cpu").numpy() + + def test_maskedselect_out_result(self, device): + shape_format = [ + [[np.float16, 2, [15, 15, 15, 16]], [np.float16, 2, [15, 15, 15, 16]]], + [[np.float16, 2, [15, 15, 15, 16]], [np.float16, 2, [3, 3, 7, 7]]], + [[np.float16, 0, [15, 15, 15, 16]], [np.float16, 0, [15, 15, 15, 16]]], + [[np.float16, 0, [15, 15, 15, 16]], [np.float16, 0, [116, 116, 1, 1]]], + [[np.float32, 2, [15, 15, 15, 16]], [np.float32, 2, [15, 15, 15, 16]]], + [[np.float32, 2, [15, 15, 15, 16]], [np.float32, 2, [3, 3, 7, 7]]], + [[np.float32, 0, [15, 15, 15, 16]], [np.float32, 0, [15, 15, 15, 16]]], + [[np.float32, 0, [15, 15, 15, 16]], [np.float32, 0, [232, 232, 1, 1]]], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -2, 2) + cpu_input2, npu_input2 = create_common_tensor(item[0], -2, 2) + cpu_input3, npu_input3 = create_common_tensor(item[1], -2, 2) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2.to(torch.int32)>0) + npu_output = self.npu_op_exec_out(npu_input1, npu_input2.to(torch.int32)>0, npu_input3) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_maskedselect_shape_format_maskdiff(self, device): + dtype_list = [np.int64, np.int32, np.float32] + format_list = [0] + shape_list = [[3, 4, 5]] + shape_format = [ + [i, j, k] for i in dtype_list for j in format_list for k in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + mask_cpu, mask_npu = create_common_tensor((np.int32, 0, (3, 4, 1)), 0, 100) + cpu_output = self.cpu_op_exec(cpu_input, mask_cpu > 50) + npu_output = self.npu_op_exec(npu_input, mask_npu > 50) + self.assertRtolEqual(cpu_output, npu_output) + + def test_maskedselect_shape_format_fp32(self, device): + format_list = [0, 3] + shape_list = [[3, 4, 5]] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + mask = torch.tensor([[ + [ True, False, True, True, False], + [ True, False, False, True, False], + [False, False, False, False, False], + [ True, False, False, False, False]], + + [[ True, False, False, False, True], + [False, True, False, True, True], + [False, True, False, True, True], + [False, False, False, False, False]], + + [[False, True, True, False, True], + [False, True, True, True, True], + [False, True, False, True, False], + [False, True, True, False, False]]]) + + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_op_exec(cpu_input, mask) + npu_output = self.npu_op_exec(npu_input, mask) + self.assertRtolEqual(cpu_output, npu_output) + + def test_maskedselect_shape_format_int(self, device): + dtype_list = [np.int32, np.int64] + format_list = [0] + shape_list = [[3, 4, 5]] + shape_format = [ + [i, j, k] for i in dtype_list for j in format_list for k in shape_list + ] + mask = torch.tensor([[ + [ True, False, True, True, False], + [ True, False, False, True, False], + [False, False, False, False, False], + [ True, False, False, False, False]], + + [[ True, False, False, False, True], + [False, True, False, True, True], + [False, True, False, True, True], + [False, False, False, False, False]], + + [[False, True, True, False, True], + [False, True, True, True, True], + [False, True, False, True, False], + [False, True, True, False, False]]]) + + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_op_exec(cpu_input, mask) + npu_output = self.npu_op_exec(npu_input, mask) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestMaskedSelect, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() \ No newline at end of file diff --git a/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp b/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp new file mode 100644 index 0000000000..508ba1c1bb --- /dev/null +++ b/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp @@ -0,0 +1,107 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/aten/NPUNativeFunctions.h" + +namespace at_npu { +namespace native { + +at::SmallVector masked_select_npu_output_size( + const at::Tensor& self, + const at::Tensor& mask) { + int64_t shape; + shape = mask.sum().item().toInt(); + return {shape}; +} + +at::Tensor& masked_select_out_npu_nocheck( + at::Tensor& result, + const at::Tensor& self, + const at::Tensor& mask) { + at::Tensor maskBool = mask; + if (!(mask.dtype() == at::kBool)) { + maskBool = mask.to(at::kBool); + } + + OpCommand cmd; + cmd.Name("MaskedSelect") + .Input(self) + .Input(maskBool) + .Output(result) + .Run(); + + return result; +} + +at::Tensor& NPUNativeFunctions::masked_select_out( + const at::Tensor& self, + const at::Tensor& mask, + at::Tensor& result) { + at::Tensor dtypeCastOfSelf = self; + at::Tensor maskCast = mask; + if (maskCast.sizes() != dtypeCastOfSelf.sizes()) { + maskCast = NPUNativeFunctions::npu_broadcast(mask, dtypeCastOfSelf.sizes()); + } + if (dtypeCastOfSelf.scalar_type() == ScalarType::Half) { + dtypeCastOfSelf = NPUNativeFunctions::npu_dtype_cast(dtypeCastOfSelf, at::ScalarType::Float); + result = result.to(ScalarType::Float); + } + auto outputSize = masked_select_npu_output_size(dtypeCastOfSelf, maskCast); + + OpPreparation::CheckOut( + {dtypeCastOfSelf}, + result, + dtypeCastOfSelf, + outputSize); + + OpPipeWithDefinedOut pipe; + result = pipe.CheckMemory({dtypeCastOfSelf, maskCast}, {result}) + .Func([&dtypeCastOfSelf, &maskCast](at::Tensor& result) + {masked_select_out_npu_nocheck(result, dtypeCastOfSelf, maskCast);}) + .Call(result); + + if (result.scalar_type() != self.scalar_type()) { + result = result.npu_dtype_cast(ScalarType::Half); + } + return result; +} + +at::Tensor NPUNativeFunctions::masked_select( + const at::Tensor& self, + const at::Tensor& mask) { + at::Tensor dtypeCastOfSelf = self; + at::Tensor maskCast = mask; + if (maskCast.sizes() != dtypeCastOfSelf.sizes()) { + maskCast = NPUNativeFunctions::npu_broadcast(mask, dtypeCastOfSelf.sizes()); + } + if (dtypeCastOfSelf.scalar_type() == ScalarType::Half) { + dtypeCastOfSelf = NPUNativeFunctions::npu_dtype_cast(dtypeCastOfSelf, at::ScalarType::Float); + } + auto outputSize = masked_select_npu_output_size(dtypeCastOfSelf, maskCast); + + at::Tensor result = OpPreparation::ApplyTensor(dtypeCastOfSelf, outputSize); + + masked_select_out_npu_nocheck(result, dtypeCastOfSelf, maskCast); + + if (result.scalar_type() != self.scalar_type()) { + result = NPUNativeFunctions::npu_dtype_cast(result, at::ScalarType::Half); + } + return result; +} + +} // namespace native +} // namespace at_npu -- Gitee From bd0acebea0d4261838a3c15b2d4da31c84347b1b Mon Sep 17 00:00:00 2001 From: XXX Date: Tue, 8 Feb 2022 18:42:11 +0800 Subject: [PATCH 02/12] code fix --- torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp b/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp index 508ba1c1bb..7f8cbab6da 100644 --- a/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp @@ -34,7 +34,7 @@ at::Tensor& masked_select_out_npu_nocheck( const at::Tensor& mask) { at::Tensor maskBool = mask; if (!(mask.dtype() == at::kBool)) { - maskBool = mask.to(at::kBool); + maskBool = NPUNativeFunctions::npu_dtype_cast(mask, at::kBool); } OpCommand cmd; @@ -56,9 +56,9 @@ at::Tensor& NPUNativeFunctions::masked_select_out( if (maskCast.sizes() != dtypeCastOfSelf.sizes()) { maskCast = NPUNativeFunctions::npu_broadcast(mask, dtypeCastOfSelf.sizes()); } - if (dtypeCastOfSelf.scalar_type() == ScalarType::Half) { + if (dtypeCastOfSelf.scalar_type() == at::ScalarType::Half) { dtypeCastOfSelf = NPUNativeFunctions::npu_dtype_cast(dtypeCastOfSelf, at::ScalarType::Float); - result = result.to(ScalarType::Float); + result = NPUNativeFunctions::npu_dtype_cast(result, at::ScalarType::Float); } auto outputSize = masked_select_npu_output_size(dtypeCastOfSelf, maskCast); @@ -75,7 +75,7 @@ at::Tensor& NPUNativeFunctions::masked_select_out( .Call(result); if (result.scalar_type() != self.scalar_type()) { - result = result.npu_dtype_cast(ScalarType::Half); + result = result.npu_dtype_cast(at::ScalarType::Half); } return result; } @@ -88,7 +88,7 @@ at::Tensor NPUNativeFunctions::masked_select( if (maskCast.sizes() != dtypeCastOfSelf.sizes()) { maskCast = NPUNativeFunctions::npu_broadcast(mask, dtypeCastOfSelf.sizes()); } - if (dtypeCastOfSelf.scalar_type() == ScalarType::Half) { + if (dtypeCastOfSelf.scalar_type() == at::ScalarType::Half) { dtypeCastOfSelf = NPUNativeFunctions::npu_dtype_cast(dtypeCastOfSelf, at::ScalarType::Float); } auto outputSize = masked_select_npu_output_size(dtypeCastOfSelf, maskCast); -- Gitee From f91585826daf0dc8b899885b50dbf425b9ab7a27 Mon Sep 17 00:00:00 2001 From: XXX Date: Tue, 8 Feb 2022 18:54:15 +0800 Subject: [PATCH 03/12] code fix2 --- torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp b/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp index 7f8cbab6da..46a21a0a55 100644 --- a/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp @@ -75,7 +75,7 @@ at::Tensor& NPUNativeFunctions::masked_select_out( .Call(result); if (result.scalar_type() != self.scalar_type()) { - result = result.npu_dtype_cast(at::ScalarType::Half); + result = NPUNativeFunctions::npu_dtype_cast(result, at::ScalarType::Half); } return result; } -- Gitee From 02c71229b8dceb76d5348e07256cb4132dcda5e0 Mon Sep 17 00:00:00 2001 From: XXX Date: Tue, 8 Feb 2022 19:02:19 +0800 Subject: [PATCH 04/12] fix code check --- test/test_network_ops/test_masked_select.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/test_network_ops/test_masked_select.py b/test/test_network_ops/test_masked_select.py index f5aeaf16f5..f741d72222 100644 --- a/test/test_network_ops/test_masked_select.py +++ b/test/test_network_ops/test_masked_select.py @@ -24,20 +24,20 @@ from torch_npu.testing.common_device_type import instantiate_device_type_tests from torch_npu.testing.util_test import create_common_tensor class TestMaskedSelect(TestCase): - def cpu_op_exec(self, input, mask): - output = torch.masked_select(input, mask) + def cpu_op_exec(self, input1, mask): + output = torch.masked_select(input1, mask) output = output.numpy() return output - def npu_op_exec(self, input, mask): + def npu_op_exec(self, input1, mask): mask = mask.to("npu") - output = torch.masked_select(input, mask) + output = torch.masked_select(input1, mask) output = output.to("cpu") output = output.numpy() return output - def npu_op_exec_out(self, input, mask, output): - output = torch.masked_select(input, mask, out=output) + def npu_op_exec_out(self, input1, mask, output): + output = torch.masked_select(input1, mask, out=output) return output.detach().to("cpu").numpy() def test_maskedselect_out_result(self, device): @@ -57,8 +57,8 @@ class TestMaskedSelect(TestCase): cpu_input3, npu_input3 = create_common_tensor(item[1], -2, 2) if cpu_input1.dtype == torch.float16: cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2.to(torch.int32)>0) - npu_output = self.npu_op_exec_out(npu_input1, npu_input2.to(torch.int32)>0, npu_input3) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2.to(torch.int32) > 0) + npu_output = self.npu_op_exec_out(npu_input1, npu_input2.to(torch.int32) > 0, npu_input3) cpu_output = cpu_output.astype(npu_output.dtype) self.assertRtolEqual(cpu_output, npu_output) -- Gitee From 5ae1383a4817dca1939bfba68f9e87be2fec504d Mon Sep 17 00:00:00 2001 From: shenpengcheng Date: Tue, 8 Feb 2022 19:22:30 +0800 Subject: [PATCH 05/12] code fix3 --- test/test_network_ops/test_masked_select.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_network_ops/test_masked_select.py b/test/test_network_ops/test_masked_select.py index f741d72222..69910ed236 100644 --- a/test/test_network_ops/test_masked_select.py +++ b/test/test_network_ops/test_masked_select.py @@ -22,7 +22,7 @@ import numpy as np from torch_npu.testing.common_utils import TestCase, run_tests from torch_npu.testing.common_device_type import instantiate_device_type_tests from torch_npu.testing.util_test import create_common_tensor - + class TestMaskedSelect(TestCase): def cpu_op_exec(self, input1, mask): output = torch.masked_select(input1, mask) -- Gitee From 32a8f42ba5bbe1cdec323f963647902f4821eda5 Mon Sep 17 00:00:00 2001 From: shenpengcheng Date: Wed, 26 Jan 2022 13:59:33 +0800 Subject: [PATCH 06/12] =?UTF-8?q?reflection=5Fpad2d=5Fbackward=E7=AE=97?= =?UTF-8?q?=E5=AD=90=E7=A7=BB=E6=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_reflection_pad2d_backward.py | 73 +++++++++++++++++ .../ops/ReflectionPad2dBackwardKernelNpu.cpp | 81 +++++++++++++++++++ 2 files changed, 154 insertions(+) create mode 100644 test/test_network_ops/test_reflection_pad2d_backward.py create mode 100644 torch_npu/csrc/aten/ops/ReflectionPad2dBackwardKernelNpu.cpp diff --git a/test/test_network_ops/test_reflection_pad2d_backward.py b/test/test_network_ops/test_reflection_pad2d_backward.py new file mode 100644 index 0000000000..b9a8853c85 --- /dev/null +++ b/test/test_network_ops/test_reflection_pad2d_backward.py @@ -0,0 +1,73 @@ +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch_npu +import numpy as np + +from torch_npu.testing.common_utils import TestCase, run_tests +from torch_npu.testing.common_device_type import instantiate_device_type_tests +from torch_npu.testing.util_test import create_common_tensor + +class TestReflectionPad2dBackward(TestCase): + def cpu_op_exec(self, input1, pad): + m = torch.nn.ReflectionPad2d(pad) + input1.requires_grad = True + output = m(input1) + output.backward(torch.ones_like(output)) + input_grad = input1.grad + output = output.numpy() + input_grad = input_grad.numpy() + return output, input_grad + + def npu_op_exec(self, input1, pad): + m = torch.nn.ReflectionPad2d(pad).to("npu") + input1.requires_grad = True + output = m(input1) + output.backward(torch.ones_like(output)) + input_grad = input1.grad + output = output.to("cpu") + output = output.detach().numpy() + input_grad = input_grad.cpu().numpy() + return output, input_grad + + def test_reflectionPad2d_backward_shape_format_fp16(self, device): + shape_format = [ + [[np.float16, 0, (1, 1, 37, 37)], [2, 2, 2, 2]], + [[np.float16, 3, (1, 1, 4, 3)], 2], + [[np.float16, 0, (1, 1, 17, 17)], [1, 2, 2, 2]], + ] + + def cpu_op_exec_fp16(input1, pad): + input1 = input1.to(torch.float32) + input1.requires_grad = True + m = torch.nn.ReflectionPad2d(pad) + output = m(input1) + output.backward(torch.ones_like(output)) + output = output.detach().numpy() + input_grad = input1.grad + input_grad = input_grad.numpy().astype(np.float16) + output = output.astype(np.float16) + return output, input_grad + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_output, cpu_grad = cpu_op_exec_fp16(cpu_input1, item[1]) + npu_output, npu_grad = self.npu_op_exec(npu_input1, item[1]) + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_grad, npu_grad) + +instantiate_device_type_tests(TestReflectionPad2dBackward, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/torch_npu/csrc/aten/ops/ReflectionPad2dBackwardKernelNpu.cpp b/torch_npu/csrc/aten/ops/ReflectionPad2dBackwardKernelNpu.cpp new file mode 100644 index 0000000000..b0c2c3731c --- /dev/null +++ b/torch_npu/csrc/aten/ops/ReflectionPad2dBackwardKernelNpu.cpp @@ -0,0 +1,81 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/aten/NPUNativeFunctions.h" + +namespace at_npu { +namespace native { + +at::Tensor& reflection_pad2d_backward_out_npu_nocheck( + const at::Tensor& gradOutput, + const at::Tensor& input, + at::IntArrayRef padding, + at::Tensor& gradInput) { + TORCH_CHECK(input.scalar_type() != at::ScalarType::Float, + "PadV3Grad don't supports torch.float!"); + c10::SmallVector vectorInt; + c10::SmallVector paddingsVector = array_to_small_vector(padding); + paddingsVector.resize(2 * input.dim(), 0); + for (int64_t i = paddingsVector.size(); i > 0; i -= 2) { + vectorInt.emplace_back(paddingsVector[i - 2]); + vectorInt.emplace_back(paddingsVector[i - 1]); + } + OpCommand cmd; + cmd.Name("PadV3Grad") + .Input(gradOutput) + .Input(vectorInt, at::kInt) + .Output(gradInput) + .Attr("mode", (string)"reflect") + .Attr("paddings_contiguous", true) + .Run(); + return gradInput; +} + +at::Tensor& NPUNativeFunctions::reflection_pad2d_backward_out( + const at::Tensor& gradOutput, + const at::Tensor& input, + at::IntArrayRef padding, + at::Tensor& gradInput) { + OpPreparation::CheckOut( + {input, gradOutput}, + gradInput, + input); + OpPipeWithDefinedOut pipe; + return pipe.CheckMemory({input, gradOutput}, {gradInput}) + .Func([&gradOutput, &input, &padding](at::Tensor& gradInput) + {reflection_pad2d_backward_out_npu_nocheck( + gradOutput, + input, + padding, + gradInput);}) + .Call(gradInput); +} + +at::Tensor NPUNativeFunctions::reflection_pad2d_backward( + const at::Tensor& gradOutput, + const at::Tensor& input, + at::IntArrayRef padding) { + at::Tensor gradInput = OpPreparation::ApplyTensor(input); + reflection_pad2d_backward_out_npu_nocheck( + gradOutput, + input, + padding, + gradInput); + return gradInput; +} +} // namespace native +} // namespace at_npu -- Gitee From 4e4412492ccad2efd2ae9650dad05b3c6591d51c Mon Sep 17 00:00:00 2001 From: shenpengcheng Date: Tue, 8 Feb 2022 17:27:46 +0800 Subject: [PATCH 07/12] =?UTF-8?q?maskedSelect=E7=AE=97=E5=AD=90=E8=BF=81?= =?UTF-8?q?=E7=A7=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test_network_ops/test_masked_select.py | 138 ++++++++++++++++++ .../csrc/aten/ops/MaskedSelectKernelNpu.cpp | 107 ++++++++++++++ 2 files changed, 245 insertions(+) create mode 100644 test/test_network_ops/test_masked_select.py create mode 100644 torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp diff --git a/test/test_network_ops/test_masked_select.py b/test/test_network_ops/test_masked_select.py new file mode 100644 index 0000000000..f5aeaf16f5 --- /dev/null +++ b/test/test_network_ops/test_masked_select.py @@ -0,0 +1,138 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import torch +import torch_npu +import numpy as np + +from torch_npu.testing.common_utils import TestCase, run_tests +from torch_npu.testing.common_device_type import instantiate_device_type_tests +from torch_npu.testing.util_test import create_common_tensor + +class TestMaskedSelect(TestCase): + def cpu_op_exec(self, input, mask): + output = torch.masked_select(input, mask) + output = output.numpy() + return output + + def npu_op_exec(self, input, mask): + mask = mask.to("npu") + output = torch.masked_select(input, mask) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input, mask, output): + output = torch.masked_select(input, mask, out=output) + return output.detach().to("cpu").numpy() + + def test_maskedselect_out_result(self, device): + shape_format = [ + [[np.float16, 2, [15, 15, 15, 16]], [np.float16, 2, [15, 15, 15, 16]]], + [[np.float16, 2, [15, 15, 15, 16]], [np.float16, 2, [3, 3, 7, 7]]], + [[np.float16, 0, [15, 15, 15, 16]], [np.float16, 0, [15, 15, 15, 16]]], + [[np.float16, 0, [15, 15, 15, 16]], [np.float16, 0, [116, 116, 1, 1]]], + [[np.float32, 2, [15, 15, 15, 16]], [np.float32, 2, [15, 15, 15, 16]]], + [[np.float32, 2, [15, 15, 15, 16]], [np.float32, 2, [3, 3, 7, 7]]], + [[np.float32, 0, [15, 15, 15, 16]], [np.float32, 0, [15, 15, 15, 16]]], + [[np.float32, 0, [15, 15, 15, 16]], [np.float32, 0, [232, 232, 1, 1]]], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -2, 2) + cpu_input2, npu_input2 = create_common_tensor(item[0], -2, 2) + cpu_input3, npu_input3 = create_common_tensor(item[1], -2, 2) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2.to(torch.int32)>0) + npu_output = self.npu_op_exec_out(npu_input1, npu_input2.to(torch.int32)>0, npu_input3) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_maskedselect_shape_format_maskdiff(self, device): + dtype_list = [np.int64, np.int32, np.float32] + format_list = [0] + shape_list = [[3, 4, 5]] + shape_format = [ + [i, j, k] for i in dtype_list for j in format_list for k in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + mask_cpu, mask_npu = create_common_tensor((np.int32, 0, (3, 4, 1)), 0, 100) + cpu_output = self.cpu_op_exec(cpu_input, mask_cpu > 50) + npu_output = self.npu_op_exec(npu_input, mask_npu > 50) + self.assertRtolEqual(cpu_output, npu_output) + + def test_maskedselect_shape_format_fp32(self, device): + format_list = [0, 3] + shape_list = [[3, 4, 5]] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + mask = torch.tensor([[ + [ True, False, True, True, False], + [ True, False, False, True, False], + [False, False, False, False, False], + [ True, False, False, False, False]], + + [[ True, False, False, False, True], + [False, True, False, True, True], + [False, True, False, True, True], + [False, False, False, False, False]], + + [[False, True, True, False, True], + [False, True, True, True, True], + [False, True, False, True, False], + [False, True, True, False, False]]]) + + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_op_exec(cpu_input, mask) + npu_output = self.npu_op_exec(npu_input, mask) + self.assertRtolEqual(cpu_output, npu_output) + + def test_maskedselect_shape_format_int(self, device): + dtype_list = [np.int32, np.int64] + format_list = [0] + shape_list = [[3, 4, 5]] + shape_format = [ + [i, j, k] for i in dtype_list for j in format_list for k in shape_list + ] + mask = torch.tensor([[ + [ True, False, True, True, False], + [ True, False, False, True, False], + [False, False, False, False, False], + [ True, False, False, False, False]], + + [[ True, False, False, False, True], + [False, True, False, True, True], + [False, True, False, True, True], + [False, False, False, False, False]], + + [[False, True, True, False, True], + [False, True, True, True, True], + [False, True, False, True, False], + [False, True, True, False, False]]]) + + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_op_exec(cpu_input, mask) + npu_output = self.npu_op_exec(npu_input, mask) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestMaskedSelect, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() \ No newline at end of file diff --git a/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp b/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp new file mode 100644 index 0000000000..508ba1c1bb --- /dev/null +++ b/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp @@ -0,0 +1,107 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/aten/NPUNativeFunctions.h" + +namespace at_npu { +namespace native { + +at::SmallVector masked_select_npu_output_size( + const at::Tensor& self, + const at::Tensor& mask) { + int64_t shape; + shape = mask.sum().item().toInt(); + return {shape}; +} + +at::Tensor& masked_select_out_npu_nocheck( + at::Tensor& result, + const at::Tensor& self, + const at::Tensor& mask) { + at::Tensor maskBool = mask; + if (!(mask.dtype() == at::kBool)) { + maskBool = mask.to(at::kBool); + } + + OpCommand cmd; + cmd.Name("MaskedSelect") + .Input(self) + .Input(maskBool) + .Output(result) + .Run(); + + return result; +} + +at::Tensor& NPUNativeFunctions::masked_select_out( + const at::Tensor& self, + const at::Tensor& mask, + at::Tensor& result) { + at::Tensor dtypeCastOfSelf = self; + at::Tensor maskCast = mask; + if (maskCast.sizes() != dtypeCastOfSelf.sizes()) { + maskCast = NPUNativeFunctions::npu_broadcast(mask, dtypeCastOfSelf.sizes()); + } + if (dtypeCastOfSelf.scalar_type() == ScalarType::Half) { + dtypeCastOfSelf = NPUNativeFunctions::npu_dtype_cast(dtypeCastOfSelf, at::ScalarType::Float); + result = result.to(ScalarType::Float); + } + auto outputSize = masked_select_npu_output_size(dtypeCastOfSelf, maskCast); + + OpPreparation::CheckOut( + {dtypeCastOfSelf}, + result, + dtypeCastOfSelf, + outputSize); + + OpPipeWithDefinedOut pipe; + result = pipe.CheckMemory({dtypeCastOfSelf, maskCast}, {result}) + .Func([&dtypeCastOfSelf, &maskCast](at::Tensor& result) + {masked_select_out_npu_nocheck(result, dtypeCastOfSelf, maskCast);}) + .Call(result); + + if (result.scalar_type() != self.scalar_type()) { + result = result.npu_dtype_cast(ScalarType::Half); + } + return result; +} + +at::Tensor NPUNativeFunctions::masked_select( + const at::Tensor& self, + const at::Tensor& mask) { + at::Tensor dtypeCastOfSelf = self; + at::Tensor maskCast = mask; + if (maskCast.sizes() != dtypeCastOfSelf.sizes()) { + maskCast = NPUNativeFunctions::npu_broadcast(mask, dtypeCastOfSelf.sizes()); + } + if (dtypeCastOfSelf.scalar_type() == ScalarType::Half) { + dtypeCastOfSelf = NPUNativeFunctions::npu_dtype_cast(dtypeCastOfSelf, at::ScalarType::Float); + } + auto outputSize = masked_select_npu_output_size(dtypeCastOfSelf, maskCast); + + at::Tensor result = OpPreparation::ApplyTensor(dtypeCastOfSelf, outputSize); + + masked_select_out_npu_nocheck(result, dtypeCastOfSelf, maskCast); + + if (result.scalar_type() != self.scalar_type()) { + result = NPUNativeFunctions::npu_dtype_cast(result, at::ScalarType::Half); + } + return result; +} + +} // namespace native +} // namespace at_npu -- Gitee From 1e31b603d362334dc534c5119a075b0a6c061f43 Mon Sep 17 00:00:00 2001 From: shenpengcheng Date: Tue, 8 Feb 2022 18:42:11 +0800 Subject: [PATCH 08/12] code fix --- torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp b/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp index 508ba1c1bb..7f8cbab6da 100644 --- a/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp @@ -34,7 +34,7 @@ at::Tensor& masked_select_out_npu_nocheck( const at::Tensor& mask) { at::Tensor maskBool = mask; if (!(mask.dtype() == at::kBool)) { - maskBool = mask.to(at::kBool); + maskBool = NPUNativeFunctions::npu_dtype_cast(mask, at::kBool); } OpCommand cmd; @@ -56,9 +56,9 @@ at::Tensor& NPUNativeFunctions::masked_select_out( if (maskCast.sizes() != dtypeCastOfSelf.sizes()) { maskCast = NPUNativeFunctions::npu_broadcast(mask, dtypeCastOfSelf.sizes()); } - if (dtypeCastOfSelf.scalar_type() == ScalarType::Half) { + if (dtypeCastOfSelf.scalar_type() == at::ScalarType::Half) { dtypeCastOfSelf = NPUNativeFunctions::npu_dtype_cast(dtypeCastOfSelf, at::ScalarType::Float); - result = result.to(ScalarType::Float); + result = NPUNativeFunctions::npu_dtype_cast(result, at::ScalarType::Float); } auto outputSize = masked_select_npu_output_size(dtypeCastOfSelf, maskCast); @@ -75,7 +75,7 @@ at::Tensor& NPUNativeFunctions::masked_select_out( .Call(result); if (result.scalar_type() != self.scalar_type()) { - result = result.npu_dtype_cast(ScalarType::Half); + result = result.npu_dtype_cast(at::ScalarType::Half); } return result; } @@ -88,7 +88,7 @@ at::Tensor NPUNativeFunctions::masked_select( if (maskCast.sizes() != dtypeCastOfSelf.sizes()) { maskCast = NPUNativeFunctions::npu_broadcast(mask, dtypeCastOfSelf.sizes()); } - if (dtypeCastOfSelf.scalar_type() == ScalarType::Half) { + if (dtypeCastOfSelf.scalar_type() == at::ScalarType::Half) { dtypeCastOfSelf = NPUNativeFunctions::npu_dtype_cast(dtypeCastOfSelf, at::ScalarType::Float); } auto outputSize = masked_select_npu_output_size(dtypeCastOfSelf, maskCast); -- Gitee From 48b1374750a4167e6d2d70220365c0a642081d47 Mon Sep 17 00:00:00 2001 From: shenpengcheng Date: Tue, 8 Feb 2022 18:54:15 +0800 Subject: [PATCH 09/12] code fix2 --- torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp b/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp index 7f8cbab6da..46a21a0a55 100644 --- a/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/MaskedSelectKernelNpu.cpp @@ -75,7 +75,7 @@ at::Tensor& NPUNativeFunctions::masked_select_out( .Call(result); if (result.scalar_type() != self.scalar_type()) { - result = result.npu_dtype_cast(at::ScalarType::Half); + result = NPUNativeFunctions::npu_dtype_cast(result, at::ScalarType::Half); } return result; } -- Gitee From 2a47e46dfc45f6ad60e79848d71bec496eb15315 Mon Sep 17 00:00:00 2001 From: shenpengcheng Date: Tue, 8 Feb 2022 19:02:19 +0800 Subject: [PATCH 10/12] fix code check --- test/test_network_ops/test_masked_select.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/test_network_ops/test_masked_select.py b/test/test_network_ops/test_masked_select.py index f5aeaf16f5..f741d72222 100644 --- a/test/test_network_ops/test_masked_select.py +++ b/test/test_network_ops/test_masked_select.py @@ -24,20 +24,20 @@ from torch_npu.testing.common_device_type import instantiate_device_type_tests from torch_npu.testing.util_test import create_common_tensor class TestMaskedSelect(TestCase): - def cpu_op_exec(self, input, mask): - output = torch.masked_select(input, mask) + def cpu_op_exec(self, input1, mask): + output = torch.masked_select(input1, mask) output = output.numpy() return output - def npu_op_exec(self, input, mask): + def npu_op_exec(self, input1, mask): mask = mask.to("npu") - output = torch.masked_select(input, mask) + output = torch.masked_select(input1, mask) output = output.to("cpu") output = output.numpy() return output - def npu_op_exec_out(self, input, mask, output): - output = torch.masked_select(input, mask, out=output) + def npu_op_exec_out(self, input1, mask, output): + output = torch.masked_select(input1, mask, out=output) return output.detach().to("cpu").numpy() def test_maskedselect_out_result(self, device): @@ -57,8 +57,8 @@ class TestMaskedSelect(TestCase): cpu_input3, npu_input3 = create_common_tensor(item[1], -2, 2) if cpu_input1.dtype == torch.float16: cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2.to(torch.int32)>0) - npu_output = self.npu_op_exec_out(npu_input1, npu_input2.to(torch.int32)>0, npu_input3) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2.to(torch.int32) > 0) + npu_output = self.npu_op_exec_out(npu_input1, npu_input2.to(torch.int32) > 0, npu_input3) cpu_output = cpu_output.astype(npu_output.dtype) self.assertRtolEqual(cpu_output, npu_output) -- Gitee From 4f526756de03766002cfc53fb4ade32d80f86d82 Mon Sep 17 00:00:00 2001 From: shenpengcheng Date: Tue, 8 Feb 2022 19:22:30 +0800 Subject: [PATCH 11/12] code fix3 --- test/test_network_ops/test_masked_select.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_network_ops/test_masked_select.py b/test/test_network_ops/test_masked_select.py index f741d72222..69910ed236 100644 --- a/test/test_network_ops/test_masked_select.py +++ b/test/test_network_ops/test_masked_select.py @@ -22,7 +22,7 @@ import numpy as np from torch_npu.testing.common_utils import TestCase, run_tests from torch_npu.testing.common_device_type import instantiate_device_type_tests from torch_npu.testing.util_test import create_common_tensor - + class TestMaskedSelect(TestCase): def cpu_op_exec(self, input1, mask): output = torch.masked_select(input1, mask) -- Gitee From 1556be2550121c984470dc17fa2b15ee6669872b Mon Sep 17 00:00:00 2001 From: shenpengcheng Date: Tue, 8 Feb 2022 19:56:23 +0800 Subject: [PATCH 12/12] cla fix --- test/test_network_ops/test_masked_select.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_network_ops/test_masked_select.py b/test/test_network_ops/test_masked_select.py index 69910ed236..f741d72222 100644 --- a/test/test_network_ops/test_masked_select.py +++ b/test/test_network_ops/test_masked_select.py @@ -22,7 +22,7 @@ import numpy as np from torch_npu.testing.common_utils import TestCase, run_tests from torch_npu.testing.common_device_type import instantiate_device_type_tests from torch_npu.testing.util_test import create_common_tensor - + class TestMaskedSelect(TestCase): def cpu_op_exec(self, input1, mask): output = torch.masked_select(input1, mask) -- Gitee