From 4f17e7d6ca7c8a74471dfd0371c78df0e5568fa1 Mon Sep 17 00:00:00 2001
From: wangxiao
Date: Mon, 21 Feb 2022 19:48:30 +0800
Subject: [PATCH 1/2] gelu_backward

---
 test/test_network_ops/test_gelu_backward.py  | 77 +++++++++++++++++++
 .../csrc/aten/ops/GeluBackwardKernelNpu.cpp  | 47 +++++++++++
 2 files changed, 124 insertions(+)
 create mode 100644 test/test_network_ops/test_gelu_backward.py
 create mode 100644 torch_npu/csrc/aten/ops/GeluBackwardKernelNpu.cpp

diff --git a/test/test_network_ops/test_gelu_backward.py b/test/test_network_ops/test_gelu_backward.py
new file mode 100644
index 0000000000..89d367df6c
--- /dev/null
+++ b/test/test_network_ops/test_gelu_backward.py
@@ -0,0 +1,77 @@
+# Copyright (c) 2020, Huawei Technologies.All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import copy
+import torch
+import torch_npu
+import numpy as np
+
+from torch_npu.testing.common_utils import TestCase, run_tests
+from torch_npu.testing.common_device_type import Dtypes, instantiate_device_type_tests
+from torch_npu.testing.util_test import create_common_tensor
+
+class TestGeluBackward(TestCase):
+    def generate_single_data(self, min_val, max_val, shape, dtype):
+        input1 = np.random.uniform(min_val, max_val, shape).astype(dtype)
+        npu_input1 = torch.from_numpy(input1)
+        return npu_input1
+
+    def cpu_op_exec(self, input1):
+        input1.requires_grad_(True)
+        output = torch.nn.functional.gelu(input1)
+        z = output.sum()
+        z.backward()
+        res = input1.grad
+        return res.detach().numpy()
+
+    def npu_op_exec(self, input1):
+        input1 = input1.to("npu")
+        input1.requires_grad = True
+        output = torch.nn.functional.gelu(input1)
+        z = output.sum()
+        z.backward()
+        res = input1.grad.to("cpu")
+        return res.detach().numpy()
+
+    def test_gelu_backward_float32_1(self, device):
+        input1= self.generate_single_data(0, 100, (4, 3, 1, 1), np.float32)
+        cpu_input1 = copy.deepcopy(input1)
+        cpu_output = self.cpu_op_exec(cpu_input1)
+        npu_output = self.npu_op_exec(input1)
+        self.assertRtolEqual(cpu_output, npu_output)
+
+    def test_gelu_backward_float32_2(self, device):
+        input1= self.generate_single_data(0, 100, (15, 3, 1), np.float32)
+        cpu_input1 = copy.deepcopy(input1)
+        cpu_output = self.cpu_op_exec(cpu_input1)
+        npu_output = self.npu_op_exec(input1)
+        self.assertRtolEqual(cpu_output, npu_output)
+
+    def test_gelu_backward_float32_3(self, device):
+        input1= self.generate_single_data(0, 100, (4, 4), np.float32)
+        cpu_input1 = copy.deepcopy(input1)
+        cpu_output = self.cpu_op_exec(cpu_input1)
+        npu_output = self.npu_op_exec(input1)
+        self.assertRtolEqual(cpu_output, npu_output)
+
+    def test_gelu_backward_float16(self, device):
+        input1 = self.generate_single_data(0, 100, (5, 10, 100), np.float16)
+        cpu_input1 = input1.to(torch.float32)
+        cpu_output = self.cpu_op_exec(cpu_input1)
+        cpu_output = cpu_output.astype(np.float16)
+        npu_output = self.npu_op_exec(input1)
+        self.assertRtolEqual(cpu_output, npu_output)
+
+instantiate_device_type_tests(TestGeluBackward, globals(), except_for="cpu")
+if __name__ == "__main__":
+    run_tests()
diff --git a/torch_npu/csrc/aten/ops/GeluBackwardKernelNpu.cpp b/torch_npu/csrc/aten/ops/GeluBackwardKernelNpu.cpp
new file mode 100644
index 0000000000..0e6c809960
--- /dev/null
+++ b/torch_npu/csrc/aten/ops/GeluBackwardKernelNpu.cpp
@@ -0,0 +1,47 @@
+// Copyright (c) 2020, Huawei Technologies.All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h"
+#include "torch_npu/csrc/framework/utils/OpAdapter.h"
+#include "torch_npu/csrc/aten/NPUNativeFunctions.h"
+
+namespace at_npu {
+namespace native {
+
+at::Tensor& gelu_backward_out_npu_nocheck(
+    at::Tensor& grad_input,
+    const at::Tensor& grad,
+    const at::Tensor& self) {
+  at::Tensor unused = grad;
+  OpCommand cmd;
+  cmd.Name("GeluGrad")
+      .Input(grad)
+      .Input(self)
+      .Input(unused)
+      .Output(grad_input)
+      .Run();
+
+  return grad_input;
+}
+
+at::Tensor NPUNativeFunctions::gelu_backward(
+    const at::Tensor& grad,
+    const at::Tensor& self) {
+  at::Tensor grad_input = OpPreparation::ApplyTensor(self);
+  gelu_backward_out_npu_nocheck(grad_input, grad, self);
+  return grad_input;
+}
+
+} // namespace native
+} // namespace at_npu
\ No newline at end of file
--
Gitee

From b372e5fd21d279b97337090e8fbe4aeb9ca18d4d Mon Sep 17 00:00:00 2001
From: wangxiao
Date: Tue, 22 Feb 2022 10:43:41 +0800
Subject: [PATCH 2/2] fix ut of gelu_backward

---
 test/test_network_ops/test_gelu_backward.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/test_network_ops/test_gelu_backward.py b/test/test_network_ops/test_gelu_backward.py
index 89d367df6c..4952e4f602 100644
--- a/test/test_network_ops/test_gelu_backward.py
+++ b/test/test_network_ops/test_gelu_backward.py
@@ -44,21 +44,21 @@ class TestGeluBackward(TestCase):
         return res.detach().numpy()
 
     def test_gelu_backward_float32_1(self, device):
-        input1= self.generate_single_data(0, 100, (4, 3, 1, 1), np.float32)
+        input1 = self.generate_single_data(0, 100, (4, 3, 1, 1), np.float32)
         cpu_input1 = copy.deepcopy(input1)
         cpu_output = self.cpu_op_exec(cpu_input1)
         npu_output = self.npu_op_exec(input1)
         self.assertRtolEqual(cpu_output, npu_output)
 
     def test_gelu_backward_float32_2(self, device):
-        input1= self.generate_single_data(0, 100, (15, 3, 1), np.float32)
+        input1 = self.generate_single_data(0, 100, (15, 3, 1), np.float32)
         cpu_input1 = copy.deepcopy(input1)
         cpu_output = self.cpu_op_exec(cpu_input1)
         npu_output = self.npu_op_exec(input1)
         self.assertRtolEqual(cpu_output, npu_output)
 
     def test_gelu_backward_float32_3(self, device):
-        input1= self.generate_single_data(0, 100, (4, 4), np.float32)
+        input1 = self.generate_single_data(0, 100, (4, 4), np.float32)
         cpu_input1 = copy.deepcopy(input1)
         cpu_output = self.cpu_op_exec(cpu_input1)
         npu_output = self.npu_op_exec(input1)
--
Gitee