From 24b2ce1a5c4a6dd320d3b961c76d16fb4ed78364 Mon Sep 17 00:00:00 2001
From: xiaxia3
Date: Mon, 13 Jun 2022 14:44:51 +0800
Subject: [PATCH] Update the DIoU and fuse_add_softmax_dropout test-case
 baselines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../test_fuse_add_softmax_dropout.py          |  36 +++---
 test/test_contrib/test_iou.py                 | 108 +++++++++++++++---
 2 files changed, 116 insertions(+), 28 deletions(-)

diff --git a/test/test_contrib/test_fuse_add_softmax_dropout.py b/test/test_contrib/test_fuse_add_softmax_dropout.py
index 4cf5306367b..dd9c79fa317 100644
--- a/test/test_contrib/test_fuse_add_softmax_dropout.py
+++ b/test/test_contrib/test_fuse_add_softmax_dropout.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 Huawei Technologies Co., Ltd
+# Copyright (c) 2022 Huawei Technologies Co., Ltd
 # All rights reserved.
 #
 # Licensed under the BSD 3-Clause License (the "License");
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import unittest
+import math
 import numpy as np
 import torch
 import torch.nn as nn
@@ -22,22 +23,32 @@
 import torch_npu
 from torch_npu.contrib.function import fuse_add_softmax_dropout
 from torch_npu.testing.testcase import TestCase, run_tests
-from torch_npu.testing.common_utils import create_common_tensor
 
 
-class TestFuseAddSoftmaxDropout(unittest.TestCase):
-    def test_fuse_add_softmax_dropout(seif):
+class TestFuseAddSoftmaxDropout(TestCase):
+    def npu_fuse_add_softmax_dropout(self, dropout, attn_mask, attn_scores, attn_head_size):
+        # Eager-mode baseline: mask + scores / sqrt(head size), then softmax, then dropout.
+        attn_scores = torch.add(attn_mask, attn_scores, alpha=(1 / math.sqrt(attn_head_size)))
+        attn_probs = F.softmax(attn_scores, dim=-1)
+        attn_probs = dropout(attn_probs)
+        return attn_probs
+
+    def test_fuse_add_softmax_dropout(self):
         training = True
-        dropout = nn.DropoutWithByteMask(0.1)
-        npu_input1 = torch.rand(96, 12, 384, 384).half().npu()
-        npu_input2 = torch.rand(96, 12, 384, 384).half().npu()
-        alpha = 0.125
-        axis = -1
+        dropout = nn.DropoutWithByteMask(0)
+        npu_input1 = torch.rand(96, 12, 384, 384).npu().half()
+        npu_input2 = torch.rand(96, 12, 384, 384).npu().half()
+        attn_head_size = 64
+        prob = 0
+
+        npu_output = self.npu_fuse_add_softmax_dropout(dropout, npu_input1, npu_input2, attn_head_size)
+        high_performance_output = fuse_add_softmax_dropout(training=training, dropout=dropout,
+                                                           attn_mask=npu_input1, attn_scores=npu_input2,
+                                                           attn_head_size=attn_head_size, p=prob)
+
+        self.assertRtolEqual(npu_output.detach().cpu().numpy(),
+                             high_performance_output.detach().cpu().numpy())
 
-        output = fuse_add_softmax_dropout(training=training, dropout=dropout, \
-                                          attn_mask=npu_input1, attn_scores=npu_input2,
-                                          attn_head_size=alpha, p=axis)
 
 if __name__ == "__main__":
     run_tests()
-
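The rewritten test above no longer just smoke-tests the fused call: it compares it against an eager-mode baseline that adds the mask to the scores scaled by 1/sqrt(attn_head_size), softmaxes over the last axis, and applies dropout. The following is a minimal CPU-only sketch of that baseline, assuming plain torch.nn.functional.dropout as a stand-in for the NPU-specific nn.DropoutWithByteMask, and using small illustrative shapes rather than the (96, 12, 384, 384) tensors the test runs on NPU:

import math

import torch
import torch.nn.functional as F

def reference_add_softmax_dropout(attn_mask, attn_scores, attn_head_size, p=0.0):
    # Scale the raw scores by 1/sqrt(head size), add the attention mask,
    # softmax over the last axis, then apply dropout.
    scores = attn_mask + attn_scores / math.sqrt(attn_head_size)
    probs = F.softmax(scores, dim=-1)
    return F.dropout(probs, p=p, training=True)

# Illustrative shapes only (batch, heads, seq, seq).
mask = torch.rand(2, 4, 16, 16)
scores = torch.rand(2, 4, 16, 16)
out = reference_add_softmax_dropout(mask, scores, attn_head_size=64, p=0.0)
print(out.shape)  # torch.Size([2, 4, 16, 16])

With p=0 (as in the test) the dropout stage is an identity, which is what makes a bitwise-free numerical comparison against the fused operator meaningful.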
diff --git a/test/test_contrib/test_iou.py b/test/test_contrib/test_iou.py
index 4154b5f708f..5e781ac9adc 100644
--- a/test/test_contrib/test_iou.py
+++ b/test/test_contrib/test_iou.py
@@ -13,6 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import itertools
+import math
+import numpy as np
 import torch
 import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
@@ -73,19 +76,104 @@ class TestIou(TestCase):
         self.assertRtolEqual(expedt_iou, iou.cpu().detach())
 
     def test_npu_diou(self):
-        box1 = torch.tensor([[0, 0, 10, 10],
-                             [10, 10, 20, 20],
-                             [32, 32, 38, 42],
-                             [8, 8, 4, 4]], dtype=torch.float32).to("npu")
-        box1.requires_grad = True
-        box2 = torch.tensor([[0, 0, 10, 20],
-                             [0, 10, 10, 10],
-                             [10, 10, 20, 20],
-                             [8, 8, 4, 4]], dtype=torch.float32).to("npu")
-        box2.requires_grad = True
-        expedt_diou = torch.tensor([[-0.0083, -0.0450, -0.0007, -0.0567]], dtype=torch.float32)
-        diou = npu_diou(box1, box2)
-        self.assertRtolEqual(expedt_diou, diou.cpu().detach())
+        def generate_diou_data(n, m, dtype):
+            # Rows 0-1 (the box centers) fall in [0, 0.5);
+            # rows 2-3 (the widths/heights) fall in (0.5, 1].
+            data_bboxes = np.array([]).astype(dtype)
+            for i in range(4):
+                data_bboxes_array = i // 2 + math.pow(-1, i // 2) * 0.5 * np.random.rand(1, n).astype(dtype)
+                data_bboxes = np.append(data_bboxes, data_bboxes_array)
+            data_bboxes = data_bboxes.reshape([4, n])
+            data_gtboxes = np.array([]).astype(dtype)
+            for i in range(4):
+                data_gtboxes_array = i // 2 + math.pow(-1, i // 2) * 0.5 * np.random.rand(1, m).astype(dtype)
+                data_gtboxes = np.append(data_gtboxes, data_gtboxes_array)
+            data_gtboxes = data_gtboxes.reshape([4, m])
+            cpu_input1 = torch.from_numpy(data_bboxes)
+            cpu_input2 = torch.from_numpy(data_gtboxes)
+            npu_input1 = cpu_input1.npu()
+            npu_input2 = cpu_input2.npu()
+            return [cpu_input1, cpu_input2, npu_input1, npu_input2]
+
+        def cpu_op_exec(box1, box2, trans=False, is_cross=False, mode="iou", eps=1e-9):
+            dtype = box1.numpy().dtype
+            _, n = box1.shape
+            _, m = box2.shape
+            if trans:
+                # Rows are (x_center, y_center, w, h); convert to corner format.
+                b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
+                b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
+                b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
+                b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
+            else:
+                b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
+                b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
+
+            diou_res = np.array([], dtype=dtype)
+            iter_list = itertools.product(range(n), range(m))
+            for i, j in iter_list:
+                # c2: squared diagonal of the smallest box enclosing both boxes.
+                cw = torch.max(b1_x2[i], b2_x2[j]) - torch.min(b1_x1[i], b2_x1[j])
+                ch = torch.max(b1_y2[i], b2_y2[j]) - torch.min(b1_y1[i], b2_y1[j])
+                c2 = cw ** 2 + ch ** 2 + eps
+                # rho2: squared distance between the two box centers.
+                rho2 = ((b2_x1[j] + b2_x2[j] - b1_x1[i] - b1_x2[i]) ** 2 +
+                        (b2_y1[j] + b2_y2[j] - b1_y1[i] - b1_y2[i]) ** 2) / 4
+                inter_area = (torch.min(b1_x2[i], b2_x2[j]) - torch.max(b1_x1[i], b2_x1[j])).clamp(0) * \
+                             (torch.min(b1_y2[i], b2_y2[j]) - torch.max(b1_y1[i], b2_y1[j])).clamp(0)
+                w1, h1 = b1_x2[i] - b1_x1[i], b1_y2[i] - b1_y1[i] + eps
+                w2, h2 = b2_x2[j] - b2_x1[j], b2_y2[j] - b2_y1[j] + eps
+                union_area = w1 * h1 + w2 * h2 - inter_area + eps
+                diou_ij = inter_area / union_area - rho2 / c2
+                if not is_cross:
+                    if i == j:
+                        diou_res = np.append(diou_res, diou_ij)
+                else:
+                    diou_res = np.append(diou_res, diou_ij)
+
+            if not is_cross:
+                return diou_res.reshape(1, n)
+            return diou_res.reshape(n, m)
+
+        def test_npu_diou_shape_format_fp32():
+            _test_npu_diou_shape_format(np.float32)
+
+        def test_npu_diou_shape_format_fp16():
+            _test_npu_diou_shape_format(np.float16)
+
+        def _test_npu_diou_shape_format(dtype):
+            shape_list = [
+                [10, 10],
+                [12, 12],
+                [100, 100]
+            ]
+            is_trans_list = [True]
+            mode_list = ["iou"]
+            shape_format = [[j, k, m]
+                            for j in shape_list
+                            for k in is_trans_list
+                            for m in mode_list]
+
+            # The CPU reference always runs in float32; the requested dtype
+            # is kept only to pick the comparison tolerance.
+            is_fp16 = dtype == np.float16
+            comp_dtype = np.float32 if is_fp16 else dtype
+            for item in shape_format:
+                mode_digit = 0 if item[-1] == "iou" else 1
+                is_cross = item[0][0] != item[0][1]
+                list1 = generate_diou_data(*item[0], comp_dtype)
+                cpu_output = cpu_op_exec(list1[0], list1[1], item[1], is_cross, item[-1])
+                npu_output = npu_diou(list1[2], list1[3], item[1], is_cross, mode_digit)
+                npu_output = npu_output.cpu().numpy()
+                cpu_output = cpu_output.astype(npu_output.dtype)
+
+                if is_fp16:
+                    self.assertRtolEqual(cpu_output, npu_output, prec16=1e-2)
+                else:
+                    self.assertRtolEqual(cpu_output, npu_output)
+
+        test_npu_diou_shape_format_fp32()
+        test_npu_diou_shape_format_fp16()
 
     def test_npu_ciou(self):
         box1 = torch.tensor([[0, 0, 10, 10],
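The cpu_op_exec reference in the hunk above implements the DIoU metric, DIoU = IoU - rho^2 / c^2, where rho is the distance between the two box centers and c is the diagonal of the smallest box enclosing both. A NumPy-only sketch for a single pair of corner-format boxes, kept deliberately close to the per-pair body of the loop above (the function name diou_single is illustrative, not part of the library):

import numpy as np

def diou_single(b1, b2, eps=1e-9):
    # b1, b2 are (x1, y1, x2, y2) corner-format boxes.
    inter_w = max(0.0, min(b1[2], b2[2]) - max(b1[0], b2[0]))
    inter_h = max(0.0, min(b1[3], b2[3]) - max(b1[1], b2[1]))
    inter = inter_w * inter_h
    area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
    area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
    iou = inter / (area1 + area2 - inter + eps)
    # Squared diagonal of the smallest box enclosing both boxes.
    cw = max(b1[2], b2[2]) - min(b1[0], b2[0])
    ch = max(b1[3], b2[3]) - min(b1[1], b2[1])
    c2 = cw ** 2 + ch ** 2 + eps
    # Squared distance between the two box centers.
    rho2 = ((b2[0] + b2[2] - b1[0] - b1[2]) ** 2
            + (b2[1] + b2[3] - b1[1] - b1[3]) ** 2) / 4
    return iou - rho2 / c2

b1 = np.array([0.0, 0.0, 10.0, 10.0])
b2 = np.array([0.0, 0.0, 10.0, 20.0])
print(diou_single(b1, b2))  # 0.5 - 25/500 = 0.45

Note the index pairing in the rho2 term: both corners of box2 take index j and both corners of box1 take index i, which is why the mixed i/j subscripts in the original hunk had to be corrected.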
@@ -103,4 +191,4 @@ class TestIou(TestCase):
         self.assertRtolEqual(expedt_ciou, ciou.cpu().detach())
 
 if __name__ == "__main__":
-    run_tests()
\ No newline at end of file
+    run_tests()
--
Gitee
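One detail of the new DIoU baseline worth spelling out: generate_diou_data produces boxes as 4 x n arrays whose rows are coordinates, and the test only exercises trans=True, so the rows are read as (x_center, y_center, w, h) and converted to corners before any overlap math. A small sketch of that conversion, assuming the same (4, n) row layout as the generator above:

import numpy as np

def xywh_to_xyxy(boxes):
    # boxes: (4, n) array whose rows are (x_center, y_center, w, h),
    # the layout the trans=True branch of cpu_op_exec expects.
    x, y, w, h = boxes
    return np.stack([x - w / 2, y - h / 2, x + w / 2, y + h / 2])

b = np.array([[5.0], [5.0], [10.0], [10.0]])  # one box centred at (5, 5)
print(xywh_to_xyxy(b).ravel())  # [ 0.  0. 10. 10.]

Because the generator keeps centers in [0, 0.5) and extents in (0.5, 1], every converted box has x1 < x2 and y1 < y2, so the clamp(0) guards in the reference only trigger for genuinely non-overlapping pairs.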