From 24b2ce1a5c4a6dd320d3b961c76d16fb4ed78364 Mon Sep 17 00:00:00 2001
From: xiaxia3
Date: Mon, 13 Jun 2022 14:44:51 +0800
Subject: [PATCH] Update the DIoU and fuse_add_softmax_dropout test-case
 baselines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../test_fuse_add_softmax_dropout.py          |  36 +++---
 test/test_contrib/test_iou.py                 | 108 +++++++++++++++---
 2 files changed, 116 insertions(+), 28 deletions(-)

diff --git a/test/test_contrib/test_fuse_add_softmax_dropout.py b/test/test_contrib/test_fuse_add_softmax_dropout.py
index 4cf5306367b..dd9c79fa317 100644
--- a/test/test_contrib/test_fuse_add_softmax_dropout.py
+++ b/test/test_contrib/test_fuse_add_softmax_dropout.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 Huawei Technologies Co., Ltd
+# Copyright (c) 2022 Huawei Technologies Co., Ltd
 # All rights reserved.
 #
 # Licensed under the BSD 3-Clause License (the "License");
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import unittest
+import math
 import numpy as np
 import torch
 import torch.nn as nn
@@ -22,22 +23,32 @@
 import torch_npu
 from torch_npu.contrib.function import fuse_add_softmax_dropout
 from torch_npu.testing.testcase import TestCase, run_tests
-from torch_npu.testing.common_utils import create_common_tensor
 
 
-class TestFuseAddSoftmaxDropout(unittest.TestCase):
-    def test_fuse_add_softmax_dropout(seif):
+class TestFuseAddSoftmaxDropout(TestCase):
+    def npu_fuse_add_softmax_dropout(self, dropout, attn_mask, attn_scores, attn_head_size):
+        # Eager-mode baseline: mask + scores / sqrt(head size), then softmax, then dropout.
+        attn_scores = torch.add(attn_mask, attn_scores, alpha=(1 / math.sqrt(attn_head_size)))
+        attn_probs = F.softmax(attn_scores, dim=-1)
+        attn_probs = dropout(attn_probs)
+        return attn_probs
+
+    def test_fuse_add_softmax_dropout(self):
         training = True
-        dropout = nn.DropoutWithByteMask(0.1)
-        npu_input1 = torch.rand(96, 12, 384, 384).half().npu()
-        npu_input2 = torch.rand(96, 12, 384, 384).half().npu()
-        alpha = 0.125
-        axis = -1
+        dropout = nn.DropoutWithByteMask(0)
+        npu_input1 = torch.rand(96, 12, 384, 384).npu().half()
+        npu_input2 = torch.rand(96, 12, 384, 384).npu().half()
+        attn_head_size = 64
+        prob = 0
+
+        npu_output = self.npu_fuse_add_softmax_dropout(dropout, npu_input1, npu_input2, attn_head_size)
+        high_performance_output = fuse_add_softmax_dropout(training=training, dropout=dropout,
+                                                           attn_mask=npu_input1, attn_scores=npu_input2,
+                                                           attn_head_size=attn_head_size, p=prob)
+
+        self.assertRtolEqual(npu_output.detach().cpu().numpy(),
+                             high_performance_output.detach().cpu().numpy())
 
-        output = fuse_add_softmax_dropout(training=training, dropout=dropout, \
-                                          attn_mask=npu_input1, attn_scores=npu_input2,
-                                          attn_head_size=alpha, p=axis)
 
 if __name__ == "__main__":
     run_tests()
-
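The rewritten test above no longer just smoke-tests the fused call: it compares it against an eager-mode baseline that adds the mask to the scores scaled by 1/sqrt(attn_head_size), softmaxes over the last axis, and applies dropout. The following is a minimal CPU-only sketch of that baseline, assuming plain torch.nn.functional.dropout as a stand-in for the NPU-specific nn.DropoutWithByteMask, and using small illustrative shapes rather than the (96, 12, 384, 384) tensors the test runs on NPU:

import math

import torch
import torch.nn.functional as F

def reference_add_softmax_dropout(attn_mask, attn_scores, attn_head_size, p=0.0):
    # Scale the raw scores by 1/sqrt(head size), add the attention mask,
    # softmax over the last axis, then apply dropout.
    scores = attn_mask + attn_scores / math.sqrt(attn_head_size)
    probs = F.softmax(scores, dim=-1)
    return F.dropout(probs, p=p, training=True)

# Illustrative shapes only (batch, heads, seq, seq).
mask = torch.rand(2, 4, 16, 16)
scores = torch.rand(2, 4, 16, 16)
out = reference_add_softmax_dropout(mask, scores, attn_head_size=64, p=0.0)
print(out.shape)  # torch.Size([2, 4, 16, 16])

With p=0 (as in the test) the dropout stage is an identity, which is what makes a bitwise-free numerical comparison against the fused operator meaningful.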
diff --git a/test/test_contrib/test_iou.py b/test/test_contrib/test_iou.py
index 4154b5f708f..5e781ac9adc 100644
--- a/test/test_contrib/test_iou.py
+++ b/test/test_contrib/test_iou.py
@@ -13,6 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import itertools
+import math
+import numpy as np
 import torch
 import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
@@ -73,19 +76,104 @@ class TestIou(TestCase):
         self.assertRtolEqual(expedt_iou, iou.cpu().detach())
 
     def test_npu_diou(self):
-        box1 = torch.tensor([[0, 0, 10, 10],
-                             [10, 10, 20, 20],
-                             [32, 32, 38, 42],
-                             [8, 8, 4, 4]], dtype=torch.float32).to("npu")
-        box1.requires_grad = True
-        box2 = torch.tensor([[0, 0, 10, 20],
-                             [0, 10, 10, 10],
-                             [10, 10, 20, 20],
-                             [8, 8, 4, 4]], dtype=torch.float32).to("npu")
-        box2.requires_grad = True
-        expedt_diou = torch.tensor([[-0.0083, -0.0450, -0.0007, -0.0567]], dtype=torch.float32)
-        diou = npu_diou(box1, box2)
-        self.assertRtolEqual(expedt_diou, diou.cpu().detach())
+        def generate_diou_data(n, m, dtype):
+            # Rows 0-1 (the box centers) fall in [0, 0.5);
+            # rows 2-3 (the widths/heights) fall in (0.5, 1].
+            data_bboxes = np.array([]).astype(dtype)
+            for i in range(4):
+                data_bboxes_array = i // 2 + math.pow(-1, i // 2) * 0.5 * np.random.rand(1, n).astype(dtype)
+                data_bboxes = np.append(data_bboxes, data_bboxes_array)
+            data_bboxes = data_bboxes.reshape([4, n])
+            data_gtboxes = np.array([]).astype(dtype)
+            for i in range(4):
+                data_gtboxes_array = i // 2 + math.pow(-1, i // 2) * 0.5 * np.random.rand(1, m).astype(dtype)
+                data_gtboxes = np.append(data_gtboxes, data_gtboxes_array)
+            data_gtboxes = data_gtboxes.reshape([4, m])
+            cpu_input1 = torch.from_numpy(data_bboxes)
+            cpu_input2 = torch.from_numpy(data_gtboxes)
+            npu_input1 = cpu_input1.npu()
+            npu_input2 = cpu_input2.npu()
+            return [cpu_input1, cpu_input2, npu_input1, npu_input2]
+
+        def cpu_op_exec(box1, box2, trans=False, is_cross=False, mode="iou", eps=1e-9):
+            dtype = box1.numpy().dtype
+            _, n = box1.shape
+            _, m = box2.shape
+            if trans:
+                # Rows are (x_center, y_center, w, h); convert to corner format.
+                b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
+                b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
+                b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
+                b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
+            else:
+                b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
+                b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
+
+            diou_res = np.array([], dtype=dtype)
+            iter_list = itertools.product(range(n), range(m))
+            for i, j in iter_list:
+                # c2: squared diagonal of the smallest box enclosing both boxes.
+                cw = torch.max(b1_x2[i], b2_x2[j]) - torch.min(b1_x1[i], b2_x1[j])
+                ch = torch.max(b1_y2[i], b2_y2[j]) - torch.min(b1_y1[i], b2_y1[j])
+                c2 = cw ** 2 + ch ** 2 + eps
+                # rho2: squared distance between the two box centers.
+                rho2 = ((b2_x1[j] + b2_x2[j] - b1_x1[i] - b1_x2[i]) ** 2 +
+                        (b2_y1[j] + b2_y2[j] - b1_y1[i] - b1_y2[i]) ** 2) / 4
+                inter_area = (torch.min(b1_x2[i], b2_x2[j]) - torch.max(b1_x1[i], b2_x1[j])).clamp(0) * \
+                             (torch.min(b1_y2[i], b2_y2[j]) - torch.max(b1_y1[i], b2_y1[j])).clamp(0)
+                w1, h1 = b1_x2[i] - b1_x1[i], b1_y2[i] - b1_y1[i] + eps
+                w2, h2 = b2_x2[j] - b2_x1[j], b2_y2[j] - b2_y1[j] + eps
+                union_area = w1 * h1 + w2 * h2 - inter_area + eps
+                diou_ij = inter_area / union_area - rho2 / c2
+                if not is_cross:
+                    if i == j:
+                        diou_res = np.append(diou_res, diou_ij)
+                else:
+                    diou_res = np.append(diou_res, diou_ij)
+
+            if not is_cross:
+                return diou_res.reshape(1, n)
+            return diou_res.reshape(n, m)
+
+        def test_npu_diou_shape_format_fp32():
+            _test_npu_diou_shape_format(np.float32)
+
+        def test_npu_diou_shape_format_fp16():
+            _test_npu_diou_shape_format(np.float16)
+
+        def _test_npu_diou_shape_format(dtype):
+            shape_list = [
+                [10, 10],
+                [12, 12],
+                [100, 100]
+            ]
+            is_trans_list = [True]
+            mode_list = ["iou"]
+            shape_format = [[j, k, m]
+                            for j in shape_list
+                            for k in is_trans_list
+                            for m in mode_list]
+
+            # The CPU reference always runs in float32; the requested dtype
+            # is kept only to pick the comparison tolerance.
+            is_fp16 = dtype == np.float16
+            comp_dtype = np.float32 if is_fp16 else dtype
+            for item in shape_format:
+                mode_digit = 0 if item[-1] == "iou" else 1
+                is_cross = item[0][0] != item[0][1]
+                list1 = generate_diou_data(*item[0], comp_dtype)
+                cpu_output = cpu_op_exec(list1[0], list1[1], item[1], is_cross, item[-1])
+                npu_output = npu_diou(list1[2], list1[3], item[1], is_cross, mode_digit)
+                npu_output = npu_output.cpu().numpy()
+                cpu_output = cpu_output.astype(npu_output.dtype)
+
+                if is_fp16:
+                    self.assertRtolEqual(cpu_output, npu_output, prec16=1e-2)
+                else:
+                    self.assertRtolEqual(cpu_output, npu_output)
+
+        test_npu_diou_shape_format_fp32()
+        test_npu_diou_shape_format_fp16()
 
     def test_npu_ciou(self):
         box1 = torch.tensor([[0, 0, 10, 10],
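The cpu_op_exec reference in the hunk above implements the DIoU metric, DIoU = IoU - rho^2 / c^2, where rho is the distance between the two box centers and c is the diagonal of the smallest box enclosing both. A NumPy-only sketch for a single pair of corner-format boxes, kept deliberately close to the per-pair body of the loop above (the function name diou_single is illustrative, not part of the library):

import numpy as np

def diou_single(b1, b2, eps=1e-9):
    # b1, b2 are (x1, y1, x2, y2) corner-format boxes.
    inter_w = max(0.0, min(b1[2], b2[2]) - max(b1[0], b2[0]))
    inter_h = max(0.0, min(b1[3], b2[3]) - max(b1[1], b2[1]))
    inter = inter_w * inter_h
    area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
    area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
    iou = inter / (area1 + area2 - inter + eps)
    # Squared diagonal of the smallest box enclosing both boxes.
    cw = max(b1[2], b2[2]) - min(b1[0], b2[0])
    ch = max(b1[3], b2[3]) - min(b1[1], b2[1])
    c2 = cw ** 2 + ch ** 2 + eps
    # Squared distance between the two box centers.
    rho2 = ((b2[0] + b2[2] - b1[0] - b1[2]) ** 2
            + (b2[1] + b2[3] - b1[1] - b1[3]) ** 2) / 4
    return iou - rho2 / c2

b1 = np.array([0.0, 0.0, 10.0, 10.0])
b2 = np.array([0.0, 0.0, 10.0, 20.0])
print(diou_single(b1, b2))  # 0.5 - 25/500 = 0.45

Note the index pairing in the rho2 term: both corners of box2 take index j and both corners of box1 take index i, which is why the mixed i/j subscripts in the original hunk had to be corrected.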
@@ -103,4 +191,4 @@ class TestIou(TestCase):
         self.assertRtolEqual(expedt_ciou, ciou.cpu().detach())
 
 if __name__ == "__main__":
-    run_tests()
\ No newline at end of file
+    run_tests()
--
Gitee
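One detail of the new DIoU baseline worth spelling out: generate_diou_data produces boxes as 4 x n arrays whose rows are coordinates, and the test only exercises trans=True, so the rows are read as (x_center, y_center, w, h) and converted to corners before any overlap math. A small sketch of that conversion, assuming the same (4, n) row layout as the generator above:

import numpy as np

def xywh_to_xyxy(boxes):
    # boxes: (4, n) array whose rows are (x_center, y_center, w, h),
    # the layout the trans=True branch of cpu_op_exec expects.
    x, y, w, h = boxes
    return np.stack([x - w / 2, y - h / 2, x + w / 2, y + h / 2])

b = np.array([[5.0], [5.0], [10.0], [10.0]])  # one box centred at (5, 5)
print(xywh_to_xyxy(b).ravel())  # [ 0.  0. 10. 10.]

Because the generator keeps centers in [0, 0.5) and extents in (0.5, 1], every converted box has x1 < x2 and y1 < y2, so the clamp(0) guards in the reference only trigger for genuinely non-overlapping pairs.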