diff --git a/test/test_network_ops/test_adaptive_max_pool2d.py b/test/test_network_ops/test_adaptive_max_pool2d.py
index 1d4b4180cd119b3f165b326f716f8734a2136955..823a04d033deabb24dc2cbf794ce49c6d4a4b2d6 100644
--- a/test/test_network_ops/test_adaptive_max_pool2d.py
+++ b/test/test_network_ops/test_adaptive_max_pool2d.py
@@ -11,10 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import torch
 import torch.nn as nn
+import torch.nn.functional as F
 import numpy as np
-import torch_npu
 
 from torch_npu.testing.testcase import TestCase, run_tests
 from torch_npu.testing.common_utils import create_common_tensor
@@ -31,8 +31,9 @@ class TestAdaptiveMaxPool2d(TestCase):
         output = m(input1)
         return output.cpu().numpy()
 
-    def test_adaptive_max_pool2d_shape_format_fp32_6(self, device="npu"):
+    def test_adaptiveMaxPool2d_shape_format_fp32_6(self):
         format_list = [-1]
+        # (1, 8, 9) IndexError
         shape_list = [(1, 5, 9, 9)]
         shape_format = [
             [np.float32, i, j] for i in format_list for j in shape_list
@@ -43,8 +44,23 @@ class TestAdaptiveMaxPool2d(TestCase):
             for output_size in output_list:
                 cpu_output = self.cpu_op_exec(cpu_input, output_size)
                 npu_output = self.npu_op_exec(npu_input, output_size)
+                self.assertRtolEqual(cpu_output, npu_output, 0.0004)
 
+    def test_adaptiveMaxPool2d_case_in_photo2cartoon(self):
+        cpu_x = torch.rand(1, 256, 31, 31)
+        npu_x = cpu_x.npu()
+        cpu_out = F.adaptive_max_pool2d(cpu_x, 1)
+        npu_out = F.adaptive_max_pool2d(npu_x, 1)
+        self.assertRtolEqual(cpu_out, npu_out.cpu(), 0.0003)
+
+    def test_adaptiveMaxPool2d_case_in_photo2cartoon_fp16(self):
+        cpu_x = torch.rand(1, 256, 31, 31).half()
+        npu_x = cpu_x.npu()
+        cpu_out = F.adaptive_max_pool2d(cpu_x.float(), 1).half()
+        npu_out = F.adaptive_max_pool2d(npu_x, 1)
+        self.assertRtolEqual(cpu_out, npu_out.cpu())
+
 
 
 if __name__ == "__main__":
     run_tests()
diff --git a/test/test_network_ops/test_adaptive_max_pool2d_backward.py b/test/test_network_ops/test_adaptive_max_pool2d_backward.py
index 11f2564cee76fb2a95d574a12d31d260c9adfe21..562587e3e51fd96a8249fece86344f286be0305e 100644
--- a/test/test_network_ops/test_adaptive_max_pool2d_backward.py
+++ b/test/test_network_ops/test_adaptive_max_pool2d_backward.py
@@ -14,8 +14,8 @@
 
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 import numpy as np
-import torch_npu
 
 from torch_npu.testing.testcase import TestCase, run_tests
 from torch_npu.testing.common_utils import create_common_tensor
@@ -39,8 +39,8 @@ class TestAdaptiveMaxPool2dBackward(TestCase):
         npu_grad = npu_grad.to("cpu")
         return npu_grad
 
-    def test_adaptive_max_pool2d_shape_format_fp32_6(self, device="npu"):
-        format_list = [-1]
+    def test_adaptiveMaxPool2d_shape_format_fp32_6(self):
+        format_list = [0, 3]
         shape_list = [(1, 3, 8, 9)]
         shape_format = [
             [np.float16, i, j] for i in format_list for j in shape_list
@@ -55,6 +55,28 @@ class TestAdaptiveMaxPool2dBackward(TestCase):
             npu_output = self.npu_op_exec(npu_input, output_size)
             self.assertRtolEqual(cpu_output, npu_output)
 
+    def test_adaptiveMaxPool2d_backward_case_in_photo2cartoon(self):
+        cpu_x = torch.rand(1, 256, 31, 31)
+        npu_x = cpu_x.npu()
+        cpu_x.requires_grad = True
+        npu_x.requires_grad = True
+        cpu_out = F.adaptive_max_pool2d(cpu_x, 1)
+        npu_out = F.adaptive_max_pool2d(npu_x, 1)
+        cpu_out.backward(torch.ones_like(cpu_out))
+        npu_out.backward(torch.ones_like(npu_out))
+        self.assertRtolEqual(cpu_x.grad, npu_x.grad.cpu(), 0.0003)
+
+    def test_adaptiveMaxPool2d_backward_case_in_photo2cartoon_fp16(self):
+        cpu_x = torch.rand(1, 256, 31, 31).half()
+        npu_x = cpu_x.npu()
+        cpu_x.requires_grad = True
+        npu_x.requires_grad = True
+        cpu_out = F.adaptive_max_pool2d(cpu_x.float(), 1).half()
+        npu_out = F.adaptive_max_pool2d(npu_x, 1)
+        cpu_out.backward(torch.ones_like(cpu_out))
+        npu_out.backward(torch.ones_like(npu_out))
+        self.assertRtolEqual(cpu_x.grad, npu_x.grad.cpu())
+
 
 if __name__ == "__main__":
     run_tests()
diff --git a/test/test_network_ops/test_max_pool2d_backward.py b/test/test_network_ops/test_max_pool2d_backward.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ec842ba6a88ac69f9d2af853960392ce95d9b4f
--- /dev/null
+++ b/test/test_network_ops/test_max_pool2d_backward.py
@@ -0,0 +1,87 @@
+# Copyright (c) 2020 Huawei Technologies Co., Ltd
+# Copyright (c) 2019, Facebook CORPORATION.
+# All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import torch
+import torch.nn as nn
+import numpy as np
+import torch.nn.functional as F
+
+from torch_npu.testing.testcase import TestCase, run_tests
+from torch_npu.testing.common_utils import create_common_tensor
+
+
+class TestMaxPool2dBackward(TestCase):
+    def cpu_op_exec(self, inputCpu, kernel_size, stride, padding):
+        inputCpu.requires_grad = True
+        dataCpu, argMaxCpu = F.max_pool2d_with_indices(inputCpu, kernel_size=kernel_size, stride=stride,
+                                                       padding=padding)
+        z1 = torch.sum(dataCpu)
+        z1.backward()
+        cpu_grad = inputCpu.grad
+        output1 = dataCpu.detach()
+        output1 = output1
+        return output1, cpu_grad
+
+    def npu_op_exec(self, inputNpu, kernel_size, stride, padding):
+        inputNpu.requires_grad = True
+        dataNpu, argMaxNpu = F.max_pool2d_with_indices(inputNpu, kernel_size=kernel_size, stride=stride,
+                                                       padding=padding)
+        z2 = torch.sum(dataNpu)
+        z2.backward()
+        npu_grad = inputNpu.grad
+        npu_grad = npu_grad.to("cpu")
+        output1 = dataNpu.to("cpu").detach()
+        return output1, npu_grad
+
+    def test_max_pool2d_backward_shape_format(self):
+        shape_format = [
+            [[np.float16, 3, [256, 64, 112, 112]], [3, 3], [2, 2], 1],
+        ]
+
+        for item in shape_format:
+            cpu_input, npu_input = create_common_tensor(item[0], 0, 100)
+            if cpu_input.dtype == torch.float16:
+                cpu_input = cpu_input.to(torch.float32)
+            cpu_output, cpu_grad = self.cpu_op_exec(cpu_input, item[1], item[2], item[3])
+            npu_output, npu_grad = self.npu_op_exec(npu_input, item[1], item[2], item[3])
+            cpu_output = cpu_output.to(npu_output.dtype)
+            cpu_grad = cpu_grad.to(npu_grad.dtype)
+
+            self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy())
+            self.assertRtolEqual(cpu_grad.numpy(), npu_grad.numpy())
+
+    def test_max_pool2d_backward_case_in_ctpn(self):
+        cpu_x = torch.rand(1, 128, 375, 500).half()
+        npu_x = cpu_x.npu()
+        cpu_x.requires_grad = True
+        npu_x.requires_grad = True
+
+        cpu_model = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
+        npu_model = copy.deepcopy(cpu_model)
+
+        cpu_out = cpu_model(cpu_x.float()).half()
+        npu_out = npu_model(npu_x)
+
+        cpu_out.backward(torch.ones_like(cpu_out))
+        npu_out.backward(torch.ones_like(npu_out))
+
+        self.assertRtolEqual(cpu_out.detach().numpy(), npu_out.cpu().detach().numpy())
+        self.assertRtolEqual(cpu_x.grad.numpy(), npu_x.grad.cpu().numpy())
+
+
+if __name__ == "__main__":
+    run_tests()
\ No newline at end of file
diff --git a/test/test_network_ops/test_max_pool2d_with_indices_backward.py b/test/test_network_ops/test_max_pool2d_with_indices_backward.py
index d5faf20310a163157065d9687c60587f2e88e7d0..06afe4b2c2bc3cbe172c79217dde3604548a7afd 100644
--- a/test/test_network_ops/test_max_pool2d_with_indices_backward.py
+++ b/test/test_network_ops/test_max_pool2d_with_indices_backward.py
@@ -26,9 +26,9 @@ from torch_npu.testing.common_utils import create_common_tensor
 class TestMaxPool2dWithIndicesBackward(TestCase):
     def cpu_op_exec(self, inputCpu, kernel_size, stride, padding, dilation, ceil_mode):
         inputCpu.requires_grad = True
-        dataCpu, argMaxCpu = F.max_pool2d_with_indices(inputCpu, kernel_size = kernel_size,
-            stride = stride, padding = padding, dilation = dilation,
-            ceil_mode = ceil_mode, return_indices=True)
+        dataCpu, argMaxCpu = F.max_pool2d_with_indices(inputCpu, kernel_size=kernel_size, stride=stride,
+                                                       padding=padding, dilation=dilation,
+                                                       ceil_mode=ceil_mode, return_indices=True)
         z1 = torch.sum(dataCpu)
         z1.backward()
         cpu_grad = inputCpu.grad
@@ -37,9 +37,9 @@ class TestMaxPool2dWithIndicesBackward(TestCase):
 
     def npu_op_exec(self, inputNpu, kernel_size, stride, padding, dilation, ceil_mode):
         inputNpu.requires_grad = True
-        dataNpu, argMaxNpu = F.max_pool2d_with_indices(inputNpu, kernel_size = kernel_size,
-            stride = stride, padding = padding, dilation = dilation,
-            ceil_mode = ceil_mode, return_indices=True)
+        dataNpu, argMaxNpu = F.max_pool2d_with_indices(inputNpu, kernel_size=kernel_size, stride=stride,
+                                                       padding=padding, dilation=dilation,
+                                                       ceil_mode=ceil_mode, return_indices=True)
         z2 = torch.sum(dataNpu)
         z2.backward()
         npu_grad = inputNpu.grad
@@ -47,11 +47,10 @@ class TestMaxPool2dWithIndicesBackward(TestCase):
         npu_grad = npu_grad.to("cpu")
         output1 = dataNpu.to("cpu").detach()
         return output1, npu_grad
 
-    def test_max_pool2d_with_indices_backward_fp16(self, device="npu"):
+    def test_max_pool2d_with_indices_backward_fp16(self):
         shape_format = [
             [[np.float16, 3, [256, 64, 112, 112]], [3, 3], [2, 2], 1, 1, False],
             [[np.float16, 3, [1024, 24, 56, 112]], [3, 3], [2, 2], 1, 1, False],
-            [[np.float16, 3, [1024, 24, 112, 112]], [3, 3], [2, 2], 1, 1, False],
             [[np.float16, 3, [1024, 24, 112, 56]], [3, 3], [2, 2], 1, 1, False],
         ]
@@ -61,20 +60,20 @@ class TestMaxPool2dWithIndicesBackward(TestCase):
                 cpu_input = cpu_input.to(torch.float32)
             cpu_output, cpu_grad = self.cpu_op_exec(cpu_input, item[1], item[2], item[3], item[4], item[5])
             npu_output, npu_grad = self.npu_op_exec(npu_input, item[1], item[2], item[3], item[4], item[5])
-            cpu_grad = cpu_grad.to(npu_grad.dtype)
             cpu_output = cpu_output.to(npu_output.dtype)
+            cpu_grad = cpu_grad.to(npu_grad.dtype)
 
             self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy())
             self.assertRtolEqual(cpu_grad.numpy(), npu_grad.numpy())
 
 
-    def test_max_pool2d_with_indices_backward_fp32(self, device="npu"):
+    def test_max_pool2d_with_indices_backward_fp32(self):
         shape_format = [
             [[np.float16, 3, [256, 64, 112, 112]], [3, 3], [2, 2], 1, 1, False],
-            [[np.float16, 3, [1024, 24, 112, 112]], [3, 3], [2, 2], 1, 1, False],
             [[np.float16, 3, [1024, 24, 56, 112]], [3, 3], [2, 2], 1, 1, False],
             [[np.float16, 3, [1024, 24, 112, 56]], [3, 3], [2, 2], 1, 1, False],
         ]
+
         for item in shape_format:
             cpu_input, npu_input = create_common_tensor(item[0], 0, 100)
             if cpu_input.dtype == torch.float16:
@@ -87,7 +86,6 @@ class TestMaxPool2dWithIndicesBackward(TestCase):
             self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy(), prec=1.e-3)
             self.assertRtolEqual(cpu_grad.numpy(), npu_grad.numpy(), prec=1.e-3)
 
 
-
 if __name__ == "__main__":
     run_tests()