diff --git a/test/test_c10d.py b/test/test_c10d.py
index 428280e9430430e36ab24b471c132d6f4dc9fcd2..1046bab6ad14aebe7ac1b29362fc54741f49a84e 100644
--- a/test/test_c10d.py
+++ b/test/test_c10d.py
@@ -17,10 +17,10 @@ from enum import IntEnum, unique
 import os
 import unittest
 import torch
-import torch_npu
 import torch.distributed as c10d
 import torch.distributed as dist
 import torch.multiprocessing as mp
+import torch_npu
 
 from torch_npu.testing.testcase import TestCase, run_tests
 
diff --git a/test/test_npu.py b/test/test_npu.py
index b8f63f24abdfba51a1b89f1f56771ec814e0c6b0..d353e3c1d258e69711b366b613da74eab969d729 100644
--- a/test/test_npu.py
+++ b/test/test_npu.py
@@ -91,9 +91,6 @@ class TestNpu(TestCase):
                 return torch.npu.FloatTensor(*size)
 
         def assert_change(comp=1, empty_cache=False, reset_peak=False):
-            # comp > 0: increased
-            # comp = 0: equal
-            # comp < 0: decreased
             new_m = torch_npu.npu.memory_allocated(device)
             new_max_m = torch_npu.npu.max_memory_allocated(device)
             if comp > 0:
@@ -141,65 +138,71 @@ class TestNpu(TestCase):
         assert_change(0)
         yield
 
-        tensors1 = [alloc(1), alloc(10, 20), alloc(200, 300, 2000)]
-        m1 = torch_npu.npu.memory_allocated(device)
-        assert_change(1)
-        yield
-
-        tensors2 = []
-
-        for i in range(1, int(N / 2) + 1):
-            # small ones
-            tensors2.append(alloc(i, i * 4))
+        def change_with_tensor():
+            # Sub-generator: allocates and frees tensors, yielding after each memory check.
+            tensors1 = [alloc(1), alloc(10, 20), alloc(200, 300, 2000)]
+            m1 = torch_npu.npu.memory_allocated(device)
             assert_change(1)
             yield
 
-        for i in range(5, int(N / 2) + 5):
-            # large ones
-            tensors2.append(alloc(i, i * 7, i * 9, i * 11))
-            assert_change(1, reset_peak=(i % 2 == 0))
-            yield
+            tensors2 = []
 
-        tensors2.append(alloc(0, 0, 0))
-        assert_change(0)
-        yield
+            for i in range(1, int(N / 2) + 1):
+                # small ones
+                tensors2.append(alloc(i, i * 4))
+                assert_change(1)
+                yield
 
-        permute = []
-        for i in torch.randperm(len(tensors2)):
-            permute.append(tensors2[i])
+            for i in range(5, int(N / 2) + 5):
+                # large ones
+                tensors2.append(alloc(i, i * 7, i * 9, i * 11))
+                assert_change(1, reset_peak=(i % 2 == 0))
+                yield
+
+            tensors2.append(alloc(0, 0, 0))
             assert_change(0)
             yield
 
-        del tensors2
-        assert_change(0)
-        yield
-        tensors2 = permute
-        assert_change(0)
-        yield
-        del permute
-        assert_change(0, reset_peak=True)
-        yield
+            permute = []
+            for i in torch.randperm(len(tensors2)):
+                permute.append(tensors2[i])
+                assert_change(0)
+                yield
 
-        for i in range(int(N / 2)):
-            x = tensors2[i].numel()
-            del tensors2[i]
-            assert_change(-x)  # in case that tensors2[i] is empty
+            del tensors2
+            assert_change(0)
             yield
-
-        for i in range(2, int(2 * N / 3) + 2):
-            tensors2.append(alloc(i, i * 3, i * 8))
-            assert_change(1)
+            tensors2 = permute
+            assert_change(0)
+            yield
+            del permute
+            assert_change(0, reset_peak=True)
             yield
 
-        del tensors2
-        assert_change(-1, reset_peak=True)
-        assert_change(0)
-        self.assertEqual(torch_npu.npu.memory_allocated(device), m1)
-        yield True
+            for i in range(int(N / 2)):
+                x = tensors2[i].numel()
+                del tensors2[i]
+                assert_change(-x)  # in case that tensors2[i] is empty
+                yield
+
+            for i in range(2, int(2 * N / 3) + 2):
+                tensors2.append(alloc(i, i * 3, i * 8))
+                assert_change(1)
+                yield
+
+            del tensors2
+            assert_change(-1, reset_peak=True)
+            assert_change(0)
+            self.assertEqual(torch_npu.npu.memory_allocated(device), m1)
+            yield True
+
+            del tensors1
+            assert_change(-1, reset_peak=True)
+            self.assertEqual(torch_npu.npu.memory_allocated(device), m0)
 
-        del tensors1
-        assert_change(-1, reset_peak=True)
-        self.assertEqual(torch_npu.npu.memory_allocated(device), m0)
+        # change_with_tensor is a generator function: a bare call would only build a
+        # generator object and run none of its checks, so delegate to it instead.
+        yield from change_with_tensor()
 
         # test empty_cache and reset_peak
         assert_change(0, empty_cache=True)