diff --git a/ci/access_control_test.py b/ci/access_control_test.py index f4696667556e1e295ec7fc1a2098e63c5e7bbd19..aa61c310b33300888232e262d32e636a6a657e26 100644 --- a/ci/access_control_test.py +++ b/ci/access_control_test.py @@ -20,8 +20,10 @@ import sys import subprocess from abc import ABCMeta, abstractmethod -DEFAULT_UT_FILE = '../test/test_network_ops/test_add.py' -CUR_DIR = os.path.abspath(os.path.dirname(__file__)) + +BASE_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +DEFAULT_UT_FILE = os.path.join(BASE_DIR, 'test/test_network_ops/test_add.py') + class AccurateTest(metaclass=ABCMeta): @abstractmethod @@ -34,7 +36,7 @@ class AccurateTest(metaclass=ABCMeta): @staticmethod def find_ut_by_regex(regex): ut_files = [] - cmd = "find {} -name {}".format('../test/test_network_ops', regex) + cmd = "find {} -name {}".format(os.path.join(BASE_DIR, 'test'), regex) status, output = subprocess.getstatusoutput(cmd) if status: pass # 对于找不到的暂时不作处理 @@ -75,7 +77,7 @@ class DirectoryStrategy(AccurateTest): def identify(self, modify_file): second_dir = modify_file.split("/")[0] if second_dir == 'test': - return [modify_file] + return [os.path.join(BASE_DIR, modify_file)] return [] @@ -160,7 +162,6 @@ def exec_ut(ut_files): ret_status = 0 exec_infos = [] for ut_file in ut_files: - os.chdir(CUR_DIR) temp_ret = change_dir_and_exec(ut_file) if temp_ret: ret_status = temp_ret @@ -175,7 +176,7 @@ def exec_ut(ut_files): if __name__ == "__main__": - cur_modify_files = os.path.join(CUR_DIR, '../modify_files.txt') + cur_modify_files = os.path.join(BASE_DIR, 'modify_files.txt') test_mgr = TestMgr() test_mgr.load(cur_modify_files) test_mgr.analyze() diff --git a/setup.py b/setup.py index c7ebcbea64422d11d515053812a4cd64044c94b2..44f9a6a843fe174b6e818f2d3e97794cc5492b01 100644 --- a/setup.py +++ b/setup.py @@ -152,6 +152,9 @@ class Build(build_ext, object): if self.compiler and '-Wstrict-prototypes' in self.compiler.compiler_so: self.compiler.compiler_so.remove('-Wstrict-prototypes') + if self.compiler and '-g' in self.compiler.compiler_so: + self.compiler.compiler_so.remove('-g') + return super(Build, self).build_extensions() diff --git a/test/test_amp/test_amp.py b/test/test_amp.py similarity index 97% rename from test/test_amp/test_amp.py rename to test/test_amp.py index 5a381d8196166c592c2e03faec052f7df73e3f58..b9232204621240bacd0d0cfea4b752e9858111cc 100644 --- a/test/test_amp/test_amp.py +++ b/test/test_amp.py @@ -72,7 +72,8 @@ class TestAmp(TestCase): opt_control = torch.optim.SGD(mod_control.parameters(), lr=1.0) opt_scaling = torch.optim.SGD(mod_scaling.parameters(), lr=1.0) - return mod_control, mod_scaling, opt_control, opt_scaling + ret = (mod_control, mod_scaling, opt_control, opt_scaling) + return ret def _create_scaling_case(self, device="npu", dtype=torch.float): data = [(torch.randn((8, 8), dtype=dtype, device=device), torch.randn((8, 8), dtype=dtype, device=device)), diff --git a/test/test_aoe/test_aoe.py b/test/test_aoe.py similarity index 100% rename from test/test_aoe/test_aoe.py rename to test/test_aoe.py diff --git a/test/test_profiler/test_pt_profiler.py b/test/test_profiler/test_pt_profiler.py deleted file mode 100644 index 7ec8aa1242bb182fbfe54208fea8cb9699d7ef5f..0000000000000000000000000000000000000000 --- a/test/test_profiler/test_pt_profiler.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from torch_npu.testing.common_utils import TestCase, run_tests -import torch_npu - -class SmallModel(torch.nn.Module): - def __init__(self, in_channel, out_channel): - super(SmallModel, self).__init__() - self.conv1 = torch.nn.Conv2d(in_channel, in_channel, 1) - self.relu1 = torch.nn.ReLU() - self.conv2 = torch.nn.Conv2d(in_channel, out_channel, 1) - - def forward(self, input_1): - input_1 = self.conv1(input_1) - input_1 = self.relu1(input_1) - input_1 = self.conv2(input_1) - return input_1.reshape(input_1.shape[0], -1) - -class TestProfiler(TestCase): - - def mm_op(self, device="cpu"): - a = torch.rand(5, 5).to(device) - b = torch.randn(5, 5).to(device) - c = torch.mm(a, b) - - def test_cpu_op_profiler(self): - with torch.autograd.profiler.profile(use_npu=False) as prof: - self.mm_op() - found_mm = False - - for e in prof.function_events: - if "mm" in e.name: - found_mm = True - self.assertTrue(found_mm) - - def test_npu_op_profiler(self): - # test basic function for npu op - if torch.npu.is_available(): - device = "npu:0" - else: - return - with torch.autograd.profiler.profile(use_npu=True) as prof: - self.mm_op(device) - found_mm = False - - for e in prof.function_events: - if "mm" in e.name: - found_mm = True - self.assertTrue(found_mm) - - def test_memory_profiler(self): - # test momory usage - def run_profiler(creat_tensor, metric): - # collecting allocs / deallocs - with torch.autograd.profiler.profile(profile_memory=True, - record_shapes=False, use_npu=True) as prof: - input_x = None - with torch.autograd.profiler.record_function("user_allocate"): - input_x = creat_tensor() - with torch.autograd.profiler.record_function("user_deallocate"): - del input_x - return prof.key_averages() - - def check_metrics(stats, metric, allocs=None, deallocs=None): - stat_metrics = {} - for stat in stats: - stat_metrics[stat.key] = getattr(stat, metric) - if allocs is not None: - for alloc_fn in allocs: - self.assertTrue(alloc_fn in stat_metrics) - self.assertTrue(stat_metrics[alloc_fn] > 0) - if deallocs is not None: - for dealloc_fn in deallocs: - self.assertTrue(dealloc_fn in stat_metrics) - self.assertTrue(stat_metrics[dealloc_fn] < 0) - - def create_cpu_tensor(): - return torch.rand(10, 10) - - def create_npu_tensor(): - return torch.rand(20, 30).npu() - - stats = run_profiler(create_cpu_tensor, "cpu_memory_usage") - check_metrics( - stats, - "cpu_memory_usage", - allocs=[ - "aten::empty", - "aten::rand", - "user_allocate", - ], - deallocs=[ - "user_deallocate", - ] - ) - - if torch.npu.is_available(): - create_npu_tensor() - stats = run_profiler(create_npu_tensor, "npu_memory_usage") - check_metrics( - stats, - "npu_memory_usage", - allocs=[ - "user_allocate", - "aten::to", - "aten::empty_strided", - ], - deallocs=[ - "user_deallocate", - ] - ) - check_metrics( - stats, - "cpu_memory_usage", - allocs=[ - "aten::rand", - "aten::empty", - ] - ) - - def test_model_profiler(self): - """Checks that model forward and backward. - """ - def train(): - for index in range(steps): - x = torch.rand(input_shape).to(device) - y = torch.rand(out_shape).reshape(out_shape[0], -1).to(device) - y_pred = model(x) - loss = criterion(y_pred, y) - optimizer.zero_grad() - loss.backward() - optimizer.step() - input_shape = (4, 3, 24, 24) - out_shape = (4, 12, 24, 24) - steps = 5 - device = "npu:0" if torch.npu.is_available() else "cpu" - model = SmallModel(input_shape[1], out_shape[1]).to(device) - criterion = torch.nn.MSELoss(reduction='sum') - optimizer = torch.optim.SGD(model.parameters(), lr=1e-4) - - try: - train() - except Exception: - self.assertTrue(False, "Expected no exception without profiling.") - - def judge(expected_event_count, prof): - actual_event_count = {} - for e in prof.function_events: - if "#" in e.name: - key = e.name - if key in expected_event_count.keys(): - actual_event_count[key] = actual_event_count.setdefault(key, 0) + 1 - for key, count in expected_event_count.items(): - self.assertTrue((key in actual_event_count.keys()) and (count == actual_event_count[key])) - - with torch.autograd.profiler.profile(use_npu=True) as prof: - train() - expected_event_count = { - "Optimizer.step#SGD.step": steps, - "Optimizer.zero_grad#SGD.zero_grad": steps - } - judge(expected_event_count, prof) - - - def test_npu_simple_profiler(self): - def train(): - for index in range(steps): - x = torch.rand(input_shape).to(device) - y = torch.rand(out_shape).reshape(out_shape[0], -1).to(device) - y_pred = model(x) - loss = criterion(y_pred, y) - optimizer.zero_grad() - loss.backward() - optimizer.step() - input_shape = (4, 3, 24, 24) - out_shape = (4, 12, 24, 24) - steps = 5 - device = "npu:0" if torch.npu.is_available() else "cpu" - model = SmallModel(input_shape[1], out_shape[1]).to(device) - criterion = torch.nn.MSELoss(reduction='sum') - optimizer = torch.optim.SGD(model.parameters(), lr=1e-4) - try: - train() - except Exception: - self.assertTrue(False, "Expected no exception without profiling.") - with torch.autograd.profiler.profile(use_npu=True, use_npu_simple=True) as prof: - train() - prof.export_chrome_trace("./test_trace.prof") - -if __name__ == '__main__': - try: - # to init the device - torch.rand(2,3).npu() - except Exception: - print("there is no npu device") - exit() - run_tests()