diff --git a/ci/access_control_test.py b/ci/access_control_test.py
index f4696667556e1e295ec7fc1a2098e63c5e7bbd19..aa61c310b33300888232e262d32e636a6a657e26 100644
--- a/ci/access_control_test.py
+++ b/ci/access_control_test.py
@@ -20,8 +20,10 @@ import sys
 import subprocess
 from abc import ABCMeta, abstractmethod
 
-DEFAULT_UT_FILE = '../test/test_network_ops/test_add.py'
-CUR_DIR = os.path.abspath(os.path.dirname(__file__))
+# Parent of this script's directory (symlinks resolved); test paths are built from it.
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+DEFAULT_UT_FILE = os.path.join(BASE_DIR, 'test/test_network_ops/test_add.py')
+
 
 class AccurateTest(metaclass=ABCMeta):
     @abstractmethod
@@ -34,7 +36,7 @@ class AccurateTest(metaclass=ABCMeta):
     @staticmethod
     def find_ut_by_regex(regex):
         ut_files = []
-        cmd = "find {} -name {}".format('../test/test_network_ops', regex)
+        cmd = "find {} -name {}".format(os.path.join(BASE_DIR, 'test'), regex)
         status, output = subprocess.getstatusoutput(cmd)
         if status:
             pass # 对于找不到的暂时不作处理
@@ -75,7 +77,7 @@ class DirectoryStrategy(AccurateTest):
     def identify(self, modify_file):
         second_dir = modify_file.split("/")[0]
         if second_dir == 'test':
-            return [modify_file]
+            return [os.path.join(BASE_DIR, modify_file)]
         return []
 
 
@@ -160,7 +162,6 @@ def exec_ut(ut_files):
     ret_status = 0
     exec_infos = []
     for ut_file in ut_files:
-        os.chdir(CUR_DIR)
         temp_ret = change_dir_and_exec(ut_file)
         if temp_ret:
             ret_status = temp_ret
@@ -175,7 +176,7 @@ def exec_ut(ut_files):
 
 
 if __name__ == "__main__":
-    cur_modify_files = os.path.join(CUR_DIR, '../modify_files.txt')
+    cur_modify_files = os.path.join(BASE_DIR, 'modify_files.txt')
     test_mgr = TestMgr()
     test_mgr.load(cur_modify_files)
     test_mgr.analyze()
diff --git a/setup.py b/setup.py
index c7ebcbea64422d11d515053812a4cd64044c94b2..44f9a6a843fe174b6e818f2d3e97794cc5492b01 100644
--- a/setup.py
+++ b/setup.py
@@ -152,6 +152,9 @@ class Build(build_ext, object):
         if self.compiler and '-Wstrict-prototypes' in self.compiler.compiler_so:
             self.compiler.compiler_so.remove('-Wstrict-prototypes')
 
+        while self.compiler and '-g' in self.compiler.compiler_so:
+            self.compiler.compiler_so.remove('-g')  # remove() drops one match per call
+
         return super(Build, self).build_extensions()
 
 
diff --git a/test/test_amp/test_amp.py b/test/test_amp.py
similarity index 97%
rename from test/test_amp/test_amp.py
rename to test/test_amp.py
index 5a381d8196166c592c2e03faec052f7df73e3f58..b9232204621240bacd0d0cfea4b752e9858111cc 100644
--- a/test/test_amp/test_amp.py
+++ b/test/test_amp.py
@@ -72,7 +72,8 @@ class TestAmp(TestCase):
         opt_control = torch.optim.SGD(mod_control.parameters(), lr=1.0)
         opt_scaling = torch.optim.SGD(mod_scaling.parameters(), lr=1.0)
 
-        return mod_control, mod_scaling, opt_control, opt_scaling
+        ret = (mod_control, mod_scaling, opt_control, opt_scaling)
+        return ret
 
     def _create_scaling_case(self, device="npu", dtype=torch.float):
         data = [(torch.randn((8, 8), dtype=dtype, device=device), torch.randn((8, 8), dtype=dtype, device=device)),
diff --git a/test/test_aoe/test_aoe.py b/test/test_aoe.py
similarity index 100%
rename from test/test_aoe/test_aoe.py
rename to test/test_aoe.py
diff --git a/test/test_profiler/test_pt_profiler.py b/test/test_profiler/test_pt_profiler.py
deleted file mode 100644
index 7ec8aa1242bb182fbfe54208fea8cb9699d7ef5f..0000000000000000000000000000000000000000
--- a/test/test_profiler/test_pt_profiler.py
+++ /dev/null
@@ -1,210 +0,0 @@
-# Copyright (c) 2020, Huawei Technologies.All rights reserved.
-#
-# Licensed under the BSD 3-Clause License  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-from torch_npu.testing.common_utils import TestCase, run_tests
-import torch_npu
-
-class SmallModel(torch.nn.Module):
-    def __init__(self, in_channel, out_channel):
-        super(SmallModel, self).__init__()
-        self.conv1 = torch.nn.Conv2d(in_channel, in_channel, 1)
-        self.relu1 = torch.nn.ReLU()
-        self.conv2 = torch.nn.Conv2d(in_channel, out_channel, 1)
-
-    def forward(self, input_1):
-        input_1 = self.conv1(input_1)
-        input_1 = self.relu1(input_1)
-        input_1 = self.conv2(input_1)
-        return input_1.reshape(input_1.shape[0], -1)
-
-class TestProfiler(TestCase):
-
-    def mm_op(self, device="cpu"):
-        a = torch.rand(5, 5).to(device)
-        b = torch.randn(5, 5).to(device)
-        c = torch.mm(a, b)
-
-    def test_cpu_op_profiler(self):
-        with torch.autograd.profiler.profile(use_npu=False) as prof:
-            self.mm_op()
-        found_mm = False 
-
-        for e in prof.function_events:
-            if "mm" in e.name:
-                found_mm = True
-        self.assertTrue(found_mm)
-
-    def test_npu_op_profiler(self):
-        # test basic function for npu op
-        if torch.npu.is_available():
-            device = "npu:0"
-        else:
-            return
-        with torch.autograd.profiler.profile(use_npu=True) as prof:
-            self.mm_op(device)
-        found_mm = False 
-
-        for e in prof.function_events:
-            if "mm" in e.name:
-                found_mm = True
-        self.assertTrue(found_mm)
-
-    def test_memory_profiler(self):
-        # test momory usage
-        def run_profiler(creat_tensor, metric):
-            # collecting allocs / deallocs
-            with torch.autograd.profiler.profile(profile_memory=True, 
-                                            record_shapes=False, use_npu=True) as prof:
-                input_x = None
-                with torch.autograd.profiler.record_function("user_allocate"):
-                    input_x = creat_tensor()
-                with torch.autograd.profiler.record_function("user_deallocate"):
-                    del input_x
-            return prof.key_averages()
-
-        def check_metrics(stats, metric, allocs=None, deallocs=None):
-            stat_metrics = {}
-            for stat in stats:
-                stat_metrics[stat.key] = getattr(stat, metric)
-            if allocs is not None:
-                for alloc_fn in allocs:
-                    self.assertTrue(alloc_fn in stat_metrics)
-                    self.assertTrue(stat_metrics[alloc_fn] > 0)
-            if deallocs is not None:
-                for dealloc_fn in deallocs:
-                    self.assertTrue(dealloc_fn in stat_metrics)
-                    self.assertTrue(stat_metrics[dealloc_fn] < 0)
-
-        def create_cpu_tensor():
-            return torch.rand(10, 10)
-
-        def create_npu_tensor():
-            return torch.rand(20, 30).npu()
-
-        stats = run_profiler(create_cpu_tensor, "cpu_memory_usage")
-        check_metrics(
-            stats,
-            "cpu_memory_usage",
-            allocs=[
-                "aten::empty",
-                "aten::rand",
-                "user_allocate",
-            ],
-            deallocs=[
-                "user_deallocate",
-            ]
-        )
-
-        if torch.npu.is_available():
-            create_npu_tensor()
-            stats = run_profiler(create_npu_tensor, "npu_memory_usage")
-            check_metrics(
-                stats,
-                "npu_memory_usage",
-                allocs=[
-                    "user_allocate",
-                    "aten::to",
-                    "aten::empty_strided",
-                ],
-                deallocs=[
-                    "user_deallocate",
-                ]
-            )
-            check_metrics(
-                stats,
-                "cpu_memory_usage",
-                allocs=[
-                    "aten::rand",
-                    "aten::empty",
-                ]
-            )
-
-    def test_model_profiler(self):
-        """Checks that model forward and backward.
-        """
-        def train():
-            for index in range(steps):
-                x = torch.rand(input_shape).to(device)
-                y = torch.rand(out_shape).reshape(out_shape[0], -1).to(device)
-                y_pred = model(x)
-                loss = criterion(y_pred, y)
-                optimizer.zero_grad()
-                loss.backward()
-                optimizer.step()
-        input_shape = (4, 3, 24, 24)
-        out_shape = (4, 12, 24, 24)
-        steps = 5
-        device = "npu:0" if torch.npu.is_available() else "cpu"
-        model = SmallModel(input_shape[1], out_shape[1]).to(device)
-        criterion = torch.nn.MSELoss(reduction='sum')
-        optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
-
-        try:
-            train()
-        except Exception:
-            self.assertTrue(False, "Expected no exception without profiling.")
-
-        def judge(expected_event_count, prof):
-            actual_event_count = {}
-            for e in prof.function_events:
-                if "#" in e.name:
-                    key = e.name
-                    if key in expected_event_count.keys():
-                        actual_event_count[key] = actual_event_count.setdefault(key, 0) + 1
-            for key, count in expected_event_count.items():
-                self.assertTrue((key in actual_event_count.keys()) and (count == actual_event_count[key]))
-
-        with torch.autograd.profiler.profile(use_npu=True) as prof:
-            train()
-        expected_event_count = {
-            "Optimizer.step#SGD.step": steps,
-            "Optimizer.zero_grad#SGD.zero_grad": steps
-        }
-        judge(expected_event_count, prof)
-
-
-    def test_npu_simple_profiler(self):
-        def train():
-            for index in range(steps):
-                x = torch.rand(input_shape).to(device)
-                y = torch.rand(out_shape).reshape(out_shape[0], -1).to(device)
-                y_pred = model(x)
-                loss = criterion(y_pred, y)
-                optimizer.zero_grad()
-                loss.backward()
-                optimizer.step()
-        input_shape = (4, 3, 24, 24)
-        out_shape = (4, 12, 24, 24)
-        steps = 5
-        device = "npu:0" if torch.npu.is_available() else "cpu"
-        model = SmallModel(input_shape[1], out_shape[1]).to(device)
-        criterion = torch.nn.MSELoss(reduction='sum')
-        optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
-        try:
-            train()
-        except Exception:
-            self.assertTrue(False, "Expected no exception without profiling.")
-        with torch.autograd.profiler.profile(use_npu=True, use_npu_simple=True) as prof:
-            train()
-        prof.export_chrome_trace("./test_trace.prof")
-
-if __name__ == '__main__':
-    try:
-        # to init the device
-        torch.rand(2,3).npu()
-    except Exception:
-        print("there is no npu device")
-        exit()
-    run_tests()