diff --git a/profiler/performance_analyse/README.md b/profiler/performance_analyse/README.md
deleted file mode 100644
index c769a0f25c27a92d5aac3c0d66608c3f66807907..0000000000000000000000000000000000000000
--- a/profiler/performance_analyse/README.md
+++ /dev/null
@@ -1,43 +0,0 @@
-# 性能分析工具
-
-## 大模型性能拆解
-### GPU性能拆解
-#### 算子耗时
-包含cube算子耗时和vector算子耗时
-#### 计算流耗时：
-gpu计算流所有event耗时总和
-#### 通信
-gpu通信未掩盖耗时
-#### 调度
-调度耗时 = 单步打屏时间 - 算子耗时 - 通信不可掩盖耗时，其中单步打屏时间需要用户输入，当用户不输入时，采用e2e耗时代替单步打屏时间 
-获得调度耗时后，使用调度占比 = 调度耗时/E2E耗时 获取调度占比
-#### 内存分析
-gpu上的内存使用可以使用nvidia-smi查看
-profiling信息采集时打开profile_memory=True开关，即可从json文件中读出运行稳定后的memory信息
-#### 计算流e2e耗时
-gpu计算流端到端耗时
-### npu性能拆解
-#### 算子耗时
-包含cube算子耗时和vector算子耗时
-#### 计算流耗时：
-npu计算流所有event耗时总和
-#### 通信
-npu通信未掩盖耗时
-#### 调度
-调度耗时 = 单步打屏时间 - 算子耗时 - 通信不可掩盖耗时，其中单步打屏时间需要用户输入，当用户不输入时，采用e2e耗时代替单步打屏时间 
-获得调度耗时后，使用调度占比 = 调度耗时/E2E耗时 获取调度占比
-#### 内存分析
-npu上的内存使用可以使用npu-smi查看
-profiling信息采集时打开profile_memory=True开关，即可从csv文件中读出运行稳定后的memory信息
-#### 计算流e2e耗时
-gpu计算流端到端耗时
-### 使用方法
-- 获取数据:获取gpu和npu的profiling数据，若采集profiling数据时没开启memory采集开关，则没有内存使用数据
-- 运行命令:python profiling_parse.py -g gpu\gpu_trace_device0.json -glt 0.9 -n npu\xxx_ascend_pt -nlt 1.2 -aop op1 op2
-- 输出结果：可以得到gpu与npu对照的打屏性能拆解数据，其中-nlt为输入打屏时间，-aop为手动添加的cube算子类型
-
-## 卡间不同步问题分析（实现中）
-### GPU通信算子8卡同步情况可视化
-### NPU通信算子8卡同步情况可视化
-
-## 更多分析功能规划中
\ No newline at end of file
diff --git a/profiler/performance_analyse/__init__.py b/profiler/performance_analyse/__init__.py
deleted file mode 100644
index 8400fd5ecd1246eaee795cebfccfacc80a94f08c..0000000000000000000000000000000000000000
--- a/profiler/performance_analyse/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# Copyright (c) 2023, Huawei Technologies Co., Ltd.
-# All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/profiler/performance_analyse/gpu_parser.py b/profiler/performance_analyse/gpu_parser.py
deleted file mode 100644
index 95391dc0ba9dd77020976a8843b1f343368820af..0000000000000000000000000000000000000000
--- a/profiler/performance_analyse/gpu_parser.py
+++ /dev/null
@@ -1,110 +0,0 @@
-# Copyright (c) 2023, Huawei Technologies Co., Ltd.
-# All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from collections import Counter, defaultdict
-import pandas as pd
-
-import parser_helper
-
-
-class GpuProfilingParser:
-    def __init__(self, args):
-        self.trace_events = self.read_profiling_json_file(args.gpu)
-        self.compute_stream_id = self.infer_compute_stream_id()
-        self.one_step_time = args.gpu_log_time
-        self.profiling_info = parser_helper.ProfilingInfo()
-
-    @staticmethod
-    def read_profiling_json_file(json_path):
-        data = parser_helper.read_json_file(json_path)
-        if 'traceEvents' not in data:
-            raise RuntimeError("The gpu profiling json doesn't contain traceEvents data.")
-        return data.get('traceEvents')
-
-    def parse_events(self):
-        cube_time = 0.0
-        all_op_time = 0.0
-        op_list = []
-        compute_stream_dur = 0.0  # 计算流耗时
-        marks = defaultdict(int)  # mark for compute communication_not_overlapped time
-
-        for event in self.trace_events:
-            if not isinstance(event, dict):
-                continue
-            if event.get('args') and event.get('args').get('stream') == self.compute_stream_id:
-                compute_stream_dur += float(event.get('dur'))
-            if not {'name', 'cat', 'dur', 'ts'} < event.keys():
-                continue
-            name = event.get('name')
-            dur = event.get('dur')
-            ts = event.get('ts')
-            cat = event.get('cat')
-            if cat.lower() != 'kernel':
-                continue
-            if 'nccl' in name:
-                for timestep in range(ts + 1, ts + dur + 1):
-                    marks[str(timestep)] += 1  # mark this timestep in communication stream
-                continue
-            else:
-                for timestep in range(ts + 1, ts + dur + 1):
-                    marks[str(timestep)] += -100  # mark this timestep in compute stream
-            if 'gemm' in name:
-                cube_time += float(dur)
-            all_op_time += float(dur)
-            op_list.append([ts, name, cat, dur])
-        op_dataframe = pd.DataFrame(op_list, columns=['time start', 'name', 'cat', 'dur'])
-        op_dataframe.to_csv('gpu_perf.csv', index=False)
-        self.profiling_info.compute_time = compute_stream_dur / 10 ** 6
-        self.profiling_info.communication_not_overlapped = len([_ for _, value in marks.items() if value > 0]) / 10 ** 6
-        self.profiling_info.cube_time = cube_time / 10 ** 6
-        self.profiling_info.vector_time = (all_op_time - cube_time) / 10 ** 6
-        self.parse_e2e_time()
-        if self.one_step_time:
-            self.profiling_info.scheduling_time = self.one_step_time - all_op_time / 10 ** 6 - \
-                                                  self.profiling_info.communication_not_overlapped
-        else:
-            self.profiling_info.scheduling_time = self.profiling_info.e2e_time - all_op_time / 10 ** 6 - \
-                                                  self.profiling_info.communication_not_overlapped
-        self.profiling_info.scheduling_ratio = self.profiling_info.scheduling_time / self.profiling_info.e2e_time
-        self.parse_memory_reserved()
-
-    def parse_e2e_time(self):
-        compute_events_timeline = [event for event in self.trace_events if
-                                   event.get('args') and event.get('args').get('stream') == self.compute_stream_id]
-        compute_events_timeline = sorted(compute_events_timeline, key=lambda event: event.get('ts'))
-        self.profiling_info.e2e_time = (compute_events_timeline[-1].get('ts') + compute_events_timeline[-1].get('dur') -
-                                        compute_events_timeline[0].get('ts')) / 10 ** 6
-
-    def parse_memory_reserved(self):
-        memories = [
-            event.get('args').get('Total Reserved') for event in self.trace_events
-            if event.get('name') == '[memory]' and event.get('args').get('Device Id') >= 0
-        ]
-        if not memories:
-            print("Gpu profiling data doesn't contain memory info")
-            return
-        self.profiling_info.memory_used = max(memories) / 1024 ** 3
-
-    def infer_compute_stream_id(self):
-        kernel_stream_ids = []
-        for event in self.trace_events:
-            is_kernel_exec_event = event.get('cat') == 'Kernel' and 'nccl' not in event.get('name')
-            has_stream_id_event = event.get('args') and event.get('args').get('stream')
-            if is_kernel_exec_event and has_stream_id_event:
-                kernel_stream_ids.append(event.get('args').get('stream'))
-        if not kernel_stream_ids:
-            raise RuntimeError('The profiling data does not contain kernel running data.')
-        counter = Counter(kernel_stream_ids)
-        return counter.most_common(1)[0][0]
diff --git a/profiler/performance_analyse/npu_parser.py b/profiler/performance_analyse/npu_parser.py
deleted file mode 100644
index 375dd85680ac07c6c537490395ec62ac4488aae0..0000000000000000000000000000000000000000
--- a/profiler/performance_analyse/npu_parser.py
+++ /dev/null
@@ -1,165 +0,0 @@
-# Copyright (c) 2023, Huawei Technologies Co., Ltd.
-# All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import sys
-import pandas as pd
-from collections import defaultdict
-import parser_helper
-
-
-class NpuProfilingParser:
-    def __init__(self, npu_step_time, add_cube_name, npu_file_path):
-        self.npu_json_file = npu_file_path.get('trace_view')
-        self.npu_summary_file = npu_file_path.get('op_summary')
-        self.npu_mem_file = npu_file_path.get('memory_record')
-        self.profiling_info = parser_helper.ProfilingInfo()
-        self.npu_step_time = npu_step_time
-        self.parallel_time = 0
-        self.aicore_time = 0
-        self.cube_op_type = ['MatMul', 'BatchMatMul']
-        self.cube_op_type = list(set(self.cube_op_type + add_cube_name))
-        self.min_aicore_ts = sys.float_info.max
-        self.max_aicore_ts = sys.float_info.min
-
-    def parse_npu_json_events(self):
-        if not self.npu_json_file:
-            print('Npu trace json file is not available.')
-            return
-        compute_time = 0
-        min_ts = sys.float_info.max
-        max_ts = sys.float_info.min
-        ts_flag = False  # 表明没有获取到compute time的耗时
-        data = parser_helper.read_json_file(self.npu_json_file)
-        event_wait_sqe = defaultdict(list)
-        ai_core_dict = defaultdict(list)
-        event_wait_sqe_res = defaultdict(float)
-        ai_core_res = defaultdict(float)
-        for dic in data:
-            self.get_ts_by_task_type(dic, event_wait_sqe, ai_core_dict, event_wait_sqe_res, ai_core_res)
-            if ('name' in dic) and (dic.get('name') == 'Compute'):
-                ts_flag = True
-                ts = dic.get('ts')
-                dur = dic.get('dur')
-                compute_time += dur
-                min_ts = ts if ts < min_ts else min_ts
-                max_ts = (ts + dur) if (ts + dur) > max_ts else max_ts
-        # AI_CORE和EVENT_WAIT_SQE共存为计算流
-        compute_stream = []
-        parallel_stream = []
-        # 不存在算子并行的情况
-        if len(ai_core_dict) == 1:
-            compute_stream.append(min(ai_core_dict.keys()))
-        elif len(ai_core_dict) == 2:  # 2个ai_core，存在并行流（当前最多2条算子计算流）
-            compute_stream = list(event_wait_sqe.keys() & ai_core_dict.keys())
-            parallel_stream = list(ai_core_dict.keys() - set(compute_stream))
-        else:
-            print('Npu trace json file lack of Stream info')
-            return
-        cs_event_wait_sqe_list = event_wait_sqe[compute_stream[0]]
-        if parallel_stream:
-            cs_ai_core_list = ai_core_dict[parallel_stream[0]]
-            sorted(cs_event_wait_sqe_list, key=lambda x: (x[0]))
-            sorted(cs_ai_core_list, key=lambda x: (x[0]))
-            self.parallel_time = self.interval_intersection(cs_event_wait_sqe_list, cs_ai_core_list)
-        self.profiling_info.compute_time = compute_time / 10 ** 6 if ts_flag else ai_core_res[compute_stream[0]] / 10 ** 6
-        self.profiling_info.e2e_time = (max_ts - min_ts) / 10 ** 6 if ts_flag else (self.max_aicore_ts - self.min_aicore_ts) / 10 ** 6
-        self.profiling_info.communication_not_overlapped = (event_wait_sqe_res[compute_stream[0]] - 
-            self.parallel_time) / 10 ** 6
-        time_required = (self.profiling_info.cube_time + self.profiling_info.vector_time) + \
-            self.profiling_info.communication_not_overlapped
-        if self.npu_step_time:
-            self.profiling_info.scheduling_time = self.npu_step_time - time_required
-        else:
-            self.profiling_info.scheduling_time = self.profiling_info.e2e_time - time_required
-        self.profiling_info.scheduling_ratio = self.profiling_info.scheduling_time / self.profiling_info.e2e_time \
-            if self.profiling_info.e2e_time != 0 else 0
-
-    def parse_npu_csv_events(self):
-        if not self.npu_summary_file:
-            print('Npu op summary csv file is not available.')
-            return
-        info = pd.read_csv(self.npu_summary_file, index_col=None)
-        cube_time = 0.0
-        vec_time = 0.0
-        ai_core_time = 0.0
-        vec_mac_flag = True  # True标记当前summary文件中存在pmu信息
-        if info.get('aic_mac_time(us)') is None or info.get('aiv_vec_time(us)') is None:
-            print('当前的profiling结果可能是极简模式,通过cube算子白名单进行区分,白名单如下:')
-            print(self.cube_op_type)
-            vec_mac_flag = False
-        for i in range(len(info['Model ID'])):
-            task_type = info.loc[i, 'Task Type']
-            if task_type not in ['AI_CORE']:
-                continue
-            task_durations = info.loc[i, 'Task Duration(us)']
-            ai_core_time += task_durations
-            op_type = info.loc[i, 'OP Type']
-            if not vec_mac_flag:  # 如果是极简模式根据OP_Type计算完cube time后提前返回
-                cube_time += task_durations if op_type in self.cube_op_type else 0.0
-                continue
-            aiv_vec_time = info.loc[i, 'aiv_vec_time(us)']
-            if aiv_vec_time > 0:
-                vec_time += task_durations
-        
-        if vec_mac_flag:
-            cube_time = (ai_core_time - vec_time) / 10 ** 6
-            vec_time /= 10 ** 6
-        else:
-            vec_time = (ai_core_time - cube_time) / 10 ** 6
-            cube_time /= 10 ** 6
-        self.profiling_info.cube_time = cube_time
-        self.profiling_info.vector_time = vec_time
-        if not self.npu_mem_file:
-            print('Npu op memory csv file is not available.')
-            return
-        try:
-            info = pd.read_csv(self.npu_mem_file, usecols=['Total Reserved(MB)'], index_col=None)
-        except ValueError:
-            print('Npu profiling data does not contain memory info.')
-        else:
-            self.profiling_info.memory_used = max(info.get('Total Reserved(MB)')) / 1024
-
-    @staticmethod
-    def interval_intersection(cs_event_wait_sqe_list, cs_ai_core_list):
-        ans = 0
-        i = 0
-        j = 0
-        while i < len(cs_event_wait_sqe_list) and j < len(cs_ai_core_list):
-            lo = max(cs_event_wait_sqe_list[i][0], cs_ai_core_list[j][0])
-            hi = min(cs_event_wait_sqe_list[i][1], cs_ai_core_list[j][1])
-            if lo <= hi:
-                ans += (hi - lo)
-            if cs_event_wait_sqe_list[i][1] < cs_ai_core_list[j][1]:
-                i += 1
-            else:
-                j += 1
-        return ans
-
-    def get_ts_by_task_type(self, dic, event_wait_sqe, ai_core_dict, enent_wait_res, ai_core_res):
-        if not dic.get('args'):
-            return
-        args = dic.get('args')
-        if args.get('Stream Id'):
-            stream_id = args.get('Stream Id')
-            ts = dic.get('ts')
-            dur = dic.get('dur')
-            if args.get('Task Type') == 'EVENT_WAIT_SQE':
-                enent_wait_res[stream_id] += dur
-                event_wait_sqe[stream_id].append([ts, ts + dur])
-            elif args.get('Task Type') == 'AI_CORE':
-                self.min_aicore_ts = ts if ts < self.min_aicore_ts else self.min_aicore_ts
-                self.max_aicore_ts = (ts + dur) if (ts + dur) > self.max_aicore_ts else self.max_aicore_ts
-                ai_core_res[stream_id] += dur
-                ai_core_dict[stream_id].append([ts, ts + dur])
diff --git a/profiler/performance_analyse/parser_helper.py b/profiler/performance_analyse/parser_helper.py
deleted file mode 100644
index 958a3146bb58898cdb76003f5f59476a45c1593f..0000000000000000000000000000000000000000
--- a/profiler/performance_analyse/parser_helper.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright (c) 2023, Huawei Technologies Co., Ltd.
-# All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import json
-import os
-
-
-class ProfilingInfo:
-    def __init__(self):
-        self.cube_time = 0.0
-        self.vector_time = 0.0
-        self.compute_time = 0.0
-        self.communication_not_overlapped = 0.0
-        self.scheduling_ratio = 0.0
-        self.memory_used = 0.0
-        self.e2e_time = 0.0
-        self.scheduling_time = 0.0
-
-
-def read_json_file(path):
-    if not os.path.isfile(path):
-        raise ValueError(f'The path "{path}" is not a valid json file.')
-    with open(path, 'r', encoding='utf-8') as json_handler:
-        data = json.load(json_handler)
-    return data
diff --git a/profiler/performance_analyse/profiling_parse.py b/profiler/performance_analyse/profiling_parse.py
deleted file mode 100644
index c45c73e9e27b6b4b4c9aae222499404b3ad3eac4..0000000000000000000000000000000000000000
--- a/profiler/performance_analyse/profiling_parse.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# Copyright (c) 2023, Huawei Technologies Co., Ltd.
-# All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import os
-
-from prettytable import PrettyTable
-
-from gpu_parser import GpuProfilingParser
-from npu_parser import NpuProfilingParser
-from parser_helper import ProfilingInfo
-
-
-def parse_command():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('-g', '--gpu', required=False, default='', metavar='(FILE)', help='Gpu profiling json file.')
-    parser.add_argument('-glt', '--gpu_log_time', required=False, default=0.0, type=float, help='Gpu one step time(s)')
-    parser.add_argument('-n', '--npu', required=False, default='', metavar='(FILE)',
-                        help='Npu single core profiling root path.')
-    parser.add_argument('-nlt', '--npu_log_time', required=False, default=0.0, metavar='(FILE)', type=float, 
-                        help='Npu one step time(s).')
-    parser.add_argument('-aop', '--add_cube_op', required=False, default=[], nargs='*', help='add cube op name')
-    return parser.parse_args()
-
-
-def show_table(gpu_profiling_info, npu_profiling_info):
-    table = PrettyTable()
-    table.title = '大模型性能拆解'
-    table.field_names = ['', 'cube算子', 'vector算子', '计算流耗时', '通信', '调度耗时', '调度占比', '内存',
-                         'E2E性能值']
-    table.add_row(['GPU基线', f'{gpu_profiling_info.cube_time:.3f}s', f'{gpu_profiling_info.vector_time:.3f}s',
-                  f'{gpu_profiling_info.compute_time:.3f}s', f'{gpu_profiling_info.communication_not_overlapped: .3f}s',
-                  f'{gpu_profiling_info.scheduling_time:.3f}', f'{gpu_profiling_info.scheduling_ratio:.2%}',
-                  f'{gpu_profiling_info.memory_used:.2f}G', f'{gpu_profiling_info.e2e_time:.3f}s'])
-    table.add_row(['NPU现状', f'{npu_profiling_info.cube_time:.3f}s', f'{npu_profiling_info.vector_time:.3f}s',
-                  f'{npu_profiling_info.compute_time:.3f}s', f'{npu_profiling_info.communication_not_overlapped: .3f}s',
-                  f'{npu_profiling_info.scheduling_time:.3f}', f'{npu_profiling_info.scheduling_ratio:.2%}',
-                  f'{npu_profiling_info.memory_used:.2f}G', f'{npu_profiling_info.e2e_time:.3f}s'])
-    print(table)
-
-
-def parse_gpu(args):
-    if args.gpu:
-        if args.gpu_log_time < 0:
-            raise ValueError("Gpu one step time shouldn't less than 0.")
-        gpu_parser = GpuProfilingParser(args)
-        gpu_parser.parse_events()
-        return gpu_parser.profiling_info
-    print('Gpu trace json file is not specified.')
-    return ProfilingInfo()
-
-
-def parse_npu(args, npu_path):
-    npu_parser = NpuProfilingParser(args.npu_log_time, args.add_cube_op, npu_path)
-    npu_parser.parse_npu_csv_events()
-    npu_parser.parse_npu_json_events()
-    return npu_parser.profiling_info
-
-
-def main():
-    args = parse_command()
-    npu_path = {'trace_view': None, 'memory_record': None, 'op_summary': None}
-    for root, _, files in os.walk(args.npu):
-        for file in files:
-            if file == 'trace_view.json':
-                npu_path['trace_view'] = os.path.join(root, file)
-            if file == 'memory_record.csv':
-                npu_path['memory_record'] = os.path.join(root, file)
-            if 'op_summary_' in file:
-                npu_path['op_summary'] = os.path.join(root, file)
-    show_table(parse_gpu(args), parse_npu(args, npu_path))
-
-
-if __name__ == '__main__':
-    main()