From 6ee61b59f000e0065168515ce08d043709c332e0 Mon Sep 17 00:00:00 2001 From: renfeiyang Date: Wed, 28 May 2025 15:06:55 +0800 Subject: [PATCH 1/2] =?UTF-8?q?MOE=E9=80=9A=E7=AE=97=E8=9E=8D=E5=90=88?= =?UTF-8?q?=E7=AE=97=E5=AD=90=E7=A7=BB=E5=8A=A8=E5=88=B0=E9=80=9A=E4=BF=A1?= =?UTF-8?q?=E7=AE=97=E5=AD=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- profiler/.DS_Store | Bin 0 -> 6148 bytes profiler/msprof_analyze/.DS_Store | Bin 0 -> 6148 bytes .../msprof_analyze/cluster_analyse/.DS_Store | Bin 0 -> 6148 bytes .../cluster_analyse/recipes/.DS_Store | Bin 0 -> 6148 bytes .../recipes/moe2commop/__init__.py | 14 +++ .../recipes/moe2commop/moe2commop.py | 107 ++++++++++++++++++ .../prof_exports/moe2commop_export.py | 42 +++++++ .../prof_exports/moe_max_string_ids_export.py | 29 +++++ 8 files changed, 192 insertions(+) create mode 100644 profiler/.DS_Store create mode 100644 profiler/msprof_analyze/.DS_Store create mode 100644 profiler/msprof_analyze/cluster_analyse/.DS_Store create mode 100644 profiler/msprof_analyze/cluster_analyse/recipes/.DS_Store create mode 100644 profiler/msprof_analyze/cluster_analyse/recipes/moe2commop/__init__.py create mode 100644 profiler/msprof_analyze/cluster_analyse/recipes/moe2commop/moe2commop.py create mode 100644 profiler/msprof_analyze/prof_exports/moe2commop_export.py create mode 100644 profiler/msprof_analyze/prof_exports/moe_max_string_ids_export.py diff --git a/profiler/.DS_Store b/profiler/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..808e7adfe7fa8dc70ff506999083bdd74310035f GIT binary patch literal 6148 zcmeHK%Sr=55Ukc50z$~q6&5cu)PhyHt+i@U=3hNcf^+ubMt-onVnU{h;*JY;0ZUl!wday=G+yYalk9yCj1^B z_{8J>@X+56L+^<)DIf);fE17dQs8I>s-!M2jy_2TNC7EuY6|%Gq0t??!Z9&E9UP(s zATAgV<2-r^V)Fp8D;yIUp;=OiNwsP*Ea{B5%IgZp#H7Qj`LMd#szb53o#(emhjob> zrGONeDsZ05wb%b^`Y-+el%$mukOC*AfGxJW?S@aP+B$oj*V;zErF+gd-Hr30aENkD jjB?C{m*dMw%Dm=t?stV_V$c~6I#E9Zu8T|x{I>!>y|5QO literal 0 HcmV?d00001 diff --git a/profiler/msprof_analyze/.DS_Store b/profiler/msprof_analyze/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..d7eff12b5ed239dc982141feeab021026c9e1d85 GIT binary patch literal 6148 zcmeHKyG{c!5S)b+ktmXq(!anToTBgrd;o+b-HAwq6zQ(wyZE%sKAO`(712bq(t7Om zjxA4ddker;r`;2<1Td#N;@gL*`M&$a?kZwLI?wpR9-nx{1|O5`-viEFV2d6n97p^O zV`s+pyW_ThKOA~bl}Q09AO)m=6p#W}Do_P={&3|vqDuiOaC-{)_o2}pyTTzcJ{=sQ z1t88C4&yv}31agAu`3)B8KGHHiAl9;F)Znfx611Zhs31As`;?G*{VaaxSi*>NQZTa z8l`{~7%Om}%ca-TIIp#h{zmtlAG#aoLE#YP km>A`l3oplyk(7DO=iKiKhs2;W9(1C923!}J6!>oiF5OKRw*UYD literal 0 HcmV?d00001 diff --git a/profiler/msprof_analyze/cluster_analyse/.DS_Store b/profiler/msprof_analyze/cluster_analyse/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..e7211e0a48884917077a6796996fd8ea92a664d0 GIT binary patch literal 6148 zcmeHKI|@QE5ZqNk!N$@uSMUZw^aNf&{1k$qDE6=NTprEYPoX^Rv`}VYGs$KrAyd44 zDcLT6T~8!``-N zRDcRl0V+TRsKAF6$O1bUe)wD-M+K`hPn3=z+wSl zO>6=YfoV{ILDg(AH0X$z%&Un_V9-Ug`Ov&svqMq89p@KM7p;LDsQ?vtR-hZpk=6e> z{7wJ=OyY_PP=UWvKzp;rY>FpkZEZcyYHfip;g)lQn_=!03|@|bUXHP_ay<5=$SXF- XeobrwosPKEf&3XTU1(I`+X~zPO4t>W literal 0 HcmV?d00001 diff --git a/profiler/msprof_analyze/cluster_analyse/recipes/.DS_Store b/profiler/msprof_analyze/cluster_analyse/recipes/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..a94cbf9aed515d70a1a672e0d834abd485048f9e GIT binary patch literal 6148 zcmeHKI}XAy47Gs)iG`6dSAY{l2u{ES5HNy@1W1gXI2T9b`Lk4o4HooVIWKYj66Fnb zEh4(S9p)la5gEe`a?0FDrL!`yob zV6gzOCbofyz%;19plY@l8g#@<=GDYDFzBM$d}yAm*`cVPj{S?Li`GDnRDcR}6&S{H zYW05&|Iq(;NnB9@D)3hd=xDWBEper+t-Z@xtu62^+;X09Gt8ZW!OJnw%P|&Kj%S_} adBx^ Date: Thu, 29 May 2025 17:33:19 +0800 Subject: [PATCH 2/2] fix comm table append issue --- .../recipes/base_recipe_analysis.py | 10 +++++++ .../recipes/moe2commop/moe2commop.py | 22 ++++++++++---- .../prof_exports/moe2commop_export.py | 2 +- .../prof_exports/moe_max_comm_opId_export.py | 29 +++++++++++++++++++ .../prof_exports/moe_max_string_ids_export.py | 2 +- 5 files changed, 57 insertions(+), 8 deletions(-) create mode 100644 profiler/msprof_analyze/prof_exports/moe_max_comm_opId_export.py diff --git a/profiler/msprof_analyze/cluster_analyse/recipes/base_recipe_analysis.py b/profiler/msprof_analyze/cluster_analyse/recipes/base_recipe_analysis.py index 0d3fea0f4..e24918936 100644 --- a/profiler/msprof_analyze/cluster_analyse/recipes/base_recipe_analysis.py +++ b/profiler/msprof_analyze/cluster_analyse/recipes/base_recipe_analysis.py @@ -121,6 +121,16 @@ class BaseRecipeAnalysis(ABC): else: logger.error(f"Unknown dump data type: {type(data)}") + def append_data(self, data, file_name, table_name=None, index=True, custom_db_path=None): + if table_name: + result_db = custom_db_path if custom_db_path else os.path.join(self.output_path, file_name) + conn, cursor = DBManager.create_connect_db(result_db) + if isinstance(data, pd.DataFrame): + data.to_sql(table_name, conn, if_exists='append', index=index) + else: + logger.error(f"Unknown dump data type: {type(data)}") + DBManager.destroy_db_connect(conn, cursor) + def create_notebook(self, filename, notebook_template_dir=None, replace_dict=None): if notebook_template_dir is None: template_path = os.path.dirname(__file__) diff --git a/profiler/msprof_analyze/cluster_analyse/recipes/moe2commop/moe2commop.py b/profiler/msprof_analyze/cluster_analyse/recipes/moe2commop/moe2commop.py index 34c6005b7..5efbbd8bb 100644 --- a/profiler/msprof_analyze/cluster_analyse/recipes/moe2commop/moe2commop.py +++ b/profiler/msprof_analyze/cluster_analyse/recipes/moe2commop/moe2commop.py @@ -20,6 +20,7 @@ from msprof_analyze.cluster_analyse.recipes.base_recipe_analysis import BaseReci from msprof_analyze.prof_common.constant import Constant from msprof_analyze.prof_common.logger import get_logger from msprof_analyze.prof_exports.moe2commop_export import Moe2CommopExport +from msprof_analyze.prof_exports.moe_max_comm_opId_export import MoeMaxCommOpIdExport from msprof_analyze.prof_exports.moe_max_string_ids_export import MoeMaxStringIdsExport logger = get_logger() @@ -48,7 +49,7 @@ class Moe2Commop(BaseRecipeAnalysis): def _mapper_func(self, data_map, analysis_class): profiler_db_path = data_map.get(Constant.PROFILER_DB_PATH) - # 最大StringId获取 + # 获取最大StringId try: df_max_stringId = MoeMaxStringIdsExport(profiler_db_path, analysis_class).read_export_db() new_id = int(df_max_stringId['max_id'].iloc[0]) + 1 if not df_max_stringId.empty else 1 @@ -61,8 +62,9 @@ class Moe2Commop(BaseRecipeAnalysis): 'id': [new_id], 'value': ['MOE_group'] }) - self.dump_data(data=new_row, file_name=profiler_db_path, - table_name=TABLE_STRING_IDS, custom_db_path=profiler_db_path) + string_id_append = new_row[['id', 'value']] + self.append_data(data=string_id_append, file_name=profiler_db_path, + table_name=TABLE_STRING_IDS, index=False, custom_db_path=profiler_db_path) # 读取主数据 df = Moe2CommopExport(profiler_db_path, analysis_class).read_export_db() @@ -77,6 +79,14 @@ class Moe2Commop(BaseRecipeAnalysis): logger.error(f"Missing required columns in DataFrame: {required_columns}") return None + # 获取最大opId + try: + df_max_opId = MoeMaxCommOpIdExport(profiler_db_path, analysis_class).read_export_db() + new_opId = int(df_max_opId['max_opId'].iloc[0]) + 1 if not df_max_opId.empty else 1 + except (KeyError, IndexError, ValueError) as e: + logger.warning(f"Failed to get max communication op ID: {str(e)}") + new_opId = 1 + # 构建结果DataFrame num_records = len(df) df_concat = pd.DataFrame({ @@ -85,7 +95,7 @@ class Moe2Commop(BaseRecipeAnalysis): 'endNs': df['endNs'].astype(int), 'connectionId': df['connectionId'].astype(int), 'groupName': new_id, # 用STRING_IDS表的字符串ID代替字符串 - 'opId': range(1, num_records + 1), + 'opId': range(new_opId, new_opId + num_records), 'relay': DEFAULT_RELAY, 'retry': DEFAULT_RETRY, 'dataType': DEFAULT_DATA_TYPE, @@ -101,7 +111,7 @@ class Moe2Commop(BaseRecipeAnalysis): communication_op = communication_op.copy() communication_op.sort_values('startNs', ascending=True, inplace=True) - self.dump_data(data=communication_op, file_name=profiler_db_path, - table_name=TABLE_COMMUNICATION_OP, custom_db_path=profiler_db_path) + self.append_data(data=communication_op, file_name=profiler_db_path, + table_name=TABLE_COMMUNICATION_OP, index=False, custom_db_path=profiler_db_path) return data_map.get(Constant.RANK_ID) \ No newline at end of file diff --git a/profiler/msprof_analyze/prof_exports/moe2commop_export.py b/profiler/msprof_analyze/prof_exports/moe2commop_export.py index f05412ff6..c17ee55d9 100644 --- a/profiler/msprof_analyze/prof_exports/moe2commop_export.py +++ b/profiler/msprof_analyze/prof_exports/moe2commop_export.py @@ -38,5 +38,5 @@ LEFT JOIN class Moe2CommopExport(BaseStatsExport): def __init__(self, db_path, recipe_name): - super().__init__(db_path, recipe_name) + super().__init__(db_path, recipe_name, step_range={}) self._query = QUERY \ No newline at end of file diff --git a/profiler/msprof_analyze/prof_exports/moe_max_comm_opId_export.py b/profiler/msprof_analyze/prof_exports/moe_max_comm_opId_export.py new file mode 100644 index 000000000..dda88331e --- /dev/null +++ b/profiler/msprof_analyze/prof_exports/moe_max_comm_opId_export.py @@ -0,0 +1,29 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from msprof_analyze.prof_exports.base_stats_export import BaseStatsExport + +QUERY = """ +SELECT + MAX(opId) AS max_opId +FROM + COMMUNICATION_OP + """ + +class MoeMaxCommOpIdExport(BaseStatsExport): + + def __init__(self, db_path, recipe_name): + super().__init__(db_path, recipe_name, step_range={}) + self._query = QUERY \ No newline at end of file diff --git a/profiler/msprof_analyze/prof_exports/moe_max_string_ids_export.py b/profiler/msprof_analyze/prof_exports/moe_max_string_ids_export.py index 7859b36e5..f7017cc00 100644 --- a/profiler/msprof_analyze/prof_exports/moe_max_string_ids_export.py +++ b/profiler/msprof_analyze/prof_exports/moe_max_string_ids_export.py @@ -25,5 +25,5 @@ FROM class MoeMaxStringIdsExport(BaseStatsExport): def __init__(self, db_path, recipe_name): - super().__init__(db_path, recipe_name) + super().__init__(db_path, recipe_name, step_range={}) self._query = QUERY \ No newline at end of file -- Gitee