From c4a0ac57d1833d24908812d20faebff7507607dc Mon Sep 17 00:00:00 2001
From: sunyiming <sym990908@126.com>
Date: Sat, 22 Feb 2025 16:17:45 +0800
Subject: [PATCH 1/2] =?UTF-8?q?=E6=94=AF=E6=8C=81=E8=B6=85=E5=8F=82?=
 =?UTF-8?q?=E6=95=B0=E6=AF=94=E5=AF=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../config_checking/checkers/__init__.py      |   1 +
 .../config_checking/checkers/base_checker.py  |   1 +
 .../checkers/hyperparameter_checker.py        | 199 ++++++++++++++++++
 3 files changed, 201 insertions(+)
 create mode 100644 debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/hyperparameter_checker.py

diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/__init__.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/__init__.py
index 403d01e43..d0218b5fd 100644
--- a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/__init__.py
+++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/__init__.py
@@ -21,6 +21,7 @@ import msprobe.pytorch.config_checking.checkers.pip_checker
 import msprobe.pytorch.config_checking.checkers.checkpoint_checker
 import msprobe.pytorch.config_checking.checkers.dataset_checker
 import msprobe.pytorch.config_checking.checkers.weights_checker
+import msprobe.pytorch.config_checking.checkers.hyperparameter_checker
 
 
 from msprobe.pytorch.config_checking.checkers.base_checker import BaseChecker
diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/base_checker.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/base_checker.py
index 45b0cfcc1..d295cb580 100644
--- a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/base_checker.py
+++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/base_checker.py
@@ -24,6 +24,7 @@ class PackInput:
         self.ckpt_path = config_dict.get("ckpt path", None)
         self.need_env_args = config_dict.get("env args", None)
         self.need_pip_data = config_dict.get("pip data", None)
+        self.shell_path = config_dict.get("shell path", None)
         self.output_zip_path = config_dict.get("output zip path", "./config_check_pack.zip")
         self.model = model
 
diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/hyperparameter_checker.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/hyperparameter_checker.py
new file mode 100644
index 000000000..a59395102
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/hyperparameter_checker.py
@@ -0,0 +1,199 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import json
+from msprobe.pytorch.config_checking.checkers.base_checker import BaseChecker
+from msprobe.pytorch.config_checking.config_checker import register_checker_item
+from msprobe.pytorch.config_checking.utils.packing import add_file_to_zip
+from msprobe.pytorch.config_checking.utils.utils import load_json, compare_dict, write_list_to_file
+from msprobe.pytorch.config_checking.utils.utils import config_checking_print
+from typing import Union, List, Dict, Any
+from difflib import SequenceMatcher
+import tempfile
+import re
+
+@register_checker_item("hyperparameter")
+class HyperparameterChecker(BaseChecker):
+    input_needed = "shell_path"
+    target_name_in_zip = "hyperparameters"
+    result_filename = "hyperparameter_diff.txt"
+    # Canonical parameter name -> aliases that _fuzzy_match_parameter treats as the same parameter.
+    PARAMETER_NAME_MAPPING = {
+        "learning_rate": ["lr", "learningrate"],
+        "batch_size": ["batch", "bs", "batch_size_per_gpu"],
+        "epochs": ["num_epochs", "max_epochs", "epoch"],
+        "weight_decay": ["wd", "weightdecay"],
+        "dropout_rate": ["dropout", "drop_rate"],
+    }
+
+    @staticmethod
+    def pack(pack_input):
+        """Extract the hyperparameters of every script in pack_input.shell_path and add them to the output zip as JSON."""
+        shell_path = pack_input.shell_path
+        if not isinstance(shell_path, list):
+            raise TypeError("shell_path should be a list of file paths.")
+        for script_path in shell_path:
+            if os.path.isfile(script_path):
+                hyperparameters = HyperparameterChecker._extract_hyperparameters_from_script(script_path)
+                if hyperparameters:
+                    dest_path_in_zip = os.path.join(HyperparameterChecker.target_name_in_zip, os.path.splitext(os.path.basename(script_path))[0] + ".json")
+                    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tmp_file:
+                        json.dump(hyperparameters, tmp_file, indent=4)
+                        tmp_file_path = tmp_file.name
+                    try:
+                        add_file_to_zip(pack_input.output_zip_path, tmp_file_path, dest_path_in_zip)
+                    finally:  # delete=False above: remove the temp file ourselves, even if zipping raised
+                        os.remove(tmp_file_path)
+                    config_checking_print(f"add hyperparameters args to zip")
+                else:
+                    config_checking_print(f"Warning: Failed to extract hyperparameters from script {script_path}")
+            else:
+                config_checking_print(f"Warning: Script path {script_path} is not a file.")
+
+    @staticmethod
+    def _extract_hyperparameters_from_script(script_path: str) -> Dict[str, Any]:
+        """
+        Extract the ``--key [value]`` arguments of the first ``torchrun`` command followed by a pipe in a bash script.
+        """
+        hyperparameters = {}
+        with open(script_path, 'r') as file:
+            script_content = file.read()
+
+        command_line = re.search(r'torchrun\s+(.*?)\s*\|', script_content, re.DOTALL)
+        if command_line:
+            command_line = command_line.group(1)
+
+            # Inline every XXX_ARGS="..." block so that "$XXX_ARGS" references expose their options.
+            blocks = re.findall(r'(\w+_ARGS)="(.*?)"', script_content, re.DOTALL)
+            for block_name, block_content in blocks:
+                block_content = block_content.replace('\n', ' ')
+                command_line = command_line.replace(f"${block_name}", block_content)
+
+            matches = re.findall(r'--([\w-]+)(?:\s+([^\s\\]+))?', command_line)
+            for key, value in matches:
+                if value and value.startswith('$'):
+                    # Escape and anchor the name: "$LR" must not match "MIN_LR=" or be parsed as regex syntax.
+                    name = re.escape(value[1:].strip('{}'))
+                    env_var = re.search(rf'(?<!\w){name}="?(.*?)"?\s', script_content)
+                    if env_var:
+                        value = env_var.group(1)
+                hyperparameters[key] = value if value else True
+
+        return hyperparameters
+
+    @staticmethod
+    def _fuzzy_match_parameter(param_name: str, available_params: Dict[str, Any]) -> Union[str, None]:
+        """
+        Return the key of available_params matching param_name (exact, then alias, then similarity > 0.8), or None.
+        """
+        if param_name in available_params:
+            return param_name
+        # Not an exact hit: resolve param_name to its canonical name through PARAMETER_NAME_MAPPING.
+        canonical_name = None
+        for standard_name, aliases in HyperparameterChecker.PARAMETER_NAME_MAPPING.items():
+            if param_name == standard_name or param_name in aliases:
+                canonical_name = standard_name
+                break
+
+        if canonical_name:
+            if canonical_name in available_params:
+                return canonical_name
+            for alias in HyperparameterChecker.PARAMETER_NAME_MAPPING[canonical_name]:
+                if alias in available_params:
+                    config_checking_print(f"Matched '{param_name}' to alias '{alias}' via canonical name '{canonical_name}'")
+                    return alias
+        # Last resort: the most similar available name, accepted only above a 0.8 SequenceMatcher ratio.
+        best_match_name = None
+        best_match_ratio = 0.8
+        for available_param_name in available_params:
+            ratio = SequenceMatcher(None, param_name.lower(), available_param_name.lower()).ratio()
+            if ratio > best_match_ratio:
+                best_match_ratio = ratio
+                best_match_name = available_param_name
+
+        if best_match_name:
+            config_checking_print(f"Fuzzy matched parameter '{param_name}' to '{best_match_name}' (similarity: {best_match_ratio:.2f})")
+            return best_match_name
+
+        return None
+
+    def compare(bench_dir, cmp_dir, output_path):
+        bench_model_dir = os.path.join(bench_dir, HyperparameterChecker.target_name_in_zip)
+        cmp_model_dir = os.path.join(cmp_dir, HyperparameterChecker.target_name_in_zip)
+        output_filepath = os.path.join(output_path, HyperparameterChecker.result_filename)
+
+        bench_hyperparameters = {}
+        cmp_hyperparameters = {}
+
+        if os.path.exists(bench_model_dir):
+            for root, _, files in os.walk(bench_model_dir):
+                for file in files:
+                    if file.endswith('.json'):
+                        filepath = os.path.join(root, file)
+                        relative_filepath = os.path.relpath(filepath, bench_model_dir)
+                        params = load_json(filepath)
+                        if params:
+                            bench_hyperparameters[relative_filepath] = params
+
+        if os.path.exists(cmp_model_dir):
+            for root, _, files in os.walk(cmp_model_dir):
+                for file in files:
+                    if file.endswith('.json'):
+                        filepath = os.path.join(root, file)
+                        relative_filepath = os.path.relpath(filepath, cmp_model_dir)
+                        params = load_json(filepath)
+                        if params:
+                            cmp_hyperparameters[relative_filepath] = params
+
+        all_diffs = []
+        all_files = set(bench_hyperparameters.keys()) | set(cmp_hyperparameters.keys())
+
+        for filename in sorted(all_files):
+            bench_params = bench_hyperparameters.get(filename, None)
+            cmp_params = cmp_hyperparameters.get(filename, None)
+
+            if bench_params is not None and cmp_params is not None:
+                file_diffs = []
+                # Walk names in sorted order: set iteration order varies between runs, which made both the
+                # report layout and the fuzzy pairing (each match consumes a cmp entry) non-deterministic.
+
+                for bench_param_name in sorted(bench_params):
+                    matched_cmp_param_name = HyperparameterChecker._fuzzy_match_parameter(bench_param_name, cmp_params)
+                    if matched_cmp_param_name:
+                        bench_param_value = bench_params[bench_param_name]
+                        cmp_param_value = cmp_params[matched_cmp_param_name]
+                        if bench_param_value != cmp_param_value:
+                            diff = compare_dict({bench_param_name: bench_param_value},
+                                                {matched_cmp_param_name: cmp_param_value})
+                            if diff:
+                                file_diffs.extend([f"  Parameter '{bench_param_name}' (matched with '{matched_cmp_param_name}'): {d}" for d in diff])
+                        del cmp_params[matched_cmp_param_name]
+                    else:
+                        file_diffs.append(f"  [Only in benchmark] Parameter: '{bench_param_name}': {bench_params[bench_param_name]}")
+
+                for cmp_param_name, cmp_param_value in cmp_params.items():
+                    file_diffs.append(f"  [Only in compare] Parameter: '{cmp_param_name}': {cmp_param_value}")
+
+                if file_diffs:
+                    all_diffs.append(f"File: {filename}")
+                    all_diffs.extend(file_diffs)
+
+            elif bench_params is not None:
+                all_diffs.append(f"[Only in benchmark] File: {filename}")
+            elif cmp_params is not None:
+                all_diffs.append(f"[Only in compare] File: {filename}")
+
+        write_list_to_file(all_diffs, output_filepath)
\ No newline at end of file
-- 
Gitee


From 23339582c2bfd363e7b841f7cce6b19b7b533626 Mon Sep 17 00:00:00 2001
From: sunyiming <sym990908@126.com>
Date: Fri, 28 Feb 2025 10:45:53 +0800
Subject: [PATCH 2/2] hyperparameter_checker: add path checks and extract load_hyperparameters

---
 .../checkers/hyperparameter_checker.py        | 59 ++++++++++---------
 1 file changed, 31 insertions(+), 28 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/hyperparameter_checker.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/hyperparameter_checker.py
index a59395102..91a12ff0e 100644
--- a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/hyperparameter_checker.py
+++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/hyperparameter_checker.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -15,15 +15,20 @@
 
 import os
 import json
+import re
+import tempfile
+from difflib import SequenceMatcher
+
+from typing import Union, List, Dict, Any
+
 from msprobe.pytorch.config_checking.checkers.base_checker import BaseChecker
 from msprobe.pytorch.config_checking.config_checker import register_checker_item
 from msprobe.pytorch.config_checking.utils.packing import add_file_to_zip
 from msprobe.pytorch.config_checking.utils.utils import load_json, compare_dict, write_list_to_file
 from msprobe.pytorch.config_checking.utils.utils import config_checking_print
-from typing import Union, List, Dict, Any
-from difflib import SequenceMatcher
-import tempfile
-import re
+from msprobe.core.common.file_utils import FileChecker, os_walk_for_files
+from msprobe.pytorch.parse_tool.lib.config import Const
+from msprobe.core.common.const import FileCheckConst
 
 @register_checker_item("hyperparameter")
 class HyperparameterChecker(BaseChecker):
@@ -51,7 +56,8 @@ class HyperparameterChecker(BaseChecker):
             if os.path.isfile(script_path):
                 hyperparameters = HyperparameterChecker._extract_hyperparameters_from_script(script_path)
                 if hyperparameters:
-                    dest_path_in_zip = os.path.join(HyperparameterChecker.target_name_in_zip, os.path.splitext(os.path.basename(script_path))[0] + ".json")
+                    json_name = os.path.splitext(os.path.basename(script_path))[0] + ".json"  # name inside the zip
+                    dest_path_in_zip = os.path.join(HyperparameterChecker.target_name_in_zip, json_name)
                     with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tmp_file:
                         json.dump(hyperparameters, tmp_file, indent=4)
                         tmp_file_path = tmp_file.name
@@ -130,33 +136,39 @@ class HyperparameterChecker(BaseChecker):
 
         return None
 
+    @staticmethod
+    def load_hyperparameters(model_dir):
+        """
+        Load every ``.json`` file found under ``model_dir``.
+
+        Returns a dict mapping each file's path relative to ``model_dir`` to its parsed,
+        non-empty content; the dict is empty when ``model_dir`` is not a directory.
+        """
+        hyperparameters = {}
+        if not os.path.isdir(model_dir):
+            return hyperparameters
+        # NOTE(review): os_walk_for_files is expected to return {"root": ..., "file": ...} dicts,
+        # not os.walk-style tuples - confirm against msprobe.core.common.file_utils.
+        for entry in os_walk_for_files(model_dir, Const.MAX_TRAVERSAL_DEPTH):
+            if not entry["file"].endswith('.json'):
+                continue
+            raw_path = os.path.join(entry["root"], entry["file"])
+            filepath = FileChecker(raw_path, FileCheckConst.FILE, FileCheckConst.READ_ABLE).common_check()
+            params = load_json(filepath)
+            if params:
+                # Key on the path as walked so that bench and cmp trees yield comparable keys.
+                hyperparameters[os.path.relpath(raw_path, model_dir)] = params
+        return hyperparameters
+
+    @staticmethod
     def compare(bench_dir, cmp_dir, output_path):
+        """Compare the hyperparameter JSON files under bench_dir and cmp_dir; write the diffs under output_path."""
         bench_model_dir = os.path.join(bench_dir, HyperparameterChecker.target_name_in_zip)
         cmp_model_dir = os.path.join(cmp_dir, HyperparameterChecker.target_name_in_zip)
         output_filepath = os.path.join(output_path, HyperparameterChecker.result_filename)
 
-        bench_hyperparameters = {}
-        cmp_hyperparameters = {}
-
-        if os.path.exists(bench_model_dir):
-            for root, _, files in os.walk(bench_model_dir):
-                for file in files:
-                    if file.endswith('.json'):
-                        filepath = os.path.join(root, file)
-                        relative_filepath = os.path.relpath(filepath, bench_model_dir)
-                        params = load_json(filepath)
-                        if params:
-                            bench_hyperparameters[relative_filepath] = params
-
-        if os.path.exists(cmp_model_dir):
-            for root, _, files in os.walk(cmp_model_dir):
-                for file in files:
-                    if file.endswith('.json'):
-                        filepath = os.path.join(root, file)
-                        relative_filepath = os.path.relpath(filepath, cmp_model_dir)
-                        params = load_json(filepath)
-                        if params:
-                            cmp_hyperparameters[relative_filepath] = params
+        bench_hyperparameters = HyperparameterChecker.load_hyperparameters(bench_model_dir)
+        cmp_hyperparameters = HyperparameterChecker.load_hyperparameters(cmp_model_dir)
 
         all_diffs = []
         all_files = set(bench_hyperparameters.keys()) | set(cmp_hyperparameters.keys())
-- 
Gitee