From 10bb23e510c76099dc4eed28b643d6eed77f0275 Mon Sep 17 00:00:00 2001
From: Kha-Zix-1
Date: Mon, 25 Aug 2025 00:57:37 +0000
Subject: [PATCH 1/4] Create eval_based_on_mindformers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../docs/source_zh_cn/example/eval_based_on_mindformers/.keep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/.keep

diff --git a/docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/.keep b/docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/.keep
new file mode 100644
index 0000000000..e69de29bb2
--
Gitee

From 448cff6725ca5a57e0379ad282bd2b5a4853af78 Mon Sep 17 00:00:00 2001
From: Kha-Zix-1
Date: Mon, 25 Aug 2025 00:58:51 +0000
Subject: [PATCH 2/4] How to evaluate a model trained with MindFormers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Kha-Zix-1

---
 .../eval_based_on_mindformers.md | 245 ++++++++++++++++++
 1 file changed, 245 insertions(+)
 create mode 100644 docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/eval_based_on_mindformers.md

diff --git a/docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/eval_based_on_mindformers.md b/docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/eval_based_on_mindformers.md
new file mode 100644
index 0000000000..72875eec64
--- /dev/null
+++ b/docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/eval_based_on_mindformers.md
@@ -0,0 +1,245 @@
This document describes how to evaluate a model trained with the MindFormers framework, using lm-eval and OpenCompass respectively.

# 1. lm-eval

lm-eval is a large, general-purpose evaluation framework. It covers many common benchmarks (MMLU, C-Eval, etc.) and also makes it easy to test custom datasets. It is well suited to objective (multiple-choice) questions and has good support for base models, so this tutorial uses it to evaluate the base model obtained from pre-training.

## 1.1 Install lm-eval and its dependencies

**Note: it is strongly recommended to create a dedicated conda environment with Python ≥ 3.10 to avoid compatibility problems.**

lm-eval needs to be installed from a local clone; do not simply run `pip install lm-eval`:

```bash
git clone https://github.com/EleutherAI/lm-evaluation-harness.git
cd lm-evaluation-harness
pip install -e .
```

If you hit `Error: Please make sure the libxml2 and libxslt development packages are installed`, install them with:

```bash
conda install -c conda-forge libxml2 libxslt
```

## 1.2 Set up the NPU execution environment

Open `/lm-evaluation-harness/lm_eval/evaluator.py` and add the following two lines at the very top:

```python
import torch_npu
from torch_npu.contrib import transfer_to_npu
```

With this change, lm-eval can be launched in an NPU environment.

If torch_npu is not installed yet:

```bash
pip3 install torch-npu==2.1.0.post8
```

## 1.3 Process the dataset

Note: this step is only required for custom datasets; for common public benchmarks, just download them directly.

The domain-specific test set used in this example consists of subjective (open-ended) questions, so it has to be converted first; we used a large model (Qwen32B) to generate multiple-choice questions from it.

The generated multiple-choice file is `output_filtered.csv`.

Next, run the following script:

```python
import pandas as pd
from datasets import Dataset, DatasetDict
import os

def convert_csv_to_parquet_dataset(csv_path, output_dir):
    """
    Convert a header-less CSV file into a Parquet dataset and explicitly
    register it as the validation split.

    Args:
        csv_path: path to the input CSV file (no header; column order:
            question, A, B, C, D, answer)
        output_dir: output directory (saved in Hugging Face dataset format)
    """
    # 1. Read the CSV file (no header)
    print(f"Reading CSV file: {csv_path}")
    df = pd.read_csv(csv_path, header=None)

    # 2. Assign canonical column names
    df.columns = ["question", "A", "B", "C", "D", "answer"]
    print(f"Found {len(df)} rows")

    # 3. Convert to a Hugging Face Dataset
    dataset = Dataset.from_pandas(df)

    # 4. Wrap it in a DatasetDict as the validation split
    dataset_dict = DatasetDict({"validation": dataset})

    # 5. Create the output directory
    os.makedirs(output_dir, exist_ok=True)

    # 6. Save the full dataset (Hugging Face format)
    print(f"Saving dataset to: {output_dir}")
    dataset_dict.save_to_disk(output_dir)

    # 7. Additionally save the validation split as a Parquet file (optional)
    validation_parquet_path = os.path.join(output_dir, "validation.parquet")
    dataset_dict["validation"].to_parquet(validation_parquet_path)
    print(f"Stand-alone Parquet file: {validation_parquet_path}")

    return dataset_dict

# Example usage
if __name__ == "__main__":
    # Input/output configuration
    input_csv = "output_filtered.csv"  # replace with your CSV file path
    output_dir = "YOUR_OUTPUT_PATH"    # output directory

    # Run the conversion
    dataset = convert_csv_to_parquet_dataset(input_csv, output_dir)

    # Print verification info
    print("\nConversion result:")
    print(f"Dataset structure: {dataset}")
    print(f"Validation split size: {len(dataset['validation'])}")
    print(f"First sample: {dataset['validation'][0]}")
```

This converts the CSV file into a Hugging Face dataset.
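Before moving on, it can be worth checking that the saved dataset loads back as expected. The snippet below is a minimal sanity-check sketch; the `YOUR_OUTPUT_PATH` directory and the column names are simply the ones assumed by the conversion script above.

```python
from datasets import load_from_disk

# Reload the dataset written by convert_csv_to_parquet_dataset above.
ds = load_from_disk("YOUR_OUTPUT_PATH")
print(ds)  # expect a DatasetDict with a single "validation" split

# These columns are referenced by the lm-eval task config in the next section.
expected_columns = {"question", "A", "B", "C", "D", "answer"}
assert expected_columns.issubset(ds["validation"].column_names)
print(ds["validation"][0])
```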
## 1.4 Create a dataset config file (required for custom datasets)

Create a folder named `YOUR_DATASET_NAME` under `/lm-evaluation-harness/lm_eval/tasks`, and inside it create a file `YOUR_DATASET_NAME.yaml` with the following content:

```yaml
task: YOUR_DATASET_NAME
dataset_path: YOUR_DATASET_PATH_FOLDER
test_split: validation
output_type: multiple_choice
doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 0.0
```

For more ways to build a task config, see the cmmlu YAML files under the tasks folder.

## 1.5 Test accuracy

```bash
lm_eval --model hf --model_args pretrained=path/to/your/model --tasks YOUR_DATASET_NAME --batch_size 1 --output_path path/to/save/output --log_samples
```

**Note: run this from the lm-evaluation-harness root directory.**

Additional steps when testing a trained (post-training) model:

1. Add `--trust_remote_code` to the command.
2. The mg2hf-converted weights cannot be used as-is; copy `tokenization_chatglm.py`, `modeling_chatglm.py`, `tokenizer_config.json`, and `tokenizer.model` from the original (pre-training) repository into the model directory.

---

Bug log:

`jinja2.exceptions.UndefinedError: 'question' is undefined`

Cause: incompatible `datasets` version.
Fix:

```bash
pip uninstall -y datasets
pip install datasets==2.18.0
```

---

`_pad() got an unexpected keyword argument 'padding_side'`

Fix: edit `.../python3.8/site-packages/transformers/tokenization_utils_base.py` at line 3505 and add the following code right before that line:

```python
if padding_strategy == PaddingStrategy.DO_NOT_PAD:
    encoded_inputs = self._pad(
        encoded_inputs,
        max_length=max_length,
        padding_strategy=padding_strategy,
        pad_to_multiple_of=pad_to_multiple_of,
        return_attention_mask=return_attention_mask,
    )
    return BatchEncoding(encoded_inputs, tensor_type=return_tensors)
```

# 2. OpenCompass

## 2.1 Install OpenCompass

```bash
pip install -U opencompass
```

You may hit `AttributeError: module 'inspect' has no attribute 'getargspec'. Did you mean: 'getargs'?`

Fix: install from source instead, and remove `pyext` and `rouge` from `requirements/runtime.txt`.

## 2.2 Prepare the datasets

Download and unpack the datasets:

```bash
wget https://github.com/open-compass/opencompass/releases/download/0.2.2.rc1/OpenCompassData-core-20240207.zip
unzip OpenCompassData-core-20240207.zip
```

## 2.3 Deploy the model locally with vLLM

Start a container from the quay.io/ascend/vllm-ascend Docker image, then deploy the model:

```bash
python -m vllm.entrypoints.api_server \
    --model /data/model/glm-4-9b-chat \
    --tensor-parallel-size 4 \
    --max-model-len 8192 \
    --dtype float16 \
    --trust-remote-code \
    --port 8000 \
    --compilation-config '{"custom_ops":["+rms_norm", "+rotary_embedding"]}'
```
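Before wiring OpenCompass to this server, it can help to confirm that the endpoint responds at all. The sketch below is a minimal smoke test against the demo `/generate` route served by `vllm.entrypoints.api_server`; the host, port, and prompt are assumptions based on the command above. If your OpenCompass model config expects an OpenAI-style API (`openai_api_base`), consider launching the OpenAI-compatible server (`python -m vllm.entrypoints.openai.api_server`) instead, which exposes `/v1/chat/completions`.

```python
import requests

# Host and port assumed from the deployment command above; adjust if different.
resp = requests.post(
    "http://127.0.0.1:8000/generate",
    json={"prompt": "你好,请简单介绍一下你自己。", "max_tokens": 64, "temperature": 0},
    timeout=60,
)
resp.raise_for_status()
print(resp.json())  # the demo server returns the generated text under "text"
```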
## 2.4 Set up the config file and run script

For the model config file, refer to [the OpenCompass guide on evaluating models served through an API](https://opencompass.readthedocs.io/zh-cn/latest/advanced_guides/accelerator_intro.html#api).

Set `path` to the name of the deployed model, set `openai_api_base` to the URL the model is served at, configure the model's `tokenizer_path`, and increase `batch_size` as appropriate to speed things up.
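For orientation, here is a sketch of what such a model config can look like. It is only an illustration: the `abbr`, paths, URL, and numeric values are assumptions to be adapted, and parameter names may differ slightly across OpenCompass versions; follow the linked guide for the authoritative template.

```python
from opencompass.models import OpenAI

# Standard API-style meta template used in OpenCompass API-model examples.
api_meta_template = dict(round=[
    dict(role='HUMAN', api_role='HUMAN'),
    dict(role='BOT', api_role='BOT', generate=True),
])

models = [
    dict(
        abbr='glm-4-9b-chat-vllm-api',   # display name, free to choose
        type=OpenAI,
        path='/data/model/glm-4-9b-chat',  # name of the deployed model
        openai_api_base='http://127.0.0.1:8000/v1/chat/completions',  # server URL
        tokenizer_path='/data/model/glm-4-9b-chat',
        key='EMPTY',                     # no real API key for a local server
        meta_template=api_meta_template,
        query_per_second=1,
        max_out_len=1024,
        max_seq_len=8192,
        batch_size=8,                    # raise to speed up evaluation
    ),
]
```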
Datasets usually do not need custom configuration: refer to [the same OpenCompass guide](https://opencompass.readthedocs.io/zh-cn/latest/advanced_guides/accelerator_intro.html#api) for recommended configs, or look for a suitable config under each dataset's config directory. For example, for the bbh (BIG-Bench Hard) dataset, `opencompass/opencompass/configs/datasets/bbh/` contains `bbh_gen_ee62e9.py`, `bbh_0shot_nocot_academic_gen.py`, and others, which are few-shot and zero-shot variants respectively; pick whichever fits your needs.

For the run script, refer to `.../examples/eval_api_demo.py`; simply import the model config to evaluate and the dataset configs to test.

Error:

```text
Traceback (most recent call last):
  File "/root/miniconda3/envs/fuck_mindformers/bin/opencompass", line 33, in <module>
    sys.exit(load_entry_point('opencompass', 'console_scripts', 'opencompass')())
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/cxl/workspace/opencompass/opencompass/cli/main.py", line 259, in main
    cfg = get_config_from_arg(args)
          ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/cxl/workspace/opencompass/opencompass/utils/run.py", line 97, in get_config_from_arg
    config = Config.fromfile(args.config, format_python_code=False)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/fuck_mindformers/lib/python3.11/site-packages/mmengine/config/config.py", line 494, in fromfile
    raise e
  File "/root/miniconda3/envs/fuck_mindformers/lib/python3.11/site-packages/mmengine/config/config.py", line 492, in fromfile
    cfg_dict, imported_names = Config._parse_lazy_import(filename)
                               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/fuck_mindformers/lib/python3.11/site-packages/mmengine/config/config.py", line 1081, in _parse_lazy_import
    _base_cfg_dict, _base_imported_names = Config._parse_lazy_import(  # noqa: E501
                                           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/fuck_mindformers/lib/python3.11/site-packages/mmengine/config/config.py", line 1109, in _parse_lazy_import
    exec(
  File "/data/cxl/workspace/opencompass/opencompass/configs/datasets/bbh/bbh_gen_ee62e9.py", line 50, in <module>
    with open(os.path.join(hard_coded_path, 'lib_prompt', f'{_name}.txt'), 'r') as f:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/fuck_mindformers/lib/python3.11/site-packages/mmengine/config/lazy.py", line 205, in __call__
    raise RuntimeError()
RuntimeError
```

Cause: unknown.
Workaround: hard-code the path used in `with open(os.path.join(hard_coded_path, 'lib_prompt', f'{_name}.txt'), 'r') as f:` as follows:

```python
hard_coded_path = '/data/cxl/workspace/opencompass/opencompass/configs/datasets/bbh' \
    + '/lib_prompt/' \
    + f'{_name}.txt'
```

## 2.5 Launch the evaluation

The quickest way to start is simply `opencompass /path/to/your/scripts`.

If you need additional options, see [the OpenCompass experimentation guide](https://opencompass.readthedocs.io/zh-cn/latest/user_guides/experimentation.html).

Commonly used options include `-r` to resume an interrupted run and `-w` to set the output directory.
--
Gitee

From 6f015c713601c6b923f30f118a71c5e4822290b6 Mon Sep 17 00:00:00 2001
From: Kha-Zix-1
Date: Mon, 25 Aug 2025 00:58:57 +0000
Subject: [PATCH 3/4] Delete file
 docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/.keep
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../docs/source_zh_cn/example/eval_based_on_mindformers/.keep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/.keep

diff --git a/docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/.keep b/docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/.keep
deleted file mode 100644
index e69de29bb2..0000000000
--
Gitee

From 58731ba6e94436bf917ef979b91580600fcf589e Mon Sep 17 00:00:00 2001
From: Kha-Zix-1
Date: Mon, 25 Aug 2025 06:45:08 +0000
Subject: [PATCH 4/4] Update information
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Kha-Zix-1

---
 .../eval_based_on_mindformers.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/eval_based_on_mindformers.md b/docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/eval_based_on_mindformers.md
index 72875eec64..4e301dbd4a 100644
--- a/docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/eval_based_on_mindformers.md
+++ b/docs/mindformers/docs/source_zh_cn/example/eval_based_on_mindformers/eval_based_on_mindformers.md
@@ -99,6 +99,7 @@ if __name__ == "__main__":
     print(f"Validation split size: {len(dataset['validation'])}")
     print(f"First sample: {dataset['validation'][0]}")
 ```
+
 This converts the CSV file into a Hugging Face dataset.
 
 ## 1.4 Create a dataset config file (required for custom datasets)
@@ -144,13 +145,14 @@ Bug log:
 jinja2.exceptions.UndefinedError: 'question' is undefined
 Cause: incompatible datasets version.
 Fix:
+
 ```
 pip uninstall -y datasets
 pip install datasets==2.18.0
 ```
 
 ---
-_pad() got an unexpected keyword argument 'padding_side' 
+_pad() got an unexpected keyword argument 'padding_side'
 Fix: edit .../python3.8/site-packages/transformers/tokenization_utils_base.py at line 3505
 and add the following code right before that line:
@@ -168,12 +170,13 @@ _pad() got an unexpected keyword argument 'padding_side'
 ## 2.1 Install OpenCompass
 pip install -U opencompass
-You may hit AttributeError: module 'inspect' has no attribute 'getargspec'. Did you mean: 'getargs'? 
+You may hit AttributeError: module 'inspect' has no attribute 'getargspec'. Did you mean: 'getargs'?
 Fix: install from source instead, and remove pyext and rouge from requirements/runtime.txt.
 
 ## 2.2 Prepare the datasets
 Download and unpack the datasets:
+
 ```
 wget https://github.com/open-compass/opencompass/releases/download/0.2.2.rc1/OpenCompassData-core-20240207.zip
 unzip OpenCompassData-core-20240207.zip
@@ -201,6 +204,7 @@ python -m vllm.entrypoints.api_server \
 For the run script, refer to .../examples/eval_api_demo.py; simply import the model config to evaluate and the dataset configs to test.
 
 Error:
+
 ```
 Traceback (most recent call last):
   File "/root/miniconda3/envs/fuck_mindformers/bin/opencompass", line 33, in <module>
@@ -229,8 +233,10 @@ Traceback (most recent call last):
   raise RuntimeError()
 RuntimeError
 ```
+
 Cause: unknown.
 Workaround: hard-code the path used in `with open(os.path.join(hard_coded_path, 'lib_prompt', f'{_name}.txt'), 'r') as f:` as follows:
+
 ```
 hard_coded_path = '/data/cxl/workspace/opencompass/opencompass/configs/datasets/bbh' \
     + '/lib_prompt/' \
--
Gitee