diff --git a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/README.md b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/README.md
index ad6fa55819c430d0984848e39e52bea1a55ce529..9f487365fde4cce6996c68318d26a484a80e89a3 100644
--- a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/README.md
+++ b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/README.md
@@ -107,13 +107,20 @@ fairseq-preprocess \
   --workers 70
 ```
 
-
+# Performance test
+```bash
+# Single-device (1P) training: set up the environment, then run:
+bash ./test/train_performance_1p.sh --data_path=/train_data/en_ro  # dataset path on the current disk, written out down to en_ro
+```
+```bash
+# Multi-device (8P) training: set up the environment, then run:
+bash ./test/train_performance_8p.sh --data_path=/train_data/en_ro  # dataset path on the current disk, written out down to en_ro
+```
 # Fine-tuning on the dataset
-
 ```bash
-1. In ./test/train_full_8p.sh, set PRETRAIN to the model path and DATA_PATH to the dataset path (train_data/en_ro or train_data/en_de)
+1. In ./test/train_full_8p.sh, set DATA_PATH to the dataset path (train_data/en_ro or train_data/en_de), PRETRAIN to the path of model.pt, and BPE_PATH to the path of sentence.bpe.model;
 [To train on the en_de dataset, set the dropout parameter in train_full_8p.sh to 0.1, total-num-update and max-update to 300000, and target-lang to de_DE]
-2. Run bash ./test/train_full_8p.sh
+2. Run bash ./test/train_full_8p.sh --data_path=DATA_PATH
 ```
 
 # Evaluation on the dataset
@@ -129,11 +136,11 @@ pip3.7 install sacrebleu==1.5.1
 
 ```bash
 Verify en_ro accuracy
-1. In generate_on_en_ro.sh, set DATA_PATH to the dataset path, BPE_PATH to the path of sentence.bpe.model, SCRIPTS to the path of mosesdecoder/scripts, and WMT16_SCRIPTS to the path of wmt16-scripts
-2. Run bash ./test/generate_on_en_ro.sh checkpoints/checkpoint_best.pt to verify en_ro training accuracy
+1. In generate_on_en_ro.sh, set DATA_PATH to the dataset path, BPE_PATH to the path of sentence.bpe.model, SCRIPTS to the path of mosesdecoder/scripts, and WMT16_SCRIPTS to the path of wmt16-scripts
+2. Run bash generate_on_en_ro.sh checkpoints/checkpoint_best.pt to verify en_ro training accuracy
 Verify en_de accuracy
-1. In generate_on_en_de.sh, set DATA_PATH to the dataset path, BPE_PATH to the path of sentence.bpe.model, and DETOKENIZER to the path of mosesdecoder/scripts/tokenizer/detokenizer.perl
-2. Run bash ./test/generate_on_en_de.sh checkpoints/checkpoint_best.pt to verify en_de training accuracy
+1. In generate_on_en_de.sh, set DATA_PATH to the dataset path, BPE_PATH to the path of sentence.bpe.model, and DETOKENIZER to the path of mosesdecoder/scripts/tokenizer/detokenizer.perl
+2. Run bash generate_on_en_de.sh checkpoints/checkpoint_best.pt to verify en_de training accuracy
 ```
 
 # Docker container training
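The fine-tune instructions above map onto a short command sequence. The following is a non-authoritative sketch of that flow; `/train_data/en_ro` and the `mbart.cc25` file names are the README's own placeholders, not verified locations:

```bash
# Sketch of the README fine-tune flow; adapt all placeholder paths.
cd PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch

# 1. Edit DATA_PATH, PRETRAIN (model.pt) and BPE_PATH (sentence.bpe.model)
#    inside ./test/train_full_8p.sh, then launch:
bash ./test/train_full_8p.sh --data_path=/train_data/en_ro

# For en_de instead: dropout 0.1, total-num-update/max-update 300000,
# target-lang de_DE, per the README note above.
```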
diff --git a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/generate_on_en_de.sh b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/generate_on_en_de.sh
similarity index 97%
rename from PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/generate_on_en_de.sh
rename to PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/generate_on_en_de.sh
index 7e13b087392c54d9996f155cc1f409c63298ba0d..6b4547e41a0f4c7c573d6956778a3177a8472ded 100644
--- a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/generate_on_en_de.sh
+++ b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/generate_on_en_de.sh
@@ -1,4 +1,4 @@
-source env.sh
+source ./test/env_npu.sh
 DATA_PATH=path_of_data # fix it to your own train data path
 BPE_PATH=/path/sentence.bpe.model # fix it to your own sentence.bpe.model path
 langs=ar_AR,cs_CZ,de_DE,en_XX,es_XX,et_EE,fi_FI,fr_XX,gu_IN,hi_IN,it_IT,ja_XX,kk_KZ,ko_KR,lt_LT,lv_LV,my_MM,ne_NP,nl_XX,ro_RO,ru_RU,si_LK,tr_TR,vi_VN,zh_CN
diff --git a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/generate_on_en_ro.sh b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/generate_on_en_ro.sh
similarity index 94%
rename from PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/generate_on_en_ro.sh
rename to PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/generate_on_en_ro.sh
index 18e527b1086180eb5d05a78bcbc9353017d2a811..e9efd429e9dbcd63b935eb3ee64c9cf4067e99af 100644
--- a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/generate_on_en_ro.sh
+++ b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/generate_on_en_ro.sh
@@ -1,4 +1,4 @@
-source env.sh
+source ./test/env_npu.sh
 DATA_PATH=path_of_data # fix it to your own train data path
 BPE_PATH=/path/sentence.bpe.model # fix it to your own sentence.bpe.model path
 SCRIPTS=mosesdecoder/scripts # fix it to your own mosesdecoder path
@@ -34,8 +34,8 @@ for f in $HYP $REF
     perl $REPLACE_UNICODE_PUNCT | \
     perl $NORM_PUNC -l ro | \
     perl $REM_NON_PRINT_CHAR | \
-    python3 $NORMALIZE_ROMANIAN | \
-    python3 $REMOVE_DIACRITICS | \
+    python3.7 $NORMALIZE_ROMANIAN | \
+    python3.7 $REMOVE_DIACRITICS | \
     perl $TOKENIZER -no-escape -threads 16 -a -l ro >"en_ro."$f
 done
 sacrebleu -tok 'none' -s 'none' en_ro.ref < en_ro.hyp
\ No newline at end of file
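Both renamed evaluation scripts now source `./test/env_npu.sh` through a relative path, which suggests (though the patch does not state it) that they must be launched from the model root rather than from inside `test/`. A minimal usage sketch under that assumption:

```bash
# Assumption: run from the model root so that ./test/env_npu.sh resolves.
cd PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch
bash generate_on_en_ro.sh checkpoints/checkpoint_best.pt   # prints en_ro BLEU via sacrebleu
bash generate_on_en_de.sh checkpoints/checkpoint_best.pt   # prints en_de BLEU
```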
diff --git a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_full_8p.sh b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_full_8p.sh
index eb9b5ffa21a018997992a7bfdc9390cd16177e12..cc9262dcc63a704f2e48ac5a17d093c846bf68f1 100644
--- a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_full_8p.sh
+++ b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_full_8p.sh
@@ -11,6 +11,16 @@ export RANK_SIZE=8
 Network="mBART_for_PyTorch"
 # Training batch size
 token_size=1024
+# Dataset path; keep it empty here, no need to modify
+data_path=""
+
+# Argument parsing: data_path is required; other arguments may be added or removed as the model dictates. Any new argument must be defined and assigned above.
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
 
 # Check that data_path was passed in; no need to modify
 if [[ $data_path == "" ]];then
@@ -18,7 +28,8 @@ if [[ $data_path == "" ]];then
     exit 1
 fi
 
-# cd to the directory at the same level as the test folder before running, for compatibility; test_path_dir is the path containing the test folder
+############### Training-script execution path ###############
+# cd to the directory at the same level as the test folder before running, for compatibility; test_path_dir is the path containing the test folder
 cur_path=`pwd`
 cur_path_last_dirname=${cur_path##*/}
 if [ x"${cur_path_last_dirname}" == x"test" ]; then
@@ -51,7 +62,7 @@ if [ x"${etp_flag}" != x"true" ];then
 fi
 
 # Place the data and model at the paths below, or modify the paths to fit local training
-DATA_PATH=train_data/en_ro
+DATA_PATH=train_data/en_ro
 PRETRAIN=mbart.cc25/model.pt
 BPE_PATH=mbart.cc25/sentence.bpe.model
 model_dir=checkpoints/checkpoint_best.pt
@@ -84,7 +95,7 @@ do
     then
         let a=0+RANK_ID*24
         let b=23+RANK_ID*24
-        taskset -c $a-$b fairseq-train $DATA_PATH --fp16 --distributed-world-size 8 --npu \
+        nohup taskset -c $a-$b fairseq-train $DATA_PATH --fp16 --distributed-world-size 8 --npu \
         --device-id $RANK_ID --distributed-rank $RANK_ID --distributed-no-spawn --max-update 40000 \
         --encoder-normalize-before --decoder-normalize-before \
         --arch mbart_large --layernorm-embedding \
@@ -100,9 +111,9 @@ do
         --restore-file $PRETRAIN \
         --reset-optimizer --reset-meters --reset-dataloader --reset-lr-scheduler \
         --langs $langs \
-        --ddp-backend no_c10d > ${test_path_dir}/output/${RANK_ID}/train_${RANK_ID}.log 2>&1 &
+        --ddp-backend no_c10d > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
     else
-        fairseq-train $DATA_PATH --fp16 --distributed-world-size 8 --npu \
+        nohup fairseq-train $DATA_PATH --fp16 --distributed-world-size 8 --npu \
         --device-id $RANK_ID --distributed-rank $RANK_ID --distributed-no-spawn --max-update 40000 \
         --encoder-normalize-before --decoder-normalize-before \
         --arch mbart_large --layernorm-embedding \
@@ -118,7 +129,7 @@ do
         --restore-file $PRETRAIN \
         --reset-optimizer --reset-meters --reset-dataloader --reset-lr-scheduler \
         --langs $langs \
-        --ddp-backend no_c10d > ${test_path_dir}/output/${RANK_ID}/train_${RANK_ID}.log 2>&1 &
+        --ddp-backend no_c10d > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
     fi
 done
 wait
@@ -127,7 +138,7 @@
 end_time=$(date +%s)
 e2e_time=$(( $end_time - $start_time ))
 
-fairseq-generate $DATA_PATH \
+nohup fairseq-generate $DATA_PATH \
     --fp16 --path $model_dir --max-tokens 4096 \
     --task translation_from_pretrained_bart \
     --gen-subset test \
@@ -146,15 +157,15 @@ for f in $HYP $REF
     perl $REPLACE_UNICODE_PUNCT | \
     perl $NORM_PUNC -l ro | \
     perl $REM_NON_PRINT_CHAR | \
-    python3 $NORMALIZE_ROMANIAN | \
-    python3 $REMOVE_DIACRITICS | \
+    python3.7 $NORMALIZE_ROMANIAN | \
+    python3.7 $REMOVE_DIACRITICS | \
     perl $TOKENIZER -no-escape -threads 16 -a -l ro >"en_ro."$f
 done
 sacrebleu -tok 'none' -s 'none' en_ro.ref < en_ro.hyp > res.log
 wait
 
 ASCEND_DEVICE_ID=0
-
+cp ${cur_path}/nohup.out ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
 # Print results; no need to modify
 echo "------------------ Final result ------------------"
 # Output performance (FPS); review and adjust per model
diff --git a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_1p.sh
index 70974d82c25d5b4a2613bee7bcd4534f9c90ac5f..d9b7154ca9241f900e56eb02fb9f838b4ef7ad8c 100644
--- a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_1p.sh
+++ b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_1p.sh
@@ -94,7 +94,7 @@ wait
 pip3.7 install --editable ./
 
 start=$(date +%s)
-nohup python3.7 ${cur_path}/train.py $data_path/en_ro/ \
+nohup python3.7 ${cur_path}/train.py $data_path/ \
 --distributed-world-size 1 --npu --npu-id $ASCEND_DEVICE_ID --fp16 --encoder-normalize-before --decoder-normalize-before \
 --arch mbart_large --layernorm-embedding \
 --task translation_from_pretrained_bart \
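In both 8P launchers, `let a=0+RANK_ID*24` and `let b=23+RANK_ID*24` give each rank its own block of 24 CPU cores for `taskset`. A self-contained sketch of that affinity scheme, assuming a host with at least 192 logical cores (the scripts imply this but never check it):

```bash
#!/bin/bash
# Illustrative only: per-rank CPU binding as used by the 8P scripts.
# `echo` stands in for the real fairseq-train / train.py command.
for RANK_ID in 0 1 2 3 4 5 6 7
do
    let a=0+RANK_ID*24     # first core of this rank's block: 0, 24, 48, ...
    let b=23+RANK_ID*24    # last core of this rank's block: 23, 47, 71, ...
    taskset -c $a-$b echo "rank ${RANK_ID} bound to cores ${a}-${b}" &
done
wait    # block until all eight background ranks finish
```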
diff --git a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_8p.sh
index c4bdd78c247b5c7d5902d1b8e67cb5b44d7d4c89..b9de8aada6219a23c48b1c25937c22afc159dc78 100644
--- a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_8p.sh
+++ b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_8p.sh
@@ -97,7 +97,7 @@ do
     then
         let a=0+RANK_ID*24
         let b=23+RANK_ID*24
-        taskset -c $a-$b nohup python3.7 ${cur_path}/train.py $data_path/en_ro/ --fp16 --distributed-world-size 8 --npu \
+        taskset -c $a-$b nohup python3.7 ${cur_path}/train.py $data_path/ --fp16 --distributed-world-size 8 --npu \
         --device-id $RANK_ID --distributed-rank $RANK_ID --distributed-no-spawn --max-update 50 \
         --encoder-normalize-before --decoder-normalize-before \
         --arch mbart_large --layernorm-embedding \
@@ -116,7 +116,7 @@ do
         --langs $langs \
         --ddp-backend no_c10d > ${test_path_dir}/output/${RANK_ID}/train_${RANK_ID}.log 2>&1 &
     else
-        nohup python3.7 ${cur_path}/train.py $data_path/en_ro/ --fp16 --distributed-world-size 8 --npu \
+        nohup python3.7 ${cur_path}/train.py $data_path/ --fp16 --distributed-world-size 8 --npu \
         --device-id $RANK_ID --distributed-rank $RANK_ID --distributed-no-spawn --max-update 50 \
         --encoder-normalize-before --decoder-normalize-before \
         --arch mbart_large --layernorm-embedding \
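With `$data_path/en_ro/` shortened to `$data_path/` in both performance scripts, `--data_path` now has to name the preprocessed language-pair directory in full, which is exactly how the updated README invokes them:

```bash
# Before this patch the scripts appended en_ro/ themselves:
#   bash ./test/train_performance_8p.sh --data_path=/train_data
# After this patch the language-pair directory is passed in full:
bash ./test/train_performance_1p.sh --data_path=/train_data/en_ro
bash ./test/train_performance_8p.sh --data_path=/train_data/en_ro
```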