diff --git a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/README.md b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/README.md
index ad6fa55819c430d0984848e39e52bea1a55ce529..f3f51d03992f757c535abc16d024d5f9eb5b7608 100644
--- a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/README.md
+++ b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/README.md
@@ -111,7 +111,7 @@ fairseq-preprocess \
 
 # Fine-tune on the dataset
 ```bash
-1. In ./test/train_full_8p.sh, set PRETRAIN to the model path and DATA_PATH to the dataset path (train_data/en_ro or train_data/en_de)
-2. Run bash ./test/train_full_8p.sh
+1. In ./test/train_full_8p.sh, set DATA_PATH to the dataset path (train_data/en_ro or train_data/en_de), PRETRAIN to the path of model.pt, and BPE_PATH to the path of sentence.bpe.model. [To train on the en_de dataset, set dropout in train_full_8p.sh to 0.1, set total-num-update and max-update to 300000, and set target-lang to de_DE]
+2. Run bash ./test/train_full_8p.sh --data_path=train_data/en_ro (--data_path is now required by the script)
 ```
 
@@ -129,10 +129,10 @@
 pip3.7 install sacrebleu==1.5.1
 ```bash
 Verify en_ro accuracy
-1. In generate_on_en_ro.sh, set DATA_PATH to the dataset path, BPE_PATH to the path of sentence.bpe.model, SCRIPTS to the path of mosesdecoder/scripts, and WMT16_SCRIPTS to the path of wmt16-scripts
-2. Run bash ./test/generate_on_en_ro.sh checkpoints/checkpoint_best.pt to verify en_ro training accuracy
+1. In ./generate_on_en_ro.sh, set DATA_PATH to the dataset path, BPE_PATH to the path of sentence.bpe.model, SCRIPTS to the path of mosesdecoder/scripts, and WMT16_SCRIPTS to the path of wmt16-scripts
+2. Run bash ./generate_on_en_ro.sh checkpoints/checkpoint_best.pt to verify en_ro training accuracy
 
 Verify en_de accuracy
-1. In generate_on_en_de.sh, set DATA_PATH to the dataset path, BPE_PATH to the path of sentence.bpe.model, and DETOKENIZER to the path of mosesdecoder/scripts/tokenizer/detokenizer.perl
-2. Run bash ./test/generate_on_en_de.sh checkpoints/checkpoint_best.pt to verify en_de training accuracy
+1. In ./generate_on_en_de.sh, set DATA_PATH to the dataset path, BPE_PATH to the path of sentence.bpe.model, and DETOKENIZER to the path of mosesdecoder/scripts/tokenizer/detokenizer.perl
+2. Run bash ./generate_on_en_de.sh checkpoints/checkpoint_best.pt to verify en_de training accuracy
 ```
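
For reference, the fine-tune setup the README describes, as a minimal sketch. The values mirror the defaults that train_full_8p.sh already sets; the layout (mbart.cc25 extracted beside the scripts) is an assumption, so adjust to your local paths:

```bash
# Illustrative values only -- adjust to your local layout.
# Inside ./test/train_full_8p.sh:
DATA_PATH=train_data/en_ro              # or train_data/en_de
PRETRAIN=mbart.cc25/model.pt            # fine-tuning starts from this checkpoint
BPE_PATH=mbart.cc25/sentence.bpe.model  # SentencePiece model shipped with mbart.cc25

# en_de variant (per the README note): dropout 0.1,
# total-num-update and max-update 300000, target-lang de_DE.

# Launch the 8-device fine-tune from the model root:
bash ./test/train_full_8p.sh --data_path=train_data/en_ro
```
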
diff --git a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/generate_on_en_de.sh b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/generate_on_en_de.sh
similarity index 97%
rename from PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/generate_on_en_de.sh
rename to PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/generate_on_en_de.sh
index 7e13b087392c54d9996f155cc1f409c63298ba0d..6b4547e41a0f4c7c573d6956778a3177a8472ded 100644
--- a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/generate_on_en_de.sh
+++ b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/generate_on_en_de.sh
@@ -1,4 +1,4 @@
-source env.sh
+source ./test/env_npu.sh
 DATA_PATH=path_of_data # fix it to your own train data path
 BPE_PATH=/path/sentence.bpe.model # fix it to your own sentence.bpe.model path
 langs=ar_AR,cs_CZ,de_DE,en_XX,es_XX,et_EE,fi_FI,fr_XX,gu_IN,hi_IN,it_IT,ja_XX,kk_KZ,ko_KR,lt_LT,lv_LV,my_MM,ne_NP,nl_XX,ro_RO,ru_RU,si_LK,tr_TR,vi_VN,zh_CN
diff --git a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/generate_on_en_ro.sh b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/generate_on_en_ro.sh
similarity index 94%
rename from PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/generate_on_en_ro.sh
rename to PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/generate_on_en_ro.sh
index 18e527b1086180eb5d05a78bcbc9353017d2a811..5158be82eb3a589bbaec4e0b1babdfc1c20b641d 100644
--- a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/generate_on_en_ro.sh
+++ b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/generate_on_en_ro.sh
@@ -1,4 +1,4 @@
-source env.sh
+source ./test/env_npu.sh
 DATA_PATH=path_of_data # fix it to your own train data path
 BPE_PATH=/path/sentence.bpe.model # fix it to your own sentence.bpe.model path
 SCRIPTS=mosesdecoder/scripts # fix it to your own mosesdecoder path
@@ -34,8 +34,8 @@ for f in $HYP $REF
     perl $REPLACE_UNICODE_PUNCT | \
     perl $NORM_PUNC -l ro | \
     perl $REM_NON_PRINT_CHAR | \
-    python3 $NORMALIZE_ROMANIAN | \
-    python3 $REMOVE_DIACRITICS | \
+    nohup python3.7 $NORMALIZE_ROMANIAN | \
+    nohup python3.7 $REMOVE_DIACRITICS | \
     perl $TOKENIZER -no-escape -threads 16 -a -l ro >"en_ro."$f
 done
 sacrebleu -tok 'none' -s 'none' en_ro.ref < en_ro.hyp
\ No newline at end of file
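
Both evaluation scripts above move from test/ to the model root and now source ./test/env_npu.sh through a relative path, so they are meant to be launched from the model root. A usage sketch under that assumption (the checkpoint is passed as the first argument, as the README describes):

```bash
# Run the relocated evaluation scripts from the model root so that
# "source ./test/env_npu.sh" resolves against the working directory.
cd PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch
bash ./generate_on_en_ro.sh checkpoints/checkpoint_best.pt   # en_ro BLEU via sacrebleu
bash ./generate_on_en_de.sh checkpoints/checkpoint_best.pt   # en_de BLEU
```
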
diff --git a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_full_8p.sh b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_full_8p.sh
index eb9b5ffa21a018997992a7bfdc9390cd16177e12..b08515bbb24bcbdf023d8f0739aa0ba6170affba 100644
--- a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_full_8p.sh
+++ b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_full_8p.sh
@@ -11,6 +11,16 @@ export RANK_SIZE=8
 Network="mBART_for_PyTorch"
 # training batch_size
 token_size=1024
+# dataset path; keep it empty here, no need to modify
+data_path=""
+
+# parameter check: data_path is required; other parameters may be added or removed as the model dictates, and any parameter added here must be defined and assigned above
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
 
 # check whether data_path was passed in, no need to modify
 if [[ $data_path == "" ]];then
@@ -18,7 +28,8 @@ if [[ $data_path == "" ]];then
     exit 1
 fi
 
-# cd to the directory at the same level as the test file and run the script there for better compatibility; test_path_dir is the path containing the test folder
+############### training-script execution path ###############
+# cd to the directory at the same level as the test folder and run the script there for better compatibility; test_path_dir is the path containing the test folder
 cur_path=`pwd`
 cur_path_last_dirname=${cur_path##*/}
 if [ x"${cur_path_last_dirname}" == x"test" ]; then
@@ -51,7 +62,7 @@ if [ x"${etp_flag}" != x"true" ];then
 fi
 
 # place the data and model in the matching paths, or modify the paths below to fit local training
-DATA_PATH=train_data/en_ro 
+DATA_PATH=train_data/en_ro
 PRETRAIN=mbart.cc25/model.pt
 BPE_PATH=mbart.cc25/sentence.bpe.model
 model_dir=checkpoints/checkpoint_best.pt
@@ -84,7 +95,7 @@ do
     then
         let a=0+RANK_ID*24
         let b=23+RANK_ID*24
-        taskset -c $a-$b fairseq-train $DATA_PATH --fp16 --distributed-world-size 8 --npu \
+        nohup taskset -c $a-$b fairseq-train $DATA_PATH --fp16 --distributed-world-size 8 --npu \
         --device-id $RANK_ID --distributed-rank $RANK_ID --distributed-no-spawn --max-update 40000 \
         --encoder-normalize-before --decoder-normalize-before \
         --arch mbart_large --layernorm-embedding \
@@ -100,9 +111,9 @@ do
         --restore-file $PRETRAIN \
         --reset-optimizer --reset-meters --reset-dataloader --reset-lr-scheduler \
         --langs $langs \
-        --ddp-backend no_c10d > ${test_path_dir}/output/${RANK_ID}/train_${RANK_ID}.log 2>&1 &
+        --ddp-backend no_c10d > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
     else
-        fairseq-train $DATA_PATH --fp16 --distributed-world-size 8 --npu \
+        nohup fairseq-train $DATA_PATH --fp16 --distributed-world-size 8 --npu \
         --device-id $RANK_ID --distributed-rank $RANK_ID --distributed-no-spawn --max-update 40000 \
         --encoder-normalize-before --decoder-normalize-before \
         --arch mbart_large --layernorm-embedding \
@@ -118,7 +129,7 @@ do
         --restore-file $PRETRAIN \
         --reset-optimizer --reset-meters --reset-dataloader --reset-lr-scheduler \
         --langs $langs \
-        --ddp-backend no_c10d > ${test_path_dir}/output/${RANK_ID}/train_${RANK_ID}.log 2>&1 &
+        --ddp-backend no_c10d > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
     fi
 done
 wait
@@ -146,15 +157,15 @@ for f in $HYP $REF
     perl $REPLACE_UNICODE_PUNCT | \
     perl $NORM_PUNC -l ro | \
     perl $REM_NON_PRINT_CHAR | \
-    python3 $NORMALIZE_ROMANIAN | \
-    python3 $REMOVE_DIACRITICS | \
+    nohup python3.7 $NORMALIZE_ROMANIAN | \
+    nohup python3.7 $REMOVE_DIACRITICS | \
     perl $TOKENIZER -no-escape -threads 16 -a -l ro >"en_ro."$f
 done
 sacrebleu -tok 'none' -s 'none' en_ro.ref < en_ro.hyp > res.log
 wait
 
 ASCEND_DEVICE_ID=0
-
+cp ${cur_path}/nohup.out ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
 # result printing, no need to modify
 echo "------------------ Final result ------------------"
 # output performance (FPS); review and adjust per model
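
The argument loop added to train_full_8p.sh relies on bash prefix stripping: `${para#*=}` deletes the shortest prefix ending in `=`, leaving only the value. A self-contained sketch of the same idiom; the script name and message text are illustrative, not from the patch:

```bash
#!/bin/bash
# Minimal reproduction of the --data_path parsing added in train_full_8p.sh.
data_path=""
for para in "$@"; do                 # "$@" is the safer spelling of the $* used above
    if [[ $para == --data_path* ]]; then
        data_path=${para#*=}         # strip everything through the first '='
    fi
done

# The same required-parameter guard the script performs:
if [[ $data_path == "" ]]; then
    echo "[Error] --data_path must be specified"
    exit 1
fi
echo "data_path = ${data_path}"

# Example: ./parse_demo.sh --data_path=train_data/en_ro  ->  data_path = train_data/en_ro
```
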