From 3daf00ef170a5a5416d289e8fbd68807aef04985 Mon Sep 17 00:00:00 2001 From: YakeeXv <9837540+yakeexv@user.noreply.gitee.com> Date: Fri, 24 Mar 2023 03:23:50 +0000 Subject: [PATCH 1/5] update TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/README.md. Signed-off-by: YakeeXv <9837540+yakeexv@user.noreply.gitee.com> --- .../README.md | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/README.md b/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/README.md index 22d513d1d..1e4d6067f 100644 --- a/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/README.md +++ b/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/README.md @@ -152,24 +152,19 @@ InceptionV4是2016年提出的Inception系列网络的第四个版本,随着Re 2. 单卡训练 - 2.1 配置run_1p.sh脚本中`data_dir`(脚本路径InceptionV4_for_TensorFlow/script/run_1p.sh),请用户根据实际路径配置,数据集参数如下所示: - - --data_dir=/opt/npu/imagenet_data - - 2.2 单p指令如下: - - bash run_1p.sh + 2.1 单p指令如下: + cd test; + bash train_performance_1p.sh --data_path=/path/to/data 3. 8卡训练 - 3.1 配置run_8p.sh脚本中`data_dir`(脚本路径InceptionV4_for_TensorFlow/script/run_8p.sh),请用户根据实际路径配置,数据集参数如下所示: - - --data_dir=/opt/npu/imagenet_data - - 3.2 8p指令如下: - + 3.1 8p指令如下: + cd test; + bash train_performance_8p.sh --data_path=/path/to/data - bash run_8p.sh + 3.2 8p 绑核指令如下: + cd test; + bash train_performance_8p.sh --bind_core=1 --data_path=/path/to/data - 验证。 -- Gitee From 635a5e0d2a3dc956380c880a9dffd8244fc8760b Mon Sep 17 00:00:00 2001 From: YakeeXv <9837540+yakeexv@user.noreply.gitee.com> Date: Fri, 24 Mar 2023 03:28:45 +0000 Subject: [PATCH 2/5] update InceptionV2_ID0670_for_TensorFlow/test/train_performance_8p.sh.
Signed-off-by: YakeeXv <9837540+yakeexv@user.noreply.gitee.com> --- .../test/train_performance_8p.sh | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_performance_8p.sh b/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_performance_8p.sh index d6169dc2e..cb1fa8e6b 100644 --- a/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_performance_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_performance_8p.sh @@ -87,6 +87,9 @@ do cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ elif [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --bind_core* ]]; then + bind_core=`echo ${para#*=}` + name_bind="_bindcore" fi done @@ -124,12 +127,20 @@ do mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt fi + # 绑核,不需要的绑核的模型删除,需要模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - nohup python3.7 $cur_path/../train.py --rank_size=8 \ - --mode=train_and_evaluate \ - --max_epochs=100 \ + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3.7 $cur_path/../train.py --rank_size=8 \ + --mode=train \ + --max_epochs=1 \ --T_max=100 \ --iterations_per_loop=10 \ --batch_size=64 \ -- Gitee From d3470d7fd397b905e267161cf035994d244d5f8c Mon Sep 17 00:00:00 2001 From: YakeeXv <9837540+yakeexv@user.noreply.gitee.com> Date: Fri, 24 Mar 2023 03:31:11 +0000 Subject: [PATCH 3/5] update 
TensorFlow/built-in/cv/image_classification/InceptionV3_ID0504_for_TensorFlow/README.md. Signed-off-by: YakeeXv <9837540+yakeexv@user.noreply.gitee.com> --- .../README.md | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0504_for_TensorFlow/README.md b/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0504_for_TensorFlow/README.md index f74011f5e..cdacdb8a2 100644 --- a/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0504_for_TensorFlow/README.md +++ b/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0504_for_TensorFlow/README.md @@ -152,25 +152,19 @@ InceptionV4是2016年提出的Inception系列网络的第四个版本,随着Re 2. 单卡训练 - 2.1 配置run_1p.sh脚本中`data_dir`(脚本路径InceptionV3_for_TensorFlow/script/run_1p.sh),请用户根据实际路径配置,数据集参数如下所示: - - --data_dir=/opt/npu/imagenet_data - - 2.2 单p指令如下: - - bash run_1p.sh + 2.1 单p指令如下: + cd test; + bash train_performance_1p.sh --data_path=/path/to/data 3. 8卡训练 - 3.1 配置train_performance_8p.sh脚本中`data_dir`(脚本路径InceptionV3_for_TensorFlow/test下),请用户根据实际路径配置,数据集参数如下所示: - - --data_dir=/opt/npu/imagenet_data - - 3.2 8p指令如下: + 3.1 8p指令如下: + cd test; + bash train_performance_8p.sh --data_path=/path/to/data - ``` - train_performance_8p.sh --bind_core=1 - ``` + 3.2 8p 绑核指令如下: + cd test; + bash train_performance_8p.sh --bind_core=1 --data_path=/path/to/data - 验证。 -- Gitee From f589e53ef8beb1d9d9b7d5fd4ee27336b280e97e Mon Sep 17 00:00:00 2001 From: YakeeXv <9837540+yakeexv@user.noreply.gitee.com> Date: Fri, 24 Mar 2023 03:32:05 +0000 Subject: [PATCH 4/5] update TensorFlow/built-in/cv/image_classification/InceptionV4_ID0002_for_TensorFlow/README.md.
Signed-off-by: YakeeXv <9837540+yakeexv@user.noreply.gitee.com> --- .../README.md | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/InceptionV4_ID0002_for_TensorFlow/README.md b/TensorFlow/built-in/cv/image_classification/InceptionV4_ID0002_for_TensorFlow/README.md index 8c196f945..db7fd7472 100644 --- a/TensorFlow/built-in/cv/image_classification/InceptionV4_ID0002_for_TensorFlow/README.md +++ b/TensorFlow/built-in/cv/image_classification/InceptionV4_ID0002_for_TensorFlow/README.md @@ -151,24 +151,19 @@ InceptionV4是2016年提出的Inception系列网络的第四个版本,随着Re 2. 单卡训练 - 2.1 配置train_performance_1p.sh脚本中`data_dir`(脚本路径InceptionV4_for_TensorFlow/test下),请用户根据实际路径配置,数据集参数如下所示: - - --data_dir=/path/to/data - - 2.2 单p指令如下: - - bash train_performance_1p.sh + 2.1 单p指令如下: + cd test; + bash train_performance_1p.sh --data_path=/path/to/data 3. 8卡训练 - 3.1 配置train_performance_8p.sh脚本中`data_dir`(脚本路径InceptionV4_for_TensorFlow/test下),请用户根据实际路径配置,数据集参数如下所示: - - --data_dir=/path/to/data - - 3.2 8p指令如下: - + 3.1 8p指令如下: + cd test; + bash train_performance_8p.sh --data_path=/path/to/data - train_performance_8p.sh --bind_core=1 + 3.2 8p 绑核指令如下: + cd test; + bash train_performance_8p.sh --bind_core=1 --data_path=/path/to/data - 验证。 -- Gitee From 7ba399d06a720cbbfaa3740dd632df4ca84cd80a Mon Sep 17 00:00:00 2001 From: YakeeXv <9837540+yakeexv@user.noreply.gitee.com> Date: Fri, 24 Mar 2023 03:36:04 +0000 Subject: [PATCH 5/5] update TensorFlow/built-in/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/README.md.
Signed-off-by: YakeeXv <9837540+yakeexv@user.noreply.gitee.com> --- .../README.md | 40 +++++-------------- 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/README.md b/TensorFlow/built-in/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/README.md index 0825f958d..9b14a5bab 100644 --- a/TensorFlow/built-in/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/README.md +++ b/TensorFlow/built-in/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/README.md @@ -165,39 +165,19 @@ DenseNet-121是一个经典的图像分类网络,主要特点是采用各层 - 单卡训练 - 1. 配置训练参数。 - - 首先在脚本scripts/train_1p.sh中,配置训练数据集路径,请用户根据实际路径配置,数据集参数如下所示: - - ``` - --data_dir=/opt/npu/slimImagenet - ``` - - 2. 启动训练。 - - 启动单卡训练 (脚本为DenseNet121_for_TensorFlow/scripts/run_1p.sh) - - ``` - bash run_1p.sh - ``` + 1、 单p指令如下: + cd test; + bash train_performance_1p.sh --data_path=/path/to/data - 8卡训练 - 1. 配置训练参数。 - - 首先在脚本scripts/train_8p.sh中,配置训练数据集路径,请用户根据实际路径配置,数据集参数如下所示: - - ``` - --data_dir=/opt/npu/slimImagenet - ``` - - 2. 启动训练。 - - 启动单卡训练 (脚本为DenseNet121_for_TensorFlow/scripts/run_8p.sh) - - ``` - bash run_8p.sh - ``` + 1、 8p指令如下: + cd test; + bash train_performance_8p.sh --data_path=/path/to/data + + 2、 8p 绑核指令如下: + cd test; + bash train_performance_8p.sh --bind_core=1 --data_path=/path/to/data - 验证。 -- Gitee