From c6dbac34772d38070e3a0d62323af37ca88a54bd Mon Sep 17 00:00:00 2001 From: shenwei41 Date: Thu, 12 Dec 2024 20:21:45 +0800 Subject: [PATCH] change cache_admin to dataset-cache --- .../resnet/scripts/run_distribute_train.sh | 4 +-- .../scripts/run_distribute_train_2node_16p.sh | 4 +-- .../resnet/scripts/run_distribute_train_4p.sh | 4 +-- .../run_distribute_train_multi_server.sh | 4 +-- benchmark/ascend/resnet/train.py | 7 +++-- .../mobilenetv2/scripts/cache_util.sh | 12 +++---- .../scripts/run_train_nfs_cache.sh | 6 ++-- official/cv/MobileNet/mobilenetv2/train.py | 2 +- .../ResNet/modelarts/ResNet152/train_start.py | 31 +++++++++---------- .../modelarts/ResNet18/modelarts_train.py | 20 +++++------- official/cv/ResNet/scripts/cache_util.sh | 12 +++---- .../cv/ResNet/scripts/run_distribute_train.sh | 4 +-- .../scripts/run_distribute_train_gpu.sh | 4 +-- .../scripts/run_distribute_train_msrun.sh | 4 +-- .../cv/ResNet/scripts/run_standalone_train.sh | 4 +-- .../scripts/run_standalone_train_gpu.sh | 4 +-- official/cv/ResNet/train.py | 7 +++-- research/cv/ISyNet/scripts/cache_util.sh | 12 +++---- .../cv/ISyNet/scripts/run_distribute_train.sh | 2 +- .../scripts/run_distribute_train_gpu.sh | 2 +- .../cv/ISyNet/scripts/run_standalone_train.sh | 2 +- .../scripts/run_standalone_train_gpu.sh | 2 +- .../scripts/run_distribute_train.sh | 2 +- research/cv/res2net/scripts/cache_util.sh | 12 +++---- .../res2net/scripts/run_distribute_train.sh | 2 +- .../res2net/scripts/run_standalone_train.sh | 2 +- research/cv/res2net/train.py | 2 +- research/cv/wideresnet/scripts/cache_util.sh | 12 +++---- .../scripts/run_distribute_train.sh | 2 +- .../scripts/run_distribute_train_gpu.sh | 2 +- .../scripts/run_standalone_train.sh | 2 +- .../scripts/run_standalone_train_gpu.sh | 2 +- 32 files changed, 94 insertions(+), 99 deletions(-) diff --git a/benchmark/ascend/resnet/scripts/run_distribute_train.sh b/benchmark/ascend/resnet/scripts/run_distribute_train.sh index 6dc972281..652837950 100644 --- a/benchmark/ascend/resnet/scripts/run_distribute_train.sh +++ b/benchmark/ascend/resnet/scripts/run_distribute_train.sh @@ -141,7 +141,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi @@ -152,7 +152,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. 
diff --git a/benchmark/ascend/resnet/scripts/run_distribute_train_2node_16p.sh b/benchmark/ascend/resnet/scripts/run_distribute_train_2node_16p.sh index 6df3a9d69..9956487d2 100644 --- a/benchmark/ascend/resnet/scripts/run_distribute_train_2node_16p.sh +++ b/benchmark/ascend/resnet/scripts/run_distribute_train_2node_16p.sh @@ -116,7 +116,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi @@ -127,7 +127,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. diff --git a/benchmark/ascend/resnet/scripts/run_distribute_train_4p.sh b/benchmark/ascend/resnet/scripts/run_distribute_train_4p.sh index 6c6d1680b..e0696d183 100644 --- a/benchmark/ascend/resnet/scripts/run_distribute_train_4p.sh +++ b/benchmark/ascend/resnet/scripts/run_distribute_train_4p.sh @@ -136,7 +136,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi @@ -147,7 +147,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. diff --git a/benchmark/ascend/resnet/scripts/run_distribute_train_multi_server.sh b/benchmark/ascend/resnet/scripts/run_distribute_train_multi_server.sh index c8f66898c..fe5e570c6 100644 --- a/benchmark/ascend/resnet/scripts/run_distribute_train_multi_server.sh +++ b/benchmark/ascend/resnet/scripts/run_distribute_train_multi_server.sh @@ -115,7 +115,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi @@ -126,7 +126,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. 
diff --git a/benchmark/ascend/resnet/train.py b/benchmark/ascend/resnet/train.py index 6791a1bec..87b86270b 100644 --- a/benchmark/ascend/resnet/train.py +++ b/benchmark/ascend/resnet/train.py @@ -123,8 +123,8 @@ def init_loss_scale(): def set_ascend_max_device_memory(): if ms.get_context("enable_ge") and ms.get_context("mode") == ms.GRAPH_MODE and \ - hasattr(config, "max_device_memory"): - logger.warning("When encountering a memory shortage situation in 1980B, reduce the max_device_memory.") + hasattr(config, "max_device_memory"): + logger.warning("When encountering a memory shortage situation, reduce the max_device_memory.") ms.set_context(max_device_memory=config.max_device_memory) @@ -230,7 +230,8 @@ def train_net(): model.train(new_repeat_count, dataset, callbacks=cb, sink_size=sink_size, dataset_sink_mode=dataset_sink_mode) - config.logger.info("If run eval and enable_cache Remember to shut down the cache server via \"cache_admin --stop\"") + config.logger.info("If run_eval and enable_cache are set, remember to " + "shut down the cache server via \"dataset-cache --stop\"") if __name__ == '__main__': diff --git a/official/cv/MobileNet/mobilenetv2/scripts/cache_util.sh b/official/cv/MobileNet/mobilenetv2/scripts/cache_util.sh index a3aa77e54..919b37f32 100644 --- a/official/cv/MobileNet/mobilenetv2/scripts/cache_util.sh +++ b/official/cv/MobileNet/mobilenetv2/scripts/cache_util.sh @@ -16,22 +16,22 @@ bootup_cache_server() { echo "Booting up cache server..." - result=$(cache_admin --start 2>&1) + result=$(dataset-cache --start 2>&1) rc=$? echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Cache server is already up and running" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } generate_cache_session() { - result=$(cache_admin -g | awk 'END {print $NF}') + result=$(dataset-cache -g | awk 'END {print $NF}') rc=$? echo "${result}" if [ "${rc}" -ne 0 ]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } @@ -39,11 +39,11 @@ generate_cache_session() shutdown_cache_server() { echo "Shutting down cache server..." - result=$(cache_admin --stop 2>&1) + result=$(dataset-cache --stop 2>&1) rc=$? echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Server on port 50052 is not reachable or has been shutdown already" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } diff --git a/official/cv/MobileNet/mobilenetv2/scripts/run_train_nfs_cache.sh b/official/cv/MobileNet/mobilenetv2/scripts/run_train_nfs_cache.sh index e7d71ac69..7304cc5f9 100644 --- a/official/cv/MobileNet/mobilenetv2/scripts/run_train_nfs_cache.sh +++ b/official/cv/MobileNet/mobilenetv2/scripts/run_train_nfs_cache.sh @@ -97,7 +97,7 @@ run_ascend() &> log$i.log & cd .. 
done - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" } run_gpu() @@ -157,7 +157,7 @@ run_gpu() --enable_cache=True \ --cache_session_id=$CACHE_SESSION_ID \ &> ../train.log & # dataset train folder - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" } run_cpu() @@ -209,7 +209,7 @@ run_cpu() --enable_cache=True \ --cache_session_id=$CACHE_SESSION_ID \ &> ../train.log & # dataset train folder - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" } if [ $1 = "Ascend" ] ; then diff --git a/official/cv/MobileNet/mobilenetv2/train.py b/official/cv/MobileNet/mobilenetv2/train.py index 3ae33dec7..35934b23f 100644 --- a/official/cv/MobileNet/mobilenetv2/train.py +++ b/official/cv/MobileNet/mobilenetv2/train.py @@ -150,7 +150,7 @@ def train_mobilenetv2(): print("total cost {:5.4f} s".format(time.time() - start)) if config.enable_cache: - print("Remember to shut down the cache server via \"cache_admin --stop\"") + print("Remember to shut down the cache server via \"dataset-cache --stop\"") if __name__ == '__main__': diff --git a/official/cv/ResNet/modelarts/ResNet152/train_start.py b/official/cv/ResNet/modelarts/ResNet152/train_start.py index a1eed503d..84b04eb54 100644 --- a/official/cv/ResNet/modelarts/ResNet152/train_start.py +++ b/official/cv/ResNet/modelarts/ResNet152/train_start.py @@ -135,7 +135,7 @@ def set_parameter(): if target == "Ascend": rank_save_graphs_path = os.path.join(config.save_graphs_path, "soma", str(os.getenv('DEVICE_ID', '0'))) mindspore.set_context(mode=0, device_target=target, save_graphs=config.save_graphs, - save_graphs_path=rank_save_graphs_path) + save_graphs_path=rank_save_graphs_path) else: mindspore.set_context(mode=0, device_target=target, save_graphs=config.save_graphs) set_graph_kernel_context(target, config.net_name) @@ -148,8 +148,8 @@ def set_parameter(): if target == "Ascend": device_id = int(os.getenv('DEVICE_ID', '0')) mindspore.set_context(device_id=device_id) - mindspore.set_auto_parallel_context(device_num=config.device_num, parallel_mode=mindspore.ParallelMode.DATA_PARALLEL, - gradients_mean=True) + mindspore.set_auto_parallel_context(device_num=config.device_num, + parallel_mode=mindspore.ParallelMode.DATA_PARALLEL, gradients_mean=True) set_algo_parameters(elementwise_op_strategy_follow=True) if config.net_name == "resnet50" or config.net_name == "se-resnet50": if config.boost_mode not in ["O1", "O2"]: @@ -161,8 +161,7 @@ def set_parameter(): else: init() mindspore.set_auto_parallel_context(device_num=get_device_num(), - parallel_mode=mindspore.ParallelMode.DATA_PARALLEL, - gradients_mean=True) + parallel_mode=mindspore.ParallelMode.DATA_PARALLEL, gradients_mean=True) if config.net_name == "resnet50": mindspore.set_auto_parallel_context(all_reduce_fusion_config=config.all_reduce_fusion_config) @@ -214,9 +213,9 @@ def init_weight(net, param_dict): for _, cell in net.cells_and_names(): if isinstance(cell, nn.Conv2d): if config.conv_init == "XavierUniform": - cell.weight.set_data(mindspore.common.initializer.initializer(mindspore.common.initializer.XavierUniform(), - 
cell.weight.shape, - cell.weight.dtype)) + cell.weight.set_data( + mindspore.common.initializer.initializer(mindspore.common.initializer.XavierUniform(), + cell.weight.shape, cell.weight.dtype)) elif config.conv_init == "TruncatedNormal": weight = conv_variance_scaling_initializer(cell.in_channels, cell.out_channels, @@ -224,9 +223,9 @@ def init_weight(net, param_dict): cell.weight.set_data(weight) if isinstance(cell, nn.Dense): if config.dense_init == "TruncatedNormal": - cell.weight.set_data(mindspore.common.initializer.initializer(mindspore.common.initializer.TruncatedNormal(), - cell.weight.shape, - cell.weight.dtype)) + cell.weight.set_data( + mindspore.common.initializer.initializer(mindspore.common.initializer.TruncatedNormal(), + cell.weight.shape, cell.weight.dtype)) elif config.dense_init == "RandomNormal": in_channel = cell.in_channels out_channel = cell.out_channels @@ -367,13 +366,12 @@ def train_net(): metrics = {'acc': DistAccuracy(batch_size=config.batch_size, device_num=config.device_num)} if (config.net_name not in ("resnet18", "resnet34", "resnet50", "resnet101", "resnet152", "se-resnet50")) or \ config.parameter_server or target == "CPU": - ## fp32 training + # fp32 training model = mindspore.Model(net, loss_fn=loss, optimizer=opt, metrics=metrics, eval_network=dist_eval_network) else: model = mindspore.Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics=metrics, - amp_level="O2", # boost_level=config.boost_mode, - keep_batchnorm_fp32=False, - eval_network=dist_eval_network) + amp_level="O2", # boost_level=config.boost_mode, + keep_batchnorm_fp32=False, eval_network=dist_eval_network) if config.optimizer == "Thor" and config.dataset == "imagenet2012": from src.lr_generator import get_thor_damping @@ -409,9 +407,10 @@ def train_net(): sink_size=dataset.get_dataset_size(), dataset_sink_mode=dataset_sink_mode) if config.run_eval and config.enable_cache: - print("Remember to shut down the cache server via \"cache_admin --stop\"") + print("Remember to shut down the cache server via \"dataset-cache --stop\"") _export_air(ckpt_save_dir) + if __name__ == '__main__': train_net() diff --git a/official/cv/ResNet/modelarts/ResNet18/modelarts_train.py b/official/cv/ResNet/modelarts/ResNet18/modelarts_train.py index 424875e79..93e4acbf2 100644 --- a/official/cv/ResNet/modelarts/ResNet18/modelarts_train.py +++ b/official/cv/ResNet/modelarts/ResNet18/modelarts_train.py @@ -122,8 +122,7 @@ def apply_eval(eval_param): def set_graph_kernel_context(run_platform, net_name): if run_platform == "GPU" and net_name == "resnet101": - mindspore.set_context(enable_graph_kernel=True, - graph_kernel_flags="--enable_parallel_fusion") + mindspore.set_context(enable_graph_kernel=True, graph_kernel_flags="--enable_parallel_fusion") def _get_last_ckpt(ckpt_dir): @@ -203,13 +202,11 @@ def init_weight(net): if isinstance(cell, nn.Conv2d): cell.weight.set_data( mindspore.common.initializer.initializer(mindspore.common.initializer.XavierUniform(), - cell.weight.shape, - cell.weight.dtype)) + cell.weight.shape, cell.weight.dtype)) if isinstance(cell, nn.Dense): cell.weight.set_data( mindspore.common.initializer.initializer(mindspore.common.initializer.TruncatedNormal(), - cell.weight.shape, - cell.weight.dtype)) + cell.weight.shape, cell.weight.dtype)) def init_lr(step_size): @@ -272,13 +269,10 @@ def define_model(net, opt, target): "se-resnet50")) or args_opt.parameter_server \ or target == "CPU": # fp32 training - model = mindspore.Model(net, loss_fn=loss, optimizer=opt, 
metrics=metrics, - eval_network=dist_eval_network) + model = mindspore.Model(net, loss_fn=loss, optimizer=opt, metrics=metrics, eval_network=dist_eval_network) else: - model = mindspore.Model(net, loss_fn=loss, optimizer=opt, - loss_scale_manager=loss_scale, metrics=metrics, - amp_level="O2", keep_batchnorm_fp32=False, - eval_network=dist_eval_network) + model = mindspore.Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics=metrics, + amp_level="O2", keep_batchnorm_fp32=False, eval_network=dist_eval_network) return model, loss, loss_scale @@ -380,7 +374,7 @@ def main(): dataset_sink_mode=dataset_sink_mode) if args_opt.run_eval and args_opt.enable_cache: - print("Remember to shut down the cache server via \"cache_admin --stop\"") + print("Remember to shut down the cache server via \"dataset-cache --stop\"") if __name__ == '__main__': diff --git a/official/cv/ResNet/scripts/cache_util.sh b/official/cv/ResNet/scripts/cache_util.sh index a3aa77e54..919b37f32 100644 --- a/official/cv/ResNet/scripts/cache_util.sh +++ b/official/cv/ResNet/scripts/cache_util.sh @@ -16,22 +16,22 @@ bootup_cache_server() { echo "Booting up cache server..." - result=$(cache_admin --start 2>&1) + result=$(dataset-cache --start 2>&1) rc=$? echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Cache server is already up and running" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } generate_cache_session() { - result=$(cache_admin -g | awk 'END {print $NF}') + result=$(dataset-cache -g | awk 'END {print $NF}') rc=$? echo "${result}" if [ "${rc}" -ne 0 ]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } @@ -39,11 +39,11 @@ generate_cache_session() shutdown_cache_server() { echo "Shutting down cache server..." - result=$(cache_admin --stop 2>&1) + result=$(dataset-cache --stop 2>&1) rc=$? echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Server on port 50052 is not reachable or has been shutdown already" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } diff --git a/official/cv/ResNet/scripts/run_distribute_train.sh b/official/cv/ResNet/scripts/run_distribute_train.sh index 183d32e10..624dabc80 100644 --- a/official/cv/ResNet/scripts/run_distribute_train.sh +++ b/official/cv/ResNet/scripts/run_distribute_train.sh @@ -142,7 +142,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi @@ -153,7 +153,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. 
diff --git a/official/cv/ResNet/scripts/run_distribute_train_gpu.sh b/official/cv/ResNet/scripts/run_distribute_train_gpu.sh index c37862b5d..238814786 100644 --- a/official/cv/ResNet/scripts/run_distribute_train_gpu.sh +++ b/official/cv/ResNet/scripts/run_distribute_train_gpu.sh @@ -113,7 +113,7 @@ then --enable_cache=True --cache_session_id=$CACHE_SESSION_ID --output_dir '../output' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi @@ -126,6 +126,6 @@ then --enable_cache=True --cache_session_id=$CACHE_SESSION_ID --output_dir '../output' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi \ No newline at end of file diff --git a/official/cv/ResNet/scripts/run_distribute_train_msrun.sh b/official/cv/ResNet/scripts/run_distribute_train_msrun.sh index 70e6bce2d..4dc1744f1 100644 --- a/official/cv/ResNet/scripts/run_distribute_train_msrun.sh +++ b/official/cv/ResNet/scripts/run_distribute_train_msrun.sh @@ -118,7 +118,7 @@ then --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir './outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi @@ -131,6 +131,6 @@ then --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir './outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi diff --git a/official/cv/ResNet/scripts/run_standalone_train.sh b/official/cv/ResNet/scripts/run_standalone_train.sh index c04268fdf..b93ed0e63 100644 --- a/official/cv/ResNet/scripts/run_standalone_train.sh +++ b/official/cv/ResNet/scripts/run_standalone_train.sh @@ -112,7 +112,7 @@ then --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi @@ -123,7 +123,7 @@ then --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. 
diff --git a/official/cv/ResNet/scripts/run_standalone_train_gpu.sh b/official/cv/ResNet/scripts/run_standalone_train_gpu.sh index 61a8769d8..4d8d78fc6 100644 --- a/official/cv/ResNet/scripts/run_standalone_train_gpu.sh +++ b/official/cv/ResNet/scripts/run_standalone_train_gpu.sh @@ -116,7 +116,7 @@ then --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi @@ -127,7 +127,7 @@ then --config_path=$CONFIG_FILE --output_dir '../outputs' --resume_ckpt=$RESUME_CKPT &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. diff --git a/official/cv/ResNet/train.py b/official/cv/ResNet/train.py index 1e5315ba1..a41ea2771 100644 --- a/official/cv/ResNet/train.py +++ b/official/cv/ResNet/train.py @@ -137,8 +137,8 @@ def init_loss_scale(): def set_ascend_max_device_memory(): if mindspore.get_context("enable_ge") and mindspore.get_context("mode") == 0 and \ - hasattr(config, "max_device_memory"): - logger.warning("When encountering a memory shortage situation in 1980B, reduce the max_device_memory.") + hasattr(config, "max_device_memory"): + logger.warning("When encountering a memory shortage situation, reduce the max_device_memory.") mindspore.set_context(max_device_memory=config.max_device_memory) @@ -234,7 +234,8 @@ def train_net(): model.train(config.epoch_size - config.start_epoch, dataset, callbacks=cb, sink_size=dataset.get_dataset_size(), dataset_sink_mode=dataset_sink_mode) - config.logger.info("If run eval and enable_cache Remember to shut down the cache server via \"cache_admin --stop\"") + config.logger.info("If run_eval and enable_cache are set, remember to " + "shut down the cache server via \"dataset-cache --stop\"") if __name__ == '__main__': diff --git a/research/cv/ISyNet/scripts/cache_util.sh b/research/cv/ISyNet/scripts/cache_util.sh index c447b131b..83c4d4814 100644 --- a/research/cv/ISyNet/scripts/cache_util.sh +++ b/research/cv/ISyNet/scripts/cache_util.sh @@ -16,22 +16,22 @@ bootup_cache_server() { echo "Booting up cache server..." - result=$(cache_admin --start 2>&1) + result=$(dataset-cache --start 2>&1) rc=$? echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Cache server is already up and running" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } generate_cache_session() { - result=$(cache_admin -g | awk 'END {print $NF}') + result=$(dataset-cache -g | awk 'END {print $NF}') rc=$? echo "${result}" if [ "${rc}" -ne 0 ]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } @@ -39,11 +39,11 @@ generate_cache_session() shutdown_cache_server() { echo "Shutting down cache server..." - result=$(cache_admin --stop 2>&1) + result=$(dataset-cache --stop 2>&1) rc=$? echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Server on port 50052 is not reachable or has been shutdown already" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" 
"${result}" exit 1 fi } diff --git a/research/cv/ISyNet/scripts/run_distribute_train.sh b/research/cv/ISyNet/scripts/run_distribute_train.sh index fcf5bd1a4..d1be0d85d 100644 --- a/research/cv/ISyNet/scripts/run_distribute_train.sh +++ b/research/cv/ISyNet/scripts/run_distribute_train.sh @@ -126,7 +126,7 @@ do --config_path=$CONFIG_FILE --output_path './output' &> log & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. diff --git a/research/cv/ISyNet/scripts/run_distribute_train_gpu.sh b/research/cv/ISyNet/scripts/run_distribute_train_gpu.sh index 565810965..b55738f9d 100644 --- a/research/cv/ISyNet/scripts/run_distribute_train_gpu.sh +++ b/research/cv/ISyNet/scripts/run_distribute_train_gpu.sh @@ -107,6 +107,6 @@ then --enable_cache=True --cache_session_id=$CACHE_SESSION_ID --output_path './output' &> log & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi diff --git a/research/cv/ISyNet/scripts/run_standalone_train.sh b/research/cv/ISyNet/scripts/run_standalone_train.sh index dbe205820..fdfb86831 100644 --- a/research/cv/ISyNet/scripts/run_standalone_train.sh +++ b/research/cv/ISyNet/scripts/run_standalone_train.sh @@ -105,7 +105,7 @@ then --config_path=$CONFIG_FILE --output_path './output' &> log & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. diff --git a/research/cv/ISyNet/scripts/run_standalone_train_gpu.sh b/research/cv/ISyNet/scripts/run_standalone_train_gpu.sh index 1a84c258d..d3fc5493b 100644 --- a/research/cv/ISyNet/scripts/run_standalone_train_gpu.sh +++ b/research/cv/ISyNet/scripts/run_standalone_train_gpu.sh @@ -110,7 +110,7 @@ then --config_path=$CONFIG_FILE --output_path './output' &> log & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. diff --git a/research/cv/ResidualAttentionNet/scripts/run_distribute_train.sh b/research/cv/ResidualAttentionNet/scripts/run_distribute_train.sh index 55db6ad64..6d145c437 100644 --- a/research/cv/ResidualAttentionNet/scripts/run_distribute_train.sh +++ b/research/cv/ResidualAttentionNet/scripts/run_distribute_train.sh @@ -122,7 +122,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_path './output' &> log & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. 
diff --git a/research/cv/res2net/scripts/cache_util.sh b/research/cv/res2net/scripts/cache_util.sh index a3aa77e54..919b37f32 100644 --- a/research/cv/res2net/scripts/cache_util.sh +++ b/research/cv/res2net/scripts/cache_util.sh @@ -16,22 +16,22 @@ bootup_cache_server() { echo "Booting up cache server..." - result=$(cache_admin --start 2>&1) + result=$(dataset-cache --start 2>&1) rc=$? echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Cache server is already up and running" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } generate_cache_session() { - result=$(cache_admin -g | awk 'END {print $NF}') + result=$(dataset-cache -g | awk 'END {print $NF}') rc=$? echo "${result}" if [ "${rc}" -ne 0 ]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } @@ -39,11 +39,11 @@ generate_cache_session() shutdown_cache_server() { echo "Shutting down cache server..." - result=$(cache_admin --stop 2>&1) + result=$(dataset-cache --stop 2>&1) rc=$? echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Server on port 50052 is not reachable or has been shutdown already" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } diff --git a/research/cv/res2net/scripts/run_distribute_train.sh b/research/cv/res2net/scripts/run_distribute_train.sh index 979cb51b8..90050d254 100644 --- a/research/cv/res2net/scripts/run_distribute_train.sh +++ b/research/cv/res2net/scripts/run_distribute_train.sh @@ -125,7 +125,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_path './output' &> log & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. diff --git a/research/cv/res2net/scripts/run_standalone_train.sh b/research/cv/res2net/scripts/run_standalone_train.sh index a0381dbea..b77949abd 100644 --- a/research/cv/res2net/scripts/run_standalone_train.sh +++ b/research/cv/res2net/scripts/run_standalone_train.sh @@ -104,7 +104,7 @@ then --config_path=$CONFIG_FILE --output_path './output' &> log & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. diff --git a/research/cv/res2net/train.py b/research/cv/res2net/train.py index 501eb8a04..c201f410f 100644 --- a/research/cv/res2net/train.py +++ b/research/cv/res2net/train.py @@ -407,7 +407,7 @@ def train_net(): sink_size=dataset.get_dataset_size(), dataset_sink_mode=dataset_sink_mode) if config.run_eval and config.enable_cache: - print("Remember to shut down the cache server via \"cache_admin --stop\"") + print("Remember to shut down the cache server via \"dataset-cache --stop\"") if __name__ == '__main__': diff --git a/research/cv/wideresnet/scripts/cache_util.sh b/research/cv/wideresnet/scripts/cache_util.sh index b1a651575..91f9673c1 100755 --- a/research/cv/wideresnet/scripts/cache_util.sh +++ b/research/cv/wideresnet/scripts/cache_util.sh @@ -16,22 +16,22 @@ bootup_cache_server() { echo "Booting up cache server..." - result=$(cache_admin --start 2>&1) + result=$(dataset-cache --start 2>&1) rc=$? 
echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Cache server is already up and running" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } generate_cache_session() { - result=$(cache_admin -g | awk 'END {print $NF}') + result=$(dataset-cache -g | awk 'END {print $NF}') rc=$? echo "${result}" if [ "${rc}" -ne 0 ]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } @@ -39,11 +39,11 @@ generate_cache_session() shutdown_cache_server() { echo "Shutting down cache server..." - result=$(cache_admin --stop 2>&1) + result=$(dataset-cache --stop 2>&1) rc=$? echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Server on port 50052 is not reachable or has been shutdown already" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } \ No newline at end of file diff --git a/research/cv/wideresnet/scripts/run_distribute_train.sh b/research/cv/wideresnet/scripts/run_distribute_train.sh index 72922c90c..b2b6a3602 100755 --- a/research/cv/wideresnet/scripts/run_distribute_train.sh +++ b/research/cv/wideresnet/scripts/run_distribute_train.sh @@ -126,7 +126,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --experiment_label=$LABEL &> log & if [ "${RUN_EVAL}" == "True" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. diff --git a/research/cv/wideresnet/scripts/run_distribute_train_gpu.sh b/research/cv/wideresnet/scripts/run_distribute_train_gpu.sh index d15f5de90..f15123b8f 100755 --- a/research/cv/wideresnet/scripts/run_distribute_train_gpu.sh +++ b/research/cv/wideresnet/scripts/run_distribute_train_gpu.sh @@ -107,7 +107,7 @@ then --enable_cache=True --experiment_label="$EXPERIMENT_LABEL" &> log & # if [ "${RUN_EVAL}" == "True" ] # then -# echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" +# echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" # fi fi # --cache_session_id="$CACHE_SESSION_ID" diff --git a/research/cv/wideresnet/scripts/run_standalone_train.sh b/research/cv/wideresnet/scripts/run_standalone_train.sh index 60591b163..11073703f 100755 --- a/research/cv/wideresnet/scripts/run_standalone_train.sh +++ b/research/cv/wideresnet/scripts/run_standalone_train.sh @@ -105,7 +105,7 @@ then --config_path=$CONFIG_FILE --experiment_label=$LABEL &> log & if [ "${RUN_EVAL}" == "True" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. 
diff --git a/research/cv/wideresnet/scripts/run_standalone_train_gpu.sh b/research/cv/wideresnet/scripts/run_standalone_train_gpu.sh index 43eb419d0..cda9a8295 100755 --- a/research/cv/wideresnet/scripts/run_standalone_train_gpu.sh +++ b/research/cv/wideresnet/scripts/run_standalone_train_gpu.sh @@ -111,7 +111,7 @@ then --config_path="$CONFIG_FILE" --experiment_label="$EXPERIMENT_LABEL" &> log & if [ "${RUN_EVAL}" == "True" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. -- Gitee
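For reference, the full cache lifecycle these scripts drive, using only commands already exercised by the cache_util.sh helpers in this patch (a minimal sketch; the elided training arguments depend on each model's own entry point):

    # boot the standalone cache server (formerly `cache_admin --start`)
    dataset-cache --start
    # create a cache session and capture its id, as generate_cache_session does
    CACHE_SESSION_ID=$(dataset-cache -g | awk 'END {print $NF}')
    # hand the session id to a training entry point that supports caching
    python train.py ... --enable_cache=True --cache_session_id=$CACHE_SESSION_ID
    # once training (and any cache-backed eval) is done, stop the server
    dataset-cache --stop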