diff --git a/benchmark/ascend/resnet/scripts/run_distribute_train.sh b/benchmark/ascend/resnet/scripts/run_distribute_train.sh
index 6dc9722819dc8b49af989d400cbe5e249850fdb5..6528379504adc6820052fae45a3849a683f922ae 100644
--- a/benchmark/ascend/resnet/scripts/run_distribute_train.sh
+++ b/benchmark/ascend/resnet/scripts/run_distribute_train.sh
@@ -141,7 +141,7 @@ do
         --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt &
         if [ "x${RUN_EVAL}" == "xTrue" ]
         then
-            echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+            echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
         fi
     fi
 
@@ -152,7 +152,7 @@ do
         --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt &
         if [ "x${RUN_EVAL}" == "xTrue" ]
         then
-            echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+            echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
         fi
     fi
     cd ..
diff --git a/benchmark/ascend/resnet/scripts/run_distribute_train_2node_16p.sh b/benchmark/ascend/resnet/scripts/run_distribute_train_2node_16p.sh
index 6df3a9d69b16c00015b63c365b5e0d6283fc3664..9956487d2f632efbcacc9ae8b471e35757025b0d 100644
--- a/benchmark/ascend/resnet/scripts/run_distribute_train_2node_16p.sh
+++ b/benchmark/ascend/resnet/scripts/run_distribute_train_2node_16p.sh
@@ -116,7 +116,7 @@ do
         --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt &
         if [ "x${RUN_EVAL}" == "xTrue" ]
         then
-            echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+            echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
         fi
     fi
 
@@ -127,7 +127,7 @@ do
         --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt &
         if [ "x${RUN_EVAL}" == "xTrue" ]
         then
-            echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+            echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
         fi
     fi
     cd ..
diff --git a/benchmark/ascend/resnet/scripts/run_distribute_train_4p.sh b/benchmark/ascend/resnet/scripts/run_distribute_train_4p.sh
index 6c6d1680be98d8bcd65cdfcc8f6d6258a7d72c10..e0696d183361b491dca6c6ca2d07155f6fa66dc6 100644
--- a/benchmark/ascend/resnet/scripts/run_distribute_train_4p.sh
+++ b/benchmark/ascend/resnet/scripts/run_distribute_train_4p.sh
@@ -136,7 +136,7 @@ do
         --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt &
         if [ "x${RUN_EVAL}" == "xTrue" ]
         then
-            echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+            echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
         fi
     fi
 
@@ -147,7 +147,7 @@ do
         --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt &
         if [ "x${RUN_EVAL}" == "xTrue" ]
         then
-            echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+            echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
         fi
     fi
     cd ..
diff --git a/benchmark/ascend/resnet/scripts/run_distribute_train_multi_server.sh b/benchmark/ascend/resnet/scripts/run_distribute_train_multi_server.sh
index c8f66898c9c6f5ce2c0b145852eb71dc705394fd..fe5e570c6e4ed07d4c7ee090b35273247332c187 100644
--- a/benchmark/ascend/resnet/scripts/run_distribute_train_multi_server.sh
+++ b/benchmark/ascend/resnet/scripts/run_distribute_train_multi_server.sh
@@ -115,7 +115,7 @@ do
         --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt &
         if [ "x${RUN_EVAL}" == "xTrue" ]
         then
-            echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+            echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
         fi
     fi
 
@@ -126,7 +126,7 @@ do
         --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt &
         if [ "x${RUN_EVAL}" == "xTrue" ]
         then
-            echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+            echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
         fi
     fi
     cd ..
diff --git a/benchmark/ascend/resnet/train.py b/benchmark/ascend/resnet/train.py
index 6791a1bec473198bd3469e42a9009b2f17c698d4..87b86270b0818976b05fd43be698a62907f1cb1c 100644
--- a/benchmark/ascend/resnet/train.py
+++ b/benchmark/ascend/resnet/train.py
@@ -123,8 +123,8 @@ def init_loss_scale():
 
 def set_ascend_max_device_memory():
     if ms.get_context("enable_ge") and ms.get_context("mode") == ms.GRAPH_MODE and \
-        hasattr(config, "max_device_memory"):
-        logger.warning("When encountering a memory shortage situation in 1980B, reduce the max_device_memory.")
+            hasattr(config, "max_device_memory"):
+        logger.warning("When encountering a memory shortage situation, reduce the max_device_memory.")
     ms.set_context(max_device_memory=config.max_device_memory)
 
 
@@ -230,7 +230,8 @@ def train_net():
 
     model.train(new_repeat_count, dataset, callbacks=cb,
                 sink_size=sink_size, dataset_sink_mode=dataset_sink_mode)
-    config.logger.info("If run eval and enable_cache Remember to shut down the cache server via \"cache_admin --stop\"")
+    config.logger.info("If running eval with enable_cache, remember to "
+                       "shut down the cache server via \"dataset-cache --stop\"")
 
 
 if __name__ == '__main__':
diff --git a/official/cv/MobileNet/mobilenetv2/scripts/cache_util.sh b/official/cv/MobileNet/mobilenetv2/scripts/cache_util.sh
index a3aa77e54a8309e5f2e6ed63703a69d41cfc18ee..919b37f32115bd41d17089952f9eee557912ac2d 100644
--- a/official/cv/MobileNet/mobilenetv2/scripts/cache_util.sh
+++ b/official/cv/MobileNet/mobilenetv2/scripts/cache_util.sh
@@ -16,22 +16,22 @@
 bootup_cache_server()
 {
   echo "Booting up cache server..."
-  result=$(cache_admin --start 2>&1)
+  result=$(dataset-cache --start 2>&1)
   rc=$?
   echo "${result}"
   if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Cache server is already up and running" ]]; then
-    echo "cache_admin command failure!" "${result}"
+    echo "dataset-cache command failure!" "${result}"
     exit 1
   fi
 }
 
 generate_cache_session()
 {
-  result=$(cache_admin -g | awk 'END {print $NF}')
+  result=$(dataset-cache -g | awk 'END {print $NF}')
   rc=$?
   echo "${result}"
   if [ "${rc}" -ne 0 ]; then
-    echo "cache_admin command failure!" "${result}"
+    echo "dataset-cache command failure!" "${result}"
     exit 1
   fi
 }
@@ -39,11 +39,11 @@ generate_cache_session()
 shutdown_cache_server()
 {
   echo "Shutting down cache server..."
-  result=$(cache_admin --stop 2>&1)
+  result=$(dataset-cache --stop 2>&1)
   rc=$?
   echo "${result}"
   if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Server on port 50052 is not reachable or has been shutdown already" ]]; then
-    echo "cache_admin command failure!" "${result}"
+    echo "dataset-cache command failure!" "${result}"
     exit 1
   fi
 }
diff --git a/official/cv/MobileNet/mobilenetv2/scripts/run_train_nfs_cache.sh b/official/cv/MobileNet/mobilenetv2/scripts/run_train_nfs_cache.sh
index e7d71ac69f3daf84fc7a484ac97e215bb0c136fb..7304cc5f9c9655674e945e6d730de24c05e9f22a 100644
--- a/official/cv/MobileNet/mobilenetv2/scripts/run_train_nfs_cache.sh
+++ b/official/cv/MobileNet/mobilenetv2/scripts/run_train_nfs_cache.sh
@@ -97,7 +97,7 @@ run_ascend()
         &> log$i.log &
         cd ..
     done
-    echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+    echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
 }
 
 run_gpu()
@@ -157,7 +157,7 @@ run_gpu()
         --enable_cache=True \
         --cache_session_id=$CACHE_SESSION_ID \
         &> ../train.log & # dataset train folder
-    echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+    echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
 }
 
 run_cpu()
@@ -209,7 +209,7 @@ run_cpu()
         --enable_cache=True \
         --cache_session_id=$CACHE_SESSION_ID \
         &> ../train.log & # dataset train folder
-    echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+    echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
 }
 
 if [ $1 = "Ascend" ] ; then
diff --git a/official/cv/MobileNet/mobilenetv2/train.py b/official/cv/MobileNet/mobilenetv2/train.py
index 3ae33dec797a1174112f487a1c2a0aeec9f51d3e..35934b23f6baa5577c2c0b62f9af821d7ef8bfaf 100644
--- a/official/cv/MobileNet/mobilenetv2/train.py
+++ b/official/cv/MobileNet/mobilenetv2/train.py
@@ -150,7 +150,7 @@ def train_mobilenetv2():
 
     print("total cost {:5.4f} s".format(time.time() - start))
     if config.enable_cache:
-        print("Remember to shut down the cache server via \"cache_admin --stop\"")
+        print("Remember to shut down the cache server via \"dataset-cache --stop\"")
 
 
 if __name__ == '__main__':
diff --git a/official/cv/ResNet/modelarts/ResNet152/train_start.py b/official/cv/ResNet/modelarts/ResNet152/train_start.py
index a1eed503d9ea5747a49d9483580e105b10feeabd..84b04eb54a2a6886d66a81d5db803b88687ec734 100644
--- a/official/cv/ResNet/modelarts/ResNet152/train_start.py
+++ b/official/cv/ResNet/modelarts/ResNet152/train_start.py
@@ -135,7 +135,7 @@ def set_parameter():
         if target == "Ascend":
             rank_save_graphs_path = os.path.join(config.save_graphs_path, "soma", str(os.getenv('DEVICE_ID', '0')))
             mindspore.set_context(mode=0, device_target=target, save_graphs=config.save_graphs,
-                                 save_graphs_path=rank_save_graphs_path)
+                                  save_graphs_path=rank_save_graphs_path)
         else:
             mindspore.set_context(mode=0, device_target=target, save_graphs=config.save_graphs)
         set_graph_kernel_context(target, config.net_name)
@@ -148,8 +148,8 @@ def set_parameter():
         if target == "Ascend":
             device_id = int(os.getenv('DEVICE_ID', '0'))
             mindspore.set_context(device_id=device_id)
-            mindspore.set_auto_parallel_context(device_num=config.device_num, parallel_mode=mindspore.ParallelMode.DATA_PARALLEL,
-                                                gradients_mean=True)
+            mindspore.set_auto_parallel_context(device_num=config.device_num,
+                                                parallel_mode=mindspore.ParallelMode.DATA_PARALLEL, gradients_mean=True)
             set_algo_parameters(elementwise_op_strategy_follow=True)
             if config.net_name == "resnet50" or config.net_name == "se-resnet50":
                 if config.boost_mode not in ["O1", "O2"]:
@@ -161,8 +161,7 @@ def set_parameter():
         else:
             init()
             mindspore.set_auto_parallel_context(device_num=get_device_num(),
-                                                parallel_mode=mindspore.ParallelMode.DATA_PARALLEL,
-                                                gradients_mean=True)
+                                                parallel_mode=mindspore.ParallelMode.DATA_PARALLEL, gradients_mean=True)
             if config.net_name == "resnet50":
                 mindspore.set_auto_parallel_context(all_reduce_fusion_config=config.all_reduce_fusion_config)
 
@@ -214,9 +213,9 @@ def init_weight(net, param_dict):
     for _, cell in net.cells_and_names():
         if isinstance(cell, nn.Conv2d):
             if config.conv_init == "XavierUniform":
-                cell.weight.set_data(mindspore.common.initializer.initializer(mindspore.common.initializer.XavierUniform(),
-                                                                              cell.weight.shape,
-                                                                              cell.weight.dtype))
+                cell.weight.set_data(
+                    mindspore.common.initializer.initializer(mindspore.common.initializer.XavierUniform(),
+                                                             cell.weight.shape, cell.weight.dtype))
             elif config.conv_init == "TruncatedNormal":
                 weight = conv_variance_scaling_initializer(cell.in_channels,
                                                            cell.out_channels,
@@ -224,9 +223,9 @@ def init_weight(net, param_dict):
             cell.weight.set_data(weight)
         if isinstance(cell, nn.Dense):
             if config.dense_init == "TruncatedNormal":
-                cell.weight.set_data(mindspore.common.initializer.initializer(mindspore.common.initializer.TruncatedNormal(),
-                                                                              cell.weight.shape,
-                                                                              cell.weight.dtype))
+                cell.weight.set_data(
+                    mindspore.common.initializer.initializer(mindspore.common.initializer.TruncatedNormal(),
+                                                             cell.weight.shape, cell.weight.dtype))
             elif config.dense_init == "RandomNormal":
                 in_channel = cell.in_channels
                 out_channel = cell.out_channels
@@ -367,13 +366,12 @@ def train_net():
     metrics = {'acc': DistAccuracy(batch_size=config.batch_size, device_num=config.device_num)}
     if (config.net_name not in ("resnet18", "resnet34", "resnet50", "resnet101", "resnet152", "se-resnet50")) or \
         config.parameter_server or target == "CPU":
-        ## fp32 training
+        # fp32 training
         model = mindspore.Model(net, loss_fn=loss, optimizer=opt, metrics=metrics, eval_network=dist_eval_network)
     else:
         model = mindspore.Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics=metrics,
-                                amp_level="O2", # boost_level=config.boost_mode,
-                                keep_batchnorm_fp32=False,
-                                eval_network=dist_eval_network)
+                                amp_level="O2", # boost_level=config.boost_mode,
+                                keep_batchnorm_fp32=False, eval_network=dist_eval_network)
 
     if config.optimizer == "Thor" and config.dataset == "imagenet2012":
         from src.lr_generator import get_thor_damping
@@ -409,9 +407,10 @@ def train_net():
                 sink_size=dataset.get_dataset_size(), dataset_sink_mode=dataset_sink_mode)
 
     if config.run_eval and config.enable_cache:
-        print("Remember to shut down the cache server via \"cache_admin --stop\"")
+        print("Remember to shut down the cache server via \"dataset-cache --stop\"")
     _export_air(ckpt_save_dir)
 
+
 if __name__ == '__main__':
     train_net()
diff --git a/official/cv/ResNet/modelarts/ResNet18/modelarts_train.py b/official/cv/ResNet/modelarts/ResNet18/modelarts_train.py
index 424875e791ccb66737c99536e27e800bef290da8..93e4acbf2a684f79e71c34b13271c903425f0c40 100644
--- a/official/cv/ResNet/modelarts/ResNet18/modelarts_train.py
+++ b/official/cv/ResNet/modelarts/ResNet18/modelarts_train.py
@@ -122,8 +122,7 @@ def apply_eval(eval_param):
 
 def set_graph_kernel_context(run_platform, net_name):
     if run_platform == "GPU" and net_name == "resnet101":
net_name == "resnet101": - mindspore.set_context(enable_graph_kernel=True, - graph_kernel_flags="--enable_parallel_fusion") + mindspore.set_context(enable_graph_kernel=True, graph_kernel_flags="--enable_parallel_fusion") def _get_last_ckpt(ckpt_dir): @@ -203,13 +202,11 @@ def init_weight(net): if isinstance(cell, nn.Conv2d): cell.weight.set_data( mindspore.common.initializer.initializer(mindspore.common.initializer.XavierUniform(), - cell.weight.shape, - cell.weight.dtype)) + cell.weight.shape, cell.weight.dtype)) if isinstance(cell, nn.Dense): cell.weight.set_data( mindspore.common.initializer.initializer(mindspore.common.initializer.TruncatedNormal(), - cell.weight.shape, - cell.weight.dtype)) + cell.weight.shape, cell.weight.dtype)) def init_lr(step_size): @@ -272,13 +269,10 @@ def define_model(net, opt, target): "se-resnet50")) or args_opt.parameter_server \ or target == "CPU": # fp32 training - model = mindspore.Model(net, loss_fn=loss, optimizer=opt, metrics=metrics, - eval_network=dist_eval_network) + model = mindspore.Model(net, loss_fn=loss, optimizer=opt, metrics=metrics, eval_network=dist_eval_network) else: - model = mindspore.Model(net, loss_fn=loss, optimizer=opt, - loss_scale_manager=loss_scale, metrics=metrics, - amp_level="O2", keep_batchnorm_fp32=False, - eval_network=dist_eval_network) + model = mindspore.Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics=metrics, + amp_level="O2", keep_batchnorm_fp32=False, eval_network=dist_eval_network) return model, loss, loss_scale @@ -380,7 +374,7 @@ def main(): dataset_sink_mode=dataset_sink_mode) if args_opt.run_eval and args_opt.enable_cache: - print("Remember to shut down the cache server via \"cache_admin --stop\"") + print("Remember to shut down the cache server via \"dataset-cache --stop\"") if __name__ == '__main__': diff --git a/official/cv/ResNet/scripts/cache_util.sh b/official/cv/ResNet/scripts/cache_util.sh index a3aa77e54a8309e5f2e6ed63703a69d41cfc18ee..919b37f32115bd41d17089952f9eee557912ac2d 100644 --- a/official/cv/ResNet/scripts/cache_util.sh +++ b/official/cv/ResNet/scripts/cache_util.sh @@ -16,22 +16,22 @@ bootup_cache_server() { echo "Booting up cache server..." - result=$(cache_admin --start 2>&1) + result=$(dataset-cache --start 2>&1) rc=$? echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Cache server is already up and running" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } generate_cache_session() { - result=$(cache_admin -g | awk 'END {print $NF}') + result=$(dataset-cache -g | awk 'END {print $NF}') rc=$? echo "${result}" if [ "${rc}" -ne 0 ]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } @@ -39,11 +39,11 @@ generate_cache_session() shutdown_cache_server() { echo "Shutting down cache server..." - result=$(cache_admin --stop 2>&1) + result=$(dataset-cache --stop 2>&1) rc=$? echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Server on port 50052 is not reachable or has been shutdown already" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" 
"${result}" exit 1 fi } diff --git a/official/cv/ResNet/scripts/run_distribute_train.sh b/official/cv/ResNet/scripts/run_distribute_train.sh index 183d32e10de7c4022ea2830e6ffa2dd03b9c8c79..624dabc80aff2449e422ae3a24084cb0351a7b8a 100644 --- a/official/cv/ResNet/scripts/run_distribute_train.sh +++ b/official/cv/ResNet/scripts/run_distribute_train.sh @@ -142,7 +142,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi @@ -153,7 +153,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. diff --git a/official/cv/ResNet/scripts/run_distribute_train_gpu.sh b/official/cv/ResNet/scripts/run_distribute_train_gpu.sh index c37862b5d8017f5ddff959d02d4964d0f2517da2..2388147860e108c3679a462fe19c7cc282c38ea5 100644 --- a/official/cv/ResNet/scripts/run_distribute_train_gpu.sh +++ b/official/cv/ResNet/scripts/run_distribute_train_gpu.sh @@ -113,7 +113,7 @@ then --enable_cache=True --cache_session_id=$CACHE_SESSION_ID --output_dir '../output' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi @@ -126,6 +126,6 @@ then --enable_cache=True --cache_session_id=$CACHE_SESSION_ID --output_dir '../output' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi \ No newline at end of file diff --git a/official/cv/ResNet/scripts/run_distribute_train_msrun.sh b/official/cv/ResNet/scripts/run_distribute_train_msrun.sh index 70e6bce2d4901cbd536e1274ce882e5200148d4c..4dc1744f1f64f0d5550f74e06faae0c29b9cf8a4 100644 --- a/official/cv/ResNet/scripts/run_distribute_train_msrun.sh +++ b/official/cv/ResNet/scripts/run_distribute_train_msrun.sh @@ -118,7 +118,7 @@ then --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir './outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi @@ -131,6 +131,6 @@ then --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_dir './outputs' &> log.txt & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi diff --git a/official/cv/ResNet/scripts/run_standalone_train.sh b/official/cv/ResNet/scripts/run_standalone_train.sh index 
index c04268fdfc0c638917ee458bfbe788d08b0a36cf..b93ed0e630b6268014a7492f6489c7d563cadfb4 100644
--- a/official/cv/ResNet/scripts/run_standalone_train.sh
+++ b/official/cv/ResNet/scripts/run_standalone_train.sh
@@ -112,7 +112,7 @@ then
         --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt &
     if [ "x${RUN_EVAL}" == "xTrue" ]
     then
-        echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+        echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
     fi
 fi
 
@@ -123,7 +123,7 @@ then
         --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt &
     if [ "x${RUN_EVAL}" == "xTrue" ]
     then
-        echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+        echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
     fi
 fi
 cd ..
diff --git a/official/cv/ResNet/scripts/run_standalone_train_gpu.sh b/official/cv/ResNet/scripts/run_standalone_train_gpu.sh
index 61a8769d8b8b2991b429fa42c3321a09c36a69e3..4d8d78fc68e12a3526434d7fedb405a62f4bd4f2 100644
--- a/official/cv/ResNet/scripts/run_standalone_train_gpu.sh
+++ b/official/cv/ResNet/scripts/run_standalone_train_gpu.sh
@@ -116,7 +116,7 @@ then
         --config_path=$CONFIG_FILE --output_dir '../outputs' &> log.txt &
     if [ "x${RUN_EVAL}" == "xTrue" ]
     then
-        echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+        echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
     fi
 fi
 
@@ -127,7 +127,7 @@ then
        --config_path=$CONFIG_FILE --output_dir '../outputs' --resume_ckpt=$RESUME_CKPT &> log.txt &
     if [ "x${RUN_EVAL}" == "xTrue" ]
     then
-        echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+        echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
     fi
 fi
 cd ..
diff --git a/official/cv/ResNet/train.py b/official/cv/ResNet/train.py
index 1e5315ba1219b63960ba6055e82b86e65f6bff4a..a41ea27717a4746216f8a57d2c03b28b0ac2e797 100644
--- a/official/cv/ResNet/train.py
+++ b/official/cv/ResNet/train.py
@@ -137,8 +137,8 @@ def init_loss_scale():
 
 def set_ascend_max_device_memory():
     if mindspore.get_context("enable_ge") and mindspore.get_context("mode") == 0 and \
-        hasattr(config, "max_device_memory"):
-        logger.warning("When encountering a memory shortage situation in 1980B, reduce the max_device_memory.")
+            hasattr(config, "max_device_memory"):
+        logger.warning("When encountering a memory shortage situation, reduce the max_device_memory.")
     mindspore.set_context(max_device_memory=config.max_device_memory)
 
 
@@ -234,7 +234,8 @@ def train_net():
     model.train(config.epoch_size - config.start_epoch, dataset, callbacks=cb,
                 sink_size=dataset.get_dataset_size(), dataset_sink_mode=dataset_sink_mode)
 
-    config.logger.info("If run eval and enable_cache Remember to shut down the cache server via \"cache_admin --stop\"")
+    config.logger.info("If running eval with enable_cache, remember to "
+                       "shut down the cache server via \"dataset-cache --stop\"")
 
 
 if __name__ == '__main__':
diff --git a/research/cv/ISyNet/scripts/cache_util.sh b/research/cv/ISyNet/scripts/cache_util.sh
index c447b131b65cf432dc8b59366ab979b85bd40fee..83c4d48141f7eb40d05f69eff0dd74c187f589bb 100644
--- a/research/cv/ISyNet/scripts/cache_util.sh
+++ b/research/cv/ISyNet/scripts/cache_util.sh
@@ -16,22 +16,22 @@
 bootup_cache_server()
 {
   echo "Booting up cache server..."
-  result=$(cache_admin --start 2>&1)
+  result=$(dataset-cache --start 2>&1)
   rc=$?
   echo "${result}"
   if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Cache server is already up and running" ]]; then
-    echo "cache_admin command failure!" "${result}"
+    echo "dataset-cache command failure!" "${result}"
     exit 1
   fi
 }
 
 generate_cache_session()
 {
-  result=$(cache_admin -g | awk 'END {print $NF}')
+  result=$(dataset-cache -g | awk 'END {print $NF}')
   rc=$?
   echo "${result}"
   if [ "${rc}" -ne 0 ]; then
-    echo "cache_admin command failure!" "${result}"
+    echo "dataset-cache command failure!" "${result}"
     exit 1
   fi
 }
@@ -39,11 +39,11 @@ generate_cache_session()
 shutdown_cache_server()
 {
   echo "Shutting down cache server..."
-  result=$(cache_admin --stop 2>&1)
+  result=$(dataset-cache --stop 2>&1)
   rc=$?
   echo "${result}"
   if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Server on port 50052 is not reachable or has been shutdown already" ]]; then
-    echo "cache_admin command failure!" "${result}"
+    echo "dataset-cache command failure!" "${result}"
     exit 1
   fi
 }
diff --git a/research/cv/ISyNet/scripts/run_distribute_train.sh b/research/cv/ISyNet/scripts/run_distribute_train.sh
index fcf5bd1a467032d306558db053424e538b5a8220..d1be0d85d7cc9d6acf5cc9bed1579b3048a2d74f 100644
--- a/research/cv/ISyNet/scripts/run_distribute_train.sh
+++ b/research/cv/ISyNet/scripts/run_distribute_train.sh
@@ -126,7 +126,7 @@ do
         --config_path=$CONFIG_FILE --output_path './output' &> log &
         if [ "x${RUN_EVAL}" == "xTrue" ]
         then
-            echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+            echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
         fi
     fi
     cd ..
diff --git a/research/cv/ISyNet/scripts/run_distribute_train_gpu.sh b/research/cv/ISyNet/scripts/run_distribute_train_gpu.sh
index 5658109652083f01bfc01ecaa771b44eddcd2728..b55738f9de2b226c1bd2a1090c023f575f24d30c 100644
--- a/research/cv/ISyNet/scripts/run_distribute_train_gpu.sh
+++ b/research/cv/ISyNet/scripts/run_distribute_train_gpu.sh
@@ -107,6 +107,6 @@ then
       --enable_cache=True --cache_session_id=$CACHE_SESSION_ID --output_path './output' &> log &
     if [ "x${RUN_EVAL}" == "xTrue" ]
     then
-        echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+        echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
     fi
 fi
diff --git a/research/cv/ISyNet/scripts/run_standalone_train.sh b/research/cv/ISyNet/scripts/run_standalone_train.sh
index dbe20582057db8d232d35051f935193518f8e1b1..fdfb86831ef98536d654fc1e42691df93c8e2d41 100644
--- a/research/cv/ISyNet/scripts/run_standalone_train.sh
+++ b/research/cv/ISyNet/scripts/run_standalone_train.sh
@@ -105,7 +105,7 @@ then
        --config_path=$CONFIG_FILE --output_path './output' &> log &
     if [ "x${RUN_EVAL}" == "xTrue" ]
     then
-        echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+        echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
     fi
 fi
 cd ..
diff --git a/research/cv/ISyNet/scripts/run_standalone_train_gpu.sh b/research/cv/ISyNet/scripts/run_standalone_train_gpu.sh
index 1a84c258d59c5c907c638e708a34a16a7dcd110b..d3fc5493b098571cc8315962875739812a8e0dc9 100644
--- a/research/cv/ISyNet/scripts/run_standalone_train_gpu.sh
+++ b/research/cv/ISyNet/scripts/run_standalone_train_gpu.sh
@@ -110,7 +110,7 @@ then
        --config_path=$CONFIG_FILE --output_path './output' &> log &
     if [ "x${RUN_EVAL}" == "xTrue" ]
     then
-        echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+        echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
     fi
 fi
 cd ..
diff --git a/research/cv/ResidualAttentionNet/scripts/run_distribute_train.sh b/research/cv/ResidualAttentionNet/scripts/run_distribute_train.sh
index 55db6ad64fc680b0a1230ae3aef0a35d7052b721..6d145c437d4ac99cb298ccb8144904b7f45d9762 100644
--- a/research/cv/ResidualAttentionNet/scripts/run_distribute_train.sh
+++ b/research/cv/ResidualAttentionNet/scripts/run_distribute_train.sh
@@ -122,7 +122,7 @@ do
         --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_path './output' &> log &
         if [ "x${RUN_EVAL}" == "xTrue" ]
         then
-            echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+            echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
         fi
     fi
     cd ..
diff --git a/research/cv/res2net/scripts/cache_util.sh b/research/cv/res2net/scripts/cache_util.sh
index a3aa77e54a8309e5f2e6ed63703a69d41cfc18ee..919b37f32115bd41d17089952f9eee557912ac2d 100644
--- a/research/cv/res2net/scripts/cache_util.sh
+++ b/research/cv/res2net/scripts/cache_util.sh
@@ -16,22 +16,22 @@
 bootup_cache_server()
 {
   echo "Booting up cache server..."
-  result=$(cache_admin --start 2>&1)
+  result=$(dataset-cache --start 2>&1)
   rc=$?
   echo "${result}"
   if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Cache server is already up and running" ]]; then
-    echo "cache_admin command failure!" "${result}"
+    echo "dataset-cache command failure!" "${result}"
"${result}" exit 1 fi } generate_cache_session() { - result=$(cache_admin -g | awk 'END {print $NF}') + result=$(dataset-cache -g | awk 'END {print $NF}') rc=$? echo "${result}" if [ "${rc}" -ne 0 ]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } @@ -39,11 +39,11 @@ generate_cache_session() shutdown_cache_server() { echo "Shutting down cache server..." - result=$(cache_admin --stop 2>&1) + result=$(dataset-cache --stop 2>&1) rc=$? echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Server on port 50052 is not reachable or has been shutdown already" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } diff --git a/research/cv/res2net/scripts/run_distribute_train.sh b/research/cv/res2net/scripts/run_distribute_train.sh index 979cb51b878968b45c8e32eaf410000af29e6e6e..90050d254b37b82ad9448a8aa3b7ab36519fc091 100644 --- a/research/cv/res2net/scripts/run_distribute_train.sh +++ b/research/cv/res2net/scripts/run_distribute_train.sh @@ -125,7 +125,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --output_path './output' &> log & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. diff --git a/research/cv/res2net/scripts/run_standalone_train.sh b/research/cv/res2net/scripts/run_standalone_train.sh index a0381dbeafee75f802c1150c9c924092b243ae00..b77949abd032100bf2b17fbbacba86f76aaf3c22 100644 --- a/research/cv/res2net/scripts/run_standalone_train.sh +++ b/research/cv/res2net/scripts/run_standalone_train.sh @@ -104,7 +104,7 @@ then --config_path=$CONFIG_FILE --output_path './output' &> log & if [ "x${RUN_EVAL}" == "xTrue" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. diff --git a/research/cv/res2net/train.py b/research/cv/res2net/train.py index 501eb8a0468229da26fe052b8c307803111dc6fb..c201f410f092ad0002fab36c8a50e4de57ca2244 100644 --- a/research/cv/res2net/train.py +++ b/research/cv/res2net/train.py @@ -407,7 +407,7 @@ def train_net(): sink_size=dataset.get_dataset_size(), dataset_sink_mode=dataset_sink_mode) if config.run_eval and config.enable_cache: - print("Remember to shut down the cache server via \"cache_admin --stop\"") + print("Remember to shut down the cache server via \"dataset-cache --stop\"") if __name__ == '__main__': diff --git a/research/cv/wideresnet/scripts/cache_util.sh b/research/cv/wideresnet/scripts/cache_util.sh index b1a65157500f66cd3ff54042624781c4864e78d4..91f9673c10d03c4b6df2ed7ad10508582319e949 100755 --- a/research/cv/wideresnet/scripts/cache_util.sh +++ b/research/cv/wideresnet/scripts/cache_util.sh @@ -16,22 +16,22 @@ bootup_cache_server() { echo "Booting up cache server..." - result=$(cache_admin --start 2>&1) + result=$(dataset-cache --start 2>&1) rc=$? echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Cache server is already up and running" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" 
"${result}" exit 1 fi } generate_cache_session() { - result=$(cache_admin -g | awk 'END {print $NF}') + result=$(dataset-cache -g | awk 'END {print $NF}') rc=$? echo "${result}" if [ "${rc}" -ne 0 ]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } @@ -39,11 +39,11 @@ generate_cache_session() shutdown_cache_server() { echo "Shutting down cache server..." - result=$(cache_admin --stop 2>&1) + result=$(dataset-cache --stop 2>&1) rc=$? echo "${result}" if [ "${rc}" -ne 0 ] && [[ ! ${result} =~ "Server on port 50052 is not reachable or has been shutdown already" ]]; then - echo "cache_admin command failure!" "${result}" + echo "dataset-cache command failure!" "${result}" exit 1 fi } \ No newline at end of file diff --git a/research/cv/wideresnet/scripts/run_distribute_train.sh b/research/cv/wideresnet/scripts/run_distribute_train.sh index 72922c90cab891f442a06afd612af475e7a05933..b2b6a36020ef0e53f87df8cf4a64e76d2ddb37e5 100755 --- a/research/cv/wideresnet/scripts/run_distribute_train.sh +++ b/research/cv/wideresnet/scripts/run_distribute_train.sh @@ -126,7 +126,7 @@ do --cache_session_id=$CACHE_SESSION_ID --config_path=$CONFIG_FILE --experiment_label=$LABEL &> log & if [ "${RUN_EVAL}" == "True" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. diff --git a/research/cv/wideresnet/scripts/run_distribute_train_gpu.sh b/research/cv/wideresnet/scripts/run_distribute_train_gpu.sh index d15f5de90e9127e0525dd0412cecec62b1bc4dd7..f15123b8fec1574697d74efc6b19108f8497f638 100755 --- a/research/cv/wideresnet/scripts/run_distribute_train_gpu.sh +++ b/research/cv/wideresnet/scripts/run_distribute_train_gpu.sh @@ -107,7 +107,7 @@ then --enable_cache=True --experiment_label="$EXPERIMENT_LABEL" &> log & # if [ "${RUN_EVAL}" == "True" ] # then -# echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" +# echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" # fi fi # --cache_session_id="$CACHE_SESSION_ID" diff --git a/research/cv/wideresnet/scripts/run_standalone_train.sh b/research/cv/wideresnet/scripts/run_standalone_train.sh index 60591b163bf1c7c03dfe8533b3cc485e67632596..11073703ff59f73399cb8f2cbea571e568b84ebf 100755 --- a/research/cv/wideresnet/scripts/run_standalone_train.sh +++ b/research/cv/wideresnet/scripts/run_standalone_train.sh @@ -105,7 +105,7 @@ then --config_path=$CONFIG_FILE --experiment_label=$LABEL &> log & if [ "${RUN_EVAL}" == "True" ] then - echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\"" + echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\"" fi fi cd .. 
diff --git a/research/cv/wideresnet/scripts/run_standalone_train_gpu.sh b/research/cv/wideresnet/scripts/run_standalone_train_gpu.sh
index 43eb419d0a91cbb313c8328e271430a2afcc0b5a..cda9a829595681accff1153a3c35f3d41363fe08 100755
--- a/research/cv/wideresnet/scripts/run_standalone_train_gpu.sh
+++ b/research/cv/wideresnet/scripts/run_standalone_train_gpu.sh
@@ -111,7 +111,7 @@ then
         --config_path="$CONFIG_FILE" --experiment_label="$EXPERIMENT_LABEL" &> log &
     if [ "${RUN_EVAL}" == "True" ]
     then
-        echo -e "\nWhen training run is done, remember to shut down the cache server via \"cache_admin --stop\""
+        echo -e "\nWhen training run is done, remember to shut down the cache server via \"dataset-cache --stop\""
     fi
 fi
 cd ..