From e144abff1fed5b2aca8b35bce0e5340ec1443299 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 02:41:58 +0000 Subject: [PATCH 01/69] update WideDeep_ID2940_for_TensorFlow/config/1p.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json index 96078ec8d..e7cd0f2df 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json @@ -1,13 +1,13 @@ { - "group_count": "1", - "group_list": + "server_count": "1", + "server_list": [ { "devices": [ { "device_id": "0", "device_ip": "192.1.2.8", - "rank_id": "0", + "rank_id": "0" } ], "server_id": "10.155.111.118" -- Gitee From fa963e6974494f295d1dc77a94a8a705a4c81235 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 02:46:52 +0000 Subject: [PATCH 02/69] update WideDeep_ID2940_for_TensorFlow/config/1p_0.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_0.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json index 96078ec8d..dc3acd33c 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json @@ -1,16 +1,16 @@ { - "group_count": "1", - "group_list": + "server_count": "1", + "server_list": [ { "devices": [ { "device_id": "0", "device_ip": "192.1.2.8", - "rank_id": "0", + "rank_id": "0" } ], - "server_id": "10.155.111.118" + "server_id": "127.0.0.1" } ], "status": "completed", -- Gitee From 9a0a1f1dc6b500386d302e5eeda1934714c823cc Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 02:47:17 +0000 Subject: [PATCH 03/69] update WideDeep_ID2940_for_TensorFlow/config/1p.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json index e7cd0f2df..dc3acd33c 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json @@ -10,7 +10,7 @@ "rank_id": "0" } ], - "server_id": "10.155.111.118" + "server_id": "127.0.0.1" } ], "status": "completed", -- Gitee From a5638eb1d508e2536eeedc722e4c90eaf88fb64d Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 02:48:08 +0000 Subject: [PATCH 04/69] update WideDeep_ID2940_for_TensorFlow/config/1p_1.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_1.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json index 8270cbe0e..53ae106a0 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json @@ -1,16 +1,16 @@ { - "group_count": "1", - "group_list": + "server_count": "1", + "server_list": [ { "devices": [ { "device_id": "1", "device_ip": "192.1.2.8", - "rank_id": "1", + "rank_id": "0" } ], - "server_id": "10.155.111.118" + "server_id": "127.0.0.1" } ], "status": "completed", -- Gitee From ddab2e5a40bb800d10fad1b7f565c09ce7a989d7 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 02:48:28 +0000 Subject: [PATCH 05/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_2.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json index 0438819dd..8d8b18324 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json @@ -1,16 +1,16 @@ { - "group_count": "1", - "group_list": + "server_count": "1", + "server_list": [ { "devices": [ { "device_id": "2", "device_ip": "192.1.2.8", - "rank_id": "2", + "rank_id": "0" } ], - "server_id": "10.155.111.118" + "server_id": "127.0.0.1" } ], "status": "completed", -- Gitee From 4b1d3fb6625223d4bc6c07ac2b6a833a69fa1476 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 02:48:46 +0000 Subject: [PATCH 06/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_3.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json index ca9a5ff4e..a7a6433b8 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json @@ -1,16 +1,16 @@ { - "group_count": "1", - "group_list": + "server_count": "1", + "server_list": [ { "devices": [ { "device_id": "3", "device_ip": "192.1.2.8", - "rank_id": "3", + "rank_id": "0" } ], - "server_id": "10.155.111.118" + "server_id": "127.0.0.1" } ], "status": "completed", -- Gitee From 80f49434365f98d4045bbe44e14a8df504df9abc Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 02:49:02 +0000 Subject: [PATCH 07/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_4.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json index 90872c5e1..9651658c8 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json @@ -1,16 +1,16 @@ { - "group_count": "1", - "group_list": + "server_count": "1", + "server_list": [ { "devices": [ { "device_id": "4", - "device_ip": "192.4.2.9", - "rank_id": "4", + "device_ip": "192.1.2.8", + "rank_id": "0" } ], - "server_id": "10.155.111.118" + "server_id": "127.0.0.1" } ], "status": "completed", -- Gitee From 3645b2b1dde238f3078c42d3554a659c81943ec9 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 02:49:18 +0000 Subject: [PATCH 08/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_5.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json index 1b1322f11..69b978d67 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json @@ -1,16 +1,16 @@ { - "group_count": "1", - "group_list": + "server_count": "1", + "server_list": [ { "devices": [ { "device_id": "5", - "device_ip": "192.4.2.9", - "rank_id": "5", + "device_ip": "192.1.2.8", + "rank_id": "0" } ], - "server_id": "10.155.111.118" + "server_id": "127.0.0.1" } ], "status": "completed", -- Gitee From 0ce4968308f41fc7e74c7e50383b7e8d4ed2b356 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 02:49:34 +0000 Subject: [PATCH 09/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_6.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json index ea26227f6..b094739d2 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json @@ -1,16 +1,16 @@ { - "group_count": "1", - "group_list": + "server_count": "1", + "server_list": [ { "devices": [ { "device_id": "6", - "device_ip": "192.4.2.9", - "rank_id": "6", + "device_ip": "192.1.2.8", + "rank_id": "0" } ], - "server_id": "10.155.111.118" + "server_id": "127.0.0.1" } ], "status": "completed", -- Gitee From df1825c86290a652202b0b9e3aab24c44d803444 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 02:49:53 +0000 Subject: [PATCH 10/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_7.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json index 1ccc40abf..00a472e8d 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json @@ -1,16 +1,16 @@ { - "group_count": "1", - "group_list": + "server_count": "1", + "server_list": [ { "devices": [ { "device_id": "7", - "device_ip": "192.4.2.9", - "rank_id": "7", + "device_ip": "192.1.2.8", + "rank_id": "0" } ], - "server_id": "10.155.111.118" + "server_id": "127.0.0.1" } ], "status": "completed", -- Gitee From ff9d9f03ae633c88a6f5bba3b1472e9c2e5845e0 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:14:12 +0000 Subject: [PATCH 11/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/8p.json. --- .../WideDeep_ID2940_for_TensorFlow/config/8p.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/8p.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/8p.json index d13441e11..3c329456b 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/8p.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/8p.json @@ -11,10 +11,10 @@ {"device_id":"4","device_ip":"192.4.2.9","rank_id":"4"}, {"device_id":"5","device_ip":"192.4.2.9","rank_id":"5"}, {"device_id":"6","device_ip":"192.4.2.9","rank_id":"6"}, - {"device_id":"7","device_ip":"192.4.2.9","rank_id":"7"}, + {"device_id":"7","device_ip":"192.4.2.9","rank_id":"7"} ], - "server_id":"10.155.111.118" + "server_id":"127.0.0.1" } ], "status":"completed", -- Gitee From 1d182d8b55b26e530f4b9378268ac1692b882dc7 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:19:10 +0000 Subject: [PATCH 12/69] update WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh. --- .../test/train_performance_1p.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh index 64ab2617e..e389decc2 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh @@ -27,6 +27,9 @@ RankSize=1 #训练epoch,可选 train_epochs=1 +#迭代下沉循环次数 +iteration_per_loop=1 + #参数配置 data_path="" @@ -95,7 +98,7 @@ fi if [ -d $cur_path/../config/1p_$ASCEND_DEVICE.json ];then export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE.json - export RANK_ID=$ASCEND_DEVICE_ID + export RANK_ID=0 else export RANK_TABLE_FILE=$cur_path/../config/1p_0.json export RANK_ID=0 @@ -104,13 +107,15 @@ wait cd $cur_path/../ start=$(date +%s) -python3 -m trainer.task --gpu \ +python3 -m trainer.task \ --Adam \ + --iteration_per_loop=$iteration_per_loop \ --train_data_pattern=$data_path/outbrain/tfrecords/train/part* \ --eval_data_pattern=$data_path/outbrain/tfrecords/eval/part* \ --model_dir=$cur_path/output/$ASCEND_DEVICE_ID/ckpt \ --transformed_metadata_path=$data_path/outbrain/tfrecords \ - --num_epochs=$train_epochs > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & + --num_epochs=$train_epochs \ + --global_batch_size=$batch_size > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & wait end=$(date +%s) e2etime=$(( $end - $start )) -- Gitee From 4531c3ec8dfb1e557fd95ff5d548611fb91c6799 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:24:42 +0000 Subject: [PATCH 13/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/trainer/task.py. --- .../trainer/task.py | 52 ++++++++++++------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/trainer/task.py b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/trainer/task.py index c22742f76..c87fc83ed 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/trainer/task.py +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/trainer/task.py @@ -27,11 +27,15 @@ import tensorflow as tf import tensorflow_transform as tft from tensorflow.core.protobuf import rewriter_config_pb2 from trainer import features -from utils.dataloader import separate_input_fn -from utils.hooks.benchmark_hooks import BenchmarkLoggingHook -from utils.metrics import map_custom_metric, map_custom_metric_with_leak -from utils.schedulers import learning_rate_scheduler - +#from utils.dataloader import separate_input_fn +#from utils.hooks.benchmark_hooks import BenchmarkLoggingHook +#from utils.metrics import map_custom_metric, map_custom_metric_with_leak +#from utils.schedulers import learning_rate_scheduler +from util.dataloader import separate_input_fn +from util.hooks.benchmark_hooks import BenchmarkLoggingHook +from util.metrics import map_custom_metric, map_custom_metric_with_leak +from util.schedulers import learning_rate_scheduler +from util.dnn_linear_combined import DNNLinearCombinedClassifier MODEL_TYPES = ['wide', 'deep', 'wide_n_deep'] WIDE, DEEP, WIDE_N_DEEP = MODEL_TYPES @@ -239,6 +243,11 @@ def create_parser(): help='Number of steps for train performance benchmark', type=int, default=100) + parser.add_argument( + '--iteration_per_loop', + help='Number of iters per loop', + type=int, + default=0) return parser @@ -262,7 +271,7 @@ def construct_estimator(model_type, run_config, optimizer=deep_optimizer) elif model_type == WIDE_N_DEEP: - estimator = tf.estimator.DNNLinearCombinedClassifier( + estimator = DNNLinearCombinedClassifier( config=npu_run_config_init(run_config=run_config), linear_feature_columns=wide_columns, linear_optimizer=wide_optimizer, @@ -329,15 +338,14 @@ def main(FLAGS): log_device_placement=FLAGS.log_device_placement ) else: - #session_config = tf.compat.v1.ConfigProto( - # device_count={'GPU': 0}, - # log_device_placement=FLAGS.log_device_placement - #) session_config = tf.ConfigProto() custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() custom_op.name = "NpuOptimizer" custom_op.parameter_map["use_off_line"].b = True custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + if FLAGS.iteration_per_loop: + custom_op.parameter_map["enable_data_pre_proc"].b = True + custom_op.parameter_map["iterations_per_loop"].i = FLAGS.iteration_per_loop session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF session_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF @@ -361,12 +369,13 @@ def main(FLAGS): int(FLAGS.eval_epoch_interval * steps_per_epoch) count_steps = FLAGS.benchmark_steps + 1 if FLAGS.benchmark else 100 - run_config = tf.estimator.RunConfig(model_dir=model_dir, save_summary_steps=0) \ - .replace(session_config=session_config, - save_checkpoints_steps=save_checkpoints_steps, - save_summary_steps=count_steps, - log_step_count_steps=count_steps, - keep_checkpoint_max=1) + #run_config = tf.estimator.RunConfig(model_dir=model_dir, save_summary_steps=0) \ + # .replace(session_config=session_config, + # save_checkpoints_steps=save_checkpoints_steps, + # save_summary_steps=count_steps, + # log_step_count_steps=count_steps, + # keep_checkpoint_max=1) + run_config = tf.estimator.RunConfig(model_dir=model_dir, save_summary_steps=0, session_config=session_config, save_checkpoints_steps=save_checkpoints_steps, log_step_count_steps=count_steps, keep_checkpoint_max=1) def wide_optimizer(): opt = tf.compat.v1.train.FtrlOptimizer( @@ -431,6 +440,8 @@ def main(FLAGS): estimator = tf.estimator.add_metrics(estimator, map_custom_metric_with_leak) hooks = [] + if FLAGS.iteration_per_loop: + hooks.append(npu_hook.SetIterationsVarHook(FLAGS.iteration_per_loop)) if FLAGS.hvd: hooks.append(NPUBroadcastGlobalVariablesHook(0, int(os.getenv('RANK_ID', '0')))) @@ -482,6 +493,7 @@ def main(FLAGS): train_throughput = benchmark_hook.mean_throughput.value() dllogger.log(data={'train_throughput': train_throughput}, step=tuple()) else: + print('train and eval') train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=max_steps, hooks=hooks) @@ -498,18 +510,20 @@ def main(FLAGS): if __name__ == '__main__': + FLAGS = create_parser().parse_args() session_config = tf.ConfigProto() custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() custom_op.name = "NpuOptimizer" custom_op.parameter_map["use_off_line"].b = True custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") - custom_op.parameter_map["graph_memory_max_size"].s= tf.compat.as_bytes(str(16 * 1024 * 1024 * 1024)) - custom_op.parameter_map["variable_memory_max_size"].s = tf.compat.as_bytes(str(15 * 1024 * 1024 * 1024)) + if FLAGS.iteration_per_loop: + print('>>>>>>>>> iteration per loop var: %d'%(FLAGS.iteration_per_loop)) + custom_op.parameter_map["enable_data_pre_proc"].b = True + custom_op.parameter_map["iterations_per_loop"].i = FLAGS.iteration_per_loop session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF session_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF (npu_sess, npu_shutdown) = init_resource(config=session_config) - FLAGS = create_parser().parse_args() main(FLAGS) shutdown_resource(npu_sess, npu_shutdown) close_session(npu_sess) -- Gitee From 119fbe0d34c4014939ba4157b0b56f9af070d2c2 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:25:07 +0000 Subject: [PATCH 14/69] update WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh. --- .../WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh index e389decc2..1cfb02f3e 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh @@ -28,7 +28,7 @@ RankSize=1 train_epochs=1 #迭代下沉循环次数 -iteration_per_loop=1 +iteration_per_loop=0 #参数配置 data_path="" -- Gitee From b0a89ab4285a1821c37200aef8449028ef2db0c7 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:26:31 +0000 Subject: [PATCH 15/69] =?UTF-8?q?=E9=87=8D=E5=91=BD=E5=90=8D=20TensorFlow/?= =?UTF-8?q?built-in/recommendation/WideDeep=5FID2940=5Ffor=5FTensorFlow/ut?= =?UTF-8?q?ils=20=E4=B8=BA=20TensorFlow/built-in/recommendation/WideDeep?= =?UTF-8?q?=5FID2940=5Ffor=5FTensorFlow/util?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../WideDeep_ID2940_for_TensorFlow/{utils => util}/dataloader.py | 0 .../{utils => util}/hooks/benchmark_hooks.py | 0 .../{utils => util}/hooks/training_hooks.py | 0 .../WideDeep_ID2940_for_TensorFlow/{utils => util}/metrics.py | 0 .../WideDeep_ID2940_for_TensorFlow/{utils => util}/schedulers.py | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/{utils => util}/dataloader.py (100%) rename TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/{utils => util}/hooks/benchmark_hooks.py (100%) rename TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/{utils => util}/hooks/training_hooks.py (100%) rename TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/{utils => util}/metrics.py (100%) rename TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/{utils => util}/schedulers.py (100%) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/utils/dataloader.py b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/dataloader.py similarity index 100% rename from TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/utils/dataloader.py rename to TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/dataloader.py diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/utils/hooks/benchmark_hooks.py b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/hooks/benchmark_hooks.py similarity index 100% rename from TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/utils/hooks/benchmark_hooks.py rename to TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/hooks/benchmark_hooks.py diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/utils/hooks/training_hooks.py b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/hooks/training_hooks.py similarity index 100% rename from TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/utils/hooks/training_hooks.py rename to TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/hooks/training_hooks.py diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/utils/metrics.py b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/metrics.py similarity index 100% rename from TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/utils/metrics.py rename to TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/metrics.py diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/utils/schedulers.py b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/schedulers.py similarity index 100% rename from TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/utils/schedulers.py rename to TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/schedulers.py -- Gitee From 22027a58c8dce676107b6b5d96719fd0f928a2f6 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:28:08 +0000 Subject: [PATCH 16/69] add TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/dnn_linear_combined.py. --- .../util/dnn_linear_combined.py | 1152 +++++++++++++++++ 1 file changed, 1152 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/dnn_linear_combined.py diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/dnn_linear_combined.py b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/dnn_linear_combined.py new file mode 100644 index 000000000..08b0e2d88 --- /dev/null +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/dnn_linear_combined.py @@ -0,0 +1,1152 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TensorFlow estimators for Linear and DNN joined training models.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import six + +from tensorflow.python.framework import ops +from tensorflow.python.keras.utils import losses_utils +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops.losses import losses +from tensorflow.python.summary import summary +from tensorflow.python.training import sync_replicas_optimizer +from tensorflow.python.training import training_util +from tensorflow.python.util.tf_export import estimator_export +from tensorflow_estimator.python.estimator import estimator +from tensorflow_estimator.python.estimator.canned import dnn +from tensorflow_estimator.python.estimator.canned import head as head_lib +from tensorflow_estimator.python.estimator.canned import linear +from tensorflow_estimator.python.estimator.canned import optimizers +from tensorflow_estimator.python.estimator.head import head_utils +from tensorflow_estimator.python.estimator.head import regression_head +from tensorflow_estimator.python.estimator.mode_keys import ModeKeys + +# The default learning rates are a historical artifact of the initial +# implementation. +_DNN_LEARNING_RATE = 0.001 +_LINEAR_LEARNING_RATE = 0.005 + + +def _check_no_sync_replicas_optimizer(optimizer): + if isinstance(optimizer, sync_replicas_optimizer.SyncReplicasOptimizer): + raise ValueError( + 'SyncReplicasOptimizer does not support multi optimizers case. ' + 'Therefore, it is not supported in DNNLinearCombined model. ' + 'If you want to use this optimizer, please use either DNN or Linear ' + 'model.') + + +def _linear_learning_rate(num_linear_feature_columns): + """Returns the default learning rate of the linear model. + + The calculation is a historical artifact of this initial implementation, but + has proven a reasonable choice. + + Args: + num_linear_feature_columns: The number of feature columns of the linear + model. + + Returns: + A float. + """ + default_learning_rate = 1. / math.sqrt(num_linear_feature_columns) + return min(_LINEAR_LEARNING_RATE, default_learning_rate) + + +def _add_layer_summary(value, tag): + summary.scalar('%s/fraction_of_zero_values' % tag, nn.zero_fraction(value)) + summary.histogram('%s/activation' % tag, value) + + +def _validate_feature_columns(linear_feature_columns, dnn_feature_columns): + """Validates feature columns DNNLinearCombinedRegressor.""" + linear_feature_columns = linear_feature_columns or [] + dnn_feature_columns = dnn_feature_columns or [] + feature_columns = ( + list(linear_feature_columns) + list(dnn_feature_columns)) + if not feature_columns: + raise ValueError('Either linear_feature_columns or dnn_feature_columns ' + 'must be defined.') + return feature_columns + + +def _dnn_linear_combined_model_fn_v2( + features, + labels, + mode, + head, + linear_feature_columns=None, + linear_optimizer='Ftrl', + dnn_feature_columns=None, + dnn_optimizer='Adagrad', + dnn_hidden_units=None, + dnn_activation_fn=nn.relu, + dnn_dropout=None, + config=None, + batch_norm=False, + linear_sparse_combiner='sum', + loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE): + """Deep Neural Net and Linear combined model_fn. + + Args: + features: dict of `Tensor`. + labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype + `int32` or `int64` in the range `[0, n_classes)`. + mode: Defines whether this is training, evaluation or prediction. See + `ModeKeys`. + head: A `Head` instance. + linear_feature_columns: An iterable containing all the feature columns used + by the Linear model. + linear_optimizer: string, `Optimizer` object, or callable that defines the + optimizer to use for training the Linear model. Defaults to the Ftrl + optimizer. + dnn_feature_columns: An iterable containing all the feature columns used by + the DNN model. + dnn_optimizer: string, `Optimizer` object, or callable that defines the + optimizer to use for training the DNN model. Defaults to the Adagrad + optimizer. + dnn_hidden_units: List of hidden units per DNN layer. + dnn_activation_fn: Activation function applied to each DNN layer. If `None`, + will use `tf.nn.relu`. + dnn_dropout: When not `None`, the probability we will drop out a given DNN + coordinate. + config: `RunConfig` object to configure the runtime settings. + batch_norm: Whether to use batch normalization after each hidden layer. + linear_sparse_combiner: A string specifying how to reduce the linear model + if a categorical column is multivalent. One of "mean", "sqrtn", and + "sum". + loss_reduction: One of `tf.keras.losses.Reduction` except `NONE`. Describes + how to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`. + + Returns: + An `EstimatorSpec` instance. + + Raises: + ValueError: If both `linear_feature_columns` and `dnn_features_columns` + are empty at the same time, or `input_layer_partitioner` is missing, + or features has the wrong type. + """ + if not isinstance(features, dict): + raise ValueError('features should be a dictionary of `Tensor`s. ' + 'Given type: {}'.format(type(features))) + if not linear_feature_columns and not dnn_feature_columns: + raise ValueError( + 'Either linear_feature_columns or dnn_feature_columns must be defined.') + + del config + + # Build DNN Logits. + if not dnn_feature_columns: + dnn_logits = None + else: + if mode == ModeKeys.TRAIN: + dnn_optimizer = optimizers.get_optimizer_instance_v2( + dnn_optimizer, learning_rate=_DNN_LEARNING_RATE) + _check_no_sync_replicas_optimizer(dnn_optimizer) + + if not dnn_hidden_units: + raise ValueError( + 'dnn_hidden_units must be defined when dnn_feature_columns is ' + 'specified.') + dnn_logits, dnn_trainable_variables, dnn_update_ops = ( + dnn._dnn_model_fn_builder_v2( # pylint: disable=protected-access + units=head.logits_dimension, + hidden_units=dnn_hidden_units, + feature_columns=dnn_feature_columns, + activation_fn=dnn_activation_fn, + dropout=dnn_dropout, + batch_norm=batch_norm, + features=features, + mode=mode)) + + if not linear_feature_columns: + linear_logits = None + else: + if mode == ModeKeys.TRAIN: + linear_optimizer = optimizers.get_optimizer_instance_v2( + linear_optimizer, + learning_rate=_linear_learning_rate(len(linear_feature_columns))) + _check_no_sync_replicas_optimizer(linear_optimizer) + + linear_logits, linear_trainable_variables = ( + linear._linear_model_fn_builder_v2( # pylint: disable=protected-access + units=head.logits_dimension, + feature_columns=linear_feature_columns, + sparse_combiner=linear_sparse_combiner, + features=features)) + _add_layer_summary(linear_logits, 'linear') + + # Combine logits and build full model. + if dnn_logits is not None and linear_logits is not None: + logits = dnn_logits + linear_logits + elif dnn_logits is not None: + logits = dnn_logits + else: + logits = linear_logits + + def _train_op_fn(loss): + """Returns the op to optimize the loss.""" + train_ops = [] + # Scale loss by number of replicas. + if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE: + loss = losses_utils.scale_loss_for_distribution(loss) + + if dnn_logits is not None: + train_ops.extend( + dnn_optimizer.get_updates( + loss, + dnn_trainable_variables)) + if dnn_update_ops is not None: + train_ops.extend(dnn_update_ops) + if linear_logits is not None: + train_ops.extend( + linear_optimizer.get_updates( + loss, + linear_trainable_variables)) + train_op = control_flow_ops.group(*train_ops) + return train_op + + # In TRAIN mode, asssign global_step variable to optimizer.iterations to + # make global_step increased correctly, as Hooks relies on global step as + # step counter. Note that, Only one model's optimizer needs this assignment. + if mode == ModeKeys.TRAIN: + if dnn_logits is not None: + dnn_optimizer.iterations = training_util.get_or_create_global_step() + else: + linear_optimizer.iterations = training_util.get_or_create_global_step() + + return head.create_estimator_spec( + features=features, + mode=mode, + labels=labels, + train_op_fn=_train_op_fn, + logits=logits) + + +def _dnn_linear_combined_model_fn(features, + labels, + mode, + head, + linear_feature_columns=None, + linear_optimizer='Ftrl', + dnn_feature_columns=None, + dnn_optimizer='Adagrad', + dnn_hidden_units=None, + dnn_activation_fn=nn.relu, + dnn_dropout=None, + input_layer_partitioner=None, + config=None, + batch_norm=False, + linear_sparse_combiner='sum'): + """Deep Neural Net and Linear combined model_fn. + + Args: + features: dict of `Tensor`. + labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype + `int32` or `int64` in the range `[0, n_classes)`. + mode: Defines whether this is training, evaluation or prediction. + See `ModeKeys`. + head: A `Head` instance. + linear_feature_columns: An iterable containing all the feature columns used + by the Linear model. + linear_optimizer: string, `Optimizer` object, or callable that defines the + optimizer to use for training the Linear model. Defaults to the Ftrl + optimizer. + dnn_feature_columns: An iterable containing all the feature columns used by + the DNN model. + dnn_optimizer: string, `Optimizer` object, or callable that defines the + optimizer to use for training the DNN model. Defaults to the Adagrad + optimizer. + dnn_hidden_units: List of hidden units per DNN layer. + dnn_activation_fn: Activation function applied to each DNN layer. If `None`, + will use `tf.nn.relu`. + dnn_dropout: When not `None`, the probability we will drop out a given DNN + coordinate. + input_layer_partitioner: Partitioner for input layer. + config: `RunConfig` object to configure the runtime settings. + batch_norm: Whether to use batch normalization after each hidden layer. + linear_sparse_combiner: A string specifying how to reduce the linear model + if a categorical column is multivalent. One of "mean", "sqrtn", and + "sum". + Returns: + An `EstimatorSpec` instance. + + Raises: + ValueError: If both `linear_feature_columns` and `dnn_features_columns` + are empty at the same time, or `input_layer_partitioner` is missing, + or features has the wrong type. + """ + if not isinstance(features, dict): + raise ValueError('features should be a dictionary of `Tensor`s. ' + 'Given type: {}'.format(type(features))) + if not linear_feature_columns and not dnn_feature_columns: + raise ValueError( + 'Either linear_feature_columns or dnn_feature_columns must be defined.') + + num_ps_replicas = config.num_ps_replicas if config else 0 + input_layer_partitioner = input_layer_partitioner or ( + partitioned_variables.min_max_variable_partitioner( + max_partitions=num_ps_replicas, + min_slice_size=64 << 20)) + + # Build DNN Logits. + dnn_parent_scope = 'dnn' + + if not dnn_feature_columns: + dnn_logits = None + else: + dnn_optimizer = optimizers.get_optimizer_instance( + dnn_optimizer, learning_rate=_DNN_LEARNING_RATE) + _check_no_sync_replicas_optimizer(dnn_optimizer) + if not dnn_hidden_units: + raise ValueError( + 'dnn_hidden_units must be defined when dnn_feature_columns is ' + 'specified.') + dnn_partitioner = ( + partitioned_variables.min_max_variable_partitioner( + max_partitions=num_ps_replicas)) + with variable_scope.variable_scope( + dnn_parent_scope, + values=tuple(six.itervalues(features)), + partitioner=dnn_partitioner) as scope: + dnn_absolute_scope = scope.name + dnn_logit_fn = dnn.dnn_logit_fn_builder( + units=head.logits_dimension, + hidden_units=dnn_hidden_units, + feature_columns=dnn_feature_columns, + activation_fn=dnn_activation_fn, + dropout=dnn_dropout, + batch_norm=batch_norm, + input_layer_partitioner=input_layer_partitioner) + dnn_logits = dnn_logit_fn(features=features, mode=mode) + + linear_parent_scope = 'linear' + + if not linear_feature_columns: + linear_logits = None + else: + linear_optimizer = optimizers.get_optimizer_instance( + linear_optimizer, + learning_rate=_linear_learning_rate(len(linear_feature_columns))) + _check_no_sync_replicas_optimizer(linear_optimizer) + with variable_scope.variable_scope( + linear_parent_scope, + values=tuple(six.itervalues(features)), + partitioner=input_layer_partitioner) as scope: + linear_absolute_scope = scope.name + logit_fn = linear.linear_logit_fn_builder( + units=head.logits_dimension, + feature_columns=linear_feature_columns, + sparse_combiner=linear_sparse_combiner) + linear_logits = logit_fn(features=features) + _add_layer_summary(linear_logits, scope.name) + + # Combine logits and build full model. + if dnn_logits is not None and linear_logits is not None: + logits = dnn_logits + linear_logits + elif dnn_logits is not None: + logits = dnn_logits + else: + logits = linear_logits + + def _train_op_fn(loss): + """Returns the op to optimize the loss.""" + train_ops = [] + global_step = training_util.get_global_step() + if dnn_logits is not None: + train_ops.append( + dnn_optimizer.minimize( + loss, + var_list=ops.get_collection( + ops.GraphKeys.TRAINABLE_VARIABLES, + scope=dnn_absolute_scope))) + if linear_logits is not None: + train_ops.append( + linear_optimizer.minimize( + loss, + var_list=ops.get_collection( + ops.GraphKeys.TRAINABLE_VARIABLES, + scope=linear_absolute_scope))) + + train_op = control_flow_ops.group(*train_ops, name='IterationOp') + with ops.control_dependencies([train_op]): + return state_ops.assign_add(global_step, 1).op + + return head.create_estimator_spec( + features=features, + mode=mode, + labels=labels, + train_op_fn=_train_op_fn, + logits=logits) + + +@estimator_export('estimator.DNNLinearCombinedClassifier', v1=[]) +class DNNLinearCombinedClassifierV2(estimator.EstimatorV2): + """An estimator for TensorFlow Linear and DNN joined classification models. + + Note: This estimator is also known as wide-n-deep. + + Example: + + ```python + numeric_feature = numeric_column(...) + categorical_column_a = categorical_column_with_hash_bucket(...) + categorical_column_b = categorical_column_with_hash_bucket(...) + + categorical_feature_a_x_categorical_feature_b = crossed_column(...) + categorical_feature_a_emb = embedding_column( + categorical_column=categorical_feature_a, ...) + categorical_feature_b_emb = embedding_column( + categorical_id_column=categorical_feature_b, ...) + + estimator = DNNLinearCombinedClassifier( + # wide settings + linear_feature_columns=[categorical_feature_a_x_categorical_feature_b], + linear_optimizer=tf.train.FtrlOptimizer(...), + # deep settings + dnn_feature_columns=[ + categorical_feature_a_emb, categorical_feature_b_emb, + numeric_feature], + dnn_hidden_units=[1000, 500, 100], + dnn_optimizer=tf.train.ProximalAdagradOptimizer(...), + # warm-start settings + warm_start_from="/path/to/checkpoint/dir") + + # To apply L1 and L2 regularization, you can set dnn_optimizer to: + tf.train.ProximalAdagradOptimizer( + learning_rate=0.1, + l1_regularization_strength=0.001, + l2_regularization_strength=0.001) + # To apply learning rate decay, you can set dnn_optimizer to a callable: + lambda: tf.AdamOptimizer( + learning_rate=tf.exponential_decay( + learning_rate=0.1, + global_step=tf.get_global_step(), + decay_steps=10000, + decay_rate=0.96) + # It is the same for linear_optimizer. + + # Input builders + def input_fn_train: + # Returns tf.data.Dataset of (x, y) tuple where y represents label's class + # index. + pass + def input_fn_eval: + # Returns tf.data.Dataset of (x, y) tuple where y represents label's class + # index. + pass + def input_fn_predict: + # Returns tf.data.Dataset of (x, None) tuple. + pass + estimator.train(input_fn=input_fn_train, steps=100) + metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10) + predictions = estimator.predict(input_fn=input_fn_predict) + ``` + + Input of `train` and `evaluate` should have following features, + otherwise there will be a `KeyError`: + + * for each `column` in `dnn_feature_columns` + `linear_feature_columns`: + - if `column` is a `_CategoricalColumn`, a feature with `key=column.name` + whose `value` is a `SparseTensor`. + - if `column` is a `_WeightedCategoricalColumn`, two features: the first + with `key` the id column name, the second with `key` the weight column + name. Both features' `value` must be a `SparseTensor`. + - if `column` is a `_DenseColumn`, a feature with `key=column.name` + whose `value` is a `Tensor`. + + Loss is calculated by using softmax cross entropy. + + @compatibility(eager) + Estimators can be used while eager execution is enabled. Note that `input_fn` + and all hooks are executed inside a graph context, so they have to be written + to be compatible with graph mode. Note that `input_fn` code using `tf.data` + generally works in both graph and eager modes. + @end_compatibility + """ + + def __init__(self, + model_dir=None, + linear_feature_columns=None, + linear_optimizer='Ftrl', + dnn_feature_columns=None, + dnn_optimizer='Adagrad', + dnn_hidden_units=None, + dnn_activation_fn=nn.relu, + dnn_dropout=None, + n_classes=2, + weight_column=None, + label_vocabulary=None, + config=None, + warm_start_from=None, + loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, + batch_norm=False, + linear_sparse_combiner='sum'): + """Initializes a DNNLinearCombinedClassifier instance. + + Args: + model_dir: Directory to save model parameters, graph and etc. This can + also be used to load checkpoints from the directory into a estimator + to continue training a previously saved model. + linear_feature_columns: An iterable containing all the feature columns + used by linear part of the model. All items in the set must be + instances of classes derived from `FeatureColumn`. + linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to + the linear part of the model. Can also be a string (one of 'Adagrad', + 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to FTRL + optimizer. + dnn_feature_columns: An iterable containing all the feature columns used + by deep part of the model. All items in the set must be instances of + classes derived from `FeatureColumn`. + dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to + the deep part of the model. Can also be a string (one of 'Adagrad', + 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to Adagrad + optimizer. + dnn_hidden_units: List of hidden units per layer. All layers are fully + connected. + dnn_activation_fn: Activation function applied to each layer. If None, + will use `tf.nn.relu`. + dnn_dropout: When not None, the probability we will drop out + a given coordinate. + n_classes: Number of label classes. Defaults to 2, namely binary + classification. Must be > 1. + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. If it is a string, it is + used as a key to fetch weight tensor from the `features`. If it is a + `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, + then weight_column.normalizer_fn is applied on it to get weight tensor. + label_vocabulary: A list of strings represents possible label values. If + given, labels must be string type and have any value in + `label_vocabulary`. If it is not given, that means labels are + already encoded as integer or float within [0, 1] for `n_classes=2` and + encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . + Also there will be errors if vocabulary is not provided and labels are + string. + config: RunConfig object to configure the runtime settings. + warm_start_from: A string filepath to a checkpoint to warm-start from, or + a `WarmStartSettings` object to fully configure warm-starting. If the + string filepath is provided instead of a `WarmStartSettings`, then all + weights are warm-started, and it is assumed that vocabularies and Tensor + names are unchanged. + loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how + to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`. + batch_norm: Whether to use batch normalization after each hidden layer. + linear_sparse_combiner: A string specifying how to reduce the linear model + if a categorical column is multivalent. One of "mean", "sqrtn", and + "sum" -- these are effectively different ways to do example-level + normalization, which can be useful for bag-of-words features. For more + details, see `tf.feature_column.linear_model`. + + Raises: + ValueError: If both linear_feature_columns and dnn_features_columns are + empty at the same time. + """ + self._feature_columns = _validate_feature_columns( + linear_feature_columns=linear_feature_columns, + dnn_feature_columns=dnn_feature_columns) + + head = head_utils.binary_or_multi_class_head( + n_classes, weight_column=weight_column, + label_vocabulary=label_vocabulary, + loss_reduction=loss_reduction) + + def _model_fn(features, labels, mode, config): + """Call the _dnn_linear_combined_model_fn.""" + return _dnn_linear_combined_model_fn_v2( + features=features, + labels=labels, + mode=mode, + head=head, + linear_feature_columns=linear_feature_columns, + linear_optimizer=linear_optimizer, + dnn_feature_columns=dnn_feature_columns, + dnn_optimizer=dnn_optimizer, + dnn_hidden_units=dnn_hidden_units, + dnn_activation_fn=dnn_activation_fn, + dnn_dropout=dnn_dropout, + config=config, + batch_norm=batch_norm, + linear_sparse_combiner=linear_sparse_combiner, + loss_reduction=loss_reduction) + + super(DNNLinearCombinedClassifierV2, self).__init__( + model_fn=_model_fn, + model_dir=model_dir, + config=config, + warm_start_from=warm_start_from) + + +#@estimator_export(v1=['estimator.DNNLinearCombinedClassifier']) # pylint: disable=missing-docstring +class DNNLinearCombinedClassifier(estimator.Estimator): + __doc__ = DNNLinearCombinedClassifierV2.__doc__.replace( + 'SUM_OVER_BATCH_SIZE', 'SUM') + + def __init__(self, + model_dir=None, + linear_feature_columns=None, + linear_optimizer='Ftrl', + dnn_feature_columns=None, + dnn_optimizer='Adagrad', + dnn_hidden_units=None, + dnn_activation_fn=nn.relu, + dnn_dropout=None, + n_classes=2, + weight_column=None, + label_vocabulary=None, + input_layer_partitioner=None, + config=None, + warm_start_from=None, + loss_reduction=losses.Reduction.SUM, + batch_norm=False, + linear_sparse_combiner='sum'): + self._feature_columns = _validate_feature_columns( + linear_feature_columns=linear_feature_columns, + dnn_feature_columns=dnn_feature_columns) + + head = head_lib._binary_logistic_or_multi_class_head( # pylint: disable=protected-access + n_classes, weight_column, label_vocabulary, loss_reduction) + + def _model_fn(features, labels, mode, config): + """Call the _dnn_linear_combined_model_fn.""" + return _dnn_linear_combined_model_fn( + features=features, + labels=labels, + mode=mode, + head=head, + linear_feature_columns=linear_feature_columns, + linear_optimizer=linear_optimizer, + dnn_feature_columns=dnn_feature_columns, + dnn_optimizer=dnn_optimizer, + dnn_hidden_units=dnn_hidden_units, + dnn_activation_fn=dnn_activation_fn, + dnn_dropout=dnn_dropout, + input_layer_partitioner=input_layer_partitioner, + config=config, + batch_norm=batch_norm, + linear_sparse_combiner=linear_sparse_combiner) + + super(DNNLinearCombinedClassifier, self).__init__( + model_fn=_model_fn, + model_dir=model_dir, + config=config, + warm_start_from=warm_start_from) + + +def _init_dnn_linear_combined_estimator( + head, + linear_feature_columns, + linear_optimizer, + dnn_feature_columns, + dnn_optimizer, + dnn_hidden_units, + dnn_activation_fn, + dnn_dropout, + input_layer_partitioner, + linear_sparse_combiner): + """Helper function for the initialization of DNNLinearCombinedEstimator.""" + linear_feature_columns = linear_feature_columns or [] + dnn_feature_columns = dnn_feature_columns or [] + feature_columns = ( + list(linear_feature_columns) + list(dnn_feature_columns)) + if not feature_columns: + raise ValueError('Either linear_feature_columns or dnn_feature_columns ' + 'must be defined.') + + def _model_fn(features, labels, mode, config): + """Call the _dnn_linear_combined_model_fn.""" + return _dnn_linear_combined_model_fn( + features=features, + labels=labels, + mode=mode, + head=head, + linear_feature_columns=linear_feature_columns, + linear_optimizer=linear_optimizer, + dnn_feature_columns=dnn_feature_columns, + dnn_optimizer=dnn_optimizer, + dnn_hidden_units=dnn_hidden_units, + dnn_activation_fn=dnn_activation_fn, + dnn_dropout=dnn_dropout, + input_layer_partitioner=input_layer_partitioner, + config=config, + linear_sparse_combiner=linear_sparse_combiner) + return feature_columns, _model_fn + + +# TODO(b/117517419): Update these contrib references once head moves to core. +# Also references to the "_Head" class need to be replaced with "Head". +@estimator_export('estimator.DNNLinearCombinedEstimator', v1=[]) +class DNNLinearCombinedEstimatorV2(estimator.EstimatorV2): + """An estimator for TensorFlow Linear and DNN joined models with custom head. + + Note: This estimator is also known as wide-n-deep. + + Example: + + ```python + numeric_feature = numeric_column(...) + categorical_column_a = categorical_column_with_hash_bucket(...) + categorical_column_b = categorical_column_with_hash_bucket(...) + + categorical_feature_a_x_categorical_feature_b = crossed_column(...) + categorical_feature_a_emb = embedding_column( + categorical_column=categorical_feature_a, ...) + categorical_feature_b_emb = embedding_column( + categorical_column=categorical_feature_b, ...) + + estimator = DNNLinearCombinedEstimator( + head=tf.contrib.estimator.multi_label_head(n_classes=3), + # wide settings + linear_feature_columns=[categorical_feature_a_x_categorical_feature_b], + linear_optimizer=tf.train.FtrlOptimizer(...), + # deep settings + dnn_feature_columns=[ + categorical_feature_a_emb, categorical_feature_b_emb, + numeric_feature], + dnn_hidden_units=[1000, 500, 100], + dnn_optimizer=tf.train.ProximalAdagradOptimizer(...)) + + # To apply L1 and L2 regularization, you can set dnn_optimizer to: + tf.train.ProximalAdagradOptimizer( + learning_rate=0.1, + l1_regularization_strength=0.001, + l2_regularization_strength=0.001) + # To apply learning rate decay, you can set dnn_optimizer to a callable: + lambda: tf.AdamOptimizer( + learning_rate=tf.exponential_decay( + learning_rate=0.1, + global_step=tf.get_global_step(), + decay_steps=10000, + decay_rate=0.96) + # It is the same for linear_optimizer. + + # Input builders + def input_fn_train: + # Returns tf.data.Dataset of (x, y) tuple where y represents label's class + # index. + pass + def input_fn_eval: + # Returns tf.data.Dataset of (x, y) tuple where y represents label's class + # index. + pass + def input_fn_predict: + # Returns tf.data.Dataset of (x, None) tuple. + pass + estimator.train(input_fn=input_fn_train, steps=100) + metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10) + predictions = estimator.predict(input_fn=input_fn_predict) + ``` + + Input of `train` and `evaluate` should have following features, + otherwise there will be a `KeyError`: + + * for each `column` in `dnn_feature_columns` + `linear_feature_columns`: + - if `column` is a `_CategoricalColumn`, a feature with `key=column.name` + whose `value` is a `SparseTensor`. + - if `column` is a `_WeightedCategoricalColumn`, two features: the first + with `key` the id column name, the second with `key` the weight column + name. Both features' `value` must be a `SparseTensor`. + - if `column` is a `_DenseColumn`, a feature with `key=column.name` + whose `value` is a `Tensor`. + + Loss is calculated by using mean squared error. + + @compatibility(eager) + Estimators can be used while eager execution is enabled. Note that `input_fn` + and all hooks are executed inside a graph context, so they have to be written + to be compatible with graph mode. Note that `input_fn` code using `tf.data` + generally works in both graph and eager modes. + @end_compatibility + """ + + def __init__(self, + head, + model_dir=None, + linear_feature_columns=None, + linear_optimizer='Ftrl', + dnn_feature_columns=None, + dnn_optimizer='Adagrad', + dnn_hidden_units=None, + dnn_activation_fn=nn.relu, + dnn_dropout=None, + config=None, + linear_sparse_combiner='sum'): + """Initializes a DNNLinearCombinedEstimator instance. + + Args: + head: A `_Head` instance constructed with a method such as + `tf.contrib.estimator.multi_label_head`. + model_dir: Directory to save model parameters, graph and etc. This can + also be used to load checkpoints from the directory into an estimator + to continue training a previously saved model. + linear_feature_columns: An iterable containing all the feature columns + used by linear part of the model. All items in the set must be + instances of classes derived from `FeatureColumn`. + linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to + the linear part of the model. Can also be a string (one of 'Adagrad', + 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to FTRL + optimizer. + dnn_feature_columns: An iterable containing all the feature columns used + by deep part of the model. All items in the set must be instances of + classes derived from `FeatureColumn`. + dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to + the deep part of the model. Can also be a string (one of 'Adagrad', + 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to Adagrad + optimizer. + dnn_hidden_units: List of hidden units per layer. All layers are fully + connected. + dnn_activation_fn: Activation function applied to each layer. If None, + will use `tf.nn.relu`. + dnn_dropout: When not None, the probability we will drop out + a given coordinate. + config: RunConfig object to configure the runtime settings. + linear_sparse_combiner: A string specifying how to reduce the linear model + if a categorical column is multivalent. One of "mean", "sqrtn", and + "sum" -- these are effectively different ways to do example-level + normalization, which can be useful for bag-of-words features. For more + details, see `tf.feature_column.linear_model`. + + Raises: + ValueError: If both linear_feature_columns and dnn_features_columns are + empty at the same time. + """ + self._feature_columns = _validate_feature_columns( + linear_feature_columns=linear_feature_columns, + dnn_feature_columns=dnn_feature_columns) + + def _model_fn(features, labels, mode, config): + """Call the _dnn_linear_combined_model_fn.""" + return _dnn_linear_combined_model_fn_v2( + features=features, + labels=labels, + mode=mode, + head=head, + linear_feature_columns=linear_feature_columns, + linear_optimizer=linear_optimizer, + dnn_feature_columns=dnn_feature_columns, + dnn_optimizer=dnn_optimizer, + dnn_hidden_units=dnn_hidden_units, + dnn_activation_fn=dnn_activation_fn, + dnn_dropout=dnn_dropout, + config=config, + linear_sparse_combiner=linear_sparse_combiner) + + super(DNNLinearCombinedEstimatorV2, self).__init__( + model_fn=_model_fn, + model_dir=model_dir, + config=config) + + +@estimator_export(v1=['estimator.DNNLinearCombinedEstimator']) # pylint: disable=missing-docstring +class DNNLinearCombinedEstimator(estimator.Estimator): + __doc__ = DNNLinearCombinedEstimatorV2.__doc__ + + def __init__(self, + head, + model_dir=None, + linear_feature_columns=None, + linear_optimizer='Ftrl', + dnn_feature_columns=None, + dnn_optimizer='Adagrad', + dnn_hidden_units=None, + dnn_activation_fn=nn.relu, + dnn_dropout=None, + input_layer_partitioner=None, + config=None, + linear_sparse_combiner='sum'): + self._feature_columns = _validate_feature_columns( + linear_feature_columns=linear_feature_columns, + dnn_feature_columns=dnn_feature_columns) + + def _model_fn(features, labels, mode, config): + """Call the _dnn_linear_combined_model_fn.""" + return _dnn_linear_combined_model_fn( + features=features, + labels=labels, + mode=mode, + head=head, + linear_feature_columns=linear_feature_columns, + linear_optimizer=linear_optimizer, + dnn_feature_columns=dnn_feature_columns, + dnn_optimizer=dnn_optimizer, + dnn_hidden_units=dnn_hidden_units, + dnn_activation_fn=dnn_activation_fn, + dnn_dropout=dnn_dropout, + input_layer_partitioner=input_layer_partitioner, + config=config, + linear_sparse_combiner=linear_sparse_combiner) + + super(DNNLinearCombinedEstimator, self).__init__( + model_fn=_model_fn, + model_dir=model_dir, + config=config) + + +@estimator_export('estimator.DNNLinearCombinedRegressor', v1=[]) +class DNNLinearCombinedRegressorV2(estimator.EstimatorV2): + """An estimator for TensorFlow Linear and DNN joined models for regression. + + Note: This estimator is also known as wide-n-deep. + + Example: + + ```python + numeric_feature = numeric_column(...) + categorical_column_a = categorical_column_with_hash_bucket(...) + categorical_column_b = categorical_column_with_hash_bucket(...) + + categorical_feature_a_x_categorical_feature_b = crossed_column(...) + categorical_feature_a_emb = embedding_column( + categorical_column=categorical_feature_a, ...) + categorical_feature_b_emb = embedding_column( + categorical_column=categorical_feature_b, ...) + + estimator = DNNLinearCombinedRegressor( + # wide settings + linear_feature_columns=[categorical_feature_a_x_categorical_feature_b], + linear_optimizer=tf.train.FtrlOptimizer(...), + # deep settings + dnn_feature_columns=[ + categorical_feature_a_emb, categorical_feature_b_emb, + numeric_feature], + dnn_hidden_units=[1000, 500, 100], + dnn_optimizer=tf.train.ProximalAdagradOptimizer(...), + # warm-start settings + warm_start_from="/path/to/checkpoint/dir") + + # To apply L1 and L2 regularization, you can set dnn_optimizer to: + tf.train.ProximalAdagradOptimizer( + learning_rate=0.1, + l1_regularization_strength=0.001, + l2_regularization_strength=0.001) + # To apply learning rate decay, you can set dnn_optimizer to a callable: + lambda: tf.AdamOptimizer( + learning_rate=tf.exponential_decay( + learning_rate=0.1, + global_step=tf.get_global_step(), + decay_steps=10000, + decay_rate=0.96) + # It is the same for linear_optimizer. + + # Input builders + def input_fn_train: + # Returns tf.data.Dataset of (x, y) tuple where y represents label's class + # index. + pass + def input_fn_eval: + # Returns tf.data.Dataset of (x, y) tuple where y represents label's class + # index. + pass + def input_fn_predict: + # Returns tf.data.Dataset of (x, None) tuple. + pass + estimator.train(input_fn=input_fn_train, steps=100) + metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10) + predictions = estimator.predict(input_fn=input_fn_predict) + ``` + + Input of `train` and `evaluate` should have following features, + otherwise there will be a `KeyError`: + + * for each `column` in `dnn_feature_columns` + `linear_feature_columns`: + - if `column` is a `_CategoricalColumn`, a feature with `key=column.name` + whose `value` is a `SparseTensor`. + - if `column` is a `_WeightedCategoricalColumn`, two features: the first + with `key` the id column name, the second with `key` the weight column + name. Both features' `value` must be a `SparseTensor`. + - if `column` is a `_DenseColumn`, a feature with `key=column.name` + whose `value` is a `Tensor`. + + Loss is calculated by using mean squared error. + + @compatibility(eager) + Estimators can be used while eager execution is enabled. Note that `input_fn` + and all hooks are executed inside a graph context, so they have to be written + to be compatible with graph mode. Note that `input_fn` code using `tf.data` + generally works in both graph and eager modes. + @end_compatibility + """ + + def __init__(self, + model_dir=None, + linear_feature_columns=None, + linear_optimizer='Ftrl', + dnn_feature_columns=None, + dnn_optimizer='Adagrad', + dnn_hidden_units=None, + dnn_activation_fn=nn.relu, + dnn_dropout=None, + label_dimension=1, + weight_column=None, + config=None, + warm_start_from=None, + loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, + batch_norm=False, + linear_sparse_combiner='sum'): + """Initializes a DNNLinearCombinedRegressor instance. + + Args: + model_dir: Directory to save model parameters, graph and etc. This can + also be used to load checkpoints from the directory into a estimator + to continue training a previously saved model. + linear_feature_columns: An iterable containing all the feature columns + used by linear part of the model. All items in the set must be + instances of classes derived from `FeatureColumn`. + linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to + the linear part of the model. Can also be a string (one of 'Adagrad', + 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to FTRL + optimizer. + dnn_feature_columns: An iterable containing all the feature columns used + by deep part of the model. All items in the set must be instances of + classes derived from `FeatureColumn`. + dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to + the deep part of the model. Can also be a string (one of 'Adagrad', + 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to Adagrad + optimizer. + dnn_hidden_units: List of hidden units per layer. All layers are fully + connected. + dnn_activation_fn: Activation function applied to each layer. If None, + will use `tf.nn.relu`. + dnn_dropout: When not None, the probability we will drop out + a given coordinate. + label_dimension: Number of regression targets per example. This is the + size of the last dimension of the labels and logits `Tensor` objects + (typically, these have shape `[batch_size, label_dimension]`). + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. If it is a string, it is + used as a key to fetch weight tensor from the `features`. If it is a + `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, + then weight_column.normalizer_fn is applied on it to get weight tensor. + config: RunConfig object to configure the runtime settings. + warm_start_from: A string filepath to a checkpoint to warm-start from, or + a `WarmStartSettings` object to fully configure warm-starting. If the + string filepath is provided instead of a `WarmStartSettings`, then all + weights are warm-started, and it is assumed that vocabularies and Tensor + names are unchanged. + loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how + to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`. + batch_norm: Whether to use batch normalization after each hidden layer. + linear_sparse_combiner: A string specifying how to reduce the linear model + if a categorical column is multivalent. One of "mean", "sqrtn", and + "sum" -- these are effectively different ways to do example-level + normalization, which can be useful for bag-of-words features. For more + details, see `tf.feature_column.linear_model`. + + Raises: + ValueError: If both linear_feature_columns and dnn_features_columns are + empty at the same time. + """ + self._feature_columns = _validate_feature_columns( + linear_feature_columns=linear_feature_columns, + dnn_feature_columns=dnn_feature_columns) + + head = regression_head.RegressionHead( + label_dimension=label_dimension, + weight_column=weight_column, + loss_reduction=loss_reduction) + + def _model_fn(features, labels, mode, config): + """Call the _dnn_linear_combined_model_fn.""" + return _dnn_linear_combined_model_fn_v2( + features=features, + labels=labels, + mode=mode, + head=head, + linear_feature_columns=linear_feature_columns, + linear_optimizer=linear_optimizer, + dnn_feature_columns=dnn_feature_columns, + dnn_optimizer=dnn_optimizer, + dnn_hidden_units=dnn_hidden_units, + dnn_activation_fn=dnn_activation_fn, + dnn_dropout=dnn_dropout, + config=config, + batch_norm=batch_norm, + linear_sparse_combiner=linear_sparse_combiner) + + super(DNNLinearCombinedRegressorV2, self).__init__( + model_fn=_model_fn, + model_dir=model_dir, + config=config, + warm_start_from=warm_start_from) + + +@estimator_export(v1=['estimator.DNNLinearCombinedRegressor']) # pylint: disable=missing-docstring +class DNNLinearCombinedRegressor(estimator.Estimator): + __doc__ = DNNLinearCombinedRegressorV2.__doc__.replace( + 'SUM_OVER_BATCH_SIZE', 'SUM') + + def __init__(self, + model_dir=None, + linear_feature_columns=None, + linear_optimizer='Ftrl', + dnn_feature_columns=None, + dnn_optimizer='Adagrad', + dnn_hidden_units=None, + dnn_activation_fn=nn.relu, + dnn_dropout=None, + label_dimension=1, + weight_column=None, + input_layer_partitioner=None, + config=None, + warm_start_from=None, + loss_reduction=losses.Reduction.SUM, + batch_norm=False, + linear_sparse_combiner='sum'): + self._feature_columns = _validate_feature_columns( + linear_feature_columns=linear_feature_columns, + dnn_feature_columns=dnn_feature_columns) + + head = head_lib._regression_head( # pylint: disable=protected-access + label_dimension=label_dimension, + weight_column=weight_column, + loss_reduction=loss_reduction) + + def _model_fn(features, labels, mode, config): + """Call the _dnn_linear_combined_model_fn.""" + return _dnn_linear_combined_model_fn( + features=features, + labels=labels, + mode=mode, + head=head, + linear_feature_columns=linear_feature_columns, + linear_optimizer=linear_optimizer, + dnn_feature_columns=dnn_feature_columns, + dnn_optimizer=dnn_optimizer, + dnn_hidden_units=dnn_hidden_units, + dnn_activation_fn=dnn_activation_fn, + dnn_dropout=dnn_dropout, + input_layer_partitioner=input_layer_partitioner, + config=config, + batch_norm=batch_norm, + linear_sparse_combiner=linear_sparse_combiner) + + super(DNNLinearCombinedRegressor, self).__init__( + model_fn=_model_fn, + model_dir=model_dir, + config=config, + warm_start_from=warm_start_from) -- Gitee From c9dd4d2106e7cfd97053859c7abd96c517e7cc74 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:33:52 +0000 Subject: [PATCH 17/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json index dc3acd33c..8d6adc339 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "0", - "device_ip": "192.1.2.8", + "device_ip": "192.168.1.194", "rank_id": "0" } ], -- Gitee From d3114029ef8b6b62592d6f9835d620dd4b52cb34 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:34:10 +0000 Subject: [PATCH 18/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_0.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json index dc3acd33c..8d6adc339 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "0", - "device_ip": "192.1.2.8", + "device_ip": "192.168.1.194", "rank_id": "0" } ], -- Gitee From 652966d612c2b5637bfaafd5bcf17f7e11b48d07 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:34:26 +0000 Subject: [PATCH 19/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_1.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json index 53ae106a0..85c339086 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "1", - "device_ip": "192.1.2.8", + "device_ip": "192.168.1.194", "rank_id": "0" } ], -- Gitee From aed7e4f26014268ee71b01a4e0e08165ad2e97cd Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:34:38 +0000 Subject: [PATCH 20/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json index 8d8b18324..75161d569 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "2", - "device_ip": "192.1.2.8", + "device_ip": "192.168.1.194", "rank_id": "0" } ], -- Gitee From 23a24f5d37a10cac74a2ef6fbb5495acf4e7f1aa Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:34:51 +0000 Subject: [PATCH 21/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_3.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json index a7a6433b8..1fb30209a 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "3", - "device_ip": "192.1.2.8", + "device_ip": "192.168.1.194", "rank_id": "0" } ], -- Gitee From 0fa37d64ac03059a70d94894dc55aed254cfd853 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:35:05 +0000 Subject: [PATCH 22/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_4.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json index 9651658c8..698ceab9b 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "4", - "device_ip": "192.1.2.8", + "device_ip": "192.168.1.194", "rank_id": "0" } ], -- Gitee From 97d3cb241c7c05e5ae28071e0c235158a117b328 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:35:19 +0000 Subject: [PATCH 23/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_5.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json index 69b978d67..3578903b6 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "5", - "device_ip": "192.1.2.8", + "device_ip": "192.168.1.194", "rank_id": "0" } ], -- Gitee From 2e670681f43a782857e10794885ec423c9352718 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:35:33 +0000 Subject: [PATCH 24/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_6.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json index b094739d2..16945a9b2 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "6", - "device_ip": "192.1.2.8", + "device_ip": "192.168.1.194", "rank_id": "0" } ], -- Gitee From c7314332179532cfb800e161df1c62b037510d6b Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:35:46 +0000 Subject: [PATCH 25/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_7.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json index 00a472e8d..f684200e0 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "7", - "device_ip": "192.1.2.8", + "device_ip": "192.168.1.194", "rank_id": "0" } ], -- Gitee From 210aea55304bc8bd86ddd5a8fcb85b028d6012e9 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:40:23 +0000 Subject: [PATCH 26/69] update WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh. --- .../WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh index 1cfb02f3e..51ccd6c49 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh @@ -97,7 +97,7 @@ else fi if [ -d $cur_path/../config/1p_$ASCEND_DEVICE.json ];then - export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE.json + export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE_ID.json export RANK_ID=0 else export RANK_TABLE_FILE=$cur_path/../config/1p_0.json -- Gitee From 4e14b2b1e82a79a00c148f27efca6d705cff99a0 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 03:51:22 +0000 Subject: [PATCH 27/69] update WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh. --- .../test/train_performance_1p.sh | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh index 51ccd6c49..94c1f3844 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh @@ -96,13 +96,9 @@ else mkdir -p $cur_path/output/$ASCEND_DEVICE_ID/ckpt fi -if [ -d $cur_path/../config/1p_$ASCEND_DEVICE.json ];then - export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE_ID.json - export RANK_ID=0 -else - export RANK_TABLE_FILE=$cur_path/../config/1p_0.json - export RANK_ID=0 -fi +export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE_ID.json +export RANK_ID=0 + wait cd $cur_path/../ -- Gitee From 75a193619f12ffaf5df4dadeb34cca722c681d54 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 05:54:40 +0000 Subject: [PATCH 28/69] update WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh. --- .../test/train_performance_1p.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh index 94c1f3844..48fb10f54 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh @@ -96,8 +96,8 @@ else mkdir -p $cur_path/output/$ASCEND_DEVICE_ID/ckpt fi -export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE_ID.json -export RANK_ID=0 +#export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE_ID.json +#export RANK_ID=0 wait -- Gitee From 2ea5e70f16b2f2a37e72100ad9813c22640af236 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:04:59 +0000 Subject: [PATCH 29/69] update WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh. --- .../test/train_performance_1p.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh index 48fb10f54..054a0fb51 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh @@ -96,8 +96,13 @@ else mkdir -p $cur_path/output/$ASCEND_DEVICE_ID/ckpt fi -#export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE_ID.json -#export RANK_ID=0 +if [ -f $cur_path/../config/1p_$ASCEND_DEVICE.json ];then + export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE.json + export RANK_ID=0 +else + export RANK_TABLE_FILE=$cur_path/../config/1p_0.json + export RANK_ID=0 +fi wait -- Gitee From 482f48483aee60845317e88cfa28881ceb650dd2 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:07:03 +0000 Subject: [PATCH 30/69] update WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh. --- .../test/train_performance_1p.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh index 054a0fb51..ce6d84fe5 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh @@ -96,8 +96,8 @@ else mkdir -p $cur_path/output/$ASCEND_DEVICE_ID/ckpt fi -if [ -f $cur_path/../config/1p_$ASCEND_DEVICE.json ];then - export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE.json +if [ -f $cur_path/../config/1p_$ASCEND_DEVICE_ID.json ];then + export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE_ID.json export RANK_ID=0 else export RANK_TABLE_FILE=$cur_path/../config/1p_0.json -- Gitee From 5072ca77d496631ae64014666be42a12933b45f3 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:09:38 +0000 Subject: [PATCH 31/69] update WideDeep_ID2940_for_TensorFlow/config/1p.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json index 8d6adc339..e635f956f 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "0", - "device_ip": "192.168.1.194", + "device_ip": "192.168.1.195", "rank_id": "0" } ], -- Gitee From fb95f734600b80a06d2feee4df07269a052a1230 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:10:09 +0000 Subject: [PATCH 32/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_0.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json index 8d6adc339..e635f956f 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "0", - "device_ip": "192.168.1.194", + "device_ip": "192.168.1.195", "rank_id": "0" } ], -- Gitee From cf5ae9f54d265aa552e36f5cc6000c7e8e4d4990 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:10:20 +0000 Subject: [PATCH 33/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_1.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json index 85c339086..40b0ffc1a 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "1", - "device_ip": "192.168.1.194", + "device_ip": "192.168.1.195", "rank_id": "0" } ], -- Gitee From c4b7c24482b8e3bac829922bc5c674021294c549 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:10:35 +0000 Subject: [PATCH 34/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json index 75161d569..c62f5051a 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "2", - "device_ip": "192.168.1.194", + "device_ip": "192.168.1.195", "rank_id": "0" } ], -- Gitee From f09d5e3d00a6e0c81d9a448e03d0f6d0c422cc79 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:10:47 +0000 Subject: [PATCH 35/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_3.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json index 1fb30209a..5377e4009 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "3", - "device_ip": "192.168.1.194", + "device_ip": "192.168.1.195", "rank_id": "0" } ], -- Gitee From 90d45851392d4cd2be74c128865f53da0a06da84 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:11:00 +0000 Subject: [PATCH 36/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_4.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json index 698ceab9b..ba5df8845 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "4", - "device_ip": "192.168.1.194", + "device_ip": "192.168.1.195", "rank_id": "0" } ], -- Gitee From b2bac40e2eecc7fefe831ac0b01b03ddf28d7f08 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:11:13 +0000 Subject: [PATCH 37/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_5.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json index 3578903b6..64ab04c28 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "5", - "device_ip": "192.168.1.194", + "device_ip": "192.168.1.195", "rank_id": "0" } ], -- Gitee From ac368602eff17f9ee291ce11f94bd290c471bc5e Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:11:29 +0000 Subject: [PATCH 38/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_6.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json index 16945a9b2..6c2a94a12 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "6", - "device_ip": "192.168.1.194", + "device_ip": "192.168.1.195", "rank_id": "0" } ], -- Gitee From 0fe18f09e671d404c6ec4772a90aec25276dd262 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:11:43 +0000 Subject: [PATCH 39/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_7.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json index f684200e0..f14ded77a 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "7", - "device_ip": "192.168.1.194", + "device_ip": "192.168.1.195", "rank_id": "0" } ], -- Gitee From e57abe4d2754cb3fd0a1fd764fcff4e5f5310bea Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:30:44 +0000 Subject: [PATCH 40/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json index e635f956f..2ac630a2a 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "0", - "device_ip": "192.168.1.195", + "device_ip": "192.168.30.65", "rank_id": "0" } ], -- Gitee From e3664c9241a2cf4c4f27ac1eb4b6639df436fbd9 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:30:58 +0000 Subject: [PATCH 41/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_0.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json index e635f956f..2ac630a2a 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "0", - "device_ip": "192.168.1.195", + "device_ip": "192.168.30.65", "rank_id": "0" } ], -- Gitee From 64a621fb723bff48a0d027ce81e0aba29c58e2b6 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:31:13 +0000 Subject: [PATCH 42/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json index c62f5051a..2df299ac1 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "2", - "device_ip": "192.168.1.195", + "device_ip": "192.168.30.65", "rank_id": "0" } ], -- Gitee From fde1dbf7838559ce81f34cefae446289b89b910d Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:31:26 +0000 Subject: [PATCH 43/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_3.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json index 5377e4009..70141c0be 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "3", - "device_ip": "192.168.1.195", + "device_ip": "192.168.30.65", "rank_id": "0" } ], -- Gitee From 19be5e7da54c7e3f751dcd7af6b246568c83ba3c Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:31:41 +0000 Subject: [PATCH 44/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_4.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json index ba5df8845..8896206bb 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "4", - "device_ip": "192.168.1.195", + "device_ip": "192.168.30.65", "rank_id": "0" } ], -- Gitee From f8f6c4f765395835dac72e602d225c35f78179a0 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:31:54 +0000 Subject: [PATCH 45/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_5.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json index 64ab04c28..d17014d19 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "5", - "device_ip": "192.168.1.195", + "device_ip": "192.168.30.65", "rank_id": "0" } ], -- Gitee From 699121c9cf77f87ddb7c5414fd6b94aa74f5b591 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:32:07 +0000 Subject: [PATCH 46/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_6.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json index 6c2a94a12..88e4cbcf8 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "6", - "device_ip": "192.168.1.195", + "device_ip": "192.168.30.65", "rank_id": "0" } ], -- Gitee From 2c2f48d1f5a943d6cc9e62ff6932ea23b0b4ee93 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:32:22 +0000 Subject: [PATCH 47/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_7.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json index f14ded77a..e030c1394 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "7", - "device_ip": "192.168.1.195", + "device_ip": "192.168.30.65", "rank_id": "0" } ], -- Gitee From bce8af0bbfcf676fb5acb5eb8e311b9ae33da3bd Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:40:42 +0000 Subject: [PATCH 48/69] update WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh. --- .../WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh index ce6d84fe5..6d876fbc2 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh @@ -99,9 +99,11 @@ fi if [ -f $cur_path/../config/1p_$ASCEND_DEVICE_ID.json ];then export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE_ID.json export RANK_ID=0 + echo $RANK_TABLE_FILE else export RANK_TABLE_FILE=$cur_path/../config/1p_0.json export RANK_ID=0 + echo $RANK_TABLE_FILE fi wait -- Gitee From 6b3430f2629fae942f8de949abf06def588e3b5b Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:54:47 +0000 Subject: [PATCH 49/69] update WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh. --- .../test/train_performance_1p.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh index 6d876fbc2..5bcd8bf52 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh @@ -99,11 +99,11 @@ fi if [ -f $cur_path/../config/1p_$ASCEND_DEVICE_ID.json ];then export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE_ID.json export RANK_ID=0 - echo $RANK_TABLE_FILE + env else export RANK_TABLE_FILE=$cur_path/../config/1p_0.json export RANK_ID=0 - echo $RANK_TABLE_FILE + env fi wait -- Gitee From 316a9ee52e4c41e19f9694930e9fd8fc58a701b5 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 06:58:39 +0000 Subject: [PATCH 50/69] update WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh. --- .../test/train_performance_1p.sh | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh index 5bcd8bf52..19bf4ac78 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh @@ -96,15 +96,16 @@ else mkdir -p $cur_path/output/$ASCEND_DEVICE_ID/ckpt fi -if [ -f $cur_path/../config/1p_$ASCEND_DEVICE_ID.json ];then - export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE_ID.json - export RANK_ID=0 - env -else - export RANK_TABLE_FILE=$cur_path/../config/1p_0.json - export RANK_ID=0 - env -fi +export RANK_ID=0 +#if [ -f $cur_path/../config/1p_$ASCEND_DEVICE_ID.json ];then +# export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE_ID.json +# export RANK_ID=0 +# env +#else +# export RANK_TABLE_FILE=$cur_path/../config/1p_0.json +# export RANK_ID=0 +# env +#fi wait -- Gitee From 2917e7f3342fc62c1525df4d3502850e64889b1d Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 07:24:27 +0000 Subject: [PATCH 51/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/trainer/task.py. --- .../WideDeep_ID2940_for_TensorFlow/trainer/task.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/trainer/task.py b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/trainer/task.py index c87fc83ed..99e796eb3 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/trainer/task.py +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/trainer/task.py @@ -27,10 +27,6 @@ import tensorflow as tf import tensorflow_transform as tft from tensorflow.core.protobuf import rewriter_config_pb2 from trainer import features -#from utils.dataloader import separate_input_fn -#from utils.hooks.benchmark_hooks import BenchmarkLoggingHook -#from utils.metrics import map_custom_metric, map_custom_metric_with_leak -#from utils.schedulers import learning_rate_scheduler from util.dataloader import separate_input_fn from util.hooks.benchmark_hooks import BenchmarkLoggingHook from util.metrics import map_custom_metric, map_custom_metric_with_leak @@ -369,12 +365,6 @@ def main(FLAGS): int(FLAGS.eval_epoch_interval * steps_per_epoch) count_steps = FLAGS.benchmark_steps + 1 if FLAGS.benchmark else 100 - #run_config = tf.estimator.RunConfig(model_dir=model_dir, save_summary_steps=0) \ - # .replace(session_config=session_config, - # save_checkpoints_steps=save_checkpoints_steps, - # save_summary_steps=count_steps, - # log_step_count_steps=count_steps, - # keep_checkpoint_max=1) run_config = tf.estimator.RunConfig(model_dir=model_dir, save_summary_steps=0, session_config=session_config, save_checkpoints_steps=save_checkpoints_steps, log_step_count_steps=count_steps, keep_checkpoint_max=1) def wide_optimizer(): -- Gitee From fd2561b0e3937ae63b5fe8412ac0c477cf0da2d7 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 07:40:56 +0000 Subject: [PATCH 52/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_0.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json index 2ac630a2a..dc3acd33c 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "0", - "device_ip": "192.168.30.65", + "device_ip": "192.1.2.8", "rank_id": "0" } ], -- Gitee From 079ad8ead44e0bb83fdac1f6ac10d77c450ad4a5 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 07:41:10 +0000 Subject: [PATCH 53/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_1.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json index 40b0ffc1a..a274a0e67 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "1", - "device_ip": "192.168.1.195", + "device_ip": "192.2.2.8", "rank_id": "0" } ], -- Gitee From bc5c9a97bfba97e545b8e18071af7ef6793e2996 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 07:41:24 +0000 Subject: [PATCH 54/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json index 2df299ac1..046e209a3 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "2", - "device_ip": "192.168.30.65", + "device_ip": "192.3.2.8", "rank_id": "0" } ], -- Gitee From 6686d515a85104d74e11bc0aadc6d1b1957cb98d Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 07:41:43 +0000 Subject: [PATCH 55/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_3.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json index 70141c0be..4a42fd88c 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "3", - "device_ip": "192.168.30.65", + "device_ip": "192.4.2.8", "rank_id": "0" } ], -- Gitee From b5455900c025868894f420b725f92038fb6bf8f1 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 07:42:01 +0000 Subject: [PATCH 56/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_4.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json index 8896206bb..bf95880d7 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "4", - "device_ip": "192.168.30.65", + "device_ip": "192.1.2.9", "rank_id": "0" } ], -- Gitee From cc4d5437a4e01a6a1cd4448ae464236076740d62 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 07:42:16 +0000 Subject: [PATCH 57/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_5.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json index d17014d19..4503bc30e 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "5", - "device_ip": "192.168.30.65", + "device_ip": "192.2.2.9", "rank_id": "0" } ], -- Gitee From 04025f4908bdf11795910373536f30eaf0afb2f1 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 07:42:31 +0000 Subject: [PATCH 58/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_6.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json index 88e4cbcf8..144d2df41 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "6", - "device_ip": "192.168.30.65", + "device_ip": "192.3.2.9", "rank_id": "0" } ], -- Gitee From 784858a64e002846397322b87c651c38939f2c39 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 07:42:46 +0000 Subject: [PATCH 59/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json. --- .../WideDeep_ID2940_for_TensorFlow/config/1p_7.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json index e030c1394..581e6fc89 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json @@ -6,7 +6,7 @@ "devices": [ { "device_id": "7", - "device_ip": "192.168.30.65", + "device_ip": "192.4.2.9", "rank_id": "0" } ], -- Gitee From 36deaef1b1bce97d6b3ed97ca0133a315686d74d Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 07:43:32 +0000 Subject: [PATCH 60/69] update WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh. --- .../test/train_performance_1p.sh | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh index 19bf4ac78..ce6d84fe5 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_performance_1p.sh @@ -96,16 +96,13 @@ else mkdir -p $cur_path/output/$ASCEND_DEVICE_ID/ckpt fi -export RANK_ID=0 -#if [ -f $cur_path/../config/1p_$ASCEND_DEVICE_ID.json ];then -# export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE_ID.json -# export RANK_ID=0 -# env -#else -# export RANK_TABLE_FILE=$cur_path/../config/1p_0.json -# export RANK_ID=0 -# env -#fi +if [ -f $cur_path/../config/1p_$ASCEND_DEVICE_ID.json ];then + export RANK_TABLE_FILE=$cur_path/../config/1p_$ASCEND_DEVICE_ID.json + export RANK_ID=0 +else + export RANK_TABLE_FILE=$cur_path/../config/1p_0.json + export RANK_ID=0 +fi wait -- Gitee From 5d70950fcd62f55f30ec586496f93745d6c11ed4 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 09:31:24 +0000 Subject: [PATCH 61/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json. --- .../config/1p.json | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json index 2ac630a2a..28426dea5 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p.json @@ -1,18 +1,8 @@ { - "server_count": "1", - "server_list": - [ - { - "devices": [ - { - "device_id": "0", - "device_ip": "192.168.30.65", - "rank_id": "0" - } - ], - "server_id": "127.0.0.1" - } - ], - "status": "completed", - "version":"1.0" -} \ No newline at end of file +"server_count":"1", +"server_list":[{ + "device":[{"device_id":"0","device_ip":"192.168.1.195","rank_id":"0"}], + "server_id":"127.0.0.1"}], +"status":"completed", +"version":"1.0" +} -- Gitee From d56911939f616c3d71b352c280bf1e2ae569a5d5 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 09:31:46 +0000 Subject: [PATCH 62/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json. --- .../config/1p_0.json | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json index dc3acd33c..28426dea5 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_0.json @@ -1,18 +1,8 @@ { - "server_count": "1", - "server_list": - [ - { - "devices": [ - { - "device_id": "0", - "device_ip": "192.1.2.8", - "rank_id": "0" - } - ], - "server_id": "127.0.0.1" - } - ], - "status": "completed", - "version":"1.0" -} \ No newline at end of file +"server_count":"1", +"server_list":[{ + "device":[{"device_id":"0","device_ip":"192.168.1.195","rank_id":"0"}], + "server_id":"127.0.0.1"}], +"status":"completed", +"version":"1.0" +} -- Gitee From 485c2605d82342eb686c6881fe50bb9f6e4c5440 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 09:32:06 +0000 Subject: [PATCH 63/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json. --- .../config/1p_1.json | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json index a274a0e67..d3e1c570b 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_1.json @@ -1,18 +1,8 @@ { - "server_count": "1", - "server_list": - [ - { - "devices": [ - { - "device_id": "1", - "device_ip": "192.2.2.8", - "rank_id": "0" - } - ], - "server_id": "127.0.0.1" - } - ], - "status": "completed", - "version":"1.0" -} \ No newline at end of file +"server_count":"1", +"server_list":[{ + "device":[{"device_id":"1","device_ip":"192.168.1.195","rank_id":"0"}], + "server_id":"127.0.0.1"}], +"status":"completed", +"version":"1.0" +} -- Gitee From 1a27b754522f653f5404726ad613762d3ab279a5 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 09:32:22 +0000 Subject: [PATCH 64/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json. --- .../config/1p_2.json | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json index 046e209a3..93c4a960c 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_2.json @@ -1,18 +1,8 @@ { - "server_count": "1", - "server_list": - [ - { - "devices": [ - { - "device_id": "2", - "device_ip": "192.3.2.8", - "rank_id": "0" - } - ], - "server_id": "127.0.0.1" - } - ], - "status": "completed", - "version":"1.0" -} \ No newline at end of file +"server_count":"1", +"server_list":[{ + "device":[{"device_id":"2","device_ip":"192.168.1.195","rank_id":"0"}], + "server_id":"127.0.0.1"}], +"status":"completed", +"version":"1.0" +} -- Gitee From 201e36e2af025272a0e0c9aeca65cd709e85845b Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 09:32:35 +0000 Subject: [PATCH 65/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json. --- .../config/1p_3.json | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json index 4a42fd88c..4ed1fcf81 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_3.json @@ -1,18 +1,8 @@ { - "server_count": "1", - "server_list": - [ - { - "devices": [ - { - "device_id": "3", - "device_ip": "192.4.2.8", - "rank_id": "0" - } - ], - "server_id": "127.0.0.1" - } - ], - "status": "completed", - "version":"1.0" -} \ No newline at end of file +"server_count":"1", +"server_list":[{ + "device":[{"device_id":"3","device_ip":"192.168.1.195","rank_id":"0"}], + "server_id":"127.0.0.1"}], +"status":"completed", +"version":"1.0" +} -- Gitee From f5eb4f985903c80c15c72d6b23dd23f2a594eb5d Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 09:32:48 +0000 Subject: [PATCH 66/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json. --- .../config/1p_4.json | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json index bf95880d7..cdde74396 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_4.json @@ -1,18 +1,8 @@ { - "server_count": "1", - "server_list": - [ - { - "devices": [ - { - "device_id": "4", - "device_ip": "192.1.2.9", - "rank_id": "0" - } - ], - "server_id": "127.0.0.1" - } - ], - "status": "completed", - "version":"1.0" -} \ No newline at end of file +"server_count":"1", +"server_list":[{ + "device":[{"device_id":"4","device_ip":"192.168.1.195","rank_id":"0"}], + "server_id":"127.0.0.1"}], +"status":"completed", +"version":"1.0" +} -- Gitee From 8af00d1deb64894adebf1568ab304cb5008c8f16 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 09:33:02 +0000 Subject: [PATCH 67/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json. --- .../config/1p_5.json | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json index 4503bc30e..c0a789029 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_5.json @@ -1,18 +1,8 @@ { - "server_count": "1", - "server_list": - [ - { - "devices": [ - { - "device_id": "5", - "device_ip": "192.2.2.9", - "rank_id": "0" - } - ], - "server_id": "127.0.0.1" - } - ], - "status": "completed", - "version":"1.0" -} \ No newline at end of file +"server_count":"1", +"server_list":[{ + "device":[{"device_id":"5","device_ip":"192.168.1.195","rank_id":"0"}], + "server_id":"127.0.0.1"}], +"status":"completed", +"version":"1.0" +} -- Gitee From 6008fdbdf0f8565352be48da7a5bf83a8530dc65 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 09:33:22 +0000 Subject: [PATCH 68/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json. --- .../config/1p_6.json | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json index 144d2df41..2c4b32112 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_6.json @@ -1,18 +1,8 @@ { - "server_count": "1", - "server_list": - [ - { - "devices": [ - { - "device_id": "6", - "device_ip": "192.3.2.9", - "rank_id": "0" - } - ], - "server_id": "127.0.0.1" - } - ], - "status": "completed", - "version":"1.0" -} \ No newline at end of file +"server_count":"1", +"server_list":[{ + "device":[{"device_id":"6","device_ip":"192.168.1.195","rank_id":"0"}], + "server_id":"127.0.0.1"}], +"status":"completed", +"version":"1.0" +} -- Gitee From 70cdd934949e9b7fdbd7a4a0aee66304690460d2 Mon Sep 17 00:00:00 2001 From: majun121 <867479212@qq.com> Date: Tue, 19 Apr 2022 09:33:36 +0000 Subject: [PATCH 69/69] update TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json. --- .../config/1p_7.json | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json index 581e6fc89..01399b72c 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/config/1p_7.json @@ -1,18 +1,8 @@ { - "server_count": "1", - "server_list": - [ - { - "devices": [ - { - "device_id": "7", - "device_ip": "192.4.2.9", - "rank_id": "0" - } - ], - "server_id": "127.0.0.1" - } - ], - "status": "completed", - "version":"1.0" -} \ No newline at end of file +"server_count":"1", +"server_list":[{ + "device":[{"device_id":"7","device_ip":"192.168.1.195","rank_id":"0"}], + "server_id":"127.0.0.1"}], +"status":"completed", +"version":"1.0" +} -- Gitee