diff --git a/component/taskd/taskd/python/framework/agent/base_agent/agent_network.py b/component/taskd/taskd/python/framework/agent/base_agent/agent_network.py index 6accd3bb483b20ea2886a156af0350c38c20b8c5..e565b551907a220e3df8988dffea2334c454f806 100644 --- a/component/taskd/taskd/python/framework/agent/base_agent/agent_network.py +++ b/component/taskd/taskd/python/framework/agent/base_agent/agent_network.py @@ -121,6 +121,12 @@ class AgentMessageManager(): self.lib.DestroyNetwork(self._network_instance) return + def get_network_instance(self): + """ + Get network instance. + """ + return self._network_instance + def _parse_msg(self, msg_json) -> MsgBody: """ Parse message from taskd manager. @@ -170,6 +176,19 @@ def init_message_manager(network_config, msg_queue): run_log.error("network_config is None!") raise Exception("network_config is None!") msg_manager = AgentMessageManager(network_config, msg_queue) + + time_use = 0 + while True: + if time_use > 60: + run_log.error("init message manager failed!") + return + if msg_manager.get_network_instance() is not None: + run_log.info("init message manager success!") + break + time.sleep(1) + time_use += 1 + run_log.info("wait get_network_instance") + msg_manager.register(network_config.pos.server_rank) msg_manager.receive_message() @@ -185,4 +204,17 @@ def network_send_message(msg :MessageInfo): Send message to taskd manager. """ msg_manager = get_message_manager() + if msg_manager.get_network_instance() is None: + run_log.warning("network instance is None!") + return msg_manager.send_message(msg) + + +def get_msg_network_instance(): + """ + Get network instance. + """ + msg_manager = get_message_manager() + if msg_manager is None: + return None + return msg_manager.get_network_instance() diff --git a/component/taskd/taskd/python/framework/agent/base_agent/base_agent.py b/component/taskd/taskd/python/framework/agent/base_agent/base_agent.py index 4ec66b92e0cf0b12862484339da17a228f9fc6e2..f024db6613988069307204ca3dca0f8ad3252341 100644 --- a/component/taskd/taskd/python/framework/agent/base_agent/base_agent.py +++ b/component/taskd/taskd/python/framework/agent/base_agent/base_agent.py @@ -22,7 +22,8 @@ import uuid from dataclasses import asdict from taskd.python.utils.log import run_log from taskd.python.toolkit.fault_checker.fault_check import grace_exit_pids, stop_pids -from taskd.python.framework.agent.base_agent.agent_network import get_message_manager, network_send_message +from taskd.python.framework.agent.base_agent.agent_network import get_message_manager, network_send_message,\ + get_msg_network_instance from taskd.python.framework.common.type import MsgBody, MessageInfo @@ -115,7 +116,7 @@ class BaseAgent: if time_cost > 60: run_log.error('waiting for message manager timeout') raise ValueError("failed to initialized agent network, initialization message_manager timeout") - if get_message_manager() is None: + if get_msg_network_instance() is None: run_log.info('waiting for message manager') time.sleep(1) time_cost += 1