diff --git a/component/taskd/taskd/go/framework_backend/manager/manager.go b/component/taskd/taskd/go/framework_backend/manager/manager.go index 4579ea24063b9497c53f90d79d7e07c054ac2b2a..096aa3847cbdc490206a47d09614f729dc9207a6 100644 --- a/component/taskd/taskd/go/framework_backend/manager/manager.go +++ b/component/taskd/taskd/go/framework_backend/manager/manager.go @@ -88,6 +88,7 @@ func (m *BaseManager) Init() error { fmt.Printf("manager init hwlog failed, err: %v \n", err) return err } + hwlog.RunLog.Infof("manager config: %v", m.Config) m.svcCtx, m.cancelFunc = context.WithCancel(context.Background()) m.MsgHd = application.NewMsgHandler() m.MsgHd.Start(m.svcCtx) @@ -153,6 +154,7 @@ func (m *BaseManager) registerClusterD(retryTime time.Duration) { hwlog.RunLog.Errorf("get clusterd address err: %v", err) return } + hwlog.RunLog.Infof("get clusterd addr %v", addr) conn, err := grpc.Dial(addr, grpc.WithTransportCredentials(insecure.NewCredentials())) if err != nil { hwlog.RunLog.Errorf("init clusterd connect err: %v", err) diff --git a/component/taskd/taskd/go/framework_backend/worker/worker.go b/component/taskd/taskd/go/framework_backend/worker/worker.go index 59ff21b56085b92cd1962d10aca9a50bf2682bcb..220d69e4ba80cd43d7c8de91487e7efeebe1544e 100644 --- a/component/taskd/taskd/go/framework_backend/worker/worker.go +++ b/component/taskd/taskd/go/framework_backend/worker/worker.go @@ -18,7 +18,6 @@ package worker import "C" import ( "context" - "os" "strconv" "time" @@ -35,7 +34,7 @@ var monitorInitCtx context.Context var monitorInitNotify context.CancelFunc const ( - waitInitMsptiTimeout = 60 * time.Second + waitInitMsptiTimeout = 180 * time.Second ) func init() { @@ -59,13 +58,10 @@ func InitMonitor(ctx context.Context, globalRank int, upperLimitOfDiskInMb int) // InitNetwork register worker to manager func InitNetwork(globalRank, nodeRank int) { + hwlog.RunLog.Infof("worker %d init network begin", globalRank) profiling.GlobalRank = globalRank profiling.NodeRank = nodeRank - ip := os.Getenv("POD_IP") - if ip == "" { - ip = "127.0.0.1" - } - addr := ip + constant.ProxyPort + addr := constant.DefaultIP + constant.ProxyPort var err error netTool, err = net.InitNetwork(&common.TaskNetConfig{ Pos: common.Position{ @@ -81,6 +77,7 @@ func InitNetwork(globalRank, nodeRank int) { if err != nil { hwlog.RunLog.Errorf("worker %d init network err: %v", globalRank, err) } + hwlog.RunLog.Infof("worker %d init network end", globalRank) profiling.NetTool = netTool profiling.NetToolInitNotify() }