From 704baec0adab5f05b8b48a184f7639042da60dfb Mon Sep 17 00:00:00 2001
From: daishine <daisy_chengjh@163.com>
Date: Tue, 26 Dec 2023 10:20:29 +0800
Subject: [PATCH 1/5] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E5=BB=BA?=
 =?UTF-8?q?=E9=93=BE=E5=A2=9E=E5=8A=A0=E6=97=A5=E5=BF=97=E6=89=93=E5=8D=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 torch_npu/csrc/distributed/ProcessGroupHCCL.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
index 46ec61a5fa5..59346e88536 100644
--- a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
+++ b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
@@ -990,23 +990,25 @@ std::vector<std::shared_ptr<HCCLComm>>& ProcessGroupHCCL::getHCCLComm(
   if (rank_ == 0) {
     HCCL_CHECK_ERROR(HcclGetRootInfo(&hcclID));
   }
+  ASCEND_LOGE("start broadcast master id----------")
   broadcastMasterID(&hcclID);
-
+  ASCEND_LOGE("end broadcast master id----------")
   c10_npu::OptionalNPUGuard npuGuard;
   std::vector<c10_npu::NPUStream> streamVal;
   streamVal.reserve(devices.size());
-
+  fprintf()
   for (size_t i = 0; i < devices.size(); ++i) {
     int numRanks = getSize();
     int rank = getRank() * devices.size() + i;
 
     npuGuard.set_index(devices[i].index());
+    ASCEND_LOGE("start create hccl com, rank %d ----------",rank)
     hcclComms[i] = HCCLComm::create(numRanks, rank, hcclID);
-
+    ASCEND_LOGE("end create hccl com, rank %d ----------",rank)
     // Creates the HCCL streams
     streamVal.push_back(c10_npu::getNPUStreamFromPool(devices[i].index()));
   }
-
+  ASCEND_LOGE("end hccl com ----------")
   hcclStreams_.emplace(devicesKey, std::move(streamVal));
 
   // Note: these events are created with the (default) cudaEventDisableTiming
-- 
Gitee


From 1dbc5a64f7cdb96fdae1f6d4df67cafc261d20d0 Mon Sep 17 00:00:00 2001
From: daishine <daisy_chengjh@163.com>
Date: Tue, 26 Dec 2023 10:20:29 +0800
Subject: [PATCH 2/5] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E5=BB=BA?=
 =?UTF-8?q?=E9=93=BE=E5=A2=9E=E5=8A=A0=E6=97=A5=E5=BF=97=E6=89=93=E5=8D=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 torch_npu/csrc/distributed/ProcessGroupHCCL.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
index 46ec61a5fa5..f708cd2aac2 100644
--- a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
+++ b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
@@ -990,8 +990,9 @@ std::vector<std::shared_ptr<HCCLComm>>& ProcessGroupHCCL::getHCCLComm(
   if (rank_ == 0) {
     HCCL_CHECK_ERROR(HcclGetRootInfo(&hcclID));
   }
+  ASCEND_LOGE("start broadcast master id----------")
   broadcastMasterID(&hcclID);
-
+  ASCEND_LOGE("end broadcast master id----------")
   c10_npu::OptionalNPUGuard npuGuard;
   std::vector<c10_npu::NPUStream> streamVal;
   streamVal.reserve(devices.size());
@@ -1001,12 +1002,13 @@ std::vector<std::shared_ptr<HCCLComm>>& ProcessGroupHCCL::getHCCLComm(
     int rank = getRank() * devices.size() + i;
 
     npuGuard.set_index(devices[i].index());
+    ASCEND_LOGE("start create hccl com, rank %d ----------", rank)
     hcclComms[i] = HCCLComm::create(numRanks, rank, hcclID);
-
+    ASCEND_LOGE("end create hccl com, rank %d ----------", rank)
     // Creates the HCCL streams
     streamVal.push_back(c10_npu::getNPUStreamFromPool(devices[i].index()));
   }
-
+  ASCEND_LOGE("end hccl com ----------")
   hcclStreams_.emplace(devicesKey, std::move(streamVal));
 
   // Note: these events are created with the (default) cudaEventDisableTiming
-- 
Gitee


From 14713c3e0fdfd7139014a9ab9901c68a9d3eb711 Mon Sep 17 00:00:00 2001
From: daishine <daisy_chengjh@163.com>
Date: Tue, 26 Dec 2023 10:51:25 +0800
Subject: [PATCH 3/5] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E5=BB=BA?=
 =?UTF-8?q?=E9=93=BE=E5=A2=9E=E5=8A=A0=E6=97=A5=E5=BF=97=E6=89=93=E5=8D=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 torch_npu/csrc/distributed/ProcessGroupHCCL.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
index 46ec61a5fa5..f708cd2aac2 100644
--- a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
+++ b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
@@ -990,8 +990,9 @@ std::vector<std::shared_ptr<HCCLComm>>& ProcessGroupHCCL::getHCCLComm(
   if (rank_ == 0) {
     HCCL_CHECK_ERROR(HcclGetRootInfo(&hcclID));
   }
+  ASCEND_LOGE("start broadcast master id----------")
   broadcastMasterID(&hcclID);
-
+  ASCEND_LOGE("end broadcast master id----------")
   c10_npu::OptionalNPUGuard npuGuard;
   std::vector<c10_npu::NPUStream> streamVal;
   streamVal.reserve(devices.size());
@@ -1001,12 +1002,13 @@ std::vector<std::shared_ptr<HCCLComm>>& ProcessGroupHCCL::getHCCLComm(
     int rank = getRank() * devices.size() + i;
 
     npuGuard.set_index(devices[i].index());
+    ASCEND_LOGE("start create hccl com, rank %d ----------", rank)
     hcclComms[i] = HCCLComm::create(numRanks, rank, hcclID);
-
+    ASCEND_LOGE("end create hccl com, rank %d ----------", rank)
     // Creates the HCCL streams
     streamVal.push_back(c10_npu::getNPUStreamFromPool(devices[i].index()));
   }
-
+  ASCEND_LOGE("end hccl com ----------")
   hcclStreams_.emplace(devicesKey, std::move(streamVal));
 
   // Note: these events are created with the (default) cudaEventDisableTiming
-- 
Gitee


From 599b7b534f1ed409ae4005629ce109e04b32be4f Mon Sep 17 00:00:00 2001
From: daishine <daisy_chengjh@163.com>
Date: Tue, 26 Dec 2023 11:47:07 +0800
Subject: [PATCH 4/5] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E5=BB=BA?=
 =?UTF-8?q?=E9=93=BE=E5=A2=9E=E5=8A=A0=E6=97=A5=E5=BF=97=E6=89=93=E5=8D=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 torch_npu/csrc/distributed/ProcessGroupHCCL.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
index f708cd2aac2..cc7dffed614 100644
--- a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
+++ b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
@@ -990,9 +990,9 @@ std::vector<std::shared_ptr<HCCLComm>>& ProcessGroupHCCL::getHCCLComm(
   if (rank_ == 0) {
     HCCL_CHECK_ERROR(HcclGetRootInfo(&hcclID));
   }
-  ASCEND_LOGE("start broadcast master id----------")
+  ASCEND_LOGE("start broadcast master id----------");
   broadcastMasterID(&hcclID);
-  ASCEND_LOGE("end broadcast master id----------")
+  ASCEND_LOGE("end broadcast master id----------");
   c10_npu::OptionalNPUGuard npuGuard;
   std::vector<c10_npu::NPUStream> streamVal;
   streamVal.reserve(devices.size());
@@ -1002,13 +1002,13 @@ std::vector<std::shared_ptr<HCCLComm>>& ProcessGroupHCCL::getHCCLComm(
     int rank = getRank() * devices.size() + i;
 
     npuGuard.set_index(devices[i].index());
-    ASCEND_LOGE("start create hccl com, rank %d ----------", rank)
+    ASCEND_LOGE("start create hccl com, rank %d ----------", rank);
     hcclComms[i] = HCCLComm::create(numRanks, rank, hcclID);
-    ASCEND_LOGE("end create hccl com, rank %d ----------", rank)
+    ASCEND_LOGE("end create hccl com, rank %d ----------", rank);
     // Creates the HCCL streams
     streamVal.push_back(c10_npu::getNPUStreamFromPool(devices[i].index()));
   }
-  ASCEND_LOGE("end hccl com ----------")
+  ASCEND_LOGE("end hccl com ----------");
   hcclStreams_.emplace(devicesKey, std::move(streamVal));
 
   // Note: these events are created with the (default) cudaEventDisableTiming
-- 
Gitee


From 5c328f492f4c71fa343c60b839934beb74d0874c Mon Sep 17 00:00:00 2001
From: daishine <daisy_chengjh@163.com>
Date: Tue, 26 Dec 2023 16:15:26 +0800
Subject: [PATCH 5/5] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E5=BB=BA?=
 =?UTF-8?q?=E9=93=BE=E5=A2=9E=E5=8A=A0=E6=97=A5=E5=BF=97=E6=89=93=E5=8D=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 torch_npu/csrc/distributed/ProcessGroupHCCL.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
index cc7dffed614..d21e94fd3a6 100644
--- a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
+++ b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp
@@ -990,9 +990,9 @@ std::vector<std::shared_ptr<HCCLComm>>& ProcessGroupHCCL::getHCCLComm(
   if (rank_ == 0) {
     HCCL_CHECK_ERROR(HcclGetRootInfo(&hcclID));
   }
-  ASCEND_LOGE("start broadcast master id----------");
+  ASCEND_LOGE("<HCCLInit> start broadcast master id");
   broadcastMasterID(&hcclID);
-  ASCEND_LOGE("end broadcast master id----------");
+  ASCEND_LOGE("<HCCLInit> end broadcast master id");
   c10_npu::OptionalNPUGuard npuGuard;
   std::vector<c10_npu::NPUStream> streamVal;
   streamVal.reserve(devices.size());
@@ -1002,13 +1002,13 @@ std::vector<std::shared_ptr<HCCLComm>>& ProcessGroupHCCL::getHCCLComm(
     int rank = getRank() * devices.size() + i;
 
     npuGuard.set_index(devices[i].index());
-    ASCEND_LOGE("start create hccl com, rank %d ----------", rank);
+    ASCEND_LOGE("<HCCLInit> start create hccl communication, rank %d", rank);
     hcclComms[i] = HCCLComm::create(numRanks, rank, hcclID);
-    ASCEND_LOGE("end create hccl com, rank %d ----------", rank);
+    ASCEND_LOGE("<HCCLInit> end create hccl communication, rank %d", rank);
     // Creates the HCCL streams
     streamVal.push_back(c10_npu::getNPUStreamFromPool(devices[i].index()));
   }
-  ASCEND_LOGE("end hccl com ----------");
+  ASCEND_LOGE("<HCCLInit> end hccl communication");
   hcclStreams_.emplace(devicesKey, std::move(streamVal));
 
   // Note: these events are created with the (default) cudaEventDisableTiming
-- 
Gitee