From 704baec0adab5f05b8b48a184f7639042da60dfb Mon Sep 17 00:00:00 2001 From: daishine Date: Tue, 26 Dec 2023 10:20:29 +0800 Subject: [PATCH 1/5] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E5=BB=BA?= =?UTF-8?q?=E9=93=BE=E5=A2=9E=E5=8A=A0=E6=97=A5=E5=BF=97=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- torch_npu/csrc/distributed/ProcessGroupHCCL.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp index 46ec61a5fa5..59346e88536 100644 --- a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp +++ b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp @@ -990,23 +990,25 @@ std::vector>& ProcessGroupHCCL::getHCCLComm( if (rank_ == 0) { HCCL_CHECK_ERROR(HcclGetRootInfo(&hcclID)); } + ASCEND_LOGE("start broadcast master id----------") broadcastMasterID(&hcclID); - + ASCEND_LOGE("end broadcast master id----------") c10_npu::OptionalNPUGuard npuGuard; std::vector streamVal; streamVal.reserve(devices.size()); - + fprintf() for (size_t i = 0; i < devices.size(); ++i) { int numRanks = getSize(); int rank = getRank() * devices.size() + i; npuGuard.set_index(devices[i].index()); + ASCEND_LOGE("start create hccl com, rank %d ----------",rank) hcclComms[i] = HCCLComm::create(numRanks, rank, hcclID); - + ASCEND_LOGE("end create hccl com, rank %d ----------",rank) // Creates the HCCL streams streamVal.push_back(c10_npu::getNPUStreamFromPool(devices[i].index())); } - + ASCEND_LOGE("end hccl com ----------") hcclStreams_.emplace(devicesKey, std::move(streamVal)); // Note: these events are created with the (default) cudaEventDisableTiming -- Gitee From 1dbc5a64f7cdb96fdae1f6d4df67cafc261d20d0 Mon Sep 17 00:00:00 2001 From: daishine Date: Tue, 26 Dec 2023 10:20:29 +0800 Subject: [PATCH 2/5] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E5=BB=BA?= =?UTF-8?q?=E9=93=BE=E5=A2=9E=E5=8A=A0=E6=97=A5=E5=BF=97=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- torch_npu/csrc/distributed/ProcessGroupHCCL.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp index 46ec61a5fa5..f708cd2aac2 100644 --- a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp +++ b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp @@ -990,8 +990,9 @@ std::vector>& ProcessGroupHCCL::getHCCLComm( if (rank_ == 0) { HCCL_CHECK_ERROR(HcclGetRootInfo(&hcclID)); } + ASCEND_LOGE("start broadcast master id----------") broadcastMasterID(&hcclID); - + ASCEND_LOGE("end broadcast master id----------") c10_npu::OptionalNPUGuard npuGuard; std::vector streamVal; streamVal.reserve(devices.size()); @@ -1001,12 +1002,13 @@ std::vector>& ProcessGroupHCCL::getHCCLComm( int rank = getRank() * devices.size() + i; npuGuard.set_index(devices[i].index()); + ASCEND_LOGE("start create hccl com, rank %d ----------", rank) hcclComms[i] = HCCLComm::create(numRanks, rank, hcclID); - + ASCEND_LOGE("end create hccl com, rank %d ----------", rank) // Creates the HCCL streams streamVal.push_back(c10_npu::getNPUStreamFromPool(devices[i].index())); } - + ASCEND_LOGE("end hccl com ----------") hcclStreams_.emplace(devicesKey, std::move(streamVal)); // Note: these events are created with the (default) cudaEventDisableTiming -- Gitee From 14713c3e0fdfd7139014a9ab9901c68a9d3eb711 Mon Sep 17 00:00:00 2001 From: daishine Date: Tue, 26 Dec 2023 10:51:25 +0800 Subject: [PATCH 3/5] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E5=BB=BA?= =?UTF-8?q?=E9=93=BE=E5=A2=9E=E5=8A=A0=E6=97=A5=E5=BF=97=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- torch_npu/csrc/distributed/ProcessGroupHCCL.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp index 46ec61a5fa5..f708cd2aac2 100644 --- a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp +++ b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp @@ -990,8 +990,9 @@ std::vector>& ProcessGroupHCCL::getHCCLComm( if (rank_ == 0) { HCCL_CHECK_ERROR(HcclGetRootInfo(&hcclID)); } + ASCEND_LOGE("start broadcast master id----------") broadcastMasterID(&hcclID); - + ASCEND_LOGE("end broadcast master id----------") c10_npu::OptionalNPUGuard npuGuard; std::vector streamVal; streamVal.reserve(devices.size()); @@ -1001,12 +1002,13 @@ std::vector>& ProcessGroupHCCL::getHCCLComm( int rank = getRank() * devices.size() + i; npuGuard.set_index(devices[i].index()); + ASCEND_LOGE("start create hccl com, rank %d ----------", rank) hcclComms[i] = HCCLComm::create(numRanks, rank, hcclID); - + ASCEND_LOGE("end create hccl com, rank %d ----------", rank) // Creates the HCCL streams streamVal.push_back(c10_npu::getNPUStreamFromPool(devices[i].index())); } - + ASCEND_LOGE("end hccl com ----------") hcclStreams_.emplace(devicesKey, std::move(streamVal)); // Note: these events are created with the (default) cudaEventDisableTiming -- Gitee From 599b7b534f1ed409ae4005629ce109e04b32be4f Mon Sep 17 00:00:00 2001 From: daishine Date: Tue, 26 Dec 2023 11:47:07 +0800 Subject: [PATCH 4/5] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E5=BB=BA?= =?UTF-8?q?=E9=93=BE=E5=A2=9E=E5=8A=A0=E6=97=A5=E5=BF=97=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- torch_npu/csrc/distributed/ProcessGroupHCCL.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp index f708cd2aac2..cc7dffed614 100644 --- a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp +++ b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp @@ -990,9 +990,9 @@ std::vector>& ProcessGroupHCCL::getHCCLComm( if (rank_ == 0) { HCCL_CHECK_ERROR(HcclGetRootInfo(&hcclID)); } - ASCEND_LOGE("start broadcast master id----------") + ASCEND_LOGE("start broadcast master id----------"); broadcastMasterID(&hcclID); - ASCEND_LOGE("end broadcast master id----------") + ASCEND_LOGE("end broadcast master id----------"); c10_npu::OptionalNPUGuard npuGuard; std::vector streamVal; streamVal.reserve(devices.size()); @@ -1002,13 +1002,13 @@ std::vector>& ProcessGroupHCCL::getHCCLComm( int rank = getRank() * devices.size() + i; npuGuard.set_index(devices[i].index()); - ASCEND_LOGE("start create hccl com, rank %d ----------", rank) + ASCEND_LOGE("start create hccl com, rank %d ----------", rank); hcclComms[i] = HCCLComm::create(numRanks, rank, hcclID); - ASCEND_LOGE("end create hccl com, rank %d ----------", rank) + ASCEND_LOGE("end create hccl com, rank %d ----------", rank); // Creates the HCCL streams streamVal.push_back(c10_npu::getNPUStreamFromPool(devices[i].index())); } - ASCEND_LOGE("end hccl com ----------") + ASCEND_LOGE("end hccl com ----------"); hcclStreams_.emplace(devicesKey, std::move(streamVal)); // Note: these events are created with the (default) cudaEventDisableTiming -- Gitee From 5c328f492f4c71fa343c60b839934beb74d0874c Mon Sep 17 00:00:00 2001 From: daishine Date: Tue, 26 Dec 2023 16:15:26 +0800 Subject: [PATCH 5/5] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E5=BB=BA?= =?UTF-8?q?=E9=93=BE=E5=A2=9E=E5=8A=A0=E6=97=A5=E5=BF=97=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- torch_npu/csrc/distributed/ProcessGroupHCCL.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp index cc7dffed614..d21e94fd3a6 100644 --- a/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp +++ b/torch_npu/csrc/distributed/ProcessGroupHCCL.cpp @@ -990,9 +990,9 @@ std::vector>& ProcessGroupHCCL::getHCCLComm( if (rank_ == 0) { HCCL_CHECK_ERROR(HcclGetRootInfo(&hcclID)); } - ASCEND_LOGE("start broadcast master id----------"); + ASCEND_LOGE(" start broadcast master id"); broadcastMasterID(&hcclID); - ASCEND_LOGE("end broadcast master id----------"); + ASCEND_LOGE(" end broadcast master id"); c10_npu::OptionalNPUGuard npuGuard; std::vector streamVal; streamVal.reserve(devices.size()); @@ -1002,13 +1002,13 @@ std::vector>& ProcessGroupHCCL::getHCCLComm( int rank = getRank() * devices.size() + i; npuGuard.set_index(devices[i].index()); - ASCEND_LOGE("start create hccl com, rank %d ----------", rank); + ASCEND_LOGE(" start create hccl communication, rank %d", rank); hcclComms[i] = HCCLComm::create(numRanks, rank, hcclID); - ASCEND_LOGE("end create hccl com, rank %d ----------", rank); + ASCEND_LOGE(" end create hccl communication, rank %d", rank); // Creates the HCCL streams streamVal.push_back(c10_npu::getNPUStreamFromPool(devices[i].index())); } - ASCEND_LOGE("end hccl com ----------"); + ASCEND_LOGE(" end hccl communication"); hcclStreams_.emplace(devicesKey, std::move(streamVal)); // Note: these events are created with the (default) cudaEventDisableTiming -- Gitee