From 70f787f6448506526884381850b7798e61c2743e Mon Sep 17 00:00:00 2001 From: chen_liqing Date: Wed, 16 Jul 2025 15:15:30 +0800 Subject: [PATCH] Add debug info for free --- torch_npu/csrc/core/npu/NPUCachingAllocator.cpp | 10 ++++++++++ torch_npu/csrc/core/npu/NPUWorkspaceAllocator.cpp | 2 ++ 2 files changed, 12 insertions(+) diff --git a/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp b/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp index 43a472f79d..6ba0f95f08 100644 --- a/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp +++ b/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp @@ -470,6 +470,7 @@ struct ExpandableSegment { ~ExpandableSegment() { + ASCEND_LOGE("unmapHandles ~ExpandableSegment()."); forEachAllocatedRange([&](size_t begin, size_t end) { unmapHandles(begin, end); }); NPU_CHECK_ERROR(c10_npu::acl::AclrtReleaseMemAddress(ptr_, getHcclComm())); ASCEND_LOGD("NPUCachingAllocator free by AclrtReleaseMemAddress"); @@ -497,6 +498,8 @@ private: handles_.at(i) = c10::nullopt; NPU_CHECK_ERROR(c10_npu::acl::AclrtUnmapMem((char *)ptr_ + segment_size_ * i, getHcclComm())); NPU_CHECK_ERROR(c10_npu::acl::AclrtFreePhysical(h)); + ASCEND_LOGE("NPUCachingAllocator unmap: aclrtUnmapMem ptr=%p", (char *)ptr_ + segment_size_ * i); + ASCEND_LOGE("NPUCachingAllocator unmap: aclrtFreePhysical ptr=%p", h); } ASCEND_LOGD("NPUCachingAllocator unmap: segment_size=%zu", segment_size_); trimHandles(); @@ -1269,6 +1272,7 @@ public: c10_npu::npuSynchronizeDevice(true); } c10_npu::NPUWorkspaceAllocator::emptyCache(device, true, true); + ASCEND_LOGE("release_cached_blocks while !block_found && captures_underway.empty()."); block_found = (release_cached_blocks(true, context) && alloc_block(params, true, context, lock)); } @@ -1595,6 +1599,7 @@ public: c10_npu::npuSynchronizeDevice(check_error); std::lock_guard lock(mutex); c10_npu::NPUWorkspaceAllocator::emptyCache(device, true, check_error); + ASCEND_LOGE("release_cached_blocks while emptyCache."); release_cached_blocks(check_error, context); } @@ -2067,6 +2072,7 @@ public: // and makes sure this pool wasn't somehow made freeable already. // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) bool inserted = graph_pools_freeable.insert({ mempool_id, it->second.get() }).second; + ASCEND_LOGE("Add mempool_id = {%llu, %llu} into graph_pools_freeable.", mempool_id.first, mempool_id.second); TORCH_INTERNAL_ASSERT(inserted); } } @@ -2645,6 +2651,8 @@ private: release_blocks(small_blocks, context); for (auto it = graph_pools_freeable.begin(); it != graph_pools_freeable.end();) { + ASCEND_LOGE("release_blocks for mempool_id = {%llu, %llu} in graph_pools_freeable.", + it->first.first, it->first.second); // See notifyCaptureDestroy for the strategy here. TORCH_INTERNAL_ASSERT(it->second->use_count == 0); release_blocks(it->second->small_blocks, context); @@ -3143,6 +3151,7 @@ public: void emptyCache(bool check_error) override { + ASCEND_LOGE("emptyCache while emptyCache."); int count = static_cast(device_allocator.size()); for (int i = 0; i < count; i++) { device_allocator[i]->emptyCache(i, check_error); @@ -3382,6 +3391,7 @@ public: void FreeDeviceCachedMemory(int device) override { + ASCEND_LOGE("emptyCache while FreeDeviceCachedMemory."); device_allocator[device]->emptyCache(device, true); } diff --git a/torch_npu/csrc/core/npu/NPUWorkspaceAllocator.cpp b/torch_npu/csrc/core/npu/NPUWorkspaceAllocator.cpp index bdccebaafc..622bad6310 100644 --- a/torch_npu/csrc/core/npu/NPUWorkspaceAllocator.cpp +++ b/torch_npu/csrc/core/npu/NPUWorkspaceAllocator.cpp @@ -276,6 +276,8 @@ public: void empty_cache(int device, bool need_empty_queue, bool check_error) { + ASCEND_LOGE("came into NPUWorkspaceAllocator but do nothing."); + return; device_allocator[device]->empty_cache(need_empty_queue, check_error); } -- Gitee