From 70f787f6448506526884381850b7798e61c2743e Mon Sep 17 00:00:00 2001
From: chen_liqing <jiangyiwen5@huawei.com>
Date: Wed, 16 Jul 2025 15:15:30 +0800
Subject: [PATCH] Add debug info for free

---
 torch_npu/csrc/core/npu/NPUCachingAllocator.cpp   | 10 ++++++++++
 torch_npu/csrc/core/npu/NPUWorkspaceAllocator.cpp |  2 ++
 2 files changed, 12 insertions(+)
diff --git a/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp b/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp
index 43a472f79d..6ba0f95f08 100644
--- a/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp
+++ b/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp
@@ -470,6 +470,7 @@ struct ExpandableSegment {
 
     ~ExpandableSegment()
     {
+        ASCEND_LOGD("unmapHandles ~ExpandableSegment().");  // debug-level, like the trace that follows
         forEachAllocatedRange([&](size_t begin, size_t end) { unmapHandles(begin, end); });
         NPU_CHECK_ERROR(c10_npu::acl::AclrtReleaseMemAddress(ptr_, getHcclComm()));
         ASCEND_LOGD("NPUCachingAllocator free by AclrtReleaseMemAddress");
@@ -497,6 +498,8 @@ private:
             handles_.at(i) = c10::nullopt;
             NPU_CHECK_ERROR(c10_npu::acl::AclrtUnmapMem((char *)ptr_ + segment_size_ * i, getHcclComm()));
             NPU_CHECK_ERROR(c10_npu::acl::AclrtFreePhysical(h));
+            ASCEND_LOGD("NPUCachingAllocator unmap: aclrtUnmapMem ptr=%p", (char *)ptr_ + segment_size_ * i);
+            ASCEND_LOGD("NPUCachingAllocator unmap: aclrtFreePhysical handle=%p", h);
         }
         ASCEND_LOGD("NPUCachingAllocator unmap: segment_size=%zu", segment_size_);
         trimHandles();
@@ -1269,6 +1272,7 @@ public:
                 c10_npu::npuSynchronizeDevice(true);
             }
             c10_npu::NPUWorkspaceAllocator::emptyCache(device, true, true);
+            ASCEND_LOGD("release_cached_blocks while !block_found && captures_underway.empty().");
             block_found = (release_cached_blocks(true, context) && alloc_block(params, true, context, lock));
         }
 
@@ -1595,6 +1599,7 @@ public:
         c10_npu::npuSynchronizeDevice(check_error);
         std::lock_guard<std::recursive_mutex> lock(mutex);
         c10_npu::NPUWorkspaceAllocator::emptyCache(device, true, check_error);
+        ASCEND_LOGD("release_cached_blocks while emptyCache.");
         release_cached_blocks(check_error, context);
     }
 
@@ -2067,6 +2072,7 @@ public:
             // and makes sure this pool wasn't somehow made freeable already.
             // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
             bool inserted = graph_pools_freeable.insert({ mempool_id, it->second.get() }).second;
+            ASCEND_LOGD("Add mempool_id = {%llu, %llu} into graph_pools_freeable.", mempool_id.first, mempool_id.second);
             TORCH_INTERNAL_ASSERT(inserted);
         }
     }
@@ -2645,6 +2651,8 @@ private:
         release_blocks(small_blocks, context);
 
         for (auto it = graph_pools_freeable.begin(); it != graph_pools_freeable.end();) {
+            ASCEND_LOGD("release_blocks for mempool_id = {%llu, %llu} in graph_pools_freeable.",
+                        it->first.first, it->first.second);
             // See notifyCaptureDestroy for the strategy here.
             TORCH_INTERNAL_ASSERT(it->second->use_count == 0);
             release_blocks(it->second->small_blocks, context);
@@ -3143,6 +3151,7 @@ public:
 
     void emptyCache(bool check_error) override
     {
+        ASCEND_LOGD("NPUCachingAllocator emptyCache: check_error=%d", static_cast<int>(check_error));
         int count = static_cast<int>(device_allocator.size());
         for (int i = 0; i < count; i++) {
             device_allocator[i]->emptyCache(i, check_error);
@@ -3382,6 +3391,7 @@ public:
 
     void FreeDeviceCachedMemory(int device) override
     {
+        ASCEND_LOGD("NPUCachingAllocator FreeDeviceCachedMemory: device=%d", device);
         device_allocator[device]->emptyCache(device, true);
     }
 
diff --git a/torch_npu/csrc/core/npu/NPUWorkspaceAllocator.cpp b/torch_npu/csrc/core/npu/NPUWorkspaceAllocator.cpp
index bdccebaafc..622bad6310 100644
--- a/torch_npu/csrc/core/npu/NPUWorkspaceAllocator.cpp
+++ b/torch_npu/csrc/core/npu/NPUWorkspaceAllocator.cpp
@@ -276,6 +276,8 @@ public:
 
     void empty_cache(int device, bool need_empty_queue, bool check_error)
     {
+        // Trace only; never return early here, or the device allocator's cache below is never emptied.
+        ASCEND_LOGD("NPUWorkspaceAllocator empty_cache: device=%d", device);
         device_allocator[device]->empty_cache(need_empty_queue, check_error);
     }
 
-- 
Gitee