From d6b692f4aac58184d60b157906abef43c78d81da Mon Sep 17 00:00:00 2001 From: xu-hong26 Date: Mon, 14 Aug 2023 19:45:18 +0800 Subject: [PATCH] TaskQueue optimize --- torch_npu/csrc/core/npu/NPUQueue.cpp | 40 +++++++++++++++------------- torch_npu/csrc/core/npu/NPUQueue.h | 2 ++ 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/torch_npu/csrc/core/npu/NPUQueue.cpp b/torch_npu/csrc/core/npu/NPUQueue.cpp index 632e24d779e..0f7a68fb4c6 100644 --- a/torch_npu/csrc/core/npu/NPUQueue.cpp +++ b/torch_npu/csrc/core/npu/NPUQueue.cpp @@ -296,17 +296,6 @@ void Repository::Enqueue(void* cur_paras) { continue; } __sync_synchronize(); - while (!IsReadWorking()) { - s = eventfd_write(efd_read, u); - if (s != 0) { - if (errno == EINTR) { - continue; - } - ASCEND_LOGE("notify consumer failed!! s=%zd, errno=%s", s, strerror(errno)); - return; - } - break; - } } SetWriteWorking(false); } @@ -334,14 +323,7 @@ void Repository::Dequeue() { SetReadWorking(false); __sync_synchronize(); if (IsEmptyQueue()) { - s = eventfd_read(efd_read, &u); - if (s != 0) { - if (errno == EINTR) { - continue; - } - ASCEND_LOGE("waiting enqueue failed. s=%zd, errno=%s.", s, strerror(errno)); - return; - } + WaitForTaskArrival(); SetReadWorking(true); } continue; @@ -376,6 +358,26 @@ void Repository::Dequeue() { SetReadWorking(false); } +void Repository::WaitForTaskArrival() { + uint64_t loop_cnt = 0; + struct timeval delay = {0, 0}; + + while (IsEmptyQueue()) { + if (GetStatus() != RepoStatus::RUN) { + break; + } + loop_cnt++; + if (loop_cnt < TASK_ARRIVAL_LOOP_CNT) { + continue; + } + delay.tv_usec = 1; + select(0, nullptr, nullptr, nullptr, &delay); + __sync_synchronize(); + } + + return; +} + void Repository::ReleaseResource() { manager().DeInit(datas); if (efd_read > 0) { diff --git a/torch_npu/csrc/core/npu/NPUQueue.h b/torch_npu/csrc/core/npu/NPUQueue.h index 7295fe04487..8ce46d87a88 100644 --- a/torch_npu/csrc/core/npu/NPUQueue.h +++ b/torch_npu/csrc/core/npu/NPUQueue.h @@ -11,6 +11,7 @@ namespace c10_npu { +#define TASK_ARRIVAL_LOOP_CNT (3030000) // about 10ms struct sring_idx { bool working = false; volatile unsigned int idx = 0; @@ -96,6 +97,7 @@ private: bool IsReadWorking() const {return read_idx.working;}; bool WriteQueue(void* cur_paras); bool ReadQueue(); + void WaitForTaskArrival(); private: void* datas = nullptr; -- Gitee