diff --git a/test/npu/test_torch_npu.py b/test/npu/test_torch_npu.py
index ca5c77b21e8ca0d2ba9b9715d4b3bd020dc870a2..541dd3c59e7fa569d00862d10cb5723431270985 100644
--- a/test/npu/test_torch_npu.py
+++ b/test/npu/test_torch_npu.py
@@ -78,6 +78,12 @@ class TorchNPUDeviceTestCase(TestCase):
         torch_npu.npu.synchronize()
         after_free_memory, after_total_memory = torch_npu.npu.mem_get_info(0)
         self.assertEqual(before_total_memory, after_total_memory)
+
+    @unittest.skip("CANN doesn't support now.")
+    def test_set_device_res_limit(self):
+        ans_dict = {'cube_num': 12, 'vector_num': 24}
+        torch.npu.set_device_res_limit(torch.npu.current_device(), 12, 24)
+        self.assertEqual(ans_dict, torch.npu.get_device_res_limit(torch.npu.current_device()))
 
 class TorchNPUMemoryApiTestCase(TestCase):
     def test_npu_memory_stats(self):
diff --git a/third_party/acl/inc/acl/acl_rt.h b/third_party/acl/inc/acl/acl_rt.h
index 98b520ba4ac73a4b5072d98fd436edde37b51655..ecc36f38128bd746bc9f9cb5064e6f47f9bc5b6a 100755
--- a/third_party/acl/inc/acl/acl_rt.h
+++ b/third_party/acl/inc/acl/acl_rt.h
@@ -181,6 +181,11 @@ typedef enum aclrtLastErrLevel {
     ACL_RT_THREAD_LEVEL = 0,
 } aclrtLastErrLevel;
 
+typedef enum {
+    ACL_RT_DEV_RES_CUBE_CORE = 0,
+    ACL_RT_DEV_RES_VECTOR_CORE,
+} aclrtDevResModelType;
+
 typedef void* aclrtDrvMemHandle;
 
 typedef void (*aclrtCallback)(void *userData);
@@ -1541,6 +1546,37 @@ ACL_FUNC_VISIBILITY aclError aclrtPeekAtLastError(aclrtLastErrLevel level);
  */
 ACL_FUNC_VISIBILITY aclError aclrtGetLastError(aclrtLastErrLevel level);
 
+/**
+ * @ingroup AscendCL
+ * @brief Get the value of the current device's limited resources
+ * @param [in] deviceId the device id
+ * @param [in] type resources type
+ * @param [out] value resources limit value
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t* value);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the value of the current device's limited resources
+ * @param [in] deviceId the device id
+ * @param [in] type resource type
+ * @param [in] value resource limit value
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t value);
+
+/**
+ * @ingroup AscendCL
+ * @brief Reset the value of the current device's limited resources
+ * @param [in] deviceId the device id
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtResetDeviceResLimit(int32_t deviceId);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/third_party/acl/libs/acl.cpp b/third_party/acl/libs/acl.cpp
index 4f24e6bf043cba7c53c7015e597f5c6e82164bd6..9bb32581dd7ea6ca7d1b5fe01c7896dfb7d84764 100644
--- a/third_party/acl/libs/acl.cpp
+++ b/third_party/acl/libs/acl.cpp
@@ -18,6 +18,9 @@ aclError aclmdlSetDump(const char *configPath){return 0;}
 aclError aclmdlInitDump(){return 0;}
 aclError aclmdlFinalizeDump(){return 0;}
 aclError aclrtDeviceTaskAbort(int32_t deviceId, uint32_t timeout){return 0;}
+aclError aclrtGetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t* value){return 0;}
+aclError aclrtSetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t value){return 0;}
+aclError aclrtResetDeviceResLimit(int32_t deviceId){return 0;}
 
 // Stream
 aclError aclrtCreateStream(aclrtStream *stream) { return 0; }
diff --git a/torch_npu/csrc/core/npu/NPUFunctions.cpp b/torch_npu/csrc/core/npu/NPUFunctions.cpp
index 2b178abfc5492a42bff9c1b59a83dcb6064934ee..40e865f10989e495766b81146bd10af4e251e1b5 100644
--- a/torch_npu/csrc/core/npu/NPUFunctions.cpp
+++ b/torch_npu/csrc/core/npu/NPUFunctions.cpp
@@ -5,6 +5,7 @@
 #include "torch_npu/csrc/core/npu/NPUStream.h"
 #include "torch_npu/csrc/core/npu/NPUAffinityController.h"
 #include "torch_npu/csrc/core/npu/register/OptionsManager.h"
+#include "third_party/acl/inc/acl/acl_rt.h"
 #ifndef BUILD_LIBTORCH
 #include "torch_npu/csrc/sanitizer/NPUTrace.h"
 #endif
@@ -278,4 +279,48 @@ void stream_synchronize(aclrtStream stream)
     NPU_CHECK_ERROR(aclrtSynchronizeStream(stream));
 }
 
+// Sets the limit of one resource type on an already-initialized device.
+// `type` is cast to aclrtDevResModelType (0: cube core, 1: vector core).
+aclError SetDeviceResLimit(int32_t device, int32_t type, uint32_t value)
+{
+    // Validate the id before the lookup so an invalid id reports a value error, not "not initialized".
+    TORCH_CHECK(device >= 0, "device id must be non-negative!", PTA_ERROR(ErrCode::VALUE));
+    std::lock_guard lock(mtx);
+    if (used_devices.find(device) == used_devices.end()) {
+        TORCH_CHECK(false, "NPU device ", device, " has not been initialized! Can not set device resource limit");
+    }
+    c10_npu::acl::aclrtDevResModelType restype = static_cast<c10_npu::acl::aclrtDevResModelType>(type);
+    aclError err = c10_npu::acl::AclrtSetDeviceResLimit(device, restype, value);
+    NPU_CHECK_ERROR_WITHOUT_UCE(err);
+    return err;
+}
+
+// Returns the limit of one resource type on an already-initialized device.
+uint32_t GetDeviceResLimit(int32_t device, int32_t type)
+{
+    TORCH_CHECK(device >= 0, "device id must be non-negative!", PTA_ERROR(ErrCode::VALUE));
+    std::lock_guard lock(mtx);
+    if (used_devices.find(device) == used_devices.end()) {
+        TORCH_CHECK(false, "NPU device ", device, " has not been initialized! Can not get device resource limit");
+    }
+    c10_npu::acl::aclrtDevResModelType restype = static_cast<c10_npu::acl::aclrtDevResModelType>(type);
+    // Zero-initialized: a callee that succeeds without writing (e.g. the acl.cpp stub) must not leak garbage.
+    uint32_t value = 0;
+    NPU_CHECK_ERROR_WITHOUT_UCE(c10_npu::acl::AclrtGetDeviceResLimit(device, restype, &value));
+    return value;
+}
+
+// Resets the resource limits of an already-initialized device.
+aclError ResetDeviceResLimit(int32_t device)
+{
+    TORCH_CHECK(device >= 0, "device id must be non-negative!", PTA_ERROR(ErrCode::VALUE));
+    std::lock_guard lock(mtx);
+    if (used_devices.find(device) == used_devices.end()) {
+        TORCH_CHECK(false, "NPU device ", device, " has not been initialized! Can not reset device resource limit");
+    }
+    aclError err = c10_npu::acl::AclrtResetDeviceResLimit(device);
+    NPU_CHECK_ERROR_WITHOUT_UCE(err);
+    return err;
+}
+
 } // namespace c10_npu
diff --git a/torch_npu/csrc/core/npu/NPUFunctions.h b/torch_npu/csrc/core/npu/NPUFunctions.h
index 9bb715bdb85fc5007e04026865794e9f3a5cc1cd..3e8220a09f17406c45a2b5346c2f24e371186d1d 100644
--- a/torch_npu/csrc/core/npu/NPUFunctions.h
+++ b/torch_npu/csrc/core/npu/NPUFunctions.h
@@ -77,6 +77,12 @@ void SetTargetDevice();
 
 int GetLocalDevice();
 
+aclError SetDeviceResLimit(int32_t device, int32_t type, uint32_t value);
+
+C10_NPU_API uint32_t GetDeviceResLimit(int32_t deviceId, int32_t type);
+
+aclError ResetDeviceResLimit(int32_t deviceId);
+
 enum class SyncDebugMode { L_DISABLED = 0, L_WARN, L_ERROR };
 
 // it's used to store npu synchronization state
diff --git a/torch_npu/csrc/core/npu/interface/AclInterface.cpp b/torch_npu/csrc/core/npu/interface/AclInterface.cpp
index 116c8ab0dbd3885fc65fb0b6ca7295a87ce5e16a..7602acc2ab46539ee3ec02b620e4380265da0b35 100644
--- a/torch_npu/csrc/core/npu/interface/AclInterface.cpp
+++ b/torch_npu/csrc/core/npu/interface/AclInterface.cpp
@@ -90,6 +90,9 @@ LOAD_FUNCTION(aclrtMemExportToShareableHandle)
 LOAD_FUNCTION(aclrtMemSetPidToShareableHandle)
 LOAD_FUNCTION(aclrtMemImportFromShareableHandle)
 LOAD_FUNCTION(aclrtDeviceGetBareTgid)
+LOAD_FUNCTION(aclrtGetDeviceResLimit)
+LOAD_FUNCTION(aclrtSetDeviceResLimit)
+LOAD_FUNCTION(aclrtResetDeviceResLimit)
 
 aclprofStepInfoPtr init_stepinfo() {
     typedef aclprofStepInfoPtr(*npdInitFunc)();
@@ -1033,5 +1036,41 @@ aclError AclrtDeviceGetBareTgid(int32_t *pid)
     return func(pid);
 }
 
+aclError AclrtGetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t* value)
+{
+    typedef aclError (*AclrtGetDeviceResLimit)(int32_t, aclrtDevResModelType, uint32_t*);
+    static AclrtGetDeviceResLimit func = nullptr;
+    if (func == nullptr) {
+        func = (AclrtGetDeviceResLimit) GET_FUNC(aclrtGetDeviceResLimit);
+    }
+
+    TORCH_CHECK(func, "Failed to find function aclrtGetDeviceResLimit", PTA_ERROR(ErrCode::NOT_FOUND));
+    return func(deviceId, type, value);
+}
+
+aclError AclrtSetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t value)
+{
+    typedef aclError (*AclrtSetDeviceResLimit)(int32_t, aclrtDevResModelType, uint32_t);
+    static AclrtSetDeviceResLimit func = nullptr;
+    if (func == nullptr) {
+        func = (AclrtSetDeviceResLimit) GET_FUNC(aclrtSetDeviceResLimit);
+    }
+
+    TORCH_CHECK(func, "Failed to find function aclrtSetDeviceResLimit", PTA_ERROR(ErrCode::NOT_FOUND));
+    return func(deviceId, type, value);
+}
+
+aclError AclrtResetDeviceResLimit(int32_t deviceId)
+{
+    typedef aclError (*AclrtResetDeviceResLimit)(int32_t);
+    static AclrtResetDeviceResLimit func = nullptr;
+    if (func == nullptr) {
+        func = (AclrtResetDeviceResLimit) GET_FUNC(aclrtResetDeviceResLimit);
+    }
+
+    TORCH_CHECK(func, "Failed to find function aclrtResetDeviceResLimit", PTA_ERROR(ErrCode::NOT_FOUND));
+    return func(deviceId);
+}
+
 } // namespace acl
 } // namespace c10
diff --git a/torch_npu/csrc/core/npu/interface/AclInterface.h b/torch_npu/csrc/core/npu/interface/AclInterface.h
index 7fc590273fcae507e546dc1aac7ff206747e6585..f4b5294dd9554c837e08d7ec79f3c9ca2345d8f7 100644
--- a/torch_npu/csrc/core/npu/interface/AclInterface.h
+++ b/torch_npu/csrc/core/npu/interface/AclInterface.h
@@ -32,6 +32,12 @@ enum aclrtStreamStatus {
 };
 using aclrtStreamStatus = enum aclrtStreamStatus;
 
+enum aclrtDevResModelType {
+    ACL_RT_DEV_RES_CUBE_CORE = 0,
+    ACL_RT_DEV_RES_VECTOR_CORE = 1,
+};
+using aclrtDevResModelType = enum aclrtDevResModelType;
+
 /**
   aclprofStepInfo is provide by acl, it used to be store dispatch op info.
  */
@@ -244,6 +250,9 @@ aclError AclrtMemSetPidToShareableHandle(uint64_t shareableHandle, int32_t *pid,
 aclError AclrtMemImportFromShareableHandle(uint64_t shareableHandle, int32_t deviceId, aclrtDrvMemHandle *handle);
 
 aclError AclrtDeviceGetBareTgid(int32_t *pid);
+aclError AclrtGetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t* value);
+aclError AclrtSetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t value);
+aclError AclrtResetDeviceResLimit(int32_t deviceId);
 
 } // namespace acl
 } // namespace c10_npu
diff --git a/torch_npu/csrc/npu/Module.cpp b/torch_npu/csrc/npu/Module.cpp
index d9e6cd255326206bdec9c427896c1155925625a7..d7f13cd80b973179e376e27e8e80cce0895a6d2f 100644
--- a/torch_npu/csrc/npu/Module.cpp
+++ b/torch_npu/csrc/npu/Module.cpp
@@ -1705,6 +1705,50 @@ static PyObject* THNPModule_add_p2p_access(PyObject* self, PyObject *args)
     END_HANDLE_TH_ERRORS
 }
 
+static PyObject* THNPModule_set_device_res_limit(PyObject* self, PyObject *args)
+{
+    HANDLE_TH_ERRORS
+    PyObject* device = nullptr;
+    PyObject* type = nullptr;
+    PyObject* value = nullptr;
+
+    if (!PyArg_ParseTuple(args, "OOO", &device, &type, &value)) {
+        throw torch::TypeError("Pybind failed to parse parameters."
+                               + PTA_ERROR(ErrCode::TYPE));
+    }
+    int32_t device_ = THPUtils_unpackLong(device);
+    int32_t type_ = THPUtils_unpackLong(type);
+    uint32_t value_ = static_cast<uint32_t>(THPUtils_unpackUInt32(value));
+    c10_npu::SetDeviceResLimit(device_, type_, value_);
+    Py_RETURN_NONE;
+    END_HANDLE_TH_ERRORS
+}
+
+static PyObject* THNPModule_get_device_res_limit(PyObject* self, PyObject *args)
+{
+    HANDLE_TH_ERRORS
+    PyObject* device = nullptr;
+    PyObject* type = nullptr;
+
+    if (!PyArg_ParseTuple(args, "OO", &device, &type)) {
+        throw torch::TypeError("Pybind failed to parse parameters."
+                               + PTA_ERROR(ErrCode::TYPE));
+    }
+    int32_t device_ = THPUtils_unpackLong(device);
+    int32_t type_ = THPUtils_unpackLong(type);
+    uint32_t value = c10_npu::GetDeviceResLimit(device_, type_);
+    return PyLong_FromUnsignedLong(value);
+    END_HANDLE_TH_ERRORS
+}
+
+static PyObject* THNPModule_reset_device_res_limit(PyObject* self, PyObject *args)
+{
+    HANDLE_TH_ERRORS
+    int32_t device = THPUtils_unpackLong(args);
+    c10_npu::ResetDeviceResLimit(device);
+    Py_RETURN_NONE;
+    END_HANDLE_TH_ERRORS
+}
 
 static struct PyMethodDef THNPModule_methods[] = {
     {"_npu_init", (PyCFunction)THNPModule_initExtension, METH_NOARGS, nullptr},
@@ -1770,6 +1814,9 @@ static struct PyMethodDef THNPModule_methods[] = {
     {"_add_ipc_pid", (PyCFunction)THNPModule_add_ipc_pid, METH_VARARGS, nullptr},
     {"_get_ipc_pid", (PyCFunction)THNPModule_get_ipc_pid, METH_NOARGS, nullptr},
     {"_add_p2p_access", (PyCFunction)THNPModule_add_p2p_access, METH_VARARGS, nullptr},
+    {"_npu_get_device_res_limit", (PyCFunction)THNPModule_get_device_res_limit, METH_VARARGS, nullptr},
+    {"_npu_set_device_res_limit", (PyCFunction)THNPModule_set_device_res_limit, METH_VARARGS, nullptr},
+    {"_npu_reset_device_res_limit", (PyCFunction)THNPModule_reset_device_res_limit, METH_O, nullptr},
     {nullptr}};
 
 TORCH_NPU_API PyMethodDef* THNPModule_get_methods()
diff --git a/torch_npu/npu/__init__.py b/torch_npu/npu/__init__.py
index bd94890b27e32637643e927d41bc6f2a85eafe30..709647c52fa05ecc77ff4abae6971ed629e3eaaf 100644
--- a/torch_npu/npu/__init__.py
+++ b/torch_npu/npu/__init__.py
@@ -115,7 +115,9 @@ __all__ = [
     "graph_task_group_begin",
     "graph_task_group_end",
     "graph_task_update_begin",
-    "graph_task_update_end"
+    "graph_task_update_end",
+    "set_device_res_limit",
+    "get_device_res_limit"
 ]
 
 from typing import Tuple, Union
diff --git a/torch_npu/npu/npu_config.py b/torch_npu/npu/npu_config.py
index f2a5104920fcb342df1971f56e64bcb922a0f4c1..0a0c07ab78d75127ba7a4636e369c36d3e1bf4a8 100644
--- a/torch_npu/npu/npu_config.py
+++ b/torch_npu/npu/npu_config.py
@@ -5,12 +5,14 @@ import warnings
 import torch_npu._C
 from torch_npu.utils._path_manager import PathManager
 from torch_npu.utils._error_code import ErrCode, pta_error, prof_error
+from .utils import _get_device_index
 
 
 # this file is used to enhance the npu frontend API by set_option or other.
 
 __all__ = ["set_option", "set_aoe", "set_compile_mode", "set_mm_bmm_format_nd", "get_mm_bmm_format_nd",
-           "is_jit_compile_false", "finalize_dump", "init_dump", "set_dump"]
+           "is_jit_compile_false", "finalize_dump", "init_dump", "set_dump",
+           "set_device_res_limit", "get_device_res_limit"]
 
 _option_map = {"ACL_PRECISION_MODE": ["allow_fp32_to_fp16", "must_keep_origin_dtype"],
                "ACL_OP_SELECT_IMPL_MODE": ["high_performance", "high_precision"],
@@ -169,3 +171,52 @@ class _allowHF32Conv:
             hf32_value = torch_npu._C._npu_getOption("ALLOW_CONV_HF32")
             return (hf32_value is None) or (hf32_value.decode() == "") or (hf32_value.decode() == "enable")
         return None
+
+
+class _call_once_class:
+    """Decorator that lets the wrapped function complete successfully only once."""
+
+    def __init__(self, func):
+        self.func = func
+        self.called = False
+        self.result = None
+
+    def __call__(self, *args, **kwargs):
+        if self.called:
+            raise RuntimeError(f"Function '{self.func.__name__}' has already been called, "
+                               "You can only set this interface once.")
+
+        # Mark the call as consumed only after it succeeds, so a call rejected
+        # for bad arguments does not permanently lock the interface.
+        self.result = self.func(*args, **kwargs)
+        self.called = True
+        return self.result
+
+
+@_call_once_class
+def set_device_res_limit(device, cube_num=-1, vector_num=-1):
+    """Limit the cube and/or vector core count of ``device``.
+
+    A value of -1 leaves that limit untouched. Only one call may succeed.
+    """
+    from torch_npu.npu import device_count
+    device_id = _get_device_index(device, optional=True)
+    if device_id < 0 or device_id >= device_count():
+        raise AssertionError("Invalid device id" + pta_error(ErrCode.VALUE))
+    torch_npu.npu._lazy_init()
+    # Resource types follow aclrtDevResModelType: 0 is cube core, 1 is vector core.
+    if cube_num != -1:
+        torch_npu._C._npu_set_device_res_limit(device_id, 0, cube_num)
+    if vector_num != -1:
+        torch_npu._C._npu_set_device_res_limit(device_id, 1, vector_num)
+
+
+def get_device_res_limit(device):
+    """Return ``{"cube_num": ..., "vector_num": ...}`` holding the limits of ``device``."""
+    from torch_npu.npu import device_count
+    device_id = _get_device_index(device, optional=True)
+    if device_id < 0 or device_id >= device_count():
+        raise AssertionError("Invalid device id" + pta_error(ErrCode.VALUE))
+    torch_npu.npu._lazy_init()
+    return {"cube_num": torch_npu._C._npu_get_device_res_limit(device_id, 0),
+            "vector_num": torch_npu._C._npu_get_device_res_limit(device_id, 1)}