diff --git a/torch_npu/csrc/InitNpuBindings.cpp b/torch_npu/csrc/InitNpuBindings.cpp
index 23818288088fb7b427b0402f5f17486e0aec01e8..3a65d674264bb7e7f240aeb7b68462011dd40db7 100644
--- a/torch_npu/csrc/InitNpuBindings.cpp
+++ b/torch_npu/csrc/InitNpuBindings.cpp
@@ -49,7 +49,6 @@ void AddPyMethodDefs(std::vector<PyMethodDef>& vector, PyMethodDef* methods)
 
 PyObject * THPModule_npu_shutdown(PyObject * /* unused */)
 {
-  HANDLE_TH_ERRORS
   // cudaFree is blocking and will synchronize across all kernels executing
   // on the current device, while aclrtFree Free device memory immediately.
   // aclrtSynchronizeDevice should be called before aclrtFree to ensure that
@@ -66,7 +65,6 @@ PyObject * THPModule_npu_shutdown(PyObject * /* unused */)
       fprintf(stdout, "THPModule_npu_shutdown success.\n");
     }
   }
 
-  END_HANDLE_TH_ERRORS
   Py_RETURN_NONE;
 }
@@ -91,7 +89,6 @@ static std::vector<PyMethodDef> methods;
 
 extern "C"
 PyObject* initModule(){
-  HANDLE_TH_ERRORS
   at::internal::lazy_init_num_threads();
 
   AddPyMethodDefs(methods, TorchNpuMethods);
@@ -119,7 +116,6 @@ PyObject* initModule(){
   torch_npu::autograd::initTorchFunctions(module);
 
   return module;
-  END_HANDLE_TH_ERRORS
 }
 
 PyMODINIT_FUNC PyInit__C(void){
diff --git a/torch_npu/csrc/utils/TensorMethods.cpp b/torch_npu/csrc/utils/TensorMethods.cpp
index e57cf5016780311d62175d4c328c487b4544a108..73a9444f43a4ec2d1ebf5d4069667f19c5883bb2 100644
--- a/torch_npu/csrc/utils/TensorMethods.cpp
+++ b/torch_npu/csrc/utils/TensorMethods.cpp
@@ -3,6 +3,263 @@
 namespace torch_npu {
 namespace utils {
 
+static const char* _backend_to_string_npu(const at::Backend& backend) {
+  switch (backend) {
+    case at::Backend::CPU: return "torch";
+    case at_npu::key::NativeBackend: return "torch.npu";
+    default: AT_ERROR("Unimplemented backend ", backend);
+  }
+}
+
+std::string _options_to_string_npu(const at::TensorOptions options) {
+  std::ostringstream ss;
+  ss << _backend_to_string_npu(options.backend()) << "." << toString(at::typeMetaToScalarType(options.dtype())) << "Tensor";
+  return ss.str();
+}
+
+std::tuple<at::Tensor, c10::optional<at::Device>, c10::optional<at::ScalarType>, bool, bool, c10::optional<c10::MemoryFormat>> parse_to_conversion(torch::PythonArgs& r, bool allow_copy);
+
+void InitNPUWithIndex(c10::DeviceIndex index = -1);
+
+static at::Tensor dispatch_to(const at::Tensor & self, c10::Device device, bool non_blocking, bool copy, c10::optional<c10::MemoryFormat> optional_memory_format) {
+  pybind11::gil_scoped_release no_gil;
+  // NOTE: this is where we record aten::to in the graph during tracing. However, the behavior of aten::to
+  // is different with respect to TensorOptions fields that are not present: aten::to inherits fields that
+  // are missing from the self argument while the tracer assumes that they should be populated with the
+  // default values (eg. float for scalar type). By explicitly copying over the tensor options here we fully
+  // specify all tensor options and thus record the proper trace
+  return self.to(self.options().device(device).memory_format(optional_memory_format), non_blocking, copy);
+}
+
+static at::Tensor dispatch_to(const at::Tensor & self, bool non_blocking, bool copy, c10::optional<c10::MemoryFormat> optional_memory_format) {
+  AutoNoGIL no_gil;
+  return self.to(self.options().memory_format(optional_memory_format), non_blocking, copy);
+}
+
+static at::Tensor dispatch_to(const at::Tensor & self, c10::ScalarType dtype, bool non_blocking, bool copy, c10::optional<c10::MemoryFormat> optional_memory_format) {
+  pybind11::gil_scoped_release no_gil;
+  return self.to(dtype, non_blocking, copy, optional_memory_format);
+}
+
+static at::Tensor dispatch_to(const at::Tensor & self, c10::Device device, c10::ScalarType dtype, bool non_blocking, bool copy, c10::optional<c10::MemoryFormat> optional_memory_format) {
+  pybind11::gil_scoped_release no_gil;
+  return self.to(device, dtype, non_blocking, copy, optional_memory_format);
+}
+
+static PyObject * THPVariable_npu(PyObject* self, PyObject* args, PyObject* kwargs)
+{
+  HANDLE_TH_ERRORS
+  static torch::PythonArgParser parser({
+    "npu(Tensor temp, Device? device=None, bool non_blocking=False, *, MemoryFormat? memory_format=None)",
+    "npu(Tensor temp, Device? device=None, bool async=False, *, MemoryFormat? memory_format=None)|deprecated"
+  });
+  torch::ParsedArgs<4> parsed_args;
+  auto r = parser.parse(args, kwargs, parsed_args);
+  auto self_ = r.tensor(0);
+  auto local_device = r.isNone(1) ? c10::Device(at_npu::key::NativeDeviceType) : r.device(1);
+  auto device = c10::Device(at_npu::key::NativeDeviceType, local_device.index());
+  auto opt_memory_format = r.memoryformatOptional(3);
+  TORCH_CHECK((device.type() == at_npu::key::NativeDeviceType), "Invalid device, must be npu device");
+  maybe_initialize_npu(device);
+  return THPVariable_Wrap(dispatch_to(self_, device, r.toBool(2), false, opt_memory_format));
+  END_HANDLE_TH_ERRORS
+}
+
+static PyObject * THPVariable_to(PyObject* self, PyObject* args, PyObject* kwargs)
+{
+  HANDLE_TH_ERRORS
+  static torch::PythonArgParser parser({
+    "to(Tensor temp, Device device=None, ScalarType dtype=None, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)",
+    "to(Tensor temp, ScalarType dtype, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)",
+    "to(Tensor temp, Tensor tensor, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)",
+  });
+  torch::ParsedArgs<6> parsed_args;
+  auto r = parser.parse(args, kwargs, parsed_args);
+  if (r.has_torch_function()) {
+    return torch::handle_torch_function(r, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+  auto parsed = torch_npu::utils::parse_to_conversion(r, true);
+  auto self_ = std::get<0>(parsed);
+  auto& device = std::get<1>(parsed);
+  auto& scalarType = std::get<2>(parsed);
+  auto non_blocking = std::get<3>(parsed);
+  auto copy = std::get<4>(parsed);
+  auto opt_memory_format = std::get<5>(parsed);
+
+  maybe_initialize_npu(device);
+  if (!device && !scalarType && !copy && !opt_memory_format.has_value()) {
+    // THPVariable_Wrap already returns a new reference to the wrapped tensor;
+    // self here is the module object, so no extra Py_INCREF is needed.
+    return THPVariable_Wrap(self_);
+  } else if (!device && !scalarType) {
+    return THPVariable_Wrap(
+        dispatch_to(self_, non_blocking, copy, opt_memory_format));
+  } else if (!device) {
+    return THPVariable_Wrap(dispatch_to(self_, *scalarType, non_blocking, copy, opt_memory_format));
+  } else if (!scalarType) {
+    return THPVariable_Wrap(dispatch_to(self_, *device, non_blocking, copy, opt_memory_format));
+  } else {
+    return THPVariable_Wrap(dispatch_to(self_, *device, *scalarType, non_blocking, copy, opt_memory_format));
+  }
+  Py_RETURN_NONE;
+  END_HANDLE_TH_ERRORS
+}
+
+static PyObject * THPVariable_type(PyObject* self, PyObject* args, PyObject* kwargs)
+{
+  HANDLE_TH_ERRORS
+  static torch::PythonArgParser parser({
+    "type(Tensor temp, PyObject* dtype=None, bool non_blocking=False, *, MemoryFormat? memory_format=None)",
+    "type(Tensor temp, PyObject* dtype=None, bool async=False, *, MemoryFormat? memory_format=None)|deprecated"
+  });
+
+  torch::ParsedArgs<4> parsed_args;
+  auto r = parser.parse(args, kwargs, parsed_args);
+  auto self_ = r.tensor(0);
+  if (r.has_torch_function()) {
+    return torch::handle_torch_function(r, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  if (r.isNone(1)) {
+    return THPUtils_packString(_options_to_string_npu(self_.options()));
+  }
+  auto obj = r.pyobject(1);
+  auto opt_memory_format = r.memoryformatOptional(3);
+  std::string type_name;
+  bool is_dtype = false;
+  if (PyType_Check(obj)) {
+    if (obj == THPVariableClass) {
+      type_name = "torch.Tensor";
+    } else {
+      type_name = ((PyTypeObject*)obj)->tp_name;
+    }
+  } else if (THPUtils_checkString(obj)) {
+    type_name = THPUtils_unpackString(obj);
+  } else if (THPDtype_Check(obj)) {
+    is_dtype = true;
+  } else {
+    throw torch::TypeError("dtype must be a type, str, or dtype object");
+  }
+  c10::ScalarType scalar_type;
+  c10::Device device = self_.device();
+  if (is_dtype) {
+    scalar_type = r.scalartype(1);
+  } else {
+    at::TensorOptions options = torch::utils::options_from_string(type_name);
+    scalar_type = at::typeMetaToScalarType(options.dtype());
+    auto device_type = options.device().type();
+    if (device_type != device.type()) {
+      device = at::Device(device_type);
+    }
+  }
+  maybe_initialize_npu(device);
+  // non_blocking is declared at index 2 in the parser signatures above.
+  return THPVariable_Wrap(dispatch_to(self_, device, scalar_type, r.toBool(2), false, opt_memory_format));
+  END_HANDLE_TH_ERRORS
+}
+
+static PyObject * THPVariable_is_npu(PyObject* self, PyObject* args, PyObject* kwargs)
+{
+  HANDLE_TH_ERRORS
+  static torch::PythonArgParser parser({
+    "type(Tensor temp)"
+  });
+  torch::ParsedArgs<1> parsed_args;
+  auto r = parser.parse(args, kwargs, parsed_args);
+  auto self_ = r.tensor(0);
+  return torch::autograd::utils::wrap(at_npu::key::isDeviceTensor(self_));
+  END_HANDLE_TH_ERRORS
+}
+
+static PyObject * THPVariable_new_empty(PyObject* self, PyObject* args, PyObject* kwargs)
+{
+  HANDLE_TH_ERRORS
+  static torch::PythonArgParser parser(
+      {
+          "new_empty(Tensor self, IntArrayRef size, *, ScalarType dtype=None, "
+          "Layout layout=torch.strided, Device device=None, bool "
+          "pin_memory=False, bool requires_grad=False)",
+      },
+      true);
+  torch::ParsedArgs<7> parsed_args;
+  auto r = parser.parse(args, kwargs, parsed_args);
+  auto self_ = r.tensor(0);
+  if (r.has_torch_function()) {
+    return torch::handle_torch_function(r, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+  auto device = at_npu::key::parse_npu_device_with_default(r.args[4], self_.device());
+  maybe_initialize_npu(device);
+  const auto options = at::TensorOptions()
+      .dtype(r.scalartypeWithDefault(2, self_.scalar_type()))
+      .device(device)
+      .layout(r.layoutWithDefault(3, c10::layout_from_backend(self_.options().backend())))
+      .requires_grad(r.toBool(6))
+      .pinned_memory(r.toBool(5));
+  auto dispatch_new_empty = [](at::Tensor & self, c10::IntArrayRef size, at::TensorOptions options) -> at::Tensor {
+    pybind11::gil_scoped_release no_gil;
+    return self.new_empty(size, options);
+  };
+  return torch::autograd::utils::wrap(dispatch_new_empty(self_, r.intlist(1), options).set_requires_grad(r.toBool(6)));
+  Py_RETURN_NONE;
+  END_HANDLE_TH_ERRORS
+}
+
+static PyObject * THPVariable_new_empty_strided(PyObject* self, PyObject* args, PyObject* kwargs)
+{
+  HANDLE_TH_ERRORS
+  static torch::PythonArgParser parser(
+      {
+          "new_empty_strided(Tensor self, IntArrayRef size, IntArrayRef "
+          "stride, *, ScalarType dtype=None, Layout layout=torch.strided, "
+          "Device device=None, bool pin_memory=False, bool "
+          "requires_grad=False)",
+      },
+      true);
+  torch::ParsedArgs<8> parsed_args;
+  auto r = parser.parse(args, kwargs, parsed_args);
+  auto self_ = r.tensor(0);
+  if (r.has_torch_function()) {
+    return torch::handle_torch_function(r, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+  auto device = at_npu::key::parse_npu_device_with_default(r.args[5], self_.device());
+  maybe_initialize_npu(device);
+  const auto options = at::TensorOptions()
+      .dtype(r.scalartypeWithDefault(3, self_.scalar_type()))
+      .device(device)
+      .layout(r.layoutWithDefault(4, c10::layout_from_backend(self_.options().backend())))
+      .requires_grad(r.toBool(7))
+      .pinned_memory(r.toBool(6));
+  auto dispatch_new_empty_strided = [](at::Tensor & self, c10::IntArrayRef size, c10::IntArrayRef stride, at::TensorOptions options) -> at::Tensor {
+    pybind11::gil_scoped_release no_gil;
+    return self.new_empty_strided(size, stride, options);
+  };
+  return torch::autograd::utils::wrap(dispatch_new_empty_strided(self_, r.intlist(1), r.intlist(2), options).set_requires_grad(r.toBool(7)));
+  Py_RETURN_NONE;
+  END_HANDLE_TH_ERRORS
+}
+
+static PyObject * THPVariable_record_stream(PyObject* self, PyObject* args)
+{
+  HANDLE_TH_ERRORS
+  PyObject *_tensor, *_stream;
+  if (!PyArg_ParseTuple(args, "OO", &_tensor, &_stream)) {
+    throw torch::TypeError("record_stream usage: tensor.record_stream(stream)");
+  }
+  auto& self_ = reinterpret_cast<THPVariable*>(_tensor)->cdata;
+  c10_npu::NPUCachingAllocator::recordStream(self_.storage().data_ptr(), c10_npu::NPUStream::unpack(((THNPStream*)_stream)->cdata));
+  Py_RETURN_NONE;
+  END_HANDLE_TH_ERRORS
+}
+
+// autograd methods on torch._C
+static PyMethodDef TorchTensorMethods[] = { // NOLINT
+  {"npu", castPyCFunctionWithKeywords(THPVariable_npu), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"to", castPyCFunctionWithKeywords(THPVariable_to), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"type", castPyCFunctionWithKeywords(THPVariable_type), METH_VARARGS | METH_KEYWORDS, NULL},
{"is_npu", castPyCFunctionWithKeywords(THPVariable_is_npu), METH_VARARGS | METH_KEYWORDS, NULL}, + {"record_stream", (PyCFunction)(void(*)(void))THPVariable_record_stream, METH_VARARGS, NULL}, + {"new_empty", castPyCFunctionWithKeywords(THPVariable_new_empty), METH_VARARGS | METH_KEYWORDS, NULL}, + {"new_empty_strided", castPyCFunctionWithKeywords(THPVariable_new_empty_strided), METH_VARARGS | METH_KEYWORDS, NULL}, + {nullptr, nullptr, 0, nullptr} +}; PyMethodDef* tensor_functions() { return TorchTensorMethods; diff --git a/torch_npu/csrc/utils/TensorMethods.h b/torch_npu/csrc/utils/TensorMethods.h index 43f5973cb5be30e890aa5fa08261bf7444b863f4..06d78d69ede2fc04b8ce5a028d590de4b8f2a99c 100644 --- a/torch_npu/csrc/utils/TensorMethods.h +++ b/torch_npu/csrc/utils/TensorMethods.h @@ -19,263 +19,6 @@ namespace torch_npu { namespace utils { -static const char* _backend_to_string_npu(const at::Backend& backend) { - switch (backend) { - case at::Backend::CPU: return "torch"; - case at_npu::key::NativeBackend: return "torch.npu"; - default: AT_ERROR("Unimplemented backend ", backend); - } -} - -std::string _options_to_string_npu(const at::TensorOptions options) { - std::ostringstream ss; - ss << _backend_to_string_npu(options.backend()) << "." << toString(at::typeMetaToScalarType(options.dtype())) << "Tensor"; - return ss.str(); -} - -std::tuple, c10::optional, bool, bool, c10::optional> parse_to_conversion(torch::PythonArgs& r, bool allow_copy); - -void InitNPUWithIndex(c10::DeviceIndex index = -1); - -static at::Tensor dispatch_to(const at::Tensor & self, c10::Device device, bool non_blocking, bool copy, c10::optional optional_memory_format) { - pybind11::gil_scoped_release no_gil; - // NOTE: this is where we record aten::to in the graph during tracing. However, the behavior of aten::to - // is different with respect to TensorOptions fields that are not present: aten::to inherits fields that - // are missing from the self argument while the tracer assumes that they should be populated with the - // default values (eg. float for scalar type). By explicitly copying over the tensor options here we fully - // specify all tensor options and thus record the proper trace - return self.to(self.options().device(device).memory_format(optional_memory_format), non_blocking, copy); -} - -static at::Tensor dispatch_to(const at::Tensor & self, bool non_blocking, bool copy, c10::optional optional_memory_format) { - AutoNoGIL no_gil; - return self.to(self.options().memory_format(optional_memory_format), non_blocking, copy); -} - -static at::Tensor dispatch_to(const at::Tensor & self, c10::ScalarType dtype, bool non_blocking, bool copy, c10::optional optional_memory_format) { - pybind11::gil_scoped_release no_gil; - return self.to(dtype, non_blocking, copy, optional_memory_format); -} - -static at::Tensor dispatch_to(const at::Tensor & self, c10::Device device, c10::ScalarType dtype, bool non_blocking, bool copy, c10::optional optional_memory_format) { - pybind11::gil_scoped_release no_gil; - return self.to(device, dtype, non_blocking, copy, optional_memory_format); -} - -static PyObject * THPVariable_npu(PyObject* self, PyObject* args, PyObject* kwargs) -{ - HANDLE_TH_ERRORS - static torch::PythonArgParser parser({ - "npu(Tensor temp, Device? device=None, bool non_blocking=False, *, MemoryFormat? memory_format=None)", - "npu(Tensor temp, Device? device=None, bool async=False, *, MemoryFormat? 
memory_format=None)|deprecated" - }); - torch::ParsedArgs<4> parsed_args; - auto r = parser.parse(args, kwargs, parsed_args); - auto self_ = r.tensor(0); - auto local_device = r.isNone(1) ? c10::Device(at_npu::key::NativeDeviceType) : r.device(1); - auto device = c10::Device(at_npu::key::NativeDeviceType, local_device.index()); - auto opt_memory_format = r.memoryformatOptional(3); - TORCH_CHECK((device.type() == at_npu::key::NativeDeviceType), "Invalid device, must be npu device"); - maybe_initialize_npu(device); - return THPVariable_Wrap(dispatch_to(self_, device, r.toBool(2), false, opt_memory_format)); - END_HANDLE_TH_ERRORS -} - -static PyObject * THPVariable_to(PyObject* self, PyObject* args, PyObject* kwargs) -{ - HANDLE_TH_ERRORS - static torch::PythonArgParser parser({ - "to(Tensor temp, Device device=None, ScalarType dtype=None, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)", - "to(Tensor temp, ScalarType dtype, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)", - "to(Tensor temp, Tensor tensor, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)", - }); - torch::ParsedArgs<6> parsed_args; - auto r = parser.parse(args, kwargs, parsed_args); - if (r.has_torch_function()) { - return torch::handle_torch_function(r, args, kwargs, THPVariableClass, "torch.Tensor"); - } - auto parsed = torch_npu::utils::parse_to_conversion(r, true); - auto self_ = std::get<0>(parsed); - auto& device = std::get<1>(parsed); - auto& scalarType = std::get<2>(parsed); - auto non_blocking = std::get<3>(parsed); - auto copy = std::get<4>(parsed); - auto opt_memory_format = std::get<5>(parsed); - - maybe_initialize_npu(device); - if (!device && !scalarType && !copy && !opt_memory_format.has_value()) { - Py_INCREF(self); - return THPVariable_Wrap(self_); - } else if (!device && !scalarType) { - return THPVariable_Wrap( - dispatch_to(self_, non_blocking, copy, opt_memory_format)); - } else if (!device) { - return THPVariable_Wrap(dispatch_to(self_, *scalarType, non_blocking, copy, opt_memory_format)); - } else if (!scalarType) { - return THPVariable_Wrap(dispatch_to(self_, *device, non_blocking, copy, opt_memory_format)); - } else { - return THPVariable_Wrap(dispatch_to(self_, *device, *scalarType, non_blocking, copy, opt_memory_format)); - } - Py_RETURN_NONE; - END_HANDLE_TH_ERRORS -} - -static PyObject * THPVariable_type(PyObject* self, PyObject* args, PyObject* kwargs) -{ - HANDLE_TH_ERRORS - static torch::PythonArgParser parser({ - "type(Tensor temp, PyObject* dtype=None, bool non_blocking=False, *, MemoryFormat? memory_format=None)", - "type(Tensor temp, PyObject* dtype=None, bool async=False, *, MemoryFormat? 
memory_format=None)|deprecated" - }); - - torch::ParsedArgs<4> parsed_args; - auto r = parser.parse(args, kwargs, parsed_args); - auto self_ = r.tensor(0); - if(r.has_torch_function()){ - return torch::handle_torch_function(r, args, kwargs, THPVariableClass, "torch.Tensor"); - } - - if (r.isNone(1)) { - return THPUtils_packString(_options_to_string_npu(self_.options())); - } - auto obj = r.pyobject(1); - auto opt_memory_format = r.memoryformatOptional(3); - std::string type_name; - bool is_dtype = false; - if (PyType_Check(obj)) { - if (obj == THPVariableClass) { - type_name = "torch.Tensor"; - } else { - type_name = ((PyTypeObject*)obj)->tp_name; - } - } else if (THPUtils_checkString(obj)) { - type_name = THPUtils_unpackString(obj); - } else if (THPDtype_Check(obj)) { - is_dtype = true; - } else { - throw torch::TypeError("dtype must be a type, str, or dtype object"); - } - c10::ScalarType scalar_type; - c10::Device device = self_.device(); - if (is_dtype) { - scalar_type = r.scalartype(1); - } else { - at::TensorOptions options = torch::utils::options_from_string(type_name); - scalar_type = at::typeMetaToScalarType(options.dtype()); - auto device_type = options.device().type(); - if (device_type != device.type()) { - device = at::Device(device_type); - } - } - maybe_initialize_npu(device); - return THPVariable_Wrap(dispatch_to(self_, device, scalar_type, r.toBool(1), false, opt_memory_format)); - END_HANDLE_TH_ERRORS -} - -static PyObject * THPVariable_is_npu(PyObject* self, PyObject* args, PyObject* kwargs) -{ - HANDLE_TH_ERRORS - static torch::PythonArgParser parser({ - "type(Tensor temp)" - }); - torch::ParsedArgs<1> parsed_args; - auto r = parser.parse(args, kwargs, parsed_args); - auto self_ = r.tensor(0); - return torch::autograd::utils::wrap(at_npu::key::isDeviceTensor(self_)); - END_HANDLE_TH_ERRORS -} - -static PyObject * THPVariable_new_empty(PyObject* self, PyObject* args, PyObject* kwargs) -{ - HANDLE_TH_ERRORS - static torch::PythonArgParser parser( - { - "new_empty(Tensor self, IntArrayRef size, *, ScalarType dtype=None, " - "Layout layout=torch.strided, Device device=None, bool " - "pin_memory=False, bool requires_grad=False)", - }, - true); - torch::ParsedArgs<7> parsed_args; - auto r = parser.parse(args, kwargs, parsed_args); - auto self_ = r.tensor(0); - if (r.has_torch_function()) { - return torch::handle_torch_function(r, args, kwargs, THPVariableClass, "torch.Tensor"); - } - auto device = at_npu::key::parse_npu_device_with_default(r.args[4], self_.device()); - maybe_initialize_npu(device); - const auto options = at::TensorOptions() - .dtype(r.scalartypeWithDefault(2, self_.scalar_type())) - .device(device) - .layout(r.layoutWithDefault(3, c10::layout_from_backend(self_.options().backend()))) - .requires_grad(r.toBool(6)) - .pinned_memory(r.toBool(5)); - auto dispatch_new_empty = [](at::Tensor & self, c10::IntArrayRef size, at::TensorOptions options) -> at::Tensor { - pybind11::gil_scoped_release no_gil; - return self.new_empty(size, options); - }; - return torch::autograd::utils::wrap(dispatch_new_empty(self_, r.intlist(1), options).set_requires_grad(r.toBool(6))); - Py_RETURN_NONE; - END_HANDLE_TH_ERRORS -} - -static PyObject * THPVariable_new_empty_strided(PyObject* self_, PyObject* args, PyObject* kwargs) -{ - HANDLE_TH_ERRORS - static torch::PythonArgParser parser( - { - "new_empty_strided(Tensor self, IntArrayRef size, IntArrayRef " - "stride, *, ScalarType dtype=None, Layout layout=torch.strided, " - "Device device=None, bool pin_memory=False, bool " - 
"requires_grad=False)", - }, - true); - torch::ParsedArgs<8> parsed_args; - auto r = parser.parse(args, kwargs, parsed_args); - auto self_ = r.tensor(0); - if (r.has_torch_function()) { - return torch::handle_torch_function(r, args, kwargs, THPVariableClass, "torch.Tensor"); - } - auto device = at_npu::key::parse_npu_device_with_default(r.args[5], self_.device()); - maybe_initialize_npu(device); - const auto options = at::TensorOptions() - .dtype(r.scalartypeWithDefault(3, self_.scalar_type())) - .device(device) - .layout(r.layoutWithDefault(4, c10::layout_from_backend(self_.options().backend()))) - .requires_grad(r.toBool(7)) - .pinned_memory(r.toBool(6)); - auto dispatch_new_empty_strided = [](at::Tensor & self, c10::IntArrayRef size, c10::IntArrayRef stride, at::TensorOptions options) -> at::Tensor { - pybind11::gil_scoped_release no_gil; - return self.new_empty_strided(size, stride, options); - }; - return torch::autograd::utils::wrap(dispatch_new_empty_strided(self_, r.intlist(1), r.intlist(2), options).set_requires_grad(r.toBool(7))); - Py_RETURN_NONE; - END_HANDLE_TH_ERRORS -} - -static PyObject * THPVariable_record_stream(PyObject* self, PyObject* args) -{ - HANDLE_TH_ERRORS - PyObject *_tensor, *_stream; - if (!PyArg_ParseTuple(args, "OO", &_tensor, &_stream)) { - throw torch::TypeError("record_stream useage: tensor.record_stream(stream)"); - } - auto& self_ = reinterpret_cast(_tensor)->cdata; - c10_npu::NPUCachingAllocator::recordStream(self_.storage().data_ptr(), c10_npu::NPUStream::unpack(((THNPStream*)_stream)->cdata)); - Py_RETURN_NONE; - END_HANDLE_TH_ERRORS -} - -// autograd methods on torch._C -static PyMethodDef TorchTensorMethods[] = { // NOLINT - {"npu", castPyCFunctionWithKeywords(THPVariable_npu), METH_VARARGS | METH_KEYWORDS, NULL}, - {"to", castPyCFunctionWithKeywords(THPVariable_to), METH_VARARGS | METH_KEYWORDS, NULL}, - {"type", castPyCFunctionWithKeywords(THPVariable_type), METH_VARARGS | METH_KEYWORDS, NULL}, - {"is_npu", castPyCFunctionWithKeywords(THPVariable_is_npu), METH_VARARGS | METH_KEYWORDS, NULL}, - {"record_stream", (PyCFunction)(void(*)(void))THPVariable_record_stream, METH_VARARGS, NULL}, - {"new_empty", castPyCFunctionWithKeywords(THPVariable_new_empty), METH_VARARGS | METH_KEYWORDS, NULL}, - {"new_empty_strided", castPyCFunctionWithKeywords(THPVariable_new_empty_strided), METH_VARARGS | METH_KEYWORDS, NULL}, - {nullptr, nullptr, 0, nullptr} -}; PyMethodDef* tensor_functions();