diff --git a/torch_npu/csrc/aten/ops/PdistKernelNpu.cpp b/torch_npu/csrc/aten/ops/PdistKernelNpu.cpp
index c6db543662ca177e69ee18c175f24158c37436fb..92769b600b2ac4e8fa0d6117e1c5b43226a897a0 100644
--- a/torch_npu/csrc/aten/ops/PdistKernelNpu.cpp
+++ b/torch_npu/csrc/aten/ops/PdistKernelNpu.cpp
@@ -12,59 +12,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "torch_npu/csrc/framework/utils/OpAdapter.h"
-#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h"
 #include "torch_npu/csrc/aten/NPUNativeFunctions.h"
+#include "op_plugin/ops/OpInterface.h"
 
 namespace at_npu {
 namespace native {
-
-at::Tensor& pdist_out_npu_nocheck(
-    at::Tensor& result,
-    const at::Tensor& self,
-    float p) {
-  OpCommand cmd;
-  cmd.Name("Pdist")
-      .Input(self)
-      .Attr("p", p)
-      .Output(result)
-      .Run();
-
-  return result;
-}
-
 at::Tensor NPUNativeFunctions::_pdist_forward(const at::Tensor& self, double p) {
-  at::Tensor result;
-  if (self.size(0) <= 1) {
-    result = OpPreparation::ApplyTensor(self, {0});
-  } else {
-    // double is not supported in NPU, type of P needs to be converted from double to float.
-    float p_float;
-    if (std::isinf(p)) {
-      p_float = std::numeric_limits<float>::infinity();
-    } else {
-      TORCH_CHECK(p <= std::numeric_limits<float>::max(), "npu dose not support float64" );
-      p_float = static_cast<float>(p);
-    }
-    auto outputSize = pdist_npu_output_size(self, p_float);
-    result = OpPreparation::ApplyTensor(self, outputSize);
-    if(self.size(1) == 0){
-      result.fill_(0);
-    } else {
-      pdist_out_npu_nocheck(result, self, p_float);
-    }
-  }
-  return result;
+  return op_plugin::_pdist_forward(self, p);
 }
 
 at::Tensor NPUNativeFunctions::pdist(const at::Tensor& self, double p) {
-  TORCH_CHECK(self.dim() == 2,
-      "pdist only supports 2D tensors, got: ", self.dim(), "D");
-  TORCH_CHECK(at::isFloatingType(self.scalar_type()), "pdist only supports floating-point dtypes");
-  TORCH_CHECK(p >= 0, "pdist only supports non-negative p values");
-
-  return at::_pdist_forward(self, p);
+  return op_plugin::pdist(self, p);
 }
 
 } // namespace native
-} // namespace at_npu
\ No newline at end of file
+} // namespace at_npu
diff --git a/torch_npu/csrc/aten/ops/PreluKernelNpu.cpp b/torch_npu/csrc/aten/ops/PreluKernelNpu.cpp
index 2b9baa4be899f8383813e837634c2e147a60e210..a7ec42d3fb3ee8fb5524b566389067460f9391ca 100644
--- a/torch_npu/csrc/aten/ops/PreluKernelNpu.cpp
+++ b/torch_npu/csrc/aten/ops/PreluKernelNpu.cpp
@@ -12,27 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "torch_npu/csrc/framework/utils/OpAdapter.h" #include "torch_npu/csrc/aten/NPUNativeFunctions.h" +#include "op_plugin/ops/OpInterface.h" + namespace at_npu { namespace native { at::Tensor NPUNativeFunctions::prelu(const at::Tensor& self, const at::Tensor& weight_) { - auto input = self.contiguous(); - auto weight = weight_.contiguous(); - - // calculate the output size - auto outputSize = input_same_output_size(self); - at::Tensor result = OpPreparation::ApplyTensor(input, outputSize); - - OpCommand cmd; - cmd.Name("PRelu") - .Input(self) - .Input(weight) - .Output(result) - .Run(); - return result; + return op_plugin::prelu(self, weight_); } } // namespace native -} // namespace at_npu \ No newline at end of file +} // namespace at_npu diff --git a/torch_npu/csrc/aten/ops/ProdKernelNpu.cpp b/torch_npu/csrc/aten/ops/ProdKernelNpu.cpp index 388f130f43142999980f3a7c31b0c4d545ac8b7a..79f1087a871d3cc9b33aaac3b44980675c0477f5 100644 --- a/torch_npu/csrc/aten/ops/ProdKernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/ProdKernelNpu.cpp @@ -14,88 +14,28 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "torch_npu/csrc/framework/utils/OpAdapter.h" -#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" #include "torch_npu/csrc/aten/NPUNativeFunctions.h" +#include "op_plugin/ops/OpInterface.h" namespace at_npu { namespace native { -namespace { -static inline int64_t calculate_prod_output_format( - const at::Tensor& self, - at::IntArrayRef size) { - int64_t npu_format = CalcuOpUtil::GetTensorNpuFormat(self); - // scalar scene no support nz - if (size.empty()) { - npu_format = ACL_FORMAT_ND; - } - return npu_format; -} -} - -at::Tensor& prod_out_npu_nocheck( - at::Tensor& result, - const at::Tensor& self, - c10::SmallVector dimList, - bool keepdim, - c10::optional dtype) { - OpCommand cmd; - cmd.Name("ReduceProd") - .Input(self) - .Input(dimList) - .Output(result) - .Attr("keep_dims", keepdim) - .Run(); - - return result; -} - at::Tensor& NPUNativeFunctions::prod_out( const at::Tensor& self, int64_t dim, bool keepdim, c10::optional dtype, at::Tensor& result) { - at::Tensor self_tmp = self; - // fp16 transform:fp32 for precise - if (self.scalar_type() == at::ScalarType::Half) { - self_tmp = NPUNativeFunctions::npu_dtype_cast(self, at::ScalarType::Float); - } - - auto outputSize = prod_npu_output_size(self, dim, keepdim); - at::ScalarType dstType = dtype.has_value() ? 
-
-  OpPreparation::CheckOut(
-      {self_tmp},
-      result,
-      ACL_FORMAT_ND,
-      dstType,
-      outputSize);
-
-  at::Tensor result_tmp = result;
-  if (result_tmp.scalar_type() == at::ScalarType::Half) {
-    result_tmp = NPUNativeFunctions::npu_dtype_cast(result_tmp, at::ScalarType::Float);
-  }
-
-  prod_out_npu_nocheck(result_tmp, self_tmp, {dim}, keepdim, dtype);
-
-  if (result_tmp.scalar_type() != dstType) {
-    result_tmp = NPUNativeFunctions::npu_dtype_cast(result_tmp, dstType);
-  }
-  result.copy_(result_tmp);
-
-  return result;
+  return op_plugin::prod_out(self, dim, keepdim, dtype, result);
 }
 
-at::Tensor& NPUNativeFunctions::prod_out(
-    const at::Tensor& self,
+at::Tensor& NPUNativeFunctions::prod_out( const at::Tensor& self,
     at::Dimname dim,
     bool keepdim,
     c10::optional<at::ScalarType> dtype,
     at::Tensor& result) {
-  return prod_out(
-      self, dimname_to_position(self, dim), keepdim, dtype, result);
+  return op_plugin::prod_out(self, dim, keepdim, dtype, result);
+
 }
 
 at::Tensor NPUNativeFunctions::prod(
@@ -103,31 +43,7 @@ at::Tensor NPUNativeFunctions::prod(
     int64_t dim,
     bool keepdim,
     c10::optional<at::ScalarType> dtype) {
-  at::Tensor self_tmp = self;
-  // Input transform:fp16 to fp32
-  if (self.scalar_type() == at::ScalarType::Half) {
-    self_tmp = NPUNativeFunctions::npu_dtype_cast(self, at::ScalarType::Float);
-  }
-
-  at::ScalarType dstType = dtype.has_value() ? dtype.value() : self.scalar_type();
-
-  // calculate the output size
-  auto outputSize = prod_npu_output_size(self_tmp, dim, keepdim);
-
-  int64_t npu_format = calculate_prod_output_format(self_tmp, outputSize);
-
-  // construct the output tensor of the NPU
-  at::Tensor result = OpPreparation::ApplyTensorWithFormat(
-      outputSize, self_tmp.options(), npu_format);
-
-  // calculate the output result of the NPU
-  prod_out_npu_nocheck(result, self_tmp, {dim}, keepdim, dtype);
-
-  if (result.scalar_type() != dstType) {
-    result = NPUNativeFunctions::npu_dtype_cast(result, dstType);
-  }
-
-  return result;
+  return op_plugin::prod(self, dim, keepdim, dtype);
 }
 
 at::Tensor NPUNativeFunctions::prod(
@@ -135,36 +51,11 @@ at::Tensor NPUNativeFunctions::prod(
     at::Dimname dim,
     bool keepdim,
     c10::optional<at::ScalarType> dtype) {
-  return prod(self, dimname_to_position(self, dim), keepdim, dtype);
+  return op_plugin::prod(self, dim, keepdim, dtype);
 }
 
 at::Tensor NPUNativeFunctions::prod(const at::Tensor& self, c10::optional<at::ScalarType> dtype) {
-  at::Tensor self_tmp = self;
-  // Input transform:fp16 to fp32
-  if (self.scalar_type() == at::ScalarType::Half) {
-    self_tmp = NPUNativeFunctions::npu_dtype_cast(self, at::ScalarType::Float);
-  }
-
-  at::ScalarType dstType = dtype.has_value() ? dtype.value() : self.scalar_type();
-
-  // calculate the output size
-  auto outputSize = prod_npu_output_size(self, false);
-
-  int64_t npu_format = calculate_prod_output_format(self, outputSize);
-
-  // construct the output tensor of the NPU
-  at::Tensor result = OpPreparation::ApplyTensorWithFormat(
-      outputSize, self_tmp.options(), npu_format);
-
-  // calculate the output result of the NPU
-  prod_out_npu_nocheck(
-      result, self_tmp, CalcuOpUtil::GetDimlistForTensor(self), false, dtype);
-
-  if (result.scalar_type() != dstType) {
-    result = npu_dtype_cast(result, dstType);
-  }
-
-  return result;
+  return op_plugin::prod(self, dtype);
 }
 } // namespace native
-} // namespace at_npu
\ No newline at end of file
+} // namespace at_npu
diff --git a/torch_npu/csrc/aten/ops/PsRoiPoolingKernelNpu.cpp b/torch_npu/csrc/aten/ops/PsRoiPoolingKernelNpu.cpp
index 243dc3f9866152062ae3891cbe5c0158f23ed4d4..f0722a0e057b9dacff12e7da149cb47654ce89af 100644
--- a/torch_npu/csrc/aten/ops/PsRoiPoolingKernelNpu.cpp
+++ b/torch_npu/csrc/aten/ops/PsRoiPoolingKernelNpu.cpp
@@ -16,76 +16,11 @@
 
 #include <torch/csrc/autograd/custom_function.h>
 
-#include "torch_npu/csrc/framework/utils/OpAdapter.h"
 #include "torch_npu/csrc/aten/NPUNativeFunctions.h"
+#include "op_plugin/ops/OpInterface.h"
 
 namespace at_npu {
 namespace native {
-using torch::autograd::AutogradContext;
-using tensor_list = std::vector<at::Tensor>;
-at::Tensor& ps_roi_pooling_npu_nocheck(
-    at::Tensor& result,
-    const at::Tensor& self,
-    const at::Tensor& rois,
-    double spatial_scale,
-    int64_t group_size,
-    int64_t output_dim) {
-  OpCommand cmd;
-  cmd.Name("PSROIPoolingV2")
-      .Input(self, "x", ACL_FORMAT_NCHW)
-      .Input(rois)
-      .Output(result, "y", ACL_FORMAT_NCHW)
-      .Attr("spatial_scale", (float)spatial_scale)
-      .Attr("output_dim", output_dim)
-      .Attr("group_size", group_size)
-      .Run();
-
-  return result;
-}
-
-at::Tensor ps_roi_pooling(
-    const at::Tensor& self,
-    const at::Tensor& rois,
-    double spatial_scale,
-    int64_t group_size,
-    int64_t output_dim) {
-  auto outputSize ={
-      rois.size(0) * rois.size(2), output_dim, group_size, group_size};
-
-  at::Tensor result = OpPreparation::ApplyTensor(self, outputSize);
-
-  ps_roi_pooling_npu_nocheck(
-      result,
-      self,
-      rois,
-      spatial_scale,
-      group_size,
-      output_dim);
-
-  return result;
-}
-
-at::Tensor& ps_roi_pooling_backward_npu_nocheck(
-    at::Tensor& input_grad,
-    const at::Tensor& output_grad,
-    const at::Tensor& rois,
-    double spatial_scale,
-    int64_t group_size,
-    int64_t output_dim,
-    at::IntArrayRef input_size) {
-  OpCommand cmd;
-  cmd.Name("PSROIPoolingGradV2D")
-      .Input(output_grad, "x", ACL_FORMAT_NCHW)
-      .Input(rois)
-      .Output(input_grad, "y", ACL_FORMAT_NCHW)
-      .Attr("spatial_scale", (float)spatial_scale)
-      .Attr("group_size", group_size)
-      .Attr("output_dim", output_dim)
-      .Attr("input_size", input_size)
-      .Run();
-
-  return input_grad;
-}
 
 at::Tensor NPUNativeFunctions::npu_ps_roi_pooling_backward(
     const at::Tensor& output_grad,
@@ -94,75 +29,16 @@ at::Tensor NPUNativeFunctions::npu_ps_roi_pooling_backward(
     int64_t group_size,
     int64_t output_dim,
     at::IntArrayRef input_size) {
-  auto outputSize ={
-      rois.size(0), group_size * group_size * output_dim, input_size[0], input_size[1]};
-
-  at::Tensor input_grad = OpPreparation::ApplyTensor(output_grad, outputSize);
-
-  ps_roi_pooling_backward_npu_nocheck(
-      input_grad,
-      output_grad,
-      rois,
-      spatial_scale,
-      group_size,
-      output_dim,
-      input_size);
-
-  return input_grad;
+  return op_plugin::npu_ps_roi_pooling_backward(output_grad, rois, spatial_scale, group_size, output_dim, input_size);
 }
 
-class NPUPsRoiPoolingFunction: public torch::autograd::Function<NPUPsRoiPoolingFunction> {
-public:
-  static at::Tensor forward(AutogradContext *ctx,
-      const at::Tensor& self,
-      const at::Tensor& rois,
-      double spatial_scale,
-      int64_t group_size,
-      int64_t output_dim) {
-    ctx->saved_data["spatial_scale"] = spatial_scale;
-    ctx->saved_data["group_size"] = group_size;
-    ctx->saved_data["output_dim"] = output_dim;
-    c10::SmallVector<int64_t, N> input_size_vec = {self.size(2), self.size(3)};
-    at::IntArrayRef input_size(input_size_vec);
-    ctx->saved_data["input_size"] = input_size;
-    at::AutoNonVariableTypeMode g;
-    ctx->save_for_backward({self, rois});
-    return ps_roi_pooling(self, rois, spatial_scale, group_size, output_dim);
-  }
-
-  static tensor_list backward(AutogradContext *ctx,
-      tensor_list grad_outputs) {
-    auto spatial_scale = ctx->saved_data["spatial_scale"].toDouble();
-    auto group_size = ctx->saved_data["group_size"].toInt();
-    auto output_dim = ctx->saved_data["output_dim"].toInt();
-    auto input_size = ctx->saved_data["input_size"].toIntVector();
-    auto saved = ctx->get_saved_variables();
-    auto self = saved[0];
-    auto rois = saved[1];
-
-
-    at::Tensor result = NPUNativeFunctions::npu_ps_roi_pooling_backward(grad_outputs[0],
-        rois,
-        spatial_scale,
-        group_size,
-        output_dim,
-        input_size);
-    tensor_list output = {result,
-        at::Tensor(),
-        at::Tensor(),
-        at::Tensor(),
-        at::Tensor()};
-    return output;
-  }
-};
-
 at::Tensor NPUNativeFunctions::npu_ps_roi_pooling(const at::Tensor& self,
     const at::Tensor& rois,
     double spatial_scale,
     int64_t group_size,
     int64_t output_dim) {
-  return NPUPsRoiPoolingFunction::apply(self, rois, spatial_scale, group_size, output_dim);
+  return op_plugin::npu_ps_roi_pooling(self, rois, spatial_scale, group_size, output_dim);
 }
 
 } // namespace native
-} // namespace at_npu
\ No newline at end of file
+} // namespace at_npu
diff --git a/torch_npu/csrc/aten/ops/PutKernelNpu.cpp b/torch_npu/csrc/aten/ops/PutKernelNpu.cpp
index 7a880624cdc2a55ec9d331c10d40a55344c1e8da..1cd3882bdb37809eac99661b34efb16019510dea 100644
--- a/torch_npu/csrc/aten/ops/PutKernelNpu.cpp
+++ b/torch_npu/csrc/aten/ops/PutKernelNpu.cpp
@@ -12,9 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "torch_npu/csrc/framework/utils/OpAdapter.h"
-#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h"
 #include "torch_npu/csrc/aten/NPUNativeFunctions.h"
+#include "op_plugin/ops/OpInterface.h"
 
 namespace at_npu {
 namespace native {
@@ -24,29 +23,7 @@ at::Tensor& NPUNativeFunctions::put_(
     const at::Tensor& index,
     const at::Tensor& source,
     bool accumulate) {
-  TORCH_CHECK(index.numel() == source.numel(), "source should have the same number of elements as index");
-  if (source.numel() == 0) {
-    return self;
-  }
-  c10::SmallVector<at::Tensor, N> inputs = {self};
-  c10::SmallVector<at::Tensor, N> outputs = {self};
-  CalcuOpUtil::CheckMemoryOverLaps(inputs, outputs);
-
-  at::Tensor selfFlatten = NpuUtils::format_contiguous(self.reshape(-1));
-  at::Tensor indexFlatten = index.reshape({-1, 1});
-  at::Tensor sourceFlatten = source.reshape(-1);
-
-  OpCommand cmd;
-  accumulate ? cmd.Name("ScatterNdAdd") : cmd.Name("ScatterNdUpdate");
-  cmd.Input(selfFlatten)
-      .Input(indexFlatten)
-      .Input(sourceFlatten)
-      .Output(selfFlatten)
-      .Attr("use_locking", false)
-      .Run();
-
-  self.copy_(selfFlatten);
-  return self;
+  return op_plugin::put_(self, index, source, accumulate);
 }
 } // namespace native
-} // namespace at_npu
\ No newline at end of file
+} // namespace at_npu