diff --git a/torch_npu/csrc/aten/ops/LeKernelNpu.cpp b/torch_npu/csrc/aten/ops/LeKernelNpu.cpp index e3652715e118c5621c3b8b0aa50000bda1eefbb5..39de07d8efffc38cd59b18786b7d1d11626a6880 100644 --- a/torch_npu/csrc/aten/ops/LeKernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/LeKernelNpu.cpp @@ -13,121 +13,34 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "op_plugin/ops/OpInterface.h" #include "torch_npu/csrc/aten/NPUNativeFunctions.h" namespace at_npu { namespace native { -at::Tensor& le_out_npu_nocheck(const at::Tensor& self, at::Scalar other, at::Tensor& result) { - OpCommand cmd; - cmd.Name("LessEqual") - .Input(self) - .Input(other, self.scalar_type()) - .Output(result) - .Run(); - - return result; -} - at::Tensor& NPUNativeFunctions::le_out(const at::Tensor& self, const at::Scalar& other, at::Tensor& result) { - at::Tensor formatCastOfSelf = OpPreparation::CastBackToOriFormat(self); - auto outputSize = formatCastOfSelf.sizes(); - OpPreparation::CheckOut( - {self}, - result, - ACL_FORMAT_ND, - result.scalar_type(), - outputSize); - - le_out_npu_nocheck(formatCastOfSelf, other, result); - return result; -} - -at::Tensor& le_out_npu_nocheck(const at::Tensor& self, const at::Tensor& other, at::Tensor& result) { - auto unified_result = OpPreparation::comparison_op_check(result, self, other, true); - OpCommand cmd; - cmd.Name("LessEqual") - .Expect(unified_result) - .Input(self) - .Input(other) - .Output(result) - .Run(); - - return result; + return op_plugin::le_out(self, other, result); } at::Tensor& NPUNativeFunctions::le_out(const at::Tensor& self, const at::Tensor& other, at::Tensor& result) { - at::Tensor formatCastOfSelf = OpPreparation::CastBackToOriFormat(self); - at::Tensor formatCastOfOther = OpPreparation::CastBackToOriFormat(other); - auto outputSize = 
broadcast_ops_npu_output_size(formatCastOfSelf, formatCastOfOther); - - OpPreparation::CheckOut( - {self}, - result, - ACL_FORMAT_ND, - result.scalar_type(), - outputSize); - - le_out_npu_nocheck(formatCastOfSelf, formatCastOfOther, result); - return result; + return op_plugin::le_out(self, other, result); } at::Tensor NPUNativeFunctions::le(const at::Tensor& self, const at::Scalar& other) { - at::Tensor formatCastOfSelf = OpPreparation::CastBackToOriFormat(self); - at::Tensor result = OpPreparation::ApplyTensorWithFormat( - formatCastOfSelf.sizes(), - formatCastOfSelf.options().dtype(at::kBool), - ACL_FORMAT_ND); - le_out_npu_nocheck(formatCastOfSelf, other, result); - return result; + return op_plugin::le(self, other); } at::Tensor NPUNativeFunctions::le(const at::Tensor& self, const at::Tensor& other) { - at::Tensor formatCastOfSelf = OpPreparation::CastBackToOriFormat(self); - at::Tensor formatCastOfOther = OpPreparation::CastBackToOriFormat(other); - - auto outputSize = broadcast_ops_npu_output_size(formatCastOfSelf, formatCastOfOther); - at::Tensor result = OpPreparation::ApplyTensorWithFormat( - outputSize, - formatCastOfSelf.options().dtype(at::kBool), - ACL_FORMAT_ND); - - le_out_npu_nocheck(formatCastOfSelf, formatCastOfOther, result); - return result; + return op_plugin::le(self, other); } at::Tensor& NPUNativeFunctions::le_(at::Tensor& self, const at::Scalar& other) { - OpPreparation::CastBackToOriFormat(self); - OpPreparation::CheckMemory({self}, {self}); - at::Tensor result = OpPreparation::ApplyTensor( - self, - self.options().dtype(at::ScalarType::Byte)); - if (!NpuUtils::check_match(&self)) { - at::Tensor contiguousSelf = NpuUtils::format_contiguous(self); - le_out_npu_nocheck(contiguousSelf, other, result); - } else { - le_out_npu_nocheck(self, other, result); - } - self.copy_(result); - return self; + return op_plugin::le_(self, other); } at::Tensor& NPUNativeFunctions::le_(at::Tensor& self, const at::Tensor& other) { - 
OpPreparation::CastBackToOriFormat(self); - at::Tensor ori_other = OpPreparation::CastBackToOriFormat(other); - OpPreparation::CheckMemory({self, ori_other}, {self}); - at::Tensor result = OpPreparation::ApplyTensor( - self, - self.options().dtype(at::ScalarType::Byte)); - if (!NpuUtils::check_match(&self)) { - at::Tensor contiguousSelf = NpuUtils::format_contiguous(self); - le_out_npu_nocheck(contiguousSelf, ori_other, result); - } else { - le_out_npu_nocheck(self, ori_other, result); - } - self.copy_(result); - return self; + return op_plugin::le_(self, other); } } // namespace native diff --git a/torch_npu/csrc/aten/ops/NmsRotatedKernelNpu.cpp b/torch_npu/csrc/aten/ops/NmsRotatedKernelNpu.cpp index 31a8b2c38e18fbfc3e346609a1c07382fd46cb7b..0078b6fd5cb2783ee93a0ea85ba67c2341066b22 100644 --- a/torch_npu/csrc/aten/ops/NmsRotatedKernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/NmsRotatedKernelNpu.cpp @@ -14,45 +14,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "op_plugin/ops/OpInterface.h" #include "torch_npu/csrc/aten/NPUNativeFunctions.h" namespace at_npu { namespace native { -tuple<at::Tensor, at::Tensor> NPUNativeFunctions::npu_nms_rotated(const at::Tensor& dets, const at::Tensor& scores, +std::tuple<at::Tensor, at::Tensor> NPUNativeFunctions::npu_nms_rotated(const at::Tensor& dets, const at::Tensor& scores, double iouThreshold, double scoreThreshold, int64_t maxOutputSize, int64_t mode) { - // the Op only support fp32 currently!
- auto originDtype = dets.scalar_type(); - at::Tensor detsCast = dets; - at::Tensor scoresCast = scores; - at::Tensor labels = at::zeros({}, scores.options().dtype(at::kInt)); - if (originDtype != at::ScalarType::Float) { - detsCast = NPUNativeFunctions::npu_dtype_cast(dets, at::kFloat); - scoresCast = NPUNativeFunctions::npu_dtype_cast(scores, at::kFloat); - } - c10::SmallVector selectedIndexSize = {dets.size(0)}; - at::Tensor selectedBox = OpPreparation::ApplyTensor(detsCast); - at::Tensor selectedIndex = OpPreparation::ApplyTensor(selectedIndexSize, dets.options().dtype(at::kInt), dets); - - c10::SmallVector output_sync_idx = {0, 1}; - OpCommand cmd; - cmd.Sync(output_sync_idx) - .Name("RotatedNMS") - .Input(detsCast) - .Input(scoresCast) - .Input(labels) - .Output(selectedBox) - .Output(selectedIndex) - .Attr("iou_threshold", (float)iouThreshold) - .Attr("score_threshold", (float)scoreThreshold) - .Attr("max_output_size", maxOutputSize) - .Attr("mode", mode) - .Run(); - - at::Tensor selectedNum = - OpPreparation::ApplyTensor({1}, scores.options().dtype(at::kInt), scores).fill_(selectedIndex.size(0)); - return std::tie(selectedIndex, selectedNum); + return op_plugin::npu_nms_rotated(dets, scores, iouThreshold, scoreThreshold, maxOutputSize, mode); } } // namespace native diff --git a/torch_npu/csrc/aten/ops/NmsV4KernelNpu.cpp b/torch_npu/csrc/aten/ops/NmsV4KernelNpu.cpp index 021004234ac392967e998240fe2a0e5916628474..fcdb65ec0dfd1ea647ce35768af68d0a7f93030d 100644 --- a/torch_npu/csrc/aten/ops/NmsV4KernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/NmsV4KernelNpu.cpp @@ -12,68 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "torch_npu/csrc/framework/utils/OpAdapter.h" -#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" +#include "op_plugin/ops/OpInterface.h" #include "torch_npu/csrc/aten/NPUNativeFunctions.h" namespace at_npu { namespace native { - -tuple<at::Tensor, at::Tensor> nms_v4_npu_nocheck( - const at::Tensor& self, - const at::Tensor& scores, - at::Scalar max_output_size, - const at::Tensor& iou_threshold, - const at::Tensor& scores_threshold, - bool pad_to_max_output_size, - at::Tensor& selected_indices, - at::Tensor& valid_outputs) { - at::Tensor max_output_size_tensor = OpPreparation::ApplyTensor( - {}, self.options().dtype(at::kInt), self).fill_(max_output_size); - OpCommand cmd; - cmd.Name("NonMaxSuppressionV4") - .Input(self) - .Input(scores) - .Input(max_output_size_tensor) - .Input(iou_threshold) - .Input(scores_threshold) - .Output(selected_indices) - .Output(valid_outputs) - .Attr("pad_to_max_output_size", pad_to_max_output_size) - .Run(); - - return std::tuple(selected_indices, valid_outputs); -} - -tuple<at::Tensor, at::Tensor> NPUNativeFunctions::npu_nms_v4( +std::tuple<at::Tensor, at::Tensor> NPUNativeFunctions::npu_nms_v4( const at::Tensor& self, const at::Tensor& scores, const at::Scalar& max_output_size, const at::Tensor& iou_threshold, const at::Tensor& scores_threshold, bool pad_to_max_output_size) { - auto outputSizes = nms_v4_npu_output_size(max_output_size); - - at::Tensor selected_indices = OpPreparation::ApplyTensor( - std::get<0>(outputSizes), - self.options().dtype(at::kInt), - self); - at::Tensor valid_outputs = OpPreparation::ApplyTensor( - std::get<1>(outputSizes), - self.options().dtype(at::kInt), - self); - - nms_v4_npu_nocheck( - self, - scores, - max_output_size, - iou_threshold, - scores_threshold, - pad_to_max_output_size, - selected_indices, - valid_outputs); - - return std::tuple(selected_indices, valid_outputs); + return op_plugin::npu_nms_v4(self, scores, max_output_size, iou_threshold, scores_threshold, pad_to_max_output_size); } } // namespace native diff --git 
a/torch_npu/csrc/aten/ops/NmsWithMaskKernelNpu.cpp b/torch_npu/csrc/aten/ops/NmsWithMaskKernelNpu.cpp index 8cb75fd1670d8dbb265513177c1b823808a363a4..900af922781b52568b836c59f8e957b4fbc81797 100644 --- a/torch_npu/csrc/aten/ops/NmsWithMaskKernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/NmsWithMaskKernelNpu.cpp @@ -14,40 +14,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "torch_npu/csrc/framework/utils/OpAdapter.h" -#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" +#include "op_plugin/ops/OpInterface.h" #include "torch_npu/csrc/aten/NPUNativeFunctions.h" namespace at_npu { namespace native { - -tuple<at::Tensor, at::Tensor, at::Tensor> nms_with_mask_npu_nocheck( - const at::Tensor& input, - at::Scalar iou_threshold, - at::Tensor& boxes, - at::Tensor& idx, - at::Tensor& mask) { - float iouThresholdValue = CalcuOpUtil::GetScalarFloatValue(iou_threshold); - OpCommand cmd; - cmd.Name("NMSWithMask") - .Input(input) - .Output(boxes) - .Output(idx) - .Output(mask) - .Attr("iou_threshold", iouThresholdValue) - .Run(); - return std::tuple(boxes, idx, mask); -} - -tuple<at::Tensor, at::Tensor, at::Tensor> NPUNativeFunctions::npu_nms_with_mask( +std::tuple<at::Tensor, at::Tensor, at::Tensor> NPUNativeFunctions::npu_nms_with_mask( const at::Tensor& input, const at::Scalar& iou_threshold) { - auto outputSizes = nms_with_mask_npu_output_size(input); - at::Tensor boxes = OpPreparation::ApplyTensor(input, std::get<0>(outputSizes)); - at::Tensor idx = OpPreparation::ApplyTensor(std::get<1>(outputSizes), input.options().dtype(at::kInt), input); - at::Tensor mask = OpPreparation::ApplyTensor(std::get<2>(outputSizes), input.options().dtype(at::kByte), input); - nms_with_mask_npu_nocheck(input, iou_threshold, boxes, idx, mask); - return std::tuple(boxes, idx, mask); + return op_plugin::npu_nms_with_mask(input, iou_threshold); } } // namespace native diff --git a/torch_npu/csrc/aten/ops/pooling/AvgPool2dBackwardKernelNpu.cpp b/torch_npu/csrc/aten/ops/pooling/AvgPool2dBackwardKernelNpu.cpp index 
f3f6ee21006faf207e0da81faaa61d6244c71a6f..379345a070b8c159a2e25622451390200cef8120 100644 --- a/torch_npu/csrc/aten/ops/pooling/AvgPool2dBackwardKernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/pooling/AvgPool2dBackwardKernelNpu.cpp @@ -14,55 +14,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" -#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "op_plugin/ops/OpInterface.h" #include "torch_npu/csrc/aten/NPUNativeFunctions.h" namespace at_npu { namespace native { - -at::Tensor& avg_pool2d_backward_out_npu_nocheck( - const at::Tensor& grad_output, - const at::Tensor& self, - at::IntArrayRef kernel_size, - at::IntArrayRef stride, - at::IntArrayRef padding, - bool ceil_mode, - bool count_include_pad, - at::Tensor& grad_input) { - int64_t strideH = 1; - int64_t strideW = 1; - if (!stride.empty()) { - strideH = stride[0]; - strideW = stride[1]; - } - c10::SmallVector kernelSize = {1, 1, kernel_size[0], kernel_size[1]}; - c10::SmallVector stridesSize = {1, 1, strideH, strideW}; - string padding_mode = "CALCULATED"; - c10::SmallVector pads = {padding[0], padding[0], padding[1], padding[1]}; - string format = "NCHW"; - bool pooling = false; - bool exclusive = (count_include_pad == false) ? 
true : false; - - OpPreparation::CheckMemory({grad_output, self}, {grad_input}); - OpCommand cmd; - cmd.Name("AvgPoolV2Grad") - .Input(self.sizes()) - .Input(grad_output) - .Output(grad_input) - .Attr("ksize", kernelSize) - .Attr("strides", stridesSize) - .Attr("padding_mode", padding_mode) - .Attr("pads", pads) - .Attr("data_format", format) - .Attr("global_pooling", pooling) - .Attr("ceil_mode", ceil_mode) - .Attr("exclusive", exclusive) - .Run(); - return grad_input; -} - at::Tensor& NPUNativeFunctions::avg_pool2d_backward_out( const at::Tensor& grad_output, const at::Tensor& self, @@ -73,36 +30,8 @@ at::Tensor& NPUNativeFunctions::avg_pool2d_backward_out( bool count_include_pad, c10::optional<int64_t> divisor_override, at::Tensor& grad_input) { - TORCH_CHECK(kernel_size.size() == 1 || kernel_size.size() == 2, - "avg_pool2d: kernel_size must either be a single int, or a tuple of two ints"); - if (kernel_size.size() == 1) { - c10::SmallVector kernel_sizes = {kernel_size[0], kernel_size[0]}; - kernel_size = at::IntArrayRef(kernel_sizes); - } - TORCH_CHECK(stride.empty() || stride.size() == 1 || stride.size() == 2, - "avg_pool2d: stride must either be omitted, a single int, or a tuple of two ints"); - stride = stride.empty() ? 
kernel_size : stride; - TORCH_CHECK(padding.size() == 1 || padding.size() == 2, - "avg_pool2d: padding must either be a single int, or a tuple of two ints"); - if (padding.size() == 1) { - c10::SmallVector paddings = {padding[0], padding[0]}; - padding = at::IntArrayRef(paddings); - } - const int64_t ndim = self.ndimension(); - TORCH_CHECK((ndim == 3 || ndim == 4), - "non-empty 3D or 4D (batch mode) tensor expected for input"); - TORCH_CHECK(!divisor_override.has_value() || divisor_override.value() != 0, "divisor must be not zero"); - - avg_pool2d_backward_out_npu_nocheck( - grad_output, - self, - kernel_size, - stride, - padding, - ceil_mode, - count_include_pad, - grad_input); - return grad_input; + return op_plugin::avg_pool2d_backward_out(grad_output, self, kernel_size, stride, padding, ceil_mode, + count_include_pad, divisor_override, grad_input); } at::Tensor NPUNativeFunctions::avg_pool2d_backward( @@ -114,21 +43,9 @@ at::Tensor NPUNativeFunctions::avg_pool2d_backward( bool ceil_mode, bool count_include_pad, c10::optional<int64_t> divisor_override) { - at::Tensor grad_input = OpPreparation::ApplyTensor(self); - - NPUNativeFunctions::avg_pool2d_backward_out( - grad_output, - self, - kernel_size, - stride, - padding, - ceil_mode, - count_include_pad, - divisor_override, - grad_input); - return grad_input; + return op_plugin::avg_pool2d_backward(grad_output, self, kernel_size, stride, padding, ceil_mode, count_include_pad, + divisor_override); } - } // namespace native } // namespace at_npu