diff --git a/torch_npu/csrc/aten/ops/LeKernelNpu.cpp b/torch_npu/csrc/aten/ops/LeKernelNpu.cpp index e3652715e118c5621c3b8b0aa50000bda1eefbb5..39de07d8efffc38cd59b18786b7d1d11626a6880 100644 --- a/torch_npu/csrc/aten/ops/LeKernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/LeKernelNpu.cpp @@ -13,121 +13,34 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "op_plugin/ops/OpInterface.h" #include "torch_npu/csrc/aten/NPUNativeFunctions.h" namespace at_npu { namespace native { -at::Tensor& le_out_npu_nocheck(const at::Tensor& self, at::Scalar other, at::Tensor& result) { - OpCommand cmd; - cmd.Name("LessEqual") - .Input(self) - .Input(other, self.scalar_type()) - .Output(result) - .Run(); - - return result; -} - at::Tensor& NPUNativeFunctions::le_out(const at::Tensor& self, const at::Scalar& other, at::Tensor& result) { - at::Tensor formatCastOfSelf = OpPreparation::CastBackToOriFormat(self); - auto outputSize = formatCastOfSelf.sizes(); - OpPreparation::CheckOut( - {self}, - result, - ACL_FORMAT_ND, - result.scalar_type(), - outputSize); - - le_out_npu_nocheck(formatCastOfSelf, other, result); - return result; -} - -at::Tensor& le_out_npu_nocheck(const at::Tensor& self, const at::Tensor& other, at::Tensor& result) { - auto unified_result = OpPreparation::comparison_op_check(result, self, other, true); - OpCommand cmd; - cmd.Name("LessEqual") - .Expect(unified_result) - .Input(self) - .Input(other) - .Output(result) - .Run(); - - return result; + return op_plugin::le_out(self, other, result); } at::Tensor& NPUNativeFunctions::le_out(const at::Tensor& self, const at::Tensor& other, at::Tensor& result) { - at::Tensor formatCastOfSelf = OpPreparation::CastBackToOriFormat(self); - at::Tensor formatCastOfOther = OpPreparation::CastBackToOriFormat(other); - auto outputSize = 
broadcast_ops_npu_output_size(formatCastOfSelf, formatCastOfOther); - - OpPreparation::CheckOut( - {self}, - result, - ACL_FORMAT_ND, - result.scalar_type(), - outputSize); - - le_out_npu_nocheck(formatCastOfSelf, formatCastOfOther, result); - return result; + return op_plugin::le_out(self, other, result); } at::Tensor NPUNativeFunctions::le(const at::Tensor& self, const at::Scalar& other) { - at::Tensor formatCastOfSelf = OpPreparation::CastBackToOriFormat(self); - at::Tensor result = OpPreparation::ApplyTensorWithFormat( - formatCastOfSelf.sizes(), - formatCastOfSelf.options().dtype(at::kBool), - ACL_FORMAT_ND); - le_out_npu_nocheck(formatCastOfSelf, other, result); - return result; + return op_plugin::le(self, other); } at::Tensor NPUNativeFunctions::le(const at::Tensor& self, const at::Tensor& other) { - at::Tensor formatCastOfSelf = OpPreparation::CastBackToOriFormat(self); - at::Tensor formatCastOfOther = OpPreparation::CastBackToOriFormat(other); - - auto outputSize = broadcast_ops_npu_output_size(formatCastOfSelf, formatCastOfOther); - at::Tensor result = OpPreparation::ApplyTensorWithFormat( - outputSize, - formatCastOfSelf.options().dtype(at::kBool), - ACL_FORMAT_ND); - - le_out_npu_nocheck(formatCastOfSelf, formatCastOfOther, result); - return result; + return op_plugin::le(self, other); } at::Tensor& NPUNativeFunctions::le_(at::Tensor& self, const at::Scalar& other) { - OpPreparation::CastBackToOriFormat(self); - OpPreparation::CheckMemory({self}, {self}); - at::Tensor result = OpPreparation::ApplyTensor( - self, - self.options().dtype(at::ScalarType::Byte)); - if (!NpuUtils::check_match(&self)) { - at::Tensor contiguousSelf = NpuUtils::format_contiguous(self); - le_out_npu_nocheck(contiguousSelf, other, result); - } else { - le_out_npu_nocheck(self, other, result); - } - self.copy_(result); - return self; + return op_plugin::le_(self, other); } at::Tensor& NPUNativeFunctions::le_(at::Tensor& self, const at::Tensor& other) { - 
OpPreparation::CastBackToOriFormat(self); - at::Tensor ori_other = OpPreparation::CastBackToOriFormat(other); - OpPreparation::CheckMemory({self, ori_other}, {self}); - at::Tensor result = OpPreparation::ApplyTensor( - self, - self.options().dtype(at::ScalarType::Byte)); - if (!NpuUtils::check_match(&self)) { - at::Tensor contiguousSelf = NpuUtils::format_contiguous(self); - le_out_npu_nocheck(contiguousSelf, ori_other, result); - } else { - le_out_npu_nocheck(self, ori_other, result); - } - self.copy_(result); - return self; + return op_plugin::le_(self, other); } } // namespace native diff --git a/torch_npu/csrc/aten/ops/NmsRotatedKernelNpu.cpp b/torch_npu/csrc/aten/ops/NmsRotatedKernelNpu.cpp index 31a8b2c38e18fbfc3e346609a1c07382fd46cb7b..0078b6fd5cb2783ee93a0ea85ba67c2341066b22 100644 --- a/torch_npu/csrc/aten/ops/NmsRotatedKernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/NmsRotatedKernelNpu.cpp @@ -14,45 +14,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "op_plugin/ops/OpInterface.h" #include "torch_npu/csrc/aten/NPUNativeFunctions.h" namespace at_npu { namespace native { -tuple<at::Tensor, at::Tensor> NPUNativeFunctions::npu_nms_rotated(const at::Tensor& dets, const at::Tensor& scores, +std::tuple<at::Tensor, at::Tensor> NPUNativeFunctions::npu_nms_rotated(const at::Tensor& dets, const at::Tensor& scores, double iouThreshold, double scoreThreshold, int64_t maxOutputSize, int64_t mode) { - // the Op only support fp32 currently!
- auto originDtype = dets.scalar_type(); - at::Tensor detsCast = dets; - at::Tensor scoresCast = scores; - at::Tensor labels = at::zeros({}, scores.options().dtype(at::kInt)); - if (originDtype != at::ScalarType::Float) { - detsCast = NPUNativeFunctions::npu_dtype_cast(dets, at::kFloat); - scoresCast = NPUNativeFunctions::npu_dtype_cast(scores, at::kFloat); - } - c10::SmallVector selectedIndexSize = {dets.size(0)}; - at::Tensor selectedBox = OpPreparation::ApplyTensor(detsCast); - at::Tensor selectedIndex = OpPreparation::ApplyTensor(selectedIndexSize, dets.options().dtype(at::kInt), dets); - - c10::SmallVector output_sync_idx = {0, 1}; - OpCommand cmd; - cmd.Sync(output_sync_idx) - .Name("RotatedNMS") - .Input(detsCast) - .Input(scoresCast) - .Input(labels) - .Output(selectedBox) - .Output(selectedIndex) - .Attr("iou_threshold", (float)iouThreshold) - .Attr("score_threshold", (float)scoreThreshold) - .Attr("max_output_size", maxOutputSize) - .Attr("mode", mode) - .Run(); - - at::Tensor selectedNum = - OpPreparation::ApplyTensor({1}, scores.options().dtype(at::kInt), scores).fill_(selectedIndex.size(0)); - return std::tie(selectedIndex, selectedNum); + return op_plugin::npu_nms_rotated(dets, scores, iouThreshold, scoreThreshold, maxOutputSize, mode); } } // namespace native diff --git a/torch_npu/csrc/aten/ops/NmsV4KernelNpu.cpp b/torch_npu/csrc/aten/ops/NmsV4KernelNpu.cpp index 021004234ac392967e998240fe2a0e5916628474..fcdb65ec0dfd1ea647ce35768af68d0a7f93030d 100644 --- a/torch_npu/csrc/aten/ops/NmsV4KernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/NmsV4KernelNpu.cpp @@ -12,68 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "torch_npu/csrc/framework/utils/OpAdapter.h" -#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" +#include "op_plugin/ops/OpInterface.h" #include "torch_npu/csrc/aten/NPUNativeFunctions.h" namespace at_npu { namespace native { - -tuple<at::Tensor, at::Tensor> nms_v4_npu_nocheck( - const at::Tensor& self, - const at::Tensor& scores, - at::Scalar max_output_size, - const at::Tensor& iou_threshold, - const at::Tensor& scores_threshold, - bool pad_to_max_output_size, - at::Tensor& selected_indices, - at::Tensor& valid_outputs) { - at::Tensor max_output_size_tensor = OpPreparation::ApplyTensor( - {}, self.options().dtype(at::kInt), self).fill_(max_output_size); - OpCommand cmd; - cmd.Name("NonMaxSuppressionV4") - .Input(self) - .Input(scores) - .Input(max_output_size_tensor) - .Input(iou_threshold) - .Input(scores_threshold) - .Output(selected_indices) - .Output(valid_outputs) - .Attr("pad_to_max_output_size", pad_to_max_output_size) - .Run(); - - return std::tuple(selected_indices, valid_outputs); -} - -tuple<at::Tensor, at::Tensor> NPUNativeFunctions::npu_nms_v4( +std::tuple<at::Tensor, at::Tensor> NPUNativeFunctions::npu_nms_v4( const at::Tensor& self, const at::Tensor& scores, const at::Scalar& max_output_size, const at::Tensor& iou_threshold, const at::Tensor& scores_threshold, bool pad_to_max_output_size) { - auto outputSizes = nms_v4_npu_output_size(max_output_size); - - at::Tensor selected_indices = OpPreparation::ApplyTensor( - std::get<0>(outputSizes), - self.options().dtype(at::kInt), - self); - at::Tensor valid_outputs = OpPreparation::ApplyTensor( - std::get<1>(outputSizes), - self.options().dtype(at::kInt), - self); - - nms_v4_npu_nocheck( - self, - scores, - max_output_size, - iou_threshold, - scores_threshold, - pad_to_max_output_size, - selected_indices, - valid_outputs); - - return std::tuple(selected_indices, valid_outputs); + return op_plugin::npu_nms_v4(self, scores, max_output_size, iou_threshold, scores_threshold, pad_to_max_output_size); } } // namespace native diff --git 
a/torch_npu/csrc/aten/ops/NmsWithMaskKernelNpu.cpp b/torch_npu/csrc/aten/ops/NmsWithMaskKernelNpu.cpp index 8cb75fd1670d8dbb265513177c1b823808a363a4..900af922781b52568b836c59f8e957b4fbc81797 100644 --- a/torch_npu/csrc/aten/ops/NmsWithMaskKernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/NmsWithMaskKernelNpu.cpp @@ -14,40 +14,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "torch_npu/csrc/framework/utils/OpAdapter.h" -#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" +#include "op_plugin/ops/OpInterface.h" #include "torch_npu/csrc/aten/NPUNativeFunctions.h" namespace at_npu { namespace native { - -tuple<at::Tensor, at::Tensor, at::Tensor> nms_with_mask_npu_nocheck( - const at::Tensor& input, - at::Scalar iou_threshold, - at::Tensor& boxes, - at::Tensor& idx, - at::Tensor& mask) { - float iouThresholdValue = CalcuOpUtil::GetScalarFloatValue(iou_threshold); - OpCommand cmd; - cmd.Name("NMSWithMask") - .Input(input) - .Output(boxes) - .Output(idx) - .Output(mask) - .Attr("iou_threshold", iouThresholdValue) - .Run(); - return std::tuple(boxes, idx, mask); -} - -tuple<at::Tensor, at::Tensor, at::Tensor> NPUNativeFunctions::npu_nms_with_mask( +std::tuple<at::Tensor, at::Tensor, at::Tensor> NPUNativeFunctions::npu_nms_with_mask( const at::Tensor& input, const at::Scalar& iou_threshold) { - auto outputSizes = nms_with_mask_npu_output_size(input); - at::Tensor boxes = OpPreparation::ApplyTensor(input, std::get<0>(outputSizes)); - at::Tensor idx = OpPreparation::ApplyTensor(std::get<1>(outputSizes), input.options().dtype(at::kInt), input); - at::Tensor mask = OpPreparation::ApplyTensor(std::get<2>(outputSizes), input.options().dtype(at::kByte), input); - nms_with_mask_npu_nocheck(input, iou_threshold, boxes, idx, mask); - return std::tuple(boxes, idx, mask); + return op_plugin::npu_nms_with_mask(input, iou_threshold); } } // namespace native diff --git a/torch_npu/csrc/aten/ops/pooling/AvgPool2dBackwardKernelNpu.cpp b/torch_npu/csrc/aten/ops/pooling/AvgPool2dBackwardKernelNpu.cpp index 
f3f6ee21006faf207e0da81faaa61d6244c71a6f..379345a070b8c159a2e25622451390200cef8120 100644 --- a/torch_npu/csrc/aten/ops/pooling/AvgPool2dBackwardKernelNpu.cpp +++ b/torch_npu/csrc/aten/ops/pooling/AvgPool2dBackwardKernelNpu.cpp @@ -14,55 +14,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" -#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "op_plugin/ops/OpInterface.h" #include "torch_npu/csrc/aten/NPUNativeFunctions.h" namespace at_npu { namespace native { - -at::Tensor& avg_pool2d_backward_out_npu_nocheck( - const at::Tensor& grad_output, - const at::Tensor& self, - at::IntArrayRef kernel_size, - at::IntArrayRef stride, - at::IntArrayRef padding, - bool ceil_mode, - bool count_include_pad, - at::Tensor& grad_input) { - int64_t strideH = 1; - int64_t strideW = 1; - if (!stride.empty()) { - strideH = stride[0]; - strideW = stride[1]; - } - c10::SmallVector kernelSize = {1, 1, kernel_size[0], kernel_size[1]}; - c10::SmallVector stridesSize = {1, 1, strideH, strideW}; - string padding_mode = "CALCULATED"; - c10::SmallVector pads = {padding[0], padding[0], padding[1], padding[1]}; - string format = "NCHW"; - bool pooling = false; - bool exclusive = (count_include_pad == false) ? 
true : false; - - OpPreparation::CheckMemory({grad_output, self}, {grad_input}); - OpCommand cmd; - cmd.Name("AvgPoolV2Grad") - .Input(self.sizes()) - .Input(grad_output) - .Output(grad_input) - .Attr("ksize", kernelSize) - .Attr("strides", stridesSize) - .Attr("padding_mode", padding_mode) - .Attr("pads", pads) - .Attr("data_format", format) - .Attr("global_pooling", pooling) - .Attr("ceil_mode", ceil_mode) - .Attr("exclusive", exclusive) - .Run(); - return grad_input; -} - at::Tensor& NPUNativeFunctions::avg_pool2d_backward_out( const at::Tensor& grad_output, const at::Tensor& self, @@ -73,36 +30,8 @@ at::Tensor& NPUNativeFunctions::avg_pool2d_backward_out( bool count_include_pad, c10::optional<int64_t> divisor_override, at::Tensor& grad_input) { - TORCH_CHECK(kernel_size.size() == 1 || kernel_size.size() == 2, - "avg_pool2d: kernel_size must either be a single int, or a tuple of two ints"); - if (kernel_size.size() == 1) { - c10::SmallVector kernel_sizes = {kernel_size[0], kernel_size[0]}; - kernel_size = at::IntArrayRef(kernel_sizes); - } - TORCH_CHECK(stride.empty() || stride.size() == 1 || stride.size() == 2, - "avg_pool2d: stride must either be omitted, a single int, or a tuple of two ints"); - stride = stride.empty() ? 
kernel_size : stride; - TORCH_CHECK(padding.size() == 1 || padding.size() == 2, - "avg_pool2d: padding must either be a single int, or a tuple of two ints"); - if (padding.size() == 1) { - c10::SmallVector paddings = {padding[0], padding[0]}; - padding = at::IntArrayRef(paddings); - } - const int64_t ndim = self.ndimension(); - TORCH_CHECK((ndim == 3 || ndim == 4), - "non-empty 3D or 4D (batch mode) tensor expected for input"); - TORCH_CHECK(!divisor_override.has_value() || divisor_override.value() != 0, "divisor must be not zero"); - - avg_pool2d_backward_out_npu_nocheck( - grad_output, - self, - kernel_size, - stride, - padding, - ceil_mode, - count_include_pad, - grad_input); - return grad_input; + return op_plugin::avg_pool2d_backward_out(grad_output, self, kernel_size, stride, padding, ceil_mode, + count_include_pad, divisor_override, grad_input); } at::Tensor NPUNativeFunctions::avg_pool2d_backward( @@ -114,21 +43,9 @@ at::Tensor NPUNativeFunctions::avg_pool2d_backward( bool ceil_mode, bool count_include_pad, c10::optional<int64_t> divisor_override) { - at::Tensor grad_input = OpPreparation::ApplyTensor(self); - - NPUNativeFunctions::avg_pool2d_backward_out( - grad_output, - self, - kernel_size, - stride, - padding, - ceil_mode, - count_include_pad, - divisor_override, - grad_input); - return grad_input; + return op_plugin::avg_pool2d_backward(grad_output, self, kernel_size, stride, padding, ceil_mode, count_include_pad, + divisor_override); } - } // namespace native } // namespace at_npu