diff --git a/test/test_network_ops/test_pad_packed_sequence.py b/test/test_network_ops/test_pad_packed_sequence.py
new file mode 100644
index 0000000000000000000000000000000000000000..ddde35b90d6c8983bbfa62ee16a514c52e8abcbf
--- /dev/null
+++ b/test/test_network_ops/test_pad_packed_sequence.py
@@ -0,0 +1,38 @@
+# Copyright (c) 2020, Huawei Technologies. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torch_npu
+
+from torch_npu.testing.testcase import TestCase, run_tests
+
+class TestPadPackedSequence(TestCase):
+    def test_pad_packed_sequence_fp32(self, device="npu"):
+        data = torch.tensor([4, 1, 3, 5, 2, 6], dtype=torch.float32)
+        batch_sizes = torch.tensor([3, 2, 1], dtype=torch.int64)
+        cpu_out, cpu_lengths = torch._pad_packed_sequence(data, batch_sizes, False, 0, 6)
+        npu_out, npu_lengths = torch._pad_packed_sequence(data.npu(), batch_sizes, False, 0, 6)
+        self.assertRtolEqual(cpu_out, npu_out.cpu())
+        self.assertRtolEqual(cpu_lengths, npu_lengths.cpu())
+
+    def test_pad_packed_sequence_fp16(self, device="npu"):
+        data = torch.tensor([4, 1, 3, 5, 2, 6], dtype=torch.float16)
+        batch_sizes = torch.tensor([3, 2, 1], dtype=torch.int64)
+        cpu_out, cpu_lengths = torch._pad_packed_sequence(data, batch_sizes, False, 0, 6)
+        npu_out, npu_lengths = torch._pad_packed_sequence(data.npu(), batch_sizes, False, 0, 6)
+        self.assertRtolEqual(cpu_out, npu_out.cpu())
+        self.assertRtolEqual(cpu_lengths, npu_lengths.cpu())
+
+if __name__ == "__main__":
+    run_tests()
\ No newline at end of file
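For context, the packed layout exercised by the tests above is time-major: with batch_sizes = [3, 2, 1], the flat data [4, 1, 3, 5, 2, 6] encodes three sequences sorted by decreasing length, [4, 5, 6], [1, 2], and [3]. The sketch below is illustrative only; it goes through stock PyTorch's public PackedSequence API on CPU to show the padded output and lengths the tests compare against, assuming the default sorted-by-length packing.

import torch
from torch.nn.utils.rnn import PackedSequence, pad_packed_sequence

# Time-major packed data: step 0 holds [4, 1, 3], step 1 holds [5, 2],
# step 2 holds [6]; batch_sizes records how many sequences are alive per step.
packed = PackedSequence(torch.tensor([4., 1., 3., 5., 2., 6.]),
                        torch.tensor([3, 2, 1], dtype=torch.int64))
out, lengths = pad_packed_sequence(packed, batch_first=False,
                                   padding_value=0.0, total_length=6)
# out is padded up to total_length=6 along the time dimension:
# tensor([[4., 1., 3.],
#         [5., 2., 0.],
#         [6., 0., 0.],
#         [0., 0., 0.],
#         [0., 0., 0.],
#         [0., 0., 0.]])
# lengths is tensor([3, 2, 1])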
+ +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/aten/NPUNativeFunctions.h" + +namespace at_npu { +namespace native { + +std::tuple NPUNativeFunctions::_pad_packed_sequence( + const at::Tensor& data, + const at::Tensor& _batchSizes, + bool batchFirst, + at::Scalar paddingValue, + int64_t totalLength) { + at::Tensor output = data; + auto batchSizesT = _batchSizes.contiguous(); + + int64_t * batchSizes = batchSizesT.data_ptr(); + int64_t maxBatchSize = batchSizes[0]; + int64_t maxRealSeqLength = batchSizesT.size(0); + int64_t maxSeqLength = maxRealSeqLength; + if (totalLength > 0) { + TORCH_CHECK(totalLength >= maxSeqLength, + "Expected total_length to be at least the length of the longest " + "sequence in input, but got total_length=", totalLength, " and " + "max sequence length being ", maxSeqLength); + maxSeqLength = totalLength; + } + + at::Tensor lengthsT = OpPreparation::ApplyTensorWithSizes(maxBatchSize, batchSizesT.options()); + int64_t * lengths = lengthsT.data_ptr() + maxBatchSize - 1; + int64_t prevBatchSize = maxBatchSize; + for (int64_t i = 0; i <= maxRealSeqLength; ++i) { + int64_t batchSize = i != maxRealSeqLength ? batchSizes[i] : 0; + int64_t dec = prevBatchSize - batchSize; + if (dec > 0) { + for (int64_t j = 0; j < dec; ++j) { + *lengths = i; + lengths--; + } + } + prevBatchSize = batchSize; + } + if (batchFirst) { + output = NPUNativeFunctions::npu_transpose(data, {0, 1}); + } + return std::tie(output, lengthsT); +} + +} // namespace native +} // namespace at_npu \ No newline at end of file