diff --git a/test/test_network_ops/test_pad_packed_sequence.py b/test/test_network_ops/test_pad_packed_sequence.py
new file mode 100644
index 0000000000000000000000000000000000000000..ddde35b90d6c8983bbfa62ee16a514c52e8abcbf
--- /dev/null
+++ b/test/test_network_ops/test_pad_packed_sequence.py
@@ -0,0 +1,38 @@
+# Copyright (c) 2020, Huawei Technologies. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torch_npu
+
+from torch_npu.testing.testcase import TestCase, run_tests
+
+class TestPadPackedSequence(TestCase):
+    def test_pad_packed_sequence_fp32(self, device="npu"):
+        data = torch.tensor([4, 1, 3, 5, 2, 6], dtype=torch.float32)
+        batch_sizes = torch.tensor([3, 2, 1], dtype=torch.int64)
+        cpu_out, cpu_lengths = torch._pad_packed_sequence(data, batch_sizes, False, 0, 6)
+        npu_out, npu_lengths = torch._pad_packed_sequence(data.npu(), batch_sizes, False, 0, 6)
+        self.assertRtolEqual(cpu_out, npu_out.cpu())
+        self.assertRtolEqual(cpu_lengths, npu_lengths.cpu())
+
+    def test_pad_packed_sequence_fp16(self, device="npu"):
+        data = torch.tensor([4, 1, 3, 5, 2, 6], dtype=torch.float16)
+        batch_sizes = torch.tensor([3, 2, 1], dtype=torch.int64)
+        cpu_out, cpu_lengths = torch._pad_packed_sequence(data, batch_sizes, False, 0, 6)
+        npu_out, npu_lengths = torch._pad_packed_sequence(data.npu(), batch_sizes, False, 0, 6)
+        self.assertRtolEqual(cpu_out, npu_out.cpu())
+        self.assertRtolEqual(cpu_lengths, npu_lengths.cpu())
+
+if __name__ == "__main__":
+    run_tests()
\ No newline at end of file
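For context, the packed layout exercised by the tests above is time-major: with batch_sizes = [3, 2, 1], the flat data [4, 1, 3, 5, 2, 6] encodes three sequences sorted by decreasing length, [4, 5, 6], [1, 2], and [3]. The sketch below is illustrative only; it goes through stock PyTorch's public PackedSequence API on CPU to show the padded output and lengths the tests compare against, assuming the default sorted-by-length packing.

import torch
from torch.nn.utils.rnn import PackedSequence, pad_packed_sequence

# Time-major packed data: step 0 holds [4, 1, 3], step 1 holds [5, 2],
# step 2 holds [6]; batch_sizes records how many sequences are alive per step.
packed = PackedSequence(torch.tensor([4., 1., 3., 5., 2., 6.]),
                        torch.tensor([3, 2, 1], dtype=torch.int64))
out, lengths = pad_packed_sequence(packed, batch_first=False,
                                   padding_value=0.0, total_length=6)
# out is padded up to total_length=6 along the time dimension:
# tensor([[4., 1., 3.],
#         [5., 2., 0.],
#         [6., 0., 0.],
#         [0., 0., 0.],
#         [0., 0., 0.],
#         [0., 0., 0.]])
# lengths is tensor([3, 2, 1])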
+ +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/aten/NPUNativeFunctions.h" + +namespace at_npu { +namespace native { + +std::tuple NPUNativeFunctions::_pad_packed_sequence( + const at::Tensor& data, + const at::Tensor& _batchSizes, + bool batchFirst, + at::Scalar paddingValue, + int64_t totalLength) { + at::Tensor output = data; + auto batchSizesT = _batchSizes.contiguous(); + + int64_t * batchSizes = batchSizesT.data_ptr(); + int64_t maxBatchSize = batchSizes[0]; + int64_t maxRealSeqLength = batchSizesT.size(0); + int64_t maxSeqLength = maxRealSeqLength; + if (totalLength > 0) { + TORCH_CHECK(totalLength >= maxSeqLength, + "Expected total_length to be at least the length of the longest " + "sequence in input, but got total_length=", totalLength, " and " + "max sequence length being ", maxSeqLength); + maxSeqLength = totalLength; + } + + at::Tensor lengthsT = OpPreparation::ApplyTensorWithSizes(maxBatchSize, batchSizesT.options()); + int64_t * lengths = lengthsT.data_ptr() + maxBatchSize - 1; + int64_t prevBatchSize = maxBatchSize; + for (int64_t i = 0; i <= maxRealSeqLength; ++i) { + int64_t batchSize = i != maxRealSeqLength ? batchSizes[i] : 0; + int64_t dec = prevBatchSize - batchSize; + if (dec > 0) { + for (int64_t j = 0; j < dec; ++j) { + *lengths = i; + lengths--; + } + } + prevBatchSize = batchSize; + } + if (batchFirst) { + output = NPUNativeFunctions::npu_transpose(data, {0, 1}); + } + return std::tie(output, lengthsT); +} + +} // namespace native +} // namespace at_npu \ No newline at end of file