diff --git a/torch_npu/csrc/aten/ops/CopyFromAndResizeKernelNpu.cpp b/torch_npu/csrc/aten/ops/CopyFromAndResizeKernelNpu.cpp
index 875966468fa133351ce0c751e24be4283c009d61..44a97b30d58f5776a0c873fd6f08019b1ab8b1dc 100644
--- a/torch_npu/csrc/aten/ops/CopyFromAndResizeKernelNpu.cpp
+++ b/torch_npu/csrc/aten/ops/CopyFromAndResizeKernelNpu.cpp
@@ -8,17 +8,35 @@ at::Tensor NPUNativeFunctions::_copy_from_and_resize(const at::Tensor& self, const at::Tensor& dst)
 {
     TORCH_CHECK(dst.defined(), "dst is undefined", OPS_ERROR(ErrCode::NOT_SUPPORT));
     TORCH_CHECK(self.defined(), "self is undefined", OPS_ERROR(ErrCode::NOT_SUPPORT));
-
+
     if (dst.numel() == 0) {
         dst.resize_as_(self);
     }
-    TORCH_CHECK(self.sizes() == dst.sizes(),
+
+    bool needs_broadcasting = false;
+    TORCH_CHECK(
+        dst.dim() >= self.dim(), "Destination ", dst.sym_sizes(), " doesn't match the broadcast shape ", self.sym_sizes());
+    if (dst.dim() > self.dim()) {
+        needs_broadcasting = true;
+    } else {
+        const c10::IntArrayRef src_sizes = self.sizes();
+        const c10::IntArrayRef dst_sizes = dst.sizes();
+        for (const auto j : c10::irange(self.dim())) {
+            if (src_sizes[j] == 1 && dst_sizes[j] != 1) {
+                needs_broadcasting = true;
+                break;
+            }
+        }
+    }
+    auto self_broadcast = needs_broadcasting ? self.expand_as(dst) : self;
+
+    TORCH_CHECK(self_broadcast.sizes() == dst.sizes(),
         "_copy_from_and_resize now only support copy with same size, or dst.numel() == 0!",
         OPS_ERROR(ErrCode::NOT_SUPPORT));
-    TORCH_CHECK(self.is_cpu() && dst.device().is_privateuseone(),
+    TORCH_CHECK(self_broadcast.is_cpu() && dst.device().is_privateuseone(),
         "_copy_from_and_resize now only support copy from cpu tensor to npu tensor, but got src tensor device is ",
-        self.device(), " and dst device is ", dst.device(), OPS_ERROR(ErrCode::NOT_SUPPORT));
-    dst.copy_(self);
+        self_broadcast.device(), " and dst device is ", dst.device(), OPS_ERROR(ErrCode::NOT_SUPPORT));
+    dst.copy_(self_broadcast);
     return dst;
 }
 
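
For reviewers: the following is a minimal standalone sketch of the broadcast-eligibility decision this hunk introduces, not code from the PR. It substitutes std::vector<int64_t> for c10::IntArrayRef so it compiles without the PyTorch headers, and the file and function names are hypothetical. The sketch assumes dst already has at least as many dimensions as src, which the new TORCH_CHECK in the hunk enforces before this logic runs.

// broadcast_check_demo.cpp -- illustrative sketch only; names are hypothetical.
// Mirrors the needs_broadcasting decision from the diff above, with
// std::vector<int64_t> standing in for c10::IntArrayRef.
#include <cstdint>
#include <iostream>
#include <vector>

// Precondition (enforced by the hunk's TORCH_CHECK before this point):
// dst_sizes.size() >= src_sizes.size().
bool needs_broadcasting(const std::vector<int64_t>& src_sizes,
                        const std::vector<int64_t>& dst_sizes)
{
    // Extra dimensions on dst always require an expand.
    if (dst_sizes.size() > src_sizes.size()) {
        return true;
    }
    // Same rank: expand only if a size-1 src dim faces a larger dst dim.
    for (std::size_t j = 0; j < src_sizes.size(); ++j) {
        if (src_sizes[j] == 1 && dst_sizes[j] != 1) {
            return true;
        }
    }
    return false;
}

int main()
{
    std::cout << needs_broadcasting({2, 3}, {2, 3}) << '\n'; // 0: plain same-size copy
    std::cout << needs_broadcasting({1, 3}, {4, 3}) << '\n'; // 1: size-1 dim expanded
    std::cout << needs_broadcasting({3}, {2, 3}) << '\n';    // 1: extra leading dim
    // A mismatch like {2, 3} vs {4, 3} returns 0 here; in the kernel that
    // case is then rejected by the sizes-equal TORCH_CHECK, not expanded.
    return 0;
}

One note on the design as I read it: expand_as returns a zero-copy strided view (stride 0 in the broadcast dimensions), so the broadcast path should add no host-side allocation; dst.copy_ materializes the expanded data on the NPU as before.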