From 6488fc4f3d973b425ff1e69c21c6baa3fa0dc005 Mon Sep 17 00:00:00 2001 From: pengqi Date: Thu, 27 Mar 2025 12:15:41 +0800 Subject: [PATCH 1/9] new reshape --- .../ccsrc/pyboost/customize/reshape_simple.cc | 85 +++++++++++++++ .../ccsrc/pyboost/customize/reshape_simple.h | 36 +++++++ mindspore/ccsrc/pyboost/pyboost_utils.cc | 2 +- .../ccsrc/pynative/op_function/converter.cc | 28 +++++ .../ccsrc/pynative/op_function/converter.h | 1 + .../op_function/customize/direct_ops.cc | 1 + .../op_function/customize/direct_ops.h | 2 +- .../customize/reshape_simple_op.cc | 100 ++++++++++++++++++ mindspore/ops/view/reshape_strides_calc.cc | 6 ++ mindspore/ops/view/reshape_strides_calc.h | 2 + mindspore/ops/view/view_strides_calc.h | 2 + .../mindspore/ops/function/array_func.py | 5 + 12 files changed, 268 insertions(+), 2 deletions(-) create mode 100644 mindspore/ccsrc/pyboost/customize/reshape_simple.cc create mode 100644 mindspore/ccsrc/pyboost/customize/reshape_simple.h create mode 100644 mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc diff --git a/mindspore/ccsrc/pyboost/customize/reshape_simple.cc b/mindspore/ccsrc/pyboost/customize/reshape_simple.cc new file mode 100644 index 00000000000..e1a55992b47 --- /dev/null +++ b/mindspore/ccsrc/pyboost/customize/reshape_simple.cc @@ -0,0 +1,85 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mindspore/ccsrc/pyboost/customize/reshape_simple.h" +#include "mindspore/ccsrc/pyboost/customize/reshape.h" +#include +#include +#include "mindspore/ops/view/view_strides_calc.h" +#include "mindspore/ops/view/reshape_strides_calc.h" +#include "mindspore/ccsrc/pyboost/op_register.h" +#include "mindspore/ccsrc/pyboost/pyboost_utils.h" +#include "mindspore/ccsrc/pyboost/auto_generate/copy.h" +#include "mindspore/ccsrc/pyboost/auto_generate/view.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr ReshapeSimpleCustomize(const std::shared_ptr &op, const BaseTensorPtr &input_tensor, + const std::vector &shape) { + auto old_storage_info = input_tensor->storage_info(); + // Uncontiguous + if (old_storage_info != nullptr && !old_storage_info->is_contiguous) { + auto storage_info_list = ops::ReshapeSimpleUncontiguousCalc(input_tensor, shape); + if (!storage_info_list.empty()) { + MS_LOG(DEBUG) << "View Uncontiguous Reshape Call start"; + tensor::BaseTensorPtrList outputs; + PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), input_tensor); + PyBoostUtils::CreateOutputTensor(op->device_context(), input_tensor, storage_info_list, &outputs); + op->set_outputs(outputs); + PyBoostUtils::DispatchRun(std::make_shared([op, input_tensor]() { + MS_LOG(DEBUG) << "View device task Uncontiguous Reshape start"; + auto device_context = op->device_context(); + PyBoostUtils::MallocOpInputs(device_context, input_tensor); + MS_LOG(DEBUG) << "View device task Uncontiguous Reshape end"; + })); + op->CreateOutputSimpleInfo(); + MS_LOG(DEBUG) << "View Uncontiguous Reshape Call end"; + return op->output(0); + } + } + // Contiguous + MS_LOG(DEBUG) << "View View Call start"; + auto storage_info_list = ops::ViewCalcImpl(op->primitive(), input_tensor, shape); + if (!storage_info_list.empty()) { + // Create device address for input tensors + tensor::BaseTensorPtrList outputs; + auto addr = input_tensor->device_address(); + if (addr != nullptr) { + PyBoostUtils::CreateOutputTensor(op->device_context(), input_tensor, storage_info_list, &outputs); + op->set_outputs(outputs); + } else { + PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), input_tensor); + PyBoostUtils::CreateOutputTensor(op->device_context(), input_tensor, storage_info_list, &outputs); + op->set_outputs(outputs); + // Async + PyBoostUtils::DispatchRun(std::make_shared([op, input_tensor]() { + MS_LOG(DEBUG) << "View device task View start"; + auto device_context = op->device_context(); + PyBoostUtils::MallocOpInputsForView(device_context, input_tensor); + MS_LOG(DEBUG) << "View device task View end"; + })); + } + } else { + MS_LOG_EXCEPTION << "View unsupported:" << op->primitive()->name() << " or input ERROR"; + } + op->CreateOutputSimpleInfo(); + MS_LOG(DEBUG) << "View View Call end"; + return op->output(0); +} +} // namespace pyboost +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/pyboost/customize/reshape_simple.h b/mindspore/ccsrc/pyboost/customize/reshape_simple.h new file mode 100644 index 00000000000..e8ed613ff8a --- /dev/null +++ b/mindspore/ccsrc/pyboost/customize/reshape_simple.h @@ -0,0 +1,36 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_RESHAPE_SIMPLE_H_ +#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_RESHAPE_SIMPLE_H_ + +#include +#include +#include "ir/tensor.h" +#include "ir/value.h" +#include "runtime/hardware/device_context_manager.h" +#include "mindspore/ccsrc/pyboost/op_runner.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr PYBOOST_API ReshapeSimpleCustomize(const std::shared_ptr &op, + const BaseTensorPtr &input_tensor, + const std::vector &shape); +} // namespace pyboost +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_RESHAPE_SIMPLE_H_ \ No newline at end of file diff --git a/mindspore/ccsrc/pyboost/pyboost_utils.cc b/mindspore/ccsrc/pyboost/pyboost_utils.cc index dd05b76a8da..31f00b49f00 100644 --- a/mindspore/ccsrc/pyboost/pyboost_utils.cc +++ b/mindspore/ccsrc/pyboost/pyboost_utils.cc @@ -241,7 +241,7 @@ void PyBoostUtils::CreateOutputTensor(const DeviceContext *device_context, const output_tensor->set_contiguous_callback( [](const DeviceSyncPtr &device_address) -> DeviceSyncPtr { return ContiguousByDeviceAddress(device_address); }); - auto input_device_address = std::dynamic_pointer_cast(input->device_address()); + auto input_device_address = std::static_pointer_cast(input->device_address()); MS_EXCEPTION_IF_NULL(input_device_address); input_device_address->set_is_view(true); diff --git a/mindspore/ccsrc/pynative/op_function/converter.cc b/mindspore/ccsrc/pynative/op_function/converter.cc index 9b8dcb65ac2..5edbe54dbfb 100644 --- a/mindspore/ccsrc/pynative/op_function/converter.cc +++ b/mindspore/ccsrc/pynative/op_function/converter.cc @@ -163,11 +163,39 @@ void EnablePipelineForTupleTensor(const ValueTuplePtr &tuple) { } } } + +std::vector ConvertIntVector(const py::object &obj) { + if (!py::isinstance(obj)) { + return {}; + } + auto seq = py::cast(obj); + size_t size = seq.size(); + std::vector convert(size); + for (size_t i = 0; i < size; ++i) { + // bool is also an instance of py::int_ + if (py::isinstance(seq[i]) || !py::isinstance(seq[i])) { + return {}; + } + auto out = py::cast(seq[i]); + convert[i] = out; + } + return convert; +} } // namespace Converter::Converter(ops::OpDef *op_def) : op_def_(op_def), source_type_(std::vector(op_def->args_.size())) {} +std::vector Converter::ToIntVector(const py::list &python_args, size_t i) { + const py::object &obj = python_args[i]; + auto convert = ConvertIntVector(obj); + if (convert.size() != kIndex0) { + return convert; + } + MS_LOG(EXCEPTION) << "IntVector convert failed."; + return {}; +} + void Converter::Parse(const py::list &python_args) { if (op_def_->args_.size() != python_args.size()) { MS_LOG(EXCEPTION) << "For operator " << op_def_->name_ << ", it requires " << op_def_->args_.size() diff --git a/mindspore/ccsrc/pynative/op_function/converter.h b/mindspore/ccsrc/pynative/op_function/converter.h index ade92e330be..e922a6618ff 100644 --- a/mindspore/ccsrc/pynative/op_function/converter.h +++ b/mindspore/ccsrc/pynative/op_function/converter.h @@ -217,6 +217,7 @@ class PYNATIVE_EXPORT Converter { std::optional ToTensorListOptional(const py::list &python_args, size_t i); Int64ImmPtr ToInt(const py::list &python_args, size_t i); std::optional ToIntOptional(const py::list &python_args, size_t i); + std::vector ToIntVector(const py::list &python_args, size_t i); template ValueTuplePtr ToIntList(const py::list &python_args, size_t i); template diff --git a/mindspore/ccsrc/pynative/op_function/customize/direct_ops.cc b/mindspore/ccsrc/pynative/op_function/customize/direct_ops.cc index 360938c5a97..774ed89a6b0 100644 --- a/mindspore/ccsrc/pynative/op_function/customize/direct_ops.cc +++ b/mindspore/ccsrc/pynative/op_function/customize/direct_ops.cc @@ -22,5 +22,6 @@ void RegDirectOps(py::module *m) { m->def("pyboost_empty", &mindspore::pynative::Empty, "Empty"); m->def("pyboost_empty_like", &mindspore::pynative::EmptyLike, "EmptyLike"); m->def("pyboost_new_empty", &mindspore::pynative::NewEmpty, "NewEmpty"); + m->def("pyboost_reshapesimple", &mindspore::pynative::Pyboost_Reshape_Simple, "Reshape Simple"); } } // namespace mindspore::pynative diff --git a/mindspore/ccsrc/pynative/op_function/customize/direct_ops.h b/mindspore/ccsrc/pynative/op_function/customize/direct_ops.h index 72f2fa16391..10291557a0e 100644 --- a/mindspore/ccsrc/pynative/op_function/customize/direct_ops.h +++ b/mindspore/ccsrc/pynative/op_function/customize/direct_ops.h @@ -27,7 +27,6 @@ namespace py = pybind11; namespace mindspore::pynative { - PYNATIVE_EXPORT py::object Empty(const py::list &args); PYNATIVE_EXPORT py::object EmptyLike(const py::list &args); PYNATIVE_EXPORT py::object NewEmpty(const py::list &args); @@ -36,5 +35,6 @@ PYNATIVE_EXPORT py::object Pyboost_Empty_OP(const PrimitivePtr &prim, const std::vector &source_type, const ValueTuplePtr &shape, const std::optional &dtype, const std::optional &device); +PYNATIVE_EXPORT py::object Pyboost_Reshape_Simple(const py::list &args); } // namespace mindspore::pynative #endif // MINDSPORE_CCSRC_PIPELINE_PYNATIVE_OP_FUNCTION_CUSTOMIZE_DIRECT_OPS_H diff --git a/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc b/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc new file mode 100644 index 00000000000..c13cec6aeba --- /dev/null +++ b/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc @@ -0,0 +1,100 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "include/common/pybind_api/api_register.h" +#include "pynative/grad/grad_utils.h" +#include "pynative/pynative_utils.h" +#include "pynative/op_function/converter.h" +#include "pybind_api/gil_scoped_long_running.h" +#include "pynative/predict_out_type_map.h" +#include "pynative/forward/forward_task.h" +#include "include/common/utils/tensor_utils.h" +#include "op_def/auto_generate/gen_ops_def.h" +#include "mindspore/ccsrc/pyboost/functions/auto_grad_guard.h" +#include "mindspore/ccsrc/pyboost/functions/base.h" +#include "mindspore/ccsrc/pyboost/auto_generate/reshape.h" +#include "mindspore/ccsrc/pyboost/customize/reshape_simple.h" +#include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_r.h" +#include "pynative/op_function/customize/direct_ops.h" + +namespace mindspore::pynative { +py::object Pyboost_Reshape_Simple(const py::list &args) { + runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kRunOp, + "ReshapeSimple", false, true); + MS_LOG(DEBUG) << "Run Pyboost_Reshape_Simple start"; + static Converter converter(&ops::gReshape); + converter.Parse(args); + auto input = converter.ToTensor(args, kIndex0); + const auto &shape = converter.ToIntVector(args, kIndex1); + auto op_run_info = PyNativeAlgo::PyBoost::Init(prim::kPrimReshape); + op_run_info->signatures = ops::gReshape.signatures_; + op_run_info->source_type = converter.source_type(); + auto py_output = tensor::MakeTuple(); + auto promises = tensor::TransformPromise(py_output); + + DispatchOp( + std::make_shared( + [input, shape, promises](const FrontendOpRunInfoPtr &op_run_info) { + + MS_LOG(DEBUG) << "Run frontend task Pyboost_Reshape_Simple start"; + auto old_stream_id = kernel::pyboost::PyBoostUtils::cur_stream_id(); + kernel::pyboost::PyBoostUtils::set_cur_stream_id(op_run_info->base_op_run_info.stream_id); + + // stub tensor to tensor. + auto input_tensor = PyNativeAlgo::Common::ConvertStubNodeToTensor(input, false, op_run_info->requires_grad); + + kernel::pyboost::OpRunStatus::Get().set_run_info( + kernel::pyboost::OpStatus(op_run_info->async_status.disable_mix_precision, + op_run_info->async_status.is_jit_compiling, + op_run_info->async_status.custom_bprop_cell_count, + op_run_info->base_op_run_info.device_target)); + kernel::pyboost::RequireGradGuard require_grad_guard(op_run_info->requires_grad); + + // Create op + auto op = CREATE_PYBOOST_OP(Reshape, op_run_info->base_op_run_info.device_target); + op->set_primitive(prim::kPrimReshape); + + // Run op + auto outputs = kernel::pyboost::ReshapeSimpleCustomize(op, input_tensor, shape); + + // Set output value and grad info + auto real_out = PyNativeAlgo::AutoGradUtil::MakeOutput( + op_run_info->requires_grad, op, + op_run_info->requires_grad + ? PyNativeAlgo::Common::GetPyNativeExecutor()->grad_executor()->top_cell()->op_index() + : 0, input_tensor); + + // Do auto grad + if (op_run_info->requires_grad) { + static auto op_type = kernel::pyboost::GetOpTypeFromOpdef(ops::gReshape); + auto grad_info = std::make_shared(op_type, + prim::kPrimReshape, + std::vector{input_tensor, kNone}, + real_out); + PyNativeAlgo::AutoGradUtil::SetInferOutputToGrad(grad_info, op); + PyNativeAlgo::PyBoost::DoGrad(op, grad_info, op_run_info->async_status); + } + // Data sync in mix mode(Graph and PyNative) + PyNativeAlgo::PyBoost::DataSyncForGraph(op); + kernel::pyboost::PyBoostUtils::set_cur_stream_id(old_stream_id); + tensor::SetPromise(promises, outputs); + }, [promises]() { + tensor::SetException(promises); + }, op_run_info)); + MS_LOG(DEBUG) << "Run Pyboost_Reshape_Simple end"; + return py::reinterpret_steal(tensor::TransformOutput(py_output)); +} +} // namespace mindspore::pynative diff --git a/mindspore/ops/view/reshape_strides_calc.cc b/mindspore/ops/view/reshape_strides_calc.cc index f77e562d576..e29035972ee 100644 --- a/mindspore/ops/view/reshape_strides_calc.cc +++ b/mindspore/ops/view/reshape_strides_calc.cc @@ -162,5 +162,11 @@ TensorStorageInfoPtrList ReshapeCalc(const PrimitivePtr &prim, const std::vector return ReshapeCalcImpl(old_tensor_info, shape); } +TensorStorageInfoPtrList ReshapeSimpleUncontiguousCalc(const tensor::BaseTensorPtr &input_tensor, + const std::vector &shape) { + auto old_tensor_info = GetOldTensorInfo(input_tensor); + return ReshapeUncontiguousCalcImpl(old_tensor_info, shape); +} + REG_VIEW_STRIDES_CALC_FUN(Reshape, ReshapeCalc); } // namespace mindspore::ops diff --git a/mindspore/ops/view/reshape_strides_calc.h b/mindspore/ops/view/reshape_strides_calc.h index 0ca116409ee..9302637672f 100644 --- a/mindspore/ops/view/reshape_strides_calc.h +++ b/mindspore/ops/view/reshape_strides_calc.h @@ -27,6 +27,8 @@ OPS_API TensorStorageInfoPtrList ReshapeCalcImpl(const mindspore::ops::OldTensor const std::vector &shape); OPS_API TensorStorageInfoPtrList ReshapeUncontiguousCalcImpl(const mindspore::ops::OldTensorInfoPtr &old_tensor_info, const std::vector &shape); +OPS_API TensorStorageInfoPtrList ReshapeSimpleUncontiguousCalc(const tensor::BaseTensorPtr &input_tensor, + const std::vector &shape); } // namespace ops } // namespace mindspore diff --git a/mindspore/ops/view/view_strides_calc.h b/mindspore/ops/view/view_strides_calc.h index 742b226a20c..e13237d2dd5 100644 --- a/mindspore/ops/view/view_strides_calc.h +++ b/mindspore/ops/view/view_strides_calc.h @@ -23,6 +23,8 @@ namespace mindspore { namespace ops { OPS_API TensorStorageInfoPtrList ViewCalc(const PrimitivePtr &prim, const std::vector &inputs); +OPS_API TensorStorageInfoPtrList ViewCalcImpl(const PrimitivePtr &prim, const tensor::BaseTensorPtr &input_tensor, + const std::vector &shape); } // namespace ops } // namespace mindspore diff --git a/mindspore/python/mindspore/ops/function/array_func.py b/mindspore/python/mindspore/ops/function/array_func.py index 2ffb99cf12c..01b9f06ff43 100644 --- a/mindspore/python/mindspore/ops/function/array_func.py +++ b/mindspore/python/mindspore/ops/function/array_func.py @@ -113,6 +113,7 @@ from mindspore.ops.auto_generate.pyboost_inner_prim import _PyboostOneHotExtPrim from mindspore._c_expression import pyboost_empty from mindspore._c_expression import pyboost_empty_like from mindspore._c_expression import pyboost_new_empty +from mindspore._c_expression import pyboost_reshapesimple arg_max_with_value_ = ArgMaxWithValue() arg_min_with_value_ = ArgMinWithValue() @@ -520,6 +521,10 @@ def reverse(x, axis): return flip(x, axis) +def reshape_simple(input, shape): + return pyboost_reshapesimple([input, shape]) + + def empty(*size, dtype=None, device=None): r""" Creates a tensor with uninitialized data, whose shape, dtype and device are described by the argument `size`, -- Gitee From a83a39cbb453365a57002f5b5d881eb833945eda Mon Sep 17 00:00:00 2001 From: pengqi Date: Mon, 31 Mar 2025 12:54:21 +0800 Subject: [PATCH 2/9] viewcalcImpl opt --- .../ccsrc/pyboost/customize/reshape_simple.cc | 2 +- mindspore/ops/view/view_strides_calc.cc | 29 +++++++++++++++++++ mindspore/ops/view/view_strides_calc.h | 4 ++- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/mindspore/ccsrc/pyboost/customize/reshape_simple.cc b/mindspore/ccsrc/pyboost/customize/reshape_simple.cc index e1a55992b47..a9b3015e449 100644 --- a/mindspore/ccsrc/pyboost/customize/reshape_simple.cc +++ b/mindspore/ccsrc/pyboost/customize/reshape_simple.cc @@ -53,7 +53,7 @@ tensor::BaseTensorPtr ReshapeSimpleCustomize(const std::shared_ptr &op } // Contiguous MS_LOG(DEBUG) << "View View Call start"; - auto storage_info_list = ops::ViewCalcImpl(op->primitive(), input_tensor, shape); + auto storage_info_list = ops::ViewCalcImplWithoutStorageInfo(op->primitive(), input_tensor, shape); if (!storage_info_list.empty()) { // Create device address for input tensors tensor::BaseTensorPtrList outputs; diff --git a/mindspore/ops/view/view_strides_calc.cc b/mindspore/ops/view/view_strides_calc.cc index fa874b3e2a6..b64952a3189 100644 --- a/mindspore/ops/view/view_strides_calc.cc +++ b/mindspore/ops/view/view_strides_calc.cc @@ -53,6 +53,35 @@ ShapeVector update_shape(const ShapeVector &input_shape, ShapeVector shape) { return shape; } +TensorStorageInfoPtrList ViewCalcImplWithoutStorageInfo(const PrimitivePtr &prim, + const tensor::BaseTensorPtr &input_tensor, + const std::vector &shape) { + MS_EXCEPTION_IF_NULL(input_tensor); + auto old_strides = GetOriStrides(input_tensor->shape()); + const auto &old_shape = input_tensor->shape(); + + const auto &new_shape = update_shape(old_shape, shape); + const auto &new_strides = GetOriStrides(new_shape); + auto new_storage_info = std::make_shared(new_shape, new_strides, 0, input_tensor->shape(), + old_strides, IsContiguous(new_shape, new_strides)); + return {new_storage_info}; +} + +TensorStorageInfoPtrList ViewCalcImplWithStorageInfo(const PrimitivePtr &prim, + const tensor::BaseTensorPtr &input_tensor, + const std::vector &shape) { + MS_EXCEPTION_IF_NULL(input_tensor); + auto old_tensor_info = GetOldTensorInfo(input_tensor); + const auto &storage_info = input_tensor->storage_info(); + + const auto &new_shape = update_shape(storage_info->shape, shape); + const auto &new_strides = GetOriStrides(new_shape); + auto new_storage_info = + std::make_shared(new_shape, new_strides, storage_info->storage_offset, storage_info->ori_shape, + storage_info->ori_strides, IsContiguous(new_shape, new_strides)); + return {new_storage_info}; +} + TensorStorageInfoPtrList ViewCalcImpl(const PrimitivePtr &prim, const tensor::BaseTensorPtr &input_tensor, const std::vector &shape) { MS_EXCEPTION_IF_NULL(input_tensor); diff --git a/mindspore/ops/view/view_strides_calc.h b/mindspore/ops/view/view_strides_calc.h index e13237d2dd5..88f3a5a8033 100644 --- a/mindspore/ops/view/view_strides_calc.h +++ b/mindspore/ops/view/view_strides_calc.h @@ -25,7 +25,9 @@ namespace ops { OPS_API TensorStorageInfoPtrList ViewCalc(const PrimitivePtr &prim, const std::vector &inputs); OPS_API TensorStorageInfoPtrList ViewCalcImpl(const PrimitivePtr &prim, const tensor::BaseTensorPtr &input_tensor, const std::vector &shape); - +OPS_API TensorStorageInfoPtrList ViewCalcImplWithoutStorageInfo(const PrimitivePtr &prim, + const tensor::BaseTensorPtr &input_tensor, + const std::vector &shape); } // namespace ops } // namespace mindspore -- Gitee From fc86859b12f28a3eb9033be75611eb42b2b2ddde Mon Sep 17 00:00:00 2001 From: pengqi Date: Mon, 31 Mar 2025 15:37:05 +0800 Subject: [PATCH 3/9] remove create op for reshape --- .../ccsrc/pyboost/customize/reshape_simple.cc | 35 ++++++------- .../ccsrc/pyboost/customize/reshape_simple.h | 2 +- mindspore/ccsrc/pyboost/op_runner.h | 9 ++++ mindspore/ccsrc/pynative/grad/grad_utils.cc | 15 ++++++ mindspore/ccsrc/pynative/grad/grad_utils.h | 2 + .../customize/reshape_simple_op.cc | 49 ++++++++----------- mindspore/ccsrc/pynative/pynative_utils.cc | 32 ++++++++++++ mindspore/ccsrc/pynative/pynative_utils.h | 2 + mindspore/ops/view/view_strides_calc.cc | 6 +-- mindspore/ops/view/view_strides_calc.h | 3 +- 10 files changed, 99 insertions(+), 56 deletions(-) diff --git a/mindspore/ccsrc/pyboost/customize/reshape_simple.cc b/mindspore/ccsrc/pyboost/customize/reshape_simple.cc index a9b3015e449..b35eabc18a7 100644 --- a/mindspore/ccsrc/pyboost/customize/reshape_simple.cc +++ b/mindspore/ccsrc/pyboost/customize/reshape_simple.cc @@ -28,8 +28,8 @@ namespace mindspore { namespace kernel { namespace pyboost { -tensor::BaseTensorPtr ReshapeSimpleCustomize(const std::shared_ptr &op, const BaseTensorPtr &input_tensor, - const std::vector &shape) { +tensor::BaseTensorPtr ReshapeSimpleCustomize(const DeviceContext *device_context, const size_t &stream_id, + const BaseTensorPtr &input_tensor, const std::vector &shape) { auto old_storage_info = input_tensor->storage_info(); // Uncontiguous if (old_storage_info != nullptr && !old_storage_info->is_contiguous) { @@ -37,48 +37,41 @@ tensor::BaseTensorPtr ReshapeSimpleCustomize(const std::shared_ptr &op if (!storage_info_list.empty()) { MS_LOG(DEBUG) << "View Uncontiguous Reshape Call start"; tensor::BaseTensorPtrList outputs; - PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), input_tensor); - PyBoostUtils::CreateOutputTensor(op->device_context(), input_tensor, storage_info_list, &outputs); - op->set_outputs(outputs); - PyBoostUtils::DispatchRun(std::make_shared([op, input_tensor]() { + PyBoostUtils::PrepareOpInputs(device_context, stream_id, input_tensor); + PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); + PyBoostUtils::DispatchRun(std::make_shared([device_context, input_tensor]() { MS_LOG(DEBUG) << "View device task Uncontiguous Reshape start"; - auto device_context = op->device_context(); PyBoostUtils::MallocOpInputs(device_context, input_tensor); MS_LOG(DEBUG) << "View device task Uncontiguous Reshape end"; })); - op->CreateOutputSimpleInfo(); MS_LOG(DEBUG) << "View Uncontiguous Reshape Call end"; - return op->output(0); + return outputs[0]; } } // Contiguous MS_LOG(DEBUG) << "View View Call start"; - auto storage_info_list = ops::ViewCalcImplWithoutStorageInfo(op->primitive(), input_tensor, shape); + auto storage_info_list = ops::ViewCalcImplWithoutStorageInfo(input_tensor, shape); if (!storage_info_list.empty()) { // Create device address for input tensors tensor::BaseTensorPtrList outputs; auto addr = input_tensor->device_address(); if (addr != nullptr) { - PyBoostUtils::CreateOutputTensor(op->device_context(), input_tensor, storage_info_list, &outputs); - op->set_outputs(outputs); + PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); } else { - PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), input_tensor); - PyBoostUtils::CreateOutputTensor(op->device_context(), input_tensor, storage_info_list, &outputs); - op->set_outputs(outputs); + PyBoostUtils::PrepareOpInputs(device_context, stream_id, input_tensor); + PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); // Async - PyBoostUtils::DispatchRun(std::make_shared([op, input_tensor]() { + PyBoostUtils::DispatchRun(std::make_shared([device_context, input_tensor]() { MS_LOG(DEBUG) << "View device task View start"; - auto device_context = op->device_context(); PyBoostUtils::MallocOpInputsForView(device_context, input_tensor); MS_LOG(DEBUG) << "View device task View end"; })); } + MS_LOG(DEBUG) << "View View Call end"; + return outputs[0]; } else { - MS_LOG_EXCEPTION << "View unsupported:" << op->primitive()->name() << " or input ERROR"; + MS_LOG_EXCEPTION << "View unsupported: Reshape_Simple or input ERROR"; } - op->CreateOutputSimpleInfo(); - MS_LOG(DEBUG) << "View View Call end"; - return op->output(0); } } // namespace pyboost } // namespace kernel diff --git a/mindspore/ccsrc/pyboost/customize/reshape_simple.h b/mindspore/ccsrc/pyboost/customize/reshape_simple.h index e8ed613ff8a..d426a806b77 100644 --- a/mindspore/ccsrc/pyboost/customize/reshape_simple.h +++ b/mindspore/ccsrc/pyboost/customize/reshape_simple.h @@ -27,7 +27,7 @@ namespace mindspore { namespace kernel { namespace pyboost { -tensor::BaseTensorPtr PYBOOST_API ReshapeSimpleCustomize(const std::shared_ptr &op, +tensor::BaseTensorPtr PYBOOST_API ReshapeSimpleCustomize(const DeviceContext *device_context, const size_t &stream_id, const BaseTensorPtr &input_tensor, const std::vector &shape); } // namespace pyboost diff --git a/mindspore/ccsrc/pyboost/op_runner.h b/mindspore/ccsrc/pyboost/op_runner.h index 652d390f868..68ae6f90624 100644 --- a/mindspore/ccsrc/pyboost/op_runner.h +++ b/mindspore/ccsrc/pyboost/op_runner.h @@ -281,6 +281,15 @@ class PYBOOST_API OpRunner : public std::enable_shared_from_this { } }; using OpPtr = std::shared_ptr; +inline ValueSimpleInfoPtr CreateOutputSimpleInfoNew(const tensor::BaseTensorPtr &output) { + ValueSimpleInfoPtr output_value_simple_info; + output_value_simple_info = std::make_shared(); + output_value_simple_info->is_tuple_output_ = false; + output_value_simple_info->size_ = kIndex1; + output_value_simple_info->shape_vector_.emplace_back(output->shape()); + output_value_simple_info->dtype_vector_.emplace_back(output->Dtype()); + return output_value_simple_info; +} } // namespace pyboost } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/pynative/grad/grad_utils.cc b/mindspore/ccsrc/pynative/grad/grad_utils.cc index 32c50720847..43e0b83b0b9 100644 --- a/mindspore/ccsrc/pynative/grad/grad_utils.cc +++ b/mindspore/ccsrc/pynative/grad/grad_utils.cc @@ -359,6 +359,21 @@ ValuePtr AutoGradUtil::MakeOutput(bool requires_grad, const kernel::pyboost::OpP return op->outputs()[0]; } +ValuePtr AutoGradUtil::MakeOutputNew(bool requires_grad, const tensor::BaseTensorPtr &output, size_t op_index, + const tensor::BaseTensorPtr &base_view) { + // delete NoneTypeNode check. + if (base_view != nullptr && output->storage_info() != nullptr) { + autograd::CreationType creationType = + requires_grad ? autograd::CreationType::kDefault : autograd::CreationType::kNoGradMode; + BuildViewAutoGradMeta(base_view, output, op_index, creationType); + } else if (requires_grad) { + if (output->auto_grad_meta_data() == nullptr) { + output->set_auto_grad_meta_data(std::make_shared(op_index, InputType::kOpOutput)); + } + } + return output; +} + ValuePtr AutoGradUtil::MakeMultiOutput(bool requires_grad, const kernel::pyboost::OpPtr &op, size_t op_index, const tensor::BaseTensorPtr &base_view) { size_t size = op->outputs().size(); diff --git a/mindspore/ccsrc/pynative/grad/grad_utils.h b/mindspore/ccsrc/pynative/grad/grad_utils.h index 42c6fcb0695..35323bf53c5 100644 --- a/mindspore/ccsrc/pynative/grad/grad_utils.h +++ b/mindspore/ccsrc/pynative/grad/grad_utils.h @@ -66,6 +66,8 @@ struct AutoGradUtil { static void SetInferMultiOutputToGrad(const OpGradInfoPtr &op_grad_info, const kernel::pyboost::OpPtr &op); static ValuePtr MakeOutput(bool requires_grad, const kernel::pyboost::OpPtr &op, size_t op_index, const tensor::BaseTensorPtr &base_view = nullptr); + static ValuePtr MakeOutputNew(bool requires_grad, const tensor::BaseTensorPtr &output, size_t op_index, + const tensor::BaseTensorPtr &base_view); static ValuePtr MakeMultiOutput(bool requires_grad, const kernel::pyboost::OpPtr &op, size_t op_index, const tensor::BaseTensorPtr &view_base = nullptr); // Multi inputs and multi outputs view op enter here, temp code need discard. diff --git a/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc b/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc index c13cec6aeba..14dbb607f4d 100644 --- a/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc +++ b/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc @@ -45,10 +45,8 @@ py::object Pyboost_Reshape_Simple(const py::list &args) { auto py_output = tensor::MakeTuple(); auto promises = tensor::TransformPromise(py_output); - DispatchOp( - std::make_shared( - [input, shape, promises](const FrontendOpRunInfoPtr &op_run_info) { - + DispatchOp(std::make_shared( + [input, shape, promises](const FrontendOpRunInfoPtr &op_run_info) { MS_LOG(DEBUG) << "Run frontend task Pyboost_Reshape_Simple start"; auto old_stream_id = kernel::pyboost::PyBoostUtils::cur_stream_id(); kernel::pyboost::PyBoostUtils::set_cur_stream_id(op_run_info->base_op_run_info.stream_id); @@ -56,44 +54,39 @@ py::object Pyboost_Reshape_Simple(const py::list &args) { // stub tensor to tensor. auto input_tensor = PyNativeAlgo::Common::ConvertStubNodeToTensor(input, false, op_run_info->requires_grad); - kernel::pyboost::OpRunStatus::Get().set_run_info( - kernel::pyboost::OpStatus(op_run_info->async_status.disable_mix_precision, - op_run_info->async_status.is_jit_compiling, - op_run_info->async_status.custom_bprop_cell_count, - op_run_info->base_op_run_info.device_target)); + kernel::pyboost::OpRunStatus::Get().set_run_info(kernel::pyboost::OpStatus( + op_run_info->async_status.disable_mix_precision, op_run_info->async_status.is_jit_compiling, + op_run_info->async_status.custom_bprop_cell_count, op_run_info->base_op_run_info.device_target)); kernel::pyboost::RequireGradGuard require_grad_guard(op_run_info->requires_grad); - // Create op - auto op = CREATE_PYBOOST_OP(Reshape, op_run_info->base_op_run_info.device_target); - op->set_primitive(prim::kPrimReshape); - // Run op - auto outputs = kernel::pyboost::ReshapeSimpleCustomize(op, input_tensor, shape); + auto output = kernel::pyboost::ReshapeSimpleCustomize( + runtime::OpRunner::GetDeviceContext(op_run_info->base_op_run_info.device_target), + op_run_info->base_op_run_info.stream_id, input_tensor, shape); // Set output value and grad info - auto real_out = PyNativeAlgo::AutoGradUtil::MakeOutput( - op_run_info->requires_grad, op, + auto real_out = PyNativeAlgo::AutoGradUtil::MakeOutputNew( + op_run_info->requires_grad, output, op_run_info->requires_grad ? PyNativeAlgo::Common::GetPyNativeExecutor()->grad_executor()->top_cell()->op_index() - : 0, input_tensor); + : 0, + input_tensor); // Do auto grad if (op_run_info->requires_grad) { static auto op_type = kernel::pyboost::GetOpTypeFromOpdef(ops::gReshape); - auto grad_info = std::make_shared(op_type, - prim::kPrimReshape, - std::vector{input_tensor, kNone}, - real_out); - PyNativeAlgo::AutoGradUtil::SetInferOutputToGrad(grad_info, op); - PyNativeAlgo::PyBoost::DoGrad(op, grad_info, op_run_info->async_status); + auto grad_info = std::make_shared(op_type, prim::kPrimReshape, + std::vector{input_tensor, kNone}, real_out); + grad_info->output_value_simple_info = mindspore::kernel::pyboost::CreateOutputSimpleInfoNew(output); + grad_info->output_value_simple_info->is_tuple_output_ = false; + PyNativeAlgo::PyBoost::DoGradNew(grad_info, op_run_info->async_status); } // Data sync in mix mode(Graph and PyNative) - PyNativeAlgo::PyBoost::DataSyncForGraph(op); + PyNativeAlgo::PyBoost::DataSyncForGraphNew(output); kernel::pyboost::PyBoostUtils::set_cur_stream_id(old_stream_id); - tensor::SetPromise(promises, outputs); - }, [promises]() { - tensor::SetException(promises); - }, op_run_info)); + tensor::SetPromise(promises, output); + }, + [promises]() { tensor::SetException(promises); }, op_run_info)); MS_LOG(DEBUG) << "Run Pyboost_Reshape_Simple end"; return py::reinterpret_steal(tensor::TransformOutput(py_output)); } diff --git a/mindspore/ccsrc/pynative/pynative_utils.cc b/mindspore/ccsrc/pynative/pynative_utils.cc index c6ded4adde7..78e68b43c5b 100644 --- a/mindspore/ccsrc/pynative/pynative_utils.cc +++ b/mindspore/ccsrc/pynative/pynative_utils.cc @@ -1364,6 +1364,22 @@ void PyBoost::DataSyncForGraph(const kernel::pyboost::OpPtr &op) { } } +void PyBoost::DataSyncForGraphNew(const tensor::BaseTensorPtr &output) { + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + if (ms_context->get_param(MS_CTX_EXECUTION_MODE) != kPynativeMode && + !runtime::OpExecutor::GetInstance().async_for_graph()) { + // If execution mode is Graph Mode in MsContext, the tensor will be the input of graph which will execute in Graph + // Mode, if the graph contain no CNode after optimization, the tensor need sync to host. + auto device_address = std::static_pointer_cast(output->device_address()); + if (device_address == nullptr) { + return; + } + runtime::DeviceAddressUtils::CreateKernelTensor(device_address, output.get()); + output->data_sync(true); + } +} + PrimitivePtr PyBoost::ConvertPrimitive(const py::object &obj) { const auto &adapter = obj.cast(); MS_EXCEPTION_IF_NULL(adapter); @@ -1434,6 +1450,22 @@ void PyBoost::DoGrad(const kernel::pyboost::OpPtr &op, const OpGradInfoPtr &grad forward->ForwardOpGradImpl(grad_info, async_status); } +void PyBoost::DoGradNew(const OpGradInfoPtr &grad_info, const AsyncStatus &async_status) { + static const std::string kDoGradName = "DoGrad"; + runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kPyNativeFrontendTask, + kDoGradName, false); + + const auto &pynative_executor = Common::GetPyNativeExecutor(); + const auto &forward = pynative_executor->forward_executor(); + // Inplace op need save clone tensor. + grad_info->clone_value = nullptr; + // Check and set input auto grad meta info and InputType + if (MS_LIKELY(!forward->grad()->top_cell()->is_bprop_need_get_forward_graph())) { + MarkPyBoostInputs(grad_info, forward->grad()->top_cell()); + } + forward->ForwardOpGradImpl(grad_info, async_status); +} + void PyBoost::MarkPyBoostInputs(const OpGradInfoPtr &op_grad_info, const TopCellInfoPtr &top_cell) { MS_EXCEPTION_IF_NULL(op_grad_info); size_t input_size = op_grad_info->input_value.size(); diff --git a/mindspore/ccsrc/pynative/pynative_utils.h b/mindspore/ccsrc/pynative/pynative_utils.h index 1b7eaa1b186..34bd9b5460b 100644 --- a/mindspore/ccsrc/pynative/pynative_utils.h +++ b/mindspore/ccsrc/pynative/pynative_utils.h @@ -167,6 +167,7 @@ struct DataConvert { struct PyBoost { static FrontendOpRunInfoPtr Init(const PrimitivePtr &prim); static void DoGrad(const kernel::pyboost::OpPtr &op, const OpGradInfoPtr &grad_info, const AsyncStatus &async_status); + static void DoGradNew(const OpGradInfoPtr &grad_info, const AsyncStatus &async_status); static void SetAnyValueForAbstract(const kernel::pyboost::OpPtr &op); static void UpdateStubOutput(const kernel::pyboost::OpPtr &op, const stub::StubNodePtr &stub_output, const AbstractBasePtr &abstract, const ValuePtr &real_out); @@ -208,6 +209,7 @@ struct PyBoost { return ret; } static void DataSyncForGraph(const kernel::pyboost::OpPtr &op); + static void DataSyncForGraphNew(const tensor::BaseTensorPtr &output); static void MarkPyBoostInputs(const OpGradInfoPtr &op_grad_info, const TopCellInfoPtr &top_cell); static void BumpVersionAsync(const tensor::BaseTensorPtr &tensor); static ValuePtr OutputToValue(const BaseTensorPtr &output) { return output; } diff --git a/mindspore/ops/view/view_strides_calc.cc b/mindspore/ops/view/view_strides_calc.cc index b64952a3189..deef4009058 100644 --- a/mindspore/ops/view/view_strides_calc.cc +++ b/mindspore/ops/view/view_strides_calc.cc @@ -53,8 +53,7 @@ ShapeVector update_shape(const ShapeVector &input_shape, ShapeVector shape) { return shape; } -TensorStorageInfoPtrList ViewCalcImplWithoutStorageInfo(const PrimitivePtr &prim, - const tensor::BaseTensorPtr &input_tensor, +TensorStorageInfoPtrList ViewCalcImplWithoutStorageInfo(const tensor::BaseTensorPtr &input_tensor, const std::vector &shape) { MS_EXCEPTION_IF_NULL(input_tensor); auto old_strides = GetOriStrides(input_tensor->shape()); @@ -67,8 +66,7 @@ TensorStorageInfoPtrList ViewCalcImplWithoutStorageInfo(const PrimitivePtr &prim return {new_storage_info}; } -TensorStorageInfoPtrList ViewCalcImplWithStorageInfo(const PrimitivePtr &prim, - const tensor::BaseTensorPtr &input_tensor, +TensorStorageInfoPtrList ViewCalcImplWithStorageInfo(const tensor::BaseTensorPtr &input_tensor, const std::vector &shape) { MS_EXCEPTION_IF_NULL(input_tensor); auto old_tensor_info = GetOldTensorInfo(input_tensor); diff --git a/mindspore/ops/view/view_strides_calc.h b/mindspore/ops/view/view_strides_calc.h index 88f3a5a8033..c30324ddcd4 100644 --- a/mindspore/ops/view/view_strides_calc.h +++ b/mindspore/ops/view/view_strides_calc.h @@ -25,8 +25,7 @@ namespace ops { OPS_API TensorStorageInfoPtrList ViewCalc(const PrimitivePtr &prim, const std::vector &inputs); OPS_API TensorStorageInfoPtrList ViewCalcImpl(const PrimitivePtr &prim, const tensor::BaseTensorPtr &input_tensor, const std::vector &shape); -OPS_API TensorStorageInfoPtrList ViewCalcImplWithoutStorageInfo(const PrimitivePtr &prim, - const tensor::BaseTensorPtr &input_tensor, +OPS_API TensorStorageInfoPtrList ViewCalcImplWithoutStorageInfo(const tensor::BaseTensorPtr &input_tensor, const std::vector &shape); } // namespace ops } // namespace mindspore -- Gitee From 13ce770ccf7edd9421e801f5de73562e319b2c84 Mon Sep 17 00:00:00 2001 From: pengqi Date: Mon, 31 Mar 2025 17:22:26 +0800 Subject: [PATCH 4/9] view simple --- .../ccsrc/pyboost/customize/view_simple.cc | 58 ++++++++++++ .../ccsrc/pyboost/customize/view_simple.h | 36 +++++++ .../op_function/customize/direct_ops.cc | 1 + .../op_function/customize/direct_ops.h | 1 + .../op_function/customize/view_simple_op.cc | 93 +++++++++++++++++++ mindspore/ops/view/view_strides_calc.cc | 20 ++++ mindspore/ops/view/view_strides_calc.h | 2 + .../mindspore/ops/function/array_func.py | 5 + 8 files changed, 216 insertions(+) create mode 100644 mindspore/ccsrc/pyboost/customize/view_simple.cc create mode 100644 mindspore/ccsrc/pyboost/customize/view_simple.h create mode 100644 mindspore/ccsrc/pynative/op_function/customize/view_simple_op.cc diff --git a/mindspore/ccsrc/pyboost/customize/view_simple.cc b/mindspore/ccsrc/pyboost/customize/view_simple.cc new file mode 100644 index 00000000000..4dfa8efb14d --- /dev/null +++ b/mindspore/ccsrc/pyboost/customize/view_simple.cc @@ -0,0 +1,58 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mindspore/ccsrc/pyboost/customize/view_simple.h" +#include +#include +#include "mindspore/ops/view/view_strides_calc.h" +#include "mindspore/ops/view/view_strides_calc.h" +#include "mindspore/ccsrc/pyboost/op_register.h" +#include "mindspore/ccsrc/pyboost/pyboost_utils.h" +#include "mindspore/ccsrc/pyboost/auto_generate/copy.h" +#include "mindspore/ccsrc/pyboost/auto_generate/view.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr ViewSimpleCustomize(const DeviceContext *device_context, const size_t &stream_id, + const BaseTensorPtr &input_tensor, const std::vector &shape) { + MS_LOG(DEBUG) << "View View Call start"; + auto storage_info_list = ops::ViewCalcImplNew(input_tensor, shape); + if (!storage_info_list.empty()) { + // Create device address for input tensors + tensor::BaseTensorPtrList outputs; + auto addr = input_tensor->device_address(); + if (addr != nullptr) { + PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); + } else { + PyBoostUtils::PrepareOpInputs(device_context, stream_id, input_tensor); + PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); + // Async + PyBoostUtils::DispatchRun(std::make_shared([device_context, input_tensor]() { + MS_LOG(DEBUG) << "View device task View start"; + PyBoostUtils::MallocOpInputsForView(device_context, input_tensor); + MS_LOG(DEBUG) << "View device task View end"; + })); + } + MS_LOG(DEBUG) << "View View Call end"; + return outputs[0]; + } else { + MS_LOG_EXCEPTION << "View unsupported: View_Simple or input ERROR"; + } +} +} // namespace pyboost +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/pyboost/customize/view_simple.h b/mindspore/ccsrc/pyboost/customize/view_simple.h new file mode 100644 index 00000000000..866436f2cb2 --- /dev/null +++ b/mindspore/ccsrc/pyboost/customize/view_simple.h @@ -0,0 +1,36 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_VIEW_SIMPLE_H_ +#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_VIEW_SIMPLE_H_ + +#include +#include +#include "ir/tensor.h" +#include "ir/value.h" +#include "runtime/hardware/device_context_manager.h" +#include "mindspore/ccsrc/pyboost/op_runner.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr PYBOOST_API ViewSimpleCustomize(const DeviceContext *device_context, const size_t &stream_id, + const BaseTensorPtr &input_tensor, + const std::vector &shape); +} // namespace pyboost +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_VIEW_SIMPLE_H_ \ No newline at end of file diff --git a/mindspore/ccsrc/pynative/op_function/customize/direct_ops.cc b/mindspore/ccsrc/pynative/op_function/customize/direct_ops.cc index 774ed89a6b0..36f4bc11dd9 100644 --- a/mindspore/ccsrc/pynative/op_function/customize/direct_ops.cc +++ b/mindspore/ccsrc/pynative/op_function/customize/direct_ops.cc @@ -23,5 +23,6 @@ void RegDirectOps(py::module *m) { m->def("pyboost_empty_like", &mindspore::pynative::EmptyLike, "EmptyLike"); m->def("pyboost_new_empty", &mindspore::pynative::NewEmpty, "NewEmpty"); m->def("pyboost_reshapesimple", &mindspore::pynative::Pyboost_Reshape_Simple, "Reshape Simple"); + m->def("pyboost_view_simple", &mindspore::pynative::Pyboost_View_Simple, "View Simple"); } } // namespace mindspore::pynative diff --git a/mindspore/ccsrc/pynative/op_function/customize/direct_ops.h b/mindspore/ccsrc/pynative/op_function/customize/direct_ops.h index 10291557a0e..1e253b55387 100644 --- a/mindspore/ccsrc/pynative/op_function/customize/direct_ops.h +++ b/mindspore/ccsrc/pynative/op_function/customize/direct_ops.h @@ -36,5 +36,6 @@ PYNATIVE_EXPORT py::object Pyboost_Empty_OP(const PrimitivePtr &prim, const ValueTuplePtr &shape, const std::optional &dtype, const std::optional &device); PYNATIVE_EXPORT py::object Pyboost_Reshape_Simple(const py::list &args); +PYNATIVE_EXPORT py::object Pyboost_View_Simple(const py::list &args); } // namespace mindspore::pynative #endif // MINDSPORE_CCSRC_PIPELINE_PYNATIVE_OP_FUNCTION_CUSTOMIZE_DIRECT_OPS_H diff --git a/mindspore/ccsrc/pynative/op_function/customize/view_simple_op.cc b/mindspore/ccsrc/pynative/op_function/customize/view_simple_op.cc new file mode 100644 index 00000000000..1925a70a4a9 --- /dev/null +++ b/mindspore/ccsrc/pynative/op_function/customize/view_simple_op.cc @@ -0,0 +1,93 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "include/common/pybind_api/api_register.h" +#include "pynative/grad/grad_utils.h" +#include "pynative/pynative_utils.h" +#include "pynative/op_function/converter.h" +#include "pybind_api/gil_scoped_long_running.h" +#include "pynative/predict_out_type_map.h" +#include "pynative/forward/forward_task.h" +#include "include/common/utils/tensor_utils.h" +#include "op_def/auto_generate/gen_ops_def.h" +#include "mindspore/ccsrc/pyboost/functions/auto_grad_guard.h" +#include "mindspore/ccsrc/pyboost/functions/base.h" +#include "mindspore/ccsrc/pyboost/auto_generate/view.h" +#include "mindspore/ccsrc/pyboost/customize/view_simple.h" +#include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_v.h" +#include "pynative/op_function/customize/direct_ops.h" + +namespace mindspore::pynative { +py::object Pyboost_View_Simple(const py::list &args) { + runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kRunOp, "ViewSimple", + false, true); + MS_LOG(DEBUG) << "Run Pyboost_View_Simple start"; + static Converter converter(&ops::gView); + converter.Parse(args); + auto input = converter.ToTensor(args, kIndex0); + const auto &shape = converter.ToIntVector(args, kIndex1); + auto op_run_info = PyNativeAlgo::PyBoost::Init(prim::kPrimView); + op_run_info->signatures = ops::gView.signatures_; + op_run_info->source_type = converter.source_type(); + auto py_output = tensor::MakeTuple(); + auto promises = tensor::TransformPromise(py_output); + + DispatchOp(std::make_shared( + [input, shape, promises](const FrontendOpRunInfoPtr &op_run_info) { + MS_LOG(DEBUG) << "Run frontend task Pyboost_View_Simple start"; + auto old_stream_id = kernel::pyboost::PyBoostUtils::cur_stream_id(); + kernel::pyboost::PyBoostUtils::set_cur_stream_id(op_run_info->base_op_run_info.stream_id); + + // stub tensor to tensor. + auto input_tensor = PyNativeAlgo::Common::ConvertStubNodeToTensor(input, false, op_run_info->requires_grad); + + kernel::pyboost::OpRunStatus::Get().set_run_info(kernel::pyboost::OpStatus( + op_run_info->async_status.disable_mix_precision, op_run_info->async_status.is_jit_compiling, + op_run_info->async_status.custom_bprop_cell_count, op_run_info->base_op_run_info.device_target)); + kernel::pyboost::RequireGradGuard require_grad_guard(op_run_info->requires_grad); + + // Run op + auto output = kernel::pyboost::ViewSimpleCustomize( + runtime::OpRunner::GetDeviceContext(op_run_info->base_op_run_info.device_target), + op_run_info->base_op_run_info.stream_id, input_tensor, shape); + + // Set output value and grad info + auto real_out = PyNativeAlgo::AutoGradUtil::MakeOutputNew( + op_run_info->requires_grad, output, + op_run_info->requires_grad + ? PyNativeAlgo::Common::GetPyNativeExecutor()->grad_executor()->top_cell()->op_index() + : 0, + input_tensor); + + // Do auto grad + if (op_run_info->requires_grad) { + static auto op_type = kernel::pyboost::GetOpTypeFromOpdef(ops::gView); + auto grad_info = + std::make_shared(op_type, prim::kPrimView, std::vector{input_tensor, kNone}, real_out); + grad_info->output_value_simple_info = mindspore::kernel::pyboost::CreateOutputSimpleInfoNew(output); + grad_info->output_value_simple_info->is_tuple_output_ = false; + PyNativeAlgo::PyBoost::DoGradNew(grad_info, op_run_info->async_status); + } + // Data sync in mix mode(Graph and PyNative) + PyNativeAlgo::PyBoost::DataSyncForGraphNew(output); + kernel::pyboost::PyBoostUtils::set_cur_stream_id(old_stream_id); + tensor::SetPromise(promises, output); + }, + [promises]() { tensor::SetException(promises); }, op_run_info)); + MS_LOG(DEBUG) << "Run Pyboost_View_Simple end"; + return py::reinterpret_steal(tensor::TransformOutput(py_output)); +} +} // namespace mindspore::pynative diff --git a/mindspore/ops/view/view_strides_calc.cc b/mindspore/ops/view/view_strides_calc.cc index deef4009058..3817b3a1333 100644 --- a/mindspore/ops/view/view_strides_calc.cc +++ b/mindspore/ops/view/view_strides_calc.cc @@ -95,6 +95,26 @@ TensorStorageInfoPtrList ViewCalcImpl(const PrimitivePtr &prim, const tensor::Ba return {new_storage_info}; } +TensorStorageInfoPtrList ViewCalcImplNew(const tensor::BaseTensorPtr &input_tensor, const std::vector &shape) { + MS_EXCEPTION_IF_NULL(input_tensor); + if (input_tensor->storage_info() == nullptr) { + auto old_strides = GetOriStrides(input_tensor->shape()); + const auto &new_shape = update_shape(input_tensor->shape(), shape); + const auto &new_strides = GetOriStrides(new_shape); + auto new_storage_info = std::make_shared(new_shape, new_strides, 0, input_tensor->shape(), + old_strides, IsContiguous(new_shape, new_strides)); + return {new_storage_info}; + } else { + auto storage_info = input_tensor->storage_info(); + const auto &new_shape = update_shape(storage_info->shape, shape); + const auto &new_strides = GetOriStrides(new_shape); + auto new_storage_info = + std::make_shared(new_shape, new_strides, storage_info->storage_offset, storage_info->ori_shape, + storage_info->ori_strides, IsContiguous(new_shape, new_strides)); + return {new_storage_info}; + } +} + TensorStorageInfoPtrList ViewCalc(const PrimitivePtr &prim, const std::vector &inputs) { auto input_tensor = inputs[kInputIndex0]->cast(); MS_EXCEPTION_IF_NULL(input_tensor); diff --git a/mindspore/ops/view/view_strides_calc.h b/mindspore/ops/view/view_strides_calc.h index c30324ddcd4..62d149f4661 100644 --- a/mindspore/ops/view/view_strides_calc.h +++ b/mindspore/ops/view/view_strides_calc.h @@ -25,6 +25,8 @@ namespace ops { OPS_API TensorStorageInfoPtrList ViewCalc(const PrimitivePtr &prim, const std::vector &inputs); OPS_API TensorStorageInfoPtrList ViewCalcImpl(const PrimitivePtr &prim, const tensor::BaseTensorPtr &input_tensor, const std::vector &shape); +OPS_API TensorStorageInfoPtrList ViewCalcImplNew(const tensor::BaseTensorPtr &input_tensor, + const std::vector &shape); OPS_API TensorStorageInfoPtrList ViewCalcImplWithoutStorageInfo(const tensor::BaseTensorPtr &input_tensor, const std::vector &shape); } // namespace ops diff --git a/mindspore/python/mindspore/ops/function/array_func.py b/mindspore/python/mindspore/ops/function/array_func.py index 01b9f06ff43..a7342019533 100644 --- a/mindspore/python/mindspore/ops/function/array_func.py +++ b/mindspore/python/mindspore/ops/function/array_func.py @@ -114,6 +114,7 @@ from mindspore._c_expression import pyboost_empty from mindspore._c_expression import pyboost_empty_like from mindspore._c_expression import pyboost_new_empty from mindspore._c_expression import pyboost_reshapesimple +from mindspore._c_expression import pyboost_view_simple arg_max_with_value_ = ArgMaxWithValue() arg_min_with_value_ = ArgMinWithValue() @@ -525,6 +526,10 @@ def reshape_simple(input, shape): return pyboost_reshapesimple([input, shape]) +def view_simple(input, shape): + return pyboost_view_simple([input, shape]) + + def empty(*size, dtype=None, device=None): r""" Creates a tensor with uninitialized data, whose shape, dtype and device are described by the argument `size`, -- Gitee From 6bc5e22626f53383af2c2138293fb48d8991df5b Mon Sep 17 00:00:00 2001 From: pengqi Date: Fri, 4 Apr 2025 09:16:33 +0800 Subject: [PATCH 5/9] create tensor with StorageInfo without DeviceAddress, --- .../ccsrc/pyboost/customize/reshape_simple.cc | 28 ++++++++++--------- .../ccsrc/pyboost/customize/view_simple.cc | 27 +++++++++--------- mindspore/ccsrc/pyboost/pyboost_utils.cc | 15 ++++++++++ mindspore/ccsrc/pyboost/pyboost_utils.h | 3 ++ 4 files changed, 47 insertions(+), 26 deletions(-) diff --git a/mindspore/ccsrc/pyboost/customize/reshape_simple.cc b/mindspore/ccsrc/pyboost/customize/reshape_simple.cc index b35eabc18a7..433957dd121 100644 --- a/mindspore/ccsrc/pyboost/customize/reshape_simple.cc +++ b/mindspore/ccsrc/pyboost/customize/reshape_simple.cc @@ -54,19 +54,21 @@ tensor::BaseTensorPtr ReshapeSimpleCustomize(const DeviceContext *device_context if (!storage_info_list.empty()) { // Create device address for input tensors tensor::BaseTensorPtrList outputs; - auto addr = input_tensor->device_address(); - if (addr != nullptr) { - PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); - } else { - PyBoostUtils::PrepareOpInputs(device_context, stream_id, input_tensor); - PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); - // Async - PyBoostUtils::DispatchRun(std::make_shared([device_context, input_tensor]() { - MS_LOG(DEBUG) << "View device task View start"; - PyBoostUtils::MallocOpInputsForView(device_context, input_tensor); - MS_LOG(DEBUG) << "View device task View end"; - })); - } + PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list[0], &outputs); + + // auto addr = input_tensor->device_address(); + // if (addr != nullptr) { + // PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); + // } else { + // PyBoostUtils::PrepareOpInputs(device_context, stream_id, input_tensor); + // PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); + // // Async + // PyBoostUtils::DispatchRun(std::make_shared([device_context, input_tensor]() { + // MS_LOG(DEBUG) << "View device task View start"; + // PyBoostUtils::MallocOpInputsForView(device_context, input_tensor); + // MS_LOG(DEBUG) << "View device task View end"; + // })); + // } MS_LOG(DEBUG) << "View View Call end"; return outputs[0]; } else { diff --git a/mindspore/ccsrc/pyboost/customize/view_simple.cc b/mindspore/ccsrc/pyboost/customize/view_simple.cc index 4dfa8efb14d..459fd8ae665 100644 --- a/mindspore/ccsrc/pyboost/customize/view_simple.cc +++ b/mindspore/ccsrc/pyboost/customize/view_simple.cc @@ -34,19 +34,20 @@ tensor::BaseTensorPtr ViewSimpleCustomize(const DeviceContext *device_context, c if (!storage_info_list.empty()) { // Create device address for input tensors tensor::BaseTensorPtrList outputs; - auto addr = input_tensor->device_address(); - if (addr != nullptr) { - PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); - } else { - PyBoostUtils::PrepareOpInputs(device_context, stream_id, input_tensor); - PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); - // Async - PyBoostUtils::DispatchRun(std::make_shared([device_context, input_tensor]() { - MS_LOG(DEBUG) << "View device task View start"; - PyBoostUtils::MallocOpInputsForView(device_context, input_tensor); - MS_LOG(DEBUG) << "View device task View end"; - })); - } + PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list[0], &outputs); + // auto addr = input_tensor->device_address(); + // if (addr != nullptr) { + // PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); + // } else { + // PyBoostUtils::PrepareOpInputs(device_context, stream_id, input_tensor); + // PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); + // // Async + // PyBoostUtils::DispatchRun(std::make_shared([device_context, input_tensor]() { + // MS_LOG(DEBUG) << "View device task View start"; + // PyBoostUtils::MallocOpInputsForView(device_context, input_tensor); + // MS_LOG(DEBUG) << "View device task View end"; + // })); + // } MS_LOG(DEBUG) << "View View Call end"; return outputs[0]; } else { diff --git a/mindspore/ccsrc/pyboost/pyboost_utils.cc b/mindspore/ccsrc/pyboost/pyboost_utils.cc index 31f00b49f00..60bd95a339d 100644 --- a/mindspore/ccsrc/pyboost/pyboost_utils.cc +++ b/mindspore/ccsrc/pyboost/pyboost_utils.cc @@ -258,6 +258,21 @@ void PyBoostUtils::CreateOutputTensor(const DeviceContext *device_context, const MS_LOG(DEBUG) << "Create output tensor " << output_tensor->ToString() << " with " << storage_info->ToString(); } +void PyBoostUtils::CreateOutputTensorForView(const DeviceContext *device_context, const tensor::BaseTensorPtr &input, + const TensorStorageInfoPtr &storage_info, + std::vector *outputs) { + runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, + runtime::ProfilerEvent::kPyBoostCreateOutputTensor, "CreateOutputTensorForView", + false); + auto output_tensor = std::make_shared(input->data_type(), storage_info->shape, input->data_ptr()); + output_tensor->set_need_pipeline_sync(true); + output_tensor->set_contiguous_callback( + [](const DeviceSyncPtr &device_address) -> DeviceSyncPtr { return ContiguousByDeviceAddress(device_address); }); + output_tensor->set_storage_info(storage_info); + (void)outputs->emplace_back(output_tensor); + MS_LOG(DEBUG) << "Create output tensor " << output_tensor->ToString() << " with " << storage_info->ToString(); +} + AbstractBasePtr PyBoostUtils::InferByOpDef(const PrimitivePtr &prim, const std::vector &input_abs) { MS_EXCEPTION_IF_NULL(prim); runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kPyBoostInferByOpDef, diff --git a/mindspore/ccsrc/pyboost/pyboost_utils.h b/mindspore/ccsrc/pyboost/pyboost_utils.h index ff15410e1c1..f80e743915d 100644 --- a/mindspore/ccsrc/pyboost/pyboost_utils.h +++ b/mindspore/ccsrc/pyboost/pyboost_utils.h @@ -57,6 +57,9 @@ class PYBOOST_API PyBoostUtils { static void CreateOutputTensor(const AbstractBasePtr &abstract, std::vector *outputs); static void CreateOutputTensor(const DeviceContext *device_context, const tensor::BaseTensorPtr &input, const TensorStorageInfoPtr &storage_info, std::vector *outputs); + static void CreateOutputTensorForView(const DeviceContext *device_context, const tensor::BaseTensorPtr &input, + const TensorStorageInfoPtr &storage_info, + std::vector *outputs); static void CreateOutputTensor(const DeviceContext *device_context, const tensor::BaseTensorPtr &input, const TensorStorageInfoPtrList &storage_info_list, std::vector *outputs); -- Gitee From 398f1a6bb4cc5403ae52debd64bfe4042b198399 Mon Sep 17 00:00:00 2001 From: pengqi Date: Fri, 4 Apr 2025 10:29:17 +0800 Subject: [PATCH 6/9] CreateOutputTensorForView --- mindspore/ccsrc/pyboost/customize/reshape_simple.cc | 2 +- mindspore/ccsrc/pyboost/customize/view_simple.cc | 2 +- mindspore/ccsrc/pyboost/pyboost_utils.cc | 2 +- mindspore/ccsrc/pyboost/pyboost_utils.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mindspore/ccsrc/pyboost/customize/reshape_simple.cc b/mindspore/ccsrc/pyboost/customize/reshape_simple.cc index 433957dd121..3c006e52a67 100644 --- a/mindspore/ccsrc/pyboost/customize/reshape_simple.cc +++ b/mindspore/ccsrc/pyboost/customize/reshape_simple.cc @@ -54,7 +54,7 @@ tensor::BaseTensorPtr ReshapeSimpleCustomize(const DeviceContext *device_context if (!storage_info_list.empty()) { // Create device address for input tensors tensor::BaseTensorPtrList outputs; - PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list[0], &outputs); + PyBoostUtils::CreateOutputTensorForView(input_tensor, storage_info_list[0], &outputs); // auto addr = input_tensor->device_address(); // if (addr != nullptr) { diff --git a/mindspore/ccsrc/pyboost/customize/view_simple.cc b/mindspore/ccsrc/pyboost/customize/view_simple.cc index 459fd8ae665..44664cc189b 100644 --- a/mindspore/ccsrc/pyboost/customize/view_simple.cc +++ b/mindspore/ccsrc/pyboost/customize/view_simple.cc @@ -34,7 +34,7 @@ tensor::BaseTensorPtr ViewSimpleCustomize(const DeviceContext *device_context, c if (!storage_info_list.empty()) { // Create device address for input tensors tensor::BaseTensorPtrList outputs; - PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list[0], &outputs); + PyBoostUtils::CreateOutputTensorForView(input_tensor, storage_info_list[0], &outputs); // auto addr = input_tensor->device_address(); // if (addr != nullptr) { // PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); diff --git a/mindspore/ccsrc/pyboost/pyboost_utils.cc b/mindspore/ccsrc/pyboost/pyboost_utils.cc index 60bd95a339d..346d8c936d7 100644 --- a/mindspore/ccsrc/pyboost/pyboost_utils.cc +++ b/mindspore/ccsrc/pyboost/pyboost_utils.cc @@ -258,7 +258,7 @@ void PyBoostUtils::CreateOutputTensor(const DeviceContext *device_context, const MS_LOG(DEBUG) << "Create output tensor " << output_tensor->ToString() << " with " << storage_info->ToString(); } -void PyBoostUtils::CreateOutputTensorForView(const DeviceContext *device_context, const tensor::BaseTensorPtr &input, +void PyBoostUtils::CreateOutputTensorForView(const tensor::BaseTensorPtr &input, const TensorStorageInfoPtr &storage_info, std::vector *outputs) { runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, diff --git a/mindspore/ccsrc/pyboost/pyboost_utils.h b/mindspore/ccsrc/pyboost/pyboost_utils.h index f80e743915d..8ca5f9c0dc9 100644 --- a/mindspore/ccsrc/pyboost/pyboost_utils.h +++ b/mindspore/ccsrc/pyboost/pyboost_utils.h @@ -57,7 +57,7 @@ class PYBOOST_API PyBoostUtils { static void CreateOutputTensor(const AbstractBasePtr &abstract, std::vector *outputs); static void CreateOutputTensor(const DeviceContext *device_context, const tensor::BaseTensorPtr &input, const TensorStorageInfoPtr &storage_info, std::vector *outputs); - static void CreateOutputTensorForView(const DeviceContext *device_context, const tensor::BaseTensorPtr &input, + static void CreateOutputTensorForView(const tensor::BaseTensorPtr &input, const TensorStorageInfoPtr &storage_info, std::vector *outputs); static void CreateOutputTensor(const DeviceContext *device_context, const tensor::BaseTensorPtr &input, -- Gitee From da9d3b7290a9875015b3e7625f4ae8e7ebd8d897 Mon Sep 17 00:00:00 2001 From: pengqi Date: Fri, 4 Apr 2025 10:46:24 +0800 Subject: [PATCH 7/9] reshape sync --- .../customize/reshape_simple_op.cc | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc b/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc index 14dbb607f4d..d91db2bd8c1 100644 --- a/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc +++ b/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc @@ -31,7 +31,7 @@ #include "pynative/op_function/customize/direct_ops.h" namespace mindspore::pynative { -py::object Pyboost_Reshape_Simple(const py::list &args) { +py::object Pyboost_Reshape_Simple_Save(const py::list &args) { runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kRunOp, "ReshapeSimple", false, true); MS_LOG(DEBUG) << "Run Pyboost_Reshape_Simple start"; @@ -90,4 +90,32 @@ py::object Pyboost_Reshape_Simple(const py::list &args) { MS_LOG(DEBUG) << "Run Pyboost_Reshape_Simple end"; return py::reinterpret_steal(tensor::TransformOutput(py_output)); } + +py::object Pyboost_Reshape_Simple(const py::list &args) { + runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kRunOp, + "ReshapeSimple", false, true); + MS_LOG(DEBUG) << "Run Pyboost_Reshape_Simple start"; + static Converter converter(&ops::gReshape); + converter.Parse(args); + auto input = converter.ToTensor(args, kIndex0); + const auto &shape = converter.ToIntVector(args, kIndex1); + + auto py_output = tensor::MakeTuple(); + auto promises = tensor::TransformPromise(py_output); + auto requires_grad = GradState::Get().RequiresGrad(); + MS_LOG(DEBUG) << "Run frontend task Pyboost_Reshape_Simple start"; + // stub tensor to tensor. + auto input_tensor = PyNativeAlgo::Common::ConvertStubNodeToTensor(input, false, requires_grad); + // Run op + auto output = + kernel::pyboost::ReshapeSimpleCustomize(runtime::OpRunner::GetDeviceContext("Ascend"), 0, input_tensor, shape); + // Set output value and grad info + auto real_out = PyNativeAlgo::AutoGradUtil::MakeOutputNew( + requires_grad, output, + requires_grad ? PyNativeAlgo::Common::GetPyNativeExecutor()->grad_executor()->top_cell()->op_index() : 0, + input_tensor); + tensor::SetPromise(promises, output); + MS_LOG(DEBUG) << "Run Pyboost_Reshape_Simple end"; + return py::reinterpret_steal(tensor::TransformOutput(py_output)); +} } // namespace mindspore::pynative -- Gitee From 3a4ccbc48faa0232dd27d23db001b06d44549f3e Mon Sep 17 00:00:00 2001 From: pengqi Date: Fri, 4 Apr 2025 12:06:10 +0800 Subject: [PATCH 8/9] reshape create output tensor --- .../ccsrc/pyboost/customize/reshape_simple.cc | 33 ++++++++----------- mindspore/ops/view/view_strides_calc.cc | 9 +++-- mindspore/ops/view/view_strides_calc.h | 4 +-- 3 files changed, 20 insertions(+), 26 deletions(-) diff --git a/mindspore/ccsrc/pyboost/customize/reshape_simple.cc b/mindspore/ccsrc/pyboost/customize/reshape_simple.cc index 3c006e52a67..63f3e70ae45 100644 --- a/mindspore/ccsrc/pyboost/customize/reshape_simple.cc +++ b/mindspore/ccsrc/pyboost/customize/reshape_simple.cc @@ -50,27 +50,22 @@ tensor::BaseTensorPtr ReshapeSimpleCustomize(const DeviceContext *device_context } // Contiguous MS_LOG(DEBUG) << "View View Call start"; - auto storage_info_list = ops::ViewCalcImplWithoutStorageInfo(input_tensor, shape); - if (!storage_info_list.empty()) { + auto storage_info = ops::ViewCalcImplWithoutStorageInfo(input_tensor, shape); + if (storage_info) { // Create device address for input tensors - tensor::BaseTensorPtrList outputs; - PyBoostUtils::CreateOutputTensorForView(input_tensor, storage_info_list[0], &outputs); - - // auto addr = input_tensor->device_address(); - // if (addr != nullptr) { - // PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); - // } else { - // PyBoostUtils::PrepareOpInputs(device_context, stream_id, input_tensor); - // PyBoostUtils::CreateOutputTensor(device_context, input_tensor, storage_info_list, &outputs); - // // Async - // PyBoostUtils::DispatchRun(std::make_shared([device_context, input_tensor]() { - // MS_LOG(DEBUG) << "View device task View start"; - // PyBoostUtils::MallocOpInputsForView(device_context, input_tensor); - // MS_LOG(DEBUG) << "View device task View end"; - // })); - // } + runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, + runtime::ProfilerEvent::kPyBoostCreateOutputTensor, "CreateOutputTensorForView", + false); + tensor::BaseTensorPtr output_tensor = + std::make_shared(input_tensor->data_type(), storage_info->shape, input_tensor->data_ptr()); + output_tensor->set_need_pipeline_sync(true); + output_tensor->set_contiguous_callback([](const DeviceSyncPtr &device_address) -> DeviceSyncPtr { + return PyBoostUtils::ContiguousByDeviceAddress(device_address); + }); + output_tensor->set_storage_info(std::move(storage_info)); + MS_LOG(DEBUG) << "Create output tensor " << output_tensor->ToString() << " with storage_info."; MS_LOG(DEBUG) << "View View Call end"; - return outputs[0]; + return output_tensor; } else { MS_LOG_EXCEPTION << "View unsupported: Reshape_Simple or input ERROR"; } diff --git a/mindspore/ops/view/view_strides_calc.cc b/mindspore/ops/view/view_strides_calc.cc index 3817b3a1333..891380c86db 100644 --- a/mindspore/ops/view/view_strides_calc.cc +++ b/mindspore/ops/view/view_strides_calc.cc @@ -53,17 +53,16 @@ ShapeVector update_shape(const ShapeVector &input_shape, ShapeVector shape) { return shape; } -TensorStorageInfoPtrList ViewCalcImplWithoutStorageInfo(const tensor::BaseTensorPtr &input_tensor, - const std::vector &shape) { +TensorStorageInfoPtr ViewCalcImplWithoutStorageInfo(const tensor::BaseTensorPtr &input_tensor, + const std::vector &shape) { MS_EXCEPTION_IF_NULL(input_tensor); auto old_strides = GetOriStrides(input_tensor->shape()); const auto &old_shape = input_tensor->shape(); const auto &new_shape = update_shape(old_shape, shape); const auto &new_strides = GetOriStrides(new_shape); - auto new_storage_info = std::make_shared(new_shape, new_strides, 0, input_tensor->shape(), - old_strides, IsContiguous(new_shape, new_strides)); - return {new_storage_info}; + return std::make_shared(new_shape, new_strides, 0, input_tensor->shape(), old_strides, + IsContiguous(new_shape, new_strides)); } TensorStorageInfoPtrList ViewCalcImplWithStorageInfo(const tensor::BaseTensorPtr &input_tensor, diff --git a/mindspore/ops/view/view_strides_calc.h b/mindspore/ops/view/view_strides_calc.h index 62d149f4661..5958bc29554 100644 --- a/mindspore/ops/view/view_strides_calc.h +++ b/mindspore/ops/view/view_strides_calc.h @@ -27,8 +27,8 @@ OPS_API TensorStorageInfoPtrList ViewCalcImpl(const PrimitivePtr &prim, const te const std::vector &shape); OPS_API TensorStorageInfoPtrList ViewCalcImplNew(const tensor::BaseTensorPtr &input_tensor, const std::vector &shape); -OPS_API TensorStorageInfoPtrList ViewCalcImplWithoutStorageInfo(const tensor::BaseTensorPtr &input_tensor, - const std::vector &shape); +OPS_API TensorStorageInfoPtr ViewCalcImplWithoutStorageInfo(const tensor::BaseTensorPtr &input_tensor, + const std::vector &shape); } // namespace ops } // namespace mindspore -- Gitee From 57c5ff2f7539e8c5d6b56c0ada2f6b5256f07c0e Mon Sep 17 00:00:00 2001 From: pengqi Date: Thu, 10 Apr 2025 09:51:24 +0800 Subject: [PATCH 9/9] reshape simple save --- .../ccsrc/pyboost/customize/reshape_simple.cc | 29 +++++++++++++++-- .../ccsrc/pyboost/customize/reshape_simple.h | 7 ++-- .../customize/reshape_simple_op.cc | 32 +++++++++++-------- 3 files changed, 50 insertions(+), 18 deletions(-) diff --git a/mindspore/ccsrc/pyboost/customize/reshape_simple.cc b/mindspore/ccsrc/pyboost/customize/reshape_simple.cc index 63f3e70ae45..31bc29d1a2a 100644 --- a/mindspore/ccsrc/pyboost/customize/reshape_simple.cc +++ b/mindspore/ccsrc/pyboost/customize/reshape_simple.cc @@ -28,8 +28,9 @@ namespace mindspore { namespace kernel { namespace pyboost { -tensor::BaseTensorPtr ReshapeSimpleCustomize(const DeviceContext *device_context, const size_t &stream_id, - const BaseTensorPtr &input_tensor, const std::vector &shape) { +tensor::BaseTensorPtr ReshapeSimpleCustomize_Save(const DeviceContext *device_context, const size_t &stream_id, + const BaseTensorPtr &input_tensor, + const std::vector &shape) { auto old_storage_info = input_tensor->storage_info(); // Uncontiguous if (old_storage_info != nullptr && !old_storage_info->is_contiguous) { @@ -70,6 +71,30 @@ tensor::BaseTensorPtr ReshapeSimpleCustomize(const DeviceContext *device_context MS_LOG_EXCEPTION << "View unsupported: Reshape_Simple or input ERROR"; } } + +tensor::BaseTensorPtr ReshapeSimpleCustomize(const BaseTensorPtr &input_tensor, const std::vector &shape) { + // Contiguous + MS_LOG(DEBUG) << "View View Call start"; + auto storage_info = ops::ViewCalcImplWithoutStorageInfo(input_tensor, shape); + if (storage_info) { + // Create device address for input tensors + runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, + runtime::ProfilerEvent::kPyBoostCreateOutputTensor, "CreateOutputTensorForView", + false); + tensor::BaseTensorPtr output_tensor = + std::make_shared(input_tensor->data_type(), storage_info->shape, input_tensor->data_ptr()); + output_tensor->set_need_pipeline_sync(true); + output_tensor->set_contiguous_callback([](const DeviceSyncPtr &device_address) -> DeviceSyncPtr { + return PyBoostUtils::ContiguousByDeviceAddress(device_address); + }); + output_tensor->set_storage_info(std::move(storage_info)); + MS_LOG(DEBUG) << "Create output tensor " << output_tensor->ToString() << " with storage_info."; + MS_LOG(DEBUG) << "View View Call end"; + return output_tensor; + } else { + MS_LOG_EXCEPTION << "View unsupported: Reshape_Simple or input ERROR"; + } +} } // namespace pyboost } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/pyboost/customize/reshape_simple.h b/mindspore/ccsrc/pyboost/customize/reshape_simple.h index d426a806b77..65c17c67b3c 100644 --- a/mindspore/ccsrc/pyboost/customize/reshape_simple.h +++ b/mindspore/ccsrc/pyboost/customize/reshape_simple.h @@ -27,8 +27,11 @@ namespace mindspore { namespace kernel { namespace pyboost { -tensor::BaseTensorPtr PYBOOST_API ReshapeSimpleCustomize(const DeviceContext *device_context, const size_t &stream_id, - const BaseTensorPtr &input_tensor, +tensor::BaseTensorPtr PYBOOST_API ReshapeSimpleCustomize_Save(const DeviceContext *device_context, + const size_t &stream_id, + const BaseTensorPtr &input_tensor, + const std::vector &shape); +tensor::BaseTensorPtr PYBOOST_API ReshapeSimpleCustomize(const BaseTensorPtr &input_tensor, const std::vector &shape); } // namespace pyboost } // namespace kernel diff --git a/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc b/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc index d91db2bd8c1..02d52e2d663 100644 --- a/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc +++ b/mindspore/ccsrc/pynative/op_function/customize/reshape_simple_op.cc @@ -60,7 +60,7 @@ py::object Pyboost_Reshape_Simple_Save(const py::list &args) { kernel::pyboost::RequireGradGuard require_grad_guard(op_run_info->requires_grad); // Run op - auto output = kernel::pyboost::ReshapeSimpleCustomize( + auto output = kernel::pyboost::ReshapeSimpleCustomize_Save( runtime::OpRunner::GetDeviceContext(op_run_info->base_op_run_info.device_target), op_run_info->base_op_run_info.stream_id, input_tensor, shape); @@ -102,19 +102,23 @@ py::object Pyboost_Reshape_Simple(const py::list &args) { auto py_output = tensor::MakeTuple(); auto promises = tensor::TransformPromise(py_output); - auto requires_grad = GradState::Get().RequiresGrad(); - MS_LOG(DEBUG) << "Run frontend task Pyboost_Reshape_Simple start"; - // stub tensor to tensor. - auto input_tensor = PyNativeAlgo::Common::ConvertStubNodeToTensor(input, false, requires_grad); - // Run op - auto output = - kernel::pyboost::ReshapeSimpleCustomize(runtime::OpRunner::GetDeviceContext("Ascend"), 0, input_tensor, shape); - // Set output value and grad info - auto real_out = PyNativeAlgo::AutoGradUtil::MakeOutputNew( - requires_grad, output, - requires_grad ? PyNativeAlgo::Common::GetPyNativeExecutor()->grad_executor()->top_cell()->op_index() : 0, - input_tensor); - tensor::SetPromise(promises, output); + + pynative::DispatchOp(std::make_shared( + [input, shape, promises]() { + auto requires_grad = GradState::Get().RequiresGrad(); + MS_LOG(DEBUG) << "Run frontend task Pyboost_Reshape_Simple start"; + // stub tensor to tensor. + auto input_tensor = PyNativeAlgo::Common::ConvertStubNodeToTensor(input, false, requires_grad); + // Run op + auto output = kernel::pyboost::ReshapeSimpleCustomize(input_tensor, shape); + // Set output value and grad info + auto real_out = PyNativeAlgo::AutoGradUtil::MakeOutputNew( + requires_grad, output, + requires_grad ? PyNativeAlgo::Common::GetPyNativeExecutor()->grad_executor()->top_cell()->op_index() : 0, + input_tensor); + tensor::SetPromise(promises, output); + }, + [promises]() { tensor::SetException(promises); })); MS_LOG(DEBUG) << "Run Pyboost_Reshape_Simple end"; return py::reinterpret_steal(tensor::TransformOutput(py_output)); } -- Gitee