From ca69b4c80ab021272a5bb96ac42bb865187aee44 Mon Sep 17 00:00:00 2001 From: ascend Date: Sat, 3 Jun 2023 12:13:39 +0800 Subject: [PATCH] feat: use torch cache tensor --- core/base/ops_runner.cpp | 158 +++++++++++------- core/base/plan.cpp | 2 +- core/base/runner.cpp | 28 +++- core/include/acltransformer/base/ops_runner.h | 14 +- core/include/acltransformer/operation.h | 2 +- .../acltransformer/ops/add_norm_operation.h | 2 +- .../acltransformer/ops/add_operation.h | 2 +- .../acltransformer/ops/ffn_operation.h | 6 +- .../acltransformer/ops/linear_operation.h | 6 +- .../ops/self_attention_operation.h | 2 +- core/include/acltransformer/runner.h | 12 +- .../acltransformer/utils/tensor_util.h | 3 +- core/ops/add/add_operation.cpp | 2 +- core/ops/add/add_ops_runner.cpp | 2 +- core/ops/add/add_ops_runner.h | 4 +- core/ops/add/add_torch_runner.cpp | 17 +- core/ops/add/add_torch_runner.h | 4 +- core/ops/add_norm/add_norm_operation.cpp | 2 +- core/ops/add_norm/add_norm_ops_runner.cpp | 12 +- core/ops/add_norm/add_norm_ops_runner.h | 8 +- core/ops/add_norm/add_norm_torch_runner.cpp | 25 ++- core/ops/add_norm/add_norm_torch_runner.h | 4 +- core/ops/ffn/ffn_operation.cpp | 6 +- core/ops/ffn/ffn_ops_runner.cpp | 8 +- core/ops/ffn/ffn_ops_runner.h | 3 + core/ops/ffn/ffn_torch_runner.cpp | 17 +- core/ops/ffn/ffn_torch_runner.h | 5 +- core/ops/linear/linear_operation.cpp | 6 +- core/ops/linear/linear_ops_runner.cpp | 8 +- core/ops/linear/linear_ops_runner.h | 6 +- core/ops/linear/linear_torch_runner.cpp | 11 +- core/ops/linear/linear_torch_runner.h | 4 +- .../self_attention_operation.cpp | 2 +- .../self_attention_ops_runner.cpp | 5 + .../self_attention_ops_runner.h | 3 + .../self_attention_torch_runner.cpp | 18 +- .../self_attention_torch_runner.h | 4 +- core/utils/tensor_util.cpp | 21 ++- .../{ops => }/operation/operation_creator.cpp | 29 +++- .../{ops => }/operation/operation_creator.h | 0 .../{ops => }/operation/operation_torch.cpp | 62 ++++++- .../{ops => }/operation/operation_torch.h | 9 +- .../test_add_norm_operation.py} | 17 +- .../test_add_norm_operation_data.py} | 0 .../test_add_operation.py} | 24 +-- .../test_linear_operation.py} | 13 +- .../test_linear_operation_data.py} | 0 .../torch/ops/add/add_operation_torch.cpp | 53 ------ examples/torch/ops/add/add_operation_torch.h | 37 ---- .../ops/add_norm/add_norm_operation_torch.cpp | 60 ------- .../ops/add_norm/add_norm_operation_torch.h | 37 ---- .../add_norm/test_add_norm_operation_rand.py | 55 ------ .../torch/ops/ffn/ffn_operation_torch.cpp | 65 ------- examples/torch/ops/ffn/ffn_operation_torch.h | 34 ---- .../torch/ops/ffn/test_ffn_operation_torch.py | 83 --------- .../ops/linear/linear_operation_torch.cpp | 79 --------- .../torch/ops/linear/linear_operation_torch.h | 35 ---- .../test_linear_operation_torch_rand.py | 51 ------ .../self_attention_operation_torch.cpp | 68 -------- .../self_attention_operation_torch.h | 40 ----- .../test_self_attention_operation_torch.py | 75 --------- tests/unittest/ops/add/test_add.cpp | 4 +- 62 files changed, 380 insertions(+), 994 deletions(-) rename examples/torch/{ops => }/operation/operation_creator.cpp (68%) rename examples/torch/{ops => }/operation/operation_creator.h (100%) rename examples/torch/{ops => }/operation/operation_torch.cpp (58%) rename examples/torch/{ops => }/operation/operation_torch.h (72%) rename examples/torch/{ops/operation/test_add_norm_operation_rand.py => operation/test_add_norm_operation.py} (81%) rename examples/torch/{ops/add_norm/test_add_norm_operation_torch.py => operation/test_add_norm_operation_data.py} (100%) rename examples/torch/{ops/add/test_add_operation_torch.py => operation/test_add_operation.py} (72%) rename examples/torch/{ops/operation/test_linear_operation_torch_rand.py => operation/test_linear_operation.py} (75%) rename examples/torch/{ops/linear/test_linear_operation_torch.py => operation/test_linear_operation_data.py} (100%) delete mode 100644 examples/torch/ops/add/add_operation_torch.cpp delete mode 100644 examples/torch/ops/add/add_operation_torch.h delete mode 100644 examples/torch/ops/add_norm/add_norm_operation_torch.cpp delete mode 100644 examples/torch/ops/add_norm/add_norm_operation_torch.h delete mode 100644 examples/torch/ops/add_norm/test_add_norm_operation_rand.py delete mode 100644 examples/torch/ops/ffn/ffn_operation_torch.cpp delete mode 100644 examples/torch/ops/ffn/ffn_operation_torch.h delete mode 100644 examples/torch/ops/ffn/test_ffn_operation_torch.py delete mode 100644 examples/torch/ops/linear/linear_operation_torch.cpp delete mode 100644 examples/torch/ops/linear/linear_operation_torch.h delete mode 100644 examples/torch/ops/linear/test_linear_operation_torch_rand.py delete mode 100644 examples/torch/ops/self_attention/self_attention_operation_torch.cpp delete mode 100644 examples/torch/ops/self_attention/self_attention_operation_torch.h delete mode 100644 examples/torch/ops/self_attention/test_self_attention_operation_torch.py diff --git a/core/base/ops_runner.cpp b/core/base/ops_runner.cpp index a1c29020..0ed2000d 100644 --- a/core/base/ops_runner.cpp +++ b/core/base/ops_runner.cpp @@ -71,24 +71,29 @@ OpsRunner::~OpsRunner() } } -AsdOps::Status OpsRunner::Setup(VariantPack &variantPack) +AsdOps::Status OpsRunner::SetupImpl(const VariantPack &variantPack) { + AsdOps::Status st = SetupKernelGraph(variantPack); + if (!st.Ok()) { + return st; + } + InitTensorMaxNodeMap(); ASD_LOG(INFO) << GetName() << " Setup start, kernel graph:" << kernelGraph_.ToString(); Reset(); - if (!PlanKernel(variantPack)) { - ASD_LOG(ERROR) << GetName() << " PlanKernel fail"; - return AsdOps::Status::FailStatus(1, "PlanKernel fail"); + if (!PlanKernelGraph(variantPack)) { + ASD_LOG(ERROR) << GetName() << " PlanKernelGraph fail"; + return AsdOps::Status::FailStatus(1, "PlanKernelGraph fail"); } FillTilingData(variantPack); return AsdOps::Status::OkStatus(); } -uint64_t OpsRunner::GetWorkspaceSize() { return intermediateSize_ + tilingData_.size() + workspaceSize_; } +uint64_t OpsRunner::GetWorkspaceSizeImpl() { return intermediateSize_ + tilingData_.size() + workspaceSize_; } -AsdOps::Status OpsRunner::Execute(Handle &handle, VariantPack &variantPack) +AsdOps::Status OpsRunner::ExecuteImpl(Handle &handle, VariantPack &variantPack) { ASD_LOG(INFO) << GetName() << " execute start, intermediateSize:" << intermediateSize_ << ", tilingSize:" << tilingData_.size() << ", workspaceSize:" << workspaceSize_; @@ -174,79 +179,100 @@ void OpsRunner::Reset() memAllocatinSolver_->Reset(); } -bool OpsRunner::PlanKernel(const VariantPack &variantPack) +bool OpsRunner::PlanKernelGraph(const VariantPack &variantPack) { kernelGraph_.inTensors = variantPack.inTensors; kernelGraph_.outTensors = variantPack.outTensors; for (size_t nodeId = 0; nodeId < kernelGraph_.nodes.size(); ++nodeId) { - auto &node = kernelGraph_.nodes.at(nodeId); - const AsdOps::OpDesc &opDesc = node.opDesc; - AsdOps::Operation *op = AsdOps::Ops::Instance().GetOperationByName(opDesc.opName); - if (op == nullptr) { - ASD_LOG(ERROR) << GetName() << " get operation by name fail, opName:" << opDesc.opName; + if (!PlanOneKernel(nodeId)) { return false; } + } - node.kernelRunInfo.SetOpDesc(opDesc); - for (const auto tensorIt : node.inTensors) { - node.kernelRunInfo.AddInTensor(*tensorIt); - } - for (size_t i = 0; i < node.outTensors.size(); ++i) { - AsdOps::Tensor tensor; - node.kernelRunInfo.AddOutTensor(tensor); - } - ASD_LOG(INFO) << GetName() << " " << opDesc.opName << " infer shape start, runinfo:\n" - << AsdOpsRunInfoToString(node.kernelRunInfo); - AsdOps::Status st = op->InferShape(node.kernelRunInfo); - if (!st.Ok()) { - ASD_LOG(ERROR) << opDesc.opName << " infer shape fail, error:" << st.Message(); - return false; - } - ASD_LOG(INFO) << GetName() << " " << opDesc.opName << " infer shape success, runinfo:\n" - << AsdOpsRunInfoToString(node.kernelRunInfo); + intermediateSize_ = memAllocatinSolver_->GetSize(); + ASD_LOG(INFO) << GetName() << " MemAllocationSolver malloc size:" << memAllocatinSolver_->GetMallocSize() + << ", real size:" << intermediateSize_; - for (size_t i = 0; i < node.outTensors.size(); ++i) { - AsdOps::Tensor *outTensor = node.outTensors.at(i); - AsdOps::Tensor &runInfoOutTensor = node.kernelRunInfo.GetOutTensor(i); - if (IsInternalTensor(outTensor)) { - outTensor->desc = runInfoOutTensor.desc; - outTensor->dataSize = CalcTensorDataSize(runInfoOutTensor); - outTensor->data = memAllocatinSolver_->Malloc(outTensor->dataSize); - } - runInfoOutTensor = *outTensor; - } + return true; +} - ASD_LOG(INFO) << GetName() << " after mem allo solver, runinfo:\n" << AsdOpsRunInfoToString(node.kernelRunInfo); +bool OpsRunner::PlanOneKernel(size_t nodeId) +{ + auto &node = kernelGraph_.nodes.at(nodeId); + const AsdOps::OpDesc &opDesc = node.opDesc; - AsdOps::Tactic *tactic = op->GetBestTactic(node.kernelRunInfo); - if (tactic == nullptr) { - ASD_LOG(ERROR) << GetName() << " " << opDesc.opName - << " get best tactic fail, tactic count:" << op->GetTacticCount(); - return false; - } + AsdOps::Operation *op = AsdOps::Ops::Instance().GetOperationByName(opDesc.opName); + if (op == nullptr) { + ASD_LOG(ERROR) << GetName() << " get operation by name fail, opName:" << opDesc.opName; + return false; + } - node.kernel = tactic->GetBestKernel(node.kernelRunInfo); - if (node.kernel == nullptr) { - ASD_LOG(ERROR) << GetName() << " " << tactic->GetName() - << " get best kernel fail, kernel count:" << tactic->GetKernelCount(); - return false; - } - ASD_LOG(INFO) << GetName() << " " << opDesc.opName << " best tactic:" << tactic->GetName() - << ", best kernel:" << node.kernel->GetName(); + node.kernelRunInfo.SetOpDesc(opDesc); + for (const auto tensorIt : node.inTensors) { + node.kernelRunInfo.AddInTensor(*tensorIt); + } + for (size_t i = 0; i < node.outTensors.size(); ++i) { + AsdOps::Tensor tensor; + node.kernelRunInfo.AddOutTensor(tensor); + } - auto it = maxNodeIdTensorMap_.find(nodeId); - if (it != maxNodeIdTensorMap_.end()) { - for (auto tensorIt : it->second) { - memAllocatinSolver_->Free((char *)tensorIt->data); + ASD_LOG(INFO) << GetName() << " " << opDesc.opName << " infer shape start, runinfo:\n" + << AsdOpsRunInfoToString(node.kernelRunInfo); + AsdOps::Status st = op->InferShape(node.kernelRunInfo); + if (!st.Ok()) { + ASD_LOG(ERROR) << opDesc.opName << " infer shape fail, error:" << st.Message(); + return false; + } + ASD_LOG(INFO) << GetName() << " " << opDesc.opName << " infer shape success, runinfo:\n" + << AsdOpsRunInfoToString(node.kernelRunInfo); + + for (size_t i = 0; i < node.outTensors.size(); ++i) { + AsdOps::Tensor *outTensor = node.outTensors.at(i); + AsdOps::Tensor &runInfoOutTensor = node.kernelRunInfo.GetOutTensor(i); + if (IsInternalTensor(outTensor)) { + if (runInfoOutTensor.desc.dims.size() != 0) { + outTensor->desc = runInfoOutTensor.desc; + } else { + ASD_LOG(INFO) << GetName() << " " << opDesc.opName << " outTensors[" << i + << "] is internal tensor, infer shape wrong, not use infer shape desc"; } + outTensor->dataSize = CalcTensorDataSize(outTensor->desc); + outTensor->data = memAllocatinSolver_->Malloc(outTensor->dataSize); + ASD_LOG(INFO) << GetName() << " " << opDesc.opName << " outTensors[" << i + << "] is internal tensor, mem solve:" << outTensor->data; + } else { + ASD_LOG(INFO) << GetName() << " " << opDesc.opName << " outTensors[" << i << "] is not internal tensor"; } + runInfoOutTensor = *outTensor; } - intermediateSize_ = memAllocatinSolver_->GetSize(); - ASD_LOG(INFO) << GetName() << " " - << " MemAllocationSolver malloc size:" << memAllocatinSolver_->GetMallocSize() - << ", real size:" << intermediateSize_; + ASD_LOG(INFO) << GetName() << " " << opDesc.opName << " after mem allo solver, runinfo:\n" + << AsdOpsRunInfoToString(node.kernelRunInfo); + + AsdOps::Tactic *tactic = op->GetBestTactic(node.kernelRunInfo); + if (tactic == nullptr) { + ASD_LOG(ERROR) << GetName() << " " << opDesc.opName + << " get best tactic fail, tactic count:" << op->GetTacticCount(); + return false; + } + + node.kernel = tactic->GetBestKernel(node.kernelRunInfo); + if (node.kernel == nullptr) { + ASD_LOG(ERROR) << GetName() << " " << tactic->GetName() + << " get best kernel fail, kernel count:" << tactic->GetKernelCount(); + return false; + } + ASD_LOG(INFO) << GetName() << " " << opDesc.opName << " get best tactic:" << tactic->GetName() + << ", best kernel:" << node.kernel->GetName(); + + auto it = maxNodeIdTensorMap_.find(nodeId); + if (it != maxNodeIdTensorMap_.end()) { + for (auto tensorIt : it->second) { + memAllocatinSolver_->Free((char *)tensorIt->data); + ASD_LOG(INFO) << GetName() << " " << opDesc.opName << " mem free:" << tensorIt->data; + } + } return true; } @@ -271,7 +297,13 @@ void OpsRunner::FillTilingData(const VariantPack &variantPack) for (size_t j = 0; j < workspaces.size(); ++j) { kernelWorkspaceSize += workspaces[i]; } - maxKernelWorkspaceSize = std::max(maxKernelWorkspaceSize, kernelWorkspaceSize); + ASD_LOG(INFO) << GetName() << " " << kernel->GetName() << ", kernelWorkspaceSize:" << kernelWorkspaceSize + << ", maxKernelWorkspaceSize:" << maxKernelWorkspaceSize; + if (kernelWorkspaceSize < 100000) { + maxKernelWorkspaceSize = std::max(maxKernelWorkspaceSize, kernelWorkspaceSize); + } else { + ASD_LOG(ERROR) << GetName() << " " << kernel->GetName() << " kernelWorkspaceSize too large, discard"; + } } } workspaceSize_ = maxKernelWorkspaceSize; diff --git a/core/base/plan.cpp b/core/base/plan.cpp index 5b4a46b0..8381a96b 100644 --- a/core/base/plan.cpp +++ b/core/base/plan.cpp @@ -55,7 +55,7 @@ AsdOps::Status Plan::Setup(Handle handle, const VariantPack &variantPack) for (size_t i = 0; i < node.variantPack.inTensors.size(); ++i) { ASD_LOG(INFO) << "Plan intensor[" << i << "] " << AsdOpsTensorToString(node.variantPack.inTensors.at(i)); } - std::vector outTensorDescs; + AsdOps::SVector outTensorDescs; node.operation->InferShape(node.variantPack.inTensors, outTensorDescs); for (size_t i = 0; i < outTensorDescs.size(); ++i) { ASD_LOG(INFO) << "Plan outTensorDescs[" << i << "] " << AsdOpsTensorDescToString(outTensorDescs.at(i)); diff --git a/core/base/runner.cpp b/core/base/runner.cpp index c1133248..fd4b5d48 100644 --- a/core/base/runner.cpp +++ b/core/base/runner.cpp @@ -22,7 +22,31 @@ Runner::~Runner() {} std::string Runner::GetName() const { return name_; } -AsdOps::Status Runner::Setup(VariantPack &variantPack) { return AsdOps::Status::OkStatus(); } +AsdOps::Status Runner::Setup(const VariantPack &variantPack) +{ + AsdOps::Status st = IsConsistent(variantPack); + if (!st.Ok()) { + return st; + } + return SetupImpl(variantPack); +} -uint64_t Runner::GetWorkspaceSize() { return 0; } +uint64_t Runner::GetWorkspaceSize() { return GetWorkspaceSizeImpl(); } + +AsdOps::Status Runner::Execute(Handle &handle, VariantPack &variantPack) +{ + AsdOps::Status st = IsConsistent(variantPack); + if (!st.Ok()) { + return st; + } + return ExecuteImpl(handle, variantPack); +} + +AsdOps::Status Runner::IsConsistent(const VariantPack &variantPack) { return AsdOps::Status::OkStatus(); } + +AsdOps::Status Runner::SetupImpl(const VariantPack &variantPack) { return AsdOps::Status::OkStatus(); } + +uint64_t Runner::GetWorkspaceSizeImpl() { return 0; } + +AsdOps::Status Runner::ExecuteImpl(Handle &handle, VariantPack &variantPack) { return AsdOps::Status::OkStatus(); } } // namespace AclTransformer \ No newline at end of file diff --git a/core/include/acltransformer/base/ops_runner.h b/core/include/acltransformer/base/ops_runner.h index ae731e90..4555430a 100644 --- a/core/include/acltransformer/base/ops_runner.h +++ b/core/include/acltransformer/base/ops_runner.h @@ -46,13 +46,19 @@ class OpsRunner : public Runner { public: OpsRunner(const std::string &name); virtual ~OpsRunner(); - AsdOps::Status Setup(VariantPack &variantPack) override; - uint64_t GetWorkspaceSize() override; - AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) override; + +protected: + AsdOps::Status SetupImpl(const VariantPack &variantPack) override; + uint64_t GetWorkspaceSizeImpl() override; + AsdOps::Status ExecuteImpl(Handle &handle, VariantPack &variantPack) override; + +protected: + virtual AsdOps::Status SetupKernelGraph(const VariantPack &variantPack) = 0; private: void Reset(); - bool PlanKernel(const VariantPack &variantPack); + bool PlanKernelGraph(const VariantPack &variantPack); + bool PlanOneKernel(size_t nodeId); void FillTilingData(const VariantPack &variantPack); void InitTensorMaxNodeMap(); bool IsInternalTensor(const AsdOps::Tensor *tensor); diff --git a/core/include/acltransformer/operation.h b/core/include/acltransformer/operation.h index 5c01b620..bfcba213 100644 --- a/core/include/acltransformer/operation.h +++ b/core/include/acltransformer/operation.h @@ -32,7 +32,7 @@ public: virtual ~Operation(); std::string GetName() const; virtual AsdOps::Status InferShape(const AsdOps::SVector &inTensors, - std::vector &outTensorDescs) = 0; + AsdOps::SVector &outTensorDescs) = 0; AsdOps::Status Setup(VariantPack &variantPack); uint64_t GetWorkspaceSize(); AsdOps::Status Execute(Handle &handle, VariantPack &variantPack); diff --git a/core/include/acltransformer/ops/add_norm_operation.h b/core/include/acltransformer/ops/add_norm_operation.h index 6a371355..180a8f64 100644 --- a/core/include/acltransformer/ops/add_norm_operation.h +++ b/core/include/acltransformer/ops/add_norm_operation.h @@ -25,7 +25,7 @@ public: AddNormOperation(const AddNormParam ¶m); virtual ~AddNormOperation(); AsdOps::Status InferShape(const AsdOps::SVector &inTensors, - std::vector &outTensorDescs) override; + AsdOps::SVector &outTensorDescs) override; protected: RunnerBuilder *FindBestRunnerBuilder(const VariantPack &variantPack) override; diff --git a/core/include/acltransformer/ops/add_operation.h b/core/include/acltransformer/ops/add_operation.h index dd6eba5f..14b0c122 100644 --- a/core/include/acltransformer/ops/add_operation.h +++ b/core/include/acltransformer/ops/add_operation.h @@ -24,7 +24,7 @@ public: AddOperation(const AddParam ¶m); ~AddOperation(); AsdOps::Status InferShape(const AsdOps::SVector &inTensors, - std::vector &outTensorDescs) override; + AsdOps::SVector &outTensorDescs) override; protected: RunnerBuilder *FindBestRunnerBuilder(const VariantPack &variantPack) override; diff --git a/core/include/acltransformer/ops/ffn_operation.h b/core/include/acltransformer/ops/ffn_operation.h index 7faf6423..7f49c2d4 100644 --- a/core/include/acltransformer/ops/ffn_operation.h +++ b/core/include/acltransformer/ops/ffn_operation.h @@ -25,9 +25,9 @@ public: FfnOperation(const FfnParam ¶m); virtual ~FfnOperation(); AsdOps::Status InferShape(const AsdOps::SVector &inTensors, - std::vector &outTensorDescs) override; - bool IsConsistent(const std::vector &inTensorDescs, - std::vector &outTensorDescs) const; + AsdOps::SVector &outTensorDescs) override; + bool IsConsistent(const AsdOps::SVector &inTensorDescs, + AsdOps::SVector &outTensorDescs) const; int64_t GetTensorBatch(const AsdOps::TensorDesc &tensorDesc) const; int64_t GetTensorH(const AsdOps::TensorDesc &tensorDesc) const; int64_t GetTensorW(const AsdOps::TensorDesc &tensorDesc) const; diff --git a/core/include/acltransformer/ops/linear_operation.h b/core/include/acltransformer/ops/linear_operation.h index cd15c1b9..53442472 100644 --- a/core/include/acltransformer/ops/linear_operation.h +++ b/core/include/acltransformer/ops/linear_operation.h @@ -25,9 +25,9 @@ public: LinearOperation(const LinearParam ¶m); virtual ~LinearOperation(); AsdOps::Status InferShape(const AsdOps::SVector &inTensors, - std::vector &outTensorDescs) override; - bool IsConsistent(const std::vector &inTensorDescs, - std::vector &outTensorDescs) const; + AsdOps::SVector &outTensorDescs) override; + bool IsConsistent(const AsdOps::SVector &inTensorDescs, + AsdOps::SVector &outTensorDescs) const; int64_t GetTensorBatch(const AsdOps::TensorDesc &tensorDesc) const; int64_t GetTensorH(const AsdOps::TensorDesc &tensorDesc) const; int64_t GetTensorW(const AsdOps::TensorDesc &tensorDesc) const; diff --git a/core/include/acltransformer/ops/self_attention_operation.h b/core/include/acltransformer/ops/self_attention_operation.h index 3045a71f..a7128b70 100644 --- a/core/include/acltransformer/ops/self_attention_operation.h +++ b/core/include/acltransformer/ops/self_attention_operation.h @@ -25,7 +25,7 @@ public: SelfAttentionOperation(const SelfAttentionParam ¶m); virtual ~SelfAttentionOperation(); AsdOps::Status InferShape(const AsdOps::SVector &inTensors, - std::vector &outTensorDescs) override; + AsdOps::SVector &outTensorDescs) override; protected: RunnerBuilder *FindBestRunnerBuilder(const VariantPack &variantPack) override; diff --git a/core/include/acltransformer/runner.h b/core/include/acltransformer/runner.h index c69c01a4..9ad2ebc2 100644 --- a/core/include/acltransformer/runner.h +++ b/core/include/acltransformer/runner.h @@ -26,9 +26,15 @@ public: Runner(const std::string &name); virtual ~Runner(); std::string GetName() const; - virtual AsdOps::Status Setup(VariantPack &variantPack); - virtual uint64_t GetWorkspaceSize(); - virtual AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) = 0; + AsdOps::Status Setup(const VariantPack &variantPack); + uint64_t GetWorkspaceSize(); + AsdOps::Status Execute(Handle &handle, VariantPack &variantPack); + +private: + virtual AsdOps::Status IsConsistent(const VariantPack &variantPack); + virtual AsdOps::Status SetupImpl(const VariantPack &variantPack); + virtual uint64_t GetWorkspaceSizeImpl(); + virtual AsdOps::Status ExecuteImpl(Handle &handle, VariantPack &variantPack); private: std::string name_; diff --git a/core/include/acltransformer/utils/tensor_util.h b/core/include/acltransformer/utils/tensor_util.h index 21af0edf..d47ceda7 100644 --- a/core/include/acltransformer/utils/tensor_util.h +++ b/core/include/acltransformer/utils/tensor_util.h @@ -23,8 +23,9 @@ #include "acltransformer/handle.h" namespace AclTransformer { -void GetTensorDescs(const std::vector &tensors, std::vector &tensorDescs); +void GetTensorDescs(const std::vector &tensors, AsdOps::SVector &tensorDescs); uint64_t CalcTensorDataSize(const AsdOps::Tensor &tensor); +uint64_t CalcTensorDataSize(const AsdOps::TensorDesc &tensorDesc); at::Tensor AsdOpsTensor2AtTensor(Handle handle, const AsdOps::Tensor &asdTensor); at::Tensor AsdOpsTensor2AtTensorCache(Handle handle, const AsdOps::Tensor &asdTensor); at::Tensor AsdOpsTensor2AtCpuTensor(Handle handle, const AsdOps::Tensor &asdTensor); diff --git a/core/ops/add/add_operation.cpp b/core/ops/add/add_operation.cpp index bd530c96..2a203aac 100644 --- a/core/ops/add/add_operation.cpp +++ b/core/ops/add/add_operation.cpp @@ -27,7 +27,7 @@ AddOperation::AddOperation(const AddParam ¶m) : Operation("AddOperation"), p AddOperation::~AddOperation() {} AsdOps::Status AddOperation::InferShape(const AsdOps::SVector &inTensors, - std::vector &outTensorDescs) + AsdOps::SVector &outTensorDescs) { if (inTensors.size() != 2) { return AsdOps::Status::FailStatus(1, "inTensorDescs size is not 2"); diff --git a/core/ops/add/add_ops_runner.cpp b/core/ops/add/add_ops_runner.cpp index 9b8a9052..67d1b870 100644 --- a/core/ops/add/add_ops_runner.cpp +++ b/core/ops/add/add_ops_runner.cpp @@ -27,7 +27,7 @@ AddOpsRunner::AddOpsRunner(const AddParam ¶m) : OpsRunner("AddOpsRunner"), p AddOpsRunner::~AddOpsRunner() {} -AsdOps::Status AddOpsRunner::Setup(VariantPack &variantPack) +AsdOps::Status AddOpsRunner::SetupKernelGraph(const VariantPack &variantPack) { if (param_.scale == 1) { kernelGraph_.inTensors.resize(2); diff --git a/core/ops/add/add_ops_runner.h b/core/ops/add/add_ops_runner.h index 8b811556..41aa6868 100644 --- a/core/ops/add/add_ops_runner.h +++ b/core/ops/add/add_ops_runner.h @@ -23,7 +23,9 @@ class AddOpsRunner : public OpsRunner { public: AddOpsRunner(const AddParam ¶m); virtual ~AddOpsRunner(); - AsdOps::Status Setup(VariantPack &variantPack) override; + +protected: + AsdOps::Status SetupKernelGraph(const VariantPack &variantPack) override; private: AddParam param_; diff --git a/core/ops/add/add_torch_runner.cpp b/core/ops/add/add_torch_runner.cpp index 5ffffd5b..11697311 100644 --- a/core/ops/add/add_torch_runner.cpp +++ b/core/ops/add/add_torch_runner.cpp @@ -18,23 +18,20 @@ #include #include #include "acltransformer/utils/tensor_util.h" +#include "acltransformer/utils/tensor_cache.h" namespace AclTransformer { AddTorchRunner::AddTorchRunner(const AddParam ¶m) : Runner("AddTorchRunner"), param_(param) {} AddTorchRunner::~AddTorchRunner() {} -AsdOps::Status AddTorchRunner::Execute(Handle &handle, VariantPack &variantPack) +AsdOps::Status AddTorchRunner::ExecuteImpl(Handle &handle, VariantPack &variantPack) { - ASD_LOG(INFO) << GetName() << " Execute start"; - at::Tensor atInTensorA = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[0]); - at::Tensor atInTensorB = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[1]); - at::Tensor addResultTensor = at::add(atInTensorA, atInTensorB).contiguous(); - int ret = AsdRtMemCopyAsync(variantPack.outTensors[0].data, variantPack.outTensors[0].dataSize, - addResultTensor.storage().data_ptr().get(), variantPack.outTensors[0].dataSize, - ASDRT_MEMCOPY_DEVICE_TO_DEVICE, handle.stream); - ASD_LOG_IF(ret != 0, ERROR) << GetName() << " AsdRtMemCopy fail"; - ASD_LOG(INFO) << GetName() << " Execute end"; + at::Tensor *atInTensorA = AsdOps::GetSingleton().GetTensor(variantPack.inTensors.at(0).data); + at::Tensor *atInTensorB = AsdOps::GetSingleton().GetTensor(variantPack.inTensors.at(1).data); + at::Tensor *addResultTensor = AsdOps::GetSingleton().GetTensor(variantPack.outTensors.at(0).data); + *addResultTensor = torch::add(*atInTensorA, *atInTensorB); + return AsdOps::Status::OkStatus(); } } // namespace AclTransformer \ No newline at end of file diff --git a/core/ops/add/add_torch_runner.h b/core/ops/add/add_torch_runner.h index 39a29184..b54e3f6a 100644 --- a/core/ops/add/add_torch_runner.h +++ b/core/ops/add/add_torch_runner.h @@ -23,7 +23,9 @@ class AddTorchRunner : public Runner { public: AddTorchRunner(const AddParam ¶m); virtual ~AddTorchRunner(); - AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) override; + +private: + AsdOps::Status ExecuteImpl(Handle &handle, VariantPack &variantPack) override; private: AddParam param_; diff --git a/core/ops/add_norm/add_norm_operation.cpp b/core/ops/add_norm/add_norm_operation.cpp index e930d55c..d12041c2 100644 --- a/core/ops/add_norm/add_norm_operation.cpp +++ b/core/ops/add_norm/add_norm_operation.cpp @@ -27,7 +27,7 @@ AddNormOperation::AddNormOperation(const AddNormParam ¶m) : Operation("AddNo AddNormOperation::~AddNormOperation() {} AsdOps::Status AddNormOperation::InferShape(const AsdOps::SVector &inTensors, - std::vector &outTensorDescs) + AsdOps::SVector &outTensorDescs) { if (inTensors.size() != 4) { return AsdOps::Status::FailStatus(1, "inTensorDescs size is not 4"); diff --git a/core/ops/add_norm/add_norm_ops_runner.cpp b/core/ops/add_norm/add_norm_ops_runner.cpp index 85538f67..8dbee900 100644 --- a/core/ops/add_norm/add_norm_ops_runner.cpp +++ b/core/ops/add_norm/add_norm_ops_runner.cpp @@ -28,7 +28,7 @@ AddNormOpsRunner::AddNormOpsRunner(const AddNormParam ¶m) : OpsRunner("AddNo AddNormOpsRunner::~AddNormOpsRunner() {} -AsdOps::Status AddNormOpsRunner::Setup(VariantPack &variantPack) +AsdOps::Status AddNormOpsRunner::SetupKernelGraph(const VariantPack &variantPack) { kernelGraph_.inTensors.resize(4); AsdOps::Tensor &xTensor = kernelGraph_.inTensors.at(0); @@ -64,10 +64,10 @@ AsdOps::Status AddNormOpsRunner::Setup(VariantPack &variantPack) layerNormNode.inTensors = {&addNodeResultTensor, &weightTensor, &biasTensor}; layerNormNode.outTensors = {&resultTensor, &layerNormMeanTensor, &layerNormVarianceTensor}; - return OpsRunner::Setup(variantPack); + return AsdOps::Status::OkStatus(); } -bool AddNormOpsRunner::CalcLayerNormTensor(VariantPack &variantPack, int64_t &beginDim) +bool AddNormOpsRunner::CalcLayerNormTensor(const VariantPack &variantPack, int64_t &beginDim) { AsdOps::TensorDesc inputDesc; inputDesc.dtype = variantPack.inTensors.at(0).desc.dtype; @@ -77,8 +77,8 @@ bool AddNormOpsRunner::CalcLayerNormTensor(VariantPack &variantPack, int64_t &be inputDesc.dims = variantPack.inTensors.at(1).desc.dims; } - AsdOps::Tensor &weightTensor = variantPack.inTensors.at(2); - AsdOps::Tensor &biasTensor = variantPack.inTensors.at(3); + const AsdOps::Tensor &weightTensor = variantPack.inTensors.at(2); + const AsdOps::Tensor &biasTensor = variantPack.inTensors.at(3); ASD_LOG(INFO) << GetName() << " layer norm input desc:" << AsdOpsTensorDescToString(inputDesc) << ", weightTensor:" << AsdOpsTensorToString(weightTensor) @@ -93,6 +93,7 @@ bool AddNormOpsRunner::CalcLayerNormTensor(VariantPack &variantPack, int64_t &be ASD_LOG(INFO) << GetName() << " M:" << M; if (M < 0) { + layerNormMeanTensor.desc.format = inputDesc.format; layerNormMeanTensor.desc.dtype = inputDesc.dtype; layerNormMeanTensor.desc.dims = {M}; layerNormVarianceTensor.desc = layerNormMeanTensor.desc; @@ -114,6 +115,7 @@ bool AddNormOpsRunner::CalcLayerNormTensor(VariantPack &variantPack, int64_t &be break; } } + layerNormMeanTensor.desc.format = weightTensor.desc.format; layerNormMeanTensor.desc.dtype = weightTensor.desc.dtype; layerNormMeanTensor.desc.dims = reduceDims; layerNormVarianceTensor.desc = layerNormMeanTensor.desc; diff --git a/core/ops/add_norm/add_norm_ops_runner.h b/core/ops/add_norm/add_norm_ops_runner.h index 7cd73221..eb713582 100644 --- a/core/ops/add_norm/add_norm_ops_runner.h +++ b/core/ops/add_norm/add_norm_ops_runner.h @@ -23,12 +23,10 @@ class AddNormOpsRunner : public OpsRunner { public: AddNormOpsRunner(const AddNormParam ¶m); virtual ~AddNormOpsRunner(); - AsdOps::Status Setup(VariantPack &variantPack) override; - // uint64_t GetWorkspaceSize() override; - // AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) override; -private: - bool CalcLayerNormTensor(VariantPack &variantPack, int64_t &beginDim); +protected: + AsdOps::Status SetupKernelGraph(const VariantPack &variantPack) override; + bool CalcLayerNormTensor(const VariantPack &variantPack, int64_t &beginDim); private: AddNormParam param_; diff --git a/core/ops/add_norm/add_norm_torch_runner.cpp b/core/ops/add_norm/add_norm_torch_runner.cpp index 2608a504..ca6bbedb 100644 --- a/core/ops/add_norm/add_norm_torch_runner.cpp +++ b/core/ops/add_norm/add_norm_torch_runner.cpp @@ -20,30 +20,29 @@ #include #include #include "acltransformer/utils/tensor_util.h" +#include "acltransformer/utils/tensor_cache.h" namespace AclTransformer { AddNormTorchRunner::AddNormTorchRunner(const AddNormParam ¶m) : Runner("AddNormTorchRunner"), param_(param) {} AddNormTorchRunner::~AddNormTorchRunner() {} -AsdOps::Status AddNormTorchRunner::Execute(Handle &handle, VariantPack &variantPack) +AsdOps::Status AddNormTorchRunner::ExecuteImpl(Handle &handle, VariantPack &variantPack) { if (variantPack.inTensors.size() != 4) { return AsdOps::Status::FailStatus(1, "AddNormTorchRunner inTensor num error!"); } - at::Tensor atInTensorA = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[0]); - at::Tensor atInTensorB = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[1]); - at::Tensor atInTensorWeight = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[2]); - at::Tensor atInTensorBias = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[3]); - at::Tensor addResultTensor = at::add(atInTensorA, atInTensorB); - const double eps = 1e-12; - at::Tensor outputTensor = - at::layer_norm(addResultTensor, atInTensorWeight.sizes(), atInTensorWeight, atInTensorBias, eps).contiguous(); - int ret = AsdRtMemCopyAsync(variantPack.outTensors[0].data, variantPack.outTensors[0].dataSize, - outputTensor.storage().data_ptr().get(), variantPack.outTensors[0].dataSize, - ASDRT_MEMCOPY_DEVICE_TO_DEVICE, handle.stream); - ASD_LOG_IF(ret != 0, ERROR) << "AsdRtMemCopy fail"; + at::Tensor *atInTensorA = AsdOps::GetSingleton().GetTensor(variantPack.inTensors[0].data); + at::Tensor *atInTensorB = AsdOps::GetSingleton().GetTensor(variantPack.inTensors[1].data); + at::Tensor *atInTensorWeight = AsdOps::GetSingleton().GetTensor(variantPack.inTensors[2].data); + at::Tensor *atInTensorBias = AsdOps::GetSingleton().GetTensor(variantPack.inTensors[3].data); + at::Tensor *atOutTensor = AsdOps::GetSingleton().GetTensor(variantPack.outTensors[0].data); + + *atOutTensor = at::layer_norm(at::add(*atInTensorA, *atInTensorB), atInTensorWeight->sizes(), *atInTensorWeight, + *atInTensorBias, param_.layerNormEps) + .contiguous(); + return AsdOps::Status::OkStatus(); } } // namespace AclTransformer \ No newline at end of file diff --git a/core/ops/add_norm/add_norm_torch_runner.h b/core/ops/add_norm/add_norm_torch_runner.h index 3be5c40c..5a2b3ab3 100644 --- a/core/ops/add_norm/add_norm_torch_runner.h +++ b/core/ops/add_norm/add_norm_torch_runner.h @@ -23,7 +23,9 @@ class AddNormTorchRunner : public Runner { public: AddNormTorchRunner(const AddNormParam ¶m); virtual ~AddNormTorchRunner(); - AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) override; + +protected: + AsdOps::Status ExecuteImpl(Handle &handle, VariantPack &variantPack) override; private: AddNormParam param_; diff --git a/core/ops/ffn/ffn_operation.cpp b/core/ops/ffn/ffn_operation.cpp index 75db261c..c26d9db3 100644 --- a/core/ops/ffn/ffn_operation.cpp +++ b/core/ops/ffn/ffn_operation.cpp @@ -33,7 +33,7 @@ FfnOperation::FfnOperation(const FfnParam ¶m) : Operation("FfnOperation"), p FfnOperation::~FfnOperation() {} AsdOps::Status FfnOperation::InferShape(const AsdOps::SVector &inTensors, - std::vector &outTensorDescs) + AsdOps::SVector &outTensorDescs) { if (inTensors.size() != 3) { return AsdOps::Status::FailStatus(1, "inTensorDescs size is not 3"); @@ -47,8 +47,8 @@ AsdOps::Status FfnOperation::InferShape(const AsdOps::SVector &i return AsdOps::Status::OkStatus(); } -bool FfnOperation::IsConsistent(const std::vector &inTensorDescs, - std::vector &outTensorDescs) const +bool FfnOperation::IsConsistent(const AsdOps::SVector &inTensorDescs, + AsdOps::SVector &outTensorDescs) const { ASDOPS_CHECK_TRUE(inTensorDescs.size() == static_cast(DIM_3), return false); ASDOPS_CHECK_TRUE(outTensorDescs.size() == static_cast(DIM_1), return false); diff --git a/core/ops/ffn/ffn_ops_runner.cpp b/core/ops/ffn/ffn_ops_runner.cpp index 7a0fd165..e17cdde2 100644 --- a/core/ops/ffn/ffn_ops_runner.cpp +++ b/core/ops/ffn/ffn_ops_runner.cpp @@ -19,7 +19,9 @@ #include namespace AclTransformer { -FfnOpsRunner::FfnOpsRunner(const FfnParam ¶m) : OpsRunner("FfnOpsRunner"), param_(param) +FfnOpsRunner::FfnOpsRunner(const FfnParam ¶m) : OpsRunner("FfnOpsRunner"), param_(param) {} + +AsdOps::Status FfnOpsRunner::SetupKernelGraph(const VariantPack &variantPack) { kernelGraph_.inTensors.resize(3); AsdOps::Tensor &aTensor = kernelGraph_.inTensors[0]; @@ -63,9 +65,11 @@ FfnOpsRunner::FfnOpsRunner(const FfnParam ¶m) : OpsRunner("FfnOpsRunner"), p addNode.outTensors = {&addOutTensor}; geluNode.opDesc = {0, "ActivationOperation", - AsdOps::OpParam::Activation({AsdOps::OpParam::Activation::ACTIVATION_GELU})}; + AsdOps::OpParam::Activation({AsdOps::OpParam::Activation::ACTIVATION_GELU})}; geluNode.inTensors = {&addOutTensor}; geluNode.outTensors = {&operationOutTensor}; + + return AsdOps::Status::OkStatus(); } FfnOpsRunner::~FfnOpsRunner() {} diff --git a/core/ops/ffn/ffn_ops_runner.h b/core/ops/ffn/ffn_ops_runner.h index c0f0195d..c1c93117 100644 --- a/core/ops/ffn/ffn_ops_runner.h +++ b/core/ops/ffn/ffn_ops_runner.h @@ -24,6 +24,9 @@ public: FfnOpsRunner(const FfnParam ¶m); virtual ~FfnOpsRunner(); +protected: + AsdOps::Status SetupKernelGraph(const VariantPack &variantPack) override; + private: FfnParam param_; }; diff --git a/core/ops/ffn/ffn_torch_runner.cpp b/core/ops/ffn/ffn_torch_runner.cpp index 1bbb6bea..d8d32a11 100644 --- a/core/ops/ffn/ffn_torch_runner.cpp +++ b/core/ops/ffn/ffn_torch_runner.cpp @@ -19,6 +19,7 @@ #include #include #include "acltransformer/utils/tensor_util.h" +#include "acltransformer/utils/tensor_cache.h" namespace AclTransformer { FfnTorchRunner::FfnTorchRunner(const FfnParam ¶m) : Runner("FfnTorchRunner"), param_(param) @@ -28,20 +29,18 @@ FfnTorchRunner::FfnTorchRunner(const FfnParam ¶m) : Runner("FfnTorchRunner") FfnTorchRunner::~FfnTorchRunner() {} -AsdOps::Status FfnTorchRunner::Execute(Handle &handle, VariantPack &variantPack) +AsdOps::Status FfnTorchRunner::ExecuteImpl(Handle &handle, VariantPack &variantPack) { if (variantPack.inTensors.size() != 3) { return AsdOps::Status::FailStatus(1, "FfnTorchRunner inTensor num error!"); } - at::Tensor atInTensorA = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[0]); - at::Tensor atInTensorWeight = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[1]); - at::Tensor atInTensorBias = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[2]); - at::Tensor outTensor = at::gelu(at::linear(atInTensorA, atInTensorWeight, atInTensorBias)).contiguous(); - int ret = AsdRtMemCopyAsync(variantPack.outTensors[0].data, variantPack.outTensors[0].dataSize, - outTensor.storage().data_ptr().get(), variantPack.outTensors[0].dataSize, - ASDRT_MEMCOPY_DEVICE_TO_DEVICE, handle.stream); + at::Tensor *atInTensorA = AsdOps::GetSingleton().GetTensor(variantPack.inTensors[0].data); + at::Tensor *atInTensorWeight = AsdOps::GetSingleton().GetTensor(variantPack.inTensors[1].data); + at::Tensor *atInTensorBias = AsdOps::GetSingleton().GetTensor(variantPack.inTensors[2].data); + at::Tensor *atOutTensor = AsdOps::GetSingleton().GetTensor(variantPack.outTensors[0].data); + + *atOutTensor = at::gelu(at::linear(*atInTensorA, *atInTensorWeight, *atInTensorBias)).contiguous(); - ASD_LOG_IF(ret != 0, ERROR) << "AsdRtMemCopy fail"; return AsdOps::Status::OkStatus(); } } // namespace AclTransformer \ No newline at end of file diff --git a/core/ops/ffn/ffn_torch_runner.h b/core/ops/ffn/ffn_torch_runner.h index c53c074a..5b816f8d 100644 --- a/core/ops/ffn/ffn_torch_runner.h +++ b/core/ops/ffn/ffn_torch_runner.h @@ -23,8 +23,11 @@ class FfnTorchRunner : public Runner { public: FfnTorchRunner(const FfnParam ¶m); virtual ~FfnTorchRunner(); - AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) override; +protected: + AsdOps::Status ExecuteImpl(Handle &handle, VariantPack &variantPack) override; + +private: private: FfnParam param_; }; diff --git a/core/ops/linear/linear_operation.cpp b/core/ops/linear/linear_operation.cpp index 9adf3ea6..4b2c59cd 100644 --- a/core/ops/linear/linear_operation.cpp +++ b/core/ops/linear/linear_operation.cpp @@ -34,7 +34,7 @@ LinearOperation::LinearOperation(const LinearParam ¶m) : Operation("LinearOp LinearOperation::~LinearOperation() {} AsdOps::Status LinearOperation::InferShape(const AsdOps::SVector &inTensors, - std::vector &outTensorDescs) + AsdOps::SVector &outTensorDescs) { // in * weight + bias // in[0,1] + weight[1] @@ -50,8 +50,8 @@ AsdOps::Status LinearOperation::InferShape(const AsdOps::SVector return AsdOps::Status::OkStatus(); } -bool LinearOperation::IsConsistent(const std::vector &inTensorDescs, - std::vector &outTensorDescs) const +bool LinearOperation::IsConsistent(const AsdOps::SVector &inTensorDescs, + AsdOps::SVector &outTensorDescs) const { ASDOPS_CHECK_TRUE(inTensorDescs.size() == static_cast(DIM_3), return false); ASDOPS_CHECK_TRUE(outTensorDescs.size() == static_cast(DIM_1), return false); diff --git a/core/ops/linear/linear_ops_runner.cpp b/core/ops/linear/linear_ops_runner.cpp index a5d0424b..9c996272 100644 --- a/core/ops/linear/linear_ops_runner.cpp +++ b/core/ops/linear/linear_ops_runner.cpp @@ -28,7 +28,7 @@ LinearOpsRunner::LinearOpsRunner(LinearParam ¶m) : OpsRunner("LinearOpsRunne LinearOpsRunner::~LinearOpsRunner() {} -AsdOps::Status LinearOpsRunner::Setup(VariantPack &variantPack) +AsdOps::Status LinearOpsRunner::SetupKernelGraph(const VariantPack &variantPack) { VariantPack newVariantPack; ConvertNewVariantPack(variantPack, newVariantPack); @@ -85,14 +85,14 @@ AsdOps::Status LinearOpsRunner::Setup(VariantPack &variantPack) addNode.inTensors = {&transdata2ResultTensor, &biasTensor}; addNode.outTensors = {&resultTensor}; - return OpsRunner::Setup(newVariantPack); + return AsdOps::Status::OkStatus(); } -AsdOps::Status LinearOpsRunner::Execute(Handle &handle, VariantPack &variantPack) +AsdOps::Status LinearOpsRunner::ExecuteImpl(Handle &handle, VariantPack &variantPack) { VariantPack newVariantPack; ConvertNewVariantPack(variantPack, newVariantPack); - return OpsRunner::Execute(handle, newVariantPack); + return OpsRunner::ExecuteImpl(handle, newVariantPack); } void LinearOpsRunner::ConvertNewVariantPack(const VariantPack &variantPack, VariantPack &newVariantPack) diff --git a/core/ops/linear/linear_ops_runner.h b/core/ops/linear/linear_ops_runner.h index 37fa0a3c..6e4b86a0 100644 --- a/core/ops/linear/linear_ops_runner.h +++ b/core/ops/linear/linear_ops_runner.h @@ -23,8 +23,10 @@ class LinearOpsRunner : public OpsRunner { public: LinearOpsRunner(LinearParam ¶m); virtual ~LinearOpsRunner(); - AsdOps::Status Setup(VariantPack &variantPack) override; - AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) override; + +protected: + AsdOps::Status SetupKernelGraph(const VariantPack &variantPack) override; + AsdOps::Status ExecuteImpl(Handle &handle, VariantPack &variantPack) override; private: void ConvertNewVariantPack(const VariantPack &variantPack, VariantPack &newVariantPack); diff --git a/core/ops/linear/linear_torch_runner.cpp b/core/ops/linear/linear_torch_runner.cpp index 122b5bf9..50e0b7ca 100644 --- a/core/ops/linear/linear_torch_runner.cpp +++ b/core/ops/linear/linear_torch_runner.cpp @@ -28,20 +28,12 @@ LinearTorchRunner::LinearTorchRunner(LinearParam ¶m) : Runner("LinearTorchRu LinearTorchRunner::~LinearTorchRunner() {} -AsdOps::Status LinearTorchRunner::Execute(Handle &handle, VariantPack &variantPack) +AsdOps::Status LinearTorchRunner::ExecuteImpl(Handle &handle, VariantPack &variantPack) { if (variantPack.inTensors.size() != 3) { return AsdOps::Status::FailStatus(1, "LinearTorchRunner inTensor num error!"); } - // at::Tensor atInTensorA = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[0]); - // at::Tensor atInTensorWeight = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[1]); - // at::Tensor atInTensorWeightias = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[2]); - // int ret = AsdRtMemCopyAsync(variantPack.outTensors[0].data, variantPack.outTensors[0].dataSize, - // outputTensor.storage().data_ptr().get(), variantPack.outTensors[0].dataSize, - // ASDRT_MEMCOPY_DEVICE_TO_DEVICE, handle.stream); - // ASD_LOG_IF(ret != 0, ERROR) << GetName() << " AsdRtMemCopy fail"; - at::Tensor *atInTensorA = AsdOps::GetSingleton().GetTensor(variantPack.inTensors[0].data); at::Tensor *atInTensorWeight = @@ -52,7 +44,6 @@ AsdOps::Status LinearTorchRunner::Execute(Handle &handle, VariantPack &variantPa AsdOps::GetSingleton().GetTensor(variantPack.outTensors[0].data); at::Tensor outputTensor = at::linear(*atInTensorA, *atInTensorWeight, *atInTensorWeightias).contiguous(); *atResult = outputTensor; - ASD_LOG(INFO) << GetName() << " use cache tensor"; return AsdOps::Status::OkStatus(); } diff --git a/core/ops/linear/linear_torch_runner.h b/core/ops/linear/linear_torch_runner.h index af1f3227..b21ad7fe 100644 --- a/core/ops/linear/linear_torch_runner.h +++ b/core/ops/linear/linear_torch_runner.h @@ -23,7 +23,9 @@ class LinearTorchRunner : public Runner { public: LinearTorchRunner(LinearParam ¶m); virtual ~LinearTorchRunner(); - AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) override; + +protected: + AsdOps::Status ExecuteImpl(Handle &handle, VariantPack &variantPack) override; private: LinearParam param_; diff --git a/core/ops/self_attention/self_attention_operation.cpp b/core/ops/self_attention/self_attention_operation.cpp index 419e2540..29fade09 100644 --- a/core/ops/self_attention/self_attention_operation.cpp +++ b/core/ops/self_attention/self_attention_operation.cpp @@ -27,7 +27,7 @@ SelfAttentionOperation::SelfAttentionOperation(const SelfAttentionParam ¶m) SelfAttentionOperation::~SelfAttentionOperation() {} AsdOps::Status SelfAttentionOperation::InferShape(const AsdOps::SVector &inTensors, - std::vector &outTensorDescs) + AsdOps::SVector &outTensorDescs) { if (inTensors.size() != 4) { return AsdOps::Status::FailStatus(1, "inTensorDescs size is not 2"); diff --git a/core/ops/self_attention/self_attention_ops_runner.cpp b/core/ops/self_attention/self_attention_ops_runner.cpp index 60061cb2..7337a4d4 100644 --- a/core/ops/self_attention/self_attention_ops_runner.cpp +++ b/core/ops/self_attention/self_attention_ops_runner.cpp @@ -26,5 +26,10 @@ SelfAttentionOpsRunner::SelfAttentionOpsRunner(const SelfAttentionParam ¶m) ASD_LOG(INFO) << "SelfAttentionOperation::SelfAttentionOperation called"; } +AsdOps::Status SelfAttentionOpsRunner::SetupKernelGraph(const VariantPack &variantPack) +{ + return AsdOps::Status::OkStatus(); +} + SelfAttentionOpsRunner::~SelfAttentionOpsRunner() {} } // namespace AclTransformer diff --git a/core/ops/self_attention/self_attention_ops_runner.h b/core/ops/self_attention/self_attention_ops_runner.h index 91be308a..831f098a 100644 --- a/core/ops/self_attention/self_attention_ops_runner.h +++ b/core/ops/self_attention/self_attention_ops_runner.h @@ -24,6 +24,9 @@ public: SelfAttentionOpsRunner(const SelfAttentionParam ¶m); virtual ~SelfAttentionOpsRunner(); +protected: + AsdOps::Status SetupKernelGraph(const VariantPack &variantPack) override; + private: SelfAttentionParam param_; }; diff --git a/core/ops/self_attention/self_attention_torch_runner.cpp b/core/ops/self_attention/self_attention_torch_runner.cpp index ac3884ee..5db00ddc 100644 --- a/core/ops/self_attention/self_attention_torch_runner.cpp +++ b/core/ops/self_attention/self_attention_torch_runner.cpp @@ -19,6 +19,7 @@ #include #include #include +#include "acltransformer/utils/tensor_cache.h" namespace AclTransformer { SelfAttentionTorchRunner::SelfAttentionTorchRunner(const SelfAttentionParam ¶m) @@ -29,16 +30,16 @@ SelfAttentionTorchRunner::SelfAttentionTorchRunner(const SelfAttentionParam &par SelfAttentionTorchRunner::~SelfAttentionTorchRunner() {} -AsdOps::Status SelfAttentionTorchRunner::Execute(Handle &handle, VariantPack &variantPack) +AsdOps::Status SelfAttentionTorchRunner::ExecuteImpl(Handle &handle, VariantPack &variantPack) { // 384, 32, 1024 -> 384, 32, 1024 ASD_LOG(INFO) << "headNum:" << this->param_.headNum << " dk:" << this->param_.dk; - torch::Tensor mixedQuery = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[0]); + torch::Tensor mixedQuery = *AsdOps::GetSingleton().GetTensor(variantPack.inTensors[0].data); mixedQuery = mixedQuery.view({mixedQuery.sizes()[0], mixedQuery.sizes()[1] * this->param_.headNum, mixedQuery.sizes()[2] / this->param_.headNum}); mixedQuery = torch::transpose(mixedQuery, 0, 1); - torch::Tensor mixedKey = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[1]); - torch::Tensor mixedValue = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[2]); + torch::Tensor mixedKey = *AsdOps::GetSingleton().GetTensor(variantPack.inTensors[1].data); + torch::Tensor mixedValue = *AsdOps::GetSingleton().GetTensor(variantPack.inTensors[2].data); mixedValue = mixedValue.view({mixedValue.sizes()[0], mixedValue.sizes()[1] * this->param_.headNum, mixedValue.sizes()[2] / this->param_.headNum}); mixedValue = torch::transpose(mixedValue, 0, 1); @@ -46,7 +47,7 @@ AsdOps::Status SelfAttentionTorchRunner::Execute(Handle &handle, VariantPack &va {mixedKey.sizes()[0], mixedKey.sizes()[1] * this->param_.headNum, mixedKey.sizes()[2] / this->param_.headNum}); mixedKey = mixedKey.permute({1, 2, 0}); - torch::Tensor attention_mask = AsdOpsTensor2AtTensor(handle, variantPack.inTensors[3]); + torch::Tensor attention_mask = *AsdOps::GetSingleton().GetTensor(variantPack.inTensors[3].data); double scal = 1 / sqrt(this->param_.dk); torch::Tensor attentionScores = torch::bmm(mixedQuery, mixedKey).contiguous(); @@ -65,10 +66,9 @@ AsdOps::Status SelfAttentionTorchRunner::Execute(Handle &handle, VariantPack &va contextLayer.sizes()[2] * this->param_.headNum}) .contiguous(); - int ret = AsdRtMemCopyAsync(variantPack.outTensors[0].data, variantPack.outTensors[0].dataSize, - contextLayer.storage().data_ptr().get(), variantPack.outTensors[0].dataSize, - ASDRT_MEMCOPY_DEVICE_TO_DEVICE, handle.stream); - ASD_LOG_IF(ret != 0, ERROR) << "AsdRtMemCopy fail"; + torch::Tensor *atOutTensor = AsdOps::GetSingleton().GetTensor(variantPack.outTensors[0].data); + *atOutTensor = contextLayer; + return AsdOps::Status::OkStatus(); } } // namespace AclTransformer \ No newline at end of file diff --git a/core/ops/self_attention/self_attention_torch_runner.h b/core/ops/self_attention/self_attention_torch_runner.h index 24735920..4e675d22 100644 --- a/core/ops/self_attention/self_attention_torch_runner.h +++ b/core/ops/self_attention/self_attention_torch_runner.h @@ -23,7 +23,9 @@ class SelfAttentionTorchRunner : public Runner { public: SelfAttentionTorchRunner(const SelfAttentionParam ¶m); virtual ~SelfAttentionTorchRunner(); - AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) override; + +protected: + AsdOps::Status ExecuteImpl(Handle &handle, VariantPack &variantPack) override; private: SelfAttentionParam param_; diff --git a/core/utils/tensor_util.cpp b/core/utils/tensor_util.cpp index 17e5d4c5..9050f462 100644 --- a/core/utils/tensor_util.cpp +++ b/core/utils/tensor_util.cpp @@ -22,7 +22,7 @@ #include "acltransformer/utils/tensor_cache.h" namespace AclTransformer { -void GetTensorDescs(const std::vector &tensors, std::vector &tensorDescs) +void GetTensorDescs(const std::vector &tensors, AsdOps::SVector &tensorDescs) { tensorDescs.resize(tensors.size()); for (size_t i = 0; i < tensors.size(); ++i) { @@ -30,16 +30,27 @@ void GetTensorDescs(const std::vector &tensors, std::vector &src) diff --git a/examples/torch/ops/operation/operation_creator.cpp b/examples/torch/operation/operation_creator.cpp similarity index 68% rename from examples/torch/ops/operation/operation_creator.cpp rename to examples/torch/operation/operation_creator.cpp index 472cc2fb..2428c3e0 100644 --- a/examples/torch/ops/operation/operation_creator.cpp +++ b/examples/torch/operation/operation_creator.cpp @@ -20,6 +20,8 @@ #include "acltransformer/ops/add_operation.h" #include "acltransformer/ops/add_norm_operation.h" #include "acltransformer/ops/linear_operation.h" +#include "acltransformer/ops/ffn_operation.h" +#include "acltransformer/ops/self_attention_operation.h" using OperationCreateFunc = std::function; @@ -39,15 +41,32 @@ AclTransformer::Operation *AddNormOperationCreate(const Json::Value ¶mJson) AclTransformer::Operation *LinearOperationCreate(const Json::Value ¶mJson) { - AclTransformer::LinearParam linearParam; - linearParam.transposeA = paramJson["transposeA"].asBool(); - linearParam.transposeB = paramJson["transposeB"].asBool(); - return new AclTransformer::LinearOperation(linearParam); + AclTransformer::LinearParam param; + param.transposeA = paramJson["transposeA"].asBool(); + param.transposeB = paramJson["transposeB"].asBool(); + return new AclTransformer::LinearOperation(param); +} + +AclTransformer::Operation *FfnOperationCreate(const Json::Value ¶mJson) +{ + AclTransformer::FfnParam param; + return new AclTransformer::FfnOperation(param); +} + +AclTransformer::Operation *SelfAttentionOperationCreate(const Json::Value ¶mJson) +{ + AclTransformer::SelfAttentionParam param; + param.transKey = paramJson["transKey"].asBool(); + param.dk = paramJson["dk"].asInt(); + param.headNum = paramJson["headNum"].asInt(); + return new AclTransformer::SelfAttentionOperation(param); } std::map g_funcMap = {{"AddOperation", &AddOperationCreate}, {"AddNormOperation", &AddNormOperationCreate}, - {"LinearOperation", &LinearOperationCreate}}; + {"LinearOperation", &LinearOperationCreate}, + {"FfnOperation", &FfnOperationCreate}, + {"SelfAttentionOperation", &SelfAttentionOperationCreate}}; AclTransformer::Operation *CreateOperation(const std::string &opName, const std::string ¶m) { diff --git a/examples/torch/ops/operation/operation_creator.h b/examples/torch/operation/operation_creator.h similarity index 100% rename from examples/torch/ops/operation/operation_creator.h rename to examples/torch/operation/operation_creator.h diff --git a/examples/torch/ops/operation/operation_torch.cpp b/examples/torch/operation/operation_torch.cpp similarity index 58% rename from examples/torch/ops/operation/operation_torch.cpp rename to examples/torch/operation/operation_torch.cpp index 84d523cc..86c991f4 100644 --- a/examples/torch/ops/operation/operation_torch.cpp +++ b/examples/torch/operation/operation_torch.cpp @@ -17,6 +17,7 @@ #include #include #include "acltransformer/utils/tensor_util.h" +#include "acltransformer/utils/tensor_cache.h" #include "acltransformer/config.h" #include "examples/utils/example_utils.h" #include "operation_creator.h" @@ -27,29 +28,42 @@ OperationTorch::~OperationTorch() {} void OperationTorch::Test() { ASD_LOG(INFO) << "OperationTorch::Test called"; } -void OperationTorch::Execute(std::string opName, std::string param, std::vector atInTensors, - std::vector atOutTensors) +std::vector OperationTorch::Execute(std::string opName, std::string param, + std::vector atInTensors) { + for (auto &inTensor : atInTensors) { + inTensor = inTensor.contiguous(); + } + + std::vector atOutTensors; + AclTransformer::Operation *operation = CreateOperation(opName, param); if (operation == nullptr) { ASD_LOG(ERROR) << "create operation fail, json:" << param; - return; + return atOutTensors; } - delete operation; + ExecuteOperation(operation, atInTensors, atOutTensors); + + delete operation; + return atOutTensors; } -void OperationTorch::ExecuteOperation(AclTransformer::Operation *operation, std::vector atInTensors, - std::vector atOutTensors) +void OperationTorch::ExecuteOperation(AclTransformer::Operation *operation, std::vector &atInTensors, + std::vector &atOutTensors) { AclTransformer::Handle handle = {GetCurrentStream()}; AclTransformer::VariantPack variantPack; for (size_t i = 0; i < atInTensors.size(); ++i) { - atInTensors.at(i) = atInTensors.at(i).contiguous(); variantPack.inTensors.push_back(AtTensor2AsdTensor(atInTensors.at(i))); + AsdOps::GetSingleton().AddTensor(atInTensors.at(i).data_ptr(), &atInTensors.at(i)); } + + CreateAtOutTensors(operation, variantPack.inTensors, atOutTensors); for (size_t i = 0; i < atOutTensors.size(); ++i) { variantPack.outTensors.push_back(AtTensor2AsdTensor(atOutTensors.at(i))); + AsdOps::GetSingleton().AddTensor(atOutTensors.at(i).data_ptr(), + &atOutTensors.at(i)); } AsdOps::Status st = operation->Setup(variantPack); @@ -60,6 +74,7 @@ void OperationTorch::ExecuteOperation(AclTransformer::Operation *operation, std: variantPack.workspaceSize = operation->GetWorkspaceSize(); ASD_LOG(ERROR) << operation->GetName() << " GetWorkspaceSize:" << variantPack.workspaceSize; + if (variantPack.workspaceSize > 0) { int st = AsdRtMemMallocDevice((void **)&variantPack.workspace, variantPack.workspaceSize, ASDRT_MEM_DEFAULT); if (st != ASDRT_SUCCESS) { @@ -84,6 +99,39 @@ void OperationTorch::ExecuteOperation(AclTransformer::Operation *operation, std: variantPack.workspace = nullptr; variantPack.workspaceSize = 0; } + + for (size_t i = 0; i < atInTensors.size(); ++i) { + AsdOps::GetSingleton().DeleteTensor(atInTensors.at(i).data_ptr()); + } + for (size_t i = 0; i < atOutTensors.size(); ++i) { + AsdOps::GetSingleton().DeleteTensor(atOutTensors.at(i).data_ptr()); + } +} + +void OperationTorch::CreateAtOutTensors(AclTransformer::Operation *operation, + const AsdOps::SVector &inTensors, + std::vector &atOutTensors) +{ + AsdOps::SVector outTensorDescs; + operation->InferShape(inTensors, outTensorDescs); + + atOutTensors.resize(outTensorDescs.size()); + for (size_t i = 0; i < outTensorDescs.size(); ++i) { + at::TensorOptions options = at::TensorOptions(); + if (outTensorDescs.at(i).dtype == AsdOps::TENSOR_DTYPE_FLOAT) { + options = options.dtype(at::kFloat); + } else if (outTensorDescs.at(i).dtype == AsdOps::TENSOR_DTYPE_FLOAT16) { + options = options.dtype(at::kHalf); + } + at::Tensor newTensor = + at::zeros(at::IntArrayRef(outTensorDescs.at(i).dims.data(), outTensorDescs.at(i).dims.size()), options); +#ifdef TORCH_18 + newTensor = newTensor.to(at::Device(at::DeviceType::XLA)); +#else + newTensor = newTensor.to(at::Device(at::kPrivateUse1)); +#endif + atOutTensors.at(i) = newTensor.contiguous(); + } } TORCH_LIBRARY(OperationTorch, m) diff --git a/examples/torch/ops/operation/operation_torch.h b/examples/torch/operation/operation_torch.h similarity index 72% rename from examples/torch/ops/operation/operation_torch.h rename to examples/torch/operation/operation_torch.h index 36fd3130..57758fda 100644 --- a/examples/torch/ops/operation/operation_torch.h +++ b/examples/torch/operation/operation_torch.h @@ -25,13 +25,14 @@ public: OperationTorch(); ~OperationTorch(); void Test(); - void Execute(std::string opName, std::string param, std::vector inTensors, - std::vector outTensors); + std::vector Execute(std::string opName, std::string param, std::vector inTensors); c10::intrusive_ptr clone() const { return c10::make_intrusive(); } private: - void ExecuteOperation(AclTransformer::Operation *operation, std::vector atInTensors, - std::vector atOutTensors); + void ExecuteOperation(AclTransformer::Operation *operation, std::vector &atInTensors, + std::vector &atOutTensors); + void CreateAtOutTensors(AclTransformer::Operation *operation, const AsdOps::SVector &inTensors, + std::vector &atOutTensors); }; #endif \ No newline at end of file diff --git a/examples/torch/ops/operation/test_add_norm_operation_rand.py b/examples/torch/operation/test_add_norm_operation.py similarity index 81% rename from examples/torch/ops/operation/test_add_norm_operation_rand.py rename to examples/torch/operation/test_add_norm_operation.py index eaca7e6b..f43eb412 100644 --- a/examples/torch/ops/operation/test_add_norm_operation_rand.py +++ b/examples/torch/operation/test_add_norm_operation.py @@ -32,24 +32,23 @@ class TestAddNormal(unittest.TestCase): def test_2d(self): operation = torch.classes.OperationTorch.OperationTorch() operation.test() - a = torch.rand(2, 3).npu() - b = torch.rand(2, 3).npu() - normWeight = torch.rand(3).npu() - normBias = torch.rand(3).npu() + a = torch.rand(2, 3).npu().half() + b = torch.rand(2, 3).npu().half() + normWeight = torch.rand(3).npu().half() + normBias = torch.rand(3).npu().half() - result = torch.zeros(2, 3).npu() - operation.execute("AddNormOperation", json.dumps( - {"layerNormEps": 1e-12}), [a, b, normWeight, normBias], [result]) + results = operation.execute("AddNormOperation", json.dumps( + {"layerNormEps": 1e-12}), [a, b, normWeight, normBias]) layer_norm = torch.nn.LayerNorm([3]).npu() layer_norm.load_state_dict({"weight": normWeight, "bias": normBias}) golden_result = layer_norm(a + b) - print("result:" + str(result)) + print("result:" + str(results[0])) print("golden_result:" + str(golden_result)) self.assertTrue(torch.allclose( - result, golden_result, rtol=0.02, atol=0.02)) + results[0], golden_result, rtol=0.02, atol=0.02)) if __name__ == '__main__': diff --git a/examples/torch/ops/add_norm/test_add_norm_operation_torch.py b/examples/torch/operation/test_add_norm_operation_data.py similarity index 100% rename from examples/torch/ops/add_norm/test_add_norm_operation_torch.py rename to examples/torch/operation/test_add_norm_operation_data.py diff --git a/examples/torch/ops/add/test_add_operation_torch.py b/examples/torch/operation/test_add_operation.py similarity index 72% rename from examples/torch/ops/add/test_add_operation_torch.py rename to examples/torch/operation/test_add_operation.py index ab4e82b8..18a2bf20 100644 --- a/examples/torch/ops/add/test_add_operation_torch.py +++ b/examples/torch/operation/test_add_operation.py @@ -30,18 +30,20 @@ torch.classes.load_library(LIB_PATH) class TestNormal(unittest.TestCase): def test_2d(self): - operation = torch.classes.AddOperationTorch.AddOperationTorch() + operation = torch.classes.OperationTorch.OperationTorch() operation.test() - a = torch.rand(2, 3).npu() - b = torch.rand(2, 3).npu() - print("a:" + str(a)) - print("b:" + str(b)) - c = operation.execute(a, b) - golden_c = a + b - print("c:" + str(c)) - print("golden_c:" + str(golden_c)) - - self.assertTrue(torch.allclose(c, golden_c, rtol=0.02, atol=0.02)) + a = torch.rand(2, 3).npu().half() + b = torch.rand(2, 3).npu().half() + + results = operation.execute("AddOperation", "{\"scale\": 1}", [a, b]) + + golden_result = a + b + + print("results:", results[0]) + print("golden_result:", str(golden_result)) + + self.assertTrue(torch.allclose( + results[0], golden_result, rtol=0.02, atol=0.02)) if __name__ == '__main__': diff --git a/examples/torch/ops/operation/test_linear_operation_torch_rand.py b/examples/torch/operation/test_linear_operation.py similarity index 75% rename from examples/torch/ops/operation/test_linear_operation_torch_rand.py rename to examples/torch/operation/test_linear_operation.py index bfb76290..cb0ffbb1 100644 --- a/examples/torch/ops/operation/test_linear_operation_torch_rand.py +++ b/examples/torch/operation/test_linear_operation.py @@ -36,20 +36,13 @@ class TestNormal(unittest.TestCase): b = torch.rand(1024, 1024).npu().half() c = torch.rand(1024).npu().half() - if len(a.size()) == 3: - result = torch.zeros(a.size()[0], a.size()[ - 1], b.size()[0]).npu().half() - else: - result = torch.zeros( - {a.size()[0], b.size()[0]}, a.options()).npu().half() - print(result.size()) - operation.execute("LinearOperation", '{"transposeA":false, "transposeB":true}', [ - a, b, c], [result]) + results = operation.execute("LinearOperation", '{"transposeA":false, "transposeB":true}', [ + a, b, c]) golden_result = torch.matmul(a, torch.transpose(b, 0, 1)) + c self.assertTrue(torch.allclose( - result, golden_result, rtol=0.02, atol=0.02)) + results[0], golden_result, rtol=0.02, atol=0.02)) if __name__ == '__main__': diff --git a/examples/torch/ops/linear/test_linear_operation_torch.py b/examples/torch/operation/test_linear_operation_data.py similarity index 100% rename from examples/torch/ops/linear/test_linear_operation_torch.py rename to examples/torch/operation/test_linear_operation_data.py diff --git a/examples/torch/ops/add/add_operation_torch.cpp b/examples/torch/ops/add/add_operation_torch.cpp deleted file mode 100644 index f82b25fe..00000000 --- a/examples/torch/ops/add/add_operation_torch.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "add_operation_torch.h" -#include -#include "acltransformer/ops/add_operation.h" -#include "examples/utils/example_utils.h" - -AddOperationTorch::AddOperationTorch() -{ - ASD_LOG(INFO) << "AddOperationTorch::AddOperationTorch"; - AclTransformer::AddParam param; - operation_ = new AclTransformer::AddOperation(param); -} - -AddOperationTorch::~AddOperationTorch() -{ - if (operation_) { - delete operation_; - operation_ = nullptr; - } -} - -void AddOperationTorch::Test() { ASD_LOG(INFO) << "AddOperationTorch::Test called"; } - -torch::Tensor AddOperationTorch::Execute(torch::Tensor a, torch::Tensor b) -{ - ASD_LOG(INFO) << "AddOperationTorch::Execute start"; - torch::Tensor resultTensor = at::zeros(a.sizes(), a.options()); - ExecuteOperation(operation_, {&a, &b}, {&resultTensor}); - ASD_LOG(INFO) << "AddOperationTorch::Execute end"; - return resultTensor; -} - -TORCH_LIBRARY(AddOperationTorch, m) -{ - m.class_("AddOperationTorch") - .def(torch::init<>()) - .def("test", &AddOperationTorch::Test) - .def("execute", &AddOperationTorch::Execute); -} \ No newline at end of file diff --git a/examples/torch/ops/add/add_operation_torch.h b/examples/torch/ops/add/add_operation_torch.h deleted file mode 100644 index 2bef193a..00000000 --- a/examples/torch/ops/add/add_operation_torch.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef ADD_OPERATION_TORCH_H -#define ADD_OPERATION_TORCH_H -#include -#include - -namespace AclTransformer { -class AddOperation; -} - -class AddOperationTorch : public torch::CustomClassHolder { -public: - AddOperationTorch(); - ~AddOperationTorch(); - void Test(); - torch::Tensor Execute(torch::Tensor a, torch::Tensor b); - c10::intrusive_ptr clone() const { return c10::make_intrusive(); } - -private: - AclTransformer::AddOperation *operation_ = nullptr; -}; - -#endif \ No newline at end of file diff --git a/examples/torch/ops/add_norm/add_norm_operation_torch.cpp b/examples/torch/ops/add_norm/add_norm_operation_torch.cpp deleted file mode 100644 index 358f7f21..00000000 --- a/examples/torch/ops/add_norm/add_norm_operation_torch.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "add_norm_operation_torch.h" -#include -#include "acltransformer/ops/add_norm_operation.h" -#include "examples/utils/example_utils.h" - -AddNormOperationTorch::AddNormOperationTorch() -{ - ASD_LOG(INFO) << "AddNormOperationTorch::AddNormOperationTorch"; - AclTransformer::AddNormParam param; - operation_ = new AclTransformer::AddNormOperation(param); -} - -AddNormOperationTorch::~AddNormOperationTorch() -{ - if (operation_) { - delete operation_; - operation_ = nullptr; - } -} - -void AddNormOperationTorch::Test() { ASD_LOG(INFO) << "AddNormOperationTorch::Test called"; } - -torch::Tensor AddNormOperationTorch::Execute(torch::Tensor a, torch::Tensor b, torch::Tensor normWeight, - torch::Tensor normBias) -{ - a = a.contiguous(); - b = b.contiguous(); - normWeight = normWeight.contiguous(); - normBias = normBias.contiguous(); - ASD_LOG(INFO) << "AddNormOperationTorch::Execute start, a.device.type:" << a.device().type(); - torch::Tensor resultTensor = at::zeros(a.sizes(), a.options()).contiguous(); - ExecuteOperation(operation_, {&a, &b, &normWeight, &normBias}, {&resultTensor}); - ASD_LOG(INFO) << "AddNormOperationTorch::Execute end"; - return resultTensor; - // at::Tensor addResultTensor = at::add(a, b); - // return at::layer_norm(addResultTensor, normWeight.sizes(), normWeight, normBias, 1e-12); -} - -TORCH_LIBRARY(AddNormOperationTorch, m) -{ - m.class_("AddNormOperationTorch") - .def(torch::init<>()) - .def("test", &AddNormOperationTorch::Test) - .def("execute", &AddNormOperationTorch::Execute); -} \ No newline at end of file diff --git a/examples/torch/ops/add_norm/add_norm_operation_torch.h b/examples/torch/ops/add_norm/add_norm_operation_torch.h deleted file mode 100644 index a3ab19bc..00000000 --- a/examples/torch/ops/add_norm/add_norm_operation_torch.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef ADD_NORM_OPERATION_TORCH_H -#define ADD_NORM_OPERATION_TORCH_H -#include -#include - -namespace AclTransformer { -class AddNormOperation; -} - -class AddNormOperationTorch : public torch::CustomClassHolder { -public: - AddNormOperationTorch(); - ~AddNormOperationTorch(); - void Test(); - torch::Tensor Execute(torch::Tensor a, torch::Tensor b, torch::Tensor normWeight, torch::Tensor normBias); - c10::intrusive_ptr clone() const { return c10::make_intrusive(); } - -private: - AclTransformer::AddNormOperation *operation_ = nullptr; -}; - -#endif \ No newline at end of file diff --git a/examples/torch/ops/add_norm/test_add_norm_operation_rand.py b/examples/torch/ops/add_norm/test_add_norm_operation_rand.py deleted file mode 100644 index 283c0b48..00000000 --- a/examples/torch/ops/add_norm/test_add_norm_operation_rand.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import unittest -import os -import json -import torch -import torch_npu - - -ACLTRANSFORMER_HOME_PATH = os.environ.get("ACLTRANSFORMER_HOME_PATH") -if ACLTRANSFORMER_HOME_PATH is None: - raise RuntimeError( - "env ACLTRANSFORMER_HOME_PATH not exist, source set_env.sh") - -LIB_PATH = os.path.join(ACLTRANSFORMER_HOME_PATH, - "examples/libacltransformer_torch.so") -torch.classes.load_library(LIB_PATH) - - -class TestAddNormal(unittest.TestCase): - def test_2d(self): - operation = torch.classes.AddNormOperationTorch.AddNormOperationTorch() - operation.test() - a = torch.rand(2, 3).npu() - b = torch.rand(2, 3).npu() - normWeight = torch.rand(3).npu() - normBias = torch.rand(3).npu() - print("a:" + str(a)) - print("b:" + str(b)) - print("normWeight:" + str(normWeight)) - print("normBias:" + str(normBias)) - c = operation.execute(a, b, normWeight, normBias) - layer_norm = torch.nn.LayerNorm([3]).npu() - layer_norm.load_state_dict({"weight": normWeight, "bias": normBias}) - - golden_c = layer_norm(a + b) - print("c:" + str(c)) - print("golden_c:" + str(golden_c)) - - self.assertTrue(torch.allclose(c, golden_c, rtol=0.02, atol=0.02)) - - -if __name__ == '__main__': - unittest.main() diff --git a/examples/torch/ops/ffn/ffn_operation_torch.cpp b/examples/torch/ops/ffn/ffn_operation_torch.cpp deleted file mode 100644 index 94d184fb..00000000 --- a/examples/torch/ops/ffn/ffn_operation_torch.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "ffn_operation_torch.h" -#include -#include "acltransformer/ops/ffn_operation.h" -#include "examples/utils/example_utils.h" - -FfnOperationTorch::FfnOperationTorch() -{ - AclTransformer::FfnParam param; - operation_ = new AclTransformer::FfnOperation(param); - ASD_LOG(INFO) << "FfnOperationTorch::FfnOperationTorch"; -} - -FfnOperationTorch::~FfnOperationTorch() -{ - if (operation_) { - delete operation_; - operation_ = nullptr; - } -} - -void FfnOperationTorch::Test() { ASD_LOG(INFO) << "FfnOperationTorch::Test called"; } - -torch::Tensor FfnOperationTorch::Execute(torch::Tensor a, torch::Tensor b, torch::Tensor c) -{ - a = a.contiguous(); - b = b.contiguous(); - c = c.contiguous(); - torch::Tensor resultTensor; - if (a.sizes().size() == 3) { - resultTensor = at::empty({a.sizes()[0], a.sizes()[1], b.sizes()[0]}, a.options()); // to do shape - } else { - resultTensor = at::empty({a.sizes()[0], b.sizes()[0]}, a.options()); - } - resultTensor = resultTensor.contiguous(); - - // at::Tensor outputTensor = at::linear(a, b, c); - // d = at::gelu(outputTensor); - - ExecuteOperation(operation_, {&a, &b, &c}, {&resultTensor}); - ASD_LOG(INFO) << "FfnOperationTorch::Execute end"; - return resultTensor; -} - -TORCH_LIBRARY(FfnOperationTorch, m) -{ - m.class_("FfnOperationTorch") - .def(torch::init<>()) - .def("test", &FfnOperationTorch::Test) - .def("execute", &FfnOperationTorch::Execute); -} \ No newline at end of file diff --git a/examples/torch/ops/ffn/ffn_operation_torch.h b/examples/torch/ops/ffn/ffn_operation_torch.h deleted file mode 100644 index 3cd05dae..00000000 --- a/examples/torch/ops/ffn/ffn_operation_torch.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef FFN_OPERATION_TORCH_H -#define FFN_OPERATION_TORCH_H -#include -#include -#include - -class FfnOperationTorch : public torch::CustomClassHolder { -public: - FfnOperationTorch(); - ~FfnOperationTorch(); - void Test(); - torch::Tensor Execute(torch::Tensor a, torch::Tensor b, torch::Tensor c); - c10::intrusive_ptr clone() const { return c10::make_intrusive(); } - -private: - AclTransformer::FfnOperation *operation_ = nullptr; -}; - -#endif \ No newline at end of file diff --git a/examples/torch/ops/ffn/test_ffn_operation_torch.py b/examples/torch/ops/ffn/test_ffn_operation_torch.py deleted file mode 100644 index 9355f055..00000000 --- a/examples/torch/ops/ffn/test_ffn_operation_torch.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch_npu -import torch -import json -import unittest - -import sys -import os - -sys.path.append(os.path.abspath( - os.path.join(os.path.dirname(__file__), "../.."))) -from tensor_testcase import TensorTestCase - -ACLTRANSFORMER_HOME_PATH = os.environ.get("ACLTRANSFORMER_HOME_PATH") -if ACLTRANSFORMER_HOME_PATH is None: - raise RuntimeError( - "env ACLTRANSFORMER_HOME_PATH not exist, source set_env.sh") - -LIB_PATH = os.path.join(ACLTRANSFORMER_HOME_PATH, - "examples/libacltransformer_torch.so") -torch.classes.load_library(LIB_PATH) - - -# class TestNormal(unittest.TestCase): -# def test_2d(self): -# operation = torch.classes.FfnOperationTorch.FfnOperationTorch() -# operation.test() -# a = torch.rand(5, 5).npu() -# b = torch.rand(5, 5).npu() -# c = torch.rand(5, 5).npu() -# print("a:" + str(a)) -# print("b:" + str(b)) -# print("c:" + str(c)) -# d = operation.execute(a, b, c) -# golden_d = torch.nn.functional.gelu(torch.matmul(a, torch.transpose(b, 0, 1)) + c) -# print("d:" + str(d)) -# print("golden_d:" + str(golden_d)) - -# self.assertTrue(torch.allclose(d, golden_d, rtol=0.02, atol=0.02)) - - -class TestBert(unittest.TestCase): - def test_2d(self): - operation = torch.classes.FfnOperationTorch.FfnOperationTorch() - operation.test() - testcase = TensorTestCase('LinearActivation', in_tensor_num=3) - for i in range(1, 101): - testcase.read(i) - in_tensors = testcase.get_in_tensors() - out_tensors = testcase.get_out_tensors() - a = in_tensors[0].npu() - b = in_tensors[1].npu() - c = in_tensors[2].npu() - print(a.size()) - print(b.size()) - print(c.size()) - golden_d = out_tensors[0].npu() - d = operation.execute(a, b, c) - # d = torch.nn.functional.gelu(torch.nn.functional.linear(a, b, c)) - print("d:" + str(d)) - print("golden_d:" + str(golden_d)) - print("d:" + str(d.size())) - print("golden_d:" + str(golden_d.size())) - - self.assertTrue(torch.allclose(d, golden_d, rtol=0.02, atol=0.02)) - - -if __name__ == '__main__': - unittest.main() diff --git a/examples/torch/ops/linear/linear_operation_torch.cpp b/examples/torch/ops/linear/linear_operation_torch.cpp deleted file mode 100644 index 12874ae1..00000000 --- a/examples/torch/ops/linear/linear_operation_torch.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "linear_operation_torch.h" -#include -#include -#include "acltransformer/ops/linear_operation.h" -#include "examples/utils/example_utils.h" -#include -#include "acltransformer/utils/tensor_cache.h" - -LinearOperationTorch::LinearOperationTorch(std::string param) : param_(param) -{ - ASD_LOG(INFO) << "LinearOperationTorch::LinearOperationTorch"; - Json::Reader paramReader; - Json::Value paramJson; - if (!paramReader.parse(param, paramJson)) { - ASD_LOG(ERROR) << "json parse error"; - } - AclTransformer::LinearParam linearParam; - linearParam.transposeA = paramJson["transposeA"].asBool(); - linearParam.transposeB = paramJson["transposeB"].asBool(); - operation_ = new AclTransformer::LinearOperation(linearParam); -} - -LinearOperationTorch::~LinearOperationTorch() -{ - if (operation_) { - delete operation_; - operation_ = nullptr; - } -} - -void LinearOperationTorch::Test() { ASD_LOG(INFO) << "LinearOperationTorch::Test called"; } - -torch::Tensor LinearOperationTorch::Execute(torch::Tensor a, torch::Tensor b, torch::Tensor c) -{ - a = a.contiguous(); - b = b.contiguous(); - c = c.contiguous(); - ASD_LOG(INFO) << "LinearOperationTorch::Execute start"; - ASD_LOG(INFO) << "LinearOperationTorch inTensors[a].options:" << a.options() << ", data:" << a.data_ptr(); - ASD_LOG(INFO) << "LinearOperationTorch inTensors[b].options:" << b.options() << ", data:" << b.data_ptr(); - ASD_LOG(INFO) << "LinearOperationTorch inTensors[c].options:" << c.options() << ", data:" << c.data_ptr(); - - torch::save(b.to(at::Device(at::kCPU)).contiguous(), "b.pth"); - ASD_LOG(INFO) << "LinearOperationTorch save b.pth"; - torch::Tensor resultTensor; - if (a.sizes().size() == 3) { - resultTensor = at::zeros({a.sizes()[0], a.sizes()[1], b.sizes()[0]}, a.options()).contiguous(); - } else { - resultTensor = at::zeros({a.sizes()[0], b.sizes()[0]}, a.options()).contiguous(); - } - ExecuteOperation(operation_, {&a, &b, &c}, {&resultTensor}); - ASD_LOG(INFO) << "LinearOperationTorch::Execute end"; - return resultTensor; - - // return at::linear(a, b, c); -} - -TORCH_LIBRARY(LinearOperationTorch, m) -{ - m.class_("LinearOperationTorch") - .def(torch::init()) - .def("test", &LinearOperationTorch::Test) - .def("execute", &LinearOperationTorch::Execute); -} \ No newline at end of file diff --git a/examples/torch/ops/linear/linear_operation_torch.h b/examples/torch/ops/linear/linear_operation_torch.h deleted file mode 100644 index f66d8543..00000000 --- a/examples/torch/ops/linear/linear_operation_torch.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef LINEAR_OPERATION_TORCH_H -#define LINEAR_OPERATION_TORCH_H -#include -#include -#include - -class LinearOperationTorch : public torch::CustomClassHolder { -public: - LinearOperationTorch(std::string param); - ~LinearOperationTorch(); - void Test(); - torch::Tensor Execute(torch::Tensor a, torch::Tensor b, torch::Tensor c); - c10::intrusive_ptr clone() const { return c10::make_intrusive(param_); } - -private: - AclTransformer::LinearOperation *operation_ = nullptr; - std::string param_; -}; - -#endif \ No newline at end of file diff --git a/examples/torch/ops/linear/test_linear_operation_torch_rand.py b/examples/torch/ops/linear/test_linear_operation_torch_rand.py deleted file mode 100644 index 992aed30..00000000 --- a/examples/torch/ops/linear/test_linear_operation_torch_rand.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import unittest -import os -import json -import torch -import torch_npu - - -ACLTRANSFORMER_HOME_PATH = os.environ.get("ACLTRANSFORMER_HOME_PATH") -if ACLTRANSFORMER_HOME_PATH is None: - raise RuntimeError( - "env ACLTRANSFORMER_HOME_PATH not exist, source set_env.sh") - -LIB_PATH = os.path.join(ACLTRANSFORMER_HOME_PATH, - "examples/libacltransformer_torch.so") -torch.classes.load_library(LIB_PATH) - - -class TestNormal(unittest.TestCase): - def test_2d(self): - param = '{"transposeA":false,"transposeB":true}' - operation = torch.classes.LinearOperationTorch.LinearOperationTorch(param) - operation.test() - a = torch.rand(384, 32, 1024).npu() - b = torch.rand(1024, 1024).npu() - c = torch.rand(1024).npu() - - d = operation.execute(a, b, c) - - golden_d = torch.matmul(a, torch.transpose(b, 0, 1)) + c - - print("d:" + str(d.size())) - print("golden_d:" + str(golden_d.size())) - - self.assertTrue(torch.allclose(d, golden_d, rtol=0.02, atol=0.02)) - - -if __name__ == '__main__': - unittest.main() diff --git a/examples/torch/ops/self_attention/self_attention_operation_torch.cpp b/examples/torch/ops/self_attention/self_attention_operation_torch.cpp deleted file mode 100644 index 56d4aec3..00000000 --- a/examples/torch/ops/self_attention/self_attention_operation_torch.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "self_attention_operation_torch.h" -#include -#include "acltransformer/ops/self_attention_operation.h" -#include "acltransformer/utils/tensor_util.h" -#include "examples/utils/example_utils.h" -#include - -SelfAttentionOperationTorch::SelfAttentionOperationTorch(std::string param) : param_(param) -{ - ASD_LOG(INFO) << "SelfAttentionOperationTorch::SelfAttentionOperationTorch"; - Json::Reader paramReader; - Json::Value paramJson; - if (!paramReader.parse(param, paramJson)) { - ASD_LOG(ERROR) << "json parse error"; - } - AclTransformer::SelfAttentionParam selfAttentionParam; - selfAttentionParam.transKey = paramJson["transKey"].asBool(); - selfAttentionParam.dk = paramJson["dk"].asInt(); - selfAttentionParam.headNum = paramJson["headNum"].asInt(); - this->selfAttentionParam_ = selfAttentionParam; - operation_ = new AclTransformer::SelfAttentionOperation(selfAttentionParam); -} - -SelfAttentionOperationTorch::~SelfAttentionOperationTorch() -{ - if (operation_) { - delete operation_; - operation_ = nullptr; - } -} - -void SelfAttentionOperationTorch::Test() { ASD_LOG(INFO) << "SelfAttentionOperationTorch::Test called"; } - -torch::Tensor SelfAttentionOperationTorch::Execute(torch::Tensor query, torch::Tensor key, torch::Tensor value, - torch::Tensor attentionMask) -{ - query = query.contiguous(); - key = key.contiguous(); - value = value.contiguous(); - attentionMask = attentionMask.contiguous(); - torch::Tensor resultTensor = torch::zeros(query.sizes(), query.options()).contiguous(); - ExecuteOperation(operation_, {&query, &key, &value, &attentionMask}, {&resultTensor}); - ASD_LOG(INFO) << "SelfAttentionOperationTorch::Execute end"; - return resultTensor; -} - -TORCH_LIBRARY(SelfAttentionOperationTorch, m) -{ - m.class_("SelfAttentionOperationTorch") - .def(torch::init()) - .def("test", &SelfAttentionOperationTorch::Test) - .def("execute", &SelfAttentionOperationTorch::Execute); -} \ No newline at end of file diff --git a/examples/torch/ops/self_attention/self_attention_operation_torch.h b/examples/torch/ops/self_attention/self_attention_operation_torch.h deleted file mode 100644 index ed7ad744..00000000 --- a/examples/torch/ops/self_attention/self_attention_operation_torch.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef SELF_ATTENTION_OPERATION_TORCH_H -#define SELF_ATTENTION_OPERATION_TORCH_H -#include -#include -#include "acltransformer/ops/self_attention_operation.h" - -namespace AclTransformer { -class SelfAttentionOperation; -} - -class SelfAttentionOperationTorch : public torch::CustomClassHolder { -public: - SelfAttentionOperationTorch(std::string param); - ~SelfAttentionOperationTorch(); - void Test(); - torch::Tensor Execute(torch::Tensor aquery, torch::Tensor key, torch::Tensor value, torch::Tensor attentionMask); - c10::intrusive_ptr clone() const { return c10::make_intrusive(param_); } - -private: - AclTransformer::SelfAttentionOperation *operation_ = nullptr; - std::string param_; - AclTransformer::SelfAttentionParam selfAttentionParam_; -}; - -#endif \ No newline at end of file diff --git a/examples/torch/ops/self_attention/test_self_attention_operation_torch.py b/examples/torch/ops/self_attention/test_self_attention_operation_torch.py deleted file mode 100644 index a6bfcb40..00000000 --- a/examples/torch/ops/self_attention/test_self_attention_operation_torch.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import unittest -import os -import json -import torch -import torch_npu -import sys -sys.path.append('../..') -from tensor_testcase import TensorTestCase - -ACLTRANSFORMER_HOME_PATH = os.environ.get("ACLTRANSFORMER_HOME_PATH") -if ACLTRANSFORMER_HOME_PATH is None: - raise RuntimeError( - "env ACLTRANSFORMER_HOME_PATH not exist, source set_env.sh") - -LIB_PATH = os.path.join(ACLTRANSFORMER_HOME_PATH, - "examples/libacltransformer_torch.so") -torch.classes.load_library(LIB_PATH) - - -# class TestNormal(unittest.TestCase): -# def test_2d(self): -# param = '{"transKey":true,"dk":64,"headNum":16}' -# operation = torch.classes.SelfAttentionOperationTorch.SelfAttentionOperationTorch(param) -# operation.test() -# query = torch.rand(32, 384, 1024).npu() -# key = torch.rand(32, 384, 1024).npu() -# value = torch.rand(32, 384, 1024).npu() -# mask = torch.rand(32, 1, 1, 384).npu() -# result = operation.execute(query, key, value, mask) -# print("result:" + str(result)) - - -class TestBert(unittest.TestCase): - def test_2d(self): - param = '{"transKey":false,"dk":64,"headNum":16}' - operation = torch.classes.SelfAttentionOperationTorch.SelfAttentionOperationTorch(param) - operation.test() - testcase = TensorTestCase('BertSelfAttention', in_tensor_num=7, out_tensor_num=6) - testcase.read(1) - in_tensors = testcase.get_in_tensors() - out_tensors = testcase.get_out_tensors() - query = in_tensors[4].npu() - key = in_tensors[5].npu() - value = in_tensors[6].npu() - mask = in_tensors[3].npu() - print(query.size()) - print(key.size()) - print(value.size()) - print(mask.size()) - d = operation.execute(query, key, value, mask) - # d = d.view(32, 384, 1024) - golden_d = out_tensors[0].npu() - print("d:" + str(d.size())) - print("golden_d:" + str(golden_d.size())) - print("d:" + str(d)) - print("golden_d:" + str(golden_d)) - - self.assertTrue(torch.allclose(d, golden_d, rtol=0.02, atol=0.02)) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/unittest/ops/add/test_add.cpp b/tests/unittest/ops/add/test_add.cpp index 278931ca..f44931c7 100644 --- a/tests/unittest/ops/add/test_add.cpp +++ b/tests/unittest/ops/add/test_add.cpp @@ -23,9 +23,9 @@ TEST(TestAddOperation, InferShape) { AclTransformer::AddParam param; AclTransformer::AddOperation op(param); - std::vector inTensorDescs = {{AsdOps::TENSOR_DTYPE_FLOAT, AsdOps::TENSOR_FORMAT_ND, {1, 2}}, + AsdOps::SVector inTensorDescs = {{AsdOps::TENSOR_DTYPE_FLOAT, AsdOps::TENSOR_FORMAT_ND, {1, 2}}, {AsdOps::TENSOR_DTYPE_FLOAT, AsdOps::TENSOR_FORMAT_ND, {1, 2}}}; - std::vector outTensorDescs; + AsdOps::SVector outTensorDescs; op.InferShape(inTensorDescs, outTensorDescs); ASSERT_EQ(outTensorDescs.size(), 1); EXPECT_EQ(outTensorDescs.at(0).dtype, AsdOps::TENSOR_DTYPE_FLOAT); -- Gitee