From 51044cb50d6c0d1f864f57e562e75a8b15b4ecf0 Mon Sep 17 00:00:00 2001 From: lotus Date: Sat, 10 Jun 2023 17:19:18 +0800 Subject: [PATCH] feat:Add Layers --- examples/CMakeLists.txt | 1 + examples/layers/CMakeLists.txt | 11 +++ .../layer/bert => layers}/bert_layer.cpp | 27 ++++-- .../layer/layer.h => layers/bert_layer.h} | 20 +++-- .../chatglm6b => layers}/chatglm6b_layer.cpp | 29 +++++-- .../layer.cpp => layers/chatglm6b_layer.h} | 46 +++------- examples/layers/layer.cpp | 76 +++++++++++++++++ examples/layers/layer.h | 14 +++- examples/torch/CMakeLists.txt | 2 +- .../torch/layer/bert/bert_output_layer.cpp | 62 -------------- .../layer/bert/bert_self_attention_layer.cpp | 83 ------------------- examples/torch/layer/layer_torch.cpp | 70 ++++++++++++++-- examples/torch/layer/layer_torch.h | 8 +- .../pythontest/layers/test_chatglm6b_layer.py | 61 ++++++++++++++ tests/pythontest/layers/test_glm_block.py | 71 ---------------- 15 files changed, 296 insertions(+), 285 deletions(-) create mode 100644 examples/layers/CMakeLists.txt rename examples/{torch/layer/bert => layers}/bert_layer.cpp (88%) rename examples/{torch/layer/layer.h => layers/bert_layer.h} (58%) rename examples/{torch/layer/chatglm6b => layers}/chatglm6b_layer.cpp (87%) rename examples/{torch/layer/layer.cpp => layers/chatglm6b_layer.h} (33%) create mode 100644 examples/layers/layer.cpp delete mode 100644 examples/torch/layer/bert/bert_output_layer.cpp delete mode 100644 examples/torch/layer/bert/bert_self_attention_layer.cpp create mode 100644 tests/pythontest/layers/test_chatglm6b_layer.py delete mode 100644 tests/pythontest/layers/test_glm_block.py diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 0309d9ca..eb2db0aa 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(utils) +add_subdirectory(layers) add_subdirectory(torch) \ No newline at end of file diff --git a/examples/layers/CMakeLists.txt b/examples/layers/CMakeLists.txt new file mode 100644 index 00000000..eda69bfb --- /dev/null +++ b/examples/layers/CMakeLists.txt @@ -0,0 +1,11 @@ +file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_LIST_DIR}/*.cpp") +include_directories($ENV{PYTHON_INCLUDE_PATH} + $ENV{PYTORCH_INSTALL_PATH}/include + $ENV{PYTORCH_INSTALL_PATH}/include/torch/csrc/api/include + $ENV{PYTORCH_NPU_INSTALL_PATH}/include) + +link_directories($ENV{PYTHON_LIB_PATH} + $ENV{PYTORCH_INSTALL_PATH}/lib + $ENV{PYTORCH_NPU_INSTALL_PATH}/lib) + +add_library(examples_layers OBJECT ${SOURCE_FILES}) diff --git a/examples/torch/layer/bert/bert_layer.cpp b/examples/layers/bert_layer.cpp similarity index 88% rename from examples/torch/layer/bert/bert_layer.cpp rename to examples/layers/bert_layer.cpp index 7cecec7a..eaa92ec7 100644 --- a/examples/torch/layer/bert/bert_layer.cpp +++ b/examples/layers/bert_layer.cpp @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "bert_layer.h" #include #include #include @@ -20,12 +21,25 @@ #include "acltransformer/operation_graph.h" #include "examples/utils/example_util.h" #include "acltransformer/plan_builder.h" -#include "acltransformer/ops/linear_operation.h" +#include "acltransformer/ops/add_operation.h" #include "acltransformer/ops/add_norm_operation.h" +#include "acltransformer/ops/norm_operation.h" +#include "acltransformer/ops/linear_operation.h" #include "acltransformer/ops/self_attention_operation.h" #include "acltransformer/ops/ffn_operation.h" -void BertLayer(const Json::Value &paramJson, AclTransformer::VariantPack &variantPack) +namespace AclTransformer { +BertLayer::BertLayer() : Layer("BertLayer") {} + +BertLayer::~BertLayer() {} + +AsdOps::Status BertLayer::InferShape(const AsdOps::SVector &inTensors, + AsdOps::SVector &outTensorDescs) +{ + return AsdOps::Status::OkStatus(); +} + +AsdOps::Status BertLayer::Execute(Handle &handle, VariantPack &variantPack) { const uint64_t hiddenStatesId = 0; const uint64_t qLinearWeightId = 1; @@ -61,9 +75,9 @@ void BertLayer(const Json::Value &paramJson, AclTransformer::VariantPack &varian AclTransformer::LinearParam kLinearParam; AclTransformer::LinearParam vLinearParam; AclTransformer::SelfAttentionParam selfAttentionParam; - selfAttentionParam.transKey = paramJson["transKey"].asBool(); - selfAttentionParam.dk = paramJson["dk"].asInt(); - selfAttentionParam.headNum = paramJson["headNum"].asInt(); + selfAttentionParam.transKey = paramJson_["transKey"].asBool(); + selfAttentionParam.dk = paramJson_["dk"].asInt(); + selfAttentionParam.headNum = paramJson_["headNum"].asInt(); AclTransformer::LinearParam selfOutLinearParam; AclTransformer::AddNormParam selfOutAddNormParam; AclTransformer::FfnParam ffnParam; @@ -134,5 +148,6 @@ void BertLayer(const Json::Value &paramJson, AclTransformer::VariantPack &varian bertOutAddNormNode.inTensorIds = {bertOutLinearOutId, selfAddNormOutId, bertOutNormWeightId, bertOutNormBiasId}; bertOutAddNormNode.outTensorIds = {bertLayerOutId}; - ExampleUtil::ExecuteOperationGraph(opGraph, variantPack); + return ExecuteOperationGraph(opGraph, variantPack); } +} // namespace AclTransformer diff --git a/examples/torch/layer/layer.h b/examples/layers/bert_layer.h similarity index 58% rename from examples/torch/layer/layer.h rename to examples/layers/bert_layer.h index e14633b2..56d1e791 100644 --- a/examples/torch/layer/layer.h +++ b/examples/layers/bert_layer.h @@ -13,11 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef LAYER_EXECUTER_H -#define LAYER_EXECUTER_H -#include -#include "acltransformer/operation_graph.h" -#include "acltransformer/variant_pack.h" - -bool ExecuteLayer(const std::string &opName, const std::string ¶m, AclTransformer::VariantPack &variantPack); +#ifndef BERTLAYER_H +#define BERTLAYER_H +#include "layer.h" +namespace AclTransformer { +class BertLayer : public Layer { +public: + BertLayer(); + virtual ~BertLayer(); + AsdOps::Status InferShape(const AsdOps::SVector &inTensors, + AsdOps::SVector &outTensorDescs) override; + AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) override; +}; +} // namespace AclTransformer #endif \ No newline at end of file diff --git a/examples/torch/layer/chatglm6b/chatglm6b_layer.cpp b/examples/layers/chatglm6b_layer.cpp similarity index 87% rename from examples/torch/layer/chatglm6b/chatglm6b_layer.cpp rename to examples/layers/chatglm6b_layer.cpp index b3eb55a7..bf88c6f0 100644 --- a/examples/torch/layer/chatglm6b/chatglm6b_layer.cpp +++ b/examples/layers/chatglm6b_layer.cpp @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "chatglm6b_layer.h" #include #include #include @@ -27,7 +28,18 @@ #include "acltransformer/ops/self_attention_kv_cache_operation.h" #include "acltransformer/ops/ffn_operation.h" -void ChatGlm6BLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &variantPack) +namespace AclTransformer { +ChatGlm6BLayer::ChatGlm6BLayer() : Layer("ChatGlm6BLayer") {} + +ChatGlm6BLayer::~ChatGlm6BLayer() {} + +AsdOps::Status ChatGlm6BLayer::InferShape(const AsdOps::SVector &inTensors, + AsdOps::SVector &outTensorDescs) +{ + return AsdOps::Status::OkStatus(); +} + +AsdOps::Status ChatGlm6BLayer::Execute(Handle &handle, VariantPack &variantPack) { // in const uint64_t hiddenStates = 0; @@ -67,18 +79,18 @@ void ChatGlm6BLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &v const uint64_t ffnLinearOut = 32; AclTransformer::NormParam inputNormParam; - inputNormParam.layerNormEps = paramJson["layerNormEps"].asDouble(); + inputNormParam.layerNormEps = paramJson_["layerNormEps"].asDouble(); AclTransformer::LinearParam mixdQkvLinearParam; AclTransformer::PositionEmbeddingParam positionEmbeddingParam; - positionEmbeddingParam.headNum = paramJson["headNum"].asInt(); + positionEmbeddingParam.headNum = paramJson_["headNum"].asInt(); AclTransformer::SelfAttentionKvCacheParam selfAttentionKvCacheParam; - selfAttentionKvCacheParam.transKey = paramJson["transKey"].asBool(); - selfAttentionKvCacheParam.dk = paramJson["dk"].asInt(); + selfAttentionKvCacheParam.transKey = paramJson_["transKey"].asBool(); + selfAttentionKvCacheParam.dk = paramJson_["dk"].asInt(); selfAttentionKvCacheParam.headNum = positionEmbeddingParam.headNum; - selfAttentionKvCacheParam.layerId = paramJson["layerId"].asInt(); + selfAttentionKvCacheParam.layerId = paramJson_["layerId"].asInt(); AclTransformer::LinearParam selfOutLinearParam; AclTransformer::AddParam selfResidualAddParam; - selfResidualAddParam.scale = paramJson["ResidualAddScale"].asFloat(); + selfResidualAddParam.scale = paramJson_["ResidualAddScale"].asFloat(); AclTransformer::NormParam selfNormParam; selfNormParam.layerNormEps = inputNormParam.layerNormEps; AclTransformer::FfnParam ffnParam; @@ -156,5 +168,6 @@ void ChatGlm6BLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &v ffnResidualAddNode.inTensorIds = {selfNormOut, ffnLinearOut}; ffnResidualAddNode.outTensorIds = {glmBlockOut}; - 
ExampleUtil::ExecuteOperationGraph(opGraph, variantPack); + return ExecuteOperationGraph(opGraph, variantPack); } +} // namespace AclTransformer diff --git a/examples/torch/layer/layer.cpp b/examples/layers/chatglm6b_layer.h similarity index 33% rename from examples/torch/layer/layer.cpp rename to examples/layers/chatglm6b_layer.h index a4a8eeec..7ee4c719 100644 --- a/examples/torch/layer/layer.cpp +++ b/examples/layers/chatglm6b_layer.h @@ -13,39 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#ifndef CHATGLM6BLAYER_H +#define CHATGLM6BLAYER_H #include "layer.h" -#include -#include -#include - -using LayerFunc = std::function; - -void BertLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &variantPack); -void BertSelfAttentionLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &variantPack); -void BertOutputAttentionLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &variantPack); -void ChatGlm6BLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &variantPack); - -std::map g_layerMap = { - {"BertLayer", &BertLayer}, - {"BertSelfAttentionLayer", &BertSelfAttentionLayer}, - {"BertOutputAttentionLayer", &BertOutputAttentionLayer}, - {"ChatGlm6BLayer", &ChatGlm6BLayer}, +namespace AclTransformer { +class ChatGlm6BLayer : public Layer { +public: + ChatGlm6BLayer(); + virtual ~ChatGlm6BLayer(); + AsdOps::Status InferShape(const AsdOps::SVector &inTensors, + AsdOps::SVector &outTensorDescs) override; + AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) override; }; - -bool ExecuteLayer(const std::string &layerName, const std::string ¶m, AclTransformer::VariantPack &variantPack) -{ - auto it = g_layerMap.find(layerName); - if (it == g_layerMap.end()) { - return false; - } - - Json::Reader reader; - Json::Value paramJson; - if (!reader.parse(param, paramJson)) { - ASD_LOG(ERROR) << " invalid json:" << param; - return false; - } - - it->second(paramJson, variantPack); - return true; -} +} // namespace AclTransformer +#endif \ No newline at end of file diff --git a/examples/layers/layer.cpp b/examples/layers/layer.cpp new file mode 100644 index 00000000..fe99819e --- /dev/null +++ b/examples/layers/layer.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "layer.h" +#include +#include +#include "acltransformer/plan.h" +#include "acltransformer/plan_builder.h" +#include "examples/utils/example_util.h" + +namespace AclTransformer { +Layer::Layer(const std::string &layerName) : layerName_(layerName) {} + +Layer::~Layer() {} + +std::string Layer::GetName() const { return layerName_; } + +void Layer::SetParam(const Json::Value ¶mJson) { paramJson_ = paramJson; } + +AsdOps::Status Layer::ExecuteOperationGraph(const AclTransformer::OperationGraph &opGraph, + AclTransformer::VariantPack &variantPack) +{ + AclTransformer::Handle handle = {ExampleUtil::GetCurrentStream()}; + + AclTransformer::PlanBuilder planBuilder; + AclTransformer::Plan plan; + AsdOps::Status st = planBuilder.Build(variantPack, opGraph, plan); + if (!st.Ok()) { + ASD_LOG(ERROR) << opGraph.name << " PlanBuilder build plan fail, error:" << st.Message(); + return st; + } + + st = plan.Setup(handle, variantPack); + if (!st.Ok()) { + ASD_LOG(ERROR) << opGraph.name << " Plan Setup fail error:" << st.Message(); + return st; + } + + variantPack.workspaceSize = plan.GetWorkspaceSize(); + ASD_LOG(INFO) << opGraph.name << " Plan GetWorkspaceSize:" << variantPack.workspaceSize; + + if (variantPack.workspaceSize > 0) { + ASD_LOG(INFO) << opGraph.name + << " AsdRtMemMallocDevice variantPack.workspaceSize:" << variantPack.workspaceSize; + int st = AsdRtMemMallocDevice((void **)&variantPack.workspace, variantPack.workspaceSize, ASDRT_MEM_DEFAULT); + if (st != ASDRT_SUCCESS) { + ASD_LOG(ERROR) << opGraph.name << " AsdRtMemMallocDevice fail"; + return AsdOps::Status::FailStatus(1, "AsdRtMemMallocDevice fail"); + } + } + + st = plan.Execute(handle, variantPack); + ASD_LOG_IF(!st.Ok(), ERROR) << opGraph.name << " Plan Execute fail, error:" << st.Message(); + + if (variantPack.workspace != nullptr) { + AsdRtMemFreeDevice(variantPack.workspace); + ASD_LOG(INFO) << opGraph.name << " AsdRtMemFreeDevice free:" << variantPack.workspace; + variantPack.workspace = nullptr; + variantPack.workspaceSize = 0; + } + + return st; +} +} // namespace AclTransformer \ No newline at end of file diff --git a/examples/layers/layer.h b/examples/layers/layer.h index 1cd11e01..344389f4 100644 --- a/examples/layers/layer.h +++ b/examples/layers/layer.h @@ -16,6 +16,7 @@ #ifndef LAYER_EXECUTER_H #define LAYER_EXECUTER_H #include +#include #include "acltransformer/operation_graph.h" #include "acltransformer/variant_pack.h" @@ -25,9 +26,18 @@ public: Layer(const std::string &layerName); virtual ~Layer(); std::string GetName() const; + void SetParam(const Json::Value ¶mJson); virtual AsdOps::Status InferShape(const AsdOps::SVector &inTensors, AsdOps::SVector &outTensorDescs) = 0; - AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) = 0; -} + virtual AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) = 0; + +protected: + AsdOps::Status ExecuteOperationGraph(const AclTransformer::OperationGraph &opGraph, + AclTransformer::VariantPack &variantPack); + +protected: + std::string layerName_; + Json::Value paramJson_; +}; } // namespace AclTransformer #endif \ No newline at end of file diff --git a/examples/torch/CMakeLists.txt b/examples/torch/CMakeLists.txt index 865b5d94..46d975a9 100644 --- a/examples/torch/CMakeLists.txt +++ b/examples/torch/CMakeLists.txt @@ -1,5 +1,5 @@ file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_LIST_DIR}/*.cpp") add_library(acltransformer_torch SHARED ${SOURCE_FILES}) -target_link_libraries(acltransformer_torch PRIVATE acltransformer asdops torch c10 torch_cpu 
torch_python torch_npu examples_util jsoncpp) +target_link_libraries(acltransformer_torch PRIVATE acltransformer asdops torch c10 torch_cpu torch_python torch_npu examples_util examples_layers jsoncpp) install(TARGETS acltransformer_torch DESTINATION examples) \ No newline at end of file diff --git a/examples/torch/layer/bert/bert_output_layer.cpp b/examples/torch/layer/bert/bert_output_layer.cpp deleted file mode 100644 index 90932e4a..00000000 --- a/examples/torch/layer/bert/bert_output_layer.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include "acltransformer/operation.h" -#include "acltransformer/operation_graph.h" -#include "examples/utils/example_util.h" -#include "acltransformer/plan_builder.h" -#include "acltransformer/ops/linear_operation.h" -#include "acltransformer/ops/add_norm_operation.h" -#include "acltransformer/ops/self_attention_operation.h" -#include "acltransformer/ops/ffn_operation.h" - -void BertOutputAttentionLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &variantPack) -{ - const uint64_t inputId = 0; - const uint64_t linearWeightId = 1; - const uint64_t linearBiasId = 2; - const uint64_t residualAddInId = 3; - const uint64_t normWeightId = 4; - const uint64_t normBiasId = 5; - // out - const uint64_t bertOutId = 6; - // intermiate - const uint64_t linearOutId = 7; - - AclTransformer::LinearParam linearParam; - AclTransformer::AddNormParam addNormParam; - AclTransformer::LinearOperation linearOp(linearParam); - AclTransformer::AddNormOperation addNormOp(addNormParam); - - AclTransformer::OperationGraph opGraph; - opGraph.inTensorSize = variantPack.inTensors.size(); - opGraph.outTensorSize = variantPack.outTensors.size(); - opGraph.intermediateTensorSize = 1; - opGraph.nodes.resize(2); - AclTransformer::OperationGraphNode &linearNode = opGraph.nodes.at(0); - AclTransformer::OperationGraphNode &addNormNode = opGraph.nodes.at(1); - linearNode.operation = &linearOp; - linearNode.inTensorIds = {inputId, linearWeightId, linearBiasId}; - linearNode.outTensorIds = {linearOutId}; - - addNormNode.operation = &addNormOp; - addNormNode.inTensorIds = {linearOutId, residualAddInId, normWeightId, normBiasId}; - addNormNode.outTensorIds = {bertOutId}; - - ExampleUtil::ExecuteOperationGraph(opGraph, variantPack); -} \ No newline at end of file diff --git a/examples/torch/layer/bert/bert_self_attention_layer.cpp b/examples/torch/layer/bert/bert_self_attention_layer.cpp deleted file mode 100644 index 34a8ab15..00000000 --- a/examples/torch/layer/bert/bert_self_attention_layer.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include "acltransformer/operation.h" -#include "acltransformer/operation_graph.h" -#include "examples/utils/example_util.h" -#include "acltransformer/plan_builder.h" -#include "acltransformer/ops/linear_operation.h" -#include "acltransformer/ops/add_norm_operation.h" -#include "acltransformer/ops/self_attention_operation.h" -#include "acltransformer/ops/ffn_operation.h" - -void BertSelfAttentionLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &variantPack) -{ - const uint64_t inputId = 0; - const uint64_t queryLinearWeightId = 1; - const uint64_t queryLinearBiasId = 2; - const uint64_t keyLinearWeightId = 3; - const uint64_t keyLinearBiasId = 4; - const uint64_t valueLinearWeightId = 5; - const uint64_t valueLinearBiasId = 6; - const uint64_t attentionMaskId = 7; - // out - const uint64_t contextId = 8; - // intermiate - const uint64_t queryId = 9; - const uint64_t keyId = 10; - const uint64_t valueId = 11; - - AclTransformer::LinearParam queryLinearParam; - AclTransformer::LinearParam keyLinearParam; - AclTransformer::LinearParam valueLinearParam; - AclTransformer::SelfAttentionParam selfAttentionParam = {false, 64, 16}; - AclTransformer::LinearOperation queryLinearOp(queryLinearParam); - AclTransformer::LinearOperation keyLinearOp(keyLinearParam); - AclTransformer::LinearOperation valueLinearOp(valueLinearParam); - AclTransformer::SelfAttentionOperation selfAttentionOp(selfAttentionParam); - - AclTransformer::OperationGraph opGraph; - static int64_t graphId = 0; - opGraph.name = "BertSelfAttentionGraph_" + std::to_string(graphId++); - opGraph.inTensorSize = variantPack.inTensors.size(); - opGraph.outTensorSize = variantPack.outTensors.size(); - opGraph.intermediateTensorSize = 3; - opGraph.nodes.resize(4); - AclTransformer::OperationGraphNode &queryLinearNode = opGraph.nodes.at(0); - AclTransformer::OperationGraphNode &keyLinearNode = opGraph.nodes.at(1); - AclTransformer::OperationGraphNode &valueLinearNode = opGraph.nodes.at(2); - AclTransformer::OperationGraphNode &selfAttentionNode = opGraph.nodes.at(3); - - queryLinearNode.operation = &queryLinearOp; - queryLinearNode.inTensorIds = {inputId, queryLinearWeightId, queryLinearBiasId}; - queryLinearNode.outTensorIds = {queryId}; - - keyLinearNode.operation = &keyLinearOp; - keyLinearNode.inTensorIds = {inputId, keyLinearWeightId, keyLinearBiasId}; - keyLinearNode.outTensorIds = {keyId}; - - valueLinearNode.operation = &valueLinearOp; - valueLinearNode.inTensorIds = {inputId, valueLinearWeightId, valueLinearBiasId}; - valueLinearNode.outTensorIds = {valueId}; - - selfAttentionNode.operation = &selfAttentionOp; - selfAttentionNode.inTensorIds = {queryId, keyId, valueId, attentionMaskId}; - selfAttentionNode.outTensorIds = {contextId}; - - ExampleUtil::ExecuteOperationGraph(opGraph, variantPack); -} \ No newline at end of file diff --git a/examples/torch/layer/layer_torch.cpp b/examples/torch/layer/layer_torch.cpp index d409414e..c153eb95 100644 --- a/examples/torch/layer/layer_torch.cpp +++ b/examples/torch/layer/layer_torch.cpp @@ -21,38 +21,90 @@ #include 
"acltransformer/operation_graph.h" #include "examples/utils/example_util.h" #include "acltransformer/plan_builder.h" -#include "layer.h" +#include "examples/layers/chatglm6b_layer.h" +#include "examples/layers/bert_layer.h" LayerTorch::LayerTorch(std::string layerName) : layerName_(layerName) { ASD_LOG(INFO) << "LayerTorch::LayerTorch called, layerName:" << layerName; + if (layerName == "ChatGlm6BLayer") { + layer_ = new AclTransformer::ChatGlm6BLayer(); + } + if (layerName == "BertLayer") { + layer_ = new AclTransformer::BertLayer(); + } else { + ASD_LOG(ERROR) << "not support layerName:" << layerName; + } } -LayerTorch::~LayerTorch() {} +LayerTorch::~LayerTorch() +{ + if (layer_) { + delete layer_; + layer_ = nullptr; + } +} -void LayerTorch::SetParam(std::string param) { param_ = param; } +void LayerTorch::SetParam(std::string param) +{ + if (!layer_) { + ASD_LOG(ERROR) << "layer is null"; + return; + } + Json::Reader reader; + Json::Value paramJson; + if (!reader.parse(param, paramJson)) { + ASD_LOG(ERROR) << "invalid json:" << param; + } + layer_->SetParam(paramJson); +} -void LayerTorch::Execute(std::vector inTensors, std::vector outTensors) +std::vector LayerTorch::Execute(std::vector inTensors) { - AsdOps::Timer timer; ASD_LOG(INFO) << "LayerTorch::Execute start"; + std::vector outTensors; + if (!layer_) { + ASD_LOG(ERROR) << "layer is null"; + return outTensors; + } + + AclTransformer::VariantPack variantPack; + for (size_t i = 0; i < inTensors.size(); ++i) { inTensors.at(i) = inTensors.at(i).contiguous(); ASD_LOG(INFO) << "inTensors[" << i << "].options:" << inTensors.at(i).options() << ", data:" << inTensors.at(i).data_ptr(); + variantPack.inTensors.push_back(ExampleUtil::AtTensor2AsdTensor(inTensors.at(i))); } + + CreateAtOutTensors(variantPack.inTensors, outTensors); + for (size_t i = 0; i < outTensors.size(); ++i) { outTensors.at(i) = outTensors.at(i).contiguous(); ASD_LOG(INFO) << "outTensors[" << i << "].options:" << outTensors.at(i).options() << ", data:" << outTensors.at(i).data_ptr(); + variantPack.outTensors.push_back(ExampleUtil::AtTensor2AsdTensor(outTensors.at(i))); } - AclTransformer::VariantPack variantPack; - ExampleUtil::BuildVariantPack(inTensors, outTensors, variantPack); + AclTransformer::Handle handle = {ExampleUtil::GetCurrentStream()}; + layer_->Execute(handle, variantPack); - ExecuteLayer(layerName_, param_, variantPack); + ASD_LOG(WARN) << "LayerTorch::Execute end"; + return outTensors; +} - ASD_LOG(WARN) << "LayerTorch::Execute end, use time:" << timer.ElapsedMicroSecond() << " microsecond"; +void LayerTorch::CreateAtOutTensors(const AsdOps::SVector &inTensors, + std::vector &atOutTensors) +{ + AsdOps::SVector outTensorDescs; + AsdOps::Status st = layer_->InferShape(inTensors, outTensorDescs); + ASD_LOG_IF(!st.Ok(), ERROR) << "infer shape fail, error:" << st.Message(); + + atOutTensors.resize(outTensorDescs.size()); + for (size_t i = 0; i < outTensorDescs.size(); ++i) { + at::Tensor newTensor = ExampleUtil::CreateAtTensorFromAsdOpsTensorDesc(outTensorDescs.at(i)); + atOutTensors.at(i) = newTensor; + } } TORCH_LIBRARY(LayerTorch, m) diff --git a/examples/torch/layer/layer_torch.h b/examples/torch/layer/layer_torch.h index f0be3670..9bebed45 100644 --- a/examples/torch/layer/layer_torch.h +++ b/examples/torch/layer/layer_torch.h @@ -19,18 +19,22 @@ #include #include #include "acltransformer/operation_graph.h" +#include "examples/layers/layer.h" class LayerTorch : public torch::CustomClassHolder { public: LayerTorch(std::string layerName); ~LayerTorch(); 
void SetParam(std::string param); - void Execute(std::vector inTensors, std::vector outTensors); + std::vector Execute(std::vector inTensors); c10::intrusive_ptr clone() const { return c10::make_intrusive(layerName_); } +private: + void CreateAtOutTensors(const AsdOps::SVector &inTensors, std::vector &atOutTensors); + private: std::string layerName_; - std::string param_; + AclTransformer::Layer *layer_ = nullptr; }; #endif \ No newline at end of file diff --git a/tests/pythontest/layers/test_chatglm6b_layer.py b/tests/pythontest/layers/test_chatglm6b_layer.py new file mode 100644 index 00000000..e338cffa --- /dev/null +++ b/tests/pythontest/layers/test_chatglm6b_layer.py @@ -0,0 +1,61 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +import os +import sys +import json +import time +import torch +import torch_npu + + +sys.path.append(os.path.dirname(__file__)) +import layer_test # NOQA: E402 + + +LAYER_NAME = "ChatGlm6BLayer" + + +class TestNormal(layer_test.LayerTest): + def golden_calc(self, in_tensors): + return [in_tensors[0]] + + def test_2d_float(self): + hiddenStatesId = torch.rand(384, 32, 1024).npu() + qLinearWeightId = torch.rand(1024, 1024).npu() + qLinearBiasId = torch.rand(1024).npu() + kLinearWeightId = torch.rand(1024, 1024).npu() + kLinearBiasId = torch.rand(1024).npu() + vLinearWeightId = torch.rand(1024, 1024).npu() + vLinearBiasId = torch.rand(1024).npu() + selfOutLinearWeightId = torch.rand(1024, 1024).npu() + selfOutLinearBiasId = torch.rand(1024).npu() + selfOutNormWeightId = torch.rand(1024).npu() + selfOutNormBiasId = torch.rand(1024).npu() + ffnLinearWeightId = torch.rand(4096, 1024).npu() + ffnLinearBiasId = torch.rand(4096).npu() + bertOutLinearWeightId = torch.rand(1024, 4096).npu() + bertOutLinearBiasId = torch.rand(1024).npu() + bertOutNormWeightId = torch.rand(1024).npu() + bertOutNormBiasId = torch.rand(1024).npu() + attentionMaskId = torch.rand(32, 1, 1, 384).npu() + bertLayerOutId = torch.empty(384, 32, 1024).npu() + + self.execute(LAYER_NAME, '{"transKey":true,"dk":3,"headNum":16}', [hiddenStatesId, qLinearWeightId, qLinearBiasId, kLinearWeightId, kLinearBiasId, vLinearWeightId, vLinearBiasId, + selfOutLinearWeightId, selfOutLinearBiasId, selfOutNormWeightId, selfOutNormBiasId, + ffnLinearWeightId, ffnLinearBiasId, bertOutLinearWeightId, bertOutLinearBiasId, bertOutNormWeightId, bertOutNormBiasId, attentionMaskId]) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pythontest/layers/test_glm_block.py b/tests/pythontest/layers/test_glm_block.py deleted file mode 100644 index 5a55d51e..00000000 --- a/tests/pythontest/layers/test_glm_block.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import unittest -import os -import json -import time -import torch -import torch_npu -import sys - - -ACLTRANSFORMER_HOME_PATH = os.environ.get("ACLTRANSFORMER_HOME_PATH") -if ACLTRANSFORMER_HOME_PATH is None: - raise RuntimeError( - "env ACLTRANSFORMER_HOME_PATH not exist, source set_env.sh") - -LIB_PATH = os.path.join(ACLTRANSFORMER_HOME_PATH, - "examples/libacltransformer_torch.so") -torch.classes.load_library(LIB_PATH) - - -class TestNormal(unittest.TestCase): - def test_2d(self): - # operation = torch.classes.GlmBlock.GlmBlock( - # json.dumps({"transKey": True, "dk": 128, "headNum": 32, "layerId": 0, - # "layerNormEps":1e-12, "ResidualAddScale": 0})) - - # hiddenStates = torch.rand(384, 32, 1024).npu() - # normWeight = torch.rand(4096).npu() - # normBias = torch.rand(4096).npu() - # qkvMixdWeight = torch.rand(12288, 4096).npu() - # qkvMixdBias = torch.rand(12288).npu() - # selfOutLinearWeight = torch.rand(4096, 4096).npu() - # selfOutLinearBias = torch.rand(4096).npu() - # selfOutNormWeight = torch.rand(4096).npu() - # selfOutNormBias = torch.rand(4096).npu() - # ffnLinearWeight = torch.rand(16384, 4096).npu() - # ffnLinearBias = torch.rand(16384).npu() - # ffnOutLinearWeight = torch.rand(4096, 16384).npu() - # ffnOutLinearBias = torch.rand(4096).npu() - # positionIds = torch.rand(384, 32, 1024).npu() - # cosTable = torch.rand(2049, 1, 1024).npu() - # sinTable = torch.rand(2049, 1, 1024).npu() - # attentionMask = torch.rand(384, 32, 1024).npu() - # pastKey = torch.rand(384, 32, 1024).npu() - # pastValue = torch.rand(384, 32, 1024).npu() - - # start_time = time.time() - # for i in range(1): - # operation.execute( - # [hiddenStates, normWeight, normBias, qkvMixdWeight, qkvMixdBias, selfOutLinearWeight, - # selfOutLinearBias, selfOutNormWeight, selfOutNormBias, ffnLinearWeight, - # ffnLinearBias, ffnOutLinearWeight, ffnOutLinearBias, positionIds, cosTable, sinTable, - # attentionMask, pastKey, pastValue], []) - # end_time = time.time() - # print("use time:", (end_time - start_time)) - - -if __name__ == '__main__': - unittest.main() -- Gitee
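
Usage sketch for the new LayerTorch path: with this patch the example layers are no longer dispatched through the old ExecuteLayer() free function; Python drives them through the LayerTorch custom class, and Execute() now infers output shapes via Layer::InferShape and returns the output tensors itself. The snippet below is a minimal illustration in the spirit of the deleted test_glm_block.py. The class path torch.classes.LayerTorch.LayerTorch and the binding names set_param/execute are assumptions (the TORCH_LIBRARY(LayerTorch, m) registration body is not shown in this patch), and the parameter values simply mirror the old GLM test.

    # Minimal sketch of driving the new LayerTorch class from Python.
    # The class path and the set_param/execute method names are assumptions;
    # the TORCH_LIBRARY(LayerTorch, m) registration body is not shown above.
    import os
    import json
    import torch
    import torch_npu  # noqa: F401  (registers the NPU backend with torch)

    home = os.environ["ACLTRANSFORMER_HOME_PATH"]
    torch.classes.load_library(os.path.join(home, "examples/libacltransformer_torch.so"))

    # LayerTorch::LayerTorch maps the name to ChatGlm6BLayer or BertLayer.
    layer = torch.classes.LayerTorch.LayerTorch("ChatGlm6BLayer")

    # SetParam parses the JSON string and hands it to Layer::SetParam; the values
    # here mirror the deleted test_glm_block.py and are illustrative only.
    layer.set_param(json.dumps({"transKey": True, "dk": 128, "headNum": 32,
                                "layerId": 0, "layerNormEps": 1e-12,
                                "ResidualAddScale": 0}))

    # Execute takes only the input tensors; the outputs are allocated from
    # Layer::InferShape via CreateAtOutTensors and returned to the caller.
    in_tensors = [torch.rand(384, 32, 4096).npu()]  # plus the weight/bias/mask tensors the layer expects
    out_tensors = layer.execute(in_tensors)

Compared with the old interface, the caller no longer pre-allocates and passes in the output tensor list; the layer returns its outputs directly.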