From 51044cb50d6c0d1f864f57e562e75a8b15b4ecf0 Mon Sep 17 00:00:00 2001 From: lotus Date: Sat, 10 Jun 2023 17:19:18 +0800 Subject: [PATCH] feat:Add Layers --- examples/CMakeLists.txt | 1 + examples/layers/CMakeLists.txt | 11 +++ .../layer/bert => layers}/bert_layer.cpp | 27 ++++-- .../layer/layer.h => layers/bert_layer.h} | 20 +++-- .../chatglm6b => layers}/chatglm6b_layer.cpp | 29 +++++-- .../layer.cpp => layers/chatglm6b_layer.h} | 46 +++------- examples/layers/layer.cpp | 76 +++++++++++++++++ examples/layers/layer.h | 14 +++- examples/torch/CMakeLists.txt | 2 +- .../torch/layer/bert/bert_output_layer.cpp | 62 -------------- .../layer/bert/bert_self_attention_layer.cpp | 83 ------------------- examples/torch/layer/layer_torch.cpp | 70 ++++++++++++++-- examples/torch/layer/layer_torch.h | 8 +- .../pythontest/layers/test_chatglm6b_layer.py | 61 ++++++++++++++ tests/pythontest/layers/test_glm_block.py | 71 ---------------- 15 files changed, 296 insertions(+), 285 deletions(-) create mode 100644 examples/layers/CMakeLists.txt rename examples/{torch/layer/bert => layers}/bert_layer.cpp (88%) rename examples/{torch/layer/layer.h => layers/bert_layer.h} (58%) rename examples/{torch/layer/chatglm6b => layers}/chatglm6b_layer.cpp (87%) rename examples/{torch/layer/layer.cpp => layers/chatglm6b_layer.h} (33%) create mode 100644 examples/layers/layer.cpp delete mode 100644 examples/torch/layer/bert/bert_output_layer.cpp delete mode 100644 examples/torch/layer/bert/bert_self_attention_layer.cpp create mode 100644 tests/pythontest/layers/test_chatglm6b_layer.py delete mode 100644 tests/pythontest/layers/test_glm_block.py diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 0309d9ca..eb2db0aa 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(utils) +add_subdirectory(layers) add_subdirectory(torch) \ No newline at end of file diff --git a/examples/layers/CMakeLists.txt b/examples/layers/CMakeLists.txt new file mode 100644 index 00000000..eda69bfb --- /dev/null +++ b/examples/layers/CMakeLists.txt @@ -0,0 +1,11 @@ +file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_LIST_DIR}/*.cpp") +include_directories($ENV{PYTHON_INCLUDE_PATH} + $ENV{PYTORCH_INSTALL_PATH}/include + $ENV{PYTORCH_INSTALL_PATH}/include/torch/csrc/api/include + $ENV{PYTORCH_NPU_INSTALL_PATH}/include) + +link_directories($ENV{PYTHON_LIB_PATH} + $ENV{PYTORCH_INSTALL_PATH}/lib + $ENV{PYTORCH_NPU_INSTALL_PATH}/lib) + +add_library(examples_layers OBJECT ${SOURCE_FILES}) diff --git a/examples/torch/layer/bert/bert_layer.cpp b/examples/layers/bert_layer.cpp similarity index 88% rename from examples/torch/layer/bert/bert_layer.cpp rename to examples/layers/bert_layer.cpp index 7cecec7a..eaa92ec7 100644 --- a/examples/torch/layer/bert/bert_layer.cpp +++ b/examples/layers/bert_layer.cpp @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "bert_layer.h" #include #include #include @@ -20,12 +21,25 @@ #include "acltransformer/operation_graph.h" #include "examples/utils/example_util.h" #include "acltransformer/plan_builder.h" -#include "acltransformer/ops/linear_operation.h" +#include "acltransformer/ops/add_operation.h" #include "acltransformer/ops/add_norm_operation.h" +#include "acltransformer/ops/norm_operation.h" +#include "acltransformer/ops/linear_operation.h" #include "acltransformer/ops/self_attention_operation.h" #include "acltransformer/ops/ffn_operation.h" -void BertLayer(const Json::Value &paramJson, AclTransformer::VariantPack &variantPack) +namespace AclTransformer { +BertLayer::BertLayer() : Layer("BertLayer") {} + +BertLayer::~BertLayer() {} + +AsdOps::Status BertLayer::InferShape(const AsdOps::SVector &inTensors, + AsdOps::SVector &outTensorDescs) +{ + return AsdOps::Status::OkStatus(); +} + +AsdOps::Status BertLayer::Execute(Handle &handle, VariantPack &variantPack) { const uint64_t hiddenStatesId = 0; const uint64_t qLinearWeightId = 1; @@ -61,9 +75,9 @@ void BertLayer(const Json::Value &paramJson, AclTransformer::VariantPack &varian AclTransformer::LinearParam kLinearParam; AclTransformer::LinearParam vLinearParam; AclTransformer::SelfAttentionParam selfAttentionParam; - selfAttentionParam.transKey = paramJson["transKey"].asBool(); - selfAttentionParam.dk = paramJson["dk"].asInt(); - selfAttentionParam.headNum = paramJson["headNum"].asInt(); + selfAttentionParam.transKey = paramJson_["transKey"].asBool(); + selfAttentionParam.dk = paramJson_["dk"].asInt(); + selfAttentionParam.headNum = paramJson_["headNum"].asInt(); AclTransformer::LinearParam selfOutLinearParam; AclTransformer::AddNormParam selfOutAddNormParam; AclTransformer::FfnParam ffnParam; @@ -134,5 +148,6 @@ void BertLayer(const Json::Value &paramJson, AclTransformer::VariantPack &varian bertOutAddNormNode.inTensorIds = {bertOutLinearOutId, selfAddNormOutId, bertOutNormWeightId, bertOutNormBiasId}; bertOutAddNormNode.outTensorIds = {bertLayerOutId}; - ExampleUtil::ExecuteOperationGraph(opGraph, variantPack); + return ExecuteOperationGraph(opGraph, variantPack); } +} // namespace AclTransformer diff --git a/examples/torch/layer/layer.h b/examples/layers/bert_layer.h similarity index 58% rename from examples/torch/layer/layer.h rename to examples/layers/bert_layer.h index e14633b2..56d1e791 100644 --- a/examples/torch/layer/layer.h +++ b/examples/layers/bert_layer.h @@ -13,11 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef LAYER_EXECUTER_H -#define LAYER_EXECUTER_H -#include -#include "acltransformer/operation_graph.h" -#include "acltransformer/variant_pack.h" - -bool ExecuteLayer(const std::string &opName, const std::string ¶m, AclTransformer::VariantPack &variantPack); +#ifndef BERTLAYER_H +#define BERTLAYER_H +#include "layer.h" +namespace AclTransformer { +class BertLayer : public Layer { +public: + BertLayer(); + virtual ~BertLayer(); + AsdOps::Status InferShape(const AsdOps::SVector &inTensors, + AsdOps::SVector &outTensorDescs) override; + AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) override; +}; +} // namespace AclTransformer #endif \ No newline at end of file diff --git a/examples/torch/layer/chatglm6b/chatglm6b_layer.cpp b/examples/layers/chatglm6b_layer.cpp similarity index 87% rename from examples/torch/layer/chatglm6b/chatglm6b_layer.cpp rename to examples/layers/chatglm6b_layer.cpp index b3eb55a7..bf88c6f0 100644 --- a/examples/torch/layer/chatglm6b/chatglm6b_layer.cpp +++ b/examples/layers/chatglm6b_layer.cpp @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "chatglm6b_layer.h" #include #include #include @@ -27,7 +28,18 @@ #include "acltransformer/ops/self_attention_kv_cache_operation.h" #include "acltransformer/ops/ffn_operation.h" -void ChatGlm6BLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &variantPack) +namespace AclTransformer { +ChatGlm6BLayer::ChatGlm6BLayer() : Layer("ChatGlm6BLayer") {} + +ChatGlm6BLayer::~ChatGlm6BLayer() {} + +AsdOps::Status ChatGlm6BLayer::InferShape(const AsdOps::SVector &inTensors, + AsdOps::SVector &outTensorDescs) +{ + return AsdOps::Status::OkStatus(); +} + +AsdOps::Status ChatGlm6BLayer::Execute(Handle &handle, VariantPack &variantPack) { // in const uint64_t hiddenStates = 0; @@ -67,18 +79,18 @@ void ChatGlm6BLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &v const uint64_t ffnLinearOut = 32; AclTransformer::NormParam inputNormParam; - inputNormParam.layerNormEps = paramJson["layerNormEps"].asDouble(); + inputNormParam.layerNormEps = paramJson_["layerNormEps"].asDouble(); AclTransformer::LinearParam mixdQkvLinearParam; AclTransformer::PositionEmbeddingParam positionEmbeddingParam; - positionEmbeddingParam.headNum = paramJson["headNum"].asInt(); + positionEmbeddingParam.headNum = paramJson_["headNum"].asInt(); AclTransformer::SelfAttentionKvCacheParam selfAttentionKvCacheParam; - selfAttentionKvCacheParam.transKey = paramJson["transKey"].asBool(); - selfAttentionKvCacheParam.dk = paramJson["dk"].asInt(); + selfAttentionKvCacheParam.transKey = paramJson_["transKey"].asBool(); + selfAttentionKvCacheParam.dk = paramJson_["dk"].asInt(); selfAttentionKvCacheParam.headNum = positionEmbeddingParam.headNum; - selfAttentionKvCacheParam.layerId = paramJson["layerId"].asInt(); + selfAttentionKvCacheParam.layerId = paramJson_["layerId"].asInt(); AclTransformer::LinearParam selfOutLinearParam; AclTransformer::AddParam selfResidualAddParam; - selfResidualAddParam.scale = paramJson["ResidualAddScale"].asFloat(); + selfResidualAddParam.scale = paramJson_["ResidualAddScale"].asFloat(); AclTransformer::NormParam selfNormParam; selfNormParam.layerNormEps = inputNormParam.layerNormEps; AclTransformer::FfnParam ffnParam; @@ -156,5 +168,6 @@ void ChatGlm6BLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &v ffnResidualAddNode.inTensorIds = {selfNormOut, ffnLinearOut}; ffnResidualAddNode.outTensorIds = {glmBlockOut}; - 
ExampleUtil::ExecuteOperationGraph(opGraph, variantPack); + return ExecuteOperationGraph(opGraph, variantPack); } +} // namespace AclTransformer diff --git a/examples/torch/layer/layer.cpp b/examples/layers/chatglm6b_layer.h similarity index 33% rename from examples/torch/layer/layer.cpp rename to examples/layers/chatglm6b_layer.h index a4a8eeec..7ee4c719 100644 --- a/examples/torch/layer/layer.cpp +++ b/examples/layers/chatglm6b_layer.h @@ -13,39 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#ifndef CHATGLM6BLAYER_H +#define CHATGLM6BLAYER_H #include "layer.h" -#include -#include -#include - -using LayerFunc = std::function; - -void BertLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &variantPack); -void BertSelfAttentionLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &variantPack); -void BertOutputAttentionLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &variantPack); -void ChatGlm6BLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &variantPack); - -std::map g_layerMap = { - {"BertLayer", &BertLayer}, - {"BertSelfAttentionLayer", &BertSelfAttentionLayer}, - {"BertOutputAttentionLayer", &BertOutputAttentionLayer}, - {"ChatGlm6BLayer", &ChatGlm6BLayer}, +namespace AclTransformer { +class ChatGlm6BLayer : public Layer { +public: + ChatGlm6BLayer(); + virtual ~ChatGlm6BLayer(); + AsdOps::Status InferShape(const AsdOps::SVector &inTensors, + AsdOps::SVector &outTensorDescs) override; + AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) override; }; - -bool ExecuteLayer(const std::string &layerName, const std::string ¶m, AclTransformer::VariantPack &variantPack) -{ - auto it = g_layerMap.find(layerName); - if (it == g_layerMap.end()) { - return false; - } - - Json::Reader reader; - Json::Value paramJson; - if (!reader.parse(param, paramJson)) { - ASD_LOG(ERROR) << " invalid json:" << param; - return false; - } - - it->second(paramJson, variantPack); - return true; -} +} // namespace AclTransformer +#endif \ No newline at end of file diff --git a/examples/layers/layer.cpp b/examples/layers/layer.cpp new file mode 100644 index 00000000..fe99819e --- /dev/null +++ b/examples/layers/layer.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "layer.h" +#include +#include +#include "acltransformer/plan.h" +#include "acltransformer/plan_builder.h" +#include "examples/utils/example_util.h" + +namespace AclTransformer { +Layer::Layer(const std::string &layerName) : layerName_(layerName) {} + +Layer::~Layer() {} + +std::string Layer::GetName() const { return layerName_; } + +void Layer::SetParam(const Json::Value ¶mJson) { paramJson_ = paramJson; } + +AsdOps::Status Layer::ExecuteOperationGraph(const AclTransformer::OperationGraph &opGraph, + AclTransformer::VariantPack &variantPack) +{ + AclTransformer::Handle handle = {ExampleUtil::GetCurrentStream()}; + + AclTransformer::PlanBuilder planBuilder; + AclTransformer::Plan plan; + AsdOps::Status st = planBuilder.Build(variantPack, opGraph, plan); + if (!st.Ok()) { + ASD_LOG(ERROR) << opGraph.name << " PlanBuilder build plan fail, error:" << st.Message(); + return st; + } + + st = plan.Setup(handle, variantPack); + if (!st.Ok()) { + ASD_LOG(ERROR) << opGraph.name << " Plan Setup fail error:" << st.Message(); + return st; + } + + variantPack.workspaceSize = plan.GetWorkspaceSize(); + ASD_LOG(INFO) << opGraph.name << " Plan GetWorkspaceSize:" << variantPack.workspaceSize; + + if (variantPack.workspaceSize > 0) { + ASD_LOG(INFO) << opGraph.name + << " AsdRtMemMallocDevice variantPack.workspaceSize:" << variantPack.workspaceSize; + int st = AsdRtMemMallocDevice((void **)&variantPack.workspace, variantPack.workspaceSize, ASDRT_MEM_DEFAULT); + if (st != ASDRT_SUCCESS) { + ASD_LOG(ERROR) << opGraph.name << " AsdRtMemMallocDevice fail"; + return AsdOps::Status::FailStatus(1, "AsdRtMemMallocDevice fail"); + } + } + + st = plan.Execute(handle, variantPack); + ASD_LOG_IF(!st.Ok(), ERROR) << opGraph.name << " Plan Execute fail, error:" << st.Message(); + + if (variantPack.workspace != nullptr) { + AsdRtMemFreeDevice(variantPack.workspace); + ASD_LOG(INFO) << opGraph.name << " AsdRtMemFreeDevice free:" << variantPack.workspace; + variantPack.workspace = nullptr; + variantPack.workspaceSize = 0; + } + + return st; +} +} // namespace AclTransformer \ No newline at end of file diff --git a/examples/layers/layer.h b/examples/layers/layer.h index 1cd11e01..344389f4 100644 --- a/examples/layers/layer.h +++ b/examples/layers/layer.h @@ -16,6 +16,7 @@ #ifndef LAYER_EXECUTER_H #define LAYER_EXECUTER_H #include +#include #include "acltransformer/operation_graph.h" #include "acltransformer/variant_pack.h" @@ -25,9 +26,18 @@ public: Layer(const std::string &layerName); virtual ~Layer(); std::string GetName() const; + void SetParam(const Json::Value ¶mJson); virtual AsdOps::Status InferShape(const AsdOps::SVector &inTensors, AsdOps::SVector &outTensorDescs) = 0; - AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) = 0; -} + virtual AsdOps::Status Execute(Handle &handle, VariantPack &variantPack) = 0; + +protected: + AsdOps::Status ExecuteOperationGraph(const AclTransformer::OperationGraph &opGraph, + AclTransformer::VariantPack &variantPack); + +protected: + std::string layerName_; + Json::Value paramJson_; +}; } // namespace AclTransformer #endif \ No newline at end of file diff --git a/examples/torch/CMakeLists.txt b/examples/torch/CMakeLists.txt index 865b5d94..46d975a9 100644 --- a/examples/torch/CMakeLists.txt +++ b/examples/torch/CMakeLists.txt @@ -1,5 +1,5 @@ file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_LIST_DIR}/*.cpp") add_library(acltransformer_torch SHARED ${SOURCE_FILES}) -target_link_libraries(acltransformer_torch PRIVATE acltransformer asdops torch c10 torch_cpu 
torch_python torch_npu examples_util jsoncpp) +target_link_libraries(acltransformer_torch PRIVATE acltransformer asdops torch c10 torch_cpu torch_python torch_npu examples_util examples_layers jsoncpp) install(TARGETS acltransformer_torch DESTINATION examples) \ No newline at end of file diff --git a/examples/torch/layer/bert/bert_output_layer.cpp b/examples/torch/layer/bert/bert_output_layer.cpp deleted file mode 100644 index 90932e4a..00000000 --- a/examples/torch/layer/bert/bert_output_layer.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include "acltransformer/operation.h" -#include "acltransformer/operation_graph.h" -#include "examples/utils/example_util.h" -#include "acltransformer/plan_builder.h" -#include "acltransformer/ops/linear_operation.h" -#include "acltransformer/ops/add_norm_operation.h" -#include "acltransformer/ops/self_attention_operation.h" -#include "acltransformer/ops/ffn_operation.h" - -void BertOutputAttentionLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &variantPack) -{ - const uint64_t inputId = 0; - const uint64_t linearWeightId = 1; - const uint64_t linearBiasId = 2; - const uint64_t residualAddInId = 3; - const uint64_t normWeightId = 4; - const uint64_t normBiasId = 5; - // out - const uint64_t bertOutId = 6; - // intermiate - const uint64_t linearOutId = 7; - - AclTransformer::LinearParam linearParam; - AclTransformer::AddNormParam addNormParam; - AclTransformer::LinearOperation linearOp(linearParam); - AclTransformer::AddNormOperation addNormOp(addNormParam); - - AclTransformer::OperationGraph opGraph; - opGraph.inTensorSize = variantPack.inTensors.size(); - opGraph.outTensorSize = variantPack.outTensors.size(); - opGraph.intermediateTensorSize = 1; - opGraph.nodes.resize(2); - AclTransformer::OperationGraphNode &linearNode = opGraph.nodes.at(0); - AclTransformer::OperationGraphNode &addNormNode = opGraph.nodes.at(1); - linearNode.operation = &linearOp; - linearNode.inTensorIds = {inputId, linearWeightId, linearBiasId}; - linearNode.outTensorIds = {linearOutId}; - - addNormNode.operation = &addNormOp; - addNormNode.inTensorIds = {linearOutId, residualAddInId, normWeightId, normBiasId}; - addNormNode.outTensorIds = {bertOutId}; - - ExampleUtil::ExecuteOperationGraph(opGraph, variantPack); -} \ No newline at end of file diff --git a/examples/torch/layer/bert/bert_self_attention_layer.cpp b/examples/torch/layer/bert/bert_self_attention_layer.cpp deleted file mode 100644 index 34a8ab15..00000000 --- a/examples/torch/layer/bert/bert_self_attention_layer.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include "acltransformer/operation.h" -#include "acltransformer/operation_graph.h" -#include "examples/utils/example_util.h" -#include "acltransformer/plan_builder.h" -#include "acltransformer/ops/linear_operation.h" -#include "acltransformer/ops/add_norm_operation.h" -#include "acltransformer/ops/self_attention_operation.h" -#include "acltransformer/ops/ffn_operation.h" - -void BertSelfAttentionLayer(const Json::Value ¶mJson, AclTransformer::VariantPack &variantPack) -{ - const uint64_t inputId = 0; - const uint64_t queryLinearWeightId = 1; - const uint64_t queryLinearBiasId = 2; - const uint64_t keyLinearWeightId = 3; - const uint64_t keyLinearBiasId = 4; - const uint64_t valueLinearWeightId = 5; - const uint64_t valueLinearBiasId = 6; - const uint64_t attentionMaskId = 7; - // out - const uint64_t contextId = 8; - // intermiate - const uint64_t queryId = 9; - const uint64_t keyId = 10; - const uint64_t valueId = 11; - - AclTransformer::LinearParam queryLinearParam; - AclTransformer::LinearParam keyLinearParam; - AclTransformer::LinearParam valueLinearParam; - AclTransformer::SelfAttentionParam selfAttentionParam = {false, 64, 16}; - AclTransformer::LinearOperation queryLinearOp(queryLinearParam); - AclTransformer::LinearOperation keyLinearOp(keyLinearParam); - AclTransformer::LinearOperation valueLinearOp(valueLinearParam); - AclTransformer::SelfAttentionOperation selfAttentionOp(selfAttentionParam); - - AclTransformer::OperationGraph opGraph; - static int64_t graphId = 0; - opGraph.name = "BertSelfAttentionGraph_" + std::to_string(graphId++); - opGraph.inTensorSize = variantPack.inTensors.size(); - opGraph.outTensorSize = variantPack.outTensors.size(); - opGraph.intermediateTensorSize = 3; - opGraph.nodes.resize(4); - AclTransformer::OperationGraphNode &queryLinearNode = opGraph.nodes.at(0); - AclTransformer::OperationGraphNode &keyLinearNode = opGraph.nodes.at(1); - AclTransformer::OperationGraphNode &valueLinearNode = opGraph.nodes.at(2); - AclTransformer::OperationGraphNode &selfAttentionNode = opGraph.nodes.at(3); - - queryLinearNode.operation = &queryLinearOp; - queryLinearNode.inTensorIds = {inputId, queryLinearWeightId, queryLinearBiasId}; - queryLinearNode.outTensorIds = {queryId}; - - keyLinearNode.operation = &keyLinearOp; - keyLinearNode.inTensorIds = {inputId, keyLinearWeightId, keyLinearBiasId}; - keyLinearNode.outTensorIds = {keyId}; - - valueLinearNode.operation = &valueLinearOp; - valueLinearNode.inTensorIds = {inputId, valueLinearWeightId, valueLinearBiasId}; - valueLinearNode.outTensorIds = {valueId}; - - selfAttentionNode.operation = &selfAttentionOp; - selfAttentionNode.inTensorIds = {queryId, keyId, valueId, attentionMaskId}; - selfAttentionNode.outTensorIds = {contextId}; - - ExampleUtil::ExecuteOperationGraph(opGraph, variantPack); -} \ No newline at end of file diff --git a/examples/torch/layer/layer_torch.cpp b/examples/torch/layer/layer_torch.cpp index d409414e..c153eb95 100644 --- a/examples/torch/layer/layer_torch.cpp +++ b/examples/torch/layer/layer_torch.cpp @@ -21,38 +21,90 @@ #include 
"acltransformer/operation_graph.h" #include "examples/utils/example_util.h" #include "acltransformer/plan_builder.h" -#include "layer.h" +#include "examples/layers/chatglm6b_layer.h" +#include "examples/layers/bert_layer.h" LayerTorch::LayerTorch(std::string layerName) : layerName_(layerName) { ASD_LOG(INFO) << "LayerTorch::LayerTorch called, layerName:" << layerName; + if (layerName == "ChatGlm6BLayer") { + layer_ = new AclTransformer::ChatGlm6BLayer(); + } + if (layerName == "BertLayer") { + layer_ = new AclTransformer::BertLayer(); + } else { + ASD_LOG(ERROR) << "not support layerName:" << layerName; + } } -LayerTorch::~LayerTorch() {} +LayerTorch::~LayerTorch() +{ + if (layer_) { + delete layer_; + layer_ = nullptr; + } +} -void LayerTorch::SetParam(std::string param) { param_ = param; } +void LayerTorch::SetParam(std::string param) +{ + if (!layer_) { + ASD_LOG(ERROR) << "layer is null"; + return; + } + Json::Reader reader; + Json::Value paramJson; + if (!reader.parse(param, paramJson)) { + ASD_LOG(ERROR) << "invalid json:" << param; + } + layer_->SetParam(paramJson); +} -void LayerTorch::Execute(std::vector inTensors, std::vector outTensors) +std::vector LayerTorch::Execute(std::vector inTensors) { - AsdOps::Timer timer; ASD_LOG(INFO) << "LayerTorch::Execute start"; + std::vector outTensors; + if (!layer_) { + ASD_LOG(ERROR) << "layer is null"; + return outTensors; + } + + AclTransformer::VariantPack variantPack; + for (size_t i = 0; i < inTensors.size(); ++i) { inTensors.at(i) = inTensors.at(i).contiguous(); ASD_LOG(INFO) << "inTensors[" << i << "].options:" << inTensors.at(i).options() << ", data:" << inTensors.at(i).data_ptr(); + variantPack.inTensors.push_back(ExampleUtil::AtTensor2AsdTensor(inTensors.at(i))); } + + CreateAtOutTensors(variantPack.inTensors, outTensors); + for (size_t i = 0; i < outTensors.size(); ++i) { outTensors.at(i) = outTensors.at(i).contiguous(); ASD_LOG(INFO) << "outTensors[" << i << "].options:" << outTensors.at(i).options() << ", data:" << outTensors.at(i).data_ptr(); + variantPack.outTensors.push_back(ExampleUtil::AtTensor2AsdTensor(outTensors.at(i))); } - AclTransformer::VariantPack variantPack; - ExampleUtil::BuildVariantPack(inTensors, outTensors, variantPack); + AclTransformer::Handle handle = {ExampleUtil::GetCurrentStream()}; + layer_->Execute(handle, variantPack); - ExecuteLayer(layerName_, param_, variantPack); + ASD_LOG(WARN) << "LayerTorch::Execute end"; + return outTensors; +} - ASD_LOG(WARN) << "LayerTorch::Execute end, use time:" << timer.ElapsedMicroSecond() << " microsecond"; +void LayerTorch::CreateAtOutTensors(const AsdOps::SVector &inTensors, + std::vector &atOutTensors) +{ + AsdOps::SVector outTensorDescs; + AsdOps::Status st = layer_->InferShape(inTensors, outTensorDescs); + ASD_LOG_IF(!st.Ok(), ERROR) << "infer shape fail, error:" << st.Message(); + + atOutTensors.resize(outTensorDescs.size()); + for (size_t i = 0; i < outTensorDescs.size(); ++i) { + at::Tensor newTensor = ExampleUtil::CreateAtTensorFromAsdOpsTensorDesc(outTensorDescs.at(i)); + atOutTensors.at(i) = newTensor; + } } TORCH_LIBRARY(LayerTorch, m) diff --git a/examples/torch/layer/layer_torch.h b/examples/torch/layer/layer_torch.h index f0be3670..9bebed45 100644 --- a/examples/torch/layer/layer_torch.h +++ b/examples/torch/layer/layer_torch.h @@ -19,18 +19,22 @@ #include #include #include "acltransformer/operation_graph.h" +#include "examples/layers/layer.h" class LayerTorch : public torch::CustomClassHolder { public: LayerTorch(std::string layerName); ~LayerTorch(); 
void SetParam(std::string param); - void Execute(std::vector inTensors, std::vector outTensors); + std::vector Execute(std::vector inTensors); c10::intrusive_ptr clone() const { return c10::make_intrusive(layerName_); } +private: + void CreateAtOutTensors(const AsdOps::SVector &inTensors, std::vector &atOutTensors); + private: std::string layerName_; - std::string param_; + AclTransformer::Layer *layer_ = nullptr; }; #endif \ No newline at end of file diff --git a/tests/pythontest/layers/test_chatglm6b_layer.py b/tests/pythontest/layers/test_chatglm6b_layer.py new file mode 100644 index 00000000..e338cffa --- /dev/null +++ b/tests/pythontest/layers/test_chatglm6b_layer.py @@ -0,0 +1,61 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +import os +import sys +import json +import time +import torch +import torch_npu + + +sys.path.append(os.path.dirname(__file__)) +import layer_test # NOQA: E402 + + +LAYER_NAME = "ChatGlm6BLayer" + + +class TestNormal(layer_test.LayerTest): + def golden_calc(self, in_tensors): + return [in_tensors[0]] + + def test_2d_float(self): + hiddenStatesId = torch.rand(384, 32, 1024).npu() + qLinearWeightId = torch.rand(1024, 1024).npu() + qLinearBiasId = torch.rand(1024).npu() + kLinearWeightId = torch.rand(1024, 1024).npu() + kLinearBiasId = torch.rand(1024).npu() + vLinearWeightId = torch.rand(1024, 1024).npu() + vLinearBiasId = torch.rand(1024).npu() + selfOutLinearWeightId = torch.rand(1024, 1024).npu() + selfOutLinearBiasId = torch.rand(1024).npu() + selfOutNormWeightId = torch.rand(1024).npu() + selfOutNormBiasId = torch.rand(1024).npu() + ffnLinearWeightId = torch.rand(4096, 1024).npu() + ffnLinearBiasId = torch.rand(4096).npu() + bertOutLinearWeightId = torch.rand(1024, 4096).npu() + bertOutLinearBiasId = torch.rand(1024).npu() + bertOutNormWeightId = torch.rand(1024).npu() + bertOutNormBiasId = torch.rand(1024).npu() + attentionMaskId = torch.rand(32, 1, 1, 384).npu() + bertLayerOutId = torch.empty(384, 32, 1024).npu() + + self.execute(LAYER_NAME, '{"transKey":true,"dk":3,"headNum":16}', [hiddenStatesId, qLinearWeightId, qLinearBiasId, kLinearWeightId, kLinearBiasId, vLinearWeightId, vLinearBiasId, + selfOutLinearWeightId, selfOutLinearBiasId, selfOutNormWeightId, selfOutNormBiasId, + ffnLinearWeightId, ffnLinearBiasId, bertOutLinearWeightId, bertOutLinearBiasId, bertOutNormWeightId, bertOutNormBiasId, attentionMaskId]) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pythontest/layers/test_glm_block.py b/tests/pythontest/layers/test_glm_block.py deleted file mode 100644 index 5a55d51e..00000000 --- a/tests/pythontest/layers/test_glm_block.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import unittest -import os -import json -import time -import torch -import torch_npu -import sys - - -ACLTRANSFORMER_HOME_PATH = os.environ.get("ACLTRANSFORMER_HOME_PATH") -if ACLTRANSFORMER_HOME_PATH is None: - raise RuntimeError( - "env ACLTRANSFORMER_HOME_PATH not exist, source set_env.sh") - -LIB_PATH = os.path.join(ACLTRANSFORMER_HOME_PATH, - "examples/libacltransformer_torch.so") -torch.classes.load_library(LIB_PATH) - - -class TestNormal(unittest.TestCase): - def test_2d(self): - # operation = torch.classes.GlmBlock.GlmBlock( - # json.dumps({"transKey": True, "dk": 128, "headNum": 32, "layerId": 0, - # "layerNormEps":1e-12, "ResidualAddScale": 0})) - - # hiddenStates = torch.rand(384, 32, 1024).npu() - # normWeight = torch.rand(4096).npu() - # normBias = torch.rand(4096).npu() - # qkvMixdWeight = torch.rand(12288, 4096).npu() - # qkvMixdBias = torch.rand(12288).npu() - # selfOutLinearWeight = torch.rand(4096, 4096).npu() - # selfOutLinearBias = torch.rand(4096).npu() - # selfOutNormWeight = torch.rand(4096).npu() - # selfOutNormBias = torch.rand(4096).npu() - # ffnLinearWeight = torch.rand(16384, 4096).npu() - # ffnLinearBias = torch.rand(16384).npu() - # ffnOutLinearWeight = torch.rand(4096, 16384).npu() - # ffnOutLinearBias = torch.rand(4096).npu() - # positionIds = torch.rand(384, 32, 1024).npu() - # cosTable = torch.rand(2049, 1, 1024).npu() - # sinTable = torch.rand(2049, 1, 1024).npu() - # attentionMask = torch.rand(384, 32, 1024).npu() - # pastKey = torch.rand(384, 32, 1024).npu() - # pastValue = torch.rand(384, 32, 1024).npu() - - # start_time = time.time() - # for i in range(1): - # operation.execute( - # [hiddenStates, normWeight, normBias, qkvMixdWeight, qkvMixdBias, selfOutLinearWeight, - # selfOutLinearBias, selfOutNormWeight, selfOutNormBias, ffnLinearWeight, - # ffnLinearBias, ffnOutLinearWeight, ffnOutLinearBias, positionIds, cosTable, sinTable, - # attentionMask, pastKey, pastValue], []) - # end_time = time.time() - # print("use time:", (end_time - start_time)) - - -if __name__ == '__main__': - unittest.main() -- Gitee
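
Usage sketch for the new LayerTorch path: with this patch the example layers are no longer dispatched through the old ExecuteLayer() free function; Python drives them through the LayerTorch custom class, and Execute() now infers output shapes via Layer::InferShape and returns the output tensors itself. The snippet below is a minimal illustration in the spirit of the deleted test_glm_block.py. The class path torch.classes.LayerTorch.LayerTorch and the binding names set_param/execute are assumptions (the TORCH_LIBRARY(LayerTorch, m) registration body is not shown in this patch), and the parameter values simply mirror the old GLM test.

    # Minimal sketch of driving the new LayerTorch class from Python.
    # The class path and the set_param/execute method names are assumptions;
    # the TORCH_LIBRARY(LayerTorch, m) registration body is not shown above.
    import os
    import json
    import torch
    import torch_npu  # noqa: F401  (registers the NPU backend with torch)

    home = os.environ["ACLTRANSFORMER_HOME_PATH"]
    torch.classes.load_library(os.path.join(home, "examples/libacltransformer_torch.so"))

    # LayerTorch::LayerTorch maps the name to ChatGlm6BLayer or BertLayer.
    layer = torch.classes.LayerTorch.LayerTorch("ChatGlm6BLayer")

    # SetParam parses the JSON string and hands it to Layer::SetParam; the values
    # here mirror the deleted test_glm_block.py and are illustrative only.
    layer.set_param(json.dumps({"transKey": True, "dk": 128, "headNum": 32,
                                "layerId": 0, "layerNormEps": 1e-12,
                                "ResidualAddScale": 0}))

    # Execute takes only the input tensors; the outputs are allocated from
    # Layer::InferShape via CreateAtOutTensors and returned to the caller.
    in_tensors = [torch.rand(384, 32, 4096).npu()]  # plus the weight/bias/mask tensors the layer expects
    out_tensors = layer.execute(in_tensors)

Compared with the old interface, the caller no longer pre-allocates and passes in the output tensor list; the layer returns its outputs directly.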