diff --git a/operator/op_using/BatchMatMulV2/README.md b/operator/op_using/BatchMatMulV2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..048f019c3522e8d892d0a2ad4d753e46831102ac
--- /dev/null
+++ b/operator/op_using/BatchMatMulV2/README.md
@@ -0,0 +1,75 @@
+# BatchMatMulV2 Operator ACL Interface Invocation Sample
+
+## Function Description
+
+This sample demonstrates how to invoke the BatchMatMulV2 operator through the ACL interface aclopExecuteV2, walking through the complete invocation flow for an operator with inputs, outputs and attributes.
+
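+At its core, the sample creates tensor descriptors and data buffers for the three inputs (x1, x2, bias) and the single output (y), sets the adj_x1/adj_x2 attributes, and then launches the operator. A trimmed excerpt of that core call, taken from src/BatchMatMulV2Dynamic.cpp (descriptor, buffer and memory setup omitted), looks like this:
+
+```
+// inputDescs/outputDescs are aclTensorDesc* arrays; inBuffers/outBuffers hold aclDataBuffer*.
+aclopAttr *opAttr = aclopCreateAttr();
+aclopSetAttrBool(opAttr, "adj_x1", false);
+aclopSetAttrBool(opAttr, "adj_x2", false);
+aclopExecuteV2("BatchMatMulV2", inputNums, inputDescs, inBuffers.data(),
+               outputNums, outputDescs, outBuffers.data(), opAttr, stream);
+aclrtSynchronizeStream(stream);
+```
+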
+## Directory Structure
+
+```
+├── data                          // Test input data and operator results
+├── model                         // Single-operator model directory
+│   └── acl_op.json               // BatchMatMulV2 single-op description file, used with the ATC tool to generate the om model
+├── scripts                       // Script directory
+│   ├── build_exe.sh              // Sample build script
+│   └── generate_om.sh            // om generation script
+├── src
+│   └── BatchMatMulV2Dynamic.cpp  // C++ implementation of the single-op invocation flow
+└── README.md                     // Sample description file
+```
+
+## Environment Requirements
+
+- OS and architecture: CentOS x86\_64, Ubuntu 18.04 x86\_64, EulerOS x86
+- Compiler:
+  - g++ when the runtime OS architecture is x86
+- Python and dependent libraries: Python 3.7.*x* (3.7.0 to 3.7.11) or Python 3.8.*x* (3.8.0 to 3.8.11)
+- The Ascend AI software stack has been deployed.
+
+
+
+## Configuring Environment Variables
+
+- Environment variable configuration on the development environment
+
+  1. The CANN-Toolkit package provides a process-level environment variable setup script that can be sourced in the current process to configure the basic CANN environment variables automatically. For example:
+
+     ```
+     . ${HOME}/Ascend/ascend-toolkit/set_env.sh
+     ```
+
+     Replace "$HOME/Ascend" with the actual installation path of the Ascend-cann-toolkit package.
+
+
+## Building and Running
+1. From the root of the samples project, run cd operator/op_using/BatchMatMulV2 to enter the sample directory.
+
+2. Run
+    ```
+    chmod +x scripts/*
+    ```
+
+    to make the scripts executable.
+
+3. Generate the om model. The script targets soc_version Ascend910A by default; to build for another chip (for example a 310P3), pass the corresponding soc_version as the first argument:
+    ```
+    ./scripts/generate_om.sh
+    ```
+    The script calls atc on every json file under the model directory, so several json files can be placed there to generate om models for different inputs. For the json format, see [Single-Operator Description File Configuration](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/63RC2alpha001/infacldevg/atctool/atlasatc_16_0036.html).
+
+4. Run
+    ```
+    ./scripts/build_exe.sh
+    ```
+    to build the executable acl_exe from the sample code. Note that the script also launches the program once after a successful build.
+
+5. Run
+    ```
+    ./acl_exe
+    ```
+    to execute the program. On a successful run it prints:
+    ```
+    [INFO] ACL run op success!
+    ```
+    The output tensor is also saved to data/output.bin.
\ No newline at end of file
diff --git a/operator/op_using/.keep b/operator/op_using/BatchMatMulV2/data/.keep
similarity index 100%
rename from operator/op_using/.keep
rename to operator/op_using/BatchMatMulV2/data/.keep
diff --git a/operator/op_using/BatchMatMulV2/model/acl_op.json b/operator/op_using/BatchMatMulV2/model/acl_op.json
new file mode 100644
index 0000000000000000000000000000000000000000..df386bde2f090d8b622a75f639dc0a30604f90f5
--- /dev/null
+++ b/operator/op_using/BatchMatMulV2/model/acl_op.json
@@ -0,0 +1,57 @@
+[
+ {
+ "attr": [
+ {
+ "name": "adj_x1",
+ "type": "bool",
+ "value": false
+ },
+ {
+ "name": "adj_x2",
+ "type": "bool",
+ "value": false
+ }
+ ],
+ "input_desc": [
+ {
+ "format": "ND",
+ "name": "x1",
+ "shape": [
+ -1,-1,-1
+ ],
+ "shape_range": [[3,16],[1,512],[1,1536]],
+ "type": "float32"
+ },
+ {
+ "format": "ND",
+ "name": "x2",
+ "shape": [
+ -1,-1,-1
+ ],
+ "shape_range": [[3,16],[1,1536],[1,512]],
+ "type": "float32"
+ },
+ {
+ "format": "ND",
+ "name": "bias",
+ "shape": [
+ -1
+ ],
+ "shape_range": [[3,512]],
+ "type": "float32"
+ }
+ ],
+ "op": "BatchMatMulV2",
+ "output_desc": [
+ {
+ "format": "ND",
+ "name": "y",
+ "shape": [
+ -1,-1,-1
+ ],
+ "shape_range": [[3,16],[1,512],[1,512]],
+ "type": "float32"
+ }
+ ]
+ }
+ ]
\ No newline at end of file
diff --git a/operator/op_using/BatchMatMulV2/scripts/build_exe.sh b/operator/op_using/BatchMatMulV2/scripts/build_exe.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7215ac3f6e6a88994b6e6ef682c5ef521faeea35
--- /dev/null
+++ b/operator/op_using/BatchMatMulV2/scripts/build_exe.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Copyright (c) 2023-2023 Huawei Technologies Co., Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
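+# Source the CANN environment, rebuild acl_exe against the ACL headers and libraries, then run it once.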
+source ${HOME}/Ascend/ascend-toolkit/set_env.sh
+rm -rf acl_exe
+g++ src/BatchMatMulV2Dynamic.cpp -I $ASCEND_HOME_PATH/include -L $ASCEND_HOME_PATH/lib64 -lascendcl -l acl_op_compiler -o acl_exe
+./acl_exe
diff --git a/operator/op_using/BatchMatMulV2/scripts/generate_om.sh b/operator/op_using/BatchMatMulV2/scripts/generate_om.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6997c5f7b64bc20764266bae5ac1ba86bfe8b6b0
--- /dev/null
+++ b/operator/op_using/BatchMatMulV2/scripts/generate_om.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright (c) 2023-2023 Huawei Technologies Co., Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
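+# The first argument, if given, selects the soc_version passed to atc; Ascend910A is used by default.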
+default_version=Ascend910A
+if [ $# -eq 0 ]; then
+ echo "atc run on default Ascend910A"
+else
+ echo "atc run on $1"
+ default_version=$1
+fi
+for file in $(find ./model -name '*.json'); do
+ atc --singleop=$file --output=model --soc_version=$default_version
+done
diff --git a/operator/op_using/BatchMatMulV2/src/BatchMatMulV2Dynamic.cpp b/operator/op_using/BatchMatMulV2/src/BatchMatMulV2Dynamic.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0ce375597c543cb0166a3ce84d137e40a7487459
--- /dev/null
+++ b/operator/op_using/BatchMatMulV2/src/BatchMatMulV2Dynamic.cpp
@@ -0,0 +1,274 @@
+/**
+* @file BatchMatMulV2Dynamic.cpp
+*
+* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include <vector>
+#include "acl/acl.h"
+using namespace std;
+// Evaluate an ACL call once; on failure log the error code and return FAILED from the enclosing function.
+#define CHECK_ACL_RESULT(err, errMsg) \
+    do { \
+        aclError aclRet = (err); \
+        if (aclRet != ACL_SUCCESS) { \
+            ERROR_LOG("%s error code = %d", errMsg, aclRet); \
+            return FAILED; \
+        } \
+    } while (0)
+
+#define ERROR_LOG(fmt, args...) fprintf(stderr, "[ERROR] " fmt "\n", ##args)
+#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args)
+namespace {
+ bool runOnDevice = true;
+ int32_t deviceId = 0;
+ aclrtStream stream = nullptr;
+ const int32_t inputNums = 3;
+ const int32_t outputNums = 1;
+ aclTensorDesc *inputDescs[inputNums];
+ aclTensorDesc *outputDescs[outputNums];
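+    // Concrete execution-time shapes; each dimension lies inside the shape_range declared in model/acl_op.json.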
+    std::vector<int64_t> inputShapeX1 = {3, 32, 96};
+    std::vector<int64_t> inputShapeX2 = {3, 96, 64};
+    std::vector<int64_t> inputShapeBias = {64};
+    std::vector<int64_t> inputShapeOffsetW = {};
+    std::vector<int64_t> outputShapeY = {3, 32, 64};
+    std::vector<std::vector<int64_t>> inputShapes = {inputShapeX1, inputShapeX2, inputShapeBias};
+    std::vector<std::vector<int64_t>> outputShapes = {outputShapeY};
+    std::vector<void *> devMemInputs;
+    std::vector<void *> devMemOutputs;
+    std::vector<void *> hostMemInputs;
+    std::vector<void *> hostMemOutputs;
+    std::vector<aclDataBuffer *> inBuffers;
+    std::vector<aclDataBuffer *> outBuffers;
+    aclopAttr *opAttr = nullptr;
+ int randNum = 1000;
+}
+
+typedef enum Result {
+ SUCCESS = 0,
+ FAILED = 1
+} Result;
+
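+// Fill a float buffer with pseudo-random values in the range [0, 1).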
+void DataProductor(float data[], size_t size)
+{
+    for (size_t i = 0; i < size; i++) {
+        data[i] = static_cast<float>(rand() % (randNum + 1)) / static_cast<float>(randNum + 1);
+    }
+}
+
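+// Write a raw memory buffer to a binary file; used here to save the operator output.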
+bool WriteFile(const std::string &filePath, const void *buffer, size_t size)
+{
+ if (buffer == nullptr) {
+ ERROR_LOG("Write file failed. buffer is nullptr");
+ return false;
+ }
+
+ FILE *outputFile = fopen(filePath.c_str(), "wb");
+ if (outputFile == nullptr) {
+ ERROR_LOG("Open file failed. path = %s", filePath.c_str());
+ return false;
+ }
+
+    fwrite(buffer, sizeof(char), size, outputFile);
+ fclose(outputFile);
+ return true;
+}
+
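+// Initialize ACL, bind the device, detect the run mode and register ./model as the single-op model directory.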
+Result InitResource()
+{
+ const char *aclConfigPath = "";
+ CHECK_ACL_RESULT(aclInit(aclConfigPath), "aclInit fail");
+ CHECK_ACL_RESULT(aclrtSetDevice(deviceId), "aclrtSetDevice fail");
+ aclrtRunMode runMode;
+ CHECK_ACL_RESULT(aclrtGetRunMode(&runMode), "aclrtGetRunMode fail");
+ runOnDevice = (runMode == ACL_DEVICE);
+ CHECK_ACL_RESULT(aclopSetModelDir("./model"), "aclopSetModelDir fail");
+ return SUCCESS;
+}
+
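+// Create tensor descriptors, allocate device and host buffers, generate random input data,
+// copy it to the device and set the BatchMatMulV2 attributes.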
+Result ProcessInput()
+{
+ for (size_t i = 0; i < inputNums; i++)
+ {
+ inputDescs[i] = aclCreateTensorDesc(ACL_FLOAT, inputShapes[i].size(), inputShapes[i].data(), ACL_FORMAT_ND);
+ }
+
+ for (size_t i = 0; i < outputNums; i++)
+ {
+ outputDescs[i] = aclCreateTensorDesc(ACL_FLOAT, outputShapes[i].size(), outputShapes[i].data(), ACL_FORMAT_ND);
+ }
+
+ for (size_t i = 0; i < inputNums; i++)
+ {
+ void *devMemInput = nullptr;
+ CHECK_ACL_RESULT(aclrtMalloc(&devMemInput, aclGetTensorDescSize(inputDescs[i]),
+ ACL_MEM_MALLOC_NORMAL_ONLY),"aclrtMalloc devMemInput fail");
+
+ devMemInputs.emplace_back(devMemInput);
+ aclDataBuffer *inputBuffer = aclCreateDataBuffer(devMemInput, aclGetTensorDescSize(inputDescs[i]));
+ inBuffers.emplace_back(inputBuffer);
+ }
+
+ for (size_t i = 0; i < outputNums; i++)
+ {
+ void *devMemOutY = nullptr;
+ CHECK_ACL_RESULT(aclrtMalloc(&devMemOutY, aclGetTensorDescSize(outputDescs[i]),
+ ACL_MEM_MALLOC_NORMAL_ONLY),"aclrtMalloc devMemOutY fail");
+
+ devMemOutputs.emplace_back(devMemOutY);
+ aclDataBuffer *outputBuffer = aclCreateDataBuffer(devMemOutY, aclGetTensorDescSize(outputDescs[i]));
+ outBuffers.push_back(outputBuffer);
+ }
+
+    for (size_t i = 0; i < inputNums; i++)
+    {
+        void *hostMemInput = nullptr;
+        if (runOnDevice)
+        {
+            // When running directly on the device, plain device memory is used as the staging buffer instead of aclrtMallocHost.
+            CHECK_ACL_RESULT(aclrtMalloc(&hostMemInput, aclGetTensorDescSize(inputDescs[i]),
+                ACL_MEM_MALLOC_NORMAL_ONLY), "aclrtMalloc hostMemInput fail");
+        }
+        else
+        {
+            CHECK_ACL_RESULT(aclrtMallocHost(&hostMemInput, aclGetTensorDescSize(inputDescs[i])),
+                "aclrtMallocHost hostMemInput fail");
+        }
+        hostMemInputs.emplace_back(hostMemInput);
+    }
+
+    for (size_t i = 0; i < outputNums; i++)
+    {
+        void *hostOutput = nullptr;
+        if (runOnDevice)
+        {
+            CHECK_ACL_RESULT(aclrtMalloc(&hostOutput, aclGetTensorDescSize(outputDescs[i]),
+                ACL_MEM_MALLOC_NORMAL_ONLY), "aclrtMalloc hostOutput fail");
+        }
+        else
+        {
+            CHECK_ACL_RESULT(aclrtMallocHost(&hostOutput, aclGetTensorDescSize(outputDescs[i])),
+                "aclrtMallocHost hostOutput fail");
+        }
+        hostMemOutputs.emplace_back(hostOutput);
+    }
+
+    for (size_t i = 0; i < inputNums; i++)
+    {
+        size_t size = 1;
+        for (size_t j = 0; j < inputShapes[i].size(); j++) {
+            size *= inputShapes[i][j];
+        }
+        // Fill the host staging buffer for this input with random test data.
+        float *data = static_cast<float *>(hostMemInputs[i]);
+        DataProductor(data, size);
+    }
+
+ CHECK_ACL_RESULT(aclrtCreateStream(&stream), "aclrtCreateStream fail");
+ for (size_t i = 0; i < inputNums; i++)
+ {
+ if (runOnDevice)
+ {
+ CHECK_ACL_RESULT(aclrtMemcpy(devMemInputs[i], aclGetTensorDescSize(inputDescs[i]),
+ hostMemInputs[i], aclGetTensorDescSize(inputDescs[i]),
+ ACL_MEMCPY_DEVICE_TO_DEVICE),"aclrtMemcpy d2d fail");
+ }
+ else
+ {
+ CHECK_ACL_RESULT(aclrtMemcpy(devMemInputs[i], aclGetTensorDescSize(inputDescs[i]),
+ hostMemInputs[i], aclGetTensorDescSize(inputDescs[i]),
+ ACL_MEMCPY_HOST_TO_DEVICE),"aclrtMemcpy h2d fail");
+ }
+ }
+ opAttr = aclopCreateAttr();
+ CHECK_ACL_RESULT(aclopSetAttrBool(opAttr, "adj_x1", false), "aclopSetAttrBool adj_x1 fail");
+ CHECK_ACL_RESULT(aclopSetAttrBool(opAttr, "adj_x2", false), "aclopSetAttrBool adj_x2 fail");
+ return SUCCESS;
+}
+
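+// Launch the BatchMatMulV2 single-op execution asynchronously and wait for the stream to finish.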
+Result Inference()
+{
+ CHECK_ACL_RESULT(aclopExecuteV2("BatchMatMulV2", inputNums, inputDescs, inBuffers.data(),
+ outputNums, outputDescs, outBuffers.data(), opAttr, stream), "aclopExecuteV2 fail");
+
+ CHECK_ACL_RESULT(aclrtSynchronizeStream(stream), "aclrtSynchronizeStream fail");
+ CHECK_ACL_RESULT(aclrtDestroyStream(stream), "aclrtDestroyStream fail");
+ return SUCCESS;
+}
+
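+// Copy the operator output back into host-accessible memory and dump it to ./data/output.bin.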
+Result GetResult()
+{
+ for (size_t i = 0; i < outputNums; i++)
+ {
+ if (runOnDevice)
+ {
+ aclrtMemcpy(hostMemOutputs[i], aclGetTensorDescSize(outputDescs[i]), devMemOutputs[i],
+ aclGetTensorDescSize(outputDescs[i]), ACL_MEMCPY_DEVICE_TO_DEVICE);
+ }
+ else
+ {
+ aclrtMemcpy(hostMemOutputs[i], aclGetTensorDescSize(outputDescs[i]), devMemOutputs[i],
+ aclGetTensorDescSize(outputDescs[i]), ACL_MEMCPY_DEVICE_TO_HOST);
+ }
+ }
+ WriteFile("./data/output.bin", hostMemOutputs[0], aclGetTensorDescSize(outputDescs[0]));
+ return SUCCESS;
+}
+
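+// Destroy data buffers, tensor descriptors and the op attribute, then reset the device and finalize ACL.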
+Result ReleaseResource()
+{
+    for (size_t i = 0; i < outBuffers.size(); i++) {
+        CHECK_ACL_RESULT(aclDestroyDataBuffer(outBuffers[i]), "aclDestroyDataBuffer output fail");
+    }
+
+    for (size_t i = 0; i < inBuffers.size(); i++) {
+        CHECK_ACL_RESULT(aclDestroyDataBuffer(inBuffers[i]), "aclDestroyDataBuffer input fail");
+    }
+
+    for (size_t i = 0; i < inputNums; i++) {
+        aclDestroyTensorDesc(inputDescs[i]);
+    }
+    for (size_t i = 0; i < outputNums; i++) {
+        aclDestroyTensorDesc(outputDescs[i]);
+    }
+    aclopDestroyAttr(opAttr);
+ CHECK_ACL_RESULT(aclrtResetDevice(deviceId), "ResetDevice fail");
+ CHECK_ACL_RESULT(aclFinalize(), "Finalize acl failed");
+ return SUCCESS;
+}
+
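+// End-to-end flow: init -> prepare inputs -> execute the op -> fetch the result -> release resources.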
+int main(int argc, char const *argv[])
+{
+ Result ret = InitResource();
+ if (ret == FAILED) {
+ ERROR_LOG("Init detect failed");
+ return FAILED;
+ }
+
+ ret = ProcessInput();
+ if (ret == FAILED) {
+ ERROR_LOG("process input failed");
+ return FAILED;
+ }
+
+ ret = Inference();
+ if (ret == FAILED) {
+ ERROR_LOG("model inference failed");
+ return FAILED;
+ }
+
+ ret = GetResult();
+ if (ret == FAILED) {
+ ERROR_LOG("GetResult failed");
+ return FAILED;
+ }
+ ret = ReleaseResource();
+ if (ret == FAILED) {
+ ERROR_LOG("Release Resource failed");
+ return FAILED;
+ }
+ INFO_LOG("ACL run op success!");
+ return SUCCESS;
+}
\ No newline at end of file