diff --git a/operator_contrib/README.md b/operator_contrib/README.md
index da799262c54be193526c3682e60f8c2e119fbcdb..dbff6a08912bc30951879b2d365b2d61aa4305ef 100644
--- a/operator_contrib/README.md
+++ b/operator_contrib/README.md
@@ -32,6 +32,7 @@
| [ReduceSumSample](./ReduceSumSample) | ReduceSum custom Vector operator implemented in Ascend C, with an invocation sample | Atlas 200/500 A2 Inference Products<br>Atlas A2 Training Series Products/Atlas 800I A2 Inference Products | 8.0.RC2.alpha003 |
| [ScatterMaxSample](./ScatterMaxSample) | ScatterMax custom Vector operator implemented in Ascend C, with an invocation sample | Atlas 200/500 A2 Inference Products<br>Atlas A2 Training Series Products/Atlas 800I A2 Inference Products | 8.0.RC1.alpha003 |
| [ScatterSubSample](./ScatterSubSample) | ScatterSub custom Vector operator implemented in Ascend C, with an invocation sample | Atlas 200/500 A2 Inference Products<br>Atlas A2 Training Series Products/Atlas 800I A2 Inference Products | 8.0.RC1.alpha003 |
+| [SelectV2Sample](./SelectV2Sample) | SelectV2 custom Vector operator implemented in Ascend C, with an invocation sample | Atlas 200/500 A2 Inference Products<br>Atlas A2 Training Series Products/Atlas 800I A2 Inference Products | 8.0.RC1.alpha003 |
| [SpenceSample](./SpenceSample) | Spence custom Vector operator implemented in Ascend C, with an invocation sample | Atlas 200/500 A2 Inference Products<br>Atlas A2 Training Series Products/Atlas 800I A2 Inference Products | 8.0.RC1.alpha003 |
| [ThreeNNSample](./ThreeNNSample) | ThreeNN custom Vector operator implemented in Ascend C, with an invocation sample | Atlas 200/500 A2 Inference Products<br>Atlas A2 Training Series Products/Atlas 800I A2 Inference Products | 8.0.RC2.alpha003 |
| [TrilSample](./TrilSample) | Tril custom Vector operator implemented in Ascend C, with an invocation sample | Atlas 200/500 A2 Inference Products<br>Atlas A2 Training Series Products/Atlas 800I A2 Inference Products | 8.0.RC2.alpha003 |
diff --git a/operator_contrib/SelectV2Sample/CMakeLists.txt b/operator_contrib/SelectV2Sample/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e4b38c4be17a4888f28e6ac9dcf7650286e4ca54
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/CMakeLists.txt
@@ -0,0 +1,25 @@
+add_ops_compile_options(
+ OP_NAME SelectV2
+ OPTIONS --cce-auto-sync=on
+ -Wno-deprecated-declarations
+ -Werror
+)
+
+target_sources(op_host_aclnn PRIVATE
+op_host/select_v2.cpp
+)
+
+target_sources(optiling PRIVATE
+ op_host/select_v2.cpp
+)
+
+target_include_directories(optiling PRIVATE
+ ${CMAKE_CURRENT_SOURCE_DIR}/op_host
+)
+
+target_sources(opsproto PRIVATE
+ op_host/select_v2.cpp
+)
+
+install(FILES op_kernel/select_v2.cpp
+ DESTINATION ${ASCEND_IMPL_OUT_DIR}/dynamic)
\ No newline at end of file
diff --git a/operator_contrib/SelectV2Sample/README.md b/operator_contrib/SelectV2Sample/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4a793c2bedc41c4b8739864d6d798c43669cc947
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/README.md
@@ -0,0 +1,72 @@
+## `SelectV2` Custom Operator Sample
+This sample implements the `SelectV2` operator in the `Ascend C` programming language.
+
+### Operator Description
+The `SelectV2` operator selects elements from the inputs x1 and x2 according to the input condition: where condition is TRUE the element is taken from x1, and where condition is FALSE it is taken from x2.
+
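+The selection is element-wise. As a rough illustration only (not part of this sample), the same semantics can be expressed with NumPy's `np.where`:
+
+```python
+import numpy as np
+
+condition = np.array([[True, False], [False, True]])
+x1 = np.ones((2, 2), dtype=np.float16)    # taken where condition is True
+x2 = np.zeros((2, 2), dtype=np.float16)   # taken where condition is False
+y = np.where(condition, x1, x2)           # SelectV2 semantics
+print(y)
+```
+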
+### Operator Specification
+
+OpType: `SelectV2`; kernel function name: `select_v2`
+
+|                 | name      | type   | data type                     | format |
+|-----------------|-----------|--------|-------------------------------|--------|
+| operator input  | condition | tensor | bool                          | ND     |
+| operator input  | x1        | tensor | float32, float16, int32, int8 | ND     |
+| operator input  | x2        | tensor | float32, float16, int32, int8 | ND     |
+| operator output | y         | tensor | float32, float16, int32, int8 | ND     |
+
+### Supported Product Models
+This sample supports the following product models:
+- Atlas A2 Training Series Products
+- Atlas 200I/500 A2 Inference Products
+
+### Directory Structure
+```
+├── docs                // operator documentation
+├── examples            // invocation examples
+├── framework           // third-party framework adaptation plugins
+├── op_host             // host-side implementation
+├── op_kernel           // kernel-side implementation
+├── opp_kernel_aicpu    // AI CPU implementation
+└── tests               // test cases
+```
+
+### Environment Requirements
+Before building and running this sample, deploy the development and runtime environment as described in the [CANN Software Installation Guide](https://hiascend.com/document/redirect/CannCommunityInstSoftware).
+
+### Building and Deploying the Operator Package
+  - Enter the repository directory
+
+    ```bash
+    cd ${git_clone_path}/cann-ops
+    ```
+
+  - Build
+
+    ```bash
+    bash build.sh
+    ```
+
+  - Deploy the operator package (the actual file name generated under build_out contains the CANN version and target architecture)
+
+    ```bash
+    bash build_out/CANN-custom_ops--linux..run
+    ```
+
+### Operator Invocation
+
+For an end-to-end invocation example through the generated aclnn interface, see [examples/AclNNInvocationNaive](./examples/AclNNInvocationNaive).
+
+## Change History
+| Date | Change |
+|----|------|
+| 2025/06/10 | Added this README |
\ No newline at end of file
diff --git a/operator_contrib/SelectV2Sample/docs/SelectV2.md b/operator_contrib/SelectV2Sample/docs/SelectV2.md
new file mode 100644
index 0000000000000000000000000000000000000000..c681c43707cc7ca9669bca9f86639ab2d7d5cbc9
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/docs/SelectV2.md
@@ -0,0 +1,88 @@
+Statement: this document is licensed under [Creative Commons License version 4.0](https://creativecommons.org/licenses/by/4.0/legalcode). Please comply with this license when reproducing, citing, or modifying it.
+
+# SelectV2
+
+## Supported Product Models
+
+Atlas A2 Training Series Products / Atlas 200I/500 A2 Inference Products
+
+For details about the product forms, see [Ascend Product Forms](https://www.hiascend.com/document/redirect/CannCommunityProductForm).
+
+## Functional Description
+
+- Operator function: SelectV2 performs conditional selection. According to the boolean values (True/False) of the condition tensor, it takes the element at each position from x1 or x2 and writes it to a new output tensor. Broadcasting is supported: the shapes of condition, x1 and x2 may be expanded automatically when they satisfy the broadcast rules. Typical uses are dynamic selection scenarios such as mask filtering and conditional assignment.
+
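+As a rough reference for the broadcast behavior (an illustrative NumPy sketch, not the operator implementation):
+
+```python
+import numpy as np
+
+condition = np.array([[True, False, True]])        # shape (1, 3)
+x1 = np.full((2, 3), 1.0, dtype=np.float16)        # shape (2, 3)
+x2 = np.arange(2, dtype=np.float16).reshape(2, 1)  # shape (2, 1)
+# condition and x2 are broadcast to (2, 3) before the element-wise selection
+y = np.where(condition, x1, x2)
+print(y.shape)                                     # (2, 3)
+```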
+
+## Implementation
+
+The operator is implemented with the `Ascend C` `API` interfaces `Compare`, `Select` and `Cast`.
+
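+For the INT8 and INT32 data types the kernel first casts the inputs to half/float, performs the selection, and casts the result back. A NumPy analogue of that data flow (an illustrative sketch of the idea, not the device code):
+
+```python
+import numpy as np
+
+condition = np.random.rand(8) > 0.5
+x1 = np.random.randint(-5, 5, 8).astype(np.int8)
+x2 = np.random.randint(-5, 5, 8).astype(np.int8)
+# Cast the condition to half and compare against zero to form the selection mask,
+# select on the promoted values, then cast the result back to the original dtype.
+mask = condition.astype(np.float16) != 0
+y = np.where(mask, x1.astype(np.float16), x2.astype(np.float16)).astype(np.int8)
+```
+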
+## Operator Execution Interface
+
+Every operator exposes a two-stage interface: first call "aclnnSelectV2GetWorkspaceSize" to obtain the workspace size required by the computation and an executor that contains the operator computation flow, then call "aclnnSelectV2" to execute the computation.
+
+* `aclnnStatus aclnnSelectV2GetWorkspaceSize(const aclTensor* condition, const aclTensor* x1, const aclTensor* x2, const aclTensor* y, uint64_t* workspaceSize, aclOpExecutor** executor)`
+* `aclnnStatus aclnnSelectV2(void* workspace, uint64_t workspaceSize, aclOpExecutor* executor, aclrtStream stream)`
+
+**Notes**:
+
+- The execution interface hides the operator's internal implementation and the differences between NPU generations, and the developer does not need to compile the operator, which keeps the invocation simple.
+- Instead of using the execution interface, a developer can also write an Ascend IR operator description file, compile it into an operator om file with the ATC tool, and then load the model file to execute the operator. For details, see [Single-Operator Invocation > Single-Operator Model Execution](https://hiascend.com/document/redirect/CannCommunityCppOpcall) in the Application Development Guide.
+
+### aclnnSelectV2GetWorkspaceSize
+
+- **Parameters:**
+
+  - condition (aclTensor\*, input): required; device-side aclTensor for the condition input. Data type BOOL; data format ND.
+  - x1 (aclTensor\*, input): required; device-side aclTensor for the x1 input. Data types FLOAT32, FLOAT16, INT32, INT8; data format ND.
+  - x2 (aclTensor\*, input): required; device-side aclTensor for the x2 input. Data types FLOAT32, FLOAT16, INT32, INT8; data format ND.
+  - y (aclTensor\*, output): device-side aclTensor for the y output. Data types FLOAT32, FLOAT16, INT32, INT8; data format ND; the output shape matches x1 and x2.
+  - workspaceSize (uint64\_t\*, output): the device-side workspace size the caller needs to allocate.
+  - executor (aclOpExecutor\*\*, output): the op executor containing the operator computation flow.
+- **Return value:**
+
+  Returns an aclnnStatus status code; see [aclnn return codes](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/aolapi/context/common/aclnn%E8%BF%94%E5%9B%9E%E7%A0%81_fuse.md).
+
+  ```
+  The first-stage interface validates the arguments; the error codes mean:
+  - 161001 (ACLNN_ERR_PARAM_NULLPTR): a required input, output, or required attribute is a null pointer.
+  - 161002 (ACLNN_ERR_PARAM_INVALID): the data type or data format of the inputs or output is not within the supported range.
+  ```
+
+### aclnnSelectV2
+
+- **Parameters:**
+
+  - workspace (void\*, input): start address of the workspace memory allocated on the device side.
+  - workspaceSize (uint64\_t, input): size of the device-side workspace, obtained from the first-stage interface aclnnSelectV2GetWorkspaceSize.
+  - executor (aclOpExecutor\*, input): op executor containing the operator computation flow.
+  - stream (aclrtStream, input): the AscendCL stream on which the task is executed.
+- **Return value:**
+
+  Returns an aclnnStatus status code; see [aclnn return codes](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/aolapi/context/common/aclnn%E8%BF%94%E5%9B%9E%E7%A0%81_fuse.md).
+
+
+## Constraints
+
+- condition supports the BOOL data type; x1, x2 and y support FLOAT32, FLOAT16, INT32 and INT8; only the ND data format is supported.
+
+## Operator Prototype
+
+OpType: `SelectV2`; kernel function name: `select_v2`
+
+|                 | name      | type   | data type                     | format |
+|-----------------|-----------|--------|-------------------------------|--------|
+| operator input  | condition | tensor | bool                          | ND     |
+| operator input  | x1        | tensor | float32, float16, int32, int8 | ND     |
+| operator input  | x2        | tensor | float32, float16, int32, int8 | ND     |
+| operator output | y         | tensor | float32, float16, int32, int8 | ND     |
+
+## Invocation Example
+
+See the "Operator Invocation" section of the [SelectV2 custom operator sample README](../README.md#operator-invocation).
\ No newline at end of file
diff --git a/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/CMakeLists.txt b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a5681d624dc102c4baacfeca68fb657e1a93fcfd
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/CMakeLists.txt
@@ -0,0 +1,60 @@
+# CMake lowest version requirement
+cmake_minimum_required(VERSION 3.5.1)
+
+# project information
+project(acl_execute_select_v2)
+
+# Compile options
+add_compile_options(-std=c++11)
+
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./")
+
+set(INC_PATH $ENV{DDK_PATH})
+
+if (NOT DEFINED ENV{DDK_PATH})
+ set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest")
+ message(STATUS "set default INC_PATH: ${INC_PATH}")
+else ()
+ message(STATUS "env INC_PATH: ${INC_PATH}")
+endif()
+
+set(CUST_PKG_PATH "${INC_PATH}/opp/vendors/customize/op_api")
+
+set(LIB_PATH $ENV{NPU_HOST_LIB})
+
+# Dynamic libraries in the stub directory can only be used for compilation
+if (NOT DEFINED ENV{NPU_HOST_LIB})
+ set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/")
+ set(LIB_PATH1 "/usr/local/Ascend/ascend-toolkit/latest/atc/lib64/stub/")
+ message(STATUS "set default LIB_PATH: ${LIB_PATH}")
+else ()
+ message(STATUS "env LIB_PATH: ${LIB_PATH}")
+endif()
+
+# Header path
+include_directories(
+ ${INC_PATH}/runtime/include
+ ${INC_PATH}/atc/include
+ ${CUST_PKG_PATH}/include
+)
+
+# add host lib path
+link_directories(
+ ${LIB_PATH}
+ ${LIB_PATH1}
+ ${CUST_PKG_PATH}/lib
+)
+
+add_executable(execute_select_v2_op
+ main.cpp
+)
+
+target_link_libraries(execute_select_v2_op
+ ascendcl
+ cust_opapi
+ acl_op_compiler
+ nnopbase
+ stdc++
+)
+
+install(TARGETS execute_select_v2_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
diff --git a/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/README.md b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..16ebc0114fb840bcd1384604fc87e557c4d7b2d4
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/README.md
@@ -0,0 +1,62 @@
+## Overview
+
+This sample invokes the SelectV2 operator through the aclnn interface.
+
+## Directory Structure
+```
+├── AclNNInvocationNaive
+│   ├── CMakeLists.txt      // build configuration
+│   ├── gen_data.py         // script that generates the input and golden data
+│   ├── main.cpp            // entry point of the single-operator invocation application
+│   ├── run.sh              // script that builds and runs the sample
+│   └── verify_result.py    // script that checks the result precision against the golden data
+```
+## Code Walkthrough
+After the custom operator has been built and deployed, its functionality can be verified by invoking it as a single operator. main.cpp uses the single-operator API execution mode: the C API is called directly, so no single-operator description file or offline model conversion is needed.
+
+When the custom operator is built and deployed, the single-operator API is generated automatically and can be called from an application. The API follows the "two-stage" pattern:
+  ```cpp
+  aclnnStatus aclnnSelectV2GetWorkspaceSize(const aclTensor *condition, const aclTensor *x1, const aclTensor *x2, const aclTensor *y, uint64_t *workspaceSize, aclOpExecutor **executor);
+  aclnnStatus aclnnSelectV2(void *workspace, uint64_t workspaceSize, aclOpExecutor *executor, aclrtStream stream);
+  ```
+aclnnSelectV2GetWorkspaceSize is the first-stage interface; it computes the workspace memory required by this API call. After obtaining the required workspace size, allocate device memory of that size and then call the second-stage interface aclnnSelectV2 to run the computation. For details, see the "Single-Operator API Execution" section of [AscendCL Single-Operator Invocation](https://hiascend.com/document/redirect/CannCommunityAscendCInVorkSingleOp).
+
+## Running the Sample
+  **Make sure the operator package has been built and deployed first, as described in the operator package build and deployment steps.**
+
+  - Enter the sample directory
+
+    ```bash
+    cd ${git_clone_path}/cann-ops/src/math/selectv2/examples/AclNNInvocationNaive
+    ```
+
+  - Configure environment variables
+
+    Set the following environment variables (ARM example):
+
+    ```bash
+    export DDK_PATH=/usr/local/Ascend/ascend-toolkit/latest
+    export NPU_HOST_LIB=/usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/devlib
+    ```
+  - Run the sample
+
+    The run generates the test data automatically, then builds and runs the aclnn sample and prints the result.
+
+    ```bash
+    mkdir -p build
+    cd build
+    cmake .. && make
+    ./execute_select_v2_op
+    ```
+
+    Alternatively, run.sh builds and runs the sample in one step:
+
+    ```bash
+    bash run.sh
+    ```
+
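+The golden output used by the precision check is produced by gen_data.py with `tf.raw_ops.SelectV2`. If TensorFlow is not available, an equivalent reference can be generated with NumPy alone (a sketch under that assumption; shapes match gen_data.py):
+
+```python
+import numpy as np
+
+condition = (np.random.rand(32, 64) > 0.5)
+x1 = np.random.uniform(1, 100, [32, 64]).astype(np.float16)
+x2 = np.random.uniform(1, 100, [32, 64]).astype(np.float16)
+golden = np.where(condition, x1, x2).astype(np.float16)  # same result as tf.raw_ops.SelectV2 here
+golden.tofile("./output/golden.bin")
+```
+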
+## Change History
+
+| Date | Change |
+| ---------- | ------------ |
+| 2025/06/29 | Added this README |
\ No newline at end of file
diff --git a/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/gen_data.py b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/gen_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..41e715aed285c425f0910cdf41e3eeba924e3a87
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/gen_data.py
@@ -0,0 +1,35 @@
+#!/usr/bin/python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+# This file is a part of the CANN Open Software.
+# Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+# Please refer to the License for details. You may not use this file except in compliance with the License.
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+# See LICENSE in the root of the software repository for the full text of the License.
+# ======================================================================================================================
+
+import os
+import numpy as np
+
+import tensorflow as tf
+
+
+def gen_golden_data_simple():
+ dtype = np.float16
+ input_x1 = np.random.rand(32, 64) > 0.5
+ input_x1 = input_x1.astype(np.bool_)
+ input_x2 = np.random.uniform(1, 100, [32, 64]).astype(np.float16)
+ input_x3 = np.random.uniform(1, 100, [32, 64]).astype(np.float16)
+ golden = tf.raw_ops.SelectV2(condition=input_x1, t=input_x2, e=input_x3).numpy()
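+    # Note: for these equal shapes, np.where(input_x1, input_x2, input_x3) produces the same golden data without requiring TensorFlow.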
+
+ os.system("mkdir -p input")
+ os.system("mkdir -p output")
+ input_x1.tofile("./input/input_x1.bin")
+ input_x2.tofile("./input/input_x2.bin")
+ input_x3.tofile("./input/input_x3.bin")
+ golden.tofile("./output/golden.bin")
+
+if __name__ == "__main__":
+ gen_golden_data_simple()
+
diff --git a/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/main.cpp b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cf4cac766c7205aef837288d8344697a18c1c4d8
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/main.cpp
@@ -0,0 +1,252 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+
+/**
+ * @file main.cpp
+ */
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <fstream>
+#include <string>
+#include <vector>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "acl/acl.h"
+#include "aclnn_select_v2.h"
+
+#define SUCCESS 0
+#define FAILED 1
+
+#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args)
+#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args)
+#define ERROR_LOG(fmt, args...) fprintf(stderr, "[ERROR] " fmt "\n", ##args)
+
+#define CHECK_RET(cond, return_expr) \
+ do { \
+ if (!(cond)) { \
+ return_expr; \
+ } \
+ } while (0)
+
+#define LOG_PRINT(message, ...) \
+ do { \
+ printf(message, ##__VA_ARGS__); \
+ } while (0)
+
+bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize)
+{
+ struct stat sBuf;
+ int fileStatus = stat(filePath.data(), &sBuf);
+ if (fileStatus == -1) {
+ ERROR_LOG("failed to get file %s", filePath.c_str());
+ return false;
+ }
+ if (S_ISREG(sBuf.st_mode) == 0) {
+ ERROR_LOG("%s is not a file, please enter a file", filePath.c_str());
+ return false;
+ }
+
+ std::ifstream file;
+ file.open(filePath, std::ios::binary);
+ if (!file.is_open()) {
+ ERROR_LOG("Open file failed. path = %s", filePath.c_str());
+ return false;
+ }
+
+ std::filebuf *buf = file.rdbuf();
+ size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in);
+ if (size == 0) {
+ ERROR_LOG("file size is 0");
+ file.close();
+ return false;
+ }
+ if (size > bufferSize) {
+ ERROR_LOG("file size is larger than buffer size");
+ file.close();
+ return false;
+ }
+ buf->pubseekpos(0, std::ios::in);
+    buf->sgetn(static_cast<char *>(buffer), size);
+ fileSize = size;
+ file.close();
+ return true;
+}
+
+bool WriteFile(const std::string &filePath, const void *buffer, size_t size)
+{
+ if (buffer == nullptr) {
+ ERROR_LOG("Write file failed. buffer is nullptr");
+ return false;
+ }
+
+ int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE);
+ if (fd < 0) {
+ ERROR_LOG("Open file failed. path = %s", filePath.c_str());
+ return false;
+ }
+
+ auto writeSize = write(fd, buffer, size);
+ (void) close(fd);
+ if (writeSize != size) {
+ ERROR_LOG("Write file Failed.");
+ return false;
+ }
+
+ return true;
+}
+
+int64_t GetShapeSize(const std::vector<int64_t> &shape)
+{
+ int64_t shapeSize = 1;
+ for (auto i : shape) {
+ shapeSize *= i;
+ }
+ return shapeSize;
+}
+
+int Init(int32_t deviceId, aclrtStream *stream)
+{
+    // Boilerplate: initialize ACL
+ auto ret = aclInit(nullptr);
+ CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return FAILED);
+ ret = aclrtSetDevice(deviceId);
+ CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return FAILED);
+ ret = aclrtCreateStream(stream);
+ CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. ERROR: %d\n", ret); return FAILED);
+
+ return SUCCESS;
+}
+
+template <typename T>
+int CreateAclTensor(const std::vector<T> &hostData, const std::vector<int64_t> &shape, void **deviceAddr,
+                    aclDataType dataType, aclTensor **tensor)
+{
+    auto size = GetShapeSize(shape) * sizeof(T);
+    // Allocate device-side memory with aclrtMalloc
+    auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST);
+    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return FAILED);
+
+    // Copy the host-side data to the device-side memory with aclrtMemcpy
+    ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE);
+    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return FAILED);
+
+    // Create the aclTensor with aclCreateTensor
+    *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, nullptr, 0, aclFormat::ACL_FORMAT_ND, shape.data(),
+                              shape.size(), *deviceAddr);
+ return SUCCESS;
+}
+
+int main(int argc, char **argv)
+{
+    // 1. (Boilerplate) initialize the device and stream; see the ACL API reference
+    // Set deviceId according to the actual device in use
+ int32_t deviceId = 0;
+ aclrtStream stream;
+ auto ret = Init(deviceId, &stream);
+ CHECK_RET(ret == 0, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return FAILED);
+
+    // 2. Construct the inputs and outputs; adapt this to the actual API
+    std::vector<int64_t> inputX1Shape = {32, 64};
+    std::vector<int64_t> inputX2Shape = {32, 64};
+    std::vector<int64_t> inputX3Shape = {32, 64};
+    std::vector<int64_t> outputYShape = {32, 64};
+ void *inputX1DeviceAddr = nullptr;
+ void *inputX2DeviceAddr = nullptr;
+ void *inputX3DeviceAddr = nullptr;
+ void *outputYDeviceAddr = nullptr;
+ aclTensor *inputX1 = nullptr;
+ aclTensor *inputX2 = nullptr;
+ aclTensor *inputX3 = nullptr;
+ aclTensor *outputY = nullptr;
+    size_t inputX1ShapeSize_1 = inputX1Shape[0] * inputX1Shape[1];
+    size_t inputX2ShapeSize_1 = inputX2Shape[0] * inputX2Shape[1];
+    size_t inputX3ShapeSize_1 = inputX3Shape[0] * inputX3Shape[1];
+    size_t outputYShapeSize_1 = outputYShape[0] * outputYShape[1];
+    size_t dataType1 = sizeof(aclFloat16);  // element size of the float16 inputs/output
+    size_t dataType2 = 1;                   // element size of the bool condition
+    std::vector<char> inputX1HostData(inputX1Shape[0] * inputX1Shape[1]);
+    std::vector<aclFloat16> inputX2HostData(inputX2Shape[0] * inputX2Shape[1]);
+    std::vector<aclFloat16> inputX3HostData(inputX3Shape[0] * inputX3Shape[1]);
+    std::vector<aclFloat16> outputYHostData(outputYShape[0] * outputYShape[1]);
+
+    size_t fileSize = 0;
+    // Read the input data generated by gen_data.py
+    ReadFile("../input/input_x1.bin", fileSize, inputX1HostData.data(), inputX1ShapeSize_1 * dataType2);
+    ReadFile("../input/input_x2.bin", fileSize, inputX2HostData.data(), inputX2ShapeSize_1 * dataType1);
+    ReadFile("../input/input_x3.bin", fileSize, inputX3HostData.data(), inputX3ShapeSize_1 * dataType1);
+
+ INFO_LOG("Set input success");
+    // Create the input aclTensors
+ ret = CreateAclTensor(inputX1HostData, inputX1Shape, &inputX1DeviceAddr, aclDataType::ACL_BOOL, &inputX1);
+ CHECK_RET(ret == ACL_SUCCESS, return FAILED);
+ ret = CreateAclTensor(inputX2HostData, inputX2Shape, &inputX2DeviceAddr, aclDataType::ACL_FLOAT16, &inputX2);
+ CHECK_RET(ret == ACL_SUCCESS, return FAILED);
+ ret = CreateAclTensor(inputX3HostData, inputX3Shape, &inputX3DeviceAddr, aclDataType::ACL_FLOAT16, &inputX3);
+ CHECK_RET(ret == ACL_SUCCESS, return FAILED);
+    // Create the output aclTensor
+ ret = CreateAclTensor(outputYHostData, outputYShape, &outputYDeviceAddr, aclDataType::ACL_FLOAT16, &outputY);
+ CHECK_RET(ret == ACL_SUCCESS, return FAILED);
+
+    // 3. Call the CANN custom operator API
+    uint64_t workspaceSize = 0;
+    aclOpExecutor *executor;
+    // Query the workspace size and allocate it
+ ret = aclnnSelectV2GetWorkspaceSize(inputX1, inputX2,inputX3, outputY, &workspaceSize, &executor);
+ CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSelectV2GetWorkspaceSize failed. ERROR: %d\n", ret); return FAILED);
+ void *workspaceAddr = nullptr;
+ if (workspaceSize > 0) {
+ ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST);
+ CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return FAILED;);
+ }
+    // Launch the operator
+ ret = aclnnSelectV2(workspaceAddr, workspaceSize, executor, stream);
+ CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSelectV2 failed. ERROR: %d\n", ret); return FAILED);
+
+    // 4. (Boilerplate) wait for the task to finish
+ ret = aclrtSynchronizeStream(stream);
+ CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return FAILED);
+
+    // 5. Copy the result from device memory back to the host; adapt this to the actual API
+    auto size = GetShapeSize(outputYShape);
+    std::vector<aclFloat16> resultData(size, 0);
+    ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outputYDeviceAddr,
+                      size * sizeof(aclFloat16), ACL_MEMCPY_DEVICE_TO_HOST);
+    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return FAILED);
+    // Write the result so that verify_result.py can compare it against the golden data
+    WriteFile("../output/output_y.bin", resultData.data(), outputYShapeSize_1 * dataType1);
+ INFO_LOG("Write output success");
+
+    // 6. Destroy the aclTensors; adapt this to the actual API
+ aclDestroyTensor(inputX1);
+ aclDestroyTensor(inputX2);
+ aclDestroyTensor(inputX3);
+ aclDestroyTensor(outputY);
+
+    // 7. Release device resources; adapt this to the actual API
+ aclrtFree(inputX1DeviceAddr);
+ aclrtFree(inputX2DeviceAddr);
+ aclrtFree(inputX3DeviceAddr);
+ aclrtFree(outputYDeviceAddr);
+ if (workspaceSize > 0) {
+ aclrtFree(workspaceAddr);
+ }
+ aclrtDestroyStream(stream);
+ aclrtResetDevice(deviceId);
+ aclFinalize();
+
+ return SUCCESS;
+}
\ No newline at end of file
diff --git a/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/run.sh b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f1eab8d02f0e998b1651f0b2f81cab58603896d4
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/run.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+# This file is a part of the CANN Open Software.
+# Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+# Please refer to the License for details. You may not use this file except in compliance with the License.
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+# See LICENSE in the root of the software repository for the full text of the License.
+# ======================================================================================================================
+
+if [ -n "$ASCEND_INSTALL_PATH" ]; then
+ _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH
+elif [ -n "$ASCEND_HOME_PATH" ]; then
+ _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH
+else
+ if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then
+ _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest
+ else
+ _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest
+ fi
+fi
+source $_ASCEND_INSTALL_PATH/bin/setenv.bash
+export DDK_PATH=$_ASCEND_INSTALL_PATH
+export NPU_HOST_LIB=$_ASCEND_INSTALL_PATH/lib64
+
+rm -rf $HOME/ascend/log/*
+rm -f ./input/*.bin
+rm -f ./output/*.bin
+
+python3 gen_data.py
+
+if [ $? -ne 0 ]; then
+ echo "ERROR: generate input data failed!"
+    exit 1
+fi
+echo "INFO: generate input data success!"
+set -e
+rm -rf build
+mkdir -p build
+cmake -B build
+cmake --build build -j
+(
+ cd build
+ ./execute_select_v2_op
+)
+ret=`python3 verify_result.py output/output_y.bin output/golden.bin`
+echo $ret
+if [ "x$ret" == "xtest pass" ]; then
+ echo ""
+ echo "#####################################"
+ echo "INFO: you have passed the Precision!"
+ echo "#####################################"
+ echo ""
+fi
\ No newline at end of file
diff --git a/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/verify_result.py b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/verify_result.py
new file mode 100644
index 0000000000000000000000000000000000000000..837374d494ca98a0ee6aa17cf90968dd0602da43
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/verify_result.py
@@ -0,0 +1,37 @@
+#!/usr/bin/python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+# This file is a part of the CANN Open Software.
+# Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+# Please refer to the License for details. You may not use this file except in compliance with the License.
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+# See LICENSE in the root of the software repository for the full text of the License.
+# ======================================================================================================================
+
+import os
+import sys
+import numpy as np
+
+LOSS = 1e-3  # tolerated error; for fp16 both the absolute and relative error are generally expected to stay within 1e-3
+MINIMUM = 10e-10
+
+
+def verify_result(real_result, golden):
+    dtype = np.float16
+    real_result = np.fromfile(real_result, dtype=dtype)  # load the actual result from the bin file
+    golden = np.fromfile(golden, dtype=dtype)  # load the expected (golden) result from the bin file
+    result = np.abs(real_result - golden)  # absolute difference between actual and expected results
+    deno = np.maximum(np.abs(real_result), np.abs(golden))  # element-wise maximum used as the denominator
+    result_atol = np.less_equal(result, LOSS)  # absolute-error check
+    result_rtol = np.less_equal(result / np.add(deno, MINIMUM), LOSS)  # relative-error check
+    if not result_rtol.all() and not result_atol.all():
+        if np.sum(result_rtol == False) > real_result.size * LOSS and \
+                np.sum(result_atol == False) > real_result.size * LOSS:  # fail when too many elements exceed the tolerance
+            print("[ERROR] result error")
+            return False
+    print("test pass")
+    return True
+
+if __name__ == '__main__':
+ verify_result(sys.argv[1], sys.argv[2])
diff --git a/operator_contrib/SelectV2Sample/framework/CMakeLists.txt b/operator_contrib/SelectV2Sample/framework/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b6be9b492610f4d45b25bb7725648df9aac39a12
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/framework/CMakeLists.txt
@@ -0,0 +1,11 @@
+if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mindspore")
+ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/caffe_plugin")
+ add_subdirectory(caffe_plugin)
+ endif()
+ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tf_plugin")
+ add_subdirectory(tf_plugin)
+ endif()
+ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/onnx_plugin")
+ add_subdirectory(onnx_plugin)
+ endif()
+endif()
diff --git a/operator_contrib/SelectV2Sample/framework/tf_plugin/CMakeLists.txt b/operator_contrib/SelectV2Sample/framework/tf_plugin/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a6aba5c207d3b85ad16fdea69dd813dd6cc371b1
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/framework/tf_plugin/CMakeLists.txt
@@ -0,0 +1,14 @@
+
+aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} plugin_srcs)
+add_library(cust_tf_parsers SHARED ${plugin_srcs})
+target_compile_definitions(cust_tf_parsers PRIVATE google=ascend_private)
+if(ENABLE_CROSS_COMPILE)
+ target_link_directories(cust_tf_parsers PRIVATE
+ ${CMAKE_COMPILE_COMPILER_LIBRARY}
+ ${CMAKE_COMPILE_RUNTIME_LIBRARY}
+ )
+endif()
+target_link_libraries(cust_tf_parsers PRIVATE intf_pub graph)
+install(TARGETS cust_tf_parsers
+ LIBRARY DESTINATION packages/vendors/${vendor_name}/framework/tensorflow
+)
diff --git a/operator_contrib/SelectV2Sample/framework/tf_plugin/tensorflow_select_v2_plugin.cc b/operator_contrib/SelectV2Sample/framework/tf_plugin/tensorflow_select_v2_plugin.cc
new file mode 100644
index 0000000000000000000000000000000000000000..61a92a953b1d7242b57b33da5a64ed3d7c66a1fb
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/framework/tf_plugin/tensorflow_select_v2_plugin.cc
@@ -0,0 +1,23 @@
+/* Copyright (C) 2020-2021. Huawei Technologies Co., Ltd. All
+rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the Apache License Version 2.0.
+ * You may not use this file except in compliance with the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Apache License for more details at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+
+#include "register/register.h"
+
+namespace domi {
+// register op info to GE
+REGISTER_CUSTOM_OP("SelectV2")
+ .FrameworkType(TENSORFLOW) // type: CAFFE, TENSORFLOW
+ .OriginOpType("SelectV2") // name in tf module
+ .ParseParamsByOperatorFn(AutoMappingByOpFn);
+} // namespace domi
diff --git a/operator_contrib/SelectV2Sample/op_host/select_v2.cpp b/operator_contrib/SelectV2Sample/op_host/select_v2.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2d4eeb7e460517c75c906aca4425f5aaabb5707e
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/op_host/select_v2.cpp
@@ -0,0 +1,207 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+#include <iostream>
+#include "select_v2_tiling.h"
+#include "register/op_def_registry.h"
+#include "tiling/platform/platform_ascendc.h"
+
+namespace optiling {
+const uint32_t BLOCK_SIZE = 32;
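+// TilingFunc splits the work for the vectorized equal-shape path (tiling key 1): it derives the
+// per-core tile size from the UB size and the input data type. When the condition/x1/x2/y element
+// counts differ, it records the reversed shapes and strides needed by the scalar broadcast path
+// (tiling key 2) instead.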
+static ge::graphStatus TilingFunc(gert::TilingContext* context)
+{
+ SelectV2TilingData tiling;
+ auto shape_x = context->GetInputTensor(0)->GetOriginShape();
+ int32_t M = 1, N = 1, Z = 1;
+ if(shape_x.GetDimNum() > 2)
+ {
+ for (int i = 0; i < shape_x.GetDimNum() - 2; i++)
+ Z *= shape_x.GetDim(i);
+ M = shape_x.GetDim(shape_x.GetDimNum() - 2);
+ N = shape_x.GetDim(shape_x.GetDimNum() - 1);
+        std::cout << "M:" << M << " N:" << N << " Z:" << Z << std::endl;
+    }
+    else if(shape_x.GetDimNum() == 2)
+    {
+        M = shape_x.GetDim(0);
+        N = shape_x.GetDim(1);
+    }
+    tiling.set_M(M);
+    tiling.set_N(N);
+    tiling.set_Z(Z);
+    uint32_t sizeofdatatype = 4;
+    uint32_t NUM = 8;
+    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
+ auto socVersion = ascendcPlatform.GetSocVersion();
+ uint64_t ub_size;
+ ascendcPlatform.GetCoreMemSize(platform_ascendc::CoreMemType::UB, ub_size);
+ auto aivNum = ascendcPlatform.GetCoreNum();
+ uint32_t totalLength = context->GetInputTensor(0)->GetShapeSize();
+    // Read the input shape information
+    uint32_t inputNum = context->GetInputShape(1)->GetStorageShape().GetShapeSize();   // number of input elements
+    uint32_t inputBytes = GetSizeByDataType(context->GetInputDesc(1)->GetDataType());  // bytes per element
+    uint32_t inputLength = inputBytes * inputNum;                                      // input length in bytes
+ auto dt = context->GetInputTensor(0)->GetDataType();
+ if(dt == ge::DT_INT8){
+ sizeofdatatype = 1;
+ NUM = 19;
+ }else if(dt == ge::DT_FLOAT16){
+ sizeofdatatype = 2;
+ NUM = 10;
+ }
+ else if (dt == ge::DT_INT32) {
+ sizeofdatatype = 4;
+ NUM = 11;
+ }
+ else{ //DT_FLOAT
+ sizeofdatatype = 4;
+ NUM = 8;
+ }
+ uint32_t ALIGN_NUM = BLOCK_SIZE / sizeofdatatype;
+ uint32_t tiling_size = ((ub_size) / BLOCK_SIZE / 2) / NUM;
+ tiling_size = tiling_size <= 8 ? tiling_size : tiling_size / 8 * 8;
+ uint32_t block_size = tiling_size * ALIGN_NUM;
+ aivNum = (aivNum < totalLength / block_size) ? aivNum : (totalLength / block_size);
+ aivNum = aivNum >= 1 ? aivNum : 1;
+    uint32_t core_size = 0;
+ if(aivNum != 0 && ALIGN_NUM != 0){
+ core_size = (totalLength / aivNum) / (ALIGN_NUM * 8) * (ALIGN_NUM * 8);
+ }
+ uint32_t core_remain = totalLength - aivNum * core_size;
+ tiling.set_totalLength(totalLength);
+ tiling.set_ALIGN_NUM(ALIGN_NUM);
+ tiling.set_tiling_size(tiling_size);
+ tiling.set_block_size(block_size);
+ tiling.set_aivNum(aivNum);
+ tiling.set_core_size(core_size);
+ tiling.set_core_remain(core_remain);
+ uint32_t x2Size = context->GetInputShape(2)->GetStorageShape().GetShapeSize();
+ uint32_t cdtSize = context->GetInputShape(0)->GetStorageShape().GetShapeSize();
+ uint32_t x1Size = context->GetInputShape(1)->GetStorageShape().GetShapeSize();
+ uint32_t ySize = context->GetOutputShape(0)->GetStorageShape().GetShapeSize();
+ if(ySize != cdtSize || ySize != x1Size || ySize != x2Size)
+ {
+ context->SetTilingKey(2);
+ int32_t y_ndarray[20], cdt_ndarray[20], x1_ndarray[20],x2_ndarray[20];
+ int32_t y_dimensional, cdt_dimensional, x1_dimensional, x2_dimensional;
+ auto shape_y = context->GetOutputShape(0)->GetOriginShape();
+ auto shape_cdt = context->GetInputTensor(0)->GetOriginShape();
+ auto shape_x1 = context->GetInputTensor(1)->GetOriginShape();
+ auto shape_x2 = context->GetInputTensor(2)->GetOriginShape();
+ y_dimensional = shape_y.GetDimNum();
+ cdt_dimensional = shape_cdt.GetDimNum();
+ x1_dimensional = shape_x1.GetDimNum();
+ x2_dimensional = shape_x2.GetDimNum();
+        // Record each shape in reversed order (last dim at index 0); missing leading dims count as 1.
+        for(int i = 0; i < y_dimensional; i++)
+        {
+            y_ndarray[y_dimensional - i - 1] = shape_y.GetDim(i);
+            cdt_ndarray[i] = 1;
+            x1_ndarray[i] = 1;
+            x2_ndarray[i] = 1;
+        }
+        for(int i = 0; i < cdt_dimensional; i++)
+            cdt_ndarray[cdt_dimensional - i - 1] = shape_cdt.GetDim(i);
+        for(int i = 0; i < x1_dimensional; i++)
+            x1_ndarray[x1_dimensional - i - 1] = shape_x1.GetDim(i);
+        for(int i = 0; i < x2_dimensional; i++)
+            x2_ndarray[x2_dimensional - i - 1] = shape_x2.GetDim(i);
+        // Prefix products give the stride of each (reversed) dimension; the last entry is the element count.
+        int32_t y_sumndarray[20], cdt_sumndarray[20], x1_sumndarray[20], x2_sumndarray[20];
+        y_sumndarray[0] = cdt_sumndarray[0] = x1_sumndarray[0] = x2_sumndarray[0] = 1;
+        for(int i = 0; i < y_dimensional; i++)
+        {
+            y_sumndarray[i + 1] = y_sumndarray[i] * y_ndarray[i];
+            cdt_sumndarray[i + 1] = cdt_sumndarray[i] * cdt_ndarray[i];
+            x1_sumndarray[i + 1] = x1_sumndarray[i] * x1_ndarray[i];
+            x2_sumndarray[i + 1] = x2_sumndarray[i] * x2_ndarray[i];
+        }
+        tiling.set_y_dimensional(y_dimensional);
+        tiling.set_y_ndarray(y_ndarray);
+        tiling.set_cdt_ndarray(cdt_ndarray);
+        tiling.set_x1_ndarray(x1_ndarray);
+        tiling.set_x2_ndarray(x2_ndarray);
+        tiling.set_y_sumndarray(y_sumndarray);
+        tiling.set_cdt_sumndarray(cdt_sumndarray);
+        tiling.set_x1_sumndarray(x1_sumndarray);
+        tiling.set_x2_sumndarray(x2_sumndarray);
+    }
+    else
+    {
+        context->SetTilingKey(1);
+ }
+ context->SetBlockDim(1);
+ tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity());
+ context->GetRawTilingData()->SetDataSize(tiling.GetDataSize());
+ size_t *currentWorkspace = context->GetWorkspaceSizes(1);
+ currentWorkspace[0] = 0;
+ return ge::GRAPH_SUCCESS;
+}
+}
+
+namespace ge {
+static ge::graphStatus InferShape(gert::InferShapeContext* context)
+{
+ const gert::Shape* x1_shape = context->GetInputShape(0);
+ gert::Shape* y_shape = context->GetOutputShape(0);
+ *y_shape = *x1_shape;
+ return GRAPH_SUCCESS;
+}
+}
+
+namespace ops {
+class SelectV2 : public OpDef {
+public:
+ explicit SelectV2(const char* name) : OpDef(name)
+ {
+ this->Input("condition")
+ .ParamType(REQUIRED)
+ .DataType({ge::DT_BOOL, ge::DT_BOOL, ge::DT_BOOL, ge::DT_BOOL})
+ .Format({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND})
+ .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND});
+ this->Input("x1")
+ .ParamType(REQUIRED)
+ .DataType({ge::DT_FLOAT, ge::DT_FLOAT16, ge::DT_INT32, ge::DT_INT8})
+ .Format({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND})
+ .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND});
+ this->Input("x2")
+ .ParamType(REQUIRED)
+ .DataType({ge::DT_FLOAT, ge::DT_FLOAT16, ge::DT_INT32, ge::DT_INT8})
+ .Format({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND})
+ .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND});
+ this->Output("y")
+ .ParamType(REQUIRED)
+ .DataType({ge::DT_FLOAT, ge::DT_FLOAT16, ge::DT_INT32, ge::DT_INT8})
+ .Format({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND})
+ .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND});
+
+ this->SetInferShape(ge::InferShape);
+ this->AICore()
+ .SetTiling(optiling::TilingFunc);
+ this->AICore().AddConfig("ascend310b");
+ }
+};
+OP_ADD(SelectV2);
+}
diff --git a/operator_contrib/SelectV2Sample/op_host/select_v2_tiling.h b/operator_contrib/SelectV2Sample/op_host/select_v2_tiling.h
new file mode 100644
index 0000000000000000000000000000000000000000..bfe19018892e5bab39fb9710657897c8cac44896
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/op_host/select_v2_tiling.h
@@ -0,0 +1,41 @@
+#ifndef SELECTV2_TILING_H
+#define SELECTV2_TILING_H
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+#include "register/tilingdata_base.h"
+namespace optiling {
+BEGIN_TILING_DATA_DEF(SelectV2TilingData)
+ TILING_DATA_FIELD_DEF(int32_t, M);
+ TILING_DATA_FIELD_DEF(int32_t, N);
+ TILING_DATA_FIELD_DEF(int32_t, Z);
+ TILING_DATA_FIELD_DEF(uint32_t, totalLength);
+ TILING_DATA_FIELD_DEF(uint32_t, tileNum);
+ TILING_DATA_FIELD_DEF(uint32_t, ALIGN_NUM);
+ TILING_DATA_FIELD_DEF(uint32_t, tiling_size);
+ TILING_DATA_FIELD_DEF(uint32_t, block_size);
+ TILING_DATA_FIELD_DEF(uint32_t, aivNum);
+ TILING_DATA_FIELD_DEF(uint32_t, core_size);
+ TILING_DATA_FIELD_DEF(uint32_t, core_remain);
+ TILING_DATA_FIELD_DEF(int32_t, y_dimensional);
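+  // The *_ndarray fields hold the tensor dimensions in reversed order (last dim first), padded with 1;
+  // the *_sumndarray fields hold the corresponding prefix products (strides), which the broadcast
+  // kernel uses to map a flat output index to the matching input offsets.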
+ TILING_DATA_FIELD_DEF_ARR(int32_t, 20, y_ndarray);
+ TILING_DATA_FIELD_DEF_ARR(int32_t, 20, cdt_ndarray);
+ TILING_DATA_FIELD_DEF_ARR(int32_t, 20, x1_ndarray);
+ TILING_DATA_FIELD_DEF_ARR(int32_t, 20, x2_ndarray);
+ TILING_DATA_FIELD_DEF_ARR(int32_t, 20, y_sumndarray);
+ TILING_DATA_FIELD_DEF_ARR(int32_t, 20, cdt_sumndarray);
+ TILING_DATA_FIELD_DEF_ARR(int32_t, 20, x1_sumndarray);
+ TILING_DATA_FIELD_DEF_ARR(int32_t, 20, x2_sumndarray);
+
+END_TILING_DATA_DEF;
+
+REGISTER_TILING_DATA_CLASS(SelectV2, SelectV2TilingData)
+}
+
+#endif
\ No newline at end of file
diff --git a/operator_contrib/SelectV2Sample/op_kernel/select_v2.cpp b/operator_contrib/SelectV2Sample/op_kernel/select_v2.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..444673f1ba898ad26c13afdccada6aecc4782a51
--- /dev/null
+++ b/operator_contrib/SelectV2Sample/op_kernel/select_v2.cpp
@@ -0,0 +1,239 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+#define K_MAX_SHAPE_DIM 0
+#include <type_traits>
+#include "kernel_operator.h"
+
+using namespace AscendC;
+constexpr int32_t BUFFER_NUM = 2;
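+// KernelSelectV2 handles the equal-shape case: tiles are copied in, the condition is cast to half and
+// compared against zero to build a bit mask, and Select picks between x1 and x2 (int8/int32 inputs are
+// cast to half/float first). KernelSelectV2_Broadcast handles mismatched shapes element by element on
+// global memory, using the reversed-shape/stride arrays prepared by the host tiling function.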
+template<typename TYPE_X1, typename TYPE_CONDITION> class KernelSelectV2 {
+ public:
+ __aicore__ inline KernelSelectV2() {}
+ __aicore__ inline void Init(GM_ADDR condition, GM_ADDR x1, GM_ADDR x2, GM_ADDR y,int32_t M, int32_t N, int32_t Z,
+ uint32_t totalLength, uint32_t ALIGN_NUM, uint32_t block_size, uint32_t core_size, uint32_t core_remain) {
+ ASSERT(GetBlockNum() != 0 && "block dim can not be zero!");
+ this->blockLength = core_size + (GetBlockNum() == GetBlockIdx() + 1 ? core_remain : 0);
+ this->tileLength = block_size;
+ this->ALIGN_NUM = ALIGN_NUM;
+ if(ALIGN_NUM != 0){
+ this->blockLength = this->blockLength + (this->blockLength % ALIGN_NUM ? ALIGN_NUM - this->blockLength % ALIGN_NUM : 0);
+ }
+ this->totalLength = totalLength;
+ auto startPointer = core_size * GetBlockIdx();
+ auto bufferlength = this->blockLength;
+        this->M = M;   // rows
+        this->N = N;   // columns
+        this->Z = Z;   // loop count
+ Gm_x1.SetGlobalBuffer((__gm__ TYPE_X1*)x1 ,bufferlength);
+ Gm_x2.SetGlobalBuffer((__gm__ TYPE_X1*)x2, bufferlength);
+ Gm_cdt.SetGlobalBuffer((__gm__ uint8_t *)condition, bufferlength);
+ Gm_y.SetGlobalBuffer((__gm__ TYPE_X1*)y, bufferlength);
+ if(this->tileLength != 0){
+ this->tileNum = this->blockLength / this->tileLength + (this->blockLength % this->tileLength > 0);
+ }
+ pipe.InitBuffer(Q_x1, BUFFER_NUM, this->tileLength * sizeof(TYPE_X1));
+ pipe.InitBuffer(Q_x2, BUFFER_NUM, this->tileLength * sizeof(TYPE_X1));
+ pipe.InitBuffer(Q_cdt, BUFFER_NUM, this->tileLength * sizeof(uint8_t));
+ pipe.InitBuffer(Q_y, BUFFER_NUM, this->tileLength * sizeof(TYPE_X1));
+ pipe.InitBuffer(B_bits, this->tileLength * sizeof(uint8_t));
+ pipe.InitBuffer(B_half, this->tileLength * sizeof(half));
+        pipe.InitBuffer(B_zero, this->tileLength * sizeof(half)); // an all-zero tensor used as the Compare reference
+        this->zero = B_zero.Get<half>();
+ Duplicate(this->zero, half(0), this->tileLength);
+        if constexpr (std::is_same_v<TYPE_X1, int32_t>) {
+ pipe.InitBuffer(B_x1, this->tileLength * sizeof(float));
+ pipe.InitBuffer(B_x2, this->tileLength * sizeof(float));
+ pipe.InitBuffer(B_y, this->tileLength * sizeof(float));
+ }
+        else if constexpr (std::is_same_v<TYPE_X1, int8_t>) {
+ pipe.InitBuffer(B_x1, this->tileLength * sizeof(half));
+ pipe.InitBuffer(B_x2, this->tileLength * sizeof(half));
+ pipe.InitBuffer(B_y, this->tileLength * sizeof(half));
+ }
+ }
+ __aicore__ inline void Process() {
+ int32_t loopCount = this->tileNum;
+ for (int32_t i = 0; i < loopCount-1; i++) {
+ CopyIn(i, this->tileLength);
+ Compute(i, this->tileLength);
+ CopyOut(i, this->tileLength);
+ }
+ uint32_t length = this->blockLength - this->tileLength * (loopCount - 1);
+ CopyIn(loopCount - 1, (length +31 ) / 32 * 32);
+ Compute(loopCount - 1, (length +31 ) / 32 * 32);
+ CopyOut(loopCount - 1, (length +31 ) / 32 * 32);
+ }
+ private:
+ __aicore__ inline void CopyIn(int32_t progress, uint32_t length) {
+        LocalTensor<TYPE_X1> x1 = Q_x1.AllocTensor<TYPE_X1>();
+        LocalTensor<TYPE_X1> x2 = Q_x2.AllocTensor<TYPE_X1>();
+        LocalTensor<uint8_t> cdt = Q_cdt.AllocTensor<uint8_t>();
+ DataCopy(x1, Gm_x1[progress * this->tileLength], length);
+ DataCopy(x2, Gm_x2[progress * this->tileLength], length);
+ DataCopy(cdt, (Gm_cdt[progress * this->tileLength]), length);
+ Q_x1.EnQue(x1);
+ Q_x2.EnQue(x2);
+ Q_cdt.EnQue(cdt);
+ }
+ __aicore__ inline void Compute(int32_t progress, uint32_t length) {
+        LocalTensor<TYPE_X1> x1 = Q_x1.DeQue<TYPE_X1>();
+        LocalTensor<TYPE_X1> x2 = Q_x2.DeQue<TYPE_X1>();
+        LocalTensor<uint8_t> cdt = Q_cdt.DeQue<uint8_t>();
+        LocalTensor<TYPE_X1> y = Q_y.AllocTensor<TYPE_X1>();
+        auto bits = B_bits.Get<uint8_t>();
+        if constexpr (std::is_same_v<TYPE_X1, float>) {
+            auto p1 = B_half.Get<half>();
+ Cast(p1, cdt, RoundMode::CAST_NONE, length);
+ Compare(bits, p1,zero,CMPMODE::NE,length);
+ Select(y, bits, x1, x2, SELMODE::VSEL_TENSOR_TENSOR_MODE, length);
+ }
+        else if constexpr (std::is_same_v<TYPE_X1, half>) {
+            auto p1 = B_half.Get<half>();
+ Cast(p1, cdt, RoundMode::CAST_NONE, length);
+ Compare(bits, p1,zero,CMPMODE::NE,length);
+ Select(y, bits, x1, x2, SELMODE::VSEL_TENSOR_TENSOR_MODE, length);
+ }
+        else if constexpr (std::is_same_v<TYPE_X1, int8_t>) {
+            auto p1 = B_half.Get<half>();
+            Cast(p1, cdt, RoundMode::CAST_NONE, length);
+            Compare(bits, p1, zero, CMPMODE::NE, length);
+            auto half_x1 = B_x1.Get<half>();
+            auto half_x2 = B_x2.Get<half>();
+            auto half_y = B_y.Get<half>();
+ Cast(half_x1, x1, RoundMode::CAST_NONE, length);
+ Cast(half_x2, x2, RoundMode::CAST_NONE, length);
+ Select(half_y, bits, half_x1, half_x2, SELMODE::VSEL_TENSOR_TENSOR_MODE, length);
+ Cast(y, half_y, RoundMode::CAST_NONE, length);
+ }
+        else {
+            auto p1 = B_half.Get<half>();
+            Cast(p1, cdt, RoundMode::CAST_NONE, length);
+            Compare(bits, p1, zero, CMPMODE::NE, length);
+            auto float_x1 = B_x1.Get<float>();
+            auto float_x2 = B_x2.Get<float>();
+            auto float_y = B_y.Get<float>();
+ Cast(float_x1, x1, RoundMode::CAST_NONE, length);
+ Cast(float_x2, x2, RoundMode::CAST_NONE, length);
+ Select(float_y, bits, float_x1, float_x2, SELMODE::VSEL_TENSOR_TENSOR_MODE, length);
+ Cast(y, float_y, RoundMode::CAST_RINT, length);
+ }
+ Q_x1.FreeTensor(x1);
+ Q_x2.FreeTensor(x2);
+ Q_cdt.FreeTensor(cdt);
+ Q_y.EnQue(y);
+ }
+ __aicore__ inline void CopyOut(int32_t progress, uint32_t length) {
+        LocalTensor<TYPE_X1> y = Q_y.DeQue<TYPE_X1>();
+ DataCopy(Gm_y[progress * this->tileLength], y, length);
+ Q_y.FreeTensor(y);
+ }
+ private:
+ TPipe pipe;
+    TQue<QuePosition::VECIN, BUFFER_NUM> Q_x1, Q_x2, Q_cdt;
+    TQue<QuePosition::VECOUT, BUFFER_NUM> Q_y;
+    TBuf<QuePosition::VECCALC> B_result, B_zero, B_bits, B_half;
+    TBuf<QuePosition::VECCALC> B_x1, B_x2, B_y;
+    int32_t M;
+    int32_t N;
+    int32_t Z;
+    LocalTensor<half> zero;
+    GlobalTensor<TYPE_X1> Gm_x1, Gm_x2, Gm_y;
+    GlobalTensor<uint8_t> Gm_cdt;
+ uint32_t blockLength;
+ uint32_t tileNum;
+ uint32_t totalLength;
+ uint32_t tileLength;
+ uint32_t ALIGN_NUM;
+ };
+template<typename TYPE_X1, typename TYPE_CONDITION> class KernelSelectV2_Broadcast {
+ using T = TYPE_X1;
+public:
+ __aicore__ inline KernelSelectV2_Broadcast() {}
+ __aicore__ inline void Init(GM_ADDR condition, GM_ADDR x1, GM_ADDR x2, GM_ADDR y,int32_t M, int32_t N, int32_t Z,
+ int32_t y_dimensional,
+ int32_t* y_ndarray, int32_t* cdt_ndarray, int32_t* x1_ndarray, int32_t* x2_ndarray,
+ int32_t* y_sumndarray, int32_t* cdt_sumndarray, int32_t* x1_sumndarray, int32_t* x2_sumndarray) {
+ ASSERT(GetBlockNum() != 0 && "block dim can not be zero!");
+        this->M = M;   // rows
+        this->N = N;   // columns
+        this->Z = Z;   // loop count
+ this->y_dimensional = y_dimensional;
+ this->y_ndarray = y_ndarray;
+ this->cdt_ndarray = cdt_ndarray;
+ this->x1_ndarray = x1_ndarray;
+ this->x2_ndarray = x2_ndarray;
+ this->y_sumndarray = y_sumndarray;
+ this->cdt_sumndarray = cdt_sumndarray;
+ this->x1_sumndarray = x1_sumndarray;
+ this->x2_sumndarray = x2_sumndarray;
+ Gm_x1.SetGlobalBuffer((__gm__ TYPE_X1*)x1 , 1);
+ Gm_x2.SetGlobalBuffer((__gm__ TYPE_X1*)x2 , 1);
+ Gm_cdt.SetGlobalBuffer((__gm__ TYPE_CONDITION*)condition , 1);
+ Gm_y.SetGlobalBuffer((__gm__ TYPE_X1*)y , 1);
+ }
+ __aicore__ inline void Process() {
+ int dim = this->y_dimensional;
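+        // For each flat output index j, decompose j into per-dimension indices using the output
+        // strides (y_sumndarray) and accumulate the matching input offsets, skipping dimensions of
+        // size 1 so that condition/x1/x2 are effectively broadcast.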
+        for(int j = 0; j < this->y_sumndarray[dim]; j++)
+ {
+ int cdt_start = 0, x1_start = 0, x2_start = 0;
+            for(int k = 0; k < dim; k++){
+                if(this->cdt_ndarray[k] != 1){
+ cdt_start += this->cdt_sumndarray[k] * (j / this->y_sumndarray[k] % this->y_ndarray[k]);
+ }
+ if(this->x1_ndarray[k] != 1){
+ x1_start += this->x1_sumndarray[k] * (j / this->y_sumndarray[k] % this->y_ndarray[k]);
+ }
+ if(this->x2_ndarray[k] != 1){
+ x2_start += this->x2_sumndarray[k] * (j / this->y_sumndarray[k] % this->y_ndarray[k]);
+ }
+ }
+ TYPE_X1 pos = Gm_cdt.GetValue(cdt_start) ? Gm_x1.GetValue(x1_start) : Gm_x2.GetValue(x2_start);
+ Gm_y.SetValue(j,pos);
+ }
+ }
+private:
+ TPipe pipe;
+    TQue<QuePosition::VECIN, BUFFER_NUM> inQueueSTART, inQueueEND, inQueueWEIGHT;
+    TQue<QuePosition::VECOUT, BUFFER_NUM> outQueueY;
+    TBuf<QuePosition::VECCALC> tmp1, tmp2, tmp3, boox;
+    GlobalTensor<TYPE_X1> Gm_x1, Gm_x2, Gm_y;
+    GlobalTensor<TYPE_CONDITION> Gm_cdt;
+ int32_t M;
+ int32_t N;
+ int32_t Z;
+ int32_t y_dimensional;
+ int32_t *y_ndarray;
+ int32_t *cdt_ndarray;
+ int32_t *x1_ndarray;
+ int32_t *x2_ndarray;
+ int32_t *y_sumndarray;
+ int32_t *cdt_sumndarray;
+ int32_t *x1_sumndarray;
+ int32_t *x2_sumndarray;
+};
+
+ extern "C" __global__ __aicore__ void select_v2(GM_ADDR condition, GM_ADDR x1, GM_ADDR x2, GM_ADDR y, GM_ADDR workspace, GM_ADDR tiling) {
+ GET_TILING_DATA(tiling_data, tiling);
+ if(TILING_KEY_IS(1)){
+        KernelSelectV2<DTYPE_X1, DTYPE_CONDITION> op;
+ op.Init(condition, x1, x2, y, tiling_data.M, tiling_data.N, tiling_data.Z,
+ tiling_data.totalLength, tiling_data.ALIGN_NUM, tiling_data.block_size, tiling_data.core_size, tiling_data.core_remain);
+ op.Process();
+ }
+ else if(TILING_KEY_IS(2)){
+        KernelSelectV2_Broadcast<DTYPE_X1, DTYPE_CONDITION> op;
+ op.Init(condition, x1, x2, y, tiling_data.M, tiling_data.N, tiling_data.Z,
+ tiling_data.y_dimensional,
+ tiling_data.y_ndarray, tiling_data.cdt_ndarray, tiling_data.x1_ndarray, tiling_data.x2_ndarray,
+ tiling_data.y_sumndarray, tiling_data.cdt_sumndarray, tiling_data.x1_sumndarray, tiling_data.x2_sumndarray);
+ op.Process();
+ }
+ }
\ No newline at end of file
diff --git a/operator_contrib/SelectV2Sample/opp_kernel_aicpu/.keep b/operator_contrib/SelectV2Sample/opp_kernel_aicpu/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/operator_contrib/SelectV2Sample/tests/.keep b/operator_contrib/SelectV2Sample/tests/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/operator_contrib/SelectV2Sample/tests/st/.keep b/operator_contrib/SelectV2Sample/tests/st/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/operator_contrib/SelectV2Sample/tests/ut/.keep b/operator_contrib/SelectV2Sample/tests/ut/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391