From e23e25aa5f2a3c10a21776c9c16ffae3d0815532 Mon Sep 17 00:00:00 2001 From: hid20190403 Date: Fri, 4 Jul 2025 14:53:42 +0000 Subject: [PATCH 1/6] update /samples/operator_contrib Signed-off-by: hid20190403 --- .../SelectV2Sample/CMakeLists.txt | 25 ++ operator_contrib/SelectV2Sample/README.md | 72 +++++ .../SelectV2Sample/docs/SelectV2.md | 88 ++++++ .../AclNNInvocationNaive/CMakeLists.txt | 60 +++++ .../examples/AclNNInvocationNaive/README.md | 62 +++++ .../examples/AclNNInvocationNaive/gen_data.py | 35 +++ .../examples/AclNNInvocationNaive/main.cpp | 252 ++++++++++++++++++ .../examples/AclNNInvocationNaive/run.sh | 54 ++++ .../AclNNInvocationNaive/verify_result.py | 37 +++ .../SelectV2Sample/framework/CMakeLists.txt | 11 + .../framework/tf_plugin/CMakeLists.txt | 14 + .../tf_plugin/tensorflow_select_v2_plugin.cc | 23 ++ .../SelectV2Sample/op_host/select_v2.cpp | 207 ++++++++++++++ .../SelectV2Sample/op_host/select_v2_tiling.h | 41 +++ .../SelectV2Sample/op_kernel/select_v2.cpp | 239 +++++++++++++++++ 15 files changed, 1220 insertions(+) create mode 100644 operator_contrib/SelectV2Sample/CMakeLists.txt create mode 100644 operator_contrib/SelectV2Sample/README.md create mode 100644 operator_contrib/SelectV2Sample/docs/SelectV2.md create mode 100644 operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/CMakeLists.txt create mode 100644 operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/README.md create mode 100644 operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/gen_data.py create mode 100644 operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/main.cpp create mode 100644 operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/run.sh create mode 100644 operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/verify_result.py create mode 100644 operator_contrib/SelectV2Sample/framework/CMakeLists.txt create mode 100644 operator_contrib/SelectV2Sample/framework/tf_plugin/CMakeLists.txt create mode 100644 operator_contrib/SelectV2Sample/framework/tf_plugin/tensorflow_select_v2_plugin.cc create mode 100644 operator_contrib/SelectV2Sample/op_host/select_v2.cpp create mode 100644 operator_contrib/SelectV2Sample/op_host/select_v2_tiling.h create mode 100644 operator_contrib/SelectV2Sample/op_kernel/select_v2.cpp diff --git a/operator_contrib/SelectV2Sample/CMakeLists.txt b/operator_contrib/SelectV2Sample/CMakeLists.txt new file mode 100644 index 000000000..e4b38c4be --- /dev/null +++ b/operator_contrib/SelectV2Sample/CMakeLists.txt @@ -0,0 +1,25 @@ +add_ops_compile_options( + OP_NAME SelectV2 + OPTIONS --cce-auto-sync=on + -Wno-deprecated-declarations + -Werror +) + +target_sources(op_host_aclnn PRIVATE +op_host/select_v2.cpp +) + +target_sources(optiling PRIVATE + op_host/select_v2.cpp +) + +target_include_directories(optiling PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/op_host +) + +target_sources(opsproto PRIVATE + op_host/select_v2.cpp +) + +install(FILES op_kernel/select_v2.cpp + DESTINATION ${ASCEND_IMPL_OUT_DIR}/dynamic) \ No newline at end of file diff --git a/operator_contrib/SelectV2Sample/README.md b/operator_contrib/SelectV2Sample/README.md new file mode 100644 index 000000000..4a793c2be --- /dev/null +++ b/operator_contrib/SelectV2Sample/README.md @@ -0,0 +1,72 @@ +## `SelectV2`自定义算子样例说明 +本样例通过`Ascend C`编程语言实现了`SelectV2`算子。 + +### 算子描述 +`SelectV2`算子根据输入参数condition从输入参数x1,x2中选取元素,condition值为TRUE时,选取x1,condition为FALSE时,选取x2。 + +### 算子规格描述 + + + + + + + + + + + + + +
| 算子类型(OpType) | SelectV2 | | | |
|:----------------:|:--------:|:----:|:---------:|:------:|
| | name | Type | data type | format |
| 算子输入 | condition | tensor | bool | ND |
| | x1 | tensor | float32,float16,int32,int8 | ND |
| | x2 | tensor | float32,float16,int32,int8 | ND |
| 算子输出 | y | tensor | float32,float16,int32,int8 | ND |
| 核函数名 | select_v2 | | | |
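
上述选择语义可以用下面的参考实现来直观理解(仅为帮助理解的示意代码,函数名 SelectV2Reference 为本文假定的名称,未体现广播以及分核、数据搬运等算子实现细节):

```cpp
#include <cstddef>

// 逐元素选择语义示意:condition[i] 为 true 时取 x1[i],否则取 x2[i]
template <typename T>
void SelectV2Reference(const bool *condition, const T *x1, const T *x2, T *y, std::size_t n)
{
    for (std::size_t i = 0; i < n; ++i) {
        y[i] = condition[i] ? x1[i] : x2[i];
    }
}
```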
+ +### 支持的产品型号 +本样例支持如下产品型号: +- Atlas A2训练系列产品 +- Atlas 200I/500 A2推理产品 + +### 目录结构介绍 +``` +├── docs                        // 算子文档目录 +├── example                     // 调用示例目录 +├── framework                   // 第三方框架适配目录 +├── op_host                     // host目录 +├── op_kernel                   // kernel目录 +├── opp_kernel_aicpu        // aicpu目录 +└── tests                       // 测试用例目录 +``` + +### 环境要求 +编译运行此样例前,请参考[《CANN软件安装指南》](https://hiascend.com/document/redirect/CannCommunityInstSoftware)完成开发运行环境的部署。 + +### 算子包编译部署 + - 进入到仓库目录 + + ```bash + cd ${git_clone_path}/cann-ops + ``` + + - 执行编译 + + ```bash + bash build.sh + ``` + + - 部署算子包 + + ```bash + bash build_out/CANN-custom_ops--linux..run + ``` + +### 算子调用 + + + + + +
| 目录 | 描述 |
|:----:|:----:|
| AclNNInvocationNaive | 通过aclnn调用的方式调用SelectV2算子。 |
+ +## 更新说明 +| 时间 | 更新事项 | +|----|------| +| 2025/06/10 | 新增本readme | \ No newline at end of file diff --git a/operator_contrib/SelectV2Sample/docs/SelectV2.md b/operator_contrib/SelectV2Sample/docs/SelectV2.md new file mode 100644 index 000000000..c681c4370 --- /dev/null +++ b/operator_contrib/SelectV2Sample/docs/SelectV2.md @@ -0,0 +1,88 @@ +声明:本文使用[Creative Commons License version 4.0](https://creativecommons.org/licenses/by/4.0/legalcode)许可协议,转载、引用或修改等操作请遵循此许可协议。 + +# SelectV2 + +## 支持的产品型号 + +Atlas A2 训练系列产品/Atlas 200I/500 A2推理产品 + +产品形态详细说明请参见[昇腾产品形态说明](https://www.hiascend.com/document/redirect/CannCommunityProductForm)。 + +## 功能描述 + +- 算子功能:该SelectV2算子提供条件选择功能。SelectV2算子的主要功能是根据condition张量的布尔值(True/False),从x1或x2中选择对应位置的元素,并输出一个新的张量。并支持广播(Broadcasting)机制,允许condition、x、y的形状在满足广播规则的情况下自动扩展,以适配计算需求。它适用于动态条件选择场景,如掩码过滤、条件赋值等操作。 + + +## 实现原理 + +调用`Ascend C`的`API`接口`Compare`、`Select`以及`Cast`进行实现。 + +## 算子执行接口 + +每个算子分为两段式接口,必须先调用“aclnnSelectV2GetWorkspaceSize”接口获取计算所需workspace大小以及包含了算子计算流程的执行器,再调用“aclnnSelectV2”接口执行计算。 + +* `aclnnStatus aclnnSelectV2GetWorkspaceSize(const aclTensor* condition, const aclTensor* x1, const aclTensor* x2, const aclTensor* y, uint64_t* workspaceSize, aclOpExecutor** executor)` +* `aclnnStatus aclnnSelectV2(void* workspace, uint64_t workspaceSize, aclOpExecutor* executor, aclrtStream stream)` + +**说明**: + +- 算子执行接口对外屏蔽了算子内部实现逻辑以及不同代际NPU的差异,且开发者无需编译算子,实现了算子的精简调用。 +- 若开发者不使用算子执行接口的调用算子,也可以定义基于Ascend IR的算子描述文件,通过ATC工具编译获得算子om文件,然后加载模型文件执行算子,详细调用方法可参见《应用开发指南》的[单算子调用 > 单算子模型执行](https://hiascend.com/document/redirect/CannCommunityCppOpcall)章节。 + +### aclnnSelectV2GetWorkspaceSize + +- **参数说明:** + + + - condition(aclTensor\*,计算输入):必选参数,Device侧的aclTensor,公式中的输入condition,数据类型支持BOOL,数据格式支持ND。 + - x1(aclTensor\*,计算输入):必选参数,Device侧的aclTensor,公式中的输入x1,数据类型支持FLOAT32、FLOAT16、INT32、INT8,数据格式支持ND。 + - x2(aclTensor\*,计算输入):必选参数,Device侧的aclTensor,公式中的输入x2,数据类型支持FLOAT32、FLOAT16、INT32、INT8,数据格式支持ND。 + - y(aclTensor\*,计算输出):Device侧的aclTensor,公式中的输出y,数据类型支持FLOAT32、FLOAT16、INT32、INT8,数据格式支持ND,输出维度与x1,x2一致。 + - workspaceSize(uint64\_t\*,出参):返回用户需要在Device侧申请的workspace大小。 + - executor(aclOpExecutor\*\*,出参):返回op执行器,包含了算子计算流程。 +- **返回值:** + + 返回aclnnStatus状态码,具体参见[aclnn返回码](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/aolapi/context/common/aclnn%E8%BF%94%E5%9B%9E%E7%A0%81_fuse.md)。 + + ``` + 第一段接口完成入参校验,若出现以下错误码,则对应原因为: + - 返回161001(ACLNN_ERR_PARAM_NULLPTR):如果传入参数是必选输入,输出或者必选属性,且是空指针,则返回161001。 + - 返回161002(ACLNN_ERR_PARAM_INVALID):x、out的数据类型和数据格式不在支持的范围内。 + ``` + +### aclnnSelectV2 + +- **参数说明:** + + - workspace(void\*,入参):在Device侧申请的workspace内存起址。 + - workspaceSize(uint64\_t,入参):在Device侧申请的workspace大小,由第一段接口aclnnSqrtGetWorkspaceSize获取。 + - executor(aclOpExecutor\*,入参):op执行器,包含了算子计算流程。 + - stream(aclrtStream,入参):指定执行任务的AscendCL stream流。 +- **返回值:** + + 返回aclnnStatus状态码,具体参见[aclnn返回码](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/aolapi/context/common/aclnn%E8%BF%94%E5%9B%9E%E7%A0%81_fuse.md)。 + + +## 约束与限制 + +- condition的数据类型支持BOOL,x1,x2,y的数据类型支持FLOAT32、FLOAT16、INT32,INT8,数据格式只支持ND + +## 算子原型 + + + + + + + + + + + + + +
| 算子类型(OpType) | SelectV2 | | | |
|:----------------:|:--------:|:----:|:---------:|:------:|
| | name | type | data type | format |
| 算子输入 | condition | tensor | bool | ND |
| | x1 | tensor | float32,float16,int32,int8 | ND |
| | x2 | tensor | float32,float16,int32,int8 | ND |
| 算子输出 | y | tensor | float32,float16,int32,int8 | ND |
| 核函数名 | select_v2 | | | |
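
结合上文的两段式接口说明,下面给出一个简化的调用流程示意。其中 RunSelectV2 为本文假定的封装函数名,condition、x1、x2、y、stream 假定已按单算子调用的常规方式创建,错误处理与资源释放做了简化,完整流程请参考调用示例:

```cpp
#include "acl/acl.h"
#include "aclnn_select_v2.h"

// 两段式接口调用流程示意(错误处理与资源释放从略)
aclnnStatus RunSelectV2(const aclTensor *condition, const aclTensor *x1, const aclTensor *x2,
                        const aclTensor *y, aclrtStream stream)
{
    // 第一段:获取本次计算所需的workspace大小以及包含算子计算流程的执行器
    uint64_t workspaceSize = 0;
    aclOpExecutor *executor = nullptr;
    aclnnStatus ret = aclnnSelectV2GetWorkspaceSize(condition, x1, x2, y, &workspaceSize, &executor);
    if (ret != ACL_SUCCESS) {
        return ret;
    }

    // 按返回的workspaceSize在Device侧申请workspace内存(为0时无需申请)
    void *workspaceAddr = nullptr;
    if (workspaceSize > 0) {
        aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST);
    }

    // 第二段:下发计算任务,并同步等待任务执行结束
    ret = aclnnSelectV2(workspaceAddr, workspaceSize, executor, stream);
    aclrtSynchronizeStream(stream);

    if (workspaceAddr != nullptr) {
        aclrtFree(workspaceAddr);
    }
    return ret;
}
```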
+ +## 调用示例 + +详见[SelectV2自定义算子样例说明算子调用章节](../README.md#算子调用) \ No newline at end of file diff --git a/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/CMakeLists.txt b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/CMakeLists.txt new file mode 100644 index 000000000..a5681d624 --- /dev/null +++ b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/CMakeLists.txt @@ -0,0 +1,60 @@ +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(acl_execute_select_v2) + +# Compile options +add_compile_options(-std=c++11) + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./") + +set(INC_PATH $ENV{DDK_PATH}) + +if (NOT DEFINED ENV{DDK_PATH}) + set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest") + message(STATUS "set default INC_PATH: ${INC_PATH}") +else () + message(STATUS "env INC_PATH: ${INC_PATH}") +endif() + +set(CUST_PKG_PATH "${INC_PATH}/opp/vendors/customize/op_api") + +set(LIB_PATH $ENV{NPU_HOST_LIB}) + +# Dynamic libraries in the stub directory can only be used for compilation +if (NOT DEFINED ENV{NPU_HOST_LIB}) + set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/") + set(LIB_PATH1 "/usr/local/Ascend/ascend-toolkit/latest/atc/lib64/stub/") + message(STATUS "set default LIB_PATH: ${LIB_PATH}") +else () + message(STATUS "env LIB_PATH: ${LIB_PATH}") +endif() + +# Header path +include_directories( + ${INC_PATH}/runtime/include + ${INC_PATH}/atc/include + ${CUST_PKG_PATH}/include +) + +# add host lib path +link_directories( + ${LIB_PATH} + ${LIB_PATH1} + ${CUST_PKG_PATH}/lib +) + +add_executable(execute_select_v2_op + main.cpp +) + +target_link_libraries(execute_select_v2_op + ascendcl + cust_opapi + acl_op_compiler + nnopbase + stdc++ +) + +install(TARGETS execute_select_v2_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/README.md b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/README.md new file mode 100644 index 000000000..16ebc0114 --- /dev/null +++ b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/README.md @@ -0,0 +1,62 @@ +## 概述 + +通过aclnn调用的方式调用SelectV2算子。 + +## 目录结构介绍 +``` +├── AclNNInvocationNaive +│ ├── CMakeLists.txt // 编译规则文件 +│ ├── gen_data.py // 算子期望数据生成脚本 +│ ├── main.cpp // 单算子调用应用的入口 +│ ├── run.sh // 编译运行算子的脚本 +│ └── verify_result.py // 计算结果精度比对脚本 +``` +## 代码实现介绍 +完成自定义算子的开发部署后,可以通过单算子调用的方式来验证单算子的功能。main.cpp代码为单算子API执行方式。单算子API执行是基于C语言的API执行算子,无需提供单算子描述文件进行离线模型的转换,直接调用单算子API接口。 + +自定义算子编译部署后,会自动生成单算子API,可以直接在应用程序中调用。算子API的形式一般定义为“两段式接口”,形如: + ```cpp + aclnnStatus aclnnSelectV2GetWorkspaceSize(const aclTensor *x, const aclTensor *out, uint64_t workspaceSize, aclOpExecutor **executor); + aclnnStatus aclnnSelectV2(void *workspace, int64_t workspaceSize, aclOpExecutor **executor, aclrtStream stream); + ``` +其中aclnnSelectV2GetWorkspaceSize为第一段接口,主要用于计算本次API调用计算过程中需要多少的workspace内存。获取到本次API计算需要的workspace大小之后,按照workspaceSize大小申请Device侧内存,然后调用第二段接口aclnnSelectV2执行计算。具体参考[AscendCL单算子调用](https://hiascend.com/document/redirect/CannCommunityAscendCInVorkSingleOp)>单算子API执行 章节。 + +## 运行样例算子 + **请确保已根据算子包编译部署步骤完成本算子的编译部署动作。** + + - 进入样例代码所在路径 + + ```bash + cd ${git_clone_path}/cann-ops/src/math/selectv2/examples/AclNNInvocationNaive + ``` + + - 环境变量配置 + + 需要设置环境变量,以arm为例 + + ```bash + export DDK_PATH=/usr/local/Ascend/ascend-toolkit/latest + export NPU_HOST_LIB=/usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/devlib + ``` + - 样例执行 + + 样例执行过程中会自动生成测试数据,然后编译与运行aclnn样例,最后打印运行结果。 + + 
```bash + mkdir -p build + cd build + cmake .. && make + ./execute_select_v2_op + ``` + + 用户亦可参考run.sh脚本进行编译与运行。 + + ```bash + bash run.sh + ``` + +## 更新说明 + +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/06/29 | 新增本readme | \ No newline at end of file diff --git a/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/gen_data.py b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/gen_data.py new file mode 100644 index 000000000..41e715aed --- /dev/null +++ b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/gen_data.py @@ -0,0 +1,35 @@ +#!/usr/bin/python3 +# -*- coding:utf-8 -*- +# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. +# This file is a part of the CANN Open Software. +# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ====================================================================================================================== + +import os +import numpy as np + +import tensorflow as tf + + +def gen_golden_data_simple(): + dtype = np.float16 + input_x1 = np.random.rand(32, 64) > 0.5 + input_x1 = input_x1.astype(np.bool_) + input_x2 = np.random.uniform(1, 100, [32, 64]).astype(np.float16) + input_x3 = np.random.uniform(1, 100, [32, 64]).astype(np.float16) + golden = tf.raw_ops.SelectV2(condition=input_x1, t=input_x2, e=input_x3).numpy() + + os.system("mkdir -p input") + os.system("mkdir -p output") + input_x1.tofile("./input/input_x1.bin") + input_x2.tofile("./input/input_x2.bin") + input_x3.tofile("./input/input_x3.bin") + golden.tofile("./output/golden.bin") + +if __name__ == "__main__": + gen_golden_data_simple() + diff --git a/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/main.cpp b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/main.cpp new file mode 100644 index 000000000..cf4cac766 --- /dev/null +++ b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/main.cpp @@ -0,0 +1,252 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +/** + * @file main.cpp + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "acl/acl.h" +#include "aclnn_select_v2.h" + +#define SUCCESS 0 +#define FAILED 1 + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) 
fprintf(stderr, "[ERROR] " fmt "\n", ##args) + +#define CHECK_RET(cond, return_expr) \ + do { \ + if (!(cond)) { \ + return_expr; \ + } \ + } while (0) + +#define LOG_PRINT(message, ...) \ + do { \ + printf(message, ##__VA_ARGS__); \ + } while (0) + +bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file %s", filePath.c_str()); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + auto writeSize = write(fd, buffer, size); + (void) close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} + +int64_t GetShapeSize(const std::vector &shape) +{ + int64_t shapeSize = 1; + for (auto i : shape) { + shapeSize *= i; + } + return shapeSize; +} + +int Init(int32_t deviceId, aclrtStream *stream) +{ + // 固定写法,acl初始化 + auto ret = aclInit(nullptr); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return FAILED); + ret = aclrtSetDevice(deviceId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return FAILED); + ret = aclrtCreateStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. ERROR: %d\n", ret); return FAILED); + + return SUCCESS; +} + +template +int CreateAclTensor(const std::vector &hostData, const std::vector &shape, void **deviceAddr, + aclDataType dataType, aclTensor **tensor) +{ + auto size = GetShapeSize(shape) * sizeof(T); + // 调用aclrtMalloc申请device侧内存 + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return FAILED); + + // 调用aclrtMemcpy将host侧数据拷贝到device侧内存上 + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return FAILED); + + // 调用aclCreateTensor接口创建aclTensor + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, nullptr, 0, aclFormat::ACL_FORMAT_ND, shape.data(), + shape.size(), *deviceAddr); + return SUCCESS; +} + +int main(int argc, char **argv) +{ + // 1. (固定写法)device/stream初始化, 参考acl对外接口列表 + // 根据自己的实际device填写deviceId + int32_t deviceId = 0; + aclrtStream stream; + auto ret = Init(deviceId, &stream); + CHECK_RET(ret == 0, LOG_PRINT("Init acl failed. 
ERROR: %d\n", ret); return FAILED); + + // 2. 构造输入与输出,需要根据API的接口自定义构造 + std::vector inputX1Shape = {32, 64}; + std::vector inputX2Shape = {32, 64}; + std::vector inputX3Shape = {32, 64}; + std::vector outputYShape = {32, 64}; + void *inputX1DeviceAddr = nullptr; + void *inputX2DeviceAddr = nullptr; + void *inputX3DeviceAddr = nullptr; + void *outputYDeviceAddr = nullptr; + aclTensor *inputX1 = nullptr; + aclTensor *inputX2 = nullptr; + aclTensor *inputX3 = nullptr; + aclTensor *outputY = nullptr; + size_t inputX1ShapeSize_1=inputX1Shape[0] * inputX1Shape[1]; + size_t inputX2ShapeSize_1=inputX2Shape[0] * inputX2Shape[1]; + size_t inputX3ShapeSize_1=inputX3Shape[0] * inputX3Shape[1]; + size_t outputYShapeSize_1=outputYShape[0] * outputYShape[1]; + size_t dataType1=sizeof(aclFloat16); + size_t dataType2=1; + std::vector inputX1HostData(inputX1Shape[0] * inputX1Shape[1]); + std::vector inputX2HostData(inputX2Shape[0] * inputX2Shape[1]); + std::vector inputX3HostData(inputX3Shape[0] * inputX3Shape[1]); + std::vector outputYHostData(outputYShape[0] * outputYShape[1]); + + size_t fileSize = 0; + void** input1=(void**)(&inputX1HostData); + void** input2=(void**)(&inputX2HostData); + void** input3=(void**)(&inputX3HostData); + //读取数据 + ReadFile("../input/input_x1.bin", fileSize, *input1, inputX1ShapeSize_1*dataType2); + ReadFile("../input/input_x2.bin", fileSize, *input2, inputX2ShapeSize_1*dataType1); + ReadFile("../input/input_x3.bin", fileSize, *input3, inputX3ShapeSize_1*dataType1); + + INFO_LOG("Set input success"); + // 创建inputX aclTensor + ret = CreateAclTensor(inputX1HostData, inputX1Shape, &inputX1DeviceAddr, aclDataType::ACL_BOOL, &inputX1); + CHECK_RET(ret == ACL_SUCCESS, return FAILED); + ret = CreateAclTensor(inputX2HostData, inputX2Shape, &inputX2DeviceAddr, aclDataType::ACL_FLOAT16, &inputX2); + CHECK_RET(ret == ACL_SUCCESS, return FAILED); + ret = CreateAclTensor(inputX3HostData, inputX3Shape, &inputX3DeviceAddr, aclDataType::ACL_FLOAT16, &inputX3); + CHECK_RET(ret == ACL_SUCCESS, return FAILED); + // 创建outputY aclTensor + ret = CreateAclTensor(outputYHostData, outputYShape, &outputYDeviceAddr, aclDataType::ACL_FLOAT16, &outputY); + CHECK_RET(ret == ACL_SUCCESS, return FAILED); + + // 3. 调用CANN自定义算子库API + uint64_t workspaceSize = 0; + aclOpExecutor *executor; + // 计算workspace大小并申请内存 + ret = aclnnSelectV2GetWorkspaceSize(inputX1, inputX2,inputX3, outputY, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSelectV2GetWorkspaceSize failed. ERROR: %d\n", ret); return FAILED); + void *workspaceAddr = nullptr; + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return FAILED;); + } + // 执行算子 + ret = aclnnSelectV2(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSelectV2 failed. ERROR: %d\n", ret); return FAILED); + + // 4. (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return FAILED); + + // 5. 获取输出的值,将device侧内存上的结果拷贝至host侧,需要根据具体API的接口定义修改 + auto size = GetShapeSize(outputYShape); + std::vector resultData(size, 0); + ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outputYDeviceAddr, + size * sizeof(aclFloat16), ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. 
ERROR: %d\n", ret); return FAILED); + void** output1=(void**)(&resultData); + //写出数据 + WriteFile("../output/output_y.bin", *output1, outputYShapeSize_1*dataType1); + INFO_LOG("Write output success"); + + // 6. 释放aclTensor,需要根据具体API的接口定义修改 + aclDestroyTensor(inputX1); + aclDestroyTensor(inputX2); + aclDestroyTensor(inputX3); + aclDestroyTensor(outputY); + + // 7. 释放device资源,需要根据具体API的接口定义修改 + aclrtFree(inputX1DeviceAddr); + aclrtFree(inputX2DeviceAddr); + aclrtFree(inputX3DeviceAddr); + aclrtFree(outputYDeviceAddr); + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + + return SUCCESS; +} \ No newline at end of file diff --git a/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/run.sh b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/run.sh new file mode 100644 index 000000000..f1eab8d02 --- /dev/null +++ b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/run.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. +# This file is a part of the CANN Open Software. +# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ====================================================================================================================== + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +source $_ASCEND_INSTALL_PATH/bin/setenv.bash +export DDK_PATH=$_ASCEND_INSTALL_PATH +export NPU_HOST_LIB=$_ASCEND_INSTALL_PATH/lib64 + +rm -rf $HOME/ascend/log/* +rm ./input/*.bin +rm ./output/*.bin + +python3 gen_data.py + +if [ $? -ne 0 ]; then + echo "ERROR: generate input data failed!" + return 1 +fi +echo "INFO: generate input data success!" +set -e +rm -rf build +mkdir -p build +cmake -B build +cmake --build build -j +( + cd build + ./execute_select_v2_op +) +ret=`python3 verify_result.py output/output_y.bin output/golden.bin` +echo $ret +if [ "x$ret" == "xtest pass" ]; then + echo "" + echo "#####################################" + echo "INFO: you have passed the Precision!" + echo "#####################################" + echo "" +fi \ No newline at end of file diff --git a/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/verify_result.py b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/verify_result.py new file mode 100644 index 000000000..837374d49 --- /dev/null +++ b/operator_contrib/SelectV2Sample/examples/AclNNInvocationNaive/verify_result.py @@ -0,0 +1,37 @@ +#!/usr/bin/python3 +# -*- coding:utf-8 -*- +# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. +# This file is a part of the CANN Open Software. +# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Please refer to the License for details. 
You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ====================================================================================================================== + +import os +import sys +import numpy as np + +LOSS = 1e-3 # 容忍偏差,一般fp16要求绝对误差和相对误差均不超过千分之一 +MINIMUM = 10e-10 + + +def verify_result(real_result, golden): + dtype = np.float16 + real_result = np.fromfile(real_result, dtype=dtype) # 从bin文件读取实际运算结果 + golden = np.fromfile(golden, dtype=dtype) # 从bin文件读取预期运算结果 + result = np.abs(real_result - golden) # 计算运算结果和预期结果偏差 + deno = np.maximum(np.abs(real_result), np.abs(golden)) # 获取最大值并组成新数组 + result_atol = np.less_equal(result, LOSS) # 计算绝对误差 + result_rtol = np.less_equal(result / np.add(deno, MINIMUM), LOSS) # 计算相对误差 + if not result_rtol.all() and not result_atol.all(): + if np.sum(result_rtol == False) > real_result.size * LOSS and \ + np.sum(result_atol == False) > real_result.size * LOSS: # 误差超出预期时返回打印错误,返回对比失败 + print("[ERROR] result error") + return False + print("test pass") + return True + +if __name__ == '__main__': + verify_result(sys.argv[1], sys.argv[2]) diff --git a/operator_contrib/SelectV2Sample/framework/CMakeLists.txt b/operator_contrib/SelectV2Sample/framework/CMakeLists.txt new file mode 100644 index 000000000..b6be9b492 --- /dev/null +++ b/operator_contrib/SelectV2Sample/framework/CMakeLists.txt @@ -0,0 +1,11 @@ +if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mindspore") + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/caffe_plugin") + add_subdirectory(caffe_plugin) + endif() + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tf_plugin") + add_subdirectory(tf_plugin) + endif() + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/onnx_plugin") + add_subdirectory(onnx_plugin) + endif() +endif() diff --git a/operator_contrib/SelectV2Sample/framework/tf_plugin/CMakeLists.txt b/operator_contrib/SelectV2Sample/framework/tf_plugin/CMakeLists.txt new file mode 100644 index 000000000..a6aba5c20 --- /dev/null +++ b/operator_contrib/SelectV2Sample/framework/tf_plugin/CMakeLists.txt @@ -0,0 +1,14 @@ + +aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} plugin_srcs) +add_library(cust_tf_parsers SHARED ${plugin_srcs}) +target_compile_definitions(cust_tf_parsers PRIVATE google=ascend_private) +if(ENABLE_CROSS_COMPILE) + target_link_directories(cust_tf_parsers PRIVATE + ${CMAKE_COMPILE_COMPILER_LIBRARY} + ${CMAKE_COMPILE_RUNTIME_LIBRARY} + ) +endif() +target_link_libraries(cust_tf_parsers PRIVATE intf_pub graph) +install(TARGETS cust_tf_parsers + LIBRARY DESTINATION packages/vendors/${vendor_name}/framework/tensorflow +) diff --git a/operator_contrib/SelectV2Sample/framework/tf_plugin/tensorflow_select_v2_plugin.cc b/operator_contrib/SelectV2Sample/framework/tf_plugin/tensorflow_select_v2_plugin.cc new file mode 100644 index 000000000..61a92a953 --- /dev/null +++ b/operator_contrib/SelectV2Sample/framework/tf_plugin/tensorflow_select_v2_plugin.cc @@ -0,0 +1,23 @@ +/* Copyright (C) 2020-2021. Huawei Technologies Co., Ltd. All +rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the Apache License Version 2.0. + * You may not use this file except in compliance with the License. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Apache License for more details at + * http://www.apache.org/licenses/LICENSE-2.0 + */ + +#include "register/register.h" + +namespace domi { +// register op info to GE +REGISTER_CUSTOM_OP("SelectV2") + .FrameworkType(TENSORFLOW) // type: CAFFE, TENSORFLOW + .OriginOpType("SelectV2") // name in tf module + .ParseParamsByOperatorFn(AutoMappingByOpFn); +} // namespace domi diff --git a/operator_contrib/SelectV2Sample/op_host/select_v2.cpp b/operator_contrib/SelectV2Sample/op_host/select_v2.cpp new file mode 100644 index 000000000..2d4eeb7e4 --- /dev/null +++ b/operator_contrib/SelectV2Sample/op_host/select_v2.cpp @@ -0,0 +1,207 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ +#include +#include "select_v2_tiling.h" +#include "register/op_def_registry.h" +#include "tiling/platform/platform_ascendc.h" + +namespace optiling { +const uint32_t BLOCK_SIZE = 32; +static ge::graphStatus TilingFunc(gert::TilingContext* context) +{ + SelectV2TilingData tiling; + auto shape_x = context->GetInputTensor(0)->GetOriginShape(); + int32_t M = 1, N = 1, Z = 1; + if(shape_x.GetDimNum() > 2) + { + for (int i = 0; i < shape_x.GetDimNum() - 2; i++) + Z *= shape_x.GetDim(i); + M = shape_x.GetDim(shape_x.GetDimNum() - 2); + N = shape_x.GetDim(shape_x.GetDimNum() - 1); + std::cout<<"M:"<GetPlatformInfo()); + auto socVersion = ascendcPlatform.GetSocVersion(); + uint64_t ub_size; + ascendcPlatform.GetCoreMemSize(platform_ascendc::CoreMemType::UB, ub_size); + auto aivNum = ascendcPlatform.GetCoreNum(); + uint32_t totalLength = context->GetInputTensor(0)->GetShapeSize(); + //获取输入shape信息 + uint32_t inputNum = context->GetInputShape(1)->GetStorageShape().GetShapeSize(); //输入数量 + uint32_t inputBytes = GetSizeByDataType(context->GetInputDesc(1)->GetDataType()); //输入类型 + uint32_t inputLength = inputBytes * inputNum; //输入长度 + auto dt = context->GetInputTensor(0)->GetDataType(); + if(dt == ge::DT_INT8){ + sizeofdatatype = 1; + NUM = 19; + }else if(dt == ge::DT_FLOAT16){ + sizeofdatatype = 2; + NUM = 10; + } + else if (dt == ge::DT_INT32) { + sizeofdatatype = 4; + NUM = 11; + } + else{ //DT_FLOAT + sizeofdatatype = 4; + NUM = 8; + } + uint32_t ALIGN_NUM = BLOCK_SIZE / sizeofdatatype; + uint32_t tiling_size = ((ub_size) / BLOCK_SIZE / 2) / NUM; + tiling_size = tiling_size <= 8 ? tiling_size : tiling_size / 8 * 8; + uint32_t block_size = tiling_size * ALIGN_NUM; + aivNum = (aivNum < totalLength / block_size) ? aivNum : (totalLength / block_size); + aivNum = aivNum >= 1 ? 
aivNum : 1; + uint32_t core_size; + if(aivNum != 0 && ALIGN_NUM != 0){ + core_size = (totalLength / aivNum) / (ALIGN_NUM * 8) * (ALIGN_NUM * 8); + } + uint32_t core_remain = totalLength - aivNum * core_size; + tiling.set_totalLength(totalLength); + tiling.set_ALIGN_NUM(ALIGN_NUM); + tiling.set_tiling_size(tiling_size); + tiling.set_block_size(block_size); + tiling.set_aivNum(aivNum); + tiling.set_core_size(core_size); + tiling.set_core_remain(core_remain); + uint32_t x2Size = context->GetInputShape(2)->GetStorageShape().GetShapeSize(); + uint32_t cdtSize = context->GetInputShape(0)->GetStorageShape().GetShapeSize(); + uint32_t x1Size = context->GetInputShape(1)->GetStorageShape().GetShapeSize(); + uint32_t ySize = context->GetOutputShape(0)->GetStorageShape().GetShapeSize(); + if(ySize != cdtSize || ySize != x1Size || ySize != x2Size) + { + context->SetTilingKey(2); + int32_t y_ndarray[20], cdt_ndarray[20], x1_ndarray[20],x2_ndarray[20]; + int32_t y_dimensional, cdt_dimensional, x1_dimensional, x2_dimensional; + auto shape_y = context->GetOutputShape(0)->GetOriginShape(); + auto shape_cdt = context->GetInputTensor(0)->GetOriginShape(); + auto shape_x1 = context->GetInputTensor(1)->GetOriginShape(); + auto shape_x2 = context->GetInputTensor(2)->GetOriginShape(); + y_dimensional = shape_y.GetDimNum(); + cdt_dimensional = shape_cdt.GetDimNum(); + x1_dimensional = shape_x1.GetDimNum(); + x2_dimensional = shape_x2.GetDimNum(); + for(int i = 0; i < y_dimensional; i++) + { + y_ndarray[y_dimensional-i-1] = shape_y.GetDim(i); + if(iSetTilingKey(1); + } + context->SetBlockDim(1); + tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); + context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); + size_t *currentWorkspace = context->GetWorkspaceSizes(1); + currentWorkspace[0] = 0; + return ge::GRAPH_SUCCESS; +} +} + +namespace ge { +static ge::graphStatus InferShape(gert::InferShapeContext* context) +{ + const gert::Shape* x1_shape = context->GetInputShape(0); + gert::Shape* y_shape = context->GetOutputShape(0); + *y_shape = *x1_shape; + return GRAPH_SUCCESS; +} +} + +namespace ops { +class SelectV2 : public OpDef { +public: + explicit SelectV2(const char* name) : OpDef(name) + { + this->Input("condition") + .ParamType(REQUIRED) + .DataType({ge::DT_BOOL, ge::DT_BOOL, ge::DT_BOOL, ge::DT_BOOL}) + .Format({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND}); + this->Input("x1") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT, ge::DT_FLOAT16, ge::DT_INT32, ge::DT_INT8}) + .Format({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND}); + this->Input("x2") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT, ge::DT_FLOAT16, ge::DT_INT32, ge::DT_INT8}) + .Format({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND}); + this->Output("y") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT, ge::DT_FLOAT16, ge::DT_INT32, ge::DT_INT8}) + .Format({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND}); + + this->SetInferShape(ge::InferShape); + this->AICore() + .SetTiling(optiling::TilingFunc); + this->AICore().AddConfig("ascend310b"); + } +}; +OP_ADD(SelectV2); +} diff --git 
a/operator_contrib/SelectV2Sample/op_host/select_v2_tiling.h b/operator_contrib/SelectV2Sample/op_host/select_v2_tiling.h new file mode 100644 index 000000000..bfe190188 --- /dev/null +++ b/operator_contrib/SelectV2Sample/op_host/select_v2_tiling.h @@ -0,0 +1,41 @@ +#ifndef SELECTV2_TILING_H +#define SELECTV2_TILING_H +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ +#include "register/tilingdata_base.h" +namespace optiling { +BEGIN_TILING_DATA_DEF(SelectV2TilingData) + TILING_DATA_FIELD_DEF(int32_t, M); + TILING_DATA_FIELD_DEF(int32_t, N); + TILING_DATA_FIELD_DEF(int32_t, Z); + TILING_DATA_FIELD_DEF(uint32_t, totalLength); + TILING_DATA_FIELD_DEF(uint32_t, tileNum); + TILING_DATA_FIELD_DEF(uint32_t, ALIGN_NUM); + TILING_DATA_FIELD_DEF(uint32_t, tiling_size); + TILING_DATA_FIELD_DEF(uint32_t, block_size); + TILING_DATA_FIELD_DEF(uint32_t, aivNum); + TILING_DATA_FIELD_DEF(uint32_t, core_size); + TILING_DATA_FIELD_DEF(uint32_t, core_remain); + TILING_DATA_FIELD_DEF(int32_t, y_dimensional); + TILING_DATA_FIELD_DEF_ARR(int32_t, 20, y_ndarray); + TILING_DATA_FIELD_DEF_ARR(int32_t, 20, cdt_ndarray); + TILING_DATA_FIELD_DEF_ARR(int32_t, 20, x1_ndarray); + TILING_DATA_FIELD_DEF_ARR(int32_t, 20, x2_ndarray); + TILING_DATA_FIELD_DEF_ARR(int32_t, 20, y_sumndarray); + TILING_DATA_FIELD_DEF_ARR(int32_t, 20, cdt_sumndarray); + TILING_DATA_FIELD_DEF_ARR(int32_t, 20, x1_sumndarray); + TILING_DATA_FIELD_DEF_ARR(int32_t, 20, x2_sumndarray); + +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(SelectV2, SelectV2TilingData) +} + +#endif \ No newline at end of file diff --git a/operator_contrib/SelectV2Sample/op_kernel/select_v2.cpp b/operator_contrib/SelectV2Sample/op_kernel/select_v2.cpp new file mode 100644 index 000000000..444673f1b --- /dev/null +++ b/operator_contrib/SelectV2Sample/op_kernel/select_v2.cpp @@ -0,0 +1,239 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. 
+ */ +#define K_MAX_SHAPE_DIM 0 +#include +#include "kernel_operator.h" + +using namespace AscendC; +constexpr int32_t BUFFER_NUM = 2; +template class KernelSelectV2 { + public: + __aicore__ inline KernelSelectV2() {} + __aicore__ inline void Init(GM_ADDR condition, GM_ADDR x1, GM_ADDR x2, GM_ADDR y,int32_t M, int32_t N, int32_t Z, + uint32_t totalLength, uint32_t ALIGN_NUM, uint32_t block_size, uint32_t core_size, uint32_t core_remain) { + ASSERT(GetBlockNum() != 0 && "block dim can not be zero!"); + this->blockLength = core_size + (GetBlockNum() == GetBlockIdx() + 1 ? core_remain : 0); + this->tileLength = block_size; + this->ALIGN_NUM = ALIGN_NUM; + if(ALIGN_NUM != 0){ + this->blockLength = this->blockLength + (this->blockLength % ALIGN_NUM ? ALIGN_NUM - this->blockLength % ALIGN_NUM : 0); + } + this->totalLength = totalLength; + auto startPointer = core_size * GetBlockIdx(); + auto bufferlength = this->blockLength; + this->M = M; //行 + this->N = N; //列 + this->Z = Z; //循环次数 + Gm_x1.SetGlobalBuffer((__gm__ TYPE_X1*)x1 ,bufferlength); + Gm_x2.SetGlobalBuffer((__gm__ TYPE_X1*)x2, bufferlength); + Gm_cdt.SetGlobalBuffer((__gm__ uint8_t *)condition, bufferlength); + Gm_y.SetGlobalBuffer((__gm__ TYPE_X1*)y, bufferlength); + if(this->tileLength != 0){ + this->tileNum = this->blockLength / this->tileLength + (this->blockLength % this->tileLength > 0); + } + pipe.InitBuffer(Q_x1, BUFFER_NUM, this->tileLength * sizeof(TYPE_X1)); + pipe.InitBuffer(Q_x2, BUFFER_NUM, this->tileLength * sizeof(TYPE_X1)); + pipe.InitBuffer(Q_cdt, BUFFER_NUM, this->tileLength * sizeof(uint8_t)); + pipe.InitBuffer(Q_y, BUFFER_NUM, this->tileLength * sizeof(TYPE_X1)); + pipe.InitBuffer(B_bits, this->tileLength * sizeof(uint8_t)); + pipe.InitBuffer(B_half, this->tileLength * sizeof(half)); + pipe.InitBuffer(B_zero, this->tileLength * sizeof(half));//取一个全为0的tensor作为处理 + this->zero = B_zero.Get(); + Duplicate(this->zero, half(0), this->tileLength); + if constexpr (std::is_same_v) { + pipe.InitBuffer(B_x1, this->tileLength * sizeof(float)); + pipe.InitBuffer(B_x2, this->tileLength * sizeof(float)); + pipe.InitBuffer(B_y, this->tileLength * sizeof(float)); + } + else if constexpr (std::is_same_v) { + pipe.InitBuffer(B_x1, this->tileLength * sizeof(half)); + pipe.InitBuffer(B_x2, this->tileLength * sizeof(half)); + pipe.InitBuffer(B_y, this->tileLength * sizeof(half)); + } + } + __aicore__ inline void Process() { + int32_t loopCount = this->tileNum; + for (int32_t i = 0; i < loopCount-1; i++) { + CopyIn(i, this->tileLength); + Compute(i, this->tileLength); + CopyOut(i, this->tileLength); + } + uint32_t length = this->blockLength - this->tileLength * (loopCount - 1); + CopyIn(loopCount - 1, (length +31 ) / 32 * 32); + Compute(loopCount - 1, (length +31 ) / 32 * 32); + CopyOut(loopCount - 1, (length +31 ) / 32 * 32); + } + private: + __aicore__ inline void CopyIn(int32_t progress, uint32_t length) { + LocalTensor x1 = Q_x1.AllocTensor(); + LocalTensor x2 = Q_x2.AllocTensor(); + LocalTensor cdt = Q_cdt.AllocTensor(); + DataCopy(x1, Gm_x1[progress * this->tileLength], length); + DataCopy(x2, Gm_x2[progress * this->tileLength], length); + DataCopy(cdt, (Gm_cdt[progress * this->tileLength]), length); + Q_x1.EnQue(x1); + Q_x2.EnQue(x2); + Q_cdt.EnQue(cdt); + } + __aicore__ inline void Compute(int32_t progress, uint32_t length) { + LocalTensor x1 = Q_x1.DeQue(); + LocalTensor x2 = Q_x2.DeQue(); + LocalTensor cdt = Q_cdt.DeQue(); + LocalTensor y = Q_y.AllocTensor(); + auto bits = B_bits.Get(); + if constexpr (std::is_same_v) { + auto 
p1 = B_half.Get(); + Cast(p1, cdt, RoundMode::CAST_NONE, length); + Compare(bits, p1,zero,CMPMODE::NE,length); + Select(y, bits, x1, x2, SELMODE::VSEL_TENSOR_TENSOR_MODE, length); + } + else if constexpr (std::is_same_v) { + auto p1 = B_half.Get(); + Cast(p1, cdt, RoundMode::CAST_NONE, length); + Compare(bits, p1,zero,CMPMODE::NE,length); + Select(y, bits, x1, x2, SELMODE::VSEL_TENSOR_TENSOR_MODE, length); + } + else if constexpr (std::is_same_v) { + auto p1 = B_half.Get(); + Cast(p1, cdt, RoundMode::CAST_NONE, length); + Compare(bits, p1,zero,CMPMODE::NE,length); + auto half_x1 = B_x1.Get(); + auto half_x2 = B_x2.Get(); + auto half_y = B_y.Get(); + Cast(half_x1, x1, RoundMode::CAST_NONE, length); + Cast(half_x2, x2, RoundMode::CAST_NONE, length); + Select(half_y, bits, half_x1, half_x2, SELMODE::VSEL_TENSOR_TENSOR_MODE, length); + Cast(y, half_y, RoundMode::CAST_NONE, length); + } + else{ + auto p1 = B_half.Get(); + Cast(p1, cdt, RoundMode::CAST_NONE, length); + Compare(bits, p1,zero,CMPMODE::NE,length); + auto float_x1 = B_x1.Get(); + auto float_x2 = B_x2.Get(); + auto float_y = B_y.Get(); + Cast(float_x1, x1, RoundMode::CAST_NONE, length); + Cast(float_x2, x2, RoundMode::CAST_NONE, length); + Select(float_y, bits, float_x1, float_x2, SELMODE::VSEL_TENSOR_TENSOR_MODE, length); + Cast(y, float_y, RoundMode::CAST_RINT, length); + } + Q_x1.FreeTensor(x1); + Q_x2.FreeTensor(x2); + Q_cdt.FreeTensor(cdt); + Q_y.EnQue(y); + } + __aicore__ inline void CopyOut(int32_t progress, uint32_t length) { + LocalTensor y = Q_y.DeQue(); + DataCopy(Gm_y[progress * this->tileLength], y, length); + Q_y.FreeTensor(y); + } + private: + TPipe pipe; + TQue Q_x1, Q_x2, Q_cdt; + TQue Q_y; + TBuf B_result, B_zero, B_bits,B_half; + TBuf B_x1, B_x2, B_y; + int32_t M; + int32_t N; + int32_t Z; + LocalTensor zero; + GlobalTensor Gm_x1, Gm_x2, Gm_y; + GlobalTensor Gm_cdt; + uint32_t blockLength; + uint32_t tileNum; + uint32_t totalLength; + uint32_t tileLength; + uint32_t ALIGN_NUM; + }; +template class KernelSelectV2_Broadcast { + using T = TYPE_X1; +public: + __aicore__ inline KernelSelectV2_Broadcast() {} + __aicore__ inline void Init(GM_ADDR condition, GM_ADDR x1, GM_ADDR x2, GM_ADDR y,int32_t M, int32_t N, int32_t Z, + int32_t y_dimensional, + int32_t* y_ndarray, int32_t* cdt_ndarray, int32_t* x1_ndarray, int32_t* x2_ndarray, + int32_t* y_sumndarray, int32_t* cdt_sumndarray, int32_t* x1_sumndarray, int32_t* x2_sumndarray) { + ASSERT(GetBlockNum() != 0 && "block dim can not be zero!"); + this->M = M; //行 + this->N = N; //列 + this->Z = Z; //循环次数 + this->y_dimensional = y_dimensional; + this->y_ndarray = y_ndarray; + this->cdt_ndarray = cdt_ndarray; + this->x1_ndarray = x1_ndarray; + this->x2_ndarray = x2_ndarray; + this->y_sumndarray = y_sumndarray; + this->cdt_sumndarray = cdt_sumndarray; + this->x1_sumndarray = x1_sumndarray; + this->x2_sumndarray = x2_sumndarray; + Gm_x1.SetGlobalBuffer((__gm__ TYPE_X1*)x1 , 1); + Gm_x2.SetGlobalBuffer((__gm__ TYPE_X1*)x2 , 1); + Gm_cdt.SetGlobalBuffer((__gm__ TYPE_CONDITION*)condition , 1); + Gm_y.SetGlobalBuffer((__gm__ TYPE_X1*)y , 1); + } + __aicore__ inline void Process() { + int dim = this->y_dimensional; + for(int j=0; jy_sumndarray[dim]; j++) + { + int cdt_start = 0, x1_start = 0, x2_start = 0; + for(int k=0; kcdt_ndarray[k] != 1){ + cdt_start += this->cdt_sumndarray[k] * (j / this->y_sumndarray[k] % this->y_ndarray[k]); + } + if(this->x1_ndarray[k] != 1){ + x1_start += this->x1_sumndarray[k] * (j / this->y_sumndarray[k] % this->y_ndarray[k]); + } + if(this->x2_ndarray[k] 
!= 1){ + x2_start += this->x2_sumndarray[k] * (j / this->y_sumndarray[k] % this->y_ndarray[k]); + } + } + TYPE_X1 pos = Gm_cdt.GetValue(cdt_start) ? Gm_x1.GetValue(x1_start) : Gm_x2.GetValue(x2_start); + Gm_y.SetValue(j,pos); + } + } +private: + TPipe pipe; + TQue inQueueSTART, inQueueEND, inQueueWEIGHT; + TQue outQueueY; + TBuf tmp1, tmp2, tmp3,boox; + GlobalTensor Gm_x1, Gm_x2, Gm_y; + GlobalTensor Gm_cdt; + int32_t M; + int32_t N; + int32_t Z; + int32_t y_dimensional; + int32_t *y_ndarray; + int32_t *cdt_ndarray; + int32_t *x1_ndarray; + int32_t *x2_ndarray; + int32_t *y_sumndarray; + int32_t *cdt_sumndarray; + int32_t *x1_sumndarray; + int32_t *x2_sumndarray; +}; + + extern "C" __global__ __aicore__ void select_v2(GM_ADDR condition, GM_ADDR x1, GM_ADDR x2, GM_ADDR y, GM_ADDR workspace, GM_ADDR tiling) { + GET_TILING_DATA(tiling_data, tiling); + if(TILING_KEY_IS(1)){ + KernelSelectV2 op; + op.Init(condition, x1, x2, y, tiling_data.M, tiling_data.N, tiling_data.Z, + tiling_data.totalLength, tiling_data.ALIGN_NUM, tiling_data.block_size, tiling_data.core_size, tiling_data.core_remain); + op.Process(); + } + else if(TILING_KEY_IS(2)){ + KernelSelectV2_Broadcast op; + op.Init(condition, x1, x2, y, tiling_data.M, tiling_data.N, tiling_data.Z, + tiling_data.y_dimensional, + tiling_data.y_ndarray, tiling_data.cdt_ndarray, tiling_data.x1_ndarray, tiling_data.x2_ndarray, + tiling_data.y_sumndarray, tiling_data.cdt_sumndarray, tiling_data.x1_sumndarray, tiling_data.x2_sumndarray); + op.Process(); + } + } \ No newline at end of file -- Gitee From e537cd2c527105e5a26c30f20f9513810b011397 Mon Sep 17 00:00:00 2001 From: hid20190403 Date: Fri, 4 Jul 2025 14:59:40 +0000 Subject: [PATCH 2/6] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20opp=5Fkernel=5Faicpu?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- operator_contrib/SelectV2Sample/opp_kernel_aicpu/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 operator_contrib/SelectV2Sample/opp_kernel_aicpu/.keep diff --git a/operator_contrib/SelectV2Sample/opp_kernel_aicpu/.keep b/operator_contrib/SelectV2Sample/opp_kernel_aicpu/.keep new file mode 100644 index 000000000..e69de29bb -- Gitee From 51919b10507d7cffaeb8dadb0068c31b5ced3986 Mon Sep 17 00:00:00 2001 From: hid20190403 Date: Fri, 4 Jul 2025 15:00:08 +0000 Subject: [PATCH 3/6] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- operator_contrib/SelectV2Sample/tests/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 operator_contrib/SelectV2Sample/tests/.keep diff --git a/operator_contrib/SelectV2Sample/tests/.keep b/operator_contrib/SelectV2Sample/tests/.keep new file mode 100644 index 000000000..e69de29bb -- Gitee From 2da40754c05cbdbf4d93b3d63b13e12b1f76c4d7 Mon Sep 17 00:00:00 2001 From: hid20190403 Date: Fri, 4 Jul 2025 15:00:38 +0000 Subject: [PATCH 4/6] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20st?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- operator_contrib/SelectV2Sample/tests/st/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 operator_contrib/SelectV2Sample/tests/st/.keep diff --git a/operator_contrib/SelectV2Sample/tests/st/.keep b/operator_contrib/SelectV2Sample/tests/st/.keep new file mode 100644 index 000000000..e69de29bb -- Gitee From 73c56bbdebffdd72190a592810f9fe959f997cf7 Mon Sep 17 00:00:00 2001 From: hid20190403 Date: Fri, 4 Jul 2025 15:00:53 +0000 
Subject: [PATCH 5/6] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20ut?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- operator_contrib/SelectV2Sample/tests/ut/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 operator_contrib/SelectV2Sample/tests/ut/.keep diff --git a/operator_contrib/SelectV2Sample/tests/ut/.keep b/operator_contrib/SelectV2Sample/tests/ut/.keep new file mode 100644 index 000000000..e69de29bb -- Gitee From 7d4e015eb08b20726c03ca68314595f74e6ad45b Mon Sep 17 00:00:00 2001 From: hid20190403 Date: Fri, 4 Jul 2025 15:03:14 +0000 Subject: [PATCH 6/6] update operator_contrib/README.md. Signed-off-by: hid20190403 --- operator_contrib/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/operator_contrib/README.md b/operator_contrib/README.md index da799262c..dbff6a089 100644 --- a/operator_contrib/README.md +++ b/operator_contrib/README.md @@ -32,6 +32,7 @@ | [ReduceSumSample](./ReduceSumSample) | 基于Ascend C的ReduceSum自定义Vector算子及调用样例 | Atlas 200/500 A2 推理产品
Atlas A2训练系列产品/Atlas 800I A2推理产品 | 8.0.RC2.alpha003 |
| [ScatterMaxSample](./ScatterMaxSample) | 基于Ascend C的ScatterMax自定义Vector算子及调用样例 | Atlas 200/500 A2 推理产品<br>Atlas A2训练系列产品/Atlas 800I A2推理产品 | 8.0.RC1.alpha003 |
| [ScatterSubSample](./ScatterSubSample) | 基于Ascend C的ScatterSub自定义Vector算子及调用样例 | Atlas 200/500 A2 推理产品<br>Atlas A2训练系列产品/Atlas 800I A2推理产品 | 8.0.RC1.alpha003 |
+| [SelectV2Sample](./SelectV2Sample) | 基于Ascend C的SelectV2自定义Vector算子及调用样例 | Atlas 200/500 A2 推理产品<br>Atlas A2训练系列产品/Atlas 800I A2推理产品 | 8.0.RC1.alpha003 |
| [SpenceSample](./SpenceSample) | 基于Ascend C的Spence自定义Vector算子及调用样例 | Atlas 200/500 A2 推理产品<br>Atlas A2训练系列产品/Atlas 800I A2推理产品 | 8.0.RC1.alpha003 |
| [ThreeNNSample](./ThreeNNSample) | 基于Ascend C的ThreeNN自定义Vector算子及调用样例 | Atlas 200/500 A2 推理产品<br>Atlas A2训练系列产品/Atlas 800I A2推理产品 | 8.0.RC2.alpha003 |
| [TrilSample](./TrilSample) | 基于Ascend C的Tril自定义Vector算子及调用样例 | Atlas 200/500 A2 推理产品<br>Atlas A2训练系列产品/Atlas 800I A2推理产品 | 8.0.RC2.alpha003 |
-- Gitee