diff --git a/atvc/examples/addcmul/README.md b/atvc/examples/addcmul/README.md
deleted file mode 100644
index 43e9b2b656d028e33247164bc289ed004b8dbd4e..0000000000000000000000000000000000000000
--- a/atvc/examples/addcmul/README.md
+++ /dev/null
@@ -1,48 +0,0 @@
-
-
-## Overview
-
-This example shows how to implement the Addcmul operator with ATVC and verify its functional correctness.
-
-
-## Supported Products
-- Atlas A2 training series products
-
-## Directory Structure
-
-| File | Description |
-| ------------------------------------------------------------ | ------------------------------------------------------------ |
-| [addcmul.cpp](./addcmul.cpp) | Addcmul operator implementation and invocation example |
-
-## Operator Description
-
-Mathematical formula of the Addcmul operator: $output_i = input_i + value * tensor1_i * tensor2_i$
-
-Addcmul operator specification:
-
-Operator type (OpType): Addcmul
-
-Operator inputs:
-
-| name | shape | data type | format |
-| ------- | -------- | --------- | ------ |
-| input | 8 * 2048 | float | ND |
-| tensor1 | 1 * 2048 | float | ND |
-| tensor2 | 1 * 2048 | float | ND |
-
-Operator output:
-
-| name | shape | data type | format |
-| ------ | -------- | --------- | ------ |
-| output | 8 * 2048 | float | ND |
-
-Kernel function name: AddcmulCustom
-
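-The broadcast semantics can be sanity-checked with a plain host-side reference. The sketch below mirrors the golden-data computation in [addcmul.cpp](./addcmul.cpp); the shapes and fill values here are illustrative only, not the specification above:
-
-```cpp
-#include <cstdio>
-#include <vector>
-
-// Host-side reference for Addcmul with tensor1/tensor2 broadcast along the first dimension.
-// Illustrative shapes: input/output 8 x 8, tensor1/tensor2 1 x 8.
-int main() {
-    const int rows = 8;
-    const int cols = 8;
-    const float value = 4.0f;
-    std::vector<float> input(rows * cols, 1.0f);
-    std::vector<float> tensor1(cols, 2.0f);
-    std::vector<float> tensor2(cols, 3.0f);
-
-    std::vector<float> output(rows * cols);
-    for (int i = 0; i < rows * cols; ++i) {
-        output[i] = input[i] + value * tensor1[i % cols] * tensor2[i % cols];
-    }
-    printf("output[0] = %f\n", output[0]); // 1 + 4 * 2 * 3 = 25
-    return 0;
-}
-```
-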
-## Running the Operator
-Run the following from the ascendc-api-adv repository directory:
-```bash
-$ cd ./atvc/tests/
-$ bash run_test.sh addcmul
-...
-Generate golden data successfully.
-...
-Accuracy verification passed.
-```
\ No newline at end of file
diff --git a/atvc/examples/addcmul/addcmul.cpp b/atvc/examples/addcmul/addcmul.cpp
deleted file mode 100644
index 2ccd77a0d121ea7511b9d2417bde985d59a949da..0000000000000000000000000000000000000000
--- a/atvc/examples/addcmul/addcmul.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-/**
- * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
- *
- * This file is a part of the CANN Open Software.
- * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
- * Please refer to the License for details. You may not use this file except in compliance with the License.
- * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
- * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
- * See LICENSE in the root of the software repository for the full text of the License.
- */
-
-#include <cstdio>
-#include <cmath>
-#include <algorithm>
-#include <iostream>
-#include <vector>
-#include <random>
-#include "acl/acl.h"
-#include "broadcast/broadcast_host.h"
-#include "addcmul.h"
-
-#define CHECK_ACL(x) \
- do { \
- aclError __ret = x; \
- if (__ret != ACL_ERROR_NONE) { \
- std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \
- } \
- } while (0)
-
-namespace {
-static constexpr float REL_TOL = 1e-3f;
-static constexpr float ABS_TOL = 1e-5f;
-
-// Check whether two floating-point values are close enough
-bool IsClose(float a, float b) {
-    const float eps = 1e-40f; // guard against a zero denominator
- float diff = std::abs(a - b);
- return (diff <= ABS_TOL) || (diff <= REL_TOL * std::max(std::abs(a), std::abs(b) + eps));
-}
-
-// Traits of the Addcmul operator: three float inputs (tensor1, tensor2, input), one float output, one float temp buffer
-using BroadcastOpTraits = ATVC::OpTraits<ATVC::OpInputs<float, float, float>, ATVC::OpOutputs<float>, ATVC::OpTemps<float>>;
-
-// Dispatches Broadcast-style operators: selects the best-matching policy and launches the kernel function
-template <class Traits>
-void BroadcastOpAdapter(uint8_t* tensor1, uint8_t* tensor2, float value, uint8_t* input, uint8_t* output, ATVC::BroadcastParam &param, ATVC::BroadcastPolicy &policy, aclrtStream& stream)
-{
-    // Allocate the workspace and pass it to the device together with the BroadcastTilingData
- uint8_t *workspaceDevice;
- CHECK_ACL(aclrtMalloc((void **)&workspaceDevice, param.workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST));
-    param.workspaceAddr = reinterpret_cast<GM_ADDR>(workspaceDevice);
-    // Map the BroadcastPolicy computed by the tiling API to a compile-time parameter and instantiate the matching kernel
-    // (the launch block count is assumed to come from the tiling data)
-    if (policy == ATVC::BROADCAST_POLICY0) {
-        AddcmulCustom<Traits, ATVC::BROADCAST_POLICY0><<<param.tilingData.blockNum, nullptr, stream>>>(tensor1, tensor2, input, output, param, value);
-    } else if (policy == ATVC::BROADCAST_POLICY1) {
-        AddcmulCustom<Traits, ATVC::BROADCAST_POLICY1><<<param.tilingData.blockNum, nullptr, stream>>>(tensor1, tensor2, input, output, param, value);
- } else {
- printf("[ERROR] Cannot find any matched policy.\n");
- }
-    // Synchronize the stream, then free the allocated workspace memory
- CHECK_ACL(aclrtSynchronizeStream(stream));
- CHECK_ACL(aclrtFree(workspaceDevice));
-}
-
-bool VerifyResults(const std::vector<float> &golden, const std::vector<float> &output)
-{
- for (int32_t i = 0; i < golden.size(); i++) {
- if (!IsClose(golden[i], output[i])) {
- printf("Accuracy verification failed! The expected value of element "
- "in index [%d] is %f, but actual value is %f.\n",
- i,
- golden[i],
- output[i]);
- return false;
- }
- }
- return true;
-}
-}
-
-int32_t main(int32_t argc, char* argv[])
-{
- int32_t eleNum = 1 * 8;
- int32_t outEleNum = 8 * 8;
-    std::vector<int64_t> shapeIn{1, 8};  // test input shape
-    std::vector<int64_t> shapeOut{8, 8}; // test output shape
-
- size_t inputByteSize = static_cast(eleNum) * sizeof(float);
- size_t outputByteSize = static_cast(outEleNum) * sizeof(float);
- std::random_device rd;
- std::mt19937 gen(rd());
-    std::uniform_real_distribution<float> disX(1.0f, 9.0f);
-    std::uniform_real_distribution<float> disY(1.0f, 9.0f);
-
-    std::vector<float> tensor1(eleNum);
-    std::vector<float> tensor2(eleNum);
- float value = 4;
-    std::vector<float> input(outEleNum);
-    std::vector<float> golden(outEleNum);
- for (int i = 0; i < eleNum; ++i) {
- tensor1[i] = (disX(gen));
- tensor2[i] = (disX(gen));
- }
- for (int i = 0; i < outEleNum; ++i) {
- input[i] = (disY(gen));
- }
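-    // Golden reference: output = input + value * tensor1 * tensor2, with tensor1/tensor2 broadcast across rows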
- for (int i = 0; i < outEleNum; ++i) {
- golden[i] = input[i] + (tensor1[i % eleNum] * tensor2[i % eleNum] * value);
- }
- printf("Generate golden data successfully.\n");
-    // Initialize ACL resources
- CHECK_ACL(aclInit(nullptr));
- aclrtContext context;
- int32_t deviceId = 0;
- CHECK_ACL(aclrtSetDevice(deviceId));
- CHECK_ACL(aclrtCreateContext(&context, deviceId));
- aclrtStream stream = nullptr;
- CHECK_ACL(aclrtCreateStream(&stream));
- uint8_t *outputHost;
- uint8_t *tensor1Device;
- uint8_t *tensor2Device;
- uint8_t *inputDevice;
- uint8_t *outputDevice;
-
- CHECK_ACL(aclrtMallocHost((void **)(&outputHost), outputByteSize));
- CHECK_ACL(aclrtMalloc((void **)&tensor1Device, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST));
- CHECK_ACL(aclrtMalloc((void **)&tensor2Device, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST));
- CHECK_ACL(aclrtMalloc((void **)&inputDevice, outputByteSize, ACL_MEM_MALLOC_HUGE_FIRST));
- CHECK_ACL(aclrtMalloc((void **)&outputDevice, outputByteSize, ACL_MEM_MALLOC_HUGE_FIRST));
-
- CHECK_ACL(aclrtMemcpy(tensor1Device, inputByteSize, tensor1.data(), inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE));
- CHECK_ACL(aclrtMemcpy(tensor2Device, inputByteSize, tensor2.data(), inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE));
- CHECK_ACL(aclrtMemcpy(inputDevice, outputByteSize, input.data(), outputByteSize, ACL_MEMCPY_HOST_TO_DEVICE));
-
-    ATVC::BroadcastParam param; // Broadcast runtime parameters, including the tiling data and workspace information
-    ATVC::BroadcastPolicy policy = {-1, -1, -1}; // Broadcast policy, maps to the most suitable broadcast template implementation
-    // Compute the runtime parameters on the host side via the tiling API
-    param.nBufferNum = 1;
-    if (!ATVC::Host::CalcBroadcastTiling<BroadcastOpTraits>(shapeIn, shapeOut, &policy, &param)) {
- printf("Broadcast tiling error.\n");
- return -1;
-    }
-    // Call the adapter to dispatch the templated kernel
-    BroadcastOpAdapter<BroadcastOpTraits>(tensor1Device, tensor2Device, value, inputDevice, outputDevice, param, policy, stream);
-
- CHECK_ACL(aclrtMemcpy(outputHost, outputByteSize, outputDevice, outputByteSize, ACL_MEMCPY_DEVICE_TO_HOST));
-    std::vector<float> output(reinterpret_cast<float *>(outputHost), reinterpret_cast<float *>(outputHost) + outEleNum);
-
-    // Release ACL resources
- CHECK_ACL(aclrtFree(tensor1Device));
- CHECK_ACL(aclrtFree(tensor2Device));
- CHECK_ACL(aclrtFree(inputDevice));
- CHECK_ACL(aclrtFree(outputDevice));
- CHECK_ACL(aclrtFreeHost(outputHost));
-
- CHECK_ACL(aclrtDestroyStream(stream));
- CHECK_ACL(aclrtDestroyContext(context));
- CHECK_ACL(aclrtResetDevice(deviceId));
- CHECK_ACL(aclFinalize());
-
- if (!VerifyResults(golden, output)) {
- return -1;
- }
- printf("Accuracy verification passed.\n");
- return 0;
-}
diff --git a/atvc/examples/addcmul/addcmul.h b/atvc/examples/addcmul/addcmul.h
deleted file mode 100644
index a63479d4c21e4605304072c4a0b2c98f544d095d..0000000000000000000000000000000000000000
--- a/atvc/examples/addcmul/addcmul.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
- *
- * This file is a part of the CANN Open Software.
- * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
- * Please refer to the License for details. You may not use this file except in compliance with the License.
- * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
- * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
- * See LICENSE in the root of the software repository for the full text of the License.
- */
-#ifndef ATVC_ADDCMUL_H
-#define ATVC_ADDCMUL_H
-#include "pre_compute_mul_of_broadcast.h"
-#include "post_compute_add_of_broadcast.h"
-
-/*!
- * \brief Addcmul(i) = input(i) + value * tensor1(i) * tensor2(i)
- * \param [in] tensor1, input global memory of tensor1
- * \param [in] tensor2, input global memory of tensor2
- * \param [in] input, input global memory of input
- * \param [out] output, output global memory
- * \param [in] broadcastParam, params of broadcast
- * \param [in] value, input value
- */
-template <class Traits, const ATVC::BroadcastPolicy& Policy, class DataType = typename ATVC::TypeListGet<typename Traits::In::types, 0>::Type>
-__global__ __aicore__ void AddcmulCustom(GM_ADDR tensor1,
- GM_ADDR tensor2,
- GM_ADDR input,
- GM_ADDR output,
- ATVC::BroadcastParam broadcastParam,
- DataType value)
-{
- KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_AIV_ONLY);
-
- // 1. get input and output for kernel op from host Traits
- using KernelOpIn = typename Traits::In::types;
- using KernelOpOut = typename Traits::Out::types;
- using KernelOpTemp = typename Traits::Temp::types;
-
- // 2. define input and output for pre compute
-    using PreComputeInput = ATVC::OpInputs<typename ATVC::TypeListGet<KernelOpIn, 0>::Type, typename ATVC::TypeListGet<KernelOpIn, 1>::Type>;
-    using PreComputeOutput = ATVC::OpOutputs<typename ATVC::TypeListGet<KernelOpTemp, 0>::Type>;
-    using PreComputeTemp = ATVC::OpOutputs<typename ATVC::TypeListGet<KernelOpTemp, 0>::Type>;
-    using PreComputeOpTraits = ATVC::OpTraits<PreComputeInput, PreComputeOutput, PreComputeTemp>;
-    using PreCompute = PreComputeMulOfBroadcast<PreComputeOpTraits>;
-
- // 3. define input and output for broadcast
-    using BroadcastOpInput = ATVC::OpInputs<typename ATVC::TypeListGet<KernelOpTemp, 0>::Type>;
-    using BroadcastOpOutput = ATVC::OpOutputs<typename ATVC::TypeListGet<KernelOpOut, 0>::Type>;
-    using BroadcastOpTraits = ATVC::OpTraits<BroadcastOpInput, BroadcastOpOutput>;
-
- // 4. define input and output for post compute
-    using PostComputeInput = ATVC::OpInputs<typename ATVC::TypeListGet<KernelOpIn, 2>::Type, typename ATVC::TypeListGet<KernelOpOut, 0>::Type>;
-    using PostComputeOutput = ATVC::OpOutputs<typename ATVC::TypeListGet<KernelOpOut, 0>::Type>;
-    using PostComputeOpTraits = ATVC::OpTraits<PostComputeInput, PostComputeOutput>;
-    using PostCompute = PostComputeAddOfBroadcast<PostComputeOpTraits>;
-
- // 5. call op run
-    auto op = ATVC::Kernel::BroadcastOpTemplate<ATVC::BroadcastCompute<BroadcastOpTraits>, Policy, PreCompute, PostCompute>(); // broadcast compute functor name assumed
- ATVC::BroadcastParam *param = &broadcastParam;
- op.Run(tensor1, tensor2, input, output, param, value);
-}
-#endif
diff --git a/atvc/examples/addcmul/post_compute_add_of_broadcast.h b/atvc/examples/addcmul/post_compute_add_of_broadcast.h
deleted file mode 100644
index 9059186cb36583d1208382e5e3085fa6c8ddc2bc..0000000000000000000000000000000000000000
--- a/atvc/examples/addcmul/post_compute_add_of_broadcast.h
+++ /dev/null
@@ -1 +0,0 @@
-../add_with_broadcast/post_compute_add_of_broadcast.h
\ No newline at end of file
diff --git a/atvc/examples/addcmul/pre_compute_mul_of_broadcast.h b/atvc/examples/addcmul/pre_compute_mul_of_broadcast.h
deleted file mode 100644
index 6df76098319d82cffc55eb6d601bca9d93a73e52..0000000000000000000000000000000000000000
--- a/atvc/examples/addcmul/pre_compute_mul_of_broadcast.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
- *
- * This file is a part of the CANN Open Software.
- * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
- * Please refer to the License for details. You may not use this file except in compliance with the License.
- * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
- * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
- * See LICENSE in the root of the software repository for the full text of the License.
- */
-#ifndef ATVC_PRE_COMPUTE_MUL_OF_BROADCAST_H
-#define ATVC_PRE_COMPUTE_MUL_OF_BROADCAST_H
-
-#include "broadcast/broadcast_device.h"
-
-template <class Traits>
-struct PreComputeMulOfBroadcast {
- using Inputs = typename Traits::In::types;
- using Outputs = typename Traits::Out::types;
-    using DataType = typename ATVC::TypeListGet<Inputs, 0>::Type;
-    static constexpr size_t INPUT_SIZE = ATVC::TypeListSize<Inputs>::VALUE;
-
-    template <class... Args>
- __aicore__ inline void SetParam(DataType value)
- {
- value_ = value;
- }
-
-    template <class... Args>
- __aicore__ inline void SetArgs(Args... args)
- {
- InitArgsInput<0>(args...);
- }
-
-    template <class... Args>
-    __aicore__ inline void operator()(AscendC::LocalTensor<DataType> tensor1, AscendC::LocalTensor<DataType> tensor2, AscendC::LocalTensor<DataType> temp1, AscendC::LocalTensor<DataType> temp2,
-                                      uint32_t copyInOffset, AscendC::DataCopyExtParams &copyInParams)
- {
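-        // Element count covered by this copy: blockCount rows of blockLen bytes plus the srcStride gap (in 32-byte units), converted from bytes to elements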
- size_t size = copyInParams.blockCount * (copyInParams.blockLen + copyInParams.srcStride * 32) / sizeof(DataType);
- ATVC::SyncDataQueue();
-
- CopyIn(tensor1, tensor2, copyInOffset, copyInParams);
-
-        AscendC::PipeBarrier<PIPE_V>(); // wait broadcast finished (PIPE_V assumed)
- ATVC::SyncDataQueue();
-
- AscendC::Mul(temp1, tensor1, tensor2, size);
- AscendC::Muls(temp2, temp1, value_, size);
- }
-
-private:
-    template <size_t start, class... Args>
- __aicore__ inline void InitArgsInput(GM_ADDR x, Args... args)
- {
- input_[start].SetGlobalBuffer((__gm__ DataType*)x);
- if constexpr (start + 1 < INPUT_SIZE) {
-            InitArgsInput<start + 1>(args...);
- }
- }
-
-    template <class... Args>
-    __aicore__ inline void CopyIn(AscendC::LocalTensor<DataType> tensor1, AscendC::LocalTensor<DataType> tensor2, uint32_t copyInOffset, AscendC::DataCopyExtParams &copyInParams)
- {
-        AscendC::DataCopyPadExtParams<DataType> padParams{false, 0, 0, 0};
-        AscendC::DataCopyPad(tensor1, input_[0][copyInOffset], copyInParams, padParams);
-        AscendC::DataCopyPad(tensor2, input_[1][copyInOffset], copyInParams, padParams);
- }
-
-    AscendC::GlobalTensor<DataType> input_[INPUT_SIZE];
- DataType value_;
-};
-
-#endif