From 6cdd5ea355e9990bd6e8762375d7e1be2cc56476 Mon Sep 17 00:00:00 2001 From: chuboning Date: Fri, 15 Sep 2023 09:09:32 +0800 Subject: [PATCH] Remove dependency from torch_npu --- README.en.md | 33 +-- README.zh.md | 36 +-- scripts/build.sh | 4 +- scripts/make_patch.sh | 1 - src/csrc/npu_float_status/common.h | 42 ---- src/csrc/npu_float_status/op_float_status.cpp | 50 ----- src/csrc/npu_float_status/op_runner.cpp | 206 ------------------ src/csrc/npu_float_status/op_runner.h | 124 ----------- src/csrc/npu_float_status/operator_desc.cpp | 65 ------ src/csrc/npu_float_status/operator_desc.h | 66 ------ 10 files changed, 20 insertions(+), 607 deletions(-) delete mode 100644 src/csrc/npu_float_status/common.h delete mode 100644 src/csrc/npu_float_status/op_float_status.cpp delete mode 100644 src/csrc/npu_float_status/op_runner.cpp delete mode 100644 src/csrc/npu_float_status/op_runner.h delete mode 100644 src/csrc/npu_float_status/operator_desc.cpp delete mode 100644 src/csrc/npu_float_status/operator_desc.h diff --git a/README.en.md b/README.en.md index 829e3d9..04089c5 100644 --- a/README.en.md +++ b/README.en.md @@ -3,46 +3,33 @@ ## Full Code Generation and Compilation -Note: The root directory in the following description refers to the root directory of Ascend apex. -**Obtain the native apex source code.** +**Obtain the apex source code.** -Obtain the source code from GitHub and run the following command in the root directory: +Run the following command in the root directory: ``` -git clone https://github.com/NVIDIA/apex.git +git clone -b master https://gitee.com/ascend/apex.git +cd apex/ ``` -Go to the source code directory and switch to the branch whose **commitid** is **4ef930c1c884fdca5f472ab2ce7cb9b505d26c1a**. -``` -cd apex -git checkout 4ef930c1c884fdca5f472ab2ce7cb9b505d26c1a -cd .. -``` - -**Generate the apex code adapted to Ascend AI Processors.** - -Go to the **scripts** directory and run the following command: -``` -bash gen.sh -``` -The full code adapted to NPUs is generated in the **apex** directory under the root directory. **Compile the binary package of apex.** -1. Ensure that PyTorch of the NPU version can be properly used. Otherwise, the apex compilation will be affected. +1. Ensure that torch is installed and the version of setuptools is less than or equal to 65.7.0 (otherwise run: pip install setuptools==41.2.0). -2. Go to the **apex** directory under the root directory and run the following command: +2. Run the following command(python3.7-3.10 is supported): ``` -python3 setup.py --cpp_ext --npu_float_status bdist_wheel +bash scripts/build.sh --python=3.7 ``` The generated binary package is stored in the current **dist** directory. ## Installation -Go to the **dist** directory and run the following command: +Run the following command: ``` +cd apex/dist/ pip3 uninstall apex -pip3 install --upgrade apex-0.1+ascend-cp37-cp37m-linux_{arch}.whl *arch* indicates the architecture, which can be AArch64 or x86_64. +pip3 install --upgrade apex-0.1+ascend-{version}.whl *version* indicates the python version and cpu architecture. ``` diff --git a/README.zh.md b/README.zh.md index b3178be..7154cfa 100644 --- a/README.zh.md +++ b/README.zh.md @@ -22,52 +22,32 @@ ## 生成全量代码及编译 -注:以下描述中的“根目录”指“Ascend apex的根目录” - ##### 获取昇腾适配的Ascend apex源码 ``` git clone -b master https://gitee.com/ascend/apex.git +cd apex/ ``` -##### 获取原生apex源代码 - -从github上获取源代码,在根目录下执行 -``` -git clone https://github.com/NVIDIA/apex.git -``` -进入源代码目录,切换至commitid为4ef930c1c884fdca5f472ab2ce7cb9b505d26c1a的分支 -``` -cd apex -git checkout 4ef930c1c884fdca5f472ab2ce7cb9b505d26c1a -cd .. -``` - -##### 生成适配昇腾AI处理器的apex代码 - -进入到scripts目录,执行 -``` -bash gen.sh -``` -会在根目录下apex目录中生成npu适配全量代码 ##### 编译apex的二进制包 -1、请确保npu版本的pytorch可以正常使用(否则会影响apex的编译); +1、请确保torch已安装,setuptools版本小于等于65.7.0(不满足时执行pip install setuptools==41.2.0) -2、进入到根目录下apex目录,执行 +2、执行(支持python3.7-3.10,确保python3.x命令存在) ``` -python3 setup.py --cpp_ext --npu_float_status bdist_wheel +bash scripts/build.sh --python=3.7 ``` -生成的二进制包在当前的dist目录下 +生成的二进制包在apex/dist目录下 ## 安装 -进入dist目录,执行以下命令: +进入apex/dist目录,执行以下命令: ``` +cd apex/dist/ pip3 uninstall apex -pip3 install --upgrade apex-0.1+ascend-cp37-cp37m-linux_{arch}.whl arch表示架构,为aarch64或x86_64 +pip3 install --upgrade apex-0.1+ascend-{version}.whl version代表python版本和cpu架构 ``` diff --git a/scripts/build.sh b/scripts/build.sh index 3f9c89a..14ad511 100644 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -18,7 +18,7 @@ CUR_DIR=$(dirname $(readlink -f $0)) ROOT_DIR=$CUR_DIR/.. APEX_DIR=$ROOT_DIR/apex -SUPPORTED_PY_VERSION=(3.7 3.8 3.9) +SUPPORTED_PY_VERSION=(3.7 3.8 3.9 3.10) PY_VERSION='3.7' # Default supported python version is 3.8 DEFAULT_SCRIPT_ARGS_NUM=1 # Default supported input parameters @@ -114,7 +114,7 @@ function main() # compile cd $APEX_DIR || exit - python"${PY_VERSION}" setup.py --cpp_ext --npu_float_status bdist_wheel + python"${PY_VERSION}" setup.py --cpp_ext bdist_wheel } set -e diff --git a/scripts/make_patch.sh b/scripts/make_patch.sh index b506c46..2c15b8a 100644 --- a/scripts/make_patch.sh +++ b/scripts/make_patch.sh @@ -34,7 +34,6 @@ mv $SRC_DIR/apex/optimizers/lamb.py temp/src/apex/optimizers mv $SRC_DIR/apex/optimizers/npu* temp/src/apex/optimizers mv $SRC_DIR/csrc/combine_tensors temp/src/csrc -mv $SRC_DIR/csrc/npu_float_status temp/src/csrc mv $SRC_DIR/tests/L0/run_optimizers/test_lamb_mnist.py temp/tests/L0/run_optimizers mv $SRC_DIR/tests/L0/device.py temp/tests/L0 diff --git a/src/csrc/npu_float_status/common.h b/src/csrc/npu_float_status/common.h deleted file mode 100644 index e578bfc..0000000 --- a/src/csrc/npu_float_status/common.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2020, Huawei Technologies.All rights reserved. - * - * Licensed under the BSD 3-Clause License (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://opensource.org/licenses/BSD-3-Clause - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef COMMON_H -#define COMMON_H - -#include -#include -#include -#include -#include - -#include -#include - -#define RUN_SUCCESS 0 -#define RUN_FAILED 1 - -#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) -#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) -#define ERROR_LOG(fmt, args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args) - -const std::string OP_TYPE_NPU_GET_FLOAT_STATUS = "NPUGetFloatStatus"; -const std::string OP_TYPE_NPU_CLEAR_FLOAT_STATUS = "NPUClearFloatStatus"; - -const int FLOAT_STATUS_OP_TENSOR_DIMS_SIZE = 8; -const int FLOAT_STATUS_OVERFLOW = 1; - -#endif // COMMON_H diff --git a/src/csrc/npu_float_status/op_float_status.cpp b/src/csrc/npu_float_status/op_float_status.cpp deleted file mode 100644 index 18767fe..0000000 --- a/src/csrc/npu_float_status/op_float_status.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2020, Huawei Technologies.All rights reserved. - * - * Licensed under the BSD 3-Clause License (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://opensource.org/licenses/BSD-3-Clause - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include "op_runner.h" -#include "common.h" - -OperatorDesc CreateFloatStatusOpDesc(const std::string opType) -{ - std::vector shape{FLOAT_STATUS_OP_TENSOR_DIMS_SIZE}; - aclDataType dataType = ACL_FLOAT; - aclFormat format = ACL_FORMAT_ND; - OperatorDesc opDesc(opType); - if ((opType == OP_TYPE_NPU_GET_FLOAT_STATUS) || - (opType == OP_TYPE_NPU_CLEAR_FLOAT_STATUS)) { - opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); - } - opDesc.AddOutputTensorDesc(dataType, shape.size(), shape.data(), format); - return opDesc; -} - -/* This function is used for linking torch/acl .so files */ -at::Tensor TestFlatten(std::vector tensors) -{ - return torch::utils::flatten_dense_tensors(tensors); -} - -PYBIND11_MODULE(npu_float_status, m) { - m.doc() = "float status op"; - m.def("TestFlatten", &TestFlatten, "Test flatten"); -} \ No newline at end of file diff --git a/src/csrc/npu_float_status/op_runner.cpp b/src/csrc/npu_float_status/op_runner.cpp deleted file mode 100644 index 39d6388..0000000 --- a/src/csrc/npu_float_status/op_runner.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Copyright (c) 2020, Huawei Technologies.All rights reserved. - * - * Licensed under the BSD 3-Clause License (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://opensource.org/licenses/BSD-3-Clause - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "op_runner.h" -#include -#include "common.h" -#include -#include - -using namespace std; - -OpRunner::OpRunner(OperatorDesc *opDesc) : opDesc_(opDesc) -{ - numInputs_ = opDesc->inputDesc.size(); - numOutputs_ = opDesc->outputDesc.size(); -} - -OpRunner::~OpRunner() -{ - for (auto *inputBuf : inputBuffers_) { - aclDestroyDataBuffer(inputBuf); - } - - for (auto *devInput : devInputs_) { - aclrtFree(devInput); - } - - for (auto *hostInput : hostInputs_) { - aclrtFreeHost(hostInput); - } - - for (auto *outputBuf : outputBuffers_) { - aclDestroyDataBuffer(outputBuf); - } - - for (auto *devOutput : devOutputs_) { - aclrtFree(devOutput); - } - - for (auto *hostOutput : hostOutputs_) { - aclrtFreeHost(hostOutput); - } -} - -int OpRunner::Init() -{ - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - void *devMem = nullptr; - if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_ERROR_NONE) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return RUN_FAILED; - } - - if (aclrtMemset(devMem, size, 0, size) != ACL_ERROR_NONE) { - ERROR_LOG("Set device memory for input[%zu] failed", i); - return RUN_FAILED; - } - - devInputs_.emplace_back(devMem); - inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - - void *hostMem = nullptr; - if (aclrtMallocHost(&hostMem, size) != ACL_ERROR_NONE) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return RUN_FAILED; - } - if (hostMem == nullptr) { - ERROR_LOG("Malloc memory for input[%zu] failed", i); - return RUN_FAILED; - } - hostInputs_.emplace_back(hostMem); - } - - for (size_t i = 0; i < numOutputs_; ++i) { - auto size = GetOutputSize(i); - void *devMem = nullptr; - if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_ERROR_NONE) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return RUN_FAILED; - } - - if (aclrtMemset(devMem, size, 0, size) != ACL_ERROR_NONE) { - ERROR_LOG("Set device memory for output[%zu] failed", i); - return RUN_FAILED; - } - - devOutputs_.emplace_back(devMem); - outputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - - void *hostOutput = nullptr; - if (aclrtMallocHost(&hostOutput, size) != ACL_ERROR_NONE) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return RUN_FAILED; - } - if (hostOutput == nullptr) { - ERROR_LOG("Malloc host memory for output[%zu] failed", i); - return RUN_FAILED; - } - hostOutputs_.emplace_back(hostOutput); - } - - return RUN_SUCCESS; -} - -size_t OpRunner::NumInputs() -{ - return numInputs_; -} - -size_t OpRunner::NumOutputs() -{ - return numOutputs_; -} - -size_t OpRunner::GetInputSize(size_t index) -{ - if (index >= opDesc_->inputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - - return aclGetTensorDescSize(opDesc_->inputDesc[index]); -} - -size_t OpRunner::GetOutputSize(size_t index) -{ - if (index >= opDesc_->outputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; - } - - return aclGetTensorDescSize(opDesc_->outputDesc[index]); -} - -int OpRunner::RunOp() -{ - auto stream = c10_npu::getCurrentNPUStream(); - int holdGIL = PyGILState_Check(); - aclError ret = ACL_ERROR_NONE; - - if (holdGIL) { - Py_BEGIN_ALLOW_THREADS - ret = aclopCompileAndExecute(opDesc_->opType.c_str(), - numInputs_, - opDesc_->inputDesc.data(), - inputBuffers_.data(), - numOutputs_, - opDesc_->outputDesc.data(), - outputBuffers_.data(), - opDesc_->opAttr, - ACL_ENGINE_SYS, - ACL_COMPILE_SYS, - nullptr, - stream); - Py_END_ALLOW_THREADS - } else { - ret = aclopCompileAndExecute(opDesc_->opType.c_str(), - numInputs_, - opDesc_->inputDesc.data(), - inputBuffers_.data(), - numOutputs_, - opDesc_->outputDesc.data(), - outputBuffers_.data(), - opDesc_->opAttr, - ACL_ENGINE_SYS, - ACL_COMPILE_SYS, - nullptr, - stream); - } - - if (ret != ACL_ERROR_NONE) { - ERROR_LOG("Execute %s failed. ret = %d", opDesc_->opType.c_str(), ret); - return RUN_FAILED; - } - - if (opDesc_->opType == OP_TYPE_NPU_GET_FLOAT_STATUS) { - if (aclrtSynchronizeStream(stream) != ACL_ERROR_NONE) { - ERROR_LOG("Synchronize stream failed"); - return RUN_FAILED; - } - - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - if (aclrtMemcpy(hostInputs_[i], size, devInputs_[i], size, ACL_MEMCPY_DEVICE_TO_HOST) != ACL_ERROR_NONE) { - ERROR_LOG("Copy input[%zu] failed", i); - return RUN_FAILED; - } - } - } - - return RUN_SUCCESS; -} diff --git a/src/csrc/npu_float_status/op_runner.h b/src/csrc/npu_float_status/op_runner.h deleted file mode 100644 index 81b38b7..0000000 --- a/src/csrc/npu_float_status/op_runner.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2020, Huawei Technologies.All rights reserved. - * - * Licensed under the BSD 3-Clause License (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://opensource.org/licenses/BSD-3-Clause - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef OP_RUNNER_H -#define OP_RUNNER_H - -#include "common.h" -#include "operator_desc.h" - -/** - * Op Runner - */ -class OpRunner { -public: - /** - * @brief Constructor - * @param [in] opDesc: op description - */ - explicit OpRunner(OperatorDesc *opDesc); - - /** - * @brief Destructor - */ - ~OpRunner(); - - /** - * @brief Init op runner - */ - int Init(); - - /** - * @brief Get number of inputs - * @return number of inputs - */ - size_t NumInputs(); - - /** - * @brief Get number of outputs - * @return number of outputs - */ - size_t NumOutputs(); - - /** - * @brief Get input size by index - * @param [in] index: input index - * @return size of the input - */ - size_t GetInputSize(size_t index); - - /** - * @brief Get output size by index - * @param [in] index: output index - * @return size of the output - */ - size_t GetOutputSize(size_t index); - - /** - * @brief Get input buffer(host memory) by index - * @tparam T: data type - * @param [in] index: input index - * @return host address of the input - */ - template - T *GetInputBuffer(size_t index) - { - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return nullptr; - } - return reinterpret_cast(hostInputs_[index]); - } - - /** - * @brief Get output buffer(host memory) by index - * @tparam T: data type - * @param [in] index: output index - * @return host address of the output - */ - template - const T *GetOutputBuffer(size_t index) - { - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return nullptr; - } - - return reinterpret_cast(hostOutputs_[index]); - } - - /** - * @brief Run op - * @return run result - */ - int RunOp(); - -private: - size_t numInputs_; - size_t numOutputs_; - - std::vector inputBuffers_; - std::vector outputBuffers_; - - std::vector devInputs_; - std::vector devOutputs_; - - std::vector hostInputs_; - std::vector hostOutputs_; - OperatorDesc *opDesc_; -}; - -#endif // OP_RUNNER_H diff --git a/src/csrc/npu_float_status/operator_desc.cpp b/src/csrc/npu_float_status/operator_desc.cpp deleted file mode 100644 index f55e6ef..0000000 --- a/src/csrc/npu_float_status/operator_desc.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2020, Huawei Technologies.All rights reserved. - * - * Licensed under the BSD 3-Clause License (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://opensource.org/licenses/BSD-3-Clause - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "common.h" -#include "operator_desc.h" - -using namespace std; - -OperatorDesc::OperatorDesc(std::string opType) : opType(std::move(opType)) -{ - opAttr = aclopCreateAttr(); -} - -OperatorDesc::~OperatorDesc() -{ - for (auto *desc : inputDesc) { - aclDestroyTensorDesc(desc); - } - - for (auto *desc : outputDesc) { - aclDestroyTensorDesc(desc); - } - - aclopDestroyAttr(opAttr); -} - -OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType, - int numDims, - const int64_t *dims, - aclFormat format) -{ - if (numDims > 0 && dims == nullptr) { - ERROR_LOG("dims is nullptr while numDims > 0"); - return *this; - } - inputDesc.push_back(aclCreateTensorDesc(dataType, numDims, dims, format)); - return *this; -} - -OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType, - int numDims, - const int64_t *dims, - aclFormat format) -{ - if (numDims > 0 && dims == nullptr) { - ERROR_LOG("dims is nullptr while numDims > 0"); - return *this; - } - - outputDesc.push_back(aclCreateTensorDesc(dataType, numDims, dims, format)); - return *this; -} diff --git a/src/csrc/npu_float_status/operator_desc.h b/src/csrc/npu_float_status/operator_desc.h deleted file mode 100644 index 9035ff7..0000000 --- a/src/csrc/npu_float_status/operator_desc.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2020, Huawei Technologies.All rights reserved. - * - * Licensed under the BSD 3-Clause License (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://opensource.org/licenses/BSD-3-Clause - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef OPERATOR_DESC_H -#define OPERATOR_DESC_H - -#include -#include - -#include - -/** - * Op description - */ -struct OperatorDesc { - /** - * Constructor - * @param [in] opType: op type - */ - explicit OperatorDesc(std::string opType); - - /** - * Destructor - */ - ~OperatorDesc(); - - /** - * Add an input tensor description - * @param [in] dataType: data type - * @param [in] numDims: number of dims - * @param [in] dims: dims - * @param [in] format: format - * @return OperatorDesc - */ - OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); - - /** - * Add an output tensor description - * @param [in] dataType: data type - * @param [in] numDims: number of dims - * @param [in] dims: dims - * @param [in] format: format - * @return OperatorDesc - */ - OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); - - std::string opType; - std::vector inputDesc; - std::vector outputDesc; - aclopAttr *opAttr; -}; - -#endif // OPERATOR_DESC_H -- Gitee