From 9afc005239d0a0e7272b4b7dfb5d7e6ac761ead7 Mon Sep 17 00:00:00 2001 From: yanqingshang Date: Thu, 5 Nov 2020 11:19:14 +0800 Subject: [PATCH 1/2] update CMakeLists.txt. --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index dc85f683c..433104634 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,7 @@ if (ENABLE_OPEN_SRC) include_directories(${CMAKE_CURRENT_LIST_DIR}) include_directories(${CMAKE_CURRENT_LIST_DIR}/inc) include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/external) + include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/common) include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/soft_dp) if (NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/COMPILE_FLAGS OR NOT EXISTS -- Gitee From 2a8a193894964cef998bf663cb6486c3ffcd0643 Mon Sep 17 00:00:00 2001 From: yanqingshang Date: Thu, 5 Nov 2020 14:31:52 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E5=90=8C=E6=AD=A5=E5=A4=B4=E6=96=87?= =?UTF-8?q?=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- inc/common/opskernel/ops_kernel_info_store.h | 13 ----- inc/common/proto/insert_op.proto | 35 ++++++----- .../util/ai_core/common/aicore_util_types.h | 4 ++ inc/external/graph/gnode.h | 4 +- inc/external/hccl/hccl_types.h | 58 ++++++++++++++++++- inc/hccl/base.h | 6 +- 6 files changed, 85 insertions(+), 35 deletions(-) diff --git a/inc/common/opskernel/ops_kernel_info_store.h b/inc/common/opskernel/ops_kernel_info_store.h index 207ec4e05..330c67b62 100644 --- a/inc/common/opskernel/ops_kernel_info_store.h +++ b/inc/common/opskernel/ops_kernel_info_store.h @@ -65,24 +65,11 @@ class OpsKernelInfoStore { // opsFlag opsFlag[0] indicates constant folding is supported or not virtual void opsFlagCheck(const ge::Node &node, std::string &opsFlag) {}; - // memory allocation requirement - virtual Status CalcOpRunningParam(Node &node) = 0; /*lint -e148*/ - - // generate task for op。 - virtual Status GenerateTask(const Node &node, 
RunContext &context, - std::vector &tasks) = 0; /*lint -e148*/ - // only call fe engine interface to compile single op virtual Status CompileOp(vector &node_vec) { return SUCCESS; } virtual Status CompileOpRun(vector &node_vec) { return SUCCESS; } // load task for op virtual Status LoadTask(GETaskInfo &task) { return SUCCESS; } - - // only call aicpu interface to generate task struct - virtual Status GenSingleOpRunTask(const NodePtr &node, STR_FWK_OP_KERNEL &task, string &task_info) { return SUCCESS; } - - // only call aicpu interface to generate task struct - virtual Status GenMemCopyTask(uint64_t count, STR_FWK_OP_KERNEL &task, string &task_info) { return SUCCESS; } }; } // namespace ge #endif // INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_STORE_H_ diff --git a/inc/common/proto/insert_op.proto b/inc/common/proto/insert_op.proto index e28d65326..b4c969eb6 100644 --- a/inc/common/proto/insert_op.proto +++ b/inc/common/proto/insert_op.proto @@ -38,28 +38,31 @@ message AippOpParams { dynamic = 2; } - // AIPPģʽ־̬AIPPͶ̬AIPP + // AIPP模式,区分静态AIPP和动态AIPP AippMode aipp_mode = 1; - // related_input_rankΪΪͣ÷Χ>=0, <=DataӵĸĬֵΪ0 - // ʶģ͵ĵڼAIPPģ룬ҪԵ2AIPPrelated_input_rankΪ1 + // related_input_rank参数为必填,类型为整型,配置范围>=0, <=输入Data算子的个数,默认值为0。 + // 标识对模型的第几个输入做AIPP处理,例如模型有两个输入,需要对第2个输入做AIPP,则配置related_input_rank为1。 uint32 related_input_rank = 2; - // input_edge_idxΪѡΪͣ÷ΧΪ>=0 - // øòãڶDataӲͬͬAIPPòûãĬ϶related_input_rankָģAIPP - // ֵ <= Dataߵĸ + // related_input_name is optional and the top name of data node which inserts aipp + string related_input_name = 6; + + // input_edge_idx参数为可选,类型为整型,配置范围为>=0。 + // 配置该参数的作用,在于对Data算子不同的输出做不同的AIPP处理,如果该参数没有配置,默认对related_input_rank指定的模型输入的所有输出边做AIPP。 + // 配置值 <= Data算子输出边的个数。 repeated uint32 input_edge_idx = 3; - // [Begin] ̬AIPPþ̬AIPPʱЧ + // [Begin] 动态AIPP参数,配置静态AIPP时无效 uint32 max_src_image_size = 4; - // Ƿ֧תĬϲ֧֣֧תʱжĿռʧ + // 是否支持旋转。默认不支持,开启支持旋转时,会有额外的空间和性能损失 bool support_rotation = 5; - // [End] ̬AIPP + // [End] 动态AIPP参数 - // [Begin] ̬AIPPö̬AIPPʱЧ + // 
[Begin] 静态AIPP参数,配置动态AIPP时无效 InputFormat input_format = 51; bool csc_switch = 52; float cpadding_value = 53; @@ -115,7 +118,7 @@ message AippOpParams { repeated int32 input_bias_1 = 43; repeated int32 input_bias_2 = 44; - // [End] ̬AIPP + // [End] 静态AIPP参数 // The n number that is used for raw/rgbir data into f16 transformation. // The transformation equation is x/(2^n). If set to 0, no transform is performed. @@ -124,13 +127,13 @@ message AippOpParams { message MultiShapeOpParams { enum MultiShapeMode { - batch = 0; //̬batch - resolution = 1; //ֱ̬ʣչ + batch = 0; //动态batch + resolution = 1; //动态分辨率,扩展用 } - MultiShapeMode mode = 1; //ģʽ - uint32 related_input_rank = 2; //Ӳ뵽ĸ + MultiShapeMode mode = 1; //算子模式 + uint32 related_input_rank = 2; //新增算子插入到哪个输入 - repeated uint32 batch_list = 11; //batch_listֵbatch_listĸ28֮ + repeated uint32 batch_list = 11; //batch_list值,batch_list的个数是2到8之间 } diff --git a/inc/common/util/ai_core/common/aicore_util_types.h b/inc/common/util/ai_core/common/aicore_util_types.h index e57dbca71..f5e9d2298 100644 --- a/inc/common/util/ai_core/common/aicore_util_types.h +++ b/inc/common/util/ai_core/common/aicore_util_types.h @@ -109,6 +109,10 @@ enum OpImplType { EN_RESERVED // reserved value }; +// Dont change the order, only add new mode in the end +enum L2Mode { EN_L2_CLOSE = 0, EN_L2_BUFFER_OPTIMIZE, EN_L2_CACHE_NORMAL, EN_L2_CACHE_RC }; +enum BufferFusionMode { EN_OPTIMIZE_DISABLE = 0, EN_L2_BUFFER, EN_L2_FUSION}; + static const std::map DATATYPE_SIZE_MAP{ {ge::DT_FLOAT, sizeof(float)}, {ge::DT_FLOAT16, sizeof(int16_t)}, diff --git a/inc/external/graph/gnode.h b/inc/external/graph/gnode.h index 5465293f8..882e6d28e 100644 --- a/inc/external/graph/gnode.h +++ b/inc/external/graph/gnode.h @@ -116,9 +116,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GNode { bool HasAttr(const ge::AscendString &name); - graphStatus GetSubgraph(uint32_t index, GraphPtr graph) const; + graphStatus GetSubgraph(uint32_t index, GraphPtr &graph) const; - 
graphStatus GetALLSubgraphs(std::vector graph_list) const; + graphStatus GetALLSubgraphs(std::vector &graph_list) const; private: std::shared_ptr impl_; diff --git a/inc/external/hccl/hccl_types.h b/inc/external/hccl/hccl_types.h index d67af11e6..258374f33 100644 --- a/inc/external/hccl/hccl_types.h +++ b/inc/external/hccl/hccl_types.h @@ -19,20 +19,74 @@ #include -#ifdef _cplusplus +#ifdef __cplusplus extern "C" { #endif typedef enum { + HCCL_SUCCESS = 0, /**< success */ + HCCL_E_PARA = 1, /**< parameter error */ + HCCL_E_PTR = 2, /**< empty pointer */ + HCCL_E_MEMORY = 3, /**< memory error */ + HCCL_E_INTERNAL = 4, /**< internal error */ + HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ + HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ + HCCL_E_UNAVAIL = 7, /**< resource unavailable */ + HCCL_E_SYSCALL = 8, /**< call system interface error */ + HCCL_E_TIMEOUT = 9, /**< timeout */ + HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ + HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ + HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ + HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ + HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ + HCCL_E_RUNTIME = 15, /**< call runtime api fail */ + HCCL_E_DRV = 16, /**< call driver api fail */ + HCCL_E_PROFILING = 17, /**< call profiling api fail */ + HCCL_E_CCE = 18, /**< call cce api fail */ + HCCL_E_NETWORK = 19, /**< call network api fail */ + HCCL_E_RESERVED /**< reserved */ } HcclResult; +/** + * @brief handle to HCCL communicator + */ +typedef void *HcclComm; + +/** + * @brief HCCL Reduction operation + */ typedef enum { + HCCL_REDUCE_SUM = 0, /**< sum */ + HCCL_REDUCE_PROD = 1, /**< prod */ + HCCL_REDUCE_MAX = 2, /**< max */ + HCCL_REDUCE_MIN = 3, /**< min */ + HCCL_REDUCE_RESERVED /**< reserved */ } HcclReduceOp; +/** + * @brief HCCL data type + */ typedef enum { + HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ + HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ + HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ + 
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ + HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ + HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ + HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ + HCCL_DATA_TYPE_RESERVED /**< reserved */ } HcclDataType; -#ifdef _cplusplus +const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length + +/** + * @brief HCCL root info + */ +typedef struct HcclRootInfoDef { + char internal[HCCL_ROOT_INFO_BYTES]; +} HcclRootInfo; + +#ifdef __cplusplus } #endif #endif diff --git a/inc/hccl/base.h b/inc/hccl/base.h index 94253bf4a..8194097e2 100644 --- a/inc/hccl/base.h +++ b/inc/hccl/base.h @@ -68,8 +68,10 @@ struct MemRegisterAddr { u64 addr; u64 length; }; - -const u32 HCCL_MAX_MEM_REGISTER_NUM = 8; // The max number of memory register address. +/* + * @brief The max number of memory register address for remote access. + */ +const u32 HCCL_MAX_MEM_REGISTER_NUM = 32; enum GradSplitForceMode { FORCE_NONE, /**< no force */ -- Gitee