diff --git a/CMakeLists.txt b/CMakeLists.txt
index dc85f683c112e6ec1b5264671c0b6a58a42a31f7..c82a6bf726ff292bb96d2d73ea23f00763dacab3 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,8 +4,9 @@ if (ENABLE_OPEN_SRC)
   project(TFAdapter)
   set(CMAKE_CXX_STANDARD 11)
 
-  set(CMAKE_C_FLAGS "-O2 -Wall -fPIC -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -pipe ${CMAKE_C_FLAGS}")
-  set(CMAKE_CXX_FLAGS "-std=c++11 -O2 -Wall -fPIC -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -pipe ${CMAKE_CXX_FLAGS}")
+  set(CMAKE_C_FLAGS "-O2 -DNDEBUG -Wall -fPIC -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -pipe ${CMAKE_C_FLAGS}")
+  set(CMAKE_CXX_FLAGS "-std=c++11 -O2 -DNDEBUG -Wall -fPIC -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -pipe ${CMAKE_CXX_FLAGS}")
+  add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
   # build external prjects
   if(DEFINED ENV{D_PKG_SERVER})
     set(TF_PKG_SERVER $ENV{D_PKG_SERVER})
@@ -18,7 +19,7 @@ if (ENABLE_OPEN_SRC)
   include_directories(${CMAKE_CURRENT_LIST_DIR}/inc)
   include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/external)
   include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/soft_dp)
-
+
   if (NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/COMPILE_FLAGS OR NOT
       EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/LINK_FLAGS OR NOT
       EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/PYTHON_BIN_PATH OR NOT EXISTS
diff --git a/configure.py b/configure.py
index ce7a3ec12f6b1987ee126df9075632ee4c8aeb72..43f64415f784fb4dfd1991ee155342ae484fd65e 100755
--- a/configure.py
+++ b/configure.py
@@ -131,15 +132,14 @@
       break
     elif not os.path.exists(ascend_path):
       print('Invalid ascend path: %s cannot be found.' % ascend_path)
-
   with open(real_config_path('LINK_FLAGS'), 'a') as f:
+    f.write(os.path.join(ascend_path, "driver", "lib64", "driver", "libtsdclient.so\n"))
+    f.write(os.path.join(ascend_path, "driver", "lib64", "common", "libc_sec.so\n"))
     f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libge_runner.so\n"))
     f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libfmk_parser.so\n"))
     f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libdatatransfer.so\n"))
     f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libindextransform.so\n"))
-    f.write(os.path.join(ascend_path, "driver", "lib64", "driver", "libtsdclient.so\n"))
-    f.write(os.path.join(ascend_path, "driver", "lib64", "common", "libc_sec.so\n"))
-
 def setup_swig():
   """Get swig install path."""
   default_swig_path = which('swig')
diff --git a/inc/common/proto/insert_op.proto b/inc/common/proto/insert_op.proto
index e28d65326904bf04df4482372256d2ac5ba5faea..b4c969eb695dafe4cfb0dd4a6e4f3281ca4150ef 100644
--- a/inc/common/proto/insert_op.proto
+++ b/inc/common/proto/insert_op.proto
@@ -38,28 +38,31 @@ message AippOpParams {
     dynamic = 2;
   }
 
-  // AIPP模式,区分静态AIPP和动态AIPP
+  // AIPP mode: distinguishes static AIPP from dynamic AIPP
   AippMode aipp_mode = 1;
 
-  // related_input_rank参数为必填,类型为整型,配置范围>=0, <=输入Data算子的个数,默认值为0。
-  // 标识对模型的第几个输入做AIPP处理,例如模型有两个输入,需要对第2个输入做AIPP,则配置related_input_rank为1。
+  // related_input_rank is required, integer type, valid range >= 0 and <= the number of input Data operators, default 0.
+  // It identifies which model input AIPP is applied to; e.g. if the model has two inputs and AIPP is needed on the second input, set related_input_rank to 1.
   uint32 related_input_rank = 2;
 
-  // input_edge_idx参数为可选,类型为整型,配置范围为>=0。
-  // 配置该参数的作用,在于对Data算子不同的输出做不同的AIPP处理,如果该参数没有配置,默认对related_input_rank指定的模型输入的所有输出边做AIPP。
-  // 配置值 <= Data算子输出边的个数。
+  // related_input_name is optional and the top name of data node which inserts aipp
+  string related_input_name = 6;
+
+  // input_edge_idx is optional, integer type, valid range >= 0.
+  // It applies different AIPP processing to different outputs of the Data operator; if it is not set, AIPP is applied to all output edges of the model input specified by related_input_rank.
+  // The value must be <= the number of output edges of the Data operator.
   repeated uint32 input_edge_idx = 3;
 
-  // [Begin] 动态AIPP参数,配置静态AIPP时无效
+  // [Begin] dynamic AIPP parameters, ignored when static AIPP is configured
   uint32 max_src_image_size = 4;
 
-  // 是否支持旋转。默认不支持,开启支持旋转时,会有额外的空间和性能损失
+  // Whether rotation is supported. Disabled by default; enabling rotation costs extra memory and performance.
   bool support_rotation = 5;
 
-  // [End] 动态AIPP参数
+  // [End] dynamic AIPP parameters
 
-  // [Begin] 静态AIPP参数,配置动态AIPP时无效
+  // [Begin] static AIPP parameters, ignored when dynamic AIPP is configured
   InputFormat input_format = 51;
   bool csc_switch = 52;
   float cpadding_value = 53;
@@ -115,7 +118,7 @@ message AippOpParams {
   repeated int32 input_bias_1 = 43;
   repeated int32 input_bias_2 = 44;
 
-  // [End] 静态AIPP参数
+  // [End] static AIPP parameters
 
   // The n number that is used for raw/rgbir data into f16 transformation.
   // The transformation equation is x/(2^n). If set to 0, no transform is performed.
@@ -124,13 +127,13 @@ message AippOpParams {
 
 message MultiShapeOpParams {
   enum MultiShapeMode {
-    batch = 0; //动态batch
-    resolution = 1; //动态分辨率,扩展用
+    batch = 0; // dynamic batch
+    resolution = 1; // dynamic resolution, reserved for extension
   }
 
-  MultiShapeMode mode = 1; //算子模式
-  uint32 related_input_rank = 2; //新增算子插入到哪个输入
+  MultiShapeMode mode = 1; // operator mode
+  uint32 related_input_rank = 2; // which input the new operator is inserted on
 
-  repeated uint32 batch_list = 11; //batch_list值,batch_list的个数是2到8之间
+  repeated uint32 batch_list = 11; // batch_list values; the number of entries is between 2 and 8
 }
diff --git a/inc/common/util/ai_core/common/aicore_util_attr_define.h b/inc/common/util/ai_core/common/aicore_util_attr_define.h
index f3556cb501e6b6decea6e27806046e912e8b83b1..297be0b238e49e4ae1775352677d11bf88fa34ca 100644
--- a/inc/common/util/ai_core/common/aicore_util_attr_define.h
+++ b/inc/common/util/ai_core/common/aicore_util_attr_define.h
@@ -32,6 +32,14 @@ static const std::string L1_OPTIMIZED = "l1_optimized";
 
 static const std::string L2_OPTIMIZED = "l2_optimized";
 
-static const std::string OP_SLICE_INFO = "_op_slice_info";
+static const std::string ATTR_NAME_UNKNOWN_SHAPE = "_unknown_shape";
+
+static const std::string ATTR_NAME_IS_UNKNOWN_GRAPH = "_fe_is_unknown_graph";
+
+static const std::string ATTR_NAME_IS_UNKNOWN_SHAPE_OP = "_fe_is_unknown_shape_op";
+
+static const std::string ATTR_NAME_TVM_CACHE_READ_MODE = "tvm_cache_read_mode";
+
+static const std::string ATTR_NAME_TBE_KERNEL_SIZE = "_tbeKernelSize";
 }  // namespace fe
 #endif
diff --git a/inc/common/util/ai_core/common/aicore_util_types.h b/inc/common/util/ai_core/common/aicore_util_types.h
index e57dbca71e33022ab69ce3a97cf12abd8f834e2a..f5e9d229896e66957781c090dd133b1f6a567692 100644
--- a/inc/common/util/ai_core/common/aicore_util_types.h
+++ b/inc/common/util/ai_core/common/aicore_util_types.h
@@ -109,6 +109,10 @@ enum OpImplType {
   EN_RESERVED  // reserved value
 };
 
+// Don't change the order; only add new modes at the end
+enum L2Mode { EN_L2_CLOSE = 0, EN_L2_BUFFER_OPTIMIZE, EN_L2_CACHE_NORMAL, EN_L2_CACHE_RC };
+enum BufferFusionMode { EN_OPTIMIZE_DISABLE = 0, EN_L2_BUFFER, EN_L2_FUSION };
+
 static const std::map DATATYPE_SIZE_MAP{
     {ge::DT_FLOAT, sizeof(float)},
     {ge::DT_FLOAT16, sizeof(int16_t)},
diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h
index 1c6b7a3ef7dbabcd8a85999ec990671cc5852d8e..d4fccffda72d92c115729d3b0c9d7e6bab93fae6 100644
--- a/inc/external/ge/ge_api_types.h
+++ b/inc/external/ge/ge_api_types.h
@@ -222,6 +222,18 @@ const char *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum";
 // Its value should be "0" or "1", default value is "1"
 const char *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass";
 
+// Configure operator compilation path
+// Its value should be a file path, default value is "./"
+const char *const DEBUG_DIR = "ge.debugDir";
+
+// Configure operator compiler cache path
+// Its value should be a file path, default value is "./"
+const char *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir";
+
+// Configure operator compiler cache mode
+// Its value should be "disable", "enable" or "force", default value is "disable"
+const char *const OP_COMPILER_CACHE_MODE = "ge.op_compiler_cache_mode";
+
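Editor's note: the three new keys above are plain string options. A minimal sketch of passing them at GE initialization follows; the keys and accepted values come from the comments above, while the ge::GEInitialize entry point, its std::map<std::string, std::string> signature, the include paths, and the directory values are assumptions for illustration.

#include <map>
#include <string>
#include "ge/ge_api.h"        // assumed header providing ge::GEInitialize
#include "ge/ge_api_types.h"  // declares DEBUG_DIR / OP_COMPILER_CACHE_* as above

int main() {
  std::map<std::string, std::string> options;
  options[ge::DEBUG_DIR] = "./kernel_debug";          // operator compilation artifacts
  options[ge::OP_COMPILER_CACHE_DIR] = "./op_cache";  // operator compiler cache directory
  options[ge::OP_COMPILER_CACHE_MODE] = "enable";     // "disable", "enable" or "force"
  return ge::GEInitialize(options) == ge::SUCCESS ? 0 : 1;
}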
 // Configure whether to use single stream.
 // Its value should be "true" or "false", default value is "false"
 const char *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream";
 
@@ -298,6 +310,9 @@ namespace ir_option {
 static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str();
 static const char *const LOG_LEVEL = "log";
 static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str();
+static const char *const DEBUG_DIR = ge::DEBUG_DIR;
+static const char *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR;
+static const char *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE;
 
 // for interface: aclgrphBuildModel
 const std::set ir_builder_suppported_options = {
@@ -331,7 +346,10 @@ namespace ir_option {
     FUSION_SWITCH_FILE,
     ENABLE_SMALL_CHANNEL,
     OP_SELECT_IMPL_MODE,
-    OPTYPELIST_FOR_IMPLMODE
+    OPTYPELIST_FOR_IMPLMODE,
+    DEBUG_DIR,
+    OP_COMPILER_CACHE_DIR,
+    OP_COMPILER_CACHE_MODE
 };
 }
 }  // namespace ge
diff --git a/inc/external/graph/gnode.h b/inc/external/graph/gnode.h
index 5465293f8e436f0f645bf6198fe3db9a3bdde96d..882e6d28ead4447856c2470ae81aa81bfc130b2f 100644
--- a/inc/external/graph/gnode.h
+++ b/inc/external/graph/gnode.h
@@ -116,9 +116,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GNode {
 
   bool HasAttr(const ge::AscendString &name);
 
-  graphStatus GetSubgraph(uint32_t index, GraphPtr graph) const;
+  graphStatus GetSubgraph(uint32_t index, GraphPtr &graph) const;
 
-  graphStatus GetALLSubgraphs(std::vector graph_list) const;
+  graphStatus GetALLSubgraphs(std::vector &graph_list) const;
 
  private:
   std::shared_ptr impl_;
diff --git a/inc/external/hccl/hccl_types.h b/inc/external/hccl/hccl_types.h
index d67af11e65c24034167ed488eae6af5e701faf68..de982588a3debf67694e08dfa5889fad0cdd4e28 100644
--- a/inc/external/hccl/hccl_types.h
+++ b/inc/external/hccl/hccl_types.h
@@ -19,20 +19,77 @@
 
 #include 
 
-#ifdef _cplusplus
+#ifdef __cplusplus
 extern "C" {
-#endif
+#endif  // __cplusplus
 
+/**
+ * @brief HCCL functions return value definition
+ */
 typedef enum {
+  HCCL_SUCCESS = 0,               /**< success */
+  HCCL_E_PARA = 1,                /**< parameter error */
+  HCCL_E_PTR = 2,                 /**< empty pointer */
+  HCCL_E_MEMORY = 3,              /**< memory error */
+  HCCL_E_INTERNAL = 4,            /**< internal error */
+  HCCL_E_NOT_SUPPORT = 5,         /**< not support feature */
+  HCCL_E_NOT_FOUND = 6,           /**< not found specific resource */
+  HCCL_E_UNAVAIL = 7,             /**< resource unavailable */
+  HCCL_E_SYSCALL = 8,             /**< call system interface error */
+  HCCL_E_TIMEOUT = 9,             /**< timeout */
+  HCCL_E_OPEN_FILE_FAILURE = 10,  /**< open file fail */
+  HCCL_E_TCP_CONNECT = 11,        /**< tcp connect fail */
+  HCCL_E_ROCE_CONNECT = 12,       /**< roce connect fail */
+  HCCL_E_TCP_TRANSFER = 13,       /**< tcp transfer fail */
+  HCCL_E_ROCE_TRANSFER = 14,      /**< roce transfer fail */
+  HCCL_E_RUNTIME = 15,            /**< call runtime api fail */
+  HCCL_E_DRV = 16,                /**< call driver api fail */
+  HCCL_E_PROFILING = 17,          /**< call profiling api fail */
+  HCCL_E_CCE = 18,                /**< call cce api fail */
+  HCCL_E_NETWORK = 19,            /**< call network api fail */
+  HCCL_E_RESERVED                 /**< reserved */
 } HcclResult;
 
+/**
+ * @brief handle to HCCL communicator
+ */
+typedef void *HcclComm;
+
+/**
+ * @brief HCCL Reduction operation
+ */
 typedef enum {
+  HCCL_REDUCE_SUM = 0,   /**< sum */
+  HCCL_REDUCE_PROD = 1,  /**< prod */
+  HCCL_REDUCE_MAX = 2,   /**< max */
+  HCCL_REDUCE_MIN = 3,   /**< min */
+  HCCL_REDUCE_RESERVED   /**< reserved */
 } HcclReduceOp;
 
+/**
+ * @brief HCCL data type
+ */
 typedef enum {
+  HCCL_DATA_TYPE_INT8 = 0,    /**< int8 */
+  HCCL_DATA_TYPE_INT16 = 1,   /**< int16 */
+  HCCL_DATA_TYPE_INT32 = 2,   /**< int32 */
+  HCCL_DATA_TYPE_FP16 = 3,    /**< fp16 */
+  HCCL_DATA_TYPE_FP32 = 4,    /**< fp32 */
+  HCCL_DATA_TYPE_INT64 = 5,   /**< int64 */
+  HCCL_DATA_TYPE_UINT64 = 6,  /**< uint64 */
+  HCCL_DATA_TYPE_RESERVED     /**< reserved */
 } HcclDataType;
 
-#ifdef _cplusplus
+const uint32_t HCCL_ROOT_INFO_BYTES = 4108;  // 4108: root info length
+
+/**
+ * @brief HCCL root info
+ */
+typedef struct HcclRootInfoDef {
+  char internal[HCCL_ROOT_INFO_BYTES];
+} HcclRootInfo;
+
+#ifdef __cplusplus
 }
 #endif
 #endif
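Editor's note: the enums filled in above are plain C values, so a small helper is enough to show how they are typically consumed. The sketch below is illustrative only and built solely on the types declared in this header; the include path is an assumption, and nothing here is part of the HCCL API itself.

#include <cstddef>
#include "hccl/hccl_types.h"  // assumed include path for the header patched above

// Byte width of each HCCL data type; the cases mirror the enum values added above.
inline size_t HcclDataTypeSize(HcclDataType type) {
  switch (type) {
    case HCCL_DATA_TYPE_INT8:   return 1;
    case HCCL_DATA_TYPE_INT16:  return 2;
    case HCCL_DATA_TYPE_FP16:   return 2;
    case HCCL_DATA_TYPE_INT32:  return 4;
    case HCCL_DATA_TYPE_FP32:   return 4;
    case HCCL_DATA_TYPE_INT64:  return 8;
    case HCCL_DATA_TYPE_UINT64: return 8;
    default:                    return 0;  // HCCL_DATA_TYPE_RESERVED or unknown
  }
}

// Common error-propagation pattern around HcclResult return codes.
#define HCCL_CHECK(expr)                  \
  do {                                    \
    HcclResult hccl_ret = (expr);         \
    if (hccl_ret != HCCL_SUCCESS) {       \
      return hccl_ret;                    \
    }                                     \
  } while (0)

A caller would typically compute buffer sizes as count * HcclDataTypeSize(type) and wrap each collective invocation in HCCL_CHECK.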
diff --git a/inc/framework/ge_runtime/task_info.h b/inc/framework/ge_runtime/task_info.h
index e36c43334033b1011cd687b3ae4f7b1df9214f97..2b34c93fa0bfd6da52ff9ca9bccdac47cd9ee073 100644
--- a/inc/framework/ge_runtime/task_info.h
+++ b/inc/framework/ge_runtime/task_info.h
@@ -161,13 +161,12 @@ class TbeTaskInfo : public TaskInfo {
 class AicpuTaskInfo : public TaskInfo {
  public:
   AicpuTaskInfo(const std::string &op_name, uint32_t stream_id, const string &so_name, const std::string &kernel_name,
-                const std::string &node_def, const std::string &ext_info, const std::vector &input_data_addrs,
+                const std::string &node_def, const std::vector &input_data_addrs,
                 const std::vector &output_data_addrs, bool dump_flag)
       : TaskInfo(op_name, stream_id, TaskInfoType::AICPU, dump_flag),
         so_name_(so_name),
         kernel_name_(kernel_name),
         node_def_(node_def),
-        ext_info_(ext_info),
         input_data_addrs_(input_data_addrs),
         output_data_addrs_(output_data_addrs) {}
   ~AicpuTaskInfo() override {}
@@ -177,13 +176,11 @@ class AicpuTaskInfo : public TaskInfo {
   const std::string &node_def() const { return node_def_; }
   const std::vector &input_data_addrs() const { return input_data_addrs_; }
   const std::vector &output_data_addrs() const { return output_data_addrs_; }
-  const std::string &ext_info() const { return ext_info_; }
 
  private:
   std::string so_name_;
   std::string kernel_name_;
   std::string node_def_;
-  std::string ext_info_;
   std::vector input_data_addrs_;
   std::vector output_data_addrs_;
 };
@@ -308,7 +305,7 @@ class HcclTaskInfo : public TaskInfo {
   int64_t root_id() const { return root_id_; }
   int64_t op_type() const { return op_type_; }
   int64_t data_type() const { return data_type_; }
-  const std::string &group() const { return group_; }
+  const std::string group() const { return group_; }
   std::function hcom_bind_model() const { return hcom_bind_model_; }
   std::function hcom_unbind_model() const { return hcom_unbind_model_; }
   std::function, void *)> hcom_distribute_task() const {
diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h
index 4902a0213912ee21e075438e257027598ebfafaa..a7bf4a38aafea233decd2b762ac14752f24a2f58 100644
--- a/inc/framework/generator/ge_generator.h
+++ b/inc/framework/generator/ge_generator.h
@@ -88,6 +88,8 @@ class GeGenerator {
                            const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff,
                            bool is_offline = true);
 
+  Status CheckForSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs);
+
   class Impl;
 
   std::shared_ptr impl_;
diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h
index c48d164944a5e063c6afb7312d07e624f5c4d98b..6cc4afd35b8c693981fcd586ee4e655736a6750c 100644
--- a/inc/framework/omg/omg_inner_types.h
+++ b/inc/framework/omg/omg_inner_types.h
@@ -100,6 +100,8 @@ struct OmgContext {
   std::vector net_out_nodes;
   // net out nodes top names(only caffe has top)
   std::vector out_top_names;
+  // net data nodes top names(only caffe has top)
+  std::vector data_top_names;
   // preferential format used by the entire network
   domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED;
   domi::FrameworkType type = domi::FRAMEWORK_RESERVED;
diff --git a/inc/framework/omg/parser/parser_inner_ctx.h b/inc/framework/omg/parser/parser_inner_ctx.h
index b57420eb1a98464e7417a8dfdb574e38b0fb55d9..31a519ef670736fa84de5685cf23a7673c6478a0 100644
--- a/inc/framework/omg/parser/parser_inner_ctx.h
+++ b/inc/framework/omg/parser/parser_inner_ctx.h
@@ -49,6 +49,8 @@ struct ParserContext {
   std::vector user_out_nodes_top_vec;
   // net out nodes (where user_out_nodes or leaf nodes)
   std::vector net_out_nodes;
+  // net data nodes top names(only caffe has top)
+  std::vector data_top_names;
   // net out nodes top names(only caffe has top)
   std::vector out_top_names;
   // Whether to use dynamic batch size or dynamic image size
diff --git a/inc/graph/compute_graph.h b/inc/graph/compute_graph.h
index 90a4c743abdeb147e4e1f4f497590825e9b67f6c..33227bb33fd8750bdb1096147be2bb04bd13b4b1 100644
--- a/inc/graph/compute_graph.h
+++ b/inc/graph/compute_graph.h
@@ -249,11 +249,16 @@ class ComputeGraph : public std::enable_shared_from_this, public A
  private:
   graphStatus DFSTopologicalSorting(std::vector &node_vec, std::map &map_in_edge_num,
-                                    std::vector &stack);
+                                    std::vector &stack, bool reverse);
   graphStatus BFSTopologicalSorting(std::vector &node_vec, std::map &map_in_edge_num,
                                     std::deque &stack);
   graphStatus CollectBreadthOutNode(const NodePtr &node, std::map &map_in_edge_num,
                                     std::map &breadth_node_map);
+  /// nodes like : (a) <--- (c) ---> (b)
+  /// nodes a and b have only one parent node c, and a is connected to c first
+  /// the DFS topo order is `c, b, a` with `dfs_reverse=false` (the default)
+  /// in the same case, the user can get `c, a, b` with `dfs_reverse=true`
+  graphStatus TopologicalSortingGraph(bool dfs_reverse = false);
   graphStatus TopologicalSortingGraph();
   graphStatus SortNodes(std::vector &stack, std::map &mapInEdgeNum);
   Vistor AllGraphNodes(std::vector> &subgraphs) const;
diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h
index 334abd1ddf83b061295fa5c8c9b054b76f18b93c..208263822d2d693628af489b3926191db9db2c8d 100644
--- a/inc/graph/debug/ge_attr_define.h
+++ b/inc/graph/debug/ge_attr_define.h
@@ -190,6 +190,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MOD
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AUTOMIC_ADD_START;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_LABEL;
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_RTS_LABEL_NODE;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS;
@@ -783,8 +784,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_TASK_GEN_VAR_ADDR;
 
-GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_LABEL;
-
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_CONTINUOUS_STREAM_LABEL;
 
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_VAR_SIZE;
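Editor's note: to make the ordering documented on TopologicalSortingGraph in compute_graph.h above concrete: for the graph (a) <--- (c) ---> (b), the default DFS order is c, b, a, while dfs_reverse=true yields c, a, b. The standalone sketch below only mimics that visit order for the documented three-node example; it does not call the GE implementation, and the flag name is reused purely for illustration.

#include <iostream>
#include <string>
#include <vector>

int main() {
  // c's successors in connection order: a was attached to c first, then b.
  const std::vector<std::string> successors = {"a", "b"};
  const bool dfs_reverse = true;  // flip to false for the default order

  std::vector<std::string> order = {"c"};
  if (dfs_reverse) {
    order.insert(order.end(), successors.begin(), successors.end());    // c, a, b
  } else {
    order.insert(order.end(), successors.rbegin(), successors.rend());  // c, b, a (default DFS order)
  }
  for (const auto &node : order) {
    std::cout << node << " ";
  }
  std::cout << std::endl;
  return 0;
}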
diff --git a/inc/graph/ge_local_context.h b/inc/graph/ge_local_context.h
index 36beaa7989dcdf3c3909f4abaed140ee38515091..04060ba48c462a24ab6deae881e93421f90bdd6d 100644
--- a/inc/graph/ge_local_context.h
+++ b/inc/graph/ge_local_context.h
@@ -32,6 +32,12 @@ class GEThreadLocalContext {
   void SetSessionOption(map options_map);
   void SetGlobalOption(map options_map);
 
+  map GetAllGraphOptions() const;
+  map GetAllSessionOptions() const;
+  map GetAllGlobalOptions() const;
+  map GetAllOptions() const;
+
+
  private:
   map graph_options_;
   map session_options_;
diff --git a/inc/graph/node.h b/inc/graph/node.h
index 1b33554152cd402135e8f4cfef8593f0dc360f80..66a9ebd102931093039e6e0e0eee33a4a9b0e595 100644
--- a/inc/graph/node.h
+++ b/inc/graph/node.h
@@ -95,7 +95,7 @@ class Node : public std::enable_shared_from_this {
   ComputeGraphPtr GetOwnerComputeGraph() const;
   graphStatus SetOwnerComputeGraph(const ComputeGraphPtr &graph);
-
+  graphStatus SetAnyOwnerComputeGraph(const ComputeGraphPtr &graph);
   Vistor GetAllInDataAnchors() const;
   Vistor GetAllOutDataAnchors() const;
   uint32_t GetAllInDataAnchorsSize() const;
diff --git a/inc/graph/op_desc.h b/inc/graph/op_desc.h
index 42bf6c67ade7547b7e7f12694dbdf215c758cfcc..8a88ab97cd161c0ab83a79b972865c5e52991751 100644
--- a/inc/graph/op_desc.h
+++ b/inc/graph/op_desc.h
@@ -282,6 +282,8 @@ class OpDesc : public std::enable_shared_from_this, public AttrHolder {
 
   graphStatus GetSubgraphNameByInstanceName(const std::string &instance_name, std::string &subgraph_name) const;
 
+  graphStatus InferDataSlice();
+
  protected:
   ProtoAttrMapHelper MutableAttrMap() override;
   ConstProtoAttrMapHelper GetAttrMap() const override;
@@ -321,6 +323,7 @@ class OpDesc : public std::enable_shared_from_this, public AttrHolder {
   std::function infer_func_ = nullptr;
   std::function infer_format_func_ = nullptr;
   std::function verifier_func_ = nullptr;
+  std::function infer_data_slice_func_ = nullptr;
   string op_kernel_lib_name_;
   string engine_name_;
   friend class OpDescUtils;
diff --git a/inc/graph/utils/graph_utils.h b/inc/graph/utils/graph_utils.h
index 0e7adfe055a51e36a0687249376d7933e92e280b..bdff00b73ea29737f41d0dc6d537222f2e0d35ac 100644
--- a/inc/graph/utils/graph_utils.h
+++ b/inc/graph/utils/graph_utils.h
@@ -145,6 +145,8 @@ class GraphUtils {
 
   static ComputeGraphPtr CreateGraphFromOperator(const string &name, const std::vector &inputs);
 
+  static GraphPtr CreateGraphPtrFromComputeGraph(const ComputeGraphPtr compute_graph);
+
   static graphStatus AddEdge(const OutDataAnchorPtr &src, const InDataAnchorPtr &dst);
 
   static graphStatus AddEdge(const OutDataAnchorPtr &src, const Format &src_format, const InDataAnchorPtr &dst,
diff --git a/inc/hccl/base.h b/inc/hccl/base.h
index 94253bf4a72f21ff472ea8cc7ccce18056083b64..8e27d6b809f3206fc3234c79417285bcad93a9a7 100644
--- a/inc/hccl/base.h
+++ b/inc/hccl/base.h
@@ -49,7 +49,7 @@ typedef enum {
   HOROVOD_REDUCE_RESERVED  /**< reserved */
 } HorovodReduceOp;
 
-const u32 HCCL_MAX_SEGMENT_NUM = 8;   // The max number of gradient segments.
+const u32 HCCL_MAX_SEGMENT_NUM = 32;  // The max number of gradient segments.
 
 /**
  * @brief the feature of the model