From 2a83ed5d2c87134ef28124ec0229d28ea8f29ce2 Mon Sep 17 00:00:00 2001 From: wqtshg Date: Thu, 20 Aug 2020 20:06:34 +0800 Subject: [PATCH] add tf adapter code --- .bazelrc | 31 + .gitignore | 6 + BUILD | 1 + WORKSPACE.tpl | 60 + inc/common/blocking_queue.h | 141 ++ inc/common/dynamic_aipp.h | 104 + inc/common/npu_error_define.h | 94 + inc/common/opskernel/ge_task_info.h | 76 + inc/common/opskernel/ops_kernel_info_store.h | 88 + inc/common/opskernel/ops_kernel_info_types.h | 66 + inc/common/optimizer/graph_optimizer.h | 74 + inc/common/optimizer/graph_optimizer_types.h | 34 + inc/common/proto/fusion_model.proto | 20 + inc/common/proto/ge_api.proto | 88 + inc/common/proto/ge_ir.proto | 190 ++ inc/common/proto/insert_op.proto | 126 + inc/common/proto/om.proto | 396 +++ inc/common/proto/op_mapping_info.proto | 62 + inc/common/proto/optimizer_priority.proto | 7 + inc/common/proto/task.proto | 165 ++ .../ai_core/common/aicore_util_attr_define.h | 35 + .../util/ai_core/common/aicore_util_types.h | 115 + inc/common/util/ai_core/common/graph_comm.h | 127 + .../util/ai_core/common/scope_allocator.h | 36 + .../param_calculate/aicore_param_calculator.h | 29 + .../param_calculate/tensorsize_calculator.h | 42 + inc/common/util/compress/compress.h | 46 + inc/common/util/compress/compress_weight.h | 28 + inc/common/util/error_manager/error_manager.h | 94 + inc/common/util/platform_info.h | 115 + inc/common/util/platform_info_def.h | 144 ++ inc/external/ge/ge_api.h | 121 + inc/external/ge/ge_api_error_codes.h | 76 + inc/external/ge/ge_api_types.h | 335 +++ inc/external/ge/ge_ir_build.h | 93 + inc/external/graph/attr_value.h | 75 + inc/external/graph/ge_error_codes.h | 38 + inc/external/graph/graph.h | 81 + inc/external/graph/inference_context.h | 76 + inc/external/graph/operator.h | 286 +++ inc/external/graph/operator_factory.h | 68 + inc/external/graph/operator_reg.h | 384 +++ inc/external/graph/tensor.h | 131 + inc/external/graph/types.h | 240 ++ inc/external/register/register.h 
| 139 ++ inc/external/register/register_error_codes.h | 39 + inc/external/register/register_fmk_types.h | 37 + inc/external/register/register_types.h | 59 + .../scope/scope_fusion_pass_register.h | 283 +++ inc/framework/common/aicpu_op.h | 22 + inc/framework/common/debug/ge_log.h | 85 + inc/framework/common/debug/log.h | 256 ++ inc/framework/common/fmk_error_codes.h | 85 + inc/framework/common/fmk_types.h | 23 + inc/framework/common/ge_format_util.h | 40 + inc/framework/common/ge_inner_error_codes.h | 313 +++ inc/framework/common/ge_types.h | 276 ++ inc/framework/common/gflags_util.h | 71 + inc/framework/common/helper/model_helper.h | 72 + inc/framework/common/helper/om_file_helper.h | 93 + inc/framework/common/l2_cache_optimize.h | 123 + inc/framework/common/op/attr_value_util.h | 161 ++ inc/framework/common/op/ge_op_utils.h | 295 +++ inc/framework/common/op/op_parser_util.h | 425 ++++ inc/framework/common/op_types.h | 62 + inc/framework/common/scope_guard.h | 59 + inc/framework/common/string_util.h | 156 ++ inc/framework/common/types.h | 1101 ++++++++ inc/framework/common/util.h | 395 +++ inc/framework/engine/dnnengine.h | 56 + inc/framework/executor/ge_executor.h | 275 ++ inc/framework/ge_runtime/davinci_model.h | 113 + inc/framework/ge_runtime/model_runner.h | 71 + inc/framework/ge_runtime/op_info.h | 72 + inc/framework/ge_runtime/task_info.h | 419 ++++ inc/framework/generator/ge_generator.h | 95 + inc/framework/generator/generator_api.h | 172 ++ inc/framework/memory/memory_api.h | 57 + inc/framework/memory/memory_assigner.h | 42 + inc/framework/omg/omg.h | 120 + inc/framework/omg/omg_inner_types.h | 144 ++ inc/framework/omg/omg_types.h | 22 + inc/framework/omg/parser/model_parser.h | 111 + inc/framework/omg/parser/op_parser.h | 92 + inc/framework/omg/parser/parser_api.h | 31 + inc/framework/omg/parser/parser_factory.h | 138 + inc/framework/omg/parser/parser_inner_ctx.h | 43 + inc/framework/omg/parser/weights_parser.h | 74 + inc/framework/omg/version.h | 45 + 
inc/graph/anchor.h | 284 +++ inc/graph/attr_value_serializable.h | 189 ++ inc/graph/buffer.h | 82 + inc/graph/compute_graph.h | 305 +++ inc/graph/debug/ge_attr_define.h | 1120 +++++++++ inc/graph/def_types.h | 196 ++ inc/graph/detail/any_map.h | 122 + inc/graph/detail/attributes_holder.h | 165 ++ inc/graph/detail/model_serialize_imp.h | 93 + inc/graph/ge_attr_value.h | 344 +++ inc/graph/ge_context.h | 44 + inc/graph/ge_global_options.h | 25 + inc/graph/ge_local_context.h | 43 + inc/graph/ge_tensor.h | 193 ++ inc/graph/graph_util.h | 134 + inc/graph/model.h | 94 + inc/graph/model_serialize.h | 52 + inc/graph/node.h | 208 ++ inc/graph/op_desc.h | 322 +++ inc/graph/op_kernel_bin.h | 47 + inc/graph/operator_factory_impl.h | 56 + inc/graph/opsproto_manager.h | 44 + inc/graph/range_vistor.h | 53 + inc/graph/ref_relation.h | 79 + inc/graph/runtime_inference_context.h | 46 + inc/graph/shape_refiner.h | 40 + inc/graph/usr_types.h | 134 + inc/graph/utils/anchor_utils.h | 45 + inc/graph/utils/attr_utils.h | 151 ++ inc/graph/utils/graph_utils.h | 770 ++++++ inc/graph/utils/node_utils.h | 152 ++ inc/graph/utils/op_desc_utils.h | 181 ++ inc/graph/utils/tensor_adapter.h | 43 + inc/graph/utils/tensor_utils.h | 77 + inc/graph/utils/type_utils.h | 52 + inc/hccl/base.h | 147 ++ inc/hccl/cltm.h | 56 + inc/hccl/hccl_op_base.h | 132 + inc/hccl/hcom.h | 275 ++ inc/hccl/hcom_ops_stores.h | 68 + inc/hccl/hvd_adapter_pub.h | 37 + .../buffer_fusion/buffer_fusion_constant.h | 74 + .../buffer_fusion/buffer_fusion_pass_base.h | 36 + .../buffer_fusion_pass_registry.h | 66 + .../buffer_fusion/buffer_fusion_pattern.h | 74 + .../fusion_common/graph_pass_util.h | 250 ++ .../fusion_common/pattern_fusion_base_pass.h | 106 + .../fusion_pass_registry.h | 63 + .../graph_fusion/fusion_pattern.h | 172 ++ .../graph_fusion/graph_fusion_pass_base.h | 113 + .../graph_optimizer/graph_fusion/graph_pass.h | 39 + .../graph_optimizer/graph_fusion/pass.h | 48 + .../graph_optimize_register_error_codes.h | 50 + 
inc/register/op_kernel_registry.h | 49 + inc/register/op_registry.h | 86 + inc/register/op_tiling.h | 133 + inc/register/proto/caffe/caffe.proto | 1802 +++++++++++++ inc/register/proto/onnx/ge_onnx.proto | 563 +++++ .../proto/tensorflow/attr_value.proto | 62 + inc/register/proto/tensorflow/function.proto | 100 + inc/register/proto/tensorflow/graph.proto | 56 + inc/register/proto/tensorflow/node_def.proto | 63 + inc/register/proto/tensorflow/op_def.proto | 164 ++ .../proto/tensorflow/resource_handle.proto | 29 + inc/register/proto/tensorflow/tensor.proto | 94 + .../proto/tensorflow/tensor_shape.proto | 45 + inc/register/proto/tensorflow/types.proto | 74 + inc/register/proto/tensorflow/versions.proto | 31 + inc/register/register.h | 53 + inc/register/register_format_transfer.h | 79 + inc/register/scope/scope_graph_impl.h | 143 ++ inc/register/scope/scope_pass_impl.h | 61 + inc/register/scope/scope_pass_registry_impl.h | 40 + inc/register/scope/scope_pattern_impl.h | 105 + inc/register/tensor_assign.h | 103 + inc/soft_dp/ExternalSoftDp.h | 52 + inc/tdt/data_common.h | 99 + inc/tdt/status.h | 749 ++++++ inc/tdt/tdt_device.h | 53 + inc/tdt/tdt_host_interface.h | 142 ++ inc/tdt/tdt_server.h | 83 + inc/tdt/train_mode.h | 23 + inc/tdt/tsd.h | 99 + inc/tdt/tsd_client.h | 157 ++ inc/toolchain/bbox/bbox_ddr_data.h | 119 + inc/toolchain/bbox/bbox_ddr_data_cloud.h | 1176 +++++++++ inc/toolchain/bbox/bbox_ddr_data_dc.h | 451 ++++ inc/toolchain/bbox/bbox_ddr_data_mdc.h | 467 ++++ inc/toolchain/bbox/bbox_ddr_data_mini.h | 1271 ++++++++++ inc/toolchain/bbox/bbox_proxy.h | 51 + inc/toolchain/bbox/bbox_proxy_config.h | 89 + inc/toolchain/bbox/bbox_proxy_config_dc.h | 262 ++ inc/toolchain/bbox/bbox_proxy_config_mdc.h | 394 +++ inc/toolchain/bbox/device/bbox_pub.h | 313 +++ inc/toolchain/bbox/device/bbox_pub_cloud.h | 281 +++ inc/toolchain/bbox/device/bbox_pub_mini.h | 281 +++ inc/toolchain/bbox/device/bbox_types.h | 63 + inc/toolchain/ide_daemon_api.h | 173 ++ inc/toolchain/ide_tlv.h 
| 75 + inc/toolchain/prof_acl_api.h | 51 + inc/toolchain/prof_engine.h | 207 ++ inc/toolchain/prof_mgr_core.h | 84 + inc/toolchain/prof_reporter.h | 89 + inc/toolchain/profiler_client.h | 86 + inc/toolchain/slog.h | 368 +++ inc/toolchain/stackcore/stackcore.h | 28 + module.mk | 72 + tf_adapter/BUILD | 119 + tf_adapter/common/common.h | 40 + tf_adapter/exported_symbols.lds | 4 + .../interface_checker/check_interface.py | 534 ++++ tf_adapter/interface_spec/api_hccl_ops.pyh | 9 + tf_adapter/interface_spec/api_image_ops.pyh | 5 + .../interface_spec/api_keras_to_npu.pyh | 4 + tf_adapter/interface_spec/api_npu_config.pyh | 18 + .../interface_spec/api_npu_estimator.pyh | 11 + tf_adapter/interface_spec/api_npu_hook.pyh | 8 + .../api_npu_loss_scale_manager.pyh | 7 + .../api_npu_loss_scale_optimizer.pyh | 4 + tf_adapter/interface_spec/api_npu_ops.pyh | 10 + .../interface_spec/api_npu_optimizer.pyh | 8 + tf_adapter/interface_spec/api_npu_plugin.pyh | 7 + tf_adapter/interface_spec/api_npu_rnn.pyh | 3 + tf_adapter/interface_spec/api_npu_scope.pyh | 3 + tf_adapter/interface_spec/api_util.pyh | 7 + tf_adapter/kernels/adam_apply_one_assign.cc | 33 + .../adam_apply_one_with_decay_assign.cc | 33 + tf_adapter/kernels/basic_lstm_cell.cc | 33 + tf_adapter/kernels/basic_lstm_cell_grad.cc | 57 + tf_adapter/kernels/decode_image_ops.cc | 136 + tf_adapter/kernels/device_queue_dataset_op.cc | 147 ++ tf_adapter/kernels/dp_iterator_ops.cc | 73 + tf_adapter/kernels/dp_iterator_ops.h | 37 + tf_adapter/kernels/dpgroup_dataset_op.cc | 151 ++ tf_adapter/kernels/dropout_ops.cc | 50 + tf_adapter/kernels/geop_dataset_op.cc | 208 ++ tf_adapter/kernels/geop_npu.cc | 921 +++++++ tf_adapter/kernels/geop_npu.h | 111 + tf_adapter/kernels/hccl_ops.cc | 88 + tf_adapter/kernels/host_queue_dataset_op.cc | 444 ++++ tf_adapter/kernels/infeed_outfeed_ops.cc | 178 ++ tf_adapter/kernels/lars_ops.cc | 88 + tf_adapter/kernels/lars_v2_op.cc | 70 + tf_adapter/kernels/log_time_stamp_ops.cc | 36 + 
tf_adapter/kernels/maxpooling_op.cc | 38 + tf_adapter/kernels/npu_cpu_ops.cc | 27 + tf_adapter/kernels/npu_mixed_precesion_ops.cc | 109 + tf_adapter/kernels/npu_ops.cc | 41 + tf_adapter/kernels/npu_sys_ctl_ops.cc | 144 ++ tf_adapter/kernels/npu_unary_ops.cc | 159 ++ tf_adapter/module.BUILD | 74 + tf_adapter/ops/dropout_domask_grad.cc | 70 + tf_adapter/ops/hccl_ops.cc | 174 ++ tf_adapter/ops/npu_cpu_ops.cc | 46 + tf_adapter/ops/npu_dataset_ops.cc | 65 + tf_adapter/ops/npu_mixed_precesion_ops.cc | 66 + tf_adapter/ops/npu_ops.cc | 394 +++ tf_adapter/ops/npu_unary_ops.cc | 35 + tf_adapter/optimizers/add_input_pass.cc | 147 ++ .../control_flow_conversion_pass.cc | 100 + .../optimizers/control_flow_conversion_pass.h | 36 + .../optimizers/dp_tf_ge_conversion_pass.cc | 1012 ++++++++ .../optimizers/dp_tf_ge_conversion_pass.h | 36 + .../optimizers/get_attr_optimize_pass.cc | 147 ++ .../optimizers/gradient_fusion_optimizer.cc | 347 +++ .../optimizers/gradient_fusion_optimizer.h | 69 + .../optimizers/mark_noneed_optimize_pass.cc | 124 + tf_adapter/optimizers/mark_start_node_pass.cc | 208 ++ .../optimizers/om_partition_subgraphs_pass.cc | 2218 +++++++++++++++++ .../optimizers/om_partition_subgraphs_pass.h | 52 + .../optimizers/om_set_var_format_pass.cc | 179 ++ .../optimizers/om_set_var_format_pass.h | 34 + tf_adapter/python/MANIFEST.in | 2 + tf_adapter/python/npu_bridge/__init__.py | 6 + .../npu_bridge/estimator/npu/keras_to_npu.py | 556 +++++ .../estimator/npu/mnist_softmax_npu.py | 96 + .../estimator/npu/mnist_with_estimator.py | 209 ++ .../npu_bridge/estimator/npu/npu_common.py | 221 ++ .../npu_bridge/estimator/npu/npu_config.py | 209 ++ .../npu_bridge/estimator/npu/npu_estimator.py | 741 ++++++ .../npu_bridge/estimator/npu/npu_hook.py | 362 +++ .../estimator/npu/npu_loss_scale_manager.py | 200 ++ .../estimator/npu/npu_loss_scale_optimizer.py | 183 ++ .../npu_bridge/estimator/npu/npu_optimizer.py | 284 +++ .../npu_bridge/estimator/npu/npu_plugin.py | 57 + 
.../npu_bridge/estimator/npu/npu_rnn.py | 37 + .../npu_bridge/estimator/npu/npu_scope.py | 18 + .../python/npu_bridge/estimator/npu/util.py | 206 ++ .../python/npu_bridge/estimator/npu_ops.py | 250 ++ .../npu_bridge/estimator/npu_unary_ops.py | 46 + tf_adapter/python/npu_bridge/hccl/hccl_ops.py | 103 + tf_adapter/python/npu_bridge/helper/helper.py | 11 + .../python/npu_bridge/image/image_ops.py | 33 + .../python/npu_bridge/npu_cpu/npu_cpu_ops.py | 22 + tf_adapter/python/setup.py | 54 + tf_adapter/swig/ge_plugin.i | 22 + tf_adapter/tf_adapter.bzl | 48 + tf_adapter/util/ge_plugin.cc | 203 ++ tf_adapter/util/ge_plugin.h | 38 + tf_adapter/util/infershape_util.cc | 441 ++++ tf_adapter/util/infershape_util.h | 76 + tf_adapter/util/npu_attrs.cc | 782 ++++++ tf_adapter/util/npu_attrs.h | 46 + tf_adapter/util/npu_ops_identifier.cc | 119 + tf_adapter/util/npu_ops_identifier.h | 51 + tf_adapter/util/npu_plugin.h | 24 + tf_adapter/util/plugin_load_manager.cc | 65 + tf_adapter/util/plugin_load_manager.h | 31 + tf_adapter/util/session_manager.cc | 123 + tf_adapter/util/session_manager.h | 41 + tools/bazel.rc | 86 + tools/build_wheel.sh | 81 + tools/gen_sc_makefile_bazel.py | 205 ++ tools/sc_list.txt | 1 + workspace.bzl.tpl | 70 + 304 files changed, 47445 insertions(+) create mode 100644 .bazelrc create mode 100644 .gitignore create mode 100644 BUILD create mode 100644 WORKSPACE.tpl create mode 100644 inc/common/blocking_queue.h create mode 100644 inc/common/dynamic_aipp.h create mode 100644 inc/common/npu_error_define.h create mode 100644 inc/common/opskernel/ge_task_info.h create mode 100644 inc/common/opskernel/ops_kernel_info_store.h create mode 100644 inc/common/opskernel/ops_kernel_info_types.h create mode 100644 inc/common/optimizer/graph_optimizer.h create mode 100644 inc/common/optimizer/graph_optimizer_types.h create mode 100644 inc/common/proto/fusion_model.proto create mode 100644 inc/common/proto/ge_api.proto create mode 100644 inc/common/proto/ge_ir.proto create 
mode 100644 inc/common/proto/insert_op.proto create mode 100644 inc/common/proto/om.proto create mode 100644 inc/common/proto/op_mapping_info.proto create mode 100644 inc/common/proto/optimizer_priority.proto create mode 100644 inc/common/proto/task.proto create mode 100644 inc/common/util/ai_core/common/aicore_util_attr_define.h create mode 100644 inc/common/util/ai_core/common/aicore_util_types.h create mode 100644 inc/common/util/ai_core/common/graph_comm.h create mode 100644 inc/common/util/ai_core/common/scope_allocator.h create mode 100644 inc/common/util/ai_core/param_calculate/aicore_param_calculator.h create mode 100644 inc/common/util/ai_core/param_calculate/tensorsize_calculator.h create mode 100644 inc/common/util/compress/compress.h create mode 100644 inc/common/util/compress/compress_weight.h create mode 100644 inc/common/util/error_manager/error_manager.h create mode 100644 inc/common/util/platform_info.h create mode 100644 inc/common/util/platform_info_def.h create mode 100644 inc/external/ge/ge_api.h create mode 100644 inc/external/ge/ge_api_error_codes.h create mode 100644 inc/external/ge/ge_api_types.h create mode 100644 inc/external/ge/ge_ir_build.h create mode 100644 inc/external/graph/attr_value.h create mode 100644 inc/external/graph/ge_error_codes.h create mode 100644 inc/external/graph/graph.h create mode 100644 inc/external/graph/inference_context.h create mode 100644 inc/external/graph/operator.h create mode 100644 inc/external/graph/operator_factory.h create mode 100644 inc/external/graph/operator_reg.h create mode 100644 inc/external/graph/tensor.h create mode 100644 inc/external/graph/types.h create mode 100644 inc/external/register/register.h create mode 100644 inc/external/register/register_error_codes.h create mode 100644 inc/external/register/register_fmk_types.h create mode 100644 inc/external/register/register_types.h create mode 100644 inc/external/register/scope/scope_fusion_pass_register.h create mode 100644 
inc/framework/common/aicpu_op.h create mode 100644 inc/framework/common/debug/ge_log.h create mode 100644 inc/framework/common/debug/log.h create mode 100644 inc/framework/common/fmk_error_codes.h create mode 100644 inc/framework/common/fmk_types.h create mode 100644 inc/framework/common/ge_format_util.h create mode 100644 inc/framework/common/ge_inner_error_codes.h create mode 100644 inc/framework/common/ge_types.h create mode 100644 inc/framework/common/gflags_util.h create mode 100644 inc/framework/common/helper/model_helper.h create mode 100644 inc/framework/common/helper/om_file_helper.h create mode 100644 inc/framework/common/l2_cache_optimize.h create mode 100644 inc/framework/common/op/attr_value_util.h create mode 100644 inc/framework/common/op/ge_op_utils.h create mode 100644 inc/framework/common/op/op_parser_util.h create mode 100644 inc/framework/common/op_types.h create mode 100644 inc/framework/common/scope_guard.h create mode 100644 inc/framework/common/string_util.h create mode 100644 inc/framework/common/types.h create mode 100644 inc/framework/common/util.h create mode 100644 inc/framework/engine/dnnengine.h create mode 100644 inc/framework/executor/ge_executor.h create mode 100644 inc/framework/ge_runtime/davinci_model.h create mode 100644 inc/framework/ge_runtime/model_runner.h create mode 100644 inc/framework/ge_runtime/op_info.h create mode 100644 inc/framework/ge_runtime/task_info.h create mode 100644 inc/framework/generator/ge_generator.h create mode 100644 inc/framework/generator/generator_api.h create mode 100644 inc/framework/memory/memory_api.h create mode 100644 inc/framework/memory/memory_assigner.h create mode 100644 inc/framework/omg/omg.h create mode 100644 inc/framework/omg/omg_inner_types.h create mode 100644 inc/framework/omg/omg_types.h create mode 100644 inc/framework/omg/parser/model_parser.h create mode 100644 inc/framework/omg/parser/op_parser.h create mode 100644 inc/framework/omg/parser/parser_api.h create mode 100644 
inc/framework/omg/parser/parser_factory.h create mode 100644 inc/framework/omg/parser/parser_inner_ctx.h create mode 100644 inc/framework/omg/parser/weights_parser.h create mode 100644 inc/framework/omg/version.h create mode 100644 inc/graph/anchor.h create mode 100644 inc/graph/attr_value_serializable.h create mode 100644 inc/graph/buffer.h create mode 100644 inc/graph/compute_graph.h create mode 100644 inc/graph/debug/ge_attr_define.h create mode 100644 inc/graph/def_types.h create mode 100644 inc/graph/detail/any_map.h create mode 100644 inc/graph/detail/attributes_holder.h create mode 100644 inc/graph/detail/model_serialize_imp.h create mode 100644 inc/graph/ge_attr_value.h create mode 100644 inc/graph/ge_context.h create mode 100644 inc/graph/ge_global_options.h create mode 100644 inc/graph/ge_local_context.h create mode 100644 inc/graph/ge_tensor.h create mode 100644 inc/graph/graph_util.h create mode 100644 inc/graph/model.h create mode 100644 inc/graph/model_serialize.h create mode 100644 inc/graph/node.h create mode 100644 inc/graph/op_desc.h create mode 100644 inc/graph/op_kernel_bin.h create mode 100644 inc/graph/operator_factory_impl.h create mode 100644 inc/graph/opsproto_manager.h create mode 100644 inc/graph/range_vistor.h create mode 100644 inc/graph/ref_relation.h create mode 100644 inc/graph/runtime_inference_context.h create mode 100644 inc/graph/shape_refiner.h create mode 100644 inc/graph/usr_types.h create mode 100644 inc/graph/utils/anchor_utils.h create mode 100644 inc/graph/utils/attr_utils.h create mode 100644 inc/graph/utils/graph_utils.h create mode 100644 inc/graph/utils/node_utils.h create mode 100644 inc/graph/utils/op_desc_utils.h create mode 100644 inc/graph/utils/tensor_adapter.h create mode 100644 inc/graph/utils/tensor_utils.h create mode 100644 inc/graph/utils/type_utils.h create mode 100644 inc/hccl/base.h create mode 100644 inc/hccl/cltm.h create mode 100644 inc/hccl/hccl_op_base.h create mode 100644 inc/hccl/hcom.h create 
mode 100644 inc/hccl/hcom_ops_stores.h create mode 100644 inc/hccl/hvd_adapter_pub.h create mode 100644 inc/register/graph_optimizer/buffer_fusion/buffer_fusion_constant.h create mode 100644 inc/register/graph_optimizer/buffer_fusion/buffer_fusion_pass_base.h create mode 100644 inc/register/graph_optimizer/buffer_fusion/buffer_fusion_pass_registry.h create mode 100644 inc/register/graph_optimizer/buffer_fusion/buffer_fusion_pattern.h create mode 100644 inc/register/graph_optimizer/fusion_common/graph_pass_util.h create mode 100644 inc/register/graph_optimizer/fusion_common/pattern_fusion_base_pass.h create mode 100644 inc/register/graph_optimizer/graph_fusion/fusion_pass_manager/fusion_pass_registry.h create mode 100644 inc/register/graph_optimizer/graph_fusion/fusion_pattern.h create mode 100644 inc/register/graph_optimizer/graph_fusion/graph_fusion_pass_base.h create mode 100644 inc/register/graph_optimizer/graph_fusion/graph_pass.h create mode 100644 inc/register/graph_optimizer/graph_fusion/pass.h create mode 100644 inc/register/graph_optimizer/graph_optimize_register_error_codes.h create mode 100644 inc/register/op_kernel_registry.h create mode 100644 inc/register/op_registry.h create mode 100644 inc/register/op_tiling.h create mode 100644 inc/register/proto/caffe/caffe.proto create mode 100644 inc/register/proto/onnx/ge_onnx.proto create mode 100644 inc/register/proto/tensorflow/attr_value.proto create mode 100644 inc/register/proto/tensorflow/function.proto create mode 100644 inc/register/proto/tensorflow/graph.proto create mode 100644 inc/register/proto/tensorflow/node_def.proto create mode 100644 inc/register/proto/tensorflow/op_def.proto create mode 100644 inc/register/proto/tensorflow/resource_handle.proto create mode 100644 inc/register/proto/tensorflow/tensor.proto create mode 100644 inc/register/proto/tensorflow/tensor_shape.proto create mode 100644 inc/register/proto/tensorflow/types.proto create mode 100644 
inc/register/proto/tensorflow/versions.proto create mode 100644 inc/register/register.h create mode 100644 inc/register/register_format_transfer.h create mode 100644 inc/register/scope/scope_graph_impl.h create mode 100644 inc/register/scope/scope_pass_impl.h create mode 100644 inc/register/scope/scope_pass_registry_impl.h create mode 100644 inc/register/scope/scope_pattern_impl.h create mode 100644 inc/register/tensor_assign.h create mode 100644 inc/soft_dp/ExternalSoftDp.h create mode 100644 inc/tdt/data_common.h create mode 100644 inc/tdt/status.h create mode 100644 inc/tdt/tdt_device.h create mode 100644 inc/tdt/tdt_host_interface.h create mode 100644 inc/tdt/tdt_server.h create mode 100644 inc/tdt/train_mode.h create mode 100644 inc/tdt/tsd.h create mode 100644 inc/tdt/tsd_client.h create mode 100644 inc/toolchain/bbox/bbox_ddr_data.h create mode 100644 inc/toolchain/bbox/bbox_ddr_data_cloud.h create mode 100644 inc/toolchain/bbox/bbox_ddr_data_dc.h create mode 100644 inc/toolchain/bbox/bbox_ddr_data_mdc.h create mode 100644 inc/toolchain/bbox/bbox_ddr_data_mini.h create mode 100644 inc/toolchain/bbox/bbox_proxy.h create mode 100644 inc/toolchain/bbox/bbox_proxy_config.h create mode 100644 inc/toolchain/bbox/bbox_proxy_config_dc.h create mode 100644 inc/toolchain/bbox/bbox_proxy_config_mdc.h create mode 100644 inc/toolchain/bbox/device/bbox_pub.h create mode 100644 inc/toolchain/bbox/device/bbox_pub_cloud.h create mode 100644 inc/toolchain/bbox/device/bbox_pub_mini.h create mode 100644 inc/toolchain/bbox/device/bbox_types.h create mode 100644 inc/toolchain/ide_daemon_api.h create mode 100644 inc/toolchain/ide_tlv.h create mode 100644 inc/toolchain/prof_acl_api.h create mode 100644 inc/toolchain/prof_engine.h create mode 100644 inc/toolchain/prof_mgr_core.h create mode 100644 inc/toolchain/prof_reporter.h create mode 100644 inc/toolchain/profiler_client.h create mode 100644 inc/toolchain/slog.h create mode 100644 inc/toolchain/stackcore/stackcore.h create mode 
100644 module.mk create mode 100644 tf_adapter/BUILD create mode 100644 tf_adapter/common/common.h create mode 100644 tf_adapter/exported_symbols.lds create mode 100644 tf_adapter/interface_checker/check_interface.py create mode 100644 tf_adapter/interface_spec/api_hccl_ops.pyh create mode 100644 tf_adapter/interface_spec/api_image_ops.pyh create mode 100644 tf_adapter/interface_spec/api_keras_to_npu.pyh create mode 100644 tf_adapter/interface_spec/api_npu_config.pyh create mode 100644 tf_adapter/interface_spec/api_npu_estimator.pyh create mode 100644 tf_adapter/interface_spec/api_npu_hook.pyh create mode 100644 tf_adapter/interface_spec/api_npu_loss_scale_manager.pyh create mode 100644 tf_adapter/interface_spec/api_npu_loss_scale_optimizer.pyh create mode 100644 tf_adapter/interface_spec/api_npu_ops.pyh create mode 100644 tf_adapter/interface_spec/api_npu_optimizer.pyh create mode 100644 tf_adapter/interface_spec/api_npu_plugin.pyh create mode 100644 tf_adapter/interface_spec/api_npu_rnn.pyh create mode 100644 tf_adapter/interface_spec/api_npu_scope.pyh create mode 100644 tf_adapter/interface_spec/api_util.pyh create mode 100644 tf_adapter/kernels/adam_apply_one_assign.cc create mode 100644 tf_adapter/kernels/adam_apply_one_with_decay_assign.cc create mode 100644 tf_adapter/kernels/basic_lstm_cell.cc create mode 100644 tf_adapter/kernels/basic_lstm_cell_grad.cc create mode 100644 tf_adapter/kernels/decode_image_ops.cc create mode 100644 tf_adapter/kernels/device_queue_dataset_op.cc create mode 100644 tf_adapter/kernels/dp_iterator_ops.cc create mode 100644 tf_adapter/kernels/dp_iterator_ops.h create mode 100644 tf_adapter/kernels/dpgroup_dataset_op.cc create mode 100644 tf_adapter/kernels/dropout_ops.cc create mode 100644 tf_adapter/kernels/geop_dataset_op.cc create mode 100644 tf_adapter/kernels/geop_npu.cc create mode 100644 tf_adapter/kernels/geop_npu.h create mode 100644 tf_adapter/kernels/hccl_ops.cc create mode 100644 
tf_adapter/kernels/host_queue_dataset_op.cc create mode 100644 tf_adapter/kernels/infeed_outfeed_ops.cc create mode 100644 tf_adapter/kernels/lars_ops.cc create mode 100644 tf_adapter/kernels/lars_v2_op.cc create mode 100644 tf_adapter/kernels/log_time_stamp_ops.cc create mode 100644 tf_adapter/kernels/maxpooling_op.cc create mode 100644 tf_adapter/kernels/npu_cpu_ops.cc create mode 100644 tf_adapter/kernels/npu_mixed_precesion_ops.cc create mode 100644 tf_adapter/kernels/npu_ops.cc create mode 100644 tf_adapter/kernels/npu_sys_ctl_ops.cc create mode 100644 tf_adapter/kernels/npu_unary_ops.cc create mode 100644 tf_adapter/module.BUILD create mode 100644 tf_adapter/ops/dropout_domask_grad.cc create mode 100644 tf_adapter/ops/hccl_ops.cc create mode 100644 tf_adapter/ops/npu_cpu_ops.cc create mode 100644 tf_adapter/ops/npu_dataset_ops.cc create mode 100644 tf_adapter/ops/npu_mixed_precesion_ops.cc create mode 100644 tf_adapter/ops/npu_ops.cc create mode 100644 tf_adapter/ops/npu_unary_ops.cc create mode 100644 tf_adapter/optimizers/add_input_pass.cc create mode 100644 tf_adapter/optimizers/control_flow_conversion_pass.cc create mode 100644 tf_adapter/optimizers/control_flow_conversion_pass.h create mode 100644 tf_adapter/optimizers/dp_tf_ge_conversion_pass.cc create mode 100644 tf_adapter/optimizers/dp_tf_ge_conversion_pass.h create mode 100644 tf_adapter/optimizers/get_attr_optimize_pass.cc create mode 100644 tf_adapter/optimizers/gradient_fusion_optimizer.cc create mode 100644 tf_adapter/optimizers/gradient_fusion_optimizer.h create mode 100644 tf_adapter/optimizers/mark_noneed_optimize_pass.cc create mode 100644 tf_adapter/optimizers/mark_start_node_pass.cc create mode 100644 tf_adapter/optimizers/om_partition_subgraphs_pass.cc create mode 100644 tf_adapter/optimizers/om_partition_subgraphs_pass.h create mode 100644 tf_adapter/optimizers/om_set_var_format_pass.cc create mode 100644 tf_adapter/optimizers/om_set_var_format_pass.h create mode 100644 
tf_adapter/python/MANIFEST.in create mode 100644 tf_adapter/python/npu_bridge/__init__.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu/keras_to_npu.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu/mnist_softmax_npu.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu/mnist_with_estimator.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu/npu_common.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu/npu_config.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu/npu_hook.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu/npu_loss_scale_manager.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu/npu_loss_scale_optimizer.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu/npu_optimizer.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu/npu_rnn.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu/npu_scope.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu/util.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu_ops.py create mode 100644 tf_adapter/python/npu_bridge/estimator/npu_unary_ops.py create mode 100644 tf_adapter/python/npu_bridge/hccl/hccl_ops.py create mode 100644 tf_adapter/python/npu_bridge/helper/helper.py create mode 100644 tf_adapter/python/npu_bridge/image/image_ops.py create mode 100644 tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py create mode 100644 tf_adapter/python/setup.py create mode 100644 tf_adapter/swig/ge_plugin.i create mode 100644 tf_adapter/tf_adapter.bzl create mode 100644 tf_adapter/util/ge_plugin.cc create mode 100644 tf_adapter/util/ge_plugin.h create mode 100644 tf_adapter/util/infershape_util.cc create mode 100644 tf_adapter/util/infershape_util.h create mode 100644 tf_adapter/util/npu_attrs.cc create 
mode 100644 tf_adapter/util/npu_attrs.h create mode 100644 tf_adapter/util/npu_ops_identifier.cc create mode 100644 tf_adapter/util/npu_ops_identifier.h create mode 100644 tf_adapter/util/npu_plugin.h create mode 100644 tf_adapter/util/plugin_load_manager.cc create mode 100644 tf_adapter/util/plugin_load_manager.h create mode 100644 tf_adapter/util/session_manager.cc create mode 100644 tf_adapter/util/session_manager.h create mode 100644 tools/bazel.rc create mode 100644 tools/build_wheel.sh create mode 100644 tools/gen_sc_makefile_bazel.py create mode 100644 tools/sc_list.txt create mode 100644 workspace.bzl.tpl diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 000000000..f89b2bbcf --- /dev/null +++ b/.bazelrc @@ -0,0 +1,31 @@ +import tools/bazel.rc + +build --action_env TF_NEED_OPENCL_SYCL="0" +build --action_env TF_NEED_ROCM="0" +build --action_env TF_NEED_CUDA="0" +build --action_env TF_DOWNLOAD_CLANG="0" +#build --action_env PATH="/opt/buildtools/gcc-7.3.0/bin:/bin:/usr/bin" +#build --action_env GCC_HOST_COMPILER_PATH="/opt/buildtools/gcc-7.3.0/bin/gcc" +build:opt --cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0 +#build:opt --copt=-march=native +#build:opt --host_copt=-march=native +build:opt --define with_default_optimizations=true +build:v2 --define=tf_api_version=2 + +build:opt --copt=-ftrapv +build:opt --copt=-D_FORTIFY_SOURCE=2 +#build:opt --strip=always + +build --subcommands +build --verbose_failures +build --sandbox_debug +build --incompatible_strict_action_env +build --nocheck_visibility + + +# build --experimental_enable_repo_mapping +# build --nocheck_visibility +# build --distinct_host_configuration=false + + +build:opt --copt="-fpic" \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..9bc52b762 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +bazel-bin +bazel-genfiles +bazel-out +bazel-tf_plugin +bazel-testlogs +workspace.bzl diff --git a/BUILD b/BUILD new file mode 100644 index 000000000..abd9a1a11 
--- /dev/null +++ b/BUILD @@ -0,0 +1 @@ +# Add licence here \ No newline at end of file diff --git a/WORKSPACE.tpl b/WORKSPACE.tpl new file mode 100644 index 000000000..9077a0bfd --- /dev/null +++ b/WORKSPACE.tpl @@ -0,0 +1,60 @@ +workspace(name = "tf_adapter") + +local_repository( + name="org_tensorflow", + path="../../../tensorflow-1.15.0/", +) + +# TensorFlow depends on "io_bazel_rules_closure" so we need this here. +# Needs to be kept in sync with the same target in TensorFlow's WORKSPACE file. +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +http_archive( + name = "io_bazel_rules_closure", + sha256 = "5b00383d08dd71f28503736db0500b6fb4dda47489ff5fc6bed42557c07c6ba9", + strip_prefix = "rules_closure-308b05b2419edb5c8ee0471b67a40403df940149", + urls = [ + "file://TF_PACKAGE_PATH/308b05b2419edb5c8ee0471b67a40403df940149.tar.gz", + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/308b05b2419edb5c8ee0471b67a40403df940149.tar.gz", + "https://github.com/bazelbuild/rules_closure/archive/308b05b2419edb5c8ee0471b67a40403df940149.tar.gz", # 2019-06-13 + ], +) + +http_archive( + name = "bazel_skylib", + sha256 = "2ef429f5d7ce7111263289644d233707dba35e39696377ebab8b0bc701f7818e", + urls = [ + "file://TF_PACKAGE_PATH/bazel-skylib.0.8.0.tar.gz", + "https://github.com/bazelbuild/bazel-skylib/releases/download/0.8.0/bazel-skylib.0.8.0.tar.gz", + ], +) # https://github.com/bazelbuild/bazel-skylib/releases + + +http_archive( + name = "com_google_protobuf", + sha256 = "e9883cbd0807eb35f9e837cf7ff1d12a7a0303fef4fffb340632c98a2bbd4e2d", + strip_prefix = "protobuf-3.8.0", + urls = [ + "file://TF_PACKAGE_PATH/v3.8.0.tar.gz", + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/protocolbuffers/protobuf/archive/310ba5ee72661c081129eb878c1bbcec936b20f0.tar.gz", + "https://github.com/protocolbuffers/protobuf/archive/310ba5ee72661c081129eb878c1bbcec936b20f0.tar.gz", + ], +) + +# protobuf depends on @zlib, 
it has to be renamed to @zlib_archive because "zlib" is already +# defined using bind for grpc +http_archive( + name = "zlib_archive", + build_file = "@org_tensorflow//:third_party/zlib.BUILD", + sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1", + strip_prefix = "zlib-1.2.11", + urls = [ + "file://TF_PACKAGE_PATH/zlib-1.2.11.tar.gz", + ], +) + + +load('@org_tensorflow//tensorflow:workspace.bzl', 'tf_workspace') +tf_workspace() + +load('//:workspace.bzl', 'tf_adapter_workspace') +tf_adapter_workspace() diff --git a/inc/common/blocking_queue.h b/inc/common/blocking_queue.h new file mode 100644 index 000000000..8d6c4ef26 --- /dev/null +++ b/inc/common/blocking_queue.h @@ -0,0 +1,141 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +#ifndef INC_COMMON_BLOCKING_QUEUE_H_ +#define INC_COMMON_BLOCKING_QUEUE_H_ + +#include +#include +#include +#include + +static const int kDefaultMaxQueueSize = 2048; + +template +class BlockingQueue { + public: + explicit BlockingQueue(uint32_t max_size = kDefaultMaxQueueSize) : max_size_(max_size), is_stoped_(false) {} + + ~BlockingQueue() {} + + bool Pop(T &item) { + std::unique_lock lock(mutex_); + + while (queue_.empty() && !is_stoped_) { + empty_cond_.wait(lock); + } + + if (is_stoped_) { + return false; + } + + item = std::move(queue_.front()); + queue_.pop_front(); + + full_cond_.notify_one(); + + return true; + } + + bool Push(const T &item, bool is_wait = true) { + std::unique_lock lock(mutex_); + + while (queue_.size() >= max_size_ && !is_stoped_) { + if (!is_wait) { + return false; + } + full_cond_.wait(lock); + } + + if (is_stoped_) { + return false; + } + + queue_.push_back(item); + + empty_cond_.notify_one(); + + return true; + } + + bool Push(T &&item, bool is_wait = true) { + std::unique_lock lock(mutex_); + + while (queue_.size() >= max_size_ && !is_stoped_) { + if (!is_wait) { + return false; + } + full_cond_.wait(lock); + } + + if (is_stoped_) { + return false; + } + + queue_.emplace_back(std::move(item)); + + empty_cond_.notify_one(); + + return true; + } + + void Stop() { + { + std::unique_lock lock(mutex_); + is_stoped_ = true; + } + + full_cond_.notify_all(); + empty_cond_.notify_all(); + } + + void Restart() { + std::unique_lock lock(mutex_); + is_stoped_ = false; + } + + // if the queue is stoped ,need call this function to release the unprocessed items + std::list GetRemainItems() { + std::unique_lock lock(mutex_); + + if (!is_stoped_) { + return std::list(); + } + + return queue_; + } + + bool IsFull() { + std::unique_lock lock(mutex_); + return queue_.size() >= max_size_; + } + + void Clear() { + std::unique_lock lock(mutex_); + queue_.clear(); + } + + private: + std::list queue_; + std::mutex mutex_; + std::condition_variable 
empty_cond_; + std::condition_variable full_cond_; + uint32_t max_size_; + + bool is_stoped_; +}; + +#endif // INC_COMMON_BLOCKING_QUEUE_H_ diff --git a/inc/common/dynamic_aipp.h b/inc/common/dynamic_aipp.h new file mode 100644 index 000000000..9ada1ef5e --- /dev/null +++ b/inc/common/dynamic_aipp.h @@ -0,0 +1,104 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#ifndef INC_COMMON_DYNAMIC_AIPP_H_ +#define INC_COMMON_DYNAMIC_AIPP_H_ + +#include + +/** +* @ingroup dnn +* @brief struct define of dynamic aipp batch parameter. 
+*/ +typedef struct tagAippDynamicBatchPara { + int8_t cropSwitch; // crop switch + int8_t scfSwitch; // resize switch + int8_t paddingSwitch; // 0: unable padding + // 1: padding config value,sfr_filling_hblank_ch0 ~ sfr_filling_hblank_ch2 + // 2: padding source picture data, single row/collumn copy + // 3: padding source picture data, block copy + // 4: padding source picture data, mirror copy + int8_t rotateSwitch; // rotate switch,0: non-ratate, + // 1: ratate 90° clockwise,2: ratate 180° clockwise,3: ratate 270° clockwise + int8_t reserve[4]; + int32_t cropStartPosW; // the start horizontal position of cropping + int32_t cropStartPosH; // the start vertical position of cropping + int32_t cropSizeW; // crop width + int32_t cropSizeH; // crop height + + int32_t scfInputSizeW; // input width of scf + int32_t scfInputSizeH; // input height of scf + int32_t scfOutputSizeW; // output width of scf + int32_t scfOutputSizeH; // output height of scf + + int32_t paddingSizeTop; // top padding size + int32_t paddingSizeBottom; // bottom padding size + int32_t paddingSizeLeft; // left padding size + int32_t paddingSizeRight; // right padding size + + int16_t dtcPixelMeanChn0; // mean value of channel 0 + int16_t dtcPixelMeanChn1; // mean value of channel 1 + int16_t dtcPixelMeanChn2; // mean value of channel 2 + int16_t dtcPixelMeanChn3; // mean value of channel 3 + + uint16_t dtcPixelMinChn0; // min value of channel 0 + uint16_t dtcPixelMinChn1; // min value of channel 1 + uint16_t dtcPixelMinChn2; // min value of channel 2 + uint16_t dtcPixelMinChn3; // min value of channel 3 + uint16_t dtcPixelVarReciChn0; // sfr_dtc_pixel_variance_reci_ch0 + uint16_t dtcPixelVarReciChn1; // sfr_dtc_pixel_variance_reci_ch1 + uint16_t dtcPixelVarReciChn2; // sfr_dtc_pixel_variance_reci_ch2 + uint16_t dtcPixelVarReciChn3; // sfr_dtc_pixel_variance_reci_ch3 + + int8_t reserve1[16]; // 32B assign, for ub copy +} kAippDynamicBatchPara; + +/** +* @ingroup dnn +* @brief struct define of 
dynamic aipp parameter. lite:64+96*batchNum byte ; tiny:64+64*batchNum byte +*/ +typedef struct tagAippDynamicPara { + uint8_t inputFormat; // input format:YUV420SP_U8/XRGB8888_U8/RGB888_U8 + int8_t cscSwitch; // csc switch + int8_t rbuvSwapSwitch; // rb/ub swap switch + int8_t axSwapSwitch; // RGBA->ARGB, YUVA->AYUV swap switch + int8_t batchNum; // batch parameter number + int8_t reserve1[3]; + int32_t srcImageSizeW; // source image width + int32_t srcImageSizeH; // source image height + int16_t cscMatrixR0C0; // csc_matrix_r0_c0 + int16_t cscMatrixR0C1; // csc_matrix_r0_c1 + int16_t cscMatrixR0C2; // csc_matrix_r0_c2 + int16_t cscMatrixR1C0; // csc_matrix_r1_c0 + int16_t cscMatrixR1C1; // csc_matrix_r1_c1 + int16_t cscMatrixR1C2; // csc_matrix_r1_c2 + int16_t cscMatrixR2C0; // csc_matrix_r2_c0 + int16_t cscMatrixR2C1; // csc_matrix_r2_c1 + int16_t cscMatrixR2C2; // csc_matrix_r2_c2 + int16_t reserve2[3]; + uint8_t cscOutputBiasR0; // output Bias for RGB to YUV, element of row 0, unsigned number + uint8_t cscOutputBiasR1; // output Bias for RGB to YUV, element of row 1, unsigned number + uint8_t cscOutputBiasR2; // output Bias for RGB to YUV, element of row 2, unsigned number + uint8_t cscInputBiasR0; // input Bias for YUV to RGB, element of row 0, unsigned number + uint8_t cscInputBiasR1; // input Bias for YUV to RGB, element of row 1, unsigned number + uint8_t cscInputBiasR2; // input Bias for YUV to RGB, element of row 2, unsigned number + uint8_t reserve3[2]; + int8_t reserve4[16]; // 32B assign, for ub copy + + kAippDynamicBatchPara aippBatchPara; // allow transfer several batch para. 
+} kAippDynamicPara; + +#endif // INC_COMMON_DYNAMIC_AIPP_H_ diff --git a/inc/common/npu_error_define.h b/inc/common/npu_error_define.h new file mode 100644 index 000000000..aba70f990 --- /dev/null +++ b/inc/common/npu_error_define.h @@ -0,0 +1,94 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#ifndef INC_COMMON_NPU_ERROR_DEFINE_H_ +#define INC_COMMON_NPU_ERROR_DEFINE_H_ + +typedef enum tagHiAiNpuLocal { + HIAI_HOST = 1, + HIAI_DEVICE = 2, +} HiAiNpuLocal; + +typedef enum tagHiAiNpuCodeType { + ERROR_CODE = 1, + EXCEPTION_CODE = 2, +} HiAiNpuCodeType; + +typedef enum tagHiAiNpuErrLevel { + NONE_LEVEL = 0, + SUGGESTION_LEVEL = 1, + NORMAL_LEVEL = 2, + SERIOUS_LEVEL = 3, + CRITICAL_ERROR = 4, +} HiAiNpuErrLevel; + +typedef enum tagHiAiNpuModuleId { + HIAI_DRIVER = 1, + HIAI_CTRLCPU = 2, + HIAI_TS = 3, + HIAI_RUNTIME = 4, + HIAI_AICPU = 5, + HIAI_CCE = 6, + HIAI_TVM = 7, + HIAI_FRAMEWORK = 8, + HiAI_ENGINE = 9, + HIAI_DVPP = 10, + HIAI_AIPP = 11, + HIAI_LOWPOWER = 12, + HIAI_MDC = 13, + HIAI_COMPILE = 14, + HIAI_TOOLCHIAN = 15, + HIAI_ALG = 16, + HIAI_PROFILING = 17, + HIAI_HCCL = 18, + HIAI_SIMULATION = 19, + HIAI_BIOS = 20, + HIAI_SEC = 21, + HIAI_TINY = 22, + HIAI_DP = 23, +} HiAiNpuModuleId; + +/* bit 31-bit30 to be hiai local */ +#define HIAI_NPULOCAL_MASK 0xC0000000 +#define SHIFT_LOCAL_MASK 30 +#define HIAI_NPULOCAL_VAL_MASK 0x3 +/* bit 29 -bit28 to be hiai aicpu code type */ +#define 
HIAI_CODE_TYPE_MASK 0x30000000 +#define SHIFT_CODE_MASK 28 +#define HIAI_CODE_TYPE_VAL_MASK 0x3 +/* bit 27 -bit25 to be hiai error level */ +#define HIAI_ERROR_LEVEL_MASK 0x0E000000 +#define SHIFT_ERROR_LVL_MASK 25 +#define HIAI_ERROR_LEVEL_VAL_MASK 0x7 +/* bit 24 -bit17 to be hiai mod */ +#define HIAI_MODE_ID_MASK 0x01FE0000 +#define SHIFT_MODE_MASK 17 +#define HIAI_MODE_ID_VAL_MASK 0xFF + +#define HIAI_NPU_LOC_BIT(a) \ + (HIAI_NPULOCAL_MASK & ((unsigned int)((HiAiNpuLocal)(a)) & HIAI_NPULOCAL_VAL_MASK) << SHIFT_LOCAL_MASK) +#define HIAI_NPU_CODE_TYPE_BIT(a) \ + (HIAI_CODE_TYPE_MASK & ((unsigned int)((HiAiNpuCodeType)(a)) & HIAI_CODE_TYPE_VAL_MASK) << SHIFT_CODE_MASK) +#define HIAI_NPU_ERR_LEV_BIT(a) \ + (HIAI_ERROR_LEVEL_MASK & ((unsigned int)((HiAiNpuErrLevel)(a)) & HIAI_ERROR_LEVEL_VAL_MASK) << SHIFT_ERROR_LVL_MASK) +#define HIAI_NPU_MOD_ID_BIT(a) \ + (HIAI_MODE_ID_MASK & ((unsigned int)((HiAiNpuModuleId)(a)) & HIAI_MODE_ID_VAL_MASK) << SHIFT_MODE_MASK) + +#define HIAI_NPU_ERR_CODE_HEAD(npuLocal, codeType, errLevel, moduleId) \ + (HIAI_NPU_LOC_BIT(npuLocal) + HIAI_NPU_CODE_TYPE_BIT(codeType) + HIAI_NPU_ERR_LEV_BIT(errLevel) + \ + HIAI_NPU_MOD_ID_BIT(moduleId)) + +#endif // INC_COMMON_NPU_ERROR_DEFINE_H_ diff --git a/inc/common/opskernel/ge_task_info.h b/inc/common/opskernel/ge_task_info.h new file mode 100644 index 000000000..145f3f27e --- /dev/null +++ b/inc/common/opskernel/ge_task_info.h @@ -0,0 +1,76 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#ifndef INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_ +#define INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_ + +#include +#include +#include +#include + +using std::string; +namespace ge { +// when need to eliminate GETaskKernelHcclInfo, so not need DAVINCI_TRAIN/DAVINCI_CLOUD +struct GETaskKernelHcclInfo { + string input_name; + string hccl_type; + void *inputDataAddr; + void *outputDataAddr; + void *workSpaceAddr; + int32_t count; + int32_t dataType; + int32_t opType; + int64_t rootId; + uint64_t workSpaceMemSize; + std::vector dims; + std::vector hcclStreamList; +}; + +struct GETaskInfo { + uint32_t id; + uint16_t type; + uint32_t streamID; + void *stream; // rtKernelLaunch input argument + void *event; + void *privateDef; + uint32_t privateDefLen; + void *opsKernelStorePtr; + + std::vector kernelHcclInfo; +}; + +struct HcomOpertion { + std::string hcclType; + void *inputPtr; + void *outputPtr; + uint64_t count; + int32_t dataType; + int32_t opType; + int32_t root; +}; + +struct HcomRemoteAccessAddrInfo +{ + uint32_t remotetRankID; + uint64_t remoteAddr; // host embedding table address + uint64_t localAddr; // device HBM address + uint64_t length; // memory Length in Bytes +}; + + +} // namespace ge +#endif // INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_ diff --git a/inc/common/opskernel/ops_kernel_info_store.h b/inc/common/opskernel/ops_kernel_info_store.h new file mode 100644 index 000000000..207ec4e05 --- /dev/null +++ b/inc/common/opskernel/ops_kernel_info_store.h @@ -0,0 +1,88 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#ifndef INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_STORE_H_ +#define INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_STORE_H_ + +#include +#include +#include +#include +#include "./ge_task_info.h" +#include "./ops_kernel_info_types.h" +#include "cce/aicpu_engine_struct.h" +#include "cce/fwk_adpt_struct.h" +#include "common/ge_inner_error_codes.h" +#include "graph/node.h" +#include "proto/task.pb.h" +using std::map; +using std::string; +using std::to_string; +using std::vector; + +namespace ge { +class OpDesc; + +class OpsKernelInfoStore { + public: + OpsKernelInfoStore() {} + + virtual ~OpsKernelInfoStore() {} + + // initialize opsKernelInfoStore + virtual Status Initialize(const map &options) = 0; /*lint -e148*/ + + // close opsKernelInfoStore + virtual Status Finalize() = 0; /*lint -e148*/ + + virtual Status CreateSession(const std::map &session_options) { return SUCCESS; } + + virtual Status DestroySession(const std::map &session_options) { return SUCCESS; } + + // get all opsKernelInfo + virtual void GetAllOpsKernelInfo(map &infos) const = 0; + + // whether the opsKernelInfoStore is supported based on the operator attribute + virtual bool CheckSupported(const OpDescPtr &opDescPtr, std::string &un_supported_reason) const = 0; + + virtual bool CheckAccuracySupported(const OpDescPtr &opDescPtr, std::string &un_supported_reason, + bool realQuery = false) const { + return CheckSupported(opDescPtr, un_supported_reason); + } + // opsFlag opsFlag[0] indicates constant folding is supported or not + virtual void opsFlagCheck(const ge::Node &node, std::string &opsFlag) 
{}; + + // memory allocation requirement + virtual Status CalcOpRunningParam(Node &node) = 0; /*lint -e148*/ + + // generate task for op。 + virtual Status GenerateTask(const Node &node, RunContext &context, + std::vector &tasks) = 0; /*lint -e148*/ + + // only call fe engine interface to compile single op + virtual Status CompileOp(vector &node_vec) { return SUCCESS; } + virtual Status CompileOpRun(vector &node_vec) { return SUCCESS; } + // load task for op + virtual Status LoadTask(GETaskInfo &task) { return SUCCESS; } + + // only call aicpu interface to generate task struct + virtual Status GenSingleOpRunTask(const NodePtr &node, STR_FWK_OP_KERNEL &task, string &task_info) { return SUCCESS; } + + // only call aicpu interface to generate task struct + virtual Status GenMemCopyTask(uint64_t count, STR_FWK_OP_KERNEL &task, string &task_info) { return SUCCESS; } +}; +} // namespace ge +#endif // INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_STORE_H_ diff --git a/inc/common/opskernel/ops_kernel_info_types.h b/inc/common/opskernel/ops_kernel_info_types.h new file mode 100644 index 000000000..6dbf5ff76 --- /dev/null +++ b/inc/common/opskernel/ops_kernel_info_types.h @@ -0,0 +1,66 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +#ifndef INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_TYPES_H_ +#define INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_TYPES_H_ + +#include +#include +#include +#include "graph/buffer.h" +#include "runtime/rt_model.h" + +using std::string; + +namespace ge { +/*lint -e148*/ +struct RunContext { + rtModel_t model; + rtStream_t stream; + uint64_t sessionId; + uint64_t dataMemSize; + uint8_t *dataMemBase; + uint64_t weightMemSize; + uint8_t *weightMemBase; + ge::Buffer weightsBuffer; + std::vector graphStreamList; // all streams of graph, order by ge stream id(0,1,...) + std::vector graphEventList; // all events of graph, order by ge event id(0,1,...) + std::vector graphLabelList; // all labels of graph, order by ge label id(0,1,...) +}; + +/*lint +e148*/ + +struct Task { + uint32_t id; + uint16_t type; + void *stream; + void *event; +}; + +struct OpInfo { + string engine; // which engin + /*lint -e148*/ + string opKernelLib; // which opsKernelStore + int computeCost; // compute cost + bool flagPartial; // whether to support is related to shape + bool flagAsync; // Whether to support asynchronous + bool isAtomic; // whether to support atomic addr clean + string opFileName; // op file name + string opFuncName; // op function name +}; +} // namespace ge + +#endif // INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_TYPES_H_ diff --git a/inc/common/optimizer/graph_optimizer.h b/inc/common/optimizer/graph_optimizer.h new file mode 100644 index 000000000..8361a8dfc --- /dev/null +++ b/inc/common/optimizer/graph_optimizer.h @@ -0,0 +1,74 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#ifndef INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_H_ +#define INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_H_ + +#include +#include +#include "./graph_optimizer_types.h" +#include "common/ge_inner_error_codes.h" +#include "common/opskernel/ops_kernel_info_types.h" +#include "graph/compute_graph.h" + +using std::map; +using std::string; + +/*lint -e148*/ +namespace ge { +class GraphOptimizer { + public: + virtual ~GraphOptimizer() {} + + // initialize graphOptimizer + virtual Status Initialize(const map &options) = 0; + + // close graphOptimizer + virtual Status Finalize() = 0; + + // optimize original graph for FE quant optimize + virtual Status OptimizeGraphPrepare(ComputeGraph& graph) { + return SUCCESS; + } + + // optimize graph before build for RTS + virtual Status OptimizeGraphBeforeBuild(ComputeGraph& graph) { + return SUCCESS; + } + + // optimize original graph, using in graph preparation stage + virtual Status OptimizeOriginalGraph(ComputeGraph &graph) = 0; + + // optimize original graph, using for conversion operator insert in graph preparation stage + virtual Status OptimizeOriginalGraphJudgeInsert(ComputeGraph &graph) { + return SUCCESS; + } + + // optimize fused graph + virtual Status OptimizeFusedGraph(ComputeGraph &graph) = 0; + + // optimize whole graph, using after graph merged stage + virtual Status OptimizeWholeGraph(ComputeGraph &graph) = 0; + + // get attribute of graph optimizer + virtual Status GetAttributes(GraphOptimizerAttribute &attrs) const = 0; + + // optimize streamed Graph + virtual Status OptimizeStreamGraph(ComputeGraph 
&graph, const RunContext &context) { return SUCCESS; } +}; +} // namespace ge +/*lint +e148*/ +#endif // INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_H_ diff --git a/inc/common/optimizer/graph_optimizer_types.h b/inc/common/optimizer/graph_optimizer_types.h new file mode 100644 index 000000000..9e1ec96b3 --- /dev/null +++ b/inc/common/optimizer/graph_optimizer_types.h @@ -0,0 +1,34 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_TYPES_H_ +#define INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_TYPES_H_ + +#include +#include +namespace ge { +enum OPTIMIZER_SCOPE { + UNIT = 0, + ENGINE, +}; + +struct GraphOptimizerAttribute { + std::string engineName; + OPTIMIZER_SCOPE scope; +}; +} // namespace ge + +#endif // INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_TYPES_H_ diff --git a/inc/common/proto/fusion_model.proto b/inc/common/proto/fusion_model.proto new file mode 100644 index 000000000..002bef42e --- /dev/null +++ b/inc/common/proto/fusion_model.proto @@ -0,0 +1,20 @@ +/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Apache License for more details at + * http://www.apache.org/licenses/LICENSE-2.0 + */ +syntax = "proto3"; + +import "vendor/hisi/npu/cce/graph_old/proto/om.proto"; +package domi; + +message FusionModelDef { + string version = 1; + repeated OpDef fusion_op = 2; +} diff --git a/inc/common/proto/ge_api.proto b/inc/common/proto/ge_api.proto new file mode 100644 index 000000000..331c5aeae --- /dev/null +++ b/inc/common/proto/ge_api.proto @@ -0,0 +1,88 @@ +syntax = "proto3"; +package ge.api_pb; + +import "ge_ir.proto"; + +// GE initialize +message GEInitialize { + map options = 1; +}; + +// initialize response +message GEInitializeResponse { + uint32 status = 1; + uint32 clientId = 2; +}; + +// GE finalize +message GEFinalize { + bool final = 1; + uint32 clientId = 2; +}; + +message GEFinalizeResponse { + uint32 status = 1; +}; + +// GE Session +message CreateSession{ + map options = 1; +}; + +message CreateSessionResponse { + uint32 status = 1; + uint64 sessionId = 2; +}; + +//GE AddGraph +//model serialize :: serializegraph +message SessionAddGraph{ + uint32 graphId = 1; + uint64 sessionId = 2; + ge.proto.GraphDef graph = 3; +}; + +message SessionAddGraphResponse { + uint32 status = 1; +}; + +//GE SessionRemoveGraph +message SessionRemoveGraph{ + uint32 graphId = 1; + uint64 sessionId = 2; +}; + +message SessionRemoveGraphResponse { + uint32 status = 1; +}; + +message SessionRunGraph{ + uint32 graphId = 1; + uint64 sessionId = 2; + repeated ge.proto.TensorDef tensor = 3; +}; + +message SessionBuildGraph{ + uint32 graphId = 1; + uint64 sessionId = 2; + repeated ge.proto.TensorDef tensor = 3; + string savePath = 4; +}; + +message SessionRunGraphResponse { + uint32 status = 1; + repeated ge.proto.TensorDef tensor = 2; +}; + +message SessionBuildGraphResponse { + 
uint32 status = 1; +}; + +message DestroySession{ + bool final = 1; + uint64 sessionId = 2; +}; + +message DestroySessionResponse { + uint32 status = 1; +}; diff --git a/inc/common/proto/ge_ir.proto b/inc/common/proto/ge_ir.proto new file mode 100644 index 000000000..ab21f6723 --- /dev/null +++ b/inc/common/proto/ge_ir.proto @@ -0,0 +1,190 @@ +syntax = "proto3"; + +package ge.proto; + +enum DataType +{ + DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set. + DT_FLOAT = 1; // float type + DT_FLOAT16 = 2; // fp16 type + DT_INT8 = 3; // int8 type + DT_UINT8 = 4; // uint8 type + DT_INT16 = 5; // int16 type + DT_UINT16 = 6; // uint16 type + DT_INT32 = 7; // + DT_INT64 = 8; // int64 type + DT_UINT32 = 9; // unsigned int32 + DT_UINT64 = 10; // unsigned int64 + DT_BOOL = 11; // bool type + DT_DOUBLE = 12; // double type + DT_STRING = 13; // string type + DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */ + DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */ + DT_COMPLEX64 = 16; // complex64 type + DT_COMPLEX128 = 17; // complex128 type + DT_QINT8 = 18; // qint8 type + DT_QINT16 = 19; // qint16 type + DT_QINT32 = 20; // qint32 type + DT_QUINT8 = 21; // quint8 type + DT_QUINT16 = 22; // quint16 type + DT_RESOURCE = 23; // resource type + DT_STRING_REF = 24; // string_ref type + DT_DUAL = 25; /**< dual output type */ +} + +message AttrDef +{ + message ListValue + { + enum ListValueType{ + VT_LIST_NONE = 0; + VT_LIST_STRING = 1; + VT_LIST_INT = 2; + VT_LIST_FLOAT = 3; + VT_LIST_BOOL = 4; + VT_LIST_BYTES = 5; + VT_LIST_TENSOR_DESC = 6; + VT_LIST_TENSOR = 7; + VT_LIST_GRAPH = 8; + VT_LIST_NAMED_ATTRS = 9; + VT_LIST_DATA_TYPE = 10; + } + repeated bytes s = 2; // "list(string)" + repeated int64 i = 3; // "list(int)" + repeated float f = 4; // "list(float)" + repeated bool b = 5; // "list(bool)" + repeated bytes bt = 7; + repeated TensorDescriptor td = 8; + repeated TensorDef t = 9; + repeated GraphDef g = 10; + repeated NamedAttrs na = 11; + repeated int64 
dt = 12; // list ge::DataType + + ListValueType val_type = 20; + } + + message ListListInt{ + message ListInt{ + repeated int64 list_i = 1; // list int + } + repeated ListInt list_list_i = 1; // list list int + } + + oneof value + { + bytes s = 2; // "string" + int64 i = 3; // "int" + float f = 4; // "float" + bool b = 5; // "bool" + bytes bt = 7; + ListValue list = 1; // any "list(...)" + NamedAttrs func = 10; // Used to support attr nesting + TensorDescriptor td = 11; // GeTensorDesc type + TensorDef t = 12; // GeTensor type + GraphDef g = 13; // Graph type + ListListInt list_list_int = 14; // List List Int type + int64 dt = 15; // ge::DataType + } +} + +// A list of attr names and their values. The whole list is attached +// with a string name. E.g., MatMul[T=float]. +message NamedAttrs +{ + string name = 1; + map attr = 2; +} + +// Shape / dimension description, using row-major order +message ShapeDef +{ + repeated int64 dim = 1; // Size of each dimension +} + +// Multidimensional data description +message TensorDescriptor +{ + string name = 1; // Optional parameter, tensor name + + DataType dtype = 2; // tensor datatype + ShapeDef shape = 3; // Shape / dimension + string layout = 4; // Tensor format, eg: "NCHW", "NHWC", "CHW", "ND" + + bool has_out_attr = 9; + int64 size = 10; + int64 weight_size = 11; + bool reuse_input = 12; + bool output_tensor = 13; + string device_type = 14; + bool input_tensor =15; + int64 real_dim_cnt = 16; + int64 reuse_input_index = 17; + int64 data_offset = 18; + int64 cmps_size = 19; + string cmps_tab = 20; + int64 cmps_tab_offset = 21; + + map attr = 5; // Set of extra parameter fields +} + +// GeTensor definition +message TensorDef +{ + TensorDescriptor desc = 1; // Tensor description + bytes data = 2; // Tensor data +} + + +// Operator description +message OpDef +{ + string name = 1; // name + string type = 2; // type + + repeated string input = 5; // input original op name + outgoing index. 
op_name:index + + map attr = 10; // Set of operator parameter fields + + bool has_out_attr = 20; + int64 id = 21; + int64 stream_id =22; + repeated string input_name = 23; + repeated string src_name = 24; + repeated int64 src_index = 25; + repeated string dst_name = 26; + repeated int64 dst_index = 27; + repeated int64 input_i = 28; + repeated int64 output_i = 29; + repeated int64 workspace = 30; + repeated int64 workspace_bytes = 31; + repeated bool is_input_const = 32; + repeated TensorDescriptor input_desc = 33; + repeated TensorDescriptor output_desc = 34; + repeated string subgraph_name = 35; +} + +// Graph definition +message GraphDef +{ + string name = 1; // name + + repeated string input = 4; // Graph input + repeated string output = 5; // Graph output + + repeated OpDef op = 6; // List of operators + + map attr = 11; // Extended field +} + +// model definition +message ModelDef +{ + string name = 1; // name + uint32 version = 2; // IR Proto verion + string custom_version = 3; // User model version number, passed in by user + + repeated GraphDef graph = 7; // Graph definition,graph[0] represents the main diagram in modeldef + + map attr = 11; // Extended field +} + diff --git a/inc/common/proto/insert_op.proto b/inc/common/proto/insert_op.proto new file mode 100644 index 000000000..032b5cf43 --- /dev/null +++ b/inc/common/proto/insert_op.proto @@ -0,0 +1,126 @@ +syntax = "proto3"; + +package domi; + +message InsertNewOps { + repeated AippOpParams aipp_op = 1; + repeated MultiShapeOpParams multi_shape_op = 2; +} + +message AippOpParams { + enum InputFormat { + UNDEFINED = 0; + YUV420SP_U8 = 1; + XRGB8888_U8 = 2; + RGB888_U8 = 3; + YUV400_U8 = 4; + NC1HWC0DI_FP16 = 5; + NC1HWC0DI_S8 = 6; + ARGB8888_U8 = 7; + YUYV_U8 = 8; + YUV422SP_U8 = 9; + AYUV444_U8 = 10; + RAW10 = 11; + RAW12 = 12; + RAW16 = 13; + RAW24 = 14; + } + + enum AippMode { + undefined = 0; + static = 1; + dynamic = 2; + } + + // AIPPģʽ־̬AIPPͶ̬AIPP + AippMode aipp_mode = 1; + + // 
related_input_rankΪΪͣ÷Χ>=0, <=DataӵĸĬֵΪ0 + // ʶģ͵ĵڼAIPPģ룬ҪԵ2AIPPrelated_input_rankΪ1 + uint32 related_input_rank = 2; + + // input_edge_idxΪѡΪͣ÷ΧΪ>=0 + // øòãڶDataӲͬͬAIPPòûãĬ϶related_input_rankָģAIPP + // ֵ <= Dataߵĸ + repeated uint32 input_edge_idx = 3; + + // [Begin] ̬AIPPþ̬AIPPʱЧ + uint32 max_src_image_size = 4; + + // Ƿ֧תĬϲ֧֣֧תʱжĿռʧ + bool support_rotation = 5; + + // [End] ̬AIPP + + + // [Begin] ̬AIPPö̬AIPPʱЧ + InputFormat input_format = 51; + bool csc_switch = 52; + float cpadding_value = 53; + bool rbuv_swap_switch = 54; + bool ax_swap_switch = 55; + bool single_line_mode = 56; + + int32 src_image_size_w = 57; + int32 src_image_size_h = 58; + + bool crop = 59; + int32 load_start_pos_w = 60; + int32 load_start_pos_h = 61; + int32 crop_size_w = 62; + int32 crop_size_h = 63; + + bool resize = 64; + int32 resize_output_w = 65; + int32 resize_output_h = 66; + + bool padding = 67; + int32 left_padding_size = 68; + int32 right_padding_size = 69; + int32 top_padding_size = 70; + int32 bottom_padding_size = 71; + + int32 mean_chn_0 = 10; + int32 mean_chn_1 = 11; + int32 mean_chn_2 = 12; + int32 mean_chn_3 = 19; + float min_chn_0 = 13; + float min_chn_1 = 14; + float min_chn_2 = 15; + float min_chn_3 = 20; + repeated float var_reci_chn_0 = 16; + repeated float var_reci_chn_1 = 17; + repeated float var_reci_chn_2 = 18; + repeated float var_reci_chn_3 = 21; + + repeated int32 matrix_r0c0 = 30; + repeated int32 matrix_r0c1 = 31; + repeated int32 matrix_r0c2 = 32; + repeated int32 matrix_r1c0 = 33; + repeated int32 matrix_r1c1 = 34; + repeated int32 matrix_r1c2 = 35; + repeated int32 matrix_r2c0 = 36; + repeated int32 matrix_r2c1 = 37; + repeated int32 matrix_r2c2 = 38; + repeated int32 output_bias_0 = 39; + repeated int32 output_bias_1 = 40; + repeated int32 output_bias_2 = 41; + repeated int32 input_bias_0 = 42; + repeated int32 input_bias_1 = 43; + repeated int32 input_bias_2 = 44; + + // [End] ̬AIPP +} + +message MultiShapeOpParams { + enum MultiShapeMode { + batch = 
0; //̬batch + resolution = 1; //ֱ̬ʣչ + } + + MultiShapeMode mode = 1; //ģʽ + uint32 related_input_rank = 2; //Ӳ뵽ĸ + + + repeated uint32 batch_list = 11; //batch_listֵbatch_listĸ28֮ +} diff --git a/inc/common/proto/om.proto b/inc/common/proto/om.proto new file mode 100644 index 000000000..e15e5f808 --- /dev/null +++ b/inc/common/proto/om.proto @@ -0,0 +1,396 @@ +/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Apache License for more details at + * http://www.apache.org/licenses/LICENSE-2.0 + */ +syntax = "proto3"; + +package domi; + +enum TargetType +{ + MINI = 0; + TINY = 1; + LITE = 2; +} + +// offline model +message ModelDef { + string name = 1; + uint32 version = 2; + + uint64 memory_size = 10; + uint32 stream_num = 11; + uint32 event_num = 12; + uint64 weight_size = 13; + uint32 label_num = 15; + repeated OpDef op = 20; + TargetType target_type = 23; + + map attr = 30; +}; + +// operator define +message OpDef { + string name = 1; + string type = 2; + + uint32 id = 3; + uint32 stream_id = 4; + + repeated string input_name = 5; + + repeated string src_name = 8; + repeated int32 src_index = 9; + repeated int64 input = 10; + repeated int64 output = 11; + repeated TensorDescriptor input_desc = 12; + repeated TensorDescriptor output_desc = 13; + repeated WeightDef weights = 14; + repeated string dst_name = 15; + repeated int32 dst_index = 16; + + repeated int64 workspace = 20; + repeated uint32 workspace_bytes = 21; + + repeated string weight_name = 22; + repeated bool is_input_const = 23; + + map attr = 30; + + QuantizeFactorParams 
quantize_factor = 31; + + oneof op_params { + // start at 100 here + SendOpParams sender_param = 100; + RecvOpParams receiver_param = 200; + ConvolutionOpParams convolution_param = 300; + PoolingOpParams pooling_param = 400; + EltwiseOpParams eltwise_param = 500; + BatchNormOpParams batchnorm_param = 600; + ScaleOpParams scale_param = 700; + FullConnectionOpParams full_connection_param = 800; + SoftmaxOpParams softmax_param = 900; + ActivationOpParams activation_param = 1000; + ReshapeOpParams reshape_param = 1100; + } +}; + +message SendOpParams { + uint32 event_id = 1; +}; + +message RecvOpParams { + uint32 event_id = 1; +}; + +enum QuantizeScaleType +{ + VECTOR_SCALE = 0; + SCALAR_SCALE = 1; +} + +enum QuantizeScaleMode +{ + NORMAL_MODE = 0; + SQRT_MODE = 1; +} + +enum QuantizeAlgorithm +{ + NON_OFFSET_ALGO = 0; + HALF_OFFSET_ALGO = 1; + ALL_OFFSET_ALGO = 2; +} +message QuantizeFactor +{ + QuantizeScaleMode scale_mode = 1; + bytes scale_value = 2; + int64 scale_offset = 3; + bytes offset_data_value = 4; + int64 offset_data_offset = 5; + bytes offset_weight_value = 6; + int64 offset_weight_offset = 7; + bytes offset_pad_value = 8; + int64 offset_pad_offset = 9; +}; + +message QuantizeCalcFactor +{ + bytes offsetw = 1; + int64 offsetw_offset = 2; + bytes offsetd = 3; + int64 offsetd_offset = 4; + bytes scalereq = 5; + int64 scaledreq_offset = 6; + bytes offsetdnext = 7; + int64 offsetdnext_offset = 8; +} + +message QuantizeFactorParams +{ + QuantizeAlgorithm quantize_algo = 1; + QuantizeScaleType scale_type = 2; + QuantizeFactor quantize_param = 3; + QuantizeFactor dequantize_param = 4; + QuantizeFactor requantize_param = 5; + QuantizeCalcFactor quantizecalc_param = 6; +}; + +message ConvolutionOpParams { + int32 mode = 1; + int32 algo = 2; + int32 pad_mode = 3; + uint32 group = 4; + uint32 num_output = 5; + + repeated uint32 pad = 10; + repeated uint32 stride = 11; + repeated uint32 dilation = 12; + repeated uint32 kernel = 13; + + float alpha = 20; + float beta 
= 21; + + WeightDef filter = 40; + WeightDef bias = 41; + + bool relu_flag = 62; + repeated uint32 adj = 70; + repeated uint32 target_shape = 71; + repeated uint32 before_pad = 72; +}; + +message PoolingOpParams { + int32 mode = 1; + int32 nan_opt = 2; + int32 pad_mode = 3; + bool global_pooling = 4; + + repeated uint32 window = 10; + repeated uint32 pad = 11; + repeated uint32 stride = 12; + bool ceil_mode = 13; + int32 data_mode = 14; + + float alpha = 20; + float beta = 21; + repeated uint32 before_pad = 22; +}; + +message EltwiseOpParams { + int32 mode = 1; + repeated float coeff = 2; + float alpha = 3; + float beta = 4; + repeated WeightDef weight = 5; + bool relu_flag = 6; +}; + +message ActivationOpParams { + int32 mode = 1; + float coef = 2; + float alpha = 3; + float beta = 4; +}; + +message BatchNormOpParams { + int32 mode = 1; + + float alpha = 2; + float beta = 3; + double epsilon = 4;//optinal,[default = 1e-5] + bool use_global_stats = 5; //optinal,by default true,testing mode + float moving_average_fraction = 6; //optinal,[default = .999]; + + WeightDef estimated_mean = 7; + WeightDef estimated_variance = 8; + + WeightDef scale = 9; + WeightDef bias = 10; +}; + +message ScaleOpParams { + WeightDef scale = 1; + WeightDef bias = 2; +}; + +message ReshapeOpParams { + float alpha = 1; + float beta = 2; + ShapeDef shape = 3; + int32 axis = 4; + int32 num_axes = 5; + int32 format = 6; +}; + +message SoftmaxOpParams { + int32 algo = 1; + int32 mode = 2; + float alpha = 3; + float beta = 4; +}; + +message FullConnectionOpParams { + WeightDef filter = 1; + WeightDef bias = 2; + uint32 num_output = 3; + bool relu_flag = 12; +}; + +message FlattenOpParams { + float alpha = 1; + float beta = 2; + int32 start_axis = 3; + int32 end_axis = 4; +} + +message AddLimitedOpParams { + float alpha = 1; + float beta = 2; + int32 axis = 3; + bool broadcast = 4; + + repeated WeightDef weight = 10; +}; + +message MulLimitedOpParams { + float alpha = 1; + float beta = 2; + 
int32 axis = 3; + bool broadcast = 4; + + repeated WeightDef weight = 10; +}; + +message AddOpParams { + float alpha = 1; + float beta = 2; + + repeated WeightDef weight = 10; +}; + +message MulOpParams { + float alpha = 1; + float beta = 2; + + repeated WeightDef weight = 10; +}; + +message SubOpParams { + float alpha = 1; + float beta = 2; + + repeated WeightDef weight = 10; +}; + +message BiasAddOpParams { + float alpha = 1; + float beta = 2; + + WeightDef bias = 10; +}; + +message MatMulOpParams { + float alpha = 1; + float beta = 2; + bool transposeX = 3; + bool transposeW = 4; + + WeightDef filter = 10; + WeightDef bias = 12; +}; + +message RsqrtOpParams { + float alpha = 1; + float beta = 2; +}; + + +message WeightDef { + int32 format = 1; + int32 data_type = 2; + ShapeDef shape = 3; + bytes data = 4; + int64 data_offset = 5; + uint32 cmps_size = 6; + bytes cmps_tab = 7; + int64 cmps_tab_offset = 10; + CompressInfo cmps_info = 8; + AllOffsetQuantizeInfo alloffset_quantize_info = 11; +} + +message ShapeDef { + repeated int64 dim = 1; +} + +enum DeviceType { + NPU = 0; // In default, we will use NPU. 
+ CPU = 1; // CPU +} + +message AllOffsetQuantizeInfo { + float scale = 1; + int32 offset = 2; +} + +message TensorDescriptor { + int32 format = 1; + int32 data_type = 2; + repeated int64 dim = 3; + uint32 size = 4; + bool reuse_input = 5; + bool output_tensor = 7; + DeviceType device_type = 8; + bool input_tensor = 9; + uint32 real_dim_cnt = 10; + uint32 reuse_input_index = 11; + AllOffsetQuantizeInfo alloffset_quantize_info = 12; +} + +message CompressInfo { + int32 blockRow = 1; // block row + int32 blockCol = 2; // block col + int32 fractalK = 3; // fractal K + int32 fractalN = 4; // fractal N + int32 lastFractalK = 5; // K of last fractal + int32 lastFractalN = 6; // N of last fractal + int32 cubeSize = 7; // cube's length + int32 loadDir = 8; // data load directtiono 0:col load 1:row load +} + +message AttrDef { + message ListValue { + repeated string s = 2; // "list(string)" + repeated int64 i = 3 [packed = true]; // "list(int)" + repeated float f = 4 [packed = true]; // "list(float)" + repeated bool b = 5 [packed = true]; // "list(bool)" + repeated uint32 u = 6 [packed = true]; // "list(uint)" + repeated bytes bt = 7; + } + + oneof value { + string s = 2; // "string" + int64 i = 3; // "int" + float f = 4; // "float" + bool b = 5; // "bool" + uint32 u = 6; // "uint32" + bytes bt = 7; + ListValue list = 1; // any "list(...)" + NamedAttrs func = 10; + } +} + +// A list of attr names and their values. The whole list is attached +// with a string name. E.g., MatMul[T=float]. 
+message NamedAttrs { + string name = 1; + map attr = 2; +} + diff --git a/inc/common/proto/op_mapping_info.proto b/inc/common/proto/op_mapping_info.proto new file mode 100644 index 000000000..006cb4665 --- /dev/null +++ b/inc/common/proto/op_mapping_info.proto @@ -0,0 +1,62 @@ +syntax = "proto3"; +package aicpu.dump; + +message Shape { + repeated uint64 dim = 1; +} + +message Output { + int32 data_type = 1; + int32 format = 2; + Shape shape = 3; + uint64 address = 4; + string original_name = 5; + int32 original_output_index = 6; + int32 original_output_data_type = 7; + int32 original_output_format = 8; + uint64 size = 9; +} + +message Input { + int32 data_type =1; + int32 format = 2; + Shape shape = 3; + uint64 address = 4; + uint64 size = 5; +} + +message Op { + string op_name = 1; + string op_type = 2; +} + +message Task { + uint32 task_id = 1; + uint32 stream_id = 2; + Op op = 3; + repeated Output output = 4; + bool end_graph = 5; + repeated Input input = 6; +} + +message OpMappingInfo { + string dump_path = 1; + oneof model_name_param { + string model_name = 2; + } + oneof model_id_param { + uint32 model_id = 3; + } + oneof step_id { + uint64 step_id_addr = 4; + } + oneof iterations_per_loop { + uint64 iterations_per_loop_addr = 5; + } + oneof loop_cond { + uint64 loop_cond_addr = 6; + } + uint32 flag = 7; // 0x01 load, 0x00 unload + repeated Task task = 8; + string dump_step = 9; +} \ No newline at end of file diff --git a/inc/common/proto/optimizer_priority.proto b/inc/common/proto/optimizer_priority.proto new file mode 100644 index 000000000..769619cfc --- /dev/null +++ b/inc/common/proto/optimizer_priority.proto @@ -0,0 +1,7 @@ +syntax = "proto3"; +package ge.optimizers; + +// Default: GE>FE>AICPU +message Priority{ + repeated string optimizer = 1; +} \ No newline at end of file diff --git a/inc/common/proto/task.proto b/inc/common/proto/task.proto new file mode 100644 index 000000000..d0c09840e --- /dev/null +++ b/inc/common/proto/task.proto @@ -0,0 
+1,165 @@ +/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Apache License for more details at + * http://www.apache.org/licenses/LICENSE-2.0 + */ +syntax = "proto3"; + +package domi; + +message ModelTaskDef { + string version = 1; + + map attr = 9; // Extended field + repeated TaskDef task = 10; + + uint64 memory_size = 11; + uint32 stream_num = 12; + uint32 event_num = 13; + uint64 weight_size = 14; + + repeated bytes op = 15; // input/output opdef in bytes + + uint64 base_addr = 16; // base addr + uint64 weight_addr = 17; // weight addr + uint32 batch_num = 18; +} + + +message TaskDef { + uint32 id = 1; + uint32 type = 2; + + uint32 stream_id = 10; + uint32 event_id = 11; + + KernelDef kernel = 20; + KernelExDef kernel_ex = 21; + KernelHcclDef kernel_hccl = 25; + EventExDef event_ex = 26; + LogTimeStampDef log_timestamp = 28; + + uint32 label_id = 30; + + MemcpyAsyncDef memcpy_async = 31; + StreamSwitchDef stream_switch = 32; + StreamActiveDef stream_active = 33; + bytes private_def = 34; + uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future + StreamSwitchNDef stream_switch_n = 36; + + LabelSetDef label_set = 37; + LabelGotoExDef label_goto_ex = 38; + LabelSwitchByIndexDef label_switch_by_index = 39; +} + +message KernelDef { + KernelContext context = 1; + + string stub_func = 10; + uint32 block_dim = 11; + uint32 args_size = 12; + bytes args = 13; + bytes sm_desc = 14; + bytes flowtable = 15; + string so_name = 16; + string kernel_name = 17; + bytes kernel_ext_info = 18; + uint32 kernel_ext_info_size = 19; +} + 
+message KernelContext { + uint32 kernel_type = 1; + uint32 op_id = 2; // OP type in CCE + uint32 kernel_func_id = 3; + uint32 op_index = 4; // TE/Custom operator + bool is_flowtable = 5; // Identify whether args is a flowtable structure + bytes args_offset = 6; // args offset information + uint32 args_count = 7; // args count + repeated uint32 origin_op_index = 8; +} + + +message KernelExDef { + uint32 flags = 1; + + uint32 op_index = 4; + uint32 args_size = 12; + bytes args = 13; + bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput + uint32 task_info_size = 15; + bytes kernel_ext_info = 16; + uint32 kernel_ext_info_size = 17; +} + + +message KernelHcclDef { + uint32 op_index = 8; + string hccl_type = 9; +} + + +message EventExDef { + uint32 op_index = 1; + uint32 event_type = 2; +} + +message LogTimeStampDef { + uint64 logid = 1; + bool notify = 2; + uint32 flat = 3; +} + +message MemcpyAsyncDef { + uint64 dst = 1; + uint64 dst_max = 2; + uint64 src = 3; + uint64 count = 4; + uint32 kind = 5; + uint32 op_index = 6; +} + +message StreamSwitchDef { + uint32 op_index = 1; + uint32 true_stream_id = 2; + int64 value = 3; + uint64 value_ptr = 4; + uint32 data_type = 5; +} + +message StreamActiveDef { + uint32 op_index = 1; + uint32 active_stream_id = 2; +} + +message StreamSwitchNDef { + uint32 op_index = 1; + uint32 size = 2; + repeated int64 target_value = 3; + repeated uint32 true_stream_id = 4; + uint32 element_size = 5; + uint32 data_type = 6; +} + +message LabelSetDef { + uint32 op_index = 1; + uint32 label_id = 2; + uint32 model_id = 3; +} + +message LabelGotoExDef { + uint32 op_index = 1; + uint32 label_id = 2; + uint32 model_id = 3; +} + +message LabelSwitchByIndexDef { + uint32 op_index = 1; + uint32 label_max = 2; +} diff --git a/inc/common/util/ai_core/common/aicore_util_attr_define.h b/inc/common/util/ai_core/common/aicore_util_attr_define.h new file mode 100644 index 000000000..ac1c2c976 --- /dev/null +++ 
b/inc/common/util/ai_core/common/aicore_util_attr_define.h @@ -0,0 +1,35 @@ +/** + * @file aicore_util_attr_define.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * + * @brief attribute define + * + * @version 1.0 + * + */ +#ifndef INC_COMMON_UTILS_AI_CORE_COMMON_ATTR_DEFINE_H_ +#define INC_COMMON_UTILS_AI_CORE_COMMON_ATTR_DEFINE_H_ + +#include + +namespace fe { +static const std::string SCOPE_ID_ATTR = "fusion_scope"; + +static const std::string FE_IMPLY_TYPE = "_fe_imply_type"; + +static const std::string PARENT_OP_TYPE = "parentOpType"; + +static const std::string ATTR_NAME_TASK_L2_FUSION_INFO_EXTEND_PTR = + "task_l2_fusion_info_extend_content"; + +static const std::string ATTR_DATA_DUMP_REF = "_datadump_ref"; + +static const std::string ATTR_NAME_L2_FUSION_EXTEND_PTR = + "l2_fusion_extend_content"; + +static const std::string L1_OPTIMIZED = "l1_optimized"; + +static const std::string L2_OPTIMIZED = "l2_optimized"; +} // namespace fe +#endif diff --git a/inc/common/util/ai_core/common/aicore_util_types.h b/inc/common/util/ai_core/common/aicore_util_types.h new file mode 100644 index 000000000..25a7979c1 --- /dev/null +++ b/inc/common/util/ai_core/common/aicore_util_types.h @@ -0,0 +1,115 @@ +/** + * @file aicore_util_types.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. 
+ * + * @brief struct\enuum + * + * @version 1.0 + * + */ + +#ifndef INC_COMMON_UTILS_AI_CORE_COMMON_TYPES_H_ +#define INC_COMMON_UTILS_AI_CORE_COMMON_TYPES_H_ + +#include "graph/anchor.h" +#include "graph/types.h" +#include "runtime/kernel.h" +#include +#include +#include + +namespace fe { +struct FusionOpSrc { + uint32_t src_op_id; + ge::AnchorPtr src_anchor; + int32_t fusion_src_index; + int32_t fusion_dst_index; +}; + +struct FusionOpDst { + uint32_t dst_op_id; + ge::AnchorPtr dst_anchor; +}; + +struct FusionDataFlow { + std::pair edge; + std::pair node_dataindex_pair; +}; + +typedef struct tagL2FusionData { + uint32_t l2Index; + uint64_t l2Addr; + uint64_t l2PageNum; +} L2FusionData_t; +typedef std::map L2FusionDataMap_t; + +typedef struct tagFeSmDesc { + rtL2Ctrl_t l2ctrl; + std::string nodeName[8]; + uint8_t outputIndex[8]; +} feSmDesc_t; + +typedef struct TagTaskL2FusionInfo { + std::string nodeName; + feSmDesc_t l2Info; + L2FusionDataMap_t input; + L2FusionDataMap_t output; + uint32_t isUsed; +} TaskL2FusionInfo_t; + +using L2FusionInfoPtr = std::shared_ptr; + +typedef struct ToOpStruct { + int64_t opL1Space = 0; + std::vector opL1FusionType; + int64_t opL1WorkspaceFlag = 0; // for workspace flag + int64_t opL1WorkspaceSize = 0; + std::vector> validInputShape; + std::vector> validOutputShape; + std::vector> + sliceInputOffset; // conv & pooling & ReadSelect + std::vector> sliceOutputOffset; // WriteSelect + std::vector totalShape; + uint32_t splitIndex = 0; + ToOpStruct() { + // set invalid value for essential variable + opL1Space = -1; + opL1WorkspaceSize = -1; + } +} ToOpStruct_t; + +enum OpImplType { + EN_IMPL_CUSTOM_CONSTANT_CCE = 0, // custom constant op + EN_IMPL_CUSTOM_TIK, // custom tik op + EN_IMPL_CUSTOM_TBE, // custom tbe op + EN_IMPL_HW_CONSTANT_CCE, // Huawei built-in constant op + EN_IMPL_HW_GENERAL_CCE, // Huawei built-in cce op + EN_IMPL_HW_TIK, // Huawei built-in tik op + EN_IMPL_HW_TBE, // Huawei built-in tbe op + EN_IMPL_RL, // RL op + 
EN_IMPL_PLUGIN_TBE, // Huawei built-in tbe plugin op + EN_IMPL_VECTOR_CORE_HW_TBE, // Huawei built-in tbe op + EN_IMPL_VECTOR_CORE_CUSTOM_TBE, // custom tbe op + EN_IMPL_NON_PERSISTENT_CUSTOM_TBE, // custom tbe op + EN_RESERVED // reserved value +}; + +static const std::map DATATYPE_SIZE_MAP{ + {ge::DT_FLOAT, sizeof(float)}, + {ge::DT_FLOAT16, sizeof(int16_t)}, + {ge::DT_INT8, sizeof(int8_t)}, + {ge::DT_INT32, sizeof(int32_t)}, + {ge::DT_UINT8, sizeof(uint8_t)}, + {ge::DT_UINT32, sizeof(uint32_t)}, + {ge::DT_INT16, sizeof(int16_t)}, + {ge::DT_UINT16, sizeof(uint16_t)}, + {ge::DT_INT64, sizeof(int64_t)}, + {ge::DT_UINT64, sizeof(uint64_t)}, + {ge::DT_DOUBLE, sizeof(double)}, + {ge::DT_BOOL, sizeof(bool)}, + {ge::DT_DUAL, sizeof(float) + sizeof(int8_t)}, + {ge::DT_DUAL_SUB_UINT8, sizeof(int8_t)}, + {ge::DT_DUAL_SUB_INT8, sizeof(int8_t)}}; +} +#endif diff --git a/inc/common/util/ai_core/common/graph_comm.h b/inc/common/util/ai_core/common/graph_comm.h new file mode 100644 index 000000000..47fbad9c7 --- /dev/null +++ b/inc/common/util/ai_core/common/graph_comm.h @@ -0,0 +1,127 @@ +/** + * @file graph_comm.h + * + * Copyright(C), 2017 - 2017, Huawei Tech. Co., Ltd. ALL RIGHTS RESERVED. 
+ * + * @brief graph builder + * + * @version 1.0 + * + */ + +#ifndef INC_COMMON_UTILS_AI_CORE_COMMON_GRAPH_COMMON_H_ +#define INC_COMMON_UTILS_AI_CORE_COMMON_GRAPH_COMMON_H_ + +#include "graph/compute_graph.h" +#include "common/aicore_util_types.h" +#include "register/graph_optimizer/graph_optimize_register_error_codes.h" + +#include +#include +#include +#include + +namespace fe { + +using kScopeNodeMap_t = std::map>; +using kScopeNodePair_t = std::pair>; + +class GraphCommImpl; +using GraphCommImplPtr = std::unique_ptr; + +class GraphComm { +public: + GraphComm(const string &engineName); + virtual ~GraphComm(); + GraphComm(const GraphComm &in) = delete; + GraphComm &operator=(const GraphComm &in) = delete; + + Status CreateFusionGraph(ge::ComputeGraph &modelGraph, + ge::ComputeGraph &fusionGraph, + const bool &isMapOpIndex); + + Status GetscopeNodeMap(ge::ComputeGraph &graph, kScopeNodeMap_t &fusionMap); + + Status CopyFusionOpNodes(vector &fusInputEdgeList, + vector &fusOutputEdgeList, + vector &fusNodelist, + ge::OpDescPtr fusionOpDesc, + ge::ComputeGraphPtr fusionGraph); + + Status CopyFusionOpEdges(ge::OpDescPtr fusionOpDesc, + ge::ComputeGraph &origGraph, + ge::ComputeGraphPtr fusionGraph); + + Status GetNodeDataFlowMap( + const ge::NodePtr &fusNode, + std::map> + &fusionOpAnchorsMap, + ge::kFusionDataFlowVec_t &fusDataflowList, const int &mapType); + + Status GetFusionNodeEdgeList(std::vector &fusNodelist, + std::vector &fusInputEdgeList, + std::vector &fusOutputEdgeList); + void ClearFusionSrc(); + + void ClearFusionDst(); + + void + AddFusionOutputSrc(const uint32_t &src_op_id, const ge::AnchorPtr &src_anchor, + const int32_t &fusion_src_index, + std::pair &node_dataindex_pair); + + void AddFusionInputSrc(const uint32_t &src_op_id, + const ge::AnchorPtr &src_anchor, + const int32_t &fusion_dst_index, + std::pair &node_dataindex_pair); + + void SaveFusionDst(const uint32_t &dst_op_id, ge::AnchorPtr dst_anchor); + + bool IsFusionDstExist(const uint32_t 
&dst_op_id, + const ge::AnchorPtr &dst_anchor); + + bool GetFusionSrc(const uint32_t &src_op_id, const ge::AnchorPtr &src_anchor, + int32_t &fusion_src_index, int32_t &fusion_dst_index); + + Status + GetFusionNodeCtrlEdgeList(vector &fusNodelist, + vector &fusInputCtrlEdgeList, + vector &fusOutputCtrlEdgeList); + + Status MergeFusionNodeEdgeList(ge::NodePtr &fusNode, + vector &fusNodelist, + vector &fusInputEdgeList, + vector &fusOutputEdgeList); + + Status MergeFusionNodeCtrlEdgeList(ge::NodePtr &fusNode, + vector &fusNodelist, + vector &fusInputEdgeList, + vector &fusOutputEdgeList); + + string GetEngineName(); + +private: + Status + MergeFusionNodeInputEdgeList(ge::NodePtr fusNode, + std::vector &fusNodelist, + std::vector &fusInputEdgeList); + Status + MergeFusionNodeOutputEdgeList(ge::NodePtr fusNode, + std::vector &fusNodelist, + std::vector &fusOutputEdgeList); + + string engineName_; + + std::vector exist_fusion_src_list_; + std::vector exist_fusion_dst_list_; + + // std::vector> + ge::kFusionDataFlowVec_t fusion_input_dataflow_list_; + + // std::vector> + ge::kFusionDataFlowVec_t fusion_output_dataflow_list_; + + GraphCommImplPtr graphCommImplPtr_; +}; +} // namespace fe +#endif diff --git a/inc/common/util/ai_core/common/scope_allocator.h b/inc/common/util/ai_core/common/scope_allocator.h new file mode 100644 index 000000000..50b2ba564 --- /dev/null +++ b/inc/common/util/ai_core/common/scope_allocator.h @@ -0,0 +1,36 @@ +/** + * @file scope_allocator.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. 
+ * + * @brief tbe fusion scope id allocator + * + * @version 1.0 + * + */ + +#ifndef INC_COMMON_UTILS_AI_CORE_COMMON_SCOPE_ALLOCATOR_H_ +#define INC_COMMON_UTILS_AI_CORE_COMMON_SCOPE_ALLOCATOR_H_ + +#include "graph/op_desc.h" + +namespace fe { +class ScopeAllocator { + public: + ScopeAllocator(); + virtual ~ScopeAllocator(); + ScopeAllocator(const ScopeAllocator& in) = delete; + ScopeAllocator& operator = (const ScopeAllocator& in) = delete; + + public: + void Init(); + int64_t GetCurrentScopeId(); + int64_t AllocateScopeId(void); + bool HasScopeAttr(ge::ConstOpDescPtr opdef); + bool GetScopeAttr(ge::ConstOpDescPtr opdef, int64_t &scopeId); + bool SetScopeAttr(ge::OpDescPtr opdef, int64_t scopeId); + private: + int64_t scopeId; +}; +} // namespace fe +#endif diff --git a/inc/common/util/ai_core/param_calculate/aicore_param_calculator.h b/inc/common/util/ai_core/param_calculate/aicore_param_calculator.h new file mode 100644 index 000000000..0957c6625 --- /dev/null +++ b/inc/common/util/ai_core/param_calculate/aicore_param_calculator.h @@ -0,0 +1,29 @@ +/** + * @file aicore_param_calculator.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. 
+ * + * @brief aicore param calculator + * + * @version 1.0 + * + */ + +#ifndef AICORE_PARAM_CALCULATOR +#define AICORE_PARAM_CALCULATOR + +#include "graph/node.h" +#include "graph_optimizer/graph_optimize_register_error_codes.h" + +namespace fe { +class AICoreParamCalculator { + public: + AICoreParamCalculator(); + + ~AICoreParamCalculator(); + + Status CalcOpRunningParam(ge::Node &node); + +}; +} // namespace fe +#endif // AICORE_PARAM_CALCULATOR diff --git a/inc/common/util/ai_core/param_calculate/tensorsize_calculator.h b/inc/common/util/ai_core/param_calculate/tensorsize_calculator.h new file mode 100644 index 000000000..d9f8f6872 --- /dev/null +++ b/inc/common/util/ai_core/param_calculate/tensorsize_calculator.h @@ -0,0 +1,42 @@ +/** + * @file tensorsize_calculator.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * + * @brief provide the capability of calculating + * workspace and input/output size + * + * @version 1.0 + * + */ +#ifndef TENSORSIZE_CALCULATOR_H +#define TENSORSIZE_CALCULATOR_H + +#include "graph_optimizer/graph_optimize_register_error_codes.h" + +#include +#include +#include "graph/compute_graph.h" +#include "graph/op_desc.h" + +namespace fe { +class TensorSizeCalculator { + public: + /** + * Calculate the tensor size of input and output of each opdesc + * @param opDesc opdesc object + * @param opImplType op impl type + * @return status SUCCESS or FAILED + */ + static Status CalculateOpTensorSize(ge::OpDesc &opDesc); + + private: + static Status CalcInputOpTensorSize(ge::OpDesc &opDesc, + int32_t &outputRealCalcFlag); + + static Status CalcOutputOpTensorSize(ge::OpDesc &opDesc, + int32_t &outputRealCalcFlag); +}; +} // namespace fe + +#endif // TENSORSIZE_CALCULATOR_H diff --git a/inc/common/util/compress/compress.h b/inc/common/util/compress/compress.h new file mode 100644 index 000000000..7f0c135fb --- /dev/null +++ b/inc/common/util/compress/compress.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) Huawei 
Technologies Co., Ltd. 2019-2020. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Apache License for more details at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * @brief compress header file + * + * @file compress.h + * + * @version 1.0 + */ +#ifndef COMPRESS_H +#define COMPRESS_H + +#include + +enum CmpStatus { + RET_SUCCESS = 0, + RET_ERROR = -1 +}; + +struct CompressConfig { + size_t inputSize; // length of data to compress + size_t engineNum; // how many decompress engines + size_t maxRatio; // how much size of a basic compression block, only 64 supported now (8x: 64 4x: 32) + size_t channel; // channels of L2 or DDR. For load balance + size_t fractalSize; // size of compressing block + bool isTight; // whether compose compressed data tightly + size_t init_offset; +}; + +CmpStatus CompressWeights(char* input, + const CompressConfig& compressConfig, + char* indexs, + char* output, + size_t& compressedLength); + + +#endif // COMPRESS_H diff --git a/inc/common/util/compress/compress_weight.h b/inc/common/util/compress/compress_weight.h new file mode 100644 index 000000000..38c252306 --- /dev/null +++ b/inc/common/util/compress/compress_weight.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. 
+ * + * @brief header file of compress weight + * + * @file compress_weight.h + * + * @version 1.0 + */ +#ifndef COMPRESS_WEIGHT_H +#define COMPRESS_WEIGHT_H + +#include "compress.h" + +const int SHAPE_SIZE_WEIGHT = 4; + +struct CompressOpConfig { + int64_t wShape[SHAPE_SIZE_WEIGHT]; + size_t compressTilingK; + size_t compressTilingN; + struct CompressConfig compressConfig; +}; + +extern "C" CmpStatus CompressWeightsConv2D(const char *const input, + char *const zipBuffer, + char *const infoBuffer, + CompressOpConfig *const param); +#endif // COMPRESS_WEIGHT_H diff --git a/inc/common/util/error_manager/error_manager.h b/inc/common/util/error_manager/error_manager.h new file mode 100644 index 000000000..438e68a74 --- /dev/null +++ b/inc/common/util/error_manager/error_manager.h @@ -0,0 +1,94 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ERROR_MANAGER_H_ +#define ERROR_MANAGER_H_ + +#include +#include +#include + +class ErrorManager { + public: + /// + /// @brief Obtain ErrorManager instance + /// @return ErrorManager instance + /// + static ErrorManager &GetInstance(); + + /// + /// @brief init + /// @param [in] path: current so path + /// @return int 0(success) -1(fail) + /// + int Init(std::string path); + + /// + /// @brief Report error message + /// @param [in] error_code: error code + /// @param [in] args_map: parameter map + /// @return int 0(success) -1(fail) + /// + int ReportErrMessage(std::string error_code, const std::map &args_map); + + /// + /// @brief output error message + /// @param [in] handle: print handle + /// @return int 0(success) -1(fail) + /// + int OutputErrMessage(int handle); + + /// + /// @brief output message + /// @param [in] handle: print handle + /// @return int 0(success) -1(fail) + /// + int OutputMessage(int handle); + + /// + /// @brief Report error message + /// @param [in] key: vector parameter key + /// @param [in] value: vector parameter value + /// + void ATCReportErrMessage(std::string error_code, const std::vector &key = {}, + const std::vector &value = {}); + + private: + struct ErrorInfo { + std::string error_id; + std::string error_message; + std::vector arg_list; + }; + + ErrorManager() {} + ~ErrorManager() {} + + ErrorManager(const ErrorManager &) = delete; + ErrorManager(ErrorManager &&) = delete; + ErrorManager &operator=(const ErrorManager &) = delete; + ErrorManager &operator=(ErrorManager &&) = delete; + + int ParseJsonFile(std::string path); + + int ReadJsonFile(const std::string &file_path, void *handle); + + bool is_init_ = false; + std::map error_map_; + std::vector error_messages_; + std::vector warning_messages_; +}; + +#endif // ERROR_MANAGER_H_ diff --git a/inc/common/util/platform_info.h b/inc/common/util/platform_info.h new file mode 100644 index 000000000..40ee762af --- /dev/null +++ b/inc/common/util/platform_info.h 
@@ -0,0 +1,115 @@ +/* + + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. + + * Description: platform info init + + * Author: + * Create: 2020-03-22 + + */ + +#ifndef PLATFORM_INFO_H +#define PLATFORM_INFO_H + +#include +#include +#include +#include "platform_info_def.h" + +using std::map; +using std::vector; +using std::string; + +namespace fe { +class PlatformInfoManager { + public: + PlatformInfoManager(const PlatformInfoManager &) = delete; + PlatformInfoManager &operator=(const PlatformInfoManager &) = delete; + + static PlatformInfoManager &Instance(); + uint32_t InitializePlatformInfo(); + uint32_t Finalize(); + + uint32_t GetPlatformInfo(const string SoCVersion, PlatformInfo &platformInfo, + OptionalInfo &optiCompilationInfo); + + uint32_t GetPlatformInfoWithOutSocVersion(PlatformInfo &platformInfo, + OptionalInfo &optiCompilationInfo); + + void SetOptionalCompilationInfo(OptionalInfo &optiCompilationInfo); + + private: + PlatformInfoManager(); + ~PlatformInfoManager(); + + uint32_t LoadIniFile(string iniFileRealPath); + + void Trim(string &str); + + uint32_t LoadConfigFile(string realPath); + + string RealPath(const std::string &path); + + string GetSoFilePath(); + + void ParseVersion(map &versionMap, string &socVersion, + PlatformInfo &platformInfoTemp); + + void ParseSocInfo(map &socInfoMap, + PlatformInfo &platformInfoTemp); + + void ParseCubeOfAICoreSpec(map &aiCoreSpecMap, + PlatformInfo &platformInfoTemp); + + void ParseBufferOfAICoreSpec(map &aiCoreSpecMap, + PlatformInfo &platformInfoTemp); + + void ParseUBOfAICoreSpec(map &aiCoreSpecMap, + PlatformInfo &platformInfoTemp); + + void ParseUnzipOfAICoreSpec(map &aiCoreSpecMap, + PlatformInfo &platformInfoTemp); + + void ParseAICoreSpec(map &aiCoreSpecMap, + PlatformInfo &platformInfoTemp); + + void ParseBufferOfAICoreMemoryRates(map &aiCoreMemoryRatesMap, + PlatformInfo &platformInfoTemp); + + void ParseAICoreMemoryRates(map &aiCoreMemoryRatesMap, + PlatformInfo 
&platformInfoTemp); + + void ParseUBOfAICoreMemoryRates(map &aiCoreMemoryRatesMap, + PlatformInfo &platformInfoTemp); + + void ParseAICoreintrinsicDtypeMap( + map &aiCoreintrinsicDtypeMap, + PlatformInfo &platformInfoTemp); + + void ParseVectorCoreSpec(map &vectorCoreSpecMap, + PlatformInfo &platformInfoTemp); + + void ParseVectorCoreMemoryRates(map &vectorCoreMemoryRatesMap, + PlatformInfo &platformInfoTemp); + + void ParseCPUCache(map &CPUCacheMap, + PlatformInfo &platformInfoTemp); + + void ParseVectorCoreintrinsicDtypeMap( + map &vectorCoreintrinsicDtypeMap, + PlatformInfo &platformInfoTemp); + + uint32_t ParsePlatformInfoFromStrToStruct( + map> &contentInfoMap, string &socVersion, + PlatformInfo &platformInfoTemp); + + uint32_t AssemblePlatformInfoVector( + map> &contentInfoMap); + private: + bool initFlag_; + map platformInfoMap_; + OptionalInfo optiCompilationInfo_; +}; +} // namespace fe +#endif diff --git a/inc/common/util/platform_info_def.h b/inc/common/util/platform_info_def.h new file mode 100644 index 000000000..7824c75ba --- /dev/null +++ b/inc/common/util/platform_info_def.h @@ -0,0 +1,144 @@ +/* + + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. 
+ + * Description: platform info init + + * Author: + * Create: 2020-03-22 + + */ + +#ifndef PLATFORM_INFO_DEF_H +#define PLATFORM_INFO_DEF_H + +#include +#include +#include + +using std::map; +using std::vector; +using std::string; + +namespace fe +{ +enum MemoryType { + DDR = 0, + HBM +}; + +enum L2Type { + Cache = 0, + Buff +}; + +typedef struct tagStrInfo { + string aicVersion; + string ccecAICVersion; + string ccecAIVVersion; + string isSupportAIcpuCompiler; +} StrInfo; + +typedef struct tagSoCInfo { + uint32_t aiCoreCnt; + uint32_t vectorCoreCnt; + uint32_t aiCpuCnt; + MemoryType memoryType; + uint64_t memorySize; + L2Type l2Type; + uint64_t l2Size; + uint32_t l2PageNum; +} SoCInfo; + +typedef struct tagAiCoreSpec { + double cubeFreq; + uint64_t cubeMSize; + uint64_t cubeNSize; + uint64_t cubeKSize; + uint64_t vecCalcSize; + uint64_t l0ASize; + uint64_t l0BSize; + uint64_t l0CSize; + uint64_t l1Size; + uint64_t smaskBuffer; + uint64_t ubSize; + uint64_t ubblockSize; + uint64_t ubbankSize; + uint64_t ubbankNum; + uint64_t ubburstInOneBlock; + uint64_t ubbankGroupNum; + uint32_t unzipEngines; + uint32_t unzipMaxRatios; + uint32_t unzipChannels; + uint8_t unzipIsTight; +} AiCoreSpec; + +typedef struct tagAiCoreMemoryRates { + double ddrRate; + double ddrReadRate; + double ddrWriteRate; + double l2Rate; + double l2ReadRate; + double l2WriteRate; + double l1ToL0ARate; + double l1ToL0BRate; + double l1ToUBRate; + double l0CToUBRate; + double ubToL2Rate; + double ubToDdrRate; + double ubToL1Rate; +} AiCoreMemoryRates; + +typedef struct tagVectorCoreSpec { + double vecFreq; + uint64_t vecCalcSize; + uint64_t smaskBuffer; + uint64_t ubSize; + uint64_t ubblockSize; + uint64_t ubbankSize; + uint64_t ubbankNum; + uint64_t ubburstInOneBlock; + uint64_t ubbankGroupNum; + uint64_t vectorRegSize; + uint64_t predicateRegSize; + uint64_t addressRegSize; +} VectorCoreSpec; + +typedef struct tagVectorCoreMemoryRates { + double ddrRate; + double ddrReadRate; + double 
ddrWriteRate; + double l2Rate; + double l2ReadRate; + double l2WriteRate; + double ubToL2Rate; + double ubToDdrRate; +} VectorCoreMemoryRates; + +typedef struct tagCPUCache { + uint32_t AICPUSyncBySW; + uint32_t TSCPUSyncBySW; +} CPUCache; + +typedef struct tagPlatformInfo +{ + StrInfo strInfo; + SoCInfo socInfo; + AiCoreSpec aiCoreSpec; + AiCoreMemoryRates aiCoreMemoryRates; + map> aiCoreIntrinsicDtypeMap; + VectorCoreSpec vectorCoreSpec; + VectorCoreMemoryRates vectorCoreMemoryRates; + CPUCache cpucache; + map> vectorCoreIntrinsicDtypeMap; +} PlatformInfo; + +typedef struct tagOptionalInfo +{ + string socVersion; + string coreType; + uint32_t aiCoreNum; + string l1FusionFlag; +} OptionalInfo; +} // namespace fe +#endif diff --git a/inc/external/ge/ge_api.h b/inc/external/ge/ge_api.h new file mode 100644 index 000000000..e711bf61a --- /dev/null +++ b/inc/external/ge/ge_api.h @@ -0,0 +1,121 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_EXTERNAL_GE_GE_API_H_ +#define INC_EXTERNAL_GE_GE_API_H_ + +#include +#include +#include + +#include "ge/ge_api_error_codes.h" +#include "ge/ge_api_types.h" +#include "graph/graph.h" +#include "graph/tensor.h" + +namespace ge { +typedef uint32_t (*pCallBackFunc)(uint32_t graph_id, const std::map ¶ms_list); + +// Initialize GE +Status GEInitialize(const std::map &options); + +// Finalize GE, release all resources +Status GEFinalize(); + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session { + public: + explicit Session(const std::map &options); + + ~Session(); + + /// + /// @ingroup client + /// @brief add a graph with a specific graphId + /// @param [in] graphId graph id + /// @return Status result of function + /// + Status AddGraph(uint32_t graphId, const Graph &graph); + + /// + /// @ingroup client + /// @brief add a graph with a specific graphId and graphOptions + /// @param [in] graphId graph id + /// @param [in] graph the graph + /// @param [in] options graph options + /// @return Status result of function + /// + Status AddGraph(uint32_t graphId, const Graph& graph, const std::map& options); + + /// + /// @ingroup ge_graph + /// @brief remove a graph of the session with specific session id + /// @param [in] graphId graph id + /// @return Status result of function + /// + Status RemoveGraph(uint32_t graphId); + + /// + /// @ingroup ge_graph + /// @brief run a graph of the session with specific session id + /// @param [in] graphId graph id + /// @param [in] inputs input data + /// @param [out] outputs output data + /// @return Status result of function + /// + Status RunGraph(uint32_t graphId, const std::vector &inputs, std::vector &outputs); + + /// + /// @ingroup ge_graph + /// @brief build graph in the session with specific session id + /// @param [in] graphId: graph id + /// @param [in] inputs: input data + /// @return Status result of function + /// + Status BuildGraph(uint32_t graphId, const std::vector &inputs); + + /// + 
/// @ingroup ge_graph + /// @brief run graph in the session with specific session id asynchronously + /// @param [in] graphId: graph id + /// @param [in] inputs: input data + /// @param [out] callback: callback while runing graph has been finished. + /// The callback function will not be checked. + /// Please ensure that the implementation of the function is trusted. + /// @return Status result of function + /// + Status RunGraphAsync(uint32_t graphId, const std::vector &inputs, RunAsyncCallback callback); + + /// + /// @ingroup ge_graph + /// @brief register callback func with specific summary or checkpoint by users + /// @param [in] key: func key + /// @param [in] callback: callback specific summary or checkpoint. + /// The callback function will not be checked. + /// Please ensure that the implementation of the function is trusted. + /// @return Status result of function + /// + Status RegisterCallBackFunc(const std::string &key, const pCallBackFunc &callback); + + bool IsGraphNeedRebuild(uint32_t graphId); + + std::map GetAllVariable(); + + private: + uint64_t sessionId_; +}; +} // namespace ge + +#endif // INC_EXTERNAL_GE_GE_API_H_ diff --git a/inc/external/ge/ge_api_error_codes.h b/inc/external/ge/ge_api_error_codes.h new file mode 100644 index 000000000..7b045d548 --- /dev/null +++ b/inc/external/ge/ge_api_error_codes.h @@ -0,0 +1,76 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_EXTERNAL_GE_GE_API_ERROR_CODES_H_ +#define INC_EXTERNAL_GE_GE_API_ERROR_CODES_H_ + +#include +#include + +namespace ge { +class StatusFactory { + public: + static StatusFactory *Instance() { + static StatusFactory instance; + return &instance; + } + + void RegisterErrorNo(uint32_t err, const std::string &desc) { + // Avoid repeated addition + if (err_desc_.find(err) != err_desc_.end()) { + return; + } + err_desc_[err] = desc; + } + + std::string GetErrDesc(uint32_t err) { + auto iter_find = err_desc_.find(err); + if (iter_find == err_desc_.end()) { + return ""; + } + return iter_find->second; + } + + protected: + StatusFactory() {} + ~StatusFactory() {} + + private: + std::map err_desc_; +}; + +class ErrorNoRegisterar { + public: + ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } + ~ErrorNoRegisterar() {} +}; + +// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit +#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ + constexpr ge::Status name = \ + ((0xFF & (static_cast(runtime))) << 30) | ((0xFF & (static_cast(type))) << 28) | \ + ((0xFF & (static_cast(level))) << 25) | ((0xFF & (static_cast(sysid))) << 17) | \ + ((0xFF & (static_cast(modid))) << 12) | (0x0FFF & (static_cast(value))); \ + const ErrorNoRegisterar g_##name##_errorno(name, desc); + +using Status = uint32_t; + +// General error code +GE_ERRORNO(0, 0, 0, 0, 0, SUCCESS, 0, "success"); +GE_ERRORNO(0b11, 0b11, 0b111, 0xFF, 0b11111, FAILED, 0xFFF, "failed"); /*lint !e401*/ +} // namespace ge + +#endif // INC_EXTERNAL_GE_GE_API_ERROR_CODES_H_ diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h new file mode 100644 index 000000000..e20418475 --- /dev/null +++ b/inc/external/ge/ge_api_types.h @@ -0,0 +1,335 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 
2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_GE_GE_API_TYPES_H_ +#define INC_EXTERNAL_GE_GE_API_TYPES_H_ + +#include +#include +#include +#include +#include +#include + +namespace ge { +// Option key: graph run mode +const char *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode"; + +// Option key: ome init +const char *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId"; +const char *const OPTION_EXEC_DEVICE_ID = "ge.exec.deviceId"; +const char *const OPTION_EXEC_JOB_ID = "ge.exec.jobId"; +const char *const OPTION_EXEC_IS_USEHCOM = "ge.exec.isUseHcom"; +const char *const OPTION_EXEC_IS_USEHVD = "ge.exec.isUseHvd"; +const char *const OPTION_EXEC_RANK_ID = "ge.exec.rankId"; +const char *const OPTION_EXEC_POD_NAME = "ge.exec.podName"; +const char *const OPTION_EXEC_DEPLOY_MODE = "ge.exec.deployMode"; +const char *const OPTION_EXEC_RANK_TABLE_FILE = "ge.exec.rankTableFile"; +const char *const GE_AICPU_FLAG = "ge.aicpuFlag"; +const char *const OPTION_EXEC_EXTERN_PLUGIN_PATH = "ge.soLoadPath"; +const char *const OPTION_EXEC_ENABLE_DUMP = "ge.exec.enableDump"; +const char *const OPTION_EXEC_DUMP_PATH = "ge.exec.dumpPath"; +const char *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep"; +const char *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode"; +const char *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug"; +const char *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode"; +const char *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild"; +const char 
*const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath"; +const char *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses"; +// profiling flag +const char *const OPTION_EXEC_PROFILING_MODE = "ge.exec.profilingMode"; +const char *const OPTION_EXEC_PROFILING_OPTIONS = "ge.exec.profilingOptions"; +// Hccl flag, if ge.exec.hcclFlag =1, it means load plugin for opskernel, else:ge.exec.hcclFlag =0 +const char *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag"; +const char *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic"; +const char *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory"; +const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization"; + +// Option key: memory init +const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; +const char *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize"; + +// Configure stream num by Session constructor options param, +// its value should be int32_t type, default value is "1" +const std::string STREAM_NUM = "ge.streamNum"; + +// Configure add head stream to model. 
+// its value should be "0" or "1", default value is "0" +const std::string HEAD_STREAM = "ge.headStream"; + +// Configure perf level by Session constructor options param, +// its value please see enum PerfLevel, default value is "4" +const std::string PERF_LEVEL = "ge.perfLevel"; + +// Configure encrypt mode by Session constructor options param, +// its value should be int32_t type, default value is "-1" +const std::string ENCRYPT_MODE = "ge.encryptMode"; + +// configure ek file by Session constructor options param, +// its value should be file path, default value is "" +const std::string EK_FILE = "ge.ekFile"; + +// Configure cert file by Session constructor options param, +// its value should be file path, default value is "" +const std::string CERT_FILE = "ge.certFile"; + +// Configure hw key file by Session constructor options param, +// its value should be file path, default value is "" +const std::string HW_KEY_FILE = "ge.hwKeyFile"; + +// Configure private file by Session constructor options param, +// its value should be file path, default value is "" +const std::string PRIVATE_KEY_FILE = "ge.privateKeyFile"; + +// Configure framework type by Session constructor options param, +// its value please see enum FrameworkType, default value is "3" +const std::string FRAMEWORK_TYPE = "ge.frameworkType"; + +// Configure calibration info file by Session constructor options param, +// its value should be file path, default value is "" +const std::string CALIBRATION_CONF_FILE = "ge.calibrationConfFile"; + +// Configure insert op info file by Session constructor options param, +// its value should be file path, default value is "" +const std::string INSERT_OP_FILE = "ge.insertOpFile"; + +// Configure output node name by Session constructor options param, +// its value should be std::string type, default value is "" +const std::string OUTPUT_NODE_NAME = "ge.outputNodeName"; + +// Configure weight compress flag by Session constructor options param, +// its value should 
be "0" or "1", default value is "0" +const std::string COMPRESS_FLAG = "ge.compressFlag"; + +const std::string PRECISION_MODE = "ge.exec.precision_mode"; + +// Configure single op flag for FE +// its value should be "0" or "1", default value is "0" +const std::string SINGLE_OP_FLAG = "ge.exec.single_op"; + +// Configure train flag by Session constructor options param, +// its value should be "0" or "1", default value is "0" +const std::string TRAIN_FLAG = "ge.trainFlag"; + +// Configure run flag by Session constructor options param, +// its value should be "0" or "1", default value is "0" +const std::string RUN_FLAG = "ge.runFlag"; + +// Configure run flag by Session constructor options param, +// its value should be "0" or "1", default value is "0" +// this option is to enable local framework op feature +const std::string LOCAL_FMKOP_FLAG = "ge.enabledLocalFmkop"; + +// Configure run flag by Session constructor options param, +// its value should be a path +// this option is to obtain the TBE op plugin path +const std::string TBE_PLUGIN_PATH_FLAG = "ge.TBE_plugin_path"; + +// Configure run flag by Session constructor options param, +// its value should be a path +// this option is to obtain the DDK Version info +const std::string DDK_VERSION_FLAG = "ge.DDK_version"; + +// Configure run flag by Session constructor options param, +// its value should be a path +// this option is to obtain fe flag +const std::string GE_FE_FLAG = "ge.feFlag"; + +// Configure stream max parallel num only by Session constructor options param, +// its value should be stream:int, such as "DNN_V100:2,DNN_HCCL:3", +// default value is "1", such as "DNN_V100:1,DNN_HCCL:1" +// this option is to obtain stream max parallel num +const std::string STREAM_MAX_PARALLEL_NUM = "ge.streamMaxParallelNum"; + +// congigure outputDatatype to setting net output type +const std::string OUTPUT_DATATYPE = "ge.outputDatatype"; + +// congigure opSelectImplmode to setting op select implmode +const std::string 
OP_SELECT_IMPL_MODE = "ge.opSelectImplmode"; + +// congigure optypelist_for_implmode to setting which op use implmode +const std::string OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; + +// configure whether to enable hcom parallel by session constructor options param, +// its value should be "0" or "1", default value is "0" +const std::string HCOM_PARALLEL = "ge.hcomParallel"; + +// configure whether to use dynamic batch size +const char *const kDynamicBatchSize = "ge.dynamicBatchSize"; + +// configure whether to use dynamic image size +const char *const kDynamicImageSize = "ge.dynamicImageSize"; + +// Configure whether to use dynamic dims +const char *const kDynamicDims = "ge.dynamicDims"; + +// Configure auto tune mode, this option only take effect while AUTO_TUNE_FLAG is Y, +// example: GA|RL, support configure multiple, split by | +const std::string AUTO_TUNE_MODE = "ge.autoTuneMode"; + +// Configure soc version , example: "Ascend310" +const std::string SOC_VERSION = "ge.socVersion"; + +// Configure core type "VectorEngine", default value is "AIcoreEngine" +const std::string CORE_TYPE = "ge.engineType"; + +// Configure AICORE NUM +const std::string AICORE_NUM = "ge.aicoreNum"; + +// Configure L1FUSION +const std::string L1_FUSION = "ge.l1Fusion"; + +// Configure l1,l2,and others optimize option +const std::string BUFFER_OPTIMIZE = "ge.bufferOptimize"; + +// Configure Small Channel flag +const std::string ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel"; + +// Configure Compress Weight flag +const std::string ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight"; + +// Configure fusion switch file path +const std::string FUSION_SWITCH_FILE = "ge.fusionSwitchFile"; + +// Save original model +const std::string SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; + +// Save original model file name +const std::string ORIGINAL_MODEL_FILE = "ge.originalModelFile"; + +const char *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum"; +const char *const 
OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize"; +const char *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum"; + +// Configure for print op pass +// Its value should be "0" or "1", default value is "1" +const char *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass"; + +// Configure whether to use single stream. +// Its value should be "true" or "false", default value is "false" +const char *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream"; + +// Configure input fp16 nodes +const std::string INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; + +// Configure debug level, its value should be 0(default), 1 or 2. +// 0: close debug; 1: open TBE compiler; 2: open ccec compiler +const std::string OP_DEBUG_LEVEL = "ge.opDebugLevel"; + +// Graph run mode +enum GraphRunMode { PREDICTION = 0, TRAIN }; + +// Input/Output tensor info +struct InputTensorInfo { + uint32_t data_type; // data type + std::vector dims; // shape description + void *data; // tensor data + int64_t length; // tensor length +}; + +struct OutputTensorInfo { + uint32_t data_type; // data type + std::vector dims; // shape description + std::unique_ptr data; // tensor data + int64_t length; // tensor length + OutputTensorInfo() : data_type(0), dims({}), data(nullptr), length(0) {} + OutputTensorInfo(OutputTensorInfo &&out) : + data_type(out.data_type), + dims(out.dims), + data(std::move(out.data)), + length(out.length) {} + + OutputTensorInfo &operator=(OutputTensorInfo &&out) { + if (this != &out) { + data_type = out.data_type; + dims = out.dims; + data = std::move(out.data); + length = out.length; + } + return *this; + } + OutputTensorInfo(const OutputTensorInfo &) = delete; + OutputTensorInfo &operator=(const OutputTensorInfo &) = delete; +}; + +using Status = uint32_t; +using RunAsyncCallback = std::function &)>; +// for ir build +namespace ir_option { + static const char *const INPUT_FORMAT = "input_format"; + static const char *const INPUT_SHAPE = "input_shape"; + static const char *const 
OP_NAME_MAP = "op_name_map"; + static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; + static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; + static const char *const DYNAMIC_DIMS = kDynamicDims; + static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); + static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); + static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; + static const char *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); + static const char *const CORE_TYPE = ge::CORE_TYPE.c_str(); + static const char *const SOC_VERSION = ge::SOC_VERSION.c_str(); + static const char *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; + static const char *const AICORE_NUM = ge::AICORE_NUM.c_str(); + static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); + static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); + static const char *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str(); + static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); + static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); + static const char *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str(); + static const char *const COMPRESS_WEIGHT_CONF = "compress_weight_conf"; + static const char *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); + static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); + static const char *const LOG_LEVEL = "log"; + static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str(); + + // for interface: aclgrphBuildModel + const std::set ir_builder_suppported_options = { + INPUT_FORMAT, + INPUT_SHAPE, + OP_NAME_MAP, + DYNAMIC_BATCH_SIZE, + DYNAMIC_IMAGE_SIZE, + DYNAMIC_DIMS, + INSERT_OP_FILE, + PRECISION_MODE, + EXEC_DISABLE_REUSED_MEMORY, + AUTO_TUNE_MODE, + OUTPUT_TYPE, + OUT_NODES, + INPUT_FP16_NODES, + LOG_LEVEL + }; + // for interface: 
aclgrphBuildInitialize + const std::set global_options = { + CORE_TYPE, + SOC_VERSION, + BUFFER_OPTIMIZE, + ENABLE_COMPRESS_WEIGHT, + COMPRESS_WEIGHT_CONF, + PRECISION_MODE, + EXEC_DISABLE_REUSED_MEMORY, + AUTO_TUNE_MODE, + ENABLE_SINGLE_STREAM, + AICORE_NUM, + FUSION_SWITCH_FILE, + ENABLE_SMALL_CHANNEL, + OP_SELECT_IMPL_MODE, + OPTYPELIST_FOR_IMPLMODE + }; +} +} // namespace ge + +#endif // INC_EXTERNAL_GE_GE_API_TYPES_H_ diff --git a/inc/external/ge/ge_ir_build.h b/inc/external/ge/ge_ir_build.h new file mode 100644 index 000000000..dd48687f6 --- /dev/null +++ b/inc/external/ge/ge_ir_build.h @@ -0,0 +1,93 @@ +/** +* Copyright 2020 Huawei Technologies Co., Ltd + +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at + +* http://www.apache.org/licenses/LICENSE-2.0 + +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#ifndef INC_EXTERNAL_GE_IR_BUILD_H_ +#define INC_EXTERNAL_GE_IR_BUILD_H_ + +#include +#include +#include +#include "graph/graph.h" +#include "graph/ge_error_codes.h" + +namespace { +#define IR_MAJOR_VERSION (int(1)) +#define IR_MINOR_VERSION (int(0)) +#define IR_PATCH_VERSION (int(0)) +} + +namespace ge{ + +struct ModelBufferData +{ + std::shared_ptr data = nullptr; + uint64_t length; +}; + +/** + * @ingroup AscendCL + * @brief build model.Notice the model is stored in buffer + * + * @param global_options[IN] global init params for build + * @retval GRAPH_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +graphStatus aclgrphBuildInitialize(std::map global_options); + +/** + * @ingroup AscendCL + * @brief build model.Notice the model is stored in buffer + * + */ +void aclgrphBuildFinalize(); + +/** + * @ingroup AscendCL + * @brief build model.Notice the model is stored in buffer + * + * @param graph[IN] the graph ready to build + * @param options[IN] options used for build + * @param model[OUT] builded model + * @retval GRAPH_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map &build_options, ModelBufferData& model); + +/** + * @ingroup AscendCL + * @brief save model buffer to file + * + * @param output_file[IN] the file path to be saved + * @param model[IN] model buffer data + * @retval GRAPH_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData& model); + +/** + * @ingroup AscendCL + * @brief query IR interface version + * + * @param major_version[OUT] IR interface major version + * @param minor_version[OUT] IR interface minor version + * @param patch_version[OUT] IR interface patch version + * @retval GRAPH_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version); + +}; // INC_EXTERNAL_GE_IR_BUILD_H_ +#endif diff --git a/inc/external/graph/attr_value.h b/inc/external/graph/attr_value.h new file mode 100644 index 000000000..af430f9b6 --- /dev/null +++ b/inc/external/graph/attr_value.h @@ -0,0 +1,75 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_GRAPH_ATTR_VALUE_H_ +#define INC_EXTERNAL_GRAPH_ATTR_VALUE_H_ + +#include +#include +#include +#include + +#include "./ge_error_codes.h" + +using std::make_shared; +using std::map; +using std::pair; +using std::string; +using std::to_string; +using std::unique_ptr; +using std::vector; + +namespace ge { +class AttrValueImpl; +/*lint -e148*/ +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY AttrValue { + public: + using INT = int64_t; + using FLOAT = float; + using STR = std::string; + + AttrValue(); + ~AttrValue() = default; + + // GetValue, not list type + template + graphStatus GetValue(DT &val) const { + T valGet; + auto status = GetValue(valGet); + if (status != GRAPH_SUCCESS) { + return status; + } + val = DT(valGet); + return GRAPH_SUCCESS; + } + + template + static T CreateFrom(DT &&val) { + return val; + } + + std::shared_ptr impl; + + private: +#define VALUE_SET_GET_DEC(DT) graphStatus GetValue(DT &val) const; + VALUE_SET_GET_DEC(AttrValue::STR) + VALUE_SET_GET_DEC(AttrValue::INT) + VALUE_SET_GET_DEC(AttrValue::FLOAT) +#undef VALUE_SET_GET_DEC +}; +/*lint +e148*/ +} // namespace ge +#endif // INC_EXTERNAL_GRAPH_ATTR_VALUE_H_ diff --git a/inc/external/graph/ge_error_codes.h b/inc/external/graph/ge_error_codes.h new file mode 100644 index 000000000..d815a22dc --- /dev/null +++ b/inc/external/graph/ge_error_codes.h @@ -0,0 +1,38 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance 
with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_GRAPH_GE_ERROR_CODES_H_ +#define INC_EXTERNAL_GRAPH_GE_ERROR_CODES_H_ + +namespace ge { +#ifdef HOST_VISIBILITY +#define GE_FUNC_HOST_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_HOST_VISIBILITY +#endif +#ifdef DEV_VISIBILITY +#define GE_FUNC_DEV_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_DEV_VISIBILITY +#endif + +using graphStatus = uint32_t; +const graphStatus GRAPH_FAILED = 0xFFFFFFFF; +const graphStatus GRAPH_SUCCESS = 0; +const graphStatus GRAPH_PARAM_INVALID = 50331649; +} // namespace ge + +#endif // INC_EXTERNAL_GRAPH_GE_ERROR_CODES_H_ diff --git a/inc/external/graph/graph.h b/inc/external/graph/graph.h new file mode 100644 index 000000000..308867333 --- /dev/null +++ b/inc/external/graph/graph.h @@ -0,0 +1,81 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_EXTERNAL_GRAPH_GRAPH_H_ +#define INC_EXTERNAL_GRAPH_GRAPH_H_ + +#include +#include +#include +#include + +#include "./operator.h" + +namespace ge { +class GraphImpl; + +using GraphImplPtr = std::shared_ptr; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Graph { + friend class GraphUtils; + + public: + explicit Graph(const std::string &name); + + Graph() = default; + + ~Graph() = default; + + Graph &SetInputs(const std::vector &inputs); + + Graph &SetOutputs(const std::vector &outputs); + + Graph &SetOutputs(const std::vector>> &output_indexs); + + Graph &SetOutputs(const std::vector> &outputs); + + Graph &SetTargets(const std::vector &targets); + + bool IsValid() const; + + graphStatus AddOp(const ge::Operator &op); + + graphStatus FindOpByName(const string &name, ge::Operator &op) const; + + graphStatus FindOpByType(const string &type, std::vector &ops) const; + + graphStatus GetAllOpName(std::vector &op_name) const; + + graphStatus SaveToFile(const string &file_name) const; + + graphStatus LoadFromFile(const string &file_name); + + const std::string &GetName() const; + + /// + /// Set is need train iteration. + /// If set true, it means this graph need to be run iteration some + /// times(according variant "npu_runconfig/iterations_per_loop"). + /// @param need_iteration need_iteration:whether to set iteration or not + /// + void SetNeedIteration(bool need_iteration); + + private: + GraphImplPtr impl_{nullptr}; +}; +} // namespace ge + +#endif // INC_EXTERNAL_GRAPH_GRAPH_H_ diff --git a/inc/external/graph/inference_context.h b/inc/external/graph/inference_context.h new file mode 100644 index 000000000..690791422 --- /dev/null +++ b/inc/external/graph/inference_context.h @@ -0,0 +1,76 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_GRAPH_INFERENCE_CONTEXT_H_ +#define INC_EXTERNAL_GRAPH_INFERENCE_CONTEXT_H_ + +#include +#include +#include + +#include "./tensor.h" +#include "./types.h" + +namespace ge { +class InferenceContext; +using InferenceContextPtr = std::shared_ptr; + +class ShapeAndTypeImpl; +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ShapeAndType { + public: + ShapeAndType(); + ~ShapeAndType() = default; + + ShapeAndType(const Shape &shape, DataType dataType); + + void SetShape(const Shape &shape); + + void SetType(DataType dataType); + + Shape GetShape() const; + + DataType GetDataType() const; + + private: + std::shared_ptr shape_and_type_impl_; +}; + +class InferenceContextImpl; +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY InferenceContext { + public: + ~InferenceContext() = default; + InferenceContext(const InferenceContext &context) = delete; + InferenceContext(const InferenceContext &&context) = delete; + InferenceContext &operator=(const InferenceContext &context) = delete; + InferenceContext &operator=(const InferenceContext &&context) = delete; + + void SetInputHandleShapesAndTypes(std::vector> &&shapes_and_types); + const std::vector> &GetInputHandleShapesAndTypes() const; + const std::vector> &GetOutputHandleShapesAndTypes() const; + void SetOutputHandleShapesAndTypes(const std::vector> &shapes_and_types); + void SetOutputHandleShapesAndTypes(std::vector> &&shapes_and_types); + + void SetMarks(const std::vector &marks); + const std::vector &GetMarks() const; + + static std::unique_ptr Create(); + + private: + 
explicit InferenceContext(std::unique_ptr &impl); + std::shared_ptr inference_context_impl_; +}; +} // namespace ge +#endif // INC_EXTERNAL_GRAPH_INFERENCE_CONTEXT_H_ diff --git a/inc/external/graph/operator.h b/inc/external/graph/operator.h new file mode 100644 index 000000000..b84ae1d48 --- /dev/null +++ b/inc/external/graph/operator.h @@ -0,0 +1,286 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_GRAPH_OPERATOR_H_ +#define INC_EXTERNAL_GRAPH_OPERATOR_H_ + +#include +#include +#include +#include +#include + +#include "./ge_error_codes.h" +#include "./inference_context.h" +#include "./tensor.h" + +#ifndef USER_GE_LOGI +#define USER_GE_LOGI(...) +#endif // USER_GE_LOGI + +#ifndef USER_GE_LOGW +#define USER_GE_LOGW(...) +#endif // USER_GE_LOGW + +#ifndef USER_GE_LOGE +#define USER_GE_LOGE(...) 
+#endif // USER_GE_LOGE + +#define DYNAMIC_OUTPUT_TD_NUM(name) ("__dynamic_output_" + name + "_cnt") +#define DYNAMIC_INPUT_TD_NUM(name) ("__dynamic_input_" + name + "_cnt") + +namespace ge { +class Operator; +class OperatorImpl; +class NamedAttrs; +class Graph; +class AttrValue; + +using SubgraphBuilder = std::function; +using OperatorImplPtr = std::shared_ptr; +using OperatorPtr = std::shared_ptr; + +class OpIO; +using OutHandler = std::shared_ptr; +using InHandler = std::shared_ptr; + +using std::function; +using std::shared_ptr; +using std::string; + +/*lint -e148*/ +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Operator { + public: + friend class OperatorImpl; + + friend class GraphBuilderImpl; + + using OpInt = int64_t; + using OpFloat = float; + using OpString = string; + using OpBool = bool; + using OpTensor = Tensor; + using OpType = ge::DataType; + using OpNamedAttrs = ge::NamedAttrs; + using OpListInt = std::vector; + using OpListFloat = std::vector; + using OpListString = std::vector; + using OpListBool = std::vector; + using OpListTensor = std::vector; + using OpBytes = std::vector; + using OpListListInt = std::vector>; + using OpListType = std::vector; + using OpListNamedAttrs = std::vector; + + Operator() {} + + explicit Operator(const string &type); + + Operator(const string &name, const string &type); // lint !e148 + + virtual ~Operator() = default; + + bool IsEmpty() const; + + string GetName() const; + + string GetOpType() const; + + // Only has one output index = 0 + Operator &SetInput(const string &dst_name, const Operator &src_oprt); + + Operator &SetInput(const string &dst_name, const Operator &src_oprt, const string &name); // lint !e148 + + Operator &SetInput(const string &dst_name, const Operator &src_oprt, uint32_t index); + + Operator &AddControlInput(const Operator &src_oprt); + + graphStatus GetInputConstData(const string &dst_name, Tensor &data) const; + + TensorDesc GetInputDesc(const string &name) const; + + TensorDesc 
GetInputDesc(uint32_t index) const; + + int GetDynamicOutputNum(const string &name) const; + + int GetDynamicInputNum(const string &name) const; + + graphStatus TryGetInputDesc(const string &name, TensorDesc &tensor_desc) const; + + graphStatus UpdateInputDesc(const string &name, const TensorDesc &tensor_desc); + + TensorDesc GetOutputDesc(const string &name) const; + + TensorDesc GetOutputDesc(uint32_t index) const; + + graphStatus UpdateOutputDesc(const string &name, const TensorDesc &tensor_desc); // lint !e148 + + TensorDesc GetDynamicInputDesc(const string &name, uint32_t index) const; + + graphStatus UpdateDynamicInputDesc(const string &name, uint32_t index, const TensorDesc &tensor_desc); // lint !e148 + + TensorDesc GetDynamicOutputDesc(const string &name, uint32_t index) const; + + graphStatus UpdateDynamicOutputDesc(const string &name, uint32_t index, const TensorDesc &tensor_desc); // lint !e148 + + graphStatus InferShapeAndType(); // lint !e148 + + void SetInferenceContext(const InferenceContextPtr &inference_context); + InferenceContextPtr GetInferenceContext() const; + + graphStatus VerifyAllAttr(bool disable_common_verifier = false); // lint !e148 + + size_t GetInputsSize() const; + + size_t GetOutputsSize() const; + + const std::map GetAllAttrNamesAndTypes() const; + + Operator &SetAttr(const string &name, int64_t attr_value); + Operator &SetAttr(const string &name, int32_t attr_value); + Operator &SetAttr(const string &name, uint32_t attr_value); + graphStatus GetAttr(const string &name, int64_t &attr_value) const; + graphStatus GetAttr(const string &name, int32_t &attr_value) const; + graphStatus GetAttr(const string &name, uint32_t &attr_value) const; + Operator &SetAttr(const string &name, const std::vector &attr_value); + Operator &SetAttr(const string &name, const std::vector &attr_value); + Operator &SetAttr(const string &name, const std::vector &attr_value); + Operator &SetAttr(const string &name, std::initializer_list &&attr_value); + 
graphStatus GetAttr(const string &name, std::vector &attr_value) const; + graphStatus GetAttr(const string &name, std::vector &attr_value) const; + graphStatus GetAttr(const string &name, std::vector &attr_value) const; + + Operator &SetAttr(const string &name, float attr_value); + graphStatus GetAttr(const string &name, float &attr_value) const; + Operator &SetAttr(const string &name, const std::vector &attr_value); + graphStatus GetAttr(const string &name, std::vector &attr_value) const; + Operator &SetAttr(const string &name, AttrValue &&attr_value); + graphStatus GetAttr(const string &name, AttrValue &attr_value) const; + + Operator &SetAttr(const string &name, const string &attr_value); + graphStatus GetAttr(const string &name, string &attr_value) const; + Operator &SetAttr(const string &name, const std::vector &attr_value); + graphStatus GetAttr(const string &name, std::vector &attr_value) const; + + Operator &SetAttr(const string &name, bool attr_value); + graphStatus GetAttr(const string &name, bool &attr_value) const; + Operator &SetAttr(const string &name, const std::vector &attr_value); + graphStatus GetAttr(const string &name, std::vector &attr_value) const; + + Operator &SetAttr(const string &name, const Tensor &attr_value); + graphStatus GetAttr(const string &name, Tensor &attr_value) const; + Operator &SetAttr(const string &name, const std::vector &attr_value); + graphStatus GetAttr(const string &name, std::vector &attr_value) const; + + // Bytes type + Operator &SetAttr(const string &name, const OpBytes &attr_value); + // Bytes type + graphStatus GetAttr(const string &name, OpBytes &attr_value) const; + + Operator &SetAttr(const string &name, const std::vector> &attr_value); + graphStatus GetAttr(const string &name, std::vector> &attr_value) const; + + Operator &SetAttr(const string &name, const std::vector &attr_value); + graphStatus GetAttr(const string &name, std::vector &attr_value) const; + + Operator &SetAttr(const string &name, const 
ge::DataType &attr_value); + graphStatus GetAttr(const string &name, ge::DataType &attr_value) const; + + // func type + Operator &SetAttr(const string &name, const ge::NamedAttrs &attr_value); + graphStatus GetAttr(const string &name, ge::NamedAttrs &attr_value) const; + Operator &SetAttr(const string &name, const std::vector &attr_value); + graphStatus GetAttr(const string &name, std::vector &attr_value) const; + + void BreakConnect() const; + + size_t GetSubgraphNamesCount() const; + std::vector GetSubgraphNames() const; + SubgraphBuilder GetSubgraphBuilder(const string &name) const; + Graph GetSubgraph(const string &name) const; + SubgraphBuilder GetDynamicSubgraphBuilder(const string &name, uint32_t index) const; + Graph GetDynamicSubgraph(const string &name, uint32_t index) const; + + protected: + void AttrRegister(const string &name, float attr_value); + void AttrRegister(const string &name, const std::vector &attr_value); + void AttrRegister(const string &name, int64_t attr_value); + void AttrRegister(const string &name, const std::vector &attr_value); + void AttrRegister(const string &name, const string &attr_value); + void AttrRegister(const string &name, const std::vector &attr_value); + void AttrRegister(const string &name, bool attr_value); + void AttrRegister(const string &name, const std::vector &attr_value); + void AttrRegister(const string &name, const Tensor &attr_value); + void AttrRegister(const string &name, const std::vector &attr_value); + void AttrRegister(const string &name, const OpBytes &attr_value); + void AttrRegister(const string &name, const std::vector> &attr_value); + void AttrRegister(const string &name, const std::vector &attr_value); + void AttrRegister(const string &name, const ge::DataType &attr_value); + void AttrRegister(const string &name, const ge::NamedAttrs &attr_value); + void AttrRegister(const string &name, const std::vector &attr_value); + + explicit Operator(OperatorImplPtr &&op_impl); + + void InputRegister(const 
string &name); + + void OptionalInputRegister(const string &name); + + void InferFuncRegister(const std::function &func); + + void VerifierFuncRegister(const std::function &func); + + void InferFormatFuncRegister(const std::function &func); + + void OutputRegister(const string &name); + + void DynamicInputRegister(const string &name, const unsigned int num, bool is_push_back = true); + + void DynamicInputRegisterByIndex(const string &name, const unsigned int num, size_t index); + + void DynamicOutputRegister(const string &name, const unsigned int num, bool is_push_back = true); + + void RequiredAttrRegister(const string &name); + + graphStatus VerifyAll(); // lint !e148 + + // Only has one output index = 0 + Operator &SetInput(const string &dst_name, uint32_t dst_index, + const Operator &src_oprt); + + Operator &SetInput(const string &dst_name, uint32_t dst_index, const Operator &src_oprt, + const string &name); // lint !e148 + + void SubgraphRegister(const string &ir_name, bool dynamic); + void SubgraphCountRegister(const string &ir_name, uint32_t count); + void SetSubgraphBuilder(const string &ir_name, uint32_t index, const SubgraphBuilder &builder); + + private: + Operator &SetInput(const string &dst_name, const OutHandler &out_handler); // lint !e148 + + OutHandler GetOutput(const string &name) const; + + OutHandler GetOutput(uint32_t index) const; + + OperatorImplPtr GetOperatorImplPtr() const; + + OperatorImplPtr operator_impl_{nullptr}; + + graphStatus GetInputConstDataOut(const string &dst_name, Tensor &data) const; +}; +/*lint +e148*/ +} // namespace ge + +#endif // INC_EXTERNAL_GRAPH_OPERATOR_H_ diff --git a/inc/external/graph/operator_factory.h b/inc/external/graph/operator_factory.h new file mode 100644 index 000000000..f9ec7669a --- /dev/null +++ b/inc/external/graph/operator_factory.h @@ -0,0 +1,68 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_GRAPH_OPERATOR_FACTORY_H_ +#define INC_EXTERNAL_GRAPH_OPERATOR_FACTORY_H_ + +#include +#include +#include +#include + +#include "./operator.h" +#include "./ge_error_codes.h" + +namespace ge { +using OpCreator = std::function; +using InferShapeFunc = std::function; +using InferFormatFunc = std::function; +using VerifyFunc = std::function; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OperatorFactory { + public: + static Operator CreateOperator(const std::string &operator_name, const std::string &operator_type); + + static graphStatus GetOpsTypeList(std::vector &all_ops); + + static bool IsExistOp(const string &operator_type); +}; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OperatorCreatorRegister { + public: + OperatorCreatorRegister(const string &operator_type, OpCreator const &op_creator); + ~OperatorCreatorRegister() = default; +}; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY InferShapeFuncRegister { + public: + InferShapeFuncRegister(const std::string &operator_type, const InferShapeFunc &infer_shape_func); + ~InferShapeFuncRegister() = default; +}; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY InferFormatFuncRegister { + public: + InferFormatFuncRegister(const std::string &operator_type, const InferFormatFunc &infer_format_func); + ~InferFormatFuncRegister() = default; +}; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY VerifyFuncRegister { + public: + VerifyFuncRegister(const std::string &operator_type, const 
VerifyFunc &verify_func); + ~VerifyFuncRegister() = default; +}; +} // namespace ge + +#endif // INC_EXTERNAL_GRAPH_OPERATOR_FACTORY_H_ diff --git a/inc/external/graph/operator_reg.h b/inc/external/graph/operator_reg.h new file mode 100644 index 000000000..84210d9f2 --- /dev/null +++ b/inc/external/graph/operator_reg.h @@ -0,0 +1,384 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_GRAPH_OPERATOR_REG_H_ +#define INC_EXTERNAL_GRAPH_OPERATOR_REG_H_ + +#include +#include +#include +#include + +#include "graph/operator.h" +#include "graph/operator_factory.h" +#include "graph/tensor.h" +#include "graph/types.h" +#include "graph/graph.h" + +namespace ge { +using std::function; +using std::string; +using std::vector; + +class OpReg { + public: + OpReg &N() { return *this; } + + OpReg &ATTR() { return *this; } + + OpReg &REQUIRED_ATTR() { return *this; } + + OpReg &INPUT() { return *this; } + + OpReg &OPTIONAL_INPUT() { return *this; } + + OpReg &OUTPUT() { return *this; } + + OpReg &GRAPH() { return *this; } + + OpReg &DYNAMIC_GRAPH() { return *this; } + + OpReg &INFER_SHAPE_AND_TYPE() { return *this; } +}; + +#define REG_OP(x) \ + namespace op { \ + class x : public Operator { \ + typedef x _THIS_TYPE; \ + \ + public: \ + explicit x(const string &name) : Operator(name, #x) { __##x(); } \ + x() : Operator(#x) { __##x(); } \ + \ + private: \ + void __##x() { \ + OpReg() + 
+#define ATTR(x, Type, ...) \ + N(); \ + __attr_##x(); \ + } \ + \ + public: \ + static const string name_attr_##x() { return #x; } \ + Op##Type get_attr_##x() const { \ + Op##Type ret = __VA_ARGS__; \ + if (Operator::GetAttr(#x, ret) == GRAPH_FAILED) { \ + return ret; \ + } \ + return ret; \ + } \ + _THIS_TYPE &set_attr_##x(const Op##Type &v) { \ + Operator::SetAttr(#x, v); \ + return *this; \ + } \ + _THIS_TYPE &set_attr_##x(const function &v) { return *this; } \ + \ + private: \ + void __attr_##x() { \ + Operator::AttrRegister(#x, Op##Type(__VA_ARGS__)); \ + string attr_name(#x); \ + (void)OpReg() + +#define REQUIRED_ATTR(x, Type) \ + N(); \ + __required_attr_##x(); \ + } \ + \ + public: \ + static const string name_attr_##x() { return #x; } \ + Op##Type get_attr_##x() const { \ + Op##Type ret; \ + if (Operator::GetAttr(#x, ret) == GRAPH_FAILED) { \ + return ret; \ + } \ + return ret; \ + } \ + _THIS_TYPE &set_attr_##x(const Op##Type &v) { \ + Operator::SetAttr(#x, v); \ + return *this; \ + } \ + _THIS_TYPE &set_attr_##x(const function &v) { return *this; } \ + \ + private: \ + void __required_attr_##x() { \ + Operator::RequiredAttrRegister(#x); \ + string attr_name(#x); \ + (void)OpReg() + +#define INPUT(x, t) \ + N(); \ + __input_##x(); \ + } \ + \ + public: \ + static const string name_in_##x() { return #x; } \ + _THIS_TYPE &set_input_##x(Operator &v, const string &srcName) { \ + Operator::SetInput(#x, v, srcName); \ + return *this; \ + } \ + _THIS_TYPE &set_input_##x(Operator &v, uint32_t index) { \ + Operator::SetInput(#x, v, index); \ + return *this; \ + } \ + _THIS_TYPE &set_input_##x(Operator &v) { \ + Operator::SetInput(#x, v); \ + return *this; \ + } \ + TensorDesc get_input_desc_##x() const { return Operator::GetInputDesc(#x); } \ + graphStatus update_input_desc_##x(const TensorDesc &tensorDesc) { \ + return Operator::UpdateInputDesc(#x, tensorDesc); \ + } \ + \ + private: \ + void __input_##x() { \ + Operator::InputRegister(#x); \ + (void)OpReg() + 
+#define OPTIONAL_INPUT(x, t) \ + N(); \ + __optional_input_##x(); \ + } \ + \ + public: \ + static const string name_in_##x() { return #x; } \ + _THIS_TYPE &set_input_##x(Operator &v) { \ + Operator::SetInput(#x, v); \ + return *this; \ + } \ + _THIS_TYPE &set_input_##x(Operator &v, const string &srcName) { \ + Operator::SetInput(#x, v, srcName); \ + return *this; \ + } \ + _THIS_TYPE &set_input_##x(Operator &v, uint32_t index) { \ + Operator::SetInput(#x, v, index); \ + return *this; \ + } \ + TensorDesc get_input_desc_##x() const { return Operator::GetInputDesc(#x); } \ + graphStatus update_input_desc_##x(const TensorDesc &tensorDesc) { \ + return Operator::UpdateInputDesc(#x, tensorDesc); \ + } \ + \ + private: \ + void __optional_input_##x() { \ + Operator::OptionalInputRegister(#x); \ + (void)OpReg() + +#define OUTPUT(x, t) \ + N(); \ + __out_##x(); \ + } \ + \ + public: \ + static const string name_out_##x() { return #x; } \ + TensorDesc get_output_desc_##x() const { return Operator::GetOutputDesc(#x); } \ + graphStatus update_output_desc_##x(const TensorDesc &tensorDesc) { \ + return Operator::UpdateOutputDesc(#x, tensorDesc); \ + } \ + \ + private: \ + void __out_##x() { \ + Operator::OutputRegister(#x); \ + (void)OpReg() + +#define DYNAMIC_INPUT(x, t) \ + N(); \ + __dy_input_##x(); \ + } \ + \ + public: \ + _THIS_TYPE &create_dynamic_input_##x(uint32_t num, bool isPushBack = true) { \ + Operator::DynamicInputRegister(#x, num, isPushBack); \ + return *this; \ + } \ + _THIS_TYPE &create_dynamic_input_byindex_##x(uint32_t num, size_t index) { \ + Operator::DynamicInputRegisterByIndex(#x, num, index); \ + return *this; \ + } \ + TensorDesc get_dynamic_input_desc_##x(uint32_t index) const { \ + return Operator::GetDynamicInputDesc(#x, index); \ + } \ + graphStatus update_dynamic_input_desc_##x(uint32_t index, const TensorDesc &tensorDesc) { \ + return Operator::UpdateDynamicInputDesc(#x, index, tensorDesc); \ + } \ + _THIS_TYPE &set_dynamic_input_##x(uint32_t 
dstIndex, Operator &v) { \ + Operator::SetInput(#x, dstIndex, v); \ + return *this; \ + } \ + _THIS_TYPE &set_dynamic_input_##x(uint32_t dstIndex, Operator &v, const string &srcName) { \ + Operator::SetInput(#x, dstIndex, v, srcName); \ + return *this; \ + } \ + \ + private: \ + void __dy_input_##x() { \ + (void)OpReg() + +#define DYNAMIC_OUTPUT(x, t) \ + N(); \ + __dy_output_##x(); \ + } \ + \ + public: \ + _THIS_TYPE &create_dynamic_output_##x(uint32_t num, bool isPushBack = true) { \ + Operator::DynamicOutputRegister(#x, num, isPushBack); \ + return *this; \ + } \ + TensorDesc get_dynamic_output_desc_##x(uint32_t index) const { \ + return Operator::GetDynamicOutputDesc(#x, index); \ + } \ + graphStatus update_dynamic_output_desc_##x(uint32_t index, const TensorDesc &tensorDesc) { \ + return Operator::UpdateDynamicOutputDesc(#x, index, tensorDesc); \ + } \ + \ + private: \ + void __dy_output_##x() { \ + (void)OpReg() + +#define GRAPH(x) \ + N(); \ + __graph_##x(); \ + } \ + \ + public: \ + static const string name_graph_##x() { return #x; } \ + SubgraphBuilder get_subgraph_builder_##x() const { \ + return Operator::GetSubgraphBuilder(#x); \ + } \ + _THIS_TYPE &set_subgraph_builder_##x(const SubgraphBuilder &v) { \ + Operator::SetSubgraphBuilder(#x, 0, v); \ + return *this; \ + } \ + Graph get_subgraph_##x() const { \ + return Operator::GetSubgraph(#x); \ + } \ + \ + private: \ + void __graph_##x() { \ + Operator::SubgraphRegister(#x, false); \ + Operator::SubgraphCountRegister(#x, 1); \ + (void)OpReg() + +#define DYNAMIC_GRAPH(x) \ + N(); \ + __graph_##x(); \ + } \ + \ + public: \ + static const string name_graph_##x() { return #x; } \ + _THIS_TYPE &create_dynamic_subgraph_##x(uint32_t num) { \ + Operator::SubgraphCountRegister(#x, num); \ + return *this; \ + } \ + SubgraphBuilder get_dynamic_subgraph_builder_##x(uint32_t index) const { \ + return Operator::GetDynamicSubgraphBuilder(#x, index); \ + } \ + Graph get_dynamic_subgraph_##x(uint32_t index) const { \ + 
return Operator::GetDynamicSubgraph(#x, index); \ + } \ + _THIS_TYPE &set_dynamic_subgraph_builder_##x(uint32_t index,const SubgraphBuilder &v) { \ + Operator::SetSubgraphBuilder(#x, index, v); \ + return *this; \ + } \ + \ + private: \ + void __graph_##x() { \ + Operator::SubgraphRegister(#x, true); \ + (void)OpReg() + + +#define PASTE(g_register, y) g_register##y +#define __OP_END_IMPL__(x, y) \ + N(); \ + } \ + static_assert( \ + std::is_same::value, \ + "The class name entered into the OP_END_FACTORY_REG needs to be the same as the operator name you define."); \ + } \ + ; \ + static const OperatorCreatorRegister PASTE(g_register, y)(#x, [](const std::string &name) { return x(name); }); \ + } +#define OP_END_FACTORY_REG(x) __OP_END_IMPL__(x, __COUNTER__) + +// Specialized shape inferencer macro + +#define IMPLEMT_INFERFUNC(op_name, func_name) \ + GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY static graphStatus func_name(op::op_name &op) + +#define IMPLEMT_COMMON_INFERFUNC(func_name) \ + GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY static graphStatus func_name(Operator &op) + +#define IMPLEMT_INFERFORMAT_FUNC(op_name, func_name) \ + GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY static graphStatus func_name(op::op_name &op) + +// Specialized verifier macro + +#define IMPLEMT_VERIFIER(op_name, func_name) \ + GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY static graphStatus func_name(op::op_name op) + +#define INFER_VERIFY_FUNC(op_name, x) [&](Operator &v) { return x((op::op_name &)v); } + +#define COMMON_INFER_VERIFY_FUNC(x) [&](Operator &v) { return x(v); } + +#define INFER_FORMAT_FUNC(op_name, x) [&](Operator &v) { return x((op::op_name &)v); } + +#define __INFER_FUNC_REG_IMPL__(op_name, x, n) static const InferShapeFuncRegister PASTE(if_register, n)(#op_name, x) + +#define __VERIFY_FUNC_REG_IMPL__(op_name, x, n) static const VerifyFuncRegister PASTE(vf_register, n)(#op_name, x) +// Infer format func register +#define __INFER_FORMAT_FUNC_REG_IMPL__(op_name, 
x, n) \ + static const InferFormatFuncRegister PASTE(ff_register, n)(#op_name, x) + +// Shape inferencer & verifier register macro + +#define INFER_FUNC_REG(op_name, x) __INFER_FUNC_REG_IMPL__(op_name, INFER_VERIFY_FUNC(op_name, x), __COUNTER__) + +#define COMMON_INFER_FUNC_REG(op_name, x) __INFER_FUNC_REG_IMPL__(op_name, COMMON_INFER_VERIFY_FUNC(x), __COUNTER__) + +#define VERIFY_FUNC_REG(op_name, x) __VERIFY_FUNC_REG_IMPL__(op_name, INFER_VERIFY_FUNC(op_name, x), __COUNTER__) + +// Infer format func reg +#define INFER_FORMAT_FUNC_REG(op_name, x) \ + __INFER_FORMAT_FUNC_REG_IMPL__(op_name, INFER_FORMAT_FUNC(op_name, x), __COUNTER__) + +// Common shape inferencer + +#define ELMTWISE_INFER_SHAPEANDTYPE(in_name, out_name) \ + [](Operator op)->graphStatus { \ + auto x_shape = op.GetInputDesc(in_name).GetShape().GetDims(); \ + auto x_type = op.GetInputDesc(in_name).GetDataType(); \ + TensorDesc op_output_desc = op.GetOutputDesc(out_name); \ + op_output_desc.SetShape(ge::Shape(x_shape)); \ + op_output_desc.SetOriginShape(ge::Shape(x_shape)); \ + op_output_desc.SetDataType(x_type); \ + return op.UpdateOutputDesc(out_name, op_output_desc); \ + } + +graphStatus BroadCastInfer(const function()> &get_in1_shape, + const function()> &get_in2_shape, + const function &y_shape)> &set_out_shape); + +#define BROADCAST_INFER(in1_name, in2_name, out_name) \ + [](Operator op) -> graphStatus { \ + return BroadCastInfer([&]() { return op.GetInputDesc(in1_name).GetShape().GetDims(); }, \ + [&]() { return op.GetInputDesc(in2_name).GetShape().GetDims(); }, \ + [&](const vector &y_shape) { \ + TensorDesc op_output_desc = op.GetOutputDesc(out_name); \ + op_output_desc.SetShape(ge::Shape(y_shape)); \ + (void)op.UpdateOutputDesc(out_name, op_output_desc);}); \ + } +} // namespace ge +#endif // INC_EXTERNAL_GRAPH_OPERATOR_REG_H_ diff --git a/inc/external/graph/tensor.h b/inc/external/graph/tensor.h new file mode 100644 index 000000000..38f25f2f0 --- /dev/null +++ b/inc/external/graph/tensor.h 
@@ -0,0 +1,131 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_GRAPH_TENSOR_H_ +#define INC_EXTERNAL_GRAPH_TENSOR_H_ + +#include +#include +#include +#include +#include + +#include "./ge_error_codes.h" +#include "./types.h" + +namespace ge { +class ShapeImpl; +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Shape { + public: + Shape(); + ~Shape() = default; + explicit Shape(const std::vector &dims); + + size_t GetDimNum() const; + // If the idx is invalid, return 0 + int64_t GetDim(size_t idx) const; + graphStatus SetDim(size_t idx, int64_t value); + std::vector GetDims() const; + int64_t GetShapeSize() const; + + private: + std::shared_ptr impl_; +}; + +class TensorDescImpl; +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY TensorDesc { + public: + TensorDesc(); + ~TensorDesc() = default; + explicit TensorDesc(Shape shape, Format format = FORMAT_ND, DataType dt = DT_FLOAT); + // Copy + TensorDesc(const TensorDesc &desc); + // Move + TensorDesc(TensorDesc &&desc); + // Copy + TensorDesc &operator=(const TensorDesc &desc); + // Move + TensorDesc &operator=(TensorDesc &&desc); + + void Update(const Shape &shape, Format format = FORMAT_ND, DataType dt = DT_FLOAT); + Shape GetShape() const; + void SetShape(const Shape &shape); + // set shape with -2, it stand for unknown shape + graphStatus SetUnknownDimNumShape(); + // for unknown shape + graphStatus 
SetShapeRange(const std::vector> &range); + graphStatus GetShapeRange(std::vector> &range) const; + + Format GetFormat() const; + void SetFormat(Format format); + + Shape GetOriginShape() const; + void SetOriginShape(const Shape &originShape); + + Format GetOriginFormat() const; + void SetOriginFormat(Format originFormat); + + DataType GetDataType() const; + void SetDataType(DataType dt); + + std::string GetName() const; + void SetName(const std::string &name); + + // Attr acess + void SetSize(int64_t size); + int64_t GetSize() const; + + int64_t GetRealDimCnt() const; + void SetRealDimCnt(const int64_t realDimCnt); + + private: + std::shared_ptr impl; +}; + +class TensorImpl; +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Tensor { + public: + Tensor(); + ~Tensor() = default; + explicit Tensor(const TensorDesc &tensorDesc); + Tensor(const TensorDesc &tensorDesc, const std::vector &data); + Tensor(const TensorDesc &tensorDesc, const uint8_t *data, size_t size); + Tensor(TensorDesc &&tensorDesc, std::vector &&data); + + TensorDesc GetTensorDesc() const; + graphStatus SetTensorDesc(const TensorDesc &tensorDesc); + + const uint8_t *GetData() const; + uint8_t *GetData(); + size_t GetSize() const; + + graphStatus SetData(std::vector &&data); + graphStatus SetData(const std::vector &data); + graphStatus SetData(const uint8_t *data, size_t size); + graphStatus SetData(const std::string &data); + graphStatus SetData(const std::vector &data); + graphStatus IsValid(); + + Tensor Clone() const; + + private: + std::shared_ptr impl; + friend class TensorAdapter; +}; +} // namespace ge +/*lint +e148*/ + +#endif // INC_EXTERNAL_GRAPH_TENSOR_H_ diff --git a/inc/external/graph/types.h b/inc/external/graph/types.h new file mode 100644 index 000000000..cfab3e2aa --- /dev/null +++ b/inc/external/graph/types.h @@ -0,0 +1,240 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this 
file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_GRAPH_TYPES_H_ +#define INC_EXTERNAL_GRAPH_TYPES_H_ + +#include +#include +#include + +namespace ge { +static const int64_t UNKNOWN_DIM = -1; +static const int64_t UNKNOWN_DIM_NUM = -2; +static const std::vector UNKNOWN_SHAPE = {-1}; +static const std::vector UNKNOWN_RANK = {-2}; + +#ifdef HOST_VISIBILITY +#define GE_FUNC_HOST_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_HOST_VISIBILITY +#endif +#ifdef DEV_VISIBILITY +#define GE_FUNC_DEV_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_DEV_VISIBILITY +#endif + +enum DataType { + DT_FLOAT = 0, // float type + DT_FLOAT16 = 1, // fp16 type + DT_INT8 = 2, // int8 type + DT_INT16 = 6, // int16 type + DT_UINT16 = 7, // uint16 type + DT_UINT8 = 4, // uint8 type + DT_INT32 = 3, // + DT_INT64 = 9, // int64 type + DT_UINT32 = 8, // unsigned int32 + DT_UINT64 = 10, // unsigned int64 + DT_BOOL = 12, // bool type + DT_DOUBLE = 11, // double type + DT_STRING = 13, // string type + DT_DUAL_SUB_INT8 = 14, // dual output int8 type + DT_DUAL_SUB_UINT8 = 15, // dual output uint8 type + DT_COMPLEX64 = 16, // complex64 type + DT_COMPLEX128 = 17, // complex128 type + DT_QINT8 = 18, // qint8 type + DT_QINT16 = 19, // qint16 type + DT_QINT32 = 20, // qint32 type + DT_QUINT8 = 21, // quint8 type + DT_QUINT16 = 22, // quint16 type + DT_RESOURCE = 23, // resource type + DT_STRING_REF = 24, // string ref type + DT_DUAL = 25, // dual output type + DT_UNDEFINED // Used to indicate a DataType field has 
not been set. +}; + +inline int GetSizeByDataType(DataType data_type) { + static int data_type_size[DT_UNDEFINED] = { + 4, // DT_FLOAT = 0, float type + 2, // DT_FLOAT16 = 1, fp16 type + 1, // DT_INT8 = 2, int8 type + 4, // DT_INT32 = 3, + 1, // DT_UINT8 = 4, uint8 type + -1, + 2, // DT_INT16 = 6, int16 type + 2, // DT_UINT16 = 7, uint16 type + 4, // DT_UINT32 = 8, unsigned int32 + 8, // DT_INT64 = 9, int64 type + 8, // DT_UINT64 = 10, unsigned int64 + 8, // DT_DOUBLE = 11, double type + 1, // DT_BOOL = 12, bool type + -1, // DT_STRING = 13, string type + 1, // DT_DUAL_SUB_INT8 = 14, dual output int8 type + 1, // DT_DUAL_SUB_UINT8 = 15, dual output uint8 type + 8, // DT_COMPLEX64 = 16, complex64 type + 16, // DT_COMPLEX128 = 17, complex128 type + 1, // DT_QINT8 = 18, qint8 type + 2, // DT_QINT16 = 19, qint16 type + 4, // DT_QINT32 = 20, qint32 type + 1, // DT_QUINT8 = 21, quint8 type + 2, // DT_QUINT16 = 22, quint16 type + -1, // DT_RESOURCE = 23, resource type + -1, // DT_STRING_REF = 24, string ref type + 5, // DT_DUAL = 25, dual output type (float + int8) + // DT_UNDEFINED Used to indicate a DataType field has not been set. 
+ }; + if (data_type >= DT_UNDEFINED) { + return -1; + } + return data_type_size[data_type]; +} + +enum Format { + FORMAT_NCHW = 0, // NCHW + FORMAT_NHWC, // NHWC + FORMAT_ND, // Nd Tensor + FORMAT_NC1HWC0, // NC1HWC0 + FORMAT_FRACTAL_Z, // FRACTAL_Z + FORMAT_NC1C0HWPAD, + FORMAT_NHWC1C0, + FORMAT_FSR_NCHW, + FORMAT_FRACTAL_DECONV, + FORMAT_C1HWNC0, + FORMAT_FRACTAL_DECONV_TRANSPOSE, + FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS, + FORMAT_NC1HWC0_C04, // NC1HWC0, C0 =4 + FORMAT_FRACTAL_Z_C04, // FRACZ, C0 =4 + FORMAT_CHWN, + FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS, + FORMAT_HWCN, + FORMAT_NC1KHKWHWC0, // KH,KW kernel h& kernel w maxpooling max output format + FORMAT_BN_WEIGHT, + FORMAT_FILTER_HWCK, // filter input tensor format + FORMAT_HASHTABLE_LOOKUP_LOOKUPS = 20, + FORMAT_HASHTABLE_LOOKUP_KEYS, + FORMAT_HASHTABLE_LOOKUP_VALUE, + FORMAT_HASHTABLE_LOOKUP_OUTPUT, + FORMAT_HASHTABLE_LOOKUP_HITS = 24, + FORMAT_C1HWNCoC0, + FORMAT_MD, + FORMAT_NDHWC, + FORMAT_FRACTAL_ZZ, + FORMAT_FRACTAL_NZ, + FORMAT_NCDHW, + FORMAT_DHWCN, // 3D filter input tensor format + FORMAT_NDC1HWC0, + FORMAT_FRACTAL_Z_3D, + FORMAT_CN, + FORMAT_NC, + FORMAT_DHWNC, + FORMAT_FRACTAL_Z_3D_TRANSPOSE, // 3D filter(transpose) input tensor format + FORMAT_FRACTAL_ZN_LSTM, + FORMAT_FRACTAL_Z_G, + FORMAT_RESERVED, + FORMAT_ALL, + FORMAT_NULL +}; + +// for unknown shape op type +enum UnknowShapeOpType { + DEPEND_IN_SHAPE = 1, // op out shape get by input shape + DEPEND_CONST_VALUE = 2, // op out shape get by const op value + DEPEND_SHAPE_RANGE = 3, // op out shape get by range + DEPEND_COMPUTE = 4 // op out shape get by totally computing +}; + +struct TensorDescInfo { + Format format_ = FORMAT_RESERVED; // tbe op register support format + DataType dataType_ = DT_UNDEFINED; // tbe op register support datatype +}; + +enum DeviceType { + NPU = 0, + CPU = 1, +}; + +class TensorTypeImpl; +struct TensorType { + explicit TensorType(DataType dt); + + TensorType(const std::initializer_list &types); + + static 
TensorType ALL() { + return TensorType{DT_BOOL, DT_COMPLEX128, DT_COMPLEX64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT16, + DT_INT32, DT_INT64, DT_INT8, DT_QINT16, DT_QINT32, DT_QINT8, DT_QUINT16, + DT_QUINT8, DT_RESOURCE, DT_STRING, DT_UINT16, DT_UINT32, DT_UINT64, DT_UINT8}; + } + + static TensorType QuantifiedType() { return TensorType{DT_QINT16, DT_QINT32, DT_QINT8, DT_QUINT16, DT_QUINT8}; } + + static TensorType OrdinaryType() { + return TensorType{DT_BOOL, DT_COMPLEX128, DT_COMPLEX64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT16, + DT_INT32, DT_INT64, DT_INT8, DT_UINT16, DT_UINT32, DT_UINT64, DT_UINT8}; + } + + static TensorType BasicType() { + return TensorType{DT_COMPLEX128, DT_COMPLEX64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT16, + DT_INT32, DT_INT64, DT_INT8, DT_QINT16, DT_QINT32, DT_QINT8, + DT_QUINT16, DT_QUINT8, DT_UINT16, DT_UINT32, DT_UINT64, DT_UINT8}; + } + + static TensorType NumberType() { + return TensorType{DT_COMPLEX128, DT_COMPLEX64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT16, DT_INT32, DT_INT64, + DT_INT8, DT_QINT32, DT_QINT8, DT_QUINT8, DT_UINT16, DT_UINT32, DT_UINT64, DT_UINT8}; + } + + static TensorType RealNumberType() { + return TensorType{DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT16, DT_INT32, DT_INT64, + DT_INT8, DT_UINT16, DT_UINT32, DT_UINT64, DT_UINT8}; + } + + static TensorType ComplexDataType() { return TensorType{DT_COMPLEX128, DT_COMPLEX64}; } + + static TensorType IntegerDataType() { + return TensorType{DT_INT16, DT_INT32, DT_INT64, DT_INT8, DT_UINT16, DT_UINT32, DT_UINT64, DT_UINT8}; + } + + static TensorType SignedDataType() { return TensorType{DT_INT16, DT_INT32, DT_INT64, DT_INT8}; } + + static TensorType UnsignedDataType() { return TensorType{DT_UINT16, DT_UINT32, DT_UINT64, DT_UINT8}; } + + static TensorType FloatingDataType() { return TensorType{DT_DOUBLE, DT_FLOAT, DT_FLOAT16}; } + + static TensorType IndexNumberType() { return TensorType{DT_INT32, DT_INT64}; } + + static TensorType UnaryDataType() { return 
TensorType{DT_COMPLEX128, DT_COMPLEX64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16}; } + + static TensorType FLOAT() { return TensorType{DT_FLOAT, DT_FLOAT16}; } + + std::shared_ptr tensor_type_impl_; +}; +} // namespace ge + +namespace domi { +enum class ImplyType : unsigned int { + BUILDIN = 0, // Built in operator, normally executed by OME + TVM, // Compile to TVM bin file for execution + CUSTOM, // User defined calculation logic, executed by CPU + AI_CPU, // AICPU + CCE, // Cce + GELOCAL, // GE local, do node need execute by device + HCCL, // Hccl + INVALID = 0xFFFFFFFF, +}; +} // namespace domi + +#endif // INC_EXTERNAL_GRAPH_TYPES_H_ diff --git a/inc/external/register/register.h b/inc/external/register/register.h new file mode 100644 index 000000000..3bbb03189 --- /dev/null +++ b/inc/external/register/register.h @@ -0,0 +1,139 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_EXTERNAL_REGISTER_REGISTER_H_ +#define INC_EXTERNAL_REGISTER_REGISTER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "graph/operator.h" +#include "register/register_error_codes.h" +#include "register/register_fmk_types.h" +#include "register/register_types.h" + +using std::unique_ptr; +using std::map; +using std::make_shared; +using std::to_string; +using std::string; +using std::pair; +using std::vector; + +/*lint -e148*/ +namespace ge { +class Operator; +class TensorDesc; +class Tensor; +class TBEPluginManager; +} + +namespace google { +namespace protobuf { +class Message; +} +} + +namespace domi { +Status AutoMappingFn(const google::protobuf::Message *op_src, ge::Operator &op); +Status AutoMappingFnDynamic(const google::protobuf::Message *op_src, ge::Operator &op, + std::map> dynamic_name_attr_value, + int in_pos = -1, int out_pos = -1); +Status AutoMappingSubgraphIndex(const ge::Graph &graph, + const std::function &input, + const std::function &output); +Status AutoMappingSubgraphIndex(const ge::Graph &graph, + const std::function &input, + const std::function &output); +using google::protobuf::Message; +class OpRegistrationDataImpl; + +using ParseParamFunc = std::function; +using ParseParamByOpFunc = std::function; +using FusionParseParamFunc = std::function, ge::Operator &)>; +using ParseSubgraphFunc = std::function; + +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData { + public: + OpRegistrationData(const std::string &om_optype); + + ~OpRegistrationData(); + + OpRegistrationData &FrameworkType(const domi::FrameworkType &fmk_type); + + OpRegistrationData &OriginOpType(const std::initializer_list &ori_optype_list); + + OpRegistrationData &OriginOpType(const std::string &ori_optype); + + OpRegistrationData &ParseParamsFn(const ParseParamFunc &parseParamFn); + + OpRegistrationData &ParseParamsByOperatorFn(const ParseParamByOpFunc &parse_param_by_op_fn); + + 
OpRegistrationData &FusionParseParamsFn(const FusionParseParamFunc &fusionParseParamFn); + + OpRegistrationData &ParseSubgraphPostFn(const ParseSubgraphFunc &subgraph_post_fn); + + OpRegistrationData &ImplyType(const domi::ImplyType &imply_type); + + OpRegistrationData &DelInputWithCond(int inputIdx, const std::string &attrName, bool attrValue); + + OpRegistrationData &DelInputWithOriginalType(int input_idx, const std::string &ori_type); + + OpRegistrationData &InputReorderVector(const vector &input_order); + + domi::ImplyType GetImplyType () const; + std::string GetOmOptype () const; + std::set GetOriginOpTypeSet () const; + domi::FrameworkType GetFrameworkType() const; + ParseParamFunc GetParseParamFn() const; + ParseParamByOpFunc GetParseParamByOperatorFn() const; + FusionParseParamFunc GetFusionParseParamFn() const; + ParseSubgraphFunc GetParseSubgraphPostFn() const; + + private: + std::shared_ptr impl_; + friend class OpRegistry; + friend class OpRegistrationTbe; + friend class ge::TBEPluginManager; +}; + +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpReceiver { + public: + OpReceiver(OpRegistrationData ®_data); + ~OpReceiver() {} +}; + +#define REGISTER_CUSTOM_OP(name) REGISTER_CUSTOM_OP_UNIQ_HELPER(__COUNTER__, name) +#define REGISTER_CUSTOM_OP_UNIQ_HELPER(ctr, name) REGISTER_CUSTOM_OP_UNIQ(ctr, name) +#define REGISTER_CUSTOM_OP_UNIQ(ctr, name) \ + static OpReceiver register_op##ctr \ + __attribute__((unused)) = \ + OpRegistrationData(name) +} // namespace domi + +namespace ge { +using OpRegistrationData = domi::OpRegistrationData; +using OpReceiver = domi::OpReceiver; +} // namespace ge +/*lint +e148*/ +#endif // INC_EXTERNAL_REGISTER_REGISTER_H_ diff --git a/inc/external/register/register_error_codes.h b/inc/external/register/register_error_codes.h new file mode 100644 index 000000000..a71bb72cd --- /dev/null +++ b/inc/external/register/register_error_codes.h @@ -0,0 +1,39 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_REGISTER_REGISTER_ERROR_CODES_H_ +#define INC_EXTERNAL_REGISTER_REGISTER_ERROR_CODES_H_ + +#define SYSID_FWK 3 // Subsystem ID +#define MODID_COMMON 0 // Common module ID + +#define DECLARE_ERRORNO(sysid, modid, name, value) \ + const domi::Status name = \ + ((0xFF & ((uint8_t)sysid)) << 24) | ((0xFF & ((uint8_t)modid)) << 16) | (0xFFFF & ((uint16_t)value)); + +#define DECLARE_ERRORNO_COMMON(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_COMMON, name, value) + +namespace domi { +using Status = uint32_t; + +// General error code +DECLARE_ERRORNO(0, 0, SUCCESS, 0); +DECLARE_ERRORNO(0xFF, 0xFF, FAILED, 0xFFFFFFFF); +DECLARE_ERRORNO_COMMON(PARAM_INVALID, 1); // 50331649 +DECLARE_ERRORNO(SYSID_FWK, 1, SCOPE_NOT_CHANGED, 201); +} // namespace domi + +#endif // INC_EXTERNAL_REGISTER_REGISTER_ERROR_CODES_H_ diff --git a/inc/external/register/register_fmk_types.h b/inc/external/register/register_fmk_types.h new file mode 100644 index 000000000..976160608 --- /dev/null +++ b/inc/external/register/register_fmk_types.h @@ -0,0 +1,37 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_REGISTER_REGISTER_FMK_TYPES_H_ +#define INC_EXTERNAL_REGISTER_REGISTER_FMK_TYPES_H_ + +#include + +namespace domi { +/// +/// @ingroup domi_omg +/// @brief AI framework types +/// +enum FrameworkType { + CAFFE = 0, + MINDSPORE = 1, + TENSORFLOW = 3, + ANDROID_NN, + ONNX, + FRAMEWORK_RESERVED, +}; +} // namespace domi + +#endif // INC_EXTERNAL_REGISTER_REGISTER_FMK_TYPES_H_ diff --git a/inc/external/register/register_types.h b/inc/external/register/register_types.h new file mode 100644 index 000000000..08d727133 --- /dev/null +++ b/inc/external/register/register_types.h @@ -0,0 +1,59 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_EXTERNAL_REGISTER_REGISTER_TYPES_H_ +#define INC_EXTERNAL_REGISTER_REGISTER_TYPES_H_ + +namespace domi { +#ifdef HOST_VISIBILITY +#define FMK_FUNC_HOST_VISIBILITY __attribute__((visibility("default"))) +#else +#define FMK_FUNC_HOST_VISIBILITY +#endif +#ifdef DEV_VISIBILITY +#define FMK_FUNC_DEV_VISIBILITY __attribute__((visibility("default"))) +#else +#define FMK_FUNC_DEV_VISIBILITY +#endif + +/// CCE defined constant + +/// +/// @ingroup domi +/// @brief original tensor type +/// +typedef enum tagDomiTensorFormat { + DOMI_TENSOR_NCHW = 0, // < NCHW + DOMI_TENSOR_NHWC, // < NHWC + DOMI_TENSOR_ND, // < Nd Tensor + DOMI_TENSOR_NC1HWC0, // < NC1HWC0 + DOMI_TENSOR_FRACTAL_Z, // < FRACTAL_Z + DOMI_TENSOR_NC1C0HWPAD, + DOMI_TENSOR_NHWC1C0, + DOMI_TENSOR_FSR_NCHW, + DOMI_TENSOR_FRACTAL_DECONV, + DOMI_TENSOR_BN_WEIGHT, + DOMI_TENSOR_CHWN, // Android NN Depth CONV + DOMI_TENSOR_FILTER_HWCK, // filter input tensor format + DOMI_TENSOR_NDHWC, + DOMI_TENSOR_NCDHW, + DOMI_TENSOR_DHWCN, // 3D filter input tensor format + DOMI_TENSOR_DHWNC, + DOMI_TENSOR_RESERVED +} domiTensorFormat_t; +} // namespace domi + +#endif // INC_EXTERNAL_REGISTER_REGISTER_TYPES_H_ diff --git a/inc/external/register/scope/scope_fusion_pass_register.h b/inc/external/register/scope/scope_fusion_pass_register.h new file mode 100644 index 000000000..05eaecbdf --- /dev/null +++ b/inc/external/register/scope/scope_fusion_pass_register.h @@ -0,0 +1,283 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef EXTERNAL_REGISTER_SCOPE_SCOPE_FUSION_PASS_REGISTER_H_ +#define EXTERNAL_REGISTER_SCOPE_SCOPE_FUSION_PASS_REGISTER_H_ + +#include +#include +#include +#include +#include "ge/ge_api_error_codes.h" +#include "register/register_error_codes.h" +#include "register/register_types.h" +#include "graph/operator.h" + +namespace domi { +class TensorFlowModelParser; +} // namespace domi +namespace ge { +const int32_t kFusionDisableIndex = 99999; +class ScopePattern; +using ScopeFusionPatterns = std::vector>; + +class ScopePassManager; + +class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY Scope { + public: + explicit Scope(const std::string &name, const std::string &sub_type = "", Scope *father_scope = nullptr); + ~Scope(); + + std::string Name() const; + std::string SubType() const; + std::map AllNodesMap() const; + Scope *GetSubScope(const std::string &scope_name) const; + std::string LastName() const; + std::vector GetAllSubScopes() const; + const Scope *GetFatherScope() const; + + private: + class ScopeImpl; + std::unique_ptr impl_; + friend class ScopeBasePass; + friend class ScopeTree; + friend class NodeOpTypeFeature; + friend class NodeAttrFeature; + friend class ScopeFeature; +}; + +class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY FusionScopesResult { + public: + FusionScopesResult(); + ~FusionScopesResult(); + void SetName(const std::string &name); + void SetType(const std::string &type); + void SetDescription(const std::string &description); + std::string Name() const; + std::vector Nodes() const; + void InsertInputs(const std::string &inner_op_name, const std::vector &index_map); + void InsertOutputs(const std::string &inner_op_name, const std::vector &index_map); + + private: + class FusionScopesResultImpl; + std::unique_ptr impl_; + friend class ScopeGraph; + friend class ScopeBasePass; + friend class domi::TensorFlowModelParser; +}; + 
+class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeTree { + public: + ScopeTree(); + Status Init(); + ScopeTree(const ScopeTree &scopetree) = delete; + ScopeTree &operator=(const ScopeTree &scopetree) = delete; + ~ScopeTree(); + + std::vector GetAllScopes() const; + + private: + class ScopeTreeImpl; + std::unique_ptr impl_; + friend class ScopeGraph; + friend class ScopeBasePass; +}; + +class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeGraph { + public: + ScopeGraph(); + Status Init(); + ScopeGraph(const ScopeGraph &scope_graph) = delete; + ScopeGraph &operator=(const ScopeGraph &scope_graph) = delete; + ~ScopeGraph(); + + const ScopeTree *GetScopeTree() const; + std::map GetNodesMap() const; + + private: + class ScopeGraphImpl; + std::unique_ptr impl_; + friend class ScopePassManager; + friend class ScopeBasePass; + friend class domi::TensorFlowModelParser; +}; + +class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeAttrValue { + public: + ScopeAttrValue(); + ScopeAttrValue(ScopeAttrValue const &attr_value); + ScopeAttrValue &operator=(ScopeAttrValue const &attr_value); + ~ScopeAttrValue(); + + void SetIntValue(int64_t value); + void SetFloatValue(float value); + void SetStringValue(std::string value); + void SetBoolValue(bool value); + + private: + class ScopeAttrValueImpl; + std::unique_ptr impl_; + friend class NodeAttrFeature; +}; + +class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeBaseFeature { + public: + virtual bool Match(const Scope *scope) = 0; + virtual ~ScopeBaseFeature(){}; +}; + +class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY NodeOpTypeFeature : ScopeBaseFeature { + public: + NodeOpTypeFeature(std::string nodeType, int num, int step = 0); + NodeOpTypeFeature(NodeOpTypeFeature const &feature); + NodeOpTypeFeature &operator=(NodeOpTypeFeature const &feature); + ~NodeOpTypeFeature(); + bool Match(const Scope *scope) override; + + private: + class NodeOpTypeFeatureImpl; + std::unique_ptr impl_; +}; + +class 
GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY NodeAttrFeature : ScopeBaseFeature { + public: + NodeAttrFeature(std::string nodeType, std::string attr_name, ge::DataType datatype, ScopeAttrValue attr_value); + NodeAttrFeature(NodeAttrFeature const &feature); + NodeAttrFeature &operator=(NodeAttrFeature const &feature); + ~NodeAttrFeature(); + bool Match(const Scope *scope) override; + + private: + class NodeAttrFeatureImpl; + std::unique_ptr impl_; +}; + +class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeFeature : ScopeBaseFeature { + public: + ScopeFeature(std::string sub_type, int32_t num, std::string suffix = "", + std::string sub_scope_mask = "", int step = 0); + ScopeFeature(ScopeFeature const &feature); + ScopeFeature &operator=(ScopeFeature const &feature); + ~ScopeFeature(); + bool Match(const Scope *scope) override; + + private: + class ScopeFeatureImpl; + std::unique_ptr impl_; +}; + +class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopePattern { + public: + ScopePattern(); + ~ScopePattern(); + + ScopePattern &SetSubType(const std::string &sub_type); + ScopePattern &AddNodeOpTypeFeature(NodeOpTypeFeature feature); + ScopePattern &AddNodeAttrFeature(NodeAttrFeature feature); + ScopePattern &AddScopeFeature(ScopeFeature feature); + + private: + class ScopePatternImpl; + std::unique_ptr impl_; + friend class ScopeBasePass; +}; + +class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopesResult { + public: + ScopesResult(); + ScopesResult(ScopesResult const &result); + ScopesResult &operator=(ScopesResult const &result); + ~ScopesResult(); + + void SetScopes(std::vector &scopes); + void SetNodes(std::vector &nodes); + + private: + class ScopesResultImpl; + std::unique_ptr impl_; + friend class ScopeBasePass; +}; + +class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeBasePass { + public: + ScopeBasePass(); + virtual ~ScopeBasePass(); + + protected: + // Subclasses implement respective fusion strategies and build the Patterns + virtual 
std::vector DefinePatterns() = 0; + // Define the name of the scope pass + virtual std::string PassName() = 0; + // Subclasses implement respective multi-scope or operator fusion methods across scopes + virtual Status LastMatchScopesAndOPs(std::shared_ptr &scope_graph, + std::vector &results) = 0; + // Subclasses implement their own results and set the input and output of the final fusion operator + virtual void GenerateFusionResult(const std::vector &scopes, FusionScopesResult *fusion_rlt) = 0; + + private: + class ScopeBasePassImpl; + std::unique_ptr impl_; + friend class ge::ScopePassManager; + friend class ScopeBasePassImpl; +}; + +class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeFusionPassRegistry { + public: + using CreateFn = ScopeBasePass *(*)(); + ~ScopeFusionPassRegistry(); + + static ScopeFusionPassRegistry &GetInstance() { + static ScopeFusionPassRegistry instance; + return instance; + } + + void RegisterScopeFusionPass(const std::string &pass_name, CreateFn create_fn, bool is_general); + + private: + ScopeFusionPassRegistry(); + class ScopeFusionPassRegistryImpl; + /*lint -e148*/ + std::unique_ptr impl_; + friend class domi::TensorFlowModelParser; +}; + +class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeUtil { + public: + static std::string StringReplaceAll(std::string str, const std::string &old_value, const std::string &new_value); + static void FreeScopePatterns(ScopeFusionPatterns &patterns); + static void FreeOneBatchPattern(std::vector &one_batch_pattern); +}; + +class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeFusionPassRegistrar { + public: + ScopeFusionPassRegistrar(const char *pass_name, ScopeBasePass *(*create_fn)(), bool is_general); + ~ScopeFusionPassRegistrar() {} +}; + +#define REGISTER_SCOPE_FUSION_PASS(pass_name, scope_pass, is_general) \ + REGISTER_SCOPE_FUSION_PASS_UNIQ_HELPER(__COUNTER__, pass_name, scope_pass, is_general) + +#define REGISTER_SCOPE_FUSION_PASS_UNIQ_HELPER(ctr, pass_name, scope_pass, 
is_general) \ + REGISTER_SCOPE_FUSION_PASS_UNIQ(ctr, pass_name, scope_pass, is_general) + +#define REGISTER_SCOPE_FUSION_PASS_UNIQ(ctr, pass_name, scope_pass, is_general) \ + static ::ge::ScopeFusionPassRegistrar register_scope_fusion_pass##ctr __attribute__((unused)) = \ + ::ge::ScopeFusionPassRegistrar(pass_name, \ + []() -> ::ge::ScopeBasePass * { return new (std::nothrow) scope_pass(); }, \ + is_general) +} // namespace ge + +#endif // EXTERNAL_REGISTER_SCOPE_SCOPE_FUSION_PASS_REGISTER_H_ diff --git a/inc/framework/common/aicpu_op.h b/inc/framework/common/aicpu_op.h new file mode 100644 index 000000000..850ceca34 --- /dev/null +++ b/inc/framework/common/aicpu_op.h @@ -0,0 +1,22 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_AICPU_OP_H_ +#define INC_FRAMEWORK_COMMON_AICPU_OP_H_ + +#include "cce/customize.h" + +#endif // INC_FRAMEWORK_COMMON_AICPU_OP_H_ diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h new file mode 100644 index 000000000..6ac000373 --- /dev/null +++ b/inc/framework/common/debug/ge_log.h @@ -0,0 +1,85 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_DEBUG_GE_LOG_H_ +#define INC_FRAMEWORK_COMMON_DEBUG_GE_LOG_H_ + +#include +#include +#include + +#include "framework/common/ge_inner_error_codes.h" +#include "toolchain/slog.h" + +#define GE_MODULE_NAME static_cast(GE) + +// trace status of log +enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; + +#define GELOGE(ERROR_CODE, ...) GE_LOG_ERROR(GE_MODULE_NAME, ERROR_CODE, __VA_ARGS__) +#define GELOGW(...) GE_LOG_WARN(GE_MODULE_NAME, __VA_ARGS__) +#define GELOGI(...) GE_LOG_INFO(GE_MODULE_NAME, __VA_ARGS__) +#define GELOGD(...) GE_LOG_DEBUG(GE_MODULE_NAME, __VA_ARGS__) +#define GEEVENT(...) GE_LOG_EVENT(GE_MODULE_NAME, __VA_ARGS__) +#define GELOGO(...) GE_LOG_OPLOG(GE_MODULE_NAME, __VA_ARGS__) +#define GELOGT(VALUE, ...) GE_LOG_TRACE(GE_MODULE_NAME, VALUE, __VA_ARGS__) + +inline bool IsLogEnable(int module_name, int log_level) { + int32_t enable = CheckLogLevel(module_name, log_level); + // 1:enable, 0:disable + if (enable == 1) { + return true; + } + return false; +} + +inline pid_t GetTid() { + thread_local static pid_t tid = syscall(__NR_gettid); + return tid; +} + +#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ + dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GetTid(), __FUNCTION__, ERROR_CODE, \ + ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) +#define GE_LOG_WARN(MOD_NAME, fmt, ...) \ + if (IsLogEnable(MOD_NAME, DLOG_WARN)) dlog_warn(MOD_NAME, "%lu %s:" fmt, GetTid(), __FUNCTION__, ##__VA_ARGS__) +#define GE_LOG_INFO(MOD_NAME, fmt, ...) 
\ + if (IsLogEnable(MOD_NAME, DLOG_INFO)) dlog_info(MOD_NAME, "%lu %s:" fmt, GetTid(), __FUNCTION__, ##__VA_ARGS__) +#define GE_LOG_DEBUG(MOD_NAME, fmt, ...) \ + if (IsLogEnable(MOD_NAME, DLOG_DEBUG)) dlog_debug(MOD_NAME, "%lu %s:" fmt, GetTid(), __FUNCTION__, ##__VA_ARGS__) +#define GE_LOG_EVENT(MOD_NAME, fmt, ...) dlog_event(MOD_NAME, "%lu %s:" fmt, GetTid(), __FUNCTION__, ##__VA_ARGS__) +#define GE_LOG_OPLOG(MOD_NAME, fmt, ...) \ + Dlog(MOD_NAME, DLOG_OPLOG, "%lu %s:" fmt, GetTid(), __FUNCTION__, ##__VA_ARGS__) + +#define GE_LOG_TRACE(MOD_NAME, value, fmt, ...) \ + do { \ + TraceStatus stat = value; \ + const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ + int idx = static_cast(stat); \ + char *k = const_cast("status"); \ + char *v = const_cast(TraceStatStr[idx]); \ + KeyValue kv = {k, v}; \ + DlogWithKV(static_cast(MOD_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GetTid(), __FUNCTION__, ##__VA_ARGS__); \ + } while (0) + +// print memory when it is greater than 1KB. +#define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ + do { \ + if ((SIZE) > 1024) { \ + GELOGI("MallocMemory, func=%s, size=%zu, purpose=%s", (#FUNC), static_cast(SIZE), (PURPOSE)); \ + } \ + } while (0); +#endif // INC_FRAMEWORK_COMMON_DEBUG_GE_LOG_H_ diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h new file mode 100644 index 000000000..6d4499191 --- /dev/null +++ b/inc/framework/common/debug/log.h @@ -0,0 +1,256 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_DEBUG_LOG_H_ +#define INC_FRAMEWORK_COMMON_DEBUG_LOG_H_ + +#include + +#include "runtime/rt.h" +#include "common/string_util.h" +#include "common/util.h" +#include "framework/common/debug/ge_log.h" +#include "ge/ge_api_error_codes.h" + +#if !defined(__ANDROID__) && !defined(ANDROID) +#define DOMI_LOGE(...) GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, __VA_ARGS__) +#else +#include +#if defined(BUILD_VERSION_PERF) +#define DOMI_LOGE(fmt, ...) +#else +// The Android system has strict log control. Do not modify the log. +#define DOMI_LOGE(fmt, ...) \ + __android_log_print(ANDROID_LOG_ERROR, "NPU_FMK", "%s %s(%d)::" #fmt, __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__) +#endif +#endif + +// ge marco +#define GE_LOGI_IF(condition, ...) \ + if ((condition)) { \ + GELOGI(__VA_ARGS__); \ + } + +#define GE_LOGW_IF(condition, ...) \ + if ((condition)) { \ + GELOGW(__VA_ARGS__); \ + } + +#define GE_LOGE_IF(condition, ...) \ + if ((condition)) { \ + DOMI_LOGE(__VA_ARGS__); \ + } + +// If expr is not SUCCESS, print the log and return the same value +#define GE_CHK_STATUS_RET(expr, ...) \ + do { \ + const ge::Status _status = (expr); \ + if (_status != ge::SUCCESS) { \ + DOMI_LOGE(__VA_ARGS__); \ + return _status; \ + } \ + } while (0); + +// If expr is not SUCCESS, print the log and do not execute return +#define GE_CHK_STATUS(expr, ...) \ + do { \ + const ge::Status _status = (expr); \ + if (_status != ge::SUCCESS) { \ + DOMI_LOGE(__VA_ARGS__); \ + } \ + } while (0); + +// If expr is not SUCCESS, return the same value +#define GE_CHK_STATUS_RET_NOLOG(expr) \ + do { \ + const ge::Status _status = (expr); \ + if (_status != ge::SUCCESS) { \ + return _status; \ + } \ + } while (0); + +// If expr is not GRAPH_SUCCESS, print the log and return FAILED +#define GE_CHK_GRAPH_STATUS_RET(expr, ...) 
\ + do { \ + if ((expr) != ge::GRAPH_SUCCESS) { \ + DOMI_LOGE(__VA_ARGS__); \ + return FAILED; \ + } \ + } while (0); + +// If expr is not SUCCESS, print the log and execute a custom statement +#define GE_CHK_STATUS_EXEC(expr, exec_expr, ...) \ + do { \ + const ge::Status _status = (expr); \ + GE_CHK_BOOL_EXEC(_status == SUCCESS, exec_expr, __VA_ARGS__); \ + } while (0); + +// If expr is not true, print the log and return the specified status +#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \ + do { \ + bool b = (expr); \ + if (!b) { \ + GELOGE(_status, __VA_ARGS__); \ + return _status; \ + } \ + } while (0); + +// If expr is not true, print the log and return the specified status +#define GE_CHK_BOOL_RET_STATUS_NOLOG(expr, _status, ...) \ + do { \ + bool b = (expr); \ + if (!b) { \ + return _status; \ + } \ + } while (0); + +// If expr is not true, print the log and execute a custom statement +#define GE_CHK_BOOL_EXEC(expr, exec_expr, ...) \ + { \ + bool b = (expr); \ + if (!b) { \ + DOMI_LOGE(__VA_ARGS__); \ + exec_expr; \ + } \ + } + +// If expr is not true, print the log and execute a custom statement +#define GE_CHK_BOOL_EXEC_WARN(expr, exec_expr, ...) \ + { \ + bool b = (expr); \ + if (!b) { \ + GELOGW(__VA_ARGS__); \ + exec_expr; \ + } \ + } +// If expr is not true, print the log and execute a custom statement +#define GE_CHK_BOOL_EXEC_INFO(expr, exec_expr, ...) \ + { \ + bool b = (expr); \ + if (!b) { \ + GELOGI(__VA_ARGS__); \ + exec_expr; \ + } \ + } + +// If expr is not true, print the log and execute a custom statement +#define GE_CHK_BOOL_TRUE_EXEC_INFO(expr, exec_expr, ...) \ + { \ + bool b = (expr); \ + if (b) { \ + GELOGI(__VA_ARGS__); \ + exec_expr; \ + } \ + } + +// If expr is true, print logs and execute custom statements +#define GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(expr, exec_expr, ...) 
\ + { \ + bool b = (expr); \ + if (b) { \ + DOMI_LOGE(__VA_ARGS__); \ + exec_expr; \ + } \ + } +// If expr is true, print the Information log and execute a custom statement +#define GE_CHK_TRUE_EXEC_INFO(expr, exec_expr, ...) \ + { \ + bool b = (expr); \ + if (b) { \ + GELOGI(__VA_ARGS__); \ + exec_expr; \ + } \ + } + +// If expr is not SUCCESS, print the log and execute the expression + return +#define GE_CHK_BOOL_TRUE_RET_VOID(expr, exec_expr, ...) \ + { \ + bool b = (expr); \ + if (b) { \ + DOMI_LOGE(__VA_ARGS__); \ + exec_expr; \ + return; \ + } \ + } + +// If expr is not SUCCESS, print the log and execute the expression + return _status +#define GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(expr, _status, exec_expr, ...) \ + { \ + bool b = (expr); \ + if (b) { \ + DOMI_LOGE(__VA_ARGS__); \ + exec_expr; \ + return _status; \ + } \ + } + +// If expr is not true, execute a custom statement +#define GE_CHK_BOOL_EXEC_NOLOG(expr, exec_expr) \ + { \ + bool b = (expr); \ + if (!b) { \ + exec_expr; \ + } \ + } + +// -----------------runtime related macro definitions------------------------------- +// If expr is not RT_ERROR_NONE, print the log +#define GE_CHK_RT(expr) \ + do { \ + rtError_t _rt_ret = (expr); \ + if (_rt_ret != RT_ERROR_NONE) { \ + DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ + } \ + } while (0); + +// If expr is not RT_ERROR_NONE, print the log and execute the exec_expr expression +#define GE_CHK_RT_EXEC(expr, exec_expr) \ + { \ + rtError_t _rt_ret = (expr); \ + if (_rt_ret != RT_ERROR_NONE) { \ + DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ + exec_expr; \ + } \ + } + +// If expr is not RT_ERROR_NONE, print the log and return +#define GE_CHK_RT_RET(expr) \ + do { \ + rtError_t _rt_ret = (expr); \ + if (_rt_ret != RT_ERROR_NONE) { \ + DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ + return RT_ERROR_TO_GE_STATUS(_rt_ret); \ + } \ + } while (0); + +// If expr is true, execute exec_expr without printing logs +#define GE_IF_BOOL_EXEC(expr, 
exec_expr) \ + { \ + if (expr) { \ + exec_expr; \ + } \ + } + +// If make_shared is abnormal, print the log and execute the statement +#define GE_MAKE_SHARED(exec_expr0, exec_expr1) \ + try { \ + exec_expr0; \ + } catch (const std::bad_alloc &) { \ + DOMI_LOGE("Make shared failed"); \ + exec_expr1; \ + } + +#endif // INC_FRAMEWORK_COMMON_DEBUG_LOG_H_ diff --git a/inc/framework/common/fmk_error_codes.h b/inc/framework/common/fmk_error_codes.h new file mode 100644 index 000000000..ec1f26d05 --- /dev/null +++ b/inc/framework/common/fmk_error_codes.h @@ -0,0 +1,85 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ +#define INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ + +#include +#include + +#include "framework/common/fmk_types.h" +#include "register/register_error_codes.h" + +#define MODID_OMG 1 // OMG module ID +#define MODID_OME 2 // OME module ID +#define MODID_CALIBRATION 3 // Calibration module ID + +// Each module uses the following four macros to define error codes: +#define DECLARE_ERRORNO_OMG(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OMG, name, value) +#define DECLARE_ERRORNO_OME(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OME, name, value) +#define DECLARE_ERRORNO_CALIBRATION(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_CALIBRATION, name, value) + +#define DEF_ERRORNO(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc); + +// Interface for Obtaining Error Code Description +#define GET_ERRORNO_STR(value) domi::StatusFactory::Instance()->GetErrDesc(value) + +namespace domi { +class StatusFactory { + public: + static StatusFactory *Instance(); + + void RegisterErrorNo(uint32_t err, const std::string &desc); + + std::string GetErrDesc(uint32_t err); + + protected: + StatusFactory() {} + ~StatusFactory() {} + + private: + std::map err_desc_; +}; + +class ErrorNoRegisterar { + public: + ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } + ~ErrorNoRegisterar() {} +}; + +// Common errocode +DECLARE_ERRORNO_COMMON(MEMALLOC_FAILED, 0); // 50331648 +DECLARE_ERRORNO_COMMON(CCE_FAILED, 2); // 50331650 +DECLARE_ERRORNO_COMMON(RT_FAILED, 3); // 50331651 +DECLARE_ERRORNO_COMMON(INTERNAL_ERROR, 4); // 50331652 +DECLARE_ERRORNO_COMMON(CSEC_ERROR, 5); // 50331653 +DECLARE_ERRORNO_COMMON(TEE_ERROR, 6); // 50331653 +DECLARE_ERRORNO_COMMON(UNSUPPORTED, 100); +DECLARE_ERRORNO_COMMON(OUT_OF_MEMORY, 101); + +// Omg errorcode +DECLARE_ERRORNO_OMG(PARSE_MODEL_FAILED, 0); +DECLARE_ERRORNO_OMG(PARSE_WEIGHTS_FAILED, 1); +DECLARE_ERRORNO_OMG(NOT_INITIALIZED, 
2); +DECLARE_ERRORNO_OMG(TIMEOUT, 3); + +// Ome errorcode +DECLARE_ERRORNO_OME(MODEL_NOT_READY, 0); +DECLARE_ERRORNO_OME(PUSH_DATA_FAILED, 1); +DECLARE_ERRORNO_OME(DATA_QUEUE_ISFULL, 2); +} // namespace domi + +#endif // INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ diff --git a/inc/framework/common/fmk_types.h b/inc/framework/common/fmk_types.h new file mode 100644 index 000000000..f84390dac --- /dev/null +++ b/inc/framework/common/fmk_types.h @@ -0,0 +1,23 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_FMK_TYPES_H_ +#define INC_FRAMEWORK_COMMON_FMK_TYPES_H_ + +#include "graph/types.h" +#include "register/register_types.h" + +#endif // INC_FRAMEWORK_COMMON_FMK_TYPES_H_ diff --git a/inc/framework/common/ge_format_util.h b/inc/framework/common/ge_format_util.h new file mode 100644 index 000000000..9b1d77869 --- /dev/null +++ b/inc/framework/common/ge_format_util.h @@ -0,0 +1,40 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_GE_FORMAT_UTIL_H_ +#define INC_FRAMEWORK_COMMON_GE_FORMAT_UTIL_H_ + +#include + +#include "common/ge_inner_error_codes.h" +#include "graph/tensor.h" + +namespace ge { +class GeFormatUtil { + public: + /// + /// @name TransShape + /// @brief transform the shape of tensor according to destination format + /// @param [in] src_desc source tensor desc + /// @param [in] dst_format destination format + /// @param [out] dst_shape destination shape + /// @return Status + /// + static Status TransShape(const TensorDesc &src_desc, Format dst_format, std::vector &dst_shape); +}; +} // namespace ge + +#endif // INC_FRAMEWORK_COMMON_GE_FORMAT_UTIL_H_ diff --git a/inc/framework/common/ge_inner_error_codes.h b/inc/framework/common/ge_inner_error_codes.h new file mode 100644 index 000000000..cc044cb1c --- /dev/null +++ b/inc/framework/common/ge_inner_error_codes.h @@ -0,0 +1,313 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*lint -e* */ +#ifndef INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ +#define INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ + +#include +#include +#include "ge/ge_api_error_codes.h" + +namespace ge { +// System ID +enum SystemIdType { SYSID_GE = 8 }; +// Runtime location +enum LogRuntime { + RT_HOST = 0b01, + RT_DEVICE = 0b10, +}; + +// Sub model +enum SubModuleId { + COMMON_MODULE = 0, + CLIENT_MODULE = 1, + INIT_MODULE = 2, + SESSION_MODULE = 3, + GRAPH_MODULE = 4, + ENGINE_MODULE = 5, + OPS_MODULE = 6, + PLUGIN_MODULE = 7, + RUNTIME_MODULE = 8, + EXECUTOR_MODULE = 9, + GENERATOR_MODULE = 10, +}; + +// Error code type +enum ErrorCodeType { + ERROR_CODE = 0b01, + EXCEPTION_CODE = 0b10, +}; + +// Error level +enum ErrorLevel { + COMMON_LEVEL = 0b000, + SUGGESTION_LEVEL = 0b001, + MINOR_LEVEL = 0b010, + MAJOR_LEVEL = 0b011, + CRITICAL_LEVEL = 0b100, +}; + +// Each module defines error codes using the following macros +#define GE_ERRORNO_COMMON(name, value, desc) \ + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, value, desc) +#define GE_ERRORNO_CLIENT(name, value, desc) \ + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, value, desc) +#define GE_ERRORNO_INIT(name, value, desc) \ + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, value, desc) +#define GE_ERRORNO_SESSION(name, value, desc) \ + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, value, desc) +#define GE_ERRORNO_GRAPH(name, value, desc) \ + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, value, desc) +#define GE_ERRORNO_ENGINE(name, value, desc) \ + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, value, desc) +#define GE_ERRORNO_OPS(name, value, desc) \ + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, value, desc) +#define GE_ERRORNO_PLUGIN(name, value, desc) \ + GE_ERRORNO(RT_HOST, ERROR_CODE, 
COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, value, desc) +#define GE_ERRORNO_RUNTIME(name, value, desc) \ + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, value, desc) +#define GE_ERRORNO_EXECUTOR(name, value, desc) \ + GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, value, desc) +#define GE_ERRORNO_GENERATOR(name, value, desc) \ + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, value, desc) + +// Get error code description +#define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value) + +// Common module error code definition +GE_ERRORNO_COMMON(MEMALLOC_FAILED, 0, "Failed to allocate memory!"); // 1343225856 +GE_ERRORNO_COMMON(PARAM_INVALID, 1, "Parameter's invalid!"); // 1343225857 +GE_ERRORNO_COMMON(CCE_FAILED, 2, "Failed to call CCE API!"); // 1343225858 +GE_ERRORNO_COMMON(RT_FAILED, 3, "Failed to call runtime API!"); // 1343225859 +GE_ERRORNO_COMMON(INTERNAL_ERROR, 4, "Internal errors"); // 1343225860 +GE_ERRORNO_COMMON(CSEC_ERROR, 5, "Failed to call libc_sec API!"); // 1343225861 +GE_ERRORNO_COMMON(TEE_ERROR, 6, "Failed to call tee API!"); // 1343225862 +GE_ERRORNO_COMMON(END_OF_SEQUENCE, 7, "End of sequence!"); // 1343225863 + +// Error code for plugin manager +GE_ERRORNO_COMMON(GE_PLGMGR_PATH_INVALID, 30, "Path is invalid!"); // 1343225886 +GE_ERRORNO_COMMON(GE_PLGMGR_SO_NOT_EXIST, 31, "Failed to find any valid so file!"); // 1343225887 +GE_ERRORNO_COMMON(GE_PLGMGR_FUNC_NOT_EXIST, 32, "Failed to find any function!"); // 1343225888 +GE_ERRORNO_COMMON(GE_PLGMGR_INVOKE_FAILED, 33, "Failed to invoke any function!"); // 1343225889 + +GE_ERRORNO_COMMON(UNSUPPORTED, 100, "Parameter's unsupported!"); + +GE_ERRORNO_COMMON(OUT_OF_MEMORY, 101, "Out of memory!"); + +// Client module error code definition +GE_ERRORNO_CLIENT(GE_CLI_INIT_FAILED, 1, "GEInitialize Failed."); // 1343229953 +GE_ERRORNO_CLIENT(GE_CLI_FINAL_FAILED, 2, "GEFinalize Failed."); // 
1343229954 +GE_ERRORNO_CLIENT(GE_CLI_SESS_CONSTRUCT_FAILED, 3, "Session constructor Failed."); // 1343229955 +GE_ERRORNO_CLIENT(GE_CLI_SESS_DESTROY_FAILED, 4, "Session destructor Failed."); // 1343229956 +GE_ERRORNO_CLIENT(GE_CLI_SESS_ADD_FAILED, 5, "Session AddGraph Failed."); // 1343229957 +GE_ERRORNO_CLIENT(GE_CLI_SESS_ADD_GRAPH_FAILED, 6, + "Session AddGraph Failed converting protobuf GraphProto."); // 1343229958 +GE_ERRORNO_CLIENT(GE_CLI_SESS_REMOVE_FAILED, 7, "Session RemoveGraph Failed."); // 1343229959 +GE_ERRORNO_CLIENT(GE_CLI_SESS_RUN_FAILED, 8, "Session RunGraph Failed."); // 1343229960 +GE_ERRORNO_CLIENT(GE_CLI_SESS_RUN_TENSOR_FAILED, 9, + "Session RunGraph Failed converting protobuf TensorProto."); // 1343229961 +GE_ERRORNO_CLIENT(GE_CLI_GE_ALREADY_INITIALIZED, 10, "GE is already initialized."); // 1343229962 +GE_ERRORNO_CLIENT(GE_CLI_GE_NOT_INITIALIZED, 11, "GE is not yet initialized or is finalized."); // 1343229963 + +// Init module error code definition +GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 +GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 +GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 + +// Session module error code definition +GE_ERRORNO_SESSION(GE_SESS_INIT_FAILED, 0, "Failed to initialize session."); // 1343238144 +GE_ERRORNO_SESSION(GE_SESS_ALREADY_RUNNING, 1, "Session already running,not support parallel run."); // 1343238145 +GE_ERRORNO_SESSION(GE_SESS_GRAPH_NOT_EXIST, 2, "Graph ID not exist."); // 1343238146 +GE_ERRORNO_SESSION(GE_SESS_GRAPH_ALREADY_EXIST, 3, "Graph ID already exist."); // 1343238147 +GE_ERRORNO_SESSION(GE_SESS_GRAPH_IS_RUNNING, 4, "Graph is running."); // 1343238148 +GE_ERRORNO_SESSION(GE_SESSION_NOT_EXIST, 5, "Can not find session with specific session id."); // 1343238149 +GE_ERRORNO_SESSION(GE_SESSION_MANAGER_NOT_INIT, 6, "Session manager has not been 
initialized."); // 1343238150 + +// Graph module error code definition +GE_ERRORNO_GRAPH(GE_GRAPH_INIT_FAILED, 0, "Failed to initialize graph."); // 1343242240 +GE_ERRORNO_GRAPH(GE_GRAPH_ALREADY_RUNNING, 1, "graph already running,not support parallel run."); // 1343242241 +GE_ERRORNO_GRAPH(GE_GRAPH_GRAPH_NOT_EXIST, 2, "graph ID not exist."); // 1343242242 +GE_ERRORNO_GRAPH(GE_GRAPH_GRAPH_ALREADY_EXIST, 3, "Graph ID already exist."); // 1343242243 +GE_ERRORNO_GRAPH(GE_GRAPH_GRAPH_IS_RUNNING, 4, "Graph is running."); // 1343242244 +GE_ERRORNO_GRAPH(GE_GRAPH_MALLOC_FAILED, 5, "Graph malloc failed."); // 1343242245 +GE_ERRORNO_GRAPH(GE_GRAPH_FREE_FAILED, 6, "Graph FREE failed."); // 1343242246 +GE_ERRORNO_GRAPH(GE_GRAPH_NOT_MALLOC_BUFFER, 7, "Graph FREE failed, not malloc buffer."); // 1343242247 +GE_ERRORNO_GRAPH(GE_GRAPH_PARAM_NULLPTR, 8, "Graph param is NULL."); // 1343242248 +GE_ERRORNO_GRAPH(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, 9, "Get computeGraph by graphNode failed."); // 1343242249 +GE_ERRORNO_GRAPH(GE_GRAPH_OPTIMIZE_RUN_GRAPH_NODE_NULL, 10, "Run graph node is null."); // 1343242250 +GE_ERRORNO_GRAPH(GE_GRAPH_OPTIMIZE_RUN_GRAPH_INVALID, 11, "Get computeGraph by graphNode failed."); // 1343242251 +GE_ERRORNO_GRAPH(GE_GRAPH_OPTIMIZE_INSERT_DYN_OP_FAILED, 12, "Graph which insert dynamic op failed."); // 1343242252 +GE_ERRORNO_GRAPH(GE_GRAPH_OPTIMIZE_PREPROCESS_FAILED, 13, "Graph preprocess failed."); // 1343242253 +GE_ERRORNO_GRAPH(GE_GRAPH_OPTIMIZE_GRAPH_FUSION_FAILED, 14, "Graph fusion failed."); // 1343242254 +GE_ERRORNO_GRAPH(GE_GRAPH_OPTIMIZE_CALIBRATION_FAILED, 16, "Calibration failed."); // 1343242256 +GE_ERRORNO_GRAPH(GE_GRAPH_SUBGRAPH_NUM_ZERO, 17, "Graph partition success, but subGraph num is 0."); // 1343242257 +GE_ERRORNO_GRAPH(GE_GRAPH_SUBGRAPH_ENGINENAME_REPEATED, 18, "Graph subGraph engine name is repeated."); // 1343242258 +GE_ERRORNO_GRAPH(GE_GRAPH_GET_IN_OUT_FAILED, 19, "OME GetInputOutputDescInfo failed."); // 1343242259 
+GE_ERRORNO_GRAPH(GE_GRAPH_DATA_INPUT_FAILED, 20, "OME DataInput failed."); // 1343242260 +GE_ERRORNO_GRAPH(GE_GRAPH_EXECUTE_FAILED, 21, "Execute graph failed."); // 1343242261 +GE_ERRORNO_GRAPH(GE_GRAPH_DUPLICATE_ENGINE, 22, "Duplicate engine."); // 1343242262 +GE_ERRORNO_GRAPH(GE_GRAPH_EMPTY_SUBGRAPH, 23, "Empty sub graph info."); // 1343242263 +GE_ERRORNO_GRAPH(GE_GRAPH_EXECUTE_NOT_INIT, 24, "Call SetCondition first."); // 1343242264 +GE_ERRORNO_GRAPH(GE_GRAPH_PREPARE_FAILED, 25, "Prepare failed."); // 1343242265 +GE_ERRORNO_GRAPH(GE_GRAPH_SERIALIZE_FAILED, 26, "OMG SerializeModelDef failed."); // 1343242266 +GE_ERRORNO_GRAPH(GE_GRAPH_SAVE_FAILED, 27, "OMG SaveModel failed."); // 1343242267 +GE_ERRORNO_GRAPH(GE_GRAPH_PRERUN_FAILED, 28, "PreRun failed."); // 1343242268 +GE_ERRORNO_GRAPH(GE_GRAPH_SUBGRAPH_ID_INVALID, 29, "Graph subGraph id is invalid."); // 1343242269 +GE_ERRORNO_GRAPH(GE_GRAPH_INFERSHAPE_FAILED, 30, "Prepare Graph infershape failed"); // 1343242270 +GE_ERRORNO_GRAPH(GE_GRAPH_ISNULL, 31, "RunGraph input compute graph is NULL."); // 1343242271 +GE_ERRORNO_GRAPH(GE_GRAPH_SYNC_MODEL_FAILED, 32, "Graph SyncExecuteModel failed."); // 1343242272 +GE_ERRORNO_GRAPH(GE_GRAPH_RUNGRAPH_FAILED, 33, "Graph RunGraph failed."); // 1343242273 +GE_ERRORNO_GRAPH(GE_GRAPH_OPTIMIZE_PARSE_DYN_OP_FAILED, 34, "Parse dynamic node config file failed"); // 1343242274 +GE_ERRORNO_GRAPH(GE_GRAPH_MULTI_SUBGRAPH_BUILD, 35, "Save model with multiple sub graph"); // 1343242275 +GE_ERRORNO_GRAPH(GE_GRAPH_GRAPH_NODE_NULL, 36, "Graph get graph node failed."); // 1343242276 +GE_ERRORNO_GRAPH(GE_GRAPH_NOT_INIT, 37, "Graph do not init."); // 1343242277 +GE_ERRORNO_GRAPH(GE_GRAPH_NULL_INPUT, 38, "input graph is null"); // 1343242278 +GE_ERRORNO_GRAPH(GE_GRAPH_TOPO_SORT_FAILED, 39, "topological sorting an partition failed"); // 1343242279 +GE_ERRORNO_GRAPH(GE_GRAPH_EMPTY_PARTITION, 40, "accessing an empty partition"); // 1343242280 +GE_ERRORNO_GRAPH(GE_GRAPH_UNSUPPORTED, 41, 
"unsupported feature in partition"); // 1343242281 +GE_ERRORNO_GRAPH(GE_GRAPH_ASSIGN_ENGINE_FAILED, 42, "assign engine failed"); // 1343242282 +GE_ERRORNO_GRAPH(GE_GRAPH_ADD_PLC_END_FAILED, 43, "add placeholder end node failed"); // 1343242283 +GE_ERRORNO_GRAPH(GE_GRAPH_OPTIMIZE_PARSE_OUT_NODE_FAILED, 44, "Parse out node failed."); // 1343242284 +GE_ERRORNO_GRAPH(GE_GRAPH_OPTIMIZE_INSERT_OP_PARSE_FAILED, 45, + "OMG parse dynamic node config file failed."); // 1343242285 +GE_ERRORNO_GRAPH(GE_GRAPH_SAVE_WEIGHTS_FAILED, 46, "OMG Save Weights to Model failed."); // 1343242286 +GE_ERRORNO_GRAPH(GE_GRAPH_EMPTY_STRING_NAME, 47, "Empty string name."); // 1343242287 +GE_ERRORNO_GRAPH(GE_GRAPH_EMPTY_VARIABLE_TENSOR_TABLE, 48, "Empty variable-tensor table."); // 1343242288 +GE_ERRORNO_GRAPH(GE_GRAPH_VARIABLE_ALREADY_EXIST, 49, "Variable already exist."); // 1343242289 +GE_ERRORNO_GRAPH(GE_GRAPH_VARIABLE_DOES_NOT_EXIST, 50, "Variable does not exist."); // 1343242290 +GE_ERRORNO_GRAPH(GE_GRAPH_OPTIONS_INVALID, 51, "Client session options is invalid."); // 1343242291 +GE_ERRORNO_GRAPH(GE_GRAPH_NO_OUTPUT_DESC_INFO, 52, "No output desc info."); // 1343242292 +GE_ERRORNO_GRAPH(GE_GRAPH_OUTPUT_DESCINFO_TENSOR_NUM_MISMATCH, 53, + "Number of output descinfo and tensor mismatch."); // 1343242293 +GE_ERRORNO_GRAPH(GE_GRAPH_FILENAMEPREFIX_INVALID, 54, "Graph Save Model fileNamePrefix is invalid."); // 1343242294 +GE_ERRORNO_GRAPH(GE_GRAPH_NOT_BUILT, 55, "Graph is not built before SaveModel."); // 1343242295 +GE_ERRORNO_GRAPH(GE_GRAPH_SAVEMODEL_FAILED, 56, "Graph SaveModel failed."); // 1343242296 +GE_ERRORNO_GRAPH(GE_GRAPH_MEMORY_ALLOC_FAILED, 57, "Failed allocating memory for model file header."); // 1343242297 +GE_ERRORNO_GRAPH(GE_GRAPH_NODE_SEARCHER_REMOVE_GRAPH_FAILED, 58, "Failed remove graph in node seacher."); // 1343242298 +GE_ERRORNO_GRAPH(GE_GRAPH_NODE_SEARCHER_ADD_GRAPH_FAILED, 59, "Failed add graph in node seacher."); // 1343242299 
+GE_ERRORNO_GRAPH(GE_GRAPH_NODE_SEARCHER_GET_GRAPH_REBUILD_FAILED, 60, + "Failed add graph in node seacher."); // 1343242300 +GE_ERRORNO_GRAPH(GE_GRAPH_NODE_SEARCHER_SET_GRAPH_FINISH_REBUILD_GRAPH_FAILED, 61, + "Failed set graph finish rebuild in node searcher."); // 1343242301 +GE_ERRORNO_GRAPH(GE_GRAPH_VARIABLE_OP_PASS_FAILED, 62, "Failed to run variable pass."); // 1343242302 + +// Engine_manager module error code definition +GE_ERRORNO_ENGINE(GE_ENG_INIT_FAILED, 0, "Failed to initialize engine."); // 1343246336 +GE_ERRORNO_ENGINE(GE_ENG_FINALIZE_FAILED, 1, "Engine finalize failed."); // 1343246337 +GE_ERRORNO_ENGINE(GE_ENG_MEMTYPE_ERROR, 2, "Memory type HBM is necessary when engine is in device"); // 1343246338 + +// Optimize errocode +GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303 +GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304 + +// Ops module error code definition +GE_ERRORNO_OPS(GE_OPS_KERNEL_STORE_INIT_FAILED, 0, "Failed to initialize OpsKernelInfoStore."); // 1343250432 +GE_ERRORNO_OPS(GE_OPS_GRAPH_OPTIMIZER_INIT_FAILED, 1, "Failed to initialize GraphOptimizer."); // 1343250433 +GE_ERRORNO_OPS(GE_OPS_KERNEL_INFO_NOT_EXIST, 2, "OpsKernelInfo not exist."); // 1343250434 +GE_ERRORNO_OPS(GE_OPS_KERNEL_STORE_NOT_EXIST, 3, "OpsKernelInfoStore not exist."); // 1343250435 +GE_ERRORNO_OPS(GE_OPS_CALC_RUNNING_PARAM_FAILED, 4, "Failed to CalcOpRunningParam."); // 1343250436 +GE_ERRORNO_OPS(GE_OPS_GENERATE_TASK_FAILED, 5, "Failed to GenerateTask."); // 1343250437 +GE_ERRORNO_OPS(GE_OPS_OPTIMIZE_ORIGINAL_GRAPH_FAILED, 6, "Failed to OptimizeOriginalGraph."); // 1343250438 +GE_ERRORNO_OPS(GE_OPS_OPTIMIZE_FUSED_GRAPH_FAILED, 7, "Failed to OptimizeFusedGraph."); // 1343250439 +GE_ERRORNO_OPS(GE_OPS_ENGINE_IS_NOT_REGISTERED, 8, "Engine is not registered."); // 1343250440 +GE_ERRORNO_OPS(GE_OPS_GET_NO_VALID_SO, 9, + "There is no valid so about OpsKernelInfoStore or GraphOptimizer."); // 
1343250441 +GE_ERRORNO_OPS(GE_OPS_GET_OPTIMIZE_BY_ENGINE_FAILED, 10, "Failed to get graphOptimizer by name."); // 1343250442 +GE_ERRORNO_OPS(GE_OPS_GET_OPTIMIZE_BY_PRIORITY_FAILED, 11, "Failed to get graphOptimizer by priority."); // 1343250443 +GE_ERRORNO_OPS(GE_OPS_LOAD_GE_OPTIMIZER_FAILED, 12, "Failed to load ge graphOptimizer."); // 1343250444 + +// Runtime module error code definition +GE_ERRORNO_RUNTIME(GE_RTI_DEVICE_ID_INVALID, 1, "device id is invalid"); +GE_ERRORNO_RUNTIME(GE_RTI_DEVICE_NOT_READY, 2, "set device failed, device not ready"); +GE_ERRORNO_RUNTIME(GE_RTI_MEMALLOC_FAILED, 3, "malloc memory failed"); +GE_ERRORNO_RUNTIME(GE_RTI_MODEL_NOT_LOADED, 4, "model has not been loaded"); +GE_ERRORNO_RUNTIME(GE_RTI_THREAD_POOL_IS_NULL, 5, "model excute failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_CCE_CREATE_HANDLE_FAILED, 6, "cce create handle failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_CCE_SET_STREAM_FAILED, 7, "cce set stream failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_CREATE_RTMODEL_FAILED, 8, "call runtime create rtModel failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_CREATE_STREAM_FAILED, 9, "call runtime create stream failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_BIND_STREAM_FAILED, 10, "call runtime bind stream to model failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_CREATE_LABLE_FAILED, 11, "call runtime create lable failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_MODEL_LOAD_COMPLETE_FAILED, 12, "call runtime model load complete failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_MODEL_GET_TASK_ID_FAILED, 14, "call runtime get task id failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_KERNEL_LAUNCH_FAILED, 13, "call runtime kernel launch failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_KERNEL_LAUNCHEX_FAILED, 15, "call runtime kernel launchex failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_KERNEL_FUSION_START_FAILED, 16, "call runtime kernel fusion start failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_KERNEL_FUSION_END_FAILED, 17, "call 
runtime kernel fusion end failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_LABEL_SET_FAILED, 18, "call runtime lable set failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_LABLE_GOTO_FAILED, 19, "call runtime lable goto failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_LABLE_SWITCH_FAILED, 20, "call runtime lable switch failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_MEM_ALLOC_MANAGED_FAILED, 21, "call runtime mem alloc managed failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_MEM_FREE_MANAGED_FAILED, 22, "call runtime mem free managed failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_FREE_FAILED, 23, "call runtime free failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_STREAM_SYNC_FAILED, 24, "call runtime sync stream failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_MODEL_EXCUTE_FAILED, 25, "call runtime model excute failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_MEM_ASYNC_FAILED, 26, "call runtime mem async failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_MEM_ALLOC_HOST_FAILED, 27, "call runtime alloc host memory failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_MEM_FREE_HOST_FAILED, 28, "call runtime free host memory failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_MEM_ALLOC_DEVICE_FAILED, 29, "call runtime alloc device memory failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_MEM_FREE_DEVICE_FAILED, 30, "call runtime free device memory failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_FLUSH_CACHE_FAILED, 31, "call runtime flush cache failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_UNBIND_STREAM_FAILED, 32, "unbind rtstream from rtmodel failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_DESTORY_STREAM_FAILED, 33, "destory stream failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_DESTORY_LABEL_FAILED, 34, "destory label failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_DESTORY_MODEL_FAILED, 35, "destory model failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_CCE_TRANS_TENSOR_FAILED, 36, "call cce transfer tensor descriptor failed"); 
+GE_ERRORNO_RUNTIME(GE_RTI_CALL_CCE_TRANS_FILTER_FAILED, 37, "call cce transfer filter descriptor failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_CCE_UPDATE_KERNEL_ARGS_FAILED, 38, "call cce update kernel args failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_CCE_DESTORY_HANDLE_FAILED, 39, "destory handle failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_CREATE_EVENT_FAILED, 40, "call rutime create event failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_EVENT_RECORD_FAILED, 41, "call rutime event record failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_STREAM_WAIT_EVENT_FAILED, 42, "call rutime stream wait event failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_HCCL_BROADCAST_FAILED, 43, "call hccl hcom broadcast failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_HCCL_ALL_GATHER_FAILED, 44, "call hccl hcom all gather failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_HCCL_ALL_REDUCE_FAILED, 45, "call hccl hcom all reduce failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_RUNTIME_DESTORY_EVENT_FAILED, 46, "destory rt event failed"); +GE_ERRORNO_RUNTIME(GE_RTI_CALL_HCCL_REDUCE_SCATTER_FAILED, 47, "call hccl hcom reduce scatter failed"); + +// Executor module error code definition +GE_ERRORNO_EXECUTOR(GE_EXEC_NOT_INIT, 1, "GE Executor is not yet initialized."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_PATH_INVALID, 2, "Model file path is invalid."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_KEY_PATH_INVALID, 3, "Key file path of model is invalid."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_ID_INVALID, 4, "Model id is invalid."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_DATA_SIZE_INVALID, 5, "Data size of model is invalid."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_PARTITION_NUM_INVALID, 6, "Partition number of model is invalid."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_QUEUE_ID_INVALID, 7, "Queue id of model is invalid."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, 8, "Model does not support encryption."); +GE_ERRORNO_EXECUTOR(GE_EXEC_READ_MODEL_FILE_FAILED, 9, "Failed to read model file."); 
+GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_MODEL_REPEATED, 10, "The model is loaded repeatedly."); +GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_MODEL_PARTITION_FAILED, 11, "Failed to load model partition."); +GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, 12, "Failed to load weight partition."); +GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_TASK_PARTITION_FAILED, 13, "Failed to load task partition."); +GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_KERNEL_PARTITION_FAILED, 14, "Failed to load kernel partition."); +GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, 15, "Failed to allocate feature map memory."); +GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, 16, "Failed to allocate weight memory."); +GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_VAR_MEM_FAILED, 17, "Failed to allocate variable memory."); +GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 18, "GE AIPP is not exist."); +GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 19, "GE Dynamic AIPP is not support to query temporarily."); + +// Generator module error code definition +GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, 1, "Graph manager initialize failed."); +GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, 2, "Graph manager add graph failed."); +GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph manager build graph failed."); +GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed."); +GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed."); + +#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast(RT_ERROR) +} // namespace ge + +#endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h new file mode 100644 index 000000000..d1eb95b71 --- /dev/null +++ b/inc/framework/common/ge_types.h @@ -0,0 +1,276 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_GE_TYPES_H_ +#define INC_FRAMEWORK_COMMON_GE_TYPES_H_ + +#include + +#include +#include + +#include "framework/common/fmk_error_codes.h" +#include "ge/ge_api_error_codes.h" +#include "external/graph/types.h" +#include "external/ge/ge_api_types.h" + +namespace ge { +enum RuntimeType { + HOST = 0, + DEVICE = 1 +}; + +enum PerfLevel { + GEN_TASK_WITH_FUSION = -1, + GEN_TASK_WITHOUT_L2FUSION = 3, + GEN_TASK_WITHOUT_FUSION = 4 +}; + +enum FrameworkType { + CAFFE = 0, + MINDSPORE = 1, + TENSORFLOW = 3, + ANDROID_NN, + FRAMEWORK_RESERVED, +}; + +enum OpEngineType { + ENGINE_SYS = 0, // default engine + ENGINE_AICORE = 1, + ENGINE_VECTOR = 2, + ENGINE_AICUBE = 3, // not support + ENGINE_AIVECTOR = 4 // not support +}; + +const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; +const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; + +// Data cache, including data address and length +struct DataBuffer { + public: + void *data; // Data address + uint64_t length; // Data length + bool isDataSupportMemShare = false; + DataBuffer(void *dataIn, uint64_t len, bool isSupportMemShare) + : data(dataIn), length(len), isDataSupportMemShare(isSupportMemShare) {} + + DataBuffer() : data(nullptr), length(0), isDataSupportMemShare(false) {} +}; + +/// +/// @ingroup domi_ome +/// @brief External input data +/// +struct InputData { + uint32_t index; // Index of input data + uint32_t timestamp; // Data creation time + uint32_t timeout; // Processing 
timeout + uint32_t model_id; // Model ID required for data processing + uint64_t request_id = 0; // Request ID + std::vector blobs; // Actual input data, currently only supports one input + bool is_dynamic_batch = false; // Whether is dynamic batch size scene, default:false + std::string batch_label; // Gear used for current inference in dynamic batch scene +}; + +/// Output result structure definition +struct OutputData { + uint32_t index; // Index of input data + uint32_t model_id; // The model ID corresponding to the processing result + /// Output data cache, arranged in sequence of output operators. + /// If the operator has multiple outputs, + /// the data buffer order of the operator is the same as that defined in the + /// offline model + std::vector blobs; +}; + +// The definition of command data structure +struct Command { + std::string cmd_type; // Command type + std::vector cmd_params; // Command params +}; + +// The definition of I/O shape description +struct ShapeDescription { + int64_t num = 0; + int64_t channel = 0; + int64_t height = 0; + int64_t width = 0; + std::vector dims; +}; + +// Definition of input and output description information +struct InputOutputDescInfo { + std::string name; + uint64_t size; + uint32_t data_type; + ShapeDescription shape_info; +}; + +// Definition of model io dims +struct InputOutputDims { + std::string name; + size_t dim_num; + uint32_t size; + std::vector dims; +}; + +// Definition of model io dims +struct OriginInputInfo { + Format format; + DataType data_type; + uint32_t dim_num; +}; + +// The structure of AIPP info +struct AippConfigInfo { + int8_t input_format; + int32_t src_image_size_w; + int32_t src_image_size_h; + int8_t crop; + int32_t load_start_pos_w; + int32_t load_start_pos_h; + int32_t crop_size_w; + int32_t crop_size_h; + int8_t resize; + int32_t resize_output_w; + int32_t resize_output_h; + int8_t padding; + int32_t left_padding_size; + int32_t right_padding_size; + int32_t top_padding_size; + 
int32_t bottom_padding_size; + int8_t csc_switch; + int8_t rbuv_swap_switch; + int8_t ax_swap_switch; + int8_t single_line_mode; + int32_t matrix_r0c0; + int32_t matrix_r0c1; + int32_t matrix_r0c2; + int32_t matrix_r1c0; + int32_t matrix_r1c1; + int32_t matrix_r1c2; + int32_t matrix_r2c0; + int32_t matrix_r2c1; + int32_t matrix_r2c2; + int32_t output_bias_0; + int32_t output_bias_1; + int32_t output_bias_2; + int32_t input_bias_0; + int32_t input_bias_1; + int32_t input_bias_2; + int32_t mean_chn_0; + int32_t mean_chn_1; + int32_t mean_chn_2; + int32_t mean_chn_3; + float min_chn_0; + float min_chn_1; + float min_chn_2; + float min_chn_3; + float var_reci_chn_0; + float var_reci_chn_1; + float var_reci_chn_2; + float var_reci_chn_3; +}; + +// The structure of offline Modeldata +struct ModelData { + void *model_data = nullptr; // Model binary data start addr + uint32_t model_len = 0; // Model binary data length + int32_t priority = 0; // Model priority + std::string key; // Key path for encrypt model, Empty for unencrypt + std::string om_name; // om file name, used for data dump +}; + +// The definition of Model information +struct ModelInfo { + uint32_t version = 0; + std::string name; + bool is_encrypt = 0; // 0:unencrypt, 1:encrypt + std::vector input_desc; + std::vector output_desc; + uint8_t reserved[3] = {0}; // 3-byte reserved field +}; + +// Asynchronous callback interface, implemented by the caller +class ModelListener { + public: + virtual ~ModelListener() {} + /// + /// @brief Asynchronous callback interface + /// @param [in] model_id Model ID of the callback + /// @param [in] data_index Index of the input_data + /// @param [in] resultCode Execution results + /// + virtual Status OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t result_code, + std::vector &outputs) = 0; +}; + +// OMM configuration item +struct Options { + int64_t session_id; + int32_t device_id; + std::string job_id; + bool isUseHcom; + bool isUseHvd; + bool deployMode; + 
bool isAICPUMode; + bool enable_atomic; + std::string podName; + int64_t rankId; + std::string rankTableFile; + int32_t ge_hccl_flag = 0; + int32_t physical_device_id; + std::string profiling_mode; + std::string profiling_options; +}; + +// Profiling info of task +struct TaskDescInfo { + std::string model_name; + std::string op_name; + uint32_t block_dim; + uint32_t task_id; + uint32_t stream_id; +}; + +// Profiling info of graph +struct ComputeGraphDescInfo { + std::string model_name; + std::string op_name; + std::string op_type; + std::vector input_format; + std::vector> input_shape; + std::vector input_data_type; + std::vector output_format; + std::vector> output_shape; + std::vector output_data_type; +}; + +struct OpDescInfo { + std::string op_name; + uint32_t task_id; + uint32_t stream_id; + std::vector input_format; + std::vector> input_shape; + std::vector input_data_type; + std::vector input_addrs; + std::vector output_format; + std::vector> output_shape; + std::vector output_data_type; + std::vector output_addrs; +}; +} // namespace ge +#endif // INC_FRAMEWORK_COMMON_GE_TYPES_H_ diff --git a/inc/framework/common/gflags_util.h b/inc/framework/common/gflags_util.h new file mode 100644 index 000000000..94d66ffbe --- /dev/null +++ b/inc/framework/common/gflags_util.h @@ -0,0 +1,71 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ +#define INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ + +#include +#include + +namespace ge { +class GflagsUtils { + public: + static bool IsSetCommandTrue(const char *name) { + std::string out; + return gflags::GetCommandLineOption(name, &out) && out == "true"; + } + + /// + /// @brief Determines whether the parameter is empty + /// @param name name parameter name + /// @return true if empty otherwise false + /// + static bool IsSetCommandNotEmpty(const char *name) { + std::string out; + return gflags::GetCommandLineOption(name, &out) && !out.empty(); + } + + /// + /// @brief Determines whether the parameter is not default + /// @param flag_name name parameter name + /// @return true if not default otherwise false + /// + static bool IsCommandLineNotDefault(const char *flag_name) { + google::CommandLineFlagInfo info; + return GetCommandLineFlagInfo(flag_name, &info) && !info.is_default; + } + + /// + /// @brief Modify gflags to print help information + /// @param flags_h Pass in the self-defined help parameter, it is recommended to be FLAGS_h + /// @return void + /// + static void ChangeHelpFlags(bool flags_h) { + if (flags_h || IsSetCommandTrue("help") || IsSetCommandTrue("helpfull") || IsSetCommandNotEmpty("helpon") || + IsSetCommandNotEmpty("helpmatch") || IsSetCommandTrue("helppackage") || IsSetCommandTrue("helpxml")) { + gflags::SetCommandLineOption("help", "false"); + gflags::SetCommandLineOption("helpfull", "false"); + gflags::SetCommandLineOption("helpon", ""); + gflags::SetCommandLineOption("helpmatch", ""); + gflags::SetCommandLineOption("helppackage", "false"); + gflags::SetCommandLineOption("helpxml", "false"); + gflags::SetCommandLineOption("helpshort", "true"); + } + } +}; +} // namespace ge + +#endif // INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h new file mode 100644 index 000000000..27f1bc4d7 --- /dev/null +++ 
b/inc/framework/common/helper/model_helper.h @@ -0,0 +1,72 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_HELPER_MODEL_HELPER_H_ +#define INC_FRAMEWORK_COMMON_HELPER_MODEL_HELPER_H_ + +#include +#include + +#include "common/fmk_types.h" +#include "common/helper/om_file_helper.h" +#include "common/types.h" +#include "graph/model.h" +#include "model/ge_model.h" + +namespace ge { +class ModelHelper { + public: + ModelHelper() = default; + ~ModelHelper(); + + Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, + const std::string &output_file, ge::ModelBufferData &model); + Status SaveOriginalGraphToOmModel(const ge::Graph& graph, const std::string& output_file); + Status LoadModel(const ge::ModelData &model_data); + Status GetModelBufferData(ge::ModelBufferData& model); + + const ModelFileHeader* GetFileHeader() const { return file_header_; } + + GeModelPtr GetGeModel(); + void SetSaveMode(bool val) { is_offline_ = val; } + bool GetSaveMode(void) const { return is_offline_; } + + Status GetBaseNameFromFileName(const std::string &file_name, std::string &base_name); + Status GetModelNameFromMergedGraphName(const std::string &graph_name, std::string &model_name); + + private: + bool is_assign_model_ = false; + bool is_offline_ = true; + ModelFileHeader* file_header_ = nullptr; + // Encrypted model need delete temp model and unencrypted 
model need not delete model + uint8_t *model_addr_tmp_ = nullptr; + uint32_t model_len_tmp_ = 0; + GeModelPtr model_; + + ModelHelper(const ModelHelper&); + ModelHelper& operator=(const ModelHelper&); + Status GenerateGeModel(OmFileLoadHelper& om_load_helper); + Status LoadModelData(OmFileLoadHelper& om_load_helper); + void SetModelToGeModel(ge::Model& model); + Status LoadWeights(OmFileLoadHelper& om_load_helper); + Status LoadTask(OmFileLoadHelper& om_load_helper); + Status LoadTBEKernelStore(OmFileLoadHelper& om_load_helper); + Status ReleaseLocalModelData() noexcept; + Status SaveModelPartition(std::shared_ptr& om_file_save_helper, + ModelPartitionType type, const uint8_t* data, size_t size); +}; +} // namespace ge +#endif // INC_FRAMEWORK_COMMON_HELPER_MODEL_HELPER_H_ diff --git a/inc/framework/common/helper/om_file_helper.h b/inc/framework/common/helper/om_file_helper.h new file mode 100644 index 000000000..4ca54b507 --- /dev/null +++ b/inc/framework/common/helper/om_file_helper.h @@ -0,0 +1,93 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_COMMON_HELPER_OM_FILE_HELPER_H_ +#define INC_FRAMEWORK_COMMON_HELPER_OM_FILE_HELPER_H_ + +#include +#include + +#include "external/ge/ge_ir_build.h" +#include "framework/common/fmk_types.h" +#include "framework/common/types.h" +#include "framework/common/ge_types.h" + +using ProcParam = struct PROC_PARAM; +using std::string; +using std::vector; + +namespace ge { +struct ModelPartition { + ModelPartitionType type; + uint8_t* data = 0; + uint32_t size = 0; +}; + +struct OmFileContext { + std::vector partition_datas_; + std::vector partition_table_; + uint32_t model_data_len_; +}; + +struct SaveParam { + int32_t encode_mode; + std::string ek_file; + std::string cert_file; + std::string hw_key_file; + std::string pri_key_file; + std::string model_name; +}; + +class OmFileLoadHelper { + public: + Status Init(const ge::ModelData &model); + + Status Init(uint8_t *model_data, const uint32_t model_data_size); + + Status GetModelPartition(ModelPartitionType type, ModelPartition &partition); + + OmFileContext context_; + + private: + Status CheckModelValid(const ge::ModelData &model) const; + + Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size); + + bool is_inited_{false}; +}; + +class OmFileSaveHelper { + public: + ModelFileHeader &GetModelFileHeader() { return model_header_; } + + uint32_t GetModelDataSize() const { return context_.model_data_len_; } + + ModelPartitionTable *GetPartitionTable(); + + Status AddPartition(ModelPartition &partition); + + const std::vector &GetModelPartitions() const; + + Status SaveModel(const SaveParam &save_param, const char *target_file, + ge::ModelBufferData& model, bool is_offline = true); + + Status SaveModelToFile(const char *output_file, ge::ModelBufferData &model, bool is_offline = true); + + ModelFileHeader model_header_; + OmFileContext context_; +}; +} // namespace ge +#endif // INC_FRAMEWORK_COMMON_HELPER_OM_FILE_HELPER_H_ diff --git 
a/inc/framework/common/l2_cache_optimize.h b/inc/framework/common/l2_cache_optimize.h new file mode 100644 index 000000000..c65f67b3d --- /dev/null +++ b/inc/framework/common/l2_cache_optimize.h @@ -0,0 +1,123 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_L2_CACHE_OPTIMIZE_H_ +#define INC_FRAMEWORK_COMMON_L2_CACHE_OPTIMIZE_H_ + +#include + +#include +#include +#include +#include + +#include "common/types.h" +#include "common/util.h" +#include "graph/compute_graph.h" + +using std::vector; + +namespace ge { +// Size of RC memory alignment, 2M +constexpr size_t ALIGN_SIZE = 2097152; + +constexpr uint32_t RC_VALUE_DEFAULT = 1; +constexpr uint32_t RC_VALUE_MAX = 32; + +// RC data type classification +enum RCType { + RC_DEFAULT, // Such as temporary workspace memory of operator, variable (including global and local variable) + RC_HCOM, // Output of gradient aggregation, RC value should be set to 0 + RC_L2LOSS, // Parameter of L2 loss operator, RC value should be set to 0 + RC_INPUTOUTPUT, // Input and output tensor of operator, RC value is returned by FE calculation + RC_WEIGHTS, // The weight, fp16, RC value used by FP/BP operator should be set to 1 or the actual access numbers + RC_DW, // The gradient data DW and RC value output by BP operator + // should be set to 1 or the actual access numbers + RC_ARGS // Args of FlowTable, actual access numbers +}; + +enum 
MemType { INPUT_TENSOR, OUTPUT_TENSOR, WEIGHT, WORKSPACE }; + +// Memory usage information < node, type, number > +struct NodeInfo { + string nodeName; + MemType memType; + size_t index; +}; + +// Memory block RC value +struct RCMemoryBlock { + RCType type; // RC type + size_t blockSize; // memory block size + size_t headOffset; // Start offset from base address + size_t tailOffset; // End offset from base address + uint32_t rcCount; // RC value + NodeInfo nodeInfo; // Input and output indexes of node objects to which RC belongs +}; + +// L2Cache optimizer +class L2CacheOptimize { + public: + explicit L2CacheOptimize(ge::ComputeGraphPtr &graph); + ~L2CacheOptimize(); + + // Collect the information L2Cache Memory optimization + Status Gath(); + + private: + ge::ComputeGraphPtr graph_; + + // Save RC block information list + vector weightRCs; + vector opRCs; + + // Extract RC information generated by FE from compiled graph + void RetirveRCinfo(); + + // Take the maximum common divisor of RC values for the duplicate address + void Merge(vector &blocks); + + // The RC information is aligned with the 2m address + void Align(vector &blocks); + + // Weight of l2loss operator, output of gradient aggregation output, RC value set to 0 + void HandleOutputZeroRC(RCType type, ge::NodePtr node, vector &outputList, vector &blocks); + + // Processing operator input Tensor's RC + void HandOPInput(ge::NodePtr node, vector &inputList, vector &blocks); + + // Processing operator output Tensor's RC + void HandOPoutput(ge::NodePtr node, vector &outputList, vector &blocks); + + // maximum common divisor + uint32_t Measure(uint32_t x, uint32_t y) { + if (x == 0 || y == 0) return RC_VALUE_DEFAULT; + uint32_t z = y; + while (x % y != 0) { + z = x % y; + x = y; + y = z; + } + return z; + } + + bool Contain(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block); + bool Cross(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block); + bool Connect(const RCMemoryBlock &l_block, const 
RCMemoryBlock &r_block); +}; +} // namespace ge + +#endif // INC_FRAMEWORK_COMMON_L2_CACHE_OPTIMIZE_H_ \ No newline at end of file diff --git a/inc/framework/common/op/attr_value_util.h b/inc/framework/common/op/attr_value_util.h new file mode 100644 index 000000000..6ef9b11de --- /dev/null +++ b/inc/framework/common/op/attr_value_util.h @@ -0,0 +1,161 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ +#define INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ + +#include +#include +#include + +#include "common/types.h" +#include "graph/debug/ge_attr_define.h" +#include "proto/om.pb.h" + +using domi::AttrDef; +using domi::AttrDef_ListValue; +using domi::ModelDef; +using domi::NamedAttrs; +using domi::OpDef; + +namespace ge { +using AttrDefMap = ::google::protobuf::Map<::std::string, ::domi::AttrDef>; +using AttrDefPair = ::google::protobuf::MapPair; + +void AddOpAttr(const std::string &key, AttrDef &attr, OpDef *opdef); +// DEFINE_ADD_ATTR_VALUE +void AddOpAttr(const std::string &key, const std::string &value, AttrDefMap *attrs); +void AddOpAttr(const std::string &key, const char *value, AttrDefMap *attrs); +void AddOpAttr(const char *key, const char *value, AttrDefMap *attrs); +void AddOpAttr(const std::string &key, const uint32_t value, AttrDefMap *attrs); +void AddOpAttr(const std::string &key, const int32_t value, AttrDefMap *attrs); +void 
AddOpAttr(const std::string &key, const int64_t value, AttrDefMap *attrs); +void AddOpAttr(const std::string &key, const float value, AttrDefMap *attrs); +void AddOpAttr(const std::string &key, const double value, AttrDefMap *attrs); +void AddOpAttr(const std::string &key, const bool value, AttrDefMap *attrs); + +void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, AttrDefMap *attrs); + +// DEFINE_ADD_ATTR_VALUE +void AddOpAttr(const std::string &key, const std::string &value, OpDef *opdef); +void AddOpAttr(const std::string &key, const char *value, OpDef *opdef); +void AddOpAttr(const char *key, const char *value, OpDef *opdef); +void AddOpAttr(const std::string &key, const uint32_t value, OpDef *opdef); +void AddOpAttr(const std::string &key, const int32_t value, OpDef *opdef); +void AddOpAttr(const std::string &key, const int64_t value, OpDef *opdef); +void AddOpAttr(const std::string &key, const float value, OpDef *opdef); +void AddOpAttr(const std::string &key, const double value, OpDef *opdef); +void AddOpAttr(const std::string &key, const bool value, OpDef *opdef); + +void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, OpDef *opdef); + +void AddOpBytesAttr(const std::string &key, const void *value, size_t size, OpDef *opdef); + +// DEFINE_ADD_ATTR_VALUE_LIST +void AddOpAttrList(const std::string &key, const double value, AttrDefMap *attrs); +void AddOpAttrList(const std::string &key, const float value, AttrDefMap *attrs); +void AddOpAttrList(const std::string &key, const uint32_t value, AttrDefMap *attrs); +void AddOpAttrList(const std::string &key, const int32_t value, AttrDefMap *attrs); +void AddOpAttrList(const std::string &key, const std::string value, AttrDefMap *attrs); +void AddOpAttrList(const std::string &key, const double value, OpDef *opdef); +void AddOpAttrList(const std::string &key, const float value, OpDef *opdef); +void AddOpAttrList(const std::string &key, const uint32_t value, OpDef *opdef); +void 
AddOpAttrList(const std::string &key, const int32_t value, OpDef *opdef); +void AddOpAttrList(const std::string &key, const bool value, OpDef *opdef); +void AddOpAttrList(const std::string &key, const int64_t value, OpDef *opdef); + +void AddOpAttrList(const std::string &key, const std::string &value, OpDef *opdef); + +bool GetOpAttr(const std::string &key, std::string *value, const OpDef *opdef); +bool GetOpAttr(const std::string &key, int32_t *value, const OpDef *opdef); +bool GetOpAttr(const std::string &key, int64_t *value, const OpDef *opdef); +bool GetOpAttr(const std::string &key, uint32_t *value, const OpDef *opdef); +bool GetOpAttr(const std::string &key, float *value, const OpDef *opdef); +bool GetOpAttr(const std::string &key, double *value, const OpDef *opdef); +bool GetOpAttr(const std::string &key, bool *value, const OpDef *opdef); +bool GetOpAttr(const std::string &key, AttrDef_ListValue *value, const OpDef *opdef); + +uint32_t GetOpAttrListSize(const std::string &key, std::string value, const OpDef *opdef); +uint32_t GetOpAttrListSize(const std::string &key, int32_t value, const OpDef *opdef); +uint32_t GetOpAttrListSize(const std::string &key, int64_t value, const OpDef *opdef); +uint32_t GetOpAttrListSize(const std::string &key, uint32_t value, const OpDef *opdef); +uint32_t GetOpAttrListSize(const std::string &key, float value, const OpDef *opdef); +uint32_t GetOpAttrListSize(const std::string &key, double value, const OpDef *opdef); +uint32_t GetOpAttrListSize(const std::string &key, bool value, const OpDef *opdef); + +bool GetBytesAttr(const std::string &key, std::string *value, const OpDef *opdef); +bool GetBytesAttr(const std::string &key, std::string *value, const ModelDef *model_def); + +void AddModelAttr(const std::string &key, const std::string &value, ModelDef *model_def); +void AddModelAttr(const std::string &key, const char *value, ModelDef *model_def); +void AddModelAttr(const char *key, const char *value, ModelDef *model_def); +void 
AddModelAttr(const std::string &key, const uint32_t value, ModelDef *model_def); +void AddModelAttr(const std::string &key, const int32_t value, ModelDef *model_def); +void AddModelAttr(const std::string &key, const int64_t value, ModelDef *model_def); +void AddModelAttr(const std::string &key, const float value, ModelDef *model_def); +void AddModelAttr(const std::string &key, const double value, ModelDef *model_def); +void AddModelAttr(const std::string &key, const bool value, ModelDef *model_def); +void AddModelAttr(const std::string &key, const void *value, size_t size, ModelDef *model_def); +void AddModelAttr(const std::string &key, const AttrDef_ListValue &value, ModelDef *model_def); + +void AddModelAttrList(const std::string &key, const double value, ModelDef *model_def); +void AddModelAttrList(const std::string &key, const float value, ModelDef *model_def); +void AddModelAttrList(const std::string &key, const uint32_t value, ModelDef *model_def); +void AddModelAttrList(const std::string &key, const int32_t value, ModelDef *model_def); +void AddModelAttrList(const std::string &key, const std::string &value, ModelDef *model_def); + +bool GetModelAttr(const std::string &key, std::string *value, const ModelDef *model_def); +bool GetModelAttr(const std::string &key, int32_t *value, const ModelDef *model_def); +bool GetModelAttr(const std::string &key, int64_t *value, const ModelDef *model_def); +bool GetModelAttr(const std::string &key, uint32_t *value, const ModelDef *model_def); +bool GetModelAttr(const std::string &key, float *value, const ModelDef *model_def); +bool GetModelAttr(const std::string &key, double *value, const ModelDef *model_def); +bool GetModelAttr(const std::string &key, bool *value, const ModelDef *model_def); +bool GetModelAttr(const std::string &key, AttrDef_ListValue *value, const ModelDef *model_def); + +bool HasOpAttr(const OpDef *opdef, const std::string &attr_name); + +void SetAttrDef(const std::string &value, AttrDef *out); +void 
SetAttrDef(const char *value, AttrDef *out); +void SetAttrDef(const uint32_t value, AttrDef *out); +void SetAttrDef(const int32_t value, AttrDef *out); +void SetAttrDef(const float value, AttrDef *out); +void SetAttrDef(const double value, AttrDef *out); +void SetAttrDef(const bool value, AttrDef *out); +void SetAttrList(const std::string &value, AttrDef *out); +void SetAttrList(const bool value, AttrDef *out); +void SetAttrList(const float value, AttrDef *out); +void SetAttrList(const double value, AttrDef *out); +void SetAttrList(const uint32_t value, AttrDef *out); + +bool GetAttrDefValue(const std::string &key, std::string *value, const AttrDefMap &attr); +bool GetAttrDefValue(const std::string &key, int32_t *value, const AttrDefMap &attr); +bool GetAttrDefValue(const std::string &key, int64_t *value, const AttrDefMap &attr); +bool GetAttrDefValue(const std::string &key, uint32_t *value, const AttrDefMap &attr); +bool GetAttrDefValue(const std::string &key, float *value, const AttrDefMap &attr); +bool GetAttrDefValue(const std::string &key, double *value, const AttrDefMap &attr); +bool GetAttrDefValue(const std::string &key, bool *value, const AttrDefMap &attr); +bool GetAttrDefValue(const std::string &key, AttrDef_ListValue *value, const AttrDefMap &attr); +bool GetAttrDefValue(const std::string &key, NamedAttrs *&value, AttrDefMap *attr); +bool GetAttrDefValue(const std::string &key, const NamedAttrs *&value, const AttrDefMap &attr); + +bool GetAttrDefListValue(const std::string &key, int idx, int32_t *value, const AttrDefMap &attr); +bool GetAttrDefListValue(const std::string &key, int idx, uint32_t *value, const AttrDefMap &attr); +bool GetAttrDefListValue(const std::string &key, int idx, float *value, const AttrDefMap &attr); +bool GetAttrDefListValue(const std::string &key, int idx, double *value, const AttrDefMap &attr); +} + +#endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ diff --git a/inc/framework/common/op/ge_op_utils.h 
b/inc/framework/common/op/ge_op_utils.h new file mode 100644 index 000000000..87cf54d89 --- /dev/null +++ b/inc/framework/common/op/ge_op_utils.h @@ -0,0 +1,295 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_ +#define INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_ + +#include +#include +#include + +#include "common/op/attr_value_util.h" +#include "common/types.h" +#include "common/util.h" +#include "graph/attr_value.h" +#include "graph/ge_tensor.h" +#include "graph/node.h" +#include "graph/op_desc.h" +#include "proto/insert_op.pb.h" + +namespace ge { +using namespace cce; +using domi::Status; + +// Add Sub Mul +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t ADD_INPUT_NUM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SUB_INPUT_NUM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MUL_INPUT_NUM; + +// Permute +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t PERMUTE_ORDER_NUM; + +// Ssd PriroBox +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const double SSD_PRIORBOX_ASPECT_RATIO_VALUE; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t STRIDEDSLICE_INPUT_NUM; + +// Switch +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_INPUT_NUM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const 
uint32_t SWITCH_OUTPUT_NUM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_FALSE_OUTPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_TRUE_OUTPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_DATA_INPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_PRED_INPUT; + +// FunctionOp +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t IF_COND_INPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t FOR_START_INPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t FOR_LIMIT_INPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t FOR_DELTA_INPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t FOR_DATA_INPUT; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int NORMAL_TENSOR_SIZE; + +class OpUtils { + public: + /// + /// @ingroup domi_ome + /// @brief Check whether check_value is in [min_enum_value, max_enum_value] + /// @return true Within + /// @return false out of range + // + static inline bool CheckEnumValid(int32_t check_value, int32_t min_enum_value, int32_t max_enum_value) { + return check_value < min_enum_value ? false : (check_value >= max_enum_value ? 
false : true); + } + /// + /// @ingroup domi_omg + /// @brief Convert the dimension of array according to different format + /// @param [in] src_format src_shape format + /// @param [in] src Dimension array to be converted + /// @param [in] dst_format Target format after conversion + /// @param [out] dst Dimension array after conversion + /// @return SUCCESS success + /// @return FAILED fail + /// + static bool ConvertDim(ccTensorFormat_t src_format, const std::vector &src, ccTensorFormat_t dst_format, + std::vector &dst); + /// + /// @ingroup domi_omg + /// @brief Determine whether to manually calculate the tensor size based on the values of format and dim + /// @param [in] format, Format information of the tensor + /// @param [in] real_dim_cnt, Tensor dim + /// @return true Manually calculate the size based on dim and datatype + /// @return false skip + /// + static bool IsComputDimsSize(const int32_t format, const uint32_t real_dim_cnt); + /// + /// @ingroup domi_ome + /// @brief Initialize the tensor description, which is used for input and output. + /// @param [in] model_tensor Tensor information defined by the offline model + /// @param [out] cc_tensor Tensor definition used by CC + /// @return SUCCESS success + /// @return FAILED fail + /// + static Status InitTensorDescriptor(const ge::GeTensorDesc &model_tensor, ccTensorDescriptor_t &cc_tensor); + /// + /// @ingroup domi_ome + /// @brief Initialize the tensor description, which is used for input and output. + /// @param [in] model_tensor Tensor information defined by the offline model + /// @param [in] dst_data_type data_type of the target cc_tensor + /// @param [out] cc_tensor Tensor definition used by CC + /// @return SUCCESS success + /// @return FAILED fail + /// + static Status InitTensorDescriptor(const ge::GeTensorDesc &model_tensor, int32_t dst_data_type, + ccTensorDescriptor_t &cc_tensor); + /// + /// @ingroup domi_ome + /// @brief Initialize the tensor description for bias. 
+ /// @param [in] model_tensor Tensor information defined by the offline model + /// @param [out] cc_tensor Tensor definition used by CC + /// @return SUCCESS success + /// @return FAILED fail + /// + /// + static Status InitTensorDescriptor(const ge::GeTensor &model_tensor, ccTensorDescriptor_t &cc_tensor); + /// + /// @ingroup domi_ome + /// @brief Initialize the tensor description for bias. + /// @param [in] model_tensor Tensor information defined by the offline model + /// @param [in] dst_data_type data_type of the target cc_tensor + /// @param [out] cc_tensor Tensor definition used by CC + /// @return SUCCESS success + /// @return FAILED fail + /// + static Status InitTensorDescriptor(const ge::GeTensor &model_tensor, int32_t dst_data_type, + ccTensorDescriptor_t &cc_tensor); + + static Status InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector &dim, + ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt = 4); + /// + /// @ingroup domi_ome + /// @brief Destroys a tensor + /// @param [inout] cc_tensor Tensor definition used by CC + /// + static void DestroyTensorDescriptor(ccTensorDescriptor_t &cc_tensor) noexcept; + + /// + /// @ingroup domi_ome + /// @brief Destroys a tensor + /// @param [inout] cc_filter cc_filter Definition of the filter used by CC + /// + static void DestroyFilterDescriptor(ccFilterDescriptor_t &cc_filter); + + /// + /// @ingroup domi_ome + /// @brief Initializing Filter Description + /// @param [in] model_filter Filter information defined in the offline model + /// @param [out] cc_filter Definition of the filter used by CC + /// @return SUCCESS success + /// @return FAILED fail + /// + static Status InitFilterDescriptor(const ge::GeTensor &model_filter, ccFilterDescriptor_t &cc_filter); + + /// + /// @brief Extract AIPP parameters from AttrDefMap and splice them + /// @param [in] aipp_attr attr of operator + /// @param [out] aipp_params aipp parameters + /// @return enum of tagCCAippInputFormat + /// + static 
Status ConvertAippParams(const GeAttrValue::NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params); + static Status TransferDim(const std::vector &dim, std::vector &dim_vector); + template + static void SliceData(const std::vector &input, int64_t chunk_size, std::vector &output, + int64_t begin, int64_t out_dim, int64_t stride); + template + static Status SetDataByDataType(size_t out_size, const std::vector &chunk_input, + const std::vector &chunk_output, GeTensor *output); + template + static Status SetOutputSliceDataByDataType(void *data, int64_t data_size, const std::vector &input_dims, + const std::vector &begin, const std::vector &output_dims, + ge::GeTensor *output, const std::vector &stride); + static Status SetOutputSliceData(void *data, int64_t data_size, int32_t data_type, std::vector &input_dims, + std::vector &begin, std::vector &output_dims, ge::GeTensor *output, + std::vector &stride); + + /// + /// @ingroup domi_omg + /// @brief Convert the convolutional weight data from [h, w, c, k] to [k, c, h, w] + /// @param [in] input Weight data in HWCK format + /// @param [in] H value of H dimension + /// @param [in] W value of W dimension + /// @param [in] C value of C dimension + /// @param [in] K value of K dimension + /// @param [out] output Data pointer after conversion. The format is KCHW. + /// + static void TransDataHWCK2KCHW(const void *input, int64_t H, int64_t W, int64_t C, int64_t K, void **output); + /// + /// @ingroup domi_omg + /// @brief Converts the convolutional weight data from [k, c, h, w] to [h, w, c, k]. + /// @param [in] input Weight data in HWCK format + /// @param [in] K value of K dimension + /// @param [in] C value of C dimension + /// @param [in] H value of H dimension + /// @param [in] W value of W dimension + /// @param [out] output Data pointer after conversion. 
The format is HWCK + /// + static void TransDataKCHW2HWCK(const void *input, int64_t K, int64_t C, int64_t H, int64_t W, void *output); + /// + /// @ingroup domi_omg + /// @brief Initialize the input and output description of the data node which is applied to filter weight in the + /// training network + /// @param [in] model_tensor input and output tensor information + /// @param [out] cc_tensor Tensor in CCE format after conversion + /// + static Status InitFilterTensorDescriptor(const ge::GeTensorDesc &model_tensor, ccFilterDescriptor_t &cc_tensor); + + static void SetTensorDescriptorAllOffsetQuantizeInfo(const GeTensorDesc &tensor, ccTensorDescriptor_t cc_tensor); + static vector GetWeights(const ge::Node &node); + static vector GetWeights(ge::ConstNodePtr node); + static vector MutableWeights(const ge::Node &node); + static vector MutableWeights(const ge::NodePtr node); + static Status SetWeights(ge::Node &node, const vector &weights); + static Status SetWeights(ge::NodePtr node, const vector &weights); + static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, DataType type, std::vector &dims); + + private: + friend class CceTensorDescriptor; + static uint32_t GetRealDimCnt(const GeTensorDesc &tensor_desc); +}; + +class CceTensorDescriptor; + +using CceTensorDescriptorPtr = std::shared_ptr; + +class CceTensorDescriptor { + public: + explicit CceTensorDescriptor(ccTensorDescriptor_t cc_tensor); + CceTensorDescriptor(const CceTensorDescriptor &) = delete; + CceTensorDescriptor &operator=(const CceTensorDescriptor &) = delete; + + ~CceTensorDescriptor(); + + ccTensorDescriptor_t GetPtr() { return cc_tensor_; } + + /// + /// @brief Initializes the tensor based on shape information. 
+ /// @param[in] format data permutation format + /// @param[in] data_type Data Type + /// @param[in] dim dim information + /// @return return code + /// + Status InitTensor(int32_t format, int32_t data_type, const std::vector &dims); + + Status InitTensor(int32_t format, int32_t data_type, const ge::GeShape &shape); + + /// + /// @brief get format of tensor + /// @param[out] format format of the tensor + /// @return return code + /// + Status GetFormat(ccTensorFormat_t *format); + + /// + /// @brief Obtains the size of the tensor. + /// @param[out] size size of Tensor + /// @return return code + /// + Status GetTensorSizeInBytes(uint32_t *size); + + /// + /// @brief transform tensor between 4d(NCHW) and 5d(NC1HWC0) + /// @param [in] xDesc descriptor of input tensor + /// @param [in] x point to input data in host memory + /// @param [in] dataTypeTransmode mode of data type transform + /// @param [in] yDesc descriptor of output tensor + /// @param [in|out] y point to output data in host memory + /// @param [in] ySizeInBytes size of outputData + /// @return return code + /// + static Status TransTensor(const ccTensorDescriptor_t xDesc, const void *x, const CceTensorDescriptorPtr &yDesc, + void *y, uint32_t ySizeInBytes); + + /// + /// @brief CceTensorDescriptor Static Constructor + /// @return CceTensorDescriptor smart pointer + /// + static CceTensorDescriptorPtr Create(); + + ccTensorDescriptor_t cc_tensor_ = nullptr; +}; +} // namespace ge +#endif // INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_ diff --git a/inc/framework/common/op/op_parser_util.h b/inc/framework/common/op/op_parser_util.h new file mode 100644 index 000000000..49b4350a4 --- /dev/null +++ b/inc/framework/common/op/op_parser_util.h @@ -0,0 +1,425 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_ +#define INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_ + +#include +#include +#include +#include + +namespace ge { +// general +const float DEFAULT_ALPHA_VALUE = 1.0; +const float DEFAULT_BETA_VALUE = 0.0; +const uint32_t NORMAL_INPUT_NUM = 1; +const uint32_t NORMAL_OUTPUT_NUM = 1; +const uint32_t NORMAL_WORKSPACE_NUM = 0; +const int32_t NORMAL_1D_DIM_NUM = 1; +const int32_t NORMAL_SCALE_DIM_NUM = 0; +const int NORMAL_TENSOR_FORMAT = static_cast(cce::CC_TENSOR_NC1HWC0); +const int NORMAL_TENSOR_SIZE = 4; +const int NORMAL_DEVICE_DATA_TYPE = static_cast(cce::CC_DATA_HALF); +const int DEFAULT_POOLING_MODE = static_cast(cce::CC_POOLING_MAX); +const uint32_t DEFAULT_REAL_DIM_CNT = 4; + +// const +const uint32_t CONST_OP_INPUT_NUM = 0; +const uint32_t CONST_OP_NORMAL_WEIGHT_SIZE = 1; + +// MatMul +const uint32_t MATMUL_INPUT_NUM = 2; + +// ActivationGrad +const int32_t ACTIVATIONGRAD_INPUT_NUM = 2; + +// FusedBatchNorm +const int32_t FUSED_BATCH_NORM_WORKSPACE_NUM = 1; +const int32_t FUSED_BATCH_NORM_INPUT_NUM = 5; +const int32_t FUSED_BATCH_NORM_OUTPUT_NUM = 5; +// FusedBatchNormGrad +const int32_t FUSEDBATCHNORMGRAD_WORKSPACE_NUM = 1; +const int32_t FUSEDBATCHNORMGRAD_INPUT_NUM = 5; +const int32_t FUSEDBATCHNORMGRAD_OUTPUT_NUM = 3; + +// conv +const uint32_t CONVOLUTION_WORKSPACE_NUM = 1; +const uint32_t CONVOLUTION_PAD_SIZE = 4; +const uint32_t CONVOLUTION_STRIDE_SIZE = 2; +const uint32_t CONVOLUTION_DILATION_SIZE = 2; +const int32_t CONVOLUTION_ADJ_SIZE = 2; +const int32_t 
CONVOLUTION_TARGET_SHAPE_SIZE = 2; + +// ConvGradFilter +const uint32_t CONVGRADFILTER_WORKSPACE_NUM = 1; +const uint32_t CONVGRADFILTER_INPUT_NUM = 3; + +// Pooling +const uint32_t POOLING_WINDOW_SIZE = 2; +const uint32_t POOLING_STRIDE_SIZE = 2; +const uint32_t POOLING_PAD_SIZE = 4; + +// Add Sub Mul +const uint32_t ADD_INPUT_NUM = 2; +const uint32_t SUB_INPUT_NUM = 2; +const uint32_t MUL_INPUT_NUM = 2; +const uint32_t DIV_INPUT_NUM = 2; +const uint32_t ADD_WORKSPACE_NUM = 1; +const uint32_t SUB_WORKSPACE_NUM = 1; +const uint32_t MUL_WORKSPACE_NUM = 1; +const uint32_t DIV_WORKSPACE_NUM = 1; + +const int32_t DEFAULT_AXIS_VALUE = -1; + +const int32_t RESHAPE_AXIS_DEFAULT_VALUE = 0; +const int32_t RESHAPE_NUM_AXES_DEFAULT_VALUE = -1; +const uint32_t RESHAPE_WORKSPACE_NUM = 1; + +const uint32_t FLATTEN_WORKSPACE_NUM = 1; + +const int32_t CONCAT_MIN_INPUT_SIZE = 1; +const int32_t CONCAT_DEFAULT_AXIS = 1; +const uint32_t CONCAT_WORKSPACE_NUM = 1; + +// The value for LRN parameters +const uint32_t LRN_DEFAULT_NORM_REGION = 0; +const float LRN_DEFAULT_K = 1.0; +const uint32_t LRN_DEFAULT_LOCAL_SIZE = 5; +const float LRN_DEFAULT_ALPHA = 1.0; +const float LRN_DEFAULT_BETA = 0.75; + +/// +/// @ingroup domi_common +/// @brief roipooling default value +/// +const uint32_t ROIPOOLING_DEFAULT_POOLED_H = 0; +const uint32_t ROIPOOLING_DEFAULT_POOLED_W = 0; +const float ROIPOOLING_DEFAULT_SPATIAL_SCALE = 1; +const int32_t ROIPOOLING_DEFAULT_SAMPLING_RATIO = -1; + +// DetectionOutput +const int32_t DETECTIONOUTPUT_INPUT_SIZE = 3; +const int32_t DETECTIONOUTPUT_OUTPUT_SIZE = 2; +const int32_t DETECTIONOUTPUT_WORKSPACE_NUM = 1; +const int DETECTIONOUTPUT_CLASS_NUM = 20; // Number of background categories +const int DETECTIONOUTPUT_NUM_CLASSES_DEFAULT_VALUE = 21; +const float DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3; +const float DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.8; + +// Proposal +const int32_t PROPOSAL_INPUT_SIZE = 3; +const int32_t 
PROPOSAL_OUTPUT_MAX_SIZE = 2; +const int32_t PROPOSAL_WORKSPACE_NUM = 1; +const float PROPOSAL_BASE_SIZE_DEFAULT_VALUE = 16; +const float PROPOSAL_RATIO_DIM_0_DEFAULT_VALUE = 0.5; +const float PROPOSAL_RATIO_DIM_1_DEFAULT_VALUE = 1; +const float PROPOSAL_RATIO_DIM_2_DEFAULT_VALUE = 2; +const float PROPOSAL_SCALE_DIM_0_DEFAULT_VALUE = 8; +const float PROPOSAL_SCALE_DIM_1_DEFAULT_VALUE = 16; +const float PROPOSAL_SCALE_DIM_2_DEFAULT_VALUE = 32; +const float PROPOSAL_MIN_SIZE_DEFAULT_VALUE = 16; +const int PROPOSAL_PRE_NMS_TOPN_DEFAULT_VALUE = 6000; +const int PROPOSAL_POST_NMS_TOPN_DEFAULT_VALUE = 304; +const float PROPOSAL_NMS_THRESH_DEFAULT_VALUE = 0.7; +const float PROPOSAL_FILTER_THRESH_DEFAULT_VALUE = 0; + +// TVM OP +const uint32_t DEFAULT_KERNEL_BLOCK_DIM = 1; + +// Softmax +const int32_t SOFTMAX_WORKSPACE_NUM = 1; + +// SoftmaxCrossEntropy +const int32_t SOFTMAXCROSSENTROPY_INPUT_NUM = 2; +const int32_t SOFTMAXCROSSENTROPY_OUTPUT_NUM = 2; + +// Permute +const int32_t PERMUTE_INPUT_NUM = 1; +const int32_t PERMUTE_OUTPUT_NUM = 1; +const int32_t PERMUTE_WORKSPACE_NUM = 1; +const int32_t PERMUTE_ORDER_NUM = 4; + +// Ssd normalize +const int SSD_NORMALIZE_INPUT_SIZE = 1; +const float SSD_NORMALIZE_EPS_DEFAULT_VALUE = 2e-7; + +// SsdPriroBox +const int32_t SSD_PRIOR_BOX_WORKSPACE_NUM = 1; +const int32_t SSD_PRIOR_BOX_INPUT_NUM = 2; +const bool SSD_PRIOR_BOX_FLIP_VALUE = true; +const bool SSD_PRIOR_BOX_CLIP_VALUE = false; +const double SSD_PRIOR_BOX_ASPECT_OFFSET_VALUE = 0.5; +const double SSD_PRIORBOX_VARIANCE_VALUE = 0.1; +const double SSD_PRIORBOX_VARIANCE_SIZE_ONE = 1; +const double SSD_PRIORBOX_VARIANCE_SIZE_FOUR = 4; +const double SSD_PRIORBOX_ASPECT_RATIO_VALUE = 1.0; +const int SSD_PRIOR_BOX_CODETYPE_CORNER_VALUE = 1; +const int SSD_PRIOR_BOX_CODETYPE_CENTER_SIZE_VALUE = 2; +const int SSD_PRIOR_BOX_CODETYPE_CORNER_SIZE_VALUE = 3; + +// Ssd DetectionOutput +const int32_t SSD_DETECTIONOUTPUT_INPUT_SIZE = 3; +const int32_t 
SSD_DETECTIONOUTPUT_INPUT_SIZE_AFTER_FUSION = 2; +const int32_t SSD_DETECTIONOUTPUT_OUTPUT_SIZE = 2; +const int32_t SSD_DETECTIONOUTPUT_OUTPUT_SIZE_AFTER_FUSION = 3; +const int32_t SSD_DETECTIONOUTPUT_WORKSPACE_NUM = 1; +const int32_t SSD_DETECTIONOUTPUT_WORKSPACE_NUM_AFTER_FUSION = 0; +const bool SSD_DETECTIONOUTPUT_SHARED_LOCATION_DEFAULT_VALUE = true; +const int32_t SSD_DETECTIONOUTPUT_BACKGROUND_LABEL_ID_DEFAULT_VALUE = 0; +const float SSD_DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3; +const int32_t SSD_DETECTIONOUTPUT_TOP_K_DEFAULT_VALUE = 200; +const float SSD_DETECTIONOUTPUT_ETA_DEFAULT_VALUE = 1.0; +const int SSD_DETECTIONOUTPUT_CODE_TYPE_DEFAULT_VALUE = static_cast(cce::CC_BOX_CENTER_SIZE); +const int32_t SSD_DETECTIONOUTPUT_KEEP_TOP_K_DEFAULT_VALUE = 200; +const bool SSD_DETECTIONOUTPUT_VARIANCE_ENCODED_IN_TARGET_DEFAULT_VALUE = false; +const float SSD_DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.1; + +// Refinedet DetectionOutput +const int32_t REFINEDET_DETECTIONOUTPUT_INPUT_SIZE = 5; +const int32_t REFINEDET_DETECTIONOUTPUT_INPUT_SIZE_AFTER_FUSION = 2; +const int32_t REFINEDET_DETECTIONOUTPUT_OUTPUT_SIZE = 2; +const int32_t REFINEDET_DETECTIONOUTPUT_OUTPUT_SIZE_AFTER_FUSION = 3; +const int32_t REFINEDET_DETECTIONOUTPUT_WORKSPACE_NUM = 1; +const bool REFINEDET_DETECTIONOUTPUT_SHARED_LOCATION_DEFAULT_VALUE = true; +const int32_t REFINEDET_DETECTIONOUTPUT_BACKGROUND_LABEL_ID_DEFAULT_VALUE = 0; +const float REFINEDET_DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3; +const int32_t REFINEDET_DETECTIONOUTPUT_TOP_K_DEFAULT_VALUE = 200; +const float REFINEDET_DETECTIONOUTPUT_ETA_DEFAULT_VALUE = 1.0; +const bool REFINEDET_DETECTIONOUTPUT_VARIANCE_ENCODED_IN_TARGET_DEFAULT_VALUE = false; +const int REFINEDET_DETECTIONOUTPUT_CODE_TYPE_DEFAULT_VALUE = static_cast(cce::CC_BOX_CENTER_SIZE); +const int32_t REFINEDET_DETECTIONOUTPUT_KEEP_TOP_K_DEFAULT_VALUE = 200; +const float REFINEDET_DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.1; +const 
float REFINEDET_DETECTIONOUTPUT_OBJECTNESS_SCORE_DEFAULT_VALUE = 0; + +// Channel axpy +const int32_t CHANNEL_AXPY_INPUT_NUM = 3; +const int32_t CHANNEL_AXPY_INPUT_DIM_SIZE = 4; +const int32_t CHANNEL_AXPY_WORKSPACE_NUM = 1; + +// Psroi pooling +const int PSROI_POOLING_INPUT_COUNT = 2; +const int PSROI_POOLING_WORKSPACE_NUM = 1; + +// MaxPoolWithArgmax +const uint32_t MAX_POOL_WITH_ARGMAX_OUTPUT_NUM = 2; +const uint32_t MAX_POOL_GRAD_WITH_ARGMAX_INPUT_NUM = 3; + +// AvgPoolGrad +const uint32_t AVG_POOL_GRAD_INPUT_NUM = 2; + +// ROIAlign +const int32_t ROIALIGN_INPUT_SIZE = 2; +const int32_t ROIALIGN_WORKSPACE_NUM = 1; +const int32_t ROIALIGN_DEFAULT_POOLED_H = 1; +const int32_t ROIALIGN_DEFAULT_POOLED_W = 1; + +// Correlation +const uint32_t CORRELATION_INPUT_NUM = 2; +const int CORRELATION_WORKSPACE_NUM = 1; + +// Detectionpostprocess +const int32_t POSTPROCESS_INPUT_SIZE = 4; +const int32_t POSTPROCESS_OUTPUT_SIZE = 2; +const int32_t POSTPROCESS_WORKSPACE_NUM = 1; +const uint32_t POSTPROCESS_CLS_NUM_DEFAULT_VALUE = 12; +const uint32_t POSTPROCESS_POST_NMS_TOPN_DEFAULT_VALUE = 100; +const float POSTPROCESS_NMS_THRESH_DEFAULT_VALUE = 0.3; +const float POSTPROCESS_CONF_THRESH_DEFAULT_VALUE = 0.5; +const float POSTPROCESS_BBOX_REG_WEIGHT_DIM_DEFAULT_VALUE = 1.0; +const int32_t POSTPROCESS_BBOX_REG_WEIGHT_SIZE_DEFAULT_VALUE = 4; + +// Split +const int32_t SPLIT_INPUT_NUM = 2; +const int32_t SPLIT_DEFAULT_AXIS_VALUE = 1; +const int32_t SPLIT_MIN_OUTPUT_SIZE = 1; + +const uint32_t STRIDEDSLICE_INPUT_NUM = 4; +// Slice +const int32_t SLICE_INPUT_NUM = 3; +const int32_t SLICE_WEIGHT_NUM = 2; + +// GatherNd +const int32_t GATHERND_INPUT_NUM = 2; +// ArgMax +const int32_t ARGMAX_INPUT_NUM = 2; +const int32_t ARGMAX_REAL_INPUT_NUM = 1; + +// HighWay +const int32_t HIGHWAY_INPUT_NUM = 4; +const int32_t HIGHWAY_WORKSPACE_NUM = 1; +// RealDiv +const int32_t REALDIV_INPUT_NUM = 2; + +// Range +const int32_t RANGE_INPUT_NUM = 3; +const int32_t RANGE_OUTPUT_NUM = 1; +const int32_t 
RANGE_INPUT_DIM_SIZE = 0; + +// Pad +const int32_t PAD_WEIGHT_NUM = 1; +const int32_t PAD_DIM_SIZE = 2; +const int32_t PAD_DIM0 = 4; +const int32_t PAD_DIM1 = 2; +const int32_t PAD_WEIGHT_WITH_CONSTANT_NUM = 2; +const int32_t PAD_CONSTATNT_DEFAULT_VALUE = 0; +const int32_t PAD_PADDINGS_SIZE = 8; + +// Tile +const int32_t TILE_WEIGHT_NUM = 1; +const int32_t TILE_MULTIPLES_DIM_SIZE = 1; + +// DecodeBbox +const int32_t DECODE_BBOX_INPUT_NUM = 2; + +// GenerateRpnProposals +const int32_t GENERATE_RPN_PROPOSAL_INPUT_SIZE = 2; +const int32_t GENERATE_RPN_PROPOSAL_OUTPUT_SIZE = 3; + +// Decode_BBox +const int32_t DECODE_BBOX_INPUT_SIZE = 2; +const int32_t DEFAULT_DECODE_CLIP_VALUE = 0; + +// FastRcnnPredictions +const int32_t FASTRCNN_PREDICTIONS_INPUT_SIZE = 2; +const int32_t FASTRCNN_PREDICTIONS_OUTPUT_SIZE = 4; + +const int32_t CLIP_BOXES_INPUT_NUM = 1; +const int32_t CLIP_BOXES_WEIGHT_SIZE = 1; +const int32_t CLIP_BOXES_WEIGHT_ITEM_SIZE = 2; +const int32_t CLIP_BOXES_OUTPUT_NUM = 1; + +const int32_t FLOORDIV_INPUT_NUM = 2; +// Mean +const int32_t MEAN_WEIGHT_SIZE = 1; +const int32_t MEAN_WEIGHT_DIM_SIZE = 1; +const int32_t MEAN_WEIGHT_DIM = 2; +const int32_t MEAN_FIRST_AXIS = 2; +const int32_t MEAN_SECOND_AXIS = 3; +const int32_t MEAN_STRIDE_PLACE_HOLD = 1; +// Switch +const uint32_t SWITCH_INPUT_NUM = 2; +const uint32_t SWITCH_OUTPUT_NUM = 2; +// Merge +const uint32_t MERGE_INPUT_NUM = 2; +// Greater +const uint32_t GREATER_OUTPUT_NUM = 1; +const uint32_t GREATER_INPUT_NUM = 0; +const uint32_t GREATER_WEIGHT_NUM = 2; + +// Yolo region +const uint32_t YOLO_REGION_OUTPUT_NUM = 3; +const uint32_t YOLO_REGION_WORKSPACE_NUM = 1; +const uint32_t YOLO_REGION_COORDS = 4; +const uint32_t YOLO_REGION_CLASSES = 20; +const uint32_t YOLO_REGION_BOXES = 1; +const bool YOLO_REGION_BACKGROUND = false; +const bool YOLO_REGION_SOFTMAX = false; +const bool YOLO_REGION_SOFTMAX_TREE = false; + +// Yolo detectionoutput +const uint32_t YOLO_DETECTIONOUTPUT_INPUT_SIZE = 4; +const uint32_t 
YOLO_DETECTIONOUTPUT_OUTPUT_SIZE = 2; +const uint32_t YOLO_DETECTION_OUTPUT_WORKSPACE_NUM = 1; +const uint32_t YOLO_DETECTION_OUTPUT_CLASSES = 20; +const uint32_t YOLO_DETECTION_OUTPUT_BOXES_V2 = 5; +const uint32_t YOLO_DETECTION_OUTPUT_BOXES_V3 = 3; +const bool YOLO_DETECTION_OUTPUT_RELATIVE = true; +const float YOLO_DETECTION_OUTPUT_OBJECTNESS_THRESHOLD = 0.5; +const float YOLO_DETECTION_OUTPUT_CLASS_THRESHOLD = 0.5; +const uint32_t YOLO_DETECTION_OUTPUT_POST_TOP_K = UINT_MAX; +const float YOLO_DETECTION_OUTPUT_NMS_THRESHOLD = 0; +const float YOLO_DETECTION_OUTPUT_IOU_THRESHOLD_DECAY = 1.0; +const float YOLO_DETECTION_OUTPUT_COOR_SCALE_FACTOR = 1.0; + +// Reorg +const int32_t REORG_DEFAULT_STRIDE = 2; +const uint32_t REORG_INPUT_COUNT = 1; +// Reshape +const int32_t RESHAPE_INPUT_NUM = 2; +// Maximum +const int32_t MAXIMUM_INPUT_NUM = 2; + +// Spatialtf +const int32_t SPATIALTF_WORKSPACE_NUM = 1; + +const int32_t REVERSE_DEFAULT_AXIS = 1; +// Crop +const int32_t CROP_AXIS = 2; +const int32_t CROP_INPUT_NUM = 2; + +// ConvGradInput +const uint32_t CONVGRADINPUT_WORKSPACE_NUM = 1; +const uint32_t CONVGRADINPUT_INPUT_NUM = 3; + +// RNN +const uint32_t RNN_WORKSPACE_NUM = 1; + +// Cropandresize +const int32_t CROPANDRESIZE_WEIGHT_NUM = 1; +const int32_t CROPANDRESIZE_CROP_DIM_SIZE = 1; +const int32_t CROP_DIM0 = 2; + +// Attention decoder weight index +const uint32_t ATTENTION_DECODER_WEIGHT_ATTENW0 = 0; +const uint32_t ATTENTION_DECODER_WEIGHT_ATTENTION0_KERNEL = 1; +const uint32_t ATTENTION_DECODER_WEIGHT_ATTNOUTPUTPROJECTION_KERNEL = 2; +const uint32_t ATTENTION_DECODER_WEIGHT_ATTENTION_DECODER_KERNEL = 3; +const uint32_t ATTENTION_DECODER_WEIGHT_CELL0_GATES_KERNEL = 4; +const uint32_t ATTENTION_DECODER_WEIGHT_CELL0_CANDIDATE_KERNEL = 5; +const uint32_t ATTENTION_DECODER_WEIGHT_CELL1_GATES_KERNEL = 6; +const uint32_t ATTENTION_DECODER_WEIGHT_CELL1_CANDIDATE_KERNEL = 7; +const uint32_t ATTENTION_DECODER_WEIGHT_ATTENTION0_BIAS = 8; +const uint32_t 
ATTENTION_DECODER_WEIGHT_ATTNOUTPUTPROJECTION_BIAS = 9; +const uint32_t ATTENTION_DECODER_WEIGHT_ATTENTION_DECODER_BIAS = 10; +const uint32_t ATTENTION_DECODER_WEIGHT_CELL0_GATES_BIAS = 11; +const uint32_t ATTENTION_DECODER_WEIGHT_CELL0_CANDIDATE_BIAS = 12; +const uint32_t ATTENTION_DECODER_WEIGHT_CELL1_GATES_BIAS = 13; +const uint32_t ATTENTION_DECODER_WEIGHT_CELL1_CANDIDATE_BIAS = 14; +const uint32_t ATTENTION_DECODER_WEIGHT_EMBEDDING = 15; +const uint32_t ATTENTION_DECODER_WEIGHT_ATTENVA = 16; +const uint32_t ATTENTION_DECODER_WEIGHT_DECODER_INITIAL = 17; +// Attention decoder weight size +const uint32_t ATTENTION_DECODER_WEIGHT_SIZE = 18; + +const uint32_t ATTENTION_DECODER_INPUT_SIZE = 2; +const uint32_t ATTENTION_DECODER_WORKSPACE_NUM = 1; +const uint32_t ATTENTION_DECODER_INPUT_DECODER_INPUTS = 0; +const uint32_t ATTENTION_DECODER_INPUT_DECODER_INITIAL_HIDDEN = 1; + +const int ATTENTION_DECODER_ALGO_NORMAL = 0; +const int ATTENTION_DECODER_SYMBOLS = 10000; +const int ATTENTION_DECODER_EMBEDDING_SIZE = 128; +const int ATTENTION_DECODER_ATTENTION_NUM_HIDDEN = 256; +const int ATTENTION_DECODER_DECODER_NUM_HIDDEN = 128; +const int ATTENTION_DECODER_DECODER_NUM_LAYERS = 2; +const int ATTENTION_DECODER_RNN_UNBIDIRECTIONAL = 0; +const int ATTENTION_DECODER_SEQLEN_VALUE = 57; +const int ATTENTION_DECODER_GRU = 3; + +// Logicaland +const int32_t LOGICAL_AND_INPUT_NUM = 2; +const int32_t EQUAL_INPUT_NUM = 2; + +static const int32_t OP_WEIGHT_MEM_BASE_OFFSET = 512; + +// MultiShape +const uint32_t MULTI_SHAPE_INPUT_NUM = 2; + +// Shufflechannel +const uint32_t SHUFFLECHANNEL_DEFAULT_GROUP = 1; +} // namespace ge +#endif // INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_ diff --git a/inc/framework/common/op_types.h b/inc/framework/common/op_types.h new file mode 100644 index 000000000..4555d5c3e --- /dev/null +++ b/inc/framework/common/op_types.h @@ -0,0 +1,62 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 
(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_OP_TYPES_H_ +#define INC_FRAMEWORK_COMMON_OP_TYPES_H_ + +#include +#include + +namespace ge { +class OpTypeContainer { + public: + static OpTypeContainer *Instance() { + static OpTypeContainer instance; + return &instance; + } + ~OpTypeContainer() = default; + + void Register(const std::string &op_type) { op_type_list_.insert(op_type); } + + bool IsExisting(const std::string &op_type) { + auto iter_find = op_type_list_.find(op_type); + return iter_find != op_type_list_.end(); + } + + protected: + OpTypeContainer() {} + + private: + std::set op_type_list_; +}; + +class OpTypeRegistrar { + public: + explicit OpTypeRegistrar(const std::string &op_type) { OpTypeContainer::Instance()->Register(op_type); } + ~OpTypeRegistrar() {} +}; + +#define REGISTER_OPTYPE_DECLARE(var_name, str_name) \ + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *var_name; + +#define REGISTER_OPTYPE_DEFINE(var_name, str_name) \ + const char *var_name = str_name; \ + const OpTypeRegistrar g_##var_name##_reg(str_name); + +#define IS_OPTYPE_EXISTING(str_name) (OpTypeContainer::Instance()->IsExisting(str_name)) +} // namespace ge + +#endif // INC_FRAMEWORK_COMMON_OP_TYPES_H_ diff --git a/inc/framework/common/scope_guard.h b/inc/framework/common/scope_guard.h new file mode 100644 index 000000000..001a0e757 --- /dev/null +++ b/inc/framework/common/scope_guard.h @@ -0,0 +1,59 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_SCOPE_GUARD_H_ +#define INC_FRAMEWORK_COMMON_SCOPE_GUARD_H_ + +#include +#include + +/// Usage: +/// Acquire Resource 1 +/// MAKE_GUARD([&] { Release Resource 1 }) +/// Acquire Resource 2 +// MAKE_GUARD([&] { Release Resource 2 }) +#define GE_MAKE_GUARD(var, callback) ScopeGuard make_guard_##var(callback) +#define GE_DISMISS_GUARD(var) make_guard_##var.Dismiss() + +namespace ge { +class ScopeGuard { + public: + // Noncopyable + ScopeGuard(ScopeGuard const &) = delete; + ScopeGuard &operator=(ScopeGuard const &) = delete; + + explicit ScopeGuard(const std::function &on_exit_scope) : on_exit_scope_(on_exit_scope), dismissed_(false) {} + + ~ScopeGuard() { + if (!dismissed_) { + if (on_exit_scope_ != nullptr) { + try { + on_exit_scope_(); + } catch (std::bad_function_call &e) { } + catch (...) { } + } + } + } + + void Dismiss() { dismissed_ = true; } + + private: + std::function on_exit_scope_; + bool dismissed_; +}; +} // namespace ge + +#endif // INC_FRAMEWORK_COMMON_SCOPE_GUARD_H_ diff --git a/inc/framework/common/string_util.h b/inc/framework/common/string_util.h new file mode 100644 index 000000000..47e80e759 --- /dev/null +++ b/inc/framework/common/string_util.h @@ -0,0 +1,156 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/* (Apache License 2.0 header, continued)
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INC_FRAMEWORK_COMMON_STRING_UTIL_H_
#define INC_FRAMEWORK_COMMON_STRING_UTIL_H_

// NOTE(review): the original include list was stripped by extraction; these are
// the standard headers the code below requires -- confirm against the original.
#include <algorithm>
#include <cctype>
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <sstream>
#include <string>
#include <vector>

namespace ge {
// Static string helpers shared across the framework.
class StringUtils {
 public:
  /// Remove leading whitespace from @p s in place and return it.
  static std::string &Ltrim(std::string &s) {
#if __cplusplus >= 201103L
    // Cast through unsigned char: std::isspace on a negative char is UB.
    (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(),
                                          [](unsigned char c) { return !std::isspace(c); }));
#else
    (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<int, int>(std::isspace))));
#endif
    return s;
  }

  /// Remove trailing whitespace from @p s in place and return it.
  static std::string &Rtrim(std::string &s) {
#if __cplusplus >= 201103L
    // BUG FIX: this branch previously duplicated Ltrim (it erased from the
    // FRONT of the string), so Rtrim/Trim never removed trailing spaces under
    // C++11. Trim from the back, mirroring the legacy branch below.
    (void)s.erase(std::find_if(s.rbegin(), s.rend(),
                               [](unsigned char c) { return !std::isspace(c); }).base(),
                  s.end());
#else
    (void)s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int, int>(std::isspace))).base(),
                  s.end());
#endif
    return s;
  }

  ///
  /// @ingroup domi_common
  /// @brief delete spaces at the beginning and end of a string
  /// @param [in,out] s string to be trimmed (modified in place)
  /// @return reference to the trimmed string
  ///
  static std::string &Trim(std::string &s) { return Ltrim(Rtrim(s)); }

  ///
  /// @ingroup domi_common
  /// @brief string splitting
  /// @param [in] str string to be split
  /// @param [in] delim separator character
  /// @return pieces in order; an empty input yields {""} and a trailing
  ///         delimiter contributes a trailing empty element
  ///
  static std::vector<std::string> Split(const std::string &str, char delim) {
    std::vector<std::string> elems;

    if (str.empty()) {
      elems.emplace_back("");
      return elems;
    }

    std::stringstream ss(str);
    std::string item;
    while (std::getline(ss, item, delim)) {
      elems.push_back(item);
    }

    // std::getline drops a trailing empty field; restore it so that
    // "a," splits into {"a", ""}.
    auto str_size = str.size();
    if (str_size > 0 && str[str_size - 1] == delim) {
      elems.emplace_back("");
    }

    return elems;
  }

  ///
  /// @ingroup domi_common
  /// @brief obtain the file name (last '/'-separated path component)
  /// @param [in] s path name
  /// @return file name, or "" for an empty path
  ///
  static std::string GetFileName(std::string &s) {  // NOTE(review): could be const& -- kept for interface stability
    if (s.empty()) {
      return "";
    }
    std::vector<std::string> files = StringUtils::Split(s, '/');
    return files.empty() ? "" : files[files.size() - 1];
  }

  ///
  /// @ingroup domi_common
  /// @brief full replacement: replace every occurrence of @p old_value with @p new_value
  /// @param [in] str string to be rewritten (taken by value)
  /// @param [in] old_value characters before replacement
  /// @param [in] new_value characters after replacement
  /// @return string after replacement
  ///
  static std::string ReplaceAll(std::string str, const std::string &old_value, const std::string &new_value) {
    std::string::size_type old_length = old_value.length();
    std::string::size_type new_length = new_value.length();
    // BUG FIX: an empty old_value matches at every position; with an empty
    // new_value the cursor never advanced and the loop span forever.
    if (old_length == 0) {
      return str;
    }
    // Advance past each inserted new_value so a replacement containing
    // old_value cannot be re-matched.
    for (std::string::size_type cur_pos = 0; cur_pos != std::string::npos; cur_pos += new_length) {
      if ((cur_pos = str.find(old_value, cur_pos)) != std::string::npos) {
        (void)str.replace(cur_pos, old_length, new_value);
      } else {
        break;
      }
    }
    return str;
  }

  ///
  /// @ingroup domi_common
  /// @brief checks whether a character string starts with a character string (prefix)
  /// @param [in] str string to be compared
  /// @param [in] str_x prefix
  /// @return true iff @p str_x is a prefix of @p str
  ///
  static bool StartWith(const std::string &str, const std::string str_x) {
    return ((str.size() >= str_x.size()) && (str.compare(0, str_x.size(), str_x) == 0));
  }

  ///
  /// @ingroup domi_common
  /// @brief printf-style formatting into a std::string
  /// @param [in] format printf format string
  /// @param [in] ... format arguments
  /// @return formatted string, or "" on formatting error or truncation
  ///
  static std::string FormatString(const char *format, ...) {
    const uint32_t MAX_BUFFER_LEN = 1024;  // stack buffer; lint requires < 1024 bytes
    va_list args;
    va_start(args, format);
    char buffer[MAX_BUFFER_LEN] = {0};
    // std::vsnprintf replaces the original securec vsnprintf_s; as with the
    // secure variant, error or truncation yields "" (vsnprintf reports the
    // would-be length, so ret >= MAX_BUFFER_LEN means the output was cut).
    int32_t ret = vsnprintf(buffer, MAX_BUFFER_LEN, format, args);
    va_end(args);
    return (ret > 0 && static_cast<uint32_t>(ret) < MAX_BUFFER_LEN) ? buffer : "";
  }
};
}  // namespace ge

#endif  // INC_FRAMEWORK_COMMON_STRING_UTIL_H_

// --- patch continues: new file inc/framework/common/types.h
//     (Copyright 2019-2020 Huawei Technologies Co., Ltd, Apache 2.0 license header) ---
+ */ + +#ifndef INC_FRAMEWORK_COMMON_TYPES_H_ +#define INC_FRAMEWORK_COMMON_TYPES_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "framework/common/fmk_error_codes.h" +#include "framework/common/fmk_types.h" +#include "framework/common/op_types.h" +#include "register/register_types.h" + +#if !defined(__ANDROID__) && !defined(ANDROID) +#define DOMI_DYNAMIC_CAST static_cast +#define DOMI_DYNAMIC_POINTER_CAST std::static_pointer_cast +#else +#define DOMI_DYNAMIC_CAST static_cast +#define DOMI_DYNAMIC_POINTER_CAST std::static_pointer_cast +#endif + +namespace ge { +// dump +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_MODEL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_ALL_MODEL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_STATUS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_LAYER; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_FILE_PATH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_MODE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_AICORE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_ATOMIC; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_ALL; + +// Supported public properties name +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROP_OME_START_TIME; // Start time +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROP_OME_DUMP_PATH; // Dump path +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROP_OME_LOG_PATH; // Log path + +// Profile-related constants +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t CCE_PROFILE_ON; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t 
CCE_PROFILE_OFF; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OME_PROFILE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string CCE_PROFILE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string RTS_PROFILE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILER_JOBCTX; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILER_TARGET_PATH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string RTS_PROFILE_PATH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_STOP_KEY; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_STOP_VALUE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::map PROFILE_COMPONENT_MAP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_CONFIG; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_FUSION_MODEL_DEF; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int MODEL_MAX_SIZE; // Max size of 2 GB minus 1 byte. +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t FILE_HEADER_MAX_SIZE; // Max size of 3 GB. + +#if !defined(__ANDROID__) && !defined(ANDROID) +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t ALLOC_MEMORY_MAX_SIZE; // Max size of 8 GB. +#else +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t ALLOC_MEMORY_MAX_SIZE; // Max size of 512M. 
+#endif + +template +static std::pair flip_pair(const std::pair &p) { + return std::pair(p.second, p.first); +} + +template +static std::map flip_map(std::map src) { + std::map dst; + std::transform(src.begin(), src.end(), std::inserter(dst, dst.begin()), flip_pair); + return dst; +} + +REGISTER_OPTYPE_DECLARE(DATA, "Data"); +REGISTER_OPTYPE_DECLARE(AIPPDATA, "AippData"); +REGISTER_OPTYPE_DECLARE(CONVOLUTION, "Convolution"); +REGISTER_OPTYPE_DECLARE(CORRELATION, "Correlation"); +REGISTER_OPTYPE_DECLARE(CORRELATIONV2, "Correlation_V2"); +REGISTER_OPTYPE_DECLARE(DECONVOLUTION, "Deconvolution"); +REGISTER_OPTYPE_DECLARE(POOLING, "Pooling"); +REGISTER_OPTYPE_DECLARE(ELTWISE, "Eltwise"); +REGISTER_OPTYPE_DECLARE(RELU, "ReLU"); +REGISTER_OPTYPE_DECLARE(RELU6, "ReLU6"); +REGISTER_OPTYPE_DECLARE(SIGMOID, "Sigmoid"); +REGISTER_OPTYPE_DECLARE(ABSVAL, "AbsVal"); +REGISTER_OPTYPE_DECLARE(TANH, "TanH"); +REGISTER_OPTYPE_DECLARE(PRELU, "PReLU"); +REGISTER_OPTYPE_DECLARE(BATCHNORM, "BatchNorm"); +REGISTER_OPTYPE_DECLARE(FUSIONBATCHNORM, "FusionBatchNorm"); +REGISTER_OPTYPE_DECLARE(SCALE, "Scale"); +REGISTER_OPTYPE_DECLARE(FULL_CONNECTION, "FullConnection"); +REGISTER_OPTYPE_DECLARE(SOFTMAX, "Softmax"); +REGISTER_OPTYPE_DECLARE(PLUS, "Plus"); +REGISTER_OPTYPE_DECLARE(ACTIVATION, "Activation"); +REGISTER_OPTYPE_DECLARE(FLATTEN, "Flatten"); +REGISTER_OPTYPE_DECLARE(ADD, "Add"); +REGISTER_OPTYPE_DECLARE(SUB, "Sub"); +REGISTER_OPTYPE_DECLARE(MUL, "Mul"); +REGISTER_OPTYPE_DECLARE(MATMUL, "MatMul"); +REGISTER_OPTYPE_DECLARE(RSQRT, "Rsqrt"); +REGISTER_OPTYPE_DECLARE(BIASADD, "BiasAdd"); +REGISTER_OPTYPE_DECLARE(RESHAPE, "Reshape"); +REGISTER_OPTYPE_DECLARE(REFORMAT, "ReFormat"); +REGISTER_OPTYPE_DECLARE(DEPCONVOLUTION, "ConvolutionDepthwise"); +REGISTER_OPTYPE_DECLARE(DROPOUT, "Dropout"); +REGISTER_OPTYPE_DECLARE(DROPOUTDOMASK, "DropOutDoMask"); +REGISTER_OPTYPE_DECLARE(DROPOUTGENMASK, "DropOutGenMask"); +REGISTER_OPTYPE_DECLARE(CONCAT, "Concat"); +REGISTER_OPTYPE_DECLARE(ROIPOOLING, 
"ROIPooling"); +REGISTER_OPTYPE_DECLARE(PROPOSAL, "Proposal"); +REGISTER_OPTYPE_DECLARE(FSRDETECTIONOUTPUT, "FSRDetectionOutput"); +REGISTER_OPTYPE_DECLARE(DETECTIONPOSTPROCESS, "Detectpostprocess"); +REGISTER_OPTYPE_DECLARE(LRN, "LRN"); +REGISTER_OPTYPE_DECLARE(TRANSDATA, "TransData"); +REGISTER_OPTYPE_DECLARE(PERMUTE, "Permute"); +REGISTER_OPTYPE_DECLARE(SSDNORMALIZE, "SSDNormalize"); +REGISTER_OPTYPE_DECLARE(SSDPRIORBOX, "SSDPriorBox"); +REGISTER_OPTYPE_DECLARE(NETOUTPUT, "NetOutput"); +REGISTER_OPTYPE_DECLARE(SSDDETECTIONOUTPUT, "SSDDetectionOutput"); +REGISTER_OPTYPE_DECLARE(REFINEDETDETECTIONOUTPUT, "RefinedetDetectionOutput"); +REGISTER_OPTYPE_DECLARE(CHANNELAXPY, "ChannelAxpy"); +REGISTER_OPTYPE_DECLARE(PSROIPOOLING, "PSROIPooling"); +REGISTER_OPTYPE_DECLARE(POWER, "Power"); +REGISTER_OPTYPE_DECLARE(POW, "Pow"); +REGISTER_OPTYPE_DECLARE(ROIALIGN, "ROIAlign"); +REGISTER_OPTYPE_DECLARE(PYTHON, "Python"); +REGISTER_OPTYPE_DECLARE(FREESPACEEXTRACT, "FreespaceExtract"); +REGISTER_OPTYPE_DECLARE(SPATIALTF, "SpatialTransform"); +REGISTER_OPTYPE_DECLARE(SHAPE, "Shape"); +REGISTER_OPTYPE_DECLARE(SHAPEN, "ShapeN"); +REGISTER_OPTYPE_DECLARE(ARGMAX, "ArgMax"); +REGISTER_OPTYPE_DECLARE(GATHERND, "GatherNd"); +REGISTER_OPTYPE_DECLARE(GATHER, "Gather"); +REGISTER_OPTYPE_DECLARE(REALDIV, "RealDiv"); +REGISTER_OPTYPE_DECLARE(PACK, "Pack"); +REGISTER_OPTYPE_DECLARE(SLICE, "Slice"); +REGISTER_OPTYPE_DECLARE(SLICED, "SliceD"); +REGISTER_OPTYPE_DECLARE(FLOORDIV, "FloorDiv"); +REGISTER_OPTYPE_DECLARE(SQUEEZE, "Squeeze"); +REGISTER_OPTYPE_DECLARE(UNSQUEEZE, "Unsqueeze"); +REGISTER_OPTYPE_DECLARE(STRIDEDSLICE, "StridedSlice"); +REGISTER_OPTYPE_DECLARE(RANGE, "Range"); +REGISTER_OPTYPE_DECLARE(RPNPROPOSALS, "GenerateRpnProposals"); +REGISTER_OPTYPE_DECLARE(DECODEBBOX, "DecodeBBox"); +REGISTER_OPTYPE_DECLARE(PAD, "Pad"); +REGISTER_OPTYPE_DECLARE(PADV2, "PadV2"); +REGISTER_OPTYPE_DECLARE(MIRRORPAD, "MirrorPad"); +REGISTER_OPTYPE_DECLARE(TILE, "Tile"); +REGISTER_OPTYPE_DECLARE(SIZE, 
"Size"); +REGISTER_OPTYPE_DECLARE(CLIPBOXES, "Clipboxes"); +REGISTER_OPTYPE_DECLARE(FASTRCNNPREDICTIONS, "FastrcnnPredictions"); +REGISTER_OPTYPE_DECLARE(SPLIT, "Split"); +REGISTER_OPTYPE_DECLARE(SPLITV, "SplitV"); +REGISTER_OPTYPE_DECLARE(EXPANDDIMS, "ExpandDims"); +REGISTER_OPTYPE_DECLARE(EMPTY, "Empty"); +REGISTER_OPTYPE_DECLARE(MEAN, "Mean"); +REGISTER_OPTYPE_DECLARE(GREATER, "Greater"); +REGISTER_OPTYPE_DECLARE(SWITCH, "Switch"); +REGISTER_OPTYPE_DECLARE(SWITCHN, "SwitchN"); +REGISTER_OPTYPE_DECLARE(REFSWITCH, "RefSwitch"); +REGISTER_OPTYPE_DECLARE(MERGE, "Merge"); +REGISTER_OPTYPE_DECLARE(REFMERGE, "RefMerge"); +REGISTER_OPTYPE_DECLARE(ENTER, "Enter"); +REGISTER_OPTYPE_DECLARE(REFENTER, "RefEnter"); +REGISTER_OPTYPE_DECLARE(LOOPCOND, "LoopCond"); +REGISTER_OPTYPE_DECLARE(NEXTITERATION, "NextIteration"); +REGISTER_OPTYPE_DECLARE(REFNEXTITERATION, "RefNextIteration"); +REGISTER_OPTYPE_DECLARE(EXIT, "Exit"); +REGISTER_OPTYPE_DECLARE(REFEXIT, "RefExit"); +REGISTER_OPTYPE_DECLARE(CONTROLTRIGGER, "ControlTrigger"); +REGISTER_OPTYPE_DECLARE(SYMBOLICGRADIENT, "SymbolicGradient"); +REGISTER_OPTYPE_DECLARE(REMOTECALL, "RemoteCall"); +REGISTER_OPTYPE_DECLARE(_IF, "_If"); +REGISTER_OPTYPE_DECLARE(STATELESSIF, "StatelessIf"); +REGISTER_OPTYPE_DECLARE(IF, "If"); +REGISTER_OPTYPE_DECLARE(CASE, "Case"); +REGISTER_OPTYPE_DECLARE(_WHILE, "_While"); +REGISTER_OPTYPE_DECLARE(WHILE, "While"); +REGISTER_OPTYPE_DECLARE(STATELESSWHILE, "StatelessWhile"); +REGISTER_OPTYPE_DECLARE(FOR, "For"); +REGISTER_OPTYPE_DECLARE(PARTITIONEDCALL, "PartitionedCall"); +REGISTER_OPTYPE_DECLARE(STATEFULPARTITIONEDCALL, "StatefulPartitionedCall"); +REGISTER_OPTYPE_DECLARE(FAKEPARAM, "FakeParam"); +REGISTER_OPTYPE_DECLARE(TRANSPOSE, "Transpose"); +REGISTER_OPTYPE_DECLARE(TRANSPOSED, "TransposeD"); +REGISTER_OPTYPE_DECLARE(CAST, "Cast"); +REGISTER_OPTYPE_DECLARE(REGION, "Region"); +REGISTER_OPTYPE_DECLARE(YOLO, "Yolo"); +REGISTER_OPTYPE_DECLARE(YOLODETECTIONOUTPUT, "YoloDetectionOutput"); 
+REGISTER_OPTYPE_DECLARE(FILL, "Fill"); +REGISTER_OPTYPE_DECLARE(RANK, "Rank"); +REGISTER_OPTYPE_DECLARE(REVERSE, "Reverse"); +REGISTER_OPTYPE_DECLARE(UNPACK, "Unpack"); +REGISTER_OPTYPE_DECLARE(YOLO2REORG, "Yolo2Reorg"); +REGISTER_OPTYPE_DECLARE(REDUCESUM, "ReduceSum"); +REGISTER_OPTYPE_DECLARE(SUM, "Sum"); +REGISTER_OPTYPE_DECLARE(CONSTANT, "Const"); +REGISTER_OPTYPE_DECLARE(RESIZEBILINEAR, "ResizeBilinear"); +REGISTER_OPTYPE_DECLARE(RESIZEBILINEARGRAD, "ResizeBilinearGrad"); +REGISTER_OPTYPE_DECLARE(MAXIMUM, "Maximum"); +REGISTER_OPTYPE_DECLARE(FRAMEWORKOP, "FrameworkOp"); +REGISTER_OPTYPE_DECLARE(ARG, "_Arg"); +REGISTER_OPTYPE_DECLARE(FUSEDBATCHNORMGRAD, "FusedBatchNormGrad"); +REGISTER_OPTYPE_DECLARE(LSTM, "LSTM"); +REGISTER_OPTYPE_DECLARE(HIGHWAY, "HighWay"); +REGISTER_OPTYPE_DECLARE(RNN, "RNN"); +REGISTER_OPTYPE_DECLARE(ATTENTIONDECODER, "AttentionDecoder"); +REGISTER_OPTYPE_DECLARE(LOGICAL_NOT, "LogicalNot"); +REGISTER_OPTYPE_DECLARE(LOGICAL_AND, "LogicalAnd"); +REGISTER_OPTYPE_DECLARE(LOGICAL_OR, "LogicalOr"); +REGISTER_OPTYPE_DECLARE(EQUAL, "Equal"); +REGISTER_OPTYPE_DECLARE(NOTEQUAL, "NotEqual"); +REGISTER_OPTYPE_DECLARE(INTERP, "Interp"); +REGISTER_OPTYPE_DECLARE(SHUFFLECHANNEL, "ShuffleChannel"); +REGISTER_OPTYPE_DECLARE(AIPP, "Aipp"); +REGISTER_OPTYPE_DECLARE(MULTISHAPE, "MultiShape"); +REGISTER_OPTYPE_DECLARE(RECIPROCAL, "Reciprocal"); +REGISTER_OPTYPE_DECLARE(SELU, "Selu"); +REGISTER_OPTYPE_DECLARE(ELU, "Elu"); +REGISTER_OPTYPE_DECLARE(ACOSH, "Acosh"); +REGISTER_OPTYPE_DECLARE(ASINH, "Asinh"); +REGISTER_OPTYPE_DECLARE(MINIMUM, "Minimum"); +REGISTER_OPTYPE_DECLARE(CLIP, "Clip"); +REGISTER_OPTYPE_DECLARE(L2NORMALIZE, "L2Normalize"); +REGISTER_OPTYPE_DECLARE(CROPANDRESIZE, "CropAndResize"); +REGISTER_OPTYPE_DECLARE(UNUSEDCONST, "UnusedConst"); +REGISTER_OPTYPE_DECLARE(SPARSETODENSE, "SparseToDense"); +REGISTER_OPTYPE_DECLARE(NONMAXSUPPRESSION, "NonMaxSuppression"); +REGISTER_OPTYPE_DECLARE(TOPKV2, "TopKV2"); +REGISTER_OPTYPE_DECLARE(INVERTPERMUTATION, 
"InvertPermutation"); +REGISTER_OPTYPE_DECLARE(MULTINOMIAL, "Multinomial"); +REGISTER_OPTYPE_DECLARE(REVERSESEQUENCE, "ReverseSequence"); +REGISTER_OPTYPE_DECLARE(REDUCEPROD, "ReduceProd"); +REGISTER_OPTYPE_DECLARE(REDUCEMAX, "ReduceMax"); +REGISTER_OPTYPE_DECLARE(REDUCEMIN, "ReduceMin"); +REGISTER_OPTYPE_DECLARE(EXTRACTIMAGEPATCHES, "ExtractImagePatches"); +REGISTER_OPTYPE_DECLARE(SQRT, "Sqrt"); +REGISTER_OPTYPE_DECLARE(REDUCEALL, "ReduceAll"); +REGISTER_OPTYPE_DECLARE(RESIZENEARESTNEIGHBOR, "ResizeNearestNeighbor"); +REGISTER_OPTYPE_DECLARE(SPACETOBATCHND, "SpaceToBatchND"); +REGISTER_OPTYPE_DECLARE(BATCHTOSPACEND, "BatchToSpaceND"); +REGISTER_OPTYPE_DECLARE(ASSERT, "Assert"); +REGISTER_OPTYPE_DECLARE(GREATEREQUAL, "GreaterEqual"); +REGISTER_OPTYPE_DECLARE(FLOOR, "Floor"); +REGISTER_OPTYPE_DECLARE(RANDOMUNIFORM, "RandomUniform"); +REGISTER_OPTYPE_DECLARE(BATCHMATMUL, "BatchMatMul"); +REGISTER_OPTYPE_DECLARE(LESSEQUAL, "LessEqual"); +REGISTER_OPTYPE_DECLARE(ONEHOT, "OneHot"); +REGISTER_OPTYPE_DECLARE(LAYERNORM, "LayerNorm"); +REGISTER_OPTYPE_DECLARE(SPACETODEPTH, "SpaceToDepth"); +REGISTER_OPTYPE_DECLARE(DEPTHTOSPACE, "DepthToSpace"); +REGISTER_OPTYPE_DECLARE(RINT, "Rint"); +REGISTER_OPTYPE_DECLARE(ATAN, "Atan"); +REGISTER_OPTYPE_DECLARE(ATAN2, "Atan2"); +REGISTER_OPTYPE_DECLARE(ATANH, "Atanh"); +REGISTER_OPTYPE_DECLARE(ACOS, "Acos"); +REGISTER_OPTYPE_DECLARE(ASIN, "Asin"); +REGISTER_OPTYPE_DECLARE(NEG, "Neg"); +REGISTER_OPTYPE_DECLARE(LOG, "Log"); +REGISTER_OPTYPE_DECLARE(TAN, "Tan"); +REGISTER_OPTYPE_DECLARE(ROUND, "Round"); +REGISTER_OPTYPE_DECLARE(UPSAMPLE, "Upsample"); +REGISTER_OPTYPE_DECLARE(FLOORMOD, "FloorMod"); +REGISTER_OPTYPE_DECLARE(LESS, "Less"); +REGISTER_OPTYPE_DECLARE(ZEROSLIKE, "ZerosLike"); +REGISTER_OPTYPE_DECLARE(EXP, "Exp"); +REGISTER_OPTYPE_DECLARE(WHERE, "Where"); +REGISTER_OPTYPE_DECLARE(FAKEQUANTWITHMINMAXVARS, "FakeQuantWithMinMaxVars"); +REGISTER_OPTYPE_DECLARE(SOFTPLUS, "Softplus"); +REGISTER_OPTYPE_DECLARE(SOFTSIGN, "Softsign"); 
+REGISTER_OPTYPE_DECLARE(COSH, "Cosh"); +REGISTER_OPTYPE_DECLARE(SINH, "Sinh"); +REGISTER_OPTYPE_DECLARE(RETINAMULTIANCHORS, "RetinaMultiAnchor"); +REGISTER_OPTYPE_DECLARE(SQUAREDDIFFERENCE, "SquaredDifference"); +REGISTER_OPTYPE_DECLARE(REQUIREDSPACETOBATCHPADDINGS, "RequiredSpaceToBatchPaddings"); // for retinanet scope fusion +REGISTER_OPTYPE_DECLARE(SSDPOSTPROCESSOR, "SSDPostProcessor"); +REGISTER_OPTYPE_DECLARE(SSDANCHORGENERATOR, "SSDAnchorGenerator"); +REGISTER_OPTYPE_DECLARE(RETINANETBOXES, "RetinanetBoxes"); +REGISTER_OPTYPE_DECLARE(RETINANETCLIPPEDBOXES, "RetinanetClippedBoxes"); +REGISTER_OPTYPE_DECLARE(RETINANETFILTEREDDETECTIONS, "RetinanetFilteredDetections"); +REGISTER_OPTYPE_DECLARE(RETINANETPOSTPROCESSOR, "RetinanetPostProcessor"); +REGISTER_OPTYPE_DECLARE(RETINANETANCHORS, "RetinanetAnchors"); +REGISTER_OPTYPE_DECLARE(FASTERRCNNMAP, "FasterRCNNMap"); +REGISTER_OPTYPE_DECLARE(FASTERRCNNMAP1, "FasterRCNNMap1"); +REGISTER_OPTYPE_DECLARE(FASTERRCNNSECONDSTAGEPOSTPROCESSOR, "FasterRCNNSecondStagePostprocessor"); +REGISTER_OPTYPE_DECLARE(FASTERRCNNROIINTERPOOLING, "FasterRCNNROIInterPooling"); +REGISTER_OPTYPE_DECLARE(FASTERRCNNFIRSTSTAGEPOSTPROCESSOR, "FasterRCNNFirstStagePostprocessor"); +REGISTER_OPTYPE_DECLARE(FASTERRCNNGRIDANCHORGENERATOR, "FasterRCNNGridAnchorGenerator"); +REGISTER_OPTYPE_DECLARE(ROIINTERPOOLING, "ROIInterPooling"); +REGISTER_OPTYPE_DECLARE(FASTERRCNNCLIPTOWINDOW, "FasterRCNNClipToWindow"); +REGISTER_OPTYPE_DECLARE(EMBEDLOOKUP, "EmbedLookup"); +REGISTER_OPTYPE_DECLARE(HASHLOOKUP, "HashLookup"); +REGISTER_OPTYPE_DECLARE(LSH_PROJ, "LshProject"); +REGISTER_OPTYPE_DECLARE(SVDF, "SVDF"); +REGISTER_OPTYPE_DECLARE(IDENTITY, "Identity"); +REGISTER_OPTYPE_DECLARE(PLACEHOLDERWITHDEFAULT, "PlaceholderWithDefault"); +REGISTER_OPTYPE_DECLARE(IDENTITYN, "IdentityN"); +REGISTER_OPTYPE_DECLARE(GETSPAN, "GetSpan"); +REGISTER_OPTYPE_DECLARE(STOPGRADIENT, "StopGradient"); +REGISTER_OPTYPE_DECLARE(PREVENTGRADIENT, "PreventGradient"); 
+REGISTER_OPTYPE_DECLARE(GUARANTEECONST, "GuaranteeConst"); +REGISTER_OPTYPE_DECLARE(BROADCASTGRADIENTARGS, "BroadcastGradientArgs"); +REGISTER_OPTYPE_DECLARE(BROADCASTARGS, "BroadcastArgs"); +REGISTER_OPTYPE_DECLARE(CONCATV2, "ConcatV2"); +REGISTER_OPTYPE_DECLARE(CONCATOFFSET, "ConcatOffset"); +REGISTER_OPTYPE_DECLARE(LESSEQUAL, "LessEqual"); +REGISTER_OPTYPE_DECLARE(SELECT, "Select"); +REGISTER_OPTYPE_DECLARE(CONFUSIONMATRIX, "ConfusionMatrix"); +REGISTER_OPTYPE_DECLARE(PLACEHOLDER, "PlaceHolder"); +REGISTER_OPTYPE_DECLARE(END, "End"); +REGISTER_OPTYPE_DECLARE(BASICLSTMCELL, "BasicLSTMCell"); +REGISTER_OPTYPE_DECLARE(GETNEXT, "GetNext"); +REGISTER_OPTYPE_DECLARE(INITDATA, "InitData"); +REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape") +REGISTER_OPTYPE_DECLARE(REFIDENTITY, "RefIdentity"); +REGISTER_OPTYPE_DECLARE(BITCAST, "Bitcast"); + +// ANN dedicated operator +REGISTER_OPTYPE_DECLARE(ANN_MEAN, "AnnMean"); +REGISTER_OPTYPE_DECLARE(ANN_CONVOLUTION, "AnnConvolution"); +REGISTER_OPTYPE_DECLARE(ANN_DEPCONVOLUTION, "AnnDepthConv"); +REGISTER_OPTYPE_DECLARE(ANN_FULLCONNECTION, "AnnFullConnection"); +REGISTER_OPTYPE_DECLARE(ANN_NETOUTPUT, "AnnNetOutput"); +REGISTER_OPTYPE_DECLARE(ANN_DATA, "AnnData"); +REGISTER_OPTYPE_DECLARE(ANN_RESHAPE, "AnnReshape"); +REGISTER_OPTYPE_DECLARE(ANN_ADD, "AnnAdd"); +REGISTER_OPTYPE_DECLARE(ANN_MUL, "AnnMul"); +REGISTER_OPTYPE_DECLARE(ANN_SUB, "AnnSub"); +REGISTER_OPTYPE_DECLARE(ANN_DIV, "AnnDiv"); +REGISTER_OPTYPE_DECLARE(ANN_DEQUANTIZE, "AnnDequant"); +REGISTER_OPTYPE_DECLARE(ANN_QUANTIZE, "AnnQuant"); +REGISTER_OPTYPE_DECLARE(ANN_PAD, "AnnPad"); +REGISTER_OPTYPE_DECLARE(ANN_RESIZE_BILINEAR, "AnnResizeBilinear"); + +// Training operator +REGISTER_OPTYPE_DECLARE(GATHERV2, "GatherV2"); +REGISTER_OPTYPE_DECLARE(CONVGRADFILTER, "Conv2DBackpropFilter"); +REGISTER_OPTYPE_DECLARE(CONV2D, "Conv2D"); +REGISTER_OPTYPE_DECLARE(CONV2DBACKPROPINPUT, "Conv2DBackpropInput"); +REGISTER_OPTYPE_DECLARE(FUSEDBATCHNORM, "FusedBatchNorm"); 
+REGISTER_OPTYPE_DECLARE(BIASADDGRAD, "BiasAddGrad"); +REGISTER_OPTYPE_DECLARE(ACTIVATIONGRAD, "ReluGrad"); +REGISTER_OPTYPE_DECLARE(MAXPOOLWITHARGMAX, "MaxPoolWithArgmax"); +REGISTER_OPTYPE_DECLARE(MAXPOOLGRADWITHARGMAX, "MaxPoolGradWithArgmax"); +REGISTER_OPTYPE_DECLARE(SPARSESOFTMAXCROSSENTROPYWITHLOGITS, "SparseSoftmaxCrossEntropyWithLogits"); +REGISTER_OPTYPE_DECLARE(SNAPSHOT, "Snapshot"); +REGISTER_OPTYPE_DECLARE(LAYERNORM, "LayerNorm"); +REGISTER_OPTYPE_DECLARE(HUBERLOSSGRAD, "HuberLossGrad"); +REGISTER_OPTYPE_DECLARE(HUBERLOSS, "HuberLoss"); +REGISTER_OPTYPE_DECLARE(NEGATIVE, "Negative"); +REGISTER_OPTYPE_DECLARE(SSDCAST, "SSDCast"); +REGISTER_OPTYPE_DECLARE(SSDSQUEEZEFUSION, "SsdSqueezeFusion"); +REGISTER_OPTYPE_DECLARE(SPARSESOFTMAXCROSSENTROPY, "SsdSparseSoftmaxCrossEntropy"); +REGISTER_OPTYPE_DECLARE(SPARSESOFTMAXCROSSENTROPYGRAD, "SsdSparseSoftmaxCrossEntropyGrad"); +REGISTER_OPTYPE_DECLARE(CONCATFIVE2FOUR, "ConcatFive2Four"); +REGISTER_OPTYPE_DECLARE(CONCATFOUR2FIVE, "ConcatFour2Five"); +REGISTER_OPTYPE_DECLARE(SSDREALDIVTILEMUL, "SSDRealdivTileMul"); +REGISTER_OPTYPE_DECLARE(SSDSUMMULREALDIVMEAN, "SSDSumMulRealdivMean"); + +REGISTER_OPTYPE_DECLARE(MEANGRAD, "MeanGrad"); +REGISTER_OPTYPE_DECLARE(TRANSLATE, "Translate"); +REGISTER_OPTYPE_DECLARE(ADDN, "AddN"); +REGISTER_OPTYPE_DECLARE(L2LOSS, "L2Loss"); +REGISTER_OPTYPE_DECLARE(MULTIPLY, "Multiply"); +REGISTER_OPTYPE_DECLARE(RELU6GRAD, "Relu6Grad"); +REGISTER_OPTYPE_DECLARE(AVGPOOLGRAD, "AvgPoolGrad"); +REGISTER_OPTYPE_DECLARE(DEPTHWISECONV2DBACKPROPFILTER, "DepthwiseConv2dNativeBackpropFilter"); +REGISTER_OPTYPE_DECLARE(DEPTHWISECONV2DBACKPORPINPUT, "DepthwiseConv2dNativeBackpropInput"); +REGISTER_OPTYPE_DECLARE(DEPTHWISECONV2DFORWARDNATIVE, "DepthwiseConv2dNative"); +REGISTER_OPTYPE_DECLARE(DROPOUTGRAD, "DropOutGrad"); +REGISTER_OPTYPE_DECLARE(APPLYRMSPROPMIXEDPRECISION, "apply_rms_prop_mixed_precision"); +REGISTER_OPTYPE_DECLARE(APPLYRMSPROP, "ApplyRMSProp"); +REGISTER_OPTYPE_DECLARE(LARS, "Lars"); 
+REGISTER_OPTYPE_DECLARE(DYNAMICSTITCH, "DynamicStitch"); + +// Variable sink related +REGISTER_OPTYPE_DECLARE(VARIABLEV2, "VariableV2"); +REGISTER_OPTYPE_DECLARE(VARHANDLEOP, "VarHandleOp"); +REGISTER_OPTYPE_DECLARE(TEMPORARYVARIABLE, "TemporaryVariable"); +REGISTER_OPTYPE_DECLARE(DESTROYTEMPORARYVARIABLE, "DestroyTemporaryVariable"); +REGISTER_OPTYPE_DECLARE(VARIABLE, "Variable"); + +REGISTER_OPTYPE_DECLARE(READVARIABLEOP, "ReadVariableOp"); + +REGISTER_OPTYPE_DECLARE(VARISINITIALIZEDOP, "VarIsInitializedOp"); +REGISTER_OPTYPE_DECLARE(ISVARIABLEINITIALIZED, "IsVariableInitialized"); + +REGISTER_OPTYPE_DECLARE(ASSIGN, "Assign"); +REGISTER_OPTYPE_DECLARE(ASSIGNVARIABLEOP, "AssignVariableOp"); + +REGISTER_OPTYPE_DECLARE(ASSIGNADD, "AssignAdd"); +REGISTER_OPTYPE_DECLARE(ASSIGNADDVARIABLEOP, "AssignAddVariableOp"); + +REGISTER_OPTYPE_DECLARE(ASSIGNSUB, "AssignSub"); +REGISTER_OPTYPE_DECLARE(ASSIGNSUBVARIABLEOP, "AssignSubVariableOp"); + +REGISTER_OPTYPE_DECLARE(APPLYMOMENTUM, "ApplyMomentum"); +REGISTER_OPTYPE_DECLARE(RESOURCEAPPLYMOMENTUM, "ResourceApplyMomentum"); +REGISTER_OPTYPE_DECLARE(SGD, "SGD"); +REGISTER_OPTYPE_DECLARE(NOOP, "NoOp"); +REGISTER_OPTYPE_DECLARE(LAYERNORMGRAD, "LayerNormGrad"); + +REGISTER_OPTYPE_DECLARE(SQUARE, "Square"); +REGISTER_OPTYPE_DECLARE(HCOMBROADCAST, "HcomBroadcast"); +REGISTER_OPTYPE_DECLARE(HCOMALLGATHER, "HcomAllGather"); +REGISTER_OPTYPE_DECLARE(HCOMALLREDUCE, "HcomAllReduce"); +REGISTER_OPTYPE_DECLARE(HCOMREDUCESCATTER, "HcomReduceScatter"); +REGISTER_OPTYPE_DECLARE(HCOMSEND, "HcomSend"); +REGISTER_OPTYPE_DECLARE(HCOMRECEIVE, "HcomReceive"); +REGISTER_OPTYPE_DECLARE(HCOMREMOTEREAD, "HcomRemoteRead"); +REGISTER_OPTYPE_DECLARE(HCOMREMOTEWRITE, "HcomRemoteWrite"); + +REGISTER_OPTYPE_DECLARE(VARASSIGN, "VarAssign"); +REGISTER_OPTYPE_DECLARE(VARISINITIALIZEDOP, "VarIsInitializedOp"); +REGISTER_OPTYPE_DECLARE(LogTimeStamp, "LogTimeStamp"); +REGISTER_OPTYPE_DECLARE(PARALLELCONCATSTART, "_ParallelConcatStart"); 
+REGISTER_OPTYPE_DECLARE(CONSTANTOP, "Constant"); +REGISTER_OPTYPE_DECLARE(STREAMSWITCH, "StreamSwitch"); +REGISTER_OPTYPE_DECLARE(STREAMSWITCHN, "StreamSwitchN"); +REGISTER_OPTYPE_DECLARE(STREAMACTIVE, "StreamActive"); +REGISTER_OPTYPE_DECLARE(MEMCPYASYNC, "MemcpyAsync"); +REGISTER_OPTYPE_DECLARE(MEMCPYADDRASYNC, "MemcpyAddrAsync"); +REGISTER_OPTYPE_DECLARE(STREAMMERGE, "StreamMerge"); +REGISTER_OPTYPE_DECLARE(ENDGRAPH, "EndGraph"); +REGISTER_OPTYPE_DECLARE(SEND, "Send"); +REGISTER_OPTYPE_DECLARE(RECV, "Recv"); +REGISTER_OPTYPE_DECLARE(ENDOFSEQUENCE, "EndOfSequence"); + +REGISTER_OPTYPE_DECLARE(LABELSET, "LabelSet"); +REGISTER_OPTYPE_DECLARE(LABELGOTO, "LabelGoto"); +REGISTER_OPTYPE_DECLARE(LABELGOTOEX, "LabelGotoEx"); +REGISTER_OPTYPE_DECLARE(LABELSWITCH, "LabelSwitch"); +REGISTER_OPTYPE_DECLARE(LABELSWITCHBYINDEX, "LabelSwitchByIndex"); + +REGISTER_OPTYPE_DECLARE(ATOMICADDRCLEAN, "AtomicAddrClean"); + +REGISTER_OPTYPE_DECLARE(ABS_GRAD, "AbsGrad"); +REGISTER_OPTYPE_DECLARE(ACCUMULATE_N_V2, "AccumulateNV2"); +REGISTER_OPTYPE_DECLARE(ACOS_GRAD, "AcosGrad"); +REGISTER_OPTYPE_DECLARE(ACOSH_GRAD, "AcoshGrad"); +REGISTER_OPTYPE_DECLARE(ANY, "Any"); +REGISTER_OPTYPE_DECLARE(APPROXIMATE_EQUAL, "ApproximateEqual"); +REGISTER_OPTYPE_DECLARE(ASIN_GRAD, "AsinGrad"); +REGISTER_OPTYPE_DECLARE(ASINH_GRAD, "AsinhGrad"); +REGISTER_OPTYPE_DECLARE(ATAN_GRAD, "AtanGrad"); +REGISTER_OPTYPE_DECLARE(BROADCAST_TO, "BroadcastTo"); +REGISTER_OPTYPE_DECLARE(ELU_GRAD, "EluGrad"); +REGISTER_OPTYPE_DECLARE(ADD_V2, "AddV2"); +REGISTER_OPTYPE_DECLARE(DATAFORMATDIMMAP, "DataFormatDimMap"); +REGISTER_OPTYPE_DECLARE(DATAFORMATVECPERMUTE, "DataFormatVecPermute"); +REGISTER_OPTYPE_DECLARE(BESSELI0e, "BesselI0e"); +REGISTER_OPTYPE_DECLARE(BESSELI1e, "BesselI1e"); +REGISTER_OPTYPE_DECLARE(DEQUANTIZE, "Dequantize"); +REGISTER_OPTYPE_DECLARE(APPLYADADELTA, "ApplyAdadelta"); +REGISTER_OPTYPE_DECLARE(APPLYADAGRAD, "ApplyAdagrad"); +REGISTER_OPTYPE_DECLARE(APPLYADAGRADDA, "ApplyAdagradDA"); 
+REGISTER_OPTYPE_DECLARE(APPLYADAM, "ApplyAdam"); +REGISTER_OPTYPE_DECLARE(APPLYADAMAX, "ApplyAdaMax"); +REGISTER_OPTYPE_DECLARE(APPLYADDSIGN, "ApplyAddSign"); +REGISTER_OPTYPE_DECLARE(APPLYCENTEREDRMSPROP, "ApplyCenteredRMSProp"); +REGISTER_OPTYPE_DECLARE(APPLYFTRL, "ApplyFtrl"); +REGISTER_OPTYPE_DECLARE(APPLYFTRLV2, "ApplyFtrlv2"); +REGISTER_OPTYPE_DECLARE(APPLYGRADIENTDESCENT, "ApplyGradientDescent"); +REGISTER_OPTYPE_DECLARE(APPLYPOWERSIGN, "ApplyPowerSign"); +REGISTER_OPTYPE_DECLARE(APPLYPROXIMALADAGRAD, "ApplyProximalAdagrad"); +REGISTER_OPTYPE_DECLARE(APPLYPROXIMALGRADIENTDESCENT, "ApplyProximalGradientDescent"); + +REGISTER_OPTYPE_DECLARE(FOCAL_LOSS, "FocalLoss"); +REGISTER_OPTYPE_DECLARE(FOCAL_LOSS_GRAD, "FocalLossGrad"); +REGISTER_OPTYPE_DECLARE(SMOOTHL1_LOSS, "SmoothL1Loss"); +REGISTER_OPTYPE_DECLARE(SMOOTHL1_LOSS_grad, "SmoothL1LossGrad"); +REGISTER_OPTYPE_DECLARE(REDUCEMEAN, "ReduceMean"); +REGISTER_OPTYPE_DECLARE(CONCAT_V2, "ConcatV2"); +REGISTER_OPTYPE_DECLARE(ONEHOT_V2, "OneHotV2"); +REGISTER_OPTYPE_DECLARE(SLICE_V2, "SliceV2"); +REGISTER_OPTYPE_DECLARE(TILE_V2, "TileV2"); +REGISTER_OPTYPE_DECLARE(SUM_V2, "SumV2"); +// Common operator type when operators have the same name +REGISTER_OPTYPE_DECLARE(DETECTIONOUTPUT, "DetectionOutput"); + +// custom operator +REGISTER_OPTYPE_DECLARE(CUSTOMOP, "CustomOp"); +REGISTER_OPTYPE_DECLARE(CUSTOMOP_NCHW, "CustomOpNchw"); +REGISTER_OPTYPE_DECLARE(CUSTOMOP_NHWC, "CustomOpNhwc"); +REGISTER_OPTYPE_DECLARE(CUSTOMOP_NC1HWC0, "CustomOpNc1hwc0"); + +// Depthwise 4d_2_6d,6d_2_4d +REGISTER_OPTYPE_DECLARE(DEPTHWISEWEIGHT4D26D, "depthwise_weight_4d_2_6d"); +REGISTER_OPTYPE_DECLARE(DEPTHWISEWEIGHT6D24D, "depthwise_weight_6d_2_4d"); + +REGISTER_OPTYPE_DECLARE(SQRTGRAD, "SqrtGrad"); +REGISTER_OPTYPE_DECLARE(SIGMOIDGRAD, "SigmoidGrad"); + +// Horovod operator +REGISTER_OPTYPE_DECLARE(HVDCALLBACKALLREDUCE, "HorovodAllreduce"); +REGISTER_OPTYPE_DECLARE(HVDCALLBACKALLGATHER, "HorovodAllgather"); 
+REGISTER_OPTYPE_DECLARE(HVDCALLBACKBROADCAST, "HorovodBroadcast"); +REGISTER_OPTYPE_DECLARE(HVDWAIT, "HorovodWait"); + +enum InputMode { INPUT = 0, CONST }; + +// Definition of the processing status enum of the process module +enum ModelProcessState { + INIT_STATE = 0, // init status + WAIT_EVENT_STATE, // Wait for the event status + IND_RSLT_STATE, // The model execution result is being output to the high level + STOPPED_STATE, // Model execution completed. The model enters this state after Model Manager::Stop + RESERVED_STATE, // reserved +}; + +// Indicates the enun definition of the execution mode of the access module +enum SysMode { + INFERENCE = 0, // Normal, that is, Inference mode + DEBUG, // Debug mode + TIME, // Model execution time mode, including the execution time of each OP + STOP, // STOP mode + RESET, // RESET mode + PERFORMANCE, // Impact of enabling the performance model: 1. The input data of the model is considered ready and does + // not need to be converted + ANDROID_DEBUG, // Exports Android platform computing data + RESERVED, // reserved +}; + +// @brief encryption type of the model file +enum ModelEncryptType { + UNENCRYPTED, // not encrypted + ENCRYPTED // encrypted +}; + +/// +/// @brief signature verification +/// +enum ModelCheckType { + CHECK, // signature verification + UNCHECK // no verification +}; + +/// +/// @brief dynamic input type +/// +enum DynamicInputType { + FIXED = 0, // default mode + DYNAMIC_BATCH = 1, + DYNAMIC_IMAGE = 2, + DYNAMIC_DIMS = 3 +}; + +/// +/// @brief magic number of the model file +/// +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FILE_MAGIC_NUM; + +/// +/// @brief model header length +/// +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FILE_HEAD_LEN; + +/// +/// @brief model name length +/// +static constexpr uint32_t MODEL_NAME_LENGTH = 32; + +/// +/// @brief length of user-defined information +/// +static constexpr uint32_t 
USER_DEFINE_INFO_LENGTH = 32; + +/// +/// @brief length of the model file signature +/// +static constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64; + +/// +/// @brief length of the reserved field in the model file header +/// +static constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 79; + +/// +/// @ingroup domi_omg +/// @brief INPUT node type +/// +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string INPUT_TYPE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMMY_DATA; + +/// +/// @ingroup domi_omg +/// @brief AIPP flag, indicating the aipp conv operator +/// +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string AIPP_CONV_FLAG; + +/// +/// @ingroup domi_omg +/// @brief AIPP flag, indicating the aipp data operator +/// +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string AIPP_DATA_FLAG; + +// flag of the Data operator, indicating that the input will be input to the dynamic AIPP operator +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string INPUT_TO_DYNAMIC_AIPP; + +// records the W dimension of the model input corresponding to the dynamic AIPP +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string AIPP_RELATED_DATA_DIM_W; + +// H dimension of the model input corresponding to the dynamic AIPP +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string AIPP_RELATED_DATA_DIM_H; + +// DATA node type +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DATA_TYPE; + +// DATA Operator Type +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string AIPP_DATA_TYPE; + +// framework Operator Type +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string FRAMEWORK_OP_TYPE; + +// DATA node type +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ANN_DATA_TYPE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ANN_NETOUTPUT_TYPE; 
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ANN_DEPTHCONV_TYPE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ANN_CONV_TYPE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ANN_FC_TYPE; +// convolution node type +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_NET_OUTPUT; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_END_GRAPH; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_OP_DEBUG; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_TYPE_OP_DEBUG; + +// convolution node type +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_TYPE_CONVOLUTION; +// adds a convolutional node name for the hard AIPP +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string AIPP_CONV_OP_NAME; +// delimiter of operator configuration items +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_CONF_DELIMITER; + +// op attr name +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ATTR_NAME_VALUE1; + +// op attr name, used to 6d_2_4d C channel +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ATTR_NAME_INPUT_CVALUE; + +// op attr name +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ATTR_NAME_VALUE1; + +// alpha default value +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const float ALPHA_DEFAULT_VALUE; + +// beta default value +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const float BETA_DEFAULT_VALUE; + +// coef default value +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const float COEF_DEFAULT_VALUE; + +// coef value of Relu6 +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const float RELU6_COEF; + +// stride default value +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern 
// the shape of c must be a multiple of 16 for depthwise
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t WEIGHT_BIAS_INDEX; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t TENSOR_ND_SUPPORT_SIZE; + +// default NCHW index +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_N; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_C; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_H; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_W; + +// default C1HWNCoC0 index +static const uint32_t C1HWNCoC0_DIM_C1 = 0; +static const uint32_t C1HWNCoC0_DIM_H = 1; +static const uint32_t C1HWNCoC0_DIM_W = 2; +static const uint32_t C1HWNCoC0_DIM_N = 3; +static const uint32_t C1HWNCoC0_DIM_Co = 4; +static const uint32_t C1HWNCoC0_DIM_C0 = 5; + +// default KCHW index +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t KCHW_DIM_K; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t KCHW_DIM_C; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t KCHW_DIM_H; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t KCHW_DIM_W; + +// default HWCK index +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t HWCK_DIM_H; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t HWCK_DIM_W; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t HWCK_DIM_C; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t HWCK_DIM_K; + +// default NHWC index +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_N; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_H; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_W; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_C; + +// default CHWN index +FMK_FUNC_HOST_VISIBILITY 
// the num of XRGB channel
int DEFAULT_FORMAT; + +// default global pooling +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const bool DEFAULT_GLOBAL_POOLING; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_VERSION; // model version 1.0 + +// Number of inputs of the Eltwise operator +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int ELTWISE_MIN_INPUT_SIZE; + +// flowctrl +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_STREAM_SWITCH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_STREAM_ACTIVE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_FLOWCTRL_LOOP_PER_ITER; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_FLOWCTRL_LOOP_COND; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_FLOWCTRL_LOOP_INCREMENT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_FLOWCTRL_LOOP_RESETVALUE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_FLOWCTRL_LOOP_ASSIGN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_ATOMIC_ADDR_CLEAN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t TRUE_STREAM_ID; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t STREAM_SWITCH_INPUT_NUM; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_GLOBAL_STEP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_GLOBAL_STEP_ASSIGNADD; + +static const int PLATFORM_VERSION_LEN = 20; + +// Definition of the file header of the model file +struct ModelFileHeader { + uint32_t magic = MODEL_FILE_MAGIC_NUM; // magic number of DOMI + uint32_t headsize = MODEL_FILE_HEAD_LEN; // length 
static constexpr int32_t PTHREAD_CREAT_SUCCESS = 0;  // pthread_create success
tagDomiFilterFormat { + DOMI_FILTER_KCHW, // KCHW + DOMI_FILTER_HWCK, // HWCK + DOMI_FILTER_RESERVED +} domiFilterFormat_t; + +// Const data trans type +typedef enum tagDomiConstDataTransType { + DOMI_CONST_DATA_NOT_CHANGE = 0, // No action is required + DOMI_CONST_DATA_TRANS_MATMUL, // The const input to MatMul and needs to be transposed + DOMI_CONST_DATA_RESERVED +} domiConstDataTransType_t; + +// mode of activation +typedef enum tagDomiActivationMode { + DOMI_ACTIVATION_SIGMOID = 0, // sigmoid + DOMI_ACTIVATION_RELU, // ReLU + DOMI_ACTIVATION_TANH, // tanh + DOMI_ACTIVATION_CLIPPED_RELU, // clipped ReLU + DOMI_ACTIVATION_ELU, // ELU + DOMI_ACTIVATION_LEAKY_RELU, + DOMI_ACTIVATION_ABS, // Abs + DOMI_ACTIVATION_RELU1, // relu1 + DOMI_ACTIVATION_SOFTSIGN, // softsign + DOMI_ACTIVATION_SOFTPLUS, // softplus + DOMI_ACTIVATION_HARDSIGMOID, // hardsigmoid + DOMI_ACTIVATION_THRESHOLD_RELU, // threshold + DOMI_ACTIVATION_SELU, // selu + DOMI_ACTIVATION_LINEAR, // linear + DOMI_ACTIVATION_RESERVED +} domiActivationMode_t; + +// mode of batchnorm +typedef enum tagDomiBatchNormMode { + DOMI_BATCHNORM_PER_ACTIVATION = 0, // bnScale, bnBias tensor dims are 1xCxHxW + DOMI_BATCHNORM_SPATIAL, // bnScale, bnBias tensor dims are 1xCx1x1 + DOMI_BATCHNORM_RESERVED +} domiBatchNormMode_t; + +// eltwise mode +typedef enum tagDomiEltwiseMode { + DOMI_ELTWISE_PROD = 0, // prod + DOMI_ELTWISE_SUM, // sum + DOMI_ELTWISE_MAX, // max + DOMI_ELTWISE_RESERVED +} domiEltwiseMode_t; + +// mode of padding +typedef enum tagDomiPaddingMode { + DOMI_PADDING_CEIL = 0, // Default padding mode + DOMI_PADDING_DIRECTASSIGN, // Default padding mode: NOTSET + DOMI_PADDING_VALID, // VALID padding mode + DOMI_PADDING_SAME, // Padding values of 0 are always used + DOMI_PADDING_CEIL_NEW, // Padding values of 0 are always used + DOMI_PADDING_VALID_NEW, // Padding values of 0 are always used + DOMI_PADDING_SAME_NEW, // Padding values of 0 are always used + DOMI_PADDING_RESERVED +} domiPaddingMode_t; + +// 
algorithm of convolution forward +typedef enum tagDomiConvolutionFwdAlgo { + DOMI_CONVOLUTION_FWD_ALGO_GEMM = 0, // matrix gemm algo + DOMI_CONVOLUTION_FWD_ALGO_WINOGRAD, // Winograd Transform algo + DOMI_CONVOLUTION_FWD_ALGO_GEMM_ACCU_FLOAT32, // accumulate in L0c with FP32 + DOMI_CONVOLUTION_FWD_ALGO_RESERVED +} domiConvolutionFwdAlgo_t; + +typedef enum tagDomiFullConnectFwdAlgo { + DOMI_FULLCONNECT_FWD_ALGO_HALF = 0, // accumulate in L0c with FP16 + DOMI_FULLCONNECT_FWD_ALGO_FLOAT32 // accumulate in L0c with FP32 +} domiFullConnectFwdAlgo_t; + +typedef enum tagDomiPooingFwdAlgo { + DOMI_POOLING_FWD_ALGO_HALF = 0, // accumulate in L0c with FP16 + DOMI_POOLING_FWD_ALGO_FLOAT32 // accumulate in L0c with FP32 +} domiPooingFwdAlgo_t; + +// mode of convolution +typedef enum tagDomiConvolutionMode { + DOMI_CONV_CONVOLUTION = 0, // math convolution + DOMI_CONV_CROSS_CORRELATION, // cross-correlation convolution + DOMI_CONV_DECONVOLUTION, // deconvolution, also named transposed convolution + DOMI_CONV_MODE_DEPTHWISE, // depthwise convolution + DOMI_CONV_MODE_RESERVED +} domiConvolutionMode_t; + +// softmax mode +typedef enum tagDomiSoftmaxMode { + DOMI_SOFTMAX_MODE_INSTANCE = 0, // compute the softmax over all C, H, W for each N + DOMI_SOFTMAX_MODE_CHANNEL, // compute the softmax over all C for each H, W, N + DOMI_SOFTMAX_MODE_HEIGHT, // compute the softmax over all H for each N, C, W + DOMI_SOFTMAX_MODE_WIDTH, // compute the softmax over all W for each N, C, H + DOMI_SOFTMAX_MODE_RESERVED +} domiSoftmaxMode_t; + +// softmax algorithm +typedef enum tagDomiSoftmaxAlgo { + DOMI_SOFTMAX_FAST = 0, // straightforward implementation + DOMI_SOFTMAX_ACCURATE, // subtract max from every point to avoid overflow + DOMI_SOFTMAX_LOG, // perform the Log softmax operation to avoid overflow + DOMI_SOFTMAX_ACCURATE_FP32, + DOMI_SOFTMAX_RESERVED +} domiSoftmaxAlgo_t; + +// algorithm of convolution backward +typedef enum tagDomiConvolutionBwdAlgo { + DOMI_CONVOLUTION_BWD_ALGO_GEMM = 0, // 
  DOMI_RNN_5D_BX1TX,  // data[batch_size,Xt,1,max_time,Xt]
  int32_t stream_id;  // stream id
// new ge macro
+#define GE_RETURN_IF_ERROR(expr) \ + do { \ + const ::ge::Status _status = (expr); \ + if (_status) return _status; \ + } while (0) + +#define GE_RETURN_WITH_LOG_IF_ERROR(expr, ...) \ + do { \ + const ::ge::Status _status = (expr); \ + if (_status) { \ + DOMI_LOGE(__VA_ARGS__); \ + return _status; \ + } \ + } while (0) + +// check whether the parameter is true. If it is, return FAILED and record the error log +#define GE_RETURN_WITH_LOG_IF_TRUE(condition, ...) \ + do { \ + if (condition) { \ + DOMI_LOGE(__VA_ARGS__); \ + return ge::FAILED; \ + } \ + } while (0) + +// Check if the parameter is false. If yes, return FAILED and record the error log +#define GE_RETURN_WITH_LOG_IF_FALSE(condition, ...) \ + do { \ + bool _condition = (condition); \ + if (!_condition) { \ + DOMI_LOGE(__VA_ARGS__); \ + return ge::FAILED; \ + } \ + } while (0) + +// Checks whether the parameter is true. If so, returns PARAM_INVALID and records the error log +#define GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(condition, ...) \ + do { \ + if (condition) { \ + DOMI_LOGE(__VA_ARGS__); \ + return ge::PARAM_INVALID; \ + } \ + } while (0) + +// Check if the parameter is false. If yes, return PARAM_INVALID and record the error log +#define GE_RT_PARAM_INVALID_WITH_LOG_IF_FALSE(condition, ...) \ + do { \ + bool _condition = (condition); \ + if (!_condition) { \ + DOMI_LOGE(__VA_ARGS__); \ + return ge::PARAM_INVALID; \ + } \ + } while (0) + +// Check if the parameter is null. If yes, return PARAM_INVALID and record the error +#define GE_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + return ge::PARAM_INVALID; \ + } \ + } while (0) + +// Check if the parameter is null. If yes, just return and record the error +#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + return; \ + } \ + } while (0) + +// Check whether the parameter is null. 
If so, execute the exec_expr expression and record the error log +#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + exec_expr; \ + } \ + } while (0) + +// Check whether the parameter is null. If yes, return directly and record the error log +#define GE_RT_VOID_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + return; \ + } \ + } while (0) + +// Check if the parameter is null. If yes, return false and record the error log +#define GE_RT_FALSE_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + return false; \ + } \ + } while (0) + +// Check if the parameter is out of bounds +#define GE_CHECK_SIZE(size) \ + do { \ + if (size == 0) { \ + DOMI_LOGE("param[%s] is out of range", #size); \ + return ge::PARAM_INVALID; \ + } \ + } while (0) + +// Check if the container is empty +#define GE_CHECK_VECTOR_NOT_EMPTY(vector) \ + do { \ + if (vector.empty()) { \ + DOMI_LOGE("param[%s] is empty!", #vector); \ + return ge::FAILED; \ + } \ + } while (0) + +// Check if the value on the left is greater than or equal to the value on the right +#define GE_CHECK_GE(lhs, rhs) \ + do { \ + if (lhs < rhs) { \ + DOMI_LOGE("param[%s] is less than[%s]", #lhs, #rhs); \ + return ge::PARAM_INVALID; \ + } \ + } while (0) + +// Check if the value on the left is less than or equal to the value on the right +#define GE_CHECK_LE(lhs, rhs) \ + do { \ + if (lhs > rhs) { \ + DOMI_LOGE("param[%s] is greater than[%s]", #lhs, #rhs); \ + return ge::PARAM_INVALID; \ + } \ + } while (0) + +#define GE_DELETE_NEW_SINGLE(var) \ + do { \ + if (var != nullptr) { \ + delete var; \ + var = nullptr; \ + } \ + } while (0) + +#define GE_DELETE_NEW_ARRAY(var) \ + do { \ + if (var != nullptr) { \ + delete[] var; \ + var = nullptr; \ + } \ + } while (0) + +/** + * @ingroup domi_common + * @brief version of om.proto file + 
/// @brief proto file in binary format
/// @brief Converts a vector of numbers to a string.
/// @return true: the result is within the normal int64 range; false: otherwise
/// (NOTE(review): polarity inferred from the garbled original comment — confirm against the implementation)
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_ENGINE_DNNENGINE_H_ +#define INC_FRAMEWORK_ENGINE_DNNENGINE_H_ + +#include +#include +#include + +#include "common/ge_inner_error_codes.h" +#include "common/ge_types.h" +#include "graph/types.h" + +namespace ge { +enum PriorityEnum { + COST_0 = 0, + COST_1, + COST_2, + COST_9 = 9, + COST_10 = 10, +}; + +struct DNNEngineAttribute { + std::string engine_name; + std::vector mem_type; + uint32_t compute_cost; + enum RuntimeType runtime_type; // HOST, DEVICE + // If engine input format must be specific, set this attribute, else set FORMAT_RESERVED + Format engine_input_format; + Format engine_output_format; +}; + +class DNNEngine { + public: + virtual ~DNNEngine() = default; + virtual Status Initialize(const std::map &options) = 0; + virtual Status Finalize() = 0; + virtual void GetAttributes(DNNEngineAttribute &attr) const = 0; +}; +} // namespace ge + +#endif // INC_FRAMEWORK_ENGINE_DNNENGINE_H_ diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h new file mode 100644 index 000000000..613152e9e --- /dev/null +++ b/inc/framework/executor/ge_executor.h @@ -0,0 +1,275 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_EXECUTOR_GE_EXECUTOR_H_ +#define INC_FRAMEWORK_EXECUTOR_GE_EXECUTOR_H_ + +#include +#include +#include + +#include "common/dynamic_aipp.h" +#include "common/ge_inner_error_codes.h" +#include "common/ge_types.h" +#include "common/types.h" +#include "graph/tensor.h" +#include "graph/ge_tensor.h" +#include "runtime/base.h" + +namespace ge { +class ModelListenerAdapter; + +class SingleOp; +class DynamicSingleOp; + +struct RunModelData { + uint32_t index; // Data index + uint32_t modelId; + std::vector blobs; // All input/output data buffer + uint32_t timestamp; // Data creation time + uint32_t timeout; // Processing timeout + uint64_t request_id = 0; // Request ID + uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 + uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 + uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 + std::vector dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty +}; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { + public: + GeExecutor(); + ~GeExecutor() = default; + ge::Status Initialize(); + ge::Status Finalize(); + + // Load model + ge::Status LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key, int32_t priority, + std::shared_ptr listener); + + ge::Status UnloadModel(uint32_t modelId); + + ge::Status RunModel(const ge::RunModelData 
&input_data, ge::RunModelData &output_data); + + // Get input and output descriptor + ge::Status GetModelDescInfo(uint32_t model_id, std::vector &input_desc, + std::vector &output_desc, bool new_model_desc = false); + + /// + /// @ingroup ge + /// @brief Set dynamic batch size + /// @param [in] model_id: model id allocate from manager + /// @param [in] dynamic_input_addr: dynamic input addr created by user + /// @param [in] length: length of dynamic input addr + /// @param [in] batch_size: batch size entered by user in dynamic multi-batch scenario + /// @return execute result + /// + ge::Status SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size); + + /// + /// @ingroup ge + /// @brief Set dynamic image info + /// @param [in] model_id: model id allocate from manager + /// @param [in] dynamic_input_addr: dynamic input addr created by user + /// @param [in] length: length of dynamic input addr + /// @param [in] image_height: image height entered by user in dynamic multi-resolution scenario + /// @param [in] image_width: image width entered by user in dynamic multi-resolution scenario + /// @return execute result + /// + ge::Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height, + uint64_t image_width); + + /// + /// @ingroup ge + /// @brief Set dynamic dims info + /// @param [in] model_id: model id allocate from manager + /// @param [in] dynamic_input_addr: dynamic input addr created by user + /// @param [in] length: length of dynamic input addr + /// @param [in] dynamic_dim_num: number of dynamic dimension + /// @param [in] dynamic_dims: array of dynamic dimensions + /// @return execute result + /// + ge::Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length, + const std::vector &dynamic_dims); + + /// + /// @ingroup ge + /// @brief Get current dynamic dims info by combined dims + /// @param [in] model_id: model id allocate from 
manager + /// @param [in] combined_dims: array of combined dimensions + /// @param [out] cur_dynamic_dims: current dynamic dims + /// @return execute result + /// + ge::Status GetCurDynamicDims(uint32_t model_id, const std::vector &combined_dims, + std::vector &cur_dynamic_dims); + + /// + /// @ingroup ge + /// @brief Get dynamic batch_info + /// @param [in] model_id + /// @param [out] batch_info + /// @param [out] dynamic_type + /// @return execute result + /// + ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector> &batch_info, + int32_t &dynamic_type); + + /// + /// @ingroup ge + /// @brief Get combined dynamic dims info + /// @param [in] model_id + /// @param [out] batch_info + /// @return execute result + /// + ge::Status GetCombinedDynamicDims(uint32_t model_id, std::vector> &batch_info); + + ge::Status GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type); + + /// + /// @ingroup ge + /// @brief Set dynamic image info + /// @param [in] model_id: model id allocate from manager + /// @param [in] dynamic_input_addr: dynamic input addr created by user + /// @param [in] length: length of dynamic input addr + /// @param [in] aippBatchPara: kAippDynamicBatchPara vector by user in dynamic aipp + /// @param [in] aippParms: kAippDynamicPara by user in dynamic aipp + /// @return execute result + /// + ge::Status SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length, + const std::vector &aippBatchPara, + const kAippDynamicPara &aippParms); + + ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + ge::Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); + + ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &input_desc, + std::vector &output_desc); + + ge::Status LoadModel(uint32_t &model_id, const ge::ModelData &model_data, + std::shared_ptr listener); + + ge::Status CommandHandle(const ge::Command &command); + + /// + /// 
@ingroup ge + /// @brief Query model memory consuming interface + /// @param [in] model_id Offline model ID + /// @param [out] max_size Memory size + /// @return SUCCESS + /// @return FAILED + /// + ge::Status GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size); + + /// + /// @ingroup ge + /// @brief Load data from model file to memory + /// @param [in] const std::string &path: Offline model file path + /// @param [out] ModelData &model_data: Offline model memory data + /// @return SUCCESS handle successfully / others handle failed + /// + ge::Status LoadDataFromFile(const std::string &path, ge::ModelData &model_data); + + /// + /// @ingroup ge + /// @brief Load model from offline model memory data + /// @param [in] ModelData &model_data: Offline model data + /// @param [in] void *dev_ptr: Input/Output memory address + /// @param [in] size_t mem_size: Input/Output memory length + /// @param [in] void *weight_ptr: Weight memory address + /// @param [in] size_t weight_size: Weight memory length + /// @param [out] uint32_t &model_id: Corresponding identification after model loading + /// @return SUCCESS handle successfully / others handle failed + /// + ge::Status LoadModelFromData(uint32_t &model_id, const ge::ModelData &model_data, void *dev_ptr, size_t mem_size, + void *weight_ptr, size_t weight_size); + + /// + /// @ingroup ge + /// @brief Load task list from ModelData with queue. + /// @param [out] model_id: model id allocate from manager. + /// @param [in] model_data: Model data load from offline model. + /// @param [in] input_queue_ids: input queue ids create from user. + /// @param [in] output_queue_ids: input queue ids create from user. 
+ /// @return: 0 for success / others for fail + /// + ge::Status LoadModelWithQ(uint32_t &model_id, const ge::ModelData &model_data, + const std::vector &input_queue_ids, + const std::vector &output_queue_ids); + + /// + /// @ingroup ge + /// @brief Synchronous execution of offline model(Do not create thread) + /// @param [in] uint32_t model_id: Model ID to execute + /// @param [in] void* stream: stream to execute + /// @param [in] bool async_mode: is asynchronize mode. + /// @param [in] const domi::InputData *input_data: Model input data + /// @param [out] domi::OutputData *output_data: Model output data + /// @return SUCCESS handle successfully / others handle failed + /// + ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &input_data, + ge::RunModelData &output_data, bool async_mode = false); + + /// + /// @ingroup ge + /// @brief Get weight memory size from model file + /// @param [in] const std::string &path: Offline model file path + /// @param [out] size_t &mem_size Execution memory size + /// @param [out] size_t &weight_size Weight memory space size + /// @return SUCCESS handle successfully / others handle failed + /// + ge::Status GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size); + + /// + /// @ingroup ge + /// @brief Get weight memory size from model file + /// @param [in] const void *model_data Offline model buffer + /// @param [in] size_t model_size Offline model buffer length + /// @param [out] size_t &mem_size Execution memory size + /// @param [out] size_t &weight_size Weight memory space size + /// @return SUCCESS handle successfully / others handle failed + /// + ge::Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size); + + static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream, + SingleOp **single_op); + + static ge::Status ExecuteAsync(SingleOp *executor, const std::vector 
&inputs, + std::vector &outputs); + + static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, + DynamicSingleOp **single_op); + + static ge::Status ExecuteAsync(DynamicSingleOp *executor, + const std::vector &input_desc, + const std::vector &inputs, + std::vector &output_desc, + std::vector &outputs); + + static ge::Status ReleaseSingleOpResource(void *stream); + + ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); + ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); + ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, + std::vector &output_dims); + ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); + + private: + static bool isInit_; +}; + +ge::Status ModelInfoParser(const ge::ModelData &model, ge::ModelInfo &model_info); +} // namespace ge + +#endif // INC_FRAMEWORK_EXECUTOR_GE_EXECUTOR_H_ diff --git a/inc/framework/ge_runtime/davinci_model.h b/inc/framework/ge_runtime/davinci_model.h new file mode 100644 index 000000000..8b6ca978f --- /dev/null +++ b/inc/framework/ge_runtime/davinci_model.h @@ -0,0 +1,113 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_ +#define INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_ + +#include +#include + +#include "ge_runtime/op_info.h" +#include "ge_runtime/task_info.h" + +namespace ge { +namespace model_runner { +class DavinciModel { + public: + DavinciModel(const std::vector> &task_info_list, /*lint !e151*/ + const std::vector> &data_info_list, + const std::vector> &output_info_list, /*lint !e151*/ + const std::vector> &constant_info_list, /*lint !e1049*/ + const std::vector &variable_info_list, + const std::vector &wait_active_stream_list, + const std::vector &force_copy_stream_list, uint64_t mem_size = 0, uint64_t weight_size = 0, + uint64_t var_size = 0, uintptr_t logic_mem_base = 0, uintptr_t logic_weight_base = 0, + uintptr_t logic_var_base = 0, uint32_t stream_num = 0, uint32_t batch_num = 0, uint32_t event_num = 0, + int32_t priority = 0) + : task_info_list_(task_info_list), + data_info_list_(data_info_list), + output_info_list_(output_info_list), + constant_info_list_(constant_info_list), + variable_info_list_(variable_info_list), + wait_active_stream_list_(wait_active_stream_list), + force_copy_stream_list_(force_copy_stream_list), + mem_size_(mem_size), + weight_size_(weight_size), + var_size_(var_size), + logic_mem_base_(logic_mem_base), + logic_weight_base_(logic_weight_base), + logic_var_base_(logic_var_base), + stream_num_(stream_num), + batch_num_(batch_num), + event_num_(event_num), + priority_(priority) {} + ~DavinciModel() {} + + uint64_t GetMemSize() const { return mem_size_; } + uint64_t GetWeightSize() const { return weight_size_; } + uint64_t GetVarSize() const { return var_size_; } + + uintptr_t GetLogicMemBase() const { return logic_mem_base_; } + uintptr_t GetLogicWeightBase() const { return logic_weight_base_; } + uintptr_t GetLogicVarBase() const { return logic_var_base_; } + + uint32_t GetStreamNum() const { return stream_num_; } + uint32_t GetBatchNum() const { return batch_num_; } + uint32_t 
GetEventNum() const { return event_num_; } + + const std::vector &GetWaitActiveStreams() const { return wait_active_stream_list_; } /*lint !e1413*/ + const std::vector &GetForceCopyStreams() const { return force_copy_stream_list_; } /*lint !e1413*/ + + int32_t GetPriority() const { return priority_; } + + const std::vector> &GetTaskInfoList() const { return task_info_list_; } /*lint !e151*/ + const std::vector> &GetDataInfoList() const { return data_info_list_; } + const std::vector> &GetOutputInfoList() const { return output_info_list_; } + const std::vector> &GetConstantInfoList() const { return output_info_list_; } + const std::vector &GetVariableInfoList() const { return variable_info_list_; } + + private: + std::vector> task_info_list_; + std::vector> data_info_list_; /*lint !e151*/ + std::vector> output_info_list_; + std::vector> constant_info_list_; + std::vector variable_info_list_; + + std::vector wait_active_stream_list_; + std::vector force_copy_stream_list_; + + uint64_t mem_size_; + uint64_t weight_size_; + uint64_t var_size_; + + uintptr_t logic_mem_base_; + uintptr_t logic_weight_base_; + uintptr_t logic_var_base_; + + uint32_t stream_num_; + uint32_t batch_num_; + uint32_t event_num_; + + int32_t priority_; + + // Disable to copy constructor and assignment operator + DavinciModel &operator=(const DavinciModel &) = delete; + DavinciModel(const DavinciModel &) = delete; +}; +} // namespace model_runner +} // namespace ge + +#endif // INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_ diff --git a/inc/framework/ge_runtime/model_runner.h b/inc/framework/ge_runtime/model_runner.h new file mode 100644 index 000000000..a5256af76 --- /dev/null +++ b/inc/framework/ge_runtime/model_runner.h @@ -0,0 +1,71 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_ +#define INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_ + +#include +#include +#include + +#include "common/ge_inner_error_codes.h" +#include "common/ge_types.h" +#include "ge_runtime/davinci_model.h" + +namespace ge { +namespace model_runner { +class RuntimeModel; +using RuntimeInfo = std::tuple; +class ModelRunner { + public: + static ModelRunner &Instance(); + + bool LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint32_t model_id, + std::shared_ptr davinci_model, std::shared_ptr listener); + + bool DistributeTask(uint32_t model_id); + + bool LoadModelComplete(uint32_t model_id); + + const std::vector &GetTaskIdList(uint32_t model_id) const; + + const std::vector &GetStreamIdList(uint32_t model_id) const; + + const std::map> &GetRuntimeInfoMap(uint32_t model_id) const; + + void *GetModelHandle(uint32_t model_id) const; + + bool UnloadModel(uint32_t model_id); + + bool RunModel(uint32_t model_id, const InputData &input_data, OutputData *output_data); + + bool GetInputOutputDescInfo(uint32_t model_id, + bool zero_copy, + std::vector *input_desc, + std::vector *output_desc, + std::vector *input_format, + std::vector *output_format); + + private: + ModelRunner() = default; + ~ModelRunner() = default; + + std::unordered_map> runtime_models_; +}; +} // namespace model_runner +} // namespace ge + +#endif // INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_ diff --git a/inc/framework/ge_runtime/op_info.h b/inc/framework/ge_runtime/op_info.h new file mode 100644 index 000000000..22c16ed69 --- /dev/null 
+++ b/inc/framework/ge_runtime/op_info.h @@ -0,0 +1,72 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_ +#define INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_ + +#include +#include +#include + +namespace ge { +namespace model_runner { +struct TensorInfo { + int64_t GetShapeSize() const { + int64_t res = 1; + if (dims.empty()) { + return 0; + } + for (auto dim : dims) { + res *= dim; + } + return res; + } + + int64_t GetDim(uint32_t index) { + if (index >= dims.size()) { + return 0; + } + return dims[index]; + } + + std::vector dims; + uint32_t datatype; + uint32_t format; + uint32_t real_dim_cnt; + uint32_t size; + bool is_output; +}; + +struct OpInfo { + uint32_t index; + std::string name; + std::string type; + bool var_is_broadcast; + std::vector input_addrs; + std::vector output_addrs; + std::vector input_tensors; + std::vector output_tensors; + std::vector weight_tensors; + std::vector src_name; + std::vector src_index; + std::string weight_data; +}; + +using TensorInfoPtr = std::shared_ptr; +using OpInfoPtr = std::shared_ptr; +} // namespace model_runner +} // namespace ge +#endif // INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_ diff --git a/inc/framework/ge_runtime/task_info.h b/inc/framework/ge_runtime/task_info.h new file mode 100644 index 000000000..861192198 --- /dev/null +++ b/inc/framework/ge_runtime/task_info.h @@ -0,0 +1,419 @@ +/** + * Copyright 
2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_ +#define INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_ + +#include +#include +#include +#include +#include +#include + +#include "cce/taskdown_api.h" + +namespace ge { +namespace model_runner { +enum TaskInfoType { + CCE = 0, + TBE, + AICPU, + LABEL_SET, + LABEL_SWITCH, + LABEL_GOTO, + EVENT_RECORD, + EVENT_WAIT, + FUSION_START, + FUSION_END, + HCCL, + PROFILER_TRACE, + MEMCPY_ASYNC, + STREAM_SWITCH, + STREAM_ACTIVE, + // Insert new task type here + REVSERVED = 23 +}; + +class TaskInfo { + public: + virtual ~TaskInfo() {} + uint32_t stream_id() const { return stream_id_; } + TaskInfoType type() const { return type_; } + std::string op_name() const { return op_name_; } + bool dump_flag() const { return dump_flag_; } + + protected: + TaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, bool dump_flag) + : op_name_(op_name), stream_id_(stream_id), type_(type), dump_flag_(dump_flag) {} + + private: + std::string op_name_; + uint32_t stream_id_; + TaskInfoType type_; + bool dump_flag_; +}; + +class CceTaskInfo : public TaskInfo { + public: + CceTaskInfo(const std::string &op_name, uint32_t stream_id, const cce::ccOpContext &ctx, const std::string &stub_func, + uint32_t block_dim, const std::vector &args, uint32_t args_size, + const std::vector &sm_desc, const std::vector &flow_table, + const std::vector 
&args_offset, bool is_flowtable) + : TaskInfo(op_name, stream_id, TaskInfoType::CCE, false), + ctx_(ctx), + stub_func_(stub_func), + block_dim_(block_dim), + args_(args), + args_size_(args_size), + sm_desc_(sm_desc), + flow_table_(flow_table), + args_offset_(args_offset), + is_flowtable_(is_flowtable) {} + ~CceTaskInfo() override {} + + cce::ccOpContext cc_context() const { return ctx_; } + std::string stub_func() const { return stub_func_; } + uint32_t block_dim() const { return block_dim_; } + const std::vector &args() const { return args_; } + uint32_t args_size() const { return args_size_; } + const std::vector &sm_desc() const { return sm_desc_; } + const std::vector &flow_table() const { return flow_table_; } + const std::vector &args_offset() const { return args_offset_; } + bool is_flowtable() const { return is_flowtable_; } + + private: + cce::ccOpContext ctx_; + std::string stub_func_; + uint32_t block_dim_; + std::vector args_; + uint32_t args_size_; + std::vector sm_desc_; + std::vector flow_table_; + std::vector args_offset_; + bool is_flowtable_; +}; + +class TbeTaskInfo : public TaskInfo { + public: + TbeTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string &stub_func, uint32_t block_dim, + const std::vector &args, uint32_t args_size, const std::vector &sm_desc, void *binary, + uint32_t binary_size, const std::vector &meta_data, const std::vector &input_data_addrs, + const std::vector &output_data_addrs, const std::vector &workspace_addrs, bool dump_flag) + : TaskInfo(op_name, stream_id, TaskInfoType::TBE, dump_flag), + stub_func_(stub_func), + block_dim_(block_dim), + args_(args), + args_size_(args_size), + sm_desc_(sm_desc), + binary_(binary), + binary_size_(binary_size), + meta_data_(meta_data), + input_data_addrs_(input_data_addrs), + output_data_addrs_(output_data_addrs), + workspace_addrs_(workspace_addrs) {} + ~TbeTaskInfo() override {} + + const std::string &stub_func() const { return stub_func_; } + uint32_t block_dim() 
const { return block_dim_; } + const std::vector &args() const { return args_; } + uint32_t args_size() const { return args_size_; } + const std::vector &sm_desc() const { return sm_desc_; } + void *binary() const { return binary_; } + uint32_t binary_size() const { return binary_size_; } + const std::vector &meta_data() const { return meta_data_; } + const std::vector &input_data_addrs() const { return input_data_addrs_; } + const std::vector &output_data_addrs() const { return output_data_addrs_; } + const std::vector &workspace_addrs() const { return workspace_addrs_; } + + void SetBinary(void *binary, uint32_t binary_size) { + binary_ = binary; + binary_size_ = binary_size; + } + + private: + std::string stub_func_; + uint32_t block_dim_; + std::vector args_; + uint32_t args_size_; + std::vector sm_desc_; + void *binary_; + uint32_t binary_size_; + std::vector meta_data_; + std::vector input_data_addrs_; + std::vector output_data_addrs_; + std::vector workspace_addrs_; +}; + +class AicpuTaskInfo : public TaskInfo { + public: + AicpuTaskInfo(const std::string &op_name, uint32_t stream_id, const string &so_name, const std::string &kernel_name, + const std::string &node_def, const std::vector &input_data_addrs, + const std::vector &output_data_addrs, bool dump_flag) + : TaskInfo(op_name, stream_id, TaskInfoType::AICPU, dump_flag), + so_name_(so_name), + kernel_name_(kernel_name), + node_def_(node_def), + input_data_addrs_(input_data_addrs), + output_data_addrs_(output_data_addrs) {} + ~AicpuTaskInfo() override {} + + const std::string &so_name() const { return so_name_; } + const std::string &kernel_name() const { return kernel_name_; } + const std::string &node_def() const { return node_def_; } + const std::vector &input_data_addrs() const { return input_data_addrs_; } + const std::vector &output_data_addrs() const { return output_data_addrs_; } + + private: + std::string so_name_; + std::string kernel_name_; + std::string node_def_; + std::vector 
input_data_addrs_; + std::vector output_data_addrs_; +}; + +class LabelSetTaskInfo : public TaskInfo { + public: + LabelSetTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id) + : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_SET, false), label_id_(label_id) {} + ~LabelSetTaskInfo() override {} + uint32_t label_id() const { return label_id_; } + + private: + uint32_t label_id_; +}; + +class LabelGotoTaskInfo : public TaskInfo { + public: + LabelGotoTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id) + : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_GOTO, false), label_id_(label_id) {} + ~LabelGotoTaskInfo() override {} + uint32_t label_id() const { return label_id_; } + + private: + uint32_t label_id_; +}; + +class LabelSwitchTaskInfo : public TaskInfo { + public: + LabelSwitchTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_size, + const std::vector &label_list, void *cond) + : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_SWITCH, false), + label_size_(label_size), + label_list_(label_list), + cond_(cond) {} + ~LabelSwitchTaskInfo() override {} + uint32_t label_size() { return label_size_; }; + const std::vector &label_list() { return label_list_; }; + void *cond() { return cond_; }; + + private: + uint32_t label_size_; + std::vector label_list_; + void *cond_; +}; + +class EventTaskInfo : public TaskInfo { + public: + uint32_t event_id() const { return event_id_; } + + protected: + EventTaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, uint32_t event_id) + : TaskInfo(op_name, stream_id, type, false), event_id_(event_id) {} + virtual ~EventTaskInfo() override {} + + uint32_t event_id_; +}; + +class EventRecordTaskInfo : public EventTaskInfo { + public: + EventRecordTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t event_id) + : EventTaskInfo(op_name, stream_id, TaskInfoType::EVENT_RECORD, event_id) {} + ~EventRecordTaskInfo() override {} +}; + 
+class EventWaitTaskInfo : public EventTaskInfo { + public: + EventWaitTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t event_id) + : EventTaskInfo(op_name, stream_id, TaskInfoType::EVENT_WAIT, event_id) {} + ~EventWaitTaskInfo() override {} +}; + +class FusionStartTaskInfo : public TaskInfo { + public: + explicit FusionStartTaskInfo(const std::string &op_name, uint32_t stream_id) + : TaskInfo(op_name, stream_id, TaskInfoType::FUSION_START, false) {} + ~FusionStartTaskInfo() override {} +}; + +class FusionEndTaskInfo : public TaskInfo { + public: + explicit FusionEndTaskInfo(const std::string &op_name, uint32_t stream_id) + : TaskInfo(op_name, stream_id, TaskInfoType::FUSION_END, false) {} + ~FusionEndTaskInfo() override {} +}; + +class HcclTaskInfo : public TaskInfo { + public: + HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr, + void *output_data_addr, void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num, + const std::vector &private_def, void *ops_kernel_store, int32_t count, int64_t root_id, + int64_t op_type, int64_t data_type, const std::string &group, + std::function hcom_bind_model, std::function hcom_unbind_model, + std::function, void *)> hcom_distribute_task, bool dump_flag) + : TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag), + hccl_type_(hccl_type), + input_data_addr_(input_data_addr), + output_data_addr_(output_data_addr), + workspace_addr_(workspace_addr), + workspace_size_(workspace_size), + hccl_stream_num_(hccl_stream_num), + private_def_(private_def), + ops_kernel_store_(ops_kernel_store), + count_(count), + root_id_(root_id), + op_type_(op_type), + data_type_(data_type), + group_(group), + hcom_bind_model_(hcom_bind_model), + hcom_unbind_model_(hcom_unbind_model), + hcom_distribute_task_(hcom_distribute_task) {} + ~HcclTaskInfo() override {} + + const std::string &hccl_type() const { return hccl_type_; } /*lint !e1413*/ + void 
*input_data_addr() const { return input_data_addr_; } + void *output_data_addr() const { return output_data_addr_; } + void *workspace_addr() const { return workspace_addr_; } + int64_t workspace_size() const { return workspace_size_; } + int64_t hccl_stream_num() const { return hccl_stream_num_; } + const std::vector &private_def() const { return private_def_; } /*lint !e1413*/ + void *ops_kernel_store() const { return ops_kernel_store_; } + int32_t count() const { return count_; } + int64_t root_id() const { return root_id_; } + int64_t op_type() const { return op_type_; } + int64_t data_type() const { return data_type_; } + const std::string group() const { return group_; } + std::function hcom_bind_model() const { return hcom_bind_model_; } + std::function hcom_unbind_model() const { return hcom_unbind_model_; } + std::function, void *)> hcom_distribute_task() const { + return hcom_distribute_task_; + } + + private: + std::string hccl_type_; + void *input_data_addr_; + void *output_data_addr_; + void *workspace_addr_; + int64_t workspace_size_; + int64_t hccl_stream_num_; + std::vector private_def_; + void *ops_kernel_store_; + int32_t count_; + int64_t root_id_; + int64_t op_type_; + int64_t data_type_; + std::string group_; + std::function hcom_bind_model_; + std::function hcom_unbind_model_; + std::function, void *)> hcom_distribute_task_; +}; + +class ProfilerTraceTaskInfo : public TaskInfo { + public: + ProfilerTraceTaskInfo(const std::string &op_name, uint32_t stream_id, uint64_t log_id, bool notify, uint32_t flat) + : TaskInfo(op_name, stream_id, TaskInfoType::PROFILER_TRACE, false), + log_id_(log_id), + notify_(notify), + flat_(flat) {} + ~ProfilerTraceTaskInfo() override {} + + uint64_t log_id() const { return log_id_; } + bool notify() const { return notify_; } + uint32_t flat() const { return flat_; } + + private: + uint64_t log_id_; + bool notify_; + uint32_t flat_; +}; + +class MemcpyAsyncTaskInfo : public TaskInfo { + public: + 
MemcpyAsyncTaskInfo(const std::string &op_name, uint32_t stream_id, void *dst, uint64_t dst_max, void *src, + uint64_t count, uint32_t kind, bool dump_flag) + : TaskInfo(op_name, stream_id, TaskInfoType::MEMCPY_ASYNC, dump_flag), + dst_(dst), + dst_max_(dst_max), + src_(src), + count_(count), + kind_(kind) {} + ~MemcpyAsyncTaskInfo() override {} + + void *dst() const { return dst_; } + uint64_t dst_max() const { return dst_max_; } + void *src() const { return src_; } + uint64_t count() const { return count_; } + uint32_t kind() const { return kind_; } + + private: + void *dst_; + uint64_t dst_max_; + void *src_; + uint64_t count_; + int32_t kind_; +}; + +class StreamSwitchTaskInfo : public TaskInfo { + public: + StreamSwitchTaskInfo(const std::string &op_name, uint32_t stream_id, int64_t true_stream_id, void *input_addr, + void *value_addr, int64_t cond, int64_t data_type) + : TaskInfo(op_name, stream_id, TaskInfoType::STREAM_SWITCH, false), + true_stream_id_(true_stream_id), + input_addr_(input_addr), + value_addr_(value_addr), + cond_(cond), + data_type_(data_type) {} + ~StreamSwitchTaskInfo() override {} + + int64_t true_stream_id() const { return true_stream_id_; } + void *input_addr() const { return input_addr_; } + void *value_addr() const { return value_addr_; } + int64_t cond() const { return cond_; } + int64_t data_type() const { return data_type_; } + + private: + int64_t true_stream_id_; + void *input_addr_; + void *value_addr_; + int64_t cond_; + int64_t data_type_; +}; + +class StreamActiveTaskInfo : public TaskInfo { + public: + StreamActiveTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t active_stream_id) + : TaskInfo(op_name, stream_id, TaskInfoType::STREAM_ACTIVE, false), active_stream_id_(active_stream_id) {} + ~StreamActiveTaskInfo() override {} + + uint32_t active_stream_id() const { return active_stream_id_; } + + private: + uint32_t active_stream_id_; +}; +} // namespace model_runner +} // namespace ge + +#endif // 
INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_ diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h new file mode 100644 index 000000000..931dfccec --- /dev/null +++ b/inc/framework/generator/ge_generator.h @@ -0,0 +1,95 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_GENERATOR_GE_GENERATOR_H_ +#define INC_FRAMEWORK_GENERATOR_GE_GENERATOR_H_ + +#include +#include +#include +#include +#include "ge/ge_ir_build.h" +#include "common/ge_inner_error_codes.h" +#include "common/ge_types.h" +#include "graph/ge_tensor.h" +#include "graph/graph.h" +#include "graph/op_desc.h" +#include "graph/detail/attributes_holder.h" + +namespace ge { +class GeGenerator { + public: + static GeGenerator &GetInstance() { + static GeGenerator Instance; + return Instance; + } + GeGenerator() = default; + + ~GeGenerator() { (void)Finalize(); } + + GeGenerator(const GeGenerator &) = delete; + + GeGenerator &operator=(const GeGenerator &) = delete; + + Status Initialize(const std::map &options); + + Status Finalize(); + + Status GenerateOfflineModel(const Graph &graph, const std::string &file_name_prefix, + const std::vector &inputs = std::vector()); + + Status GenerateOnlineModel(const Graph &graph, const vector &inputs, ge::ModelBufferData& model); + + Status GenerateInfershapeGraph(const Graph &graph); + + /// + /// @ingroup ge + /// @brief: Build single OP in Model. 
+ /// @param [in] op_desc: the OP description. + /// @param [in] inputs: input tensors. + /// @param [in] outputs: output tensors. + /// @param [in] model_file_name: name of model file. + /// @return SUCCESS or FAILED + /// + Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector &inputs, + const std::vector &outputs, const std::string &model_file_name); + /// + /// @ingroup ge + /// @brief: Build single Op into model buff. + /// @param [in] op_desc: the OP description. + /// @param [in] inputs: input tensors. + /// @param [in] outputs: output tensors. + /// @param [in] engine_type: specific engine. + /// @param [out] model_buff: model buff of single op. + /// @return SUCCESS or FAILED + Status BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, + const vector &outputs, OpEngineType engine_type, + ModelBufferData &model_buff); + + private: + Status GenerateModel(const Graph &graph, const string &file_name_prefix, + const vector &inputs, ge::ModelBufferData& model, bool is_offline = true); + Status BuildSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, + const string &model_file_name, OpEngineType engine_type, + ModelBufferData &model_buff, bool is_offline = true); + + class Impl; + + std::shared_ptr impl_; +}; +} // namespace ge + +#endif // INC_FRAMEWORK_GENERATOR_GE_GENERATOR_H_ diff --git a/inc/framework/generator/generator_api.h b/inc/framework/generator/generator_api.h new file mode 100644 index 000000000..71c6832ea --- /dev/null +++ b/inc/framework/generator/generator_api.h @@ -0,0 +1,172 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_GENERATOR_GENERATOR_API_H_ +#define INC_FRAMEWORK_GENERATOR_GENERATOR_API_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef uint32_t Status_t; + +typedef void *OpAttr_t; +typedef void *OpTensor_t; + +/// +/// @ingroup ge +/// @brief Generate offline model for the op. +/// @param [in] op_type: type name of the op. +/// @param [in] in_tensor: input description array (created by OpTensorCreate). +/// @param [in] in_num: number of in_tensor. +/// @param [in] out_tensor: output description array (created by OpTensorCreate). +/// @param [in] out_num: number of out_tensor. +/// @param [in] attr: the attributes of the op (created by OpAttrCreate). +/// @param [in] om_file: file name for the om to save. +/// @return 0 for success / others for fail +/// +extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num, + const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr, const char *om_file); + +/// +/// @ingroup ge +/// @brief Create Tensor Description. +/// @param [in] format: tensor format of the data. +/// @param [in] datatype: tensor type of the data. +/// @param [in] shape: tensor shape array. +/// @param [in] num: number of shape. +/// @return OpTensor_t for success / nullptr for failure +/// +extern OpTensor_t OpTensorCreate(int format, int datatype, const int64_t *shape, int num); + +/// +/// @ingroup ge +/// @brief Destroy Tensor Description. +/// @param [in] OpTensor_t tensor: created by OpTensorCreate. 
+/// @param [out] none +/// @return 0 for success / others for failure. +/// +extern Status_t OpTensorDestroy(OpTensor_t tensor); + +/// +/// @ingroup ge +/// @brief Create an attribute holder. +/// @param [in] none +/// @param [out] none +/// @return OpAttr_t for success / nullptr for failure. +/// +extern OpAttr_t OpAttrCreate(); + +/// +/// @ingroup ge +/// @brief Destroy Attribute holder. +/// @param [in] OpAttr_t attr: created by OpAttrCreate. +/// @param [out] none +/// @return 0 for success / others for failure. +/// +extern Status_t OpAttrDestroy(OpAttr_t attr); + +/// +/// @ingroup ge +/// @brief Set a boolean attribute to the attribute holder. +/// @param [in] attr: attribute holder (created by OpAttrCreate). +/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). +/// @param [in] value: attributed value. +/// @return 0 for success / others for failure. +/// +extern Status_t SetAttrBool(OpAttr_t attr, const char *name, bool value); + +/// +/// @ingroup ge +/// @brief Set an integer attribute to the attribute holder. +/// @param [in] attr: attribute holder (created by OpAttrCreate). +/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). +/// @param [in] value: attribute value. +/// @return 0 for success / others for failure. +/// +extern Status_t SetAttrInt(OpAttr_t attr, const char *name, int64_t value); + +/// +/// @ingroup ge +/// @brief Set a float attribute to the attribute holder. +/// @param [in] attr: attribute holder (created by OpAttrCreate). +/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). +/// @param [in] value: attribute value. +/// @return 0 for success / others for failure. +/// +extern Status_t SetAttrFloat(OpAttr_t attr, const char *name, float value); + +/// +/// @ingroup ge +/// @brief Set a string attribute to the attribute holder. +/// @param [in] attr: attribute holder (created by OpAttrCreate). +/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). 
+/// @param [in] value: attribute value (can`t be nullptr, end with '\0'). +/// @return 0 for success / others for failure. +/// +extern Status_t SetAttrString(OpAttr_t attr, const char *name, const char *value); + +/// +/// @ingroup ge +/// @brief Set a boolean array attribute to the attribute holder. +/// @param [in] attr: attribute holder (created by OpAttrCreate). +/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). +/// @param [in] value: attribute value array. +/// @param [in] num: number of value array. +/// @return 0 for success / others for failure. +/// +extern Status_t SetAttrBoolList(OpAttr_t attr, const char *name, const bool *value, int num); + +/// +/// @ingroup ge +/// @brief Set an integer array attribute to the attribute holder. +/// @param [in] attr: attribute holder (created by OpAttrCreate). +/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). +/// @param [in] value: attribute value array. +/// @param [in] num: number of value array. +/// @return 0 for success / others for failure. +/// +extern Status_t SetAttrIntList(OpAttr_t attr, const char *name, const int64_t *value, int num); + +/// +/// @ingroup ge +/// @brief Set a float array attribute to the attribute holder. +/// @param [in] attr: attribute holder (created by OpAttrCreate). +/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). +/// @param [in] value: attribute value array. +/// @param [in] num: number of value array. +/// @return 0 for success / others for failure. +/// +extern Status_t SetAttrFloatList(OpAttr_t attr, const char *name, const float *value, int num); + +/// +/// @ingroup ge +/// @brief Set a string array attribute to the attribute holder. +/// @param [in] attr: attribute holder (created by OpAttrCreate). +/// @param [in] name: attribute name (can`t be nullptr, end with '\0'). +/// @param [in] value: attribute value array (each value can`t be nullptr, end with '\0'). +/// @param [in] num: number of value array. 
+/// @return 0 for success / others for failure. +/// +extern Status_t SetAttrStringList(OpAttr_t attr, const char *name, const char **value, int num); + +#ifdef __cplusplus +} +#endif + +#endif // INC_FRAMEWORK_GENERATOR_GENERATOR_API_H_ diff --git a/inc/framework/memory/memory_api.h b/inc/framework/memory/memory_api.h new file mode 100644 index 000000000..52ac682a1 --- /dev/null +++ b/inc/framework/memory/memory_api.h @@ -0,0 +1,57 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_MEMORY_MEMORY_API_H_ +#define INC_FRAMEWORK_MEMORY_MEMORY_API_H_ + +#include +#include + +#include "ge/ge_api_error_codes.h" +#include "runtime/mem.h" + +namespace ge { +enum MemStorageType{ + HBM = 0, + RDMA_HBM, + HOST_DDR, +}; + +struct HostVarInfo { + uint64_t base_addr; + uint64_t var_size; +}; + +/// +/// \param size [in] rdma pool memory size to be allocated. +/// \param mem_type [in] memory type for rdma pool. +/// \return Status result of function +Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_MEMORY_HBM); + +/// +/// \param var_info [in] host variable addr infos. +/// \param mem_type [in] memory type for rdma pool. +/// \return Status result of function +Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t mem_type = RT_MEMORY_HBM); + +/// +/// \param var_name [in] var_name name of host variable. 
+/// \param base_addr [out] base_addr base addr of host variable.
+/// \param var_size [out] var_size memory_size of host variable.
+/// \return Status result of function
+Status GetVarBaseAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint64_t &var_size);
+} // namespace ge
+#endif // INC_FRAMEWORK_MEMORY_MEMORY_API_H_
diff --git a/inc/framework/memory/memory_assigner.h b/inc/framework/memory/memory_assigner.h
new file mode 100644
index 000000000..bbec014ba
--- /dev/null
+++ b/inc/framework/memory/memory_assigner.h
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef INC_FRAMEWORK_MEMORY_MEMORY_ASSIGNER_H_ +#define INC_FRAMEWORK_MEMORY_MEMORY_ASSIGNER_H_ + +#include + +#include "common/ge_inner_error_codes.h" +#include "graph/node.h" + +namespace ge { +const int64_t MEM_ALIGN_SIZE = 512; +class MemoryAssigner { + public: + explicit MemoryAssigner(ge::ComputeGraphPtr compute_graph) : compute_graph_(std::move(compute_graph)) {} + virtual ~MemoryAssigner() = default; + + MemoryAssigner(const MemoryAssigner &) = delete; + + MemoryAssigner &operator=(const MemoryAssigner &) = delete; + + Status AssignMemory(bool is_loop_graph, size_t &mem_offset, size_t &zero_copy_mem_size); + + private: + ge::ComputeGraphPtr compute_graph_; +}; +} // namespace ge +#endif // INC_FRAMEWORK_MEMORY_MEMORY_ASSIGNER_H_ diff --git a/inc/framework/omg/omg.h b/inc/framework/omg/omg.h new file mode 100644 index 000000000..623f49af9 --- /dev/null +++ b/inc/framework/omg/omg.h @@ -0,0 +1,120 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_OMG_OMG_H_ +#define INC_FRAMEWORK_OMG_OMG_H_ + +#include +#include +#include +#include +#include "framework/common/types.h" +#include "framework/omg/omg_inner_types.h" +#include "framework/omg/parser/parser_inner_ctx.h" +#include "proto/ge_ir.pb.h" +#include "proto/om.pb.h" + +#include "graph/compute_graph.h" +#include "graph/graph.h" +#include "graph/model.h" +#include "runtime/kernel.h" + +using domi::Status; +using std::pair; +using std::string; +using std::unordered_map; +using std::vector; + +namespace ge { +/** + * @ingroup domi_omg + * @brief init omg context + * @return void + */ +Status InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, + bool is_dynamic_input); + +/** + * @ingroup domi_omg + * @brief generate graph based on the input model file and weight file + * @param [out] graph graph + * @param [in] model_file path of model file + * @param [in] weights_file path of weight file + * @param [in] type type of the input model + * @param [in] op_conf op mapping configuration + * @param [in] target type of platform. 
If a tiny model is generated, set target to tiny + * @param [in] run_mode run model + * @param [in] enable_l2dynamic enable l2dynamic + * @param [in] is_dynamic_input dynamic input, true of false + * @param [in] atc_params multiply atc params + * @return Status result code + */ +Status ParseGraph(ge::Graph &graph, const std::map &atc_params, const char *model_file, + const char *weights_file, domi::FrameworkType type, const char *op_conf = nullptr, + const char *target = nullptr, RunMode run_mode = GEN_OM_MODEL, bool is_dynamic_input = false); + +/** + * @ingroup domi_omg + * @brief generates a simplified JSON file based on the key value of the offline model file in protobuf format + * @param [in] model_file path of offline model file + * @param [out] json_file path of json file + * @param [key] encrypted key + * @return Status result code + */ +Status ConvertOmModelToJson(const char *model_file, const char *json_file); + +Status ConvertPbtxtToJson(const char *model_file, const char *json_file); +/** + * @ingroup domi_omg + * @brief convert the model file in protobuf format into a JSON file. 
+ * @param [in] framework type of model + * @param [in] om model_file path of offline model file + * @param [out] json_file path of json file + * @param [key] encrypted key + * @return Status result code + */ +Status ConvertFwkModelToJson(domi::FrameworkType framework, const char *model_file, const char *json_file); + +void GetGroupName(ge::proto::ModelDef &model); + +void FindParserSo(const string &path, vector &fileList, string &caffe_parser_path); + +Status CheckCustomAiCpuOpLib(); + +Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); + +Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); + +Status GetOutputLeaf(ge::NodePtr node, std::vector> &output_nodes_info); + +void GetOutputNodesNameAndIndex(std::vector> &output_nodes_info, + std::vector &output_nodes_name); + +void UpdateOmgCtxWithParserCtx(); + +void UpdateParserCtxWithOmgCtx(); +} // namespace ge + +namespace domi { +/** + * @ingroup domi_omg + * @brief get omg context + * @return reference of OmgContext + */ +ge::OmgContext &GetContext(); +} // namespace domi + +#endif // INC_FRAMEWORK_OMG_OMG_H_ diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h new file mode 100644 index 000000000..803612322 --- /dev/null +++ b/inc/framework/omg/omg_inner_types.h @@ -0,0 +1,144 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_OMG_OMG_INNER_TYPES_H_ +#define INC_FRAMEWORK_OMG_OMG_INNER_TYPES_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "framework/common/fmk_error_codes.h" +#include "framework/common/types.h" +#include "register/register_fmk_types.h" + +using domi::DOMI_TENSOR_ND; +using domi::DOMI_TENSOR_RESERVED; +using domi::domiTensorFormat_t; +using domi::FRAMEWORK_RESERVED; +using domi::FrameworkType; +using std::map; +using std::string; +using std::unordered_map; +using std::vector; + +namespace ge { +/** + * @ingroup domi_omg + * @brief run model + */ +enum RunMode { + GEN_OM_MODEL = 0, // generate offline model file + MODEL_TO_JSON = 1, // convert to JSON file + ONLY_PRE_CHECK = 3, // only for pre-check + PBTXT_TO_JSON = 5 // pbtxt to json +}; + +/// +/// @ingroup domi_omg +/// @brief high-precision mode +/// +enum HighPrecisionMode { + // the FP16 high-precision function is disabled in common mode + HIGH_PRECISION_DEFAULT = 0, + + // high-precision mode, enabling FP16 high-precision mode (Convolution/FullConnect/AvgPooling are involved) + HIGH_PRECISION_FP16 = 1 +}; + +/// +/// @ingroup domi_omg +/// @brief description buffer data +/// +struct OMGBufferData { + void *data; + uint32_t length; +}; + +struct OmgContext { + OmgContext() { format = DOMI_TENSOR_ND; } + domiTensorFormat_t format; + + // format of the input specified by the command line + std::unordered_map input_nodes_format_map; + std::vector output_formats; + + // user-designate input dims + std::vector>> user_input_dims; + // global input dims + std::unordered_map> input_dims; + + // resolve the mapping between operators with the same name and corresponding network. format e.g. + // Detectionoutput:SsdDetectiontOutput + std::map op_conf_map; + // save the output node of the network. 
key = operator name, value = index, index indicates the output index of the + // operator + std::map> out_nodes_map; + // user-designate out nodes (this is used for determing the orders) + std::vector> user_out_nodes; + // net out nodes (where user_out_nodes or leaf nodes) + std::vector net_out_nodes; + // net out nodes top names(only caffe has top) + std::vector out_top_names; + // path for the aicpu custom operator so_file + std::vector aicpu_op_run_paths; + // ddk version + std::string ddk_version; + // preferential format used by the entire network + domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED; + domi::FrameworkType type = domi::FRAMEWORK_RESERVED; + RunMode run_mode = ONLY_PRE_CHECK; + bool train_flag = false; + // whether to use FP16 high precision + int32_t fp16_high_precision = HIGH_PRECISION_DEFAULT; + + std::string output_type; + + // Save the name of the entire network: Some special operators are used to determine a network. Some operators in the + // network require special processing based on the specific network. e.g:faster-rcnn, the FirstStageProcessor module + // is determined as the Faster-R-CNN network based on the scope fusion. Then, the conv+reshape operators in the + // FirstStageBoxPredictor/BoxEncodingPredictor scope are combined. The convolution kernel rearrangement reshape + // operator needs to be deleted for the convolution kernel. + std::string net_name; + // Whether to use dynamic batch size or dynamic image size + bool is_dynamic_input = false; + std::string dynamic_batch_size; + std::string dynamic_image_size; + std::string dynamic_dims; +}; +} // namespace ge + +namespace domi { +/** + * @ingroup domi_omg + * @brief get OMG context + * @return OmgContext context + */ +ge::OmgContext &GetContext(); + +struct TEBinInfo { + // It is obsolete. It will be automatically obtained from the binfilename field of the JSON file later. 
+ // To be compatible with use cases written by previous users, fields are not deleted.(2018.11.21) + std::string bin_file_path; + std::string json_file_path; + std::string ddk_version; +}; +} // namespace domi + +#endif // INC_FRAMEWORK_OMG_OMG_INNER_TYPES_H_ diff --git a/inc/framework/omg/omg_types.h b/inc/framework/omg/omg_types.h new file mode 100644 index 000000000..771a53a4c --- /dev/null +++ b/inc/framework/omg/omg_types.h @@ -0,0 +1,22 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_OMG_OMG_TYPES_H_ +#define INC_FRAMEWORK_OMG_OMG_TYPES_H_ + +#include "register/register_fmk_types.h" + +#endif // INC_FRAMEWORK_OMG_OMG_TYPES_H_ diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h new file mode 100644 index 000000000..3a8aa6ced --- /dev/null +++ b/inc/framework/omg/parser/model_parser.h @@ -0,0 +1,111 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_ +#define INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_ + +#include +#include "framework/common/types.h" +#include "framework/omg/omg_inner_types.h" +#include "graph/attr_value.h" +#include "graph/compute_graph.h" +#include "graph/ge_tensor.h" +#include "graph/graph.h" +#include "graph/op_desc.h" +#include "graph/operator.h" +#include "graph/range_vistor.h" +#include "graph/utils/attr_utils.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" + +using Status = domi::Status; + +namespace domi { +using GetGraphCallback = std::function( + const google::protobuf::Message *root_proto, const std::string &graph)>; +class ModelParser { + public: + ModelParser() {} + + virtual ~ModelParser() {} + + /** + * @ingroup domi_omg + * @brief Analyze network model data + * @param [in] file Network model file path + * @param [in|out] graph Save the network information after analysis + * @return SUCCESS + * @return Others failed + */ + virtual Status Parse(const char *file, ge::Graph &graph) = 0; + + /** + * @ingroup domi_omg + * @brief Parse relevant data from memory and save it to graph + * @param [in] input Model file memory data + * @param [in|out] graph A graph for saving the model information after analysis + * @return SUCCESS + * @return FAILED + * @author + */ + virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; + + /** + * @ingroup domi_omg + * @brief Analyze network model data + * @param [in] proto network model + * @param [in|out] graph Save the network information after analysis + * @return SUCCESS + * @return Others failed + */ + virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0; + + /** + * @ingroup domi_omg + * @brief Analyze callback model data in 
subgraph + * @param [in] proto network model + * @param [in] callback callback of subgraph + * @param [in|out] graph Save the network information after analysis + * @return SUCCESS + * @return Others failed + */ + virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, + GetGraphCallback callback, + ge::ComputeGraphPtr &graph) = 0; + /** + * @ingroup domi_omg + * @brief Convert model files to JSON format + * @param [in] model_file Model file path to be converted + * @param [out] json_file Converted JSON file path + * @return SUCCESS + * @return Others failed + */ + virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; } + + /* + * @ingroup domi_omg + * @brief Convert network data type + * @param [in] type Data type to be converted + * @return ge::DataType + */ + virtual ge::DataType ConvertToGeDataType(const uint32_t type) = 0; + + virtual Status ParseAllGraph(const google::protobuf::Message *root_proto, ge::ComputeGraphPtr &root_graph) = 0; +}; +} // namespace domi + +#endif // INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_ diff --git a/inc/framework/omg/parser/op_parser.h b/inc/framework/omg/parser/op_parser.h new file mode 100644 index 000000000..251c04479 --- /dev/null +++ b/inc/framework/omg/parser/op_parser.h @@ -0,0 +1,92 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_ +#define INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_ + +#include +#include "common/types.h" +#include "omg/omg_inner_types.h" +#include "proto/om.pb.h" +#include "graph/ge_tensor.h" +#include "graph/op_desc.h" +#include "graph/utils/op_desc_utils.h" + +using google::protobuf::Message; +using Status = domi::Status; + +namespace ge { +/** + * @ingroup domi_omg + * @brief Used to analyze operator information + * + */ +class OpParser { + public: + /** + * @ingroup domi_omg + * @brief Deconstructor + */ + virtual ~OpParser() {} + + /** + * @ingroup domi_omg + * @brief Analytic operator parameters + * @param [in] op_src Parameter data to be resolved + * @param [out] graph Parsed parameter data + * @return SUCCESS + * @return FAILED + */ + virtual Status ParseParams(const Message *op_src, ge::OpDescPtr &op_desc) = 0; + + /** + * @ingroup domi_omg + * @brief Analytic operator parameters + * @param [in] op_src Parameter data to be resolved + * @param [out] Operator parameter data + * @return SUCCESS + * @return FAILED + */ + virtual Status ParseParams(const Message *op_src, ge::Operator &op_dest) = 0; + + /** + * @ingroup domi_omg + * @brief Analytic operator weight information + * @param [in] op_src Weight data to be resolved + * @param [out] op_dest Weight data after analysis + * @return SUCCESS + * @return FAILED + */ + virtual Status ParseWeights(const Message *op_src, ge::NodePtr &node) = 0; + + /** + * @ingroup domi_omg + * @brief Get the format information according to the parameters in the operator + * @param [in] op_src Parameter data to be resolved + * @param [out] format Output the parsed format + * @return SUCCESS + * @return FAILED + */ + virtual Status GetFormat(const Message *op_src, domi::domiTensorFormat_t &format) { + (void)op_src; + // Indicates that the op does not provide a value for format + format = domi::DOMI_TENSOR_RESERVED; + return domi::SUCCESS; + } +}; +} // namespace ge + +#endif // 
INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_ diff --git a/inc/framework/omg/parser/parser_api.h b/inc/framework/omg/parser/parser_api.h new file mode 100644 index 000000000..382bdfdee --- /dev/null +++ b/inc/framework/omg/parser/parser_api.h @@ -0,0 +1,31 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ +#define INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ + +#include +#include +#include +#include "ge/ge_api_error_codes.h" + +namespace ge { +// Initialize parser +Status ParserInitialize(const std::map& options); +// Finalize parser, release all resources +Status ParserFinalize(); +} // namespace ge +#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ diff --git a/inc/framework/omg/parser/parser_factory.h b/inc/framework/omg/parser/parser_factory.h new file mode 100644 index 000000000..90d441d71 --- /dev/null +++ b/inc/framework/omg/parser/parser_factory.h @@ -0,0 +1,138 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_ +#define INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_ + +#include +#include +#include +#include +#include "framework/common/types.h" +#include "framework/omg/omg_inner_types.h" + +using Status = domi::Status; + +namespace domi { +class WeightsParser; +class ModelParser; + +typedef std::shared_ptr (*MODEL_PARSER_CREATOR_FUN)(void); + +// Create modelparser for different frameworks +class ModelParserFactory { + public: + static ModelParserFactory *Instance(); + + /** + * @ingroup domi_omg + * @brief Create a modelparser based on the type entered + * @param [in] type Framework type + * @return Created modelparser + */ + std::shared_ptr CreateModelParser(const domi::FrameworkType type); + + /** + * @ingroup domi_omg + * @brief Register create function + * @param [in] type Framework type + * @param [in] fun ModelParser's create function + */ + void RegisterCreator(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN fun); + + protected: + ModelParserFactory() {} + ~ModelParserFactory(); + + private: + std::map creator_map_; +}; // end class ModelParserFactory + +class ModelParserRegisterar { + public: + ModelParserRegisterar(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN fun) { + ModelParserFactory::Instance()->RegisterCreator(type, fun); + } + ~ModelParserRegisterar() {} +}; + +// Registration macros for model parsers +#define REGISTER_MODEL_PARSER_CREATOR(type, clazz) \ + std::shared_ptr Creator_##type##_Model_Parser() { \ + std::shared_ptr ptr = nullptr; \ + try { \ + ptr = 
make_shared(); \ + } catch (...) { \ + ptr = nullptr; \ + } \ + return std::shared_ptr(ptr); \ + } \ + ModelParserRegisterar g_##type##_Model_Parser_Creator(type, Creator_##type##_Model_Parser) + +typedef std::shared_ptr (*WEIGHTS_PARSER_CREATOR_FUN)(void); + +// Create weightsparser for different frameworks +class WeightsParserFactory { + public: + static WeightsParserFactory *Instance(); + + /** + * @ingroup domi_omg + * @brief Create weightsparser based on the type entered + * @param [in] type Framework type + * @return Created weightsparser + */ + std::shared_ptr CreateWeightsParser(const domi::FrameworkType type); + + /** + * @ingroup domi_omg + * @brief Register create function + * @param [in] type Framework type + * @param [in] fun WeightsParser's create function + */ + void RegisterCreator(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN fun); + + protected: + WeightsParserFactory() {} + ~WeightsParserFactory(); + + private: + std::map creator_map_; +}; // end class WeightsParserFactory + +class WeightsParserRegisterar { + public: + WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN fun) { + WeightsParserFactory::Instance()->RegisterCreator(type, fun); + } + ~WeightsParserRegisterar() {} +}; + +// Register macro of weight resolver +#define REGISTER_WEIGHTS_PARSER_CREATOR(type, clazz) \ + std::shared_ptr Creator_##type##_Weights_Parser() { \ + std::shared_ptr ptr = nullptr; \ + try { \ + ptr = make_shared(); \ + } catch (...) 
{ \ + ptr = nullptr; \ + } \ + return std::shared_ptr(ptr); \ + } \ + WeightsParserRegisterar g_##type##_Weights_Parser_Creator(type, Creator_##type##_Weights_Parser) +}; // namespace domi + +#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_ diff --git a/inc/framework/omg/parser/parser_inner_ctx.h b/inc/framework/omg/parser/parser_inner_ctx.h new file mode 100644 index 000000000..53f798955 --- /dev/null +++ b/inc/framework/omg/parser/parser_inner_ctx.h @@ -0,0 +1,43 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_ +#define INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_ + +#include +#include +#include +#include +#include +#include +#include "external/register/register_fmk_types.h" +#include "external/register/register_types.h" +#include "framework/omg/omg_inner_types.h" + +namespace ge { +struct ParserContext { + std::unordered_map> input_dims; + domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND; + RunMode run_mode = ONLY_PRE_CHECK; + std::string custom_proto_path; // save caffe custom proto path, used by caffe parse + std::string caffe_proto_path; // save caffe proto path, used by caffe parse + std::string enable_scope_fusion_passes; // name of the pass that needs to take effect +}; + +ParserContext &GetParserContext(); +} // namespace ge + +#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_ diff --git a/inc/framework/omg/parser/weights_parser.h b/inc/framework/omg/parser/weights_parser.h new file mode 100644 index 000000000..1b5216b38 --- /dev/null +++ b/inc/framework/omg/parser/weights_parser.h @@ -0,0 +1,74 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_ +#define INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_ + +#include "graph/graph.h" +#include "graph/attr_value.h" +#include "graph/compute_graph.h" +#include "graph/ge_tensor.h" +#include "graph/op_desc.h" +#include "graph/operator.h" +#include "graph/range_vistor.h" +#include "graph/utils/attr_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" + +namespace domi { +/** + * @ingroup domi_omg + * @brief Weight information resolver + * + */ +class WeightsParser { + public: + /** + * @ingroup domi_omg + * @brief Constructor + */ + WeightsParser() {} + + /** + * @ingroup domi_omg + * @brief Deconstructor + */ + virtual ~WeightsParser() {} + + /** + * @ingroup domi_omg + * @brief Analyze weight data + * @param [in] file Path of weight file after training + * @param [in|out] graph Graph for saving weight information after analysis + * @return SUCCESS + * @return Others failed + */ + virtual Status Parse(const char *file, ge::Graph &graph) = 0; + + /** + * @ingroup domi_omg + * @brief Parse relevant data from memory and save it to graph + * @param [in] input Model file memory data + * @param [in|out] graph A graph for saving the model information after analysis + * @return SUCCESS + * @return FAILED + * @author + */ + virtual Status ParseFromMemory(const char *input, uint32_t lengt, ge::ComputeGraphPtr &graph) = 0; +}; +} // namespace domi + +#endif // INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_ diff --git a/inc/framework/omg/version.h b/inc/framework/omg/version.h new file mode 100644 index 000000000..ac649d83c --- /dev/null +++ b/inc/framework/omg/version.h @@ -0,0 +1,45 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_OMG_VERSION_H_ +#define INC_FRAMEWORK_OMG_VERSION_H_ + +#include +#include +#include +#include + +#include "common/debug/log.h" +#include "common/string_util.h" +#include "framework/common/debug/ge_log.h" + +namespace ge { +class PlatformVersionManager { + public: + PlatformVersionManager() = delete; + ~PlatformVersionManager() = delete; + static Status GetPlatformVersion(std::string &ver) { + ver = "1.11.z"; + std::vector version_splits = StringUtils::Split(ver, '.'); + GE_IF_BOOL_EXEC(version_splits.size() < 3, GELOGW("Read platform version error!"); return FAILED;); + + GELOGI("Read current platform version: %s.", ver.c_str()); + return SUCCESS; + } +}; // class PlatformManager +} // namespace ge + +#endif // INC_FRAMEWORK_OMG_VERSION_H_ diff --git a/inc/graph/anchor.h b/inc/graph/anchor.h new file mode 100644 index 000000000..3324b4216 --- /dev/null +++ b/inc/graph/anchor.h @@ -0,0 +1,284 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_GRAPH_ANCHOR_H_ +#define INC_GRAPH_ANCHOR_H_ + +#include +#include +#include +#include "graph/ge_error_codes.h" +#include "graph/range_vistor.h" +#include "graph/types.h" + +namespace ge { +enum AnchorStatus { + ANCHOR_SUSPEND = 0, // dat null + ANCHOR_CONST = 1, + ANCHOR_DATA = 2, // Effective + ANCHOR_RESERVED = 3 +}; +using std::string; +using std::vector; + +class Node; + +using NodePtr = std::shared_ptr; + +class Edge; + +using EdgePtr = std::shared_ptr; + +class Anchor; + +using AnchorPtr = std::shared_ptr; + +class DataAnchor; + +using DataAnchorPtr = std::shared_ptr; + +class InDataAnchor; + +using InDataAnchorPtr = std::shared_ptr; + +class OutDataAnchor; + +using OutDataAnchorPtr = std::shared_ptr; + +class ControlAnchor; + +using ControlAnchorPtr = std::shared_ptr; + +class InControlAnchor; + +using InControlAnchorPtr = std::shared_ptr; + +class OutControlAnchor; + +using OutControlAnchorPtr = std::shared_ptr; + +using ConstAnchor = const Anchor; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Anchor : public std::enable_shared_from_this { + friend class AnchorUtils; + + public: + using TYPE = const char *; + template + using Vistor = RangeVistor>; + + Anchor(const NodePtr& ownerNode, int idx); + + virtual ~Anchor() = default; + + protected: + // Whether the two anchor is equal + virtual bool Equal(AnchorPtr anchor) const = 0; + virtual bool IsTypeOf(TYPE type) const; + + public: + // Get all peer anchors connected to current anchor + Vistor GetPeerAnchors() const; + // Get peer anchor size + size_t GetPeerAnchorsSize() const; + // Get first peer anchor + AnchorPtr GetFirstPeerAnchor() const; + + // Get the anchor belong to which node + NodePtr GetOwnerNode() const; + + // Remove all links with the anchor + void UnlinkAll() noexcept; + + // Remove link with the given anchor + graphStatus Unlink(const AnchorPtr &peer); + + // Replace peer with new peers + graphStatus ReplacePeer(const AnchorPtr &oldPeer, const AnchorPtr 
&firstPeer, const AnchorPtr &secondPeer); + + // Judge if the anchor is linked with the given anchor + bool IsLinkedWith(const AnchorPtr &peer); + + // Get anchor index of the node + int GetIdx() const; + + // set anchor index of the node + void SetIdx(int index); + + protected: + // All peer anchors connected to current anchor + vector> peer_anchors_; + // The owner node of anchor + std::weak_ptr owner_node_; + // The index of current anchor + int idx_; + template + static Anchor::TYPE TypeOf() { + static_assert(std::is_base_of::value, "T must be a Anchor!"); + return __PRETTY_FUNCTION__; + } + + public: + template + static std::shared_ptr DynamicAnchorCast(AnchorPtr anchorPtr) { + static_assert(std::is_base_of::value, "T must be a Anchor!"); + if (anchorPtr == nullptr || !anchorPtr->IsTypeOf()) { + return nullptr; + } + return std::static_pointer_cast(anchorPtr); + } + + template + bool IsTypeOf() { + return IsTypeOf(TypeOf()); + } +}; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY DataAnchor : public Anchor { + friend class AnchorUtils; + + public: + explicit DataAnchor(const NodePtr &ownerNode, int idx); + + virtual ~DataAnchor() = default; + + protected: + bool IsTypeOf(TYPE type) const override; + + private: + Format format_{FORMAT_ND}; + AnchorStatus status_{ANCHOR_SUSPEND}; +}; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY InDataAnchor : public DataAnchor { + friend class OutDataAnchor; + + friend class OutControlAnchor; + + public: + explicit InDataAnchor(const NodePtr &ownerNode, int idx); + + virtual ~InDataAnchor() = default; + + // Get source out data anchor + OutDataAnchorPtr GetPeerOutAnchor() const; + + // Build connection from OutDataAnchor to InDataAnchor + graphStatus LinkFrom(const OutDataAnchorPtr &src); + + protected: + bool Equal(AnchorPtr anchor) const override; + bool IsTypeOf(TYPE type) const override; +}; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OutDataAnchor : public DataAnchor { + friend class 
InDataAnchor; + + friend class AnchorUtils; + + public: + template + using Vistor = RangeVistor>; + + explicit OutDataAnchor(const NodePtr &ownerNode, int idx); + + virtual ~OutDataAnchor() = default; + // Get dst in data anchor(one or more) + Vistor GetPeerInDataAnchors() const; + uint32_t GetPeerInDataNodesSize() const; + + // Get dst in control anchor(one or more) + Vistor GetPeerInControlAnchors() const; + + // Build connection from OutDataAnchor to InDataAnchor + graphStatus LinkTo(const InDataAnchorPtr &dest); + + // Build connection from OutDataAnchor to InControlAnchor + graphStatus LinkTo(const InControlAnchorPtr &dest); + + protected: + bool Equal(AnchorPtr anchor) const override; + bool IsTypeOf(TYPE type) const override; +}; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ControlAnchor : public Anchor { + public: + explicit ControlAnchor(const NodePtr &ownerNode); + + explicit ControlAnchor(const NodePtr &ownerNode, int idx); + + virtual ~ControlAnchor() = default; + + protected: + bool IsTypeOf(TYPE type) const override; +}; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY InControlAnchor : public ControlAnchor { + friend class OutControlAnchor; + + friend class OutDataAnchor; + + public: + explicit InControlAnchor(const NodePtr &ownerNode); + + explicit InControlAnchor(const NodePtr &ownerNode, int idx); + + virtual ~InControlAnchor() = default; + + // Get source out control anchors + Vistor GetPeerOutControlAnchors() const; + bool IsPeerOutAnchorsEmpty() const { return peer_anchors_.empty(); } + + // Get source out data anchors + Vistor GetPeerOutDataAnchors() const; + + // Build connection from OutControlAnchor to InControlAnchor + graphStatus LinkFrom(const OutControlAnchorPtr &src); + + protected: + bool Equal(AnchorPtr anchor) const override; + bool IsTypeOf(TYPE type) const override; +}; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OutControlAnchor : public ControlAnchor { + friend class InControlAnchor; + + public: + 
template + using Vistor = RangeVistor>; + + explicit OutControlAnchor(const NodePtr &ownerNode); + + explicit OutControlAnchor(const NodePtr &ownerNode, int idx); + + virtual ~OutControlAnchor() = default; + + // Get dst in control anchor(one or more) + Vistor GetPeerInControlAnchors() const; + // Get dst data anchor in control anchor(one or more) + Vistor GetPeerInDataAnchors() const; + + // Build connection from OutControlAnchor to InControlAnchor + graphStatus LinkTo(const InControlAnchorPtr &dest); + // Build connection from OutDataAnchor to InDataAnchor + graphStatus LinkTo(const InDataAnchorPtr &dest); + + protected: + bool Equal(AnchorPtr anchor) const override; + bool IsTypeOf(TYPE type) const override; +}; +} // namespace ge +#endif // INC_GRAPH_ANCHOR_H_ diff --git a/inc/graph/attr_value_serializable.h b/inc/graph/attr_value_serializable.h new file mode 100644 index 000000000..f65e3f408 --- /dev/null +++ b/inc/graph/attr_value_serializable.h @@ -0,0 +1,189 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_GRAPH_ATTR_VALUE_SERIALIZABLE_H_ +#define INC_GRAPH_ATTR_VALUE_SERIALIZABLE_H_ + +#include +#include +#include "graph/ge_attr_value.h" + +namespace ge { + +class GeAttrValue; +class _GeSerializable { + public: + template + struct ge_serializable_int64_t_support_type { + using DT = typename std::remove_cv::type; + static const bool value = std::is_same::value // by cast + || std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value; + }; + + template + static GeAttrValue SaveItemAsAttrValue(const T &t) { + return GeAttrValue::CreateFrom(t); + } + + template + static GeAttrValue SaveItemAsAttrValue(const vector &t) { + return GeAttrValue::CreateFrom(t); + } + + template = 0, typename DT = typename std::remove_cv::type> + static GeAttrValue SaveItemAsAttrValue(const T &t) { + return GeAttrValue::CreateFrom
(t); + } + // int64_t support type + template ::value, int>::type = 0> + static GeAttrValue SaveItemAsAttrValue(const T &t) { + return GeAttrValue::CreateFrom(t); + } + // vector int64_t support type + template ::value, int>::type = 0> + static GeAttrValue SaveItemAsAttrValue(const vector &t) { + return GeAttrValue::CreateFrom(t); + } + + template + static graphStatus LoadItemFromAttrValue(T &t, GeAttrValue &attrVal) { + return attrVal.GetValue(t); + } + + template + static graphStatus LoadItemFromAttrValue(vector &t, GeAttrValue &attrVal) { + return attrVal.GetValue(t); + } + + template = 0, typename DT = typename std::remove_cv::type> + static graphStatus LoadItemFromAttrValue(T &t, GeAttrValue &attrVal) { + return attrVal.GetValue
(t); + } + + template ::value, int>::type = 0> + static graphStatus LoadItemFromAttrValue(T &t, GeAttrValue &attrVal) { + return attrVal.GetValue(t); + } + + template ::value, int>::type = 0> + static graphStatus LoadItemFromAttrValue(vector &t, GeAttrValue &attrVal) { + return attrVal.GetValue(t); + } + + template + static void SaveItem(GeAttrValue::NAMED_ATTRS &namedAttrs, string itemName, T &item, Args &... args) { + GeAttrValue itemVal = SaveItemAsAttrValue(item); + (void)namedAttrs.SetAttr(itemName, itemVal); + SaveItem(namedAttrs, args...); + } + + static void SaveItem(GeAttrValue::NAMED_ATTRS &namedAttrs __attribute__((__unused__))) {} + + template + static graphStatus LoadItem(GeAttrValue::NAMED_ATTRS &namedAttrs, string itemName, T &item, Args &... args) { + auto itemVal = namedAttrs.GetItem(itemName); + auto status = LoadItemFromAttrValue(item, itemVal); + if (status != GRAPH_SUCCESS) { + return status; + } + return LoadItem(namedAttrs, args...); + } + + static graphStatus LoadItem(GeAttrValue::NAMED_ATTRS &namedAttrs __attribute__((__unused__))) { return GRAPH_SUCCESS; } +}; + +#define _GE_FI(a) #a, a +#define _GE_MAP_FIELDS1(a1) _GE_FI(a1) +#define _GE_MAP_FIELDS2(a1, a2) _GE_FI(a1), _GE_FI(a2) +#define _GE_MAP_FIELDS3(a1, a2, a3) _GE_FI(a1), _GE_FI(a2), _GE_FI(a3) +#define _GE_MAP_FIELDS4(a1, a2, a3, a4) _GE_FI(a1), _GE_FI(a2), _GE_FI(a3), _GE_FI(a4) +#define _GE_MAP_FIELDS5(a1, a2, a3, a4, a5) _GE_FI(a1), _GE_FI(a2), _GE_FI(a3), _GE_FI(a4), _GE_FI(a5) +#define _GE_MAP_FIELDS6(a1, a2, a3, a4, a5, a6) _GE_FI(a1), _GE_FI(a2), _GE_FI(a3), _GE_FI(a4), _GE_FI(a5), _GE_FI(a6) +#define _GE_MAP_FIELDS7(a1, a2, a3, a4, a5, a6, a7) \ + _GE_FI(a1) \ + , _GE_FI(a2), _GE_FI(a3), _GE_FI(a4), _GE_FI(a5), _GE_FI(a6), _GE_FI(a7) +#define _GE_MAP_FIELDS8(a1, a2, a3, a4, a5, a6, a7, a8) \ + _GE_FI(a1) \ + , _GE_FI(a2), _GE_FI(a3), _GE_FI(a4), _GE_FI(a5), _GE_FI(a6), _GE_FI(a7), _GE_FI(a8) +#define _GE_MAP_FIELDS9(a1, a2, a3, a4, a5, a6, a7, a8, a9) \ + _GE_FI(a1) \ + , 
_GE_FI(a2), _GE_FI(a3), _GE_FI(a4), _GE_FI(a5), _GE_FI(a6), _GE_FI(a7), _GE_FI(a8), _GE_FI(a9) +#define _GE_MAP_FIELDS10(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10) \ + _GE_FI(a1) \ + , _GE_FI(a2), _GE_FI(a3), _GE_FI(a4), _GE_FI(a5), _GE_FI(a6), _GE_FI(a7), _GE_FI(a8), _GE_FI(a9), _GE_FI(a10) +#define _GE_MAP_FIELDS11(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11) \ + _GE_FI(a1) \ + , _GE_FI(a2), _GE_FI(a3), _GE_FI(a4), _GE_FI(a5), _GE_FI(a6), _GE_FI(a7), _GE_FI(a8), _GE_FI(a9), _GE_FI(a10), \ + _GE_FI(a11) +#define _GE_MAP_FIELDS12(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12) \ + _GE_FI(a1) \ + , _GE_FI(a2), _GE_FI(a3), _GE_FI(a4), _GE_FI(a5), _GE_FI(a6), _GE_FI(a7), _GE_FI(a8), _GE_FI(a9), _GE_FI(a10), \ + _GE_FI(a11), _GE_FI(a12) +#define _GE_MAP_FIELDS13(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13) \ + _GE_FI(a1) \ + , _GE_FI(a2), _GE_FI(a3), _GE_FI(a4), _GE_FI(a5), _GE_FI(a6), _GE_FI(a7), _GE_FI(a8), _GE_FI(a9), _GE_FI(a10), \ + _GE_FI(a11), _GE_FI(a12), _GE_FI(a13) +#define _GE_MAP_FIELDS14(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14) \ + _GE_FI(a1) \ + , _GE_FI(a2), _GE_FI(a3), _GE_FI(a4), _GE_FI(a5), _GE_FI(a6), _GE_FI(a7), _GE_FI(a8), _GE_FI(a9), _GE_FI(a10), \ + _GE_FI(a11), _GE_FI(a12), _GE_FI(a13), _GE_FI(a14) +#define _GE_MAP_FIELDS15(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15) \ + _GE_FI(a1) \ + , _GE_FI(a2), _GE_FI(a3), _GE_FI(a4), _GE_FI(a5), _GE_FI(a6), _GE_FI(a7), _GE_FI(a8), _GE_FI(a9), _GE_FI(a10), \ + _GE_FI(a11), _GE_FI(a12), _GE_FI(a13), _GE_FI(a14), _GE_FI(a15) + +#define _GE_PRIVATE_ARGS_GLUE(x, y) x y + +#define _GE_PRIVATE_MACRO_VAR_ARGS_IMPL_COUNT(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, N, \ + ...) \ + N +#define _GE_PRIVATE_MACRO_VAR_ARGS_IMPL(args) _GE_PRIVATE_MACRO_VAR_ARGS_IMPL_COUNT args +#define _GE_COUNT_MACRO_VAR_ARGS(...) 
\ + _GE_PRIVATE_MACRO_VAR_ARGS_IMPL((__VA_ARGS__, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) + +#define _GE_PRIVATE_MACRO_CHOOSE_HELPER2(M, count) M##count +#define _GE_PRIVATE_MACRO_CHOOSE_HELPER1(M, count) _GE_PRIVATE_MACRO_CHOOSE_HELPER2(M, count) +#define _GE_PRIVATE_MACRO_CHOOSE_HELPER(M, count) _GE_PRIVATE_MACRO_CHOOSE_HELPER1(M, count) + +#define _GE_INVOKE_VAR_MACRO(...) \ + _GE_PRIVATE_ARGS_GLUE(_GE_PRIVATE_MACRO_CHOOSE_HELPER(_GE_MAP_FIELDS, _GE_COUNT_MACRO_VAR_ARGS(__VA_ARGS__)), \ + (__VA_ARGS__)) + +#define GE_SERIALIZABLE(...) \ + public: \ + friend class ge::GeAttrValue; \ + using __ge_serializable = int; \ + \ + private: \ + ge::graphStatus Save(GeAttrValue &ar) const { \ + GeAttrValue::NAMED_ATTRS named_attrs; \ + _GeSerializable::SaveItem(named_attrs, _GE_INVOKE_VAR_MACRO(__VA_ARGS__)); \ + return ar.SetValue(named_attrs); \ + } \ + ge::graphStatus Load(const GeAttrValue &ar) { \ + GeAttrValue::NAMED_ATTRS named_attrs; \ + ge::graphStatus status = ar.GetValue(named_attrs); \ + if (status != GRAPH_SUCCESS) { \ + return status; \ + } \ + return _GeSerializable::LoadItem(named_attrs, _GE_INVOKE_VAR_MACRO(__VA_ARGS__)); \ + } + +// end NamedAttrs Helper: GE_SERIALIZABLE +} // namespace ge +#endif // INC_GRAPH_ATTR_VALUE_SERIALIZABLE_H_ diff --git a/inc/graph/buffer.h b/inc/graph/buffer.h new file mode 100644 index 000000000..df204dd3c --- /dev/null +++ b/inc/graph/buffer.h @@ -0,0 +1,82 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_BUFFER_H_ +#define INC_GRAPH_BUFFER_H_ + +#include +#include +#include +#include +#include "detail/attributes_holder.h" + +namespace ge { +#ifdef HOST_VISIBILITY +#define GE_FUNC_HOST_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_HOST_VISIBILITY +#endif +#ifdef DEV_VISIBILITY +#define GE_FUNC_DEV_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_DEV_VISIBILITY +#endif + +using std::shared_ptr; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Buffer { + public: + Buffer(); + Buffer(const Buffer &other); + + explicit Buffer(std::size_t bufferSize, std::uint8_t defualtVal = 0); + + ~Buffer() = default; + + Buffer &operator=(const Buffer &other); + + static Buffer CopyFrom(const std::uint8_t *data, std::size_t bufferSize); + + const std::uint8_t *GetData() const; + std::uint8_t *GetData(); + std::size_t GetSize() const; + void ClearBuffer(); + + // For compatibility + inline const std::uint8_t *data() const { return GetData(); } + inline std::uint8_t *data() { return GetData(); } // lint !e659 + inline std::size_t size() const { return GetSize(); } + inline void clear() { return ClearBuffer(); } + uint8_t operator[](size_t index) const { // lint !e1022 !e1042 + if (buffer_ != nullptr && index < buffer_->size()) { // lint !e574 + return (uint8_t)(*buffer_)[index]; + } + return 0xff; + } + + private: + GeIrProtoHelper data_; + std::string *buffer_ = nullptr; + + // Create from protobuf obj + Buffer(const ProtoMsgOwner &protoOnwer, proto::AttrDef *buffer); + Buffer(const ProtoMsgOwner &protoOnwer, std::string *buffer); + + friend class GeAttrValueImp; + friend class GeTensor; +}; +} // namespace ge +#endif // INC_GRAPH_BUFFER_H_ diff --git a/inc/graph/compute_graph.h b/inc/graph/compute_graph.h new file mode 100644 index 000000000..9ae8fcd3b --- /dev/null +++ 
b/inc/graph/compute_graph.h @@ -0,0 +1,305 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_COMPUTE_GRAPH_H_ +#define INC_GRAPH_COMPUTE_GRAPH_H_ + +#include +#include +#include +#include +#include +#include +#include "detail/attributes_holder.h" +#include "graph/anchor.h" +#include "graph/node.h" +#include "graph/op_desc.h" +#include "graph/range_vistor.h" + +namespace ge { +class Node; +using NodePtr = std::shared_ptr; +class Edge; +using EdgePtr = std::shared_ptr; + +class InDataAnchor; +using InDataAnchorPtr = std::shared_ptr; + +class OutDataAnchor; +using OutDataAnchorPtr = std::shared_ptr; + +class ControlAnchor; +using ControlAnchorPtr = std::shared_ptr; +class InControlAnchor; +using InControlAnchorPtr = std::shared_ptr; +class OutControlAnchor; +using OutControlAnchorPtr = std::shared_ptr; +class GeAttrValue; +using AttrValuePtr = std::shared_ptr; +using ConstComputeGraph = const ComputeGraph; + +class OperatorImpl; +using OperatorImplPtr = std::shared_ptr; + +class ComputeGraph : public std::enable_shared_from_this, public AttrHolder { + friend class GraphUtils; + + public: + template + using Vistor = RangeVistor>; + + explicit ComputeGraph(const std::string &name); + ~ComputeGraph() override; + + std::string GetName() const; + void SetName(const std::string &name); + + using AttrHolder::DelAttr; + using AttrHolder::GetAttr; + using AttrHolder::HasAttr; + using 
AttrHolder::SetAttr; + + size_t GetAllNodesSize() const; + Vistor GetAllNodes() const; + // is_unknown_shape: false, same with GetAllNodes func + // is_unknown_shape: true, same with GetDirectNodes func + Vistor GetNodes(bool is_unknown_shape) const; + size_t GetDirectNodesSize() const; + Vistor GetDirectNode() const; + Vistor GetInputNodes() const; + Vistor GetOutputNodes() const; + + NodePtr FindNode(const std::string &name) const; + NodePtr FindFirstNodeMatchType(const std::string &name) const; + /*lint -e504*/ + // AddNode with NodePtr + NodePtr AddNode(NodePtr node); + NodePtr AddNode(OpDescPtr op); + NodePtr AddNode(OpDescPtr op, int64_t id); // for unserialize + NodePtr AddNodeFront(NodePtr node); + NodePtr AddNodeFront(const OpDescPtr &op); + NodePtr AddInputNode(NodePtr node); + NodePtr AddOutputNode(NodePtr node); + // insert node with specific pre_node + NodePtr AddNodeAfter(OpDescPtr &op, const NodePtr &pre_node); + NodePtr AddNodeAfter(NodePtr node, const NodePtr &pre_node); + + graphStatus RemoveNode(const NodePtr &node); + graphStatus RemoveInputNode(const NodePtr &node); + graphStatus RemoveOutputNode(const NodePtr &node); + graphStatus RemoveConstInput(const NodePtr &node); + + /// Add a subgraph to this graph. The subgraph must has a parent graph and parent node, + /// which means the member functions `SetParentGraph` and `SetParentNode` of the subgraph + /// must be called before add it to the root graph. and subgraph->GetParentNode()->GetOwnerGraph() + /// must equal to subgraph->GetOwnerGraph(). + /// The subgraphs can only be added to a *root graph*. A root graph is a graph without any parent graph. 
+ /// The subgraph's name SHOULD(not must) be the same as the parameter `name` + graphStatus AddSubgraph(const std::string &name, const std::shared_ptr &subgraph); + graphStatus AddSubgraph(const std::shared_ptr &subgraph); + + void RemoveSubgraph(const std::string &name); + void RemoveSubgraph(const std::shared_ptr &subgraph); + + std::shared_ptr GetSubgraph(const std::string &name) const; + std::vector> GetAllSubgraphs() const; + + // obsolete + std::shared_ptr AddSubGraph(std::shared_ptr sub_graph); + // obsolete + graphStatus RemoveSubGraph(const std::shared_ptr &sub_graph); + + /// + /// @brief Update input-mapping + /// @param [in] input_mapping : index_of_cur_graph_node_input -> index_of_new_graph_node_input + /// @return graphStatus + /// + graphStatus UpdateInputMapping(const std::map &input_mapping); + + /// + /// @brief Update output-mapping + /// @param [in] output_mapping : index_of_cur_graph_node_output -> index_of_new_graph_node_output + /// @return graphStatus + /// + graphStatus UpdateOutputMapping(const std::map &output_mapping); + + graphStatus TopologicalSorting(); + bool IsValid() const; + void Dump() const; + + void Swap(ComputeGraph &graph); + + graphStatus IsolateNode(const NodePtr &node); + graphStatus Verify(); + graphStatus InferShape(); + graphStatus InferOriginFormat(); + graphStatus InferShapeInNeed(); + graphStatus InsertEventNodes(); + bool operator==(const ComputeGraph &r_compute_graph) const; + + /*lint +e504*/ + const std::map, std::vector> &GetShareParamLayer() const { + return params_share_map_; + } + + void SetShareParamLayer(const std::map, std::vector> params_share_map) { + params_share_map_ = params_share_map; + } + + void SetInputsOrder(const std::vector &inputs_order) { inputs_order_ = inputs_order; } + + void SetGraphOutNodes(std::map> out_nodes_map) { out_nodes_map_ = out_nodes_map; } + + void AppendGraphOutNodes(std::map> out_nodes_map) { + for (auto &item : out_nodes_map) { + (void)out_nodes_map_.emplace(item.first, 
item.second); + } + } + + shared_ptr GetParentGraph(); + void SetParentGraph(const shared_ptr &parent); + shared_ptr GetParentNode(); + void SetParentNode(const shared_ptr &parent); + + const std::map> &GetGraphOutNodes() const { return out_nodes_map_; } + + void SetOrigGraph(ComputeGraphPtr orig_graph) { origGraph_ = orig_graph; } + + ComputeGraphPtr GetOrigGraph(void) { return origGraph_; } + void SetOutputSize(uint32_t size) { output_size_ = size; } + uint32_t GetOutputSize() const { return output_size_; } + void SetInputSize(uint32_t size) { input_size_ = size; } + uint32_t GetInputSize() const { return input_size_; } + + // false: known shape true: unknow shape + bool GetGraphUnknownFlag() const { return is_unknown_shape_graph_; } + void SetGraphUnknownFlag(bool flag) { is_unknown_shape_graph_ = flag; } + + /// + /// Set is need train iteration. + /// If set true, it means this graph need to be run iteration some + /// times(according variant "npu_runconfig/iterations_per_loop"). + /// @param need_iteration is need iteration + /// + void SetNeedIteration(bool need_iteration) { need_iteration_ = need_iteration; } + + void SetUserDefOutput(const std::string &output_name); + + const std::string GetOutput(); + + /// + /// Get is need train iteration. 
+ /// @return is need iteration + /// + bool GetNeedIteration() const { return need_iteration_; } + + void SetGraphOpName(const std::map &op_name_map) { op_name_map_ = op_name_map; } + const std::map &GetGraphOpName() const { return op_name_map_; } + + const std::map &GetAllNodesInfo() const; + + void SetAllNodesInfo(const std::map &nodes) { all_nodes_infos_ = nodes; } + + void SetGraphOutNodesInfo(std::vector> &out_nodes_info) { + output_nodes_info_ = out_nodes_info; + } + + void AppendGraphOutNodesInfo(std::vector> &out_nodes_info) { + output_nodes_info_.insert(output_nodes_info_.end(), out_nodes_info.begin(), out_nodes_info.end()); + } + + const std::vector> &GetGraphOutNodesInfo() const { return output_nodes_info_; } + + void SetGraphTargetNodesInfo(const std::vector &target_nodes_info) { + target_nodes_info_ = target_nodes_info; + } + const std::vector &GetGraphTargetNodesInfo() const { return target_nodes_info_; } + + void SetSessionID(uint64_t session_id) { session_id_ = session_id; } + uint64_t GetSessionID() const { return session_id_; } + + void SetGraphID(uint32_t graph_id) { graph_id_ = graph_id; } + uint32_t GetGraphID() const { return graph_id_; } + + void SaveDataFormat(ge::Format data_format) { data_format_ = data_format; } + ge::Format GetDataFormat() const { return data_format_; } + bool IsSummaryGraph() const { return is_summary_graph_; } + void SetSummaryFlag(bool is_summary_graph) { is_summary_graph_ = is_summary_graph; } + // Graph Before BFE + ComputeGraphPtr origGraph_; + + protected: + ProtoAttrMapHelper MutableAttrMap() override; + ConstProtoAttrMapHelper GetAttrMap() const override; + + private: + graphStatus DFSTopologicalSorting(std::vector &node_vec, std::map &map_in_edge_num, + std::vector &stack); + graphStatus BFSTopologicalSorting(std::vector &node_vec, std::map &map_in_edge_num, + std::deque &stack); + graphStatus CollectBreadthOutNode(const NodePtr &node, std::map &map_in_edge_num, + std::map &breadth_node_map); + graphStatus 
TopologicalSortingGraph(); + graphStatus SortNodes(std::vector &stack, std::map &mapInEdgeNum); + Vistor AllGraphNodes(std::vector> &subgraphs) const; + size_t GetInEdgeSize(const NodePtr &node); + size_t GetOutEdgeSize(const NodePtr &node); + graphStatus RemoveExtraOutEdge(const NodePtr &node); + bool GraphMembersAreEqual(const ComputeGraph &r_graph) const; + bool GraphAttrsAreEqual(const ComputeGraph &r_graph) const; + bool VectorInputNodePtrIsEqual(const std::vector &r_node_ptr_vector, + const std::vector &l_node_ptr_vector) const; + + void SetNodesOwner(); + + friend class ModelSerializeImp; + friend class GraphDebugImp; + friend class OnnxUtils; + + std::string name_; + uint32_t graph_id_ = 0; + ProtoAttrMapHelper attrs_; + std::vector nodes_; + std::map all_nodes_infos_; + std::vector target_nodes_info_; + + std::vector input_nodes_; + std::vector inputs_order_; + uint32_t input_size_ = 1; + std::map> out_nodes_map_; + uint32_t output_size_ = 1; + std::vector> output_nodes_info_; + + std::vector> sub_graph_; + std::map> names_to_subgraph_; + std::weak_ptr parent_graph_; + std::weak_ptr parent_node_; + + // the members followed should not in the ComputeGraph class + bool is_valid_flag_; + bool is_summary_graph_ = false; + // Indicates whether it is need iteration + bool need_iteration_ = false; + std::map, std::vector> params_share_map_; + // TaskIdx -> op_name Map + std::map op_name_map_; + uint64_t session_id_ = 0; + ge::Format data_format_ = ge::FORMAT_ND; + // unknown graph indicator, default is false, mean known shape + bool is_unknown_shape_graph_ = false; +}; +} // namespace ge +#endif // INC_GRAPH_COMPUTE_GRAPH_H_ diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h new file mode 100644 index 000000000..7ec6e1a8e --- /dev/null +++ b/inc/graph/debug/ge_attr_define.h @@ -0,0 +1,1120 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not 
use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*lint -e618*/ +#ifndef INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_ +#define INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_ + +#include +#include "graph/types.h" + +namespace ge { +#ifdef HOST_VISIBILITY +#define GE_FUNC_HOST_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_HOST_VISIBILITY +#endif +#ifdef DEV_VISIBILITY +#define GE_FUNC_DEV_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_DEV_VISIBILITY +#endif +// Public attribute +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IS_UNKNOWN_SHAPE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_UNKNOWN_SHAPE_TYPE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NAME; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_TYPE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_WORKSPACE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_WEIGHT_NAME; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IS_QUANTIZE_FACTOR; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ALPHA; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BETA; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const 
std::string ATTR_NAME_PADMODE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PADMODES; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_MODE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FILTER; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BIAS; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BIAS_TERM; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_HAS_BIAS_VALUE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PAD; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PADS; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PAD_SIZE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PAD_MODE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SCALE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_WINDOWS; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_GLOBAL_POOLING; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_CEIL_MODE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STRIDE_SIZE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_RELUMODE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STRIDE_SIZE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_RELU_FLAG; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ALGO; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FORMAT; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string 
ATTR_NAME_STORAGE_FORMAT; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STORAGE_SHAPE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FILTER_FORMAT; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_LRN_K; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_LRN_NORM_REGION; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_LRN_LOCAL_SIZE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_LRN_ALPHA; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_LRN_BETA; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AXIS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BROADCAST; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUTPUT_NUM; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_TIDX; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_TPADDINGS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_IMG_H; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_IMG_W; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NET_H; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NET_W; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_TMULTIPLES; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_MULTIPLES; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_T; + +extern GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY const std::string ATTR_NAME_N; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_TSHAPE; 
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NAN_OPT; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string NEW_AIPP_CONV_OP; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_INPUTS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_DIMS; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_MULTISHAPE_BATCHLIST; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_MULTISHAPE_BATCHLIST_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_BATCH_NUM; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUTPUT_FORMAT; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FRAMEWORK_NODE_DEF; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FRAMEWORK_OP_DEF; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FRAMEWORK_FWK_TYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FRAMEWORK_FUNC_DEF; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_TENSOR_DESC; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string 
ATTR_NAME_OUTPUT_TENSOR_DESC; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INFERRED_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PRED_PERMUTE_DELETED; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IGNORE_PRED_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_WEIGHTS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BROACAST_REAL_DIM_CNT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DIM_ALIGN; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_RTSWITCH_RECV_EVENT_ID; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AUTOMIC_ADD_START; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_WEIGHTS_DATA; + + + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_BATCH_NUM; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AUTOMIC_ADD_START; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_LABEL; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_ORIGIN_SIZE; + 
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_INPUT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_OUTPUT; + +// to be deleted +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION_CONV_PROPOSAL; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION_CONV_DECODEBBOX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION_BOX_TYPE_NUM; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_MBOX_LOC_FUSION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_MBOX_CONF_FUSION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_MBOX_OCR_FUSION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_MBOX_FUSION_BOX_TYPE_NUM; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_RESHAPE_SLICE_CONCAT_FUSION; + + + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REFINEDET_MBOX_LOC_FUSION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REFINEDET_MBOX_CONF_FUSION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REFINEDET_MBOX_FUSION_BOX_TYPE_NUM; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REFINEDET_RESHAPE_SLICE_CONCAT_FUSION; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REFINEDET_PRIOR_BOX_ATTR_VARIANCE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REFINEDET_PRIOR_BOX_ATTR_VARIANCE_NUM; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIORBOX_CONCAT; 
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string NEED_INFER; + +// _Arg +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INDEX; +// _RetVal +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RETVAL_ATTR_NAME_INDEX; +// Data +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DATA_ATTR_NAME_DATA_TYPE; + +// Send +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SEND_ATTR_EVENT_ID; + +// Recv +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RECV_ATTR_EVENT_ID; + +// Convolution +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COEF; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STRIDE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STRIDES; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DILATION; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DILATIONS; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_MODE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_ALGO; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_GROUP; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_PAD_MODE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_PAD; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_STRIDE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_DILATION; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_NUM_OUTPUT; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_KERNEL; + 
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_FILTER; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_BIAS; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_RELU_FLAG; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_ADJ; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_TARGET_SHAPE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_BEFORE_PAD; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_ATTR_NAME_HAS_BIAS; + +// Pooling +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOLING_ATTR_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOLING_ATTR_NAN_OPT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOLING_ATTR_PAD_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOLING_ATTR_GLOBAL_POOLING; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOLING_ATTR_WINDOW; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOLING_ATTR_PAD; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOLING_ATTR_STRIDE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOLING_ATTR_CEIL_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOLING_ATTR_DATA_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOLING_ATTR_BEFORE_PAD; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOLING_ATTR_NAME_ALGO; + +// Eltwise +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ELTWISE_ATTR_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ELTWISE_ATTR_COEFF; +GE_FUNC_DEV_VISIBILITY 
GE_FUNC_HOST_VISIBILITY extern const std::string ELTWISE_ATTR_WEIGHT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ELTWISE_ATTR_RELU_FLAG; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ELTWISE_ATTR_ALPHA; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ELTWISE_ATTR_BETA; + +// BatchNorm +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string BATCHNORM_ATTR_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string BATCHNORM_ATTR_EPSILON; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string BATCHNORM_ATTR_USE_GLOBAL_STATS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string BATCHNORM_ATTR_MOVING_AVERAGE_FRACTION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string BATCHNORM_ATTR_ESTIMATED_MEAN; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string BATCHNORM_ATTR_ESTIMATED_VARIANCE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string BATCHNORM_ATTR_SCALE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string BATCHNORM_ATTR_BIAS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string BATCHNORM_ATTR_DATA_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string BATCHNORM_ATTR_IS_TRAINING; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string BATCHNORM_ATTR_IS_TRAINING_FUSION; + +// Huberloss +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HUBER_LOSS_ATTR_DELTA; + +// SSDRealDivTileMul +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_REAL_DIV_TILE_MUL_ATTR_TILE_PARA; + +// SSDSumMulRealDivMean +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_SUM_MUL_REALDIV_MEAN_ATTR_REDUCTION_INDICES; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_SUM_MUL_REALDIV_MEAN_ATTR_AXIS; 
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_SUM_MUL_REALDIV_MEAN_ATTR_MEAN_PARA; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_SUM_MUL_REALDIV_MEAN_ATTR_HAS_SUM; +/// ConcatFive2Four +/// ConcatFour2Five +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_CLASS_NUM; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_FEATURE_MAP_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string TRANS_FOR_LOSS_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_BOX_TYPE_NUM; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_FEATURE_MAP_HIGH; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_FEATURE_MAP_WIDTH; +// Scale +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SCALE_ATTR_SCALE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SCALE_ATTR_BIAS; + +// FullConnection +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FULL_CONNECTION_ATTR_FILTER; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FULL_CONNECTION_ATTR_BIAS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FULL_CONNECTION_ATTR_NUM_OUTPUT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FULL_CONNECTION_ATTR_RELU_FLAG; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FULL_ATTR_NAME_ALGO; + +// SoftmaxOpParams +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SOFTMAX_ATTR_ALGO; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SOFTMAX_ATTR_MODE; + +// SparseSoftmaxCrossEntropy +extern GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY const std::string SPARSE_SOFTMAX_CROSS_ENTROPY_ATTR_MODE; +extern GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY const std::string 
SPARSE_SOFTMAX_CROSS_ENTROPY_IS_GRAD; +// Attr labelSmoothing +extern GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY const std::string SOFTMAX_CROSS_ENTROPY_LABELSMOOTHING; + +// ApplyMomentum +extern GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY const std::string APPLYMENTUM_ATTR_IS_GRAPH_FUSION; + +// Activation +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ACTIVATION_ATTR_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ACTIVATION_ATTR_COEF; + +// Concat +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONCAT_ATTR_NAME_AXIS; + +// Const +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONST_ATTR_NAME_DATA_TRANSTYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONST_ATTR_NAME_OUTPUT_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONST_ATTR_NAME_OUTPUT_TYPE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string L2_NORMALIZE_ATTR_EPS; + +// Roipooling +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ROIPOOLING_ATTR_NAME_POOLED_H; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ROIPOOLING_ATTR_NAME_POOLED_W; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ROIPOOLING_ATTR_NAME_SPATIAL_SCALE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ROIPOOLING_ATTR_NAME_RIO_POOLING_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ROIPOOLING_ATTR_NAME_POOLING_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ROIPOOLING_ATTR_NAME_SAMPLING_RATIO; + +// DetectionOutput +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_NUM_CLASSES; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_OCR_NUM_CLASSES; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern 
const std::string DETECTIONOUTPUT_ATTR_NMS_THRESHOLD; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_TOP_K; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_CONFIDENCE_THRESHOLD; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_IMG_H; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_IMG_W; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_BATCH_SIZE; +// Ssd DetectionOutput +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_ETA; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_SHARED_LOCATION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_BACKGROUND_LABEL_ID; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_CODE_TYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_VARIANCE_ENCODED_IN_TARGET; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_KEEP_TOP_K; + +// Refinedet DetectionOutput +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_OBJECTNESS_SCORE; + +// Yolo DetectionOutput +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_ClASSES; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_BIASES; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_RELATIVE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_OBJECTNESS_THRESHOLD; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_CLASS_THRESHOLD; +GE_FUNC_DEV_VISIBILITY 
GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_POST_TOP_K; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_IOU_THRESHOLD_DECAY; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_COOR_SCALE_FACTOR; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DETECTIONOUTPUT_ATTR_YOLO_VERSION; + +// DetectionPostprocess +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POSTPROCESS_ATTR_NAME_CLS_NUM; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POSTPROCESS_ATTR_NAME_CONF_THRESH; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POSTPROCESS_ATTR_NAME_NMS_THRESH; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POSTPROCESS_ATTR_POST_NMS_TOPN; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POSTPROCESS_ATTR_NAME_BBOX_REG_WEIGHT; + +// Spatialtransfrom +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SPTIALTF_ATTR_NAME_OUTPUT_H; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SPTIALTF_ATTR_NAME_OUTPUT_W; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SPTIALTF_ATTR_NAME_BORDER_VALUE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SPTIALTF_ATTR_NAME_AFFINE_TRANSFORM; + +// Proposal +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PROPOSAL_ATTR_NAME_FEAT_STRIDE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PROPOSAL_ATTR_NAME_BASE_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PROPOSAL_ATTR_NAME_MIN_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PROPOSAL_ATTR_NAME_RATIO; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PROPOSAL_ATTR_NAME_SCALE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY 
extern const std::string PROPOSAL_ATTR_NAME_PRE_NMS_TOPN; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PROPOSAL_ATTR_NAME_POST_NMS_TOPN; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PROPOSAL_ATTR_NAME_NMS_THRESH; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PROPOSAL_ATTR_NAME_TOP_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PROPOSAL_ATTR_IMG_H; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PROPOSAL_ATTR_IMG_W; +// Softmax +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SOFTMAX_ATTR_AXIS; + +// Permute +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_ATTR_ORDER; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_ATTR_PERM; + +// SSD Normalize +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSDNORMALIZE_ATTR_ACCROSS_SPATIAL; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSDNORMALIZE_ATTR_CHANNEL_SHARED; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSDNORMALIZE_ATTR_EPS; + +// Flatten +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FLATTEN_ATTR_AXIS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FLATTEN_ATTR_END_AXIS; + +// SsdPRIORBOX +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIOR_BOX_ATTR_FLIP; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIOR_BOX_ATTR_CLIP; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIOR_BOX_ATTR_IMG_H; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIOR_BOX_ATTR_IMG_W; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIOR_BOX_ATTR_STEP_H; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string 
SSD_PRIOR_BOX_ATTR_STEP_W; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIOR_BOX_ATTR_OFFSET; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIOR_BOX_ATTR_MIN_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIOR_BOX_ATTR_MAX_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIOR_BOX_ATTR_MIN_SIZE_NUM; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIOR_BOX_ATTR_MAX_SIZE_NUM; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIOR_BOX_ATTR_ASPECT_RATIO; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIOR_BOX_ATTR_ASPECT_RATIO_NUM; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIOR_BOX_ATTR_VARIANCE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_PRIOR_BOX_ATTR_VARIANCE_NUM; + +// PRelu +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PRELU_ATTR_CHANNEL_SHARED; + +// Psroi pooling +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PSROIPOOLING_ATTR_SPATIAL_SCALE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PSROIPOOLING_ATTR_OUTPUT_DIM; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PSROIPOOLING_ATTR_GROUP_SIZE; + +// Power +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POWER_ATTR_NAME_POWER; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POWER_ATTR_NAME_SCALE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POWER_ATTR_NAME_SHIFT; + +// Log +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string LOG_ATTR_NAME_SCALE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string LOG_ATTR_NAME_SHIFT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string 
LOG_ATTR_NAME_BASE; +// Pack +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PACK_ATTR_NAME_NUM; + +// Dynamic stitch +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DYNAMIC_STITCH_ATTR_NAME_NUM; +// Unpack +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string UNPACK_ATTR_NAME_NUM; +// Gathernd +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string GATHERND_ATTR_NAME_TINDICES; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string GATHERND_ATTR_NAME_TPARAMS; + +// Argmax +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ARGMAX_ATTR_NAME_TOPK; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ARGMAX_ATTR_NAME_REDUCESIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ARGMAX_ATTR_NAME_REDUCESTRIDE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ARGMAX_ATTR_NAME_OUTMAX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ARGMAX_ATTR_NAME_AXIS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ARGMAX_ATTR_NAME_AXISTYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ARGMAX_ATTR_NAME_KEEPDIMS; + +// Upsample +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string UPSAMPLE_ATTR_NAME_SCALE_H; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string UPSAMPLE_ATTR_NAME_SCALE_W; +// Relu +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NEGATIVE_SLOPE; + +// FreeSpaceExtract +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FREESPACEEXTRACT_ATTR_NAME_ORG_HEIGHT; + +// Split +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SPLIT_ATTR_NAME_SLICE_POINT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SPLIT_ATTR_NAME_SIZE_SPLIT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY 
extern const std::string SPLIT_ATTR_NAME_NUM_SPLIT; + +// Tvm +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string TVM_ATTR_NAME_MAGIC; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string TVM_ATTR_NAME_BLOCKDIM; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string TVM_ATTR_NAME_METADATA; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string TVM_ATTR_NAME_WORKSPACE_TYPE; + +// Squeeze +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SQUEEZE_ATTR_AXIS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SQUEEZE_ATTR_DIMS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SQUEEZE_OP_NAME; + +// Stride slice +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string STRIDE_SLICE_ATTR_BEGIN_MASK; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string STRIDE_SLICE_ATTR_END_MASK; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string STRIDE_SLICE_ATTR_ELLIPSIS_MASK; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string STRIDE_SLICE_ATTR_NEW_AXIS_MASK; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK; + +// Slice +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SLICE_ATTR_NAME_BEGINS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SLICE_ATTR_NAME_SIZES; + +// Roialign +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ROIALIGN_ATTR_SPATIAL_SCALE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ROIALIGN_ATTR_SAMPLING_RATIO; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ROIALIGN_ATTR_NAME_POOLED_H; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ROIALIGN_ATTR_NAME_POOLED_W; + +// Generate_rpn_proposal +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern 
const std::string GENERATE_RPN_PROPOSAL_ATTR_PRE_NMS_TOPK; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string GENERATE_RPN_PROPOSAL_ATTR_POST_NMS_TOPK; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string GENERATE_RPN_PROPOSAL_ATTR_RPN_MINI_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string + GENERATE_RPN_PROPOSAL_ATTR_RPN_PROPOSAL_NMS_THRESH; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string + GENERATE_RPN_PROPOSAL_ATTR_RPN_PROPOSAL_FILTER_THRESH; +// Decode_bbox +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DECODE_BBOX_ATTR_DECODECLIP; + +// Cast +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CAST_ATTR_DSTT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CAST_ATTR_SRCT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CAST_ATTR_DST_TYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CAST_ATTR_TRUNCATE; + +// FastRCNN predictions +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FASTRCNN_PREDICTIONS_ATTR_TOPK; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FASTRCNN_PREDICTIONS_ATTR_SCORE_THRESHOLD; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FASTRCNN_PREDICTIONS_ATTR_NMS_THRESHOLD; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FASTRCNN_PREDICTIONS_ATTR_NUM_CLASSES; + +// REORG +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REORG_ATTR_STRIDE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REORG_ATTR_REVERSE; + +// MERGE +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string MERGE_DEAD_INDEX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string MERGE_PRENODE_FLAG; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string TO_BE_OUTPUT; 
+static const std::string NOT_NET_OUTPUT = "not_net_output"; + +// ENTER +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ENTER_ATTR_FRAME_NAME; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ENTER_ATTR_CONSTANT_FLAG; + +// Concatv2 +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONCAT_V2_ATTR_TIDX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONCAT_V2_ATTR_N; +// SUM +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SUM_ATTR_TIDX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SUM_ATTR_AXIS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SUM_ATTR_KEEP_DIMS; + +// ResizeBilinear +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESIZE_BILINEAR_ATTR_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESIZE_BILINEAR_ATTR_ALIGN_CORNERS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESIZE_BILINEAR_ATTR_HEIGHT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESIZE_BILINEAR_ATTR_WIDTH; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESIZE_BILINEAR_ATTR_ZOOM_FACTOR; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESIZE_BILINEAR_ATTR_SHRINK_FACTOR; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESIZE_BILINEAR_ATTR_PAD_BEGIN; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESIZE_BILINEAR_ATTR_PAD_END; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESIZE_BILINEAR_ATTR_ALPHA; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESIZE_BILINEAR_ATTR_BETA; + +// RetinaNet +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RETINANET_FILTER_BACKGROUND_TRUE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string 
RETINANET_ANCHOR_FUSION; +// MatMul +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string MATMUL_TRANSPOSE_X; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string MATMUL_TRANSPOSE_W; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string MATMUL_HAS_BIAS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string MATMUL_ATTR_IS_TRAINING; + +// Flatten +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FLATTEN_START_AXIS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FLATTEN_END_AXIS; + +// Reshape +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESHAPE_ATTR_AXIS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESHAPE_ATTR_NUM_AXES; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESHAPE_ATTR_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESHAPE_ATTR_SHAPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESHAPE_ATTR_ALPHA; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESHAPE_ATTR_BETA; + +// FrameworkOp +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string T_IN_DATATYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string T_OUT_DATATYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUT_N; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUT_C; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUT_H; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUT_W; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_PAD_DEPTH_CONV; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_PAD_CONV; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string 
ATTR_NAME_BEFORE_PAD; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ANN_MEAN_KEEPDIMS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PAD_ATTR_PADDINGDS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PAD_ATTR_CONSTANT_VALUE; + +// ConvGradFilter +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_GRAD_FILTER_OUTPUT_SHAPE; +// ConvGradInput +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CONV_GRAD_INPUT_OUTPUT_SHAPE; + +// Rnn +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RNN_MODE_STATIC; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string MUTI_RNN; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CELL_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string CNN_RNN; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string LSTM_CELL; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string GRU_CELL; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RNN_HT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RNN_XT_HT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RNN_BATCH_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string LSTM_CELL_CLIP; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string LSTM_PROJ_CLIP; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string LSTM_ACTIVATE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string LSTM_OUT_MAP; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string LSTM_OUT_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string LSTM_STATE_OUT_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string LSTM_TIME_MAJOR; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY 
extern const std::string LSTM_IS_INPUT_PRE_PROCESS; + +// Upsample +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string UPSAMPLE_ATTR_NAME_SCALE; + +// PadV2 +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PADV2_ATTR_NAME_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PADV2_ATTR_NAME_PADS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PADV2_ATTR_NAME_T; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PADV2_ATTR_NAME_PAD_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PADV2_ATTR_NAME_CONST_VALUE; + +// MirrorPad +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string MIRRORPAD_ATTR_NAME_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string MIRRORPAD_ATTR_NAME_PADS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string MIRRORPAD_ATTR_NAME_PAD_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string MIRRORPAD_ATTR_NAME_CONST_VALUE; +// Filler +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FILLER_TYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FILLER_VALUE; + +// Shufflechannel +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SHUFFLE_CHANNEL_GROUP; + +// TopKV2 +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string TOPKV2_ATTR_K; + +// Calibration +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string STRIDE_H_INDEX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string STRIDE_W_INDEX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PAD_TOP_INDEX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PAD_BOTTOM_INDEX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PAD_RIGHT_INDEX; +GE_FUNC_DEV_VISIBILITY 
GE_FUNC_HOST_VISIBILITY extern const std::string PAD_LEFT_INDEX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_ALGO_ATTR; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SCALE_TYPE_ATTR; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IS_CONST; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_GROUP; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DILATION_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_EPSILON; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_POOLING_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_CLASS_NUM; +// Model +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_TARGET_TYPE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_STREAM_NUM; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_EVENT_NUM; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_HUGE_STREAM_LIST; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_LABEL_NUM; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_MEMORY_SIZE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_ZERO_COPY_MEMORY_SIZE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_OUT_NODES_NAME; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_WEIGHT_SIZE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_TASK_GEN_BASE_ADDR; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_TASK_GEN_WEIGHT_ADDR; + +// Public attribute +GE_FUNC_DEV_VISIBILITY 
GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IMPLY_TYPE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BYTE_SIZE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSION_INFERENCE_ID; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSION_OPDEF; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IO_OP; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSION_SCOPE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OPATTR; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_RELUFLAG; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SEQLEN_INDEX; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_X_INDEX; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_CONT_INDEX; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_XSTATIC_INDEX; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string TARGET_TYPE_MINI; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string TARGET_TYPE_TINY; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string TARGET_TYPE_LITE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_CONTINUOUS_INPUT; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_CONTINUOUS_INPUT_ALLOC; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_CONTINUOUS_OUTPUT; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_REFERENCE; + +// Used for operators that do not generate task +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NOTASK; + +// Used for operators 
that output reuse input +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUTPUT_REUSE_INPUT; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NOPADDING_CONTINUOUS_INPUT; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ATOMIC_INDEX; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_TASK_GEN_VAR_ADDR; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_LABEL; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_CONTINUOUS_STREAM_LABEL; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_VAR_SIZE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_TASK_INDEX_OP_NAME; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_CORE_TYPE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_ATC_VERSION; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_OPP_VERSION; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_SCALE_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_SCALE_VALUE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_SCALE_OFFSET; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_OFFSET_DATA_VALUE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_OFFSET_DATA_OFFSET; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_OFFSET_WEIGHT_VALUE; +GE_FUNC_DEV_VISIBILITY 
GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_OFFSET_WEIGHT_OFFSET; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_OFFSET_PAD_VALUE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_OFFSET_PAD_OFFSET; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DEQUANTIZE_SCALE_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DEQUANTIZE_SCALE_VALUE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DEQUANTIZE_SCALE_OFFSET; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DEQUANTIZE_OFFSET_DATA_TYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DEQUANTIZE_OFFSET_DATA_OFFSET; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DEQUANTIZE_OFFSET_WEIGHT_VALUE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DEQUANTIZE_OFFSET_WEIGHT_OFFSET; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DEQUANTIZE_OFFSET_PAD_VALUE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DEQUANTIZE_OFFSET_PAD_OFFSET; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REQUANTIZE_SCALE_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REQUANTIZE_SCALE_VALUE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REQUANTIZE_SCALE_OFFSET; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REQUANTIZE_OFFSET_DATA_VALUE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REQUANTIZE_OFFSET_DATA_OFFSET; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REQUANTIZE_OFFSET_WEIGHT_VALUE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REQUANTIZE_OFFSET_WEIGHT_OFFSET; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string 
REQUANTIZE_OFFSET_PAD_VALUE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REQUANTIZE_OFFSET_PAD_OFFSET; + + + +// L2_normalize +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string L2_NORMALIZE_ATTR_AXIS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string L2_NORMALIZE_ATTR_EPS; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOL_PARAMA_ATTR_WINDOW; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOL_PARAMA_ATTR_CEIL_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOL_PARAMA_ATTR_DATA_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOL_PARAMA_ATTR_GLOBAL_POOLING; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOL_PARAMA_ATTR_NAN_OP; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string POOL_PARAMA_ATTR_PAD_MOD; +// HCOM +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_ROOT_RANK; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_REDUCE_TYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_RANK_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_REDUCTION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_GROUP; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_SR_TAG; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_SRC_RANK; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_DEST_RANK; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_FUSION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_SHAPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_DATA_TYPE; + +// Log time 
stamp +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string LOG_TIME_STAMP_LOGID; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string LOG_TIME_STAMP_NOTIFY; +// SpaceToDepth/DepthToSpace +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BLOCK_SIZE; + +// SparseSoftmaxCrossEntropyWithLogits +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SPARSE_SOFT_MAX_ATTR_TLABLES; + +// MaxPoolGradWithArgmax +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string MAX_POOL_GRAD_OUTPUT_SHAPE; + +// AvgPoolGrad +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string AVG_POOL_GRAD_OUTPUT_SHAPE; + +// Variable +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_NAME; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_FRACTALZ_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_4D_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_5D_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_DATA_TYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_VAR_IN_NAME; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_VAR_IN_INDEX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_VAR_OUT_INDEX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_SHAPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HALF_VAR_NAME_END; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_CONTAINER; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_SHARED_NAME; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern 
const std::string VAR_ATTR_DTYPE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_ADDR_OFFSET; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_VAR_IN_INDEX_KEY; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_VAR_OUT_INDEX_KEY; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_SRC_VAR_NAME; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_VAR_IS_SAVE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_VAR_IS_RESTORE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_VAR_IS_BROADCAST; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REF_VAR_SRC_VAR_NAME; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REF_VAR_PRE_PEER_OUT_INDEX; + +// Assign +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ASSIGN_VALIDATE_SHAPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ASSIGN_VAR_NAME; + +// ShapeN +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SHAPEN_ATTR_N; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SHAPEN_ATTR_IN_TYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SHAPEN_ATTR_OUT_TYPE; + +// Space2bacth batch2space +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string BATCH_SPACE_ATTR_BLOCK; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string BATCH_SPACE_ATTR_PADDING; +// Depth_to_space space_to_depth +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DEPTH_SPACE_ATTR_BLOCK_SIZE; +// FakeQuantWithMinMaxVars +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FakeQuantWithMinMaxVars_ATTR_MAX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string 
FakeQuantWithMinMaxVars_ATTR_MIN; +// Mobilenet_ssd_conv_fusion +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_BOXPREDICTOR_BOXES_FUSION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_BOXPREDICTOR_SCORES_FUSION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SSD_BOXPREDICTOR_FUSION_BOX_TYPE_NUM; + +// Lsh project +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string LSH_PROJ_TYPE; + +// Control flow +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ITERATORS_PER_LOOP; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_TRUE_BRANCH_STREAM; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FLOW_CTRL_NODE_FLAG; + +// GatherV2 attr def +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string GATHERV2_ATTR_NAME_TAXIS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string GATHERV2_ATTR_NAME_TINDICES; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string GATHERV2_ATTR_NAME_TPARAMS; + +// Reshape attr def +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESHAPE_ATTR_NAME_INPUT_DESC; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string RESHAPE_ATTR_NAME_OUTPUT_DESC; + +// Axis attr def +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AXIS_ORG_OP; +// The node link with SparseSoftmaxCrossEntropyWithLogits +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_LINK_WITH_SPARE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NET_OUTPUT_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NET_OUTPUT_DATATYPE; +// For constant folding +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NO_NEED_CONSTANT_FOLDING; + +// Used 
for mark the active label list to find stream of activated node +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ACTIVE_LABEL_LIST; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE; + +// Multi batch +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PRED_VALUE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_NUM; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_LABEL; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COMBINED_BATCH; + +// Control flow +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_SWITCH_COND; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ACTIVE_STREAM_LIST; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCHN_PRED_VALUE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SUBGRAPH_FIRST_ACTIVE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COMBINED_DYNAMIC_DIMS; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_BRANCH_NODE_LABEL; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_DATA_TYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ORIG_NODE_NAME; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_CYCLIC_DEPENDENCE_FLAG; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NEXT_ITERATION; + +// Function Op +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_NODE_INDEX; + +// Used for mark the 
active node is for loop, type:bool +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IS_LOOP_ACTIVE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_MEMORY_TYPE_INPUT; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_MEMORY_TYPE_OUTPUT; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_MEMORY_TYPE_WORKSPACE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_MEMORY_TYPE_RANGE; + +// Atomic addr clean attrs +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATOMIC_ATTR_INPUT_INDEX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATOMIC_ATTR_OUTPUT_INDEX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATOMIC_ATTR_IS_FUSION_NODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATOMIC_ATTR_IS_ATOMIC_NODE; + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string EXT_ATTR_ATOMIC_WORKSPACE_INFO; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string EXT_ATTR_ATOMIC_WORKSPACE_OFFSET; +// Used for find variable session_id +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string MODEL_ATTR_SESSION_ID; + +// Source/dst format for Op FormatTransfer +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FORMAT_TRANSFER_SRC_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string FORMAT_TRANSFER_DST_FORMAT; + +// For compile op by ge call +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NEED_COMPILE; + +// For mutil-batch +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INSERT_BY_MBATCH; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MBATCH_ORIGIN_INPUT_DIMS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string 
ATTR_DYNAMIC_TYPE; + +// For inserted op +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INSERTED_BY_GE; + +// For compress weight +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COMPRESS_WEIGHT; + +// For data dump +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_DUMP_IS_MULTIOP; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_DUMP_SUB_SPLITER_INDEX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_DUMP_GROUP_OP_NAME; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_DUMP_ORIGIN_NAME; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_DUMP_ORIGIN_FORMAT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_DUMP_ORIGIN_DATA_TYPE; + +// used for lX fusion +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_L1_FUSION_GROUP_ID; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_L1_FUSION_GROUP_KEY; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSION_GROUP_KEY; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSION_VIRTUAL_OP; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSION_GROUP_TYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_MEM_TYPE_LIST; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUTPUT_MEM_TYPE_LIST; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string 
ATTR_NAME_L1_FUSION_EXTEND_PTR; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_GET_TENSOR_ACTUAL_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUTPUT_OFFSET_FOR_L1_FUSION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_FOR_L1_FUSION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_N_BATCH_SPILT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NO_TASK_AND_DUMP_NEEDED; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DATA_DUMP_REF; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_L2_FUSION_GROUP_ID; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_FOR_L2_FUSION; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_FLAG; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_ADDR; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_VALID_SIZE; + +// for unregistered op +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_UNREGST_OPPATH; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_UNREGST_ATTRLIST; + +// op overflow dump +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE; + +// functional ops attr +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_THEN_BRANCH; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_ELSE_BRANCH; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const 
std::string ATTR_NAME_WHILE_COND; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_WHILE_BODY; + +// used for label switch +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_LABEL_SWITCH_INDEX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_LABEL_SWITCH_LIST; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SUBGRAPH_END_NODE; + +// Variable +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REF_VAR_SRC_VAR_NAME; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_SRC_VAR_NAME; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REF_VAR_PRE_PEER_OUT_INDEX; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_VAR_IS_BROADCAST; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string VAR_ATTR_VAR_IS_RESTORE; + +// HCOM +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_ROOT_RANK; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_REDUCE_TYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_RANK_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_SHAPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_DATA_TYPE; + + +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_DATATYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUTPUT_DATATYPE; +// used for LX tiling +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_L1_SPACE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSION_TYPE_LIST; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_VALID_INPUT_SHAPE_LIST_LIST; 
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_VALID_OUTPUT_SHAPE_LIST_LIST; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SLICE_INPUT_OFFSET_LIST_LIST; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SLICE_OUTPUT_OFFSET_LIST_LIST; + +// Dynamic stitch +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DYNAMIC_STITCH_ATTR_NAME_NUM; + +// Used for support Horovod +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INTER_EVENT_IDENTIFY; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_HOROVOD_ATTR_REDUCE_TYPE; +// for gradient group +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_HCCL_FUSED_GROUP; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_HCCL_FUSED_FLAG; + +// dynamic shape attrs +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX; + +// atc user def dtype&format +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ATC_USER_DEFINE_DATATYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ATC_USER_DEFINE_FORMAT; + +// for fusion op plugin +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE; + +// graph partition for aicpu +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PLD_FRONT_NODE_ENGINE_NAME; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_END_REAR_NODE_ENGINE_NAME; + +// input and output memory type +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_VARIABLE_PLACEMENT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string 
ATTR_INPUT_MEMORY_TYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OUTPUT_MEMORY_TYPE; + +// input_output_offset +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ZERO_COPY_BASIC_OFFSET; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ZERO_COPY_RELATIVE_OFFSET; +} // namespace ge + +#endif // INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_ +/*lint +e618*/ diff --git a/inc/graph/def_types.h b/inc/graph/def_types.h new file mode 100644 index 000000000..cd5e19f47 --- /dev/null +++ b/inc/graph/def_types.h @@ -0,0 +1,196 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_GRAPH_DEF_TYPES_H_ +#define INC_GRAPH_DEF_TYPES_H_ + +#include +#include +#include +#include "graph/attr_value_serializable.h" +#include "graph/buffer.h" +namespace ge { +#define DEF_TYPE_DEC(type, name) \ + inline void set_##name(const type &value) { name = value; } \ + type *mutable_##name() { return &name; } + +#define DEF_TYPE_HAS_DEC(type, name) \ + inline void set_##name(const type &value) { name = value; } \ + \ + private: \ + bool has_mutable_##name{false}; \ + \ + public: \ + bool has_##name() const { return (has_mutable_##name) || QuantizeFactorHasData(name); } \ + type *mutable_##name() { \ + has_mutable_##name = true; \ + return &name; \ + } + +#define DEF_TYPE_VEC_DEC(type, name) \ + inline int name##_size() const { return name.size(); } \ + inline void clear_##name() { name.clear(); } \ + inline void set_##name(int index, type value) { name[index] = value; } \ + inline void add_##name(type value) { name.push_back(value); } \ + inline std::vector *mutable_##name() { return &name; } + +#define DEF_TYPE_BYTES_DEC(name) \ + inline void clear_##name() { name.ClearBuffer(); } \ + inline void set_##name(const void *value, size_t size) { \ + name = Buffer::CopyFrom((const uint8_t *)(value), size); } \ + inline Buffer *mutable_##name() { return &name; } + +struct CompressInfo { + public: + CompressInfo() {} + CompressInfo(int32_t blockRow, int32_t blockCol, int32_t fractalK, int32_t fractalN, int32_t lastFractalK, + int32_t lastFractalN, int32_t cubeSize, int32_t loadDir) { + blockrow = blockRow; + blockcol = blockCol; + fractalk = fractalK; + fractaln = fractalN; + lastfractalk = lastFractalK; + lastfractaln = lastFractalN; + cubesize = cubeSize; + loaddir = loadDir; + } + + int32_t blockrow{0}; // Block row + int32_t blockcol{0}; // Block col + int32_t fractalk{0}; // Fractal K + int32_t fractaln{0}; // Fractal N + int32_t lastfractalk{0}; // K of last fractal + int32_t lastfractaln{0}; // N of last fractal + int32_t cubesize{0}; // 
Cube's length + int32_t loaddir{0}; // Data load directtiono 0:col load 1:row load + DEF_TYPE_DEC(int32_t, blockrow); + DEF_TYPE_DEC(int32_t, blockcol); + DEF_TYPE_DEC(int32_t, fractalk); + DEF_TYPE_DEC(int32_t, fractaln); + DEF_TYPE_DEC(int32_t, lastfractalk); + DEF_TYPE_DEC(int32_t, lastfractaln); + DEF_TYPE_DEC(int32_t, cubesize); + DEF_TYPE_DEC(int32_t, loaddir); + + GE_SERIALIZABLE(blockrow, blockcol, fractalk, fractaln, lastfractalk, lastfractaln, cubesize, loaddir); +}; + +enum QuantizeScaleType { VECTOR_SCALE = 0, SCALAR_SCALE = 1 }; +enum QuantizeScaleMode { NORMAL_MODE = 0, SQRT_MODE = 1 }; +enum QuantizeAlgorithm { + NON_OFFSET_ALGO = 0, + HALF_OFFSET_ALGO = 1, + ALL_OFFSET_ALGO = 2, +}; +struct QuantizeFactor { + public: + // QuantizeScaleMode scale_mode; + uint32_t scale_mode{0}; + Buffer scale_value; + int64_t scale_offset{0}; + Buffer offset_data_value; + int64_t offset_data_offset{0}; + Buffer offset_weight_value; + int64_t offset_weight_offset{0}; + Buffer offset_pad_value; + int64_t offset_pad_offset{0}; + + DEF_TYPE_DEC(uint32_t, scale_mode); + DEF_TYPE_BYTES_DEC(scale_value); + + DEF_TYPE_DEC(int64_t, scale_offset); + DEF_TYPE_BYTES_DEC(offset_data_value); + DEF_TYPE_DEC(int64_t, offset_data_offset); + + DEF_TYPE_BYTES_DEC(offset_weight_value); + DEF_TYPE_DEC(int64_t, offset_weight_offset); + DEF_TYPE_BYTES_DEC(offset_pad_value); + DEF_TYPE_DEC(int64_t, offset_pad_offset); + + GE_SERIALIZABLE(scale_mode, scale_value, scale_offset, offset_data_value, offset_data_offset, offset_weight_value, + offset_weight_offset, offset_pad_value, offset_pad_offset) +}; + +static inline bool QuantizeFactorHasData(const QuantizeFactor &factor) { + return factor.scale_value.GetSize() > 0 || factor.offset_data_value.GetSize() > 0 || + factor.offset_weight_value.GetSize() > 0 || factor.offset_pad_value.GetSize() > 0; +} + +struct AllOffsetQuantizeInfo { + public: + AllOffsetQuantizeInfo() {} + AllOffsetQuantizeInfo(float s, int32_t o) : scale(s), offset(o) {} + 
float scale{0}; + int32_t offset{0}; + + DEF_TYPE_DEC(float, scale); + DEF_TYPE_DEC(int32_t, offset); + + GE_SERIALIZABLE(scale, offset) +}; + +struct QuantizeCalcFactor { + public: + Buffer offsetw; + int64_t offsetw_offset{0}; + Buffer offsetd; + int64_t offsetd_offset{0}; + Buffer scalereq; + int64_t scaledreq_offset{0}; + Buffer offsetdnext; + int64_t offsetdnext_offset{0}; + + DEF_TYPE_BYTES_DEC(offsetw); + DEF_TYPE_DEC(int64_t, offsetw_offset); + DEF_TYPE_BYTES_DEC(offsetd); + DEF_TYPE_DEC(int64_t, offsetd_offset); + DEF_TYPE_BYTES_DEC(scalereq); + DEF_TYPE_DEC(int64_t, scaledreq_offset); + DEF_TYPE_BYTES_DEC(offsetdnext); + DEF_TYPE_DEC(int64_t, offsetdnext_offset); + + GE_SERIALIZABLE(offsetw, offsetw_offset, offsetd, offsetd_offset, scalereq, scaledreq_offset, offsetdnext, + offsetdnext_offset); +}; + +static inline bool QuantizeFactorHasData(const QuantizeCalcFactor &factor) { + return factor.offsetw.GetSize() > 0 || factor.offsetd.GetSize() > 0 || factor.scalereq.GetSize() > 0 || + factor.offsetdnext.GetSize() > 0; +} + +struct QuantizeFactorParams { + uint32_t quantize_algo{0}; + uint32_t scale_type{0}; + QuantizeFactor quantize_param; + QuantizeFactor dequantize_param; + QuantizeFactor requantize_param; + QuantizeCalcFactor quantizecalc_param; + DEF_TYPE_DEC(uint32_t, quantize_algo); + DEF_TYPE_DEC(uint32_t, scale_type); + DEF_TYPE_HAS_DEC(QuantizeFactor, quantize_param); + DEF_TYPE_HAS_DEC(QuantizeFactor, dequantize_param); + DEF_TYPE_HAS_DEC(QuantizeFactor, requantize_param); + DEF_TYPE_HAS_DEC(QuantizeCalcFactor, quantizecalc_param); + + GE_SERIALIZABLE(quantize_algo, scale_type, quantize_param, dequantize_param, requantize_param, quantizecalc_param, + has_mutable_quantize_param, has_mutable_dequantize_param, has_mutable_requantize_param, + has_mutable_quantizecalc_param); +}; + +#undef DEF_TYPE_DEC +} // namespace ge + +#endif // INC_GRAPH_DEF_TYPES_H_ diff --git a/inc/graph/detail/any_map.h b/inc/graph/detail/any_map.h new file mode 100644 index 
000000000..d66d51d5a --- /dev/null +++ b/inc/graph/detail/any_map.h @@ -0,0 +1,122 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_DETAIL_ANY_MAP_H_ +#define INC_GRAPH_DETAIL_ANY_MAP_H_ + +#include +#include +#include +#include + +namespace ge { +using std::shared_ptr; +using std::string; + +class TypeID { + public: + template + static TypeID Of() { + return TypeID(__PRETTY_FUNCTION__); + } + + ~TypeID() = default; + + bool operator==(const TypeID &__arg) const { return type_ == __arg.type_; } + + private: + explicit TypeID(string type) : type_(std::move(type)) {} // lint !e30 !e32 + + string type_; +}; + +class AnyMap { + public: + template + bool Set(const string &name, const DT &val); + + template + bool Get(const string &name, T &retValue) const; + + bool Has(const string &name) const { return anyValues_.find(name) != anyValues_.end(); } + + void Swap(AnyMap &other) { + anyValues_.swap(other.anyValues_); + } + + private: + class Placeholder { + public: + virtual ~Placeholder() = default; + + virtual const TypeID &GetTypeInfo() const = 0; + }; + + template + class Holder : public Placeholder { + public: + explicit Holder(const VT &value) : value_(value) {} + + ~Holder() override = default; + + const TypeID &GetTypeInfo() const override { + static const TypeID typeId = TypeID::Of(); + return typeId; + } + + const VT value_; + }; + + std::map> anyValues_; +}; + +template 
+bool AnyMap::Set(const string &name, const DT &val) { + auto it = anyValues_.find(name); + + std::shared_ptr> tmp; + try { + tmp = std::make_shared>(val); + } catch (std::bad_alloc &e) { + tmp = nullptr; + } catch (...) { + tmp = nullptr; + } + + if (it == anyValues_.end()) { + (void)anyValues_.emplace(name, tmp); + } else { + if (it->second && it->second->GetTypeInfo() == TypeID::Of
()) { + it->second = tmp; + } else { + return false; + } + } + return true; +} + +template +bool AnyMap::Get(const string &name, T &retValue) const { + auto it = anyValues_.find(name); + if (it != anyValues_.end() && it->second && it->second->GetTypeInfo() == TypeID::Of()) { + auto retPtr = std::static_pointer_cast>(it->second); + retValue = retPtr->value_; + return true; + } + return false; +} +} // namespace ge +#endif // INC_GRAPH_DETAIL_ANY_MAP_H_ diff --git a/inc/graph/detail/attributes_holder.h b/inc/graph/detail/attributes_holder.h new file mode 100644 index 000000000..0273ce993 --- /dev/null +++ b/inc/graph/detail/attributes_holder.h @@ -0,0 +1,165 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_GRAPH_DETAIL_ATTRIBUTES_HOLDER_H_ +#define INC_GRAPH_DETAIL_ATTRIBUTES_HOLDER_H_ + +#include +#include +#include +#include +#include +#include +#include "graph/detail/any_map.h" +#include "graph/ge_error_codes.h" +#include "graph/types.h" + +namespace google { +namespace protobuf { +class Message; +template +class Map; +} // namespace protobuf +} // namespace google + +namespace ge { +using std::string; +class GeAttrValue; + +namespace proto { +class AttrDef; +class TensorDef; +class TensorDescriptor; +class ShapeDef; +class NamedAttrs; +class ModelDef; +class OpDef; +class GraphDef; +} // namespace proto + +using ProtoAttrMap = ::google::protobuf::Map<::std::string, ::ge::proto::AttrDef>; // lint !e1073 +using ProtoMsgOwner = std::shared_ptr<::google::protobuf::Message>; + +template +class GeIrProtoHelper { + public: + GeIrProtoHelper(const ProtoMsgOwner &protoOwner, ProtoType *protoMsg) + : protoOwner_(protoOwner), protoMsg_(protoMsg) {} + + GeIrProtoHelper() { + protoOwner_ = std::shared_ptr<::google::protobuf::Message>(nullptr); + protoMsg_ = nullptr; + } + virtual ~GeIrProtoHelper() = default; + + template + GeIrProtoHelper(const GeIrProtoHelper &other) { + protoOwner_ = other.protoOwner_; + protoMsg_ = other.protoMsg_; + } + template + GeIrProtoHelper &operator=(const GeIrProtoHelper &other) { + protoOwner_ = other.protoOnwer_; + protoMsg_ = other.protoMsg_; + return *this; + } + void InitDefault(); + template + bool operator==(const GeIrProtoHelper &other) const { + return protoOwner_ == other.protoOwner_ && protoMsg_ == other.protoMsg_; + } + + inline const ProtoMsgOwner &GetProtoOwner() const { return protoOwner_; } + inline ProtoType *GetProtoMsg() const { return protoMsg_; } + void CopyValueFrom(const GeIrProtoHelper &other) { + if (other.protoMsg_ != nullptr && protoMsg_ != nullptr) { + *protoMsg_ = *other.protoMsg_; + } + } + void MoveValueFrom(GeIrProtoHelper &&other) { + if (other.protoMsg_ != nullptr && protoMsg_ != nullptr) { + 
*protoMsg_ = std::move(*other.protoMsg_); + } + } + + void Swap(GeIrProtoHelper &other) { + protoOwner_.swap(other.protoOwner_); + + ProtoType *temp = protoMsg_; + protoMsg_ = other.protoMsg_; + other.protoMsg_ = temp; + } + + // protoMsg_ is part of protoOwner_, they have the same runtime + ProtoMsgOwner protoOwner_ = nullptr; + ProtoType *protoMsg_ = nullptr; + friend class GeIrProtoHelper::value, typename std::remove_const::type, const ProtoType>::type>; +}; + +using ProtoAttrMapHelper = GeIrProtoHelper; +using ConstProtoAttrMapHelper = GeIrProtoHelper; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY AttrHolder { + public: + AttrHolder() = default; + virtual ~AttrHolder() = default; + + graphStatus SetAttr(const string &name, const GeAttrValue &value); + + graphStatus GetAttr(const string &name, GeAttrValue &value) const; + + bool HasAttr(const string &name) const; + + graphStatus DelAttr(const string &name); + + void CopyAttrsFrom(const AttrHolder &holder); + + void Swap(AttrHolder &holder) { + requiredAttrs_.swap(holder.requiredAttrs_); + extAttrs_.Swap(holder.extAttrs_); + } + + template + bool SetExtAttr(const string &name, const T &value) { + return extAttrs_.Set(name, value); + } + template + T TryGetExtAttr(const string &name, T defaultValue) const { + T ret(defaultValue); + (void)extAttrs_.Get(name, ret); + return ret; + } + + protected: + graphStatus AddRequiredAttr(const std::string &name); + const std::unordered_set GetAllAttrNames() const; + const std::map GetAllAttrs() const; // lint !e1073 + + virtual ProtoAttrMapHelper MutableAttrMap() = 0; + virtual ConstProtoAttrMapHelper GetAttrMap() const = 0; + + friend class ModelSerializeImp; + friend class AttrUtils; + friend class AttrUtilsHelper; + + std::vector requiredAttrs_; + + private: + AnyMap extAttrs_; +}; +} // namespace ge +#endif // INC_GRAPH_DETAIL_ATTRIBUTES_HOLDER_H_ diff --git a/inc/graph/detail/model_serialize_imp.h b/inc/graph/detail/model_serialize_imp.h new file mode 100644 
index 000000000..bc79c4c27 --- /dev/null +++ b/inc/graph/detail/model_serialize_imp.h @@ -0,0 +1,93 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_DETAIL_MODEL_SERIALIZE_IMP_H_ +#define INC_GRAPH_DETAIL_MODEL_SERIALIZE_IMP_H_ + +#include +#include +#include +#include +#include "graph/anchor.h" +#include "graph/detail/attributes_holder.h" +#include "graph/ge_tensor.h" +#include "graph/graph.h" +#include "graph/node.h" + +namespace ge { +using ComputeGraphPtr = std::shared_ptr; + +struct NodeNameGraphReq { + string node_name; + int32_t index; + ComputeGraphPtr graph; +}; + +struct NodeNameNodeReq { + string src_node_name; + int32_t src_out_index; + NodePtr dst_node; + int32_t dst_in_index; + string dst_node_name; +}; + +class ModelSerializeImp { + public: + bool SerializeModel(const Model &model, proto::ModelDef *modeProto, bool is_dump = false); + + bool SerializeGraph(const ConstComputeGraphPtr &graph, proto::GraphDef *graphProto, bool is_dump = false); + + bool SerializeEdge(const NodePtr &node, proto::OpDef *opDefProto); + + bool SerializeOpDesc(const ConstOpDescPtr &node, proto::OpDef *opDefProto, bool is_dump = false); + + bool SerializeNode(const NodePtr &node, proto::OpDef *opDefProto, bool is_dump = false); + + bool SerializeTensor(const ConstGeTensorPtr &tensor, proto::TensorDef *tensorProto); + + bool UnserializeModel(Model &model, proto::ModelDef &modeProto); + + 
bool UnserializeGraphWithoutEdge(ComputeGraphPtr &graph, proto::GraphDef &graphProto); + + bool UnserializeGraph(ComputeGraphPtr &graph, proto::GraphDef &graphProto); + + bool HandleNodeNameRef(); + + bool UnserializeOpDesc(OpDescPtr &opDesc, proto::OpDef &opDefProto); + void AttrDefToOpDesc(OpDescPtr &op_desc, std::vector &key_in, std::vector &key_out, + std::vector &value_in, std::vector &value_out, std::vector &opt); + void OpDescToAttrDef(const ConstOpDescPtr &op_desc, proto::OpDef *op_def_proto); + + bool UnserializeNode(ComputeGraphPtr &graph, proto::OpDef &opDefProto); + + bool UnserializeTensor(GeTensorPtr &tensor, proto::TensorDef &tensorProto); + + bool ParseNodeIndex(const string &node_index, string &nodeName, int32_t &index); + + void SetProtobufOwner(const ProtoMsgOwner &bufferProtobufOnwer) { protobuf_owner_ = bufferProtobufOnwer; } + + private: + bool RebuildOwnership(ComputeGraphPtr &compute_graph, std::map &subgraphs); + + std::vector graph_input_node_names_; + std::vector graph_output_node_names_; + std::vector node_input_node_names_; + std::map node_map_; + ProtoMsgOwner protobuf_owner_; +}; +} // namespace ge + +#endif // INC_GRAPH_DETAIL_MODEL_SERIALIZE_IMP_H_ diff --git a/inc/graph/ge_attr_value.h b/inc/graph/ge_attr_value.h new file mode 100644 index 000000000..c96cf5911 --- /dev/null +++ b/inc/graph/ge_attr_value.h @@ -0,0 +1,344 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_GRAPH_GE_ATTR_VALUE_H_ +#define INC_GRAPH_GE_ATTR_VALUE_H_ + +#include +#include +#include +#include +#include +#include +#include "graph/buffer.h" +#include "detail/attributes_holder.h" +#include "graph/ge_error_codes.h" +#include "graph/ge_tensor.h" + +using std::map; +using std::string; +using std::vector; + +namespace ge { +class GeTensor; + +using GeTensorPtr = std::shared_ptr; +using ConstGeTensorPtr = std::shared_ptr; + +class ComputeGraph; +using ComputeGraphPtr = std::shared_ptr; +using ConstComputeGraphPtr = std::shared_ptr; + +class GeTensorDesc; +class GeAttrValue; +class GeAttrValueImp; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NamedAttrs : public AttrHolder { + public: + NamedAttrs(); + virtual ~NamedAttrs() = default; + void SetName(const std::string &name); + string GetName() const; + GeAttrValue GetItem(const string &key) const; + + protected: + ProtoAttrMapHelper MutableAttrMap() override; + ConstProtoAttrMapHelper GetAttrMap() const override; + + private: + // Create namedAttrs from protobuf obj + NamedAttrs(const ProtoMsgOwner &owner, proto::NamedAttrs *protoMsg); + GeIrProtoHelper named_attrs_; + friend class GeAttrValueImp; + friend class GeAttrValue; +}; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeAttrValue { + public: + using INT = int64_t; + using FLOAT = float; + using BOOL = bool; + using STR = std::string; + using TENSOR = GeTensorPtr; + using TENSOR_DESC = GeTensorDesc; + using GRAPH = ComputeGraphPtr; + using BYTES = Buffer; + using NAMED_ATTRS = ge::NamedAttrs; + using DATA_TYPE = ge::DataType; + + using LIST_INT = vector; + using LIST_FLOAT = vector; + using LIST_BOOL = vector; + using LIST_STR = vector; + using LIST_TENSOR = vector; + using LIST_TENSOR_DESC = vector; + using LIST_GRAPH = vector; + using LIST_BYTES = vector; + using LIST_NAMED_ATTRS = vector; + using LIST_LIST_INT = vector>; + using LIST_DATA_TYPE = vector; + + using NamedAttrs = ge::NamedAttrs; // for cce use 
(ge::GeAttrValue::NamedAttrs). + + enum ValueType { + VT_NONE = 0, + VT_STRING, + VT_FLOAT, + VT_BOOL, + VT_INT, + VT_TENSOR_DESC, + VT_TENSOR, + VT_BYTES, + VT_GRAPH, + VT_NAMED_ATTRS, + VT_LIST_LIST_INT, + VT_DATA_TYPE, + + VT_LIST_BASE = 1000, + VT_LIST_STRING = VT_LIST_BASE + VT_STRING, + VT_LIST_FLOAT = VT_LIST_BASE + VT_FLOAT, + VT_LIST_BOOL = VT_LIST_BASE + VT_BOOL, + VT_LIST_INT = VT_LIST_BASE + VT_INT, + VT_LIST_TENSOR_DESC = VT_LIST_BASE + VT_TENSOR_DESC, + VT_LIST_TENSOR = VT_LIST_BASE + VT_TENSOR, + VT_LIST_BYTES = VT_LIST_BASE + VT_BYTES, + VT_LIST_GRAPH = VT_LIST_BASE + VT_GRAPH, + VT_LIST_NAMED_ATTRS = VT_LIST_BASE + VT_NAMED_ATTRS, + VT_LIST_DATA_TYPE = VT_LIST_BASE + VT_DATA_TYPE, + }; + + template + struct IsAttrTypeEnable { + using DT = typename std::remove_cv::type; + + static bool const VALUE = std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value; + + // Not has list type of NamedAttrs + static bool const LIST_VALUE = std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same::value || + std::is_same::value || std::is_same::value; + }; + + template + // To cols + using enable_if_vector_type_valid_t = typename std::enable_if::LIST_VALUE, + int>::type; + + template + using enable_if_one_type_valid_t = typename std::enable_if::VALUE, int>::type; + + template + using enable_if_type_valid_t = + typename std::enable_if::VALUE || IsAttrTypeEnable::LIST_VALUE, int>::type; + + template + using enable_if_seriliable_type_valid_t = typename seriliable_type::__ge_serializable; + + GeAttrValue(); + ~GeAttrValue() = default; + // SetValue, Set initializer_list + template = 0> + graphStatus SetValue(std::initializer_list
&&val) { + T vectorVal; + for (auto &item : val) { + vectorVal.push_back(item); + } + return SetValue(vectorVal); + } + + // SetValue, Set vector + template = 0> + graphStatus SetValue(const std::vector
&val) { + T vectorVal; + for (auto item : val) { + vectorVal.push_back(item); + } + return SetValue(vectorVal); + } + + // SetValue, not list type + template = 0> + graphStatus SetValue(DT &&val) { + return SetValue(T(std::forward
(val))); + } + + // GE_SERIALIZABLE + template = 0> + graphStatus SetValue(const T &t) { + return t.Save(*this); + } + + template = 0> + graphStatus SetValue(const vector &t) { + vector attrs; + for (auto &item : t) { + GeAttrValue val; + item.Save(val); + NamedAttrs attrsItem; + (void)val.GetValue(attrsItem); + attrs.push_back(attrsItem); + } + return SetValue(attrs); + } + + // GetValue, list value + template = 0, + typename std::enable_if::value, int>::type = 0> + graphStatus GetValue(std::vector
&val) const { + T valGet; + val.clear(); + auto status = GetValue(valGet); + if (status != GRAPH_SUCCESS) { + return status; + } + for (auto item : valGet) { + val.push_back(item); + } + return GRAPH_SUCCESS; + } + + // GetValue, not list type + template = 0, + typename std::enable_if::value, int>::type = 0> + graphStatus GetValue(DT &val) const { + T valGet; + auto status = GetValue(valGet); + if (status != GRAPH_SUCCESS) { + return status; + } + val = DT(valGet); + return GRAPH_SUCCESS; + } + + // GE_SERIALIZABLE + template = 0> + graphStatus GetValue(T &t) { + return t.Load(*this); + } + + template = 0> + graphStatus GetValue(vector &t) { + graphStatus status; + t.clear(); + vector attrs; + status = this->GetValue(attrs); + if (status != GRAPH_SUCCESS) { + return status; + } + for (auto &attr : attrs) { + T item; + GeAttrValue val; + (void)val.SetValue(attr); + status = item.Load(val); + if (status != GRAPH_SUCCESS) { + return status; + } + t.push_back(item); + } + return GRAPH_SUCCESS; + } + + template = 0> + static GeAttrValue CreateFrom(DT &&val) { + GeAttrValue valRet; + (void)valRet.SetValue(std::forward
(val)); + return valRet; + } + + template = 0> + static GeAttrValue CreateFrom(std::initializer_list
&&val) { + GeAttrValue valRet; + (void)valRet.SetValue(std::move(val)); + return valRet; + } + + template = 0> + static GeAttrValue CreateFrom(const T &val) { + GeAttrValue valRet; + (void)valRet.SetValue(val); + return valRet; + } + + template = 0> + static GeAttrValue CreateFrom(const vector &val) { + GeAttrValue valRet; + (void)valRet.SetValue(val); + return valRet; + } + + ValueType GetValueType() const; + + bool IsEmpty() const; + + GeAttrValue Copy() const; + + // For map key + bool operator==(const GeAttrValue &other) const { return value_ == other.value_; } + + graphStatus MutableTensor(GeTensorPtr &tensor); + graphStatus MutableListTensor(vector &list_tensor); + + private: +#define VALUE_SET_GET_DEC(DT) \ + graphStatus SetValue(const DT &val); \ + graphStatus GetValue(DT &val) const; + VALUE_SET_GET_DEC(GeAttrValue::STR) + VALUE_SET_GET_DEC(GeAttrValue::INT) + VALUE_SET_GET_DEC(GeAttrValue::FLOAT) + VALUE_SET_GET_DEC(GeAttrValue::BOOL) + VALUE_SET_GET_DEC(GeTensorDesc) + VALUE_SET_GET_DEC(GeAttrValue::TENSOR) + VALUE_SET_GET_DEC(GeAttrValue::GRAPH) + VALUE_SET_GET_DEC(BYTES) + VALUE_SET_GET_DEC(NamedAttrs) + VALUE_SET_GET_DEC(ge::DataType) // lint !e665 + VALUE_SET_GET_DEC(vector) + VALUE_SET_GET_DEC(vector) + VALUE_SET_GET_DEC(vector) + VALUE_SET_GET_DEC(vector) + VALUE_SET_GET_DEC(vector) + VALUE_SET_GET_DEC(vector) + VALUE_SET_GET_DEC(vector) + VALUE_SET_GET_DEC(vector) + VALUE_SET_GET_DEC(vector) + VALUE_SET_GET_DEC(vector>) //lint !e665 + VALUE_SET_GET_DEC(vector) //lint !e665 +#undef VALUE_SET_GET_DEC + + GeIrProtoHelper value_; + GeAttrValue(const ProtoMsgOwner &proto_owner, ge::proto::AttrDef *val); + + friend class AttrHolder; + friend class ModelSerializeImp; + friend class OnnxUtils; +}; + +class AttrValueImpl { + public: + AttrValueImpl() = default; + ~AttrValueImpl() = default; + + GeAttrValue geAttrValue_; +}; +} // namespace ge +#endif // INC_GRAPH_GE_ATTR_VALUE_H_ diff --git a/inc/graph/ge_context.h b/inc/graph/ge_context.h new file mode 
100644 index 000000000..ef05bd002 --- /dev/null +++ b/inc/graph/ge_context.h @@ -0,0 +1,44 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef INC_GRAPH_GE_CONTEXT_H_ +#define INC_GRAPH_GE_CONTEXT_H_ + +#include +#include "graph/ge_error_codes.h" + +namespace ge { +class GEContext { + public: + graphStatus GetOption(const std::string &key, std::string &option); + bool GetHostExecFlag(); + uint64_t SessionId(); + uint32_t DeviceId(); + uint64_t TraceId(); + void Init(); + void SetSessionId(uint64_t session_id); + void SetCtxDeviceId(uint32_t device_id); + private: + uint64_t session_id_ = 0; + uint32_t device_id_ = 0; + uint64_t trace_id_ = 0; +}; // class GEContext + +/// Get context +/// @return +GEContext &GetContext(); +} // namespace ge + +#endif // INC_GRAPH_GE_CONTEXT_H_ diff --git a/inc/graph/ge_global_options.h b/inc/graph/ge_global_options.h new file mode 100644 index 000000000..0abf391ef --- /dev/null +++ b/inc/graph/ge_global_options.h @@ -0,0 +1,25 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef INC_GRAPH_GE_GLOBAL_OPTIONS_H_ +#define INC_GRAPH_GE_GLOBAL_OPTIONS_H_ + +#include +#include + +namespace ge { +std::map &GetMutableGlobalOptions(); +} +#endif // INC_GRAPH_GE_GLOBAL_OPTIONS_H_ diff --git a/inc/graph/ge_local_context.h b/inc/graph/ge_local_context.h new file mode 100644 index 000000000..36beaa798 --- /dev/null +++ b/inc/graph/ge_local_context.h @@ -0,0 +1,43 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef INC_GRAPH_GE_LOCAL_CONTEXT_H_ +#define INC_GRAPH_GE_LOCAL_CONTEXT_H_ + +#include +#include +#include +#include "graph/ge_error_codes.h" + +using std::string; +using std::map; + +namespace ge { +class GEThreadLocalContext { + public: + graphStatus GetOption(const string &key, string &option); + void SetGraphOption(map options_map); + void SetSessionOption(map options_map); + void SetGlobalOption(map options_map); + + private: + map graph_options_; + map session_options_; + map global_options_; +}; // class GEThreadLocalContext + +GEThreadLocalContext &GetThreadLocalContext(); +} // namespace ge +#endif // INC_GRAPH_GE_LOCAL_CONTEXT_H_ diff --git a/inc/graph/ge_tensor.h b/inc/graph/ge_tensor.h new file mode 100644 index 000000000..2e8cffc7a --- /dev/null +++ b/inc/graph/ge_tensor.h @@ -0,0 +1,193 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_GRAPH_GE_TENSOR_H_ +#define INC_GRAPH_GE_TENSOR_H_ + +#include +#include +#include +#include +#include "detail/attributes_holder.h" +#include "graph/buffer.h" +#include "graph/ge_error_codes.h" +#include "graph/types.h" + +namespace ge { +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeShape { + public: + GeShape(); + ~GeShape() = default; + explicit GeShape(std::vector s); + + size_t GetDimNum() const; + // If the idx is invalid, return 0 + int64_t GetDim(size_t idx) const; + graphStatus SetDim(size_t idx, int64_t value); + std::vector GetDims() const; + + int64_t GetShapeSize() const; + std::string ToString() const; + + /// + /// @brief Check is unknown shape + /// @return bool + /// + bool IsUnknownShape() const; + + /// + /// @brief Check is a scalar + /// @return bool + /// + bool IsScalar() const; + + GeShape(const GeShape &other); + GeShape(GeShape &&other); + GeShape &operator=(const GeShape &other); + GeShape &operator=(GeShape &&other); + + private: + GeIrProtoHelper shape_def_; + friend class GeTensorDesc; + // Create from proto obj + GeShape(const ProtoMsgOwner &protoOnwer, proto::ShapeDef *protoMsg); + + void RefTo(const GeShape &shape) { shape_def_ = shape.shape_def_; } +}; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDesc : public AttrHolder { + friend class TensorUtils; + friend class GeAttrValue; + friend class ModelSerialize; + + public: + GeTensorDesc(); + explicit GeTensorDesc(GeShape shape, Format format = FORMAT_ND, DataType dt = DT_FLOAT); + GeTensorDesc(const GeTensorDesc &desc); + GeTensorDesc(GeTensorDesc &&desc); + + ~GeTensorDesc() = default; + bool operator==(const GeTensorDesc &r_ge_tensor_desc) const; + + void Update(GeShape shape, Format format = FORMAT_ND, DataType dt = DT_FLOAT); + + GeShape GetShape() const; + GeShape &MutableShape(); + void SetShape(GeShape shape); + + // set shape with -2, it stand for unknown shape + void SetUnknownDimNumShape(); + // for unknown shape + graphStatus 
SetShapeRange(const std::vector> &range); + graphStatus GetShapeRange(std::vector> &range) const; + + GeShape GetOriginShape() const; + void SetOriginShape(const GeShape &originShape); + + Format GetFormat() const; + void SetFormat(Format format); + + Format GetOriginFormat() const; + void SetOriginFormat(Format originFormat); + + void SetName(const std::string &name); + const std::string GetName() const; + + DataType GetDataType() const; + void SetDataType(DataType dt); + + DataType GetOriginDataType() const; + void SetOriginDataType(DataType originDataType); + + std::vector GetRefPortIndex() const; + void SetRefPortByIndex(const std::vector &index); + + GeTensorDesc Clone() const; + GeTensorDesc &operator=(const GeTensorDesc &desc); + GeTensorDesc &operator=(GeTensorDesc &&desc); + + graphStatus IsValid() const; + + protected: + ProtoAttrMapHelper MutableAttrMap() override; + ConstProtoAttrMapHelper GetAttrMap() const override; + + private: + bool GeTensorDescAttrsAreEqual(const GeTensorDesc &r_ge_tensor_desc) const; + using AttrHolder::DelAttr; + using AttrHolder::GetAllAttrs; + using AttrHolder::GetAttr; + using AttrHolder::HasAttr; + using AttrHolder::SetAttr; + + void Init(); + + // Create from proto obj + GeTensorDesc(const ProtoMsgOwner &protoOnwer, proto::TensorDescriptor *protoMsg); + friend class GeTensor; + friend class GeAttrValueImp; + friend class ModelSerializeImp; + friend class OnnxUtils; + + GeIrProtoHelper tensor_descriptor_; + // Reference from tensorDescriptor_, do not direct use + mutable GeShape __shape_; + + void RefTo(const GeTensorDesc &tensorDesc) { tensor_descriptor_ = tensorDesc.tensor_descriptor_; } + GeShape &ShapeReference() const; +}; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensor { + public: + GeTensor(); + explicit GeTensor(const GeTensorDesc &tensorDesc); + explicit GeTensor(const GeTensorDesc &tensorDesc, const std::vector &data); + explicit GeTensor(const GeTensorDesc &tensorDesc, const Buffer &data); + 
explicit GeTensor(const GeTensorDesc &tensorDesc, const uint8_t *data, size_t size); + explicit GeTensor(GeTensorDesc &&tensorDesc, std::vector &&data); + ~GeTensor() = default; + + GeTensorDesc GetTensorDesc() const; + GeTensorDesc &MutableTensorDesc(); + void SetTensorDesc(const GeTensorDesc &tensorDesc); + + const Buffer GetData() const; + Buffer MutableData(); + graphStatus SetData(std::vector &&data); + graphStatus SetData(const std::vector &data); + graphStatus SetData(const Buffer &data); + graphStatus SetData(const uint8_t *data, size_t size); + + GeTensor Clone() const; + + // Share value + GeTensor(const GeTensor &other); + // Share value + GeTensor &operator=(const GeTensor &other); + + private: + friend class GeAttrValueImp; + friend class ModelSerializeImp; + friend class OnnxUtils; + // Create from proto obj + GeTensor(const ProtoMsgOwner &protoOnwer, proto::TensorDef *protoMsg); + GeIrProtoHelper tensor_def_; + // Reference from tensorDef_, do not direct use + mutable GeTensorDesc __desc_; + GeTensorDesc &DescReference() const; +}; +} // namespace ge +#endif // INC_GRAPH_GE_TENSOR_H_ diff --git a/inc/graph/graph_util.h b/inc/graph/graph_util.h new file mode 100644 index 000000000..c39ecbc15 --- /dev/null +++ b/inc/graph/graph_util.h @@ -0,0 +1,134 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_GRAPH_GRAPH_UTIL_H_ +#define INC_GRAPH_GRAPH_UTIL_H_ + +#include + +#include "proto/om.pb.h" + +namespace ge { +using AttrDefMap = ::google::protobuf::Map<::std::string, ::domi::AttrDef>; +bool HasOpAttr(const OpDef *opdef, std::string attr_name); +bool GetOpAttr(const std::string &key, int32_t *value, const OpDef *opdef); + +static const char OP_TYPE_DATA[] = "Data"; +static const char OP_TYPE_INPUT[] = "Input"; +static const char ATTR_KEY_INPUT_FORMAT[] = "input_format"; +static const char ATTR_KEY_OUTPUT_FORMAT[] = "output_format"; +static const char OP_TYPE_ANN_DATA[] = "AnnData"; +} // namespace ge + +#if !defined(__ANDROID__) && !defined(ANDROID) +#include "toolchain/slog.h" +const char levelStr[4][8] = {"ERROR", "WARN", "INFO", "DEBUG"}; +#else +#include +#include +const char levelStr[8][8] = {"EMERG", "ALERT", "CRIT", "ERROR", "WARNING", "NOTICE", "INFO", "DEBUG"}; +#endif + +#ifdef _MSC_VER +#define FUNC_NAME __FUNCTION__ +#else +#define FUNC_NAME __PRETTY_FUNCTION__ +#endif + +#if !defined(__ANDROID__) && !defined(ANDROID) +#define D_GRAPH_LOGI(MOD_NAME, fmt, ...) \ + dlog_info(FMK, "%s:%s:%d:" #fmt, __FUNCTION__, __FILE__, __LINE__, ##__VA_ARGS__) +#define D_GRAPH_LOGW(MOD_NAME, fmt, ...) \ + dlog_warn(FMK, "%s:%s:%d:" #fmt, __FUNCTION__, __FILE__, __LINE__, ##__VA_ARGS__) +#define D_GRAPH_LOGE(MOD_NAME, fmt, ...) \ + dlog_error(FMK, "%s:%s:%d:" #fmt, __FUNCTION__, __FILE__, __LINE__, ##__VA_ARGS__) +#else +#define D_GRAPH_LOG(level, format, ...) \ + do { \ + { \ + fprintf(stdout, "[%s] [%s] [%s] [%s] [%s:%d] " format "\n", "", "GRAPH", levelStr[level], __FUNCTION__, \ + __FILE__, __LINE__, ##__VA_ARGS__); \ + syslog(level, "%s %s:%d] [%s] %s " format "\n", "", __FILE__, __LINE__, "OPTIMIZER", __FUNCTION__, \ + ##__VA_ARGS__); \ + } \ + } while (0) +#define D_GRAPH_LOGI(MOD_NAME, fmt, ...) D_GRAPH_LOG(ANDROID_LOG_INFO, #fmt, ##__VA_ARGS__) +#define D_GRAPH_LOGW(MOD_NAME, fmt, ...) 
D_GRAPH_LOG(ANDROID_LOG_INFO, #fmt, ##__VA_ARGS__) +#define D_GRAPH_LOGE(MOD_NAME, fmt, ...) D_GRAPH_LOG(ANDROID_LOG_INFO, #fmt, ##__VA_ARGS__) +#endif + +#if !defined(__ANDROID__) && !defined(ANDROID) +#define GRAPH_LOGI(...) D_GRAPH_LOGI(GRAPH_MOD_NAME, __VA_ARGS__) +#define GRAPH_LOGW(...) D_GRAPH_LOGW(GRAPH_MOD_NAME, __VA_ARGS__) +#define GRAPH_LOGE(...) D_GRAPH_LOGE(GRAPH_MOD_NAME, __VA_ARGS__) +#else + +#define GRAPH_LOG(level, format, ...) \ + do { \ + { \ + fprintf(stdout, "[%s] [%s] [%s] [%s] [%s:%d] " format "\n", "", "GRAPH", levelStr[level], __FUNCTION__, \ + __FILE__, __LINE__, ##__VA_ARGS__); \ + syslog(level, "%s %s:%d] [%s] %s " format "\n", "", __FILE__, __LINE__, "OPTIMIZER", __FUNCTION__, \ + ##__VA_ARGS__); \ + } \ + } while (0) +#define GRAPH_LOGI(fmt, ...) GRAPH_LOG(ANDROID_LOG_INFO, #fmt, ##__VA_ARGS__) +#define GRAPH_LOGW(fmt, ...) GRAPH_LOG(ANDROID_LOG_INFO, #fmt, ##__VA_ARGS__) +#define GRAPH_LOGE(fmt, ...) GRAPH_LOG(ANDROID_LOG_INFO, #fmt, ##__VA_ARGS__) +#endif + +#define GRAPH_CHK_STATUS_RET_NOLOG(expr) \ + do { \ + const domi::graphStatus _status = (expr); \ + if (_status != domi::GRAPH_SUCCESS) { \ + return _status; \ + } \ + } while (0) + +#define GRAPH_CHK_BOOL_RET_STATUS(expr, _status, ...) \ + do { \ + bool b = (expr); \ + if (!b) { \ + GRAPH_LOGE(__VA_ARGS__); \ + return _status; \ + } \ + } while (0) + +#define GRAPH_CHK_BOOL_EXEC_NOLOG(expr, exec_expr) \ + { \ + bool b = (expr); \ + if (!b) { \ + exec_expr; \ + } \ + }; + +#define GRAPH_IF_BOOL_EXEC(expr, exec_expr) \ + { \ + if (expr) { \ + exec_expr; \ + } \ + } + +#define GRAPH_RETURN_WITH_LOG_IF_ERROR(expr, ...) 
\ + do { \ + const ::domi::graphStatus _status = (expr); \ + if (_status) { \ + GRAPH_LOGE(__VA_ARGS__); \ + return _status; \ + } \ + } while (0) + +#endif // INC_GRAPH_GRAPH_UTIL_H_ diff --git a/inc/graph/model.h b/inc/graph/model.h new file mode 100644 index 000000000..9beb55787 --- /dev/null +++ b/inc/graph/model.h @@ -0,0 +1,94 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_MODEL_H_ +#define INC_GRAPH_MODEL_H_ + +#include +#include +#include +#include +#include "detail/attributes_holder.h" +#include "graph/ge_attr_value.h" +#include "graph/graph.h" + +namespace ge { +using std::map; +using std::string; +using std::vector; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Model : public AttrHolder { + public: + Model(); + + ~Model() = default; + + Model(const string &name, const string &custom_version); + + string GetName() const; + void SetName(const string &name); + + uint32_t GetVersion() const; + + void SetVersion(uint32_t version) { version_ = version; } + + std::string GetPlatformVersion() const; + + void SetPlatformVersion(string version) { platform_version_ = version; } + + Graph GetGraph() const; + + void SetGraph(const Graph &graph); + + void SetAttr(const ProtoAttrMapHelper &attrs); + + using AttrHolder::GetAllAttrNames; + using AttrHolder::GetAllAttrs; + using AttrHolder::GetAttr; + using AttrHolder::HasAttr; + using AttrHolder::SetAttr; + + graphStatus 
Save(Buffer &buffer, bool is_dump = false) const; + + graphStatus SaveToFile(const string& file_name) const; + // Model will be rewrite + static graphStatus Load(const uint8_t *data, size_t len, Model &model); + graphStatus Load(ge::proto::ModelDef &model_def); + graphStatus LoadFromFile(const string& file_name); + + bool IsValid() const; + + protected: + ConstProtoAttrMapHelper GetAttrMap() const override; + ProtoAttrMapHelper MutableAttrMap() override; + + private: + void Init(); + ProtoAttrMapHelper attrs_; + friend class ModelSerializeImp; + friend class GraphDebugImp; + friend class OnnxUtils; + friend class ModelHelper; + friend class ModelBuilder; + string name_; + uint32_t version_; + std::string platform_version_{""}; + Graph graph_; +}; +} // namespace ge +using ModelPtr = std::shared_ptr; + +#endif // INC_GRAPH_MODEL_H_ diff --git a/inc/graph/model_serialize.h b/inc/graph/model_serialize.h new file mode 100644 index 000000000..a23039c95 --- /dev/null +++ b/inc/graph/model_serialize.h @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_GRAPH_MODEL_SERIALIZE_H_ +#define INC_GRAPH_MODEL_SERIALIZE_H_ + +#include +#include +#include "graph/buffer.h" +#include "graph/compute_graph.h" +#include "graph/model.h" + +namespace ge { +class ModelSerialize { + public: + Buffer SerializeModel(const Model &model, bool is_dump = false); + + Model UnserializeModel(const uint8_t *data, size_t len); + Model UnserializeModel(ge::proto::ModelDef &model_def); + + Buffer SerializeGraph(const ComputeGraphPtr &graph); + + ComputeGraphPtr UnserializeGraph(const uint8_t *data, size_t len); + + Buffer SerializeOpDesc(const ConstOpDescPtr &opDesc); + OpDescPtr UnserializeOpDesc(const uint8_t *data, size_t len); + + size_t GetSerializeModelSize(const Model &model); + + private: + static std::map &MutableTensorDescAttrMap(GeTensorDesc &tensorDesc); + + static const std::map &GetTensorDescAttrMap(const GeTensorDesc &tensorDesc); + + friend class ModelSerializeImp; + friend class GraphDebugImp; +}; +} // namespace ge +#endif // INC_GRAPH_MODEL_SERIALIZE_H_ diff --git a/inc/graph/node.h b/inc/graph/node.h new file mode 100644 index 000000000..988ba5294 --- /dev/null +++ b/inc/graph/node.h @@ -0,0 +1,208 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_GRAPH_NODE_H_ +#define INC_GRAPH_NODE_H_ + +#include +#include +#include +#include +#include +#include +#include "graph/ge_attr_value.h" +#include "utils/attr_utils.h" + +#include "graph/op_desc.h" +#include "graph/range_vistor.h" + +namespace ge { +class ComputeGraph; + +using ComputeGraphPtr = std::shared_ptr; + +class Node; + +using NodePtr = std::shared_ptr; +using ConstNodePtr = std::shared_ptr; +using NodeRef = std::weak_ptr; + +class Anchor; + +using AnchorPtr = std::shared_ptr; + +class InDataAnchor; + +using InDataAnchorPtr = std::shared_ptr; + +class OutDataAnchor; + +using OutDataAnchorPtr = std::shared_ptr; + +class ControlAnchor; + +using ControlAnchorPtr = std::shared_ptr; + +class InControlAnchor; + +using InControlAnchorPtr = std::shared_ptr; + +class OutControlAnchor; + +using OutControlAnchorPtr = std::shared_ptr; + +using OpDescPtr = std::shared_ptr; + +using ConstNode = const Node; + +typedef std::vector> kFusionDataFlowVec_t; + +// Node is a component of ComputeGraph +class Node : public std::enable_shared_from_this { + friend class ComputeGraph; + friend class ModelSerializeImp; + + public: + template + using Vistor = RangeVistor>; + ~Node(); + Node(const Node &) = delete; + Node &operator=(const Node &) = delete; + bool operator==(const Node &r_node) const; + + protected: + Node() = default; + Node(const OpDescPtr &op, const ComputeGraphPtr &ownerGraph); + + public: + graphStatus Init(); + + std::string GetName() const; + std::string GetType() const; + + ComputeGraphPtr GetOwnerComputeGraph() const; + graphStatus SetOwnerComputeGraph(const ComputeGraphPtr &graph); + + Vistor GetAllInDataAnchors() const; + Vistor GetAllOutDataAnchors() const; + uint32_t GetAllInDataAnchorsSize() const; + uint32_t GetAllOutDataAnchorsSize() const; + Vistor GetAllOutAnchors() const; + Vistor GetAllInAnchors() const; + InDataAnchorPtr GetInDataAnchor(int idx) const; + OutDataAnchorPtr GetOutDataAnchor(int idx) const; + InControlAnchorPtr 
GetInControlAnchor() const; + OutControlAnchorPtr GetOutControlAnchor() const; + Vistor GetInNodes() const; + Vistor GetOutNodes() const; + AnchorPtr GetInAnchor(int idx) const; + AnchorPtr GetOutAnchor(int idx) const; + + bool IsAllInNodesSeen(std::unordered_set &nodes_seen) const; + + // All in Data nodes + Vistor GetInDataNodes() const; + // All in Control nodes + Vistor GetInControlNodes() const; + // GetInAllNodes = InDataNodes + InControlNodes + Vistor GetInAllNodes() const; + + // All out Data nodes + Vistor GetOutDataNodes() const; + uint32_t GetOutDataNodesSize() const; + // All out Control nodes + Vistor GetOutControlNodes() const; + // GetOutAllNodes = OutDataNodes + InControlNodes + Vistor GetOutAllNodes() const; + + // Get all in data nodes and its out-anchor + Vistor> GetInDataNodesAndAnchors() const; + + // Get all out data nodes and its in-anchor + Vistor> GetOutDataNodesAndAnchors() const; + + graphStatus InferShapeAndType() const; + graphStatus Verify() const; + + graphStatus InferOriginFormat() const; + + OpDescPtr GetOpDesc() const; + + graphStatus UpdateOpDesc(const OpDescPtr &op); + + graphStatus AddLinkFrom(const NodePtr &input_node); + + graphStatus AddLinkFrom(const uint32_t &index, NodePtr input_node); + + graphStatus AddLinkFrom(const string &name, NodePtr input_node); + + graphStatus AddLinkFromForParse(const NodePtr &input_node); + + void AddSendEventId(uint32_t event_id) { send_event_id_list_.push_back(event_id); } + + void AddRecvEventId(uint32_t event_id) { recv_event_id_list_.push_back(event_id); } + + const std::vector &GetSendEventIdList() const { return send_event_id_list_; } + + const std::vector &GetRecvEventIdList() const { return recv_event_id_list_; } + void GetFusionInputFlowList(kFusionDataFlowVec_t &fusion_input_list) { + fusion_input_list = fusion_input_dataflow_list_; + } + + void GetFusionOutputFlowList(kFusionDataFlowVec_t &fusion_output_list) { + fusion_output_list = fusion_output_dataflow_list_; + } + + void 
SetFusionInputFlowList(kFusionDataFlowVec_t &fusion_input_list) { + fusion_input_dataflow_list_ = fusion_input_list; + } + + void SetFusionOutputFlowList(kFusionDataFlowVec_t &fusion_output_list) { + fusion_output_dataflow_list_ = fusion_output_list; + } + + void SetOrigNode(const NodePtr &orignode) { orig_node_ = orignode; } + + NodePtr GetOrigNode() { return orig_node_; } + + private: + bool NodeMembersAreEqual(const Node &r_node) const; + bool NodeAttrsAreEqual(const Node &r_node) const; + bool NodeInConnectsAreEqual(const Node &r_node) const; + bool NodeOutConnectsAreEqual(const Node &r_node) const; + bool NodeAnchorIsEqual(const AnchorPtr &l_anchor, const AnchorPtr &r_anchor, size_t i) const; + OpDescPtr op_; + std::weak_ptr owner_graph_; + vector in_data_anchors_; + vector out_data_anchors_; + InControlAnchorPtr in_control_anchor_; + OutControlAnchorPtr out_control_anchor_; + map attrs_; // lint !e1073 + bool has_init_{false}; + bool anchor_status_updated_{false}; + std::vector send_event_id_list_; + std::vector recv_event_id_list_; + + kFusionDataFlowVec_t fusion_input_dataflow_list_; + kFusionDataFlowVec_t fusion_output_dataflow_list_; + + NodePtr orig_node_; + friend class NodeUtils; + friend class OnnxUtils; +}; +} // namespace ge + +#endif // INC_GRAPH_NODE_H_ diff --git a/inc/graph/op_desc.h b/inc/graph/op_desc.h new file mode 100644 index 000000000..e29d83940 --- /dev/null +++ b/inc/graph/op_desc.h @@ -0,0 +1,322 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_OP_DESC_H_ +#define INC_GRAPH_OP_DESC_H_ + +#include +#include +#include +#include +#include +#include +#include "detail/attributes_holder.h" +#include "graph/range_vistor.h" + +#define DYNAMIN_INPUT_NAME(name, index) (((name)) + std::to_string((index))) +#define DYNAMIN_OUTPUT_NAME(name, index) (((name)) + std::to_string((index))) +namespace ge { +using std::map; +using std::pair; +using std::shared_ptr; +using std::string; +using std::vector; + +class Operator; +class GeTensorDesc; + +using GeTensorDescPtr = shared_ptr; +using ConstGeTensorDescPtr = shared_ptr; + +class OpDesc; + +using OpDescPtr = shared_ptr; +using ConstOpDescPtr = shared_ptr; + +class GeAttrValue; + +using ConstOpDesc = const OpDesc; + +enum SubgraphType { + kStatic, + kDynamic, + kSubgraphTypeEnd +}; + +class OpDesc : public std::enable_shared_from_this, public AttrHolder { + public: + template + using Vistor = RangeVistor>; + + friend class GraphBuilderImpl; + + friend class OperatorImpl; + + OpDesc(const string &name, const string &type); + + OpDesc(); + + ~OpDesc(); + + bool operator==(const OpDesc &r_op_desc) const; + + string GetName() const; + + void SetName(const string &name); + + string GetType() const; + + void SetType(const string &type); + + graphStatus AddInputDesc(const GeTensorDesc &input_desc); + + graphStatus AddInputDesc(const string &name, const GeTensorDesc &input_desc); + + graphStatus AddInputDesc(uint32_t index, const ge::GeTensorDesc &input_desc); + + graphStatus AddInputDescForward(const string &name, const unsigned int num); + + graphStatus AddInputDescMiddle(const string &name, const unsigned int num, size_t index); + + graphStatus AddOutputDescForward(const string &name, const unsigned int num); + + graphStatus AddOptionalInputDesc(const string &name, const GeTensorDesc &input_desc); + + graphStatus UpdateInputDesc(uint32_t index, 
const GeTensorDesc &tensor_desc); + + graphStatus UpdateInputDesc(const string &name, const GeTensorDesc &tensor_desc); + + bool InputIsSet(const string &name) const; + + GeTensorDesc GetInputDesc(uint32_t index) const; + + GeTensorDesc GetInputDesc(const string &name) const; + + Vistor GetAllInputNames() const; + + GeTensorDescPtr MutableInputDesc(uint32_t index) const; + + GeTensorDescPtr MutableInputDesc(const string &name) const; + + Vistor GetAllInputsDesc() const; + + Vistor GetAllInputsDescPtr() const; + + size_t GetInputsSize() const; + + size_t GetAllInputsSize() const; + + graphStatus AddOutputDesc(const GeTensorDesc &output_desc); + + graphStatus AddOutputDesc(const string &name, const GeTensorDesc &output_desc); + + graphStatus UpdateOutputDesc(uint32_t index, const GeTensorDesc &tensor_desc); + + graphStatus UpdateOutputDesc(const string &name, const GeTensorDesc &tensor_desc); + + GeTensorDesc GetOutputDesc(uint32_t index) const; + + GeTensorDesc GetOutputDesc(const string &name) const; + + GeTensorDescPtr MutableOutputDesc(uint32_t index) const; + + GeTensorDescPtr MutableOutputDesc(const string &name) const; + + uint32_t GetAllOutputsDescSize() const; + + Vistor GetAllOutputsDesc() const; + + Vistor GetAllOutputsDescPtr() const; + + size_t GetOutputsSize() const; + + ConstGeTensorDescPtr GetOutputDescPtr(uint32_t index) const; + + ConstGeTensorDescPtr GetInputDescPtr(uint32_t index) const; + + ConstGeTensorDescPtr GetInputDescPtrDfault(uint32_t index) const; + + ConstGeTensorDescPtr GetInputDescPtr(const string &name) const; + + graphStatus AddDynamicInputDesc(const string &name, const unsigned int num, bool isPushBack = true); + + graphStatus AddDynamicInputDescByIndex(const string &name, const unsigned int num, size_t index); + + graphStatus AddDynamicOutputDesc(const string &name, const unsigned int num, bool isPushBack = true); + + void RemoveInputDesc(uint32_t index); + void RemoveOutputDesc(uint32_t index); + + bool IsOptionalInput(const 
string &name) const; + + bool IsOptionalInput(uint32_t index) const; + + std::map GetAllInputName() const; + + std::map GetAllOutputName(); + + bool UpdateInputName(std::map inputNameIdx); + + bool UpdateOutputName(std::map outputNameIdx); + + void AddInferFunc(const std::function &func); + + std::function GetInferFunc() const; + + graphStatus InferShapeAndType(); + + void AddInferFormatFunc(const std::function &func); + + std::function GetInferFormatFunc() const; + + graphStatus DefaultInferFormat(); + + std::function GetVerifyFunc() const; + + void AddVerifierFunc(const std::function &func); + + graphStatus CallInferFormatFunc(Operator &op); + + graphStatus OpVerify(); + + graphStatus CommonVerify() const; + + using AttrHolder::AddRequiredAttr; + using AttrHolder::DelAttr; + using AttrHolder::GetAllAttrNames; + using AttrHolder::GetAllAttrs; + using AttrHolder::GetAttr; + using AttrHolder::HasAttr; + using AttrHolder::SetAttr; + + void SetId(int64_t id); + int64_t GetId() const; + void SetStreamId(int64_t stream_id); + int64_t GetStreamId() const; + void SetInputName(const vector &input_name); + vector GetInputName() const; + void SetSrcName(const vector &src_name); + vector GetSrcName() const; + void SetSrcIndex(const vector &src_index); + vector GetSrcIndex() const; + void SetInputOffset(const vector &input); + vector GetInputOffset() const; + void SetOutputOffset(const vector &input); + vector GetOutputOffset() const; + void SetDstName(const vector &dst_name); + vector GetDstName() const; + void SetDstIndex(const vector &dst_index); + vector GetDstIndex() const; + void SetWorkspace(const vector &workspace); + vector GetWorkspace() const; + void SetWorkspaceBytes(const vector &workspace_bytes); + vector GetWorkspaceBytes() const; + void SetIsInputConst(const vector &is_input_const); + vector GetIsInputConst() const; + + void SetOpInferDepends(const vector &depend_names); + vector GetOpInferDepends() const; + + string GetInputNameByIndex(uint32_t index) const; + 
+ int GetInputIndexByName(const string &name) const; + + string GetOutputNameByIndex(uint32_t index) const; + + int GetOutputIndexByName(const string &name) const; + + graphStatus RestoreInputNameIdx(const string &name, const int &index); + + graphStatus RestoreOutputNameIdx(const string &name, const int &index); + + graphStatus CallInferFunc(Operator &op); + + void SetOpKernelLibName(const std::string &name); + + std::string GetOpKernelLibName() const; + + void SetOpEngineName(const std::string &name); + + std::string GetOpEngineName() const; + + void RegisterSubgraphIrName(const std::string &name, SubgraphType type); + const std::map &GetSubgraphIrNames() const; + SubgraphType GetSubgraphTypeByIrName(const std::string &name) const; + + graphStatus AddSubgraphName(const std::string &name); + const std::map &GetSubgraphNameIndexes() const; + + std::string GetSubgraphInstanceName(uint32_t index) const; + const std::vector &GetSubgraphInstanceNames() const; + /// Does not provide functions `AddSubgraphInstance` or `AppendSubgraphInstance`, + /// because this kind of functions will only append a new subgraph instance name + /// at the tail of `subgraph_instance_names_` and ignore the synchronous change of `subgraph_names_to_index_`. + /// If we want to append a new subgraph instance name, the function `AddSubgraphName` should be called first. 
+ /// \param index + /// \param name + /// \return + graphStatus SetSubgraphInstanceName(uint32_t index, const std::string &name); + void RemoveSubgraphInstanceName(const std::string &name); + + graphStatus GetSubgraphNameByInstanceName(const std::string &instance_name, std::string &subgraph_name) const; + + protected: + ProtoAttrMapHelper MutableAttrMap() override; + ConstProtoAttrMapHelper GetAttrMap() const override; + + private: + OpDesc(const ProtoMsgOwner &proto_msg_owner, ge::proto::OpDef *op_def); + bool OpDescMembersAreEqual(const OpDesc &r_op_desc) const; + bool OpDescAttrsAreEqual(const OpDesc &r_op_desc) const; + bool OpDescGenTensorDescsAreEqual(const OpDesc &r_op_desc) const; + + GeIrProtoHelper op_def_; + std::vector subgraph_instance_names_; + + // subgraph names to index, for a `if` operator: + // then_branch: 0 + // else_branch: 1 + // or for a `case` node: + // branches0: 0 + // branches1: 1 + // branches2: 2 + std::map subgraph_names_to_index_; + + // subgraph ir names to type, for a `if` operator: + // then_branch: static + // else_branch: static + // or for a `case` op: + // branches: dynamic + std::map subgraph_ir_names_to_type_; + + vector inputs_desc_{}; + map input_name_idx_{}; + std::unordered_set optional_input_names_{}; + vector outputs_desc_{}; + map output_name_idx_{}; + std::function infer_func_ = nullptr; + std::function infer_format_func_ = nullptr; + std::function verifier_func_ = nullptr; + string op_kernel_lib_name_; + string engine_name_; + friend class OpDescUtils; + friend class ModelSerializeImp; + friend class AttrUtils; + friend class GeAttrValueImp; + friend class OnnxUtils; +}; +} // namespace ge +#endif // INC_GRAPH_OP_DESC_H_ diff --git a/inc/graph/op_kernel_bin.h b/inc/graph/op_kernel_bin.h new file mode 100644 index 000000000..df7bdc55b --- /dev/null +++ b/inc/graph/op_kernel_bin.h @@ -0,0 +1,47 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_OP_KERNEL_BIN_H_ +#define INC_GRAPH_OP_KERNEL_BIN_H_ + +#include +#include +#include +#include + +namespace ge { +class OpKernelBin { + public: + OpKernelBin(std::string name, std::vector &&data) : name_(std::move(name)), data_(std::move(data)) {} + + ~OpKernelBin() = default; + + const std::string &GetName() const { return name_; } + const uint8_t *GetBinData() const { return (const uint8_t *)data_.data(); } + size_t GetBinDataSize() const { return data_.size(); } + OpKernelBin(const OpKernelBin &) = delete; + const OpKernelBin &operator=(const OpKernelBin &) = delete; + + private: + std::string name_; + std::vector data_; +}; + +using OpKernelBinPtr = std::shared_ptr; +const char *const OP_EXTATTR_NAME_TBE_KERNEL = "tbeKernel"; +} // namespace ge + +#endif // INC_GRAPH_OP_KERNEL_BIN_H_ diff --git a/inc/graph/operator_factory_impl.h b/inc/graph/operator_factory_impl.h new file mode 100644 index 000000000..234d0a48f --- /dev/null +++ b/inc/graph/operator_factory_impl.h @@ -0,0 +1,56 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_OPERATOR_FACTORY_IMPL_H_ +#define INC_GRAPH_OPERATOR_FACTORY_IMPL_H_ + +#include +#include +#include +#include +#include "graph/operator_factory.h" + +namespace ge { +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OperatorFactoryImpl { + public: + static Operator CreateOperator(const std::string &operator_name, const std::string &operator_type); + + static graphStatus GetOpsTypeList(std::vector &all_ops); + + static bool IsExistOp(const string &operator_type); + + static InferShapeFunc GetInferShapeFunc(const std::string &operator_type); + + static InferFormatFunc GetInferFormatFunc(const std::string &operator_type); + + static VerifyFunc GetVerifyFunc(const std::string &operator_type); + + static graphStatus RegisterOperatorCreator(const std::string &operator_type, OpCreator const &op_creator); + + static graphStatus RegisterInferShapeFunc(const std::string &operator_type, InferShapeFunc const infer_shape_func); + + static graphStatus RegisterInferFormatFunc(const std::string &operator_type, InferFormatFunc const infer_format_func); + + static graphStatus RegisterVerifyFunc(const std::string &operator_type, VerifyFunc const verify_func); + + static shared_ptr> operator_creators_; + static shared_ptr> operator_infershape_funcs_; + static shared_ptr> operator_inferformat_funcs_; + static shared_ptr> operator_verify_funcs_; +}; +} // namespace ge + +#endif // INC_GRAPH_OPERATOR_FACTORY_IMPL_H_ diff --git a/inc/graph/opsproto_manager.h b/inc/graph/opsproto_manager.h new file mode 100644 index 000000000..1b0adce7e --- 
/dev/null +++ b/inc/graph/opsproto_manager.h @@ -0,0 +1,44 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_OPSPROTO_MANAGER_H_ +#define INC_GRAPH_OPSPROTO_MANAGER_H_ + +#include +#include +#include +#include +#include +#include + +namespace ge { +class OpsProtoManager { + public: + static OpsProtoManager *Instance(); + + bool Initialize(const std::map &options); + + void Finalize(); + + void LoadOpsProtoPluginSo(std::string &path); + + private: + std::string pluginPath_; + std::vector handles_; +}; +} // namespace ge + +#endif // INC_GRAPH_OPSPROTO_MANAGER_H_ diff --git a/inc/graph/range_vistor.h b/inc/graph/range_vistor.h new file mode 100644 index 000000000..50c02cfc7 --- /dev/null +++ b/inc/graph/range_vistor.h @@ -0,0 +1,53 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_GRAPH_RANGE_VISTOR_H_ +#define INC_GRAPH_RANGE_VISTOR_H_ + +#include + +template +class RangeVistor { + public: + using Iterator = typename std::vector::iterator; + using ConstIterator = typename std::vector::const_iterator; + + RangeVistor(O owner, const std::vector &vs) : owner_(owner), elements_(vs) {} + + ~RangeVistor() {} + + Iterator begin() { return elements_.begin(); } + + Iterator end() { return elements_.end(); } + + ConstIterator begin() const { return elements_.begin(); } + + ConstIterator end() const { return elements_.end(); } + + std::size_t size() const { return elements_.size(); } + + bool empty() const { return elements_.empty(); } + + E &at(std::size_t index) { return elements_.at(index); } + + const E &at(std::size_t index) const { return elements_.at(index); } + + private: + O owner_; + std::vector elements_; +}; + +#endif // INC_GRAPH_RANGE_VISTOR_H_ diff --git a/inc/graph/ref_relation.h b/inc/graph/ref_relation.h new file mode 100644 index 000000000..65e3d1464 --- /dev/null +++ b/inc/graph/ref_relation.h @@ -0,0 +1,79 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef COMMON_GRAPH_REF_RELATION_H_ +#define COMMON_GRAPH_REF_RELATION_H_ + +#include +#include +#include +#include + +#include "graph/compute_graph.h" +#include "graph/types.h" +#include "graph/ge_error_codes.h" +#include "node.h" + +namespace ge { +enum InOutFlag { + NODE_IN = 0, // input flag + NODE_OUT = 1, // output flag +}; + +struct RefCell { + std::string node_name; + ge::NodePtr node = nullptr; + InOutFlag in_out = NODE_IN; + int in_out_idx = 0; + + bool operator == (const RefCell &c) const { + return node_name == c.node_name && node == c.node && in_out == c.in_out && in_out_idx == c.in_out_idx; + } + + RefCell() = default; + RefCell(std::string name, ge::NodePtr node_ptr, InOutFlag in_out_flag, int idx) { + node_name = name; + node = node_ptr; + in_out = in_out_flag; + in_out_idx = idx; + }; + ~RefCell() = default; +}; + +struct RefCellHash{ + size_t operator () (const RefCell &c) const { + unsigned long number = reinterpret_cast(reinterpret_cast(c.node.get())); + string tmp= c.node_name + std::to_string(c.in_out) + std::to_string(c.in_out_idx) + + std::to_string(number); + return std::hash()(tmp); + } +}; + +class RefRelations { +public: + graphStatus LookUpRefRelations(const RefCell &key, std::unordered_set &result); + graphStatus BuildRefRelations(ge::ComputeGraph &root_graph); + graphStatus Clear(); + + RefRelations(); + ~RefRelations() = default; +public: + class Impl; + std::shared_ptr impl_ = nullptr; +}; + +} // namespace ge +#endif // COMMON_GRAPH_REF_RELATION_H_ diff --git a/inc/graph/runtime_inference_context.h b/inc/graph/runtime_inference_context.h new file mode 100644 index 000000000..a59a53ca9 --- /dev/null +++ b/inc/graph/runtime_inference_context.h @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_RUNTIME_INFERENCE_CONTEXT_H_ +#define INC_GRAPH_RUNTIME_INFERENCE_CONTEXT_H_ + +#include +#include +#include +#include +#include "external/graph/ge_error_codes.h" +#include "external/graph/tensor.h" + +namespace ge { +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY RuntimeInferenceContext { + public: + static graphStatus GetContext(const std::string &context_id, RuntimeInferenceContext **ctx); + static graphStatus CreateContext(const std::string &context_id); + static void DestroyContext(const std::string &context_id); + + graphStatus SetTensor(int64_t node_id, int output_id, Tensor &&tensor); + graphStatus GetTensor(int64_t node_id, int output_id, Tensor &tensor); + + private: + std::map> tensors_; + std::mutex mu_; + + static std::map> contexts_; + static std::mutex ctx_mu_; +}; +} // namespace ge + +#endif // INC_GRAPH_RUNTIME_INFERENCE_CONTEXT_H_ diff --git a/inc/graph/shape_refiner.h b/inc/graph/shape_refiner.h new file mode 100644 index 000000000..de6c86068 --- /dev/null +++ b/inc/graph/shape_refiner.h @@ -0,0 +1,40 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_SHAPE_REFINER_H_ +#define INC_GRAPH_SHAPE_REFINER_H_ + +#include +#include "external/graph/inference_context.h" + +#include "external/graph/ge_error_codes.h" +#include "graph/node.h" + +namespace ge { +// ShapeRefiner performs shape inference for compute graphs +class ShapeRefiner { + public: + static graphStatus InferShapeAndType(const ConstNodePtr &node, Operator &op, bool before_subgraph); + static graphStatus InferShapeAndType(const NodePtr &node, bool before_subgraph); + static graphStatus InferShapeAndType(const NodePtr &node); + static graphStatus InferShapeAndType(const ConstNodePtr &node, Operator &op); + static void ClearContextMap(); + + private: + static void PrintInOutTensorShape(const ge::NodePtr &node, const std::string &phase); +}; +} // namespace ge +#endif // INC_GRAPH_SHAPE_REFINER_H_ diff --git a/inc/graph/usr_types.h b/inc/graph/usr_types.h new file mode 100644 index 000000000..7da9d49b7 --- /dev/null +++ b/inc/graph/usr_types.h @@ -0,0 +1,134 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_USR_TYPES_H_ +#define INC_GRAPH_USR_TYPES_H_ + +#include +#include +#include +namespace ge { +#define USR_TYPE_DEC(type, name) \ + inline void set_##name(const type &value) { name = value; } \ + type *mutable_##name() { return &name; } + +#define USR_TYPE_HAS_DEC(type, name) \ + inline void set_##name(const type &value) { name = value; } \ + \ + private: \ + bool has_mutable_##name{false}; \ + \ + public: \ + bool has_##name() const { return (has_mutable_##name) || QuantizeFactorHasData(name); } \ + type *mutable_##name() { \ + has_mutable_##name = true; \ + return &name; \ + } + +#define USR_TYPE_BYTES_DEC(name) \ + inline void clear_##name() { name.clear(); } \ + inline void set_##name(const void *value, size_t size) { \ + name.assign(reinterpret_cast(const_cast(value)), \ + reinterpret_cast(const_cast(value)) + size); \ + } + +enum UsrQuantizeScaleType { USR_VECTOR_SCALE = 0, USR_SCALAR_SCALE = 1 }; +enum UsrQuantizeScaleMode { USR_NORMAL_MODE = 0, USR_SQRT_MODE = 1 }; +enum UsrQuantizeAlgorithm { + USR_NON_OFFSET_ALGO = 0, + USR_HALF_OFFSET_ALGO = 1, + USR_ALL_OFFSET_ALGO = 2, +}; + +struct UsrQuantizeFactor { + public: + // QuantizeScaleMode scale_mode; + UsrQuantizeScaleMode scale_mode{USR_NORMAL_MODE}; + std::vector scale_value; + int64_t scale_offset{0}; + std::vector offset_data_value; + int64_t offset_data_offset{0}; + std::vector offset_weight_value; + int64_t offset_weight_offset{0}; + std::vector offset_pad_value; + int64_t offset_pad_offset{0}; + + USR_TYPE_DEC(UsrQuantizeScaleMode, scale_mode); + USR_TYPE_BYTES_DEC(scale_value); + + USR_TYPE_DEC(int64_t, scale_offset); + USR_TYPE_BYTES_DEC(offset_data_value); + USR_TYPE_DEC(int64_t, offset_data_offset); + + USR_TYPE_BYTES_DEC(offset_weight_value); + USR_TYPE_DEC(int64_t, offset_weight_offset); + USR_TYPE_BYTES_DEC(offset_pad_value); + USR_TYPE_DEC(int64_t, 
offset_pad_offset); +}; + +static inline bool QuantizeFactorHasData(const UsrQuantizeFactor &factor) { + return factor.scale_value.size() > 0 || factor.offset_data_value.size() > 0 || + factor.offset_weight_value.size() > 0 || factor.offset_pad_value.size() > 0; +} + +struct UsrQuantizeCalcFactor { + public: + std::vector offsetw; + int64_t offsetw_offset{0}; + std::vector offsetd; + int64_t offsetd_offset{0}; + std::vector scalereq; + int64_t scaledreq_offset{0}; + std::vector offsetdnext; + int64_t offsetdnext_offset{0}; + + USR_TYPE_BYTES_DEC(offsetw); + USR_TYPE_DEC(int64_t, offsetw_offset); + USR_TYPE_BYTES_DEC(offsetd); + USR_TYPE_DEC(int64_t, offsetd_offset); + USR_TYPE_BYTES_DEC(scalereq); + USR_TYPE_DEC(int64_t, scaledreq_offset); + USR_TYPE_BYTES_DEC(offsetdnext); + USR_TYPE_DEC(int64_t, offsetdnext_offset); +}; + +static inline bool QuantizeFactorHasData(const UsrQuantizeCalcFactor &factor) { + return factor.offsetw.size() > 0 || factor.offsetd.size() > 0 || factor.scalereq.size() > 0 || + factor.offsetdnext.size() > 0; +} + +struct UsrQuantizeFactorParams { + UsrQuantizeAlgorithm quantize_algo{USR_NON_OFFSET_ALGO}; + UsrQuantizeScaleType scale_type{USR_VECTOR_SCALE}; + UsrQuantizeFactor quantize_param; + UsrQuantizeFactor dequantize_param; + UsrQuantizeFactor requantize_param; + UsrQuantizeCalcFactor quantizecalc_param; + USR_TYPE_DEC(UsrQuantizeAlgorithm, quantize_algo); + USR_TYPE_DEC(UsrQuantizeScaleType, scale_type); + USR_TYPE_HAS_DEC(UsrQuantizeFactor, quantize_param); + USR_TYPE_HAS_DEC(UsrQuantizeFactor, dequantize_param); + USR_TYPE_HAS_DEC(UsrQuantizeFactor, requantize_param); + USR_TYPE_HAS_DEC(UsrQuantizeCalcFactor, quantizecalc_param); +}; + +#undef USR_TYPE_DEC +#undef USR_TYPE_HAS_DEC +#undef USR_TYPE_BYTES_DEC +} // namespace ge + +#endif // INC_GRAPH_USR_TYPES_H_ + diff --git a/inc/graph/utils/anchor_utils.h b/inc/graph/utils/anchor_utils.h new file mode 100644 index 000000000..f3f71293c --- /dev/null +++ 
b/inc/graph/utils/anchor_utils.h @@ -0,0 +1,45 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_UTILS_ANCHOR_UTILS_H_ +#define INC_GRAPH_UTILS_ANCHOR_UTILS_H_ + +#include "graph/anchor.h" +#include "graph/node.h" + +namespace ge { +class AnchorUtils { + public: + // Get anchor format + static Format GetFormat(const DataAnchorPtr &dataAnchor); + + // Set anchor format + static graphStatus SetFormat(const DataAnchorPtr &dataAnchor, Format dataFormat); + + // Get anchor status + static AnchorStatus GetStatus(const DataAnchorPtr &dataAnchor); + + // Set anchor status + static graphStatus SetStatus(const DataAnchorPtr &dataAnchor, AnchorStatus anchorStatus); + + static bool HasControlEdge(const AnchorPtr &anchor); + + static bool IsControlEdge(const AnchorPtr &src, const AnchorPtr &dst); + + static int GetIdx(const AnchorPtr &anchor); +}; +} // namespace ge +#endif // INC_GRAPH_UTILS_ANCHOR_UTILS_H_ diff --git a/inc/graph/utils/attr_utils.h b/inc/graph/utils/attr_utils.h new file mode 100644 index 000000000..1e273f386 --- /dev/null +++ b/inc/graph/utils/attr_utils.h @@ -0,0 +1,151 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_UTILS_ATTR_UTILS_H_ +#define INC_GRAPH_UTILS_ATTR_UTILS_H_ + +#include +#include +#include +#include "graph/detail/attributes_holder.h" +#include "graph/ge_attr_value.h" +#include "graph/types.h" + +namespace ge { +class OpDesc; +using OpDescPtr = std::shared_ptr; +using ConstOpDescPtr = std::shared_ptr; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY AttrUtils { + public: + class ConstAttrHolderAdapter; + class AttrHolderAdapter; + // Set + static bool HasAttr(ConstAttrHolderAdapter &&obj, const string &name); + + static bool SetInt(AttrHolderAdapter &&obj, const string &name, const int64_t &value); + static bool SetListInt(AttrHolderAdapter &&obj, const string &name, const vector &value); + static bool SetListInt(AttrHolderAdapter &&obj, const string &name, const vector &value); + static bool SetListInt(AttrHolderAdapter &&obj, const string &name, const vector &value); + static bool SetListInt(AttrHolderAdapter &&obj, const string &name, std::initializer_list &&value); + + static bool SetFloat(AttrHolderAdapter &&obj, const string &name, const float &value); + static bool SetListFloat(AttrHolderAdapter &&obj, const string &name, const vector &value); + static bool SetBool(AttrHolderAdapter &&obj, const string &name, const bool &value); + static bool SetListBool(AttrHolderAdapter &&obj, const string &name, const vector &value); + static bool SetStr(AttrHolderAdapter &&obj, const string &name, const string &value); + static bool SetListStr(AttrHolderAdapter &&obj, const string &name, const vector &value); + static 
bool SetTensorDesc(AttrHolderAdapter &&obj, const string &name, const GeTensorDesc &value); + static bool SetListTensorDesc(AttrHolderAdapter &&obj, const string &name, const vector &value); + static bool SetTensor(AttrHolderAdapter &&obj, const string &name, const GeTensorPtr &value); + static bool SetTensor(AttrHolderAdapter &&obj, const string &name, const ConstGeTensorPtr &value); + static bool SetTensor(AttrHolderAdapter &&obj, const string &name, const GeTensor &value); + static bool SetListTensor(AttrHolderAdapter &&obj, const string &name, const vector &value); + static bool SetListTensor(AttrHolderAdapter &&obj, const string &name, const vector &value); + static bool SetListTensor(AttrHolderAdapter &&obj, const string &name, + std::initializer_list &&value); + static bool SetListTensor(AttrHolderAdapter &&obj, const string &name, const vector &value); + static bool SetGraph(AttrHolderAdapter &&obj, const string &name, const ComputeGraphPtr &value); + static bool SetListGraph(AttrHolderAdapter &&obj, const string &name, const vector &value); + static bool SetBytes(AttrHolderAdapter &&obj, const string &name, const GeAttrValue::BYTES &value); + static bool SetListBytes(AttrHolderAdapter &&obj, const string &name, const vector &value); + static bool SetNamedAttrs(AttrHolderAdapter &&obj, const string &name, const GeAttrValue::NAMED_ATTRS &value); + static bool SetListNamedAttrs(AttrHolderAdapter &&obj, const string &name, + const vector &value); + static bool SetListOpDesc(AttrHolderAdapter &&obj, const string &name, const vector &value); + static bool SetListOpDesc(AttrHolderAdapter &&obj, const string &name, const vector &value); + + // Get + static bool GetInt(ConstAttrHolderAdapter &&obj, const string &name, int64_t &value); + static bool GetInt(ConstAttrHolderAdapter &&obj, const string &name, int32_t &value); + static bool GetInt(ConstAttrHolderAdapter &&obj, const string &name, uint32_t &value); + static bool GetListInt(ConstAttrHolderAdapter &&obj, 
const string &name, vector &value); + static bool GetListInt(ConstAttrHolderAdapter &&obj, const string &name, vector &value); + static bool GetListInt(ConstAttrHolderAdapter &&obj, const string &name, vector &value); + static bool GetFloat(ConstAttrHolderAdapter &&obj, const string &name, float &value); + static bool GetListFloat(ConstAttrHolderAdapter &&obj, const string &name, vector &value); + static bool GetBool(ConstAttrHolderAdapter &&obj, const string &name, bool &value); + static bool GetListBool(ConstAttrHolderAdapter &&obj, const string &name, vector &value); + static bool GetStr(ConstAttrHolderAdapter &&obj, const string &name, string &value); + static bool GetListStr(ConstAttrHolderAdapter &&obj, const string &name, vector &value); + static bool GetTensorDesc(ConstAttrHolderAdapter &&obj, const string &name, GeTensorDesc &value); + static bool GetListTensorDesc(ConstAttrHolderAdapter &&obj, const string &name, vector &value); + static bool GetTensor(ConstAttrHolderAdapter &&obj, const string &name, ConstGeTensorPtr &value); + static bool MutableTensor(AttrHolderAdapter &&obj, const string &name, GeTensorPtr &value); + static bool GetListTensor(ConstAttrHolderAdapter &&obj, const string &name, vector &value); + static bool MutableListTensor(AttrHolderAdapter &&obj, const string &name, vector &value); + static bool GetGraph(ConstAttrHolderAdapter &&obj, const string &name, ComputeGraphPtr &value); + static bool GetListGraph(ConstAttrHolderAdapter &&obj, const string &name, vector &value); + static bool GetBytes(ConstAttrHolderAdapter &&obj, const string &name, GeAttrValue::BYTES &value); + static bool GetListBytes(ConstAttrHolderAdapter &&obj, const string &name, vector &value); + static bool GetNamedAttrs(ConstAttrHolderAdapter &&obj, const string &name, GeAttrValue::NAMED_ATTRS &value); + static bool GetListNamedAttrs(ConstAttrHolderAdapter &&obj, const string &name, + vector &value); + static bool GetListOpDesc(ConstAttrHolderAdapter &&obj, const 
string &name, vector &value); + // Value will be moved + static bool SetZeroCopyBytes(AttrHolderAdapter &&obj, const string &name, Buffer &&buffer); + static bool GetZeroCopyBytes(ConstAttrHolderAdapter &&obj, const string &name, Buffer &buffer); + // Value will be moved + static bool SetZeroCopyListBytes(AttrHolderAdapter &&obj, const string &name, + vector &listBuffer); + static bool GetZeroCopyListBytes(ConstAttrHolderAdapter &&obj, const string &name, vector &listBuffer); + + static bool SetListListInt(AttrHolderAdapter &&obj, const string &name, const vector> &value); + static bool GetListListInt(ConstAttrHolderAdapter &&obj, const string &name, vector> &value); + + static bool SetListDataType(AttrHolderAdapter &&obj, const string &name, const vector &value); + static bool GetListDataType(ConstAttrHolderAdapter &&obj, const string &name, vector &value); + + static bool SetDataType(AttrHolderAdapter &&obj, const string &name, const ge::DataType &value); + static bool GetDataType(ConstAttrHolderAdapter &&obj, const string &name, ge::DataType &value); + + static OpDescPtr CloneOpDesc(const ConstOpDescPtr &orgOpDesc); + + static OpDescPtr CopyOpDesc(const ConstOpDescPtr &orgOpDesc); + + static std::string GetAllAttrsStr(ConstAttrHolderAdapter &&obj); + + class AttrHolderAdapter { + public: + AttrHolderAdapter(AttrHolder *obj) : obj_(obj) {} + ~AttrHolderAdapter() {} + template + AttrHolderAdapter(const std::shared_ptr &obj) : obj_(obj.get()) {} + AttrHolderAdapter(AttrHolder &obj) : obj_(&obj) {} + operator bool() const { return obj_ != nullptr; } + AttrHolder *operator->() { return obj_; } + AttrHolder *get() { return obj_; } + + AttrHolder *obj_; + }; + + class ConstAttrHolderAdapter { + public: + ConstAttrHolderAdapter(const AttrHolder *obj) : obj_(obj) {} + ~ConstAttrHolderAdapter() {} + template + ConstAttrHolderAdapter(const std::shared_ptr obj) : obj_(obj.get()) {} + ConstAttrHolderAdapter(const AttrHolder &obj) : obj_(&obj) {} + operator bool() const { 
return obj_ != nullptr; } + const AttrHolder *operator->() const { return obj_; } + const AttrHolder *get() const { return obj_; } + + private: + const AttrHolder *obj_; + }; +}; +} // namespace ge +#endif // INC_GRAPH_UTILS_ATTR_UTILS_H_ diff --git a/inc/graph/utils/graph_utils.h b/inc/graph/utils/graph_utils.h new file mode 100644 index 000000000..28cc2d212 --- /dev/null +++ b/inc/graph/utils/graph_utils.h @@ -0,0 +1,770 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_GRAPH_UTILS_GRAPH_UTILS_H_ +#define INC_GRAPH_UTILS_GRAPH_UTILS_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "graph/anchor.h" +#include "graph/node.h" +#include "graph/compute_graph.h" +#include "graph/utils/anchor_utils.h" +#include "graph/graph.h" +#include "graph/model.h" + +#define GE_DUMP(compute_graph, name) \ + do { \ + GraphUtils::DumpGEGraph(compute_graph, name); \ + GraphUtils::DumpGEGraphToOnnx(*compute_graph, name); \ + for (const auto &sub_graph_func : compute_graph->GetAllSubgraphs()) { \ + static int8_t i = 0; \ + auto sub_graph_func_name = std::string(name) + std::string("_sub_graph_") + std::to_string(i++); \ + GraphUtils::DumpGEGraph(sub_graph_func, sub_graph_func_name); \ + GraphUtils::DumpGEGraphToOnnx(*sub_graph_func, sub_graph_func_name); \ + } \ + } while (0) + +#define REFER_ATTR_VALUE(VT_ENUM, DataType, attr, ret) \ + do { \ + DataType ret; \ + attr.GetValue(ret); \ + } while (0) + +#define PRINT_ATTR_VALUE_IF(value_type, VT_ENUM, DataType, attr, stream) \ + do { \ + if (value_type == VT_ENUM) { \ + REFER_ATTR_VALUE(VT_ENUM, DataType, attr, ret) \ + stream << ret; \ + } \ + } while (0) + +#define PRINT_LIST_ATTR_VALUE_IF(value_type, VT_ENUM, DataType, attr, stream) \ + do { \ + if (value_type == VT_ENUM) { \ + REFER_ATTR_VALUE(VT_ENUM, DataType, attr, ret) \ + stream << "["; \ + for (int i = 0; i < ret.size(); i++) { \ + stream << ret[i]; \ + if (i + 1 != ret.size()) stream << ", "; \ + } \ + stream << "]"; \ + } \ + } while (0) + +#define PRINT_ATTR_VALUE_ELIF(value_type, VT_ENUM, DataType, attr, stream) \ + else PRINT_ATTR_VALUE_IF(value_type, VT_ENUM, DataType, attr, stream) + +#define PRINT_LIST_ATTR_VALUE_ELIF(value_type, VT_ENUM, DataType, attr, stream) \ + else PRINT_LIST_ATTR_VALUE_IF(value_type, VT_ENUM, DataType, attr, stream) + +#define PRINT_SHAPE(i_o, n, idx, stream) \ + do { \ + auto op = n->GetOpDesc(); \ + GeTensorDesc td = i_o == "input" ? 
op->GetInputDesc(idx) : op->GetOutputDesc(idx); \ + auto shape = td.GetShape().GetDims(); \ + stream << "["; \ + for (int i = 0; i < shape.size(); i++) { \ + stream << shape[i]; \ + if (i + 1 < shape.size()) stream << ", "; \ + } \ + stream << "]"; \ + } while (0) + +#define PRINT_ATTR_FUNC(stream) \ + [&](GeAttrValue attr) { \ + auto type = attr.GetValueType(); \ + PRINT_ATTR_VALUE_IF(type, GeAttrValue::ValueType::VT_STRING, GeAttrValue::STR, attr, stream) \ + PRINT_ATTR_VALUE_ELIF(type, GeAttrValue::ValueType::VT_FLOAT, GeAttrValue::FLOAT, attr, stream) \ + PRINT_ATTR_VALUE_ELIF(type, GeAttrValue::ValueType::VT_BOOL, GeAttrValue::BOOL, attr, stream) \ + PRINT_ATTR_VALUE_ELIF(type, GeAttrValue::ValueType::VT_INT, GeAttrValue::INT, attr, stream) \ + PRINT_LIST_ATTR_VALUE_ELIF(type, GeAttrValue::ValueType::VT_LIST_STRING, GeAttrValue::LIST_STR, attr, stream) \ + PRINT_LIST_ATTR_VALUE_ELIF(type, GeAttrValue::ValueType::VT_LIST_FLOAT, GeAttrValue::LIST_FLOAT, attr, stream) \ + PRINT_LIST_ATTR_VALUE_ELIF(type, GeAttrValue::ValueType::VT_LIST_BOOL, GeAttrValue::LIST_BOOL, attr, stream) \ + PRINT_LIST_ATTR_VALUE_ELIF(type, GeAttrValue::ValueType::VT_LIST_INT, GeAttrValue::LIST_INT, attr, stream) \ + else if (type == GeAttrValue::ValueType::VT_TENSOR_DESC) stream << "TENSOR_DESC"; \ + else if (type == GeAttrValue::ValueType::VT_TENSOR) stream << "TENSOR"; \ + else if (type == GeAttrValue::ValueType::VT_BYTES) stream << "BYTES"; \ + else if (type == GeAttrValue::ValueType::VT_LIST_TENSOR_DESC) stream << "LIST_TENSOR_DESC"; \ + else if (type == GeAttrValue::ValueType::VT_LIST_TENSOR) stream << "LIST_TENSOR"; \ + else if (type == GeAttrValue::ValueType::VT_LIST_BYTES) stream << "LIST_BYTES"; \ + }; + +namespace ge { +enum IOType { kIn, kOut }; + +struct NodeIndexIO { + NodeIndexIO(ge::NodePtr node, uint32_t index, IOType io_type) + : node_(std::move(node)), index_(index), io_type_(io_type) { + if (node_ != nullptr) { + value_ = node_->GetName() + (io_type_ == kOut ? 
"_out_" : "_in_") + std::to_string(index_); + } + } + NodeIndexIO(ge::NodePtr node, int index, IOType io_type) + : node_(std::move(node)), index_(static_cast(index)), io_type_(io_type) { + if (node_ != nullptr) { + value_ = node_->GetName() + (io_type_ == kOut ? "_out_" : "_in_") + std::to_string(index_); + } + } + ~NodeIndexIO() {} + + NodePtr node_ = nullptr; + uint32_t index_ = 0; + IOType io_type_ = kOut; + std::string value_; + + const std::string &ToString() const { + return value_; + } +}; + +class GraphUtils { + public: + static ComputeGraphPtr GetComputeGraph(const Graph &graph); + + static Graph CreateGraphFromComputeGraph(const ComputeGraphPtr compute_graph); + + static graphStatus RecoverGraphOperators(const Graph &graph); + + static ComputeGraphPtr CreateGraphFromOperator(const string &name, const std::vector &inputs); + + static graphStatus AddEdge(const OutDataAnchorPtr &src, const InDataAnchorPtr &dst); + + static graphStatus AddEdge(const OutDataAnchorPtr &src, const Format &src_format, const InDataAnchorPtr &dst, + const Format &dst_format); + + static graphStatus AddEdge(const AnchorPtr &src, const AnchorPtr &dst); + + static graphStatus AddEdge(const OutControlAnchorPtr &src, const InControlAnchorPtr &dst); + + static graphStatus AddEdge(const OutDataAnchorPtr &src, const InControlAnchorPtr &dst); + + // check whether src is link to dst and then remove + static graphStatus RemoveEdge(const OutDataAnchorPtr &src, const InDataAnchorPtr &dst); + + static graphStatus RemoveEdge(const AnchorPtr &src, const AnchorPtr &dst); + + static graphStatus RemoveEdge(const OutControlAnchorPtr &src, const InControlAnchorPtr &dst); + + static graphStatus RemoveEdge(const OutDataAnchorPtr &src, const InControlAnchorPtr &dst); + + static graphStatus ReplaceEdgeDst(const OutDataAnchorPtr &src, const InDataAnchorPtr &dst, + const InDataAnchorPtr &new_dst); + + static graphStatus ReplaceEdgeDst(const OutControlAnchorPtr &src, const InControlAnchorPtr &dst, + const 
InControlAnchorPtr &new_dst); + + static graphStatus InsertNodeBetweenDataAnchors(const OutDataAnchorPtr &src, const InDataAnchorPtr &dst, + const NodePtr &new_node); + + static graphStatus RemoveSubgraphRecursively(const ComputeGraphPtr &compute_graph, const NodePtr &remove_node); + + static graphStatus RemoveNodeWithoutRelink(const ComputeGraphPtr &compute_graph, const NodePtr &node); + + static graphStatus InsertTransNode(ComputeGraphPtr compute_graph, const InDataAnchorPtr &in_data_anchor, + const std::vector &vec_op_desc); + + /// + /// @brief Insert node: src->insert_node:input_index, insert_node:output_index->dst + /// @param [in] src + /// @param [in] dsts + /// @param [in] insert_node + /// @param [in] input_index + /// @param [in] output_index + /// @return graphStatus + /// + static graphStatus InsertNodeAfter(const OutDataAnchorPtr &src, const std::vector &dsts, + const NodePtr &insert_node, uint32_t input_index = 0, uint32_t output_index = 0); + + static graphStatus RemoveJustNode(ComputeGraphPtr compute_graph, const NodePtr &node); + + static graphStatus RemoveJustNode(ComputeGraph &compute_graph, const NodePtr &node); + + static void RecordOriginalNames(std::vector original_nodes, const ge::NodePtr &node); + + static void RecordOriginalNames(std::vector names_tmp, const ge::NodePtr &node); + + static bool MatchDumpStr(const std::string &suffix); + + static void DumpGEGraph(const ge::ComputeGraphPtr &graph, const std::string &suffix, bool is_always_dump = false); + + static bool LoadGEGraph(const char *file, ge::ComputeGraph &compute_graph); + + static void BreakConnect(const std::map &all_nodes_infos); + + static void DumpGEGraphToOnnx(const ge::ComputeGraph &compute_graph, const std::string &suffix); + + static bool LoadGEGraphFromOnnx(const char *file, ge::ComputeGraph &compute_graph); + + static bool ReadProtoFromTextFile(const char *file, google::protobuf::Message *message); + + static void WriteProtoToTextFile(const google::protobuf::Message 
&proto, const char *real_path); + + static graphStatus AppendInputNode(const ComputeGraphPtr &graph, const NodePtr &node); + + /// + /// Isolating `node`, relinking data links from the in-anchor peer nodes to + /// the out-anchor peer nodes according to `io_map`, relinking control links + /// to ensure that input nodes of `node` are before out nodes + /// + /// Link the `io_map[i]` input anchor peer node to `i` output anchor peer + /// nodes, then unlink all links connecting with `node`. If `io_map[i]` < 0, + /// unlink all links from `i` output anchor without any relinking. + /// + /// @param node + /// @param io_map + /// @return + /// + static graphStatus IsolateNode(const NodePtr &node, const std::initializer_list &io_map); + static graphStatus IsolateNode(const NodePtr &node, const std::vector &io_map); + + /// + /// Isolate `node` which must be one input one output, equivalent to + /// `IsolateNode(node, {0})` + /// @param node + /// @return + /// + static graphStatus IsolateNodeOneIO(const NodePtr &node); + + /// + /// The data anchors replacing behavior is the same with + /// `ReplaceNodeDataAnchors`. In addition, replace all `old_node` control + /// anchors with `new_node`'s. + /// @param new_node + /// @param old_node + /// @param inputs_map + /// @param outputs_map + /// @return + /// + static graphStatus ReplaceNodeAnchors(const NodePtr &new_node, const NodePtr &old_node, + std::initializer_list inputs_map, std::initializer_list outputs_map); + + static graphStatus ReplaceNodeAnchors(const NodePtr &new_node, const NodePtr &old_node, + const std::vector &inputs_map, const std::vector &outputs_map); + + /// + /// Replace `old_node` data anchors with `new_node`'s according to `inputs_map` and `outputs_map`. + /// Replace the `i` in/out data anchor on `old_node` with + /// `inputs_map[i]`/`outputs_map[i]` data anchor on `new_node`. 
+ /// If `inputs_map[i]`/`outputs_map[i]` < 0 or the index not contained in + /// `inputs_map[i]`/`outputs_map[i]`, the `i` data anchor will remain + /// on `old_node`. + /// @param new_node + /// @param old_node + /// @param inputs_map + /// @param outputs_map + /// @return + /// + static graphStatus ReplaceNodeDataAnchors(const NodePtr &new_node, const NodePtr &old_node, + std::initializer_list inputs_map, + std::initializer_list outputs_map); + + static graphStatus ReplaceNodeDataAnchors(const NodePtr &new_node, const NodePtr &old_node, + const std::vector &inputs_map, const std::vector &outputs_map); + + /// + /// Copy all in-control edges from `src_node` to `dst_node` + /// @param src_node + /// @param dst_node + /// @return + /// + static graphStatus CopyInCtrlEdges(const NodePtr &src_node, NodePtr &dst_node); + + static graphStatus MoveInCtrlEdges(const NodePtr &src_node, NodePtr &dst_node); + + /// + /// Copy all out-control edges from `src_node` to `dst_node` + /// @param src_node + /// @param dst_node + /// @return success: GRAPH_SUCESS + /// + static graphStatus CopyOutCtrlEdges(const NodePtr &src_node, NodePtr &dst_node); + + /// + /// Move all out-control edges from `src_node` to `dst_node` + /// @param src_node + /// @param dst_node + /// @return success: GRAPH_SUCESS + /// + static graphStatus MoveOutCtrlEdges(NodePtr &src_node, NodePtr &dst_node); + + /// + /// Copy all in-data edges from `src_node` to `dst_node` + /// @param src_node + /// @param dst_node + /// @return + /// + static graphStatus CopyInDataEdges(const NodePtr &src_node, NodePtr &dst_node); + + static ComputeGraphPtr FindRootGraph(ComputeGraphPtr graph); + + /// + /// Make a copy of ComputeGraph. + /// @param graph: original graph. + /// @param prefix: node name prefix of new graph. 
+ /// @return ComputeGraphPtr + /// + static ComputeGraphPtr CloneGraph(const ComputeGraphPtr &graph, const string &prefix, + std::vector &input_nodes, std::vector &output_nodes); + + /// + /// Copy tensor attribute to new node. + /// @param [in] dst_desc: cloned node. + /// @param [in] src_node: original node. + /// @return success: GRAPH_SUCESS + /// + static graphStatus CopyTensorAttrs(const OpDescPtr &dst_desc, const NodePtr &src_node); + + static graphStatus TopologicalSortingByName(const ge::ComputeGraphPtr &compute_graph, vector &node_vec); + + /// + /// Get reference-mapping of all data_anchors in graph + /// @param [in] graph + /// @param [out] symbol_to_anchors + /// @param [out] anchor_to_symbol + /// @return success: GRAPH_SUCESS + /// + static graphStatus GetRefMapping(const ComputeGraphPtr &graph, + std::map> &symbol_to_anchors, + std::map &anchor_to_symbol); + + /// + /// Determine if the graph is a UNKNOWN_SHAPE graph based on whether the graph and all subgraphs + /// of the graph have UNKNOWN_SHAPE operators or not. + /// Note: This function will only look 'down' from the graph, not 'up'. 
For example, the following + /// scenario (K for known shape, U for unknown shape), ROOT graph is UNKNOWN_SHAPE while SUB graph is KNOWN_SHAPE + /// ROOT graph: A -----> B -----> C + /// K subgraph U + /// | + /// V + /// SUB graph: D --> E --> F + /// K K K + /// @param [in] graph + /// @return bool + /// + static bool IsUnknownShapeGraph(const ComputeGraphPtr &graph); + + static NodePtr FindNodeFromAllNodes(ComputeGraphPtr &graph, const std::string &name); + + private: + /// + /// Get reference-mapping for in_data_anchors of node + /// @param [in] node + /// @param [out] symbol_to_anchors + /// @param [out] anchor_to_symbol + /// @return success: GRAPH_SUCESS + /// + static graphStatus HandleInAnchorMapping(const NodePtr &node, + std::map> &symbol_to_anchors, + std::map &anchor_to_symbol); + + /// + /// Get reference-mapping for out_data_anchors of node + /// @param [in] node + /// @param [out] symbol_to_anchors + /// @param [out] anchor_to_symbol + /// @return success: GRAPH_SUCESS + /// + static graphStatus HandleOutAnchorMapping(const NodePtr &node, + std::map> &symbol_to_anchors, + std::map &anchor_to_symbol); + + /// + /// Handle input of subgraph + /// @param [in] node + /// @param [out] symbol_to_anchors + /// @param [out] anchor_to_symbol + /// @return success: GRAPH_SUCESS + /// + static graphStatus HandleSubgraphInput(const NodePtr &node, + std::map> &symbol_to_anchors, + std::map &anchor_to_symbol); + + /// + /// Handle input of Merge op + /// @param [in] node + /// @param [out] symbol_to_anchors + /// @param [out] anchor_to_symbol + /// @return success: GRAPH_SUCESS + /// + static graphStatus HandleMergeInput(const NodePtr &node, + std::map> &symbol_to_anchors, + std::map &anchor_to_symbol); + + /// + /// Handle output of subgraph + /// @param [in] node + /// @param [out] symbol_to_anchors + /// @param [out] anchor_to_symbol + /// @return success: GRAPH_SUCESS + /// + static graphStatus HandleSubgraphOutput(const NodePtr &node, + std::map> 
&symbol_to_anchors, + std::map &anchor_to_symbol); + + /// + /// Relink all edges for cloned ComputeGraph. + /// @param [in] node: original node. + /// @param [in] prefix: node name prefix of new node. + /// @param [in] all_nodes: all nodes in new graph. + /// @return success: GRAPH_SUCESS + /// + static graphStatus RelinkGraphEdges(const NodePtr &node, const string &prefix, + const std::unordered_map &all_nodes); + + /// + /// Union ref-mapping + /// @param [in] exist_node_info1 + /// @param [in] exist_node_info2 + /// @param [out] symbol_to_anchors + /// @param [out] anchor_to_symbol + /// @param [out] symbol + /// @return success: GRAPH_SUCESS + /// + static graphStatus UnionSymbolMapping(const NodeIndexIO &exist_node_info1, const NodeIndexIO &exist_node_info2, + std::map> &symbol_to_anchors, + std::map &anchor_to_symbol, std::string &symbol); + + /// + /// Update symbol mapping with a new reference pair + /// @param [in] cur_node_info + /// @param [in] exist_node_info + /// @param [out] symbol_to_anchors + /// @param [out] anchor_to_symbol + /// @return success: GRAPH_SUCESS + /// + static graphStatus UpdateRefMapping(const NodeIndexIO &cur_node_info, const NodeIndexIO &exist_node_info, + std::map> &symbol_to_anchors, + std::map &anchor_to_symbol); + + /// + /// Check if out_data_anchor is reference of input + /// @param [in] out_data_anchor + /// @param [out] reuse_in_index + /// @return bool + /// + static bool IsRefFromInput(const OutDataAnchorPtr &out_data_anchor, int32_t &reuse_in_index); +}; + +class ComputeGraphBuilder { + public: + ComputeGraphBuilder() : owner_graph_(nullptr) {} + ComputeGraphBuilder(const ComputeGraphBuilder &) = delete; + ComputeGraphBuilder &operator=(const ComputeGraphBuilder &) = delete; + ComputeGraphBuilder(const ComputeGraphBuilder &&) = delete; + ComputeGraphBuilder &operator=(const ComputeGraphBuilder &&) = delete; + ~ComputeGraphBuilder() = default; + + /// + /// @brief Add node to graph + /// @param [in] op_desc + /// 
@return ComputeGraphBuilder + /// + virtual ComputeGraphBuilder& AddNode(const OpDescPtr &op_desc); + + /// + /// @brief Add data-link among nodes in graph + /// @param [in] src_name + /// @param [in] out_anchor_ind + /// @param [in] dst_name + /// @param [in] in_anchor_ind + /// @return ComputeGraphBuilder + /// + virtual ComputeGraphBuilder& AddDataLink(const std::string &src_name, uint32_t out_anchor_ind, + const std::string &dst_name, uint32_t in_anchor_ind); + + /// + /// @brief Add ctrl-link among nodes in graph + /// @param [in] src_name + /// @param [in] dst_name + /// @return ComputeGraphBuilder + /// + virtual ComputeGraphBuilder& AddControlLink(const std::string &src_name, const std::string &dst_name); + + /// + /// @brief Build graph + /// @param [out] error_code + /// @param [out] error_msg + /// @return ComputeGraphPtr + /// + virtual ComputeGraphPtr Build(graphStatus &error_code, std::string &error_msg) = 0; + + /// @brief Get node with name + /// @param [in] name + /// @return NodePtr + /// + NodePtr GetNode(const std::string &name); + + /// @brief Get all nodes + /// @return std::vector + /// + std::vector GetAllNodes(); + + protected: + /// + /// @brief Build nodes + /// @param [out] error_code + /// @param [out] error_msg + /// @return void + /// + void BuildNodes(graphStatus &error_code, std::string &error_msg); + + /// + /// @brief Build data-links + /// @param [out] error_code + /// @param [out] error_msg + /// @return void + /// + void BuildDataLinks(graphStatus &error_code, std::string &error_msg); + + /// + /// @brief Build ctrl-links + /// @param [out] error_code + /// @param [out] error_msg + /// @return void + /// + void BuildCtrlLinks(graphStatus &error_code, std::string &error_msg); + + ComputeGraphPtr owner_graph_; + + // node_name -> node + std::map node_names_; + std::vector nodes_; + + // -> + std::vector, std::pair>> data_links_; + // src_node_name -> dst_node_name + std::vector> ctrl_links_; +}; + +class CompleteGraphBuilder : 
public ComputeGraphBuilder { + public: + explicit CompleteGraphBuilder(std::string name) : name_(std::move(name)), parent_node_(nullptr) {} + CompleteGraphBuilder(const CompleteGraphBuilder &) = delete; + CompleteGraphBuilder &operator=(const CompleteGraphBuilder &) = delete; + CompleteGraphBuilder(const CompleteGraphBuilder &&) = delete; + CompleteGraphBuilder &operator=(const CompleteGraphBuilder &&) = delete; + ~CompleteGraphBuilder() = default; + + /// + /// @brief Add node to graph + /// @param [in] op_desc + /// @return CompleteGraphBuilder + /// + CompleteGraphBuilder& AddNode(const OpDescPtr &op_desc) override; + + /// + /// @brief Add data-link among nodes in graph + /// @param [in] src_name + /// @param [in] out_anchor_ind + /// @param [in] dst_name + /// @param [in] in_anchor_ind + /// @return CompleteGraphBuilder + /// + CompleteGraphBuilder& AddDataLink(const std::string &src_name, uint32_t out_anchor_ind, + const std::string &dst_name, uint32_t in_anchor_ind) override; + + /// + /// @brief Add ctrl-link among nodes in graph + /// @param [in] src_name + /// @param [in] dst_name + /// @return CompleteGraphBuilder + /// + CompleteGraphBuilder& AddControlLink(const std::string &src_name, const std::string &dst_name) override; + + /// + /// @brief Set index_th input anchor for graph + /// @param [in] index + /// @param [in] node_names + /// @param [in] anchor_inds + /// @return CompleteGraphBuilder + /// + CompleteGraphBuilder& SetInput(uint32_t index, const std::vector &node_names, + const std::vector &anchor_inds); + + /// + /// @brief Set index_th input of graph as useless + /// @param [in] index + /// @return CompleteGraphBuilder + /// + CompleteGraphBuilder& SetUselessInput(uint32_t index); + + /// + /// @brief Add output anchor for graph + /// @param [in] owner_node_name + /// @param [in] anchor_ind + /// @return CompleteGraphBuilder + /// + CompleteGraphBuilder& AddOutput(const std::string &owner_node_name, uint32_t anchor_ind); + + /// + /// @brief 
Add target for graph + /// @param [in] target_name + /// @return CompleteGraphBuilder + /// + CompleteGraphBuilder& AddTarget(const std::string &target_name); + + /// + /// @brief Set parent-node of graph + /// @param [in] parent_node + /// @return CompleteGraphBuilder + /// + CompleteGraphBuilder& SetParentNode(const NodePtr &parent_node); + + /// + /// @brief Set mapping-relation of parent-node in_anchor_ind & Data-node + /// @param [in] input_mapping: index_of_graph_input -> in_anchor_index_of_parent_node + /// @return CompleteGraphBuilder + /// + CompleteGraphBuilder& SetInputMapping(const std::map &input_mapping); + + /// + /// @brief Set mapping-relation of parent-node out_anchor_ind & NetOutput-node out_anchor_ind + /// @param [in] output_mapping: index_of_graph_output -> out_anchor_index_of_parent_node + /// @return CompleteGraphBuilder + /// + CompleteGraphBuilder& SetOutputMapping(const std::map &output_mapping); + + /// + /// @brief Build graph + /// @param [out] error_code + /// @param [out] error_msg + /// @return ComputeGraphPtr + /// + ComputeGraphPtr Build(graphStatus &error_code, std::string &error_msg) override; + + private: + /// + /// @brief Add data nodes + /// @param [out] error_code + /// @param [out] error_msg + /// @return void + /// + void AddDataNodes(graphStatus &error_code, std::string &error_msg); + + /// + /// @brief Add data node + /// @param [in] index + /// @param [out] error_code + /// @param [out] error_msg + /// @return void + /// + NodePtr AddDataNode(uint32_t index, graphStatus &error_code, std::string &error_msg); + + /// + /// @brief Add RetVal nodes + /// @param [out] error_code + /// @param [out] error_msg + /// @return void + /// + void AddRetValNodes(graphStatus &error_code, std::string &error_msg); + + /// + /// @brief Build target-nodes for graph + /// @param [out] error_code + /// @param [out] error_msg + /// @return void + /// + void BuildGraphTargets(graphStatus &error_code, std::string &error_msg); + + std::string 
name_; + NodePtr parent_node_; + std::map, std::vector>> graph_inputs_; + std::vector> graph_outputs_; + std::vector graph_targets_; + + // index_of_graph_input -> in_anchor_index_of_parent_node + std::map input_mapping_; + // index_of_graph_output -> out_anchor_index_of_parent_node + std::map output_mapping_; +}; + +class PartialGraphBuilder : public ComputeGraphBuilder { + public: + PartialGraphBuilder() = default; + PartialGraphBuilder(const PartialGraphBuilder &) = delete; + PartialGraphBuilder &operator=(const PartialGraphBuilder &) = delete; + PartialGraphBuilder(const PartialGraphBuilder &&) = delete; + PartialGraphBuilder &operator=(const PartialGraphBuilder &&) = delete; + ~PartialGraphBuilder() = default; + + /// + /// @brief Add node to graph + /// @param [in] op_desc + /// @return PartialGraphBuilder + /// + PartialGraphBuilder& AddNode(const OpDescPtr &op_desc) override; + + /// + /// @brief Add data-link among nodes in graph + /// @param [in] src_name + /// @param [in] out_anchor_ind + /// @param [in] dst_name + /// @param [in] in_anchor_ind + /// @return PartialGraphBuilder + /// + PartialGraphBuilder& AddDataLink(const std::string &src_name, uint32_t out_anchor_ind, + const std::string &dst_name, uint32_t in_anchor_ind) override; + + /// + /// @brief Add ctrl-link among nodes in graph + /// @param [in] src_name + /// @param [in] dst_name + /// @return PartialGraphBuilder + /// + PartialGraphBuilder& AddControlLink(const std::string &src_name, const std::string &dst_name) override; + + /// + /// @brief Set owner graph + /// @param [in] graph + /// @return PartialGraphBuilder + /// + PartialGraphBuilder& SetOwnerGraph(const ComputeGraphPtr &graph); + + /// + /// @brief Add exist node + /// @param [in] node + /// @return PartialGraphBuilder + /// + PartialGraphBuilder& AddExistNode(const NodePtr &node); + + /// + /// @brief Build multi nodes with links + /// @param [out] error_code + /// @param [out] error_msg + /// @return ComputeGraphPtr + /// + 
ComputeGraphPtr Build(graphStatus &error_code, std::string &error_msg) override; + + private: + /// + /// @brief Build exist nodes + /// @param [out] error_code + /// @param [out] error_msg + /// @return void + /// + void BuildExistNodes(graphStatus &error_code, std::string &error_msg); + + std::vector exist_nodes_; +}; +} // namespace ge +#endif // INC_GRAPH_UTILS_GRAPH_UTILS_H_ diff --git a/inc/graph/utils/node_utils.h b/inc/graph/utils/node_utils.h new file mode 100644 index 000000000..ea6e1696b --- /dev/null +++ b/inc/graph/utils/node_utils.h @@ -0,0 +1,152 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_UTILS_NODE_UTILS_H_ +#define INC_GRAPH_UTILS_NODE_UTILS_H_ + +#include +#include +#include +#include "graph/node.h" + +namespace ge { +// Op types of Const like Opps. +extern const std::set kConstOpTypes; +// Op types of If like Opps. +extern const std::set kIfOpTypes; +// Op types of While like Opps. +extern const std::set kWhileOpTypes; +// Op types of Case like Opps. +extern const std::set kCaseOpTypes; +// Op types of For like Opps. 
+extern const std::set kForOpTypes; + +class NodeUtils { + public: + static graphStatus AddSendEventId(const NodePtr &node, const uint32_t &event_id); + static graphStatus AddRecvEventId(const NodePtr &node, const uint32_t &event_id); + static graphStatus GetSendEventIdList(const NodePtr &node, std::vector &vec_send); + static graphStatus GetRecvEventIdList(const NodePtr &node, std::vector &vec_recv); + + static graphStatus ClearSendInfo(); + static graphStatus ClearRecvInfo(); + + static graphStatus GetSingleOutputNodeOfNthLayer(const NodePtr &src, int depth, NodePtr &dst); + + static graphStatus GetDataOutAnchorAndControlInAnchor(const NodePtr &node_ptr, OutDataAnchorPtr &out_data, + InControlAnchorPtr &in_control); + + static graphStatus ClearInDataAnchor(const NodePtr &node_ptr, const InDataAnchorPtr &in_data_anchor); + static graphStatus SetAllAnchorStatus(const NodePtr &nodePtr); + static graphStatus SetAllAnchorStatus(Node &node); + static bool IsAnchorStatusSet(const NodePtr &nodePtr); + static bool IsAnchorStatusSet(const Node &node); + + static graphStatus MoveOutputEdges(const NodePtr &origin_node, const NodePtr &new_node); + + static void UpdateIsInputConst(const NodePtr &nodePtr); + static void UpdateIsInputConst(Node &node); + static bool IsConst(const Node &node); + static void UnlinkAll(const Node &node); + static graphStatus UpdatePeerNodeInputDesc(const NodePtr &node_ptr); + + static graphStatus AppendInputAnchor(const NodePtr &node, uint32_t index); + static graphStatus RemoveInputAnchor(const NodePtr &node, uint32_t index); + + static bool IsInNodesEmpty(const Node &node); + static GeTensorDesc GetOutputDesc(const Node &node, uint32_t index); + static GeTensorDesc GetInputDesc(const Node &node, uint32_t index); + static graphStatus UpdateOutputShape(const Node &node, uint32_t index, const GeShape &shape); + static graphStatus UpdateInputShape(const Node &node, uint32_t index, const GeShape &shape); + // check node whether unknown shape.If node 
shape contain -1 or -2,out param "is_unknow" will be true; + // for func op, it will check subgraph yet, if some node shape of subgraph contain -1 or -2, + // the out param "is_unknow" will be true too + static graphStatus GetNodeUnknownShapeStatus(const Node &node, bool &is_unknow); + + static std::string GetNodeType(const Node &node); + static std::string GetNodeType(const NodePtr &node); + + static ComputeGraphPtr GetSubgraph(const Node &node, uint32_t index); + static graphStatus SetSubgraph(Node &node, uint32_t index, const ComputeGraphPtr &subgraph); + + /// + /// Check if node is input of subgraph + /// @param [in] node + /// @return bool + /// + static bool IsSubgraphInput(const NodePtr &node); + + /// + /// Check if node is output of subgraph + /// @param [in] node + /// @return bool + /// + static bool IsSubgraphOutput(const NodePtr &node); + + /// + /// @brief Get subgraph original input node. + /// @param [in] node + /// @return Node + /// + static NodePtr GetParentInput(const Node &node); + static NodePtr GetParentInput(const NodePtr &node); + + /// + /// @brief Check is varying_input for while node + /// @param [in] node: Data node for subgraph + /// @return bool + /// + static bool IsWhileVaryingInput(const ge::NodePtr &node); + + /// + /// @brief Get subgraph input is constant. + /// @param [in] node + /// @param [out] string + /// @return bool + /// + static bool GetConstOpType(const NodePtr &node, std::string &type); + + /// + /// @brief Remove node-related subgraphs, including subgraphs of nodes in the subgraph. + /// @param [in] node + /// @return return GRAPH_SUCCESS if remove successfully, other for failed. + /// + static graphStatus RemoveSubgraphsOnNode(const NodePtr &node); + + /// + /// @brief Get subgraph input data node by index. + /// @param [in] node + /// @return Node + /// + static vector GetSubgraphDataNodesByIndex(const Node &node, int index); + + /// + /// @brief Get subgraph input data node by index. 
+ /// @param [in] node + /// @return Node + /// + static vector GetSubgraphOutputNodes(const Node &node); + + static NodePtr GetInDataNodeByIndex(const Node &node, int index); + + static vector GetOutDataNodesByIndex(const Node &node, int index); + + private: + static std::map> map_send_info_; + static std::map> map_recv_info_; +}; +} // namespace ge +#endif // INC_GRAPH_UTILS_NODE_UTILS_H_ diff --git a/inc/graph/utils/op_desc_utils.h b/inc/graph/utils/op_desc_utils.h new file mode 100644 index 000000000..22ba3ad79 --- /dev/null +++ b/inc/graph/utils/op_desc_utils.h @@ -0,0 +1,181 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_GRAPH_UTILS_OP_DESC_UTILS_H_ +#define INC_GRAPH_UTILS_OP_DESC_UTILS_H_ + +#include +#include +#include +#include "graph/def_types.h" +#include "graph/node.h" +#include "graph/op_desc.h" +#include "graph/operator.h" +#include "graph/range_vistor.h" + +namespace ge { +class OpDesc; +using OpDescPtr = std::shared_ptr; + +class OpDescUtils { + public: + template + using Vistor = RangeVistor>; + + OpDescUtils() = default; + ~OpDescUtils() = default; + static bool HasQuantizeFactorParams(const OpDescPtr& op_desc); + static bool HasQuantizeFactorParams(const OpDesc& op_desc); + static graphStatus GetQuantizeFactorParams(const OpDescPtr& op_desc, QuantizeFactorParams& quant); + static graphStatus GetQuantizeFactorParams(const OpDesc& op_desc, QuantizeFactorParams& quant); + static graphStatus SetQuantizeFactorParams(const OpDescPtr &op_desc, const QuantizeFactorParams& quant); + static graphStatus SetQuantizeFactorParams(OpDesc& op_desc, const QuantizeFactorParams& quant); + + static vector GetConstInputNode(const ge::Node& node); + static vector GetInputData(const vector& input_nodes); + + static vector GetWeights(const ge::Node& node); + static vector GetWeights(const ge::ConstNodePtr& node); + static vector MutableWeights(const ge::Node& node); + static vector MutableWeights(const ge::NodePtr node); + static graphStatus SetWeights(ge::Node& node, const vector& weights); + static graphStatus SetWeights(ge::NodePtr node, const vector& weights); + static graphStatus ClearWeights(ge::NodePtr node); + + static bool ClearInputDesc(ge::OpDescPtr op_desc, uint32_t index); + static bool ClearInputDesc(const ge::NodePtr& node); + static bool ClearOutputDesc(const ge::OpDescPtr& op_desc, uint32_t index); + static bool ClearOutputDesc(const ge::NodePtr& node); + static vector GetConstInputs(const ge::Node& node); + static vector GetConstInputs(const ge::ConstNodePtr& node); + static size_t GetNonConstInputsSize(const ge::Node& node); + static size_t 
GetNonConstInputsSize(ge::ConstNodePtr node); + // Index: Indicates the index of all non const inputs + static GeTensorDesc GetNonConstInputTensorDesc(const ge::Node& node, size_t index_non_const = 0); + static GeTensorDesc GetNonConstInputTensorDesc(const ge::ConstNodePtr& node, size_t index_non_const = 0); + static bool GetNonConstInputIndex(const ge::Node& node, size_t index_non_const, size_t& index); + static bool GetNonConstInputIndex(const ge::ConstNodePtr& node, size_t index_non_const, size_t& index); + // Index: Indicates the index of all inputs + static bool IsNonConstInput(const ge::Node& node, size_t index = 0); + static bool IsNonConstInput(const ge::ConstNodePtr& node, size_t index = 0); + + static vector GetNonConstTensorDesc(const ge::ConstNodePtr& node); + static graphStatus AddConstOpToAnchor(InDataAnchorPtr in_anchor, const GeTensorPtr& tensor_ptr); + + static Operator CreateOperatorFromOpDesc(OpDescPtr op_desc); + static Operator CreateOperatorFromNode(ge::ConstNodePtr node_ptr); + static OpDescPtr GetOpDescFromOperator(const Operator& oprt); + + static OpDescPtr CreateConstOp(const GeTensorPtr& tensor_ptr); + + static graphStatus SetSubgraphInstanceName(const std::string &subgraph_name, + const std::string &subgraph_instance_name, OpDescPtr &op_desc); + + private: + static GeTensorPtr MutableWeights(ge::OpDesc& op_desc); + static GeTensorPtr MutableWeights(ge::OpDescPtr op_desc); + static graphStatus SetWeights(ge::OpDesc& op_desc, const GeTensorPtr weight); + static graphStatus SetWeights(ge::OpDescPtr op_desc, const GeTensorPtr weight); +}; + +class OpDescBuilder { + public: + OpDescBuilder(std::string name, std::string type) : name_(std::move(name)), type_(std::move(type)) {} + OpDescBuilder(const OpDescBuilder &) = delete; + OpDescBuilder &operator=(const OpDescBuilder &) = delete; + OpDescBuilder(const OpDescBuilder &&) = delete; + OpDescBuilder &operator=(const OpDescBuilder &&) = delete; + ~OpDescBuilder() = default; + + /// + /// @brief 
Add input + /// @param [in] name + /// @return OpDescBuilder + /// + OpDescBuilder& AddInput(const std::string &name); + + /// + /// @brief Add input + /// @param [in] name + /// @param [in] tensor + /// @return OpDescBuilder + /// + OpDescBuilder& AddInput(const std::string &name, const GeTensorDesc &tensor); + + /// + /// @brief Add dynamic input + /// @param [in] name + /// @param [in] num + /// @return OpDescBuilder + /// + OpDescBuilder& AddDynamicInput(const std::string &name, uint32_t num); + + /// + /// @brief Add dynamic input + /// @param [in] name + /// @param [in] num + /// @param [in] tensor + /// @return OpDescBuilder + /// + OpDescBuilder& AddDynamicInput(const std::string &name, uint32_t num, const GeTensorDesc &tensor); + + /// + /// @brief Add output + /// @param [in] name + /// @return OpDescBuilder + /// + OpDescBuilder& AddOutput(const std::string &name); + + /// + /// @brief Add output + /// @param [in] name + /// @param [in] tensor + /// @return OpDescBuilder + /// + OpDescBuilder& AddOutput(const std::string &name, const GeTensorDesc &tensor); + + /// + /// @brief Add dynamic output + /// @param [in] name + /// @param [in] num + /// @return OpDescBuilder + /// + OpDescBuilder& AddDynamicOutput(const std::string &name, uint32_t num); + + /// + /// @brief Add dynamic output + /// @param [in] name + /// @param [in] num + /// @param [in] tensor + /// @return OpDescBuilder + /// + OpDescBuilder& AddDynamicOutput(const std::string &name, uint32_t num, const GeTensorDesc &tensor); + + /// + /// @brief Build op_desc + /// @return OpDescPtr + /// + OpDescPtr Build(); + + private: + std::string name_; + std::string type_; + std::vector> inputs_; + std::vector> outputs_; +}; +} // namespace ge + +#endif // INC_GRAPH_UTILS_OP_DESC_UTILS_H_ diff --git a/inc/graph/utils/tensor_adapter.h b/inc/graph/utils/tensor_adapter.h new file mode 100644 index 000000000..7161ba3b4 --- /dev/null +++ b/inc/graph/utils/tensor_adapter.h @@ -0,0 +1,43 @@ +/** + * Copyright 
2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_UTILS_TENSOR_ADAPTER_H_ +#define INC_GRAPH_UTILS_TENSOR_ADAPTER_H_ + +#include +#include "graph/ge_tensor.h" +#include "graph/tensor.h" + +namespace ge { +using GeTensorPtr = std::shared_ptr; +using ConstGeTensorPtr = std::shared_ptr; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY TensorAdapter { + public: + static GeTensorDesc TensorDesc2GeTensorDesc(const TensorDesc &tensorDesc); + static TensorDesc GeTensorDesc2TensorDesc(const GeTensorDesc &geTensorDesc); + static GeTensorPtr Tensor2GeTensor(const Tensor &tensor); + static Tensor GeTensor2Tensor(const ConstGeTensorPtr &geTensor); + + static ConstGeTensorPtr AsGeTensorPtr(const Tensor &tensor); // Share value + static GeTensorPtr AsGeTensorPtr(Tensor &tensor); // Share value + static const GeTensor AsGeTensor(const Tensor &tensor); // Share value + static GeTensor AsGeTensor(Tensor &tensor); // Share value + static const Tensor AsTensor(const GeTensor &tensor); // Share value + static Tensor AsTensor(GeTensor &tensor); // Share value +}; +} // namespace ge +#endif // INC_GRAPH_UTILS_TENSOR_ADAPTER_H_ diff --git a/inc/graph/utils/tensor_utils.h b/inc/graph/utils/tensor_utils.h new file mode 100644 index 000000000..776933a84 --- /dev/null +++ b/inc/graph/utils/tensor_utils.h @@ -0,0 +1,77 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_UTILS_TENSOR_UTILS_H_ +#define INC_GRAPH_UTILS_TENSOR_UTILS_H_ + +#include +#include "graph/def_types.h" +#include "graph/ge_error_codes.h" +#include "graph/ge_tensor.h" + +namespace ge { +class TensorUtils { + public: + static ge::graphStatus GetSize(const GeTensorDesc &tensorDesc, int64_t &size); + static void SetSize(GeTensorDesc &tensorDesc, int64_t size); + static uint32_t GetWeightSize(const ConstGeTensorPtr &tensorPtr); + static uint32_t GetWeightSize(const GeTensor &tensor); + static uint32_t GetWeightSize(const GeTensorDesc &tensorDesc); + static uint8_t *GetWeightAddr(const ConstGeTensorPtr &tensorPtr, uint8_t *base); + static uint8_t *GetWeightAddr(const GeTensor &tensor, uint8_t *base); + static void SetWeightSize(GeTensorDesc &tensorDesc, uint32_t size); + static ge::graphStatus GetReuseInput(const GeTensorDesc &tensorDesc, bool &flag); + static void SetReuseInput(GeTensorDesc &tensorDesc, bool flag); + static ge::graphStatus GetOutputTensor(const GeTensorDesc &tensorDesc, bool &flag); + static void SetOutputTensor(GeTensorDesc &tensorDesc, bool flag); + static graphStatus GetDeviceType(const GeTensorDesc &tensorDesc, DeviceType &type); + static void SetDeviceType(GeTensorDesc &tensorDesc, DeviceType type); + static ge::graphStatus GetInputTensor(const GeTensorDesc &tensorDesc, bool &flag); + static void SetInputTensor(GeTensorDesc &tensorDesc, bool flag); + static ge::graphStatus GetRealDimCnt(const GeTensorDesc 
&tensorDesc, uint32_t &cnt); + static void SetRealDimCnt(GeTensorDesc &tensorDesc, uint32_t cnt); + static ge::graphStatus GetReuseInputIndex(const GeTensorDesc &tensorDesc, uint32_t &idx); + static void SetReuseInputIndex(GeTensorDesc &tensorDesc, uint32_t idx); + static ge::graphStatus GetDataOffset(const GeTensorDesc &tensorDesc, int64_t &offset); + static void SetDataOffset(GeTensorDesc &tensorDesc, int64_t offset); + static ge::graphStatus GetCmpsSize(const GeTensorDesc &tensorDesc, uint32_t &cmp_size); + static void SetCmpsSize(GeTensorDesc &tensorDesc, uint32_t cmp_size); + static ge::graphStatus GetCmpsTab(const GeTensorDesc &tensorDesc, vector &vec); + static void SetCmpsTab(GeTensorDesc &tensorDesc, const uint8_t *data, size_t size); + static ge::graphStatus GetCmpsTabOffset(const GeTensorDesc &tensorDesc, int64_t &tab_offset); + static void SetCmpsTabOffset(GeTensorDesc &tensorDesc, int64_t tab_offset); + static ge::graphStatus GetCmpsInfo(const GeTensorDesc &tensorDesc, CompressInfo &info); + static void SetCmpsInfo(GeTensorDesc &tensorDesc, const CompressInfo &info); + static bool HasAlloffsetQuantizeInfo(const GeTensorDesc &tensorDesc); + static ge::graphStatus GetAlloffsetQuantizeInfo(const GeTensorDesc &tensorDesc, AllOffsetQuantizeInfo &info); + static void SetAlloffsetQuantizeInfo(GeTensorDesc &tensorDesc, const AllOffsetQuantizeInfo &info); + static ge::graphStatus GetRC(const GeTensorDesc &tensorDesc, uint32_t &rc); + static void SetRC(GeTensorDesc &tensorDesc, uint32_t rc); + + /// + /// calculate tensor mem size. 
+ /// @param shape tensor shape + /// @param format tensor format + /// @param data_type tensor data type + /// @param mem_size -1 means unknown shape,other means mem size + /// @return GRAPH_SUCCESS:success, other:failed + /// + static ge::graphStatus CalcTensorMemSize(const GeShape &shape, Format format, DataType data_type, int64_t &mem_size); + static ge::graphStatus GetTensorMemorySizeInBytes(const GeTensorDesc &desc_temp, int64_t &size_temp); + static ge::graphStatus GetTensorSizeInBytes(const GeTensorDesc &desc_temp, int64_t &size_temp); +}; +} // namespace ge +#endif // INC_GRAPH_UTILS_TENSOR_UTILS_H_ diff --git a/inc/graph/utils/type_utils.h b/inc/graph/utils/type_utils.h new file mode 100644 index 000000000..92f39f4ae --- /dev/null +++ b/inc/graph/utils/type_utils.h @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_GRAPH_UTILS_TYPE_UTILS_H_ +#define INC_GRAPH_UTILS_TYPE_UTILS_H_ + +#include +#include +#include +#include "graph/def_types.h" +#include "graph/ge_error_codes.h" +#include "graph/types.h" +#include "graph/usr_types.h" +#include "register/register_types.h" +#include "external/register/register_fmk_types.h" + +namespace ge { +class TypeUtils { + public: + static bool IsDataTypeValid(DataType dt); + static bool IsFormatValid(Format format); + static bool IsInternalFormat(Format format); + + static std::string DataTypeToSerialString(DataType data_type); + static DataType SerialStringToDataType(const std::string &str); + static std::string FormatToSerialString(Format format); + static Format SerialStringToFormat(const std::string &str); + static Format DataFormatToFormat(const std::string &str); + static Format DomiFormatToFormat(domi::domiTensorFormat_t domi_format); + static std::string FmkTypeToSerialString(domi::FrameworkType fmk_type); + + static graphStatus Usr2DefQuantizeFactorParams(const UsrQuantizeFactorParams &usr, QuantizeFactorParams &def); + static graphStatus Def2UsrQuantizeFactorParams(const QuantizeFactorParams &def, UsrQuantizeFactorParams &usr); + + static bool GetDataTypeLength(ge::DataType data_type, uint32_t &length); + static bool CheckUint64MulOverflow(uint64_t a, uint32_t b); +}; +} // namespace ge +#endif // INC_GRAPH_UTILS_TYPE_UTILS_H_ diff --git a/inc/hccl/base.h b/inc/hccl/base.h new file mode 100644 index 000000000..1d83d7bf9 --- /dev/null +++ b/inc/hccl/base.h @@ -0,0 +1,147 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file base.h + * @brief HCOM data type definition + * + */ + +#ifndef HCCL_BASE_H_ +#define HCCL_BASE_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +typedef signed char s8; +typedef signed short s16; +typedef signed int s32; +typedef signed long long s64; +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +/** + * @brief HCOM functions return value definition + */ +typedef enum tagHcclResult { + HCCL_SUCCESS = 0, /**< success */ + HCCL_E_PARA = 1, /**< parameter error */ + HCCL_E_PTR = 2, /**< empty pointer */ + HCCL_E_MEMORY = 3, /**< memory error */ + HCCL_E_INTERNAL = 4, /**< internal error */ + HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ + HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ + HCCL_E_UNAVAIL = 7, /**< resource unavailable */ + HCCL_E_SYSCALL = 8, /**< call system interface error */ + HCCL_E_TIMEOUT = 9, /**< timeout */ + HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ + HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ + HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ + HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ + HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ + HCCL_E_RUNTIME = 15, /**< call runtime api fail */ + HCCL_E_DRV = 16, /**< call driver api fail */ + HCCL_E_PROFILING = 17, /**< call profiling api fail */ + HCCL_E_CCE = 18, /**< call cce api fail */ + HCCL_E_NETWORK = 19, /**< call network api fail */ + HCCL_E_RESERVED /**< reserved */ +} hcclResult_t; + +/* handle to communicator 
*/ +typedef void *hcclComm_t; + +/** + * @brief HCCL Reduction operation + */ +typedef enum tagHcclRedOp { + HCCL_REP_OP_SUM = 0, /**< sum */ + HCCL_REP_OP_PROD = 1, /**< prod */ + HCCL_REP_OP_MAX = 2, /**< max */ + HCCL_REP_OP_MIN = 3, /**< min */ + HCCL_REP_OP_RESERVED /**< reserved */ +} hcclRedOp_t; + +/** + * @brief Horovod Reduction operation + */ +typedef enum tagHorovodRedOp { + HOROVOD_REP_OP_AVERAGE = 0, /**< average */ + HOROVOD_REP_OP_SUM = 1, /**< sum */ + HOROVOD_REP_OP_ADASUM = 2, /**< adasum */ + HOROVOD_REP_OP_MIN = 3, /**< min */ + HOROVOD_REP_OP_MAX = 4, /**< max */ + HOROVOD_REP_OP_PROD = 5, /**< prod */ + HOROVOD_REP_OP_RESERVED /**< reserved */ +} horovodRedOp_t; + +/** + * @brief HCCL data type + */ +typedef enum tagHcclDataType { + HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ + HCCL_DATA_TYPE_INT = 1, /**< int32 */ + HCCL_DATA_TYPE_HALF = 2, /**< fp16 */ + HCCL_DATA_TYPE_FLOAT = 3, /**< fp32 */ + HCCL_DATA_TYPE_INT16 = 4, /**< int16 */ + HCCL_DATA_TYPE_RESERVED /**< reserved */ +} hcclDataType_t; + +constexpr u32 HCCL_UNIQUE_ID_BYTES = 2060; // 2060: unique id length +using hcclUniqueId = struct hcclUniqueIdDef { + char internal[HCCL_UNIQUE_ID_BYTES]; +}; + +const u32 HCCL_MAX_SEGMENT_NUM = 8; // The max number of gradient segments. + +/** + * @brief the feature of the model + */ +struct model_feature { + const char *model_name; /**< The model name */ + u32 gradient_num; /**< The number of gradients */ + float *gradient_size; /**< The size of each gradient */ + float *gradient_time; /**< The BP computation time of each gradient */ +}; + +enum GradSplitForceMode { + FORCE_NONE, /**< no force */ + FORCE_SIZE, /**< force split gradient by size */ + FORCE_RESERVED /**< reserved */ +}; + +enum OriginalGraphShapeType { + KNOWN_SHAPE, + UNKNOWN_SHAPE, + SHAPE_RESERVED /**< reserved */ +}; + +/** +* @brief stream handle. +*/ +typedef void *rtStream_t; + +/** +* @brief model handle.
+*/ +typedef void *rtModel_t; + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_BASE_H_ diff --git a/inc/hccl/cltm.h b/inc/hccl/cltm.h new file mode 100644 index 000000000..b741f46b6 --- /dev/null +++ b/inc/hccl/cltm.h @@ -0,0 +1,56 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file cltm.h + * @brief Cluster Logical Topology Management API + * + */ + +#ifndef CLTM_INC_H_ +#define CLTM_INC_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief CLTM functions return value definition + */ +typedef enum tagCltmResult { + CLTM_SUCCESS = 0, /**< success */ + CTLM_E_PTR, /**< empty pointer */ + CLTM_E_PARA, /**< parameter error */ + CLTM_E_NO_RESOURCE, /**< resource not enough error */ + CLTM_E_RESERVED /**< reserved */ +} cltmResult_t; + +/** + * @brief Generate rank table + * + * @param allocatedResource A string identifying the resource list allocated by the CSM. + * @param rankTableBuf A string identifying the buffer of the rank table. + * @param maxBufSize An integer(u32) identifying the size of rank table buffer. + * @param usedBufSize A pointer identifying the used size of rank table buffer.
+ * @return cltmResult_t + */ +cltmResult_t cltm_generate_ranktable(const char *allocatedResource, char* rankTableBuf, + unsigned int maxBufSize, unsigned int *usedBufSize); + +#ifdef __cplusplus +} +#endif +#endif // CLTM_INC_H_ diff --git a/inc/hccl/hccl_op_base.h b/inc/hccl/hccl_op_base.h new file mode 100644 index 000000000..017e4f8a9 --- /dev/null +++ b/inc/hccl/hccl_op_base.h @@ -0,0 +1,132 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hccl_op_base.h + * @brief HCCL OP_BASE API + */ + +#ifndef HCCL_OPBASE_H_ +#define HCCL_OPBASE_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief Initialize HCCL in OP_BASE mode. + * + * @param rank_table A string identifying the rank table file path, include file name. + * @param identify A string identifying the identify for the rank. + * @param comm A pointer identifying the initialized communication resource. + * @return hcclResult_t + * @see hcclCommDestroy() + */ +extern hcclResult_t hcclCommInitClusterInfo(const char *rankTable, const char *identify, hcclComm_t *comm); + +/** + * @brief Get hccl unique id in OP_BASE mode. + * + * @param id A pointer identifying the hccl unique id. + * @return hcclResult_t + */ +extern hcclResult_t hcclGetUniqueId(hcclUniqueId* id); + +/** + * @brief Initialize HCCL with unique id in OP_BASE mode. 
+ * + * @param comm A pointer identifying the initialized communication resource. + * @param nranks An integer identifying the rank size of the cluster. + * @param commId A struct identifying the hccl unique id. + * @param myrank An integer identifying the identify for the rank. + * @return hcclResult_t + * @see hcclCommDestroy() + */ +extern hcclResult_t hcclCommInitUniqueId(hcclComm_t* comm, u32 nranks, hcclUniqueId commId, u32 myrank); + +/** + * @brief AllReduce operator in OP_BASE mode. + * + * @param inputPtr A pointer identifying the input data address of the operator. + * @param outputPtr A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the output data. +* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return hcclResult_t + */ +extern hcclResult_t hcclAllReduce(void *inputPtr, void *outputPtr, u64 count, hcclDataType_t dataType, + hcclRedOp_t op, hcclComm_t comm, rtStream_t stream); + +/** + * @brief Broadcast operator in OP_BASE mode. + * + * @param ptr A pointer identifying the data address of the operator. + * @param count An integer(u64) identifying the number of the data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param root An integer(u32) identifying the root rank in the operator. + * @param comm A pointer identifying the communication resource based on + * @param stream A pointer identifying the stream information.
+ * @return hcclResult_t + */ +extern hcclResult_t hcclBroadcast(void *ptr, u64 count, hcclDataType_t dataType, u32 root, hcclComm_t comm, + rtStream_t stream); + +/** + * @brief ReduceScatter operator in OP_BASE mode. + * + * @param inputPtr A pointer identifying the input data address of the operator. + * @param outputPtr A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the output data. +* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return hcclResult_t + */ +extern hcclResult_t hcclReduceScatter(void* inputPtr, void* outputPtr, u64 count, hcclDataType_t dataType, + hcclRedOp_t op, hcclComm_t comm, rtStream_t stream); + +/** + * @brief AllGather operator in OP_BASE mode. + * + * @param inputPtr A pointer identifying the input data address of the operator. + * @param outputPtr A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the input data. +* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. 
+ * @return hcclResult_t + */ +extern hcclResult_t hcclAllGather(void* inputPtr, void* outputPtr, u64 count, hcclDataType_t dataType, + hcclComm_t comm, rtStream_t stream); + +/** + * @brief Destroy HCCL comm + * + * @param comm A pointer identifying the communication resource targeting + * @return hcclResult_t + * @see hcclCommInitClusterInfo() + */ +extern hcclResult_t hcclCommDestroy(hcclComm_t comm); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_OPBASE_H_ diff --git a/inc/hccl/hcom.h b/inc/hccl/hcom.h new file mode 100644 index 000000000..19bf4fb36 --- /dev/null +++ b/inc/hccl/hcom.h @@ -0,0 +1,275 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hcom.h + * @brief HCOM API + */ + +#ifndef HCOM_H_ +#define HCOM_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief Initialize HCOM. + * + * @param rank_table A string identifying the rank table file path, include file name. + * @param identify A string identifying the identify for the rank. + * @return hcclResult_t + * @see hcom_destroy() + */ +extern hcclResult_t hcom_init(const char *rank_table, const char *identify); + +/** + * @brief Destroy HCOM + * + * @return hcclResult_t + * @see hcom_init() + */ +extern hcclResult_t hcom_destroy(void); + +/** + * @brief Bind the model. + * + * @param model A pointer identifying the model information.
+ * @param stream A pointer identifying the stream information. + * @return hcclResult_t + * @see hcom_unbind_model() + */ +extern hcclResult_t hcom_bind_model(rtModel_t model, rtStream_t stream); + +/** + * @brief Unbind the model. + * + * @param model A pointer identifying the model information. + * @return hcclResult_t + * @see hcom_bind_model() + */ +extern hcclResult_t hcom_unbind_model(rtModel_t model); + +/** + * @brief All-gather operator. + * + * @param tag A string identifying the tag of the operator. + * @param inputPtr A pointer identifying the input data address of the operator. + * @param outputPtr A pointer identifying the output data address of the operator. + * @param inputCount An integer(u64) identifying the number of the input data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param group A string identifying the group name of ranks participating in the operator. + * @param stream A pointer identifying the stream information. + * @return hcclResult_t + */ +extern hcclResult_t hcom_all_gather(const char *tag, void *inputPtr, void *outputPtr, u64 inputCount, + hcclDataType_t dataType, const char *group, rtStream_t stream); + +/** + * @brief All-reduce operator. + * + * @param tag A string identifying the tag of the operator. + * @param inputPtr A pointer identifying the input data address of the operator. + * @param outputPtr A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the output data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param group A string identifying the group name of ranks participating in the operator. + * @param stream A pointer identifying the stream information. 
+ * @return hcclResult_t + */ +extern hcclResult_t hcom_all_reduce(const char *tag, void *inputPtr, void *outputPtr, u64 count, + hcclDataType_t dataType, hcclRedOp_t op, const char *group, rtStream_t stream); + +/** + * @brief Broadcast operator. + * + * @param tag A string identifying the tag of the operator. + * @param ptr A pointer identifying the data address of the operator. + * @param count An integer(u64) identifying the number of the data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param root An integer(u32) identifying the root rank in the operator. + * @param group A string identifying the group name of ranks participating in the operator. + * @param stream A pointer identifying the stream information. + * @return hcclResult_t + */ +extern hcclResult_t hcom_broadcast(const char *tag, void *ptr, u64 count, hcclDataType_t dataType, u32 root, + const char *group, rtStream_t stream); + +/** + * @brief Reduce-scatter operator. + * + * @param tag A string identifying the tag of the operator. + * @param inputPtr A pointer identifying the input data address of the operator. + * @param outputPtr A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param group A string identifying the group name of ranks participating in the operator. + * @param stream A pointer identifying the stream information. + * @return hcclResult_t + */ +extern hcclResult_t hcom_reduce_scatter(const char *tag, void *inputPtr, void *outputPtr, u64 count, + hcclDataType_t dataType, hcclRedOp_t op, const char *group, rtStream_t stream); + +/** + * @brief Get the rank number in the group. 
+ * + * @param group A string identifying the group name. + * @param rankSize A pointer identifying the rank number. + * @return hcclResult_t + */ +hcclResult_t hcom_get_rank_size(const char *group, u32 *rankSize); + +/** + * @brief Get the rank number of this rank's server within the group. + * + * @param group A string identifying the group name. + * @param localRankSize A pointer identifying the rank number. + * @return hcclResult_t + */ +hcclResult_t hcom_get_local_rank_size(const char *group, u32 *localRankSize); + +/** + * @brief Get the rank id of this rank. + * + * @param group A string identifying the group name. + * @param rankId A pointer identifying the rank id. + * @return hcclResult_t + */ +hcclResult_t hcom_get_rank_id(const char *group, u32 *rankId); + +/** + * @brief Get the local rank id of this rank's server within the group. + * + * @param group A string identifying the group name. + * @param localRankId A pointer identifying the local rank id. + * @return hcclResult_t + */ +hcclResult_t hcom_get_local_rank_id(const char *group, u32 *localRankId); + +/** + * @brief Get the world rank id according to the group rank id. + * + * @param group A string identifying the group name. + * @param groupRank An integer(u32) identifying the group rank id. + * @param worldRank A pointer identifying the world rank id. + * @return hcclResult_t + */ +hcclResult_t hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); + +/** + * @brief Get the group rank id according to the world rank id. + * + * @param worldRank An integer(u32) identifying the world rank id. + * @param group A string identifying the group name. + * @param groupRank A pointer identifying the group rank id. + * @return hcclResult_t + */ +hcclResult_t hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); + +/** + * @brief Create group. + * + * @param group A string identifying the group name. 
+ * @param rankNum An integer(u32) identifying the number of ranks in the group. + * @param rankIds A list identifying the ranks in the group. + * @return hcclResult_t + */ +hcclResult_t hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); + +/** + * @brief Destroy group + * + * @param group A string identifying the group name. + * @return hcclResult_t + */ +hcclResult_t hcom_destroy_group(const char *group); + +/** + * @brief Send operator. + * + * @param tag A string identifying the tag of the operator. + * @param inputPtr A pointer identifying the input data address of the operator. + * @param count An integer(u64) identifying the number of the data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param destRank An integer identifying the destination rank. + * @param srTag An integer identifying the send/recv message tag. + * The message will be received by the receive operator with the same "sr_tag". + * @param group A string identifying the group name of ranks participating in the operator. + * @param stream A pointer identifying the stream information. + * @return hcclResult_t + */ +hcclResult_t hcom_send(const char *tag, void *inputPtr, u64 count, hcclDataType_t dataType, + u32 destRank, u32 srTag, const char *group, rtStream_t stream); + +/** + * @brief Receive operator. + * + * @param tag A string identifying the tag of the operator. + * @param outputPtr A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param srcRank An integer identifying the source rank. + * @param srTag An integer identifying the send/recv message tag. + * The message will be sent by the send operator with the same "sr_tag". 
+ * @param group A string identifying the group name of ranks participating in the operator. + * @param stream A pointer identifying the stream information. + * @return hcclResult_t + */ +hcclResult_t hcom_receive(const char *tag, void *outputPtr, u64 count, hcclDataType_t dataType, + u32 srcRank, u32 srTag, const char *group, rtStream_t stream); + +/** + * @brief Get the gradient split strategy within the group. + * + * @param group A string identifying the group name. + * @param feature A pointer identifying the feature of the model. + * @param maxSegmentNum An integer(u32) identifying the max segments of gradients. + * @param segmentNum A pointer identifying the segments number of gradients. + * @param segmentIdx A list identifying the index of end gradient in each segment. + * @return hcclResult_t + */ +hcclResult_t hcom_get_split_strategy(const char *group, const struct model_feature *feature, u32 maxSegmentNum, + u32 *segmentNum, u32 *segmentIdx, GradSplitForceMode force = FORCE_NONE, + OriginalGraphShapeType shapeType = KNOWN_SHAPE); + +/** + * @brief Set the gradient split strategy within the group, according to gradient index. + * + * @param group A string identifying the group name. + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param IdxList A list identifying the index of end gradient in each segment. + * @return hcclResult_t + */ +extern hcclResult_t hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); + +/** + * @brief Set the gradient split strategy within the group, according to gradient data size. + * + * @param group A string identifying the group name. + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param sizeList A list identifying the percent of each segment. 
+ * @return hcclResult_t + */ +extern hcclResult_t hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCOM_H_ diff --git a/inc/hccl/hcom_ops_stores.h b/inc/hccl/hcom_ops_stores.h new file mode 100644 index 000000000..274af6fde --- /dev/null +++ b/inc/hccl/hcom_ops_stores.h @@ -0,0 +1,68 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hcom_ops_stores.h + * @brief HCOM operators plugin API + */ + +#ifndef HCOM_OPS_STORES_H_ +#define HCOM_OPS_STORES_H_ + +#include "common/opskernel/ops_kernel_info_store.h" +#include "common/optimizer/graph_optimizer.h" +#include "framework/common/ge_inner_error_codes.h" + +using OpsKernelInfoStorePtr = std::shared_ptr; +using GraphOptimizerPtr = std::shared_ptr; + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief Initialize HCOM operators plugin. + * + * @param options Input parameter. Options must contain rank table path, deploy mode, rank id, pod name. + * @return ge::SUCCESS success; others:fail. + */ +ge::Status Initialize(const std::map &options); + +/** + * @brief Finalize HCOM operators plugin. + * + * @return ge::SUCCESS success; others: fail. + */ +ge::Status Finalize(); + +/** + * @brief Get the information store of HCOM operators. 
+ * + * @param opKernInfos A map identifying the information store of HCOM operators. + */ +void GetOpsKernelInfoStores(std::map &opKernInfos); + +/** + * @brief Get the graph optimizer of HCOM operators. + * + * @param graphOptimizers A map identifying the graph optimizer of HCOM operators. + */ +void GetGraphOptimizerObjs(std::map &graphOptimizers); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCOM_OPS_STORES_H_ diff --git a/inc/hccl/hvd_adapter_pub.h b/inc/hccl/hvd_adapter_pub.h new file mode 100644 index 000000000..9ded323b4 --- /dev/null +++ b/inc/hccl/hvd_adapter_pub.h @@ -0,0 +1,37 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hvd_adapter_pub.h + * @brief Horovod Adapter API + */ + +#ifndef HVD_ADAPTER_ +#define HVD_ADAPTER_ + +#include + +namespace hccl { +using HvdCbDataProcessFunc = hcclResult_t (*)(void *fnData); + +/** + * @brief Add function to process fndata. + * @param fn A hvdCbDataProcessFunc type function. 
+ * @return void + */ +extern void HvdCbDataProcessFuncAdd(HvdCbDataProcessFunc fn); // 注册处理回host侧数据的函数 +} +#endif // HVD_ADAPTER_ diff --git a/inc/register/graph_optimizer/buffer_fusion/buffer_fusion_constant.h b/inc/register/graph_optimizer/buffer_fusion/buffer_fusion_constant.h new file mode 100644 index 000000000..8a661e429 --- /dev/null +++ b/inc/register/graph_optimizer/buffer_fusion/buffer_fusion_constant.h @@ -0,0 +1,74 @@ +/** + * @file buffer_fusion_constant.h + * + * Copyright(C), 2017 - 2017, Huawei Tech. Co., Ltd. ALL RIGHTS RESERVED. + * + * @brief define the pattern. + * + * @author Huawei + * + * @version 1.0 + * + */ +#ifndef INC_REGISTER_GRAPH_OPTIMIZER_BUFFER_FUSION_CONSTANT_H_ +#define INC_REGISTER_GRAPH_OPTIMIZER_BUFFER_FUSION_CONSTANT_H_ +#include +#include + +namespace fe { +// add the op pattern +static const std::string TBE_PATTERN_INPUT_NODE = "InputData"; +static const std::string TBE_PATTERN_OP_TYPE_ANY = "OpTypeAny"; +static const std::string TBE_PATTERN_OUTPUT_NODE = "OutputData"; +static const std::string OP_PATTERN_ELEMWISE = "ElemWise"; +static const std::string OP_PATTERN_COMMONREDUCE = "CommReduce"; +static const std::string OP_PATTERN_SEGMENT = "Segment"; +static const std::string OP_PATTERN_MAXPOOL = "MaxPool"; +static const std::string OP_PATTERN_CONV = "Convolution"; +static const std::string OP_PATTERN_MATMUL = "Matmul"; +static const std::string OP_PATTERN_BNUPDATE = "bn_update"; +static const std::string OP_PATTERN_BNREDUCE = "bn_reduce"; +static const std::string OP_PATTERN_CONV_BACKPROP_INPUT = + "Conv2d_backprop_input"; +static const std::string OP_PATTERN_DEPTHWISE_CONV = "DepthwiseConvolution"; +static const std::string OP_PATTERN_QUANT = "quant"; +static const std::string OP_PATTERN_DEQUANT = "dequant"; +static const std::string OP_PATTERN_REQUANT = "requant"; +static const std::string OP_PATTERN_POOL2D = "Pool2d"; +static const std::string OP_PATTERN_ANTIQUANT = "anti_quant"; +static const std::string 
OP_PATTERN_STRIDED_WRITE = "strided_write"; +static const std::string OP_PATTERN_STRIDED_READ = "strided_read"; +static const std::string OP_PATTERN_AIPP = "aipp"; +static const std::string OP_PATTERN_CONFUSION_TRANSPOSE = "confusiontranspose"; +static const std::string OP_PATTERN_DEQUANTS16 = "dequant_s16"; +static const std::string OP_PATTERN_REQUANTS16 = "requant_s16"; +static const std::string OP_PATTERN_READ_SELECT = "read_select"; +static const std::string OP_PATTERN_WRITE_SELECT = "write_select"; + +static const std::vector OP_PATTERN_VEC{ + OP_PATTERN_ELEMWISE, + OP_PATTERN_COMMONREDUCE, + OP_PATTERN_SEGMENT, + OP_PATTERN_MAXPOOL, + OP_PATTERN_CONV, + OP_PATTERN_MATMUL, + OP_PATTERN_BNUPDATE, + OP_PATTERN_BNREDUCE, + OP_PATTERN_CONV_BACKPROP_INPUT, + OP_PATTERN_DEPTHWISE_CONV, + OP_PATTERN_QUANT, + OP_PATTERN_DEQUANT, + OP_PATTERN_REQUANT, + OP_PATTERN_POOL2D, + OP_PATTERN_ANTIQUANT, + OP_PATTERN_STRIDED_WRITE, + OP_PATTERN_STRIDED_READ, + OP_PATTERN_AIPP, + OP_PATTERN_CONFUSION_TRANSPOSE, + OP_PATTERN_DEQUANTS16, + OP_PATTERN_REQUANTS16, + OP_PATTERN_READ_SELECT, + OP_PATTERN_WRITE_SELECT}; +} // namespace fe + +#endif // INC_REGISTER_GRAPH_OPTIMIZER_BUFFER_FUSION_CONSTANT_H_ diff --git a/inc/register/graph_optimizer/buffer_fusion/buffer_fusion_pass_base.h b/inc/register/graph_optimizer/buffer_fusion/buffer_fusion_pass_base.h new file mode 100644 index 000000000..d5dc2a834 --- /dev/null +++ b/inc/register/graph_optimizer/buffer_fusion/buffer_fusion_pass_base.h @@ -0,0 +1,36 @@ +#ifndef INC_REGISTER_GRAPH_OPTIMIZER_BUFFER_FUSION_PASS_BASE_H_ +#define INC_REGISTER_GRAPH_OPTIMIZER_BUFFER_FUSION_PASS_BASE_H_ + +#include "register/graph_optimizer/buffer_fusion/buffer_fusion_constant.h" +#include "register/graph_optimizer/buffer_fusion/buffer_fusion_pattern.h" +#include "register/graph_optimizer/graph_optimize_register_error_codes.h" +#include +#include +#include +#include + +namespace fe { +enum BufferFusionPassType { + BUILT_IN_AI_CORE_BUFFER_FUSION_PASS, + 
BUILT_IN_VECTOR_CORE_BUFFER_FUSION_PASS, + CUSTOM_AI_CORE_BUFFER_FUSION_PASS, + CUSTOM_VECTOR_CORE_BUFFER_FUSION_PASS, + BUFFER_FUSION_PASS_TYPE_RESERVED +}; +class BufferFusionPassBase { +public: + explicit BufferFusionPassBase(); + virtual ~BufferFusionPassBase(); + virtual std::vector DefinePatterns() = 0; + virtual Status GetFusionNodes(const BufferFusionMapping &mapping, + vector &fusionNodes); + std::vector GetMatchedNodes(const BufferFusionMapping &mapping); + std::vector + GetMatchedNodesByDescName(const std::string &descName, + const BufferFusionMapping &mapping); + ge::NodePtr GetMatchedHeadNode(const std::vector &matchedNodes); +}; + +} // namespace fe + +#endif // INC_REGISTER_GRAPH_OPTIMIZER_BUFFER_FUSION_PASS_BASE_H_ diff --git a/inc/register/graph_optimizer/buffer_fusion/buffer_fusion_pass_registry.h b/inc/register/graph_optimizer/buffer_fusion/buffer_fusion_pass_registry.h new file mode 100644 index 000000000..fb7f70eb5 --- /dev/null +++ b/inc/register/graph_optimizer/buffer_fusion/buffer_fusion_pass_registry.h @@ -0,0 +1,66 @@ +/** + * @file buffer_fusion_pass_registry.h + * + * Copyright(C), 2017 - 2017, Huawei Tech. Co., Ltd. ALL RIGHTS RESERVED. 
+ * + * @brief provide interface: BufferFusionByPass + * + * @author Huawei + * + * @version 1.0 + * + */ +#ifndef INC_REGISTER_GRAPH_OPTIMIZER_BUFFER_FUSION_PASS_REGISTRY_H_ +#define INC_REGISTER_GRAPH_OPTIMIZER_BUFFER_FUSION_PASS_REGISTRY_H_ +#include "register/graph_optimizer/buffer_fusion/buffer_fusion_pass_base.h" +#include +#include +#include +#include + +namespace fe { +class BufferFusionPassRegistry { +public: + using CreateFn = BufferFusionPassBase *(*)(); + ~BufferFusionPassRegistry(); + + static BufferFusionPassRegistry &GetInstance(); + + void RegisterPass(const BufferFusionPassType &pass_type, + const std::string &pass_name, CreateFn create_fn); + + std::map + GetCreateFnByType(const BufferFusionPassType &pass_type); + +private: + BufferFusionPassRegistry(); + class BufferFusionPassRegistryImpl; + std::unique_ptr impl_; +}; + +class BufferFusionPassRegistrar { +public: + BufferFusionPassRegistrar(const BufferFusionPassType &pass_type, + const std::string &pass_name, + BufferFusionPassBase *(*create_fun)()); + ~BufferFusionPassRegistrar() {} +}; + +#define REGISTER_BUFFER_FUSION_PASS(pass_name, pass_type, pass_class) \ + REGISTER_BUFFER_FUSION_PASS_UNIQ_HELPER(__COUNTER__, pass_name, pass_type, \ + pass_class) + +#define REGISTER_BUFFER_FUSION_PASS_UNIQ_HELPER(ctr, pass_name, pass_type, \ + pass_class) \ + REGISTER_BUFFER_FUSION_PASS_UNIQ(ctr, pass_name, pass_type, pass_class) + +#define REGISTER_BUFFER_FUSION_PASS_UNIQ(ctr, pass_name, pass_type, \ + pass_class) \ + static ::fe::BufferFusionPassRegistrar register_buffer_fusion_pass##ctr \ + __attribute__((unused)) = ::fe::BufferFusionPassRegistrar( \ + pass_type, pass_name, []() -> ::fe::BufferFusionPassBase * { \ + return new (std::nothrow) pass_class(); \ + }) + +} // namespace fe +#endif // INC_REGISTER_GRAPH_OPTIMIZER_BUFFER_FUSION_PASS_REGISTRY_H_ diff --git a/inc/register/graph_optimizer/buffer_fusion/buffer_fusion_pattern.h b/inc/register/graph_optimizer/buffer_fusion/buffer_fusion_pattern.h new 
file mode 100644 index 000000000..575b55664 --- /dev/null +++ b/inc/register/graph_optimizer/buffer_fusion/buffer_fusion_pattern.h @@ -0,0 +1,74 @@ +#ifndef INC_REGISTER_GRAPH_OPTIMIZER_BUFFER_FUSION_PATTERN_H_ +#define INC_REGISTER_GRAPH_OPTIMIZER_BUFFER_FUSION_PATTERN_H_ +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/attr_utils.h" +#include "graph/utils/graph_utils.h" +#include +#include +#include + +namespace fe { +static const int TBE_FUSION_OP_NUM_MAX = 5; +static const int TBE_PATTERN_NUM_MAX = 5; +static const int TBE_PATTERN_NUM_NONE = 0; +static const int TBE_PATTERN_NUM_DEFAULT = 1; +static const int TBE_OUTPUT_BRANCH_SINGLE = 1; +static const int TBE_OUTPUT_BRANCH_MULTI = 2; +static const int TBE_PATTERN_GROUPID_INVALID = -1; + +struct BufferFusionOpDesc { + std::string descName; // description name + std::vector types; // description type + std::vector inputs; // all input op + std::vector outputs; // all output op + int64_t outBranchType; // out desc type, 1:single, 2: multi + int64_t repeateMin; // opdesc min repeat num + int64_t repeateMax; // opdesc max repeat num + int64_t repeateCurr; // opdesc current repeat num + bool matchStatus; + int64_t groupId; // record desc groupid, need one desc matched at least in + // the same group +}; +using BufferFusionMapping = + std::map>; +using BufferFusionMappings = std::vector; + +class BufferFusionPattern { +public: + explicit BufferFusionPattern(std::string name = "", + int64_t opMaxCount = TBE_FUSION_OP_NUM_MAX); + + virtual ~BufferFusionPattern(); + + BufferFusionPattern &AddOpDesc(const std::string &descName, + const std::vector &patterns, + int64_t repeatMin = TBE_PATTERN_NUM_DEFAULT, + int64_t repeatMax = TBE_PATTERN_NUM_DEFAULT, + int64_t groupId = TBE_PATTERN_GROUPID_INVALID); + + BufferFusionPattern &SetOutputs(const std::string &descName, + const std::vector &patterns, + int64_t relation = TBE_OUTPUT_BRANCH_SINGLE); + + BufferFusionPattern &SetHead(const std::vector &opPatterns); + 
+ std::string GetName(); + int64_t GetOpMaxCount(); + std::vector GetOpDescs(); + bool GetOutputs(BufferFusionOpDesc *opDesc, + std::vector &outputs); + std::vector GetHead(); + int64_t GetErrorCnt(); + void InitRepeatCurr(const BufferFusionPattern &pattern); + +private: + BufferFusionOpDesc *GetOpDesc(const std::string &descName); + std::string name_; + int64_t opMaxCount_; + std::vector ops_; + std::map opMap_; + std::vector head_; + int64_t errorCount_; +}; +} // namespace fe +#endif // INC_REGISTER_GRAPH_OPTIMIZER_BUFFER_FUSION_PATTERN_H_ \ No newline at end of file diff --git a/inc/register/graph_optimizer/fusion_common/graph_pass_util.h b/inc/register/graph_optimizer/fusion_common/graph_pass_util.h new file mode 100644 index 000000000..c01000f8d --- /dev/null +++ b/inc/register/graph_optimizer/fusion_common/graph_pass_util.h @@ -0,0 +1,250 @@ +/** + * @file graph_pass.h + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * + * @brief define graph pass, which provides two interface: 1. run pass; + * 2. 
record op names before fusion + * + * @author Huawei + * + * @version 1.0 + */ + +#ifndef INC_REGISTER_GRAPH_OPTIMIZER_GRAPH_PASS_UTIL_H_ +#define INC_REGISTER_GRAPH_OPTIMIZER_GRAPH_PASS_UTIL_H_ +#include "graph/compute_graph.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/attr_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/type_utils.h" +#include "register/graph_optimizer/graph_optimize_register_error_codes.h" + +#include +#include +#include +#include +#include + +namespace fe { +using NodeTypeMap = std::unordered_map>; +using NodeTypeMapPtr = std::shared_ptr; +struct NodeMapInfo { + int64_t runCount; + NodeTypeMapPtr nodeTypeMap; +}; +using NodeMapInfoPtr = std::shared_ptr; + +class GraphPassUtil { +public: + /** set outputdesc attr for data dump + * + * @param originIndex,usually is origin node output index + * + * @param fusionIndex,usually is fusion node output index + * + * @param originNode, usually is origin node + * + * @param fusionNode, usually is fusion node + */ + static void SetOutputDescAttr(uint32_t originIndex, uint32_t fusionIndex, + ge::NodePtr originNode, + ge::NodePtr fusionNode) { + if (fusionNode->GetOpDesc() == nullptr) { + return; + } + + auto fusionNodeOutputDesc = + fusionNode->GetOpDesc()->MutableOutputDesc(fusionIndex); + if (fusionNodeOutputDesc == nullptr) { + return; + } + if (originNode->GetOpDesc() == nullptr) { + return; + } + auto originNodeOutputDesc = + originNode->GetOpDesc()->MutableOutputDesc(originIndex); + if (originNodeOutputDesc == nullptr) { + return; + } + + std::vector originalNames; + if (ge::AttrUtils::GetListStr(originNode->GetOpDesc(), + ge::ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, + originalNames) && + originalNames.size() > 0) { + std::string originalName; + if (ge::AttrUtils::GetStr(originNodeOutputDesc, + ge::ATTR_NAME_DATA_DUMP_ORIGIN_NAME, + originalName)) { + (void)ge::AttrUtils::SetStr(fusionNodeOutputDesc, + ge::ATTR_NAME_DATA_DUMP_ORIGIN_NAME, + originalName); + + 
std::int64_t originOutputIndex = 0; + if (ge::AttrUtils::GetInt(originNodeOutputDesc, + ge::ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, + originOutputIndex)) { + (void)ge::AttrUtils::SetInt( + fusionNodeOutputDesc, ge::ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, + originOutputIndex); + } + + ge::DataType originDataType = + GetDataDumpOriginDataType(originNodeOutputDesc); + if (originDataType != ge::DT_UNDEFINED) { + SetDataDumpOriginDataType(originDataType, fusionNodeOutputDesc); + } + ge::Format originFormat = GetDataDumpOriginFormat(originNodeOutputDesc); + if (originFormat != ge::FORMAT_RESERVED) { + SetDataDumpOriginFormat(originFormat, fusionNodeOutputDesc); + } + } + } else { + (void)ge::AttrUtils::SetStr(fusionNodeOutputDesc, + ge::ATTR_NAME_DATA_DUMP_ORIGIN_NAME, + originNode->GetName()); + (void)ge::AttrUtils::SetInt(fusionNodeOutputDesc, + ge::ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, + originIndex); + SetDataDumpOriginDataType(originNodeOutputDesc->GetOriginDataType(), + fusionNodeOutputDesc); + SetDataDumpOriginFormat(originNodeOutputDesc->GetOriginFormat(), + fusionNodeOutputDesc); + } + } + + /** get origin format for data dump + * + * @param tensorDesc,usually is outputDesc + * + * @return format of this tensorDesc + */ + static ge::Format GetDataDumpOriginFormat(ge::GeTensorDescPtr tensorDesc) { + std::string originFormatStr; + if (!ge::AttrUtils::GetStr(tensorDesc, + ge::ATTR_NAME_DATA_DUMP_ORIGIN_FORMAT, + originFormatStr)) { + // Can not get the certificate and it's not set,return directly + return ge::FORMAT_RESERVED; + } + if (originFormatStr == "RESERVED") { + return ge::FORMAT_RESERVED; + } + return ge::TypeUtils::SerialStringToFormat(originFormatStr); + } + + /** set origin format for data dump + * + * @param origin format + * + * @param tensorDesc,usually is outputDesc + */ + static void SetDataDumpOriginFormat(ge::Format originFormat, + ge::GeTensorDescPtr tensorDesc) { + std::string originFormatStr = "RESERVED"; + if (originFormat != 
ge::FORMAT_RESERVED) { + originFormatStr = ge::TypeUtils::FormatToSerialString(originFormat); + } + (void)ge::AttrUtils::SetStr( + tensorDesc, ge::ATTR_NAME_DATA_DUMP_ORIGIN_FORMAT, originFormatStr); + } + + /** set origin datatype for data dump + * + * @param origin datatype + * + * @param tensorDesc,usually is outputDesc + */ + static void SetDataDumpOriginDataType(ge::DataType originDataType, + ge::GeTensorDescPtr tensorDesc) { + std::string originDataTypeStr = "RESERVED"; + if (originDataType != ge::DT_UNDEFINED) { + originDataTypeStr = ge::TypeUtils::DataTypeToSerialString(originDataType); + } + (void)ge::AttrUtils::SetStr(tensorDesc, + ge::ATTR_NAME_DATA_DUMP_ORIGIN_DATA_TYPE, + originDataTypeStr); + } + + /** get origin datatype for data dump + * + * @param tensorDesc,usually is outputDesc + * + * @return format of this tensorDesc + */ + static ge::DataType + GetDataDumpOriginDataType(ge::GeTensorDescPtr tensorDesc) { + std::string originDataTypeStr; + if (!ge::AttrUtils::GetStr(tensorDesc, + ge::ATTR_NAME_DATA_DUMP_ORIGIN_DATA_TYPE, + originDataTypeStr)) { + return ge::DT_UNDEFINED; + } + if (originDataTypeStr == "RESERVED") { + return ge::DT_UNDEFINED; + } + return ge::TypeUtils::SerialStringToDataType(originDataTypeStr); + } + + static void AddNodeFromOpTypeMap(NodeMapInfoPtr &nodeMapInfo, + ge::NodePtr &nodePtr) { + if (nodeMapInfo == nullptr || nodePtr == nullptr) { + return; + } + NodeTypeMapPtr nodeTypeMap = nodeMapInfo->nodeTypeMap; + string realOpType = ge::NodeUtils::GetNodeType(*nodePtr); + auto iter = nodeTypeMap->find(realOpType); + if (iter != nodeTypeMap->end()) { + iter->second.insert(nodePtr); + } else { + nodeTypeMap->emplace( + std::make_pair(realOpType, std::unordered_set{nodePtr})); + } + } + + static Status GetOpTypeMapToGraph(NodeMapInfoPtr &nodeMapInfo, + const ge::ComputeGraph &graph) { + nodeMapInfo = graph.TryGetExtAttr("NodeMapInfo", nodeMapInfo); + if (nodeMapInfo == nullptr) { + return FAILED; + } + return SUCCESS; + } + + 
static void RecordOriginalNames(std::vector originalNodes, + ge::NodePtr node) { + // 1. get the originalNames + std::vector originalNames; + for (ge::NodePtr originalNode : originalNodes) { + if (originalNode == nullptr || originalNode->GetOpDesc() == nullptr) { + return; + } + + ge::OpDescPtr originOpDescPtr = originalNode->GetOpDesc(); + std::vector names_tmp; + bool isHasAttr = ge::AttrUtils::GetListStr( + originOpDescPtr, ge::ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, names_tmp); + if (isHasAttr) { + for (const auto &node_name : names_tmp) { + if (!node_name.empty()) { + originalNames.push_back(node_name); + } + } + } else { + originalNames.push_back(originOpDescPtr->GetName()); + } + } + + // 2. set the dump attr + if (node == nullptr || node->GetOpDesc() == nullptr) { + return; + } + ge::OpDescPtr nodeOpDescPtr = node->GetOpDesc(); + (void)ge::AttrUtils::SetListStr( + nodeOpDescPtr, ge::ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, originalNames); + } +}; + +} // namespace fe + +#endif // INC_REGISTER_GRAPH_OPTIMIZER_GRAPH_PASS_UTIL_H_ diff --git a/inc/register/graph_optimizer/fusion_common/pattern_fusion_base_pass.h b/inc/register/graph_optimizer/fusion_common/pattern_fusion_base_pass.h new file mode 100644 index 000000000..9bfa73b40 --- /dev/null +++ b/inc/register/graph_optimizer/fusion_common/pattern_fusion_base_pass.h @@ -0,0 +1,106 @@ +/** + * @file pattern_fusion_base_pass.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. 
+ * + * @brief define fusion pass based on pattern + * + * @author Huawei + * + * @version 1.0 + */ + +#ifndef INC_REGISTER_GRAPH_OPTIMIZER_PATTERN_FUSION_BASE_PASS_H_ +#define INC_REGISTER_GRAPH_OPTIMIZER_PATTERN_FUSION_BASE_PASS_H_ + +#include "common/opskernel/ops_kernel_info_store.h" +#include "register/graph_optimizer/graph_fusion/fusion_pattern.h" +#include "register/graph_optimizer/graph_fusion/graph_fusion_pass_base.h" +#include "register/graph_optimizer/graph_fusion/graph_pass.h" +#include "register/graph_optimizer/graph_optimize_register_error_codes.h" +#include +#include +#include +#include +#include + +using std::initializer_list; +using std::map; +using std::string; +using std::vector; + +using namespace std; + +namespace fe { +using OpsKernelInfoStorePtr = std::shared_ptr; +class PatternFusionBasePassImpl; +using PatternFusionBasePassImplPtr = std::shared_ptr; + +/** Pass based on pattern + * @ingroup FUSION_PASS_GROUP + * @note New virtual methods should be append at the end of this class + */ +class PatternFusionBasePass : public GraphPass { +public: + using OpDesc = FusionPattern::OpDesc; + using Mapping = map, vector>; + using Mappings = vector; + std::map originOpAnchorsMap_; + + PatternFusionBasePass(); + virtual ~PatternFusionBasePass(); + + /** execute pass + * + * @param [in] graph, the graph waiting for pass level optimization + * @return SUCCESS, successfully optimized the graph by the pass + * @return NOT_CHANGED, the graph did not change + * @return FAILED, fail to modify graph + */ + Status Run(ge::ComputeGraph &graph) override; + + /** execute pass + * + * @param [in] graph, the graph waiting for pass level optimization + * @param [opsKernelInfoStorePtr, OP info kernel instance + * @return SUCCESS, successfully optimized the graph by the pass + * @return NOT_CHANGED, the graph did not change + * @return FAILED, fail to modify graph + */ + virtual Status Run(ge::ComputeGraph &graph, + OpsKernelInfoStorePtr opsKernelInfoStorePtr); + 
+protected: + virtual vector DefinePatterns() = 0; + virtual Status Fusion(ge::ComputeGraph &graph, Mapping &mapping, + vector &newNodes) = 0; + + std::vector GetNodesFromMapping(const Mapping &mapping); + ge::NodePtr GetNodeFromMapping(const string &id, const Mapping &mapping); + + void RecordOutputAnchorMap(ge::NodePtr outputNode); + Status SetDataDumpAttr(vector &originalNodes, + vector &fusNodes); + + bool CheckOpSupported(const ge::OpDescPtr &opDescPtr); + +private: + /** match all nodes in graph according to pattern + * + * @param pattern fusion pattern defined + * @param mappings match result + * @return SUCCESS, successfully add edge + * @return FAILED, fail + */ + bool MatchAll(ge::ComputeGraph &graph, const FusionPattern &pattern, + Mappings &mappings); + + Status RunOnePattern(ge::ComputeGraph &graph, const FusionPattern &pattern, + bool &changed); // lint !e148 + + /** Internal implement class ptr */ + std::shared_ptr patternFusionBasePassImplPtr_; +}; +} // namespace fe + +#endif // INC_REGISTER_GRAPH_OPTIMIZER_PATTERN_FUSION_BASE_PASS_H_ diff --git a/inc/register/graph_optimizer/graph_fusion/fusion_pass_manager/fusion_pass_registry.h b/inc/register/graph_optimizer/graph_fusion/fusion_pass_manager/fusion_pass_registry.h new file mode 100644 index 000000000..a954c053e --- /dev/null +++ b/inc/register/graph_optimizer/graph_fusion/fusion_pass_manager/fusion_pass_registry.h @@ -0,0 +1,63 @@ +/** + * @file fusion_pass_registry.h + * + * Copyright(C), 2017 - 2017, Huawei Tech. Co., Ltd. ALL RIGHTS RESERVED. 
+ * + * @brief provide interface: GraphFusionByPass + * + * @author Huawei + * + * @version 1.0 + * + */ +#ifndef INC_REGISTER_GRAPH_OPTIMIZER_FUSION_PASS_REGISTRY_H_ +#define INC_REGISTER_GRAPH_OPTIMIZER_FUSION_PASS_REGISTRY_H_ + +#include "register/graph_optimizer/graph_fusion/graph_fusion_pass_base.h" +#include +#include +#include +#include + +namespace fe { +class FusionPassRegistry { +public: + using CreateFn = GraphPass *(*)(); + ~FusionPassRegistry(); + + static FusionPassRegistry &GetInstance(); + + void RegisterPass(const GraphFusionPassType &pass_type, + const std::string &pass_name, CreateFn create_fn); + + std::map + GetCreateFnByType(const GraphFusionPassType &pass_type); + +private: + FusionPassRegistry(); + class FusionPassRegistryImpl; + std::unique_ptr impl_; +}; + +class FusionPassRegistrar { +public: + FusionPassRegistrar(const GraphFusionPassType &pass_type, + const std::string &pass_name, GraphPass *(*create_fun)()); + ~FusionPassRegistrar() {} +}; + +#define REGISTER_PASS(pass_name, pass_type, pass_class) \ + REGISTER_PASS_UNIQ_HELPER(__COUNTER__, pass_name, pass_type, pass_class) + +#define REGISTER_PASS_UNIQ_HELPER(ctr, pass_name, pass_type, pass_class) \ + REGISTER_PASS_UNIQ(ctr, pass_name, pass_type, pass_class) + +#define REGISTER_PASS_UNIQ(ctr, pass_name, pass_type, pass_class) \ + static ::fe::FusionPassRegistrar register_fusion_pass##ctr \ + __attribute__((unused)) = ::fe::FusionPassRegistrar( \ + pass_type, pass_name, []() -> ::fe::GraphPass * { \ + return new (std::nothrow) pass_class(); \ + }) + +} // namespace fe +#endif // INC_REGISTER_GRAPH_OPTIMIZER_FUSION_PASS_REGISTRY_H_ diff --git a/inc/register/graph_optimizer/graph_fusion/fusion_pattern.h b/inc/register/graph_optimizer/graph_fusion/fusion_pattern.h new file mode 100644 index 000000000..697f5f3e2 --- /dev/null +++ b/inc/register/graph_optimizer/graph_fusion/fusion_pattern.h @@ -0,0 +1,172 @@ +/** + * @file pattern_fusion_base_pass.h + * + * Copyright (c) Huawei Technologies 
Co., Ltd. 2019-2019. All rights reserved. + * + * @brief define fusion pass based on pattern + * + * @author Huawei + * + * @version 1.0 + */ + +#ifndef INC_REGISTER_GRAPH_OPTIMIZER_FUSION_PATTERN_H_ +#define INC_REGISTER_GRAPH_OPTIMIZER_FUSION_PATTERN_H_ +#include +#include +#include +#include +#include + +using std::initializer_list; +using std::map; +using std::string; +using std::vector; + +using namespace std; + +namespace fe { + +/** Fusion pattern + * @ingroup FUSION_PASS_GROUP + * Describe Pattern of Ops waiting for fusion(Op type, etc) + */ +class FusionPattern { +public: + struct OpDesc; + using OpDescPtr = std::shared_ptr; + /** + * @ingroup fe + * @brief description of Ops + */ + struct OpDesc { + string id; // Identifier + std::vector types; // the Op types of Ops + std::vector inputs; // all input Ops + bool repeatable; // flag to show if match multiple Ops or not + bool is_output; // flag to show if the op is output node + }; + +public: + explicit FusionPattern(string name = ""); + ~FusionPattern(); + + /** set pattern name + * + * @param name pattern name + * @return FusionPattern + */ + FusionPattern &SetName(const string &name); + + /** add Op description with unknown number of args + * + * @param id pattern id + * @param types op type list + * @return FusionPattern + */ + FusionPattern &AddOpDesc(const string &id, + const initializer_list &types = {}); + + /** add Op description with vector + * + * @param id pattern id + * @param types op type list + * + * @return FusionPattern + */ + FusionPattern &AddOpDesc(const string &id, const vector &types); + + /** set input Ops with unknown number of args + * + * @param id pattern id + * + * @param inputIds inputs to id op + * + * @return FusionPattern + */ + FusionPattern &SetInputs(const string &id, + const initializer_list &inputIds); + + /** set input Ops with unknown number of args + * + * @param id pattern id + * + * @param inputIds inputs to id op + * + * @return FusionPattern + */ + FusionPattern 
&SetInputs(const string &id, const vector &inputIds); + + /** set output Op + * + * @param id pattern id + * + * @return FusionPattern + */ + FusionPattern &SetOutput(const string &id); + + /** build pattern and check if error exists + * + * @return True or False + */ + bool Build(); + + /** get pattern name + * + * @param id pattern id + * + * @return fusion pattern name + */ + const string &GetName() const; + + /** get the OpDesc of input Ops (const) + * + * @param op_desc op_desc for getting inputs + * + * @return op_desc's iniput opdesc list + */ + static const vector> * + GetInputs(std::shared_ptr op_desc); + + /** get the OpDesc of output Op + * + * @return pattern's output opdesc list + */ + const std::shared_ptr GetOutput() const; + + /** print pattern + * + */ + void Dump() const; + + void GetOpDescList(vector> &op_desc_list); + + /** get OpDesc based on ID, return nullptr if failed + * + * @param id pattern id + * + * @return pattern's output opdesc list + */ + std::shared_ptr GetOpDesc(const string &id) const; + +private: + FusionPattern(const FusionPattern &) = default; + FusionPattern &operator=(const FusionPattern &) = default; + + void SetError(); + +private: + string name_; + + vector> ops_; + + map> op_map_; + + std::shared_ptr output_; + + bool has_error_; +}; + +} // namespace fe + +#endif // INC_REGISTER_GRAPH_OPTIMIZER_FUSION_PATTERN_H_ diff --git a/inc/register/graph_optimizer/graph_fusion/graph_fusion_pass_base.h b/inc/register/graph_optimizer/graph_fusion/graph_fusion_pass_base.h new file mode 100644 index 000000000..68e07605b --- /dev/null +++ b/inc/register/graph_optimizer/graph_fusion/graph_fusion_pass_base.h @@ -0,0 +1,113 @@ +/** + * @file custom_pattern_fusion_base_pass.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. 
+ * + * @brief custom_pattern_fusion_base_pass + * + * @author Huawei + * + * @version 1.0 + * + */ + +#ifndef INC_REGISTER_GRAPH_OPTIMIZER_GRAPH_FUSION_PASS_BASE_H_ +#define INC_REGISTER_GRAPH_OPTIMIZER_GRAPH_FUSION_PASS_BASE_H_ + +#include +#include +#include +#include +#include + +#include "register/graph_optimizer/graph_fusion/fusion_pattern.h" +#include "register/graph_optimizer/graph_fusion/graph_pass.h" + +using std::initializer_list; +using std::map; +using std::string; +using std::vector; + +using namespace std; + +namespace fe { +enum GraphFusionPassType { + BUILT_IN_GRAPH_PASS = 0, + BUILT_IN_VECTOR_CORE_GRAPH_PASS, + CUSTOM_AI_CORE_GRAPH_PASS, + CUSTOM_VECTOR_CORE_GRAPH_PASS, + SECOND_ROUND_BUILT_IN_GRAPH_PASS, + GRAPH_FUSION_PASS_TYPE_RESERVED, +}; +class PatternFusionBasePassImpl; +using PatternFusionBasePassImplPtr = std::shared_ptr; + +/** Pass based on pattern + * @ingroup FUSION_PASS_GROUP + * @note New virtual methods should be append at the end of this class + */ +class GraphFusionPassBase : public GraphPass { +public: + using OpDesc = FusionPattern::OpDesc; + using Mapping = map, vector>; + using Mappings = vector; + + GraphFusionPassBase(); + virtual ~GraphFusionPassBase(); + + /** execute pass + * + * @param [in] graph, the graph waiting for pass level optimization + * @return SUCCESS, successfully optimized the graph by the pass + * @return NOT_CHANGED, the graph did not change + * @return FAILED, fail to modify graph + */ + Status Run(ge::ComputeGraph &graph) override; + +protected: + /** define pattern + * + * @return NA + */ + virtual vector DefinePatterns() = 0; + + /** do fusion according to nodes matched + * + * @param graph the graph waiting for pass level optimization + * @param newNodes fusion result node(s) + * @return SUCCESS, successfully optimized the graph by the pass + * @return NOT_CHANGED, the graph did not change + * @return FAILED, fail to modify graph + */ + virtual Status Fusion(ge::ComputeGraph &graph, Mapping &mapping, 
+ vector &newNodes) = 0; // lint !e148 + + /** get nodes from matched result + * + * @param mapping match result + * @return nodes result + */ + static ge::NodePtr GetNodeFromMapping(const string &id, + const Mapping &mapping); + +private: + /** match all nodes in graph according to pattern + * + * @param pattern fusion pattern defined + * @param mappings match result + * @return SUCCESS, successfully add edge + * @return FAILED, fail + */ + bool MatchAll(ge::ComputeGraph &graph, const FusionPattern &pattern, + Mappings &mappings); + + Status RunOnePattern(ge::ComputeGraph &graph, const FusionPattern &pattern, + bool &changed); // lint !e148 + + /** Internal implement class ptr */ + std::shared_ptr patternFusionBasePassImplPtr_; +}; + +} // namespace fe + +#endif // INC_REGISTER_GRAPH_OPTIMIZER_GRAPH_FUSION_PASS_BASE_H_ diff --git a/inc/register/graph_optimizer/graph_fusion/graph_pass.h b/inc/register/graph_optimizer/graph_fusion/graph_pass.h new file mode 100644 index 000000000..87e219101 --- /dev/null +++ b/inc/register/graph_optimizer/graph_fusion/graph_pass.h @@ -0,0 +1,39 @@ +/** + * @file graph_pass.h + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * + * @brief define graph pass, which provides two interface: 1. run pass; + * 2. 
record op names before fusion + * + * @author Huawei + * + * @version 1.0 + */ + +#ifndef INC_REGISTER_GRAPH_OPTIMIZER_GRAPH_PASS_H_ +#define INC_REGISTER_GRAPH_OPTIMIZER_GRAPH_PASS_H_ + +#include "register/graph_optimizer/graph_fusion/pass.h" +#include + +namespace fe { + +/** graph pass + * @ingroup GRAPH_PASS_GROUP + * graph level pass + */ +class GraphPass : public Pass { +public: + /** execute pass + * + * @param [in] graph, the graph waiting for pass level optimization + * @return SUCCESS, successfully optimized the graph by the pass + * @return NOT_CHANGED, the graph did not change + * @return FAILED, fail to modify graph + */ + virtual Status Run(ge::ComputeGraph &graph) = 0; +}; + +} // namespace fe + +#endif // INC_REGISTER_GRAPH_OPTIMIZER_GRAPH_PASS_H_ diff --git a/inc/register/graph_optimizer/graph_fusion/pass.h b/inc/register/graph_optimizer/graph_fusion/pass.h new file mode 100644 index 000000000..eb31e1d1b --- /dev/null +++ b/inc/register/graph_optimizer/graph_fusion/pass.h @@ -0,0 +1,48 @@ +/** + * @file pass.h + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. 
+ * + * @brief define pass + * + * @author Huawei + * + * @version 1.0 + */ +/** @defgroup FUSION_PASS_GROUP Fusion Pass Interface */ + +#ifndef INC_REGISTER_GRAPH_OPTIMIZER_PASS_H_ +#define INC_REGISTER_GRAPH_OPTIMIZER_PASS_H_ + +#include "graph/compute_graph.h" +#include "register/graph_optimizer/graph_optimize_register_error_codes.h" + +namespace fe { + +/** fusion pass + * @ingroup GRAPH_PASS_GROUP + * network level pass + */ +template class Pass { +public: + virtual ~Pass() {} + + /** execute pass + * + * @param [in] graph, the graph waiting for pass level optimization + * @return SUCCESS, successfully optimized the graph by the pass + * @return NOT_CHANGED, the graph did not change + * @return FAILED, fail to modify graph + */ + virtual Status Run(ge::ComputeGraph &graph) = 0; + + void SetName(const string &name) { name_ = name; } + + string GetName() { return name_; } + +private: + string name_; +}; + +} // namespace fe + +#endif // INC_REGISTER_GRAPH_OPTIMIZER_PASS_H_ diff --git a/inc/register/graph_optimizer/graph_optimize_register_error_codes.h b/inc/register/graph_optimizer/graph_optimize_register_error_codes.h new file mode 100644 index 000000000..d88f1275e --- /dev/null +++ b/inc/register/graph_optimizer/graph_optimize_register_error_codes.h @@ -0,0 +1,50 @@ +/** + * @file graph_optimize_register_error_codes.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. 
+ * + * @brief fe_error_codes + * + * @author Huawei + * + * @version 1.0 + * + */ + +#ifndef INC_REGISTER_GRAPH_OPTIMIZE_REGISTER_ERROR_CODES_H_ +#define INC_REGISTER_GRAPH_OPTIMIZE_REGISTER_ERROR_CODES_H_ + +#include +#include + +/** Assigned SYS ID */ +const uint8_t SYSID_FE = 3; + +/** Common module ID */ +const uint8_t FE_MODID_COMMON = 50; + +namespace fe { + +/** FE error code definiton Macro +* Build error code +*/ +#define FE_DEF_ERRORNO(sysid, modid, name, value, desc) \ + static constexpr fe::Status name = \ + (((((uint32_t)(0xFF & ((uint8_t)(sysid)))) << 24) | \ + (((uint32_t)(0xFF & ((uint8_t)(modid)))) << 16)) | \ + (0xFFFF & ((uint16_t)(value)))); + +using Status = uint32_t; + +#define FE_DEF_ERRORNO_COMMON(name, value, desc) \ + FE_DEF_ERRORNO(SYSID_FE, FE_MODID_COMMON, name, value, desc) + +using Status = uint32_t; + +FE_DEF_ERRORNO(0, 0, SUCCESS, 0, "success"); +FE_DEF_ERRORNO(0xFF, 0xFF, FAILED, 0xFFFF, "failed"); +FE_DEF_ERRORNO_COMMON(NOT_CHANGED, 201, "The nodes of the graph not changed."); +FE_DEF_ERRORNO_COMMON(PARAM_INVALID, 1, "Parameter's invalid!"); + +} // namespace fe +#endif // INC_REGISTER_GRAPH_OPTIMIZE_REGISTER_ERROR_CODES_H_ diff --git a/inc/register/op_kernel_registry.h b/inc/register/op_kernel_registry.h new file mode 100644 index 000000000..5fed8960e --- /dev/null +++ b/inc/register/op_kernel_registry.h @@ -0,0 +1,49 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_REGISTER_OP_KERNEL_REGISTRY_H_ +#define INC_REGISTER_OP_KERNEL_REGISTRY_H_ +#include +#include +#include "register/register_types.h" +#include "register.h" + +namespace ge { +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpKernelRegistry { + public: + using CreateFn = HostCpuOp* (*)(); + ~OpKernelRegistry(); + + static OpKernelRegistry& GetInstance() { + static OpKernelRegistry instance; + return instance; + } + + bool IsRegistered(const std::string &op_type); + + void RegisterHostCpuOp(const std::string &op_type, CreateFn create_fn); + + std::unique_ptr CreateHostCpuOp(const std::string &op_type); + + private: + OpKernelRegistry(); + class OpKernelRegistryImpl; + /*lint -e148*/ + std::unique_ptr impl_; +}; +} // namespace ge + +#endif // INC_REGISTER_OP_KERNEL_REGISTRY_H_ diff --git a/inc/register/op_registry.h b/inc/register/op_registry.h new file mode 100644 index 000000000..1dc14b8b6 --- /dev/null +++ b/inc/register/op_registry.h @@ -0,0 +1,86 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_REGISTER_OP_REGISTRY_H_ +#define INC_REGISTER_OP_REGISTRY_H_ + +#include +#include +#include +#include +#include + +#include "register/register.h" + +namespace domi { +enum RemoveInputType { + OMG_MOVE_TYPE_DTYPE = 0, + OMG_MOVE_TYPE_VALUE, + OMG_MOVE_TYPE_SHAPE, + OMG_MOVE_TYPE_FORMAT, + OMG_MOVE_TYPE_AXIS, + OMG_MOVE_TYPE_SCALAR_VALUE, + OMG_REMOVE_TYPE_WITH_COND = 1000, + OMG_REMOVE_INPUT_WITH_ORIGINAL_TYPE, + OMG_INPUT_REORDER, +}; + +struct RemoveInputConfigure { + int inputIdx = INT_MAX; + std::string attrName; + RemoveInputType moveType; + bool attrValue = false; + std::string originalType; + std::vector input_order; +}; + +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { + public: + static OpRegistry *Instance(); + + std::vector registrationDatas; + + bool Register(const OpRegistrationData ®_data); + + domi::ImplyType GetImplyType(const std::string &op_type); + + void GetOpTypeByImplyType(std::vector &vec_op_type, const domi::ImplyType &imply_type); + + domi::ParseParamFunc GetParseParamFunc(const std::string &op_type, const std::string &ori_type); + + domi::ParseParamByOpFunc GetParseParamByOperatorFunc(const std::string &ori_type); + + domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type, const std::string &ori_type); + + domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type); + + domi::ImplyType GetImplyTypeByOriOpType(const std::string &ori_optype); + + const std::vector &GetRemoveInputConfigure(const std::string &ori_optype) const; + + bool GetOmTypeByOriOpType(const std::string &ori_optype, std::string &om_type); + + private: + std::unordered_map op_run_mode_map_; + std::unordered_map op_parse_params_fn_map_; + std::unordered_map parse_params_by_op_func_map_; + std::unordered_map fusion_op_parse_params_fn_map_; + std::unordered_map op_types_to_parse_subgraph_post_func_; + std::unordered_map> remove_input_configure_map_; + std::unordered_map origin_type_to_om_type_; 
+}; +} // namespace domi +#endif // INC_REGISTER_OP_REGISTRY_H_ diff --git a/inc/register/op_tiling.h b/inc/register/op_tiling.h new file mode 100644 index 000000000..e9d19f942 --- /dev/null +++ b/inc/register/op_tiling.h @@ -0,0 +1,133 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_OP_TILING_H_ +#define INC_OP_TILING_H_ + +#include "external/register/register_types.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/node.h" + +#include +#include +#include +#include +#include +#include +#include +#include "graph/node.h" + +#define REGISTER_OP_TILING_FUNC(optype, opfunc) \ + REGISTER_OP_TILING_FUNC_UNIQ_HELPER(optype, opfunc, __COUNTER__) + +#define REGISTER_OP_TILING_FUNC_UNIQ_HELPER(optype, opfunc, counter) \ + REGISTER_OP_TILING_FUNC_UNIQ(optype, opfunc, counter) + +#define REGISTER_OP_TILING_FUNC_UNIQ(optype, opfunc, counter) \ + static OpTilingInterf g_##optype##TilingInterf##counter(#optype, opfunc) + +namespace optiling { + +enum TensorArgType { + TA_NONE, + TA_SINGLE, + TA_LIST, +}; + + +using ByteBuffer = std::stringstream; + +struct TeOpTensor { + std::vector shape; + std::vector ori_shape; + std::string format; + std::string ori_format; + std::string dtype; + std::map attrs; +}; + + +struct TeOpTensorArg { + TensorArgType arg_type; + std::vector tensor; +}; + +struct OpRunInfo { + uint32_t block_dim; + std::vector workspaces; + ByteBuffer tiling_data; +}; + 
+ +using TeOpAttrArgs = std::vector; +using TeConstTensorData = std::tuple; + +struct TeOpParas { + std::vector inputs; + std::vector outputs; + std::map const_inputs; + TeOpAttrArgs attrs; +}; + + +using OpTilingFunc = std::function; + +using OpTilingFuncPtr = bool(*)(const std::string&, const TeOpParas&, const nlohmann::json& , OpRunInfo&); + +class FMK_FUNC_HOST_VISIBILITY OpTilingInterf +{ +public: + OpTilingInterf(std::string op_type, OpTilingFunc func); + ~OpTilingInterf() = default; + static std::map &RegisteredOpInterf(); +}; + + +template +ByteBuffer& ByteBufferPut(ByteBuffer &buf, const T &value) +{ + buf.write(reinterpret_cast(&value), sizeof(value)); + buf.flush(); + return buf; +} + +template +ByteBuffer& ByteBufferGet(ByteBuffer &buf, T &value) +{ + buf.read(reinterpret_cast(&value), sizeof(value)); + return buf; +} + +inline size_t ByteBufferGetAll(ByteBuffer &buf, char *dest, size_t dest_len) +{ + size_t nread = 0; + size_t rn = 0; + do { + rn = buf.readsome(dest + nread, dest_len - nread); + nread += rn; + } while (rn > 0 && dest_len > nread); + + return nread; +} + + +extern "C" ge::graphStatus OpParaCalculate(const ge::Node &node, OpRunInfo &run_info); +extern "C" ge::graphStatus OpAtomicCalculate(const ge::Node &node, OpRunInfo &run_info); + +} + +#endif // INC_OP_TILING_H_ diff --git a/inc/register/proto/caffe/caffe.proto b/inc/register/proto/caffe/caffe.proto new file mode 100644 index 000000000..f2d1acc64 --- /dev/null +++ b/inc/register/proto/caffe/caffe.proto @@ -0,0 +1,1802 @@ +syntax = "proto2"; + +package domi.caffe; + +// Specifies the shape (dimensions) of a Blob. 
+message BlobShape { + repeated int64 dim = 1 [packed = true]; +} + +message BlobProto { + optional BlobShape shape = 7; + repeated float data = 5 [packed = true]; + repeated float diff = 6 [packed = true]; + repeated double double_data = 8 [packed = true]; + repeated double double_diff = 9 [packed = true]; + optional bytes int8_data = 10; + repeated int32 int32_data = 11 [packed = true]; + repeated uint64 uint64_data = 12 [packed = true]; + // 4D dimensions -- deprecated. Use "shape" instead. + optional int32 num = 1 [default = 0]; + optional int32 channels = 2 [default = 0]; + optional int32 height = 3 [default = 0]; + optional int32 width = 4 [default = 0]; +} + +// The BlobProtoVector is simply a way to pass multiple blobproto instances +// around. +message BlobProtoVector { + repeated BlobProto blobs = 1; +} + +message Datum { + optional int32 channels = 1; + optional int32 height = 2; + optional int32 width = 3; + // the actual image data, in bytes + optional bytes data = 4; + optional int32 label = 5; + // Optionally, the datum could also hold float data. + repeated float float_data = 6; + // If true data contains an encoded image that need to be decoded + optional bool encoded = 7 [default = false]; +} + +message FillerParameter { + // The filler type. + optional string type = 1 [default = 'constant']; + optional float value = 2 [default = 0]; // the value in constant filler + optional float min = 3 [default = 0]; // the min value in uniform filler + optional float max = 4 [default = 1]; // the max value in uniform filler + optional float mean = 5 [default = 0]; // the mean value in Gaussian filler + optional float std = 6 [default = 1]; // the std value in Gaussian filler + // The expected number of non-zero output weights for a given input in + // Gaussian filler -- the default -1 means don't perform sparsification. + optional int32 sparse = 7 [default = -1]; + // Normalize the filler variance by fan_in, fan_out, or their average. 
+ // Applies to 'xavier' and 'msra' fillers. + enum VarianceNorm { + FAN_IN = 0; + FAN_OUT = 1; + AVERAGE = 2; + } + optional VarianceNorm variance_norm = 8 [default = FAN_IN]; +} + +message NetParameter { + optional string name = 1; // consider giving the network a name + // DEPRECATED. See InputParameter. The input blobs to the network. + repeated string input = 3; + // DEPRECATED. See InputParameter. The shape of the input blobs. + repeated BlobShape input_shape = 8; + + // 4D input dimensions -- deprecated. Use "input_shape" instead. + // If specified, for each input blob there should be four + // values specifying the num, channels, height and width of the input blob. + // Thus, there should be a total of (4 * #input) numbers. + repeated int32 input_dim = 4; + + // Whether the network will force every layer to carry out backward operation. + // If set False, then whether to carry out backward is determined + // automatically according to the net structure and learning rates. + optional bool force_backward = 5 [default = false]; + // The current "state" of the network, including the phase, level, and stage. + // Some layers may be included/excluded depending on this state and the states + // specified in the layers' include and exclude fields. + optional NetState state = 6; + + // Print debugging information about results while running Net::Forward, + // Net::Backward, and Net::Update. + optional bool debug_info = 7 [default = false]; + + // The layers that make up the net. Each of their configurations, including + // connectivity and behavior, is specified as a LayerParameter. + repeated LayerParameter layer = 100; // ID 100 so layers are printed last. + + // DEPRECATED: use 'layer' instead. + repeated V1LayerParameter layers = 2; +} + +// NOTE +// Update the next available ID when you add a new SolverParameter field. 
+// +// SolverParameter next available ID: 42 (last added: layer_wise_reduce) +message SolverParameter { + ////////////////////////////////////////////////////////////////////////////// + // Specifying the train and test networks + // + // Exactly one train net must be specified using one of the following fields: + // train_net_param, train_net, net_param, net + // One or more test nets may be specified using any of the following fields: + // test_net_param, test_net, net_param, net + // If more than one test net field is specified (e.g., both net and + // test_net are specified), they will be evaluated in the field order given + // above: (1) test_net_param, (2) test_net, (3) net_param/net. + // A test_iter must be specified for each test_net. + // A test_level and/or a test_stage may also be specified for each test_net. + ////////////////////////////////////////////////////////////////////////////// + + // Proto filename for the train net, possibly combined with one or more + // test nets. + optional string net = 24; + // Inline train net param, possibly combined with one or more test nets. + optional NetParameter net_param = 25; + + optional string train_net = 1; // Proto filename for the train net. + repeated string test_net = 2; // Proto filenames for the test nets. + optional NetParameter train_net_param = 21; // Inline train net params. + repeated NetParameter test_net_param = 22; // Inline test net params. + + // The states for the train/test nets. Must be unspecified or + // specified once per net. + // + // By default, all states will have solver = true; + // train_state will have phase = TRAIN, + // and all test_state's will have phase = TEST. + // Other defaults are set according to the NetState defaults. + optional NetState train_state = 26; + repeated NetState test_state = 27; + + // The number of iterations for each test net. + repeated int32 test_iter = 3; + + // The number of iterations between two testing phases. 
+ optional int32 test_interval = 4 [default = 0]; + optional bool test_compute_loss = 19 [default = false]; + // If true, run an initial test pass before the first iteration, + // ensuring memory availability and printing the starting value of the loss. + optional bool test_initialization = 32 [default = true]; + optional float base_lr = 5; // The base learning rate + // the number of iterations between displaying info. If display = 0, no info + // will be displayed. + optional int32 display = 6; + // Display the loss averaged over the last average_loss iterations + optional int32 average_loss = 33 [default = 1]; + optional int32 max_iter = 7; // the maximum number of iterations + // accumulate gradients over `iter_size` x `batch_size` instances + optional int32 iter_size = 36 [default = 1]; + + // The learning rate decay policy. The currently implemented learning rate + // policies are as follows: + // - fixed: always return base_lr. + // - step: return base_lr * gamma ^ (floor(iter / step)) + // - exp: return base_lr * gamma ^ iter + // - inv: return base_lr * (1 + gamma * iter) ^ (- power) + // - multistep: similar to step but it allows non uniform steps defined by + // stepvalue + // - poly: the effective learning rate follows a polynomial decay, to be + // zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power) + // - sigmoid: the effective learning rate follows a sigmod decay + // return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize)))) + // + // where base_lr, max_iter, gamma, step, stepvalue and power are defined + // in the solver parameter protocol buffer, and iter is the current iteration. + optional string lr_policy = 8; + optional float gamma = 9; // The parameter to compute the learning rate. + optional float power = 10; // The parameter to compute the learning rate. + optional float momentum = 11; // The momentum value. + optional float weight_decay = 12; // The weight decay. 
+ // regularization types supported: L1 and L2 + // controlled by weight_decay + optional string regularization_type = 29 [default = "L2"]; + // the stepsize for learning rate policy "step" + optional int32 stepsize = 13; + // the stepsize for learning rate policy "multistep" + repeated int32 stepvalue = 34; + + // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm, + // whenever their actual L2 norm is larger. + optional float clip_gradients = 35 [default = -1]; + + optional int32 snapshot = 14 [default = 0]; // The snapshot interval + optional string snapshot_prefix = 15; // The prefix for the snapshot. + // whether to snapshot diff in the results or not. Snapshotting diff will help + // debugging but the final protocol buffer size will be much larger. + optional bool snapshot_diff = 16 [default = false]; + enum SnapshotFormat { + HDF5 = 0; + BINARYPROTO = 1; + } + optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO]; + // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default. + enum SolverMode { + CPU = 0; + GPU = 1; + } + optional SolverMode solver_mode = 17 [default = GPU]; + // the device_id will that be used in GPU mode. Use device_id = 0 in default. + optional int32 device_id = 18 [default = 0]; + // If non-negative, the seed with which the Solver will initialize the Caffe + // random number generator -- useful for reproducible results. Otherwise, + // (and by default) initialize using a seed derived from the system clock. 
+ optional int64 random_seed = 20 [default = -1]; + + // type of the solver + optional string type = 40 [default = "SGD"]; + + // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam + optional float delta = 31 [default = 1e-8]; + // parameters for the Adam solver + optional float momentum2 = 39 [default = 0.999]; + + // RMSProp decay value + // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) + optional float rms_decay = 38 [default = 0.99]; + + // If true, print information about the state of the net that may help with + // debugging learning problems. + optional bool debug_info = 23 [default = false]; + + // If false, don't save a snapshot after training finishes. + optional bool snapshot_after_train = 28 [default = true]; + + // DEPRECATED: old solver enum types, use string instead + enum SolverType { + SGD = 0; + NESTEROV = 1; + ADAGRAD = 2; + RMSPROP = 3; + ADADELTA = 4; + ADAM = 5; + } + // DEPRECATED: use type instead of solver_type + optional SolverType solver_type = 30 [default = SGD]; + + // Overlap compute and communication for data parallel training + optional bool layer_wise_reduce = 41 [default = true]; +} + +// A message that stores the solver snapshots +message SolverState { + optional int32 iter = 1; // The current iteration + optional string learned_net = 2; // The file that stores the learned net. + repeated BlobProto history = 3; // The history for sgd solvers + optional int32 current_step = 4 [default = 0]; // The current step for learning rate +} + +enum Phase { + TRAIN = 0; + TEST = 1; +} + +message NetState { + optional Phase phase = 1 [default = TEST]; + optional int32 level = 2 [default = 0]; + repeated string stage = 3; +} + +message NetStateRule { + // Set phase to require the NetState have a particular phase (TRAIN or TEST) + // to meet this rule. + optional Phase phase = 1; + + // Set the minimum and/or maximum levels in which the layer should be used. 
+ // Leave undefined to meet the rule regardless of level. + optional int32 min_level = 2; + optional int32 max_level = 3; + + // Customizable sets of stages to include or exclude. + // The net must have ALL of the specified stages and NONE of the specified + // "not_stage"s to meet the rule. + // (Use multiple NetStateRules to specify conjunctions of stages.) + repeated string stage = 4; + repeated string not_stage = 5; +} + +// Specifies training parameters (multipliers on global learning constants, +// and the name and other settings used for weight sharing). +message ParamSpec { + // The names of the parameter blobs -- useful for sharing parameters among + // layers, but never required otherwise. To share a parameter between two + // layers, give it a (non-empty) name. + optional string name = 1; + + // Whether to require shared weights to have the same shape, or just the same + // count -- defaults to STRICT if unspecified. + optional DimCheckMode share_mode = 2; + enum DimCheckMode { + // STRICT (default) requires that num, channels, height, width each match. + STRICT = 0; + // PERMISSIVE requires only the count (num*channels*height*width) to match. + PERMISSIVE = 1; + } + + // The multiplier on the global learning rate for this parameter. + optional float lr_mult = 3 [default = 1.0]; + + // The multiplier on the global weight decay for this parameter. + optional float decay_mult = 4 [default = 1.0]; +} + +// NOTE +// Update the next available ID when you add a new LayerParameter field. +// +// LayerParameter next available layer-specific ID: 151 (last added: smooth_l1_loss_param) +message LayerParameter { + optional string name = 1; // the layer name + optional string type = 2; // the layer type + repeated string bottom = 3; // the name of each bottom blob + repeated string top = 4; // the name of each top blob + + // The train / test phase for computation. + optional Phase phase = 10; + + // The amount of weight to assign each top blob in the objective. 
+ // Each layer assigns a default value, usually of either 0 or 1, + // to each top blob. + repeated float loss_weight = 5; + + // Specifies training parameters (multipliers on global learning constants, + // and the name and other settings used for weight sharing). + repeated ParamSpec param = 6; + + // The blobs containing the numeric parameters of the layer. + repeated BlobProto blobs = 7; + + // Specifies whether to backpropagate to each bottom. If unspecified, + // Caffe will automatically infer whether each input needs backpropagation + // to compute parameter gradients. If set to true for some inputs, + // backpropagation to those inputs is forced; if set false for some inputs, + // backpropagation to those inputs is skipped. + // + // The size must be either 0 or equal to the number of bottoms. + repeated bool propagate_down = 11; + + // Rules controlling whether and when a layer is included in the network, + // based on the current NetState. You may specify a non-zero number of rules + // to include OR exclude, but not both. If no include or exclude rules are + // specified, the layer is always included. If the current NetState meets + // ANY (i.e., one or more) of the specified rules, the layer is + // included/excluded. + repeated NetStateRule include = 8; + repeated NetStateRule exclude = 9; + + // Parameters for data pre-processing. + optional TransformationParameter transform_param = 100; + + // Parameters shared by loss layers. + optional LossParameter loss_param = 101; + + // Layer type-specific parameters. + // + // Note: certain layers may have more than one computational engine + // for their implementation. These layers include an Engine type and + // engine parameter for selecting the implementation. + // The default for the engine is set by the ENGINE switch at compile-time. 
+ optional AccuracyParameter accuracy_param = 102; + optional ArgMaxParameter argmax_param = 103; + optional BatchNormParameter batch_norm_param = 139; + optional BiasParameter bias_param = 141; + optional ConcatParameter concat_param = 104; + optional ContrastiveLossParameter contrastive_loss_param = 105; + optional ConvolutionParameter convolution_param = 106; + optional CropParameter crop_param = 144; + optional DataParameter data_param = 107; + optional DetectionOutputParameter detection_output_param = 150; + optional DropoutParameter dropout_param = 108; + optional DummyDataParameter dummy_data_param = 109; + optional EltwiseParameter eltwise_param = 110; + optional ELUParameter elu_param = 140; + optional EmbedParameter embed_param = 137; + optional ExpParameter exp_param = 111; + optional FlattenParameter flatten_param = 135; + optional HDF5DataParameter hdf5_data_param = 112; + optional HDF5OutputParameter hdf5_output_param = 113; + optional HingeLossParameter hinge_loss_param = 114; + optional ImageDataParameter image_data_param = 115; + optional InfogainLossParameter infogain_loss_param = 116; + optional InnerProductParameter inner_product_param = 117; + optional InputParameter input_param = 143; + optional LogParameter log_param = 134; + optional LRNParameter lrn_param = 118; + optional MemoryDataParameter memory_data_param = 119; + optional MVNParameter mvn_param = 120; + optional ParameterParameter parameter_param = 145; + optional PoolingParameter pooling_param = 121; + optional PowerParameter power_param = 122; + optional PReLUParameter prelu_param = 131; + optional PythonParameter python_param = 130; + optional RecurrentParameter recurrent_param = 146; + optional ReductionParameter reduction_param = 136; + optional ReLUParameter relu_param = 123; + optional ReshapeParameter reshape_param = 133; + optional ScaleParameter scale_param = 142; + optional SigmoidParameter sigmoid_param = 124; + optional SmoothL1LossParameter smooth_l1_loss_param = 148; + 
optional SoftmaxParameter softmax_param = 125; + optional SPPParameter spp_param = 132; + optional SliceParameter slice_param = 126; + optional TanHParameter tanh_param = 127; + optional ThresholdParameter threshold_param = 128; + optional TileParameter tile_param = 138; + optional WindowDataParameter window_data_param = 129; + optional PermuteParameter permute_param = 202; + optional PriorBoxParameter prior_box_param = 203; + optional NormalizeParameter norm_param = 206; + optional PSROIPoolingParameter psroi_pooling_param = 207; + optional FreespaceExtractParameter freespace_extract_param = 151; + optional PostprocessParameter postprocess_param = 152; + optional SpatialTransformParameter spatial_transform_param = 153; + optional ROIAlignParameter roi_align_param = 154; + optional ReorgParameter reorg_param = 155; + optional RegionParameter region_param = 156; + optional ReverseParameter reverse_param = 157; + optional InterpParameter interp_param = 158; + optional ShuffleChannelParameter shuffle_channel_param = 159; + optional UpsampleParameter upsample_param = 160; + optional ROIPoolingParameter roi_pooling_param = 161; + optional YoloParameter yolo_param = 199; + optional YoloV3DetectionOutputParameter yolov3_detection_output_param = 200; + optional ProposalParameter proposal_param = 201; + optional FSRDetectionOutputParameter fsrdetectionoutput_param = 222; + optional SSDDetectionOutputParameter ssddetectionoutput_param = 232; + optional YoloV2DetectionOutputParameter yolov2_detection_output_param = 204; + optional QuantParameter quant_param = 208; + optional CondTakeParameter condtake_param = 233; + optional MatrixInverseParameter matrix_inverse_param = 210; + optional WarpPerspectiveParameter warp_perspective_param = 234; + optional BatchMatMulParameter batch_matmul_param = 235; + optional SpatialTransformerParameter st_param = 5000; +} + +// Message that stores parameters used to apply transformation +// to the data layer's data +message 
TransformationParameter { + // For data pre-processing, we can do simple scaling and subtracting the + // data mean, if provided. Note that the mean subtraction is always carried + // out before scaling. + optional float scale = 1 [default = 1]; + // Specify if we want to randomly mirror data. + optional bool mirror = 2 [default = false]; + // Specify if we would like to randomly crop an image. + optional uint32 crop_size = 3 [default = 0]; + // mean_file and mean_value cannot be specified at the same time + optional string mean_file = 4; + // if specified can be repeated once (would substract it from all the channels) + // or can be repeated the same number of times as channels + // (would subtract them from the corresponding channel) + repeated float mean_value = 5; + // Force the decoded image to have 3 color channels. + optional bool force_color = 6 [default = false]; + // Force the decoded image to have 1 color channels. + optional bool force_gray = 7 [default = false]; +} + +// Message that stores parameters shared by loss layers +message LossParameter { + // If specified, ignore instances with the given label. + optional int32 ignore_label = 1; + // How to normalize the loss for loss layers that aggregate across batches, + // spatial dimensions, or other dimensions. Currently only implemented in + // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers. + enum NormalizationMode { + // Divide by the number of examples in the batch times spatial dimensions. + // Outputs that receive the ignore label will NOT be ignored in computing + // the normalization factor. + FULL = 0; + // Divide by the total number of output locations that do not take the + // ignore_label. If ignore_label is not set, this behaves like FULL. + VALID = 1; + // Divide by the batch size. + BATCH_SIZE = 2; + // Do not normalize the loss. + NONE = 3; + } + // For historical reasons, the default normalization for + // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID. 
+ optional NormalizationMode normalization = 3 [default = VALID]; + // Deprecated. Ignored if normalization is specified. If normalization + // is not specified, then setting this to false will be equivalent to + // normalization = BATCH_SIZE to be consistent with previous behavior. + optional bool normalize = 2; +} + +// Messages that store parameters used by individual layer types follow, in +// alphabetical order. + +message AccuracyParameter { + // When computing accuracy, count as correct by comparing the true label to + // the top k scoring classes. By default, only compare to the top scoring + // class (i.e. argmax). + optional uint32 top_k = 1 [default = 1]; + + // The "label" axis of the prediction blob, whose argmax corresponds to the + // predicted label -- may be negative to index from the end (e.g., -1 for the + // last axis). For example, if axis == 1 and the predictions are + // (N x C x H x W), the label blob is expected to contain N*H*W ground truth + // labels with integer values in {0, 1, ..., C-1}. + optional int32 axis = 2 [default = 1]; + + // If specified, ignore instances with the given label. + optional int32 ignore_label = 3; +} + +message ArgMaxParameter { + // If true produce pairs (argmax, maxval) + optional bool out_max_val = 1 [default = false]; + optional uint32 top_k = 2 [default = 1]; + // The axis along which to maximise -- may be negative to index from the + // end (e.g., -1 for the last axis). + // By default ArgMaxLayer maximizes over the flattened trailing dimensions + // for each index of the first / num dimension. + optional int32 axis = 3; +} + +message ConcatParameter { + // The axis along which to concatenate -- may be negative to index from the + // end (e.g., -1 for the last axis). Other axes must have the + // same dimension for all the bottom blobs. + // By default, ConcatLayer concatenates blobs along the "channels" axis (1). 
+ optional int32 axis = 2 [default = 1]; + + // DEPRECATED: alias for "axis" -- does not support negative indexing. + optional uint32 concat_dim = 1 [default = 1]; +} + +message BatchNormParameter { + // If false, normalization is performed over the current mini-batch + // and global statistics are accumulated (but not yet used) by a moving + // average. + // If true, those accumulated mean and variance values are used for the + // normalization. + // By default, it is set to false when the network is in the training + // phase and true when the network is in the testing phase. + optional bool use_global_stats = 1; + // What fraction of the moving average remains each iteration? + // Smaller values make the moving average decay faster, giving more + // weight to the recent values. + // Each iteration updates the moving average @f$S_{t-1}@f$ with the + // current mean @f$ Y_t @f$ by + // @f$ S_t = (1-\beta)Y_t + \beta \cdot S_{t-1} @f$, where @f$ \beta @f$ + // is the moving_average_fraction parameter. + optional float moving_average_fraction = 2 [default = .999]; + // Small value to add to the variance estimate so that we don't divide by + // zero. + optional float eps = 3 [default = 1e-5]; +} + +message BiasParameter { + // The first axis of bottom[0] (the first input Blob) along which to apply + // bottom[1] (the second input Blob). May be negative to index from the end + // (e.g., -1 for the last axis). + // + // For example, if bottom[0] is 4D with shape 100x3x40x60, the output + // top[0] will have the same shape, and bottom[1] may have any of the + // following shapes (for the given value of axis): + // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 + // (axis == 1 == -3) 3; 3x40; 3x40x60 + // (axis == 2 == -2) 40; 40x60 + // (axis == 3 == -1) 60 + // Furthermore, bottom[1] may have the empty shape (regardless of the value of + // "axis") -- a scalar bias. 
+ optional int32 axis = 1 [default = 1]; + + // (num_axes is ignored unless just one bottom is given and the bias is + // a learned parameter of the layer. Otherwise, num_axes is determined by the + // number of axes by the second bottom.) + // The number of axes of the input (bottom[0]) covered by the bias + // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. + // Set num_axes := 0, to add a zero-axis Blob: a scalar. + optional int32 num_axes = 2 [default = 1]; + + // (filler is ignored unless just one bottom is given and the bias is + // a learned parameter of the layer.) + // The initialization for the learned bias parameter. + // Default is the zero (0) initialization, resulting in the BiasLayer + // initially performing the identity operation. + optional FillerParameter filler = 3; + optional bool bias_from_blob = 4 [default = true]; +} + +message ContrastiveLossParameter { + // margin for dissimilar pair + optional float margin = 1 [default = 1.0]; + // The first implementation of this cost did not exactly match the cost of + // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. + // legacy_version = false (the default) uses (margin - d)^2 as proposed in the + // Hadsell paper. New models should probably use this version. + // legacy_version = true uses (margin - d^2). This is kept to support / + // reproduce existing models and results + optional bool legacy_version = 2 [default = false]; +} + +message ConvolutionParameter { + optional uint32 num_output = 1; // The number of outputs for the layer + optional bool bias_term = 2 [default = true]; // whether to have bias terms + + // Pad, kernel size, and stride are all given as a single value for equal + // dimensions in all spatial dimensions, or once per spatial dimension. 
+ repeated uint32 pad = 3; // The padding size; defaults to 0 + repeated uint32 kernel_size = 4; // The kernel size + repeated uint32 stride = 6; // The stride; defaults to 1 + // Factor used to dilate the kernel, (implicitly) zero-filling the resulting + // holes. (Kernel dilation is sometimes referred to by its use in the + // algorithme à trous from Holschneider et al. 1987.) + repeated uint32 dilation = 18; // The dilation; defaults to 1 + + // For 2D convolution only, the *_h and *_w versions may also be used to + // specify both spatial dimensions. + optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) + optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) + optional uint32 kernel_h = 11; // The kernel height (2D only) + optional uint32 kernel_w = 12; // The kernel width (2D only) + optional uint32 stride_h = 13; // The stride height (2D only) + optional uint32 stride_w = 14; // The stride width (2D only) + + optional uint32 group = 5 [default = 1]; // The group size for group conv + + optional FillerParameter weight_filler = 7; // The filler for the weight + optional FillerParameter bias_filler = 8; // The filler for the bias + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 15 [default = DEFAULT]; + + // The axis to interpret as "channels" when performing convolution. + // Preceding dimensions are treated as independent inputs; + // succeeding dimensions are treated as "spatial". + // With (N, C, H, W) inputs, and axis == 1 (the default), we perform + // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for + // groups g>1) filters across the spatial axes (H, W) of the input. + // With (N, C, D, H, W) inputs, and axis == 1, we perform + // N independent 3D convolutions, sliding (C/g)-channels + // filters across the spatial axes (D, H, W) of the input. 
+ optional int32 axis = 16 [default = 1]; + + // Whether to force use of the general ND convolution, even if a specific + // implementation for blobs of the appropriate number of spatial dimensions + // is available. (Currently, there is only a 2D-specific convolution + // implementation; for input blobs with num_axes != 2, this option is + // ignored and the ND implementation will be used.) + optional bool force_nd_im2col = 17 [default = false]; +} + +message CropParameter { + // To crop, elements of the first bottom are selected to fit the dimensions + // of the second, reference bottom. The crop is configured by + // - the crop `axis` to pick the dimensions for cropping + // - the crop `offset` to set the shift for all/each dimension + // to align the cropped bottom with the reference bottom. + // All dimensions up to but excluding `axis` are preserved, while + // the dimensions including and trailing `axis` are cropped. + // If only one `offset` is set, then all dimensions are offset by this amount. + // Otherwise, the number of offsets must equal the number of cropped axes to + // shift the crop in each dimension accordingly. + // Note: standard dimensions are N,C,H,W so the default is a spatial crop, + // and `axis` may be negative to index from the end (e.g., -1 for the last + // axis). + optional int32 axis = 1 [default = 2]; + repeated uint32 offset = 2; +} + +message DataParameter { + enum DB { + LEVELDB = 0; + LMDB = 1; + } + // Specify the data source. + optional string source = 1; + // Specify the batch size. + optional uint32 batch_size = 4; + // The rand_skip variable is for the data layer to skip a few data points + // to avoid all asynchronous sgd clients to start at the same point. The skip + // point would be set as rand_skip * rand(0,1). Note that rand_skip should not + // be larger than the number of keys in the database. + // DEPRECATED. Each solver accesses a different subset of the database. 
+ optional uint32 rand_skip = 7 [default = 0]; + optional DB backend = 8 [default = LEVELDB]; + // DEPRECATED. See TransformationParameter. For data pre-processing, we can do + // simple scaling and subtracting the data mean, if provided. Note that the + // mean subtraction is always carried out before scaling. + optional float scale = 2 [default = 1]; + optional string mean_file = 3; + // DEPRECATED. See TransformationParameter. Specify if we would like to randomly + // crop an image. + optional uint32 crop_size = 5 [default = 0]; + // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror + // data. + optional bool mirror = 6 [default = false]; + // Force the encoded image to have 3 color channels + optional bool force_encoded_color = 9 [default = false]; + // Prefetch queue (Increase if data feeding bandwidth varies, within the + // limit of device memory for GPU training) + optional uint32 prefetch = 10 [default = 4]; +} + +message DropoutParameter { + optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio + optional bool scale_train = 2 [default = true]; // scale train or test phase +} + +// DummyDataLayer fills any number of arbitrarily shaped blobs with random +// (or constant) data generated by "Fillers" (see "message FillerParameter"). +message DummyDataParameter { + // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N + // shape fields, and 0, 1 or N data_fillers. + // + // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. + // If 1 data_filler is specified, it is applied to all top blobs. If N are + // specified, the ith is applied to the ith top blob. + repeated FillerParameter data_filler = 1; + repeated BlobShape shape = 6; + + // 4D dimensions -- deprecated. Use "shape" instead. 
+ repeated uint32 num = 2; + repeated uint32 channels = 3; + repeated uint32 height = 4; + repeated uint32 width = 5; +} + +message EltwiseParameter { + enum EltwiseOp { + PROD = 0; + SUM = 1; + MAX = 2; + } + optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation + repeated float coeff = 2; // blob-wise coefficient for SUM operation + + // Whether to use an asymptotically slower (for >2 inputs) but stabler method + // of computing the gradient for the PROD operation. (No effect for SUM op.) + optional bool stable_prod_grad = 3 [default = true]; +} + +// Message that stores parameters used by ELULayer +message ELUParameter { + // Described in: + // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate + // Deep Network Learning by Exponential Linear Units (ELUs). arXiv + optional float alpha = 1 [default = 1]; +} + +// Message that stores parameters used by EmbedLayer +message EmbedParameter { + optional uint32 num_output = 1; // The number of outputs for the layer + // The input is given as integers to be interpreted as one-hot + // vector indices with dimension num_input. Hence num_input should be + // 1 greater than the maximum possible input value. + optional uint32 input_dim = 2; + + optional bool bias_term = 3 [default = true]; // Whether to use a bias term + optional FillerParameter weight_filler = 4; // The filler for the weight + optional FillerParameter bias_filler = 5; // The filler for the bias + +} + +// Message that stores parameters used by ExpLayer +message ExpParameter { + // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. + // Or if base is set to the default (-1), base is set to e, + // so y = exp(shift + scale * x). 
+ optional float base = 1 [default = -1.0]; + optional float scale = 2 [default = 1.0]; + optional float shift = 3 [default = 0.0]; +} + +/// Message that stores parameters used by FlattenLayer +message FlattenParameter { + // The first axis to flatten: all preceding axes are retained in the output. + // May be negative to index from the end (e.g., -1 for the last axis). + optional int32 axis = 1 [default = 1]; + + // The last axis to flatten: all following axes are retained in the output. + // May be negative to index from the end (e.g., the default -1 for the last + // axis). + optional int32 end_axis = 2 [default = -1]; +} + +// Message that stores parameters used by HDF5DataLayer +message HDF5DataParameter { + // Specify the data source. + optional string source = 1; + // Specify the batch size. + optional uint32 batch_size = 2; + + // Specify whether to shuffle the data. + // If shuffle == true, the ordering of the HDF5 files is shuffled, + // and the ordering of data within any given HDF5 file is shuffled, + // but data between different files are not interleaved; all of a file's + // data are output (in a random order) before moving onto another file. + optional bool shuffle = 3 [default = false]; +} + +message HDF5OutputParameter { + optional string file_name = 1; +} + +message HingeLossParameter { + enum Norm { + L1 = 1; + L2 = 2; + } + // Specify the Norm to use L1 or L2 + optional Norm norm = 1 [default = L1]; +} + +message ImageDataParameter { + // Specify the data source. + optional string source = 1; + // Specify the batch size. + optional uint32 batch_size = 4 [default = 1]; + // The rand_skip variable is for the data layer to skip a few data points + // to avoid all asynchronous sgd clients to start at the same point. The skip + // point would be set as rand_skip * rand(0,1). Note that rand_skip should not + // be larger than the number of keys in the database. 
+ optional uint32 rand_skip = 7 [default = 0]; + // Whether or not ImageLayer should shuffle the list of files at every epoch. + optional bool shuffle = 8 [default = false]; + // It will also resize images if new_height or new_width are not zero. + optional uint32 new_height = 9 [default = 0]; + optional uint32 new_width = 10 [default = 0]; + // Specify if the images are color or gray + optional bool is_color = 11 [default = true]; + // DEPRECATED. See TransformationParameter. For data pre-processing, we can do + // simple scaling and subtracting the data mean, if provided. Note that the + // mean subtraction is always carried out before scaling. + optional float scale = 2 [default = 1]; + optional string mean_file = 3; + // DEPRECATED. See TransformationParameter. Specify if we would like to randomly + // crop an image. + optional uint32 crop_size = 5 [default = 0]; + // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror + // data. + optional bool mirror = 6 [default = false]; + optional string root_folder = 12 [default = ""]; +} + +message InfogainLossParameter { + // Specify the infogain matrix source. + optional string source = 1; + optional int32 axis = 2 [default = 1]; // axis of prob +} + +message InnerProductParameter { + optional uint32 num_output = 1; // The number of outputs for the layer + optional bool bias_term = 2 [default = true]; // whether to have bias terms + optional FillerParameter weight_filler = 3; // The filler for the weight + optional FillerParameter bias_filler = 4; // The filler for the bias + + // The first axis to be lumped into a single inner product computation; + // all preceding axes are retained in the output. + // May be negative to index from the end (e.g., -1 for the last axis). + optional int32 axis = 5 [default = 1]; + // Specify whether to transpose the weight matrix or not. + // If transpose == true, any operations will be performed on the transpose + // of the weight matrix. 
The weight matrix itself is not going to be transposed + // but rather the transfer flag of operations will be toggled accordingly. + optional bool transpose = 6 [default = false]; +} + +message InputParameter { + // This layer produces N >= 1 top blob(s) to be assigned manually. + // Define N shapes to set a shape for each top. + // Define 1 shape to set the same shape for every top. + // Define no shape to defer to reshaping manually. + repeated BlobShape shape = 1; +} + +// Message that stores parameters used by LogLayer +message LogParameter { + // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. + // Or if base is set to the default (-1), base is set to e, + // so y = ln(shift + scale * x) = log_e(shift + scale * x) + optional float base = 1 [default = -1.0]; + optional float scale = 2 [default = 1.0]; + optional float shift = 3 [default = 0.0]; +} + +// Message that stores parameters used by LRNLayer +message LRNParameter { + optional uint32 local_size = 1 [default = 5]; + optional float alpha = 2 [default = 1.]; + optional float beta = 3 [default = 0.75]; + enum NormRegion { + ACROSS_CHANNELS = 0; + WITHIN_CHANNEL = 1; + } + optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; + optional float k = 5 [default = 1.]; + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 6 [default = DEFAULT]; +} + +message MemoryDataParameter { + optional uint32 batch_size = 1; + optional uint32 channels = 2; + optional uint32 height = 3; + optional uint32 width = 4; +} + +message MVNParameter { + // This parameter can be set to false to normalize mean only + optional bool normalize_variance = 1 [default = true]; + + // This parameter can be set to true to perform DNN-like MVN + optional bool across_channels = 2 [default = false]; + + // Epsilon for not dividing by zero while normalizing variance + optional float eps = 3 [default = 1e-9]; +} + +message ParameterParameter { + optional BlobShape shape = 1; +} 
+ +message PoolingParameter { + enum PoolMethod { + MAX = 0; + AVE = 1; + STOCHASTIC = 2; + } + optional PoolMethod pool = 1 [default = MAX]; // The pooling method + // Pad, kernel size, and stride are all given as a single value for equal + // dimensions in height and width or as Y, X pairs. + optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X) + optional uint32 pad_h = 9 [default = 0]; // The padding height + optional uint32 pad_w = 10 [default = 0]; // The padding width + optional uint32 kernel_size = 2; // The kernel size (square) + optional uint32 kernel_h = 5; // The kernel height + optional uint32 kernel_w = 6; // The kernel width + optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X) + optional uint32 stride_h = 7; // The stride height + optional uint32 stride_w = 8; // The stride width + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 11 [default = DEFAULT]; + // If global_pooling then it will pool over the size of the bottom by doing + // kernel_h = bottom->height and kernel_w = bottom->width + optional bool global_pooling = 12 [default = false]; + optional bool ceil_mode = 13 [default = true]; + // How to calculate the output size - using ceil (default) or floor rounding. + enum RoundMode { + CEIL = 0; + FLOOR = 1; + } + optional RoundMode round_mode = 14 [default = CEIL]; +} + +message PowerParameter { + // PowerLayer computes outputs y = (shift + scale * x) ^ power. + optional float power = 1 [default = 1.0]; + optional float scale = 2 [default = 1.0]; + optional float shift = 3 [default = 0.0]; +} + +message PythonParameter { + optional string module = 1; + optional string layer = 2; + // This value is set to the attribute `param_str` of the `PythonLayer` object + // in Python before calling the `setup()` method. This could be a number, + // string, dictionary in Python dict format, JSON, etc. 
You may parse this + // string in `setup` method and use it in `forward` and `backward`. + optional string param_str = 3 [default = '']; + // Whether this PythonLayer is shared among worker solvers during data parallelism. + // If true, each worker solver sequentially run forward from this layer. + // This value should be set true if you are using it as a data layer. + optional bool share_in_parallel = 4 [default = false]; +} + +// Message that stores parameters used by RecurrentLayer +message RecurrentParameter { + // The dimension of the output (and usually hidden state) representation -- + // must be explicitly set to non-zero. + optional uint32 num_output = 1 [default = 0]; + + optional FillerParameter weight_filler = 2; // The filler for the weight + optional FillerParameter bias_filler = 3; // The filler for the bias + + // Whether to enable displaying debug_info in the unrolled recurrent net. + optional bool debug_info = 4 [default = false]; + + // Whether to add as additional inputs (bottoms) the initial hidden state + // blobs, and add as additional outputs (tops) the final timestep hidden state + // blobs. The number of additional bottom/top blobs required depends on the + // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs. + optional bool expose_hidden = 5 [default = false]; +} + +// Message that stores parameters used by ReductionLayer +message ReductionParameter { + enum ReductionOp { + SUM = 1; + ASUM = 2; + SUMSQ = 3; + MEAN = 4; + } + + optional ReductionOp operation = 1 [default = SUM]; // reduction operation + + // The first axis to reduce to a scalar -- may be negative to index from the + // end (e.g., -1 for the last axis). + // (Currently, only reduction along ALL "tail" axes is supported; reduction + // of axis M through N, where N < num_axes - 1, is unsupported.) + // Suppose we have an n-axis bottom Blob with shape: + // (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)). 
+ // If axis == m, the output Blob will have shape + // (d0, d1, d2, ..., d(m-1)), + // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1)) + // times, each including (dm * d(m+1) * ... * d(n-1)) individual data. + // If axis == 0 (the default), the output Blob always has the empty shape + // (count 1), performing reduction across the entire input -- + // often useful for creating new loss functions. + optional int32 axis = 2 [default = 0]; + + optional float coeff = 3 [default = 1.0]; // coefficient for output +} + +// Message that stores parameters used by ReLULayer +message ReLUParameter { + // Allow non-zero slope for negative inputs to speed up optimization + // Described in: + // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities + // improve neural network acoustic models. In ICML Workshop on Deep Learning + // for Audio, Speech, and Language Processing. + optional float negative_slope = 1 [default = 0]; + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 2 [default = DEFAULT]; +} + +message ReshapeParameter { + // Specify the output dimensions. If some of the dimensions are set to 0, + // the corresponding dimension from the bottom layer is used (unchanged). + // Exactly one dimension may be set to -1, in which case its value is + // inferred from the count of the bottom blob and the remaining dimensions. + // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: + // + // layer { + // type: "Reshape" bottom: "input" top: "output" + // reshape_param { ... 
} + // } + // + // If "input" is 2D with shape 2 x 8, then the following reshape_param + // specifications are all equivalent, producing a 3D blob "output" with shape + // 2 x 2 x 4: + // + // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } + // reshape_param { shape { dim: 0 dim:-1 dim: 4 } } + // + optional BlobShape shape = 1; + + // axis and num_axes control the portion of the bottom blob's shape that are + // replaced by (included in) the reshape. By default (axis == 0 and + // num_axes == -1), the entire bottom blob shape is included in the reshape, + // and hence the shape field must specify the entire output shape. + // + // axis may be non-zero to retain some portion of the beginning of the input + // shape (and may be negative to index from the end; e.g., -1 to begin the + // reshape after the last axis, including nothing in the reshape, + // -2 to include only the last axis, etc.). + // + // For example, suppose "input" is a 2D blob with shape 2 x 8. + // Then the following ReshapeLayer specifications are all equivalent, + // producing a blob "output" with shape 2 x 2 x 4: + // + // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } + // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } + // + // num_axes specifies the extent of the reshape. + // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on + // input axes in the range [axis, axis+num_axes]. + // num_axes may also be -1, the default, to include all remaining axes + // (starting from axis). + // + // For example, suppose "input" is a 2D blob with shape 2 x 8. + // Then the following ReshapeLayer specifications are equivalent, + // producing a blob "output" with shape 1 x 2 x 8. 
+ // + // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } + // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } + // reshape_param { shape { dim: 1 } num_axes: 0 } + // + // On the other hand, these would produce output blob shape 2 x 1 x 8: + // + // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } + // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } + // + optional int32 axis = 2 [default = 0]; + optional int32 num_axes = 3 [default = -1]; +} + + +message ScaleParameter { + // The first axis of bottom[0] (the first input Blob) along which to apply + // bottom[1] (the second input Blob). May be negative to index from the end + // (e.g., -1 for the last axis). + // + // For example, if bottom[0] is 4D with shape 100x3x40x60, the output + // top[0] will have the same shape, and bottom[1] may have any of the + // following shapes (for the given value of axis): + // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 + // (axis == 1 == -3) 3; 3x40; 3x40x60 + // (axis == 2 == -2) 40; 40x60 + // (axis == 3 == -1) 60 + // Furthermore, bottom[1] may have the empty shape (regardless of the value of + // "axis") -- a scalar multiplier. + optional int32 axis = 1 [default = 1]; + + // (num_axes is ignored unless just one bottom is given and the scale is + // a learned parameter of the layer. Otherwise, num_axes is determined by the + // number of axes by the second bottom.) + // The number of axes of the input (bottom[0]) covered by the scale + // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. + // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar. + optional int32 num_axes = 2 [default = 1]; + + // (filler is ignored unless just one bottom is given and the scale is + // a learned parameter of the layer.) + // The initialization for the learned scale parameter. + // Default is the unit (1) initialization, resulting in the ScaleLayer + // initially performing the identity operation. 
+ optional FillerParameter filler = 3; + + // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but + // may be more efficient). Initialized with bias_filler (defaults to 0). + optional bool bias_term = 4 [default = false]; + optional FillerParameter bias_filler = 5; + optional bool scale_from_blob = 6 [default = true]; +} + +message SigmoidParameter { + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 1 [default = DEFAULT]; +} + +message SliceParameter { + // The axis along which to slice -- may be negative to index from the end + // (e.g., -1 for the last axis). + // By default, SliceLayer concatenates blobs along the "channels" axis (1). + optional int32 axis = 3 [default = 1]; + repeated uint32 slice_point = 2; + + // DEPRECATED: alias for "axis" -- does not support negative indexing. + optional uint32 slice_dim = 1 [default = 1]; +} + +message SmoothL1LossParameter { + // SmoothL1Loss(x) = + // 0.5 * (sigma * x) ** 2 -- if x < 1.0 / sigma / sigma + // |x| - 0.5 / sigma / sigma -- otherwise + optional float sigma = 1 [default = 1]; +} + +// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer +message SoftmaxParameter { + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 1 [default = DEFAULT]; + + // The axis along which to perform the softmax -- may be negative to index + // from the end (e.g., -1 for the last axis). + // Any other axes will be evaluated as independent softmaxes. + optional int32 axis = 2 [default = 1]; +} + +message TanHParameter { + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 1 [default = DEFAULT]; +} + +// Message that stores parameters used by TileLayer +message TileParameter { + // The index of the axis to tile. + optional int32 axis = 1 [default = 1]; + + // The number of copies (tiles) of the blob to output. 
+ optional int32 tiles = 2; +} + +// Message that stores parameters used by ThresholdLayer +message ThresholdParameter { + optional float threshold = 1 [default = 0]; // Strictly positive values +} + +message WindowDataParameter { + // Specify the data source. + optional string source = 1; + // For data pre-processing, we can do simple scaling and subtracting the + // data mean, if provided. Note that the mean subtraction is always carried + // out before scaling. + optional float scale = 2 [default = 1]; + optional string mean_file = 3; + // Specify the batch size. + optional uint32 batch_size = 4; + // Specify if we would like to randomly crop an image. + optional uint32 crop_size = 5 [default = 0]; + // Specify if we want to randomly mirror data. + optional bool mirror = 6 [default = false]; + // Foreground (object) overlap threshold + optional float fg_threshold = 7 [default = 0.5]; + // Background (non-object) overlap threshold + optional float bg_threshold = 8 [default = 0.5]; + // Fraction of batch that should be foreground objects + optional float fg_fraction = 9 [default = 0.25]; + // Amount of contextual padding to add around a window + // (used only by the window_data_layer) + optional uint32 context_pad = 10 [default = 0]; + // Mode for cropping out a detection window + // warp: cropped window is warped to a fixed size and aspect ratio + // square: the tightest square around the window is cropped + optional string crop_mode = 11 [default = "warp"]; + // cache_images: will load all images in memory for faster access + optional bool cache_images = 12 [default = false]; + // append root_folder to locate images + optional string root_folder = 13 [default = ""]; +} + +message SPPParameter { + enum PoolMethod { + MAX = 0; + AVE = 1; + STOCHASTIC = 2; + } + optional uint32 pyramid_height = 1; + optional PoolMethod pool = 2 [default = MAX]; // The pooling method + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 6 [default = 
DEFAULT]; +} + +// DEPRECATED: use LayerParameter. +message V1LayerParameter { + repeated string bottom = 2; + repeated string top = 3; + optional string name = 4; + repeated NetStateRule include = 32; + repeated NetStateRule exclude = 33; + enum LayerType { + NONE = 0; + ABSVAL = 35; + ACCURACY = 1; + ARGMAX = 30; + BNLL = 2; + CONCAT = 3; + CONTRASTIVE_LOSS = 37; + CONVOLUTION = 4; + DATA = 5; + DECONVOLUTION = 39; + DROPOUT = 6; + DUMMY_DATA = 32; + EUCLIDEAN_LOSS = 7; + ELTWISE = 25; + EXP = 38; + FLATTEN = 8; + HDF5_DATA = 9; + HDF5_OUTPUT = 10; + HINGE_LOSS = 28; + IM2COL = 11; + IMAGE_DATA = 12; + INFOGAIN_LOSS = 13; + INNER_PRODUCT = 14; + LRN = 15; + MEMORY_DATA = 29; + MULTINOMIAL_LOGISTIC_LOSS = 16; + MVN = 34; + POOLING = 17; + POWER = 26; + RELU = 18; + SIGMOID = 19; + SIGMOID_CROSS_ENTROPY_LOSS = 27; + SILENCE = 36; + SOFTMAX = 20; + SOFTMAX_LOSS = 21; + SPLIT = 22; + SLICE = 33; + TANH = 23; + WINDOW_DATA = 24; + THRESHOLD = 31; + QUANT = 208; + DEQUANT = 209; + } + optional LayerType type = 5; + repeated BlobProto blobs = 6; + repeated string param = 1001; + repeated DimCheckMode blob_share_mode = 1002; + enum DimCheckMode { + STRICT = 0; + PERMISSIVE = 1; + } + repeated float blobs_lr = 7; + repeated float weight_decay = 8; + repeated float loss_weight = 35; + optional AccuracyParameter accuracy_param = 27; + optional ArgMaxParameter argmax_param = 23; + optional ConcatParameter concat_param = 9; + optional ContrastiveLossParameter contrastive_loss_param = 40; + optional ConvolutionParameter convolution_param = 10; + optional DataParameter data_param = 11; + optional DropoutParameter dropout_param = 12; + optional DummyDataParameter dummy_data_param = 26; + optional EltwiseParameter eltwise_param = 24; + optional ExpParameter exp_param = 41; + optional HDF5DataParameter hdf5_data_param = 13; + optional HDF5OutputParameter hdf5_output_param = 14; + optional HingeLossParameter hinge_loss_param = 29; + optional ImageDataParameter image_data_param = 
15; + optional InfogainLossParameter infogain_loss_param = 16; + optional InnerProductParameter inner_product_param = 17; + optional LRNParameter lrn_param = 18; + optional MemoryDataParameter memory_data_param = 22; + optional MVNParameter mvn_param = 34; + optional PoolingParameter pooling_param = 19; + optional PowerParameter power_param = 21; + optional ReLUParameter relu_param = 30; + optional SigmoidParameter sigmoid_param = 38; + optional SoftmaxParameter softmax_param = 39; + optional SliceParameter slice_param = 31; + optional TanHParameter tanh_param = 37; + optional ThresholdParameter threshold_param = 25; + optional WindowDataParameter window_data_param = 20; + optional TransformationParameter transform_param = 36; + optional LossParameter loss_param = 42; + optional V0LayerParameter layer = 1; +} + +// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters +// in Caffe. We keep this message type around for legacy support. +message V0LayerParameter { + optional string name = 1; // the layer name + optional string type = 2; // the string to specify the layer type + + // Parameters to specify layers with inner products. 
+ optional uint32 num_output = 3; // The number of outputs for the layer + optional bool biasterm = 4 [default = true]; // whether to have bias terms + optional FillerParameter weight_filler = 5; // The filler for the weight + optional FillerParameter bias_filler = 6; // The filler for the bias + + optional uint32 pad = 7 [default = 0]; // The padding size + optional uint32 kernelsize = 8; // The kernel size + optional uint32 group = 9 [default = 1]; // The group size for group conv + optional uint32 stride = 10 [default = 1]; // The stride + enum PoolMethod { + MAX = 0; + AVE = 1; + STOCHASTIC = 2; + } + optional PoolMethod pool = 11 [default = MAX]; // The pooling method + optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio + + optional uint32 local_size = 13 [default = 5]; // for local response norm + optional float alpha = 14 [default = 1.]; // for local response norm + optional float beta = 15 [default = 0.75]; // for local response norm + optional float k = 22 [default = 1.]; + + // For data layers, specify the data source + optional string source = 16; + // For data pre-processing, we can do simple scaling and subtracting the + // data mean, if provided. Note that the mean subtraction is always carried + // out before scaling. + optional float scale = 17 [default = 1]; + optional string meanfile = 18; + // For data layers, specify the batch size. + optional uint32 batchsize = 19; + // For data layers, specify if we would like to randomly crop an image. + optional uint32 cropsize = 20 [default = 0]; + // For data layers, specify if we want to randomly mirror data. + optional bool mirror = 21 [default = false]; + + // The blobs containing the numeric parameters of the layer + repeated BlobProto blobs = 50; + // The ratio that is multiplied on the global learning rate. If you want to + // set the learning ratio for one blob, you need to set it for all blobs. 
+ repeated float blobs_lr = 51; + // The weight decay that is multiplied on the global weight decay. + repeated float weight_decay = 52; + + // The rand_skip variable is for the data layer to skip a few data points + // to avoid all asynchronous sgd clients to start at the same point. The skip + // point would be set as rand_skip * rand(0,1). Note that rand_skip should not + // be larger than the number of keys in the database. + optional uint32 rand_skip = 53 [default = 0]; + + // Fields related to detection (det_*) + // foreground (object) overlap threshold + optional float det_fg_threshold = 54 [default = 0.5]; + // background (non-object) overlap threshold + optional float det_bg_threshold = 55 [default = 0.5]; + // Fraction of batch that should be foreground objects + optional float det_fg_fraction = 56 [default = 0.25]; + + // optional bool OBSOLETE_can_clobber = 57 [default = true]; + + // Amount of contextual padding to add around a window + // (used only by the window_data_layer) + optional uint32 det_context_pad = 58 [default = 0]; + + // Mode for cropping out a detection window + // warp: cropped window is warped to a fixed size and aspect ratio + // square: the tightest square around the window is cropped + optional string det_crop_mode = 59 [default = "warp"]; + + // For ReshapeLayer, one needs to specify the new dimensions. + optional int32 new_num = 60 [default = 0]; + optional int32 new_channels = 61 [default = 0]; + optional int32 new_height = 62 [default = 0]; + optional int32 new_width = 63 [default = 0]; + + // Whether or not ImageLayer should shuffle the list of files at every epoch. + // It will also resize images if new_height or new_width are not zero. + optional bool shuffle_images = 64 [default = false]; + + // For ConcatLayer, one needs to specify the dimension for concatenation, and + // the other dimensions must be the same for all the bottom blobs. + // By default it will concatenate blobs along the channels dimension. 
+ optional uint32 concat_dim = 65 [default = 1]; + + optional HDF5OutputParameter hdf5_output_param = 1001; +} + +message PReLUParameter { + // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers: + // Surpassing Human-Level Performance on ImageNet Classification, 2015. + + // Initial value of a_i. Default is a_i=0.25 for all i. + optional FillerParameter filler = 1; + // Whether or not slope parameters are shared across channels. + optional bool channel_shared = 2 [default = false]; +} + +// Message that stores parameters used by DetectionOutputLayer +//message DetectionOutputParameter { +// optional int32 num_classes = 1 [default = 21]; +// optional float nms_threshold = 2 [default = 0.3]; +// optional int32 top_k = 3; +// optional float confidence_threshold = 4 [default = 0.8]; +//} + +// Message that store parameters used by PriorBoxLayer +message PriorBoxParameter { + // Encode/decode type. + enum CodeType { + CORNER = 1; + CENTER_SIZE = 2; + CORNER_SIZE = 3; + } + // Minimum box size (in pixels). Required! + repeated float min_size = 1; + // Maximum box size (in pixels). Required! + repeated float max_size = 2; + // Various of aspect ratios. Duplicate ratios will be ignored. + // If none is provided, we use default ratio 1. + repeated float aspect_ratio = 3; + // If true, will flip each aspect ratio. + // For example, if there is aspect ratio "r", + // we will generate aspect ratio "1.0/r" as well. + optional bool flip = 4 [default = true]; + // If true, will clip the prior so that it is within [0, 1] + optional bool clip = 5 [default = false]; + // Variance for adjusting the prior bboxes. + repeated float variance = 6; + // By default, we calculate img_height, img_width, step_x, step_y based on + // bottom[0] (feat) and bottom[1] (img). Unless these values are explicitely + // provided. + // Explicitly provide the img_size. + optional uint32 img_size = 7; + // Either img_size or img_h/img_w should be specified; not both. 
+ optional uint32 img_h = 8; + optional uint32 img_w = 9; + + // Explicitly provide the step size. + optional float step = 10; + // Either step or step_h/step_w should be specified; not both. + optional float step_h = 11; + optional float step_w = 12; + + // Offset to the top left corner of each cell. + optional float offset = 13 [default = 0.5]; +} + +// Message that stores parameters used by PermutetLayer +message PermuteParameter { + // The new orders of the axes of data. Notice it should be with + // in the same range as the input data, and it starts from 0. + // Do not provide repeated order. + repeated uint32 order = 1; +} + +message NormalizeParameter { + optional bool across_spatial = 1 [default = true]; + // Initial value of scale. Default is 1.0 for all + optional FillerParameter scale_filler = 2; + // Whether or not scale parameters are shared across channels. + optional bool channel_shared = 3 [default = true]; + // Epsilon for not dividing by zero while normalizing variance + optional float eps = 4 [default = 1e-10]; +} + +// needed by ssd +message SaveOutputParameter { + // Output directory. If not empty, we will save the results. + optional string output_directory = 1; + // Output name prefix. + optional string output_name_prefix = 2; + // Output format. + // VOC - PASCAL VOC output format. + // COCO - MS COCO output format. + optional string output_format = 3; + // If you want to output results, must also provide the following two files. + // Otherwise, we will ignore saving results. + // label map file. + optional string label_map_file = 4; + // A file which contains a list of names and sizes with same order + // of the input DB. The file is in the following format: + // name height width + // ... + optional string name_size_file = 5; + // Number of test images. It can be less than the lines specified in + // name_size_file. For example, when we only want to evaluate on part + // of the test images. 
+ optional uint32 num_test_image = 6; + // The resize parameter used in saving the data. + // optional ResizeParameter resize_param = 7; +} + +message NonMaximumSuppressionParameter { + // Threshold to be used in nms. + optional float nms_threshold = 1 [default = 0.3]; + // Maximum number of results to be kept. + optional int32 top_k = 2; + // Parameter for adaptive nms. + optional float eta = 3 [default = 1.0]; +} + +message GeneralNmsParameter { + optional int32 post_top_k = 1 ; + optional float nms_threshold = 2 [default = 0]; + optional float iou_threshold_decay = 3 [default = 1.0]; + optional float coor_scale_factor = 4 [default = 1.0]; +} + +// Message that store parameters used by DetectionOutputLayer, ssd/fasterRcnn +message DetectionOutputParameter { + optional int32 num_classes = 1; + optional bool share_location = 2 [default = true]; + optional int32 background_label_id = 3 [default = 0]; + optional NonMaximumSuppressionParameter nms_param = 4; + optional SaveOutputParameter save_output_param = 5; + optional PriorBoxParameter.CodeType code_type = 6 [default = CENTER_SIZE]; + optional bool variance_encoded_in_target = 8 [default = true]; + optional int32 keep_top_k = 7; + optional float confidence_threshold = 9; + optional float nms_threshold = 13; + optional int32 top_k = 14; + optional int32 boxes = 15 [default = 1]; + optional bool relative = 17 [default = true]; + optional float objectness_threshold = 18 [default = 0.5]; + optional float class_threshold = 19 [default = 0.5]; + repeated float biases = 20; + optional GeneralNmsParameter general_nms_param = 21; + optional float objectness_score = 22; +} +message PSROIPoolingParameter { + required float spatial_scale = 1; + required int32 output_dim = 2; // output channel number + required int32 group_size = 3; // number of groups to encode position-sensitive score maps +} +// Message that stores parameters used by FreespaceExtractLayer +message FreespaceExtractParameter { + optional float org_height = 1; 
+} + +// Message that stores parameters used by DetectpostprocessLayer +message PostprocessParameter { + optional float nms_thresh = 1 [default = 0.3]; + optional float conf_thresh = 2 [default = 0.5]; + optional uint32 post_nms_topn = 3 [default = 100]; + optional uint32 cls_num = 4 [default = 12]; + repeated float bbox_reg_weights = 5; +} + +// Message that stores parameters used by SpatialTransformLayer +message SpatialTransformParameter { + optional uint32 output_h = 1 [default = 0]; + optional uint32 output_w = 2 [default = 0]; + optional float border_value = 3 [default = 0]; + repeated float affine_transform = 4; + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 15 [default = DEFAULT]; +} +message ROIAlignParameter { + // Pad, kernel size, and stride are all given as a single value for equal + // dimensions in height and width or as Y, X pairs. + optional uint32 pooled_h = 1 [default = 0]; // The pooled output height + optional uint32 pooled_w = 2 [default = 0]; // The pooled output width + // Multiplicative spatial scale factor to translate ROI coords from their + // input scale to the scale used when pooling + optional float spatial_scale = 3 [default = 1]; + optional int32 sampling_ratio = 4 [default = -1]; + optional int32 roi_end_mode = 5 [default = 0]; +} + +message RegionParameter { + optional uint32 classes = 1 [default = 20]; // Category of classification + optional uint32 coords = 2 [default = 4]; // Coordinates of box + optional uint32 boxes = 3 [default = 1]; // Number of boxes predicted per grid + optional uint32 softmax = 4 [default = 0]; + optional string softmax_tree = 5 [default = ""]; + optional uint32 background = 6 [default = 0]; +} +message ReorgParameter{ + optional uint32 stride = 2 [default = 2]; + optional bool reverse = 1 [default = false]; +} +message ReverseParameter{ + repeated int32 axis = 1; +} +message InterpParameter{ + optional int32 height = 1 [default = 0];//Height of output + optional 
int32 width = 2 [default = 0];//Width of output + optional int32 zoom_factor = 3 [default = 1];//zoom factor + optional int32 shrink_factor = 4 [default = 1];//shrink factor + optional int32 pad_beg = 5 [default = 0];//padding at begin of input + optional int32 pad_end = 6 [default = 0];//padding at end of input +} +message ShuffleChannelParameter{ + optional uint32 group = 1[default = 1]; // The number of group +} +message UpsampleParameter{ + optional float scale = 1[default = 1]; + optional int32 stride = 2[default = 2]; + optional int32 stride_h = 3[default = 2]; + optional int32 stride_w = 4[default=2]; +} +message ROIPoolingParameter { + required int32 pooled_h = 1; + required int32 pooled_w = 2; + optional float spatial_scale = 3 [default=0.0625]; + optional float spatial_scale_h = 4; + optional float spatial_scale_w = 5; +} + +message YoloParameter { + optional int32 boxes = 1 [default = 3]; + optional int32 coords = 2 [default = 4]; + optional int32 classes = 3 [default = 80]; + optional string yolo_version = 4 [default = "V3"]; + optional bool softmax = 5 [default = false]; + optional bool background = 6 [default = false]; + optional bool softmaxtree = 7 [default = false]; +} + +message YoloV3DetectionOutputParameter { + optional int32 boxes = 1 [default = 3]; + optional int32 classes = 2 [default = 80]; + optional bool relative = 3 [default = true]; + optional float obj_threshold = 4 [default = 0.5]; + optional float score_threshold = 5 [default = 0.5]; + optional float iou_threshold = 6 [default = 0.45]; + optional int32 pre_nms_topn = 7 [default = 512]; + optional int32 post_nms_topn = 8 [default = 1024]; + repeated float biases_high = 9; + repeated float biases_mid = 10; + repeated float biases_low = 11; + optional int32 coords = 12 [default = 4]; + repeated float biases = 13; + optional bool resize_origin_img_to_net = 14 [default = false]; +} + +message ProposalParameter { + optional float feat_stride = 1 [default = 16]; + optional float base_size = 
2 [default = 16]; + optional float min_size = 3 [default = 16]; + repeated float ratio = 4; + repeated float scale = 5; + optional int32 pre_nms_topn = 6 [default = 3000]; + optional int32 post_nms_topn = 7 [default = 304]; + optional float iou_threshold = 8 [default = 0.7]; + optional bool output_actual_rois_num = 9 [default = false]; +} + +message FSRDetectionOutputParameter { + required int32 num_classes = 1; + required float score_threshold = 2; + required float iou_threshold = 3; + optional int32 batch_rois = 4 [default = 1]; +} + +message SSDDetectionOutputParameter { + required int32 num_classes= 1 [default = 2]; + optional bool share_location = 2 [default = true]; + optional int32 background_label_id = 3 [default = 0]; + optional float iou_threshold = 4 [default = 0.3]; + optional int32 top_k = 5 [default = 200]; + optional float eta = 6 [default = 1.0]; + optional bool variance_encoded_in_target = 7 [default = false]; + optional int32 code_type = 8 [default = 1]; + optional int32 keep_top_k = 9 [default = -1]; + optional float confidence_threshold = 10 [default = 0.0]; +} +message YoloV2DetectionOutputParameter { + optional int32 boxes = 1 [default = 5]; + optional int32 classes = 2 [default = 80]; + optional bool relative = 3 [default = true]; + optional float obj_threshold = 4 [default = 0.5]; + optional float score_threshold = 5 [default = 0.5]; + optional float iou_threshold = 6 [default = 0.45]; + optional int32 pre_nms_topn = 7 [default = 512]; + optional int32 post_nms_topn = 8 [default = 1024]; + repeated float biases = 9; + optional int32 coords = 10 [default = 4]; + optional bool resize_origin_img_to_net = 11 [default = false]; +} + +message QuantParameter { + optional float scale = 2; + optional bytes offset = 3; +} + +message BatchMatMulParameter{ + optional bool adj_x1 = 1 [default = false]; + optional bool adj_x2 = 2 [default = false]; +} + +message CondTakeParameter { + required string mode = 1; + required float val = 2; + optional float eps 
= 3 [default = 1e-06]; +} + +message MatrixInverseParameter { + optional bool adjoint = 1 [default = false]; +} + +message WarpPerspectiveParameter { + required int32 out_height = 1; + required int32 out_width = 2; + optional float constant = 3; + optional string border_type = 4 [default = 'BORDER_CONSTANT']; +} + +message SpatialTransformerParameter { + // How to use the parameter passed by localisation network + optional string transform_type = 1 [default = "affine"]; + // What is the sampling technique + optional string sampler_type = 2 [default = "bilinear"]; + + // If not set,stay same with the input dimension H and W + optional int32 output_H = 3; + optional int32 output_W = 4; + // If false, only compute dTheta, DO NOT compute dU + optional bool to_compute_dU = 5 [default = true]; + + // The default value for some parameters + optional double theta_1_1 = 6; + optional double theta_1_2 = 7; + optional double theta_1_3 = 8; + optional double theta_2_1 = 9; + optional double theta_2_2 = 10; + optional double theta_2_3 = 11; +} diff --git a/inc/register/proto/onnx/ge_onnx.proto b/inc/register/proto/onnx/ge_onnx.proto new file mode 100644 index 000000000..4cd77f3ae --- /dev/null +++ b/inc/register/proto/onnx/ge_onnx.proto @@ -0,0 +1,563 @@ +// Copyright (c) ONNX Project Contributors. +// Licensed under the MIT license. + +syntax = "proto3"; + +package ge.onnx; + +// Overview +// +// ONNX is an open specification that is comprised of the following components: +// +// 1) A definition of an extensible computation graph model. +// 2) Definitions of standard data types. +// 3) Definitions of built-in operators. +// +// This document describes the syntax of models and their computation graphs, +// as well as the standard data types. Together, they are referred to as the ONNX +// Intermediate Representation, or 'IR' for short. +// +// The normative semantic specification of the ONNX IR is found in docs/IR.md. 
+// Definitions of the built-in neural network operators may be found in docs/Operators.md. + +// Notes +// +// Release +// +// We are still in the very early stage of defining ONNX. The current +// version of ONNX is a starting point. While we are actively working +// towards a complete spec, we would like to get the community involved +// by sharing our working version of ONNX. +// +// Protobuf compatibility +// +// To simplify framework compatibility, ONNX is defined using the subset of protobuf +// that is compatible with both protobuf v2 and v3. This means that we do not use any +// protobuf features that are only available in one of the two versions. +// +// Here are the most notable contortions we have to carry out to work around +// these limitations: +// +// - No 'map' (added protobuf 3.0). We instead represent mappings as lists +// of key-value pairs, where order does not matter and duplicates +// are not allowed. + + +// Versioning +// +// ONNX versioning is specified in docs/IR.md and elaborated on in docs/Versioning.md +// +// To be compatible with both proto2 and proto3, we will use a version number +// that is not defined by the default value but an explicit enum number. +enum Version { + // proto3 requires the first enum value to be zero. + // We add this just to appease the compiler. + _START_VERSION = 0; + // The version field is always serialized and we will use it to store the + // version that the graph is generated from. This helps us set up version + // control. + // For the IR, we are using simple numbers starting with 0x00000001, + // which was the version we published on Oct 10, 2017. 
+ IR_VERSION_2017_10_10 = 0x0000000000000001; + + // IR_VERSION 2 published on Oct 30, 2017 + // - Added type discriminator to AttributeProto to support proto3 users + IR_VERSION_2017_10_30 = 0x0000000000000002; + + // IR VERSION 3 published on Nov 3, 2017 + // - For operator versioning: + // - Added new message OperatorSetIdProto + // - Added opset_import in ModelProto + // - For vendor extensions, added domain in NodeProto + IR_VERSION_2017_11_3 = 0x0000000000000003; + + // IR VERSION 4 published on Jan 22, 2019 + // - Relax constraint that initializers should be a subset of graph inputs + // - Add type BFLOAT16 + IR_VERSION_2019_1_22 = 0x0000000000000004; + + // IR VERSION 5 published on March 18, 2019 + // - Add message TensorAnnotation. + // - Add quantization annotation in GraphProto to map tensor with its scale and zero point quantization parameters. + IR_VERSION_2019_3_18 = 0x0000000000000005; + + // IR VERSION 6 published on Sep 19, 2019 + // - Add support for sparse tensor constants stored in model. + // - Add message SparseTensorProto + // - Add sparse initializers + IR_VERSION = 0x0000000000000006; +} + +// Attributes +// +// A named attribute containing either singular float, integer, string, graph, +// and tensor values, or repeated float, integer, string, graph, and tensor values. +// An AttributeProto MUST contain the name field, and *only one* of the +// following content fields, effectively enforcing a C/C++ union equivalent. +message AttributeProto { + + // Note: this enum is structurally identical to the OpSchema::AttrType + // enum defined in schema.h. If you rev one, you likely need to rev the other. + enum AttributeType { + UNDEFINED = 0; + FLOAT = 1; + INT = 2; + STRING = 3; + TENSOR = 4; + GRAPH = 5; + SPARSE_TENSOR = 11; + + FLOATS = 6; + INTS = 7; + STRINGS = 8; + TENSORS = 9; + GRAPHS = 10; + SPARSE_TENSORS = 12; + } + + // The name field MUST be present for this version of the IR. 
+ string name = 1; // namespace Attribute + + // if ref_attr_name is not empty, ref_attr_name is the attribute name in parent function. + // In this case, this AttributeProto does not contain data, and it's a reference of attribute + // in parent scope. + // NOTE: This should ONLY be used in function (sub-graph). It's invalid to be used in main graph. + string ref_attr_name = 21; + + // A human-readable documentation for this attribute. Markdown is allowed. + string doc_string = 13; + + // The type field MUST be present for this version of the IR. + // For 0.0.1 versions of the IR, this field was not defined, and + // implementations needed to use has_field heuristics to determine + // which value field was in use. For IR_VERSION 0.0.2 or later, this + // field MUST be set and match the f|i|s|t|... field in use. This + // change was made to accommodate proto3 implementations. + AttributeType type = 20; // discriminator that indicates which field below is in use + + // Exactly ONE of the following fields must be present for this version of the IR + float f = 2; // float + int64 i = 3; // int + bytes s = 4; // UTF-8 string + TensorProto t = 5; // tensor value + GraphProto g = 6; // graph + SparseTensorProto sparse_tensor = 22; // sparse tensor value + // Do not use field below, it's deprecated. + // optional ValueProto v = 12; // value - subsumes everything but graph + + repeated float floats = 7; // list of floats + repeated int64 ints = 8; // list of ints + repeated bytes strings = 9; // list of UTF-8 strings + repeated TensorProto tensors = 10; // list of tensors + repeated GraphProto graphs = 11; // list of graph + repeated SparseTensorProto sparse_tensors = 23; // list of sparse tensors +} + +// Defines information on value, including the name, the type, and +// the shape of the value. +message ValueInfoProto { + // This field MUST be present in this version of the IR. 
+ string name = 1; // namespace Value + // This field MUST be present in this version of the IR for + // inputs and outputs of the top-level graph. + TypeProto type = 2; + // A human-readable documentation for this value. Markdown is allowed. + string doc_string = 3; +} + +// Nodes +// +// Computation graphs are made up of a DAG of nodes, which represent what is +// commonly called a "layer" or "pipeline stage" in machine learning frameworks. +// +// For example, it can be a node of type "Conv" that takes in an image, a filter +// tensor and a bias tensor, and produces the convolved output. +message NodeProto { + repeated string input = 1; // namespace Value + repeated string output = 2; // namespace Value + + // An optional identifier for this node in a graph. + // This field MAY be absent in this version of the IR. + string name = 3; // namespace Node + + // The symbolic identifier of the Operator to execute. + string op_type = 4; // namespace Operator + // The domain of the OperatorSet that specifies the operator named by op_type. + string domain = 7; // namespace Domain + + // Additional named attributes. + repeated AttributeProto attribute = 5; + + // A human-readable documentation for this node. Markdown is allowed. + string doc_string = 6; +} + +// Models +// +// ModelProto is a top-level file/container format for bundling a ML model and +// associating its computation graph with metadata. +// +// The semantics of the model are described by the associated GraphProto. +message ModelProto { + // The version of the IR this model targets. See Version enum above. + // This field MUST be present. + int64 ir_version = 1; + + // The OperatorSets this model relies on. + // All ModelProtos MUST have at least one entry that + // specifies which version of the ONNX OperatorSet is + // being imported. 
+ // + // All nodes in the ModelProto's graph will bind against the operator + // with the same-domain/same-op_type operator with the HIGHEST version + // in the referenced operator sets. + repeated OperatorSetIdProto opset_import = 8; + + // The name of the framework or tool used to generate this model. + // This field SHOULD be present to indicate which implementation/tool/framework + // emitted the model. + string producer_name = 2; + + // The version of the framework or tool used to generate this model. + // This field SHOULD be present to indicate which implementation/tool/framework + // emitted the model. + string producer_version = 3; + + // Domain name of the model. + // We use reverse domain names as name space indicators. For example: + // `com.facebook.fair` or `com.microsoft.cognitiveservices` + // + // Together with `model_version` and GraphProto.name, this forms the unique identity of + // the graph. + string domain = 4; + + // The version of the graph encoded. See Version enum below. + int64 model_version = 5; + + // A human-readable documentation for this model. Markdown is allowed. + string doc_string = 6; + + // The parameterized graph that is evaluated to execute the model. + GraphProto graph = 7; + + // Named metadata values; keys should be distinct. + repeated StringStringEntryProto metadata_props = 14; +}; + +// StringStringEntryProto follows the pattern for cross-proto-version maps. +// See https://developers.google.com/protocol-buffers/docs/proto3#maps +message StringStringEntryProto { + string key = 1; + string value= 2; +}; + +message TensorAnnotation { + string tensor_name = 1; + // pairs to annotate tensor specified by above. + // The keys used in the mapping below must be pre-defined in ONNX spec. + // For example, for 8-bit linear quantization case, 'SCALE_TENSOR', 'ZERO_POINT_TENSOR' will be pre-defined as + // quantization parameter keys. 
+ repeated StringStringEntryProto quant_parameter_tensor_names = 2; +} + + + +// Graphs +// +// A graph defines the computational logic of a model and is comprised of a parameterized +// list of nodes that form a directed acyclic graph based on their inputs and outputs. +// This is the equivalent of the "network" or "graph" in many deep learning +// frameworks. +message GraphProto { + // The nodes in the graph, sorted topologically. + repeated NodeProto node = 1; + + // The name of the graph. + string name = 2; // namespace Graph + + // A list of named tensor values, used to specify constant inputs of the graph. + // Each TensorProto entry must have a distinct name (within the list) that + // MAY also appear in the input list. + repeated TensorProto initializer = 5; + + // Initializers (see above) stored in sparse format. + repeated SparseTensorProto sparse_initializer = 15; + + // A human-readable documentation for this graph. Markdown is allowed. + string doc_string = 10; + + // The inputs and outputs of the graph. + repeated ValueInfoProto input = 11; + repeated ValueInfoProto output = 12; + + // Information for the values in the graph. The ValueInfoProto.name's + // must be distinct. It is optional for a value to appear in value_info list. + repeated ValueInfoProto value_info = 13; + + // This field carries information to indicate the mapping among a tensor and its + // quantization parameter tensors. For example: + // For tensor 'a', it may have {'SCALE_TENSOR', 'a_scale'} and {'ZERO_POINT_TENSOR', 'a_zero_point'} annotated, + // which means, tensor 'a_scale' and tensor 'a_zero_point' are scale and zero point of tensor 'a' in the model. + repeated TensorAnnotation quantization_annotation = 14; + + // DO NOT USE the following fields, they were deprecated from earlier versions. 
+ // repeated string input = 3; + // repeated string output = 4; + // optional int64 ir_version = 6; + // optional int64 producer_version = 7; + // optional string producer_tag = 8; + // optional string domain = 9; +} + +// Tensors +// +// A serialized tensor value. +message TensorProto { + enum DataType { + UNDEFINED = 0; + // Basic types. + FLOAT = 1; // float + UINT8 = 2; // uint8_t + INT8 = 3; // int8_t + UINT16 = 4; // uint16_t + INT16 = 5; // int16_t + INT32 = 6; // int32_t + INT64 = 7; // int64_t + STRING = 8; // string + BOOL = 9; // bool + + // IEEE754 half-precision floating-point format (16 bits wide). + // This format has 1 sign bit, 5 exponent bits, and 10 mantissa bits. + FLOAT16 = 10; + + DOUBLE = 11; + UINT32 = 12; + UINT64 = 13; + COMPLEX64 = 14; // complex with float32 real and imaginary components + COMPLEX128 = 15; // complex with float64 real and imaginary components + + // Non-IEEE floating-point format based on IEEE754 single-precision + // floating-point number truncated to 16 bits. + // This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits. + BFLOAT16 = 16; + + // Future extensions go here. + } + + // The shape of the tensor. + repeated int64 dims = 1; + + // The data type of the tensor. + // This field MUST have a valid TensorProto.DataType value + int32 data_type = 2; + + // For very large tensors, we may want to store them in chunks, in which + // case the following fields will specify the segment that is stored in + // the current TensorProto. + message Segment { + int64 begin = 1; + int64 end = 2; + } + Segment segment = 3; + + // Tensor content must be organized in row-major order. + // + // Depending on the data_type field, exactly one of the fields below with + // name ending in _data is used to store the elements of the tensor. 
+ + // For float and complex64 values + // Complex64 tensors are encoded as a single array of floats, + // with the real components appearing in odd numbered positions, + // and the corresponding imaginary component appearing in the + // subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i] + // is encoded as [1.0, 2.0 ,3.0 ,4.0] + // When this field is present, the data_type field MUST be FLOAT or COMPLEX64. + repeated float float_data = 4 [packed = true]; + + // For int32, uint8, int8, uint16, int16, bool, and float16 values + // float16 values must be bit-wise converted to an uint16_t prior + // to writing to the buffer. + // When this field is present, the data_type field MUST be + // INT32, INT16, INT8, UINT16, UINT8, BOOL, or FLOAT16 + repeated int32 int32_data = 5 [packed = true]; + + // For strings. + // Each element of string_data is a UTF-8 encoded Unicode + // string. No trailing null, no leading BOM. The protobuf "string" + // scalar type is not used to match ML community conventions. + // When this field is present, the data_type field MUST be STRING + repeated bytes string_data = 6; + + // For int64. + // When this field is present, the data_type field MUST be INT64 + repeated int64 int64_data = 7 [packed = true]; + + // Optionally, a name for the tensor. + string name = 8; // namespace Value + + // A human-readable documentation for this tensor. Markdown is allowed. + string doc_string = 12; + + // Serializations can either use one of the fields above, or use this + // raw bytes field. The only exception is the string case, where one is + // required to store the content in the repeated bytes string_data field. + + // + // When this raw_data field is used to store tensor value, elements MUST + // be stored in as fixed-width, little-endian order. + // Floating-point data types MUST be stored in IEEE 754 format. + // Complex64 elements must be written as two consecutive FLOAT values, real component first. 
+ // Complex128 elements must be written as two consecutive DOUBLE values, real component first. + // Boolean type MUST be written one byte per tensor element (00000001 for true, 00000000 for false). + // + // Note: the advantage of specific field rather than the raw_data field is + // that in some cases (e.g. int data), protobuf does a better packing via + // variable length storage, and may lead to smaller binary footprint. + // When this field is present, the data_type field MUST NOT be STRING or UNDEFINED + bytes raw_data = 9; + + // Data can be stored inside the protobuf file using type-specific fields or raw_data. + // Alternatively, raw bytes data can be stored in an external file, using the external_data field. + // external_data stores key-value pairs describing data location. Recognized keys are: + // - "location" (required) - POSIX filesystem path relative to the directory where the ONNX + // protobuf model was stored + // - "offset" (optional) - position of byte at which stored data begins. Integer stored as string. + // Offset values SHOULD be multiples 4096 (page size) to enable mmap support. + // - "length" (optional) - number of bytes containing data. Integer stored as string. + // - "checksum" (optional) - SHA1 digest of file specified in under 'location' key. + repeated StringStringEntryProto external_data = 13; + + // Location of the data for this tensor. MUST be one of: + // - DEFAULT - data stored inside the protobuf message. Data is stored in raw_data (if set) otherwise in type-specified field. + // - EXTERNAL - data stored in an external location as described by external_data field. + enum DataLocation { + DEFAULT = 0; + EXTERNAL = 1; + } + + // If value not set, data is stored in raw_data (if set) otherwise in type-specified field. 
+ DataLocation data_location = 14; + + // For double + // Complex128 tensors are encoded as a single array of doubles, + // with the real components appearing in odd numbered positions, + // and the corresponding imaginary component appearing in the + // subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i] + // is encoded as [1.0, 2.0 ,3.0 ,4.0] + // When this field is present, the data_type field MUST be DOUBLE or COMPLEX128 + repeated double double_data = 10 [packed = true]; + + // For uint64 and uint32 values + // When this field is present, the data_type field MUST be + // UINT32 or UINT64 + repeated uint64 uint64_data = 11 [packed = true]; +} + +// A serialized sparse-tensor value +message SparseTensorProto { + // The sequence of non-default values are encoded as a tensor of shape [NNZ]. + // The default-value is zero for numeric tensors, and empty-string for string tensors. + TensorProto values = 1; + + // The indices of the non-default values, which may be stored in one of two formats. + // (a) Indices can be a tensor of shape [NNZ, rank] with the [i,j]-th value + // corresponding to the j-th index of the i-th value (in the values tensor). + // (b) Indices can be a tensor of shape [NNZ], in which case the i-th value + // must be the linearized-index of the i-th value (in the values tensor). + // The linearized-index can be converted into an index tuple (k_1,...,k_rank) + // using the shape provided below. + // The indices must appear in ascending order without duplication. + // In the first format, the ordering is lexicographic-ordering: + // e.g., index-value [1,4] must appear before [2,1] + TensorProto indices = 2; + + // The shape of the underlying dense-tensor: [dim_1, dim_2, ... dim_rank] + repeated int64 dims = 3; +} + +// Defines a tensor shape. A dimension can be either an integer value +// or a symbolic variable. A symbolic variable represents an unknown +// dimension. 
+message TensorShapeProto { + message Dimension { + oneof value { + int64 dim_value = 1; + string dim_param = 2; // namespace Shape + }; + // Standard denotation can optionally be used to denote tensor + // dimensions with standard semantic descriptions to ensure + // that operations are applied to the correct axis of a tensor. + // Refer to https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition + // for pre-defined dimension denotations. + string denotation = 3; + }; + repeated Dimension dim = 1; +} + +// Types +// +// The standard ONNX data types. +message TypeProto { + + message Tensor { + // This field MUST NOT have the value of UNDEFINED + // This field MUST have a valid TensorProto.DataType value + // This field MUST be present for this version of the IR. + int32 elem_type = 1; + TensorShapeProto shape = 2; + } + + // repeated T + message Sequence { + // The type and optional shape of each element of the sequence. + // This field MUST be present for this version of the IR. + TypeProto elem_type = 1; + }; + + // map + message Map { + // This field MUST have a valid TensorProto.DataType value + // This field MUST be present for this version of the IR. + // This field MUST refer to an integral type ([U]INT{8|16|32|64}) or STRING + int32 key_type = 1; + // This field MUST be present for this version of the IR. + TypeProto value_type = 2; + }; + + oneof value { + // The type of a tensor. + Tensor tensor_type = 1; + + // NOTE: DNN-only implementations of ONNX MAY elect to not support non-tensor values + // as input and output to graphs and nodes. These types are needed to naturally + // support classical ML operators. DNN operators SHOULD restrict their input + // and output types to tensors. + + // The type of a sequence. + Sequence sequence_type = 4; + + // The type of a map. 
+ Map map_type = 5; + + } + + // An optional denotation can be used to denote the whole + // type with a standard semantic description as to what is + // stored inside. Refer to https://github.com/onnx/onnx/blob/master/docs/TypeDenotation.md#type-denotation-definition + // for pre-defined type denotations. + string denotation = 6; +} + +// Operator Sets +// +// OperatorSets are uniquely identified by a (domain, opset_version) pair. +message OperatorSetIdProto { + // The domain of the operator set being identified. + // The empty string ("") or absence of this field implies the operator + // set that is defined as part of the ONNX specification. + // This field MUST be present in this version of the IR when referring to any other operator set. + string domain = 1; + + // The version of the operator set being identified. + // This field MUST be present in this version of the IR. + int64 version = 2; +} diff --git a/inc/register/proto/tensorflow/attr_value.proto b/inc/register/proto/tensorflow/attr_value.proto new file mode 100644 index 000000000..1cc67d627 --- /dev/null +++ b/inc/register/proto/tensorflow/attr_value.proto @@ -0,0 +1,62 @@ +syntax = "proto3"; + +package domi.tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "AttrValueProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; + +import "tensor.proto"; +import "tensor_shape.proto"; +import "types.proto"; + +// Protocol buffer representing the value for an attr used to configure an Op. +// Comment indicates the corresponding attr type. Only the field matching the +// attr type may be filled. 
+message AttrValue { + // LINT.IfChange + message ListValue { + repeated bytes s = 2; // "list(string)" + repeated int64 i = 3 [packed = true]; // "list(int)" + repeated float f = 4 [packed = true]; // "list(float)" + repeated bool b = 5 [packed = true]; // "list(bool)" + repeated DataType type = 6 [packed = true]; // "list(type)" + repeated TensorShapeProto shape = 7; // "list(shape)" + repeated TensorProto tensor = 8; // "list(tensor)" + repeated NameAttrList func = 9; // "list(attr)" + } + // LINT.ThenChange(https://www.tensorflow.org/code/tensorflow/c/c_api.cc) + + oneof value { + bytes s = 2; // "string" + int64 i = 3; // "int" + float f = 4; // "float" + bool b = 5; // "bool" + DataType type = 6; // "type" + TensorShapeProto shape = 7; // "shape" + TensorProto tensor = 8; // "tensor" + ListValue list = 1; // any "list(...)" + + // "func" represents a function. func.name is a function's name or + // a primitive op's name. func.attr.first is the name of an attr + // defined for that function. func.attr.second is the value for + // that attr in the instantiation. + NameAttrList func = 10; + + // This is a placeholder only used in nodes defined inside a + // function. It indicates the attr value will be supplied when + // the function is instantiated. For example, let us suppose a + // node "N" in function "FN". "N" has an attr "A" with value + // placeholder = "foo". When FN is instantiated with attr "foo" + // set to "bar", the instantiated node N's attr A will have been + // given the value "bar". + string placeholder = 9; + } +} + +// A list of attr names and their values. The whole list is attached +// with a string name. E.g., MatMul[T=float]. 
+message NameAttrList { + string name = 1; + map attr = 2; +} diff --git a/inc/register/proto/tensorflow/function.proto b/inc/register/proto/tensorflow/function.proto new file mode 100644 index 000000000..075897c68 --- /dev/null +++ b/inc/register/proto/tensorflow/function.proto @@ -0,0 +1,100 @@ +syntax = "proto3"; + +package domi.tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "FunctionProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; + +import "attr_value.proto"; +import "node_def.proto"; +import "op_def.proto"; + +// A library is a set of named functions. +message FunctionDefLibrary { + repeated FunctionDef function = 1; + repeated GradientDef gradient = 2; +} + +// A function can be instantiated when the runtime can bind every attr +// with a value. When a GraphDef has a call to a function, it must +// have binding for every attr defined in the signature. +// * device spec, etc. +message FunctionDef { + // The definition of the function's name, arguments, return values, + // attrs etc. + OpDef signature = 1; + + // Attributes specific to this function definition. + map attr = 5; + + // NOTE: field id 2 deleted on Jan 11, 2017, GraphDef version 21. + reserved 2; + + // In both of the following fields, there is the need to specify an + // output that is used as either the input to another node (in + // `node_def`) or as a return value of the function (in `ret`). + // Unlike the NodeDefs in GraphDef, we need to be able to specify a + // list in some cases (instead of just single outputs). Also, we + // need to be able to deal with lists of unknown length (so the + // output index may not be known at function definition time). So + // we use the following format instead: + // * "fun_in" where "fun_in" is the name of a function input arg in + // the `signature` field above. This represents that input, whether + // it is a single tensor or a list. 
+ // * "fun_in:0" gives the first element of a function input arg (a + // non-list input is considered a list of length 1 for these + // purposes). + // * "node:out" where "node" is the name of a node in `node_def` and + // "out" is the name one of its op's output arguments (the name + // comes from the OpDef of the node's op). This represents that + // node's output, whether it is a single tensor or a list. + // Note: We enforce that an op's output arguments are never + // renamed in the backwards-compatibility test. + // * "node:out:0" gives the first element of a node output arg (a + // non-list output is considered a list of length 1 for these + // purposes). + // + // NOT CURRENTLY SUPPORTED (but may be in the future): + // * "node:out:-1" gives last element in a node output list + // * "node:out:1:" gives a list with all but the first element in a + // node output list + // * "node:out::-1" gives a list with all but the last element in a + // node output list + + // The body of the function. Unlike the NodeDefs in a GraphDef, attrs + // may have values of type `placeholder` and the `input` field uses + // the "output" format above. + + // By convention, "op" in node_def is resolved by consulting with a + // user-defined library first. If not resolved, "func" is assumed to + // be a builtin op. + repeated NodeDef node_def = 3; + + // A mapping from the output arg names from `signature` to the + // outputs from `node_def` that should be returned by the function. + map ret = 4; +} + +// GradientDef defines the gradient function of a function defined in +// a function library. +// +// A gradient function g (specified by gradient_func) for a function f +// (specified by function_name) must follow the following: +// +// The function 'f' must be a numerical function which takes N inputs +// and produces M outputs. Its gradient function 'g', which is a +// function taking N + M inputs and produces N outputs. +// +// I.e. 
if we have +// (y1, y2, ..., y_M) = f(x1, x2, ..., x_N), +// then, g is +// (dL/dx1, dL/dx2, ..., dL/dx_N) = g(x1, x2, ..., x_N, +// dL/dy1, dL/dy2, ..., dL/dy_M), +// where L is a scalar-value function of (x1, x2, ..., xN) (e.g., the +// loss function). dL/dx_i is the partial derivative of L with respect +// to x_i. +message GradientDef { + string function_name = 1; // The function name. + string gradient_func = 2; // The gradient function's name. +} diff --git a/inc/register/proto/tensorflow/graph.proto b/inc/register/proto/tensorflow/graph.proto new file mode 100644 index 000000000..d639a7d6c --- /dev/null +++ b/inc/register/proto/tensorflow/graph.proto @@ -0,0 +1,56 @@ +syntax = "proto3"; + +package domi.tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "GraphProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; + +import "node_def.proto"; +import "function.proto"; +import "versions.proto"; + +// Represents the graph of operations +message GraphDef { + repeated NodeDef node = 1; + + // Compatibility versions of the graph. See core/public/version.h for version + // history. The GraphDef version is distinct from the TensorFlow version, and + // each release of TensorFlow will support a range of GraphDef versions. + VersionDef versions = 4; + + // Deprecated single version field; use versions above instead. Since all + // GraphDef changes before "versions" was introduced were forward + // compatible, this field is entirely ignored. + int32 version = 3 [deprecated = true]; + + // EXPERIMENTAL. DO NOT USE OR DEPEND ON THIS YET. + // + // "library" provides user-defined functions. + // + // Naming: + // * library.function.name are in a flat namespace. + // NOTE: We may need to change it to be hierarchical to support + // different orgs. E.g., + // { "/google/nn", { ... }}, + // { "/google/vision", { ... }} + // { "/org_foo/module_bar", { ... 
}} + // map named_lib; + // * If node[i].op is the name of one function in "library", + // node[i] is deemed as a function call. Otherwise, node[i].op + // must be a primitive operation supported by the runtime. + // + // + // Function call semantics: + // + // * The callee may start execution as soon as some of its inputs + // are ready. The caller may want to use Tuple() mechanism to + // ensure all inputs are ready in the same time. + // + // * The consumer of return values may start executing as soon as + // the return values the consumer depends on are ready. The + // consumer may want to use Tuple() mechanism to ensure the + // consumer does not start until all return values of the callee + // function are ready. + FunctionDefLibrary library = 2; +}; diff --git a/inc/register/proto/tensorflow/node_def.proto b/inc/register/proto/tensorflow/node_def.proto new file mode 100644 index 000000000..b9bc97ee6 --- /dev/null +++ b/inc/register/proto/tensorflow/node_def.proto @@ -0,0 +1,63 @@ +syntax = "proto3"; + +package domi.tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "NodeProto"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; + +import "attr_value.proto"; + +message NodeDef { + // The name given to this operator. Used for naming inputs, + // logging, visualization, etc. Unique within a single GraphDef. + // Must match the regexp "[A-Za-z0-9.][A-Za-z0-9_./]*". + string name = 1; + + // The operation name. There may be custom parameters in attrs. + // Op names starting with an underscore are reserved for internal use. + string op = 2; + + // Each input is "node:src_output" with "node" being a string name and + // "src_output" indicating which output tensor to use from "node". If + // "src_output" is 0 the ":0" suffix can be omitted. Regular inputs + // may optionally be followed by control inputs that have the format + // "^node". 
+ repeated string input = 3; + + // A (possibly partial) specification for the device on which this + // node should be placed. + // The expected syntax for this string is as follows: + // + // DEVICE_SPEC ::= PARTIAL_SPEC + // + // PARTIAL_SPEC ::= ("/" CONSTRAINT) * + // CONSTRAINT ::= ("job:" JOB_NAME) + // | ("replica:" [1-9][0-9]*) + // | ("task:" [1-9][0-9]*) + // | ("device:" [A-Za-z]* ":" ([1-9][0-9]* | "*") ) + // + // Valid values for this string include: + // * "/job:worker/replica:0/task:1/device:GPU:3" (full specification) + // * "/job:worker/device:GPU:3" (partial specification) + // * "" (no specification) + // + // If the constraints do not resolve to a single device (or if this + // field is empty or not present), the runtime will attempt to + // choose a device automatically. + string device = 4; + + // Operation-specific graph-construction-time configuration. + // Note that this should include all attrs defined in the + // corresponding OpDef, including those with a value matching + // the default -- this allows the default to change and makes + // NodeDefs easier to interpret on their own. However, if + // an attr with a default is not specified in this list, the + // default will be used. + // The "names" (keys) must match the regexp "[a-z][a-z0-9_]+" (and + // one of the names from the corresponding OpDef's attr field). + // The values must have a type matching the corresponding OpDef + // attr's type field. + // Add some examples here showing best practices. 
+ map attr = 5; +}; diff --git a/inc/register/proto/tensorflow/op_def.proto b/inc/register/proto/tensorflow/op_def.proto new file mode 100644 index 000000000..3485d0453 --- /dev/null +++ b/inc/register/proto/tensorflow/op_def.proto @@ -0,0 +1,164 @@ +syntax = "proto3"; + +package domi.tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "OpDefProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; + +import "attr_value.proto"; +import "types.proto"; + +// Defines an operation. A NodeDef in a GraphDef specifies an Op by +// using the "op" field which should match the name of a OpDef. +// LINT.IfChange +message OpDef { + // Op names starting with an underscore are reserved for internal use. + // Names should be CamelCase and match the regexp "[A-Z][a-zA-Z0-9_]*". + string name = 1; + + // For describing inputs and outputs. + message ArgDef { + // Name for the input/output. Should match the regexp "[a-z][a-z0-9_]*". + string name = 1; + + // Human readable description. + string description = 2; + + // Describes the type of one or more tensors that are accepted/produced + // by this input/output arg. The only legal combinations are: + // * For a single tensor: either the "type" field is set or the + // "type_attr" field is set to the name of an attr with type "type". + // * For a sequence of tensors with the same type: the "number_attr" + // field will be set to the name of an attr with type "int", and + // either the "type" or "type_attr" field will be set as for + // single tensors. + // * For a sequence of tensors, the "type_list_attr" field will be set + // to the name of an attr with type "list(type)". + DataType type = 3; + string type_attr = 4; // if specified, attr must have type "type" + string number_attr = 5; // if specified, attr must have type "int" + // If specified, attr must have type "list(type)", and none of + // type, type_attr, and number_attr may be specified. 
+ string type_list_attr = 6; + + // For inputs: if true, the inputs are required to be refs. + // By default, inputs can be either refs or non-refs. + // For outputs: if true, outputs are refs, otherwise they are not. + bool is_ref = 16; + }; + + // Description of the input(s). + repeated ArgDef input_arg = 2; + + // Description of the output(s). + repeated ArgDef output_arg = 3; + + // Description of the graph-construction-time configuration of this + // Op. That is to say, this describes the attr fields that will + // be specified in the NodeDef. + message AttrDef { + // A descriptive name for the argument. May be used, e.g. by the + // Python client, as a keyword argument name, and so should match + // the regexp "[a-z][a-z0-9_]+". + string name = 1; + + // One of the type names from attr_value.proto ("string", "list(string)", + // "int", etc.). + string type = 2; + + // A reasonable default for this attribute if the user does not supply + // a value. If not specified, the user must supply a value. + AttrValue default_value = 3; + + // Human-readable description. + string description = 4; + + + // --- Constraints --- + // These constraints are only in effect if specified. Default is no + // constraints. + + // For type == "int", this is a minimum value. For "list(___)" + // types, this is the minimum length. + bool has_minimum = 5; + int64 minimum = 6; + + // The set of allowed values. Has type that is the "list" version + // of the "type" field above (uses the "list" field of AttrValue). + // If type == "type" or "list(type)" above, then the "type" field + // of "allowed_values.list" has the set of allowed DataTypes. + // If type == "string" or "list(string)", then the "s" field of + // "allowed_values.list" has the set of allowed strings. + AttrValue allowed_values = 7; + } + repeated AttrDef attr = 4; + + // Optional deprecation based on GraphDef versions. + OpDeprecation deprecation = 8; + + // One-line human-readable description of what the Op does. 
+ string summary = 5; + + // Additional, longer human-readable description of what the Op does. + string description = 6; + + // ------------------------------------------------------------------------- + // Which optimizations this operation can participate in. + + // True if the operation is commutative ("op(a,b) == op(b,a)" for all inputs) + bool is_commutative = 18; + + // If is_aggregate is true, then this operation accepts N >= 2 + // inputs and produces 1 output all of the same type. Should be + // associative and commutative, and produce output with the same + // shape as the input. The optimizer may replace an aggregate op + // taking input from multiple devices with a tree of aggregate ops + // that aggregate locally within each device (and possibly within + // groups of nearby devices) before communicating. + bool is_aggregate = 16; // for things like add + + // Other optimizations go here, like + // can_alias_input, rewrite_when_output_unused, partitioning_strategy, etc. + + // ------------------------------------------------------------------------- + // Optimization constraints. + + // Ops are marked as stateful if their behavior depends on some state beyond + // their input tensors (e.g. variable reading op) or if they have + // a side-effect (e.g. printing or asserting ops). Equivalently, stateless ops + // must always produce the same output for the same input and have + // no side-effects. + // + // By default Ops may be moved between devices. Stateful ops should + // either not be moved, or should only be moved if that state can also + // be moved (e.g. via some sort of save / restore). + // Stateful ops are guaranteed to never be optimized away by Common + // Subexpression Elimination (CSE). + bool is_stateful = 17; // for things like variables, queue + + // ------------------------------------------------------------------------- + // Non-standard options. + + // By default, all inputs to an Op must be initialized Tensors. 
Ops + // that may initialize tensors for the first time should set this + // field to true, to allow the Op to take an uninitialized Tensor as + // input. + bool allows_uninitialized_input = 19; // for Assign, etc. +}; +// LINT.ThenChange( +// https://www.tensorflow.org/code/tensorflow/core/framework/op_def_util.cc) + +// Information about version-dependent deprecation of an op +message OpDeprecation { + // First GraphDef version at which the op is disallowed. + int32 version = 1; + + // Explanation of why it was deprecated and what to use instead. + string explanation = 2; +}; + +// A collection of OpDefs +message OpList { + repeated OpDef op = 1; +}; diff --git a/inc/register/proto/tensorflow/resource_handle.proto b/inc/register/proto/tensorflow/resource_handle.proto new file mode 100644 index 000000000..a34523512 --- /dev/null +++ b/inc/register/proto/tensorflow/resource_handle.proto @@ -0,0 +1,29 @@ +syntax = "proto3"; + +package domi.tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "ResourceHandle"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; + +// Protocol buffer representing a handle to a tensorflow resource. Handles are +// not valid across executions, but can be serialized back and forth from within +// a single run. +message ResourceHandleProto { + // Unique name for the device containing the resource. + string device = 1; + + // Container in which this resource is placed. + string container = 2; + + // Unique name of this resource. + string name = 3; + + // Hash code for the type of the resource. Is only valid in the same device + // and in the same execution. + uint64 hash_code = 4; + + // For debug-only, the name of the type pointed to by this handle, if + // available. 
+ string maybe_type_name = 5; +}; diff --git a/inc/register/proto/tensorflow/tensor.proto b/inc/register/proto/tensorflow/tensor.proto new file mode 100644 index 000000000..d0a4d024c --- /dev/null +++ b/inc/register/proto/tensorflow/tensor.proto @@ -0,0 +1,94 @@ +syntax = "proto3"; + +package domi.tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "TensorProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; + +import "resource_handle.proto"; +import "tensor_shape.proto"; +import "types.proto"; + +// Protocol buffer representing a tensor. +message TensorProto { + DataType dtype = 1; + + // Shape of the tensor. + TensorShapeProto tensor_shape = 2; + + // Only one of the representations below is set, one of "tensor_contents" and + // the "xxx_val" attributes. We are not using oneof because as oneofs cannot + // contain repeated fields it would require another extra set of messages. + + // Version number. + // + // In version 0, if the "repeated xxx" representations contain only one + // element, that element is repeated to fill the shape. This makes it easy + // to represent a constant Tensor with a single value. + int32 version_number = 3; + + // Serialized raw tensor content from either Tensor::AsProtoTensorContent or + // memcpy in tensorflow::grpc::EncodeTensorToByteBuffer. This representation + // can be used for all tensor types. The purpose of this representation is to + // reduce serialization overhead during RPC call by avoiding serialization of + // many repeated small items. + bytes tensor_content = 4; + + // Type specific representations that make it easy to create tensor protos in + // all languages. Only the representation corresponding to "dtype" can + // be set. The values hold the flattened representation of the tensor in + // row major order. + + // DT_HALF, DT_BFLOAT16. Note that since protobuf has no int16 type, we'll + // have some pointless zero padding for each value here. 
+ repeated int32 half_val = 13 [packed = true]; + + // DT_FLOAT. + repeated float float_val = 5 [packed = true]; + + // DT_DOUBLE. + repeated double double_val = 6 [packed = true]; + + // DT_INT32, DT_INT16, DT_INT8, DT_UINT8. + repeated int32 int_val = 7 [packed = true]; + + // DT_STRING + repeated bytes string_val = 8; + + // DT_COMPLEX64. scomplex_val(2*i) and scomplex_val(2*i+1) are real + // and imaginary parts of i-th single precision complex. + repeated float scomplex_val = 9 [packed = true]; + + // DT_INT64 + repeated int64 int64_val = 10 [packed = true]; + + // DT_BOOL + repeated bool bool_val = 11 [packed = true]; + + // DT_COMPLEX128. dcomplex_val(2*i) and dcomplex_val(2*i+1) are real + // and imaginary parts of i-th double precision complex. + repeated double dcomplex_val = 12 [packed = true]; + + // DT_RESOURCE + repeated ResourceHandleProto resource_handle_val = 14; + + // DT_VARIANT + repeated VariantTensorDataProto variant_val = 15; + + // DT_UINT32 + repeated uint32 uint32_val = 16 [packed = true]; + + // DT_UINT64 + repeated uint64 uint64_val = 17 [packed = true]; +}; + +// Protocol buffer representing the serialization format of DT_VARIANT tensors. +message VariantTensorDataProto { + // Name of the type of objects being serialized. + string type_name = 1; + // Portions of the object that are not Tensors. + bytes metadata = 2; + // Tensors contained within objects being serialized. + repeated TensorProto tensors = 3; +} diff --git a/inc/register/proto/tensorflow/tensor_shape.proto b/inc/register/proto/tensorflow/tensor_shape.proto new file mode 100644 index 000000000..4225a2e37 --- /dev/null +++ b/inc/register/proto/tensorflow/tensor_shape.proto @@ -0,0 +1,45 @@ +// Protocol buffer representing the shape of tensors. 
+ +syntax = "proto3"; +option cc_enable_arenas = true; +option java_outer_classname = "TensorShapeProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; + +package domi.tensorflow; + +// Dimensions of a tensor. +message TensorShapeProto { + // One dimension of the tensor. + message Dim { + // Size of the tensor in that dimension. + // This value must be >= -1, but values of -1 are reserved for "unknown" + // shapes (values of -1 mean "unknown" dimension). Certain wrappers + // that work with TensorShapeProto may fail at runtime when deserializing + // a TensorShapeProto containing a dim value of -1. + int64 size = 1; + + // Optional name of the tensor dimension. + string name = 2; + }; + + // Dimensions of the tensor, such as {"input", 30}, {"output", 40} + // for a 30 x 40 2D tensor. If an entry has size -1, this + // corresponds to a dimension of unknown size. The names are + // optional. + // + // The order of entries in "dim" matters: It indicates the layout of the + // values in the tensor in-memory representation. + // + // The first entry in "dim" is the outermost dimension used to layout the + // values, the last entry is the innermost dimension. This matches the + // in-memory layout of RowMajor Eigen tensors. + // + // If "dim.size()" > 0, "unknown_rank" must be false. + repeated Dim dim = 2; + + // If true, the number of dimensions in the shape is unknown. + // + // If true, "dim.size()" must be 0. 
+ bool unknown_rank = 3; +}; diff --git a/inc/register/proto/tensorflow/types.proto b/inc/register/proto/tensorflow/types.proto new file mode 100644 index 000000000..ba7a72b30 --- /dev/null +++ b/inc/register/proto/tensorflow/types.proto @@ -0,0 +1,74 @@ +syntax = "proto3"; + +package domi.tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "TypesProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; + +// LINT.IfChange +enum DataType { + // Not a legal value for DataType. Used to indicate a DataType field + // has not been set. + DT_INVALID = 0; + + // Data types that all computation devices are expected to be + // capable to support. + DT_FLOAT = 1; + DT_DOUBLE = 2; + DT_INT32 = 3; + DT_UINT8 = 4; + DT_INT16 = 5; + DT_INT8 = 6; + DT_STRING = 7; + DT_COMPLEX64 = 8; // Single-precision complex + DT_INT64 = 9; + DT_BOOL = 10; + DT_QINT8 = 11; // Quantized int8 + DT_QUINT8 = 12; // Quantized uint8 + DT_QINT32 = 13; // Quantized int32 + DT_BFLOAT16 = 14; // Float32 truncated to 16 bits. Only for cast ops. + DT_QINT16 = 15; // Quantized int16 + DT_QUINT16 = 16; // Quantized uint16 + DT_UINT16 = 17; + DT_COMPLEX128 = 18; // Double-precision complex + DT_HALF = 19; + DT_RESOURCE = 20; + DT_VARIANT = 21; // Arbitrary C++ data types + DT_UINT32 = 22; + DT_UINT64 = 23; + + // Do not use! These are only for parameters. Every enum above + // should have a corresponding value below (verified by types_test). 
+ DT_FLOAT_REF = 101; + DT_DOUBLE_REF = 102; + DT_INT32_REF = 103; + DT_UINT8_REF = 104; + DT_INT16_REF = 105; + DT_INT8_REF = 106; + DT_STRING_REF = 107; + DT_COMPLEX64_REF = 108; + DT_INT64_REF = 109; + DT_BOOL_REF = 110; + DT_QINT8_REF = 111; + DT_QUINT8_REF = 112; + DT_QINT32_REF = 113; + DT_BFLOAT16_REF = 114; + DT_QINT16_REF = 115; + DT_QUINT16_REF = 116; + DT_UINT16_REF = 117; + DT_COMPLEX128_REF = 118; + DT_HALF_REF = 119; + DT_RESOURCE_REF = 120; + DT_VARIANT_REF = 121; + DT_UINT32_REF = 122; + DT_UINT64_REF = 123; +} +// LINT.ThenChange( +// https://www.tensorflow.org/code/tensorflow/c/c_api.h, +// https://www.tensorflow.org/code/tensorflow/go/tensor.go, +// https://www.tensorflow.org/code/tensorflow/core/framework/tensor.cc, +// https://www.tensorflow.org/code/tensorflow/core/framework/types.h, +// https://www.tensorflow.org/code/tensorflow/core/framework/types.cc, +// https://www.tensorflow.org/code/tensorflow/python/framework/dtypes.py, +// https://www.tensorflow.org/code/tensorflow/python/framework/function.py) diff --git a/inc/register/proto/tensorflow/versions.proto b/inc/register/proto/tensorflow/versions.proto new file mode 100644 index 000000000..48061218a --- /dev/null +++ b/inc/register/proto/tensorflow/versions.proto @@ -0,0 +1,31 @@ +syntax = "proto3"; + +package domi.tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "VersionsProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; + +// Version information for a piece of serialized data +// +// There are different types of versions for each type of data +// (GraphDef, etc.), but they all have the same common shape +// described here. +// +// Each consumer has "consumer" and "min_producer" versions (specified +// elsewhere). 
A consumer is allowed to consume this data if +// +// producer >= min_producer +// consumer >= min_consumer +// consumer not in bad_consumers +// +message VersionDef { + // The version of the code that produced this data. + int32 producer = 1; + + // Any consumer below this version is not allowed to consume this data. + int32 min_consumer = 2; + + // Specific consumer versions which are disallowed (e.g. due to bugs). + repeated int32 bad_consumers = 3; +}; diff --git a/inc/register/register.h b/inc/register/register.h new file mode 100644 index 000000000..32adaea06 --- /dev/null +++ b/inc/register/register.h @@ -0,0 +1,53 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_REGISTER_REGISTRY_H_ +#define INC_REGISTER_REGISTRY_H_ + +#include "external/register/register.h" + +namespace ge { +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY HostCpuOp { + public: + HostCpuOp() = default; + virtual ~HostCpuOp() = default; + + virtual graphStatus Compute(Operator &op, + const std::map &inputs, + std::map &outputs) = 0; +}; + +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY HostCpuOpRegistrar { + public: + HostCpuOpRegistrar(const char *op_type, HostCpuOp *(*create_fn)()); + ~HostCpuOpRegistrar() = default; +}; + +#define REGISTER_HOST_CPU_OP_BUILDER(name, op) \ + REGISTER_HOST_CPU_OP_BUILDER_UNIQ_HELPER(__COUNTER__, name, op) + +#define REGISTER_HOST_CPU_OP_BUILDER_UNIQ_HELPER(ctr, name, op) \ + REGISTER_HOST_CPU_OP_BUILDER_UNIQ(ctr, name, op) + +#define REGISTER_HOST_CPU_OP_BUILDER_UNIQ(ctr, name, op) \ + static ::ge::HostCpuOpRegistrar register_host_cpu_op##ctr \ + __attribute__((unused)) = \ + ::ge::HostCpuOpRegistrar(name, []()->::ge::HostCpuOp* { \ + return new (std::nothrow) op(); \ + }) +} // namespace ge + +#endif //INC_REGISTER_REGISTRY_H_ diff --git a/inc/register/register_format_transfer.h b/inc/register/register_format_transfer.h new file mode 100644 index 000000000..5cbf4ab42 --- /dev/null +++ b/inc/register/register_format_transfer.h @@ -0,0 +1,79 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_REGISTER_REGISTER_FORMAT_TRANSFER_H_ +#define INC_REGISTER_REGISTER_FORMAT_TRANSFER_H_ + +#include +#include +#include + +#include "external/graph/types.h" +#include "ge/ge_api_error_codes.h" + +namespace ge { +namespace formats { +struct TransArgs { + const uint8_t *data; + Format src_format; + Format dst_format; + // For scenes that need to supplement the shape, for example, 5D to 4D + // It is not possible to convert the format normally if you only get the src_shape, + // and must get the shape before you mend the shape. + // So the parameters here need to be passed in both src_shape and dst_shape + std::vector src_shape; + std::vector dst_shape; + DataType src_data_type; +}; + +struct TransResult { + std::shared_ptr data; + // data length in bytes + size_t length; +}; + +class FormatTransfer { + public: + virtual ~FormatTransfer() = default; + virtual Status TransFormat(const TransArgs &args, TransResult &result) = 0; + virtual Status TransShape(Format src_format, const std::vector &src_shape, DataType data_type, + Format dst_format, std::vector &dst_shape) = 0; +}; + +using FormatTransferBuilder = std::function()>; + +class FormatTransferRegister { + public: + FormatTransferRegister(FormatTransferBuilder builder, Format src, Format dst); + ~FormatTransferRegister() = default; +}; + +#define REGISTER_FORMAT_TRANSFER(TransferClass, format1, format2) \ + namespace { \ + FormatTransferRegister format_transfer_register_##TransferClass##format1##format2( \ + []() { return std::make_shared(); }, format1, format2); \ + } + +/// Build a formattransfer according to 'args' +/// @param args +/// @param result +/// @return +std::shared_ptr BuildFormatTransfer(const TransArgs &args); + +bool FormatTransferExists(const TransArgs &args); +} // namespace formats +} // namespace ge +#endif // INC_REGISTER_REGISTER_FORMAT_TRANSFER_H_ \ No newline at end of file diff --git a/inc/register/scope/scope_graph_impl.h b/inc/register/scope/scope_graph_impl.h new file 
mode 100644 index 000000000..7d022d20f --- /dev/null +++ b/inc/register/scope/scope_graph_impl.h @@ -0,0 +1,143 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef REGISTER_SCOPE_SCOPE_GRAPH_IMPL_H_ +#define REGISTER_SCOPE_SCOPE_GRAPH_IMPL_H_ + +#include "external/register/scope/scope_fusion_pass_register.h" +#include "proto/tensorflow/node_def.pb.h" +#include "proto/tensorflow/graph.pb.h" + +namespace ge { +class Scope::ScopeImpl { + public: + explicit ScopeImpl(const std::string &name, const std::string &sub_type = "", Scope *father_scope = nullptr) + : name_(name), sub_type_(sub_type), father_scope_(father_scope) {} + ~ScopeImpl(); + + std::string Name() const { return name_; } + std::string SubType() const { return sub_type_; } + void SetSubType(const std::string &sub_type) { sub_type_ = sub_type; } + void ClearTypeAndSubType(); + void AddNode(ge::OperatorPtr node_def); + std::vector Nodes() const { return nodes_; } + std::vector AllNodes() const; + std::map AllNodesMap() const; + void AddSubScope(Scope *scope) { sub_scopes_[scope->Name()] = scope; } + Scope *GetSubScope(const std::string &scope_name) const; + std::map GetSubScopes() const { return sub_scopes_; } + std::vector GetAllSubScopes() const; + int32_t GetOpTypeNum(const std::string &op_type) const; + void OpsNumInc(const std::string &op_type); + std::string LastName() const; + const Scope *GetFatherScope() const { return 
father_scope_; } + // trim scope_index + static std::string TrimScopeIndex(const std::string &scope_name); + + private: + std::string name_; + std::string sub_type_; + Scope *father_scope_; + std::map op_nums_; + std::map sub_scopes_; + std::vector nodes_; +}; + +class FusionScopesResult::FusionScopesResultImpl { + public: + FusionScopesResultImpl() {} + ~FusionScopesResultImpl() {}; + void SetName(const std::string &name) { name_ = name; } + void SetType(const std::string &type) { type_ = type; } + void SetDescription(const std::string &description) { description_ = description; } + std::string Name() const { return name_; } + std::string Type() const { return type_; } + std::string Description() const { return description_; } + void AddNodes(std::vector nodes); + std::vector Nodes() const { return nodes_; } + void AddScopes(const std::vector &scopes) { scopes_.insert(scopes_.end(), scopes.begin(), scopes.end()); } + std::vector Scopes() const { return scopes_; } + std::map> GetInputs() const { return inputs_; } + std::map> GetOutputs() const { return outputs_; } + void InsertInputs(const std::string &inner_op_name, const std::vector &index_map); + void InsertOutputs(const std::string &inner_op_name, const std::vector &index_map); + bool FindNodes(const std::string &node_name) const; + bool FindScopes(const std::string &scope_name) const; + + private: + std::string name_; + std::string type_; + std::string description_; + std::vector scopes_; + std::vector nodes_; + std::map> inputs_; + std::map> outputs_; +}; + +class ScopeTree::ScopeTreeImpl { + public: + ScopeTreeImpl() : root_(nullptr) {} + ScopeTreeImpl(const ScopeTreeImpl &) = delete; + ScopeTreeImpl &operator=(const ScopeTreeImpl &) = delete; + Status Init(); + ~ScopeTreeImpl(); + + void AddNodeToScope(ge::OperatorPtr node_def); + std::vector GetAllScopes() const { return scopes_; } + const Scope *Root() const { return root_; } + + private: + std::vector SplitNodeName(const std::string &node_name, char 
delim) const; + Scope *root_; + std::vector scopes_; +}; + +struct ScopeFusionOpInfo { + std::string node_name; + std::string fusion_node_name; + std::string fusion_op_type; + std::string description; + bool scope_pass = true; +}; + +class ScopeGraph::ScopeGraphImpl { + public: + ScopeGraphImpl() : scope_tree_(nullptr) {} + ScopeGraphImpl(const ScopeGraphImpl &) = delete; + ScopeGraphImpl &operator=(const ScopeGraphImpl &) = delete; + Status Init(); + ~ScopeGraphImpl(); + + const ScopeTree *GetScopeTree() const { return scope_tree_; } + void BuildScopeGraph(domi::tensorflow::GraphDef *graph_def); + void AddFusionScopesResult(FusionScopesResult *result); + std::map FusionScopesResults() const { return fusion_results_; } + FusionScopesResult *GetFusionScopesResults(const domi::tensorflow::NodeDef *node_def) const; + std::map GetNodesMap() const { return nodes_map_; } + bool IsFusionOpChild(const std::string &node_name, std::vector &info_list); + bool FusionOpChildIgnore(const ScopeFusionOpInfo &info); + bool IsFusionOp(const domi::tensorflow::NodeDef *node_def); + Status GetInputOrOutputIndex(const ScopeFusionOpInfo &info, int32_t old_index, bool input, int32_t &new_index); + + private: + std::vector GetFusionResultInputOrOutput(const ScopeFusionOpInfo &info, bool input); // input:true,output:false + void CheckScopesResult(FusionScopesResult *fusion_node); + std::map fusion_results_; + std::map nodes_map_; + ScopeTree *scope_tree_; +}; +} // namespace ge +#endif // REGISTER_SCOPE_SCOPE_GRAPH_IMPL_H_ \ No newline at end of file diff --git a/inc/register/scope/scope_pass_impl.h b/inc/register/scope/scope_pass_impl.h new file mode 100644 index 000000000..52f132884 --- /dev/null +++ b/inc/register/scope/scope_pass_impl.h @@ -0,0 +1,61 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef REGISTER_SCOPE_SCOPE_PASS_IMPL_H_ +#define REGISTER_SCOPE_SCOPE_PASS_IMPL_H_ + +#include "external/register/scope/scope_fusion_pass_register.h" + +namespace ge { +class ScopesResult::ScopesResultImpl { + public: + void SetScopes(std::vector &scopes) { scopes_ = scopes; } + std::vector GetScopes() const { return scopes_; } + void SetNodes(std::vector nodes) { nodes_ = nodes; } + std::vector GetNodes() const { return nodes_; } + + private: + std::vector scopes_; // multiple scopes + std::vector nodes_; // op outside of scope +}; + +class ScopeBasePass::ScopeBasePassImpl { + public: + ScopeBasePassImpl(ScopeBasePass *parent) : parent_(parent) {} + virtual ~ScopeBasePassImpl(); + + Status Run(std::shared_ptr &scope_graph); + + private: + Status AddFusionScopesResultToScopeGraph(std::shared_ptr &scope_graph, + std::vector &scope_results); + // Match rules one by one, support multiple sets of matching rules, and finally output a single scope + // Note: This function does not have to be rewritten. + // In order to match the fusion rules designed by you better, + // you can implement your specific versions separately. 
+ bool MatchAllBatches(const ScopeTree *scope_tree, std::vector &results); + + bool MatchOneBatch(const ScopeTree *scope_tree, const std::vector &patternlist, + std::vector &results); + bool MatchOneScope(const ScopePattern *pattern, Scope *scope, std::vector &results); + Status PrintFusionScopeInfo(std::shared_ptr &scope_graph); + + private: + std::vector patterns_; + ScopeBasePass *parent_; +}; +} // namespace ge +#endif // REGISTER_SCOPE_SCOPE_PASS_IMPL_H_ \ No newline at end of file diff --git a/inc/register/scope/scope_pass_registry_impl.h b/inc/register/scope/scope_pass_registry_impl.h new file mode 100644 index 000000000..9e68dba06 --- /dev/null +++ b/inc/register/scope/scope_pass_registry_impl.h @@ -0,0 +1,40 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef REGISTER_SCOPE_SCOPE_REGISTRY_IMPL_H_ +#define REGISTER_SCOPE_SCOPE_REGISTRY_IMPL_H_ + +#include "external/register/scope/scope_fusion_pass_register.h" +#include + +namespace ge { +struct CreatePassFnPack; +class ScopeFusionPassRegistry::ScopeFusionPassRegistryImpl { + public: + void RegisterScopeFusionPass(const std::string &pass_name, ScopeFusionPassRegistry::CreateFn create_fn, + bool is_general); + ScopeFusionPassRegistry::CreateFn GetCreateFn(const std::string &pass_name); + std::unique_ptr CreateScopeFusionPass(const std::string &pass_name); + std::vector GetAllRegisteredPasses(); + bool SetPassEnableFlag(const std::string pass_name, const bool flag); + + private: + std::mutex mu_; + std::vector pass_names_; // In the order of user registration + std::map create_fn_packs_; +}; +} // namespace ge +#endif // REGISTER_SCOPE_SCOPE_REGISTRY_IMPL_H_ \ No newline at end of file diff --git a/inc/register/scope/scope_pattern_impl.h b/inc/register/scope/scope_pattern_impl.h new file mode 100644 index 000000000..1c5a7bce3 --- /dev/null +++ b/inc/register/scope/scope_pattern_impl.h @@ -0,0 +1,105 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef REGISTER_SCOPE_SCOPE_PATTERN_IMPL_H_ +#define REGISTER_SCOPE_SCOPE_PATTERN_IMPL_H_ + +#include "external/register/scope/scope_fusion_pass_register.h" + +namespace ge { +class ScopeAttrValue::ScopeAttrValueImpl { + public: + ScopeAttrValueImpl() : int_value_(0), float_value_(0.0), string_value_(""), bool_value_(false) {} + ~ScopeAttrValueImpl() {} + + void SetIntValue(int64_t value) { int_value_ = value; } + void SetFloatValue(float value) { float_value_ = value; } + void SetStringValue(std::string value) { string_value_ = value; } + void SetBoolValue(bool value) { bool_value_ = value; } + int64_t GetIntValue() { return int_value_; } + float GetFloatValue() { return float_value_; } + std::string GetStrValue() { return string_value_; } + bool GetBoolValue() { return bool_value_; } + + private: + int64_t int_value_; + float float_value_; + std::string string_value_; + bool bool_value_; +}; + +class NodeOpTypeFeature::NodeOpTypeFeatureImpl : ScopeBaseFeature { + public: + NodeOpTypeFeatureImpl(std::string nodeType, int num, int step = 0) : node_type_(nodeType), num_(num), step_(step) {} + ~NodeOpTypeFeatureImpl() {} + bool Match(const Scope *scope) override; + + public: + std::string node_type_; // Node type + int num_; // Node number + int step_; // step +}; + +class NodeAttrFeature::NodeAttrFeatureImpl : ScopeBaseFeature { + public: + NodeAttrFeatureImpl(std::string nodeType, std::string attr_name, ge::DataType datatype, ScopeAttrValue attr_value) + : node_type_(nodeType), attr_name_(attr_name), datatype_(datatype), attr_value_(attr_value) {} + ~NodeAttrFeatureImpl() {} + bool Match(const Scope *scope) override; + + public: + std::string node_type_; // Node type + std::string attr_name_; // attribute name + ge::DataType datatype_; // datatype + ScopeAttrValue attr_value_; // AttrValue +}; + +class ScopeFeature::ScopeFeatureImpl : ScopeBaseFeature { + public: + ScopeFeatureImpl(std::string sub_type, int32_t num, std::string suffix = "", + std::string 
sub_scope_mask = "", int step = 0) + : sub_type_(sub_type), num_(num), suffix_(suffix), sub_scope_mask_(sub_scope_mask), step_(step) {} + ~ScopeFeatureImpl() {} + bool Match(const Scope *scope) override; + bool SubScopesMatch(std::vector &scopes); + + public: + std::string sub_type_; + int32_t num_; + std::string suffix_; + std::string sub_scope_mask_; + int step_; +}; + +class ScopePattern::ScopePatternImpl { + public: + ScopePatternImpl() {} + ~ScopePatternImpl() {} + bool Match(const Scope *scope) const; + void SetSubType(const std::string &sub_type); + std::string SubType() const { return sub_type_; } + void AddNodeOpTypeFeature(NodeOpTypeFeature feature); + void AddNodeAttrFeature(NodeAttrFeature feature); + void AddScopeFeature(ScopeFeature feature); + + private: + std::string sub_type_; // get Scope sub type + std::vector node_optype_features_; + std::vector node_attr_features_; + std::vector scopes_features_; +}; +} // namespace ge +#endif // REGISTER_SCOPE_SCOPE_PATTERN_IMPL_H_ \ No newline at end of file diff --git a/inc/register/tensor_assign.h b/inc/register/tensor_assign.h new file mode 100644 index 000000000..57a37f6c2 --- /dev/null +++ b/inc/register/tensor_assign.h @@ -0,0 +1,103 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef TENSOR_ASSIGN_H_ +#define TENSOR_ASSIGN_H_ + +#include "graph/ge_tensor.h" +#include "proto/tensorflow/tensor.pb.h" + +namespace domi { +using GeTensorPtr = std::shared_ptr; +using Status = uint32_t; +using domi::tensorflow::TensorProto; +using google::protobuf::int32; +using google::protobuf::int64; + +class TensorAssign { + public: + static Status SetGeTensor(const TensorProto &tensor, GeTensorPtr &weight); + + static Status SetGeTensorDataType(int64_t dataType, GeTensorPtr &weight); + + static ge::DataType ConvertTensorflowDataType(uint32_t tf_data_type); + + private: + static bool CheckBoolVal(tensorflow::DataType data_type); + + static bool CheckHalfVal(tensorflow::DataType data_type); + + static bool CheckFloatVal(tensorflow::DataType data_type); + + static bool CheckDoubleVal(tensorflow::DataType data_type); + + static bool CheckComplex64Val(tensorflow::DataType data_type); + + static bool CheckComplex128Val(tensorflow::DataType data_type); + + static bool CheckStringVal(tensorflow::DataType data_type); + + static bool CheckByte(tensorflow::DataType data_type); + + static bool CheckDoubleByte(tensorflow::DataType data_type); + + static bool CheckSignedFourByte(tensorflow::DataType data_type); + + static bool CheckUnsignedFourByte(tensorflow::DataType data_type); + + static bool CheckSignedEightByte(tensorflow::DataType data_type); + + static bool CheckUnsignedEightByte(tensorflow::DataType data_type); + + static Status GetDoubleByteVal(int32_t val_size, const google::protobuf::RepeatedField &val_vector, int count, + GeTensorPtr &weight); + static Status GetByteVal(int32_t val_size, const google::protobuf::RepeatedField &val_vector, int count, + GeTensorPtr &weight); + + static Status GetStringVal(int32_t val_size, const google::protobuf::RepeatedPtrField &val_vector, + int count, GeTensorPtr &weight); + + static void SetGeTensorWeightData(const TensorProto &tensor, int32_t val_size, int count, GeTensorPtr &weight); + + static void 
SetWeightData(tensorflow::DataType data_type, int count, const std::string &tensor_content, + GeTensorPtr &weight); + + template + static Status GetVal(int32_t val_size, const google::protobuf::RepeatedField &val_vector, int count, + GeTensorPtr &weight) { + bool zerosLike = (count != val_size && val_size == 1); + T *addr = new (std::nothrow) T[count](); + GE_CHECK_NOTNULL(addr); + int minCount = (count > val_size) ? val_size : count; + if (!zerosLike) { + for (int32_t i = 0; i < minCount; i++) { + *(addr + i) = val_vector.Get(i); + } + for (int32_t i = minCount; i < count; i++) { + *(addr + i) = val_vector.Get(minCount - 1); + } + } else { + for (int32_t i = 0; i < count; i++) { + *(addr + i) = val_vector.Get(0); + } + } + (void)weight->SetData(reinterpret_cast(addr), count * sizeof(T)); + GE_DELETE_NEW_ARRAY(addr); + return SUCCESS; + } +}; +} // namespace domi +#endif // TENSOR_ASSIGN_H_ diff --git a/inc/soft_dp/ExternalSoftDp.h b/inc/soft_dp/ExternalSoftDp.h new file mode 100644 index 000000000..5b2874e7f --- /dev/null +++ b/inc/soft_dp/ExternalSoftDp.h @@ -0,0 +1,52 @@ +/** +* @file ExternalSoftDp.h +* +* Copyright (c) Huawei Technologies Co., Ltd. 2012-2018. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ + +#ifndef EXTERNALSOFTDP_H +#define EXTERNALSOFTDP_H + +#include + +extern "C" { +struct SoftDpProcsessInfo { + uint8_t* inputBuffer; + uint32_t inputBufferSize; + + uint8_t* outputBuffer; + uint32_t outputBufferSize; + + uint32_t outputWidth; + uint32_t outputHeight; + + bool isVBeforeU; // uv : true, uv : false +}; + +struct DpCropInfo { + uint32_t left; + uint32_t right; + uint32_t up; + uint32_t down; +}; + +/* + * @brief 解码、缩放接口 + * @param [in] SoftDpProcsessInfo& inMsg: 软实现结构体 + * @return success: return 0, fail: return error number + */ +uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo& inMsg); + +/* + * @brief 解码、裁剪、缩放接口 + * @param [in] SoftDpProcsessInfo& inMsg: 软实现结构体 + * @param [in] const DpCropInfo& cropInfo: 裁剪结构体 + * @return success: return 0, fail: return error number + */ +uint32_t DecodeAndCropAndResizeJpeg(SoftDpProcsessInfo& inMsg, const DpCropInfo& cropInfo); +} +#endif // EXTERNALSOFTDP_H \ No newline at end of file diff --git a/inc/tdt/data_common.h b/inc/tdt/data_common.h new file mode 100644 index 000000000..7b1d631bd --- /dev/null +++ b/inc/tdt/data_common.h @@ -0,0 +1,99 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef HOST_INNER_INC_DATA_COMMON_H_ +#define HOST_INNER_INC_DATA_COMMON_H_ + +namespace tdt { +#ifndef TDT_DATA_TYPE +#define TDT_DATA_TYPE + +/** + * @ingroup Tdt data. + * + * Tdt data type. 
+ */ +enum TdtDataType { + TDT_IMAGE_LABEL = 0, /**< Image label*/ + TDT_TFRECORD, /**< TF Record*/ + TDT_DATA_LABEL, /**< Data label*/ + TDT_END_OF_SEQUENCE, /**< End of Sequence*/ + TDT_TENSOR, /**< Tensor*/ + TDT_ABNORMAL, /**< ABNORMAL*/ + TDT_DATATYPE_MAX /**< Max*/ +}; +#endif + +/** + * @ingroup Tdt data. + * + * Tdt push data between host and device. + */ +struct TdtDataItem { + TdtDataType dataType_; /**< Input data type*/ + uint64_t label_; /**< Input data label*/ + uint64_t dataLen_; /**< Input data type length*/ + uint64_t realDataLen_; /**< Real Input data type length*/ + std::string tensorShape_; /**< Tensor shape*/ + std::string tensorType_; /**< Tensor type*/ + uint32_t cnt_; /**< Data count*/ + uint32_t currentCnt_; /**< Data current count*/ + uint64_t index_; /**< Data inde*/ + std::string tensorName_; /**< Tensor name*/ + uint64_t md5ValueHead_; /**< Data md5*/ + uint64_t md5ValueTail_; /**< Data md5*/ + std::shared_ptr dataPtr_; /**< Data pointer*/ + std::string headMD5_; /**< MD5 header, 8byte*/ + std::string tailMD5_; /**< MD5 tail, 8byte*/ +}; + +/** + * @ingroup Tdt data. + * + * Tdt push data for queuedataset ort mind-data. + */ +struct DataItem { + TdtDataType dataType_; /**< Input data type*/ + std::string tensorName_; /**< Tensor name*/ + std::string tensorShape_; /**< Tensor shape*/ + std::string tensorType_; /**< Tensor type*/ + uint64_t dataLen_; /**< Input data type length*/ + std::shared_ptr dataPtr_; /**< Data pointer*/ +}; + +/** + * @ingroup Tsdclient. + * + * tsdclient func type; + */ +enum TsdCmdType { + TSDCLOSE = 0, + TSDOPEN = 1 +}; + +/** + * @ingroup Tsdclient. + * + * tsdclient func input value object. 
+ */ +enum InputItem { + OPEN_DEVICEID = 0, + OPEN_RANKSIZE, + CLOSE_DEVICEID +}; + +} // namespace tdt +#endif // HOST_INNER_INC_DATA_COMMON_H_ diff --git a/inc/tdt/status.h b/inc/tdt/status.h new file mode 100644 index 000000000..128909393 --- /dev/null +++ b/inc/tdt/status.h @@ -0,0 +1,749 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_TDT_STATUS_H_ +#define INC_TDT_STATUS_H_ + +#include + +#ifdef __cplusplus +#include +#include +#include +#include +#else +#include +#endif + +#ifdef __cplusplus +using TDT_StatusT = uint32_t; +#else +typedef uint32_t TDT_StatusT; +#endif + +#ifndef TDT_LIB_EXPORT +#define TDT_LIB_EXPORT __attribute__((visibility("default"))) +#endif +/** + * @ingroup tdt status. 
+ * + * Tdt debug level + */ +enum { + TDT_DEBUG = 0, /**< Debug*/ + TDT_INFO = 1, /**< Info*/ + TDT_WARNING = 2, /**< Warning*/ + TDT_ERROR = 3, /**< Error*/ + TDT_FATAL = 4, /**< Fatal*/ + TDT_EVENT = 5, /**< Event*/ + TDT_OPLOG = 6, /**< Oplog*/ + TDT_TRACE = 7 /**< Trace*/ +}; + +enum { + TDT_OK_CODE = 0, + TDT_DEBUG_INFO_CODE, + TDT_INTERNAL_ERROR_CODE, + TDT_COMMON_WARNING_CODE, + TDT_PREFETCH_STOPED_CODE, + TDT_FILE_SIZE_TOO_LARGE_CODE, + TDT_FILE_INVALID_PATH_CODE, + TDT_MEMORY_EXHAUSTED_CODE, + TDT_INTERGER_REVERSED_CODE, + TDT_FILE_NOT_EXIST_CODE, + TDT_DEFAULT_CONFIG_FILE_NOT_EXIST_CODE, + TDT_INSTANCE_NOT_INITIALED_CODE, + TDT_INITIAL_FAILED_CODE, + TDT_INSTANCE_NOT_FOUND_CODE, + TDT_HDC_CREATE_SESSION_FAILED_CODE, + TDT_HDC_DESTROY_SESSION_FAILED_CODE, + TDT_HDC_SESSION_DO_NOT_EXIST_CODE, + TDT_PID_IS_EXIST_CODE, + TDT_HDC_SRV_INIT_ERROR_CODE, + TDT_HDC_SRV_CREATE_ERROR_CODE, + TDT_HDC_SRV_DESTROY_ERROR_CODE, + TDT_HDC_SRV_ACCEPT_ERROR_CODE, + TDT_HDC_SRV_CLOSED_ERROR_CODE, + TDT_HDC_INTERNAL_ERROR_CODE, + TDT_HDC_INFO_CODE, + TDT_HDC_SEND_ERROR_CODE, + TDT_MESSAGE_PARSE_ERROR_CODE, + TDT_HDC_SEG_SIZE_ERROR_CODE, + TDT_HDC_MESSAGE_NULL_CODE, + TDT_HDC_SEARFUNC_IS_NULL_CODE, + TDT_HDC_SENDMSG_FAILED_CODE, + TDT_HDC_SRV_CLOSE_CHILD_SESSION_ERROR_CODE, + TDT_HDC_SRV_CLOSE_SERVER_SESSION_ERROR_CODE, + TDT_HDC_SRV_HEART_BEAT_TIMEOUT_CODE, // 30 + TDT_HDC_DRV_ERROR_CODE, + TDT_HDC_SERVER_CLIENT_SOCKET_CLOSED_CODE, + TDT_TSD_START_FAIL_CODE, + TDT_TSD_CLEANPROC_FIRST_GETPID_FAILED_CODE, + TDT_TSD_CLEANPROC_KILL_PROCESS_FAILED_CODE, + TDT_TSD_CLEANPROC_SECOND_GETPID_FAILED_CODE, + TDT_TSD_CLEANPROC_FINAL_FAILED_CODE, + TDT_TSD_INIT_STATE_FAILED_CODE, + TDT_TSD_INIT_HDCSERVER_FAILED_CODE, + TDT_TSD_SEND_HEARTBEAT_FAILED_CODE, + TDT_TSD_CLEAN_RESOURCE_FAILED_CODE, + TDT_TSD_SEND_MSG_FAILED_CODE, + TDT_PPC_DRIVER_INIT_FAIL_CODE, + TDT_PPC_SERVER_CLIENT_CREATE_FAIL_CODE, + TDT_PPC_SERVER_CLIENT_DESTORY_FAIL_CODE, + TDT_PPC_SERVER_CLOSE_CODE, + 
TDT_PPC_GET_SET_MSG_BUFFER_FAIL_CODE, + TDT_PPC_SESSION_CONNECT_FAIL_CODE, // 40 + TDT_PPC_SESSION_NOT_EXISTED_CODE, + TDT_PPC_SEND_RECEIVE_MSG_FAIL_CODE, + TDT_PPC_MSG_FREE_FAIL_CODE, + TDT_PPC_ALLOC_MSG_FAIL_CODE, + TDT_PPC_MSG_LEN_NOT_MATCH_CODE, + TDT_PPC_MSG_BUF_NULL_CODE, + TDT_PPC_CLIENT_INVALID_PARAM_CODE, + TDT_PPC_SERVER_INVALID_PARAM_CODE, + TDT_PPC_CLIENT_RECVDATA_CONTINUE_CODE, + TDT_PPC_SERVER_CLIENT_SOCKET_CLOSED_CODE, // 50 + TDT_PPC_RECV_MSG_ERROR_CODE, + TDT_PPC_SESSION_CLOSE_ERROR_CODE, + TDT_SHUFFLE_SHUFFLE_SIZE_ILLEGAL_CODE, + TDT_SHUFFLE_ONLINE_UNIQUE_SEED_ILLEGAL_CODE, + TDT_SHUFFLE_UNABLE_TO_CREATE_SHUFFLE_LIST_CODE, + TDT_SHUFFLE_ILLEGAL_SHUFFLE_TYPE_CODE, + TDT_PREFETCH_ILLEGAL_DATATYPE_CODE, + TDT_SUPERVISOR_UNKOWN_JOB_STATE_CODE, + TDT_MAP_BUFFER_ERROR_CODE, + TDT_ALLOC_BUFFER_FAILED_CODE, + TDT_FREE_HDC_BUFFER_FAILED_CODE, + TDT_DATA_SIZE_WRONG_CODE, + TDT_MEMORY_POOL_INITED_CODE, + TDT_SENDMSG_FAILED_CODE, + TDT_INVALID_VALUE_CODE, + TDT_NO_USEFUL_MEMORY_CODE, + TDT_MESSAGE_NULL_CODE, + TDT_MEMORY_POOL_STOPPED_CODE, + TDT_HDC_MEMORY_ADDR_NOT_ALIGN_CODE, + TDT_MEMORY_POOL_GET_NULL_CODE, + TDT_MEMORY_POOL_NOT_EXISTED_CODE, + TDT_RECOVER_DATA_FAILED_CODE, + TDT_MEMORY_STATUS_ERROR_CODE, + TDT_MEMORY_POOL_UPDATE_FAILED_CODE, + TDT_MEMORY_POOL_RESIZE_FAILED_CODE, + TDT_MEMORY_DESTROY_FAILED_CODE, + TDT_EXCEED_MAX_THREAD_CODE, + TDT_WARNING_SET_THREAD_NAME_FAILED_CODE, + TDT_WRONG_PRIORITY_CODE, + TDT_JOIN_TASK_ERROR_CODE, + TDT_NULL_FUNC_CODE, + TDT_INIT_FAIL_CODE, + TDT_EXISTED_FUNC_CODE, + TDT_FILE_GET_FILE_STATE_FAIL_CODE, + TDT_FILE_OPEN_FILE_FAIL_CODE, + TDT_FILE_FILE_DESTROYED_CODE, + TDT_FILE_UNABLE_TO_GET_FILE_MEMORY_CODE, + TDT_PREFETCH_UNABLE_TO_GET_TDTDATAITEM_CODE, + TDT_HDCSERVER_DO_NOT_EXIST_CODE, + TDT_HDCSESSIONID_NOT_AVAILABLE_CODE, + TDT_SET_HDCSESSION_REFERENCE_FAILED_CODE, + TDT_HDC_RECV_MSG_ERROR_CODE, + TDT_HDC_SEND_MSG_ERROR_CODE, + TDT_FILE_CONTENT_EMPTY_CODE, + TDT_TDTSEVER_ACCEPT_FAILED_CODE, + 
TDT_CHANNEL_DO_NOT_EXIST_CODE, + TDT_NULL_POINTER_MSG_CODE, + TDT_TRAN_UNKNOWN_RSP_CODE, + TDT_TRAN_TIMEOUT_CODE, + TDT_TRAN_NOT_EXIST_CODE, + TDT_TRAN_ID_GEN_ERROR_CODE, + TDT_SEND_CHANNEL_FAILED_CODE, + TDT_SEND_CHANNEL_TIMEOUT_CODE, + TDT_QUEUE_STOPPED_CODE, + TDT_QUEUE_POP_FAILED_CODE, + TDT_QUEUE_PUSH_FAILED_CODE, + TDT_QUEUE_NOT_FIND_CODE, + TDT_QUEUE_CREATE_FAILED_CODE, + TDT_QUEUE_FULL_CODE, + TDT_QUEUE_EMPTY_CODE, + TDT_DATA_ENTO_CP_FAILED_CODE, + TDT_STOP_CP_QUEUE_FAILED_CODE, + TDT_RECV_MSG_NO_CHANNEL_INFO_ERROR_CODE, + TDT_CHANNEL_HAS_NO_SESSION_ERROR_CODE, + TDT_PREFETCH_SAMPLE_HAS_NO_LABEL_CODE, + TDT_HDC_CLIENT_INIT_ERROR_CODE, + TDT_HDC_CLIENT_CREATE_SESSION_ERROR_CODE, + TDT_HDC_CLIENT_DO_NOT_EXIST_CODE, + TDT_HDC_CLIENT_DESTROY_ERROR_CODE, + TDT_BIND_CPUCORE_FAILED_CODE, + TDT_HDC_CLIENT_CLOSED_CODE, + TDT_HDC_SRV_CLOSED_CODE, + TDT_HDC_SRV_TYPE_ERROR_CODE, + TDT_TSD_CLT_OPEN_FAILED_CODE, + TDT_TSD_CLT_CLOSE_FAILED_CODE, + TDT_TSD_CLT_INTERFACE_NOT_SUPPORT_CODE, + TDT_SUPERVISOR_ILLEGAL_HEARTBEAT_TIME_CODE, + TDT_SUPERVISOR_INOTIFY_READ_SIZE_ERROR_CODE, + TDT_SUPERVISOR_INOTIFY_INTERRUPT_CODE, + TDT_SUPERVISOR_INOTIFY_INIT_ERROR_CODE, + TDT_SUPERVISOR_CLOSE_INOTIFYFD_FAIL_CODE, + TDT_SUPERVISOR_INOTIFY_WATCH_ERROR_CODE, + TDT_TRANSFER_CANNOT_OPEN_CONFIGFILE_CODE, + TDT_TRANSFER_PARSE_FILE_FAILED_CODE, + TDT_TRANSFER_NO_CHANNEL_DATA_CODE, + TDT_PREFETCH_CREATE_FAILED_CODE, + TDT_TRANSFER_NO_PARAMETER_CODE, + TDT_TRANSFER_NO_PARAMETER_ARG_CODE, + TDT_FILE_TYPE_UNSUPPORT_CODE, + TDT_FILE_DIR_IS_NULL_CODE, + TDT_FILE_GET_DIR_TREE_ERROR_CODE, + TDT_FILE_CANNOT_OPEN_DIR_CODE, + TDT_PREFETCH_SAMPLE_CANNOT_BE_READ_CODE, + TDT_PREFETCH_DATA_QUEUE_IS_CLOSED_CODE, + TDT_PREFETCH_GET_SHUFFLE_RESULT_FAIL_CODE, + TDT_FILE_CANNOT_DFREE_FILE_MEMORY_CODE, + TDT_TRANSFER_CREATE_DELIVER_FAILED_CODE, + TDT_TRANSFER_TRAIN_DATA_DELIVER_IS_NULLPTR_CODE, + TDT_TRANSFER_EMPTY_GROUPNAME_IN_MULTI_GROUPS_CODE, + TDT_TRANSFER_DUPLICATE_GROUPNAME_CODE, + 
TDT_TRANSFER_DUPLICATE_DEVICE_CODE, + TDT_TRANSFER_FIND_DEVICE_FAIL_CODE, + TDT_SUPERVISOR_FAIL_TO_WRITE_PID_FILE_CODE, + TDT_SUPERVISOR_HEARTBEAT_FILE_NOT_INITED_CODE, + TDT_SUPERVISOR_JOB_COMMAND_FILE_NOT_INITED_CODE, + TDT_SUPERVISOR_JOB_STATE_FILE_NOT_INITED_CODE, + TDT_PREFETCH_LABEL_FILE_NOT_INITED_CODE, + TDT_PREFETCH_SAMPLE_FILE_DIR_NOT_INITED_CODE, + TDT_PREFETCH_NOT_INITED_CODE, + TDT_PREFETCH_SHUFFLER_NOT_CREATED_CODE, + TDT_SHUFFLE_NOT_INITED_CODE, + TDT_PREFETCH_SHUFFLED_ITEM_OUT_OF_FILE_LIST_CODE, + TDT_TRANSFER_INIT_FAILED_CODE, + TDT_TRANSFER_START_FAILED_CODE, + TDT_FOLDER_CANNOT_BE_CREATED_CODE, + TDT_CANNOT_GET_STAT_OF_FOLDER_CODE, + TDT_FOLDER_IS_FILE_CODE, + TDT_TRANSFER_CONFIG_FIEL_SYNTAX_ERROR_CODE, + TDT_CHECKSUM_ILLEGAL_MD5_PARAM_CODE, + TDT_CHECKSUM_MD5_INIT_FAILED_CODE, + TDT_CHECKSUM_MD5_UPDATE_FAILED_CODE, + TDT_CHECKSUM_MD5_FINAL_FAILED_CODE, + TDT_TRANSFER_DELIVER_IS_NONE_CODE, + TDT_SUPERVISOR_FAIL_TO_DEL_JOB_CMD_FILE_CODE, + TDT_TRANSFER_FAIL_TO_GET_ENV_VARIABLE_CODE, + TDT_MONITOR_INOTIFY_INIT_ERROR_CODE, + TDT_MONITOR_INOTIFY_WATCH_ERROR_CODE, + TDT_MONITOR_CLOSE_INOTIFYFD_FAIL_CODE, + TDT_MONITOR_INOTIFY_READ_SIZE_ERROR_CODE, + TDT_MONITOR_UNSUPPORT_CFGITEM_CODE, + TDT_MONITOR_FAIL_TO_SET_CFGITEM_CODE, + TDT_MONITOR_READ_FILE_FAIL_CODE, + TDT_MONITOR_CONFIG_FILE_FORMAT_ERROR_CODE, + TDT_MONITOR_STRCAT_FAILED_CODE, + TDT_MONITOR_CREATE_CONFIG_FILE_FAIL_CODE, + TDT_PREFETCH_FAIL_TO_GENERATE_MD5_CODE, + TDT_RECV_MSG_MD5_WRONG_CODE, + TDT_RECV_MSG_FAIL_TO_GENERATE_MD5_CODE, + TDT_RECV_MSG_SEQUENCE_ERROR_CODE, + TDT_SERVER_MEMORY_COPY_FAILED_CODE, + TDT_DEVICEID_ERROR_CODE, + TDT_MEMORY_DATA_TYPE_FACTORY_MAKE_SHARED_FAILED_CODE, + TDT_PREFETCH_FILELIST_NOT_EXIST_CODE, + TDT_PREFETCH_SAMPLE_FILE_NOT_FOUND_CODE, + TDT_PREFETCH_FILE_OPEN_FAIL_CODE, + TDT_PREFETCH_FILE_STAT_FAIL_CODE, + TDT_PREFETCH_FILE_MMAP_FAIL_CODE, + TDT_PREFETCH_FILE_UNMAP_FAIL_CODE, + TDT_PREFETCH_FILE_CLOSE_FAIL_CODE, + TDT_PREFETCH_FILE_PARSE_FAIL_CODE, + 
TDT_PREFETCH_CRC32_SIZE_FAIL_CODE, + TDT_PREFETCH_CRC32_DATA_FAIL_CODE, + TDT_PREFETCH_DATA_QUEUE_CLOSED_CODE, + TDT_PREFETCH_INITIALIZE_FAILED_CODE, + TDT_PREFETCH_MAP_INSERT_FAILED_CODE, + TDT_PREFETCH_INVALID_FILELIST_LINE_CODE, + TDT_FILE_STRINGSTREAM_TO_VALUE_FAILED_CODE, + TDT_LIST_ID_OFFSET_LENGTH_POSITIVE_INTEGER_FAILED_CODE, + TDT_SHUFFLE_ILLEGAL_SHUFFLE_PARAM_CODE, + TDT_FILE_SHUFFLER_CREATE_FAILED_CODE, + TDT_FILE_UPLOADER_CREATE_FAILED_CODE, + TDT_FILE_DOWNLOADER_CREATE_FAILED_CODE, + TDT_OBS_CONFIG_INFORMATION_FAIL_CODE, + TDT_OBS_CALLBACK_ARGUMENT_FAIL_CODE, + TDT_OBS_DOWNLOAD_CREATE_THREAD_FAILED_CODE, + TDT_OBS_DOWNLOAD_FILE_FAIL_CODE, + TDT_OBS_DOWNLOAD_INIT_FAIL_CODE, + TDT_OBS_DOWNLOAD_METADATA_FAIL_CODE, + TDT_OBS_LIST_BUCKET_OBJECTS_FAIL_CODE, + TDT_MEMORY_MEMCPY_FAILED_CODE, + TDT_MEMORY_MEMSET_FAILED_CODE, + TDT_MKDIR_CMD_FAILED_CODE, + TDT_CP_CMD_FAILED_CODE, + TDT_HOST_INIT_FAILED_CODE, + TDT_HOST_CHANNEL_NAME_EMPTY_CODE, + TDT_HOST_ALLOCATE_MEMORY_FAILED_CODE, + TDT_HOST_MEMORY_COPY_FAILED_CODE, + TDT_HOST_UNABLE_GET_TDTDATAELEM_CODE, + TDT_HOST_PUSH_NOT_INIT_CODE, + TDT_TUNING_DATA_TRANSFER_INIT_FAILED_CODE, + TDT_TUNING_DATA_RECEIVE_CHECK_PARA_ERROR_CODE, + TDT_TUNING_DATA_TRANSFER_PARAMETER_ERROR_CODE, + TDT_RECV_MSG_CHECKSUM_WRONG_ERROR_CODE, + TDT_SVM_INIT_FAILED_CODE, + TDT_SVM_FREE_PIN_FAILED_CODE, + TDT_SVM_FREE_SVM_FAILED_CODE, + TDT_SVM_ADD_BUFFER_MAP_FAILED_CODE, + TDT_STATUS_CODE_TOTAL +}; + +/** + * @ingroup Tdt status + * @brief Regiter error code + * @param moduleId [IN] Module ID + * @param logLevel [IN] Log level + * @param CODE_NAME [out] Error name + * @param codeDesc [IN] Error description + */ +#ifdef __cplusplus +#define TDT_DEF_ERROR_CODE(moduleId, logLevel, CODE_NAME, codeDesc) \ + constexpr TDT_StatusT CODE_NAME = ((0xFFFF & ((uint16_t)moduleId)) << 16) | \ + (0xF000 & (((uint16_t)logLevel) << 12)) | (0x0FFF & (CODE_NAME##_CODE)); \ + const tdt::ErrorNoRegisterar g_##CODE_NAME##_errorno(CODE_NAME, codeDesc); +#else 
+#define TDT_DEF_ERROR_CODE(moduleId, logLevel, CODE_NAME, codeDesc) \ + static const TDT_StatusT CODE_NAME = \ + ((0xFFFF & ((uint16_t)moduleId)) << 16) | (0xF000 & (((uint16_t)logLevel) << 12)) | (0x0FFF & CODE_NAME##_CODE); +#endif + +/** + * @ingroup Tdt status + * @brief Get error level according error name + * @param CODE_NAME [IN] Error code + * @param codeDesc [OUT] Error description + */ +#define TDT_GET_ERROR_LEVEL(CODE_NAME) ((CODE_NAME & 0x0000F000) >> 12) + +#ifdef __cplusplus +#define TDT_GET_ERROR_STR(CODE_NAME) (tdt::StatusFactory::GetInstance()->GetErrDesc(CODE_NAME)) +#endif + +// Register module id: 0xAABB, AA means system level number, BB means module level number +constexpr uint16_t MODID_TDT_CLIENT = 0x0101; // TDT_CLIENT module ID +constexpr uint16_t MODID_TSD_SERVER = 0x0102; // TSD_SERVER +constexpr uint16_t MODID_HDC = 0x0103; // HDC_SERVER +constexpr uint16_t MODID_TDT_SHUFFLE = 0x0104; // TDT shuffle module ID +constexpr uint16_t MODID_TDT_PREFETCH = 0x0105; // TDT prefetch module ID +constexpr uint16_t MODID_TDT_TRANSFER = 0x0106; // TDT TrainDataTransfer module ID +constexpr uint16_t MODID_TDT_SUPERVISOR = 0x0107; // TDT supervisor模块ID +constexpr uint16_t MODID_MEM_POOL = 0x0108; // MEMORY_POOL +constexpr uint16_t MODID_PPC = 0x0109; // TDT PPC +constexpr uint16_t MODID_TDT_FILE = 0x0110; // TDT file operation module ID +constexpr uint16_t MODID_HDC_SERVER = 0x0111; // HDC_SERVER module ID +constexpr uint16_t MODID_TDT_SERVER = 0x0112; // TDTServer module ID +constexpr uint16_t MODID_HDC_CLIENT = 0x0113; // HDC_CLIENT module ID +constexpr uint16_t MODID_TSD_CLIENT = 0x0114; // TSD_CLIENT module ID +constexpr uint16_t MODID_CHECKSUM = 0x0115; // Checksum module ID +constexpr uint16_t MODID_TDT_MONITOR = 0x0116; // TDT monitor module ID +constexpr uint16_t MODID_TDT_HOST = 0x0117; // GE adapts the TDT HOST module ID +constexpr uint16_t MODID_SVM = 0x0118; // SVM Driver module ID + +constexpr uint32_t TDT_API_MAX_SUB_VERSION = 100; 
+static const int32_t TDT_INVAILED_DEVICE_ID = 0xFFFFFFFF; + +typedef enum tdt_api_version { + TDT_API_VERSION_V1_00 = 100, + TDT_API_VERSION_V1_01 = 101, + TDT_API_VERSION_V2_00 = 200 +} TDT_API_VERSION; + +#ifdef __cplusplus +namespace tdt { +class StatusFactory { + public: + /** + * @ingroup hiaiengine + * @brief Get a pointer to StatusFactory + * @param [in]: + * @return StatusFactory pointer + */ + TDT_LIB_EXPORT static StatusFactory *GetInstance(); + + /** + * @ingroup hiaiengine + * @brief Registration error code + * @param [in]err error code + * @param [in]desc Description string of the error code + */ + TDT_LIB_EXPORT void RegisterErrorNo(const uint32_t err, const std::string &desc); + + /** + * @ingroup hiaiengine + * @brief Get error code description string + * @param [in]err error code + */ + std::string GetErrDesc(const uint32_t err); + + /** + * @ingroup hiaiengine + * @brief Static function: Get error code description string + * @param [in]err error code + * return : If there is a problem, return the empty string "" + */ + static std::string GetErrCodeDesc(uint32_t errCode); + + protected: + /** + * @ingroup hiaiengine + * @brief Constructor + * @param [in] void + */ + StatusFactory(); + + /** + * @ingroup hiaiengine + * @brief Destructor + * @param [in] void + */ + ~StatusFactory() {} + + StatusFactory(const StatusFactory &) = delete; + StatusFactory(StatusFactory &&) = delete; + StatusFactory &operator=(const StatusFactory &) = delete; + StatusFactory &operator=(StatusFactory &&) = delete; + + static std::mutex &GetMutex(); + + private: + std::mutex rwMutex_; + std::map errDesc_; +}; + +class ErrorNoRegisterar { + public: + /** + * @ingroup hiaiengine + * @brief Registration error code + * @param [in]err error code + * @param [in]desc Description of the registration error code + */ + ErrorNoRegisterar(const uint32_t &err, const std::string &desc) { + StatusFactory::GetInstance()->RegisterErrorNo(err, desc); + } + + ~ErrorNoRegisterar() {} + 
ErrorNoRegisterar(const ErrorNoRegisterar &) = delete; + ErrorNoRegisterar(ErrorNoRegisterar &&) = delete; + ErrorNoRegisterar &operator=(const ErrorNoRegisterar &) = delete; + ErrorNoRegisterar &operator=(ErrorNoRegisterar &&) = delete; +}; +} // namespace tdt +#endif + +// register error code +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_INFO, TDT_OK, "running ok"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_DEBUG, TDT_DEBUG_INFO, "debug info"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_INTERNAL_ERROR, "internal error"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_WARNING, TDT_COMMON_WARNING, "warnging"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_INFO, TDT_PREFETCH_STOPED, "stopped"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_FILE_NOT_EXIST, "File is not existed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_DEFAULT_CONFIG_FILE_NOT_EXIST, "Default config file not exist"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_FILE_SIZE_TOO_LARGE, "file size is too large"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_FILE_INVALID_PATH, "file path is invalid"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_MEMORY_EXHAUSTED, "memory exhausted error"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_INTERGER_REVERSED, "interger reached reverse"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_INSTANCE_NOT_INITIALED, + "call member function before instance initialed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_INITIAL_FAILED, "initial failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_INSTANCE_NOT_FOUND, "instance not found"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_HDC_CREATE_SESSION_FAILED, "create hdc session failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_HDC_DESTROY_SESSION_FAILED, "destory hdc session failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_HDC_SESSION_DO_NOT_EXIST, "hdc session id do not exist"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, 
TDT_ERROR, TDT_PID_IS_EXIST, "tdtMain pid is exist"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_INIT_ERROR, "hdc server init error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_CREATE_ERROR, "hdc server create error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_DESTROY_ERROR, "hdc server destroy error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_ACCEPT_ERROR, "hdc server accept error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_CLOSED_ERROR, "hdc server closed error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_INTERNAL_ERROR, "hdc fail"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_DEVICEID_ERROR, "hdc device id error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_CLOSE_CHILD_SESSION_ERROR, "hdc server close child session error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SEARFUNC_IS_NULL, "serarfunc is null"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SENDMSG_FAILED, "hdc send msg failed"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_CLOSE_SERVER_SESSION_ERROR, + "hdc server close server session error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_HEART_BEAT_TIMEOUT, "hdc server heart beat timeout"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_DRV_ERROR, "hiai drv return error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_INFO, TDT_HDC_INFO, "hdc info"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SEND_ERROR, "hdc send message failed"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SEG_SIZE_ERROR, "hiai seg size error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_MESSAGE_NULL, "Message input is null"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_MESSAGE_PARSE_ERROR, "hdc message parse error"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDCSERVER_DO_NOT_EXIST, "hdc server do not exist"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDCSESSIONID_NOT_AVAILABLE, "hdc sessionid vector is empty"); 
+TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_SET_HDCSESSION_REFERENCE_FAILED, + "hdc set hdc session reference failed"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDC_RECV_MSG_ERROR, "hdc recv message failed"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDC_SEND_MSG_ERROR, "hdc send message failed"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDC_SRV_TYPE_ERROR, "hdc service type is not supported"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDC_SERVER_CLIENT_SOCKET_CLOSED, + "hdc service or client socket closed"); + +/*********************TSDAEMON************************/ +// create TSDAEMON error level error +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_START_FAIL, "Tsdaemon start fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEANPROC_FIRST_GETPID_FAILED, "Tsdaemon first get pid fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEANPROC_KILL_PROCESS_FAILED, "Tsdaemon kill processfail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEANPROC_SECOND_GETPID_FAILED, "Tsdaemon second get pid fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEANPROC_FINAL_FAILED, "Tsdaemon clean process final fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_INIT_STATE_FAILED, "Tsdaemon init state fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_INIT_HDCSERVER_FAILED, "Tsdaemon init hdcserver fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_HEARTBEAT_FAILED, "Tsdaemon get pid fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEAN_RESOURCE_FAILED, "Tsdaemon clean resource fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_MSG_FAILED, "Tsdaemon send msg fail"); + +/********************* PPC ****************************/ +// create PPC error level error +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_DRIVER_INIT_FAIL, "Init PPC driver fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, 
TDT_ERROR, TDT_PPC_SERVER_CLIENT_CREATE_FAIL, "Create PPC server or PPC client fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SERVER_CLIENT_DESTORY_FAIL, "Destory PPC server or PPC client fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SERVER_CLOSE, "PPC server is closed"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_GET_SET_MSG_BUFFER_FAIL, "PPC get or set msg buffer fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SESSION_CONNECT_FAIL, "PPC connect is failed"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SESSION_NOT_EXISTED, "PPC session is not existed"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SEND_RECEIVE_MSG_FAIL, "PPC send or receive msg fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_MSG_FREE_FAIL, "PPC msg free fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_ALLOC_MSG_FAIL, "PPC alloc memory for msg fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_MSG_LEN_NOT_MATCH, "PPC message length not match"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_MSG_BUF_NULL, "PPC message buffer is null"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_CLIENT_INVALID_PARAM, "PPC message client invalid param fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SERVER_INVALID_PARAM, "PPC message server invalid param fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_CLIENT_RECVDATA_CONTINUE, + "PPC message client receive not expected msg continue"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SERVER_CLIENT_SOCKET_CLOSED, + "PPC message server receive server or client socket closed msg"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_RECV_MSG_ERROR, "PPC receive msg failed"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SESSION_CLOSE_ERROR, "PPC close session failed"); + +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_GET_FILE_STATE_FAIL, "can not get file state"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_OPEN_FILE_FAIL, "can not open file"); 
+TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_CONTENT_EMPTY, "file content is empty"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_FILE_DESTROYED, "file is destroyed"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_UNABLE_TO_GET_FILE_MEMORY, "fail to get memory for file"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_TYPE_UNSUPPORT, "file type is not supported"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_DIR_IS_NULL, "pointer to dir is null"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_GET_DIR_TREE_ERROR, "can not get the tree of dir"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_CANNOT_OPEN_DIR, "dir cannot be opened"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_CANNOT_DFREE_FILE_MEMORY, "DFree memory of file failed"); + +TDT_DEF_ERROR_CODE(MODID_TDT_SHUFFLE, TDT_ERROR, TDT_SHUFFLE_SHUFFLE_SIZE_ILLEGAL, + "shuffle size is less or equal to 0"); +TDT_DEF_ERROR_CODE(MODID_TDT_SHUFFLE, TDT_ERROR, TDT_SHUFFLE_ONLINE_UNIQUE_SEED_ILLEGAL, + "online unique seed is equal to 0"); +TDT_DEF_ERROR_CODE(MODID_TDT_SHUFFLE, TDT_ERROR, TDT_SHUFFLE_UNABLE_TO_CREATE_SHUFFLE_LIST, + "unable to create shuffle list"); +TDT_DEF_ERROR_CODE(MODID_TDT_SHUFFLE, TDT_ERROR, TDT_SHUFFLE_ILLEGAL_SHUFFLE_TYPE, "illegal shuffle type"); +TDT_DEF_ERROR_CODE(MODID_TDT_SHUFFLE, TDT_ERROR, TDT_SHUFFLE_NOT_INITED, "shuffler has not been inited"); + +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_SAMPLE_HAS_NO_LABEL, "the sample has no label"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_SAMPLE_CANNOT_BE_READ, "the sample cannot be read"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_ILLEGAL_DATATYPE, "illegal data type"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_CREATE_FAILED, "creating prefetcher failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_UNABLE_TO_GET_TDTDATAITEM, "fail to get TDTDataItem"); 
+TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_DATA_QUEUE_IS_CLOSED, "data queue is closed"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_GET_SHUFFLE_RESULT_FAIL, "fail to get shuffle result"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_LABEL_FILE_NOT_INITED, "label file has not been inited"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_SAMPLE_FILE_DIR_NOT_INITED, + "directory of sample files has not been inited"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_NOT_INITED, "prefetcher in deliver has not been inited"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_SHUFFLER_NOT_CREATED, + "shuffler in prefetcher has not been created"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_SHUFFLED_ITEM_OUT_OF_FILE_LIST, + "shuffled item is out of file list"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FAIL_TO_GENERATE_MD5, "fail to generate md5 of data"); +TDT_DEF_ERROR_CODE(MODID_CHECKSUM, TDT_ERROR, TDT_CHECKSUM_ILLEGAL_MD5_PARAM, "params to generate md5 is illegal"); +TDT_DEF_ERROR_CODE(MODID_CHECKSUM, TDT_ERROR, TDT_CHECKSUM_MD5_INIT_FAILED, "md5_init failed"); +TDT_DEF_ERROR_CODE(MODID_CHECKSUM, TDT_ERROR, TDT_CHECKSUM_MD5_UPDATE_FAILED, "md5_update failed"); +TDT_DEF_ERROR_CODE(MODID_CHECKSUM, TDT_ERROR, TDT_CHECKSUM_MD5_FINAL_FAILED, "md5_final failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_CANNOT_OPEN_CONFIGFILE, "can not open config file"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_PARSE_FILE_FAILED, "parse file failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_NO_CHANNEL_DATA, + "no channel can be found in config file"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_NO_PARAMETER, "no parameter can be found"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_NO_PARAMETER_ARG, + "the argment is not --configfile or 
stop"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_CREATE_DELIVER_FAILED, + "fail to create train data deliver"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_TRAIN_DATA_DELIVER_IS_NULLPTR, + "train data deliver in the list is nullptr"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_INIT_FAILED, "train data deliver init failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_START_FAILED, "train data deliver start failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_CONFIG_FIEL_SYNTAX_ERROR, + "config file has syntax error"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_DELIVER_IS_NONE, "no deliver is existed"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_MKDIR_CMD_FAILED, "mkdir cmd failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_CP_CMD_FAILED, "cp cmd failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_EMPTY_GROUPNAME_IN_MULTI_GROUPS, "empty group_name"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_DUPLICATE_GROUPNAME, + "the same group_name already exists"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_DUPLICATE_DEVICE, "the same device already exists"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_FIND_DEVICE_FAIL, "cannot find device"); + +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_WARNING, TDT_SUPERVISOR_INOTIFY_INTERRUPT, "inotify is interrupted"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_UNKOWN_JOB_STATE, "unknow job state"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_ILLEGAL_HEARTBEAT_TIME, "illegal heartbeat time"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_INOTIFY_READ_SIZE_ERROR, + "read size of inotify is error"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_INOTIFY_INIT_ERROR, + "Initialization of inotify failed"); 
+TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_CLOSE_INOTIFYFD_FAIL, "Close inotifyFd failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_INOTIFY_WATCH_ERROR, "Add watch of inotify failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_FAIL_TO_WRITE_PID_FILE, "fail to write pid file"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_HEARTBEAT_FILE_NOT_INITED, + "heart beat file has not been inited"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_JOB_COMMAND_FILE_NOT_INITED, + "job command file has not been inited"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_JOB_STATE_FILE_NOT_INITED, + "job state file has not been inited"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_FAIL_TO_DEL_JOB_CMD_FILE, + "fail to delete job command file"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_WARNING, TDT_TRANSFER_FAIL_TO_GET_ENV_VARIABLE, + "can not get environment variable"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_ERROR, TDT_MONITOR_INOTIFY_INIT_ERROR, "Initialization of inotify failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_ERROR, TDT_MONITOR_INOTIFY_WATCH_ERROR, "Add watch of inotify failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_ERROR, TDT_MONITOR_CLOSE_INOTIFYFD_FAIL, "Close inotifyFd failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_WARNING, TDT_MONITOR_INOTIFY_READ_SIZE_ERROR, + "read size of inotify is not correct"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_WARNING, TDT_MONITOR_UNSUPPORT_CFGITEM, "unsupported config item"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_WARNING, TDT_MONITOR_FAIL_TO_SET_CFGITEM, "can not set local config item"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_ERROR, TDT_MONITOR_READ_FILE_FAIL, "read file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_ERROR, TDT_MONITOR_CONFIG_FILE_FORMAT_ERROR, + "config file is incorrectly formatted"); 
+TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_ERROR, TDT_MONITOR_STRCAT_FAILED, "strcat failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_ERROR, TDT_MONITOR_CREATE_CONFIG_FILE_FAIL, + "create ConfigFile pointer failed"); + +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MAP_BUFFER_ERROR, "host buffer map to device failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_ALLOC_BUFFER_FAILED, "memory pool alloc buffer failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_DATA_SIZE_WRONG, "Input datasize is wrong"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_FREE_HDC_BUFFER_FAILED, "memory pool free buffer failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_INVALID_VALUE, "invalid parameter"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_NO_USEFUL_MEMORY, "no usable memory in memory pool"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MESSAGE_NULL, "recv msg is null"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_SENDMSG_FAILED, "send msg failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_POOL_STOPPED, "mempool has stopped"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_HDC_MEMORY_ADDR_NOT_ALIGN, "buffer not aligned"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_POOL_INITED, "memory pool has inited"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_POOL_GET_NULL, "mempool not exist"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_POOL_NOT_EXISTED, "mempool not exist"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_RECOVER_DATA_FAILED, "Recover recv data failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_STATUS_ERROR, "Memory status error"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_POOL_UPDATE_FAILED, "update memory pool status failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_POOL_RESIZE_FAILED, "resize memory pool status failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_EXCEED_MAX_THREAD, 
"thread size is too large"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_WARNING, TDT_WARNING_SET_THREAD_NAME_FAILED, "rename thread failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_WARNING, TDT_WRONG_PRIORITY, "priority is invalid"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_WARNING, TDT_JOIN_TASK_ERROR, "join task failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_WARNING, TDT_NULL_FUNC, "func is null"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_WARNING, TDT_INIT_FAIL, "sear/dear init failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_WARNING, TDT_EXISTED_FUNC, "func has already existed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_DESTROY_FAILED, "mempool destroy failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_DATA_TYPE_FACTORY_MAKE_SHARED_FAILED, + "data type factory make shared failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TDTSEVER_ACCEPT_FAILED, "tdt server accept hdc session failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_CHANNEL_DO_NOT_EXIST, "channel do not exist"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_NULL_POINTER_MSG, "message is null"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TRAN_UNKNOWN_RSP, "transcation status error"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TRAN_TIMEOUT, "transcation time out"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TRAN_NOT_EXIST, "transcation requst id is not exist"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TRAN_ID_GEN_ERROR, "transcation generateid failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_SEND_CHANNEL_FAILED, "send channel info failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_SEND_CHANNEL_TIMEOUT, "send channel info time out"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_INFO, TDT_QUEUE_STOPPED, "queue has been stopped"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_INFO, TDT_QUEUE_POP_FAILED, "failed to pop data from queue"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_INFO, 
TDT_QUEUE_PUSH_FAILED, "failed to push data from queue"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_QUEUE_CREATE_FAILED, "queue create fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_QUEUE_NOT_FIND, "queue not find"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_QUEUE_FULL, "queue is full"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_QUEUE_EMPTY, "queue is empty"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_DATA_ENTO_CP_FAILED, "enqueue to computer process failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_STOP_CP_QUEUE_FAILED, "stop computer process queue failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_RECV_MSG_NO_CHANNEL_INFO_ERROR, "no channel in first msg"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_RECV_MSG_MD5_WRONG, "md5 of recv msg is wrong"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_RECV_MSG_CHECKSUM_WRONG_ERROR, "checksum of recv msg is wrong"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_RECV_MSG_FAIL_TO_GENERATE_MD5, "md5 of recv msg is wrong"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_RECV_MSG_SEQUENCE_ERROR, "sequence recv msg is wrong"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_SERVER_MEMORY_COPY_FAILED, "memory copy failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_CHANNEL_HAS_NO_SESSION_ERROR, "channel has no session"); +TDT_DEF_ERROR_CODE(MODID_HDC_CLIENT, TDT_ERROR, TDT_HDC_CLIENT_INIT_ERROR, "hdc client init error"); +TDT_DEF_ERROR_CODE(MODID_HDC_CLIENT, TDT_ERROR, TDT_HDC_CLIENT_CREATE_SESSION_ERROR, "hdc client create error"); +TDT_DEF_ERROR_CODE(MODID_HDC_CLIENT, TDT_ERROR, TDT_HDC_CLIENT_DO_NOT_EXIST, "hdc client do not exist"); +TDT_DEF_ERROR_CODE(MODID_HDC_CLIENT, TDT_ERROR, TDT_HDC_CLIENT_DESTROY_ERROR, "hdc server destroy error"); +TDT_DEF_ERROR_CODE(MODID_HDC_CLIENT, TDT_ERROR, TDT_HDC_CLIENT_CLOSED, "hdc client has been closed"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, 
TDT_BIND_CPUCORE_FAILED, "thread function bind cpu core failed"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDC_SRV_CLOSED, "hdc server has been closed"); +TDT_DEF_ERROR_CODE(MODID_TSD_CLIENT, TDT_ERROR, TDT_TSD_CLT_OPEN_FAILED, "tsd client open failed"); +TDT_DEF_ERROR_CODE(MODID_TSD_CLIENT, TDT_ERROR, TDT_TSD_CLT_CLOSE_FAILED, "tsd client close failed"); +TDT_DEF_ERROR_CODE(MODID_TSD_CLIENT, TDT_ERROR, TDT_TSD_CLT_INTERFACE_NOT_SUPPORT, "tsd client func not support"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FILELIST_NOT_EXIST, "tdt filelist open failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_SAMPLE_FILE_NOT_FOUND, "tdt sample file is empty"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FILE_OPEN_FAIL, "tdt open sample file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FILE_STAT_FAIL, "tdt stat sample file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FILE_MMAP_FAIL, "tdt mmap sample file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FILE_UNMAP_FAIL, "tdt unmap sample file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FILE_CLOSE_FAIL, "tdt close sample file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FILE_PARSE_FAIL, "tdt parse sample file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_CRC32_SIZE_FAIL, "tdt crc32 of size mismatch"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_CRC32_DATA_FAIL, "tdt crc32 of data mismatch"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_DATA_QUEUE_CLOSED, "tdt prefetch data queue closed"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_MAP_INSERT_FAILED, "map insert fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_INITIALIZE_FAILED, "prefetch init fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, 
TDT_PREFETCH_INVALID_FILELIST_LINE, "invalid filelist line"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_STRINGSTREAM_TO_VALUE_FAILED, "string to value fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_LIST_ID_OFFSET_LENGTH_POSITIVE_INTEGER_FAILED, + "value positive integer fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_SHUFFLE, TDT_ERROR, TDT_SHUFFLE_ILLEGAL_SHUFFLE_PARAM, "Illegal shuffle parameter"); +TDT_DEF_ERROR_CODE(MODID_TDT_SHUFFLE, TDT_ERROR, TDT_FILE_SHUFFLER_CREATE_FAILED, "Create file shuffler fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_UPLOADER_CREATE_FAILED, "Create uploader fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_DOWNLOADER_CREATE_FAILED, "Create downloader fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FOLDER_CANNOT_BE_CREATED, "folder cannot been created"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_CANNOT_GET_STAT_OF_FOLDER, "cannot get stat of folder"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FOLDER_IS_FILE, "folder is a file"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_OBS_CONFIG_INFORMATION_FAIL, "OBS configuration fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_OBS_CALLBACK_ARGUMENT_FAIL, "OBS callback argument fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_OBS_DOWNLOAD_CREATE_THREAD_FAILED, + "OBS download create thread fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_OBS_DOWNLOAD_FILE_FAIL, "OBS download file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_OBS_DOWNLOAD_INIT_FAIL, "OBS download init fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_OBS_DOWNLOAD_METADATA_FAIL, "OBS download metadata fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_OBS_LIST_BUCKET_OBJECTS_FAIL, "OBS list bucket fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_MEMORY_MEMCPY_FAILED, "tdt securec memcpy fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, 
TDT_MEMORY_MEMSET_FAILED, "tdt securec memset fail"); +// TDT HOST +TDT_DEF_ERROR_CODE(MODID_TDT_HOST, TDT_ERROR, TDT_HOST_INIT_FAILED, "tdt host init failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_HOST, TDT_ERROR, TDT_HOST_CHANNEL_NAME_EMPTY, "channel name is empty"); +TDT_DEF_ERROR_CODE(MODID_TDT_HOST, TDT_ERROR, TDT_HOST_ALLOCATE_MEMORY_FAILED, "allocate memory failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_HOST, TDT_ERROR, TDT_HOST_MEMORY_COPY_FAILED, "memory copy failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_HOST, TDT_WARNING, TDT_HOST_UNABLE_GET_TDTDATAELEM, "can not get data element"); +TDT_DEF_ERROR_CODE(MODID_TDT_HOST, TDT_WARNING, TDT_HOST_PUSH_NOT_INIT, "push data but not init"); + +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TUNING_DATA_TRANSFER_INIT_FAILED, + "failed to init the channel of tuning-data"); + +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TUNING_DATA_RECEIVE_CHECK_PARA_ERROR, "the index is error"); + +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TUNING_DATA_TRANSFER_PARAMETER_ERROR, "the parameter is error"); +TDT_DEF_ERROR_CODE(MODID_SVM, TDT_ERROR, TDT_SVM_INIT_FAILED, "SVM driver init failed"); +TDT_DEF_ERROR_CODE(MODID_SVM, TDT_ERROR, TDT_SVM_FREE_PIN_FAILED, "SVM driver free host pin memory failed"); +TDT_DEF_ERROR_CODE(MODID_SVM, TDT_ERROR, TDT_SVM_FREE_SVM_FAILED, "SVM driver free device svm memory failed"); +TDT_DEF_ERROR_CODE(MODID_SVM, TDT_ERROR, TDT_SVM_ADD_BUFFER_MAP_FAILED, "add svm buffer info to map failed"); +#endif // INC_TDT_STATUS_H_ diff --git a/inc/tdt/tdt_device.h b/inc/tdt/tdt_device.h new file mode 100644 index 000000000..441e39238 --- /dev/null +++ b/inc/tdt/tdt_device.h @@ -0,0 +1,53 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef HOST_INNER_INC_TDT_DEVICE_H_ +#define HOST_INNER_INC_TDT_DEVICE_H_ + +#include +#include +#include +#include "tdt/data_common.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +namespace tdt { +/** + * @ingroup TdtDevicePushData + * @brief Tdt device push data to queue for ops. + * + * @par Function + * Tdt device push data to queue for ops. + * + * @param channelName [IN] type #String. queue channel name + * @param items [IN] type #vector DataItem is defined in data_common.h. input data + * @retval 0 Success + * @retval OtherValues Fail + * + * @par Dependency + * @li libtdtdevice.so: Library to which the interface belongs. + * @li tdt_device.h: Header file where the interface declaration is located. + * @li data_common.h: Header file where 'DataItem' defined + * + */ +int32_t TdtDevicePushData(const std::string &channelName, std::vector &items); +} // namespace tdt +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HOST_INNER_INC_TDT_DEVICE_H_ diff --git a/inc/tdt/tdt_host_interface.h b/inc/tdt/tdt_host_interface.h new file mode 100644 index 000000000..0e62a85c1 --- /dev/null +++ b/inc/tdt/tdt_host_interface.h @@ -0,0 +1,142 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef HOST_INNER_INC_TDT_HOST_INTERFACE_H_ +#define HOST_INNER_INC_TDT_HOST_INTERFACE_H_ + +#include +#include +#include +#include "tdt/data_common.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +namespace tdt { +/** +* @ingroup TdtHostInit +* @brief Initialize the interface, start and initialize various general thread, log and other services +* +* @par Function +* Initialize the interface, start and initialize various general thread, log and other services +* +* @param deviceId [IN] type #unsigned int. Physical device ID +* @retval #0 Success +* @retval #Not 0 Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ +int32_t TdtHostInit(uint32_t deviceId); + +/** +* @ingroup TdtHostPushData +* @brief Blocking queue. When the queue is full, the Push interface will block. +* +* @par Function +* Blocking queue. When the queue is full, the Push interface will block. +* +* @param channelName [IN] type #String. queue channel name +* @param items [IN] type #vector DataItem is defined in data_common.h. input data +* @retval 0 Success +* @retval OtherValues 0 Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. 
+* @li data_common.h: Header file where 'DataItem' defined +*/ +int32_t TdtHostPushData(const std::string &channelName, const std::vector &item); + +/** +* @ingroup TdtHostDestroy +* @brief Notify TDT component to close related resources +* +* @par Function +* Notify TDT component to close related resources +* +* @param NA +* @retval 0 Success +* @retval OtherValues Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ +int32_t TdtHostDestroy(); + +/** +* @ingroup TdtHostPreparePopData +* @brief Prepare pop data from Tdt data storage queue +* +* @par Function +* Prepare pop data from Tdt data storage queue +* +* @param NA +* @retval 0 Success +* @retval OtherValues 0 Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'DataItem' defined +*/ +int32_t TdtHostPreparePopData(); + +/** +* @ingroup TdtHostPopData +* @brief POP data from Tdt data storage queue +* +* @par Function +* POP data from Tdt data storage queue +* +* @param channelName [IN] type #String. queue channel name +* @param items [IN] type #vector DataItem is defined in data_common.h. input data +* @retval 0 Success +* @retval OtherValues 0 Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. 
+* @li data_common.h: Header file where 'DataItem' defined +*/ +int32_t TdtHostPopData(const std::string &channelName, std::vector &item); + +/** +* @ingroup TdtHostStop +* @brief Activate the thread that reads data externally from Tdt and +* send end of sequence data so that the external thread can exit +* +* @par Function +* Activate the thread that reads data externally from Tdt and send +* end of sequence data so that the external thread can exit +* +* @param channelName [IN] type #String. queue channel name +* @retval 0 Success +* @retval OtherValues Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ +int32_t TdtHostStop(const std::string &channelName); +} // namespace tdt +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HOST_INNER_INC_TDT_HOST_INTERFACE_H_ diff --git a/inc/tdt/tdt_server.h b/inc/tdt/tdt_server.h new file mode 100644 index 000000000..5d45047b2 --- /dev/null +++ b/inc/tdt/tdt_server.h @@ -0,0 +1,83 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_TDT_TDT_SERVER_H +#define INC_TDT_TDT_SERVER_H + +#include +#include "tdt/status.h" + +namespace tdt { +/** +* @ingroup TDTServerInit +* @brief Initialization functions, establish TDT Server, +* provide services such as access services, initialization and tuning channels +* +* @par Function +* Initialization functions, establish TDT Server, +* provide services such as access services, initialization and tuning channels +* +* @param deviceID [IN] type #unsigned int. Physical device ID +* @param bindCoreList [IN] type #List bindCoreList. +* device CPU core sequence, the maximum value of the core sequence should not +* exceed the total number of CPU cores +* @retval 0 Success +* @retval OtherValues 0 Fail +* +* @par Dependency +* @li libtdtserver.so: Library to which the interface belongs. +* @li tdt_server.h: Header file where the interface declaration is located. +*/ +TDT_LIB_EXPORT int32_t TDTServerInit(const uint32_t deviceID, const std::list &bindCoreList); + +/** +* @ingroup TDTServerInit +* @brief End TDT Server +* +* @par Function +* End TDT Server +* +* @param NA +* @retval 0 Success +* @retval OtherValues 0 Fail +* +* @par Dependency +* @li libtdtserver.so: Library to which the interface belongs. +* @li tdt_server.h: Header file where the interface declaration is located. 
+*/ +TDT_LIB_EXPORT int32_t TDTServerStop(); + +class TdtServer { + public: + private: + /** + * @ingroup TdtServer + * @brief TdtServer is a static class, all delete constructs and destructors + */ + TdtServer() = delete; + + /** + * @ingroup TdtServer + * @brief TdtServer destructor + */ + virtual ~TdtServer() = delete; + TdtServer(const TdtServer &) = delete; + TdtServer(TdtServer &&) = delete; + TdtServer &operator=(const TdtServer &) = delete; + TdtServer &operator=(TdtServer &&) = delete; +}; +}; // namespace tdt +#endif // INC_TDT_TDT_SERVER_H diff --git a/inc/tdt/train_mode.h b/inc/tdt/train_mode.h new file mode 100644 index 000000000..b136c9b3e --- /dev/null +++ b/inc/tdt/train_mode.h @@ -0,0 +1,23 @@ +/** +* @file train_mode.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2018-2019. All Rights Reserved. +* +* This program is used to get or set train mode +*/ + + +#ifndef INC_TDT_TRAIN_MODE_H +#define INC_TDT_TRAIN_MODE_H + +enum TrainMode { + NOFLAG = -1, + DPFLAG = 0, + MEFLAG = 1 +}; + +TrainMode GetTrainMode(); + +void SetTrainMode(TrainMode mode); + +#endif diff --git a/inc/tdt/tsd.h b/inc/tdt/tsd.h new file mode 100644 index 000000000..bb7d7cd40 --- /dev/null +++ b/inc/tdt/tsd.h @@ -0,0 +1,99 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_TDT_TSD_H_ +#define INC_TDT_TSD_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** +* @ingroup Tsdaemon. +* +* Identifies that HCCP or Compute_process is waiting for +* Tsdaemon to issue a shutdown command. +*/ +typedef enum { + TSD_HCCP = 0, /**< HCCP*/ + TSD_COMPUTE = 1, /**< Compute_process*/ + TSD_WAITTYPE_MAX /**< Max*/ +} TsdWaitType; + +/** +* @ingroup TsdWaitForShutdown +* @brief Wait for the TSD process to issue the shutdown command +* +* @par Function +* Wait for the TSD process to issue the shutdown command +* +* @param NA +* @param deviceID [IN] type #unsigned int. Physical device ID +* @param waitType [IN] type #TsdWaitType. HCCP or CP +* @retval 0 Success +* @retval OtherValues 0 Fail +* +* @par Dependency +* @li libtsdppc.so: Library to which the interface belongs. +* @li tsd.h: Header file where the interface declaration is located. +*/ +int32_t TsdWaitForShutdown(const uint32_t deviceId, const TsdWaitType waitType); + +/** +* @ingroup PpcClientSendHeartbeat +* @brief Ppc client send heartbeat msg to ppc server +* +* @par Function +* Ppc client send heartbeat msg to ppc server +* +* @param NA +* @param deviceID [IN] type #unsigned int. Physical device ID +* @param waitType [IN] type #TsdWaitType. HCCP or CP +* @retval 0 Success +* @retval OtherValues 0 Fail +* +* @par Dependency +* @li libtsdppc.so: Library to which the interface belongs. +* @li tsd.h: Header file where the interface declaration is located. +*/ +int32_t TsdHeartbeatSend(const uint32_t deviceId, const TsdWaitType waitType); + +/** +* @ingroup PpcClientSendAbnormalMsg +* @brief Ppc client send abnormal msg to ppc server +* +* @par Function +* Ppc client send abnormal msg to ppc server +* +* @param NA +* @param deviceID [IN] type #unsigned int. Physical device ID +* @param waitType [IN] type #TsdWaitType. 
HCCP or CP +* @retval 0 Success +* @retval OtherValues 0 Fail +* +* @par Dependency +* @li libtsdppc.so: Library to which the interface belongs. +* @li tsd.h: Header file where the interface declaration is located. +*/ +int32_t TsdDestory(const uint32_t deviceId, const TsdWaitType waitType); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // INC_TDT_TSD_H_ diff --git a/inc/tdt/tsd_client.h b/inc/tdt/tsd_client.h new file mode 100644 index 000000000..7886488ea --- /dev/null +++ b/inc/tdt/tsd_client.h @@ -0,0 +1,157 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TDT_HOST_INNER_INC_TSD_CLIENT_H_ +#define TDT_HOST_INNER_INC_TSD_CLIENT_H_ + +#include +#include +#include +#include +#include "tdt/status.h" +#include "tdt/data_common.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** +* @ingroup Open +* @brief Used for the Framework process to communicate with the TSDDaemon process, +* and notify TSD to complete the initialization of other processes +* +* @par Function +* Used for the Framework process to communicate with the TSDDaemon process, +* and notify TSD to complete the initialization of other processes +* +* @param phyDeviceId [IN] type #unsigned int. Physical device ID +* @param rankSize [IN] type #unsigned int. The rankSize of the training. +* The default value is 1. 
When rankSize is greater than 1, +* HCCP will be pulled to perform set communication related operations. +* @retval TDT_OK Success +* @retval OtherValues Failure +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tsd_client.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'TDT_StatusT' defined +*/ +TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t rankSize); + +/** +* @ingroup Close +* @brief notify TSDClient close resource +* +* @par Function +* notify TSDClient close resource +* +* @param NA +* @retval TDT_OK Success +* @retval OtherValues Failure +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tsd_client.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'TDT_StatusT' defined +*/ +TDT_StatusT TsdClose(const uint32_t phyDeviceId); + +/** +* @ingroup CreateCmdParameterObj +* @brief creat tsdclient func parameter obj. +* +* @par Function +* creat tsdclient func parameter obj. +* +* @param type [IN] type tdt::TsdCmdType, tsd func type. +* @param cmdParameterObj [IN] type void *, func parameter obj. +* @retval TDT_OK Success +* @retval TDT_INTERFACE_NOT_SUPPORT +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. +* @li status.h: Header file where 'TDT_StatusT' defined +*/ +TDT_StatusT CreateCmdParameterObj(tdt::TsdCmdType type, void **cmdParameterObj); + +/** +* @ingroup SetCmdParameterObjAttribute +* @brief set cmdParameterObj input value. +* +* @par Function +* set cmdParameterObj input value. +* +* @param type [IN] type tdt::TsdCmdType, tsd func type. +* @param cmdParameterObj [IN] type void *, func parameter obj. +* @param itemType [IN] type tdt::InputItem, func input type. +* @param valuePtr [IN] type const void *, input value. 
+* @param valueLength [IN] type int, input value length. +* @retval TDT_OK Success +* @retval TDT_INTERFACE_NOT_SUPPORT +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. +* @li status.h: Header file where 'TDT_StatusT' defined +*/ +TDT_StatusT SetCmdParameterObjAttribute(tdt::TsdCmdType type, void *cmdParameterObj, tdt::InputItem itemType, const void *valuePtr, int valueLength); + +/** +* @ingroup GetCmdParameterObjAttribute +* @brief set cmdParameterObj input value. +* +* @par Function +* set cmdParameterObj input value. +* +* @param type [IN] type tdt::TsdCmdType, tsd func type. +* @param cmdParameterObj [IN] type void *, func parameter obj. +* @param itemType [IN] type tdt::InputItem, func input type. +* @param valuePtr [IN] type const void *, input value. +* @param valueLength [IN] type int, input value length. +* @retval TDT_OK Success +* @retval TDT_INTERFACE_NOT_SUPPORT +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. +* @li status.h: Header file where 'TDT_StatusT' defined +*/ +TDT_StatusT GetCmdParameterObjAttribute(tdt::TsdCmdType type, void *cmdParameterObj, tdt::InputItem itemType, void *valuePtr, int &valueLength); + +/** +* @ingroup TsdClientCmd +* @brief creat tsdclient func parameter obj. +* +* @par Function +* creat tsdclient func parameter obj. +* +* @param type [IN] type tdt::TsdCmdType, tsd func type. +* @param cmdParameterObj [IN] type void *, func parameter obj. +* @retval TDT_OK Success +* @retval TDT_INTERFACE_NOT_SUPPORT +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. 
+* @li status.h: Header file where 'TDT_StatusT' defined +*/ +TDT_StatusT TsdClientCmd(tdt::TsdCmdType cmd, void *cmdParameterObj); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // TDT_HOST_INNER_INC_TSD_CLIENT_H_ diff --git a/inc/toolchain/bbox/bbox_ddr_data.h b/inc/toolchain/bbox/bbox_ddr_data.h new file mode 100644 index 000000000..b7ef1076c --- /dev/null +++ b/inc/toolchain/bbox/bbox_ddr_data.h @@ -0,0 +1,119 @@ +/** + * @file bbox_ddr_data.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2020. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef BBOX_DDR_DATA_H +#define BBOX_DDR_DATA_H + +/* ELEM_OUTPUT_BIN + * key: + * 0x00000000 FFFF FFFF FFFF FFFF + * 0x00000010 FFFF FFFF FFFF FFFF + * + * ELEM_OUTPUT_STR + * key: qwertyuiop + * + * ELEM_OUTPUT_STR_NL + * key: + * qwertyuiop + * + * ELEM_OUTPUT_HEX + * key: FFFFFFFFFFFFFFFF + * + * ELEM_OUTPUT_INT + * key: 0xFF + */ +#define ELEM_OUTPUT_CHAR_LEN 1 +#define ELEM_OUTPUT_SHORT_LEN 2 +#define ELEM_OUTPUT_INT_LEN 4 +#define ELEM_OUTPUT_LONG_LEN 8 +#define ELEM_OUTPUT_HEX_MAX_LEN 32 +#define ELEM_OUTPUT_DIVIDE_MAX_LEN 15 +#define ELEMENT_NAME_MAX_LEN 32 + +enum ModelElementType { + ELEM_OUTPUT_TYPE = 0x0, + ELEM_OUTPUT_BIN = 0x1, // name = func(offset, size); 整片二进制输出,按16进制显示 + ELEM_OUTPUT_STR = 0x2, // name = func(offset, maxSize); 换行字符串输出 + ELEM_OUTPUT_STR_NL = 0x3, // name = func(offset, maxSize); 不换行字符串输出 + ELEM_OUTPUT_HEX = 0x4, // name = func(offset, size); 每个字节按hex输出,最多输出16个字节 + ELEM_OUTPUT_INT = 0x5, // name = func(offset, size); 1,2,4,8字节整型输出 + ELEM_OUTPUT_CHAR = 0x6, // name = func(offset, size); 按长度,字符输出 + ELEM_OUTPUT_INT_CONST = 0x7, // name = value; 整型:value(size) + ELEM_OUTPUT_STR_CONST = 0x8, // name; 字符串:value(0) size(0) + ELEM_OUTPUT_NL = 0x9, // \n + ELEM_OUTPUT_DIVIDE = 0xa, // 
==========name========== + ELEM_OUTPUT_MAX = 0xFFF, + + ELEM_FEATURE_TYPE = 0x1000, + ELEM_FEATURE_TABLE = 0x1001, + ELEM_FEATURE_COMPARE = 0x1002, + ELEM_FEATURE_LOOPBUF = 0x1003, + ELEM_FEATURE_CHARLOG = 0x1004, + ELEM_FEATURE_STRUCTLOG = 0x1005, + ELEM_FEATURE_MAX = 0x1FFF, + + ELEM_CTRL_TYPE = 0x2000, + ELEM_CTRL_TABLE = 0x2000, // ELEM_FEATURE_TABLE 控制类 + ELEM_CTRL_TABLE_GOTO = 0x2000, // (tableEnumType, 0);跳转表的PlaintextTableType enum类型值,非显示项 + ELEM_CTRL_TABLE_RANGE = 0x2001, // (indexOffset, indexCnt);子表开始地址和长度,非显示项 + ELEM_CTRL_COMPARE = 0x2100, // ELEM_FEATURE_COMPARE 控制类 + ELEM_CTRL_COM_VALUE = 0x2100, // (offset, size);需要比较的值所在位置和长度,非显示项 + ELEM_CTRL_CMP_JUMP_NE = 0x2101, // (compareValue, jumpIndex);如果不等于则跳转,非显示项 + ELEM_CTRL_CMP_JUMP_LE = 0x2102, // (compareValue, jumpIndex);如果不大于则跳转,非显示项 + ELEM_CTRL_CMP_JUMP_LT = 0x2103, // (compareValue, jumpIndex);如果小于则跳转,非显示项 + ELEM_CTRL_CMP_JUMP_GE = 0x2104, // (compareValue, jumpIndex);如果不小于则跳转,非显示项 + ELEM_CTRL_CMP_JUMP_GT = 0x2105, // (compareValue, jumpIndex);如果大于则跳转,非显示项 + ELEM_CTRL_CMP_JUMP_EQ = 0x2106, // (compareValue, jumpIndex);如果等于则跳转,非显示项 + ELEM_CTRL_LOOPBUF = 0x2200, // ELEM_FEATURE_LOOPBUF 控制类 + ELEM_CTRL_LPBF_HEAD = 0x2200, // value;循环buffer头结构体长度,非显示项 + ELEM_CTRL_LPBF_READ = 0x2201, // name: OutPutFunc(offset, size); 循环buffer读指针在结构体中偏移位置 + ELEM_CTRL_LPBF_WRITE = 0x2203, // name: OutPutFunc(offset, size); 循环buffer写指针在结构体中偏移位置 + ELEM_CTRL_LPBF_SIZE = 0x2202, // name: OutPutFunc(offset, size); 循环buffer总大小在结构体中偏移位置 + ELEM_CTRL_LPBF_SIZE_C = 0x2202, // name: value; 循环buffer总长度,以固定值设置 + ELEM_CTRL_LPBF_ROLLBK = 0x2203, // (offset, size); roll-back标记位,标记buffer是否翻转,非显示项 + ELEM_CTRL_MAX = 0xFFFF, +}; + +enum ElemConditionType { + ELEM_EQUAL = 1 << 0, // 0x001 + ELEM_GRATER = 1 << 1, // 0x010 + ELEM_LESS = 1 << 2, // 0x100 +}; + +struct ModelElement { + char name[ELEMENT_NAME_MAX_LEN]; + unsigned int type; + union { + unsigned int arg1; + unsigned int offset; + unsigned int value; + unsigned int index; + }; + 
union { + unsigned int arg2; + unsigned int size; + unsigned int maxSize; + unsigned int mark; + unsigned int indexOffset; + }; +}; + +#define MODEL_VECTOR(NAME) struct ModelElement MODEL_VECTOR_OBJECT_##NAME[] +#define MODEL_VECTOR_OBJECT(NAME) (&MODEL_VECTOR_OBJECT_##NAME[0]) +#define MODEL_VECTOR_ITEM(NAME, i) (&MODEL_VECTOR_OBJECT_##NAME[i]) +#define MODEL_VECTOR_SIZE(NAME) (sizeof(MODEL_VECTOR_OBJECT_##NAME) / sizeof(struct ModelElement)) +#define DEFINE_DATA_MODEL(name) DATA_MODEL_##name + +#define ELEMENT_CLASSIFY(type) ((type) & 0xFFFF) +#define OUTPUT_ELEMENT(type) (ELEMENT_CLASSIFY(type) > ELEM_OUTPUT_TYPE && ELEMENT_CLASSIFY(type) < ELEM_OUTPUT_MAX) +#define CTRL_ELEMENT(type) (ELEMENT_CLASSIFY(type) >= ELEM_CTRL_TYPE && ELEMENT_CLASSIFY(type) < ELEM_CTRL_MAX) +#define CMP_ELEMENT(type) (((type) & 0xFFF0) == ELEM_CTRL_COM_VALUE) +#define ELEM_CMP_CONDITION(type) ((type) & 0x000F) + +#endif diff --git a/inc/toolchain/bbox/bbox_ddr_data_cloud.h b/inc/toolchain/bbox/bbox_ddr_data_cloud.h new file mode 100644 index 000000000..925c8fab0 --- /dev/null +++ b/inc/toolchain/bbox/bbox_ddr_data_cloud.h @@ -0,0 +1,1176 @@ +/** + * @file bbox_ddr_data_cloud.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2020. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#ifndef BBOX_DDR_DATA_CLOUD_H +#define BBOX_DDR_DATA_CLOUD_H + +#include "bbox_ddr_data.h" + +/* each Module need define as follows */ +#define DATA_MODEL_LPFW MODEL_VECTOR(LPFW) = { \ + {"****exc****reg**", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"reset_reason", ELEM_OUTPUT_INT, {0x80}, {0x4}}, \ + {"slice", ELEM_OUTPUT_INT, {0x88}, {0x4}}, \ + {"rtc", ELEM_OUTPUT_INT, {0x90}, {0x4}}, \ + {"REGSP", ELEM_OUTPUT_INT, {0x98}, {0x8}}, \ + {"REGPC", ELEM_OUTPUT_INT, {0xa0}, {0x8}}, \ + {"REGELR", ELEM_OUTPUT_INT, {0xa8}, {0x8}}, \ + {"REGCPSR", ELEM_OUTPUT_INT, {0xb0}, {0x8}}, \ + {"REGSPSR", ELEM_OUTPUT_INT, {0xb8}, {0x8}}, \ + {"ESR", ELEM_OUTPUT_INT, {0xc0}, {0x8}}, \ + {"FAR", ELEM_OUTPUT_INT, {0xc8}, {0x8}}, \ + {"excTrace", ELEM_OUTPUT_INT, {0xd0}, {0x1}}, \ + {"ddrExc", ELEM_OUTPUT_INT, {0xd1}, {0x1}}, \ + {"irqId", ELEM_OUTPUT_INT, {0xd2}, {0x2}}, \ + {"taskId", ELEM_OUTPUT_INT, {0xd4}, {0x4}}, \ + {"**backup**reg***", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"reg_backup_index", ELEM_OUTPUT_INT, {0x280}, {0x4}}, \ + {"reason_0", ELEM_OUTPUT_INT, {0x284}, {0x4}}, \ + {"reason_1", ELEM_OUTPUT_INT, {0x288}, {0x4}}, \ + {"reason_2", ELEM_OUTPUT_INT, {0x28C}, {0x4}}, \ + {"x0", ELEM_OUTPUT_INT, {0x290}, {0x8}}, \ + {"x1", ELEM_OUTPUT_INT, {0x298}, {0x8}}, \ + {"x2", ELEM_OUTPUT_INT, {0x2a0}, {0x8}}, \ + {"x3", ELEM_OUTPUT_INT, {0x2a8}, {0x8}}, \ + {"x4", ELEM_OUTPUT_INT, {0x2b0}, {0x8}}, \ + {"x5", ELEM_OUTPUT_INT, {0x2b8}, {0x8}}, \ + {"x6", ELEM_OUTPUT_INT, {0x2c0}, {0x8}}, \ + {"x7", ELEM_OUTPUT_INT, {0x2c8}, {0x8}}, \ + {"x8", ELEM_OUTPUT_INT, {0x2d0}, {0x8}}, \ + {"x9", ELEM_OUTPUT_INT, {0x2d8}, {0x8}}, \ + {"x10", ELEM_OUTPUT_INT, {0x2e0}, {0x8}}, \ + {"x11", ELEM_OUTPUT_INT, {0x2e8}, {0x8}}, \ + {"x12", ELEM_OUTPUT_INT, {0x2f0}, {0x8}}, \ + {"x13", ELEM_OUTPUT_INT, {0x2f8}, {0x8}}, \ + {"x14", ELEM_OUTPUT_INT, {0x300}, {0x8}}, \ + {"x15", ELEM_OUTPUT_INT, {0x308}, {0x8}}, \ + {"x16", ELEM_OUTPUT_INT, {0x310}, {0x8}}, \ + {"x17", ELEM_OUTPUT_INT, {0x318}, 
{0x8}}, \ + {"x18", ELEM_OUTPUT_INT, {0x320}, {0x8}}, \ + {"x19", ELEM_OUTPUT_INT, {0x328}, {0x8}}, \ + {"x20", ELEM_OUTPUT_INT, {0x330}, {0x8}}, \ + {"x21", ELEM_OUTPUT_INT, {0x338}, {0x8}}, \ + {"x22", ELEM_OUTPUT_INT, {0x340}, {0x8}}, \ + {"x23", ELEM_OUTPUT_INT, {0x348}, {0x8}}, \ + {"x24", ELEM_OUTPUT_INT, {0x350}, {0x8}}, \ + {"x25", ELEM_OUTPUT_INT, {0x358}, {0x8}}, \ + {"x26", ELEM_OUTPUT_INT, {0x360}, {0x8}}, \ + {"x27", ELEM_OUTPUT_INT, {0x368}, {0x8}}, \ + {"x28", ELEM_OUTPUT_INT, {0x370}, {0x8}}, \ + {"x29", ELEM_OUTPUT_INT, {0x378}, {0x8}}, \ + {"x30", ELEM_OUTPUT_INT, {0x380}, {0x8}}, \ + {"XZR", ELEM_OUTPUT_INT, {0x388}, {0x8}}, \ + {"ESR", ELEM_OUTPUT_INT, {0x390}, {0x8}}, \ + {"FAR", ELEM_OUTPUT_INT, {0x398}, {0x8}}, \ + {"SPSR", ELEM_OUTPUT_INT, {0x3a0}, {0x8}}, \ + {"ELR", ELEM_OUTPUT_INT, {0x3a8}, {0x8}}, \ + {"PC", ELEM_OUTPUT_INT, {0x3b0}, {0x8}}, \ + {"SP", ELEM_OUTPUT_INT, {0x3b8}, {0x8}}, \ + {"CPSR", ELEM_OUTPUT_INT, {0x3c0}, {0x8}}, \ + {"Exceptioncode", ELEM_OUTPUT_INT, {0x3c8}, {0x8}}, \ + {"**runtime*******", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"T-AIC00", ELEM_OUTPUT_INT, {0xD380}, {0x1}}, \ + {"T-AIC01", ELEM_OUTPUT_INT, {0xD381}, {0x1}}, \ + {"T-AIC02", ELEM_OUTPUT_INT, {0xD382}, {0x1}}, \ + {"T-AIC03", ELEM_OUTPUT_INT, {0xD383}, {0x1}}, \ + {"T-AIC04", ELEM_OUTPUT_INT, {0xD384}, {0x1}}, \ + {"T-AIC05", ELEM_OUTPUT_INT, {0xD385}, {0x1}}, \ + {"T-AIC06", ELEM_OUTPUT_INT, {0xD386}, {0x1}}, \ + {"T-AIC07", ELEM_OUTPUT_INT, {0xD387}, {0x1}}, \ + {"T-AIC08", ELEM_OUTPUT_INT, {0xD388}, {0x1}}, \ + {"T-AIC09", ELEM_OUTPUT_INT, {0xD389}, {0x1}}, \ + {"T-AIC10", ELEM_OUTPUT_INT, {0xD38A}, {0x1}}, \ + {"T-AIC11", ELEM_OUTPUT_INT, {0xD38B}, {0x1}}, \ + {"T-AIC12", ELEM_OUTPUT_INT, {0xD38C}, {0x1}}, \ + {"T-AIC13", ELEM_OUTPUT_INT, {0xD38D}, {0x1}}, \ + {"T-AIC14", ELEM_OUTPUT_INT, {0xD38E}, {0x1}}, \ + {"T-AIC15", ELEM_OUTPUT_INT, {0xD38F}, {0x1}}, \ + {"T-AIC16", ELEM_OUTPUT_INT, {0xD390}, {0x1}}, \ + {"T-AIC17", ELEM_OUTPUT_INT, {0xD391}, 
{0x1}}, \ + {"T-AIC18", ELEM_OUTPUT_INT, {0xD392}, {0x1}}, \ + {"T-AIC19", ELEM_OUTPUT_INT, {0xD393}, {0x1}}, \ + {"T-AIC20", ELEM_OUTPUT_INT, {0xD394}, {0x1}}, \ + {"T-AIC21", ELEM_OUTPUT_INT, {0xD395}, {0x1}}, \ + {"T-AIC22", ELEM_OUTPUT_INT, {0xD396}, {0x1}}, \ + {"T-AIC23", ELEM_OUTPUT_INT, {0xD397}, {0x1}}, \ + {"T-AIC24", ELEM_OUTPUT_INT, {0xD398}, {0x1}}, \ + {"T-AIC25", ELEM_OUTPUT_INT, {0xD399}, {0x1}}, \ + {"T-AIC26", ELEM_OUTPUT_INT, {0xD39A}, {0x1}}, \ + {"T-AIC27", ELEM_OUTPUT_INT, {0xD39B}, {0x1}}, \ + {"T-AIC28", ELEM_OUTPUT_INT, {0xD39C}, {0x1}}, \ + {"T-AIC29", ELEM_OUTPUT_INT, {0xD39D}, {0x1}}, \ + {"T-AIC30", ELEM_OUTPUT_INT, {0xD39E}, {0x1}}, \ + {"T-AIC31", ELEM_OUTPUT_INT, {0xD39F}, {0x1}}, \ + {"T-AICPU0", ELEM_OUTPUT_INT, {0xD3A0}, {0x1}}, \ + {"T-AICPU1", ELEM_OUTPUT_INT, {0xD3A1}, {0x1}}, \ + {"T-AICPU2", ELEM_OUTPUT_INT, {0xD3A2}, {0x1}}, \ + {"T-AICPU3", ELEM_OUTPUT_INT, {0xD3A3}, {0x1}}, \ + {"T-HBMPHY0", ELEM_OUTPUT_INT, {0xD3A4}, {0x1}}, \ + {"T-HBMPHY1", ELEM_OUTPUT_INT, {0xD3A5}, {0x1}}, \ + {"T-HBMPHY2", ELEM_OUTPUT_INT, {0xD3A6}, {0x1}}, \ + {"T-HBMPHY3", ELEM_OUTPUT_INT, {0xD3A7}, {0x1}}, \ + {"T-DDRPHY", ELEM_OUTPUT_INT, {0xD3A8}, {0x1}}, \ + {"T-NIMBUS", ELEM_OUTPUT_INT, {0xD3A9}, {0x1}}, \ + {"T-HBMDEV0", ELEM_OUTPUT_INT, {0xD3AA}, {0x1}}, \ + {"T-HBMDEV1", ELEM_OUTPUT_INT, {0xD3AB}, {0x1}}, \ + {"T-HBMDEV2", ELEM_OUTPUT_INT, {0xD3AC}, {0x1}}, \ + {"T-HBMDEV3", ELEM_OUTPUT_INT, {0xD3AD}, {0x1}}, \ + {"T-ZONE-AIC", ELEM_OUTPUT_INT, {0xD3B0}, {0x1}}, \ + {"T-ZONE-AICPU", ELEM_OUTPUT_INT, {0xD3B1}, {0x1}}, \ + {"T-ZONE-HBMPHY", ELEM_OUTPUT_INT, {0xD3B2}, {0x1}}, \ + {"T-ZONE-DDRPHY", ELEM_OUTPUT_INT, {0xD3B3}, {0x1}}, \ + {"T-ZONE-NIMBUS", ELEM_OUTPUT_INT, {0xD3B4}, {0x1}}, \ + {"T-ZONE-HBMDEV", ELEM_OUTPUT_INT, {0xD3B5}, {0x1}}, \ + {"TMP_STATUS", ELEM_OUTPUT_INT, {0xD3B8}, {0x2}}, \ + {"EDP_AVG_CURRENT", ELEM_OUTPUT_INT, {0xD3BA}, {0x2}}, \ + {"EDP_HEART_ADDR", ELEM_OUTPUT_INT, {0xD3BC}, {0x4}}, \ + {"EDP_IRQ_COUNT", 
ELEM_OUTPUT_INT, {0xD3C0}, {0x4}}, \ + {"EDP_DOWN_COUNT", ELEM_OUTPUT_INT, {0xD3C4}, {0x2}}, \ + {"EDP_UP_COUNT", ELEM_OUTPUT_INT, {0xD3C6}, {0x2}}, \ + {"EDP_TIMER_COUNT", ELEM_OUTPUT_INT, {0xD3C8}, {0x4}}, \ + {"THERMAL_TIMER_CNT",ELEM_OUTPUT_INT, {0xD3CC}, {0x4}}, \ + {"VOLT_VALUE", ELEM_OUTPUT_INT, {0XD3D4}, {0x4}}, \ + {"CURRENT_VALUE", ELEM_OUTPUT_INT, {0XD3D8}, {0x4}}, \ + {"POWER_VALUE", ELEM_OUTPUT_INT, {0XD3DC}, {0x4}}, \ + {"LPNV_MAGIC", ELEM_OUTPUT_INT, {0xD3E0}, {0x4}}, \ + {"HI_EDP", ELEM_OUTPUT_INT, {0xD3E4}, {0x1}}, \ + {"EDP_SCALE", ELEM_OUTPUT_INT, {0xD3E5}, {0x1}}, \ + {"EDP_PERIOD", ELEM_OUTPUT_INT, {0xD3E6}, {0x2}}, \ + {"EDP_MAX_CURRENT", ELEM_OUTPUT_INT, {0xD3E8}, {0x2}}, \ + {"EDP_AVE_CURRENT", ELEM_OUTPUT_INT, {0xD3EA}, {0x2}}, \ + {"AVS_NV", ELEM_OUTPUT_INT, {0xD3EC}, {0x1}}, \ + {"SVFD_NV", ELEM_OUTPUT_INT, {0xD3ED}, {0x1}}, \ + {"PLLMODE", ELEM_OUTPUT_INT, {0xD3EE}, {0x1}}, \ + {"HOT_RESET", ELEM_OUTPUT_INT, {0xD3EF}, {0x1}}, \ + {"RESERVED_CTRL", ELEM_OUTPUT_INT, {0xD3F0}, {0x2}}, \ + {"RESERVED_EN", ELEM_OUTPUT_INT, {0xD3F2}, {0x2}}, \ + {"RESERVED_IMU", ELEM_OUTPUT_INT, {0xD3F4}, {0x2}}, \ + {"LP_MNTN", ELEM_OUTPUT_INT, {0xD3F6}, {0x1}}, \ + {"THERMAL_CTRL", ELEM_OUTPUT_INT, {0xD3F7}, {0x1}}, \ + {"THERMAL_SHUTDOWN", ELEM_OUTPUT_INT, {0xD3F8}, {0x1}}, \ + {"THERMAL_FREQ_STEP",ELEM_OUTPUT_INT, {0xD3F9}, {0x1}}, \ + {"THERMAL_HWRST", ELEM_OUTPUT_INT, {0xD3FA}, {0x1}}, \ + {"THERMAL_SWRST", ELEM_OUTPUT_INT, {0xD3FB}, {0x1}}, \ + {"THERMAL_HIGH_TEMP",ELEM_OUTPUT_INT, {0xD3FC}, {0x1}}, \ + {"THERMAL_NOR_TEMP", ELEM_OUTPUT_INT, {0xD3FD}, {0x1}}, \ + {"THERMAL_SLOW_PER", ELEM_OUTPUT_INT, {0xD3FE}, {0x1}}, \ + {"THERMAL_FAST_PER", ELEM_OUTPUT_INT, {0xD3FF}, {0x1}}, \ + {"THERMAL_COOL_CNT", ELEM_OUTPUT_INT, {0xD400}, {0x2}}, \ + {"UTRALSOC_DIS", ELEM_OUTPUT_INT, {0xD402}, {0x1}}, \ + {"POWERBRAKE_EN", ELEM_OUTPUT_INT, {0xD403}, {0x1}}, \ + {"POWERBRAKE_SCALE", ELEM_OUTPUT_INT, {0xD404}, {0x2}}, \ + {"FLOOR_FREQ", ELEM_OUTPUT_INT, {0xD406}, 
{0x2}}, \ + {"HBM_DEBUG_LEVEL", ELEM_OUTPUT_INT, {0xD410}, {0x1}}, \ + {"HBM_MP_INIT", ELEM_OUTPUT_INT, {0xD411}, {0x1}}, \ + {"HBM_ECC_EN", ELEM_OUTPUT_INT, {0xD412}, {0x1}}, \ + {"HBM_RASC_EN", ELEM_OUTPUT_INT, {0xD413}, {0x1}}, \ + {"HBM_PATROL_SCRUB", ELEM_OUTPUT_INT, {0xD414}, {0x1}}, \ + {"HBM_CLEAN_MEM_EN", ELEM_OUTPUT_INT, {0xD415}, {0x1}}, \ + {"HBM_POISON_EN", ELEM_OUTPUT_INT, {0xD416}, {0x1}}, \ + {"HBM_FW_PATCH_EN", ELEM_OUTPUT_INT, {0xD417}, {0x1}}, \ + {"HBM_FW_SFC_MRS_EN",ELEM_OUTPUT_INT, {0xD418}, {0x1}}, \ + {"HBM_TMON_EN", ELEM_OUTPUT_INT, {0xD419}, {0x1}}, \ + {"HBM_PD", ELEM_OUTPUT_INT, {0xD41A}, {0x1}}, \ + {"HBM_HIGH_TEMP", ELEM_OUTPUT_INT, {0xD41B}, {0x1}}, \ + {"HBM_RESERVED_0", ELEM_OUTPUT_INT, {0xD41C}, {0x1}}, \ + {"HBM_RESERVED_1", ELEM_OUTPUT_INT, {0xD41D}, {0x1}}, \ + {"HBM_RESERVED_2", ELEM_OUTPUT_INT, {0xD41E}, {0x1}}, \ + {"HBM_FREQ", ELEM_OUTPUT_INT, {0xD420}, {0x2}}, \ + {"DDR_DEBUG_LEVEL", ELEM_OUTPUT_INT, {0xD422}, {0x1}}, \ + {"DDR_MP_INIT", ELEM_OUTPUT_INT, {0xD423}, {0x1}}, \ + {"DDR_ECC_EN", ELEM_OUTPUT_INT, {0xD424}, {0x1}}, \ + {"DDR_RASC_EN", ELEM_OUTPUT_INT, {0xD425}, {0x1}}, \ + {"DDR_PD_EN", ELEM_OUTPUT_INT, {0xD426}, {0x1}}, \ + {"DDR_LP_EN", ELEM_OUTPUT_INT, {0xD427}, {0x1}}, \ + {"DDR_RASC_ALGO", ELEM_OUTPUT_INT, {0xD428}, {0x1}}, \ + {"DDR_CA_PARITY_EN", ELEM_OUTPUT_INT, {0xD429}, {0x1}}, \ + {"DDR_POISON_EN", ELEM_OUTPUT_INT, {0xD42A}, {0x1}}, \ + {"DDR_PATROL_SCRUB", ELEM_OUTPUT_INT, {0xD42B}, {0x1}}, \ + {"DDR_TMON_EN", ELEM_OUTPUT_INT, {0xD42C}, {0x1}}, \ + {"DDR_HIGH_TEMP", ELEM_OUTPUT_INT, {0xD42D}, {0x1}}, \ + {"DDR_RESERVED_0", ELEM_OUTPUT_INT, {0xD42E}, {0x1}}, \ + {"DDR_RESERVED_1", ELEM_OUTPUT_INT, {0xD42F}, {0x1}}, \ + {"DDR_RESERVED_2", ELEM_OUTPUT_INT, {0xD430}, {0x1}}, \ + {"DDR_FREQ", ELEM_OUTPUT_INT, {0xD432}, {0x2}}, \ + {"AVS_CALCU_VOLT", ELEM_OUTPUT_INT, {0xD444}, {0x4}}, \ + {"AVS_WORK_VOLT", ELEM_OUTPUT_INT, {0xD448}, {0x4}}, \ + {"BBOX_BASE", ELEM_OUTPUT_INT, {0xD460}, {0x8}}, \ + 
{"BBOX_SIZE", ELEM_OUTPUT_INT, {0xD468}, {0x4}}, \ + {"BACKUP_BBOX_ADDR", ELEM_OUTPUT_INT, {0xD470}, {0x8}}, \ + {"BACKUP_BBOX_SIZE", ELEM_OUTPUT_INT, {0xD478}, {0x4}}, \ + {"PMBUS_CHECK0", ELEM_OUTPUT_INT, {0xD4A0}, {0x2}}, \ + {"PMBUS_CHECK1", ELEM_OUTPUT_INT, {0xD4A2}, {0x2}}, \ + {"PMBUS_CHECK2", ELEM_OUTPUT_INT, {0xD4A4}, {0x2}}, \ + {"PMBUS_CHECK3", ELEM_OUTPUT_INT, {0xD4A6}, {0x2}}, \ + {"PMBUS_CHECK4", ELEM_OUTPUT_INT, {0xD4A8}, {0x2}}, \ + {"PMBUS_CHECK5", ELEM_OUTPUT_INT, {0xD4AA}, {0x2}}, \ + {"LP_STARTUP_EXCEPTION", ELEM_OUTPUT_INT, {0xD4B0}, {0x4}}, \ + {"**GIC*******", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"ENABLE[0]", ELEM_OUTPUT_INT, {0x1280}, {0x4}}, \ + {"ENABLE[1]", ELEM_OUTPUT_INT, {0x1284}, {0x4}}, \ + {"ENABLE[2]", ELEM_OUTPUT_INT, {0x1288}, {0x4}}, \ + {"ENABLE[3]", ELEM_OUTPUT_INT, {0x128C}, {0x4}}, \ + {"ENABLE[4]", ELEM_OUTPUT_INT, {0x1290}, {0x4}}, \ + {"ENABLE[5]", ELEM_OUTPUT_INT, {0x1294}, {0x4}}, \ + {"ENABLE[6]", ELEM_OUTPUT_INT, {0x1298}, {0x4}}, \ + {"ENABLE[7]", ELEM_OUTPUT_INT, {0x129C}, {0x4}}, \ + {"ENABLE[8]", ELEM_OUTPUT_INT, {0x12A0}, {0x4}}, \ + {"ENABLE[9]", ELEM_OUTPUT_INT, {0x12A4}, {0x4}}, \ + {"ENABLE[10]", ELEM_OUTPUT_INT, {0x12A8}, {0x4}}, \ + {"ENABLE[11]", ELEM_OUTPUT_INT, {0x12AC}, {0x4}}, \ + {"ENABLE[12]", ELEM_OUTPUT_INT, {0x12B0}, {0x4}}, \ + {"PENDING[0]", ELEM_OUTPUT_INT, {0x12B4}, {0x4}}, \ + {"PENDING[1]", ELEM_OUTPUT_INT, {0x12B8}, {0x4}}, \ + {"PENDING[2]", ELEM_OUTPUT_INT, {0x12BC}, {0x4}}, \ + {"**IPC*******", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"IPC_MBX", ELEM_OUTPUT_INT, {0x15080}, {0x4}}, \ + {"IPC_SRC", ELEM_OUTPUT_INT, {0x15084}, {0x4}}, \ + {"IPC_MODE", ELEM_OUTPUT_INT, {0x15088}, {0x4}}, \ + {"IPC_ICLR", ELEM_OUTPUT_INT, {0x1508c}, {0x4}}, \ + {"IPC_DATA0", ELEM_OUTPUT_INT, {0x15090}, {0x4}}, \ + {"IPC_DATA1", ELEM_OUTPUT_INT, {0x15094}, {0x4}}, \ + {"IPC_DATA2", ELEM_OUTPUT_INT, {0x15098}, {0x4}}, \ + {"IPC_DATA3", ELEM_OUTPUT_INT, {0x1509c}, {0x4}}, \ + {"IPC_DATA4", ELEM_OUTPUT_INT, 
{0x150a0}, {0x4}}, \ + {"IPC_DATA5", ELEM_OUTPUT_INT, {0x150a4}, {0x4}}, \ + {"IPC_DATA6", ELEM_OUTPUT_INT, {0x150a8}, {0x4}}, \ + {"IPC_DATA7", ELEM_OUTPUT_INT, {0x150ac}, {0x4}}, \ + {"IPC_Q0_DATA0", ELEM_OUTPUT_INT, {0x150c0}, {0x4}}, \ + {"IPC_Q0_DATA1", ELEM_OUTPUT_INT, {0x150c4}, {0x4}}, \ + {"IPC_Q0_DATA2", ELEM_OUTPUT_INT, {0x150c8}, {0x4}}, \ + {"IPC_Q0_DATA3", ELEM_OUTPUT_INT, {0x150cc}, {0x4}}, \ + {"IPC_Q0_DATA4", ELEM_OUTPUT_INT, {0x150d0}, {0x4}}, \ + {"IPC_Q0_DATA5", ELEM_OUTPUT_INT, {0x150d4}, {0x4}}, \ + {"IPC_Q0_DATA6", ELEM_OUTPUT_INT, {0x150d8}, {0x4}}, \ + {"IPC_Q0_DATA7", ELEM_OUTPUT_INT, {0x150dc}, {0x4}}, \ + {"IPC_Q0_SYSCNT", ELEM_OUTPUT_INT, {0x150e0}, {0x4}}, \ + {"IPC_Q1_DATA0", ELEM_OUTPUT_INT, {0x150e4}, {0x4}}, \ + {"IPC_Q1_DATA1", ELEM_OUTPUT_INT, {0x150e8}, {0x4}}, \ + {"IPC_Q1_DATA2", ELEM_OUTPUT_INT, {0x150ec}, {0x4}}, \ + {"IPC_Q1_DATA3", ELEM_OUTPUT_INT, {0x150f0}, {0x4}}, \ + {"IPC_Q1_DATA4", ELEM_OUTPUT_INT, {0x150f4}, {0x4}}, \ + {"IPC_Q1_DATA5", ELEM_OUTPUT_INT, {0x150f8}, {0x4}}, \ + {"IPC_Q1_DATA6", ELEM_OUTPUT_INT, {0x150fc}, {0x4}}, \ + {"IPC_Q1_DATA7", ELEM_OUTPUT_INT, {0x15100}, {0x4}}, \ + {"IPC_Q1_SYSCNT", ELEM_OUTPUT_INT, {0x15104}, {0x4}}, \ + {"IPC_Q2_DATA0", ELEM_OUTPUT_INT, {0x15108}, {0x4}}, \ + {"IPC_Q2_DATA1", ELEM_OUTPUT_INT, {0x1510c}, {0x4}}, \ + {"IPC_Q2_DATA2", ELEM_OUTPUT_INT, {0x15110}, {0x4}}, \ + {"IPC_Q2_DATA3", ELEM_OUTPUT_INT, {0x15114}, {0x4}}, \ + {"IPC_Q2_DATA4", ELEM_OUTPUT_INT, {0x15118}, {0x4}}, \ + {"IPC_Q2_DATA5", ELEM_OUTPUT_INT, {0x1511c}, {0x4}}, \ + {"IPC_Q2_DATA6", ELEM_OUTPUT_INT, {0x15120}, {0x4}}, \ + {"IPC_Q2_DATA7", ELEM_OUTPUT_INT, {0x15124}, {0x4}}, \ + {"IPC_Q2_SYSCNT", ELEM_OUTPUT_INT, {0x15128}, {0x4}}, \ + {"IPC_Q3_DATA0", ELEM_OUTPUT_INT, {0x1512c}, {0x4}}, \ + {"IPC_Q3_DATA1", ELEM_OUTPUT_INT, {0x15130}, {0x4}}, \ + {"IPC_Q3_DATA2", ELEM_OUTPUT_INT, {0x15134}, {0x4}}, \ + {"IPC_Q3_DATA3", ELEM_OUTPUT_INT, {0x15138}, {0x4}}, \ + {"IPC_Q3_DATA4", ELEM_OUTPUT_INT, {0x1513c}, 
{0x4}}, \ + {"IPC_Q3_DATA5", ELEM_OUTPUT_INT, {0x15140}, {0x4}}, \ + {"IPC_Q3_DATA6", ELEM_OUTPUT_INT, {0x15144}, {0x4}}, \ + {"IPC_Q3_DATA7", ELEM_OUTPUT_INT, {0x15148}, {0x4}}, \ + {"IPC_Q3_SYSCNT", ELEM_OUTPUT_INT, {0x1514c}, {0x4}}, \ + {"IPC_Q4_DATA0", ELEM_OUTPUT_INT, {0x15150}, {0x4}}, \ + {"IPC_Q4_DATA1", ELEM_OUTPUT_INT, {0x15154}, {0x4}}, \ + {"IPC_Q4_DATA2", ELEM_OUTPUT_INT, {0x15158}, {0x4}}, \ + {"IPC_Q4_DATA3", ELEM_OUTPUT_INT, {0x1515c}, {0x4}}, \ + {"IPC_Q4_DATA4", ELEM_OUTPUT_INT, {0x15160}, {0x4}}, \ + {"IPC_Q4_DATA5", ELEM_OUTPUT_INT, {0x15164}, {0x4}}, \ + {"IPC_Q4_DATA6", ELEM_OUTPUT_INT, {0x15168}, {0x4}}, \ + {"IPC_Q4_DATA7", ELEM_OUTPUT_INT, {0x1516c}, {0x4}}, \ + {"IPC_Q4_SYSCNT", ELEM_OUTPUT_INT, {0x15170}, {0x4}}, \ + {"IPC_Q5_DATA0", ELEM_OUTPUT_INT, {0x15174}, {0x4}}, \ + {"IPC_Q5_DATA1", ELEM_OUTPUT_INT, {0x15178}, {0x4}}, \ + {"IPC_Q5_DATA2", ELEM_OUTPUT_INT, {0x1517c}, {0x4}}, \ + {"IPC_Q5_DATA3", ELEM_OUTPUT_INT, {0x15180}, {0x4}}, \ + {"IPC_Q5_DATA4", ELEM_OUTPUT_INT, {0x15184}, {0x4}}, \ + {"IPC_Q5_DATA5", ELEM_OUTPUT_INT, {0x15188}, {0x4}}, \ + {"IPC_Q5_DATA6", ELEM_OUTPUT_INT, {0x1518c}, {0x4}}, \ + {"IPC_Q5_DATA7", ELEM_OUTPUT_INT, {0x15190}, {0x4}}, \ + {"IPC_Q5_SYSCNT", ELEM_OUTPUT_INT, {0x15194}, {0x4}}, \ + {"IPC_Q6_DATA0", ELEM_OUTPUT_INT, {0x15198}, {0x4}}, \ + {"IPC_Q6_DATA1", ELEM_OUTPUT_INT, {0x1519C}, {0x4}}, \ + {"IPC_Q6_DATA2", ELEM_OUTPUT_INT, {0x151A0}, {0x4}}, \ + {"IPC_Q6_DATA3", ELEM_OUTPUT_INT, {0x151A4}, {0x4}}, \ + {"IPC_Q6_DATA4", ELEM_OUTPUT_INT, {0x151A8}, {0x4}}, \ + {"IPC_Q6_DATA5", ELEM_OUTPUT_INT, {0x151AC}, {0x4}}, \ + {"IPC_Q6_DATA6", ELEM_OUTPUT_INT, {0x151B0}, {0x4}}, \ + {"IPC_Q6_DATA7", ELEM_OUTPUT_INT, {0x151B4}, {0x4}}, \ + {"IPC_Q6_SYSCNT", ELEM_OUTPUT_INT, {0x151B8}, {0x4}}, \ + {"IPC_Q7_DATA0", ELEM_OUTPUT_INT, {0x151BC}, {0x4}}, \ + {"IPC_Q7_DATA1", ELEM_OUTPUT_INT, {0x151C0}, {0x4}}, \ + {"IPC_Q7_DATA2", ELEM_OUTPUT_INT, {0x151C4}, {0x4}}, \ + {"IPC_Q7_DATA3", ELEM_OUTPUT_INT, {0x151C8}, 
{0x4}}, \ + {"IPC_Q7_DATA4", ELEM_OUTPUT_INT, {0x151CC}, {0x4}}, \ + {"IPC_Q7_DATA5", ELEM_OUTPUT_INT, {0x151D0}, {0x4}}, \ + {"IPC_Q7_DATA6", ELEM_OUTPUT_INT, {0x151D4}, {0x4}}, \ + {"IPC_Q7_DATA7", ELEM_OUTPUT_INT, {0x151D8}, {0x4}}, \ + {"IPC_Q7_SYSCNT", ELEM_OUTPUT_INT, {0x151DC}, {0x4}}, \ + {"IPC_Q8_DATA0", ELEM_OUTPUT_INT, {0x151E0}, {0x4}}, \ + {"IPC_Q8_DATA1", ELEM_OUTPUT_INT, {0x151E4}, {0x4}}, \ + {"IPC_Q8_DATA2", ELEM_OUTPUT_INT, {0x151E8}, {0x4}}, \ + {"IPC_Q8_DATA3", ELEM_OUTPUT_INT, {0x151EC}, {0x4}}, \ + {"IPC_Q8_DATA4", ELEM_OUTPUT_INT, {0x151F0}, {0x4}}, \ + {"IPC_Q8_DATA5", ELEM_OUTPUT_INT, {0x151F4}, {0x4}}, \ + {"IPC_Q8_DATA6", ELEM_OUTPUT_INT, {0x151F8}, {0x4}}, \ + {"IPC_Q8_DATA7", ELEM_OUTPUT_INT, {0x151FC}, {0x4}}, \ + {"IPC_Q8_SYSCNT", ELEM_OUTPUT_INT, {0x15200}, {0x4}}, \ + {"IPC_Q9_DATA0", ELEM_OUTPUT_INT, {0x15204}, {0x4}}, \ + {"IPC_Q9_DATA1", ELEM_OUTPUT_INT, {0x15208}, {0x4}}, \ + {"IPC_Q9_DATA2", ELEM_OUTPUT_INT, {0x1520C}, {0x4}}, \ + {"IPC_Q9_DATA3", ELEM_OUTPUT_INT, {0x15210}, {0x4}}, \ + {"IPC_Q9_DATA4", ELEM_OUTPUT_INT, {0x15214}, {0x4}}, \ + {"IPC_Q9_DATA5", ELEM_OUTPUT_INT, {0x15218}, {0x4}}, \ + {"IPC_Q9_DATA6", ELEM_OUTPUT_INT, {0x1521C}, {0x4}}, \ + {"IPC_Q9_DATA7", ELEM_OUTPUT_INT, {0x15220}, {0x4}}, \ + {"IPC_Q9_SYSCNT", ELEM_OUTPUT_INT, {0x15224}, {0x4}}, \ + {"***RAS*******", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"RAS_DATA0", ELEM_OUTPUT_INT, {0x13c80}, {0x4}}, \ + {"RAS_DATA1", ELEM_OUTPUT_INT, {0x13c88}, {0x4}}, \ + {"***DDR_REG_DUMP*******", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"DDR_REG_DUMP0", ELEM_OUTPUT_INT, {0x15580}, {0x8}}, \ + {"DDR_REG_DUMP1", ELEM_OUTPUT_INT, {0x15588}, {0x8}}, \ + {"DDR_REG_DUMP2", ELEM_OUTPUT_INT, {0x15590}, {0x8}}, \ + {"DDR_REG_DUMP3", ELEM_OUTPUT_INT, {0x15598}, {0x8}}, \ + {"DDR_REG_DUMP4", ELEM_OUTPUT_INT, {0x155A0}, {0x8}}, \ + {"DDR_REG_DUMP5", ELEM_OUTPUT_INT, {0x155A8}, {0x8}}, \ + {"DDR_REG_DUMP6", ELEM_OUTPUT_INT, {0x155B0}, {0x8}}, \ + {"DDR_REG_DUMP7", ELEM_OUTPUT_INT, 
{0x155B8}, {0x8}}, \ +} + +#define DATA_MODEL_LPFW_SRAM MODEL_VECTOR(LPFW_SRAM) = { \ + {"****exc****reg**", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"reset_reason", ELEM_OUTPUT_INT, {0x00}, {0x4}}, \ + {"slice", ELEM_OUTPUT_INT, {0x08}, {0x4}}, \ + {"rtc", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"REGSP", ELEM_OUTPUT_INT, {0x18}, {0x8}}, \ + {"REGPC", ELEM_OUTPUT_INT, {0x20}, {0x8}}, \ + {"REGELR", ELEM_OUTPUT_INT, {0x28}, {0x8}}, \ + {"REGCPSR", ELEM_OUTPUT_INT, {0x30}, {0x8}}, \ + {"REGSPSR", ELEM_OUTPUT_INT, {0x38}, {0x8}}, \ + {"ESR", ELEM_OUTPUT_INT, {0x40}, {0x8}}, \ + {"FAR", ELEM_OUTPUT_INT, {0x48}, {0x8}}, \ + {"excTrace", ELEM_OUTPUT_INT, {0x50}, {0x1}}, \ + {"ddrExc", ELEM_OUTPUT_INT, {0x51}, {0x1}}, \ + {"irqId", ELEM_OUTPUT_INT, {0x52}, {0x2}}, \ + {"taskId", ELEM_OUTPUT_INT, {0x54}, {0x4}}, \ + {"**backup**reg***", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"reg_backup_index", ELEM_OUTPUT_INT, {0x820}, {0x4}}, \ + {"reason_0", ELEM_OUTPUT_INT, {0x824}, {0x4}}, \ + {"reason_1", ELEM_OUTPUT_INT, {0x828}, {0x4}}, \ + {"reason_2", ELEM_OUTPUT_INT, {0x82C}, {0x4}}, \ + {"x0", ELEM_OUTPUT_INT, {0x830}, {0x8}}, \ + {"x1", ELEM_OUTPUT_INT, {0x848}, {0x8}}, \ + {"x2", ELEM_OUTPUT_INT, {0x850}, {0x8}}, \ + {"x3", ELEM_OUTPUT_INT, {0x858}, {0x8}}, \ + {"x4", ELEM_OUTPUT_INT, {0x860}, {0x8}}, \ + {"x5", ELEM_OUTPUT_INT, {0x868}, {0x8}}, \ + {"x6", ELEM_OUTPUT_INT, {0x870}, {0x8}}, \ + {"x7", ELEM_OUTPUT_INT, {0x878}, {0x8}}, \ + {"x8", ELEM_OUTPUT_INT, {0x880}, {0x8}}, \ + {"x9", ELEM_OUTPUT_INT, {0x888}, {0x8}}, \ + {"x10", ELEM_OUTPUT_INT, {0x890}, {0x8}}, \ + {"x11", ELEM_OUTPUT_INT, {0x898}, {0x8}}, \ + {"x12", ELEM_OUTPUT_INT, {0x8A0}, {0x8}}, \ + {"x13", ELEM_OUTPUT_INT, {0x8A8}, {0x8}}, \ + {"x14", ELEM_OUTPUT_INT, {0x8B0}, {0x8}}, \ + {"x15", ELEM_OUTPUT_INT, {0x8B8}, {0x8}}, \ + {"x16", ELEM_OUTPUT_INT, {0x8C0}, {0x8}}, \ + {"x17", ELEM_OUTPUT_INT, {0x8C8}, {0x8}}, \ + {"x18", ELEM_OUTPUT_INT, {0x8D0}, {0x8}}, \ + {"x19", ELEM_OUTPUT_INT, {0x8D8}, {0x8}}, \ + 
{"x20", ELEM_OUTPUT_INT, {0x8E0}, {0x8}}, \ + {"x21", ELEM_OUTPUT_INT, {0x8E8}, {0x8}}, \ + {"x22", ELEM_OUTPUT_INT, {0x8F0}, {0x8}}, \ + {"x23", ELEM_OUTPUT_INT, {0x8F8}, {0x8}}, \ + {"x24", ELEM_OUTPUT_INT, {0x900}, {0x8}}, \ + {"x25", ELEM_OUTPUT_INT, {0x908}, {0x8}}, \ + {"x26", ELEM_OUTPUT_INT, {0x910}, {0x8}}, \ + {"x27", ELEM_OUTPUT_INT, {0x918}, {0x8}}, \ + {"x28", ELEM_OUTPUT_INT, {0x920}, {0x8}}, \ + {"x29", ELEM_OUTPUT_INT, {0x928}, {0x8}}, \ + {"x30", ELEM_OUTPUT_INT, {0x930}, {0x8}}, \ + {"XZR", ELEM_OUTPUT_INT, {0x938}, {0x8}}, \ + {"ESR", ELEM_OUTPUT_INT, {0x940}, {0x8}}, \ + {"FAR", ELEM_OUTPUT_INT, {0x948}, {0x8}}, \ + {"SPSR", ELEM_OUTPUT_INT, {0x950}, {0x8}}, \ + {"ELR", ELEM_OUTPUT_INT, {0x958}, {0x8}}, \ + {"PC", ELEM_OUTPUT_INT, {0x960}, {0x8}}, \ + {"SP", ELEM_OUTPUT_INT, {0x968}, {0x8}}, \ + {"CPSR", ELEM_OUTPUT_INT, {0x970}, {0x8}}, \ + {"**GIC*******", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"ENABLE[0]", ELEM_OUTPUT_INT, {0x2820}, {0x4}}, \ + {"ENABLE[1]", ELEM_OUTPUT_INT, {0x2824}, {0x4}}, \ + {"ENABLE[2]", ELEM_OUTPUT_INT, {0x2828}, {0x4}}, \ + {"ENABLE[3]", ELEM_OUTPUT_INT, {0x282C}, {0x4}}, \ + {"ENABLE[4]", ELEM_OUTPUT_INT, {0x2830}, {0x4}}, \ + {"ENABLE[5]", ELEM_OUTPUT_INT, {0x2834}, {0x4}}, \ + {"ENABLE[6]", ELEM_OUTPUT_INT, {0x2838}, {0x4}}, \ + {"ENABLE[7]", ELEM_OUTPUT_INT, {0x283C}, {0x4}}, \ + {"ENABLE[8]", ELEM_OUTPUT_INT, {0x2840}, {0x4}}, \ + {"ENABLE[9]", ELEM_OUTPUT_INT, {0x2844}, {0x4}}, \ + {"ENABLE[10]", ELEM_OUTPUT_INT, {0x2848}, {0x4}}, \ + {"ENABLE[11]", ELEM_OUTPUT_INT, {0x284C}, {0x4}}, \ + {"ENABLE[12]", ELEM_OUTPUT_INT, {0x2850}, {0x4}}, \ + {"PENDING[0]", ELEM_OUTPUT_INT, {0x2854}, {0x4}}, \ + {"PENDING[1]", ELEM_OUTPUT_INT, {0x2858}, {0x4}}, \ + {"PENDING[2]", ELEM_OUTPUT_INT, {0x285C}, {0x4}}, \ + {"**IPC*******", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"IPC_MBX", ELEM_OUTPUT_INT, {0x1000}, {0x4}}, \ + {"IPC_SRC", ELEM_OUTPUT_INT, {0x1004}, {0x4}}, \ + {"IPC_MODE", ELEM_OUTPUT_INT, {0x1008}, {0x4}}, \ + 
{"IPC_ICLR", ELEM_OUTPUT_INT, {0x100c}, {0x4}}, \ + {"IPC_DATA0", ELEM_OUTPUT_INT, {0x1010}, {0x4}}, \ + {"IPC_DATA1", ELEM_OUTPUT_INT, {0x1014}, {0x4}}, \ + {"IPC_DATA2", ELEM_OUTPUT_INT, {0x1018}, {0x4}}, \ + {"IPC_DATA3", ELEM_OUTPUT_INT, {0x101c}, {0x4}}, \ + {"IPC_DATA4", ELEM_OUTPUT_INT, {0x1020}, {0x4}}, \ + {"IPC_DATA5", ELEM_OUTPUT_INT, {0x1024}, {0x4}}, \ + {"IPC_DATA6", ELEM_OUTPUT_INT, {0x1028}, {0x4}}, \ + {"IPC_DATA7", ELEM_OUTPUT_INT, {0x102c}, {0x4}}, \ + {"IPC_Q0_DATA0", ELEM_OUTPUT_INT, {0x1040}, {0x4}}, \ + {"IPC_Q0_DATA1", ELEM_OUTPUT_INT, {0x1044}, {0x4}}, \ + {"IPC_Q0_DATA2", ELEM_OUTPUT_INT, {0x1048}, {0x4}}, \ + {"IPC_Q0_DATA3", ELEM_OUTPUT_INT, {0x104c}, {0x4}}, \ + {"IPC_Q0_DATA4", ELEM_OUTPUT_INT, {0x1050}, {0x4}}, \ + {"IPC_Q0_DATA5", ELEM_OUTPUT_INT, {0x1054}, {0x4}}, \ + {"IPC_Q0_DATA6", ELEM_OUTPUT_INT, {0x1058}, {0x4}}, \ + {"IPC_Q0_DATA7", ELEM_OUTPUT_INT, {0x105c}, {0x4}}, \ + {"IPC_Q0_SYSCNT", ELEM_OUTPUT_INT, {0x1060}, {0x4}}, \ + {"IPC_Q1_DATA0", ELEM_OUTPUT_INT, {0x1064}, {0x4}}, \ + {"IPC_Q1_DATA1", ELEM_OUTPUT_INT, {0x1068}, {0x4}}, \ + {"IPC_Q1_DATA2", ELEM_OUTPUT_INT, {0x106c}, {0x4}}, \ + {"IPC_Q1_DATA3", ELEM_OUTPUT_INT, {0x1070}, {0x4}}, \ + {"IPC_Q1_DATA4", ELEM_OUTPUT_INT, {0x1074}, {0x4}}, \ + {"IPC_Q1_DATA5", ELEM_OUTPUT_INT, {0x1078}, {0x4}}, \ + {"IPC_Q1_DATA6", ELEM_OUTPUT_INT, {0x107c}, {0x4}}, \ + {"IPC_Q1_DATA7", ELEM_OUTPUT_INT, {0x1080}, {0x4}}, \ + {"IPC_Q1_SYSCNT", ELEM_OUTPUT_INT, {0x1084}, {0x4}}, \ + {"IPC_Q2_DATA0", ELEM_OUTPUT_INT, {0x1088}, {0x4}}, \ + {"IPC_Q2_DATA1", ELEM_OUTPUT_INT, {0x108c}, {0x4}}, \ + {"IPC_Q2_DATA2", ELEM_OUTPUT_INT, {0x1190}, {0x4}}, \ + {"IPC_Q2_DATA3", ELEM_OUTPUT_INT, {0x1194}, {0x4}}, \ + {"IPC_Q2_DATA4", ELEM_OUTPUT_INT, {0x1198}, {0x4}}, \ + {"IPC_Q2_DATA5", ELEM_OUTPUT_INT, {0x119c}, {0x4}}, \ + {"IPC_Q2_DATA6", ELEM_OUTPUT_INT, {0x11A0}, {0x4}}, \ + {"IPC_Q2_DATA7", ELEM_OUTPUT_INT, {0x11A4}, {0x4}}, \ + {"IPC_Q2_SYSCNT", ELEM_OUTPUT_INT, {0x11A8}, {0x4}}, \ + 
{"IPC_Q3_DATA0", ELEM_OUTPUT_INT, {0x11Ac}, {0x4}}, \ + {"IPC_Q3_DATA1", ELEM_OUTPUT_INT, {0x11B0}, {0x4}}, \ + {"IPC_Q3_DATA2", ELEM_OUTPUT_INT, {0x11B4}, {0x4}}, \ + {"IPC_Q3_DATA3", ELEM_OUTPUT_INT, {0x11B8}, {0x4}}, \ + {"IPC_Q3_DATA4", ELEM_OUTPUT_INT, {0x11Bc}, {0x4}}, \ + {"IPC_Q3_DATA5", ELEM_OUTPUT_INT, {0x11C0}, {0x4}}, \ + {"IPC_Q3_DATA6", ELEM_OUTPUT_INT, {0x11C4}, {0x4}}, \ + {"IPC_Q3_DATA7", ELEM_OUTPUT_INT, {0x11C8}, {0x4}}, \ + {"IPC_Q3_SYSCNT", ELEM_OUTPUT_INT, {0x11Cc}, {0x4}}, \ + {"IPC_Q4_DATA0", ELEM_OUTPUT_INT, {0x11D0}, {0x4}}, \ + {"IPC_Q4_DATA1", ELEM_OUTPUT_INT, {0x11D4}, {0x4}}, \ + {"IPC_Q4_DATA2", ELEM_OUTPUT_INT, {0x11D8}, {0x4}}, \ + {"IPC_Q4_DATA3", ELEM_OUTPUT_INT, {0x11Dc}, {0x4}}, \ + {"IPC_Q4_DATA4", ELEM_OUTPUT_INT, {0x11E0}, {0x4}}, \ + {"IPC_Q4_DATA5", ELEM_OUTPUT_INT, {0x11E4}, {0x4}}, \ + {"IPC_Q4_DATA6", ELEM_OUTPUT_INT, {0x11E8}, {0x4}}, \ + {"IPC_Q4_DATA7", ELEM_OUTPUT_INT, {0x11Ec}, {0x4}}, \ + {"IPC_Q4_SYSCNT", ELEM_OUTPUT_INT, {0x11F0}, {0x4}}, \ + {"IPC_Q5_DATA0", ELEM_OUTPUT_INT, {0x11F4}, {0x4}}, \ + {"IPC_Q5_DATA1", ELEM_OUTPUT_INT, {0x11F8}, {0x4}}, \ + {"IPC_Q5_DATA2", ELEM_OUTPUT_INT, {0x11Fc}, {0x4}}, \ + {"IPC_Q5_DATA3", ELEM_OUTPUT_INT, {0x1200}, {0x4}}, \ + {"IPC_Q5_DATA4", ELEM_OUTPUT_INT, {0x1204}, {0x4}}, \ + {"IPC_Q5_DATA5", ELEM_OUTPUT_INT, {0x1208}, {0x4}}, \ + {"IPC_Q5_DATA6", ELEM_OUTPUT_INT, {0x120c}, {0x4}}, \ + {"IPC_Q5_DATA7", ELEM_OUTPUT_INT, {0x1210}, {0x4}}, \ + {"IPC_Q5_SYSCNT", ELEM_OUTPUT_INT, {0x1214}, {0x4}}, \ + {"IPC_Q6_DATA0", ELEM_OUTPUT_INT, {0x1218}, {0x4}}, \ + {"IPC_Q6_DATA1", ELEM_OUTPUT_INT, {0x121C}, {0x4}}, \ + {"IPC_Q6_DATA2", ELEM_OUTPUT_INT, {0x1220}, {0x4}}, \ + {"IPC_Q6_DATA3", ELEM_OUTPUT_INT, {0x1224}, {0x4}}, \ + {"IPC_Q6_DATA4", ELEM_OUTPUT_INT, {0x1228}, {0x4}}, \ + {"IPC_Q6_DATA5", ELEM_OUTPUT_INT, {0x122C}, {0x4}}, \ + {"IPC_Q6_DATA6", ELEM_OUTPUT_INT, {0x1230}, {0x4}}, \ + {"IPC_Q6_DATA7", ELEM_OUTPUT_INT, {0x1234}, {0x4}}, \ + {"IPC_Q6_SYSCNT", ELEM_OUTPUT_INT, 
{0x1238}, {0x4}}, \ + {"IPC_Q7_DATA0", ELEM_OUTPUT_INT, {0x123C}, {0x4}}, \ + {"IPC_Q7_DATA1", ELEM_OUTPUT_INT, {0x1240}, {0x4}}, \ + {"IPC_Q7_DATA2", ELEM_OUTPUT_INT, {0x1244}, {0x4}}, \ + {"IPC_Q7_DATA3", ELEM_OUTPUT_INT, {0x1248}, {0x4}}, \ + {"IPC_Q7_DATA4", ELEM_OUTPUT_INT, {0x124C}, {0x4}}, \ + {"IPC_Q7_DATA5", ELEM_OUTPUT_INT, {0x1250}, {0x4}}, \ + {"IPC_Q7_DATA6", ELEM_OUTPUT_INT, {0x1254}, {0x4}}, \ + {"IPC_Q7_DATA7", ELEM_OUTPUT_INT, {0x1258}, {0x4}}, \ + {"IPC_Q7_SYSCNT", ELEM_OUTPUT_INT, {0x125C}, {0x4}}, \ + {"IPC_Q8_DATA0", ELEM_OUTPUT_INT, {0x1260}, {0x4}}, \ + {"IPC_Q8_DATA1", ELEM_OUTPUT_INT, {0x1264}, {0x4}}, \ + {"IPC_Q8_DATA2", ELEM_OUTPUT_INT, {0x1268}, {0x4}}, \ + {"IPC_Q8_DATA3", ELEM_OUTPUT_INT, {0x126C}, {0x4}}, \ + {"IPC_Q8_DATA4", ELEM_OUTPUT_INT, {0x1270}, {0x4}}, \ + {"IPC_Q8_DATA5", ELEM_OUTPUT_INT, {0x1274}, {0x4}}, \ + {"IPC_Q8_DATA6", ELEM_OUTPUT_INT, {0x1278}, {0x4}}, \ + {"IPC_Q8_DATA7", ELEM_OUTPUT_INT, {0x127C}, {0x4}}, \ + {"IPC_Q8_SYSCNT", ELEM_OUTPUT_INT, {0x1280}, {0x4}}, \ + {"IPC_Q9_DATA0", ELEM_OUTPUT_INT, {0x1284}, {0x4}}, \ + {"IPC_Q9_DATA1", ELEM_OUTPUT_INT, {0x1288}, {0x4}}, \ + {"IPC_Q9_DATA2", ELEM_OUTPUT_INT, {0x128C}, {0x4}}, \ + {"IPC_Q9_DATA3", ELEM_OUTPUT_INT, {0x1290}, {0x4}}, \ + {"IPC_Q9_DATA4", ELEM_OUTPUT_INT, {0x1294}, {0x4}}, \ + {"IPC_Q9_DATA5", ELEM_OUTPUT_INT, {0x1298}, {0x4}}, \ + {"IPC_Q9_DATA6", ELEM_OUTPUT_INT, {0x129C}, {0x4}}, \ + {"IPC_Q9_DATA7", ELEM_OUTPUT_INT, {0x12A0}, {0x4}}, \ + {"IPC_Q9_SYSCNT", ELEM_OUTPUT_INT, {0x12A4}, {0x4}}, \ + {"***DDR_REG_DUMP*******", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"DDR_REG_DUMP0", ELEM_OUTPUT_INT, {0x3C60}, {0x8}}, \ + {"DDR_REG_DUMP1", ELEM_OUTPUT_INT, {0x3C68}, {0x8}}, \ + {"DDR_REG_DUMP2", ELEM_OUTPUT_INT, {0x3C70}, {0x8}}, \ + {"DDR_REG_DUMP3", ELEM_OUTPUT_INT, {0x3C78}, {0x8}}, \ + {"DDR_REG_DUMP4", ELEM_OUTPUT_INT, {0x3C80}, {0x8}}, \ + {"DDR_REG_DUMP5", ELEM_OUTPUT_INT, {0x3C88}, {0x8}}, \ + {"DDR_REG_DUMP6", ELEM_OUTPUT_INT, {0x3C90}, {0x8}}, \ + 
{"DDR_REG_DUMP7", ELEM_OUTPUT_INT, {0x3C98}, {0x8}}, \ +} + +#define DATA_MODEL_LPFW_PMU MODEL_VECTOR(LPFW_PMU) = { \ + {"cpuid", ELEM_OUTPUT_INT, {0x0000}, {0x1}}, \ + {"slaveid", ELEM_OUTPUT_INT, {0x0001}, {0x1}}, \ + {"loopid", ELEM_OUTPUT_INT, {0x0002}, {0x1}}, \ + {"79H", ELEM_OUTPUT_HEX, {0x0004}, {0x2}}, \ + {"7AH", ELEM_OUTPUT_HEX, {0x0006}, {0x1}}, \ + {"7BH", ELEM_OUTPUT_HEX, {0x0007}, {0x1}}, \ + {"7CH", ELEM_OUTPUT_HEX, {0x0008}, {0x1}}, \ + {"7DH", ELEM_OUTPUT_HEX, {0x0009}, {0x1}}, \ + {"7EH", ELEM_OUTPUT_HEX, {0x000A}, {0x1}}, \ + {"80H", ELEM_OUTPUT_HEX, {0x000B}, {0x1}}, \ + {"88H", ELEM_OUTPUT_HEX, {0x000C}, {0x2}}, \ + {"89H", ELEM_OUTPUT_HEX, {0x000E}, {0x2}}, \ + {"8BH", ELEM_OUTPUT_HEX, {0x0010}, {0x2}}, \ + {"8CH", ELEM_OUTPUT_HEX, {0x0012}, {0x2}}, \ + {"8DH", ELEM_OUTPUT_HEX, {0x0014}, {0x2}}, \ + {"96H", ELEM_OUTPUT_HEX, {0x0016}, {0x2}}, \ + {"97H", ELEM_OUTPUT_HEX, {0x0018}, {0x2}}, \ + {"20H", ELEM_OUTPUT_HEX, {0x001A}, {0x1}}, \ + {"21H", ELEM_OUTPUT_HEX, {0x001C}, {0x2}}, \ +} + +/* TEE module */ +#define DATA_MODEL_TEE MODEL_VECTOR(TEE) = { \ + {"tee info", ELEM_OUTPUT_STR_NL, {0x0}, {0x10000}}, \ +} + +/* TF module */ +#define DATA_MODEL_TF MODEL_VECTOR(TF) = { \ + {"x0", ELEM_OUTPUT_HEX, {0x0}, {0x8}}, \ + {"x1", ELEM_OUTPUT_HEX, {0x8}, {0x8}}, \ + {"x30", ELEM_OUTPUT_HEX, {0x10}, {0x8}}, \ + {"x2", ELEM_OUTPUT_HEX, {0x18}, {0x8}}, \ + {"x3", ELEM_OUTPUT_HEX, {0x20}, {0x8}}, \ + {"x4", ELEM_OUTPUT_HEX, {0x28}, {0x8}}, \ + {"x5", ELEM_OUTPUT_HEX, {0x30}, {0x8}}, \ + {"x6", ELEM_OUTPUT_HEX, {0x38}, {0x8}}, \ + {"x7", ELEM_OUTPUT_HEX, {0x40}, {0x8}}, \ + {"x8", ELEM_OUTPUT_HEX, {0x48}, {0x8}}, \ + {"x9", ELEM_OUTPUT_HEX, {0x50}, {0x8}}, \ + {"x10", ELEM_OUTPUT_HEX, {0x58}, {0x8}}, \ + {"x11", ELEM_OUTPUT_HEX, {0x60}, {0x8}}, \ + {"x12", ELEM_OUTPUT_HEX, {0x68}, {0x8}}, \ + {"x13", ELEM_OUTPUT_HEX, {0x70}, {0x8}}, \ + {"x14", ELEM_OUTPUT_HEX, {0x78}, {0x8}}, \ + {"x15", ELEM_OUTPUT_HEX, {0x80}, {0x8}}, \ + {"x16", ELEM_OUTPUT_HEX, 
{0x88}, {0x8}}, \ + {"x17", ELEM_OUTPUT_HEX, {0x90}, {0x8}}, \ + {"x18", ELEM_OUTPUT_HEX, {0x98}, {0x8}}, \ + {"x19", ELEM_OUTPUT_HEX, {0xA0}, {0x8}}, \ + {"x20", ELEM_OUTPUT_HEX, {0xA8}, {0x8}}, \ + {"x21", ELEM_OUTPUT_HEX, {0xB0}, {0x8}}, \ + {"x22", ELEM_OUTPUT_HEX, {0xB8}, {0x8}}, \ + {"x23", ELEM_OUTPUT_HEX, {0xC0}, {0x8}}, \ + {"x24", ELEM_OUTPUT_HEX, {0xC8}, {0x8}}, \ + {"x25", ELEM_OUTPUT_HEX, {0xD0}, {0x8}}, \ + {"x26", ELEM_OUTPUT_HEX, {0xD8}, {0x8}}, \ + {"x27", ELEM_OUTPUT_HEX, {0xE0}, {0x8}}, \ + {"x28", ELEM_OUTPUT_HEX, {0xE8}, {0x8}}, \ + {"x29", ELEM_OUTPUT_HEX, {0xF0}, {0x8}}, \ + {"scr_el3", ELEM_OUTPUT_HEX, {0xF8}, {0x8}}, \ + {"sctlr_el3", ELEM_OUTPUT_HEX, {0x100}, {0x8}}, \ + {"cptr_el3", ELEM_OUTPUT_HEX, {0x108}, {0x8}}, \ + {"tcr_el3", ELEM_OUTPUT_HEX, {0x110}, {0x8}}, \ + {"daif", ELEM_OUTPUT_HEX, {0x118}, {0x8}}, \ + {"mair_el3", ELEM_OUTPUT_HEX, {0x120}, {0x8}}, \ + {"spsr_el3", ELEM_OUTPUT_HEX, {0x128}, {0x8}}, \ + {"elr_el3", ELEM_OUTPUT_HEX, {0x130}, {0x8}}, \ + {"ttbr0_el3", ELEM_OUTPUT_HEX, {0x138}, {0x8}}, \ + {"esr_el3", ELEM_OUTPUT_HEX, {0x140}, {0x8}}, \ + {"far_el3", ELEM_OUTPUT_HEX, {0x148}, {0x8}}, \ +} + +/* DVPP module */ +#define DATA_MODEL_DVPP MODEL_VECTOR(DVPP) = { \ + {"dvpp info", ELEM_OUTPUT_STR_NL, {0x0}, {0x10000}}, \ +} + +/* DRIVE module */ +#define DATA_MODEL_DRIVER MODEL_VECTOR(DRIVER) = { \ + {"driver info", ELEM_OUTPUT_STR_NL, {0x0}, {0x20000}}, \ +} + +/* TS module */ +#define DATA_MODEL_TS MODEL_VECTOR(TS) = { \ + {"ts info", ELEM_OUTPUT_CHAR, {0x0}, {0x100000}}, \ +} + +/* TS module, start */ +#define DATA_MODEL_TS_START MODEL_VECTOR(TS_START) = { \ + {"ts start info", ELEM_OUTPUT_STR_NL, {0x0}, {0xC800}}, \ +} + +/* AP module, early print */ +#define DATA_MODEL_AP_EPRINT MODEL_VECTOR(AP_EPRINT) = { \ + {"early print info", ELEM_OUTPUT_STR_NL, {0x0}, {0x400}}, \ +} + +/* BIOS module */ +#define DATA_MODEL_BIOS MODEL_VECTOR(BIOS) = { \ + {"bios info", ELEM_OUTPUT_STR_NL, {0x0}, {0x50000}}, \ +} + +/* BIOS 
module, sram */ +#define DATA_MODEL_BIOS_SRAM MODEL_VECTOR(BIOS_SRAM) = { \ + {"LPM3_WAKE_UP_STATUS", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"DEBUG_TIME_POWERUP_DONE", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"DEBUG_TIME_PERSTHIGH_DONE", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + {"DEBUG_TIME_PCIEPHY_DONE", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"DEBUG_TIME_PHY_FIRMWARE_DONE", ELEM_OUTPUT_INT, {0x34}, {0x4}}, \ + {"DEBUG_TIME_PCIECTRL_DONE", ELEM_OUTPUT_INT, {0x38}, {0x4}}, \ + {"DEBUG_TIME_IMG_DONE", ELEM_OUTPUT_INT, {0x3C}, {0x4}}, \ + {"DEBUG_TIME_SECURE_DONE", ELEM_OUTPUT_INT, {0x40}, {0x4}}, \ + {"DEBUG_VERSION_ADDR", ELEM_OUTPUT_HEX, {0x50}, {0x10}}, \ + {"XLOADER_RESET_REG", ELEM_OUTPUT_INT, {0x200}, {0x4}}, \ + {"XLOADER_KEY_POINT", ELEM_OUTPUT_INT, {0x204}, {0x4}}, \ + {"XLOADER_TIME_POWERUP_DONE", ELEM_OUTPUT_INT, {0x228}, {0x4}}, \ + {"XLOADER_TIME_PERSTHIGH_DONE", ELEM_OUTPUT_INT, {0x22C}, {0x4}}, \ + {"XLOADER_TIME_PCIEPHY_DONE", ELEM_OUTPUT_INT, {0x230}, {0x4}}, \ + {"XLOADER_TIME_PHY_FIRMWARE_DONE", ELEM_OUTPUT_INT, {0x234}, {0x4}}, \ + {"XLOADER_TIME_PCIECTRL_DONE", ELEM_OUTPUT_INT, {0x238}, {0x4}}, \ + {"XLOADER_TIME_PCIE_DETECT_DONE", ELEM_OUTPUT_INT, {0x23C}, {0x4}}, \ + {"UEFI_LAST_KEYPOINT", ELEM_OUTPUT_INT, {0x320}, {0x4}}, \ + {"SD_LOAD_FILE_STATUS", ELEM_OUTPUT_INT, {0x350}, {0x4}}, \ +} + +/* NETWORK module */ +#define DATA_MODEL_NETWORK MODEL_VECTOR(NETWORK) = { \ + {"network info", ELEM_OUTPUT_STR, {0x0}, {0x20000}}, \ +} + +#define DATA_MODEL_IMU_BOOT_LOG MODEL_VECTOR(IMU_BOOT_LOG) = { \ + {"imu log buffer", ELEM_FEATURE_LOOPBUF, {1}, {6}}, \ + {"buf_read", ELEM_CTRL_LPBF_READ, {0x0}, {0x4}}, \ + {"buf_len", ELEM_CTRL_LPBF_SIZE, {0x4}, {0x4}}, \ + {"log_level", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"rollback", ELEM_CTRL_LPBF_ROLLBK, {0x10}, {0x4}}, \ + {"buf_write", ELEM_CTRL_LPBF_WRITE, {0x40}, {0x4}}, \ + {"buf_head_len", ELEM_CTRL_LPBF_HEAD, {0x80}, {0x4}}, \ + {"imu log data", ELEM_FEATURE_CHARLOG, {1}, {1}}, \ + {"imu log", ELEM_OUTPUT_STR_NL, 
{0x80}, {0xFFF80}}, \ +} + +#define DATA_MODEL_IMU_UEFI_BOOT MODEL_VECTOR(IMU_UEFI_BOOT) = { \ + {"imu log buffer", ELEM_FEATURE_LOOPBUF, {1}, {6}}, \ + {"buf_read", ELEM_CTRL_LPBF_READ, {0x0}, {0x4}}, \ + {"buf_len", ELEM_CTRL_LPBF_SIZE, {0x4}, {0x4}}, \ + {"log_level", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"rollback", ELEM_CTRL_LPBF_ROLLBK, {0x10}, {0x4}}, \ + {"buf_write", ELEM_CTRL_LPBF_WRITE, {0x40}, {0x4}}, \ + {"buf_head_len", ELEM_CTRL_LPBF_HEAD, {0x80}, {0x4}}, \ + {"imu log data", ELEM_FEATURE_CHARLOG, {1}, {1}}, \ + {"imu log", ELEM_OUTPUT_STR_NL, {0x80}, {0x2FFF80}}, \ +} + +#define DATA_MODEL_IMU_RUN_LOG MODEL_VECTOR(IMU_RUN_LOG) = { \ + {"imu log buffer", ELEM_FEATURE_LOOPBUF, {1}, {6}}, \ + {"buf_read", ELEM_CTRL_LPBF_READ, {0x0}, {0x4}}, \ + {"buf_len", ELEM_CTRL_LPBF_SIZE, {0x4}, {0x4}}, \ + {"log_level", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"rollback", ELEM_CTRL_LPBF_ROLLBK, {0x10}, {0x4}}, \ + {"buf_write", ELEM_CTRL_LPBF_WRITE, {0x40}, {0x4}}, \ + {"buf_head_len", ELEM_CTRL_LPBF_HEAD, {0x80}, {0x4}}, \ + {"imu log data", ELEM_FEATURE_CHARLOG, {1}, {1}}, \ + {"imu log", ELEM_OUTPUT_STR_NL, {0x80}, {0x3FFF80}}, \ +} + +// lpfw exception with imu log +#define DATA_MODEL_LPFW_LOG MODEL_VECTOR(LPFW_LOG) = { \ + {"IMU BOOT LOG", ELEM_CTRL_TABLE_GOTO, {0x0}, {0x100000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_IMU_BOOT_LOG}, {0x1}}, \ + {"IMU RUN LOG", ELEM_CTRL_TABLE_GOTO, {0x400000}, {0x400000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_IMU_RUN_LOG}, {0x1}}, \ +} + +// bbox kbox info +#define DATA_MODEL_BBOX_KBOX MODEL_VECTOR(BBOX_KBOX) = { \ + {"CONSOLE START", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"[console info]", ELEM_OUTPUT_STR_NL, {0x0}, {0x10000}}, \ + {"CONSOLE END", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"MESSAGE START", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"[message info]", ELEM_OUTPUT_STR_NL, {0x10000}, {0x40000}}, \ + {"MESSAGE END", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"if panic", 
ELEM_CTRL_COMPARE, {0x50000}, {0x1}}, \ + {"", ELEM_CTRL_CMP_JUMP_EQ, {0x0}, {0x3}}, \ + {"PANIC START", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"[panic info]", ELEM_OUTPUT_STR_NL, {0x50000}, {0x8000}}, \ + {"PANIC END", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"if emerge", ELEM_CTRL_COMPARE, {0x58000}, {0x1}}, \ + {"", ELEM_CTRL_CMP_JUMP_EQ, {0x0}, {0x3}}, \ + {"EMERGE START", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"[emerge info]", ELEM_OUTPUT_STR_NL, {0x58000}, {0x8000}}, \ + {"EMERGE END", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"if die", ELEM_CTRL_COMPARE, {0x60000}, {0x1}}, \ + {"", ELEM_CTRL_CMP_JUMP_EQ, {0x0}, {0x3}}, \ + {"DIE START", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"[die info]", ELEM_OUTPUT_STR_NL, {0x60000}, {0x20000}}, \ + {"DIE END", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ +} + +/** + * the whole space is 512k, used for histroy data record + * the struct distribution is as follows: + * +-------------------+ + * | head info(1k) | region: area: module block: + * +-------------------+ +--------------------+ +-----------------+ +-----------------+ + * | boot region |---->| first area |---->| module block |---->| block head | + * +-------------------+ +--------------------+ +-----------------+ +-----------------+ + * | run region | | second area | | module block | | block data | + * +-------------------+ +--------------------+ +-----------------+ +-----------------+ + * | reserved | | ...... | | ...... 
| + * +-------------------+ +--------------------+ +-----------------+ + */ +#define DATA_MODEL_HDR_BOOT_BIOS MODEL_VECTOR(HDR_BOOT_BIOS) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"bsbc point", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {"bsbc exc point", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"hboot1 point", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {"hboot1 exc point", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"hboot2 point", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + {"hboot2 exc point", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"[BIOS info]", ELEM_OUTPUT_STR_NL, {0x480}, {0x2780}}, \ +} + +#define DATA_MODEL_HDR_BOOT_DDR MODEL_VECTOR(HDR_BOOT_DDR) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"magic_begin", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {"init_keypoint", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"ldo8_vol", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {"buck3_status", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"buck3_vol", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + {"buck5_status", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"buck5_vol", ELEM_OUTPUT_INT, {0x34}, {0x4}}, \ + {"wr_test_result", ELEM_OUTPUT_INT, {0x38}, {0x4}}, \ + {"rint_status[0]", ELEM_OUTPUT_INT, {0x3C}, {0x4}}, \ + {"rint_status[1]", ELEM_OUTPUT_INT, {0x40}, {0x4}}, \ + {"rint_status[2]", ELEM_OUTPUT_INT, {0x44}, {0x4}}, \ + 
{"rint_status[3]", ELEM_OUTPUT_INT, {0x48}, {0x4}}, \ + {"rint_status[4]", ELEM_OUTPUT_INT, {0x4C}, {0x4}}, \ + {"rint_status[5]", ELEM_OUTPUT_INT, {0x50}, {0x4}}, \ + {"rint_status[6]", ELEM_OUTPUT_INT, {0x54}, {0x4}}, \ + {"rint_status[7]", ELEM_OUTPUT_INT, {0x58}, {0x4}}, \ + {"SOC_SCTRL_DDRRETENTION_ADDR", ELEM_OUTPUT_INT, {0x5C}, {0x4}}, \ + {"SOC_SCTRL_DDRRETENTIONCLR_ADDR", ELEM_OUTPUT_INT, {0x60}, {0x4}}, \ + {"SOC_SCTRL_DRAMRETENTION_ADDR", ELEM_OUTPUT_INT, {0x64}, {0x4}}, \ + {"SC_DDRC_0_3_RESET_REQ", ELEM_OUTPUT_INT, {0x68}, {0x4}}, \ + {"SC_DDRC_4_7_RESET_REQ", ELEM_OUTPUT_INT, {0x6C}, {0x4}}, \ + {"SC_DDRC_0_3_PACK_RESET_REQ", ELEM_OUTPUT_INT, {0x70}, {0x4}}, \ + {"SC_DDRC_4_7_PACK_RESET_REQ", ELEM_OUTPUT_INT, {0x74}, {0x4}}, \ + {"SC_DDRC_EXMBIST0_REGS_RESET_REQ", ELEM_OUTPUT_INT, {0x78}, {0x4}}, \ + {"SC_DDRC_EXMBIST1_REGS_RESET_REQ", ELEM_OUTPUT_INT, {0x7C}, {0x4}}, \ + {"SOC_SCTRL_DDRC_0_3_AO_RST_ADDR", ELEM_OUTPUT_INT, {0x80}, {0x4}}, \ + {"SOC_SCTRL_DDRC_4_7_AO_RST_ADDR", ELEM_OUTPUT_INT, {0x84}, {0x4}}, \ + {"SOC_PMCTRL_PPLLBYPASS0_ADDR", ELEM_OUTPUT_INT, {0x88}, {0x4}}, \ + {"SOC_PMCTRL_PPLLBYPASS1_ADDR", ELEM_OUTPUT_INT, {0x8C}, {0x4}}, \ + {"SOC_PMCTRL_PPLL3FCTRL_ADDR", ELEM_OUTPUT_INT, {0x90}, {0x4}}, \ + {"SOC_PMCTRL_PPLL3FCTRL_FRAC_ADDR", ELEM_OUTPUT_INT, {0x94}, {0x4}}, \ + {"SOC_PMCTRL_PPLL4FCTRL_ADDR", ELEM_OUTPUT_INT, {0x98}, {0x4}}, \ + {"SOC_PMCTRL_PPLL4FCTRL_FRAC_ADDR", ELEM_OUTPUT_INT, {0x9C}, {0x4}}, \ + {"SOC_PMCTRL_PPLLOCKSTATUS_ADDR", ELEM_OUTPUT_INT, {0x100}, {0x4}}, \ + {"SC_DDRC_0_3_BYPASS_MODE_CTRL", ELEM_OUTPUT_INT, {0x104}, {0x4}}, \ + {"SC_DDRC_4_7_BYPASS_MODE_CTRL", ELEM_OUTPUT_INT, {0x108}, {0x4}}, \ + {"SC_PLL_PROF_CFG1", ELEM_OUTPUT_INT, {0x10C}, {0x4}}, \ +} + +#define DATA_MODEL_HDR_BOOT_TEE MODEL_VECTOR(HDR_BOOT_TEE) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + 
{"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"[BOOT FATAL INFO SIZE]", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {"[BOOT FATAL INFO]", ELEM_OUTPUT_STR_NL, {0x20}, {0x7E0}}, \ + {"[run point tail]", ELEM_OUTPUT_INT, {0x800}, {0x4}}, \ + {"[boot point info]", ELEM_OUTPUT_HEX, {0x804}, {0x20}}, \ + {"[run point info]", ELEM_OUTPUT_HEX, {0x884}, {0x20}}, \ + {"[last log size]", ELEM_OUTPUT_INT, {0xC00}, {0x4}}, \ + {"[last log data]", ELEM_OUTPUT_STR_NL, {0xC04}, {0x3FC}}, \ +} + +#define DATA_MODEL_HDR_BOOT_ATF MODEL_VECTOR(HDR_BOOT_ATF) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"[ATF info]", ELEM_OUTPUT_STR_NL, {0x1C}, {0xFE4}}, \ +} + +#define DATA_MODEL_HDR_BOOT_AREA MODEL_VECTOR(HDR_BOOT_AREA) = { \ + {"BIOS INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_BOOT_BIOS", ELEM_CTRL_TABLE_GOTO, {0x0}, {0x3000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_BIOS}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"DDR INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_BOOT_DDR", ELEM_CTRL_TABLE_GOTO, {0x3000}, {0x1000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_DDR}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"TEE INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_BOOT_TEE", ELEM_CTRL_TABLE_GOTO, {0x4000}, {0x1000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_TEE}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"ATF INFO", ELEM_OUTPUT_DIVIDE, 
{0x0}, {0x2D}}, \ + {"HDR_BOOT_ATF", ELEM_CTRL_TABLE_GOTO, {0x5000}, {0x1000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_ATF}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ +} + +#define DATA_MODEL_HDR_RUN_OS MODEL_VECTOR(HDR_RUN_OS) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"[OS info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {"event_flag", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {"dump_flag", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"err num", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {"[OS log]", ELEM_OUTPUT_STR_NL, {0x100}, {0xF00}}, \ +} + +#define DATA_MODEL_HDR_RUN_LPFW MODEL_VECTOR(HDR_RUN_LPFW) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0x200}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"[LPFW log]", ELEM_OUTPUT_STR_NL, {0x40}, {0x400}}, \ +} + +#define DATA_MODEL_HDR_RUN_TEE MODEL_VECTOR(HDR_RUN_TEE) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"[RUN FATAL INFO SIZE]", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + 
{"[RUN FATAL INFO]", ELEM_OUTPUT_STR_NL, {0x20}, {0x7E0}}, \ +} + +#define DATA_MODEL_HDR_RUN_ATF MODEL_VECTOR(HDR_RUN_ATF) = {\ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"[ATF info]", ELEM_OUTPUT_STR_NL, {0x1C}, {0x7E4}}, \ +} + +#define DATA_MODEL_HDR_RUN_AREA MODEL_VECTOR(HDR_RUN_AREA) = { \ + {"TEE INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_RUN_TEE", ELEM_CTRL_TABLE_GOTO, {0x0}, {0x800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_TEE}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"ATF INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_RUN_ATF", ELEM_CTRL_TABLE_GOTO, {0x800}, {0x800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_ATF}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"LPFW INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_RUN_LPFW", ELEM_CTRL_TABLE_GOTO, {0x1000}, {0x1000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_LPFW}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"OS INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_RUN_OS", ELEM_CTRL_TABLE_GOTO, {0x2000}, {0x1000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_OS}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ +} + +#define DATA_MODEL_HDR_BOOT MODEL_VECTOR(HDR_BOOT) = { \ + {"area 0", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x0}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 1", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", 
ELEM_CTRL_TABLE_GOTO, {0x7800}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 2", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0xF000}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 3", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x16800}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 4", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x1E000}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 5", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x25800}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 6", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x2D000}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ +} + +#define DATA_MODEL_HDR_RUN MODEL_VECTOR(HDR_RUN) = { \ + {"area 0", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_AREA", ELEM_CTRL_TABLE_GOTO, {0x0}, {0x3C00}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 1", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_AREA", ELEM_CTRL_TABLE_GOTO, {0x3C00}, {0x3C00}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 2", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_AREA", ELEM_CTRL_TABLE_GOTO, {0x7800}, {0x3C00}}, \ + 
{"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 3", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_AREA", ELEM_CTRL_TABLE_GOTO, {0xB400}, {0x3C00}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 4", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_AREA", ELEM_CTRL_TABLE_GOTO, {0xF000}, {0x3C00}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 5", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_AREA", ELEM_CTRL_TABLE_GOTO, {0x12C00}, {0x3C00}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 6", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_AREA", ELEM_CTRL_TABLE_GOTO, {0x16800}, {0x3C00}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_AREA}, {0x1}}, \ +} + +#define DATA_MODEL_HDR_BOOT_INFO MODEL_VECTOR(HDR_BOOT_INFO) = { \ + {"region offset", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"region size", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"region config", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"total area", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"history area", ELEM_OUTPUT_INT, {0xC}, {0x4}}, \ + {"error area", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"area config:", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" used module count", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"module config:", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" module 0 offset", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {" module 0 size", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 1 offset", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {" module 1 size", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, 
{0x0}, {0x0}}, \ + {" module 2 offset", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + {" module 2 size", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 3 offset", ELEM_OUTPUT_INT, {0x34}, {0x4}}, \ + {" module 3 size", ELEM_OUTPUT_INT, {0x38}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"region control", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"area index", ELEM_OUTPUT_INT, {0x6C}, {0x4}}, \ + {"error area count", ELEM_OUTPUT_INT, {0x70}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 0 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x74}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x78}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x7C}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0x80}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0x84}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x88}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 1 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x8C}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x90}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x94}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0x98}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0x9C}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xA0}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 2 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xA4}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xA8}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xAC}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xB0}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0xB4}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xB8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 3 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xBC}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xC0}, {0x4}}, \ + {" exception type", 
ELEM_OUTPUT_INT, {0xC4}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xC8}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0xCC}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xD0}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 4 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xD4}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xD8}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xDC}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xE0}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0xE4}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xE8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 5 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xEC}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xF0}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xF4}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xF8}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0xFC}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x100}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 6 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x104}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x108}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x10C}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0x110}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0x114}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x118}, {0x4}}, \ +} + +#define DATA_MODEL_HDR_RUN_INFO MODEL_VECTOR(HDR_RUN_INFO) = { \ + {"region offset", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"region size", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"region config", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"total area", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"history area", ELEM_OUTPUT_INT, {0xC}, {0x4}}, \ + {"error area", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"area config:", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" used module count", 
ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"module config:", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" module 0 offset", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {" module 0 size", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 1 offset", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {" module 1 size", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 2 offset", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + {" module 2 size", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 3 offset", ELEM_OUTPUT_INT, {0x34}, {0x4}}, \ + {" module 3 size", ELEM_OUTPUT_INT, {0x38}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"region control", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"area index", ELEM_OUTPUT_INT, {0x6C}, {0x4}}, \ + {"error area count", ELEM_OUTPUT_INT, {0x70}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 0 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x74}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x78}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x7C}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x88}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 1 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x8C}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x90}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x94}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xA0}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 2 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xA4}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xA8}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xAC}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xB8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 3 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", 
ELEM_OUTPUT_INT, {0xBC}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xC0}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xC4}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xD0}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 4 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xD4}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xD8}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xDC}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xE8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 5 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xEC}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xF0}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xF4}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x100}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 6 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x104}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x108}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x10C}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x118}, {0x4}}, \ +} + +#define DATA_MODEL_HDR MODEL_VECTOR(HDR) = { \ + {"head info", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"reset count", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"boot region", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_INFO", ELEM_CTRL_TABLE_GOTO, {0XC}, {0x168}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_INFO}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"HDR_BOOT", ELEM_CTRL_TABLE_GOTO, {0x400}, {0xA000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"run region", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_INFO", ELEM_CTRL_TABLE_GOTO, {0x170}, {0x164}}, \ + {"table_index", 
ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_INFO}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"HDR_RUN", ELEM_CTRL_TABLE_GOTO, {0x4B400}, {0xA000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN}, {0x1}}, \ +} + +#endif // BBOX_DDR_DATA_CLOUD_H diff --git a/inc/toolchain/bbox/bbox_ddr_data_dc.h b/inc/toolchain/bbox/bbox_ddr_data_dc.h new file mode 100644 index 000000000..2ea18f26a --- /dev/null +++ b/inc/toolchain/bbox/bbox_ddr_data_dc.h @@ -0,0 +1,451 @@ +/** + * @file bbox_ddr_data_dc.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2020. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef BBOX_DDR_DATA_DC_H +#define BBOX_DDR_DATA_DC_H + +#include "bbox_ddr_data.h" + +/* each Module need define as follows */ +/* LPM module */ +#define DATA_MODEL_LPM_START MODEL_VECTOR(LPM_START) = { \ + {"start_steps", ELEM_OUTPUT_STR_NL, {0x0}, {0x800}}, \ +} + +#define DATA_MODEL_LPM MODEL_VECTOR(LPM) = { \ + {"****exc****reg**", ELEM_OUTPUT_STR, {0x0}, {0x8}}, \ + {"fault_regs_dfsr", ELEM_OUTPUT_INT, {0x1410}, {0x4}}, \ + {"fault_regs_ifsr", ELEM_OUTPUT_INT, {0x1414}, {0x4}}, \ + {"fault_regs_adfsr", ELEM_OUTPUT_INT, {0x1418}, {0x4}}, \ + {"fault_regs_aifsr", ELEM_OUTPUT_INT, {0x141c}, {0x4}}, \ + {"fault_regs_dfar", ELEM_OUTPUT_INT, {0x1420}, {0x4}}, \ + {"fault_regs_ifar", ELEM_OUTPUT_INT, {0x1424}, {0x4}}, \ + {"usr_regs_r13", ELEM_OUTPUT_INT, {0x1428}, {0x4}}, \ + {"usr_regs_r14", ELEM_OUTPUT_INT, {0x142c}, {0x4}}, \ + {"svc_regs_regs_r13", ELEM_OUTPUT_INT, {0x1430}, {0x4}}, \ + {"svc_regs_regs_r14", ELEM_OUTPUT_INT, {0x1434}, {0x4}}, \ + {"svc_regs_regs_spsr", ELEM_OUTPUT_INT, {0x1438}, {0x4}}, \ + {"irq_regs_regs_r13", ELEM_OUTPUT_INT, {0x143c}, {0x4}}, \ + {"irq_regs_regs_r14", ELEM_OUTPUT_INT, {0x1440}, {0x4}}, \ + {"irq_regs_regs_spsr", 
ELEM_OUTPUT_INT, {0x1444}, {0x4}}, \ + {"fiq_regs_regs_r13", ELEM_OUTPUT_INT, {0x1448}, {0x4}}, \ + {"fiq_regs_regs_r14", ELEM_OUTPUT_INT, {0x144c}, {0x4}}, \ + {"fiq_regs_regs_spsr", ELEM_OUTPUT_INT, {0x1450}, {0x4}}, \ + {"und_regs_regs_r13", ELEM_OUTPUT_INT, {0x1454}, {0x4}}, \ + {"und_regs_regs_r14", ELEM_OUTPUT_INT, {0x1458}, {0x4}}, \ + {"und_regs_regs_spsr", ELEM_OUTPUT_INT, {0x145c}, {0x4}}, \ + {"abort_regs_regs_r13", ELEM_OUTPUT_INT, {0x1460}, {0x4}}, \ + {"abort_regs_regs_r14", ELEM_OUTPUT_INT, {0x1464}, {0x4}}, \ + {"abort_regs_regs_spsr", ELEM_OUTPUT_INT, {0x1468}, {0x4}}, \ + {"data_regs_r1", ELEM_OUTPUT_INT, {0x146c}, {0x4}}, \ + {"data_regs_r2", ELEM_OUTPUT_INT, {0x1470}, {0x4}}, \ + {"data_regs_r3", ELEM_OUTPUT_INT, {0x1474}, {0x4}}, \ + {"data_regs_r4", ELEM_OUTPUT_INT, {0x1478}, {0x4}}, \ + {"data_regs_r5", ELEM_OUTPUT_INT, {0x147c}, {0x4}}, \ + {"data_regs_r6", ELEM_OUTPUT_INT, {0x1480}, {0x4}}, \ + {"data_regs_r7", ELEM_OUTPUT_INT, {0x1484}, {0x4}}, \ + {"data_regs_r8", ELEM_OUTPUT_INT, {0x1488}, {0x4}}, \ + {"data_regs_r9", ELEM_OUTPUT_INT, {0x148c}, {0x4}}, \ + {"data_regs_r10", ELEM_OUTPUT_INT, {0x1490}, {0x4}}, \ + {"data_regs_r11", ELEM_OUTPUT_INT, {0x1494}, {0x4}}, \ + {"data_regs_r12", ELEM_OUTPUT_INT, {0x1498}, {0x4}}, \ + {"data_regs_r13", ELEM_OUTPUT_INT, {0x149c}, {0x4}}, \ + {"data_regs_r14", ELEM_OUTPUT_INT, {0x14a0}, {0x4}}, \ + {"data_regs_r15", ELEM_OUTPUT_INT, {0x14a4}, {0x4}}, \ + {"prog_regs_cpsr", ELEM_OUTPUT_INT, {0x14a8}, {0x4}}, \ + {"prog_regs_spsr", ELEM_OUTPUT_INT, {0x14ac}, {0x4}}, \ + {"log", ELEM_OUTPUT_STR_NL, {0xDC80}, {0x400}}, \ +} + +#define DATA_MODEL_LPM_PMU MODEL_VECTOR(LPM_PMU) = { \ + {"cpuid", ELEM_OUTPUT_INT, {0x0000}, {0x1}}, \ + {"2CCH", ELEM_OUTPUT_HEX, {0x0001}, {0x1}}, \ + {"2CDH", ELEM_OUTPUT_HEX, {0x0002}, {0x1}}, \ + {"2CEH", ELEM_OUTPUT_HEX, {0x0003}, {0x1}}, \ + {"2CFH", ELEM_OUTPUT_HEX, {0x0004}, {0x1}}, \ + {"2D0H", ELEM_OUTPUT_HEX, {0x0005}, {0x1}}, \ + {"2D1H", ELEM_OUTPUT_HEX, {0x0006}, 
{0x1}}, \ + {"2D2H", ELEM_OUTPUT_HEX, {0x0007}, {0x1}}, \ + {"2D3H", ELEM_OUTPUT_HEX, {0x0008}, {0x1}}, \ + {"2D4H", ELEM_OUTPUT_HEX, {0x0009}, {0x1}}, \ + {"2D5H", ELEM_OUTPUT_HEX, {0x000A}, {0x1}}, \ + {"2D6H", ELEM_OUTPUT_HEX, {0x000B}, {0x1}}, \ + {"2D7H", ELEM_OUTPUT_HEX, {0x000C}, {0x1}}, \ + {"2D8H", ELEM_OUTPUT_HEX, {0x000D}, {0x1}}, \ + {"2D9H", ELEM_OUTPUT_HEX, {0x000E}, {0x1}}, \ + {"2DAH", ELEM_OUTPUT_HEX, {0x000F}, {0x1}}, \ + {"2DBH", ELEM_OUTPUT_HEX, {0x0010}, {0x1}}, \ + {"2DCH", ELEM_OUTPUT_HEX, {0x0011}, {0x1}}, \ + {"2DDH", ELEM_OUTPUT_HEX, {0x0012}, {0x1}}, \ + {"2DEH", ELEM_OUTPUT_HEX, {0x0013}, {0x1}}, \ + {"2DFH", ELEM_OUTPUT_HEX, {0x0014}, {0x1}}, \ + {"2E0H", ELEM_OUTPUT_HEX, {0x0015}, {0x1}}, \ + {"2E1H", ELEM_OUTPUT_HEX, {0x0016}, {0x1}}, \ + {"2E2H", ELEM_OUTPUT_HEX, {0x0017}, {0x1}}, \ + {"2E3H", ELEM_OUTPUT_HEX, {0x0018}, {0x1}}, \ + {"2E4H", ELEM_OUTPUT_HEX, {0x0019}, {0x1}}, \ + {"2E5H", ELEM_OUTPUT_HEX, {0x001A}, {0x1}}, \ + {"2E6H", ELEM_OUTPUT_HEX, {0x001B}, {0x1}}, \ + {"2E7H", ELEM_OUTPUT_HEX, {0x001C}, {0x1}}, \ + {"slave0", ELEM_OUTPUT_HEX, {0x0020}, {0x1}}, \ + {"E0", ELEM_OUTPUT_HEX, {0x0021}, {0x1}}, \ + {"E1", ELEM_OUTPUT_HEX, {0x0022}, {0x1}}, \ + {"E2", ELEM_OUTPUT_HEX, {0x0023}, {0x1}}, \ + {"reserve", ELEM_OUTPUT_HEX, {0x0024}, {0x1}}, \ + {"reserve", ELEM_OUTPUT_HEX, {0x0025}, {0x1}}, \ + {"reserve", ELEM_OUTPUT_HEX, {0x0026}, {0x1}}, \ + {"slave1", ELEM_OUTPUT_HEX, {0x0040}, {0x1}}, \ + {"E0", ELEM_OUTPUT_HEX, {0x0041}, {0x1}}, \ + {"E1", ELEM_OUTPUT_HEX, {0x0042}, {0x1}}, \ + {"E2", ELEM_OUTPUT_HEX, {0x0043}, {0x1}}, \ + {"reserve", ELEM_OUTPUT_HEX, {0x0044}, {0x1}}, \ + {"reserve", ELEM_OUTPUT_HEX, {0x0045}, {0x1}}, \ + {"reserve", ELEM_OUTPUT_HEX, {0x0046}, {0x1}}, \ + {"aicSlave", ELEM_OUTPUT_HEX, {0x0060}, {0x1}}, \ + {"0x79", ELEM_OUTPUT_HEX, {0x0061}, {0x2}}, \ + {"0x7A", ELEM_OUTPUT_HEX, {0x0063}, {0x1}}, \ + {"0x7B", ELEM_OUTPUT_HEX, {0x0064}, {0x1}}, \ + {"0x7C", ELEM_OUTPUT_HEX, {0x0065}, {0x1}}, \ + 
{"0x7D", ELEM_OUTPUT_HEX, {0x0066}, {0x1}}, \ + {"0x7E", ELEM_OUTPUT_HEX, {0x0067}, {0x1}}, \ + {"dvppSlave", ELEM_OUTPUT_HEX, {0x0080}, {0x1}}, \ + {"0x79", ELEM_OUTPUT_HEX, {0x0081}, {0x2}}, \ + {"0x7A", ELEM_OUTPUT_HEX, {0x0083}, {0x1}}, \ + {"0x7B", ELEM_OUTPUT_HEX, {0x0084}, {0x1}}, \ + {"0x7C", ELEM_OUTPUT_HEX, {0x0085}, {0x1}}, \ + {"0x7D", ELEM_OUTPUT_HEX, {0x0086}, {0x1}}, \ + {"0x7E", ELEM_OUTPUT_HEX, {0x0087}, {0x1}}, \ + {"cpuSlave", ELEM_OUTPUT_HEX, {0x00A0}, {0x1}}, \ + {"0x79", ELEM_OUTPUT_HEX, {0x00A1}, {0x2}}, \ + {"0x7A", ELEM_OUTPUT_HEX, {0x00A3}, {0x1}}, \ + {"0x7B", ELEM_OUTPUT_HEX, {0x00A4}, {0x1}}, \ + {"0x7C", ELEM_OUTPUT_HEX, {0x00A5}, {0x1}}, \ + {"0x7D", ELEM_OUTPUT_HEX, {0x00A6}, {0x1}}, \ + {"0x7E", ELEM_OUTPUT_HEX, {0x00A7}, {0x1}}, \ +} + +/* (LPM)DDR module */ +#define DATA_MODEL_DDR_SRAM MODEL_VECTOR(DDR_SRAM) = { \ + {"dram type", ELEM_OUTPUT_INT, {0x00}, {0x4}}, \ + {"dram size", ELEM_OUTPUT_INT, {0x04}, {0x4}}, \ + {"rank num", ELEM_OUTPUT_INT, {0x08}, {0x4}}, \ + {"chn bit map", ELEM_OUTPUT_INT, {0x0c}, {0x4}}, \ + {"manufacteryId[0]", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"manufacteryId[1]", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"manufacteryId[2]", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"manufacteryId[3]", ELEM_OUTPUT_INT, {0x1c}, {0x4}}, \ + {"manufacteryId[4]", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"manufacteryId[5]", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {"manufacteryId[6]", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"manufacteryId[7]", ELEM_OUTPUT_INT, {0x2c}, {0x4}}, \ + {"manufacteryId[8]", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"manufacteryId[9]", ELEM_OUTPUT_INT, {0x34}, {0x4}}, \ + {"manufacteryId[10]", ELEM_OUTPUT_INT, {0x38}, {0x4}}, \ + {"manufacteryId[11]", ELEM_OUTPUT_INT, {0x3c}, {0x4}}, \ + {"manufacteryId[12]", ELEM_OUTPUT_INT, {0x40}, {0x4}}, \ + {"manufacteryId[13]", ELEM_OUTPUT_INT, {0x44}, {0x4}}, \ + {"manufacteryId[14]", ELEM_OUTPUT_INT, {0x48}, {0x4}}, \ + {"manufacteryId[15]", ELEM_OUTPUT_INT, {0x4c}, {0x4}}, \ 
+ {"manufacteryId[16]", ELEM_OUTPUT_INT, {0x50}, {0x4}}, \ + {"manufacteryId[17]", ELEM_OUTPUT_INT, {0x54}, {0x4}}, \ + {"manufacteryId[18]", ELEM_OUTPUT_INT, {0x58}, {0x4}}, \ + {"manufacteryId[19]", ELEM_OUTPUT_INT, {0x5c}, {0x4}}, \ + {"manufacteryId[20]", ELEM_OUTPUT_INT, {0x60}, {0x4}}, \ + {"manufacteryId[21]", ELEM_OUTPUT_INT, {0x64}, {0x4}}, \ + {"manufacteryId[22]", ELEM_OUTPUT_INT, {0x68}, {0x4}}, \ + {"manufacteryId[23]", ELEM_OUTPUT_INT, {0x6c}, {0x4}}, \ + {"iecc", ELEM_OUTPUT_INT, {0x70}, {0x4}}, \ + {"swap type", ELEM_OUTPUT_INT, {0x74}, {0x4}}, \ + {"freq", ELEM_OUTPUT_INT, {0x78}, {0x4}}, \ +} + +/* TEE module */ +#define DATA_MODEL_TEE MODEL_VECTOR(TEE) = { \ + {"tee info", ELEM_OUTPUT_CHAR, {0x0}, {0x20000}}, \ +} + +/* DVPP module */ +#define DATA_MODEL_DVPP MODEL_VECTOR(DVPP) = { \ + {"dvpp info", ELEM_OUTPUT_STR_NL, {0x0}, {0x10000}}, \ +} + +/* DRIVE module */ +#define DATA_MODEL_DRIVER MODEL_VECTOR(DRIVER) = { \ + {"driver info", ELEM_OUTPUT_STR_NL, {0x0}, {0x20000}}, \ +} + +/* TS module, start */ +#define DATA_MODEL_TS_START MODEL_VECTOR(TS_START) = { \ + {"ts start info", ELEM_OUTPUT_STR_NL, {0x0}, {0x19000}}, \ +} + +/* TS module */ +#define DATA_MODEL_TS MODEL_VECTOR(TS) = { \ + {"ts info", ELEM_OUTPUT_STR_NL, {0x0}, {0x1E6E00}}, \ +} + +/* HSM module */ +#define DATA_MODEL_HSM MODEL_VECTOR(HSM) = { \ + {"hsm info", ELEM_OUTPUT_STR_NL, {0x0}, {0x1000}}, \ +} + +/* HSM module, start */ +#define DATA_MODEL_HSM_START MODEL_VECTOR(HSM_START) = { \ + {"hsm start info", ELEM_OUTPUT_STR_NL, {0x0}, {0x1000}}, \ +} + +/* DP module */ +#define DATA_MODEL_DP MODEL_VECTOR(DP) = { \ + {"os kbox info", ELEM_OUTPUT_STR_NL, {0x3000}, {0x4FD000}}, \ +} + +// lpm common log data +#define DATA_MODEL_LPM_LOG MODEL_VECTOR(LPM_LOG) = { \ + {"lpm log buffer", ELEM_FEATURE_LOOPBUF, {1}, {6}}, \ + {"buf_read", ELEM_CTRL_LPBF_READ, {0x0}, {0x4}}, \ + {"buf_len", ELEM_CTRL_LPBF_SIZE, {0x4}, {0x4}}, \ + {"buf_write", ELEM_CTRL_LPBF_WRITE, {0x40}, {0x4}}, \ + 
{"log_level", ELEM_OUTPUT_INT, {0x44}, {0x4}}, \ + {"buf_head_len", ELEM_CTRL_LPBF_HEAD, {0x80}, {0x4}}, \ + {"lpm log data", ELEM_FEATURE_CHARLOG, {1}, {1}}, \ + {"lpm log", ELEM_OUTPUT_STR_NL, {0x80}, {0x1FF80}}, \ +} + +// hsm common log data +#define DATA_MODEL_HSM_LOG MODEL_VECTOR(HSM_LOG) = { \ + {"hsm log buffer", ELEM_FEATURE_LOOPBUF, {1}, {6}}, \ + {"buf_read", ELEM_CTRL_LPBF_READ, {0x0}, {0x4}}, \ + {"buf_len", ELEM_CTRL_LPBF_SIZE, {0x4}, {0x4}}, \ + {"buf_write", ELEM_CTRL_LPBF_WRITE, {0x40}, {0x4}}, \ + {"log_level", ELEM_OUTPUT_INT, {0x44}, {0x4}}, \ + {"buf_head_len", ELEM_CTRL_LPBF_HEAD, {0x80}, {0x4}}, \ + {"hsm log data", ELEM_FEATURE_CHARLOG, {1}, {1}}, \ + {"hsm log", ELEM_OUTPUT_STR_NL, {0x80}, {0x1FF80}}, \ +} + +/* DVPP module */ +#define DATA_MODEL_DVPP MODEL_VECTOR(DVPP) = { \ + {"dvpp info", ELEM_OUTPUT_STR_NL, {0x0}, {0x10000}}, \ +} + +/* TF module */ +#define DATA_MODEL_TF MODEL_VECTOR(TF) = { \ + {"x0", ELEM_OUTPUT_HEX, {0x0}, {0x8}}, \ + {"x1", ELEM_OUTPUT_HEX, {0x8}, {0x8}}, \ + {"x30", ELEM_OUTPUT_HEX, {0x10}, {0x8}}, \ + {"x2", ELEM_OUTPUT_HEX, {0x18}, {0x8}}, \ + {"x3", ELEM_OUTPUT_HEX, {0x20}, {0x8}}, \ + {"x4", ELEM_OUTPUT_HEX, {0x28}, {0x8}}, \ + {"x5", ELEM_OUTPUT_HEX, {0x30}, {0x8}}, \ + {"x6", ELEM_OUTPUT_HEX, {0x38}, {0x8}}, \ + {"x7", ELEM_OUTPUT_HEX, {0x40}, {0x8}}, \ + {"x8", ELEM_OUTPUT_HEX, {0x48}, {0x8}}, \ + {"x9", ELEM_OUTPUT_HEX, {0x50}, {0x8}}, \ + {"x10", ELEM_OUTPUT_HEX, {0x58}, {0x8}}, \ + {"x11", ELEM_OUTPUT_HEX, {0x60}, {0x8}}, \ + {"x12", ELEM_OUTPUT_HEX, {0x68}, {0x8}}, \ + {"x13", ELEM_OUTPUT_HEX, {0x70}, {0x8}}, \ + {"x14", ELEM_OUTPUT_HEX, {0x78}, {0x8}}, \ + {"x15", ELEM_OUTPUT_HEX, {0x80}, {0x8}}, \ + {"x16", ELEM_OUTPUT_HEX, {0x88}, {0x8}}, \ + {"x17", ELEM_OUTPUT_HEX, {0x90}, {0x8}}, \ + {"x18", ELEM_OUTPUT_HEX, {0x98}, {0x8}}, \ + {"x19", ELEM_OUTPUT_HEX, {0xA0}, {0x8}}, \ + {"x20", ELEM_OUTPUT_HEX, {0xA8}, {0x8}}, \ + {"x21", ELEM_OUTPUT_HEX, {0xB0}, {0x8}}, \ + {"x22", ELEM_OUTPUT_HEX, {0xB8}, 
{0x8}}, \ + {"x23", ELEM_OUTPUT_HEX, {0xC0}, {0x8}}, \ + {"x24", ELEM_OUTPUT_HEX, {0xC8}, {0x8}}, \ + {"x25", ELEM_OUTPUT_HEX, {0xD0}, {0x8}}, \ + {"x26", ELEM_OUTPUT_HEX, {0xD8}, {0x8}}, \ + {"x27", ELEM_OUTPUT_HEX, {0xE0}, {0x8}}, \ + {"x28", ELEM_OUTPUT_HEX, {0xE8}, {0x8}}, \ + {"x29", ELEM_OUTPUT_HEX, {0xF0}, {0x8}}, \ + {"scr_el3", ELEM_OUTPUT_HEX, {0xF8}, {0x8}}, \ + {"sctlr_el3", ELEM_OUTPUT_HEX, {0x100}, {0x8}}, \ + {"cptr_el3", ELEM_OUTPUT_HEX, {0x108}, {0x8}}, \ + {"tcr_el3", ELEM_OUTPUT_HEX, {0x110}, {0x8}}, \ + {"daif", ELEM_OUTPUT_HEX, {0x118}, {0x8}}, \ + {"mair_el3", ELEM_OUTPUT_HEX, {0x120}, {0x8}}, \ + {"spsr_el3", ELEM_OUTPUT_HEX, {0x128}, {0x8}}, \ + {"elr_el3", ELEM_OUTPUT_HEX, {0x130}, {0x8}}, \ + {"ttbr0_el3", ELEM_OUTPUT_HEX, {0x138}, {0x8}}, \ + {"esr_el3", ELEM_OUTPUT_HEX, {0x140}, {0x8}}, \ + {"far_el3", ELEM_OUTPUT_HEX, {0x148}, {0x8}}, \ +} + +/** + * the whole space is 512k, used for histroy data record + * the struct distribution is as follows: + * +-------------------+ + * | head info(1k) | region: area: module block: + * +-------------------+ +--------------------+ +-----------------+ +-----------------+ + * | boot region |---->| first area |---->| module block |---->| block head | + * +-------------------+ +--------------------+ +-----------------+ +-----------------+ + * | run region | | second area | | module block | | block data | + * +-------------------+ +--------------------+ +-----------------+ +-----------------+ + * | reserved | | ...... | | ...... 
| + * +-------------------+ +--------------------+ +-----------------+ + */ +#define DATA_MODEL_HDR_BOOT_BIOS MODEL_VECTOR(HDR_BOOT_BIOS) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"bsbc point", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {"bsbc exc point", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"hboot1 point", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {"hboot1 exc point", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"hboot2 point", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + {"hboot2 exc point", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"[BIOS info]", ELEM_OUTPUT_STR_NL, {0x480}, {0x2780}}, \ +} + +#define DATA_MODEL_HDR_BOOT_AREA MODEL_VECTOR(HDR_BOOT_AREA) = { \ + {"BIOS INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_BOOT_BIOS", ELEM_CTRL_TABLE_GOTO, {0x0}, {0x3000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_BIOS}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ +} + +#define DATA_MODEL_HDR_BOOT MODEL_VECTOR(HDR_BOOT) = { \ + {"area 0", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x0}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 1", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x7800}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 2", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0xF000}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + 
{"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 3", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x16800}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 4", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x1E000}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 5", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x25800}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 6", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x2D000}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ +} + +#define DATA_MODEL_HDR_BOOT_INFO MODEL_VECTOR(HDR_BOOT_INFO) = { \ + {"region offset", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"region size", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"region config", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"total area", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"history area", ELEM_OUTPUT_INT, {0xC}, {0x4}}, \ + {"error area", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"area config:", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" used module count", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"module config:", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" module 0 offset", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {" module 0 size", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 1 offset", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {" module 1 size", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 2 offset", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + 
{" module 2 size", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 3 offset", ELEM_OUTPUT_INT, {0x34}, {0x4}}, \ + {" module 3 size", ELEM_OUTPUT_INT, {0x38}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"region control", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"area index", ELEM_OUTPUT_INT, {0x6C}, {0x4}}, \ + {"error area count", ELEM_OUTPUT_INT, {0x70}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 0 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x74}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x78}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x7C}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0x80}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0x84}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x88}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 1 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x8C}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x90}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x94}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0x98}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0x9C}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xA0}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 2 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xA4}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xA8}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xAC}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xB0}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0xB4}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xB8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 3 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xBC}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xC0}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xC4}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xC8}, {0x4}}, \ 
+ {" exception id", ELEM_OUTPUT_INT, {0xCC}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xD0}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 4 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xD4}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xD8}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xDC}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xE0}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0xE4}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xE8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 5 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xEC}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xF0}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xF4}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xF8}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0xFC}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x100}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 6 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x104}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x108}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x10C}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0x110}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0x114}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x118}, {0x4}}, \ +} + +#define DATA_MODEL_HDR MODEL_VECTOR(HDR) = { \ + {"head info", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"reset count", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"boot region", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_INFO", ELEM_CTRL_TABLE_GOTO, {0XC}, {0x168}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_INFO}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"HDR_BOOT", ELEM_CTRL_TABLE_GOTO, {0x400}, {0xA000}}, \ + {"table_index", 
ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ +} + +#endif // BBOX_DDR_DATA_DC_H diff --git a/inc/toolchain/bbox/bbox_ddr_data_mdc.h b/inc/toolchain/bbox/bbox_ddr_data_mdc.h new file mode 100644 index 000000000..0302eb5a2 --- /dev/null +++ b/inc/toolchain/bbox/bbox_ddr_data_mdc.h @@ -0,0 +1,467 @@ +/** + * @file bbox_ddr_data_mdc.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2020. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef BBOX_DDR_DATA_MDC_H +#define BBOX_DDR_DATA_MDC_H + +#include "bbox_ddr_data.h" + +/* each Module need define as follows */ +/* LPM module */ +#define DATA_MODEL_LPM_START MODEL_VECTOR(LPM_START) = { \ + {"start_steps", ELEM_OUTPUT_STR_NL, {0x0}, {0x800}}, \ +} + +#define DATA_MODEL_LPM MODEL_VECTOR(LPM) = { \ + {"****exc****reg**", ELEM_OUTPUT_STR, {0x0}, {0x8}}, \ + {"fault_regs_dfsr", ELEM_OUTPUT_INT, {0x1410}, {0x4}}, \ + {"fault_regs_ifsr", ELEM_OUTPUT_INT, {0x1414}, {0x4}}, \ + {"fault_regs_adfsr", ELEM_OUTPUT_INT, {0x1418}, {0x4}}, \ + {"fault_regs_aifsr", ELEM_OUTPUT_INT, {0x141c}, {0x4}}, \ + {"fault_regs_dfar", ELEM_OUTPUT_INT, {0x1420}, {0x4}}, \ + {"fault_regs_ifar", ELEM_OUTPUT_INT, {0x1424}, {0x4}}, \ + {"usr_regs_r13", ELEM_OUTPUT_INT, {0x1428}, {0x4}}, \ + {"usr_regs_r14", ELEM_OUTPUT_INT, {0x142c}, {0x4}}, \ + {"svc_regs_regs_r13", ELEM_OUTPUT_INT, {0x1430}, {0x4}}, \ + {"svc_regs_regs_r14", ELEM_OUTPUT_INT, {0x1434}, {0x4}}, \ + {"svc_regs_regs_spsr", ELEM_OUTPUT_INT, {0x1438}, {0x4}}, \ + {"irq_regs_regs_r13", ELEM_OUTPUT_INT, {0x143c}, {0x4}}, \ + {"irq_regs_regs_r14", ELEM_OUTPUT_INT, {0x1440}, {0x4}}, \ + {"irq_regs_regs_spsr", ELEM_OUTPUT_INT, {0x1444}, {0x4}}, \ + {"fiq_regs_regs_r13", ELEM_OUTPUT_INT, {0x1448}, {0x4}}, \ + {"fiq_regs_regs_r14", ELEM_OUTPUT_INT, 
{0x144c}, {0x4}}, \ + {"fiq_regs_regs_spsr", ELEM_OUTPUT_INT, {0x1450}, {0x4}}, \ + {"und_regs_regs_r13", ELEM_OUTPUT_INT, {0x1454}, {0x4}}, \ + {"und_regs_regs_r14", ELEM_OUTPUT_INT, {0x1458}, {0x4}}, \ + {"und_regs_regs_spsr", ELEM_OUTPUT_INT, {0x145c}, {0x4}}, \ + {"abort_regs_regs_r13", ELEM_OUTPUT_INT, {0x1460}, {0x4}}, \ + {"abort_regs_regs_r14", ELEM_OUTPUT_INT, {0x1464}, {0x4}}, \ + {"abort_regs_regs_spsr", ELEM_OUTPUT_INT, {0x1468}, {0x4}}, \ + {"data_regs_r1", ELEM_OUTPUT_INT, {0x146c}, {0x4}}, \ + {"data_regs_r2", ELEM_OUTPUT_INT, {0x1470}, {0x4}}, \ + {"data_regs_r3", ELEM_OUTPUT_INT, {0x1474}, {0x4}}, \ + {"data_regs_r4", ELEM_OUTPUT_INT, {0x1478}, {0x4}}, \ + {"data_regs_r5", ELEM_OUTPUT_INT, {0x147c}, {0x4}}, \ + {"data_regs_r6", ELEM_OUTPUT_INT, {0x1480}, {0x4}}, \ + {"data_regs_r7", ELEM_OUTPUT_INT, {0x1484}, {0x4}}, \ + {"data_regs_r8", ELEM_OUTPUT_INT, {0x1488}, {0x4}}, \ + {"data_regs_r9", ELEM_OUTPUT_INT, {0x148c}, {0x4}}, \ + {"data_regs_r10", ELEM_OUTPUT_INT, {0x1490}, {0x4}}, \ + {"data_regs_r11", ELEM_OUTPUT_INT, {0x1494}, {0x4}}, \ + {"data_regs_r12", ELEM_OUTPUT_INT, {0x1498}, {0x4}}, \ + {"data_regs_r13", ELEM_OUTPUT_INT, {0x149c}, {0x4}}, \ + {"data_regs_r14", ELEM_OUTPUT_INT, {0x14a0}, {0x4}}, \ + {"data_regs_r15", ELEM_OUTPUT_INT, {0x14a4}, {0x4}}, \ + {"prog_regs_cpsr", ELEM_OUTPUT_INT, {0x14a8}, {0x4}}, \ + {"prog_regs_spsr", ELEM_OUTPUT_INT, {0x14ac}, {0x4}}, \ + {"log", ELEM_OUTPUT_STR_NL, {0xDC80}, {0x400}}, \ +} + +#define DATA_MODEL_LPM_PMU MODEL_VECTOR(LPM_PMU) = { \ + {"cpuid", ELEM_OUTPUT_INT, {0x0000}, {0x1}}, \ + {"2CCH", ELEM_OUTPUT_HEX, {0x0001}, {0x1}}, \ + {"2CDH", ELEM_OUTPUT_HEX, {0x0002}, {0x1}}, \ + {"2CEH", ELEM_OUTPUT_HEX, {0x0003}, {0x1}}, \ + {"2CFH", ELEM_OUTPUT_HEX, {0x0004}, {0x1}}, \ + {"2D0H", ELEM_OUTPUT_HEX, {0x0005}, {0x1}}, \ + {"2D1H", ELEM_OUTPUT_HEX, {0x0006}, {0x1}}, \ + {"2D2H", ELEM_OUTPUT_HEX, {0x0007}, {0x1}}, \ + {"2D3H", ELEM_OUTPUT_HEX, {0x0008}, {0x1}}, \ + {"2D4H", ELEM_OUTPUT_HEX, 
{0x0009}, {0x1}}, \ + {"2D5H", ELEM_OUTPUT_HEX, {0x000A}, {0x1}}, \ + {"2D6H", ELEM_OUTPUT_HEX, {0x000B}, {0x1}}, \ + {"2D7H", ELEM_OUTPUT_HEX, {0x000C}, {0x1}}, \ + {"2D8H", ELEM_OUTPUT_HEX, {0x000D}, {0x1}}, \ + {"2D9H", ELEM_OUTPUT_HEX, {0x000E}, {0x1}}, \ + {"2DAH", ELEM_OUTPUT_HEX, {0x000F}, {0x1}}, \ + {"2DBH", ELEM_OUTPUT_HEX, {0x0010}, {0x1}}, \ + {"2DCH", ELEM_OUTPUT_HEX, {0x0011}, {0x1}}, \ + {"2DDH", ELEM_OUTPUT_HEX, {0x0012}, {0x1}}, \ + {"2DEH", ELEM_OUTPUT_HEX, {0x0013}, {0x1}}, \ + {"2DFH", ELEM_OUTPUT_HEX, {0x0014}, {0x1}}, \ + {"2E0H", ELEM_OUTPUT_HEX, {0x0015}, {0x1}}, \ + {"2E1H", ELEM_OUTPUT_HEX, {0x0016}, {0x1}}, \ + {"2E2H", ELEM_OUTPUT_HEX, {0x0017}, {0x1}}, \ + {"2E3H", ELEM_OUTPUT_HEX, {0x0018}, {0x1}}, \ + {"2E4H", ELEM_OUTPUT_HEX, {0x0019}, {0x1}}, \ + {"2E5H", ELEM_OUTPUT_HEX, {0x001A}, {0x1}}, \ + {"2E6H", ELEM_OUTPUT_HEX, {0x001B}, {0x1}}, \ + {"2E7H", ELEM_OUTPUT_HEX, {0x001C}, {0x1}}, \ + {"slave0", ELEM_OUTPUT_HEX, {0x0020}, {0x1}}, \ + {"E0", ELEM_OUTPUT_HEX, {0x0021}, {0x1}}, \ + {"E1", ELEM_OUTPUT_HEX, {0x0022}, {0x1}}, \ + {"E2", ELEM_OUTPUT_HEX, {0x0023}, {0x1}}, \ + {"reserve", ELEM_OUTPUT_HEX, {0x0024}, {0x1}}, \ + {"reserve", ELEM_OUTPUT_HEX, {0x0025}, {0x1}}, \ + {"reserve", ELEM_OUTPUT_HEX, {0x0026}, {0x1}}, \ + {"slave1", ELEM_OUTPUT_HEX, {0x0040}, {0x1}}, \ + {"E0", ELEM_OUTPUT_HEX, {0x0041}, {0x1}}, \ + {"E1", ELEM_OUTPUT_HEX, {0x0042}, {0x1}}, \ + {"E2", ELEM_OUTPUT_HEX, {0x0043}, {0x1}}, \ + {"reserve", ELEM_OUTPUT_HEX, {0x0044}, {0x1}}, \ + {"reserve", ELEM_OUTPUT_HEX, {0x0045}, {0x1}}, \ + {"reserve", ELEM_OUTPUT_HEX, {0x0046}, {0x1}}, \ + {"aicSlave", ELEM_OUTPUT_HEX, {0x0060}, {0x1}}, \ + {"0x79", ELEM_OUTPUT_HEX, {0x0061}, {0x2}}, \ + {"0x7A", ELEM_OUTPUT_HEX, {0x0063}, {0x1}}, \ + {"0x7B", ELEM_OUTPUT_HEX, {0x0064}, {0x1}}, \ + {"0x7C", ELEM_OUTPUT_HEX, {0x0065}, {0x1}}, \ + {"0x7D", ELEM_OUTPUT_HEX, {0x0066}, {0x1}}, \ + {"0x7E", ELEM_OUTPUT_HEX, {0x0067}, {0x1}}, \ + {"dvppSlave", ELEM_OUTPUT_HEX, {0x0080}, 
{0x1}}, \ + {"0x79", ELEM_OUTPUT_HEX, {0x0081}, {0x2}}, \ + {"0x7A", ELEM_OUTPUT_HEX, {0x0083}, {0x1}}, \ + {"0x7B", ELEM_OUTPUT_HEX, {0x0084}, {0x1}}, \ + {"0x7C", ELEM_OUTPUT_HEX, {0x0085}, {0x1}}, \ + {"0x7D", ELEM_OUTPUT_HEX, {0x0086}, {0x1}}, \ + {"0x7E", ELEM_OUTPUT_HEX, {0x0087}, {0x1}}, \ + {"cpuSlave", ELEM_OUTPUT_HEX, {0x00A0}, {0x1}}, \ + {"0x79", ELEM_OUTPUT_HEX, {0x00A1}, {0x2}}, \ + {"0x7A", ELEM_OUTPUT_HEX, {0x00A3}, {0x1}}, \ + {"0x7B", ELEM_OUTPUT_HEX, {0x00A4}, {0x1}}, \ + {"0x7C", ELEM_OUTPUT_HEX, {0x00A5}, {0x1}}, \ + {"0x7D", ELEM_OUTPUT_HEX, {0x00A6}, {0x1}}, \ + {"0x7E", ELEM_OUTPUT_HEX, {0x00A7}, {0x1}}, \ +} + +/* (LPM)DDR module */ +#define DATA_MODEL_DDR_SRAM MODEL_VECTOR(DDR_SRAM) = { \ + {"dram type", ELEM_OUTPUT_INT, {0x00}, {0x4}}, \ + {"dram size", ELEM_OUTPUT_INT, {0x04}, {0x4}}, \ + {"rank num", ELEM_OUTPUT_INT, {0x08}, {0x4}}, \ + {"chn bit map", ELEM_OUTPUT_INT, {0x0c}, {0x4}}, \ + {"manufacteryId[0]", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"manufacteryId[1]", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"manufacteryId[2]", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"manufacteryId[3]", ELEM_OUTPUT_INT, {0x1c}, {0x4}}, \ + {"manufacteryId[4]", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"manufacteryId[5]", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {"manufacteryId[6]", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"manufacteryId[7]", ELEM_OUTPUT_INT, {0x2c}, {0x4}}, \ + {"manufacteryId[8]", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"manufacteryId[9]", ELEM_OUTPUT_INT, {0x34}, {0x4}}, \ + {"manufacteryId[10]", ELEM_OUTPUT_INT, {0x38}, {0x4}}, \ + {"manufacteryId[11]", ELEM_OUTPUT_INT, {0x3c}, {0x4}}, \ + {"manufacteryId[12]", ELEM_OUTPUT_INT, {0x40}, {0x4}}, \ + {"manufacteryId[13]", ELEM_OUTPUT_INT, {0x44}, {0x4}}, \ + {"manufacteryId[14]", ELEM_OUTPUT_INT, {0x48}, {0x4}}, \ + {"manufacteryId[15]", ELEM_OUTPUT_INT, {0x4c}, {0x4}}, \ + {"manufacteryId[16]", ELEM_OUTPUT_INT, {0x50}, {0x4}}, \ + {"manufacteryId[17]", ELEM_OUTPUT_INT, {0x54}, {0x4}}, \ + 
{"manufacteryId[18]", ELEM_OUTPUT_INT, {0x58}, {0x4}}, \ + {"manufacteryId[19]", ELEM_OUTPUT_INT, {0x5c}, {0x4}}, \ + {"manufacteryId[20]", ELEM_OUTPUT_INT, {0x60}, {0x4}}, \ + {"manufacteryId[21]", ELEM_OUTPUT_INT, {0x64}, {0x4}}, \ + {"manufacteryId[22]", ELEM_OUTPUT_INT, {0x68}, {0x4}}, \ + {"manufacteryId[23]", ELEM_OUTPUT_INT, {0x6c}, {0x4}}, \ + {"iecc", ELEM_OUTPUT_INT, {0x70}, {0x4}}, \ + {"swap type", ELEM_OUTPUT_INT, {0x74}, {0x4}}, \ + {"freq", ELEM_OUTPUT_INT, {0x78}, {0x4}}, \ +} + +/* TEE module */ +#define DATA_MODEL_TEE MODEL_VECTOR(TEE) = { \ + {"tee info", ELEM_OUTPUT_CHAR, {0x0}, {0x20000}}, \ +} + +/* DVPP module */ +#define DATA_MODEL_DVPP MODEL_VECTOR(DVPP) = { \ + {"dvpp info", ELEM_OUTPUT_STR_NL, {0x0}, {0x10000}}, \ +} + +/* DRIVE module */ +#define DATA_MODEL_DRIVER MODEL_VECTOR(DRIVER) = { \ + {"driver info", ELEM_OUTPUT_STR_NL, {0x0}, {0x20000}}, \ +} + +/* TS module, start */ +#define DATA_MODEL_TS_START MODEL_VECTOR(TS_START) = { \ + {"ts0 start info", ELEM_OUTPUT_STR_NL, {0x0}, {0x19000}}, \ + {"ts1 start info", ELEM_OUTPUT_STR_NL, {0x19000}, {0x19000}}, \ +} + +/* TS module */ +#define DATA_MODEL_TS MODEL_VECTOR(TS) = { \ + {"ts info", ELEM_OUTPUT_STR_NL, {0x0}, {0xE6F00}}, \ +} + +/* HSM module */ +#define DATA_MODEL_HSM MODEL_VECTOR(HSM) = { \ + {"hsm info", ELEM_OUTPUT_STR_NL, {0x0}, {0x1000}}, \ +} + +/* HSM module, start */ +#define DATA_MODEL_HSM_START MODEL_VECTOR(HSM_START) = { \ + {"hsm start info", ELEM_OUTPUT_STR_NL, {0x0}, {0x1000}}, \ +} + +// lpm common log data +#define DATA_MODEL_LPM_LOG MODEL_VECTOR(LPM_LOG) = { \ + {"lpm log buffer", ELEM_FEATURE_LOOPBUF, {1}, {6}}, \ + {"buf_read", ELEM_CTRL_LPBF_READ, {0x0}, {0x4}}, \ + {"buf_len", ELEM_CTRL_LPBF_SIZE, {0x4}, {0x4}}, \ + {"buf_write", ELEM_CTRL_LPBF_WRITE, {0x40}, {0x4}}, \ + {"log_level", ELEM_OUTPUT_INT, {0x44}, {0x4}}, \ + {"buf_head_len", ELEM_CTRL_LPBF_HEAD, {0x80}, {0x4}}, \ + {"lpm log data", ELEM_FEATURE_CHARLOG, {1}, {1}}, \ + {"lpm log", 
ELEM_OUTPUT_STR_NL, {0x80}, {0x1FF80}}, \ +} + +// hsm common log data +#define DATA_MODEL_HSM_LOG MODEL_VECTOR(HSM_LOG) = { \ + {"hsm log buffer", ELEM_FEATURE_LOOPBUF, {1}, {6}}, \ + {"buf_read", ELEM_CTRL_LPBF_READ, {0x0}, {0x4}}, \ + {"buf_len", ELEM_CTRL_LPBF_SIZE, {0x4}, {0x4}}, \ + {"buf_write", ELEM_CTRL_LPBF_WRITE, {0x40}, {0x4}}, \ + {"log_level", ELEM_OUTPUT_INT, {0x44}, {0x4}}, \ + {"buf_head_len", ELEM_CTRL_LPBF_HEAD, {0x80}, {0x4}}, \ + {"hsm log data", ELEM_FEATURE_CHARLOG, {1}, {1}}, \ + {"hsm log", ELEM_OUTPUT_STR_NL, {0x80}, {0x1FF80}}, \ +} + +/* isp bbox data */ +#define DATA_MODEL_ISP MODEL_VECTOR(ISP) = { \ + {"isp log", ELEM_OUTPUT_STR_NL, {0}, {0x40000}}, \ +} + +/* isp module, start */ +#define DATA_MODEL_ISP_START MODEL_VECTOR(ISP_START) = { \ + {"isp start info", ELEM_OUTPUT_STR_NL, {0x0}, {0x1000}}, \ +} + +/* DVPP module */ +#define DATA_MODEL_DVPP MODEL_VECTOR(DVPP) = { \ + {"dvpp info", ELEM_OUTPUT_STR_NL, {0x0}, {0x10000}}, \ +} + +/* DP module */ +#define DATA_MODEL_DP MODEL_VECTOR(DP) = { \ + {"os kbox info", ELEM_OUTPUT_STR_NL, {0x3000}, {0x4FD000}}, \ +} + +/* safetyisland module */ +#define DATA_MODEL_SAFETYISLAND MODEL_VECTOR(SAFETYISLAND) = { \ + {"safetyisland info", ELEM_OUTPUT_STR_NL, {0x0}, {0xC800}}, \ +} + +/* TF module */ +#define DATA_MODEL_TF MODEL_VECTOR(TF) = { \ + {"x0", ELEM_OUTPUT_HEX, {0x0}, {0x8}}, \ + {"x1", ELEM_OUTPUT_HEX, {0x8}, {0x8}}, \ + {"x30", ELEM_OUTPUT_HEX, {0x10}, {0x8}}, \ + {"x2", ELEM_OUTPUT_HEX, {0x18}, {0x8}}, \ + {"x3", ELEM_OUTPUT_HEX, {0x20}, {0x8}}, \ + {"x4", ELEM_OUTPUT_HEX, {0x28}, {0x8}}, \ + {"x5", ELEM_OUTPUT_HEX, {0x30}, {0x8}}, \ + {"x6", ELEM_OUTPUT_HEX, {0x38}, {0x8}}, \ + {"x7", ELEM_OUTPUT_HEX, {0x40}, {0x8}}, \ + {"x8", ELEM_OUTPUT_HEX, {0x48}, {0x8}}, \ + {"x9", ELEM_OUTPUT_HEX, {0x50}, {0x8}}, \ + {"x10", ELEM_OUTPUT_HEX, {0x58}, {0x8}}, \ + {"x11", ELEM_OUTPUT_HEX, {0x60}, {0x8}}, \ + {"x12", ELEM_OUTPUT_HEX, {0x68}, {0x8}}, \ + {"x13", ELEM_OUTPUT_HEX, {0x70}, {0x8}}, \ + 
{"x14", ELEM_OUTPUT_HEX, {0x78}, {0x8}}, \ + {"x15", ELEM_OUTPUT_HEX, {0x80}, {0x8}}, \ + {"x16", ELEM_OUTPUT_HEX, {0x88}, {0x8}}, \ + {"x17", ELEM_OUTPUT_HEX, {0x90}, {0x8}}, \ + {"x18", ELEM_OUTPUT_HEX, {0x98}, {0x8}}, \ + {"x19", ELEM_OUTPUT_HEX, {0xA0}, {0x8}}, \ + {"x20", ELEM_OUTPUT_HEX, {0xA8}, {0x8}}, \ + {"x21", ELEM_OUTPUT_HEX, {0xB0}, {0x8}}, \ + {"x22", ELEM_OUTPUT_HEX, {0xB8}, {0x8}}, \ + {"x23", ELEM_OUTPUT_HEX, {0xC0}, {0x8}}, \ + {"x24", ELEM_OUTPUT_HEX, {0xC8}, {0x8}}, \ + {"x25", ELEM_OUTPUT_HEX, {0xD0}, {0x8}}, \ + {"x26", ELEM_OUTPUT_HEX, {0xD8}, {0x8}}, \ + {"x27", ELEM_OUTPUT_HEX, {0xE0}, {0x8}}, \ + {"x28", ELEM_OUTPUT_HEX, {0xE8}, {0x8}}, \ + {"x29", ELEM_OUTPUT_HEX, {0xF0}, {0x8}}, \ + {"scr_el3", ELEM_OUTPUT_HEX, {0xF8}, {0x8}}, \ + {"sctlr_el3", ELEM_OUTPUT_HEX, {0x100}, {0x8}}, \ + {"cptr_el3", ELEM_OUTPUT_HEX, {0x108}, {0x8}}, \ + {"tcr_el3", ELEM_OUTPUT_HEX, {0x110}, {0x8}}, \ + {"daif", ELEM_OUTPUT_HEX, {0x118}, {0x8}}, \ + {"mair_el3", ELEM_OUTPUT_HEX, {0x120}, {0x8}}, \ + {"spsr_el3", ELEM_OUTPUT_HEX, {0x128}, {0x8}}, \ + {"elr_el3", ELEM_OUTPUT_HEX, {0x130}, {0x8}}, \ + {"ttbr0_el3", ELEM_OUTPUT_HEX, {0x138}, {0x8}}, \ + {"esr_el3", ELEM_OUTPUT_HEX, {0x140}, {0x8}}, \ + {"far_el3", ELEM_OUTPUT_HEX, {0x148}, {0x8}}, \ +} + +/** + * the whole space is 512k, used for histroy data record + * the struct distribution is as follows: + * +-------------------+ + * | head info(1k) | region: area: module block: + * +-------------------+ +--------------------+ +-----------------+ +-----------------+ + * | boot region |---->| first area |---->| module block |---->| block head | + * +-------------------+ +--------------------+ +-----------------+ +-----------------+ + * | run region | | second area | | module block | | block data | + * +-------------------+ +--------------------+ +-----------------+ +-----------------+ + * | reserved | | ...... | | ...... 
| + * +-------------------+ +--------------------+ +-----------------+ + */ +#define DATA_MODEL_HDR_BOOT_BIOS MODEL_VECTOR(HDR_BOOT_BIOS) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"bsbc point", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {"bsbc exc point", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"hboot1 point", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {"hboot1 exc point", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"hboot2 point", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + {"hboot2 exc point", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"[BIOS info]", ELEM_OUTPUT_STR_NL, {0x480}, {0x2780}}, \ +} + +#define DATA_MODEL_HDR_BOOT_AREA MODEL_VECTOR(HDR_BOOT_AREA) = { \ + {"BIOS INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_BOOT_BIOS", ELEM_CTRL_TABLE_GOTO, {0x0}, {0x3000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_BIOS}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ +} + +#define DATA_MODEL_HDR_BOOT MODEL_VECTOR(HDR_BOOT) = { \ + {"area 0", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x0}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 1", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x7800}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 2", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0xF000}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + 
{"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 3", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x16800}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 4", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x1E000}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 5", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x25800}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 6", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x2D000}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ +} + +#define DATA_MODEL_HDR_BOOT_INFO MODEL_VECTOR(HDR_BOOT_INFO) = { \ + {"region offset", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"region size", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"region config", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"total area", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"history area", ELEM_OUTPUT_INT, {0xC}, {0x4}}, \ + {"error area", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"area config:", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" used module count", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"module config:", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" module 0 offset", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {" module 0 size", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 1 offset", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {" module 1 size", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 2 offset", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + 
{" module 2 size", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 3 offset", ELEM_OUTPUT_INT, {0x34}, {0x4}}, \ + {" module 3 size", ELEM_OUTPUT_INT, {0x38}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"region control", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"area index", ELEM_OUTPUT_INT, {0x6C}, {0x4}}, \ + {"error area count", ELEM_OUTPUT_INT, {0x70}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 0 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x74}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x78}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x7C}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0x80}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0x84}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x88}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 1 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x8C}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x90}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x94}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0x98}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0x9C}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xA0}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 2 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xA4}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xA8}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xAC}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xB0}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0xB4}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xB8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 3 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xBC}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xC0}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xC4}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xC8}, {0x4}}, \ 
+ {" exception id", ELEM_OUTPUT_INT, {0xCC}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xD0}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 4 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xD4}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xD8}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xDC}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xE0}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0xE4}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xE8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 5 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xEC}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xF0}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xF4}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xF8}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0xFC}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x100}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 6 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x104}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x108}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x10C}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0x110}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0x114}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x118}, {0x4}}, \ +} + +#define DATA_MODEL_HDR MODEL_VECTOR(HDR) = { \ + {"head info", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"reset count", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"boot region", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_INFO", ELEM_CTRL_TABLE_GOTO, {0XC}, {0x168}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_INFO}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"HDR_BOOT", ELEM_CTRL_TABLE_GOTO, {0x400}, {0xA000}}, \ + {"table_index", 
ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ +} + +#endif // BBOX_DDR_DATA_MDC_H diff --git a/inc/toolchain/bbox/bbox_ddr_data_mini.h b/inc/toolchain/bbox/bbox_ddr_data_mini.h new file mode 100644 index 000000000..eb0f1aff0 --- /dev/null +++ b/inc/toolchain/bbox/bbox_ddr_data_mini.h @@ -0,0 +1,1271 @@ +/** + * @file bbox_ddr_data_mini.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2020. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef BBOX_DDR_DATA_MINI_H +#define BBOX_DDR_DATA_MINI_H + +#include "bbox_ddr_data.h" + +/* each Module need define as follows */ +/* LPM3 module */ +#define DATA_MODEL_LPM3_START MODEL_VECTOR(LPM3_START) = { \ + {"start_step 1", ELEM_OUTPUT_INT, {0x0}, {0x1}}, \ + {"start_step 2", ELEM_OUTPUT_INT, {0x1}, {0x1}}, \ + {"start_step 3", ELEM_OUTPUT_INT, {0x2}, {0x1}}, \ + {"start_step 4", ELEM_OUTPUT_INT, {0x3}, {0x1}}, \ + {"start_step 5", ELEM_OUTPUT_INT, {0x4}, {0x1}}, \ + {"start_step 6", ELEM_OUTPUT_INT, {0x5}, {0x1}}, \ + {"start_step 7", ELEM_OUTPUT_INT, {0x6}, {0x1}}, \ + {"start_step 8", ELEM_OUTPUT_INT, {0x7}, {0x1}}, \ + {"start_step 9", ELEM_OUTPUT_INT, {0x8}, {0x1}}, \ + {"start_step 10", ELEM_OUTPUT_INT, {0x9}, {0x1}}, \ + {"start_step 11", ELEM_OUTPUT_INT, {0xa}, {0x1}}, \ + {"start_step 12", ELEM_OUTPUT_INT, {0xb}, {0x1}}, \ + {"start_step 13", ELEM_OUTPUT_INT, {0xc}, {0x1}}, \ + {"start_step 14", ELEM_OUTPUT_INT, {0xd}, {0x1}}, \ + {"start_step 15", ELEM_OUTPUT_INT, {0xe}, {0x1}}, \ + {"start_step 16", ELEM_OUTPUT_INT, {0xf}, {0x1}}, \ + {"start_step 17", ELEM_OUTPUT_INT, {0x10}, {0x1}}, \ + {"start_step 18", ELEM_OUTPUT_INT, {0x11}, {0x1}}, \ + {"start_step 19", ELEM_OUTPUT_INT, {0x12}, {0x1}}, \ + {"start_step 20", ELEM_OUTPUT_INT, {0x13}, {0x1}}, \ + {"start_step 21", 
ELEM_OUTPUT_INT, {0x14}, {0x1}}, \ + {"start_step 22", ELEM_OUTPUT_INT, {0x15}, {0x1}}, \ + {"start_step 23", ELEM_OUTPUT_INT, {0x16}, {0x1}}, \ + {"start_step 24", ELEM_OUTPUT_INT, {0x17}, {0x1}}, \ + {"start_step 25", ELEM_OUTPUT_INT, {0x18}, {0x1}}, \ + {"start_step 26", ELEM_OUTPUT_INT, {0x19}, {0x1}}, \ + {"start_step 27", ELEM_OUTPUT_INT, {0x1a}, {0x1}}, \ +} + +#define DATA_MODEL_LPM3 MODEL_VECTOR(LPM3) = { \ + {"****exc****reg**", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"reset_reason", ELEM_OUTPUT_INT, {0x80}, {0x4}}, \ + {"slice", ELEM_OUTPUT_INT, {0x84}, {0x4}}, \ + {"rtc", ELEM_OUTPUT_INT, {0x88}, {0x4}}, \ + {"r13", ELEM_OUTPUT_INT, {0x8C}, {0x4}}, \ + {"lr1", ELEM_OUTPUT_INT, {0x90}, {0x4}}, \ + {"pc", ELEM_OUTPUT_INT, {0x94}, {0x4}}, \ + {"xpsr", ELEM_OUTPUT_INT, {0x98}, {0x4}}, \ + {"cfsr", ELEM_OUTPUT_INT, {0x9C}, {0x4}}, \ + {"hfsr", ELEM_OUTPUT_INT, {0xa0}, {0x4}}, \ + {"bfar", ELEM_OUTPUT_INT, {0xa4}, {0x4}}, \ + {"exc_trace", ELEM_OUTPUT_INT, {0xa8}, {0x1}}, \ + {"ddr_exc", ELEM_OUTPUT_INT, {0xa9}, {0x1}}, \ + {"irq_id", ELEM_OUTPUT_INT, {0xaa}, {0x2}}, \ + {"task_id", ELEM_OUTPUT_INT, {0xac}, {0x4}}, \ + {"**backup**reg***", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"reg_backup_index", ELEM_OUTPUT_INT, {0x200}, {0x4}}, \ + {"reason_0", ELEM_OUTPUT_INT, {0x204}, {0x4}}, \ + {"reason_1", ELEM_OUTPUT_INT, {0x208}, {0x4}}, \ + {"reason_2", ELEM_OUTPUT_INT, {0x20C}, {0x4}}, \ + {"r0", ELEM_OUTPUT_INT, {0x210}, {0x4}}, \ + {"r1", ELEM_OUTPUT_INT, {0x214}, {0x4}}, \ + {"r2", ELEM_OUTPUT_INT, {0x218}, {0x4}}, \ + {"r3", ELEM_OUTPUT_INT, {0x21c}, {0x4}}, \ + {"r4", ELEM_OUTPUT_INT, {0x220}, {0x4}}, \ + {"r5", ELEM_OUTPUT_INT, {0x224}, {0x4}}, \ + {"r6", ELEM_OUTPUT_INT, {0x228}, {0x4}}, \ + {"r7", ELEM_OUTPUT_INT, {0x22c}, {0x4}}, \ + {"r8", ELEM_OUTPUT_INT, {0x230}, {0x4}}, \ + {"r9", ELEM_OUTPUT_INT, {0x234}, {0x4}}, \ + {"r10", ELEM_OUTPUT_INT, {0x238}, {0x4}}, \ + {"r11", ELEM_OUTPUT_INT, {0x23c}, {0x4}}, \ + {"r12", ELEM_OUTPUT_INT, {0x240}, {0x4}}, \ + 
{"r13", ELEM_OUTPUT_INT, {0x244}, {0x4}}, \ + {"msp", ELEM_OUTPUT_INT, {0x248}, {0x4}}, \ + {"psp", ELEM_OUTPUT_INT, {0x24c}, {0x4}}, \ + {"lr0_ctrl", ELEM_OUTPUT_INT, {0x250}, {0x4}}, \ + {"lr1", ELEM_OUTPUT_INT, {0x254}, {0x4}}, \ + {"pc", ELEM_OUTPUT_INT, {0x258}, {0x4}}, \ + {"xpsr", ELEM_OUTPUT_INT, {0x25c}, {0x4}}, \ + {"primask", ELEM_OUTPUT_INT, {0x260}, {0x4}}, \ + {"basepri", ELEM_OUTPUT_INT, {0x264}, {0x4}}, \ + {"faultmask", ELEM_OUTPUT_INT, {0x268}, {0x4}}, \ + {"control", ELEM_OUTPUT_INT, {0x26c}, {0x4}}, \ + {"**runtime*******", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"RT_BBX_MAGIC_NUM", ELEM_OUTPUT_INT, {0x7940}, {0x4}}, \ + {"RT_BBX_SIZE", ELEM_OUTPUT_INT, {0x7948}, {0x4}}, \ + {"TSENSOR_A55", ELEM_OUTPUT_INT, {0x7960}, {0x1}}, \ + {"TSENSOR_PERI", ELEM_OUTPUT_INT, {0x7961}, {0x1}}, \ + {"TSENSOR_AIC0", ELEM_OUTPUT_INT, {0x7962}, {0x1}}, \ + {"TSENSOR_AIC1", ELEM_OUTPUT_INT, {0x7963}, {0x1}}, \ + {"DDR_T_GRADE", ELEM_OUTPUT_INT, {0x7964}, {0x1}}, \ + {"EDP_SCALE_0", ELEM_OUTPUT_INT, {0x7965}, {0x1}}, \ + {"EDP_SCALE_1", ELEM_OUTPUT_INT, {0x7966}, {0x1}}, \ + {"TMP_STATUS", ELEM_OUTPUT_INT, {0x7967}, {0x1}}, \ + {"TMP_CTRL_ST", ELEM_OUTPUT_INT, {0x7968}, {0x1}}, \ + {"AIC_FREQ_ST", ELEM_OUTPUT_INT, {0x7969}, {0x1}}, \ + {"A55_FREQ_ST", ELEM_OUTPUT_INT, {0x796A}, {0x1}}, \ + {"AIC_NUM_ST", ELEM_OUTPUT_INT, {0x796B}, {0x1}}, \ + {"TMP_RST", ELEM_OUTPUT_INT, {0x796C}, {0x1}}, \ + {"TMP_HIGH", ELEM_OUTPUT_INT, {0x796D}, {0x1}}, \ + {"TMP_NOR", ELEM_OUTPUT_INT, {0x796E}, {0x1}}, \ + {"TMP_PERIOD", ELEM_OUTPUT_INT, {0x796F}, {0x1}}, \ + {"T_RST_STATUS", ELEM_OUTPUT_INT, {0x797D}, {0x1}}, \ + {"T_ERR_TSENSOR", ELEM_OUTPUT_INT, {0x797E}, {0x1}}, \ + {"T_ERR_EFUSE", ELEM_OUTPUT_INT, {0x797F}, {0x1}}, \ + {"**NV*******", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"LPNV_MAGIC",ELEM_OUTPUT_INT, {0x7980}, {0x4}}, \ + {"LP_LPALL_NV",ELEM_OUTPUT_INT, {0x7984}, {0x1}}, \ + {"LP_AVS_NV",ELEM_OUTPUT_INT, {0x7985}, {0x1}}, \ + {"LP_SVFD_NV",ELEM_OUTPUT_INT, {0x7986}, {0x1}}, 
\ + {"LP_PLLMOD_SEL_NV",ELEM_OUTPUT_INT, {0x7987}, {0x1}}, \ + {"LP_DEEP_SLEEP_NV",ELEM_OUTPUT_INT, {0x7988}, {0x1}}, \ + {"LP_HIMNTN_NV",ELEM_OUTPUT_INT, {0x7989}, {0x1}}, \ + {"LP_LOGBUF_CTRL",ELEM_OUTPUT_INT, {0x798A}, {0x1}}, \ + {"LP_WDT_RST_NV",ELEM_OUTPUT_INT, {0x798B}, {0x1}}, \ + {"LP_RDRLOG_CTRL",ELEM_OUTPUT_INT, {0x798C}, {0x1}}, \ + {"THERMAL_EN_NV", ELEM_OUTPUT_INT, {0x798D}, {0x1}}, \ + {"TMP_HWRST_EN_NV", ELEM_OUTPUT_INT, {0x798E}, {0x1}}, \ + {"TMP_GCTRL_EN_NV", ELEM_OUTPUT_INT, {0x798F}, {0x1}}, \ + {"TMP_GCTRL_SCALE_NV", ELEM_OUTPUT_INT, {0x7990}, {0x1}}, \ + {"TMP_RST_NV", ELEM_OUTPUT_INT, {0x7991}, {0x1}}, \ + {"TMP_HIGH_NV", ELEM_OUTPUT_INT, {0x7992}, {0x1}}, \ + {"TMP_NOR_NV", ELEM_OUTPUT_INT, {0x7993}, {0x1}}, \ + {"TMP_PERIOD_NV", ELEM_OUTPUT_INT, {0x7994}, {0x1}}, \ + {"DDR_ALL_NV", ELEM_OUTPUT_INT, {0x7995}, {0x1}}, \ + {"DDR_THERMAL_NV", ELEM_OUTPUT_INT, {0x7996}, {0x1}}, \ + {"DDR_EXMBIST_NV", ELEM_OUTPUT_INT, {0x7997}, {0x1}}, \ + {"DDR_SWAP_NV", ELEM_OUTPUT_INT, {0x7998}, {0x1}}, \ + {"DDR_IECC_NV", ELEM_OUTPUT_INT, {0x7999}, {0x1}}, \ + {"DDR_PASR_NV", ELEM_OUTPUT_INT, {0x799A}, {0x1}}, \ + {"DDR_UDIS_NV", ELEM_OUTPUT_INT, {0x799B}, {0x1}}, \ + {"DDR_TDIS_NV", ELEM_OUTPUT_INT, {0x799C}, {0x1}}, \ + {"DDR_FREQ_NV", ELEM_OUTPUT_INT, {0x799D}, {0x1}}, \ + {"**DDR_RUNTIME***", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"DDR_STATUS", ELEM_OUTPUT_INT, {0x79C0}, {0x2}}, \ + {"INITFREQ", ELEM_OUTPUT_INT, {0x79C2}, {0x1}}, \ + {"DDR_TMP_PERIOD", ELEM_OUTPUT_INT, {0x79C3}, {0x1}}, \ + {"DDR_PD_PRD", ELEM_OUTPUT_INT, {0x79C4}, {0x2}}, \ + {"DDR_ASREF_PRD", ELEM_OUTPUT_INT, {0x79C6}, {0x2}}, \ + {"DDR_FREQ_LOAD", ELEM_OUTPUT_HEX, {0x79C8}, {0x10}}, \ + {"DDR_MIN", ELEM_OUTPUT_INT, {0x79D8}, {0x1}}, \ + {"DDR_MAX", ELEM_OUTPUT_INT, {0x79D9}, {0x1}}, \ + {"DDR_LAST", ELEM_OUTPUT_INT, {0x79DA}, {0x1}}, \ + {"DDR_CURRENT", ELEM_OUTPUT_INT, {0x79DB}, {0x1}}, \ + {"DDR_TARGET", ELEM_OUTPUT_INT, {0x79DC}, {0x1}}, \ + {"DDR_DN_LIMIT", ELEM_OUTPUT_INT, 
{0x79DD}, {0x1}}, \ + {"DDR_UP_LIMIT", ELEM_OUTPUT_INT, {0x79DE}, {0x1}}, \ + {"DDR_PLL", ELEM_OUTPUT_INT, {0x79DF}, {0x1}}, \ + {"DDR_LAST_PLL", ELEM_OUTPUT_INT, {0x79E0}, {0x4}}, \ + {"CMD_CNT", ELEM_OUTPUT_HEX, {0x79E4}, {0x10}}, \ + {"DATA_CNT", ELEM_OUTPUT_INT, {0x79F4}, {0x4}}, \ + {"**AVS*******", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"AICORE0_HIGHTEMP_VOLT", ELEM_OUTPUT_INT, {0x7AE0}, {0x4}}, \ + {"AICORE1_HIGHTEMP_VOLT", ELEM_OUTPUT_INT, {0x7AE4}, {0x4}}, \ + {"CPU_HIGHTEMP_VOLT", ELEM_OUTPUT_INT, {0x7AE8}, {0x4}}, \ + {"AIMEMORY_HIGHTEMP_VOLT", ELEM_OUTPUT_INT, {0x7AEC}, {0x4}}, \ + {"PERI_HIGHTEMP_VOLT", ELEM_OUTPUT_INT, {0x7AF0}, {0x4}}, \ + {"AICORE0_CUR_VOLT", ELEM_OUTPUT_INT, {0x7AF4}, {0x4}}, \ + {"AICORE1_CUR_VOLT", ELEM_OUTPUT_INT, {0x7AF8}, {0x4}}, \ + {"CPU_CUR_VOLT", ELEM_OUTPUT_INT, {0x7AFC}, {0x4}}, \ + {"AIMEMORY_CUR_VOLT", ELEM_OUTPUT_INT, {0x7B00}, {0x4}}, \ + {"PERI_CUR_VOLT", ELEM_OUTPUT_INT, {0x7B04}, {0x4}}, \ + {"AICORE0_SVFD_VOLT", ELEM_OUTPUT_INT, {0x7B08}, {0x4}}, \ + {"AICORE1_SVFD_VOLT", ELEM_OUTPUT_INT, {0x7B0C}, {0x4}}, \ + {"AICORE0_SVFD_CPM", ELEM_OUTPUT_INT, {0x7B10}, {0x2}}, \ + {"AICORE1_SVFD_CPM", ELEM_OUTPUT_INT, {0x7B12}, {0x2}}, \ + {"AICORE0_NOTIFY_ST", ELEM_OUTPUT_INT, {0x7B14}, {0x1}}, \ + {"AICORE1_NOTIFY_ST", ELEM_OUTPUT_INT, {0x7B15}, {0x1}}, \ + {"CPU_NOTIFY_ST", ELEM_OUTPUT_INT, {0x7B16}, {0x1}}, \ + {"AIMEMORY_NOTIFY_ST", ELEM_OUTPUT_INT, {0x7B17}, {0x1}}, \ + {"PERI_NOTIFY_ST", ELEM_OUTPUT_INT, {0x7B18}, {0x1}}, \ + {"AICORE0_TZONE", ELEM_OUTPUT_INT, {0x7B19}, {0x1}}, \ + {"AICORE1_TZONE", ELEM_OUTPUT_INT, {0x7B1A}, {0x1}}, \ + {"CPU_TZONE", ELEM_OUTPUT_INT, {0x7B1B}, {0x1}}, \ + {"AIMEMORY_TZONE", ELEM_OUTPUT_INT, {0x7B1C}, {0x1}}, \ + {"PERI_TZONE", ELEM_OUTPUT_INT, {0x7B1D}, {0x1}}, \ + {"VOLT_RISE_TEMP", ELEM_OUTPUT_INT, {0x7B1E}, {0x1}}, \ + {"VOLT_DECREASE_TEMP", ELEM_OUTPUT_INT, {0x7B1F}, {0x1}}, \ + {"**efuse*******", ELEM_OUTPUT_STR, {0x00}, {0x8}}, \ + {"TSENSOR_EFUSE", ELEM_OUTPUT_INT, {0x7B28}, 
{0x8}}, \ +} + +#define DATA_MODEL_LPM3_SRAM MODEL_VECTOR(LPM3_SRAM) = { \ + {"magic_begin", ELEM_OUTPUT_INT, {0x5000}, {0x4}}, \ + {"slice_time", ELEM_OUTPUT_INT, {0x5004}, {0x4}}, \ + {"mod_reason", ELEM_OUTPUT_INT, {0x5008}, {0x4}}, \ + {"ddr_freq_id", ELEM_OUTPUT_INT, {0x500C}, {0x4}}, \ + {"uce_exc", ELEM_OUTPUT_INT, {0x5010}, {0x4}}, \ + {"reserved0-2", ELEM_OUTPUT_INT, {0x5014}, {0x4}}, \ + {"reg_save_addr", ELEM_OUTPUT_INT, {0x5020}, {0x4}}, \ + {"DDRRETENTION", ELEM_OUTPUT_INT, {0x5024}, {0x4}}, \ + {"DDRRETENTIONCLR", ELEM_OUTPUT_INT, {0x5028}, {0x4}}, \ + {"DRAMRETENTION", ELEM_OUTPUT_INT, {0x502C}, {0x4}}, \ + {"DDRC_0_3_RESET", ELEM_OUTPUT_INT, {0x5030}, {0x4}}, \ + {"DDRC_4_7_RESET", ELEM_OUTPUT_INT, {0x5034}, {0x4}}, \ + {"DDRC_0_3_PACK_RESET", ELEM_OUTPUT_INT, {0x5038}, {0x4}}, \ + {"DDRC_4_7_PACK_RESET", ELEM_OUTPUT_INT, {0x503C}, {0x4}}, \ + {"DDRC_EXMBIST0_REGS_RESET",ELEM_OUTPUT_INT, {0x5040}, {0x4}}, \ + {"DDRC_EXMBIST1_REGS_RESET",ELEM_OUTPUT_INT, {0x5044}, {0x4}}, \ + {"DDRC_0_3_PACK_RESET", ELEM_OUTPUT_INT, {0x5048}, {0x4}}, \ + {"DDRC_4_7_PACK_RESET", ELEM_OUTPUT_INT, {0x504C}, {0x4}}, \ + {"SCTRL_DDRC_0_3_AO_RST", ELEM_OUTPUT_INT, {0x5050}, {0x4}}, \ + {"SCTRL_DDRC_4_7_AO_RST", ELEM_OUTPUT_INT, {0x5054}, {0x4}}, \ + {"PPLLBYPASS0", ELEM_OUTPUT_INT, {0x5058}, {0x4}}, \ + {"PPLLBYPASS1", ELEM_OUTPUT_INT, {0x505C}, {0x4}}, \ + {"PPLL3FCTRL", ELEM_OUTPUT_INT, {0x5060}, {0x4}}, \ + {"PPLL3FCTRL_FRAC", ELEM_OUTPUT_INT, {0x5064}, {0x4}}, \ + {"PPLL4FCTRL", ELEM_OUTPUT_INT, {0x5068}, {0x4}}, \ + {"PPLL4FCTRL_FRAC", ELEM_OUTPUT_INT, {0x506C}, {0x4}}, \ + {"PPLLOCKSTATUS", ELEM_OUTPUT_INT, {0x5070}, {0x4}}, \ + {"DDRC_0_3_BYPASS_MODE", ELEM_OUTPUT_INT, {0x5074}, {0x4}}, \ + {"DDRC_4_7_BYPASS_MODE", ELEM_OUTPUT_INT, {0x5078}, {0x4}}, \ + {"PLL_PROF_CFG1", ELEM_OUTPUT_INT, {0x507C}, {0x4}}, \ +} + +/* TEE module */ +#define DATA_MODEL_TEE MODEL_VECTOR(TEE) = { \ + {"tee info", ELEM_OUTPUT_STR_NL, {0x0}, {0x10000}}, \ +} + +/* TF module */ +#define 
DATA_MODEL_TF MODEL_VECTOR(TF) = { \ + {"tf info", ELEM_OUTPUT_STR_NL, {0x8}, {0xFFF8}}, \ +} + +/* DVPP module */ +#define DATA_MODEL_DVPP MODEL_VECTOR(DVPP) = { \ + {"dvpp info", ELEM_OUTPUT_STR_NL, {0x0}, {0x10000}}, \ +} + +/* DRIVE module */ +#define DATA_MODEL_DRIVER MODEL_VECTOR(DRIVER) = { \ + {"driver info", ELEM_OUTPUT_STR_NL, {0x0}, {0x20000}}, \ +} + +/* TS module */ +#define DATA_MODEL_TS MODEL_VECTOR(TS) = { \ + {"ts info", ELEM_OUTPUT_CHAR, {0x0}, {0x100000}}, \ +} + +/* TS module, start */ +#define DATA_MODEL_TS_START MODEL_VECTOR(TS_START) = { \ + {"ts start info", ELEM_OUTPUT_STR_NL, {0x0}, {0xC800}}, \ +} + +/* AP module, early print */ +#define DATA_MODEL_AP_EPRINT MODEL_VECTOR(AP_EPRINT) = { \ + {"early print info", ELEM_OUTPUT_STR_NL, {0x0}, {0x400}}, \ +} + +/* BIOS module */ +#define DATA_MODEL_BIOS MODEL_VECTOR(BIOS) = { \ + {"bios info", ELEM_OUTPUT_STR_NL, {0x0}, {0x50000}}, \ +} + +/* BIOS module, sram */ +#define DATA_MODEL_BIOS_SRAM MODEL_VECTOR(BIOS_SRAM) = { \ + {"LPM3_WAKE_UP_STATUS", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"DEBUG_TIME_POWERUP_DONE", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"DEBUG_TIME_PERSTHIGH_DONE", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + {"DEBUG_TIME_PCIEPHY_DONE", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"DEBUG_TIME_PHY_FIRMWARE_DONE", ELEM_OUTPUT_INT, {0x34}, {0x4}}, \ + {"DEBUG_TIME_PCIECTRL_DONE", ELEM_OUTPUT_INT, {0x38}, {0x4}}, \ + {"DEBUG_TIME_IMG_DONE", ELEM_OUTPUT_INT, {0x3C}, {0x4}}, \ + {"DEBUG_TIME_SECURE_DONE", ELEM_OUTPUT_INT, {0x40}, {0x4}}, \ + {"DEBUG_VERSION_ADDR", ELEM_OUTPUT_HEX, {0x50}, {0x10}}, \ + {"XLOADER_RESET_REG", ELEM_OUTPUT_INT, {0x200}, {0x4}}, \ + {"XLOADER_KEY_POINT", ELEM_OUTPUT_INT, {0x204}, {0x4}}, \ + {"XLOADER_TIME_POWERUP_DONE", ELEM_OUTPUT_INT, {0x228}, {0x4}}, \ + {"XLOADER_TIME_PERSTHIGH_DONE", ELEM_OUTPUT_INT, {0x22C}, {0x4}}, \ + {"XLOADER_TIME_PCIEPHY_DONE", ELEM_OUTPUT_INT, {0x230}, {0x4}}, \ + {"XLOADER_TIME_PHY_FIRMWARE_DONE", ELEM_OUTPUT_INT, {0x234}, {0x4}}, \ + 
{"XLOADER_TIME_PCIECTRL_DONE", ELEM_OUTPUT_INT, {0x238}, {0x4}}, \ + {"XLOADER_TIME_PCIE_DETECT_DONE", ELEM_OUTPUT_INT, {0x23C}, {0x4}}, \ + {"UEFI_LAST_KEYPOINT", ELEM_OUTPUT_INT, {0x320}, {0x4}}, \ + {"SD_LOAD_FILE_STATUS", ELEM_OUTPUT_INT, {0x350}, {0x4}}, \ +} + + +/* DDR_SRAM module */ +#define DATA_MODEL_DDR_SRAM MODEL_VECTOR(DDR_SRAM) = {\ + {"magic_begin", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"slice_time", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"mod_reason", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"ddr_freq_id", ELEM_OUTPUT_INT, {0xC}, {0x4}}, \ + {"ddr_status", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"uce_exc", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"SC_TSENSOR_INFO_ADDR", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"SC_TSENSOR_AICORE_LIMIT", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {"SC_TSENSOR_MAX_TEMP", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"channel_mask", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {"channel_num", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"rank_num", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + {"ddr_size", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"manufactery_id", ELEM_OUTPUT_INT, {0x34}, {0x4}}, \ + {"iecc_cerr_thresh[0]", ELEM_OUTPUT_INT, {0x38}, {0x4}}, \ + {"iecc_cerr_thresh[1]", ELEM_OUTPUT_INT, {0x3C}, {0x4}}, \ + {"iecc_cerr_thresh[2]", ELEM_OUTPUT_INT, {0x40}, {0x4}}, \ + {"iecc_cerr_thresh[3]", ELEM_OUTPUT_INT, {0x44}, {0x4}}, \ + {"iecc_cerr_thresh[4]", ELEM_OUTPUT_INT, {0x48}, {0x4}}, \ + {"iecc_cerr_thresh[5]", ELEM_OUTPUT_INT, {0x4C}, {0x4}}, \ + {"iecc_cerr_thresh[6]", ELEM_OUTPUT_INT, {0x50}, {0x4}}, \ + {"iecc_cerr_thresh[7]", ELEM_OUTPUT_INT, {0x54}, {0x4}}, \ + {"iecc_ctrl[0]", ELEM_OUTPUT_INT, {0x58}, {0x4}}, \ + {"iecc_ctrl[1]", ELEM_OUTPUT_INT, {0x5C}, {0x4}}, \ + {"iecc_ctrl[2]", ELEM_OUTPUT_INT, {0x60}, {0x4}}, \ + {"iecc_ctrl[3]", ELEM_OUTPUT_INT, {0x64}, {0x4}}, \ + {"iecc_ctrl[4]", ELEM_OUTPUT_INT, {0x68}, {0x4}}, \ + {"iecc_ctrl[5]", ELEM_OUTPUT_INT, {0x6C}, {0x4}}, \ + {"iecc_ctrl[6]", ELEM_OUTPUT_INT, {0x70}, {0x4}}, \ + {"iecc_ctrl[7]", 
ELEM_OUTPUT_INT, {0x74}, {0x4}}, \ + {"iecc_cerr_cnt[0]", ELEM_OUTPUT_INT, {0x78}, {0x4}}, \ + {"iecc_cerr_cnt[1]", ELEM_OUTPUT_INT, {0x7C}, {0x4}}, \ + {"iecc_cerr_cnt[2]", ELEM_OUTPUT_INT, {0x80}, {0x4}}, \ + {"iecc_cerr_cnt[3]", ELEM_OUTPUT_INT, {0x84}, {0x4}}, \ + {"iecc_cerr_cnt[4]", ELEM_OUTPUT_INT, {0x88}, {0x4}}, \ + {"iecc_cerr_cnt[5]", ELEM_OUTPUT_INT, {0x8C}, {0x4}}, \ + {"iecc_cerr_cnt[6]", ELEM_OUTPUT_INT, {0x90}, {0x4}}, \ + {"iecc_cerr_cnt[7]", ELEM_OUTPUT_INT, {0x94}, {0x4}}, \ + {"iecc_uerr_cnt[0]", ELEM_OUTPUT_INT, {0x98}, {0x4}}, \ + {"iecc_uerr_cnt[1]", ELEM_OUTPUT_INT, {0x9C}, {0x4}}, \ + {"iecc_uerr_cnt[2]", ELEM_OUTPUT_INT, {0xA0}, {0x4}}, \ + {"iecc_uerr_cnt[3]", ELEM_OUTPUT_INT, {0xA4}, {0x4}}, \ + {"iecc_uerr_cnt[4]", ELEM_OUTPUT_INT, {0xA8}, {0x4}}, \ + {"iecc_uerr_cnt[5]", ELEM_OUTPUT_INT, {0xAC}, {0x4}}, \ + {"iecc_uerr_cnt[6]", ELEM_OUTPUT_INT, {0xB0}, {0x4}}, \ + {"iecc_uerr_cnt[7]", ELEM_OUTPUT_INT, {0xB4}, {0x4}}, \ + {"magic_byte", ELEM_OUTPUT_INT, {0x100}, {0x1}}, \ + {"err_max", ELEM_OUTPUT_INT, {0x104}, {0x1}}, \ + {"irq_count", ELEM_OUTPUT_INT, {0x108}, {0x1}}, \ + {"index", ELEM_OUTPUT_INT, {0x10C}, {0x1}}, \ + {"rate[0].time", ELEM_OUTPUT_INT, {0x100}, {0x4}}, \ + {"rate[0].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x104}, {0x1}}, \ + {"rate[0].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x105}, {0x1}}, \ + {"rate[0].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x106}, {0x1}}, \ + {"rate[0].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x107}, {0x1}}, \ + {"rate[0].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x108}, {0x1}}, \ + {"rate[0].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x109}, {0x1}}, \ + {"rate[0].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x10A}, {0x1}}, \ + {"rate[0].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x10B}, {0x1}}, \ + {"rate[0].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x10C}, {0x1}}, \ + {"rate[0].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x10D}, {0x1}}, \ + {"rate[0].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x10E}, {0x1}}, \ + {"rate[0].rate_per_rank[B]", 
ELEM_OUTPUT_INT, {0x10F}, {0x1}}, \ + {"rate[0].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x110}, {0x1}}, \ + {"rate[0].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x111}, {0x1}}, \ + {"rate[0].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x112}, {0x1}}, \ + {"rate[0].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x113}, {0x1}}, \ + {"rate[1].time", ELEM_OUTPUT_INT, {0x114}, {0x4}}, \ + {"rate[1].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x118}, {0x1}}, \ + {"rate[1].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x119}, {0x1}}, \ + {"rate[1].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x11A}, {0x1}}, \ + {"rate[1].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x11B}, {0x1}}, \ + {"rate[1].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x11C}, {0x1}}, \ + {"rate[1].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x11D}, {0x1}}, \ + {"rate[1].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x11E}, {0x1}}, \ + {"rate[1].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x11F}, {0x1}}, \ + {"rate[1].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x120}, {0x1}}, \ + {"rate[1].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x121}, {0x1}}, \ + {"rate[1].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x122}, {0x1}}, \ + {"rate[1].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x123}, {0x1}}, \ + {"rate[1].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x124}, {0x1}}, \ + {"rate[1].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x125}, {0x1}}, \ + {"rate[1].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x126}, {0x1}}, \ + {"rate[1].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x127}, {0x1}}, \ + {"rate[2].time", ELEM_OUTPUT_INT, {0x128}, {0x4}}, \ + {"rate[2].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x12C}, {0x1}}, \ + {"rate[2].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x12D}, {0x1}}, \ + {"rate[2].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x12E}, {0x1}}, \ + {"rate[2].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x12F}, {0x1}}, \ + {"rate[2].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x130}, {0x1}}, \ + {"rate[2].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x131}, {0x1}}, \ + {"rate[2].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x132}, {0x1}}, \ + {"rate[2].rate_per_rank[7]", 
ELEM_OUTPUT_INT, {0x133}, {0x1}}, \ + {"rate[2].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x134}, {0x1}}, \ + {"rate[2].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x135}, {0x1}}, \ + {"rate[2].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x136}, {0x1}}, \ + {"rate[2].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x137}, {0x1}}, \ + {"rate[2].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x138}, {0x1}}, \ + {"rate[2].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x139}, {0x1}}, \ + {"rate[2].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x13A}, {0x1}}, \ + {"rate[2].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x13B}, {0x1}}, \ + {"rate[3].time", ELEM_OUTPUT_INT, {0x13C}, {0x4}}, \ + {"rate[3].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x140}, {0x1}}, \ + {"rate[3].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x141}, {0x1}}, \ + {"rate[3].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x142}, {0x1}}, \ + {"rate[3].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x143}, {0x1}}, \ + {"rate[3].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x144}, {0x1}}, \ + {"rate[3].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x145}, {0x1}}, \ + {"rate[3].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x146}, {0x1}}, \ + {"rate[3].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x147}, {0x1}}, \ + {"rate[3].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x148}, {0x1}}, \ + {"rate[3].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x149}, {0x1}}, \ + {"rate[3].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x14A}, {0x1}}, \ + {"rate[3].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x14B}, {0x1}}, \ + {"rate[3].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x14C}, {0x1}}, \ + {"rate[3].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x14D}, {0x1}}, \ + {"rate[3].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x14E}, {0x1}}, \ + {"rate[3].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x14F}, {0x1}}, \ + {"rate[4].time", ELEM_OUTPUT_INT, {0x150}, {0x4}}, \ + {"rate[4].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x154}, {0x1}}, \ + {"rate[4].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x155}, {0x1}}, \ + {"rate[4].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x156}, {0x1}}, \ + {"rate[4].rate_per_rank[3]", 
ELEM_OUTPUT_INT, {0x157}, {0x1}}, \ + {"rate[4].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x158}, {0x1}}, \ + {"rate[4].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x159}, {0x1}}, \ + {"rate[4].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x15A}, {0x1}}, \ + {"rate[4].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x15B}, {0x1}}, \ + {"rate[4].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x15C}, {0x1}}, \ + {"rate[4].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x15D}, {0x1}}, \ + {"rate[4].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x15E}, {0x1}}, \ + {"rate[4].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x15F}, {0x1}}, \ + {"rate[4].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x160}, {0x1}}, \ + {"rate[4].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x161}, {0x1}}, \ + {"rate[4].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x162}, {0x1}}, \ + {"rate[4].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x163}, {0x1}}, \ + {"rate[5].time", ELEM_OUTPUT_INT, {0x164}, {0x4}}, \ + {"rate[5].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x168}, {0x1}}, \ + {"rate[5].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x169}, {0x1}}, \ + {"rate[5].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x16A}, {0x1}}, \ + {"rate[5].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x16B}, {0x1}}, \ + {"rate[5].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x16C}, {0x1}}, \ + {"rate[5].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x16D}, {0x1}}, \ + {"rate[5].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x16E}, {0x1}}, \ + {"rate[5].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x16F}, {0x1}}, \ + {"rate[5].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x170}, {0x1}}, \ + {"rate[5].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x171}, {0x1}}, \ + {"rate[5].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x172}, {0x1}}, \ + {"rate[5].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x173}, {0x1}}, \ + {"rate[5].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x174}, {0x1}}, \ + {"rate[5].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x175}, {0x1}}, \ + {"rate[5].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x176}, {0x1}}, \ + {"rate[5].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x177}, {0x1}}, \ + {"rate[6].time", 
ELEM_OUTPUT_INT, {0x178}, {0x4}}, \ + {"rate[6].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x17C}, {0x1}}, \ + {"rate[6].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x17D}, {0x1}}, \ + {"rate[6].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x17E}, {0x1}}, \ + {"rate[6].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x17F}, {0x1}}, \ + {"rate[6].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x180}, {0x1}}, \ + {"rate[6].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x181}, {0x1}}, \ + {"rate[6].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x182}, {0x1}}, \ + {"rate[6].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x183}, {0x1}}, \ + {"rate[6].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x184}, {0x1}}, \ + {"rate[6].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x185}, {0x1}}, \ + {"rate[6].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x186}, {0x1}}, \ + {"rate[6].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x187}, {0x1}}, \ + {"rate[6].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x188}, {0x1}}, \ + {"rate[6].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x189}, {0x1}}, \ + {"rate[6].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x18A}, {0x1}}, \ + {"rate[6].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x18B}, {0x1}}, \ + {"rate[7].time", ELEM_OUTPUT_INT, {0x18C}, {0x4}}, \ + {"rate[7].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x190}, {0x1}}, \ + {"rate[7].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x191}, {0x1}}, \ + {"rate[7].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x192}, {0x1}}, \ + {"rate[7].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x193}, {0x1}}, \ + {"rate[7].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x194}, {0x1}}, \ + {"rate[7].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x195}, {0x1}}, \ + {"rate[7].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x196}, {0x1}}, \ + {"rate[7].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x197}, {0x1}}, \ + {"rate[7].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x198}, {0x1}}, \ + {"rate[7].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x199}, {0x1}}, \ + {"rate[7].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x19A}, {0x1}}, \ + {"rate[7].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x19B}, {0x1}}, \ + {"rate[7].rate_per_rank[C]", 
ELEM_OUTPUT_INT, {0x19C}, {0x1}}, \ + {"rate[7].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x19D}, {0x1}}, \ + {"rate[7].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x19E}, {0x1}}, \ + {"rate[7].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x1AF}, {0x1}}, \ + {"rate[8].time", ELEM_OUTPUT_INT, {0x1B0}, {0x4}}, \ + {"rate[8].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x1B4}, {0x1}}, \ + {"rate[8].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x1B5}, {0x1}}, \ + {"rate[8].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x1B6}, {0x1}}, \ + {"rate[8].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x1B7}, {0x1}}, \ + {"rate[8].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x1B8}, {0x1}}, \ + {"rate[8].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x1B9}, {0x1}}, \ + {"rate[8].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x1BA}, {0x1}}, \ + {"rate[8].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x1BB}, {0x1}}, \ + {"rate[8].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x1BC}, {0x1}}, \ + {"rate[8].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x1BD}, {0x1}}, \ + {"rate[8].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x1BE}, {0x1}}, \ + {"rate[8].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x1BF}, {0x1}}, \ + {"rate[8].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x1C0}, {0x1}}, \ + {"rate[8].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x1C1}, {0x1}}, \ + {"rate[8].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x1C2}, {0x1}}, \ + {"rate[8].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x1C3}, {0x1}}, \ + {"rate[9].time", ELEM_OUTPUT_INT, {0x1C4}, {0x4}}, \ + {"rate[9].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x1C8}, {0x1}}, \ + {"rate[9].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x1C9}, {0x1}}, \ + {"rate[9].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x1CA}, {0x1}}, \ + {"rate[9].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x1CB}, {0x1}}, \ + {"rate[9].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x1CC}, {0x1}}, \ + {"rate[9].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x1CD}, {0x1}}, \ + {"rate[9].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x1CE}, {0x1}}, \ + {"rate[9].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x1CF}, {0x1}}, \ + {"rate[9].rate_per_rank[8]", 
ELEM_OUTPUT_INT, {0x1D0}, {0x1}}, \ + {"rate[9].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x1D1}, {0x1}}, \ + {"rate[9].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x1D2}, {0x1}}, \ + {"rate[9].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x1D3}, {0x1}}, \ + {"rate[9].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x1D4}, {0x1}}, \ + {"rate[9].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x1D5}, {0x1}}, \ + {"rate[9].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x1D6}, {0x1}}, \ + {"rate[9].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x1D7}, {0x1}}, \ + {"mrr4_0.count", ELEM_OUTPUT_INT, {0x1D8}, {0x1}}, \ + {"mrr4_0.happen", ELEM_OUTPUT_INT, {0x1D9}, {0x1}}, \ + {"shake_count.count", ELEM_OUTPUT_INT, {0x1DA}, {0x1}}, \ + {"shake_count.happen", ELEM_OUTPUT_INT, {0x1DB}, {0x1}}, \ + {"sfc_record0", ELEM_OUTPUT_INT, {0x1DC}, {0x1}}, \ + {"sfc_record1", ELEM_OUTPUT_INT, {0x1DD}, {0x1}}, \ + {"sfc_mr5", ELEM_OUTPUT_INT, {0x1DE}, {0x1}}, \ +} + +// bbox kbox info +#define DATA_MODEL_BBOX_KBOX MODEL_VECTOR(BBOX_KBOX) = { \ + {"CONSOLE START", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"[console info]", ELEM_OUTPUT_STR_NL, {0x0}, {0x10000}}, \ + {"CONSOLE END", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"MESSAGE START", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"[message info]", ELEM_OUTPUT_STR_NL, {0x10000}, {0x40000}}, \ + {"MESSAGE END", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"if panic", ELEM_CTRL_COMPARE, {0x50000}, {0x1}}, \ + {"", ELEM_CTRL_CMP_JUMP_EQ, {0x0}, {0x3}}, \ + {"PANIC START", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"[panic info]", ELEM_OUTPUT_STR_NL, {0x50000}, {0x8000}}, \ + {"PANIC END", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"if emerge", ELEM_CTRL_COMPARE, {0x58000}, {0x1}}, \ + {"", ELEM_CTRL_CMP_JUMP_EQ, {0x0}, {0x3}}, \ + {"EMERGE START", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"[emerge info]", ELEM_OUTPUT_STR_NL, {0x58000}, {0x8000}}, \ + {"EMERGE END", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"if die", ELEM_CTRL_COMPARE, {0x60000}, {0x1}}, \ + {"", ELEM_CTRL_CMP_JUMP_EQ, {0x0}, {0x3}}, \ + {"DIE 
START", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"[die info]", ELEM_OUTPUT_STR_NL, {0x60000}, {0x20000}}, \ + {"DIE END", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ +} + +/** + * the whole space is 512k, used for histroy data record + * the struct distribution is as follows: + * +-------------------+ + * | head info(1k) | region: area: module block: + * +-------------------+ +--------------------+ +-----------------+ +-----------------+ + * | boot region |---->| first area |---->| module block |---->| block head | + * +-------------------+ +--------------------+ +-----------------+ +-----------------+ + * | run region | | second area | | module block | | block data | + * +-------------------+ +--------------------+ +-----------------+ +-----------------+ + * | reserved | | ...... | | ...... | + * +-------------------+ +--------------------+ +-----------------+ + */ +#define DATA_MODEL_HDR_BOOT_BIOS MODEL_VECTOR(HDR_BOOT_BIOS) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"bsbc point", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {"bsbc exc point", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"hboot1 point", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {"hboot1 exc point", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"hboot2 point", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + {"hboot2 exc point", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"[BIOS info]", ELEM_OUTPUT_STR_NL, {0x480}, {0x2780}}, \ +} + +#define DATA_MODEL_HDR_BOOT_DDR MODEL_VECTOR(HDR_BOOT_DDR) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is 
used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"magic_begin", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {"init_keypoint", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"ldo8_vol", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {"buck3_status", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"buck3_vol", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + {"buck5_status", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"buck5_vol", ELEM_OUTPUT_INT, {0x34}, {0x4}}, \ + {"wr_test_result", ELEM_OUTPUT_INT, {0x38}, {0x4}}, \ + {"rint_status[0]", ELEM_OUTPUT_INT, {0x3C}, {0x4}}, \ + {"rint_status[1]", ELEM_OUTPUT_INT, {0x40}, {0x4}}, \ + {"rint_status[2]", ELEM_OUTPUT_INT, {0x44}, {0x4}}, \ + {"rint_status[3]", ELEM_OUTPUT_INT, {0x48}, {0x4}}, \ + {"rint_status[4]", ELEM_OUTPUT_INT, {0x4C}, {0x4}}, \ + {"rint_status[5]", ELEM_OUTPUT_INT, {0x50}, {0x4}}, \ + {"rint_status[6]", ELEM_OUTPUT_INT, {0x54}, {0x4}}, \ + {"rint_status[7]", ELEM_OUTPUT_INT, {0x58}, {0x4}}, \ + {"SOC_SCTRL_DDRRETENTION_ADDR", ELEM_OUTPUT_INT, {0x5C}, {0x4}}, \ + {"SOC_SCTRL_DDRRETENTIONCLR_ADDR", ELEM_OUTPUT_INT, {0x60}, {0x4}}, \ + {"SOC_SCTRL_DRAMRETENTION_ADDR", ELEM_OUTPUT_INT, {0x64}, {0x4}}, \ + {"SC_DDRC_0_3_RESET_REQ", ELEM_OUTPUT_INT, {0x68}, {0x4}}, \ + {"SC_DDRC_4_7_RESET_REQ", ELEM_OUTPUT_INT, {0x6C}, {0x4}}, \ + {"SC_DDRC_0_3_PACK_RESET_REQ", ELEM_OUTPUT_INT, {0x70}, {0x4}}, \ + {"SC_DDRC_4_7_PACK_RESET_REQ", ELEM_OUTPUT_INT, {0x74}, {0x4}}, \ + {"SC_DDRC_EXMBIST0_REGS_RESET_REQ", ELEM_OUTPUT_INT, {0x78}, {0x4}}, \ + {"SC_DDRC_EXMBIST1_REGS_RESET_REQ", ELEM_OUTPUT_INT, {0x7C}, {0x4}}, \ + {"SOC_SCTRL_DDRC_0_3_AO_RST_ADDR", ELEM_OUTPUT_INT, {0x80}, {0x4}}, \ + {"SOC_SCTRL_DDRC_4_7_AO_RST_ADDR", ELEM_OUTPUT_INT, {0x84}, {0x4}}, \ + {"SOC_PMCTRL_PPLLBYPASS0_ADDR", ELEM_OUTPUT_INT, {0x88}, {0x4}}, \ + {"SOC_PMCTRL_PPLLBYPASS1_ADDR", ELEM_OUTPUT_INT, {0x8C}, {0x4}}, \ + 
{"SOC_PMCTRL_PPLL3FCTRL_ADDR", ELEM_OUTPUT_INT, {0x90}, {0x4}}, \ + {"SOC_PMCTRL_PPLL3FCTRL_FRAC_ADDR", ELEM_OUTPUT_INT, {0x94}, {0x4}}, \ + {"SOC_PMCTRL_PPLL4FCTRL_ADDR", ELEM_OUTPUT_INT, {0x98}, {0x4}}, \ + {"SOC_PMCTRL_PPLL4FCTRL_FRAC_ADDR", ELEM_OUTPUT_INT, {0x9C}, {0x4}}, \ + {"SOC_PMCTRL_PPLLOCKSTATUS_ADDR", ELEM_OUTPUT_INT, {0x100}, {0x4}}, \ + {"SC_DDRC_0_3_BYPASS_MODE_CTRL", ELEM_OUTPUT_INT, {0x104}, {0x4}}, \ + {"SC_DDRC_4_7_BYPASS_MODE_CTRL", ELEM_OUTPUT_INT, {0x108}, {0x4}}, \ + {"SC_PLL_PROF_CFG1", ELEM_OUTPUT_INT, {0x10C}, {0x4}}, \ +} + +#define DATA_MODEL_HDR_BOOT_TEE MODEL_VECTOR(HDR_BOOT_TEE) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"[BOOT FATAL INFO SIZE]", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {"[BOOT FATAL INFO]", ELEM_OUTPUT_STR_NL, {0x20}, {0x7E0}}, \ + {"[run point tail]", ELEM_OUTPUT_INT, {0x800}, {0x4}}, \ + {"[boot point info]", ELEM_OUTPUT_HEX, {0x804}, {0x20}}, \ + {"[run point info]", ELEM_OUTPUT_HEX, {0x884}, {0x20}}, \ + {"[last log size]", ELEM_OUTPUT_INT, {0xC00}, {0x4}}, \ + {"[last log data]", ELEM_OUTPUT_STR_NL, {0xC04}, {0x3FC}}, \ +} + +#define DATA_MODEL_HDR_BOOT_ATF MODEL_VECTOR(HDR_BOOT_ATF) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"[ATF info]", ELEM_OUTPUT_STR_NL, {0x1C}, {0xFE4}}, \ +} + +#define 
DATA_MODEL_HDR_BOOT_AREA MODEL_VECTOR(HDR_BOOT_AREA) = { \ + {"BIOS INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_BOOT_BIOS", ELEM_CTRL_TABLE_GOTO, {0x0}, {0x3000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_BIOS}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"DDR INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_BOOT_DDR", ELEM_CTRL_TABLE_GOTO, {0x3000}, {0x1000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_DDR}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"TEE INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_BOOT_TEE", ELEM_CTRL_TABLE_GOTO, {0x4000}, {0x1000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_TEE}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"ATF INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_BOOT_ATF", ELEM_CTRL_TABLE_GOTO, {0x5000}, {0x1000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_ATF}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ +} + +#define DATA_MODEL_HDR_RUN_OS MODEL_VECTOR(HDR_RUN_OS) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"[OS info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {"event_flag", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {"dump_flag", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"err num", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {"[OS log]", ELEM_OUTPUT_STR_NL, {0x100}, {0xF00}}, \ +} + +#define DATA_MODEL_HDR_RUN_LPM MODEL_VECTOR(HDR_RUN_LPM) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, 
{0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0x200}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"[LPM log]", ELEM_OUTPUT_STR_NL, {0x40}, {0x400}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[LPM data]:", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {"reset_reason", ELEM_OUTPUT_INT, {0x440}, {0x4}}, \ + {"slice", ELEM_OUTPUT_INT, {0x444}, {0x4}}, \ + {"rtc", ELEM_OUTPUT_INT, {0x448}, {0x4}}, \ + {"r13", ELEM_OUTPUT_INT, {0x44C}, {0x4}}, \ + {"lr1", ELEM_OUTPUT_INT, {0x450}, {0x4}}, \ + {"pc", ELEM_OUTPUT_INT, {0x454}, {0x4}}, \ + {"xpsr", ELEM_OUTPUT_INT, {0x458}, {0x4}}, \ + {"cfsr", ELEM_OUTPUT_INT, {0x45C}, {0x4}}, \ + {"hfsr", ELEM_OUTPUT_INT, {0x460}, {0x4}}, \ + {"bfar", ELEM_OUTPUT_INT, {0x464}, {0x4}}, \ + {"exc_trace", ELEM_OUTPUT_INT, {0x468}, {0x1}}, \ + {"ddr_exc", ELEM_OUTPUT_INT, {0x469}, {0x1}}, \ + {"irq_id", ELEM_OUTPUT_INT, {0x46A}, {0x2}}, \ + {"task_id", ELEM_OUTPUT_INT, {0x46C}, {0x4}}, \ + {"TSENSOR_A55", ELEM_OUTPUT_INT, {0x470}, {0x1}}, \ + {"TSENSOR_PERI", ELEM_OUTPUT_INT, {0x471}, {0x1}}, \ + {"TSENSOR_AIC0", ELEM_OUTPUT_INT, {0x472}, {0x1}}, \ + {"TSENSOR_AIC1", ELEM_OUTPUT_INT, {0x473}, {0x1}}, \ + {"DDR_T_GRADE", ELEM_OUTPUT_INT, {0x474}, {0x1}}, \ + {"EDP_SCALE_0", ELEM_OUTPUT_INT, {0x475}, {0x1}}, \ + {"EDP_SCALE_1", ELEM_OUTPUT_INT, {0x476}, {0x1}}, \ + {"TMP_STATUS", ELEM_OUTPUT_INT, {0x477}, {0x1}}, \ + {"TMP_CTRL_ST", ELEM_OUTPUT_INT, {0x478}, {0x1}}, \ + {"AIC_FREQ_ST", ELEM_OUTPUT_INT, {0x479}, {0x1}}, \ + {"A55_FREQ_ST", ELEM_OUTPUT_INT, {0x47a}, {0x1}}, \ + {"AIC_NUM_ST", ELEM_OUTPUT_INT, {0x47b}, {0x1}}, \ + {"TMP_RST", ELEM_OUTPUT_INT, {0x47c}, {0x1}}, \ + {"TMP_HIGH", ELEM_OUTPUT_INT, {0x47d}, {0x1}}, \ + {"TMP_NOR", ELEM_OUTPUT_INT, {0x47e}, {0x1}}, \ + {"TMP_PERIOD", ELEM_OUTPUT_INT, {0x47f}, {0x1}}, \ + {"T_RST_STATUS", ELEM_OUTPUT_INT, {0x48D}, {0x1}}, \ + {"T_ERR_TSENSOR", 
ELEM_OUTPUT_INT, {0x48e}, {0x1}}, \ + {"T_ERR_EFUSE", ELEM_OUTPUT_INT, {0x48f}, {0x1}}, \ + {"AICORE0_HIGHTEMP_VOLT", ELEM_OUTPUT_INT, {0x490}, {0x4}}, \ + {"AICORE1_HIGHTEMP_VOLT", ELEM_OUTPUT_INT, {0x494}, {0x4}}, \ + {"CPU_HIGHTEMP_VOLT", ELEM_OUTPUT_INT, {0x498}, {0x4}}, \ + {"AIMEMORY_HIGHTEMP_VOLT", ELEM_OUTPUT_INT, {0x49c}, {0x4}}, \ + {"PERI_HIGHTEMP_VOLT", ELEM_OUTPUT_INT, {0x4a0}, {0x4}}, \ + {"AICORE0_CUR_VOLT", ELEM_OUTPUT_INT, {0x4a4}, {0x4}}, \ + {"AICORE1_CUR_VOLT", ELEM_OUTPUT_INT, {0x4a8}, {0x4}}, \ + {"CPU_CUR_VOLT", ELEM_OUTPUT_INT, {0x4ac}, {0x4}}, \ + {"AIMEMORY_CUR_VOLT", ELEM_OUTPUT_INT, {0x4b0}, {0x4}}, \ + {"PERI_CUR_VOLT", ELEM_OUTPUT_INT, {0x4b4}, {0x4}}, \ + {"AICORE0_SVFD_VOLT", ELEM_OUTPUT_INT, {0x4b8}, {0x4}}, \ + {"AICORE1_SVFD_VOLT", ELEM_OUTPUT_INT, {0x4bc}, {0x4}}, \ + {"AICORE0_SVFD_CPM", ELEM_OUTPUT_INT, {0x4c0}, {0x2}}, \ + {"AICORE1_SVFD_CPM", ELEM_OUTPUT_INT, {0x4c2}, {0x2}}, \ + {"AICORE0_NOTIFY_ST", ELEM_OUTPUT_INT, {0x4c4}, {0x1}}, \ + {"AICORE1_NOTIFY_ST", ELEM_OUTPUT_INT, {0x4c5}, {0x1}}, \ + {"CPU_NOTIFY_ST", ELEM_OUTPUT_INT, {0x4c6}, {0x1}}, \ + {"AIMEMORY_NOTIFY_ST", ELEM_OUTPUT_INT, {0x4c7}, {0x1}}, \ + {"PERI_NOTIFY_ST", ELEM_OUTPUT_INT, {0x4c8}, {0x1}}, \ + {"AICORE0_TZONE", ELEM_OUTPUT_INT, {0x4c9}, {0x1}}, \ + {"AICORE1_TZONE", ELEM_OUTPUT_INT, {0x4ca}, {0x1}}, \ + {"CPU_TZONE", ELEM_OUTPUT_INT, {0x4cb}, {0x1}}, \ + {"AIMEMORY_TZONE", ELEM_OUTPUT_INT, {0x4cc}, {0x1}}, \ + {"PERI_TZONE", ELEM_OUTPUT_INT, {0x4cd}, {0x1}}, \ + {"VOLT_RISE_TEMP", ELEM_OUTPUT_INT, {0x4ce}, {0x1}}, \ + {"VOLT_DECREASE_TEMP", ELEM_OUTPUT_INT, {0x4cf}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, /* below for ddr */ \ + {"[DDR data]:", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {"magic_begin", ELEM_OUTPUT_INT, {0x800}, {0x4}}, \ + {"slice_time", ELEM_OUTPUT_INT, {0x804}, {0x4}}, \ + {"mod_reason", ELEM_OUTPUT_INT, {0x808}, {0x4}}, \ + {"ddr_freq_id", ELEM_OUTPUT_INT, {0x80C}, {0x4}}, \ + {"ddr_status", ELEM_OUTPUT_INT, {0x810}, 
{0x4}}, \ + {"uce_exc", ELEM_OUTPUT_INT, {0x814}, {0x4}}, \ + {"SC_TSENSOR_INFO_ADDR", ELEM_OUTPUT_INT, {0x818}, {0x4}}, \ + {"SC_TSENSOR_AICORE_LIMIT", ELEM_OUTPUT_INT, {0x81C}, {0x4}}, \ + {"SC_TSENSOR_MAX_TEMP", ELEM_OUTPUT_INT, {0x820}, {0x4}}, \ + {"channel_mask", ELEM_OUTPUT_INT, {0x824}, {0x4}}, \ + {"channel_num", ELEM_OUTPUT_INT, {0x828}, {0x4}}, \ + {"rank_num", ELEM_OUTPUT_INT, {0x82C}, {0x4}}, \ + {"ddr_size", ELEM_OUTPUT_INT, {0x830}, {0x4}}, \ + {"manufactery_id", ELEM_OUTPUT_INT, {0x834}, {0x4}}, \ + {"iecc_cerr_thresh[0]", ELEM_OUTPUT_INT, {0x838}, {0x4}}, \ + {"iecc_cerr_thresh[1]", ELEM_OUTPUT_INT, {0x83C}, {0x4}}, \ + {"iecc_cerr_thresh[2]", ELEM_OUTPUT_INT, {0x840}, {0x4}}, \ + {"iecc_cerr_thresh[3]", ELEM_OUTPUT_INT, {0x844}, {0x4}}, \ + {"iecc_cerr_thresh[4]", ELEM_OUTPUT_INT, {0x848}, {0x4}}, \ + {"iecc_cerr_thresh[5]", ELEM_OUTPUT_INT, {0x84C}, {0x4}}, \ + {"iecc_cerr_thresh[6]", ELEM_OUTPUT_INT, {0x850}, {0x4}}, \ + {"iecc_cerr_thresh[7]", ELEM_OUTPUT_INT, {0x854}, {0x4}}, \ + {"iecc_ctrl[0]", ELEM_OUTPUT_INT, {0x858}, {0x4}}, \ + {"iecc_ctrl[1]", ELEM_OUTPUT_INT, {0x85C}, {0x4}}, \ + {"iecc_ctrl[2]", ELEM_OUTPUT_INT, {0x860}, {0x4}}, \ + {"iecc_ctrl[3]", ELEM_OUTPUT_INT, {0x864}, {0x4}}, \ + {"iecc_ctrl[4]", ELEM_OUTPUT_INT, {0x868}, {0x4}}, \ + {"iecc_ctrl[5]", ELEM_OUTPUT_INT, {0x86C}, {0x4}}, \ + {"iecc_ctrl[6]", ELEM_OUTPUT_INT, {0x870}, {0x4}}, \ + {"iecc_ctrl[7]", ELEM_OUTPUT_INT, {0x874}, {0x4}}, \ + {"iecc_cerr_cnt[0]", ELEM_OUTPUT_INT, {0x878}, {0x4}}, \ + {"iecc_cerr_cnt[1]", ELEM_OUTPUT_INT, {0x87C}, {0x4}}, \ + {"iecc_cerr_cnt[2]", ELEM_OUTPUT_INT, {0x880}, {0x4}}, \ + {"iecc_cerr_cnt[3]", ELEM_OUTPUT_INT, {0x884}, {0x4}}, \ + {"iecc_cerr_cnt[4]", ELEM_OUTPUT_INT, {0x888}, {0x4}}, \ + {"iecc_cerr_cnt[5]", ELEM_OUTPUT_INT, {0x88C}, {0x4}}, \ + {"iecc_cerr_cnt[6]", ELEM_OUTPUT_INT, {0x890}, {0x4}}, \ + {"iecc_cerr_cnt[7]", ELEM_OUTPUT_INT, {0x894}, {0x4}}, \ + {"iecc_uerr_cnt[0]", ELEM_OUTPUT_INT, {0x898}, {0x4}}, \ + 
{"iecc_uerr_cnt[1]", ELEM_OUTPUT_INT, {0x89C}, {0x4}}, \ + {"iecc_uerr_cnt[2]", ELEM_OUTPUT_INT, {0x8A0}, {0x4}}, \ + {"iecc_uerr_cnt[3]", ELEM_OUTPUT_INT, {0x8A4}, {0x4}}, \ + {"iecc_uerr_cnt[4]", ELEM_OUTPUT_INT, {0x8A8}, {0x4}}, \ + {"iecc_uerr_cnt[5]", ELEM_OUTPUT_INT, {0x8AC}, {0x4}}, \ + {"iecc_uerr_cnt[6]", ELEM_OUTPUT_INT, {0x8B0}, {0x4}}, \ + {"iecc_uerr_cnt[7]", ELEM_OUTPUT_INT, {0x8B4}, {0x4}}, \ + {"magic_byte", ELEM_OUTPUT_INT, {0x900}, {0x1}}, \ + {"err_max", ELEM_OUTPUT_INT, {0x904}, {0x1}}, \ + {"irq_count", ELEM_OUTPUT_INT, {0x908}, {0x1}}, \ + {"index", ELEM_OUTPUT_INT, {0x90C}, {0x1}}, \ + {"rate[0].time", ELEM_OUTPUT_INT, {0x900}, {0x4}}, \ + {"rate[0].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x904}, {0x1}}, \ + {"rate[0].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x905}, {0x1}}, \ + {"rate[0].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x906}, {0x1}}, \ + {"rate[0].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x907}, {0x1}}, \ + {"rate[0].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x908}, {0x1}}, \ + {"rate[0].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x909}, {0x1}}, \ + {"rate[0].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x90A}, {0x1}}, \ + {"rate[0].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x90B}, {0x1}}, \ + {"rate[0].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x90C}, {0x1}}, \ + {"rate[0].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x90D}, {0x1}}, \ + {"rate[0].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x90E}, {0x1}}, \ + {"rate[0].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x90F}, {0x1}}, \ + {"rate[0].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x910}, {0x1}}, \ + {"rate[0].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x911}, {0x1}}, \ + {"rate[0].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x912}, {0x1}}, \ + {"rate[0].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x913}, {0x1}}, \ + {"rate[1].time", ELEM_OUTPUT_INT, {0x914}, {0x4}}, \ + {"rate[1].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x918}, {0x1}}, \ + {"rate[1].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x919}, {0x1}}, \ + {"rate[1].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x91A}, {0x1}}, \ + 
{"rate[1].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x91B}, {0x1}}, \ + {"rate[1].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x91C}, {0x1}}, \ + {"rate[1].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x91D}, {0x1}}, \ + {"rate[1].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x91E}, {0x1}}, \ + {"rate[1].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x91F}, {0x1}}, \ + {"rate[1].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x920}, {0x1}}, \ + {"rate[1].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x921}, {0x1}}, \ + {"rate[1].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x922}, {0x1}}, \ + {"rate[1].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x923}, {0x1}}, \ + {"rate[1].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x924}, {0x1}}, \ + {"rate[1].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x925}, {0x1}}, \ + {"rate[1].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x926}, {0x1}}, \ + {"rate[1].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x927}, {0x1}}, \ + {"rate[2].time", ELEM_OUTPUT_INT, {0x928}, {0x4}}, \ + {"rate[2].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x92C}, {0x1}}, \ + {"rate[2].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x92D}, {0x1}}, \ + {"rate[2].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x92E}, {0x1}}, \ + {"rate[2].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x92F}, {0x1}}, \ + {"rate[2].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x930}, {0x1}}, \ + {"rate[2].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x931}, {0x1}}, \ + {"rate[2].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x932}, {0x1}}, \ + {"rate[2].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x933}, {0x1}}, \ + {"rate[2].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x934}, {0x1}}, \ + {"rate[2].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x935}, {0x1}}, \ + {"rate[2].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x936}, {0x1}}, \ + {"rate[2].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x937}, {0x1}}, \ + {"rate[2].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x938}, {0x1}}, \ + {"rate[2].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x939}, {0x1}}, \ + {"rate[2].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x93A}, {0x1}}, \ + {"rate[2].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x93B}, {0x1}}, \ + 
{"rate[3].time", ELEM_OUTPUT_INT, {0x93C}, {0x4}}, \ + {"rate[3].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x940}, {0x1}}, \ + {"rate[3].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x941}, {0x1}}, \ + {"rate[3].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x942}, {0x1}}, \ + {"rate[3].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x943}, {0x1}}, \ + {"rate[3].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x944}, {0x1}}, \ + {"rate[3].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x945}, {0x1}}, \ + {"rate[3].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x946}, {0x1}}, \ + {"rate[3].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x947}, {0x1}}, \ + {"rate[3].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x948}, {0x1}}, \ + {"rate[3].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x949}, {0x1}}, \ + {"rate[3].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x94A}, {0x1}}, \ + {"rate[3].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x94B}, {0x1}}, \ + {"rate[3].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x94C}, {0x1}}, \ + {"rate[3].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x94D}, {0x1}}, \ + {"rate[3].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x94E}, {0x1}}, \ + {"rate[3].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x94F}, {0x1}}, \ + {"rate[4].time", ELEM_OUTPUT_INT, {0x950}, {0x4}}, \ + {"rate[4].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x954}, {0x1}}, \ + {"rate[4].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x955}, {0x1}}, \ + {"rate[4].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x956}, {0x1}}, \ + {"rate[4].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x957}, {0x1}}, \ + {"rate[4].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x958}, {0x1}}, \ + {"rate[4].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x959}, {0x1}}, \ + {"rate[4].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x95A}, {0x1}}, \ + {"rate[4].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x95B}, {0x1}}, \ + {"rate[4].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x95C}, {0x1}}, \ + {"rate[4].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x95D}, {0x1}}, \ + {"rate[4].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x95E}, {0x1}}, \ + {"rate[4].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x95F}, {0x1}}, \ + 
{"rate[4].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x960}, {0x1}}, \ + {"rate[4].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x961}, {0x1}}, \ + {"rate[4].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x962}, {0x1}}, \ + {"rate[4].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x963}, {0x1}}, \ + {"rate[5].time", ELEM_OUTPUT_INT, {0x964}, {0x4}}, \ + {"rate[5].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x968}, {0x1}}, \ + {"rate[5].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x969}, {0x1}}, \ + {"rate[5].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x96A}, {0x1}}, \ + {"rate[5].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x96B}, {0x1}}, \ + {"rate[5].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x96C}, {0x1}}, \ + {"rate[5].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x96D}, {0x1}}, \ + {"rate[5].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x96E}, {0x1}}, \ + {"rate[5].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x96F}, {0x1}}, \ + {"rate[5].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x970}, {0x1}}, \ + {"rate[5].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x971}, {0x1}}, \ + {"rate[5].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x972}, {0x1}}, \ + {"rate[5].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x973}, {0x1}}, \ + {"rate[5].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x974}, {0x1}}, \ + {"rate[5].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x975}, {0x1}}, \ + {"rate[5].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x976}, {0x1}}, \ + {"rate[5].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x977}, {0x1}}, \ + {"rate[6].time", ELEM_OUTPUT_INT, {0x978}, {0x4}}, \ + {"rate[6].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x97C}, {0x1}}, \ + {"rate[6].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x97D}, {0x1}}, \ + {"rate[6].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x97E}, {0x1}}, \ + {"rate[6].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x97F}, {0x1}}, \ + {"rate[6].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x980}, {0x1}}, \ + {"rate[6].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x981}, {0x1}}, \ + {"rate[6].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x982}, {0x1}}, \ + {"rate[6].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x983}, {0x1}}, \ + 
{"rate[6].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x984}, {0x1}}, \ + {"rate[6].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x985}, {0x1}}, \ + {"rate[6].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x986}, {0x1}}, \ + {"rate[6].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x987}, {0x1}}, \ + {"rate[6].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x988}, {0x1}}, \ + {"rate[6].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x989}, {0x1}}, \ + {"rate[6].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x98A}, {0x1}}, \ + {"rate[6].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x98B}, {0x1}}, \ + {"rate[7].time", ELEM_OUTPUT_INT, {0x98C}, {0x4}}, \ + {"rate[7].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x990}, {0x1}}, \ + {"rate[7].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x991}, {0x1}}, \ + {"rate[7].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x992}, {0x1}}, \ + {"rate[7].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x993}, {0x1}}, \ + {"rate[7].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x994}, {0x1}}, \ + {"rate[7].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x995}, {0x1}}, \ + {"rate[7].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x996}, {0x1}}, \ + {"rate[7].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x997}, {0x1}}, \ + {"rate[7].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x998}, {0x1}}, \ + {"rate[7].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x999}, {0x1}}, \ + {"rate[7].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x99A}, {0x1}}, \ + {"rate[7].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x99B}, {0x1}}, \ + {"rate[7].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x99C}, {0x1}}, \ + {"rate[7].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x99D}, {0x1}}, \ + {"rate[7].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x99E}, {0x1}}, \ + {"rate[7].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x9AF}, {0x1}}, \ + {"rate[8].time", ELEM_OUTPUT_INT, {0x9B0}, {0x4}}, \ + {"rate[8].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x9B4}, {0x1}}, \ + {"rate[8].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x9B5}, {0x1}}, \ + {"rate[8].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x9B6}, {0x1}}, \ + {"rate[8].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x9B7}, {0x1}}, \ + 
{"rate[8].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x9B8}, {0x1}}, \ + {"rate[8].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x9B9}, {0x1}}, \ + {"rate[8].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x9BA}, {0x1}}, \ + {"rate[8].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x9BB}, {0x1}}, \ + {"rate[8].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x9BC}, {0x1}}, \ + {"rate[8].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x9BD}, {0x1}}, \ + {"rate[8].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x9BE}, {0x1}}, \ + {"rate[8].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x9BF}, {0x1}}, \ + {"rate[8].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x9C0}, {0x1}}, \ + {"rate[8].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x9C1}, {0x1}}, \ + {"rate[8].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x9C2}, {0x1}}, \ + {"rate[8].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x9C3}, {0x1}}, \ + {"rate[9].time", ELEM_OUTPUT_INT, {0x9C4}, {0x4}}, \ + {"rate[9].rate_per_rank[0]", ELEM_OUTPUT_INT, {0x9C8}, {0x1}}, \ + {"rate[9].rate_per_rank[1]", ELEM_OUTPUT_INT, {0x9C9}, {0x1}}, \ + {"rate[9].rate_per_rank[2]", ELEM_OUTPUT_INT, {0x9CA}, {0x1}}, \ + {"rate[9].rate_per_rank[3]", ELEM_OUTPUT_INT, {0x9CB}, {0x1}}, \ + {"rate[9].rate_per_rank[4]", ELEM_OUTPUT_INT, {0x9CC}, {0x1}}, \ + {"rate[9].rate_per_rank[5]", ELEM_OUTPUT_INT, {0x9CD}, {0x1}}, \ + {"rate[9].rate_per_rank[6]", ELEM_OUTPUT_INT, {0x9CE}, {0x1}}, \ + {"rate[9].rate_per_rank[7]", ELEM_OUTPUT_INT, {0x9CF}, {0x1}}, \ + {"rate[9].rate_per_rank[8]", ELEM_OUTPUT_INT, {0x9D0}, {0x1}}, \ + {"rate[9].rate_per_rank[9]", ELEM_OUTPUT_INT, {0x9D1}, {0x1}}, \ + {"rate[9].rate_per_rank[A]", ELEM_OUTPUT_INT, {0x9D2}, {0x1}}, \ + {"rate[9].rate_per_rank[B]", ELEM_OUTPUT_INT, {0x9D3}, {0x1}}, \ + {"rate[9].rate_per_rank[C]", ELEM_OUTPUT_INT, {0x9D4}, {0x1}}, \ + {"rate[9].rate_per_rank[D]", ELEM_OUTPUT_INT, {0x9D5}, {0x1}}, \ + {"rate[9].rate_per_rank[E]", ELEM_OUTPUT_INT, {0x9D6}, {0x1}}, \ + {"rate[9].rate_per_rank[F]", ELEM_OUTPUT_INT, {0x9D7}, {0x1}}, \ + {"mrr4_0.count", ELEM_OUTPUT_INT, {0x9D8}, {0x1}}, \ + 
{"mrr4_0.happen", ELEM_OUTPUT_INT, {0x9D9}, {0x1}}, \ + {"shake_count.count", ELEM_OUTPUT_INT, {0x9DA}, {0x1}}, \ + {"shake_count.happen", ELEM_OUTPUT_INT, {0x9DB}, {0x1}}, \ + {"sfc_record0", ELEM_OUTPUT_INT, {0x9DC}, {0x1}}, \ + {"sfc_record1", ELEM_OUTPUT_INT, {0x9DD}, {0x1}}, \ + {"sfc_mr5", ELEM_OUTPUT_INT, {0x9DE}, {0x1}}, \ +} + +#define DATA_MODEL_HDR_RUN_TEE MODEL_VECTOR(HDR_RUN_TEE) = { \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"[RUN FATAL INFO SIZE]", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {"[RUN FATAL INFO]", ELEM_OUTPUT_STR_NL, {0x20}, {0x7E0}}, \ +} + +#define DATA_MODEL_HDR_RUN_ATF MODEL_VECTOR(HDR_RUN_ATF) = {\ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"module id", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"if", ELEM_CTRL_COMPARE, {0xC}, {0x4}}, \ + {"is used", ELEM_CTRL_CMP_JUMP_NE, {0x1}, {0xFF}}, \ + {"err code", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"reason", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"hot reset index", ELEM_OUTPUT_INT, {0x18}, {0x4}}, \ + {"[ATF info]", ELEM_OUTPUT_STR_NL, {0x1C}, {0x7E4}}, \ +} + +#define DATA_MODEL_HDR_RUN_AREA MODEL_VECTOR(HDR_RUN_AREA) = { \ + {"TEE INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_RUN_TEE", ELEM_CTRL_TABLE_GOTO, {0x0}, {0x800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_TEE}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"ATF INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_RUN_ATF", ELEM_CTRL_TABLE_GOTO, {0x800}, {0x800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_ATF}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + 
{"LPM INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_RUN_LPM", ELEM_CTRL_TABLE_GOTO, {0x1000}, {0x1000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_LPM}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"OS INFO", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"HDR_RUN_OS", ELEM_CTRL_TABLE_GOTO, {0x2000}, {0x1000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_OS}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ +} + +#define DATA_MODEL_HDR_BOOT MODEL_VECTOR(HDR_BOOT) = { \ + {"area 0", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x0}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 1", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x7800}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 2", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0xF000}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 3", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x16800}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 4", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x1E000}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 5", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x25800}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 
6", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_AREA", ELEM_CTRL_TABLE_GOTO, {0x2D000}, {0x7800}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_AREA}, {0x1}}, \ +} + +#define DATA_MODEL_HDR_RUN MODEL_VECTOR(HDR_RUN) = { \ + {"area 0", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_AREA", ELEM_CTRL_TABLE_GOTO, {0x0}, {0x3C00}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 1", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_AREA", ELEM_CTRL_TABLE_GOTO, {0x3C00}, {0x3C00}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 2", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_AREA", ELEM_CTRL_TABLE_GOTO, {0x7800}, {0x3C00}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 3", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_AREA", ELEM_CTRL_TABLE_GOTO, {0xB400}, {0x3C00}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 4", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_AREA", ELEM_CTRL_TABLE_GOTO, {0xF000}, {0x3C00}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 5", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_AREA", ELEM_CTRL_TABLE_GOTO, {0x12C00}, {0x3C00}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_AREA}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"area 6", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_AREA", ELEM_CTRL_TABLE_GOTO, {0x16800}, {0x3C00}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_AREA}, {0x1}}, \ +} + +#define DATA_MODEL_HDR_BOOT_INFO MODEL_VECTOR(HDR_BOOT_INFO) = { \ + {"region offset", ELEM_OUTPUT_INT, 
{0x0}, {0x4}}, \ + {"region size", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"region config", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"total area", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"history area", ELEM_OUTPUT_INT, {0xC}, {0x4}}, \ + {"error area", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"area config:", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" used module count", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"module config:", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" module 0 offset", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {" module 0 size", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 1 offset", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {" module 1 size", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 2 offset", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + {" module 2 size", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 3 offset", ELEM_OUTPUT_INT, {0x34}, {0x4}}, \ + {" module 3 size", ELEM_OUTPUT_INT, {0x38}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"region control", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"area index", ELEM_OUTPUT_INT, {0x6C}, {0x4}}, \ + {"error area count", ELEM_OUTPUT_INT, {0x70}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 0 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x74}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x78}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x7C}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0x80}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0x84}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x88}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 1 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x8C}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x90}, {0x4}}, \ + {" exception type", 
ELEM_OUTPUT_INT, {0x94}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0x98}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0x9C}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xA0}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 2 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xA4}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xA8}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xAC}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xB0}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0xB4}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xB8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 3 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xBC}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xC0}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xC4}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xC8}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0xCC}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xD0}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 4 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xD4}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xD8}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xDC}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xE0}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0xE4}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xE8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 5 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xEC}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xF0}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xF4}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0xF8}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0xFC}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x100}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 6 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + 
{" flag", ELEM_OUTPUT_INT, {0x104}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x108}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x10C}, {0x4}}, \ + {" module id", ELEM_OUTPUT_INT, {0x110}, {0x4}}, \ + {" exception id", ELEM_OUTPUT_INT, {0x114}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x118}, {0x4}}, \ +} + +#define DATA_MODEL_HDR_RUN_INFO MODEL_VECTOR(HDR_RUN_INFO) = { \ + {"region offset", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"region size", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"region config", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"total area", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"history area", ELEM_OUTPUT_INT, {0xC}, {0x4}}, \ + {"error area", ELEM_OUTPUT_INT, {0x10}, {0x4}}, \ + {"area config:", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" used module count", ELEM_OUTPUT_INT, {0x14}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"module config:", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" module 0 offset", ELEM_OUTPUT_INT, {0x1C}, {0x4}}, \ + {" module 0 size", ELEM_OUTPUT_INT, {0x20}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 1 offset", ELEM_OUTPUT_INT, {0x24}, {0x4}}, \ + {" module 1 size", ELEM_OUTPUT_INT, {0x28}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 2 offset", ELEM_OUTPUT_INT, {0x2C}, {0x4}}, \ + {" module 2 size", ELEM_OUTPUT_INT, {0x30}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {" module 3 offset", ELEM_OUTPUT_INT, {0x34}, {0x4}}, \ + {" module 3 size", ELEM_OUTPUT_INT, {0x38}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"region control", ELEM_OUTPUT_DIVIDE, {0x0}, {0x2D}}, \ + {"area index", ELEM_OUTPUT_INT, {0x6C}, {0x4}}, \ + {"error area count", ELEM_OUTPUT_INT, {0x70}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 0 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x74}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x78}, {0x4}}, \ + {" exception type", 
ELEM_OUTPUT_INT, {0x7C}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x88}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 1 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x8C}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x90}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x94}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xA0}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 2 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xA4}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xA8}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xAC}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xB8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 3 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xBC}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xC0}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xC4}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xD0}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 4 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xD4}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xD8}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xDC}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0xE8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 5 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0xEC}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0xF0}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0xF4}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x100}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"[area 6 control info]", ELEM_OUTPUT_STR_CONST, {0x0}, {0x0}}, \ + {" flag", ELEM_OUTPUT_INT, {0x104}, {0x4}}, \ + {" tag", ELEM_OUTPUT_INT, {0x108}, {0x4}}, \ + {" exception type", ELEM_OUTPUT_INT, {0x10C}, {0x4}}, \ + {" reset number", ELEM_OUTPUT_INT, {0x118}, {0x4}}, \ +} + +#define 
DATA_MODEL_HDR MODEL_VECTOR(HDR) = { \ + {"head info", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"magic", ELEM_OUTPUT_INT, {0x0}, {0x4}}, \ + {"version", ELEM_OUTPUT_INT, {0x4}, {0x4}}, \ + {"reset count", ELEM_OUTPUT_INT, {0x8}, {0x4}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"boot region", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_BOOT_INFO", ELEM_CTRL_TABLE_GOTO, {0XC}, {0x168}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT_INFO}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"HDR_BOOT", ELEM_CTRL_TABLE_GOTO, {0x400}, {0xA000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_BOOT}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"run region", ELEM_OUTPUT_DIVIDE, {0x0}, {0x3D}}, \ + {"HDR_RUN_INFO", ELEM_CTRL_TABLE_GOTO, {0x170}, {0x164}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN_INFO}, {0x1}}, \ + {"NL", ELEM_OUTPUT_NL, {0x0}, {0x0}}, \ + {"HDR_RUN", ELEM_CTRL_TABLE_GOTO, {0x4B400}, {0xA000}}, \ + {"table_index", ELEM_CTRL_TABLE_RANGE, {PLAINTEXT_TABLE_HDR_RUN}, {0x1}}, \ +} + +#endif // BBOX_DDR_DATA_MINI_H diff --git a/inc/toolchain/bbox/bbox_proxy.h b/inc/toolchain/bbox/bbox_proxy.h new file mode 100644 index 000000000..b117c79cf --- /dev/null +++ b/inc/toolchain/bbox/bbox_proxy.h @@ -0,0 +1,51 @@ +/** + * @file bbox_proxy.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2020. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#ifndef BBOX_PROXY_H +#define BBOX_PROXY_H + +#include "bbox_proxy_config.h" + +typedef struct proxy_excep_time_t { + unsigned long long tv_sec; + unsigned long long tv_usec; +} proxy_excep_time; + +enum BBOX_PROXY_DUMP_STATUS { + PROXY_STATUS_INIT = 0, + PROXY_STATUS_DOING = 1, + PROXY_STATUS_DONE = 2, +}; + +struct bbox_proxy_exception_ctrl { + proxy_excep_time e_clock; // 模块触发异常时间 + unsigned int e_main_excepid; // 模块触发的主异常id + unsigned int e_sub_excepid; // 模块触发的子异常id + unsigned int e_info_offset; // 模块全部异常信息偏移值,基于模块预留内存首地址,从magic开始 + unsigned int e_info_len; // 模块全部异常信息长度 + unsigned short e_dump_status; // 模块将异常信息存预留内存的控制状态 + unsigned short e_save_status; // 代理将异常信息从预留内存导出的控制状态 + unsigned int e_reserved; // 结构对齐预留 +}; + +// 通过共享内存交互 +#define BBOX_PROXY_MAGIC 0x56312e31 +#define BBOX_PROXY_CTRL_RESERV 192 + +struct bbox_proxy_module_ctrl { + unsigned int magic; // 使用宏BBOX_PROXY_MAGIC + struct bbox_proxy_ctrl_info config; // ctrl块配置 + struct bbox_proxy_exception_ctrl block[BBOX_PROXY_CTRL_NUM]; // 模块dump信息控制状态 + unsigned char reserved[BBOX_PROXY_CTRL_RESERV]; // 预留空间,用于后续扩展 +}; + +#define BBOX_PROXY_CTRL_BLOCK_SIZE sizeof(struct bbox_proxy_module_ctrl) // total 512 byte + +#endif // BBOX_PROXY_H diff --git a/inc/toolchain/bbox/bbox_proxy_config.h b/inc/toolchain/bbox/bbox_proxy_config.h new file mode 100644 index 000000000..a802219da --- /dev/null +++ b/inc/toolchain/bbox/bbox_proxy_config.h @@ -0,0 +1,89 @@ +/** + * @file bbox_proxy_config.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2020. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#ifndef BBOX_PROXY_CONFIG_H +#define BBOX_PROXY_CONFIG_H + +#define BBOX_EXCEPTIONDESC_MAXLEN 48 + +struct bbox_proxy_exception_info { + unsigned int e_excepid; // 异常id + unsigned char e_process_priority; // 异常处理级别 + unsigned char e_reboot_priority; // 异常重启级别 + unsigned char e_excep_type; // 异常类型 + unsigned char e_reentrant; // 异常是否可重入 + unsigned long long e_notify_core_mask; // 异常联动掩码 + unsigned long long e_reset_core_mask; // 异常联动掩码 + unsigned char e_desc[BBOX_EXCEPTIONDESC_MAXLEN]; // 异常描述 +}; + +enum BBOX_PROXY_CAPACITY { + BBOX_PROXY_CAPACITY_REGISTER = 1 << 0, + BBOX_PROXY_CAPACITY_DUMP_DDR = 1 << 1, + BBOX_PROXY_CAPACITY_DUMP_LOG = 1 << 2, + BBOX_PROXY_CAPACITY_TRANS_ID = 1 << 3, +}; + +struct bbox_proxy_module_info { + unsigned char coreid; // 模块id + unsigned long long flag; // dump能力标记位,BBOX_PROXY_CAPACITY + const char *name; // 模块名 + unsigned long long log_addr; // 模块dump起始地址 + unsigned int log_len; // 模块dump长度 + unsigned int wait_timeout; // dump等待超时时间,单位ms + unsigned int e_count; // 异常信息注册数量 +}; + +enum BBOX_PROXY_BLOCK_TYPE { + BLOCK_TYPE_NORMAL = 1 << 0, // 普通数据 + BLOCK_TYPE_STARTUP = 1 << 1, // 启动异常数据 +}; + +enum BBOX_PROXY_CHECK_FLAG { + CHECK_NONE = 0, + CHECK_STARTUP_EXCEPID = 1 << 0, + CHECK_STARTUP_TMSTMP = 1 << 1, + CHECK_RUNTIME_EXCEPID = 1 << 2, + CHECK_RUNTIME_TMSTMP = 1 << 3, + CHECK_HEARTBEAT_EXCEPID = 1 << 4, + CHECK_HEARTBEAT_TMSTMP = 1 << 5, +}; + +struct bbox_proxy_block_info { + unsigned int ctrl_type : 16; + unsigned int ctrl_flag : 16; + unsigned int info_offset; + unsigned int info_block_len; +}; + +#define BBOX_PROXY_CTRL_NUM 6 +#define BBOX_PROXY_CTRL_PAD 3 + +struct bbox_proxy_ctrl_info { + unsigned char e_block_num; // 需要使用的控制块个数,最多BBOX_PROXY_CTRL_NUM + unsigned char padding[BBOX_PROXY_CTRL_PAD]; // padding + struct bbox_proxy_block_info block_info[BBOX_PROXY_CTRL_NUM]; // 控制块配置 +}; + +#define BBOX_PROXY_EXCEPTION_NUM 256 + +struct bbox_proxy_info { + struct bbox_proxy_module_info module; + struct 
bbox_proxy_exception_info exception[BBOX_PROXY_EXCEPTION_NUM]; + struct bbox_proxy_ctrl_info ctrl; +}; + +#define BBOX_PROXY_INITIALIZER(name) { \ + .module = BBOX_PROXY_MODULE_##name, \ + .exception = BBOX_PROXY_EXCEPTION_##name, \ + .ctrl = BBOX_PROXY_CTRL_##name, \ +} + +#endif // BBOX_PROXY_CONFIG_H diff --git a/inc/toolchain/bbox/bbox_proxy_config_dc.h b/inc/toolchain/bbox/bbox_proxy_config_dc.h new file mode 100644 index 000000000..ea82c9aff --- /dev/null +++ b/inc/toolchain/bbox/bbox_proxy_config_dc.h @@ -0,0 +1,262 @@ +/** + * @file bbox_proxy_config_dc.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2020. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef BBOX_PROXY_CONFIG_DC +#define BBOX_PROXY_CONFIG_DC + +#include "bbox_proxy_config.h" +#include "device/bbox_pub.h" + +/* + * 说明:模块代理及异常注册 + * 各模块通过模板宏初始化 struct bbox_proxy_info 结构体,黑匣子引用该头文件并将初始化结构体实例化 + * + * 示例: + * 各模块通过BBOX_PROXY_MODULE_XXX宏定义初始化 struct bbox_proxy_module_info 结构体 + * #define BBOX_PROXY_MODULE_LPM { \ + * .coreid = BBOX_LPM, \ + * .flag = BBOX_PROXY_CAPACITY_REGISTER | BBOX_PROXY_CAPACITY_DUMP | BBOX_PROXY_CAPACITY_LOG, \ + * .name = "lpm", \ + * .log_addr = 0xA00000, \ + * .log_len = 0x400000, \ + * .wait_timeout = 20000, \ // wait timeout will be restricted <= 20s + * .e_count = 3, \ + * } + * 各模块通过BBOX_PROXY_EXCEPTION_XXX宏定义初始化 struct bbox_proxy_exception_info 结构体 + * #define BBOX_PROXY_EXCEPTION_LPM { \ + * {0xA819320F, BBOX_DEAD, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, + * BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "tsensor error"}, \ + * {0xA619FFFF, BBOX_ERR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, + * BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "lpm heartbeat lost"}, \ + * {0xA819FFFF, BBOX_DEAD, BBOX_REBOOT_WAIT, LPM_EXCEPTION, 
BBOX_REENTRANT_DISALLOW, + * BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "lpm startup error"}, \ + * } + * 各模块通过BBOX_PROXY_CTRL_XXX宏定义初始化 struct bbox_proxy_ctrl_info 结构体 + * #define BBOX_PROXY_CTRL_LPM { \ + * .e_block_num = 2, \ + * .block_info = { \ + * {BLOCK_TYPE_STARTUP, CHECK_NONE, 0, 0x200000}, \ + * {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, \ + * 0x200000, 0x200000}, \ + * } \ + * } + * 各模块通过修改DEFINE_BBOX_PROXY(x)宏定义增加自身模块在黑匣子代码中实例化 struct bbox_proxy_info 结构体 + * #define BBOX_PROXY_INITIALIZER(name) { \ + * .module = BBOX_PROXY_MODULE_##name, \ + * .exception = BBOX_PROXY_EXCEPTION_##name, \ + * .ctrl = BBOX_PROXY_CTRL_##name, \ + * } + * DEFINE_BBOX_PROXY(x) struct bbox_proxy_info x[] = { \ + * BBOX_PROXY_INITIALIZER(TS), \ + * BBOX_PROXY_INITIALIZER(LPM), \ + * } + */ +#define BBOX_PROXY_MODULE_TS { \ + .coreid = BBOX_TS, \ + .flag = BBOX_PROXY_CAPACITY_REGISTER | BBOX_PROXY_CAPACITY_DUMP_DDR | BBOX_PROXY_CAPACITY_TRANS_ID, \ + .name = "ts", \ + .log_addr = 0, \ + .log_len = 0, \ + .wait_timeout = 10000, \ + .e_count = 21, \ +} + +#define BBOX_PROXY_EXCEPTION_TS { \ + {0xA6060FFF, BBOX_MAJOR, BBOX_REBOOT_WAIT, HEARTBEAT_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts heartbeat lost"}, \ + {0xA8060FFF, BBOX_CRITICAL, BBOX_REBOOT_WAIT, STARTUP_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts startup error"}, \ + {0xA6060000, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "data abort"}, \ + {0xA6060001, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "instr abort"}, \ + {0xA6060002, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "pc align fault"}, \ + {0xA6060003, 
BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "sp align fault"}, \ + {0xA6060004, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "infinite loop"}, \ + {0xA6060005, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "unknown exception"}, \ + {0xB4060006, BBOX_MINOR, BBOX_REBOOT_NO, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "aicore exception"}, \ + {0xB4060007, BBOX_MINOR, BBOX_REBOOT_NO, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "aicore timeout"}, \ + {0xB6060008, BBOX_MAJOR, BBOX_REBOOT_NO, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "sdma init fault"}, \ + {0xB4060009, BBOX_MINOR, BBOX_REBOOT_NO, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "sdma timeout"}, \ + {0xA606000A, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "hwts bus error"}, \ + {0xA606000B, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "hwts sqe error"}, \ + {0xA606000C, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "hwts ecc error"}, \ + {0xA406000D, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts aicpu exception"}, \ + {0xA406000E, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts npu exception"}, \ + {0xA606000F, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, 
BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "aicore reset timeout"}, \ + {0xA4060010, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts aiv exception"}, \ + {0xA4060011, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts aiv timeout"}, \ + {0xA4060014, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts aicpu timeout"}, \ +} + +#define BBOX_PROXY_CTRL_TS { \ + .e_block_num = 2, \ + .padding = {0}, \ + .block_info = { \ + {BLOCK_TYPE_STARTUP, CHECK_NONE, \ + 0x200, 0x19000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, \ + 0x19200, 0x1E6E00}, \ + } \ +} + +#define BBOX_PROXY_MODULE_LPM { \ + .coreid = BBOX_LPM, \ + .flag = BBOX_PROXY_CAPACITY_REGISTER | BBOX_PROXY_CAPACITY_DUMP_DDR | BBOX_PROXY_CAPACITY_DUMP_LOG, \ + .name = "lpm", \ + .log_addr = 0xA00000, \ + .log_len = 0x20000, \ + .wait_timeout = 10000, \ + .e_count = 24, \ +} + +#define BBOX_PROXY_EXCEPTION_LPM { \ + {0xa819320f, BBOX_CRITICAL, BBOX_REBOOT_NO, LPM_EXCEPTION, BBOX_REENTRANT_ALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "tsensor error"}, \ + {0xa6192d15, BBOX_MAJOR, BBOX_REBOOT_WAIT, HEARTBEAT_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "lpm heart error"}, \ + {0xa6193206, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_ALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "headfault error"}, \ + {0xa4193216, BBOX_MINOR, BBOX_REBOOT_NO, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "edp error"}, \ + {0xa4193217, BBOX_MINOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), 
BBOX_COREID_MASK(BBOX_LPM), "ipc timeout error"}, \ + {0xa4193218, BBOX_MINOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ipc queue error"}, \ + {0xa6193215, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "os heart error"}, \ + {0xa8193234,BBOX_CRITICAL, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr tmon low error"}, \ + {0xa8193235, BBOX_CRITICAL, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr tmon high error"}, \ + {0xa6193236, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr gate error"}, \ + {0xa619323f, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr aref error"}, \ + {0xa6193240, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr rdtimeout error"}, \ + {0xa6193241, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr pll unlock error"}, \ + {0xa6193242, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr retrain error"}, \ + {0xa6193243, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr tmon error"}, \ + {0xa6193244, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr dfs error"}, \ + {0xa6193245, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + 
BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr dvalid error"}, \ + {0xa6193246, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr dfi sel error"}, \ + {0xa6193247, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr pll unlock lp error"}, \ + {0xa6193248, BBOX_MAJOR, BBOX_REBOOT_NO, LPM_EXCEPTION, BBOX_REENTRANT_ALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr iecc uerr error"}, \ + {0xa419324a, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr unknown error"}, \ + {0xa4193250, BBOX_MINOR, BBOX_REBOOT_NO, LPM_EXCEPTION, BBOX_REENTRANT_ALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr iecc cerr error"}, \ + {0xa4192c1a, BBOX_MINOR, BBOX_REBOOT_WAIT, STARTUP_EXCEPTION, BBOX_REENTRANT_ALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "lp startup error"}, \ + {0xa419321b, BBOX_MINOR, BBOX_REBOOT_NO, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "lp tmonitor error"}, \ +} + +#define BBOX_PROXY_CTRL_LPM { \ + .e_block_num = 6, \ + .padding = {0}, \ + .block_info = { \ + {BLOCK_TYPE_STARTUP, CHECK_NONE, 0x0400, 0x12C00}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x13000, 0x19000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x2C000, 0x19000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x45000, 0x19000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x5E000, 0x19000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x77000, 0x19000}, \ + } \ +} 
+ +#define BBOX_PROXY_MODULE_HSM { \ + .coreid = BBOX_HSM, \ + .flag = BBOX_PROXY_CAPACITY_REGISTER | BBOX_PROXY_CAPACITY_DUMP_DDR | BBOX_PROXY_CAPACITY_DUMP_LOG, \ + .name = "hsm", \ + .log_addr = 0x3E00000, \ + .log_len = 0x100000, \ + .wait_timeout = 10000, \ + .e_count = 3, \ +} + +#define BBOX_PROXY_EXCEPTION_HSM { \ + {0xa6360000, BBOX_MAJOR, BBOX_REBOOT_WAIT, STARTUP_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_HSM), BBOX_COREID_MASK(BBOX_HSM), "HSM startup exception"}, \ + {0xa6361000, BBOX_MAJOR, BBOX_REBOOT_WAIT, HEARTBEAT_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_HSM), BBOX_COREID_MASK(BBOX_HSM), "HSM heartbeat exception"}, \ + {0xa6362000, BBOX_MAJOR, BBOX_REBOOT_NO, HSM_EXCEPTION, BBOX_REENTRANT_ALLOW, \ + BBOX_COREID_MASK(BBOX_HSM), BBOX_COREID_MASK(BBOX_HSM), "HSM resource shortage exception"}, \ +} + +#define BBOX_PROXY_CTRL_HSM { \ + .e_block_num = 5, \ + .padding = {0}, \ + .block_info = { \ + {BLOCK_TYPE_STARTUP, CHECK_NONE, 0x200, 0x1000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x1200, 0x1000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x2200, 0x1000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x3200, 0x1000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x4200, 0x1000}, \ + } \ +} + +#define BBOX_PROXY_MODULE_ATF { \ + .coreid = BBOX_TF, \ + .flag = BBOX_PROXY_CAPACITY_REGISTER, \ + .name = "atf", \ + .log_addr = 0x0, \ + .log_len = 0x0, \ + .wait_timeout = 10000, \ + .e_count = 1, \ +} + +#define BBOX_PROXY_EXCEPTION_ATF { \ + {0xA8340000, BBOX_CRITICAL, BBOX_REBOOT_WAIT, ATF_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TF), BBOX_COREID_MASK(BBOX_TF), "atf panic exception"}, \ +} + +#define BBOX_PROXY_CTRL_ATF { \ + .e_block_num = 1, \ + .padding = {0}, \ + .block_info = { \ + 
{BLOCK_TYPE_STARTUP, CHECK_NONE, 0x400, 0xF800}, \ + } \ +} + +#define DEFINE_BBOX_PROXY(x) struct bbox_proxy_info x[] = { \ + BBOX_PROXY_INITIALIZER(TS), \ + BBOX_PROXY_INITIALIZER(LPM), \ + BBOX_PROXY_INITIALIZER(HSM), \ + BBOX_PROXY_INITIALIZER(ATF), \ +} + +#endif // BBOX_PROXY_CONFIG_DC diff --git a/inc/toolchain/bbox/bbox_proxy_config_mdc.h b/inc/toolchain/bbox/bbox_proxy_config_mdc.h new file mode 100644 index 000000000..c1c3a23da --- /dev/null +++ b/inc/toolchain/bbox/bbox_proxy_config_mdc.h @@ -0,0 +1,394 @@ +/** + * @file bbox_proxy_config_mdc.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2020. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef BBOX_PROXY_CONFIG_MDC +#define BBOX_PROXY_CONFIG_MDC + +#include "bbox_proxy_config.h" +#include "device/bbox_pub.h" + +/* + * 说明:模块代理及异常注册 + * 各模块通过模板宏初始化 struct bbox_proxy_info 结构体,黑匣子引用该头文件并将初始化结构体实例化 + * + * 示例: + * 各模块通过BBOX_PROXY_MODULE_XXX宏定义初始化 struct bbox_proxy_module_info 结构体 + * #define BBOX_PROXY_MODULE_LPM { \ + * .coreid = BBOX_LPM, \ + * .flag = BBOX_PROXY_CAPACITY_REGISTER | BBOX_PROXY_CAPACITY_DUMP | BBOX_PROXY_CAPACITY_LOG, \ + * .name = "lpm", \ + * .log_addr = 0xA00000, \ + * .log_len = 0x400000, \ + * .wait_timeout = 20000, \ // wait timeout will be restricted <= 20s + * .e_count = 3, \ + * } + * 各模块通过BBOX_PROXY_EXCEPTION_XXX宏定义初始化 struct bbox_proxy_exception_info 结构体 + * #define BBOX_PROXY_EXCEPTION_LPM { \ + * {0xA819320F, BBOX_DEAD, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, + * BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "tsensor error"}, \ + * {0xA619FFFF, BBOX_ERR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, + * BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "lpm heartbeat lost"}, \ + * {0xA819FFFF, BBOX_DEAD, BBOX_REBOOT_WAIT, LPM_EXCEPTION, 
BBOX_REENTRANT_DISALLOW, + * BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "lpm startup error"}, \ + * } + * 各模块通过BBOX_PROXY_CTRL_XXX宏定义初始化 struct bbox_proxy_ctrl_info 结构体 + * #define BBOX_PROXY_CTRL_LPM { \ + * .e_block_num = 2, \ + * .block_info = { \ + * {BLOCK_TYPE_STARTUP, CHECK_NONE, 0, 0x200000}, \ + * {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, \ + * 0x200000, 0x200000}, \ + * } \ + * } + * 各模块通过修改DEFINE_BBOX_PROXY(x)宏定义增加自身模块在黑匣子代码中实例化 struct bbox_proxy_info 结构体 + * #define BBOX_PROXY_INITIALIZER(name) { \ + * .module = BBOX_PROXY_MODULE_##name, \ + * .exception = BBOX_PROXY_EXCEPTION_##name, \ + * .ctrl = BBOX_PROXY_CTRL_##name, \ + * } + * DEFINE_BBOX_PROXY(x) struct bbox_proxy_info x[] = { \ + * BBOX_PROXY_INITIALIZER(TS), \ + * BBOX_PROXY_INITIALIZER(LPM), \ + * } + */ +#define BBOX_PROXY_MODULE_TS { \ + .coreid = BBOX_TS, \ + .flag = BBOX_PROXY_CAPACITY_REGISTER | BBOX_PROXY_CAPACITY_DUMP_DDR | BBOX_PROXY_CAPACITY_TRANS_ID, \ + .name = "ts", \ + .log_addr = 0, \ + .log_len = 0, \ + .wait_timeout = 10000, \ + .e_count = 42, \ +} + +#define BBOX_PROXY_EXCEPTION_TS { \ + {0xA6060FFF, BBOX_MAJOR, BBOX_REBOOT_WAIT, HEARTBEAT_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 heartbeat lost"}, \ + {0xA8060FFF, BBOX_CRITICAL, BBOX_REBOOT_WAIT, STARTUP_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 startup error"}, \ + {0xA6060000, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 data abort"}, \ + {0xA6060001, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 instr abort"}, \ + {0xA6060002, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 pc align fault"}, \ + 
{0xA6060003, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 sp align fault"}, \ + {0xA6060004, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 infinite loop"}, \ + {0xA6060005, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 unknow exception"}, \ + {0xB4060006, BBOX_MINOR, BBOX_REBOOT_NO, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 aicore exception"}, \ + {0xB4060007, BBOX_MINOR, BBOX_REBOOT_NO, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 aicore timeout"}, \ + {0xB6060008, BBOX_MAJOR, BBOX_REBOOT_NO, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 sdma init fault"}, \ + {0xB4060009, BBOX_MINOR, BBOX_REBOOT_NO, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 sdma timeout"}, \ + {0xA606000A, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 hwts bus error"}, \ + {0xA606000B, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 hwts sqe error"}, \ + {0xA606000C, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 hwts ecc error"}, \ + {0xA406000D, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 aicpu exception"}, \ + {0xA406000E, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 npu exception"}, \ + 
{0xA606000F, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 aicore reset timeout"}, \ + {0xA4060010, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 aiv exception"}, \ + {0xA4060011, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 aiv timeout"}, \ + {0xA4060014, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts0 aicpu timeout"}, \ + {0xA6061FFF, BBOX_MAJOR, BBOX_REBOOT_WAIT, HEARTBEAT_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 heartbeat lost"}, \ + {0xA8061FFF, BBOX_CRITICAL, BBOX_REBOOT_WAIT, STARTUP_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 startup error"}, \ + {0xA6061000, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 data abort"}, \ + {0xA6061001, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 instr abort"}, \ + {0xA6061002, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 pc align fault"}, \ + {0xA6061003, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 sp align fault"}, \ + {0xA6061004, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 infinite loop"}, \ + {0xA6061005, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 unknow 
exception"}, \ + {0xB4061006, BBOX_MINOR, BBOX_REBOOT_NO, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 aicore exception"}, \ + {0xB4061007, BBOX_MINOR, BBOX_REBOOT_NO, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 aicore timeout"}, \ + {0xB6061008, BBOX_MAJOR, BBOX_REBOOT_NO, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 sdma init fault"}, \ + {0xB4061009, BBOX_MINOR, BBOX_REBOOT_NO, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 sdma timeout"}, \ + {0xA606100A, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 hwts bus error"}, \ + {0xA606100B, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 hwts sqe error"}, \ + {0xA606100C, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 hwts ecc error"}, \ + {0xA406100D, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 aicpu exception"}, \ + {0xA406100E, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 npu exception"}, \ + {0xA606100F, BBOX_MAJOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 aicore reset timeout"}, \ + {0xA4061010, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 aiv exception"}, \ + {0xA4061011, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 
aiv timeout"}, \ + {0xA4061014, BBOX_MINOR, BBOX_REBOOT_WAIT, TS_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TS), BBOX_COREID_MASK(BBOX_TS), "ts1 aicpu timeout"}, \ +} + +#define BBOX_PROXY_CTRL_TS { \ + .e_block_num = 3, \ + .padding = {0}, \ + .block_info = { \ + {BLOCK_TYPE_STARTUP, CHECK_NONE, \ + 0x0200, 0x32000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, \ + 0x32200, 0xE6F00}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, \ + 0x119100, 0xE6F00}, \ + } \ +} + +#define BBOX_PROXY_MODULE_LPM { \ + .coreid = BBOX_LPM, \ + .flag = BBOX_PROXY_CAPACITY_REGISTER | BBOX_PROXY_CAPACITY_DUMP_DDR | BBOX_PROXY_CAPACITY_DUMP_LOG, \ + .name = "lpm", \ + .log_addr = 0xA00000, \ + .log_len = 0x20000, \ + .wait_timeout = 10000, \ + .e_count = 24, \ +} + +#define BBOX_PROXY_EXCEPTION_LPM { \ + {0xa819320f, BBOX_CRITICAL, BBOX_REBOOT_NO, LPM_EXCEPTION, BBOX_REENTRANT_ALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "tsensor error"}, \ + {0xa6192d15, BBOX_MAJOR, BBOX_REBOOT_WAIT, HEARTBEAT_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "lpm heart error"}, \ + {0xa6193206, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_ALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "headfault error"}, \ + {0xa4193216, BBOX_MINOR, BBOX_REBOOT_NO, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "edp error"}, \ + {0xa4193217, BBOX_MINOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ipc timeout error"}, \ + {0xa4193218, BBOX_MINOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ipc queue error"}, \ + {0xa6193215, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + 
BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "os heart error"}, \ + {0xa8193234,BBOX_CRITICAL, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr tmon low error"}, \ + {0xa8193235, BBOX_CRITICAL, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr tmon high error"}, \ + {0xa6193236, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr gate error"}, \ + {0xa619323f, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr aref error"}, \ + {0xa6193240, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr rdtimeout error"}, \ + {0xa6193241, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr pll unlock error"}, \ + {0xa6193242, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr retrain error"}, \ + {0xa6193243, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr tmon error"}, \ + {0xa6193244, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr dfs error"}, \ + {0xa6193245, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr dvalid error"}, \ + {0xa6193246, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr dfi sel error"}, \ + {0xa6193247, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, 
BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr pll unlock lp error"}, \ + {0xa6193248, BBOX_MAJOR, BBOX_REBOOT_NO, LPM_EXCEPTION, BBOX_REENTRANT_ALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr iecc uerr error"}, \ + {0xa419324a, BBOX_MAJOR, BBOX_REBOOT_WAIT, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr unkonwn error"}, \ + {0xa4193250, BBOX_MINOR, BBOX_REBOOT_NO, LPM_EXCEPTION, BBOX_REENTRANT_ALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "ddr iecc cerr error"}, \ + {0xa4192c1a, BBOX_MINOR, BBOX_REBOOT_WAIT, STARTUP_EXCEPTION, BBOX_REENTRANT_ALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "lp startup error"}, \ + {0xa419321b, BBOX_MINOR, BBOX_REBOOT_NO, LPM_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_LPM), BBOX_COREID_MASK(BBOX_LPM), "lp tmonitor error"}, \ +} + +#define BBOX_PROXY_CTRL_LPM { \ + .e_block_num = 6, \ + .padding = {0}, \ + .block_info = { \ + {BLOCK_TYPE_STARTUP, CHECK_NONE, 0x0400, 0x12C00}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x13000, 0x19000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x2C000, 0x19000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x45000, 0x19000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x5E000, 0x19000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x77000, 0x19000}, \ + } \ +} + +#define BBOX_PROXY_MODULE_HSM { \ + .coreid = BBOX_HSM, \ + .flag = BBOX_PROXY_CAPACITY_REGISTER | BBOX_PROXY_CAPACITY_DUMP_DDR | BBOX_PROXY_CAPACITY_DUMP_LOG, \ + .name = "hsm", \ + .log_addr = 0x3E00000, \ + .log_len = 0x100000, \ + .wait_timeout = 10000, \ + .e_count = 3, \ +} + +#define 
BBOX_PROXY_EXCEPTION_HSM { \ + {0xa6360000, BBOX_MAJOR, BBOX_REBOOT_WAIT, STARTUP_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_HSM), BBOX_COREID_MASK(BBOX_HSM), "HSM startup exception"}, \ + {0xa6361000, BBOX_MAJOR, BBOX_REBOOT_WAIT, HEARTBEAT_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_HSM), BBOX_COREID_MASK(BBOX_HSM), "HSM heartbeat exception"}, \ + {0xa6362000, BBOX_MAJOR, BBOX_REBOOT_NO, HSM_EXCEPTION, BBOX_REENTRANT_ALLOW, \ + BBOX_COREID_MASK(BBOX_HSM), BBOX_COREID_MASK(BBOX_HSM), "HSM resource shortage exception"}, \ +} + +#define BBOX_PROXY_CTRL_HSM { \ + .e_block_num = 5, \ + .padding = {0}, \ + .block_info = { \ + {BLOCK_TYPE_STARTUP, CHECK_NONE, 0x200, 0x1000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x1200, 0x1000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x2200, 0x1000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x3200, 0x1000}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x4200, 0x1000}, \ + } \ +} + +#define BBOX_PROXY_MODULE_ISP { \ + .coreid = BBOX_ISP, \ + .flag = BBOX_PROXY_CAPACITY_REGISTER | BBOX_PROXY_CAPACITY_DUMP_DDR, \ + .name = "isp", \ + .log_addr = 0, \ + .log_len = 0, \ + .wait_timeout = 10000, \ + .e_count = 8, \ +} + +/* startup error dump log only, heartbeat and running error dump both bbox and log */ +#define BBOX_PROXY_EXCEPTION_ISP { \ + {0xa8380000, BBOX_CRITICAL, BBOX_REBOOT_WAIT, STARTUP_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_ISP), BBOX_COREID_MASK(BBOX_ISP), "ISP0 startup exception"}, \ + {0xa8380001, BBOX_CRITICAL, BBOX_REBOOT_WAIT, STARTUP_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_ISP), BBOX_COREID_MASK(BBOX_ISP), "ISP1 startup exception"}, \ + {0xa8380002, BBOX_CRITICAL, BBOX_REBOOT_WAIT, STARTUP_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ 
+ BBOX_COREID_MASK(BBOX_ISP), BBOX_COREID_MASK(BBOX_ISP), "ISP2 startup exception"}, \ + {0xa8380003, BBOX_CRITICAL, BBOX_REBOOT_WAIT, STARTUP_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_ISP), BBOX_COREID_MASK(BBOX_ISP), "ISP3 startup exception"}, \ + {0xa8381000, BBOX_CRITICAL, BBOX_REBOOT_WAIT, ISP_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_ISP), BBOX_COREID_MASK(BBOX_ISP), "ISP0 running exception"}, \ + {0xa8381001, BBOX_CRITICAL, BBOX_REBOOT_WAIT, ISP_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_ISP), BBOX_COREID_MASK(BBOX_ISP), "ISP1 running exception"}, \ + {0xa8381002, BBOX_CRITICAL, BBOX_REBOOT_WAIT, ISP_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_ISP), BBOX_COREID_MASK(BBOX_ISP), "ISP2 running exception"}, \ + {0xa8381003, BBOX_CRITICAL, BBOX_REBOOT_WAIT, ISP_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_ISP), BBOX_COREID_MASK(BBOX_ISP), "ISP3 running exception"}, \ +} + +#define BBOX_PROXY_CTRL_ISP { \ + .e_block_num = 4, \ + .padding = {0}, \ + .block_info = { \ + {BLOCK_TYPE_NORMAL | BLOCK_TYPE_STARTUP, \ + CHECK_STARTUP_EXCEPID | CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, \ + 0x200, 0x7FE00}, \ + {BLOCK_TYPE_NORMAL | BLOCK_TYPE_STARTUP, \ + CHECK_STARTUP_EXCEPID | CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, \ + 0x80000, 0x80000}, \ + {BLOCK_TYPE_NORMAL | BLOCK_TYPE_STARTUP, \ + CHECK_STARTUP_EXCEPID | CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, \ + 0x100000, 0x80000}, \ + {BLOCK_TYPE_NORMAL | BLOCK_TYPE_STARTUP, \ + CHECK_STARTUP_EXCEPID | CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, \ + 0x180000, 0x80000}, \ + } \ +} + +#define BBOX_PROXY_MODULE_SAFETYISLAND { \ + .coreid = BBOX_SAFETYISLAND, \ + .flag = BBOX_PROXY_CAPACITY_REGISTER | BBOX_PROXY_CAPACITY_DUMP_DDR, \ + .name = "sil", \ + .log_addr = 0, \ + .log_len = 0, \ + .wait_timeout = 10000, \ + 
.e_count = 5, \ +} + +#define BBOX_PROXY_EXCEPTION_SAFETYISLAND { \ + {0xA63A0001, BBOX_CRITICAL, BBOX_REBOOT_NO, SAFETYISLAND_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_SAFETYISLAND), BBOX_COREID_MASK(BBOX_SAFETYISLAND), "sil os panic"}, \ + {0xA63A1001, BBOX_MAJOR, BBOX_REBOOT_NO, SAFETYISLAND_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_SAFETYISLAND), BBOX_COREID_MASK(BBOX_SAFETYISLAND), "sil lpm err"}, \ + {0xA43A2001, BBOX_MINOR, BBOX_REBOOT_WAIT, SAFETYISLAND_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_SAFETYISLAND), BBOX_COREID_MASK(BBOX_SAFETYISLAND), "sil ipc or hb err"}, \ + {0xA63A3001, BBOX_MAJOR, BBOX_REBOOT_WAIT, SAFETYISLAND_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_SAFETYISLAND), BBOX_COREID_MASK(BBOX_SAFETYISLAND), "sil heartbeat err"}, \ + {0xA63A4001, BBOX_MAJOR, BBOX_REBOOT_WAIT, SAFETYISLAND_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_SAFETYISLAND), BBOX_COREID_MASK(BBOX_SAFETYISLAND), "sil excep tbl full"}, \ +} + +#define BBOX_PROXY_CTRL_SAFETYISLAND { \ + .e_block_num = 6, \ + .padding = {0}, \ + .block_info = { \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x400, 0xC800}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0xD000, 0xC800}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x19C00, 0xC800}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x26800, 0xC800}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x33400, 0xC800}, \ + {BLOCK_TYPE_NORMAL, CHECK_RUNTIME_EXCEPID | CHECK_RUNTIME_TMSTMP | CHECK_HEARTBEAT_EXCEPID, 0x40000, 0xC800}, \ + } \ +} + +#define BBOX_PROXY_MODULE_ATF { \ + .coreid = BBOX_TF, \ + .flag = BBOX_PROXY_CAPACITY_REGISTER, \ + .name = "atf", \ + .log_addr = 0x0, \ + .log_len = 0x0, \ + 
.wait_timeout = 10000, \ + .e_count = 1, \ +} + +#define BBOX_PROXY_EXCEPTION_ATF { \ + {0xA8340000, BBOX_CRITICAL, BBOX_REBOOT_WAIT, ATF_EXCEPTION, BBOX_REENTRANT_DISALLOW, \ + BBOX_COREID_MASK(BBOX_TF), BBOX_COREID_MASK(BBOX_TF), "atf panic exception"}, \ +} + +#define BBOX_PROXY_CTRL_ATF { \ + .e_block_num = 1, \ + .padding = {0}, \ + .block_info = { \ + {BLOCK_TYPE_STARTUP, CHECK_NONE, 0x90400, 0xF800}, \ + } \ +} + + +#define DEFINE_BBOX_PROXY(x) struct bbox_proxy_info x[] = { \ + BBOX_PROXY_INITIALIZER(TS), \ + BBOX_PROXY_INITIALIZER(LPM), \ + BBOX_PROXY_INITIALIZER(HSM), \ + BBOX_PROXY_INITIALIZER(ISP), \ + BBOX_PROXY_INITIALIZER(SAFETYISLAND), \ + BBOX_PROXY_INITIALIZER(ATF), \ +} + +#endif // BBOX_PROXY_CONFIG_MDC diff --git a/inc/toolchain/bbox/device/bbox_pub.h b/inc/toolchain/bbox/device/bbox_pub.h new file mode 100644 index 000000000..76491d04f --- /dev/null +++ b/inc/toolchain/bbox/device/bbox_pub.h @@ -0,0 +1,313 @@ +/* + * @file bbox_pub.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. + * + * blackbox. (kernel run data recorder.) 
+ * This head file is about pub define and declaration + */ + +#ifndef BBOX_PUB_H +#define BBOX_PUB_H + +#include "bbox_types.h" + +/********************************************************** + * basic type definitions * + **********************************************************/ +// 模块id列表 +enum BBOX_COREID_LIST { + BBOX_UNDEF = 0x0, + BBOX_DRIVER = 0x1, + BBOX_OS = 0x2, + BBOX_TS = 0x3, + BBOX_RUNTIME = 0x4, + BBOX_AICPU = 0x5, + BBOX_CCE = 0x6, + BBOX_TVM = 0x7, + BBOX_FRAMEWORK = 0x8, + BBOX_HIAI = 0x9, + BBOX_DVPP = 0xa, + BBOX_AIPP = 0xb, + BBOX_LPM = 0xc, + BBOX_MDC = 0xd, + BBOX_COMPILER = 0xe, + BBOX_TOOLCHAIN = 0xf, + BBOX_ALGORITHM = 0x10, + BBOX_PROFILING = 0x11, + BBOX_HCCL = 0x12, + BBOX_EMULATE = 0x13, + BBOX_BIOS = 0x14, + BBOX_TEEOS = 0x15, + BBOX_TINY = 0x16, + BBOX_LPFW = 0x17, + BBOX_NETWORK = 0x18, + BBOX_ZIP = 0x19, + BBOX_TF = 0x1A, + BBOX_HSM = 0x1B, + BBOX_ISP = 0x1C, + BBOX_SAFETYISLAND = 0x1D, + BBOX_CLUSTER = 0x1E, + BBOX_COMISOLATOR = 0x1F, + BBOX_SD = 0x20, + BBOX_DP = 0x21, + BBOX_CORE_MAX = 0x22, +}; + +// 异常类型 +enum BBOX_REBOOT_REASON { + BBOX_REBOOT_REASON_LABEL0 = 0x0, // label0:重启相关 + DEVICE_COLDBOOT = BBOX_REBOOT_REASON_LABEL0, // 冷启动,如关机后第一次开机;掉电后第一次开机 + BIOS_EXCEPTION = 0x1, // bios异常重启,前一次启动bios异常 + DEVICE_HOTBOOT = 0x2, // 热复位,如按键复位,芯片硬复位等 + BBOX_REBOOT_REASON_LABEL1 = 0x10, // label1:硬件原因复位 + ABNORMAL_EXCEPTION = BBOX_REBOOT_REASON_LABEL1, // 未检测到的异常 + TSENSOR_EXCEPTION = 0x1f, // soc温保复位 + PMU_EXCEPTION = 0x20, // 过流、欠压、PMU过温引起的硬件复位 + DDR_FATAL_EXCEPTION = 0X22, // ddr fatal异常复位,如:ddr颗粒超温复位 + BBOX_REBOOT_REASON_LABEL2 = 0x24, // label2:os软件原因复位 + OS_PANIC = BBOX_REBOOT_REASON_LABEL2, // os panic,如访问非法地址 + OS_SOFT_LOCKUP = 0x26, // soft lockup + OS_OOM = 0x2a, // OOM 异常 + OS_HDC = 0x2b, // HDC 断连 + BBOX_REBOOT_REASON_LABEL3 = 0x2c, // label3:其他模块复位 + STARTUP_EXCEPTION = 0x2c, // 模块启动异常 + HEARTBEAT_EXCEPTION = 0x2d, // 模块心跳异常 + RUN_EXCEPTION = 0x2e, // 模块运行异常 + LPM_EXCEPTION = 0x32, // LPM子系统检测到的各种异常 + TS_EXCEPTION = 0x33, 
// TS子系统检测到的各种异常 + DVPP_EXCEPTION = 0x35, // DVPP异常 + DRIVER_EXCEPTION = 0x36, // DRIVER异常 + ZIP_EXCEPTION = 0x37, // ZIP异常 + TEE_EXCEPTION = 0x38, // teeos异常 + LPFW_EXCEPTION = 0x39, // LPFW异常 + NETWORK_EXCEPTION = 0x3A, // NETWORK异常 + HSM_EXCEPTION = 0x3B, // HSM异常 + ATF_EXCEPTION = 0x3C, // ATF异常 + ISP_EXCEPTION = 0x3D, // ISP异常 + SAFETYISLAND_EXCEPTION = 0x3E, // SAFETYISLAND异常 + TOOLCHAIN_EXCEPTION = 0x3F, // TOOLCHAIN异常 + CLUSTER_EXCEPTION = 0x40, // CLUSTER异常 + COMISOLATOR_EXCEPTION = 0x41, // COMISOLATOR异常 + SD_EXCEPTION = 0x42, // SD异常 + DP_EXCEPTION = 0x43, // DP异常 + BBOX_REBOOT_REASON_LABEL4 = 0x50, // label4: + BBOX_REBOOT_REASON_LABEL5 = 0x65, // label5:电源异常 + BBOX_REBOOT_REASON_LABEL6 = 0x6A, // label6:xloader异常 + BBOX_REBOOT_REASON_LABEL7 = 0x74, // label7:fastboot异常 + BBOX_REBOOT_REASON_LABEL8 = 0x89, // label8: host侧异常 + DEVICE_LTO_EXCEPTION = 0x8A, // 设备启动超时: load timeout + DEVICE_HBL_EXCEPTION = 0x8B, // 设备心跳丢失: heart beat lost + DEVICE_USER_RESET = 0x8C, // 用户复位 + DEVICE_AER_EXCEPTION = 0x8D, // 设备AER错误: advanced err report + BBOX_REBOOT_REASON_LABEL9 = 0x90, // label9: + BBOX_REBOOT_REASON_LABEL10 = 0xB0, // label10: + BBOX_EXCEPTION_REASON_INVALID = 0xFF, +}; + +enum BBOX_PROCESS_PRI { + BBOX_OTHER = 0x0, // 不确定 + BBOX_NOTICE = 0x1, // 提示 + BBOX_MINOR = 0x2, // 次要 + BBOX_MAJOR = 0x3, // 重要 + BBOX_CRITICAL = 0x4, // 紧急 + BBOX_PPRI_MAX +}; + +enum BBOX_REBOOT_PRI { + BBOX_REBOOT_NOW = 0x01, // 立即重启 + BBOX_REBOOT_WAIT, // 等待重启 + BBOX_REBOOT_NO, // 不重启 + BBOX_REBOOT_MAX +}; + +enum BBOX_REENTRANT { + BBOX_REENTRANT_ALLOW = 0x01, // 可重复触发的异常 + BBOX_REENTRANT_DISALLOW // 不可重复触发的异常 +}; + +#define BBOX_COREID_VALID(coreid) ((((coreid) == BBOX_UNDEF) || ((coreid) >= BBOX_CORE_MAX)) ? BBOX_FALSE : BBOX_TRUE) +#define BBOX_COREID_MASK(coreid) (u64)((BBOX_COREID_VALID(coreid) == BBOX_TRUE) ? 
(1ull << (u8)((coreid) - 1)) : BBOX_UNDEF) + + +/********************************************************** + * module exception register definitions * + **********************************************************/ +#define BBOX_MODULE_CTRL_NUM 6 + +struct bbox_module_exception_ctrl { + excep_time e_clock; // 模块触发异常时间 + u32 e_excepid; // 模块触发的异常id + u32 e_block_offset; // 模块异常信息划分块起始偏移值,基于模块预留内存首地址,从magic开始 + u32 e_block_len; // 模块异常信息划分块长度 + u32 e_info_len; // 模块异常信息实际长度 +}; + +#define BBOX_MODULE_MAGIC 0x56312e32 +#define BBOX_MODULE_CTRL_PAD 3 +#define BBOX_MODULE_CTRL_NUM 6 +#define BBOX_MODULE_CTRL_RESERV 312 + +struct bbox_module_ctrl { + u32 magic; // 使用宏BBOX_MAGIC + u8 e_block_num; // 需要使用的控制块个数,最多BBOX_PROXY_CTRL_NUM + u8 padding[BBOX_MODULE_CTRL_PAD]; // padding + struct bbox_module_exception_ctrl block[BBOX_MODULE_CTRL_NUM]; // 模块dump信息控制状态 + u8 reserved[BBOX_MODULE_CTRL_RESERV]; // 预留空间,用于后续扩展 +}; + +#define BBOX_MODULE_CTRL_BLOCK_SIZE sizeof(struct bbox_module_ctrl) // total 512 byte + + +/********************************************************** + * bbox interfaces definitions * + **********************************************************/ +#ifndef BBOX_COMMON_STRUCT +#define BBOX_COMMON_STRUCT +typedef void (*bbox_e_callback)(u32, void*); + +typedef struct bbox_exception_info { + u32 e_excepid; // exception id; + u32 e_excepid_end; // can register exception id region. 
[excepid~excepid_end] + u64 e_notify_core_mask; // need notify other core mask + u64 e_reset_core_mask; // need reset other core mask + u8 e_exce_type; // the type of exception + u8 e_from_core; // the core of happen exception + u8 e_process_priority; // exception process priority + u8 e_reboot_priority; // exception reboot priority, just recommended host operation + u8 e_reentrant; // whether to allow exception reentrant + u8 e_from_module[BBOX_MODULE_NAME_LEN]; // the module of happen excption + u8 e_desc[BBOX_EXCEPTIONDESC_MAXLEN]; // the desc of happen excption + bbox_e_callback e_callback; // will be called when excption has processed. +} bbox_exception_info_s; +#endif + +struct bbox_report_info { + u32 devid; // device chip id, may NOT same with device slot id + u32 excepid; // exception id + excep_time time; // exception time + u32 arg; // arg +}; + +struct bbox_dump_done_ops_info { + u32 devid; // device chip id, may NOT same with device slot id + u32 excepid; // exception id + u8 coreid; // which core done + u8 etype; // exception type + excep_time time; // exception time +}; + +/* + * @brief : callback function, tell bbox dump done + * @param [in] : struct bbox_dump_done_ops_info *info dump done info + * @return : NA + */ +typedef void (*bbox_dump_done_ops)(const struct bbox_dump_done_ops_info *info); + +struct bbox_dump_ops_info { + u32 devid; // device chip id, may NOT same with device slot id + u32 excepid; // exception id + u8 coreid; // exception core id + u8 etype; // exception type + excep_time time; // exception time + u32 arg; // arg from exception report +}; + +/* + * @brief : module dump operate, the function over, need call fndone to mark dump over + * @param [in] : struct bbox_dump_ops_info *info module dump info + * @param [in] : bbox_dump_done_ops done dump done function pointer + * @return : NA + */ +typedef void (*bbox_dump_ops)(const struct bbox_dump_ops_info *info, bbox_dump_done_ops done); + +struct bbox_reset_ops_info { + u32 devid; 
// device chip id, may NOT same with device slot id + u32 excepid; // exception id + u8 coreid; // exception core id + u8 etype; // exception type +}; + +/* + * @brief : module reset operate + * @param [in] : struct bbox_reset_ops_info *info module reset info + * @return : NA + */ +typedef void (*bbox_reset_ops)(const struct bbox_reset_ops_info *info); + +struct bbox_module_info { + u8 coreid; // core id + bbox_dump_ops ops_dump; // dump operate pointer + bbox_reset_ops ops_reset; // reset operate pointer +}; + +struct bbox_module_result { + u64 log_addr; // reserved physical address + u32 log_len; // reserved physical length +}; + +/* + * @brief : register module + * @param [in] : struct bbox_module_info *info module info + * @param [in] : struct bbox_module_result *result register result + * @return : <0 failure; ==0 success + */ +int bbox_register_module(const struct bbox_module_info *info, struct bbox_module_result *result); + +/* + * @brief : unregister module + * @param [in] : u8 core_id core id + * @return : <0 failure; ==0 success + */ +int bbox_unregister_module(u8 coreid); + +/* + * @brief : report exception + * @param [in] : struct bbox_report_ops_info *info report info + * @return : =1: disallow reentrant + * =0: success + * <0: failure + */ +int bbox_exception_report(const struct bbox_report_info *info); + +#ifndef BBOX_COMMON_INTERFACE +#define BBOX_COMMON_INTERFACE +/* + * @brief : register exception + * @param [in] : struct bbox_exception_info *e exception info + * @return : e_excepid + * == 0 fail; >0 success + */ +u32 bbox_register_exception(const struct bbox_exception_info *e); + +/* + * func name: bbox_unregister_exception + * func args: u32 excepid, exception id; + * return : < 0 fail + * >=0 success + */ +int bbox_unregister_exception(u32 excepid); + +/* + * @brief : get device error code + * @param [in] : dev_id device chip id, may NOT same with device slot id + * @param [out] : u32 *e_code exception code array; + * @param [in] : u32 
e_capacity array num, max value is 128 + * @return : >0: error of num; + * =0: none of error; + * <0: failure; + */ +int bbox_get_device_errorcode(u32 dev_id, u32 *e_code, u32 e_capacity); + +/* + * @brief : get the exception description + * @param [in] : u32 ecode exception id + * @param [out] : u8 *desc string array, exception description + * @param [in] : u32 length string array length + * @return : <0 failure; ==0 success + */ +int bbox_get_device_ecode_info(u32 ecode, u8 *desc, u32 length); +#endif + +#endif + diff --git a/inc/toolchain/bbox/device/bbox_pub_cloud.h b/inc/toolchain/bbox/device/bbox_pub_cloud.h new file mode 100644 index 000000000..4c18c6cba --- /dev/null +++ b/inc/toolchain/bbox/device/bbox_pub_cloud.h @@ -0,0 +1,281 @@ +/* + * @file bbox_pub_cloud.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved. + * + * blackbox. (kernel run data recorder.) + * This head file is about cloud interface + */ + +#ifndef BB_PUB_CLOUD_H +#define BB_PUB_CLOUD_H + +#include "bbox_types.h" + +#ifndef RDR_BASIC_TYPE +#define RDR_BASIC_TYPE +/* 模块id列表 */ +typedef enum CORE_LIST { + RDR_UNDEF = 0x0, + RDR_DRIVER = 0x1, + RDR_AP = 0x2, + RDR_TS = 0x3, + RDR_DVPP = 0xa, + RDR_BIOS = 0x14, + RDR_TEEOS = 0x15, + RDR_LPFW = 0x17, + RDR_NETWORK = 0x18, + RDR_TF = 0x1A, + RDR_CORE_MAX = 0x1B, +} rdr_coreid; + +#define RDR_COREID_VALID(coreid) ((((coreid) == 0) || ((coreid) >= RDR_CORE_MAX)) ? 
BBOX_FALSE : BBOX_TRUE) +#define RDR_COREID_MASK(coreid) (u64)(unsigned)(1 << ((coreid) - 1)) + +/* 异常类型 */ +typedef enum { + REBOOT_REASON_LABEL0 = 0x0, /* label0:重启相关 */ + AP_S_COLDBOOT = REBOOT_REASON_LABEL0, /* 冷启动,如关机后第一次开机;掉电后第一次开机 */ + BIOS_S_EXCEPTION = 0x1, /* bios异常重启,前一次启动bios异常 */ + AP_S_HOTBOOT = 0x2, /* 热复位,如按键复位,芯片硬复位等 */ + REBOOT_REASON_LABEL1 = 0x10, /* label1:硬件原因复位 */ + AP_S_ABNORMAL = REBOOT_REASON_LABEL1, /* 未检测到的异常 */ + AP_S_TSENSOR = 0x1f, /* soc温保复位 */ + AP_S_PMU = 0x20, /* 过流、欠压、PMU过温引起的硬件复位 */ + AP_S_DDR_FATAL = 0X22, /* ddr fatal异常复位,如:ddr颗粒超温复位 */ + REBOOT_REASON_LABEL2 = 0x24, /* label2:ap软件原因复位 */ + AP_S_PANIC = REBOOT_REASON_LABEL2, /* A核panic,如访问非法地址 */ + AP_S_OOM = 0x2a, /* OOM 异常 */ + AP_S_HDC = 0x2b, /* HDC 断连 */ + REBOOT_REASON_LABEL3 = 0x2c, /* label3:其他模块复位 */ + STARTUP_S_EXCEPTION = 0x2c, /* 模块启动异常 */ + HEARTBEAT_S_EXCEPTION = 0x2d, /* 模块心跳异常 */ + TS_S_EXCEPTION = 0x33, /* TS子系统检测到的各种异常 */ + DVPP_S_EXCEPTION = 0x35, /* DVPP异常 */ + DRIVER_S_EXCEPTION = 0x36, /* DRIVER异常 */ + TEE_S_EXCEPTION = 0x38, /* teeos异常 */ + LPFW_S_EXCEPTION = 0x39, /* LPFW异常 */ + NETWORK_S_EXCEPTION = 0x3A, /* NETWORK异常 */ + REBOOT_REASON_LABEL4 = 0x40, /* label4: */ + REBOOT_REASON_LABEL5 = 0x65, /* label5:电源异常 */ + REBOOT_REASON_LABEL6 = 0x6A, /* label6:xloader异常 */ + REBOOT_REASON_LABEL7 = 0x74, /* label7:fastboot异常 */ + REBOOT_REASON_LABEL8 = 0x89, /* label8: host侧异常 */ + DEVICE_LOAD_TIMEOUT = 0x8A, /* 设备启动超时 */ + DEVICE_HEAT_BEAT_LOST = 0x8B, /* 设备心跳丢失 */ + DEVICE_RESET_INFORM = 0x8C, /* 用户复位 */ + DEVICE_ADVANCED_ERR_REPORT = 0x8D, /* 设备AER错误 */ + REBOOT_REASON_LABEL9 = 0x90, /* label9: */ + REBOOT_REASON_LABEL10 = 0xB0, /* label10: */ + RDR_EXCEPTION_REASON_INVALID = 0xFF, +} EXCH_SOURCE; + +enum PROCESS_PRI { + RDR_OTHER = 0x0, /* 不确定 */ + RDR_NOTICE = 0x1, /* 提示 */ + RDR_MINOR = 0x2, /* 次要 */ + RDR_WARN = RDR_MINOR, + RDR_MAJOR = 0x3, /* 重要 */ + RDR_ERR = RDR_MAJOR, + RDR_CRITICAL = 0x4, /* 紧急 */ + RDR_DEAD = RDR_CRITICAL, + RDR_PPRI_MAX +}; + 
+enum REBOOT_PRI { + RDR_REBOOT_NOW = 0x01, /* 立即重启 */ + RDR_REBOOT_WAIT, /* 等待重启 */ + RDR_REBOOT_NO, /* 不重启 */ + RDR_REBOOT_MAX +}; + +enum REENTRANT { + RDR_REENTRANT_ALLOW = 0x01, /* 可重复触发的异常 */ + RDR_REENTRANT_DISALLOW /* 不可重复触发的异常 */ +}; + +#define MODULE_MAGIC 0xbaba0514 +#define MODULE_VALID 1 +#define MODULE_EXCEPTION_REGISTER_MAXNUM 512 + +struct exc_description_s { + u32 e_excepid; /* 异常id */ + u8 e_process_level; /* 异常处理级别:BBOX_PROCESS_PRI */ + u8 e_reboot_priority; /* 异常重启级别:BBOX_REBOOT_PRI */ + u8 e_excep_type; /* 异常类型 */ + u8 e_reentrant; /* 异常是否可重入 */ + u64 e_notify_core_mask; /* 异常联动掩码 */ + u8 e_desc[BBOX_EXCEPTIONDESC_MAXLEN]; /* 异常描述 */ +}; + +struct exc_info_s { + excep_time e_clock; /* 模块触发异常时间 */ + u32 e_excepid; /* 模块触发的异常id */ + u16 e_dump_status; /* 模块将异常信息存预留内存的控制状态 */ + u16 e_save_status; /* 代理将异常信息从预留内存导出的控制状态 */ +}; + +/* 通过共享内存注册异常 */ +struct exc_module_info_s { + u32 magic; /* 使用宏MODULE_MAGIC */ + u16 e_excep_valid; /* 模块写完注册的异常,则设置MODULE_VALID */ + u16 e_excep_num; /* 模块注册异常个数 */ + u8 e_from_module[BBOX_MODULE_NAME_LEN]; /* 模块名 */ + struct exc_info_s cur_info; /* 模块dump信息控制状态 */ + u32 e_mini_offset; /* 模块最小集异常信息偏移值,基于模块预留内存首地址,从magic开始 */ + u32 e_mini_len; /* 模块最小集异常信息长度 */ + u32 e_info_offset; /* 模块全部异常信息偏移值,基于模块预留内存首地址,从magic开始 */ + u32 e_info_len; /* 模块全部异常信息长度 */ + struct exc_description_s e_description[1]; /* 模块异常注册信息 */ +}; + +/* 通过注册函数注册异常 */ +struct rdr_ddr_module_info_s { + u32 magic; /* 使用宏MODULE_MAGIC */ + u32 e_mini_offset; /* 模块最小集异常信息偏移值,基于模块预留内存首地址,从magic开始 */ + u32 e_mini_len; /* 模块最小集异常信息长度 */ + u32 e_info_offset; /* 模块全部异常信息偏移值,基于模块预留内存首地址,从magic开始 */ + u32 e_info_len; /* 模块全部异常信息长度 */ +}; + +enum MODULE_DUMP_STATUS { + STATUS_INIT = 0, + STATUS_DOING = 1, + STATUS_DONE = 2, +}; +#endif + +#ifndef BBOX_COMMON_STRUCT +#define BBOX_COMMON_STRUCT +typedef void (*bbox_e_callback)(u32, void*); + +typedef struct bbox_exception_info { + u32 e_excepid; // exception id; + u32 e_excepid_end; // can register exception id 
region. [excepid~excepid_end] + u64 e_notify_core_mask; // need notify other core mask + u64 e_reset_core_mask; // need reset other core mask + u8 e_exce_type; // the type of exception + u8 e_from_core; // the core of happen exception + u8 e_process_priority; // exception process priority + u8 e_reboot_priority; // exception reboot priority, just recommended host operation + u8 e_reentrant; // whether to allow exception reentrant + u8 e_from_module[BBOX_MODULE_NAME_LEN]; // the module of happen excption + u8 e_desc[BBOX_EXCEPTIONDESC_MAXLEN]; // the desc of happen excption + bbox_e_callback e_callback; // will be called when excption has processed. +} bbox_exception_info_s; +#endif + +/* + * @brief : bbox dump done + * @param [in] : u32 dev_id device chip id, may NOT same with device slot id + * @param [in] : u8 core_id exception core + * @param [in] : u32 excep_id exception id + * @param [in] : u8 etype exception type + * @param [in] : excep_time *time exception time + * @param [in] : bbox_cb_dump_done fndone callback function + * @return : NA + */ +typedef void (*bbox_cb_dump_done)(u32 dev_id, u8 core_id, u32 excep_id, u8 etype, const excep_time *time); + +/* + * @brief : bbox dump, the function over, need call fndone to mark dump over + * @param [in] : u32 dev_id device chip id, may NOT same with device slot id + * @param [in] : u8 core_id exception core + * @param [in] : u32 excep_id exception id + * @param [in] : u8 etype exception type + * @param [in] : excep_time *time exception time + * @param [in] : u32 arg arg + * @param [in] : bbox_cb_dump_done fndone callback function + * @return : NA + */ +typedef void (*bbox_dump)(u32 dev_id, u8 core_id, u32 excep_id, u8 etype, + const excep_time *time, u32 arg, bbox_cb_dump_done fndone); + +/* + * @brief : bbox reset + * @param [in] : u32 dev_id device chip id, may NOT same with device slot id + * @param [in] : u8 core_id exception core + * @param [in] : u32 excep_id exception id + * @param [in] : u8 etype exception 
type + * @return : NA + */ +typedef void (*bbox_reset)(u32 dev_id, u8 core_id, u32 excep_id, u8 etype); + +struct bbox_module_ops { + bbox_dump ops_dump; + bbox_reset ops_reset; +}; + +struct bbox_register_module_result { + u64 log_addr; // reserved physical address + u32 log_len; // reserved physical length +}; + +/* + * @brief : register module + * @param [in] : u8 core_id core id + * @param [in] : struct bbox_module_ops* ops module ops + * @param [in] : struct bbox_register_module_result *retinfo register result info + * @return : <0 failure; ==0 success + */ +int bbox_register_module_ops(u8 core_id, const struct bbox_module_ops *ops, + struct bbox_register_module_result *retinfo); + +/* + * @brief : unregister module + * @param [in] : u8 core_id core id + * @return : <0 failure; ==0 success + */ +int bbox_unregister_module_ops(u8 core_id); + +/* + * @brief : unregister module + * @param [in] : dev_id device chip id, may NOT same with device slot id + * @param [in] : u32 excep_id exception id + * @param [in] : excep_time *timestamp exception time + * @param [in] : u32 arg arg + * @return : NA + */ +void bbox_system_error(u32 dev_id, u32 excep_id, const excep_time *timestamp, u32 arg); + +#ifndef BBOX_COMMON_INTERFACE +#define BBOX_COMMON_INTERFACE +/* + * @brief : register exception + * @param [in] : struct bbox_exception_info *e exception info + * @return : e_excepid + * == 0 fail; >0 success + */ +u32 bbox_register_exception(const struct bbox_exception_info *e); + +/* + * func name: bbox_unregister_exception + * func args: u32 excepid, exception id; + * return : < 0 fail + * >=0 success + */ +int bbox_unregister_exception(u32 excepid); + +/* + * @brief : get device error code + * @param [in] : dev_id device chip id, may NOT same with device slot id + * @param [out] : u32 *e_code exception code array; + * @param [in] : u32 e_capacity array num, max value is 128 + * @return : >0: error of num; + * =0: none of error; + * <0: failure; + */ +int 
bbox_get_device_errorcode(u32 dev_id, u32 *e_code, u32 e_capacity); + +/* + * @brief : get the exception description + * @param [in] : u32 ecode exception id + * @param [out] : u8 *desc string array, exception description + * @param [in] : u32 length string array length + * @return : <0 failure; ==0 success + */ +int bbox_get_device_ecode_info(u32 ecode, u8 *desc, u32 length); +#endif + +#endif // BB_PUB_CLOUD_H + diff --git a/inc/toolchain/bbox/device/bbox_pub_mini.h b/inc/toolchain/bbox/device/bbox_pub_mini.h new file mode 100644 index 000000000..6fadd5c77 --- /dev/null +++ b/inc/toolchain/bbox/device/bbox_pub_mini.h @@ -0,0 +1,281 @@ +/* + * @file bbox_pub_mini.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved. + * + * blackbox. (kernel run data recorder.) + * This head file is about mini interface + */ + +#ifndef BB_PUB_MINI_H +#define BB_PUB_MINI_H + +#include "bbox_types.h" + +#ifndef RDR_BASIC_TYPE +#define RDR_BASIC_TYPE +/* 模块id列表 */ +typedef enum CORE_LIST { + RDR_UNDEF = 0x0, + RDR_DRIVER = 0x1, + RDR_AP = 0x2, + RDR_TS = 0x3, + RDR_AICPU = 0x5, + RDR_DVPP = 0xa, + RDR_LPM3 = 0xc, + RDR_BIOS = 0x14, + RDR_TEEOS = 0x15, + RDR_TF = 0x1A, + RDR_CORE_MAX = 0x1B, +} rdr_coreid; + +#define RDR_COREID_VALID(coreid) ((((coreid) == 0) || ((coreid) >= RDR_CORE_MAX)) ? 
BBOX_FALSE : BBOX_TRUE) +#define RDR_COREID_MASK(coreid) (u64)(unsigned)(1 << ((coreid) - 1)) + +/* 异常类型 */ +typedef enum { + REBOOT_REASON_LABEL0 = 0x0, /* label0:重启相关 */ + AP_S_COLDBOOT = REBOOT_REASON_LABEL0, /* 冷启动,如关机后第一次开机;掉电后第一次开机 */ + BIOS_S_EXCEPTION = 0x1, /* bios异常重启,前一次启动bios异常 */ + AP_S_HOTBOOT = 0x2, /* 热复位,如按键复位,芯片硬复位等 */ + REBOOT_REASON_LABEL1 = 0x10, /* label1:硬件原因复位 */ + AP_S_ABNORMAL = REBOOT_REASON_LABEL1, /* 未检测到的异常 */ + AP_S_TSENSOR = 0x1f, /* soc温保复位 */ + AP_S_PMU = 0x20, /* 过流、欠压、PMU过温引起的硬件复位 */ + AP_S_DDR_FATAL = 0X22, /* ddr fatal异常复位,如:ddr颗粒超温复位 */ + REBOOT_REASON_LABEL2 = 0x24, /* label2:ap软件原因复位 */ + AP_S_PANIC = REBOOT_REASON_LABEL2, /* A核panic,如访问非法地址 */ + AP_S_OOM = 0x2a, /* OOM 异常 */ + AP_S_HDC = 0x2b, /* HDC 断连 */ + REBOOT_REASON_LABEL3 = 0x2c, /* label3:其他模块复位 */ + STARTUP_S_EXCEPTION = 0x2c, /* 模块启动异常 */ + HEARTBEAT_S_EXCEPTION = 0x2d, /* 模块心跳异常 */ + LPM3_S_EXCEPTION = 0x32, /* LPM3子系统检测到的各种异常 */ + TS_S_EXCEPTION = 0x33, /* TS子系统检测到的各种异常 */ + DVPP_S_EXCEPTION = 0x35, /* DVPP异常 */ + DRIVER_S_EXCEPTION = 0x36, /* DRIVER异常 */ + TEE_S_EXCEPTION = 0x38, /* teeos异常 */ + REBOOT_REASON_LABEL4 = 0x40, /* label4: */ + REBOOT_REASON_LABEL5 = 0x65, /* label5:电源异常 */ + REBOOT_REASON_LABEL6 = 0x6A, /* label6:xloader异常 */ + REBOOT_REASON_LABEL7 = 0x74, /* label7:fastboot异常 */ + REBOOT_REASON_LABEL8 = 0x89, /* label8: host侧异常 */ + DEVICE_LOAD_TIMEOUT = 0x8A, /* 设备启动超时 */ + DEVICE_HEAT_BEAT_LOST = 0x8B, /* 设备心跳丢失 */ + DEVICE_RESET_INFORM = 0x8C, /* 用户复位 */ + DEVICE_ADVANCED_ERR_REPORT = 0x8D, /* 设备AER错误 */ + REBOOT_REASON_LABEL9 = 0x90, /* label9: */ + REBOOT_REASON_LABEL10 = 0xB0, /* label10: */ + RDR_EXCEPTION_REASON_INVALID = 0xFF, +} EXCH_SOURCE; + +enum PROCESS_PRI { + RDR_OTHER = 0x0, /* 不确定 */ + RDR_NOTICE = 0x1, /* 提示 */ + RDR_MINOR = 0x2, /* 次要 */ + RDR_WARN = RDR_MINOR, + RDR_MAJOR = 0x3, /* 重要 */ + RDR_ERR = RDR_MAJOR, + RDR_CRITICAL = 0x4, /* 紧急 */ + RDR_DEAD = RDR_CRITICAL, + RDR_PPRI_MAX +}; + +enum REBOOT_PRI { + RDR_REBOOT_NOW = 
0x01, /* 立即重启 */ + RDR_REBOOT_WAIT, /* 等待重启 */ + RDR_REBOOT_NO, /* 不重启 */ + RDR_REBOOT_MAX +}; + +enum REENTRANT { + RDR_REENTRANT_ALLOW = 0x01, /* 可重复触发的异常 */ + RDR_REENTRANT_DISALLOW /* 不可重复触发的异常 */ +}; + +#define MODULE_MAGIC 0xbaba0514 +#define MODULE_VALID 1 +#define MODULE_EXCEPTION_REGISTER_MAXNUM 512 + +struct exc_description_s { + u32 e_excepid; /* 异常id */ + u8 e_process_level; /* 异常处理级别:BBOX_PROCESS_PRI */ + u8 e_reboot_priority; /* 异常重启级别:BBOX_REBOOT_PRI */ + u8 e_excep_type; /* 异常类型 */ + u8 e_reentrant; /* 异常是否可重入 */ + u64 e_notify_core_mask; /* 异常联动掩码 */ + u8 e_desc[BBOX_EXCEPTIONDESC_MAXLEN]; /* 异常描述 */ +}; + +struct exc_info_s { + excep_time e_clock; /* 模块触发异常时间 */ + u32 e_excepid; /* 模块触发的异常id */ + u16 e_dump_status; /* 模块将异常信息存预留内存的控制状态 */ + u16 e_save_status; /* 代理将异常信息从预留内存导出的控制状态 */ +}; + +/* 通过共享内存注册异常 */ +struct exc_module_info_s { + u32 magic; /* 使用宏MODULE_MAGIC */ + u16 e_excep_valid; /* 模块写完注册的异常,则设置MODULE_VALID */ + u16 e_excep_num; /* 模块注册异常个数 */ + u8 e_from_module[BBOX_MODULE_NAME_LEN]; /* 模块名 */ + struct exc_info_s cur_info; /* 模块dump信息控制状态 */ + u32 e_mini_offset; /* 模块最小集异常信息偏移值,基于模块预留内存首地址,从magic开始 */ + u32 e_mini_len; /* 模块最小集异常信息长度 */ + u32 e_info_offset; /* 模块全部异常信息偏移值,基于模块预留内存首地址,从magic开始 */ + u32 e_info_len; /* 模块全部异常信息长度 */ + struct exc_description_s e_description[1]; /* 模块异常注册信息 */ +}; + +/* 通过注册函数注册异常 */ +struct rdr_ddr_module_info_s { + u32 magic; /* 使用宏MODULE_MAGIC */ + u32 e_mini_offset; /* 模块最小集异常信息偏移值,基于模块预留内存首地址,从magic开始 */ + u32 e_mini_len; /* 模块最小集异常信息长度 */ + u32 e_info_offset; /* 模块全部异常信息偏移值,基于模块预留内存首地址,从magic开始 */ + u32 e_info_len; /* 模块全部异常信息长度 */ +}; + +enum MODULE_DUMP_STATUS { + STATUS_INIT = 0, + STATUS_DOING = 1, + STATUS_DONE = 2, +}; +#endif + +#define RDR_MODULE_NAME_LEN 16 +#define RDR_EXCEPTIONDESC_MAXLEN 48 + +typedef void (*rdr_e_callback)(u32, void*); + +/* + * struct list_head e_list; + * u32 excepid, exception id; + * if excepid equal 0, will auto generation excepid, and return it. 
+ * u32 excepid_end, can register exception id region. [excepid~excepid_end]; + need excepid_end >= excepid, + * if excepid_end equal 0, will be register excepid only, + but excepid & excepid_end cant equal 0 at the same time. + * u64 notify_core_mask, need notify other core mask + * u64 reset_core_mask, need reset other core mask + * u8 exce_type, the type of exception + * u8 from_core, the core of happen exception + * u8 process_priority, exception process priority + * u8 reboot_priority, exception reboot priority + * u8 reentrant, whether to allow exception reentrant + * char* from_module, the module of happen excption + * char* desc, the desc of happen excption + * rdr_e_callback callback, will be called when excption has processed. + */ +struct rdr_exception_info_s { + struct list_head e_list; + u32 e_excepid; + u32 e_excepid_end; + u64 e_notify_core_mask; + u64 e_reset_core_mask; + u8 e_exce_type; + u8 e_from_core; + u8 e_process_priority; + u8 e_reboot_priority; + u8 e_reentrant; + u8 e_from_module[RDR_MODULE_NAME_LEN]; + u8 e_desc[RDR_EXCEPTIONDESC_MAXLEN]; + rdr_e_callback e_callback; +}; + +/* + * @brief : module dump done, callback it, tell bbox dump done + * @param [in] : u32 excep_id exception id + * @param [in] : u8 core_id which core done + * @param [in] : u8 etype exception type + * @param [in] : excep_time *time exception time + * @return : NA + */ +typedef void (*pfn_cb_dump_done)(u32 excepid, u8 coreid, u8 etype, excep_time time); + +/* + * @brief : call module dump exception info + * the function over, need call fndone to mark dump over + * @param [in] : u32 excepid exception id + * @param [in] : u8 coreid exception core + * @param [in] : u8 etype exception type + * @param [in] : excep_time time exception time + * @param [in] : char* logpath exception log path + * @param [in] : pfn_cb_dump_done fndone + * @return : NA + */ +typedef void (*pfn_dump)(u32 excepid, u8 etype, u8 coreid, excep_time time, + char* logpath, pfn_cb_dump_done fndone); + 
+/* + * @brief : call module reset + * @param [in] : u32 excepid exception id + * @param [in] : u8 core_id exception core + * @param [in] : u8 etype exception type + * @return : NA + */ +typedef void (*pfn_reset)(u32 excepid, u8 etype, u8 coreid); + +struct rdr_module_ops { + pfn_dump ops_dump; + pfn_reset ops_reset; +}; + +struct rdr_register_module_result { + u64 log_vaddr; // reserved physical address + u32 log_len; // reserved physical address length +}; + +/* + * @brief : register exception + * @param [in] : struct rdr_exception_info_s* e exception info + * @return : excepid + * == 0 error; > 0 success + */ +u32 rdr_register_exception(struct rdr_exception_info_s *e); + +/* + * @brief : unregister exception + * @param [in] : u32 excep_id, exception id; + * @return : < 0 fail; >=0 success + */ +int rdr_unregister_exception(u32 excepid); + +/* + * @brief : register module + * @param [in] : u8 core_id core id + * @param [in] : struct rdr_module_ops* ops ops info + * @param [out] : struct rdr_register_module_result* retinfo return info + * @return : < 0 error; >=0 success + */ +int rdr_register_module_ops(u8 coreid, const struct rdr_module_ops* ops, struct rdr_register_module_result* retinfo); + +/* + * @brief : unregister module + * @param [in] :u8 core_id core id + * @return : < 0 fail; >=0 success + */ +int rdr_unregister_module_ops(u8 coreid); + +/* + * @brief : report exception + * @param [in] : u32 excep_id exception id + * @param [in] : excep_time timestamp exception time + * @param [in] : u32 arg arg + * @return : NA + */ +void mntn_system_error(u32 excep_id, excep_time timestamp, u32 arg); + +/* + * @brief : get arg value + * @param [in] : u32 excepid exception id + * @param [in] : u8 coreid exception core id + * @param [in] : excep_time timestamp exception time + * @param [out] : u32 *arg return arg value + * @return : NA + */ +int rdr_module_dump_get_arg(u32 excepid, u8 coreid, excep_time time, u32 *arg); + +#endif // BB_PUB_MINI_H + diff --git 
a/inc/toolchain/bbox/device/bbox_types.h b/inc/toolchain/bbox/device/bbox_types.h new file mode 100644 index 000000000..8637ab298 --- /dev/null +++ b/inc/toolchain/bbox/device/bbox_types.h @@ -0,0 +1,63 @@ +/* + * @file bbox_types.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * blackbox header file (blackbox: kernel run data recorder.) + * define base data type + */ + +#ifndef RDR_TYPES_H +#define RDR_TYPES_H +#include + +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +typedef signed long long s64; +typedef unsigned long long u64; + +typedef struct excep_time_t { + u64 tv_sec; + u64 tv_usec; +} excep_time; + +#define BBOX_TRUE 1 +#define BBOX_FALSE 0 + +#define BBOX_SUCCESS 0 +#define BBOX_FAILURE (-1) +#define BBOX_COMM_INVAL (-2) // communication failure +#define BBOX_COMM_TIMEOUT (-3) // communication timeout +#define BBOX_MSG_INVAL (-4) // message invalid +#define BBOX_MSG_NONE (-5) // no have data +#define BBOX_NO_SUPPORT (-10) // not support + +#define BBOX_DISALLOW_REETRANT 1 // return value + +#define BBOX_EOK 0 +#define BBOX_ENXIO (-6) // No such device or address +#define BBOX_ENODEV (-19) // No such device +#define BBOX_EINVAL (-22) // Invalid argument +#define BBOX_ENOSPC (-28) // No space left on device + +#define BBOX_NOTIFY_DONE (NOTIFY_DONE) +#define BBOX_NOTIFY_OK (NOTIFY_OK) + +#define BBOX_UCHAR_INVALID 0xFF +#define BBOX_UINT_INVALID 0xFFFFFFFF +#define BBOX_MODULE_NAME_LEN 16 +#define BBOX_EXCEPTIONDESC_MAXLEN 48 + +#endif // RDR_TYPES_H + diff --git a/inc/toolchain/ide_daemon_api.h b/inc/toolchain/ide_daemon_api.h new file mode 100644 index 000000000..eafd15514 --- /dev/null +++ 
b/inc/toolchain/ide_daemon_api.h @@ -0,0 +1,173 @@ +/** + * @file ide_daemon_api.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2020. All rights reserved.\n + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n + * + * 描述:算子dump接口头文件。\n + */ + +/** @defgroup dump dump接口 */ +#ifndef IDE_DAEMON_API_H +#define IDE_DAEMON_API_H + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @ingroup dump。 + * + * dump ip信息缓冲区长度 + */ +typedef void *IDE_SESSION; + +/** + * @ingroup dump。 + * + * dump ip信息缓冲区长度 + */ +#ifndef uint32_t +typedef unsigned int uint32_t; +#endif + +/** + * @ingroup dump。 + * + * dump ip信息缓冲区长度 + */ +#define IDE_DAEMON_IP_LEN (16) + +/** + * @ingroup dump。 + * + * dump 连接信息 + */ +typedef struct tagConnInfo { + char ip[IDE_DAEMON_IP_LEN]; /**< IP地址 */ + int port; /**< 端口号 */ + int deviceID; /**< 设备ID号 */ +} connInfo_t; + +/** + * @ingroup dump。 + * + * dump 错误信息 + */ +typedef enum tagIdeError { + IDE_DAEMON_NONE_ERROR = 0, /**< 无错误 */ + IDE_DAEMON_UNKNOW_ERROR = 1, /**< 未知错误 */ + IDE_DAEMON_WRITE_ERROR = 2, /**< 写入失败 */ + IDE_DAEMON_NO_SPACE_ERROR = 3, /**< 磁盘已满 */ + IDE_DAEMON_INVALID_PATH_ERROR = 4, /**< 无效路径 */ + IDE_DAEMON_INVALID_PARAM_ERROR = 5, /**< 无效参数 */ + IDE_DAEMON_TCP_CONNECT_ERROR = 6, /**< TCP连接失败 */ + IDE_DAEMON_TCP_CHANNEL_ERROR = 7, /**< TCP通道异常 */ + IDE_DAEMON_MALLOC_ERROR = 8, /**< 申请堆内存失败 */ + IDE_DAEMON_HDC_CHANNEL_ERROR = 9, /**< HDC通路异常 */ + IDE_DAEMON_CHANNEL_ERROR = 10, /**< 通路异常 */ + IDE_DAEMON_MKDIR_ERROR = 11, /**< 创建目录失败 */ + IDE_DAEMON_MEMCPY_ERROR = 12, /**< 内存拷贝失败 */ + IDE_DAEMON_MEMSET_ERROR = 13, /**< 内存清零失败 */ + IDE_DAEMON_INVALID_IP_ERROR = 14, /**< 无效的IP地址 */ + IDE_DAEMON_INTERGER_REVERSED_ERROR = 15, /**< 整形溢出 */ + IDE_DAEMON_DUMP_QUEUE_FULL = 16, /**< dump队列已满 */ + NR_IDE_DAEMON_ERROR, /**< 枚举最大值 */ +}ideError_t; + +/** + * @ingroup dump。 + * + * dump 错误信息 + */ 
+typedef ideError_t IdeErrorT; + +/** + * @ingroup dump。 + * + * dump回传数据块标识信息 + */ +enum IdeDumpFlag { + IDE_DUMP_NONE_FLAG = 0, /**< 无标志位 */ +}; + +/** + * @ingroup dump。 + * + * dump回传数据块 + */ +struct IdeDumpChunk { + char *fileName; /**< 文件名,绝对路径 */ + unsigned char *dataBuf; /**< 写入的数据Buffer */ + unsigned int bufLen; /**< 写入的数据Buffer长度 */ + unsigned int isLastChunk; /**< 是否最后一块数据 0:非最后一块数据;1:最后一块数据 */ + long long offset; /**< 文件写入的偏移位 -1为追加形式写入 */ + enum IdeDumpFlag flag; /**< 标志位 */ +}; + +/** + * @ingroup dump + * @par 描述: 创建Dump通路。 + * + * @attention 无 + * @param privInfo [IN] 启动Dump通路数据(格式host:port;device_id(HDC), local;device_id(Local)) + * @retval #非空 创建会话成功 + * @retval #NULL 创建会话失败 + * @par 依赖: + * @li ide_daemon_api.cpp:该接口所属的开发包。 + * @li ide_daemon_api.h:该接口声明所在的头文件。 + * @see 无 + * @since + */ +extern IDE_SESSION IdeDumpStart(const char *privInfo); + +/** + * @ingroup dump + * @par 描述: 进行数据Dump,Dump完成数据落盘后返回。 + * + * @attention 无 + * @param session [IN] 会话句柄 + * @param dumpChunk [IN] Dump的数据结构体 + * @retval #IDE_DAEMON_NONE_ERROR 写数据成功 + * @retval #IDE_DAEMON_INVALID_PARAM_ERROR 非法参数 + * @retval #IDE_DAEMON_UNKNOW_ERROR 写数据失败 + * @par 依赖: + * @li ide_daemon_api.cpp:该接口所属的开发包。 + * @li ide_daemon_api.h:该接口声明所在的头文件。 + * @see 无 + * @since + */ +extern IdeErrorT IdeDumpData(IDE_SESSION session, const struct IdeDumpChunk *dumpChunk); + +/** + * @ingroup dump + * @par 描述: 关闭Dump通路。 + * + * @attention 无 + * @param session [IN] 会话句柄 + * @retval #IDE_DAEMON_NONE_ERROR 关闭会话成功 + * @retval #IDE_DAEMON_INVALID_PARAM_ERROR 非法参数 + * @retval #IDE_DAEMON_UNKNOW_ERROR 关闭会话失败 + * @par 依赖: + * @li ide_daemon_api.cpp:该接口所属的开发包。 + * @li ide_daemon_api.h:该接口声明所在的头文件。 + * @see 无 + * @since + */ +extern IdeErrorT IdeDumpEnd(IDE_SESSION session); + +#ifdef __cplusplus +} +#endif + +#endif +/* + * History: \n + * 2018-10-10, huawei, 初始化该文件。 \n + * 2020-02-10, huawei, 更改API规范化。 \n + * + * vi: set expandtab ts=4 sw=4 tw=120: + */ diff --git a/inc/toolchain/ide_tlv.h 
b/inc/toolchain/ide_tlv.h new file mode 100644 index 000000000..cf768e4b9 --- /dev/null +++ b/inc/toolchain/ide_tlv.h @@ -0,0 +1,75 @@ +/** + * @file ide_tlv.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2020. All rights reserved.\n + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n + * + * 描述:ascend debug 头文件。\n + */ + +/** @defgroup adx ADX */ +#ifndef IDE_TLV_H +#define IDE_TLV_H + +/** + * @ingroup adx + * + * adx 命令请求列表 + */ +enum cmd_class { + IDE_EXEC_COMMAND_REQ = 0, /**< 执行device命令请求\n */ + IDE_SEND_FILE_REQ, /**< 发送文件到device命令请求\n */ + IDE_DEBUG_REQ, /**< Debug命令请求\n */ + IDE_BBOX_REQ, /**< Bbox命令请求\n */ + IDE_LOG_REQ, /**< Log命令请求\n */ + IDE_PROFILING_REQ, /**< Profiling命令请求\n */ + IDE_OME_DUMP_REQ, /**< Ome dump命令请求\n */ + IDE_FILE_SYNC_REQ, /**< 发送文件到AiHost 命令请求\n */ + IDE_EXEC_API_REQ, /**< 执行AiHost Api命令请求\n */ + IDE_EXEC_HOSTCMD_REQ, /**< 执行AiHost 命令命令请求\n */ + IDE_DETECT_REQ, /**< 执行AiHost 通路命令请求\n */ + IDE_FILE_GET_REQ, /**< 获取AiHost侧文件命令请求\n */ + IDE_NV_REQ, /**< 执行AiHost Nv命令请求\n */ + IDE_DUMP_REQ, /**< Dump命令请求\n */ + IDE_FILE_GETD_REQ, /**< 获取Device侧文件命令请求\n */ + IDE_INVALID_REQ, /**< 无效命令请求\n */ + NR_IDE_CMD_CLASS, /**< 标识命令请求最大值\n */ +}; + +/** + * @ingroup adx + * + * adx 命令请求列表 + */ +typedef enum cmd_class CmdClassT; + +/** + * @ingroup adx + * + * adx 数据交互格式 + */ +struct tlv_req { + enum cmd_class type; /**< 数据包命令类型 */ + int dev_id; /**< 设备 ID */ + int len; /**< 数据包数据长度 */ + char value[0]; /**< 数据包数据 */ +}; + +/** + * @ingroup adx + * + * adx 数据交互格式 + */ +typedef struct tlv_req TlvReqT; + +#endif +/* + * History: \n + * 2018-10-10, huawei, 初始化该文件。 \n + * 2020-02-10, huawei, 更改API规范化。 \n + * + * vi: set expandtab ts=4 sw=4 tw=120: + */ diff --git a/inc/toolchain/prof_acl_api.h b/inc/toolchain/prof_acl_api.h new file mode 100644 index 000000000..74f660650 --- /dev/null +++ 
b/inc/toolchain/prof_acl_api.h @@ -0,0 +1,51 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MSPROF_ENGINE_PROF_ACL_API_H_ +#define MSPROF_ENGINE_PROF_ACL_API_H_ + +#define MSVP_PROF_API __attribute__((visibility("default"))) + +#include <cstdint> +#include <string> + +/** + * @name ProfErrorCode + * @brief error code of prof_acl_apis + */ +enum ProfErrorCode { + PROF_ERROR_NONE = 0, // ok + PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr + PROF_ERROR_REPEAT_INIT, // profiling has already been inited + PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string + PROF_ERROR_DIR_NO_ACCESS, // dir is not accessible + PROF_ERROR_FAILURE, // failed to init or start profiling + PROF_ERROR_NOT_INITED, // profiling has not been inited + PROF_ERROR_DEVICE_INVALID, // device id invalid + PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics + PROF_ERROR_REPEAT_START, // profiling has already been started + PROF_ERROR_NOT_STARTED, // profiling has not been started +}; + +/** + * @brief transfer profiling config in acl.json to sample config + * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...} + * @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]} + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string
&sampleCfg); + +#endif  // MSPROF_ENGINE_PROF_ACL_API_H_ diff --git a/inc/toolchain/prof_engine.h b/inc/toolchain/prof_engine.h new file mode 100644 index 000000000..0e757dcfb --- /dev/null +++ b/inc/toolchain/prof_engine.h @@ -0,0 +1,207 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MSPROF_ENGINE_PROF_ENGINE_H_ +#define MSPROF_ENGINE_PROF_ENGINE_H_ +#define MSVP_PROF_API __attribute__((visibility("default"))) + +#include <map> +#include <string> +#include "prof_reporter.h" + +/** + * @file prof_engine.h + * @defgroup ModuleJobConfig the ModuleJobConfig group + * This is the ModuleJobConfig group + */ +namespace Msprof { +namespace Engine { +/** + * @ingroup ModuleJobConfig + * @brief struct ModuleJobConfig + * record config info + */ +struct ModuleJobConfig { + std::map<std::string, std::string> switches; /**< key is the config name, value is the config value(on or off) */ +}; + +/** + * @defgroup PluginIntf the PluginIntf group + * This is the PluginIntf group + */ + +/** + * @ingroup PluginIntf + * @brief class PluginIntf + */ +class MSVP_PROF_API PluginIntf { + public: + virtual ~PluginIntf() {} + + public: + /** + * @ingroup PluginIntf + * @name : Init + * @brief : API of user plugin, libmsprof call this API to send a Reporter to user plugin + * @par description : + * API of user plugin, libmsprof call this API to send a Reporter to user plugin.
+ * @param reporter [IN] const Reporter* the Reporter from libmsprof + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + * @see UnInit + */ + virtual int Init(const Reporter *reporter) = 0; + + /** + * @ingroup PluginIntf + * @name : OnNewConfig + * @brief : API of user plugin, libmsprof call this API to send config info to user plugin \n + If the user plugin needn't config, no need to redefine this function + * @param config [IN] const ModuleJobConfig * the config from libmsprof + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + * @see Init | UnInit + */ + virtual int OnNewConfig(const ModuleJobConfig *config) { return 0; } + + /** + * @ingroup PluginIntf + * @name : UnInit + * @brief : API of user plugin, libmsprof call this API to notify plugin stop to send data + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + * @see Init + */ + virtual int UnInit() = 0; +}; + +/** + * @defgroup EngineIntf the EngineIntf group + * This is the EngineIntf group + */ + +/** + * @ingroup EngineIntf + * @brief class EngineIntf + */ +class MSVP_PROF_API EngineIntf { + public: + virtual ~EngineIntf() {} + + public: + /** + * @ingroup EngineIntf + * @name : CreatePlugin + * @brief : API of user engine, libmsporf call this API to get a plugin + * @retval PluginIntf * The pointer of the new plugin + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + * @see ReleasePlugin + */ + virtual PluginIntf *CreatePlugin() = 0; + + /** + * @ingroup EngineIntf + * @name : ReleasePlugin + * @brief : API of user engine, libmsprof call this API to release a plugin + * @param plugin [IN] PluginIntf * the plugin to release + * @retval PROFILING_SUCCESS 0 
(success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + * @see CreatePlugin + */ + virtual int ReleasePlugin(PluginIntf *plugin) = 0; +}; + +/** + * @defgroup EngineMgr the EngineMgr group + * This is the EngineMgr group + */ + +/** + * @ingroup EngineMgr + * @name : RegisterEngine + * @brief : API of libmsprof, register an engine with a name + * @param module [IN] const std::string the name of plugin + * @param engine [IN] const EngineIntf* the plugin + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + */ +MSVP_PROF_API int RegisterEngine(const std::string &module, const EngineIntf *engine); + +/** + * @ingroup EngineMgr + * @name : Init + * @brief : API of libmsprof, init an engine with a name + * @param module [IN] const std::string the name of plugin + * @param module [IN] const EngineIntf* the plugin + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + * @see UnInit + */ +MSVP_PROF_API int Init(const std::string &module, const EngineIntf *engine); + +/** + * @ingroup EngineMgr + * @name : Init + * @brief : API of libmsprof, uninit an engine with a name + * @param module [IN] const std::string the name of plugin + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + * @see Init + */ +MSVP_PROF_API int UnInit(const std::string &module); +} // namespace Engine +} // namespace Msprof + +#endif // MSPROF_ENGINE_PROF_ENGINE_H_ \ No newline at end of file diff --git a/inc/toolchain/prof_mgr_core.h b/inc/toolchain/prof_mgr_core.h new file mode 100644 index 000000000..4f013eef0 --- /dev/null +++ b/inc/toolchain/prof_mgr_core.h @@ -0,0 +1,84 @@ +/** + * Copyright 2019-2020 Huawei 
Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MSPROF_ENGINE_PROF_MGR_CORE_H_ +#define MSPROF_ENGINE_PROF_MGR_CORE_H_ +#define MSVP_PROF_API __attribute__((visibility("default"))) + +#include +#include + +/** + * @file prof_mgr_core.h + * @brief : struct ProfMgrCfg + */ +struct ProfMgrCfg { + std::string startCfg; /**< start cfg. json format */ +}; + +/** + * @name : ProfMgrConf + * @brief : struct ProfMgrConf for example [{"ai_core_events":"0xa"}].the vector size means Number of iterations + */ +struct ProfMgrConf { + std::vector conf; /**< for op trace.Ge call this api to get each iteration profiling cfg.json format.*/ +}; + +/** + * @name : ProfMgrStartUP + * @brief : start Profiling task + * @param cfg [IN]ProfMgrCfg cfg : config of start_up profiling + * @retval void * (success) + * @retval nullptr (failed) + * + * @par depend: + * @li libmsprof + * @li prof_mgr_core.h + * @since c60 + * @see ProfMgrStop + */ +MSVP_PROF_API void *ProfMgrStartUp(const ProfMgrCfg *cfg); + +/** + * @name : ProfMgrStop + * @brief : stop Profiling task + * @param handle [in] void * handle return by ProfMgrStartUP + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_mgr_core.h + * @since c60 + * @see ProfMgrStartUp + */ +MSVP_PROF_API int ProfMgrStop(void *handle); + +/** + * @name : ProfMgrGetConf + * @brief : get profiler events conf + * @param conf 
[OUT]ProfMgrConf * return by ProfMgrGetConf + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * @par depend: + * @li libmsprof + * @li prof_mgr_core.h + * @since c60 + * @see ProfMgrStartUp + */ +MSVP_PROF_API int ProfMgrGetConf(const std::string &aicoreMetricsType, ProfMgrConf *conf); + +#endif // MSPROF_ENGINE_PROF_MGR_CORE_H_ \ No newline at end of file diff --git a/inc/toolchain/prof_reporter.h b/inc/toolchain/prof_reporter.h new file mode 100644 index 000000000..c734380c9 --- /dev/null +++ b/inc/toolchain/prof_reporter.h @@ -0,0 +1,89 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MSPROF_ENGINE_PROF_REPORTER_H_ +#define MSPROF_ENGINE_PROF_REPORTER_H_ +#define MSVP_PROF_API __attribute__((visibility("default"))) + +/** + * @file prof_reporter.h + * @defgroup reporter the reporter group + * This is the reporter group + */ +namespace Msprof { +namespace Engine { +/// the max tag length +#define MSPROF_ENGINE_MAX_TAG_LEN (31) +/** + * @ingroup reporter + * @brief struct ReporterData + * the sturct of the data send to libmsprof + */ +struct ReporterData { + char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; ///< the sub-type of the module, data with different tag will be writen + int deviceId; ///< the physical id of device + size_t dataLen; ///< the length of send data + unsigned char *data; ///< the data content +}; + +/** + * @ingroup reporter + * @brief class Reporter + * the Reporter class .used to send data to profiling + */ +class MSVP_PROF_API Reporter { + public: + virtual ~Reporter() {} + + public: + /** + * @ingroup reporter + * @name : Report + * @brief : API of libmsprof, report data to libmsprof, it's a non-blocking function \n + The data will be firstly appended to cache, if the cache is full, data will be ignored + * @param data [IN] const ReporterData * the data send to libmsporf + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_reporter.h + * @since c60 + * @see Flush + */ + virtual int Report(const ReporterData *data) = 0; + + /** + * @ingroup reporter + * @name : Flush + * @brief : API of libmsprof, notify libmsprof send data over, it's a blocking function \n + The all datas of cache will be write to file or send to host + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_reporter.h + * @since c60 + * @see ProfMgrStop + */ + virtual int Flush() = 0; +}; + +} // namespace Engine +} // namespace Msprof + +#endif // MSPROF_ENGINE_PROF_REPORTER_H_ \ No newline 
at end of file diff --git a/inc/toolchain/profiler_client.h b/inc/toolchain/profiler_client.h new file mode 100644 index 000000000..06c28791c --- /dev/null +++ b/inc/toolchain/profiler_client.h @@ -0,0 +1,86 @@ +#ifndef PROFILER_CLIENT_H_INCLUDED +#define PROFILER_CLIENT_H_INCLUDED + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#ifdef WIN32 +#ifdef PROFILERCLIENT_EXPORTS +#define MSVP_PROF_CLN_API __declspec(dllexport) +#else +#define MSVP_PROF_CLN_API __declspec(dllimport) +#endif +#else +#define MSVP_PROF_CLN_API +#endif +/** + * the data structure to write + */ +struct data_chunk { + char* relative_file_name;// from subpath begin; For example: subA/subB/example.txt; Note: the begin don't has '/'; + unsigned char* data_buf;// the pointer to the data + unsigned int buf_len;// the len of data_buf + unsigned int is_last_chunk;// = 1, the last chunk of the file; != 1, not the last chunk of the file + long long offset;// the begin location of the file to write; if the offset is -1, directly append data. +}; + +struct collect_dev_info_s { + int dev_id; +}; + +/** \brief use it to connect the server. + * + * \param const unsigned char* target: The engine gets it from ENV. Don't need care about it. + * \param const unsigned char* engine_name: For example OME;CCE; Runtime;Matrix... 
+ * \return the return value void* point the client handle + * + */ + +MSVP_PROF_CLN_API extern void* create_collect_client(const char* target, const char* engine_name); + +/** \brief notify profiling the device list + * + * \param void* handle: the return value of the create_collect_client function + * \param const char* job_ctx: identifies profiling job + * \param const collect_dev_info_s* dev_list: pointer to the device list + * \param int dev_num: the device number + * \return 0 on success + */ +MSVP_PROF_CLN_API extern int collect_host_sync_dev_list(void* handle, const char* job_ctx, const collect_dev_info_s* dev_list, int dev_num); + +/** \brief write data by this function to transfer + * + * \param void* handle: the return value of the create_collect_client function + * \param struct data_chunk* data: record the value to restore the sampling data + * \param const unsigned char* job_ctx: The engine gets it from ENV. Don't need care about it. Represent the context about profiling job. + * \return On success, the number of bytes written is returned(zero indicates nothing was written); On error, <0 is returned, and the value is set appropriately. + * + */ + +MSVP_PROF_CLN_API extern int collect_write(void* handle, const char* job_ctx, struct data_chunk* data); + +/** \brief release the handle + * + * \param void* handle: the return value of the create_collect_client function + * \return + * + */ + +MSVP_PROF_CLN_API extern void release_collect_client(void* handle); + +/** \brief update job ctx for specific device, colllect_update_job_ctx uses malloc() to allocate a buffer to hold the + * new job_ctx and return a pointer to the buffer. 
The caller should deallocate this buffer using free() + * + * \param const char* job_ctx: pointer to current job_ctx + * \param collect_ctx_info* info: update job_ctx with the info + * \return pointer to buffer which holds the new job_ctx + */ +MSVP_PROF_CLN_API extern char* collect_dev_update_job_ctx(const char* job_ctx, const collect_dev_info_s * info); +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // PROFILER_CLIENT_H_INCLUDED + + diff --git a/inc/toolchain/slog.h b/inc/toolchain/slog.h new file mode 100644 index 000000000..4f3967502 --- /dev/null +++ b/inc/toolchain/slog.h @@ -0,0 +1,368 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef D_SYSLOG_H_ +#define D_SYSLOG_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#ifndef LINUX +#define LINUX 0 +#endif // LINUX + +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +/** + * @ingroup slog + * + * debug level id + */ +#define DLOG_DEBUG 0 + +/** + * @ingroup slog + * + * info level id + */ +#define DLOG_INFO 1 + +/** + * @ingroup slog + * + * warning level id + */ +#define DLOG_WARN 2 + +/** + * @ingroup slog + * + * error level id + */ +#define DLOG_ERROR 3 + +/** + * @ingroup slog + * + * don't print log + */ +#define DLOG_NULL 4 + +/** + * @ingroup slog + * + * trace log print level id + */ +#define DLOG_TRACE 5 + +/** + * @ingroup slog + * + * oplog log print level id + */ +#define DLOG_OPLOG 6 + +/** + * @ingroup slog + * + * event log print level id + */ +#define DLOG_EVENT 0x10 + +/** + * @ingroup slog + * + * max log length + */ +#define MSG_LENGTH 1024 +#define DEBUG_LOG_MASK (0x00010000) +#define SECURITY_LOG_MASK (0x00100000) +#define RUN_LOG_MASK (0x01000000) +#define OPERATION_LOG_MASK (0x10000000) + +typedef struct tagDCODE { + const char *cName; + int cVal; +} DCODE; + +typedef struct tagKV { + char *kname; + char *value; +} KeyValue; + +/** + * @ingroup slog + * + * module id + */ +enum { + SLOG, /**< Slog */ + IDEDD, /**< IDE daemon device */ + IDEDH, /**< IDE daemon host */ + HCCL, /**< HCCL */ + FMK, /**< Framework */ + HIAIENGINE, /**< Matrix */ + DVPP, /**< DVPP */ + RUNTIME, /**< Runtime */ + CCE, /**< CCE */ +#if (OS_TYPE == LINUX) + HDC, /**< HDC */ +#else + HDCL, +#endif // OS_TYPE + DRV, /**< Driver */ + MDCFUSION, /**< Mdc fusion */ + MDCLOCATION, /**< Mdc location */ + MDCPERCEPTION, /**< Mdc perception */ + MDCFSM, + MDCCOMMON, + MDCMONITOR, + MDCBSWP, /**< MDC base software platform */ + MDCDEFAULT, /**< MDC undefine */ + MDCSC, /**< MDC spatial cognition */ + MDCPNC, + MLL, + DEVMM, /**< Dlog memory managent */ + KERNEL, /**< Kernel */ + LIBMEDIA, /**< Libmedia */ + CCECPU, /**< ai 
cpu */ + ASCENDDK, /**< AscendDK */ + ROS, /**< ROS */ + HCCP, + ROCE, + TEFUSION, + PROFILING, /**< Profiling */ + DP, /**< Data Preprocess */ + APP, /**< User Application */ + TS, /**< TS module */ + TSDUMP, /**< TSDUMP module */ + AICPU, /**< AICPU module */ + LP, /**< LP module */ + TDT, + FE, + MD, + MB, + ME, + IMU, + IMP, + GE, /**< Fmk */ + MDCFUSA, + CAMERA, + ASCENDCL, + TEEOS, + ISP, + SIS, + HSM, + DSS, + PROCMGR, // Process Manager, Base Platform + BBOX, + AIVECTOR, + TBE, + FV, + MDCMAP, + TUNE, + INVLID_MOUDLE_ID +}; + +#if (OS_TYPE == LINUX) +/** + * @ingroup slog + * @brief External log interface, which called by modules + */ +extern void dlog_init(void); + +/** + * @ingroup slog + * @brief dlog_getlevel: get module loglevel and enableEvent + * + * @param [in]moduleId: moudule id(see slog.h, eg: CCE), others: invalid + * @param [out]enableEvent: 1: enable; 0: disable + * @return: module level(0: debug, 1: info, 2: warning, 3: error, 4: null output) + */ +extern int dlog_getlevel(int moduleId, int *enableEvent); + +/** + * @ingroup slog + * @brief dlog_setlevel: set module loglevel and enableEvent + * + * @param [in]moduleId: moudule id(see slog.h, eg: CCE), -1: all modules, others: invalid + * @param [in]level: log level(0: debug, 1: info, 2: warning, 3: error, 4: null output) + * @param [in]enableEvent: 1: enable; 0: disable, others:invalid + * @return: 0: SUCCEED, others: FAILED + */ +extern int dlog_setlevel(int moduleId, int level, int enableEvent); + +/** + * @ingroup slog + * @brief CheckLogLevel: check module level enable or not + * users no need to call it because all dlog interface(include inner interface) has already called + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]logLevel: eg: DLOG_EVENT/DLOG_ERROR/DLOG_WARN/DLOG_INFO/DLOG_DEBUG + * @return: 1:enable, 0:disable + */ +extern int CheckLogLevel(int moduleId, int logLevel); + +/** + * @ingroup slog + * @brief dlog_error: print error log + * + * @param [in]moduleId: 
module id, eg: CCE + * @param [in]fmt: log content + */ +#define dlog_error(moduleId, fmt, ...) \ + do { \ + DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } while (0) + +/** + * @ingroup slog + * @brief dlog_warn: print warning log + * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]fmt: log content + */ +#define dlog_warn(moduleId, fmt, ...) \ + do { \ + if(CheckLogLevel(moduleId, DLOG_WARN) == 1) { \ + DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief dlog_info: print info log + * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]fmt: log content + */ +#define dlog_info(moduleId, fmt, ...) \ + do { \ + if(CheckLogLevel(moduleId, DLOG_INFO) == 1) { \ + DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief dlog_debug: print debug log + * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]fmt: log content + */ +#define dlog_debug(moduleId, fmt, ...) \ + do { \ + if(CheckLogLevel(moduleId, DLOG_DEBUG) == 1) { \ + DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief dlog_event: print event log + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]fmt: log content + */ +#define dlog_event(moduleId, fmt, ...) 
\ + do { \ + DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } while (0) + +/** + * @ingroup slog + * @brief Dlog: print log, need caller to specify level + * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]fmt: log content + */ +#define Dlog(moduleId, level, fmt, ...) \ + do { \ + if(CheckLogLevel(moduleId, level) == 1) { \ + DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogSub: print log, need caller to specify level and submodule + * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]submodule: eg: engine + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]fmt: log content + */ +#define DlogSub(moduleId, submodule, level, fmt, ...) \ + do { \ + if(CheckLogLevel(moduleId, level) == 1) { \ + DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogWithKV: print log, need caller to specify level and other paramters + * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]pstKVArray: key-value array + * @param [in]kvNum: key-value element num in array + * @param [in]fmt: log content + */ +#define DlogWithKV(moduleId, level, pstKVArray, kvNum, fmt, ...) 
\ + do { \ + if(CheckLogLevel(moduleId, level) == 1) { \ + DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogFlush: flush log buffer to file + */ +void DlogFlush(void); + +/** + * @ingroup slog + * @brief Internal log interface, other modules are not allowed to call this interface + */ +void DlogErrorInner(int moduleId, const char *fmt, ...); +void DlogWarnInner(int moduleId, const char *fmt, ...); +void DlogInfoInner(int moduleId, const char *fmt, ...); +void DlogDebugInner(int moduleId, const char *fmt, ...); +void DlogEventInner(int moduleId, const char *fmt, ...); +void DlogInner(int moduleId, int level, const char *fmt, ...); +void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); + +#else +_declspec(dllexport) void dlog_init(void); +_declspec(dllexport) int dlog_getlevel(int moduleId, int *enableEvent); +#endif // OS_TYPE + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // D_SYSLOG_H_ diff --git a/inc/toolchain/stackcore/stackcore.h b/inc/toolchain/stackcore/stackcore.h new file mode 100644 index 000000000..a2f9dcef3 --- /dev/null +++ b/inc/toolchain/stackcore/stackcore.h @@ -0,0 +1,28 @@ +/** + * @file stackcore.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2020. 
All rights reserved.\n + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n + * + * 描述:stackcore 头文件。\n + */ + +/** @defgroup stackcore StackCore */ +#ifndef LIB_STACKCORE_H +#define LIB_STACKCORE_H + +/** + * @ingroup stackcore + * @brief init stackcore, which register signal hander for exception core + */ +#ifdef __cplusplus +extern "C"{ +#endif +int StackInit(); +#ifdef __cplusplus +} +#endif +#endif diff --git a/module.mk b/module.mk new file mode 100644 index 000000000..b71a8f072 --- /dev/null +++ b/module.mk @@ -0,0 +1,72 @@ +LOCAL_PATH := $(call my-dir) +include $(CLEAR_VARS) + +# static check function. Just for static check. +TF_ADPTER_OUTPUT_BASE := $(PWD)/out/$(product)/host/obj/tf_adapter +TF_ADPTER_GEN_PATH := $(TF_ADPTER_OUTPUT_BASE)/genfiles +TF_ADPTER_EXEC_PATH := $(TF_ADPTER_GEN_PATH)/execroot/tf_adapter +TF_ADPTER_REL_BUILD_PATH := $(TF_ADPTER_OUTPUT_BASE)/genfiles/execroot/tf_adapter +TF_ADPTER_WORKSPACE_BASE := third_party/tensorflow/huawei_patch/tf-1.15.0/tf_plugin +TF_ADPTER_REL_WORKSPACE_BASE := $(PWD)/$(TF_ADPTER_WORKSPACE_BASE) +TF_ADPTER_LOG_PATH := $(TF_ADPTER_OUTPUT_BASE)/tf_adapter_log +define do_libtf_adapter_coverity_check + @mkdir -p ${TF_ADPTER_LOG_PATH} + @mkdir -p $(PWD)/out/$(product)/host/obj/lib + @bash ${TF_ADPTER_REL_WORKSPACE_BASE}/tools/build_wheel.sh $(PWD)/$@ $(product) \ + 2>&1 | tee $(TF_ADPTER_LOG_PATH)/obuild_libtf_adapter.log + @python ${TF_ADPTER_REL_WORKSPACE_BASE}/tools/gen_sc_makefile_bazel.py \ + $(TF_ADPTER_LOG_PATH)/obuild_libtf_adapter.log \ + ${TF_ADPTER_REL_WORKSPACE_BASE}/tools/sc_list.txt \ + $(TF_ADPTER_EXEC_PATH)/Makefile_sc.mk \ + ${TF_ADPTER_REL_WORKSPACE_BASE}/ + #static check tool does not recognize -isystem and -iquote + @sed -i "s/iquote/I/g" $(TF_ADPTER_EXEC_PATH)/Makefile_sc.mk + @sed -i "s/isystem/I/g" $(TF_ADPTER_EXEC_PATH)/Makefile_sc.mk + #change 
relative path to full path + @sed -i "s#"\ bazel-out"#"\ $(TF_ADPTER_EXEC_PATH)/bazel-out"#g" $(TF_ADPTER_EXEC_PATH)/Makefile_sc.mk + @sed -i "s#"\=bazel-out"#"\=$(TF_ADPTER_EXEC_PATH)/bazel-out"#g" $(TF_ADPTER_EXEC_PATH)/Makefile_sc.mk + @sed -i "s#"\ external"#"\ $(TF_ADPTER_GEN_PATH)/external"#g" $(TF_ADPTER_EXEC_PATH)/Makefile_sc.mk + @sed -i "s#"-Iexternal"#"-I$(TF_ADPTER_GEN_PATH)/external"#g" $(TF_ADPTER_EXEC_PATH)/Makefile_sc.mk + @sed -i "s#"-I\ .\ "#"-I\ $(TF_ADPTER_REL_WORKSPACE_BASE)\ "#g" $(TF_ADPTER_EXEC_PATH)/Makefile_sc.mk + @sed -i "s#"\ tf_adapter"#"\ ${TF_ADPTER_REL_WORKSPACE_BASE}/tf_adapter"#g" $(TF_ADPTER_EXEC_PATH)/Makefile_sc.mk + @sed -i "s#"\ $(TF_ADPTER_WORKSPACE_BASE)"#"\ $(TF_ADPTER_REL_WORKSPACE_BASE)"#g" $(TF_ADPTER_EXEC_PATH)/Makefile_sc.mk + #change authority to gcc execute + @chmod 777 -R $(TF_ADPTER_REL_BUILD_PATH)/bazel-out + @make -f $(TF_ADPTER_EXEC_PATH)/Makefile_sc.mk +endef +# static check function end. + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libge_runner \ + libtsdclient \ + libdatatransfer \ + libfmk_parser \ + +LOCAL_SOFT_DP_LIBRARIES := libSoftDp + +npu_bridge_shared_libraries := \ +$(addprefix $(HOST_OUT_INTERMEDIATE_LIBRARIES)/, \ +$(addsuffix $(TARGET_SHLIB_SUFFIX), \ +$(LOCAL_SHARED_LIBRARIES) \ +$(LOCAL_SOFT_DP_LIBRARIES))) \ + +.PHONY: $(HOST_OUT_ROOT)/npu_bridge-1.15.0-py3-none-any.whl +$(HOST_OUT_ROOT)/npu_bridge-1.15.0-py3-none-any.whl:$(npu_bridge_shared_libraries) + @mkdir -p $(dir $@) +ifeq ($(COVERITY), true) + $(call do_libtf_adapter_coverity_check) +else ifeq ($(OBB_PRINT_CMD), true) + $(call do_libtf_adapter_coverity_check) +else + @swig -c++ -python -threads $(TF_ADPTER_REL_WORKSPACE_BASE)/tf_adapter/swig/ge_plugin.i + @mv $(TF_ADPTER_REL_WORKSPACE_BASE)/tf_adapter/swig/ge_plugin_wrap.cxx $(TF_ADPTER_REL_WORKSPACE_BASE)/tf_adapter/util/ + @mv $(TF_ADPTER_REL_WORKSPACE_BASE)/tf_adapter/swig/tf_adapter.py $(TF_ADPTER_REL_WORKSPACE_BASE)/tf_adapter/python/npu_bridge/ + @bash 
$(PWD)/third_party/tensorflow/huawei_patch/tf-1.15.0/tf_plugin/tools/build_wheel.sh $(PWD)/$@ $(product) +endif + +LOCAL_MODULE := hw_npu_bridge_1.15.0 +.PHONY: $(LOCAL_MODULE) +$(LOCAL_MODULE):$(HOST_OUT_ROOT)/npu_bridge-1.15.0-py3-none-any.whl + +include $(BUILD_HOST_SHARED_LIBRARY) diff --git a/tf_adapter/BUILD b/tf_adapter/BUILD new file mode 100644 index 000000000..cdb2d643f --- /dev/null +++ b/tf_adapter/BUILD @@ -0,0 +1,119 @@ +package(default_visibility = ["//visibility:public"]) + +cc_binary( + name = "_tf_adapter.so", + srcs = glob(["common/*.*", "kernels/*.*", "ops/*.*", "optimizers/*.*", "util/*.*"]), + data = [] + select({ + # Public introduction of external dependencies on project. + # External linked libraries, typically, located in out/${product}/host/obj/lib + ":cloud_build": ["@tf_adapter_cloud_host_libs//:tf_adapter_host_libs"], + ":mini_build": ["@tf_adapter_mini_host_libs//:tf_adapter_host_libs"], + ":onetrack_build": ["@tf_adapter_onetrack_host_libs//:tf_adapter_host_libs"], + "//conditions:default": [], + }), + copts = [ + # We must ensure that this is the first header file lookup directory, + # so that can ensure the protobuf version is consistent with the installed tensorflow + "-Iexternal/installed_tensorflow/include", + "-Iexternal/org_tensorflow", + "-Iexternal/tf_adapter_extend_hdrs", + "-Iexternal/tf_adapter_extend_hdrs/external", + "-Iexternal/local_nlohmann_json_lib/", + "-Iexternal/sec_lib", + "-Iexternal/tf_adapter_dvpp_hdrs", + "-Iexternal/python_include", + ] + select({ + #":dbg": ["-g",], + "//conditions:default": [], + }), + linkshared = 1, + linkopts = [] + select({ + # Public introduction of external dependencies on project. 
+ # External linked libraries, typically, located in out/${product}/host/obj/lib + ":cloud_build": ["-Lexternal/tf_adapter_cloud_host_libs/ -lc_sec -lge_runner -ltsdclient -ldatatransfer -lfmk_parser"], + ":mini_build": ["-Lexternal/tf_adapter_mini_host_libs/ -lc_sec -lge_runner -ltsdclient -ldatatransfer -lfmk_parser",], + ":onetrack_build": ["-Lexternal/tf_adapter_onetrack_host_libs/ -lc_sec -lge_runner -ltsdclient -ldatatransfer -lfmk_parser",], + "//conditions:default": [], + }) + [ + # "-z defs", + # tensorflow prebuilt libs + # we must ensure that libtensorflow_framework.so is linked + # before any tf_adapter_host_libs to avoid + # the latter overridding the symbol of tensorfdlow, + "-Lexternal/installed_tensorflow_libs/ -l:libtensorflow_framework.so.1", + # We link _pywrap_tensorflow_internal.so, because we use the implementation of the + # capture function, which is pretty bad but doesn't have good processing. + "-Lexternal/installed_tensorflow_libs/python/ -l:_pywrap_tensorflow_internal.so", + # "-Wl,--version-script", # This line must be directly followed by the version_script.lds file + # "$(location //tf_adapter:exported_symbols.lds)", + ], + deps = [ + # Specifies the symbols to export + # "//tf_adapter:exported_symbols.lds", + # Installed tensorflow library and header files, + # We use the prefix foo_ because Eigen's headers doesn't end with.h or.hpp + "@installed_tensorflow//:foo_tensorflow_hdrs", + # Tensorflow internal headers. 
In general, the introduction of such headers means + # that you are using functions that Tensorflow does not want to expose + "//tf_adapter:tf_inner_headers_lib", + # + "@installed_tensorflow_libs//:installed_tensorflow_libs", + "@local_nlohmann_json_lib//:local_nlohmann_json_lib", + "@ge_proto//:ge_proto_headers_only", + "@tf_adapter_extend_hdrs//:tf_adapter_extend_hdrs", + "@sec_lib//:sec_lib", + "@tf_adapter_dvpp_hdrs//:tf_adapter_dvpp_hdrs", + "@python_include//:python_include", + ], +) + +exports_files( + [ + "exported_symbols.lds", + ], +) + +config_setting( + name = "cloud_build", + define_values ={ + "product": "cloud", + }, + visibility = ["//visibility:public"], +) + +config_setting( + name = "mini_build", + define_values ={ + "product": "mini", + }, + visibility = ["//visibility:public"], +) + +config_setting( + name = "onetrack_build", + define_values ={ + "product": "onetrack", + }, + visibility = ["//visibility:public"], +) + +config_setting( + name = "dbg", + define_values = { + "dbg": "true", + }, + visibility = ["//visibility:public"], +) + +cc_library( + name = "tf_inner_headers_lib", + hdrs = [ + "@org_tensorflow//tensorflow/core/kernels/data:captured_function.h", + "@org_tensorflow//tensorflow/core/kernels/data:dataset.h", + "@org_tensorflow//tensorflow/core/kernels/data:iterator_ops.h", + "@org_tensorflow//tensorflow/core/kernels/data:dataset_utils.h", + "@org_tensorflow//tensorflow/core/kernels/data:unbounded_thread_pool.h", + "@org_tensorflow//tensorflow/core/kernels:ops_util.h", + "@org_tensorflow//tensorflow/compiler/jit/graphcycles:graphcycles.h", + ], +) diff --git a/tf_adapter/common/common.h b/tf_adapter/common/common.h new file mode 100644 index 000000000..8a705a135 --- /dev/null +++ b/tf_adapter/common/common.h @@ -0,0 +1,40 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. 
foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMMON_COMMON_H_ +#define TENSORFLOW_COMMON_COMMON_H_ + +#include "tensorflow/core/platform/env.h" + +#define CHECK_NOT_NULL(v) \ + if ((v) == nullptr) { \ + LOG(ERROR) << #v " is nullptr."; \ + return; \ + } + +#define REQUIRES_NOT_NULL(v) \ + if ((v) == nullptr) { \ + LOG(ERROR) << #v " is nullptr."; \ + return errors::InvalidArgument(#v " is nullptr."); \ + } + +#define REQUIRES_STATUS_OK(s) \ + if (!s.ok()) { \ + return s; \ + } + +#define ADAPTER_ENV_MAX_LENTH 1024*1024 +#endif // TENSORFLOW_COMMON_COMMON_H_ diff --git a/tf_adapter/exported_symbols.lds b/tf_adapter/exported_symbols.lds new file mode 100644 index 000000000..846ce15e7 --- /dev/null +++ b/tf_adapter/exported_symbols.lds @@ -0,0 +1,4 @@ +tensorflow { + local: + *; +}; \ No newline at end of file diff --git a/tf_adapter/interface_checker/check_interface.py b/tf_adapter/interface_checker/check_interface.py new file mode 100644 index 000000000..dc1b5232e --- /dev/null +++ b/tf_adapter/interface_checker/check_interface.py @@ -0,0 +1,534 @@ +""" + check interface +""" +import os +# 获取接口规范的目录 +specs_dir = os.path.realpath("../interface_spec") +# 获取tf-adapter的根目录 +tf_adapter_root = os.path.realpath("../") + +class FuncIntfSpec: + """ + 函数API的规格定义,主要包含函数的名称,与函数的入参 + 暂时校验不了出参 + """ + def __init__(self, func_name, param_list): + self.func_name = func_name + 
self.param_list = param_list + self.class_list = {} + self.func_list = {} + + def add_func_spec(self, func_info): + self.func_list[func_info.func_name] = func_info + + def add_class_spec(self, spec): + self.class_list[spec.class_name]=spec + + def print_detail(self): + print("func name: %s, param_list: [%s]" % (self.func_name, ",".join(self.param_list))) + +class GlobalVarSpec: + """ + 全局变量定义,例如elewise_compute接口中的NAME_INDEX + 类型主要包含:全局变量名,全局变量值 + """ + def __init__(self, name, values): + self.global_var_name = name + self.global_var_values = values + self.func_list = {} + self.param_list = {} + +class ClassIntfSpec: + """ + class 类型的接口规格定义,例如Tik的各种API都是封装在类中的 + 类型主要包含:类的名称,类的父类,类提供的api函数接口 + """ + def __init__(self, name, supper_classes): + self.class_name = name + self.supper_classes = supper_classes + self.func_list = {} + self.param_list = {} + self.class_list = {} + + def add_func_spec(self, func_info: FuncIntfSpec): + self.func_list[func_info.func_name] = func_info + + def add_class_spec(self, spec): + self.class_list[spec.class_name]=spec + + def print_detail(self): + print("-------------------------------------------") + print("class name: %s, supper classes: %s" % (self.class_name, ",".join(self.supper_classes))) + print("func list:") + for (idx, func_spec) in self.func_list.items(): + func_spec.print_detail() + print("params list:") + for param_spec in self.param_list: + param_spec.print_detail() + print("-------------------------------------------") + + +class FileSpec: + """ + 文件的规格定义,对应到一个源码文件 + """ + def __init__(self, spec_file_name, source_file_name=""): + self.spec_file_name = spec_file_name + self.source_file_name = source_file_name + self.class_specs = {} + self.func_specs = {} + self.global_var_spec = {} + + def add_class_spec(self, spec: ClassIntfSpec): + self.class_specs[spec.class_name]=spec + + def add_func_spec(self, spec: FuncIntfSpec): + self.func_specs[spec.func_name]=spec + + def add_global_var_spec(self, spec: GlobalVarSpec): + 
self.global_var_spec[spec.global_var_name] = spec + + def print_detail(self): + print("===========================================") + print("file name: %s" % self.spec_file_name) + print("class specs:") + for _, class_spec in self.class_specs.items(): + class_spec.print_detail() + print("func specs:") + for _, func_spec in self.func_specs.items(): + func_spec.print_detail() + print("===========================================") + +last_tree_idx = 1 +last_tab_str = " " + +def get_tree_idx(str_info: str): + """ + python 代码的层级靠缩进来保证,这里获取缩进了几层。 + python代码的缩进其实没有规定多大,只要同一层次的缩进是一样的就可以。 + 这里假定我们的代码格式是规范的,按照4/2个空格来做 + """ + global last_tab_str + if str_info.startswith(last_tab_str): + return 1 + get_tree_idx(str_info[len(last_tab_str):]) + else: + return 1 + +def get_tree_idx_2(str_info: str): + """ + 按照2个空格来计算 + """ + TAB_STR = " " + if str_info.startswith(TAB_STR): + return 1 + get_tree_idx_2(str_info[2:]) + else: + return 1 + +def get_tree_idx_final(str_info: str): + global last_tree_idx + global last_tab_str + if last_tree_idx == 1: + if get_tree_idx_2(str_info) > 2: + last_tab_str = " " + else: + last_tab_str = " " + last_tree_idx = get_tree_idx(str_info) + return last_tree_idx + + +def build_file_spec(spec_lines, file_spec: FileSpec): + """ + 根据文件中的代码解析出规格定义。 + """ + spec_tree = [file_spec, ] + for spec_line in spec_lines: + spec_line = spec_line.rstrip() + if spec_line.startswith("#"): + continue + elif "class " in spec_line and "class _" not in spec_line: + # 表示定义了一个class + tree_idx = get_tree_idx_final(spec_line) + if "(" in spec_line: + class_name = spec_line[6:spec_line.index("(")] + super_classes = spec_line[spec_line.index("(")+1: spec_line.index(")")].split(",") + else: + class_name = spec_line[6:-1] + super_classes = [] + class_spec = ClassIntfSpec(class_name, super_classes) + if len(spec_tree) < (tree_idx + 1): + spec_tree.append(class_spec) + else: + spec_tree[tree_idx] = class_spec + spec_tree[tree_idx-1].add_class_spec(class_spec) + elif "def " in 
spec_line: + # 表示定义了一个函数api + tree_idx = get_tree_idx_final(spec_line) + spec_line = spec_line.strip() + func_name = spec_line[4:spec_line.index("(")] + param_list = [x.strip() for x in spec_line[spec_line.index("(")+1:spec_line.index(")")].split(",")] + func_spec = FuncIntfSpec(func_name, param_list) + if len(spec_tree) < (tree_idx + 1): + spec_tree.append(func_spec) + else: + spec_tree[tree_idx] = func_spec + spec_tree[tree_idx-1].add_func_spec(func_spec) + elif is_global_variable(spec_line): + tree_idx = get_tree_idx_final(spec_line) + name = spec_line[:spec_line.index("=")].rstrip() + values = spec_line[(len(name) + 3):] + global_var_spec = GlobalVarSpec(name, values) + if len(spec_tree) < (tree_idx + 1): + spec_tree.append(global_var_spec) + else: + spec_tree[tree_idx] = global_var_spec + spec_tree[tree_idx - 1].add_global_var_spec(global_var_spec) + + +def is_global_variable(spec_line): + if spec_line.startswith(" ") or spec_line.startswith(" "): + return False + if "=" not in spec_line: + return False + name = spec_line[:spec_line.index("=")].rstrip() + name_split = name.split("_") + for i in name_split: + if not i.isupper(): + return False + return True + + +def get_spec_info_list(): + # 从接口规范文件夹中,扫描出所有的接口规范定义 + def _get_class_spec_info(spec_line, spec_tree): + tree_idx = get_tree_idx_final(spec_line) + if "(" in spec_line: + class_name = spec_line[6:spec_line.index("(")] + super_classes = spec_line[spec_line.index("(") + 1: spec_line.index(")")].split(",") + else: + class_name = spec_line[6:-1] + super_classes = [] + class_spec = ClassIntfSpec(class_name, super_classes) + if len(spec_tree) < (tree_idx + 1): + spec_tree.append(class_spec) + else: + spec_tree[tree_idx] = class_spec + spec_tree[tree_idx - 1].add_class_spec(class_spec) + + def _get_def_spec_info(spec_line, spec_tree): + tree_idx = get_tree_idx_final(spec_line) + spec_line = spec_line.strip() + func_name = spec_line[4:spec_line.index("(")] + param_list = [x.strip() for x in 
spec_line[spec_line.index("(") + 1:spec_line.index(")")].split(",")] + func_spec = FuncIntfSpec(func_name, param_list) + if len(spec_tree) < (tree_idx + 1): + spec_tree.append(func_spec) + else: + spec_tree[tree_idx] = func_spec + spec_tree[tree_idx - 1].add_func_spec(func_spec) + + def _get_global_var_spec_info(spec_line, spec_tree): + tree_idx = get_tree_idx_final(spec_line) + name = spec_line[:spec_line.index("=")].rstrip() + values = spec_line[(len(name) + 3):] + global_var_spec = GlobalVarSpec(name, values) + if len(spec_tree) < (tree_idx + 1): + spec_tree.append(global_var_spec) + else: + spec_tree[tree_idx] = global_var_spec + spec_tree[tree_idx - 1].add_global_var_spec(global_var_spec) + + specs = os.listdir(specs_dir) + file_spec_list = [] + for spec_file in specs: + spec_tree = [] + source_file_path = os.path.realpath(os.path.join(specs_dir, spec_file)) + spec_lines = get_spec_from_file(source_file_path, True) + + for spec_line in spec_lines: + spec_line = spec_line.rstrip() + if "# source file:" in spec_line: + spec_source_file = spec_line[14:].strip() + file_spec = FileSpec(spec_file, spec_source_file) + file_spec_list.append(file_spec) + if len(spec_tree) < 1: + spec_tree.append(file_spec) + else: + spec_tree[0] = file_spec + elif spec_line.startswith("#"): + continue + + elif "class " in spec_line: + _get_class_spec_info(spec_line, spec_tree) + + elif "def " in spec_line: + _get_def_spec_info(spec_line, spec_tree) + + elif is_global_variable(spec_line): + _get_global_var_spec_info(spec_line, spec_tree) + + return file_spec_list + + +def get_spec_from_file(file_path, is_defined_file=False): + with open(file_path) as ff: + lines = ff.readlines() + + def _get_new_lines(line, line_end, new_lines): + if "def " in line or "class " in line: + if not line.rstrip().endswith(":"): + line_end = False + new_lines.append(line.rstrip()) + else: + new_lines.append(line.rstrip()) + elif is_global_variable(line): + new_lines.append(line.rstrip()) + return line_end + + 
new_lines = [] + block_comment = False + line_end = True + for line in lines: + tmp_line = line.strip() + if block_comment: + # 前面是块状注释, 直到获取到块状注释结束符为止 + if "\"\"\"" in tmp_line: + block_comment = False + else: + if is_defined_file: + if "# source file:" in line: + new_lines.append(line) + continue + if tmp_line.startswith("#"): + continue + if "\"\"\"" in tmp_line and not tmp_line.endswith("\"\"\""): + # 包含连续的3个引号,是块状注释, 这里也比较粗暴,这里块状注释一般都是独立行的 + block_comment = True + continue + if "#" in line: + # 去除行尾注释 + line = line[:line.index("#")] + if line_end: + # 因为函数以及class申明的地方可能换行了,如果前面一个函数的定义还未结束,则继续拼接上去,去除换行 + line_end = _get_new_lines(line, line_end, new_lines) + else: + new_lines[-1]+=line.strip() + # 函数定义和class定义都是以冒号结尾的,检测冒号 + if line.rstrip().endswith(":"): + line_end = True + + return new_lines + + +def remove_sub_func_under_func(lines): + """ + 因为get_spec_from_file提取的是文件中的接口和class定义。 + 由于函数内部还可以定义函数,这里也会被提取出来,因此去除函数内部的函数。 + """ + tree_info = [""] + new_lines = [] + for line in lines: + idx = get_tree_idx_final(line) + if line.strip().startswith("def "): + type_name = "func" + elif line.strip().startswith("class "): + type_name = "class" + elif is_global_variable(line): + name = line[:line.index("=")].rstrip() + type_name = name + if type_name == "func" and tree_info[min(len(tree_info)-1, idx-1)] == "func": + continue + if len(tree_info) <= idx: + tree_info.append(type_name) + else: + tree_info[idx] = type_name + new_lines.append(line) + return new_lines + + +def check_source_file_match(defined_spec: FileSpec): + """ + 校验源码是否符合规范定义 + """ + source_file_path = os.path.realpath(os.path.join(tf_adapter_root, defined_spec.source_file_name)) + lines = get_spec_from_file(source_file_path) + new_lines = remove_sub_func_under_func(lines) + + spec_in_source = FileSpec("", source_file_path) + build_file_spec(new_lines, spec_in_source) + + return compare_file_spec(defined_spec, spec_in_source) + + +def compare_func_spec(spec1: FuncIntfSpec, spec2: FuncIntfSpec, 
file_name1="", file_name2=""): + params_1 = sorted([x.strip() for x in spec1.param_list]) + params_2 = sorted([x.strip() for x in spec2.param_list]) + if params_1 != params_2: + print("[EEEE] compare \"%s\" func failed" % spec1.func_name) + print("[EEEE] file path: \"%s\"" % file_name1) + print("[EEEE] param list: \"%s\"" % ",".join(params_1)) + print("[EEEE] file path: \"%s\"" % file_name2) + print("[EEEE] param list: \"%s\"" % ",".join(params_2)) + return False + else: + print("[====] compare \"%s\" func success" % spec1.func_name) + return True + + +def build_diff_list_result(func_names_1, func_names_2): + func_names_1 = func_names_1[:] + func_names_2 = func_names_2[:] + diff_print_name1 = [] + diff_print_name2 = [] + len_1 = len(func_names_1) + len_2 = len(func_names_2) + i = 0 + j = 0 + while inum_inputs(); + } + bool IsExpensive() override { return false; } +}; + +REGISTER_KERNEL_BUILDER(Name("AdamApplyOneAssign").Device(DEVICE_CPU), AdamApplyOneAssignOp); +} // namespace tensorflow + diff --git a/tf_adapter/kernels/adam_apply_one_with_decay_assign.cc b/tf_adapter/kernels/adam_apply_one_with_decay_assign.cc new file mode 100644 index 000000000..908ddeec2 --- /dev/null +++ b/tf_adapter/kernels/adam_apply_one_with_decay_assign.cc @@ -0,0 +1,33 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow { +class AdamApplyOneWithDecayAssignOp : public OpKernel { + public: + explicit AdamApplyOneWithDecayAssignOp(OpKernelConstruction* context) : OpKernel(context) {} + ~AdamApplyOneWithDecayAssignOp() {} + void Compute(OpKernelContext* context) override + { + LOG(INFO) << "AdamApplyOneWithDecayAssignOp Compute, num_inputs: " << context->num_inputs(); + } + bool IsExpensive() override { return false; } +}; + +REGISTER_KERNEL_BUILDER(Name("AdamApplyOneWithDecayAssign").Device(DEVICE_CPU), AdamApplyOneWithDecayAssignOp); +} // namespace tensorflow + diff --git a/tf_adapter/kernels/basic_lstm_cell.cc b/tf_adapter/kernels/basic_lstm_cell.cc new file mode 100644 index 000000000..e0d39732d --- /dev/null +++ b/tf_adapter/kernels/basic_lstm_cell.cc @@ -0,0 +1,33 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow { +class BasicLSTMCellOp : public OpKernel { + public: + explicit BasicLSTMCellOp(OpKernelConstruction* context) : OpKernel(context) {} + ~BasicLSTMCellOp() {} + void Compute(OpKernelContext* context) override + { + LOG(INFO) << "BasicLSTMCellOp Compute, num_inputs: " << context->num_inputs(); + } + bool IsExpensive() override { return false; } +}; + +REGISTER_KERNEL_BUILDER(Name("BasicLSTMCell").Device(DEVICE_CPU), BasicLSTMCellOp); +} // namespace tensorflow + diff --git a/tf_adapter/kernels/basic_lstm_cell_grad.cc b/tf_adapter/kernels/basic_lstm_cell_grad.cc new file mode 100644 index 000000000..981a0d26a --- /dev/null +++ b/tf_adapter/kernels/basic_lstm_cell_grad.cc @@ -0,0 +1,57 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow { +class BasicLSTMCellCStateGradOp : public OpKernel { +public: + explicit BasicLSTMCellCStateGradOp(OpKernelConstruction* context) : OpKernel(context) {} + ~BasicLSTMCellCStateGradOp() {} + void Compute(OpKernelContext* context) override + { + LOG(INFO) << "BasicLSTMCellCStateGradOp Compute "; + } + bool IsExpensive() override { return false; } +}; + +class BasicLSTMCellWeightGradOp : public OpKernel { +public: + explicit BasicLSTMCellWeightGradOp(OpKernelConstruction* context) : OpKernel(context) {} + ~BasicLSTMCellWeightGradOp() {} + void Compute(OpKernelContext* context) override + { + LOG(INFO) << "BasicLSTMCellWeightGradOp Compute "; + } + bool IsExpensive() override { return false; } +}; + +class BasicLSTMCellInputGradOp : public OpKernel { +public: + explicit BasicLSTMCellInputGradOp(OpKernelConstruction* context) : OpKernel(context) {} + ~BasicLSTMCellInputGradOp() {} + void Compute(OpKernelContext* context) override + { + LOG(INFO) << "BasicLSTMCellInputGradOp Compute "; + } + bool IsExpensive() override { return false; } +}; + +REGISTER_KERNEL_BUILDER(Name("BasicLSTMCellCStateGrad").Device(DEVICE_CPU), BasicLSTMCellCStateGradOp); +REGISTER_KERNEL_BUILDER(Name("BasicLSTMCellWeightGrad").Device(DEVICE_CPU), BasicLSTMCellWeightGradOp); +REGISTER_KERNEL_BUILDER(Name("BasicLSTMCellInputGrad").Device(DEVICE_CPU), BasicLSTMCellInputGradOp); +} // namespace tensorflow + diff --git a/tf_adapter/kernels/decode_image_ops.cc b/tf_adapter/kernels/decode_image_ops.cc new file mode 100644 index 000000000..e3f6b4acd --- /dev/null +++ b/tf_adapter/kernels/decode_image_ops.cc @@ -0,0 +1,136 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. 
foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include "tensorflow/core/framework/op_kernel.h" +#include "ExternalSoftDp.h" +#include "tf_adapter/util/plugin_load_manager.h" + +namespace tensorflow { +using fDecodeAndResizeJpeg = uint32_t (*)(SoftDpProcsessInfo &); +using fDecodeAndCropAndResizeJpeg = uint32_t (*)(SoftDpProcsessInfo &, DpCropInfo &); +class DecodeImageOp : public OpKernel { + public: + explicit DecodeImageOp(OpKernelConstruction* context) : OpKernel(context) { + if (type_string() == "DecodeAndResizeJpeg") { + crop_ = false; + } else if (type_string() == "DecodeAndCropAndResizeJpeg") { + crop_ = true; + } else { + OP_REQUIRES_OK(context, + errors::InvalidArgument("Bad op type ", type_string())); + } + string lib_path = PluginLoadManager::GetTFPluginRealPath(); + if (lib_path.empty()) { + OP_REQUIRES_OK(context, errors::InvalidArgument("Find tf-plugin path faild!")); + } + lib_path += "libSoftDp.so"; + handle_ = PluginLoadManager::DlOpen(lib_path); + void *decode_and_resize_jpeg = PluginLoadManager::DlSym(handle_, "DecodeAndResizeJpeg"); + void *decode_and_crop_and_resize_jpeg = PluginLoadManager::DlSym(handle_, "DecodeAndCropAndResizeJpeg"); + if (decode_and_resize_jpeg == nullptr || decode_and_crop_and_resize_jpeg == nullptr) { + OP_REQUIRES_OK(context, errors::InvalidArgument("Can not find function from ", lib_path)); + } + decode_resize_ = 
(fDecodeAndResizeJpeg)decode_and_resize_jpeg; + decode_crop_resize_ = (fDecodeAndCropAndResizeJpeg)decode_and_crop_and_resize_jpeg; + } + ~DecodeImageOp() { + if (handle_ != nullptr) { + dlclose(handle_); + LOG(INFO) << "dlclose handle finish."; + } else { + LOG(INFO) << "handle is null."; + } + } + + void Compute(OpKernelContext* context) override { + const Tensor& contents = context->input(0); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents.shape()), + errors::InvalidArgument("contents must be scalar, got shape ", + contents.shape().DebugString())); + + const StringPiece input = contents.scalar()(); + + Tensor* output = nullptr; + Tensor resize; + // if crop_ is true, inputs tensor vector: [image, crop_size, size] + // if crop_ is false, inputs tensor vector: [image, size] + if (crop_) { + resize = context->input(2); + } else { + resize = context->input(1); + } + auto resize_vec = resize.vec(); + int height = resize_vec(0); + int width = resize_vec(1); + Status status(context->allocate_output( + 0, TensorShape({height, width, 3}), + &output)); + if (!status.ok()) { + LOG(ERROR) << "DecodeImageOp, Decode image failed when allocate output"; + context->SetStatus(status); + return; + } + + SoftDpProcsessInfo dvppInfo = {0}; + dvppInfo.inputBuffer = reinterpret_cast(const_cast(input.data())); + dvppInfo.inputBufferSize = input.size(); + dvppInfo.outputBuffer = reinterpret_cast(output->flat().data()); + dvppInfo.outputBufferSize = height * width * 3; // 3 means RGB + dvppInfo.outputHeight = height; + dvppInfo.outputWidth = width; + + int ret = 0; + if (crop_) { + // Update flags to include crop window. 
+ const Tensor& crop_window = context->input(1); + OP_REQUIRES(context, crop_window.dims() == 1, + errors::InvalidArgument("crop_window must be 1-D, got shape ", + crop_window.shape().DebugString())); + OP_REQUIRES(context, crop_window.dim_size(0) == 4, + errors::InvalidArgument("crop_size must have four elements ", + crop_window.shape().DebugString())); + auto crop_window_vec = crop_window.vec(); + + // crop_window_vec : [y, x, crop_h, crop_w] + DpCropInfo cropInfo = {0}; + cropInfo.left = crop_window_vec(1); + cropInfo.up = crop_window_vec(0); + cropInfo.right = crop_window_vec(1) + crop_window_vec(3); + cropInfo.down = crop_window_vec(0) + crop_window_vec(2); + + ret = (*decode_crop_resize_)(dvppInfo, cropInfo); + } else { + ret = (*decode_resize_)(dvppInfo); + } + + OP_REQUIRES(context, ret == 0, + errors::Internal("Decode jpeg failed, height is: ", + height, "width is: ", width)); + return; + } + bool IsExpensive() override { return false; } + private: + bool crop_; + void* handle_; + fDecodeAndResizeJpeg decode_resize_; + fDecodeAndCropAndResizeJpeg decode_crop_resize_; +}; + +REGISTER_KERNEL_BUILDER(Name("DecodeAndResizeJpeg").Device(DEVICE_CPU), DecodeImageOp); +REGISTER_KERNEL_BUILDER(Name("DecodeAndCropAndResizeJpeg").Device(DEVICE_CPU), + DecodeImageOp); +} // namespace tensorflow diff --git a/tf_adapter/kernels/device_queue_dataset_op.cc b/tf_adapter/kernels/device_queue_dataset_op.cc new file mode 100644 index 000000000..8568db83e --- /dev/null +++ b/tf_adapter/kernels/device_queue_dataset_op.cc @@ -0,0 +1,147 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tf_adapter/common/common.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/lib/core/blocking_counter.h" +#include "tensorflow/core/lib/io/buffered_inputstream.h" +#include "tensorflow/core/lib/io/inputbuffer.h" +#include "tensorflow/core/lib/io/random_inputstream.h" +#include "tensorflow/core/lib/io/record_reader.h" +#include "tensorflow/core/lib/io/zlib_compression_options.h" +#include "tensorflow/core/lib/io/zlib_inputstream.h" +#include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/tracing.h" + +// We implemented the Dataset of the data enhancement module by using Kernel +// extension, +// which would reuse most of the code of tensorflow. +// In this file, we indicated which tensorflow native code was by add "// data +// preprocess added" annotation, +// and maintained its original writing style. 
+using namespace std; + +namespace tensorflow { +namespace data { +namespace { +class DeviceQueueDatasetOp : public DatasetOpKernel { + public: + using DatasetOpKernel::DatasetOpKernel; + + explicit DeviceQueueDatasetOp(OpKernelConstruction *ctx) : DatasetOpKernel(ctx) { + CHECK_NOT_NULL(ctx); + OP_REQUIRES_OK(ctx, ctx->GetAttr("channel_name", &defStr_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &outputTypes_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &outputShapes_)); + } + ~DeviceQueueDatasetOp() {} + void MakeDataset(OpKernelContext *ctx, DatasetBase **output) override { + CHECK_NOT_NULL(ctx); + CHECK_NOT_NULL(output); + *output = new (nothrow) Dataset(ctx, defStr_, outputTypes_, outputShapes_); + OP_REQUIRES( + ctx, *output != nullptr, + errors::InvalidArgument("DeviceQueueDatasetOp: new dataset failed")); + } + + private: + class Dataset : public DatasetBase { + public: + explicit Dataset(OpKernelContext *ctx, string &sourcedata, + const DataTypeVector &outputTypes, + const std::vector &outputShapes) + : DatasetBase(DatasetContext(ctx)), + sourcedata_(sourcedata), + outputTypes_(outputTypes), + outputShapes_(outputShapes) {} + + ~Dataset() override {} + + std::unique_ptr MakeIteratorInternal( + const string &prefix) const override { + return std::unique_ptr( + new Iterator({this, strings::StrCat(prefix, "::DeviceQueue")})); + } + + const DataTypeVector &output_dtypes() const override { + return outputTypes_; + } + + const std::vector &output_shapes() const override { + return outputShapes_; + } + + string DebugString() const override { + return "DeviceQueueDatasetOp::Dataset"; + } + + protected: + Status AsGraphDefInternal(SerializationContext *ctx, + DatasetGraphDefBuilder *b, + Node **output) const override { + REQUIRES_NOT_NULL(ctx); + REQUIRES_NOT_NULL(b); + REQUIRES_NOT_NULL(output); + Node *sourcedata = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(sourcedata_, &sourcedata)); + TF_RETURN_IF_ERROR(b->AddDataset(this, {sourcedata}, 
output)); + return Status::OK(); + } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params ¶ms) + : DatasetIterator(params) {} + + ~Iterator() override {} + + Status GetNextInternal(IteratorContext *ctx, + std::vector *out_tensors, + bool *end_of_sequence) override { + *end_of_sequence = false; + return Status::OK(); + }; + + private: + std::mutex mu_; + std::unique_ptr inputImpl_; + }; + std::string sourcedata_; + const DataTypeVector outputTypes_; + const std::vector outputShapes_; + }; + string defStr_; + DataTypeVector outputTypes_; + std::vector outputShapes_; +}; + +REGISTER_KERNEL_BUILDER(Name("DeviceQueueDataset") +. +Device(DEVICE_CPU), + DeviceQueueDatasetOp +); +} // namespace +} +} // namespace tensorflow diff --git a/tf_adapter/kernels/dp_iterator_ops.cc b/tf_adapter/kernels/dp_iterator_ops.cc new file mode 100644 index 000000000..6e879d68b --- /dev/null +++ b/tf_adapter/kernels/dp_iterator_ops.cc @@ -0,0 +1,73 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tf_adapter/kernels/dp_iterator_ops.h" +#include "tf_adapter/common/common.h" +#include "tensorflow/core/kernels/data/iterator_ops.h" + +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/data/captured_function.h" +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/lib/random/random.h" + +namespace tensorflow { +namespace data { +namespace { +// See documentation in ../../ops/dataset_ops.cc for a high-level +// description of the following ops. +const char kAnonymousIterator[] = "AnonymousIterator"; +const char kAnonymousIteratorV2[] = "AnonymousIteratorV2"; +const char kIteratorVariantTypeName[] = "tensorflow::Iterator"; +const char kOutputShapes[] = "output_shapes"; +const char kOutputTypes[] = "output_types"; +} // namespace + +void DpMakeIteratorOp::Compute(OpKernelContext *ctx) { + LOG(INFO) << "===Begin Computer MakeIterator==="; + CHECK_NOT_NULL(ctx); + DatasetBase *dataset = nullptr; + OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(0), &dataset)); + IteratorResource *iterator_resource = nullptr; + OP_REQUIRES_OK( + ctx, LookupResource(ctx, HandleFromInput(ctx, 1), &iterator_resource)); + Status s = iterator_resource->SetIteratorFromDataset(ctx, dataset); + iterator_resource->Unref(); + if (!s.ok()) { + ctx->SetStatus(s); + } + LOG(INFO) << "===End Computer MakeIterator==="; +} + +namespace { + +REGISTER_KERNEL_BUILDER(Name("MakeIterator") +. +Device(DEVICE_CPU) +.Priority(2).Label("dp"), +DpMakeIteratorOp); +REGISTER_KERNEL_BUILDER( + Name("MakeIterator") +. 
+Device(DEVICE_GPU) +.Priority(1).HostMemory("dataset").Label("dp"), +DpMakeIteratorOp); + +} // namespace + +} // namespace data +} // namespace tensorflow diff --git a/tf_adapter/kernels/dp_iterator_ops.h b/tf_adapter/kernels/dp_iterator_ops.h new file mode 100644 index 000000000..a77067b27 --- /dev/null +++ b/tf_adapter/kernels/dp_iterator_ops.h @@ -0,0 +1,37 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_KERNELS_DATA_DP_ITERATOR_OPS_H_ +#define TENSORFLOW_CORE_KERNELS_DATA_DP_ITERATOR_OPS_H_ + +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/data/captured_function.h" +#include "tensorflow/core/framework/dataset.h" + +namespace tensorflow { +namespace data { +class DpMakeIteratorOp : public OpKernel { + public: + explicit DpMakeIteratorOp(OpKernelConstruction *ctx) : OpKernel(ctx) {} + ~DpMakeIteratorOp() {} + void Compute(OpKernelContext *ctx) override; +}; + +} // namespace data +} // namespace tensorflow +#endif // TENSORFLOW_CORE_KERNELS_DATA_ITERATOR_OPS_H_ diff --git a/tf_adapter/kernels/dpgroup_dataset_op.cc b/tf_adapter/kernels/dpgroup_dataset_op.cc new file mode 100644 index 000000000..3240919a6 --- /dev/null +++ b/tf_adapter/kernels/dpgroup_dataset_op.cc @@ -0,0 +1,151 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tf_adapter/common/common.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/data/captured_function.h" +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/lib/random/random.h" + +namespace tensorflow { +namespace data { +namespace { +class DPGroupDatasetOp : public DatasetOpKernel { + public: + explicit DPGroupDatasetOp(OpKernelConstruction* ctx) : DatasetOpKernel(ctx) { + CHECK_NOT_NULL(ctx); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_)); + } + ~DPGroupDatasetOp() {} + void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override { + CHECK_NOT_NULL(ctx); + CHECK_NOT_NULL(output); + std::vector inputs; + for (int i = 0; i < ctx->num_inputs(); ++i) { + DatasetBase* input = nullptr; + OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(i), &input)); + inputs.push_back(input); + } + *output = new (std::nothrow) Dataset(ctx, inputs, output_types_, output_shapes_); + OP_REQUIRES(ctx, *output != nullptr, + errors::Internal("Failed new dataset of DPGroupDatasetOp")); + } + + private: + class Dataset : public DatasetBase { + public: + explicit Dataset(OpKernelContext* ctx, + const std::vector& inputs, + const DataTypeVector& output_types, + const std::vector& output_shapes) + : DatasetBase(DatasetContext(ctx)), + inputs_(inputs) { + for (const auto& input : inputs_) { + input->Ref(); + } + output_types_.insert(output_types_.end(), output_types.begin(), + output_types.end()); + output_shapes_.insert(output_shapes_.end(), output_shapes.begin(), + output_shapes.end()); + } + + ~Dataset() override { + for (const auto& input : inputs_) { + input->Unref(); + } + } + + std::unique_ptr MakeIteratorInternal( + 
const string& prefix) const override { + return std::unique_ptr( + new (std::nothrow) Iterator({this, strings::StrCat(prefix, "::GEOP")})); + } + + const DataTypeVector& output_dtypes() const override { + return output_types_; + } + + const std::vector& output_shapes() const override { + return output_shapes_; + } + + string DebugString() const override { return "DPGroupDatasetOp::Dataset"; } + + protected: + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const override { + return Status::OK(); + } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params) {} + ~Iterator() {} + Status Initialize(IteratorContext* ctx) override { + REQUIRES_NOT_NULL(ctx); + LOG(INFO) << "Start to initialize iterator of DPGroupDatasetOp"; + mutex_lock l(mu_); + try { + input_impls_.resize(dataset()->inputs_.size()); + } catch (...) { + return errors::InvalidArgument("input impls resize failed."); + } + for (size_t i = 0; i < input_impls_.size(); ++i) { + TF_RETURN_IF_ERROR(dataset()->inputs_[i]->MakeIterator( + ctx, strings::StrCat(prefix(), "[", i, "]"), &input_impls_[i])); + } + return Status::OK(); + } + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + *end_of_sequence = true; + return Status::OK(); + } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + return Status::OK(); + } + + private: + mutex mu_; + std::vector> input_impls_ GUARDED_BY(mu_); + }; + const std::vector inputs_; + DataTypeVector output_types_; + std::vector output_shapes_; + }; + DataTypeVector output_types_; + std::vector output_shapes_; +}; + +REGISTER_KERNEL_BUILDER(Name("DPGroupDataset").Device(DEVICE_CPU), DPGroupDatasetOp); +} // namespace +} // namespace data +} // 
namespace tensorflow diff --git a/tf_adapter/kernels/dropout_ops.cc b/tf_adapter/kernels/dropout_ops.cc new file mode 100644 index 000000000..339201aca --- /dev/null +++ b/tf_adapter/kernels/dropout_ops.cc @@ -0,0 +1,50 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/macros.h" + +namespace tensorflow { +class DropOutDoMaskOp : public OpKernel { + public: + explicit DropOutDoMaskOp(OpKernelConstruction *context) : OpKernel(context) {} + ~DropOutDoMaskOp() {} + void Compute(OpKernelContext *context) override { + LOG(INFO) << "DropOutDoMaskOp Compute "; + } + bool IsExpensive() override { return false; } +}; + +class DropOutGenMaskOp : public OpKernel { + public: + explicit DropOutGenMaskOp(OpKernelConstruction *context) : OpKernel(context) {} + ~DropOutGenMaskOp() {} + void Compute(OpKernelContext *context) override { + LOG(INFO) << "DropOutGenMaskOp Compute "; + } + bool IsExpensive() override { return false; } +}; + +REGISTER_KERNEL_BUILDER(Name("DropOutGenMask") +. 
+Device(DEVICE_CPU), DropOutGenMaskOp +); +REGISTER_KERNEL_BUILDER(Name("DropOutDoMask") +. +Device(DEVICE_CPU), DropOutDoMaskOp +); +} \ No newline at end of file diff --git a/tf_adapter/kernels/geop_dataset_op.cc b/tf_adapter/kernels/geop_dataset_op.cc new file mode 100644 index 000000000..1345f56f1 --- /dev/null +++ b/tf_adapter/kernels/geop_dataset_op.cc @@ -0,0 +1,208 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tf_adapter/common/common.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/data/captured_function.h" +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/lib/random/random.h" + +namespace tensorflow { +namespace data { +namespace { +class GEOPDatasetOp : public DatasetOpKernel { + public: + explicit GEOPDatasetOp(OpKernelConstruction* ctx) + : DatasetOpKernel(ctx), + f_handle_(kInvalidHandle) { + FunctionMetadata::Params params; + OP_REQUIRES_OK(ctx, + FunctionMetadata::Create(ctx, "f", params, &func_metadata_)); + } + ~GEOPDatasetOp() { + if (f_handle_ != kInvalidHandle && lib_ != nullptr) { + LOG(INFO) << "Release function handle:" << f_handle_ << " owned by node instance:" << name(); + Status s = lib_->ReleaseHandle(f_handle_); + if (s != Status::OK()) { + LOG(INFO) << "Release function handle:" << f_handle_ + << " owned by node instance:" << name() + << " failed. 
original err msg: " << s.error_message(); + return; + } + } + } + void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override { + CHECK_NOT_NULL(ctx); + CHECK_NOT_NULL(output); + *output = new (std::nothrow) Dataset(ctx, this); + OP_REQUIRES(ctx, *output != nullptr, + errors::Internal("Failed new dataset of GEOPDatasetOp")); + } + + private: + class Dataset : public DatasetBase { + public: + explicit Dataset(OpKernelContext* ctx, + GEOPDatasetOp* op_kernel) + : DatasetBase(DatasetContext(ctx)), + op_kernel_(op_kernel), + tf_session_(ctx->session_handle()) {} + + ~Dataset() override {} + + GEOPDatasetOp* kernel() const { + return op_kernel_; + } + + std::string tf_session() const { + return tf_session_; + } + + std::unique_ptr MakeIteratorInternal( + const string& prefix) const override { + return std::unique_ptr( + new (std::nothrow) Iterator({this, strings::StrCat(prefix, "::GEOP")})); + } + + const DataTypeVector& output_dtypes() const override { + const static DataTypeVector empty_types; + return empty_types; + } + + const std::vector& output_shapes() const override { + const static std::vector empty_shapes; + return empty_shapes; + } + + string DebugString() const override { return "GEOPDatasetOp::Dataset"; } + + protected: + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const override { + return Status::OK(); + } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params) {} + ~Iterator() {} + Status Initialize(IteratorContext* ctx) override { + LOG(INFO) << "Start to initialize iterator of GEOPDatasetOp"; + REQUIRES_NOT_NULL(ctx); + REQUIRES_NOT_NULL(dataset()); + REQUIRES_NOT_NULL(dataset()->kernel()); + mutex_lock l(mu_); + if (dataset()->kernel()->f_handle_ == kInvalidHandle) { + FunctionLibraryRuntime* lib = ctx->flr(); + REQUIRES_NOT_NULL(lib); + FunctionLibraryRuntime::InstantiateOptions inst_opts; + auto metadata = 
dataset()->kernel()->func_metadata_; + REQUIRES_NOT_NULL(metadata); + AddSessionInfo(*metadata->lib_def(), metadata->func().name(), dataset()->tf_session()); + inst_opts.lib_def = metadata->lib_def(); + inst_opts.create_kernels_eagerly = true; + if (!metadata->use_inter_op_parallelism()) { + inst_opts.executor_type = "SINGLE_THREADED_EXECUTOR"; + } + inst_opts.is_multi_device_function = false; + REQUIRES_NOT_NULL(lib->device()); + inst_opts.target = lib->device()->name(); + TF_RETURN_IF_ERROR(lib->Instantiate( + metadata->func().name(), AttrSlice(&metadata->func().attr()), + inst_opts, &dataset()->kernel()->f_handle_)); + dataset()->kernel()->lib_ = lib; + } + FunctionLibraryRuntime* lib = ctx->flr(); + REQUIRES_NOT_NULL(lib); + REQUIRES_NOT_NULL(lib->device()); + REQUIRES_NOT_NULL(lib->device()->resource_manager()); + std::vector args; + std::vector out_tensors; + FunctionLibraryRuntime::Options f_opts; + ScopedStepContainer step_container( + f_opts.step_id, [lib](const string& name) { + lib->device()->resource_manager()->Cleanup(name).IgnoreError(); + }); + f_opts.step_container = &step_container; + f_opts.runner = ctx->runner(); + CancellationManager cancellation_manager; + f_opts.cancellation_manager = &cancellation_manager; + Notification n; + Status s; + lib->Run(f_opts, dataset()->kernel()->f_handle_, std::move(args), + &out_tensors, [&n, &s](Status func_status) { + s.Update(func_status); + n.Notify(); + }); + n.WaitForNotification(); + if (!s.ok()) { + LOG(ERROR) << s; + } + return s; + } + + void AddSessionInfo(const FunctionLibraryDefinition &flib_def, + std::string func_name, std::string session) { + FunctionDef* fdef = const_cast(flib_def.Find(func_name)); + if (fdef != nullptr) { + for (NodeDef& ndef : *fdef->mutable_node_def()) { + std::string node_name = ndef.name(); + if (str_util::StartsWith(node_name, "GeOp_")) { + AddNodeAttr("_session", session, &ndef); + LOG(INFO) << "Node " << node_name << " add session info " + << session << " success."; + } 
+ } + } + } + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + *end_of_sequence = true; + return Status::OK(); + } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + return Status::OK(); + } + + private: + mutex mu_; + }; + GEOPDatasetOp* op_kernel_; + std::string tf_session_; + }; + std::shared_ptr func_metadata_ = nullptr; + FunctionLibraryRuntime::Handle f_handle_; + FunctionLibraryRuntime* lib_; +}; + +REGISTER_KERNEL_BUILDER(Name("GEOPDataset").Device(DEVICE_CPU), GEOPDatasetOp); +} // namespace +} // namespace data +} // namespace tensorflow diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc new file mode 100644 index 000000000..35fd34c32 --- /dev/null +++ b/tf_adapter/kernels/geop_npu.cc @@ -0,0 +1,921 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tf_adapter/kernels/geop_npu.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tf_adapter/util/infershape_util.h" +#include "tf_adapter/util/npu_ops_identifier.h" +#include "tf_adapter/util/npu_attrs.h" +#include "tf_adapter/common/common.h" +#include "tf_adapter/util/session_manager.h" +#include "tf_adapter/util/ge_plugin.h" + +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/framework/attr_value_util.h" + +#include "tdt/tsd_client.h" +#include "tdt/tdt_host_interface.h" +#include "ge/ge_api.h" +#include "ge/ge_api_types.h" +#include "framework/common/types.h" +#include "framework/common/ge_inner_error_codes.h" +#include "framework/omg/parser/model_parser.h" +#include "framework/omg/parser/parser_factory.h" +#include "framework/omg/parser/parser_api.h" + +using namespace tdt; + +namespace tensorflow { +namespace { +inline string ToString(ge::Status status) { + return ::ge::StatusFactory::Instance()->GetErrDesc(status); +} +Status BuildOutputTensorInfo(OpKernelContext *ctx, std::vector &outputs) { + // ctx is not nullptr + int num_outputs = ctx->num_outputs(); + LOG(INFO) << "BuildOutputTensorInfo, num_outputs:" << num_outputs; + if (num_outputs != static_cast(outputs.size())) { + LOG(ERROR) << "[GEOP] Outputs num mismatched, need:" << num_outputs + << ", while GE return:" << outputs.size(); + return errors::InvalidArgument("Outputs num mismatched, need:", num_outputs, + ", while GE return:", outputs.size()); + } + + // populate outputs + for (int i = 0; i < num_outputs; ++i) { + ge::OutputTensorInfo &output = outputs[i]; + std::vector dims; + std::string 
dim_string; + for (int64_t dim : output.dims) { + dims.push_back(dim); + dim_string = dim_string + " " + std::to_string(dim); + } + TensorShape out_shape(dims); + Tensor *tensor = nullptr; + TF_RETURN_IF_ERROR(ctx->allocate_output(i, out_shape, &tensor)); + REQUIRES_NOT_NULL(tensor); + size_t total_bytes = tensor->TotalBytes(); + void *tensor_ptr = DMAHelper::base(tensor); + if (total_bytes != static_cast(output.length)) { + LOG(ERROR) << "[GEOP] Outputs len mismatched, index:" << i << ", alloc output:" << total_bytes + << ", while GE return:" << output.length; + return errors::InvalidArgument("Outputs num mismatched, index:", i, ", alloc output:", total_bytes, + ", while GE return:", outputs[i].length); + } + LOG(INFO) << "BuildOutputTensorInfo, output index:" << i + << ", total_bytes:" << total_bytes << ", shape:" << dim_string + << ", tensor_ptr:" << (int64_t)tensor_ptr << ", output" << (int64_t)output.data.get(); + if (total_bytes == 0) { + LOG(INFO) << "BuildOutputTensorInfo, output index:" << i + << ", total_bytes is 0, continue do next. 
"; + continue; + } + + if (output.data != nullptr) { + void *dst_ptr = tensor_ptr; + void *src_ptr = static_cast(output.data.get()); + size_t left_size = total_bytes; + while (left_size > SECUREC_MEM_MAX_LEN) { + auto err = memcpy_s(dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN); + if (err != EOK) { + LOG(ERROR) << "[GEOP] Outputs mem copy failed, index:" << i << ", errret:" << err + << ", dst_ptr:" << (int64_t)dst_ptr << ", dst_size:" << SECUREC_MEM_MAX_LEN + << ", src_ptr:" << (int64_t)src_ptr << ", src_size:" << SECUREC_MEM_MAX_LEN; + return errors::InvalidArgument("Outputs mem copy failed, index:", i); + } + left_size -= SECUREC_MEM_MAX_LEN; + + dst_ptr = static_cast(static_cast(dst_ptr) + SECUREC_MEM_MAX_LEN); + src_ptr = static_cast(static_cast(src_ptr) + SECUREC_MEM_MAX_LEN); + } + auto err = memcpy_s(dst_ptr, left_size, src_ptr, left_size); + if (err != EOK) { + LOG(ERROR) << "[GEOP] Outputs mem copy failed, index:" << i << ", errret:" << err + << ", dst_ptr:" << (int64_t)dst_ptr << ", dst_size:" << left_size + << ", src_ptr:" << (int64_t)src_ptr << ", src_size:" << left_size; + return errors::InvalidArgument("Outputs mem copy failed, index:", i); + } + } + } + return Status::OK(); +} +} + +bool CmpValue(const std::pair, uint32_t> &p1, + const std::pair, uint32_t> &p2) +{ + return p1.second < p2.second; +} + +std::string CurrentTimeInStr() { + std::time_t now = std::time(nullptr); + std::tm *ptm = std::localtime(&now); + if (nullptr == ptm) { + return ""; + } + + const int time_buffer_len = 32; + char buffer[time_buffer_len] = {0}; + // format: 20171122042550 + std::strftime(buffer, time_buffer_len, "%Y%m%d%H%M%S", ptm); + return std::string(buffer); +} + +static const int64 kMicrosToMillis = 1000; +const int kInvalidGraphId = 0; +const int kMaxCacheNum = 10; +const int kFatalSleepTime = 3000; + +GeOp::GeOp(OpKernelConstruction *ctx) + : AsyncOpKernel(ctx), + init_flag_(false), + build_flag_(false), + shape_flag_(false), + 
add_graph_flag_(false), + sess_init_flag_(false), + compute_graph_empty_(false), + data_format_(""), + graph_id_(0), + cache_graph_id_(1), + is_initialized_graph_(false), + need_iteration_(false), + tf_session_(""), + ge_session_(nullptr), + job_type_("") { + Initialize(ctx); +} + +GeOp::~GeOp() { + Finalize(); +} + +void GeOp::Initialize(OpKernelConstruction *ctx) { + int64 startTime = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "[GEOP] Begin GeOp initialize."; + if (init_flag_) { + LOG(WARNING) << "[GEOP] GEOP already Initialize."; + return; + } + + CHECK_NOT_NULL(ctx); + const NameAttrList *func = nullptr; + OP_REQUIRES_OK(ctx, ctx->GetAttr("function", &func)); + function_ = *func; + string data_format; + OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format)); + this->data_format_ = data_format; + + Status s = ctx->GetAttr("_session", &tf_session_); + if (s.ok()) { + LOG(INFO) << "[GEOP] get session info from attr, tf session: " << tf_session_; + } + + // global environment Initialize, invoke once for each process + string sess_config = ""; + OP_REQUIRES_OK(ctx, ctx->GetAttr("_NpuOptimizer", &sess_config)); + std::map init_options = NpuAttrs::GetInitOptions(ctx); + std::map pass_options = NpuAttrs::GetPassOptions(ctx); + job_type_ = pass_options["job"]; + if (GePlugin::GetInstance()->IsGlobal()) { + LOG(INFO) << "[GEOP] GePlugin global, skip GePlugin init"; + } else { + GePlugin::GetInstance()->Init(init_options); + LOG(INFO) << "[GEOP] GePlugin init success"; + } + sess_options_ = NpuAttrs::GetSessOptions(ctx); + + init_flag_ = true; + int64 endTime = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "[GEOP] GeOp Initialize success, cost:" + << " [" << ((endTime - startTime) / kMicrosToMillis) << " ms]"; + return; +} + +void GeOp::Finalize() { + { + LOG(INFO) << "[GEOP] GeOp start to finalize, tf session: " << tf_session_ + << ", graph_id_: " << graph_id_; + // global environment finalize, invoke once for each process + { + mutex_lock 
lock{mu_}; + uint32_t graph_id = -1; + bool ret = DecrementGraphIdCount(tf_session_, graph_id); + if (!ret || graph_id < kInvalidGraphId) { + LOG(ERROR) << "tf session " << tf_session_ << " sub graph id failed."; + return; + } + if (graph_id == kInvalidGraphId) { + SessionManager::GetInstance().DestoryGeSession(tf_session_); + ClearGraphIdCount(tf_session_); + } + + if (!SessionManager::GetInstance().IsGeSessionExist()) { + if (!GePlugin::GetInstance()->IsGlobal()) { + GePlugin::GetInstance()->Finalize(); + LOG(INFO) << "[GEOP] GePlugin Finalize success"; + } else { + LOG(INFO) << "[GEOP] GePlugin global, skip GePlugin Finalize"; + } + } + } + } + init_flag_ = false; + LOG(INFO) << "[GEOP] GeOp Finalize success, tf session: " << tf_session_ + << ", graph_id_: " << graph_id_; + return; +} + +int GeOp::InitRebuildFlag() { + if (!build_flag_) { + LOG(INFO) << "[GEOP] tf session " << tf_session_ + << ", graph id: " << cache_graph_id_ + << " does not build yet, no need to check rebuild"; + return 0; + } + if (ge_session_ == nullptr) { + LOG(ERROR) << "[GEOP] GE session is nullptr"; + return -1; + } + if (!ge_session_->IsGraphNeedRebuild(cache_graph_id_)) { + LOG(INFO) << "[GEOP] tf session " << tf_session_ + << ", graph id: " << cache_graph_id_ + << " no need to rebuild"; + return 0; + } + + LOG(INFO) << "[GEOP] The graph need rebuild, graph id " << cache_graph_id_; + + // The graph need to rebuild, remove it from GE first. 
+ LOG(INFO) << "[GEOP] tf session: " << tf_session_ << ", graph id: " << cache_graph_id_; + auto ret = ge_session_->RemoveGraph(cache_graph_id_); + if (ret != ge::SUCCESS) { + LOG(ERROR) << "[GEOP] Failed to remove graph " << cache_graph_id_ + << " from ge, error code " << ret; + return -1; + } + + build_flag_ = false; + compute_graph_empty_ = false; + return 0; +} + +bool GeOp::IncrementGraphIdCount(std::string& tf_session, uint32_t& graph_id) { + if (tf_session_.empty()) { + LOG(ERROR) << "[GEOP] Add graph id failed, tf session is empty."; + return false; + } + auto it = session_and_graph_id_map_.find(tf_session_); + if (it != session_and_graph_id_map_.end()) { + it->second = it->second + kMaxCacheNum; + graph_id = it->second; + return true; + } + graph_id = 1; + session_and_graph_id_map_.insert(std::make_pair(tf_session_, graph_id)); + return true; +} + +bool GeOp::DecrementGraphIdCount(std::string& tf_session, uint32_t& graph_id) { + if (tf_session_.empty()) { + LOG(ERROR) << "[GEOP] Sub graph id failed, tf session is empty."; + return false; + } + + auto it = session_and_graph_id_map_.find(tf_session_); + if (it != session_and_graph_id_map_.end()) { + if (it->second == 1) { + it->second = it->second - 1; + graph_id = it->second; + return true; + } + it->second = it->second - kMaxCacheNum; + graph_id = it->second; + return true; + } + LOG(ERROR) << "[GEOP] Sub graph id failed, can not find tf session " + << tf_session; + return false; +} + +void GeOp::ClearGraphIdCount(std::string& tf_session) { + auto it = session_and_graph_id_map_.find(tf_session_); + if (it != session_and_graph_id_map_.end()) { + session_and_graph_id_map_.erase(it); + } +} + +void GeOp::CacheShapeChangeGraphs() { + size_t num = cache_graphs_.size(); + std::pair, uint32_t>::iterator, bool> ret; + uint32_t tmp_graph_id = 0; + if (num >= kMaxCacheNum) { + LOG(INFO) << "[GEOP] the cache vector size is : " << num << " , begin erase the least uesed"; + std::sort(graph_counts_.begin(), 
graph_counts_.end(), CmpValue); + uint32_t erased_graph_id = cache_graphs_[graph_counts_[0].first]; + cache_graphs_.erase(graph_counts_[0].first); + graph_counts_.erase(graph_counts_.begin()); + ge::Status status = ge_session_->RemoveGraph(erased_graph_id); + if (status != ge::SUCCESS) { + LOG(WARNING) << "[GEOP] GE Remove Graph failed, ret : " << ToString(status); + } + ret = cache_graphs_.insert(std::make_pair(inputs_shape_string_, erased_graph_id)); + tmp_graph_id = erased_graph_id; + } else { + ret = cache_graphs_.insert(std::make_pair(inputs_shape_string_, graph_id_ + num)); + tmp_graph_id = graph_id_ + num; + } + if (ret.second) { + build_flag_ = false; + compute_graph_empty_ = false; + graph_counts_.push_back(std::make_pair(inputs_shape_string_, 1)); + cache_graph_id_ = tmp_graph_id; + } else { + for (auto &graph_count : graph_counts_) { + if (graph_count.first == inputs_shape_string_) { + graph_count.second += 1; + break; + } + } + cache_graph_id_ = cache_graphs_[inputs_shape_string_]; + build_flag_ = true; + shape_flag_ = false; + } +} + +void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { + // ctx is not nullptr + OP_REQUIRES_ASYNC(ctx, init_flag_, errors::InvalidArgument("GeOp not Initialize success."), done); + + // ge ge session + if (!sess_init_flag_) { + if (job_type_ != "localhost") { // in ps mode : ctx->session_handle() is empty + tf_session_ = "ps_worker_session"; + LOG(INFO) << "[GEOP] get tf session " << tf_session_ << " when in ps mode."; + } + + if (tf_session_.empty()) { + tf_session_ = ctx->session_handle(); + LOG(INFO) << "[GEOP] get tf session " << tf_session_ << " from session handle."; + } + + { + mutex_lock lock{mu_}; + bool res = IncrementGraphIdCount(tf_session_, graph_id_); + cache_graph_id_ = graph_id_; + if (!res || graph_id_ < kInvalidGraphId) { + OP_REQUIRES_ASYNC(ctx, false, + errors::Unavailable("Get ge session failed."), done); + return; + } + + LOG(INFO) << "[GEOP] Node name: " << ctx->op_kernel().name() + << 
" , tf session: " << tf_session_; + + res = SessionManager::GetInstance().GetOrCreateGeSession(tf_session_, ge_session_, sess_options_); + if (!res || tf_session_.empty() || ge_session_ == nullptr) { + OP_REQUIRES_ASYNC(ctx, false, + errors::Unavailable("Get ge session failed."), done); + return; + } + LOG(INFO) << "[GEOP] tf session: " << tf_session_ << " get ge session success."; + sess_init_flag_ = true; + } + } + string geop_name = ctx->op_kernel().name(); + uint32_t num_inputs = static_cast(ctx->num_inputs()); + LOG(INFO) << "[GEOP] Begin GeOp::ComputeAsync" + << ", kernel_name:" << geop_name + << ", num_inputs:" << num_inputs + << ", num_outputs:" << ctx->num_outputs(); + int64 startTime = InferShapeUtil::GetCurrentTimestap(); + + if (!build_flag_) { + // record input shape + inputs_shape_string_.clear(); + for (uint32_t i = 0; i < num_inputs; i++) { + Tensor input(ctx->input(i)); + inputs_shape_string_.push_back(input.shape().DebugString()); + } + cache_graphs_.insert(std::make_pair(inputs_shape_string_, cache_graph_id_)); + graph_counts_.push_back(std::make_pair(inputs_shape_string_, 1)); + } else if (inputs_shape_string_.size() == num_inputs) { + for (uint32_t i = 0; i < num_inputs; i++) { + if (inputs_shape_string_.at(i) == ctx->input(i).shape().DebugString()) { + continue; + } else { + // input_shape change, build GEOP for one more time + inputs_shape_string_.at(i) = ctx->input(i).shape().DebugString(); + shape_flag_ = true; + } + } + if (shape_flag_) { + CacheShapeChangeGraphs(); + } + } else { + build_flag_ = false; + compute_graph_empty_ = false; + } + + auto ret = InitRebuildFlag(); + if (ret != 0) { + OP_REQUIRES_ASYNC(ctx, false, + errors::Unavailable("Failed to check rebuild flag"), done); + return; + } + + if (!build_flag_) { + // Get Graph + OP_REQUIRES_ASYNC(ctx, ctx->function_library() != nullptr, + errors::Internal("function library is nullptr"), done); + const FunctionLibraryDefinition *flib_def = + 
ctx->function_library()->GetFunctionLibraryDefinition(); + OP_REQUIRES_ASYNC(ctx, flib_def != nullptr, + errors::Internal("flib_def is nullptr"), done); + const FunctionDef *fdef = flib_def->Find(function_.name()); + OP_REQUIRES_ASYNC(ctx, fdef != nullptr, + errors::Internal("fdef is nullptr"), done); + std::shared_ptr graph = std::make_shared(OpRegistry::Global()); + OP_REQUIRES_ASYNC(ctx, graph != nullptr, + errors::Internal("create tensorflow graph failed"), done); + + std::vector input_vec; + for (uint32_t i = 0; i < num_inputs; i++) { + Tensor input(ctx->input(i)); + input_vec.push_back(input); + } + + // Build GraphDef from FunctionDef + GraphDef ori_graph_def; + BuildGraphDef(ctx, done, *flib_def, *fdef, input_vec, ori_graph_def, is_initialized_graph_); + if (ctx->status() != Status::OK()) { + return; + } + + /* if graph is init verify graph, return */ + if (this->is_initialized_graph_ == true) { + Tensor initialized_tensor(ctx->expected_output_dtype(0), TensorShape({0})); + ctx->set_output(0, initialized_tensor); + done(); + return; + } + + char *need_print = getenv("PRINT_MODEL"); + if (need_print != nullptr && strcmp("1", need_print) == 0) { + string tmpmodel_path = "TF_"; + string tmodel_path = tmpmodel_path + geop_name.c_str() + ".pbtxt"; + Status status_out = WriteTextProto(Env::Default(), tmodel_path, ori_graph_def); + } + int64 endTime = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "[GEOP] In GEOP computeAsync, kernel_name:" + << geop_name << " ,TFadapter cost time: [" + << ((endTime - startTime) / kMicrosToMillis) << " ms]"; + LOG(INFO) << "[GEOP] TFadpter process graph success, GE parser begin, kernel_name:" + << geop_name << " ,tf session: " << tf_session_ + << " ,graph id :" << cache_graph_id_; + // parser, tensorflow graph to ge graph + std::shared_ptr model_parser = + domi::ModelParserFactory::Instance()->CreateModelParser(domi::FrameworkType::TENSORFLOW); + OP_REQUIRES_ASYNC(ctx, model_parser != nullptr, + errors::Unavailable("create 
model parser ret failed."), done); + ge::ComputeGraphPtr compute_graph = nullptr; + try { + compute_graph = std::make_shared("ge_default_" + CurrentTimeInStr()); + } catch (...) { + OP_REQUIRES_ASYNC(ctx, false, + errors::Internal("make shared failed"), done); + } + + OP_REQUIRES_ASYNC(ctx, compute_graph != nullptr, + errors::InvalidArgument("create ComputeGraph failed"), done); + + auto build_sub_graph = [this, flib_def] (const google::protobuf::Message *root_proto, const std::string &graph) -> + std::unique_ptr { + // const tensorflow::GraphDef *graph_def_in = reinterpret_cast(root_proto); + LOG(INFO) << "[GEOP] build_sub_graph enter, sub graph name is " << graph; + const FunctionDef *func_def = flib_def->Find(graph); + if (func_def == nullptr) { + LOG(ERROR) << "[GEOP] Sub graph not found in library, sub graph name is " << graph; + return nullptr; + } + // get infershape + Graph subgraph(OpRegistry::Global()); + Status status = InferShapeUtil::getSubGraphFromFunctionDef(*func_def, &subgraph); + if (status != Status::OK()) { + LOG(ERROR) << "[GEOP] Get subgraph from functiondef fail."; + return nullptr; + } + LOG(INFO) << "[GEOP] Get subgraph from functiondef success."; + + bool is_initialize = false; + for (Node *node : subgraph.nodes()) { + AddNodeAttrs(node, is_initialize); + + // Add Input&Output Desc into NodeDef + if (GenerateDesc(node) != Status::OK()) { + LOG(WARNING) << "[GEOP] name: " << node->name() << " op:" << node->type_string() + << " Generate desc failed in subgraph."; + } + } + + unique_ptr sub_graph_def(new (std::nothrow) GraphDef()); + if (sub_graph_def == nullptr) { + LOG(ERROR) << "[GEOP] Malloc memory for subgraph def fail."; + return nullptr; + } + subgraph.ToGraphDef(sub_graph_def.get()); + + // change function op to subgraph type + ChangeFunctionOpToSubgraph(*sub_graph_def.get(), *flib_def); + + unique_ptr graph_def_out(std::move(sub_graph_def)); + + char *need_print = getenv("PRINT_MODEL"); + if (need_print != nullptr && strcmp("1", 
need_print) == 0) { + string tmpmodel_path = "TF_Subgraph_"; + string tmodel_path = tmpmodel_path + graph.c_str() + ".pbtxt"; + Status status_out = WriteTextProto(Env::Default(), tmodel_path, *graph_def_out.get()); + } + LOG(INFO) << "[GEOP] build_sub_graph exit, sub graph name is " << graph; + return graph_def_out; + }; + + ge::Status status = model_parser->ParseProtoWithSubgraph( + reinterpret_cast(&ori_graph_def), build_sub_graph, compute_graph); + OP_REQUIRES_ASYNC(ctx, status == ge::SUCCESS, + errors::Internal("graph parse failed, domi_ret : ", ToString(status)), done); + + LOG(INFO) << "[GEOP] Tensorflow graph parse to ge graph success, kernel_name:" << geop_name + << " ,tf session: " << tf_session_ <<" ,graph id: " << cache_graph_id_; + + size_t nodes = compute_graph->GetAllNodesSize(); + if (nodes == 0) { + build_flag_ = true; + compute_graph_empty_ = true; + int64 endTime = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "[GEOP] End GeOp::ComputeAsync, compute_graph is empty, kernel_name:" + << geop_name << ", ret_status:" << ToString(ge::SUCCESS) + << " , tf session: " << tf_session_ << " ,graph id: " + << cache_graph_id_ << " [" << ((endTime - startTime) / kMicrosToMillis) << " ms]"; + done(); + return; + } + + // convert to ge::graph + ge::Graph ge_graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph); + ge_graph.SetNeedIteration(this->need_iteration_); + + // call ge session addGraph api + status = ge_session_->AddGraph(cache_graph_id_, ge_graph); + if (status != ge::SUCCESS) { + std::this_thread::sleep_for(std::chrono::milliseconds(kFatalSleepTime)); + LOG(FATAL) << "[GEOP] call ge session add graph failed, kernel: " << geop_name + << " ,tf session: " << tf_session_ << ", graph id: " << cache_graph_id_; + OP_REQUIRES_ASYNC(ctx, status == ge::SUCCESS, + errors::Unavailable("[GEOP] GE session add graph failed, domi_ret : ", ToString(status)), done); + } else { + add_graph_flag_ = true; + LOG(INFO) << "[GEOP] Add graph to ge session 
success, kernel_name:" << geop_name + << " ,tf session: " << tf_session_ << " ,graph id:" << cache_graph_id_; + } + + build_flag_ = true; + } else { + if (compute_graph_empty_) { + int64 endTime = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "[GEOP] End GeOp::ComputeAsync, compute_graph is empty, kernel_name:" + << geop_name << ", ret_status:" << ToString(ge::SUCCESS) + << " , tf session: " << tf_session_ << " ,graph id: " + << cache_graph_id_ << " [" << ((endTime - startTime) / kMicrosToMillis) << " ms]"; + done(); + return; + } + } + + int64 run_start_time = InferShapeUtil::GetCurrentTimestap(); + auto callback = [done, ctx, run_start_time](ge::Status ge_status, std::vector &outputs) { + if (ge_status == ge::SUCCESS) { + if (BuildOutputTensorInfo(ctx, outputs) != Status::OK()) { + LOG(FATAL) << ctx->op_kernel().name() << " GEOP::DoRunAsync get output failed."; + } + } else if (ge_status == ge::END_OF_SEQUENCE) { + ctx->SetStatus(errors::OutOfRange("End of sequence")); + LOG(ERROR) << "[GEOP] Out of range: End of sequence."; + } else if (ge_status != ge::SUCCESS) { + tensorflow::Status tfStatus = errors::Unavailable(ToString(ge_status)); + ctx->CtxFailureWithWarning(tfStatus); + std::this_thread::sleep_for(std::chrono::milliseconds(kFatalSleepTime)); + LOG(FATAL) << ctx->op_kernel().name() << "GEOP::::DoRunAsync Failed"; + } + int64 run_end_time = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "[GEOP] RunGraphAsync callback, status:" << ge_status + << ", kernel_name:" << ctx->op_kernel().name() + << "[ " << (run_end_time - run_start_time) << "us]"; + done(); + }; + std::vector inputs; + OP_REQUIRES_OK_ASYNC(ctx, (BuildInputTensorInfo(ctx, inputs)), done); + + LOG(INFO) << "[GEOP] Call ge session RunGraphAsync, kernel_name:" << geop_name + << " ,tf session: " << tf_session_ << " ,graph id: " << cache_graph_id_; + // call ge session runGraphAsync api + ge::Status status = ge_session_->RunGraphAsync(cache_graph_id_, inputs, callback); + if (status != 
ge::SUCCESS) { + std::this_thread::sleep_for(std::chrono::milliseconds(kFatalSleepTime)); + LOG(FATAL) << "[GEOP] call ge session RunGraphAsync Failed, kernel:" << geop_name + << " ,tf session: " << tf_session_ << " ,graph id: " << cache_graph_id_; + } + OP_REQUIRES_ASYNC(ctx, status == ge::SUCCESS, + errors::Unavailable("ge session run graph failed, ret_status:", ToString(status)), done); + + int64 endTime = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "[GEOP] End GeOp::ComputeAsync, kernel_name:" + << geop_name << ", ret_status:" << ToString(status) + << " ,tf session: " << tf_session_ << " ,graph id: " + << cache_graph_id_ << " [" << ((endTime - startTime) / kMicrosToMillis) << " ms]"; + return; +} + +void GeOp::ChangeFunctionOpToSubgraph(GraphDef &sub_graph_def, + const FunctionLibraryDefinition &flib_def) { + std::vector function_names = flib_def.ListFunctionNames(); + for (NodeDef& node_def: *sub_graph_def.mutable_node()) { + for (string func_name : function_names) { + if (node_def.op() == func_name) { + node_def.set_op(SubGraph); + LOG(INFO) << "Node " << node_def.name() << " change op type from " + << func_name << " to " << SubGraph; + } + } + } +} + +void GeOp::AddNodeAttrs(Node* node, bool &is_initialize) { + // Add dp custom kernel label + if (node->type_string() == "IteratorGetNext") { + node->AddAttr("_kernel", "dp"); + } + if (node->type_string() == "Assert" || node->type_string() == "Print" || + node->type_string() == "PrintV2") { + node->AddAttr("_kernel", "extend"); + } + NodeDef &node_def = const_cast(node->def()); + if (node_def.op() == "Where") { + is_initialize = InferShapeUtil::IsInitializedGraph(node); + } + if (node->name() == "IterationOp") { + this->need_iteration_ = true; + LOG(INFO) << "subgraph has iteration op."; + } + // clear device info && attr + node_def.set_device(""); + if (node_def.op() == "Const") { + node_def.mutable_attr()->erase("data_format"); + node_def.mutable_attr()->erase("cce_format"); + 
node_def.mutable_attr()->erase("output_type"); + } +} + +// Build GraphDef from FunctionDef. +void GeOp::BuildGraphDef(OpKernelContext *ctx, DoneCallback done, + const FunctionLibraryDefinition& flib_def, + const FunctionDef &func_def, + const std::vector &input_vec, GraphDef &graph_def, + bool &is_initialize) { + // get infershape + Graph graph(OpRegistry::Global()); + OP_REQUIRES_OK_ASYNC(ctx, InferShapeUtil::InferShape(input_vec, &flib_def, &func_def, &graph), done); + + for (Node *node : graph.nodes()) { + AddNodeAttrs(node, is_initialize); + // Add Input&Output Desc into NodeDef + OP_REQUIRES_OK_ASYNC(ctx, this->GenerateDesc(node), done); + } + + graph.ToGraphDef(&graph_def); +} + +Status GeOp::BuildInputTensorInfo(OpKernelContext *ctx, std::vector &inputs) { + // ctx is not nullptr + int num_inputs = ctx->num_inputs(); + + // populate inputs + for (int i = 0; i < num_inputs; i++) { + Tensor tensor(ctx->input(i)); + DataType data_type = tensor.dtype(); + size_t total_bytes = tensor.TotalBytes(); + void *tensor_ptr = DMAHelper::base(&tensor); + + ge::InputTensorInfo input; + std::shared_ptr model_parser = + domi::ModelParserFactory::Instance()->CreateModelParser(domi::FrameworkType::TENSORFLOW); + REQUIRES_NOT_NULL(model_parser); + ge::DataType type = model_parser->ConvertToGeDataType(static_cast(data_type)); + if (type == ge::DT_UNDEFINED) { + LOG(ERROR) << "[GEOP] No Supported datatype : " << data_type; + return errors::InvalidArgument("No Supported datatype : ", data_type); + } + input.data_type = static_cast(type); + input.dims.clear(); + for (uint32_t dim : tensor.shape().dim_sizes()) { + input.dims.push_back(static_cast(dim)); + } + input.data = tensor_ptr; + input.length = static_cast(total_bytes); + + inputs.push_back(input); + } + return Status::OK(); +} + +// For each NodeDef, Create Input&Output Desc(shape,format,dataType) +Status GeOp::GenerateDesc(Node *&node) { + REQUIRES_NOT_NULL(node); + NodeDef &node_def = const_cast(node->def()); + const OpDef 
&op_def = node->op_def(); + + std::string format = this->data_format_; // format + int32_t domi_format = domi::domiTensorFormat_t::DOMI_TENSOR_RESERVED; + TF_RETURN_IF_ERROR(this->DomiFormatFromString(format, domi_format)); + + // Get signature(dataType) from the OpDef & NodeDef + DataTypeVector inputs; + DataTypeVector outputs; + TF_RETURN_IF_ERROR(tensorflow::InOutTypesForNode(node_def, op_def, &inputs, &outputs)); + + int num; + Node *in_node = nullptr; + const Edge *in_edge = nullptr; + + if (inputs.size() > INT_MAX) { + return errors::InvalidArgument("inputs size should be less than INT_MAX."); + } + + // Create input Desc + int inputs_size = static_cast(inputs.size()); + if (inputs_size > 0) { + AttrValue input_tensor_descs; + AttrValue input_tensor_descs_s; + num = 0; + for (; num < inputs_size;) { + node->input_node(num, &in_node); + node->input_edge(num, &in_edge); + REQUIRES_NOT_NULL(in_node); + REQUIRES_NOT_NULL(in_edge); + int src_output = in_edge->src_output(); + if (in_node->def().attr().find(OUTPUT_DESC) != in_node->def().attr().end()) { + NameAttrList desc_attr = in_node->def().attr().at(OUTPUT_DESC).list().func(src_output); + *(input_tensor_descs.mutable_list()->add_func()) = desc_attr; + } else { + LOG(INFO) << "[GEOP] no OUTPUT_DESC: " << node->name() << " <-- " << in_node->name(); + if (num > 0 && node->type_string() == "Merge" && in_node->type_string() == "NextIteration") { + node->input_node(num - 1, &in_node); + node->input_edge(num - 1, &in_edge); + REQUIRES_NOT_NULL(in_node); + REQUIRES_NOT_NULL(in_edge); + int src_output = in_edge->src_output(); + NameAttrList desc_attr = in_node->def().attr().at(OUTPUT_DESC).list().func(src_output); + *(input_tensor_descs.mutable_list()->add_func()) = desc_attr; + } + } + num++; + } + REQUIRES_NOT_NULL(node_def.mutable_attr()); + node_def.mutable_attr()->insert({INPUT_DESC, input_tensor_descs}); + } + + // Create output Desc + if (outputs.size() > 0) { + // Get infershape + const std::string KEY_SHAPE = 
tensorflow::KEY_SHAPE; + AttrValue shape_value; + const auto &it = node_def.attr().find(KEY_SHAPE); + if (it == node_def.attr().end()) { // no find + LOG(WARNING) << "[GEOP] There is no infershape of node : " << node_def.name(); + } else { + shape_value = node_def.attr().at(KEY_SHAPE); + uint32_t shape_size = static_cast(shape_value.list().shape_size()); + if (shape_size != outputs.size()) { + LOG(ERROR) << "[GEOP] size not equal, shape_size : " << shape_size + << " outputs size:" << outputs.size(); + shape_value.clear_list(); + } + } + // Create output Desc + AttrValue output_tensor_descs; + AttrValue output_tensor_descs_s; + int i = 0; + num = 0; + for (DataType data_type : outputs) { + string desc_string_s; + AttrValue attr_format_value; + attr_format_value.set_i((int64_t) domi_format); + AttrValue attr_datatype_value; + attr_datatype_value.set_i((int64_t) data_type); + + // shape + AttrValue attr_shape_value; + if (shape_value.has_list()) { + TensorShapeProto shape_proto = shape_value.list().shape(num); + for (int j = 0; j < shape_proto.dim_size(); j++) { + attr_shape_value.mutable_list()->add_i(shape_proto.dim(j).size()); + } + } + + NameAttrList name_attr_list; + name_attr_list.set_name(std::to_string(i)); + REQUIRES_NOT_NULL(name_attr_list.mutable_attr()); + name_attr_list.mutable_attr()->insert({SERIALIZE_FORMAT, attr_format_value}); + name_attr_list.mutable_attr()->insert({SERIALIZE_DATATYPE, attr_datatype_value}); + name_attr_list.mutable_attr()->insert({SERIALIZE_SHAPE, attr_shape_value}); + REQUIRES_NOT_NULL(output_tensor_descs.mutable_list()); + *(output_tensor_descs.mutable_list()->add_func()) = name_attr_list; + + num++; + i++; + } + node_def.mutable_attr()->erase(KEY_SHAPE); + node_def.mutable_attr()->insert({OUTPUT_DESC, output_tensor_descs}); + } + string op_def_string; + op_def.SerializeToString(&op_def_string); + + tensorflow::AttrValue value; + value.set_s(op_def_string); + node_def.mutable_attr()->insert({"op_def", value}); + return 
tensorflow::Status::OK(); +} + +Status GeOp::DomiFormatFromString(std::string format, int32_t &domi_format) { + if (format == "NCHW") { + domi_format = domi::domiTensorFormat_t::DOMI_TENSOR_NCHW; + return Status::OK(); + } else if (format == "NHWC") { + domi_format = domi::domiTensorFormat_t::DOMI_TENSOR_NHWC; + return Status::OK(); + } else if (format == "NC1HWC0") { + domi_format = domi::domiTensorFormat_t::DOMI_TENSOR_NC1HWC0; + return Status::OK(); + } else if (format == "NDHWC") { + domi_format = domi::domiTensorFormat_t::DOMI_TENSOR_NDHWC; + return Status::OK(); + } else if (format == "NCDHW") { + domi_format = domi::domiTensorFormat_t::DOMI_TENSOR_NCDHW; + return Status::OK(); + } else if (format == "DHWCN") { + domi_format = domi::domiTensorFormat_t::DOMI_TENSOR_DHWCN; + return Status::OK(); + } else if (format == "DHWNC") { + domi_format = domi::domiTensorFormat_t::DOMI_TENSOR_DHWNC; + return Status::OK(); + } else if (format == "FRACTALZ") { + domi_format = domi::domiTensorFormat_t::DOMI_TENSOR_FRACTAL_Z; + } + return errors::Unavailable( + "DomiFormatFromString, not supported format, format = ", format); +} +} // namespace tensorflow + + +namespace tensorflow { +mutex GeOp::mu_(LINKER_INITIALIZED); + +const std::string GeOp::INPUT_DESC = "input_tensor_desc"; +const std::string GeOp::OUTPUT_DESC = "output_tensor_desc"; +const std::string GeOp::SERIALIZE_FORMAT = "serialize_format"; +const std::string GeOp::SERIALIZE_DATATYPE = "serialize_datatype"; +const std::string GeOp::SERIALIZE_SHAPE = "serialize_shape"; +const std::string GeOp::SubGraph = "SubGraph"; +std::unordered_map GeOp::session_and_graph_id_map_; + +REGISTER_KERNEL_BUILDER(Name("GeOp").Device(DEVICE_CPU), GeOp); +} // namespace tensorflow diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h new file mode 100644 index 000000000..0b073b332 --- /dev/null +++ b/tf_adapter/kernels/geop_npu.h @@ -0,0 +1,111 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_KERNELS_GEOP_NPU_H_ +#define TENSORFLOW_KERNELS_GEOP_NPU_H_ + +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/util/env_var.h" + +#include +#include "graph/tensor.h" +#include "graph/utils/graph_utils.h" +#include "ge/ge_api.h" +#include "ge/ge_api_types.h" + +namespace tensorflow { +class GeOp : public AsyncOpKernel { + public: + explicit GeOp(OpKernelConstruction *ctx); + ~GeOp(); + void ComputeAsync(OpKernelContext *ctx, DoneCallback done) override; + + private: + void Initialize(OpKernelConstruction *ctx); + void Finalize(); + + // global environment Initialize/Finalize, only invoke once for each process + Status GlobalInitialize(OpKernelConstruction *ctx); + void GlobalFinalize(); + + // Build GraphDef from FunctionDef. 
+ void BuildGraphDef(OpKernelContext *ctx, DoneCallback done, + const FunctionLibraryDefinition &flib_def, const FunctionDef &func_def, + const std::vector &input_vec, GraphDef &graph_def, + bool &is_initialize); + + // Find and change op type to SubGraph + void ChangeFunctionOpToSubgraph(GraphDef &graph_def, const FunctionLibraryDefinition &flib_def); + + // prepare input tensor + Status BuildInputTensorInfo(OpKernelContext *ctx, std::vector &inputs); + + // create input and output desc for NodeDef + Status GenerateDesc(Node *&node); + + Status DomiFormatFromString(std::string format, int32_t &domi_format); + + private: + void AddNodeAttrs(Node* node, bool &is_initialize); + + int InitRebuildFlag(); + + bool IncrementGraphIdCount(std::string& tf_session, uint32_t& graph_id); + + bool DecrementGraphIdCount(std::string& tf_session, uint32_t& graph_id); + + void ClearGraphIdCount(std::string& tf_session); + + void CacheShapeChangeGraphs(); + + private: + static const std::string INPUT_DESC; + static const std::string OUTPUT_DESC; + static const std::string SERIALIZE_FORMAT; + static const std::string SERIALIZE_DATATYPE; + static const std::string SERIALIZE_SHAPE; + static const std::string SubGraph; + + static mutex mu_; + + bool init_flag_; + bool build_flag_; + bool shape_flag_; + bool add_graph_flag_; + bool sess_init_flag_; + bool compute_graph_empty_; + + NameAttrList function_; + std::string data_format_; + uint32_t graph_id_; + uint32_t cache_graph_id_; + bool is_initialized_graph_; + bool need_iteration_; + std::string tf_session_; + ge::Session* ge_session_; + std::string job_type_; + // std::vector inputs_shape_; + std::vector inputs_shape_string_; + std::map, uint32_t> cache_graphs_; + std::vector, uint32_t> > graph_counts_; + std::map sess_options_; + static std::unordered_map session_and_graph_id_map_; +}; +} // namespace tensorflow +#endif // TENSORFLOW_KERNELS_GEOP_NPU_H_ diff --git a/tf_adapter/kernels/hccl_ops.cc b/tf_adapter/kernels/hccl_ops.cc new 
file mode 100644 index 000000000..e94b17643 --- /dev/null +++ b/tf_adapter/kernels/hccl_ops.cc @@ -0,0 +1,88 @@ +/************************************************************************* + * Copyright (c) Huawei Technologies Co., Ltd. 2012-2018. All rights reserved. + * Description: + * + * See LICENSE.txt for license information + ************************************************************************/ + +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow { +class HcomAllReduceOpKernel : public OpKernel { +public: + explicit HcomAllReduceOpKernel(OpKernelConstruction* context) : OpKernel(context) {} + ~HcomAllReduceOpKernel() {} + void Compute(OpKernelContext* context) override + { + LOG(INFO) << "HcomAllReduceOp Compute."; + } +}; + +REGISTER_KERNEL_BUILDER(Name("HcomAllReduce").Device(DEVICE_CPU), + HcomAllReduceOpKernel); + +class HcomAllGatherOpKernel : public OpKernel { +public: + explicit HcomAllGatherOpKernel(OpKernelConstruction* context) : OpKernel(context) {} + ~HcomAllGatherOpKernel() {} + void Compute(OpKernelContext* context) override + { + LOG(INFO) << "HcomAllGatherOp Compute."; + } +}; + +REGISTER_KERNEL_BUILDER(Name("HcomAllGather").Device(DEVICE_CPU), + HcomAllGatherOpKernel); + +class HcomBroadcastOpKernel : public OpKernel { +public: + explicit HcomBroadcastOpKernel(OpKernelConstruction* context) : OpKernel(context) {} + ~HcomBroadcastOpKernel() {} + void Compute(OpKernelContext* context) override + { + LOG(INFO) << "HcomBroadcastOp Compute."; + } +}; + +REGISTER_KERNEL_BUILDER(Name("HcomBroadcast").Device(DEVICE_CPU), + HcomBroadcastOpKernel); + +class HcomReduceScatterOpKernel : public OpKernel { +public: + explicit HcomReduceScatterOpKernel(OpKernelConstruction* context) : OpKernel(context) {} + ~HcomReduceScatterOpKernel() {} + void Compute(OpKernelContext* context) override + { + LOG(INFO) << "HcomReduceScatterOp Compute."; + } +}; + +REGISTER_KERNEL_BUILDER(Name("HcomReduceScatter").Device(DEVICE_CPU), + 
HcomReduceScatterOpKernel); + +class HcomSendOpKernel : public OpKernel { +public: + explicit HcomSendOpKernel(OpKernelConstruction* context) : OpKernel(context) {} + ~HcomSendOpKernel() {} + void Compute(OpKernelContext* context) override + { + LOG(INFO) << "HcomSendOpKernel Compute."; + } +}; + +REGISTER_KERNEL_BUILDER(Name("HcomSend").Device(DEVICE_CPU), + HcomSendOpKernel); + +class HcomReceiveOpKernel : public OpKernel { +public: + explicit HcomReceiveOpKernel(OpKernelConstruction* context) : OpKernel(context) {} + ~HcomReceiveOpKernel() {} + void Compute(OpKernelContext* context) override + { + LOG(INFO) << "HcomReceiveOpKernel Compute."; + } +}; + +REGISTER_KERNEL_BUILDER(Name("HcomReceive").Device(DEVICE_CPU), + HcomReceiveOpKernel); +} // namespace tensorflow diff --git a/tf_adapter/kernels/host_queue_dataset_op.cc b/tf_adapter/kernels/host_queue_dataset_op.cc new file mode 100644 index 000000000..c32c3b65f --- /dev/null +++ b/tf_adapter/kernels/host_queue_dataset_op.cc @@ -0,0 +1,444 @@ +/** +* Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. 
+* Description: Implement the custom host queue dataset to get and send data +*/ + +#include +#include +#include +#include "tf_adapter/common/common.h" +#include "tdt/tdt_host_interface.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/stats_aggregator.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/variant_tensor_data.h" +#include "tensorflow/core/kernels/data/dataset.h" +#include "tensorflow/core/lib/core/error_codes.pb.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/util/env_var.h" + +#include "unistd.h" + +namespace tensorflow { +namespace data { +namespace { +using namespace std; +using namespace tdt; + +const static int kMaxDeviceId = 7; +const static uint32_t kMaxValue = 128; +// total memory usage controlled below 2G +const uint64_t kTotalBytes = 2147483648; +std::atomic tdt_release(false); + +using InitFunc = int (*)(uint32_t); +using PushDataFunc = int (*)(const string &, const vector &); +using DestroyFunc = int (*)(); + +class HostQueueDatasetOp : public DatasetOpKernel { + public: + explicit HostQueueDatasetOp(OpKernelConstruction* ctx) + : DatasetOpKernel(ctx) { + // ctx is not nullptr + OP_REQUIRES_OK(ctx, ctx->GetAttr("channel_name", &channel_name_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_)); + LOG(INFO) << "Start to init tdt."; + string lib_path = "libdatatransfer.so"; + handle_ = dlopen(lib_path.c_str(), RTLD_NOW); + OP_REQUIRES(ctx, handle_ != nullptr, + errors::InvalidArgument("libdatatransfer.so dlopen failed.")); + init_api_ = (InitFunc)dlsym(handle_, "TdtHostInit"); + push_api_ = (PushDataFunc)dlsym(handle_, "TdtHostPushData"); + destroy_api_ = (DestroyFunc)dlsym(handle_, "TdtHostDestroy"); + OP_REQUIRES(ctx, init_api_ != nullptr && 
push_api_ != nullptr && + destroy_api_ != nullptr, errors::InvalidArgument("dlsym tdt API failed.")); + int64 id = -1; + OP_REQUIRES_OK(ctx, ReadInt64FromEnvVar("DEVICE_ID", -1, &id)); + + OP_REQUIRES(ctx, id >= 0 && id <= kMaxDeviceId, + errors::InvalidArgument("device_id should be in [0, 7].")); + uint32_t u_id = (uint32_t)id; + int32_t tdt_status = (*init_api_)(u_id); + OP_REQUIRES(ctx, tdt_status == 0, + errors::InvalidArgument("Tdt client init failed.")); + tdt_release = false; + } + ~HostQueueDatasetOp() { + LOG(INFO) << "Start to destroy tdt."; + if (!tdt_release) { + int32_t tdt_status = (*destroy_api_)(); + if (tdt_status != 0) { + LOG(ERROR) << "Tdt client close failed."; + } else { + LOG(INFO) << "Tdt client close success."; + tdt_release = true; + } + } + if (handle_ != nullptr) { + dlclose(handle_); + LOG(INFO) << "dlclose handle finish."; + } else { + LOG(INFO) << "handle is null."; + } + } + void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override { + std::vector inputs; + CHECK_NOT_NULL(output); + for (int i = 0; i < ctx->num_inputs(); ++i) { + DatasetBase* input = nullptr; + OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(i), &input)); + inputs.push_back(input); + } + *output = new (nothrow) Dataset(ctx, this, inputs, channel_name_, output_types_, output_shapes_); + OP_REQUIRES( + ctx, *output != nullptr, + errors::InvalidArgument( + "Data process host queue dataset op: new dataset failed.")); + } + + private: + class Dataset : public DatasetBase { + public: + Dataset(OpKernelContext* ctx, HostQueueDatasetOp* op_kernel, const std::vector& inputs, + const string& channelName, const DataTypeVector& outputTypes, + const vector& outputShapes) + : DatasetBase(DatasetContext(ctx)), + op_kernel_(op_kernel), + inputs_(inputs), + channel_name_(channelName), + output_types_(outputTypes), + output_shapes_(outputShapes) { + for (const auto& input : inputs_) { + input->Ref(); + } + } + + ~Dataset() override { + for (const auto& input : 
inputs_) { + input->Unref(); + } + } + + HostQueueDatasetOp* kernel() const { + return op_kernel_; + } + + unique_ptr MakeIteratorInternal( + const string& prefix) const override { + return unique_ptr(new (nothrow) Iterator( + {this, strings::StrCat(prefix, "::HostQueue")})); + } + + const DataTypeVector& output_dtypes() const override { + return output_types_; + } + const vector& output_shapes() const override { + return output_shapes_; + } + + string DebugString() const override { + return "HostQueueDatasetOp::Dataset"; + } + + protected: + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const override { + return Status::OK(); + } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params) {} + + ~Iterator() override { + { + mutex_lock lck(mu_); + finish_send_ = true; + } + // wait for tdt destory for sleeping one second + sleep(1); + { + mutex_lock lck(mu_); + cancelled_ = true; + cond_var_.notify_all(); + } + LOG(INFO) << "HostQueueDatasetOp's iterator is released."; + } + + void GetDataThread(const std::shared_ptr& ctx) { + RecordStart(ctx.get()); + auto cleanup = gtl::MakeCleanup([this, ctx] { RecordStop(ctx.get()); }); + while (true) { + { + mutex_lock lck(mu_); + while (!cancelled_ && (buffer_.size() >= kMaxValue || + total_bytes_ > kTotalBytes)) { + RecordStop(ctx.get()); + cond_var_.wait(lck); + RecordStart(ctx.get()); + } + + if (cancelled_) { + return; + } + } + + mutex_lock parent_l(parent_mu_); + vector args; + bool end_of_sequence = false; + BufferElement buffer_element; + buffer_element.status = + input_impls_[1]->GetNext(ctx.get(), &args, &end_of_sequence); + + if (!buffer_element.status.ok() || + (buffer_element.status.ok() && end_of_sequence)) { + if (!buffer_element.status.ok()) { + LOG(ERROR) << "Failed to get tensor data, Status:" + << buffer_element.status.ToString(); + } else { + LOG(INFO) << "Finish to get tensor 
data, Status:" + << buffer_element.status.ToString() + << "; end_of_sequence:" << end_of_sequence; + } + mutex_lock lck(mu_); + buffer_element.host_thread_finished = true; + buffer_.push_back(std::move(buffer_element)); + cond_var_.notify_all(); + return; + } + + { + mutex_lock lck(mu_); + for (auto& tensor : args) { + if (tensor.TotalBytes() > UINT64_MAX - total_bytes_) { + LOG(ERROR) << "the size of tensor is too big"; + buffer_element.host_thread_finished = true; + buffer_.push_back(std::move(buffer_element)); + cond_var_.notify_all(); + return; + } + total_bytes_ += tensor.TotalBytes(); + } + buffer_element.value = args; + buffer_.push_back(std::move(buffer_element)); + cond_var_.notify_all(); + } + } + } + void SendDataThread(const std::shared_ptr& ctx) { + vector args; + while (!cancelled_) { + { + mutex_lock lck(mu_); + if (buffer_.empty()) { + RecordStop(ctx.get()); + cond_var_.wait(lck); + RecordStart(ctx.get()); + } + } + + { + mutex_lock l(mu_); + if (buffer_.front().host_thread_finished) { + std::vector items; + DataItem end_item; + if (buffer_.front().status.ok()) { + end_item.dataType_ = TDT_END_OF_SEQUENCE; + LOG(INFO) << "Push data finish, end_of_sequence_ is true."; + } else { + end_item.dataType_ = TDT_ABNORMAL; + LOG(ERROR) << "Get data failed."; + } + items.emplace_back(end_item); + int32_t tdt_status = + (*(dataset()->kernel()->push_api_))(dataset()->channel_name_, items); + if (tdt_status != 0) { + LOG(ERROR) << "Push the end data to tdt failed."; + } + cancelled_ = true; + cond_var_.notify_all(); + return; + } else { + args = buffer_.front().value; + } + } + + string value; + uint64_t total_bytes = 0; + std::vector items; + for (auto& tensor : args) { + DataItem data_item; + data_item.dataType_ = TDT_TENSOR; + data_item.tensorShape_ = tensor.shape().DebugString(); + data_item.tensorType_ = DataTypeString(tensor.dtype()); + + if (DataTypeCanUseMemcpy(tensor.dtype())) { + data_item.dataLen_ = tensor.tensor_data().size(); + data_item.dataPtr_ = 
std::shared_ptr( + const_cast(tensor.tensor_data().data()), + [](void* elem) {}); + } else if (tensor.dtype() == DT_STRING) { + if (tensor.dims() != 0) { + LOG(ERROR) << "input of DT_STRING type should be scalar," + " current dims:" << tensor.dims(); + mutex_lock lck(mu_); + cancelled_ = true; + cond_var_.notify_all(); + return; + } + value = tensor.scalar()(); + data_item.dataLen_ = value.size(); + data_item.dataPtr_ = std::shared_ptr( + const_cast(value.data()), [](void* elem) {}); + } else { + LOG(ERROR) << "Unexpected data type."; + mutex_lock lck(mu_); + cancelled_ = true; + cond_var_.notify_all(); + return; + } + items.push_back(data_item); + // total_bytes is smaller than total_bytes_ + total_bytes += tensor.TotalBytes(); + } + // call tdt interface + int32_t tdt_status = (*(dataset()->kernel()->push_api_))(dataset()->channel_name_, items); + if (tdt_status != 0 || cancelled_ || finish_send_) { + mutex_lock lck(mu_); + cancelled_ = true; + LOG(INFO) << "End training."; + cond_var_.notify_all(); + return; + } + mutex_lock lck(mu_); + buffer_.pop_front(); + // total_bytes is smaller than total_bytes_ + total_bytes_ -= total_bytes; + cond_var_.notify_all(); + } + } + + Status EnsureReceiveThreadStarted(IteratorContext* ctx) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + // ctx is not nullptr + if (!receive_thread_) { + std::shared_ptr new_ctx(new (std::nothrow) IteratorContext(*ctx)); + REQUIRES_NOT_NULL(new_ctx); + REQUIRES_NOT_NULL(ctx->env()); + receive_thread_.reset(ctx->env()->StartThread( + {}, "receive_thread", + [this, new_ctx]() { GetDataThread(new_ctx); })); + } + return Status::OK(); + } + + Status EnsureSendThreadStarted(IteratorContext* ctx) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + if (!send_thread_) { + std::shared_ptr new_ctx(new (std::nothrow) IteratorContext(*ctx)); + REQUIRES_NOT_NULL(new_ctx); + REQUIRES_NOT_NULL(ctx->env()); + send_thread_.reset(ctx->env()->StartThread( + {}, "send_thread", + [this, new_ctx]() { SendDataThread(new_ctx); })); + } + return 
Status::OK(); + } + + Status Initialize(IteratorContext* ctx) override { + LOG(INFO) << "Start to check channel name. channelName: " + << dataset()->channel_name_; + if (dataset()->channel_name_.empty()) { + return errors::InvalidArgument( + "HostQueueDataset channel_name is null."); + } + + LOG(INFO) << "Start to check receive and send thread."; + try { + input_impls_.resize(dataset()->inputs_.size()); + } catch (...) { + return errors::InvalidArgument( + "HostQueueDataset resize failed."); + } + + for (size_t i = 0; i < input_impls_.size(); ++i) { + TF_RETURN_IF_ERROR(dataset()->inputs_[i]->MakeIterator( + ctx, strings::StrCat(prefix(), "[", i, "]"), &input_impls_[i])); + } + { + mutex_lock lck(mu_); + TF_RETURN_IF_ERROR(EnsureReceiveThreadStarted(ctx)); + TF_RETURN_IF_ERROR(EnsureSendThreadStarted(ctx)); + } + + LOG(INFO) << "HostQueue success to Initialize. channelName: " << dataset()->channel_name_; + return Status::OK(); + } + + Status GetNextInternal(IteratorContext* ctx, vector* outTensors, + bool* endOfSequence) override { + *endOfSequence = false; + LOG(INFO) << "HostQueue Get Next data."; + return Status::OK(); + } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + return Status::OK(); + } + + private: + struct BufferElement { + bool host_thread_finished = false; + Status status; + vector value; + }; + // This mutex is used to ensure exclusivity between multiple threads + // reading/writing this iterator's local state. + mutex mu_; + // This mutex is used to ensure exclusivity between multiple threads + // accessing the parent iterator. We keep this separate from `mu_` to + // allow prefetching to run in parallel with GetNext calls. 
+ mutex parent_mu_ ACQUIRED_BEFORE(mu_); + std::vector> input_impls_ GUARDED_BY(mu_); + condition_variable cond_var_; + string prefix_end_; + std::deque buffer_ GUARDED_BY(mu_); + std::unique_ptr receive_thread_ GUARDED_BY(mu_); + std::unique_ptr send_thread_ GUARDED_BY(mu_); + bool cancelled_ GUARDED_BY(mu_) = false; + bool finish_send_ GUARDED_BY(mu_) = false; + bool host_thread_finished_ GUARDED_BY(mu_) = false; + uint64_t total_bytes_ GUARDED_BY(mu_) = 0; + }; + HostQueueDatasetOp* op_kernel_; + const std::vector inputs_; + std::string channel_name_; + const DataTypeVector output_types_; + const vector output_shapes_; + }; + std::string channel_name_; + DataTypeVector output_types_; + vector output_shapes_; + void* handle_; + InitFunc init_api_; + PushDataFunc push_api_; + DestroyFunc destroy_api_; +}; + +REGISTER_KERNEL_BUILDER(Name("HostQueueDataset").Device(DEVICE_CPU), + HostQueueDatasetOp); +} // namespace +} // namespace data +} // namespace tensorflow diff --git a/tf_adapter/kernels/infeed_outfeed_ops.cc b/tf_adapter/kernels/infeed_outfeed_ops.cc new file mode 100644 index 000000000..41884cf1f --- /dev/null +++ b/tf_adapter/kernels/infeed_outfeed_ops.cc @@ -0,0 +1,178 @@ +/** +* Copyright (C) <2019> . All Rights Reserved. 
+* Description : Outfeed Enqueue and Outfeed Dequeue +*/ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/macros.h" +#include "tdt/tdt_host_interface.h" +#include "tensorflow/core/framework/types.h" +#include "tf_adapter/common/common.h" +#include +#include "securec.h" + +namespace tensorflow { +namespace { +Status GetTensorShape(const string &tensor_shape, TensorShape &shape) { + // change "[32,224,224,3]" => "32,224,224,3" + // tensor_shape.size() - 2 is the second to last + string str = tensor_shape.substr(1, tensor_shape.size() - 2); + string::size_type index = 0; + if (!str.empty()) { + while ((index = str.find(' ', index)) != string::npos) { + str.erase(index, 1); + } + } + string split = ","; + string::size_type pos1, pos2; + pos2 = str.find(split); + pos1 = 0; + while (pos2 != string::npos) { + try { + shape.AddDim(std::stoi(str.substr(pos1, pos2 - pos1))); + } catch (...) { + return errors::InvalidArgument("Invalid argument or something else : ", str.substr(pos1, pos2 - pos1)); + } + // string::size_type can store the length of any string object + pos1 = pos2 + split.size(); + pos2 = str.find(split, pos1); + } + if (pos1 != str.length()) { + try { + shape.AddDim(std::stoi(str.substr(pos1))); + } catch (...) 
{ + return errors::InvalidArgument("Invalid argument or something else : ", str.substr(pos1)); + } + } + return Status::OK(); +} + +Status ConvertDataItem2Tensor(const std::vector &items, + std::vector &tensors) { + for (auto &item : items) { + if (item.dataType_ == tdt::TDT_END_OF_SEQUENCE) { + LOG(INFO) << "End of processing."; + return Status::OK(); + } + DataType type; + DataTypeFromString(item.tensorType_, &type); + if (type == DT_STRING) { + Tensor result_tensor(tensorflow::DT_STRING, TensorShape( {})); + std::shared_ptr data_str_ptr = std::static_pointer_cast(item.dataPtr_); + result_tensor.scalar()() = std::move(string( + reinterpret_cast(data_str_ptr->c_str()), item.dataLen_)); + tensors.emplace_back(std::move(result_tensor)); + } else if (DataTypeCanUseMemcpy(type)) { + TensorShape tensorShape; + Status s = GetTensorShape(item.tensorShape_, tensorShape); + if (!s.ok()) { + return s; + } + Tensor result_tensor = Tensor(type, tensorShape); + std::shared_ptr data_str_ptr = std::static_pointer_cast(item.dataPtr_); + errno_t ret = memcpy_s(const_cast(result_tensor.tensor_data().data()), + result_tensor.tensor_data().size(), data_str_ptr->c_str(), item.dataLen_); + if (ret != EOK) { + return errors::Unknown("memcpy failed"); + } + tensors.emplace_back(std::move(result_tensor)); + } else { + return errors::InvalidArgument("Not support this type: ", type); + } + } + return Status::OK(); +} + +class OutfeedEnqueueOp : public OpKernel { + public: + explicit OutfeedEnqueueOp(OpKernelConstruction *ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("channel_name", &channel_name_)); + LOG(INFO) << "OutfeedEnqueueOp built"; + } + ~OutfeedEnqueueOp() { + LOG(INFO) << "OutfeedEnqueueOp has been destructed"; + } + void Compute(OpKernelContext *ctx) override { + LOG(INFO) << "OutfeedEnqueueOp running"; + } + bool IsExpensive() override { return false; } + private: + std::string channel_name_; +}; + +class OutfeedDequeueOp : public OpKernel { + public: + explicit 
OutfeedDequeueOp(OpKernelConstruction *ctx) : OpKernel(ctx) { + // ctx is not nullptr + OP_REQUIRES_OK(ctx, ctx->GetAttr("channel_name", &channel_name_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_)); + OP_REQUIRES(ctx, tdt::TdtHostPreparePopData() == 0, + errors::Internal("Prepare Pop Data failed")); + LOG(INFO) << "OutfeedDequeueOp built"; + } + ~OutfeedDequeueOp() { + LOG(INFO) << "OutfeedDequeueOp has been destructed"; + } + void Compute(OpKernelContext *ctx) override { + CHECK_NOT_NULL(ctx); + std::vector bundle; + OP_REQUIRES(ctx, tdt::TdtHostPopData(channel_name_, bundle) == 0, + errors::Internal("TdtHostPopData get data failed")); + std::vector out_tensors; + OP_REQUIRES_OK(ctx, ConvertDataItem2Tensor(bundle, out_tensors)); + OP_REQUIRES(ctx, out_tensors.size() > 0, errors::OutOfRange("Outfeed tensors reach the end")); + OP_REQUIRES(ctx, out_tensors.size() == output_shapes_.size(), + errors::Internal("Outfeed tensors num mistmatch", out_tensors.size(), + "vs. 
expect", output_shapes_.size())); + for (int i = 0; i < ctx->num_outputs(); ++i) { + ctx->set_output(i, out_tensors[i]); + } + } + bool IsExpensive() override { return false; } + private: + DataTypeVector output_types_; + std::vector output_shapes_; + std::string channel_name_; +}; + +class StopOutfeedDequeueOp : public OpKernel { + public: + explicit StopOutfeedDequeueOp(OpKernelConstruction *ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("channel_name", &channel_name_)); + LOG(INFO) << "StopOutfeedDequeueOp built"; + } + ~StopOutfeedDequeueOp() { + LOG(INFO) << "StopOutfeedDequeueOp has been destructed"; + } + void Compute(OpKernelContext *ctx) override { + LOG(INFO) << "StopOutfeedDequeueOp running"; + OP_REQUIRES(ctx, tdt::TdtHostStop(channel_name_) == 0, + errors::Internal("TdtHostStop failed")); + } + bool IsExpensive() override { return false; } + private: + std::string channel_name_; +}; + +REGISTER_KERNEL_BUILDER(Name("OutfeedDequeueOp") +. +Device(DEVICE_CPU), + OutfeedDequeueOp +); + +REGISTER_KERNEL_BUILDER(Name("OutfeedEnqueueOp") +. +Device(DEVICE_CPU), + OutfeedEnqueueOp +); + +REGISTER_KERNEL_BUILDER(Name("StopOutfeedDequeueOp") +. +Device(DEVICE_CPU), + StopOutfeedDequeueOp +); +} // namespace +} // namespace tensorflow diff --git a/tf_adapter/kernels/lars_ops.cc b/tf_adapter/kernels/lars_ops.cc new file mode 100644 index 000000000..90e7e5af2 --- /dev/null +++ b/tf_adapter/kernels/lars_ops.cc @@ -0,0 +1,88 @@ +/** +* Copyright (C) <2019> . All Rights Reserved. 
+* Description : implememt of lars +*/ + +#ifndef TENSORFLOW_CORE_KERNELS_LARS_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LARS_OP_H_ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/bounds_check.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/macros.h" + +namespace tensorflow { +template +class LarsOp : public OpKernel { + public: + + explicit LarsOp(OpKernelConstruction *context) : OpKernel(context) { + LOG(INFO) << "new LarsOp"; + } + ~LarsOp() { + LOG(INFO) << "del LarsOp"; + } + + void Compute(OpKernelContext *context) override { + int input_num = num_inputs(); + LOG(INFO) << "LarsOp: input num " << input_num; + input_num = ((input_num - 1) / 2); + + for (int j = 0; j < input_num; j++) { + // Grab the w_input tensor + const Tensor &w_tensor = context->input(j); + auto w_input = w_tensor.flat(); + + const Tensor &g_tensor = context->input(j + input_num); + auto g_input = g_tensor.flat(); + + // Create an output tensor + Tensor *output_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(j, w_tensor.shape(), &output_tensor)); + // handle any data type for w_input and output + auto output_flat = output_tensor->flat(); + + // Set the value of each element + const int N = w_input.size(); + LOG(INFO) << "LarsOp idx " << j << ", data num " << N; + + auto sum_w = w_input(0); + auto sum_g = g_input(0); + for (int i = 1; i < N; i++) { + auto w = w_input(i); + sum_w += w; + LOG(INFO) << "LarsOp w " << w << ", sum_w " << sum_w; + + auto g = g_input(i); + sum_g += g; + LOG(INFO) << "LarsOp g " << g << ", sum_g " << sum_g; + } + + auto w_norm = sqrt(sum_w); + auto g_norm = sqrt(sum_g); + 
auto b = g_norm + w_norm + T(0.00001); + + for (int i = 1; i < N; i++) { + auto w = w_input(i); + auto g = g_input(i); + output_flat(i) = b * (g + w); + } + } + + LOG(INFO) << "in LarsOp"; + } + bool IsExpensive() override { return false; } +}; + +REGISTER_KERNEL_BUILDER(Name("LARS") +. +Device(DEVICE_CPU) +.TypeConstraint("T"), LarsOp); +} // namespace tensorflow +#endif // TENSORFLOW_CORE_KERNELS_LARS_OP_H_ diff --git a/tf_adapter/kernels/lars_v2_op.cc b/tf_adapter/kernels/lars_v2_op.cc new file mode 100644 index 000000000..d52109b27 --- /dev/null +++ b/tf_adapter/kernels/lars_v2_op.cc @@ -0,0 +1,70 @@ +/** +* Copyright (C) <2019> . All Rights Reserved. +* Description : implememt of larsV2 +*/ + +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow +{ +template +class LarsV2OP : public OpKernel { +public: + explicit LarsV2OP(OpKernelConstruction* context) : OpKernel(context) { + LOG(INFO) << "new LarsV2OP"; + } + ~LarsV2OP() { + LOG(INFO) << "del LarsV2OP"; + } + + void Compute(OpKernelContext* context) override { + LOG(INFO) << "LarsV2OP Compute, num_inputs: " << context->num_inputs(); + + // Grab the w_input tensor + const Tensor& w_tensor = context->input(0); + auto w_input = w_tensor.flat(); + + const Tensor& g_tensor = context->input(1); + auto g_input = g_tensor.flat(); + + // Create an output tensor + Tensor* output_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(0, + w_tensor.shape(), + &output_tensor)); + // handle any data type for w_input and output + auto output_flat = output_tensor->flat(); + + // Set the value of each element + const int N = w_input.size(); + + auto sum_w = w_input(0); + auto sum_g = g_input(0); + for (int i = 1; i < N; i++) { + auto w = w_input(i); + sum_w += w; + LOG(INFO) << "LarsV2OP w " << w << ", sum_w " << sum_w; + + auto g = g_input(i); + sum_g += g; + LOG(INFO) << "LarsV2OP g " << g << ", sum_g " << sum_g; + } + + auto w_norm = sqrt(sum_w); + auto g_norm = sqrt(sum_g); + auto b = g_norm 
+ w_norm + T(0.00001); + + for (int i = 1; i < N; i++) { + auto w = w_input(i); + auto g = g_input(i); + output_flat(i) = b * (g + w); + } + } + bool IsExpensive() override { return false; } +}; + + +REGISTER_KERNEL_BUILDER(Name("LarsV2").Device(DEVICE_CPU).TypeConstraint("T"), LarsV2OP); + +} // namespace tensorflow + diff --git a/tf_adapter/kernels/log_time_stamp_ops.cc b/tf_adapter/kernels/log_time_stamp_ops.cc new file mode 100644 index 000000000..976e0cb87 --- /dev/null +++ b/tf_adapter/kernels/log_time_stamp_ops.cc @@ -0,0 +1,36 @@ +/** +* Copyright (C) <2019> . All Rights Reserved. +* Description : implememt of log time stamp +*/ + +#ifndef TENSORFLOW_CORE_KERNELS_LogTimeStamp_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LogTimeStamp_OP_H_ +#if HISI_OFFLINE + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/macros.h" + +namespace tensorflow +{ + class LogTimeStampOP : public OpKernel { + public: + explicit LogTimeStampOP(OpKernelConstruction* ctx) : OpKernel(ctx) { + LOG(INFO) << "new LogTimeStampOP"; + } + ~LogTimeStampOP() { + LOG(INFO) << "del LogTimeStampOP"; + } + void Compute(OpKernelContext* ctx) override { + LOG(INFO) << "in LogTimeStampOP"; + } + bool IsExpensive() override { return false; } + }; + + REGISTER_KERNEL_BUILDER(Name("LogTimeStamp").Device(DEVICE_CPU), + LogTimeStampOP); +} // namespace tensorflow + +#endif // HISI_OFFLINE + +#endif // TENSORFLOW_CORE_KERNELS_LogTimeStamp_OP_H_ diff --git a/tf_adapter/kernels/maxpooling_op.cc b/tf_adapter/kernels/maxpooling_op.cc new file mode 100644 index 000000000..a8e1f5439 --- /dev/null +++ b/tf_adapter/kernels/maxpooling_op.cc @@ -0,0 +1,38 @@ +/** +* Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. 
+* Description: Max pooling grad with argmax +*/ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/framework/types.h" + +namespace tensorflow { +namespace { + +class MaxPoolingGradGradWithArgmaxOp : public OpKernel { + public: + explicit MaxPoolingGradGradWithArgmaxOp(OpKernelConstruction *ctx) : OpKernel(ctx) { + LOG(INFO) << "MaxPoolingGradGradWithArgmaxOp built"; + } + ~MaxPoolingGradGradWithArgmaxOp() { + LOG(INFO) << "MaxPoolingGradGradWithArgmaxOp has been destructed"; + } + void Compute(OpKernelContext *ctx) override { + LOG(INFO) << "[ATTENTION] MaxPoolingGradGradWithArgmaxOp can not run on cpu, \ + only running on npu, please open use_off_line "; + } + bool IsExpensive() override { return false; } +}; + +REGISTER_KERNEL_BUILDER(Name("MaxPoolGradGradWithArgmax") +. +Device(DEVICE_CPU), + MaxPoolingGradGradWithArgmaxOp +); + +} + +} diff --git a/tf_adapter/kernels/npu_cpu_ops.cc b/tf_adapter/kernels/npu_cpu_ops.cc new file mode 100644 index 000000000..5150b60ef --- /dev/null +++ b/tf_adapter/kernels/npu_cpu_ops.cc @@ -0,0 +1,27 @@ +/************************************************************************* + * Copyright (c) Huawei Technologies Co., Ltd. 2012-2018. All rights reserved. 
+ * Description: + * + * See LICENSE.txt for license information + ************************************************************************/ +#include "tensorflow/core/framework/bounds_check.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/lib/random/philox_random.h" +#include "tensorflow/core/lib/random/simple_philox.h" + +namespace tensorflow { +class EmbeddingRankIdOpKernel : public OpKernel { +public: + explicit EmbeddingRankIdOpKernel(OpKernelConstruction* context) : OpKernel(context) {} + ~EmbeddingRankIdOpKernel() {} + void Compute(OpKernelContext* context) override + { + LOG(INFO) << "EmbeddingRankIdOp Compute."; + } +}; +REGISTER_KERNEL_BUILDER(Name("EmbeddingRankId").Device(DEVICE_CPU), + EmbeddingRankIdOpKernel); +} \ No newline at end of file diff --git a/tf_adapter/kernels/npu_mixed_precesion_ops.cc b/tf_adapter/kernels/npu_mixed_precesion_ops.cc new file mode 100644 index 000000000..6631142a5 --- /dev/null +++ b/tf_adapter/kernels/npu_mixed_precesion_ops.cc @@ -0,0 +1,109 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/bounds_check.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { +// Mixed-precesions training +class NpuAllocFloatStatusOp : public tensorflow::OpKernel { + public: + explicit NpuAllocFloatStatusOp(tensorflow::OpKernelConstruction *context) + : OpKernel(context) {} + ~NpuAllocFloatStatusOp() {} + void Compute(tensorflow::OpKernelContext *context) override { + // Create an output tensor + Tensor *output_tensor = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape({1}), + &output_tensor)); + // Set the float_status tensor to be 0 + auto flat = output_tensor->flat(); + flat(0) = 0.0; + } +}; + +REGISTER_KERNEL_BUILDER( + Name("NpuAllocFloatStatus") +. +Device(tensorflow::DEVICE_CPU), + NpuAllocFloatStatusOp +); + +class NpuGetFloatStatusOp : public tensorflow::OpKernel { + public: + explicit NpuGetFloatStatusOp(tensorflow::OpKernelConstruction *context) + : OpKernel(context) {} + ~NpuGetFloatStatusOp() {} + void Compute(tensorflow::OpKernelContext *context) override { + // Grab the input tensor + const Tensor &input_tensor = context->input(0); + + // Create an output tensor + Tensor *output_tensor = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), + &output_tensor)); + // Set the float_status tensor to be 0 + auto flat = output_tensor->flat(); + // For testing + flat(0) = 0.0; + } +}; + +REGISTER_KERNEL_BUILDER( + Name("NpuGetFloatStatus") +. 
+Device(tensorflow::DEVICE_CPU), + NpuGetFloatStatusOp +); + +class NpuClearFloatStatusOp : public tensorflow::OpKernel { + public: + explicit NpuClearFloatStatusOp(tensorflow::OpKernelConstruction *context) + : OpKernel(context) {} + ~NpuClearFloatStatusOp() {} + void Compute(tensorflow::OpKernelContext *context) override { + // Grab the input tensor + const Tensor &input_tensor = context->input(0); + auto input = input_tensor.flat(); + + // Create an output tensor + Tensor *output_tensor = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), + &output_tensor)); + + // Clear the status + auto flat = output_tensor->flat(); + // For testing + for (int i = 0; i < input.size(); i++) { + flat(i) = 0.0; + } + } +}; + +REGISTER_KERNEL_BUILDER( + Name("NpuClearFloatStatus") +. +Device(tensorflow::DEVICE_CPU), + NpuClearFloatStatusOp +); +} // namespace tensorflow + diff --git a/tf_adapter/kernels/npu_ops.cc b/tf_adapter/kernels/npu_ops.cc new file mode 100644 index 000000000..a162d7f49 --- /dev/null +++ b/tf_adapter/kernels/npu_ops.cc @@ -0,0 +1,41 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/macros.h" + +namespace tensorflow { +namespace { +class NPUTestOP : public OpKernel { + public: + explicit NPUTestOP(OpKernelConstruction *ctx) : OpKernel(ctx) { + } + ~NPUTestOP() { + } + void Compute(OpKernelContext *ctx) override { + } + bool IsExpensive() override { return false; } +}; + +REGISTER_KERNEL_BUILDER(Name("NPUTest") +. +Device(DEVICE_CPU), + NPUTestOP +); +} // namespace +} // namespace tensorflow diff --git a/tf_adapter/kernels/npu_sys_ctl_ops.cc b/tf_adapter/kernels/npu_sys_ctl_ops.cc new file mode 100644 index 000000000..4488b6bec --- /dev/null +++ b/tf_adapter/kernels/npu_sys_ctl_ops.cc @@ -0,0 +1,144 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_NPU_SYS_CTL_OPS_H_ +#define TENSORFLOW_NPU_SYS_CTL_OPS_H_ + +#include +#include +#include +#include + +#include "tf_adapter/kernels/geop_npu.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/platform/mutex.h" +#include "ge/ge_api.h" +#include "ge/ge_api_types.h" +#include "framework/common/ge_inner_error_codes.h" +#include "framework/common/fmk_error_codes.h" +#include "hccl/hcom.h" +#include "tdt/tsd_client.h" +#include "tdt/tdt_host_interface.h" +#include "framework/omg/parser/parser_api.h" +#include "tf_adapter/util/npu_attrs.h" +#include "tf_adapter/common/common.h" +#include "tf_adapter/util/ge_plugin.h" + +using namespace tdt; + +namespace tensorflow { +inline string ToString(ge::Status status) { + return ::ge::StatusFactory::Instance()->GetErrDesc(status); +} + +static int64 GetCurrentTimestap() { + struct timeval tv; + int ret = gettimeofday(&tv, nullptr); + if (ret != 0) { + LOG(ERROR) << "Func gettimeofday may failed, ret:" << ret; + return 0; + } + int64 totalUsec = tv.tv_usec + tv.tv_sec * 1000000; + return totalUsec; +} +static mutex g_mu(LINKER_INITIALIZED); +static int g_npuInitNum = 0; + +static const int64 kMicrosToMillis = 1000; + +class NPUInit : public OpKernel { + public: + explicit NPUInit(OpKernelConstruction *ctx); + void Compute(OpKernelContext *ctx) override; + ~NPUInit(); + private: + std::map init_options_; +}; + +NPUInit::NPUInit(OpKernelConstruction *ctx) : OpKernel(ctx) { + LOG(INFO) << "NPUInit"; + mutex_lock lock{g_mu}; + g_npuInitNum++; + string sess_config = ""; + Status s = ctx->GetAttr("_NpuOptimizer", &sess_config); + if (s.ok()) { + init_options_ = NpuAttrs::GetInitOptions(ctx); + } else { + LOG(INFO) << "[NPUInit] NPUInit can not get _NpuOptimizer attr, use default init options"; + init_options_ = NpuAttrs::GetDefaultInitOptions(); + } + +} +void NPUInit::Compute(OpKernelContext *ctx) { + if 
(GePlugin::GetInstance()->IsGlobal()) { + LOG(INFO) << "[NPUInit] GePlugin global, skip GePlugin init"; + return; + } + GePlugin::GetInstance()->Init(init_options_); + LOG(INFO) << "[NPUInit] GePlugin init success"; +} + +NPUInit::~NPUInit() { + LOG(INFO) << "[~NPUInit] NPUInit destructed"; + int64 unInitStartTime = GetCurrentTimestap(); + { + mutex_lock lock{g_mu}; + if (g_npuInitNum > 0) { + g_npuInitNum--; + } + if (g_npuInitNum != 0) { + int64 unInitEndTime = GetCurrentTimestap(); + LOG(INFO) << "[~NPUInit] NPU Shutdown success. [" + << ((unInitEndTime - unInitStartTime) / kMicrosToMillis) << " ms]"; + return; + } + } + if (!GePlugin::GetInstance()->IsGlobal()) { + GePlugin::GetInstance()->Finalize(); + LOG(INFO) << "[~NPUInit] GePlugin Finalize success"; + } else { + LOG(INFO) << "[~NPUInit] GePlugin global, skip GePlugin Finalize"; + } + + int64 unInitEndTime = GetCurrentTimestap(); + LOG(INFO) << "[~NPUInit] NPU Shutdown success. [" + << ((unInitEndTime - unInitStartTime) / kMicrosToMillis) << " ms]"; +} + +class NPUShutdown : public OpKernel { + public: + explicit NPUShutdown(OpKernelConstruction *ctx) : OpKernel(ctx) {}; + void Compute(OpKernelContext *ctx) override; + ~NPUShutdown() {}; +}; +void NPUShutdown::Compute(OpKernelContext *ctx) { + LOG(INFO) << "[NPUShutdown] NPUShutdown Compute"; + { + mutex_lock lock{g_mu}; + g_npuInitNum = 0; + } + if (!GePlugin::GetInstance()->IsGlobal()) { + GePlugin::GetInstance()->Finalize(); + LOG(INFO) << "[~NPUShutdown] GePlugin Finalize success"; + } else { + LOG(INFO) << "[~NPUShutdown] GePlugin global, skip GePlugin Finalize"; + } +} + +REGISTER_KERNEL_BUILDER(Name("NPUInit").Device(DEVICE_CPU), NPUInit); +REGISTER_KERNEL_BUILDER(Name("NPUShutdown").Device(DEVICE_CPU), NPUShutdown); +} // namespace tensorflow +#endif // TENSORFLOW_NPU_SYS_CTL_OPS_H_ diff --git a/tf_adapter/kernels/npu_unary_ops.cc b/tf_adapter/kernels/npu_unary_ops.cc new file mode 100644 index 000000000..a11772107 --- /dev/null +++ 
b/tf_adapter/kernels/npu_unary_ops.cc @@ -0,0 +1,159 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/bounds_check.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/logging.h" +#include "tf_adapter/common/common.h" + +namespace tensorflow { +template +class GeluOp : public tensorflow::OpKernel { + public: + explicit GeluOp(tensorflow::OpKernelConstruction *context) + : OpKernel(context) {} + ~GeluOp() {} + void Compute(tensorflow::OpKernelContext *context) override { + // Grab the input tensor + CHECK_NOT_NULL(context); + const Tensor &input_tensor = context->input(0); + auto input = input_tensor.flat(); + + // Create an output tensor + Tensor *output_tensor = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), + &output_tensor)); + // handle any data type for input and output + auto output_flat = output_tensor->flat(); + + // Set the value of each element + const int N = input.size(); + auto 
sqrt2overPI = T(sqrt(2 / M_PI)); + // investigate if we can optimize using other TF ops or using MKLDNN or Eigen + for (int i = 0; i < N; i++) { + auto x = input(i); + + auto cdf = T(0.5) * (T(1.0) + tanh( + sqrt2overPI * (x + T(0.044715) * pow(x, T(3))))); + auto y = x * cdf; + + output_flat(i) = y; + } + } +}; + +REGISTER_KERNEL_BUILDER( + Name("Gelu") +. +Device(tensorflow::DEVICE_CPU) +.TypeConstraint("T"), +GeluOp); + +REGISTER_KERNEL_BUILDER( + Name("Gelu") +. +Device(tensorflow::DEVICE_CPU) +.TypeConstraint("T"), +GeluOp); + +REGISTER_KERNEL_BUILDER( + Name("Gelu") +. +Device(tensorflow::DEVICE_CPU) +.TypeConstraint("T"), +GeluOp); + +template +class GeluGradOp : public tensorflow::OpKernel { + public: + explicit GeluGradOp(tensorflow::OpKernelConstruction *context) + : OpKernel(context) {} + ~GeluGradOp() {} + void Compute(tensorflow::OpKernelContext *context) override { + // Grab the grad input tensor + CHECK_NOT_NULL(context); + const Tensor &grad_input_tensor = context->input(0); + auto grad_input = grad_input_tensor.flat(); + + // Grab the input tensor + const Tensor &input_tensor = context->input(1); + auto input = input_tensor.flat(); + + // Grab the gelu output tensor + const Tensor &output_tensor = context->input(2); + auto output = output_tensor.flat(); + + OP_REQUIRES( + context, grad_input.size() == input.size(), + errors::InvalidArgument("grad_input size is not equal input size")); + OP_REQUIRES( + context, grad_input.size() == output.size(), + errors::InvalidArgument("grad_input size is not equal output size")); + + // Create an output tensor + Tensor *grad_output_tensor = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(0, grad_input_tensor.shape(), + &grad_output_tensor)); + // handle any data type for input and output + auto grad_output_flat = grad_output_tensor->flat(); + + // Set the value of each element + const int N = input.size(); + auto sqrt2overPI = T(sqrt(2 / M_PI)); + // investigate if we can optimize using other TF ops 
or using MKLDNN or Eigen + for (int i = 0; i < N; i++) { + auto dLdy = grad_input(i); + auto x = input(i); + auto gelu_x = (x != T(0)) ? (output(i) / x) : T(0.5); + + auto tanhterm = gelu_x * T(2) - T(1); + auto dydx = gelu_x + T(0.5) * x * (T(1) - pow(tanhterm, T(2))) * + sqrt2overPI * (T(1) + T(3) * T(0.04715) * pow(x, T(2))); + auto dLdx = dLdy * dydx; + + grad_output_flat(i) = dLdx; + } + } +}; + +REGISTER_KERNEL_BUILDER( + Name("GeluGrad") +. +Device(tensorflow::DEVICE_CPU) +.TypeConstraint("T"), +GeluGradOp); + +REGISTER_KERNEL_BUILDER( + Name("GeluGrad") +. +Device(tensorflow::DEVICE_CPU) +.TypeConstraint("T"), +GeluGradOp); + +REGISTER_KERNEL_BUILDER( + Name("GeluGrad") +. +Device(tensorflow::DEVICE_CPU) +.TypeConstraint("T"), +GeluGradOp); +} // namespace tensorflow + + diff --git a/tf_adapter/module.BUILD b/tf_adapter/module.BUILD new file mode 100644 index 000000000..a0a2fb3a9 --- /dev/null +++ b/tf_adapter/module.BUILD @@ -0,0 +1,74 @@ +package(default_visibility = ["//visibility:public"]) +load("@tf_adapter//tf_adapter:tf_adapter.bzl", "cc_proto_library") + +cc_library( + name = "hdrs", + srcs = glob(["**/*.h", "**/*.hpp"]), +) + +cc_library( + name = "foo_tensorflow_hdrs", + hdrs = glob(["include/**/*"], exclude=["include/**/*.so"]), +) + +cc_library( + name = "tf_adapter_extend_hdrs", + srcs = glob([ + "framework/**/*.h", "framework/**/*.hpp", + "graph/**/*.h", "graph/**/*.hpp", + "hccl/**/*.h", "hccl/**/*.hpp", + "tdt/**/*.h", "tdt/**/*.hpp", + "runtime/**/*.h", "runtime/**/*.hpp", + "external/**/*.h", + ]), +) + +cc_library( + name = "tf_adapter_dvpp_hdrs", + srcs = glob(["**/*.h"]), +) + +cc_library( + name = "tf_adapter_host_libs", + srcs = glob([ + "libc_sec.so", + "libge_runner.so", + "libtsdclient.so", + "libdatatransfer.so", + "libfmk_parser.so" + ]), +) + +cc_library( + name = "installed_tensorflow_libs", + srcs = glob([ + ]), +) + +cc_proto_library( + name = "ge_proto", + srcs = ["proto/om.proto", "proto/ge_ir.proto"], +) + +cc_library( + 
name = "local_nlohmann_json_lib", + hdrs = glob([ + "nlohmann/**/*.hpp", + ]), + visibility = ["//visibility:public"], + alwayslink = 1, +) + +cc_library( + name = "sec_lib", + srcs = glob([ + "**/*.h", + ]), +) + +cc_library( + name = "python_include", + srcs = glob([ + "**/*.h", + ]), +) \ No newline at end of file diff --git a/tf_adapter/ops/dropout_domask_grad.cc b/tf_adapter/ops/dropout_domask_grad.cc new file mode 100644 index 000000000..a86f3751c --- /dev/null +++ b/tf_adapter/ops/dropout_domask_grad.cc @@ -0,0 +1,70 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { + +namespace { +using FDH = FunctionDefHelper; + +// Cwise binary ops +Status GradForBinaryCwise(FunctionDef *g, std::vector body) { + // clang-format off + std::vector nodes = { + {{"sx"}, "Shape", {"x"}}, + {{"sy"}, "Shape", {"y"}}, + }; + nodes.insert(nodes.end(), body.begin(), body.end()); + std::vector reshapes = { + {{"rx", "ry"}, "BroadcastGradientArgs", {"sx", "sy"}}, + {{"sum_gx"}, "Sum", {"gx", "rx"}}, + {{"dx"}, "Reshape", {"sum_gx", "sx"}}, + {{"sum_gy"}, "Sum", {"gy", "ry"}}, + {{"dy"}, "Reshape", {"sum_gy", "sy"}}, + }; + nodes.insert(nodes.end(), reshapes.begin(), reshapes.end()); + + // clang-format on + for (auto &n : nodes) { + // "BroadcastGradientArgs" doesn't need any attrs. + if (n.attr.empty() && n.op != "BroadcastGradientArgs") { + n.attr = {{"T", "$T"}}; + } + } + *g = FDH::Define( + // Arg defs + {"x: T", "y: T", "dz: T"}, + // Ret val defs + {"dx: T", "dy: T"}, + // Attr defs + {{"T: {half, float, double}"}}, + // Nodes + nodes); + return Status::OK(); +} + +Status DropOutDoMaskGrad(const AttrSlice &attrs, FunctionDef *g) { + // clang-format off + return GradForBinaryCwise(g, { + {{"dx"}, "Identity", {"dy"}}, + }); + // clang-format on +} +REGISTER_OP_GRADIENT("DropOutDoMask", DropOutDoMaskGrad); +} // namespace +} // namespace tensorflow \ No newline at end of file diff --git a/tf_adapter/ops/hccl_ops.cc b/tf_adapter/ops/hccl_ops.cc new file mode 100644 index 000000000..96302a7a4 --- /dev/null +++ b/tf_adapter/ops/hccl_ops.cc @@ -0,0 +1,174 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/common_shape_fns.h" + +namespace tensorflow { +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; + +REGISTER_OP("HcomAllReduce") + .Input("input: T") + .Output("output: T") + .Attr("T: {int8, int16, int32, float16, float32}") + .Attr("reduction: {'min', 'max', 'prod', 'sum'}") + .Attr("group: string") + .Attr("fusion: int") + .Attr("fusion_id: int") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + return Status::OK(); + }) + .Doc(R"doc( +Outputs a tensor containing the reduction across all input tensors passed to ops. + +The graph should be constructed so if one op runs with shared_name value `c`, +then `num_devices` ops will run with shared_name value `c`. Failure to do so +will cause the graph execution to fail to complete. + +input: the input to the reduction +output: the value of the reduction across all `num_devices` devices. +reduction: the reduction operation to perform. +group: all devices of the group participating in this reduction. +)doc"); + +REGISTER_OP("HcomAllGather") + .Input("input: T") + .Output("output: T") + .Attr("T: {int8, int16, int32, float16, float32}") + .Attr("group: string") + .Attr("rank_size: int") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + // Scalar input is not supported. 
+ shape_inference::ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &unused)); + + shape_inference::ShapeHandle inSubshape; + TF_RETURN_IF_ERROR(c->Subshape(c->input(0), 1, &inSubshape)); + + int rankSize = 0; + auto inputFirstDimValue = c->Value(c->Dim(c->input(0), 0)); + shape_inference::ShapeHandle outputFirstDimAsShape; + TF_CHECK_OK(c->GetAttr("rank_size", &rankSize)); + Status rankSizeStatus = ((rankSize > 0) ? + (Status::OK()) : (errors::InvalidArgument("rankSize should be greater than 0."))); + TF_CHECK_OK(rankSizeStatus); + std::vector<shape_inference::DimensionHandle> outputFirstDim; + outputFirstDim.push_back( + c->MakeDim(rankSize * inputFirstDimValue)); + outputFirstDimAsShape = c->MakeShape(outputFirstDim); + shape_inference::ShapeHandle output; + TF_RETURN_IF_ERROR( + c->Concatenate(outputFirstDimAsShape, inSubshape, &output)); + c->set_output(0, output); + return Status::OK(); + }) + .Doc(R"doc( + +)doc"); + +REGISTER_OP("HcomBroadcast") + .Input("input: T") + .Output("output: T") + .Attr("T: list(type) >= 0") + .Attr("group: string") + .Attr("root_rank: int") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + for (int i = 0 ; i < c->num_inputs() ; i++) { + c->set_output(i, c->input(i)); + } + return Status::OK(); + }) + .Doc(R"doc( +Sends `input` to all devices that are connected to the output. + +The graph should be constructed so that all ops connected to the output have a +valid device assignment, and the op itself is assigned one of these devices. + +input: The input to the broadcast. +output: The same as input. +)doc"); + +REGISTER_OP("HcomReduceScatter") + .Input("input: T") + .Output("output: T") + .Attr("T: {int8, int16, int32, float16, float32}") + .Attr("reduction: {'min', 'max', 'prod', 'sum'}") + .Attr("group: string") + .Attr("rank_size: int") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + // Scalar input is not supported.
+ shape_inference::ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &unused)); + + shape_inference::ShapeHandle inSubshape; + TF_RETURN_IF_ERROR(c->Subshape(c->input(0), 1, &inSubshape)); + + int rankSize = 0; + auto inputFirstDimValue = c->Value(c->Dim(c->input(0), 0)); + shape_inference::ShapeHandle outputFirstDimAsShape; + TF_CHECK_OK(c->GetAttr("rank_size", &rankSize)); + Status rankSizeStatus = ((rankSize > 0) ? + (Status::OK()) : (errors::InvalidArgument("rank_size should be greater than 0."))); + TF_CHECK_OK(rankSizeStatus); + Status outputFirstDimStatus = ((inputFirstDimValue % rankSize) == 0) ? + (Status::OK()) : (errors::InvalidArgument("input first dim should be N * rank_size.")); + TF_CHECK_OK(outputFirstDimStatus); + std::vector<shape_inference::DimensionHandle> outputFirstDim; + outputFirstDim.push_back( + c->MakeDim(inputFirstDimValue / rankSize)); + outputFirstDimAsShape = c->MakeShape(outputFirstDim); + shape_inference::ShapeHandle output; + TF_RETURN_IF_ERROR( + c->Concatenate(outputFirstDimAsShape, inSubshape, &output)); + c->set_output(0, output); + return Status::OK(); + }) + .Doc(R"doc( + +)doc"); + +REGISTER_OP("HcomSend") + .Input("input: T") + .Attr("T: {int8, int16, int32, float16, float32}") + .Attr("group: string") + .Attr("sr_tag: int") + .Attr("dest_rank: int") + .SetIsStateful() + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( + +)doc"); + +REGISTER_OP("HcomReceive") + .Output("output: T") + .Attr("T: {int8, int16, int32, float16, float32}") + .Attr("shape: shape") + .Attr("group: string") + .Attr("sr_tag: int") + .Attr("src_rank: int") + .SetIsStateful() + .SetShapeFn(shape_inference::ExplicitShape) + .Doc(R"doc( + +)doc"); +} // namespace tensorflow diff --git a/tf_adapter/ops/npu_cpu_ops.cc b/tf_adapter/ops/npu_cpu_ops.cc new file mode 100644 index 000000000..1eaf8e9f7 --- /dev/null +++ b/tf_adapter/ops/npu_cpu_ops.cc @@ -0,0 +1,46 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020.
Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.pb.h" + +namespace tensorflow { +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; + +REGISTER_OP("EmbeddingRankId") + .Input("addr_table: uint64") + .Input("index: uint32") + .Output("rank_id: uint64") + .Attr("row_memory: int = 320") + .Attr("mode: string = 'mod' ") + .SetAllowsUninitializedInput() + .SetShapeFn([](shape_inference::InferenceContext* c) { + auto out_shape = c->MakeShape({c->Dim(c->input(1), 0), c->Dim(c->input(0), 1)}); + c->set_output(0, out_shape); + return Status::OK(); + }) + .Doc(R"doc( + Traverse the index calculation server and its position in the server. + Arguments + addr_table: Tensors of addr_table. + index: Tensors of index. + Output + rank_id: Tensors with the same shape as index.dim(0)*3. + )doc"); +} diff --git a/tf_adapter/ops/npu_dataset_ops.cc b/tf_adapter/ops/npu_dataset_ops.cc new file mode 100644 index 000000000..dd194680c --- /dev/null +++ b/tf_adapter/ops/npu_dataset_ops.cc @@ -0,0 +1,65 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. 
foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_def_builder.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +REGISTER_OP("QueueDataset") + .Input("input_dataset: variant") + .Attr("sourcedata: string") + .Output("handle: variant") + .SetIsStateful() + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(tensorflow::shape_inference::ScalarShape); + +REGISTER_OP("HostQueueDataset") + .Input("geop_dataset: variant") + .Input("input_dataset: variant") + .Attr("channel_name: string") + .Output("handle: variant") + .SetIsStateful() + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(tensorflow::shape_inference::ScalarShape); + +REGISTER_OP("DeviceQueueDataset") + .Attr("channel_name: string") + .Output("handle: variant") + .SetIsStateful() + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(tensorflow::shape_inference::ScalarShape); + +REGISTER_OP("GEOPDataset") + .Output("handle: variant") + .Attr("f: func") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape); + +REGISTER_OP("DPGroupDataset") + .Input("input_datasets: N * variant") + .Output("handle: variant") + .Attr("N: int 
>= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape); +} // namespace tensorflow diff --git a/tf_adapter/ops/npu_mixed_precesion_ops.cc b/tf_adapter/ops/npu_mixed_precesion_ops.cc new file mode 100644 index 000000000..356845cd0 --- /dev/null +++ b/tf_adapter/ops/npu_mixed_precesion_ops.cc @@ -0,0 +1,66 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { +// Mixed-precesions training +REGISTER_OP("NpuAllocFloatStatus") + .Output("float_status: float") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->Vector(1)); + return Status::OK(); + }) + .Doc(R"doc( + Allocate the float status tensor for getting float status from scalar buffer. + + Arguments + inputs: No inputs. + + Output + output: One float element tensor. 
+ )doc"); + +REGISTER_OP("NpuGetFloatStatus") + .Input("input_float: float") + .Output("float_status: float") + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( + Allocate the float status tensor for getting float status from scalar buffer. + + Arguments + inputs: The allocated input float status tensor. + + Output + output: The one float status element tensor. + )doc"); + +REGISTER_OP("NpuClearFloatStatus") + .Input("float_status: float") + .Output("cleared_float_status: float") + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( + Clear the float status in the scalar buffer. + + Arguments + inputs: The float status tensor. + + Output + output: The float element tensor set to zero. + )doc"); +} // namespace tensorflow diff --git a/tf_adapter/ops/npu_ops.cc b/tf_adapter/ops/npu_ops.cc new file mode 100644 index 000000000..ca96e2a2f --- /dev/null +++ b/tf_adapter/ops/npu_ops.cc @@ -0,0 +1,394 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.pb.h" + +namespace tensorflow { +using shape_inference::DimensionHandle; +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; +using shape_inference::UnchangedShape; + +namespace { +REGISTER_OP("NPUTest").SetShapeFn(shape_inference::NoOutputs); + +REGISTER_OP("GeOp") + .Input("inputs: Tin") + .Attr("Tin: list(type) >= 0") + .Output("outputs: Tout") + .Attr("Tout: list(type) >= 0") + .Attr("function: func") + .Attr("data_format: { 'NHWC', 'NCHW', 'NDHWC', 'NCDHW', 'DHWCN', 'DHWNC'} = 'NHWC'") + .SetIsStateful(); + +REGISTER_OP("DPOP") + .Input("inputs: Tin") + .Attr("Tin: list(type) >= 0") + .Output("outputs: Tout") + .Attr("Tout: list(type) >= 0") + .Attr("function: func") + .Attr("data_format: { 'NHWC', 'NCHW'} = 'NHWC'") + .SetIsStateful(); + +REGISTER_OP("NPUInit").SetShapeFn(shape_inference::NoOutputs); + +REGISTER_OP("LogTimeStamp") + .Attr("logid: string") + .Attr("notify: bool") + .SetShapeFn(shape_inference::NoOutputs); + +REGISTER_OP("NPUShutdown").SetShapeFn(shape_inference::NoOutputs); + +REGISTER_OP("LARS") + .Input("inputs_w: T") + .Input("inputs_g: T") + .Input("weight_decay: float") + .Output("outputs: T") + .Attr("T: list(type) >= 1") + .Attr("hyperpara: float = 0.001") + .Attr("epsilon: float = 0.00001") + .SetShapeFn([](shape_inference::InferenceContext* c) { + for (int i=0 ; i< ((c->num_inputs() - 1) / 2) ; i++) { + c->set_output(i, c->input(i)); + } + return Status::OK(); + }) + .Doc(R"doc( + Perform Lars on multi tensors. inputs_g have the same shape as `inputs_w`. + + Arguments + inputs_w: Tensors of weight. + inputs_g: Tensors of gradient. + + Output + outputs: Tensors with the same shape as `inputs_w`. 
+ )doc"); + +REGISTER_OP("LarsV2") + .Input("input_weight: T") + .Input("input_grad: T") + .Input("weight_decay: T") + .Input("learning_rate: T") + .Output("output: T") + .Attr("T: {float}") + .Attr("hyperpara: float = 0.001") + .Attr("epsilon: float = 0.00001") + .Attr("use_clip: bool = false") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + return Status::OK(); + }) + .Doc(R"doc( + Perform LarsV2 on single output. input_weight have the same shape + as `input_grad`. + + Arguments + input_weight: Tensor of weight. + input_grad: Tensor of gradient. + weight_decay: Tensor of weight_decay. + learning_rate: Tensor of learning_rate. + use_clip: Indicates whether to limit the coeff to acertain range. + + Output + output: Tensor with the same shape as `input_weight`. + )doc"); + + +Status OutfeedDequeueShapeFn(shape_inference::InferenceContext* c) { + shape_inference::ShapeHandle unused; + std::vector<PartialTensorShape> output_shapes; + TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes)); + if (static_cast<int>(output_shapes.size()) != c->num_outputs()) { + return errors::InvalidArgument( + "`output_shapes` must be the same length as `output_types` (", + output_shapes.size(), " vs. 
", c->num_outputs()); + } + for (size_t i = 0; i < output_shapes.size(); ++i) { + shape_inference::ShapeHandle output_shape_handle; + TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape( + output_shapes[i], &output_shape_handle)); + c->set_output(static_cast(i), output_shape_handle); + } + return Status::OK(); +} + +REGISTER_OP("OutfeedEnqueueOp") + .Input("inputs: Tin") + .Attr("channel_name: string") + .Attr("Tin: list(type) >= 0") + .SetIsStateful() + .SetShapeFn(shape_inference::NoOutputs); + +REGISTER_OP("OutfeedDequeueOp") + .Output("outputs: output_types") + .Attr("channel_name: string") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetIsStateful() + .SetShapeFn(OutfeedDequeueShapeFn); + +REGISTER_OP("StopOutfeedDequeueOp") + .Attr("channel_name: string") + .SetIsStateful() + .SetShapeFn(shape_inference::NoOutputs); + +REGISTER_OP("DropOutDoMask") + .Input("x: T") + .Input("mask: uint8") + .Input("keep_prob: T") + .Output("y: T") + .Attr("T: {float16, float32}") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + return Status::OK(); + }); + +REGISTER_OP("DropOutGenMask") + .Input("shape: T") + .Attr("T: {int64, int32}") + .Input("prob: S") + .Attr("S: {float, half}") + .Output("output: uint8") + .Attr("seed: int = 0") + .Attr("seed2: int = 0") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(1), 0, &unused)); // prob must be 0-d + + ShapeHandle inputShapeHandle; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &inputShapeHandle)); + + int32 rank = c->Rank(inputShapeHandle); + if (InferenceContext::kUnknownRank == rank) { + ShapeHandle out = c->UnknownShapeOfRank(1); + c->set_output(0, out); + return Status::OK(); + } + + bool unknownDimExist = false; + for (int32 i = 0; i < rank; ++i) { + DimensionHandle dimHandle = c->Dim(inputShapeHandle, i); + int64 value 
= c->Value(dimHandle); + if (InferenceContext::kUnknownDim == value) { + unknownDimExist = true; + break; + } + } + + if (unknownDimExist) { + ShapeHandle out = c->UnknownShapeOfRank(1); + c->set_output(0, out); + return Status::OK(); + } + + int64 bitCount = 0; + if (rank != 0) { + DimensionHandle inputDimHandle = c->NumElements(inputShapeHandle); + bitCount = c->Value(inputDimHandle); + } + + // align to 128 and around up + int64 n128Bits = bitCount / 128; + if ((bitCount % 128) != 0) { + n128Bits++; + } + + // transfer 128 bit count to byte count if shape is full know + int64 nBytes = n128Bits * 16; + + ShapeHandle out = c->Vector(nBytes); + c->set_output(0, out); + return Status::OK(); + + }); + +REGISTER_OP("BasicLSTMCell") + .Input("x: T") + .Input("h: T") + .Input("c: T") + .Input("w: T") + .Input("b: T") + .Output("ct: T") + .Output("ht: T") + .Output("it: T") + .Output("jt: T") + .Output("ft: T") + .Output("ot: T") + .Output("tanhct: T") + .Attr("T: {float16, float32}") + .Attr("keep_prob: float = 1.0") + .Attr("forget_bias: float = 1.0") + .Attr("state_is_tuple: bool = true") + .Attr("activation: string = 'tanh'") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->input(2)); + c->set_output(1, c->input(1)); + c->set_output(2, c->input(2)); + c->set_output(3, c->input(2)); + c->set_output(4, c->input(2)); + c->set_output(5, c->input(2)); + c->set_output(6, c->input(2)); + return Status::OK(); + }); + +REGISTER_OP("BasicLSTMCellCStateGrad") + .Input("c: T") + .Input("dht: T") + .Input("dct: T") + .Input("it: T") + .Input("jt: T") + .Input("ft: T") + .Input("ot: T") + .Input("tanhct: T") + .Output("dgate: T") + .Output("dct_1: T") + .Attr("T: {float16, float32}") + .Attr("forget_bias: float = 1.0") + .Attr("activation: string = 'tanh'") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + auto input_it_shape = c->input(4); + auto hidden_size = c->Dim(input_it_shape, 1); + auto batch_size 
= c->Dim(input_it_shape, 0); + DimensionHandle output_size; + TF_RETURN_IF_ERROR(c->Multiply(hidden_size, 4, &output_size)); + auto output_shape = c->MakeShape({batch_size, output_size}); + c->set_output(0, output_shape); + c->set_output(1, c->input(2)); + return Status::OK(); + }); + +REGISTER_OP("BasicLSTMCellWeightGrad") + .Input("x: T") + .Input("h: T") + .Input("dgate: T") + .Output("dw: T") + .Output("db: T") + .Attr("T: {float16, float32}") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + auto input_x_shape = c->input(0); + auto input_h_shape = c->input(1); + auto input_dgate_shape = c->input(2); + auto four_hidden_size = c->Dim(input_dgate_shape, 1); + auto hidden_size = c->Dim(input_h_shape, 1); + auto input_size = c->Dim(input_x_shape, 1); + DimensionHandle output_size; + TF_RETURN_IF_ERROR(c->Add(hidden_size, input_size, &output_size)); + auto output_dw_shape = c->MakeShape({output_size, four_hidden_size}); + auto output_db_shape = c->MakeShape({four_hidden_size}); + c->set_output(0, output_dw_shape); + c->set_output(1, output_db_shape); + return Status::OK(); + }); + +REGISTER_OP("BasicLSTMCellInputGrad") + .Input("dgate: T") + .Input("w: T") + .Output("dxt: T") + .Output("dht: T") + .Attr("T: {float16, float32}") + .Attr("keep_prob: float = 1.0") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + auto input_dgate_shape = c->input(0); + auto input_w_shape = c->input(1); + auto four_hidden_size = c->Dim(input_dgate_shape, 1); + auto batch_size = c->Dim(input_dgate_shape, 0); + auto input_hidden_size = c->Dim(input_w_shape, 0); + DimensionHandle output_hidden_size; + TF_RETURN_IF_ERROR(c->Divide(four_hidden_size, 4, true, &output_hidden_size)); + auto output_dht_shape = c->MakeShape({batch_size, output_hidden_size}); + DimensionHandle output_input_size; + TF_RETURN_IF_ERROR(c->Subtract(input_hidden_size, output_hidden_size, + &output_input_size)); + auto output_dxt_shape = c->MakeShape({batch_size, 
output_input_size}); + c->set_output(0, output_dxt_shape); + c->set_output(1, output_dht_shape); + return Status::OK(); + }); + +REGISTER_OP("DecodeAndResizeJpeg") + .Input("contents: string") + .Input("resize: int32") + .Output("image: uint8") + .SetShapeFn([](InferenceContext* c) { + const Tensor* rezise = c->input_tensor(1); + DimensionHandle h; + DimensionHandle w; + if (rezise != nullptr) { + auto rezise_vec = rezise->vec<int32>(); + h = c->MakeDim(rezise_vec(0)); + w = c->MakeDim(rezise_vec(1)); + } + c->set_output(0, c->MakeShape({h, w, 3})); + return Status::OK(); + }); + +REGISTER_OP("DecodeAndCropAndResizeJpeg") + .Input("contents: string") + .Input("crop_size: int32") + .Input("resize: int32") + .Output("image: uint8") + .SetShapeFn([](InferenceContext* c) { + const Tensor* rezise = c->input_tensor(2); + DimensionHandle h; + DimensionHandle w; + if (rezise != nullptr) { + auto rezise_vec = rezise->vec<int32>(); + h = c->MakeDim(rezise_vec(0)); + w = c->MakeDim(rezise_vec(1)); + } + c->set_output(0, c->MakeShape({h, w, 3})); + return Status::OK(); + }); + +REGISTER_OP("AdamApplyOneAssign") + .Input("input0: T") + .Input("input1: T") + .Input("input2: T") + .Input("input3: T") + .Input("input4: T") + .Input("mul0_x: T") + .Input("mul1_x: T") + .Input("mul2_x: T") + .Input("mul3_x: T") + .Input("add2_y: T") + .Attr("T: {float16, float32}") + .SetShapeFn(shape_inference::NoOutputs); + +REGISTER_OP("AdamApplyOneWithDecayAssign") + .Input("input0: T") + .Input("input1: T") + .Input("input2: T") + .Input("input3: T") + .Input("input4: T") + .Input("mul0_x: T") + .Input("mul1_x: T") + .Input("mul2_x: T") + .Input("mul3_x: T") + .Input("mul4_x: T") + .Input("add2_y: T") + .Attr("T: {float16, float32}") + .SetShapeFn(shape_inference::NoOutputs); +} // namespace +} // namespace tensorflow diff --git a/tf_adapter/ops/npu_unary_ops.cc b/tf_adapter/ops/npu_unary_ops.cc new file mode 100644 index 000000000..970480a58 --- /dev/null +++ b/tf_adapter/ops/npu_unary_ops.cc @@ -0,0 +1,35
@@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { +REGISTER_OP("Gelu") + .Input("features: T") + .Output("activations: T") + .Attr("T: realnumbertype") + .SetShapeFn(tensorflow::shape_inference::UnchangedShape); + +REGISTER_OP("GeluGrad") + .Input("gradients: T") + .Input("features: T") + .Input("activations: T") + .Output("backprops: T") + .Attr("T: realnumbertype") + .SetShapeFn(tensorflow::shape_inference::MergeBothInputsShapeFn); +} // namespace tensorflow diff --git a/tf_adapter/optimizers/add_input_pass.cc b/tf_adapter/optimizers/add_input_pass.cc new file mode 100644 index 000000000..00e1ffff7 --- /dev/null +++ b/tf_adapter/optimizers/add_input_pass.cc @@ -0,0 +1,147 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/graph_to_functiondef.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/lib/random/simple_philox.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/public/session_options.h" +#include "tf_adapter/util/infershape_util.h" +#include "tf_adapter/util/npu_attrs.h" +#include "tf_adapter/common/common.h" +#include "tensorflow/core/util/dump_graph.h" +#include "tensorflow/core/graph/graph_constructor.h" + +namespace tensorflow { +static const int64 kMicrosToMillis = 1000; +static std::atomic graph_run_num(1); +static mutex graph_num_mutex(LINKER_INITIALIZED); + +class AddInputPass : public GraphOptimizationPass { + public: + AddInputPass() = default; + ~AddInputPass() = default; + Status Run(const GraphOptimizationPassOptions &options) override; +}; + +Status AddInputPass::Run(const GraphOptimizationPassOptions& options) { + if 
(options.partition_graphs == nullptr || options.flib_def == nullptr) { // in ps mode : session_options may be null + return Status::OK(); + } + + for (auto& partition : *options.partition_graphs) { + std::unique_ptr<Graph>* graph = &partition.second; + + int graph_num; + { + mutex_lock lock(graph_num_mutex); + graph_num = graph_run_num; + graph_run_num++; + } + int64 startTime = InferShapeUtil::GetCurrentTimestap(); + if (graph == nullptr) { + continue; + } + + bool findMarkNoNeed = false; + for (Node *n : graph->get()->nodes()) { + REQUIRES_NOT_NULL(n); + if (n->attrs().Find("_NoNeedOptimize")) { + LOG(INFO) << "Found mark of noneed optimize on node [" + << n->name() << "], skip AddInputPass."; + findMarkNoNeed = true; + break; + } + } + if (findMarkNoNeed) { + continue; + } + + std::map<std::string, std::string> pass_options; + pass_options = NpuAttrs::GetDefaultPassOptions(); + + for (Node *n : graph->get()->nodes()) { + REQUIRES_NOT_NULL(n); + if (n->attrs().Find("_NpuOptimizer")) { + pass_options = NpuAttrs::GetPassOptions(n->attrs()); + break; + } + } + + std::string job = pass_options["job"]; + if (job == "ps" || job == "default" || job == "localhost") { + LOG(INFO) << "job is " << job << " Skip the optimizer : AddInputPass."; + continue; + } + + char *need_print = getenv("PRINT_MODEL"); + + if (need_print != nullptr && strcmp("1", need_print) == 0) { + GraphDef ori_graph_def; + graph->get()->ToGraphDef(&ori_graph_def); + string ori_model_path = "BeforeSubGraph_Add_Input_"; + string omodel_path = ori_model_path + std::to_string(graph_num) + ".pbtxt"; + Status status_out = WriteTextProto(Env::Default(), omodel_path, ori_graph_def); + } + + GraphDef graph_def; + FunctionLibraryDefinition *func_lib = options.flib_def; + partition.second.get()->ToGraphDef(&graph_def); + + std::unique_ptr<Graph> device_graph(new Graph(OpRegistry::Global())); + GraphConstructorOptions device_opts; + // There are internal operations (e.g., send/recv) that we now allow.
+ device_opts.allow_internal_ops = true; + device_opts.expect_device_spec = true; + TF_RETURN_IF_ERROR(ConvertGraphDefToGraph(device_opts, graph_def, device_graph.get())); + partition.second.swap(device_graph); + + if (need_print != nullptr && strcmp("1", need_print) == 0) { + GraphDef omg_graph_def; + graph->get()->ToGraphDef(&omg_graph_def); + string tmpmodel_path = "AfterSubGraph_Add_Input_"; + string tmodel_path = tmpmodel_path + std::to_string(graph_num) + ".pbtxt"; + Status status_o = WriteTextProto(Env::Default(), tmodel_path, omg_graph_def); + } + int64 endTime = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "AddInputPass subgraph_" << std::to_string(graph_num) << " success. [" + << ((endTime - startTime) / kMicrosToMillis) << " ms]"; + } + + return Status::OK(); +} + +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_PARTITIONING, 103, + AddInputPass); +} // namespace tensorflow diff --git a/tf_adapter/optimizers/control_flow_conversion_pass.cc b/tf_adapter/optimizers/control_flow_conversion_pass.cc new file mode 100644 index 000000000..0d62c497a --- /dev/null +++ b/tf_adapter/optimizers/control_flow_conversion_pass.cc @@ -0,0 +1,100 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tf_adapter/optimizers/control_flow_conversion_pass.h" + +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/lower_case_op.h" +#include "tensorflow/core/common_runtime/lower_function_call_op.h" +#include "tensorflow/core/common_runtime/lower_if_op.h" +#include "tensorflow/core/common_runtime/lower_while_op.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/public/session_options.h" +#include "tf_adapter/util/npu_attrs.h" + +namespace tensorflow { +namespace { +const string kLowerUsingSwitchMergeAttr = "_lower_using_switch_merge"; +} // namespace + +Status ControlFlowConvertionPass::Run( + const GraphOptimizationPassOptions& options) { + if (options.partition_graphs != nullptr) { + return errors::Internal( + "Lowering If/While ops should happen before partitioning."); + } + if (options.graph == nullptr || options.session_options == nullptr) { + return Status::OK(); + } + + Graph* graph = options.graph->get(); + if (graph == nullptr) { + return errors::Internal( + "Lowering While op requires a graph to be available."); + } + + std::map pass_options = NpuAttrs::GetPassOptions(options); + std::string job = pass_options["job"]; + if (job == "ps" || job == "default") { + LOG(INFO) << "job is " << job << " Skip the optimizer : ControlFlowConvertionPass."; + return Status::OK(); + } + + FunctionLibraryDefinition* flib_def = options.flib_def; + if (flib_def == nullptr) { + return errors::Internal( + "Lowering If op requires a FunctionLibraryDefinition to be available."); + } + + bool use_off_line = pass_options["use_off_line"] == "1"; + bool lower_functional_ops = pass_options["lower_functional_ops"] == "1"; + if (!use_off_line || lower_functional_ops) { + LOG(INFO) << "Skip the optimizer"; + return Status::OK(); 
+ } + + // Delete _lower_using_switch_merge before LowerFunctionalOpsPass + for (int i = 2; i < graph->num_node_ids(); ++i) { + Node* n = graph->FindNodeId(i); + if (n->IsIfNode()) { + n->ClearAttr(kLowerUsingSwitchMergeAttr); + } else if (n->type_string() == "Case") { + n->ClearAttr(kLowerUsingSwitchMergeAttr); + } else if (n->IsWhileNode()) { + n->ClearAttr(kLowerUsingSwitchMergeAttr); + } + } + + std::vector function_names = flib_def->ListFunctionNames(); + for (string func_name : function_names) { + const FunctionDef* fdef = flib_def->Find(func_name); + if (fdef != nullptr) { + for (NodeDef ndef : fdef->node_def()) { + if (ndef.op() == "If" || ndef.op() == "Case" || ndef.op() == "While") { + ndef.mutable_attr()->erase(kLowerUsingSwitchMergeAttr); + } + } + } + } + + return Status::OK(); +} + +REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, -1, + ControlFlowConvertionPass); +} // namespace tensorflow diff --git a/tf_adapter/optimizers/control_flow_conversion_pass.h b/tf_adapter/optimizers/control_flow_conversion_pass.h new file mode 100644 index 000000000..6c6b9bdae --- /dev/null +++ b/tf_adapter/optimizers/control_flow_conversion_pass.h @@ -0,0 +1,36 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTROL_FLOW_CONVERSION_PASS_H_ +#define TENSORFLOW_CONTROL_FLOW_CONVERSION_PASS_H_ + +#include "absl/types/optional.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +// Remove _lower_using_switch_merge flag in control flow ops +class ControlFlowConvertionPass : public GraphOptimizationPass { + public: + ControlFlowConvertionPass() = default; + ~ControlFlowConvertionPass() = default; + + Status Run(const GraphOptimizationPassOptions& options) override; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CONTROL_FLOW_CONVERSION_PASS_H_ diff --git a/tf_adapter/optimizers/dp_tf_ge_conversion_pass.cc b/tf_adapter/optimizers/dp_tf_ge_conversion_pass.cc new file mode 100644 index 000000000..98c4c3186 --- /dev/null +++ b/tf_adapter/optimizers/dp_tf_ge_conversion_pass.cc @@ -0,0 +1,1012 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tf_adapter/optimizers/dp_tf_ge_conversion_pass.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/graph_to_functiondef.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/lib/random/simple_philox.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/public/session_options.h" +#include "tf_adapter/util/infershape_util.h" +#include "tf_adapter/util/npu_attrs.h" +#include "tf_adapter/common/common.h" + +namespace tensorflow { +static const int64 kMicrosToMillis = 1000; +// GE ops white list +const static std::vector GE_OPS_WHITELIST = { + "MapDataset", "ParallelMapDataset", "BatchDataset", + "MapAndBatchDataset", "DeviceQueueDataset", + "BatchDatasetV2", "MapAndBatchDatasetV2", + "ModelDataset", "OptimizeDataset"}; + +// Customize dataset list +const static std::vector CUSTOMIZE_DATASET_LIST = { + "BatchDataset", "BatchDatasetV2", "MapAndBatchDataset", + "MapAndBatchDatasetV2", "ParallelMapDataset", + "MakeIterator"}; +// Skip dataset list +const static std::vector SKIP_DATASET_LIST = { + "ModelDataset", "OptimizeDataset" +}; + +// GE fun black list +const static std::vector GE_FUN_BLACKLIST = { + "PyFunc", + "SaveV2", + "RestoreV2", + "MergeV2Checkpoints", + "Save", + "SaveSlices", + "Restore", + "RestoreSlice", + "ShardedFilename", + "ShardedFilespec", + 
"WholeFileReader", + "WholeFileReaderV2", + "TextLineReader", + "TextLineReaderV2", + "FixedLengthRecordReader", + "FixedLengthRecordReaderV2", + "LMDBReader", + "IdentityReader", + "IdentityReaderV2", + "ReaderRead", + "ReaderReadV2", + "ReaderReadUpTo", + "ReaderReadUpToV2", + "ReaderNumRecordsProduced", + "ReaderNumRecordsProducedV2", + "ReaderNumWorkUnitsCompleted", + "ReaderNumWorkUnitsCompletedV2", + "ReaderSerializeState", + "ReaderSerializeStateV2", + "ReaderRestoreState", + "ReaderRestoreStateV2", + "ReaderReset", + "ReaderResetV2", + "ReadFile", + "WriteFile", + "MatchingFiles", + "TFRecordReader", + "TFRecordReaderV2", + "MutableHashTable", + "MutableHashTableV2", + "MutableDenseHashTable", + "MutableDenseHashTableV2", + "InitializeTable", + "InitializeTableV2", + "InitializeTableFromTextFile", + "InitializeTableFromTextFileV2", + "MutableHashTableOfTensors", + "MutableHashTableOfTensorsV2", + "HashTable", + "HashTableV2", + "LookupTableInsert", + "LookupTableInsertV2", + "LookupTableExport", + "LookupTableExportV2", + "LookupTableImport", + "LookupTableImportV2", + "LookupTableFind", + "LookupTableFindV2"}; +// Mark string for iterator_name +const static std::string DP_ITERATOR_MARK = "_iterator_name"; +// Mark string for dp_init graph +const static std::string DP_INIT_GRAPH_MARK = "MakeIterator"; +const static std::string DP_INIT_NOOP_GRAPH_MARK = "NoOp"; +// Mark string for iterator node +const static std::string DP_INIT_ITERATOR_MARK = "Iterator"; +// Mark string for device node +const static std::string DP_INIT_DEVICEQUEUE_MARK = "DeviceQueueDataset"; +// Mark string for queue node +const static std::string DP_INIT_QUEUE_MARK = "QueueDataset"; +// Used for 0-input NodeDefBuilder +const static std::vector EMPTY_DEF_INPUT; +// Used for 0-input NodeBuilder +const static std::vector EMPTY_INPUT; +// Used for 0-type Node(Def)Builder +const static DataTypeVector EMPTY_TYPE; +// Used for 0-shape Node(Def)Builder +const static std::vector EMPTY_SHAPE; + 
+class DpTfToGEConversionPassImpl { + public: + explicit DpTfToGEConversionPassImpl() + : graph_run_num_(0), + graph_(nullptr), + flib_def_(nullptr) {}; + + ~DpTfToGEConversionPassImpl() = default; + Status Run(const GraphOptimizationPassOptions& options); + private: + Status ProcessGraph(std::unique_ptr* graph, FunctionLibraryDefinition *func_lib, + const OptimizationPassRegistry::Grouping pass_group_value); + bool RunPass(std::unique_ptr *g, FunctionLibraryDefinition *flib, std::map all_options); + bool CheckMakeIteratorNode(Node *&n) const; + inline bool IsMakeIteratorNode(const Node *n) const; + inline bool IsDeviceQueueDatasetNode() const; + inline bool IsIteratorNode(const Node *n) const; + inline bool IsSkipDataset(const Node *n) const; + inline std::string GetEdgeName(const Edge *e) const; + inline std::string GetRandomName(const std::string &prefix) const; + std::string GetRandomName() const; + inline bool EndsWith(const std::string &str, const std::string &suffix) const; + inline bool CheckNode(const std::string &op) const; + inline bool IsDeviceSupportedOp(const NodeDef &n) const; + inline bool IsDeviceSupportedFunc(const std::string &fn) const; + inline Status GetSplitEdges(const Node *n, std::vector &split_edges, const Edge *e); + inline void RemoveSplitEdges(Node *topo_end); + inline Status InsertChannelQueue(Node *topo_end, std::string &host_queue_name, + std::string &device_queue_name) const; + bool GetNodeFuncs(const FunctionLibraryDefinition *flib_def, Node *node, + std::vector &node_funcs); + bool RemoveIsolatedNode(Graph *g, std::unordered_set visited); + Status RemoveNotSupportDataset(Graph *g, std::string start, std::string end) const; + + // graph num + int graph_run_num_; + // All split edges, split edges means edges that combine A and B in this case + // 1) A = a node that can only run on tensorflow python host and, + // 2) B = a node that can run on GE device and all nodes followed B can run on + // GE device, + std::unordered_map> 
split_edges_; + // Input graph, not owned + Graph *graph_; + // Input flib, not owned + const FunctionLibraryDefinition *flib_def_; +}; + +bool DpTfToGEConversionPassImpl::CheckMakeIteratorNode(Node*& n) const { + if (str_util::StartsWith(n->type_string(), DP_INIT_GRAPH_MARK)) { + return true; + } else if(str_util::StartsWith(n->type_string(), DP_INIT_NOOP_GRAPH_MARK)) { + for (const Edge* e : n->in_edges()) { // MakeIterator is contains group_deps + if (e == nullptr || e->src() == nullptr) { + continue; + } + if (str_util::StartsWith(e->src()->def().name(), DP_INIT_GRAPH_MARK)) { + LOG(INFO) << "Remove node: " << n->type_string(); + n = e->src(); + if (e->IsControlEdge()) { + graph_->RemoveControlEdge(e); + } else { + graph_->RemoveEdge(e); + } + LOG(INFO) << "PruneForReverseReachability node: " << n->type_string(); + PruneForReverseReachability(&*graph_, {n}); + return true; + } + } + } + return false; +} + +inline bool DpTfToGEConversionPassImpl::IsMakeIteratorNode(const Node* n) const { + return str_util::StartsWith(n->type_string(), DP_INIT_GRAPH_MARK); +} + +inline bool DpTfToGEConversionPassImpl::IsDeviceQueueDatasetNode() const { + for (const Node* n : graph_->op_nodes()) { + if (str_util::StartsWith(n->type_string(), DP_INIT_DEVICEQUEUE_MARK) || + str_util::StartsWith(n->type_string(), DP_INIT_QUEUE_MARK)) { + return true; + } + } + return false; +} + +inline bool DpTfToGEConversionPassImpl::IsIteratorNode(const Node* n) const { + return str_util::StartsWith(n->type_string(), DP_INIT_ITERATOR_MARK); +} + +inline bool DpTfToGEConversionPassImpl::IsSkipDataset(const Node* n) const { + return std::find(SKIP_DATASET_LIST.begin(), SKIP_DATASET_LIST.end(), + n->type_string()) != SKIP_DATASET_LIST.end(); +} + +inline std::string DpTfToGEConversionPassImpl::GetEdgeName(const Edge* e) const { + if (e == nullptr || e->src() == nullptr || e->dst() == nullptr) { + return "invalid_edge"; + } + return strings::StrCat("Edge_from_", e->src()->name(), "_out", + 
e->src_output(), "_To_", e->dst()->name(), "_in", + e->dst_input()); +} + +inline std::string DpTfToGEConversionPassImpl::GetRandomName( + const std::string& prefix) const { + return strings::StrCat(prefix, "_", GetRandomName()); +} + +std::string DpTfToGEConversionPassImpl::GetRandomName() const { + random::PhiloxRandom philox(random::New64(), random::New64()); + random::SimplePhilox rnd(&philox); + const static size_t RANDOM_LEN = 11; + const static uint32_t CHARACTER_SETS_HEAD = 2; + const static uint32_t CHARACTER_SETS = 3; + const static uint32_t CHARACTER_SET_SIZE[] = { 26, 26, 10 }; // a-z A-Z 0-9 + const static uint32_t ASCII_UP_A = 65; // Ascii of 'A' + const static uint32_t ASCII_LO_A = 97; // Ascii of 'a' + const static uint32_t ASCII_0 = 48; // Ascii of '0' + const static uint32_t ASCII_BASE[] = { ASCII_UP_A, ASCII_LO_A, ASCII_0 }; + string x; + uint32_t setIdx = 0; + for (size_t i = 0; i < RANDOM_LEN; i++) { + if (i == 0) { // Character must not start with 0-9 + setIdx = rnd.Uniform(CHARACTER_SETS_HEAD); + } else { + setIdx = rnd.Uniform(CHARACTER_SETS); + } + uint32_t asciiIdx = rnd.Uniform(CHARACTER_SET_SIZE[setIdx]); + x += ASCII_BASE[setIdx] + asciiIdx; + } + return x; +} + +bool DpTfToGEConversionPassImpl::EndsWith(const std::string& str, + const std::string& suffix) const { + return str.size() >= suffix.size() && + str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; +} + +bool DpTfToGEConversionPassImpl::CheckNode(const std::string& op) const { + std::string suffix_op = "Dataset"; + std::string suffix_op_v2 = "DatasetV2"; + if (EndsWith(op, suffix_op) || EndsWith(op, suffix_op_v2)) { + if (std::find(GE_OPS_WHITELIST.begin(), GE_OPS_WHITELIST.end(), op) != + GE_OPS_WHITELIST.end()) { + return true; + } else { + return false; + } + } else { + if (std::find(GE_FUN_BLACKLIST.begin(), GE_FUN_BLACKLIST.end(), op) == + GE_FUN_BLACKLIST.end()) { + return true; + } else { + return false; + } + } +} + +bool 
DpTfToGEConversionPassImpl::IsDeviceSupportedOp(const NodeDef& n) const { + const OpRegistrationData* op_reg_data = nullptr; + // Tenorflow original op + if (OpRegistry::Global() == nullptr) { + LOG(ERROR) << "OpRegistry global is nullptr"; + return false; + } + if (OpRegistry::Global()->LookUp(n.op(), &op_reg_data).ok()) { + // Node in GE supported + if (!CheckNode(n.op())) { + LOG(INFO) << "Node [" << n.name() << "] op [" << n.op() + << "] not supported by GE"; + return false; + } else { // Top node supported by GE, check its owned function + for (auto& attr : n.attr()) { + if (attr.second.has_func()) { + if (!IsDeviceSupportedFunc(attr.second.func().name())) { + LOG(INFO) << "Node [" << n.name() << "] function [" + << attr.second.func().name() << "] not supported by GE"; + return false; + } + } + } + } + } else { // Not tenorflow original op, this must be a function node + if (!IsDeviceSupportedFunc(n.op())) { + LOG(INFO) << "Node [" << n.name() << "] op [" << n.op() + << "] is not a supported function by GE"; + return false; + } + } + return true; +} + +bool DpTfToGEConversionPassImpl::IsDeviceSupportedFunc( + const std::string& fn) const { + const FunctionDef* fdef = flib_def_->Find(fn); + // Node contains not found function + if (fdef == nullptr) { + LOG(ERROR) << "Function [" << fn << "] not found"; + return false; + } + // Recursive check function node + for (const NodeDef& node : fdef->node_def()) { + if (!IsDeviceSupportedOp(node)) { + LOG(INFO) << "Function [" << fn << "] node [" << node.name() + << "] not supported by GE"; + return false; + } + } + return true; +} + +Status DpTfToGEConversionPassImpl::GetSplitEdges( + const Node* n, + std::vector& split_edges, + const Edge* last_edge) { + if (IsMakeIteratorNode(n)) { + for (const Edge* e : n->in_edges()) { + REQUIRES_NOT_NULL(e); + if (!IsIteratorNode(e->src())) { + last_edge = e; + LOG(INFO) << strings::StrCat("last edge", GetEdgeName(last_edge)); + } + } + } + // GE supported node, continue find + if 
(IsDeviceSupportedOp(n->def())) { + for (const Edge* e : n->in_edges()) { + REQUIRES_NOT_NULL(e); + REQUIRES_NOT_NULL(e->src()); + REQUIRES_NOT_NULL(e->dst()); + if (e->IsControlEdge() && !e->src()->IsSource()) { + return errors::InvalidArgument( + "Graph contains control edges witch not from _SOURCE, will not try " + "optimize"); + } + // GE supported node, continue find + if (IsDeviceSupportedOp(e->src()->def())) { + Status s = GetSplitEdges(e->src(), split_edges, last_edge); + if (!s.ok()) { + return s; + } + } else { // GE unsupported node, this is a split edge + LOG(INFO) << strings::StrCat("Split_", GetEdgeName(e)); + LOG(INFO) << "Begin check split edge."; + if (IsSkipDataset(e->dst())) { + LOG(INFO) << "ADD last edge " << GetEdgeName(last_edge); + split_edges.push_back(last_edge); + } else { + LOG(INFO) << "ADD last edge " << GetEdgeName(e); + split_edges.push_back(e); + } + } + } + } + return Status::OK(); +} + +Status DpTfToGEConversionPassImpl::InsertChannelQueue( + Node* topo_end, + std::string& host_queue_name, + std::string& device_queue_name) const { + LOG(INFO) << "Start to insert HostQueueDataset and DeviceQueueDataset."; + for (const Edge* e : split_edges_.at(topo_end)) { + REQUIRES_NOT_NULL(e); + REQUIRES_NOT_NULL(e->src()); + REQUIRES_NOT_NULL(e->dst()); + std::string queue_name = + strings::StrCat("Queue_", GetEdgeName(e), "_", GetRandomName()); + host_queue_name = strings::StrCat("Host", queue_name); + device_queue_name = strings::StrCat("Device", queue_name); + LOG(INFO) << "Add_" << host_queue_name; + // Host and Device queue should save type and shape + auto m_src = e->src()->def().attr(); + bool type_status = false; + string::size_type idx; + idx = SummarizeAttrValue(m_src["output_types"]).find("Unknown AttrValue"); + if (idx == string::npos) { + type_status = true; + } + Node* queue_node_host = nullptr; + // Make sure that 'queue_name' of host and device queue be same + TF_CHECK_OK(NodeBuilder(host_queue_name, "HostQueueDataset") + 
.Input(e->src(), e->src_output()) // Will be replaced by GEOPDataset later + .Input(e->src(), e->src_output()) + .Device(e->src()->def().device()) + .Attr("channel_name", queue_name) + .Attr("output_types", type_status ? m_src["output_types"] + : m_src["Toutput_types"]) + .Attr("output_shapes", m_src["output_shapes"]) + .Finalize(&*graph_, &queue_node_host)); + REQUIRES_NOT_NULL(queue_node_host); + LOG(INFO) << "Add_" << device_queue_name; + Node* queue_node_device = nullptr; + // Make sure that 'queue_name' of host and device queue be same + TF_CHECK_OK(NodeBuilder(device_queue_name, "DeviceQueueDataset") + .Device(e->dst()->def().device()) + .Attr("channel_name", queue_name) + .Attr("output_types", type_status ? m_src["output_types"] + : m_src["Toutput_types"]) + .Attr("output_shapes", m_src["output_shapes"]) + .Finalize(&*graph_, &queue_node_device)); + REQUIRES_NOT_NULL(queue_node_device); + // 0 means the the 0th output of queue_node_device + REQUIRES_NOT_NULL( + graph_->AddEdge(queue_node_device, 0, e->dst(), e->dst_input())); + } + return Status::OK(); +} + +Status DpTfToGEConversionPassImpl::RemoveNotSupportDataset( + Graph* g,std::string device_queue_dataset, std::string make_iterator) const { + LOG(INFO) << "Begin RemoveSplitDataset."; + // find device_queue_dataset and make_iterator + Node* node = nullptr; + Node* topo_end = nullptr; + for (Node* n : g->op_nodes()) { + if (n->type_string() == "DeviceQueueDataset" && n->name() == device_queue_dataset){ + LOG(INFO) << "device queue dataset node is " << n->name(); + node = n; + } + if (n->type_string() == "MakeIterator" && n->name() == make_iterator) { + LOG(INFO) << "make iterator node is " << n->name(); + topo_end = n; + } + } + Node* end_dataset = node; + std::vector delete_nodes; + while (!IsMakeIteratorNode(node)) { + if (IsSkipDataset(node)) { + delete_nodes.push_back(node); + } else { + end_dataset = node; + } + if (node->num_outputs() != 1) { + LOG(ERROR) << "Invalid node " << node->name() + << ", 
op is" << node->type_string(); + return errors::InvalidArgument( + "RemoveSplitDataset: find invalid node."); + } + const Edge* edge = nullptr; + for (const Edge* e : node->out_edges()) { + edge = e; + } + REQUIRES_NOT_NULL(edge); + REQUIRES_NOT_NULL(edge->dst()); + node = edge->dst(); + } + if (delete_nodes.empty()) { + LOG(INFO) << "not found unsupported dataset."; + return Status::OK(); + } + for (Node* n : delete_nodes) { + LOG(INFO) << "ready to remove node " << n->name(); + g->RemoveNode(n); + } + LOG(INFO) << "end dataset node is " << end_dataset->name(); + REQUIRES_NOT_NULL(g->AddEdge(end_dataset, 0, topo_end, 0)); + return Status::OK(); +} + +void DpTfToGEConversionPassImpl::RemoveSplitEdges(Node* topo_end) { + for (const Edge* e : split_edges_.at(topo_end)) { + LOG(INFO) << "Remove_" << GetEdgeName(e); + graph_->RemoveEdge(e); + } +} + +bool DpTfToGEConversionPassImpl::GetNodeFuncs( + const FunctionLibraryDefinition* flib_def, Node* node, + std::vector& node_funcs) { + node_funcs.clear(); + for (auto iter = node->attrs().begin(); iter != node->attrs().end(); ++iter) { + if (iter->second.has_func()) { + node_funcs.push_back(iter->second.func().name()); + std::vector func_name_stack; + func_name_stack.clear(); + func_name_stack.push_back(iter->second.func().name()); + while (!func_name_stack.empty()) { + string func_name = func_name_stack.back(); + func_name_stack.pop_back(); + const FunctionDef* fdef = flib_def->Find(func_name); + if (fdef != nullptr) { + for (NodeDef ndef : fdef->node_def()) { + for (auto& item : ndef.attr()) { + if (item.second.has_func()) { + node_funcs.push_back(item.second.func().name()); + func_name_stack.push_back(item.second.func().name()); + continue; + } + } + } + } + } + continue; + } + } + return !node_funcs.empty(); +} + +bool DpTfToGEConversionPassImpl::RunPass(std::unique_ptr *g, FunctionLibraryDefinition *flib, + std::map all_options) { + LOG(INFO) << ">>>> DpTfToGEConversionPassImpl::RunPass <<<<"; + // Convert just for 
convenient access + split_edges_.clear(); + graph_ = &**g; + flib_def_ = &(*g)->flib_def(); + + // Find split edges from subgraphs, which MakeIterator connect to Itearator op + std::vector topo_ends; + for (Node* node : graph_->op_nodes()) { + if (IsMakeIteratorNode(node)) { + for (Node* in_node : node->in_nodes()) { + if (IsIteratorNode(in_node)) { + topo_ends.push_back(node); + LOG(INFO) << "Insert topo end node " << node->name(); + break; + } + } + } + } + // After traversal, topo_ends should store MakeIterator Nodes. + if (topo_ends.empty()) { + LOG(INFO) << "Do not find MakeIterator <- IteratorV2 connects in the graph," + << " pass datapreprocess pass."; + return true; + } + LOG(INFO) << "Start to write graph's pbtxt before optimization."; + + const char *need_print = getenv("PRINT_MODEL"); + if (nullptr != need_print && strcmp("1", need_print) == 0) { + GraphDef before_graphdef; + (*g)->ToGraphDef(&before_graphdef); + string pre_model_path = "BeforeSubGraph_dp_"; + string + pmodel_path = pre_model_path + std::to_string(graph_run_num_) + ".pbtxt"; + TF_DO_CHECK_OK(WriteTextProto(Env::Default(), pmodel_path, before_graphdef), ERROR); + } + + LOG(INFO) << "Start to optimize dp_init topological graph"; + for (Node* topo_end : topo_ends) { + // Get all edges that should be replace with HostQueue->DeviceQueue + LOG(INFO) << "Start to find split edges, topo_end node is : " + << topo_end->name() << ", op is " << topo_end->type_string(); + const Edge* tmp_edge = nullptr; + TF_DO_CHECK_OK(GetSplitEdges(topo_end, split_edges_[topo_end], tmp_edge), ERROR); + + const string DEFAULT_DEVICE = topo_end->def().device(); + // Start optimize graph + // Insert Host and Device queue + LOG(INFO) << "Start to add host and device queue on split edges"; + std::string host_queue_name; + std::string device_queue_name; + TF_DO_CHECK_OK(InsertChannelQueue(topo_end, host_queue_name, device_queue_name), ERROR); + LOG(INFO) << "host queue name is " << host_queue_name; + LOG(INFO) << "device 
queue name is " << device_queue_name; + // Remove all split edges + LOG(INFO) << "Start to remove split edges"; + RemoveSplitEdges(topo_end); + + // Make a copy of graph for pruned GE + LOG(INFO) << "Start to prune GE graph"; + std::unique_ptr graph_ge(new (std::nothrow) Graph(OpRegistry::Global())); + if (graph_ge == nullptr) { + LOG(ERROR) << "new graph ge failed"; + return false; + } + CopyGraph(*graph_, &*graph_ge); + // Prune visiable GE graph + std::unordered_set visiable_ge; + for (const Node* n : graph_ge->op_nodes()) { + if (IsMakeIteratorNode(n) && n->name() == topo_end->name()) { + visiable_ge.emplace(n); + break; + } + } + TF_DO_CHECK_OK(RemoveNotSupportDataset(&*graph_ge, device_queue_name, topo_end->name()), ERROR); + + LOG(INFO) << "Start to to PruneForReverseReachability."; + PruneForReverseReachability(&*graph_ge, visiable_ge); + // add function_def begin + LOG(INFO) << "Start to add function_def for GEOP's func"; + FunctionDefLibrary fdeflib; + for (auto node : graph_ge->nodes()) { + std::vector node_funcs; + if (GetNodeFuncs(flib, node, node_funcs)) { + LOG(INFO) << "Node [" << node->name() << "] has func:"; + for (auto func : node_funcs) { + FunctionDef* fdef = fdeflib.add_function(); + *fdef = *(flib->Find(func)); + } + } + } + + // Add required function for GEOPDataset->func(GEOP->func2) topo graph + std::string fn_dpop = GetRandomName("dpop_function"); + std::string fn_geop = GetRandomName("geop_function"); + std::string fn_geop_dataset = GetRandomName("geopdataset_function"); + std::string iterator_name = ""; + for (auto in_node: topo_end->in_nodes()) { + if (in_node == nullptr) { + LOG(ERROR) << "topo end node is nullptr"; + return false; + } + LOG(INFO) << "in_node name is " << in_node->name(); + if (IsIteratorNode(in_node)) { + iterator_name = in_node->name(); + LOG(INFO) << "iterator name is " << iterator_name; + break; + } + } + if (iterator_name.empty()) { + LOG(ERROR) << "There is no connection between MakeIteraotr and IteratorV2"; + 
return false; + } + // Add dp custom kernel label + for (auto node : graph_ge->nodes()) { + if (node->type_string() == "DeviceQueueDataset") { + node->AddAttr(DP_ITERATOR_MARK, iterator_name); + } + if (std::find(CUSTOMIZE_DATASET_LIST.begin(), CUSTOMIZE_DATASET_LIST.end(), + node->type_string()) != CUSTOMIZE_DATASET_LIST.end()) { + LOG(INFO) << node->name() << " is " << node->type_string() << ", need to add label."; + node->AddAttr("_kernel", "dp"); + node->AddAttr(DP_ITERATOR_MARK, iterator_name); + } + } + // Convert GE graph to GEOP function body + { + LOG(INFO) << "Start to convert GE graph to geop function"; + FunctionDef* fd = fdeflib.add_function(); + TF_DO_CHECK_OK(GraphToFunctionDef(*graph_ge, fn_dpop, fd), ERROR); + } + // Add DPOP node(visable only by function of geop) + { + string func_def_str; + fdeflib.SerializeToString(&func_def_str); + + // DPOP node should created by function of geop + LOG(INFO) << "Start to convert dpop node to geop function"; + FunctionDef* fd = fdeflib.add_function(); + if (fd == nullptr || fd->mutable_signature() == nullptr) { + LOG(ERROR) << "fd is nullptr"; + return false; + } + fd->mutable_signature()->set_name(fn_geop); + NodeDef* n = fd->add_node_def(); + if (n == nullptr) { + LOG(ERROR) << "fd node def is nullptr"; + return false; + } + NameAttrList f_attr; + f_attr.set_name(fn_dpop); + *f_attr.mutable_attr() = n->attr(); + TF_CHECK_OK( + NodeDefBuilder(fn_dpop, "DPOP") + .Input(EMPTY_DEF_INPUT) // No partition dp_init graph on GE + .Device(DEFAULT_DEVICE) + .Attr("function", f_attr) // dpop funcion + .Attr("func_def", func_def_str) + .Attr("Tin", EMPTY_TYPE) + .Attr("Tout", EMPTY_TYPE) + .Attr("Tout", EMPTY_TYPE) + .Finalize(n)); // n is created by function of geop function + } + { + // GEOP node should created by function of geopDataset + LOG(INFO) << "Start to convert geop node to geopdataset function"; + FunctionDef* fd = fdeflib.add_function(); + if (fd == nullptr || fd->mutable_signature() == nullptr) { + 
LOG(ERROR) << "fd is nullptr"; + return false; + } + fd->mutable_signature()->set_name(fn_geop_dataset); + NodeDef* n = fd->add_node_def(); + if (n == nullptr) { + LOG(ERROR) << "fd node def is nullptr"; + return false; + } + NameAttrList f_attr; + f_attr.set_name(fn_geop); + *f_attr.mutable_attr() = n->attr(); + TF_CHECK_OK( + NodeDefBuilder(GetRandomName("GeOp"), "GeOp") + .Input(EMPTY_DEF_INPUT) // No partition dp_init graph on GE + .Device(DEFAULT_DEVICE) + .Attr("function", f_attr) // geop funcion + .Attr("Tin", EMPTY_TYPE) + .Attr("Tout", EMPTY_TYPE) + .Attr("Tout", EMPTY_TYPE) + .Attr("_enableDP", true) + .Finalize(n)); // n is created by function of geopDataset function + std::string attr_name = ""; + for (auto option : all_options) { + attr_name = std::string("_") + option.first; + AddNodeAttr(attr_name, option.second, n); + } + AddNodeAttr("_NpuOptimizer", "NpuOptimizer", n); + } + // Update graph function libray + LOG(INFO) << "Start to add geop and geopdataset function in graph library"; + // Not a must, just for Tensorbord viewing convenience + graph_->AddFunctionLibrary(fdeflib); + flib->AddLibrary(fdeflib); + + // Add GEOPDataset node to graph_ + std::vector topo_end_input_edges(topo_end->in_edges().begin(), + topo_end->in_edges().end()); + Node* dpgroup_dataset_node = nullptr; + Node* geop_dataset_node = nullptr; + std::unordered_set isolated_nodes; + { + LOG(INFO) << "Start to add geopdataset node in graph"; + const Node* n = nullptr; + for (const Edge* e : topo_end_input_edges) { + if (IsIteratorNode(e->src())) { + n = e->src(); + } + } + if (n == nullptr) { + LOG(ERROR) << "edge src is nullptr"; + return false; + } + auto m_src = n->def().attr(); + + NameAttrList f_attr; + f_attr.set_name(fn_geop_dataset); + // Combine all host queue dataset with GEOPDataset + std::vector inputs; + for (Node* n : graph_->op_nodes()) { + // host tf makeiterator add dp label + if (IsMakeIteratorNode(n)) { + n->AddAttr("_kernel", "dp"); + } + if 
(n->type_string().compare("HostQueueDataset") == 0 && + n->name() == host_queue_name) { + // 0: Host queue always generate one dataset + LOG(INFO) << "inputs add node : name is " << n->name() + << ", op is " << n->type_string(); + inputs.push_back(NodeBuilder::NodeOut(n, 0)); + } + if (n->type_string().compare("DeviceQueueDataset") == 0 && + n->name() == device_queue_name) { + isolated_nodes.insert(n); + } + } + + TF_CHECK_OK( + NodeBuilder(GetRandomName("DPGroupDataset"), "DPGroupDataset") + .Input(inputs) // All host queue flow into geopDataset for driver + .Device(DEFAULT_DEVICE) + .Attr("output_types", m_src["output_types"]) + .Attr("output_shapes", m_src["output_shapes"]) + .Finalize(&*graph_, + &dpgroup_dataset_node)); // Finalize geopDataset in graph_ + TF_CHECK_OK( + NodeBuilder(GetRandomName("GeopDataset"), "GEOPDataset") + .Device(DEFAULT_DEVICE) + .Attr("f", f_attr) // geopDataset function + .Finalize(&*graph_, + &geop_dataset_node)); // Finalize geopDataset in graph_ + + for (Node* n : graph_->op_nodes()) { + if (n->type_string().compare("HostQueueDataset") == 0 && + n->name() == host_queue_name) { + graph_->RemoveEdge(*(n->in_edges().begin())); + graph_->AddEdge(geop_dataset_node, 0, n, 0); + } + } + } + // Remove all edges flow to MakeIterator except the one from IteratorV2 + LOG(INFO) << "Start to combine geopdataset with iterator node and remove " + "orignal edges"; + + // We must copy all topoend input edges as we can't modify it when combine + // geopdataset an topoend + for (const Edge* e : topo_end_input_edges) { + if (!IsIteratorNode(e->src())) { + CHECK_NOTNULL( + graph_->AddEdge(dpgroup_dataset_node, 0, e->dst(), e->dst_input())); + LOG(INFO) << "Remove_" << GetEdgeName(e); + graph_->RemoveEdge(e); + } + } + // Prune for the final optimized graph + LOG(INFO) << "Start to prune final optimized graph"; + + RemoveIsolatedNode(&*graph_, isolated_nodes); + LOG(INFO) << "Start to assign unassigned node on default device"; + // We do pass after 
assign, so we must assign all new added nodes + for (Node* n : (*g)->op_nodes()) { + if (n->assigned_device_name().empty()) { + // Use device of MakeIterator node as default + n->set_assigned_device_name(DEFAULT_DEVICE); + LOG(INFO) << "Assigned node [" << n->name() << "] on device [" + << n->assigned_device_name() << "]"; + } + } + } + + LOG(INFO) << "End optimize dp_init topological graph"; + if (nullptr != need_print && strcmp("1", need_print) == 0) { + GraphDef after_graphdef; + (*g)->ToGraphDef(&after_graphdef); + string suffix_model_path = "AfterSubGraph_dp_"; + string smodel_path = + suffix_model_path + std::to_string(graph_run_num_) + ".pbtxt"; + TF_DO_CHECK_OK(WriteTextProto(Env::Default(), smodel_path, after_graphdef), ERROR); + } + return true; +} + +bool DpTfToGEConversionPassImpl::RemoveIsolatedNode( + Graph* g, std::unordered_set visited) { + // Compute set of nodes that we need to traverse in order to reach + // the nodes in "nodes" by performing a breadth-first search from those + // nodes, and accumulating the visited nodes. 
+ std::deque queue; + for (Node* n : visited) { + VLOG(2) << "Reverse reach init: " << n->name(); + queue.push_back(n); + } + while (!queue.empty()) { + Node* n = queue.front(); + queue.pop_front(); + for (Node* out : n->out_nodes()) { + if (visited.insert(out).second) { + queue.push_back(out); + VLOG(2) << "Reverse reach : " << n->name() << " from " << out->name(); + } + } + } + + // Make a pass over the graph to remove nodes in "visited" + std::vector all_nodes; + all_nodes.reserve(g->num_nodes()); + for (Node* n : g->nodes()) { + all_nodes.push_back(n); + } + + bool any_removed = false; + for (Node* n : all_nodes) { + if (visited.count(n) != 0) { + g->RemoveNode(n); + any_removed = true; + } + } + + return any_removed; +} + +Status DpTfToGEConversionPassImpl::Run( + const GraphOptimizationPassOptions& options) { + if ((options.graph == nullptr && options.partition_graphs == nullptr) || + options.flib_def == nullptr) { + return Status::OK(); + } + + Status s = Status::OK(); + if (options.graph != nullptr) { + std::unique_ptr* graph = options.graph; + FunctionLibraryDefinition *func_lib = options.flib_def; + s = ProcessGraph(graph, func_lib, OptimizationPassRegistry::POST_REWRITE_FOR_EXEC); + if (s != Status::OK()) { + return s; + } + } else if (options.partition_graphs != nullptr) { + for (auto& pg : *options.partition_graphs) { + std::unique_ptr* graph = &pg.second; + FunctionLibraryDefinition *func_lib = options.flib_def; + s = ProcessGraph(graph, func_lib, OptimizationPassRegistry::POST_PARTITIONING); + if (s != Status::OK()) { + return s; + } + } + } + + return Status::OK(); +} + +static std::atomic graph_run_num(1); +static mutex graph_num_mutex(LINKER_INITIALIZED); +Status DpTfToGEConversionPass::Run(const GraphOptimizationPassOptions& options) { + + return DpTfToGEConversionPassImpl().Run(options); +} + +Status DpTfToGEConversionPassImpl::ProcessGraph(std::unique_ptr* graph, FunctionLibraryDefinition *func_lib, + const OptimizationPassRegistry::Grouping 
pass_group_value) +{ + int64 startTime = InferShapeUtil::GetCurrentTimestap(); + + graph_run_num_ = graph_run_num++; + + if (graph == nullptr) { + return Status::OK(); + } + + for (Node* n : graph->get()->nodes()) { + REQUIRES_NOT_NULL(n); + if (n->attrs().Find("_NoNeedOptimize")) { + LOG(INFO) << "Found mark of noneed optimize on node [" << n->name() << "], skip DpTfToGEConversionPass."; + return Status::OK(); + } + } + + std::map all_options; + std::map pass_options; + pass_options = NpuAttrs::GetDefaultPassOptions(); + + for (Node *n : graph->get()->nodes()) { + REQUIRES_NOT_NULL(n); + if (n->attrs().Find("_NpuOptimizer")) { + pass_options = NpuAttrs::GetPassOptions(n->attrs()); + all_options = NpuAttrs::GetAllAttrOptions(n->attrs()); + break; + } + } + std::string job = pass_options["job"]; + if (job == "ps" || job == "default") { + LOG(INFO) << "job is " << job << " Skip the optimizer : DpTfToGEConversionPass."; + return Status::OK(); + } + if (job == "localhost" && + pass_group_value != OptimizationPassRegistry::POST_REWRITE_FOR_EXEC) { + return Status::OK(); + } + if (job != "localhost" && + pass_group_value != OptimizationPassRegistry::POST_PARTITIONING) { + return Status::OK(); + } + + bool enableDP = (pass_options["enable_dp"] == "1"); + bool use_off_line = (pass_options["use_off_line"] == "1"); + bool do_npu_optimizer = (pass_options["do_npu_optimizer"] == "1"); + if (do_npu_optimizer) { + if (!use_off_line) { + LOG(INFO) << "Run online process and skip the optimizer"; + return Status::OK(); + } + } else { + return Status::OK(); + } + + if (!enableDP) { + LOG(INFO) << "DpTfToGEConversionPassImpl::RunPass, enable data preproc is false"; + return Status::OK(); + } + auto process_graph = [&](std::unique_ptr *g, FunctionLibraryDefinition *flib, std::map all_options) { + RunPass(g, flib, all_options); + }; + + // For any pre-partitioning phase, graph is stored in options.graph. 
+ process_graph(graph, func_lib, all_options); + int64 endTime = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "DpTfToGEConversionPassImpl Run success. [" + << ((endTime - startTime) / kMicrosToMillis) << " ms]"; + + return Status::OK(); +} + +// We register DpTfToGE insertion for phase 102 in POST_PARTITIONING grouping +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 3, + DpTfToGEConversionPass); +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_PARTITIONING, 102, + DpTfToGEConversionPass); +} // namespace tensorflow diff --git a/tf_adapter/optimizers/dp_tf_ge_conversion_pass.h b/tf_adapter/optimizers/dp_tf_ge_conversion_pass.h new file mode 100644 index 000000000..4b040ea31 --- /dev/null +++ b/tf_adapter/optimizers/dp_tf_ge_conversion_pass.h @@ -0,0 +1,36 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_DP_TF_GE_CONVERSION_PASS_H_ +#define TENSORFLOW_DP_TF_GE_CONVERSION_PASS_H_ + +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +class DpTfToGEConversionPass : public GraphOptimizationPass { + public: + DpTfToGEConversionPass() = default; + ~DpTfToGEConversionPass() = default; + Status Run(const GraphOptimizationPassOptions &options) override; +}; +} // namespace tensorflow + +//#endif // HISI_OFFLINE + +#endif // TENSORFLOW_DP_TF_GE_CONVERSION_PASS_H_ diff --git a/tf_adapter/optimizers/get_attr_optimize_pass.cc b/tf_adapter/optimizers/get_attr_optimize_pass.cc new file mode 100644 index 000000000..cb6e27909 --- /dev/null +++ b/tf_adapter/optimizers/get_attr_optimize_pass.cc @@ -0,0 +1,147 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/common_runtime/shape_refiner.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/graph_def_util.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/graph/tensor_id.h" +#include "tensorflow/core/lib/gtl/flatset.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" +#include "tensorflow/core/util/device_name_utils.h" +#include "tensorflow/core/graph/control_flow.h" +#include "tensorflow/compiler/jit/graphcycles/graphcycles.h" +#include "tf_adapter/util/npu_ops_identifier.h" +#include "tf_adapter/util/infershape_util.h" +#include "tf_adapter/util/npu_attrs.h" +#include "tf_adapter/common/common.h" + +namespace tensorflow { +static const int64 kMicrosToMillis = 1000; +static std::atomic graph_run_num(1); +static mutex graph_num_mutex(LINKER_INITIALIZED); + +class GetAttrOptimizePass : public GraphOptimizationPass { + public: + GetAttrOptimizePass() = default; + ~GetAttrOptimizePass() = default; + Status Run(const GraphOptimizationPassOptions &options) override; +}; + +Status GetAttrOptimizePass::Run(const GraphOptimizationPassOptions &options) { + if (options.graph == nullptr || options.flib_def == nullptr || + options.session_options == nullptr) { + 
return Status::OK(); + } + int graph_num; + graph_num = graph_run_num++; + + std::map pass_options = NpuAttrs::GetPassOptions(options); + std::string job = pass_options["job"]; + LOG(INFO) << "NpuAttrs job is " << job; + if (job == "ps" || job == "default") { + LOG(INFO) << "job is " << job << " Skip the optimizer : GetAttrOptimizePass."; + return Status::OK(); + } + + for (Node *n : options.graph->get()->nodes()) { + REQUIRES_NOT_NULL(n); + if (n->attrs().Find("_NoNeedOptimize")) { + LOG(INFO) << "Found mark of noneed optimize on node [" << n->name() << "], skip AddSrcOpAttrPass."; + return Status::OK(); + } + + if (n->attrs().Find("_NpuOptimizer")) { + LOG(INFO) << "Found mark of get attr optimize on node [" << n->name() << "], skip AddSrcOpAttrPass."; + return Status::OK(); + } + } + + int64 startTime = InferShapeUtil::GetCurrentTimestap(); + char *need_print = getenv("PRINT_MODEL"); + + if (need_print != nullptr && strcmp("1", need_print) == 0) { + GraphDef ori_graph_def; + options.graph->get()->ToGraphDef(&ori_graph_def); + string ori_model_path = "BeforeGetAttrOptimize_"; + string omodel_path = ori_model_path + std::to_string(graph_num) + ".pbtxt"; + Status status_out = WriteTextProto(Env::Default(), omodel_path, ori_graph_def); + } + + for (Node *n : options.graph->get()->nodes()) { + REQUIRES_NOT_NULL(n); + std::string device_name = n->assigned_device_name(); + if (device_name != "" && device_name.find("/job:ps") == std::string::npos) { + Status s = NpuAttrs::SetNpuOptimizerAttr(options, n); + if (s != Status::OK()) { + LOG(INFO) << "set npu optimizer error."; + return s; + } + break; + } + } + + if (job != "localhost") { + for (Node *node : options.graph->get()->op_nodes()) { + REQUIRES_NOT_NULL(node); + std::string device_name = ""; + device_name = node->assigned_device_name(); + if (device_name.find("/device:") != std::string::npos) { + device_name = device_name.substr(0, device_name.find("/device:")); + device_name += std::string("/device:CPU:0"); + } + 
node->set_assigned_device_name(device_name); + } + } + + if (need_print != nullptr && strcmp("1", need_print) == 0) { + GraphDef omg_graph_def; + options.graph->get()->ToGraphDef(&omg_graph_def); + string tmpmodel_path = "AfterGetAttrOptimize_"; + string tmodel_path = tmpmodel_path + std::to_string(graph_num) + ".pbtxt"; + Status status_o = WriteTextProto(Env::Default(), tmodel_path, omg_graph_def); + } + int64 endTime = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "GetAttrOptimizePass_" << std::to_string(graph_num) << " success. [" + << ((endTime - startTime) / kMicrosToMillis) << " ms]"; + + return Status::OK(); +} + +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 0, + GetAttrOptimizePass); +} // namespace tensorflow diff --git a/tf_adapter/optimizers/gradient_fusion_optimizer.cc b/tf_adapter/optimizers/gradient_fusion_optimizer.cc new file mode 100644 index 000000000..fdfcc9ff6 --- /dev/null +++ b/tf_adapter/optimizers/gradient_fusion_optimizer.cc @@ -0,0 +1,347 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tf_adapter/optimizers/gradient_fusion_optimizer.h" +#include "tf_adapter/common/common.h" + +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/utils/topological_sort.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/util/overflow.h" + +namespace tensorflow { +namespace grappler { +const string OP_TYPE_BROADCAST = "HcomBroadcast"; + +const string DATA_TYPE_ATTR = "T"; +const string SHAPE_ATTR = "shape"; +const string GROUP_ATTR = "group"; +const string ROOT_RANK_ATTR = "root_rank"; +const string REDUCE_TYPE_ATTR = "reduction"; + +Status SizeofDataType(DataType dType, size_t &size) { + static const std::map dataTypeSizeMap = + { + {DT_INVALID, 0}, {DT_FLOAT, 4}, + {DT_DOUBLE, 8}, {DT_INT32, 4}, + {DT_UINT8, 1}, {DT_INT16, 2}, + {DT_INT8, 1}, {DT_COMPLEX64, 8}, + {DT_INT64, 8}, {DT_BOOL, 1}, + {DT_QINT8, 8}, {DT_QUINT8, 8}, + {DT_QINT32, 4}, {DT_BFLOAT16, 2}, + {DT_QINT16, 2}, {DT_QUINT16, 2}, + {DT_UINT16, 2}, {DT_COMPLEX128, 16}, + {DT_HALF, 2}, {DT_UINT32, 4}, + {DT_UINT64, 8}, {DT_FLOAT_REF, 4}, + {DT_DOUBLE_REF, 8}, {DT_INT32_REF, 4}, + {DT_UINT8_REF, 1}, {DT_INT16_REF, 2}, + {DT_INT8_REF, 1}, {DT_COMPLEX64_REF, 8}, + {DT_INT64_REF, 8}, {DT_BOOL_REF, 1}, + {DT_QINT8_REF, 1}, {DT_QUINT8_REF, 1}, + {DT_QINT32_REF, 4}, {DT_BFLOAT16_REF, 2}, + {DT_QINT16_REF, 2}, {DT_QUINT16_REF, 2}, + {DT_UINT16_REF, 2}, {DT_COMPLEX128_REF, 16}, + {DT_HALF_REF, 2}, {DT_UINT32_REF, 4}, + {DT_UINT64_REF, 8}, + }; + auto iter = dataTypeSizeMap.find(dType); + if (iter != dataTypeSizeMap.end()) { + size = iter->second; + return Status::OK(); + } + return errors::InvalidArgument("data type not support"); +} + +bool GradFusionOptimizer::IsHcomOp(const NodeDef &nodeDef) { + if (nodeDef.op() == OP_TYPE_BROADCAST) { + return true; + } else { + return false; + } +} + +string GetNodeScopeName(NodeDef 
&nodeDef) { + string::size_type pos = nodeDef.name().find_last_of("/"); + if (pos == string::npos) { + return string(""); + } + return nodeDef.name().substr(0, pos + 1); +} + +Status GradFusionOptimizer::GetInputTensorSize(NodeDef &nodeDef, + int64_t &inputTensorSize) { + int64_t inputTensorSizeRet = 0; + int64_t inputTensorSizeRetTmp = 1; + string err(""); + TensorShapeProto shape; + auto attrMap = nodeDef.attr(); + int inputsNameSize = nodeDef.input_size(); + if (inputsNameSize > 0) { + for (int i = 0; i < inputsNameSize; i++) { + string inputNodeName = nodeDef.input(i); + if (inputNodeName == "") { + LOG(INFO) << "Cannot get input node name, curr node : " + << nodeDef.name() << " index: " << i; + continue; + } + NodeDef inputNode = nameToNode_[inputNodeName]; + shape = (*inputNode.mutable_attr())[SHAPE_ATTR].shape(); + if (!shape.unknown_rank()) { + inputTensorSizeRetTmp = 1; + for (auto dim : shape.dim()) { + if (dim.size() == -1) { + err = string("Unknow size"); + break; + } + inputTensorSizeRetTmp = MultiplyWithoutOverflow(inputTensorSizeRetTmp, + dim.size()); + if (inputTensorSizeRetTmp < 0) { + return errors::InvalidArgument("input tensor size is overflow"); + } + } + } else { + err = string("Unknow rank"); + } + if (err != "") { + break; + } + if (INT64_MAX - inputTensorSizeRetTmp < inputTensorSizeRet) { + return errors::InvalidArgument("input tensor size is overflow"); + } + inputTensorSizeRet += inputTensorSizeRetTmp; + } + } else { + err = string("Doesnot have the attr of \'inputs_name\'"); + } + if (err != "") { + return errors::InvalidArgument( + "GetInputTensorSize failed. 
Node name: ", nodeDef.name(), + ", error msg: ", err); + } + + DataType dType = (*nodeDef.mutable_attr())[DATA_TYPE_ATTR].list().type(0); + size_t size = 0; + Status ret = SizeofDataType(dType, size); + REQUIRES_STATUS_OK(ret); + inputTensorSize = MultiplyWithoutOverflow(inputTensorSizeRet, size); + if (inputTensorSize < 0) { + return errors::InvalidArgument("input tensor size is overflow"); + } + return Status::OK(); +} + +Status GradFusionOptimizer::SetHcomBroadcastAttr(NodeDef *fusionNode, NodeDef &originNode) { + std::set &fusionBroadcastName = fusionOpPool_[fusionNode->op()]; + DataType dType = (*originNode.mutable_attr())[DATA_TYPE_ATTR].list().type(0); + if (fusionBroadcastName.count(fusionNode->name()) == 0) { + int64 rootRank = (*originNode.mutable_attr())[ROOT_RANK_ATTR].i(); + (*fusionNode->mutable_attr())[ROOT_RANK_ATTR].set_i(rootRank); + string group = (*originNode.mutable_attr())[GROUP_ATTR].s(); + (*fusionNode->mutable_attr())[GROUP_ATTR].set_s(group); + fusionBroadcastName.insert(fusionNode->name()); + } + // All input data types are the same. 
+ (*fusionNode->mutable_attr())[DATA_TYPE_ATTR].mutable_list()->add_type(dType); + return Status::OK(); +} + +Status GradFusionOptimizer::SetFusionNodeAttr(NodeDef *fusionNode, NodeDef &originNode) { + string opType = fusionNode->op(); + if (opType == OP_TYPE_BROADCAST) { + return SetHcomBroadcastAttr(fusionNode, originNode); + } + return Status::OK(); +} + +Status GradFusionOptimizer::FusionOp(std::vector fusionHcomOps, + GraphDef *graphDef) { + std::set < NodeDef * > outputsSet; + string opType = fusionHcomOps[0].op(); + if (fusionOpInfo_.count(opType) == 0) { + fusionOpInfo_[opType].push_back(std::make_pair(0, "")); + } + int &fusionOpIndex = fusionOpInfo_[opType].back().first; + string &fusionOpName = fusionOpInfo_[opType].back().second; + string scope = GetNodeScopeName(fusionHcomOps[0]); + string fusionNodeName = scope + "FusionNode_" + opType + "_" + std::to_string(fusionOpIndex); + + // add fusion node + NodeDef *fusionNode = graphDef->add_node(); + REQUIRES_NOT_NULL(fusionNode); + fusionNode->set_name(fusionNodeName); + fusionNode->set_device(fusionHcomOps[0].device()); + fusionNode->set_op(opType); + + LOG(INFO) << "INFO: GradFusionOptimizer::FusionOp New FusionNodeName: " << fusionNodeName; + int fusionOutputIdx = 0; + std::set fusionInputs; + std::set fusionCtrlInputs; + for (auto &nodeDef : fusionHcomOps) { + for (string input : nodeDef.input()) { + if (fusionInputs.count(input) == 1) { + continue; + } + // Control dependencies must come after regular dependencies + if (!input.empty() && str_util::StartsWith(input, "^")) { + fusionCtrlInputs.insert(input); + continue; + } + fusionNode->add_input(input); + TF_RETURN_IF_ERROR(SetFusionNodeAttr(fusionNode, nodeDef)); + fusionInputs.insert(input); + } + fusionInputs.clear(); + + // outputs + outputsSet = nodeMap_->GetOutputs(nodeDef.name()); + for (auto outNodeDef : outputsSet) { + int idx = 0; + for (auto input : outNodeDef->input()) { + if (input.find(nodeDef.name()) == std::string::npos) { + idx++; + 
continue; + } + if (input.find("^") == std::string::npos) { + outNodeDef->set_input(idx, fusionNodeName + ":" + std::to_string(fusionOutputIdx)); + fusionOutputIdx++; + } else { + outNodeDef->set_input(idx, "^" + fusionNodeName); + } + } + } + + // delete origin nodes + ::google::protobuf::RepeatedPtrField<::tensorflow::NodeDef> *nodeList = graphDef->mutable_node(); + for (::google::protobuf::RepeatedPtrField<::tensorflow::NodeDef>::const_iterator const_it = nodeList->begin(); + const_it != nodeList->end(); ++const_it) { + if (const_it->name() == nodeDef.name()) { + nodeList->erase(const_it); + break; + } + } + } + if (!fusionCtrlInputs.empty()) { + for (string ctrlInput : fusionCtrlInputs) { + fusionNode->add_input(ctrlInput); + } + } + fusionCtrlInputs.clear(); + + // add control edges + if (fusionOpIndex >= 1) { + fusionNode->add_input("^" + fusionOpName); + } + + // update fusion op info. + fusionOpIndex++; + fusionOpName = fusionNodeName; + + return Status::OK(); +} + +int64 GradFusionOptimizer::GetFusionTensorSize() { + const char *env = getenv("FUSION_TENSOR_SIZE"); + // default (50KBytes) + const int64 fusionTensorSizeDefault = 524288000; + if (env == nullptr || strlen(env) >= ADAPTER_ENV_MAX_LENTH) { + return fusionTensorSizeDefault; + } + string envSize(env); + std::istringstream ss(envSize); + int64 fusionTensorSize; + if (!(ss >> fusionTensorSize)) { + fusionTensorSize = fusionTensorSizeDefault; + } + return fusionTensorSize; +} + +Status GradFusionOptimizer::Optimize(Cluster *cluster, const GrapplerItem &item, + GraphDef *optimizedGraph) { + REQUIRES_NOT_NULL(optimizedGraph); + const int64 fusionTensorSize = GetFusionTensorSize(); + GraphDef graphOrigin; + std::map, std::vector> fusionHcomOps; + std::map, int64_t> currentGradSumSize; + *optimizedGraph = item.graph; + LOG(INFO) << "INFO: GradFusionOptimizer::Optimize begin, OriginNodeNum: " << item.graph.node_size(); + LOG(INFO) << "INFO: FUSION_TENSOR_SIZE: " << fusionTensorSize; + + if 
(fusionTensorSize < 0) { + return errors::InvalidArgument("FUSION_TENSOR_SIZE is invalid"); + } + + REQUIRES_STATUS_OK(TopologicalSort(optimizedGraph)); + nodeMap_.reset(new (std::nothrow) NodeMap(optimizedGraph)); + REQUIRES_NOT_NULL(nodeMap_); + fusionOpInfo_.clear(); + fusionOpPool_.clear(); + graphOrigin = *optimizedGraph; + for (const auto &nodeDef : graphOrigin.node()) { + nameToNode_[nodeDef.name()] = nodeDef; + } + + for (const auto &nodeDef : graphOrigin.node()) { + if (IsHcomOp(nodeDef)) { + DataType dType; + auto attrMap = nodeDef.attr(); + auto iter = attrMap.find(DATA_TYPE_ATTR); + if (iter != attrMap.end()) { + dType = iter->second.list().type(0); + } else { + LOG(INFO) << "INFO: Use default dataType: DT_FLOAT"; + dType = DT_FLOAT; + } + std::pair key = std::make_pair(nodeDef.op(), dType); + + fusionHcomOps[key].push_back(nodeDef); + int64_t inputTensorSize = 0; + NodeDef tmpNode = nodeDef; + TF_RETURN_IF_ERROR(GetInputTensorSize(tmpNode, inputTensorSize)); + if (currentGradSumSize.count(key) != 0) { + if (INT64_MAX - inputTensorSize < currentGradSumSize[key]) { + return errors::InvalidArgument("input tensor size is overflow"); + } + currentGradSumSize[key] += inputTensorSize; + } else { + currentGradSumSize[key] = inputTensorSize; + } + if (currentGradSumSize[key] >= fusionTensorSize) { + if (fusionHcomOps[key].size() > 1) { + TF_RETURN_IF_ERROR(FusionOp(fusionHcomOps[key], optimizedGraph)); + } + fusionHcomOps[key].clear(); + currentGradSumSize[key] = 0; + } + } + } + + for (auto iter : fusionHcomOps) { + if (!iter.second.empty()) { + if (iter.second.size() > 1) { + TF_RETURN_IF_ERROR(FusionOp(iter.second, optimizedGraph)); + } + iter.second.clear(); + } + } + LOG(INFO) << "INFO: GradFusionOptimizer::Optimize end, finalNodeNum: " << optimizedGraph->node_size(); + + return Status::OK(); +} + +REGISTER_GRAPH_OPTIMIZER(GradFusionOptimizer); +} // end namespace grappler +} // end namespace tensorflow diff --git 
a/tf_adapter/optimizers/gradient_fusion_optimizer.h b/tf_adapter/optimizers/gradient_fusion_optimizer.h new file mode 100644 index 000000000..e844c8b8a --- /dev/null +++ b/tf_adapter/optimizers/gradient_fusion_optimizer.h @@ -0,0 +1,69 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_GRAPPLER_OPTIMIZERS_GRADIENT_FUSION_OPTIMIZER_H_ +#define TENSORFLOW_GRAPPLER_OPTIMIZERS_GRADIENT_FUSION_OPTIMIZER_H_ + +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" +#include "tensorflow/core/grappler/utils.h" + +namespace tensorflow { +namespace grappler { +class GradFusionOptimizer : public CustomGraphOptimizer { + public: + + GradFusionOptimizer() { + fusionOpInfo_.clear(); + fusionOpPool_.clear(); + } + + ~GradFusionOptimizer() {} + + string name() const { + return "GradFusionOptimizer"; + } + + Status Init(const tensorflow::RewriterConfig_CustomGraphOptimizer * + config = nullptr) { + return Status::OK(); + } + + bool UsesFunctionLibrary() const override { return false; } + + Status Optimize(Cluster *cluster, const GrapplerItem &item, + GraphDef *optimizedGraph) override; + + void Feedback(Cluster *cluster, const GrapplerItem &item, + const GraphDef &optimizedGraph, double result) override {} + + private: + 
bool IsHcomOp(const NodeDef &nodeDef); + Status GetInputTensorSize(NodeDef &nodeDef, int64_t &inputTensorSize); + Status FusionOp(std::vector fusionHcclOps, GraphDef *graphDef); + Status SetFusionNodeAttr(NodeDef *fusionNode, NodeDef &originNode); + Status SetHcomBroadcastAttr(NodeDef *fusionNode, NodeDef &originNode); + int64 GetFusionTensorSize(); + + std::unique_ptr nodeMap_; + std::map>> + fusionOpInfo_; + std::map nameToNode_; + std::map> fusionOpPool_; +}; +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_GRAPPLER_OPTIMIZERS_GRADIENT_FUSION_OPTIMIZER_H_ \ No newline at end of file diff --git a/tf_adapter/optimizers/mark_noneed_optimize_pass.cc b/tf_adapter/optimizers/mark_noneed_optimize_pass.cc new file mode 100644 index 000000000..a7c3bbf64 --- /dev/null +++ b/tf_adapter/optimizers/mark_noneed_optimize_pass.cc @@ -0,0 +1,124 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tf_adapter/util/npu_attrs.h" +#include "tf_adapter/common/common.h" + +namespace tensorflow { + +class MarkNoNeedOptimizePass : public GraphOptimizationPass { + public: + MarkNoNeedOptimizePass() = default; + ~MarkNoNeedOptimizePass() = default; + Status Run(const GraphOptimizationPassOptions &options) override; +private: +Status ProcessGraph(std::unique_ptr* graph, FunctionLibraryDefinition *func_lib, + const OptimizationPassRegistry::Grouping pass_group_value); +}; + +Status MarkNoNeedOptimizePass::Run(const GraphOptimizationPassOptions &options) { + if ((options.graph == nullptr && options.partition_graphs == nullptr) || + options.flib_def == nullptr) { + return Status::OK(); + } + + Status s = Status::OK(); + if (options.graph != nullptr) { + std::unique_ptr* graph = options.graph; + FunctionLibraryDefinition *func_lib = options.flib_def; + s = ProcessGraph(graph, func_lib, OptimizationPassRegistry::POST_REWRITE_FOR_EXEC); + if (s != Status::OK()) { + return s; + } + } else if (options.partition_graphs != nullptr) { + for (auto& pg : *options.partition_graphs) { + std::unique_ptr* graph = &pg.second; + FunctionLibraryDefinition *func_lib = options.flib_def; + s = ProcessGraph(graph, func_lib, OptimizationPassRegistry::POST_PARTITIONING); + if (s != Status::OK()) { + return s; + } + } + } + + return Status::OK(); +} + +Status MarkNoNeedOptimizePass::ProcessGraph(std::unique_ptr* graph, FunctionLibraryDefinition *func_lib, + const OptimizationPassRegistry::Grouping pass_group_value) +{ + if (graph == nullptr) { + return Status::OK(); + } + + for (Node *n : graph->get()->nodes()) { + if (n != nullptr && 
n->attrs().Find("_NoNeedOptimize")) { + LOG(INFO) << "Found mark of noneed optimize on node [" << n->name() << "], skip MarkNoNeedOptimizePass."; + return Status::OK(); + } + } + + std::string job = ""; + std::map pass_options; + pass_options = NpuAttrs::GetDefaultPassOptions(); + for (Node *n : graph->get()->nodes()) { + REQUIRES_NOT_NULL(n); + if (n->attrs().Find("_NpuOptimizer")) { + pass_options = NpuAttrs::GetPassOptions(n->attrs()); + break; + } + } + + job = pass_options["job"]; + if (job == "ps" || job == "default") { + LOG(INFO) << "job is " << job << " Skip the optimizer : MarkNoNeedOptimizePass."; + return Status::OK(); + } + if (job == "localhost" && + pass_group_value != OptimizationPassRegistry::POST_REWRITE_FOR_EXEC) { + return Status::OK(); + } + if (job != "localhost" && + pass_group_value != OptimizationPassRegistry::POST_PARTITIONING) { + return Status::OK(); + } + + bool mix_compile_mode = pass_options["mix_compile_mode"] == "1"; + int iterations_per_loop = std::atoi(pass_options["iterations_per_loop"].c_str()); + LOG(INFO) << "mix_compile_mode is " << (mix_compile_mode ? 
"True" : "False"); + LOG(INFO) << "iterations_per_loop is " << iterations_per_loop; + + for (const auto &func_name : func_lib->ListFunctionNames()) { + FunctionDef *fdef = const_cast(func_lib->Find(func_name)); + if (fdef == nullptr) continue; + LOG(INFO) << "Mark function as no need optimize [" << fdef->signature().name() << "]"; + for (NodeDef &ndef : *fdef->mutable_node_def()) { + (*ndef.mutable_attr())["_NoNeedOptimize"].set_b(true); + } + } + + return Status::OK(); +} +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 1, + MarkNoNeedOptimizePass); +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_PARTITIONING, 100, + MarkNoNeedOptimizePass); +} // namespace tensorflow diff --git a/tf_adapter/optimizers/mark_start_node_pass.cc b/tf_adapter/optimizers/mark_start_node_pass.cc new file mode 100644 index 000000000..f144bcddd --- /dev/null +++ b/tf_adapter/optimizers/mark_start_node_pass.cc @@ -0,0 +1,208 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/common_runtime/shape_refiner.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/graph_def_util.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/graph/tensor_id.h" +#include "tensorflow/core/lib/gtl/flatset.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" +#include "tensorflow/core/util/device_name_utils.h" +#include "tensorflow/core/graph/control_flow.h" +#include "tensorflow/compiler/jit/graphcycles/graphcycles.h" +#include "tf_adapter/util/npu_ops_identifier.h" +#include "tf_adapter/util/infershape_util.h" +#include "tf_adapter/util/npu_attrs.h" +#include "tf_adapter/common/common.h" + +namespace tensorflow { +static const int64 kMicrosToMillis = 1000; +static std::atomic graph_run_num(1); +static mutex graph_num_mutex(LINKER_INITIALIZED); + +std::set string_split(const string &str, const string &pattern) { + std::set resultSet; + string::size_type pos1, pos2; + pos2 = str.find(pattern); + pos1 = 0; + while (pos2 != string::npos) { + resultSet.insert(str.substr(pos1, pos2 - pos1)); + pos1 = pos2 + pattern.size(); + pos2 = str.find(pattern, pos1); + } + if (pos1 != str.length()) { + resultSet.insert(str.substr(pos1)); + } + return resultSet; 
+} + +class MarkStartNodePass : public GraphOptimizationPass { + public: + MarkStartNodePass() = default; + ~MarkStartNodePass() = default; + Status Run(const GraphOptimizationPassOptions &options) override; + Status TraverseNode(Node *start_node); +}; + +Status MarkStartNodePass::Run(const GraphOptimizationPassOptions& options) { + int graph_num; + graph_num = graph_run_num++; + + if (options.graph == nullptr || options.flib_def == nullptr || + options.session_options == nullptr) { + return Status::OK(); + } + + std::map pass_options = NpuAttrs::GetPassOptions(options); + std::string job = pass_options["job"]; + if (job == "ps" || job == "default" || job == "localhost") { + LOG(INFO) << "job is " << job << " Skip the optimizer : MarkStartNodePass."; + return Status::OK(); + } + + std::unique_ptr* graph = options.graph; + + for (Node *n : graph->get()->nodes()) { + REQUIRES_NOT_NULL(n); + if (n->attrs().Find("_NoNeedOptimize")) { + LOG(INFO) << "Found mark of noneed optimize on node [" << n->name() << "], skip MarkStartNodePass."; + return Status::OK(); + } + + if (n->attrs().Find("_StartNodeName")) { + LOG(INFO) << "Found mark of startnode optimize on node [" << n->name() << "], skip MarkStartNodePass."; + return Status::OK(); + } + } + + int64 startTime = InferShapeUtil::GetCurrentTimestap(); + char *need_print = getenv("PRINT_MODEL"); + + if (need_print != nullptr && strcmp("1", need_print) == 0) { + GraphDef ori_graph_def; + graph->get()->ToGraphDef(&ori_graph_def); + string ori_model_path = "BeforeMarkStartNodeAttr_"; + string omodel_path = ori_model_path + std::to_string(graph_num) + ".pbtxt"; + Status status_out = WriteTextProto(Env::Default(), omodel_path, ori_graph_def); + } + + for (Node *start_node : graph->get()->nodes()) { + REQUIRES_NOT_NULL(start_node); + std::string src_device_name = start_node->assigned_device_name(); + if (src_device_name != "" && src_device_name.find("/job:ps") == std::string::npos) { + for (Node *n : start_node->out_nodes()) { + 
std::string device_name = n->assigned_device_name(); + if (device_name.find("/job:ps") != std::string::npos) { + std::string start_node_name = ""; + std::set start_nodes_name; + if (start_node->attrs().Find("_StartNodeName") != nullptr) { + start_nodes_name = string_split(start_node->attrs().Find("_StartNodeName")->s(), ";"); + } + start_nodes_name.insert(start_node->name()); + for (auto name : start_nodes_name) { + start_node_name += name; + start_node_name += ";"; + } + start_node->AddAttr("_StartNodeName", start_node_name); + + if (n->attrs().Find("_StartNodeName") != nullptr) { + std::set nodes_name = string_split(n->attrs().Find("_StartNodeName")->s(), ";"); + for (auto name : nodes_name) { + start_nodes_name.insert(name); + } + } + for (auto name : start_nodes_name) { + start_node_name += name; + start_node_name += ";"; + } + n->AddAttr("_StartNodeName", start_node_name); + Status s = TraverseNode(n); + if (s != Status::OK()) { + return s; + } + } + } + } + } + + if (need_print != nullptr && strcmp("1", need_print) == 0) { + GraphDef omg_graph_def; + graph->get()->ToGraphDef(&omg_graph_def); + string tmpmodel_path = "AfterMarkStartNodeAttr_"; + string tmodel_path = tmpmodel_path + std::to_string(graph_num) + ".pbtxt"; + Status status_o = WriteTextProto(Env::Default(), tmodel_path, omg_graph_def); + } + int64 endTime = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "MarkStartNodePass_" << std::to_string(graph_num) << " success. 
[" + << ((endTime - startTime) / kMicrosToMillis) << " ms]"; + + return Status::OK(); +} + +Status MarkStartNodePass::TraverseNode(Node *start_node) +{ + Status s = Status::OK(); + for (Node *n : start_node->out_nodes()) { + REQUIRES_NOT_NULL(n); + std::string start_node_name = ""; + std::set start_nodes_name; + start_nodes_name = string_split(start_node->attrs().Find("_StartNodeName")->s(), ";"); + + if (n->attrs().Find("_StartNodeName") != nullptr) { + std::set nodes_name = string_split(n->attrs().Find("_StartNodeName")->s(), ";"); + for (auto name : nodes_name) { + start_nodes_name.insert(name); + } + } + for (auto name : start_nodes_name) { + start_node_name += name; + start_node_name += ";"; + } + n->AddAttr("_StartNodeName", start_node_name); + s = TraverseNode(n); + if (s != Status::OK()) { + LOG(INFO) << "traverse node : " << start_node->name() << " to add start node name failed."; + return s; + } + } + return Status::OK(); +} + +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 0, + MarkStartNodePass); +} // namespace tensorflow diff --git a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc new file mode 100644 index 000000000..246ec4e63 --- /dev/null +++ b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc @@ -0,0 +1,2218 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tf_adapter/optimizers/om_partition_subgraphs_pass.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/common_runtime/shape_refiner.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/graph_def_util.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/graph/tensor_id.h" +#include "tensorflow/core/lib/gtl/flatset.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" +#include "tensorflow/core/util/device_name_utils.h" +#include "tensorflow/core/graph/control_flow.h" +#include "tensorflow/compiler/jit/graphcycles/graphcycles.h" +#include "tf_adapter/util/npu_ops_identifier.h" +#include "tf_adapter/util/infershape_util.h" +#include "tf_adapter/util/npu_attrs.h" +#include "tf_adapter/common/common.h" + +namespace tensorflow { +static const int64 kMicrosToMillis = 1000; + +namespace OMSplitter { +const char *const ARG_OP = "_Arg"; +const char *const RET_OP = "_Retval"; +const char *const PARTITION_SUB_GRAPH_ATTR = "_subgraphs"; +const std::string ATTR_NAME_FRAMEWORK_FUNC_DEF = "func_def"; +const std::string ATTR_NAME_SHARED_NAME = "shared_name"; +const std::string ATTR_VALUE_SHARED_NAME = 
"iterator_default"; +const std::string ATTR_VALUE_SCOPE_NAME = "_without_npu_compile"; +const int MAX_GROUP_SIZE = 100000; +const uint32_t MIN_CLUSTER_SIZE = 2; +std::atomic compile_mode(false); +std::set not_support_nodes; + +// Graph to FunctionDef conversion. +Status OMSubGraphToFunctionDef(const Graph &graph, const string &name, + FunctionDef *fdef) { + fdef->mutable_signature()->set_name(name); + std::unordered_map tensorRenaming; + std::unordered_map returnValues; + + for (Node const *node : graph.op_nodes()) { + REQUIRES_NOT_NULL(node); + if (node->type_string() == ARG_OP) { + int index; + DataType type; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "T", &type)); + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "index", &index)); + while (fdef->signature().input_arg_size() <= index) { + fdef->mutable_signature()->add_input_arg(); + } + OpDef::ArgDef *argdef = + fdef->mutable_signature()->mutable_input_arg(index); + REQUIRES_NOT_NULL(argdef); + argdef->set_type(type); + argdef->set_name(node->name()); + tensorRenaming[strings::StrCat(node->name(), ":0")] = node->name(); + continue; + } + + if (node->type_string() == RET_OP) { + int index; + DataType type; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "T", &type)); + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "index", &index)); + while (fdef->signature().output_arg_size() <= index) { + fdef->mutable_signature()->add_output_arg(); + } + OpDef::ArgDef *argdef = + fdef->mutable_signature()->mutable_output_arg(index); + REQUIRES_NOT_NULL(argdef); + argdef->set_type(type); + argdef->set_name(node->name()); + const Edge *edge = nullptr; + TF_CHECK_OK(node->input_edge(0, &edge)); + returnValues[node->name()] = + strings::StrCat(edge->src()->name(), ":", edge->src_output()); + continue; + } + + NodeDef *nodeDef = fdef->add_node_def(); + REQUIRES_NOT_NULL(nodeDef); + *nodeDef = node->def(); + if (!node->assigned_device_name().empty()) { + nodeDef->set_device(node->assigned_device_name()); + } + 
nodeDef->set_name(node->name()); + // Reset input names based on graph rather than the NodeDef. + nodeDef->clear_input(); + // Edges, indexed by dst_input. + std::vector inEdges; + std::vector ctrlEdges; + for (Edge const *edge : node->in_edges()) { + REQUIRES_NOT_NULL(edge); + REQUIRES_NOT_NULL(edge->src()); + if (edge->src()->IsSource()) { + continue; + } + + if (edge->IsControlEdge()) { + ctrlEdges.push_back(edge); + } else { + unsigned int dst_input = edge->dst_input() < 0 ? 0 : static_cast(edge->dst_input()); + if (inEdges.size() <= dst_input) { + try { + inEdges.resize(dst_input + 1); + } catch (...) { + return errors::InvalidArgument("inEdges resize is failed, resize is %u", dst_input + 1); + } + } + inEdges[dst_input] = edge; + } + } + + // Add regular inputs + for (std::vector::size_type i = 0; i < inEdges.size(); ++i) { + const Edge *edge = inEdges[i]; + REQUIRES_NOT_NULL(edge); + REQUIRES_NOT_NULL(edge->src()); + nodeDef->add_input( + strings::StrCat(edge->src()->name(), ":", edge->src_output())); + } + + // Add control inputs + for (const Edge *edge : ctrlEdges) { + REQUIRES_NOT_NULL(edge); + REQUIRES_NOT_NULL(edge->src()); + nodeDef->add_input(strings::StrCat("^", edge->src()->name())); + } + + // Populate tensorRenaming. + NameRangeMap outputRanges; + TF_RETURN_IF_ERROR( + NameRangesForNode(*node, node->op_def(), nullptr, &outputRanges)); + for (const auto &output : outputRanges) { + for (int i = output.second.first; i < output.second.second; ++i) { + const string tensorName = strings::StrCat( + nodeDef->name(), ":", output.first, ":", i - output.second.first); + tensorRenaming[strings::StrCat(node->name(), ":", i)] = tensorName; + } + } + } + + // Detect missing function inputs. + for (int i = 0; i < fdef->signature().input_arg_size(); ++i) { + const string &inputName = fdef->signature().input_arg(i).name(); + if (inputName.empty()) { + return errors::InvalidArgument("Missing input ", i, " to function ", name); + } + } + + // Remap input names. 
We do this as a second pass to allow the nodes to be in + // any order. + for (int nIndex = 0; nIndex < fdef->node_def_size(); ++nIndex) { + NodeDef *nodeDef = fdef->mutable_node_def(nIndex); + for (int i = 0; i < nodeDef->input_size(); ++i) { + if (str_util::StartsWith(nodeDef->input(i), "^")) { + // Control input + const string inputCtrlName = nodeDef->input(i).substr(1); + if (inputCtrlName.empty()) { + return errors::InvalidArgument( + "Could not remap control input ", i, ", '", nodeDef->input(i), + "', of node '", nodeDef->name(), "' in function ", name); + } + *nodeDef->mutable_input(i) = strings::StrCat("^", inputCtrlName); + } else { + const auto iter = tensorRenaming.find(nodeDef->input(i)); + if (iter == tensorRenaming.end()) { + return errors::InvalidArgument( + "Could not remap input ", i, ", '", nodeDef->input(i), + "', of node '", nodeDef->name(), "' in function ", name); + } + *nodeDef->mutable_input(i) = iter->second; + } + } + } + + // Remap return values. + for (int r = 0; r < fdef->signature().output_arg_size(); ++r) { + const string &retName = fdef->signature().output_arg(r).name(); + if (retName.empty()) { + return errors::InvalidArgument("Missing output ", r, " to function ", name); + } + const string &returnValue = returnValues[retName]; + const auto iter = tensorRenaming.find(returnValue); + if (iter == tensorRenaming.end()) { + return errors::InvalidArgument("Could not remap return value ", r, ", '", + retName, "', of '", returnValue, + "' in function ", name); + } + (*fdef->mutable_ret())[retName] = iter->second; + } + + return Status::OK(); +} + +struct NodeCompare { + bool operator()(const Node *a, const Node *b) const { + return a->id() < b->id(); + } +}; +using OrderedNodeSet = std::set; + +bool EndsWith(const std::string &str, const std::string &suffix) { + return str.size() >= suffix.size() && + str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; +} + +bool IsWhiteListSupport(const string &op_name, bool 
mix_compile_mode) { + static const std::string suffix_op = "Dataset"; + static const std::string suffix_op_v2 = "DatasetV2"; + + auto identifier = NpuOpsIdentifier::GetInstance(mix_compile_mode); + + bool ans = (identifier->IsNpuSupported(op_name)) && + !EndsWith(op_name, suffix_op) && + !EndsWith(op_name, suffix_op_v2) && + !(op_name == "Const") && + !(op_name == "_Arg") && + !(op_name == "_Retval") && + !(op_name == "StringJoin"); + + if (!ans) { + auto ret = not_support_nodes.insert(op_name); + if (ret.second) { + LOG(INFO) << "node: " << op_name << " is not in white list, " + << "so currently not support"; + } + } + + return ans; +} + +bool IsOptimizerOp(Node *node) { + static const std::unordered_set Optimizer_Names = + { + "GradientDescent", + "Momentum", + "Adam", + "Adadelta", + "Adagrad", + "AdagradDA", + "ProximalAdagrad", + "ProximalGradientDescent", + "RMSProp", + "Ftrl", + }; + return (Optimizer_Names.count(node->name()) > 0); +} + +Status SetIteratorShardName(Node *node) { + if (node->type_string() != "Iterator" && node->type_string() != "IteratorV2") { + return errors::InvalidArgument("Node op type is not Iterator."); + } + string shardName; + Status s = GetNodeAttr(node->attrs(), ATTR_NAME_SHARED_NAME, &shardName); + if (s.code() == error::Code::NOT_FOUND) { + node->AddAttr(ATTR_NAME_SHARED_NAME, node->name()); + return Status::OK(); + } else { + TF_RETURN_IF_ERROR(s); + } + node->ClearAttr(ATTR_NAME_SHARED_NAME); + node->AddAttr(ATTR_NAME_SHARED_NAME, node->name()); + LOG(INFO) << "shardName is " << shardName; + return Status::OK(); +} + +bool IsWithoutNpuScope(Node *node) { + if (!compile_mode) { + return false; + } + bool is_npu_compile = false; + Status status = GetNodeAttr(node->attrs(), ATTR_VALUE_SCOPE_NAME, &is_npu_compile); + if (status.ok() && is_npu_compile) { + return true; + } + return false; +} + +bool IsWithoutNpuScope(NodeDef &node_def) { + if (!compile_mode) { + return false; + } + if (node_def.attr().count(ATTR_VALUE_SCOPE_NAME)) { 
+ return node_def.attr().at(ATTR_VALUE_SCOPE_NAME).b(); + } + return false; +} + +// Make sure we don't recurse infinitely on recursive functions. +const int kMaxRecursionDepth = 10; + +bool IsNpuSupportingFunc(string func_name, FunctionLibraryDefinition *func_lib, int depth) { + LOG(INFO) << "function name is " << func_name << ", depth is " << depth; + if (func_lib == nullptr) { + LOG(ERROR) << "func lib is nullptr, function name is " << func_name; + return false; + } + if (depth >= kMaxRecursionDepth) { + LOG(ERROR) << "Rejecting " << func_name << ": function depth limit exceeded."; + return false; + } + const FunctionDef *func_def = func_lib->Find(func_name); + if (func_def == nullptr) { + LOG(ERROR) << "func def is nullptr, function name is " << func_name; + return false; + } + for (NodeDef node_def : func_def->node_def()) { + if (node_def.op() == "Const") { + LOG(INFO) << "Const in func can dump"; + } else if (!IsWhiteListSupport(node_def.op(), compile_mode) || IsWithoutNpuScope(node_def)) { + return false; + } + for (auto item : node_def.attr()) { + if (item.second.has_func()) { + if (!IsNpuSupportingFunc(item.second.func().name(), func_lib, depth + 1)) { + return false; + } + } + } + } + return true; +} + +bool IsNpuSupportingFunc(Node *node, FunctionLibraryDefinition *func_lib, + int depth) { + for (auto it = node->attrs().begin(); it != node->attrs().end(); ++it) { + if (it->second.has_func()) { + string func_name = it->second.func().name(); + if (!IsNpuSupportingFunc(func_name, func_lib, depth)) { + return false; + } + } + } + return true; +} + +Status FindNpuSupportCandidates(const Graph &graph, OrderedNodeSet *candidates, + FunctionLibraryDefinition *func_lib, + bool enableDP, bool mix_compile_mode) { + int64 startTime = InferShapeUtil::GetCurrentTimestap(); + compile_mode = mix_compile_mode; + std::vector sortedNodes; + bool hasIteratorOp = false; + bool hasMakeIteratorOp = false; + bool hasOutfeedDequeueOp = false; + bool hasStopOutfeedDequeueOp = 
false; + for (Node *node : graph.op_nodes()) { + sortedNodes.push_back(node); + if (node->type_string().find("MakeIterator") != string::npos) { + hasMakeIteratorOp = true; + } else if (node->type_string() == "Iterator" || node->type_string() == "IteratorV2") { + TF_RETURN_IF_ERROR(SetIteratorShardName(node)); + hasIteratorOp = true; + } else if (node->type_string() == "OutfeedDequeueOp") { + hasOutfeedDequeueOp = true; + } else if (node->type_string() == "StopOutfeedDequeueOp") { + hasStopOutfeedDequeueOp = true; + } + } + + if (hasStopOutfeedDequeueOp || hasOutfeedDequeueOp) { + candidates->clear(); + LOG(INFO) << "hostcall subgraph will run on host."; + return Status::OK(); + } + + std::sort(sortedNodes.begin(), sortedNodes.end(), NodeCompare()); + LOG(INFO) << "FindNpuSupportCandidates enableDP:" << enableDP + << ", mix_compile_mode: " << compile_mode + << ", hasMakeIteratorOp:" << hasMakeIteratorOp + << ", hasIteratorOp:" << hasIteratorOp; + + if (hasMakeIteratorOp && hasIteratorOp) { + candidates->clear(); + LOG(INFO) << "preprocessing subgraph will at dp_tf_ge_conversion_pass."; + return Status::OK(); + } + + OrderedNodeSet outSet; + for (Node *node : sortedNodes) { + // 0 is function depth + if (!IsNpuSupportingFunc(node, func_lib, 0)) { + continue; + } + if (!node->IsOp()) { // Ship Sink/Source nodes. 
+ continue; + } + if (enableDP && + (node->type_string() == "Iterator" || + node->type_string() == "IteratorV2" || + node->type_string() == "IteratorGetNext")) { + if (node->type_string() == "IteratorGetNext") { + for (Node *n : node->in_nodes()) { + REQUIRES_NOT_NULL(n); + LOG(INFO) << node->name() << " has in nodes " << n->name(); + if (n->type_string() == "Iterator" || n->type_string() == "IteratorV2") { + candidates->insert(node); + } + } + } + if (node->type_string() == "Iterator" || node->type_string() == "IteratorV2") { + for (Node *n : node->out_nodes()) { + REQUIRES_NOT_NULL(n); + LOG(INFO) << node->name() << " has in nodes " << n->name(); + if (n->type_string() == "IteratorGetNext") { + candidates->insert(node); + } + } + } + } else { + // Const down when it need down + if (node->type_string() == "Const") { + int ctrlEdgeNum = 0; + for (auto edge : node->in_edges()) { + REQUIRES_NOT_NULL(edge); + REQUIRES_NOT_NULL(edge->src()); + if (edge->IsControlEdge() && edge->src()->name() != "_SOURCE" && + IsWhiteListSupport(edge->src()->type_string(), mix_compile_mode) && + !IsWithoutNpuScope(edge->src())) { + candidates->insert(node); + ctrlEdgeNum++; + break; + } + } + if (ctrlEdgeNum >= 1) { continue; } + } + // normal needed down op + if (IsWhiteListSupport(node->type_string(), mix_compile_mode) && + !IsWithoutNpuScope(node)) { + candidates->insert(node); + } else { + outSet.insert(node); + } + } + } + if (mix_compile_mode) { + std::vector cfInfos; + Status status = BuildControlFlowInfo(&graph, &cfInfos); + if (!status.ok()) return status; + std::set unsupportedFrames; + for (auto it = outSet.begin(); it != outSet.end(); ++it) { + auto cfInfo = cfInfos[(*it)->id()]; + if (!cfInfo.frame_name.empty()) { + unsupportedFrames.insert(cfInfo.frame_name); + } + while (!cfInfos[cfInfo.parent_frame->id()].frame_name.empty()) { + unsupportedFrames.insert(cfInfos[cfInfo.parent_frame->id()].frame_name); + cfInfo = cfInfos[cfInfo.parent_frame->id()]; + } + } + for (auto it = 
candidates->begin(); it != candidates->end();) { + auto cfInfo = cfInfos[(*it)->id()]; + if (unsupportedFrames.find(cfInfo.frame_name) != unsupportedFrames.end()) { + outSet.insert(*it); + it = candidates->erase(it); + } else { + ++it; + } + } + } + + // Reference edge: The reference input/output of the sinking node does not sink + while (!outSet.empty()) { + auto iter = outSet.begin(); + auto node = *iter; + if (mix_compile_mode && (node->type_string() == "Where")) { + bool isInitializedGraph = InferShapeUtil::IsInitializedGraph(node); + if (isInitializedGraph) { + candidates->insert(node); + } + } + + outSet.erase(iter); + for (auto edge : node->out_edges()) { + REQUIRES_NOT_NULL(edge); + REQUIRES_NOT_NULL(edge->dst()); + if (!edge->IsControlEdge()) { + DataType dtypeDst = edge->dst()->input_type(edge->dst_input()); + if (IsRefType(dtypeDst) + && candidates->count(edge->dst()) > 0) { + candidates->erase(edge->dst()); + outSet.insert(edge->dst()); + LOG(INFO) << "Remove node : " << edge->dst()->name() << + " from candidates, because of node : " << + node->name() << " REF input."; + continue; + } + if (dtypeDst == DT_STRING || dtypeDst == DT_RESOURCE) { + if (edge->dst()->type_string() == "Assert") { + continue; + } + if (node->type_string() == "Const") { + continue; + } + if (candidates->erase(edge->dst()) > 0) { + outSet.insert(edge->dst()); + } + } + } + } + for (auto edge : node->in_edges()) { + REQUIRES_NOT_NULL(edge); + REQUIRES_NOT_NULL(edge->src()); + REQUIRES_NOT_NULL(edge->dst()); + if (!edge->IsControlEdge()) { + DataType dtypeDst = edge->dst()->input_type(edge->dst_input()); + if (IsRefType(dtypeDst) + && candidates->count(edge->src()) > 0) { + candidates->erase(edge->src()); + outSet.insert(edge->src()); + LOG(INFO) << "Remove node : " << edge->dst()->name() << + " from candidates, because of node : " << + node->name() << " REF Output."; + continue; + } + if (dtypeDst == DT_STRING || dtypeDst == DT_RESOURCE) { + if (candidates->erase(edge->src()) > 0) 
{ + outSet.insert(edge->src()); + } + } + } + } + } + int64 endTime = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "TFadapter find Npu support candidates cost: [" + << ((endTime - startTime) / kMicrosToMillis) << " ms]"; + return Status::OK(); +} + +bool NodeIsCandidateForClustering(Node *node, OrderedNodeSet *candidates) { + return candidates->count(node) > 0; +} + +Status AddRelationalConst(const Graph &graph, OrderedNodeSet *candidates) { + for (Node *node : graph.op_nodes()) { + if (node->type_string() == "Const") { + for (auto edge : node->out_edges()) { + REQUIRES_NOT_NULL(edge); + REQUIRES_NOT_NULL(edge->dst()); + if (NodeIsCandidateForClustering(edge->dst(), candidates)) { + candidates->insert(node); + break; + } + } + } + } + return Status::OK(); +} + +bool GetNodeFuncs(const FunctionLibraryDefinition *flib_def, + Node *node, std::vector &nodeFuncs) { + nodeFuncs.clear(); + for (auto iter = node->attrs().begin(); iter != node->attrs().end(); ++iter) { + if (iter->second.has_func()) { + nodeFuncs.push_back(iter->second.func().name()); + std::vector funcNameStack; + funcNameStack.clear(); + funcNameStack.push_back(iter->second.func().name()); + while (!funcNameStack.empty()) { + string funcName = funcNameStack.back(); + funcNameStack.pop_back(); + const FunctionDef *fdef = flib_def->Find(funcName); + if (fdef != nullptr) { + for (NodeDef ndef : fdef->node_def()) { + for (auto item : ndef.attr()) { + if (item.second.has_func()) { + nodeFuncs.push_back(item.second.func().name()); + funcNameStack.push_back(item.second.func().name()); + continue; + } + } + } + } + } + continue; + } + } + + return !nodeFuncs.empty(); +} + +struct Cluster { + int index; + std::set nodes; + std::set start_nodes_name; +}; + +// Merges src and dst clusters of the edge +void MergeClusters(Edge *edge, + std::map>& cluster_map) { + Node *src = edge->src(); + Node *dst = edge->dst(); + + // Merge dst cluster into src cluster + auto cluster_dst = cluster_map[dst]; + for (auto 
start_node_name : cluster_dst->start_nodes_name) { + cluster_map[src]->start_nodes_name.insert(start_node_name); + } + for (auto node : cluster_dst->nodes) { + cluster_map[src]->nodes.insert(node); + cluster_map[node] = cluster_map[src]; + } +} + +Status MergeSubgraphsInNewWay(std::vector> &sortedCluster, + OrderedNodeSet &npuSupportCandidates, + std::map> &clusterToMerge) { + int64 startTime = InferShapeUtil::GetCurrentTimestap(); + if (sortedCluster.size() < MIN_CLUSTER_SIZE) { + return Status::OK(); + } + // record already merged cluster + std::set mergedClusters; + // record every cluster merge to which cluster : first now, second dst + std::map mergePair; + uint32_t clusterindex = 0; + while (mergedClusters.size() < sortedCluster.size()) { + string dstSubgraph = sortedCluster[clusterindex].first; + if (mergedClusters.count(dstSubgraph) < 1) { + mergedClusters.insert(dstSubgraph); + for (const string &toMerge : clusterToMerge[dstSubgraph]) { + if (clusterToMerge[toMerge].count(dstSubgraph) > 0) { + mergedClusters.insert(toMerge); + mergePair[toMerge] = dstSubgraph; + } + } + } + clusterindex++; + if (clusterindex >= sortedCluster.size()) { + break; + } + } + + for (Node *n : npuSupportCandidates) { + string name; + Status s = GetNodeAttr(n->attrs(), PARTITION_SUB_GRAPH_ATTR, &name); + if (s.code() == error::Code::NOT_FOUND) { + continue; + } else { + TF_RETURN_IF_ERROR(s); + } + if (mergePair.count(name) > 0) { + n->ClearAttr(PARTITION_SUB_GRAPH_ATTR); + n->AddAttr(PARTITION_SUB_GRAPH_ATTR, mergePair[name]); + } + } + int64 endTime = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "TFadapter merge clusters cost: [" + << ((endTime - startTime) / kMicrosToMillis) << " ms]"; + return Status::OK(); +} + +Status MergeSubgraphs(std::vector> &sortedCluster, + OrderedNodeSet &npuSupportCandidates, + std::map> &clusterToMerge) { + int64 startTime = InferShapeUtil::GetCurrentTimestap(); + std::set mergedClusters; + if (sortedCluster.size() <= 1) { + return 
Status::OK(); + } + string dstSubgraph = sortedCluster[0].first; + mergedClusters.insert(dstSubgraph); + for (uint32_t i = 1; i < sortedCluster.size(); i++) { + bool canMerge = false; + string name = sortedCluster[i].first; + if (clusterToMerge[dstSubgraph].count(name) > 0 && + clusterToMerge[name].count(dstSubgraph) > 0) { + canMerge = true; + for (const string &mergedName : mergedClusters) { + // Mutual exclusion between merged groups + if (clusterToMerge[mergedName].count(name) == 0 || + clusterToMerge[name].count(mergedName) == 0) { + canMerge = false; + break; + } + } + } + if (canMerge && mergedClusters.count(name) == 0) { + mergedClusters.insert(name); + } + } + + for (Node *n : npuSupportCandidates) { + string name; + Status s = GetNodeAttr(n->attrs(), PARTITION_SUB_GRAPH_ATTR, &name); + if (s.code() == error::Code::NOT_FOUND) { + continue; + } else { + TF_RETURN_IF_ERROR(s); + } + if (name == dstSubgraph) { + continue; + } + if (mergedClusters.count(name) != 0) { + n->ClearAttr(PARTITION_SUB_GRAPH_ATTR); + n->AddAttr(PARTITION_SUB_GRAPH_ATTR, dstSubgraph); + } else if (name != dstSubgraph) { + n->ClearAttr(PARTITION_SUB_GRAPH_ATTR); + } + } + int64 endTime = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "TFadapter merge clusters cost: [" + << ((endTime - startTime) / kMicrosToMillis) << " ms]"; + return Status::OK(); +} + +std::vector string_split(const string &str, const string &pattern) { + std::vector resultVec; + string::size_type pos1, pos2; + pos2 = str.find(pattern); + pos1 = 0; + while (pos2 != string::npos) { + resultVec.push_back(str.substr(pos1, pos2 - pos1)); + pos1 = pos2 + pattern.size(); + pos2 = str.find(pattern, pos1); + } + if (pos1 != str.length()) { + resultVec.push_back(str.substr(pos1)); + } + return resultVec; +} + +Status MarkForPartition(std::unique_ptr* graphIn, + int &clusterNum, bool mix_compile_mode, + int graph_num, FunctionLibraryDefinition *func_lib, + std::map pass_options) { + Graph *graph = graphIn->get(); + bool 
enableDP = pass_options["enable_dp"] == "1"; + OrderedNodeSet npuSupportCandidates; + TF_RETURN_IF_ERROR(FindNpuSupportCandidates(*graph, &npuSupportCandidates, + func_lib, enableDP, mix_compile_mode)); + TF_RETURN_IF_ERROR(AddRelationalConst(*graph, &npuSupportCandidates)); + + std::map> cluster_map; + tensorflow::GraphCycles cycles; + std::string job = pass_options["job"]; + + // Initial Step: Each node is a cluster of its own + for (auto node : graph->nodes()) { + int new_index = cycles.NewNode(); + try { + cluster_map[node] = std::make_shared(); + } catch (...) { + return errors::Internal("make shared failed"); + } + cluster_map[node]->index = new_index; + cluster_map[node]->nodes.insert(node); + + if (node->attrs().Find("_StartNodeName")) { + std::vector startNodeVec = string_split(node->attrs().Find("_StartNodeName")->s(), ";"); + for (auto startNodeName : startNodeVec) { + cluster_map[node]->start_nodes_name.insert(startNodeName); + } + } + } + // Check for existing cyclicity in the graph + for (auto edge : graph->edges()) { + REQUIRES_NOT_NULL(edge); + Node *src = edge->src(); + Node *dst = edge->dst(); + REQUIRES_NOT_NULL(src); + REQUIRES_NOT_NULL(dst); + // Skip source/sink + if (!src->IsOp() || !dst->IsOp()) { + continue; + } + // Skip NextIteration + if (src->IsNextIteration()) { + continue; + } + if (!cycles.InsertEdge(cluster_map[src]->index, cluster_map[dst]->index)) { + LOG(ERROR) << "Failing due to cycle"; + return errors::Unimplemented( + "Input graph has a cycle (inserting an edge from ", + src->DebugString(), " to ", dst->DebugString(), + " would create a cycle)"); + } + } + + bool changed = false; + do { + changed = false; + for (auto edge : graph->edges()) { + REQUIRES_NOT_NULL(edge); + Node *src = edge->src(); + Node *dst = edge->dst(); + REQUIRES_NOT_NULL(src); + REQUIRES_NOT_NULL(dst); + if (!src->IsOp() || !dst->IsOp()) { + continue; + } + + int src_index = cluster_map[src]->index; + int dst_index = cluster_map[dst]->index; + + if 
(!NodeIsCandidateForClustering(src, &npuSupportCandidates) || + !NodeIsCandidateForClustering(dst, &npuSupportCandidates)) { + continue; + } + + // Check if contracting the edge will lead to cycles + // if not, MergeClusters + if (cycles.HasEdge(src_index, dst_index) && + cycles.ContractEdge(src_index, dst_index)) { + if (job != "localhost") { + bool find_same_start = false; + auto cluster_src = cluster_map[src]; + auto cluster_dst = cluster_map[dst]; + for (auto src_start_name : cluster_src->start_nodes_name) { + for (auto dst_start_name : cluster_dst->start_nodes_name) { + if (src_start_name == dst_start_name) { + find_same_start = true; + LOG(INFO) << "node : " << src->name() << " and node : "<< + dst->name() << " has same start node : " << src_start_name; + break; + } + } + if (find_same_start) { + break; + } + } + if (find_same_start) { + continue; + } + } + MergeClusters(edge, cluster_map); + changed = true; + } + } + } while (changed); + + int64 clusterSequenceNum = 0; + std::map> clusterInfo; + std::map> clusterToMerge; + std::map> clusterIndexToMerge; + std::set seen; + std::set clusterSet; + + for (auto item : cluster_map) { + auto cluster = item.second.get(); + if (seen.count(cluster) != 0) { + continue; + } + + bool hasSupportNode = false; + bool hasNonSupportNode = false; + + for (auto node : cluster->nodes) { + if (NodeIsCandidateForClustering(node, &npuSupportCandidates)) { + hasSupportNode = true; + } else { + hasNonSupportNode = true; + } + } + + if (hasSupportNode && hasNonSupportNode) { + LOG(INFO) << "Cluster " << cluster->index + << " has both Candidate and non-Candidate nodes"; + return errors::Internal("Cluster ", cluster->index, + " has both Candidate and non-Candidate nodes"); + } + + if (!hasSupportNode) { + seen.insert(cluster); + continue; + } + + clusterSet.insert(cluster->index); + string op_prefix = "GeOp"; + + string name = strings::StrCat(string(op_prefix), std::to_string(graph_num), + string("_"), 
std::to_string(clusterSequenceNum++)); + clusterInfo[cluster->index] = std::make_pair(name, cluster->nodes.size()); + for (auto node : cluster->nodes) { + if (!NodeIsCandidateForClustering(node, &npuSupportCandidates)) { + // attr PARTITION_SUB_GRAPH_ATTR delete later + clusterInfo.erase(cluster->index); + return errors::Internal("Node ", node->DebugString(), + " was not marked for clustering but was " + "placed in a cluster."); + } + node->AddAttr(PARTITION_SUB_GRAPH_ATTR, name); + } + seen.insert(cluster); + } + // Generate Merge possibility between clusters + if (clusterSet.size() > 1) { + for (int src : clusterSet) { + for (int dst : clusterSet) { + if (src == dst) { continue; } + if (!cycles.IsReachableNonConst(src, dst) && !cycles.IsReachableNonConst(dst, src)) { + if (mix_compile_mode) { + bool canReach = false; + for (auto cluster : clusterIndexToMerge[src]) { + if (cycles.IsReachableNonConst(dst, cluster) || cycles.IsReachableNonConst(cluster, dst)) { + canReach = true; + break; + } + } + + if (!canReach && job != "localhost") { + Cluster *cluster_src = nullptr; + Cluster *cluster_dst = nullptr; + for (auto cluster_item : cluster_map) { + if (cluster_src == nullptr && src == cluster_item.second.get()->index) { + cluster_src = cluster_item.second.get(); + } + if (cluster_dst == nullptr && dst == cluster_item.second.get()->index) { + cluster_dst = cluster_item.second.get(); + } + if (cluster_src != nullptr && cluster_dst != nullptr) { + break; + } + } + bool find_same_start = false; + for (auto src_start_name : cluster_src->start_nodes_name) { + for (auto dst_start_name : cluster_dst->start_nodes_name) { + if (src_start_name == dst_start_name) { + find_same_start = true; + break; + } + } + if (find_same_start) { + break; + } + } + if (find_same_start) { + canReach = true; + } + } + if (!canReach) { + clusterToMerge[clusterInfo[src].first].insert(clusterInfo[dst].first); + clusterIndexToMerge[src].insert(dst); + } + } else { + 
clusterToMerge[clusterInfo[src].first].insert(clusterInfo[dst].first); + } + } + } + } + } + + struct ClusterCompare { + bool operator()(std::pair a, std::pair b) const { + return a.second > b.second; + } + }; + + std::vector> sortedCluster; + for (auto cluster : clusterInfo) { + sortedCluster.push_back(cluster.second); + } + std::sort(sortedCluster.begin(), sortedCluster.end(), ClusterCompare()); + clusterNum = clusterSequenceNum; + if (static_cast(sortedCluster.size()) != clusterNum) { + return errors::Internal("Sorted cluster size should be equal to origin subgraph num. ", + "Sorted cluster size is ", sortedCluster.size(), + ", origin subgraph num is ", clusterNum); + } + LOG(INFO) << "cluster Num is " << clusterNum; + if (clusterNum == 0) { + return Status::OK(); + } + + int minGroupSizeTemp = 1; + int minGroupSize = (((minGroupSizeTemp > 0) && (minGroupSizeTemp < MAX_GROUP_SIZE)) + ? (minGroupSizeTemp) : (1)); // default threshold is 10. + LOG(INFO) << "All nodes in graph: " << graph->num_nodes() << ", max nodes count: " + << sortedCluster[0].second << " in subgraph: " << sortedCluster[0].first + << " minGroupSize: " << minGroupSize; + + bool isDateSetCluster = false; + bool isBroadcastGraph = false; + for (Node *n : npuSupportCandidates) { + if (n->type_string().find("MakeIterator") != string::npos) { + isDateSetCluster = true; + break; + } + if (n->type_string().find("HcomBroadcast") != string::npos) { + isBroadcastGraph = true; + break; + } + } + if (sortedCluster[0].second >= minGroupSize || isDateSetCluster || isBroadcastGraph) { + if (sortedCluster[0].second == 1) { + for (Node *n : npuSupportCandidates) { + if (n->type_string() == "NoOp" || n->type_string() == "Identity") { + string name; + Status s = GetNodeAttr(n->attrs(), PARTITION_SUB_GRAPH_ATTR, &name); + if (s.code() == error::Code::NOT_FOUND) { + continue; + } else { + TF_RETURN_IF_ERROR(s); + } + n->ClearAttr(PARTITION_SUB_GRAPH_ATTR); + LOG(INFO) << "Clear isolated NoOp from " << name; + 
clusterNum -= 1; + } + } + } + if (clusterNum > 1) { + if (mix_compile_mode) { + TF_RETURN_IF_ERROR(MergeSubgraphsInNewWay(sortedCluster, npuSupportCandidates, clusterToMerge)); + } else { + TF_RETURN_IF_ERROR(MergeSubgraphs(sortedCluster, npuSupportCandidates, clusterToMerge)); + clusterNum = 1; + } + } + } else { + LOG(INFO) << "Clear all node PARTITION_SUB_GRAPH_ATTR attr."; + for (Node *n : npuSupportCandidates) { + n->ClearAttr(PARTITION_SUB_GRAPH_ATTR); + } + clusterNum = 0; + } + + return Status::OK(); +} + +// A node/slot pair. +struct NodeSlot { + NodeSlot() : node(nullptr), slot(-1), dtype(DT_INVALID) {} + NodeSlot(const Node *node, int slot) + : node(node), slot(slot), dtype(DT_INVALID) {} + NodeSlot(const Node *node, int slot, DataType dtype) + : node(node), slot(slot), dtype(dtype) {} + + const Node *node; + int slot; + + // Optional: used to record the destination type of a source NodeSlot in case + // the source output is a Ref type that is cast to a Tensor at the + // destination. + DataType dtype; + + bool operator==(const NodeSlot &other) const { + return node == other.node && slot == other.slot && dtype == other.dtype; + } + + // Leave dtype out of the hash since there are never two NodeSlots with the + // same node and slot and different dtypes. 
+ struct Hasher { + uint64 operator()(NodeSlot const &s) const { + return Hash64Combine(std::hash()(s.node), + std::hash()(s.slot)); + } + }; + + struct PairHasher { + uint64 operator()(std::pair const &s) const { + return Hash64Combine(Hasher()(s.first), Hasher()(s.second)); + } + }; +}; + +Node *AddIdentityNode(Graph *graph, const Edge *edge, string srcName, int srcIndex, + string device, Status *status) { + // edge is not nullptr + if (edge->src() == nullptr) { + return nullptr; + } + NodeDef identityDef; + NodeDefBuilder builder( + strings::StrCat(edge->src()->name(), "_dummyIdentity"), "Identity"); + DataType dtype = BaseType(edge->src()->output_type(edge->src_output())); + builder.Attr("T", dtype); + builder.Input(srcName, srcIndex, dtype); + builder.Device(device); + Status s = builder.Finalize(&identityDef); + if (!s.ok()) { + status->Update(s); + return nullptr; + } + Node *identityNode = graph->AddNode(identityDef, &s); + if (!s.ok() || identityNode == nullptr) { + status->Update(s); + return nullptr; + } + identityNode->set_assigned_device_name(device); + + return identityNode; +} + +class OMSplitter { + public: + OMSplitter(string groupAttribute, Graph const *graphIn, std::map npu_optimizer_options, + std::map pass_options) + : groupAttribute_(std::move(groupAttribute)), + graphIn_(graphIn), npu_optimizer_options_(npu_optimizer_options), pass_options_(pass_options){} + + ~OMSplitter() {} + // Find subgraphs marked with 'groupAttribute', and build a new + // subgraph, one for each value of 'groupAttribute'. + // 'subgraphNum' indicate how many subgraphs has been built. + Status SplitIntoSubgraphs(uint32_t &subgraphNum); + + // Build a FunctionDef for each subgraph, and add it 'library'. The values of + // the 'groupAttribute' annotations become the function names. 
+ Status BuildFunctionDefs(FunctionLibraryDefinition *library, + string graph_format); + + // Write a copy of the input graph to 'graphOut', where the subgraphs are + // replaced with GEOPs to the new functions. + Status BuildOutputGraph(Graph *graphOut); + + private: + // A subgraph of the input, all marked with a common 'groupAttribute' + // value. + // + // In the following simple example, A, B, ..., E are nodes in the original + // graph. The group attributes g are + // each shown as either 0 or empty. + // + // A --> B --> C --> D --> E + // g: g:0 g:0 g:0 g: + // + // The example is rewritten to one graph; + // + // A --> GEOp_0 --> E + // + // The GEOp is as follows. + // + // Arg --> B --> C --> D --> Retval + // + class Subgraph { + public: + // Creates a graph to build the subgraph in, if it doesn't already exist, + // using the same op registry and versions as graphIn. + Subgraph() + : GEOpNodeInputs_(nullptr), GEOpNodeOutputs_(nullptr){} + + ~Subgraph() {} + + Node *MakeNodeImage(const Graph *graphIn, Node *node); + + // Returns the graph the subgraph is being built in. + Graph *GetGraph() const; + + // Builds a FunctionDef, and adds it to 'library'. The value of the + // 'groupAttribute' annotations becomes the function name. + Status BuildFunctionDef(const string &nameIn, + FunctionLibraryDefinition *library, + const string graph_format); + + // Adds the GEOp node to graphOut. + Status AddGEOpNode( + const std::unordered_map &nodeImages, Graph *graphOut); + + // Returns the Node that inputs to the GEOp should be wired up to. + Node *GetGEOpNodeForInputs() const; + + // Returns the Node that outputs to the GEOp should be wired up to. + Node *GetGEOpNodeForOutputs() const; + + // Returns the index of the arg that the dst of edge should connect to. + int GetArgIndexForEdge(const Edge *edge) const; + + // Returns the index of the result that the src of edge should connect to. 
+ int GetResultIndexForEdge(const Edge *edge) const; + + // Creates an _Arg node for the src node of edge, and add its index to + // argsBySrc_, if none exists yet. Also adds its index to argsByDst_, + // and adds the edge within the subgraph from the _Arg node to the image of + // the dst node. + Status RecordArg(const Edge *edge, + const std::unordered_map &nodeImages, + std::vector> *srcArgPairs); + + // Creates a _Retval node for the src node of edge, and add it to results_, + // if none exists yet. If a new _Retval node is created, also adds the edge + // within the subgraph from the src to the _Retval node. + Status RecordResult( + const Edge *edge, + const std::unordered_map &nodeImages); + + // Indicates if the subgraph does not have any input or output + bool isIsolatedSubgraph(); + + Status SetOptions(std::map npu_optimizer_options, + std::map pass_options); + + private: + // The subgraph extracted from the input graph, suitable for being turned + // into a FunctionDef. Inputs are fed by _Arg nodes, and outputs are + // returned by _Retval nodes. + std::unique_ptr graph_; + + // Which device are these nodes on + string device_; + + // NodeDef for the GEOp node. + NodeDef GEOpNodeDef_; + + // Name that is used for the GEOp node. + string functionDefName_; + + // GEOp node(s) in the output graph. Not owned. + // both point to the function call node. + Node *GEOpNodeInputs_; + Node *GEOpNodeOutputs_; + + // Maps from source (producer node/slot) and destination + // (consumer node/slot) tensors in the input graph to _Arg numbers in + // the subgraph. + std::unordered_map argsBySrc_; + std::unordered_map argsByDst_; + + // The _Arg nodes in the subgraph, in order by argument number. + std::vector args_; + + // Map from source tensor in the input graph to result #. 
+ std::unordered_map results_; + + DataTypeVector argDatetypes_; + DataTypeVector resultDatetypes_; + + std::map npu_optimizer_options_; + std::map pass_options_; + }; + + // Returns the key attribute associated with a node in attr, Sets + // either result to the empty string if the respective attribute is not + // found. + Status GetSubgraphIdAttr(Node const *node, string *attr) const; + + // Copies edges local to a subgraph. Adds _Arg and _Retval nodes to + // subgraphs for data edges that cross subgraph boundaries. + Status CopySubgraphEdges( + const std::unordered_map &nodeImages, + std::vector> *srcArgPairs); + + // Copies all marked nodes to a subgraph. Does nothing for unmarked nodes + Status CopySubgraphNodes(std::unordered_map *nodeImages); + + // Copies all nodes that aren't in a subgraph to the output graph. + Status CopyNodesToOutputGraph(Graph *graphOut, + std::unordered_map *nodeImages); + + // Adds GEOp nodes for each subgraph. + Status AddGEOpNodes( + const std::unordered_map &nodeImages, + Graph *graphOut); + + // Finds the image of an edge source in the output graph. If the edge crosses + // a subgraph boundary it is the output of a GEOp node, otherwise it is a node + // in the output graph. + Status FindOutputImageOfEdgeSrc( + const string &srcSubgraphId, const string &dstSubgraphId, + const std::unordered_map &nodeImages, + const Node *originalSrcNode, Node **srcImage); + + // Finds an edge source slot in the output graph. If the edge crosses a + // subgraph boundary it is a slot on the output of a GEOp node , otherwise + // it is a slot on a node in the output graph. + int FindOutputSlotOfEdgeSrc(const string &srcSubgraphId, + const string &dstSubgraphId, + const Edge *edge); + + // Finds the image of an edge destination in the output graph. If the edge + // crosses a subgraph boundary it is the input of a GEOp node , otherwise + // it is a node in the output graph. 
+ Status FindOutputImageOfEdgeDst( + const string &srcSubgraphId, const string &dstSubgraphId, + const std::unordered_map &nodeImages, + const Node *originalDstNode, Node **dstImage); + + // Finds an edge destination slot in the output graph. If the edge crosses a + // subgraph boundary it is a slot on the input of a GEOp node, otherwise + // it is a slot on a node in the output graph. + int FindOutputSlotOfEdgeDst(const string &srcSubgraphId, const string &dstSubgraphId, + const Edge *edge); + + // Copies a single edge to the output graph. The edge is either entirely + // within the output graph, or crosses into or out of a subgraph. + Status CopyEdgeToOutputGraph( + const Edge *edge, const string &srcSubgraphId, const string &dstSubgraphId, + const std::unordered_map &nodeImages, + Graph *graphOut, + std::unordered_set, NodeSlot::PairHasher> * + edges_added); + + // Adds all edges to the output graph. + Status AddEdgesToOutputGraph( + const std::unordered_map &nodeImages, + Graph *graphOut); + + const string groupAttribute_; + const Graph *graphIn_; + std::vector refIn_; + uint64_t ID_NUM = 3; + + std::unordered_map subgraphs_; + std::map npu_optimizer_options_; + std::map pass_options_; + + TF_DISALLOW_COPY_AND_ASSIGN(OMSplitter); +}; + +Node *OMSplitter::Subgraph::GetGEOpNodeForInputs() const { + return GEOpNodeInputs_; +} + +Node *OMSplitter::Subgraph::GetGEOpNodeForOutputs() const { + return GEOpNodeOutputs_; +} + +int OMSplitter::Subgraph::GetArgIndexForEdge(const Edge *edge) const { + return argsByDst_.at(NodeSlot(edge->dst(), edge->dst_input())); +} + +int OMSplitter::Subgraph::GetResultIndexForEdge(const Edge *edge) const { + return results_.at(NodeSlot(edge->src(), edge->src_output())); +} + +Node *OMSplitter::Subgraph::MakeNodeImage(const Graph *graphIn, Node *node) { + if (graph_ == nullptr) { + graph_.reset(new (std::nothrow) Graph(graphIn->op_registry())); + if (graph_ == nullptr) { + LOG(ERROR) << "graph new failed"; + return nullptr; + } + 
graph_->set_versions(graphIn->versions()); + } + + std::string job = pass_options_["job"]; + int task_index = std::atoi(pass_options_["task_index"].c_str()); + + if (device_.empty()) { + if (job != "localhost" && job != "ps" && job != "defalut") { + string device_name = std::string("/job:") + std::string(job) + std::string("/replica:0/task:") + \ + std::to_string(task_index) + std::string("/device:CPU:0"); + device_ = device_name; + } else if (job == "localhost"){ + device_ = string("/job:localhost/replica:0/task:0/device:CPU:0"); + } else { + LOG(ERROR) << "job type is : " << job << " not support. "; + return nullptr; + } + } + Node *nodeOut = graph_->CopyNode(node); + if (nodeOut == nullptr) { + LOG(ERROR) << "copy node failed"; + return nullptr; + } + nodeOut->set_assigned_device_name(device_); + return nodeOut; +} + +Graph *OMSplitter::Subgraph::GetGraph() const { + return graph_.get(); +} + +Status OMSplitter::Subgraph::RecordArg( + const Edge *edge, const std::unordered_map &nodeImages, + std::vector> *srcArgPairs) { + Node *srcNode = edge->src(); + int srcSlot = edge->src_output(); + std::unordered_map::iterator iter; + bool inserted = false; + std::tie(iter, inserted) = + argsBySrc_.emplace(NodeSlot(srcNode, srcSlot), argsBySrc_.size()); + int argIndex = iter->second; + if (inserted) { + NodeDef argNodeDef; + NodeDefBuilder builder( + strings::StrCat(srcNode->name(), "_", srcSlot, "_arg"), ARG_OP); + DataType dtype = edge->dst()->input_type(edge->dst_input()); + builder.Attr("T", dtype); + builder.Attr("index", argIndex); + Status s = builder.Finalize(&argNodeDef); + if (!s.ok()) { return s; } + + Node *arg = graph_->AddNode(argNodeDef, &s); + if (!s.ok()) { return s; } + + srcArgPairs->push_back({srcNode, arg}); + args_.push_back(arg); + argDatetypes_.push_back(dtype); + } + Node *dstNode = edge->dst(); + Node *dstImage = nodeImages.at(dstNode); + int dstSlot = edge->dst_input(); + argsByDst_[NodeSlot(dstNode, dstSlot)] = argIndex; + 
graph_->AddEdge(args_[argIndex], 0, dstImage, dstSlot); + return Status::OK(); +} + +Status OMSplitter::Subgraph::RecordResult( + const Edge *edge, + const std::unordered_map &nodeImages) { + Node *srcNode = edge->src(); + Node *srcImage = nodeImages.at(srcNode); + REQUIRES_NOT_NULL(srcImage); + int srcSlot = edge->src_output(); + std::unordered_map::iterator iter; + bool inserted = false; + std::tie(iter, inserted) = + results_.emplace(NodeSlot(srcNode, srcSlot), results_.size()); + int retIndex = iter->second; + if (inserted) { + NodeDef retNodeDef; + NodeDefBuilder builder( + strings::StrCat(srcNode->name(), "_", srcSlot, "_retval"), RET_OP); + DataType dtype = BaseType(srcNode->output_type(srcSlot)); + builder.Attr("T", dtype); + builder.Attr("index", retIndex); + builder.Input(srcImage->name(), srcSlot, dtype); + Status s = builder.Finalize(&retNodeDef); + if (!s.ok()) { return s; } + Node *ret = graph_->AddNode(retNodeDef, &s); + if (!s.ok()) { return s; } + resultDatetypes_.push_back(dtype); + // src --> dst has ref input/output, add identity node: src --> identity --> dst + if (IsRefType(edge->src()->output_type(edge->src_output())) + || IsRefType(edge->dst()->input_type(edge->dst_input()))) { + Status addStatus; + Node *identityNode = AddIdentityNode(graph_.get(), edge, srcImage->name(), srcSlot, + srcImage->assigned_device_name(), &addStatus); + if (!addStatus.ok()) { return addStatus; } + graph_->AddEdge(srcImage, srcSlot, identityNode, 0); + graph_->AddEdge(identityNode, 0, ret, 0); + } else { + graph_->AddEdge(srcImage, srcSlot, ret, 0); + } + } + return Status::OK(); +} + +Status OMSplitter::Subgraph::BuildFunctionDef( + const string &nameIn, FunctionLibraryDefinition *library, const string graph_format) { + string name = nameIn; + + GEOpNodeDef_.set_op("GeOp"); + + GEOpNodeDef_.set_name(name); + GEOpNodeDef_.set_device(device_); + + for (auto node : graph_->nodes()) { + std::vector nodeFuncs; + if (GetNodeFuncs(library, node, nodeFuncs)) { + 
std::unique_ptr func_def_lib(new (std::nothrow) FunctionDefLibrary()); + REQUIRES_NOT_NULL(func_def_lib); + LOG(INFO) << "Node [" << node->name() << "] has funcs:"; + for (auto func : nodeFuncs) { + LOG(INFO) << func; + FunctionDef *fdef = func_def_lib->add_function(); + REQUIRES_NOT_NULL(fdef); + *fdef = *(library->Find(func)); + } + string funcdefStr; + func_def_lib->SerializeToString(&funcdefStr); + node->AddAttr(ATTR_NAME_FRAMEWORK_FUNC_DEF, funcdefStr); + } + } + + functionDefName_ = name; + FunctionDef fdef; + TF_RETURN_IF_ERROR(OMSubGraphToFunctionDef(*graph_, name, &fdef)); + + NameAttrList function; + function.set_name(functionDefName_); + *function.mutable_attr() = GEOpNodeDef_.attr(); + AddNodeAttr("function", function, &GEOpNodeDef_); + AddNodeAttr("Tin", argDatetypes_, &GEOpNodeDef_); + AddNodeAttr("Tout", resultDatetypes_, &GEOpNodeDef_); + AddNodeAttr("data_format", graph_format, &GEOpNodeDef_); + + std::string attr_name = ""; + for (auto option : npu_optimizer_options_) { + attr_name = std::string("_") + option.first; + AddNodeAttr(attr_name, option.second, &GEOpNodeDef_); + } + AddNodeAttr("_NpuOptimizer", "NpuOptimizer", &GEOpNodeDef_); + + if (library->Find(name) == nullptr) { + TF_RETURN_IF_ERROR(library->AddFunctionDef(fdef)); + } + return Status::OK(); +} + +Status OMSplitter::Subgraph::AddGEOpNode(const std::unordered_map &nodeImages, + Graph *graphOut) { + Status s; + GEOpNodeInputs_ = graphOut->AddNode(GEOpNodeDef_, &s); + if (!s.ok()) { return s; } + + // Copy the assigned device and the key_annotation over. 
+ REQUIRES_NOT_NULL(GEOpNodeInputs_); + GEOpNodeInputs_->set_assigned_device_name(device_); + GEOpNodeOutputs_ = GEOpNodeInputs_; + + return Status::OK(); +} + +bool OMSplitter::Subgraph::isIsolatedSubgraph() { + return false; +} + +Status OMSplitter::Subgraph::SetOptions(std::map npu_optimizer_options, + std::map pass_options) +{ + npu_optimizer_options_ = npu_optimizer_options; + pass_options_ = pass_options; + return Status::OK(); +} + +Status OMSplitter::GetSubgraphIdAttr(Node const *node, string *attr) const { + Status s = GetNodeAttr(node->attrs(), groupAttribute_, attr); + if (s.code() == error::Code::NOT_FOUND) { + // Return empty attr if there's no groupAttribute. + attr->clear(); + } else { + TF_RETURN_IF_ERROR(s); + } + return Status::OK(); +} + +bool IsInSubgraph(const string &subgraphId) { + return !subgraphId.empty(); +} + +Status OMSplitter::CopySubgraphNodes(std::unordered_map *nodeImages) { + for (Node *node : graphIn_->op_nodes()) { + string subgraphId; + TF_RETURN_IF_ERROR(GetSubgraphIdAttr(node, &subgraphId)); + if (!IsInSubgraph(subgraphId)) { + continue; + } + + Subgraph &subgraph = subgraphs_[subgraphId]; + Status s= subgraph.SetOptions(npu_optimizer_options_, pass_options_); + if (s != Status::OK()) { + LOG(INFO) << "Subgraph Id: " << subgraphId << "set npu optimizer error."; + return s; + } + Node *image = subgraph.MakeNodeImage(graphIn_, node); + REQUIRES_NOT_NULL(image); + image->ClearAttr(groupAttribute_); + (*nodeImages)[node] = image; + } + return Status::OK(); +} + +Status OMSplitter::CopySubgraphEdges(const std::unordered_map &nodeImages, + std::vector> *srcArgPairs) { + for (const Edge *edge : graphIn_->edges()) { + REQUIRES_NOT_NULL(edge); + REQUIRES_NOT_NULL(edge->src()); + REQUIRES_NOT_NULL(edge->dst()); + string srcSubgraphId; + TF_RETURN_IF_ERROR(GetSubgraphIdAttr(edge->src(), &srcSubgraphId)); + string dstSubgraphId; + TF_RETURN_IF_ERROR(GetSubgraphIdAttr(edge->dst(), &dstSubgraphId)); + Node *srcImage = 
gtl::FindWithDefault(nodeImages, edge->src(), nullptr); + Node *dstImage = gtl::FindWithDefault(nodeImages, edge->dst(), nullptr); + // Copy edges that are local to a subgraph. + if (IsInSubgraph(srcSubgraphId) && IsInSubgraph(dstSubgraphId) && + srcSubgraphId == dstSubgraphId) { + Graph *g = subgraphs_[srcSubgraphId].GetGraph(); + REQUIRES_NOT_NULL(g); + REQUIRES_NOT_NULL(srcImage); + REQUIRES_NOT_NULL(dstImage); + if (edge->IsControlEdge()) { + g->AddControlEdge(srcImage, dstImage); + } else { + g->AddEdge(srcImage, edge->src_output(), dstImage, edge->dst_input()); + } + continue; + } + + // Record 'src' as an output of its subgraph. + if (IsInSubgraph(srcSubgraphId)) { + if (!edge->IsControlEdge()) { + DataType dtypeDst = edge->dst()->input_type(edge->dst_input()); + if (IsRefType(dtypeDst)) { + return errors::InvalidArgument( + "Ref Tensors (e.g., Variables) are not supported as results: " + "tensor ", + edge->src()->name(), ":", edge->src_output(), + ", dst is ", edge->dst()->name()); + } + } + + Subgraph &srcSubgraph = subgraphs_[srcSubgraphId]; + + // Ignore control edges leaving the subgraph. We will lift them onto the + // enclosing GEOps in BuildOutputGraph(). + if (!edge->IsControlEdge()) { + TF_RETURN_IF_ERROR(srcSubgraph.RecordResult(edge, nodeImages)); + } + } + + // Record 'dst' as an input of its subgraph. + if (IsInSubgraph(dstSubgraphId)) { + // Look at the type of the destination not the source, since Ref output + // Tensors can be automatically cast to non-Ref Tensors at the + // destination. + if (!edge->IsControlEdge()) { + DataType dtypeDst = edge->dst()->input_type(edge->dst_input()); + if (IsRefType(dtypeDst)) { + return errors::InvalidArgument( + "Ref Tensors (e.g., Variables) are not supported as args: " + "tensor ", + edge->src()->name(), ":", edge->src_output(), + ", dst is ", edge->dst()->name()); + } + } + + Subgraph &dstSubgraph = subgraphs_[dstSubgraphId]; + + // Ignore control edges entering the subgraph. 
We will lift them onto + // the enclosing GEOps in BuildOutputGraph(). + if (!edge->IsControlEdge()) { + if (IsRefType(edge->src()->output_type(edge->src_output()))) { + refIn_.push_back(edge); + } + TF_RETURN_IF_ERROR(dstSubgraph.RecordArg(edge, nodeImages, srcArgPairs)); + } + } + } + return Status::OK(); +} + +Status OMSplitter::SplitIntoSubgraphs(uint32_t &subgraphNum) { + // Map from input graph nodes to subgraph nodes. + std::unordered_map nodeImages; + + // Each entry of srcArgPairs is a pair whose first element is a node in the + // original graph that has an output edge in the subgraph, and whose second + // element is the arg node in the subgraph that it sends to. The vector will + // be filled in below in AddArgs. + std::vector> srcArgPairs; + subgraphNum = 0; + + TF_RETURN_IF_ERROR(CopySubgraphNodes(&nodeImages)); + TF_RETURN_IF_ERROR(CopySubgraphEdges(nodeImages, &srcArgPairs)); + + std::vector allSubgraphNames; + for (auto &entry : subgraphs_) { + allSubgraphNames.push_back(entry.first); + } + for (string &subgraphName : allSubgraphNames) { + Subgraph &subgraph = subgraphs_[subgraphName]; + if (subgraph.isIsolatedSubgraph()) { + LOG(INFO) << "IsolatedSubgraph: " << subgraphName; + subgraphs_.erase(subgraphName); + for (Node *node : graphIn_->op_nodes()) { + string subgraphId; + TF_RETURN_IF_ERROR(GetSubgraphIdAttr(node, &subgraphId)); + if (IsInSubgraph(subgraphId) && subgraphId == subgraphName) { + node->ClearAttr(groupAttribute_); + } + } + } + } + + subgraphNum = subgraphs_.size(); + LOG(INFO) << "subgraphNum: " << subgraphNum; + + return Status::OK(); +} + +Status OMSplitter::BuildFunctionDefs(FunctionLibraryDefinition *library, + string graph_format) { + for (auto &subgraphEntry : subgraphs_) { + string name = subgraphEntry.first; + Subgraph &subgraph = subgraphEntry.second; + TF_RETURN_IF_ERROR(subgraph.BuildFunctionDef(name, library, graph_format)); + } + return Status::OK(); +} + +Status OMSplitter::CopyNodesToOutputGraph(Graph *graphOut, + 
std::unordered_map *nodeImages) { + for (Node *node : graphIn_->op_nodes()) { + string subgraphId; + TF_RETURN_IF_ERROR( + GetSubgraphIdAttr(node, &subgraphId)); + + if (IsInSubgraph(subgraphId)) { + continue; + } + Node *image = graphOut->CopyNode(node); + REQUIRES_NOT_NULL(image); + (*nodeImages)[node] = image; + } + (*nodeImages)[graphIn_->source_node()] = graphOut->source_node(); + (*nodeImages)[graphIn_->sink_node()] = graphOut->sink_node(); + return Status::OK(); +} + +Status OMSplitter::AddGEOpNodes( + const std::unordered_map &nodeImages, Graph *graphOut) { + for (auto &subgraphEntry : subgraphs_) { + TF_RETURN_IF_ERROR(subgraphEntry.second.AddGEOpNode( + nodeImages, graphOut)); + } + return Status::OK(); +} + +Status OMSplitter::FindOutputImageOfEdgeSrc( + const string &srcSubgraphId, const string &dstSubgraphId, + const std::unordered_map &nodeImages, + const Node *originalSrcNode, Node **srcImage) { + if (IsInSubgraph(srcSubgraphId)) { + // The edge is from a subgraph to a regular node in the output graph so + // use the GEOp node output. + *srcImage = subgraphs_.at(srcSubgraphId).GetGEOpNodeForOutputs(); + } else { + // The source of the edge is in the output graph so use the node image in + // the output graph. + *srcImage = nodeImages.at(originalSrcNode); + } + return Status::OK(); +} + +int OMSplitter::FindOutputSlotOfEdgeSrc( + const string &srcSubgraphId, const string &dstSubgraphId, + const Edge *edge) { + if (IsInSubgraph(srcSubgraphId)) { + const Subgraph &srcSubgraph = subgraphs_.at(srcSubgraphId); + // 'src' is in a subgraph and 'dst' is a regular node in the output + // graph. Use the corresponding GEOp output instead. + return srcSubgraph.GetResultIndexForEdge(edge); + } else { + // The source of the edge is in the output graph so use the regular edge + // slot. 
+ return edge->src_output(); + } +} + +Status OMSplitter::FindOutputImageOfEdgeDst( + const string &srcSubgraphId, const string &dstSubgraphId, + const std::unordered_map &nodeImages, + const Node *originalDstNode, Node **dstImage) { + if (IsInSubgraph(dstSubgraphId)) { + // The edge is to a subgraph from a regular node in the output graph so + // use the GEOp node input. + *dstImage = subgraphs_.at(dstSubgraphId).GetGEOpNodeForInputs(); + } else { + // The destination of the edge is in the output graph so use the node image + // in the output graph. + *dstImage = nodeImages.at(originalDstNode); + } + return Status::OK(); +} + +int OMSplitter::FindOutputSlotOfEdgeDst( + const string &srcSubgraphId, const string &dstSubgraphId, + const Edge *edge) { + if (IsInSubgraph(dstSubgraphId)) { + const Subgraph &dstSubgraph = subgraphs_.at(dstSubgraphId); + // 'dst' is in a subgraph and 'src' is a regular node in the output + // graph. Use the corresponding GEOp input instead. + return dstSubgraph.GetArgIndexForEdge(edge); + } else { + // The destination of the edge is in the output graph so use the regular + // edge slot. + return edge->dst_input(); + } +} + +Status OMSplitter::CopyEdgeToOutputGraph(const Edge *edge, + const string &srcSubgraphId, + const string &dstSubgraphId, + const std::unordered_map &nodeImages, + Graph *graphOut, + std::unordered_set, NodeSlot::PairHasher> *edges_added) { + Node *srcImage = NULL; + TF_RETURN_IF_ERROR(FindOutputImageOfEdgeSrc( + srcSubgraphId, dstSubgraphId, nodeImages, edge->src(), &srcImage)); + Node *dstImage = NULL; + TF_RETURN_IF_ERROR(FindOutputImageOfEdgeDst(srcSubgraphId, dstSubgraphId, nodeImages, edge->dst(), &dstImage)); + + // If this is a control edge then copy it and return. Lift control edges onto + // the enclosing GEOp. + if (edge->IsControlEdge()) { + // Add the control edge, if we have not already added it, using the images + // determined above. 
+ if (edges_added->emplace(NodeSlot(srcImage, -1), NodeSlot(dstImage, -1)) + .second) { + graphOut->AddControlEdge(srcImage, dstImage); + } + return Status::OK(); + } + + int srcOutput = + FindOutputSlotOfEdgeSrc(srcSubgraphId, dstSubgraphId, edge); + int dstInput = + FindOutputSlotOfEdgeDst(srcSubgraphId, dstSubgraphId, edge); + // Add the edge, if we have not already added it. + if (edges_added + ->emplace(NodeSlot(srcImage, srcOutput), + NodeSlot(dstImage, dstInput)).second) { + if (std::find(refIn_.begin(), refIn_.end(), edge) != refIn_.end()) { + Status status; + Node *identityNode = AddIdentityNode(graphOut, edge, srcImage->name(), srcOutput, + srcImage->assigned_device_name(), &status); + TF_RETURN_IF_ERROR(status); + graphOut->AddEdge(srcImage, srcOutput, identityNode, 0); + graphOut->AddEdge(identityNode, 0, dstImage, dstInput); + } else { + graphOut->AddEdge(srcImage, srcOutput, dstImage, dstInput); + } + } + return Status::OK(); +} + +Status OMSplitter::AddEdgesToOutputGraph(const std::unordered_map &nodeImages, + Graph *graphOut) { + // Set of edges already added to the output graph, represented as (src, dst) + // pairs. We use the set to deduplicate edges; multiple edges in the input + // graph may map to one edge in the output graph. + std::unordered_set, NodeSlot::PairHasher> + edges_added; + + for (const Edge *edge : graphIn_->edges()) { + REQUIRES_NOT_NULL(edge); + string srcSubgraphId; + TF_RETURN_IF_ERROR(GetSubgraphIdAttr(edge->src(), &srcSubgraphId)); + string dstSubgraphId; + TF_RETURN_IF_ERROR(GetSubgraphIdAttr(edge->dst(), &dstSubgraphId)); + + // Ignore edges that are strictly contained within one subgraph. 
+ if (IsInSubgraph(srcSubgraphId) && IsInSubgraph(dstSubgraphId) && + srcSubgraphId == dstSubgraphId) { + continue; + } + + TF_RETURN_IF_ERROR(CopyEdgeToOutputGraph( + edge, srcSubgraphId, dstSubgraphId, nodeImages, graphOut, &edges_added)); + } + + return Status::OK(); +} + +Status OMSplitter::BuildOutputGraph(Graph *graphOut) { + // Map from nodes in the input graph to nodes in the output graph. + std::unordered_map nodeImages; + + TF_RETURN_IF_ERROR(CopyNodesToOutputGraph(graphOut, &nodeImages)); + TF_RETURN_IF_ERROR(AddGEOpNodes(nodeImages, graphOut)); + TF_RETURN_IF_ERROR(AddEdgesToOutputGraph(nodeImages, graphOut)); + + return Status::OK(); +} + +Status OMPartitionSubgraphsInFunctions( + string groupAttribute, std::unique_ptr* graph, string graph_format, + FunctionLibraryDefinition* flib_def, std::map npu_optimizer_options, + std::map pass_options) { + Graph *graphIn = graph->get(); + FunctionLibraryDefinition *const library = flib_def; + + OMSplitter omsplitter(std::move(groupAttribute), graphIn, npu_optimizer_options, pass_options); + uint32_t subgraphNum = 0; + TF_RETURN_IF_ERROR(omsplitter.SplitIntoSubgraphs(subgraphNum)); + + if (subgraphNum == 0) { + LOG(INFO) << "No Subgraph has been built."; + return Status::OK(); + } + + TF_RETURN_IF_ERROR(omsplitter.BuildFunctionDefs(library, graph_format)); + + FunctionLibraryDefinition libraryOut(*library); + std::unique_ptr out(new (std::nothrow) Graph(libraryOut)); + REQUIRES_NOT_NULL(out); + out->set_versions(graphIn->versions()); + TF_RETURN_IF_ERROR( + omsplitter.BuildOutputGraph(out.get())); + *graph = std::move(out); + + return Status::OK(); +} +} // namespace OMSplitter +static std::atomic graph_run_num(1); +static mutex graph_num_mutex(LINKER_INITIALIZED); +Status OMPartitionSubgraphsPass::Run(const GraphOptimizationPassOptions &options) { + if ((options.graph == nullptr && options.partition_graphs == nullptr) || + options.flib_def == nullptr) { + return Status::OK(); + } + + Status s = Status::OK(); + if 
(options.graph != nullptr) { + std::unique_ptr* graph = options.graph; + FunctionLibraryDefinition *func_lib = options.flib_def; + s = ProcessGraph(graph, func_lib, OptimizationPassRegistry::POST_REWRITE_FOR_EXEC); + if (s != Status::OK()) { + return s; + } + } else if (options.partition_graphs != nullptr) { + for (auto& pg : *options.partition_graphs) { + std::unique_ptr* graph = &pg.second; + FunctionLibraryDefinition *func_lib = options.flib_def; + s = ProcessGraph(graph, func_lib, OptimizationPassRegistry::POST_PARTITIONING); + if (s != Status::OK()) { + return s; + } + } + } + + return Status::OK(); +} + +Status OMPartitionSubgraphsPass::ProcessGraph(std::unique_ptr* graph, FunctionLibraryDefinition *func_lib, + const OptimizationPassRegistry::Grouping pass_group_value) +{ + int graph_num; + graph_num = graph_run_num++; + + if (graph == nullptr) { + return Status::OK(); + } + + int64 startTime = InferShapeUtil::GetCurrentTimestap(); + + for (Node *n : graph->get()->nodes()) { + REQUIRES_NOT_NULL(n); + if (n->attrs().Find("_NoNeedOptimize")) { + LOG(INFO) << "Found mark of noneed optimize on node [" + << n->name() << "], skip OMPartitionSubgraphsPass."; + return Status::OK(); + } + } + + std::map all_options; + std::map pass_options; + pass_options = NpuAttrs::GetDefaultPassOptions(); + for (Node *n : graph->get()->nodes()) { + REQUIRES_NOT_NULL(n); + if (n->attrs().Find("_NpuOptimizer")) { + pass_options = NpuAttrs::GetPassOptions(n->attrs()); + all_options = NpuAttrs::GetAllAttrOptions(n->attrs()); + break; + } + } + + std::string job = pass_options["job"]; + if (job == "ps" || job == "default") { + LOG(INFO) << "job is " << job << " Skip the optimizer : OMPartitionSubgraphsPass."; + return Status::OK(); + } + if (job == "localhost" && + pass_group_value != OptimizationPassRegistry::POST_REWRITE_FOR_EXEC) { + return Status::OK(); + } + if (job != "localhost" && + pass_group_value != OptimizationPassRegistry::POST_PARTITIONING) { + return Status::OK(); + } + + 
bool use_off_line = pass_options["use_off_line"] == "1"; + bool mix_compile_mode = pass_options["mix_compile_mode"] == "1"; + int iterations_per_loop = std::atoi(pass_options["iterations_per_loop"].c_str()); + int task_index = std::atoi(pass_options["task_index"].c_str()); + if (!iterations_per_loop) { + LOG(FATAL) << "iterator_per_loop should be int and must >= 1"; + } + if (task_index < 0) { + LOG(FATAL) << "task_index should be int and must >= 0"; + } + bool do_npu_optimizer = pass_options["do_npu_optimizer"] == "1"; + if (do_npu_optimizer) { + if (!use_off_line) { + LOG(INFO) << "Run online process and skip the optimizer"; + return Status::OK(); + } + } else { + return Status::OK(); + } + LOG(INFO) << "mix_compile_mode is " << (mix_compile_mode ? "True" : "False"); + LOG(INFO) << "iterations_per_loop is " << iterations_per_loop; + + char *need_print = getenv("PRINT_MODEL"); + + if (need_print != nullptr && strcmp("1", need_print) == 0) { + GraphDef ori_graph_def; + graph->get()->ToGraphDef(&ori_graph_def); + string ori_model_path = "BeforeSubGraph_"; + string omodel_path = ori_model_path + std::to_string(graph_num) + ".pbtxt"; + Status status_out = WriteTextProto(Env::Default(), omodel_path, ori_graph_def); + } + + string graph_format_value; + Graph *graphIn = graph->get(); + for (Node *node : graphIn->op_nodes()) { + if (node->type_string() == "NPUInit") { + std::string attr_name = ""; + for (auto option : all_options) { + attr_name = std::string("_") + option.first; + node->AddAttr(attr_name, option.second); + } + node->AddAttr("_NpuOptimizer", "NpuOptimizer"); + } + + if (node->type_string() == "_UnaryOpsComposition") { + LOG(INFO) << "begin split _UnaryOpsComposition."; + Node *pre_node = nullptr; + if (node->in_edges().size() != 1) { + LOG(INFO) << "edge size if not 1, not support in _UnaryOpsComposition."; + continue; + } + pre_node = (*node->in_edges().begin())->src(); + auto attr_map = node->def().attr(); + auto node_list = attr_map["op_names"].list(); 
+  Node *unary_node = nullptr; + for (int i = 0; i < node_list.s_size(); i++) { + string node_name = node_list.s(i); + string op_name = node->name() + "_" + std::to_string(i) + "_" + node_name; + LOG(INFO) << "op_names node_list: " << i << " is node: " << node_name; + TF_CHECK_OK(NodeBuilder(op_name, node_name) + .Input(pre_node, 0) + .Device(pre_node->def().device()) + .Finalize(&*graphIn, &unary_node)); + REQUIRES_NOT_NULL(unary_node); + LOG(INFO) << unary_node->type_string() << " has built success."; + pre_node = unary_node; + } + for (auto out_edge : node->out_edges()) { + LOG(INFO) << "begin add edge "; + graphIn->AddEdge(unary_node, 0, out_edge->dst(), out_edge->dst_input()); + } + graphIn->RemoveNode(node); + } + } + + for (Node *node : graphIn->op_nodes()) { + if (node->type_string() == "OneShotIterator" && iterations_per_loop != 1) { + LOG(FATAL) << "iterator_per_loop only support 1 when using OneShotIterator"; + } + + string device_name = ""; + if (job != "localhost" && job != "ps" && job != "default") { + device_name = std::string("/job:") + std::string(job) + std::string("/replica:0/task:") + \ + std::to_string(task_index) + std::string("/device:CPU:0"); + } else if (job == "localhost"){ + device_name = string("/job:localhost/replica:0/task:0/device:CPU:0"); + } else { + return errors::InvalidArgument("job type is : ", job, " not support. 
"); + } + + node->set_assigned_device_name(device_name); + + string node_format_value = ""; + Status status = GetNodeAttr(node->def(), "data_format", &node_format_value); + if (status.ok() && !node_format_value.empty()) { + if (graph_format_value == "") { + graph_format_value = node_format_value; + } + } + } + + if (graph_format_value.empty()) { + graph_format_value = "NHWC"; // default value + } + + int subgraphNum = 0; + TF_RETURN_IF_ERROR(OMSplitter::MarkForPartition(graph, subgraphNum, mix_compile_mode, graph_num, func_lib, pass_options)); + LOG(INFO) << "OMPartion subgraph_" << std::to_string(graph_num) + << " markForPartition success."; + if (subgraphNum < 1) { + LOG(INFO) << "subgraphNum is " << subgraphNum; + return Status::OK(); + } + if (mix_compile_mode) { + std::vector varEdges; + for (Node *node : graphIn->op_nodes()) { + if (node->type_string() == "VariableV2" || node->type_string() == "VarHandleOp") { + for (auto out_edge : node->out_edges()) { + varEdges.push_back(out_edge); + } + } + } + for (auto varEdge : varEdges) { + REQUIRES_NOT_NULL(varEdge); + REQUIRES_NOT_NULL(varEdge->src()); + REQUIRES_NOT_NULL(varEdge->dst()); + string srcSubgraphId = ""; + const string partitionAttr = OMSplitter::PARTITION_SUB_GRAPH_ATTR; + Status s = GetNodeAttr(varEdge->src()->attrs(), partitionAttr, &srcSubgraphId); + if (s.code() != error::Code::NOT_FOUND) { + TF_RETURN_IF_ERROR(s); + } + + DataType dtypeDst = varEdge->dst()->input_type(varEdge->dst_input()); + string dstSubgraphId; + s = GetNodeAttr(varEdge->dst()->attrs(), partitionAttr, &dstSubgraphId); + if (s.code() == error::Code::NOT_FOUND) { + if (!IsRefType(dtypeDst)) { + continue; + } else { + return errors::InvalidArgument("Ref Tensors (e.g., Variables) output: ", + varEdge->dst()->name(), " is not in white list"); + } + } else { + TF_RETURN_IF_ERROR(s); + } + if (IsRefType(dtypeDst) && srcSubgraphId != dstSubgraphId) { + Node *nodeCopy = graphIn->AddNode(varEdge->src()->def(), &s); + 
TF_RETURN_IF_ERROR(s); + nodeCopy->ClearAttr(partitionAttr); + nodeCopy->AddAttr(partitionAttr, dstSubgraphId); + graphIn->AddEdge(nodeCopy, varEdge->src_output(), varEdge->dst(), varEdge->dst_input()); + graphIn->RemoveEdge(varEdge); + } + } + } + + if (mix_compile_mode) { + auto nodes = graphIn->op_nodes(); + for (Node *node : nodes) { + if (node->IsConstant()) { + std::map copiedConsts; + string srcSubgraphId = ""; + const string partitionAttr = OMSplitter::PARTITION_SUB_GRAPH_ATTR; + Status s = GetNodeAttr(node->attrs(), partitionAttr, &srcSubgraphId); + if (s.code() != error::Code::NOT_FOUND) { + TF_RETURN_IF_ERROR(s); + } + std::vector edges; + for (auto edge : node->out_edges()) { + edges.push_back(edge); + } + for (auto edge : edges) { + REQUIRES_NOT_NULL(edge); + REQUIRES_NOT_NULL(edge->src()); + REQUIRES_NOT_NULL(edge->dst()); + string dstSubgraphId; + s = GetNodeAttr(edge->dst()->attrs(), partitionAttr, &dstSubgraphId); + if (s.code() == error::Code::NOT_FOUND) { + continue; + } else { + TF_RETURN_IF_ERROR(s); + } + if (srcSubgraphId != dstSubgraphId) { + if (copiedConsts.find(dstSubgraphId) == copiedConsts.end()) { + Node *nodeCopy = graphIn->AddNode(edge->src()->def(), &s); + TF_RETURN_IF_ERROR(s); + nodeCopy->set_name(node->name() + "_copied"); + nodeCopy->ClearAttr(partitionAttr); + nodeCopy->AddAttr(partitionAttr, dstSubgraphId); + copiedConsts[dstSubgraphId] = nodeCopy; + LOG(INFO) << "Copy const node:" << node->name(); + } + Node *nodeCopy = copiedConsts[dstSubgraphId]; + graphIn->AddEdge(nodeCopy, edge->src_output(), edge->dst(), edge->dst_input()); + graphIn->AddControlEdge(edge->src(), nodeCopy, false); + graphIn->RemoveEdge(edge); + } + } + } + } + } + TF_RETURN_IF_ERROR(OMSplitter::OMPartitionSubgraphsInFunctions( + OMSplitter::PARTITION_SUB_GRAPH_ATTR, graph, graph_format_value, func_lib, + all_options, pass_options)); + LOG(INFO) << "OMPartion subgraph_" << std::to_string(graph_num) + << " SubgraphsInFunctions success."; + 
FixupSourceAndSinkEdges(graph->get()); + + if (need_print != nullptr && strcmp("1", need_print) == 0) { + GraphDef omg_graph_def; + graph->get()->ToGraphDef(&omg_graph_def); + string tmpmodel_path = "AfterSubGraph_"; + string tmodel_path = tmpmodel_path + std::to_string(graph_num) + ".pbtxt"; + Status status_o = WriteTextProto(Env::Default(), tmodel_path, omg_graph_def); + } + int64 endTime = InferShapeUtil::GetCurrentTimestap(); + LOG(INFO) << "OMPartion subgraph_" << std::to_string(graph_num) << " success. [" + << ((endTime - startTime) / kMicrosToMillis) << " ms]"; + return Status::OK(); +} + +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 2, + OMPartitionSubgraphsPass); +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_PARTITIONING, 101, + OMPartitionSubgraphsPass); +} // namespace tensorflow diff --git a/tf_adapter/optimizers/om_partition_subgraphs_pass.h b/tf_adapter/optimizers/om_partition_subgraphs_pass.h new file mode 100644 index 000000000..f52bc607f --- /dev/null +++ b/tf_adapter/optimizers/om_partition_subgraphs_pass.h @@ -0,0 +1,52 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_OM_PARTITION_SUBGRAPHS_PASS_H_ +#define TENSORFLOW_OM_PARTITION_SUBGRAPHS_PASS_H_ + +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +namespace OMSplitter { +Status MarkForPartition(const GraphOptimizationPassOptions &options, + int &clusterNum, bool mix_compile_mode, + int graph_num, FunctionLibraryDefinition *func_lib, + std::map pass_options); + +// Transformation that finds subgraphs whose nodes are marked with +// 'groupAttribute', splits those subgraphs into functions, and replaces +// the originals with GEOps. +// 'groupAttribute' must be a string valued-attribute that names the new +// functions to introduce. +Status OMPartitionSubgraphsInFunctions( + string groupAttribute, const GraphOptimizationPassOptions &options, string graph_format); +} // namespace OMSplitter + +class OMPartitionSubgraphsPass : public GraphOptimizationPass { + public: + OMPartitionSubgraphsPass() = default; + ~OMPartitionSubgraphsPass() = default; + Status Run(const GraphOptimizationPassOptions &options) override; + +private: + Status ProcessGraph(std::unique_ptr* graph, FunctionLibraryDefinition *func_lib, + const OptimizationPassRegistry::Grouping pass_group_value); +}; +} // namespace tensorflow +#endif // TENSORFLOW_OM_PARTITION_SUBGRAPHS_PASS_H_ diff --git a/tf_adapter/optimizers/om_set_var_format_pass.cc b/tf_adapter/optimizers/om_set_var_format_pass.cc new file mode 100644 index 000000000..e7bcc62cf --- /dev/null +++ b/tf_adapter/optimizers/om_set_var_format_pass.cc @@ -0,0 +1,179 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. 
foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tf_adapter/optimizers/om_set_var_format_pass.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/graph_def_util.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/graph/tensor_id.h" +#include "tensorflow/core/lib/gtl/flatset.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" +#include "tensorflow/core/util/device_name_utils.h" +#include "tf_adapter/common/common.h" +#include "tf_adapter/util/npu_attrs.h" + +namespace tensorflow { +static const int g_iInputNum = 1; // the second input +const std::string KEY_NEW_ATTR_NAME = "_var_format"; +const std::string KEY_FZ_ATTR_VALUE = "FZ"; +const std::string KEY_4D_ATTR_VALUE = "4D"; + +const 
std::string KEY_CONV2D_OP_VALUE = "Conv2D"; +const std::string KEY_MATMUL_OP_VALUE = "MatMul"; +const std::string KEY_CONV2D_BACKPROP_INPUT_VALUE = "Conv2DBackpropInput"; +const std::string KEY_VARIABLE_V2_VALUE = "VariableV2"; +const std::string KEY_VAR_HANDLE_OP_VALUE = "VarHandleOp"; +const std::string KEY_IDENTITY_OP_VALUE = "Identity"; +const std::string KEY_READ_VARIABLE_OP_VALUE = "ReadVariableOp"; +const std::string KEY_RESOURCE_APPLY_MOMENTUM_OP_VALUE = "ResourceApplyMomentum"; +const std::string KEY_APPLY_MOMENTUM_OP_VALUE = "ApplyMomentum"; + +static void AddNodeVarFormat(Node *node, const string& var_format) { + if (var_format == KEY_4D_ATTR_VALUE) { + const AttrValue *attr_value = node->attrs().Find(KEY_NEW_ATTR_NAME); + if (attr_value == nullptr) { + node->AddAttr(KEY_NEW_ATTR_NAME, var_format); + } + return; + } + + node->AddAttr(KEY_NEW_ATTR_NAME, var_format); +} + +Status SetVarFormatPass::AssignApplyMomentumInNodesFormat(Node *node, + const string &var_format) { + if (node == nullptr) { + return Status::OK(); + } + for (const Edge *in_edge : node->in_edges()) { + REQUIRES_NOT_NULL(in_edge); + Node *src_node = in_edge->src(); + REQUIRES_NOT_NULL(src_node); + bool is_momentum_op = (in_edge->dst_input() == 1) && + ((src_node->type_string() == KEY_VAR_HANDLE_OP_VALUE) || + (src_node->type_string() == KEY_VARIABLE_V2_VALUE)); + if (is_momentum_op) { + AddNodeVarFormat(src_node, var_format); + + for (const Edge *var_out : src_node->out_edges()) { + REQUIRES_NOT_NULL(var_out); + Node *var_out_node = var_out->dst(); + REQUIRES_NOT_NULL(var_out_node); + AddNodeVarFormat(var_out_node, var_format); + } + break; + } + } + return Status::OK(); +} + +Status SetVarFormatPass::GetFormat(Node *node, string &format) { + for (const Edge *out : node->out_edges()) { + REQUIRES_NOT_NULL(out); + Node *dst_node = out->dst(); + REQUIRES_NOT_NULL(dst_node); + bool is_fz_node = (out->dst_input() == 1) && + ((dst_node->type_string() == KEY_CONV2D_OP_VALUE) || + 
(dst_node->type_string() == KEY_MATMUL_OP_VALUE) || + (dst_node->type_string() == KEY_CONV2D_BACKPROP_INPUT_VALUE)); + if (is_fz_node) { + format = KEY_FZ_ATTR_VALUE; + return Status::OK(); + } + } + format = KEY_4D_ATTR_VALUE; + return Status::OK(); +} + +Status SetVarFormatPass::AssignFormatToVarOutNodes(Node *node) { + string var_format = KEY_4D_ATTR_VALUE; + for (const Edge *out : node->out_edges()) { + REQUIRES_NOT_NULL(out); + Node *dst_node = out->dst(); + REQUIRES_NOT_NULL(dst_node); + bool is_read_var_node = (dst_node->type_string() == KEY_IDENTITY_OP_VALUE) || + (dst_node->type_string() == KEY_READ_VARIABLE_OP_VALUE); + if (is_read_var_node) { + Status status = GetFormat(dst_node, var_format); + if (!status.ok()) { + return status; + } + } + } + + Node *apply_momentum = nullptr; + AddNodeVarFormat(node, var_format); + for (const Edge *out : node->out_edges()) { + REQUIRES_NOT_NULL(out); + Node *dst_node = out->dst(); + REQUIRES_NOT_NULL(dst_node); + AddNodeVarFormat(dst_node, var_format); + bool is_apply_momentum_node = dst_node->type_string() == KEY_APPLY_MOMENTUM_OP_VALUE || + dst_node->type_string() == KEY_RESOURCE_APPLY_MOMENTUM_OP_VALUE; + if (is_apply_momentum_node) { + apply_momentum = dst_node; + } + } + + TF_RETURN_IF_ERROR(AssignApplyMomentumInNodesFormat(apply_momentum, var_format)); + + return Status::OK(); +} + +Status SetVarFormatPass::Run(const GraphOptimizationPassOptions &options) { + Graph *graph_in = (options.graph)->get(); + if (graph_in == nullptr || options.session_options == nullptr) { + return Status::OK(); + } + + std::map pass_options = NpuAttrs::GetPassOptions(options); + std::string job = pass_options["job"]; + if (job == "ps" || job == "default") { + LOG(INFO) << "job is " << job << " Skip the optimizer : SetVarFormatPass."; + return Status::OK(); + } + + for (Node *node : graph_in->op_nodes()) { + if ((node != nullptr) && ((node->type_string() == KEY_VAR_HANDLE_OP_VALUE) || + (node->type_string() == KEY_VARIABLE_V2_VALUE))) { 
+ AssignFormatToVarOutNodes(node); + } + } + return Status::OK(); +} + +REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 1, SetVarFormatPass); +} // namespace tensorflow diff --git a/tf_adapter/optimizers/om_set_var_format_pass.h b/tf_adapter/optimizers/om_set_var_format_pass.h new file mode 100644 index 000000000..53299409e --- /dev/null +++ b/tf_adapter/optimizers/om_set_var_format_pass.h @@ -0,0 +1,34 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_OM_SET_VAR_FORMAT_PASS_H_ +#define TENSORFLOW_OM_SET_VAR_FORMAT_PASS_H_ + +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +class SetVarFormatPass : public GraphOptimizationPass { + public: + Status Run(const GraphOptimizationPassOptions &options) override; + Status AssignFormatToVarOutNodes(Node *node); + Status GetFormat(Node *node, string &format); + Status AssignApplyMomentumInNodesFormat(Node *node, const string &var_format); +}; +} // namespace tensorflow +#endif // TENSORFLOW_OM_SET_VAR_FORMAT_PASS_H_ diff --git a/tf_adapter/python/MANIFEST.in b/tf_adapter/python/MANIFEST.in new file mode 100644 index 000000000..b6beccfca --- /dev/null +++ b/tf_adapter/python/MANIFEST.in @@ -0,0 +1,2 @@ +recursive-include * *.py +recursive-include * *.so \ No newline at end of file diff --git a/tf_adapter/python/npu_bridge/__init__.py b/tf_adapter/python/npu_bridge/__init__.py new file mode 100644 index 000000000..d6c2996b3 --- /dev/null +++ b/tf_adapter/python/npu_bridge/__init__.py @@ -0,0 +1,6 @@ +from npu_bridge.helper.helper import npu_bridge_handle +from npu_bridge.helper.helper import version as __version__ +from npu_bridge.helper import helper +from npu_bridge.estimator.npu import npu_estimator +from npu_bridge.hccl import hccl_ops +__all__ = [_s for _s in dir() if not _s.startswith('_')] \ No newline at end of file diff --git a/tf_adapter/python/npu_bridge/estimator/npu/keras_to_npu.py b/tf_adapter/python/npu_bridge/estimator/npu/keras_to_npu.py new file mode 100644 index 000000000..9bafe38bc --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu/keras_to_npu.py @@ -0,0 +1,556 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import 
print_function + +import six +import os +import re +import tempfile + +from tensorflow.python.client import session +from tensorflow.python.distribute import distribution_strategy_context +from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed +from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib +from tensorflow.python.framework import tensor_util +from tensorflow.python.keras import backend as K +from tensorflow.python.keras import models +from tensorflow.python.keras.engine import training_utils +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import gfile +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.saved_model import signature_constants +from tensorflow.python.training import checkpoint_management +from tensorflow.python.training import monitored_session +from tensorflow.python.training import saver as saver_lib +from tensorflow.python.training import training_util +from tensorflow.python.training.tracking import graph_view +from tensorflow.python.training.tracking import util as trackable_util +from tensorflow.python.util import compat_internal +from tensorflow_estimator.python.estimator import estimator as estimator_lib +from tensorflow_estimator.python.estimator import model_fn as model_fn_lib +from tensorflow_estimator.python.estimator.export import export_lib +from tensorflow_estimator.python.estimator.mode_keys import ModeKeys +from npu_bridge.estimator.npu.npu_config import NPURunConfig +from npu_bridge.estimator.npu.npu_estimator import NPUEstimatorSpec +from npu_bridge.estimator.npu.npu_estimator import NPUEstimator + +_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY + +def _convert_tensor(x): + """Create or cast tensor if needed.""" + if not tensor_util.is_tensor(x): + # x is a numpy array + x = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(x) + return x + + +def 
_any_weight_initialized(keras_model): + """Check if any weights has been initialized in the Keras model. + + Args: + keras_model: An instance of compiled keras model. + + Returns: + boolean, True if at least one weight has been initialized, else False. + Currently keras initialize all weights at get_session(). + """ + if keras_model is None: + return False + if ops.executing_eagerly_outside_functions(): + return True + for layer in keras_model.layers: + for weight in layer.weights: + if hasattr(weight, '_keras_initialized'): + return True + return False + + +def _convert_estimator_io_to_keras(keras_model, features, labels): + """Converts estimator features and labels to keras input and target tensors. + + Args: + keras_model: a compiled `tf.keras.Model` instance, used to determine the + order of the returned lists. + features: Dict of tensors or `None`. + labels: Dict of tensors, a single tensor, or `None`. + + Returns: + Tuple of ( + list of input tensors or `None`, + list of target tensors or `None`, + list of sample weight tensors or `None`) + The order of tensors is determined by the order set in the keras model. + """ + + def _to_ordered_tensor_list(obj, key_order, obj_name, order_name): + """Convert obj to an ordered list of tensors. + + Args: + obj: List, dict, or single tensor. May be `None`. + key_order: List of strings with the order to return (used if obj is a + dict). + obj_name: String name of object (e.g. "features" or "labels") + order_name: String name of the key order (e.g. "inputs" or "outputs") + + Returns: + List of tensors, or `None` + + Raises: + KeyError: If obj has invalid keys. + """ + if obj is None: + return None + elif isinstance(obj, (list, tuple)): + return [_convert_tensor(x) for x in obj] + elif isinstance(obj, dict): + # Ensure that the obj keys and keys in key_order are exactly the same. 
+ different_keys = set(obj.keys()) ^ set(key_order) + + if different_keys: + raise KeyError( + 'The dictionary passed into {obj_name} does not have the expected ' + '{order_name} keys defined in the keras model.' + '\n\tExpected keys: {order_keys}' + '\n\t{obj_name} keys: {obj_keys}' + '\n\tDifference: {different_keys}'.format( + order_name=order_name, order_keys=set(key_order), + obj_name=obj_name, obj_keys=set(obj.keys()), + different_keys=different_keys)) + + return [_convert_tensor(obj[key]) for key in key_order] + else: # Assume obj is a tensor. + return [_convert_tensor(obj)] + + features, sample_weight_tensors = _extract_sample_weight_tensors(features) + input_names = None + output_names = None + if isinstance(features, dict): + input_names = ( + keras_model.input_names if keras_model._is_graph_network else + ['input_%d' % i for i in range(1, len(features) + 1)]) + if isinstance(labels, dict): + output_names = ( + keras_model.output_names if keras_model._is_graph_network else + ['output_%d' % i for i in range(1, len(labels) + 1)]) + + if isinstance(keras_model.inputs, dict): + # convert input tensors into dict if keras_model is built with dict input. + input_tensors = { + k: _convert_tensor(features[k]) + for (k, v) in keras_model.inputs.items() + } + else: + # converting input tensors into sorted list. 
+ input_tensors = _to_ordered_tensor_list(features, input_names, 'features', + 'inputs') + target_tensors = _to_ordered_tensor_list( + labels, output_names, 'labels', 'outputs') + + return input_tensors, target_tensors, sample_weight_tensors + + +def _extract_sample_weight_tensors(features): + if isinstance(features, dict) and set(features.keys()) == { + 'features', 'sample_weights'}: + feature_tensor = features['features'] + sample_weight_tensors = features['sample_weights'] + else: + feature_tensor = features + sample_weight_tensors = None + return feature_tensor, sample_weight_tensors + + +def _clone_and_build_model(mode, + keras_model, + custom_objects, + features=None, + labels=None, + optimizer_config=None): + """Clone and build the given keras_model. + + Args: + mode: training mode. + keras_model: an instance of compiled keras model. + custom_objects: Dictionary for custom objects. + features: Dict of tensors. + labels: Dict of tensors, or single tensor instance. + optimizer_config: Optimizer config dictionary, returned by + `optimizer.get_config()`. This is used when cloning a model with + an optimizer. Since `_clone_and_build_model` is called in a different + graph and session from the model, `optimizer.get_config()` may raise an + error during the attempt to serialize the optimizer hyperparameter values. + + Returns: + The newly built model. + """ + # Set to True during training, False for inference or testing. + K.set_learning_phase(mode == ModeKeys.TRAIN) + input_tensors, target_tensors, sample_weight_tensors = ( + _convert_estimator_io_to_keras(keras_model, features, labels)) + + compile_clone = (mode != ModeKeys.PREDICT) + + global_step = None + if compile_clone: + # Set iterations to the global step created by tf.train.create_global_step() + # which is automatically run in the estimator framework. 
+ global_step = training_util.get_or_create_global_step() + K.track_variable(global_step) + + clone = models.clone_and_build_model( + keras_model, input_tensors, target_tensors, custom_objects, + compile_clone=compile_clone, + in_place_reset=(not keras_model._is_graph_network), + optimizer_iterations=global_step, + optimizer_config=optimizer_config) + + if sample_weight_tensors is not None: + sample_weight_tensors = training_utils.standardize_sample_weights( + sample_weight_tensors, clone.output_names) + # Update calculated loss (model.total_loss) to include sample weights. + clone._compile_weights_loss_and_weighted_metrics(sample_weight_tensors) + return clone + + +def _convert_keras_metrics_to_estimator(model): + """Convert metrics from a Keras model to ops used by the Estimator framework. + + Args: + model: A `tf.keras.Model` object. + + Returns: + Dictionary mapping metric names to tuples of (value, update) ops. May return + `None` if the model does not contain any metrics. + """ + if not getattr(model, '_compile_metrics', None): + return None + + # We are not using model.metrics here because we want to exclude the metrics + # added using `add_metric` API. + return {m.name: m for m in model._compile_metric_functions} + + +def _create_keras_model_fn(keras_model, custom_objects=None, + save_object_ckpt=False): + """Creates model_fn for keras Estimator. + + Args: + keras_model: an instance of compiled keras model. + custom_objects: Dictionary for custom objects. + save_object_ckpt: Whether to save an object-based checkpoint. + + Returns: + The model_fn for a keras Estimator. + """ + # Get optimizer config in the current context (since model_fn is called in the + # estimator graph and session). OptimizerV2 objects serialize variable/tensor + # hyperparameters in their configs, resulting to wrong-session errors during + # model cloning. 
+ try: + if isinstance(keras_model.optimizer, (tuple, list)): + optimizer_config = [opt.get_config() for opt in keras_model.optimizer] + else: + optimizer_config = keras_model.optimizer.get_config() + except (NotImplementedError, AttributeError): + # TFOptimizers and other custom optimizers do not have a config. + optimizer_config = None + + def model_fn(features, labels, mode): + """model_fn for keras Estimator.""" + model = _clone_and_build_model( + mode=mode, + keras_model=keras_model, + custom_objects=custom_objects, + features=features, + labels=labels, + optimizer_config=optimizer_config) + model_output_names = [] + # We need to make sure that the output names of the last layer in the model + # is the same for each of the cloned models. This is required for mirrored + # strategy when we call regroup. + if distribution_strategy_context.has_strategy(): + for name in model.output_names: + name = re.compile(r'_\d$').sub('', name) + model_output_names.append(name) + else: + model_output_names = model.output_names + + # Get inputs to EstimatorSpec + predictions = dict(zip(model_output_names, model.outputs)) + + loss = None + train_op = None + eval_metric_ops = None + + # Set loss and metric only during train and evaluate. + if mode is not ModeKeys.PREDICT: + if mode is ModeKeys.TRAIN: + model._make_train_function() # pylint: disable=protected-access + else: + model._make_test_function() # pylint: disable=protected-access + loss = model.total_loss + + eval_metric_ops = _convert_keras_metrics_to_estimator(model) + + # Set train_op only during train. + if mode is ModeKeys.TRAIN: + train_op = model.train_function.updates_op + + if (not model._is_graph_network and + hasattr(keras_model, '_original_attributes_cache') and + keras_model._original_attributes_cache is not None): + # To avoid `model_fn` being destructive for the initial model argument. 
+ models.in_place_subclassed_model_state_restoration(keras_model) + + scaffold = None + if save_object_ckpt: + model._track_trackable(training_util.get_global_step(), + 'estimator_global_step') + # Create saver that maps variable names to object-checkpoint keys. + object_graph = graph_view.ObjectGraphView(model) + var_list = object_graph.frozen_saveable_objects() + saver = saver_lib.Saver(var_list=var_list, sharded=True) + saver._object_restore_saver = trackable_util.frozen_saver(model) + scaffold = monitored_session.Scaffold(saver=saver) + + return NPUEstimatorSpec( + mode=mode, + predictions=predictions, + loss=loss, + train_op=train_op, + eval_metric_ops=eval_metric_ops, + export_outputs={ + _DEFAULT_SERVING_KEY: + export_lib.PredictOutput(predictions) + }, + scaffold=scaffold + ) + + return model_fn + + +def _save_first_checkpoint(keras_model, custom_objects, config, + save_object_ckpt): + """Save first checkpoint for the keras Estimator. + + Args: + keras_model: an instance of compiled keras model. + custom_objects: Dictionary for custom objects. + config: Estimator config. + save_object_ckpt: Whether to save an object-based checkpoint. + + Returns: + The path where keras model checkpoint is saved. + """ + # save checkpoint into subdirectory to allow warm start + keras_model_dir = os.path.join(config.model_dir, 'keras') + # Load weights and save to checkpoint if there is no checkpoint + latest_path = checkpoint_management.latest_checkpoint(keras_model_dir) + if not latest_path: + keras_weights = None + if _any_weight_initialized(keras_model): + keras_weights = keras_model.get_weights() + if not gfile.IsDirectory(keras_model_dir): + gfile.MakeDirs(keras_model_dir) + with ops.Graph().as_default(): + random_seed.set_random_seed(config.tf_random_seed) + training_util.create_global_step() + model = _clone_and_build_model(ModeKeys.TRAIN, keras_model, + custom_objects) + + # Init the train_function outside of the context of session. 
This is due + # to the fact that train function will update the graph by adding backprop + # parts. This will potentially trying to update the node in forward graph + # which will fail if it is done within same session. + # Always create the train_function here since the model is just cloned. + # See https://github.com/tensorflow/tensorflow/issues/27750 for details. + model._make_train_function() # pylint: disable=protected-access + + # save to checkpoint + with session.Session(config=config.session_config) as sess: + if keras_weights: + model.set_weights(keras_weights) + # model._make_train_function() will potentially create the optimizer + # variable, which will require another variable initialization. + K._initialize_variables(sess) # pylint: disable=protected-access + + if save_object_ckpt: + model._track_trackable( # pylint: disable=protected-access + training_util.get_global_step(), 'estimator_global_step') + latest_path = os.path.join(keras_model_dir, 'keras_model.ckpt') + model.save_weights(latest_path) + else: + saver = saver_lib.Saver() + latest_path = os.path.join(keras_model_dir, 'keras_model.ckpt') + saver.save(sess, latest_path) + + return latest_path + +def _refresh_model_dir_and_session_config(config, model_dir): + """Overwrite estimator config by `model_dir` and `session_config` if needed. + + Args: + config: Original estimator config. + model_dir: Estimator model checkpoint directory. + + Returns: + Overwritten estimator config. + + Raises: + ValueError: Model directory inconsistent between `model_dir` and `config`. + """ + + if config is None or not isinstance(config, NPURunConfig): + raise ValueError( + 'config must be an instance of `NPURunConfig`, but provided %s.' 
% config) + + if config.session_config is None: + session_config = run_config.get_default_session_config() + config = NPURunConfig.replace(config, session_config=session_config) + + model_dir = compat_internal.path_to_str(model_dir) + if model_dir is not None: + if (getattr(config, 'model_dir', None) is not None and + config.model_dir != model_dir): + raise ValueError( + "`model_dir` are set both in constructor and `NPURunConfig`, but with " + "different values. In constructor: '{}', in `NPURunConfig`: " + "'{}' ".format(model_dir, config.model_dir)) + if model_dir: + config = NPURunConfig.replace(config, model_dir=model_dir) + elif getattr(config, 'model_dir', None) is None: + model_dir = tempfile.mkdtemp() + logging.warning('Using temporary folder as model directory: %s', model_dir) + config = NPURunConfig.replace(config, model_dir=model_dir) + + return config + +# LINT.IfChange +# TODO(b/139699640): let model_to_estimator only rely on public Keras APIs. +def model_to_npu_estimator(keras_model=None, + keras_model_path=None, + custom_objects=None, + model_dir=None, + checkpoint_format='saver', + config=None, + job_start_file=''): + """Constructs an `NPUEstimator` instance from given keras model. + Example below: + ``` + keras_model = tf.keras.Model(...) + keras_model.compile(...) + + estimator = tf.keras.estimator.model_to_npu_estimator(keras_model) + + def input_fn(): + return dataset_ops.Dataset.from_tensors( + ({'features': features, 'sample_weights': sample_weights}, + targets)) + + estimator.train(input_fn, steps=1) + ``` + + Args: + keras_model: A compiled Keras model object. This argument is mutually + exclusive with `keras_model_path`. + keras_model_path: Path to a compiled Keras model saved on disk, in HDF5 + format, which can be generated with the `save()` method of a Keras model. + This argument is mutually exclusive with `keras_model`. + custom_objects: Dictionary for custom objects. 
+ model_dir: Directory to save `NPUEstimator` model parameters, graph, summary + files for TensorBoard, etc. + checkpoint_format: Sets the format of the checkpoint saved by the NPUEstimator + when training. May be `saver` or `checkpoint`, depending on whether to + save checkpoints from `tf.train.Saver` or `tf.train.Checkpoint`. This + argument currently defaults to `saver`. When 2.0 is released, the default + will be `checkpoint`. Estimators use name-based `tf.train.Saver` + checkpoints, while Keras models use object-based checkpoints from + `tf.train.Checkpoint`. Currently, saving object-based checkpoints from + `model_to_npu_estimator` is only supported by Functional and Sequential + models. + config: `NPURunConfig` to config `NPUEstimator`. + job_start_file: The path of the job start file. Cannot be `None`. + + Returns: + An NPUEstimator from given keras model. + + Raises: + ValueError: if neither keras_model nor keras_model_path was given. + ValueError: if both keras_model and keras_model_path was given. + ValueError: if the keras_model_path is a GCS URI. + ValueError: if keras_model has not been compiled. + ValueError: if an invalid checkpoint_format was given. 
+ """ + if not (keras_model or keras_model_path): + raise ValueError( + 'Either `keras_model` or `keras_model_path` needs to be provided.') + if keras_model and keras_model_path: + raise ValueError( + 'Please specity either `keras_model` or `keras_model_path`, ' + 'but not both.') + + config = _refresh_model_dir_and_session_config(config, model_dir) + + if not keras_model: + logging.info('Loading models from %s', keras_model_path) + keras_model = models.load_model(keras_model_path) + else: + logging.info('Using the Keras model provided.') + keras_model = keras_model + + if checkpoint_format is None or checkpoint_format == 'checkpoint': + if not (keras_model._is_graph_network or + isinstance(keras_model, models.Sequential)): + raise ValueError('Object-based checkpoints are currently not supported ' + 'with subclassed models.') + save_object_ckpt = True + elif checkpoint_format == 'saver': + save_object_ckpt = False + else: + raise ValueError( + 'Checkpoint format must be one of "checkpoint" or "saver". Got {}' + .format(checkpoint_format)) + + if not hasattr(keras_model, 'optimizer') or not keras_model.optimizer: + raise ValueError( + 'The given keras model has not been compiled yet. ' + 'Please compile the model with `model.compile()` ' + 'before calling `model_to_npu_estimator()`.') + + keras_model_fn = _create_keras_model_fn(keras_model, custom_objects, + save_object_ckpt) + if _any_weight_initialized(keras_model): + # Warn if config passed to estimator tries to update GPUOptions. If a + # session has already been created, the GPUOptions passed to the first + # session sticks. + if config.session_config.HasField('gpu_options'): + logging.warning( + 'The Keras backend session has already been set. ' + 'The _session_config passed to model_to_npu_estimator will not be used.') + else: + # Pass the config into keras backend's default session. 
+ sess = session.Session(config=config.session_config) + K.set_session(sess) + + warm_start_path = None + if keras_model._is_graph_network: + warm_start_path = _save_first_checkpoint(keras_model, custom_objects, + config, save_object_ckpt) + elif keras_model.built: + logging.warning('You are creating an NPUEstimator from a Keras model manually ' + 'subclassed from `Model`, that was already called on some ' + 'inputs (and thus already had weights). We are currently ' + 'unable to preserve the model\'s state (its weights) as ' + 'part of the NPUEstimator in this case. Be warned that the ' + 'NPUEstimator has been created using a freshly initialized ' + 'version of your model.\n' + 'Note that this doesn\'t affect the state of the model ' + 'instance you passed as `keras_model` argument.') + + estimator = NPUEstimator(keras_model_fn, + config=config, + job_start_file=job_start_file, + warm_start_from=warm_start_path) + + return estimator diff --git a/tf_adapter/python/npu_bridge/estimator/npu/mnist_softmax_npu.py b/tf_adapter/python/npu_bridge/estimator/npu/mnist_softmax_npu.py new file mode 100644 index 000000000..e30f42f81 --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu/mnist_softmax_npu.py @@ -0,0 +1,96 @@ + +"""Simple MNIST classifier example with npu and timelines. + + Note: Please see further comments in the document. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys + +import tensorflow as tf + +from tensorflow.examples.tutorials.mnist import input_data +from tensorflow.python.client import timeline +#import npu ops +from npu_bridge.estimator import npu_ops + +FLAGS = None + + +def main(_): + # Import data + mnist = input_data.read_data_sets(FLAGS.data_dir) + + # Create the model + x = tf.placeholder(tf.float32, [None, 784]) + w = tf.Variable(tf.zeros([784, 10])) + b = tf.Variable(tf.zeros([10])) + y = tf.matmul(x, w) + b + + # Define loss and optimizer + y_ = tf.placeholder(tf.int64, [None]) + + # The raw formulation of cross-entropy, + # + # tf.reduce_mean(-tf.reduce_sum(y_ * tf.math.log(tf.nn.softmax(y)), + # reduction_indices=[1])) + # + # can be numerically unstable. + # + # So here we use tf.compat.v1.losses.sparse_softmax_cross_entropy on the raw + # logit outputs of 'y', and then average across the batch. + cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y) + train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) + + config = tf.ConfigProto() + #add npu config, enable offline train + custom_op = config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + #enable offline train + custom_op.parameter_map["use_off_line"].b = True + + run_metadata = tf.RunMetadata() + sess = tf.compat.v1.Session(config=config) + tf.global_variables_initializer().run(session=sess) + # Train + train_loops = 1000 + for i in range(train_loops): + batch_xs, batch_ys = mnist.train.next_batch(100) + + # Create a timeline for the last loop and export to json to view with + # chrome://tracing/. 
+ if i == train_loops - 1: + sess.run(train_step, + feed_dict={x: batch_xs, + y_: batch_ys}, + options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), + run_metadata=run_metadata) + trace = timeline.Timeline(step_stats=run_metadata.step_stats) + with open('/tmp/timeline.ctf.json', 'w') as trace_file: + trace_file.write(trace.generate_chrome_trace_format()) + else: + sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) + + # Test trained model + correct_prediction = tf.equal(tf.argmax(y, 1), y_) + accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) + print(sess.run(accuracy, + feed_dict={x: mnist.test.images, + y_: mnist.test.labels})) + sess.close() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--data_dir', + type=str, + default='/tmp/tensorflow/mnist/input_data', + help='Directory for storing input data') + parser.add_argument( + '--npu', type=bool, default=True, help='Turn npu on') + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tf_adapter/python/npu_bridge/estimator/npu/mnist_with_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/mnist_with_estimator.py new file mode 100644 index 000000000..fd13a7163 --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu/mnist_with_estimator.py @@ -0,0 +1,209 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Imports +import logging +import numpy as np +import tensorflow as tf +from tensorflow.examples.tutorials.mnist import input_data +from npu_bridge.estimator.npu.npu_config import NPURunConfig +from npu_bridge.estimator.npu.npu_config import ProfilingConfig +from npu_bridge.estimator.npu.npu_estimator import NPUEstimator +from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer + +""" 
+https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/layers/cnn_mnist.py +python3 mnist_with_estimator.py +""" + +# Model specific parameters +tf.flags.DEFINE_string("data_dir", "/tmp/mnist/MNIST-data", + "Path to directory containing the MNIST dataset") +tf.flags.DEFINE_string("model_dir", "/tmp/mnist/mnist_convnet_model", "Estimator model_dir") + +tf.flags.DEFINE_integer("batch_size", 100, + "Mini-batch size for the training. Note that this " + "is the global batch size and not the per-shard batch.") +tf.flags.DEFINE_integer("train_steps", 1, "Total number of training steps.") +tf.flags.DEFINE_integer("eval_steps", 1, + "Total number of evaluation steps. If `0`, evaluation " + "after training is skipped.") +tf.flags.DEFINE_float("learning_rate", 0.001, "Learning rate.") + +tf.flags.DEFINE_string("local_log_dir", "/tmp/mnist/train_logs.txt", "Log file path") +tf.flags.DEFINE_integer("iterations", 50, + "Number of iterations per TPU training loop.") + +tf.flags.DEFINE_string("job_start_file", "/tmp/config/deviceid_devindex_jobstart", + "CSA job start file path.") + +FLAGS = tf.flags.FLAGS + +def create_model(features, mode): + input_layer = tf.reshape(features["x"], [-1, 28, 28, 1]) + # Convolutional Layer #1 + # Computes 32 features using a 5x5 filter with ReLU activation. + # Padding is added to preserve width and height. + # Input Tensor Shape: [batch_size, 28, 28, 1] + # Output Tensor Shape: [batch_size, 28, 28, 32] + conv1 = tf.layers.conv2d( + inputs=input_layer, + filters=32, + kernel_size=[5, 5], + padding="same", + activation=tf.nn.relu) + # Pooling Layer #1 + # First max pooling layer with a 2x2 filter and stride of 2 + # Input Tensor Shape: [batch_size, 28, 28, 32] + # Output Tensor Shape: [batch_size, 14, 14, 32] + pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2) + # Convolutional Layer #2 + # Computes 64 features using a 5x5 filter. + # Padding is added to preserve width and height. 
+ # Input Tensor Shape: [batch_size, 14, 14, 32] + # Output Tensor Shape: [batch_size, 14, 14, 64] + conv2 = tf.layers.conv2d( + inputs=pool1, + filters=64, + kernel_size=[5, 5], + padding="same", + activation=tf.nn.relu) + + # Pooling Layer #2 + # Second max pooling layer with a 2x2 filter and stride of 2 + # Input Tensor Shape: [batch_size, 14, 14, 64] + # Output Tensor Shape: [batch_size, 7, 7, 64] + pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2) + + # Flatten tensor into a batch of vectors + # Input Tensor Shape: [batch_size, 7, 7, 64] + # Output Tensor Shape: [batch_size, 7 * 7 * 64] + pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64]) + + # Dense Layer + # Densely connected layer with 1024 neurons + # Input Tensor Shape: [batch_size, 7 * 7 * 64] + # Output Tensor Shape: [batch_size, 1024] + dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu) + + # Add dropout operation; 0.6 probability that element will be kept + dropout = tf.layers.dropout( + inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN) + + # Logits layer + # Input Tensor Shape: [batch_size, 1024] + # Output Tensor Shape: [batch_size, 10] + return tf.layers.dense(inputs=dropout, units=10) + + +def cnn_model_fn(features, labels, mode, params): + """Model function for CNN.""" + # Input Layer + # Reshape X to 4-D tensor: [batch_size, width, height, channels] + # MNIST images are 28x28 pixels, and have one color channel + logits = create_model(features, mode) # PREDICT + predictions = { + # Generate predictions (for PREDICT and EVAL mode) + "classes": tf.argmax(input=logits, axis=1), + # Add `softmax_tensor` to the graph. It is used for PREDICT and by the + # `logging_hook`. 
+ "probabilities": tf.nn.softmax(logits, name="softmax_tensor") + } + if mode == tf.estimator.ModeKeys.PREDICT: + return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) + + # Calculate Loss (for both TRAIN and EVAL modes) + loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) + tf.summary.scalar('loss', loss) + # Configure the Training Op (for TRAIN mode) + if mode == tf.estimator.ModeKeys.TRAIN: + optimizer = tf.train.GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) + distributedOptimizer = NPUDistributedOptimizer(optimizer) + train_op = distributedOptimizer.minimize(loss, global_step=tf.train.get_global_step()) + return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op) + # Add evaluation metrics (for EVAL mode) + eval_metric_ops = { + "accuracy": tf.metrics.accuracy( + labels=labels, predictions=predictions["classes"])} + return tf.estimator.EstimatorSpec( + mode=mode, loss=loss, eval_metric_ops=eval_metric_ops) + + +def train_input_fn(train_data, train_labels): + return tf.estimator.inputs.numpy_input_fn( + x={"x": train_data}, + y=train_labels, + batch_size=FLAGS.batch_size, + num_epochs=None, + shuffle=True) + +def eval_input_fn(eval_data, eval_labels): + return tf.estimator.inputs.numpy_input_fn( + x={"x": eval_data}, + y=eval_labels, + num_epochs=1, + shuffle=False) + +def set_log(): + # get TF logger + log = logging.getLogger('tensorflow') + log.setLevel(logging.INFO) + + # create formatter and add it to the handlers + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + + # create file handler which logs even debug messages + fh = logging.FileHandler(FLAGS.local_log_dir) + fh.setLevel(logging.INFO) + fh.setFormatter(formatter) + log.addHandler(fh) + + +def main(unused_argv): + # set the log + set_log() + + # Load training and eval data + mnist = input_data.read_data_sets(FLAGS.data_dir, False) + train_data = mnist.train.images # Returns np.array + 
train_labels = np.asarray(mnist.train.labels, dtype=np.int32) + eval_data = mnist.test.images # Returns np.array + eval_labels = np.asarray(mnist.test.labels, dtype=np.int32) + + # profiling_config = ProfilingConfig(enable_profiling=True, + # enable_options=["training_trace","task_trace"]) + + npu_config = NPURunConfig( + iterations_per_loop = 100, + save_checkpoints_steps=10, + model_dir = FLAGS.model_dir, + session_config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) + #,profiling_config=profiling_config + ) + + mnist_classifier = NPUEstimator( + model_fn=cnn_model_fn, + config=npu_config, + params = {}, + job_start_file=FLAGS.job_start_file + ) + + # Set up logging for predictions + # Log the values in the "Softmax" tensor with label "probabilities" + tensors_to_log = {"probabilities": "softmax_tensor"} + logging_hook = tf.train.LoggingTensorHook( + tensors=tensors_to_log, every_n_iter=50) # Train the model + + # print("Train the model...") + mnist_classifier.train( + input_fn=train_input_fn(train_data,train_labels), + steps=FLAGS.train_steps, hooks=[logging_hook]) + + # Evaluate the model and print results + print("Evaluate the model...") + eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn(eval_data,eval_labels), steps=FLAGS.eval_steps) + print("eval_results: ", eval_results) + +if __name__ == "__main__": + tf.app.run() diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_common.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_common.py new file mode 100644 index 000000000..34ca9845a --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_common.py @@ -0,0 +1,221 @@ +"""Inter-process communication using HCOM.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import json +from ctypes import cdll +from ctypes import c_uint +from ctypes import byref + +from tensorflow.python.platform import tf_logging as logging +from 
npu_bridge.estimator.npu import util as util_lib + + +class JobInfo: + """Job information send by CSA.""" + + def __init__(self, + job_id=0, + job_config=None, + heartbeat_time=-1, + region_id=None, + ak=None, + sk=None, + endpoint_url=None, + device_info=None, + rank_table_file=None, + restart_flag=0, + local_app_dir=None, + local_data_dir=None, + local_checkpoint_dir=None, + local_log_dir=None, + local_result_dir=None, + local_boot_file=None, + rank_size=1 + ): + """ + Constructs a JobInfo. + Args: + job_id: the unique identifier. + heartbeat_time: the frequency that framework records the heartbeat. + job_config: the configuration of the training task. It's a json string. + region_id: the region id to access the OBS. + ak: the ak to access the OBS. + sk: the sk to access the OBS. + endpoint_url: the host name to access the OBS. + device_info: the device information of the training task. It's a json string. + rank_table_file: the communication routing information. + restart_flag: the abnormal re-issued ID (0: Normally issued; 1: Abnormally re-issued). + local_app_dir: the local path of the user script downloaded from OBS, for example: userfile/code/ + local_data_dir: the local path of the user data downloaded from OBS, for example: userfile/data/ + local_checkpoint_dir: the local path of the checkpoint file downloaded from OBS, for example: checkpoint/ + local_log_dir: the user-created log path, for example: userfile/log/ + local_result_dir: the user-created output file path, for example: userfile/result/ + local_boot_file: the local path of the user startup script, for example: userfile/code/boot.py + rank_size: Rank size. 
+ """ + self._job_id = job_id + self._job_config = job_config + self._heartbeat_time = heartbeat_time + self._region_id = region_id + self._ak = ak + self._sk = sk + self._endpoint_url = endpoint_url + self._device_info = device_info + self._rank_table_file = rank_table_file + self._restart_flag = restart_flag + self._local_app_dir = local_app_dir + self._local_data_dir = local_data_dir + self._local_checkpoint_dir = local_checkpoint_dir + self._local_log_dir = local_log_dir + self._local_result_dir = local_result_dir + self._local_boot_file = local_boot_file + self._rank_size = rank_size + + +class JobConfig(): + """Job configuration.""" + + def __init__(self, learning_rate=None, batch_size=None): + """ + Constructs a JobConfig. + Args: + learning_rate: A Tensor or a floating point value. The learning rate to use. + batch_size: Integer, size of batches to return. + """ + self._learning_rate = learning_rate + self._batch_size = batch_size + + +class DeviceInfo(): + """Device information.""" + + def __init__(self, + index="0", + server_id="123456", + dev_index=1): + """ + Constructs a DeviceInfo. + Args: + index: the unique identifier. + server_id: the server resource unique identifier, obtained from resource management. + dev_index: the device serial number in AI server. 
+ """ + self._index = index + self._server_id = server_id + self._dev_index = dev_index + self._root_rank = 0 + + def is_master_node(self): + """Determines whether the current node is the primary node.""" + return self._index == self._root_rank + + +class NPUBasics(object): + """Wrapper class for the basic NPU API.""" + __instance = None + __has_init = False + def __new__(cls, file_name): + if not cls.__instance: + cls.__instance = object.__new__(cls) + return cls.__instance + + def __init__(self, file_name): + if not self.__has_init: + self._job_info = self._read_job_info(file_name) + self.__has_init = True + + @property + def jobinfo(self): + return self._job_info + + def size(self): + """A function that returns the number of Tensorflow processes. + Returns: + An integer scalar containing the number of Tensorflow processes. + """ + return self._job_info._rank_size + + def _read_job_info(self, file_name): + """Read the job information. + Args: + file_name: it's a json file which contains the job info from CSA. + Returns: + The job information. + """ + try: + with open(file_name, 'r', encoding='UTF-8') as f: + content = f.read() + data = json.loads(content, encoding='UTF-8') + + # 1. Get the device_info and check it. + device_info = data.get('device_info') + util_lib.check_not_none(device_info, 'device_info') + + index = device_info.get('Index', None) + util_lib.check_nonnegative_integer(index, 'Index') + + dev_index = device_info.get('dev_index', None) + util_lib.check_nonnegative_integer(dev_index, 'dev_index') + + server_id = device_info.get('server_id', None) + util_lib.check_not_none(device_info, 'server_id') + + # 2. Get the rank_table_file and check it. + rank_table_file = data.get('rank_table_file', None) + util_lib.check_not_none(rank_table_file, 'rank_table_file') + + # 3. Get the rank_size and check it. + rank_size = data.get('rank_size', None) + util_lib.check_positive_integer(rank_size, 'rank_size') + + # 4. Get the local_checkpoint_dir and check it. 
+ local_checkpoint_dir = data.get('local_checkpoint_dir', None) + + # 5. Init the JobInfo. + device_info = DeviceInfo(index=str(index), server_id=server_id, dev_index=dev_index) + job_info = JobInfo(device_info=device_info, rank_table_file=rank_table_file, + local_checkpoint_dir=local_checkpoint_dir, rank_size=rank_size) + return job_info + except IOError: + logging.warning('Warning:job config file does not exist') + + job_id = os.getenv('JOB_ID', "") + if(job_id == ""): + logging.error('Error:can not get job config from env') + return None + + heartbeat = os.getenv('HEARTBEAT', "") + + rank_table_file = os.getenv('RANK_TABLE_FILE', "") + + identity = os.getenv('POD_NAME', "") + if(identity == ""): + identity = os.getenv('RANK_ID', "") + + dev_index = os.getenv('DEVICE_ID') + if dev_index != None and dev_index.isdigit() and int(dev_index) <=7 and int(dev_index) >= 0: + dev_index = int(dev_index) + else: + raise RuntimeError("DEVICE_ID environment variable should in [0, 7]") + + checkpoint_dir = os.getenv('LOCAL_CHECKPOINT_DIR', "") + + # cann't get rank_size from env, set to default 1 + rank_size = os.getenv('RANK_SIZE', 1) + if(rank_size.isdigit() is False): + print("set rank_size to default 1") + rank_size = 1 + + device_info = DeviceInfo(index=str(identity), server_id="192.168.1.1", dev_index=int(dev_index)) + job_info = JobInfo(job_id=job_id, + heartbeat_time=heartbeat, + device_info=device_info, + rank_table_file=rank_table_file, + local_checkpoint_dir=checkpoint_dir, + rank_size=int(rank_size) + ) + return job_info diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py new file mode 100644 index 000000000..33dfa486c --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -0,0 +1,209 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from tensorflow.python.util import compat_internal +from 
tensorflow.python.platform import gfile + +import os +import re +import json +from npu_bridge.estimator.npu import util +from tensorflow.python.estimator import run_config as run_config_lib +from tensorflow.distribute.experimental import ParameterServerStrategy +from tensorflow.contrib.distribute import DistributeConfig +from tensorflow.python.training import server_lib + + + +class NPURunConfig(run_config_lib.RunConfig): + """RunConfig with NPU support.""" + + def __init__(self, + iterations_per_loop=1, + profiling_config=None, + model_dir=None, + tf_random_seed=None, + save_summary_steps=0, + save_checkpoints_steps=None, + save_checkpoints_secs=None, + session_config=None, + keep_checkpoint_max=5, + keep_checkpoint_every_n_hours=10000, + log_step_count_steps=100, + distribute=None, + enable_data_pre_proc=True, + precision_mode=None, + enable_reduce_precision=False, + variable_format_optimize=True, + mix_compile_mode=False, + hcom_parallel=False, + graph_memory_max_size=None, + variable_memory_max_size=None, + auto_tune_mode=None, + dump_config=None, + stream_max_parallel_num=None, + is_tailing_optimization=False, + horovod_mode = False, + graph_run_mode = 1, + op_debug_level = 0, + enable_scope_fusion_passes = None + ): + """ + Constructs a NPUConfig. + + Args: + iterations_per_loop: This is the number of train steps running in NPU + system before returning to CPU host for each `Session.run`. This means + global step is increased `iterations_per_loop` times in one `Session.run`. + It is recommended to be set as number of global steps for next checkpoint. + profiling_config: The profiling configuration. + model_dir: Directory where model parameters, graph, etc are saved. If + `PathLike` object, the path will be resolved. If `None`, will use a + default value set by the Estimator. + tf_random_seed: Random seed for TensorFlow initializers. + Setting this value allows consistency between reruns. + save_summary_steps: Save summaries every this many steps. 
+ save_checkpoints_steps: Save checkpoints every this many steps. Can not be + specified with `save_checkpoints_secs`. + save_checkpoints_secs: Save checkpoints every this many seconds. Can not + be specified with `save_checkpoints_steps`. Defaults to 600 seconds if + both `save_checkpoints_steps` and `save_checkpoints_secs` are not set + in constructor. If both `save_checkpoints_steps` and + `save_checkpoints_secs` are None, then checkpoints are disabled. + session_config: A ConfigProto used to set session parameters, or None. + keep_checkpoint_max: The maximum number of recent checkpoint files to + keep. As new files are created, older files are deleted. If None or 0, + all checkpoint files are kept. Defaults to 5 (that is, the 5 most recent + checkpoint files are kept.) + keep_checkpoint_every_n_hours: Number of hours between each checkpoint + to be saved. The default value of 10,000 hours effectively disables + the feature. + log_step_count_steps: The frequency, in number of global steps, that the + global step/sec and the loss will be logged during training. + enabel_data_pre_proc: This is the switch of data preprocess. + precision_mode: enable or disable mix precision. + variable_format_optimize: enable or disable variable format optimize while graph + engineer optimize process. + mix_compile_mode: This is the swith of mix_compile_mode. When the value is + False, all graphs run on device. Otherwise, some graphs run on host. + hcom_parallel: This is the switch of hcom parallel. When the value is True, + hcom will execute with parallel mode. Otherwise, hcom will execute with + serialize mode. + graph_memory_max_size: The max size of ge graph memory size. + variable_memory_max_size: The max size of ge variable memory size. + auto_tune_mode: None, or `GA` ,or `RL` or `GA|RL` + dump_config: The dump configuration. 
+ stream_max_parallel_num: Specify the degree of parallelism of the AICPU / AICORE engine + to achieve parallel execution between AICPU / AICORE operators. + """ + + # Check iterations_per_loop. + util.check_positive_integer(iterations_per_loop, "iterations_per_loop") + if isinstance(mix_compile_mode, bool) == False: + raise ValueError('"mix_compile_mode" type must be bool') + if mix_compile_mode is True and iterations_per_loop != 1: + raise ValueError( + '"iterations_per_loop" must be 1 with "mix_compile_mode" is True') + tf_config = json.loads(os.environ.get(run_config_lib._TF_CONFIG_ENV, '{}')) + tmp_cluster_spec = server_lib.ClusterSpec(tf_config.get(run_config_lib._CLUSTER_KEY, {})) + if ((tmp_cluster_spec and not isinstance(distribute, ParameterServerStrategy)) or + (not tmp_cluster_spec and isinstance(distribute, ParameterServerStrategy))): + raise ValueError('"cluster" and "distribute" must all be set in ps mode') + if tmp_cluster_spec and mix_compile_mode is False: + raise ValueError( + '"mix_compile_mode" can only be True with "cluster" is set') + + self.iterations_per_loop = iterations_per_loop + self.mix_compile_mode = mix_compile_mode + self.enable_data_pre_proc = enable_data_pre_proc + self.is_tailing_optimization = is_tailing_optimization + if save_checkpoints_secs==None and save_checkpoints_steps==None : + save_checkpoints_steps=100 + + self._profiling_config = profiling_config + + # mix precision configuration + self._precision_mode = precision_mode + self._enable_reduce_precision = enable_reduce_precision + self._variable_format_optimize = variable_format_optimize + self._hcom_parallel = hcom_parallel + self._graph_memory_max_size = graph_memory_max_size + self._variable_memory_max_size = variable_memory_max_size + + self._auto_tune_mode = auto_tune_mode + + if dump_config is not None and not isinstance(dump_config, DumpConfig): + raise ValueError( + '`dump_config` must be provided with type `DumpConfig`') + self._dump_config = dump_config + 
self._stream_max_parallel_num = stream_max_parallel_num + + if isinstance(horovod_mode, bool) == False: + raise ValueError('"horovod_mode" type must be bool') + self.horovod_mode = horovod_mode + util.check_nonnegative_integer(graph_run_mode, "graph_run_mode") + if graph_run_mode > 1: + raise ValueError('"graph_run_mode" value must be 0 or 1') + self.graph_run_mode = graph_run_mode + self.op_debug_level = op_debug_level + self.enable_scope_fusion_passes = enable_scope_fusion_passes + experimental_distribute = None + if tmp_cluster_spec and isinstance(distribute, ParameterServerStrategy): + experimental_distribute = DistributeConfig(distribute, distribute, None) + + super(NPURunConfig, self).__init__( + model_dir=model_dir, + tf_random_seed=tf_random_seed, + save_summary_steps=save_summary_steps, + save_checkpoints_steps=save_checkpoints_steps, + save_checkpoints_secs=save_checkpoints_secs, + session_config=session_config, + keep_checkpoint_max=keep_checkpoint_max, + keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, + log_step_count_steps=log_step_count_steps, + experimental_distribute=experimental_distribute) + +class ProfilingConfig(): + """Profiling config with NPU support.""" + + def __init__(self, + enable_profiling=False, + enable_options=[]): + """ + Constructs a ProfilingConfig. + Args: + enable_profiling: Enable profiling, default is False. + enable_options: Profiling options, list of `training_trace` or `task_trace` + or `op_trace`. + """ + + self._enable_profiling = enable_profiling + self._enable_options = enable_options + + +class DumpConfig(): + """Dump Config with NPU support.""" + def __init__(self, + enable_dump=False, + dump_path=None, + dump_step=None, + dump_mode="output", + enable_dump_debug=False, + dump_debug_mode="all"): + """ + Constructs a DumpConfig. + + Args: + enable_dump: Enable dump, default is False. + dump_path: The dump path. + dump_step: Specify step dump data. eg."0|5|10". 
+ dump_mode: Specify dump Op input or output or both. + enable_dump_debug: Enable dump debug, default is False. + dump_debug_mode: Debug dump mode, only support three kinds of mode(aicore_overflow, atomic_overflow or all). + """ + self._enable_dump = enable_dump + self._dump_path = dump_path + self._dump_step = dump_step + self._dump_mode = dump_mode + self._enable_dump_debug = enable_dump_debug + self._dump_debug_mode = dump_debug_mode diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py new file mode 100644 index 000000000..e35d0b638 --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -0,0 +1,741 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tensorflow as tf +from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.framework import ops +from tensorflow.python.framework import constant_op +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import array_ops +from tensorflow.python.estimator import estimator as estimator_lib +from tensorflow.python.estimator import util as estimator_util +from tensorflow.python.estimator import model_fn as model_fn_lib +from tensorflow.python.training import session_run_hook +from tensorflow.python.util import function_utils +from tensorflow.python.util import tf_inspect + +from npu_bridge.estimator.npu.npu_config import NPURunConfig +from npu_bridge.estimator.npu.npu_hook import * +from npu_bridge.estimator.npu.npu_common import NPUBasics +from npu_bridge.estimator import npu_ops + +import six +from six.moves import queue as Queue +from six.moves import xrange +import collections +import copy +import os +import signal +import sys +import threading +import json +import 
def no_check_override():
    """Return a context manager that temporarily disables Estimator's
    member-override assertion so NPUEstimator may override members."""
    class _Manager:
        def __init__(self):
            pass

        def __enter__(self):
            # Stash the original check and replace it with a no-op.
            self.__orign = estimator_lib.Estimator._assert_members_are_not_overridden
            estimator_lib.Estimator._assert_members_are_not_overridden = lambda x: None

        def __exit__(self, exc_type, exc_val, exc_tb):
            # Restore the original check even when the body raised.
            estimator_lib.Estimator._assert_members_are_not_overridden = self.__orign
    return _Manager()


def _wrap_computation_in_while_loop(iterations_per_loop_var, op_fn):
    """Run `op_fn` `iterations_per_loop_var` times inside a tf while_loop."""

    def computation(i):
        # Each iteration re-executes `op_fn` before incrementing the counter.
        with ops.control_dependencies([op_fn]):
            return i + 1

    iterations = array_ops.identity(iterations_per_loop_var)
    return control_flow_ops.while_loop(
        lambda i: i < iterations,
        computation, [constant_op.constant(0)],
        parallel_iterations=1)


class _OutfeedHostCall(object):
    """Bookkeeping for host_call functions fed through the NPU outfeed."""

    def __init__(self, channel_name):
        self._channel_name = str(channel_name)
        self._names = []
        # All of these are dictionaries of lists keyed on the name.
        self._host_fns = {}
        self._tensor_keys = collections.defaultdict(list)
        self._tensors = collections.defaultdict(list)
        self._tensor_dtypes = collections.defaultdict(list)
        self._tensor_shapes = collections.defaultdict(list)

    @staticmethod
    def validate(host_calls):
        """Validates the `eval_metrics` and `host_call` in `NPUEstimatorSpec`."""
        for name, host_call in host_calls.items():
            if not isinstance(host_call, (tuple, list)):
                raise ValueError('{} should be tuple or list'.format(name))
            if len(host_call) != 2:
                raise ValueError('{} should have two elements.'.format(name))
            if not callable(host_call[0]):
                raise TypeError('{}[0] should be callable.'.format(name))
            if not isinstance(host_call[1], (tuple, list, dict)):
                raise ValueError('{}[1] should be tuple or list, or dict.'.format(name))

            if isinstance(host_call[1], (tuple, list)):
                fullargspec = tf_inspect.getfullargspec(host_call[0])
                fn_args = function_utils.fn_args(host_call[0])
                # wrapped_hostcall_with_global_step uses varargs, so we allow that.
                if fullargspec.varargs is None and len(host_call[1]) != len(fn_args):
                    raise RuntimeError(
                        'In NPUEstimatorSpec.{}, length of tensors {} does not match '
                        'method args of the function, which takes {}.'.format(
                            name, len(host_call[1]), len(fn_args)))

    def create_npu_hostcall(self):
        """Sends the tensors through outfeed and runs the host_fn on CPU.

        Returns:
            A dictionary mapping name to the return value of the host_call by
            that name.

        Raises:
            TypeError: If the dequeued tensors do not match the host_fn args.
        """
        if not self._names:
            return {}

        ret = {}
        # Flatten the recorded dtypes/shapes in name order for the dequeue op.
        # (The original also built a `dequeue_ops` list of empty lists here
        # that was never used; it has been removed.)
        tensor_dtypes = []
        tensor_shapes = []
        for name in self._names:
            for dtype in self._tensor_dtypes[name]:
                tensor_dtypes.append(dtype)
            for shape in self._tensor_shapes[name]:
                tensor_shapes.append(shape)

        outfeed_tensors = npu_ops.outfeed_dequeue_op(
            channel_name=self._channel_name,
            output_types=tensor_dtypes,
            output_shapes=tensor_shapes)

        # Deconstruct dequeue ops back into per-name slices.
        outfeed_tensors_by_name = {}
        pos = 0
        for name in self._names:
            outfeed_tensors_by_name[name] = outfeed_tensors[pos:pos + len(self._tensors[name])]
            pos += len(self._tensors[name])

        for name in self._names:
            host_fn_tensors = outfeed_tensors_by_name[name]
            if self._tensor_keys[name] is not None:
                # Keyed host_call: pass the tensors as keyword arguments.
                host_fn_tensors = dict(zip(self._tensor_keys[name], host_fn_tensors))
                try:
                    ret[name] = self._host_fns[name](**host_fn_tensors)
                except TypeError as e:
                    logging.warning(
                        'Exception while calling %s: %s. It is likely the tensors '
                        '(%s[1]) do not match the '
                        'function\'s arguments', name, e, name)
                    # Bare `raise` preserves the original traceback
                    # (`raise e` would rebuild it from here).
                    raise
            else:
                ret[name] = self._host_fns[name](*host_fn_tensors)

        return ret

    def create_enqueue_op(self):
        """Create the op to enqueue the recorded host_calls.

        Returns:
            The enqueue op, or an empty list if there are no host calls.
        """
        if not self._names:
            return []

        tensors = []
        for name in self._names:
            tensors.extend(self._tensors[name])
        if len(tensors) == 0:
            return []
        return npu_ops.outfeed_enqueue_op(inputs=tensors, channel_name=self._channel_name)

    def record(self, host_calls):
        """Record host_call functions and their tensors for enqueue/dequeue.

        Mirrors `record` in tpu_estimator.py.
        """
        for name, host_call in host_calls.items():
            host_fn, tensor_list_or_dict = host_call
            self._names.append(name)
            self._host_fns[name] = host_fn

            if isinstance(tensor_list_or_dict, dict):
                for (key, tensor) in six.iteritems(tensor_list_or_dict):
                    self._tensor_keys[name].append(key)
                    self._tensors[name].append(tensor)
                    self._tensor_dtypes[name].append(tensor.dtype)
                    self._tensor_shapes[name].append(tensor.shape)
            else:
                # List or tuple: positional arguments, no keys.
                self._tensor_keys[name] = None
                for tensor in tensor_list_or_dict:
                    self._tensors[name].append(tensor)
                    self._tensor_dtypes[name].append(tensor.dtype)
                    self._tensor_shapes[name].append(tensor.shape)
class NPUEstimatorSpec(model_fn_lib.EstimatorSpec):
    """Ops and objects returned from a `model_fn` and passed to an `NPUEstimator`.

    `NPUEstimatorSpec` fully defines the model to be run by an `Estimator`.
    It extends `EstimatorSpec` with an optional `host_call`.
    """

    def __new__(cls,
                mode,
                predictions=None,
                loss=None,
                train_op=None,
                eval_metric_ops=None,
                export_outputs=None,
                training_chief_hooks=None,
                training_hooks=None,
                scaffold=None,
                evaluation_hooks=None,
                prediction_hooks=None,
                host_call=None):
        """Creates a validated `NPUEstimatorSpec` instance.

        Required fields depend on `mode`:
          * `ModeKeys.TRAIN`: `loss` and `train_op`.
          * `ModeKeys.EVAL`: `loss`.
          * `ModeKeys.PREDICT`: `predictions`.

        `model_fn` may populate all arguments independent of mode; arguments
        irrelevant to the current mode are ignored by the `Estimator` (e.g.
        `train_op` in eval and infer modes).

        Args:
            mode: A `ModeKeys`. Specifies training, evaluation or prediction.
            predictions: Predictions `Tensor` or dict of `Tensor`.
            loss: Training loss `Tensor`; scalar or shape `[1]`.
            train_op: Op for the training step.
            eval_metric_ops: Dict of metric results keyed by name; values are
                `Metric` instances or `(metric_tensor, update_op)` tuples.
            export_outputs: Describes output signatures exported to
                `SavedModel` and used during serving; a `{name: ExportOutput}`
                dict. Defaults to a `PredictOutput` over `predictions`.
            training_chief_hooks: `tf.train.SessionRunHook`s run on the chief
                worker during training.
            training_hooks: `tf.train.SessionRunHook`s run on all workers
                during training.
            scaffold: A `tf.train.Scaffold` for initialization, saver, etc.
            evaluation_hooks: `tf.train.SessionRunHook`s run during evaluation.
            prediction_hooks: `tf.train.SessionRunHook`s run during prediction.
            host_call: A tuple of `func` and a list of `tensor` or a `dict`;
                used to get summary information sent to host every step. Only
                used when mode is `ModeKeys.TRAIN` or `ModeKeys.EVAL`.

        Returns:
            A validated `NPUEstimatorSpec` object.

        Raises:
            ValueError: If validation fails.
            TypeError: If any of the arguments is not the expected type.
        """
        # Validate the host_call shape before constructing the spec.
        pending_host_calls = {}
        if host_call is not None:
            pending_host_calls["host_call"] = host_call
        _OutfeedHostCall.validate(pending_host_calls)
        spec = super(NPUEstimatorSpec, cls).__new__(
            cls,
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op,
            eval_metric_ops=eval_metric_ops,
            export_outputs=export_outputs,
            training_chief_hooks=training_chief_hooks,
            training_hooks=training_hooks,
            scaffold=scaffold,
            evaluation_hooks=evaluation_hooks,
            prediction_hooks=prediction_hooks,
        )
        # Stash the host_call on the instance for NPUEstimator to pick up.
        spec._host_call = host_call
        return spec
+ """ + logging.info("NPUEstimator init...") + + if config is None or not isinstance(config, NPURunConfig): + raise ValueError( + '`config` must be provided with type `NPUConfigs`') + + # Verifies the model_fn signature according to Estimator framework. + estimator_lib._verify_model_fn_args(model_fn, params) # pylint: disable=protected-access + + # Load the graph optimizers. + config = self.__load_graph_optimizers(config) + + # Init npu system: get task and device info from configuration file. + self.__load_job_info(job_start_file) + + # Wrap model_fn to adding npu sessionhooks. + model_function = self.__augment_model_fn(model_fn, model_dir, config) + + # Get the checkpoint file. + if not warm_start_from: + restore_from = self.__job_info._local_checkpoint_dir + # tf use restore_from variable, no need to check safety. + if restore_from is None or restore_from == "": + restore_from = os.getenv('RESTORE_FROM') + else: + restore_from = warm_start_from + + # Passing non-None params as wrapped model_fn use it. 
+ params = params or {} + with no_check_override(): + super(NPUEstimator, self).__init__( + model_fn=model_function, + model_dir=model_dir, + config=config, + params=params, + warm_start_from=restore_from) + + def __augment_model_fn(self, model_fn, model_dir, config): + """Returns a new model_fn, which wraps the NPU support.""" + def _model_fn(features, labels, mode, params): + """A Estimator `model_fn` for NPUEstimator.""" + model_fn_args = function_utils.fn_args(model_fn) + kwargs = {} + if 'labels' in model_fn_args: + kwargs['labels'] = labels + if 'mode' in model_fn_args: + kwargs['mode'] = mode + if 'params' in model_fn_args: + kwargs['params'] = params + if 'config' in model_fn_args: + kwargs['config'] = config + estimator_spec = model_fn(features=features, **kwargs) + + """ + add hooks: + NPUInitHook: for all mode, NPUInitHook should be the first session hook + NPUShutDownHook: for all mode, NPUShutDownHook should be the first session hook + NPUBroadcastGlobalVariablesHook: train + NPUCheckpointSaverHook:train + """ + npu_hooks = [] + + if mode == model_fn_lib.ModeKeys.TRAIN: + if not isinstance(estimator_spec, NPUEstimatorSpec) and not isinstance(estimator_spec, model_fn_lib.EstimatorSpec): + raise RuntimeError('estimator_spec used by NPU train must have type ' + '`NPUEstimatorSpec` or `EstimatorSpec`. Got {}'.format(type(estimator_spec))) + # 1. NPUBroadcastGlobalVariablesHook + rank_size = os.getenv('RANK_SIZE') + if rank_size != None and rank_size.isdigit() and int(rank_size) > 1 and not config.horovod_mode: + npu_hooks.append(NPUBroadcastGlobalVariablesHook(self.__device_info._root_rank, self.__device_info._index)) + + # 2. 
NPUCheckpointSaverHook + if config.save_checkpoints_steps or config.save_checkpoints_secs: + new_model_dir = self.__check_model_dir(model_dir, config) + npu_hooks.append(NPUCheckpointSaverHook( + checkpoint_dir=new_model_dir, + save_secs=config.save_checkpoints_secs, + save_steps=config.save_checkpoints_steps)) + + if isinstance(estimator_spec, NPUEstimatorSpec): + if estimator_spec._host_call is not None: + host_call = _OutfeedHostCall(mode) + host_call.record({"host_call" : estimator_spec._host_call}) + # add outfeed enqueue op + loss, train_op = estimator_spec.loss, estimator_spec.train_op + with ops.control_dependencies([train_op]): + host_call_outfeed_op = host_call.create_enqueue_op() + with ops.control_dependencies([host_call_outfeed_op]): + loss = array_ops.identity(loss) + estimator_spec = estimator_spec._replace(loss=loss) + # add outfeed dnqueue op + host_call_ops = host_call.create_npu_hostcall() + npu_hooks.append(NPUInfeedOutfeedSessionHook(host_call_ops, mode)) + npu_hooks.append(NPULogOutfeedSessionHook(sys.stderr)) + + # 3. set iterations per loop hook + if config.iterations_per_loop > 1 : + npu_hooks.append(SetIterationsVarHook(config.iterations_per_loop)) + train_op = tf.group(estimator_spec.train_op, name="IterationOp") + estimator_spec = estimator_spec._replace(train_op=train_op) + + train_hooks = estimator_spec.training_hooks + train_hooks = list(train_hooks or []) + new_train_hooks = npu_hooks + train_hooks + + estimator_spec = estimator_spec._replace(training_hooks=tuple(new_train_hooks)) + + elif mode == model_fn_lib.ModeKeys.EVAL: + if not isinstance(estimator_spec, NPUEstimatorSpec) and not isinstance(estimator_spec, model_fn_lib.EstimatorSpec): + raise RuntimeError('estimator_spec used by NPU evaluate must have type ' + '`NPUEstimatorSpec` or `EstimatorSpec`. 
Got {}'.format(type(estimator_spec))) + if isinstance(estimator_spec, NPUEstimatorSpec): + if estimator_spec._host_call is not None: + host_call = _OutfeedHostCall(mode) + host_call.record({"host_call" : estimator_spec._host_call}) + # add outfeed enqueue op + loss, train_op = estimator_spec.loss, estimator_spec.train_op + with ops.control_dependencies([loss]): + host_call_outfeed_op = host_call.create_enqueue_op() + with ops.control_dependencies([host_call_outfeed_op]): + loss = array_ops.identity(loss) + estimator_spec = estimator_spec._replace(loss=loss) + # add outfeed dnqueue op + host_call_ops = host_call.create_npu_hostcall() + npu_hooks.append(NPUInfeedOutfeedSessionHook(host_call_ops, mode)) + npu_hooks.append(NPULogOutfeedSessionHook(sys.stderr)) + if len(npu_hooks) > 0: + evaluation_hooks = estimator_spec.evaluation_hooks + evaluation_hooks = list(evaluation_hooks or []) + new_evaluation_hooks = npu_hooks + evaluation_hooks + estimator_spec = estimator_spec._replace(evaluation_hooks=tuple(new_evaluation_hooks)) + + elif mode == model_fn_lib.ModeKeys.PREDICT: + if len(npu_hooks) > 0: + prediction_hooks = estimator_spec.prediction_hooks + prediction_hooks = list(prediction_hooks or []) + new_prediction_hooks = npu_hooks + prediction_hooks + + estimator_spec = estimator_spec._replace(prediction_hooks=tuple(new_prediction_hooks)) + return estimator_spec + + return _model_fn + + def __check_profiling_options(self, profiling_options=[]): + """Check profiling options . + Args: + profiling_options: Profiling options. + Return: + Valid options + Raise: + If profiling_options is null or option is not `training_trace` or `task_trace`, `op_trace`'. 
+ """ + + error_mag = 'profiling options must be in `training_trace`, `task_trace` or `op_trace`' + + if len(profiling_options) == 0: + raise ValueError(error_mag) + + profiling_types = ["training_trace", "task_trace", "op_trace"] + for option in profiling_options : + if option not in profiling_types: + raise ValueError(error_mag) + + result = ":".join(profiling_options) + return result + + def __load_profiling_options(self, config, custom_op): + """Load profiling config ,and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Custom optimizers. + """ + if config._profiling_config is None: + """ + there is no profiling config in user's python script, + then use the default profiling configuration + """ + custom_op.parameter_map["profiling_mode"].b = False + profiling_options = "training_trace" + custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes(profiling_options) + + else: + if config._profiling_config._enable_profiling: + # User enable profiling + custom_op.parameter_map["profiling_mode"].b = True + # check profiling ,and get valid options + profiling_options = self.__check_profiling_options(config._profiling_config._enable_options) + custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes(profiling_options) + else: + # User disable profiling, + custom_op.parameter_map["profiling_mode"].b = False + profiling_options = "training_trace" + custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes(profiling_options) + + def __load_mix_precision(self, config, custom_op): + """Load mix precision config ,and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Custom optimizers. 
+ """ + if config._precision_mode is not None: + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(config._precision_mode) + custom_op.parameter_map["enable_reduce_precision"].b = config._enable_reduce_precision + + def __load__variable_format_optimize(self, config, custom_op): + """Load variable acceleration config ,and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Customer optimizers. + """ + + custom_op.parameter_map["variable_format_optimize"].b = config._variable_format_optimize + + def __load_auto_tune_config(self, config, custom_op): + """Load auto tune config ,and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Custom optimizers. + """ + + if config._auto_tune_mode is not None: + custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes(config._auto_tune_mode) + + def __load_dump_config(self, config, custom_op): + """Load dump config ,and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Customer optimizers. 
+ """ + if config._dump_config is None: + """ + there is no dump config in user's python script, + then use the default dump configuration + """ + custom_op.parameter_map["enable_dump"].b = False + custom_op.parameter_map["enable_dump_debug"].b = False + + else: + custom_op.parameter_map["enable_dump"].b = config._dump_config._enable_dump + custom_op.parameter_map["enable_dump_debug"].b = config._dump_config._enable_dump_debug + if config._dump_config._dump_path is not None: + custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(config._dump_config._dump_path) + if config._dump_config._dump_step is not None: + custom_op.parameter_map["dump_step"].s = tf.compat.as_bytes(config._dump_config._dump_step) + if config._dump_config._dump_mode is not None: + custom_op.parameter_map["dump_mode"].s = tf.compat.as_bytes(config._dump_config._dump_mode) + if config._dump_config._dump_mode is not None: + custom_op.parameter_map["dump_debug_mode"].s = tf.compat.as_bytes(config._dump_config._dump_debug_mode) + + def __load_stream_max_config(self, config, custom_op): + """Load stream_max_parallel_num config ,and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Customer optimizers. + """ + if config._stream_max_parallel_num is not None: + custom_op.parameter_map["stream_max_parallel_num"].s = tf.compat.as_bytes(config._stream_max_parallel_num) + + def __load_ps_mode_config(self, config, custom_op): + """Load stream_max_parallel_num config ,and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Customer optimizers. + """ + config_info = json.loads(os.environ.get('TF_CONFIG') or '{}') + + # Set task_type and task_id if the TF_CONFIG environment variable is + # present. Otherwise, use the respective default (None / 0). 
+ task_env = config_info.get('task', {}) + task_type = task_env.get('type', None) + task_index = task_env.get('index', 0) + if task_type: + custom_op.parameter_map["job"].s = tf.compat.as_bytes(task_type) + custom_op.parameter_map["task_index"].i = int(task_index) + else: + custom_op.parameter_map["job"].s = tf.compat.as_bytes('localhost') + custom_op.parameter_map["task_index"].i = 0 + + def __load_graph_optimizers(self, config): + """Change the session config and load the graph optimizers: + GradFusionOptimizer and OMPartitionSubgraphsPass.""" + + if config.session_config is None: + config = config.replace(session_config=tf.ConfigProto()) + + config.session_config.graph_options.rewrite_options.optimizers.extend(["pruning", + "function", + "constfold", + "shape", + "arithmetic", + "loop", + "dependency", + "layout", + "memory", + "GradFusionOptimizer"]) + # config set + custom_op = config.session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + custom_op.parameter_map["enable_data_pre_proc"].b = config.enable_data_pre_proc + custom_op.parameter_map["mix_compile_mode"].b = config.mix_compile_mode + custom_op.parameter_map["use_off_line"].b = True + custom_op.parameter_map["iterations_per_loop"].i = config.iterations_per_loop + custom_op.parameter_map["is_tailing_optimization"].b = config.is_tailing_optimization + custom_op.parameter_map["min_group_size"].b = 1 + custom_op.parameter_map["hcom_parallel"].b = config._hcom_parallel + if config._graph_memory_max_size is not None: + custom_op.parameter_map["graph_memory_max_size"].s = tf.compat.as_bytes(str(config._graph_memory_max_size)) + if config._variable_memory_max_size is not None: + custom_op.parameter_map["variable_memory_max_size"].s = tf.compat.as_bytes(str(config._variable_memory_max_size)) + custom_op.parameter_map["graph_run_mode"].i = config.graph_run_mode + custom_op.parameter_map["op_debug_level"].i = config.op_debug_level + if 
config.enable_scope_fusion_passes is not None: + custom_op.parameter_map["enable_scope_fusion_passes"].s = tf.compat.as_bytes(config.enable_scope_fusion_passes) + + # add profiling options to custom_op + self.__load_profiling_options(config, custom_op) + + # add mix precision to custom_op + self.__load_mix_precision(config, custom_op) + + # add variable acceleration to custom_op + self.__load__variable_format_optimize(config, custom_op) + + # add auto une config to custom_op + self.__load_auto_tune_config(config, custom_op) + + # add dump config to custom_op + self.__load_dump_config(config, custom_op) + + # add stream_max_parallel to custom_op + self.__load_stream_max_config(config, custom_op) + + self.__load_ps_mode_config(config, custom_op) + + return config + + + def __load_job_info(self, job_start_file): + """Parse the file from the CSA.""" + # Read the job config file. + basic = NPUBasics(job_start_file) + if basic.jobinfo is None: + return False + + # Get Device info from config file. + self.__job_info = basic.jobinfo + self.__device_info = basic.jobinfo._device_info + return True + + def __check_model_dir(self, model_dir, config): + """Check model dir. If model dir is None, create temp dir. + + Returns: + Model dir. + + Raises: + ValueError: If model_dir of NPUEstimator is different with model_dir of NPURunConfig. + """ + if (model_dir is not None) and (config.model_dir is not None): + if model_dir != config.model_dir: + raise ValueError( + 'model_dir are set both in NPUEstimator and NPURunConfig, but with ' + "different values. 
In constructor: '{}', in NPURunConfig: " + "'{}' ".format(model_dir, config.model_dir)) + + model_dir = model_dir or config.model_dir + if model_dir is None: + while True: + model_dir = "model_dir_" + \ + "".join(random.sample(string.ascii_letters + string.digits, 10)) + cwd = os.getcwd() + model_dir = os.path.join(cwd, model_dir) + if not tf.io.gfile.exists(model_dir): + break + logging.warning('Using temporary folder as model directory: %s', model_dir) + tf.io.gfile.mkdir(model_dir) + return model_dir \ No newline at end of file diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_hook.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_hook.py new file mode 100644 index 000000000..f378f6f06 --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_hook.py @@ -0,0 +1,362 @@ +import tensorflow as tf +from six.moves import queue as Queue +import threading +from tensorflow.python.training import session_run_hook +from tensorflow.python.training import basic_session_run_hooks +from tensorflow.python.platform import tf_logging as logging +from npu_bridge.estimator import npu_ops +from npu_bridge.hccl import hccl_ops +from tensorflow.python.ops import summary_ops_v2 as contrib_summary +from tensorflow.core.protobuf import config_pb2 +import time +from npu_bridge.estimator.npu import util + +# Constant +_BATCH_SIZE_KEY = 'batch_size' +_RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY] + +_NPU_RUNCONFIG = 'npu_runconfig' +_ITERATIONS_PER_LOOP_VAR = 'iterations_per_loop' +_LOOP_COND_VAR = 'loop_cond' +_CONST_ZERO = 'zero' +_CONST_ONE = 'one' + +util.register_func(_ITERATIONS_PER_LOOP_VAR) +util.register_func(_LOOP_COND_VAR) +util.register_func(_CONST_ZERO) +util.register_func(_CONST_ONE) + +class NPUShutDownHook(session_run_hook.SessionRunHook): + """Hook to shutdown the system.""" + + def __init__(self, scaffold=None): + super(NPUShutDownHook, self).__init__() + self._scaffold = scaffold + self._shutdown_npu_op = None + + def begin(self): + if not 
self._shutdown_npu_op or self._shutdown_npu_op.graph != tf.get_default_graph(): + self._shutdown_npu_op = npu_ops.NPUShutdown() + + def end(self, session): + logging.info("NPUShutDownHook run...") + session.run(self._shutdown_npu_op) + + +class NPUBroadcastGlobalVariablesHook(session_run_hook.SessionRunHook): + """Broadcasts initial variable states from rank 0 to all other processes. + + This is necessary to ensure consistent initialization of all workers when + training is started with random weights or restored from a checkpoint. + + """ + def __init__(self, root_rank=None, index=None): + """Construct a new NPUBroadcastGlobalVariablesHook that will broadcast all + global variables from root rank to all other processes during initialization. + + Args: + root_rank: + Rank that will send data, other ranks will receive data. + index: + Current rand id. + """ + self._root_rank = root_rank + self._index = index + self._bcast_op = None + + def begin(self): + if not self._bcast_op or self._bcast_op.graph != tf.get_default_graph(): + self._bcast_op = broadcast_global_variables(self._root_rank, self._index) + + def after_create_session(self, session, coord): + logging.info("NPUBroadcastGlobalVariablesHook run...") + session.run(self._bcast_op) + + +class NPUCheckpointSaverHook(basic_session_run_hooks.CheckpointSaverHook): + """Saves checkpoints every N steps or seconds.""" + def __init__(self, + checkpoint_dir, + save_secs=None, + save_steps=None, + saver=None, + checkpoint_basename="model.ckpt", + scaffold=None, + listeners=None): + """Initializes a `CheckpointSaverHook`. + + Args: + checkpoint_dir: `str`, base directory for the checkpoint files. + save_secs: `int`, save every N secs. + save_steps: `int`, save every N steps. + saver: `Saver` object, used for saving. + checkpoint_basename: `str`, base name for the checkpoint files. + scaffold: `Scaffold`, use to get saver object. + listeners: List of `CheckpointSaverListener` subclass instances. 
+ Used for callbacks that run immediately before or after this hook saves + the checkpoint. + + Raises: + ValueError: One of `save_steps` or `save_secs` should be set. + ValueError: At most one of saver or scaffold should be set. + """ + super(NPUCheckpointSaverHook, self).__init__( + checkpoint_dir=checkpoint_dir, + save_secs=save_secs, + save_steps=save_steps, + saver=saver, + checkpoint_basename=checkpoint_basename, + scaffold=scaffold, + listeners=listeners) + + def after_run(self, run_context, run_values): + global_step = run_context.session.run(self._global_step_tensor) + logging.info("global_step..." + str(global_step)) + super().after_run(run_context, run_values) + + def end(self, session): + logging.info("NPUCheckpointSaverHook end...") + super().end(session) + +class SetIterationsVarHook(session_run_hook.SessionRunHook): + def __init__(self, iterations_per_loop=None): + self._iterations_per_loop = iterations_per_loop + + def begin(self): + self._iterations_per_loop_var = util.create_or_get_var(_ITERATIONS_PER_LOOP_VAR) + self._loop_cond_var = util.create_or_get_var(_LOOP_COND_VAR) + self._const_zero = util.create_or_get_var(_CONST_ZERO) + self._const_one = util.create_or_get_var(_CONST_ONE) + + def after_create_session(self, session, coord): + self._iterations_per_loop_var.load(self._iterations_per_loop - 1, session=session) + self._loop_cond_var.load(0, session=session) + self._const_zero.load(0, session=session) + self._const_one.load(1, session=session) + print("load iterations_per_loop value -----------") + print(session.run(self._iterations_per_loop_var)) + +def broadcast_global_variables(root_rank, index): + """Broadcasts all global variables from root rank to all other processes. + Arguments: + root_rank: rank of the process from which global variables will be broadcasted + to all other processes. 
+ """ + op_list = [] + for var in tf.global_variables(): + # the input and out tensor of HCOMBroadcast interface are list + if "float" in var.dtype.name: + inputs = [var] + outputs=hccl_ops.broadcast(tensor=inputs,root_rank=root_rank) + if outputs is not None: + op_list.append(outputs[0].op) + op_list.append(tf.assign(var, outputs[0])) + + return tf.group(op_list) + +class _SIGNAL(object): + STOP = -1 + +class _OpQueueContext(object): + """Manages work queue and thread for a infeed/outfeed thread.""" + + def __init__(self, name, target, args): + self._name = name + self._queue = Queue.Queue() + args = (self,) + args + self._thread = threading.Thread(name=name, target=target, args=args) + self._thread.daemon = True + self._thread.start() + + def stop(self): + self._queue.put(_SIGNAL.STOP) + + def join(self): + logging.info('Shutting down %s thread.' % self._name) + self.stop() + self._thread.join() + +class NPULogOutfeedSessionHook(session_run_hook.SessionRunHook): + def __init__(self, output_stream): + self._output_stream = output_stream + self._stopped = False + + def begin(self): + self._finalize_ops = [npu_ops.stop_outfeed_dequeue_op("_npu_log")] + outfeed_log_tensors = npu_ops.outfeed_dequeue_op( + channel_name="_npu_log", + output_types=[tf.string], + output_shapes=[()]) + self._dequeue_ops = tf.print(outfeed_log_tensors, output_stream=self._output_stream) + + def _run_coordinate(self, queue_ctx, session, coord): + logging.info('Starting log outfeed thread coordinate.') + while not coord.should_stop(): + time.sleep(1) + if not self._stopped: + self._stopped = True + session.run(self._finalize_ops) + + def _run_outfeed(self, queue_ctx, session): + logging.info('Starting log outfeed thread controller.') + while True: + try: + session.run(self._dequeue_ops) + except tf.errors.OutOfRangeError: + logging.info('Log outfeed thread finished') + break + except Exception as e: + logging.error('Log outfeed thread exit unexpectedly.', e.what()) + break + + def 
after_create_session(self, session, coord): + self._outfeed_controller = _OpQueueContext( + name='LogOutfeedController', target=self._run_outfeed, args=(session,)) + self._outfeed_coordinate = _OpQueueContext( + name='LogOutfeedCoordinate', target=self._run_coordinate, args=(session, coord)) + logging.info('Add log output coordinate thread to coord') + #add outfeed_coordinate thread to coord + #when the coordinated session close, the coord.join() will wait for this thread finish + coord.register_thread(self._outfeed_coordinate._thread) + + def end(self, session): + if not self._stopped: + self._stopped = True + session.run(self._finalize_ops) + logging.info('Stop log output thread controller') + self._outfeed_controller.join() + +class NPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): + def __init__(self, + dequeue_ops, + channel_name): + self._dequeue_ops = dequeue_ops + self._channel_name = channel_name + self._finished = False + self._stopped = False + + def begin(self): + self._init_ops = [] + self._finalize_ops = [npu_ops.stop_outfeed_dequeue_op(self._channel_name)] + + summary_writer_init_ops = contrib_summary.summary_writer_initializer_op() + self._init_ops.extend(summary_writer_init_ops) + # Get all the writer resources from the initializer, so we know what to flush. 
+ for op in summary_writer_init_ops: + self._finalize_ops.append(contrib_summary.flush(writer=op.inputs[0])) + + def _run_coordinate(self, queue_ctx, session, coord): + logging.info('Starting outfeed thread coordinate.') + while not coord.should_stop(): + time.sleep(1) + if not self._stopped: + self._stopped = True + session.run(self._finalize_ops) + + def _run_outfeed(self, queue_ctx, session): + logging.info('Starting outfeed thread controller.') + while True: + try: + session.run(self._dequeue_ops) + except tf.errors.OutOfRangeError: + logging.info('summary outfeed thread finished') + break + except Exception as e: + logging.error('summary outfeed thread exit unexpectedly.', e.what()) + break + logging.info('Outfeed thread finished, shutting down.') + + def after_create_session(self, session, coord): + logging.info('Init NPU system') + start = time.time() + session.run(self._init_ops, + options=config_pb2.RunOptions(timeout_in_ms=5 * 60 * 1000)) + logging.debug('Initialized NPU in %d seconds', time.time() - start) + + self._outfeed_controller = _OpQueueContext( + name='OutfeedController', target=self._run_outfeed, args=(session,)) + self._outfeed_coordinate = _OpQueueContext( + name='OutfeedCoordinate', target=self._run_coordinate, args=(session, coord)) + logging.info('Add log output coordinate thread to coord') + #add outfeed_coordinate thread to coord + #when the coordinated session close, the coord.join() will wait for this thread finish + coord.register_thread(self._outfeed_coordinate._thread) + + def end(self, session): + self._finished = True + + logging.info('Shutdown NPU system.') + if not self._stopped: + self._stopped = True + session.run(self._finalize_ops) + + logging.info('Stop output thread controller') + self._outfeed_controller.join() + +class NPUOutputTensorHook(basic_session_run_hooks.LoggingTensorHook): + """call output_fn to print tensors every N steps or at end.""" + + def __init__(self, tensors, + dependencies=None, + output_fn=None, + 
output_every_n_steps=0 + ): + """Initializes a `NPUOutputTensorHook`. + + Args: + tensors: `dict` that maps string-valued tags to tensors/tensor names, + or `iterable` of tensors/tensor names. + dependencies: control edges. + output_fn: A callable, uses __call__ to print tensors + output_every_n_steps: `int`, print the values of `tensors` once every N local + steps taken on the current worker. + + """ + self._tensors = None + self._output_fn = output_fn + self._output_every_n_steps = output_every_n_steps + self._output_list = [] + self._iter_count = 0 + if tensors is not None: + if dependencies is not None: + if not isinstance(dependencies, (tuple, list)): + dependencies = [dependencies] + + with tf.control_dependencies(dependencies): + self._tensors = {k : tf.identity(v) for k, v in tensors.items()} + else: + self._tensors = tensors + + super(NPUOutputTensorHook, self).__init__(self._tensors, every_n_iter=1 << 31) + + def begin(self): + logging.info("NPUOutputTensorHook begin...") + if self._tensors is not None: + super(NPUOutputTensorHook, self).begin() + + def before_run(self, run_context): + logging.info("NPUOutputTensorHook before_run...", self._tensors) + if self._tensors is not None: + return tf.train.SessionRunArgs(self._current_tensors) + + def after_run(self, run_context, run_values): + logging.info("NPUOutputTensorHook after_run...", run_values.results) + _ = run_context + if self._tensors is not None: + self._stash_outputs(run_values.results) + + self._iter_count += 1 + if self._iter_count % self._output_every_n_steps == 0: + self._call_output_fn() + + def end(self, session): + logging.info("NPUOutputTensorHook end...") + if self._output_list is not None and len(self._output_list): + self._call_output_fn() + + def _stash_outputs(self, tensor_values): + self._output_list.append({tag : tensor_values[tag] for tag in self._tag_order}) + + def _call_output_fn(self): + self._output_fn.__call__(self._output_list) + del self._output_list[:] diff --git 
a/tf_adapter/python/npu_bridge/estimator/npu/npu_loss_scale_manager.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_loss_scale_manager.py new file mode 100644 index 000000000..4f5f46cb4 --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_loss_scale_manager.py @@ -0,0 +1,200 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""LossScaleManager classes for mixed precision training.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc +import six + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_control_flow_ops +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope + + +@six.add_metaclass(abc.ABCMeta) +class LossScaleManager(object): + """Abstract loss scale manager class. + + Loss scale managers with a different strategy should subclass this class. + Loss scaling is a process that: + + 1) Applies a multiplier on the loss before computing gradients, and + 2) Applies the reciprocal of the multiplier on the gradients before they are + applied on variables. 
+ + This class is used together with + `tf.contrib.mixed_precision.LossScaleOptimizer` for mixed precision training + (float32 variables and float16 ops) on Nvidia GPUs in order to achieve the + same model quality as single precision training, with the benefits of + potential higher throughput. + + See `tf.contrib.mixed_precision.LossScaleOptimizer` for more details. + """ + + @abc.abstractmethod + def get_loss_scale(self): + """Returns the loss scale as a scalar `float32` tensor.""" + pass + + @abc.abstractmethod + def update_loss_scale(self, finite_grads): + """Updates loss scale based on if gradients are finite in current step. + + Args: + finite_grads: bool scalar tensor indicating if all gradients are + finite (i.e., not inf or nan). + + Returns: + An op, when executed updates the loss scale. If eager execution is + enabled, does not return anything. + """ + del finite_grads + return + + +class FixedLossScaleManager(LossScaleManager): + """Loss scale manager with a fixed loss scale. + + The loss scale is not updated for the lifetime of the class. + """ + + def __init__(self, loss_scale): + """Creates the fixed loss scale manager. + + Args: + loss_scale: A Python float. Its ideal value varies depending on models to + run. Choosing a too small loss_scale might affect model quality; a too + big loss_scale might cause inf or nan. There is no single right + loss_scale to apply. There is no harm choosing a relatively big number + as long as no nan or inf is encountered in training. + + Raises: + ValueError: If loss_scale is less than 1. 
+ """ + if loss_scale < 1: + raise ValueError("loss scale must be at least 1.") + self._loss_scale = ops.convert_to_tensor(loss_scale, dtype=dtypes.float32) + + def get_loss_scale(self): + return self._loss_scale + + def update_loss_scale(self, finite_grads): + del finite_grads + return gen_control_flow_ops.no_op() + + +class ExponentialUpdateLossScaleManager(LossScaleManager): + """Loss scale manager uses an exponential update strategy. + + In general, the strategy increases loss scale by a greater-than-one factor + after encountering a consecutive series of steps with finite gradients; + Similarly, it decreases the loss scale by a factor when the accumulated number + of steps with non-finite (nan or inf) gradients are met. An update is not + applied if its result is less than 1 or overflows the float32 dynamic range. + + The number of finite and non-finite steps are cleared every time the loss + scale is changed. The condition to decrease the loss scale is looser than to + increase it since the former does not require the steps to be consecutive. + """ + + def __init__(self, + init_loss_scale, + incr_every_n_steps, + decr_every_n_nan_or_inf=2, + incr_ratio=2, + decr_ratio=0.8): + """Constructor of exponential-update loss scale manager. + + Args: + init_loss_scale: A Python float. The loss scale to use at the beginning. + incr_every_n_steps: Increases loss scale every n consecutive steps with + finite gradients. + decr_every_n_nan_or_inf: Decreases loss scale every n accumulated steps + with nan or inf gradients. + incr_ratio: The multiplier to use when increasing the loss scale. + decr_ratio: The less-than-one-multiplier to use when decreasing the loss + scale. 
+ """ + self._incr_every_n_steps = incr_every_n_steps + self._decr_every_n_nan_or_inf = decr_every_n_nan_or_inf + self._incr_ratio = incr_ratio + self._decr_ratio = decr_ratio + self._loss_scale = variable_scope.variable( + name="loss_scale", + initial_value=ops.convert_to_tensor(init_loss_scale, dtypes.float32), + dtype=dtypes.float32, + trainable=False) + self._num_good_steps = variable_scope.variable( + name="good_steps", initial_value=0, dtype=dtypes.int32, trainable=False) + self._num_bad_steps = variable_scope.variable( + name="bad_steps", initial_value=0, dtype=dtypes.int32, trainable=False) + + def _reset_stats(self): + return control_flow_ops.group( + state_ops.assign(self._num_good_steps, 0), + state_ops.assign(self._num_bad_steps, 0)) + + def get_loss_scale(self): + """Returns the loss scale.""" + return self._loss_scale + + def update_loss_scale(self, finite_grads): + """Updates loss scale based on if gradients are finite in current step.""" + + def update_if_finite_grads(): + """Branch function when grads are all finite.""" + def incr_loss_scale(): + float_max = (3.4e+38)/self._incr_ratio + new_loss_scale = control_flow_ops.cond( + gen_math_ops.less( self._loss_scale, float_max), + lambda: self._loss_scale * self._incr_ratio, + lambda: self._loss_scale) + update_op = state_ops.assign(self._loss_scale, new_loss_scale) + # When loss_scale is updated, both good and bad steps are reset. + return control_flow_ops.group(update_op, self._reset_stats()) + + return control_flow_ops.cond( + self._num_good_steps + 1 >= self._incr_every_n_steps, + incr_loss_scale, + lambda: state_ops.assign_add(self._num_good_steps, 1).op) + + def update_if_not_finite_grads(): + """Branch function when any grad is not finite.""" + + def decr_loss_scale(): + update_op = state_ops.assign( + self._loss_scale, + gen_math_ops.maximum(1., self._loss_scale * self._decr_ratio)) + # When loss_scale is updated, both good and bad steps are reset. 
+ return control_flow_ops.group(update_op, self._reset_stats()) + + def just_update_steps(): + # When bad_steps is incremented, good_step is reset. + return control_flow_ops.group( + state_ops.assign_add(self._num_bad_steps, 1), + state_ops.assign(self._num_good_steps, 0)) + + return control_flow_ops.cond( + self._num_bad_steps + 1 >= self._decr_every_n_nan_or_inf, + decr_loss_scale, just_update_steps) + + return control_flow_ops.cond(finite_grads, update_if_finite_grads, + update_if_not_finite_grads) diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_loss_scale_optimizer.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_loss_scale_optimizer.py new file mode 100644 index 000000000..0356f7f8f --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_loss_scale_optimizer.py @@ -0,0 +1,183 @@ + +# Optimizer for mixed precision training for Davinci NPU. + +"""Loss scaling optimizer.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tensorflow as tf +from tensorflow.python.eager import context +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_control_flow_ops +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.training import optimizer +from npu_bridge.hccl import hccl_ops +from npu_bridge.estimator import npu_ops + +from npu_bridge.helper import helper +gen_npu_ops = helper.get_gen_ops(); + + +class NPULossScaleOptimizer(optimizer.Optimizer): + # TODO(jamesqin): move mixed precision training explanation to __init__ + # docstring. + """An optimizer that applies loss scaling in backprop. + This class is useful for "mixed precision training" on GPUs (or other + potential accelerators), an approach to improve compute throughput without + compromising model quality. 
+ The canonical way to perform mixed precision training is the following: + * Model variables are kept in high precision (e.g. float32). + * Computations are done in lower precision (e.g. float16), which enjoys + performance speedup by virtue of hardware support. Variables are casted to + lower precision before they're used. + * Final gradients are casted back to high precision dtype, then used to update + variables. + The side-effect of performing computation in lower precision, is that it comes + with smaller numerical range. During backproping, small gradients might + underflow in the reduced numerical range, causing a model to converge at + suboptimal level. + To prevent underflow, this optimizer multiplies the loss by a factor before + backprop starts. Consequently, the gradients are linearly scaled up by the + same factor, thus not falling into the underflow zone. After that, to perserve + the correctness of backprop, the gradients are down-scaled by the same factor, + casted to the (higher) variable precision, then applied on the variables. + See [Nvidia's manual on mixed precision training]( + https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html) + for more details. + To use loss scale optimizer, one only needs choose a loss scale strategy and + wrap a regular optimizer. See examples below. + ``` + loss = loss_fn() + opt = tf.AdamOptimizer(learning_rate=...) + # Choose a loss scale manager which decides how to pick the right loss scale + # throughout the training process. + loss_scale_manager = tf.contrib.mixed_precision.FixedLossScaleManager(5000) + # Wraps the original optimizer in a LossScaleOptimizer. + loss_scale_optimizer = + tf.contrib.mixed_precision.LossScaleOptimizer(opt, loss_scale_manager) + # Call minimize() on the loss scale optimizer. + train_op = loss_scale_optimizer.minimize(loss) + ``` + If gradients clipping is applied, one can call + `optimizer.compute_gradients()` and `optimizer.apply_gradients()` + separately. 
+ Notice the following way of using LossScaleOptimizer is not intended. Always + use `loss_scale_optimizer.compute_gradients()` to compute gradients instead of + `tf.gradients()` if doing mixed precision training. + ``` + # The following is a wrong way to use LossScaleOptimizer along with + # tf.gradients(). + # Always use loss_scale_optimizer.compute_gradients() to compute grads, or + # loss scale is not correctly applied. + grads = tf.gradients(loss, ...) + # Do some custom grad clipping. + grads = clip_grads(grads, ...) + loss_scale_optimizer.apply(grads_and_vars) + ``` + """ + + def __init__(self, opt, loss_scale_manager, is_distributed=False): + """Construct a loss scaling optimizer. + + Args: + opt: The actual optimizer that will be used to compute and apply the + gradients. Must be an implementation of the + `tf.compat.v1.train.Optimizer` interface. + loss_scale_manager: A LossScaleManager object. + """ + self._opt = opt + self._loss_scale_manager = loss_scale_manager + self._float_status = tf.constant([0.0], dtype=tf.float32) + self._is_distributed = is_distributed + self._name = "NPULossScaleOptimizer{}".format(type(optimizer).__name__) + + def compute_gradients(self, + loss, + var_list=None, + gate_gradients=optimizer.Optimizer.GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + grad_loss=None): + """Compute gradients. 
See base class `tf.compat.v1.train.Optimizer`.""" + loss_scale = self._loss_scale_manager.get_loss_scale() + if context.executing_eagerly(): + + def scaled_loss(): + loss_val = loss() + return loss_val * math_ops.cast(loss_scale, loss_val.dtype.base_dtype) + else: + if callable(loss): + loss_val = loss() + else: + loss_val = loss + scaled_loss = loss_val * math_ops.cast(loss_scale, + loss_val.dtype.base_dtype) + + self._float_status = gen_npu_ops.npu_alloc_float_status() + + grads_and_vars = self._opt.compute_gradients( + scaled_loss, + var_list=var_list, + gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + grad_loss=grad_loss) + + grads_and_vars = self._down_scale(grads_and_vars, loss_scale) + return grads_and_vars + + def apply_gradients(self, grads_and_vars, global_step=None, name=None): + """Apply gradients. See base class `tf.compat.v1.train.Optimizer`.""" + grads = [] + for (g, _) in grads_and_vars: + if g is not None: + grads.append(g) + + #is_finite_grad = [] + #for g in grads: + # is_finite_grad.append(math_ops.reduce_all(gen_math_ops.is_finite(g))) + #is_overall_finite = math_ops.reduce_all(is_finite_grad) + with tf.get_default_graph().control_dependencies(grads): + local_float_status = gen_npu_ops.npu_get_float_status(self._float_status) + cleared_float_status = gen_npu_ops.npu_clear_float_status(local_float_status) + + + if self._is_distributed: + with tf.get_default_graph().control_dependencies([local_float_status]): + aggregated_float_status = hccl_ops.allreduce([self._float_status], "sum", fusion=0) + is_overall_finite = math_ops.reduce_all(tf.equal(aggregated_float_status, + cleared_float_status)) + else: + is_overall_finite = math_ops.reduce_all(tf.equal(self._float_status, + cleared_float_status)) + # Only update gradients when all grads are finite. + def true_apply_gradients_fn(): + # TODO: Check should allreduce before or after _down_scale() ? 
+ # for now we are calling allreduce before _down_scale + def true_apply_gradients(grads_and_vars, global_step=None, name=None): + return self._opt.apply_gradients(grads_and_vars, global_step, name) + + return true_apply_gradients(grads_and_vars, global_step, name) + + update_vars = control_flow_ops.cond(is_overall_finite, + true_apply_gradients_fn, + gen_control_flow_ops.no_op) + + # Potentially adjust gradient scale in case of finite gradients. + return control_flow_ops.group( + update_vars, + self._loss_scale_manager.update_loss_scale(is_overall_finite)) + + def _down_scale(self, grads_vars, loss_scale): + # Down scale grads by the loss_scale. + gv = [] + inv_loss_scale = gen_math_ops.reciprocal(loss_scale) + for g, v in grads_vars: + if g is not None: + gv.append((g * math_ops.cast(inv_loss_scale, g.dtype.base_dtype), v)) + else: + gv.append((g, v)) + return gv diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_optimizer.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_optimizer.py new file mode 100644 index 000000000..8e89daca9 --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_optimizer.py @@ -0,0 +1,284 @@ +""" +Optimizer that implements distributed gradient reduction for NPU. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tensorflow as tf +from tensorflow.python.eager import context +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_control_flow_ops +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.training import optimizer +from npu_bridge.hccl import hccl_ops +from npu_bridge.estimator import npu_ops + +from npu_bridge.helper import helper +gen_npu_ops = helper.get_gen_ops(); + +from tensorflow.python.platform import tf_logging as logging +from npu_bridge.estimator.npu.npu_common import NPUBasics + +def allreduce(tensor, var, average=True): + """ + Perform an allreduce on a tf.Tensor or tf.IndexedSlices. + + Arguments: + tensor: tf.Tensor, tf.Variable, or tf.IndexedSlices to reduce. + The shape of the input must be identical across all ranks. + average: If True, computes the average over all ranks. + Otherwise, computes the sum over all ranks. + + This function performs a bandwidth-optimal ring allreduce on the input + tensor. If the input is an tf.IndexedSlices, the function instead does an + allgather on the values and the indices, effectively doing an allreduce on + the represented tensor. + """ + basic = NPUBasics("") + size = basic.size() + # the tensor is the instance of tf.IndexedSlices + if isinstance(tensor, tf.IndexedSlices): + # For IndexedSlices, do two allgathers intead of an allreduce. + logging.debug("HcomAllgather...") + values=hccl_ops.allgather(tensor.values, size) + indices=hccl_ops.allgather(tensor.indices, size) + + if values is None: + raise ValueError('the result of tf.HcomAllgather([tensor.values]) is empty') + if indices is None: + raise ValueError('the result of tf.HcomAllgather([tensor.indices]) is empty') + + # To make this operation into an average, divide all gathered values by the size. 
+ rank_size = tf.cast(size, tensor.values.dtype) + new_values = tf.div(values, rank_size) if average else values + + return tf.IndexedSlices(new_values, indices,dense_shape=tensor.dense_shape) + + else: + logging.debug("HcomAllReduce...") + summed_tensor=hccl_ops.allreduce(tensor,"sum") + + if summed_tensor is None:# and summed_tensor: + raise ValueError('the result of tf.DavinciAllreduce([tensor]) is empty') + + rank_size = tf.cast(size, dtype=tensor.dtype) + new_tensor = tf.div(summed_tensor, rank_size) if average else summed_tensor + + return new_tensor + +class NPUOptimizer(optimizer.Optimizer): + """An optimizer that wraps another tf.Optimizer that can using an allreduce to + average gradient values before applying gradients to model weights when + 'is_distributed' is True. And applies loss scaling in backprop when 'is_loss_scale' + is True. 'is_tailing_optimization' is used to determine whether to enable + communication tailing optimization to improve training performance, + this setting only takes effect when 'is_distributed' is True. """ + + def __init__(self, opt, loss_scale_manager=None, is_distributed=False, is_loss_scale=False, + is_tailing_optimization=False, name=None): + """Construct a loss scaling optimizer. + + Args: + opt: The actual optimizer that will be used to compute and apply the + gradients. Must be an implementation of the + `tf.compat.v1.train.Optimizer` interface. + loss_scale_manager: A LossScaleManager object. 
+ """ + self._opt = opt + self._loss_scale_manager = loss_scale_manager + self._float_status = tf.constant([0.0], dtype=tf.float32) + self._is_distributed = is_distributed + self._is_loss_scale = is_loss_scale + self._is_tailing_optimization = is_tailing_optimization + if is_loss_scale and loss_scale_manager is None: + raise ValueError("is_loss_scale is True, loss_scale_manager can not be None") + if name is None: + name = "NPUOptimizer{}".format(type(opt).__name__) + self._name = name + + def compute_gradients(self, + loss, + var_list=None, + gate_gradients=optimizer.Optimizer.GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + grad_loss=None): + """Compute gradients. See base class `tf.compat.v1.train.Optimizer`.""" + if self._is_loss_scale: + loss_scale = self._loss_scale_manager.get_loss_scale() + if context.executing_eagerly(): + + def scaled_loss(): + loss_val = loss() + return loss_val * math_ops.cast(loss_scale, loss_val.dtype.base_dtype) + else: + if callable(loss): + loss_val = loss() + else: + loss_val = loss + scaled_loss = loss_val * math_ops.cast(loss_scale, + loss_val.dtype.base_dtype) + + self._float_status = gen_npu_ops.npu_alloc_float_status() + else : + scaled_loss = loss + + logging.debug("compute_gradients...") + gradients = self._opt.compute_gradients( + scaled_loss, + var_list=var_list, + gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + grad_loss=grad_loss) + if not self._is_distributed: + if self._is_loss_scale: + return self._down_scale(gradients, loss_scale) + else: + return gradients + + averaged_gradients = [] + grads = [] + with tf.name_scope(self._name + "_Allreduce"): + for grad, var in gradients: + grads.append(grad) + if self._is_loss_scale and (len(grads) == len(gradients)) and self._is_tailing_optimization: + self._reduce_all(grads) + with tf.get_default_graph().control_dependencies([self._is_overall_finite]): + avg_grad = 
allreduce(grad, var, True) if grad is not None else None + averaged_gradients.append((avg_grad, var)) + else: + avg_grad = allreduce(grad, var, True) if grad is not None else None + averaged_gradients.append((avg_grad, var)) + if self._is_loss_scale: + return self._down_scale(averaged_gradients, loss_scale) + else: + return averaged_gradients + + def apply_gradients(self, grads_and_vars, global_step=None, name=None): + """Apply gradients. See base class `tf.compat.v1.train.Optimizer`.""" + + if self._is_loss_scale: + if not self._is_tailing_optimization: + grads = [g for (g, _) in grads_and_vars] + self._reduce_all(grads) + + def true_apply_gradients_fn(): + def true_apply_gradients(grads_and_vars, global_step=None, name=None): + return self._opt.apply_gradients(grads_and_vars, global_step, name) + + return true_apply_gradients(grads_and_vars, global_step, name) + + update_vars = control_flow_ops.cond(self._is_overall_finite, + true_apply_gradients_fn, + gen_control_flow_ops.no_op) + + # Potentially adjust gradient scale in case of finite gradients. + return control_flow_ops.group( + update_vars, + self._loss_scale_manager.update_loss_scale(self._is_overall_finite)) + else: + return self._opt.apply_gradients(grads_and_vars, global_step, name) + + def _down_scale(self, grads_vars, loss_scale): + # Down scale grads by the loss_scale. 
+ gv = [] + inv_loss_scale = gen_math_ops.reciprocal(loss_scale) + for g, v in grads_vars: + if g is not None: + gv.append((g * math_ops.cast(inv_loss_scale, g.dtype.base_dtype), v)) + else: + gv.append((g, v)) + return gv + + def _reduce_all(self, grads): + with tf.get_default_graph().control_dependencies(grads): + local_float_status = gen_npu_ops.npu_get_float_status(self._float_status) + cleared_float_status = gen_npu_ops.npu_clear_float_status(local_float_status) + + if self._is_distributed: + with tf.get_default_graph().control_dependencies([local_float_status]): + aggregated_float_status = hccl_ops.allreduce([self._float_status], "sum", fusion=0) + self._is_overall_finite = math_ops.reduce_all(tf.equal(aggregated_float_status, + cleared_float_status)) + else: + self._is_overall_finite = math_ops.reduce_all(tf.equal(self._float_status, + cleared_float_status)) + + def get_slot(self, *args, **kwargs): + """Calls this same method on the underlying optimizer.""" + return self._opt.get_slot(*args, **kwargs) + + def get_slot_names(self, *args, **kwargs): + """Calls this same method on the underlying optimizer.""" + return self._opt.get_slot_names(*args, **kwargs) + + def variables(self, *args, **kwargs): + """Calls this same method on the underlying optimizer.""" + return self._opt.variables(*args, **kwargs) + + +class NPUDistributedOptimizer(tf.train.Optimizer): + """ + An optimizer that wraps another tf.Optimizer, using an allreduce to + average gradient values before applying gradients to model weights. + """ + + def __init__(self, optimizer, name=None): + """ + Construct a new DistributedOptimizer, which uses another optimizer + under the hood for computing single-process gradient values and + applying gradient updates after the gradient values have been averaged + across all the hcom ranks. + + Args: + optimizer: Optimizer to use for computing gradients and applying updates. + name: Optional name prefix for the operations created when applying + gradients. 
Defaults to "Distributed" followed by the provided + optimizer type. + See Optimizer.__init__ for more info. + """ + if name is None: + name = "Distributed{}".format(type(optimizer).__name__) + self._optimizer = optimizer + super(NPUDistributedOptimizer, self).__init__(name=name, use_locking=False) + + def compute_gradients(self, *args, **kwargs): + """ + Compute gradients of all trainable variables. + See Optimizer.compute_gradients() for more info. + In DistributedOptimizer, compute_gradients() is overriden to also + allreduce the gradients before returning them. + """ + logging.debug("compute_gradients...") + gradients = self._optimizer.compute_gradients(*args, **kwargs) + rank_size = os.getenv('RANK_SIZE') + if rank_size == None or int(rank_size) <= 1: + return gradients + + averaged_gradients = [] + with tf.name_scope(self._name + "_Allreduce"): + for grad, var in gradients: + avg_grad = allreduce(grad, var, True) if grad is not None else None + averaged_gradients.append((avg_grad, var)) + return averaged_gradients + + def apply_gradients(self, *args, **kwargs): + """Calls this same method on the underlying optimizer.""" + return self._optimizer.apply_gradients(*args, **kwargs) + + def get_slot(self, *args, **kwargs): + """Calls this same method on the underlying optimizer.""" + return self._optimizer.get_slot(*args, **kwargs) + + def get_slot_names(self, *args, **kwargs): + """Calls this same method on the underlying optimizer.""" + return self._optimizer.get_slot_names(*args, **kwargs) + + def variables(self, *args, **kwargs): + """Calls this same method on the underlying optimizer.""" + return self._optimizer.variables(*args, **kwargs) diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py new file mode 100644 index 000000000..98a46995d --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py @@ -0,0 +1,57 @@ +from npu_bridge import tf_adapter +from 
npu_bridge.estimator.npu import util + +__auto_tune_mode = str(tf_adapter.AUTO_TUNE_MODE) +__op_debug_level = str(tf_adapter.OP_DEBUG_LEVEL) +__option_exec_enable_scope_fusion_passes = str(tf_adapter.OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES) +__option_exec_profiling_mode = str(tf_adapter.OPTION_EXEC_PROFILING_MODE) +__option_exec_profiling_options = str(tf_adapter.OPTION_EXEC_PROFILING_OPTIONS) +__option_graph_run_mode = str(tf_adapter.OPTION_GRAPH_RUN_MODE) + +def npu_global_init(graph_run_mode = 1, + op_debug_level = 0, + is_tailing_optimization = False, + enable_profiling = False, + enable_options = "training_trace", + auto_tune_mode = None, + precision_mode = None, + enable_scope_fusion_passes = None, + ): + + util.check_nonnegative_integer(graph_run_mode, "graph_run_mode") + if graph_run_mode > 1: + raise ValueError('"graph_run_mode" value must be 0 or 1') + + util.check_nonnegative_integer(op_debug_level, "op_debug_level") + util.check_bool_type(is_tailing_optimization, "is_tailing_optimization") + util.check_bool_type(enable_profiling, "enable_profiling") + graph_run_mode = str(graph_run_mode) + op_debug_level = str(op_debug_level) + is_tailing_optimization = str(util.convert_bool_to_int(is_tailing_optimization)) + enable_profiling = str(util.convert_bool_to_int(enable_profiling)) + + init={} + init[__option_graph_run_mode] = graph_run_mode + init[__op_debug_level] = op_debug_level + init["ge.exec.isTailingOptimization"] = is_tailing_optimization + init[__option_exec_profiling_mode] = enable_profiling + + if enable_profiling is True: + init[__option_exec_profiling_options] = str(util.check_profiling_options(enable_options)) + else: + init[__option_exec_profiling_options] = str("training_trace") + + if auto_tune_mode is not None: + init[__auto_tune_mode] = str(auto_tune_mode) + + if precision_mode is not None: + init["ge.exec.precision_mode"] = str(precision_mode) + + if enable_scope_fusion_passes is not None: + init[__option_exec_enable_scope_fusion_passes] = 
str(enable_scope_fusion_passes) + + init_options=tf_adapter.map_string_string(init) + tf_adapter.PluginInit(init_options) + +def npu_global_shutdown(): + tf_adapter.PluginFinalize() diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_rnn.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_rnn.py new file mode 100644 index 000000000..dbd607420 --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_rnn.py @@ -0,0 +1,37 @@ +import tensorflow as tf + +def npu_dynamic_rnn(cell, + inputs, + initial_state=None, + dtype=None, + sequence_length=None, + scope=None): + """Creates a high performance neural network specified by RNNCell `cell`. + Args: + cell: An instance of RNNCell. + inputs: A length T list of inputs, each a `Tensor` of shape `[max_time, + batch_size, input_size]`, or a nested tuple of such elements. + initial_state: (optional) An initial state for the RNN. If `cell.state_size` + is an integer, this must be a `Tensor` of appropriate type and shape + `[batch_size, cell.state_size]`. If `cell.state_size` is a tuple, this + should be a tuple of tensors having shapes `[batch_size, s] for s in + cell.state_size`. + dtype: (optional) The data type for the initial state and expected output. + Required if initial_state is not provided or RNN state has a heterogeneous + dtype. + sequence_length: Specifies the length of each sequence in inputs. An int32 + or int64 vector (tensor) size `[batch_size]`, values in `[0, T)`. + scope: VariableScope for the created subgraph; defaults to "rnn". 
+ """ + # tf origin static_rnn + inputs = tf.unstack(inputs, axis=0) + encoder_outputs , encoder_state = tf.nn.static_rnn( + cell, + inputs, + initial_state=initial_state, + dtype=dtype, + sequence_length=sequence_length, + scope=scope) + encoder_outputs = tf.stack(encoder_outputs, axis=0) + + return encoder_outputs, encoder_state \ No newline at end of file diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_scope.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_scope.py new file mode 100644 index 000000000..8c85fceb2 --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_scope.py @@ -0,0 +1,18 @@ +""" +Config the non npu compilation scope for NPU in mix compute mode. +""" +import contextlib +from tensorflow.core.framework import attr_value_pb2 +from tensorflow.python.framework import ops + +@contextlib.contextmanager +def without_npu_compile_scope(): + ''' + Enable the non npu compilation of operators within the scope. + ''' + attrs = { + "_without_npu_compile" : attr_value_pb2.AttrValue(b=True) + } + + with ops.get_default_graph()._attr_scope(attrs): + yield diff --git a/tf_adapter/python/npu_bridge/estimator/npu/util.py b/tf_adapter/python/npu_bridge/estimator/npu/util.py new file mode 100644 index 000000000..f8d3f72c6 --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu/util.py @@ -0,0 +1,206 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six +import tensorflow as tf +from tensorflow.python.framework import ops +from tensorflow.python.training import training_util +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import init_ops +from tensorflow.python.framework import dtypes +from tensorflow.core.framework import variable_pb2 +from tensorflow.python.ops import resource_variable_ops + +_NPU_RUNCONFIG = 'npu_runconfig' +_ITERATIONS_PER_LOOP_VAR = 'iterations_per_loop' +_LOOP_COND_VAR = 'loop_cond' +_CONST_ZERO = 'zero' 
_CONST_ONE = 'one'


def check_not_none(value, name):
    """Checks whether `value` is not None."""
    if value is None:
        error_msg = '{} must not be None'.format(name)
        raise ValueError(error_msg)


def check_positive_integer(value, name):
    """Checks whether `value` is a positive integer."""
    if not isinstance(value, six.integer_types):
        error_msg = '{} must be int, got {}'.format(name, type(value))
        raise TypeError(error_msg)
    if value <= 0:
        error_msg = '{} must be positive, got {}'.format(name, value)
        raise ValueError(error_msg)


def check_nonnegative_integer(value, name):
    """Checks whether `value` is a nonnegative integer."""
    if not isinstance(value, six.integer_types):
        error_msg = '{} must be int, got {}'.format(name, type(value))
        raise TypeError(error_msg)
    if value < 0:
        error_msg = '{} must be nonnegative, got {}'.format(name, value)
        raise ValueError(error_msg)


def check_bool_type(value, name):
    """Checks whether `value` is True or False."""
    if not isinstance(value, bool):
        error_msg = '{} must be True or False, got {}'.format(name, value)
        raise TypeError(error_msg)


def convert_bool_to_int(value):
    """Convert True/False to 1/0; any other value maps to -1."""
    if value is True:
        return int(1)
    if value is False:
        return int(0)
    return int(-1)


def format_string(value, name):
    """Format input to string type; None becomes the empty string."""
    if value is None:
        return ""
    if not isinstance(value, six.string_types):
        error_msg = '{} must be string, got {}'.format(name, type(value))
        raise TypeError(error_msg)
    return str(value)


def check_profiling_options(profiling_options=None):
    """Check profiling options.

    BUG FIX: the original signature was
    `check_profiling_options(self, profiling_options=[])` — a module-level
    function with a stray `self` parameter (so the caller's options string
    bound to `self` and the check always failed) and a mutable default.
    Also accepts a single ':'-joined string, which is what
    `npu_global_init` passes.

    Args:
        profiling_options: a list/tuple of option strings, or a single
            string of options joined by ':'.
    Returns:
        The validated options joined by ':'.
    Raises:
        ValueError: if no option is given, or an option is not one of
            `training_trace`, `task_trace`, `op_trace`.
    """
    error_mag = 'profiling options must be in `training_trace`, `task_trace` or `op_trace`'

    if profiling_options is None:
        raise ValueError(error_mag)
    if not isinstance(profiling_options, (list, tuple)):
        # Accept "a:b:c"-style strings as produced by callers.
        profiling_options = str(profiling_options).split(":")
    if len(profiling_options) == 0:
        raise ValueError(error_mag)

    profiling_types = ["training_trace", "task_trace", "op_trace"]
    for option in profiling_options:
        if option not in profiling_types:
            raise ValueError(error_mag)

    result = ":".join(profiling_options)
    return result


def register_func(var_name):
    """Register to/from-proto functions for an npu_runconfig variable."""
    ops.register_proto_function(
        '{}_{}'.format(_NPU_RUNCONFIG, var_name),
        proto_type=variable_pb2.VariableDef,
        to_proto=resource_variable_ops._to_proto_fn,
        from_proto=resource_variable_ops._from_proto_fn)


def create_or_get_var(var_name):
    """Return the npu_runconfig variable `var_name`, creating it if absent.

    Raises:
        RuntimeError: if the collection unexpectedly holds more than one var.
    """
    graph = ops.get_default_graph()
    collection_name = '{}_{}'.format(_NPU_RUNCONFIG, var_name)
    iter_vars = graph.get_collection(collection_name)
    if len(iter_vars) == 1:
        return iter_vars[0]
    elif len(iter_vars) > 1:
        raise RuntimeError('Multiple var in collection.')
    # Colocate with the global step when one exists so the loop counters
    # live on the same device.
    ignore_existing = False
    if training_util.get_global_step() is None:
        ignore_existing = True
    with ops.colocate_with(training_util.get_global_step(), ignore_existing=ignore_existing):
        with variable_scope.variable_scope(_NPU_RUNCONFIG, reuse=variable_scope.AUTO_REUSE):
            return variable_scope.get_variable(
                var_name,
                initializer=init_ops.zeros_initializer(),
                shape=[],
                dtype=dtypes.int64,
                trainable=False,
                collections=[collection_name, ops.GraphKeys.LOCAL_VARIABLES],
                use_resource=True)


def set_iteration_per_loop(sess, train_op, iterations_per_loop=1):
    """
    Constructs a set_iteration_per_loop.

    Args:
        sess: A TensorFlow Session that has been created.
        train_op: An Operation that updates the variables
            or applies the specified gradients.
        iterations_per_loop: This is the number of train steps running in
            NPU system before returning to CPU host for each `Session.run`.

    Returns:
        An Operation named IterationOp that executes all its inputs.
    """
    if not isinstance(train_op, ops.Operation):
        # BUG FIX: the original message read `train_op.dtype.name`, which
        # raises AttributeError for most non-Operation values.
        raise ValueError(
            "The incoming 'train_op' type is '%s', "
            "and the need type is 'Operation'" % (type(train_op).__name__))
    check_positive_integer(iterations_per_loop, "iterations_per_loop")
    if iterations_per_loop == 1:
        return train_op

    iterations_per_loop_var = create_or_get_var(_ITERATIONS_PER_LOOP_VAR)
    loop_cond_var = create_or_get_var(_LOOP_COND_VAR)
    const_zero = create_or_get_var(_CONST_ZERO)
    const_one = create_or_get_var(_CONST_ONE)

    iterations_per_loop_var.load(iterations_per_loop - 1, session=sess)
    loop_cond_var.load(0, session=sess)
    const_zero.load(0, session=sess)
    const_one.load(1, session=sess)

    # Add IterationOp depending on train_op
    group_train_op = tf.group(train_op, name="IterationOp")

    return group_train_op


class IterationPerLoop(object):
    """
    An object providing two APIs to create and set iterations_per_loop.
    """

    def create_iteration_per_loop_var(self, train_op):
        """
        Create the iterations-per-loop bookkeeping variables.

        Args:
            train_op: An Operation that updates the variables
                or applies the specified gradients.

        Returns:
            An Operation named IterationOp that executes all its inputs.
        """
        if not isinstance(train_op, ops.Operation):
            # BUG FIX: see set_iteration_per_loop — same broken attribute.
            raise ValueError(
                "The incoming 'train_op' type is '%s', "
                "and the need type is 'Operation'" % (type(train_op).__name__))

        self._iterations_per_loop_var = create_or_get_var(_ITERATIONS_PER_LOOP_VAR)
        self._loop_cond_var = create_or_get_var(_LOOP_COND_VAR)
        self._const_zero = create_or_get_var(_CONST_ZERO)
        self._const_one = create_or_get_var(_CONST_ONE)

        # Add IterationOp depending on train_op
        group_train_op = tf.group(train_op, name="IterationOp")

        return group_train_op

    def load_iteration_per_loop_var(self, sess, iterations_per_loop=1):
        """
        Load values into the variables created above.

        Args:
            sess: A TensorFlow Session that has been created.
            iterations_per_loop: This is the number of train steps running
                in NPU system before returning to CPU host for each
                `Session.run`.
        """
        check_positive_integer(iterations_per_loop, "iterations_per_loop")
        self._iterations_per_loop_var.load(iterations_per_loop - 1, session=sess)
        self._loop_cond_var.load(0, session=sess)
        self._const_zero.load(0, session=sess)
        self._const_one.load(1, session=sess)


# ===== tf_adapter/python/npu_bridge/estimator/npu_ops.py (new file) =====
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+# ============================================================================== +"""Ops for collective operations implemented using hccl.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import threading +import os +import tensorflow as tf + +import numbers +from tensorflow.python.ops import array_ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +# from tensorflow.contrib.offline_train.ops import gen_npu_ops +from tensorflow.contrib.util import loader +from tensorflow.python.eager import context +from tensorflow.python.framework import device +from tensorflow.python.framework import ops +from tensorflow.python.platform import resource_loader +from npu_bridge.estimator.npu.npu_common import NPUBasics + +from npu_bridge.helper import helper +gen_npu_ops = helper.get_gen_ops() + +DEFAULT_GRAPH_SEED = 87654321 +_MAXINT32 = 2**31 - 1 + +def npu_test(): + """A placeholder op for values fed into the TPU simultaneously as a tuple. + + """ + + return gen_npu_ops.npu_test() + +def NPUInit(name=None): + if context.executing_eagerly(): + raise RuntimeError("tf.NPUInit() is not compatible with " + "eager execution.") + + return gen_npu_ops.npu_init(name=name) + +def NPUShutdown(name=None): + """Shutdown a distributed NPU system for use with TensorFlow. + + Args: + name: Name of ops. + + Returns: + The npu init ops which will open the NPU system using `Session.run`. + """ + if context.executing_eagerly(): + raise RuntimeError("tf.NPUShutdown() is not compatible with " + "eager execution.") + + return gen_npu_ops.npu_shutdown(name=name) + +def initialize_system(name = None): + """Initializes a distributed NPU system for use with TensorFlow. + + Args: + name: Name of ops. + + Returns: + The npu init ops which will open the NPU system using `Session.run`. 
+ """ + return NPUInit(name); + + +def shutdown_system(name = None): + """Shuts down a running NPU system.""" + + return NPUShutdown(name) + + +def LARS(inputs_w, inputs_g, weight_decay, hyperpara=0.001, epsilon=0.00001, name=None): + if context.executing_eagerly(): + raise RuntimeError("tf.LARS() is not compatible with " + "eager execution.") + + return gen_npu_ops.lars(inputs_w=inputs_w, inputs_g=inputs_g, weight_decay=weight_decay, hyperpara=hyperpara, epsilon=epsilon, name=name) + +def LARSV2(input_weight, + input_grad, + weight_decay, + learning_rate, + hyperpara=0.001, + epsilon=0.00001, + use_clip=False, + name=None): + if context.executing_eagerly(): + raise RuntimeError("tf.LARSV2() is not compatible with " + "eager execution.") + + return gen_npu_ops.lars_v2(input_weight=input_weight, + input_grad=input_grad, + weight_decay=weight_decay, + learning_rate=learning_rate, + hyperpara=hyperpara, + epsilon=epsilon, + use_clip=use_clip, + name=name) + +def outfeed_dequeue_op(channel_name, output_types, output_shapes, name=None): + return gen_npu_ops.outfeed_dequeue_op(channel_name=channel_name, output_types=output_types, output_shapes=output_shapes, name=name) + +def outfeed_enqueue_op(channel_name, inputs, name=None): + return gen_npu_ops.outfeed_enqueue_op(inputs=inputs, channel_name=channel_name, name=name) + +def stop_outfeed_dequeue_op(channel_name, name=None): + return gen_npu_ops.stop_outfeed_dequeue_op(channel_name, name) + +def _truncate_seed(seed): + return seed % _MAXINT32 # Truncate to fit into 32-bit integer + +def get_seed(op_seed): + global_seed = ops.get_default_graph().seed + + if global_seed is not None: + if op_seed is None: + # pylint: disable=protected-access + op_seed = ops.get_default_graph()._last_id + + seeds = _truncate_seed(global_seed), _truncate_seed(op_seed) + else: + if op_seed is not None: + seeds = DEFAULT_GRAPH_SEED, _truncate_seed(op_seed) + else: + seeds = None, None + # Avoid (0, 0) as the C++ ops interpret it as 
nondeterminism, which would + # be unexpected since Python docs say nondeterminism is (None, None). + if seeds == (0, 0): + return (0, _MAXINT32) + return seeds + +def _get_noise_shape(x, noise_shape): + # If noise_shape is none return immediately. + if noise_shape is None: + return array_ops.shape(x) + + try: + # Best effort to figure out the intended shape. + # If not possible, let the op to handle it. + # In eager mode exception will show up. + noise_shape_ = tensor_shape.as_shape(noise_shape) + except (TypeError, ValueError): + return noise_shape + + if x.shape.dims is not None and len(x.shape.dims) == len(noise_shape_.dims): + new_dims = [] + for i, dim in enumerate(x.shape.dims): + if noise_shape_.dims[i].value is None and dim.value is not None: + new_dims.append(dim.value) + else: + new_dims.append(noise_shape_.dims[i].value) + return tensor_shape.TensorShape(new_dims) + + return noise_shape + +def dropout(x, keep_prob, noise_shape=None, seed=None, name=None): + """The gradient for `gelu`. + + Args: + x: A tensor with type is float. + keep_prob: A tensor, float, rate of every element reserved. + noise_shape: A 1-D tensor, with type int32, shape of keep/drop what random + generated. + seed: Random seed. + name: Layer name. + + Returns: + A tensor. + """ + if context.executing_eagerly(): + raise RuntimeError("tf.dropout() is not compatible with " + "eager execution.") + x = ops.convert_to_tensor(x, name="x") + if not x.dtype.is_floating: + raise ValueError("x has to be a floating point tensor since it's going to" + " be scaled. Got a %s tensor instead." 
% x.dtype) + if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1: + raise ValueError("keep_prob must be a scalar tensor or a float in the " + "range (0, 1], got %g" % keep_prob) + if isinstance(keep_prob, float) and keep_prob == 1: + return x + seed, seed2 = get_seed(seed) + noise_shape = _get_noise_shape(x, noise_shape) + gen_out = gen_npu_ops.drop_out_gen_mask(noise_shape, keep_prob, seed, seed2, name) + result = gen_npu_ops.drop_out_do_mask(x, gen_out, keep_prob, name) + return result + +@ops.RegisterGradient("DropOutDoMask") +def _DropOutDoMaskGrad(op, grad): + result = gen_npu_ops.drop_out_do_mask(grad, op.inputs[1], op.inputs[2]) + return [result, None, None] + +def basic_lstm_cell(x, h, c, w, b, keep_prob, forget_bias, state_is_tuple, + activation, name=None): + if context.executing_eagerly(): + raise RuntimeError("tf.basic_lstm_cell() is not compatible with " + "eager execution.") + x = ops.convert_to_tensor(x, name="x") + h = ops.convert_to_tensor(h, name="h") + c = ops.convert_to_tensor(c, name="c") + w = ops.convert_to_tensor(w, name="w") + b = ops.convert_to_tensor(b, name="b") + result = gen_npu_ops.basic_lstm_cell(x, h, c, w, b, keep_prob, forget_bias, state_is_tuple, + activation, name) + return result + +@ops.RegisterGradient("BasicLSTMCell") +def basic_lstm_cell_grad(op, dct, dht, dit, djt, dft, dot, dtanhct): + + dgate, dct_1 = gen_npu_ops.basic_lstm_cell_c_state_grad(op.inputs[2], dht, dct, op.outputs[2], op.outputs[3], op.outputs[4], op.outputs[5], op.outputs[6], forget_bias=op.get_attr("forget_bias"), activation=op.get_attr("activation")) + dw, db = gen_npu_ops.basic_lstm_cell_weight_grad(op.inputs[0], op.inputs[1], dgate) + dxt, dht = gen_npu_ops.basic_lstm_cell_input_grad(dgate, op.inputs[3], keep_prob=op.get_attr("keep_prob")) + + return [dxt, dht, dct_1, dw, db] + +def adam_apply_one_assign(input0, input1, input2, input3, input4, + mul0_x, mul1_x, mul2_x, mul3_x, add2_y, name=None): + if context.executing_eagerly(): + raise 
RuntimeError("tf.adam_apply_one_assign() is not compatible with " + "eager execution.") + result = gen_npu_ops.adam_apply_one_assign(input0, input1, input2, input3, input4, + mul0_x, mul1_x, mul2_x, mul3_x, add2_y,name) + return result + +def adam_apply_one_with_decay_assign(input0, input1, input2, input3, input4, + mul0_x, mul1_x, mul2_x, mul3_x, mul4_x, add2_y, name=None): + if context.executing_eagerly(): + raise RuntimeError("tf.adam_apply_one_with_decay_assign() is not compatible with " + "eager execution.") + result = gen_npu_ops.adam_apply_one_with_decay_assign(input0, input1, input2, input3, input4, + mul0_x, mul1_x, mul2_x, mul3_x, mul4_x, add2_y, name) + return result diff --git a/tf_adapter/python/npu_bridge/estimator/npu_unary_ops.py b/tf_adapter/python/npu_bridge/estimator/npu_unary_ops.py new file mode 100644 index 000000000..a9b673306 --- /dev/null +++ b/tf_adapter/python/npu_bridge/estimator/npu_unary_ops.py @@ -0,0 +1,46 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""All bert ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.util import loader +from tensorflow.python.platform import resource_loader +from tensorflow.python.framework import ops + + +from npu_bridge.helper import helper +npu_unary_ops = helper.get_gen_ops(); + +@ops.RegisterGradient("Gelu") +def _gelu_grad(op, grad): + """The gradient for `gelu`. + + Args: + op: The `gelu` `Operation` that we are differentiating, which we can use + to find the inputs and outputs of the original op. + grad: Gradient with respect to the output of the `gelu` op. + + Returns: + Gradients with respect to the input of `gelu`. + """ + return [npu_unary_ops.gelu_grad(grad, op.inputs[0], op.outputs[0])] # List of one Tensor, since we have one input + +# go/tf-wildcard-import +#from tensorflow.python.util.tf_export import tf_export + diff --git a/tf_adapter/python/npu_bridge/hccl/hccl_ops.py b/tf_adapter/python/npu_bridge/hccl/hccl_ops.py new file mode 100644 index 000000000..e15a4243b --- /dev/null +++ b/tf_adapter/python/npu_bridge/hccl/hccl_ops.py @@ -0,0 +1,103 @@ +## @file hccl_ops.py +# HCCL 算子API + +from tensorflow.contrib.util import loader +from tensorflow.python.framework import load_library +from tensorflow.python.framework import ops +from tensorflow.python.platform import resource_loader +from npu_bridge.helper import helper + +gen_hccl_ops = helper.get_gen_ops(); + + +## 提供group内的集合通信allreduce功能 +# @param tensor tensorflow的tensor类型,allreduce操作的输入; +# @param reduction string类型,reduce的操作类型,可以为”max”,”min”,”prod”和”sum”; +# @param fusion int类型,算子融合标识。0: 不融合;1: 按照梯度切分设置融合,默认融; 2: 按照相同fusion_id融合。 +# @param fusion_id int类型,算子融合索引标识,相同fusion_id的算子将会融合。 +# @param group string类型,group名称,可以为用户自定义group或者"hccl_world_group"; +# @return 对输入tensor执行完allreduce操作之后的结果tensor +def allreduce(tensor, reduction, 
fusion=1, fusion_id=-1, group="hccl_world_group"): + result = gen_hccl_ops.hcom_all_reduce( + input=tensor, + reduction=reduction, + fusion=fusion, + fusion_id=fusion_id, + group=group) + return result + + +@ops.RegisterGradient('HcomAllReduce') +def _allreduce_grad(op, grad): + return allreduce(grad, "sum", fusion=0) + + +## 提供group内的集合通信allgather功能 +# @param tensor tensorflow的tensor类型,allgather操作的输入; +# @param rank_size int类型,group内device的数量; +# @param group string类型,group名称,可以为用户自定义group或者"hccl_world_group"; +# @return 对输入tensor执行完allgather操作之后的结果tensor +def allgather(tensor, rank_size, group="hccl_world_group"): + result = gen_hccl_ops.hcom_all_gather( + input=tensor, + group=group, + rank_size=rank_size) + return result + + +## 提供group内的集合通信broadcast功能 +# @param tensor tensorflow的tensor类型,broadcast操作的输入; +# @param root_rank int类型,作为root节点的rank_id,该id是group内的rank id; +# @param group string类型,group名称,可以为用户自定义group或者"hccl_world_group"; +# @return 对输入tensor执行完broadcast操作之后的结果tensor +def broadcast(tensor, root_rank, group="hccl_world_group"): + result = gen_hccl_ops.hcom_broadcast( + input=tensor, + group=group, + root_rank=root_rank) + return result + + +## 提供group内的集合通信reduce_scatter功能 +# @param tensor tensorflow的tensor类型,reduce_scatter操作的输入; +# @param reduction string类型,reduce的操作类型,可以为”max”,”min”,”prod”和”sum”; +# @param rank_size int类型,group内device的数量; +# @param group string类型,group名称,可以为用户自定义group或者"hccl_world_group"; +# @return 对输入tensor执行完reduce_scatter操作之后的结果tensor +def reduce_scatter(tensor, reduction, rank_size, group="hccl_world_group"): + result = gen_hccl_ops.hcom_reduce_scatter( + input=tensor, + reduction=reduction, + group=group, + rank_size=rank_size) + return result + + +## 提供group内的集合通信send功能 +# @param tensor tensorflow的tensor类型,send操作的输入; +# @param sr_tag int类型,消息标签,相同sr_tag的send/recv对可以收发数据; +# @param dest_rank int类型,数据的目标节点,该rank是group中的rank id; +# @param group string类型,group名称,可以为用户自定义group或者"hccl_world_group"; +def send(tensor, sr_tag, 
dest_rank, group="hccl_world_group"): + result = gen_hccl_ops.hcom_send( + input=tensor, + group=group, + sr_tag=sr_tag, + dest_rank=dest_rank) + return result + + +## 提供group内的集合通信receive功能 +# @param shape 接收tensor的shape; +# @param data_type 接收tensor的数据类型; +# @param sr_tag int类型,消息标签,相同sr_tag的send/recv对可以收发数据; +# @param dest_rank int类型,数据的目标节点,该rank是group中的rank id; +# @param group string类型,group名称,可以为用户自定义group或者"hccl_world_group"; +def receive(shape, data_type, sr_tag, src_rank, group="hccl_world_group"): + result = gen_hccl_ops.hcom_receive( + shape=shape, + T=data_type, + group=group, + sr_tag=sr_tag, + src_rank=src_rank) + return result diff --git a/tf_adapter/python/npu_bridge/helper/helper.py b/tf_adapter/python/npu_bridge/helper/helper.py new file mode 100644 index 000000000..64774fbd2 --- /dev/null +++ b/tf_adapter/python/npu_bridge/helper/helper.py @@ -0,0 +1,11 @@ +import tensorflow +import npu_bridge +import os +try: + npu_bridge_handle = tensorflow.load_op_library(os.path.dirname(npu_bridge.__file__) + "/_tf_adapter.so") +except Exception as e: + print(str(e)) +def get_gen_ops(): + return npu_bridge_handle + +version = 'v1.15.0' \ No newline at end of file diff --git a/tf_adapter/python/npu_bridge/image/image_ops.py b/tf_adapter/python/npu_bridge/image/image_ops.py new file mode 100644 index 000000000..ecb7c6ccd --- /dev/null +++ b/tf_adapter/python/npu_bridge/image/image_ops.py @@ -0,0 +1,33 @@ +from tensorflow.contrib.util import loader +from tensorflow.python.framework import load_library +from tensorflow.python.platform import resource_loader + +from npu_bridge.helper import helper +gen_npu_image_ops = helper.get_gen_ops(); + +def decode_and_resize_jpeg(image, size): + """Decode and resize JPEG-encoded image. + + Args: + image: The JPEG-encoded image. + size: A 1-D int32 Tensor of 2 elements: new_height, new_width. + The new size for the images. + + Returns: + Resized image, a 3-D uint8 tensor: [new_height, new_width, channel=3] . 
+ """ + return gen_npu_image_ops.decode_and_resize_jpeg(image, size) + +def decode_and_crop_and_resize_jpeg(image, crop_size, size): + """Decode, crop and resize JPEG-encoded image. + + Args: + image: The JPEG-encoded image. + crop_size: A 1-D int32 Tensor of 4 elements: [y_min, x_min, crop_height, crop_width]. + size: A 1-D int32 Tensor of 2 elements: new_height, new_width. + The new size for the images. + + Returns: + Cropped and Resized image, a 3-D uint8 tensor: [new_height, new_width, channel=3]. + """ + return gen_npu_image_ops.decode_and_crop_and_resize_jpeg(image, crop_size, size) \ No newline at end of file diff --git a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py new file mode 100644 index 000000000..391241c46 --- /dev/null +++ b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py @@ -0,0 +1,22 @@ +from tensorflow.contrib.util import loader +from tensorflow.python.framework import load_library +from tensorflow.python.framework import ops +from tensorflow.python.platform import resource_loader +from npu_bridge.helper import helper + +gen_npu_cpu_ops = helper.get_gen_ops(); + + +## 提供embeddingrankid功能 +# @param addr_tensor tensorflow的tensor类型,embeddingrankid操作的输入; +# @param index tensorflow的tensor类型,embeddingrankid操作的输入; +# @param row_memory int类型,一行数据存储的大小 默认为320。 +# @param mode string类型,embeddingrankid的操作类型,可以为”mod”,”order”;数据存储的方式。 +# @return 对输入addr_tensor,index_tensor执行完embeddingrankid操作之后的结果tensor +def embeddingrankid(addr_tensor, index, row_memory=320, mode='mod'): + result = gen_npu_cpu_ops.embedding_rank_id( + addr_table=addr_tensor, + index=index, + row_memory=row_memory, + mode=mode) + return result diff --git a/tf_adapter/python/setup.py b/tf_adapter/python/setup.py new file mode 100644 index 000000000..6c76feae7 --- /dev/null +++ b/tf_adapter/python/setup.py @@ -0,0 +1,54 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""npu bridge for tensorflow v1.15.0. + + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import fnmatch +import os +import re +import sys + +from setuptools import Command +from setuptools import find_packages +from setuptools import setup +from setuptools.command.install import install as InstallCommandBase +from setuptools.dist import Distribution + +DOCLINES = __doc__.split('\n') + +# This version string is semver compatible, but incompatible with pip. +# For pip, we will remove all '-' characters from this string, and use the +# result for pip. +_VERSION = '1.15.0' + +setup( + name='npu_bridge', + version=_VERSION, + description=DOCLINES[0], + long_description='\n'.join(DOCLINES[2:]), + author='HuaWei Inc.', + # Contained modules and scripts. + packages=find_packages(), + #install_requires=REQUIRED_PACKAGES, + # Add in any packaged data. 
+ include_package_data=True, + keywords='tensorflow tensor machine learning', +) diff --git a/tf_adapter/swig/ge_plugin.i b/tf_adapter/swig/ge_plugin.i new file mode 100644 index 000000000..a19ec8628 --- /dev/null +++ b/tf_adapter/swig/ge_plugin.i @@ -0,0 +1,22 @@ +%module tf_adapter +%include "std_string.i" +%include "std_map.i" + +namespace std{ + %template(map_string_string) map; +} + +%{ +#include "tf_adapter/util/npu_plugin.h" +%} + +extern const char* const AUTO_TUNE_MODE; +extern const char* const OP_DEBUG_LEVEL; +extern const char* const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES; +extern const char* const OPTION_EXEC_PROFILING_MODE; +extern const char* const OPTION_EXEC_PROFILING_OPTIONS; +extern const char* const OPTION_GRAPH_RUN_MODE; + +extern void PluginInit(std::map& init_options); + +extern void PluginFinalize(); diff --git a/tf_adapter/tf_adapter.bzl b/tf_adapter/tf_adapter.bzl new file mode 100644 index 000000000..cccadbda5 --- /dev/null +++ b/tf_adapter/tf_adapter.bzl @@ -0,0 +1,48 @@ +# Custom function defs +load("@com_google_protobuf//:protobuf.bzl", "proto_gen") +def _proto_cc_hdrs(srcs, use_grpc_plugin = False): + return [s[:-len(".proto")] + ".pb.h" for s in srcs] + +def _proto_cc_srcs(srcs, use_grpc_plugin = False): + return [s[:-len(".proto")] + ".pb.cc" for s in srcs] + +def cc_proto_library( + name, + srcs = [], + deps = [], + cc_libs = [], + include = None, + protoc = "@com_google_protobuf//:protoc", + **kargs): + includes = [] + if include != None: + includes = [include] + gen_srcs = _proto_cc_srcs(srcs) + gen_hdrs = _proto_cc_hdrs(srcs) + outs = gen_srcs + gen_hdrs + proto_gen( + name = name + "_genproto", + srcs = srcs, + deps = [s + "_genproto" for s in deps], + includes = includes, + protoc = protoc, + gen_cc = 1, + outs = outs, + ) + + header_only_name = name + "_headers_only" + impl_name = name + "_impl" + + native.cc_library( + name = impl_name, + srcs = gen_srcs, + hdrs = gen_hdrs, + deps = cc_libs + deps, + includes = includes, + 
**kargs + ) + native.cc_library( + name = header_only_name, + hdrs = gen_hdrs, + **kargs + ) \ No newline at end of file diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc new file mode 100644 index 000000000..1cc3990a5 --- /dev/null +++ b/tf_adapter/util/ge_plugin.cc @@ -0,0 +1,203 @@ +/** +* Copyright (C) <2019> . All Rights Reserved. +* Description: a plugin control GEInitialize and GEFinalize +*/ + +#include "tf_adapter/util/npu_plugin.h" +#include +#include "tf_adapter/common/common.h" +#include "tf_adapter/util/npu_attrs.h" +#include "tdt/tsd_client.h" +#include "tdt/tdt_host_interface.h" +#include "ge/ge_api.h" +#include "ge/ge_api_types.h" +#include "framework/common/types.h" +#include "framework/common/ge_inner_error_codes.h" +#include "framework/omg/parser/model_parser.h" +#include "framework/omg/parser/parser_factory.h" +#include "framework/omg/parser/parser_api.h" +#include "tensorflow/core/util/env_var.h" + +using namespace tensorflow; +using namespace tdt; +constexpr int kFatalSleepTime = 3000; +namespace { +inline string ToString(ge::Status status) { + return ::ge::StatusFactory::Instance()->GetErrDesc(status); +} +} + +GePlugin::GePlugin() + + : device_id_(0), + isInit_(false), + isGlobal_(false) { + LOG(INFO) << "[GePlugin] new constructor"; +} + +GePlugin::~GePlugin() { + LOG(INFO) << "[GePlugin] destory constructor begin"; + Finalize(); + LOG(INFO) << "[GePlugin] destory constructor end"; +} + +GePlugin* GePlugin::GetInstance() { + static GePlugin instance; + return &instance; +} + +void GePlugin::Init(std::map& init_options, bool is_global) { + std::lock_guard lock(mutex_); + if (isInit_) { + LOG(INFO) << "[GePlugin] Ge has already initialized"; + return; + } + + LOG(INFO) << "[GePlugin] graph run mode : " << init_options[ge::OPTION_GRAPH_RUN_MODE]; + // prepare options for ge Initialize + + const int64 kMaxDeviceID = 7; + (void)ReadInt64FromEnvVar("DEVICE_ID", 0, &device_id_); + if (device_id_ < 0 || device_id_ > 
kMaxDeviceID) { + LOG(WARNING) << "[GePlugin] device_id should in [0, 7]. use default device id : 0."; + } + init_options[ge::OPTION_EXEC_DEVICE_ID] = std::to_string(device_id_); + + const char *env_job_id = std::getenv("JOB_ID"); + if (env_job_id != nullptr) { + init_options[ge::OPTION_EXEC_JOB_ID] = env_job_id; + } else { + LOG(WARNING) << "[GePlugin] can not find Environment variable : JOB_ID"; + } + + int64 rankSizeNum = 1; + (void)ReadInt64FromEnvVar("RANK_SIZE", 1, &rankSizeNum); + if (rankSizeNum > UINT32_MAX) { + rankSizeNum = UINT32_MAX; + LOG(WARNING) << "[GePlugin] RANK_SIZE is larger than UINT32_MAX, set to UINT32_MAX."; + } + + bool is_use_hcom = false; + bool deploy_mode = false; + char *env_rank_id = std::getenv("RANK_ID"); + char *env_pod_name = std::getenv("POD_NAME"); + char *env_rank_table_file = std::getenv("RANK_TABLE_FILE"); + if ((env_rank_table_file != nullptr) && (rankSizeNum > 0)) { + LOG(INFO) << "[GePlugin] env RANK_TABLE_FILE:" << env_rank_table_file; + is_use_hcom = true; + init_options[ge::OPTION_EXEC_RANK_TABLE_FILE] = env_rank_table_file; + if (env_pod_name != nullptr) { + deploy_mode = true; + init_options[ge::OPTION_EXEC_POD_NAME] = env_pod_name; + } else if (env_rank_id != nullptr) { + LOG(INFO) << "[GePlugin] env RANK_ID:" << env_rank_id; + deploy_mode = false; + init_options[ge::OPTION_EXEC_RANK_ID] = env_rank_id; + } else { + LOG(ERROR) << "[GePlugin] Can't find rank_id or pod_name in env."; + } + } + + init_options[ge::OPTION_EXEC_IS_USEHCOM] = std::to_string(is_use_hcom); + init_options[ge::OPTION_EXEC_DEPLOY_MODE] = std::to_string(deploy_mode); + + // tailing optimization + LOG(INFO) << "[GePlugin] is_tailing_optimization : " << init_options["ge.exec.isTailingOptimization"]; + + // profiling configuration + LOG(INFO) << "[GePlugin] profiling_mode : " << init_options[ge::OPTION_EXEC_PROFILING_MODE] + << ", profiling_options:" << init_options[ge::OPTION_EXEC_PROFILING_OPTIONS]; + + // mix precision configuration + LOG(INFO) 
<< "[GePlugin] precision_mode : " << init_options[ge::PRECISION_MODE]; + + // auto tune configuration + LOG(INFO) << "[GePlugin] auto_tune_mode : " << init_options[ge::AUTO_TUNE_MODE]; + + // debug configuration + LOG(INFO) << "[GePlugin] op_debug_level : " << init_options[ge::OP_DEBUG_LEVEL]; + + // scope fusion configuration + LOG(INFO) << "[GePlugin] enable_scope_fusion_passes : " << init_options[ge::OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES]; + + // Open TsdClient first, then call GEInitialize + LOG(INFO) << "[GePlugin] Open TsdClient and Init tdt host."; + int32_t ret = tdt::TdtHostInit(static_cast(device_id_)); + if (ret != 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(kFatalSleepTime)); + LOG(FATAL) << "[GePlugin] Tdt host init failed, tdt error code : " << ret; + } + TDT_StatusT tdt_status = TsdOpen(static_cast(device_id_), static_cast(rankSizeNum)); + if (tdt_status != TDT_OK) { + std::this_thread::sleep_for(std::chrono::milliseconds(kFatalSleepTime)); + LOG(FATAL) << "[GePlugin] Open TsdClient failed, tdt error code : " << tdt_status + << ", error message : " << TDT_GET_ERROR_STR(tdt_status); + } + LOG(INFO) << "[GePlugin] Open TsdClient success and tdt host init success."; + + // ge Initialize + ge::Status status = ge::GEInitialize(init_options); + if (status != ge::SUCCESS) { + std::this_thread::sleep_for(std::chrono::milliseconds(kFatalSleepTime)); + LOG(FATAL) << "[GePlugin] Initialize ge failed, ret : " << ToString(status); + } + LOG(INFO) << "[GePlugin] Initialize ge success."; + + // parser Initialize + ge::Status status_parser = ge::ParserInitialize(init_options); + if (status_parser != ge::SUCCESS) { + std::this_thread::sleep_for(std::chrono::milliseconds(kFatalSleepTime)); + LOG(FATAL) << "[GePlugin] Initialize parser failed, ret : " << ToString(status_parser); + } + LOG(INFO) << "[GePlugin] Initialize parser success."; + isInit_ = true; + isGlobal_ = is_global; +} + +void GePlugin::Finalize() { + std::lock_guard lock(mutex_); + if 
(!isInit_) { + LOG(INFO) << "[GePlugin] Ge has already finalized."; + return; + } + + // ge finalize + ge::Status status = ge::GEFinalize(); + if (status != ge::SUCCESS) { + LOG(ERROR) << "[GePlugin] GE finalize failed, ret : " << ToString(status); + } + + // parser finalize + ge::Status status_parser = ge::ParserFinalize(); + if (status_parser != ge::SUCCESS) { + LOG(ERROR) << "[GePlugin] Parser finalize failed, ret : " << ToString(status); + } + + LOG(INFO) << "[GePlugin] Close TsdClient and destroy tdt."; + int32_t ret = tdt::TdtHostDestroy(); + if (ret != 0) { + LOG(ERROR) << "[GePlugin] Close tdt failed, tdt_ret : " << ret; + } + TDT_StatusT tdt_status = TsdClose(device_id_); + if (tdt_status != TDT_OK) { + LOG(ERROR) << "[GePlugin] Close TsdClient failed, tdt_ret : " << tdt_status; + } else { + LOG(INFO) << "[GePlugin] Close TsdClient success."; + } + isInit_ = false; +} + +bool GePlugin::IsGlobal() { + std::lock_guard lock(mutex_); + return isGlobal_; +} + +void PluginInit(std::map& init_options) { + GePlugin::GetInstance()->Init(init_options, true); + LOG(INFO) << "npu plugin init success"; +} + +void PluginFinalize() { + GePlugin::GetInstance()->Finalize(); + LOG(INFO) << "npu plugin finalize success"; +} diff --git a/tf_adapter/util/ge_plugin.h b/tf_adapter/util/ge_plugin.h new file mode 100644 index 000000000..506a83cbf --- /dev/null +++ b/tf_adapter/util/ge_plugin.h @@ -0,0 +1,38 @@ +/** +* Copyright (C) <2019> . All Rights Reserved. 
+* Description: a plugin control GEInitialize and GEFinalize +*/ +#ifndef TENSORFLOW_GE_PLUGIN_H_ +#define TENSORFLOW_GE_PLUGIN_H_ + +#include +#include +#include +#include "tensorflow/core/platform/types.h" +using tensorflow::int64; + +// Sigleton class for manage the relationship between +// tf session and ge session +class GePlugin { + public: + static GePlugin* GetInstance(); + + void Init(std::map& init_options, bool is_global = false); + + void Finalize(); + + bool IsGlobal(); + private: + GePlugin(); + + ~GePlugin(); + + GePlugin(GePlugin &) = delete; + + int64 device_id_; + bool isInit_; + bool isGlobal_; + std::map init_options_; + std::mutex mutex_; +}; +#endif diff --git a/tf_adapter/util/infershape_util.cc b/tf_adapter/util/infershape_util.cc new file mode 100644 index 000000000..06dd3a80a --- /dev/null +++ b/tf_adapter/util/infershape_util.cc @@ -0,0 +1,441 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/node_def_util.h" +#include "tf_adapter/util/infershape_util.h" +#include "tf_adapter/util/npu_ops_identifier.h" +#include "tf_adapter/common/common.h" + +namespace tensorflow { +struct EdgeInfo { + EdgeInfo(Node *src, Node *dst, int src_output, int dst_input) + : src_(src), dst_(dst), src_output_(src_output), dst_input_(dst_input) {} + + Node *src_; + Node *dst_; + int src_output_; + int dst_input_; +}; + +int64 InferShapeUtil::GetCurrentTimestap() { + struct timeval tv; + int ret = gettimeofday(&tv, nullptr); + if (ret != 0) { + LOG(ERROR) << "Func gettimeofday may failed, ret:" << ret; + return 0; + } + int64 totalUsec = tv.tv_usec + tv.tv_sec * 1000000; + return totalUsec; +} + +Status InferShapeUtil::setArgShapeFromTensorShape(std::vector vecTensor, + Graph *graph, + const OpDef &sig, + ShapeRefiner &shapeRef) { + REQUIRES_NOT_NULL(graph); + int idx = 0; + for (const OpDef::ArgDef &arg_def : sig.input_arg()) { + for (Node *pNode : graph->nodes()) { + REQUIRES_NOT_NULL(pNode); + if (pNode->name().compare(arg_def.name()) == 0) { + TF_RETURN_IF_ERROR( + shapeRef.AddNode(pNode)); // here the arg node must add succ + tensorflow::shape_inference::InferenceContext *pCxt = + shapeRef.GetContext(pNode); + if (pCxt == nullptr) // this is a protect + { + return errors::Internal("The InferenceContext of node ", + pNode->name(), " is null, add node failed."); + } + + tensorflow::shape_inference::ShapeHandle shapeHandle; + pCxt->MakeShapeFromTensorShape(vecTensor[idx].shape(), &shapeHandle); + pCxt->set_output(0, shapeHandle); // this arg has only one output + idx++; + break; // next arg + } + } + } + + return Status::OK(); +} + +Status InferShapeUtil::getSubGraphFromFunctionDef(const FunctionDef &func_def, + Graph *graph) { + LOG(INFO) << "The signature name of FunctionDef is " + << func_def.signature().name() << "."; + InstantiationResult result; + 
AttrSlice attrs(&func_def.attr()); + TF_RETURN_IF_ERROR( + InstantiateFunction(func_def, attrs, + [](const string &op, const OpDef **sig) { + return OpRegistry::Global()->LookUpOpDef(op, sig); + }, + &result)); + + GraphConstructorOptions opts; + opts.allow_internal_ops = true; + opts.expect_device_spec = false; + TF_RETURN_IF_ERROR(ConvertNodeDefsToGraph(opts, result.nodes, graph)); + return Status::OK(); +} + +bool InferShapeUtil::IsInitializedGraph(Node *node) { + Node *logical_not_node = nullptr; + node->input_node(0, &logical_not_node); + if (logical_not_node == nullptr) { + return false; + } + + if (logical_not_node->type_string() == "Reshape") { + Node *reshape_node = logical_not_node; + reshape_node->input_node(0, &logical_not_node); + if (logical_not_node == nullptr) { + return false; + } + } + if (logical_not_node->type_string() != "LogicalNot") { + return false; + } + + Node *stack_node = nullptr; + logical_not_node->input_node(0, &stack_node); + if (stack_node == nullptr || stack_node->type_string() != "Pack") { + return false; + } + + Node *is_var_init_node = nullptr; + stack_node->input_node(0, &is_var_init_node); + if (is_var_init_node == nullptr) { + return false; + } + + if (is_var_init_node->type_string() == "VarIsInitializedOp" || + is_var_init_node->type_string() == "IsVariableInitialized") { + LOG(INFO) << "GEOP::IsInitializedGraph"; + return true; + } + + return false; +} + +Status InferShapeUtil::getInputShapesOfNode( + ShapeRefiner &shapeRef, Node *pNode, + std::vector &inputShapeVec) { + REQUIRES_NOT_NULL(pNode); + for (const Edge *pEdge : pNode->in_edges()) { + REQUIRES_NOT_NULL(pEdge); + if (pEdge->IsControlEdge()) { + continue; + } + + Node *pNodeIn = pEdge->src(); + tensorflow::shape_inference::InferenceContext *pCxtIn = + shapeRef.GetContext(pNodeIn); + if (pCxtIn == nullptr) { + return errors::Internal("Can't get context of the input ", + pNodeIn->name(), " of the node ", pNode->name(), + "."); + } + + int iDstInput = 
pEdge->dst_input(); + inputShapeVec[iDstInput] = pCxtIn->output(pEdge->src_output()); + } + + return Status::OK(); +} + +void InferShapeUtil::setShapeOfEnterOP(ShapeRefiner &shapeRef, Node *pNode) { + CHECK_NOT_NULL(pNode); + tensorflow::shape_inference::InferenceContext *pCxt = + shapeRef.GetContext(pNode); + CHECK_NOT_NULL(pCxt); + tensorflow::shape_inference::ShapeHandle shapeOutOne = + pCxt->output(0); // Enter has only one output + if (pCxt->DebugString(shapeOutOne).find("?") == + std::string::npos) // Enter op has shape + { + return; + } + + int iInputNums = pNode->num_inputs(); // Enter has only one input + if (iInputNums != 1) { + LOG(ERROR) << "Node " << pNode->name() << ", type is " << pNode->type_string() + << ", must has only one input, but now=" << iInputNums; + return; + } + std::vector inputShapes(iInputNums); + + (void) getInputShapesOfNode(shapeRef, pNode, inputShapes); + + pCxt->set_output(0, inputShapes.at(0)); // Enter op can't be unknown shape. +} + +void InferShapeUtil::setShapeOfMergeOP(ShapeRefiner &shapeRef, Node *pNode) { + CHECK_NOT_NULL(pNode); + tensorflow::shape_inference::InferenceContext *pCxt = + shapeRef.GetContext(pNode); + CHECK_NOT_NULL(pCxt); + tensorflow::shape_inference::ShapeHandle shapeOutOne = + pCxt->output(0); // Set Ref/Merge first output + if (pCxt->DebugString(shapeOutOne).find("?") == + std::string::npos) // Ref/Merge op has shape + { + return; + } + + for (const Edge *e : pNode->in_edges()) { + CHECK_NOT_NULL(e); + if (e->IsControlEdge()) continue; + if (e->dst_input() < 0) continue; + + if (e->src()->type_string() == "Enter" || e->src()->type_string() == "RefEnter") { + Node *pNodeIn = e->src(); + tensorflow::shape_inference::InferenceContext *pCxtIn = + shapeRef.GetContext(pNodeIn); + if (pCxtIn == nullptr) { + LOG(ERROR) << "Can't get context of the input " << pNodeIn->name() + << " of the node " << pNode->name() << "."; + return; + } + pCxt->set_output(0, pCxtIn->output(e->src_output())); + return; + } + } + 
return; +} + +void InferShapeUtil::setShapeOfBroadcastGradientArgsOP(ShapeRefiner &shapeRef, + Node *pNode) { + CHECK_NOT_NULL(pNode); + int iInputNums = pNode->num_inputs(); + std::vector inputShapes(iInputNums); + + (void) getInputShapesOfNode(shapeRef, pNode, inputShapes); + + tensorflow::shape_inference::InferenceContext *pCxt = + shapeRef.GetContext(pNode); + CHECK_NOT_NULL(pCxt); + int64 maxDimVal = pCxt->Value(pCxt->Dim(inputShapes.at(0), 0)); + int iMaxDimIndex = 0; + for (int i = 1; i < iInputNums; i++) { + const int64 secondValue = pCxt->Value(pCxt->Dim(inputShapes.at(i), 0)); + if (secondValue > maxDimVal) { + iMaxDimIndex = i; + maxDimVal = secondValue; + } + } + + std::vector shapeVec; + int iOutputNums = pNode->num_outputs(); + for (int i = 0; i < iOutputNums; i++) { + TensorShapeProto proto; + if (pCxt->DebugString(pCxt->output(i)).find("?") != + std::string::npos) // the shape of this output has ? + { + pCxt->ShapeHandleToProto(inputShapes[iMaxDimIndex], &proto); + LOG(INFO) << "Node name " << pNode->name() << " add attr shape " + << pCxt->DebugString(inputShapes[iMaxDimIndex]); + } else { + pCxt->ShapeHandleToProto(pCxt->output(i), &proto); + } + shapeVec.push_back(proto); + } + + pNode->AddAttr(KEY_SHAPE, gtl::ArraySlice(shapeVec)); +} + +void InferShapeUtil::setShapeOfReshapeOP(ShapeRefiner &shapeRef, Node *pNode) { + CHECK_NOT_NULL(pNode); + tensorflow::shape_inference::InferenceContext *pCxt = + shapeRef.GetContext(pNode); + CHECK_NOT_NULL(pCxt); + if (pCxt->DebugString(pCxt->output(0)).find("?") == std::string::npos) { + return; + } + + std::vector inShapes( + pNode->num_inputs()); + (void)getInputShapesOfNode(shapeRef, pNode, inShapes); + + if (pCxt->DebugString(inShapes[0]).find("?") == std::string::npos) { + TensorShapeProto proto; + pCxt->ShapeHandleToProto(inShapes[0], &proto); + pNode->AddAttr(KEY_SHAPE, proto); // Reshape has only one output + LOG(INFO) << "Node name " << pNode->name() << " add attr shape " + << 
pCxt->DebugString(inShapes[0]); + } +} + +void InferShapeUtil::inferShapeOfGraph(const Graph *graph, + ShapeRefiner &shapeRef, int iTime) { + CHECK_NOT_NULL(graph); + for (Node *pNode : graph->nodes()) { + CHECK_NOT_NULL(pNode); + if (pNode->type_string() == "NoOp" || + shapeRef.GetContext(pNode) != nullptr) { + continue; + } + + Status addStatus = shapeRef.AddNode(pNode); + if (!addStatus.ok()) { + if (iTime != INFER_SHAPE_FIRST_TIME) { + LOG(WARNING) << "AddNode failed, errormsg is " + << addStatus.error_message() << "."; + } + continue; + } else if (iTime == INFER_SHAPE_FIRST_TIME && + pNode->type_string() == "Enter") { + setShapeOfEnterOP(shapeRef, pNode); + } else if ((iTime == INFER_SHAPE_FIRST_TIME) && + ((pNode->type_string() == "Merge") || (pNode->type_string() == "RefMerge"))) { + setShapeOfMergeOP(shapeRef, pNode); + } + } +} + +void InferShapeUtil::printGraphShape(ShapeRefiner &shapeRef, Graph *graph) { + CHECK_NOT_NULL(graph); + shape_inference::InferenceContext *pCxt = nullptr; + int iOutNums; + for (Node *pNode : graph->nodes()) { + CHECK_NOT_NULL(pNode); + pCxt = shapeRef.GetContext(pNode); + if (pCxt == nullptr) { + continue; + } + iOutNums = pCxt->num_outputs(); + if (iOutNums <= 0) { + LOG(INFO) << "Node " << pNode->name() << " has none outputs."; + return; + } + for (int i = 0; i < iOutNums; i++) { + tensorflow::shape_inference::ShapeHandle shape = pCxt->output(i); + string strShape = pCxt->DebugString(shape); + LOG(INFO) << "The shape of node " << pNode->name() << " output " << i << " is " << strShape; + } + } + return; +} + +Status InferShapeUtil::addShapeToAttr(ShapeRefiner &shapeRef, Node *pNode) { + REQUIRES_NOT_NULL(pNode); + shape_inference::InferenceContext *pCxt = shapeRef.GetContext(pNode); + if (pCxt == nullptr) { + LOG(WARNING) << "The InferenceContext of node " << pNode->name() + << " is null."; + return Status::OK(); + } + + int iOutNums = pCxt->num_outputs(); + if (iOutNums <= 0) { + return Status::OK(); + } + + AttrSlice attrList 
= pNode->attrs(); + if (attrList.Find(KEY_SHAPE) != nullptr) { + LOG(INFO) << "Node " << pNode->name() + << " already has omop_shape attribute."; + return Status::OK(); + } + + std::vector shapeVec; + for (int i = 0; i < iOutNums; i++) { + tensorflow::shape_inference::ShapeHandle shape = pCxt->output(i); + TensorShapeProto proto; + pCxt->ShapeHandleToProto(shape, &proto); + shapeVec.push_back(proto); + + string strShape = pCxt->DebugString(shape); + if (strShape.find("?") != std::string::npos) { + LOG(WARNING) << "The shape of node " << pNode->name() << " output " << i + << " is " << strShape << ", unknown shape."; + + auto identifier = NpuOpsIdentifier::GetInstance(false); + if (identifier->IsPerformanceSensitive(pNode->type_string())) { + return errors::Internal("Node ", pNode->name(), " output ", i, + " shape is ", strShape, ", type is ", pNode->type_string(), + ", performance sensitive op shouldn't has unknown shape."); + } + } + } + + pNode->AddAttr(KEY_SHAPE, gtl::ArraySlice(shapeVec)); + return Status::OK(); +} + +Status InferShapeUtil::InferShape(std::vector vecTensor, + const FunctionLibraryDefinition *flib_def, + const FunctionDef *func_def, Graph *graph) { + REQUIRES_NOT_NULL(graph); + REQUIRES_NOT_NULL(func_def); + LOG(INFO) << "InferShapeUtil::InferShape"; + int iTensorNums = vecTensor.size(); + const OpDef &sig = func_def->signature(); + int iInputArgNums = sig.input_arg_size(); + if (iTensorNums < iInputArgNums) { + return errors::Internal("Input tensor num ", iTensorNums, + " is less than arg num ", iInputArgNums, "."); + } + + TF_RETURN_IF_ERROR(getSubGraphFromFunctionDef(*func_def, graph)); + + // Control flow loops in the graph; we have to break them. 
+ std::vector NextIterationEdges; + std::unordered_set needRemoveEdges; + for (Node *pNode : graph->nodes()) { + REQUIRES_NOT_NULL(pNode); + if ((pNode->type_string() != "Merge") && (pNode->type_string() != "RefMerge")) continue; + + needRemoveEdges.clear(); + for (const Edge *e : pNode->in_edges()) { + REQUIRES_NOT_NULL(e); + if (e->IsControlEdge()) continue; + if (e->dst_input() < 0) continue; + + LOG(INFO) << "in_edges: " << e->src()->name() << " --> " << pNode->name(); + if ((e->src()->type_string() == "NextIteration") || (e->src()->type_string() == "RefNextIteration")) { + EdgeInfo edgeInfo(e->src(), pNode, e->src_output(), e->dst_input()); + NextIterationEdges.push_back(edgeInfo); + needRemoveEdges.insert(e); + } + } + for (auto iter = needRemoveEdges.begin(); iter != needRemoveEdges.end(); ++iter) { + graph->RemoveEdge(*iter); // Use Enter replace NextIteration. + } + } + + ShapeRefiner shapeRefinerSub(graph->versions(), graph->op_registry()); + shapeRefinerSub.set_require_shape_inference_fns(false); + shapeRefinerSub.set_disable_constant_propagation(true); + + TF_RETURN_IF_ERROR( + setArgShapeFromTensorShape(vecTensor, graph, sig, shapeRefinerSub)); + inferShapeOfGraph(graph, shapeRefinerSub, INFER_SHAPE_FIRST_TIME); + inferShapeOfGraph(graph, shapeRefinerSub, INFER_SHAPE_OTHER_TIME); + + for (Node *pNode : graph->nodes()) { + TF_RETURN_IF_ERROR(addShapeToAttr(shapeRefinerSub, pNode)); + } + + for (auto &edgeInfo : NextIterationEdges) { + graph->AddEdge(edgeInfo.src_, edgeInfo.src_output_, edgeInfo.dst_, edgeInfo.dst_input_); + } + + LOG(INFO) << "InferShapeUtil::InferShape success"; + return Status::OK(); +} +} // namespace tensorflow diff --git a/tf_adapter/util/infershape_util.h b/tf_adapter/util/infershape_util.h new file mode 100644 index 000000000..3bcbaaff0 --- /dev/null +++ b/tf_adapter/util/infershape_util.h @@ -0,0 +1,76 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. 
All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_OFFLINE_TRAIN_UTIL_INFERSHAPE_H_ +#define TENSORFLOW_CONTRIB_OFFLINE_TRAIN_UTIL_INFERSHAPE_H_ + +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/shape_refiner.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_reference.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/public/session_options.h" +#include + +namespace tensorflow { +const std::string KEY_SHAPE = "geop_shape"; + +class InferShapeUtil { + public: + static Status InferShape(std::vector vecTensor, + const FunctionLibraryDefinition *flib_def, + const FunctionDef *func_def, Graph *graph); + + static Status getSubGraphFromFunctionDef(const FunctionDef &func_def, Graph *graph); + + static int64 GetCurrentTimestap(); + static bool IsInitializedGraph(Node *node); + + static const int INFER_SHAPE_FIRST_TIME = 0; + static const int INFER_SHAPE_OTHER_TIME = 1; + + private: + static Status setArgShapeFromTensorShape(std::vector vecTensor, + Graph *graph, const 
OpDef &sig, + ShapeRefiner &shapeRef); + + static Status getInputShapesOfNode( + ShapeRefiner &shapeRef, Node *pNode, + std::vector &inputShapeVec); + + static void setShapeOfEnterOP(ShapeRefiner &shapeRef, Node *pNode); + + static void setShapeOfMergeOP(ShapeRefiner &shapeRef, Node *pNode); + + static void setShapeOfBroadcastGradientArgsOP(ShapeRefiner &shapeRef, + Node *pNode); + + static void setShapeOfReshapeOP(ShapeRefiner &shapeRef, Node *pNode); + + static void inferShapeOfGraph(const Graph *graph, ShapeRefiner &shapeRef, + int iTime); + + static Status addShapeToAttr(ShapeRefiner &shapeRef, Node *pNode); + + static void printGraphShape(ShapeRefiner &shapeRef, Graph *graph); +}; +} // namespace tensorflow +#endif // TENSORFLOW_CONTRIB_OFFLINE_TRAIN_UTIL_INFERSHAPE_H_ diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc new file mode 100644 index 000000000..d624b7ffe --- /dev/null +++ b/tf_adapter/util/npu_attrs.cc @@ -0,0 +1,782 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tf_adapter/util/npu_attrs.h" +#include "securec.h" +#include +#include + +namespace tensorflow { + +inline void split(const std::string& s, std::vector& result, + const char* delchar = " ") { + if (s.empty()) { + return; + } + result.clear(); + char* buffer = new char[s.size() + 1]; + buffer[s.size()] = '\0'; + errno_t e = strcpy_s(buffer, s.size() + 1, s.c_str()); + if (e != EOK) { + delete[] buffer; + return; + } + char *pTmp = NULL; + char* p = strtok_s(buffer, delchar, &pTmp); + do { + result.push_back(p); + } while ((p = strtok_s(NULL, delchar, &pTmp))); + delete[] buffer; + return; +} + +inline bool checkProfilingOptions(string& options) { + if (options.empty()) { + return false; + } + std::set validOptions; + validOptions.insert("training_trace"); + validOptions.insert("task_trace"); + validOptions.insert("op_trace"); + + std::vector optionVec; + split(options, optionVec, ":"); + if (optionVec.size() == 0) { + return false; + } + for (auto option : optionVec) { + if (validOptions.find(option) == validOptions.end()) { + return false; + } + } + return true; +} + +inline Status checkDumpStep(string dump_step) { + std::string tmp_dump_step = dump_step + "|"; + std::smatch result; + std::vector match_vecs; + std::regex pattern("(\\d{1,}-\\d{1,}\\||\\d{1,}\\|)+"); + if (regex_match(tmp_dump_step, result, pattern)) { + split(result.str(), match_vecs, "|"); + // 100 is the max sets of dump steps. 
+ if (match_vecs.size() > 100) { + return errors::InvalidArgument("dump_step only support dump <= 100 sets of data"); + } + for (auto match_vec : match_vecs) { + std::vector tmp_vecs; + split(match_vec, tmp_vecs, "-"); + if (tmp_vecs.size() > 1) { + if (std::atoi(tmp_vecs[0].c_str()) >= std::atoi(tmp_vecs[1].c_str())) { + return errors::InvalidArgument("in range steps, the first step is >= " + "second step, correct example:'0|5|10-20'"); + } + } + } + } else { + return errors::InvalidArgument("dump_step string style is error," + " correct example:'0|5|10|50-100'"); + } + return Status::OK(); +} + +inline Status checkDumpMode(string dump_mode) { + std::set dump_mode_list = {"input", "output", "all"}; + std::set::iterator iter; + + if ((iter = dump_mode_list.find(dump_mode)) != dump_mode_list.end()) { + return Status::OK(); + } else { + return errors::InvalidArgument("dump mode should be one of the list:[input, output, all]"); + } +} + +inline Status checkDumpDebugMode(string dump_debug_mode) { + std::set dump_debug_mode_list = {"aicore_overflow", "atomic_overflow", "all"}; + std::set::iterator iter; + + if ((iter = dump_debug_mode_list.find(dump_debug_mode)) != dump_debug_mode_list.end()) { + return Status::OK(); + } else { + return errors::InvalidArgument("dump debug mode should be one of the list:[aicore_overflow, atomic_overflow, all]"); + } +} + +std::map NpuAttrs::GetSessOptions(OpKernelConstruction *ctx) { + std::map sess_options; + std::string variable_format_optimize = std::to_string(true); + std::string hcom_parallel = std::to_string(false); + std::string graph_memory_max_size = ""; + std::string variable_memory_max_size = ""; + std::string enable_dump = std::to_string(false); + std::string enable_dump_debug = std::to_string(false); + std::string dump_path = ""; + std::string dump_step = ""; + std::string dump_mode = "output"; + std::string dump_debug_mode = "all"; + std::string stream_max_parallel_num = ""; + string npuOptimizer = ""; + + if (ctx != 
nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { + ctx->GetAttr("_variable_format_optimize", &variable_format_optimize); + ctx->GetAttr("_hcom_parallel", &hcom_parallel); + ctx->GetAttr("_graph_memory_max_size", &graph_memory_max_size); + ctx->GetAttr("_variable_memory_max_size", &variable_memory_max_size); + ctx->GetAttr("_enable_dump", &enable_dump); + ctx->GetAttr("_enable_dump_debug", &enable_dump_debug); + if (enable_dump != std::to_string(false) || enable_dump_debug != std::to_string(false)) { + ctx->GetAttr("_dump_path", &dump_path); + } + if (enable_dump != std::to_string(false)) { + if (ctx->GetAttr("_dump_step", &dump_step) == Status::OK() && + !dump_step.empty()) { + Status s = checkDumpStep(dump_step); + if (!s.ok()) { + LOG(FATAL) << s.error_message(); + } + } + if (ctx->GetAttr("_dump_mode", &dump_mode) == Status::OK()) { + Status s = checkDumpMode(dump_mode); + if (!s.ok()) { + LOG(FATAL) << s.error_message(); + } + } + } + if (enable_dump_debug != std::to_string(false)) { + if (ctx->GetAttr("_dump_debug_mode", &dump_debug_mode) == Status::OK()) { + Status s = checkDumpDebugMode(dump_debug_mode); + if (!s.ok()) { + LOG(FATAL) << s.error_message(); + } + } + } + ctx->GetAttr("_stream_max_parallel_num", &stream_max_parallel_num); + } + + // session options + sess_options["ge.exec.variable_acc"] = variable_format_optimize; + sess_options[ge::HCOM_PARALLEL] = hcom_parallel; + sess_options[ge::STREAM_MAX_PARALLEL_NUM] = stream_max_parallel_num; + if (!graph_memory_max_size.empty()) { + sess_options[ge::GRAPH_MEMORY_MAX_SIZE] = graph_memory_max_size; + } + if (!variable_memory_max_size.empty()) { + sess_options[ge::VARIABLE_MEMORY_MAX_SIZE] = variable_memory_max_size; + } + sess_options[ge::OPTION_EXEC_ENABLE_DUMP] = enable_dump; + sess_options[ge::OPTION_EXEC_DUMP_PATH] = dump_path; + sess_options[ge::OPTION_EXEC_DUMP_STEP] = dump_step; + sess_options[ge::OPTION_EXEC_DUMP_MODE] = dump_mode; + 
sess_options[ge::OPTION_EXEC_ENABLE_DUMP_DEBUG] = enable_dump_debug; + sess_options[ge::OPTION_EXEC_DUMP_DEBUG_MODE] = dump_debug_mode; + + return sess_options; +} + +std::map NpuAttrs::GetDefaultInitOptions() { + std::map init_options; + init_options["ge.exec.isTailingOptimization"] = std::to_string(false); + init_options["ge.exec.precision_mode"] = ""; + init_options[ge::OPTION_EXEC_PROFILING_MODE] = std::to_string(false); + init_options[ge::OPTION_EXEC_PROFILING_OPTIONS] = "training_trace"; + init_options[ge::AUTO_TUNE_MODE] = ""; + init_options[ge::OPTION_GRAPH_RUN_MODE] = "1"; + init_options[ge::OP_DEBUG_LEVEL] = "0"; + init_options[ge::OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES] = ""; + return init_options; +} + +std::map NpuAttrs::GetInitOptions(OpKernelConstruction *ctx) { + std::map init_options; + std::string is_tailing_optimization = std::to_string(false); + std::string precision_mode = ""; + std::string profiling_mode = std::to_string(false); + std::string profiling_options = "training_trace"; + std::string auto_tune_mode = ""; + std::string graph_run_mode = "1"; + std::string op_debug_level = "0"; + std::string enable_scope_fusion_passes = ""; + string npuOptimizer = ""; + + if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { + ctx->GetAttr("_is_tailing_optimization", &is_tailing_optimization); + ctx->GetAttr("_precision_mode", &precision_mode); + ctx->GetAttr("_profiling_mode", &profiling_mode); + ctx->GetAttr("_profiling_options", &profiling_options); + ctx->GetAttr("_auto_tune_mode", &auto_tune_mode); + ctx->GetAttr("_graph_run_mode", &graph_run_mode); + ctx->GetAttr("_op_debug_level", &op_debug_level); + ctx->GetAttr("_enable_scope_fusion_passes", &enable_scope_fusion_passes); + } + + init_options["ge.exec.isTailingOptimization"] = is_tailing_optimization; + init_options["ge.exec.precision_mode"] = precision_mode; + init_options[ge::OPTION_EXEC_PROFILING_MODE] = profiling_mode; + if (profiling_mode != 
std::to_string(false) && !checkProfilingOptions(profiling_options)) { + LOG(FATAL) << "profiling options must be in 'training_trace', 'task_trace' or 'op_trace'"; + } + init_options[ge::OPTION_EXEC_PROFILING_OPTIONS] = profiling_options; + init_options[ge::AUTO_TUNE_MODE] = auto_tune_mode; + init_options[ge::OPTION_GRAPH_RUN_MODE] = graph_run_mode; + init_options[ge::OP_DEBUG_LEVEL] = op_debug_level; + init_options[ge::OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES] = enable_scope_fusion_passes; + + return init_options; +} + +std::map NpuAttrs::GetPassOptions(const GraphOptimizationPassOptions &options) { + std::map pass_options; + const RewriterConfig &rewrite_options = + options.session_options->config.graph_options().rewrite_options(); + bool do_npu_optimizer = false; + bool enable_dp = false; + bool use_off_line = false; + bool mix_compile_mode = false; + int iterations_per_loop = 1; + bool lower_functional_ops = false; + string job = "default"; + int task_index = 0; + for (auto custom_optimizer : rewrite_options.custom_optimizers()) { + if (custom_optimizer.name() == "NpuOptimizer") { + do_npu_optimizer = true; + const auto params = custom_optimizer.parameter_map(); + if (params.count("enable_data_pre_proc")) { + enable_dp = params.at("enable_data_pre_proc").b(); + } + if (params.count("use_off_line")) { + use_off_line = params.at("use_off_line").b(); + if (params.count("mix_compile_mode")) { + mix_compile_mode = params.at("mix_compile_mode").b(); + } + if (params.count("iterations_per_loop")) { + iterations_per_loop = params.at("iterations_per_loop").i(); + } + if (params.count("lower_functional_ops")) { + lower_functional_ops = params.at("lower_functional_ops").b(); + } + if (params.count("job")) { + job = params.at("job").s(); + } else { + job = "localhost"; + } + if (params.count("task_index")) { + task_index = params.at("task_index").i(); + } + } + } + } + // pass options + pass_options["do_npu_optimizer"] = std::to_string(do_npu_optimizer); + 
pass_options["enable_dp"] = std::to_string(enable_dp); + pass_options["use_off_line"] = std::to_string(use_off_line); + pass_options["mix_compile_mode"] = std::to_string(mix_compile_mode); + pass_options["iterations_per_loop"] = std::to_string(iterations_per_loop); + pass_options["lower_functional_ops"] = std::to_string(lower_functional_ops); + pass_options["job"] = job; + pass_options["task_index"] = std::to_string(task_index); + + return pass_options; +} + + +std::map NpuAttrs::GetPassOptions(OpKernelConstruction *ctx) { + std::map pass_options; + std::string do_npu_optimizer = std::to_string(false); + std::string enable_dp = std::to_string(false); + std::string use_off_line = std::to_string(false); + std::string mix_compile_mode = std::to_string(false); + std::string iterations_per_loop = "1"; + std::string lower_functional_ops = std::to_string(false); + string job = "default"; + std::string task_index = "0"; + Status s = Status::OK(); + string npuOptimizer = ""; + + if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { + do_npu_optimizer = true; + ctx->GetAttr("_enable_data_pre_proc", &enable_dp); + if (ctx->GetAttr("_use_off_line", &use_off_line) == Status::OK()) { + ctx->GetAttr("_mix_compile_mode", &mix_compile_mode); + ctx->GetAttr("_iterations_per_loop", &iterations_per_loop); + ctx->GetAttr("_lower_functional_ops", &lower_functional_ops); + if (ctx->GetAttr("_job", &job) != Status::OK()) { + job = "localhost"; + } + ctx->GetAttr("_task_index", &task_index); + } + } + // pass options + pass_options["do_npu_optimizer"] = do_npu_optimizer; + pass_options["enable_dp"] = enable_dp; + pass_options["use_off_line"] = use_off_line; + pass_options["mix_compile_mode"] = mix_compile_mode; + pass_options["iterations_per_loop"] = iterations_per_loop; + pass_options["lower_functional_ops"] = lower_functional_ops; + pass_options["job"] = job; + pass_options["task_index"] = task_index; + + return pass_options; +} + +std::map 
NpuAttrs::GetPassOptions(AttrSlice attrs) { + std::map pass_options; + std::string do_npu_optimizer = std::to_string(false); + std::string enable_dp = std::to_string(false); + std::string use_off_line = std::to_string(false); + std::string mix_compile_mode = std::to_string(false); + std::string iterations_per_loop = "1"; + std::string lower_functional_ops = std::to_string(false); + string job = "default"; + std::string task_index = "0"; + Status s = Status::OK(); + + if (attrs.Find("_NpuOptimizer") != nullptr) { + do_npu_optimizer = std::to_string(true); + if (attrs.Find("_enable_data_pre_proc") != nullptr) { + enable_dp = attrs.Find("_enable_data_pre_proc")->s(); + } + if (attrs.Find("_use_off_line") != nullptr) { + use_off_line = attrs.Find("_use_off_line")->s(); + if (attrs.Find("_mix_compile_mode") != nullptr) { + mix_compile_mode = attrs.Find("_mix_compile_mode")->s(); + } + if (attrs.Find("_iterations_per_loop") != nullptr) { + iterations_per_loop = attrs.Find("_iterations_per_loop")->s(); + } + if (attrs.Find("_lower_functional_ops") != nullptr) { + lower_functional_ops = attrs.Find("_lower_functional_ops")->s(); + } + if (attrs.Find("_job") != nullptr) { + job = attrs.Find("_job")->s(); + } else { + job = "localhost"; + } + if (attrs.Find("_task_index") != nullptr) { + task_index = attrs.Find("_task_index")->s(); + } + } + } + // pass options + pass_options["do_npu_optimizer"] = do_npu_optimizer; + pass_options["enable_dp"] = enable_dp; + pass_options["use_off_line"] = use_off_line; + pass_options["mix_compile_mode"] = mix_compile_mode; + pass_options["iterations_per_loop"] = iterations_per_loop; + pass_options["lower_functional_ops"] = lower_functional_ops; + pass_options["job"] = job; + pass_options["task_index"] = task_index; + + return pass_options; +} + +std::map NpuAttrs::GetAllAttrOptions(AttrSlice attrs) { + std::map all_options; + std::string do_npu_optimizer = std::to_string(false); + std::string enable_dp = std::to_string(false); + std::string 
use_off_line = std::to_string(false); + std::string mix_compile_mode = std::to_string(false); + std::string iterations_per_loop = "1"; + std::string lower_functional_ops = std::to_string(false); + string job = "default"; + std::string task_index = "0"; + Status s = Status::OK(); + + std::string variable_format_optimize = std::to_string(true); + std::string hcom_parallel = std::to_string(false); + std::string graph_memory_max_size = ""; + std::string variable_memory_max_size = ""; + std::string enable_dump = std::to_string(false); + std::string enable_dump_debug = std::to_string(false); + std::string dump_path = ""; + std::string dump_step = ""; + std::string dump_mode = "output"; + std::string dump_debug_mode = "all"; + std::string stream_max_parallel_num = ""; + + std::string is_tailing_optimization = std::to_string(false); + std::string precision_mode = ""; + std::string profiling_mode = std::to_string(false); + std::string profiling_options = "training_trace"; + std::string auto_tune_mode = ""; + std::string graph_run_mode = "1"; + std::string op_debug_level = "0"; + std::string enable_scope_fusion_passes = ""; + string npuOptimizer = ""; + + if (attrs.Find("_NpuOptimizer") != nullptr) { + do_npu_optimizer = std::to_string(true); + if (attrs.Find("_enable_data_pre_proc") != nullptr) { + enable_dp = attrs.Find("_enable_data_pre_proc")->s(); + } + if (attrs.Find("_use_off_line") != nullptr) { + use_off_line = attrs.Find("_use_off_line")->s(); + if (attrs.Find("_mix_compile_mode") != nullptr) { + mix_compile_mode = attrs.Find("_mix_compile_mode")->s(); + } + if (attrs.Find("_iterations_per_loop") != nullptr) { + iterations_per_loop = attrs.Find("_iterations_per_loop")->s(); + } + if (attrs.Find("_lower_functional_ops") != nullptr) { + lower_functional_ops = attrs.Find("_lower_functional_ops")->s(); + } + if (attrs.Find("_job") != nullptr) { + job = attrs.Find("_job")->s(); + } else { + job = "localhost"; + } + if (attrs.Find("_task_index") != nullptr) { + 
task_index = attrs.Find("_task_index")->s(); + } + } + + if (attrs.Find("_variable_format_optimize") != nullptr) { + variable_format_optimize = attrs.Find("_variable_format_optimize")->s(); + } + if (attrs.Find("_hcom_parallel") != nullptr) { + hcom_parallel = attrs.Find("_hcom_parallel")->s(); + } + if (attrs.Find("_graph_memory_max_size") != nullptr) { + graph_memory_max_size = attrs.Find("_graph_memory_max_size")->s(); + } + if (attrs.Find("_variable_memory_max_size") != nullptr) { + variable_memory_max_size = attrs.Find("_variable_memory_max_size")->s(); + } + if (attrs.Find("_enable_dump") != nullptr) { + enable_dump = attrs.Find("_enable_dump")->s(); + } + if (attrs.Find("_enable_dump_debug") != nullptr) { + enable_dump_debug = attrs.Find("_enable_dump_debug")->s(); + } + if (enable_dump != std::to_string(false) || enable_dump_debug != std::to_string(false)) { + if (attrs.Find("_dump_path") != nullptr) { + dump_path = attrs.Find("_dump_path")->s(); + } + } + if (enable_dump != std::to_string(false)) { + if (attrs.Find("_dump_step") != nullptr) { + dump_step = attrs.Find("_dump_step")->s(); + if (!dump_step.empty()) { + Status s = checkDumpStep(dump_step); + if (!s.ok()) { + LOG(FATAL) << s.error_message(); + } + } + } + if (attrs.Find("_dump_mode") != nullptr) { + dump_mode = attrs.Find("_dump_mode")->s(); + Status s = checkDumpMode(dump_mode); + if (!s.ok()) { + LOG(FATAL) << s.error_message(); + } + } + } + if (enable_dump_debug != std::to_string(false)) { + if (attrs.Find("_dump_debug_mode") != nullptr) { + dump_debug_mode = attrs.Find("_dump_debug_mode")->s(); + Status s = checkDumpDebugMode(dump_debug_mode); + if (!s.ok()) { + LOG(FATAL) << s.error_message(); + } + } + } + if (attrs.Find("_stream_max_parallel_num") != nullptr) { + stream_max_parallel_num = attrs.Find("_stream_max_parallel_num")->s(); + } + + if (attrs.Find("_is_tailing_optimization") != nullptr) { + is_tailing_optimization = attrs.Find("_is_tailing_optimization")->s(); + } + if 
(attrs.Find("_precision_mode") != nullptr) { + precision_mode = attrs.Find("_precision_mode")->s(); + } + if (attrs.Find("_profiling_mode") != nullptr) { + profiling_mode = attrs.Find("_profiling_mode")->s(); + } + if (attrs.Find("_profiling_options") != nullptr) { + profiling_options = attrs.Find("_profiling_options")->s(); + } + if (attrs.Find("_auto_tune_mode") != nullptr) { + auto_tune_mode = attrs.Find("_auto_tune_mode")->s(); + } + if (attrs.Find("_graph_run_mode") != nullptr) { + graph_run_mode = attrs.Find("_graph_run_mode")->s(); + } + if (attrs.Find("_op_debug_level") != nullptr) { + op_debug_level = attrs.Find("_op_debug_level")->s(); + } + if (attrs.Find("_enable_scope_fusion_passes") != nullptr) { + enable_scope_fusion_passes = attrs.Find("_enable_scope_fusion_passes")->s(); + } + } + + all_options["variable_format_optimize"] = variable_format_optimize; + all_options["hcom_parallel"] = hcom_parallel; + all_options["stream_max_parallel_num"] = stream_max_parallel_num; + if (!graph_memory_max_size.empty()) { + all_options["graph_memory_max_size"] = graph_memory_max_size; + } + if (!variable_memory_max_size.empty()) { + all_options["variable_memory_max_size"] = variable_memory_max_size; + } + + all_options["enable_dump"] = enable_dump; + all_options["dump_path"] = dump_path; + all_options["dump_step"] = dump_step; + all_options["dump_mode"] = dump_mode; + all_options["enable_dump_debug"] = enable_dump_debug; + all_options["dump_debug_mode"] = dump_debug_mode; + + all_options["is_tailing_optimization"] = is_tailing_optimization; + all_options["precision_mode"] = precision_mode; + all_options["profiling_mode"] = profiling_mode; + if (profiling_mode != std::to_string(false) && !checkProfilingOptions(profiling_options)) { + LOG(FATAL) << "profiling options must be in 'training_trace', 'task_trace' or 'op_trace'"; + } + all_options["profiling_options"] = profiling_options; + all_options["auto_tune_mode"] = auto_tune_mode; + all_options["graph_run_mode"] = 
graph_run_mode; + all_options["op_debug_level"] = op_debug_level; + all_options["enable_scope_fusion_passes"] = enable_scope_fusion_passes; + + all_options["do_npu_optimizer"] = do_npu_optimizer; + all_options["enable_data_pre_proc"] = enable_dp; + all_options["use_off_line"] = use_off_line; + all_options["mix_compile_mode"] = mix_compile_mode; + all_options["iterations_per_loop"] = iterations_per_loop; + all_options["lower_functional_ops"] = lower_functional_ops; + all_options["job"] = job; + all_options["task_index"] = task_index; + + return all_options; + +} + + +std::map NpuAttrs::GetDefaultPassOptions() { + std::map pass_options; + pass_options["do_npu_optimizer"] = std::to_string(false); + pass_options["enable_dp"] = std::to_string(false); + pass_options["use_off_line"] = std::to_string(false); + pass_options["mix_compile_mode"] = std::to_string(false); + pass_options["iterations_per_loop"] = std::to_string(1); + pass_options["lower_functional_ops"] = std::to_string(false); + pass_options["job"] = "default"; + pass_options["task_index"] = std::to_string(0); + return pass_options; +} + +Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options, Node *node) +{ + std::map sess_options; + bool variable_format_optimize = true; + bool hcom_parallel = false; + std::string graph_memory_max_size = ""; + std::string variable_memory_max_size = ""; + bool enable_dump = false; + bool enable_dump_debug = false; + std::string dump_path = ""; + std::string dump_step = ""; + std::string dump_mode = "output"; + std::string dump_debug_mode = "all"; + std::string stream_max_parallel_num = ""; + + std::map init_options; + bool is_tailing_optimization = false; + std::string precision_mode = ""; + bool profiling_mode = false; + std::string profiling_options = "training_trace"; + std::string auto_tune_mode = ""; + int graph_run_mode = 1; + int op_debug_level = 0; + std::string enable_scope_fusion_passes = ""; + + std::map pass_options; + bool do_npu_optimizer 
= false; + bool enable_dp = false; + bool use_off_line = false; + bool mix_compile_mode = false; + int iterations_per_loop = 1; + bool lower_functional_ops = false; + string job = "default"; + int task_index = 0; + + + const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options(); + for (auto custom_optimizer : rewrite_options.custom_optimizers()) { + if (custom_optimizer.name() == "NpuOptimizer") { + const auto params = custom_optimizer.parameter_map(); + if (params.count("variable_format_optimize")) { + variable_format_optimize = params.at("variable_format_optimize").b(); + } + if (params.count("hcom_parallel")) { + hcom_parallel = params.at("hcom_parallel").b(); + } + if (params.count("graph_memory_max_size")) { + graph_memory_max_size = params.at("graph_memory_max_size").s(); + } + if (params.count("variable_memory_max_size")) { + variable_memory_max_size = params.at("variable_memory_max_size").s(); + } + if (params.count("enable_dump")) { + enable_dump = params.at("enable_dump").b(); + } + if (params.count("enable_dump_debug")) { + enable_dump_debug = params.at("enable_dump_debug").b(); + } + if (enable_dump || enable_dump_debug) { + if (params.count("dump_path")) { + dump_path = params.at("dump_path").s(); + } + } + if (enable_dump) { + if (params.count("dump_step")) { + dump_step = params.at("dump_step").s(); + Status s = checkDumpStep(dump_step); + if (!s.ok()) { + LOG(FATAL) << s.error_message(); + } + } + if (params.count("dump_mode")) { + dump_mode = params.at("dump_mode").s(); + Status s = checkDumpMode(dump_mode); + if (!s.ok()) { + LOG(FATAL) << s.error_message(); + } + } + } + if (enable_dump_debug) { + if (params.count("dump_debug_mode")) { + dump_debug_mode = params.at("dump_debug_mode").s(); + Status s = checkDumpDebugMode(dump_debug_mode); + if (!s.ok()) { + LOG(FATAL) << s.error_message(); + } + } + } + if (params.count("stream_max_parallel_num")) { + stream_max_parallel_num = 
params.at("stream_max_parallel_num").s(); + } + + if (params.count("is_tailing_optimization")) { + is_tailing_optimization = params.at("is_tailing_optimization").b(); + } + if (params.count("precision_mode")) { + precision_mode = params.at("precision_mode").s(); + } + if (params.count("profiling_mode")) { + profiling_mode = params.at("profiling_mode").b(); + } + if (params.count("profiling_options") && profiling_mode) { + profiling_options = params.at("profiling_options").s(); + } + if (params.count("auto_tune_mode")) { + auto_tune_mode = params.at("auto_tune_mode").s(); + } + if (params.count("graph_run_mode")) { + graph_run_mode = params.at("graph_run_mode").i(); + } + if (params.count("op_debug_level")) { + op_debug_level = params.at("op_debug_level").i(); + } + if (params.count("enable_scope_fusion_passes")) { + enable_scope_fusion_passes = params.at("enable_scope_fusion_passes").s(); + } + + do_npu_optimizer = true; + if (params.count("enable_data_pre_proc")) { + enable_dp = params.at("enable_data_pre_proc").b(); + } + if (params.count("use_off_line")) { + use_off_line = params.at("use_off_line").b(); + if (params.count("mix_compile_mode")) { + mix_compile_mode = params.at("mix_compile_mode").b(); + } + if (params.count("iterations_per_loop")) { + iterations_per_loop = params.at("iterations_per_loop").i(); + } + if (params.count("lower_functional_ops")) { + lower_functional_ops = params.at("lower_functional_ops").b(); + } + if (params.count("job")) { + job = params.at("job").s(); + } else { + job = "localhost"; + } + if (params.count("task_index")) { + task_index = params.at("task_index").i(); + } + } + } + } + + // session options + sess_options["variable_format_optimize"] = std::to_string(variable_format_optimize); + sess_options["hcom_parallel"] = std::to_string(hcom_parallel); + sess_options["stream_max_parallel_num"] = stream_max_parallel_num; + if (!graph_memory_max_size.empty()) { + sess_options["graph_memory_max_size"] = graph_memory_max_size; + } + if 
(!variable_memory_max_size.empty()) { + sess_options["variable_memory_max_size"] = variable_memory_max_size; + } + + sess_options["enable_dump"] = std::to_string(enable_dump); + sess_options["dump_path"] = dump_path; + sess_options["dump_step"] = dump_step; + sess_options["dump_mode"] = dump_mode; + sess_options["enable_dump_debug"] = std::to_string(enable_dump_debug); + sess_options["dump_debug_mode"] = dump_debug_mode; + + init_options["is_tailing_optimization"] = std::to_string(is_tailing_optimization); + init_options["precision_mode"] = precision_mode; + init_options["profiling_mode"] = std::to_string(profiling_mode); + if (profiling_mode && !checkProfilingOptions(profiling_options)) { + LOG(FATAL) << "profiling options must be in 'training_trace', 'task_trace' or 'op_trace'"; + } + init_options["profiling_options"] = profiling_options; + init_options["auto_tune_mode"] = auto_tune_mode; + init_options["graph_run_mode"] = std::to_string(graph_run_mode); + init_options["op_debug_level"] = std::to_string(op_debug_level); + init_options["enable_scope_fusion_passes"] = enable_scope_fusion_passes; + + pass_options["do_npu_optimizer"] = std::to_string(do_npu_optimizer); + pass_options["enable_data_pre_proc"] = std::to_string(enable_dp); + pass_options["use_off_line"] = std::to_string(use_off_line); + pass_options["mix_compile_mode"] = std::to_string(mix_compile_mode); + pass_options["iterations_per_loop"] = std::to_string(iterations_per_loop); + pass_options["lower_functional_ops"] = std::to_string(lower_functional_ops); + pass_options["job"] = job; + pass_options["task_index"] = std::to_string(task_index); + + std::string attr_name = ""; + for (auto option : sess_options) { + attr_name = std::string("_") + option.first; + node->AddAttr(attr_name, option.second); + } + for (auto option : init_options) { + attr_name = std::string("_") + option.first; + node->AddAttr(attr_name, option.second); + } + for (auto option : pass_options) { + attr_name = std::string("_") + 
option.first; + node->AddAttr(attr_name, option.second); + } + node->AddAttr("_NpuOptimizer", "NpuOptimizer"); + + return Status::OK(); +} + +} diff --git a/tf_adapter/util/npu_attrs.h b/tf_adapter/util/npu_attrs.h new file mode 100644 index 000000000..9b353babd --- /dev/null +++ b/tf_adapter/util/npu_attrs.h @@ -0,0 +1,46 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_NPU_ATTRS_H_ +#define TENSORFLOW_NPU_ATTRS_H_ + +#include +#include +#include "ge/ge_api_types.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/graph/graph.h" + +// single load all npu mode +namespace tensorflow { +class NpuAttrs { + public: + // This method returns instance Pointers + static std::map GetInitOptions(OpKernelConstruction *ctx); + static std::map GetDefaultInitOptions(); + static std::map GetSessOptions(OpKernelConstruction *ctx); + static std::map GetPassOptions(const GraphOptimizationPassOptions &options); + static std::map GetPassOptions(OpKernelConstruction *ctx); + static std::map GetPassOptions(AttrSlice attrs); + static std::map GetAllAttrOptions(AttrSlice attrs); + static std::map GetDefaultPassOptions(); + static Status SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options, Node *node); +}; +} + +#endif diff --git a/tf_adapter/util/npu_ops_identifier.cc b/tf_adapter/util/npu_ops_identifier.cc new file mode 100644 index 000000000..d387f5b97 --- /dev/null +++ b/tf_adapter/util/npu_ops_identifier.cc @@ -0,0 +1,119 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tf_adapter/util/npu_ops_identifier.h" +#include + +#include "nlohmann/json.hpp" +#include "tensorflow/core/platform/logging.h" +#include "tf_adapter/common/common.h" +using json = nlohmann::json; + +#define LIKELY(x) __builtin_expect(!!(x), 1) +#define UNLIKELY(x) __builtin_expect(!!(x), 0) + +const static std::string kOpsInfoJson = "/framework/built-in/tensorflow/npu_supported_ops.json"; +const static std::string kGray = "isGray"; +const static std::string kHeavy = "isHeavy"; +// This method returns different instance Pointers in mixed mode and in the full sink model +NpuOpsIdentifier *NpuOpsIdentifier::GetInstance(bool is_mix) { + if (is_mix) { + static json mixJson; + static NpuOpsIdentifier instance(true, mixJson); + return &instance; + } else { + static json allJson; + static NpuOpsIdentifier instance(false, allJson); + return &instance; + } +} +// Constructor +NpuOpsIdentifier::NpuOpsIdentifier(bool is_mix, json &ops_info) : is_mix_(is_mix), ops_info_(ops_info) { + const std::string mode = is_mix ? 
"MIX" : "ALL"; + const char *path_env = std::getenv("ASCEND_OPP_PATH"); + std::string opsPath = ""; + if (path_env != nullptr && strlen(path_env) < ADAPTER_ENV_MAX_LENTH) { + opsPath = path_env; + } else { + opsPath = "/usr/local/Ascend/opp"; + LOG(INFO) << "environment variable ASCEND_OPP_PATH is not set, use default value[" + << opsPath << "]"; + } + std::string opsJsonPath = opsPath + kOpsInfoJson; + LOG(INFO) << "[" << mode << "] Parsing json from " << opsJsonPath; + int32_t opsCnt = NpuOpsIdentifier::ParseOps(opsJsonPath, ops_info_); + LOG(INFO) << opsCnt << " ops parsed"; + VLOG(1) << ops_info_.dump(2); // 1 is vlog level, 2 is ops info index +} +// Parse and store the ops configuration json file, return num of parsed ops +int32_t NpuOpsIdentifier::ParseOps(const std::string &f, json &root) { + std::ifstream jsonConfigFileStream(f, std::ifstream::in); + int32_t opsCnt = 0; + if (jsonConfigFileStream.is_open()) { + try { + jsonConfigFileStream >> root; + for (auto i = root.begin(); i != root.end(); ++i) { + opsCnt++; + } + } catch (json::exception &e) { + LOG(INFO) << e.what(); + jsonConfigFileStream.close(); + return 0; + } + jsonConfigFileStream.close(); + } else { + LOG(INFO) << "Open " << f << " failed"; + return 0; + } + return opsCnt; +} +// Determine if the node is supported by NPU. Note that it will behave +// differently in mixed mode and full sink mode +bool NpuOpsIdentifier::IsNpuSupported(const char *op) { + return NpuOpsIdentifier::IsNpuSupported(std::string(op)); +} +bool NpuOpsIdentifier::IsNpuSupported(const std::string &op) { + bool declared = ops_info_[op].is_object(); + if (!declared) { + return false; + } + if (is_mix_ && ops_info_[op][kGray].is_boolean()) { + return !ops_info_[op][kGray]; + } + return true; +} +// Determine if the node is performance-sensitive on NPU, this should +// normally be done after calling IsNpuSupported to confirm that the node +// is supported by NPU. 
To be on the safe side, it internally performs a +// check on whether it is supported by NPU, if not, prints an error log, +// and returns `false` +bool NpuOpsIdentifier::IsPerformanceSensitive(const char *op) { + return NpuOpsIdentifier::IsPerformanceSensitive(std::string(op)); +} +bool NpuOpsIdentifier::IsPerformanceSensitive(const std::string &op) { + auto iter = ops_info_.find(op); + if (iter != ops_info_.end()) { + if (ops_info_[op].is_object()) { + if (ops_info_[op][kHeavy].is_boolean()) { + return ops_info_[op][kHeavy]; + } + } + } + return false; +} + +#undef LIKELY +#undef UNLIKELY \ No newline at end of file diff --git a/tf_adapter/util/npu_ops_identifier.h b/tf_adapter/util/npu_ops_identifier.h new file mode 100644 index 000000000..095f4b503 --- /dev/null +++ b/tf_adapter/util/npu_ops_identifier.h @@ -0,0 +1,51 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_NPU_OPS_IDENTIFIER_H_ +#define TENSORFLOW_NPU_OPS_IDENTIFIER_H_ + +#include +#include "nlohmann/json_fwd.hpp" + +// Singleton class to check whether or not a tensorflow op is supported by NPU, +// and, whether or not a tensorflow op is performance sensitive on NPU. 
+class NpuOpsIdentifier { + public: + // This method returns different instance Pointers in mixed mode and in the full sink model + static NpuOpsIdentifier *GetInstance(bool is_mix = false); + // Determine if the node is supported by NPU. Note that it will behave + // differently in mixed mode and full sink mode + bool IsNpuSupported(const char *op); + bool IsNpuSupported(const std::string &op); + // Determine if the node is performance-sensitive on NPU, this should + // normally be done after calling IsNpuSupported to confirm that the node + // is supported by NPU. To be on the safe side, it internally performs a + // check on whether it is supported by NPU, if not, prints an error log, + // and returns `false` + bool IsPerformanceSensitive(const char *op); + bool IsPerformanceSensitive(const std::string &op); + private: + NpuOpsIdentifier(bool is_mix, nlohmann::json &json); + ~NpuOpsIdentifier() {}; + NpuOpsIdentifier(const NpuOpsIdentifier &) = delete; + NpuOpsIdentifier &operator=(const NpuOpsIdentifier &) = delete; + // Parse and store the ops configuration json file, return num of parsed ops + int32_t ParseOps(const std::string &f, nlohmann::json &root); + const bool is_mix_; + nlohmann::json &ops_info_; +}; + +#endif \ No newline at end of file diff --git a/tf_adapter/util/npu_plugin.h b/tf_adapter/util/npu_plugin.h new file mode 100644 index 000000000..28fb7b4c1 --- /dev/null +++ b/tf_adapter/util/npu_plugin.h @@ -0,0 +1,24 @@ +/** +* Copyright (C) <2019> . All Rights Reserved. 
+* Description: a plugin control GEInitialize and GEFinalize +*/ +#ifndef TENSORFLOW_NPU_PLUGIN_H_ +#define TENSORFLOW_NPU_PLUGIN_H_ + +#include +#include +#include "ge_plugin.h" +#include "ge/ge_api_types.h" + +const char* const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); +const char* const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); +const char* const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = ge::OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES; +const char* const OPTION_EXEC_PROFILING_MODE = ge::OPTION_EXEC_PROFILING_MODE; +const char* const OPTION_EXEC_PROFILING_OPTIONS = ge::OPTION_EXEC_PROFILING_OPTIONS; +const char* const OPTION_GRAPH_RUN_MODE = ge::OPTION_GRAPH_RUN_MODE; + +void PluginInit(std::map& init_options); + +void PluginFinalize(); + +#endif // TENSORFLOW_NPU_PLUGIN_H_ \ No newline at end of file diff --git a/tf_adapter/util/plugin_load_manager.cc b/tf_adapter/util/plugin_load_manager.cc new file mode 100644 index 000000000..e5fad065b --- /dev/null +++ b/tf_adapter/util/plugin_load_manager.cc @@ -0,0 +1,65 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tf_adapter/util/plugin_load_manager.h" +#include "tensorflow/core/platform/env.h" +#include +#include + +namespace tensorflow { +void* PluginLoadManager::DlOpen(const std::string &path) { + void* handle = dlopen(path.c_str(), RTLD_NOW); + if (handle == nullptr) { + LOG(WARNING) << "dlopen faild, reason:" << dlerror(); + } + return handle; +} + +void* PluginLoadManager::DlSym(void* handle, const std::string &func_name) { + if (handle == nullptr) { + LOG(WARNING) << "handle is null, not valid!"; + return nullptr; + } + void* func = dlsym(handle, func_name.c_str()); + if (func == nullptr) { + LOG(WARNING) << "get func[" << func_name << "] faild, reason:" << dlerror(); + } + return func; +} + +std::string PluginLoadManager::GetTFPluginRealPath() { + Dl_info dl_info; + if (dladdr(reinterpret_cast(&PluginLoadManager::GetTFPluginRealPath), &dl_info) == 0) { + LOG(WARNING) << "can not get tf-adapter base path!"; + return string(); + } else { + std::string so_path = dl_info.dli_fname; + char path[PATH_MAX] = {0}; + if (so_path.length() >= PATH_MAX) { + LOG(WARNING) << "The shared library file path is too long!"; + return string(); + } + if (realpath(so_path.c_str(), path) == nullptr) { + LOG(WARNING) << "Failed to get realpath of " << so_path; + return string(); + } + so_path = path; + so_path = so_path.substr(0, so_path.rfind('/') + 1); + LOG(INFO) << "tf-plugin base path is: " << so_path; + return so_path; + } +} +} // namesapce tensorflow diff --git a/tf_adapter/util/plugin_load_manager.h b/tf_adapter/util/plugin_load_manager.h new file mode 100644 index 000000000..a5b807300 --- /dev/null +++ b/tf_adapter/util/plugin_load_manager.h @@ -0,0 +1,31 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. 
foss@huawei.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_PLUGIN_LOAD_MANAGER_H_ +#define TENSORFLOW_PLUGIN_LOAD_MANAGER_H_ + +#include "tensorflow/core/lib/core/status.h" +#include + +namespace tensorflow { +class PluginLoadManager { +public: + static void* DlOpen(const std::string &path); + static void* DlSym(void* handel, const std::string &fun_name); + static std::string GetTFPluginRealPath(); +}; +} // namespace tensorflow +#endif // TENSORFLOW_PLUGIN_LOAD_MANAGER_H_ diff --git a/tf_adapter/util/session_manager.cc b/tf_adapter/util/session_manager.cc new file mode 100644 index 000000000..0232def14 --- /dev/null +++ b/tf_adapter/util/session_manager.cc @@ -0,0 +1,123 @@ +/** +* Copyright (C) <2019> . All Rights Reserved. +* Description: manage the relationship between tf session and ge session +*/ + +#include "tf_adapter/util/session_manager.h" + +#include "tf_adapter/common/common.h" +#include "tf_adapter/util/npu_attrs.h" + +using namespace tensorflow; +SessionManager& SessionManager::GetInstance() { + static SessionManager instance; + return instance; +} + +// Returns True if get ge session success. 
+bool SessionManager::GetOrCreateGeSession(std::string& tf_session, + ge::Session*& ge_session, + std::map& sess_options) { + // find valid tf session handle + if (tf_session.empty()) { + LOG(ERROR) << "tf session is empty, get ge session failed."; + return false; + } + + // find valid ge session + auto it = ge_sessions_.find(tf_session); + if (it != ge_sessions_.end()) { + ge_session = it->second; + LOG(INFO) << "tf session " << tf_session << " get ge session success."; + return true; + } + + PrintGeSessionOptions(sess_options); + bool ret = SessionManager::CreateGeSession(tf_session, ge_session, sess_options); + if (!ret) { + LOG(ERROR) << "tf session " << tf_session << " create ge session failed."; + return false; + } + return true; +} + +// destory ge session. +void SessionManager::DestoryGeSession(std::string tf_session) { + if (tf_session.empty()) { + LOG(ERROR) << "tf session is empty, can not destory ge session."; + } + auto it = ge_sessions_.find(tf_session); + if (it != ge_sessions_.end()) { + if (it->second != nullptr) { + LOG(INFO) << "find ge session connect with tf session " << tf_session; + delete it->second; + it->second = nullptr; + } + ge_sessions_.erase(it); + LOG(INFO) << "destory ge session connect with tf session " + << tf_session << " success."; + } +} + +// Returns True if create ge session success. +bool SessionManager::CreateGeSession(std::string tf_session, ge::Session*& ge_session, + std::map& sess_options) { + // hcom parallel + LOG(INFO) << "[GEOP] hcom_parallel :" << sess_options[ge::HCOM_PARALLEL]; + + // stream max parallel num + LOG(INFO) << "[GEOP] stream_max_parallel_num :" << sess_options[ge::STREAM_MAX_PARALLEL_NUM]; + ge_session = new(std::nothrow) ge::Session(sess_options); + if (ge_session == nullptr) { + LOG(ERROR) << "tf session " << tf_session << " create ge session failed."; + return false; + } + ge_sessions_.insert(std::make_pair(tf_session, ge_session)); + return true; +} + +// Returns True if any ge session exist. 
+bool SessionManager::IsGeSessionExist() { + return !ge_sessions_.empty(); +} + +void SessionManager::PrintGeSessionOptions(std::map& sess_options) { + // variable acceleration configuration + LOG(INFO) << "[GEOP] variable_acceleration :" << sess_options["ge.exec.variable_acc"]; + // hcom parallel + LOG(INFO) << "[GEOP] hcom_parallel :" << sess_options[ge::HCOM_PARALLEL]; + + // stream max parallel num + LOG(INFO) << "[GEOP] stream_max_parallel_num :" << sess_options[ge::STREAM_MAX_PARALLEL_NUM]; + + // graph memory configuration + if (!sess_options[ge::GRAPH_MEMORY_MAX_SIZE].empty()) { + LOG(INFO) << "[GEOP] set graph_memory_max_size: " << sess_options[ge::GRAPH_MEMORY_MAX_SIZE]; + } else { + sess_options.erase(ge::GRAPH_MEMORY_MAX_SIZE); + } + + // variable memory configuration + if (!sess_options[ge::VARIABLE_MEMORY_MAX_SIZE].empty()) { + LOG(INFO) << "[GEOP] set variable_memory_max_size: " << sess_options[ge::VARIABLE_MEMORY_MAX_SIZE]; + } else { + sess_options.erase(ge::VARIABLE_MEMORY_MAX_SIZE); + } + + // reuse memory env + const char* disable_reuse_memory = std::getenv("DISABLE_REUSE_MEMORY"); + if (disable_reuse_memory == nullptr) { + disable_reuse_memory = "0"; + LOG(WARNING) << "[GEOP] can not get DISABLE_REUSE_MEMORY in env, set to default 0"; + } + sess_options["ge.exec.disableReuseMemory"] = disable_reuse_memory; + + // dump configuration + string dump_step = sess_options[ge::OPTION_EXEC_DUMP_STEP]; + LOG(INFO) << "[GEOP] enable_dump :" << sess_options[ge::OPTION_EXEC_ENABLE_DUMP] + << ", dump_path :" << sess_options[ge::OPTION_EXEC_DUMP_PATH] + << ", dump_step :" << (dump_step == "" ? 
"NA" : dump_step) + << ", dump_mode :" << sess_options[ge::OPTION_EXEC_DUMP_MODE] + << ", enable_dump_enable :" << sess_options[ge::OPTION_EXEC_ENABLE_DUMP_DEBUG] + << ", dump_debug_mode :" << sess_options[ge::OPTION_EXEC_DUMP_DEBUG_MODE]; +} \ No newline at end of file diff --git a/tf_adapter/util/session_manager.h b/tf_adapter/util/session_manager.h new file mode 100644 index 000000000..8123d197d --- /dev/null +++ b/tf_adapter/util/session_manager.h @@ -0,0 +1,41 @@ +/** +* Copyright (C) <2019> . All Rights Reserved. +* Description: manage the relationship between tf session and ge session +*/ + +#ifndef TENSORFLOW_SESSION_MANAGER_H_ +#define TENSORFLOW_SESSION_MANAGER_H_ + +#include +#include +#include +#include "ge/ge_api.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/framework/op_kernel.h" + +// Singleton class to manage the relationship between +// tf session and ge session +class SessionManager { + public: + static SessionManager& GetInstance(); + + // Retrieves an already existing ge session to run the compute graph, + // or create a ge session for future use. + bool GetOrCreateGeSession(std::string& tf_session, ge::Session*& ge_session, + std::map& sess_options); + + // Destroy a ge session divided by tf session. + void DestoryGeSession(std::string tf_session); + + // Whether a ge session exists. + bool IsGeSessionExist(); + private: + // Create a ge session to run the compute graph divided by tf session. + bool CreateGeSession(std::string session, ge::Session*& ge_session, + std::map& sess_options); + // Print ge session options + void PrintGeSessionOptions(std::map& sess_options); + // Mapping relationship between tf session and ge session. + std::unordered_map ge_sessions_; +}; +#endif diff --git a/tools/bazel.rc b/tools/bazel.rc new file mode 100644 index 000000000..3734fab71 --- /dev/null +++ b/tools/bazel.rc @@ -0,0 +1,86 @@ +# Android configs. 
Bazel needs to have --cpu and --fat_apk_cpu both set to the +# target CPU to build transient dependencies correctly. See +# https://docs.bazel.build/versions/master/user-manual.html#flag--fat_apk_cpu +build:android --crosstool_top=//external:android/crosstool +build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain +build:android_arm --config=android +build:android_arm --cpu=armeabi-v7a +build:android_arm --fat_apk_cpu=armeabi-v7a +build:android_arm64 --config=android +build:android_arm64 --cpu=arm64-v8a +build:android_arm64 --fat_apk_cpu=arm64-v8a + +# Config to use a mostly-static build and disable modular op registration +# support (this will revert to loading TensorFlow with RTLD_GLOBAL in Python). +# By default, TensorFlow will build with a dependence on +# //tensorflow:libtensorflow_framework.so. +build:monolithic --define framework_shared_object=false + +# For projects which use TensorFlow as part of a Bazel build process, putting +# nothing in a bazelrc will default to a monolithic build. The following line +# opts in to modular op registration support by default. +build --define framework_shared_object=true + +# Please note that MKL on MacOS or windows is still not supported. +# If you would like to use a local MKL instead of downloading, please set the +# environment variable "TF_MKL_ROOT" every time before build. +build:mkl --define=build_with_mkl=true --define=enable_mkl=true +build:mkl -c opt + +# This config option is used to enable MKL-DNN open source library only, +# without depending on MKL binary version. +build:mkl_open_source_only --define=build_with_mkl_dnn_only=true +build:mkl_open_source_only --define=build_with_mkl=true --define=enable_mkl=true + +build:download_clang --crosstool_top=@local_config_download_clang//:toolchain +build:download_clang --define=using_clang=true +# Instruct clang to use LLD for linking. 
+# This only works with GPU builds currently, since Bazel sets -B/usr/bin in +# auto-generated CPU crosstool, forcing /usr/bin/ld.lld to be preferred over +# the downloaded one. +build:download_clang_use_lld --linkopt='-fuse-ld=lld' + +build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain +build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true + +build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain +build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true + +build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain +build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true + +build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain +build:sycl --define=using_sycl=true --define=using_trisycl=false + +build:sycl_nodouble --crosstool_top=@local_config_sycl//crosstool:toolchain +build:sycl_nodouble --define=using_sycl=true --cxxopt -DTENSORFLOW_SYCL_NO_DOUBLE + +build:sycl_asan --crosstool_top=@local_config_sycl//crosstool:toolchain +build:sycl_asan --define=using_sycl=true --define=using_trisycl=false --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address + +build:sycl_trisycl --crosstool_top=@local_config_sycl//crosstool:toolchain +build:sycl_trisycl --define=using_sycl=true --define=using_trisycl=true + +# Options extracted from configure script +build:gdr --define=with_gdr_support=true +build:ngraph --define=with_ngraph_support=true +build:verbs --define=with_verbs_support=true + +build --define=use_fast_cpp_protos=true +build --define=allow_oversize_protos=true +build --define=grpc_no_ares=true + +build --spawn_strategy=standalone +build --genrule_strategy=standalone +build -c opt + +# Other build flags. 
+build --define=grpc_no_ares=true + +# Modular TF build options +build:dynamic_kernels --define=dynamic_loaded_kernels=true + +# Default paths for TF_SYSTEM_LIBS +build --define=PREFIX=/usr +build --define=LIBDIR=$(PREFIX)/lib +build --define=INCLUDEDIR=$(PREFIX)/include diff --git a/tools/build_wheel.sh b/tools/build_wheel.sh new file mode 100644 index 000000000..064b0b9bc --- /dev/null +++ b/tools/build_wheel.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. +# Description: Build npu_bridge pip-packages +# ============================================================================== + +set -e +set -o pipefail + +dst=$1 +PRODUCT=$2 + +if [[ -z "${dst}" ]]; then + echo -e "\033[7mNo destination dir provided\033[0m" + exit 1 +fi +if [[ "${PRODUCT}" = "cloud" ]]; then + echo "Build npu_bridge in cloud." + CUSTOM_COMMANDS="--define product=cloud" +elif [[ "${PRODUCT}" = "mini" ]]; then + echo "Build npu_bridge in mini." + CUSTOM_COMMANDS="--define product=mini" +elif [[ "${PRODUCT}" = "onetrack" ]]; then + echo "Build npu_bridge in onetrack." + CUSTOM_COMMANDS="--define product=onetrack" +else + echo "TF_Adapter not support this product." 
+ exit 1 +fi + +PYTHON_BIN_PATH=$(which python3.7) +if [[ -z "${PYTHON_BIN_PATH}" ]]; then + echo -e "\033[7mNo python3 installed\033[0m" + exit 1 +fi + +cd "$(dirname "${BASH_SOURCE[0]}")/../" +TOP_DIR="$(cd ../../../../../ && pwd)" +INSTALL_DIR="${TOP_DIR}/out/${PRODUCT}/host" +OUTPUT_DIR="${INSTALL_DIR}/obj/tf_adapter" +SOFT_DP_DIR="${INSTALL_DIR}/obj/lib" +mkdir -p "${OUTPUT_DIR}/genfiles" +mkdir -p "${OUTPUT_DIR}/wheel" + +TF_INSTALL_LIB_PATH=$("${PYTHON_BIN_PATH:-python3}" -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') +echo "Found installed tensorflow ${TF_INSTALL_LIB_PATH}" + +TF_PACKAGE_PATH="$(cd ../../../compile_deps/tf-1.15.0 && pwd)" +#if [ -n "$(uname -a|grep Ubuntu|grep x86_64)" ];then +# TF_INSTALL_LIB_PATH="$(cd ../../../compile_deps/tf-1.15.0/lib/ubuntu_x86_64 && pwd)" +#elif [ -n "$(uname -a|grep Ubuntu|grep aarch64)" ];then +# TF_INSTALL_LIB_PATH="$(cd ../../../compile_deps/tf-1.15.0/lib/ubuntu_aarch64 && pwd)" +#elif [ -n "$(uname -a|grep euleros|grep x86_64)" ];then +# TF_INSTALL_LIB_PATH="$(cd ../../../compile_deps/tf-1.15.0/lib/euleros_x86_64 && pwd)" +#elif [ -n "$(uname -a|grep euleros|grep aarch64)" ];then +# TF_INSTALL_LIB_PATH="$(cd ../../../compile_deps/tf-1.15.0/lib/euleros_aarch64 && pwd)" +#elif [ -n "$(uname -a|grep Debian|grep x86_64)" ];then +# TF_INSTALL_LIB_PATH="$(cd ../../../compile_deps/tf-1.15.0/lib/debian_x86_64 && pwd)" +#else +# echo "TF_Adapter not support this OS." 
+# exit 1 +#fi + +#echo "Found installed tensorflow lib ${TF_INSTALL_LIB_PATH}" + +cp workspace.bzl.tpl workspace.bzl +sed -i "s|{TF_INSTALL_LIB_PATH}|"${TF_INSTALL_LIB_PATH}"|g" workspace.bzl +cp WORKSPACE.tpl WORKSPACE +sed -i "s|TF_PACKAGE_PATH|"${TF_PACKAGE_PATH}"|g" WORKSPACE + +## use gcc-7.3.0 +#bazel --output_base=${OUTPUT_DIR}/genfiles build ${CUSTOM_COMMANDS} --config=opt //tf_adapter:_tf_adapter.so +/opt/buildtools/bazel-0.24.1/bin/bazel --output_base=${OUTPUT_DIR}/genfiles build ${CUSTOM_COMMANDS} --config=opt //tf_adapter:_tf_adapter.so + +cp -r tf_adapter/python/. "${OUTPUT_DIR}/wheel" +cp -f bazel-bin/tf_adapter/_tf_adapter.so "${OUTPUT_DIR}/wheel/npu_bridge" +cp ${SOFT_DP_DIR}/libSoftDp.so "${OUTPUT_DIR}/wheel/npu_bridge" + +cd "${OUTPUT_DIR}/wheel" +"${PYTHON_BIN_PATH:-python3.7}" setup.py bdist_wheel >/dev/null + +cp -f dist/*.whl "$1" diff --git a/tools/gen_sc_makefile_bazel.py b/tools/gen_sc_makefile_bazel.py new file mode 100644 index 000000000..150cb8163 --- /dev/null +++ b/tools/gen_sc_makefile_bazel.py @@ -0,0 +1,205 @@ +#!/usr/bin/python +#-*- coding: UTF-8 -*- + +import os +import sys +import string +import datetime +import time +import re + +source_suffix = ["c","cpp","cc","cce"] + +def get_gcc_cmd(input_file, cpp_file_list, code_top_dir, custom_code_top_dir=""): + gcc_cmd_set = {} + lint_cmd_set = {} + file = open(input_file, "r") + lines = file.readlines() + file.close() + compile_cmd_regex = re.compile('(gcc|g\+\+|\"aarch64-linux-gnu-gcc\"|ccec)\s+[^ ].*\s-o\s+[^ ]+\.(o|s|cpp|obj)(\s|$)') + if(custom_code_top_dir != "") : + custom_code_top_dir = custom_code_top_dir.replace(code_top_dir + "/", "") + for line in lines: + line = line.strip() + line = line.strip(')') + if not compile_cmd_regex.search(line): + continue + items = line.split() + item_list = [] + compiler = "" + compiler_path = "" + for i in range(len(items)): + items[i] = items[i].strip("\"") + if items[i].endswith("gcc") or items[i].endswith("g++") or 
items[i].endswith("clang") or items[i].endswith("clang++"): + compiler = items[i].split('/')[-1] + if compiler.startswith("aarch64"): + items[i] = "external/hcc/bin/" + compiler + if custom_code_top_dir != "" and not items[i].startswith("/") : + items[i] = code_top_dir + "/" + custom_code_top_dir + items[i] + item_list = items[i:] + break + if len(item_list) == 0: + continue + if "-MD" in item_list and "-MF" in item_list : + index = item_list.index("-MF") + if custom_code_top_dir != "" : + item_list[index + 1] = custom_code_top_dir + item_list[index + 1] + + cpp_file = "" + if item_list[-1].strip().split(".")[-1] in source_suffix: + if custom_code_top_dir != "" : + item_list[-1] = custom_code_top_dir + item_list[-1].strip() + cpp_file = item_list[-1].strip() + else: + if "-c" not in item_list: + continue + index = item_list.index("-c") + if custom_code_top_dir != "" : + item_list[index+1] = custom_code_top_dir + item_list[index+1].strip() + cpp_file = item_list[index+1] + if cpp_file[-1].strip().split(".")[-1] not in source_suffix: + continue + + if "-o" not in item_list: + continue + try: + index = item_list.index("-o") + except: + continue + obj_file = item_list[index+1] + if custom_code_top_dir != "" : + obj_file = custom_code_top_dir + obj_file + item_list[index+1] = obj_file + + cpp_file_rel = cpp_file + if cpp_file_rel.startswith("/"): + code_top_prefix_len = len(code_top_dir) + cpp_file_rel = cpp_file[code_top_prefix_len+1:] + + if cpp_file_rel in cpp_file_list: + # gcc_cmd_set[obj_file] = " ".join(item_list) + c_flags_list = [] + rest_list = [] + is_add_I = False + is_add_D = False + for item in item_list: + item = item.strip() + if is_add_I: + is_add_I = False + item = "-I" + custom_code_top_dir + item + c_flags_list.append(item) + continue + if is_add_D: + is_add_D = False + item = "-D" + item + c_flags_list.append(item) + continue + is_add_I = False + is_add_D = False + if item == "-I": + is_add_I = True + elif item == "-D": + is_add_D = True + elif 
item.startswith("-I") or item.startswith("-D"): + c_flags_list.append(item) + else: + rest_list.append(item) + continue + gcc_options = " ".join(c_flags_list) + c_flags_list.append("-D_lint") + c_flags_list.append("-DLINUX_PC_LINT") + c_flags = " ".join(c_flags_list) + if compiler.startswith("aarch64-linux-gnu"): + wine_str = "export LINT_PATH=%s/vendor/hisi/llt/ci/tools/pc-lint;wine $(LINT_PATH)/LINT-NT.EXE %s $(LINT_PATH)/davinci_arm64.lnt %s"%(code_top_dir,c_flags, cpp_file) + else: + wine_str = "export LINT_PATH=%s/vendor/hisi/llt/ci/tools/pc-lint;wine $(LINT_PATH)/LINT-NT.EXE %s $(LINT_PATH)/davinci_x86.lnt %s"%(code_top_dir,c_flags, cpp_file) + + rest_option = " ".join(rest_list) + gcc_cmd_str = rest_option + " " + gcc_options + gcc_cmd_set[obj_file] = gcc_cmd_str + lint_out_path = code_top_dir + "/out/tools/lint/" + obj_file + lint_cmd_set[lint_out_path] = wine_str + + return gcc_cmd_set,lint_cmd_set + +def walkDir(top_dir,directory): + fileArray = [] + for root, dirs, files in os.walk(directory): + for name in files: + if name.endswith(".c") or name.endswith(".cc") or name.endswith(".cpp"): + fileArray.append(os.path.abspath(os.path.join(root, name))[len(top_dir)+1:]) + return fileArray + +def get_cpp_file_list(top_dir,input_file, custom_code_top_dir=""): + cpp_file_list = [] + file = open(input_file, "r") + lines = file.readlines() + file.close() + for line in lines: + line = line.strip() + if custom_code_top_dir != "" : + line_path = os.path.join(custom_code_top_dir,line) + else : + line_path = os.path.join(top_dir,line) + if os.path.isfile(line_path): + if line.endswith(".c") or line.endswith(".cc") or line.endswith(".cpp"): + cpp_file_list.append(line_path[len(top_dir)+1:]) + if os.path.isdir(line_path): + filelist = walkDir(top_dir,line_path) + if len(filelist) > 0: + cpp_file_list.extend(filelist) + return cpp_file_list + +def main(): + analysis_file = sys.argv[1] + cpp_file = sys.argv[2] + output_file = sys.argv[3] + code_top_dir = os.getcwd() + 
custom_code_top_dir = "" + if len(sys.argv) == 5 : + custom_code_top_dir = sys.argv[4] + cpp_file_list = get_cpp_file_list(code_top_dir,cpp_file, custom_code_top_dir) + else : + cpp_file_list = get_cpp_file_list(code_top_dir,cpp_file) + + gcc_cmd_set,lint_cmd_set = get_gcc_cmd(analysis_file, cpp_file_list, code_top_dir, custom_code_top_dir) + if len(gcc_cmd_set) == 0: + print "Error: can not get sc gcc cmd " + sys.exit(-1) + if len(lint_cmd_set) == 0: + print "Error: can not get lint gcc cmd " + sys.exit(-1) + + fd = open(output_file, "w") + # content = "GCC_PATH_1 = " + code_top_dir + "/prebuilts/gcc/linux-x86/aarch64/aarch64-linux-gnu-4.8/bin" + "\n" + # content += "GCC_PATH = $(GCC_PATH_1)" + "\n" + # content += "export GCC_PATH\n\n" + content = "" + content += ".PHONY: all\n" + content += "\n" + content += "ifeq ($(PCLINT_ENABLE),true)\n" + for (obj_file,wine_cmd) in lint_cmd_set.items(): + obj_file = obj_file + ".lint" + content += "all:" + obj_file + "\n" + content += obj_file + ": FORCE\n" + content += "\tmkdir -p $(dir $@)\n" + content += "\t"+ wine_cmd + " > $@ \n" + content += "\n" + content += "else\n" + + for (obj_file,gcc_cmd) in gcc_cmd_set.items(): + content += "all:" + obj_file + "\n" + content += obj_file + ": FORCE\n" + content += "\trm -rf $@\n" + content += "\t $(SOURCEANALYZER) " + gcc_cmd + "\n" + content += "\n" + + content += "endif\n" + content += "\n" + content += ".PHONY: FORCE\n" + content += "FORCE: \n" + fd.write(content) + fd.close() + +if __name__ == '__main__': + main() diff --git a/tools/sc_list.txt b/tools/sc_list.txt new file mode 100644 index 000000000..96f08ce69 --- /dev/null +++ b/tools/sc_list.txt @@ -0,0 +1 @@ +tf_adapter \ No newline at end of file diff --git a/workspace.bzl.tpl b/workspace.bzl.tpl new file mode 100644 index 000000000..64d4eedda --- /dev/null +++ b/workspace.bzl.tpl @@ -0,0 +1,70 @@ +# Build tf_adapter workspace(Calling by tf_adapter) +def extend_base(): + return "../../../../../" + +def 
tf_adapter_workspace(): + native.new_local_repository( + name = "tf_adapter_cloud_host_libs", + path = extend_base() + "out/cloud/host/obj/lib", + build_file = "@tf_adapter//tf_adapter:module.BUILD", + ) + + native.new_local_repository( + name = "tf_adapter_mini_host_libs", + path = extend_base() + "out/mini/host/obj/lib", + build_file = "@tf_adapter//tf_adapter:module.BUILD", + ) + + native.new_local_repository( + name = "tf_adapter_onetrack_host_libs", + path = extend_base() + "out/onetrack/host/obj/lib", + build_file = "@tf_adapter//tf_adapter:module.BUILD", + ) + + native.new_local_repository( + name = "ge_proto", + path = extend_base() + "inc/common", + build_file = "@tf_adapter//tf_adapter:module.BUILD", + ) + + native.new_local_repository( + name = "tf_adapter_extend_hdrs", + path = extend_base() + "inc", + build_file = "@tf_adapter//tf_adapter:module.BUILD", + ) + + native.new_local_repository( + name="installed_tensorflow", + path=extend_base() + "third_party/tensorflow/compile_deps/tf-1.15.0", + build_file = "@tf_adapter//tf_adapter:module.BUILD", + ) + + native.new_local_repository( + name="tf_adapter_dvpp_hdrs", + path=extend_base() + "soft_dp", + build_file = "@tf_adapter//tf_adapter:module.BUILD", + ) + + native.new_local_repository( + name="installed_tensorflow_libs", + path="{TF_INSTALL_LIB_PATH}", + build_file = "@tf_adapter//tf_adapter:module.BUILD", + ) + + native.new_local_repository( + name = "local_nlohmann_json_lib", + path = extend_base() + "third_party/json/include/", + build_file = "@tf_adapter//tf_adapter:module.BUILD", + ) + + native.new_local_repository( + name = "sec_lib", + path = extend_base() + "libc_sec/include/", + build_file = "@tf_adapter//tf_adapter:module.BUILD", + ) + + native.new_local_repository( + name = "python_include", + path = "/opt/buildtools/python3.7/include/python3.7m/", + build_file = "@tf_adapter//tf_adapter:module.BUILD", + ) -- Gitee