From ca23e03e0821b4c88fbc0802ef30a01437c99545 Mon Sep 17 00:00:00 2001
From: yanqingshang
Date: Mon, 28 Dec 2020 17:28:15 +0800
Subject: [PATCH 1/3] add hccl gradient broadcast

---
 .../optimizers/gradient_fusion_optimizer.cc   | 59 +------------------
 tf_adapter/python/npu_bridge/hccl/hccl_ops.py |  2 +-
 2 files changed, 3 insertions(+), 58 deletions(-)

diff --git a/tf_adapter/optimizers/gradient_fusion_optimizer.cc b/tf_adapter/optimizers/gradient_fusion_optimizer.cc
index 0235497e0..1e1612602 100644
--- a/tf_adapter/optimizers/gradient_fusion_optimizer.cc
+++ b/tf_adapter/optimizers/gradient_fusion_optimizer.cc
@@ -242,65 +242,10 @@ int64 GradFusionOptimizer::GetFusionTensorSize() {
 }
 
 Status GradFusionOptimizer::Optimize(Cluster *cluster, const GrapplerItem &item, GraphDef *optimizedGraph) {
+  LOG(INFO) << "INFO: GradFusionOptimizer::Optimize begin.";
   REQUIRES_NOT_NULL(optimizedGraph);
-  const int64 fusionTensorSize = GetFusionTensorSize();
-  GraphDef graphOrigin;
-  std::map<std::pair<string, DataType>, std::vector<NodeDef>> fusionHcomOps;
-  std::map<std::pair<string, DataType>, int64_t> currentGradSumSize;
-  *optimizedGraph = item.graph;
-  LOG(INFO) << "INFO: GradFusionOptimizer::Optimize begin, OriginNodeNum: " << item.graph.node_size();
-  LOG(INFO) << "INFO: FUSION_TENSOR_SIZE: " << fusionTensorSize;
-
-  if (fusionTensorSize < 0) { return errors::InvalidArgument("FUSION_TENSOR_SIZE is invalid"); }
-
-  REQUIRES_STATUS_OK(TopologicalSort(optimizedGraph));
-  nodeMap_.reset(new (std::nothrow) NodeMap(optimizedGraph));
-  REQUIRES_NOT_NULL(nodeMap_);
-  fusionOpInfo_.clear();
-  fusionOpPool_.clear();
   graphOrigin = *optimizedGraph;
-  for (const auto &nodeDef : graphOrigin.node()) { nameToNode_[nodeDef.name()] = nodeDef; }
-
-  for (const auto &nodeDef : graphOrigin.node()) {
-    if (IsHcomOp(nodeDef)) {
-      DataType dType;
-      auto attrMap = nodeDef.attr();
-      auto iter = attrMap.find(DATA_TYPE_ATTR);
-      if (iter != attrMap.end()) {
-        dType = iter->second.list().type(0);
-      } else {
-        LOG(INFO) << "INFO: Use default dataType: DT_FLOAT";
-        dType = DT_FLOAT;
-      }
-      std::pair<string, DataType> key = std::make_pair(nodeDef.op(), dType);
-
-      fusionHcomOps[key].push_back(nodeDef);
-      int64_t inputTensorSize = 0;
-      NodeDef tmpNode = nodeDef;
-      TF_RETURN_IF_ERROR(GetInputTensorSize(tmpNode, inputTensorSize));
-      if (currentGradSumSize.count(key) != 0) {
-        if (INT64_MAX - inputTensorSize < currentGradSumSize[key]) {
-          return errors::InvalidArgument("input tensor size is overflow");
-        }
-        currentGradSumSize[key] += inputTensorSize;
-      } else {
-        currentGradSumSize[key] = inputTensorSize;
-      }
-      if (currentGradSumSize[key] >= fusionTensorSize) {
-        if (fusionHcomOps[key].size() > 1) { TF_RETURN_IF_ERROR(FusionOp(fusionHcomOps[key], optimizedGraph)); }
-        fusionHcomOps[key].clear();
-        currentGradSumSize[key] = 0;
-      }
-    }
-  }
-
-  for (auto iter : fusionHcomOps) {
-    if (!iter.second.empty()) {
-      if (iter.second.size() > 1) { TF_RETURN_IF_ERROR(FusionOp(iter.second, optimizedGraph)); }
-      iter.second.clear();
-    }
-  }
-  LOG(INFO) << "INFO: GradFusionOptimizer::Optimize end, finalNodeNum: " << optimizedGraph->node_size();
 
+  LOG(INFO) << "INFO: GradFusionOptimizer::Optimize end.";
   return Status::OK();
 }
diff --git a/tf_adapter/python/npu_bridge/hccl/hccl_ops.py b/tf_adapter/python/npu_bridge/hccl/hccl_ops.py
index 3926ac2c6..f9e60bf73 100644
--- a/tf_adapter/python/npu_bridge/hccl/hccl_ops.py
+++ b/tf_adapter/python/npu_bridge/hccl/hccl_ops.py
@@ -52,7 +52,7 @@ def allgather(tensor, rank_size, group="hccl_world_group"):
 # @param fusion_id int, fusion index; operators with the same fusion_id will be fused.
 # @param group string, group name; either a user-defined group or "hccl_world_group".
 # @return the result tensor after the broadcast operation has been applied to the input tensor
-def broadcast(tensor, root_rank, fusion=0, fusion_id=-1, group="hccl_world_group"):
+def broadcast(tensor, root_rank, fusion=2, fusion_id=0, group="hccl_world_group"):
     result = gen_hccl_ops.hcom_broadcast(
         input=tensor,
         fusion=fusion,
--
Gitee

From b02681b9e45450dd61a82091ef97153480004a42 Mon Sep 17 00:00:00 2001
From: yanqingshang
Date: Mon, 28 Dec 2020 20:18:13 +0800
Subject: [PATCH 2/3] sync code

---
 tf_adapter/optimizers/gradient_fusion_optimizer.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tf_adapter/optimizers/gradient_fusion_optimizer.cc b/tf_adapter/optimizers/gradient_fusion_optimizer.cc
index 1e1612602..4de208b4f 100644
--- a/tf_adapter/optimizers/gradient_fusion_optimizer.cc
+++ b/tf_adapter/optimizers/gradient_fusion_optimizer.cc
@@ -244,7 +244,7 @@ int64 GradFusionOptimizer::GetFusionTensorSize() {
 Status GradFusionOptimizer::Optimize(Cluster *cluster, const GrapplerItem &item, GraphDef *optimizedGraph) {
   LOG(INFO) << "INFO: GradFusionOptimizer::Optimize begin.";
   REQUIRES_NOT_NULL(optimizedGraph);
-  graphOrigin = *optimizedGraph;
+  *optimizedGraph = item.graph;
 
   LOG(INFO) << "INFO: GradFusionOptimizer::Optimize end.";
   return Status::OK();
--
Gitee

From 63992fb3ed54ae6742d6ac0eca57d9403514c737 Mon Sep 17 00:00:00 2001
From: yanqingshang
Date: Tue, 29 Dec 2020 10:59:35 +0800
Subject: [PATCH 3/3] add hccl gradient broadcast

---
 tf_adapter/interface_spec/api_hccl_ops.pyh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tf_adapter/interface_spec/api_hccl_ops.pyh b/tf_adapter/interface_spec/api_hccl_ops.pyh
index e5f8c54a2..853de9060 100644
--- a/tf_adapter/interface_spec/api_hccl_ops.pyh
+++ b/tf_adapter/interface_spec/api_hccl_ops.pyh
@@ -1,7 +1,7 @@
 # source file:./python/npu_bridge/hccl/hccl_ops.py
 def allreduce(tensor, reduction, fusion=1, fusion_id=-1, group="hccl_world_group"):
 def allgather(tensor, rank_size, group="hccl_world_group"):
-def broadcast(tensor, root_rank, fusion=0, fusion_id=-1, group="hccl_world_group"):
+def broadcast(tensor, root_rank, fusion=2, fusion_id=0, group="hccl_world_group"):
 def reduce(tensor, reduction, root_rank, fusion=0, fusion_id=-1, group="hccl_world_group"):
 def reduce_scatter(tensor, reduction, rank_size, group="hccl_world_group"):
 def send(tensor, sr_tag, dest_rank, group="hccl_world_group"):
--
Gitee
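
Usage note (illustrative, not part of the patches): after this series the Python
broadcast wrapper defaults to fusion=2 and fusion_id=0, so broadcasts that keep the
defaults share fusion_id 0 and become candidates for HCCL fusion, while passing
fusion=0 keeps the previous per-tensor behaviour. The sketch below shows roughly how
the wrapper is called; it assumes a TF1-style graph on an Ascend NPU with HCCL already
initialised, the variable names are hypothetical, and the list-valued tensor argument
plus the assign-back pattern follow the way npu_bridge broadcasts global variables.

    import tensorflow as tf
    from npu_bridge.hccl import hccl_ops

    # Hypothetical variables to synchronise from rank 0 before training.
    weights = tf.get_variable("weights", shape=[1024, 1024])
    bias = tf.get_variable("bias", shape=[1024])

    broadcast_ops = []
    for var in (weights, bias):
        # Defaults after this series: fusion=2, fusion_id=0, group="hccl_world_group",
        # so both broadcasts land in one fusion group; pass fusion=0 explicitly to
        # keep them separate.
        out = hccl_ops.broadcast(tensor=[var], root_rank=0)
        broadcast_ops.append(tf.assign(var, out[0]))

    # Run once on every rank (e.g. right after variable initialisation).
    sync_op = tf.group(*broadcast_ops)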