diff --git a/src/mapleall/maple_be/BUILD.gn b/src/mapleall/maple_be/BUILD.gn
index 461e7eefdfc9b069eb508554ceef4c6ceaf9ddd9..e7ea361513b662ba54ff8291fa04c472cb80194e 100644
--- a/src/mapleall/maple_be/BUILD.gn
+++ b/src/mapleall/maple_be/BUILD.gn
@@ -223,6 +223,7 @@ src_libcgphases = [
   "src/cg/cg_occur.cpp",
   "src/cg/cg_ssu_pre.cpp",
   "src/cg/cg_ssa_pre.cpp",
+  "src/cg/cg_mc_ssa_pre.cpp",
   "src/cg/cg_pgo_gen.cpp",
   "src/cg/cg_pgo_use.cpp",
   "src/cg/cg_profile_use.cpp",
diff --git a/src/mapleall/maple_be/CMakeLists.txt b/src/mapleall/maple_be/CMakeLists.txt
index 836dbfd396c0caa0dd59180314877570331b3779..26eb5cf90b7dc799d4915fc0c1ac83a62aea17f2 100755
--- a/src/mapleall/maple_be/CMakeLists.txt
+++ b/src/mapleall/maple_be/CMakeLists.txt
@@ -114,6 +114,7 @@ if(${TARGET} STREQUAL "aarch64" OR ${TARGET} STREQUAL "aarch64_ilp32")
     src/cg/tailcall.cpp
     src/cg/cg_pgo_gen.cpp
     src/cg/cg_pgo_use.cpp
+    src/cg/cg_profile_use.cpp
     src/cg/cg_ssa.cpp
     src/cg/cg_prop.cpp
     src/cg/cg_dce.cpp
@@ -131,6 +132,7 @@ if(${TARGET} STREQUAL "aarch64" OR ${TARGET} STREQUAL "aarch64_ilp32")
     src/cg/cg_occur.cpp
     src/cg/cg_ssu_pre.cpp
     src/cg/cg_ssa_pre.cpp
+    src/cg/cg_mc_ssa_pre.cpp
     src/cg/regsaves.cpp
     src/cg/cg_critical_edge.cpp
     src/cg/alignment.cpp
diff --git a/src/mapleall/maple_be/include/cg/cg_mc_ssa_pre.h b/src/mapleall/maple_be/include/cg/cg_mc_ssa_pre.h
new file mode 100644
index 0000000000000000000000000000000000000000..1120716a79a02968f7735214f44fe685428ca291
--- /dev/null
+++ b/src/mapleall/maple_be/include/cg/cg_mc_ssa_pre.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) [2023] Futurewei Technologies Co.,Ltd.All rights reserved.
+ *
+ * OpenArkCompiler is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ *     http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR
+ * FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ */
+#ifndef MAPLEBE_CG_INCLUDE_CG_MC_SSA_PRE_H
+#define MAPLEBE_CG_INCLUDE_CG_MC_SSA_PRE_H
+
+#include "cg_ssa_pre.h"
+
+namespace maplebe {
+
+extern void DoProfileGuidedSavePlacement(CGFunc *f, DomAnalysis *dom, SsaPreWorkCand *workCand);
+
+// a node in the reduced SSA graph; pred, inEdgesCap and usedCap hold one entry per incoming edge
+class RGNode {
+  friend class McSSAPre;
+  friend class Visit;
+ public:
+  RGNode(MapleAllocator *alloc, uint32 idx, Occ *oc) : id(idx), occ(oc),
+      pred(alloc->Adapter()),
+      phiOpndIndices(alloc->Adapter()),
+      inEdgesCap(alloc->Adapter()),
+      usedCap(alloc->Adapter()) {}
+ private:
+  uint32 id;
+  Occ *occ;  // nullptr for the artificial source and sink nodes
+  MapleVector<RGNode*> pred;
+  MapleVector<uint32> phiOpndIndices;  // only applicable if occ is a phi
+  MapleVector<FreqType> inEdgesCap;  // capacity of incoming edges
+  MapleVector<FreqType> usedCap;  // flow already routed through each incoming edge
+};
+
+// designate a visited node and the incoming edge (index into node's pred) taken from it
+class Visit {
+  friend class McSSAPre;
+ private:
+  Visit(RGNode *nd, uint32 idx) : node(nd), predIdx(idx) {}
+  RGNode *node;
+  uint32 predIdx;  // the index in node's pred
+
+  FreqType AvailableCapacity() const { return node->inEdgesCap[predIdx] - node->usedCap[predIdx]; }
+  void IncreUsedCapacity(FreqType val) { node->usedCap[predIdx] += val; }
+  bool operator==(const Visit *rhs) const { return node == rhs->node && predIdx == rhs->predIdx; }
+};
+
+// for representing a flow path; visits[0] is the sink and the last visit's pred is the source
+class Route {
+  friend class McSSAPre;
+ public:
+  explicit Route(MapleAllocator *alloc) : visits(alloc->Adapter()) {}
+ private:
+  MapleVector<Visit> visits;
+  FreqType flowValue = 0;
+};
+
+class McSSAPre : public SSAPre {  // SSAPre variant that decides insertions via max flow/min cut
+ public:
+  McSSAPre(CGFunc *cgfunc, DomAnalysis *dm, MemPool *memPool, SsaPreWorkCand *wkcand, bool aeap, bool enDebug) :
+    SSAPre(cgfunc, dm, memPool, wkcand, aeap, enDebug),
+    occ2RGNodeMap(preAllocator.Adapter()),
+    maxFlowRoutes(preAllocator.Adapter()),
+    minCut(preAllocator.Adapter()) {}
+  ~McSSAPre() = default;
+
+  void ApplyMCSSAPre();
+ private:
+  // step 8 willbeavail
+  void ResetMCWillBeAvail(PhiOcc *phiOcc) const;
+  void ComputeMCWillBeAvail() const;
+  // step 7 max flow/min cut
+  bool AmongMinCut(RGNode *, uint32 idx) const;
+  void DumpRGToFile();  // dump reduced graph to dot file
+  bool IncludedEarlier(Visit **cut, Visit *curVisit, uint32 nextRouteIdx);
+  void RemoveRouteNodesFromCutSet(std::unordered_multiset<uint32> &cutSet, Route *route);
+  bool SearchRelaxedMinCut(Visit **cut, std::unordered_multiset<uint32> &cutSet, uint32 nextRouteIdx, FreqType flowSoFar);
+  bool SearchMinCut(Visit **cut, std::unordered_multiset<uint32> &cutSet, uint32 nextRouteIdx, FreqType flowSoFar);
+  void DetermineMinCut();
+  bool VisitANode(RGNode *node, Route *route, std::vector<bool> &visitedNodes);
+  bool FindAnotherRoute();
+  void FindMaxFlow();
+  // step 6 single sink
+  void AddSingleSink();
+  // step 5 single source
+  void AddSingleSource();
+  // step 4 graph reduction
+  void GraphReduction();
+  // step 3 data flow methods
+  void SetPartialAnt(PhiOpndOcc *phiOpnd) const;
+  void ComputePartialAnt() const;
+  void ResetFullAvail(PhiOcc *occ) const;
+  void ComputeFullAvail() const;
+
+  MapleUnorderedMap<Occ*, RGNode*> occ2RGNodeMap;
+  RGNode *source = nullptr;
+  RGNode *sink = nullptr;
+  uint32 numSourceEdges = 0;
+  MapleVector<Route*> maxFlowRoutes;
+  uint32 nextRGNodeId = 1;  // 0 is reserved
+  FreqType maxFlowValue = 0;
+  FreqType relaxedMaxFlowValue = 0;  // relax maxFlowValue to avoid excessive mincut search time when number of routes is large
+  MapleVector<Visit*> minCut;  // an array of Visits* to represent the minCut
+};
+
+};  // namespace maplebe
+#endif  // MAPLEBE_CG_INCLUDE_CG_MC_SSA_PRE_H
diff --git a/src/mapleall/maple_be/include/cg/cg_ssa_pre.h b/src/mapleall/maple_be/include/cg/cg_ssa_pre.h
index 988b979456fce3f9e0f351aa06463976d2f95004..7f56d9dc2a4fc424daeff4b76773bfae8848411f 100644
--- a/src/mapleall/maple_be/include/cg/cg_ssa_pre.h
+++ b/src/mapleall/maple_be/include/cg/cg_ssa_pre.h
@@ -33,13 +33,15 @@ using BBId = uint32;
 // caller by setting saveAtEntryBBs.
 class SsaPreWorkCand {
  public:
-  explicit SsaPreWorkCand(MapleAllocator *alloc) : occBBs(alloc->Adapter()), saveAtEntryBBs(alloc->Adapter()) {}
+  static uint32 workCandIDNext;  // for assigning ID starting from 1 (0 is reserved)
+  explicit SsaPreWorkCand(MapleAllocator *alloc) : occBBs(alloc->Adapter()), saveAtEntryBBs(alloc->Adapter()), workCandID(++workCandIDNext) {}
   // inputs
   MapleSet<BBId> occBBs;  // Id's of BBs with appearances of the callee-saved reg
   // outputs
   MapleSet<BBId> saveAtEntryBBs;  // Id's of BBs to insert saves of the register at BB entry
   bool saveAtProlog = false;  // if true, no shrinkwrapping can be done and
                               // the other outputs can be ignored
+  uint32 workCandID;
 };
 
 extern void DoSavePlacementOpt(CGFunc *f, DomAnalysis *dom, SsaPreWorkCand *workCand);
@@ -79,6 +81,7 @@ class RealOcc : public Occ {
   }
 
   bool redundant = true;
+  bool rgExcluded = false;  // reduced-graph-excluded, used only by mc-ssapre
 };
 
 class PhiOcc;
@@ -95,6 +98,7 @@ class PhiOpndOcc : public Occ {
   PhiOcc *defPhiOcc = nullptr;  // its lhs definition
   bool hasRealUse = false;
   bool insertHere = false;
+  bool isMCInsert = false;  // used only in mc-ssapre
 };
 
 class PhiOcc : public Occ {
@@ -122,6 +126,9 @@ class PhiOcc : public Occ {
   bool speculativeDownsafe = false;  // true if set to downsafe via speculation
   bool isCanBeAvail = true;
   bool isLater = true;
+  bool isFullyAvail = true;  // used only in mc-ssapre
+  bool isPartialAnt = false;  // used only in mc-ssapre
+  bool isMCWillBeAvail = true;  // used only in mc-ssapre
   MapleVector<PhiOpndOcc*> phiOpnds;
 };
 
@@ -156,10 +163,16 @@ class SSAPre {
 
   void ApplySSAPre();
 
- private:
+ protected:
   // step 6 methods
   void CodeMotion();
   // step 5 methods
+  bool WillBeAvail(const PhiOcc *phiOcc) const {
+    if (!doMinCut) {
+      return phiOcc->WillBeAvail();
+    }
+    return phiOcc->isMCWillBeAvail;
+  }
   void Finalize();
   // step 4 methods
   void ResetCanBeAvail(PhiOcc *phi) const;
@@ -203,6 +216,7 @@ class SSAPre {
   MapleVector<ExitOcc*> exitOccs;
   bool asEarlyAsPossible;
   bool enabledDebug;
+  bool doMinCut = false;
 };
 
 };  // namespace maplabe
diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_regsaves.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_regsaves.cpp
index 07b4dbd102d55597baafa0017f2a1df5e0260989..b1ac3f702398065e062b6d5c39655303b33452d7 100644
--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_regsaves.cpp
+++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_regsaves.cpp
@@ -19,6 +19,7 @@
 #include "aarch64_proepilog.h"
 #include "cg_dominance.h"
 #include "cg_ssa_pre.h"
+#include "cg_mc_ssa_pre.h"
 #include "cg_ssu_pre.h"
 
 namespace maplebe {
@@ -460,7 +461,11 @@ void AArch64RegSavesOpt::DetermineCalleeSaveLocationsPre() {
         }
       }
     }
-    DoSavePlacementOpt(cgFunc, GetDomInfo(), &wkCand);
+    if (cgFunc->GetFunction().GetFuncProfData() == nullptr) {
+      DoSavePlacementOpt(cgFunc, GetDomInfo(), &wkCand);
+    } else {
+      DoProfileGuidedSavePlacement(cgFunc, GetDomInfo(), &wkCand);
+    }
     if (wkCand.saveAtEntryBBs.empty()) {
       /* something gone wrong, skip this reg */
       wkCand.saveAtProlog = true;
diff --git a/src/mapleall/maple_be/src/cg/cg_mc_ssa_pre.cpp b/src/mapleall/maple_be/src/cg/cg_mc_ssa_pre.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..915aa0ebf0014eb5820107a1eb658654e2c93db6
--- /dev/null
+++ b/src/mapleall/maple_be/src/cg/cg_mc_ssa_pre.cpp
@@ -0,0 +1,722 @@
+/*
+ * Copyright (c) [2023] Futurewei Technologies Co.,Ltd.All rights reserved.
+ *
+ * OpenArkCompiler is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ *     http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR
+ * FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ */
+#include "cgfunc.h"
+#include "cg_mc_ssa_pre.h"
+
+namespace maplebe {
+
+constexpr size_t kFuncNameLenLimit = 80;  // function names are truncated to this length in dump file names
+
+// ================ Step 8: WillBeAvail =================
+
+void McSSAPre::ResetMCWillBeAvail(PhiOcc *occ) const {  // clear the flag on occ, then on phis depending on it
+  if (!occ->isMCWillBeAvail) {
+    return;
+  }
+  occ->isMCWillBeAvail = false;
+  for (PhiOcc *phiOcc : phiOccs) {
+    if (!phiOcc->isMCWillBeAvail) {
+      continue;
+    }
+    for (PhiOpndOcc *phiOpnd : phiOcc->phiOpnds) {
+      if (phiOpnd->def != nullptr && phiOpnd->def == occ) {
+        // phiOpnd is a use of occ
+        if (!phiOpnd->hasRealUse && !phiOpnd->isMCInsert) {
+          ResetMCWillBeAvail(phiOcc);
+          break;
+        }
+      }
+    }
+  }
+}
+
+void McSSAPre::ComputeMCWillBeAvail() const {  // derive isMCWillBeAvail of every phi from minCut
+  if (minCut.size() == 0) {  // no cut was formed: isMCWillBeAvail simply mirrors isFullyAvail
+    for (PhiOcc *phiOcc : phiOccs) {
+      phiOcc->isMCWillBeAvail = phiOcc->isFullyAvail;
+    }
+    return;
+  }
+  // set insert in phi operands
+  for (Visit *visit : minCut) {
+    Occ *occ = visit->node->occ;
+    if (occ->occTy == kAOccPhi) {
+      PhiOcc *phiOcc = static_cast<PhiOcc*>(occ);
+      uint32 phiOpndIndex = visit->node->phiOpndIndices[visit->predIdx];
+      PhiOpndOcc *phiOpndOcc = phiOcc->phiOpnds[phiOpndIndex];
+      phiOpndOcc->isMCInsert = true;
+    }
+  }
+  for (PhiOcc *phiOcc : phiOccs) {
+    for (PhiOpndOcc *phiOpnd : phiOcc->phiOpnds) {
+      if (phiOpnd->def == nullptr && !phiOpnd->isMCInsert) {  // null operand that the cut does not cover
+        ResetMCWillBeAvail(phiOcc);
+        break;
+      }
+    }
+  }
+}
+
+// ================ Step 7: Max Flow / Min Cut =================
+
+bool McSSAPre::AmongMinCut(RGNode *nd, uint32 idx) const {  // is edge (nd, pred slot idx) part of minCut?
+  for (Visit *visit : minCut) {
+    if (visit->node == nd && visit->predIdx == idx) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void McSSAPre::DumpRGToFile() {  // write the reduced graph in dot format; no-op when there is no sink
+  if (sink == nullptr) {
+    return;
+  }
+  std::string fileName = "rg-of-cand-";
+  fileName.append(std::to_string(workCand->workCandID));
+  fileName.append("-");
+  const std::string &funcName = cgFunc->GetFunction().GetName();
+  if (funcName.size() < kFuncNameLenLimit) {
+    fileName.append(funcName);
+  } else {
+    fileName.append(funcName.c_str(), kFuncNameLenLimit);
+  }
+  fileName.append(".dot");
+  std::ofstream rgFile;
+  std::streambuf *coutBuf = LogInfo::MapleLogger().rdbuf();  // keep original cout buffer
+  std::streambuf *buf = rgFile.rdbuf();
+  LogInfo::MapleLogger().rdbuf(buf);  // the logger shares rgFile's buffer until it is restored below
+  rgFile.open(fileName, std::ios::trunc);
+  rgFile << "digraph {\n";
+  for (uint32 i = 0; i < sink->pred.size(); i++) {
+    RGNode *pre = sink->pred[i];
+    rgFile << "real" << pre->id << " -> " << "\"sink\nmaxflow " << maxFlowValue << "\";\n";
+  }
+  MapleUnorderedMap<Occ*, RGNode*>::iterator it = occ2RGNodeMap.begin();
+  for (; it != occ2RGNodeMap.end(); it++) {
+    RGNode *rgNode = it->second;
+    for (uint32 i = 0; i < rgNode->pred.size(); i++) {
+      RGNode *pre = rgNode->pred[i];
+      if (pre != source) {
+        if (pre->occ->occTy == kAOccPhi) {
+          rgFile << "phi" << pre->id << " -> ";
+        } else {
+          rgFile << "real" << pre->id << " -> ";
+        }
+        if (rgNode->occ->occTy == kAOccPhi) {
+          rgFile << "phi" << rgNode->id;
+        } else {
+          rgFile << "real" << rgNode->id;
+        }
+      } else {
+        rgFile << "source" << " -> " << "phi" << rgNode->id;
+      }
+      if (AmongMinCut(rgNode, i)) {
+        rgFile << "[style=dotted][color=red]";
+      }
+      if (rgNode->usedCap[i] == 0) {
+        rgFile << "[style=dashed][color=green]";
+      }
+      rgFile << "[label=\"" << rgNode->usedCap[i] << "|" << rgNode->inEdgesCap[i] << "\"];\n";
+    }
+  }
+  rgFile << "}\n";
+  rgFile.flush();
+  rgFile.close();
+  LogInfo::MapleLogger().rdbuf(coutBuf);
+  LogInfo::MapleLogger() << "++++ ssapre candidate " << workCand->workCandID << " dumped to " << fileName << "\n";
+}
+
+bool McSSAPre::IncludedEarlier(Visit **cut, Visit *curVisit, uint32 nextRouteIdx) {
+  uint32 i = nextRouteIdx;  // scan cut[0..nextRouteIdx) for the same (node, predIdx) edge
+  while (i != 0) {
+    i--;
+    if (cut[i]->node == curVisit->node && cut[i]->predIdx == curVisit->predIdx) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// remove this route's nodes from cutSet (one occurrence each; visits[0], the sink, is never in cutSet)
+void McSSAPre::RemoveRouteNodesFromCutSet(std::unordered_multiset<uint32> &cutSet, Route *route) {
+  for (uint32 i = 1; i < route->visits.size(); i++) {
+    Visit &curVisit = route->visits[i];
+    std::unordered_multiset<uint32>::iterator it = cutSet.find(curVisit.node->id);
+    ASSERT(it != cutSet.end(), "cutSet maintenance error");
+    cutSet.erase(it);
+  }
+}
+
+// find the cut closest to the sink whose total flow is relaxedMaxFlowValue
+bool McSSAPre::SearchRelaxedMinCut(Visit **cut, std::unordered_multiset<uint32> &cutSet, uint32 nextRouteIdx, FreqType flowSoFar) {
+  Route *curRoute = maxFlowRoutes[nextRouteIdx];
+  Visit *curVisit = nullptr;
+
+  // determine starting value of visitIdx: start searching back from route end;
+  // if any node is in cutSet, set visitIdx as that nodes's index in route;
+  // otherwise the scan stops with visitIdx at 1 (index 0 of a route is the sink visit)
+  uint32 visitIdx = curRoute->visits.size();
+  do {
+    visitIdx--;
+    if (cutSet.count(curRoute->visits[visitIdx].node->id) != 0) {
+      break;
+    }
+  } while (visitIdx != 1);
+  // update cutSet with visited nodes lower than visitIdx
+  if (visitIdx != 1) {
+    for (uint32 i = visitIdx - 1; i > 0; i--) {
+      cutSet.insert(curRoute->visits[i].node->id);
+    }
+  }
+
+  bool success = false;
+  do {
+    if (visitIdx == curRoute->visits.size()) {  // ran off the route end: undo this route's cutSet entries
+      RemoveRouteNodesFromCutSet(cutSet, curRoute);
+      return false;
+    }
+    curVisit = &curRoute->visits[visitIdx];
+    FreqType visitCap = curVisit->node->inEdgesCap[curVisit->predIdx];
+    cut[nextRouteIdx] = curVisit;
+    if (visitIdx != 0) {
+      cutSet.insert(curVisit->node->id);
+    }
+    if (IncludedEarlier(cut, curVisit, nextRouteIdx)) {
+      visitCap = 0;  // edge already counted for an earlier route
+    }
+    success = (flowSoFar + visitCap <= relaxedMaxFlowValue);
+    if (success && nextRouteIdx != (maxFlowRoutes.size() - 1)) {
+      success = SearchRelaxedMinCut(cut, cutSet, nextRouteIdx+1, flowSoFar + visitCap);
+    }
+    visitIdx++;
+  } while (!success);
+  return true;
+}
+
+// find the cut closest to the sink whose total flow is maxFlowValue
+bool McSSAPre::SearchMinCut(Visit **cut, std::unordered_multiset<uint32> &cutSet, uint32 nextRouteIdx, FreqType flowSoFar) {
+  Route *curRoute = maxFlowRoutes[nextRouteIdx];
+  Visit *curVisit = nullptr;
+
+  // determine starting value of visitIdx: start searching back from route end;
+  // if any node is in cutSet, set visitIdx as that nodes's index in route;
+  // otherwise the scan stops with visitIdx at 1 (index 0 of a route is the sink visit)
+  uint32 visitIdx = curRoute->visits.size();
+  do {
+    visitIdx--;
+    if (cutSet.count(curRoute->visits[visitIdx].node->id) != 0) {
+      break;
+    }
+  } while (visitIdx != 1);
+  // update cutSet with visited nodes lower than visitIdx
+  if (visitIdx != 1) {
+    for (uint32 i = visitIdx - 1; i > 0; i--) {
+      cutSet.insert(curRoute->visits[i].node->id);
+    }
+  }
+
+  bool success = false;
+  do {
+    if (visitIdx == curRoute->visits.size()) {  // ran off the route end: undo this route's cutSet entries
+      RemoveRouteNodesFromCutSet(cutSet, curRoute);
+      return false;
+    }
+    curVisit = &curRoute->visits[visitIdx];
+    FreqType visitCap = curVisit->node->inEdgesCap[curVisit->predIdx];
+    FreqType usedCap = curVisit->node->usedCap[curVisit->predIdx];
+    if (visitCap != usedCap) {  // only saturated edges are accepted into the exact cut
+      if (visitIdx != 0) {
+        cutSet.insert(curVisit->node->id);
+      }
+      visitIdx++;
+      continue;
+    }
+    cut[nextRouteIdx] = curVisit;
+    if (visitIdx != 0) {
+      cutSet.insert(curVisit->node->id);
+    }
+    if (IncludedEarlier(cut, curVisit, nextRouteIdx)) {
+      visitCap = 0;  // edge already counted for an earlier route
+    }
+    success = (flowSoFar + visitCap <= maxFlowValue);
+    if (success && nextRouteIdx != (maxFlowRoutes.size() - 1)) {
+      success = SearchMinCut(cut, cutSet, nextRouteIdx+1, flowSoFar + visitCap);
+    }
+    visitIdx++;
+  } while (!success);
+  return true;
+}
+
+void McSSAPre::DetermineMinCut() {  // choose one edge per route, then store the distinct edges in minCut
+  if (maxFlowRoutes.empty()) {
+    if (enabledDebug) {
+      DumpRGToFile();
+    }
+    return;
+  }
+  // maximum width of the min cut is the number of routes in maxFlowRoutes
+  std::vector<Visit*> cut(maxFlowRoutes.size(), nullptr);  // std::vector rather than a non-standard VLA
+  std::unordered_multiset<uint32> cutSet;  // key is RGNode's id; must be kept in sync with cut[]; sink node is not entered
+  constexpr double defaultRelaxScaling = 1.25;
+  relaxedMaxFlowValue = static_cast<FreqType>(static_cast<double>(maxFlowValue) * defaultRelaxScaling);
+  bool relaxedSearch = false;
+  if (maxFlowRoutes.size() >= 20) {
+    // apply arbitrary heuristics to reduce search time
+    relaxedSearch = true;
+    relaxedMaxFlowValue = maxFlowValue * (maxFlowRoutes.size() / 10);
+  }
+  bool success = !relaxedSearch && SearchMinCut(cut.data(), cutSet, 0, 0);
+  if (!success) {
+    relaxedSearch = true;
+    success = SearchRelaxedMinCut(cut.data(), cutSet, 0, 0);
+  }
+  if (!success) {
+    if (enabledDebug) {
+      LogInfo::MapleLogger() << "MinCut failed\n";
+      DumpRGToFile();
+    }
+    CHECK_FATAL(false, "McSSAPre::DetermineMinCut: failed to find min cut");
+  }
+  // sort cut
+  std::sort(cut.begin(), cut.end(), [](const Visit *left, const Visit *right) {
+    return (left->node != right->node) ? (left->node->id < right->node->id)
+                                       : (left->predIdx < right->predIdx); });
+  // remove duplicates in the cut to form mincut
+  minCut.push_back(cut[0]);
+  size_t duplicatedVisits = 0;
+  for (uint32 i = 1; i < maxFlowRoutes.size(); i++) {
+    if (!(*cut[i] == cut[i - 1])) {  // compare the edge, not the pointer: each route owns its own Visit objects
+      minCut.push_back(cut[i]);
+    } else {
+      duplicatedVisits++;
+    }
+  }
+  if (enabledDebug) {
+    LogInfo::MapleLogger() << "finished ";
+    if (relaxedSearch) {
+      LogInfo::MapleLogger() << "relaxed ";
+    }
+    LogInfo::MapleLogger() << "MinCut\n";
+    DumpRGToFile();
+    if (duplicatedVisits != 0) {
+      LogInfo::MapleLogger() << duplicatedVisits << " duplicated visits in mincut\n";
+    }
+  }
+}
+
+bool McSSAPre::VisitANode(RGNode *node, Route *route, std::vector<bool> &visitedNodes) {  // DFS back to source
+  ASSERT(node->pred.size() != 0 , "McSSAPre::VisitANode: no connection to source node");
+  // if any pred is the source and there's capacity to reach it, return success
+  for (uint32 i = 0; i < node->pred.size(); i++) {
+    if (node->pred[i] == source && node->inEdgesCap[i] > node->usedCap[i]) {
+      // if there is another pred never taken that also reaches source, use that instead
+      for (uint32 k = i + 1; k < node->pred.size(); k++) {
+        if (node->pred[k] == source && node->usedCap[k] == 0 && node->inEdgesCap[k] > 0) {
+          route->visits.push_back(Visit(node, k));
+          return true;
+        }
+      }
+      route->visits.push_back(Visit(node, i));
+      return true;
+    }
+  }
+
+  // pick a never-taken predecessor path first
+  for (uint32 i = 0; i < node->pred.size(); i++) {
+    if (node->usedCap[i] == 0 && node->inEdgesCap[i] > 0 && !visitedNodes[node->pred[i]->id]) {
+      route->visits.push_back(Visit(node, i));
+      visitedNodes[node->pred[i]->id] = true;
+      bool success = VisitANode(node->pred[i], route, visitedNodes);
+      if (!success) {
+        route->visits.pop_back();
+      } else {
+        return true;
+      }
+    }
+  }
+
+  size_t numPreds = node->pred.size();
+  std::vector<uint32> sortedPred(numPreds);  // std::vector rather than a non-standard VLA
+  for (uint32 i = 0; i < numPreds; i++) {
+    sortedPred[i] = i;
+  }
+  // put sortedPred[] in increasing order of capacities
+  std::sort(sortedPred.begin(), sortedPred.end(), [node](uint32 m, uint32 n) {
+    return node->inEdgesCap[m] < node->inEdgesCap[n]; });
+  // NOTE(review): preds are tried from lowest inEdgesCap upward here -- confirm this is the intended preference
+  for (uint32 i = 0; i < numPreds; i++) {
+    uint32 j = sortedPred[i];
+    if (!visitedNodes[node->pred[j]->id] && node->inEdgesCap[j] > node->usedCap[j]) {
+      route->visits.push_back(Visit(node, j));
+      visitedNodes[node->pred[j]->id] = true;
+      bool success = VisitANode(node->pred[j], route, visitedNodes);
+      if (!success) {
+        route->visits.pop_back();
+      } else {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// return false if not successful; if successful, the new route will be pushed
+// to maxFlowRoutes
+bool McSSAPre::FindAnotherRoute() {
+  std::vector<bool> visitedNodes(nextRGNodeId, false);  // indexed by RGNode id, which includes source and sink
+  Route *route = preMp->New<Route>(&preAllocator);
+  bool success = false;
+  // pick an untaken sink predecessor first
+  for (uint32 i = 0; i < sink->pred.size(); i++) {
+    if (sink->usedCap[i] == 0) {
+      route->visits.push_back(Visit(sink, i));
+      visitedNodes[sink->pred[i]->id] = true;
+      success = VisitANode(sink->pred[i], route, visitedNodes);
+      if (!success) {
+        route->visits.pop_back();
+      } else {
+        break;
+      }
+    }
+  }
+  if (!success) {
+    // now, pick any sink predecessor
+    for (uint32 i = 0; i < sink->pred.size(); i++) {
+      route->visits.push_back(Visit(sink, i));
+      visitedNodes[sink->pred[i]->id] = true;
+      success = VisitANode(sink->pred[i], route, visitedNodes);
+      if (!success) {
+        route->visits.pop_back();
+      } else {
+        break;
+      }
+    }
+  }
+  if (!success) {
+    return false;
+  }
+  // find bottleneck capacity along route
+  FreqType minAvailCap = route->visits[0].AvailableCapacity();
+  for (uint32 i = 1; i < route->visits.size(); i++) {
+    FreqType curAvailCap = route->visits[i].AvailableCapacity();
+    minAvailCap = std::min(minAvailCap, curAvailCap);
+  }
+  route->flowValue = minAvailCap;
+  // update usedCap along route
+  for (uint32 i = 0; i < route->visits.size(); i++) {
+    route->visits[i].IncreUsedCapacity(minAvailCap);
+  }
+  maxFlowRoutes.push_back(route);
+  return true;
+}
+
+void McSSAPre::FindMaxFlow() {  // collect routes until none can be found, then sum their flow values
+  if (sink == nullptr) {
+    return;
+  }
+  maxFlowValue = 0;
+  bool found;
+  do {
+    found = FindAnotherRoute();
+  } while (found);
+  // calculate maxFlowValue;
+  for (Route *route : maxFlowRoutes) {
+    maxFlowValue += route->flowValue;
+  }
+  if (enabledDebug) {
+    LogInfo::MapleLogger() << "++++ FindMaxFlow found " << maxFlowRoutes.size() << " routes\n";
+    for (size_t i = 0; i < maxFlowRoutes.size(); i++) {
+      Route *route = maxFlowRoutes[i];
+      LogInfo::MapleLogger() << "route " << i << " sink:pred" << route->visits[0].predIdx;
+      for (size_t j = 1; j < route->visits.size(); j++) {
+        if (route->visits[j].node->occ->occTy == kAOccPhi) {
+          LogInfo::MapleLogger() << " phi";
+        } else {
+          LogInfo::MapleLogger() << " real";
+        }
+        LogInfo::MapleLogger() << route->visits[j].node->id << ":pred" << route->visits[j].predIdx;
+      }
+      LogInfo::MapleLogger() << " flowValue " << route->flowValue;
+      LogInfo::MapleLogger() << "\n";
+    }
+    LogInfo::MapleLogger() << "maxFlowValue is " << maxFlowValue << "\n";
+  }
+}
+
+// ================ Step 6: Add Single Sink =================
+
+void McSSAPre::AddSingleSink() {  // connect every real-occurrence node to a new sink node
+  if (numSourceEdges == 0) {
+    return;  // empty reduced graph
+  }
+  sink = preMp->New<RGNode>(&preAllocator, nextRGNodeId++, nullptr);
+  size_t numToSink = 0;
+  MapleUnorderedMap<Occ*, RGNode*>::iterator it = occ2RGNodeMap.begin();
+  for (; it != occ2RGNodeMap.end(); it++) {
+    if (it->first->occTy != kAOccReal) {
+      continue;
+    }
+    RGNode *use = it->second;
+    // add edge from this use node to sink
+    sink->pred.push_back(use);
+    sink->inEdgesCap.push_back(INT64_MAX);
+    sink->usedCap.push_back(0);
+    numToSink++;
+  }
+  ASSERT(numToSink != 0, "McSSAPre::AddSingleSink: found 0 edge to sink");
+  if (enabledDebug) {
+    LogInfo::MapleLogger() << "++++ has " << numToSink << " edges to sink\n";
+  }
+}
+
+// ================ Step 5: Add Single Source =================
+void McSSAPre::AddSingleSource() {  // add a source node with one edge per null operand of the graph's phis
+  source = preMp->New<RGNode>(&preAllocator, nextRGNodeId++, nullptr);
+  for (PhiOcc *phiOcc : phiOccs) {
+    if (phiOcc->isPartialAnt && !phiOcc->isFullyAvail) {
+      // look for null operands
+      MapleList<BB*>::iterator it = phiOcc->cgbb->GetPredsBegin();
+      uint32 i;  // index in phiOcc's phiOpnds
+      for (i = 0; i < phiOcc->phiOpnds.size(); i++, it++) {
+        PhiOpndOcc *phiopndOcc = phiOcc->phiOpnds[i];
+        if (phiopndOcc->def != nullptr) {
+          continue;
+        }
+        // add edge from source to this phi node
+        RGNode *sucNode = occ2RGNodeMap[phiOcc];
+        sucNode->pred.push_back(source);
+        sucNode->phiOpndIndices.push_back(i);
+        sucNode->inEdgesCap.push_back((*it)->GetProfFreq()+1);
+        sucNode->usedCap.push_back(0);
+        numSourceEdges++;
+      }
+    }
+  }
+  if (enabledDebug) {
+    if (numSourceEdges == 0) {
+      LogInfo::MapleLogger() << "++++ has empty reduced graph\n";
+    } else {
+      LogInfo::MapleLogger() << "++++ source has " << numSourceEdges << " succs\n";
+    }
+  }
+}
+
+// ================ Step 4: Graph Reduction =================
+void McSSAPre::GraphReduction() {  // build RGNodes for phis that are partialAnt but not fullyAvail, plus their uses
+  size_t numPhis = 0;
+  size_t numRealOccs = 0;
+  size_t numType1Edges = 0;
+  size_t numType2Edges = 0;
+  // add def nodes
+  for (PhiOcc *phiOcc : phiOccs) {
+    if (phiOcc->isPartialAnt && !phiOcc->isFullyAvail) {
+      RGNode *newRGNode = preMp->New<RGNode>(&preAllocator, nextRGNodeId++, phiOcc);
+      occ2RGNodeMap.insert(std::pair<Occ*, RGNode*>(phiOcc, newRGNode));
+      numPhis++;
+    }
+  }
+  if (occ2RGNodeMap.empty()) {
+    return;
+  }
+  // add use nodes and use-def edges
+  for (Occ *occ : allOccs) {
+    if (occ->occTy == kAOccReal) {
+      RealOcc *realOcc = static_cast<RealOcc*>(occ);
+      if (!realOcc->rgExcluded && realOcc->def != nullptr) {
+        Occ *defOcc = realOcc->def;
+        ASSERT(defOcc->occTy == kAOccPhi, "McSSAPre::GraphReduction: real occ not defined by phi");
+        if (occ2RGNodeMap.find(defOcc) != occ2RGNodeMap.end()) {
+          RGNode *use = preMp->New<RGNode>(&preAllocator, nextRGNodeId++, realOcc);
+          occ2RGNodeMap[realOcc] = use;
+          numRealOccs++;
+          RGNode *def = occ2RGNodeMap[defOcc];
+          use->pred.push_back(def);
+          use->inEdgesCap.push_back(realOcc->cgbb->GetProfFreq()+1);
+          use->usedCap.push_back(0);
+          numType2Edges++;
+        }
+      }
+    } else if (occ->occTy == kAOccPhiOpnd) {
+      PhiOpndOcc *phiopndOcc = static_cast<PhiOpndOcc*>(occ);
+      PhiOcc *defPhiOcc = phiopndOcc->defPhiOcc;
+      if (defPhiOcc->isPartialAnt && !defPhiOcc->isFullyAvail) {
+        // defPhiOcc is the use node and it has already been added
+        Occ *defOcc = phiopndOcc->def;
+        if (defOcc != nullptr && defOcc->occTy == kAOccPhi &&
+            static_cast<PhiOcc*>(defOcc)->isPartialAnt &&
+            !static_cast<PhiOcc*>(defOcc)->isFullyAvail) {
+          ASSERT(occ2RGNodeMap.find(defOcc) != occ2RGNodeMap.end(), "McSSAPre::GraphReduction: def node not found");
+          RGNode *def = occ2RGNodeMap[defOcc];
+          RGNode *use = occ2RGNodeMap[defPhiOcc];
+          use->pred.push_back(def);
+          // find the pred bb (pointed to by it) that corresponds to phiopndOcc
+          MapleList<BB*>::iterator it = defPhiOcc->cgbb->GetPredsBegin();
+          uint32 i;  // index in defPhiOcc's phiOpnds
+          for (i = 0; i < defPhiOcc->phiOpnds.size(); i++, it++) {
+            if (defPhiOcc->phiOpnds[i] == phiopndOcc) {
+              break;
+            }
+          }
+          ASSERT(i != defPhiOcc->phiOpnds.size(), "McSSAPre::GraphReduction: cannot find corresponding phi opnd");
+          use->phiOpndIndices.push_back(i);  // checked above before i and it are used
+          use->inEdgesCap.push_back((*it)->GetProfFreq()+1);
+          use->usedCap.push_back(0);
+          numType1Edges++;
+        }
+      }
+    }
+  }
+  if (enabledDebug) {
+    LogInfo::MapleLogger() << " _______ after GraphReduction, phis: " << numPhis << " reals: " << numRealOccs
+                           << " type 1 edges: " << numType1Edges << " type 2 edges: " << numType2Edges << "\n";
+  }
+}
+
+// ================ Step 3: Data Flow Computations =================
+
+// set partial anticipation on the phi defining phiOpnd and, recursively, on the phis defining its operands
+void McSSAPre::SetPartialAnt(PhiOpndOcc *phiOpnd) const {
+  Occ *defOcc = phiOpnd->def;
+  if (defOcc == nullptr || defOcc->occTy != kAOccPhi) {
+    return;
+  }
+  PhiOcc *defPhiOcc = static_cast<PhiOcc*>(defOcc);
+  if (defPhiOcc->isPartialAnt) {
+    return;
+  }
+  defPhiOcc->isPartialAnt = true;
+  for (PhiOpndOcc *opnd : defPhiOcc->phiOpnds) {  // renamed so it no longer shadows the parameter
+    SetPartialAnt(opnd);
+  }
+}
+
+// compute partial anticipation for each PHI
+void McSSAPre::ComputePartialAnt() const {
+  for (PhiOcc *phiOcc : phiOccs) {
+    if (phiOcc->isPartialAnt) {
+      // propagate partialAnt along use-def edges
+      for (PhiOpndOcc *phiOpnd : phiOcc->phiOpnds) {
+        SetPartialAnt(phiOpnd);
+      }
+    }
+  }
+  if (enabledDebug) {
+    LogInfo::MapleLogger() << " _______ after PartialAnt _______\n";
+    for (PhiOcc *phiOcc : phiOccs) {
+      phiOcc->Dump();
+      if (phiOcc->isPartialAnt) {
+        LogInfo::MapleLogger() << " is partialant\n";
+      } else {
+        LogInfo::MapleLogger() << " is not partialant\n";
+      }
+    }
+  }
+}
+
+void McSSAPre::ResetFullAvail(PhiOcc *occ) const {  // clear isFullyAvail on occ and propagate to its users
+  if (!occ->isFullyAvail) {
+    return;
+  }
+  occ->isFullyAvail = false;
+  // reset those phiocc nodes that have occ as one of its operands
+  for (PhiOcc *phiOcc : phiOccs) {
+    if (!phiOcc->isFullyAvail) {
+      continue;
+    }
+    for (PhiOpndOcc *phiOpnd : phiOcc->phiOpnds) {
+      if (phiOpnd->def != nullptr && phiOpnd->def == occ) {
+        // phiOpnd is a use of occ
+        if (!phiOpnd->hasRealUse) {
+          ResetFullAvail(phiOcc);
+          break;
+        }
+      }
+    }
+  }
+}
+
+void McSSAPre::ComputeFullAvail() const {  // a phi with any null operand is not fully available
+  for (PhiOcc *phiOcc : phiOccs) {
+    // reset fullyavail if any phi operand is null
+    bool existNullDef = false;
+    for (PhiOpndOcc *phiOpnd : phiOcc->phiOpnds) {
+      if (phiOpnd->def == nullptr) {
+        existNullDef = true;
+        break;
+      }
+    }
+    if (existNullDef) {
+      ResetFullAvail(phiOcc);
+    }
+  }
+  if (enabledDebug) {
+    LogInfo::MapleLogger() << " _______ after FullyAvailable _______\n";
+    for (PhiOcc *phiOcc : phiOccs) {
+      phiOcc->Dump();
+      if (phiOcc->isFullyAvail) {
+        LogInfo::MapleLogger() << " is fullyavail\n";
+      } else {
+        LogInfo::MapleLogger() << " is not fullyavail\n";
+      }
+    }
+  }
+}
+
+void McSSAPre::ApplyMCSSAPre() {  // driver: runs steps 1-8 below, then Finalize and CodeMotion
+  if (enabledDebug) {
+    LogInfo::MapleLogger() << "||||||| MC-SSAPRE candidate " << workCand->workCandID << "\n";
+  }
+  doMinCut = true;
+  FormRealsNExits();
+  // step 1 insert phis; results in allOccs and phiOccs
+  FormPhis();  // result put in the set phi_bbs
+  CreateSortedOccs();
+  // step 2 rename
+  Rename();
+  if (!phiOccs.empty()) {
+    // step 3 data flow methods
+    ComputeFullAvail();
+    ComputePartialAnt();
+    // step 4 graph reduction
+    GraphReduction();
+    // step 5 single source
+    AddSingleSource();
+    // step 6 single sink
+    AddSingleSink();
+    // step 7 max flow/min cut
+    FindMaxFlow();
+    DetermineMinCut();
+    // step 8 willbeavail
+    ComputeMCWillBeAvail();
+  }
+  // #5 Finalize
+  Finalize();
+  if (!workCand->saveAtProlog) {
+    // #6 Code Motion
+    CodeMotion();
+  }
+}
+
+void DoProfileGuidedSavePlacement(CGFunc *f, DomAnalysis *dom, SsaPreWorkCand *workCand) {
+  MemPool *tempMP = memPoolCtrler.NewMemPool("cg_mc_ssa_pre", true);  // scratch pool, deleted below
+  McSSAPre cgssapre(f, dom, tempMP, workCand, false/*asEarlyAsPossible*/, false/*enabledDebug*/);
+
+  cgssapre.ApplyMCSSAPre();
+
+  memPoolCtrler.DeleteMemPool(tempMP);
+}
+
+}  // namespace maplebe
diff --git a/src/mapleall/maple_be/src/cg/cg_ssa_pre.cpp b/src/mapleall/maple_be/src/cg/cg_ssa_pre.cpp
index 30e92d384122922eb234883ebe3beb9b1a2c9cb9..8aebcbc8030071d1fe4a6adbc34ece59d8a13353 100644
--- a/src/mapleall/maple_be/src/cg/cg_ssa_pre.cpp
+++ b/src/mapleall/maple_be/src/cg/cg_ssa_pre.cpp
@@ -18,6 +18,8 @@
 
 namespace maplebe {
 
+uint32 SsaPreWorkCand::workCandIDNext = 0;
+
 // ================ Step 6: Code Motion ================
 void SSAPre::CodeMotion() {
   // pass 1 only doing insertion
@@ -27,7 +29,7 @@ void SSAPre::CodeMotion() {
     }
     PhiOpndOcc *phiOpndOcc = static_cast<PhiOpndOcc*>(occ);
     if (phiOpndOcc->insertHere) {
-      ASSERT(phiOpndOcc->cgbb->GetLoop() == nullptr, "cg_ssapre: save inserted inside loop");
+      ASSERT(doMinCut || phiOpndOcc->cgbb->GetLoop() == nullptr, "cg_ssapre: save inserted inside loop");
       workCand->saveAtEntryBBs.insert(phiOpndOcc->cgbb->GetId());
     }
   }
@@ -38,7 +40,7 @@ void SSAPre::CodeMotion() {
     }
     RealOcc *realOcc = static_cast<RealOcc*>(occ);
     if (!realOcc->redundant) {
-      ASSERT(realOcc->cgbb->GetLoop() == nullptr, "cg_ssapre: save in place inside loop");
+      ASSERT(doMinCut || realOcc->cgbb->GetLoop() == nullptr, "cg_ssapre: save in place inside loop");
       workCand->saveAtEntryBBs.insert(realOcc->cgbb->GetId());
     }
   }
@@ -62,7 +64,7 @@ void SSAPre::Finalize() {
     switch (occ->occTy) {
       case kAOccPhi: {
         PhiOcc *phiOcc = static_cast<PhiOcc*>(occ);
-        if (phiOcc->WillBeAvail()) {
+        if (WillBeAvail(phiOcc)) {
           availDefVec[classId] = phiOcc;
         }
         break;
@@ -80,10 +82,10 @@ void SSAPre::Finalize() {
       case kAOccPhiOpnd: {
         PhiOpndOcc *phiOpndOcc = static_cast<PhiOpndOcc*>(occ);
         const PhiOcc *phiOcc = phiOpndOcc->defPhiOcc;
-        if (phiOcc->WillBeAvail()) {
+        if (WillBeAvail(phiOcc)) {
           if (phiOpndOcc->def == nullptr ||
               (!phiOpndOcc->hasRealUse && phiOpndOcc->def->occTy == kAOccPhi &&
-               !static_cast<PhiOcc*>(phiOpndOcc->def)->WillBeAvail())) {
+               !WillBeAvail(static_cast<PhiOcc*>(phiOpndOcc->def)))) {
             // insert a store
             if (phiOpndOcc->cgbb->GetSuccs().size() != 1) {  // critical edge
               workCand->saveAtProlog = true;
@@ -311,12 +313,17 @@ void SSAPre::Rename() {
         }
         Occ *topOcc = occStack.top();
         occ->classId = topOcc->classId;
+        occ->def = topOcc;
         if (topOcc->occTy == kAOccPhi) {
           occStack.push(occ);
-          if (occ->cgbb->GetLoop() != nullptr) {
-            static_cast<PhiOcc*>(topOcc)->isDownsafe = true;
-            static_cast<PhiOcc*>(topOcc)->speculativeDownsafe = true;
+          PhiOcc *phiTopOcc = static_cast<PhiOcc*>(topOcc);
+          phiTopOcc->isPartialAnt = true;
+          if (!doMinCut && occ->cgbb->GetLoop() != nullptr) {
+            phiTopOcc->isDownsafe = true;
+            phiTopOcc->speculativeDownsafe = true;
           }
+        } else if (topOcc->occTy == kAOccReal) {
+          static_cast<RealOcc*>(occ)->rgExcluded = true;
         }
         break;
       }
@@ -339,13 +346,15 @@ void SSAPre::Rename() {
     }
   }
   // loop thru phiOccs to propagate speculativeDownsafe
-  for (PhiOcc *phiOcc : phiOccs) {
-    if (phiOcc->speculativeDownsafe) {
-      for (PhiOpndOcc *phiOpndOcc : phiOcc->phiOpnds) {
-        if (phiOpndOcc->def != nullptr && phiOpndOcc->def->occTy == kAOccPhi) {
-          PhiOcc *nextPhiOcc = static_cast<PhiOcc*>(phiOpndOcc->def);
-          if (nextPhiOcc->cgbb->GetLoop() != nullptr) {
-            PropagateSpeculativeDownsafe(nextPhiOcc);
+  if (!doMinCut) {
+    for (PhiOcc *phiOcc : phiOccs) {
+      if (phiOcc->speculativeDownsafe) {
+        for (PhiOpndOcc *phiOpndOcc : phiOcc->phiOpnds) {
+          if (phiOpndOcc->def != nullptr && phiOpndOcc->def->occTy == kAOccPhi) {
+            PhiOcc *nextPhiOcc = static_cast<PhiOcc*>(phiOpndOcc->def);
+            if (nextPhiOcc->cgbb->GetLoop() != nullptr) {
+              PropagateSpeculativeDownsafe(nextPhiOcc);
+            }
           }
         }
       }
@@ -358,7 +367,12 @@ void SSAPre::Rename() {
       if (occ->occTy == kAOccPhi) {
         PhiOcc *phiOcc = static_cast<PhiOcc*>(occ);
         if (phiOcc->speculativeDownsafe) {
-          LogInfo::MapleLogger() << " spec_downsafe /";
+          LogInfo::MapleLogger() << " spec_downsafe";
+        }
+      }
+      if (occ->occTy == kAOccReal) {
+        if (static_cast<RealOcc*>(occ)->rgExcluded) {
+          LogInfo::MapleLogger() << " rgexcluded";
         }
       }
       LogInfo::MapleLogger() << '\n';