From 90b2f7351cfd44bfc866d8b4bec31081ae78771c Mon Sep 17 00:00:00 2001 From: Fred Chow Date: Thu, 24 Nov 2022 09:24:35 -0800 Subject: [PATCH 1/6] Implemented min-cut version of SSAPRE used by epre phase under --profileUse --no-epreuseprofile will change epre back to the old version that does not use profile data --- src/mapleall/maple_me/BUILD.gn | 1 + src/mapleall/maple_me/include/mc_ssa_pre.h | 112 ++ src/mapleall/maple_me/include/me_option.h | 1 + src/mapleall/maple_me/include/me_options.h | 1 + src/mapleall/maple_me/include/occur.h | 41 +- src/mapleall/maple_me/include/ssa_epre.h | 6 +- src/mapleall/maple_me/include/ssa_pre.h | 9 +- src/mapleall/maple_me/src/mc_ssa_pre.cpp | 1022 +++++++++++++++++ src/mapleall/maple_me/src/me_option.cpp | 2 + src/mapleall/maple_me/src/me_options.cpp | 5 + src/mapleall/maple_me/src/me_ssa_epre.cpp | 9 +- src/mapleall/maple_me/src/me_stmt_pre.cpp | 2 +- src/mapleall/maple_me/src/occur.cpp | 19 +- src/mapleall/maple_me/src/ssa_pre.cpp | 58 +- .../maple_me/src/ssa_pre_for_hoist.cpp | 2 +- 15 files changed, 1247 insertions(+), 43 deletions(-) create mode 100644 src/mapleall/maple_me/include/mc_ssa_pre.h create mode 100644 src/mapleall/maple_me/src/mc_ssa_pre.cpp diff --git a/src/mapleall/maple_me/BUILD.gn b/src/mapleall/maple_me/BUILD.gn index f8bd053ab5..c614397f35 100755 --- a/src/mapleall/maple_me/BUILD.gn +++ b/src/mapleall/maple_me/BUILD.gn @@ -130,6 +130,7 @@ src_libmplme = [ "src/me_slp.cpp", "src/lmbc_memlayout.cpp", "src/lmbc_lower.cpp", + "src/mc_ssa_pre.cpp", ] src_libmplmewpo = [ diff --git a/src/mapleall/maple_me/include/mc_ssa_pre.h b/src/mapleall/maple_me/include/mc_ssa_pre.h new file mode 100644 index 0000000000..343b40b833 --- /dev/null +++ b/src/mapleall/maple_me/include/mc_ssa_pre.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLE_ME_INCLUDE_MC_SSAPRE_H +#define MAPLE_ME_INCLUDE_MC_SSAPRE_H +#include "ssa_pre.h" + +namespace maple { + +// for representing a node in the reduced SSA graph +class RGNode { + friend class McSSAPre; + friend class Visit; + public: + RGNode(MapleAllocator *alloc, uint32 idx, MeOccur *oc) : id(idx), occ(oc), + pred(alloc->Adapter()), + inEdgesCap(alloc->Adapter()), + usedCap(alloc->Adapter()) {} + private: + uint32 id; + MeOccur *occ; + MapleVector pred; + MapleVector inEdgesCap; // capacity of incoming edges + MapleVector usedCap; // used flow value of outgoing edges +}; + +// designate a visited node and the next outgoing edge to take +class Visit { + friend class McSSAPre; + private: + Visit(RGNode *nd, uint32 idx) : node(nd), predIdx(idx) {} + RGNode *node; + uint32 predIdx; // the index in node's pred + + uint64 AvailableCapacity() const { return node->inEdgesCap[predIdx] - node->usedCap[predIdx]; } + void IncreUsedCapacity(uint64 val) { node->usedCap[predIdx] += val; } + bool operator==(const Visit *rhs) const { return node == rhs->node && predIdx == rhs->predIdx; } +}; + +// for representing a flow path from source to sink +class Route { + friend class McSSAPre; + public: + Route(MapleAllocator *alloc) : visits(alloc->Adapter()) {} + private: + MapleVector visits; + uint64 flowValue = 0; +}; + +class McSSAPre : public SSAPre { + public: + McSSAPre(IRMap &hMap, Dominance &currDom, Dominance &currPdom, MemPool &memPool, MemPool &mp2, PreKind kind, uint32 limit) : + SSAPre(hMap, currDom, currPdom, memPool, 
mp2, kind, limit), + occ2RGNodeMap(ssaPreAllocator.Adapter()), + maxFlowRoutes(ssaPreAllocator.Adapter()), + minCut(ssaPreAllocator.Adapter()) {} + virtual ~McSSAPre() = default; + + void ApplyMCSSAPRE(); + private: + // step 8 willbeavail + void ResetMCWillBeAvail(MePhiOcc *phiOcc) const; + void ComputeMCWillBeAvail() const; + // step 7 max flow/min cut + bool AmongMinCut(RGNode *, uint32 idx) const; + void DumpRGToFile(); // dump reduced graph to dot file + bool IncludedEarlier(Visit **cut, Visit *curVisit, uint32 nextRouteIdx); + void RemoveRouteNodesFromCutSet(std::unordered_multiset &cutSet, Route *route); + bool SearchRelaxedMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar); + bool SearchMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar); + void DetermineMinCut(); + bool VisitANode(RGNode *node, Route *route, std::vector &visitedNodes); + bool FindAnotherRoute(); + void FindMaxFlow(); + // step 6 single sink + void AddSingleSink(); + // step 5 single source + void AddSingleSource(); + // step 4 graph reduction + void GraphReduction(); + // step 3 data flow methods + void SetPartialAnt(MePhiOpndOcc *phiOpnd) const; + void ComputePartialAnt() const; + void ResetFullAvail(MePhiOcc *occ) const; + void ComputeFullAvail() const; + // step 2 renaming methods + void Rename1(); + + MapleUnorderedMap occ2RGNodeMap; + RGNode *source; + RGNode *sink; + uint32 numSourceEdges; + MapleVector maxFlowRoutes; + uint32 nextRGNodeId; + uint64 maxFlowValue; + uint64 relaxedMaxFlowValue; // relax maxFlowValue to avoid excessive mincut search time when number of routes is large + MapleVector minCut; // an array of Visits* to represent the minCut +}; + +} // namespace maple +#endif // MAPLE_ME_INCLUDE_MC_SSAPRE_H diff --git a/src/mapleall/maple_me/include/me_option.h b/src/mapleall/maple_me/include/me_option.h index 4e43e1e05e..7728418555 100644 --- a/src/mapleall/maple_me/include/me_option.h +++ 
b/src/mapleall/maple_me/include/me_option.h @@ -185,6 +185,7 @@ class MeOption { static bool layoutWithPredict; static bool unifyRets; static bool dumpCfgOfPhases; + static bool epreUseProfile; // safety check option begin static SafetyCheckMode npeCheckMode; static bool isNpeCheckAll; diff --git a/src/mapleall/maple_me/include/me_options.h b/src/mapleall/maple_me/include/me_options.h index da127b6d3d..17230e5ae2 100644 --- a/src/mapleall/maple_me/include/me_options.h +++ b/src/mapleall/maple_me/include/me_options.h @@ -135,6 +135,7 @@ extern maplecl::Option remat; extern maplecl::Option unifyrets; extern maplecl::Option lfo; extern maplecl::Option dumpCfgOfPhases; +extern maplecl::Option epreUseProfile; } diff --git a/src/mapleall/maple_me/include/occur.h b/src/mapleall/maple_me/include/occur.h index a24c969ed0..06c2da2a2c 100644 --- a/src/mapleall/maple_me/include/occur.h +++ b/src/mapleall/maple_me/include/occur.h @@ -215,6 +215,8 @@ class MeRealOcc : public MeOccur { bool isLHS; bool isFormalAtEntry; // the fake lhs occurrence at entry for formals bool isHoisted = false; // the hoisted occ used for hoisting + public: + bool rgExcluded = false; // reduced graph excluded, used only by McSSAPre }; class MeInsertedOcc : public MeOccur { @@ -275,13 +277,13 @@ class MePhiOpndOcc : public MeOccur { hasRealUse(false), isInsertedOcc(false), isPhiOpndReload(false), + isMCInsert(false), defPhiOcc(nullptr), phiOpnd4Temp(nullptr) { currentExpr.meStmt = nullptr; } ~MePhiOpndOcc() = default; - bool IsOkToInsert() const; void Dump(const IRMap &irMap) const override; bool IsProcessed() const { return isProcessed; @@ -315,6 +317,14 @@ class MePhiOpndOcc : public MeOccur { isPhiOpndReload = phiOpndReload; } + bool IsMCInsert() const { + return isMCInsert;; + } + + void SetIsMCInsert(bool mcInsert) { + isMCInsert = mcInsert; + } + const MePhiOcc *GetDefPhiOcc() const { return defPhiOcc; } @@ -356,6 +366,7 @@ class MePhiOpndOcc : public MeOccur { bool hasRealUse; bool 
isInsertedOcc; // the phi operand was inserted by inserted occ bool isPhiOpndReload; // if insertedocc and redefined the def, set this flag + bool isMCInsert; // used only in mc-ssapre MePhiOcc *defPhiOcc; // its lhs union { MeExpr *meExpr; // the current expression at the end of the block containing this PhiOpnd @@ -375,6 +386,8 @@ class MePhiOcc : public MeOccur { isLater(true), isExtraneous(false), isRemoved(false), + isPartialAnt(false), + isMCWillBeAvail(true), phiOpnds(alloc.Adapter()), regPhi(nullptr), varPhi(nullptr) {} @@ -408,6 +421,14 @@ class MePhiOcc : public MeOccur { isCanBeAvail = canBeAvail; } + bool IsFullyAvail() const { + return isCanBeAvail; + } + + void SetIsFullyAvail(bool fullyAvail) { + isCanBeAvail = fullyAvail; + } + bool IsLater() const { return isLater; } @@ -432,6 +453,22 @@ class MePhiOcc : public MeOccur { isRemoved = removed; } + bool IsPartialAnt() const { + return isPartialAnt; + } + + void SetIsPartialAnt(bool pant) { + isPartialAnt = pant; + } + + bool IsMCWillBeAvail() const { + return isMCWillBeAvail; + } + + void SetIsMCWillBeAvail(bool wba) { + isMCWillBeAvail = wba; + } + const MapleVector &GetPhiOpnds() const { return phiOpnds; } @@ -488,6 +525,8 @@ class MePhiOcc : public MeOccur { bool isLater; bool isExtraneous; bool isRemoved; // during finalize2, marked this phiocc is removed or not + bool isPartialAnt; // used only in mc-ssapre + bool isMCWillBeAvail; // used only in mc-ssapre MapleVector phiOpnds; MePhiNode *regPhi; // the reg phi being inserted, maybe can delete it later MePhiNode *varPhi; // the Var phi being inserted, maybe can delete it later diff --git a/src/mapleall/maple_me/include/ssa_epre.h b/src/mapleall/maple_me/include/ssa_epre.h index 0df35a90a7..2101208fea 100644 --- a/src/mapleall/maple_me/include/ssa_epre.h +++ b/src/mapleall/maple_me/include/ssa_epre.h @@ -14,14 +14,14 @@ */ #ifndef MAPLE_ME_INCLUDE_SSAEPRE_H #define MAPLE_ME_INCLUDE_SSAEPRE_H -#include "ssa_pre.h" +#include "mc_ssa_pre.h" namespace 
maple { -class SSAEPre : public SSAPre { +class SSAEPre : public McSSAPre { public: SSAEPre(IRMap &map, Dominance &dom, Dominance &pdom, MemPool &memPool, MemPool &mp2, PreKind kind, uint32 limit, bool includeRef, bool lhsIvar) - : SSAPre(map, dom, pdom, memPool, mp2, kind, limit), epreIncludeRef(includeRef), enableLHSIvar(lhsIvar) {} + : McSSAPre(map, dom, pdom, memPool, mp2, kind, limit), epreIncludeRef(includeRef), enableLHSIvar(lhsIvar) {} virtual ~SSAEPre() = default; diff --git a/src/mapleall/maple_me/include/ssa_pre.h b/src/mapleall/maple_me/include/ssa_pre.h index 9e782bca27..20649704dd 100644 --- a/src/mapleall/maple_me/include/ssa_pre.h +++ b/src/mapleall/maple_me/include/ssa_pre.h @@ -139,6 +139,7 @@ class SSAPre { bool strengthReduction = false; bool doLFTR = false; + bool doMinCut = false; protected: // step 6 codemotion methods @@ -156,6 +157,13 @@ class SSAPre { } virtual void CodeMotion(); // step 5 Finalize methods + bool WillBeAvail(MePhiOcc *phiOcc) { + if (!doMinCut) { + return phiOcc->IsWillBeAvail(); + } + return phiOcc->IsMCWillBeAvail(); + } + bool OKToInsert(MePhiOpndOcc *phiOpnd); virtual void Finalize1(); void SetSave(MeOccur &defX); void SetReplacement(MePhiOcc &occ, MeOccur &repDef); @@ -285,7 +293,6 @@ class SSAPre { uint32 strIdxCount = 0; // ssapre will create a lot of temp variables if using var to store redundances, start from 0 PreWorkCandHashTable preWorkCandHashTable; - private: virtual void DoSSAFRE() {}; bool enableDebug = false; diff --git a/src/mapleall/maple_me/src/mc_ssa_pre.cpp b/src/mapleall/maple_me/src/mc_ssa_pre.cpp new file mode 100644 index 0000000000..bf2c7f3abb --- /dev/null +++ b/src/mapleall/maple_me/src/mc_ssa_pre.cpp @@ -0,0 +1,1022 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include +#include +#include +#include "mc_ssa_pre.h" +#include "dominance.h" +#include "mir_builder.h" + +// Implementation of the MC-SSAPRE algorithm based on the PLDI 2011 paper: +// An SSA-based Algorithm for Optimal Speculative Code Motion Under an Execution Profile +// by Hucheng Zhou, Wenguang Chen and Fred Chow + +namespace { +constexpr int kFuncNameLenLimit = 80; +} + +namespace maple { + +// ================ Step 8: WillBeAvail ================= + +void McSSAPre::ResetMCWillBeAvail(MePhiOcc *occ) const { + if (!occ->IsMCWillBeAvail()) { + return; + } + occ->SetIsMCWillBeAvail(false); + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + if (!phiOcc->IsMCWillBeAvail()) { + continue; + } + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() != nullptr && phiOpnd->GetDef() == occ) { + // phiOpnd is a use of occ + if (!phiOpnd->HasRealUse() && !phiOpnd->IsMCInsert()) { + ResetMCWillBeAvail(phiOcc); + break; + } + } + } + } +} + +void McSSAPre::ComputeMCWillBeAvail() const { + if (minCut.size() == 0) { + for (MePhiOcc *phiOcc : phiOccs) { + phiOcc->SetIsMCWillBeAvail(false); + } + return; + } + // set insert in phi operands + for (Visit *visit : minCut) { + MeOccur *occ = visit->node->occ; + if (occ->GetOccType() == kOccPhiopnd) { + MePhiOpndOcc *phiOpndOcc = static_cast(occ); + phiOpndOcc->SetIsMCInsert(true); + } + } + for (MePhiOcc *phiOcc : phiOccs) { + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() == nullptr && !phiOpnd->IsMCInsert()) { + ResetMCWillBeAvail(phiOcc); + break; + } + } + } +} + +// 
================ Step 7: Max Flow / Min Cut ================= + +bool McSSAPre::AmongMinCut(RGNode *nd, uint32 idx) const { + for (Visit *visit : minCut) { + if (visit->node == nd && visit->predIdx == idx) { + return true; + } + } + return false; +} + +void McSSAPre::DumpRGToFile() { + if (sink == nullptr) { + return; + } + std::string fileName = "rg-of-cand-"; + fileName.append(std::to_string(workCand->GetIndex())); + fileName.append("-"); + const std::string &funcName = mirModule->CurFunction()->GetName(); + if (funcName.size() < kFuncNameLenLimit) { + fileName.append(funcName); + } else { + fileName.append(funcName.c_str(), kFuncNameLenLimit); + } + fileName.append(".dot"); + std::ofstream rgFile; + std::streambuf *coutBuf = LogInfo::MapleLogger().rdbuf(); // keep original cout buffer + std::streambuf *buf = rgFile.rdbuf(); + LogInfo::MapleLogger().rdbuf(buf); + rgFile.open(fileName, std::ios::trunc); + rgFile << "digraph {\n"; + for (int32 i = 0; i < sink->pred.size(); i++) { + RGNode *pre = sink->pred[i]; + rgFile << "real" << pre->id << " -> " << "\"sink\nmaxflow " << maxFlowValue << "\";\n"; + } + MapleUnorderedMap::iterator it = occ2RGNodeMap.begin(); + for (; it != occ2RGNodeMap.end(); it++) { + RGNode *rgNode = it->second; + for (int32 i = 0; i < rgNode->pred.size(); i++) { + RGNode *pre = rgNode->pred[i]; + if (pre != source) { + if (pre->occ->GetOccType() == kOccPhiocc) { + rgFile << "phi" << pre->id << " -> "; + } else { + rgFile << "real" << pre->id << " -> "; + } + if (rgNode->occ->GetOccType() == kOccPhiocc) { + rgFile << "phi" << rgNode->id; + } else { + rgFile << "real" << rgNode->id; + } + } else { + rgFile << "source" << " -> " << "phi" << rgNode->id; + } + if (AmongMinCut(rgNode, i)) { + rgFile << "[style=dotted][color=red]"; + } + if (rgNode->usedCap[i] == 0) { + rgFile << "[style=dashed][color=green]"; + } + rgFile << "[label=\"" << rgNode->usedCap[i] << "|" << rgNode->inEdgesCap[i] << "\"];\n"; + } + } + rgFile << "}\n"; + rgFile.flush(); + 
rgFile.close(); + LogInfo::MapleLogger().rdbuf(coutBuf); + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() << " dumped to " << fileName << "\n"; +} + +bool McSSAPre::IncludedEarlier(Visit **cut, Visit *curVisit, uint32 nextRouteIdx) { + uint32 i = nextRouteIdx; + while (i != 0) { + i--; + if (cut[i]->node == curVisit->node && cut[i]->predIdx == curVisit->predIdx) { + return true; + } + } + return false; +} + +// remove this route's nodes from cutSet +void McSSAPre::RemoveRouteNodesFromCutSet(std::unordered_multiset &cutSet, Route *route) { + for (uint32 i = 1; i < route->visits.size(); i++) { + Visit &curVisit = route->visits[i]; + std::unordered_multiset::iterator it = cutSet.find(curVisit.node->id); + ASSERT(it != cutSet.end(), "cutSet maintenance error"); + cutSet.erase(it); + } +} + +// find the cut closest to the sink whose total flow is relaxedMaxFlowValue +bool McSSAPre::SearchRelaxedMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar) { + Route *curRoute = maxFlowRoutes[nextRouteIdx]; + Visit *curVisit = nullptr; + + // determine starting value of visitIdx: start searching back from route end; + // if any node is in cutSet, set visitIdx as that nodes's index in route; + // otherwise, set visitIdx to 0 + uint32 visitIdx = curRoute->visits.size(); + do { + visitIdx--; + if (cutSet.count(curRoute->visits[visitIdx].node->id) != 0) { + break; + } + } while (visitIdx != 1); + // update cutSet with visited nodes lower than visitIdx + if (visitIdx != 1) { + for (uint i = visitIdx - 1; i > 0; i--) { + cutSet.insert(curRoute->visits[i].node->id); + } + } + + bool success = false; + do { + if (visitIdx == curRoute->visits.size()) { + RemoveRouteNodesFromCutSet(cutSet, curRoute); + return false; + } + curVisit = &curRoute->visits[visitIdx]; + uint64 visitCap = curVisit->node->inEdgesCap[curVisit->predIdx]; + cut[nextRouteIdx] = curVisit; + if (visitIdx != 0) { + cutSet.insert(curVisit->node->id); + } + if 
(IncludedEarlier(cut, curVisit, nextRouteIdx)) { + visitCap = 0; + } + success = (flowSoFar + visitCap <= relaxedMaxFlowValue); + if (success && nextRouteIdx != (maxFlowRoutes.size() - 1)) { + success = SearchRelaxedMinCut(cut, cutSet, nextRouteIdx+1, flowSoFar + visitCap); + } + visitIdx++; + } while (!success); + return true; +} + +// find the cut closest to the sink whose total flow is maxFlowValue +bool McSSAPre::SearchMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar) { + Route *curRoute = maxFlowRoutes[nextRouteIdx]; + Visit *curVisit = nullptr; + + // determine starting value of visitIdx: start searching back from route end; + // if any node is in cutSet, set visitIdx as that nodes's index in route; + // otherwise, set visitIdx to 0 + uint32 visitIdx = curRoute->visits.size(); + do { + visitIdx--; + if (cutSet.count(curRoute->visits[visitIdx].node->id) != 0) { + break; + } + } while (visitIdx != 1); + // update cutSet with visited nodes lower than visitIdx + if (visitIdx != 1) { + for (uint i = visitIdx - 1; i > 0; i--) { + cutSet.insert(curRoute->visits[i].node->id); + } + } + + bool success = false; + do { + if (visitIdx == curRoute->visits.size()) { + RemoveRouteNodesFromCutSet(cutSet, curRoute); + return false; + } + curVisit = &curRoute->visits[visitIdx]; + uint64 visitCap = curVisit->node->inEdgesCap[curVisit->predIdx]; + uint64 usedCap = curVisit->node->usedCap[curVisit->predIdx]; + if (visitCap != usedCap) { + if (visitIdx != 0) { + cutSet.insert(curVisit->node->id); + } + visitIdx++; + continue; + } + cut[nextRouteIdx] = curVisit; + if (visitIdx != 0) { + cutSet.insert(curVisit->node->id); + } + if (IncludedEarlier(cut, curVisit, nextRouteIdx)) { + visitCap = 0; + } + success = (flowSoFar + visitCap <= maxFlowValue); + if (success && nextRouteIdx != (maxFlowRoutes.size() - 1)) { + success = SearchMinCut(cut, cutSet, nextRouteIdx+1, flowSoFar + visitCap); + } + visitIdx++; + } while (!success); + return true; 
+}
+// build minCut: choose one Visit (edge) per route in maxFlowRoutes, then sort the picks and drop repeated edges
+void McSSAPre::DetermineMinCut() {
+  if (maxFlowRoutes.empty()) {
+    if (GetSSAPreDebug()) {
+      DumpRGToFile();
+    }
+    return;
+  }
+  // one slot per route: the min cut can be at most as wide as the number of routes in maxFlowRoutes
+  std::vector<Visit*> cut(maxFlowRoutes.size(), nullptr);
+  std::unordered_multiset<uint32> cutSet; // key is RGNode's id; must be kept in sync with cut[]; sink node is not entered
+  constexpr double defaultRelaxScaling = 1.25;
+  relaxedMaxFlowValue = static_cast<uint64>(static_cast<double>(maxFlowValue) * defaultRelaxScaling);
+  bool relaxedSearch = false;
+  if (maxFlowRoutes.size() >= 20) {
+    // many routes: skip the exact search and loosen the flow bound (arbitrary heuristic) to reduce search time
+    relaxedSearch = true;
+    relaxedMaxFlowValue = maxFlowValue * (maxFlowRoutes.size() / 10);
+  }
+  bool success = !relaxedSearch && SearchMinCut(cut.data(), cutSet, 0, 0);
+  if (!success) {
+    relaxedSearch = true;
+    success = SearchRelaxedMinCut(cut.data(), cutSet, 0, 0);
+  }
+  if (!success) {
+    if (GetSSAPreDebug()) {
+      mirModule->GetOut() << "MinCut failed\n";
+      DumpRGToFile();
+    }
+    CHECK_FATAL(false, "McSSAPre::DetermineMinCut: failed to find min cut");
+  }
+  // sort cut by (node id, predIdx) so that visits of the same edge end up adjacent
+  std::sort(cut.begin(), cut.end(), [](const Visit *left, const Visit *right) {
+                return (left->node != right->node) ? (left->node->id < right->node->id)
+                                                   : (left->predIdx < right->predIdx); });
+  // copy one visit per distinct (node, predIdx) edge into minCut; further visits of that edge count as duplicates
+  minCut.push_back(cut[0]);
+  size_t duplicatedVisits = 0;
+  for (uint32 i = 1; i < maxFlowRoutes.size(); i++) {
+    if (cut[i]->node != cut[i-1]->node || cut[i]->predIdx != cut[i-1]->predIdx) {
+      minCut.push_back(cut[i]);
+    } else {
+      duplicatedVisits++;
+    }
+  }
+  if (GetSSAPreDebug()) {
+    mirModule->GetOut() << "finished ";
+    if (relaxedSearch) {
+      mirModule->GetOut() << "relaxed ";
+    }
+    mirModule->GetOut() << "MinCut\n";
+    DumpRGToFile();
+    if (duplicatedVisits != 0) {
+      mirModule->GetOut() << duplicatedVisits << " duplicated visits in mincut\n";
+    }
+  }
+}
+
+bool McSSAPre::VisitANode(RGNode *node, Route *route, std::vector &visitedNodes) {
+  ASSERT(node->pred.size() != 0 , "McSSAPre::VisitANode: no connection to source node");
+  // if any pred is the source and there's capacity to reach it, return success
+  for (uint32 i = 0; i < node->pred.size(); i++) {
+    if (node->pred[i] == source && node->inEdgesCap[i] > node->usedCap[i]) {
+      // if there is another pred never taken that also reaches source, use that instead
+      for (uint32 k = i + 1; k < node->pred.size(); k++) {
+        if (node->pred[k] == source && node->usedCap[k] == 0 && node->inEdgesCap[k] > 0) {
+          route->visits.push_back(Visit(node, k));
+          return true;
+        }
+      }
+      route->visits.push_back(Visit(node, i));
+      return true;
+    }
+  }
+
+  // pick an never-taken predecessor path first
+  for (uint32 i = 0; i < node->pred.size(); i++) {
+    if (node->usedCap[i] == 0 && node->inEdgesCap[i] > 0 && !visitedNodes[node->pred[i]->id]) {
+      route->visits.push_back(Visit(node, i));
+      visitedNodes[node->pred[i]->id] = true;
+      bool success = VisitANode(node->pred[i], route, visitedNodes);
+      if (!success) {
+        route->visits.pop_back();
+      } else {
+        return true;
+      }
+    }
+  }
+
+  size_t numPreds = node->pred.size();
+  uint32 sortedPred[numPreds];
+  for (uint32 i = 0; i < numPreds; i++) {
+    sortedPred[i] = i;
+  }
+  // put sortedPred[] in increasing order of 
capacities + std::sort(sortedPred, sortedPred+numPreds, [node](uint32 m, uint32 n) { + return node->inEdgesCap[m] < node->inEdgesCap[n]; }); + // for this round, prefer predecessor with higher unused capacity + for (uint32 i = 0; i < numPreds; i++) { + uint32 j = sortedPred[i]; + if (!visitedNodes[node->pred[j]->id] && node->inEdgesCap[j] > node->usedCap[j]) { + route->visits.push_back(Visit(node, j)); + visitedNodes[node->pred[j]->id] = true; + bool success = VisitANode(node->pred[j], route, visitedNodes); + if (!success) { + route->visits.pop_back(); + } else { + return true; + } + } + } + return false; +} + +// return false if not successful; if successful, the new route will be pushed +// to maxFlowRoutes +bool McSSAPre::FindAnotherRoute() { + std::vector visitedNodes(occ2RGNodeMap.size() + 1, false); + Route *route = perCandMemPool->New(&perCandAllocator); + bool success = false; + // pick an untaken sink predecessor first + for (int32 i = 0; i < sink->pred.size(); i++) { + if (sink->usedCap[i] == 0) { + route->visits.push_back(Visit(sink, i)); + visitedNodes[sink->pred[i]->id] = true; + success = VisitANode(sink->pred[i], route, visitedNodes); + if (!success) { + route->visits.pop_back(); + } else { + break; + } + } + } + if (!success) { + // now, pick any sink predecessor + for (int32 i = 0; i < sink->pred.size(); i++) { + route->visits.push_back(Visit(sink, i)); + visitedNodes[sink->pred[i]->id] = true; + success = VisitANode(sink->pred[i], route, visitedNodes); + if (!success) { + route->visits.pop_back(); + } else { + break; + } + } + } + if (!success) { + return false; + } + // find bottleneck capacity along route + uint64 minAvailCap = route->visits[0].AvailableCapacity(); + for (int32 i = 1; i < route->visits.size(); i++) { + uint64 curAvailCap = route->visits[i].AvailableCapacity(); + minAvailCap = std::min(minAvailCap, curAvailCap); + } + route->flowValue = minAvailCap; + // update usedCap along route + for (int32 i = 0; i < route->visits.size(); 
i++) { + route->visits[i].IncreUsedCapacity(minAvailCap); + } + maxFlowRoutes.push_back(route); + return true; +} + +void McSSAPre::FindMaxFlow() { + if (sink == nullptr) { + return; + } + maxFlowValue = 0; + bool found; + do { + found = FindAnotherRoute(); + } while (found); + // calculate maxFlowValue; + for (Route *route : maxFlowRoutes) { + maxFlowValue += route->flowValue; + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << ": FindMaxFlow found " << maxFlowRoutes.size() << " routes\n"; + for (size_t i = 0; i < maxFlowRoutes.size(); i++) { + Route *route = maxFlowRoutes[i]; + mirModule->GetOut() << "route " << i << " sink:pred" << route->visits[0].predIdx; + for (size_t j = 1; j < route->visits.size(); j++) { + if (route->visits[j].node->occ->GetOccType() == kOccPhiocc) { + mirModule->GetOut() << " phi"; + } else { + mirModule->GetOut() << " real"; + } + mirModule->GetOut() << route->visits[j].node->id << ":pred" << route->visits[j].predIdx; + } + mirModule->GetOut() << " flowValue " << route->flowValue; + mirModule->GetOut() << "\n"; + } + mirModule->GetOut() << "maxFlowValue is " << maxFlowValue << "\n"; + } +} + +// ================ Step 6: Add Single Sink ================= + +void McSSAPre::AddSingleSink() { + if (numSourceEdges == 0) { + return; // empty reduced graph + } + sink = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, nullptr); + size_t numToSink = 0; + MapleUnorderedMap::iterator it = occ2RGNodeMap.begin(); + for (; it != occ2RGNodeMap.end(); it++) { + if (it->first->GetOccType() != kOccReal) { + continue; + } + RGNode *use = it->second; + // add edge from this use node to sink + sink->pred.push_back(use); + sink->inEdgesCap.push_back(UINT64_MAX); + sink->usedCap.push_back(0); + numToSink++; + } + ASSERT(numToSink != 0, "McSSAPre::AddSingleSink: found 0 edge to sink"); + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() << " has " << 
numToSink << " edges to sink\n"; + } +} + +// ================ Step 5: Add Single Source ================= +void McSSAPre::AddSingleSource() { + source = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, nullptr); + for (MePhiOcc *phiOcc : phiOccs) { + if (phiOcc->IsPartialAnt() && !phiOcc->IsFullyAvail()) { + // look for null operands + for (int32 i = 0; i < phiOcc->GetPhiOpnds().size(); i++) { + MePhiOpndOcc *phiopndOcc = phiOcc->GetPhiOpnd(i); + if (phiopndOcc->GetDef() != nullptr) { + continue; + } + // add edge from source to this phi node + RGNode *sucNode = occ2RGNodeMap[phiOcc]; + sucNode->pred.push_back(source); + sucNode->inEdgesCap.push_back(phiOcc->GetBB()->GetPred(i)->GetFrequency()+1); + sucNode->usedCap.push_back(0); + numSourceEdges++; + } + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex(); + if (numSourceEdges == 0) { + mirModule->GetOut() << " has empty reduced graph\n"; + } else { + mirModule->GetOut() << " source has " << numSourceEdges << " succs\n"; + } + } +} + +// ================ Step 4: Graph Reduction ================= +void McSSAPre::GraphReduction() { + size_t numPhis = 0; + size_t numRealOccs = 0; + size_t numType1Edges = 0; + size_t numType2Edges = 0; + // add def nodes + for (MePhiOcc *phiOcc : phiOccs) { + if (phiOcc->IsPartialAnt() && !phiOcc->IsFullyAvail()) { + RGNode *newRGNode = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, phiOcc); + occ2RGNodeMap.insert(std::pair(phiOcc, newRGNode)); + numPhis++; + } + } + if (occ2RGNodeMap.empty()) { + return; + } + // add use nodes and use-def edges + for (MeOccur *occ : allOccs) { + if (occ->GetOccType() == kOccReal) { + MeRealOcc *realOcc = static_cast(occ); + if (!realOcc->rgExcluded && realOcc->GetDef() != nullptr) { + MeOccur *defOcc = realOcc->GetDef(); + ASSERT(defOcc->GetOccType() == kOccPhiocc, "McSSAPre::GraphReduction: real occ not defined by phi"); + if (occ2RGNodeMap.find(defOcc) != occ2RGNodeMap.end()) { + 
RGNode *use = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, realOcc); + occ2RGNodeMap[realOcc] = use; + numRealOccs++; + RGNode *def = occ2RGNodeMap[defOcc]; + use->pred.push_back(def); + use->inEdgesCap.push_back(realOcc->GetBB()->GetFrequency()+1); + use->usedCap.push_back(0); + numType2Edges++; + } + } + } else if (occ->GetOccType() == kOccPhiopnd) { + MePhiOpndOcc *phiopndOcc = static_cast(occ); + MePhiOcc *defPhiOcc = phiopndOcc->GetDefPhiOcc(); + if (defPhiOcc->IsPartialAnt() && !defPhiOcc->IsFullyAvail()) { + // defPhiOcc is the use node and it has already been added + MeOccur *defOcc = phiopndOcc->GetDef(); + if (defOcc != nullptr && defOcc->GetOccType() == kOccPhiocc && + static_cast(defOcc)->IsPartialAnt() && + !static_cast(defOcc)->IsFullyAvail()) { + ASSERT(occ2RGNodeMap.find(defOcc) != occ2RGNodeMap.end(), "McSSAPre::GraphReduction: def node not found"); + RGNode *def = occ2RGNodeMap[defOcc]; + RGNode *use = occ2RGNodeMap[defPhiOcc]; + use->pred.push_back(def); + // find the index of phiopndOcc in defPhiOcc's phiOpnds + uint32 i; + for (i = 0; i < defPhiOcc->GetPhiOpnds().size(); i++) { + if (defPhiOcc->GetPhiOpnd(i) == phiopndOcc) { + break; + } + } + ASSERT(i != defPhiOcc->GetPhiOpnds().size(), "McSSAPre::GraphReduction: cannot find corresponding phi opnd"); + use->inEdgesCap.push_back(defPhiOcc->GetBB()->GetPred(i)->GetFrequency()+1); + use->usedCap.push_back(0); + numType1Edges++; + } + } + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << " after GraphReduction, phis: " << numPhis << " reals: " << numRealOccs + << " type 1 edges: " << numType1Edges << " type 2 edges: " << numType2Edges << "\n"; + } +} + +// ================ Step 3: Data Flow Computations ================= + +// set partial anticipation +void McSSAPre::SetPartialAnt(MePhiOpndOcc *phiOpnd) const { + MeOccur *defOcc = phiOpnd->GetDef(); + if (defOcc == nullptr || defOcc->GetOccType() != kOccPhiocc) { + return; + } 
+ auto *defPhiOcc = static_cast(defOcc); + if (defPhiOcc->IsPartialAnt()) { + return; + } + defPhiOcc->SetIsPartialAnt(true); + for (MePhiOpndOcc *mePhiOpnd : defPhiOcc->GetPhiOpnds()) { + SetPartialAnt(mePhiOpnd); + } +} + +// compute partial anticipation for each PHI +void McSSAPre::ComputePartialAnt() const { + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + if (phiOcc->IsPartialAnt()) { + // propagate partialAnt along use-def edges + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + SetPartialAnt(phiOpnd); + } + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << " after PartialAnt\n"; + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + phiOcc->Dump(*irMap); + if (phiOcc->IsPartialAnt()) { + mirModule->GetOut() << " is partialant\n"; + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (!phiOpnd->IsProcessed()) { + phiOpnd->Dump(*irMap); + mirModule->GetOut() << " has not been processed by Rename2\n"; + } + } + } else { + mirModule->GetOut() << " is not partialant\n"; + } + } + } +} + +void McSSAPre::ResetFullAvail(MePhiOcc *occ) const { + if (!occ->IsFullyAvail()) { + return; + } + occ->SetIsFullyAvail(false); + // reset those phiocc nodes that have occ as one of its operands + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + if (!phiOcc->IsFullyAvail()) { + continue; + } + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() != nullptr && phiOpnd->GetDef() == occ) { + // phiOpnd is a use of occ + if (!phiOpnd->HasRealUse()) { + ResetFullAvail(phiOcc); + break; + } + } + } + } +} + +// the fullyavail attribute is stored in the isCanBeAvail field +void McSSAPre::ComputeFullAvail() const { + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + // reset fullyavail if any phi operand is null + bool existNullDef = 
false; + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() == nullptr) { + existNullDef = true; + break; + } + } + if (existNullDef) { + ResetFullAvail(phiOcc); + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << " after FullyAvailable\n"; + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + phiOcc->Dump(*irMap); + if (phiOcc->IsFullyAvail()) { + mirModule->GetOut() << " is fullyavail\n"; + } else { + mirModule->GetOut() << " is not fullyavail\n"; + } + } + } +} + +// ================ Step 2: Renaming ================= +void McSSAPre::Rename1() { + std::stack occStack; + rename2Set.clear(); + classCount = 1; + // iterate the occurrence according to its preorder dominator tree + for (MeOccur *occ : allOccs) { + while (!occStack.empty() && !occStack.top()->IsDominate(*dom, *occ)) { + occStack.pop(); + } + switch (occ->GetOccType()) { + case kOccReal: { + if (occStack.empty()) { + // assign new class + occ->SetClassID(classCount++); + occStack.push(occ); + break; + } + MeOccur *topOccur = occStack.top(); + if (topOccur->GetOccType() == kOccUse || topOccur->GetOccType() == kOccMembar) { + occ->SetClassID(classCount++); + occStack.push(occ); + break; + } + auto *realOcc = static_cast(occ); + if (topOccur->GetOccType() == kOccReal) { + auto *realTopOccur = static_cast(topOccur); + if (AllVarsSameVersion(*realTopOccur, *realOcc)) { + // all corresponding variables are the same + realOcc->SetClassID(realTopOccur->GetClassID()); + if (realTopOccur->GetDef() != nullptr) { + realOcc->SetDef(realTopOccur->GetDef()); + } else { + realOcc->SetDef(realTopOccur); + } + realOcc->rgExcluded = true; + } else { + // assign new class + occ->SetClassID(classCount++); + occStack.push(occ); + } + } else { + // top of stack is a PHI occurrence + ASSERT(topOccur->GetOccType() == kOccPhiocc, "invalid kOccPhiocc"); + std::vector varVec; + CollectVarForCand(*realOcc, 
varVec); + bool isAllDom = true; + if (realOcc->IsLHS()) { + isAllDom = false; + } else { + for (auto varIt = varVec.begin(); varIt != varVec.end(); ++varIt) { + MeExpr *varMeExpr = *varIt; + if (workCand->isSRCand) { + varMeExpr = ResolveAllInjuringDefs(varMeExpr); + } + if (!DefVarDominateOcc(varMeExpr, *topOccur)) { + isAllDom = false; + } + } + } + MePhiOcc *phiTopOccur = static_cast(topOccur); + if (isAllDom) { + realOcc->SetClassID(topOccur->GetClassID()); + realOcc->SetDef(topOccur); + (void)rename2Set.insert(realOcc->GetPosition()); + phiTopOccur->SetIsPartialAnt(true); + } else { + // assign new class + occ->SetClassID(classCount++); + } + occStack.push(occ); + } + break; + } + case kOccCompare: { + if (occStack.empty()) { + break; + } + MeOccur *topOccur = occStack.top(); + if (topOccur->GetOccType() == kOccUse || topOccur->GetOccType() == kOccMembar) { + break; + } + MeRealOcc *realOcc = static_cast(occ); + ScalarMeExpr *scalarOpnd0 = dynamic_cast(workCand->GetTheMeExpr()->GetOpnd(0)); + ScalarMeExpr *scalarOpnd1 = dynamic_cast(workCand->GetTheMeExpr()->GetOpnd(1)); + ScalarMeExpr *compareOpnd0 = dynamic_cast(realOcc->GetMeExpr()->GetOpnd(0)); + ScalarMeExpr *compareOpnd1 = dynamic_cast(realOcc->GetMeExpr()->GetOpnd(1)); + // set compareOpnd to be the scalar operand that is common to + // workCand->theMeExpr and realOcc->meExpr + ScalarMeExpr *compareOpnd = nullptr; + uint32 scalarOpndNo = 0; + if (scalarOpnd0 != nullptr) { + if (compareOpnd0 != nullptr && scalarOpnd0->GetOst() == compareOpnd0->GetOst()) { + compareOpnd = compareOpnd0; + scalarOpndNo = 0; + } else if (compareOpnd1 != nullptr && scalarOpnd0->GetOst() == compareOpnd1->GetOst()) { + compareOpnd = compareOpnd1; + scalarOpndNo = 0; + } + } + if (scalarOpnd1 != nullptr) { + if (compareOpnd0 != nullptr && scalarOpnd1->GetOst() == compareOpnd0->GetOst()) { + compareOpnd = compareOpnd0; + scalarOpndNo = 1; + } else if (compareOpnd1 != nullptr && scalarOpnd1->GetOst() == compareOpnd1->GetOst()) { 
+ compareOpnd = compareOpnd1; + scalarOpndNo = 1; + } + } + CHECK_FATAL(compareOpnd != nullptr, "Rename1: compOcc does not correspond to realOcc"); + ScalarMeExpr *resolvedCompareOpnd = ResolveAllInjuringDefs(compareOpnd); + if (topOccur->GetOccType() == kOccReal) { + MeRealOcc *realTopOccur = static_cast(topOccur); + ScalarMeExpr *topOccurOpnd = static_cast(realTopOccur->GetMeExpr()->GetOpnd(scalarOpndNo)); + if (compareOpnd == topOccurOpnd || resolvedCompareOpnd == topOccurOpnd) { + realOcc->SetClassID(realTopOccur->GetClassID()); + if (realTopOccur->GetDef() != nullptr) { + realOcc->SetDef(realTopOccur->GetDef()); + } else { + realOcc->SetDef(realTopOccur); + } + } + break; + } + // top of stack is a PHI occurrence + ASSERT(topOccur->GetOccType() == kOccPhiocc, "invalid kOccPhiocc"); + if (DefVarDominateOcc(compareOpnd, *topOccur)) { + realOcc->SetClassID(topOccur->GetClassID()); + realOcc->SetDef(topOccur); + } + break; + } + case kOccPhiocc: { + // assign new class + occ->SetClassID(classCount++); + occStack.push(occ); + break; + } + case kOccPhiopnd: { + // stow away the use occurrences at the stack top + MeOccur *stowedUseOcc = nullptr; + if (!occStack.empty() && occStack.top()->GetOccType() == kOccUse) { + stowedUseOcc = occStack.top(); + occStack.pop(); + CHECK_FATAL(occStack.empty() || occStack.top()->GetOccType() != kOccUse, + "Rename1: cannot have 2 consecutive use occurs on stack"); + } + if (occStack.empty() || occStack.top()->GetOccType() == kOccMembar) { + occ->SetDef(nullptr); + } else { + MeOccur *topOccur = occStack.top(); + occ->SetDef(topOccur); + occ->SetClassID(topOccur->GetClassID()); + if (topOccur->GetOccType() == kOccReal) { + static_cast(occ)->SetHasRealUse(true); + } + } + // push stowed use_occ back + if (stowedUseOcc != nullptr) { + occStack.push(stowedUseOcc); + } + break; + } + case kOccExit: + break; + case kOccMembar: { + if (!occStack.empty()) { + MeOccur *topOccur = occStack.top(); + if (topOccur->GetOccType() == kOccPhiocc) { + 
} else if (topOccur->GetOccType() != occ->GetOccType()) { + occStack.push(occ); + } + } else { + occStack.push(occ); + } + break; + } + default: + ASSERT(false, "should not be here"); + break; + } + } + if (GetSSAPreDebug()) { + PreWorkCand *curCand = workCand; + mirModule->GetOut() << "++++ ssapre candidate " << curCand->GetIndex() << " after rename1\n"; + for (MeOccur *occ : allOccs) { + occ->Dump(*irMap); + mirModule->GetOut() << '\n'; + } + mirModule->GetOut() << "\n" << "rename2 set:\n"; + for (uint32 pos : rename2Set) { + MeRealOcc *occur = workCand->GetRealOcc(pos); + occur->Dump(*irMap); + mirModule->GetOut() << " with def at\n"; + occur->GetDef()->Dump(*irMap); + mirModule->GetOut() << "\n"; + } + mirModule->GetOut() << "\n"; + } +} + +void McSSAPre::ApplyMCSSAPRE() { + // #0 build worklist + BuildWorkList(); + if (GetSSAPreDebug()) { + mirModule->GetOut() << " worklist initial size " << workList.size() << '\n'; + } + ConstructUseOccurMap(); + uint32 cnt = 0; + while (!workList.empty()) { + ++cnt; + if (cnt > preLimit) { + break; + } + workCand = workList.front(); + workCand->SetIndex(static_cast(cnt)); + workList.pop_front(); + if (workCand->GetRealOccs().empty()) { + workCand->deletedFromWorkList = true; + continue; + } + if ((preKind == kExprPre && workCand->GetTheMeExpr()->GetMeOp() == kMeOpIvar) || (preKind == kLoadPre)) { + // if only LHS real occur, skip this candidate + bool hasNonLHS = false; + for (MeRealOcc *realOcc : workCand->GetRealOccs()) { + if (realOcc->GetOccType() == kOccReal && !realOcc->IsLHS()) { + hasNonLHS = true; + break; + } + } + if (!hasNonLHS) { + workCand->deletedFromWorkList = true; + continue; + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "||||||| MC-SSAPRE candidate " << cnt << " at worklist index " + << workCand->GetIndex() << ": "; + workCand->DumpCand(*irMap); + if (workCand->isSRCand) { + mirModule->GetOut() << " srCand"; + } + if (workCand->onlyInvariantOpnds) { + mirModule->GetOut() << " 
onlyInvairantOpnds"; + } + mirModule->GetOut() << '\n'; + } + allOccs.clear(); + phiOccs.clear(); + nextRGNodeId = 1; + occ2RGNodeMap.clear(); + numSourceEdges = 0; + maxFlowRoutes.clear(); + minCut.clear(); + source = nullptr; + sink = nullptr; + // #1 Insert PHI; results in allOccs and phiOccs + ComputeVarAndDfPhis(); + CreateSortedOccs(); + if (workCand->GetRealOccs().empty()) { + workCand->deletedFromWorkList = true; + continue; + } + // set the position field in the MeRealOcc nodes + for (size_t j = 0; j < workCand->GetRealOccs().size(); j++) { + workCand->GetRealOcc(j)->SetPosition(j); + } + // #2 Rename + Rename1(); + Rename2(); + if (!phiOccs.empty()) { + // if no PHI inserted, no need to perform these steps + // #3 data flow methods + ComputeFullAvail(); + ComputePartialAnt(); + // #4 graph reduction + GraphReduction(); + // #5 single source + AddSingleSource(); + // #6 single sink + AddSingleSink(); + // step 7 max flow/min cut + FindMaxFlow(); + DetermineMinCut(); + // step 8 willbeavail + ComputeMCWillBeAvail(); + } + // #5 Finalize + Finalize1(); + if (workCand->Redo2HandleCritEdges()) { + // reinitialize def field to nullptr + for (MeOccur *occ : allOccs) { + occ->SetDef(nullptr); + if (occ->GetOccType() == kOccPhiopnd) { + auto *phiOpndOcc = static_cast(occ); + phiOpndOcc->SetIsProcessed(false); + } + } + Rename1(); + Rename2(); + ComputeFullAvail(); + ComputePartialAnt(); + GraphReduction(); + AddSingleSource(); + AddSingleSink(); + FindMaxFlow(); + DetermineMinCut(); + ComputeMCWillBeAvail(); + Finalize1();  // after ComputeMCWillBeAvail(): reads the flags via WillBeAvail() + } + Finalize2(); + workCand->deletedFromWorkList = true; + // #6 CodeMotion and recompute worklist based on new occurrences + CodeMotion(); + if (preKind == kStmtPre && (workCand->GetRealOccs().front()->GetOpcodeOfMeStmt() == OP_dassign || + workCand->GetRealOccs().front()->GetOpcodeOfMeStmt() == OP_callassigned)) { + // apply full redundancy elimination + DoSSAFRE(); + } + perCandMemPool->ReleaseContainingMem(); + } +} + +} // namespace
maple diff --git a/src/mapleall/maple_me/src/me_option.cpp b/src/mapleall/maple_me/src/me_option.cpp index d062458a0d..dccb7b78eb 100644 --- a/src/mapleall/maple_me/src/me_option.cpp +++ b/src/mapleall/maple_me/src/me_option.cpp @@ -132,6 +132,7 @@ SafetyCheckMode MeOption::boundaryCheckMode = SafetyCheckMode::kNoCheck; bool MeOption::safeRegionMode = false; bool MeOption::unifyRets = false; bool MeOption::dumpCfgOfPhases = false; +bool MeOption::epreUseProfile = true; #if MIR_JAVA std::string MeOption::acquireFuncName = "Landroid/location/LocationManager;|requestLocationUpdates|"; std::string MeOption::releaseFuncName = "Landroid/location/LocationManager;|removeUpdates|"; @@ -315,6 +316,7 @@ bool MeOption::SolveOptions(bool isDebug) { maplecl::CopyIfEnabled(lessThrowAlias, opts::me::lessthrowalias); maplecl::CopyIfEnabled(propBase, opts::me::propbase); maplecl::CopyIfEnabled(dumpCfgOfPhases, opts::me::dumpCfgOfPhases); + maplecl::CopyIfEnabled(epreUseProfile, opts::me::epreUseProfile); if (opts::me::propiloadref.IsEnabledByUser()) { propIloadRef = opts::me::propiloadref; diff --git a/src/mapleall/maple_me/src/me_options.cpp b/src/mapleall/maple_me/src/me_options.cpp index 79adab5f52..2b448a265c 100644 --- a/src/mapleall/maple_me/src/me_options.cpp +++ b/src/mapleall/maple_me/src/me_options.cpp @@ -698,5 +698,10 @@ maplecl::Option lfo({"--lfo"}, maplecl::Option dumpCfgOfPhases({"--dumpcfgofphases"}, " --dumpcfgofphases \tDump CFG from various phases to .dot files\n", {meCategory}); +maplecl::Option epreUseProfile({"--epreuseprofile"}, + " --epreuseprofile \tEnable profile-guided epre phase\n" + " --no-epreuseprofile \tDisable profile-guided epre phase\n", + {meCategory}, + maplecl::DisableWith("--no-epreuseprofile")); } diff --git a/src/mapleall/maple_me/src/me_ssa_epre.cpp b/src/mapleall/maple_me/src/me_ssa_epre.cpp index f2019b28a0..b209c00fc3 100644 --- a/src/mapleall/maple_me/src/me_ssa_epre.cpp +++ b/src/mapleall/maple_me/src/me_ssa_epre.cpp @@ -103,6 +103,9 
@@ bool MESSAEPre::PhaseRun(maple::MeFunction &f) { } MeSSAEPre ssaPre(f, *irMap, *dom, *pdom, kh, *ssaPreMemPool, *ApplyTempMemPool(), epreLimitUsed, epreIncludeRef, MeOption::epreLocalRefVar, MeOption::epreLHSIvar); + if (f.GetMirFunc()->GetFuncProfData() && MeOption::epreUseProfile) { + ssaPre.doMinCut = true; + } ssaPre.SetSpillAtCatch(MeOption::spillAtCatch); if (MeOption::strengthReduction && !f.GetMIRModule().IsJavaModule()) { ssaPre.strengthReduction = true; @@ -120,7 +123,11 @@ bool MESSAEPre::PhaseRun(maple::MeFunction &f) { if (DEBUGFUNC_NEWPM(f)) { ssaPre.SetSSAPreDebug(true); } - ssaPre.ApplySSAPRE(); + if (!ssaPre.doMinCut) { + ssaPre.ApplySSAPRE(); + } else { + ssaPre.ApplyMCSSAPRE(); + } if (!ssaPre.GetCandsForSSAUpdate().empty()) { MeSSAUpdate ssaUpdate(f, *f.GetMeSSATab(), *dom, ssaPre.GetCandsForSSAUpdate()); ssaUpdate.Run(); diff --git a/src/mapleall/maple_me/src/me_stmt_pre.cpp b/src/mapleall/maple_me/src/me_stmt_pre.cpp index 7a6d13161c..12a704cd5d 100644 --- a/src/mapleall/maple_me/src/me_stmt_pre.cpp +++ b/src/mapleall/maple_me/src/me_stmt_pre.cpp @@ -178,7 +178,7 @@ void MeStmtPre::Finalize1() { auto *phiOpnd = static_cast(occ); MePhiOcc *phiOcc = phiOpnd->GetDefPhiOcc(); if (phiOcc->IsWillBeAvail()) { - if (phiOpnd->IsOkToInsert()) { + if (OKToInsert(phiOpnd)) { // insert the current expression at the end of the block containing phiOpnd if (phiOpnd->GetBB()->GetSucc().size() > 1) { CHECK_FATAL(!workCand->Redo2HandleCritEdges(), "Finalize1: insertion at critical edge; aborting"); diff --git a/src/mapleall/maple_me/src/occur.cpp b/src/mapleall/maple_me/src/occur.cpp index 9a52a85ffe..c45241d152 100644 --- a/src/mapleall/maple_me/src/occur.cpp +++ b/src/mapleall/maple_me/src/occur.cpp @@ -107,22 +107,6 @@ MeExpr *MeOccur::GetSavedExpr() { } } -// return true if either: -// operand is nullptr (def is null), or -// hasRealUse is false and defined by a PHI not will be avail -bool MePhiOpndOcc::IsOkToInsert() const { - if (GetDef() == nullptr) { 
- return true; - } - if (!hasRealUse) { - const MeOccur *defOcc = GetDef(); - if (defOcc->GetOccType() == kOccPhiocc && !static_cast(defOcc)->IsWillBeAvail()) { - return true; - } - } - return false; -} - bool MePhiOcc::IsOpndDefByRealOrInserted() const { for (MePhiOpndOcc *phiOpnd : phiOpnds) { MeOccur *defOcc = phiOpnd->GetDef(); @@ -152,6 +136,9 @@ void MeRealOcc::Dump(const IRMap &irMap) const { } else { mod->GetOut() << "RealOcc(LHS) "; } + if (rgExcluded) { + mod->GetOut() << "rgexcluded "; + } if (meExpr != nullptr) { meExpr->Dump(&irMap); } else { diff --git a/src/mapleall/maple_me/src/ssa_pre.cpp b/src/mapleall/maple_me/src/ssa_pre.cpp index a94b544c20..747da400f7 100644 --- a/src/mapleall/maple_me/src/ssa_pre.cpp +++ b/src/mapleall/maple_me/src/ssa_pre.cpp @@ -165,7 +165,7 @@ void SSAPre::GenerateSavePhiOcc(MePhiOcc &phiOcc) { void SSAPre::UpdateInsertedPhiOccOpnd() { for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { MePhiOcc *phiOcc = *it; - if (!phiOcc->IsWillBeAvail() || phiOcc->IsRemoved()) { + if (!WillBeAvail(phiOcc) || phiOcc->IsRemoved()) { continue; } if (phiOcc->GetRegPhi()) { @@ -237,7 +237,7 @@ void SSAPre::CodeMotion() { } case kOccPhiopnd: { MePhiOpndOcc *phiopnd = static_cast(occ); - if (phiopnd->GetDefPhiOcc()->IsRemoved() || !phiopnd->GetDefPhiOcc()->IsWillBeAvail()) { + if (phiopnd->GetDefPhiOcc()->IsRemoved() || !WillBeAvail(phiopnd->GetDefPhiOcc())) { break; } if (phiopnd->GetDef()->GetOccType() == kOccInserted) { @@ -285,7 +285,7 @@ void SSAPre::CodeMotion() { } case kOccPhiopnd: { auto *phiOpnd = static_cast(occ); - if (phiOpnd->GetDefPhiOcc()->IsRemoved() || !phiOpnd->GetDefPhiOcc()->IsWillBeAvail()) { + if (phiOpnd->GetDefPhiOcc()->IsRemoved() || !WillBeAvail(phiOpnd->GetDefPhiOcc())) { break; } MeOccur *defOcc = phiOpnd->GetDef(); @@ -313,7 +313,7 @@ void SSAPre::CodeMotion() { } case kOccPhiocc: { auto *phiOcc = static_cast(occ); - if (phiOcc->IsRemoved() || !phiOcc->IsWillBeAvail()) { + if (phiOcc->IsRemoved() || 
!WillBeAvail(phiOcc)) { break; } GenerateSavePhiOcc(*phiOcc); @@ -331,7 +331,7 @@ void SSAPre::CodeMotion() { } } else { MePhiOcc *phiOcc = static_cast(compOcc->GetDef()); - if (phiOcc->IsRemoved() || !phiOcc->IsWillBeAvail() || !phiOcc->IsDownSafe()) { + if (phiOcc->IsRemoved() || !WillBeAvail(phiOcc) || !phiOcc->IsDownSafe()) { break; } } @@ -366,6 +366,24 @@ void SSAPre::CodeMotion() { } // ================ Step 5: Finalize ================= + +// return true if either: +// operand is nullptr (def is null), or +// hasRealUse is false and defined by a PHI not will be avail +bool SSAPre::OKToInsert(MePhiOpndOcc *phiOpnd) { + if (phiOpnd->GetDef() == nullptr) { + return true; + } + if (!phiOpnd->HasRealUse()) { + MeOccur *defOcc = phiOpnd->GetDef(); + if (defOcc->GetOccType() == kOccPhiocc && + !WillBeAvail(static_cast(defOcc))) { + return true; + } + } + return false; +} + void SSAPre::Finalize1() { std::vector availDefVec(classCount, nullptr); // traversal in preoder DT @@ -374,7 +392,7 @@ void SSAPre::Finalize1() { switch (occ->GetOccType()) { case kOccPhiocc: { auto *phiOcc = static_cast(occ); - if (phiOcc->IsWillBeAvail()) { + if (WillBeAvail(phiOcc)) { availDefVec[classX] = phiOcc; } break; @@ -418,10 +436,10 @@ void SSAPre::Finalize1() { // we assume one phiOpnd has only one phiOcc use because critical edge split the blocks auto *phiOpnd = static_cast(occ); MePhiOcc *phiOcc = phiOpnd->GetDefPhiOcc(); - if (!phiOcc->IsWillBeAvail()) { + if (!WillBeAvail(phiOcc)) { break; } - if (phiOpnd->IsOkToInsert()) { + if (OKToInsert(phiOpnd)) { // insert the current expression at the end of the block containing phiOpnd if (phiOpnd->GetBB()->GetSucc().size() > 1) { CHECK_FATAL(!workCand->Redo2HandleCritEdges(), "Finalize1: insertion at critical edge, aborting"); @@ -466,7 +484,7 @@ void SSAPre::Finalize1() { " after Finalize1===================\n"; for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { MePhiOcc *phiOcc = *it; - if (!phiOcc->IsWillBeAvail()) { + if 
(!WillBeAvail(phiOcc)) { continue; } for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { @@ -504,7 +522,7 @@ void SSAPre::SetSave(MeOccur &defX) { CHECK_FATAL(!dom->IsNodeVecEmpty(), "the size to be allocated is 0"); GetIterDomFrontier(fromBb, &itFrontier); for (MePhiOcc *phiOcc : phiOccs) { - if (!phiOcc->IsWillBeAvail()) { + if (!WillBeAvail(phiOcc)) { continue; } if (itFrontier.find(dom->GetDtDfnItem(phiOcc->GetBB()->GetBBId())) == itFrontier.end()) { @@ -551,7 +569,7 @@ void SSAPre::Finalize2() { MePhiOcc *phiOcc = *it; // initialize extraneouse for each MePhiOcc if (!workCand->isSRCand) { - phiOcc->SetIsExtraneous(phiOcc->IsWillBeAvail()); + phiOcc->SetIsExtraneous(WillBeAvail(phiOcc)); } // initialize each operand of phiOcc @@ -571,7 +589,7 @@ void SSAPre::Finalize2() { if (phiOcc->IsRemoved() || !phiOcc->IsExtraneous()) { continue; } - if (!phiOcc->IsWillBeAvail()) { + if (!WillBeAvail(phiOcc)) { phiOcc->SetIsRemoved(true); continue; } @@ -1153,13 +1171,15 @@ void SSAPre::SetVarPhis(MeExpr *meExpr) { } if (scalar->IsDefByPhi()) { MePhiNode *phiMeNode = scalar->GetMePhiDef(); - BBId defBBId = phiMeNode->GetDefBB()->GetBBId(); - CHECK(defBBId < dom->GetDtDfnSize(), "defBBId.idx out of range in SSAPre::SetVarPhis"); - if (varPhiDfns.find(dom->GetDtDfnItem(defBBId)) == varPhiDfns.end() && ScreenPhiBB(defBBId)) { - (void)varPhiDfns.insert(dom->GetDtDfnItem(defBBId)); - for (auto opndIt = phiMeNode->GetOpnds().begin(); opndIt != phiMeNode->GetOpnds().end(); ++opndIt) { - ScalarMeExpr *opnd = *opndIt; - SetVarPhis(opnd); + if (phiMeNode->GetOpnds().size() > 1) { + BBId defBBId = phiMeNode->GetDefBB()->GetBBId(); + CHECK(defBBId < dom->GetDtDfnSize(), "defBBId.idx out of range in SSAPre::SetVarPhis"); + if (varPhiDfns.find(dom->GetDtDfnItem(defBBId)) == varPhiDfns.end() && ScreenPhiBB(defBBId)) { + (void)varPhiDfns.insert(dom->GetDtDfnItem(defBBId)); + for (auto opndIt = phiMeNode->GetOpnds().begin(); opndIt != phiMeNode->GetOpnds().end(); ++opndIt) { + 
ScalarMeExpr *opnd = *opndIt; + SetVarPhis(opnd); + } } } } diff --git a/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp b/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp index 3a0266558e..8d344bdeb8 100644 --- a/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp +++ b/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp @@ -349,7 +349,7 @@ void ExprHoist::HoistExpr(const MapleVector &allOccs, int32 candId) { } auto *phiOpndocc = static_cast(occ); auto *phiOcc = phiOpndocc->GetDefPhiOcc(); - if (phiOcc->IsWillBeAvail() && phiOpndocc->IsOkToInsert()) { + if (phiOcc->IsWillBeAvail() && parent->OKToInsert(phiOpndocc)) { if (hs->cdHS && // need a cd to hoist hs->occ == nullptr && // if not null, hs has been inserted hs->cdHS->occ != nullptr && // make sure there's at least one realocc at cd -- Gitee From c6eab5a9df71ab0b8f238c256be3343f88d42b74 Mon Sep 17 00:00:00 2001 From: Fred Chow Date: Thu, 24 Nov 2022 09:24:35 -0800 Subject: [PATCH 2/6] Implemented min-cut version of SSAPRE used by epre phase under --profileUse --no-epreuseprofile will change epre back to the old version that does not use profile data --- src/mapleall/maple_me/BUILD.gn | 1 + src/mapleall/maple_me/include/mc_ssa_pre.h | 112 ++ src/mapleall/maple_me/include/me_option.h | 1 + src/mapleall/maple_me/include/me_options.h | 1 + src/mapleall/maple_me/include/occur.h | 41 +- src/mapleall/maple_me/include/ssa_epre.h | 6 +- src/mapleall/maple_me/include/ssa_pre.h | 9 +- src/mapleall/maple_me/src/mc_ssa_pre.cpp | 1022 +++++++++++++++++ src/mapleall/maple_me/src/me_option.cpp | 2 + src/mapleall/maple_me/src/me_options.cpp | 5 + src/mapleall/maple_me/src/me_ssa_epre.cpp | 9 +- src/mapleall/maple_me/src/me_stmt_pre.cpp | 2 +- src/mapleall/maple_me/src/occur.cpp | 19 +- src/mapleall/maple_me/src/ssa_pre.cpp | 58 +- .../maple_me/src/ssa_pre_for_hoist.cpp | 2 +- 15 files changed, 1247 insertions(+), 43 deletions(-) create mode 100644 src/mapleall/maple_me/include/mc_ssa_pre.h create mode 100644 
src/mapleall/maple_me/src/mc_ssa_pre.cpp diff --git a/src/mapleall/maple_me/BUILD.gn b/src/mapleall/maple_me/BUILD.gn index f8bd053ab5..c614397f35 100755 --- a/src/mapleall/maple_me/BUILD.gn +++ b/src/mapleall/maple_me/BUILD.gn @@ -130,6 +130,7 @@ src_libmplme = [ "src/me_slp.cpp", "src/lmbc_memlayout.cpp", "src/lmbc_lower.cpp", + "src/mc_ssa_pre.cpp", ] src_libmplmewpo = [ diff --git a/src/mapleall/maple_me/include/mc_ssa_pre.h b/src/mapleall/maple_me/include/mc_ssa_pre.h new file mode 100644 index 0000000000..343b40b833 --- /dev/null +++ b/src/mapleall/maple_me/include/mc_ssa_pre.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#ifndef MAPLE_ME_INCLUDE_MC_SSAPRE_H +#define MAPLE_ME_INCLUDE_MC_SSAPRE_H +#include "ssa_pre.h" + +namespace maple { + +// for representing a node in the reduced SSA graph +class RGNode { + friend class McSSAPre; + friend class Visit; + public: + RGNode(MapleAllocator *alloc, uint32 idx, MeOccur *oc) : id(idx), occ(oc), + pred(alloc->Adapter()), + inEdgesCap(alloc->Adapter()), + usedCap(alloc->Adapter()) {} + private: + uint32 id; + MeOccur *occ; + MapleVector pred; + MapleVector inEdgesCap; // capacity of incoming edges + MapleVector usedCap; // used flow value of outgoing edges +}; + +// designate a visited node and the next outgoing edge to take +class Visit { + friend class McSSAPre; + private: + Visit(RGNode *nd, uint32 idx) : node(nd), predIdx(idx) {} + RGNode *node; + uint32 predIdx; // the index in node's pred + + uint64 AvailableCapacity() const { return node->inEdgesCap[predIdx] - node->usedCap[predIdx]; } + void IncreUsedCapacity(uint64 val) { node->usedCap[predIdx] += val; } + bool operator==(const Visit *rhs) const { return node == rhs->node && predIdx == rhs->predIdx; } +}; + +// for representing a flow path from source to sink +class Route { + friend class McSSAPre; + public: + Route(MapleAllocator *alloc) : visits(alloc->Adapter()) {} + private: + MapleVector visits; + uint64 flowValue = 0; +}; + +class McSSAPre : public SSAPre { + public: + McSSAPre(IRMap &hMap, Dominance &currDom, Dominance &currPdom, MemPool &memPool, MemPool &mp2, PreKind kind, uint32 limit) : + SSAPre(hMap, currDom, currPdom, memPool, mp2, kind, limit), + occ2RGNodeMap(ssaPreAllocator.Adapter()), + maxFlowRoutes(ssaPreAllocator.Adapter()), + minCut(ssaPreAllocator.Adapter()) {} + virtual ~McSSAPre() = default; + + void ApplyMCSSAPRE(); + private: + // step 8 willbeavail + void ResetMCWillBeAvail(MePhiOcc *phiOcc) const; + void ComputeMCWillBeAvail() const; + // step 7 max flow/min cut + bool AmongMinCut(RGNode *, uint32 idx) const; + void DumpRGToFile(); // dump reduced 
graph to dot file + bool IncludedEarlier(Visit **cut, Visit *curVisit, uint32 nextRouteIdx); + void RemoveRouteNodesFromCutSet(std::unordered_multiset &cutSet, Route *route); + bool SearchRelaxedMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar); + bool SearchMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar); + void DetermineMinCut(); + bool VisitANode(RGNode *node, Route *route, std::vector &visitedNodes); + bool FindAnotherRoute(); + void FindMaxFlow(); + // step 6 single sink + void AddSingleSink(); + // step 5 single source + void AddSingleSource(); + // step 4 graph reduction + void GraphReduction(); + // step 3 data flow methods + void SetPartialAnt(MePhiOpndOcc *phiOpnd) const; + void ComputePartialAnt() const; + void ResetFullAvail(MePhiOcc *occ) const; + void ComputeFullAvail() const; + // step 2 renaming methods + void Rename1(); + + MapleUnorderedMap occ2RGNodeMap; + RGNode *source; + RGNode *sink; + uint32 numSourceEdges; + MapleVector maxFlowRoutes; + uint32 nextRGNodeId; + uint64 maxFlowValue; + uint64 relaxedMaxFlowValue; // relax maxFlowValue to avoid excessive mincut search time when number of routes is large + MapleVector minCut; // an array of Visits* to represent the minCut +}; + +} // namespace maple +#endif // MAPLE_ME_INCLUDE_MC_SSAPRE_H diff --git a/src/mapleall/maple_me/include/me_option.h b/src/mapleall/maple_me/include/me_option.h index 4e43e1e05e..7728418555 100644 --- a/src/mapleall/maple_me/include/me_option.h +++ b/src/mapleall/maple_me/include/me_option.h @@ -185,6 +185,7 @@ class MeOption { static bool layoutWithPredict; static bool unifyRets; static bool dumpCfgOfPhases; + static bool epreUseProfile; // safety check option begin static SafetyCheckMode npeCheckMode; static bool isNpeCheckAll; diff --git a/src/mapleall/maple_me/include/me_options.h b/src/mapleall/maple_me/include/me_options.h index da127b6d3d..17230e5ae2 100644 --- 
a/src/mapleall/maple_me/include/me_options.h +++ b/src/mapleall/maple_me/include/me_options.h @@ -135,6 +135,7 @@ extern maplecl::Option remat; extern maplecl::Option unifyrets; extern maplecl::Option lfo; extern maplecl::Option dumpCfgOfPhases; +extern maplecl::Option epreUseProfile; } diff --git a/src/mapleall/maple_me/include/occur.h b/src/mapleall/maple_me/include/occur.h index a24c969ed0..06c2da2a2c 100644 --- a/src/mapleall/maple_me/include/occur.h +++ b/src/mapleall/maple_me/include/occur.h @@ -215,6 +215,8 @@ class MeRealOcc : public MeOccur { bool isLHS; bool isFormalAtEntry; // the fake lhs occurrence at entry for formals bool isHoisted = false; // the hoisted occ used for hoisting + public: + bool rgExcluded = false; // reduced graph excluded, used only by McSSAPre }; class MeInsertedOcc : public MeOccur { @@ -275,13 +277,13 @@ class MePhiOpndOcc : public MeOccur { hasRealUse(false), isInsertedOcc(false), isPhiOpndReload(false), + isMCInsert(false), defPhiOcc(nullptr), phiOpnd4Temp(nullptr) { currentExpr.meStmt = nullptr; } ~MePhiOpndOcc() = default; - bool IsOkToInsert() const; void Dump(const IRMap &irMap) const override; bool IsProcessed() const { return isProcessed; @@ -315,6 +317,14 @@ class MePhiOpndOcc : public MeOccur { isPhiOpndReload = phiOpndReload; } + bool IsMCInsert() const { + return isMCInsert;; + } + + void SetIsMCInsert(bool mcInsert) { + isMCInsert = mcInsert; + } + const MePhiOcc *GetDefPhiOcc() const { return defPhiOcc; } @@ -356,6 +366,7 @@ class MePhiOpndOcc : public MeOccur { bool hasRealUse; bool isInsertedOcc; // the phi operand was inserted by inserted occ bool isPhiOpndReload; // if insertedocc and redefined the def, set this flag + bool isMCInsert; // used only in mc-ssapre MePhiOcc *defPhiOcc; // its lhs union { MeExpr *meExpr; // the current expression at the end of the block containing this PhiOpnd @@ -375,6 +386,8 @@ class MePhiOcc : public MeOccur { isLater(true), isExtraneous(false), isRemoved(false), + 
isPartialAnt(false), + isMCWillBeAvail(true), phiOpnds(alloc.Adapter()), regPhi(nullptr), varPhi(nullptr) {} @@ -408,6 +421,14 @@ class MePhiOcc : public MeOccur { isCanBeAvail = canBeAvail; } + bool IsFullyAvail() const { + return isCanBeAvail; + } + + void SetIsFullyAvail(bool fullyAvail) { + isCanBeAvail = fullyAvail; + } + bool IsLater() const { return isLater; } @@ -432,6 +453,22 @@ class MePhiOcc : public MeOccur { isRemoved = removed; } + bool IsPartialAnt() const { + return isPartialAnt; + } + + void SetIsPartialAnt(bool pant) { + isPartialAnt = pant; + } + + bool IsMCWillBeAvail() const { + return isMCWillBeAvail; + } + + void SetIsMCWillBeAvail(bool wba) { + isMCWillBeAvail = wba; + } + const MapleVector &GetPhiOpnds() const { return phiOpnds; } @@ -488,6 +525,8 @@ class MePhiOcc : public MeOccur { bool isLater; bool isExtraneous; bool isRemoved; // during finalize2, marked this phiocc is removed or not + bool isPartialAnt; // used only in mc-ssapre + bool isMCWillBeAvail; // used only in mc-ssapre MapleVector phiOpnds; MePhiNode *regPhi; // the reg phi being inserted, maybe can delete it later MePhiNode *varPhi; // the Var phi being inserted, maybe can delete it later diff --git a/src/mapleall/maple_me/include/ssa_epre.h b/src/mapleall/maple_me/include/ssa_epre.h index 0df35a90a7..2101208fea 100644 --- a/src/mapleall/maple_me/include/ssa_epre.h +++ b/src/mapleall/maple_me/include/ssa_epre.h @@ -14,14 +14,14 @@ */ #ifndef MAPLE_ME_INCLUDE_SSAEPRE_H #define MAPLE_ME_INCLUDE_SSAEPRE_H -#include "ssa_pre.h" +#include "mc_ssa_pre.h" namespace maple { -class SSAEPre : public SSAPre { +class SSAEPre : public McSSAPre { public: SSAEPre(IRMap &map, Dominance &dom, Dominance &pdom, MemPool &memPool, MemPool &mp2, PreKind kind, uint32 limit, bool includeRef, bool lhsIvar) - : SSAPre(map, dom, pdom, memPool, mp2, kind, limit), epreIncludeRef(includeRef), enableLHSIvar(lhsIvar) {} + : McSSAPre(map, dom, pdom, memPool, mp2, kind, limit), epreIncludeRef(includeRef), 
enableLHSIvar(lhsIvar) {} virtual ~SSAEPre() = default; diff --git a/src/mapleall/maple_me/include/ssa_pre.h b/src/mapleall/maple_me/include/ssa_pre.h index 9e782bca27..20649704dd 100644 --- a/src/mapleall/maple_me/include/ssa_pre.h +++ b/src/mapleall/maple_me/include/ssa_pre.h @@ -139,6 +139,7 @@ class SSAPre { bool strengthReduction = false; bool doLFTR = false; + bool doMinCut = false; protected: // step 6 codemotion methods @@ -156,6 +157,13 @@ class SSAPre { } virtual void CodeMotion(); // step 5 Finalize methods + bool WillBeAvail(MePhiOcc *phiOcc) { + if (!doMinCut) { + return phiOcc->IsWillBeAvail(); + } + return phiOcc->IsMCWillBeAvail(); + } + bool OKToInsert(MePhiOpndOcc *phiOpnd); virtual void Finalize1(); void SetSave(MeOccur &defX); void SetReplacement(MePhiOcc &occ, MeOccur &repDef); @@ -285,7 +293,6 @@ class SSAPre { uint32 strIdxCount = 0; // ssapre will create a lot of temp variables if using var to store redundances, start from 0 PreWorkCandHashTable preWorkCandHashTable; - private: virtual void DoSSAFRE() {}; bool enableDebug = false; diff --git a/src/mapleall/maple_me/src/mc_ssa_pre.cpp b/src/mapleall/maple_me/src/mc_ssa_pre.cpp new file mode 100644 index 0000000000..bf2c7f3abb --- /dev/null +++ b/src/mapleall/maple_me/src/mc_ssa_pre.cpp @@ -0,0 +1,1022 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#include +#include +#include +#include "mc_ssa_pre.h" +#include "dominance.h" +#include "mir_builder.h" + +// Implementation of the MC-SSAPRE algorithm based on the PLDI 2011 paper: +// An SSA-based Algorithm for Optimal Speculative Code Motion Under an Execution Profile +// by Hucheng Zhou, Wenguang Chen and Fred Chow + +namespace { +constexpr int kFuncNameLenLimit = 80; +} + +namespace maple { + +// ================ Step 8: WillBeAvail ================= + +void McSSAPre::ResetMCWillBeAvail(MePhiOcc *occ) const { + if (!occ->IsMCWillBeAvail()) { + return; + } + occ->SetIsMCWillBeAvail(false); + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + if (!phiOcc->IsMCWillBeAvail()) { + continue; + } + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() != nullptr && phiOpnd->GetDef() == occ) { + // phiOpnd is a use of occ + if (!phiOpnd->HasRealUse() && !phiOpnd->IsMCInsert()) { + ResetMCWillBeAvail(phiOcc); + break; + } + } + } + } +} + +void McSSAPre::ComputeMCWillBeAvail() const { + if (minCut.size() == 0) { + for (MePhiOcc *phiOcc : phiOccs) { + phiOcc->SetIsMCWillBeAvail(false); + } + return; + } + // set insert in phi operands + for (Visit *visit : minCut) { + MeOccur *occ = visit->node->occ; + if (occ->GetOccType() == kOccPhiopnd) { + MePhiOpndOcc *phiOpndOcc = static_cast(occ); + phiOpndOcc->SetIsMCInsert(true); + } + } + for (MePhiOcc *phiOcc : phiOccs) { + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() == nullptr && !phiOpnd->IsMCInsert()) { + ResetMCWillBeAvail(phiOcc); + break; + } + } + } +} + +// ================ Step 7: Max Flow / Min Cut ================= + +bool McSSAPre::AmongMinCut(RGNode *nd, uint32 idx) const { + for (Visit *visit : minCut) { + if (visit->node == nd && visit->predIdx == idx) { + return true; + } + } + return false; +} + +void McSSAPre::DumpRGToFile() { + if (sink == nullptr) { + return; + } + std::string fileName = "rg-of-cand-"; + 
fileName.append(std::to_string(workCand->GetIndex())); + fileName.append("-"); + const std::string &funcName = mirModule->CurFunction()->GetName(); + if (funcName.size() < kFuncNameLenLimit) { + fileName.append(funcName); + } else { + fileName.append(funcName.c_str(), kFuncNameLenLimit); + } + fileName.append(".dot"); + std::ofstream rgFile; + std::streambuf *coutBuf = LogInfo::MapleLogger().rdbuf(); // keep original cout buffer + std::streambuf *buf = rgFile.rdbuf(); + LogInfo::MapleLogger().rdbuf(buf); + rgFile.open(fileName, std::ios::trunc); + rgFile << "digraph {\n"; + for (int32 i = 0; i < sink->pred.size(); i++) { + RGNode *pre = sink->pred[i]; + rgFile << "real" << pre->id << " -> " << "\"sink\nmaxflow " << maxFlowValue << "\";\n"; + } + MapleUnorderedMap::iterator it = occ2RGNodeMap.begin(); + for (; it != occ2RGNodeMap.end(); it++) { + RGNode *rgNode = it->second; + for (int32 i = 0; i < rgNode->pred.size(); i++) { + RGNode *pre = rgNode->pred[i]; + if (pre != source) { + if (pre->occ->GetOccType() == kOccPhiocc) { + rgFile << "phi" << pre->id << " -> "; + } else { + rgFile << "real" << pre->id << " -> "; + } + if (rgNode->occ->GetOccType() == kOccPhiocc) { + rgFile << "phi" << rgNode->id; + } else { + rgFile << "real" << rgNode->id; + } + } else { + rgFile << "source" << " -> " << "phi" << rgNode->id; + } + if (AmongMinCut(rgNode, i)) { + rgFile << "[style=dotted][color=red]"; + } + if (rgNode->usedCap[i] == 0) { + rgFile << "[style=dashed][color=green]"; + } + rgFile << "[label=\"" << rgNode->usedCap[i] << "|" << rgNode->inEdgesCap[i] << "\"];\n"; + } + } + rgFile << "}\n"; + rgFile.flush(); + rgFile.close(); + LogInfo::MapleLogger().rdbuf(coutBuf); + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() << " dumped to " << fileName << "\n"; +} + +bool McSSAPre::IncludedEarlier(Visit **cut, Visit *curVisit, uint32 nextRouteIdx) { + uint32 i = nextRouteIdx; + while (i != 0) { + i--; + if (cut[i]->node == curVisit->node && 
cut[i]->predIdx == curVisit->predIdx) { + return true; + } + } + return false; +} + +// remove this route's nodes from cutSet +void McSSAPre::RemoveRouteNodesFromCutSet(std::unordered_multiset &cutSet, Route *route) { + for (uint32 i = 1; i < route->visits.size(); i++) { + Visit &curVisit = route->visits[i]; + std::unordered_multiset::iterator it = cutSet.find(curVisit.node->id); + ASSERT(it != cutSet.end(), "cutSet maintenance error"); + cutSet.erase(it); + } +} + +// find the cut closest to the sink whose total flow is relaxedMaxFlowValue +bool McSSAPre::SearchRelaxedMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar) { + Route *curRoute = maxFlowRoutes[nextRouteIdx]; + Visit *curVisit = nullptr; + + // determine starting value of visitIdx: start searching back from route end; + // if any node is in cutSet, set visitIdx as that nodes's index in route; + // otherwise, set visitIdx to 0 + uint32 visitIdx = curRoute->visits.size(); + do { + visitIdx--; + if (cutSet.count(curRoute->visits[visitIdx].node->id) != 0) { + break; + } + } while (visitIdx != 1); + // update cutSet with visited nodes lower than visitIdx + if (visitIdx != 1) { + for (uint i = visitIdx - 1; i > 0; i--) { + cutSet.insert(curRoute->visits[i].node->id); + } + } + + bool success = false; + do { + if (visitIdx == curRoute->visits.size()) { + RemoveRouteNodesFromCutSet(cutSet, curRoute); + return false; + } + curVisit = &curRoute->visits[visitIdx]; + uint64 visitCap = curVisit->node->inEdgesCap[curVisit->predIdx]; + cut[nextRouteIdx] = curVisit; + if (visitIdx != 0) { + cutSet.insert(curVisit->node->id); + } + if (IncludedEarlier(cut, curVisit, nextRouteIdx)) { + visitCap = 0; + } + success = (flowSoFar + visitCap <= relaxedMaxFlowValue); + if (success && nextRouteIdx != (maxFlowRoutes.size() - 1)) { + success = SearchRelaxedMinCut(cut, cutSet, nextRouteIdx+1, flowSoFar + visitCap); + } + visitIdx++; + } while (!success); + return true; +} + +// find the cut 
closest to the sink whose total flow is maxFlowValue +bool McSSAPre::SearchMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar) { + Route *curRoute = maxFlowRoutes[nextRouteIdx]; + Visit *curVisit = nullptr; + + // determine starting value of visitIdx: start searching back from route end; + // if any node is in cutSet, set visitIdx as that nodes's index in route; + // otherwise, set visitIdx to 0 + uint32 visitIdx = curRoute->visits.size(); + do { + visitIdx--; + if (cutSet.count(curRoute->visits[visitIdx].node->id) != 0) { + break; + } + } while (visitIdx != 1); + // update cutSet with visited nodes lower than visitIdx + if (visitIdx != 1) { + for (uint i = visitIdx - 1; i > 0; i--) { + cutSet.insert(curRoute->visits[i].node->id); + } + } + + bool success = false; + do { + if (visitIdx == curRoute->visits.size()) { + RemoveRouteNodesFromCutSet(cutSet, curRoute); + return false; + } + curVisit = &curRoute->visits[visitIdx]; + uint64 visitCap = curVisit->node->inEdgesCap[curVisit->predIdx]; + uint64 usedCap = curVisit->node->usedCap[curVisit->predIdx]; + if (visitCap != usedCap) { + if (visitIdx != 0) { + cutSet.insert(curVisit->node->id); + } + visitIdx++; + continue; + } + cut[nextRouteIdx] = curVisit; + if (visitIdx != 0) { + cutSet.insert(curVisit->node->id); + } + if (IncludedEarlier(cut, curVisit, nextRouteIdx)) { + visitCap = 0; + } + success = (flowSoFar + visitCap <= maxFlowValue); + if (success && nextRouteIdx != (maxFlowRoutes.size() - 1)) { + success = SearchMinCut(cut, cutSet, nextRouteIdx+1, flowSoFar + visitCap); + } + visitIdx++; + } while (!success); + return true; +} + +void McSSAPre::DetermineMinCut() { + if (maxFlowRoutes.empty()) { + if (GetSSAPreDebug()) { + DumpRGToFile(); + } + return; + } + // maximum width of the min cut is the number of routes in maxFlowRoutes + Visit* cut[maxFlowRoutes.size()]; + std::unordered_multiset cutSet; // key is RGNode's id; must be kept in sync with cut[]; sink node is not 
entered
+  constexpr double defaultRelaxScaling = 1.25;
+  relaxedMaxFlowValue = static_cast<uint64>(static_cast<double>(maxFlowValue) * defaultRelaxScaling);
+  bool relaxedSearch = false;
+  if (maxFlowRoutes.size() >= 20) {
+    // apply arbitrary heuristics to reduce search time
+    relaxedSearch = true;
+    relaxedMaxFlowValue = maxFlowValue * (maxFlowRoutes.size() / 10);
+  }
+  bool success = !relaxedSearch && SearchMinCut(cut, cutSet, 0, 0);
+  if (!success) {
+    relaxedSearch = true;
+    success = SearchRelaxedMinCut(cut, cutSet, 0, 0);
+  }
+  if (!success) {
+    if (GetSSAPreDebug()) {
+      mirModule->GetOut() << "MinCut failed\n";
+      DumpRGToFile();
+    }
+    CHECK_FATAL(false, "McSSAPre::DetermineMinCut: failed to find min cut");
+  }
+  // sort cut by (node id, predIdx) so that visits of the same edge become adjacent
+  std::sort(cut, cut+maxFlowRoutes.size(), [](const Visit *left, const Visit *right) {
+    return (left->node != right->node) ? (left->node->id < right->node->id)
+                                       : (left->predIdx < right->predIdx); });
+  // remove duplicates in the cut to form mincut; routes own distinct Visit objects, so compare by (node, predIdx)
+  minCut.push_back(cut[0]);
+  size_t duplicatedVisits = 0;
+  for (uint32 i = 1; i < maxFlowRoutes.size(); i++) {
+    if (cut[i]->node != cut[i-1]->node || cut[i]->predIdx != cut[i-1]->predIdx) {
+      minCut.push_back(cut[i]);
+    } else {
+      duplicatedVisits++;
+    }
+  }
+  if (GetSSAPreDebug()) {
+    mirModule->GetOut() << "finished ";
+    if (relaxedSearch) {
+      mirModule->GetOut() << "relaxed ";
+    }
+    mirModule->GetOut() << "MinCut\n";
+    DumpRGToFile();
+    if (duplicatedVisits != 0) {
+      mirModule->GetOut() << duplicatedVisits << " duplicated visits in mincut\n";
+    }
+  }
+}
+
+bool McSSAPre::VisitANode(RGNode *node, Route *route, std::vector<bool> &visitedNodes) {
+  ASSERT(node->pred.size() != 0 , "McSSAPre::VisitANode: no connection to source node");
+  // if any pred is the source and there's capacity to reach it, return success
+  for (uint32 i = 0; i < node->pred.size(); i++) {
+    if (node->pred[i] == source && node->inEdgesCap[i] > node->usedCap[i]) {
+      // if there is another pred never taken that also reaches source, use that instead
+      for (uint32 k = i + 1; k <
node->pred.size(); k++) {
+        if (node->pred[k] == source && node->usedCap[k] == 0 && node->inEdgesCap[k] > 0) {
+          route->visits.push_back(Visit(node, k));
+          return true;
+        }
+      }
+      route->visits.push_back(Visit(node, i));
+      return true;
+    }
+  }
+
+  // pick a never-taken predecessor path first
+  for (uint32 i = 0; i < node->pred.size(); i++) {
+    if (node->usedCap[i] == 0 && node->inEdgesCap[i] > 0 && !visitedNodes[node->pred[i]->id]) {
+      route->visits.push_back(Visit(node, i));
+      visitedNodes[node->pred[i]->id] = true;
+      bool success = VisitANode(node->pred[i], route, visitedNodes);
+      if (!success) {
+        route->visits.pop_back();
+      } else {
+        return true;
+      }
+    }
+  }
+
+  size_t numPreds = node->pred.size();
+  uint32 sortedPred[numPreds];
+  for (uint32 i = 0; i < numPreds; i++) {
+    sortedPred[i] = i;
+  }
+  // put sortedPred[] in increasing order of capacities
+  std::sort(sortedPred, sortedPred+numPreds, [node](uint32 m, uint32 n) {
+    return node->inEdgesCap[m] < node->inEdgesCap[n]; });
+  // for this round, prefer predecessor with higher unused capacity
+  for (uint32 i = 0; i < numPreds; i++) {
+    uint32 j = sortedPred[i];
+    if (!visitedNodes[node->pred[j]->id] && node->inEdgesCap[j] > node->usedCap[j]) {
+      route->visits.push_back(Visit(node, j));
+      visitedNodes[node->pred[j]->id] = true;
+      bool success = VisitANode(node->pred[j], route, visitedNodes);
+      if (!success) {
+        route->visits.pop_back();
+      } else {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// return false if not successful; if successful, the new route will be pushed
+// to maxFlowRoutes
+bool McSSAPre::FindAnotherRoute() {
+  std::vector<bool> visitedNodes(nextRGNodeId, false);  // indexed by RGNode id; source and sink ids must fit too
+  Route *route = perCandMemPool->New<Route>(&perCandAllocator);
+  bool success = false;
+  // pick an untaken sink predecessor first
+  for (int32 i = 0; i < sink->pred.size(); i++) {
+    if (sink->usedCap[i] == 0) {
+      route->visits.push_back(Visit(sink, i));
+      visitedNodes[sink->pred[i]->id] = true;
+      success =
VisitANode(sink->pred[i], route, visitedNodes); + if (!success) { + route->visits.pop_back(); + } else { + break; + } + } + } + if (!success) { + // now, pick any sink predecessor + for (int32 i = 0; i < sink->pred.size(); i++) { + route->visits.push_back(Visit(sink, i)); + visitedNodes[sink->pred[i]->id] = true; + success = VisitANode(sink->pred[i], route, visitedNodes); + if (!success) { + route->visits.pop_back(); + } else { + break; + } + } + } + if (!success) { + return false; + } + // find bottleneck capacity along route + uint64 minAvailCap = route->visits[0].AvailableCapacity(); + for (int32 i = 1; i < route->visits.size(); i++) { + uint64 curAvailCap = route->visits[i].AvailableCapacity(); + minAvailCap = std::min(minAvailCap, curAvailCap); + } + route->flowValue = minAvailCap; + // update usedCap along route + for (int32 i = 0; i < route->visits.size(); i++) { + route->visits[i].IncreUsedCapacity(minAvailCap); + } + maxFlowRoutes.push_back(route); + return true; +} + +void McSSAPre::FindMaxFlow() { + if (sink == nullptr) { + return; + } + maxFlowValue = 0; + bool found; + do { + found = FindAnotherRoute(); + } while (found); + // calculate maxFlowValue; + for (Route *route : maxFlowRoutes) { + maxFlowValue += route->flowValue; + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << ": FindMaxFlow found " << maxFlowRoutes.size() << " routes\n"; + for (size_t i = 0; i < maxFlowRoutes.size(); i++) { + Route *route = maxFlowRoutes[i]; + mirModule->GetOut() << "route " << i << " sink:pred" << route->visits[0].predIdx; + for (size_t j = 1; j < route->visits.size(); j++) { + if (route->visits[j].node->occ->GetOccType() == kOccPhiocc) { + mirModule->GetOut() << " phi"; + } else { + mirModule->GetOut() << " real"; + } + mirModule->GetOut() << route->visits[j].node->id << ":pred" << route->visits[j].predIdx; + } + mirModule->GetOut() << " flowValue " << route->flowValue; + mirModule->GetOut() << "\n"; + } + 
mirModule->GetOut() << "maxFlowValue is " << maxFlowValue << "\n"; + } +} + +// ================ Step 6: Add Single Sink ================= + +void McSSAPre::AddSingleSink() { + if (numSourceEdges == 0) { + return; // empty reduced graph + } + sink = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, nullptr); + size_t numToSink = 0; + MapleUnorderedMap::iterator it = occ2RGNodeMap.begin(); + for (; it != occ2RGNodeMap.end(); it++) { + if (it->first->GetOccType() != kOccReal) { + continue; + } + RGNode *use = it->second; + // add edge from this use node to sink + sink->pred.push_back(use); + sink->inEdgesCap.push_back(UINT64_MAX); + sink->usedCap.push_back(0); + numToSink++; + } + ASSERT(numToSink != 0, "McSSAPre::AddSingleSink: found 0 edge to sink"); + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() << " has " << numToSink << " edges to sink\n"; + } +} + +// ================ Step 5: Add Single Source ================= +void McSSAPre::AddSingleSource() { + source = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, nullptr); + for (MePhiOcc *phiOcc : phiOccs) { + if (phiOcc->IsPartialAnt() && !phiOcc->IsFullyAvail()) { + // look for null operands + for (int32 i = 0; i < phiOcc->GetPhiOpnds().size(); i++) { + MePhiOpndOcc *phiopndOcc = phiOcc->GetPhiOpnd(i); + if (phiopndOcc->GetDef() != nullptr) { + continue; + } + // add edge from source to this phi node + RGNode *sucNode = occ2RGNodeMap[phiOcc]; + sucNode->pred.push_back(source); + sucNode->inEdgesCap.push_back(phiOcc->GetBB()->GetPred(i)->GetFrequency()+1); + sucNode->usedCap.push_back(0); + numSourceEdges++; + } + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex(); + if (numSourceEdges == 0) { + mirModule->GetOut() << " has empty reduced graph\n"; + } else { + mirModule->GetOut() << " source has " << numSourceEdges << " succs\n"; + } + } +} + +// ================ Step 4: Graph Reduction 
================= +void McSSAPre::GraphReduction() { + size_t numPhis = 0; + size_t numRealOccs = 0; + size_t numType1Edges = 0; + size_t numType2Edges = 0; + // add def nodes + for (MePhiOcc *phiOcc : phiOccs) { + if (phiOcc->IsPartialAnt() && !phiOcc->IsFullyAvail()) { + RGNode *newRGNode = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, phiOcc); + occ2RGNodeMap.insert(std::pair(phiOcc, newRGNode)); + numPhis++; + } + } + if (occ2RGNodeMap.empty()) { + return; + } + // add use nodes and use-def edges + for (MeOccur *occ : allOccs) { + if (occ->GetOccType() == kOccReal) { + MeRealOcc *realOcc = static_cast(occ); + if (!realOcc->rgExcluded && realOcc->GetDef() != nullptr) { + MeOccur *defOcc = realOcc->GetDef(); + ASSERT(defOcc->GetOccType() == kOccPhiocc, "McSSAPre::GraphReduction: real occ not defined by phi"); + if (occ2RGNodeMap.find(defOcc) != occ2RGNodeMap.end()) { + RGNode *use = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, realOcc); + occ2RGNodeMap[realOcc] = use; + numRealOccs++; + RGNode *def = occ2RGNodeMap[defOcc]; + use->pred.push_back(def); + use->inEdgesCap.push_back(realOcc->GetBB()->GetFrequency()+1); + use->usedCap.push_back(0); + numType2Edges++; + } + } + } else if (occ->GetOccType() == kOccPhiopnd) { + MePhiOpndOcc *phiopndOcc = static_cast(occ); + MePhiOcc *defPhiOcc = phiopndOcc->GetDefPhiOcc(); + if (defPhiOcc->IsPartialAnt() && !defPhiOcc->IsFullyAvail()) { + // defPhiOcc is the use node and it has already been added + MeOccur *defOcc = phiopndOcc->GetDef(); + if (defOcc != nullptr && defOcc->GetOccType() == kOccPhiocc && + static_cast(defOcc)->IsPartialAnt() && + !static_cast(defOcc)->IsFullyAvail()) { + ASSERT(occ2RGNodeMap.find(defOcc) != occ2RGNodeMap.end(), "McSSAPre::GraphReduction: def node not found"); + RGNode *def = occ2RGNodeMap[defOcc]; + RGNode *use = occ2RGNodeMap[defPhiOcc]; + use->pred.push_back(def); + // find the index of phiopndOcc in defPhiOcc's phiOpnds + uint32 i; + for (i = 0; i < 
defPhiOcc->GetPhiOpnds().size(); i++) { + if (defPhiOcc->GetPhiOpnd(i) == phiopndOcc) { + break; + } + } + ASSERT(i != defPhiOcc->GetPhiOpnds().size(), "McSSAPre::GraphReduction: cannot find corresponding phi opnd"); + use->inEdgesCap.push_back(defPhiOcc->GetBB()->GetPred(i)->GetFrequency()+1); + use->usedCap.push_back(0); + numType1Edges++; + } + } + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << " after GraphReduction, phis: " << numPhis << " reals: " << numRealOccs + << " type 1 edges: " << numType1Edges << " type 2 edges: " << numType2Edges << "\n"; + } +} + +// ================ Step 3: Data Flow Computations ================= + +// set partial anticipation +void McSSAPre::SetPartialAnt(MePhiOpndOcc *phiOpnd) const { + MeOccur *defOcc = phiOpnd->GetDef(); + if (defOcc == nullptr || defOcc->GetOccType() != kOccPhiocc) { + return; + } + auto *defPhiOcc = static_cast(defOcc); + if (defPhiOcc->IsPartialAnt()) { + return; + } + defPhiOcc->SetIsPartialAnt(true); + for (MePhiOpndOcc *mePhiOpnd : defPhiOcc->GetPhiOpnds()) { + SetPartialAnt(mePhiOpnd); + } +} + +// compute partial anticipation for each PHI +void McSSAPre::ComputePartialAnt() const { + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + if (phiOcc->IsPartialAnt()) { + // propagate partialAnt along use-def edges + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + SetPartialAnt(phiOpnd); + } + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << " after PartialAnt\n"; + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + phiOcc->Dump(*irMap); + if (phiOcc->IsPartialAnt()) { + mirModule->GetOut() << " is partialant\n"; + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (!phiOpnd->IsProcessed()) { + phiOpnd->Dump(*irMap); + mirModule->GetOut() << " has not been processed by Rename2\n"; + } + } + } 
else { + mirModule->GetOut() << " is not partialant\n"; + } + } + } +} + +void McSSAPre::ResetFullAvail(MePhiOcc *occ) const { + if (!occ->IsFullyAvail()) { + return; + } + occ->SetIsFullyAvail(false); + // reset those phiocc nodes that have occ as one of its operands + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + if (!phiOcc->IsFullyAvail()) { + continue; + } + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() != nullptr && phiOpnd->GetDef() == occ) { + // phiOpnd is a use of occ + if (!phiOpnd->HasRealUse()) { + ResetFullAvail(phiOcc); + break; + } + } + } + } +} + +// the fullyavail attribute is stored in the isCanBeAvail field +void McSSAPre::ComputeFullAvail() const { + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + // reset fullyavail if any phi operand is null + bool existNullDef = false; + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() == nullptr) { + existNullDef = true; + break; + } + } + if (existNullDef) { + ResetFullAvail(phiOcc); + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << " after FullyAvailable\n"; + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + phiOcc->Dump(*irMap); + if (phiOcc->IsFullyAvail()) { + mirModule->GetOut() << " is fullyavail\n"; + } else { + mirModule->GetOut() << " is not fullyavail\n"; + } + } + } +} + +// ================ Step 2: Renaming ================= +void McSSAPre::Rename1() { + std::stack occStack; + rename2Set.clear(); + classCount = 1; + // iterate the occurrence according to its preorder dominator tree + for (MeOccur *occ : allOccs) { + while (!occStack.empty() && !occStack.top()->IsDominate(*dom, *occ)) { + occStack.pop(); + } + switch (occ->GetOccType()) { + case kOccReal: { + if (occStack.empty()) { + // assign new class + occ->SetClassID(classCount++); + 
occStack.push(occ); + break; + } + MeOccur *topOccur = occStack.top(); + if (topOccur->GetOccType() == kOccUse || topOccur->GetOccType() == kOccMembar) { + occ->SetClassID(classCount++); + occStack.push(occ); + break; + } + auto *realOcc = static_cast(occ); + if (topOccur->GetOccType() == kOccReal) { + auto *realTopOccur = static_cast(topOccur); + if (AllVarsSameVersion(*realTopOccur, *realOcc)) { + // all corresponding variables are the same + realOcc->SetClassID(realTopOccur->GetClassID()); + if (realTopOccur->GetDef() != nullptr) { + realOcc->SetDef(realTopOccur->GetDef()); + } else { + realOcc->SetDef(realTopOccur); + } + realOcc->rgExcluded = true; + } else { + // assign new class + occ->SetClassID(classCount++); + occStack.push(occ); + } + } else { + // top of stack is a PHI occurrence + ASSERT(topOccur->GetOccType() == kOccPhiocc, "invalid kOccPhiocc"); + std::vector varVec; + CollectVarForCand(*realOcc, varVec); + bool isAllDom = true; + if (realOcc->IsLHS()) { + isAllDom = false; + } else { + for (auto varIt = varVec.begin(); varIt != varVec.end(); ++varIt) { + MeExpr *varMeExpr = *varIt; + if (workCand->isSRCand) { + varMeExpr = ResolveAllInjuringDefs(varMeExpr); + } + if (!DefVarDominateOcc(varMeExpr, *topOccur)) { + isAllDom = false; + } + } + } + MePhiOcc *phiTopOccur = static_cast(topOccur); + if (isAllDom) { + realOcc->SetClassID(topOccur->GetClassID()); + realOcc->SetDef(topOccur); + (void)rename2Set.insert(realOcc->GetPosition()); + phiTopOccur->SetIsPartialAnt(true); + } else { + // assign new class + occ->SetClassID(classCount++); + } + occStack.push(occ); + } + break; + } + case kOccCompare: { + if (occStack.empty()) { + break; + } + MeOccur *topOccur = occStack.top(); + if (topOccur->GetOccType() == kOccUse || topOccur->GetOccType() == kOccMembar) { + break; + } + MeRealOcc *realOcc = static_cast(occ); + ScalarMeExpr *scalarOpnd0 = dynamic_cast(workCand->GetTheMeExpr()->GetOpnd(0)); + ScalarMeExpr *scalarOpnd1 = 
dynamic_cast(workCand->GetTheMeExpr()->GetOpnd(1)); + ScalarMeExpr *compareOpnd0 = dynamic_cast(realOcc->GetMeExpr()->GetOpnd(0)); + ScalarMeExpr *compareOpnd1 = dynamic_cast(realOcc->GetMeExpr()->GetOpnd(1)); + // set compareOpnd to be the scalar operand that is common to + // workCand->theMeExpr and realOcc->meExpr + ScalarMeExpr *compareOpnd = nullptr; + uint32 scalarOpndNo = 0; + if (scalarOpnd0 != nullptr) { + if (compareOpnd0 != nullptr && scalarOpnd0->GetOst() == compareOpnd0->GetOst()) { + compareOpnd = compareOpnd0; + scalarOpndNo = 0; + } else if (compareOpnd1 != nullptr && scalarOpnd0->GetOst() == compareOpnd1->GetOst()) { + compareOpnd = compareOpnd1; + scalarOpndNo = 0; + } + } + if (scalarOpnd1 != nullptr) { + if (compareOpnd0 != nullptr && scalarOpnd1->GetOst() == compareOpnd0->GetOst()) { + compareOpnd = compareOpnd0; + scalarOpndNo = 1; + } else if (compareOpnd1 != nullptr && scalarOpnd1->GetOst() == compareOpnd1->GetOst()) { + compareOpnd = compareOpnd1; + scalarOpndNo = 1; + } + } + CHECK_FATAL(compareOpnd != nullptr, "Rename1: compOcc does not correspond to realOcc"); + ScalarMeExpr *resolvedCompareOpnd = ResolveAllInjuringDefs(compareOpnd); + if (topOccur->GetOccType() == kOccReal) { + MeRealOcc *realTopOccur = static_cast(topOccur); + ScalarMeExpr *topOccurOpnd = static_cast(realTopOccur->GetMeExpr()->GetOpnd(scalarOpndNo)); + if (compareOpnd == topOccurOpnd || resolvedCompareOpnd == topOccurOpnd) { + realOcc->SetClassID(realTopOccur->GetClassID()); + if (realTopOccur->GetDef() != nullptr) { + realOcc->SetDef(realTopOccur->GetDef()); + } else { + realOcc->SetDef(realTopOccur); + } + } + break; + } + // top of stack is a PHI occurrence + ASSERT(topOccur->GetOccType() == kOccPhiocc, "invalid kOccPhiocc"); + if (DefVarDominateOcc(compareOpnd, *topOccur)) { + realOcc->SetClassID(topOccur->GetClassID()); + realOcc->SetDef(topOccur); + } + break; + } + case kOccPhiocc: { + // assign new class + occ->SetClassID(classCount++); + occStack.push(occ); + 
break; + } + case kOccPhiopnd: { + // stow away the use occurrences at the stack top + MeOccur *stowedUseOcc = nullptr; + if (!occStack.empty() && occStack.top()->GetOccType() == kOccUse) { + stowedUseOcc = occStack.top(); + occStack.pop(); + CHECK_FATAL(occStack.empty() || occStack.top()->GetOccType() != kOccUse, + "Rename1: cannot have 2 consecutive use occurs on stack"); + } + if (occStack.empty() || occStack.top()->GetOccType() == kOccMembar) { + occ->SetDef(nullptr); + } else { + MeOccur *topOccur = occStack.top(); + occ->SetDef(topOccur); + occ->SetClassID(topOccur->GetClassID()); + if (topOccur->GetOccType() == kOccReal) { + static_cast(occ)->SetHasRealUse(true); + } + } + // push stowed use_occ back + if (stowedUseOcc != nullptr) { + occStack.push(stowedUseOcc); + } + break; + } + case kOccExit: + break; + case kOccMembar: { + if (!occStack.empty()) { + MeOccur *topOccur = occStack.top(); + if (topOccur->GetOccType() == kOccPhiocc) { + } else if (topOccur->GetOccType() != occ->GetOccType()) { + occStack.push(occ); + } + } else { + occStack.push(occ); + } + break; + } + default: + ASSERT(false, "should not be here"); + break; + } + } + if (GetSSAPreDebug()) { + PreWorkCand *curCand = workCand; + mirModule->GetOut() << "++++ ssapre candidate " << curCand->GetIndex() << " after rename1\n"; + for (MeOccur *occ : allOccs) { + occ->Dump(*irMap); + mirModule->GetOut() << '\n'; + } + mirModule->GetOut() << "\n" << "rename2 set:\n"; + for (uint32 pos : rename2Set) { + MeRealOcc *occur = workCand->GetRealOcc(pos); + occur->Dump(*irMap); + mirModule->GetOut() << " with def at\n"; + occur->GetDef()->Dump(*irMap); + mirModule->GetOut() << "\n"; + } + mirModule->GetOut() << "\n"; + } +} + +void McSSAPre::ApplyMCSSAPRE() { + // #0 build worklist + BuildWorkList(); + if (GetSSAPreDebug()) { + mirModule->GetOut() << " worklist initial size " << workList.size() << '\n'; + } + ConstructUseOccurMap(); + uint32 cnt = 0; + while (!workList.empty()) { + ++cnt; + if (cnt > 
preLimit) { + break; + } + workCand = workList.front(); + workCand->SetIndex(static_cast(cnt)); + workList.pop_front(); + if (workCand->GetRealOccs().empty()) { + workCand->deletedFromWorkList = true; + continue; + } + if ((preKind == kExprPre && workCand->GetTheMeExpr()->GetMeOp() == kMeOpIvar) || (preKind == kLoadPre)) { + // if only LHS real occur, skip this candidate + bool hasNonLHS = false; + for (MeRealOcc *realOcc : workCand->GetRealOccs()) { + if (realOcc->GetOccType() == kOccReal && !realOcc->IsLHS()) { + hasNonLHS = true; + break; + } + } + if (!hasNonLHS) { + workCand->deletedFromWorkList = true; + continue; + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "||||||| MC-SSAPRE candidate " << cnt << " at worklist index " + << workCand->GetIndex() << ": "; + workCand->DumpCand(*irMap); + if (workCand->isSRCand) { + mirModule->GetOut() << " srCand"; + } + if (workCand->onlyInvariantOpnds) { + mirModule->GetOut() << " onlyInvairantOpnds"; + } + mirModule->GetOut() << '\n'; + } + allOccs.clear(); + phiOccs.clear(); + nextRGNodeId = 1; + occ2RGNodeMap.clear(); + numSourceEdges = 0; + maxFlowRoutes.clear(); + minCut.clear(); + source = nullptr; + sink = nullptr; + // #1 Insert PHI; results in allOccs and phiOccs + ComputeVarAndDfPhis(); + CreateSortedOccs(); + if (workCand->GetRealOccs().empty()) { + workCand->deletedFromWorkList = true; + continue; + } + // set the position field in the MeRealOcc nodes + for (size_t j = 0; j < workCand->GetRealOccs().size(); j++) { + workCand->GetRealOcc(j)->SetPosition(j); + } + // #2 Rename + Rename1(); + Rename2(); + if (!phiOccs.empty()) { + // if no PHI inserted, no need to perform these steps + // #3 data flow methods + ComputeFullAvail(); + ComputePartialAnt(); + // #4 graph reduction + GraphReduction(); + // #5 single source + AddSingleSource(); + // #6 single sink + AddSingleSink(); + // step 7 max flow/min cut + FindMaxFlow(); + DetermineMinCut(); + // step 8 willbeavail + ComputeMCWillBeAvail(); + } + // #5 
Finalize
+    Finalize1();
+    if (workCand->Redo2HandleCritEdges()) {
+      // reinitialize def field to nullptr; NOTE(review): graph state (occ2RGNodeMap, routes, minCut) is not cleared
+      for (MeOccur *occ : allOccs) {
+        occ->SetDef(nullptr);
+        if (occ->GetOccType() == kOccPhiopnd) {
+          auto *phiOpndOcc = static_cast<MePhiOpndOcc*>(occ);
+          phiOpndOcc->SetIsProcessed(false);
+        }
+      }
+      Rename1();
+      Rename2();
+      ComputeFullAvail();
+      ComputePartialAnt();
+      GraphReduction();
+      AddSingleSource();
+      AddSingleSink();
+      FindMaxFlow();
+      DetermineMinCut();
+      ComputeMCWillBeAvail();
+      Finalize1();
+    }
+    Finalize2();
+    workCand->deletedFromWorkList = true;
+    // #6 CodeMotion and recompute worklist based on newly occurrence
+    CodeMotion();
+    if (preKind == kStmtPre && (workCand->GetRealOccs().front()->GetOpcodeOfMeStmt() == OP_dassign ||
+        workCand->GetRealOccs().front()->GetOpcodeOfMeStmt() == OP_callassigned)) {
+      // apply full redundancy elimination
+      DoSSAFRE();
+    }
+    perCandMemPool->ReleaseContainingMem();
+  }
+}
+
+}  // namespace maple
diff --git a/src/mapleall/maple_me/src/me_option.cpp b/src/mapleall/maple_me/src/me_option.cpp
index d062458a0d..dccb7b78eb 100644
--- a/src/mapleall/maple_me/src/me_option.cpp
+++ b/src/mapleall/maple_me/src/me_option.cpp
@@ -132,6 +132,7 @@ SafetyCheckMode MeOption::boundaryCheckMode = SafetyCheckMode::kNoCheck;
 bool MeOption::safeRegionMode = false;
 bool MeOption::unifyRets = false;
 bool MeOption::dumpCfgOfPhases = false;
+bool MeOption::epreUseProfile = true;
 #if MIR_JAVA
 std::string MeOption::acquireFuncName = "Landroid/location/LocationManager;|requestLocationUpdates|";
 std::string MeOption::releaseFuncName = "Landroid/location/LocationManager;|removeUpdates|";
@@ -315,6 +316,7 @@ bool MeOption::SolveOptions(bool isDebug) {
   maplecl::CopyIfEnabled(lessThrowAlias, opts::me::lessthrowalias);
   maplecl::CopyIfEnabled(propBase, opts::me::propbase);
   maplecl::CopyIfEnabled(dumpCfgOfPhases, opts::me::dumpCfgOfPhases);
+  maplecl::CopyIfEnabled(epreUseProfile, opts::me::epreUseProfile);
   if (opts::me::propiloadref.IsEnabledByUser()) {
propIloadRef = opts::me::propiloadref; diff --git a/src/mapleall/maple_me/src/me_options.cpp b/src/mapleall/maple_me/src/me_options.cpp index 79adab5f52..2b448a265c 100644 --- a/src/mapleall/maple_me/src/me_options.cpp +++ b/src/mapleall/maple_me/src/me_options.cpp @@ -698,5 +698,10 @@ maplecl::Option lfo({"--lfo"}, maplecl::Option dumpCfgOfPhases({"--dumpcfgofphases"}, " --dumpcfgofphases \tDump CFG from various phases to .dot files\n", {meCategory}); +maplecl::Option epreUseProfile({"--epreuseprofile"}, + " --epreuseprofile \tEnable profile-guided epre phase\n" + " --no-epreuseprofile \tDisable profile-guided epre phase\n", + {meCategory}, + maplecl::DisableWith("--no-epreuseprofile")); } diff --git a/src/mapleall/maple_me/src/me_ssa_epre.cpp b/src/mapleall/maple_me/src/me_ssa_epre.cpp index f2019b28a0..b209c00fc3 100644 --- a/src/mapleall/maple_me/src/me_ssa_epre.cpp +++ b/src/mapleall/maple_me/src/me_ssa_epre.cpp @@ -103,6 +103,9 @@ bool MESSAEPre::PhaseRun(maple::MeFunction &f) { } MeSSAEPre ssaPre(f, *irMap, *dom, *pdom, kh, *ssaPreMemPool, *ApplyTempMemPool(), epreLimitUsed, epreIncludeRef, MeOption::epreLocalRefVar, MeOption::epreLHSIvar); + if (f.GetMirFunc()->GetFuncProfData() && MeOption::epreUseProfile) { + ssaPre.doMinCut = true; + } ssaPre.SetSpillAtCatch(MeOption::spillAtCatch); if (MeOption::strengthReduction && !f.GetMIRModule().IsJavaModule()) { ssaPre.strengthReduction = true; @@ -120,7 +123,11 @@ bool MESSAEPre::PhaseRun(maple::MeFunction &f) { if (DEBUGFUNC_NEWPM(f)) { ssaPre.SetSSAPreDebug(true); } - ssaPre.ApplySSAPRE(); + if (!ssaPre.doMinCut) { + ssaPre.ApplySSAPRE(); + } else { + ssaPre.ApplyMCSSAPRE(); + } if (!ssaPre.GetCandsForSSAUpdate().empty()) { MeSSAUpdate ssaUpdate(f, *f.GetMeSSATab(), *dom, ssaPre.GetCandsForSSAUpdate()); ssaUpdate.Run(); diff --git a/src/mapleall/maple_me/src/me_stmt_pre.cpp b/src/mapleall/maple_me/src/me_stmt_pre.cpp index 7a6d13161c..12a704cd5d 100644 --- a/src/mapleall/maple_me/src/me_stmt_pre.cpp +++ 
b/src/mapleall/maple_me/src/me_stmt_pre.cpp @@ -178,7 +178,7 @@ void MeStmtPre::Finalize1() { auto *phiOpnd = static_cast(occ); MePhiOcc *phiOcc = phiOpnd->GetDefPhiOcc(); if (phiOcc->IsWillBeAvail()) { - if (phiOpnd->IsOkToInsert()) { + if (OKToInsert(phiOpnd)) { // insert the current expression at the end of the block containing phiOpnd if (phiOpnd->GetBB()->GetSucc().size() > 1) { CHECK_FATAL(!workCand->Redo2HandleCritEdges(), "Finalize1: insertion at critical edge; aborting"); diff --git a/src/mapleall/maple_me/src/occur.cpp b/src/mapleall/maple_me/src/occur.cpp index 9a52a85ffe..c45241d152 100644 --- a/src/mapleall/maple_me/src/occur.cpp +++ b/src/mapleall/maple_me/src/occur.cpp @@ -107,22 +107,6 @@ MeExpr *MeOccur::GetSavedExpr() { } } -// return true if either: -// operand is nullptr (def is null), or -// hasRealUse is false and defined by a PHI not will be avail -bool MePhiOpndOcc::IsOkToInsert() const { - if (GetDef() == nullptr) { - return true; - } - if (!hasRealUse) { - const MeOccur *defOcc = GetDef(); - if (defOcc->GetOccType() == kOccPhiocc && !static_cast(defOcc)->IsWillBeAvail()) { - return true; - } - } - return false; -} - bool MePhiOcc::IsOpndDefByRealOrInserted() const { for (MePhiOpndOcc *phiOpnd : phiOpnds) { MeOccur *defOcc = phiOpnd->GetDef(); @@ -152,6 +136,9 @@ void MeRealOcc::Dump(const IRMap &irMap) const { } else { mod->GetOut() << "RealOcc(LHS) "; } + if (rgExcluded) { + mod->GetOut() << "rgexcluded "; + } if (meExpr != nullptr) { meExpr->Dump(&irMap); } else { diff --git a/src/mapleall/maple_me/src/ssa_pre.cpp b/src/mapleall/maple_me/src/ssa_pre.cpp index a94b544c20..747da400f7 100644 --- a/src/mapleall/maple_me/src/ssa_pre.cpp +++ b/src/mapleall/maple_me/src/ssa_pre.cpp @@ -165,7 +165,7 @@ void SSAPre::GenerateSavePhiOcc(MePhiOcc &phiOcc) { void SSAPre::UpdateInsertedPhiOccOpnd() { for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { MePhiOcc *phiOcc = *it; - if (!phiOcc->IsWillBeAvail() || phiOcc->IsRemoved()) { + if 
(!WillBeAvail(phiOcc) || phiOcc->IsRemoved()) { continue; } if (phiOcc->GetRegPhi()) { @@ -237,7 +237,7 @@ void SSAPre::CodeMotion() { } case kOccPhiopnd: { MePhiOpndOcc *phiopnd = static_cast(occ); - if (phiopnd->GetDefPhiOcc()->IsRemoved() || !phiopnd->GetDefPhiOcc()->IsWillBeAvail()) { + if (phiopnd->GetDefPhiOcc()->IsRemoved() || !WillBeAvail(phiopnd->GetDefPhiOcc())) { break; } if (phiopnd->GetDef()->GetOccType() == kOccInserted) { @@ -285,7 +285,7 @@ void SSAPre::CodeMotion() { } case kOccPhiopnd: { auto *phiOpnd = static_cast(occ); - if (phiOpnd->GetDefPhiOcc()->IsRemoved() || !phiOpnd->GetDefPhiOcc()->IsWillBeAvail()) { + if (phiOpnd->GetDefPhiOcc()->IsRemoved() || !WillBeAvail(phiOpnd->GetDefPhiOcc())) { break; } MeOccur *defOcc = phiOpnd->GetDef(); @@ -313,7 +313,7 @@ void SSAPre::CodeMotion() { } case kOccPhiocc: { auto *phiOcc = static_cast(occ); - if (phiOcc->IsRemoved() || !phiOcc->IsWillBeAvail()) { + if (phiOcc->IsRemoved() || !WillBeAvail(phiOcc)) { break; } GenerateSavePhiOcc(*phiOcc); @@ -331,7 +331,7 @@ void SSAPre::CodeMotion() { } } else { MePhiOcc *phiOcc = static_cast(compOcc->GetDef()); - if (phiOcc->IsRemoved() || !phiOcc->IsWillBeAvail() || !phiOcc->IsDownSafe()) { + if (phiOcc->IsRemoved() || !WillBeAvail(phiOcc) || !phiOcc->IsDownSafe()) { break; } } @@ -366,6 +366,24 @@ void SSAPre::CodeMotion() { } // ================ Step 5: Finalize ================= + +// return true if either: +// operand is nullptr (def is null), or +// hasRealUse is false and defined by a PHI not will be avail +bool SSAPre::OKToInsert(MePhiOpndOcc *phiOpnd) { + if (phiOpnd->GetDef() == nullptr) { + return true; + } + if (!phiOpnd->HasRealUse()) { + MeOccur *defOcc = phiOpnd->GetDef(); + if (defOcc->GetOccType() == kOccPhiocc && + !WillBeAvail(static_cast(defOcc))) { + return true; + } + } + return false; +} + void SSAPre::Finalize1() { std::vector availDefVec(classCount, nullptr); // traversal in preoder DT @@ -374,7 +392,7 @@ void SSAPre::Finalize1() { switch 
(occ->GetOccType()) { case kOccPhiocc: { auto *phiOcc = static_cast(occ); - if (phiOcc->IsWillBeAvail()) { + if (WillBeAvail(phiOcc)) { availDefVec[classX] = phiOcc; } break; @@ -418,10 +436,10 @@ void SSAPre::Finalize1() { // we assume one phiOpnd has only one phiOcc use because critical edge split the blocks auto *phiOpnd = static_cast(occ); MePhiOcc *phiOcc = phiOpnd->GetDefPhiOcc(); - if (!phiOcc->IsWillBeAvail()) { + if (!WillBeAvail(phiOcc)) { break; } - if (phiOpnd->IsOkToInsert()) { + if (OKToInsert(phiOpnd)) { // insert the current expression at the end of the block containing phiOpnd if (phiOpnd->GetBB()->GetSucc().size() > 1) { CHECK_FATAL(!workCand->Redo2HandleCritEdges(), "Finalize1: insertion at critical edge, aborting"); @@ -466,7 +484,7 @@ void SSAPre::Finalize1() { " after Finalize1===================\n"; for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { MePhiOcc *phiOcc = *it; - if (!phiOcc->IsWillBeAvail()) { + if (!WillBeAvail(phiOcc)) { continue; } for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { @@ -504,7 +522,7 @@ void SSAPre::SetSave(MeOccur &defX) { CHECK_FATAL(!dom->IsNodeVecEmpty(), "the size to be allocated is 0"); GetIterDomFrontier(fromBb, &itFrontier); for (MePhiOcc *phiOcc : phiOccs) { - if (!phiOcc->IsWillBeAvail()) { + if (!WillBeAvail(phiOcc)) { continue; } if (itFrontier.find(dom->GetDtDfnItem(phiOcc->GetBB()->GetBBId())) == itFrontier.end()) { @@ -551,7 +569,7 @@ void SSAPre::Finalize2() { MePhiOcc *phiOcc = *it; // initialize extraneouse for each MePhiOcc if (!workCand->isSRCand) { - phiOcc->SetIsExtraneous(phiOcc->IsWillBeAvail()); + phiOcc->SetIsExtraneous(WillBeAvail(phiOcc)); } // initialize each operand of phiOcc @@ -571,7 +589,7 @@ void SSAPre::Finalize2() { if (phiOcc->IsRemoved() || !phiOcc->IsExtraneous()) { continue; } - if (!phiOcc->IsWillBeAvail()) { + if (!WillBeAvail(phiOcc)) { phiOcc->SetIsRemoved(true); continue; } @@ -1153,13 +1171,15 @@ void SSAPre::SetVarPhis(MeExpr *meExpr) { } if 
(scalar->IsDefByPhi()) { MePhiNode *phiMeNode = scalar->GetMePhiDef(); - BBId defBBId = phiMeNode->GetDefBB()->GetBBId(); - CHECK(defBBId < dom->GetDtDfnSize(), "defBBId.idx out of range in SSAPre::SetVarPhis"); - if (varPhiDfns.find(dom->GetDtDfnItem(defBBId)) == varPhiDfns.end() && ScreenPhiBB(defBBId)) { - (void)varPhiDfns.insert(dom->GetDtDfnItem(defBBId)); - for (auto opndIt = phiMeNode->GetOpnds().begin(); opndIt != phiMeNode->GetOpnds().end(); ++opndIt) { - ScalarMeExpr *opnd = *opndIt; - SetVarPhis(opnd); + if (phiMeNode->GetOpnds().size() > 1) { + BBId defBBId = phiMeNode->GetDefBB()->GetBBId(); + CHECK(defBBId < dom->GetDtDfnSize(), "defBBId.idx out of range in SSAPre::SetVarPhis"); + if (varPhiDfns.find(dom->GetDtDfnItem(defBBId)) == varPhiDfns.end() && ScreenPhiBB(defBBId)) { + (void)varPhiDfns.insert(dom->GetDtDfnItem(defBBId)); + for (auto opndIt = phiMeNode->GetOpnds().begin(); opndIt != phiMeNode->GetOpnds().end(); ++opndIt) { + ScalarMeExpr *opnd = *opndIt; + SetVarPhis(opnd); + } } } } diff --git a/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp b/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp index 3a0266558e..8d344bdeb8 100644 --- a/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp +++ b/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp @@ -349,7 +349,7 @@ void ExprHoist::HoistExpr(const MapleVector &allOccs, int32 candId) { } auto *phiOpndocc = static_cast(occ); auto *phiOcc = phiOpndocc->GetDefPhiOcc(); - if (phiOcc->IsWillBeAvail() && phiOpndocc->IsOkToInsert()) { + if (phiOcc->IsWillBeAvail() && parent->OKToInsert(phiOpndocc)) { if (hs->cdHS && // need a cd to hoist hs->occ == nullptr && // if not null, hs has been inserted hs->cdHS->occ != nullptr && // make sure there's at least one realocc at cd -- Gitee From c1b63efbdf6805e079f176157ced35ae3d438b81 Mon Sep 17 00:00:00 2001 From: Fred Chow Date: Thu, 1 Dec 2022 00:06:38 -0800 Subject: [PATCH 3/6] Added --pgorange= option for triaging PGO optimizations --- 
src/mapleall/maple_me/include/me_option.h | 3 +++ src/mapleall/maple_me/include/me_options.h | 1 + src/mapleall/maple_me/src/mc_ssa_pre.cpp | 7 ++++--- src/mapleall/maple_me/src/me_option.cpp | 24 ++++++++++++++++++++++ src/mapleall/maple_me/src/me_options.cpp | 5 +++++ src/mapleall/maple_me/src/me_ssa_epre.cpp | 9 +++++++- src/mapleall/maple_me/src/ssa_pre.cpp | 8 ++++---- 7 files changed, 49 insertions(+), 8 deletions(-) diff --git a/src/mapleall/maple_me/include/me_option.h b/src/mapleall/maple_me/include/me_option.h index 7728418555..d3e54bd7f4 100644 --- a/src/mapleall/maple_me/include/me_option.h +++ b/src/mapleall/maple_me/include/me_option.h @@ -37,6 +37,7 @@ class MeOption { SplitPhases(str, skipPhases); } bool GetRange(const std::string &str) const; + bool GetPgoRange(const std::string &str) const; const std::unordered_set &GetSkipPhases() const { return skipPhases; @@ -85,6 +86,8 @@ class MeOption { static constexpr int kRangeArrayLen = 2; static unsigned long range[kRangeArrayLen]; static bool useRange; + static unsigned long pgoRange[kRangeArrayLen]; + static bool usePgoRange; static std::string dumpFunc; static std::string skipFrom; static std::string skipAfter; diff --git a/src/mapleall/maple_me/include/me_options.h b/src/mapleall/maple_me/include/me_options.h index 17230e5ae2..a9a81d16aa 100644 --- a/src/mapleall/maple_me/include/me_options.h +++ b/src/mapleall/maple_me/include/me_options.h @@ -32,6 +32,7 @@ extern maplecl::Option os; extern maplecl::Option o3; extern maplecl::Option refusedcheck; extern maplecl::Option range; +extern maplecl::Option pgoRange; extern maplecl::Option dumpPhases; extern maplecl::Option skipPhases; extern maplecl::Option dumpFunc; diff --git a/src/mapleall/maple_me/src/mc_ssa_pre.cpp b/src/mapleall/maple_me/src/mc_ssa_pre.cpp index bf2c7f3abb..99cf308597 100644 --- a/src/mapleall/maple_me/src/mc_ssa_pre.cpp +++ b/src/mapleall/maple_me/src/mc_ssa_pre.cpp @@ -63,8 +63,9 @@ void McSSAPre::ComputeMCWillBeAvail() const { // 
set insert in phi operands for (Visit *visit : minCut) { MeOccur *occ = visit->node->occ; - if (occ->GetOccType() == kOccPhiopnd) { - MePhiOpndOcc *phiOpndOcc = static_cast(occ); + if (occ->GetOccType() == kOccPhiocc) { + MePhiOcc *phiOcc = static_cast(occ); + MePhiOpndOcc *phiOpndOcc = phiOcc->GetPhiOpnd(visit->predIdx); phiOpndOcc->SetIsMCInsert(true); } } @@ -307,7 +308,7 @@ void McSSAPre::DetermineMinCut() { minCut.push_back(cut[0]); size_t duplicatedVisits = 0; for (uint32 i = 1; i < maxFlowRoutes.size(); i++) { - if (cut[i] == cut[i-1]) { + if (cut[i] != cut[i-1]) { minCut.push_back(cut[i]); } else { duplicatedVisits++; diff --git a/src/mapleall/maple_me/src/me_option.cpp b/src/mapleall/maple_me/src/me_option.cpp index dccb7b78eb..7bb4b9080f 100644 --- a/src/mapleall/maple_me/src/me_option.cpp +++ b/src/mapleall/maple_me/src/me_option.cpp @@ -28,7 +28,9 @@ bool MeOption::isBigEndian = false; bool MeOption::dumpAfter = false; std::string MeOption::dumpFunc = "*"; unsigned long MeOption::range[kRangeArrayLen] = { 0, 0 }; +unsigned long MeOption::pgoRange[kRangeArrayLen] = { 0, 0 }; bool MeOption::useRange = false; +bool MeOption::usePgoRange = false; bool MeOption::quiet = false; bool MeOption::setCalleeHasSideEffect = false; bool MeOption::unionBasedAA = true; @@ -196,6 +198,14 @@ bool MeOption::SolveOptions(bool isDebug) { } } + if (opts::me::pgoRange.IsEnabledByUser()) { + usePgoRange = true; + bool ret = GetPgoRange(opts::me::pgoRange); + if (!ret) { + return ret; + } + } + maplecl::CopyIfEnabled(dumpBefore, opts::me::dumpBefore); maplecl::CopyIfEnabled(dumpAfter, opts::me::dumpAfter); @@ -486,6 +496,20 @@ bool MeOption::GetRange(const std::string &str) const { return true; } +bool MeOption::GetPgoRange(const std::string &str) const { + std::string s{ str }; + size_t comma = s.find_first_of(",", 0); + if (comma != std::string::npos) { + pgoRange[0] = std::stoul(s.substr(0, comma), nullptr); + pgoRange[1] = std::stoul(s.substr(comma + 1, std::string::npos - 
(comma + 1)), nullptr); + } + if (pgoRange[0] > pgoRange[1]) { + LogInfo::MapleLogger(kLlErr) << "invalid values for --pgorange=" << pgoRange[0] << "," << pgoRange[1] << '\n'; + return false; + } + return true; +} + bool MeOption::DumpPhase(const std::string &phase) { if (phase == "") { return false; diff --git a/src/mapleall/maple_me/src/me_options.cpp b/src/mapleall/maple_me/src/me_options.cpp index 2b448a265c..4b76b19384 100644 --- a/src/mapleall/maple_me/src/me_options.cpp +++ b/src/mapleall/maple_me/src/me_options.cpp @@ -51,6 +51,11 @@ maplecl::Option range({"--range"}, " \t--range=NUM0,NUM1\n", {meCategory}); +maplecl::Option pgoRange({"--pgorange"}, + " --pgorange \tUse profile-guided optimizations only for funcid in the range [NUM0, NUM1]\n" + " \t--pgorange=NUM0,NUM1\n", + {meCategory}); + maplecl::Option dumpPhases({"--dump-phases"}, " --dump-phases \tEnable debug trace for specified phases" " in the comma separated list\n" diff --git a/src/mapleall/maple_me/src/me_ssa_epre.cpp b/src/mapleall/maple_me/src/me_ssa_epre.cpp index b209c00fc3..81f8563c5a 100644 --- a/src/mapleall/maple_me/src/me_ssa_epre.cpp +++ b/src/mapleall/maple_me/src/me_ssa_epre.cpp @@ -104,7 +104,14 @@ bool MESSAEPre::PhaseRun(maple::MeFunction &f) { MeSSAEPre ssaPre(f, *irMap, *dom, *pdom, kh, *ssaPreMemPool, *ApplyTempMemPool(), epreLimitUsed, epreIncludeRef, MeOption::epreLocalRefVar, MeOption::epreLHSIvar); if (f.GetMirFunc()->GetFuncProfData() && MeOption::epreUseProfile) { - ssaPre.doMinCut = true; + if (MeOption::usePgoRange) { + if (f.GetMirFunc()->GetPuidxOrigin() >= MeOption::pgoRange[0] && + f.GetMirFunc()->GetPuidxOrigin() <= MeOption::pgoRange[1]) { + ssaPre.doMinCut = true; + } + } else { + ssaPre.doMinCut = true; + } } ssaPre.SetSpillAtCatch(MeOption::spillAtCatch); if (MeOption::strengthReduction && !f.GetMIRModule().IsJavaModule()) { ssaPre.strengthReduction = true; diff --git a/src/mapleall/maple_me/src/ssa_pre.cpp b/src/mapleall/maple_me/src/ssa_pre.cpp index 747da400f7..ea96fa47b8 100644 ---
a/src/mapleall/maple_me/src/ssa_pre.cpp +++ b/src/mapleall/maple_me/src/ssa_pre.cpp @@ -625,13 +625,13 @@ void SSAPre::Finalize2() { mirModule->GetOut() << std::endl; } else if (occ->GetOccType() == kOccReal) { auto *realOcc = static_cast(occ); + realOcc->Dump(*irMap); if (realOcc->IsReload()) { - realOcc->Dump(*irMap); mirModule->GetOut() << " isReload\n"; - } - if (realOcc->IsSave()) { - realOcc->Dump(*irMap); + } else if (realOcc->IsSave()) { mirModule->GetOut() << " isSave\n"; + } else { + mirModule->GetOut() << "\n"; } } else if (occ->GetOccType() == kOccPhiopnd) { auto *phiOpndOcc = static_cast(occ); -- Gitee From ef1d25e0d34e9c3ba9ed598629cd3543c3de8836 Mon Sep 17 00:00:00 2001 From: Fred Chow Date: Thu, 24 Nov 2022 09:24:35 -0800 Subject: [PATCH 4/6] Implemented min-cut version of SSAPRE used by epre phase under --profileUse --no-epreuseprofile will change epre back to the old version that does not use profile data --- src/mapleall/maple_me/BUILD.gn | 1 + src/mapleall/maple_me/include/mc_ssa_pre.h | 112 ++ src/mapleall/maple_me/include/me_option.h | 1 + src/mapleall/maple_me/include/me_options.h | 1 + src/mapleall/maple_me/include/occur.h | 41 +- src/mapleall/maple_me/include/ssa_epre.h | 6 +- src/mapleall/maple_me/include/ssa_pre.h | 9 +- src/mapleall/maple_me/src/mc_ssa_pre.cpp | 1022 +++++++++++++++++ src/mapleall/maple_me/src/me_option.cpp | 2 + src/mapleall/maple_me/src/me_options.cpp | 5 + src/mapleall/maple_me/src/me_ssa_epre.cpp | 9 +- src/mapleall/maple_me/src/me_stmt_pre.cpp | 2 +- src/mapleall/maple_me/src/occur.cpp | 19 +- src/mapleall/maple_me/src/ssa_pre.cpp | 58 +- .../maple_me/src/ssa_pre_for_hoist.cpp | 2 +- 15 files changed, 1247 insertions(+), 43 deletions(-) create mode 100644 src/mapleall/maple_me/include/mc_ssa_pre.h create mode 100644 src/mapleall/maple_me/src/mc_ssa_pre.cpp diff --git a/src/mapleall/maple_me/BUILD.gn b/src/mapleall/maple_me/BUILD.gn index 69444ff8ec..e464cadc53 100755 --- a/src/mapleall/maple_me/BUILD.gn +++ 
b/src/mapleall/maple_me/BUILD.gn @@ -129,6 +129,7 @@ src_libmplme = [ "src/me_slp.cpp", "src/lmbc_memlayout.cpp", "src/lmbc_lower.cpp", + "src/mc_ssa_pre.cpp", ] src_libmplmewpo = [ diff --git a/src/mapleall/maple_me/include/mc_ssa_pre.h b/src/mapleall/maple_me/include/mc_ssa_pre.h new file mode 100644 index 0000000000..343b40b833 --- /dev/null +++ b/src/mapleall/maple_me/include/mc_ssa_pre.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLE_ME_INCLUDE_MC_SSAPRE_H +#define MAPLE_ME_INCLUDE_MC_SSAPRE_H +#include "ssa_pre.h" + +namespace maple { + +// for representing a node in the reduced SSA graph +class RGNode { + friend class McSSAPre; + friend class Visit; + public: + RGNode(MapleAllocator *alloc, uint32 idx, MeOccur *oc) : id(idx), occ(oc), + pred(alloc->Adapter()), + inEdgesCap(alloc->Adapter()), + usedCap(alloc->Adapter()) {} + private: + uint32 id; + MeOccur *occ; + MapleVector pred; + MapleVector inEdgesCap; // capacity of incoming edges + MapleVector usedCap; // used flow value of outgoing edges +}; + +// designate a visited node and the next outgoing edge to take +class Visit { + friend class McSSAPre; + private: + Visit(RGNode *nd, uint32 idx) : node(nd), predIdx(idx) {} + RGNode *node; + uint32 predIdx; // the index in node's pred + + uint64 AvailableCapacity() const { return node->inEdgesCap[predIdx] - node->usedCap[predIdx]; } + void IncreUsedCapacity(uint64 val) { node->usedCap[predIdx] += 
val; } + bool operator==(const Visit *rhs) const { return node == rhs->node && predIdx == rhs->predIdx; } +}; + +// for representing a flow path from source to sink +class Route { + friend class McSSAPre; + public: + Route(MapleAllocator *alloc) : visits(alloc->Adapter()) {} + private: + MapleVector visits; + uint64 flowValue = 0; +}; + +class McSSAPre : public SSAPre { + public: + McSSAPre(IRMap &hMap, Dominance &currDom, Dominance &currPdom, MemPool &memPool, MemPool &mp2, PreKind kind, uint32 limit) : + SSAPre(hMap, currDom, currPdom, memPool, mp2, kind, limit), + occ2RGNodeMap(ssaPreAllocator.Adapter()), + maxFlowRoutes(ssaPreAllocator.Adapter()), + minCut(ssaPreAllocator.Adapter()) {} + virtual ~McSSAPre() = default; + + void ApplyMCSSAPRE(); + private: + // step 8 willbeavail + void ResetMCWillBeAvail(MePhiOcc *phiOcc) const; + void ComputeMCWillBeAvail() const; + // step 7 max flow/min cut + bool AmongMinCut(RGNode *, uint32 idx) const; + void DumpRGToFile(); // dump reduced graph to dot file + bool IncludedEarlier(Visit **cut, Visit *curVisit, uint32 nextRouteIdx); + void RemoveRouteNodesFromCutSet(std::unordered_multiset &cutSet, Route *route); + bool SearchRelaxedMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar); + bool SearchMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar); + void DetermineMinCut(); + bool VisitANode(RGNode *node, Route *route, std::vector &visitedNodes); + bool FindAnotherRoute(); + void FindMaxFlow(); + // step 6 single sink + void AddSingleSink(); + // step 5 single source + void AddSingleSource(); + // step 4 graph reduction + void GraphReduction(); + // step 3 data flow methods + void SetPartialAnt(MePhiOpndOcc *phiOpnd) const; + void ComputePartialAnt() const; + void ResetFullAvail(MePhiOcc *occ) const; + void ComputeFullAvail() const; + // step 2 renaming methods + void Rename1(); + + MapleUnorderedMap occ2RGNodeMap; + RGNode *source; + RGNode 
*sink; + uint32 numSourceEdges; + MapleVector maxFlowRoutes; + uint32 nextRGNodeId; + uint64 maxFlowValue; + uint64 relaxedMaxFlowValue; // relax maxFlowValue to avoid excessive mincut search time when number of routes is large + MapleVector minCut; // an array of Visits* to represent the minCut +}; + +} // namespace maple +#endif // MAPLE_ME_INCLUDE_MC_SSAPRE_H diff --git a/src/mapleall/maple_me/include/me_option.h b/src/mapleall/maple_me/include/me_option.h index 4e43e1e05e..7728418555 100644 --- a/src/mapleall/maple_me/include/me_option.h +++ b/src/mapleall/maple_me/include/me_option.h @@ -185,6 +185,7 @@ class MeOption { static bool layoutWithPredict; static bool unifyRets; static bool dumpCfgOfPhases; + static bool epreUseProfile; // safety check option begin static SafetyCheckMode npeCheckMode; static bool isNpeCheckAll; diff --git a/src/mapleall/maple_me/include/me_options.h b/src/mapleall/maple_me/include/me_options.h index da127b6d3d..17230e5ae2 100644 --- a/src/mapleall/maple_me/include/me_options.h +++ b/src/mapleall/maple_me/include/me_options.h @@ -135,6 +135,7 @@ extern maplecl::Option remat; extern maplecl::Option unifyrets; extern maplecl::Option lfo; extern maplecl::Option dumpCfgOfPhases; +extern maplecl::Option epreUseProfile; } diff --git a/src/mapleall/maple_me/include/occur.h b/src/mapleall/maple_me/include/occur.h index a24c969ed0..06c2da2a2c 100644 --- a/src/mapleall/maple_me/include/occur.h +++ b/src/mapleall/maple_me/include/occur.h @@ -215,6 +215,8 @@ class MeRealOcc : public MeOccur { bool isLHS; bool isFormalAtEntry; // the fake lhs occurrence at entry for formals bool isHoisted = false; // the hoisted occ used for hoisting + public: + bool rgExcluded = false; // reduced graph excluded, used only by McSSAPre }; class MeInsertedOcc : public MeOccur { @@ -275,13 +277,13 @@ class MePhiOpndOcc : public MeOccur { hasRealUse(false), isInsertedOcc(false), isPhiOpndReload(false), + isMCInsert(false), defPhiOcc(nullptr), phiOpnd4Temp(nullptr) { 
currentExpr.meStmt = nullptr; } ~MePhiOpndOcc() = default; - bool IsOkToInsert() const; void Dump(const IRMap &irMap) const override; bool IsProcessed() const { return isProcessed; @@ -315,6 +317,14 @@ class MePhiOpndOcc : public MeOccur { isPhiOpndReload = phiOpndReload; } + bool IsMCInsert() const { + return isMCInsert;; + } + + void SetIsMCInsert(bool mcInsert) { + isMCInsert = mcInsert; + } + const MePhiOcc *GetDefPhiOcc() const { return defPhiOcc; } @@ -356,6 +366,7 @@ class MePhiOpndOcc : public MeOccur { bool hasRealUse; bool isInsertedOcc; // the phi operand was inserted by inserted occ bool isPhiOpndReload; // if insertedocc and redefined the def, set this flag + bool isMCInsert; // used only in mc-ssapre MePhiOcc *defPhiOcc; // its lhs union { MeExpr *meExpr; // the current expression at the end of the block containing this PhiOpnd @@ -375,6 +386,8 @@ class MePhiOcc : public MeOccur { isLater(true), isExtraneous(false), isRemoved(false), + isPartialAnt(false), + isMCWillBeAvail(true), phiOpnds(alloc.Adapter()), regPhi(nullptr), varPhi(nullptr) {} @@ -408,6 +421,14 @@ class MePhiOcc : public MeOccur { isCanBeAvail = canBeAvail; } + bool IsFullyAvail() const { + return isCanBeAvail; + } + + void SetIsFullyAvail(bool fullyAvail) { + isCanBeAvail = fullyAvail; + } + bool IsLater() const { return isLater; } @@ -432,6 +453,22 @@ class MePhiOcc : public MeOccur { isRemoved = removed; } + bool IsPartialAnt() const { + return isPartialAnt; + } + + void SetIsPartialAnt(bool pant) { + isPartialAnt = pant; + } + + bool IsMCWillBeAvail() const { + return isMCWillBeAvail; + } + + void SetIsMCWillBeAvail(bool wba) { + isMCWillBeAvail = wba; + } + const MapleVector &GetPhiOpnds() const { return phiOpnds; } @@ -488,6 +525,8 @@ class MePhiOcc : public MeOccur { bool isLater; bool isExtraneous; bool isRemoved; // during finalize2, marked this phiocc is removed or not + bool isPartialAnt; // used only in mc-ssapre + bool isMCWillBeAvail; // used only in mc-ssapre MapleVector 
phiOpnds; MePhiNode *regPhi; // the reg phi being inserted, maybe can delete it later MePhiNode *varPhi; // the Var phi being inserted, maybe can delete it later diff --git a/src/mapleall/maple_me/include/ssa_epre.h b/src/mapleall/maple_me/include/ssa_epre.h index 0df35a90a7..2101208fea 100644 --- a/src/mapleall/maple_me/include/ssa_epre.h +++ b/src/mapleall/maple_me/include/ssa_epre.h @@ -14,14 +14,14 @@ */ #ifndef MAPLE_ME_INCLUDE_SSAEPRE_H #define MAPLE_ME_INCLUDE_SSAEPRE_H -#include "ssa_pre.h" +#include "mc_ssa_pre.h" namespace maple { -class SSAEPre : public SSAPre { +class SSAEPre : public McSSAPre { public: SSAEPre(IRMap &map, Dominance &dom, Dominance &pdom, MemPool &memPool, MemPool &mp2, PreKind kind, uint32 limit, bool includeRef, bool lhsIvar) - : SSAPre(map, dom, pdom, memPool, mp2, kind, limit), epreIncludeRef(includeRef), enableLHSIvar(lhsIvar) {} + : McSSAPre(map, dom, pdom, memPool, mp2, kind, limit), epreIncludeRef(includeRef), enableLHSIvar(lhsIvar) {} virtual ~SSAEPre() = default; diff --git a/src/mapleall/maple_me/include/ssa_pre.h b/src/mapleall/maple_me/include/ssa_pre.h index 9e782bca27..20649704dd 100644 --- a/src/mapleall/maple_me/include/ssa_pre.h +++ b/src/mapleall/maple_me/include/ssa_pre.h @@ -139,6 +139,7 @@ class SSAPre { bool strengthReduction = false; bool doLFTR = false; + bool doMinCut = false; protected: // step 6 codemotion methods @@ -156,6 +157,13 @@ class SSAPre { } virtual void CodeMotion(); // step 5 Finalize methods + bool WillBeAvail(MePhiOcc *phiOcc) { + if (!doMinCut) { + return phiOcc->IsWillBeAvail(); + } + return phiOcc->IsMCWillBeAvail(); + } + bool OKToInsert(MePhiOpndOcc *phiOpnd); virtual void Finalize1(); void SetSave(MeOccur &defX); void SetReplacement(MePhiOcc &occ, MeOccur &repDef); @@ -285,7 +293,6 @@ class SSAPre { uint32 strIdxCount = 0; // ssapre will create a lot of temp variables if using var to store redundances, start from 0 PreWorkCandHashTable preWorkCandHashTable; - private: virtual void 
DoSSAFRE() {}; bool enableDebug = false; diff --git a/src/mapleall/maple_me/src/mc_ssa_pre.cpp b/src/mapleall/maple_me/src/mc_ssa_pre.cpp new file mode 100644 index 0000000000..bf2c7f3abb --- /dev/null +++ b/src/mapleall/maple_me/src/mc_ssa_pre.cpp @@ -0,0 +1,1022 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include +#include +#include +#include "mc_ssa_pre.h" +#include "dominance.h" +#include "mir_builder.h" + +// Implementation of the MC-SSAPRE algorithm based on the PLDI 2011 paper: +// An SSA-based Algorithm for Optimal Speculative Code Motion Under an Execution Profile +// by Hucheng Zhou, Wenguang Chen and Fred Chow + +namespace { +constexpr int kFuncNameLenLimit = 80; +} + +namespace maple { + +// ================ Step 8: WillBeAvail ================= + +void McSSAPre::ResetMCWillBeAvail(MePhiOcc *occ) const { + if (!occ->IsMCWillBeAvail()) { + return; + } + occ->SetIsMCWillBeAvail(false); + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + if (!phiOcc->IsMCWillBeAvail()) { + continue; + } + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() != nullptr && phiOpnd->GetDef() == occ) { + // phiOpnd is a use of occ + if (!phiOpnd->HasRealUse() && !phiOpnd->IsMCInsert()) { + ResetMCWillBeAvail(phiOcc); + break; + } + } + } + } +} + +void McSSAPre::ComputeMCWillBeAvail() const { + if (minCut.size() == 0) { + for (MePhiOcc *phiOcc : phiOccs) { + 
phiOcc->SetIsMCWillBeAvail(false); + } + return; + } + // set insert in phi operands + for (Visit *visit : minCut) { + MeOccur *occ = visit->node->occ; + if (occ->GetOccType() == kOccPhiopnd) { + MePhiOpndOcc *phiOpndOcc = static_cast(occ); + phiOpndOcc->SetIsMCInsert(true); + } + } + for (MePhiOcc *phiOcc : phiOccs) { + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() == nullptr && !phiOpnd->IsMCInsert()) { + ResetMCWillBeAvail(phiOcc); + break; + } + } + } +} + +// ================ Step 7: Max Flow / Min Cut ================= + +bool McSSAPre::AmongMinCut(RGNode *nd, uint32 idx) const { + for (Visit *visit : minCut) { + if (visit->node == nd && visit->predIdx == idx) { + return true; + } + } + return false; +} + +void McSSAPre::DumpRGToFile() { + if (sink == nullptr) { + return; + } + std::string fileName = "rg-of-cand-"; + fileName.append(std::to_string(workCand->GetIndex())); + fileName.append("-"); + const std::string &funcName = mirModule->CurFunction()->GetName(); + if (funcName.size() < kFuncNameLenLimit) { + fileName.append(funcName); + } else { + fileName.append(funcName.c_str(), kFuncNameLenLimit); + } + fileName.append(".dot"); + std::ofstream rgFile; + std::streambuf *coutBuf = LogInfo::MapleLogger().rdbuf(); // keep original cout buffer + std::streambuf *buf = rgFile.rdbuf(); + LogInfo::MapleLogger().rdbuf(buf); + rgFile.open(fileName, std::ios::trunc); + rgFile << "digraph {\n"; + for (int32 i = 0; i < sink->pred.size(); i++) { + RGNode *pre = sink->pred[i]; + rgFile << "real" << pre->id << " -> " << "\"sink\nmaxflow " << maxFlowValue << "\";\n"; + } + MapleUnorderedMap::iterator it = occ2RGNodeMap.begin(); + for (; it != occ2RGNodeMap.end(); it++) { + RGNode *rgNode = it->second; + for (int32 i = 0; i < rgNode->pred.size(); i++) { + RGNode *pre = rgNode->pred[i]; + if (pre != source) { + if (pre->occ->GetOccType() == kOccPhiocc) { + rgFile << "phi" << pre->id << " -> "; + } else { + rgFile << "real" << pre->id << " -> "; 
+ } + if (rgNode->occ->GetOccType() == kOccPhiocc) { + rgFile << "phi" << rgNode->id; + } else { + rgFile << "real" << rgNode->id; + } + } else { + rgFile << "source" << " -> " << "phi" << rgNode->id; + } + if (AmongMinCut(rgNode, i)) { + rgFile << "[style=dotted][color=red]"; + } + if (rgNode->usedCap[i] == 0) { + rgFile << "[style=dashed][color=green]"; + } + rgFile << "[label=\"" << rgNode->usedCap[i] << "|" << rgNode->inEdgesCap[i] << "\"];\n"; + } + } + rgFile << "}\n"; + rgFile.flush(); + rgFile.close(); + LogInfo::MapleLogger().rdbuf(coutBuf); + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() << " dumped to " << fileName << "\n"; +} + +bool McSSAPre::IncludedEarlier(Visit **cut, Visit *curVisit, uint32 nextRouteIdx) { + uint32 i = nextRouteIdx; + while (i != 0) { + i--; + if (cut[i]->node == curVisit->node && cut[i]->predIdx == curVisit->predIdx) { + return true; + } + } + return false; +} + +// remove this route's nodes from cutSet +void McSSAPre::RemoveRouteNodesFromCutSet(std::unordered_multiset &cutSet, Route *route) { + for (uint32 i = 1; i < route->visits.size(); i++) { + Visit &curVisit = route->visits[i]; + std::unordered_multiset::iterator it = cutSet.find(curVisit.node->id); + ASSERT(it != cutSet.end(), "cutSet maintenance error"); + cutSet.erase(it); + } +} + +// find the cut closest to the sink whose total flow is relaxedMaxFlowValue +bool McSSAPre::SearchRelaxedMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar) { + Route *curRoute = maxFlowRoutes[nextRouteIdx]; + Visit *curVisit = nullptr; + + // determine starting value of visitIdx: start searching back from route end; + // if any node is in cutSet, set visitIdx as that nodes's index in route; + // otherwise, set visitIdx to 0 + uint32 visitIdx = curRoute->visits.size(); + do { + visitIdx--; + if (cutSet.count(curRoute->visits[visitIdx].node->id) != 0) { + break; + } + } while (visitIdx != 1); + // update cutSet with visited nodes 
lower than visitIdx + if (visitIdx != 1) { + for (uint i = visitIdx - 1; i > 0; i--) { + cutSet.insert(curRoute->visits[i].node->id); + } + } + + bool success = false; + do { + if (visitIdx == curRoute->visits.size()) { + RemoveRouteNodesFromCutSet(cutSet, curRoute); + return false; + } + curVisit = &curRoute->visits[visitIdx]; + uint64 visitCap = curVisit->node->inEdgesCap[curVisit->predIdx]; + cut[nextRouteIdx] = curVisit; + if (visitIdx != 0) { + cutSet.insert(curVisit->node->id); + } + if (IncludedEarlier(cut, curVisit, nextRouteIdx)) { + visitCap = 0; + } + success = (flowSoFar + visitCap <= relaxedMaxFlowValue); + if (success && nextRouteIdx != (maxFlowRoutes.size() - 1)) { + success = SearchRelaxedMinCut(cut, cutSet, nextRouteIdx+1, flowSoFar + visitCap); + } + visitIdx++; + } while (!success); + return true; +} + +// find the cut closest to the sink whose total flow is maxFlowValue +bool McSSAPre::SearchMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar) { + Route *curRoute = maxFlowRoutes[nextRouteIdx]; + Visit *curVisit = nullptr; + + // determine starting value of visitIdx: start searching back from route end; + // if any node is in cutSet, set visitIdx as that nodes's index in route; + // otherwise, set visitIdx to 0 + uint32 visitIdx = curRoute->visits.size(); + do { + visitIdx--; + if (cutSet.count(curRoute->visits[visitIdx].node->id) != 0) { + break; + } + } while (visitIdx != 1); + // update cutSet with visited nodes lower than visitIdx + if (visitIdx != 1) { + for (uint i = visitIdx - 1; i > 0; i--) { + cutSet.insert(curRoute->visits[i].node->id); + } + } + + bool success = false; + do { + if (visitIdx == curRoute->visits.size()) { + RemoveRouteNodesFromCutSet(cutSet, curRoute); + return false; + } + curVisit = &curRoute->visits[visitIdx]; + uint64 visitCap = curVisit->node->inEdgesCap[curVisit->predIdx]; + uint64 usedCap = curVisit->node->usedCap[curVisit->predIdx]; + if (visitCap != usedCap) { + if 
(visitIdx != 0) { + cutSet.insert(curVisit->node->id); + } + visitIdx++; + continue; + } + cut[nextRouteIdx] = curVisit; + if (visitIdx != 0) { + cutSet.insert(curVisit->node->id); + } + if (IncludedEarlier(cut, curVisit, nextRouteIdx)) { + visitCap = 0; + } + success = (flowSoFar + visitCap <= maxFlowValue); + if (success && nextRouteIdx != (maxFlowRoutes.size() - 1)) { + success = SearchMinCut(cut, cutSet, nextRouteIdx+1, flowSoFar + visitCap); + } + visitIdx++; + } while (!success); + return true; +} + +void McSSAPre::DetermineMinCut() { + if (maxFlowRoutes.empty()) { + if (GetSSAPreDebug()) { + DumpRGToFile(); + } + return; + } + // maximum width of the min cut is the number of routes in maxFlowRoutes + Visit* cut[maxFlowRoutes.size()]; + std::unordered_multiset cutSet; // key is RGNode's id; must be kept in sync with cut[]; sink node is not entered + constexpr double defaultRelaxScaling = 1.25; + relaxedMaxFlowValue = static_cast(static_cast(maxFlowValue) * defaultRelaxScaling); + bool relaxedSearch = false; + if (maxFlowRoutes.size() >= 20) { + // apply arbitrary heuristics to reduce search time + relaxedSearch = true; + relaxedMaxFlowValue = maxFlowValue * (maxFlowRoutes.size() / 10); + } + bool success = !relaxedSearch && SearchMinCut(cut, cutSet, 0, 0); + if (!success) { + relaxedSearch = true; + success = SearchRelaxedMinCut(cut, cutSet, 0, 0); + } + if (!success) { + if (GetSSAPreDebug()) { + mirModule->GetOut() << "MinCut failed\n"; + DumpRGToFile(); + } + CHECK_FATAL(false, "McSSAPre::DetermineMinCut: failed to find min cut"); + } + // sort cut + std::sort(cut, cut+maxFlowRoutes.size(), [](const Visit *left, const Visit *right) { + return (left->node != right->node) ? 
(left->node->id < right->node->id) + : (left->predIdx < right->predIdx); }); + // remove duplicates in the cut to form mincut + minCut.push_back(cut[0]); + size_t duplicatedVisits = 0; + for (uint32 i = 1; i < maxFlowRoutes.size(); i++) { + if (cut[i] == cut[i-1]) { + minCut.push_back(cut[i]); + } else { + duplicatedVisits++; + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "finished "; + if (relaxedSearch) { + mirModule->GetOut() << "relaxed "; + } + mirModule->GetOut() << "MinCut\n"; + DumpRGToFile(); + if (duplicatedVisits != 0) { + mirModule->GetOut() << duplicatedVisits << " duplicated visits in mincut\n"; + } + } +} + +bool McSSAPre::VisitANode(RGNode *node, Route *route, std::vector &visitedNodes) { + ASSERT(node->pred.size() != 0 , "McSSAPre::VisitANode: no connection to source node"); + // if any pred is the source and there's capacity to reach it, return success + for (uint32 i = 0; i < node->pred.size(); i++) { + if (node->pred[i] == source && node->inEdgesCap[i] > node->usedCap[i]) { + // if there is another pred never taken that also reaches source, use that instead + for (uint32 k = i + 1; k < node->pred.size(); k++) { + if (node->pred[k] == source && node->usedCap[k] == 0 && node->inEdgesCap[k] > 0) { + route->visits.push_back(Visit(node, k)); + return true; + } + } + route->visits.push_back(Visit(node, i)); + return true; + } + } + + // pick an never-taken predecessor path first + for (uint32 i = 0; i < node->pred.size(); i++) { + if (node->usedCap[i] == 0 && node->inEdgesCap[i] > 0 && !visitedNodes[node->pred[i]->id]) { + route->visits.push_back(Visit(node, i)); + visitedNodes[node->pred[i]->id] = true; + bool success = VisitANode(node->pred[i], route, visitedNodes); + if (!success) { + route->visits.pop_back(); + } else { + return true; + } + } + } + + size_t numPreds = node->pred.size(); + uint32 sortedPred[numPreds]; + for (uint32 i = 0; i < numPreds; i++) { + sortedPred[i] = i; + } + // put sortedPred[] in increasing order of 
capacities + std::sort(sortedPred, sortedPred+numPreds, [node](uint32 m, uint32 n) { + return node->inEdgesCap[m] < node->inEdgesCap[n]; }); + // for this round, prefer predecessor with higher unused capacity + for (uint32 i = 0; i < numPreds; i++) { + uint32 j = sortedPred[i]; + if (!visitedNodes[node->pred[j]->id] && node->inEdgesCap[j] > node->usedCap[j]) { + route->visits.push_back(Visit(node, j)); + visitedNodes[node->pred[j]->id] = true; + bool success = VisitANode(node->pred[j], route, visitedNodes); + if (!success) { + route->visits.pop_back(); + } else { + return true; + } + } + } + return false; +} + +// return false if not successful; if successful, the new route will be pushed +// to maxFlowRoutes +bool McSSAPre::FindAnotherRoute() { + std::vector visitedNodes(occ2RGNodeMap.size() + 1, false); + Route *route = perCandMemPool->New(&perCandAllocator); + bool success = false; + // pick an untaken sink predecessor first + for (int32 i = 0; i < sink->pred.size(); i++) { + if (sink->usedCap[i] == 0) { + route->visits.push_back(Visit(sink, i)); + visitedNodes[sink->pred[i]->id] = true; + success = VisitANode(sink->pred[i], route, visitedNodes); + if (!success) { + route->visits.pop_back(); + } else { + break; + } + } + } + if (!success) { + // now, pick any sink predecessor + for (int32 i = 0; i < sink->pred.size(); i++) { + route->visits.push_back(Visit(sink, i)); + visitedNodes[sink->pred[i]->id] = true; + success = VisitANode(sink->pred[i], route, visitedNodes); + if (!success) { + route->visits.pop_back(); + } else { + break; + } + } + } + if (!success) { + return false; + } + // find bottleneck capacity along route + uint64 minAvailCap = route->visits[0].AvailableCapacity(); + for (int32 i = 1; i < route->visits.size(); i++) { + uint64 curAvailCap = route->visits[i].AvailableCapacity(); + minAvailCap = std::min(minAvailCap, curAvailCap); + } + route->flowValue = minAvailCap; + // update usedCap along route + for (int32 i = 0; i < route->visits.size(); 
i++) { + route->visits[i].IncreUsedCapacity(minAvailCap); + } + maxFlowRoutes.push_back(route); + return true; +} + +void McSSAPre::FindMaxFlow() { + if (sink == nullptr) { + return; + } + maxFlowValue = 0; + bool found; + do { + found = FindAnotherRoute(); + } while (found); + // calculate maxFlowValue; + for (Route *route : maxFlowRoutes) { + maxFlowValue += route->flowValue; + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << ": FindMaxFlow found " << maxFlowRoutes.size() << " routes\n"; + for (size_t i = 0; i < maxFlowRoutes.size(); i++) { + Route *route = maxFlowRoutes[i]; + mirModule->GetOut() << "route " << i << " sink:pred" << route->visits[0].predIdx; + for (size_t j = 1; j < route->visits.size(); j++) { + if (route->visits[j].node->occ->GetOccType() == kOccPhiocc) { + mirModule->GetOut() << " phi"; + } else { + mirModule->GetOut() << " real"; + } + mirModule->GetOut() << route->visits[j].node->id << ":pred" << route->visits[j].predIdx; + } + mirModule->GetOut() << " flowValue " << route->flowValue; + mirModule->GetOut() << "\n"; + } + mirModule->GetOut() << "maxFlowValue is " << maxFlowValue << "\n"; + } +} + +// ================ Step 6: Add Single Sink ================= + +void McSSAPre::AddSingleSink() { + if (numSourceEdges == 0) { + return; // empty reduced graph + } + sink = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, nullptr); + size_t numToSink = 0; + MapleUnorderedMap::iterator it = occ2RGNodeMap.begin(); + for (; it != occ2RGNodeMap.end(); it++) { + if (it->first->GetOccType() != kOccReal) { + continue; + } + RGNode *use = it->second; + // add edge from this use node to sink + sink->pred.push_back(use); + sink->inEdgesCap.push_back(UINT64_MAX); + sink->usedCap.push_back(0); + numToSink++; + } + ASSERT(numToSink != 0, "McSSAPre::AddSingleSink: found 0 edge to sink"); + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() << " has " << 
numToSink << " edges to sink\n"; + } +} + +// ================ Step 5: Add Single Source ================= +void McSSAPre::AddSingleSource() { + source = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, nullptr); + for (MePhiOcc *phiOcc : phiOccs) { + if (phiOcc->IsPartialAnt() && !phiOcc->IsFullyAvail()) { + // look for null operands + for (int32 i = 0; i < phiOcc->GetPhiOpnds().size(); i++) { + MePhiOpndOcc *phiopndOcc = phiOcc->GetPhiOpnd(i); + if (phiopndOcc->GetDef() != nullptr) { + continue; + } + // add edge from source to this phi node + RGNode *sucNode = occ2RGNodeMap[phiOcc]; + sucNode->pred.push_back(source); + sucNode->inEdgesCap.push_back(phiOcc->GetBB()->GetPred(i)->GetFrequency()+1); + sucNode->usedCap.push_back(0); + numSourceEdges++; + } + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex(); + if (numSourceEdges == 0) { + mirModule->GetOut() << " has empty reduced graph\n"; + } else { + mirModule->GetOut() << " source has " << numSourceEdges << " succs\n"; + } + } +} + +// ================ Step 4: Graph Reduction ================= +void McSSAPre::GraphReduction() { + size_t numPhis = 0; + size_t numRealOccs = 0; + size_t numType1Edges = 0; + size_t numType2Edges = 0; + // add def nodes + for (MePhiOcc *phiOcc : phiOccs) { + if (phiOcc->IsPartialAnt() && !phiOcc->IsFullyAvail()) { + RGNode *newRGNode = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, phiOcc); + occ2RGNodeMap.insert(std::pair(phiOcc, newRGNode)); + numPhis++; + } + } + if (occ2RGNodeMap.empty()) { + return; + } + // add use nodes and use-def edges + for (MeOccur *occ : allOccs) { + if (occ->GetOccType() == kOccReal) { + MeRealOcc *realOcc = static_cast(occ); + if (!realOcc->rgExcluded && realOcc->GetDef() != nullptr) { + MeOccur *defOcc = realOcc->GetDef(); + ASSERT(defOcc->GetOccType() == kOccPhiocc, "McSSAPre::GraphReduction: real occ not defined by phi"); + if (occ2RGNodeMap.find(defOcc) != occ2RGNodeMap.end()) { + 
RGNode *use = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, realOcc); + occ2RGNodeMap[realOcc] = use; + numRealOccs++; + RGNode *def = occ2RGNodeMap[defOcc]; + use->pred.push_back(def); + use->inEdgesCap.push_back(realOcc->GetBB()->GetFrequency()+1); + use->usedCap.push_back(0); + numType2Edges++; + } + } + } else if (occ->GetOccType() == kOccPhiopnd) { + MePhiOpndOcc *phiopndOcc = static_cast(occ); + MePhiOcc *defPhiOcc = phiopndOcc->GetDefPhiOcc(); + if (defPhiOcc->IsPartialAnt() && !defPhiOcc->IsFullyAvail()) { + // defPhiOcc is the use node and it has already been added + MeOccur *defOcc = phiopndOcc->GetDef(); + if (defOcc != nullptr && defOcc->GetOccType() == kOccPhiocc && + static_cast(defOcc)->IsPartialAnt() && + !static_cast(defOcc)->IsFullyAvail()) { + ASSERT(occ2RGNodeMap.find(defOcc) != occ2RGNodeMap.end(), "McSSAPre::GraphReduction: def node not found"); + RGNode *def = occ2RGNodeMap[defOcc]; + RGNode *use = occ2RGNodeMap[defPhiOcc]; + use->pred.push_back(def); + // find the index of phiopndOcc in defPhiOcc's phiOpnds + uint32 i; + for (i = 0; i < defPhiOcc->GetPhiOpnds().size(); i++) { + if (defPhiOcc->GetPhiOpnd(i) == phiopndOcc) { + break; + } + } + ASSERT(i != defPhiOcc->GetPhiOpnds().size(), "McSSAPre::GraphReduction: cannot find corresponding phi opnd"); + use->inEdgesCap.push_back(defPhiOcc->GetBB()->GetPred(i)->GetFrequency()+1); + use->usedCap.push_back(0); + numType1Edges++; + } + } + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << " after GraphReduction, phis: " << numPhis << " reals: " << numRealOccs + << " type 1 edges: " << numType1Edges << " type 2 edges: " << numType2Edges << "\n"; + } +} + +// ================ Step 3: Data Flow Computations ================= + +// set partial anticipation +void McSSAPre::SetPartialAnt(MePhiOpndOcc *phiOpnd) const { + MeOccur *defOcc = phiOpnd->GetDef(); + if (defOcc == nullptr || defOcc->GetOccType() != kOccPhiocc) { + return; + } 
+ auto *defPhiOcc = static_cast(defOcc); + if (defPhiOcc->IsPartialAnt()) { + return; + } + defPhiOcc->SetIsPartialAnt(true); + for (MePhiOpndOcc *mePhiOpnd : defPhiOcc->GetPhiOpnds()) { + SetPartialAnt(mePhiOpnd); + } +} + +// compute partial anticipation for each PHI +void McSSAPre::ComputePartialAnt() const { + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + if (phiOcc->IsPartialAnt()) { + // propagate partialAnt along use-def edges + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + SetPartialAnt(phiOpnd); + } + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << " after PartialAnt\n"; + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + phiOcc->Dump(*irMap); + if (phiOcc->IsPartialAnt()) { + mirModule->GetOut() << " is partialant\n"; + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (!phiOpnd->IsProcessed()) { + phiOpnd->Dump(*irMap); + mirModule->GetOut() << " has not been processed by Rename2\n"; + } + } + } else { + mirModule->GetOut() << " is not partialant\n"; + } + } + } +} + +void McSSAPre::ResetFullAvail(MePhiOcc *occ) const { + if (!occ->IsFullyAvail()) { + return; + } + occ->SetIsFullyAvail(false); + // reset those phiocc nodes that have occ as one of its operands + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + if (!phiOcc->IsFullyAvail()) { + continue; + } + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() != nullptr && phiOpnd->GetDef() == occ) { + // phiOpnd is a use of occ + if (!phiOpnd->HasRealUse()) { + ResetFullAvail(phiOcc); + break; + } + } + } + } +} + +// the fullyavail attribute is stored in the isCanBeAvail field +void McSSAPre::ComputeFullAvail() const { + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + // reset fullyavail if any phi operand is null + bool existNullDef = 
false; + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() == nullptr) { + existNullDef = true; + break; + } + } + if (existNullDef) { + ResetFullAvail(phiOcc); + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << " after FullyAvailable\n"; + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + phiOcc->Dump(*irMap); + if (phiOcc->IsFullyAvail()) { + mirModule->GetOut() << " is fullyavail\n"; + } else { + mirModule->GetOut() << " is not fullyavail\n"; + } + } + } +} + +// ================ Step 2: Renaming ================= +void McSSAPre::Rename1() { + std::stack occStack; + rename2Set.clear(); + classCount = 1; + // iterate the occurrence according to its preorder dominator tree + for (MeOccur *occ : allOccs) { + while (!occStack.empty() && !occStack.top()->IsDominate(*dom, *occ)) { + occStack.pop(); + } + switch (occ->GetOccType()) { + case kOccReal: { + if (occStack.empty()) { + // assign new class + occ->SetClassID(classCount++); + occStack.push(occ); + break; + } + MeOccur *topOccur = occStack.top(); + if (topOccur->GetOccType() == kOccUse || topOccur->GetOccType() == kOccMembar) { + occ->SetClassID(classCount++); + occStack.push(occ); + break; + } + auto *realOcc = static_cast(occ); + if (topOccur->GetOccType() == kOccReal) { + auto *realTopOccur = static_cast(topOccur); + if (AllVarsSameVersion(*realTopOccur, *realOcc)) { + // all corresponding variables are the same + realOcc->SetClassID(realTopOccur->GetClassID()); + if (realTopOccur->GetDef() != nullptr) { + realOcc->SetDef(realTopOccur->GetDef()); + } else { + realOcc->SetDef(realTopOccur); + } + realOcc->rgExcluded = true; + } else { + // assign new class + occ->SetClassID(classCount++); + occStack.push(occ); + } + } else { + // top of stack is a PHI occurrence + ASSERT(topOccur->GetOccType() == kOccPhiocc, "invalid kOccPhiocc"); + std::vector varVec; + CollectVarForCand(*realOcc, 
varVec); + bool isAllDom = true; + if (realOcc->IsLHS()) { + isAllDom = false; + } else { + for (auto varIt = varVec.begin(); varIt != varVec.end(); ++varIt) { + MeExpr *varMeExpr = *varIt; + if (workCand->isSRCand) { + varMeExpr = ResolveAllInjuringDefs(varMeExpr); + } + if (!DefVarDominateOcc(varMeExpr, *topOccur)) { + isAllDom = false; + } + } + } + MePhiOcc *phiTopOccur = static_cast(topOccur); + if (isAllDom) { + realOcc->SetClassID(topOccur->GetClassID()); + realOcc->SetDef(topOccur); + (void)rename2Set.insert(realOcc->GetPosition()); + phiTopOccur->SetIsPartialAnt(true); + } else { + // assign new class + occ->SetClassID(classCount++); + } + occStack.push(occ); + } + break; + } + case kOccCompare: { + if (occStack.empty()) { + break; + } + MeOccur *topOccur = occStack.top(); + if (topOccur->GetOccType() == kOccUse || topOccur->GetOccType() == kOccMembar) { + break; + } + MeRealOcc *realOcc = static_cast(occ); + ScalarMeExpr *scalarOpnd0 = dynamic_cast(workCand->GetTheMeExpr()->GetOpnd(0)); + ScalarMeExpr *scalarOpnd1 = dynamic_cast(workCand->GetTheMeExpr()->GetOpnd(1)); + ScalarMeExpr *compareOpnd0 = dynamic_cast(realOcc->GetMeExpr()->GetOpnd(0)); + ScalarMeExpr *compareOpnd1 = dynamic_cast(realOcc->GetMeExpr()->GetOpnd(1)); + // set compareOpnd to be the scalar operand that is common to + // workCand->theMeExpr and realOcc->meExpr + ScalarMeExpr *compareOpnd = nullptr; + uint32 scalarOpndNo = 0; + if (scalarOpnd0 != nullptr) { + if (compareOpnd0 != nullptr && scalarOpnd0->GetOst() == compareOpnd0->GetOst()) { + compareOpnd = compareOpnd0; + scalarOpndNo = 0; + } else if (compareOpnd1 != nullptr && scalarOpnd0->GetOst() == compareOpnd1->GetOst()) { + compareOpnd = compareOpnd1; + scalarOpndNo = 0; + } + } + if (scalarOpnd1 != nullptr) { + if (compareOpnd0 != nullptr && scalarOpnd1->GetOst() == compareOpnd0->GetOst()) { + compareOpnd = compareOpnd0; + scalarOpndNo = 1; + } else if (compareOpnd1 != nullptr && scalarOpnd1->GetOst() == compareOpnd1->GetOst()) { 
+ compareOpnd = compareOpnd1; + scalarOpndNo = 1; + } + } + CHECK_FATAL(compareOpnd != nullptr, "Rename1: compOcc does not correspond to realOcc"); + ScalarMeExpr *resolvedCompareOpnd = ResolveAllInjuringDefs(compareOpnd); + if (topOccur->GetOccType() == kOccReal) { + MeRealOcc *realTopOccur = static_cast(topOccur); + ScalarMeExpr *topOccurOpnd = static_cast(realTopOccur->GetMeExpr()->GetOpnd(scalarOpndNo)); + if (compareOpnd == topOccurOpnd || resolvedCompareOpnd == topOccurOpnd) { + realOcc->SetClassID(realTopOccur->GetClassID()); + if (realTopOccur->GetDef() != nullptr) { + realOcc->SetDef(realTopOccur->GetDef()); + } else { + realOcc->SetDef(realTopOccur); + } + } + break; + } + // top of stack is a PHI occurrence + ASSERT(topOccur->GetOccType() == kOccPhiocc, "invalid kOccPhiocc"); + if (DefVarDominateOcc(compareOpnd, *topOccur)) { + realOcc->SetClassID(topOccur->GetClassID()); + realOcc->SetDef(topOccur); + } + break; + } + case kOccPhiocc: { + // assign new class + occ->SetClassID(classCount++); + occStack.push(occ); + break; + } + case kOccPhiopnd: { + // stow away the use occurrences at the stack top + MeOccur *stowedUseOcc = nullptr; + if (!occStack.empty() && occStack.top()->GetOccType() == kOccUse) { + stowedUseOcc = occStack.top(); + occStack.pop(); + CHECK_FATAL(occStack.empty() || occStack.top()->GetOccType() != kOccUse, + "Rename1: cannot have 2 consecutive use occurs on stack"); + } + if (occStack.empty() || occStack.top()->GetOccType() == kOccMembar) { + occ->SetDef(nullptr); + } else { + MeOccur *topOccur = occStack.top(); + occ->SetDef(topOccur); + occ->SetClassID(topOccur->GetClassID()); + if (topOccur->GetOccType() == kOccReal) { + static_cast(occ)->SetHasRealUse(true); + } + } + // push stowed use_occ back + if (stowedUseOcc != nullptr) { + occStack.push(stowedUseOcc); + } + break; + } + case kOccExit: + break; + case kOccMembar: { + if (!occStack.empty()) { + MeOccur *topOccur = occStack.top(); + if (topOccur->GetOccType() == kOccPhiocc) { + 
} else if (topOccur->GetOccType() != occ->GetOccType()) { + occStack.push(occ); + } + } else { + occStack.push(occ); + } + break; + } + default: + ASSERT(false, "should not be here"); + break; + } + } + if (GetSSAPreDebug()) { + PreWorkCand *curCand = workCand; + mirModule->GetOut() << "++++ ssapre candidate " << curCand->GetIndex() << " after rename1\n"; + for (MeOccur *occ : allOccs) { + occ->Dump(*irMap); + mirModule->GetOut() << '\n'; + } + mirModule->GetOut() << "\n" << "rename2 set:\n"; + for (uint32 pos : rename2Set) { + MeRealOcc *occur = workCand->GetRealOcc(pos); + occur->Dump(*irMap); + mirModule->GetOut() << " with def at\n"; + occur->GetDef()->Dump(*irMap); + mirModule->GetOut() << "\n"; + } + mirModule->GetOut() << "\n"; + } +} + +void McSSAPre::ApplyMCSSAPRE() { + // #0 build worklist + BuildWorkList(); + if (GetSSAPreDebug()) { + mirModule->GetOut() << " worklist initial size " << workList.size() << '\n'; + } + ConstructUseOccurMap(); + uint32 cnt = 0; + while (!workList.empty()) { + ++cnt; + if (cnt > preLimit) { + break; + } + workCand = workList.front(); + workCand->SetIndex(static_cast(cnt)); + workList.pop_front(); + if (workCand->GetRealOccs().empty()) { + workCand->deletedFromWorkList = true; + continue; + } + if ((preKind == kExprPre && workCand->GetTheMeExpr()->GetMeOp() == kMeOpIvar) || (preKind == kLoadPre)) { + // if only LHS real occur, skip this candidate + bool hasNonLHS = false; + for (MeRealOcc *realOcc : workCand->GetRealOccs()) { + if (realOcc->GetOccType() == kOccReal && !realOcc->IsLHS()) { + hasNonLHS = true; + break; + } + } + if (!hasNonLHS) { + workCand->deletedFromWorkList = true; + continue; + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "||||||| MC-SSAPRE candidate " << cnt << " at worklist index " + << workCand->GetIndex() << ": "; + workCand->DumpCand(*irMap); + if (workCand->isSRCand) { + mirModule->GetOut() << " srCand"; + } + if (workCand->onlyInvariantOpnds) { + mirModule->GetOut() << " 
onlyInvairantOpnds"; + } + mirModule->GetOut() << '\n'; + } + allOccs.clear(); + phiOccs.clear(); + nextRGNodeId = 1; + occ2RGNodeMap.clear(); + numSourceEdges = 0; + maxFlowRoutes.clear(); + minCut.clear(); + source = nullptr; + sink = nullptr; + // #1 Insert PHI; results in allOccs and phiOccs + ComputeVarAndDfPhis(); + CreateSortedOccs(); + if (workCand->GetRealOccs().empty()) { + workCand->deletedFromWorkList = true; + continue; + } + // set the position field in the MeRealOcc nodes + for (size_t j = 0; j < workCand->GetRealOccs().size(); j++) { + workCand->GetRealOcc(j)->SetPosition(j); + } + // #2 Rename + Rename1(); + Rename2(); + if (!phiOccs.empty()) { + // if no PHI inserted, no need to perform these steps + // #3 data flow methods + ComputeFullAvail(); + ComputePartialAnt(); + // #4 graph reduction + GraphReduction(); + // #5 single source + AddSingleSource(); + // #6 single sink + AddSingleSink(); + // step 7 max flow/min cut + FindMaxFlow(); + DetermineMinCut(); + // step 8 willbeavail + ComputeMCWillBeAvail(); + } + // #5 Finalize + Finalize1(); + if (workCand->Redo2HandleCritEdges()) { + // reinitialize def field to nullptr + for (MeOccur *occ : allOccs) { + occ->SetDef(nullptr); + if (occ->GetOccType() == kOccPhiopnd) { + auto *phiOpndOcc = static_cast(occ); + phiOpndOcc->SetIsProcessed(false); + } + } + Rename1(); + Rename2(); + ComputeFullAvail(); + ComputePartialAnt(); + GraphReduction(); + AddSingleSource(); + AddSingleSink(); + Finalize1(); + FindMaxFlow(); + DetermineMinCut(); + ComputeMCWillBeAvail(); + } + Finalize2(); + workCand->deletedFromWorkList = true; + // #6 CodeMotion and recompute worklist based on newly occurrence + CodeMotion(); + if (preKind == kStmtPre && (workCand->GetRealOccs().front()->GetOpcodeOfMeStmt() == OP_dassign || + workCand->GetRealOccs().front()->GetOpcodeOfMeStmt() == OP_callassigned)) { + // apply full redundancy elimination + DoSSAFRE(); + } + perCandMemPool->ReleaseContainingMem(); + } +} + +} // namespace 
maple diff --git a/src/mapleall/maple_me/src/me_option.cpp b/src/mapleall/maple_me/src/me_option.cpp index d062458a0d..dccb7b78eb 100644 --- a/src/mapleall/maple_me/src/me_option.cpp +++ b/src/mapleall/maple_me/src/me_option.cpp @@ -132,6 +132,7 @@ SafetyCheckMode MeOption::boundaryCheckMode = SafetyCheckMode::kNoCheck; bool MeOption::safeRegionMode = false; bool MeOption::unifyRets = false; bool MeOption::dumpCfgOfPhases = false; +bool MeOption::epreUseProfile = true; #if MIR_JAVA std::string MeOption::acquireFuncName = "Landroid/location/LocationManager;|requestLocationUpdates|"; std::string MeOption::releaseFuncName = "Landroid/location/LocationManager;|removeUpdates|"; @@ -315,6 +316,7 @@ bool MeOption::SolveOptions(bool isDebug) { maplecl::CopyIfEnabled(lessThrowAlias, opts::me::lessthrowalias); maplecl::CopyIfEnabled(propBase, opts::me::propbase); maplecl::CopyIfEnabled(dumpCfgOfPhases, opts::me::dumpCfgOfPhases); + maplecl::CopyIfEnabled(epreUseProfile, opts::me::epreUseProfile); if (opts::me::propiloadref.IsEnabledByUser()) { propIloadRef = opts::me::propiloadref; diff --git a/src/mapleall/maple_me/src/me_options.cpp b/src/mapleall/maple_me/src/me_options.cpp index 79adab5f52..2b448a265c 100644 --- a/src/mapleall/maple_me/src/me_options.cpp +++ b/src/mapleall/maple_me/src/me_options.cpp @@ -698,5 +698,10 @@ maplecl::Option lfo({"--lfo"}, maplecl::Option dumpCfgOfPhases({"--dumpcfgofphases"}, " --dumpcfgofphases \tDump CFG from various phases to .dot files\n", {meCategory}); +maplecl::Option epreUseProfile({"--epreuseprofile"}, + " --epreuseprofile \tEnable profile-guided epre phase\n" + " --no-epreuseprofile \tDisable profile-guided epre phase\n", + {meCategory}, + maplecl::DisableWith("--no-epreuseprofile")); } diff --git a/src/mapleall/maple_me/src/me_ssa_epre.cpp b/src/mapleall/maple_me/src/me_ssa_epre.cpp index f2019b28a0..b209c00fc3 100644 --- a/src/mapleall/maple_me/src/me_ssa_epre.cpp +++ b/src/mapleall/maple_me/src/me_ssa_epre.cpp @@ -103,6 +103,9 
@@ bool MESSAEPre::PhaseRun(maple::MeFunction &f) { } MeSSAEPre ssaPre(f, *irMap, *dom, *pdom, kh, *ssaPreMemPool, *ApplyTempMemPool(), epreLimitUsed, epreIncludeRef, MeOption::epreLocalRefVar, MeOption::epreLHSIvar); + if (f.GetMirFunc()->GetFuncProfData() && MeOption::epreUseProfile) { + ssaPre.doMinCut = true; + } ssaPre.SetSpillAtCatch(MeOption::spillAtCatch); if (MeOption::strengthReduction && !f.GetMIRModule().IsJavaModule()) { ssaPre.strengthReduction = true; @@ -120,7 +123,11 @@ bool MESSAEPre::PhaseRun(maple::MeFunction &f) { if (DEBUGFUNC_NEWPM(f)) { ssaPre.SetSSAPreDebug(true); } - ssaPre.ApplySSAPRE(); + if (!ssaPre.doMinCut) { + ssaPre.ApplySSAPRE(); + } else { + ssaPre.ApplyMCSSAPRE(); + } if (!ssaPre.GetCandsForSSAUpdate().empty()) { MeSSAUpdate ssaUpdate(f, *f.GetMeSSATab(), *dom, ssaPre.GetCandsForSSAUpdate()); ssaUpdate.Run(); diff --git a/src/mapleall/maple_me/src/me_stmt_pre.cpp b/src/mapleall/maple_me/src/me_stmt_pre.cpp index 7a6d13161c..12a704cd5d 100644 --- a/src/mapleall/maple_me/src/me_stmt_pre.cpp +++ b/src/mapleall/maple_me/src/me_stmt_pre.cpp @@ -178,7 +178,7 @@ void MeStmtPre::Finalize1() { auto *phiOpnd = static_cast(occ); MePhiOcc *phiOcc = phiOpnd->GetDefPhiOcc(); if (phiOcc->IsWillBeAvail()) { - if (phiOpnd->IsOkToInsert()) { + if (OKToInsert(phiOpnd)) { // insert the current expression at the end of the block containing phiOpnd if (phiOpnd->GetBB()->GetSucc().size() > 1) { CHECK_FATAL(!workCand->Redo2HandleCritEdges(), "Finalize1: insertion at critical edge; aborting"); diff --git a/src/mapleall/maple_me/src/occur.cpp b/src/mapleall/maple_me/src/occur.cpp index 9a52a85ffe..c45241d152 100644 --- a/src/mapleall/maple_me/src/occur.cpp +++ b/src/mapleall/maple_me/src/occur.cpp @@ -107,22 +107,6 @@ MeExpr *MeOccur::GetSavedExpr() { } } -// return true if either: -// operand is nullptr (def is null), or -// hasRealUse is false and defined by a PHI not will be avail -bool MePhiOpndOcc::IsOkToInsert() const { - if (GetDef() == nullptr) { 
- return true; - } - if (!hasRealUse) { - const MeOccur *defOcc = GetDef(); - if (defOcc->GetOccType() == kOccPhiocc && !static_cast(defOcc)->IsWillBeAvail()) { - return true; - } - } - return false; -} - bool MePhiOcc::IsOpndDefByRealOrInserted() const { for (MePhiOpndOcc *phiOpnd : phiOpnds) { MeOccur *defOcc = phiOpnd->GetDef(); @@ -152,6 +136,9 @@ void MeRealOcc::Dump(const IRMap &irMap) const { } else { mod->GetOut() << "RealOcc(LHS) "; } + if (rgExcluded) { + mod->GetOut() << "rgexcluded "; + } if (meExpr != nullptr) { meExpr->Dump(&irMap); } else { diff --git a/src/mapleall/maple_me/src/ssa_pre.cpp b/src/mapleall/maple_me/src/ssa_pre.cpp index a94b544c20..747da400f7 100644 --- a/src/mapleall/maple_me/src/ssa_pre.cpp +++ b/src/mapleall/maple_me/src/ssa_pre.cpp @@ -165,7 +165,7 @@ void SSAPre::GenerateSavePhiOcc(MePhiOcc &phiOcc) { void SSAPre::UpdateInsertedPhiOccOpnd() { for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { MePhiOcc *phiOcc = *it; - if (!phiOcc->IsWillBeAvail() || phiOcc->IsRemoved()) { + if (!WillBeAvail(phiOcc) || phiOcc->IsRemoved()) { continue; } if (phiOcc->GetRegPhi()) { @@ -237,7 +237,7 @@ void SSAPre::CodeMotion() { } case kOccPhiopnd: { MePhiOpndOcc *phiopnd = static_cast(occ); - if (phiopnd->GetDefPhiOcc()->IsRemoved() || !phiopnd->GetDefPhiOcc()->IsWillBeAvail()) { + if (phiopnd->GetDefPhiOcc()->IsRemoved() || !WillBeAvail(phiopnd->GetDefPhiOcc())) { break; } if (phiopnd->GetDef()->GetOccType() == kOccInserted) { @@ -285,7 +285,7 @@ void SSAPre::CodeMotion() { } case kOccPhiopnd: { auto *phiOpnd = static_cast(occ); - if (phiOpnd->GetDefPhiOcc()->IsRemoved() || !phiOpnd->GetDefPhiOcc()->IsWillBeAvail()) { + if (phiOpnd->GetDefPhiOcc()->IsRemoved() || !WillBeAvail(phiOpnd->GetDefPhiOcc())) { break; } MeOccur *defOcc = phiOpnd->GetDef(); @@ -313,7 +313,7 @@ void SSAPre::CodeMotion() { } case kOccPhiocc: { auto *phiOcc = static_cast(occ); - if (phiOcc->IsRemoved() || !phiOcc->IsWillBeAvail()) { + if (phiOcc->IsRemoved() || 
!WillBeAvail(phiOcc)) { break; } GenerateSavePhiOcc(*phiOcc); @@ -331,7 +331,7 @@ void SSAPre::CodeMotion() { } } else { MePhiOcc *phiOcc = static_cast(compOcc->GetDef()); - if (phiOcc->IsRemoved() || !phiOcc->IsWillBeAvail() || !phiOcc->IsDownSafe()) { + if (phiOcc->IsRemoved() || !WillBeAvail(phiOcc) || !phiOcc->IsDownSafe()) { break; } } @@ -366,6 +366,24 @@ void SSAPre::CodeMotion() { } // ================ Step 5: Finalize ================= + +// return true if either: +// operand is nullptr (def is null), or +// hasRealUse is false and defined by a PHI not will be avail +bool SSAPre::OKToInsert(MePhiOpndOcc *phiOpnd) { + if (phiOpnd->GetDef() == nullptr) { + return true; + } + if (!phiOpnd->HasRealUse()) { + MeOccur *defOcc = phiOpnd->GetDef(); + if (defOcc->GetOccType() == kOccPhiocc && + !WillBeAvail(static_cast(defOcc))) { + return true; + } + } + return false; +} + void SSAPre::Finalize1() { std::vector availDefVec(classCount, nullptr); // traversal in preoder DT @@ -374,7 +392,7 @@ void SSAPre::Finalize1() { switch (occ->GetOccType()) { case kOccPhiocc: { auto *phiOcc = static_cast(occ); - if (phiOcc->IsWillBeAvail()) { + if (WillBeAvail(phiOcc)) { availDefVec[classX] = phiOcc; } break; @@ -418,10 +436,10 @@ void SSAPre::Finalize1() { // we assume one phiOpnd has only one phiOcc use because critical edge split the blocks auto *phiOpnd = static_cast(occ); MePhiOcc *phiOcc = phiOpnd->GetDefPhiOcc(); - if (!phiOcc->IsWillBeAvail()) { + if (!WillBeAvail(phiOcc)) { break; } - if (phiOpnd->IsOkToInsert()) { + if (OKToInsert(phiOpnd)) { // insert the current expression at the end of the block containing phiOpnd if (phiOpnd->GetBB()->GetSucc().size() > 1) { CHECK_FATAL(!workCand->Redo2HandleCritEdges(), "Finalize1: insertion at critical edge, aborting"); @@ -466,7 +484,7 @@ void SSAPre::Finalize1() { " after Finalize1===================\n"; for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { MePhiOcc *phiOcc = *it; - if (!phiOcc->IsWillBeAvail()) { + if 
(!WillBeAvail(phiOcc)) { continue; } for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { @@ -504,7 +522,7 @@ void SSAPre::SetSave(MeOccur &defX) { CHECK_FATAL(!dom->IsNodeVecEmpty(), "the size to be allocated is 0"); GetIterDomFrontier(fromBb, &itFrontier); for (MePhiOcc *phiOcc : phiOccs) { - if (!phiOcc->IsWillBeAvail()) { + if (!WillBeAvail(phiOcc)) { continue; } if (itFrontier.find(dom->GetDtDfnItem(phiOcc->GetBB()->GetBBId())) == itFrontier.end()) { @@ -551,7 +569,7 @@ void SSAPre::Finalize2() { MePhiOcc *phiOcc = *it; // initialize extraneouse for each MePhiOcc if (!workCand->isSRCand) { - phiOcc->SetIsExtraneous(phiOcc->IsWillBeAvail()); + phiOcc->SetIsExtraneous(WillBeAvail(phiOcc)); } // initialize each operand of phiOcc @@ -571,7 +589,7 @@ void SSAPre::Finalize2() { if (phiOcc->IsRemoved() || !phiOcc->IsExtraneous()) { continue; } - if (!phiOcc->IsWillBeAvail()) { + if (!WillBeAvail(phiOcc)) { phiOcc->SetIsRemoved(true); continue; } @@ -1153,13 +1171,15 @@ void SSAPre::SetVarPhis(MeExpr *meExpr) { } if (scalar->IsDefByPhi()) { MePhiNode *phiMeNode = scalar->GetMePhiDef(); - BBId defBBId = phiMeNode->GetDefBB()->GetBBId(); - CHECK(defBBId < dom->GetDtDfnSize(), "defBBId.idx out of range in SSAPre::SetVarPhis"); - if (varPhiDfns.find(dom->GetDtDfnItem(defBBId)) == varPhiDfns.end() && ScreenPhiBB(defBBId)) { - (void)varPhiDfns.insert(dom->GetDtDfnItem(defBBId)); - for (auto opndIt = phiMeNode->GetOpnds().begin(); opndIt != phiMeNode->GetOpnds().end(); ++opndIt) { - ScalarMeExpr *opnd = *opndIt; - SetVarPhis(opnd); + if (phiMeNode->GetOpnds().size() > 1) { + BBId defBBId = phiMeNode->GetDefBB()->GetBBId(); + CHECK(defBBId < dom->GetDtDfnSize(), "defBBId.idx out of range in SSAPre::SetVarPhis"); + if (varPhiDfns.find(dom->GetDtDfnItem(defBBId)) == varPhiDfns.end() && ScreenPhiBB(defBBId)) { + (void)varPhiDfns.insert(dom->GetDtDfnItem(defBBId)); + for (auto opndIt = phiMeNode->GetOpnds().begin(); opndIt != phiMeNode->GetOpnds().end(); ++opndIt) { + 
ScalarMeExpr *opnd = *opndIt; + SetVarPhis(opnd); + } } } } diff --git a/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp b/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp index 3a0266558e..8d344bdeb8 100644 --- a/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp +++ b/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp @@ -349,7 +349,7 @@ void ExprHoist::HoistExpr(const MapleVector &allOccs, int32 candId) { } auto *phiOpndocc = static_cast(occ); auto *phiOcc = phiOpndocc->GetDefPhiOcc(); - if (phiOcc->IsWillBeAvail() && phiOpndocc->IsOkToInsert()) { + if (phiOcc->IsWillBeAvail() && parent->OKToInsert(phiOpndocc)) { if (hs->cdHS && // need a cd to hoist hs->occ == nullptr && // if not null, hs has been inserted hs->cdHS->occ != nullptr && // make sure there's at least one realocc at cd -- Gitee From 0693ad18cb34d864389b6fda0afcf28db8d42c8f Mon Sep 17 00:00:00 2001 From: Fred Chow Date: Thu, 1 Dec 2022 00:06:38 -0800 Subject: [PATCH 5/6] Added --pgorange= option for triaging PGO optimizations --- src/mapleall/maple_me/include/me_option.h | 3 +++ src/mapleall/maple_me/include/me_options.h | 1 + src/mapleall/maple_me/src/mc_ssa_pre.cpp | 7 ++++--- src/mapleall/maple_me/src/me_option.cpp | 24 ++++++++++++++++++++++ src/mapleall/maple_me/src/me_options.cpp | 5 +++++ src/mapleall/maple_me/src/me_ssa_epre.cpp | 9 +++++++- src/mapleall/maple_me/src/ssa_pre.cpp | 8 ++++---- 7 files changed, 49 insertions(+), 8 deletions(-) diff --git a/src/mapleall/maple_me/include/me_option.h b/src/mapleall/maple_me/include/me_option.h index 7728418555..d3e54bd7f4 100644 --- a/src/mapleall/maple_me/include/me_option.h +++ b/src/mapleall/maple_me/include/me_option.h @@ -37,6 +37,7 @@ class MeOption { SplitPhases(str, skipPhases); } bool GetRange(const std::string &str) const; + bool GetPgoRange(const std::string &str) const; const std::unordered_set &GetSkipPhases() const { return skipPhases; @@ -85,6 +86,8 @@ class MeOption { static constexpr int kRangeArrayLen = 2; static unsigned long 
range[kRangeArrayLen]; static bool useRange; + static unsigned long pgoRange[kRangeArrayLen]; + static bool usePgoRange; static std::string dumpFunc; static std::string skipFrom; static std::string skipAfter; diff --git a/src/mapleall/maple_me/include/me_options.h b/src/mapleall/maple_me/include/me_options.h index 17230e5ae2..a9a81d16aa 100644 --- a/src/mapleall/maple_me/include/me_options.h +++ b/src/mapleall/maple_me/include/me_options.h @@ -32,6 +32,7 @@ extern maplecl::Option os; extern maplecl::Option o3; extern maplecl::Option refusedcheck; extern maplecl::Option range; +extern maplecl::Option pgoRange; extern maplecl::Option dumpPhases; extern maplecl::Option skipPhases; extern maplecl::Option dumpFunc; diff --git a/src/mapleall/maple_me/src/mc_ssa_pre.cpp b/src/mapleall/maple_me/src/mc_ssa_pre.cpp index bf2c7f3abb..99cf308597 100644 --- a/src/mapleall/maple_me/src/mc_ssa_pre.cpp +++ b/src/mapleall/maple_me/src/mc_ssa_pre.cpp @@ -63,8 +63,9 @@ void McSSAPre::ComputeMCWillBeAvail() const { // set insert in phi operands for (Visit *visit : minCut) { MeOccur *occ = visit->node->occ; - if (occ->GetOccType() == kOccPhiopnd) { - MePhiOpndOcc *phiOpndOcc = static_cast(occ); + if (occ->GetOccType() == kOccPhiocc) { + MePhiOcc *phiOcc = static_cast(occ); + MePhiOpndOcc *phiOpndOcc = phiOcc->GetPhiOpnd(visit->predIdx); phiOpndOcc->SetIsMCInsert(true); } } @@ -307,7 +308,7 @@ void McSSAPre::DetermineMinCut() { minCut.push_back(cut[0]); size_t duplicatedVisits = 0; for (uint32 i = 1; i < maxFlowRoutes.size(); i++) { - if (cut[i] == cut[i-1]) { + if (cut[i] != cut[i-1]) { minCut.push_back(cut[i]); } else { duplicatedVisits++; diff --git a/src/mapleall/maple_me/src/me_option.cpp b/src/mapleall/maple_me/src/me_option.cpp index dccb7b78eb..7bb4b9080f 100644 --- a/src/mapleall/maple_me/src/me_option.cpp +++ b/src/mapleall/maple_me/src/me_option.cpp @@ -28,7 +28,9 @@ bool MeOption::isBigEndian = false; bool MeOption::dumpAfter = false; std::string MeOption::dumpFunc = "*"; 
unsigned long MeOption::range[kRangeArrayLen] = { 0, 0 }; +unsigned long MeOption::pgoRange[kRangeArrayLen] = { 0, 0 }; bool MeOption::useRange = false; +bool MeOption::usePgoRange = false; bool MeOption::quiet = false; bool MeOption::setCalleeHasSideEffect = false; bool MeOption::unionBasedAA = true; @@ -196,6 +198,14 @@ bool MeOption::SolveOptions(bool isDebug) { } } + if (opts::me::pgoRange.IsEnabledByUser()) { + usePgoRange = true; + bool ret = GetPgoRange(opts::me::pgoRange); + if (!ret) { + return ret; + } + } + maplecl::CopyIfEnabled(dumpBefore, opts::me::dumpBefore); maplecl::CopyIfEnabled(dumpAfter, opts::me::dumpAfter); @@ -486,6 +496,20 @@ bool MeOption::GetRange(const std::string &str) const { return true; } +bool MeOption::GetPgoRange(const std::string &str) const { + std::string s{ str }; + size_t comma = s.find_first_of(",", 0); + if (comma != std::string::npos) { + pgoRange[0] = std::stoul(s.substr(0, comma), nullptr); + pgoRange[1] = std::stoul(s.substr(comma + 1, std::string::npos - (comma + 1)), nullptr); + } + if (pgoRange[0] > pgoRange[1]) { + LogInfo::MapleLogger(kLlErr) << "invalid values for --pgorange=" << pgoRange[0] << "," << pgoRange[1] << '\n'; + return false; + } + return true; +} + bool MeOption::DumpPhase(const std::string &phase) { if (phase == "") { return false; diff --git a/src/mapleall/maple_me/src/me_options.cpp b/src/mapleall/maple_me/src/me_options.cpp index 2b448a265c..4b76b19384 100644 --- a/src/mapleall/maple_me/src/me_options.cpp +++ b/src/mapleall/maple_me/src/me_options.cpp @@ -51,6 +51,11 @@ maplecl::Option range({"--range"}, " \t--range=NUM0,NUM1\n", {meCategory}); +maplecl::Option pgoRange({"--pgorange"}, + " --pglrange \tUse profile-guided optimizations only for funcid in the range [NUM0, NUM1]\n" + " \t--pgorange=NUM0,NUM1\n", + {meCategory}); + maplecl::Option dumpPhases({"--dump-phases"}, " --dump-phases \tEnable debug trace for specified phases" " in the comma separated list\n" diff --git 
a/src/mapleall/maple_me/src/me_ssa_epre.cpp b/src/mapleall/maple_me/src/me_ssa_epre.cpp index b209c00fc3..81f8563c5a 100644 --- a/src/mapleall/maple_me/src/me_ssa_epre.cpp +++ b/src/mapleall/maple_me/src/me_ssa_epre.cpp @@ -104,7 +104,14 @@ bool MESSAEPre::PhaseRun(maple::MeFunction &f) { MeSSAEPre ssaPre(f, *irMap, *dom, *pdom, kh, *ssaPreMemPool, *ApplyTempMemPool(), epreLimitUsed, epreIncludeRef, MeOption::epreLocalRefVar, MeOption::epreLHSIvar); if (f.GetMirFunc()->GetFuncProfData() && MeOption::epreUseProfile) { - ssaPre.doMinCut = true; + if (MeOption::usePgoRange) { + if (f.GetMirFunc()->GetPuidxOrigin() >= MeOption::pgoRange[0] && + f.GetMirFunc()->GetPuidxOrigin() <= MeOption::pgoRange[1]) { + ssaPre.doMinCut = true; + } + } else { + ssaPre.doMinCut = true; + } } ssaPre.SetSpillAtCatch(MeOption::spillAtCatch); if (MeOption::strengthReduction && !f.GetMIRModule().IsJavaModule()) { diff --git a/src/mapleall/maple_me/src/ssa_pre.cpp b/src/mapleall/maple_me/src/ssa_pre.cpp index 747da400f7..ea96fa47b8 100644 --- a/src/mapleall/maple_me/src/ssa_pre.cpp +++ b/src/mapleall/maple_me/src/ssa_pre.cpp @@ -625,13 +625,13 @@ void SSAPre::Finalize2() { mirModule->GetOut() << std::endl; } else if (occ->GetOccType() == kOccReal) { auto *realOcc = static_cast(occ); + realOcc->Dump(*irMap); if (realOcc->IsReload()) { - realOcc->Dump(*irMap); mirModule->GetOut() << " isReload\n"; - } - if (realOcc->IsSave()) { - realOcc->Dump(*irMap); + } else if (realOcc->IsSave()) { mirModule->GetOut() << " isSave\n"; + } else { + mirModule->GetOut() << "\n"; } } else if (occ->GetOccType() == kOccPhiopnd) { auto *phiOpndOcc = static_cast(occ); -- Gitee From a58cdf67bf8874c6fa9d2235dfed852fa255aca1 Mon Sep 17 00:00:00 2001 From: Fred Chow Date: Thu, 15 Dec 2022 15:23:16 -0800 Subject: [PATCH 6/6] Enhanced the SSAPRE driver so it can select the original or mincut version of the algorithm for individual candidates.
Indirect loads are no longer applicable for the mincut algorithm because they cannot be speculated. Added --epreuseprofilelimit option to help in triaging. --- src/mapleall/maple_me/CMakeLists.txt | 1 + src/mapleall/maple_me/include/mc_ssa_pre.h | 4 +- src/mapleall/maple_me/include/me_option.h | 1 + src/mapleall/maple_me/include/me_options.h | 1 + src/mapleall/maple_me/include/occur.h | 6 +- src/mapleall/maple_me/include/ssa_pre.h | 2 +- src/mapleall/maple_me/src/mc_ssa_pre.cpp | 253 +++------------------ src/mapleall/maple_me/src/me_option.cpp | 2 + src/mapleall/maple_me/src/me_options.cpp | 5 + src/mapleall/maple_me/src/me_ssa_epre.cpp | 3 + src/mapleall/maple_me/src/ssa_pre.cpp | 25 +- 11 files changed, 62 insertions(+), 241 deletions(-) diff --git a/src/mapleall/maple_me/CMakeLists.txt b/src/mapleall/maple_me/CMakeLists.txt index 009df17388..d54ec8e38c 100755 --- a/src/mapleall/maple_me/CMakeLists.txt +++ b/src/mapleall/maple_me/CMakeLists.txt @@ -130,6 +130,7 @@ set(src_libmplme src/me_slp.cpp src/lmbc_memlayout.cpp src/lmbc_lower.cpp + src/mc_ssa_pre.cpp ) set(src_libmplmewpo diff --git a/src/mapleall/maple_me/include/mc_ssa_pre.h b/src/mapleall/maple_me/include/mc_ssa_pre.h index 343b40b833..1036dfe7bd 100644 --- a/src/mapleall/maple_me/include/mc_ssa_pre.h +++ b/src/mapleall/maple_me/include/mc_ssa_pre.h @@ -68,6 +68,7 @@ class McSSAPre : public SSAPre { virtual ~McSSAPre() = default; void ApplyMCSSAPRE(); + void SetPreUseProfileLimit(uint32 n) { preUseProfileLimit = n; } private: // step 8 willbeavail void ResetMCWillBeAvail(MePhiOcc *phiOcc) const; @@ -94,8 +95,6 @@ class McSSAPre : public SSAPre { void ComputePartialAnt() const; void ResetFullAvail(MePhiOcc *occ) const; void ComputeFullAvail() const; - // step 2 renaming methods - void Rename1(); MapleUnorderedMap occ2RGNodeMap; RGNode *source; @@ -106,6 +105,7 @@ class McSSAPre : public SSAPre { uint64 maxFlowValue; uint64 relaxedMaxFlowValue; // relax maxFlowValue to avoid excessive mincut search time 
when number of routes is large MapleVector minCut; // an array of Visits* to represent the minCut + uint32 preUseProfileLimit = UINT32_MAX; }; } // namespace maple diff --git a/src/mapleall/maple_me/include/me_option.h b/src/mapleall/maple_me/include/me_option.h index d3e54bd7f4..3d9ca50ff4 100644 --- a/src/mapleall/maple_me/include/me_option.h +++ b/src/mapleall/maple_me/include/me_option.h @@ -116,6 +116,7 @@ class MeOption { static uint32 stmtprePULimit; static uint32 epreLimit; static uint32 eprePULimit; + static uint32 epreUseProfileLimit; static uint32 lpreLimit; static uint32 lprePULimit; static uint32 parserOpt; diff --git a/src/mapleall/maple_me/include/me_options.h b/src/mapleall/maple_me/include/me_options.h index a9a81d16aa..a16cf27ac4 100644 --- a/src/mapleall/maple_me/include/me_options.h +++ b/src/mapleall/maple_me/include/me_options.h @@ -58,6 +58,7 @@ extern maplecl::Option dumpAfter; extern maplecl::Option realcheckcast; extern maplecl::Option eprelimit; extern maplecl::Option eprepulimit; +extern maplecl::Option epreuseprofilelimit; extern maplecl::Option stmtprepulimit; extern maplecl::Option lprelimit; extern maplecl::Option lprepulimit; diff --git a/src/mapleall/maple_me/include/occur.h b/src/mapleall/maple_me/include/occur.h index 06c2da2a2c..c2f5b9a4bb 100644 --- a/src/mapleall/maple_me/include/occur.h +++ b/src/mapleall/maple_me/include/occur.h @@ -521,7 +521,7 @@ class MePhiOcc : public MeOccur { private: bool isDownSafe; // default is true bool speculativeDownSafe; // is downsafe due to speculation - bool isCanBeAvail; + bool isCanBeAvail; // used for fullyAvail in mc-ssapre bool isLater; bool isExtraneous; bool isRemoved; // during finalize2, marked this phiocc is removed or not @@ -545,7 +545,8 @@ class PreWorkCand { needLocalRefVar(false), isSRCand(false), onlyInvariantOpnds(false), - deletedFromWorkList(false) { + deletedFromWorkList(false), + applyMinCut(false) { ASSERT(pIdx != 0, "PreWorkCand: initial puIdx cannot be 0"); } @@ 
-678,6 +679,7 @@ class PreWorkCand { bool isSRCand : 1; // is a strength reduction candidate bool onlyInvariantOpnds : 1; // all operands have only 1 SSA version bool deletedFromWorkList : 1; // processed by SSAPRE already + bool applyMinCut : 1; // if using mc-ssapre for this candidate }; class PreStmtWorkCand : public PreWorkCand { diff --git a/src/mapleall/maple_me/include/ssa_pre.h b/src/mapleall/maple_me/include/ssa_pre.h index 20649704dd..6f3b50e628 100644 --- a/src/mapleall/maple_me/include/ssa_pre.h +++ b/src/mapleall/maple_me/include/ssa_pre.h @@ -158,7 +158,7 @@ class SSAPre { virtual void CodeMotion(); // step 5 Finalize methods bool WillBeAvail(MePhiOcc *phiOcc) { - if (!doMinCut) { + if (!workCand->applyMinCut) { return phiOcc->IsWillBeAvail(); } return phiOcc->IsMCWillBeAvail(); diff --git a/src/mapleall/maple_me/src/mc_ssa_pre.cpp b/src/mapleall/maple_me/src/mc_ssa_pre.cpp index 99cf308597..55f3389a78 100644 --- a/src/mapleall/maple_me/src/mc_ssa_pre.cpp +++ b/src/mapleall/maple_me/src/mc_ssa_pre.cpp @@ -697,207 +697,6 @@ void McSSAPre::ComputeFullAvail() const { } } -// ================ Step 2: Renaming ================= -void McSSAPre::Rename1() { - std::stack occStack; - rename2Set.clear(); - classCount = 1; - // iterate the occurrence according to its preorder dominator tree - for (MeOccur *occ : allOccs) { - while (!occStack.empty() && !occStack.top()->IsDominate(*dom, *occ)) { - occStack.pop(); - } - switch (occ->GetOccType()) { - case kOccReal: { - if (occStack.empty()) { - // assign new class - occ->SetClassID(classCount++); - occStack.push(occ); - break; - } - MeOccur *topOccur = occStack.top(); - if (topOccur->GetOccType() == kOccUse || topOccur->GetOccType() == kOccMembar) { - occ->SetClassID(classCount++); - occStack.push(occ); - break; - } - auto *realOcc = static_cast(occ); - if (topOccur->GetOccType() == kOccReal) { - auto *realTopOccur = static_cast(topOccur); - if (AllVarsSameVersion(*realTopOccur, *realOcc)) { - // all corresponding 
variables are the same - realOcc->SetClassID(realTopOccur->GetClassID()); - if (realTopOccur->GetDef() != nullptr) { - realOcc->SetDef(realTopOccur->GetDef()); - } else { - realOcc->SetDef(realTopOccur); - } - realOcc->rgExcluded = true; - } else { - // assign new class - occ->SetClassID(classCount++); - occStack.push(occ); - } - } else { - // top of stack is a PHI occurrence - ASSERT(topOccur->GetOccType() == kOccPhiocc, "invalid kOccPhiocc"); - std::vector varVec; - CollectVarForCand(*realOcc, varVec); - bool isAllDom = true; - if (realOcc->IsLHS()) { - isAllDom = false; - } else { - for (auto varIt = varVec.begin(); varIt != varVec.end(); ++varIt) { - MeExpr *varMeExpr = *varIt; - if (workCand->isSRCand) { - varMeExpr = ResolveAllInjuringDefs(varMeExpr); - } - if (!DefVarDominateOcc(varMeExpr, *topOccur)) { - isAllDom = false; - } - } - } - MePhiOcc *phiTopOccur = static_cast(topOccur); - if (isAllDom) { - realOcc->SetClassID(topOccur->GetClassID()); - realOcc->SetDef(topOccur); - (void)rename2Set.insert(realOcc->GetPosition()); - phiTopOccur->SetIsPartialAnt(true); - } else { - // assign new class - occ->SetClassID(classCount++); - } - occStack.push(occ); - } - break; - } - case kOccCompare: { - if (occStack.empty()) { - break; - } - MeOccur *topOccur = occStack.top(); - if (topOccur->GetOccType() == kOccUse || topOccur->GetOccType() == kOccMembar) { - break; - } - MeRealOcc *realOcc = static_cast(occ); - ScalarMeExpr *scalarOpnd0 = dynamic_cast(workCand->GetTheMeExpr()->GetOpnd(0)); - ScalarMeExpr *scalarOpnd1 = dynamic_cast(workCand->GetTheMeExpr()->GetOpnd(1)); - ScalarMeExpr *compareOpnd0 = dynamic_cast(realOcc->GetMeExpr()->GetOpnd(0)); - ScalarMeExpr *compareOpnd1 = dynamic_cast(realOcc->GetMeExpr()->GetOpnd(1)); - // set compareOpnd to be the scalar operand that is common to - // workCand->theMeExpr and realOcc->meExpr - ScalarMeExpr *compareOpnd = nullptr; - uint32 scalarOpndNo = 0; - if (scalarOpnd0 != nullptr) { - if (compareOpnd0 != nullptr && 
scalarOpnd0->GetOst() == compareOpnd0->GetOst()) { - compareOpnd = compareOpnd0; - scalarOpndNo = 0; - } else if (compareOpnd1 != nullptr && scalarOpnd0->GetOst() == compareOpnd1->GetOst()) { - compareOpnd = compareOpnd1; - scalarOpndNo = 0; - } - } - if (scalarOpnd1 != nullptr) { - if (compareOpnd0 != nullptr && scalarOpnd1->GetOst() == compareOpnd0->GetOst()) { - compareOpnd = compareOpnd0; - scalarOpndNo = 1; - } else if (compareOpnd1 != nullptr && scalarOpnd1->GetOst() == compareOpnd1->GetOst()) { - compareOpnd = compareOpnd1; - scalarOpndNo = 1; - } - } - CHECK_FATAL(compareOpnd != nullptr, "Rename1: compOcc does not correspond to realOcc"); - ScalarMeExpr *resolvedCompareOpnd = ResolveAllInjuringDefs(compareOpnd); - if (topOccur->GetOccType() == kOccReal) { - MeRealOcc *realTopOccur = static_cast(topOccur); - ScalarMeExpr *topOccurOpnd = static_cast(realTopOccur->GetMeExpr()->GetOpnd(scalarOpndNo)); - if (compareOpnd == topOccurOpnd || resolvedCompareOpnd == topOccurOpnd) { - realOcc->SetClassID(realTopOccur->GetClassID()); - if (realTopOccur->GetDef() != nullptr) { - realOcc->SetDef(realTopOccur->GetDef()); - } else { - realOcc->SetDef(realTopOccur); - } - } - break; - } - // top of stack is a PHI occurrence - ASSERT(topOccur->GetOccType() == kOccPhiocc, "invalid kOccPhiocc"); - if (DefVarDominateOcc(compareOpnd, *topOccur)) { - realOcc->SetClassID(topOccur->GetClassID()); - realOcc->SetDef(topOccur); - } - break; - } - case kOccPhiocc: { - // assign new class - occ->SetClassID(classCount++); - occStack.push(occ); - break; - } - case kOccPhiopnd: { - // stow away the use occurrences at the stack top - MeOccur *stowedUseOcc = nullptr; - if (!occStack.empty() && occStack.top()->GetOccType() == kOccUse) { - stowedUseOcc = occStack.top(); - occStack.pop(); - CHECK_FATAL(occStack.empty() || occStack.top()->GetOccType() != kOccUse, - "Rename1: cannot have 2 consecutive use occurs on stack"); - } - if (occStack.empty() || occStack.top()->GetOccType() == kOccMembar) 
{ - occ->SetDef(nullptr); - } else { - MeOccur *topOccur = occStack.top(); - occ->SetDef(topOccur); - occ->SetClassID(topOccur->GetClassID()); - if (topOccur->GetOccType() == kOccReal) { - static_cast(occ)->SetHasRealUse(true); - } - } - // push stowed use_occ back - if (stowedUseOcc != nullptr) { - occStack.push(stowedUseOcc); - } - break; - } - case kOccExit: - break; - case kOccMembar: { - if (!occStack.empty()) { - MeOccur *topOccur = occStack.top(); - if (topOccur->GetOccType() == kOccPhiocc) { - } else if (topOccur->GetOccType() != occ->GetOccType()) { - occStack.push(occ); - } - } else { - occStack.push(occ); - } - break; - } - default: - ASSERT(false, "should not be here"); - break; - } - } - if (GetSSAPreDebug()) { - PreWorkCand *curCand = workCand; - mirModule->GetOut() << "++++ ssapre candidate " << curCand->GetIndex() << " after rename1\n"; - for (MeOccur *occ : allOccs) { - occ->Dump(*irMap); - mirModule->GetOut() << '\n'; - } - mirModule->GetOut() << "\n" << "rename2 set:\n"; - for (uint32 pos : rename2Set) { - MeRealOcc *occur = workCand->GetRealOcc(pos); - occur->Dump(*irMap); - mirModule->GetOut() << " with def at\n"; - occur->GetDef()->Dump(*irMap); - mirModule->GetOut() << "\n"; - } - mirModule->GetOut() << "\n"; - } -} - void McSSAPre::ApplyMCSSAPRE() { // #0 build worklist BuildWorkList(); @@ -913,6 +712,7 @@ void McSSAPre::ApplyMCSSAPRE() { } workCand = workList.front(); workCand->SetIndex(static_cast(cnt)); + workCand->applyMinCut = !(preKind == kExprPre && workCand->GetTheMeExpr()->GetMeOp() == kMeOpIvar) && cnt <= preUseProfileLimit; workList.pop_front(); if (workCand->GetRealOccs().empty()) { workCand->deletedFromWorkList = true; @@ -942,6 +742,9 @@ void McSSAPre::ApplyMCSSAPRE() { if (workCand->onlyInvariantOpnds) { mirModule->GetOut() << " onlyInvairantOpnds"; } + if (workCand->applyMinCut) { + mirModule->GetOut() << " applyMinCut"; + } mirModule->GetOut() << '\n'; } allOccs.clear(); @@ -969,24 +772,33 @@ void McSSAPre::ApplyMCSSAPRE() { 
Rename2(); if (!phiOccs.empty()) { // if no PHI inserted, no need to perform these steps - // #3 data flow methods - ComputeFullAvail(); - ComputePartialAnt(); - // #4 graph reduction - GraphReduction(); - // #5 single source - AddSingleSource(); - // #6 single sink - AddSingleSink(); - // step 7 max flow/min cut - FindMaxFlow(); - DetermineMinCut(); - // step 8 willbeavail - ComputeMCWillBeAvail(); + if (!workCand->applyMinCut) { + // #3 DownSafty + ComputeDS(); + // #4 WillBeAvail + ComputeCanBeAvail(); + ComputeLater(); + } else { + // #3 data flow methods + ComputeFullAvail(); + ComputePartialAnt(); + // #4 graph reduction + GraphReduction(); + // #5 single source + AddSingleSource(); + // #6 single sink + AddSingleSink(); + // step 7 max flow/min cut + FindMaxFlow(); + DetermineMinCut(); + // step 8 willbeavail + ComputeMCWillBeAvail(); + } } // #5 Finalize Finalize1(); if (workCand->Redo2HandleCritEdges()) { + workCand->applyMinCut = false; // reinitialize def field to nullptr for (MeOccur *occ : allOccs) { occ->SetDef(nullptr); @@ -997,15 +809,10 @@ void McSSAPre::ApplyMCSSAPRE() { } Rename1(); Rename2(); - ComputeFullAvail(); - ComputePartialAnt(); - GraphReduction(); - AddSingleSource(); - AddSingleSink(); + ComputeDS(); + ComputeCanBeAvail(); + ComputeLater(); Finalize1(); - FindMaxFlow(); - DetermineMinCut(); - ComputeMCWillBeAvail(); } Finalize2(); workCand->deletedFromWorkList = true; diff --git a/src/mapleall/maple_me/src/me_option.cpp b/src/mapleall/maple_me/src/me_option.cpp index 7bb4b9080f..41956f19b8 100644 --- a/src/mapleall/maple_me/src/me_option.cpp +++ b/src/mapleall/maple_me/src/me_option.cpp @@ -57,6 +57,7 @@ uint32 MeOption::delRcPULimit = UINT32_MAX; uint32 MeOption::stmtprePULimit = UINT32_MAX; uint32 MeOption::epreLimit = UINT32_MAX; uint32 MeOption::eprePULimit = UINT32_MAX; +uint32 MeOption::epreUseProfileLimit = UINT32_MAX; uint32 MeOption::lpreLimit = UINT32_MAX; uint32 MeOption::lprePULimit = UINT32_MAX; uint32 
MeOption::pregRenameLimit = UINT32_MAX; @@ -304,6 +305,7 @@ bool MeOption::SolveOptions(bool isDebug) { maplecl::CopyIfEnabled(warnNativeFunc, opts::me::warnemptynative); maplecl::CopyIfEnabled(epreLimit, opts::me::eprelimit); maplecl::CopyIfEnabled(eprePULimit, opts::me::eprepulimit); + maplecl::CopyIfEnabled(epreUseProfileLimit, opts::me::epreuseprofilelimit); maplecl::CopyIfEnabled(stmtprePULimit, opts::me::stmtprepulimit); maplecl::CopyIfEnabled(lpreLimit, opts::me::lprelimit); maplecl::CopyIfEnabled(lprePULimit, opts::me::lprepulimit); diff --git a/src/mapleall/maple_me/src/me_options.cpp b/src/mapleall/maple_me/src/me_options.cpp index 4b76b19384..1b2a68ce8c 100644 --- a/src/mapleall/maple_me/src/me_options.cpp +++ b/src/mapleall/maple_me/src/me_options.cpp @@ -213,6 +213,11 @@ maplecl::Option eprepulimit({"--eprepulimit"}, " \t--eprepulimit=NUM\n", {meCategory}); +maplecl::Option epreuseprofilelimit({"--epreuseprofilelimit"}, + " --epreuseprofilelimit \tMake EPRE take advantage of profile data only for the first NUM expressions\n" + " \t--epreuseprofilelimit=NUM\n", + {meCategory}); + maplecl::Option stmtprepulimit({"--stmtprepulimit"}, " --stmtprepulimit \tApply STMTPRE optimization only for" " the first NUM PUs\n" diff --git a/src/mapleall/maple_me/src/me_ssa_epre.cpp b/src/mapleall/maple_me/src/me_ssa_epre.cpp index 81f8563c5a..dc285e63b1 100644 --- a/src/mapleall/maple_me/src/me_ssa_epre.cpp +++ b/src/mapleall/maple_me/src/me_ssa_epre.cpp @@ -112,6 +112,9 @@ bool MESSAEPre::PhaseRun(maple::MeFunction &f) { } else { ssaPre.doMinCut = true; } + if (ssaPre.doMinCut) { + ssaPre.SetPreUseProfileLimit(MeOption::epreUseProfileLimit); + } } ssaPre.SetSpillAtCatch(MeOption::spillAtCatch); if (MeOption::strengthReduction && !f.GetMIRModule().IsJavaModule()) { diff --git a/src/mapleall/maple_me/src/ssa_pre.cpp b/src/mapleall/maple_me/src/ssa_pre.cpp index ea96fa47b8..2b45f73d57 100644 --- a/src/mapleall/maple_me/src/ssa_pre.cpp +++ 
b/src/mapleall/maple_me/src/ssa_pre.cpp @@ -850,6 +850,7 @@ void SSAPre::Rename1() { } else { realOcc->SetDef(realTopOccur); } + realOcc->rgExcluded = true; // relevant only to mc-ssapre } else { // assign new class occ->SetClassID(classCount++); @@ -874,24 +875,24 @@ void SSAPre::Rename1() { } } } + MePhiOcc *phiTopOccur = static_cast(topOccur); if (isAllDom) { realOcc->SetClassID(topOccur->GetClassID()); realOcc->SetDef(topOccur); (void)rename2Set.insert(realOcc->GetPosition()); - occStack.push(realOcc); - if (IsLoopHeadBB(topOccur->GetBB()->GetBBId())) { + phiTopOccur->SetIsPartialAnt(true); + if (!doMinCut && IsLoopHeadBB(topOccur->GetBB()->GetBBId())) { static_cast(topOccur)->SetSpeculativeDownSafe(true); static_cast(topOccur)->SetIsDownSafe(true); } } else { - auto *phiTopOccur = static_cast(topOccur); if (!phiTopOccur->SpeculativeDownSafe()) { phiTopOccur->SetIsDownSafe(false); } // assign new class occ->SetClassID(classCount++); - occStack.push(occ); } + occStack.push(occ); } break; } @@ -1171,15 +1172,13 @@ void SSAPre::SetVarPhis(MeExpr *meExpr) { } if (scalar->IsDefByPhi()) { MePhiNode *phiMeNode = scalar->GetMePhiDef(); - if (phiMeNode->GetOpnds().size() > 1) { - BBId defBBId = phiMeNode->GetDefBB()->GetBBId(); - CHECK(defBBId < dom->GetDtDfnSize(), "defBBId.idx out of range in SSAPre::SetVarPhis"); - if (varPhiDfns.find(dom->GetDtDfnItem(defBBId)) == varPhiDfns.end() && ScreenPhiBB(defBBId)) { - (void)varPhiDfns.insert(dom->GetDtDfnItem(defBBId)); - for (auto opndIt = phiMeNode->GetOpnds().begin(); opndIt != phiMeNode->GetOpnds().end(); ++opndIt) { - ScalarMeExpr *opnd = *opndIt; - SetVarPhis(opnd); - } + BBId defBBId = phiMeNode->GetDefBB()->GetBBId(); + CHECK(defBBId < dom->GetDtDfnSize(), "defBBId.idx out of range in SSAPre::SetVarPhis"); + if (varPhiDfns.find(dom->GetDtDfnItem(defBBId)) == varPhiDfns.end() && ScreenPhiBB(defBBId)) { + (void)varPhiDfns.insert(dom->GetDtDfnItem(defBBId)); + for (auto opndIt = phiMeNode->GetOpnds().begin(); opndIt != 
phiMeNode->GetOpnds().end(); ++opndIt) { + ScalarMeExpr *opnd = *opndIt; + SetVarPhis(opnd); } } } -- Gitee