diff --git a/src/mapleall/maple_me/BUILD.gn b/src/mapleall/maple_me/BUILD.gn index 69444ff8ec1e1c5be3ff23de4c50d7e8c12a8b03..e464cadc530d21b8cf790823ccac6e7bf7530e37 100755 --- a/src/mapleall/maple_me/BUILD.gn +++ b/src/mapleall/maple_me/BUILD.gn @@ -129,6 +129,7 @@ src_libmplme = [ "src/me_slp.cpp", "src/lmbc_memlayout.cpp", "src/lmbc_lower.cpp", + "src/mc_ssa_pre.cpp", ] src_libmplmewpo = [ diff --git a/src/mapleall/maple_me/CMakeLists.txt b/src/mapleall/maple_me/CMakeLists.txt index 009df1738894ce2c660c30c62c04866dcb7400e3..d54ec8e38c826f9e6c8a120f133a3ce0b6c4d057 100755 --- a/src/mapleall/maple_me/CMakeLists.txt +++ b/src/mapleall/maple_me/CMakeLists.txt @@ -130,6 +130,7 @@ set(src_libmplme src/me_slp.cpp src/lmbc_memlayout.cpp src/lmbc_lower.cpp + src/mc_ssa_pre.cpp ) set(src_libmplmewpo diff --git a/src/mapleall/maple_me/include/mc_ssa_pre.h b/src/mapleall/maple_me/include/mc_ssa_pre.h new file mode 100644 index 0000000000000000000000000000000000000000..1036dfe7bdde7989369e63b1be4d70b1919243e6 --- /dev/null +++ b/src/mapleall/maple_me/include/mc_ssa_pre.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#ifndef MAPLE_ME_INCLUDE_MC_SSAPRE_H +#define MAPLE_ME_INCLUDE_MC_SSAPRE_H +#include "ssa_pre.h" + +namespace maple { + +// for representing a node in the reduced SSA graph +class RGNode { + friend class McSSAPre; + friend class Visit; + public: + RGNode(MapleAllocator *alloc, uint32 idx, MeOccur *oc) : id(idx), occ(oc), + pred(alloc->Adapter()), + inEdgesCap(alloc->Adapter()), + usedCap(alloc->Adapter()) {} + private: + uint32 id; + MeOccur *occ; + MapleVector pred; + MapleVector inEdgesCap; // capacity of incoming edges + MapleVector usedCap; // used flow value of incoming edges; indexed like pred and inEdgesCap +}; + +// designate a visited node and the next outgoing edge to take +class Visit { + friend class McSSAPre; + private: + Visit(RGNode *nd, uint32 idx) : node(nd), predIdx(idx) {} + RGNode *node; + uint32 predIdx; // the index in node's pred + + uint64 AvailableCapacity() const { return node->inEdgesCap[predIdx] - node->usedCap[predIdx]; } + void IncreUsedCapacity(uint64 val) { node->usedCap[predIdx] += val; } + bool operator==(const Visit *rhs) const { return node == rhs->node && predIdx == rhs->predIdx; } +}; + +// for representing a flow path from source to sink +class Route { + friend class McSSAPre; + public: + explicit Route(MapleAllocator *alloc) : visits(alloc->Adapter()) {} + private: + MapleVector visits; + uint64 flowValue = 0; +}; + +class McSSAPre : public SSAPre { + public: + McSSAPre(IRMap &hMap, Dominance &currDom, Dominance &currPdom, MemPool &memPool, MemPool &mp2, PreKind kind, uint32 limit) : + SSAPre(hMap, currDom, currPdom, memPool, mp2, kind, limit), + occ2RGNodeMap(ssaPreAllocator.Adapter()), + maxFlowRoutes(ssaPreAllocator.Adapter()), + minCut(ssaPreAllocator.Adapter()) {} + virtual ~McSSAPre() = default; + + void ApplyMCSSAPRE(); + void SetPreUseProfileLimit(uint32 n) { preUseProfileLimit = n; } + private: + // step 8 willbeavail + void ResetMCWillBeAvail(MePhiOcc *phiOcc) const; + void ComputeMCWillBeAvail() const; + // step 7 max flow/min cut + bool 
AmongMinCut(RGNode *, uint32 idx) const; + void DumpRGToFile(); // dump reduced graph to dot file + bool IncludedEarlier(Visit **cut, Visit *curVisit, uint32 nextRouteIdx); + void RemoveRouteNodesFromCutSet(std::unordered_multiset &cutSet, Route *route); + bool SearchRelaxedMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar); + bool SearchMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar); + void DetermineMinCut(); + bool VisitANode(RGNode *node, Route *route, std::vector &visitedNodes); + bool FindAnotherRoute(); + void FindMaxFlow(); + // step 6 single sink + void AddSingleSink(); + // step 5 single source + void AddSingleSource(); + // step 4 graph reduction + void GraphReduction(); + // step 3 data flow methods + void SetPartialAnt(MePhiOpndOcc *phiOpnd) const; + void ComputePartialAnt() const; + void ResetFullAvail(MePhiOcc *occ) const; + void ComputeFullAvail() const; + + MapleUnorderedMap occ2RGNodeMap; + RGNode *source = nullptr; // assigned by AddSingleSource + RGNode *sink = nullptr; // assigned by AddSingleSink; FindMaxFlow and DumpRGToFile return early while it is null + uint32 numSourceEdges = 0; // incremented by AddSingleSource for every source-to-phi edge + MapleVector maxFlowRoutes; + uint32 nextRGNodeId = 0; // next id handed to a newly created RGNode + uint64 maxFlowValue = 0; // sum of the routes' flowValue, computed by FindMaxFlow + uint64 relaxedMaxFlowValue = 0; // relax maxFlowValue to avoid excessive mincut search time when number of routes is large + MapleVector minCut; // an array of Visits* to represent the minCut + uint32 preUseProfileLimit = UINT32_MAX; +}; + +} // namespace maple +#endif // MAPLE_ME_INCLUDE_MC_SSAPRE_H diff --git a/src/mapleall/maple_me/include/me_option.h b/src/mapleall/maple_me/include/me_option.h index 4e43e1e05ee3d0633387a685ff83c26582dc813d..3d9ca50ff40c4d7c96b97ceb0f4a00e988c8a02c 100644 --- a/src/mapleall/maple_me/include/me_option.h +++ b/src/mapleall/maple_me/include/me_option.h @@ -37,6 +37,7 @@ class MeOption { SplitPhases(str, skipPhases); } bool GetRange(const std::string &str) const; + bool GetPgoRange(const std::string &str) const; const std::unordered_set &GetSkipPhases() const { return skipPhases; @@ -85,6 +86,8 @@ class MeOption { 
static constexpr int kRangeArrayLen = 2; static unsigned long range[kRangeArrayLen]; static bool useRange; + static unsigned long pgoRange[kRangeArrayLen]; + static bool usePgoRange; static std::string dumpFunc; static std::string skipFrom; static std::string skipAfter; @@ -113,6 +116,7 @@ class MeOption { static uint32 stmtprePULimit; static uint32 epreLimit; static uint32 eprePULimit; + static uint32 epreUseProfileLimit; static uint32 lpreLimit; static uint32 lprePULimit; static uint32 parserOpt; @@ -185,6 +189,7 @@ class MeOption { static bool layoutWithPredict; static bool unifyRets; static bool dumpCfgOfPhases; + static bool epreUseProfile; // safety check option begin static SafetyCheckMode npeCheckMode; static bool isNpeCheckAll; diff --git a/src/mapleall/maple_me/include/me_options.h b/src/mapleall/maple_me/include/me_options.h index da127b6d3d45a753f4c16c9f54c752d5bdfdd902..a16cf27ac40930fecff7bbc12288547820db9b78 100644 --- a/src/mapleall/maple_me/include/me_options.h +++ b/src/mapleall/maple_me/include/me_options.h @@ -32,6 +32,7 @@ extern maplecl::Option os; extern maplecl::Option o3; extern maplecl::Option refusedcheck; extern maplecl::Option range; +extern maplecl::Option pgoRange; extern maplecl::Option dumpPhases; extern maplecl::Option skipPhases; extern maplecl::Option dumpFunc; @@ -57,6 +58,7 @@ extern maplecl::Option dumpAfter; extern maplecl::Option realcheckcast; extern maplecl::Option eprelimit; extern maplecl::Option eprepulimit; +extern maplecl::Option epreuseprofilelimit; extern maplecl::Option stmtprepulimit; extern maplecl::Option lprelimit; extern maplecl::Option lprepulimit; @@ -135,6 +137,7 @@ extern maplecl::Option remat; extern maplecl::Option unifyrets; extern maplecl::Option lfo; extern maplecl::Option dumpCfgOfPhases; +extern maplecl::Option epreUseProfile; } diff --git a/src/mapleall/maple_me/include/occur.h b/src/mapleall/maple_me/include/occur.h index 
a24c969ed01333d4df7b858116a72ded04e9fcd4..c2f5b9a4bbc51f7b5b9bea9f1ced1f9201941f89 100644 --- a/src/mapleall/maple_me/include/occur.h +++ b/src/mapleall/maple_me/include/occur.h @@ -215,6 +215,8 @@ class MeRealOcc : public MeOccur { bool isLHS; bool isFormalAtEntry; // the fake lhs occurrence at entry for formals bool isHoisted = false; // the hoisted occ used for hoisting + public: + bool rgExcluded = false; // reduced graph excluded, used only by McSSAPre }; class MeInsertedOcc : public MeOccur { @@ -275,13 +277,13 @@ class MePhiOpndOcc : public MeOccur { hasRealUse(false), isInsertedOcc(false), isPhiOpndReload(false), + isMCInsert(false), defPhiOcc(nullptr), phiOpnd4Temp(nullptr) { currentExpr.meStmt = nullptr; } ~MePhiOpndOcc() = default; - bool IsOkToInsert() const; void Dump(const IRMap &irMap) const override; bool IsProcessed() const { return isProcessed; @@ -315,6 +317,14 @@ class MePhiOpndOcc : public MeOccur { isPhiOpndReload = phiOpndReload; } + bool IsMCInsert() const { // true once McSSAPre::ComputeMCWillBeAvail found this operand's edge in the min cut + return isMCInsert; + } + + void SetIsMCInsert(bool mcInsert) { + isMCInsert = mcInsert; + } + const MePhiOcc *GetDefPhiOcc() const { return defPhiOcc; } @@ -356,6 +366,7 @@ class MePhiOpndOcc : public MeOccur { bool hasRealUse; bool isInsertedOcc; // the phi operand was inserted by inserted occ bool isPhiOpndReload; // if insertedocc and redefined the def, set this flag + bool isMCInsert; // used only in mc-ssapre: set for phi operands whose edge is in the min cut MePhiOcc *defPhiOcc; // its lhs union { MeExpr *meExpr; // the current expression at the end of the block containing this PhiOpnd @@ -375,6 +386,8 @@ class MePhiOcc : public MeOccur { isLater(true), isExtraneous(false), isRemoved(false), + isPartialAnt(false), + isMCWillBeAvail(true), phiOpnds(alloc.Adapter()), regPhi(nullptr), varPhi(nullptr) {} @@ -408,6 +421,14 @@ class MePhiOcc : public MeOccur { isCanBeAvail = canBeAvail; } + bool IsFullyAvail() const { + return isCanBeAvail; + } + + void SetIsFullyAvail(bool fullyAvail) { + isCanBeAvail = fullyAvail; + } + bool 
IsLater() const { return isLater; } @@ -432,6 +453,22 @@ class MePhiOcc : public MeOccur { isRemoved = removed; } + bool IsPartialAnt() const { + return isPartialAnt; + } + + void SetIsPartialAnt(bool pant) { + isPartialAnt = pant; + } + + bool IsMCWillBeAvail() const { + return isMCWillBeAvail; + } + + void SetIsMCWillBeAvail(bool wba) { + isMCWillBeAvail = wba; + } + const MapleVector &GetPhiOpnds() const { return phiOpnds; } @@ -484,10 +521,12 @@ class MePhiOcc : public MeOccur { private: bool isDownSafe; // default is true bool speculativeDownSafe; // is downsafe due to speculation - bool isCanBeAvail; + bool isCanBeAvail; // used for fullyAvail in mc-ssapre bool isLater; bool isExtraneous; bool isRemoved; // during finalize2, marked this phiocc is removed or not + bool isPartialAnt; // used only in mc-ssapre + bool isMCWillBeAvail; // used only in mc-ssapre MapleVector phiOpnds; MePhiNode *regPhi; // the reg phi being inserted, maybe can delete it later MePhiNode *varPhi; // the Var phi being inserted, maybe can delete it later @@ -506,7 +545,8 @@ class PreWorkCand { needLocalRefVar(false), isSRCand(false), onlyInvariantOpnds(false), - deletedFromWorkList(false) { + deletedFromWorkList(false), + applyMinCut(false) { ASSERT(pIdx != 0, "PreWorkCand: initial puIdx cannot be 0"); } @@ -639,6 +679,7 @@ class PreWorkCand { bool isSRCand : 1; // is a strength reduction candidate bool onlyInvariantOpnds : 1; // all operands have only 1 SSA version bool deletedFromWorkList : 1; // processed by SSAPRE already + bool applyMinCut : 1; // if using mc-ssapre for this candidate }; class PreStmtWorkCand : public PreWorkCand { diff --git a/src/mapleall/maple_me/include/ssa_epre.h b/src/mapleall/maple_me/include/ssa_epre.h index 0df35a90a787eb387d6be84decf1ec3a624f686c..2101208feaab6b8844073b7e9e20fbe76baa5871 100644 --- a/src/mapleall/maple_me/include/ssa_epre.h +++ b/src/mapleall/maple_me/include/ssa_epre.h @@ -14,14 +14,14 @@ */ #ifndef MAPLE_ME_INCLUDE_SSAEPRE_H #define 
MAPLE_ME_INCLUDE_SSAEPRE_H -#include "ssa_pre.h" +#include "mc_ssa_pre.h" namespace maple { -class SSAEPre : public SSAPre { +class SSAEPre : public McSSAPre { public: SSAEPre(IRMap &map, Dominance &dom, Dominance &pdom, MemPool &memPool, MemPool &mp2, PreKind kind, uint32 limit, bool includeRef, bool lhsIvar) - : SSAPre(map, dom, pdom, memPool, mp2, kind, limit), epreIncludeRef(includeRef), enableLHSIvar(lhsIvar) {} + : McSSAPre(map, dom, pdom, memPool, mp2, kind, limit), epreIncludeRef(includeRef), enableLHSIvar(lhsIvar) {} virtual ~SSAEPre() = default; diff --git a/src/mapleall/maple_me/include/ssa_pre.h b/src/mapleall/maple_me/include/ssa_pre.h index 9e782bca2749f91ef5da87934317a04d8aef2dcc..6f3b50e6283ff4fffeebdff1ae530daaaac72468 100644 --- a/src/mapleall/maple_me/include/ssa_pre.h +++ b/src/mapleall/maple_me/include/ssa_pre.h @@ -139,6 +139,7 @@ class SSAPre { bool strengthReduction = false; bool doLFTR = false; + bool doMinCut = false; protected: // step 6 codemotion methods @@ -156,6 +157,13 @@ class SSAPre { } virtual void CodeMotion(); // step 5 Finalize methods + bool WillBeAvail(MePhiOcc *phiOcc) { + if (!workCand->applyMinCut) { + return phiOcc->IsWillBeAvail(); + } + return phiOcc->IsMCWillBeAvail(); + } + bool OKToInsert(MePhiOpndOcc *phiOpnd); virtual void Finalize1(); void SetSave(MeOccur &defX); void SetReplacement(MePhiOcc &occ, MeOccur &repDef); @@ -285,7 +293,6 @@ class SSAPre { uint32 strIdxCount = 0; // ssapre will create a lot of temp variables if using var to store redundances, start from 0 PreWorkCandHashTable preWorkCandHashTable; - private: virtual void DoSSAFRE() {}; bool enableDebug = false; diff --git a/src/mapleall/maple_me/src/mc_ssa_pre.cpp b/src/mapleall/maple_me/src/mc_ssa_pre.cpp new file mode 100644 index 0000000000000000000000000000000000000000..55f3389a7812527d987ca789883edef49b8cd711 --- /dev/null +++ b/src/mapleall/maple_me/src/mc_ssa_pre.cpp @@ -0,0 +1,830 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies 
Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include +#include +#include +#include "mc_ssa_pre.h" +#include "dominance.h" +#include "mir_builder.h" + +// Implementation of the MC-SSAPRE algorithm based on the PLDI 2011 paper: +// An SSA-based Algorithm for Optimal Speculative Code Motion Under an Execution Profile +// by Hucheng Zhou, Wenguang Chen and Fred Chow + +namespace { +constexpr int kFuncNameLenLimit = 80; +} + +namespace maple { + +// ================ Step 8: WillBeAvail ================= + +void McSSAPre::ResetMCWillBeAvail(MePhiOcc *occ) const { + if (!occ->IsMCWillBeAvail()) { + return; + } + occ->SetIsMCWillBeAvail(false); + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + if (!phiOcc->IsMCWillBeAvail()) { + continue; + } + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() != nullptr && phiOpnd->GetDef() == occ) { + // phiOpnd is a use of occ + if (!phiOpnd->HasRealUse() && !phiOpnd->IsMCInsert()) { + ResetMCWillBeAvail(phiOcc); + break; + } + } + } + } +} + +void McSSAPre::ComputeMCWillBeAvail() const { + if (minCut.size() == 0) { + for (MePhiOcc *phiOcc : phiOccs) { + phiOcc->SetIsMCWillBeAvail(false); + } + return; + } + // set insert in phi operands + for (Visit *visit : minCut) { + MeOccur *occ = visit->node->occ; + if (occ->GetOccType() == kOccPhiocc) { + MePhiOcc *phiOcc = static_cast(occ); + MePhiOpndOcc *phiOpndOcc = phiOcc->GetPhiOpnd(visit->predIdx); + phiOpndOcc->SetIsMCInsert(true); + } + } + 
for (MePhiOcc *phiOcc : phiOccs) { + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() == nullptr && !phiOpnd->IsMCInsert()) { + ResetMCWillBeAvail(phiOcc); + break; + } + } + } +} + +// ================ Step 7: Max Flow / Min Cut ================= + +bool McSSAPre::AmongMinCut(RGNode *nd, uint32 idx) const { + for (Visit *visit : minCut) { + if (visit->node == nd && visit->predIdx == idx) { + return true; + } + } + return false; +} + +void McSSAPre::DumpRGToFile() { + if (sink == nullptr) { + return; + } + std::string fileName = "rg-of-cand-"; + fileName.append(std::to_string(workCand->GetIndex())); + fileName.append("-"); + const std::string &funcName = mirModule->CurFunction()->GetName(); + if (funcName.size() < kFuncNameLenLimit) { + fileName.append(funcName); + } else { + fileName.append(funcName.c_str(), kFuncNameLenLimit); + } + fileName.append(".dot"); + std::ofstream rgFile; + std::streambuf *coutBuf = LogInfo::MapleLogger().rdbuf(); // keep original cout buffer + std::streambuf *buf = rgFile.rdbuf(); + LogInfo::MapleLogger().rdbuf(buf); + rgFile.open(fileName, std::ios::trunc); + rgFile << "digraph {\n"; + for (int32 i = 0; i < sink->pred.size(); i++) { + RGNode *pre = sink->pred[i]; + rgFile << "real" << pre->id << " -> " << "\"sink\nmaxflow " << maxFlowValue << "\";\n"; + } + MapleUnorderedMap::iterator it = occ2RGNodeMap.begin(); + for (; it != occ2RGNodeMap.end(); it++) { + RGNode *rgNode = it->second; + for (int32 i = 0; i < rgNode->pred.size(); i++) { + RGNode *pre = rgNode->pred[i]; + if (pre != source) { + if (pre->occ->GetOccType() == kOccPhiocc) { + rgFile << "phi" << pre->id << " -> "; + } else { + rgFile << "real" << pre->id << " -> "; + } + if (rgNode->occ->GetOccType() == kOccPhiocc) { + rgFile << "phi" << rgNode->id; + } else { + rgFile << "real" << rgNode->id; + } + } else { + rgFile << "source" << " -> " << "phi" << rgNode->id; + } + if (AmongMinCut(rgNode, i)) { + rgFile << "[style=dotted][color=red]"; + } + 
if (rgNode->usedCap[i] == 0) { + rgFile << "[style=dashed][color=green]"; + } + rgFile << "[label=\"" << rgNode->usedCap[i] << "|" << rgNode->inEdgesCap[i] << "\"];\n"; + } + } + rgFile << "}\n"; + rgFile.flush(); + rgFile.close(); + LogInfo::MapleLogger().rdbuf(coutBuf); + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() << " dumped to " << fileName << "\n"; +} + +bool McSSAPre::IncludedEarlier(Visit **cut, Visit *curVisit, uint32 nextRouteIdx) { + uint32 i = nextRouteIdx; + while (i != 0) { + i--; + if (cut[i]->node == curVisit->node && cut[i]->predIdx == curVisit->predIdx) { + return true; + } + } + return false; +} + +// remove this route's nodes from cutSet +void McSSAPre::RemoveRouteNodesFromCutSet(std::unordered_multiset &cutSet, Route *route) { + for (uint32 i = 1; i < route->visits.size(); i++) { + Visit &curVisit = route->visits[i]; + std::unordered_multiset::iterator it = cutSet.find(curVisit.node->id); + ASSERT(it != cutSet.end(), "cutSet maintenance error"); + cutSet.erase(it); + } +} + +// find the cut closest to the sink whose total flow is relaxedMaxFlowValue +bool McSSAPre::SearchRelaxedMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar) { + Route *curRoute = maxFlowRoutes[nextRouteIdx]; + Visit *curVisit = nullptr; + + // determine starting value of visitIdx: start searching back from route end; + // if any node is in cutSet, set visitIdx as that nodes's index in route; + // otherwise, set visitIdx to 0 + uint32 visitIdx = curRoute->visits.size(); + do { + visitIdx--; + if (cutSet.count(curRoute->visits[visitIdx].node->id) != 0) { + break; + } + } while (visitIdx != 1); + // update cutSet with visited nodes lower than visitIdx + if (visitIdx != 1) { + for (uint i = visitIdx - 1; i > 0; i--) { + cutSet.insert(curRoute->visits[i].node->id); + } + } + + bool success = false; + do { + if (visitIdx == curRoute->visits.size()) { + RemoveRouteNodesFromCutSet(cutSet, curRoute); + return false; 
+ } + curVisit = &curRoute->visits[visitIdx]; + uint64 visitCap = curVisit->node->inEdgesCap[curVisit->predIdx]; + cut[nextRouteIdx] = curVisit; + if (visitIdx != 0) { + cutSet.insert(curVisit->node->id); + } + if (IncludedEarlier(cut, curVisit, nextRouteIdx)) { + visitCap = 0; + } + success = (flowSoFar + visitCap <= relaxedMaxFlowValue); + if (success && nextRouteIdx != (maxFlowRoutes.size() - 1)) { + success = SearchRelaxedMinCut(cut, cutSet, nextRouteIdx+1, flowSoFar + visitCap); + } + visitIdx++; + } while (!success); + return true; +} + +// find the cut closest to the sink whose total flow is maxFlowValue +bool McSSAPre::SearchMinCut(Visit **cut, std::unordered_multiset &cutSet, uint32 nextRouteIdx, uint64 flowSoFar) { + Route *curRoute = maxFlowRoutes[nextRouteIdx]; + Visit *curVisit = nullptr; + + // determine starting value of visitIdx: start searching back from route end; + // if any node is in cutSet, set visitIdx as that nodes's index in route; + // otherwise, set visitIdx to 0 + uint32 visitIdx = curRoute->visits.size(); + do { + visitIdx--; + if (cutSet.count(curRoute->visits[visitIdx].node->id) != 0) { + break; + } + } while (visitIdx != 1); + // update cutSet with visited nodes lower than visitIdx + if (visitIdx != 1) { + for (uint i = visitIdx - 1; i > 0; i--) { + cutSet.insert(curRoute->visits[i].node->id); + } + } + + bool success = false; + do { + if (visitIdx == curRoute->visits.size()) { + RemoveRouteNodesFromCutSet(cutSet, curRoute); + return false; + } + curVisit = &curRoute->visits[visitIdx]; + uint64 visitCap = curVisit->node->inEdgesCap[curVisit->predIdx]; + uint64 usedCap = curVisit->node->usedCap[curVisit->predIdx]; + if (visitCap != usedCap) { + if (visitIdx != 0) { + cutSet.insert(curVisit->node->id); + } + visitIdx++; + continue; + } + cut[nextRouteIdx] = curVisit; + if (visitIdx != 0) { + cutSet.insert(curVisit->node->id); + } + if (IncludedEarlier(cut, curVisit, nextRouteIdx)) { + visitCap = 0; + } + success = (flowSoFar + visitCap 
<= maxFlowValue); + if (success && nextRouteIdx != (maxFlowRoutes.size() - 1)) { + success = SearchMinCut(cut, cutSet, nextRouteIdx+1, flowSoFar + visitCap); + } + visitIdx++; + } while (!success); + return true; +} + +void McSSAPre::DetermineMinCut() { + if (maxFlowRoutes.empty()) { + if (GetSSAPreDebug()) { + DumpRGToFile(); + } + return; + } + // maximum width of the min cut is the number of routes in maxFlowRoutes + Visit* cut[maxFlowRoutes.size()]; + std::unordered_multiset cutSet; // key is RGNode's id; must be kept in sync with cut[]; sink node is not entered + constexpr double defaultRelaxScaling = 1.25; + relaxedMaxFlowValue = static_cast(static_cast(maxFlowValue) * defaultRelaxScaling); + bool relaxedSearch = false; + if (maxFlowRoutes.size() >= 20) { + // apply arbitrary heuristics to reduce search time + relaxedSearch = true; + relaxedMaxFlowValue = maxFlowValue * (maxFlowRoutes.size() / 10); + } + bool success = !relaxedSearch && SearchMinCut(cut, cutSet, 0, 0); + if (!success) { + relaxedSearch = true; + success = SearchRelaxedMinCut(cut, cutSet, 0, 0); + } + if (!success) { + if (GetSSAPreDebug()) { + mirModule->GetOut() << "MinCut failed\n"; + DumpRGToFile(); + } + CHECK_FATAL(false, "McSSAPre::DetermineMinCut: failed to find min cut"); + } + // sort cut + std::sort(cut, cut+maxFlowRoutes.size(), [](const Visit *left, const Visit *right) { + return (left->node != right->node) ? 
(left->node->id < right->node->id) + : (left->predIdx < right->predIdx); }); + // remove duplicates in the cut to form mincut; every route stores its own Visit objects, so two routes cut at the same edge yield distinct pointers and equality must be tested on (node, predIdx) as IncludedEarlier does + minCut.push_back(cut[0]); + size_t duplicatedVisits = 0; + for (uint32 i = 1; i < maxFlowRoutes.size(); i++) { + if (cut[i]->node != cut[i-1]->node || cut[i]->predIdx != cut[i-1]->predIdx) { + minCut.push_back(cut[i]); + } else { + duplicatedVisits++; + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "finished "; + if (relaxedSearch) { + mirModule->GetOut() << "relaxed "; + } + mirModule->GetOut() << "MinCut\n"; + DumpRGToFile(); + if (duplicatedVisits != 0) { + mirModule->GetOut() << duplicatedVisits << " duplicated visits in mincut\n"; + } + } +} + +bool McSSAPre::VisitANode(RGNode *node, Route *route, std::vector &visitedNodes) { + ASSERT(node->pred.size() != 0 , "McSSAPre::VisitANode: no connection to source node"); + // if any pred is the source and there's capacity to reach it, return success + for (uint32 i = 0; i < node->pred.size(); i++) { + if (node->pred[i] == source && node->inEdgesCap[i] > node->usedCap[i]) { + // if there is another pred never taken that also reaches source, use that instead + for (uint32 k = i + 1; k < node->pred.size(); k++) { + if (node->pred[k] == source && node->usedCap[k] == 0 && node->inEdgesCap[k] > 0) { + route->visits.push_back(Visit(node, k)); + return true; + } + } + route->visits.push_back(Visit(node, i)); + return true; + } + } + + // pick a never-taken predecessor path first + for (uint32 i = 0; i < node->pred.size(); i++) { + if (node->usedCap[i] == 0 && node->inEdgesCap[i] > 0 && !visitedNodes[node->pred[i]->id]) { + route->visits.push_back(Visit(node, i)); + visitedNodes[node->pred[i]->id] = true; + bool success = VisitANode(node->pred[i], route, visitedNodes); + if (!success) { + route->visits.pop_back(); + } else { + return true; + } + } + } + + size_t numPreds = node->pred.size(); + uint32 sortedPred[numPreds]; + for (uint32 i = 0; i < numPreds; i++) { + sortedPred[i] = i; + } + // put sortedPred[] in increasing order of 
capacities + std::sort(sortedPred, sortedPred+numPreds, [node](uint32 m, uint32 n) { + return node->inEdgesCap[m] < node->inEdgesCap[n]; }); + // for this round, prefer predecessor with higher unused capacity + for (uint32 i = 0; i < numPreds; i++) { + uint32 j = sortedPred[i]; + if (!visitedNodes[node->pred[j]->id] && node->inEdgesCap[j] > node->usedCap[j]) { + route->visits.push_back(Visit(node, j)); + visitedNodes[node->pred[j]->id] = true; + bool success = VisitANode(node->pred[j], route, visitedNodes); + if (!success) { + route->visits.pop_back(); + } else { + return true; + } + } + } + return false; +} + +// return false if not successful; if successful, the new route will be pushed +// to maxFlowRoutes +bool McSSAPre::FindAnotherRoute() { + std::vector visitedNodes(occ2RGNodeMap.size() + 1, false); + Route *route = perCandMemPool->New(&perCandAllocator); + bool success = false; + // pick an untaken sink predecessor first + for (int32 i = 0; i < sink->pred.size(); i++) { + if (sink->usedCap[i] == 0) { + route->visits.push_back(Visit(sink, i)); + visitedNodes[sink->pred[i]->id] = true; + success = VisitANode(sink->pred[i], route, visitedNodes); + if (!success) { + route->visits.pop_back(); + } else { + break; + } + } + } + if (!success) { + // now, pick any sink predecessor + for (int32 i = 0; i < sink->pred.size(); i++) { + route->visits.push_back(Visit(sink, i)); + visitedNodes[sink->pred[i]->id] = true; + success = VisitANode(sink->pred[i], route, visitedNodes); + if (!success) { + route->visits.pop_back(); + } else { + break; + } + } + } + if (!success) { + return false; + } + // find bottleneck capacity along route + uint64 minAvailCap = route->visits[0].AvailableCapacity(); + for (int32 i = 1; i < route->visits.size(); i++) { + uint64 curAvailCap = route->visits[i].AvailableCapacity(); + minAvailCap = std::min(minAvailCap, curAvailCap); + } + route->flowValue = minAvailCap; + // update usedCap along route + for (int32 i = 0; i < route->visits.size(); 
i++) { + route->visits[i].IncreUsedCapacity(minAvailCap); + } + maxFlowRoutes.push_back(route); + return true; +} + +void McSSAPre::FindMaxFlow() { + if (sink == nullptr) { + return; + } + maxFlowValue = 0; + bool found; + do { + found = FindAnotherRoute(); + } while (found); + // calculate maxFlowValue; + for (Route *route : maxFlowRoutes) { + maxFlowValue += route->flowValue; + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << ": FindMaxFlow found " << maxFlowRoutes.size() << " routes\n"; + for (size_t i = 0; i < maxFlowRoutes.size(); i++) { + Route *route = maxFlowRoutes[i]; + mirModule->GetOut() << "route " << i << " sink:pred" << route->visits[0].predIdx; + for (size_t j = 1; j < route->visits.size(); j++) { + if (route->visits[j].node->occ->GetOccType() == kOccPhiocc) { + mirModule->GetOut() << " phi"; + } else { + mirModule->GetOut() << " real"; + } + mirModule->GetOut() << route->visits[j].node->id << ":pred" << route->visits[j].predIdx; + } + mirModule->GetOut() << " flowValue " << route->flowValue; + mirModule->GetOut() << "\n"; + } + mirModule->GetOut() << "maxFlowValue is " << maxFlowValue << "\n"; + } +} + +// ================ Step 6: Add Single Sink ================= + +void McSSAPre::AddSingleSink() { + if (numSourceEdges == 0) { + return; // empty reduced graph + } + sink = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, nullptr); + size_t numToSink = 0; + MapleUnorderedMap::iterator it = occ2RGNodeMap.begin(); + for (; it != occ2RGNodeMap.end(); it++) { + if (it->first->GetOccType() != kOccReal) { + continue; + } + RGNode *use = it->second; + // add edge from this use node to sink + sink->pred.push_back(use); + sink->inEdgesCap.push_back(UINT64_MAX); + sink->usedCap.push_back(0); + numToSink++; + } + ASSERT(numToSink != 0, "McSSAPre::AddSingleSink: found 0 edge to sink"); + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() << " has " << 
numToSink << " edges to sink\n"; + } +} + +// ================ Step 5: Add Single Source ================= +void McSSAPre::AddSingleSource() { + source = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, nullptr); + for (MePhiOcc *phiOcc : phiOccs) { + if (phiOcc->IsPartialAnt() && !phiOcc->IsFullyAvail()) { + // look for null operands + for (int32 i = 0; i < phiOcc->GetPhiOpnds().size(); i++) { + MePhiOpndOcc *phiopndOcc = phiOcc->GetPhiOpnd(i); + if (phiopndOcc->GetDef() != nullptr) { + continue; + } + // add edge from source to this phi node + RGNode *sucNode = occ2RGNodeMap[phiOcc]; + sucNode->pred.push_back(source); + sucNode->inEdgesCap.push_back(phiOcc->GetBB()->GetPred(i)->GetFrequency()+1); + sucNode->usedCap.push_back(0); + numSourceEdges++; + } + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex(); + if (numSourceEdges == 0) { + mirModule->GetOut() << " has empty reduced graph\n"; + } else { + mirModule->GetOut() << " source has " << numSourceEdges << " succs\n"; + } + } +} + +// ================ Step 4: Graph Reduction ================= +void McSSAPre::GraphReduction() { + size_t numPhis = 0; + size_t numRealOccs = 0; + size_t numType1Edges = 0; + size_t numType2Edges = 0; + // add def nodes + for (MePhiOcc *phiOcc : phiOccs) { + if (phiOcc->IsPartialAnt() && !phiOcc->IsFullyAvail()) { + RGNode *newRGNode = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, phiOcc); + occ2RGNodeMap.insert(std::pair(phiOcc, newRGNode)); + numPhis++; + } + } + if (occ2RGNodeMap.empty()) { + return; + } + // add use nodes and use-def edges + for (MeOccur *occ : allOccs) { + if (occ->GetOccType() == kOccReal) { + MeRealOcc *realOcc = static_cast(occ); + if (!realOcc->rgExcluded && realOcc->GetDef() != nullptr) { + MeOccur *defOcc = realOcc->GetDef(); + ASSERT(defOcc->GetOccType() == kOccPhiocc, "McSSAPre::GraphReduction: real occ not defined by phi"); + if (occ2RGNodeMap.find(defOcc) != occ2RGNodeMap.end()) { + 
RGNode *use = perCandMemPool->New(&perCandAllocator, nextRGNodeId++, realOcc); + occ2RGNodeMap[realOcc] = use; + numRealOccs++; + RGNode *def = occ2RGNodeMap[defOcc]; + use->pred.push_back(def); + use->inEdgesCap.push_back(realOcc->GetBB()->GetFrequency()+1); + use->usedCap.push_back(0); + numType2Edges++; + } + } + } else if (occ->GetOccType() == kOccPhiopnd) { + MePhiOpndOcc *phiopndOcc = static_cast(occ); + MePhiOcc *defPhiOcc = phiopndOcc->GetDefPhiOcc(); + if (defPhiOcc->IsPartialAnt() && !defPhiOcc->IsFullyAvail()) { + // defPhiOcc is the use node and it has already been added + MeOccur *defOcc = phiopndOcc->GetDef(); + if (defOcc != nullptr && defOcc->GetOccType() == kOccPhiocc && + static_cast(defOcc)->IsPartialAnt() && + !static_cast(defOcc)->IsFullyAvail()) { + ASSERT(occ2RGNodeMap.find(defOcc) != occ2RGNodeMap.end(), "McSSAPre::GraphReduction: def node not found"); + RGNode *def = occ2RGNodeMap[defOcc]; + RGNode *use = occ2RGNodeMap[defPhiOcc]; + use->pred.push_back(def); + // find the index of phiopndOcc in defPhiOcc's phiOpnds + uint32 i; + for (i = 0; i < defPhiOcc->GetPhiOpnds().size(); i++) { + if (defPhiOcc->GetPhiOpnd(i) == phiopndOcc) { + break; + } + } + ASSERT(i != defPhiOcc->GetPhiOpnds().size(), "McSSAPre::GraphReduction: cannot find corresponding phi opnd"); + use->inEdgesCap.push_back(defPhiOcc->GetBB()->GetPred(i)->GetFrequency()+1); + use->usedCap.push_back(0); + numType1Edges++; + } + } + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << " after GraphReduction, phis: " << numPhis << " reals: " << numRealOccs + << " type 1 edges: " << numType1Edges << " type 2 edges: " << numType2Edges << "\n"; + } +} + +// ================ Step 3: Data Flow Computations ================= + +// set partial anticipation +void McSSAPre::SetPartialAnt(MePhiOpndOcc *phiOpnd) const { + MeOccur *defOcc = phiOpnd->GetDef(); + if (defOcc == nullptr || defOcc->GetOccType() != kOccPhiocc) { + return; + } 
+ auto *defPhiOcc = static_cast(defOcc); + if (defPhiOcc->IsPartialAnt()) { + return; + } + defPhiOcc->SetIsPartialAnt(true); + for (MePhiOpndOcc *mePhiOpnd : defPhiOcc->GetPhiOpnds()) { + SetPartialAnt(mePhiOpnd); + } +} + +// compute partial anticipation for each PHI +void McSSAPre::ComputePartialAnt() const { + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + if (phiOcc->IsPartialAnt()) { + // propagate partialAnt along use-def edges + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + SetPartialAnt(phiOpnd); + } + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << " after PartialAnt\n"; + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + phiOcc->Dump(*irMap); + if (phiOcc->IsPartialAnt()) { + mirModule->GetOut() << " is partialant\n"; + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (!phiOpnd->IsProcessed()) { + phiOpnd->Dump(*irMap); + mirModule->GetOut() << " has not been processed by Rename2\n"; + } + } + } else { + mirModule->GetOut() << " is not partialant\n"; + } + } + } +} + +void McSSAPre::ResetFullAvail(MePhiOcc *occ) const { + if (!occ->IsFullyAvail()) { + return; + } + occ->SetIsFullyAvail(false); + // reset those phiocc nodes that have occ as one of its operands + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + if (!phiOcc->IsFullyAvail()) { + continue; + } + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() != nullptr && phiOpnd->GetDef() == occ) { + // phiOpnd is a use of occ + if (!phiOpnd->HasRealUse()) { + ResetFullAvail(phiOcc); + break; + } + } + } + } +} + +// the fullyavail attribute is stored in the isCanBeAvail field +void McSSAPre::ComputeFullAvail() const { + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + // reset fullyavail if any phi operand is null + bool existNullDef = 
false; + for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { + if (phiOpnd->GetDef() == nullptr) { + existNullDef = true; + break; + } + } + if (existNullDef) { + ResetFullAvail(phiOcc); + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "++++ ssapre candidate " << workCand->GetIndex() + << " after FullyAvailable\n"; + for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { + MePhiOcc *phiOcc = *it; + phiOcc->Dump(*irMap); + if (phiOcc->IsFullyAvail()) { + mirModule->GetOut() << " is fullyavail\n"; + } else { + mirModule->GetOut() << " is not fullyavail\n"; + } + } + } +} + +void McSSAPre::ApplyMCSSAPRE() { + // #0 build worklist + BuildWorkList(); + if (GetSSAPreDebug()) { + mirModule->GetOut() << " worklist initial size " << workList.size() << '\n'; + } + ConstructUseOccurMap(); + uint32 cnt = 0; + while (!workList.empty()) { + ++cnt; + if (cnt > preLimit) { + break; + } + workCand = workList.front(); + workCand->SetIndex(static_cast(cnt)); + workCand->applyMinCut = !(preKind == kExprPre && workCand->GetTheMeExpr()->GetMeOp() == kMeOpIvar) && cnt <= preUseProfileLimit; + workList.pop_front(); + if (workCand->GetRealOccs().empty()) { + workCand->deletedFromWorkList = true; + continue; + } + if ((preKind == kExprPre && workCand->GetTheMeExpr()->GetMeOp() == kMeOpIvar) || (preKind == kLoadPre)) { + // if only LHS real occur, skip this candidate + bool hasNonLHS = false; + for (MeRealOcc *realOcc : workCand->GetRealOccs()) { + if (realOcc->GetOccType() == kOccReal && !realOcc->IsLHS()) { + hasNonLHS = true; + break; + } + } + if (!hasNonLHS) { + workCand->deletedFromWorkList = true; + continue; + } + } + if (GetSSAPreDebug()) { + mirModule->GetOut() << "||||||| MC-SSAPRE candidate " << cnt << " at worklist index " + << workCand->GetIndex() << ": "; + workCand->DumpCand(*irMap); + if (workCand->isSRCand) { + mirModule->GetOut() << " srCand"; + } + if (workCand->onlyInvariantOpnds) { + mirModule->GetOut() << " onlyInvairantOpnds"; + } + if 
(workCand->applyMinCut) { + mirModule->GetOut() << " applyMinCut"; + } + mirModule->GetOut() << '\n'; + } + allOccs.clear(); + phiOccs.clear(); + nextRGNodeId = 1; + occ2RGNodeMap.clear(); + numSourceEdges = 0; + maxFlowRoutes.clear(); + minCut.clear(); + source = nullptr; + sink = nullptr; + // #1 Insert PHI; results in allOccs and phiOccs + ComputeVarAndDfPhis(); + CreateSortedOccs(); + if (workCand->GetRealOccs().empty()) { + workCand->deletedFromWorkList = true; + continue; + } + // set the position field in the MeRealOcc nodes + for (size_t j = 0; j < workCand->GetRealOccs().size(); j++) { + workCand->GetRealOcc(j)->SetPosition(j); + } + // #2 Rename + Rename1(); + Rename2(); + if (!phiOccs.empty()) { + // if no PHI inserted, no need to perform these steps + if (!workCand->applyMinCut) { + // #3 DownSafety + ComputeDS(); + // #4 WillBeAvail + ComputeCanBeAvail(); + ComputeLater(); + } else { + // #3 data flow methods + ComputeFullAvail(); + ComputePartialAnt(); + // #4 graph reduction + GraphReduction(); + // #5 single source + AddSingleSource(); + // #6 single sink + AddSingleSink(); + // step 7 max flow/min cut + FindMaxFlow(); + DetermineMinCut(); + // step 8 willbeavail + ComputeMCWillBeAvail(); + } + } + // #5 Finalize + Finalize1(); + if (workCand->Redo2HandleCritEdges()) { + workCand->applyMinCut = false; + // reinitialize def field to nullptr + for (MeOccur *occ : allOccs) { + occ->SetDef(nullptr); + if (occ->GetOccType() == kOccPhiopnd) { + auto *phiOpndOcc = static_cast(occ); + phiOpndOcc->SetIsProcessed(false); + } + } + Rename1(); + Rename2(); + ComputeDS(); + ComputeCanBeAvail(); + ComputeLater(); + Finalize1(); + } + Finalize2(); + workCand->deletedFromWorkList = true; + // #6 CodeMotion and recompute worklist based on new occurrences + CodeMotion(); + if (preKind == kStmtPre && (workCand->GetRealOccs().front()->GetOpcodeOfMeStmt() == OP_dassign || + workCand->GetRealOccs().front()->GetOpcodeOfMeStmt() == OP_callassigned)) { + // apply full 
redundancy elimination + DoSSAFRE(); + } + perCandMemPool->ReleaseContainingMem(); + } +} + +} // namespace maple diff --git a/src/mapleall/maple_me/src/me_option.cpp b/src/mapleall/maple_me/src/me_option.cpp index d062458a0d0cadb7fd11280ee95254e4bd9679f4..41956f19b88ed370cd67d0a84f13f9a4badb5c84 100644 --- a/src/mapleall/maple_me/src/me_option.cpp +++ b/src/mapleall/maple_me/src/me_option.cpp @@ -28,7 +28,9 @@ bool MeOption::isBigEndian = false; bool MeOption::dumpAfter = false; std::string MeOption::dumpFunc = "*"; unsigned long MeOption::range[kRangeArrayLen] = { 0, 0 }; +unsigned long MeOption::pgoRange[kRangeArrayLen] = { 0, 0 }; bool MeOption::useRange = false; +bool MeOption::usePgoRange = false; bool MeOption::quiet = false; bool MeOption::setCalleeHasSideEffect = false; bool MeOption::unionBasedAA = true; @@ -55,6 +57,7 @@ uint32 MeOption::delRcPULimit = UINT32_MAX; uint32 MeOption::stmtprePULimit = UINT32_MAX; uint32 MeOption::epreLimit = UINT32_MAX; uint32 MeOption::eprePULimit = UINT32_MAX; +uint32 MeOption::epreUseProfileLimit = UINT32_MAX; uint32 MeOption::lpreLimit = UINT32_MAX; uint32 MeOption::lprePULimit = UINT32_MAX; uint32 MeOption::pregRenameLimit = UINT32_MAX; @@ -132,6 +135,7 @@ SafetyCheckMode MeOption::boundaryCheckMode = SafetyCheckMode::kNoCheck; bool MeOption::safeRegionMode = false; bool MeOption::unifyRets = false; bool MeOption::dumpCfgOfPhases = false; +bool MeOption::epreUseProfile = true; #if MIR_JAVA std::string MeOption::acquireFuncName = "Landroid/location/LocationManager;|requestLocationUpdates|"; std::string MeOption::releaseFuncName = "Landroid/location/LocationManager;|removeUpdates|"; @@ -195,6 +199,14 @@ bool MeOption::SolveOptions(bool isDebug) { } } + if (opts::me::pgoRange.IsEnabledByUser()) { + usePgoRange = true; + bool ret = GetPgoRange(opts::me::pgoRange); + if (!ret) { + return ret; + } + } + maplecl::CopyIfEnabled(dumpBefore, opts::me::dumpBefore); maplecl::CopyIfEnabled(dumpAfter, opts::me::dumpAfter); @@ -293,6 
+305,7 @@ bool MeOption::SolveOptions(bool isDebug) { maplecl::CopyIfEnabled(warnNativeFunc, opts::me::warnemptynative); maplecl::CopyIfEnabled(epreLimit, opts::me::eprelimit); maplecl::CopyIfEnabled(eprePULimit, opts::me::eprepulimit); + maplecl::CopyIfEnabled(epreUseProfileLimit, opts::me::epreuseprofilelimit); maplecl::CopyIfEnabled(stmtprePULimit, opts::me::stmtprepulimit); maplecl::CopyIfEnabled(lpreLimit, opts::me::lprelimit); maplecl::CopyIfEnabled(lprePULimit, opts::me::lprepulimit); @@ -315,6 +328,7 @@ bool MeOption::SolveOptions(bool isDebug) { maplecl::CopyIfEnabled(lessThrowAlias, opts::me::lessthrowalias); maplecl::CopyIfEnabled(propBase, opts::me::propbase); maplecl::CopyIfEnabled(dumpCfgOfPhases, opts::me::dumpCfgOfPhases); + maplecl::CopyIfEnabled(epreUseProfile, opts::me::epreUseProfile); if (opts::me::propiloadref.IsEnabledByUser()) { propIloadRef = opts::me::propiloadref; @@ -484,6 +498,20 @@ bool MeOption::GetRange(const std::string &str) const { return true; } +bool MeOption::GetPgoRange(const std::string &str) const { + std::string s{ str }; + size_t comma = s.find_first_of(",", 0); + if (comma != std::string::npos) { + pgoRange[0] = std::stoul(s.substr(0, comma), nullptr); + pgoRange[1] = std::stoul(s.substr(comma + 1, std::string::npos - (comma + 1)), nullptr); + } + if (pgoRange[0] > pgoRange[1]) { + LogInfo::MapleLogger(kLlErr) << "invalid values for --pgorange=" << pgoRange[0] << "," << pgoRange[1] << '\n'; + return false; + } + return true; +} + bool MeOption::DumpPhase(const std::string &phase) { if (phase == "") { return false; diff --git a/src/mapleall/maple_me/src/me_options.cpp b/src/mapleall/maple_me/src/me_options.cpp index 79adab5f52fb8c5344b73ff209a164139213328f..1b2a68ce8cd406b3ca51047571a82f92e686c2e1 100644 --- a/src/mapleall/maple_me/src/me_options.cpp +++ b/src/mapleall/maple_me/src/me_options.cpp @@ -51,6 +51,11 @@ maplecl::Option range({"--range"}, " \t--range=NUM0,NUM1\n", {meCategory}); +maplecl::Option 
pgoRange({"--pgorange"}, + " --pgorange \tUse profile-guided optimizations only for funcid in the range [NUM0, NUM1]\n" + " \t--pgorange=NUM0,NUM1\n", + {meCategory}); + maplecl::Option dumpPhases({"--dump-phases"}, " --dump-phases \tEnable debug trace for specified phases" " in the comma separated list\n" @@ -208,6 +213,11 @@ maplecl::Option eprepulimit({"--eprepulimit"}, " \t--eprepulimit=NUM\n", {meCategory}); +maplecl::Option epreuseprofilelimit({"--epreuseprofilelimit"}, + " --epreuseprofilelimit \tMake EPRE take advantage of profile data only for the first NUM expressions\n" + " \t--epreuseprofilelimit=NUM\n", + {meCategory}); + maplecl::Option stmtprepulimit({"--stmtprepulimit"}, " --stmtprepulimit \tApply STMTPRE optimization only for" " the first NUM PUs\n" @@ -698,5 +708,10 @@ maplecl::Option lfo({"--lfo"}, maplecl::Option dumpCfgOfPhases({"--dumpcfgofphases"}, " --dumpcfgofphases \tDump CFG from various phases to .dot files\n", {meCategory}); +maplecl::Option epreUseProfile({"--epreuseprofile"}, + " --epreuseprofile \tEnable profile-guided epre phase\n" + " --no-epreuseprofile \tDisable profile-guided epre phase\n", + {meCategory}, + maplecl::DisableWith("--no-epreuseprofile")); } diff --git a/src/mapleall/maple_me/src/me_ssa_epre.cpp b/src/mapleall/maple_me/src/me_ssa_epre.cpp index f2019b28a06c8a362bd206ad0595f44270d99d71..dc285e63b1f25a2d7fba5064ccb0e7e7a4ae3044 100644 --- a/src/mapleall/maple_me/src/me_ssa_epre.cpp +++ b/src/mapleall/maple_me/src/me_ssa_epre.cpp @@ -103,6 +103,19 @@ bool MESSAEPre::PhaseRun(maple::MeFunction &f) { } MeSSAEPre ssaPre(f, *irMap, *dom, *pdom, kh, *ssaPreMemPool, *ApplyTempMemPool(), epreLimitUsed, epreIncludeRef, MeOption::epreLocalRefVar, MeOption::epreLHSIvar); + if (f.GetMirFunc()->GetFuncProfData() && MeOption::epreUseProfile) { + if (MeOption::usePgoRange) { + if (f.GetMirFunc()->GetPuidxOrigin() >= MeOption::pgoRange[0] && + f.GetMirFunc()->GetPuidxOrigin() <= MeOption::pgoRange[1]) { + ssaPre.doMinCut = true; + } 
+ } else { + ssaPre.doMinCut = true; + } + if (ssaPre.doMinCut) { + ssaPre.SetPreUseProfileLimit(MeOption::epreUseProfileLimit); + } + } ssaPre.SetSpillAtCatch(MeOption::spillAtCatch); if (MeOption::strengthReduction && !f.GetMIRModule().IsJavaModule()) { ssaPre.strengthReduction = true; @@ -120,7 +133,11 @@ bool MESSAEPre::PhaseRun(maple::MeFunction &f) { if (DEBUGFUNC_NEWPM(f)) { ssaPre.SetSSAPreDebug(true); } - ssaPre.ApplySSAPRE(); + if (!ssaPre.doMinCut) { + ssaPre.ApplySSAPRE(); + } else { + ssaPre.ApplyMCSSAPRE(); + } if (!ssaPre.GetCandsForSSAUpdate().empty()) { MeSSAUpdate ssaUpdate(f, *f.GetMeSSATab(), *dom, ssaPre.GetCandsForSSAUpdate()); ssaUpdate.Run(); diff --git a/src/mapleall/maple_me/src/me_stmt_pre.cpp b/src/mapleall/maple_me/src/me_stmt_pre.cpp index 7a6d13161c02e2ac3e765e56a9ab8ec3b536ce67..12a704cd5d717d84594dbec5b6aaad85398257d6 100644 --- a/src/mapleall/maple_me/src/me_stmt_pre.cpp +++ b/src/mapleall/maple_me/src/me_stmt_pre.cpp @@ -178,7 +178,7 @@ void MeStmtPre::Finalize1() { auto *phiOpnd = static_cast(occ); MePhiOcc *phiOcc = phiOpnd->GetDefPhiOcc(); if (phiOcc->IsWillBeAvail()) { - if (phiOpnd->IsOkToInsert()) { + if (OKToInsert(phiOpnd)) { // insert the current expression at the end of the block containing phiOpnd if (phiOpnd->GetBB()->GetSucc().size() > 1) { CHECK_FATAL(!workCand->Redo2HandleCritEdges(), "Finalize1: insertion at critical edge; aborting"); diff --git a/src/mapleall/maple_me/src/occur.cpp b/src/mapleall/maple_me/src/occur.cpp index 9a52a85ffe76c6c80bbb0a675945e6ce9c9b0572..c45241d15265a0ccbfd007dc5e852c600de0e1de 100644 --- a/src/mapleall/maple_me/src/occur.cpp +++ b/src/mapleall/maple_me/src/occur.cpp @@ -107,22 +107,6 @@ MeExpr *MeOccur::GetSavedExpr() { } } -// return true if either: -// operand is nullptr (def is null), or -// hasRealUse is false and defined by a PHI not will be avail -bool MePhiOpndOcc::IsOkToInsert() const { - if (GetDef() == nullptr) { - return true; - } - if (!hasRealUse) { - const MeOccur 
*defOcc = GetDef(); - if (defOcc->GetOccType() == kOccPhiocc && !static_cast(defOcc)->IsWillBeAvail()) { - return true; - } - } - return false; -} - bool MePhiOcc::IsOpndDefByRealOrInserted() const { for (MePhiOpndOcc *phiOpnd : phiOpnds) { MeOccur *defOcc = phiOpnd->GetDef(); @@ -152,6 +136,9 @@ void MeRealOcc::Dump(const IRMap &irMap) const { } else { mod->GetOut() << "RealOcc(LHS) "; } + if (rgExcluded) { + mod->GetOut() << "rgexcluded "; + } if (meExpr != nullptr) { meExpr->Dump(&irMap); } else { diff --git a/src/mapleall/maple_me/src/ssa_pre.cpp b/src/mapleall/maple_me/src/ssa_pre.cpp index a94b544c20f37af49a7738dd30114485ff401731..2b45f73d57d14cf366b7700840432d81db6ac346 100644 --- a/src/mapleall/maple_me/src/ssa_pre.cpp +++ b/src/mapleall/maple_me/src/ssa_pre.cpp @@ -165,7 +165,7 @@ void SSAPre::GenerateSavePhiOcc(MePhiOcc &phiOcc) { void SSAPre::UpdateInsertedPhiOccOpnd() { for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { MePhiOcc *phiOcc = *it; - if (!phiOcc->IsWillBeAvail() || phiOcc->IsRemoved()) { + if (!WillBeAvail(phiOcc) || phiOcc->IsRemoved()) { continue; } if (phiOcc->GetRegPhi()) { @@ -237,7 +237,7 @@ void SSAPre::CodeMotion() { } case kOccPhiopnd: { MePhiOpndOcc *phiopnd = static_cast(occ); - if (phiopnd->GetDefPhiOcc()->IsRemoved() || !phiopnd->GetDefPhiOcc()->IsWillBeAvail()) { + if (phiopnd->GetDefPhiOcc()->IsRemoved() || !WillBeAvail(phiopnd->GetDefPhiOcc())) { break; } if (phiopnd->GetDef()->GetOccType() == kOccInserted) { @@ -285,7 +285,7 @@ void SSAPre::CodeMotion() { } case kOccPhiopnd: { auto *phiOpnd = static_cast(occ); - if (phiOpnd->GetDefPhiOcc()->IsRemoved() || !phiOpnd->GetDefPhiOcc()->IsWillBeAvail()) { + if (phiOpnd->GetDefPhiOcc()->IsRemoved() || !WillBeAvail(phiOpnd->GetDefPhiOcc())) { break; } MeOccur *defOcc = phiOpnd->GetDef(); @@ -313,7 +313,7 @@ void SSAPre::CodeMotion() { } case kOccPhiocc: { auto *phiOcc = static_cast(occ); - if (phiOcc->IsRemoved() || !phiOcc->IsWillBeAvail()) { + if (phiOcc->IsRemoved() || 
!WillBeAvail(phiOcc)) { break; } GenerateSavePhiOcc(*phiOcc); @@ -331,7 +331,7 @@ void SSAPre::CodeMotion() { } } else { MePhiOcc *phiOcc = static_cast(compOcc->GetDef()); - if (phiOcc->IsRemoved() || !phiOcc->IsWillBeAvail() || !phiOcc->IsDownSafe()) { + if (phiOcc->IsRemoved() || !WillBeAvail(phiOcc) || !phiOcc->IsDownSafe()) { break; } } @@ -366,6 +366,24 @@ void SSAPre::CodeMotion() { } // ================ Step 5: Finalize ================= + +// return true if either: +// operand is nullptr (def is null), or +// hasRealUse is false and defined by a PHI not will be avail +bool SSAPre::OKToInsert(MePhiOpndOcc *phiOpnd) { + if (phiOpnd->GetDef() == nullptr) { + return true; + } + if (!phiOpnd->HasRealUse()) { + MeOccur *defOcc = phiOpnd->GetDef(); + if (defOcc->GetOccType() == kOccPhiocc && + !WillBeAvail(static_cast(defOcc))) { + return true; + } + } + return false; +} + void SSAPre::Finalize1() { std::vector availDefVec(classCount, nullptr); // traversal in preoder DT @@ -374,7 +392,7 @@ void SSAPre::Finalize1() { switch (occ->GetOccType()) { case kOccPhiocc: { auto *phiOcc = static_cast(occ); - if (phiOcc->IsWillBeAvail()) { + if (WillBeAvail(phiOcc)) { availDefVec[classX] = phiOcc; } break; @@ -418,10 +436,10 @@ void SSAPre::Finalize1() { // we assume one phiOpnd has only one phiOcc use because critical edge split the blocks auto *phiOpnd = static_cast(occ); MePhiOcc *phiOcc = phiOpnd->GetDefPhiOcc(); - if (!phiOcc->IsWillBeAvail()) { + if (!WillBeAvail(phiOcc)) { break; } - if (phiOpnd->IsOkToInsert()) { + if (OKToInsert(phiOpnd)) { // insert the current expression at the end of the block containing phiOpnd if (phiOpnd->GetBB()->GetSucc().size() > 1) { CHECK_FATAL(!workCand->Redo2HandleCritEdges(), "Finalize1: insertion at critical edge, aborting"); @@ -466,7 +484,7 @@ void SSAPre::Finalize1() { " after Finalize1===================\n"; for (auto it = phiOccs.begin(); it != phiOccs.end(); ++it) { MePhiOcc *phiOcc = *it; - if (!phiOcc->IsWillBeAvail()) { + if 
(!WillBeAvail(phiOcc)) { continue; } for (MePhiOpndOcc *phiOpnd : phiOcc->GetPhiOpnds()) { @@ -504,7 +522,7 @@ void SSAPre::SetSave(MeOccur &defX) { CHECK_FATAL(!dom->IsNodeVecEmpty(), "the size to be allocated is 0"); GetIterDomFrontier(fromBb, &itFrontier); for (MePhiOcc *phiOcc : phiOccs) { - if (!phiOcc->IsWillBeAvail()) { + if (!WillBeAvail(phiOcc)) { continue; } if (itFrontier.find(dom->GetDtDfnItem(phiOcc->GetBB()->GetBBId())) == itFrontier.end()) { @@ -551,7 +569,7 @@ void SSAPre::Finalize2() { MePhiOcc *phiOcc = *it; // initialize extraneouse for each MePhiOcc if (!workCand->isSRCand) { - phiOcc->SetIsExtraneous(phiOcc->IsWillBeAvail()); + phiOcc->SetIsExtraneous(WillBeAvail(phiOcc)); } // initialize each operand of phiOcc @@ -571,7 +589,7 @@ void SSAPre::Finalize2() { if (phiOcc->IsRemoved() || !phiOcc->IsExtraneous()) { continue; } - if (!phiOcc->IsWillBeAvail()) { + if (!WillBeAvail(phiOcc)) { phiOcc->SetIsRemoved(true); continue; } @@ -607,13 +625,13 @@ void SSAPre::Finalize2() { mirModule->GetOut() << std::endl; } else if (occ->GetOccType() == kOccReal) { auto *realOcc = static_cast(occ); + realOcc->Dump(*irMap); if (realOcc->IsReload()) { - realOcc->Dump(*irMap); mirModule->GetOut() << " isReload\n"; - } - if (realOcc->IsSave()) { - realOcc->Dump(*irMap); + } else if (realOcc->IsSave()) { mirModule->GetOut() << " isSave\n"; + } else { + mirModule->GetOut() << "\n"; } } else if (occ->GetOccType() == kOccPhiopnd) { auto *phiOpndOcc = static_cast(occ); @@ -832,6 +850,7 @@ void SSAPre::Rename1() { } else { realOcc->SetDef(realTopOccur); } + realOcc->rgExcluded = true; // relevant only to mc-ssapre } else { // assign new class occ->SetClassID(classCount++); @@ -856,24 +875,24 @@ void SSAPre::Rename1() { } } } + MePhiOcc *phiTopOccur = static_cast(topOccur); if (isAllDom) { realOcc->SetClassID(topOccur->GetClassID()); realOcc->SetDef(topOccur); (void)rename2Set.insert(realOcc->GetPosition()); - occStack.push(realOcc); - if 
(IsLoopHeadBB(topOccur->GetBB()->GetBBId())) { + phiTopOccur->SetIsPartialAnt(true); + if (!doMinCut && IsLoopHeadBB(topOccur->GetBB()->GetBBId())) { static_cast(topOccur)->SetSpeculativeDownSafe(true); static_cast(topOccur)->SetIsDownSafe(true); } } else { - auto *phiTopOccur = static_cast(topOccur); if (!phiTopOccur->SpeculativeDownSafe()) { phiTopOccur->SetIsDownSafe(false); } // assign new class occ->SetClassID(classCount++); - occStack.push(occ); } + occStack.push(occ); } break; } diff --git a/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp b/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp index 3a0266558ec75da2a13f4e133f47b86120d7e560..8d344bdeb8accc4033093025d7177252aa7d4c61 100644 --- a/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp +++ b/src/mapleall/maple_me/src/ssa_pre_for_hoist.cpp @@ -349,7 +349,7 @@ void ExprHoist::HoistExpr(const MapleVector &allOccs, int32 candId) { } auto *phiOpndocc = static_cast(occ); auto *phiOcc = phiOpndocc->GetDefPhiOcc(); - if (phiOcc->IsWillBeAvail() && phiOpndocc->IsOkToInsert()) { + if (phiOcc->IsWillBeAvail() && parent->OKToInsert(phiOpndocc)) { if (hs->cdHS && // need a cd to hoist hs->occ == nullptr && // if not null, hs has been inserted hs->cdHS->occ != nullptr && // make sure there's at least one realocc at cd