diff --git a/src/mapleall/maple_ir/include/mir_nodes.h b/src/mapleall/maple_ir/include/mir_nodes.h
index 73fdd68001a25730a05ee9a81f30652ab6337454..a342a58b889d57f774baeb0fc24e7459b8d13428 100644
--- a/src/mapleall/maple_ir/include/mir_nodes.h
+++ b/src/mapleall/maple_ir/include/mir_nodes.h
@@ -2583,6 +2583,10 @@ class DoloopNode : public StmtNode {
     doVarStIdx = idx;
   }
 
+  PregIdx GetDoVarPregIdx() const {  // do-variable index as a PregIdx; lfo_unroll.cpp calls it only when IsPreg()
+    return static_cast<PregIdx>(doVarStIdx.FullIdx());
+  }
+
   const StIdx &GetDoVarStIdx() const {
     return doVarStIdx;
   }
diff --git a/src/mapleall/maple_me/BUILD.gn b/src/mapleall/maple_me/BUILD.gn
index 83fa4a110c2012c99f00c0ef1b387c1719bf06d0..1a9129353557af39d6b725dc6a5ec487ee75fb19 100755
--- a/src/mapleall/maple_me/BUILD.gn
+++ b/src/mapleall/maple_me/BUILD.gn
@@ -107,7 +107,8 @@ src_libmplme = [
   "src/simplifyCFG.cpp",
   "src/seqvec.cpp",
   "src/me_autovec.cpp",
-  "src/me_safety_warning.cpp"
+  "src/me_safety_warning.cpp",
+  "src/lfo_unroll.cpp",
 ]
 
 src_libmplmewpo = [
diff --git a/src/mapleall/maple_me/include/lfo_dep_test.h b/src/mapleall/maple_me/include/lfo_dep_test.h
index 245388fa6fef0c1cb4171009916e58ebc5142ce4..8c56132308b5445e5fdb8f7385a2c7796a482699 100644
--- a/src/mapleall/maple_me/include/lfo_dep_test.h
+++ b/src/mapleall/maple_me/include/lfo_dep_test.h
@@ -77,6 +77,7 @@ class DoloopInfo {
   bool hasPtrAccess = false; // give up dep testing if true
   bool hasScalarAssign = false; // give up dep testing if true
   bool hasMayDef = false; // give up dep testing if true
+  bool hasBeenVectorized = false; // set by loopvec phase
   MapleVector outputDepTestList; // output dependence only
   MapleVector flowDepTestList; // include both true and anti dependences
   MapleSet redVars; // reduction variables
diff --git a/src/mapleall/maple_me/include/lfo_unroll.h b/src/mapleall/maple_me/include/lfo_unroll.h
new file mode 100644
index 0000000000000000000000000000000000000000..01ccef9654ef595a54bfc3f190ea314e55f715ba
--- /dev/null
+++ b/src/mapleall/maple_me/include/lfo_unroll.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) [2021] Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * OpenArkCompiler is licensed under the Mulan Permissive Software License v2.
+ * You can use this software according to the terms and conditions of the MulanPSL - 2.0.
+ * You may obtain a copy of MulanPSL - 2.0 at:
+ *
+ * https://opensource.org/licenses/MulanPSL-2.0
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR
+ * FIT FOR A PARTICULAR PURPOSE.
+ * See the MulanPSL - 2.0 for more details.
+ */
+
+#ifndef MAPLE_ME_INCLUDE_LFO_UNROLL_H
+#define MAPLE_ME_INCLUDE_LFO_UNROLL_H
+
+#include "lfo_function.h"
+#include "lfo_pre_emit.h"
+#include "lfo_dep_test.h"
+#include "orig_symbol.h"
+#include "me_ir.h"
+
+namespace maple {
+
+class LfoUnrollOneLoop {  // unrolls one doloop; MELfoUnroll::PhaseRun builds one object per candidate loop
+ public:
+  LfoFunction *lfoFunc;  // passed in by the caller; its meFunc supplies the MIR module
+  LfoPreEmitter *preEmit;  // supplies the code mempool and the stmt map used to find the doloop's parent
+  DoloopInfo *doloopInfo;  // per-loop info; its hasOtherCtrlFlow/hasBeenVectorized flags gate unrolling
+  DoloopNode *doloop;  // the loop being processed (doinfo->doloop)
+  MIRModule *mirModule;  // module of the function; used when cloning the loop body
+  MemPool *codeMP;  // to generate new code
+  MIRBuilder *mirBuilder;  // used to create regread and integer-constant nodes
+  int64 stepAmount = 0;  // IV increment per iteration; set by Process()
+  PrimType ivPrimType = PTY_unknown;  // primitive type of the loop's start expression; set by Process()
+
+  LfoUnrollOneLoop(LfoFunction *f, LfoPreEmitter *preEm, DoloopInfo *doinfo) :  // all three are dereferenced here
+      lfoFunc(f), preEmit(preEm), doloopInfo(doinfo), doloop(doinfo->doloop),
+      mirModule(&f->meFunc->GetMIRModule()), codeMP(preEm->GetCodeMP()),
+      mirBuilder(mirModule->GetMIRBuilder()) {}
+  ~LfoUnrollOneLoop() = default;
+  BaseNode *CloneIVNode();  // new regread/dread node that reads the induction variable (IV)
+  bool IsIVNode(BaseNode *x);  // true if x is a regread/dread of the IV
+  void ReplaceIV(BaseNode *x, BaseNode *repNode);  // puts repNode in place of every IV read under x
+  void DoFullUnroll(size_t tripCount);  // replaces the loop by tripCount copies of its body
+  void DoUnroll(size_t times);  // partial unrolling; the definition is currently an empty stub
+  void Process();  // screens the loop and unrolls it if it qualifies
+};
+
+MAPLE_FUNC_PHASE_DECLARE(MELfoUnroll, MeFunction)
+
+}  // namespace maple
+#endif  // MAPLE_ME_INCLUDE_LFO_UNROLL_H
diff --git a/src/mapleall/maple_me/include/me_phase_manager.h b/src/mapleall/maple_me/include/me_phase_manager.h
index 98c3a6662b7d63d758978609f1cd8eb07e2862e7..7433ff1b23a55316cfc6a8c3e695e72ae56edd38 100644
--- 
a/src/mapleall/maple_me/include/me_phase_manager.h +++ b/src/mapleall/maple_me/include/me_phase_manager.h @@ -82,6 +82,7 @@ #include "cfg_opt.h" #include "lfo_dep_test.h" #include "me_autovec.h" +#include "lfo_unroll.h" #include "me_safety_warning.h" #include "me_sink.h" diff --git a/src/mapleall/maple_me/src/lfo_loop_vec.cpp b/src/mapleall/maple_me/src/lfo_loop_vec.cpp index 8c0dd5ab5b94f640f365e05bdd9173c28e89fdfc..da77b699b885c98aef64b2b6e01db1b7c87d3df6 100644 --- a/src/mapleall/maple_me/src/lfo_loop_vec.cpp +++ b/src/mapleall/maple_me/src/lfo_loop_vec.cpp @@ -1556,6 +1556,7 @@ void LoopVectorization::Perform() { bool vectorizable = Vectorizable(mapit->second, vecInfo, mapit->first->GetDoBody()); if (vectorizable) { LoopVectorization::vectorizedLoop++; + mapit->second->hasBeenVectorized = true; } if (enableDebug) { LogInfo::MapleLogger() << "\nInnermost Doloop:"; diff --git a/src/mapleall/maple_me/src/lfo_unroll.cpp b/src/mapleall/maple_me/src/lfo_unroll.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0245f994df9feae6e155a3e57973e45efb066447 --- /dev/null +++ b/src/mapleall/maple_me/src/lfo_unroll.cpp @@ -0,0 +1,210 @@ +/* + * Copyright (c) [2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */
+#include "lfo_unroll.h"
+#include "me_loop_analysis.h"
+
+namespace maple {
+
+constexpr size_t unrolledSizeLimit = 12;  // unrolled loop body size to be < this value
+constexpr size_t kMaxBodyStmtCount = 16;  // loops whose body counts more statements are not unrolled
+
+// Creates a new read of the loop's induction variable (IV): a regread when the
+// do-variable is a pseudo-register, otherwise a dread of its symbol.
+BaseNode *LfoUnrollOneLoop::CloneIVNode() {
+  if (doloop->IsPreg()) {
+    return mirBuilder->CreateExprRegread(ivPrimType, doloop->GetDoVarPregIdx());
+  }
+  return codeMP->New<AddrofNode>(OP_dread, ivPrimType, doloop->GetDoVarStIdx(), 0);
+}
+
+// Returns true if x reads the loop's IV (a regread or a dread, matching the IV's kind).
+bool LfoUnrollOneLoop::IsIVNode(BaseNode *x) {
+  if (doloop->IsPreg()) {
+    return x->GetOpCode() == OP_regread &&
+           static_cast<RegreadNode *>(x)->GetRegIdx() == doloop->GetDoVarPregIdx();
+  }
+  return x->GetOpCode() == OP_dread &&
+         static_cast<AddrofNode *>(x)->GetStIdx() == doloop->GetDoVarStIdx();
+}
+
+// Replaces every read of the IV found under x by repNode. A block is walked
+// statement by statement; any other node is walked through its operands.
+// NOTE(review): repNode itself is installed at every match (no clone is made), so
+// several uses can end up sharing one node -- confirm that this is acceptable.
+void LfoUnrollOneLoop::ReplaceIV(BaseNode *x, BaseNode *repNode) {
+  if (x->GetOpCode() == OP_block) {
+    for (StmtNode *stmt = static_cast<BlockNode *>(x)->GetFirst(); stmt != nullptr; stmt = stmt->GetNext()) {
+      ReplaceIV(stmt, repNode);
+    }
+    return;
+  }
+  for (size_t i = 0; i < x->NumOpnds(); i++) {
+    if (IsIVNode(x->Opnd(i))) {
+      x->SetOpnd(repNode, i);
+    } else {
+      ReplaceIV(x->Opnd(i), repNode);
+    }
+  }
+}
+
+// Replaces the doloop by tripCount copies of its body placed one after another.
+// IV reads become the start expression in the first copy and
+// "start + stepAmount * k" in copy k (k = 1 .. tripCount - 1); tripCount 0 is a no-op.
+// NOTE(review): nothing assigns the IV after the copies -- confirm that the IV is
+// not read once the loop has finished.
+void LfoUnrollOneLoop::DoFullUnroll(size_t tripCount) {
+  if (tripCount == 0) {
+    return;  // a loop that never runs must not be replaced by a copy of its body
+  }
+  BlockNode *unrolledBlk = doloop->GetDoBody()->CloneTreeWithSrcPosition(*mirModule);
+  ReplaceIV(unrolledBlk, doloop->GetStartExpr());
+  for (size_t iter = 1; iter < tripCount; iter++) {
+    BlockNode *nextIterBlk = doloop->GetDoBody()->CloneTreeWithSrcPosition(*mirModule);
+    BaseNode *adjExpr = mirBuilder->CreateIntConst(stepAmount * static_cast<int64>(iter), ivPrimType);
+    BaseNode *repExpr = codeMP->New<BinaryNode>(OP_add, ivPrimType, doloop->GetStartExpr(), adjExpr);
+    ReplaceIV(nextIterBlk, repExpr);
+    unrolledBlk->InsertBlockAfter(*nextIterBlk, unrolledBlk->GetLast());
+  }
+
+  // replace doloop by the statements in unrolledBlk
+  LfoPart *lfopart = (*preEmit->GetLfoStmtMap())[doloop->GetStmtID()];
+  BaseNode *parent = lfopart->GetParent();
+  ASSERT(parent && (parent->GetOpCode() == OP_block), "LfoUnroll: parent of doloop is not OP_block");
+  BlockNode *pblock = static_cast<BlockNode *>(parent);
+  pblock->ReplaceStmtWithBlock(*doloop, *unrolledBlk);
+}
+
+// Partial unrolling by the factor `times`: not implemented yet, the loop is left as it is.
+void LfoUnrollOneLoop::DoUnroll(size_t times) {
+}
+
+// Estimates the size of blk in statements. Nested blocks are counted through,
+// labels and comments cost nothing, and an "if" costs 1 plus, for each of its two
+// arms, 3 plus the arm's own count (a null arm, like a null blk, counts as 0).
+static size_t CountBlockStmts(BlockNode *blk) {
+  if (blk == nullptr) {
+    return 0;
+  }
+  size_t stmtCount = 0;
+  for (StmtNode *stmt = blk->GetFirst(); stmt != nullptr; stmt = stmt->GetNext()) {
+    if (stmt->GetOpCode() == OP_block) {
+      stmtCount += CountBlockStmts(static_cast<BlockNode *>(stmt));
+    } else if (stmt->GetOpCode() != OP_label && stmt->GetOpCode() != OP_comment) {
+      stmtCount++;
+      if (stmt->GetOpCode() == OP_if) {
+        stmtCount += 3 + CountBlockStmts(static_cast<IfStmtNode *>(stmt)->GetThenPart());
+        stmtCount += 3 + CountBlockStmts(static_cast<IfStmtNode *>(stmt)->GetElsePart());
+      }
+    }
+    ASSERT(stmt->GetOpCode() != OP_switch && stmt->GetOpCode() != OP_while &&
+           stmt->GetOpCode() != OP_dowhile && stmt->GetOpCode() != OP_doloop,
+           "CountBlockStmts: unexpected statement type");
+  }
+  return stmtCount;
+}
+
+// Decides whether this doloop is unrolled. Handled are loops that were not vectorized,
+// have no other control flow, step by the constant 1, have a body of 1..kMaxBodyStmtCount
+// statements, a constant start value and a condition "IV < / <= / != constant".
+void LfoUnrollOneLoop::Process() {
+  if (doloopInfo->hasOtherCtrlFlow || doloopInfo->hasBeenVectorized) {
+    return;
+  }
+  // screen doloop incrExpr: only a constant step of 1 is handled
+  if (!doloop->GetIncrExpr()->IsConstval()) {
+    return;
+  }
+  ConstvalNode *stepNode = static_cast<ConstvalNode *>(doloop->GetIncrExpr());
+  if (!stepNode->GetConstVal()->IsOne()) {
+    return;
+  }
+  stepAmount = 1;  // established by the IsOne() check above
+  ivPrimType = doloop->GetStartExpr()->GetPrimType();
+  size_t stmtCount = CountBlockStmts(doloop->GetDoBody());
+  if (stmtCount == 0 || stmtCount > kMaxBodyStmtCount) {
+    return;
+  }
+
+  // screen doloop condExpr: with a step of +1 only <, <= and != count up to the bound
+  BaseNode *condExpr = doloop->GetCondExpr();
+  const auto condOp = condExpr->GetOpCode();
+  if (condOp != OP_lt && condOp != OP_le && condOp != OP_ne) {
+    return;
+  }
+  if (!IsIVNode(condExpr->Opnd(0))) {
+    return;
+  }
+  BaseNode *endExpr = condExpr->Opnd(1);
+  if (!doloop->GetStartExpr()->IsConstval() || !endExpr->IsConstval()) {
+    return;  // NYI handling of variable trip count
+  }
+
+  // constant trip count, computed in signed arithmetic so that a bound below the
+  // start value is recognized instead of wrapping to a huge unsigned count
+  ConstvalNode *startNode = static_cast<ConstvalNode *>(doloop->GetStartExpr());
+  MIRIntConst *startConst = static_cast<MIRIntConst *>(startNode->GetConstVal());
+  ConstvalNode *endNode = static_cast<ConstvalNode *>(endExpr);
+  MIRIntConst *endConst = static_cast<MIRIntConst *>(endNode->GetConstVal());
+  int64 iterations = endConst->GetValue() - startConst->GetValue();
+  if (condOp == OP_le) {
+    iterations++;
+  }
+  if (iterations <= 0) {
+    return;  // the body never runs (or, for !=, the IV would have to wrap around first)
+  }
+  size_t tripCount = static_cast<size_t>(iterations);
+  // raise the unroll factor until the unrolled body reaches unrolledSizeLimit statements
+  size_t unrollTimes = 1;
+  size_t unrolledStmtCount = stmtCount;
+  while (unrolledStmtCount < unrolledSizeLimit) {
+    unrollTimes++;
+    unrolledStmtCount += stmtCount;
+  }
+  if (tripCount < (unrollTimes * 2)) {
+    DoFullUnroll(tripCount);
+  } else if (unrollTimes > 1) {
+    DoUnroll(unrollTimes);
+  }
+}
+
+// Phase entry point: runs LfoUnrollOneLoop::Process() on every doloop recorded by the
+// dependence-test phase whose DoloopInfo has no children and was not vectorized.
+bool MELfoUnroll::PhaseRun(MeFunction &f) {
+  LfoPreEmitter *preEmit = GET_ANALYSIS(MELfoPreEmission, f);
+  ASSERT(preEmit != nullptr, "lfo preemit phase has problem");
+  LfoDepInfo *lfoDepInfo = GET_ANALYSIS(MELfoDepTest, f);
+  ASSERT(lfoDepInfo != nullptr, "lfo dep test phase has problem");
+  LfoFunction *lfoFunc = f.GetLfoFunc();
+
+  for (auto mapit = lfoDepInfo->doloopInfoMap.begin(); mapit != lfoDepInfo->doloopInfoMap.end(); ++mapit) {
+    if (!mapit->second->children.empty() || mapit->second->hasBeenVectorized) {
+      continue;
+    }
+    LfoUnrollOneLoop unroll(lfoFunc, preEmit, mapit->second);
+    unroll.Process();
+  }
+
+  return false;
+}
+
+// NOTE(review): the PreservedAllExcept template arguments are missing in this copy of the patch.
+void MELfoUnroll::GetAnalysisDependence(maple::AnalysisDep &aDep) const {
+  aDep.AddRequired<MELfoPreEmission>();
+  aDep.AddRequired<MELfoDepTest>();
+  aDep.PreservedAllExcept();
+  aDep.PreservedAllExcept();
+  aDep.PreservedAllExcept();
+}
+}  // namespace maple
diff --git a/src/mapleall/maple_me/src/me_phase_manager.cpp b/src/mapleall/maple_me/src/me_phase_manager.cpp
index fb5b32e76ea30322b02247508ef729be8ba74518..105681200f76df7d56f93fc0bad3605a88f8c2b5 100644
--- a/src/mapleall/maple_me/src/me_phase_manager.cpp
+++ b/src/mapleall/maple_me/src/me_phase_manager.cpp
@@ -173,6 +173,7 @@ MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MEPregRename, pregrename)
 MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MEStmtPre, stmtpre)
 MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MECfgOpt, cfgopt)
 MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MEAutoVectorization, autovec)
+MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MELfoUnroll, lfounroll)
 MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MECFGOPT, cfgOpt)
 MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MELoopUnrolling, loopunrolling)
 MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MEHdse, hdse)
diff --git a/src/mapleall/maple_phase/include/phases.def b/src/mapleall/maple_phase/include/phases.def
index 2f285c1b5c57a2c8b09975687f044fcfef7e78f4..cbb32aaaf1791a15d98829f82434de566baee7e2 100644
--- a/src/mapleall/maple_phase/include/phases.def
+++ b/src/mapleall/maple_phase/include/phases.def
@@ -39,6 +39,7 @@ ADDMAPLEMEPHASE("hdse", CLANG && MeOption::optLevel >= 3)
 ADDMAPLEMEPHASE("lfopreemit", CLANG && MeOption::optLevel >= 3)
 ADDMAPLEMEPHASE("deptest", CLANG && MeOption::optLevel >= 3)
 ADDMAPLEMEPHASE("autovec", CLANG && MeOption::optLevel >= 3)
+ADDMAPLEMEPHASE("lfounroll", CLANG && MeOption::optLevel >= 3)
 ADDMAPLEMEPHASE("mecfgbuild", MeOption::optLevel >= 2 || JAVALANG)
 ADDMAPLEMEPHASE("bypatheh", JAVALANG && MeOption::optLevel >= 2)
 ADDMAPLEMEPHASE("loopcanon", MeOption::optLevel >= 2)