diff --git a/src/mapleall/maple_driver/defs/phases.def b/src/mapleall/maple_driver/defs/phases.def index 32ce5fadb4dda2a5102ae7ab15e09304efc5914f..227f99f7165d4872c8b33be652c5531af030b7c9 100644 --- a/src/mapleall/maple_driver/defs/phases.def +++ b/src/mapleall/maple_driver/defs/phases.def @@ -37,6 +37,8 @@ ADD_PHASE("ivcanon", CLANG && MeOption::optLevel >= 3) ADD_PHASE("hprop", CLANG && MeOption::optLevel >= 3) ADD_PHASE("hdse", CLANG && MeOption::optLevel >= 3) ADD_PHASE("lfopreemit", CLANG && MeOption::optLevel >= 3) +ADD_PHASE("deptest", CLANG && MeOption::optLevel >= 3) +ADD_PHASE("lfoloopvec", CLANG && MeOption::optLevel >= 3) ADD_PHASE("mecfgbuild", MeOption::optLevel >= 2 || JAVALANG) ADD_PHASE("cfgOpt", CLANG && MeOption::optLevel >= 2) ADD_PHASE("bypatheh", JAVALANG && MeOption::optLevel >= 2) diff --git a/src/mapleall/maple_ir/include/global_tables.h b/src/mapleall/maple_ir/include/global_tables.h index ad419cd59cd4d59b911bf8ad9b72d39f81ca3bf2..a41d388b74001be1f058fb1f53ce6a57dc0f73cf 100644 --- a/src/mapleall/maple_ir/include/global_tables.h +++ b/src/mapleall/maple_ir/include/global_tables.h @@ -315,6 +315,16 @@ class TypeTable { ASSERT(PTY_unknown < typeTable.size(), "array index out of range"); return typeTable.at(PTY_unknown); } + // vector type + MIRType *GetV4Int32() const { + ASSERT(PTY_v4i32 < typeTable.size(), "array index out of range"); + return typeTable.at(PTY_v4i32); + } + + MIRType *GetV2Int32() const { + ASSERT(PTY_v2i32 < typeTable.size(), "array index out of range"); + return typeTable.at(PTY_v2i32); + } // Get or Create derived types. 
MIRType *GetOrCreatePointerType(const TyIdx &pointedTyIdx, PrimType primType = PTY_ptr, diff --git a/src/mapleall/maple_me/BUILD.gn b/src/mapleall/maple_me/BUILD.gn index 16ab6c8ddfca680ff5b541173019ff5d3ce53012..b8cc176356af2f6f9270fb1cf590bdebecd3a400 100755 --- a/src/mapleall/maple_me/BUILD.gn +++ b/src/mapleall/maple_me/BUILD.gn @@ -96,6 +96,8 @@ src_libmplme = [ "src/lfo_inject_iv.cpp", "src/lfo_pre_emit.cpp", "src/lfo_iv_canon.cpp", + "src/lfo_dep_test.cpp", + "src/lfo_loop_vec.cpp", "src/me_value_range_prop.cpp", "src/cfg_opt.cpp", ] diff --git a/src/mapleall/maple_me/include/lfo_dep_test.h b/src/mapleall/maple_me/include/lfo_dep_test.h new file mode 100644 index 0000000000000000000000000000000000000000..9eec1ed00a377e0a66f26a08c4b52968c91e1370 --- /dev/null +++ b/src/mapleall/maple_me/include/lfo_dep_test.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) [2021] Huawei Technologies Co., Ltd. All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan Permissive Software License v2. + * You can use this software according to the terms and conditions of the MulanPSL - 2.0. + * You may obtain a copy of MulanPSL - 2.0 at: + * + * https://opensource.org/licenses/MulanPSL-2.0 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the MulanPSL - 2.0 for more details. 
+ */
+
+#ifndef MAPLE_ME_INCLUDE_LFO_DEP_TEST_H
+#define MAPLE_ME_INCLUDE_LFO_DEP_TEST_H
+
+#include "lfo_function.h"
+#include "lfo_pre_emit.h"
+#include "me_phase.h"
+
+namespace maple {
+
+class LfoDepInfo;
+// One array subscript described in the linear form coeff * iv + additiveConst.
+class SubscriptDesc {
+ public:
+  DreadNode *iv = nullptr;      // dread of the doloop's index variable; stays null when tooMessy
+  int64 coeff = 1;              // coefficient of the variable
+  int64 additiveConst = 0;      // constant added to (negated when subtracted from) coeff * iv
+  bool tooMessy = false;        // too complicated to analyze
+
+ public:
+  SubscriptDesc() {}
+};
+// One array reference together with one SubscriptDesc per index operand.
+class ArrayAccessDesc {
+ public:
+  ArrayNode *theArray;
+  MapleVector<SubscriptDesc *> subscriptVec;  // describe the subscript of each array dimension
+
+ public:
+  ArrayAccessDesc(MapleAllocator *alloc, ArrayNode *arr) : theArray(arr), subscriptVec(alloc->Adapter()) {}
+};
+// Per-doloop record: nesting links plus the array writes and reads found in the loop body.
+class DoloopInfo {
+ public:
+  MapleAllocator *alloc;
+  LfoDepInfo *depInfo;
+  DoloopNode *doloop;
+  DoloopInfo *parent;                        // enclosing doloop's info, nullptr for an outermost doloop
+  MapleVector<DoloopInfo *> children;        // for the nested doloops in program order
+  MapleVector<ArrayAccessDesc *> lhsArrays;  // each element represents an array assign
+  MapleVector<ArrayAccessDesc *> rhsArrays;  // each element represents an array read
+  bool hasPtrAccess = false;                 // give up dep testing if true
+  bool hasCall = false;                      // give up dep testing if true
+
+ public:
+  DoloopInfo(MapleAllocator *allc, LfoDepInfo *depinfo, DoloopNode *doloop, DoloopInfo *prnt) :
+      alloc(allc),
+      depInfo(depinfo),
+      doloop(doloop),
+      parent(prnt),
+      children(alloc->Adapter()),
+      lhsArrays(alloc->Adapter()),
+      rhsArrays(alloc->Adapter()) {}
+  ~DoloopInfo() = default;
+  SubscriptDesc *BuildOneSubscriptDesc(BaseNode *subsX);
+  void BuildOneArrayAccessDesc(ArrayNode *arr, bool isRHS);
+  void CreateRHSArrayAccessDesc(BaseNode *x);
+  void CreateArrayAccessDesc(BlockNode *block);
+};
+// Result of the "deptest" phase: the DoloopInfo of every doloop of one function.
+class LfoDepInfo : public AnalysisResult {
+ public:
+  MapleAllocator alloc;
+  LfoFunction *lfoFunc;
+  LfoPreEmitter *preEmit;
+  MapleVector<DoloopInfo *> outermostDoloopInfoVec;  // outermost doloops' DoloopInfo in program order
+  MapleMap<DoloopNode *, DoloopInfo *> doloopInfoMap;  // every doloop, nested or not, mapped to its DoloopInfo
+
+ public:
+  LfoDepInfo(MemPool *mempool, LfoFunction *f, LfoPreEmitter *preemit) :
+      AnalysisResult(mempool), alloc(mempool), lfoFunc(f), preEmit(preemit),
+      outermostDoloopInfoVec(alloc.Adapter()),
+      doloopInfoMap(alloc.Adapter()) {}
+  ~LfoDepInfo() = default;
+  void CreateDoloopInfo(BlockNode *block, DoloopInfo *parent);
+  void CreateArrayAccessDesc(MapleMap<DoloopNode *, DoloopInfo *> *doloopInfoVec);
+  std::string PhaseName() const { return "deptest"; }
+};
+// Phase driver: builds and returns an LfoDepInfo for one function.
+class DoLfoDepTest : public MeFuncPhase {
+ public:
+  explicit DoLfoDepTest(MePhaseID id) : MeFuncPhase(id) {}
+  ~DoLfoDepTest() {}
+  AnalysisResult *Run(MeFunction *func, MeFuncResultMgr *m, ModuleResultMgr *moduleResMgr) override;
+  std::string PhaseName() const override { return "deptest"; }
+};
+}  // namespace maple
+#endif  // MAPLE_ME_INCLUDE_LFO_DEP_TEST_H
diff --git a/src/mapleall/maple_me/include/lfo_loop_vec.h b/src/mapleall/maple_me/include/lfo_loop_vec.h
new file mode 100644
index 0000000000000000000000000000000000000000..28b7984e2a97fefbf7603c9cb57dae68e9a5a96e
--- /dev/null
+++ b/src/mapleall/maple_me/include/lfo_loop_vec.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved.
+ *
+ * OpenArkCompiler is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR
+ * FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ */
+#ifndef MAPLE_ME_INCLUDE_LOOP_VEC_H
+#define MAPLE_ME_INCLUDE_LOOP_VEC_H
+#include "me_function.h"
+#include "me_irmap.h"
+#include "me_phase.h"
+#include "me_ir.h"
+#include "lfo_pre_emit.h"
+
+namespace maple {
+
+// transform plan for current loop
+class LoopTransPlan {
+ public:
+  LoopTransPlan() : vlowNode(nullptr), vupperNode(nullptr), vincrNode(nullptr), vecLanes(0) {  // vecLanes was left indeterminate
+    vecFactor = 1;
+    needEpilog = false;
+  }
+  ~LoopTransPlan() = default;
+  BaseNode *vlowNode;    // low bound of vectorized loop body
+  BaseNode *vupperNode;  // upper bound of vectorized loop body
+  BaseNode *vincrNode;   // stride of vectorized loop body
+  // list of vectorizable stmtnodes in current loop, others can't be vectorized
+  uint8_t vecLanes;      // number of lanes of vector type in current loop (0 until a plan sets it)
+  uint8_t vecFactor;     // number of loop iterations combined to one vectorized loop iteration
+  bool needEpilog;       // replaced with detailed
+};
+// Rewrites the doloops recorded by lfopreemit into vector form.
+class LoopVectorization {
+ public:
+  LoopVectorization(MemPool *mp, LfoPreEmitter *lfoEmit);
+  ~LoopVectorization() = default;
+
+  void Perform();
+  void TransformLoop();
+  void VectorizeDoLoop(DoloopNode *, LoopTransPlan*);
+  void VectorizeNode(BaseNode *, uint8_t);
+  MIRType *GenVecType(PrimType, uint8_t);
+  bool CanVectorizeStmt(StmtNode *stmt);  // NOTE(review): no definition is visible in lfo_loop_vec.cpp
+//  bool canVectorize(DoloopNode *doloop, LoopTransPlan *);
+  void widenDoloop(DoloopNode *doloop, LoopTransPlan *);
+  DoloopNode *PrepareDoloop(DoloopNode *, LoopTransPlan *);
+  DoloopNode *GenEpilog(DoloopNode *);
+  MemPool *GetLocalMp() { return localMP; }
+
+ private:
+  MIRFunction *mirFunc;
+  MapleMap<uint32_t, LfoPart*>  *lfoStmtParts; // point to lfoStmtParts of lfopreemit, map lfoinfo for StmtNode, key is stmtID (NOTE(review): key type inferred, confirm against LfoPreEmitter)
+  MapleMap<BaseNode*, LfoPart*> *lfoExprParts; // point to lfoexprparts of lfopreemit, map lfoinfo for exprNode, key is mirnode (NOTE(review): key type inferred, confirm against LfoPreEmitter)
+  MapleVector<DoloopNode *> *lfoDoloops;       // point to the doloop nodes collected by lfopreemit
+  MemPool *codeMP;    // point to mirfunction codeMp
+  MemPool *localMP;   // local mempool
+};
+// Phase driver for "lfoloopvec".
+class DoLfoLoopVectorization: public MeFuncPhase {
+ public:
+  static bool enableDebug;
+  static bool
enableDump; + explicit DoLfoLoopVectorization(MePhaseID id) : MeFuncPhase(id) {} + ~DoLfoLoopVectorization() = default; + + AnalysisResult *Run(MeFunction *func, MeFuncResultMgr *m, ModuleResultMgr*) override; + std::string PhaseName() const override { + return "lfoloopvec"; + } + + private: +}; +} // namespace maple +#endif // MAPLE_ME_INCLUDE_LOOP_VEC_H diff --git a/src/mapleall/maple_me/include/lfo_pre_emit.h b/src/mapleall/maple_me/include/lfo_pre_emit.h index 0ab6ff76d081cdccb20b1e8fe93609d8fe606f58..6eaccf339269085fe5657992a00e8f4b0dd7a1d0 100644 --- a/src/mapleall/maple_me/include/lfo_pre_emit.h +++ b/src/mapleall/maple_me/include/lfo_pre_emit.h @@ -15,6 +15,7 @@ #ifndef MAPLE_ME_INCLUDE_LFO_PRE_EMIT_H #define MAPLE_ME_INCLUDE_LFO_PRE_EMIT_H +#include "mir_nodes.h" #include "me_irmap.h" #include "me_phase.h" @@ -28,8 +29,9 @@ class LfoPreEmitter : public AnalysisResult { MapleAllocator *codeMPAlloc; MemPool *lfoMP; MapleAllocator lfoMPAlloc; - MapleMap lfoStmtParts; // map lfoinfo for StmtNode, key is stmtID + MapleMap lfoStmtParts; // map lfoinfo for StmtNode, key is stmtID MapleMap lfoExprParts; // map lfoinfor for exprNode, key is mirnode + MapleVector lfoDoloops; // store doloop node MeCFG *cfg; public: @@ -43,6 +45,7 @@ class LfoPreEmitter : public AnalysisResult { lfoMPAlloc(lfoMP), lfoStmtParts(lfoMPAlloc.Adapter()), lfoExprParts(lfoMPAlloc.Adapter()), + lfoDoloops(lfoMPAlloc.Adapter()), cfg(f->meFunc->GetCfg()) {} private: @@ -93,6 +96,11 @@ class LfoPreEmitter : public AnalysisResult { LfoPart *lfopart = lfoStmtParts[stmtID]; return lfopart->mestmt; } + MIRFunction *GetMirFunction() { return mirFunc; } + MemPool *GetCodeMP() { return codeMP; } + MapleMap *GetLfoStmtMap() { return &lfoStmtParts; } + MapleMap *GetLfoExprMap() { return &lfoExprParts; } + MapleVector *GetLfoDoLoops() { return &lfoDoloops; } }; /* emit ir to specified file */ diff --git a/src/mapleall/maple_me/include/me_phases.def b/src/mapleall/maple_me/include/me_phases.def index 
69132caa316a54984cb15c59c8e4cbd9e3fa3905..09a05c424f605c0ef81bef1c1b725e09f34029b3 100644 --- a/src/mapleall/maple_me/include/me_phases.def +++ b/src/mapleall/maple_me/include/me_phases.def @@ -55,7 +55,9 @@ FUNCAPHASE(MeFuncPhase_MECFG, MeDoMeCfg) FUNCTPHASE(MeFuncPhase_LFOINJECTIV, DoLfoInjectIV) FUNCAPHASE(MeFuncPhase_LFOPREEMIT, DoLfoPreEmission) FUNCTPHASE(MeFuncPhase_LFOIVCANON, DoLfoIVCanon) +FUNCTPHASE(MeFuncPhase_LFOLOOPVEC, DoLfoLoopVectorization) FUNCAPHASE(MeFuncPhase_MECFGOPT, DoCfgOpt) +FUNCAPHASE(MeFuncPhase_LFODEPTEST, DoLfoDepTest) #if MIR_JAVA FUNCTPHASE(MeFuncPhase_SYNCSELECT, MeDoSyncSelect) #endif diff --git a/src/mapleall/maple_me/src/copy_prop.cpp b/src/mapleall/maple_me/src/copy_prop.cpp index 176f5ab4984eead41723ab2c472a901395825701..49192243fb2f3706e127a512fc3125977bf58257 100644 --- a/src/mapleall/maple_me/src/copy_prop.cpp +++ b/src/mapleall/maple_me/src/copy_prop.cpp @@ -292,7 +292,7 @@ AnalysisResult *MeDoCopyProp::Run(MeFunction *func, MeFuncResultMgr *m, ModuleRe CopyProp copyProp(func, *hMap, *dom, *NewMemPool(), func->GetCfg()->NumBBs(), Prop::PropConfig { MeOption::propBase, true, MeOption::propGlobalRef, MeOption::propFinaliLoadRef, - MeOption::propIloadRefNonParm, MeOption::propAtPhi, MeOption::propWithInverse || MeOption::optLevel >= 3 }); + MeOption::propIloadRefNonParm, MeOption::propAtPhi, MeOption::propWithInverse || func->IsLfo() }); copyProp.TraversalBB(*func->GetCfg()->GetCommonEntryBB()); if (DEBUGFUNC(func)) { LogInfo::MapleLogger() << "\n============== After Copy Propagation =============" << '\n'; @@ -301,4 +301,4 @@ AnalysisResult *MeDoCopyProp::Run(MeFunction *func, MeFuncResultMgr *m, ModuleRe } return nullptr; } -} // namespace maple \ No newline at end of file +} // namespace maple diff --git a/src/mapleall/maple_me/src/lfo_dep_test.cpp b/src/mapleall/maple_me/src/lfo_dep_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6d55246a46101eed3d0f70981ad9c5eb8bbc39b0 --- /dev/null +++ 
b/src/mapleall/maple_me/src/lfo_dep_test.cpp
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved.
+ *
+ * OpenArkCompiler is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR
+ * FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ */
+#include "me_function.h"
+#include "lfo_dep_test.h"
+
+namespace maple {
+// Creates a DoloopInfo for every doloop under block; parent is the enclosing doloop's info (nullptr at top level).
+void LfoDepInfo::CreateDoloopInfo(BlockNode *block, DoloopInfo *parent) {
+  StmtNode *stmt = block->GetFirst();
+  while (stmt) {
+    switch (stmt->GetOpCode()) {
+      case OP_doloop: {
+        DoloopNode *doloop = static_cast<DoloopNode *>(stmt);
+        DoloopInfo *doloopInfo = memPool->New<DoloopInfo>(&alloc, this, doloop, parent);
+        doloopInfoMap.insert(std::pair<DoloopNode *, DoloopInfo *>(doloop, doloopInfo));
+        if (parent) {
+          parent->children.push_back(doloopInfo);
+        } else {
+          outermostDoloopInfoVec.push_back(doloopInfo);
+        }
+        CreateDoloopInfo(doloop->GetDoBody(), doloopInfo);
+        break;
+      }
+      case OP_block: {
+        CreateDoloopInfo(static_cast<BlockNode *>(stmt), parent);
+        break;
+      }
+      case OP_if: {
+        IfStmtNode *ifstmtnode = static_cast<IfStmtNode *>(stmt);
+        if (ifstmtnode->GetThenPart())
+          CreateDoloopInfo(ifstmtnode->GetThenPart(), parent);
+        if (ifstmtnode->GetElsePart())
+          CreateDoloopInfo(ifstmtnode->GetElsePart(), parent);
+        break;
+      }
+      case OP_dowhile:
+      case OP_while: {
+        CreateDoloopInfo(static_cast<WhileStmtNode *>(stmt)->GetBody(), parent);  // a while is not a doloop, so parent is unchanged
+        break;
+      }
+      default:
+        break;
+    }
+    stmt = stmt->GetNext();
+  }
+}
+// Describes subsX as coeff * iv + additiveConst, iv being this doloop's index variable; any other shape is tooMessy.
+SubscriptDesc *DoloopInfo::BuildOneSubscriptDesc(BaseNode *subsX) {
+  SubscriptDesc *subsDesc = alloc->GetMemPool()->New<SubscriptDesc>();
+  Opcode op = subsX->GetOpCode();
+  BaseNode *mainTerm = nullptr;
+  if (op != OP_add && op != OP_sub) {
+    mainTerm = subsX;
+  } else {  // get additiveConst: only "<term> +/- <integer constant>" is accepted
+    BinaryNode *binnode = static_cast<BinaryNode *>(subsX);
+    BaseNode *opnd0 = binnode->Opnd(0);
+    BaseNode *opnd1 = binnode->Opnd(1);
+    if (opnd1->GetOpCode() != OP_constval) {
+      subsDesc->tooMessy = true;
+      return subsDesc;
+    }
+    MIRConst *mirconst = static_cast<ConstvalNode *>(opnd1)->GetConstVal();
+    if (mirconst->GetKind() != kConstInt) {
+      subsDesc->tooMessy = true;
+      return subsDesc;
+    }
+    subsDesc->additiveConst = static_cast<MIRIntConst *>(mirconst)->GetValue();
+    if (op == OP_sub)
+      subsDesc->additiveConst = - subsDesc->additiveConst;
+    mainTerm = opnd0;
+  }
+  // process mainTerm: test its own opcode; op still holds the outer add/sub when a constant was peeled off above
+  BaseNode *varNode = nullptr;
+  if (mainTerm->GetOpCode() != OP_mul) {
+    varNode = mainTerm;
+  } else {
+    BinaryNode *mulbinnode = static_cast<BinaryNode *>(mainTerm);
+    BaseNode *mulopnd0 = mulbinnode->Opnd(0);
+    BaseNode *mulopnd1 = mulbinnode->Opnd(1);
+    if (mulopnd0->GetOpCode() != OP_dread) {
+      subsDesc->tooMessy = true;
+      return subsDesc;
+    }
+    varNode = mulopnd0;
+    if (mulopnd1->GetOpCode() != OP_constval) {
+      subsDesc->tooMessy = true;
+      return subsDesc;
+    }
+    MIRConst *mirconst = static_cast<ConstvalNode *>(mulopnd1)->GetConstVal();
+    if (mirconst->GetKind() != kConstInt) {
+      subsDesc->tooMessy = true;
+      return subsDesc;
+    }
+    subsDesc->coeff = static_cast<MIRIntConst *>(mirconst)->GetValue();
+  }
+  // process varNode: it must be a dread of this doloop's index variable
+  if (varNode->GetOpCode() == OP_dread) {
+    DreadNode *dnode = static_cast<DreadNode *>(varNode);
+    if (dnode->GetStIdx() == doloop->GetDoVarStIdx()) {
+      subsDesc->iv = dnode;
+      return subsDesc;
+    }
+  }
+  subsDesc->tooMessy = true;
+  return subsDesc;
+}
+// Records arr in rhsArrays (isRHS) or lhsArrays, with one SubscriptDesc per index operand.
+void DoloopInfo::BuildOneArrayAccessDesc(ArrayNode *arr, bool isRHS) {
+#if 0
+  MIRType *atype =  arr->GetArrayType(GlobalTables::GetTypeTable());
+  ASSERT(atype->GetKind() == kTypeArray, "type was wrong");
+  MIRArrayType *arryty = static_cast<MIRArrayType *>(atype);
+  size_t dim = arryty->GetDim();
+  CHECK_FATAL(dim == arr->NumOpnds() - 1, "BuildOneArrayAccessDesc: inconsistent array dimension");
+#else
+  size_t dim = arr->NumOpnds() - 1;
+#endif
+  ArrayAccessDesc *arrDesc = alloc->GetMemPool()->New<ArrayAccessDesc>(alloc, arr);
+  if (isRHS) {
+    rhsArrays.push_back(arrDesc);
+  } else {
+    lhsArrays.push_back(arrDesc);
+  }
+  for (size_t i = 0; i < dim; i++) {
+    SubscriptDesc *subs = BuildOneSubscriptDesc(arr->GetIndex(i));
+    arrDesc->subscriptVec.push_back(subs);
+  }
+}
+// Records every OP_array node in the expression tree rooted at x as an array read.
+void DoloopInfo::CreateRHSArrayAccessDesc(BaseNode *x) {
+  if (x->GetOpCode() == OP_array) {
+    BuildOneArrayAccessDesc(static_cast<ArrayNode *>(x), true/*isRHS*/);
+  }
+  for (size_t i = 0; i < x->NumOpnds(); i++) {
+    CreateRHSArrayAccessDesc(x->Opnd(i));
+  }
+}
+// Collects the array writes and reads of this (innermost) doloop's body and sets hasPtrAccess / hasCall.
+void DoloopInfo::CreateArrayAccessDesc(BlockNode *block) {
+  StmtNode *stmt = block->GetFirst();
+  while (stmt) {
+    switch (stmt->GetOpCode()) {
+      case OP_doloop: {
+        CHECK_FATAL(false, "CreateArrayAccessDesc: non-innermost doloop NYI");
+        break;
+      }
+      case OP_block: {
+        CreateArrayAccessDesc(static_cast<BlockNode *>(stmt));
+        break;
+      }
+      case OP_if: {
+        CreateRHSArrayAccessDesc(stmt->Opnd(0));
+        IfStmtNode *ifstmtnode = static_cast<IfStmtNode *>(stmt);
+        if (ifstmtnode->GetThenPart())
+          CreateArrayAccessDesc(ifstmtnode->GetThenPart());
+        if (ifstmtnode->GetElsePart())
+          CreateArrayAccessDesc(ifstmtnode->GetElsePart());
+        break;
+      }
+      case OP_dowhile:
+      case OP_while: {
+        CreateRHSArrayAccessDesc(stmt->Opnd(0));
+        CreateArrayAccessDesc(static_cast<WhileStmtNode *>(stmt)->GetBody());
+        break;
+      }
+      case OP_iassign: {
+        IassignNode *iass = static_cast<IassignNode *>(stmt);
+        if (iass->addrExpr->GetOpCode() == OP_array) {  // NOTE(review): array reads nested inside addrExpr are not recorded
+          BuildOneArrayAccessDesc(static_cast<ArrayNode *>(iass->addrExpr), false/*isRHS*/);
+        } else {
+          hasPtrAccess = true;
+        }
+        CreateRHSArrayAccessDesc(iass->rhs);
+        break;
+      }
+      case OP_call:
+      case OP_callassigned:
+      case OP_icall:
+      case OP_icallassigned: {
+        hasCall = true;
+        // fall thru
+      }
+      default: {
+        for (size_t i = 0; i < stmt->NumOpnds(); i++) {
+          CreateRHSArrayAccessDesc(stmt->Opnd(i));
+        }
+        break;
+      }
+    }
+    stmt = stmt->GetNext();
+  }
+}
+// Fills the array access lists of every innermost doloop in the map; the parameter shadows the member of the same name.
+void LfoDepInfo::CreateArrayAccessDesc(MapleMap<DoloopNode *, DoloopInfo *> *doloopInfoMap) {
+  MapleMap<DoloopNode *, DoloopInfo *>::iterator mapit = doloopInfoMap->begin();
+  for (; mapit != doloopInfoMap->end(); ++mapit) {
+    DoloopInfo *doloopInfo = mapit->second;
+    if (!doloopInfo->children.empty()) {
+      continue;  // only handling innermost doloops
+    }
+    doloopInfo->CreateArrayAccessDesc(doloopInfo->doloop->GetDoBody());
+    if (DEBUGFUNC(lfoFunc->meFunc)) {
+      LogInfo::MapleLogger() << "Innermost Doloop:";
+      if (doloopInfo->hasPtrAccess) {
+        LogInfo::MapleLogger() << " hasPtrAccess";
+      }
+      if (doloopInfo->hasCall) {
+        LogInfo::MapleLogger() << " hasCall";
+      }
+      LogInfo::MapleLogger() << std::endl;
+      doloopInfo->doloop->Dump(0);
+      LogInfo::MapleLogger() << "LHS arrays:\n";
+      for (ArrayAccessDesc *arrAcc : doloopInfo->lhsArrays) {
+        arrAcc->theArray->Dump(0);
+        LogInfo::MapleLogger() << " subscripts:";
+        for (SubscriptDesc *subs : arrAcc->subscriptVec) {
+          if (subs->tooMessy) {
+            LogInfo::MapleLogger() << " [messy]";
+          } else {
+            LogInfo::MapleLogger() << " [" << subs->coeff << "*";
+            LfoPart *lfopart = preEmit->GetLfoExprPart(subs->iv);
+            ScalarMeExpr *scalar = static_cast<ScalarMeExpr *>(lfopart->GetMeExpr());
+            scalar->GetOst()->Dump();
+            LogInfo::MapleLogger() << "+" << subs->additiveConst << "]";
+          }
+        }
+        LogInfo::MapleLogger() << std::endl;
+      }
+      LogInfo::MapleLogger() << "RHS arrays:\n";  // newline added so the label matches "LHS arrays:\n"
+      for (ArrayAccessDesc *arrAcc : doloopInfo->rhsArrays) {
+        arrAcc->theArray->Dump(0);
+        LogInfo::MapleLogger() << " subscripts:";
+        for (SubscriptDesc *subs : arrAcc->subscriptVec) {
+          if (subs->tooMessy) {
+            LogInfo::MapleLogger() << " [messy]";
+          } else {
+            LogInfo::MapleLogger() << " [" << subs->coeff << "*";
+            LfoPart *lfopart = preEmit->GetLfoExprPart(subs->iv);
+            ScalarMeExpr *scalar = static_cast<ScalarMeExpr *>(lfopart->GetMeExpr());
+            scalar->GetOst()->Dump();
+            LogInfo::MapleLogger() << "+" << subs->additiveConst << "]";
+          }
+        }
+        LogInfo::MapleLogger() << std::endl;
+      }
+      LogInfo::MapleLogger() << std::endl;
+    }
+  }
+}
+// Phase entry: builds the doloop tree, then the array access descriptions, and returns the LfoDepInfo.
+AnalysisResult *DoLfoDepTest::Run(MeFunction *func, MeFuncResultMgr *m, ModuleResultMgr*) {
+  LfoPreEmitter *preEmit = static_cast<LfoPreEmitter *>(m->GetAnalysisResult(MeFuncPhase_LFOPREEMIT, func));
+  LfoFunction *lfoFunc = func->GetLfoFunc();
+  MemPool *depTestMp = NewMemPool();
+  LfoDepInfo *depInfo = depTestMp->New<LfoDepInfo>(depTestMp, lfoFunc, preEmit);
+  if (DEBUGFUNC(func)) {
+    LogInfo::MapleLogger() << "\n============== LFO_DEP_TEST =============" << '\n';
+  }
+  depInfo->CreateDoloopInfo(func->GetMirFunc()->GetBody(), nullptr);
+  depInfo->CreateArrayAccessDesc(&depInfo->doloopInfoMap);
+  if (DEBUGFUNC(func)) {
+    LogInfo::MapleLogger() << "________________" << std::endl;
+    lfoFunc->meFunc->GetMirFunc()->Dump();
+  }
+  return depInfo;
+}
+}  // namespace maple
diff --git a/src/mapleall/maple_me/src/lfo_loop_vec.cpp b/src/mapleall/maple_me/src/lfo_loop_vec.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0d7a54cf672656fad53fba1bdc9b913996fb6538
--- /dev/null
+++ b/src/mapleall/maple_me/src/lfo_loop_vec.cpp
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved.
+ *
+ * OpenArkCompiler is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR
+ * FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ */
+#include <iostream>   // NOTE(review): header name was lost in extraction; confirm
+#include <algorithm>  // NOTE(review): header name was lost in extraction; confirm
+#include "me_option.h"
+#include "mir_module.h"
+#include "mir_lower.h"
+#include "mir_builder.h"
+#include "lfo_loop_vec.h"
+
+namespace maple {
+// init class field
+LoopVectorization::LoopVectorization(MemPool *localMp, LfoPreEmitter *lfoEmit) {
+  mirFunc = lfoEmit->GetMirFunction();
+  lfoStmtParts = lfoEmit->GetLfoStmtMap();
+  lfoExprParts = lfoEmit->GetLfoExprMap();
+  lfoDoloops = lfoEmit->GetLfoDoLoops();
+  codeMP = lfoEmit->GetCodeMP();
+  localMP = localMp;
+}
+// Returns the vector type holding count elements of sPrimType; only 4 x i32 is supported so far.
+MIRType* LoopVectorization::GenVecType(PrimType sPrimType, uint8 count) {
+  switch (sPrimType) {
+    case PTY_i32: {
+      if (count == 4) {
+        return GlobalTables::GetTypeTable().GetV4Int32();
+      }
+      CHECK_FATAL(false, "NIY");  // callers dereference the result unchecked, so stop here instead of returning nullptr
+    }
+    default:
+      CHECK_FATAL(false, "NIY");  // was ASSERT; NOTE(review): presumably ASSERT is compiled out of release builds - confirm
+  }
+  return nullptr;
+}
+
+// iterate tree node to wide scalar type to vector type
+// following opcode can be vectorized directly
+//  +, -, *, &, |, <<, >>, compares, ~, !
+// iassign, iread, dassign, dread
+void LoopVectorization::VectorizeNode(BaseNode *node, uint8 count) {
+  node->Dump(0);
+  switch (node->GetOpCode()) {
+    case OP_iassign: {
+      IassignNode *iassign = static_cast<IassignNode *>(node);
+      // change lhs type to vector type
+      MIRType &mirType = GetTypeFromTyIdx(iassign->GetTyIdx());
+      CHECK_FATAL(mirType.GetKind() == kTypePointer, "iassign must have pointer type");
+      MIRPtrType *ptrType = static_cast<MIRPtrType *>(&mirType);
+      MIRType *vecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), count);
+      ASSERT(vecType != nullptr, "vector type should not be null");
+      MIRType *pvecType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*vecType, PTY_ptr);
+      // update lhs type
+      iassign->SetTyIdx(pvecType->GetTypeIndex());
+      // visit rhs
+      VectorizeNode(iassign->GetRHS(), count);
+      break;
+    }
+    case OP_iread: {
+      IreadNode *ireadnode = static_cast<IreadNode *>(node);
+      // update primtype
+      MIRType *primVecType = GenVecType(ireadnode->GetPrimType(), count);
+      node->SetPrimType(primVecType->GetPrimType());
+      // update tyidx
+      MIRType &mirType = GetTypeFromTyIdx(ireadnode->GetTyIdx());
+      CHECK_FATAL(mirType.GetKind() == kTypePointer, "iread must have pointer type");
+      MIRPtrType *ptrType = static_cast<MIRPtrType *>(&mirType);
+      MIRType *vecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), count);
+      ASSERT(vecType != nullptr, "vector type should not be null");
+      MIRType *pvecType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*vecType, PTY_ptr);
+      // update pointer type of the iread
+      ireadnode->SetTyIdx(pvecType->GetTypeIndex());
+      break;
+    }
+    // scalar related: widen type directly or unroll instructions
+    case OP_dassign:
+    case OP_dread:
+      ASSERT(0, "NIY");
+      break;
+    // vector type support in opcode  +, -, *, &, |, <<, >>, compares, ~, !
+    case OP_add:
+    case OP_sub:
+    case OP_mul:
+    case OP_band:
+    case OP_bior:
+    case OP_shl:
+    case OP_lshr:
+    case OP_ashr:
+    // compare
+    case OP_eq:
+    case OP_ne:
+    case OP_lt:
+    case OP_gt:
+    case OP_le:
+    case OP_ge:
+    case OP_cmpg:
+    case OP_cmpl: {
+      ASSERT(node->IsBinaryNode(), "should be binarynode");
+      BinaryNode *binNode = static_cast<BinaryNode *>(node);
+      MIRType *vecType = GenVecType(node->GetPrimType(), count);
+      node->SetPrimType(vecType->GetPrimType());  // update primtype of binary op
+      VectorizeNode(binNode->Opnd(0), count);
+      VectorizeNode(binNode->Opnd(1), count);
+      break;
+    }
+    // unary op
+    case OP_bnot:
+    case OP_lnot: {
+      ASSERT(node->IsUnaryNode(), "should be unarynode");
+      UnaryNode *unaryNode = static_cast<UnaryNode *>(node);
+      MIRType *vecType = GenVecType(node->GetPrimType(), count);
+      node->SetPrimType(vecType->GetPrimType());  // update primtype of unary op
+      VectorizeNode(unaryNode->Opnd(0), count);
+      break;
+    }
+    default:
+      ASSERT(0, "can't be vectorized");
+  }
+}
+
+// update init/stride/upper nodes of doloop
+// now hack code to widen const stride with value "vecFactor * original stride"
+void LoopVectorization::widenDoloop(DoloopNode *doloop, LoopTransPlan *tp) {
+  ConstvalNode *iConst = static_cast<ConstvalNode *>(doloop->GetIncrExpr());  // NOTE(review): assumes a constant increment; opcode is not checked
+  MIRIntConst *incrConst =
static_cast<MIRIntConst *>(iConst->GetConstVal());
+  MIRType *typeInt = GlobalTables::GetTypeTable().GetInt32();
+  MIRIntConst *newIncr = GlobalTables::GetIntConstTable().GetOrCreateIntConst(tp->vecFactor*incrConst->GetValue(), *typeInt);
+  ConstvalNode *newIncrNode = codeMP->New<ConstvalNode>(PTY_i32, newIncr);
+  doloop->SetIncrExpr(newIncrNode);
+}
+// Widens the loop stride, then rewrites every statement of the loop body to vector types.
+void LoopVectorization::VectorizeDoLoop(DoloopNode *doloop, LoopTransPlan *tp) {
+  // LogInfo::MapleLogger() << "\n**** dump doloopnode ****\n";
+  // doloop->Dump(0);
+  // step 1: handle loop low/upper/stride
+  widenDoloop(doloop, tp);
+
+  // step 2: widen vectorizable stmt in doloop
+  BlockNode *loopbody = doloop->GetDoBody();
+  for (auto &stmt : loopbody->GetStmtNodes()) {
+    //if (stmt need to be vectorized in vectorized list) {
+    VectorizeNode(&stmt, tp->vecFactor);
+    //} else {
+    // stmt could not be widen directly, unroll instruction with vecFactor
+    // move value from vector type if need (need def-use information from plan)
+    //}
+  }
+}
+
+// generate remainder loop (not implemented yet: returns doloop unchanged)
+DoloopNode *LoopVectorization::GenEpilog(DoloopNode *doloop) {
+  // new doloopnode
+  // copy doloop body
+  // insert newdoloopnode after doloop
+  return doloop;
+}
+
+// generate prolog/epilog blocknode if needed (both branches are still empty)
+// return doloop need to be vectorized
+DoloopNode *LoopVectorization::PrepareDoloop(DoloopNode *doloop, LoopTransPlan *tp) {
+  bool needPeel = false;
+  // generate peel code if need
+  if (needPeel) {
+    // peel code here
+    // update loop lower of doloop if need
+    // copy loop body
+  }
+  if (tp->needEpilog) {
+    // copy doloop as doloop's next
+    // update loop upper bound
+  }
+  return doloop;
+}
+// Vectorizes every recorded doloop with a fixed plan (vecFactor 4); no legality check is made yet.
+void LoopVectorization::TransformLoop() {
+  for (auto it : *lfoDoloops) {
+    // hack code here, transform plan is stored in map
+    LoopTransPlan *tplan = localMP->New<LoopTransPlan>();
+    tplan->vecFactor = 4;
+    // generate prolog/epilog according to vectorization plan
+    DoloopNode *vecDoloopNode = PrepareDoloop(it, tplan);
+    VectorizeDoLoop(vecDoloopNode, tplan);
+  }
+}
+// Entry point of the transformation; only step 3 exists so far.
+void LoopVectorization::Perform() {
+  // step 1: build dependence graph for each loop
+  // step 2: collect information, legality check and generate transform plan
+  //         transform plan map to each doloop
+  // step 3: do transform
+  TransformLoop();
+}
+// Phase entry: optionally vectorizes, invalidates all analysis results, then lowers the function for later ME phases.
+AnalysisResult *DoLfoLoopVectorization::Run(MeFunction *func, MeFuncResultMgr *m, ModuleResultMgr*) {
+  // generate lfo IR
+  LfoPreEmitter *lfoemit = static_cast<LfoPreEmitter *>(m->GetAnalysisResult(MeFuncPhase_LFOPREEMIT, func));
+  CHECK_NULL_FATAL(lfoemit);
+  MIRFunction *mirfunction = func->GetMirFunc();
+  if (DEBUGFUNC(func)) {  // was unconditional; now guarded like the "After" dump below
+    LogInfo::MapleLogger() << "\n**** Before loop vectorization phase ****\n";
+    mirfunction->Dump(false);
+  }
+  // run loop vectorization
+  if (DEBUGFUNC(func)) {  // NOTE(review): the transformation itself only runs for debug-selected functions
+    LoopVectorization loopVec(NewMemPool(), lfoemit);
+    loopVec.Perform();
+  }
+
+  // invalid analysis result
+  m->InvalidAllResults();
+
+  if (DEBUGFUNC(func)) {
+    LogInfo::MapleLogger() << "\n**** After loop vectorization phase ****\n";
+    mirfunction->Dump(false);
+  }
+
+  // lower lfoIR for other mapleme phases
+  MIRLower mirlowerer(func->GetMIRModule(), mirfunction);
+  mirlowerer.SetLowerME();
+  mirlowerer.SetLowerExpandArray();
+  mirlowerer.LowerFunc(*mirfunction);
+
+  return nullptr;
+}
+}  // namespace maple
diff --git a/src/mapleall/maple_me/src/lfo_pre_emit.cpp b/src/mapleall/maple_me/src/lfo_pre_emit.cpp index 2ab7b4953da7946c3b868725e547b0d66f8aeaa8..8c278dee79c564e997bff4f738ee54865a422213 100644 --- a/src/mapleall/maple_me/src/lfo_pre_emit.cpp +++ b/src/mapleall/maple_me/src/lfo_pre_emit.cpp @@ -16,7 +16,6 @@ #include "me_irmap.h" #include "lfo_function.h" #include "lfo_pre_emit.h" -#include "mir_lower.h" #include "constantfold.h" namespace maple { @@ -626,6 +625,7 @@ uint32 LfoPreEmitter::Raise2LfoWhile(uint32 curj, BlockNode *curblk) { BlockNode *Dobody = nullptr; if (whileInfo->canConvertDoloop) { // emit doloop DoloopNode *doloopnode = EmitLfoDoloop(curbb, curblk, whileInfo); + lfoDoloops.push_back(doloopnode); ++curj; Dobody = 
static_cast(doloopnode->GetDoBody()); } else { // emit while loop @@ -780,11 +780,12 @@ AnalysisResult *DoLfoPreEmission::Run(MeFunction *func, MeFuncResultMgr *m, Modu while (i < func->GetCfg()->GetAllBBs().size()) { i = emitter->EmitLfoBB(i, curblk); } +#if 0 // invalid cfg information only in lfo phase - // m->InvalidAnalysisResult(MeFuncPhase_MECFG, func); - m->InvalidAllResults(); + m->InvalidAnalysisResult(MeFuncPhase_MECFG, func); func->SetMeSSATab(nullptr); func->SetIRMap(nullptr); +#endif func->SetLfo(false); ConstantFold cf(func->GetMIRModule()); @@ -795,7 +796,7 @@ AnalysisResult *DoLfoPreEmission::Run(MeFunction *func, MeFuncResultMgr *m, Modu mirfunction->Dump(false); } -#if 1 // use this only if directly feeding to mainopt +#if 0 // use this only if directly feeding to mainopt MIRLower mirlowerer(func->GetMIRModule(), mirfunction); mirlowerer.SetLowerME(); mirlowerer.SetLowerExpandArray(); diff --git a/src/mapleall/maple_me/src/me_cfg.cpp b/src/mapleall/maple_me/src/me_cfg.cpp index 692e8eec844afa3081ad14120c33aea1f459ce60..5082622e05cdef2b5ac3fbf53b0f3c4ccfb1e3b0 100644 --- a/src/mapleall/maple_me/src/me_cfg.cpp +++ b/src/mapleall/maple_me/src/me_cfg.cpp @@ -21,6 +21,7 @@ #include "mir_builder.h" #include "me_critical_edge.h" #include "me_loop_canon.h" +#include "mir_lower.h" namespace { constexpr int kFuncNameLenLimit = 80; @@ -1736,7 +1737,17 @@ void MeCFG::BuildSCC() { SCCTopologicalSort(sccNodes); } -AnalysisResult *MeDoMeCfg::Run(MeFunction *func, MeFuncResultMgr*, ModuleResultMgr*) { +AnalysisResult *MeDoMeCfg::Run(MeFunction *func, MeFuncResultMgr *m, ModuleResultMgr*) { + if (!func->IsLfo() && func->GetLfoFunc() != nullptr) { + m->InvalidAllResults(); + func->SetMeSSATab(nullptr); + func->SetIRMap(nullptr); + + MIRLower mirlowerer(func->GetMIRModule(), func->GetMirFunc()); + mirlowerer.SetLowerME(); + mirlowerer.SetLowerExpandArray(); + mirlowerer.LowerFunc(*func->GetMirFunc()); + } MemPool *meCfgMp = NewMemPool(); MeCFG *theCFG = 
meCfgMp->New(meCfgMp, *func); func->SetTheCfg(theCFG); diff --git a/src/mapleall/maple_me/src/me_function.cpp b/src/mapleall/maple_me/src/me_function.cpp index 7b88f4b65a02797327b2640a094c17fbe503ac8f..16cbfabd4b39f9cbc2cc3cde164a08d95d31295d 100644 --- a/src/mapleall/maple_me/src/me_function.cpp +++ b/src/mapleall/maple_me/src/me_function.cpp @@ -69,6 +69,9 @@ void MeFunction::DumpFunction() const { } void MeFunction::DumpFunctionNoSSA() const { + if (isLfo) { + return; + } auto eIt = theCFG->valid_end(); for (auto bIt = theCFG->valid_begin(); bIt != eIt; ++bIt) { auto *bb = *bIt; diff --git a/src/mapleall/maple_me/src/me_hdse.cpp b/src/mapleall/maple_me/src/me_hdse.cpp index ab1fe9cde7681f38ab0c7b4e7a7d436bc24cfe75..18ee4e8eaea44c9c9238b8da1f4457e085e75097 100644 --- a/src/mapleall/maple_me/src/me_hdse.cpp +++ b/src/mapleall/maple_me/src/me_hdse.cpp @@ -59,7 +59,8 @@ void MeHDSE::ProcessWhileInfos() { } MapleMap::iterator it = lfoFunc->label2WhileInfo.begin(); for (; it != lfoFunc->label2WhileInfo.end(); it++) { - if (it->second->initExpr != nullptr && it->second->initExpr->GetMeOp() != maple::kMeOpConst) { + if (it->second->initExpr != nullptr && + (it->second->initExpr->GetMeOp() == maple::kMeOpVar || it->second->initExpr->GetMeOp() == maple::kMeOpReg)) { workList.push_front(it->second->initExpr); } } diff --git a/src/mapleall/maple_me/src/me_phase_manager.cpp b/src/mapleall/maple_me/src/me_phase_manager.cpp index 4fb95a025abb4f115616d290fe539bc42e46719c..d8bd3e237dc9c2f85f3e88b1c9c10929a22491f2 100644 --- a/src/mapleall/maple_me/src/me_phase_manager.cpp +++ b/src/mapleall/maple_me/src/me_phase_manager.cpp @@ -75,6 +75,8 @@ #include "lfo_pre_emit.h" #include "lfo_iv_canon.h" #include "cfg_opt.h" +#include "lfo_dep_test.h" +#include "lfo_loop_vec.h" #define JAVALANG (mirModule.IsJavaModule()) diff --git a/src/mapleall/maple_me/src/me_prop.cpp b/src/mapleall/maple_me/src/me_prop.cpp index 
a0aef45a74b7ef814fb2b2cfc32999d274a3c4ad..8981b39f33fc734dcbe629570b51e36106bfbe27 100644 --- a/src/mapleall/maple_me/src/me_prop.cpp +++ b/src/mapleall/maple_me/src/me_prop.cpp @@ -34,9 +34,9 @@ const std::set propWhiteList { namespace maple { AnalysisResult *MeDoMeProp::Run(MeFunction *func, MeFuncResultMgr *m, ModuleResultMgr*) { CHECK_NULL_FATAL(func); - auto *dom = static_cast(m->GetAnalysisResult(MeFuncPhase_DOMINANCE, func)); + auto *dom = static_cast(m->GetAnalysisResult(MeFuncPhase_DOMINANCE, func, true)); CHECK_NULL_FATAL(dom); - auto *hMap = static_cast(m->GetAnalysisResult(MeFuncPhase_IRMAPBUILD, func)); + auto *hMap = static_cast(m->GetAnalysisResult(MeFuncPhase_IRMAPBUILD, func, true)); CHECK_NULL_FATAL(hMap); bool propIloadRef = MeOption::propIloadRef; if (!propIloadRef) { diff --git a/src/mapleall/maple_phase/include/phase.h b/src/mapleall/maple_phase/include/phase.h index c513dd16139732137571a0d6c7a66bf10b964078..62e95e97b664f196c62f16bcbb39b62c2c8beac7 100644 --- a/src/mapleall/maple_phase/include/phase.h +++ b/src/mapleall/maple_phase/include/phase.h @@ -44,7 +44,7 @@ class AnalysisResult { memPool = nullptr; } - private: + protected: MemPool *memPool; }; @@ -120,7 +120,7 @@ class AnalysisResultManager { } // analysis result use global mempool and allocator - AnalysisResult *GetAnalysisResult(PhaseIDT id, UnitIR *ir) { + AnalysisResult *GetAnalysisResult(PhaseIDT id, UnitIR *ir, bool verbose = false) { ASSERT(ir != nullptr, "ir is null in AnalysisResultManager::GetAnalysisResult"); std::pair key = std::make_pair(id, ir); if (analysisResults.find(key) != analysisResults.end()) { @@ -132,6 +132,9 @@ class AnalysisResultManager { return nullptr; } + if (verbose) { + LogInfo::MapleLogger() << " ++ depended phase [ " << anaPhase->PhaseName() << " ] invoked\n"; + } AnalysisResult *result = anaPhase->Run(ir, this); // allow invoke phases whose return value is nullptr using GetAnalysisResult if (result == nullptr) {