diff --git a/src/mapleall/maple_driver/defs/phases.def b/src/mapleall/maple_driver/defs/phases.def index b495f817c8a0d00bce77549e3685957cc21cde14..227f99f7165d4872c8b33be652c5531af030b7c9 100644 --- a/src/mapleall/maple_driver/defs/phases.def +++ b/src/mapleall/maple_driver/defs/phases.def @@ -38,6 +38,7 @@ ADD_PHASE("hprop", CLANG && MeOption::optLevel >= 3) ADD_PHASE("hdse", CLANG && MeOption::optLevel >= 3) ADD_PHASE("lfopreemit", CLANG && MeOption::optLevel >= 3) ADD_PHASE("deptest", CLANG && MeOption::optLevel >= 3) +ADD_PHASE("lfoloopvec", CLANG && MeOption::optLevel >= 3) ADD_PHASE("mecfgbuild", MeOption::optLevel >= 2 || JAVALANG) ADD_PHASE("cfgOpt", CLANG && MeOption::optLevel >= 2) ADD_PHASE("bypatheh", JAVALANG && MeOption::optLevel >= 2) diff --git a/src/mapleall/maple_ir/include/global_tables.h b/src/mapleall/maple_ir/include/global_tables.h index ad419cd59cd4d59b911bf8ad9b72d39f81ca3bf2..e2c0c19565548294da1f70d2c7f6dc381986d5f9 100644 --- a/src/mapleall/maple_ir/include/global_tables.h +++ b/src/mapleall/maple_ir/include/global_tables.h @@ -315,6 +315,61 @@ class TypeTable { ASSERT(PTY_unknown < typeTable.size(), "array index out of range"); return typeTable.at(PTY_unknown); } + // vector type + MIRType *GetV4Int32() const { + ASSERT(PTY_v4i32 < typeTable.size(), "array index out of range"); + return typeTable.at(PTY_v4i32); + } + + MIRType *GetV2Int32() const { + ASSERT(PTY_v2i32 < typeTable.size(), "array index out of range"); + return typeTable.at(PTY_v2i32); + } + + MIRType *GetV4UInt32() const { + ASSERT(PTY_v4u32 < typeTable.size(), "array index out of range"); + return typeTable.at(PTY_v4u32); + } + MIRType *GetV2UInt32() const { + ASSERT(PTY_v2u32 < typeTable.size(), "array index out of range"); + return typeTable.at(PTY_v2u32); + } + + MIRType *GetV4Int16() const { + ASSERT(PTY_v4i16 < typeTable.size(), "array index out of range"); + return typeTable.at(PTY_v4i16); + } + MIRType *GetV8Int16() const { + ASSERT(PTY_v8i16 < typeTable.size(), "array index out of range"); + return typeTable.at(PTY_v8i16); + } + + MIRType *GetV4UInt16() const { + ASSERT(PTY_v4u16 < typeTable.size(), "array index out of range"); + return typeTable.at(PTY_v4u16); + } + MIRType *GetV8UInt16() const { + ASSERT(PTY_v8u16 < typeTable.size(), "array index out of range"); + return typeTable.at(PTY_v8u16); + } + + MIRType *GetV8Int8() const { + ASSERT(PTY_v8i8 < typeTable.size(), "array index out of range"); + return typeTable.at(PTY_v8i8); + } + MIRType *GetV16Int8() const { + ASSERT(PTY_v16i8 < typeTable.size(), "array index out of range"); + return typeTable.at(PTY_v16i8); + } + + MIRType *GetV8UInt8() const { + ASSERT(PTY_v8u8 < typeTable.size(), "array index out of range"); + return typeTable.at(PTY_v8u8); + } + MIRType *GetV16UInt8() const { + ASSERT(PTY_v16u8 < typeTable.size(), "array index out of range"); + return typeTable.at(PTY_v16u8); + } // Get or Create derived types. 
MIRType *GetOrCreatePointerType(const TyIdx &pointedTyIdx, PrimType primType = PTY_ptr, diff --git a/src/mapleall/maple_me/BUILD.gn b/src/mapleall/maple_me/BUILD.gn index 48a2aa2c6a144d88d36e7bba3bf49e84fa7f5c72..b8cc176356af2f6f9270fb1cf590bdebecd3a400 100755 --- a/src/mapleall/maple_me/BUILD.gn +++ b/src/mapleall/maple_me/BUILD.gn @@ -97,6 +97,7 @@ src_libmplme = [ "src/lfo_pre_emit.cpp", "src/lfo_iv_canon.cpp", "src/lfo_dep_test.cpp", + "src/lfo_loop_vec.cpp", "src/me_value_range_prop.cpp", "src/cfg_opt.cpp", ] diff --git a/src/mapleall/maple_me/include/lfo_dep_test.h b/src/mapleall/maple_me/include/lfo_dep_test.h index 0769abe90034c742283ca9c2c222fbe7562a5a64..f2ef19977158e08ccc65e0ed7d508a53914a9ea2 100644 --- a/src/mapleall/maple_me/include/lfo_dep_test.h +++ b/src/mapleall/maple_me/include/lfo_dep_test.h @@ -18,27 +18,45 @@ #include "lfo_function.h" #include "lfo_pre_emit.h" +#include "orig_symbol.h" #include "me_phase.h" +#include "me_ir.h" +#include "dominance.h" namespace maple { class LfoDepInfo; class SubscriptDesc{ public: - DreadNode *iv = nullptr; // the variable - int64 coeff = 1; // coefficient of the variable + MeExpr *subscriptX; + DreadNode *iv = nullptr; // the variable + int64 coeff = 1; // coefficient of the variable int64 additiveConst = 0; - bool tooMessy = false;; // too complicated to analyze + bool tooMessy = false;; // too complicated to analyze + bool loopInvariant = false; // loop invariant w.r.t. closest nesting loop - SubscriptDesc() = default; + public: + SubscriptDesc(MeExpr *x) : subscriptX(x) {} }; class ArrayAccessDesc { public: ArrayNode *theArray; + OriginalSt *arrayOst = nullptr; MapleVector subscriptVec; // describe the subscript of each array dimension - ArrayAccessDesc(MapleAllocator *alloc, ArrayNode *arr) : theArray(arr), subscriptVec(alloc->Adapter()) {} + public: + ArrayAccessDesc(MapleAllocator *alloc, ArrayNode *arr, OriginalSt *arryOst) : theArray(arr), arrayOst(arryOst), subscriptVec(alloc->Adapter()) {} +}; + +class DepTestPair { + public: + std::pair depTestPair; // based on indices in lhsArrays and rhsArrays + bool dependent = false; + bool unknownDist = false; // if dependent + int64 depDist = 0; // if unknownDist is false + public: + DepTestPair(size_t i, size_t j) : depTestPair(i, j) {} }; class DoloopInfo { @@ -50,42 +68,57 @@ class DoloopInfo { MapleVector children; // for the nested doloops in program order MapleVector lhsArrays; // each element represents an array assign MapleVector rhsArrays; // each element represents an array read + BB *doloopBB = nullptr; // the start BB for the doloop body bool hasPtrAccess = false; // give up dep testing if true bool hasCall = false; // give up dep testing if true + bool hasScalarAssign = false; // give up dep testing if true + bool hasMayDef = false; // give up dep testing if true + MapleVector outputDepTestList; // output dependence only + MapleVector flowDepTestList; // include both true and anti dependences - DoloopInfo(MapleAllocator *allc, LfoDepInfo *depinfo, DoloopNode *doloop, DoloopInfo *prnt) - : alloc(allc), - depInfo(depinfo), - doloop(doloop), - parent(prnt), - children(alloc->Adapter()), - lhsArrays(alloc->Adapter()), - rhsArrays(alloc->Adapter()) {} + public: + DoloopInfo(MapleAllocator *allc, LfoDepInfo *depinfo, DoloopNode *doloop, DoloopInfo *prnt) : + alloc(allc), + depInfo(depinfo), + doloop(doloop), + parent(prnt), + children(alloc->Adapter()), + lhsArrays(alloc->Adapter()), + rhsArrays(alloc->Adapter()), + outputDepTestList(alloc->Adapter()), + 
flowDepTestList(alloc->Adapter()) {} ~DoloopInfo() = default; + bool IsLoopInvariant(MeExpr *x); SubscriptDesc *BuildOneSubscriptDesc(BaseNode *subsX); - void BuildOneArrayAccessDesc(ArrayNode *arr, bool isRHS); + ArrayAccessDesc *BuildOneArrayAccessDesc(ArrayNode *arr, bool isRHS); void CreateRHSArrayAccessDesc(BaseNode *x); void CreateArrayAccessDesc(BlockNode *block); + void CreateDepTestLists(); + void TestDependences(MapleVector *depTestList, bool bothLHS); + bool Parallelizable(); }; class LfoDepInfo : public AnalysisResult { public: MapleAllocator alloc; LfoFunction *lfoFunc; + Dominance *dom; LfoPreEmitter *preEmit; MapleVector outermostDoloopInfoVec; // outermost doloops' DoloopInfo in program order MapleMap doloopInfoMap; - LfoDepInfo(MemPool *mempool, LfoFunction *f, LfoPreEmitter *preemit) - : AnalysisResult(mempool), - alloc(mempool), - lfoFunc(f), + public: + LfoDepInfo(MemPool *mempool, LfoFunction *f, Dominance *dm, LfoPreEmitter *preemit) + : AnalysisResult(mempool), + alloc(mempool), + lfoFunc(f), + dom(dm), preEmit(preemit), outermostDoloopInfoVec(alloc.Adapter()), doloopInfoMap(alloc.Adapter()) {} ~LfoDepInfo() = default; void CreateDoloopInfo(BlockNode *block, DoloopInfo *parent); - void CreateArrayAccessDesc(MapleMap *doloopInfoVec); + void PerformDepTest(); std::string PhaseName() const { return "deptest"; } }; diff --git a/src/mapleall/maple_me/include/lfo_loop_vec.h b/src/mapleall/maple_me/include/lfo_loop_vec.h new file mode 100644 index 0000000000000000000000000000000000000000..05b5dbbbb0995b481af54415c2f0a166562fcba3 --- /dev/null +++ b/src/mapleall/maple_me/include/lfo_loop_vec.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#ifndef MAPLE_ME_INCLUDE_LOOP_VEC_H +#define MAPLE_ME_INCLUDE_LOOP_VEC_H +#include "me_function.h" +#include "me_irmap.h" +#include "me_phase.h" +#include "me_ir.h" +#include "lfo_pre_emit.h" +#include "lfo_dep_test.h" + +namespace maple { + +class LoopBound { +public: + LoopBound() : lowNode(nullptr), upperNode(nullptr), incrNode(nullptr) {}; + LoopBound(BaseNode *nlow, BaseNode *nup, BaseNode *nincr) : lowNode(nlow), upperNode(nup), incrNode(nincr) {} + BaseNode *lowNode; // low bound node + BaseNode *upperNode; // uppder bound node + BaseNode *incrNode; // incr node +}; + +// tranform plan for current loop +class LoopTransPlan { +public: + LoopTransPlan(MemPool *mp, MemPool *localmp) : vBound(nullptr), eBound(nullptr), + codeMP(mp), localMP(localmp) { + vecFactor = 1; + } + ~LoopTransPlan() = default; + LoopBound *vBound; // bound of vectorized part + // list of vectorizable stmtnodes in current loop, others can't be vectorized + uint8_t vecLanes; // number of lanes of vector type in current loop + uint8_t vecFactor; // number of loop iterations combined to one vectorized loop iteration + // generate epilog if eBound is not null + LoopBound *eBound; // bound of Epilog part + MemPool *codeMP; // use to generate new bound node + MemPool *localMP; // use to generate local info + + // function + void Generate(DoloopNode *, DoloopInfo*); + void GenerateBoundInfo(DoloopNode *, DoloopInfo *); +}; + +class LoopVectorization { + public: + LoopVectorization(MemPool *localmp, LfoPreEmitter *lfoEmit, LfoDepInfo *depinfo) : + localAlloc(localmp), + vecPlans(localAlloc.Adapter()) { + mirFunc = lfoEmit->GetMirFunction(); + lfoStmtParts = lfoEmit->GetLfoStmtMap(); + lfoExprParts = lfoEmit->GetLfoExprMap(); + depInfo = depinfo; + codeMP = lfoEmit->GetCodeMP(); + codeMPAlloc = lfoEmit->GetCodeMPAlloc(); + localMP = localmp; + } + ~LoopVectorization() = default; + + void Perform(); + void TransformLoop(); + void VectorizeDoLoop(DoloopNode *, LoopTransPlan*); + void VectorizeNode(BaseNode *, uint8_t); + MIRType *GenVecType(PrimType, uint8_t); + StmtNode *GenIntrinNode(BaseNode *scalar, PrimType vecPrimType); + bool CanVectorizeStmt(StmtNode *stmt); + void widenDoloop(DoloopNode *doloop, LoopTransPlan *); + DoloopNode *PrepareDoloop(DoloopNode *, LoopTransPlan *); + DoloopNode *GenEpilog(DoloopNode *); + MemPool *GetLocalMp() { return localMP; } + MapleMap *GetVecPlans() { return &vecPlans; } + + private: + MIRFunction *mirFunc; + MapleMap *lfoStmtParts; // point to lfoStmtParts of lfopreemit, map lfoinfo for StmtNode, key is stmtID + MapleMap *lfoExprParts; // point to lfoexprparts of lfopreemit, map lfoinfo for exprNode, key is mirnode + LfoDepInfo *depInfo; + MemPool *codeMP; // point to mirfunction codeMp + MapleAllocator *codeMPAlloc; + MemPool *localMP; // local mempool + MapleAllocator localAlloc; + MapleMap vecPlans; // each vectoriable loopnode has its best vectorization plan +}; + +class DoLfoLoopVectorization: public MeFuncPhase { + public: + static bool enableDebug; + static bool enableDump; + explicit DoLfoLoopVectorization(MePhaseID id) : MeFuncPhase(id) {} + ~DoLfoLoopVectorization() = default; + + AnalysisResult *Run(MeFunction *func, MeFuncResultMgr *m, ModuleResultMgr*) override; + std::string PhaseName() const override { + return "lfoloopvec"; + } + + private: +}; +} // namespace maple +#endif // MAPLE_ME_INCLUDE_LOOP_VEC_H diff --git a/src/mapleall/maple_me/include/lfo_pre_emit.h b/src/mapleall/maple_me/include/lfo_pre_emit.h index 
0ab6ff76d081cdccb20b1e8fe93609d8fe606f58..481ed0c83f0bc9188c7c00ade21be7c92ce233dc 100644 --- a/src/mapleall/maple_me/include/lfo_pre_emit.h +++ b/src/mapleall/maple_me/include/lfo_pre_emit.h @@ -15,6 +15,7 @@ #ifndef MAPLE_ME_INCLUDE_LFO_PRE_EMIT_H #define MAPLE_ME_INCLUDE_LFO_PRE_EMIT_H +#include "mir_nodes.h" #include "me_irmap.h" #include "me_phase.h" @@ -28,7 +29,7 @@ class LfoPreEmitter : public AnalysisResult { MapleAllocator *codeMPAlloc; MemPool *lfoMP; MapleAllocator lfoMPAlloc; - MapleMap lfoStmtParts; // map lfoinfo for StmtNode, key is stmtID + MapleMap lfoStmtParts; // map lfoinfo for StmtNode, key is stmtID MapleMap lfoExprParts; // map lfoinfor for exprNode, key is mirnode MeCFG *cfg; @@ -93,6 +94,11 @@ class LfoPreEmitter : public AnalysisResult { LfoPart *lfopart = lfoStmtParts[stmtID]; return lfopart->mestmt; } + MIRFunction *GetMirFunction() { return mirFunc; } + MemPool *GetCodeMP() { return codeMP; } + MapleAllocator* GetCodeMPAlloc() { return codeMPAlloc; } + MapleMap *GetLfoStmtMap() { return &lfoStmtParts; } + MapleMap *GetLfoExprMap() { return &lfoExprParts; } }; /* emit ir to specified file */ diff --git a/src/mapleall/maple_me/include/me_phases.def b/src/mapleall/maple_me/include/me_phases.def index 58a724938413b0cb2b1e44924d65572a2283cce0..09a05c424f605c0ef81bef1c1b725e09f34029b3 100644 --- a/src/mapleall/maple_me/include/me_phases.def +++ b/src/mapleall/maple_me/include/me_phases.def @@ -55,6 +55,7 @@ FUNCAPHASE(MeFuncPhase_MECFG, MeDoMeCfg) FUNCTPHASE(MeFuncPhase_LFOINJECTIV, DoLfoInjectIV) FUNCAPHASE(MeFuncPhase_LFOPREEMIT, DoLfoPreEmission) FUNCTPHASE(MeFuncPhase_LFOIVCANON, DoLfoIVCanon) +FUNCTPHASE(MeFuncPhase_LFOLOOPVEC, DoLfoLoopVectorization) FUNCAPHASE(MeFuncPhase_MECFGOPT, DoCfgOpt) FUNCAPHASE(MeFuncPhase_LFODEPTEST, DoLfoDepTest) #if MIR_JAVA diff --git a/src/mapleall/maple_me/include/orig_symbol.h b/src/mapleall/maple_me/include/orig_symbol.h index d1b21ee34879f83b1a67125784c131de2feced96..2ae92ec298738bf7ee5c7b2b9bc6783b86683fc1 100644 --- a/src/mapleall/maple_me/include/orig_symbol.h +++ b/src/mapleall/maple_me/include/orig_symbol.h @@ -213,7 +213,15 @@ class OriginalSt { return false; } MIRType *mirtype = GlobalTables::GetTypeTable().GetTypeFromTyIdx(tyIdx); - return IsPrimitiveInteger(mirtype->GetPrimType()) && (mirtype->GetKind() != kTypeBitField); + if (IsPrimitiveInteger(mirtype->GetPrimType()) && (mirtype->GetKind() != kTypeBitField)) { + // additional check using MIRSymbol's tyIdx + if (IsSymbolOst()) { + return IsPrimitiveInteger(GetMIRSymbol()->GetType()->GetPrimType()); + } + return true; + } else { + return false; + } } MIRType *GetType() const { diff --git a/src/mapleall/maple_me/src/lfo_dep_test.cpp b/src/mapleall/maple_me/src/lfo_dep_test.cpp index 9eecf4d4731ad4402d919d2b585ef4c1f328a306..5d708cfeb6234a983b96e8e40f1e5c33a3473c7c 100644 --- a/src/mapleall/maple_me/src/lfo_dep_test.cpp +++ b/src/mapleall/maple_me/src/lfo_dep_test.cpp @@ -29,6 +29,9 @@ void LfoDepInfo::CreateDoloopInfo(BlockNode *block, DoloopInfo *parent) { } else { outermostDoloopInfoVec.push_back(doloopInfo); } + LfoPart *lfopart = preEmit->GetLfoStmtPart(doloop->GetStmtID()); + MeStmt *meStmt = lfopart->GetMeStmt(); + doloopInfo->doloopBB = meStmt->GetBB(); CreateDoloopInfo(doloop->GetDoBody(), doloopInfo); break; } @@ -58,8 +61,62 @@ void LfoDepInfo::CreateDoloopInfo(BlockNode *block, DoloopInfo *parent) { } } +// check if x is loop-invariant w.r.t. 
the doloop +bool DoloopInfo::IsLoopInvariant(MeExpr *x) { + if (x == nullptr) { + return true; + } + switch (x->GetMeOp()) { + case kMeOpAddrof: + case kMeOpAddroffunc: + case kMeOpConst: + case kMeOpConststr: + case kMeOpConststr16: + case kMeOpSizeoftype: + case kMeOpFieldsDist: return true; + case kMeOpVar: + case kMeOpReg: { + ScalarMeExpr *scalar = static_cast(x); + BB *defBB = scalar->DefByBB(); + return defBB == nullptr || (defBB != doloopBB && depInfo->dom->Dominate(*defBB, *doloopBB)); + } + case kMeOpIvar: { + IvarMeExpr *ivar = static_cast(x); + if (!IsLoopInvariant(ivar->GetBase())) { + return false; + } + BB *defBB = ivar->GetMu()->DefByBB(); + return defBB == nullptr || (defBB != doloopBB && depInfo->dom->Dominate(*defBB, *doloopBB)); + } + case kMeOpOp: { + OpMeExpr *opexp = static_cast(x); + return IsLoopInvariant(opexp->GetOpnd(0)) && + IsLoopInvariant(opexp->GetOpnd(1)) && + IsLoopInvariant(opexp->GetOpnd(2)); + } + case kMeOpNary: { + NaryMeExpr *opexp = static_cast(x); + for (uint32 i = 0; i < opexp->GetNumOpnds(); i++) { + if (!IsLoopInvariant(opexp->GetOpnd(i))) { + return false; + } + } + return true; + } + default: + break; + } + return false; +} + SubscriptDesc *DoloopInfo::BuildOneSubscriptDesc(BaseNode *subsX) { - SubscriptDesc *subsDesc = alloc->GetMemPool()->New(); + LfoPart *lfopart = depInfo->preEmit->GetLfoExprPart(subsX); + MeExpr *meExpr = lfopart->GetMeExpr(); + SubscriptDesc *subsDesc = alloc->GetMemPool()->New(meExpr); + if (IsLoopInvariant(meExpr)) { + subsDesc->loopInvariant = true; + return subsDesc; + } Opcode op = subsX->GetOpCode(); BaseNode *mainTerm = nullptr; if (op != OP_add && op != OP_sub) { @@ -106,7 +163,11 @@ SubscriptDesc *DoloopInfo::BuildOneSubscriptDesc(BaseNode *subsX) { return subsDesc; } subsDesc->coeff = static_cast(mirconst)->GetValue(); - } + if (subsDesc->coeff < 0) { + subsDesc->tooMessy = true; + return subsDesc; + } + } // process varNode if (varNode->GetOpCode() == OP_dread) { DreadNode *dnode = static_cast(varNode); @@ -119,7 +180,7 @@ SubscriptDesc *DoloopInfo::BuildOneSubscriptDesc(BaseNode *subsX) { return subsDesc; } -void DoloopInfo::BuildOneArrayAccessDesc(ArrayNode *arr, bool isRHS) { +ArrayAccessDesc *DoloopInfo::BuildOneArrayAccessDesc(ArrayNode *arr, bool isRHS) { #if 0 MIRType *atype = arr->GetArrayType(GlobalTables::GetTypeTable()); ASSERT(atype->GetKind() == kTypeArray, "type was wrong"); @@ -129,7 +190,24 @@ void DoloopInfo::BuildOneArrayAccessDesc(ArrayNode *arr, bool isRHS) { #else size_t dim = arr->NumOpnds() - 1; #endif - ArrayAccessDesc *arrDesc = alloc->GetMemPool()->New(alloc, arr); + // determine arrayOst + LfoPart *lfopart = depInfo->preEmit->GetLfoExprPart(arr); + OpMeExpr *arrayMeExpr = static_cast(lfopart->GetMeExpr()); + OriginalSt *arryOst = nullptr; + if (arrayMeExpr->GetOpnd(0)->GetMeOp() == kMeOpAddrof) { + AddrofMeExpr *addrof = static_cast(arrayMeExpr->GetOpnd(0)); + arryOst = depInfo->lfoFunc->meFunc->GetMeSSATab()->GetOriginalStFromID(addrof->GetOstIdx()); + + } else { + ScalarMeExpr *scalar = dynamic_cast(arrayMeExpr->GetOpnd(0)); + if (scalar) { + arryOst = scalar->GetOst(); + } else { + hasPtrAccess = true; + return nullptr; + } + } + ArrayAccessDesc *arrDesc = alloc->GetMemPool()->New(alloc, arr, arryOst); if (isRHS) { rhsArrays.push_back(arrDesc); } else { @@ -139,6 +217,7 @@ void DoloopInfo::BuildOneArrayAccessDesc(ArrayNode *arr, bool isRHS) { SubscriptDesc *subs = BuildOneSubscriptDesc(arr->GetIndex(i)); arrDesc->subscriptVec.push_back(subs); } + return arrDesc; } void 
DoloopInfo::CreateRHSArrayAccessDesc(BaseNode *x) { @@ -180,13 +259,35 @@ void DoloopInfo::CreateArrayAccessDesc(BlockNode *block) { case OP_iassign: { IassignNode *iass = static_cast(stmt); if (iass->addrExpr->GetOpCode() == OP_array) { - BuildOneArrayAccessDesc(static_cast(iass->addrExpr), false /* isRHS */); + ArrayAccessDesc *adesc = BuildOneArrayAccessDesc(static_cast(iass->addrExpr), false/*isRHS*/); + if (adesc == nullptr) { + hasMayDef = true; + } else { + // check if the chi list has only the same array + LfoPart *lfopart = depInfo->preEmit->GetLfoStmtPart(iass->GetStmtID()); + IassignMeStmt *iassMeStmt = static_cast(lfopart->GetMeStmt()); + MapleMap *chilist = iassMeStmt->GetChiList(); + MapleMap::iterator chiit = chilist->begin(); + for (; chiit != chilist->end(); chiit++) { + OriginalSt *chiOst = depInfo->lfoFunc->meFunc->GetMeSSATab()->GetOriginalStFromID(chiit->first); + if (!chiOst->IsSameSymOrPreg(adesc->arrayOst)) { + hasMayDef = true; + break; + } + } + } } else { hasPtrAccess = true; } CreateRHSArrayAccessDesc(iass->rhs); break; } + case OP_dassign: + case OP_regassign: { + hasScalarAssign = true; + CreateRHSArrayAccessDesc(stmt->Opnd(0)); + break; + } case OP_call: case OP_callassigned: case OP_icall: @@ -206,8 +307,105 @@ void DoloopInfo::CreateArrayAccessDesc(BlockNode *block) { } } -void LfoDepInfo::CreateArrayAccessDesc(MapleMap *doloopInfoMap) { - for (auto mapit = doloopInfoMap->begin(); mapit != doloopInfoMap->end(); mapit++) { +void DoloopInfo::CreateDepTestLists() { + size_t i, j; + for (i = 0; i < lhsArrays.size(); i++) { + for (j = i+1; j < lhsArrays.size(); j++) { + if (lhsArrays[i]->arrayOst->IsSameSymOrPreg(lhsArrays[j]->arrayOst)) { + outputDepTestList.push_back(DepTestPair(i, j)); + } + } + } + for (i = 0; i < lhsArrays.size(); i++) { + for (j = 0; j < rhsArrays.size(); j++) { + if (lhsArrays[i]->arrayOst->IsSameSymOrPreg(rhsArrays[j]->arrayOst)) { + flowDepTestList.push_back(DepTestPair(i, j)); + } + } + } +} + +static int64 Gcd(int64 a, int64 b) { + CHECK_FATAL(a > 0 && b >= 0, "Gcd: NYI"); + if (b == 0) + return a; + return Gcd(b, a % b); +} + +void DoloopInfo::TestDependences(MapleVector *depTestList, bool bothLHS) { + size_t i, j; + for (i = 0; i < depTestList->size(); i++) { + DepTestPair *testPair = &(*depTestList)[i]; + ArrayAccessDesc *arrDesc1 = lhsArrays[testPair->depTestPair.first]; + ArrayAccessDesc *arrDesc2 = nullptr; + if (bothLHS) { + arrDesc2 = lhsArrays[testPair->depTestPair.second]; + } else { + arrDesc2 = rhsArrays[testPair->depTestPair.second]; + } + CHECK_FATAL(arrDesc1->subscriptVec.size() == arrDesc2->subscriptVec.size(), + "TestDependences: inconsistent array dimension"); + for (j = 0; j < arrDesc1->subscriptVec.size(); j++) { + SubscriptDesc *subs1 = arrDesc1->subscriptVec[j]; + SubscriptDesc *subs2 = arrDesc2->subscriptVec[j]; + if (subs1->tooMessy || subs2->tooMessy) { + testPair->dependent = true; + testPair->unknownDist = true; + break; + } + if (subs1->loopInvariant || subs2->loopInvariant) { + if (subs1->subscriptX == subs2->subscriptX) { + continue; + } else { + testPair->dependent = true; + testPair->unknownDist = true; + break; + } + } + if (subs1->coeff == subs2->coeff) { // lamport test + if (((subs1->additiveConst - subs2->additiveConst) % subs1->coeff) != 0) { + continue; + } + testPair->dependent = true; + int64 dist = (subs1->additiveConst - subs2->additiveConst) / subs1->coeff; + if (dist != 0) { + testPair->depDist = dist; + } + continue; + } + // gcd test + if ((subs1->additiveConst - subs2->additiveConst) % 
Gcd(subs1->coeff, subs2->coeff) == 0) { + testPair->dependent = true; + testPair->unknownDist = true; + break; + } + } + } +} + +bool DoloopInfo::Parallelizable() { + if (hasPtrAccess || hasCall || hasScalarAssign || hasMayDef) { + return false; + } + for (size_t i = 0; i < outputDepTestList.size(); i++) { + DepTestPair *testPair = &outputDepTestList[i]; + if (testPair->dependent && (testPair->unknownDist || testPair->depDist != 0)) { + return false; + } + } + for (size_t i = 0; i < flowDepTestList.size(); i++) { + DepTestPair *testPair = &flowDepTestList[i]; + if (testPair->dependent && (testPair->unknownDist || testPair->depDist != 0)) { + return false; + } + } + return true; +} + +void LfoDepInfo::PerformDepTest() { + size_t i; + MapleMap::iterator mapit = doloopInfoMap.begin(); + for (; mapit != doloopInfoMap.end(); mapit++) { DoloopInfo *doloopInfo = mapit->second; if (!doloopInfo->children.empty()) { continue; // only handling innermost doloops @@ -221,14 +419,24 @@ void LfoDepInfo::CreateArrayAccessDesc(MapleMap *dol if (doloopInfo->hasCall) { LogInfo::MapleLogger() << " hasCall"; } + if (doloopInfo->hasScalarAssign) { + LogInfo::MapleLogger() << " hasScalarAssign"; + } + if (doloopInfo->hasMayDef) { + LogInfo::MapleLogger() << " hasMayDef"; + } LogInfo::MapleLogger() << std::endl; doloopInfo->doloop->Dump(0); LogInfo::MapleLogger() << "LHS arrays:\n"; - for (ArrayAccessDesc *arrAcc : doloopInfo->lhsArrays) { - arrAcc->theArray->Dump(0); + for (i = 0; i < doloopInfo->lhsArrays.size(); i++) { + ArrayAccessDesc *arrAcc = doloopInfo->lhsArrays[i]; + LogInfo::MapleLogger() << "(L" << i << ") "; + arrAcc->arrayOst->Dump(); LogInfo::MapleLogger() << " subscripts:"; for (SubscriptDesc *subs : arrAcc->subscriptVec) { - if (subs->tooMessy) { + if (subs->loopInvariant) { + LogInfo::MapleLogger() << " [loopinvariant]"; + } else if (subs->tooMessy) { LogInfo::MapleLogger() << " [messy]"; } else { LogInfo::MapleLogger() << " [" << subs->coeff << "*"; @@ -240,12 +448,16 @@ void LfoDepInfo::CreateArrayAccessDesc(MapleMap *dol } LogInfo::MapleLogger() << std::endl; } - LogInfo::MapleLogger() << "RHS arrays: "; - for (ArrayAccessDesc *arrAcc : doloopInfo->rhsArrays) { - arrAcc->theArray->Dump(0); + LogInfo::MapleLogger() << "RHS arrays:\n"; + for (i = 0; i < doloopInfo->rhsArrays.size(); i++) { + ArrayAccessDesc *arrAcc = doloopInfo->rhsArrays[i]; + LogInfo::MapleLogger() << "(R" << i << ") "; + arrAcc->arrayOst->Dump(); LogInfo::MapleLogger() << " subscripts:"; for (SubscriptDesc *subs : arrAcc->subscriptVec) { - if (subs->tooMessy) { + if (subs->loopInvariant) { + LogInfo::MapleLogger() << " [loopinvariant]"; + } else if (subs->tooMessy) { LogInfo::MapleLogger() << " [messy]"; } else { LogInfo::MapleLogger() << " [" << subs->coeff << "*"; @@ -259,19 +471,58 @@ void LfoDepInfo::CreateArrayAccessDesc(MapleMap *dol } LogInfo::MapleLogger() << std::endl; } + doloopInfo->CreateDepTestLists(); + doloopInfo->TestDependences(&doloopInfo->outputDepTestList, true); + doloopInfo->TestDependences(&doloopInfo->flowDepTestList, false); + if (DEBUGFUNC(lfoFunc->meFunc)) { + for (DepTestPair item : doloopInfo->outputDepTestList) { + LogInfo::MapleLogger() << "Dep between L" << item.depTestPair.first << " and L" << item.depTestPair.second; + if (!item.dependent) { + LogInfo::MapleLogger() << " independent"; + } else { + LogInfo::MapleLogger() << " dependent"; + if (item.unknownDist) { + LogInfo::MapleLogger() << " unknownDist"; + } else { + LogInfo::MapleLogger() << " distance: " << item.depDist; + } + } + 
LogInfo::MapleLogger() << std::endl; + } + for (DepTestPair item : doloopInfo->flowDepTestList) { + LogInfo::MapleLogger() << "Dep between L" << item.depTestPair.first << " and R" << item.depTestPair.second; + if (!item.dependent) { + LogInfo::MapleLogger() << " independent"; + } else { + LogInfo::MapleLogger() << " dependent"; + if (item.unknownDist) { + LogInfo::MapleLogger() << " unknownDist"; + } else { + LogInfo::MapleLogger() << " distance: " << item.depDist; + } + } + LogInfo::MapleLogger() << std::endl; + } + if (doloopInfo->Parallelizable()) { + LogInfo::MapleLogger() << "LOOP CAN BE VECTORIZED\n"; + } + } } } AnalysisResult *DoLfoDepTest::Run(MeFunction *func, MeFuncResultMgr *m, ModuleResultMgr*) { + Dominance *dom = static_cast(m->GetAnalysisResult(MeFuncPhase_DOMINANCE, func)); + ASSERT(dom != nullptr, "dominance phase has problem"); LfoPreEmitter *preEmit = static_cast(m->GetAnalysisResult(MeFuncPhase_LFOPREEMIT, func)); + ASSERT(preEmit != nullptr, "lfo preemit phase has problem"); LfoFunction *lfoFunc = func->GetLfoFunc(); MemPool *depTestMp = NewMemPool(); - LfoDepInfo *depInfo = depTestMp->New(depTestMp, lfoFunc, preEmit); + LfoDepInfo *depInfo = depTestMp->New(depTestMp, lfoFunc, dom, preEmit); if (DEBUGFUNC(func)) { LogInfo::MapleLogger() << "\n============== LFO_DEP_TEST =============" << '\n'; } depInfo->CreateDoloopInfo(func->GetMirFunc()->GetBody(), nullptr); - depInfo->CreateArrayAccessDesc(&depInfo->doloopInfoMap); + depInfo->PerformDepTest(); if (DEBUGFUNC(func)) { LogInfo::MapleLogger() << "________________" << std::endl; lfoFunc->meFunc->GetMirFunc()->Dump(); diff --git a/src/mapleall/maple_me/src/lfo_iv_canon.cpp b/src/mapleall/maple_me/src/lfo_iv_canon.cpp index 5b5ba3ecc95b7d020ce1502f44d51cf97f0a7570..2584cb6a896e804791c0da193feed1575c2df184 100644 --- a/src/mapleall/maple_me/src/lfo_iv_canon.cpp +++ b/src/mapleall/maple_me/src/lfo_iv_canon.cpp @@ -177,18 +177,21 @@ bool IVCanon::IsLoopInvariant(MeExpr *x) { case kMeOpReg: { ScalarMeExpr *scalar = static_cast(x); BB *defBB = scalar->DefByBB(); - return defBB == nullptr || dominance->Dominate(*defBB, *aloop->head); + return defBB == nullptr || (defBB != aloop->head && dominance->Dominate(*defBB, *aloop->head)); } case kMeOpIvar: { IvarMeExpr *ivar = static_cast(x); + if (!IsLoopInvariant(ivar->GetBase())) { + return false; + } BB *defBB = ivar->GetMu()->DefByBB(); - return defBB == nullptr || dominance->Dominate(*defBB, *aloop->head); + return defBB == nullptr || (defBB != aloop->head && dominance->Dominate(*defBB, *aloop->head)); } case kMeOpOp: { OpMeExpr *opexp = static_cast(x); return IsLoopInvariant(opexp->GetOpnd(0)) && - IsLoopInvariant(opexp->GetOpnd(1)) && - IsLoopInvariant(opexp->GetOpnd(2)); + IsLoopInvariant(opexp->GetOpnd(1)) && + IsLoopInvariant(opexp->GetOpnd(2)); } case kMeOpNary: { NaryMeExpr *opexp = static_cast(x); diff --git a/src/mapleall/maple_me/src/lfo_loop_vec.cpp b/src/mapleall/maple_me/src/lfo_loop_vec.cpp new file mode 100644 index 0000000000000000000000000000000000000000..eb9a9670ae3d8b8a430e94a083fe37d56f946caf --- /dev/null +++ b/src/mapleall/maple_me/src/lfo_loop_vec.cpp @@ -0,0 +1,391 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include +#include +#include "me_option.h" +#include "mir_module.h" +#include "mir_lower.h" +#include "mir_builder.h" +#include "lfo_loop_vec.h" + +namespace maple { + +// generate new bound for vectorization loop and epilog loop +// original bound info , condNode doesn't include equal +// limitation now: initNode and incrNode are const and initnode is vectorLane aligned. +// vectorization loop: +// epilog loop: < uppernode/vectorFactor*vectorFactor, uppernode, incrnode> +void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { + BaseNode *initNode = doloop->GetStartExpr(); + BaseNode *incrNode = doloop->GetIncrExpr(); + BaseNode *condNode = doloop->GetCondExpr(); + ASSERT(incrNode->IsConstval(), "too complex, incrNode should be const"); + ConstvalNode *icn = static_cast(incrNode); + MIRIntConst *incrConst = static_cast(icn->GetConstVal()); + ASSERT(condNode->IsBinaryNode(), "cmp node should be binary node"); + BaseNode *upNode = condNode->Opnd(1); // TODO:: verify opnd(1) is upper while opnd(0) is index variable + ASSERT((condNode->GetOpCode() != OP_le && condNode->GetOpCode() != OP_ge), "compare condition has equal part"); + MIRType *typeInt = GlobalTables::GetTypeTable().GetInt32(); + MIRIntConst *newIncr = GlobalTables::GetIntConstTable().GetOrCreateIntConst(vecFactor*incrConst->GetValue(), *typeInt); + ConstvalNode *newIncrNode = codeMP->New(PTY_i32, newIncr); + // check initNode is alignment + if (initNode->IsConstval()) { + ConstvalNode *lcn = static_cast(initNode); + MIRIntConst *lowConst = static_cast(lcn->GetConstVal()); + int64 lowvalue = lowConst->GetValue(); + // upNode is constant + if (upNode->IsConstval()) { + ConstvalNode *ucn = static_cast(upNode); + MIRIntConst *upConst = static_cast(ucn->GetConstVal()); + int64 upvalue = upConst->GetValue(); + if (((upvalue - lowvalue) % (newIncr->GetValue())) == 0) { + // early return, change vbound->stride only + vBound = localMP->New(nullptr, nullptr, newIncrNode); + } else { + // trip count is not vector lane aligned + // vectorized loop < initnode, (up - low)/newIncr *newincr + init, newincr> + // TODO: the vectorized loop need unalignment instruction. 
+ int32_t newupval = (upvalue - lowvalue) / (newIncr->GetValue()) * (newIncr->GetValue()) + lowvalue; + MIRIntConst *newUpConst = GlobalTables::GetIntConstTable().GetOrCreateIntConst(newupval, *typeInt); + ConstvalNode *newUpNode = codeMP->New(PTY_i32, newUpConst); + vBound = localMP->New(nullptr, newUpNode, newIncrNode); + // generate epilog + eBound = localMP->New(newUpNode, nullptr, nullptr); + } + } else if (upNode->GetOpCode() == OP_dread) { + // upNode is symbol variable, TODO::op_regread + // step 1: generate vectorized loop bound + AddrofNode *dreadnode = static_cast(upNode); + // upNode of vBound is uppnode / newIncr * newIncr + BinaryNode *divnode; + if (lowvalue != 0) { + BinaryNode *subnode = codeMP->New(OP_sub, PTY_i32, dreadnode, initNode); + divnode = codeMP->New(OP_div, PTY_i32, subnode, newIncrNode); + } else { + divnode = codeMP->New(OP_div, PTY_i32, dreadnode, newIncrNode); + } + BinaryNode *mulnode = codeMP->New(OP_mul, PTY_i32, divnode, newIncrNode); + vBound = localMP->New(nullptr, mulnode, newIncrNode); + // step2: generate epilog bound + eBound = localMP->New(mulnode, nullptr, nullptr); + } else { + ASSERT(0, "upper bound is complex, NIY"); + } + } else if (initNode->GetOpCode() == OP_dread) { + // initnode is not constant + // set bound of vectorized loop + BinaryNode *subnode = codeMP->New(OP_sub, PTY_i32, upNode, initNode); + BinaryNode *divnode = codeMP->New(OP_div, PTY_i32, subnode, newIncrNode); + BinaryNode *mulnode = codeMP->New(OP_mul, PTY_i32, divnode, newIncrNode); + vBound = localMP->New(nullptr, mulnode, newIncrNode); + // set bound of epilog loop + eBound = localMP->New(mulnode, nullptr, nullptr); + } else { + ASSERT(0, "low bound is complex, NIY"); + } +} + +// generate best plan for current doloop +void LoopTransPlan::Generate(DoloopNode *doloop, DoloopInfo* li) { + // hack values of vecFactor and vecLanes + vecFactor = 4; + vecLanes = 4; // vectory length / type + // generate bound information + GenerateBoundInfo(doloop, li); +} + +MIRType* LoopVectorization::GenVecType(PrimType sPrimType, uint8 lanes) { + MIRType *vecType = nullptr; + switch(sPrimType) { + case PTY_i32: { + if (lanes == 4) { + vecType = GlobalTables::GetTypeTable().GetV4Int32(); + } else if (lanes == 2) { + vecType = GlobalTables::GetTypeTable().GetV2Int32(); + } else { + ASSERT(0, "unsupported int32 vectory lanes"); + } + break; + } + case PTY_u32: { + if (lanes == 4) { + vecType = GlobalTables::GetTypeTable().GetV4UInt32(); + } else if (lanes == 2) { + vecType = GlobalTables::GetTypeTable().GetV2UInt32(); + } else { + ASSERT(0, "unsupported uint32 vectory lanes"); + } + break; + } + default: + ASSERT(0, "NIY"); + } + return vecType; +} + +// generate instrinsic node to copy scalar to vector type +StmtNode *LoopVectorization::GenIntrinNode(BaseNode *scalar, PrimType vecPrimType) { + PrimType intrnPrimtype = PTY_v4i32; + MIRIntrinsicID intrnID = INTRN_vector_from_scalar_v4i32; + MIRType *vecType = nullptr; + switch(vecPrimType) { + case PTY_v4i32: { + intrnPrimtype = PTY_v4i32; + intrnID = INTRN_vector_from_scalar_v4i32; + vecType = GlobalTables::GetTypeTable().GetV4Int32(); + break; + } + default: { + ASSERT(0, "NIY"); + } + } + // generate instrinsic op + IntrinsicopNode *rhs = codeMP->New(*codeMPAlloc, OP_intrinsicopwithtype, PTY_v4i32); + rhs->SetIntrinsic(intrnID); + rhs->SetNumOpnds(1); + rhs->SetNOpndAt(0, scalar); + rhs->SetTyIdx(vecType->GetTypeIndex()); + PregIdx regIdx = mirFunc->GetPregTab()->CreatePreg(intrnPrimtype, vecType); + RegassignNode *stmtNode = 
codeMP->New(PTY_v4i32, regIdx, rhs); + return stmtNode; +} + +// iterate tree node to wide scalar type to vector type +// following opcode can be vectorized directly +// +, -, *, &, |, <<, >>, compares, ~, ! +// iassign, iread, dassign, dread +void LoopVectorization::VectorizeNode(BaseNode *node, uint8 count) { + node->Dump(0); + switch (node->GetOpCode()) { + case OP_iassign: { + IassignNode *iassign = static_cast(node); + // change lsh type to vector type + MIRType &mirType = GetTypeFromTyIdx(iassign->GetTyIdx()); + CHECK_FATAL(mirType.GetKind() == kTypePointer, "iassign must have pointer type"); + MIRPtrType *ptrType = static_cast(&mirType); + MIRType *vecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), count); + ASSERT(vecType != nullptr, "vector type should not be null"); + MIRType *pvecType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*vecType, PTY_ptr); + // update lhs type + iassign->SetTyIdx(pvecType->GetTypeIndex()); + // visit rsh + VectorizeNode(iassign->GetRHS(), count); + break; + } + case OP_iread: { + IreadNode *ireadnode = static_cast(node); + // update primtype + MIRType *primVecType = GenVecType(ireadnode->GetPrimType(), count); + node->SetPrimType(primVecType->GetPrimType()); + // update tyidx + MIRType &mirType = GetTypeFromTyIdx(ireadnode->GetTyIdx()); + CHECK_FATAL(mirType.GetKind() == kTypePointer, "iread must have pointer type"); + MIRPtrType *ptrType = static_cast(&mirType); + MIRType *vecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), count); + ASSERT(vecType != nullptr, "vector type should not be null"); + MIRType *pvecType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*vecType, PTY_ptr); + // update lhs type + ireadnode->SetTyIdx(pvecType->GetTypeIndex()); + break; + } + // scalar related: widen type directly or unroll instructions + case OP_dassign: + case OP_dread: + ASSERT(0, "NIY"); + break; + // vector type support in opcode +, -, *, &, |, <<, >>, compares, ~, ! 
+ case OP_add: + case OP_sub: + case OP_mul: + case OP_band: + case OP_bior: + case OP_shl: + case OP_lshr: + case OP_ashr: + // compare + case OP_eq: + case OP_ne: + case OP_lt: + case OP_gt: + case OP_le: + case OP_ge: + case OP_cmpg: + case OP_cmpl: { + ASSERT(node->IsBinaryNode(), "should be binarynode"); + BinaryNode *binNode = static_cast(node); + MIRType *vecType = GenVecType(node->GetPrimType(), count); + node->SetPrimType(vecType->GetPrimType()); // update primtype of binary op + VectorizeNode(binNode->Opnd(0), count); + VectorizeNode(binNode->Opnd(1), count); + break; + } + // unary op + case OP_bnot: + case OP_lnot: { + ASSERT(node->IsUnaryNode(), "should be unarynode"); + UnaryNode *unaryNode = static_cast(node); + MIRType *vecType = GenVecType(node->GetPrimType(), count); + node->SetPrimType(vecType->GetPrimType()); // update primtype of unary op + VectorizeNode(unaryNode->Opnd(0), count); + break; + } + case OP_constval: { + LfoPart *lfoP = (*lfoExprParts)[node]; + ASSERT(lfoP != nullptr, "nullptr check"); + // constval could be used in binary op without widen directly + if (!lfoP->GetParent()->IsBinaryNode()) { + // use intrinsicop vdupq_n_i32 to move const to tmp variable + ASSERT(0, "constval need to extended NIY"); + } + break; + } + default: + ASSERT(0, "can't be vectorized"); + } +} + +// update init/stride/upper nodes of doloop +// now hack code to widen const stride with value "vecFactor * original stride" +void LoopVectorization::widenDoloop(DoloopNode *doloop, LoopTransPlan *tp) { + if (tp->vBound) { + if (tp->vBound->incrNode) { + doloop->SetIncrExpr(tp->vBound->incrNode); + } + if (tp->vBound->lowNode) { + doloop->SetStartExpr(tp->vBound->lowNode); + } + if (tp->vBound->upperNode) { + BinaryNode *cmpn = static_cast(doloop->GetCondExpr()); + cmpn->SetBOpnd(tp->vBound->upperNode, 1); + } + } +} + +void LoopVectorization::VectorizeDoLoop(DoloopNode *doloop, LoopTransPlan *tp) { + // LogInfo::MapleLogger() << "\n**** dump doloopnode ****\n"; + // doloop->Dump(0); + // step 1: handle loop low/upper/stride + widenDoloop(doloop, tp); + + // step 2: widen vectorizable stmt in doloop + BlockNode *loopbody = doloop->GetDoBody(); + for (auto &stmt : loopbody->GetStmtNodes()) { + //if (stmt need to be vectoried in vectorized list) { + VectorizeNode(&stmt, tp->vecFactor); + //} else { + // stmt could not be widen directly, unroll instruction with vecFactor + // move value from vector type if need (need def-use information from plan) + //} + } +} + +// generate remainder loop +DoloopNode *LoopVectorization::GenEpilog(DoloopNode *doloop) { + // new doloopnode + // copy doloop body + // insert newdoloopnode after doloop + return doloop; +} + +// generate prolog/epilog blocknode if needed +// return doloop need to be vectorized +DoloopNode *LoopVectorization::PrepareDoloop(DoloopNode *doloop, LoopTransPlan *tp) { + bool needPeel = false; + // generate peel code if need + if (needPeel) { + // peel code here + // udpate loop lower of doloop if need + // copy loop body + } + // generate epilog + if (tp->eBound) { + // copy doloop + DoloopNode *edoloop = doloop->CloneTree(*codeMPAlloc); + ASSERT(tp->eBound->lowNode, "nullptr check"); + // update loop low bound + edoloop->SetStartExpr(tp->eBound->lowNode); + // add epilog after doloop + LfoPart *lfoInfo = (*lfoStmtParts)[doloop->GetStmtID()]; + ASSERT(lfoInfo, "nullptr check"); + BaseNode *parent = lfoInfo->GetParent(); + ASSERT(parent && (parent->GetOpCode() == OP_block), "nullptr check"); + BlockNode *pblock = static_cast(parent); + 
pblock->InsertAfter(doloop, edoloop); + } + return doloop; +} + +void LoopVectorization::TransformLoop() { + auto it = vecPlans.begin(); + for (; it != vecPlans.end(); it++) { + // generate prilog/epilog according to vectorization plan + DoloopNode *donode = it->first; + LoopTransPlan *tplan = it->second; + DoloopNode *vecDoloopNode = PrepareDoloop(donode, tplan); + VectorizeDoLoop(vecDoloopNode, tplan); + } +} + +void LoopVectorization::Perform() { + // step 2: collect information, legality check and generate transform plan + MapleMap::iterator mapit = depInfo->doloopInfoMap.begin(); + for (; mapit != depInfo->doloopInfoMap.end(); mapit++) { + // if current doloop is innest loop and parallelizable, generate vectorize plan; + if (mapit->second && mapit->second->children.empty() && mapit->second->Parallelizable()) { + // hack code here, tranform plan + LoopTransPlan *tplan = localMP->New(codeMP, localMP); + tplan->Generate(mapit->first, mapit->second); + vecPlans[mapit->first] = tplan; + } + } + // step 3: do transform + // transform plan map to each doloop + TransformLoop(); +} + +AnalysisResult *DoLfoLoopVectorization::Run(MeFunction *func, MeFuncResultMgr *m, ModuleResultMgr*) { + // generate lfo IR + LfoPreEmitter *lfoemit = static_cast(m->GetAnalysisResult(MeFuncPhase_LFOPREEMIT, func)); + CHECK_NULL_FATAL(lfoemit); + // step 1: get dependence graph for each loop + auto *lfodepInfo = static_cast(m->GetAnalysisResult(MeFuncPhase_LFODEPTEST, func)); + CHECK_NULL_FATAL(lfodepInfo); + + if (DEBUGFUNC(func)) { + LogInfo::MapleLogger() << "\n**** Before loop vectorization phase ****\n"; + func->GetMirFunc()->Dump(false); + } + + // run loop vectorization + LoopVectorization loopVec(NewMemPool(), lfoemit, lfodepInfo); + loopVec.Perform(); + + // invalid analysis result + m->InvalidAllResults(); + + if (DEBUGFUNC(func)) { + LogInfo::MapleLogger() << "\n\n\n**** After loop vectorization phase ****\n"; + func->GetMirFunc()->Dump(false); + } + + // lower lfoIR for other mapleme phases + MIRLower mirlowerer(func->GetMIRModule(), func->GetMirFunc()); + mirlowerer.SetLowerME(); + mirlowerer.SetLowerExpandArray(); + mirlowerer.LowerFunc(*(func->GetMirFunc())); + + return nullptr; +} +} // namespace maple diff --git a/src/mapleall/maple_me/src/lfo_pre_emit.cpp b/src/mapleall/maple_me/src/lfo_pre_emit.cpp index 9edab603389e26b4a82617d5ad20314e04764f35..a7899e2fb6e5979167f1f651ba62cbdc3fba57d6 100644 --- a/src/mapleall/maple_me/src/lfo_pre_emit.cpp +++ b/src/mapleall/maple_me/src/lfo_pre_emit.cpp @@ -574,6 +574,7 @@ DoloopNode *LfoPreEmitter::EmitLfoDoloop(BB *mewhilebb, BlockNode *curblk, LfoWh "EmitLfoDoLoop: there are other statements at while header bb"); DoloopNode *Doloopnode = codeMP->New(); LfoPart *lfopart = lfoMP->New(curblk); + lfopart->mestmt = lastmestmt; lfoStmtParts[Doloopnode->GetStmtID()] = lfopart; Doloopnode->SetDoVarStIdx(whileInfo->ivOst->GetMIRSymbol()->GetStIdx()); CondGotoMeStmt *condGotostmt = static_cast(lastmestmt); diff --git a/src/mapleall/maple_me/src/me_cfg.cpp b/src/mapleall/maple_me/src/me_cfg.cpp index 5082622e05cdef2b5ac3fbf53b0f3c4ccfb1e3b0..ae602ae65e836ecce230a6a689d1f997776edd00 100644 --- a/src/mapleall/maple_me/src/me_cfg.cpp +++ b/src/mapleall/maple_me/src/me_cfg.cpp @@ -1287,10 +1287,21 @@ void MeCFG::CreateBasicBlocks() { break; } case OP_dassign: { + DassignNode *dass = static_cast(stmt); + if (!func.IsLfo() && func.GetLfoFunc() != nullptr) { + // delete identity assignments inserted by LFO + if (dass->GetRHS()->GetOpCode() == OP_dread) { + DreadNode *dread = 
static_cast(dass->GetRHS()); + if (dass->GetStIdx() == dread->GetStIdx() && dass->GetFieldID() == dread->GetFieldID()) { + func.CurFunction()->GetBody()->RemoveStmt(stmt); + break; + } + } + } if (curBB->IsEmpty()) { curBB->SetFirst(stmt); } - if (isJavaModule && static_cast(stmt)->GetRHS()->MayThrowException()) { + if (isJavaModule && dass->GetRHS()->MayThrowException()) { stmt->SetOpCode(OP_maydassign); if (tryStmt != nullptr) { // breaks new BB only inside try blocks diff --git a/src/mapleall/maple_me/src/me_phase_manager.cpp b/src/mapleall/maple_me/src/me_phase_manager.cpp index e377ac901f62761ab27eed1bd6c86045c3d82c0d..d8bd3e237dc9c2f85f3e88b1c9c10929a22491f2 100644 --- a/src/mapleall/maple_me/src/me_phase_manager.cpp +++ b/src/mapleall/maple_me/src/me_phase_manager.cpp @@ -76,6 +76,7 @@ #include "lfo_iv_canon.h" #include "cfg_opt.h" #include "lfo_dep_test.h" +#include "lfo_loop_vec.h" #define JAVALANG (mirModule.IsJavaModule()) diff --git a/src/mapleall/maple_me/src/me_prop.cpp b/src/mapleall/maple_me/src/me_prop.cpp index 8981b39f33fc734dcbe629570b51e36106bfbe27..dd223ce14693e33543a226e69f6493baeda20882 100644 --- a/src/mapleall/maple_me/src/me_prop.cpp +++ b/src/mapleall/maple_me/src/me_prop.cpp @@ -51,7 +51,7 @@ AnalysisResult *MeDoMeProp::Run(MeFunction *func, MeFuncResultMgr *m, ModuleResu } MeProp meProp(*hMap, *dom, *NewMemPool(), Prop::PropConfig { MeOption::propBase, propIloadRef, MeOption::propGlobalRef, MeOption::propFinaliLoadRef, MeOption::propIloadRefNonParm, MeOption::propAtPhi, - MeOption::propWithInverse || MeOption::optLevel >= 3 }); + MeOption::propWithInverse || func->IsLfo() }); meProp.TraversalBB(*func->GetCfg()->GetCommonEntryBB()); if (DEBUGFUNC(func)) { LogInfo::MapleLogger() << "\n============== After Copy Propagation =============" << '\n';
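Some notes on the logic introduced above, with small standalone sketches in plain C++ (illustrative only, not part of the patch).

Dependence testing: for two affine subscripts a1*i + c1 and a2*i + c2 of the same array dimension, DoloopInfo::TestDependences applies a Lamport test when the coefficients are equal (giving an exact dependence distance) and otherwise falls back to a GCD test (which can only prove independence or report a dependence of unknown distance). A minimal sketch of that per-dimension decision, using plain integers instead of SubscriptDesc and assuming positive coefficients as enforced by BuildOneSubscriptDesc:

#include <cstdint>
#include <cstdio>

struct DimResult {
  bool dependent = false;
  bool unknownDist = false;  // only meaningful when dependent
  int64_t depDist = 0;       // only meaningful when dependent and !unknownDist
};

static int64_t Gcd(int64_t a, int64_t b) { return b == 0 ? a : Gcd(b, a % b); }

// Tests subscripts a1*i + c1 and a2*i + c2 for one array dimension.
static DimResult TestOneDimension(int64_t a1, int64_t c1, int64_t a2, int64_t c2) {
  DimResult r;
  if (a1 == a2) {                        // Lamport test: equal coefficients
    if ((c1 - c2) % a1 != 0) {
      return r;                          // accesses can never overlap: independent in this dimension
    }
    r.dependent = true;
    r.depDist = (c1 - c2) / a1;          // constant dependence distance (0 means same iteration)
    return r;
  }
  if ((c1 - c2) % Gcd(a1, a2) == 0) {    // GCD test: an integer solution may exist
    r.dependent = true;
    r.unknownDist = true;                // conservative: distance unknown
  }
  return r;
}

int main() {
  DimResult r1 = TestOneDimension(2, 0, 2, 1);  // A[2*i] vs A[2*i + 1]: independent
  DimResult r2 = TestOneDimension(1, 1, 1, 0);  // A[i + 1] vs A[i]: dependent, distance 1
  printf("r1 dependent=%d, r2 dependent=%d dist=%lld\n",
         r1.dependent, r2.dependent, (long long)r2.depDist);
  return 0;
}

As in DoloopInfo::Parallelizable, a pair only blocks vectorization when it is dependent with an unknown or non-zero distance.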
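Bound generation: LoopTransPlan::GenerateBoundInfo widens the step of the main loop to stride * vecFactor and, when the trip count is not a multiple of vecFactor, clips its upper bound to the largest aligned value and hands the leftover iterations to an epilog loop starting there. A sketch of that arithmetic for the constant-bounds case, with a hypothetical SplitLoopBounds helper over plain integers (low bound assumed aligned, "<" loop condition, matching the patch's stated limitations):

#include <cstdint>
#include <cstdio>

struct SplitBounds {
  int64_t vecLow, vecUp, vecStride;  // main loop: for (i = vecLow; i < vecUp; i += vecStride)
  bool needEpilog;
  int64_t epiLow, epiUp, epiStride;  // epilog loop, valid only when needEpilog
};

static SplitBounds SplitLoopBounds(int64_t low, int64_t up, int64_t stride, int64_t vecFactor) {
  SplitBounds b{};
  int64_t newStride = stride * vecFactor;
  int64_t alignedUp = (up - low) / newStride * newStride + low;  // range rounded down to whole widened steps
  b.vecLow = low;
  b.vecUp = alignedUp;
  b.vecStride = newStride;
  b.needEpilog = (alignedUp != up);
  if (b.needEpilog) {
    b.epiLow = alignedUp;  // epilog resumes exactly where the widened loop stopped
    b.epiUp = up;
    b.epiStride = stride;
  }
  return b;
}

int main() {
  // for (i = 0; i < 10; i++) with vecFactor 4: main loop runs i = 0 and 4, epilog runs i = 8 and 9
  SplitBounds b = SplitLoopBounds(0, 10, 1, 4);
  printf("vec [%lld, %lld) step %lld; epilog [%lld, %lld) step %lld\n",
         (long long)b.vecLow, (long long)b.vecUp, (long long)b.vecStride,
         (long long)b.epiLow, (long long)b.epiUp, (long long)b.epiStride);
  return 0;
}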
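Overall effect: for an innermost doloop that passes the dependence test, widenDoloop plus VectorizeNode turn a loop such as a[i] = b[i] + c[i] over 32-bit ints into a v4i32 loop with stride 4 followed by a scalar epilog (loop-invariant scalars would additionally be splatted into a vector register through the vector_from_scalar intrinsic built by GenIntrinNode). On AArch64 the result corresponds roughly to the NEON routine below; this only illustrates the intended outcome, since the phase rewrites Maple IR rather than emitting intrinsics:

#include <arm_neon.h>
#include <cstdint>

void AddArrays(int32_t *a, const int32_t *b, const int32_t *c, int64_t n) {
  int64_t i = 0;
  // widened main loop: stride goes from 1 to 4, i32 loads/stores/adds become v4i32 operations
  for (; i + 4 <= n; i += 4) {
    vst1q_s32(a + i, vaddq_s32(vld1q_s32(b + i), vld1q_s32(c + i)));
  }
  // epilog loop covers the remaining n % 4 iterations
  for (; i < n; i++) {
    a[i] = b[i] + c[i];
  }
}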