diff --git a/src/mapleall/maple_driver/src/maple_comb_compiler.cpp b/src/mapleall/maple_driver/src/maple_comb_compiler.cpp index bfb4b32bbd1e8e7636dc5f9ac2ed1de01674c16d..7a867cd8bef23ca4a64fd9689f6af5053fc46b65 100644 --- a/src/mapleall/maple_driver/src/maple_comb_compiler.cpp +++ b/src/mapleall/maple_driver/src/maple_comb_compiler.cpp @@ -21,6 +21,7 @@ #include "inline.h" #include "me_phase_manager.h" #include "constantfold.h" +#include "lfo_loop_vec.h" namespace maple { using namespace mapleOption; @@ -199,7 +200,10 @@ ErrorCode MapleCombCompiler::Compile(MplOptions &options, std::unique_ptr 0) { + LogInfo::MapleLogger() << "\n " << LoopVectorization::vectorizedLoop << " loop vectorized\n"; + } delete optMp; return nErr; } diff --git a/src/mapleall/maple_ir/include/mir_type.h b/src/mapleall/maple_ir/include/mir_type.h index aff6f1b2fc7648e169623f462300708b30de1cc5..f3d8e2e43962d1b86b94d497c532861a95f6b404 100644 --- a/src/mapleall/maple_ir/include/mir_type.h +++ b/src/mapleall/maple_ir/include/mir_type.h @@ -69,6 +69,10 @@ inline bool IsPossible32BitAddress(PrimType tp) { return (tp == PTY_ptr || tp == PTY_ref || tp == PTY_u32 || tp == PTY_a32); } +inline bool MustBeAddress(PrimType tp) { + return (tp == PTY_ptr || tp == PTY_ref || tp == PTY_a64 || tp == PTY_a32); +} + inline bool IsPrimitivePureScalar(PrimitiveType primitiveType) { return primitiveType.IsInteger() && !primitiveType.IsAddress() && !primitiveType.IsDynamic() && !primitiveType.IsVector(); diff --git a/src/mapleall/maple_me/include/lfo_dep_test.h b/src/mapleall/maple_me/include/lfo_dep_test.h index 478071a4fbfbbad614f8ed67f81de4219ab7a8b6..7380a9424a20f1529ef778b177979284d41b5fc6 100644 --- a/src/mapleall/maple_me/include/lfo_dep_test.h +++ b/src/mapleall/maple_me/include/lfo_dep_test.h @@ -70,8 +70,8 @@ class DoloopInfo { MapleVector lhsArrays; // each element represents an array assign MapleVector rhsArrays; // each element represents an array read BB *doloopBB = nullptr; // the start BB for the doloop body - bool hasPtrAccess = false; // give up dep testing if true bool hasOtherCtrlFlow = false; // give up dep testing if true + bool hasPtrAccess = false; // give up dep testing if true bool hasScalarAssign = false; // give up dep testing if true bool hasMayDef = false; // give up dep testing if true MapleVector outputDepTestList; // output dependence only @@ -90,12 +90,13 @@ class DoloopInfo { ~DoloopInfo() = default; bool IsLoopInvariant(MeExpr *x); SubscriptDesc *BuildOneSubscriptDesc(BaseNode *subsX); - ArrayAccessDesc *BuildOneArrayAccessDesc(ArrayNode *arr, bool isRHS); - void CreateRHSArrayAccessDesc(BaseNode *x); + ArrayAccessDesc *BuildOneArrayAccessDesc(ArrayNode *arr, BaseNode *parent); + void CreateRHSArrayAccessDesc(BaseNode *x, BaseNode *parent); void CreateArrayAccessDesc(BlockNode *block); void CreateDepTestLists(); void TestDependences(MapleVector *depTestList, bool bothLHS); bool Parallelizable(); + ArrayAccessDesc* GetArrayAccessDesc(ArrayNode *node, bool isRHS); }; class LfoDepInfo : public AnalysisResult { diff --git a/src/mapleall/maple_me/include/lfo_loop_vec.h b/src/mapleall/maple_me/include/lfo_loop_vec.h index ec7008234ffbb7ad4e0ac36e965785af84d51dee..fbd28140a4d1e14dca393f8c301cc77bf9260dd2 100644 --- a/src/mapleall/maple_me/include/lfo_loop_vec.h +++ b/src/mapleall/maple_me/include/lfo_loop_vec.h @@ -22,6 +22,7 @@ #include "lfo_dep_test.h" namespace maple { + class LoopBound { public: LoopBound() : lowNode(nullptr), upperNode(nullptr), incrNode(nullptr) {}; @@ -33,14 +34,24 @@ public: class LoopVecInfo { public: - explicit LoopVecInfo(MapleAllocator &alloc) : vecStmtIDs(alloc.Adapter()) { - largestPrimType = PTY_i8; + explicit LoopVecInfo(MapleAllocator &alloc) : vecStmtIDs(alloc.Adapter()), + uniformNodes(alloc.Adapter()), + uniformVecNodes(alloc.Adapter()) { + // smallestPrimType = PTY_i64; + largestTypeSize = 8; // i8 bit size + currentRHSTypeSize = 0; } - void UpdatePrimType(PrimType ctype); - - PrimType largestPrimType; // largest size type in vectorizable stmtnodes + void UpdateWidestTypeSize(uint32_t ); + void ResetStmtRHSTypeSize() { currentRHSTypeSize = 0; } + bool UpdateRHSTypeSize(PrimType); // record rhs node typesize + //PrimType smallestPrimType; // smallest size type in vectorizable stmtnodes + uint32_t largestTypeSize; // largest size type in vectorizable stmtnodes + uint32_t currentRHSTypeSize; // largest size of current stmt's RHS, this is temp value and update for each stmt // list of vectorizable stmtnodes in current loop, others can't be vectorized MapleSet vecStmtIDs; + MapleSet uniformNodes; // loop invariable scalar set + MapleMap uniformVecNodes; // new generated vector node + //MapleMap inductionStmt; // dup scalar to vector stmt may insert before stmt }; // tranform plan for current loop @@ -84,11 +95,11 @@ class LoopVectorization { void Perform(); void TransformLoop(); void VectorizeDoLoop(DoloopNode *, LoopTransPlan*); - void VectorizeNode(BaseNode *, uint8_t); + void VectorizeNode(BaseNode *, LoopTransPlan *); MIRType *GenVecType(PrimType, uint8_t); - StmtNode *GenIntrinNode(BaseNode *scalar, PrimType vecPrimType); - bool ExprVectorizable(DoloopInfo *doloopInfo, BaseNode *x); - bool Vectorizable(DoloopInfo *doloopInfo, BlockNode *block, LoopVecInfo *); + RegassignNode *GenDupScalarStmt(BaseNode *scalar, PrimType vecPrimType); + bool ExprVectorizable(DoloopInfo *doloopInfo, LoopVecInfo*, BaseNode *x); + bool Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo*, BlockNode *block); void widenDoloop(DoloopNode *doloop, LoopTransPlan *); DoloopNode *PrepareDoloop(DoloopNode *, LoopTransPlan *); DoloopNode *GenEpilog(DoloopNode *); @@ -96,6 +107,8 @@ class LoopVectorization { MapleMap *GetVecPlans() { return &vecPlans; } std::string PhaseName() const { return "lfoloopvec"; } +public: + static uint32_t vectorizedLoop; private: MIRFunction *mirFunc; // point to lfoStmtParts of lfopreemit, map lfoinfo for StmtNode, key is stmtID diff --git a/src/mapleall/maple_me/src/alias_class.cpp b/src/mapleall/maple_me/src/alias_class.cpp index eac9898160eed134afa8ed143dd9f12819bd9679..2d6ac1e2b288372c00fa17ea2ec42a406341ede3 100644 --- a/src/mapleall/maple_me/src/alias_class.cpp +++ b/src/mapleall/maple_me/src/alias_class.cpp @@ -380,6 +380,12 @@ AliasInfo AliasClass::CreateAliasElemsExpr(BaseNode &expr) { intrn.GetNopndAt(0)->GetOpCode() == OP_dread)) { return CreateAliasElemsExpr(*intrn.GetNopndAt(0)); } + IntrinDesc *intrinDesc = &IntrinDesc::intrinTable[intrn.GetIntrinsic()]; + if (intrinDesc->IsVectorOp()) { + SetPtrOpndsNextLevNADS(0, static_cast(intrn.NumOpnds()), intrn.GetNopnd(), + false); + return AliasInfo(); + } // fall-through } [[clang::fallthrough]]; diff --git a/src/mapleall/maple_me/src/demand_driven_alias_analysis.cpp b/src/mapleall/maple_me/src/demand_driven_alias_analysis.cpp index bc75be5720b4b5789671cdcc0f9be4d78ee98115..fe51d3b1009dce91fa182b5b765bf0d264fc2547 100644 --- a/src/mapleall/maple_me/src/demand_driven_alias_analysis.cpp +++ b/src/mapleall/maple_me/src/demand_driven_alias_analysis.cpp @@ -238,7 +238,7 @@ PEGBuilder::PtrValueRecorder PEGBuilder::BuildPEGNodeOfAdd(const BinaryNode *bin auto *constVal = static_cast(binaryNode->Opnd(1))->GetConstVal(); ASSERT(constVal->GetKind() == kConstInt, "pointer cannot add/sub a non-integer value"); constexpr int kBitNumInOneByte = 8; - int64 offsetValue = static_cast(constVal)->GetValue() * kBitNumInOneByte; + int64 offsetValue = static_cast(static_cast(constVal)->GetValue()) * kBitNumInOneByte; if (binaryNode->GetOpCode() == OP_sub) { offset = ptrNode.offset + (-offsetValue); } else if (binaryNode->GetOpCode() == OP_add) { @@ -1041,4 +1041,4 @@ bool DemandDrivenAliasAnalysis::MayAlias(OriginalSt *ostA, OriginalSt *ostB) { } return aliasAccordingDDAA; } -} // namespace maple \ No newline at end of file +} // namespace maple diff --git a/src/mapleall/maple_me/src/lfo_dep_test.cpp b/src/mapleall/maple_me/src/lfo_dep_test.cpp index 00322aac6ab6efa59d6354ef66930932be238a9d..15f8f61f97c184e0fbc9324b57da0cee7aa5d468 100644 --- a/src/mapleall/maple_me/src/lfo_dep_test.cpp +++ b/src/mapleall/maple_me/src/lfo_dep_test.cpp @@ -128,8 +128,7 @@ bool DoloopInfo::IsLoopInvariant(MeExpr *x) { } SubscriptDesc *DoloopInfo::BuildOneSubscriptDesc(BaseNode *subsX) { - LfoPart *lfopart = depInfo->preEmit->GetLfoExprPart(subsX); - MeExpr *meExpr = lfopart->GetMeExpr(); + MeExpr *meExpr = depInfo->preEmit->GetMexpr(subsX); SubscriptDesc *subsDesc = alloc->GetMemPool()->New(meExpr); if (IsLoopInvariant(meExpr)) { subsDesc->loopInvariant = true; @@ -198,37 +197,45 @@ SubscriptDesc *DoloopInfo::BuildOneSubscriptDesc(BaseNode *subsX) { return subsDesc; } -ArrayAccessDesc *DoloopInfo::BuildOneArrayAccessDesc(ArrayNode *arr, bool isRHS) { -#if 0 +ArrayAccessDesc *DoloopInfo::BuildOneArrayAccessDesc(ArrayNode *arr, BaseNode *parent) { MIRType *atype = arr->GetArrayType(GlobalTables::GetTypeTable()); ASSERT(atype->GetKind() == kTypeArray, "type was wrong"); MIRArrayType *arryty = static_cast(atype); size_t dim = arryty->GetDim(); CHECK_FATAL(dim == arr->NumOpnds() - 1, "BuildOneArrayAccessDesc: inconsistent array dimension"); -#else - size_t dim = arr->NumOpnds() - 1; -#endif + // ensure array base is loop invariant + OpMeExpr *arrayMeExpr = static_cast(depInfo->preEmit->GetMexpr(arr)); + if (!IsLoopInvariant(arrayMeExpr->GetOpnd(0))) { + hasPtrAccess = true; + return nullptr; + } // determine arrayOst - LfoPart *lfopart = depInfo->preEmit->GetLfoExprPart(arr); - OpMeExpr *arrayMeExpr = static_cast(lfopart->GetMeExpr()); + IvarMeExpr *ivarMeExpr = nullptr; OriginalSt *arryOst = nullptr; - if (arrayMeExpr->GetOpnd(0)->GetMeOp() == kMeOpAddrof) { - AddrofMeExpr *addrof = static_cast(arrayMeExpr->GetOpnd(0)); - arryOst = depInfo->lfoFunc->meFunc->GetMeSSATab()->GetOriginalStFromID(addrof->GetOstIdx()); - } else { - ScalarMeExpr *scalar = dynamic_cast(arrayMeExpr->GetOpnd(0)); - if (scalar) { - arryOst = scalar->GetOst(); + if (parent->op == OP_iread) { + ivarMeExpr = static_cast(depInfo->preEmit->GetMexpr(parent)); + CHECK_FATAL(ivarMeExpr->GetMu() != nullptr, "BuildOneArrayAccessDesc: no mu corresponding to iread"); + arryOst = ivarMeExpr->GetMu()->GetOst(); + } else if (parent->op == OP_iassign) { + IassignMeStmt *iassMeStmt = static_cast(depInfo->preEmit->GetMeStmt(static_cast(parent)->GetStmtID())); + ivarMeExpr = iassMeStmt->GetLHSVal(); + if (ivarMeExpr->GetMu()) { + arryOst = ivarMeExpr->GetMu()->GetOst(); } else { - hasPtrAccess = true; - return nullptr; + MapleMap *chiList = iassMeStmt->GetChiList(); + CHECK_FATAL(!chiList->empty(), "BuildOneArrayAccessDesc: no chi corresponding to iassign"); + arryOst = depInfo->lfoFunc->meFunc->GetMeSSATab()->GetOriginalStFromID(chiList->begin()->first); } + } else { + hasPtrAccess = true; + return nullptr; } + ArrayAccessDesc *arrDesc = alloc->GetMemPool()->New(alloc, arr, arryOst); - if (isRHS) { - rhsArrays.push_back(arrDesc); - } else { + if (parent->op == OP_iassign) { lhsArrays.push_back(arrDesc); + } else { + rhsArrays.push_back(arrDesc); } for (size_t i = 0; i < dim; i++) { SubscriptDesc *subs = BuildOneSubscriptDesc(arr->GetIndex(i)); @@ -237,12 +244,20 @@ ArrayAccessDesc *DoloopInfo::BuildOneArrayAccessDesc(ArrayNode *arr, bool isRHS) return arrDesc; } -void DoloopInfo::CreateRHSArrayAccessDesc(BaseNode *x) { +void DoloopInfo::CreateRHSArrayAccessDesc(BaseNode *x, BaseNode *parent) { if (x->GetOpCode() == OP_array) { - BuildOneArrayAccessDesc(static_cast(x), true /* isRHS */); + if (parent->GetOpCode() != OP_iread) { // skip arrays not underneath iread unless loop-invariant + if (IsLoopInvariant(depInfo->preEmit->GetMexpr(x))) { + return; + } + hasPtrAccess = true; + } else { + BuildOneArrayAccessDesc(static_cast(x), parent); + } + return; } for (size_t i = 0; i < x->NumOpnds(); i++) { - CreateRHSArrayAccessDesc(x->Opnd(i)); + CreateRHSArrayAccessDesc(x->Opnd(i), x); } } @@ -259,7 +274,7 @@ void DoloopInfo::CreateArrayAccessDesc(BlockNode *block) { break; } case OP_if: { - CreateRHSArrayAccessDesc(stmt->Opnd(0)); + CreateRHSArrayAccessDesc(stmt->Opnd(0), stmt); IfStmtNode *ifstmtnode = static_cast(stmt); if (ifstmtnode->GetThenPart()) CreateArrayAccessDesc(ifstmtnode->GetThenPart()); @@ -269,17 +284,16 @@ void DoloopInfo::CreateArrayAccessDesc(BlockNode *block) { } case OP_dowhile: case OP_while: { - CreateRHSArrayAccessDesc(stmt->Opnd(0)); + CreateRHSArrayAccessDesc(stmt->Opnd(0), stmt); CreateArrayAccessDesc(static_cast(stmt)->GetBody()); break; } case OP_iassign: { IassignNode *iass = static_cast(stmt); if (iass->addrExpr->GetOpCode() == OP_array) { - ArrayAccessDesc *adesc = BuildOneArrayAccessDesc(static_cast(iass->addrExpr), false /* isRHS */); - if (adesc == nullptr) { - hasMayDef = true; - } else { + ArrayAccessDesc *adesc = BuildOneArrayAccessDesc(static_cast(iass->addrExpr), iass); + if (adesc != nullptr) { + CHECK_FATAL(adesc->arrayOst, "CreateArrayAccessDesc: arrayOst not valid"); // check if the chi list has only the same array LfoPart *lfopart = depInfo->preEmit->GetLfoStmtPart(iass->GetStmtID()); IassignMeStmt *iassMeStmt = static_cast(lfopart->GetMeStmt()); @@ -296,18 +310,18 @@ void DoloopInfo::CreateArrayAccessDesc(BlockNode *block) { } else { hasPtrAccess = true; } - CreateRHSArrayAccessDesc(iass->rhs); + CreateRHSArrayAccessDesc(iass->rhs, iass); break; } case OP_dassign: case OP_regassign: { hasScalarAssign = true; - CreateRHSArrayAccessDesc(stmt->Opnd(0)); + CreateRHSArrayAccessDesc(stmt->Opnd(0), stmt); break; } default: { for (size_t i = 0; i < stmt->NumOpnds(); i++) { - CreateRHSArrayAccessDesc(stmt->Opnd(i)); + CreateRHSArrayAccessDesc(stmt->Opnd(i), stmt); } break; } @@ -412,6 +426,16 @@ bool DoloopInfo::Parallelizable() { return true; } +ArrayAccessDesc* DoloopInfo::GetArrayAccessDesc(ArrayNode *node, bool isRHS) { + MapleVector* arrayDescptr = isRHS ? &rhsArrays : &lhsArrays; + for (auto it = arrayDescptr->begin(); it != arrayDescptr->end(); it++) { + if ((*it)->theArray == node) { + return (*it); + } + } + return nullptr; +} + void LfoDepInfo::PerformDepTest() { size_t i; MapleMap::iterator mapit = doloopInfoMap.begin(); @@ -420,6 +444,9 @@ void LfoDepInfo::PerformDepTest() { if (!doloopInfo->children.empty()) { continue; // only handling innermost doloops } + if (doloopInfo->hasOtherCtrlFlow) { + continue; + } doloopInfo->CreateArrayAccessDesc(doloopInfo->doloop->GetDoBody()); if (DEBUGFUNC(lfoFunc->meFunc)) { LogInfo::MapleLogger() << "Innermost Doloop:"; @@ -450,8 +477,7 @@ void LfoDepInfo::PerformDepTest() { LogInfo::MapleLogger() << " [messy]"; } else { LogInfo::MapleLogger() << " [" << subs->coeff << "*"; - LfoPart *lfopart = preEmit->GetLfoExprPart(subs->iv); - ScalarMeExpr *scalar = static_cast(lfopart->GetMeExpr()); + ScalarMeExpr *scalar = static_cast(preEmit->GetMexpr(subs->iv)); scalar->GetOst()->Dump(); LogInfo::MapleLogger() << "+" << subs->additiveConst << "]"; } @@ -471,8 +497,7 @@ void LfoDepInfo::PerformDepTest() { LogInfo::MapleLogger() << " [messy]"; } else { LogInfo::MapleLogger() << " [" << subs->coeff << "*"; - LfoPart *lfopart = preEmit->GetLfoExprPart(subs->iv); - ScalarMeExpr *scalar = static_cast(lfopart->GetMeExpr()); + ScalarMeExpr *scalar = static_cast(preEmit->GetMexpr(subs->iv)); scalar->GetOst()->Dump(); LogInfo::MapleLogger() << "+" << subs->additiveConst << "]"; } diff --git a/src/mapleall/maple_me/src/lfo_loop_vec.cpp b/src/mapleall/maple_me/src/lfo_loop_vec.cpp index 2f8ca65ad852552128d5f1d24ab183ae61e58f14..f0c9c49dc405cf2a505b40bdda7905f78fa63b47 100644 --- a/src/mapleall/maple_me/src/lfo_loop_vec.cpp +++ b/src/mapleall/maple_me/src/lfo_loop_vec.cpp @@ -21,19 +21,34 @@ #include "lfo_loop_vec.h" namespace maple { -void LoopVecInfo::UpdatePrimType(PrimType cptype) { - if (GetPrimTypeSize(largestPrimType) < GetPrimTypeSize(cptype)) { - largestPrimType = cptype; +uint32_t LoopVectorization::vectorizedLoop = 0; + +void LoopVecInfo::UpdateWidestTypeSize(uint32_t newtypesize) { + if (largestTypeSize < newtypesize) { + largestTypeSize = newtypesize; + } +} + +bool LoopVecInfo::UpdateRHSTypeSize(PrimType ptype) { + uint32_t newSize = GetPrimTypeSize(ptype) * 8; + if (currentRHSTypeSize == 0) { + currentRHSTypeSize = newSize; + return true; + } else if (newSize > currentRHSTypeSize) { + currentRHSTypeSize = newSize; + return false; // skip vectorize now since type is not consistent + } else if (newSize < currentRHSTypeSize) { + return false; } + return true; } // generate new bound for vectorization loop and epilog loop // original bound info , condNode doesn't include equal // limitation now: initNode and incrNode are const and initnode is vectorLane aligned. -// vectorization loop: -// epilog loop: < uppernode/vectorFactor*vectorFactor, uppernode, incrnode> +// vectorization loop: +// epilog loop: < (uppernode-initnode)/vectorFactor*vectorFactor+initnode, uppernode, incrnode> void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { - (void) li; BaseNode *initNode = doloop->GetStartExpr(); BaseNode *incrNode = doloop->GetIncrExpr(); BaseNode *condNode = doloop->GetCondExpr(); @@ -45,23 +60,21 @@ void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { constOnenode = codeMP->New(PTY_i32, constOne); } ASSERT(incrNode->IsConstval(), "too complex, incrNode should be const"); - ConstvalNode *icn = static_cast(incrNode); - MIRIntConst *incrConst = static_cast(icn->GetConstVal()); + ConstvalNode *icn = static_cast(incrNode); + MIRIntConst *incrConst = static_cast(icn->GetConstVal()); ASSERT(condNode->IsBinaryNode(), "cmp node should be binary node"); - BaseNode *upNode = condNode->Opnd(1); + BaseNode *upNode = condNode->Opnd(1); // TODO:: verify opnd(1) is upper while opnd(0) is index variable - MIRIntConst *newIncr = GlobalTables::GetIntConstTable().GetOrCreateIntConst( - vecFactor * incrConst->GetValue(), *typeInt); + MIRIntConst *newIncr = GlobalTables::GetIntConstTable().GetOrCreateIntConst(vecFactor*incrConst->GetValue(), *typeInt); ConstvalNode *newIncrNode = codeMP->New(PTY_i32, newIncr); - // check initNode is alignment if (initNode->IsConstval()) { - ConstvalNode *lcn = static_cast(initNode); - MIRIntConst *lowConst = static_cast(lcn->GetConstVal()); + ConstvalNode *lcn = static_cast(initNode); + MIRIntConst *lowConst = static_cast(lcn->GetConstVal()); int64 lowvalue = lowConst->GetValue(); // upNode is constant if (upNode->IsConstval()) { - ConstvalNode *ucn = static_cast(upNode); - MIRIntConst *upConst = static_cast(ucn->GetConstVal()); + ConstvalNode *ucn = static_cast(upNode); + MIRIntConst *upConst = static_cast(ucn->GetConstVal()); int64 upvalue = upConst->GetValue(); if (condOpHasEqual) { upvalue += 1; @@ -71,6 +84,8 @@ void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { vBound = localMP->New(nullptr, nullptr, newIncrNode); } else { // trip count is not vector lane aligned + // vectorized loop < initnode, (up - low)/newIncr *newincr + init, newincr> + // TODO: the vectorized loop need unalignment instruction. int32_t newupval = (upvalue - lowvalue) / (newIncr->GetValue()) * (newIncr->GetValue()) + lowvalue; MIRIntConst *newUpConst = GlobalTables::GetIntConstTable().GetOrCreateIntConst(newupval, *typeInt); ConstvalNode *newUpNode = codeMP->New(PTY_i32, newUpConst); @@ -78,15 +93,13 @@ void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { // generate epilog eBound = localMP->New(newUpNode, nullptr, nullptr); } - } else if (upNode->GetOpCode() == OP_dread) { - // upNode is symbol variable, TODO::op_regread + } else if (upNode->GetOpCode() == OP_dread || upNode->GetOpCode() == OP_regread) { // step 1: generate vectorized loop bound - AddrofNode *dreadnode = static_cast(upNode); - // upNode of vBound is uppnode / newIncr * newIncr + // upNode of vBound is (uppnode - initnode) / newIncr * newIncr + initnode BinaryNode *divnode; BaseNode *addnode = upNode; if (condOpHasEqual) { - addnode = codeMP->New(OP_add, PTY_i32, dreadnode, constOnenode); + addnode = codeMP->New(OP_add, PTY_i32, upNode, constOnenode); } if (lowvalue != 0) { BinaryNode *subnode = codeMP->New(OP_sub, PTY_i32, addnode, initNode); @@ -95,13 +108,14 @@ void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { divnode = codeMP->New(OP_div, PTY_i32, addnode, newIncrNode); } BinaryNode *mulnode = codeMP->New(OP_mul, PTY_i32, divnode, newIncrNode); - vBound = localMP->New(nullptr, mulnode, newIncrNode); + addnode = codeMP->New(OP_add, PTY_i32, mulnode, initNode); + vBound = localMP->New(nullptr, addnode, newIncrNode); // step2: generate epilog bound - eBound = localMP->New(mulnode, nullptr, nullptr); + eBound = localMP->New(addnode, nullptr, nullptr); } else { ASSERT(0, "upper bound is complex, NIY"); } - } else if (initNode->GetOpCode() == OP_dread) { + } else if (initNode->GetOpCode() == OP_dread || initNode->GetOpCode() == OP_regread) { // initnode is not constant // set bound of vectorized loop BinaryNode *subnode; @@ -113,25 +127,25 @@ void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { } BinaryNode *divnode = codeMP->New(OP_div, PTY_i32, subnode, newIncrNode); BinaryNode *mulnode = codeMP->New(OP_mul, PTY_i32, divnode, newIncrNode); - vBound = localMP->New(nullptr, mulnode, newIncrNode); + BinaryNode *addnode = codeMP->New(OP_add, PTY_i32, mulnode, initNode); + vBound = localMP->New(nullptr, addnode, newIncrNode); // set bound of epilog loop - eBound = localMP->New(mulnode, nullptr, nullptr); + eBound = localMP->New(addnode, nullptr, nullptr); } else { ASSERT(0, "low bound is complex, NIY"); } } // generate best plan for current doloop -void LoopTransPlan::Generate(DoloopNode *doloop, DoloopInfo *li) { - // hack values of vecFactor and vecLanes - vecLanes = 128 / ((GetPrimTypeSize(vecInfo->largestPrimType)) * 8); - vecFactor = vecLanes; // vectory length / type +void LoopTransPlan::Generate(DoloopNode *doloop, DoloopInfo* li) { + // vector length / type size + vecLanes = 128 / (vecInfo->largestTypeSize); + vecFactor = vecLanes; // generate bound information GenerateBoundInfo(doloop, li); } -MIRType* LoopVectorization::GenVecType( - PrimType sPrimType, uint8 lanes) { +MIRType* LoopVectorization::GenVecType(PrimType sPrimType, uint8 lanes) { MIRType *vecType = nullptr; CHECK_FATAL(IsPrimitiveInteger(sPrimType), "primtype should be integer"); switch (sPrimType) { @@ -211,6 +225,31 @@ MIRType* LoopVectorization::GenVecType( } break; } + case PTY_a64: { + if (lanes == 2) { + vecType = GlobalTables::GetTypeTable().GetV2UInt64(); + } else { + ASSERT(0, "unsupported a64 vector lanes"); + } + } + case PTY_ptr: { + if (GetPrimTypeSize(sPrimType) == 4) { + if (lanes == 4) { + vecType = GlobalTables::GetTypeTable().GetV4UInt32(); + } else if (lanes == 2) { + vecType = GlobalTables::GetTypeTable().GetV2UInt32(); + } else { + ASSERT(0, "unsupported ptr vector lanes"); + } + } else if (GetPrimTypeSize(sPrimType) == 8) { + if (lanes == 2) { + vecType = GlobalTables::GetTypeTable().GetV2UInt64(); + } else { + ASSERT(0, "unsupported ptr vector lanes"); + } + } + break; + } default: ASSERT(0, "NIY"); } @@ -218,29 +257,92 @@ MIRType* LoopVectorization::GenVecType( } // generate instrinsic node to copy scalar to vector type -StmtNode *LoopVectorization::GenIntrinNode(BaseNode *scalar, PrimType vecPrimType) { - PrimType intrnPrimtype = PTY_v4i32; +RegassignNode *LoopVectorization::GenDupScalarStmt(BaseNode *scalar, PrimType vecPrimType) { MIRIntrinsicID intrnID = INTRN_vector_from_scalar_v4i32; MIRType *vecType = nullptr; switch (vecPrimType) { case PTY_v4i32: { - intrnPrimtype = PTY_v4i32; intrnID = INTRN_vector_from_scalar_v4i32; vecType = GlobalTables::GetTypeTable().GetV4Int32(); break; } + case PTY_v2i32: { + intrnID = INTRN_vector_from_scalar_v2i32; + vecType = GlobalTables::GetTypeTable().GetV2Int32(); + break; + } + case PTY_v4u32: { + intrnID = INTRN_vector_from_scalar_v4u32; + vecType = GlobalTables::GetTypeTable().GetV4UInt32(); + break; + } + case PTY_v2u32: { + intrnID = INTRN_vector_from_scalar_v2u32; + vecType = GlobalTables::GetTypeTable().GetV2UInt32(); + break; + } + case PTY_v8i16: { + intrnID = INTRN_vector_from_scalar_v8i16; + vecType = GlobalTables::GetTypeTable().GetV8Int16(); + break; + } + case PTY_v8u16: { + intrnID = INTRN_vector_from_scalar_v8u16; + vecType = GlobalTables::GetTypeTable().GetV8UInt16(); + break; + } + case PTY_v4i16: { + intrnID = INTRN_vector_from_scalar_v4i16; + vecType = GlobalTables::GetTypeTable().GetV4Int16(); + break; + } + case PTY_v4u16: { + intrnID = INTRN_vector_from_scalar_v4u16; + vecType = GlobalTables::GetTypeTable().GetV4UInt16(); + break; + } + case PTY_v16i8: { + intrnID = INTRN_vector_from_scalar_v16i8; + vecType = GlobalTables::GetTypeTable().GetV16Int8(); + break; + } + case PTY_v16u8: { + intrnID = INTRN_vector_from_scalar_v16u8; + vecType = GlobalTables::GetTypeTable().GetV16UInt8(); + break; + } + case PTY_v8i8: { + intrnID = INTRN_vector_from_scalar_v8i8; + vecType = GlobalTables::GetTypeTable().GetV8Int8(); + break; + } + case PTY_v8u8: { + intrnID = INTRN_vector_from_scalar_v8u8; + vecType = GlobalTables::GetTypeTable().GetV8UInt8(); + break; + } + case PTY_v2i64: { + intrnID = INTRN_vector_from_scalar_v2i64; + vecType = GlobalTables::GetTypeTable().GetV2Int64(); + break; + } + case PTY_v2u64: { + intrnID = INTRN_vector_from_scalar_v2u64; + vecType = GlobalTables::GetTypeTable().GetV2UInt64(); + break; + } default: { ASSERT(0, "NIY"); } } // generate instrinsic op - IntrinsicopNode *rhs = codeMP->New(*codeMPAlloc, OP_intrinsicopwithtype, PTY_v4i32); + IntrinsicopNode *rhs = codeMP->New(*codeMPAlloc, OP_intrinsicop, vecPrimType); rhs->SetIntrinsic(intrnID); rhs->SetNumOpnds(1); - rhs->SetNOpndAt(0, scalar); + rhs->GetNopnd().push_back(scalar); rhs->SetTyIdx(vecType->GetTypeIndex()); - PregIdx regIdx = mirFunc->GetPregTab()->CreatePreg(intrnPrimtype, vecType); - RegassignNode *stmtNode = codeMP->New(PTY_v4i32, regIdx, rhs); + PregIdx regIdx = mirFunc->GetPregTab()->CreatePreg(vecPrimType); + RegassignNode *stmtNode = codeMP->New(vecPrimType, regIdx, rhs); return stmtNode; } @@ -248,7 +350,7 @@ StmtNode *LoopVectorization::GenIntrinNode(BaseNode *scalar, PrimType vecPrimTyp // following opcode can be vectorized directly // +, -, *, &, |, <<, >>, compares, ~, ! // iassign, iread, dassign, dread -void LoopVectorization::VectorizeNode(BaseNode *node, uint8 count) { +void LoopVectorization::VectorizeNode(BaseNode *node, LoopTransPlan *tp) { if (enableDebug) { node->Dump(0); } @@ -259,34 +361,40 @@ void LoopVectorization::VectorizeNode(BaseNode *node, uint8 count) { MIRType &mirType = GetTypeFromTyIdx(iassign->GetTyIdx()); CHECK_FATAL(mirType.GetKind() == kTypePointer, "iassign must have pointer type"); MIRPtrType *ptrType = static_cast(&mirType); - MIRType *vecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), count); + MIRType *vecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), tp->vecFactor); ASSERT(vecType != nullptr, "vector type should not be null"); MIRType *pvecType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*vecType, PTY_ptr); // update lhs type iassign->SetTyIdx(pvecType->GetTypeIndex()); // visit rsh - VectorizeNode(iassign->GetRHS(), count); + BaseNode *rhs = iassign->GetRHS(); + if (tp->vecInfo->uniformVecNodes.find(rhs) != tp->vecInfo->uniformVecNodes.end()) { + // rhs replaced scalar node with vector node + iassign->SetRHS(tp->vecInfo->uniformVecNodes[rhs]); + } else { + VectorizeNode(iassign->GetRHS(), tp); + } break; } case OP_iread: { IreadNode *ireadnode = static_cast(node); // update primtype - MIRType *primVecType = GenVecType(ireadnode->GetPrimType(), count); + MIRType *primVecType = GenVecType(ireadnode->GetPrimType(), tp->vecFactor); node->SetPrimType(primVecType->GetPrimType()); // update tyidx MIRType &mirType = GetTypeFromTyIdx(ireadnode->GetTyIdx()); CHECK_FATAL(mirType.GetKind() == kTypePointer, "iread must have pointer type"); MIRPtrType *ptrType = static_cast(&mirType); - MIRType *vecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), count); + MIRType *vecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), tp->vecFactor); ASSERT(vecType != nullptr, "vector type should not be null"); MIRType *pvecType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*vecType, PTY_ptr); + ASSERT(ireadnode->GetPrimType() == vecType->GetPrimType(), "iread node vector prim type is not equal to vectorized point to type"); // update lhs type ireadnode->SetTyIdx(pvecType->GetTypeIndex()); break; } // scalar related: widen type directly or unroll instructions case OP_dassign: - case OP_dread: ASSERT(0, "NIY"); break; // vector type support in opcode +, -, *, &, |, <<, >>, compares, ~, ! @@ -309,10 +417,10 @@ void LoopVectorization::VectorizeNode(BaseNode *node, uint8 count) { case OP_cmpl: { ASSERT(node->IsBinaryNode(), "should be binarynode"); BinaryNode *binNode = static_cast(node); - MIRType *vecType = GenVecType(node->GetPrimType(), count); + MIRType *vecType = GenVecType(node->GetPrimType(), tp->vecFactor); node->SetPrimType(vecType->GetPrimType()); // update primtype of binary op - VectorizeNode(binNode->Opnd(0), count); - VectorizeNode(binNode->Opnd(1), count); + VectorizeNode(binNode->Opnd(0), tp); + VectorizeNode(binNode->Opnd(1), tp); break; } // unary op @@ -321,19 +429,14 @@ void LoopVectorization::VectorizeNode(BaseNode *node, uint8 count) { case OP_lnot: { ASSERT(node->IsUnaryNode(), "should be unarynode"); UnaryNode *unaryNode = static_cast(node); - MIRType *vecType = GenVecType(node->GetPrimType(), count); + MIRType *vecType = GenVecType(node->GetPrimType(), tp->vecFactor); node->SetPrimType(vecType->GetPrimType()); // update primtype of unary op - VectorizeNode(unaryNode->Opnd(0), count); + VectorizeNode(unaryNode->Opnd(0), tp); break; } + case OP_dread: case OP_constval: { - LfoPart *lfoP = (*lfoExprParts)[node]; - ASSERT(lfoP != nullptr, "nullptr check"); - // constval could be used in binary op without widen directly - if (!lfoP->GetParent()->IsBinaryNode()) { - // use intrinsicop vdupq_n_i32 to move const to tmp variable - ASSERT(0, "constval need to extended NIY"); - } + // donothing break; } default: @@ -358,18 +461,44 @@ void LoopVectorization::widenDoloop(DoloopNode *doloop, LoopTransPlan *tp) { } } + void LoopVectorization::VectorizeDoLoop(DoloopNode *doloop, LoopTransPlan *tp) { // LogInfo::MapleLogger() << "\n**** dump doloopnode ****\n"; // doloop->Dump(0); // step 1: handle loop low/upper/stride widenDoloop(doloop, tp); - // step 2: widen vectorizable stmt in doloop + // step 2: insert dup stmt before doloop + if (!tp->vecInfo->uniformNodes.empty()) { + LfoPart* lfopart = (*lfoStmtParts)[doloop->GetStmtID()]; + BaseNode *parent = lfopart->GetParent(); + ASSERT(parent && (parent->GetOpCode() == OP_block), "nullptr check"); + BlockNode *pblock = static_cast(parent); + auto it = tp->vecInfo->uniformNodes.begin(); + for (; it != tp->vecInfo->uniformNodes.end(); it++) { + BaseNode *node = *it; + LfoPart *lfoP = (*lfoExprParts)[node]; + // check node's parent, if they are binary node, skip the duplication + if (!lfoP->GetParent()->IsBinaryNode()) { + MIRType *vecType = GenVecType(node->GetPrimType(), tp->vecFactor); + RegassignNode *dupScalarStmt = GenDupScalarStmt(node, vecType->GetPrimType()); + pblock->InsertBefore(doloop, dupScalarStmt); + RegreadNode *regreadNode = codeMP->New(vecType->GetPrimType(), dupScalarStmt->GetRegIdx()); + tp->vecInfo->uniformVecNodes[node] = regreadNode; + } + } + } + + // step 3: widen vectorizable stmt in doloop BlockNode *loopbody = doloop->GetDoBody(); for (auto &stmt : loopbody->GetStmtNodes()) { - VectorizeNode(&stmt, tp->vecFactor); - // stmt could not be widen directly, unroll instruction with vecFactor - // move value from vector type if need (need def-use information from plan) + if (tp->vecInfo->vecStmtIDs.count(stmt.GetStmtID()) > 0) { + VectorizeNode(&stmt, tp); + } else { + // stmt could not be widen directly, unroll instruction with vecFactor + // move value from vector type if need (need def-use information from plan) + CHECK_FATAL(0, "NIY:: unvectorized stmt"); + } } } @@ -420,8 +549,7 @@ void LoopVectorization::TransformLoop() { } } -bool LoopVectorization::ExprVectorizable( - DoloopInfo *doloopInfo, BaseNode *x) { +bool LoopVectorization::ExprVectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInfo, BaseNode *x) { if (!IsPrimitiveInteger(x->GetPrimType())) { return false; } @@ -436,7 +564,16 @@ bool LoopVectorization::ExprVectorizable( if (parent && parent->GetOpCode() == OP_array) { return true; } - return false; + MeExpr *expr = lfopart->GetMeExpr(); + if ((x->GetOpCode() == OP_constval) || + (expr && doloopInfo->IsLoopInvariant(expr))) { + if (!vecInfo->UpdateRHSTypeSize(x->GetPrimType())) { + return false; + } + vecInfo->uniformNodes.insert(x); + return true; + } + return false; // TODO::primary induction variable } // supported binary ops case OP_add: @@ -455,19 +592,24 @@ bool LoopVectorization::ExprVectorizable( case OP_ge: case OP_cmpg: case OP_cmpl: - return ExprVectorizable(doloopInfo, x->Opnd(0)) && ExprVectorizable(doloopInfo, x->Opnd(1)); + return ExprVectorizable(doloopInfo, vecInfo, x->Opnd(0)) && ExprVectorizable(doloopInfo, vecInfo, x->Opnd(1)); // supported unary ops case OP_bnot: case OP_lnot: case OP_neg: - return ExprVectorizable(doloopInfo, x->Opnd(0)); + return ExprVectorizable(doloopInfo, vecInfo, x->Opnd(0)); case OP_iread: { - bool canVec = ExprVectorizable(doloopInfo, x->Opnd(0)); + bool canVec = ExprVectorizable(doloopInfo, vecInfo, x->Opnd(0)); if (canVec) { - IreadNode *iread = static_cast(x); - if (iread->GetFieldID() != 0 && iread->Opnd(0)->GetOpCode() == OP_array) { - MeExpr *meExpr = depInfo->preEmit->GetLfoExprPart(iread->Opnd(0))->GetMeExpr(); - canVec = doloopInfo->IsLoopInvariant(meExpr); + // TODO:: insert cvt instruction + if (!vecInfo->UpdateRHSTypeSize(x->GetPrimType())) { + canVec = false; // skip if rhs type is not consistent + } else { + IreadNode *iread = static_cast(x); + if ((iread->GetFieldID() != 0 || MustBeAddress(iread->GetPrimType())) && iread->Opnd(0)->GetOpCode() == OP_array) { + MeExpr *meExpr = depInfo->preEmit->GetLfoExprPart(iread->Opnd(0))->GetMeExpr(); + canVec = doloopInfo->IsLoopInvariant(meExpr); + } } } return canVec; @@ -475,7 +617,7 @@ bool LoopVectorization::ExprVectorizable( // supported n-ary ops case OP_array: { for (size_t i = 0; i < x->NumOpnds(); i++) { - if (!ExprVectorizable(doloopInfo, x->Opnd(i))) { + if (!ExprVectorizable(doloopInfo, vecInfo, x->Opnd(i))) { return false; } } @@ -487,9 +629,11 @@ bool LoopVectorization::ExprVectorizable( } // assumed to be inside innermost loop -bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, BlockNode *block, LoopVecInfo* vecInfo) { +bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInfo, BlockNode *block) { StmtNode *stmt = block->GetFirst(); while (stmt != nullptr) { + // reset vecInfo + vecInfo->ResetStmtRHSTypeSize(); switch (stmt->GetOpCode()) { case OP_doloop: case OP_dowhile: @@ -498,31 +642,64 @@ bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, BlockNode *block, L break; } case OP_block: - return Vectorizable(doloopInfo, static_cast(stmt)->GetDoBody(), vecInfo); + return Vectorizable(doloopInfo, vecInfo, static_cast(stmt)->GetDoBody()); case OP_iassign: { IassignNode *iassign = static_cast(stmt); - bool canVec = ExprVectorizable(doloopInfo, iassign->GetRHS()); - if (canVec && iassign->GetFieldID() != 0) { // check base of iassign - MeExpr *meExpr = depInfo->preEmit->GetLfoExprPart(iassign->Opnd(0))->GetMeExpr(); - canVec = doloopInfo->IsLoopInvariant(meExpr); + int32_t coeff = 1; + // check lsh is complex subscript + if (iassign->addrExpr->GetOpCode() == OP_array) { + ArrayNode *lhsArr = static_cast(iassign->addrExpr); + ArrayAccessDesc *accessDesc = doloopInfo->GetArrayAccessDesc(lhsArr, false /*isRHS*/); + ASSERT(accessDesc != nullptr, "nullptr check"); + size_t dim = lhsArr->NumOpnds() - 1; + // check innest loop dimension is complex + // case like a[abs(i-1)] = 1; depth test will report it's parallelize + if (accessDesc->subscriptVec[dim-1]->tooMessy) { + return false; + } + coeff = accessDesc->subscriptVec[dim-1]->coeff; + coeff = coeff < 0 ? (-coeff) : coeff; + } + // check rsh + bool canVec = ExprVectorizable(doloopInfo, vecInfo, iassign->GetRHS()); + if (canVec) { + if (iassign->GetFieldID() != 0) { // check base of iassign + MeExpr *meExpr = (*lfoExprParts)[iassign->Opnd(0)]->GetMeExpr(); + canVec = doloopInfo->IsLoopInvariant(meExpr); + } else { + // if rhs is loop invar in case of fieldID is 0 + MeExpr *meExpr = (*lfoExprParts)[iassign->GetRHS()]->GetMeExpr(); + if (meExpr && doloopInfo->IsLoopInvariant(meExpr)) { + vecInfo->UpdateRHSTypeSize(iassign->GetRHS()->GetPrimType()); + vecInfo->uniformNodes.insert(iassign->GetRHS()); + } + } } if (canVec) { MIRType &mirType = GetTypeFromTyIdx(iassign->GetTyIdx()); CHECK_FATAL(mirType.GetKind() == kTypePointer, "iassign must have pointer type"); MIRPtrType *ptrType = static_cast(&mirType); PrimType stmtpt = ptrType->GetPointedType()->GetPrimType(); - CHECK_FATAL(IsPrimitiveInteger(stmtpt) && (!IsPrimitivePoint(stmtpt)), - "iassign ptr type should be integer now"); - vecInfo->UpdatePrimType(stmtpt); + CHECK_FATAL(IsPrimitiveInteger(stmtpt), "iassign ptr type should be integer now"); + // now check lsh type size should be same as rhs typesize + uint32_t lshtypesize = GetPrimTypeSize(stmtpt) * 8; + if (lshtypesize != vecInfo->currentRHSTypeSize) { + return false; // need cvt instruction + } vecInfo->vecStmtIDs.insert((stmt)->GetStmtID()); + // update largest type size + vecInfo->UpdateWidestTypeSize(coeff * lshtypesize); + } else { + // early return + return false; } - return canVec; + break; } default: return false; } stmt = stmt->GetNext(); } - return false; + return true; } void LoopVectorization::Perform() { @@ -533,7 +710,10 @@ void LoopVectorization::Perform() { continue; } LoopVecInfo *vecInfo = localMP->New(localAlloc); - bool vectorizable = Vectorizable(mapit->second, mapit->first->GetDoBody(), vecInfo); + bool vectorizable = Vectorizable(mapit->second, vecInfo, mapit->first->GetDoBody()); + if (vectorizable) { + LoopVectorization::vectorizedLoop++; + } if (enableDebug) { LogInfo::MapleLogger() << "\nInnermost Doloop:"; if (!vectorizable) { diff --git a/src/mapleall/maple_me/src/me_ssa_lpre.cpp b/src/mapleall/maple_me/src/me_ssa_lpre.cpp index 9e18a6a496dde4a3a3c310505368180789537d17..84874dedcbdfaf4cc50b3fc629094c4f169ab943 100644 --- a/src/mapleall/maple_me/src/me_ssa_lpre.cpp +++ b/src/mapleall/maple_me/src/me_ssa_lpre.cpp @@ -327,12 +327,10 @@ void MeSSALPre::BuildWorkListExpr(MeStmt &meStmt, int32 seqStmt, MeExpr &meExpr, if (preKind != kAddrPre) { break; } - if (mirModule->IsJavaModule()) { - auto *addrOfMeExpr = static_cast(&meExpr); - const OriginalSt *ost = ssaTab->GetOriginalStFromID(addrOfMeExpr->GetOstIdx()); - if (ost->IsLocal()) { // skip lpre for stack addresses as they are cheap and need keep for rc - break; - } + auto *addrOfMeExpr = static_cast(&meExpr); + const OriginalSt *ost = ssaTab->GetOriginalStFromID(addrOfMeExpr->GetOstIdx()); + if (ost->IsLocal()) { // skip lpre for stack addresses as they are cheap and need keep for rc + break; } (void)CreateRealOcc(meStmt, seqStmt, meExpr, false); break;