diff --git a/src/mapleall/maple_me/include/lfo_dep_test.h b/src/mapleall/maple_me/include/lfo_dep_test.h
index f6a3e0acaad770afa5484392ebe3d3b10a92fb78..3f017eca5be3ec17ec1638b3ba3603f11bd24999 100644
--- a/src/mapleall/maple_me/include/lfo_dep_test.h
+++ b/src/mapleall/maple_me/include/lfo_dep_test.h
@@ -108,6 +108,10 @@ class DoloopInfo {
   bool CheckReductionLoop();
   ArrayAccessDesc* GetArrayAccessDesc(const ArrayNode *node, bool isRHS);
   bool IsReductionVar(StIdx stidx) const { return (redVars.count(stidx) > 0); }
+  int HasTrueDepOnly(); // min flow-dep distance (> 0); -1 if none exists or another dep type is present
+  int HasAntiDepOnly(); // max anti-dep distance (< 0); 0 if none exists, 1 if another dep type is present
+  bool HasOutputDep(); // true if an output dependence has unknown or nonzero distance
+  bool NotParallel(); // return true if doloop has complex issue we don't handle now
 };
 
 class LfoDepInfo : public AnalysisResult {
diff --git a/src/mapleall/maple_me/include/lfo_loop_vec.h b/src/mapleall/maple_me/include/lfo_loop_vec.h
index e1ff1a763313a3a5a78bfc1fa62c6c5ccc59a18d..8a7cebc44f22fef6fd45378b34ac1345fb3ebd84 100644
--- a/src/mapleall/maple_me/include/lfo_loop_vec.h
+++ b/src/mapleall/maple_me/include/lfo_loop_vec.h
@@ -46,6 +46,8 @@ class LoopVecInfo {
     currentRHSTypeSize = 0;
     currentLHSTypeSize = 0;
     widenop = 0;
+    minTrueDepDist = 0;
+    maxAntiDepDist = 0;
     hasRedvar = false;
   }
   virtual ~LoopVecInfo() = default;
@@ -57,6 +59,8 @@ class LoopVecInfo {
   uint32_t currentRHSTypeSize; // largest size of current stmt's RHS, this is temp value and update for each stmt
   uint32_t currentLHSTypeSize; // record current stmt lhs type in vectorize phase
   uint32_t widenop; // can't handle t * t which t need widen operation
+  int16_t minTrueDepDist; // positive value, 0 means no true dependence distance recorded
+  int16_t maxAntiDepDist; // negative value, 0 means no anti dependence distance recorded
   bool hasRedvar; // loop has reduction variable
   // list of vectorizable stmtnodes in current loop, others can't be vectorized
   MapleSet vecStmtIDs;
@@ -104,6 +108,7 @@ class LoopVectorization {
     codeMP = lfoEmit->GetCodeMP();
     codeMPAlloc = lfoEmit->GetCodeMPAlloc();
     localMP = localmp;
+    isArraySub = false;
     enableDebug = debug;
   }
   ~LoopVectorization() = default;
@@ -156,6 +161,7 @@ class LoopVectorization {
   MemPool *localMP; // local mempool
   MapleAllocator localAlloc;
   MapleMap vecPlans; // each vectoriable loopnode has its best vectorization plan
+  bool isArraySub; // current expression is used in array subscript
   bool enableDebug;
 };
 } // namespace maple
diff --git a/src/mapleall/maple_me/src/lfo_dep_test.cpp b/src/mapleall/maple_me/src/lfo_dep_test.cpp
index a4d5316369b802d80bd211cd6b8cfc4a009e01fd..fd881e9e4857711306afd911e776510cbf0045b0 100644
--- a/src/mapleall/maple_me/src/lfo_dep_test.cpp
+++ b/src/mapleall/maple_me/src/lfo_dep_test.cpp
@@ -508,6 +508,59 @@ bool DoloopInfo::Parallelizable() {
   return true;
 }
 
+// complex case not handled
+bool DoloopInfo::NotParallel() {
+  if (hasPtrAccess || hasOtherCtrlFlow || hasMayDef ||
+      (hasScalarAssign && !CheckReductionLoop())) {
+    return true;
+  }
+  return false;
+}
+
+bool DoloopInfo::HasOutputDep() {
+  for (size_t i = 0; i < outputDepTestList.size(); ++i) {
+    DepTestPair *testPair = &outputDepTestList[i];
+    if (testPair->dependent && (testPair->unknownDist || testPair->depDist != 0)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// return 1 if a dependent pair has unknown or positive distance (other dep type present)
+// else return max anti-depDist which is < 0, or 0 when no anti dependence exists
+int DoloopInfo::HasAntiDepOnly() {
+  int depdist = INT32_MIN;
+  for (size_t i = 0; i < flowDepTestList.size(); ++i) {
+    DepTestPair *testPair = &flowDepTestList[i];
+    if (testPair->dependent) {
+      if (testPair->unknownDist || testPair->depDist > 0) {
+        return 1;
+      } else if (testPair->depDist < 0) {
+        depdist = depdist < testPair->depDist ? testPair->depDist : depdist; // get max value
+      }
+    }
+  }
+  return (depdist == INT32_MIN) ? 0 : depdist;
+}
+
+// return -1 if a dependent pair has unknown or negative distance (other dep type present),
+// or if no true dependence exists; else return min flowdepdist which is > 0
+int DoloopInfo::HasTrueDepOnly() {
+  int depdist = INT32_MAX;
+  for (size_t i = 0; i < flowDepTestList.size(); ++i) {
+    DepTestPair *testPair = &flowDepTestList[i];
+    if (testPair->dependent) {
+      if (testPair->unknownDist || testPair->depDist < 0) {
+        return -1;
+      } else if (testPair->depDist > 0) {
+        depdist = depdist < testPair->depDist ? depdist : testPair->depDist; // get min value
+      }
+    }
+  }
+  return (depdist == INT32_MAX) ? -1 : depdist;
+}
+
 static bool IsDreadOf(BaseNode *x, StIdx stIdx, FieldID fieldID) {
   if (x->op != OP_dread) {
     return false;
diff --git a/src/mapleall/maple_me/src/lfo_loop_vec.cpp b/src/mapleall/maple_me/src/lfo_loop_vec.cpp
index a767296afd3841e83f5820a1d8a0a610822f3d06..3a49b73e6dc22ec3ed19012fcecb6fd5f98a7761 100644
--- a/src/mapleall/maple_me/src/lfo_loop_vec.cpp
+++ b/src/mapleall/maple_me/src/lfo_loop_vec.cpp
@@ -150,6 +150,30 @@ bool LoopTransPlan::Generate(const DoloopNode *doloop, const DoloopInfo* li, boo
     }
     return false;
   }
+  // if depdist is not zero
+  if (vecInfo->minTrueDepDist > 0 || vecInfo->maxAntiDepDist < 0) {
+    // true dep distance is less than vecLanes, return false
+    if ((vecInfo->minTrueDepDist > 0) && (vecInfo->minTrueDepDist < vecLanes)) {
+      if (enableDebug) {
+        LogInfo::MapleLogger() << "NOT VECTORIZABLE because true dependence distance less than veclanes in loop\n";
+      }
+      return false;
+    }
+    // anti-dep distance doesn't break vectorization in case
+    // use before def like a[i] = a[i+1]
+    // if use is after def as following, distance less than vecLanes will break vectorization
+    //   a[i] =
+    //        = a[i+1]
+    // there's no extra information to describe sequence now
+    // we only handle one stmt in loopbody without considering anti-dep distance
+    if ((vecInfo->maxAntiDepDist < 0) && ((-vecInfo->maxAntiDepDist) < vecLanes) &&
+        (doloop->GetDoBody()->GetFirst() != doloop->GetDoBody()->GetLast())) {
+      if (enableDebug) {
+        LogInfo::MapleLogger() << "NOT VECTORIZABLE because anti dependence distance less than veclanes in loop\n";
+      }
+      return false;
+    }
+  }
   // compare trip count if lanes is larger than tripcount
   {
     BaseNode *initNode = doloop->GetStartExpr();
@@ -1580,10 +1604,7 @@ bool LoopVectorization::ExprVectorizable(DoloopInfo *doloopInfo, LoopVecInfo* ve
     case OP_constval:
     case OP_dread:
     case OP_addrof: {
-      PreMeMIRExtension* lfopart = (*PreMeExprExtensionMap)[x];
-      CHECK_FATAL(lfopart, "nullptr check");
-      BaseNode *parent = lfopart->GetParent();
-      if (parent && parent->GetOpCode() == OP_array) {
+      if (isArraySub) {
         return true;
       }
       if (x->GetOpCode() == OP_constval) {
@@ -1680,9 +1701,13 @@ bool LoopVectorization::ExprVectorizable(DoloopInfo *doloopInfo, LoopVecInfo* ve
     // supported n-ary ops
     case OP_array: {
+      bool outerIsArraySub = isArraySub; // restored on exit so a nested array keeps the enclosing subscript state
       for (size_t i = 0; i < x->NumOpnds(); i++) {
+        isArraySub = true;
         if (!ExprVectorizable(doloopInfo, vecInfo, x->Opnd(i))) {
+          isArraySub = outerIsArraySub;
           return false;
         }
+        isArraySub = outerIsArraySub;
       }
       return true;
     }
@@ -1896,8 +1921,8 @@ void LoopVectorization::Perform() {
   // step 2: collect information, legality check and generate transform plan
   MapleMap::iterator mapit = depInfo->doloopInfoMap.begin();
   for (; mapit != depInfo->doloopInfoMap.end(); ++mapit) {
-    if (!mapit->second->children.empty() ||
-        ((!mapit->second->Parallelizable()) && (!mapit->second->CheckReductionLoop()))) {
+    // skip loops with inner loops, unhandled complex cases, or output dependences (unknown or nonzero distance)
+    if (!mapit->second->children.empty() || mapit->second->NotParallel() || mapit->second->HasOutputDep()) {
       continue;
     }
     // check in debug
@@ -1905,6 +1930,21 @@ void LoopVectorization::Perform() {
       break;
     }
+    // query each dependence summary once; both calls scan flowDepTestList
+    int trueDepDist = mapit->second->HasTrueDepOnly();
+    int antiDepDist = mapit->second->HasAntiDepOnly();
+    if (trueDepDist < 0 && antiDepDist > 0) {
+      // unknown distance, or true and anti dependences mixed: no distance gets recorded
+      // for LoopTransPlan::Generate to compare against vecLanes, so skip this loop
+      continue;
+    }
     LoopVecInfo *vecInfo = localMP->New(localAlloc);
+    // clamp into int16_t range instead of wrapping; the fields are only compared against vecLanes
+    if (trueDepDist > 0) {
+      vecInfo->minTrueDepDist = static_cast<int16_t>((trueDepDist > INT16_MAX) ? INT16_MAX : trueDepDist);
+    }
+    if (antiDepDist < 0) {
+      vecInfo->maxAntiDepDist = static_cast<int16_t>((antiDepDist < INT16_MIN) ? INT16_MIN : antiDepDist);
+    }
     bool vectorizable = Vectorizable(mapit->second, vecInfo, mapit->first->GetDoBody());
     if (vectorizable) {
       LoopVectorization::vectorizedLoop++;