From e196fa9954c85102d98a2ce0d64081c6dfdf8830 Mon Sep 17 00:00:00 2001 From: Fred Chow Date: Sat, 5 Mar 2022 19:05:21 -0800 Subject: [PATCH 1/2] Look for IV being operand of CVT in termination test so we can compute the loop trip count and convert to doloop --- src/mapleall/maple_me/src/lfo_iv_canon.cpp | 38 ++++++++++++++++++---- src/mapleall/maple_me/src/pme_emit.cpp | 10 ++++++ 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/src/mapleall/maple_me/src/lfo_iv_canon.cpp b/src/mapleall/maple_me/src/lfo_iv_canon.cpp index 59434b6a36..23d1845d6a 100644 --- a/src/mapleall/maple_me/src/lfo_iv_canon.cpp +++ b/src/mapleall/maple_me/src/lfo_iv_canon.cpp @@ -247,21 +247,37 @@ void IVCanon::ComputeTripCount() { // make the side that consists of a single IV the left operand // check left operand ScalarMeExpr *iv = dynamic_cast(testExpr->GetOpnd(0)); + bool cvtDetected = false; + if (iv == nullptr && testExpr->GetOpnd(0)->GetOp() == OP_cvt) { + iv = dynamic_cast(testExpr->GetOpnd(0)->GetOpnd(0)); + cvtDetected = true; + } IVDesc *ivdesc = nullptr; if (iv) { for (uint32 i = 0; i < ivvec.size(); i++) { if (iv->GetOst() == ivvec[i]->ost) { ivdesc = ivvec[i]; + if (cvtDetected) { + ivdesc->canBePrimary = false; + } break; } } } if (ivdesc == nullptr) { // check second operand + cvtDetected = false; iv = dynamic_cast(testExpr->GetOpnd(1)); + if (iv == nullptr && testExpr->GetOpnd(1)->GetOp() == OP_cvt) { + iv = dynamic_cast(testExpr->GetOpnd(1)->GetOpnd(0)); + cvtDetected = true; + } if (iv) { for (uint32 i = 0; i < ivvec.size(); i++) { if (iv->GetOst() == ivvec[i]->ost) { ivdesc = ivvec[i]; + if (cvtDetected) { + ivdesc->canBePrimary = false; + } break; } } @@ -311,15 +327,16 @@ void IVCanon::ComputeTripCount() { } // form the trip count expression + MeExpr *testExprRHS = testExpr->GetOpnd(1); PrimType primTypeUsed = (!ivdesc->initExpr->IsZero()) ? 
- GetSignedPrimType(testExpr->GetOpnd(0)->GetPrimType()) : testExpr->GetOpnd(0)->GetPrimType(); + GetSignedPrimType(testExprRHS->GetPrimType()) : testExprRHS->GetPrimType(); PrimType divPrimType = primTypeUsed; if (ivdesc->stepValue < 0) { divPrimType = GetSignedPrimType(divPrimType); } // add: t = bound + (stepValue +/-1) OpMeExpr add(-1, OP_add, primTypeUsed, 2); - add.SetOpnd(0, testExpr->GetOpnd(1)); // IV bound + add.SetOpnd(0, testExprRHS); // IV bound if (CompareHasEqual(condbr->GetOpnd()->GetOp())) { // if cond has equal operand, t = bound + stepValue add.SetOpnd(1, irMap->CreateIntConstMeExpr(ivdesc->stepValue, primTypeUsed)); @@ -329,6 +346,13 @@ void IVCanon::ComputeTripCount() { primTypeUsed)); } MeExpr *subx = irMap->HashMeExpr(add); + // insert a CVT for ivdesc->initExpr if needed + if (GetPrimTypeSize(ivdesc->initExpr->GetPrimType()) != GetPrimTypeSize(primTypeUsed) && ivdesc->initExpr->GetMeOp() != kMeOpConst) { + OpMeExpr cvtx(-1, OP_cvt, primTypeUsed, 1); + cvtx.SetOpnd(0, ivdesc->initExpr); + cvtx.SetOpndType(ivdesc->initExpr->GetPrimType()); + ivdesc->initExpr = func->GetIRMap()->HashMeExpr(cvtx); + } if (!ivdesc->initExpr->IsZero()) { // sub: t = t - initExpr OpMeExpr subtract(-1, OP_sub, primTypeUsed, 2); @@ -513,6 +537,11 @@ void IVCanon::PerformIVCanon() { CharacterizeIV(initVersion, loopbackVersion, philhs); } } + CanonEntryValues(); + ComputeTripCount(); + if (tripCount == nullptr) { + return; + } FindPrimaryIV(); if (DEBUGFUNC(func)) { LogInfo::MapleLogger() << "****** while loop at label " << "@" @@ -529,11 +558,6 @@ void IVCanon::PerformIVCanon() { LogInfo::MapleLogger() << endl; } } - CanonEntryValues(); - ComputeTripCount(); - if (tripCount == nullptr) { - return; - } if (DEBUGFUNC(func)) { LogInfo::MapleLogger() << "****** trip count is: "; tripCount->Dump(func->GetIRMap(), 0); diff --git a/src/mapleall/maple_me/src/pme_emit.cpp b/src/mapleall/maple_me/src/pme_emit.cpp index 0ee46371b9..0ad8eea30c 100755 --- 
a/src/mapleall/maple_me/src/pme_emit.cpp +++ b/src/mapleall/maple_me/src/pme_emit.cpp @@ -764,6 +764,16 @@ DoloopNode *PreMeEmitter::EmitPreMeDoloop(BB *mewhilebb, BlockNode *curblk, PreM CondGotoMeStmt *condGotostmt = static_cast(lastmestmt); Doloopnode->SetStartExpr(EmitPreMeExpr(whileInfo->initExpr, Doloopnode)); Doloopnode->SetContExpr(EmitPreMeExpr(condGotostmt->GetOpnd(), Doloopnode)); + CompareNode *compare = static_cast(Doloopnode->GetCondExpr()); + if (compare->Opnd(0)->GetOpCode() == OP_cvt && compare->Opnd(0)->Opnd(0)->GetOpCode() == OP_cvt) { + PrimType resPrimType = compare->Opnd(0)->GetPrimType(); + PrimType opndPrimType = static_cast(compare->Opnd(0))->FromType(); + TypeCvtNode *secondCvtX = static_cast(compare->Opnd(0)->Opnd(0)); + if (IsNoCvtNeeded(resPrimType, secondCvtX->FromType()) && + IsNoCvtNeeded(opndPrimType, secondCvtX->GetPrimType())) { + compare->SetOpnd(secondCvtX->Opnd(0), 0); + } + } BlockNode *dobodyNode = codeMP->New(); Doloopnode->SetDoBody(dobodyNode); PreMeMIRExtension *doloopExt = preMeMP->New(Doloopnode); -- Gitee From 950c2b2b54ad34bff57c1b65ad0b20067550998c Mon Sep 17 00:00:00 2001 From: linma Date: Mon, 7 Mar 2022 13:58:50 -0800 Subject: [PATCH 2/2] lfo-loopvec: vectorize loop with flowdep distance and antidep distance larger than veclane --- src/mapleall/maple_me/include/lfo_dep_test.h | 4 ++ src/mapleall/maple_me/include/lfo_loop_vec.h | 6 +++ src/mapleall/maple_me/src/lfo_dep_test.cpp | 53 ++++++++++++++++++++ src/mapleall/maple_me/src/lfo_loop_vec.cpp | 41 ++++++++++++--- 4 files changed, 98 insertions(+), 6 deletions(-) diff --git a/src/mapleall/maple_me/include/lfo_dep_test.h b/src/mapleall/maple_me/include/lfo_dep_test.h index f6a3e0acaa..3f017eca5b 100644 --- a/src/mapleall/maple_me/include/lfo_dep_test.h +++ b/src/mapleall/maple_me/include/lfo_dep_test.h @@ -108,6 +108,10 @@ class DoloopInfo { bool CheckReductionLoop(); ArrayAccessDesc* GetArrayAccessDesc(const ArrayNode *node, bool isRHS); bool IsReductionVar(StIdx 
stidx) const { return (redVars.count(stidx) > 0); } + int HasTrueDepOnly(); // return min flow-dep distance + int HasAntiDepOnly(); // return max anti-dep distance + bool HasOutputDep(); + bool NotParallel(); // return true if doloop has complex issue we dont handle now }; class LfoDepInfo : public AnalysisResult { diff --git a/src/mapleall/maple_me/include/lfo_loop_vec.h b/src/mapleall/maple_me/include/lfo_loop_vec.h index e1ff1a7633..8a7cebc44f 100644 --- a/src/mapleall/maple_me/include/lfo_loop_vec.h +++ b/src/mapleall/maple_me/include/lfo_loop_vec.h @@ -46,6 +46,8 @@ class LoopVecInfo { currentRHSTypeSize = 0; currentLHSTypeSize = 0; widenop = 0; + minTrueDepDist = 0; + maxAntiDepDist = 0; hasRedvar = false; } virtual ~LoopVecInfo() = default; @@ -57,6 +59,8 @@ class LoopVecInfo { uint32_t currentRHSTypeSize; // largest size of current stmt's RHS, this is temp value and update for each stmt uint32_t currentLHSTypeSize; // record current stmt lhs type in vectorize phase uint32_t widenop; // can't handle t * t which t need widen operation + int16_t minTrueDepDist; + int16_t maxAntiDepDist; // negative value bool hasRedvar; // loop has reduction variable // list of vectorizable stmtnodes in current loop, others can't be vectorized MapleSet vecStmtIDs; @@ -104,6 +108,7 @@ class LoopVectorization { codeMP = lfoEmit->GetCodeMP(); codeMPAlloc = lfoEmit->GetCodeMPAlloc(); localMP = localmp; + isArraySub = false; enableDebug = debug; } ~LoopVectorization() = default; @@ -156,6 +161,7 @@ class LoopVectorization { MemPool *localMP; // local mempool MapleAllocator localAlloc; MapleMap vecPlans; // each vectoriable loopnode has its best vectorization plan + bool isArraySub; // current expression is used in array subscript bool enableDebug; }; } // namespace maple diff --git a/src/mapleall/maple_me/src/lfo_dep_test.cpp b/src/mapleall/maple_me/src/lfo_dep_test.cpp index a4d5316369..fd881e9e48 100644 --- a/src/mapleall/maple_me/src/lfo_dep_test.cpp +++ 
b/src/mapleall/maple_me/src/lfo_dep_test.cpp @@ -508,6 +508,59 @@ bool DoloopInfo::Parallelizable() { return true; } +// complex cases not handled: hasPtrAccess, hasOtherCtrlFlow, hasMayDef, or hasScalarAssign without CheckReductionLoop() +bool DoloopInfo::NotParallel() { + if (hasPtrAccess || hasOtherCtrlFlow || hasMayDef || + (hasScalarAssign && !CheckReductionLoop())) { + return true; + } + return false; +} + +bool DoloopInfo::HasOutputDep() { // true if any dependent pair in outputDepTestList has unknown or nonzero distance + for (size_t i = 0; i < outputDepTestList.size(); ++i) { + DepTestPair *testPair = &outputDepTestList[i]; + if (testPair->dependent && (testPair->unknownDist || testPair->depDist != 0)) { + return true; + } + } + return false; +} + +// returns 1 if any dependent pair in flowDepTestList has unknown or positive distance (i.e. not anti-dep only) +// otherwise returns the max (closest to zero) negative depDist, or 0 if no dependent pair has a negative distance +int DoloopInfo::HasAntiDepOnly() { + int depdist = INT32_MIN; + for (size_t i = 0; i < flowDepTestList.size(); ++i) { + DepTestPair *testPair = &flowDepTestList[i]; + if (testPair->dependent) { + if (testPair->unknownDist || testPair->depDist > 0) { + return 1; + } else if (testPair->depDist < 0) { + depdist = depdist < testPair->depDist ? testPair->depDist : depdist; // get max value + } + } + } + return (depdist == INT32_MIN) ? 0 : depdist; +} +// returns -1 if any dependent pair in flowDepTestList has unknown or negative distance, or if none has distance > 0 +// otherwise returns the min positive depDist (always > 0) +int DoloopInfo::HasTrueDepOnly() { + int depdist = INT32_MAX; + for (size_t i = 0; i < flowDepTestList.size(); ++i) { + DepTestPair *testPair = &flowDepTestList[i]; + if (testPair->dependent) { + if (testPair->unknownDist || testPair->depDist < 0) { + return -1; + } else if (testPair->depDist > 0) { + depdist = depdist < testPair->depDist ? depdist : testPair->depDist; // get min value + } + } + } + return (depdist == INT32_MAX) ? 
-1 : depdist; +} + + static bool IsDreadOf(BaseNode *x, StIdx stIdx, FieldID fieldID) { if (x->op != OP_dread) { return false; diff --git a/src/mapleall/maple_me/src/lfo_loop_vec.cpp b/src/mapleall/maple_me/src/lfo_loop_vec.cpp index a767296afd..3a49b73e6d 100644 --- a/src/mapleall/maple_me/src/lfo_loop_vec.cpp +++ b/src/mapleall/maple_me/src/lfo_loop_vec.cpp @@ -150,6 +150,30 @@ bool LoopTransPlan::Generate(const DoloopNode *doloop, const DoloopInfo* li, boo } return false; } + // if depdist is not zero + if (vecInfo->minTrueDepDist > 0 || vecInfo->maxAntiDepDist < 0) { + // true dep distance is less than vecLanes, return false + if ((vecInfo->minTrueDepDist > 0) && (vecInfo->minTrueDepDist < vecLanes)) { + if (enableDebug) { + LogInfo::MapleLogger() << "NOT VECTORIZABLE because true dependence distance less than veclanes in loop\n"; + } + return false; + } + // anti-dep distance doesn't break vectorization in case + // use before def like a[i] = a[i+1] + // if use is after def as following, distance less than vecLanes will break vectorization + // a[i] = + // = a[i+1] + // there's no extra information to describe sequence now + // we only handle one stmt in loopbody without considering anti-dep distance + if ((vecInfo->maxAntiDepDist < 0) && ((-vecInfo->maxAntiDepDist) < vecLanes) && + (doloop->GetDoBody()->GetFirst() != doloop->GetDoBody()->GetLast())) { + if (enableDebug) { + LogInfo::MapleLogger() << "NOT VECTORIZABLE because anti dependence distance less than veclanes in loop\n"; + } + return false; + } + } // compare trip count if lanes is larger than tripcount { BaseNode *initNode = doloop->GetStartExpr(); @@ -1580,10 +1604,7 @@ bool LoopVectorization::ExprVectorizable(DoloopInfo *doloopInfo, LoopVecInfo* ve case OP_constval: case OP_dread: case OP_addrof: { - PreMeMIRExtension* lfopart = (*PreMeExprExtensionMap)[x]; - CHECK_FATAL(lfopart, "nullptr check"); - BaseNode *parent = lfopart->GetParent(); - if (parent && parent->GetOpCode() == OP_array) { + if 
(isArraySub) { return true; } if (x->GetOpCode() == OP_constval) { @@ -1680,9 +1701,12 @@ bool LoopVectorization::ExprVectorizable(DoloopInfo *doloopInfo, LoopVecInfo* ve // supported n-ary ops case OP_array: { for (size_t i = 0; i < x->NumOpnds(); i++) { + isArraySub = true; if (!ExprVectorizable(doloopInfo, vecInfo, x->Opnd(i))) { + isArraySub = false; return false; } + isArraySub = false; } return true; } @@ -1896,8 +1920,7 @@ void LoopVectorization::Perform() { // step 2: collect information, legality check and generate transform plan MapleMap::iterator mapit = depInfo->doloopInfoMap.begin(); for (; mapit != depInfo->doloopInfoMap.end(); ++mapit) { - if (!mapit->second->children.empty() || - ((!mapit->second->Parallelizable()) && (!mapit->second->CheckReductionLoop()))) { + if (!mapit->second->children.empty() || mapit->second->NotParallel()) { continue; } // check in debug @@ -1905,6 +1928,12 @@ void LoopVectorization::Perform() { break; } LoopVecInfo *vecInfo = localMP->New(localAlloc); + if (mapit->second->HasTrueDepOnly() > 0) { + vecInfo->minTrueDepDist = mapit->second->HasTrueDepOnly(); + } + if (mapit->second->HasAntiDepOnly() < 0) { + vecInfo->maxAntiDepDist = mapit->second->HasAntiDepOnly(); + } bool vectorizable = Vectorizable(mapit->second, vecInfo, mapit->first->GetDoBody()); if (vectorizable) { LoopVectorization::vectorizedLoop++; -- Gitee