From 20f134f8cd76b4b58aecf5161513bede97af36d0 Mon Sep 17 00:00:00 2001 From: linma Date: Mon, 22 Nov 2021 08:46:49 -0800 Subject: [PATCH 1/2] lfoloopvec: fix a binary opcode vector type setting bug --- src/mapleall/maple_me/src/lfo_loop_vec.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mapleall/maple_me/src/lfo_loop_vec.cpp b/src/mapleall/maple_me/src/lfo_loop_vec.cpp index 30d3b1dc4b..1e814f5cdd 100644 --- a/src/mapleall/maple_me/src/lfo_loop_vec.cpp +++ b/src/mapleall/maple_me/src/lfo_loop_vec.cpp @@ -871,7 +871,12 @@ void LoopVectorization::VectorizeExpr(BaseNode *node, LoopTransPlan *tp, MapleVe BaseNode *vecn2 = i < vecopnd2.size() ? vecopnd2[i] : vecopnd2[0]; newbin->SetOpnd(vecn1, 0); newbin->SetOpnd(vecn2, 1); - newbin->SetPrimType(vecn1->GetPrimType()); // update primtype of binary op with opnd's type + if (GetVecLanes(vecn1->GetPrimType()) > 0) { + newbin->SetPrimType(vecn1->GetPrimType()); // update primtype of binary op with opnd's type + } else { + CHECK_FATAL(GetVecLanes(vecn2->GetPrimType()) > 0, "opnd2 should be vectype since opnd1 is scalar"); + newbin->SetPrimType(vecn2->GetPrimType()); // update primtype of binary op with opnd's type + } vectorizedNode.push_back(newbin); } } -- Gitee From e50ba57ba668011168b464f82c4025ee659b132b Mon Sep 17 00:00:00 2001 From: linma Date: Tue, 23 Nov 2021 20:26:42 -0800 Subject: [PATCH 2/2] lfo loopvec: can't vectorize doloop with complex condition node --- src/mapleall/maple_me/include/lfo_loop_vec.h | 4 +- src/mapleall/maple_me/src/lfo_loop_vec.cpp | 65 +++++++++++++------- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/src/mapleall/maple_me/include/lfo_loop_vec.h b/src/mapleall/maple_me/include/lfo_loop_vec.h index 9333ee0bea..c16e857281 100644 --- a/src/mapleall/maple_me/include/lfo_loop_vec.h +++ b/src/mapleall/maple_me/include/lfo_loop_vec.h @@ -44,6 +44,7 @@ class LoopVecInfo { largestTypeSize = 8; // type bit size smallestTypeSize = 64; // i64 bit size 
currentRHSTypeSize = 0; + currentLHSTypeSize = 0; widenop = 0; hasRedvar = false; } @@ -54,6 +55,7 @@ class LoopVecInfo { uint32_t largestTypeSize; // largest size type in vectorizable stmtnodes uint32_t smallestTypeSize; // smallest size type in vectorizable stmtnodes uint32_t currentRHSTypeSize; // largest size of current stmt's RHS, this is temp value and update for each stmt + uint32_t currentLHSTypeSize; // record current stmt lhs type in vectorize phase uint32_t widenop; // can't handle t * t which t need widen operation bool hasRedvar; // loop has reduction variable // list of vectorizable stmtnodes in current loop, others can't be vectorized @@ -87,7 +89,7 @@ class LoopTransPlan { LoopVecInfo *vecInfo; // collect loop information BaseNode *const0Node; // zero const used in reduction variable // function - bool Generate(DoloopNode *, DoloopInfo *); + bool Generate(DoloopNode *, DoloopInfo *, bool); void GenerateBoundInfo(DoloopNode *, DoloopInfo *); }; diff --git a/src/mapleall/maple_me/src/lfo_loop_vec.cpp b/src/mapleall/maple_me/src/lfo_loop_vec.cpp index 1e814f5cdd..41612b2c7e 100644 --- a/src/mapleall/maple_me/src/lfo_loop_vec.cpp +++ b/src/mapleall/maple_me/src/lfo_loop_vec.cpp @@ -74,6 +74,7 @@ void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { MIRIntConst *newIncr = GlobalTables::GetIntConstTable().GetOrCreateIntConst( vecFactor * incrConst->GetValue(), *typeInt); ConstvalNode *newIncrNode = codeMP->New(PTY_i32, newIncr); + PrimType newOpndtype = (static_cast(condNode))->GetOpndType() == PTY_ptr ? 
PTY_i64 : PTY_i32; if (initNode->IsConstval()) { ConstvalNode *lcn = static_cast(initNode); MIRIntConst *lowConst = static_cast(lcn->GetConstVal()); @@ -104,16 +105,16 @@ void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { BinaryNode *divnode = nullptr; BaseNode *addnode = upNode; if (condOpHasEqual) { - addnode = codeMP->New(OP_add, PTY_i32, upNode, constOnenode); + addnode = codeMP->New(OP_add, newOpndtype, upNode, constOnenode); } if (lowvalue != 0) { - BinaryNode *subnode = codeMP->New(OP_sub, PTY_i32, addnode, initNode); - divnode = codeMP->New(OP_div, PTY_i32, subnode, newIncrNode); + BinaryNode *subnode = codeMP->New(OP_sub, newOpndtype, addnode, initNode); + divnode = codeMP->New(OP_div, newOpndtype, subnode, newIncrNode); } else { - divnode = codeMP->New(OP_div, PTY_i32, addnode, newIncrNode); + divnode = codeMP->New(OP_div, newOpndtype, addnode, newIncrNode); } - BinaryNode *mulnode = codeMP->New(OP_mul, PTY_i32, divnode, newIncrNode); - addnode = codeMP->New(OP_add, PTY_i32, mulnode, initNode); + BinaryNode *mulnode = codeMP->New(OP_mul, newOpndtype, divnode, newIncrNode); + addnode = codeMP->New(OP_add, newOpndtype, mulnode, initNode); vBound = localMP->New(nullptr, addnode, newIncrNode); // step2: generate epilog bound eBound = localMP->New(addnode, nullptr, nullptr); @@ -123,14 +124,14 @@ void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { // set bound of vectorized loop BinaryNode *subnode = nullptr; if (condOpHasEqual) { - BinaryNode *addnode = codeMP->New(OP_add, PTY_i32, upNode, constOnenode); - subnode = codeMP->New(OP_sub, PTY_i32, addnode, initNode); + BinaryNode *addnode = codeMP->New(OP_add, newOpndtype, upNode, constOnenode); + subnode = codeMP->New(OP_sub, newOpndtype, addnode, initNode); } else { - subnode = codeMP->New(OP_sub, PTY_i32, upNode, initNode); + subnode = codeMP->New(OP_sub, newOpndtype, upNode, initNode); } - BinaryNode *divnode = codeMP->New(OP_div, PTY_i32, subnode, 
newIncrNode); - BinaryNode *mulnode = codeMP->New(OP_mul, PTY_i32, divnode, newIncrNode); - BinaryNode *addnode = codeMP->New(OP_add, PTY_i32, mulnode, initNode); + BinaryNode *divnode = codeMP->New(OP_div, newOpndtype, subnode, newIncrNode); + BinaryNode *mulnode = codeMP->New(OP_mul, newOpndtype, divnode, newIncrNode); + BinaryNode *addnode = codeMP->New(OP_add, newOpndtype, mulnode, initNode); vBound = localMP->New(nullptr, addnode, newIncrNode); // set bound of epilog loop eBound = localMP->New(addnode, nullptr, nullptr); @@ -138,12 +139,15 @@ void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { } // generate best plan for current doloop -bool LoopTransPlan::Generate(DoloopNode *doloop, DoloopInfo* li) { +bool LoopTransPlan::Generate(DoloopNode *doloop, DoloopInfo* li, bool enableDebug) { // vector length / type size vecLanes = MAX_VECTOR_LENGTH_SIZE / (vecInfo->largestTypeSize); vecFactor = vecLanes; // return false if small type has no builtin vector type if (vecFactor * vecInfo->smallestTypeSize < 64) { + if (enableDebug) { + LogInfo::MapleLogger() << "NOT VECTORIZABLE because no builtin vector type for smallestType in loop\n"; + } return false; } // compare trip count if lanes is larger than tripcount @@ -152,6 +156,18 @@ bool LoopTransPlan::Generate(DoloopNode *doloop, DoloopInfo* li) { BaseNode *incrNode = doloop->GetIncrExpr(); BaseNode *condNode = doloop->GetCondExpr(); BaseNode *upNode = condNode->Opnd(1); + BaseNode *condOpnd0 = condNode->Opnd(0); + + // bail out when opnd0 of condNode is an expression rather than a plain variable read (dread/regread): + // the upperbound formula doesn't handle this case now + if (condOpnd0->GetOpCode() != OP_dread && + condOpnd0->GetOpCode() != OP_regread) { + if (enableDebug) { + LogInfo::MapleLogger() << "NOT VECTORIZABLE because of doloop condition compare is complex \n"; + } + return false; + } + bool condOpHasEqual = ((condNode->GetOpCode() == OP_le) || (condNode->GetOpCode() == OP_ge)); if (initNode->IsConstval() && 
upNode->IsConstval() && incrNode->IsConstval()) { ConstvalNode *lcn = static_cast(initNode); @@ -166,6 +182,9 @@ bool LoopTransPlan::Generate(DoloopNode *doloop, DoloopInfo* li) { upvalue += 1; } if (((upvalue - lowvalue) / (incrConst->GetValue())) < vecLanes) { + if (enableDebug) { + LogInfo::MapleLogger() << "NOT VECTORIZABLE because of doloop trip count is small \n"; + } return false; } } @@ -795,6 +814,7 @@ void LoopVectorization::VectorizeExpr(BaseNode *node, LoopTransPlan *tp, MapleVe PrimType optype = node->GetPrimType(); node->SetPrimType(vecType->GetPrimType()); if ((depth == 0) && + (tp->vecInfo->currentLHSTypeSize > GetPrimTypeSize(GetVecElemPrimType(vecType->GetPrimType()))) && ((GetPrimTypeSize(optype) / GetPrimTypeSize(GetVecElemPrimType(vecType->GetPrimType()))) > 2)) { // widen node type: split two nodes if (GetPrimTypeSize(vecType->GetPrimType()) == 16) { @@ -861,6 +881,7 @@ void LoopVectorization::VectorizeExpr(BaseNode *node, LoopTransPlan *tp, MapleVe // widen instruction (addl/subl) need two operands with same vectype if ((PTY_begin != GetVecElemPrimType(opnd0PrimType)) && (PTY_begin != GetVecElemPrimType(opnd1PrimType)) && + (tp->vecInfo->currentLHSTypeSize > GetPrimTypeSize(GetVecElemPrimType(opnd0PrimType))) && CanWidenOpcode(node, GetVecElemPrimType(opnd0PrimType))) { GenWidenBinaryExpr(binNode->GetOpCode(), vecopnd1, vecopnd2, vectorizedNode); } else { @@ -950,6 +971,7 @@ void LoopVectorization::VectorizeStmt(BaseNode *node, LoopTransPlan *tp) { MIRPtrType *ptrType = static_cast(&mirType); MIRType *lhsvecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), tp->vecFactor); ASSERT(lhsvecType != nullptr, "vector type should not be null"); + tp->vecInfo->currentLHSTypeSize = GetPrimTypeSize(GetVecElemPrimType(lhsvecType->GetPrimType())); MIRType *pvecType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*lhsvecType, PTY_ptr); // update lhs type iassign->SetTyIdx(pvecType->GetTypeIndex()); @@ -986,18 +1008,19 @@ void 
LoopVectorization::VectorizeStmt(BaseNode *node, LoopTransPlan *tp) { MapleVector vecOpnd(localAlloc.Adapter()); LfoPart *lfopart = (*lfoStmtParts)[dassign->GetStmtID()]; BlockNode *doloopbody = static_cast(lfopart->GetParent()); - // rhsvecNode : vectorizable_expr - BaseNode *rhsvecNode = dassign->GetRHS()->Opnd(1); - // skip vectorizing uniform node - if (tp->vecInfo->uniformNodes.find(rhsvecNode) == tp->vecInfo->uniformNodes.end()) { - VectorizeExpr(rhsvecNode, tp, vecOpnd, 0); - } RegreadNode *regReadlhsvec; if (tp->vecInfo->redVecNodes.find(dassign->GetStIdx()) != tp->vecInfo->redVecNodes.end()) { regReadlhsvec = static_cast(tp->vecInfo->redVecNodes[dassign->GetStIdx()]); } else { regReadlhsvec = GenVectorRedVarInit(dassign->GetStIdx(), tp); } + tp->vecInfo->currentLHSTypeSize = GetPrimTypeSize(GetVecElemPrimType(regReadlhsvec->GetPrimType())); + // skip vectorizing uniform node + // rhsvecNode : vectorizable_expr + BaseNode *rhsvecNode = dassign->GetRHS()->Opnd(1); + if (tp->vecInfo->uniformNodes.find(rhsvecNode) == tp->vecInfo->uniformNodes.end()) { + VectorizeExpr(rhsvecNode, tp, vecOpnd, 0); + } // use widen intrinsic if ((GetPrimTypeSize(GetVecElemPrimType(regReadlhsvec->GetPrimType())) * 8 * tp->vecFactor) > MAX_VECTOR_LENGTH_SIZE) { @@ -1540,10 +1563,8 @@ void LoopVectorization::Perform() { } // generate vectorize plan; LoopTransPlan *tplan = localMP->New(codeMP, localMP, vecInfo); - if (tplan->Generate(mapit->first, mapit->second)) { + if (tplan->Generate(mapit->first, mapit->second, enableDebug)) { vecPlans[mapit->first] = tplan; - } else if (enableDebug) { - LogInfo::MapleLogger() << "NOT VECTORIZABLED because of gap between smallest and largest type in loop \n"; } } // step 3: do transform -- Gitee