From 719e23cc838ad719a0c1110211eb3508a20d1c89 Mon Sep 17 00:00:00 2001 From: Alfred Huang Date: Tue, 29 Mar 2022 17:10:54 -0700 Subject: [PATCH 1/4] Fixed invalid "movi vX.2d, #const" errors. --- .../maple_be/src/cg/aarch64/aarch64_cgfunc.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp index 03cecca359..1dff6f3ab7 100644 --- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp @@ -4370,11 +4370,18 @@ Operand *AArch64CGFunc::SelectShift(BinaryNode &node, Operand &opnd0, Operand &o isOneElemVector = symbol->GetAttr(ATTR_oneelem_simd); } + Operand *opd0 = &opnd0; + PrimType otyp0 = expr->GetPrimType(); + if (IsPrimitiveVector(dtype) && opnd0.IsConstImmediate()) { + opd0 = SelectVectorFromScalar(dtype, opd0, node.Opnd(0)->GetPrimType()); + otyp0 = dtype; + } + if (IsPrimitiveVector(dtype) && opnd1.IsConstImmediate()) { int64 sConst = static_cast(opnd1).GetValue(); - resOpnd = SelectVectorShiftImm(dtype, &opnd0, &opnd1, static_cast(sConst), opcode); + resOpnd = SelectVectorShiftImm(dtype, opd0, &opnd1, static_cast(sConst), opcode); } else if ((IsPrimitiveVector(dtype) || isOneElemVector) && !opnd1.IsConstImmediate()) { - resOpnd = SelectVectorShift(dtype, &opnd0, expr->GetPrimType(), &opnd1, node.Opnd(1)->GetPrimType(), opcode); + resOpnd = SelectVectorShift(dtype, opd0, otyp0, &opnd1, node.Opnd(1)->GetPrimType(), opcode); } else { PrimType primType = isFloat ? dtype : (is64Bits ? (isSigned ? PTY_i64 : PTY_u64) : (isSigned ? PTY_i32 : PTY_u32)); resOpnd = &GetOrCreateResOperand(parent, primType); @@ -9989,7 +9996,9 @@ RegOperand *AArch64CGFunc::SelectVectorImmMov(PrimType rType, Operand *src, Prim int64 val = static_cast(src)->GetValue(); /* copy the src imm operand to a reg if out of range */ - if ((GetPrimTypeSize(sType) > k4ByteSize && val != 0) || val < kMinImmVal || val > kMaxImmVal) { + if ((GetVecEleSize(rType) >= k64BitSize) || + (GetPrimTypeSize(sType) > k4ByteSize && val != 0) || + (val < kMinImmVal || val > kMaxImmVal)) { Operand *reg = &CreateRegisterOperandOfType(sType); SelectCopy(*reg, sType, *src, sType); return SelectVectorRegMov(rType, reg, sType); @@ -10271,7 +10280,7 @@ void AArch64CGFunc::PrepareVectorOperands(Operand **o1, PrimType &oty1, Operand bool immOpnd = false; if (opd->IsConstImmediate()) { int64 val = static_cast(opd)->GetValue(); - if (val >= kMinImmVal && val <= kMaxImmVal) { + if (val >= kMinImmVal && val <= kMaxImmVal && GetVecEleSize(rType) < k64BitSize) { immOpnd = true; } else { RegOperand *regOpd = &CreateRegisterOperandOfType(origTyp); -- Gitee From 4c2c0746d0922a2c1ce52435121ab288125195b7 Mon Sep 17 00:00:00 2001 From: Fred Chow Date: Tue, 29 Mar 2022 20:25:11 -0700 Subject: [PATCH 2/4] make dependency test take into account array bases may have aliases due to being pointers --- src/mapleall/maple_me/include/lfo_dep_test.h | 6 +-- src/mapleall/maple_me/src/lfo_dep_test.cpp | 54 ++++++++++++++------ 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src/mapleall/maple_me/include/lfo_dep_test.h b/src/mapleall/maple_me/include/lfo_dep_test.h index 2e7987edbb..8e15f6e811 100644 --- a/src/mapleall/maple_me/include/lfo_dep_test.h +++ b/src/mapleall/maple_me/include/lfo_dep_test.h @@ -43,12 +43,12 @@ class SubscriptDesc{ class ArrayAccessDesc { public: ArrayNode *theArray; - OriginalSt *arrayOst = nullptr; // if null, array base is an invariant expression + MapleSet *arrayOstIdxSet = nullptr; // if null, array base is an invariant expression MapleVector subscriptVec; // describe the subscript of each array dimension - ArrayAccessDesc(MapleAllocator *alloc, ArrayNode *arr, OriginalSt *arryOst) + ArrayAccessDesc(MapleAllocator *alloc, ArrayNode *arr, MapleSet *arryOstSet) : theArray(arr), - arrayOst(arryOst), + arrayOstIdxSet(arryOstSet), subscriptVec(alloc->Adapter()) {} virtual ~ArrayAccessDesc() = default; }; diff --git a/src/mapleall/maple_me/src/lfo_dep_test.cpp b/src/mapleall/maple_me/src/lfo_dep_test.cpp index 5aeeb55e1c..d95bd925e2 100644 --- a/src/mapleall/maple_me/src/lfo_dep_test.cpp +++ b/src/mapleall/maple_me/src/lfo_dep_test.cpp @@ -287,32 +287,36 @@ ArrayAccessDesc *DoloopInfo::BuildOneArrayAccessDesc(ArrayNode *arr, BaseNode *p hasPtrAccess = true; return nullptr; } - // determine arryOst + // determine OStIdx for the array IvarMeExpr *ivarMeExpr = nullptr; - OriginalSt *arryOst = nullptr; + MapleSet *arryOstIdxSet = nullptr; OpMeExpr *arrayMeExpr = static_cast(depInfo->preEmit->GetMexpr(arr)); if (arrayMeExpr == nullptr || arrayMeExpr->GetOp() == OP_add) { // the array is converted from add } else if (parentNode->op == OP_iread) { ivarMeExpr = static_cast(depInfo->preEmit->GetMexpr(parentNode)); CHECK_FATAL(ivarMeExpr->GetMu() != nullptr, "BuildOneArrayAccessDesc: no mu corresponding to iread"); - arryOst = ivarMeExpr->GetMu()->GetOst(); + arryOstIdxSet = alloc->GetMemPool()->New>(alloc->Adapter()); + arryOstIdxSet->insert(ivarMeExpr->GetMu()->GetOst()->GetIndex()); } else if (parentNode->op == OP_iassign) { IassignMeStmt *iassMeStmt = static_cast(depInfo->preEmit-> GetMeStmt(static_cast(parentNode)->GetStmtID())); ivarMeExpr = iassMeStmt->GetLHSVal(); + arryOstIdxSet = alloc->GetMemPool()->New>(alloc->Adapter()); if (ivarMeExpr->GetMu()) { - arryOst = ivarMeExpr->GetMu()->GetOst(); + arryOstIdxSet->insert(ivarMeExpr->GetMu()->GetOst()->GetIndex()); } else { MapleMap *chiList = iassMeStmt->GetChiList(); CHECK_FATAL(!chiList->empty(), "BuildOneArrayAccessDesc: no chi corresponding to iassign"); - arryOst = depInfo->preMeFunc->meFunc->GetMeSSATab()->GetOriginalStFromID(chiList->begin()->first); + for (std::pair mapIt : *chiList) { + arryOstIdxSet->insert(mapIt.first); + } } } else { hasPtrAccess = true; return nullptr; } - ArrayAccessDesc *arrDesc = alloc->GetMemPool()->New(alloc, arr, arryOst); + ArrayAccessDesc *arrDesc = alloc->GetMemPool()->New(alloc, arr, arryOstIdxSet); if (parentNode->op == OP_iassign) { lhsArrays.push_back(arrDesc); } else { @@ -398,15 +402,26 @@ void DoloopInfo::CreateArrayAccessDesc(BlockNode *block) { } } +static bool ArrayOstIdxSetIntersect(MapleSet *set1, MapleSet *set2) { + for (OStIdx oidx1 : *set1) { + for (OStIdx oidx2 : *set2) { + if (oidx1 == oidx2) { + return true; + } + } + } + return false; +} + void DoloopInfo::CreateDepTestLists() { size_t i, j; for (i = 0; i < lhsArrays.size(); ++i) { for (j = i + 1; j < lhsArrays.size(); ++j) { - if (lhsArrays[i]->arrayOst != nullptr && lhsArrays[j]->arrayOst != nullptr) { - if (lhsArrays[i]->arrayOst->IsSameSymOrPreg(lhsArrays[j]->arrayOst)) { + if (lhsArrays[i]->arrayOstIdxSet != nullptr && lhsArrays[j]->arrayOstIdxSet != nullptr) { + if (ArrayOstIdxSetIntersect(lhsArrays[i]->arrayOstIdxSet, lhsArrays[j]->arrayOstIdxSet)) { (void)outputDepTestList.emplace_back(DepTestPair(i, j)); } - } else if (lhsArrays[i]->arrayOst == nullptr && lhsArrays[j]->arrayOst == nullptr) { + } else if (lhsArrays[i]->arrayOstIdxSet == nullptr && lhsArrays[j]->arrayOstIdxSet == nullptr) { BaseNode *arry0 = lhsArrays[i]->theArray->Opnd(0); BaseNode *arry1 = lhsArrays[j]->theArray->Opnd(0); if (depInfo->preEmit->GetMexpr(arry0) == depInfo->preEmit->GetMexpr(arry1)) { @@ -417,11 +432,11 @@ void DoloopInfo::CreateDepTestLists() { } for (i = 0; i < lhsArrays.size(); ++i) { for (j = 0; j < rhsArrays.size(); ++j) { - if (lhsArrays[i]->arrayOst != nullptr && rhsArrays[j]->arrayOst != nullptr) { - if (lhsArrays[i]->arrayOst->IsSameSymOrPreg(rhsArrays[j]->arrayOst)) { + if (lhsArrays[i]->arrayOstIdxSet != nullptr && rhsArrays[j]->arrayOstIdxSet != nullptr) { + if (ArrayOstIdxSetIntersect(lhsArrays[i]->arrayOstIdxSet, rhsArrays[j]->arrayOstIdxSet)) { (void)flowDepTestList.emplace_back(DepTestPair(i, j)); } - } else if (lhsArrays[i]->arrayOst == nullptr && rhsArrays[j]->arrayOst == nullptr) { + } else if (lhsArrays[i]->arrayOstIdxSet == nullptr && rhsArrays[j]->arrayOstIdxSet == nullptr) { BaseNode *arry0 = lhsArrays[i]->theArray->Opnd(0); BaseNode *arry1 = rhsArrays[j]->theArray->Opnd(0); if (depInfo->preEmit->GetMexpr(arry0) == depInfo->preEmit->GetMexpr(arry1)) { @@ -676,10 +691,14 @@ void LfoDepInfo::PerformDepTest() { for (i = 0; i < doloopInfo->lhsArrays.size(); ++i) { ArrayAccessDesc *arrAcc = doloopInfo->lhsArrays[i]; LogInfo::MapleLogger() << "(L" << i << ") "; - if (arrAcc->arrayOst == nullptr) { + if (arrAcc->arrayOstIdxSet == nullptr) { arrAcc->theArray->Opnd(0)->Dump(0); } else { - arrAcc->arrayOst->Dump(); + for (OStIdx oidx : *arrAcc->arrayOstIdxSet) { + OriginalSt *ost = preMeFunc->meFunc->GetMeSSATab()->GetOriginalStFromID(oidx); + ost->Dump(); + LogInfo::MapleLogger() << "| "; + } } LogInfo::MapleLogger() << " subscripts:"; for (SubscriptDesc *subs : arrAcc->subscriptVec) { @@ -704,10 +723,13 @@ void LfoDepInfo::PerformDepTest() { for (i = 0; i < doloopInfo->rhsArrays.size(); ++i) { ArrayAccessDesc *arrAcc = doloopInfo->rhsArrays[i]; LogInfo::MapleLogger() << "(R" << i << ") "; - if (arrAcc->arrayOst == nullptr) { + if (arrAcc->arrayOstIdxSet == nullptr) { arrAcc->theArray->Opnd(0)->Dump(0); } else { - arrAcc->arrayOst->Dump(); + for (OStIdx oidx : *arrAcc->arrayOstIdxSet) { + OriginalSt *ost = preMeFunc->meFunc->GetMeSSATab()->GetOriginalStFromID(oidx); + ost->Dump(); + } } LogInfo::MapleLogger() << " subscripts:"; for (SubscriptDesc *subs : arrAcc->subscriptVec) { -- Gitee From 46ee1bd74c1f7e8e6f60394e6d5b772249cadc51 Mon Sep 17 00:00:00 2001 From: linma Date: Tue, 29 Mar 2022 21:05:09 -0700 Subject: [PATCH 3/4] lfoloopvec: support two cases in a vectorizable loop, scalar variable is induction variable and lhs is iassign but used like a scalar --- src/mapleall/maple_me/include/lfo_dep_test.h | 1 + src/mapleall/maple_me/include/lfo_loop_vec.h | 35 +- src/mapleall/maple_me/include/lfo_unroll.h | 1 - src/mapleall/maple_me/src/lfo_dep_test.cpp | 14 + src/mapleall/maple_me/src/lfo_loop_vec.cpp | 388 ++++++++++++------ src/mapleall/maple_me/src/lfo_unroll.cpp | 18 +- .../maple_me/src/me_value_range_prop.cpp | 3 +- 7 files changed, 313 insertions(+), 147 deletions(-) diff --git a/src/mapleall/maple_me/include/lfo_dep_test.h b/src/mapleall/maple_me/include/lfo_dep_test.h index 8e15f6e811..d3ec059ca1 100644 --- a/src/mapleall/maple_me/include/lfo_dep_test.h +++ b/src/mapleall/maple_me/include/lfo_dep_test.h @@ -98,6 +98,7 @@ class DoloopInfo { ~DoloopInfo() = default; bool IsLoopInvariant(MeExpr *x); bool IsLoopInvariant2(BaseNode *x); + bool IsLoopIVNode(const BaseNode *) const; bool OnlyInvariantScalars(MeExpr *x); SubscriptDesc *BuildOneSubscriptDesc(BaseNode *subsX); ArrayAccessDesc *BuildOneArrayAccessDesc(ArrayNode *arr, BaseNode *parent); diff --git a/src/mapleall/maple_me/include/lfo_loop_vec.h b/src/mapleall/maple_me/include/lfo_loop_vec.h index 8a7cebc44f..e60df3915b 100644 --- a/src/mapleall/maple_me/include/lfo_loop_vec.h +++ b/src/mapleall/maple_me/include/lfo_loop_vec.h @@ -39,6 +39,9 @@ class LoopVecInfo { uniformVecNodes(alloc.Adapter()), constvalTypes(alloc.Adapter()), redVecNodes(alloc.Adapter()), + reductionStmts(alloc.Adapter()), + ivNodes(alloc.Adapter()), + ivVecNodes(alloc.Adapter()), beforeLoopStmts(alloc.Adapter()), afterLoopStmts(alloc.Adapter()) { largestTypeSize = 8; // type bit size @@ -48,7 +51,8 @@ class LoopVecInfo { widenop = 0; minTrueDepDist = 0; maxAntiDepDist = 0; - hasRedvar = false; + ivConstArraySym = nullptr; + ivvecIncrStmt = nullptr; } virtual ~LoopVecInfo() = default; void UpdateWidestTypeSize(uint32_t); @@ -61,16 +65,20 @@ class LoopVecInfo { uint32_t widenop; // can't handle t * t which t need widen operation int16_t minTrueDepDist; int16_t maxAntiDepDist; // negative value - bool hasRedvar; // loop has reduction variable // list of vectorizable stmtnodes in current loop, others can't be vectorized MapleSet vecStmtIDs; MapleSet uniformNodes; // loop invariable scalar set MapleMap uniformVecNodes; // new generated vector node // constval node need to adjust with new PrimType MapleMap constvalTypes; - MapleMap redVecNodes; // new generate vector node + MapleMap redVecNodes; // new generate vector node + MapleSet reductionStmts; // loop invariable scalar set + MapleSet ivNodes; // induction variable used in stmt + MapleMap ivVecNodes; // induction variable used in stmt MapleVector beforeLoopStmts; MapleVector afterLoopStmts; + MIRSymbol *ivConstArraySym; + StmtNode *ivvecIncrStmt; }; // tranform plan for current loop @@ -79,7 +87,6 @@ class LoopTransPlan { LoopTransPlan(MemPool *mp, MemPool *localmp, LoopVecInfo *info) : vBound(nullptr), eBound(nullptr), codeMP(mp), localMP(localmp), vecInfo(info) { vecFactor = 1; - const0Node = nullptr; } ~LoopTransPlan() = default; LoopBound *vBound = nullptr; // bound of vectorized part @@ -91,7 +98,6 @@ class LoopTransPlan { MemPool *codeMP = nullptr; // use to generate new bound node MemPool *localMP = nullptr; // use to generate local info LoopVecInfo *vecInfo = nullptr; // collect loop information - BaseNode *const0Node = nullptr; // zero const used in reduction variable // function bool Generate(const DoloopNode *, const DoloopInfo *, bool); void GenerateBoundInfo(const DoloopNode *doloop, const DoloopInfo *li); @@ -108,6 +114,10 @@ class LoopVectorization { codeMP = lfoEmit->GetCodeMP(); codeMPAlloc = lfoEmit->GetCodeMPAlloc(); localMP = localmp; + const0Node = nullptr; + initIVv4Sym = nullptr; + initIVv8Sym = nullptr; + initIVv2Sym = nullptr; isArraySub = false; enableDebug = debug; } @@ -145,10 +155,15 @@ class LoopVectorization { IntrinsicopNode *GenVectorNarrowLowNode(BaseNode *, PrimType); void GenWidenBinaryExpr(Opcode, MapleVector&, MapleVector&, MapleVector&); BaseNode* ConvertNodeType(bool, BaseNode*); - RegreadNode* GenVectorRedVarInit(StIdx, LoopTransPlan *); MIRIntrinsicID GenVectorAbsSublID(MIRIntrinsicID intrnID) const; - static uint32_t vectorizedLoop; + private: + RegreadNode* GenVectorReductionVar(StmtNode* , LoopTransPlan *); + bool IassignIsReduction(IassignNode *iassign, LoopVecInfo* vecInfo); + RegreadNode *GetorNewVectorReductionVar(StmtNode *stmt, LoopTransPlan *tp); + MIRType *VectorizeIassignLhs(IassignNode *iassign, LoopTransPlan *tp); + void VectorizeReductionStmt(StmtNode *stmt, LoopTransPlan *tp); + void GenConstVar(LoopVecInfo *, uint8_t); private: MIRFunction *mirFunc; // point to PreMeStmtExtensionMap of PreMeEmitter, key is stmtID @@ -160,7 +175,11 @@ class LoopVectorization { MapleAllocator *codeMPAlloc; MemPool *localMP; // local mempool MapleAllocator localAlloc; - MapleMap vecPlans; // each vectoriable loopnode has its best vectorization plan + MapleMap vecPlans; // each loopnode has its best vectorization plan + BaseNode *const0Node = nullptr; // zero const used in reduction variable + MIRSymbol *initIVv4Sym = nullptr; // constant array symbol used by vectorizing induction variable + MIRSymbol *initIVv8Sym = nullptr; + MIRSymbol *initIVv2Sym = nullptr; bool isArraySub; // current expression is used in array subscript bool enableDebug; }; diff --git a/src/mapleall/maple_me/include/lfo_unroll.h b/src/mapleall/maple_me/include/lfo_unroll.h index f0fbd36166..381540753d 100644 --- a/src/mapleall/maple_me/include/lfo_unroll.h +++ b/src/mapleall/maple_me/include/lfo_unroll.h @@ -32,7 +32,6 @@ class LfoUnrollOneLoop { mirBuilder(mirModule->GetMIRBuilder()) {} ~LfoUnrollOneLoop() = default; BaseNode *CloneIVNode(); - bool IsIVNode(const BaseNode *x) const; void ReplaceIV(BaseNode *x, BaseNode *repNode); BlockNode *DoFullUnroll(size_t tripCount); BlockNode *DoUnroll(size_t times, size_t tripCount); diff --git a/src/mapleall/maple_me/src/lfo_dep_test.cpp b/src/mapleall/maple_me/src/lfo_dep_test.cpp index d95bd925e2..ec93c5075f 100644 --- a/src/mapleall/maple_me/src/lfo_dep_test.cpp +++ b/src/mapleall/maple_me/src/lfo_dep_test.cpp @@ -147,6 +147,20 @@ bool DoloopInfo::IsLoopInvariant2(BaseNode *x) { return true; } +bool DoloopInfo::IsLoopIVNode(const BaseNode *x) const { + if (doloop->IsPreg()) { + if (x->GetOpCode() != OP_regread) { + return false; + } + return static_cast(x)->GetRegIdx() == doloop->GetDoVarPregIdx(); + } else { + if (x->GetOpCode() != OP_dread) { + return false; + } + return static_cast(x)->GetStIdx() == doloop->GetDoVarStIdx(); + } +} + // check if all the scalars contained in x are loop-invariant unless it is IV bool DoloopInfo::OnlyInvariantScalars(MeExpr *x) { if (x == nullptr) { diff --git a/src/mapleall/maple_me/src/lfo_loop_vec.cpp b/src/mapleall/maple_me/src/lfo_loop_vec.cpp index b3a637800b..1a9ae9394a 100644 --- a/src/mapleall/maple_me/src/lfo_loop_vec.cpp +++ b/src/mapleall/maple_me/src/lfo_loop_vec.cpp @@ -220,12 +220,6 @@ bool LoopTransPlan::Generate(const DoloopNode *doloop, const DoloopInfo* li, boo } } } - // create zero node - if (vecInfo->hasRedvar) { - MIRType &typeInt = *GlobalTables::GetTypeTable().GetPrimType(PTY_i32); - MIRIntConst *constZero = GlobalTables::GetIntConstTable().GetOrCreateIntConst(0, typeInt); - const0Node = codeMP->New(PTY_i32, constZero); - } // generate bound information GenerateBoundInfo(doloop, li); return true; @@ -1142,19 +1136,40 @@ IntrinsicopNode *LoopVectorization::GenVectorNarrowLowNode(BaseNode *opnd, PrimT } // create vectorized preg for reduction var and its init stmt vpreg = dup_scalar(0) -RegreadNode *LoopVectorization::GenVectorRedVarInit(StIdx redStIdx, LoopTransPlan *tp) { - MIRSymbol *lhsSym = mirFunc->GetLocalOrGlobalSymbol(redStIdx); - MIRType &lhsType = GetTypeFromTyIdx(lhsSym->GetTyIdx()); - uint32_t lhstypesize = GetPrimTypeSize(lhsType.GetPrimType()) * 8; - uint32_t lhsMaxLanes = ((MAX_VECTOR_LENGTH_SIZE / lhstypesize) < tp->vecFactor) ? - (MAX_VECTOR_LENGTH_SIZE / lhstypesize) : tp->vecFactor; - MIRType *lhsvecType = GenVecType(lhsType.GetPrimType(), static_cast(lhsMaxLanes)); +RegreadNode *LoopVectorization::GenVectorReductionVar(StmtNode *stmt, LoopTransPlan *tp) { + PrimType lhsType; + uint8 lhsLanes = tp->vecFactor; + if (stmt->GetOpCode() == OP_dassign) { + StIdx redStIdx = (static_cast(stmt))->GetStIdx(); + MIRSymbol *lhsSym = mirFunc->GetLocalOrGlobalSymbol(redStIdx); + lhsType = GetTypeFromTyIdx(lhsSym->GetTyIdx()).GetPrimType(); + uint32_t lhstypesize = GetPrimTypeSize(lhsType) * 8; + uint32_t lhsMaxLanes = ((MAX_VECTOR_LENGTH_SIZE / lhstypesize) < tp->vecFactor) ? + (MAX_VECTOR_LENGTH_SIZE / lhstypesize) : tp->vecFactor; + lhsLanes = lhsMaxLanes; + } else { + IassignNode *iassign = static_cast(stmt); + MIRType &mirType = GetTypeFromTyIdx(iassign->GetTyIdx()); + lhsType = (static_cast(&mirType))->GetPointedType()->GetPrimType(); + } + MIRType *lhsvecType = GenVecType(lhsType, static_cast(lhsLanes)); PregIdx reglhsvec = mirFunc->GetPregTab()->CreatePreg(lhsvecType->GetPrimType()); - IntrinsicopNode *lhsvecIntrn = GenDupScalarExpr(tp->const0Node, lhsvecType->GetPrimType()); + IntrinsicopNode *lhsvecIntrn = GenDupScalarExpr(const0Node, lhsvecType->GetPrimType()); RegassignNode *initlhsvec = codeMP->New(lhsvecType->GetPrimType(), reglhsvec, lhsvecIntrn); tp->vecInfo->beforeLoopStmts.push_back(initlhsvec); RegreadNode *regReadlhsvec = codeMP->New(lhsvecType->GetPrimType(), reglhsvec); - tp->vecInfo->redVecNodes[redStIdx] = regReadlhsvec; + tp->vecInfo->redVecNodes[stmt] = regReadlhsvec; + return regReadlhsvec; +} + +// check vectorized preg is generated for current stmt +RegreadNode *LoopVectorization::GetorNewVectorReductionVar(StmtNode *stmt, LoopTransPlan *tp) { + RegreadNode *regReadlhsvec; + if (tp->vecInfo->redVecNodes.find(stmt) != tp->vecInfo->redVecNodes.end()) { + regReadlhsvec = static_cast(tp->vecInfo->redVecNodes[stmt]); + } else { + regReadlhsvec = GenVectorReductionVar(stmt, tp); + } return regReadlhsvec; } @@ -1330,8 +1345,13 @@ void LoopVectorization::VectorizeExpr(BaseNode *node, LoopTransPlan *tp, MapleVe } case OP_dread: case OP_constval: { - // donothing - vectorizedNode.push_back(node); + if (tp->vecInfo->ivNodes.find(node) != tp->vecInfo->ivNodes.end()) { + BaseNode *vecnode = tp->vecInfo->ivVecNodes[node]; + vectorizedNode.push_back(vecnode); + } else { + // donothing + vectorizedNode.push_back(node); + } break; } default: @@ -1340,6 +1360,76 @@ void LoopVectorization::VectorizeExpr(BaseNode *node, LoopTransPlan *tp, MapleVe return; } +// set lhs type to vector type and return lhs pointto type +MIRType *LoopVectorization::VectorizeIassignLhs(IassignNode *iassign, LoopTransPlan *tp) { + MIRType &mirType = GetTypeFromTyIdx(iassign->GetTyIdx()); + CHECK_FATAL(mirType.GetKind() == kTypePointer, "iassign must have pointer type"); + MIRPtrType *ptrType = static_cast(&mirType); + MIRType *lhsvecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), tp->vecFactor); + ASSERT(lhsvecType != nullptr, "vector type should not be null"); + tp->vecInfo->currentLHSTypeSize = GetPrimTypeSize(GetVecElemPrimType(lhsvecType->GetPrimType())); + MIRType *pvecType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*lhsvecType, PTY_ptr); + iassign->SetTyIdx(pvecType->GetTypeIndex()); + return lhsvecType; +} + +void LoopVectorization::VectorizeReductionStmt(StmtNode *stmt, LoopTransPlan *tp) { + MapleVector vecOpnd(localAlloc.Adapter()); + PreMeMIRExtension *lfopart = (*PreMeStmtExtensionMap)[stmt->GetStmtID()]; + BlockNode *doloopbody = static_cast(lfopart->GetParent()); + RegreadNode *regReadlhsvec = GetorNewVectorReductionVar(stmt, tp); + tp->vecInfo->currentLHSTypeSize = GetPrimTypeSize(GetVecElemPrimType(regReadlhsvec->GetPrimType())); + // skip vectorizing uniform node + // rhsvecNode : vectorizable_expr + BaseNode *rhsvecNode = stmt->GetRHS()->Opnd(1); + if (tp->vecInfo->uniformNodes.find(rhsvecNode) == tp->vecInfo->uniformNodes.end()) { + VectorizeExpr(rhsvecNode, tp, vecOpnd, 0); + } else { + // rhs is uniform + vecOpnd.push_back(rhsvecNode); + } + // use widen intrinsic + if ((GetPrimTypeSize(GetVecElemPrimType(regReadlhsvec->GetPrimType())) * 8 * tp->vecFactor) > + MAX_VECTOR_LENGTH_SIZE) { + BaseNode *currVecNode = nullptr; + PrimType currVecType; + for (size_t i = 0; i < vecOpnd.size(); i++) { + currVecNode = vecOpnd[i]; + currVecType = currVecNode->GetPrimType(); + // need widen + if (GetPrimTypeSize(GetVecElemPrimType(regReadlhsvec->GetPrimType())) > + GetPrimTypeSize(GetVecElemPrimType(currVecType))) { + ASSERT(((GetVecEleSize(regReadlhsvec->GetPrimType())) / GetVecEleSize(currVecType) == 2) && + (GetVecLanes(regReadlhsvec->GetPrimType()) * 2 == GetVecLanes(currVecType)) , "type check"); + IntrinsicopNode *pairwiseWidenAddIntrn = GenVectorPairWiseAccumulate(regReadlhsvec, + currVecNode, currVecType); + RegassignNode *regassign3 = codeMP->New(regReadlhsvec->GetPrimType(), + regReadlhsvec->GetRegIdx(), pairwiseWidenAddIntrn); + doloopbody->InsertBefore(stmt, regassign3); + } else { + BinaryNode *binaryNode = codeMP->New(OP_add, regReadlhsvec->GetPrimType(), regReadlhsvec, + currVecNode); + RegassignNode *regassign = codeMP->New(regReadlhsvec->GetPrimType(), + regReadlhsvec->GetRegIdx(), binaryNode); + doloopbody->InsertBefore(stmt, regassign); + } + } + } else { + BinaryNode *binaryNode = codeMP->New(OP_add, regReadlhsvec->GetPrimType(), regReadlhsvec, + vecOpnd[0]); + RegassignNode *regassign1 = codeMP->New(regReadlhsvec->GetPrimType(), + regReadlhsvec->GetRegIdx(), binaryNode); + doloopbody->InsertBefore(stmt, regassign1); + } + // red = red +/- sum_vec(redvec) + StmtNode *copyStmt = stmt->CloneTree(*codeMPAlloc); + IntrinsicopNode *intrnvecSum = GenSumVecStmt(regReadlhsvec, regReadlhsvec->GetPrimType()); + copyStmt->GetRHS()->SetOpnd(intrnvecSum, 1); + tp->vecInfo->afterLoopStmts.push_back(copyStmt); + doloopbody->RemoveStmt(stmt); + return; +} + // iterate tree node to vectorize scalar type to vector type // following opcode can be vectorized directly // +, -, *, &, |, <<, >>, compares, ~, ! @@ -1351,42 +1441,38 @@ void LoopVectorization::VectorizeStmt(BaseNode *node, LoopTransPlan *tp) { switch (node->GetOpCode()) { case OP_iassign: { IassignNode *iassign = static_cast(node); - // change lsh type to vector type - MIRType &mirType = GetTypeFromTyIdx(iassign->GetTyIdx()); - CHECK_FATAL(mirType.GetKind() == kTypePointer, "iassign must have pointer type"); - MIRPtrType *ptrType = static_cast(&mirType); - MIRType *lhsvecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), tp->vecFactor); - ASSERT(lhsvecType != nullptr, "vector type should not be null"); - tp->vecInfo->currentLHSTypeSize = GetPrimTypeSize(GetVecElemPrimType(lhsvecType->GetPrimType())); - MIRType *pvecType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*lhsvecType, PTY_ptr); - // update lhs type - iassign->SetTyIdx(pvecType->GetTypeIndex()); - // visit rsh - BaseNode *rhs = iassign->GetRHS(); - BaseNode *newrhs; - if (tp->vecInfo->uniformVecNodes.find(rhs) != tp->vecInfo->uniformVecNodes.end()) { - // rhs replaced scalar node with vector node - newrhs = tp->vecInfo->uniformVecNodes[rhs]; - if (GetPrimTypeSize(GetVecElemPrimType(newrhs->GetPrimType())) < tp->vecInfo->currentLHSTypeSize) { - newrhs = (BaseNode *)GenVectorWidenOpnd(newrhs, newrhs->GetPrimType(), false); - } + // node is reduction stmt + if (tp->vecInfo->reductionStmts.find(iassign) != tp->vecInfo->reductionStmts.end()) { + VectorizeReductionStmt(static_cast(node), tp); } else { - MapleVector vecRhs(localAlloc.Adapter()); - VectorizeExpr(iassign->GetRHS(), tp, vecRhs, 0); - ASSERT(vecRhs.size() == 1, "iassign doesn't handle complex type cvt now"); - // insert CVT if lsh type is not same as rhs type - newrhs = vecRhs[0]; - } - if ((IsSignedInteger(lhsvecType->GetPrimType()) && IsUnsignedInteger(newrhs->GetPrimType())) || - (IsUnsignedInteger(lhsvecType->GetPrimType()) && IsUnsignedInteger(newrhs->GetPrimType()))) { - newrhs = ConvertNodeType(IsSignedInteger(lhsvecType->GetPrimType()), newrhs); + MIRType *lhsptvecType = VectorizeIassignLhs(iassign, tp); + BaseNode *rhs = iassign->GetRHS(); + BaseNode *newrhs; + if (tp->vecInfo->uniformVecNodes.find(rhs) != tp->vecInfo->uniformVecNodes.end()) { + // rhs replaced scalar node with vector node + newrhs = tp->vecInfo->uniformVecNodes[rhs]; + if (GetPrimTypeSize(GetVecElemPrimType(newrhs->GetPrimType())) < tp->vecInfo->currentLHSTypeSize) { + newrhs = (BaseNode *)GenVectorWidenOpnd(newrhs, newrhs->GetPrimType(), false); + } + } else { + MapleVector vecRhs(localAlloc.Adapter()); + VectorizeExpr(rhs, tp, vecRhs, 0); + ASSERT(vecRhs.size() == 1, "iassign doesn't handle complex type cvt now"); + // insert CVT if lsh type is not same as rhs type + newrhs = vecRhs[0]; + } + // if need cvt for sign/unsign + if ((IsSignedInteger(lhsptvecType->GetPrimType()) && IsUnsignedInteger(newrhs->GetPrimType())) || + (IsUnsignedInteger(lhsptvecType->GetPrimType()) && IsUnsignedInteger(newrhs->GetPrimType()))) { + newrhs = ConvertNodeType(IsSignedInteger(lhsptvecType->GetPrimType()), newrhs); + } + iassign->SetRHS(newrhs); } - iassign->SetRHS(newrhs); break; } // scalar related: widen type directly or unroll instructions case OP_dassign: { - // now only support reduction scalar + // handle reduction stmt // sum = sum +/- vectorizable_expr // => // Example: vec t1 = dup_scalar(0); @@ -1394,63 +1480,9 @@ void LoopVectorization::VectorizeStmt(BaseNode *node, LoopTransPlan *tp) { // Example: t1 = t1 + vectorized_node; // Example: } // sum = sum +/- intrinsic_op vec_sum(t1) - DassignNode *dassign = static_cast(node); - MapleVector vecOpnd(localAlloc.Adapter()); - PreMeMIRExtension *lfopart = (*PreMeStmtExtensionMap)[dassign->GetStmtID()]; - BlockNode *doloopbody = static_cast(lfopart->GetParent()); - RegreadNode *regReadlhsvec; - if (tp->vecInfo->redVecNodes.find(dassign->GetStIdx()) != tp->vecInfo->redVecNodes.end()) { - regReadlhsvec = static_cast(tp->vecInfo->redVecNodes[dassign->GetStIdx()]); - } else { - regReadlhsvec = GenVectorRedVarInit(dassign->GetStIdx(), tp); - } - tp->vecInfo->currentLHSTypeSize = GetPrimTypeSize(GetVecElemPrimType(regReadlhsvec->GetPrimType())); - // skip vectorizing uniform node - // rhsvecNode : vectorizable_expr - BaseNode *rhsvecNode = dassign->GetRHS()->Opnd(1); - if (tp->vecInfo->uniformNodes.find(rhsvecNode) == tp->vecInfo->uniformNodes.end()) { - VectorizeExpr(rhsvecNode, tp, vecOpnd, 0); - } - // use widen intrinsic - if ((GetPrimTypeSize(GetVecElemPrimType(regReadlhsvec->GetPrimType())) * 8 * tp->vecFactor) > - MAX_VECTOR_LENGTH_SIZE) { - BaseNode *currVecNode = nullptr; - PrimType currVecType; - for (size_t i = 0; i < vecOpnd.size(); i++) { - currVecNode = vecOpnd[i]; - currVecType = currVecNode->GetPrimType(); - // need widen - if (GetPrimTypeSize(GetVecElemPrimType(regReadlhsvec->GetPrimType())) > - GetPrimTypeSize(GetVecElemPrimType(currVecType))) { - ASSERT(((GetVecEleSize(regReadlhsvec->GetPrimType())) / GetVecEleSize(currVecType) == 2) && - (GetVecLanes(regReadlhsvec->GetPrimType()) * 2 == GetVecLanes(currVecType)) , "type check"); - IntrinsicopNode *pairwiseWidenAddIntrn = GenVectorPairWiseAccumulate(regReadlhsvec, - currVecNode, currVecType); - RegassignNode *regassign3 = codeMP->New(regReadlhsvec->GetPrimType(), - regReadlhsvec->GetRegIdx(), pairwiseWidenAddIntrn); - doloopbody->InsertBefore(dassign, regassign3); - } else { - BinaryNode *binaryNode = codeMP->New(OP_add, regReadlhsvec->GetPrimType(), regReadlhsvec, - currVecNode); - RegassignNode *regassign = codeMP->New(regReadlhsvec->GetPrimType(), - regReadlhsvec->GetRegIdx(), binaryNode); - doloopbody->InsertBefore(dassign, regassign); - } - } - } else { - BinaryNode *binaryNode = codeMP->New(OP_add, regReadlhsvec->GetPrimType(), regReadlhsvec, - rhsvecNode); - RegassignNode *regassign1 = codeMP->New(regReadlhsvec->GetPrimType(), - regReadlhsvec->GetRegIdx(), binaryNode); - doloopbody->InsertBefore(dassign, regassign1); - } - // red = red +/- sum_vec(redvec) - DassignNode *dassignCopy = dassign->CloneTree(*codeMPAlloc); - IntrinsicopNode *intrnvecSum = GenSumVecStmt(regReadlhsvec, regReadlhsvec->GetPrimType()); - dassignCopy->GetRHS()->SetOpnd(intrnvecSum, 1); - tp->vecInfo->afterLoopStmts.push_back(dassignCopy); - doloopbody->RemoveStmt(dassign); - + StmtNode *dassign = static_cast(node); + ASSERT(tp->vecInfo->reductionStmts.find(dassign) != tp->vecInfo->reductionStmts.end(), "dassign should be reduction stmt"); + VectorizeReductionStmt(dassign, tp); break; } default: @@ -1489,7 +1521,8 @@ void LoopVectorization::VectorizeDoLoop(DoloopNode *doloop, LoopTransPlan *tp) { widenDoloop(doloop, tp); // step 2: insert dup stmt before doloop - if (!tp->vecInfo->uniformNodes.empty()) { + if ((!tp->vecInfo->uniformNodes.empty()) || + (!tp->vecInfo->ivNodes.empty())) { PreMeMIRExtension *lfopart = (*PreMeStmtExtensionMap)[doloop->GetStmtID()]; BaseNode *parent = lfopart->GetParent(); ASSERT(parent && (parent->GetOpCode() == OP_block), "nullptr check"); @@ -1521,6 +1554,42 @@ void LoopVectorization::VectorizeDoLoop(DoloopNode *doloop, LoopTransPlan *tp) { tp->vecInfo->uniformVecNodes[node] = regreadNode; } } + if (!tp->vecInfo->ivNodes.empty()) { + // initconst array -> [0, 1, 2... veclanes-1] + // vtype reg = iread array + initval + // { + // reg = reg + veclanes; + // } + MIRType *elemType = static_cast(tp->vecInfo->ivConstArraySym->GetType())->GetElemType(); + MIRType *vecType = GenVecType(elemType->GetPrimType(), tp->vecLanes); + MIRType *ptvecType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*vecType); + PregIdx regIdx = mirFunc->GetPregTab()->CreatePreg(vecType->GetPrimType()); + BaseNode *addr = codeMP->New(OP_addrof, PTY_ptr, tp->vecInfo->ivConstArraySym->GetStIdx(), 0); + IreadNode *ireadNode = codeMP->New(OP_iread, vecType->GetPrimType(), + ptvecType->GetTypeIndex(), 0, addr); + BaseNode *rhs = ireadNode; + if ((!doloop->GetStartExpr()->IsConstval()) || + (static_cast(static_cast(doloop->GetStartExpr())->GetConstVal())->GetValue() != 0)) { + rhs = codeMP->New(OP_add, vecType->GetPrimType(), rhs, doloop->GetStartExpr()); + } + RegassignNode *ivInitStmt = codeMP->New(vecType->GetPrimType(), regIdx, rhs); + pblock->InsertBefore(doloop, ivInitStmt); + RegreadNode *regreadNode = codeMP->New(vecType->GetPrimType(), regIdx); + it = tp->vecInfo->ivNodes.begin(); + for (; it != tp->vecInfo->ivNodes.end(); ++it) { + tp->vecInfo->ivVecNodes[*it] = regreadNode; + } + // new increment stmt + BaseNode *incrNode = tp->vBound->incrNode; + if (GetPrimTypeSize(incrNode->GetPrimType()) != GetPrimTypeSize(elemType->GetPrimType())) { + ConstvalNode *constnode = static_cast(incrNode); + MIRIntConst *incrconst = static_cast(constnode->GetConstVal()); + MIRIntConst *newConst = GlobalTables::GetIntConstTable().GetOrCreateIntConst(incrconst->GetValue(), *elemType); + incrNode = codeMP->New(elemType->GetPrimType(), newConst); + } + BaseNode *incrhs = codeMP->New(OP_add, vecType->GetPrimType(), regreadNode, incrNode); + tp->vecInfo->ivvecIncrStmt = codeMP->New(vecType->GetPrimType(), regIdx, incrhs); + } } // step 3: widen vectorizable stmt in doloop BlockNode *loopbody = doloop->GetDoBody(); @@ -1547,6 +1616,9 @@ void LoopVectorization::VectorizeDoLoop(DoloopNode *doloop, LoopTransPlan *tp) { pblock->InsertAfter(doloop, stmtNode); } } + if (tp->vecInfo->ivvecIncrStmt) { + loopbody->InsertLast(tp->vecInfo->ivvecIncrStmt); + } } // generate remainder loop @@ -1621,6 +1693,12 @@ bool LoopVectorization::ExprVectorizable(DoloopInfo *doloopInfo, LoopVecInfo* ve vecInfo->uniformNodes.insert(x); return true; } + // check scalar is induction variable + if (x->GetOpCode() == OP_dread && + doloopInfo->IsLoopIVNode(x)) { + vecInfo->ivNodes.insert(x); + return true; + } return false; } // supported binary ops @@ -1782,6 +1860,34 @@ bool LoopVectorization::CanAdjustRhsConstType(PrimType targetType, ConstvalNode return res; } +// handle iassign lhs ivar is constant and same as opnd0 of rhs +// a[3] += vectorizable(opnd1) +// a better way is to promote a[3] as a scalar before premeemit +// scalar will be identified as reduction variable +bool LoopVectorization::IassignIsReduction(IassignNode *iassign, LoopVecInfo* vecInfo) { + if (IsReductionOp(iassign->GetRHS()->GetOpCode()) && + ((static_cast(iassign->GetRHS()))->GetBOpnd(0)->GetOpCode() == OP_iread)) { + auto *stmtP = (*PreMeStmtExtensionMap)[iassign->GetStmtID()]; + IassignMeStmt *iassMeStmt = static_cast(stmtP->GetMeStmt()); + IvarMeExpr *ivarlhsExpr = iassMeStmt->GetLHSVal(); + auto *rhsOpnd0 = (*PreMeExprExtensionMap)[(static_cast(iassign->GetRHS()))->GetBOpnd(0)]; + BaseNode *rhsOpnd1 = iassign->GetRHS()->Opnd(1); + PrimType lhsmirType = (static_cast(&GetTypeFromTyIdx(iassign->GetTyIdx())))->GetPointedType()->GetPrimType(); + PrimType rhsOpnd1Type = rhsOpnd1->GetPrimType(); + if (ivarlhsExpr && rhsOpnd0 && rhsOpnd0->GetMeExpr() && + (rhsOpnd0->GetMeExpr()->GetMeOp() == maple::kMeOpIvar)) { + IvarMeExpr *ivarrhs0Expr = static_cast(rhsOpnd0->GetMeExpr()); + if ((ivarlhsExpr->GetBase() == ivarrhs0Expr->GetBase()) && + (lhsmirType == rhsOpnd1Type)) { + vecInfo->vecStmtIDs.insert((iassign)->GetStmtID()); + vecInfo->reductionStmts.insert(iassign); + return true; + } + } + } + return false; +} + // assumed to be inside innermost loop bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInfo, BlockNode *block) { StmtNode *stmt = block->GetFirst(); @@ -1799,6 +1905,7 @@ bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInf return Vectorizable(doloopInfo, vecInfo, static_cast(stmt)->GetDoBody()); case OP_iassign: { IassignNode *iassign = static_cast(stmt); + bool lhsUseAsScalar = false; // no vectorize lsh is complex or constant subscript if (iassign->addrExpr->GetOpCode() == OP_array) { ArrayNode *lhsArr = static_cast(iassign->addrExpr); @@ -1809,25 +1916,42 @@ bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInf // case like a[abs(i-1)] = 1; depth test will report it's parallelize // or a[4*i+1] =; address is not continous if coeff > 1 if (accessDesc->subscriptVec[dim - 1]->tooMessy || - accessDesc->subscriptVec[dim - 1]->loopInvariant || accessDesc->subscriptVec[dim - 1]->coeff != 1) { if (enableDebug) { LogInfo::MapleLogger() << "NOT VECTORIZABLE because of complex array subscript\n"; } return false; + } else if (accessDesc->subscriptVec[dim - 1]->loopInvariant) { + lhsUseAsScalar = true; } } // check rsh bool canVec = ExprVectorizable(doloopInfo, vecInfo, iassign->GetRHS()); - if (canVec) { - if (iassign->GetFieldID() != 0) { // check base of iassign - canVec = doloopInfo->IsLoopInvariant2(iassign->Opnd(0)); - if ((!canVec) && enableDebug) { + if (!canVec) { + // early return + if (enableDebug) { + LogInfo::MapleLogger() << "NOT VECTORIZABLE because of RHS is not vectorizable\n"; + } + return false; + } + if (iassign->GetFieldID() != 0) { // check base of iassign + canVec = doloopInfo->IsLoopInvariant2(iassign->Opnd(0)); + if (!canVec) { + if (enableDebug) { LogInfo::MapleLogger() << "NOT VECTORIZABLE because of baseaddress is not const with non-zero filedID\n"; } + return false; } } - if (canVec) { + // check lhs is used like reduction variable + if (lhsUseAsScalar && (!IassignIsReduction(iassign, vecInfo))) { + if (enableDebug) { + LogInfo::MapleLogger() << "NOT VECTORIZABLE because of lhs is scalar\n"; + } + return false; + } + // check type + { MIRType &mirType = GetTypeFromTyIdx(iassign->GetTyIdx()); CHECK_FATAL(mirType.GetKind() == kTypePointer, "iassign must have pointer type"); MIRPtrType *ptrType = static_cast(&mirType); @@ -1867,12 +1991,6 @@ bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInf if (lshtypesize < vecInfo->smallestTypeSize) { vecInfo->smallestTypeSize = lshtypesize; } - } else { - // early return - if (enableDebug) { - LogInfo::MapleLogger() << "NOT VECTORIZABLE because of RHS is not vectorizable\n"; - } - return false; } break; } @@ -1882,7 +2000,7 @@ bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInf MIRSymbol *lhsSym = mirFunc->GetLocalOrGlobalSymbol(lhsStIdx); MIRType &lhsType = GetTypeFromTyIdx(lhsSym->GetTyIdx()); BaseNode *rhs = dassign->GetRHS(); - uint32_t lshtypesize = GetPrimTypeSize(lhsType.GetPrimType()) * 8; + uint32_t lhstypesize = GetPrimTypeSize(lhsType.GetPrimType()) * 8; if (IsReductionOp(rhs->GetOpCode()) && doloopInfo->IsReductionVar(lhsStIdx)) { BaseNode *opnd0 = rhs->Opnd(0); BaseNode *opnd1 = rhs->Opnd(1); @@ -1891,7 +2009,7 @@ bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInf if (ExprVectorizable(doloopInfo, vecInfo, opnd1)) { // there's iread in rhs if (vecInfo->currentRHSTypeSize != 0) { - if (lshtypesize < vecInfo->currentRHSTypeSize || + if (lhstypesize < vecInfo->currentRHSTypeSize || vecInfo->currentRHSTypeSize < 8) { // rsh typs size is less than i8/u8 // narrow down the result, not handle now if (enableDebug) { @@ -1906,10 +2024,10 @@ bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInf } vecInfo->UpdateWidestTypeSize(vecInfo->currentRHSTypeSize); } else { - vecInfo->UpdateWidestTypeSize(lshtypesize); + vecInfo->UpdateWidestTypeSize(lhstypesize); } vecInfo->vecStmtIDs.insert((stmt)->GetStmtID()); - vecInfo->hasRedvar = true; + vecInfo->reductionStmts.insert(stmt); } else { if (enableDebug) { LogInfo::MapleLogger() << "NOT VECTORIZABLE because of other opnd can't be vectorized\n"; @@ -1932,6 +2050,33 @@ bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInf return true; } +// create constant node if need in vectorization +void LoopVectorization::GenConstVar(LoopVecInfo *vecInfo, uint8_t vecLanes) { + // create zero node used in reduction stmt + if ((!const0Node) && (!vecInfo->reductionStmts.empty())) { + MIRType &typeInt = *GlobalTables::GetTypeTable().GetPrimType(PTY_i32); + MIRIntConst *constZero = GlobalTables::GetIntConstTable().GetOrCreateIntConst(0, typeInt); + const0Node = codeMP->New(PTY_i32, constZero); + } + MIRSymbol *ivconstSym = (vecLanes == 4) ? initIVv4Sym : (vecLanes == 8 ? initIVv8Sym : initIVv2Sym); + if ((!ivconstSym) && (!vecInfo->ivNodes.empty())) { + std::string ivVecName("__ivvec"); + ivVecName.append(std::to_string(vecLanes)); + PrimType type = (vecLanes == 4) ? PTY_i32 : ((vecLanes == 8) ? PTY_i16 : PTY_i64); + MIRType *elemType = GlobalTables::GetTypeTable().GetPrimType(type); + MIRArrayType *arrayType = GlobalTables::GetTypeTable().GetOrCreateArrayType(*elemType, 0); + MIRModule *mirModule = mirFunc->GetModule(); + MIRAggConst *constval = mirModule->GetMemPool()->New((*mirModule), *arrayType); + for (uint32_t i = 0; i < vecLanes; i++) { + MIRIntConst *intconst = GlobalTables::GetIntConstTable().GetOrCreateIntConst(i, *elemType); + constval->AddItem(intconst, 0); + } + ivconstSym = mirModule->GetMIRBuilder()->GetOrCreateGlobalDecl(ivVecName, *arrayType); + ivconstSym->SetKonst(constval); + } + vecInfo->ivConstArraySym = ivconstSym; +} + void LoopVectorization::Perform() { // step 2: collect information, legality check and generate transform plan MapleMap::iterator mapit = depInfo->doloopInfoMap.begin(); @@ -1970,6 +2115,7 @@ void LoopVectorization::Perform() { LoopTransPlan *tplan = localMP->New(codeMP, localMP, vecInfo); if (tplan->Generate(mapit->first, mapit->second, enableDebug)) { vecPlans[mapit->first] = tplan; + GenConstVar(vecInfo, tplan->vecLanes); } } // step 3: do transform diff --git a/src/mapleall/maple_me/src/lfo_unroll.cpp b/src/mapleall/maple_me/src/lfo_unroll.cpp index 479e5754be..ecc03b1758 100644 --- a/src/mapleall/maple_me/src/lfo_unroll.cpp +++ b/src/mapleall/maple_me/src/lfo_unroll.cpp @@ -30,20 +30,6 @@ BaseNode *LfoUnrollOneLoop::CloneIVNode() { } } -bool LfoUnrollOneLoop::IsIVNode(const BaseNode *x) const { - if (doloop->IsPreg()) { - if (x->GetOpCode() != OP_regread) { - return false; - } - return static_cast(x)->GetRegIdx() == doloop->GetDoVarPregIdx(); - } else { - if (x->GetOpCode() != OP_dread) { - return false; - } - return static_cast(x)->GetStIdx() == doloop->GetDoVarStIdx(); - } -} - // replace any occurrence of the IV in x with a copy of repNode void LfoUnrollOneLoop::ReplaceIV(BaseNode *x, BaseNode *repNode) { if (x->GetOpCode() == OP_block) { @@ -56,7 +42,7 @@ void LfoUnrollOneLoop::ReplaceIV(BaseNode *x, BaseNode *repNode) { return; } for (size_t i = 0; i < x->NumOpnds(); i++) { - if (IsIVNode(x->Opnd(i))) { + if (doloopInfo->IsLoopIVNode(x->Opnd(i))) { x->SetOpnd(repNode, i); } else { ReplaceIV(x->Opnd(i), repNode); @@ -214,7 +200,7 @@ void LfoUnrollOneLoop::Process() { if (condExpr->GetOpCode() == OP_eq) { return; } - if (!IsIVNode(condExpr->Opnd(0))) { + if (!doloopInfo->IsLoopIVNode(condExpr->Opnd(0))) { return; } BaseNode *endExpr = condExpr->Opnd(1); diff --git a/src/mapleall/maple_me/src/me_value_range_prop.cpp b/src/mapleall/maple_me/src/me_value_range_prop.cpp index 2e8d5b9258..53c08099dd 100755 --- a/src/mapleall/maple_me/src/me_value_range_prop.cpp +++ b/src/mapleall/maple_me/src/me_value_range_prop.cpp @@ -2457,7 +2457,8 @@ void ValueRangePropagation::MergeValueRangeOfPhiOperands(const LoopDesc &loop, c auto *vrOfInitExpr = valueRangeOfInitExprs.at(index).get(); auto pType = it.second->GetLHS()->GetPrimType(); if (vrOfInitExpr == nullptr || - (vrOfInitExpr->GetRangeType() != kOnlyHasLowerBound && vrOfInitExpr->GetRangeType() != kOnlyHasUpperBound)) { + (vrOfInitExpr->GetRangeType() != kOnlyHasLowerBound && vrOfInitExpr->GetRangeType() != kOnlyHasUpperBound) || + (GetVecLanes(pType) > 0)) { index++; continue; } -- Gitee From acb899479bee8affa01b6562ebbc56ff74e3fe62 Mon Sep 17 00:00:00 2001 From: linma Date: Thu, 31 Mar 2022 11:15:23 -0700 Subject: [PATCH 4/4] refine const0node type used in vectorization loop --- src/mapleall/maple_me/src/lfo_loop_vec.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mapleall/maple_me/src/lfo_loop_vec.cpp b/src/mapleall/maple_me/src/lfo_loop_vec.cpp index 1a9ae9394a..d2803ec7c3 100644 --- a/src/mapleall/maple_me/src/lfo_loop_vec.cpp +++ b/src/mapleall/maple_me/src/lfo_loop_vec.cpp @@ -2054,9 +2054,10 @@ bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInf void LoopVectorization::GenConstVar(LoopVecInfo *vecInfo, uint8_t vecLanes) { // create zero node used in reduction stmt if ((!const0Node) && (!vecInfo->reductionStmts.empty())) { - MIRType &typeInt = *GlobalTables::GetTypeTable().GetPrimType(PTY_i32); + PrimType ptype = vecLanes == 4 ? PTY_i32 : (vecLanes == 2 ? PTY_i64 : PTY_i16); + MIRType &typeInt = *GlobalTables::GetTypeTable().GetPrimType(ptype); MIRIntConst *constZero = GlobalTables::GetIntConstTable().GetOrCreateIntConst(0, typeInt); - const0Node = codeMP->New(PTY_i32, constZero); + const0Node = codeMP->New(ptype, constZero); } MIRSymbol *ivconstSym = (vecLanes == 4) ? initIVv4Sym : (vecLanes == 8 ? initIVv8Sym : initIVv2Sym); if ((!ivconstSym) && (!vecInfo->ivNodes.empty())) { -- Gitee