diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def b/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def index 3e11b3c05a3f1e41e9196f1007049d4673cd2e0b..4ff26beaf7bbdfb6af88eca01d7f794f4d80f828 100644 --- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def +++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def @@ -723,8 +723,10 @@ DEFINE_MOP(MOP_dmb_ish, {}, HASACQUIRE|HASRELEASE|ISDMB,kLtBranch, "dmb\tish", /* Neon simd, r:nonvector reg, u:64b vector reg, v:128b vector reg */ DEFINE_MOP(MOP_vmovui, {mopdReg64VD,mopdImm8},ISMOVE|ISVECTOR,kLtFpalu,"movi","0,1",1) DEFINE_MOP(MOP_vmovvi, {mopdReg128VD,mopdImm8},ISMOVE|ISVECTOR,kLtFpalu,"movi","0,1",1) -DEFINE_MOP(MOP_vdupur, {mopdReg64VD,mopdReg32IS},ISMOVE|ISVECTOR,kLtFpalu,"dup","0,1",1) -DEFINE_MOP(MOP_vdupvr, {mopdReg128VD,mopdReg32IS},ISMOVE|ISVECTOR,kLtFpalu,"dup","0,1",1) +DEFINE_MOP(MOP_vwdupur, {mopdReg64VD,mopdReg32IS},ISMOVE|ISVECTOR,kLtFpalu,"dup","0,1",1) +DEFINE_MOP(MOP_vwdupvr, {mopdReg128VD,mopdReg32IS},ISMOVE|ISVECTOR,kLtFpalu,"dup","0,1",1) +DEFINE_MOP(MOP_vxdupur, {mopdReg64VD,mopdReg64IS},ISMOVE|ISVECTOR,kLtFpalu,"dup","0,1",1) +DEFINE_MOP(MOP_vxdupvr, {mopdReg128VD,mopdReg64IS},ISMOVE|ISVECTOR,kLtFpalu,"dup","0,1",1) DEFINE_MOP(MOP_vduprv, {mopdReg64FD,mopdReg128VS},ISMOVE|ISVECTOR,kLtFpalu,"dup","0,1",1) DEFINE_MOP(MOP_vextuuui,{mopdReg64VD,mopdReg64VS,mopdReg64VS,mopdImm8},ISVECTOR,kLtFpalu,"ext","0,1,2,3",1) DEFINE_MOP(MOP_vextvvvi,{mopdReg128VD,mopdReg128VS,mopdReg128VS,mopdImm8},ISVECTOR,kLtFpalu,"ext","0,1,2,3",1) diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp index 605187d261f466ac6cb6205b0f524ca7d698491d..3558c3cf17203ac194489a99bfa9e0cb6984e403 100755 --- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp @@ -8853,7 +8853,12 @@ RegOperand *AArch64CGFunc::SelectVectorFromScalar(PrimType rType, Operand *src, } } - MOperator mOp = GetPrimTypeSize(rType) > k8ByteSize ? MOP_vdupvr: MOP_vdupur; + MOperator mOp; + if (GetPrimTypeSize(sType) > k4ByteSize) { + mOp = GetPrimTypeSize(rType) > k8ByteSize ? MOP_vxdupvr: MOP_vxdupur; + } else { + mOp = GetPrimTypeSize(rType) > k8ByteSize ? MOP_vwdupvr: MOP_vwdupur; + } Insn *insn = &GetCG()->BuildInstruction(mOp, *res, *reg); static_cast(insn)->PushRegSpecEntry(vecSpec); GetCurBB()->AppendInsn(*insn); @@ -9015,8 +9020,9 @@ void AArch64CGFunc::PrepareVectorOperands(Operand **o1, PrimType &oty1, Operand if (IsPrimitiveVector(oty1) && IsPrimitiveVector(oty2)) { return; } + PrimType origTyp = !IsPrimitiveVector(oty2) ? oty2 : oty1; Operand *opd = !IsPrimitiveVector(oty2) ? *o2 : *o1; - PrimType rType = !IsPrimitiveVector(oty2) ? oty1 : oty2; /* Type to dup into */ + PrimType rType = !IsPrimitiveVector(oty2) ? oty1 : oty2; /* Type to dup into */ RegOperand *res = &CreateRegisterOperandOfType(rType); VectorRegSpec *vecSpec = GetMemoryPool()->New(); vecSpec->vecLaneMax = GetPrimTypeLanes(rType); @@ -9026,7 +9032,11 @@ void AArch64CGFunc::PrepareVectorOperands(Operand **o1, PrimType &oty1, Operand if (opd->IsConstImmediate()) { mOp = GetPrimTypeSize(rType) > k8ByteSize ? MOP_vmovvi : MOP_vmovui; /* a const */ } else { - mOp = GetPrimTypeSize(rType) > k8ByteSize ? MOP_vdupvr : MOP_vdupur; /* a scalar var */ + if (GetPrimTypeSize(origTyp) > k4ByteSize) { + mOp = GetPrimTypeSize(rType) > k8ByteSize ? MOP_vxdupvr : MOP_vxdupur; + } else { + mOp = GetPrimTypeSize(rType) > k8ByteSize ? MOP_vwdupvr : MOP_vwdupur;/* a scalar var */ + } } Insn *insn = &GetCG()->BuildInstruction(mOp, *res, *opd); static_cast(insn)->PushRegSpecEntry(vecSpec); diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp index b896947009fae5aa8933de3f0fa5ba5daec6aae9..1df251f18a52b96986d4cc19e90aad551fb5833d 100644 --- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp @@ -1455,7 +1455,10 @@ bool AArch64Insn::HasLoop() const { bool AArch64Insn::IsSpecialIntrinsic() const { switch (mOp) { - case MOP_vdupur: + case MOP_vwdupur: + case MOP_vwdupvr: + case MOP_vxdupur: + case MOP_vxdupvr: case MOP_vduprv: case MOP_vwinsur: case MOP_vxinsur: diff --git a/src/mapleall/maple_driver/src/maple_comb_compiler.cpp b/src/mapleall/maple_driver/src/maple_comb_compiler.cpp index bfb4b32bbd1e8e7636dc5f9ac2ed1de01674c16d..7a867cd8bef23ca4a64fd9689f6af5053fc46b65 100644 --- a/src/mapleall/maple_driver/src/maple_comb_compiler.cpp +++ b/src/mapleall/maple_driver/src/maple_comb_compiler.cpp @@ -21,6 +21,7 @@ #include "inline.h" #include "me_phase_manager.h" #include "constantfold.h" +#include "lfo_loop_vec.h" namespace maple { using namespace mapleOption; @@ -199,7 +200,10 @@ ErrorCode MapleCombCompiler::Compile(MplOptions &options, std::unique_ptr 0) { + LogInfo::MapleLogger() << "\n " << LoopVectorization::vectorizedLoop << " loop vectorized\n"; + } delete optMp; return nErr; } diff --git a/src/mapleall/maple_me/include/lfo_dep_test.h b/src/mapleall/maple_me/include/lfo_dep_test.h index 478071a4fbfbbad614f8ed67f81de4219ab7a8b6..27547e12c32f89f83657fc3416c48685a0235405 100644 --- a/src/mapleall/maple_me/include/lfo_dep_test.h +++ b/src/mapleall/maple_me/include/lfo_dep_test.h @@ -96,6 +96,7 @@ class DoloopInfo { void CreateDepTestLists(); void TestDependences(MapleVector *depTestList, bool bothLHS); bool Parallelizable(); + ArrayAccessDesc* GetArrayAccessDesc(ArrayNode *node, bool isRHS); }; class LfoDepInfo : public AnalysisResult { diff --git a/src/mapleall/maple_me/include/lfo_loop_vec.h b/src/mapleall/maple_me/include/lfo_loop_vec.h index ec7008234ffbb7ad4e0ac36e965785af84d51dee..fbd28140a4d1e14dca393f8c301cc77bf9260dd2 100644 --- a/src/mapleall/maple_me/include/lfo_loop_vec.h +++ b/src/mapleall/maple_me/include/lfo_loop_vec.h @@ -22,6 +22,7 @@ #include "lfo_dep_test.h" namespace maple { + class LoopBound { public: LoopBound() : lowNode(nullptr), upperNode(nullptr), incrNode(nullptr) {}; @@ -33,14 +34,24 @@ public: class LoopVecInfo { public: - explicit LoopVecInfo(MapleAllocator &alloc) : vecStmtIDs(alloc.Adapter()) { - largestPrimType = PTY_i8; + explicit LoopVecInfo(MapleAllocator &alloc) : vecStmtIDs(alloc.Adapter()), + uniformNodes(alloc.Adapter()), + uniformVecNodes(alloc.Adapter()) { + // smallestPrimType = PTY_i64; + largestTypeSize = 8; // i8 bit size + currentRHSTypeSize = 0; } - void UpdatePrimType(PrimType ctype); - - PrimType largestPrimType; // largest size type in vectorizable stmtnodes + void UpdateWidestTypeSize(uint32_t ); + void ResetStmtRHSTypeSize() { currentRHSTypeSize = 0; } + bool UpdateRHSTypeSize(PrimType); // record rhs node typesize + //PrimType smallestPrimType; // smallest size type in vectorizable stmtnodes + uint32_t largestTypeSize; // largest size type in vectorizable stmtnodes + uint32_t currentRHSTypeSize; // largest size of current stmt's RHS, this is temp value and update for each stmt // list of vectorizable stmtnodes in current loop, others can't be vectorized MapleSet vecStmtIDs; + MapleSet uniformNodes; // loop invariable scalar set + MapleMap uniformVecNodes; // new generated vector node + //MapleMap inductionStmt; // dup scalar to vector stmt may insert before stmt }; // tranform plan for current loop @@ -84,11 +95,11 @@ class LoopVectorization { void Perform(); void TransformLoop(); void VectorizeDoLoop(DoloopNode *, LoopTransPlan*); - void VectorizeNode(BaseNode *, uint8_t); + void VectorizeNode(BaseNode *, LoopTransPlan *); MIRType *GenVecType(PrimType, uint8_t); - StmtNode *GenIntrinNode(BaseNode *scalar, PrimType vecPrimType); - bool ExprVectorizable(DoloopInfo *doloopInfo, BaseNode *x); - bool Vectorizable(DoloopInfo *doloopInfo, BlockNode *block, LoopVecInfo *); + RegassignNode *GenDupScalarStmt(BaseNode *scalar, PrimType vecPrimType); + bool ExprVectorizable(DoloopInfo *doloopInfo, LoopVecInfo*, BaseNode *x); + bool Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo*, BlockNode *block); void widenDoloop(DoloopNode *doloop, LoopTransPlan *); DoloopNode *PrepareDoloop(DoloopNode *, LoopTransPlan *); DoloopNode *GenEpilog(DoloopNode *); @@ -96,6 +107,8 @@ class LoopVectorization { MapleMap *GetVecPlans() { return &vecPlans; } std::string PhaseName() const { return "lfoloopvec"; } +public: + static uint32_t vectorizedLoop; private: MIRFunction *mirFunc; // point to lfoStmtParts of lfopreemit, map lfoinfo for StmtNode, key is stmtID diff --git a/src/mapleall/maple_me/src/alias_class.cpp b/src/mapleall/maple_me/src/alias_class.cpp index a26edf6ccfe6ced4629a2a415cd587e8eb21e0b6..db557a3e824681cc6edae4e1ebd71ee2f10645f2 100644 --- a/src/mapleall/maple_me/src/alias_class.cpp +++ b/src/mapleall/maple_me/src/alias_class.cpp @@ -220,28 +220,6 @@ OffsetType AliasClass::OffsetInBitOfArrayElement(const ArrayNode *arrayNode) { OriginalSt *AliasClass::FindOrCreateExtraLevOst(SSATab *ssaTab, OriginalSt *prevLevOst, const TyIdx &tyIdx, FieldID fld, OffsetType offset) { - if (!offset.IsInvalid() && prevLevOst->GetIndirectLev() < 0) { - auto mirType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(tyIdx); - ASSERT(mirType->IsMIRPtrType(), "must be pointer type"); - auto typeOfOst = static_cast(mirType)->GetPointedType(); - if (fld != 0 && fld <= typeOfOst->NumberOfFieldIDs()) { - typeOfOst = static_cast(typeOfOst)->GetFieldType(fld); - } - const auto &ostPair = ssaTab->GetOriginalStTable().FindOrCreateSymbolOriginalSt(*prevLevOst->GetMIRSymbol(), - prevLevOst->GetPuIdx(), fld, typeOfOst->GetTypeIndex(), offset); - auto *newOst = ostPair.first; - newOst->SetPrevLevelOst(prevLevOst); - if (ostPair.second) { - prevLevOst->AddNextLevelOst(newOst, true); - } - return newOst; - } - - const TyIdx &tyIdxOfBaseOst = prevLevOst->GetTyIdx(); - if (ssaTab->GetModule().IsCModule() && tyIdxOfBaseOst != tyIdx) { - return ssaTab->GetOriginalStTable().FindOrCreateExtraLevOriginalSt(prevLevOst, tyIdx, 0, offset); - } - return ssaTab->GetOriginalStTable().FindOrCreateExtraLevOriginalSt(prevLevOst, tyIdx, fld, offset); } @@ -252,12 +230,12 @@ AliasElem *AliasClass::FindOrCreateExtraLevAliasElem(BaseNode &baseAddress, cons if (aliasInfoOfBaseAddress.ae == nullptr) { return FindOrCreateDummyNADSAe(); } - if (mirModule.IsCModule() && IsNullOrDummySymbolOst(aliasInfoOfBaseAddress.ae->GetOst())) { + auto *baseOst = aliasInfoOfBaseAddress.ae->GetOst(); + if (mirModule.IsCModule() && IsNullOrDummySymbolOst(baseOst)) { return FindOrCreateDummyNADSAe(); } - auto newOst = FindOrCreateExtraLevOst(&ssaTab, aliasInfoOfBaseAddress.ae->GetOst(), tyIdx, - aliasInfoOfBaseAddress.fieldID + fieldId, + auto newOst = FindOrCreateExtraLevOst(&ssaTab, baseOst, tyIdx, fieldId, typeHasBeenCasted ? OffsetType(kOffsetUnknown) : aliasInfoOfBaseAddress.offset); CHECK_FATAL(newOst != nullptr, "null ptr check"); @@ -320,8 +298,11 @@ AliasInfo AliasClass::CreateAliasElemsExpr(BaseNode &expr) { auto &iread = static_cast(expr); MIRType *mirType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(iread.GetTyIdx()); CHECK_FATAL(mirType->GetKind() == kTypePointer, "CreateAliasElemsExpr: ptr type expected in iread"); - bool typeHasBeenCasted = - static_cast(mirType)->GetPointedType()->GetSize() != GetPrimTypeSize(iread.GetPrimType()); + auto *pointedType = static_cast(mirType)->GetPointedType(); + if (iread.GetFieldID() > 0) { + pointedType = static_cast(pointedType)->GetFieldType(iread.GetFieldID()); + } + bool typeHasBeenCasted = pointedType->GetSize() != GetPrimTypeSize(iread.GetPrimType()); return AliasInfo(FindOrCreateExtraLevAliasElem( *iread.Opnd(0), iread.GetTyIdx(), iread.GetFieldID(), typeHasBeenCasted), 0, OffsetType(0)); } @@ -399,6 +380,12 @@ AliasInfo AliasClass::CreateAliasElemsExpr(BaseNode &expr) { intrn.GetNopndAt(0)->GetOpCode() == OP_dread)) { return CreateAliasElemsExpr(*intrn.GetNopndAt(0)); } + IntrinDesc *intrinDesc = &IntrinDesc::intrinTable[intrn.GetIntrinsic()]; + if (intrinDesc->IsVectorOp()) { + SetPtrOpndsNextLevNADS(0, static_cast(intrn.NumOpnds()), intrn.GetNopnd(), + false); + return AliasInfo(); + } // fall-through } [[clang::fallthrough]]; @@ -1203,11 +1190,19 @@ void AliasClass::UnionAllNodes(MapleVector *nextLevOsts) { OriginalSt *ostA = *it; AliasElem *aeA = FindAliasElem(*ostA); ++it; + std::set aesToUnionNextLev; for (; it != nextLevOsts->end(); ++it) { OriginalSt *ostB = *it; AliasElem *aeB = FindAliasElem(*ostB); - unionFind.Union(aeA->GetClassID(), aeB->GetClassID()); + auto idA = aeA->GetClassID(); + auto idB = aeB->GetClassID(); + if (unionFind.Root(idA) != unionFind.Root(idB)) { + unionFind.Union(idA, idB); + aesToUnionNextLev.insert(id2Elem[unionFind.Root(idA)]); + } } + // union next-level-osts of aliased osts + UnionNextLevelOfAliasOst(aesToUnionNextLev); } // This is applicable only for C language. For each ost that is a struct, diff --git a/src/mapleall/maple_me/src/cfg_opt.cpp b/src/mapleall/maple_me/src/cfg_opt.cpp index 48cf73ad7c2bdb65d11cc1d50c9fdab696833bb9..3233d96ccff5b028da73634cc24628539ce161dd 100644 --- a/src/mapleall/maple_me/src/cfg_opt.cpp +++ b/src/mapleall/maple_me/src/cfg_opt.cpp @@ -65,6 +65,9 @@ void CfgOpt::PropagateBB(BB &bb, BB *trueBranchBB, BB *falseBranchBB) { return; } for (auto predBB : bb.GetPred()) { + if (predBB->IsEmpty()) { + continue; + } BB *trueBranchBBForPred = trueBranchBB; BB *falseBranchBBForPred = falseBranchBB; switch (predBB->GetKind()) { diff --git a/src/mapleall/maple_me/src/demand_driven_alias_analysis.cpp b/src/mapleall/maple_me/src/demand_driven_alias_analysis.cpp index 1974af4cdc79761e6aacacbd3f446a800e7cef5b..dbb8729eb2cc8e41bc76a852fec5e9ae01f8ed1d 100644 --- a/src/mapleall/maple_me/src/demand_driven_alias_analysis.cpp +++ b/src/mapleall/maple_me/src/demand_driven_alias_analysis.cpp @@ -166,16 +166,16 @@ PEGBuilder::PtrValueRecorder PEGBuilder::BuildPEGNodeOfIread(const IreadSSANode } auto *ostOfBase = ptrNode.pegNode->ost; - FieldID fieldId = iread->GetFieldID() + ptrNode.fieldId; - MIRType *mirType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(iread->GetTyIdx()); CHECK_FATAL(mirType->GetKind() == kTypePointer, "CreateAliasElemsExpr: ptr type expected in iread"); - bool typeHasBeenCasted = - static_cast(mirType)->GetPointedType()->GetSize() != GetPrimTypeSize(iread->GetPrimType()); + auto *pointedType = static_cast(mirType)->GetPointedType(); + if (iread->GetFieldID() > 0) { + pointedType = static_cast(pointedType)->GetFieldType(iread->GetFieldID()); + } + bool typeHasBeenCasted = pointedType->GetSize() != GetPrimTypeSize(iread->GetPrimType()); OffsetType offset = typeHasBeenCasted ? OffsetType::InvalidOffset() : ptrNode.offset; - auto *mayUsedOst = - AliasClass::FindOrCreateExtraLevOst(ssaTab, ostOfBase, iread->GetTyIdx(), fieldId, offset); + AliasClass::FindOrCreateExtraLevOst(ssaTab, ostOfBase, iread->GetTyIdx(), iread->GetFieldID(), offset); // build prevLev-nextLev relationship auto *pegNodeOfMayUsedOSt = peg->GetOrCreateNodeOf(mayUsedOst); @@ -493,10 +493,8 @@ void PEGBuilder::BuildPEGNodeInIassign(const IassignNode *iassign) { } auto *ostOfBase = baseAddrValNode.pegNode->ost; - FieldID fieldId = iassign->GetFieldID() + baseAddrValNode.fieldId; - OriginalSt *defedOst = AliasClass::FindOrCreateExtraLevOst( - ssaTab, ostOfBase, iassign->GetTyIdx(), fieldId, baseAddrValNode.offset); + ssaTab, ostOfBase, iassign->GetTyIdx(), iassign->GetFieldID(), baseAddrValNode.offset); PEGNode *lhsNode = peg->GetOrCreateNodeOf(defedOst); // build prevLev-nextLev relation diff --git a/src/mapleall/maple_me/src/lfo_dep_test.cpp b/src/mapleall/maple_me/src/lfo_dep_test.cpp index 00322aac6ab6efa59d6354ef66930932be238a9d..4b9c1f90948c8c58a6bddfebe2ff65f1ba8b1727 100644 --- a/src/mapleall/maple_me/src/lfo_dep_test.cpp +++ b/src/mapleall/maple_me/src/lfo_dep_test.cpp @@ -412,6 +412,16 @@ bool DoloopInfo::Parallelizable() { return true; } +ArrayAccessDesc* DoloopInfo::GetArrayAccessDesc(ArrayNode *node, bool isRHS) { + MapleVector* arrayDescptr = isRHS ? &rhsArrays : &lhsArrays; + for (auto it = arrayDescptr->begin(); it != arrayDescptr->end(); it++) { + if ((*it)->theArray == node) { + return (*it); + } + } + return nullptr; +} + void LfoDepInfo::PerformDepTest() { size_t i; MapleMap::iterator mapit = doloopInfoMap.begin(); diff --git a/src/mapleall/maple_me/src/lfo_loop_vec.cpp b/src/mapleall/maple_me/src/lfo_loop_vec.cpp index 2f8ca65ad852552128d5f1d24ab183ae61e58f14..4727da9004ce53b267e55009ce583fbbf27f7317 100644 --- a/src/mapleall/maple_me/src/lfo_loop_vec.cpp +++ b/src/mapleall/maple_me/src/lfo_loop_vec.cpp @@ -21,19 +21,34 @@ #include "lfo_loop_vec.h" namespace maple { -void LoopVecInfo::UpdatePrimType(PrimType cptype) { - if (GetPrimTypeSize(largestPrimType) < GetPrimTypeSize(cptype)) { - largestPrimType = cptype; +uint32_t LoopVectorization::vectorizedLoop = 0; + +void LoopVecInfo::UpdateWidestTypeSize(uint32_t newtypesize) { + if (largestTypeSize < newtypesize) { + largestTypeSize = newtypesize; + } +} + +bool LoopVecInfo::UpdateRHSTypeSize(PrimType ptype) { + uint32_t newSize = GetPrimTypeSize(ptype) * 8; + if (currentRHSTypeSize == 0) { + currentRHSTypeSize = newSize; + return true; + } else if (newSize > currentRHSTypeSize) { + currentRHSTypeSize = newSize; + return false; // skip vectorize now since type is not consistent + } else if (newSize < currentRHSTypeSize) { + return false; } + return true; } // generate new bound for vectorization loop and epilog loop // original bound info , condNode doesn't include equal // limitation now: initNode and incrNode are const and initnode is vectorLane aligned. -// vectorization loop: -// epilog loop: < uppernode/vectorFactor*vectorFactor, uppernode, incrnode> +// vectorization loop: +// epilog loop: < (uppernode-initnode)/vectorFactor*vectorFactor+initnode, uppernode, incrnode> void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { - (void) li; BaseNode *initNode = doloop->GetStartExpr(); BaseNode *incrNode = doloop->GetIncrExpr(); BaseNode *condNode = doloop->GetCondExpr(); @@ -45,23 +60,21 @@ void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { constOnenode = codeMP->New(PTY_i32, constOne); } ASSERT(incrNode->IsConstval(), "too complex, incrNode should be const"); - ConstvalNode *icn = static_cast(incrNode); - MIRIntConst *incrConst = static_cast(icn->GetConstVal()); + ConstvalNode *icn = static_cast(incrNode); + MIRIntConst *incrConst = static_cast(icn->GetConstVal()); ASSERT(condNode->IsBinaryNode(), "cmp node should be binary node"); - BaseNode *upNode = condNode->Opnd(1); + BaseNode *upNode = condNode->Opnd(1); // TODO:: verify opnd(1) is upper while opnd(0) is index variable - MIRIntConst *newIncr = GlobalTables::GetIntConstTable().GetOrCreateIntConst( - vecFactor * incrConst->GetValue(), *typeInt); + MIRIntConst *newIncr = GlobalTables::GetIntConstTable().GetOrCreateIntConst(vecFactor*incrConst->GetValue(), *typeInt); ConstvalNode *newIncrNode = codeMP->New(PTY_i32, newIncr); - // check initNode is alignment if (initNode->IsConstval()) { - ConstvalNode *lcn = static_cast(initNode); - MIRIntConst *lowConst = static_cast(lcn->GetConstVal()); + ConstvalNode *lcn = static_cast(initNode); + MIRIntConst *lowConst = static_cast(lcn->GetConstVal()); int64 lowvalue = lowConst->GetValue(); // upNode is constant if (upNode->IsConstval()) { - ConstvalNode *ucn = static_cast(upNode); - MIRIntConst *upConst = static_cast(ucn->GetConstVal()); + ConstvalNode *ucn = static_cast(upNode); + MIRIntConst *upConst = static_cast(ucn->GetConstVal()); int64 upvalue = upConst->GetValue(); if (condOpHasEqual) { upvalue += 1; @@ -71,6 +84,8 @@ void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { vBound = localMP->New(nullptr, nullptr, newIncrNode); } else { // trip count is not vector lane aligned + // vectorized loop < initnode, (up - low)/newIncr *newincr + init, newincr> + // TODO: the vectorized loop need unalignment instruction. int32_t newupval = (upvalue - lowvalue) / (newIncr->GetValue()) * (newIncr->GetValue()) + lowvalue; MIRIntConst *newUpConst = GlobalTables::GetIntConstTable().GetOrCreateIntConst(newupval, *typeInt); ConstvalNode *newUpNode = codeMP->New(PTY_i32, newUpConst); @@ -78,15 +93,13 @@ void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { // generate epilog eBound = localMP->New(newUpNode, nullptr, nullptr); } - } else if (upNode->GetOpCode() == OP_dread) { - // upNode is symbol variable, TODO::op_regread + } else if (upNode->GetOpCode() == OP_dread || upNode->GetOpCode() == OP_regread) { // step 1: generate vectorized loop bound - AddrofNode *dreadnode = static_cast(upNode); - // upNode of vBound is uppnode / newIncr * newIncr + // upNode of vBound is (uppnode - initnode) / newIncr * newIncr + initnode BinaryNode *divnode; BaseNode *addnode = upNode; if (condOpHasEqual) { - addnode = codeMP->New(OP_add, PTY_i32, dreadnode, constOnenode); + addnode = codeMP->New(OP_add, PTY_i32, upNode, constOnenode); } if (lowvalue != 0) { BinaryNode *subnode = codeMP->New(OP_sub, PTY_i32, addnode, initNode); @@ -95,13 +108,14 @@ void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { divnode = codeMP->New(OP_div, PTY_i32, addnode, newIncrNode); } BinaryNode *mulnode = codeMP->New(OP_mul, PTY_i32, divnode, newIncrNode); - vBound = localMP->New(nullptr, mulnode, newIncrNode); + addnode = codeMP->New(OP_add, PTY_i32, mulnode, initNode); + vBound = localMP->New(nullptr, addnode, newIncrNode); // step2: generate epilog bound - eBound = localMP->New(mulnode, nullptr, nullptr); + eBound = localMP->New(addnode, nullptr, nullptr); } else { ASSERT(0, "upper bound is complex, NIY"); } - } else if (initNode->GetOpCode() == OP_dread) { + } else if (initNode->GetOpCode() == OP_dread || initNode->GetOpCode() == OP_regread) { // initnode is not constant // set bound of vectorized loop BinaryNode *subnode; @@ -113,25 +127,25 @@ void LoopTransPlan::GenerateBoundInfo(DoloopNode *doloop, DoloopInfo *li) { } BinaryNode *divnode = codeMP->New(OP_div, PTY_i32, subnode, newIncrNode); BinaryNode *mulnode = codeMP->New(OP_mul, PTY_i32, divnode, newIncrNode); - vBound = localMP->New(nullptr, mulnode, newIncrNode); + BinaryNode *addnode = codeMP->New(OP_add, PTY_i32, mulnode, initNode); + vBound = localMP->New(nullptr, addnode, newIncrNode); // set bound of epilog loop - eBound = localMP->New(mulnode, nullptr, nullptr); + eBound = localMP->New(addnode, nullptr, nullptr); } else { ASSERT(0, "low bound is complex, NIY"); } } // generate best plan for current doloop -void LoopTransPlan::Generate(DoloopNode *doloop, DoloopInfo *li) { - // hack values of vecFactor and vecLanes - vecLanes = 128 / ((GetPrimTypeSize(vecInfo->largestPrimType)) * 8); - vecFactor = vecLanes; // vectory length / type +void LoopTransPlan::Generate(DoloopNode *doloop, DoloopInfo* li) { + // vector length / type size + vecLanes = 128 / (vecInfo->largestTypeSize); + vecFactor = vecLanes; // generate bound information GenerateBoundInfo(doloop, li); } -MIRType* LoopVectorization::GenVecType( - PrimType sPrimType, uint8 lanes) { +MIRType* LoopVectorization::GenVecType(PrimType sPrimType, uint8 lanes) { MIRType *vecType = nullptr; CHECK_FATAL(IsPrimitiveInteger(sPrimType), "primtype should be integer"); switch (sPrimType) { @@ -211,6 +225,31 @@ MIRType* LoopVectorization::GenVecType( } break; } + case PTY_a64: { + if (lanes == 2) { + vecType = GlobalTables::GetTypeTable().GetV2UInt64(); + } else { + ASSERT(0, "unsupported a64 vector lanes"); + } + } + case PTY_ptr: { + if (GetPrimTypeSize(sPrimType) == 4) { + if (lanes == 4) { + vecType = GlobalTables::GetTypeTable().GetV4UInt32(); + } else if (lanes == 2) { + vecType = GlobalTables::GetTypeTable().GetV2UInt32(); + } else { + ASSERT(0, "unsupported ptr vector lanes"); + } + } else if (GetPrimTypeSize(sPrimType) == 8) { + if (lanes == 2) { + vecType = GlobalTables::GetTypeTable().GetV2UInt64(); + } else { + ASSERT(0, "unsupported ptr vector lanes"); + } + } + break; + } default: ASSERT(0, "NIY"); } @@ -218,29 +257,92 @@ MIRType* LoopVectorization::GenVecType( } // generate instrinsic node to copy scalar to vector type -StmtNode *LoopVectorization::GenIntrinNode(BaseNode *scalar, PrimType vecPrimType) { - PrimType intrnPrimtype = PTY_v4i32; +RegassignNode *LoopVectorization::GenDupScalarStmt(BaseNode *scalar, PrimType vecPrimType) { MIRIntrinsicID intrnID = INTRN_vector_from_scalar_v4i32; MIRType *vecType = nullptr; switch (vecPrimType) { case PTY_v4i32: { - intrnPrimtype = PTY_v4i32; intrnID = INTRN_vector_from_scalar_v4i32; vecType = GlobalTables::GetTypeTable().GetV4Int32(); break; } + case PTY_v2i32: { + intrnID = INTRN_vector_from_scalar_v2i32; + vecType = GlobalTables::GetTypeTable().GetV2Int32(); + break; + } + case PTY_v4u32: { + intrnID = INTRN_vector_from_scalar_v4u32; + vecType = GlobalTables::GetTypeTable().GetV4UInt32(); + break; + } + case PTY_v2u32: { + intrnID = INTRN_vector_from_scalar_v2u32; + vecType = GlobalTables::GetTypeTable().GetV2UInt32(); + break; + } + case PTY_v8i16: { + intrnID = INTRN_vector_from_scalar_v8i16; + vecType = GlobalTables::GetTypeTable().GetV8Int16(); + break; + } + case PTY_v8u16: { + intrnID = INTRN_vector_from_scalar_v8u16; + vecType = GlobalTables::GetTypeTable().GetV8UInt16(); + break; + } + case PTY_v4i16: { + intrnID = INTRN_vector_from_scalar_v4i16; + vecType = GlobalTables::GetTypeTable().GetV4Int16(); + break; + } + case PTY_v4u16: { + intrnID = INTRN_vector_from_scalar_v4u16; + vecType = GlobalTables::GetTypeTable().GetV4UInt16(); + break; + } + case PTY_v16i8: { + intrnID = INTRN_vector_from_scalar_v16i8; + vecType = GlobalTables::GetTypeTable().GetV16Int8(); + break; + } + case PTY_v16u8: { + intrnID = INTRN_vector_from_scalar_v16u8; + vecType = GlobalTables::GetTypeTable().GetV16UInt8(); + break; + } + case PTY_v8i8: { + intrnID = INTRN_vector_from_scalar_v8i8; + vecType = GlobalTables::GetTypeTable().GetV8Int8(); + break; + } + case PTY_v8u8: { + intrnID = INTRN_vector_from_scalar_v8u8; + vecType = GlobalTables::GetTypeTable().GetV8UInt8(); + break; + } + case PTY_v2i64: { + intrnID = INTRN_vector_from_scalar_v2i64; + vecType = GlobalTables::GetTypeTable().GetV2Int64(); + break; + } + case PTY_v2u64: { + intrnID = INTRN_vector_from_scalar_v2u64; + vecType = GlobalTables::GetTypeTable().GetV2UInt64(); + break; + } default: { ASSERT(0, "NIY"); } } // generate instrinsic op - IntrinsicopNode *rhs = codeMP->New(*codeMPAlloc, OP_intrinsicopwithtype, PTY_v4i32); + IntrinsicopNode *rhs = codeMP->New(*codeMPAlloc, OP_intrinsicop, vecPrimType); rhs->SetIntrinsic(intrnID); rhs->SetNumOpnds(1); - rhs->SetNOpndAt(0, scalar); + rhs->GetNopnd().push_back(scalar); rhs->SetTyIdx(vecType->GetTypeIndex()); - PregIdx regIdx = mirFunc->GetPregTab()->CreatePreg(intrnPrimtype, vecType); - RegassignNode *stmtNode = codeMP->New(PTY_v4i32, regIdx, rhs); + PregIdx regIdx = mirFunc->GetPregTab()->CreatePreg(vecPrimType); + RegassignNode *stmtNode = codeMP->New(vecPrimType, regIdx, rhs); return stmtNode; } @@ -248,7 +350,7 @@ StmtNode *LoopVectorization::GenIntrinNode(BaseNode *scalar, PrimType vecPrimTyp // following opcode can be vectorized directly // +, -, *, &, |, <<, >>, compares, ~, ! // iassign, iread, dassign, dread -void LoopVectorization::VectorizeNode(BaseNode *node, uint8 count) { +void LoopVectorization::VectorizeNode(BaseNode *node, LoopTransPlan *tp) { if (enableDebug) { node->Dump(0); } @@ -259,34 +361,40 @@ void LoopVectorization::VectorizeNode(BaseNode *node, uint8 count) { MIRType &mirType = GetTypeFromTyIdx(iassign->GetTyIdx()); CHECK_FATAL(mirType.GetKind() == kTypePointer, "iassign must have pointer type"); MIRPtrType *ptrType = static_cast(&mirType); - MIRType *vecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), count); + MIRType *vecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), tp->vecFactor); ASSERT(vecType != nullptr, "vector type should not be null"); MIRType *pvecType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*vecType, PTY_ptr); // update lhs type iassign->SetTyIdx(pvecType->GetTypeIndex()); // visit rsh - VectorizeNode(iassign->GetRHS(), count); + BaseNode *rhs = iassign->GetRHS(); + if (tp->vecInfo->uniformVecNodes.find(rhs) != tp->vecInfo->uniformVecNodes.end()) { + // rhs replaced scalar node with vector node + iassign->SetRHS(tp->vecInfo->uniformVecNodes[rhs]); + } else { + VectorizeNode(iassign->GetRHS(), tp); + } break; } case OP_iread: { IreadNode *ireadnode = static_cast(node); // update primtype - MIRType *primVecType = GenVecType(ireadnode->GetPrimType(), count); + MIRType *primVecType = GenVecType(ireadnode->GetPrimType(), tp->vecFactor); node->SetPrimType(primVecType->GetPrimType()); // update tyidx MIRType &mirType = GetTypeFromTyIdx(ireadnode->GetTyIdx()); CHECK_FATAL(mirType.GetKind() == kTypePointer, "iread must have pointer type"); MIRPtrType *ptrType = static_cast(&mirType); - MIRType *vecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), count); + MIRType *vecType = GenVecType(ptrType->GetPointedType()->GetPrimType(), tp->vecFactor); ASSERT(vecType != nullptr, "vector type should not be null"); MIRType *pvecType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*vecType, PTY_ptr); + ASSERT(ireadnode->GetPrimType() == vecType->GetPrimType(), "iread node vector prim type is not equal to vectorized point to type"); // update lhs type ireadnode->SetTyIdx(pvecType->GetTypeIndex()); break; } // scalar related: widen type directly or unroll instructions case OP_dassign: - case OP_dread: ASSERT(0, "NIY"); break; // vector type support in opcode +, -, *, &, |, <<, >>, compares, ~, ! @@ -309,10 +417,10 @@ void LoopVectorization::VectorizeNode(BaseNode *node, uint8 count) { case OP_cmpl: { ASSERT(node->IsBinaryNode(), "should be binarynode"); BinaryNode *binNode = static_cast(node); - MIRType *vecType = GenVecType(node->GetPrimType(), count); + MIRType *vecType = GenVecType(node->GetPrimType(), tp->vecFactor); node->SetPrimType(vecType->GetPrimType()); // update primtype of binary op - VectorizeNode(binNode->Opnd(0), count); - VectorizeNode(binNode->Opnd(1), count); + VectorizeNode(binNode->Opnd(0), tp); + VectorizeNode(binNode->Opnd(1), tp); break; } // unary op @@ -321,19 +429,14 @@ void LoopVectorization::VectorizeNode(BaseNode *node, uint8 count) { case OP_lnot: { ASSERT(node->IsUnaryNode(), "should be unarynode"); UnaryNode *unaryNode = static_cast(node); - MIRType *vecType = GenVecType(node->GetPrimType(), count); + MIRType *vecType = GenVecType(node->GetPrimType(), tp->vecFactor); node->SetPrimType(vecType->GetPrimType()); // update primtype of unary op - VectorizeNode(unaryNode->Opnd(0), count); + VectorizeNode(unaryNode->Opnd(0), tp); break; } + case OP_dread: case OP_constval: { - LfoPart *lfoP = (*lfoExprParts)[node]; - ASSERT(lfoP != nullptr, "nullptr check"); - // constval could be used in binary op without widen directly - if (!lfoP->GetParent()->IsBinaryNode()) { - // use intrinsicop vdupq_n_i32 to move const to tmp variable - ASSERT(0, "constval need to extended NIY"); - } + // donothing break; } default: @@ -358,18 +461,44 @@ void LoopVectorization::widenDoloop(DoloopNode *doloop, LoopTransPlan *tp) { } } + void LoopVectorization::VectorizeDoLoop(DoloopNode *doloop, LoopTransPlan *tp) { // LogInfo::MapleLogger() << "\n**** dump doloopnode ****\n"; // doloop->Dump(0); // step 1: handle loop low/upper/stride widenDoloop(doloop, tp); - // step 2: widen vectorizable stmt in doloop + // step 2: insert dup stmt before doloop + if (!tp->vecInfo->uniformNodes.empty()) { + LfoPart* lfopart = (*lfoStmtParts)[doloop->GetStmtID()]; + BaseNode *parent = lfopart->GetParent(); + ASSERT(parent && (parent->GetOpCode() == OP_block), "nullptr check"); + BlockNode *pblock = static_cast(parent); + auto it = tp->vecInfo->uniformNodes.begin(); + for (; it != tp->vecInfo->uniformNodes.end(); it++) { + BaseNode *node = *it; + LfoPart *lfoP = (*lfoExprParts)[node]; + // check node's parent, if they are binary node, skip the duplication + if (!lfoP->GetParent()->IsBinaryNode()) { + MIRType *vecType = GenVecType(node->GetPrimType(), tp->vecFactor); + RegassignNode *dupScalarStmt = GenDupScalarStmt(node, vecType->GetPrimType()); + pblock->InsertBefore(doloop, dupScalarStmt); + RegreadNode *regreadNode = codeMP->New(vecType->GetPrimType(), dupScalarStmt->GetRegIdx()); + tp->vecInfo->uniformVecNodes[node] = regreadNode; + } + } + } + + // step 3: widen vectorizable stmt in doloop BlockNode *loopbody = doloop->GetDoBody(); for (auto &stmt : loopbody->GetStmtNodes()) { - VectorizeNode(&stmt, tp->vecFactor); - // stmt could not be widen directly, unroll instruction with vecFactor - // move value from vector type if need (need def-use information from plan) + if (tp->vecInfo->vecStmtIDs.count(stmt.GetStmtID()) > 0) { + VectorizeNode(&stmt, tp); + } else { + // stmt could not be widen directly, unroll instruction with vecFactor + // move value from vector type if need (need def-use information from plan) + CHECK_FATAL(0, "NIY:: unvectorized stmt"); + } } } @@ -420,8 +549,7 @@ void LoopVectorization::TransformLoop() { } } -bool LoopVectorization::ExprVectorizable( - DoloopInfo *doloopInfo, BaseNode *x) { +bool LoopVectorization::ExprVectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInfo, BaseNode *x) { if (!IsPrimitiveInteger(x->GetPrimType())) { return false; } @@ -436,7 +564,16 @@ bool LoopVectorization::ExprVectorizable( if (parent && parent->GetOpCode() == OP_array) { return true; } - return false; + MeExpr *expr = lfopart->GetMeExpr(); + if ((x->GetOpCode() == OP_constval) || + (expr && doloopInfo->IsLoopInvariant(expr))) { + if (!vecInfo->UpdateRHSTypeSize(x->GetPrimType())) { + return false; + } + vecInfo->uniformNodes.insert(x); + return true; + } + return false; // TODO::primary induction variable } // supported binary ops case OP_add: @@ -455,16 +592,19 @@ bool LoopVectorization::ExprVectorizable( case OP_ge: case OP_cmpg: case OP_cmpl: - return ExprVectorizable(doloopInfo, x->Opnd(0)) && ExprVectorizable(doloopInfo, x->Opnd(1)); + return ExprVectorizable(doloopInfo, vecInfo, x->Opnd(0)) && ExprVectorizable(doloopInfo, vecInfo, x->Opnd(1)); // supported unary ops case OP_bnot: case OP_lnot: case OP_neg: - return ExprVectorizable(doloopInfo, x->Opnd(0)); + return ExprVectorizable(doloopInfo, vecInfo, x->Opnd(0)); case OP_iread: { - bool canVec = ExprVectorizable(doloopInfo, x->Opnd(0)); + bool canVec = ExprVectorizable(doloopInfo, vecInfo, x->Opnd(0)); if (canVec) { IreadNode *iread = static_cast(x); + if (!vecInfo->UpdateRHSTypeSize(iread->GetPrimType())) { + canVec = false; // skip if rhs type is not consistent + } if (iread->GetFieldID() != 0 && iread->Opnd(0)->GetOpCode() == OP_array) { MeExpr *meExpr = depInfo->preEmit->GetLfoExprPart(iread->Opnd(0))->GetMeExpr(); canVec = doloopInfo->IsLoopInvariant(meExpr); @@ -475,7 +615,7 @@ bool LoopVectorization::ExprVectorizable( // supported n-ary ops case OP_array: { for (size_t i = 0; i < x->NumOpnds(); i++) { - if (!ExprVectorizable(doloopInfo, x->Opnd(i))) { + if (!ExprVectorizable(doloopInfo, vecInfo, x->Opnd(i))) { return false; } } @@ -487,9 +627,11 @@ bool LoopVectorization::ExprVectorizable( } // assumed to be inside innermost loop -bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, BlockNode *block, LoopVecInfo* vecInfo) { +bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInfo, BlockNode *block) { StmtNode *stmt = block->GetFirst(); while (stmt != nullptr) { + // reset vecInfo + vecInfo->ResetStmtRHSTypeSize(); switch (stmt->GetOpCode()) { case OP_doloop: case OP_dowhile: @@ -498,31 +640,64 @@ bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, BlockNode *block, L break; } case OP_block: - return Vectorizable(doloopInfo, static_cast(stmt)->GetDoBody(), vecInfo); + return Vectorizable(doloopInfo, vecInfo, static_cast(stmt)->GetDoBody()); case OP_iassign: { IassignNode *iassign = static_cast(stmt); - bool canVec = ExprVectorizable(doloopInfo, iassign->GetRHS()); - if (canVec && iassign->GetFieldID() != 0) { // check base of iassign - MeExpr *meExpr = depInfo->preEmit->GetLfoExprPart(iassign->Opnd(0))->GetMeExpr(); - canVec = doloopInfo->IsLoopInvariant(meExpr); + int32_t coeff = 1; + // check lsh is complex subscript + if (iassign->addrExpr->GetOpCode() == OP_array) { + ArrayNode *lhsArr = static_cast(iassign->addrExpr); + ArrayAccessDesc *accessDesc = doloopInfo->GetArrayAccessDesc(lhsArr, false /*isRHS*/); + ASSERT(accessDesc != nullptr, "nullptr check"); + size_t dim = lhsArr->NumOpnds() - 1; + // check innest loop dimension is complex + // case like a[abs(i-1)] = 1; depth test will report it's parallelize + if (accessDesc->subscriptVec[dim-1]->tooMessy) { + return false; + } + coeff = accessDesc->subscriptVec[dim-1]->coeff; + coeff = coeff < 0 ? (-coeff) : coeff; + } + // check rsh + bool canVec = ExprVectorizable(doloopInfo, vecInfo, iassign->GetRHS()); + if (canVec) { + if (iassign->GetFieldID() != 0) { // check base of iassign + MeExpr *meExpr = (*lfoExprParts)[iassign->Opnd(0)]->GetMeExpr(); + canVec = doloopInfo->IsLoopInvariant(meExpr); + } else { + // if rhs is loop invar in case of fieldID is 0 + MeExpr *meExpr = (*lfoExprParts)[iassign->GetRHS()]->GetMeExpr(); + if (meExpr && doloopInfo->IsLoopInvariant(meExpr)) { + vecInfo->UpdateRHSTypeSize(iassign->GetRHS()->GetPrimType()); + vecInfo->uniformNodes.insert(iassign->GetRHS()); + } + } } if (canVec) { MIRType &mirType = GetTypeFromTyIdx(iassign->GetTyIdx()); CHECK_FATAL(mirType.GetKind() == kTypePointer, "iassign must have pointer type"); MIRPtrType *ptrType = static_cast(&mirType); PrimType stmtpt = ptrType->GetPointedType()->GetPrimType(); - CHECK_FATAL(IsPrimitiveInteger(stmtpt) && (!IsPrimitivePoint(stmtpt)), - "iassign ptr type should be integer now"); - vecInfo->UpdatePrimType(stmtpt); + CHECK_FATAL(IsPrimitiveInteger(stmtpt), "iassign ptr type should be integer now"); + // now check lsh type size should be same as rhs typesize + uint32_t lshtypesize = GetPrimTypeSize(stmtpt) * 8; + if (lshtypesize != vecInfo->currentRHSTypeSize) { + return false; // need cvt instruction + } vecInfo->vecStmtIDs.insert((stmt)->GetStmtID()); + // update largest type size + vecInfo->UpdateWidestTypeSize(coeff * lshtypesize); + } else { + // early return + return false; } - return canVec; + break; } default: return false; } stmt = stmt->GetNext(); } - return false; + return true; } void LoopVectorization::Perform() { @@ -533,7 +708,10 @@ void LoopVectorization::Perform() { continue; } LoopVecInfo *vecInfo = localMP->New(localAlloc); - bool vectorizable = Vectorizable(mapit->second, mapit->first->GetDoBody(), vecInfo); + bool vectorizable = Vectorizable(mapit->second, vecInfo, mapit->first->GetDoBody()); + if (vectorizable) { + LoopVectorization::vectorizedLoop++; + } if (enableDebug) { LogInfo::MapleLogger() << "\nInnermost Doloop:"; if (!vectorizable) { diff --git a/src/mapleall/maple_me/src/orig_symbol.cpp b/src/mapleall/maple_me/src/orig_symbol.cpp index 3c97c5b03233b45a27c7103c004df8b455ae7b20..ee6bab28ffbf98cdfa4d27e0e6f2587812f520e4 100644 --- a/src/mapleall/maple_me/src/orig_symbol.cpp +++ b/src/mapleall/maple_me/src/orig_symbol.cpp @@ -188,13 +188,6 @@ OriginalSt *OriginalStTable::FindOrCreateAddrofSymbolOriginalSt(OriginalSt *ost) OriginalSt *OriginalStTable::FindOrCreateExtraLevSymOrRegOriginalSt(OriginalSt *ost, TyIdx tyIdx, FieldID fld, const OffsetType &offset, const KlassHierarchy *klassHierarchy) { - TyIdx ptyIdxOfOst = ost->GetTyIdx(); - FieldID fldIDInOst = fld; - if (ptyIdxOfOst != tyIdx && klassHierarchy != nullptr) { - (void)klassHierarchy->UpdateFieldID(tyIdx, ptyIdxOfOst, fldIDInOst); - } - - auto nextLevelOsts = ost->GetNextLevelOsts(); MIRType *typeOfExtraLevOst = GlobalTables::GetTypeTable().GetVoid(); OffsetType offsetOfNextLevOst(kOffsetUnknown); tyIdx = (tyIdx == 0u) ? ost->GetTyIdx() : tyIdx; @@ -215,7 +208,17 @@ OriginalSt *OriginalStTable::FindOrCreateExtraLevSymOrRegOriginalSt(OriginalSt * } } - OriginalSt *nextLevOst = FindExtraLevOriginalSt(nextLevelOsts, typeOfExtraLevOst, fldIDInOst, offsetOfNextLevOst); + TyIdx ptyIdxOfOst = ost->GetTyIdx(); + FieldID fldIDInOst = fld; + if (ptyIdxOfOst != tyIdx) { + if (klassHierarchy != nullptr) { + (void)klassHierarchy->UpdateFieldID(tyIdx, ptyIdxOfOst, fldIDInOst); + } else { + fldIDInOst = 0; + } + } + OriginalSt *nextLevOst = + FindExtraLevOriginalSt(ost->GetNextLevelOsts(), typeOfExtraLevOst, fldIDInOst, offsetOfNextLevOst); if (nextLevOst != nullptr) { return nextLevOst; } @@ -264,9 +267,10 @@ OriginalSt *OriginalStTable::FindOrCreateExtraLevOriginalSt(OriginalSt *ost, TyI OriginalSt *OriginalStTable::FindExtraLevOriginalSt(const MapleVector &nextLevelOsts, MIRType *type, FieldID fld, const OffsetType &offset) { for (OriginalSt *nextLevelOst : nextLevelOsts) { - if (nextLevelOst->GetFieldID() == fld && nextLevelOst->GetOffset() == offset && - nextLevelOst->GetType()->GetSize() == type->GetSize()) { - return nextLevelOst; + if (nextLevelOst->GetOffset() == offset && nextLevelOst->GetType() == type) { + if (nextLevelOst->GetFieldID() == fld || fld == 0) { + return nextLevelOst; + } } } return nullptr;