From 74617590ae9a64855ab4f30d1de7c2e2672cf0a4 Mon Sep 17 00:00:00 2001 From: linma Date: Thu, 19 Aug 2021 19:52:23 -0700 Subject: [PATCH] add sequence vectorization function and called after loop vectorization --- .../maple_driver/src/driver_runner.cpp | 2 + .../maple_driver/src/maple_comb_compiler.cpp | 5 - src/mapleall/maple_me/BUILD.gn | 1 + src/mapleall/maple_me/include/lfo_pre_emit.h | 1 + src/mapleall/maple_me/include/me_seqvec.h | 71 ++ src/mapleall/maple_me/src/lfo_loop_vec.cpp | 16 +- src/mapleall/maple_me/src/me_seqvec.cpp | 647 ++++++++++++++++++ 7 files changed, 732 insertions(+), 11 deletions(-) create mode 100644 src/mapleall/maple_me/include/me_seqvec.h create mode 100644 src/mapleall/maple_me/src/me_seqvec.cpp diff --git a/src/mapleall/maple_driver/src/driver_runner.cpp b/src/mapleall/maple_driver/src/driver_runner.cpp index b335625bd3..35d11b9661 100644 --- a/src/mapleall/maple_driver/src/driver_runner.cpp +++ b/src/mapleall/maple_driver/src/driver_runner.cpp @@ -24,6 +24,7 @@ #include "lower.h" #include "me_phase_manager.h" #include "lfo_loop_vec.h" +#include "me_seqvec.h" #if TARGAARCH64 || TARGRISCV64 #include "aarch64/aarch64_emitter.h" #elif TARGARM32 @@ -217,6 +218,7 @@ void DriverRunner::RunNewPM(const std::string &outputFile, const std::string &vt // dump vectorized loop counter here { LogInfo::MapleLogger() << "\n" << LoopVectorization::vectorizedLoop << " loop vectorized\n"; + LogInfo::MapleLogger() << "\n" << SeqVectorize::seqVecStores << " sequencestores vectorized\n"; } } diff --git a/src/mapleall/maple_driver/src/maple_comb_compiler.cpp b/src/mapleall/maple_driver/src/maple_comb_compiler.cpp index ad75a39ecc..fcf7172bc1 100644 --- a/src/mapleall/maple_driver/src/maple_comb_compiler.cpp +++ b/src/mapleall/maple_driver/src/maple_comb_compiler.cpp @@ -21,7 +21,6 @@ #include "inline.h" #include "me_phase_manager.h" #include "constantfold.h" -#include "lfo_loop_vec.h" namespace maple { using namespace mapleOption; @@ -195,10 +194,6 @@ 
ErrorCode MapleCombCompiler::Compile(MplOptions &options, std::unique_ptr 0) { - LogInfo::MapleLogger() << "\n " << LoopVectorization::vectorizedLoop << " loop vectorized\n"; - } return nErr; } } // namespace maple diff --git a/src/mapleall/maple_me/BUILD.gn b/src/mapleall/maple_me/BUILD.gn index 7af8c7ab6f..7e80662d6e 100755 --- a/src/mapleall/maple_me/BUILD.gn +++ b/src/mapleall/maple_me/BUILD.gn @@ -100,6 +100,7 @@ src_libmplme = [ "src/lfo_loop_vec.cpp", "src/me_value_range_prop.cpp", "src/cfg_opt.cpp", + "src/me_seqvec.cpp", ] src_libmplmewpo = [ diff --git a/src/mapleall/maple_me/include/lfo_pre_emit.h b/src/mapleall/maple_me/include/lfo_pre_emit.h index 36cdfa061a..877b02caa1 100644 --- a/src/mapleall/maple_me/include/lfo_pre_emit.h +++ b/src/mapleall/maple_me/include/lfo_pre_emit.h @@ -94,6 +94,7 @@ class LfoPreEmitter : public AnalysisResult { return lfopart->mestmt; } MIRFunction *GetMirFunction() { return mirFunc; } + MeIRMap *GetMeIRMap() { return meirmap; } MemPool *GetCodeMP() { return codeMP; } MapleAllocator* GetCodeMPAlloc() { return codeMPAlloc; } MapleMap *GetLfoStmtMap() { return &lfoStmtParts; } diff --git a/src/mapleall/maple_me/include/me_seqvec.h b/src/mapleall/maple_me/include/me_seqvec.h new file mode 100644 index 0000000000..dc9180d43f --- /dev/null +++ b/src/mapleall/maple_me/include/me_seqvec.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) [2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#ifndef MAPLE_ME_INCLUDE_SEQVEC_H +#define MAPLE_ME_INCLUDE_SEQVEC_H +#include "me_function.h" +#include "me_irmap.h" +#include "me_ir.h" +#include "lfo_pre_emit.h" + +namespace maple { + +class SeqVectorize { + using StoreList = MapleVector; + using StoreListMap = MapleMap; +public: + SeqVectorize(MemPool *localmp, LfoPreEmitter *lfoEmit, bool debug = false) + : localMP(localmp), localAlloc(localmp), + codeMP(lfoEmit->GetCodeMP()), codeMPAlloc(lfoEmit->GetCodeMPAlloc()), + mirFunc(lfoEmit->GetMirFunction()), + meIRMap(lfoEmit->GetMeIRMap()), + stores(localAlloc.Adapter()), enableDebug(debug) { + lfoStmtParts = lfoEmit->GetLfoStmtMap(); + lfoExprParts = lfoEmit->GetLfoExprMap(); + } + void Perform(); + void VisitNode(StmtNode *); + void CollectStores(IassignNode *iassign); + void DumpCandidates(MeExpr *base, StoreList *storelist); + void CheckAndTransform(); + bool IsOpExprConsecutiveMem(MeExpr *off1, MeExpr *off2, int32_t diff); + bool CanSeqVec(IassignNode *s1, IassignNode *s2); + bool CanSeqVecRhs(MeExpr *rhs1, MeExpr *rhs2); + void LegalityCheckAndTransform(StoreList *storelist); + bool HasVecType(PrimType sPrimType, uint8 lanes); + MIRType* GenVecType(PrimType sPrimType, uint8 lanes); + RegassignNode *GenDupScalarStmt(BaseNode *scalar, PrimType vecPrimType); + bool SameIntConstValue(MeExpr *, MeExpr *); + bool CanAdjustRhsType(PrimType targetType, ConstvalNode *rhs); + void MergeIassigns(MapleVector &cands); + bool IsIvarExprConsecutiveMem(IvarMeExpr *, IvarMeExpr *, PrimType); +public: + static uint32_t seqVecStores; + // iassignnode in same level block + MemPool *localMP; + MapleAllocator localAlloc; + MemPool *codeMP; + MapleAllocator *codeMPAlloc; + MIRFunction *mirFunc; + MeIRMap *meIRMap; + // point to lfoStmtParts of lfopreemit, map lfoinfo for StmtNode, key is stmtID + MapleMap *lfoStmtParts; + // point to lfoexprparts of lfopreemit, map lfoinfo for exprNode, key is mirnode + MapleMap *lfoExprParts; + StoreListMap stores; + bool enableDebug = 
true; +}; + +} // namespace maple +#endif // MAPLE_ME_INCLUDE_SEQVEC_H diff --git a/src/mapleall/maple_me/src/lfo_loop_vec.cpp b/src/mapleall/maple_me/src/lfo_loop_vec.cpp index c828653b32..da40aab4d7 100644 --- a/src/mapleall/maple_me/src/lfo_loop_vec.cpp +++ b/src/mapleall/maple_me/src/lfo_loop_vec.cpp @@ -19,6 +19,7 @@ #include "mir_lower.h" #include "mir_builder.h" #include "lfo_loop_vec.h" +#include "me_seqvec.h" namespace maple { uint32_t LoopVectorization::vectorizedLoop = 0; @@ -745,7 +746,6 @@ bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInf return Vectorizable(doloopInfo, vecInfo, static_cast(stmt)->GetDoBody()); case OP_iassign: { IassignNode *iassign = static_cast(stmt); - int32_t coeff = 1; // no vectorize lsh is complex or constant subscript if (iassign->addrExpr->GetOpCode() == OP_array) { ArrayNode *lhsArr = static_cast(iassign->addrExpr); @@ -754,12 +754,12 @@ bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInf size_t dim = lhsArr->NumOpnds() - 1; // check innest loop dimension is complex // case like a[abs(i-1)] = 1; depth test will report it's parallelize + // or a[4*i+1] =; address is not continous if coeff > 1 if (accessDesc->subscriptVec[dim-1]->tooMessy || - accessDesc->subscriptVec[dim-1]->loopInvariant) { + accessDesc->subscriptVec[dim-1]->loopInvariant || + accessDesc->subscriptVec[dim-1]->coeff != 1) { return false; } - coeff = accessDesc->subscriptVec[dim - 1]->coeff; - coeff = coeff < 0 ? (-coeff) : coeff; } // check rsh bool canVec = ExprVectorizable(doloopInfo, vecInfo, iassign->GetRHS()); @@ -797,8 +797,8 @@ bool LoopVectorization::Vectorizable(DoloopInfo *doloopInfo, LoopVecInfo* vecInf } vecInfo->vecStmtIDs.insert((stmt)->GetStmtID()); // update largest type size - uint32_t maxSize = vecInfo->currentRHSTypeSize > (coeff * lshtypesize) ? - vecInfo->currentRHSTypeSize : (coeff * lshtypesize); + uint32_t maxSize = vecInfo->currentRHSTypeSize > lshtypesize ? 
+ vecInfo->currentRHSTypeSize : lshtypesize; vecInfo->UpdateWidestTypeSize(maxSize); } else { // early return @@ -868,6 +868,10 @@ bool MELfoLoopVectorization::PhaseRun(MeFunction &f) { f.GetMirFunc()->Dump(false); } + // run sequence vectorization + SeqVectorize seqVec(GetPhaseMemPool(), lfoemit, DEBUGFUNC_NEWPM(f)); + seqVec.Perform(); + // lower lfoIR for other mapleme phases MIRLower mirlowerer(f.GetMIRModule(), f.GetMirFunc()); mirlowerer.SetLowerME(); diff --git a/src/mapleall/maple_me/src/me_seqvec.cpp b/src/mapleall/maple_me/src/me_seqvec.cpp new file mode 100644 index 0000000000..960eee80db --- /dev/null +++ b/src/mapleall/maple_me/src/me_seqvec.cpp @@ -0,0 +1,647 @@ +/* + * Copyright (c) [2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#include +#include +#include "me_option.h" +#include "me_seqvec.h" + +namespace maple { +uint32_t SeqVectorize::seqVecStores = 0; + +// copy from loopvec: generate instrinsic node to copy scalar to vector type +RegassignNode *SeqVectorize::GenDupScalarStmt(BaseNode *scalar, PrimType vecPrimType) { + MIRIntrinsicID intrnID = INTRN_vector_from_scalar_v4i32; + MIRType *vecType = nullptr; + switch (vecPrimType) { + case PTY_v4i32: { + intrnID = INTRN_vector_from_scalar_v4i32; + vecType = GlobalTables::GetTypeTable().GetV4Int32(); + break; + } + case PTY_v2i32: { + intrnID = INTRN_vector_from_scalar_v2i32; + vecType = GlobalTables::GetTypeTable().GetV2Int32(); + break; + } + case PTY_v4u32: { + intrnID = INTRN_vector_from_scalar_v4u32; + vecType = GlobalTables::GetTypeTable().GetV4UInt32(); + break; + } + case PTY_v2u32: { + intrnID = INTRN_vector_from_scalar_v2u32; + vecType = GlobalTables::GetTypeTable().GetV2UInt32(); + break; + } + case PTY_v8i16: { + intrnID = INTRN_vector_from_scalar_v8i16; + vecType = GlobalTables::GetTypeTable().GetV8Int16(); + break; + } + case PTY_v8u16: { + intrnID = INTRN_vector_from_scalar_v8u16; + vecType = GlobalTables::GetTypeTable().GetV8UInt16(); + break; + } + case PTY_v4i16: { + intrnID = INTRN_vector_from_scalar_v4i16; + vecType = GlobalTables::GetTypeTable().GetV4Int16(); + break; + } + case PTY_v4u16: { + intrnID = INTRN_vector_from_scalar_v4u16; + vecType = GlobalTables::GetTypeTable().GetV4UInt16(); + break; + } + case PTY_v16i8: { + intrnID = INTRN_vector_from_scalar_v16i8; + vecType = GlobalTables::GetTypeTable().GetV16Int8(); + break; + } + case PTY_v16u8: { + intrnID = INTRN_vector_from_scalar_v16u8; + vecType = GlobalTables::GetTypeTable().GetV16UInt8(); + break; + } + case PTY_v8i8: { + intrnID = INTRN_vector_from_scalar_v8i8; + vecType = GlobalTables::GetTypeTable().GetV8Int8(); + break; + } + case PTY_v8u8: { + intrnID = INTRN_vector_from_scalar_v8u8; + vecType = GlobalTables::GetTypeTable().GetV8UInt8(); + break; 
+ } + case PTY_v2i64: { + intrnID = INTRN_vector_from_scalar_v2i64; + vecType = GlobalTables::GetTypeTable().GetV2Int64(); + break; + } + case PTY_v2u64: { + intrnID = INTRN_vector_from_scalar_v2u64; + vecType = GlobalTables::GetTypeTable().GetV2UInt64(); + break; + } + default: { + ASSERT(0, "NIY"); + } + } + // generate instrinsic op + IntrinsicopNode *rhs = codeMP->New(*codeMPAlloc, OP_intrinsicop, vecPrimType); + rhs->SetIntrinsic(intrnID); + rhs->SetNumOpnds(1); + rhs->GetNopnd().push_back(scalar); + rhs->SetTyIdx(vecType->GetTypeIndex()); + PregIdx regIdx = mirFunc->GetPregTab()->CreatePreg(vecPrimType); + RegassignNode *stmtNode = codeMP->New(vecPrimType, regIdx, rhs); + return stmtNode; +} + +// v2uint8 v2int8 v2uint16 v2int16 are not added to prim type +bool SeqVectorize::HasVecType(PrimType sPrimType, uint8 lanes) { + if (lanes == 1) return false; + if ((GetPrimTypeSize(sPrimType) == 1 && lanes < 8) || + (GetPrimTypeSize(sPrimType) == 2 && lanes < 4)) { + return false; + } + return true; +} + +// TODO:: move to mirtype? 
+// Return the global vector MIRType holding `lanes` elements of the scalar
+// type sPrimType. Only the 64-bit and 128-bit shapes listed below are known;
+// any other combination trips CHECK_FATAL/ASSERT.
+MIRType* SeqVectorize::GenVecType(PrimType sPrimType, uint8 lanes) {
+  MIRType *vecType = nullptr;
+  CHECK_FATAL(IsPrimitiveInteger(sPrimType), "primtype should be integer");
+  switch (sPrimType) {
+    case PTY_i32: {
+      if (lanes == 4) {
+        vecType = GlobalTables::GetTypeTable().GetV4Int32();
+      } else if (lanes == 2) {
+        vecType = GlobalTables::GetTypeTable().GetV2Int32();
+      } else {
+        CHECK_FATAL(0, "unsupported int32 vectory lanes");
+      }
+      break;
+    }
+    case PTY_u32: {
+      if (lanes == 4) {
+        vecType = GlobalTables::GetTypeTable().GetV4UInt32();
+      } else if (lanes == 2) {
+        vecType = GlobalTables::GetTypeTable().GetV2UInt32();
+      } else {
+        CHECK_FATAL(0, "unsupported uint32 vectory lanes");
+      }
+      break;
+    }
+    case PTY_i16: {
+      if (lanes == 4) {
+        vecType = GlobalTables::GetTypeTable().GetV4Int16();
+      } else if (lanes == 8) {
+        vecType = GlobalTables::GetTypeTable().GetV8Int16();
+      } else {
+        CHECK_FATAL(0, "unsupported int16 vector lanes");
+      }
+      break;
+    }
+    case PTY_u16: {
+      if (lanes == 4) {
+        vecType = GlobalTables::GetTypeTable().GetV4UInt16();
+      } else if (lanes == 8) {
+        vecType = GlobalTables::GetTypeTable().GetV8UInt16();
+      } else {
+        CHECK_FATAL(0, "unsupported uint16 vector lanes");
+      }
+      break;
+    }
+    case PTY_i8: {
+      if (lanes == 16) {
+        vecType = GlobalTables::GetTypeTable().GetV16Int8();
+      } else if (lanes == 8) {
+        vecType = GlobalTables::GetTypeTable().GetV8Int8();
+      } else {
+        CHECK_FATAL(0, "unsupported int8 vector lanes");
+      }
+      break;
+    }
+    case PTY_u8: {
+      if (lanes == 16) {
+        vecType = GlobalTables::GetTypeTable().GetV16UInt8();
+      } else if (lanes == 8) {
+        vecType = GlobalTables::GetTypeTable().GetV8UInt8();
+      } else {
+        CHECK_FATAL(0, "unsupported uint8 vector lanes");
+      }
+      break;
+    }
+    case PTY_i64: {
+      if (lanes == 2) {
+        vecType = GlobalTables::GetTypeTable().GetV2Int64();
+      } else {
+        ASSERT(0, "unsupported i64 vector lanes");
+      }
+      // Without this break the case fell through and the signed result was
+      // overwritten by the unsigned v2u64 type chosen below.
+      break;
+    }
+    case PTY_u64:
+    case PTY_a64: {
+      if (lanes == 2) {
+        vecType = GlobalTables::GetTypeTable().GetV2UInt64();
+      } else {
+        ASSERT(0, "unsupported a64/u64 vector lanes");
+      }
+      break;
+    }
+    case PTY_ptr: {
+      // The width of PTY_ptr is queried rather than assumed; pointers are
+      // vectorized as unsigned integers of the same width.
+      if (GetPrimTypeSize(sPrimType) == 4) {
+        if (lanes == 4) {
+          vecType = GlobalTables::GetTypeTable().GetV4UInt32();
+        } else if (lanes == 2) {
+          vecType = GlobalTables::GetTypeTable().GetV2UInt32();
+        } else {
+          ASSERT(0, "unsupported ptr vector lanes");
+        }
+      } else if (GetPrimTypeSize(sPrimType) == 8) {
+        if (lanes == 2) {
+          vecType = GlobalTables::GetTypeTable().GetV2UInt64();
+        } else {
+          ASSERT(0, "unsupported ptr vector lanes");
+        }
+      }
+      break;
+    }
+    default:
+      ASSERT(0, "NIY");
+  }
+  return vecType;
+}
+
+// Return true when the integer constant held by rhs fits in the value range
+// of targetType. Non-integer-like target types yield false.
+// NOTE(review): the constant is cast to MIRIntConst without a kind check;
+// callers are presumably expected to pass integer constants only -- confirm.
+bool SeqVectorize::CanAdjustRhsType(PrimType targetType, ConstvalNode *rhs) {
+  MIRIntConst *intConst = static_cast<MIRIntConst *>(rhs->GetConstVal());
+  int64 v = intConst->GetValue();
+  bool res = false;
+  switch (targetType) {
+    case PTY_i32: {
+      res = (v >= INT_MIN && v <= INT_MAX);
+      break;
+    }
+    case PTY_u32: {
+      res = (v >= 0 && v <= UINT_MAX);
+      break;
+    }
+    case PTY_i16: {
+      res = (v >= SHRT_MIN && v <= SHRT_MAX);
+      break;
+    }
+    case PTY_u16: {
+      res = (v >= 0 && v <= USHRT_MAX);
+      break;
+    }
+    case PTY_i8: {
+      res = (v >= SCHAR_MIN && v <= SCHAR_MAX);
+      break;
+    }
+    case PTY_u8: {
+      res = (v >= 0 && v <= UCHAR_MAX);
+      break;
+    }
+    case PTY_i64:
+    case PTY_u64: {
+      res = true;
+      break;
+    }
+    default: {
+      break;
+    }
+  }
+  return res;
+}
+
+// Debug helper: print the base expression followed by every store that was
+// grouped under it.
+void SeqVectorize::DumpCandidates(MeExpr *base, StoreList *storelist) {
+  LogInfo::MapleLogger() << "Dump base node \t";
+  base->Dump(meIRMap, 0);
+  for (IassignNode *store : *storelist) {
+    store->Dump(0);
+  }
+}
+
+// Record iassign in `stores`, keyed by the base expression of its address.
+// Stores are skipped when they have no LfoPart, when the pointed-to type is
+// not an integer type, or when a constant rhs of a different type does not
+// fit into the stored type.
+void SeqVectorize::CollectStores(IassignNode *iassign) {
+  // No LfoPart recorded: the node may have been rewritten by loop
+  // vectorization. Use find() so that probing does not insert a null entry
+  // into the pre-emitter's map.
+  auto partIt = lfoStmtParts->find(iassign->GetStmtID());
+  if (partIt == lfoStmtParts->end() || partIt->second == nullptr) return;
+  // if point to type is not integer, skip
+  MIRType &mirType = GetTypeFromTyIdx(iassign->GetTyIdx());
+  CHECK_FATAL(mirType.GetKind() == kTypePointer, "iassign must have pointer type");
+  MIRPtrType *ptrType = static_cast<MIRPtrType *>(&mirType);
+  PrimType stmtpt = ptrType->GetPointedType()->GetPrimType();
+  if (!IsPrimitiveInteger(stmtpt)) return;
+  // check lhs and rhs type
+  if (iassign->GetRHS()->IsConstval() &&
+      (stmtpt != iassign->GetRHS()->GetPrimType()) &&
+      (!CanAdjustRhsType(stmtpt, static_cast<ConstvalNode *>(iassign->GetRHS())))) {
+    return;
+  }
+  // compare base address with store list
+  LfoPart *lfoP = partIt->second;
+  IassignMeStmt *iassMeStmt = static_cast<IassignMeStmt *>(lfoP->GetMeStmt());
+  IvarMeExpr *ivarMeExpr = iassMeStmt->GetLHSVal();
+  MeExpr *base = ivarMeExpr->GetBase();
+  if (ivarMeExpr->GetOp() == OP_iread ||
+      ivarMeExpr->GetOp() == OP_ireadoff) {
+    // For an array address the grouping key is the array base (operand 0);
+    // any other base expression is used as the key unchanged.
+    if (base->GetOp() == OP_array) {
+      NaryMeExpr *baseNary = static_cast<NaryMeExpr *>(base);
+      base = baseNary->GetOpnd(0);
+    }
+  } else {
+    CHECK_FATAL(0, "NIY:: iassign addrExpr op");
+  }
+  auto storeIt = stores.find(base);
+  if (storeIt != stores.end()) {
+    storeIt->second->push_back(iassign);
+    return;
+  }
+  // first store seen for this base
+  StoreList *storelist = localMP->New<StoreList>(localAlloc.Adapter());
+  storelist->push_back(iassign);
+  stores[base] = storelist;
+}
+
+// True when both expressions are integer constants with the same value.
+bool SeqVectorize::SameIntConstValue(MeExpr *e1, MeExpr *e2) {
+  if (e1->GetOp() == maple::OP_constval && e2->GetOp() == maple::OP_constval &&
+      IsPrimitiveInteger(e1->GetPrimType()) &&
+      IsPrimitiveInteger(e2->GetPrimType())) {
+    MIRConst *const1 = (static_cast<ConstMeExpr *>(e1))->GetConstVal();
+    MIRIntConst *intc1 = static_cast<MIRIntConst *>(const1);
+    MIRConst *const2 = (static_cast<ConstMeExpr *>(e2))->GetConstVal();
+    MIRIntConst *intc2 = static_cast<MIRIntConst *>(const2);
+    return (intc1->GetValue() == intc2->GetValue());
+  }
+  return false;
+}
+
+// Decide whether two right-hand sides can be combined into one vector value.
+bool SeqVectorize::CanSeqVecRhs(MeExpr *rhs1, MeExpr *rhs2) {
+  // case 1: rhs1 and rhs2 are constval and same value
+  if (SameIntConstValue(rhs1, rhs2)) {
+    return true;
+  }
+  // case 2: iread consecutive memory
+  if (rhs1->GetMeOp() == rhs2->GetMeOp()) {
+    if (rhs1->GetMeOp() == maple::kMeOpIvar) {
+      IvarMeExpr *rhs1Ivar = static_cast<IvarMeExpr *>(rhs1);
+      IvarMeExpr *rhs2Ivar = static_cast<IvarMeExpr *>(rhs2);
+      if (IsIvarExprConsecutiveMem(rhs1Ivar, rhs2Ivar, rhs1Ivar->GetPrimType())) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// True when subscript/offset expression off2 equals off1 + diff. Recognized
+// shapes: (x + c1, x + c2), (x * k, (x * k) + c) and (c1, c2).
+bool SeqVectorize::IsOpExprConsecutiveMem(MeExpr *off1, MeExpr *off2, int32_t diff) {
+  if (off1->GetOp() == off2->GetOp() &&
+      off1->GetOp() == OP_add) {
+    // same first operand, constant second operands that differ by diff
+    if (off1->GetOpnd(0) == off2->GetOpnd(0) &&
+        (off1->GetOpnd(1)->GetOp() == OP_constval) &&
+        (off2->GetOpnd(1)->GetOp() == OP_constval)) {
+      MIRConst *constoff1 = static_cast<ConstMeExpr *>(off1->GetOpnd(1))->GetConstVal();
+      MIRIntConst *intoff1 = static_cast<MIRIntConst *>(constoff1);
+      MIRConst *constoff2 = static_cast<ConstMeExpr *>(off2->GetOpnd(1))->GetConstVal();
+      MIRIntConst *intoff2 = static_cast<MIRIntConst *>(constoff2);
+      if (intoff2->GetValue() - intoff1->GetValue() == diff) {
+        return true;
+      }
+    }
+  } else if (off1->GetOp() == OP_mul && off2->GetOp() == OP_add) {
+    // off2 is exactly off1 plus a constant equal to diff
+    if (off1 == off2->GetOpnd(0) && off2->GetOpnd(1)->GetOp() == OP_constval) {
+      MIRConst *constoff2 = static_cast<ConstMeExpr *>(off2->GetOpnd(1))->GetConstVal();
+      MIRIntConst *intoff2 = static_cast<MIRIntConst *>(constoff2);
+      if (intoff2->GetValue() == diff) {
+        return true;
+      }
+    }
+  } else if (off1->GetOp() == off2->GetOp() && off1->GetOp() == OP_constval) {
+    MIRConst *const1 = static_cast<ConstMeExpr *>(off1)->GetConstVal();
+    MIRIntConst *intc1 = static_cast<MIRIntConst *>(const1);
+    MIRConst *const2 = static_cast<ConstMeExpr *>(off2)->GetConstVal();
+    MIRIntConst *intc2 = static_cast<MIRIntConst *>(const2);
+    if (intc2->GetValue() - intc1->GetValue() == diff) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// True when ivar2 addresses the element immediately following ivar1:
+// same prim type, same base opcode, same field id, and either array
+// subscripts that differ by one in the last operand or ivar offsets that
+// differ by the size of ptrType.
+bool SeqVectorize::IsIvarExprConsecutiveMem(IvarMeExpr *ivar1, IvarMeExpr *ivar2, PrimType ptrType) {
+  MeExpr *base1 = ivar1->GetBase();
+  MeExpr *base2 = ivar2->GetBase();
+  uint32_t base1NumOpnds = base1->GetNumOpnds();
+  uint32_t base2NumOpnds = base2->GetNumOpnds();
+
+  // check type
+  if (ivar1->GetPrimType() != ivar2->GetPrimType()) return false;
+  // check opcode
+  if (base1->GetOp() != base2->GetOp()) return false;
+  // check fieldID should be the same
+  if (ivar1->GetFieldID() != ivar2->GetFieldID()) return false;
+  // base is array: check array dimensions are same and lower dimension exprs are same
+  if (base1->GetOp() == OP_array) {
+    // check base opnds number are same
+    if (base1NumOpnds != base2NumOpnds) return false;
+    // every subscript except the last one must be the identical expression
+    for (int32_t i = 1; i < static_cast<int32_t>(base1NumOpnds) - 1; i++) {
+      if (base1->GetOpnd(i) != base2->GetOpnd(i)) {
+        return false;
+      }
+    }
+    // the last subscript must advance by exactly one element
+    MeExpr *off1 = base1->GetOpnd(base1NumOpnds - 1);
+    MeExpr *off2 = base2->GetOpnd(base2NumOpnds - 1);
+    if (!IsOpExprConsecutiveMem(off1, off2, 1)) {
+      return false;
+    }
+  } else {
+    // check base opcode should be ptr here
+    if (base2->GetOp() == OP_array) return false;
+    // base is symbol: offsets must differ by one element size
+    uint32_t diff = GetPrimTypeSize(ptrType);
+    if (ivar2->GetOffset() - ivar1->GetOffset() != diff) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// True when store s2 writes the element right after the one written by s1
+// and their right-hand sides can be combined (see CanSeqVecRhs).
+bool SeqVectorize::CanSeqVec(IassignNode *s1, IassignNode *s2) {
+  LfoPart *lfoP1 = (*lfoStmtParts)[s1->GetStmtID()];
+  IassignMeStmt *iassMeStmt1 = static_cast<IassignMeStmt *>(lfoP1->GetMeStmt());
+  IvarMeExpr *lhsMeExpr1 = iassMeStmt1->GetLHSVal();
+  LfoPart *lfoP2 = (*lfoStmtParts)[s2->GetStmtID()];
+  IassignMeStmt *iassMeStmt2 = static_cast<IassignMeStmt *>(lfoP2->GetMeStmt());
+  IvarMeExpr *lhsMeExpr2 = iassMeStmt2->GetLHSVal();
+  MIRType &mirType = GetTypeFromTyIdx(s1->GetTyIdx());
+  CHECK_FATAL(mirType.GetKind() == kTypePointer, "iassign must have pointer type");
+  MIRPtrType *ptrType = static_cast<MIRPtrType *>(&mirType);
+  // check lhs ivar expression
+  if (!IsIvarExprConsecutiveMem(lhsMeExpr1, lhsMeExpr2, ptrType->GetPointedType()->GetPrimType())) {
+    return false;
+  }
+  // check rhs
+  MeExpr *rhs1 = iassMeStmt1->GetRHS();
+  MeExpr *rhs2 = iassMeStmt2->GetRHS();
+  return CanSeqVecRhs(rhs1, rhs2);
+}
+
+// Largest power of two that is <= x. Returns 0 for x == 0, for which
+// __builtin_clz is undefined; the shift is done on an unsigned value so the
+// top bit can be produced without signed overflow.
+static unsigned int previousPowerOfTwo(unsigned int x) {
+  if (x == 0) {
+    return 0;
+  }
+  return 1u << ((sizeof(x) * 8 - 1) - __builtin_clz(x));
+}
+
+// Merge the consecutive stores in cands into vector stores. Each round keeps
+// cands[start], retypes it to a vector store of `lanes` elements and removes
+// the following lanes-1 statements from the enclosing block. Rounds repeat on
+// the remaining candidates until fewer than two are left or no vector type
+// exists for the remaining count.
+void SeqVectorize::MergeIassigns(MapleVector<IassignNode *> &cands) {
+  if (cands.size() < 2) {
+    return;  // nothing to merge
+  }
+  MIRType &mirType = GetTypeFromTyIdx(cands[0]->GetTyIdx());
+  CHECK_FATAL(mirType.GetKind() == kTypePointer, "iassign must have pointer type");
+  MIRPtrType *ptrType = static_cast<MIRPtrType *>(&mirType);
+  PrimType ptType = ptrType->GetPointedType()->GetPrimType();
+  // 16 bytes is the size of the widest vector shapes GenVecType produces
+  uint32_t maxLanes = 16 / GetPrimTypeSize(ptType);
+  uint32_t len = static_cast<uint32_t>(cands.size());
+  uint32_t start = 0;
+  bool merged = false;
+  do {
+    IassignNode *iassign = cands[start];
+    uint32_t candCountP2 = previousPowerOfTwo(len);
+    uint32_t lanes = (candCountP2 < maxLanes) ? candCountP2 : maxLanes;
+    if (!HasVecType(ptType, lanes)) {
+      break;  // early quit if ptType and lanes has no vectype
+    }
+    // update lhs type
+    MIRType *vecType = GenVecType(ptType, lanes);
+    ASSERT(vecType != nullptr, "vector type should not be null");
+    MIRType *pvecType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*vecType, PTY_ptr);
+    iassign->SetTyIdx(pvecType->GetTypeIndex());
+
+    LfoPart *lfoP = (*lfoStmtParts)[iassign->GetStmtID()];
+    BaseNode *parent = lfoP->GetParent();
+    CHECK_FATAL(parent && parent->GetOpCode() == OP_block, "unexpect parent type");
+    BlockNode *blockParent = static_cast<BlockNode *>(parent);
+    // update rhs
+    if (iassign->GetRHS()->GetOpCode() == OP_constval) {
+      // rhs is constant: splat it into a vector preg right before the store
+      RegassignNode *dupScalarStmt = GenDupScalarStmt(iassign->GetRHS(), vecType->GetPrimType());
+      RegreadNode *regreadNode = codeMP->New<RegreadNode>(vecType->GetPrimType(), dupScalarStmt->GetRegIdx());
+      blockParent->InsertBefore(iassign, dupScalarStmt);
+      iassign->SetRHS(regreadNode);
+    } else if (iassign->GetRHS()->GetOpCode() == OP_iread) {
+      // rhs is iread: widen the load to the matching vector type
+      IreadNode *ireadnode = static_cast<IreadNode *>(iassign->GetRHS());
+      MIRType &rhsMirType = GetTypeFromTyIdx(ireadnode->GetTyIdx());
+      CHECK_FATAL(rhsMirType.GetKind() == kTypePointer, "iread must have pointer type");
+      MIRPtrType *rhsptrType = static_cast<MIRPtrType *>(&rhsMirType);
+      MIRType *rhsvecType = GenVecType(rhsptrType->GetPointedType()->GetPrimType(), lanes);
+      ASSERT(rhsvecType != nullptr, "vector type should not be null");
+      MIRType *rhspvecType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*rhsvecType, PTY_ptr);
+      ireadnode->SetTyIdx(rhspvecType->GetTypeIndex());
+      ireadnode->SetPrimType(rhsvecType->GetPrimType());
+    } else {
+      CHECK_FATAL(0, "NIY:: rhs opcode is not supported yet ");
+    }
+
+    // delete the stores that are now covered by the vector store
+    for (uint32_t i = start + 1; i < start + lanes; i++) {
+      blockParent->RemoveStmt(cands[i]);
+    }
+    len -= lanes;
+    start += lanes;
+    merged = true;
+  } while (len > 1);
+  // update counter only when at least one vector store was formed
+  if (merged) {
+    SeqVectorize::seqVecStores++;
+  }
+}
+
+// Look for a run of consecutive stores inside storelist and merge the first
+// run for which a vector type exists. The list is scanned in program order
+// first; if nothing was merged it is scanned again in reverse order, which
+// catches stores written from the highest address down to the lowest.
+void SeqVectorize::LegalityCheckAndTransform(StoreList *storelist) {
+  MapleVector<IassignNode *> cands(localAlloc.Adapter());
+  const int64_t len = static_cast<int64_t>(storelist->size());
+
+  // Scan starting at `first`, moving by `step` (+1 or -1). Returns true as
+  // soon as one candidate run has been merged.
+  auto scanAndMerge = [&](int64_t first, int64_t step) -> bool {
+    for (int64_t i = first; i >= 0 && i < len; i += step) {
+      IassignNode *store1 = (*storelist)[i];
+      MIRPtrType *ptrType = static_cast<MIRPtrType *>(&GetTypeFromTyIdx(store1->GetTyIdx()));
+      cands.push_back(store1);
+      // greedily extend the run with every store that directly follows the
+      // last accepted one in memory
+      for (int64_t j = i + step; j >= 0 && j < len; j += step) {
+        IassignNode *store2 = (*storelist)[j];
+        if (CanSeqVec(cands.back(), store2)) {
+          cands.push_back(store2);
+        }
+      }
+      // HasVecType takes a uint8 lane count; clamp so that large runs do not
+      // wrap around to a small value.
+      size_t count = cands.size();
+      uint8 lanes = (count > 255) ? static_cast<uint8>(255) : static_cast<uint8>(count);
+      if (HasVecType(ptrType->GetPointedType()->GetPrimType(), lanes)) {
+        MergeIassigns(cands);
+        return true;
+      }
+      cands.clear();
+    }
+    return false;
+  };
+
+  if (scanAndMerge(0, 1)) {
+    return;
+  }
+  (void)scanAndMerge(len - 1, -1);
+}
+
+// Try to merge every collected store list, then drop all collected state so
+// that the next straight-line region starts from scratch.
+void SeqVectorize::CheckAndTransform() {
+  if (stores.size() == 0) {
+    return;
+  }
+  // legality check and merge nodes
+  for (auto &entry : stores) {
+    if (enableDebug) {
+      DumpCandidates(entry.first, entry.second);
+    }
+    LegalityCheckAndTransform(entry.second);
+  }
+
+  // clear list
+  for (auto &entry : stores) {
+    entry.second->clear();
+  }
+  stores.clear();
+}
+
+// Walk the statement list starting at stmt. iassign statements are collected;
+// control-flow statements flush the collected stores (CheckAndTransform)
+// before their bodies, if any, are visited recursively.
+// NOTE(review): statements handled by the default case do not flush, so a
+// later store can be merged into an earlier one across them -- confirm that
+// this is safe for calls and for statements that read the stored memory.
+void SeqVectorize::VisitNode(StmtNode *stmt) {
+  if (stmt == nullptr) return;
+  do {
+    // fetch the successor first: the current statement may be transformed
+    StmtNode *nextStmt = stmt->GetNext();
+    switch (stmt->GetOpCode()) {
+      case OP_if: {
+        CheckAndTransform();
+        IfStmtNode *ifStmt = static_cast<IfStmtNode *>(stmt);
+        // visit then body
+        VisitNode(ifStmt->GetThenPart());
+        // visit else body
+        VisitNode(ifStmt->GetElsePart());
+        break;
+      }
+      case OP_block: {
+        CHECK_FATAL(stores.size() == 0, "store list should be empty");
+        BlockNode *block = static_cast<BlockNode *>(stmt);
+        VisitNode(block->GetFirst());
+        // deal with list in block
+        CheckAndTransform();
+        break;
+      }
+      case OP_doloop: {
+        CheckAndTransform();
+        VisitNode(static_cast<DoloopNode *>(stmt)->GetDoBody());
+        break;
+      }
+      case OP_dowhile:
+      case OP_while: {
+        CheckAndTransform();
+        VisitNode(static_cast<WhileStmtNode *>(stmt)->GetBody());
+        break;
+      }
+      case OP_iassign: {
+        IassignNode *iassign = static_cast<IassignNode *>(stmt);
+        CollectStores(iassign);
+        break;
+      }
+      case OP_label:
+      case OP_brfalse:
+      case OP_brtrue:
+      case OP_return:
+      case OP_switch:
+      case OP_igoto:
+      case OP_goto: {
+        // end of block
+        CheckAndTransform();
+        break;
+      }
+      default: {
+        break;  // do nothing
+      }
+    }
+    stmt = nextStmt;
+  } while (stmt != nullptr);
+}
+
+// Entry point: run sequence vectorization over the whole function body.
+void SeqVectorize::Perform() {
+  VisitNode(mirFunc->GetBody());
+}
+
+}  // namespace maple
--
Gitee