From db794e1e8de3ae47cabec377dc3cb27290c9f6d6 Mon Sep 17 00:00:00 2001
From: Alfred Huang
Date: Wed, 3 Nov 2021 16:26:46 -0700
Subject: [PATCH] Added vaddw/vaddw_high for -O3

---
 .../include/cg/aarch64/aarch64_cgfunc.h       |  1 +
 .../include/cg/aarch64/aarch64_md.def         |  4 ++
 src/mapleall/maple_be/include/cg/cgfunc.h     |  1 +
 .../src/cg/aarch64/aarch64_cgfunc.cpp         | 28 ++++++++++++
 src/mapleall/maple_be/src/cg/cgfunc.cpp       | 17 +++++++
 .../maple_ir/include/intrinsic_vector.def     | 44 +++++++++++++++++++
 6 files changed, 95 insertions(+)

diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h
index 2a1305adbb..ca7c344efa 100644
--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h
+++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h
@@ -266,6 +266,7 @@ class AArch64CGFunc : public CGFunc {
   LabelOperand &CreateFuncLabelOperand(const MIRSymbol &func);
   uint32 GetAggCopySize(uint32 offset1, uint32 offset2, uint32 alignment) const;
 
+  RegOperand *SelectVectorAddWiden(Operand *o1, PrimType otyp1, Operand *o2, PrimType otyp2, bool isLow) override;
   RegOperand *SelectVectorAbs(PrimType rType, Operand *o1) override;
   RegOperand *SelectVectorBinOp(PrimType rType, Operand *o1, PrimType oTyp1, Operand *o2, PrimType oTyp2,
                                 Opcode opc) override;
diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def b/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def
index 6a8b2f359f..e4314d18c4 100644
--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def
+++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def
@@ -825,6 +825,10 @@ DEFINE_MOP(MOP_vumullvvv,{mopdReg128VD,mopdReg64VS,mopdReg64VS},ISVECTOR,kLtFpal
 DEFINE_MOP(MOP_vabsuu, {mopdReg64VD,mopdReg64VS},ISVECTOR,kLtFpalu,"abs","0,1",1)
 DEFINE_MOP(MOP_vabsvv, {mopdReg128VD,mopdReg128VS},ISVECTOR,kLtFpalu,"abs","0,1",1)
 DEFINE_MOP(MOP_vadduuu, {mopdReg64VD,mopdReg64VS,mopdReg64VS},ISVECTOR,kLtFpalu,"add","0,1,2",1)
+DEFINE_MOP(MOP_vsaddwvvu,{mopdReg128VD,mopdReg128VS,mopdReg64VS},ISVECTOR,kLtFpalu,"saddw","0,1,2",1)
+DEFINE_MOP(MOP_vuaddwvvu,{mopdReg128VD,mopdReg128VS,mopdReg64VS},ISVECTOR,kLtFpalu,"uaddw","0,1,2",1)
+DEFINE_MOP(MOP_vsaddw2vvv,{mopdReg128VD,mopdReg128VS,mopdReg128VS},ISVECTOR,kLtFpalu,"saddw2","0,1,2",1)
+DEFINE_MOP(MOP_vuaddw2vvv,{mopdReg128VD,mopdReg128VS,mopdReg128VS},ISVECTOR,kLtFpalu,"uaddw2","0,1,2",1)
 DEFINE_MOP(MOP_vaddvvv, {mopdReg128VD,mopdReg128VS,mopdReg128VS},ISVECTOR,kLtFpalu,"add","0,1,2",1)
 DEFINE_MOP(MOP_vmuluuu, {mopdReg64VD,mopdReg64VS,mopdReg64VS},ISVECTOR,kLtFpalu,"mul","0,1,2",1)
 DEFINE_MOP(MOP_vmulvvv, {mopdReg128VD,mopdReg128VS,mopdReg128VS},ISVECTOR,kLtFpalu,"mul","0,1,2",1)
diff --git a/src/mapleall/maple_be/include/cg/cgfunc.h b/src/mapleall/maple_be/include/cg/cgfunc.h
index b529bb318e..d20b6a38b0 100644
--- a/src/mapleall/maple_be/include/cg/cgfunc.h
+++ b/src/mapleall/maple_be/include/cg/cgfunc.h
@@ -294,6 +294,7 @@ class CGFunc {
   virtual bool IsFrameReg(const RegOperand &opnd) const = 0;
 
   /* For Neon intrinsics */
+  virtual RegOperand *SelectVectorAddWiden(Operand *o1, PrimType otyp1, Operand *o2, PrimType otyp2, bool isLow) = 0;
   virtual RegOperand *SelectVectorAbs(PrimType rType, Operand *o1) = 0;
   virtual RegOperand *SelectVectorBinOp(PrimType rType, Operand *o1, PrimType oTyp1, Operand *o2, PrimType oTyp2,
                                         Opcode opc) = 0;
diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp
index d88cf2afb6..3ec8dc2ae2 100644
--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp
+++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp
@@ -9445,6 +9445,34 @@ RegOperand *AArch64CGFunc::SelectVectorAbs(PrimType rType, Operand *o1) {
   return res;
 }
 
+/*
+ * Widening vector add: emits res = o1 + widen(o2) and appends it to the current BB.
+ *   o1, otyp1: wide (128-bit) first source; the result register is created with otyp1.
+ *   o2, otyp2: narrow-element second source.
+ *   isLow:     true  -> saddw/uaddw   (o2 is a 64-bit vector),
+ *              false -> saddw2/uaddw2 (o2 is a 128-bit vector).
+ * The unsigned opcode is used when otyp1 is an unsigned integer type, the signed one otherwise.
+ */
+RegOperand *AArch64CGFunc::SelectVectorAddWiden(Operand *o1, PrimType otyp1, Operand *o2, PrimType otyp2, bool isLow) {
+  RegOperand *res = &CreateRegisterOperandOfType(otyp1);  /* restype is same as o1 */
+  VectorRegSpec *vecSpecDest = GetMemoryPool()->New<VectorRegSpec>(otyp1);
+  VectorRegSpec *vecSpec1 = GetMemoryPool()->New<VectorRegSpec>(otyp1);  /* vector operand 1 */
+  VectorRegSpec *vecSpec2 = GetMemoryPool()->New<VectorRegSpec>(otyp2);  /* vector operand 2 */
+
+  MOperator mOp;
+  if (isLow) {
+    mOp = IsUnsignedInteger(otyp1) ? MOP_vuaddwvvu : MOP_vsaddwvvu;
+  } else {
+    mOp = IsUnsignedInteger(otyp1) ? MOP_vuaddw2vvv : MOP_vsaddw2vvv;
+  }
+  Insn *insn = &GetCG()->BuildInstruction<AArch64VectorInsn>(mOp, *res, *o1, *o2);
+  static_cast<AArch64VectorInsn*>(insn)->PushRegSpecEntry(vecSpecDest);
+  static_cast<AArch64VectorInsn*>(insn)->PushRegSpecEntry(vecSpec1);
+  static_cast<AArch64VectorInsn*>(insn)->PushRegSpecEntry(vecSpec2);
+  GetCurBB()->AppendInsn(*insn);
+  return res;
+}
+
 RegOperand *AArch64CGFunc::SelectVectorImmMov(PrimType rType, Operand *src, PrimType sType) {
   RegOperand *res = &CreateRegisterOperandOfType(rType);  /* result operand */
   VectorRegSpec *vecSpec = GetMemoryPool()->New<VectorRegSpec>(rType);
diff --git a/src/mapleall/maple_be/src/cg/cgfunc.cpp b/src/mapleall/maple_be/src/cg/cgfunc.cpp
index cd6c366e6a..fa18ab883d 100644
--- a/src/mapleall/maple_be/src/cg/cgfunc.cpp
+++ b/src/mapleall/maple_be/src/cg/cgfunc.cpp
@@ -348,6 +348,13 @@ Operand *HandleJarrayMalloc(const BaseNode &parent, BaseNode &expr, CGFunc &cgFu
 }
 
 /* Neon intrinsic handling */
+/* Evaluate both operands of a vector_addw_low/high intrinsic, then select the widening add. */
+Operand *HandleVectorAddWiden(BaseNode &expr, CGFunc &cgFunc, bool isLow) {
+  Operand *o1 = cgFunc.HandleExpr(expr, *expr.Opnd(0));
+  Operand *o2 = cgFunc.HandleExpr(expr, *expr.Opnd(1));
+  return cgFunc.SelectVectorAddWiden(o1, expr.Opnd(0)->GetPrimType(), o2, expr.Opnd(1)->GetPrimType(), isLow);
+}
+
 Operand *HandleVectorFromScalar(IntrinsicopNode &intrnNode, CGFunc &cgFunc) {
   return cgFunc.SelectVectorFromScalar(intrnNode.GetPrimType(), cgFunc.HandleExpr(intrnNode, *intrnNode.Opnd(0)),
                                        intrnNode.Opnd(0)->GetPrimType());
@@ -636,6 +643,16 @@ Operand *HandleIntrinOp(const BaseNode &parent, BaseNode &expr, CGFunc &cgFunc)
     case INTRN_vector_abs_v4i32:
     case INTRN_vector_abs_v2i64:
       return HandleAbs(parent, intrinsicopNode, cgFunc);
+    case INTRN_vector_addw_low_v8i8: case INTRN_vector_addw_low_v8u8:
+    case INTRN_vector_addw_low_v4i16: case INTRN_vector_addw_low_v4u16:
+    case INTRN_vector_addw_low_v2i32: case INTRN_vector_addw_low_v2u32:
+      return HandleVectorAddWiden(intrinsicopNode, cgFunc, true);
+
+    case INTRN_vector_addw_high_v8i8: case INTRN_vector_addw_high_v8u8:
+    case INTRN_vector_addw_high_v4i16: case INTRN_vector_addw_high_v4u16:
+    case INTRN_vector_addw_high_v2i32: case INTRN_vector_addw_high_v2u32:
+      return HandleVectorAddWiden(intrinsicopNode, cgFunc, false);
+
     case INTRN_vector_sum_v8u8: case INTRN_vector_sum_v8i8:
     case INTRN_vector_sum_v4u16: case INTRN_vector_sum_v4i16:
     case INTRN_vector_sum_v2u32: case INTRN_vector_sum_v2i32:
diff --git a/src/mapleall/maple_ir/include/intrinsic_vector.def b/src/mapleall/maple_ir/include/intrinsic_vector.def
index 0e07c76083..c1ac952509 100644
--- a/src/mapleall/maple_ir/include/intrinsic_vector.def
+++ b/src/mapleall/maple_ir/include/intrinsic_vector.def
@@ -55,6 +55,50 @@ DEF_MIR_INTRINSIC(vector_abs_v2f64, "vector_abs_v2f64",
                   INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2F64,
                   kArgTyV2F64)
 
+// wideTy vector_addw_low(wideTy src1, narrowTy src2)
+//     Widen each element of src2 to the element size of src1, then add it
+//     to the matching element of src1. The result has the type of src1.
+DEF_MIR_INTRINSIC(vector_addw_low_v8i8, "vector_addw_low_v8i8",
+                  INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8I16,
+                  kArgTyV8I16, kArgTyV8I8)
+DEF_MIR_INTRINSIC(vector_addw_low_v4i16, "vector_addw_low_v4i16",
+                  INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4I32,
+                  kArgTyV4I32, kArgTyV4I16)
+DEF_MIR_INTRINSIC(vector_addw_low_v2i32, "vector_addw_low_v2i32",
+                  INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2I64,
+                  kArgTyV2I64, kArgTyV2I32)
+DEF_MIR_INTRINSIC(vector_addw_low_v8u8, "vector_addw_low_v8u8",
+                  INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8U16,
+                  kArgTyV8U16, kArgTyV8U8)
+DEF_MIR_INTRINSIC(vector_addw_low_v4u16, "vector_addw_low_v4u16",
+                  INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4U32,
+                  kArgTyV4U32, kArgTyV4U16)
+DEF_MIR_INTRINSIC(vector_addw_low_v2u32, "vector_addw_low_v2u32",
+                  INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2U64,
+                  kArgTyV2U64, kArgTyV2U32)
+
+// wideTy vector_addw_high(wideTy src1, narrowTy src2)
+//     Widen each element in the upper half of src2 to the element size of
+//     src1, then add it to the matching element of src1.
+DEF_MIR_INTRINSIC(vector_addw_high_v8i8, "vector_addw_high_v8i8",
+                  INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8I16,
+                  kArgTyV8I16, kArgTyV16I8)
+DEF_MIR_INTRINSIC(vector_addw_high_v4i16, "vector_addw_high_v4i16",
+                  INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4I32,
+                  kArgTyV4I32, kArgTyV8I16)
+DEF_MIR_INTRINSIC(vector_addw_high_v2i32, "vector_addw_high_v2i32",
+                  INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2I64,
+                  kArgTyV2I64, kArgTyV4I32)
+DEF_MIR_INTRINSIC(vector_addw_high_v8u8, "vector_addw_high_v8u8",
+                  INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8U16,
+                  kArgTyV8U16, kArgTyV16U8)
+DEF_MIR_INTRINSIC(vector_addw_high_v4u16, "vector_addw_high_v4u16",
+                  INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4U32,
+                  kArgTyV4U32, kArgTyV8U16)
+DEF_MIR_INTRINSIC(vector_addw_high_v2u32, "vector_addw_high_v2u32",
+                  INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2U64,
+                  kArgTyV2U64, kArgTyV4U32)
+
 // vecTy vector_from_scalar(scalarTy value)
 //     Create a vector by repeating the scalar value for each element in the
 //     vector.
-- 
Gitee