diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h index 804fcd6755d4b150ddd330cfeb994bbfbcd8c6e9..99a33ffa284c4d4cfdb5c9f4082414292d8df717 100644 --- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h @@ -280,14 +280,16 @@ class AArch64CGFunc : public CGFunc { RegOperand *SelectVectorGetElement(PrimType rType, Operand *src, PrimType sType, int32 lane) override; RegOperand *SelectVectorGetHigh(PrimType rType, Operand *src) override; RegOperand *SelectVectorGetLow(PrimType rType, Operand *src) override; + RegOperand *SelectVectorAbsSubL(PrimType rType, Operand *o1, Operand *o2, PrimType oTy, bool isLow) override; RegOperand *SelectVectorMadd(Operand *o1, PrimType oTyp1, Operand *o2, PrimType oTyp2, Operand *o3, PrimType oTyp3) override; RegOperand *SelectVectorMerge(PrimType rTyp, Operand *o1, Operand *o2, int32 iNum) override; - RegOperand *SelectVectorMull(PrimType rType, Operand *o1, PrimType oTyp1, Operand *o2, PrimType oTyp2) override; + RegOperand *SelectVectorMull(PrimType rType, Operand *o1, PrimType oTyp1, Operand *o2, PrimType oTyp2, bool isLow) override; RegOperand *SelectVectorNarrow(PrimType rType, Operand *o1, PrimType otyp) override; RegOperand *SelectVectorNarrow2(PrimType rType, Operand *o1, PrimType oty1, Operand *o2, PrimType oty2) override; RegOperand *SelectVectorNeg(PrimType rType, Operand *o1) override; RegOperand *SelectVectorNot(PrimType rType, Operand *o1) override; + RegOperand *SelectVectorPairwiseAdalp(Operand *src1, PrimType sty1, Operand *src2, PrimType sty2) override; RegOperand *SelectVectorPairwiseAdd(PrimType rType, Operand *src, PrimType sType) override; RegOperand *SelectVectorReverse(PrimType rtype, Operand *src, PrimType stype, uint32 size) override; RegOperand *SelectVectorSetElement(Operand *eOp, PrimType eTyp, Operand *vOpd, PrimType vTyp, int32 lane) override; diff --git 
a/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def b/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def index 8e11ba6f0a13308a7cb1c0a1fc3109f6009626ef..80aa80ed89a3ddc3d45bf4e576067732f65dcae7 100644 --- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def +++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def @@ -754,6 +754,14 @@ DEFINE_MOP(MOP_vxdupvr, {mopdReg128VD,mopdReg64IS},ISVECTOR,kLtFpalu,"dup","0,1" DEFINE_MOP(MOP_vduprv, {mopdReg64FD,mopdReg128VS},ISVECTOR,kLtFpalu,"dup","0,1",1) DEFINE_MOP(MOP_vextuuui,{mopdReg64VD,mopdReg64VS,mopdReg64VS,mopdImm8},ISVECTOR,kLtFpalu,"ext","0,1,2,3",1) DEFINE_MOP(MOP_vextvvvi,{mopdReg128VD,mopdReg128VS,mopdReg128VS,mopdImm8},ISVECTOR,kLtFpalu,"ext","0,1,2,3",1) +DEFINE_MOP(MOP_vsabdlvuu,{mopdReg128VD,mopdReg64VS,mopdReg64VS},ISVECTOR,kLtAlu,"sabdl","0,1,2",1) +DEFINE_MOP(MOP_vuabdlvuu,{mopdReg128VD,mopdReg64VS,mopdReg64VS},ISVECTOR,kLtAlu,"uabdl","0,1,2",1) +DEFINE_MOP(MOP_vsabdl2vvv,{mopdReg128VD,mopdReg128VS,mopdReg128VS},ISVECTOR,kLtFpalu,"sabdl2","0,1,2",1) +DEFINE_MOP(MOP_vuabdl2vvv,{mopdReg128VD,mopdReg128VS,mopdReg128VS},ISVECTOR,kLtFpalu,"uabdl2","0,1,2",1) +DEFINE_MOP(MOP_vspadaluu,{mopdReg64VDS,mopdReg64VS},ISPARTDEF|ISVECTOR,kLtAlu,"sadalp","0,1",1) +DEFINE_MOP(MOP_vspadalvv,{mopdReg128VDS,mopdReg128VS},ISPARTDEF|ISVECTOR,kLtAlu,"sadalp","0,1",1) +DEFINE_MOP(MOP_vupadaluu,{mopdReg64VDS,mopdReg64VS},ISPARTDEF|ISVECTOR,kLtAlu,"uadalp","0,1",1) +DEFINE_MOP(MOP_vupadalvv,{mopdReg128VDS,mopdReg128VS},ISPARTDEF|ISVECTOR,kLtAlu,"uadalp","0,1",1) DEFINE_MOP(MOP_vspadduu,{mopdReg64VD,mopdReg64VS},ISVECTOR,kLtAlu,"saddlp","0,1",1) DEFINE_MOP(MOP_vspaddvv,{mopdReg128VD,mopdReg128VS},ISVECTOR,kLtAlu,"saddlp","0,1",1) DEFINE_MOP(MOP_vupadduu,{mopdReg64VD,mopdReg64VS},ISVECTOR,kLtAlu,"uaddlp","0,1",1) @@ -822,6 +830,8 @@ DEFINE_MOP(MOP_vsmaddvvv,{mopdReg128VDS,mopdReg64VS,mopdReg64VS},ISVECTOR,kLtFpa 
DEFINE_MOP(MOP_vumaddvvv,{mopdReg128VDS,mopdReg64VS,mopdReg64VS},ISVECTOR,kLtFpalu,"umlal","0,1,2",1) DEFINE_MOP(MOP_vsmullvvv,{mopdReg128VD,mopdReg64VS,mopdReg64VS},ISVECTOR,kLtFpalu,"smull","0,1,2",1) DEFINE_MOP(MOP_vumullvvv,{mopdReg128VD,mopdReg64VS,mopdReg64VS},ISVECTOR,kLtFpalu,"umull","0,1,2",1) +DEFINE_MOP(MOP_vsmull2vvv,{mopdReg128VD,mopdReg128VS,mopdReg128VS},ISVECTOR,kLtFpalu,"smull2","0,1,2",1) +DEFINE_MOP(MOP_vumull2vvv,{mopdReg128VD,mopdReg128VS,mopdReg128VS},ISVECTOR,kLtFpalu,"umull2","0,1,2",1) DEFINE_MOP(MOP_vabsuu, {mopdReg64VD,mopdReg64VS},ISVECTOR,kLtFpalu,"abs","0,1",1) DEFINE_MOP(MOP_vabsvv, {mopdReg128VD,mopdReg128VS},ISVECTOR,kLtFpalu,"abs","0,1",1) DEFINE_MOP(MOP_vadduuu, {mopdReg64VD,mopdReg64VS,mopdReg64VS},ISVECTOR,kLtFpalu,"add","0,1,2",1) diff --git a/src/mapleall/maple_be/include/cg/cgfunc.h b/src/mapleall/maple_be/include/cg/cgfunc.h index 60fc9fcd103135a0fb17e551c647eab4758df3c9..b1868d13e4a562813dc96ba5f423bc129b7668e3 100644 --- a/src/mapleall/maple_be/include/cg/cgfunc.h +++ b/src/mapleall/maple_be/include/cg/cgfunc.h @@ -306,14 +306,17 @@ class CGFunc { virtual RegOperand *SelectVectorGetHigh(PrimType rType, Operand *src) = 0; virtual RegOperand *SelectVectorGetLow(PrimType rType, Operand *src) = 0; virtual RegOperand *SelectVectorGetElement(PrimType rType, Operand *src, PrimType sType, int32 lane) = 0; + virtual RegOperand *SelectVectorAbsSubL(PrimType rType, Operand *o1, Operand *o2, PrimType oTy, bool isLow) = 0; virtual RegOperand *SelectVectorMadd(Operand *o1, PrimType oTyp1, Operand *o2, PrimType oTyp2, Operand *o3, PrimType oTyp3) = 0; virtual RegOperand *SelectVectorMerge(PrimType rTyp, Operand *o1, Operand *o2, int32 iNum) = 0; - virtual RegOperand *SelectVectorMull(PrimType rType, Operand *o1, PrimType oTyp1, Operand *o2, PrimType oTyp2) = 0; + virtual RegOperand *SelectVectorMull(PrimType rType, Operand *o1, PrimType oTyp1, Operand *o2, PrimType oTyp2, bool isLow) = 0; virtual RegOperand *SelectVectorNarrow(PrimType 
rType, Operand *o1, PrimType otyp) = 0; virtual RegOperand *SelectVectorNarrow2(PrimType rType, Operand *o1, PrimType oty1, Operand *o2, PrimType oty2) = 0; virtual RegOperand *SelectVectorNeg(PrimType rType, Operand *o1) = 0; virtual RegOperand *SelectVectorNot(PrimType rType, Operand *o1) = 0; + + virtual RegOperand *SelectVectorPairwiseAdalp(Operand *src1, PrimType sty1, Operand *src2, PrimType sty2) = 0; virtual RegOperand *SelectVectorPairwiseAdd(PrimType rType, Operand *src, PrimType sType) = 0; virtual RegOperand *SelectVectorReverse(PrimType rtype, Operand *src, PrimType stype, uint32 size) = 0; virtual RegOperand *SelectVectorSetElement(Operand *eOp, PrimType eTyp, Operand *vOpd, PrimType vTyp, int32 lane) = 0; diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp index ed818a3207eb1d6158b9709162a56810c66810ce..943c3d37b1f32b4d0498ebe99dbfec6bd4cce7b1 100644 --- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp @@ -9583,6 +9583,35 @@ RegOperand *AArch64CGFunc::SelectVectorGetElement(PrimType rType, Operand *src, return res; } +RegOperand *AArch64CGFunc::SelectVectorPairwiseAdalp(Operand *src1, PrimType sty1, Operand *src2, PrimType sty2) { + RegOperand *res; + VectorRegSpec *vecSpecDest; + if (!IsPrimitiveVector(sty1)) { + res = SelectOneElementVectorCopy(src1, sty1); + vecSpecDest = GetMemoryPool()->New(k1ByteSize, k64BitSize); + } else { + res = static_cast(src1); + vecSpecDest = GetMemoryPool()->New(sty1); + } + VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(sty2); + + MOperator mop; + if (IsUnsignedInteger(sty1)) { + mop = GetPrimTypeSize(sty1) > k8ByteSize ? MOP_vupadalvv : MOP_vupadaluu; + } else { + mop = GetPrimTypeSize(sty1) > k8ByteSize ? 
MOP_vspadalvv : MOP_vspadaluu; + } + + Insn *insn = &GetCG()->BuildInstruction(mop, *res, *src2); + static_cast(insn)->PushRegSpecEntry(vecSpecDest); + static_cast(insn)->PushRegSpecEntry(vecSpecSrc); + GetCurBB()->AppendInsn(*insn); + if (!IsPrimitiveVector(sty1)) { + res = AdjustOneElementVectorOperand(sty1, res); + } + return res; +} + RegOperand *AArch64CGFunc::SelectVectorPairwiseAdd(PrimType rType, Operand *src, PrimType sType) { PrimType oType = rType; rType = FilterOneElementVectorType(oType); @@ -9632,6 +9661,26 @@ RegOperand *AArch64CGFunc::SelectVectorSetElement(Operand *eOpnd, PrimType eType return static_cast(vOpnd); } +RegOperand *AArch64CGFunc::SelectVectorAbsSubL(PrimType rType, Operand *o1, Operand *o2, PrimType oTy, bool isLow) { + RegOperand *res = &CreateRegisterOperandOfType(rType); + VectorRegSpec *vecSpecDest = GetMemoryPool()->New(rType); + VectorRegSpec *vecSpecOpd1 = GetMemoryPool()->New(oTy); + VectorRegSpec *vecSpecOpd2 = GetMemoryPool()->New(oTy); /* same opnd types */ + + MOperator mop; + if (isLow) { + mop = IsPrimitiveUnSignedVector(rType) ? MOP_vuabdlvuu : MOP_vsabdlvuu; + } else { + mop = IsPrimitiveUnSignedVector(rType) ? 
MOP_vuabdl2vvv : MOP_vsabdl2vvv; + } + Insn *insn = &GetCG()->BuildInstruction(mop, *res, *o1, *o2); + static_cast(insn)->PushRegSpecEntry(vecSpecDest); + static_cast(insn)->PushRegSpecEntry(vecSpecOpd1); + static_cast(insn)->PushRegSpecEntry(vecSpecOpd2); + GetCurBB()->AppendInsn(*insn); + return res; +} + RegOperand *AArch64CGFunc::SelectVectorMerge(PrimType rType, Operand *o1, Operand *o2, int32 index) { if (!IsPrimitiveVector(rType)) { static_cast(o1)->SetIF64Vec(); @@ -9989,13 +10038,19 @@ RegOperand *AArch64CGFunc::SelectVectorMadd(Operand *o1, PrimType oTyp1, Operand return static_cast(o1); } -RegOperand *AArch64CGFunc::SelectVectorMull(PrimType rType, Operand *o1, PrimType oTyp1, Operand *o2, PrimType oTyp2) { +RegOperand *AArch64CGFunc::SelectVectorMull(PrimType rType, Operand *o1, PrimType oTyp1, + Operand *o2, PrimType oTyp2, bool isLow) { RegOperand *res = &CreateRegisterOperandOfType(rType); /* result operand */ VectorRegSpec *vecSpecDest = GetMemoryPool()->New(rType); VectorRegSpec *vecSpec1 = GetMemoryPool()->New(oTyp1); /* vector operand 1 */ VectorRegSpec *vecSpec2 = GetMemoryPool()->New(oTyp2); /* vector operand 1 */ - MOperator mop = IsPrimitiveUnSignedVector(rType) ? MOP_vumullvvv : MOP_vsmullvvv; + MOperator mop; + if (isLow) { + mop = IsPrimitiveUnSignedVector(rType) ? MOP_vumullvvv : MOP_vsmullvvv; + } else { + mop = IsPrimitiveUnSignedVector(rType) ? 
MOP_vumull2vvv : MOP_vsmull2vvv; + } Insn *insn = &GetCG()->BuildInstruction(mop, *res, *o1, *o2); static_cast(insn)->PushRegSpecEntry(vecSpecDest); static_cast(insn)->PushRegSpecEntry(vecSpec1); diff --git a/src/mapleall/maple_be/src/cg/cgfunc.cpp b/src/mapleall/maple_be/src/cg/cgfunc.cpp index 4e690f320b43e294ed11b7e9cb8e35a5ca005f58..4b436c237e88a7c11d5338385343b4cc98c07060 100644 --- a/src/mapleall/maple_be/src/cg/cgfunc.cpp +++ b/src/mapleall/maple_be/src/cg/cgfunc.cpp @@ -385,6 +385,12 @@ Operand *HandleVectorFromScalar(IntrinsicopNode &intrnNode, CGFunc &cgFunc) { intrnNode.Opnd(0)->GetPrimType()); } +Operand *HandleVectorAbsSubL(IntrinsicopNode &intrnNode, CGFunc &cgFunc, bool isLow) { + Operand *opnd1 = cgFunc.HandleExpr(intrnNode, *intrnNode.Opnd(0)); /* vector operand 1 */ + Operand *opnd2 = cgFunc.HandleExpr(intrnNode, *intrnNode.Opnd(1)); /* vector operand 2 */ + return cgFunc.SelectVectorAbsSubL(intrnNode.GetPrimType(), opnd1, opnd2, intrnNode.Opnd(0)->GetPrimType(), isLow); +} + Operand *HandleVectorMerge(IntrinsicopNode &intrnNode, CGFunc &cgFunc) { Operand *opnd1 = cgFunc.HandleExpr(intrnNode, *intrnNode.Opnd(0)); /* vector operand1 */ Operand *opnd2 = cgFunc.HandleExpr(intrnNode, *intrnNode.Opnd(1)); /* vector operand2 */ @@ -432,11 +438,19 @@ Operand *HandleVectorGetElement(IntrinsicopNode &intrnNode, CGFunc &cgFunc) { } Operand *HandleVectorPairwiseAdd(IntrinsicopNode &intrnNode, CGFunc &cgFunc) { - Operand *src = cgFunc.HandleExpr(intrnNode, *intrnNode.Opnd(0)); /* vector src operand*/ + Operand *src = cgFunc.HandleExpr(intrnNode, *intrnNode.Opnd(0)); /* vector src operand */ PrimType sType = intrnNode.Opnd(0)->GetPrimType(); return cgFunc.SelectVectorPairwiseAdd(intrnNode.GetPrimType(), src, sType); } +Operand *HandleVectorPairwiseAdalp(IntrinsicopNode &intrnNode, CGFunc &cgFunc) { + BaseNode *arg1 = intrnNode.Opnd(0); + BaseNode *arg2 = intrnNode.Opnd(1); + Operand *src1 = cgFunc.HandleExpr(intrnNode, *arg1); /* vector src operand 1 */ + 
Operand *src2 = cgFunc.HandleExpr(intrnNode, *arg2); /* vector src operand 2 */ + return cgFunc.SelectVectorPairwiseAdalp(src1, arg1->GetPrimType(), src2, arg2->GetPrimType()); +} + Operand *HandleVectorSetElement(IntrinsicopNode &intrnNode, CGFunc &cgFunc) { BaseNode *arg0 = intrnNode.Opnd(0); /* uint32_t operand */ Operand *opnd0 = cgFunc.HandleExpr(intrnNode, *arg0); @@ -509,13 +523,13 @@ Operand *HandleVectorMadd(IntrinsicopNode &intrnNode, CGFunc &cgFunc) { return cgFunc.SelectVectorMadd(opnd1, oTyp1, opnd2, oTyp2, opnd3, oTyp3); } -Operand *HandleVectorMull(IntrinsicopNode &intrnNode, CGFunc &cgFunc) { +Operand *HandleVectorMull(IntrinsicopNode &intrnNode, CGFunc &cgFunc, bool isLow) { PrimType rType = intrnNode.GetPrimType(); /* result operand */ Operand *opnd1 = cgFunc.HandleExpr(intrnNode, *intrnNode.Opnd(0)); /* vector operand 1 */ Operand *opnd2 = cgFunc.HandleExpr(intrnNode, *intrnNode.Opnd(1)); /* vector operand 2 */ PrimType oTyp1 = intrnNode.Opnd(0)->GetPrimType(); PrimType oTyp2 = intrnNode.Opnd(1)->GetPrimType(); - return cgFunc.SelectVectorMull(rType, opnd1, oTyp1, opnd2, oTyp2); + return cgFunc.SelectVectorMull(rType, opnd1, oTyp1, opnd2, oTyp2, isLow); } Operand *HandleVectorNarrow(IntrinsicopNode &intrnNode, CGFunc &cgFunc, bool isLow) { @@ -705,6 +719,16 @@ Operand *HandleIntrinOp(const BaseNode &parent, BaseNode &expr, CGFunc &cgFunc) case INTRN_vector_from_scalar_v2u64: case INTRN_vector_from_scalar_v2i64: return HandleVectorFromScalar(intrinsicopNode, cgFunc); + case INTRN_vector_labssub_low_v8u8: case INTRN_vector_labssub_low_v8i8: + case INTRN_vector_labssub_low_v4u16: case INTRN_vector_labssub_low_v4i16: + case INTRN_vector_labssub_low_v2u32: case INTRN_vector_labssub_low_v2i32: + return HandleVectorAbsSubL(intrinsicopNode, cgFunc, true); + + case INTRN_vector_labssub_high_v8u8: case INTRN_vector_labssub_high_v8i8: + case INTRN_vector_labssub_high_v4u16: case INTRN_vector_labssub_high_v4i16: + case INTRN_vector_labssub_high_v2u32: case 
INTRN_vector_labssub_high_v2i32: + return HandleVectorAbsSubL(intrinsicopNode, cgFunc, false); + case INTRN_vector_merge_v8u8: case INTRN_vector_merge_v8i8: case INTRN_vector_merge_v4u16: case INTRN_vector_merge_v4i16: case INTRN_vector_merge_v2u32: case INTRN_vector_merge_v2i32: @@ -747,6 +771,14 @@ Operand *HandleIntrinOp(const BaseNode &parent, BaseNode &expr, CGFunc &cgFunc) case INTRN_vector_get_element_v2u64: case INTRN_vector_get_element_v2i64: return HandleVectorGetElement(intrinsicopNode, cgFunc); + case INTRN_vector_pairwise_adalp_v8i8: case INTRN_vector_pairwise_adalp_v4i16: + case INTRN_vector_pairwise_adalp_v2i32: case INTRN_vector_pairwise_adalp_v8u8: + case INTRN_vector_pairwise_adalp_v4u16: case INTRN_vector_pairwise_adalp_v2u32: + case INTRN_vector_pairwise_adalp_v16i8: case INTRN_vector_pairwise_adalp_v8i16: + case INTRN_vector_pairwise_adalp_v4i32: case INTRN_vector_pairwise_adalp_v16u8: + case INTRN_vector_pairwise_adalp_v8u16: case INTRN_vector_pairwise_adalp_v4u32: + return HandleVectorPairwiseAdalp(intrinsicopNode, cgFunc); + case INTRN_vector_pairwise_add_v8u8: case INTRN_vector_pairwise_add_v8i8: case INTRN_vector_pairwise_add_v4u16: case INTRN_vector_pairwise_add_v4i16: case INTRN_vector_pairwise_add_v2u32: case INTRN_vector_pairwise_add_v2i32: @@ -760,10 +792,15 @@ Operand *HandleIntrinOp(const BaseNode &parent, BaseNode &expr, CGFunc &cgFunc) case INTRN_vector_madd_v2u32: case INTRN_vector_madd_v2i32: return HandleVectorMadd(intrinsicopNode, cgFunc); - case INTRN_vector_mul_v8u8: case INTRN_vector_mul_v8i8: - case INTRN_vector_mul_v4u16: case INTRN_vector_mul_v4i16: - case INTRN_vector_mul_v2u32: case INTRN_vector_mul_v2i32: - return HandleVectorMull(intrinsicopNode, cgFunc); + case INTRN_vector_mull_low_v8u8: case INTRN_vector_mull_low_v8i8: + case INTRN_vector_mull_low_v4u16: case INTRN_vector_mull_low_v4i16: + case INTRN_vector_mull_low_v2u32: case INTRN_vector_mull_low_v2i32: + return HandleVectorMull(intrinsicopNode, cgFunc, true); 
+ + case INTRN_vector_mull_high_v8u8: case INTRN_vector_mull_high_v8i8: + case INTRN_vector_mull_high_v4u16: case INTRN_vector_mull_high_v4i16: + case INTRN_vector_mull_high_v2u32: case INTRN_vector_mull_high_v2i32: + return HandleVectorMull(intrinsicopNode, cgFunc, false); case INTRN_vector_narrow_low_v8u16: case INTRN_vector_narrow_low_v8i16: case INTRN_vector_narrow_low_v4u32: case INTRN_vector_narrow_low_v4i32: diff --git a/src/mapleall/maple_ir/include/intrinsic_vector.def b/src/mapleall/maple_ir/include/intrinsic_vector.def index 7104031bd7ce2d1f87a8681365d86dc49c9ec3c6..257f2d1c8b7f526d87b5eeea13b0d4c7214aaea1 100644 --- a/src/mapleall/maple_ir/include/intrinsic_vector.def +++ b/src/mapleall/maple_ir/include/intrinsic_vector.def @@ -163,6 +163,48 @@ DEF_MIR_INTRINSIC(vector_from_scalar_v2f32, "vector_from_scalar_v2f32", INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2F32, kArgTyF32) +// vecTy2 vector_labssub_low(vecTy1 src1, vecTy1 src2) +// Create a widened vector by getting the abs value of subtracted arguments.
+DEF_MIR_INTRINSIC(vector_labssub_low_v8i8, "vector_labssub_low_v8i8", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8I16, + kArgTyV8I8, kArgTyV8I8) +DEF_MIR_INTRINSIC(vector_labssub_low_v4i16, "vector_labssub_low_v4i16", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4I32, + kArgTyV4I16, kArgTyV4I16) +DEF_MIR_INTRINSIC(vector_labssub_low_v2i32, "vector_labssub_low_v2i32", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2I64, + kArgTyV2I32, kArgTyV2I32) +DEF_MIR_INTRINSIC(vector_labssub_low_v8u8, "vector_labssub_low_v8u8", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8U16, + kArgTyV8U8, kArgTyV8U8) +DEF_MIR_INTRINSIC(vector_labssub_low_v4u16, "vector_labssub_low_v4u16", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4U32, + kArgTyV4U16, kArgTyV4U16) +DEF_MIR_INTRINSIC(vector_labssub_low_v2u32, "vector_labssub_low_v2u32", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2U64, + kArgTyV2U32, kArgTyV2U32) + +// vecTy2 vector_labssub_high(vecTy1 src1, vecTy1 src2) +// Create a widened vector by getting the abs value of subtracted high args.
+DEF_MIR_INTRINSIC(vector_labssub_high_v8i8, "vector_labssub_high_v8i8", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8I16, + kArgTyV16I8, kArgTyV16I8) +DEF_MIR_INTRINSIC(vector_labssub_high_v4i16, "vector_labssub_high_v4i16", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4I32, + kArgTyV8I16, kArgTyV8I16) +DEF_MIR_INTRINSIC(vector_labssub_high_v2i32, "vector_labssub_high_v2i32", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2I64, + kArgTyV4I32, kArgTyV4I32) +DEF_MIR_INTRINSIC(vector_labssub_high_v8u8, "vector_labssub_high_v8u8", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8U16, + kArgTyV16U8, kArgTyV16U8) +DEF_MIR_INTRINSIC(vector_labssub_high_v4u16, "vector_labssub_high_v4u16", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4U32, + kArgTyV8U16, kArgTyV8U16) +DEF_MIR_INTRINSIC(vector_labssub_high_v2u32, "vector_labssub_high_v2u32", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2U64, + kArgTyV4U32, kArgTyV4U32) + // vecTy2 vector_madd(vecTy2 accum, vecTy1 src1, vecTy1 src2) // Multiply the elements of src1 and src2, then accumulate into accum. // Elements of vecTy2 are twice as long as elements of vecTy1. @@ -185,25 +227,47 @@ DEF_MIR_INTRINSIC(vector_madd_v8u8, "vector_madd_v8u8", INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8U16, kArgTyV8U16, kArgTyV8U8, kArgTyV8U8) -// vecTy2 vector_mull(vecTy1 src1, vecTy1 src2) +// vecTy2 vector_mull_low(vecTy1 src1, vecTy1 src2) // Multiply the elements of src1 and src2. Elements of vecTy2 are twice as // long as elements of vecTy1. 
-DEF_MIR_INTRINSIC(vector_mul_v2i32, "vector_mul_v2i32", +DEF_MIR_INTRINSIC(vector_mull_low_v2i32, "vector_mull_low_v2i32", INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2I64, kArgTyV2I32, kArgTyV2I32) -DEF_MIR_INTRINSIC(vector_mul_v4i16, "vector_mul_v4i16", +DEF_MIR_INTRINSIC(vector_mull_low_v4i16, "vector_mull_low_v4i16", INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4I32, kArgTyV4I16, kArgTyV4I16) -DEF_MIR_INTRINSIC(vector_mul_v8i8, "vector_mul_v8i8", +DEF_MIR_INTRINSIC(vector_mull_low_v8i8, "vector_mull_low_v8i8", INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8I16, kArgTyV8I8, kArgTyV8I8) -DEF_MIR_INTRINSIC(vector_mul_v2u32, "vector_mul_v2u32", +DEF_MIR_INTRINSIC(vector_mull_low_v2u32, "vector_mull_low_v2u32", INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2U64, kArgTyV2U32, kArgTyV2U32) -DEF_MIR_INTRINSIC(vector_mul_v4u16, "vector_mul_v4u16", +DEF_MIR_INTRINSIC(vector_mull_low_v4u16, "vector_mull_low_v4u16", INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4U32, kArgTyV4U16, kArgTyV4U16) -DEF_MIR_INTRINSIC(vector_mul_v8u8, "vector_mul_v8u8", +DEF_MIR_INTRINSIC(vector_mull_low_v8u8, "vector_mull_low_v8u8", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8U16, + kArgTyV8U8, kArgTyV8U8) + +// vecTy2 vector_mull_high(vecTy1 src1, vecTy1 src2) +// Multiply the upper elements of src1 and src2. Elements of vecTy2 are twice +// as long as elements of vecTy1. 
+DEF_MIR_INTRINSIC(vector_mull_high_v2i32, "vector_mull_high_v2i32", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2I64, + kArgTyV2I32, kArgTyV2I32) +DEF_MIR_INTRINSIC(vector_mull_high_v4i16, "vector_mull_high_v4i16", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4I32, + kArgTyV4I16, kArgTyV4I16) +DEF_MIR_INTRINSIC(vector_mull_high_v8i8, "vector_mull_high_v8i8", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8I16, + kArgTyV8I8, kArgTyV8I8) +DEF_MIR_INTRINSIC(vector_mull_high_v2u32, "vector_mull_high_v2u32", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2U64, + kArgTyV2U32, kArgTyV2U32) +DEF_MIR_INTRINSIC(vector_mull_high_v4u16, "vector_mull_high_v4u16", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4U32, + kArgTyV4U16, kArgTyV4U16) +DEF_MIR_INTRINSIC(vector_mull_high_v8u8, "vector_mull_high_v8u8", INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8U16, kArgTyV8U8, kArgTyV8U8) @@ -551,6 +615,45 @@ DEF_MIR_INTRINSIC(vector_narrow_high_v8u16, "vector_narrow_high_v8u16", INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV16U8, kArgTyV8U8, kArgTyV8U16) +// vecTy vector_pairwise_adalp(vecTy src1, vecTy2 src2) +// Pairwise add of src2 then accumulate into src1 as dest +DEF_MIR_INTRINSIC(vector_pairwise_adalp_v8i8, "vector_pairwise_adalp_v8i8", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4I16, + kArgTyV4I16, kArgTyV8I8) +DEF_MIR_INTRINSIC(vector_pairwise_adalp_v4i16, "vector_pairwise_adalp_v4i16", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2I32, + kArgTyV2I32, kArgTyV4I16) +DEF_MIR_INTRINSIC(vector_pairwise_adalp_v2i32, "vector_pairwise_adalp_v2i32", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV1I64, + kArgTyV1I64, kArgTyV2I32) +DEF_MIR_INTRINSIC(vector_pairwise_adalp_v8u8, "vector_pairwise_adalp_v8u8", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4U16, + kArgTyV4U16, kArgTyV8U8) +DEF_MIR_INTRINSIC(vector_pairwise_adalp_v4u16, 
"vector_pairwise_adalp_v4u16", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2U32, + kArgTyV2U32, kArgTyV4U16) +DEF_MIR_INTRINSIC(vector_pairwise_adalp_v2u32, "vector_pairwise_adalp_v2u32", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV1U64, + kArgTyV1U64, kArgTyV2U32) +DEF_MIR_INTRINSIC(vector_pairwise_adalp_v16i8, "vector_pairwise_adalp_v16i8", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8I16, + kArgTyV8I16, kArgTyV16I8) +DEF_MIR_INTRINSIC(vector_pairwise_adalp_v8i16, "vector_pairwise_adalp_v8i16", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4I32, + kArgTyV4I32, kArgTyV8I16) +DEF_MIR_INTRINSIC(vector_pairwise_adalp_v4i32, "vector_pairwise_adalp_v4i32", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2I64, + kArgTyV2I64, kArgTyV4I32) +DEF_MIR_INTRINSIC(vector_pairwise_adalp_v16u8, "vector_pairwise_adalp_v16u8", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV8U16, + kArgTyV8U16, kArgTyV16U8) +DEF_MIR_INTRINSIC(vector_pairwise_adalp_v8u16, "vector_pairwise_adalp_v8u16", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV4U32, + kArgTyV4U32, kArgTyV8U16) +DEF_MIR_INTRINSIC(vector_pairwise_adalp_v4u32, "vector_pairwise_adalp_v4u32", + INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2U64, + kArgTyV2U64, kArgTyV4U32) + // vecTy2 vector_pairwise_add(vecTy1 src) // Add pairs of elements from the source vector and put the result into the // destination vector, whose element size is twice and the number of @@ -1021,4 +1124,4 @@ DEF_MIR_INTRINSIC(vector_subw_high_v4u16, "vector_subw_high_v4u16", kArgTyV4U32, kArgTyV8U16) DEF_MIR_INTRINSIC(vector_subw_high_v2u32, "vector_subw_high_v2u32", INTRNISVECTOR | INTRNISPURE | INTRNNOSIDEEFFECT, kArgTyV2U64, - kArgTyV2U64, kArgTyV4U32) \ No newline at end of file + kArgTyV2U64, kArgTyV4U32)